Coverage Report

Created: 2024-04-24 06:23

/src/icu/source/common/ucnvisci.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
**********************************************************************
5
*   Copyright (C) 2000-2016, International Business Machines
6
*   Corporation and others.  All Rights Reserved.
7
**********************************************************************
8
*   file name:  ucnvisci.c
9
*   encoding:   UTF-8
10
*   tab size:   8 (not used)
11
*   indentation:4
12
*
13
*   created on: 2001JUN26
14
*   created by: Ram Viswanadha
15
*
16
*   Date        Name        Description
17
*   24/7/2001   Ram         Added support for EXT character handling
18
*/
19
20
#include "unicode/utypes.h"
21
22
#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
23
24
#include "unicode/ucnv.h"
25
#include "unicode/ucnv_cb.h"
26
#include "unicode/utf16.h"
27
#include "cmemory.h"
28
#include "ucnv_bld.h"
29
#include "ucnv_cnv.h"
30
#include "cstring.h"
31
#include "uassert.h"
32
33
0
/* Masks the converter options down to the index used for lookupInitialData in _ISCIIOpen. */
#define UCNV_OPTIONS_VERSION_MASK 0xf

/* Unicode-side constants. */
#define NUKTA               0x093c
#define HALANT              0x094d
#define ZWNJ                0x200c /* Zero Width Non Joiner */
#define ZWJ                 0x200d /* Zero width Joiner */
#define INVALID_CHAR        0xffff
#define ATR                 0xEF   /* Attribute code */
#define EXT                 0xF0   /* Extension code */
#define DANDA               0x0964
#define DOUBLE_DANDA        0x0965

/* ISCII-side byte values. */
#define ISCII_NUKTA         0xE9
#define ISCII_HALANT        0xE8
#define ISCII_DANDA         0xEA
#define ISCII_INV           0xD9
#define ISCII_VOWEL_SIGN_E  0xE0

#define INDIC_BLOCK_BEGIN   0x0900
#define INDIC_BLOCK_END     0x0D7F
#define INDIC_RANGE         (INDIC_BLOCK_END - INDIC_BLOCK_BEGIN)
#define VOCALLIC_RR         0x0931
#define LF                  0x0A
#define ASCII_END           0xA0
#define NO_CHAR_MARKER      0xFFFE
/*
 * Parenthesized so the product stays a single operand wherever the macro is
 * expanded (e.g. next to '/', '%' or a unary operator). Comparisons such as
 * `x == TELUGU_DELTA` evaluate exactly as before.
 */
#define TELUGU_DELTA        (DELTA * TELUGU)
#define DEV_ABBR_SIGN       0x0970
#define DEV_ANUDATTA        0x0952
#define EXT_RANGE_BEGIN     0xA1
#define EXT_RANGE_END       0xEE

/* Gurmukhi (PNJ) specific constants. */
#define PNJ_DELTA           0x0100
#define PNJ_BINDI           0x0A02
#define PNJ_TIPPI           0x0A70
#define PNJ_SIGN_VIRAMA     0x0A4D
#define PNJ_ADHAK           0x0A71
#define PNJ_HA              0x0A39
#define PNJ_RRA             0x0A5C
68
69
/*
 * Script indices, in the same order as the rows of lookupInitialData.
 * _ISCIIOpen multiplies a script index by DELTA to obtain the converter's
 * initial delta.
 */
typedef enum {
    DEVANAGARI = 0,
    BENGALI    = 1,
    GURMUKHI   = 2,
    GUJARATI   = 3,
    ORIYA      = 4,
    TAMIL      = 5,
    TELUGU     = 6,
    KANNADA    = 7,
    MALAYALAM  = 8,
    DELTA      = 0x80   /* multiplier applied to a script index; not a script itself */
} UniLang;
81
82
/**
83
 * Enumeration for switching code pages if <ATR>+<one of below values>
84
 * is encountered
85
 */
86
typedef enum {
    DEF = 0x40,     /* default; see defDeltaToUnicode / defMaskToUnicode in the converter state */
    RMN = 0x41,     /* presumably Roman -- not referenced in the visible part of the file */
    DEV = 0x42,     /* paired with DEVANAGARI in lookupInitialData */
    BNG = 0x43,     /* paired with BENGALI */
    TML = 0x44,     /* paired with TAMIL */
    TLG = 0x45,     /* paired with TELUGU */
    ASM = 0x46,     /* Assamese; the validity table folds it into the Bengali bit */
    ORI = 0x47,     /* paired with ORIYA */
    KND = 0x48,     /* paired with KANNADA */
    MLM = 0x49,     /* paired with MALAYALAM */
    GJR = 0x4A,     /* paired with GUJARATI */
    PNJ = 0x4B,     /* paired with GURMUKHI */

    /* 0x71..0x76: presumably Arabic-script languages -- TODO confirm against the ISCII spec */
    ARB = 0x71,
    PES = 0x72,
    URD = 0x73,
    SND = 0x74,
    KSM = 0x75,
    PST = 0x76
} ISCIILang;
106
107
/*
 * One validity bit per script. validityTable ORs these together for every
 * code point; lookupInitialData assigns KND_MASK to both Telugu and Kannada.
 * Listed in ascending bit order; ZERO is the "no script" filler.
 */
typedef enum {
    ZERO     = 0x00,
    TML_MASK = 0x01,
    MLM_MASK = 0x02,
    KND_MASK = 0x04,
    BNG_MASK = 0x08,
    ORI_MASK = 0x10,
    GJR_MASK = 0x20,
    PNJ_MASK = 0x40,
    DEV_MASK = 0x80
} MaskEnum;
118
119
/* Prefix of the converter name; _ISCIIOpen appends one version digit to it. */
#define ISCII_CNV_PREFIX "ISCII,version="
120
121
/* Per-converter ISCII state; _ISCIIOpen allocates it and stores it in cnv->extraInfo. */
typedef struct {
122
    UChar contextCharToUnicode;         /* previous Unicode codepoint for contextual analysis (toUnicode side) */
123
    UChar contextCharFromUnicode;       /* previous Unicode codepoint for contextual analysis (fromUnicode side) */
124
    uint16_t defDeltaToUnicode;         /* delta for switching to default state when DEF is encountered; _ISCIIReset also restores currentDeltaFromUnicode from it */
125
    uint16_t currentDeltaFromUnicode;   /* current delta in Indic block (fromUnicode side) */
126
    uint16_t currentDeltaToUnicode;     /* current delta in Indic block (toUnicode side) */
127
    MaskEnum currentMaskFromUnicode;    /* mask for current state in fromUnicode */
128
    MaskEnum currentMaskToUnicode;      /* mask for current state in toUnicode */
129
    MaskEnum defMaskToUnicode;          /* mask for default state in toUnicode; _ISCIIReset also restores currentMaskFromUnicode from it */
130
    UBool isFirstBuffer;                /* boolean for fromUnicode to see if we need to announce the first script */
131
    UBool resetToDefaultToUnicode;      /* boolean for resetting to default delta and mask when a newline is encountered */
132
    /* Holds ISCII_CNV_PREFIX + one version digit + NUL: sizeof() already counts the prefix's NUL, the +1 is the digit. */
    char name[sizeof(ISCII_CNV_PREFIX) + 1];
133
    UChar32 prevToUnicodeStatus;        /* Hold the previous toUnicodeStatus. This is necessary because we may need to know the last two code points. */
134
} UConverterDataISCII;
135
136
/* One row of lookupInitialData: the three identifiers this file keeps for a script. */
typedef struct LookupDataStruct {
137
    UniLang uniLang;        /* script index; _ISCIIOpen multiplies it by DELTA to get the initial delta */
138
    MaskEnum maskEnum;      /* validity bit that _ISCIIOpen installs as the initial mask */
139
    ISCIILang isciiLang;    /* ISCIILang value paired with the script */
140
} LookupDataStruct;
141
142
/*
 * Initial settings per script. _ISCIIOpen indexes this table with
 * (options & UCNV_OPTIONS_VERSION_MASK) and only accepts indices below 9,
 * which matches the nine rows here.
 */
static const LookupDataStruct lookupInitialData[]={
143
    { DEVANAGARI, DEV_MASK,  DEV },
144
    { BENGALI,    BNG_MASK,  BNG },
145
    { GURMUKHI,   PNJ_MASK,  PNJ },
146
    { GUJARATI,   GJR_MASK,  GJR },
147
    { ORIYA,      ORI_MASK,  ORI },
148
    { TAMIL,      TML_MASK,  TML },
149
    /* Telugu deliberately reuses KND_MASK: the validity table keeps one shared bit for Telugu and Kannada. */
    { TELUGU,     KND_MASK,  TLG },
150
    { KANNADA,    KND_MASK,  KND },
151
    { MALAYALAM,  MLM_MASK,  MLM }
152
};
153
154
/*
155
 * For special handling of certain Gurmukhi characters.
156
 * Bit 0 (value 1): PNJ consonant
157
 * Bit 1 (value 2): PNJ Bindi Tippi
158
 *
 * The 80 entries cover U+0A00..U+0A4F and are indexed with (c - 0xa00).
 * isPNJConsonant() tests bit 0; isPNJBindiTippi() returns the entry shifted
 * right by one, so it is nonzero for entries 2 and 3.
 */
159
static const uint8_t pnjMap[80] = {
160
    /* 0A00..0A0F */
161
    0, 0, 0, 0, 0, 2, 0, 2,  0, 0, 0, 0, 0, 0, 0, 0,
162
    /* 0A10..0A1F */
163
    0, 0, 0, 0, 0, 3, 3, 3,  3, 3, 3, 3, 3, 3, 3, 3,
164
    /* 0A20..0A2F */
165
    3, 3, 3, 3, 3, 3, 3, 3,  3, 0, 3, 3, 3, 3, 3, 3,
166
    /* 0A30..0A3F */
167
    3, 0, 0, 0, 0, 3, 3, 0,  3, 3, 0, 0, 0, 0, 0, 2,
168
    /* 0A40..0A4F */
169
    0, 2, 2, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0
170
};
171
172
/*
 * Reports whether pnjMap flags c as a PNJ consonant (bit 0 of its entry).
 * Code points outside 0x0A00..0x0A4F are never consonants.
 */
static UBool
isPNJConsonant(UChar32 c) {
    if (0xa00 <= c && c < 0xa50) {
        /* In range: the table index is the offset from 0x0A00. */
        return (UBool)(pnjMap[c - 0xa00] & 1);
    }
    return FALSE;
}
180
181
/*
 * Returns the pnjMap entry for c shifted right by one bit, i.e. nonzero when
 * bit 1 ("PNJ Bindi Tippi") is set. Code points outside 0x0A00..0x0A4F
 * yield FALSE.
 */
static UBool
isPNJBindiTippi(UChar32 c) {
    if (0xa00 <= c && c < 0xa50) {
        /* In range: the table index is the offset from 0x0A00. */
        return (UBool)(pnjMap[c - 0xa00] >> 1);
    }
    return FALSE;
}
189
U_CDECL_BEGIN
190
static void  U_CALLCONV
191
0
_ISCIIOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode) {
192
0
    if(pArgs->onlyTestIsLoadable) {
193
0
        return;
194
0
    }
195
196
0
    cnv->extraInfo = uprv_malloc(sizeof(UConverterDataISCII));
197
198
0
    if (cnv->extraInfo != NULL) {
199
0
        int32_t len=0;
200
0
        UConverterDataISCII *converterData=
201
0
                (UConverterDataISCII *) cnv->extraInfo;
202
0
        converterData->contextCharToUnicode=NO_CHAR_MARKER;
203
0
        cnv->toUnicodeStatus = missingCharMarker;
204
0
        converterData->contextCharFromUnicode=0x0000;
205
0
        converterData->resetToDefaultToUnicode=FALSE;
206
        /* check if the version requested is supported */
207
0
        if ((pArgs->options & UCNV_OPTIONS_VERSION_MASK) < 9) {
208
            /* initialize state variables */
209
0
            converterData->currentDeltaFromUnicode
210
0
                    = converterData->currentDeltaToUnicode
211
0
                            = converterData->defDeltaToUnicode = (uint16_t)(lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].uniLang * DELTA);
212
213
0
            converterData->currentMaskFromUnicode
214
0
                    = converterData->currentMaskToUnicode
215
0
                            = converterData->defMaskToUnicode = lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].maskEnum;
216
            
217
0
            converterData->isFirstBuffer=TRUE;
218
0
            (void)uprv_strcpy(converterData->name, ISCII_CNV_PREFIX);
219
0
            len = (int32_t)uprv_strlen(converterData->name);
220
0
            converterData->name[len]= (char)((pArgs->options & UCNV_OPTIONS_VERSION_MASK) + '0');
221
0
            converterData->name[len+1]=0;
222
            
223
0
            converterData->prevToUnicodeStatus = 0x0000;
224
0
        } else {
225
0
            uprv_free(cnv->extraInfo);
226
0
            cnv->extraInfo = NULL;
227
0
            *errorCode = U_ILLEGAL_ARGUMENT_ERROR;
228
0
        }
229
230
0
    } else {
231
0
        *errorCode =U_MEMORY_ALLOCATION_ERROR;
232
0
    }
233
0
}
234
235
static void U_CALLCONV
236
0
_ISCIIClose(UConverter *cnv) {
237
0
    if (cnv->extraInfo!=NULL) {
238
0
        if (!cnv->isExtraLocal) {
239
0
            uprv_free(cnv->extraInfo);
240
0
        }
241
0
        cnv->extraInfo=NULL;
242
0
    }
243
0
}
244
245
static const char*  U_CALLCONV
246
0
_ISCIIgetName(const UConverter* cnv) {
247
0
    if (cnv->extraInfo) {
248
0
        UConverterDataISCII* myData= (UConverterDataISCII*)cnv->extraInfo;
249
0
        return myData->name;
250
0
    }
251
0
    return NULL;
252
0
}
253
254
static void U_CALLCONV
255
0
_ISCIIReset(UConverter *cnv, UConverterResetChoice choice) {
256
0
    UConverterDataISCII* data =(UConverterDataISCII *) (cnv->extraInfo);
257
0
    if (choice<=UCNV_RESET_TO_UNICODE) {
258
0
        cnv->toUnicodeStatus = missingCharMarker;
259
0
        cnv->mode=0;
260
0
        data->currentDeltaToUnicode=data->defDeltaToUnicode;
261
0
        data->currentMaskToUnicode = data->defMaskToUnicode;
262
0
        data->contextCharToUnicode=NO_CHAR_MARKER;
263
0
        data->prevToUnicodeStatus = 0x0000;
264
0
    }
265
0
    if (choice!=UCNV_RESET_TO_UNICODE) {
266
0
        cnv->fromUChar32=0x0000;
267
0
        data->contextCharFromUnicode=0x00;
268
0
        data->currentMaskFromUnicode=data->defMaskToUnicode;
269
0
        data->currentDeltaFromUnicode=data->defDeltaToUnicode;
270
0
        data->isFirstBuffer=TRUE;
271
0
        data->resetToDefaultToUnicode=FALSE;
272
0
    }
273
0
}
274
275
/**
276
 * The values in the validity table are indexed by the lower bits of the Unicode
277
 * range 0x0900 - 0x097f (the table has 128 entries). The values have a structure like:
278
 *       ---------------------------------------------------------------
279
 *      | DEV   | PNJ   | GJR   | ORI   | BNG   | TLG   | MLM   | TML   |
280
 *      |       |       |       |       | ASM   | KND   |       |       |
281
 *       ---------------------------------------------------------------
282
 * If a code point is valid in a particular script
283
 * then that bit is turned on
284
 *
285
 * Unicode does not distinguish between Bengali and Assamese, so we use 1 bit
286
 * to represent both of these languages.
287
 *
288
 * Telugu and Kannada have same codepoints except for Vocallic_RR which we special case
289
 * and combine and use 1 bit to represent these languages.
290
 *
291
 * TODO: It is probably easier to understand and maintain to change this
292
 * to use uint16_t and give each of the 9 Unicode/script blocks its own bit.
293
 */
294
295
static const uint8_t validityTable[128] = {
296
/* This state table is tool generated please do not edit unless you know exactly what you are doing */
297
/* Note: This table was edited to mirror the Windows XP implementation */
298
/*ISCII:Valid:Unicode */
299
/*0xa0 : 0x00: 0x900  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
300
/*0xa1 : 0xb8: 0x901  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
301
/*0xa2 : 0xfe: 0x902  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
302
/*0xa3 : 0xbf: 0x903  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
303
/*0x00 : 0x00: 0x904  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
304
/*0xa4 : 0xff: 0x905  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
305
/*0xa5 : 0xff: 0x906  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
306
/*0xa6 : 0xff: 0x907  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
307
/*0xa7 : 0xff: 0x908  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
308
/*0xa8 : 0xff: 0x909  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
309
/*0xa9 : 0xff: 0x90a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
310
/*0xaa : 0xfe: 0x90b  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
311
/*0x00 : 0x00: 0x90c  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
312
/*0xae : 0x80: 0x90d  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
313
/*0xab : 0x87: 0x90e  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
314
/*0xac : 0xff: 0x90f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
315
/*0xad : 0xff: 0x910  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
316
/*0xb2 : 0x80: 0x911  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
317
/*0xaf : 0x87: 0x912  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
318
/*0xb0 : 0xff: 0x913  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
319
/*0xb1 : 0xff: 0x914  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
320
/*0xb3 : 0xff: 0x915  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
321
/*0xb4 : 0xfe: 0x916  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
322
/*0xb5 : 0xfe: 0x917  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
323
/*0xb6 : 0xfe: 0x918  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
324
/*0xb7 : 0xff: 0x919  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
325
/*0xb8 : 0xff: 0x91a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
326
/*0xb9 : 0xfe: 0x91b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
327
/*0xba : 0xff: 0x91c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
328
/*0xbb : 0xfe: 0x91d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
329
/*0xbc : 0xff: 0x91e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
330
/*0xbd : 0xff: 0x91f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
331
/*0xbe : 0xfe: 0x920  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
332
/*0xbf : 0xfe: 0x921  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
333
/*0xc0 : 0xfe: 0x922  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
334
/*0xc1 : 0xff: 0x923  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
335
/*0xc2 : 0xff: 0x924  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
336
/*0xc3 : 0xfe: 0x925  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
337
/*0xc4 : 0xfe: 0x926  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
338
/*0xc5 : 0xfe: 0x927  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
339
/*0xc6 : 0xff: 0x928  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
340
/*0xc7 : 0x81: 0x929  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + TML_MASK ,
341
/*0xc8 : 0xff: 0x92a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
342
/*0xc9 : 0xfe: 0x92b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
343
/*0xca : 0xfe: 0x92c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
344
/*0xcb : 0xfe: 0x92d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
345
/*0xcc : 0xfe: 0x92e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
346
/*0xcd : 0xff: 0x92f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
347
/*0xcf : 0xff: 0x930  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
348
/*0xd0 : 0x87: 0x931  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + MLM_MASK + TML_MASK ,
349
/*0xd1 : 0xff: 0x932  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
350
/*0xd2 : 0xb7: 0x933  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
351
/*0xd3 : 0x83: 0x934  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + MLM_MASK + TML_MASK ,
352
/*0xd4 : 0xff: 0x935  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
353
/*0xd5 : 0xfe: 0x936  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
354
/*0xd6 : 0xbf: 0x937  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
355
/*0xd7 : 0xff: 0x938  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
356
/*0xd8 : 0xff: 0x939  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
357
/*0x00 : 0x00: 0x93A  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
358
/*0x00 : 0x00: 0x93B  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
359
/*0xe9 : 0xda: 0x93c  */ DEV_MASK + PNJ_MASK + ZERO     + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
360
/*0x00 : 0x00: 0x93d  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
361
/*0xda : 0xff: 0x93e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
362
/*0xdb : 0xff: 0x93f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
363
/*0xdc : 0xff: 0x940  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
364
/*0xdd : 0xff: 0x941  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
365
/*0xde : 0xff: 0x942  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
366
/*0xdf : 0xbe: 0x943  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
367
/*0x00 : 0x00: 0x944  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + BNG_MASK + KND_MASK + ZERO     + ZERO     ,
368
/*0xe3 : 0x80: 0x945  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
369
/*0xe0 : 0x87: 0x946  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
370
/*0xe1 : 0xff: 0x947  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
371
/*0xe2 : 0xff: 0x948  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
372
/*0xe7 : 0x80: 0x949  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
373
/*0xe4 : 0x87: 0x94a  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
374
/*0xe5 : 0xff: 0x94b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
375
/*0xe6 : 0xff: 0x94c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
376
/*0xe8 : 0xff: 0x94d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
377
/*0xec : 0x00: 0x94e  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
378
/*0xed : 0x00: 0x94f  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
379
/*0x00 : 0x00: 0x950  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
380
/*0x00 : 0x00: 0x951  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
381
/*0x00 : 0x00: 0x952  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
382
/*0x00 : 0x00: 0x953  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
383
/*0x00 : 0x00: 0x954  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
384
/*0x00 : 0x00: 0x955  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + ZERO     + ZERO     ,
385
/*0x00 : 0x00: 0x956  */ ZERO     + ZERO     + ZERO     + ORI_MASK + ZERO     + KND_MASK + ZERO     + ZERO     ,
386
/*0x00 : 0x00: 0x957  */ ZERO     + ZERO     + ZERO     + ORI_MASK + BNG_MASK + ZERO     + MLM_MASK + ZERO     ,
387
/*0x00 : 0x00: 0x958  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
388
/*0x00 : 0x00: 0x959  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
389
/*0x00 : 0x00: 0x95a  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
390
/*0x00 : 0x00: 0x95b  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
391
/*0x00 : 0x00: 0x95c  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + BNG_MASK + ZERO     + ZERO     + ZERO     ,
392
/*0x00 : 0x00: 0x95d  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
393
/*0x00 : 0x00: 0x95e  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
394
/*0xce : 0x98: 0x95f  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
395
/*0x00 : 0x00: 0x960  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
396
/*0x00 : 0x00: 0x961  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
397
/*0x00 : 0x00: 0x962  */ DEV_MASK + ZERO     + ZERO     + ZERO     + BNG_MASK + ZERO     + ZERO     + ZERO     ,
398
/*0x00 : 0x00: 0x963  */ DEV_MASK + ZERO     + ZERO     + ZERO     + BNG_MASK + ZERO     + ZERO     + ZERO     ,
399
/*0xea : 0xf8: 0x964  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
400
/*0xeaea : 0x00: 0x965*/ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
401
/*0xf1 : 0xff: 0x966  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
402
/*0xf2 : 0xff: 0x967  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
403
/*0xf3 : 0xff: 0x968  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
404
/*0xf4 : 0xff: 0x969  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
405
/*0xf5 : 0xff: 0x96a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
406
/*0xf6 : 0xff: 0x96b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
407
/*0xf7 : 0xff: 0x96c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
408
/*0xf8 : 0xff: 0x96d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
409
/*0xf9 : 0xff: 0x96e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
410
/*0xfa : 0xff: 0x96f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
411
/*0x00 : 0x80: 0x970  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
412
/*
413
 * The length of the array is 128 to provide values for 0x900..0x97f.
414
 * The last 15 entries for 0x971..0x97f of the validity table are all zero
415
 * because no Indic script uses such Unicode code points.
416
 */
417
/*0x00 : 0x00: 0x9yz  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO
418
};
419
420
static const uint16_t fromUnicodeTable[128]={
421
    0x00a0 ,/* 0x0900 */
422
    0x00a1 ,/* 0x0901 */
423
    0x00a2 ,/* 0x0902 */
424
    0x00a3 ,/* 0x0903 */
425
    0xa4e0 ,/* 0x0904 */
426
    0x00a4 ,/* 0x0905 */
427
    0x00a5 ,/* 0x0906 */
428
    0x00a6 ,/* 0x0907 */
429
    0x00a7 ,/* 0x0908 */
430
    0x00a8 ,/* 0x0909 */
431
    0x00a9 ,/* 0x090a */
432
    0x00aa ,/* 0x090b */
433
    0xA6E9 ,/* 0x090c */
434
    0x00ae ,/* 0x090d */
435
    0x00ab ,/* 0x090e */
436
    0x00ac ,/* 0x090f */
437
    0x00ad ,/* 0x0910 */
438
    0x00b2 ,/* 0x0911 */
439
    0x00af ,/* 0x0912 */
440
    0x00b0 ,/* 0x0913 */
441
    0x00b1 ,/* 0x0914 */
442
    0x00b3 ,/* 0x0915 */
443
    0x00b4 ,/* 0x0916 */
444
    0x00b5 ,/* 0x0917 */
445
    0x00b6 ,/* 0x0918 */
446
    0x00b7 ,/* 0x0919 */
447
    0x00b8 ,/* 0x091a */
448
    0x00b9 ,/* 0x091b */
449
    0x00ba ,/* 0x091c */
450
    0x00bb ,/* 0x091d */
451
    0x00bc ,/* 0x091e */
452
    0x00bd ,/* 0x091f */
453
    0x00be ,/* 0x0920 */
454
    0x00bf ,/* 0x0921 */
455
    0x00c0 ,/* 0x0922 */
456
    0x00c1 ,/* 0x0923 */
457
    0x00c2 ,/* 0x0924 */
458
    0x00c3 ,/* 0x0925 */
459
    0x00c4 ,/* 0x0926 */
460
    0x00c5 ,/* 0x0927 */
461
    0x00c6 ,/* 0x0928 */
462
    0x00c7 ,/* 0x0929 */
463
    0x00c8 ,/* 0x092a */
464
    0x00c9 ,/* 0x092b */
465
    0x00ca ,/* 0x092c */
466
    0x00cb ,/* 0x092d */
467
    0x00cc ,/* 0x092e */
468
    0x00cd ,/* 0x092f */
469
    0x00cf ,/* 0x0930 */
470
    0x00d0 ,/* 0x0931 */
471
    0x00d1 ,/* 0x0932 */
472
    0x00d2 ,/* 0x0933 */
473
    0x00d3 ,/* 0x0934 */
474
    0x00d4 ,/* 0x0935 */
475
    0x00d5 ,/* 0x0936 */
476
    0x00d6 ,/* 0x0937 */
477
    0x00d7 ,/* 0x0938 */
478
    0x00d8 ,/* 0x0939 */
479
    0xFFFF ,/* 0x093A */
480
    0xFFFF ,/* 0x093B */
481
    0x00e9 ,/* 0x093c */
482
    0xEAE9 ,/* 0x093d */
483
    0x00da ,/* 0x093e */
484
    0x00db ,/* 0x093f */
485
    0x00dc ,/* 0x0940 */
486
    0x00dd ,/* 0x0941 */
487
    0x00de ,/* 0x0942 */
488
    0x00df ,/* 0x0943 */
489
    0xDFE9 ,/* 0x0944 */
490
    0x00e3 ,/* 0x0945 */
491
    0x00e0 ,/* 0x0946 */
492
    0x00e1 ,/* 0x0947 */
493
    0x00e2 ,/* 0x0948 */
494
    0x00e7 ,/* 0x0949 */
495
    0x00e4 ,/* 0x094a */
496
    0x00e5 ,/* 0x094b */
497
    0x00e6 ,/* 0x094c */
498
    0x00e8 ,/* 0x094d */
499
    0x00ec ,/* 0x094e */
500
    0x00ed ,/* 0x094f */
501
    0xA1E9 ,/* 0x0950 */ /* OM Symbol */
502
    0xFFFF ,/* 0x0951 */
503
    0xF0B8 ,/* 0x0952 */
504
    0xFFFF ,/* 0x0953 */
505
    0xFFFF ,/* 0x0954 */
506
    0xFFFF ,/* 0x0955 */
507
    0xFFFF ,/* 0x0956 */
508
    0xFFFF ,/* 0x0957 */
509
    0xb3e9 ,/* 0x0958 */
510
    0xb4e9 ,/* 0x0959 */
511
    0xb5e9 ,/* 0x095a */
512
    0xbae9 ,/* 0x095b */
513
    0xbfe9 ,/* 0x095c */
514
    0xC0E9 ,/* 0x095d */
515
    0xc9e9 ,/* 0x095e */
516
    0x00ce ,/* 0x095f */
517
    0xAAe9 ,/* 0x0960 */
518
    0xA7E9 ,/* 0x0961 */
519
    0xDBE9 ,/* 0x0962 */
520
    0xDCE9 ,/* 0x0963 */
521
    0x00ea ,/* 0x0964 */
522
    0xeaea ,/* 0x0965 */
523
    0x00f1 ,/* 0x0966 */
524
    0x00f2 ,/* 0x0967 */
525
    0x00f3 ,/* 0x0968 */
526
    0x00f4 ,/* 0x0969 */
527
    0x00f5 ,/* 0x096a */
528
    0x00f6 ,/* 0x096b */
529
    0x00f7 ,/* 0x096c */
530
    0x00f8 ,/* 0x096d */
531
    0x00f9 ,/* 0x096e */
532
    0x00fa ,/* 0x096f */
533
    0xF0BF ,/* 0x0970 */
534
    0xFFFF ,/* 0x0971 */
535
    0xFFFF ,/* 0x0972 */
536
    0xFFFF ,/* 0x0973 */
537
    0xFFFF ,/* 0x0974 */
538
    0xFFFF ,/* 0x0975 */
539
    0xFFFF ,/* 0x0976 */
540
    0xFFFF ,/* 0x0977 */
541
    0xFFFF ,/* 0x0978 */
542
    0xFFFF ,/* 0x0979 */
543
    0xFFFF ,/* 0x097a */
544
    0xFFFF ,/* 0x097b */
545
    0xFFFF ,/* 0x097c */
546
    0xFFFF ,/* 0x097d */
547
    0xFFFF ,/* 0x097e */
548
    0xFFFF ,/* 0x097f */
549
};
550
/*
 * Maps one ISCII byte (the array index, 0x00..0xff) to a 16-bit Unicode value.
 * GET_MAPPING reads this table with the source byte as index.
 *
 *  - 0x00..0xa0 map to the code point with the same numeric value.
 *  - 0xa1..0xea and 0xf1..0xfa map into the 0x09xx block that starts at
 *    INDIC_BLOCK_BEGIN, except 0xd9 (ISCII_INV), which maps to ZWJ (0x200D).
 *  - 0xeb..0xf0 and 0xfb..0xff hold 0xFFFF, the same value as INVALID_CHAR.
 *    This range includes ATR (0xEF) and EXT (0xF0), which therefore have no
 *    direct single-byte mapping in this table.
 */
static const uint16_t toUnicodeTable[256]={
551
    0x0000,/* 0x00 */
552
    0x0001,/* 0x01 */
553
    0x0002,/* 0x02 */
554
    0x0003,/* 0x03 */
555
    0x0004,/* 0x04 */
556
    0x0005,/* 0x05 */
557
    0x0006,/* 0x06 */
558
    0x0007,/* 0x07 */
559
    0x0008,/* 0x08 */
560
    0x0009,/* 0x09 */
561
    0x000a,/* 0x0a */
562
    0x000b,/* 0x0b */
563
    0x000c,/* 0x0c */
564
    0x000d,/* 0x0d */
565
    0x000e,/* 0x0e */
566
    0x000f,/* 0x0f */
567
    0x0010,/* 0x10 */
568
    0x0011,/* 0x11 */
569
    0x0012,/* 0x12 */
570
    0x0013,/* 0x13 */
571
    0x0014,/* 0x14 */
572
    0x0015,/* 0x15 */
573
    0x0016,/* 0x16 */
574
    0x0017,/* 0x17 */
575
    0x0018,/* 0x18 */
576
    0x0019,/* 0x19 */
577
    0x001a,/* 0x1a */
578
    0x001b,/* 0x1b */
579
    0x001c,/* 0x1c */
580
    0x001d,/* 0x1d */
581
    0x001e,/* 0x1e */
582
    0x001f,/* 0x1f */
583
    0x0020,/* 0x20 */
584
    0x0021,/* 0x21 */
585
    0x0022,/* 0x22 */
586
    0x0023,/* 0x23 */
587
    0x0024,/* 0x24 */
588
    0x0025,/* 0x25 */
589
    0x0026,/* 0x26 */
590
    0x0027,/* 0x27 */
591
    0x0028,/* 0x28 */
592
    0x0029,/* 0x29 */
593
    0x002a,/* 0x2a */
594
    0x002b,/* 0x2b */
595
    0x002c,/* 0x2c */
596
    0x002d,/* 0x2d */
597
    0x002e,/* 0x2e */
598
    0x002f,/* 0x2f */
599
    0x0030,/* 0x30 */
600
    0x0031,/* 0x31 */
601
    0x0032,/* 0x32 */
602
    0x0033,/* 0x33 */
603
    0x0034,/* 0x34 */
604
    0x0035,/* 0x35 */
605
    0x0036,/* 0x36 */
606
    0x0037,/* 0x37 */
607
    0x0038,/* 0x38 */
608
    0x0039,/* 0x39 */
609
    0x003A,/* 0x3A */
610
    0x003B,/* 0x3B */
611
    0x003c,/* 0x3c */
612
    0x003d,/* 0x3d */
613
    0x003e,/* 0x3e */
614
    0x003f,/* 0x3f */
615
    0x0040,/* 0x40 */
616
    0x0041,/* 0x41 */
617
    0x0042,/* 0x42 */
618
    0x0043,/* 0x43 */
619
    0x0044,/* 0x44 */
620
    0x0045,/* 0x45 */
621
    0x0046,/* 0x46 */
622
    0x0047,/* 0x47 */
623
    0x0048,/* 0x48 */
624
    0x0049,/* 0x49 */
625
    0x004a,/* 0x4a */
626
    0x004b,/* 0x4b */
627
    0x004c,/* 0x4c */
628
    0x004d,/* 0x4d */
629
    0x004e,/* 0x4e */
630
    0x004f,/* 0x4f */
631
    0x0050,/* 0x50 */
632
    0x0051,/* 0x51 */
633
    0x0052,/* 0x52 */
634
    0x0053,/* 0x53 */
635
    0x0054,/* 0x54 */
636
    0x0055,/* 0x55 */
637
    0x0056,/* 0x56 */
638
    0x0057,/* 0x57 */
639
    0x0058,/* 0x58 */
640
    0x0059,/* 0x59 */
641
    0x005a,/* 0x5a */
642
    0x005b,/* 0x5b */
643
    0x005c,/* 0x5c */
644
    0x005d,/* 0x5d */
645
    0x005e,/* 0x5e */
646
    0x005f,/* 0x5f */
647
    0x0060,/* 0x60 */
648
    0x0061,/* 0x61 */
649
    0x0062,/* 0x62 */
650
    0x0063,/* 0x63 */
651
    0x0064,/* 0x64 */
652
    0x0065,/* 0x65 */
653
    0x0066,/* 0x66 */
654
    0x0067,/* 0x67 */
655
    0x0068,/* 0x68 */
656
    0x0069,/* 0x69 */
657
    0x006a,/* 0x6a */
658
    0x006b,/* 0x6b */
659
    0x006c,/* 0x6c */
660
    0x006d,/* 0x6d */
661
    0x006e,/* 0x6e */
662
    0x006f,/* 0x6f */
663
    0x0070,/* 0x70 */
664
    0x0071,/* 0x71 */
665
    0x0072,/* 0x72 */
666
    0x0073,/* 0x73 */
667
    0x0074,/* 0x74 */
668
    0x0075,/* 0x75 */
669
    0x0076,/* 0x76 */
670
    0x0077,/* 0x77 */
671
    0x0078,/* 0x78 */
672
    0x0079,/* 0x79 */
673
    0x007a,/* 0x7a */
674
    0x007b,/* 0x7b */
675
    0x007c,/* 0x7c */
676
    0x007d,/* 0x7d */
677
    0x007e,/* 0x7e */
678
    0x007f,/* 0x7f */
679
    0x0080,/* 0x80 */
680
    0x0081,/* 0x81 */
681
    0x0082,/* 0x82 */
682
    0x0083,/* 0x83 */
683
    0x0084,/* 0x84 */
684
    0x0085,/* 0x85 */
685
    0x0086,/* 0x86 */
686
    0x0087,/* 0x87 */
687
    0x0088,/* 0x88 */
688
    0x0089,/* 0x89 */
689
    0x008a,/* 0x8a */
690
    0x008b,/* 0x8b */
691
    0x008c,/* 0x8c */
692
    0x008d,/* 0x8d */
693
    0x008e,/* 0x8e */
694
    0x008f,/* 0x8f */
695
    0x0090,/* 0x90 */
696
    0x0091,/* 0x91 */
697
    0x0092,/* 0x92 */
698
    0x0093,/* 0x93 */
699
    0x0094,/* 0x94 */
700
    0x0095,/* 0x95 */
701
    0x0096,/* 0x96 */
702
    0x0097,/* 0x97 */
703
    0x0098,/* 0x98 */
704
    0x0099,/* 0x99 */
705
    0x009a,/* 0x9a */
706
    0x009b,/* 0x9b */
707
    0x009c,/* 0x9c */
708
    0x009d,/* 0x9d */
709
    0x009e,/* 0x9e */
710
    0x009f,/* 0x9f */
711
    0x00A0,/* 0xa0 */
712
    0x0901,/* 0xa1 */
713
    0x0902,/* 0xa2 */
714
    0x0903,/* 0xa3 */
715
    0x0905,/* 0xa4 */
716
    0x0906,/* 0xa5 */
717
    0x0907,/* 0xa6 */
718
    0x0908,/* 0xa7 */
719
    0x0909,/* 0xa8 */
720
    0x090a,/* 0xa9 */
721
    0x090b,/* 0xaa */
722
    0x090e,/* 0xab */
723
    0x090f,/* 0xac */
724
    0x0910,/* 0xad */
725
    0x090d,/* 0xae */
726
    0x0912,/* 0xaf */
727
    0x0913,/* 0xb0 */
728
    0x0914,/* 0xb1 */
729
    0x0911,/* 0xb2 */
730
    0x0915,/* 0xb3 */
731
    0x0916,/* 0xb4 */
732
    0x0917,/* 0xb5 */
733
    0x0918,/* 0xb6 */
734
    0x0919,/* 0xb7 */
735
    0x091a,/* 0xb8 */
736
    0x091b,/* 0xb9 */
737
    0x091c,/* 0xba */
738
    0x091d,/* 0xbb */
739
    0x091e,/* 0xbc */
740
    0x091f,/* 0xbd */
741
    0x0920,/* 0xbe */
742
    0x0921,/* 0xbf */
743
    0x0922,/* 0xc0 */
744
    0x0923,/* 0xc1 */
745
    0x0924,/* 0xc2 */
746
    0x0925,/* 0xc3 */
747
    0x0926,/* 0xc4 */
748
    0x0927,/* 0xc5 */
749
    0x0928,/* 0xc6 */
750
    0x0929,/* 0xc7 */
751
    0x092a,/* 0xc8 */
752
    0x092b,/* 0xc9 */
753
    0x092c,/* 0xca */
754
    0x092d,/* 0xcb */
755
    0x092e,/* 0xcc */
756
    0x092f,/* 0xcd */
757
    0x095f,/* 0xce */
758
    0x0930,/* 0xcf */
759
    0x0931,/* 0xd0 */
760
    0x0932,/* 0xd1 */
761
    0x0933,/* 0xd2 */
762
    0x0934,/* 0xd3 */
763
    0x0935,/* 0xd4 */
764
    0x0936,/* 0xd5 */
765
    0x0937,/* 0xd6 */
766
    0x0938,/* 0xd7 */
767
    0x0939,/* 0xd8 */
768
    0x200D,/* 0xd9 */ /* ISCII_INV -> ZWJ */
769
    0x093e,/* 0xda */
770
    0x093f,/* 0xdb */
771
    0x0940,/* 0xdc */
772
    0x0941,/* 0xdd */
773
    0x0942,/* 0xde */
774
    0x0943,/* 0xdf */
775
    0x0946,/* 0xe0 */ /* ISCII_VOWEL_SIGN_E */
776
    0x0947,/* 0xe1 */
777
    0x0948,/* 0xe2 */
778
    0x0945,/* 0xe3 */
779
    0x094a,/* 0xe4 */
780
    0x094b,/* 0xe5 */
781
    0x094c,/* 0xe6 */
782
    0x0949,/* 0xe7 */
783
    0x094d,/* 0xe8 */ /* ISCII_HALANT -> HALANT */
784
    0x093c,/* 0xe9 */ /* ISCII_NUKTA -> NUKTA */
785
    0x0964,/* 0xea */ /* ISCII_DANDA -> DANDA */
786
    0xFFFF,/* 0xeb */
787
    0xFFFF,/* 0xec */
788
    0xFFFF,/* 0xed */
789
    0xFFFF,/* 0xee */
790
    0xFFFF,/* 0xef */ /* ATR */
791
    0xFFFF,/* 0xf0 */ /* EXT */
792
    0x0966,/* 0xf1 */
793
    0x0967,/* 0xf2 */
794
    0x0968,/* 0xf3 */
795
    0x0969,/* 0xf4 */
796
    0x096a,/* 0xf5 */
797
    0x096b,/* 0xf6 */
798
    0x096c,/* 0xf7 */
799
    0x096d,/* 0xf8 */
800
    0x096e,/* 0xf9 */
801
    0x096f,/* 0xfa */
802
    0xFFFF,/* 0xfb */
803
    0xFFFF,/* 0xfc */
804
    0xFFFF,/* 0xfd */
805
    0xFFFF,/* 0xfe */
806
    0xFFFF /* 0xff */
807
};
808
809
/*
 * Special-case pairs of { ISCII byte, Unicode code point }.
 * Row 0 is a header: its first element is the number of rows in the array,
 * including the header itself.
 * NOTE(review): judging by the name, row 1 presumably applies when
 * ISCII_VOWEL_SIGN_E (0xE0) follows byte 0xA4 -- confirm against the
 * to-Unicode converter that consumes this table.
 */
static const uint16_t vowelSignESpecialCases[][2]={
810
  { 2 /*length of array*/    , 0      },
811
  { 0xA4 , 0x0904 },
812
};
813
814
/*
 * Special-case pairs of { ISCII byte, Unicode code point } for sequences that
 * involve ISCII_NUKTA (0xE9).
 * Row 0 is a header: its first element is the number of rows in the array,
 * including the header itself (1 header + 15 pairs = 16).
 * Most pairs are the reverse of a two-byte <byte, 0xE9> entry in
 * fromUnicodeTable, e.g. 0x0944 <-> 0xDFE9 and 0x0958 <-> 0xb3e9.
 */
static const uint16_t nuktaSpecialCases[][2]={
815
    { 16 /*length of array*/   , 0      },
816
    { 0xA6 , 0x090c },
817
    { 0xEA , 0x093D },
818
    { 0xDF , 0x0944 },
819
    { 0xA1 , 0x0950 },
820
    { 0xb3 , 0x0958 },
821
    { 0xb4 , 0x0959 },
822
    { 0xb5 , 0x095a },
823
    { 0xba , 0x095b },
824
    { 0xbf , 0x095c },
825
    { 0xC0 , 0x095d },
826
    { 0xc9 , 0x095e },
827
    { 0xAA , 0x0960 },
828
    { 0xA7 , 0x0961 },
829
    { 0xDB , 0x0962 },
830
    { 0xDC , 0x0963 },
831
};
832
833
834
0
/*
 * Writes the one, two or three bytes packed in targetByteUnit to the ISCII
 * output, most significant byte first.
 *
 *  - Values <= 0xFF produce one byte, values <= 0xFFFF two bytes, larger
 *    values three bytes.
 *  - Bytes that do not fit before targetLimit are appended to
 *    args->converter->charErrorBuffer and *err is set to
 *    U_BUFFER_OVERFLOW_ERROR.
 *  - When offsets is non-NULL, one offset is stored per byte written to
 *    target. The offset is (source - args->source - 1); for three-byte values
 *    it is decremented once, and all three bytes record the decremented value.
 *
 * target and offsets are advanced in place, so they must be modifiable
 * lvalues. The arguments are evaluated more than once; pass plain variables
 * without side effects.
 */
#define WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err) UPRV_BLOCK_MACRO_BEGIN { \
835
0
    int32_t offset = (int32_t)(source - args->source-1);                                        \
836
0
      /* write the targetUniChar  to target */                                                  \
837
0
    if(target < targetLimit){                                                                   \
838
0
        if(targetByteUnit <= 0xFF){                                                             \
839
0
            *(target)++ = (uint8_t)(targetByteUnit);                                            \
840
0
            if(offsets){                                                                        \
841
0
                *(offsets++) = offset;                                                          \
842
0
            }                                                                                   \
843
0
        }else{                                                                                  \
844
0
            if (targetByteUnit > 0xFFFF) {                                                      \
845
0
                *(target)++ = (uint8_t)(targetByteUnit>>16);                                    \
846
0
                if (offsets) {                                                                  \
847
0
                    --offset;                                                                   \
848
0
                    *(offsets++) = offset;                                                      \
849
0
                }                                                                               \
850
0
            }                                                                                   \
851
0
            if (!(target < targetLimit)) {                                                      \
852
0
                args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =    \
853
0
                                (uint8_t)(targetByteUnit >> 8);                                 \
854
0
                args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =    \
855
0
                                (uint8_t)targetByteUnit;                                        \
856
0
                *err = U_BUFFER_OVERFLOW_ERROR;                                                 \
857
0
            } else {                                                                            \
858
0
                *(target)++ = (uint8_t)(targetByteUnit>>8);                                     \
859
0
                if(offsets){                                                                    \
860
0
                    *(offsets++) = offset;                                                      \
861
0
                }                                                                               \
862
0
                if(target < targetLimit){                                                       \
863
0
                    *(target)++ = (uint8_t)  targetByteUnit;                                    \
864
0
                    if(offsets){                                                                \
865
0
                        *(offsets++) = offset                            ;                      \
866
0
                    }                                                                           \
867
0
                }else{                                                                          \
868
0
                    args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =\
869
0
                                (uint8_t) (targetByteUnit);                                     \
870
0
                    *err = U_BUFFER_OVERFLOW_ERROR;                                             \
871
0
                }                                                                               \
872
0
            }                                                                                   \
873
0
        }                                                                                       \
874
0
    }else{                                                                                      \
875
0
        if (targetByteUnit & 0xFF0000) {                                                        \
876
0
            args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =        \
877
0
                        (uint8_t) (targetByteUnit >>16);                                        \
878
0
        }                                                                                       \
879
0
        if(targetByteUnit & 0xFF00){                                                            \
880
0
            args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =        \
881
0
                        (uint8_t) (targetByteUnit >>8);                                         \
882
0
        }                                                                                       \
883
0
        args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =            \
884
0
                        (uint8_t) (targetByteUnit);                                             \
885
0
        *err = U_BUFFER_OVERFLOW_ERROR;                                                         \
886
0
    }                                                                                           \
887
0
} UPRV_BLOCK_MACRO_END
888
889
/* Rules:
890
 *    Explicit Halant :
891
 *                      <HALANT> + <ZWNJ>
892
 *    Soft Halant :
893
 *                      <HALANT> + <ZWJ>
894
 *
 * Converts the UTF-16 code units in [args->source, args->sourceLimit) to ISCII
 * bytes written at args->target, storing source offsets through args->offsets
 * when that pointer is non-NULL.
 *
 *  - Code units up to ASCII_END are written unchanged as single bytes.
 *  - ZWNJ and ZWJ are translated according to the rules above, depending on
 *    converterData->contextCharFromUnicode.
 *  - Code points in INDIC_BLOCK_BEGIN..INDIC_BLOCK_END are reduced by the delta
 *    of their DELTA-sized block and looked up in fromUnicodeTable. When the
 *    block differs from the previous one (or isFirstBuffer is set), ATR plus
 *    the block's ISCII language code is written first.
 *  - A code unit without a mapping ends the loop with *err set and the code
 *    point saved in args->converter->fromUChar32. A lead surrogate at the end
 *    of the input is saved the same way with *err set to U_ZERO_ERROR.
 *
 * @param args  conversion arguments; on normal return args->source and
 *              args->target are updated to the positions reached.
 * @param err   set to U_ILLEGAL_ARGUMENT_ERROR for a NULL converter or a limit
 *              that lies before its pointer (args is then left untouched),
 *              to U_BUFFER_OVERFLOW_ERROR by WRITE_TO_TARGET_FROM_U when the
 *              target is full, to U_INVALID_CHAR_FOUND for unmappable code
 *              points and to U_ILLEGAL_CHAR_FOUND for unpaired surrogates.
 */
895
static void U_CALLCONV
896
UConverter_fromUnicode_ISCII_OFFSETS_LOGIC(
897
0
        UConverterFromUnicodeArgs * args, UErrorCode * err) {
898
0
    const UChar *source = args->source;
899
0
    const UChar *sourceLimit = args->sourceLimit;
900
0
    unsigned char *target = (unsigned char *) args->target;
901
0
    unsigned char *targetLimit = (unsigned char *) args->targetLimit;
902
0
    int32_t* offsets = args->offsets;
903
0
    uint32_t targetByteUnit = 0x0000;
904
0
    UChar32 sourceChar = 0x0000;
905
0
    UChar32 tempContextFromUnicode = 0x0000;    /* For special handling of the Gurmukhi script. */
906
0
    UConverterDataISCII *converterData;
907
0
    uint16_t newDelta=0;
908
0
    uint16_t range = 0;
909
0
    UBool deltaChanged = FALSE;
910
911
0
    if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)) {
912
0
        *err = U_ILLEGAL_ARGUMENT_ERROR;
913
0
        return;
914
0
    }
915
    /* initialize data */
916
0
    converterData=(UConverterDataISCII*)args->converter->extraInfo;
917
0
    newDelta=converterData->currentDeltaFromUnicode;
918
0
    range = (uint16_t)(newDelta/DELTA);
919
    /* A code point saved in fromUChar32 by an earlier call (see the end of the
     * loop below) resumes at the trail-surrogate lookup. */
920
0
    if ((sourceChar = args->converter->fromUChar32)!=0) {
921
0
        goto getTrail;
922
0
    }
923
924
    /*writing the char to the output stream */
925
0
    while (source < sourceLimit) {
926
        /* Write the language code following LF only if LF is not the last character. */
927
0
        if (args->converter->fromUnicodeStatus == LF) {
928
0
            targetByteUnit = ATR<<8;
929
0
            targetByteUnit += (uint8_t) lookupInitialData[range].isciiLang;
930
0
            args->converter->fromUnicodeStatus = 0x0000;
931
            /* now append ATR and language code */
932
0
            WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err);
933
0
            if (U_FAILURE(*err)) {
934
0
                break;
935
0
            }
936
0
        }
937
        
938
0
        sourceChar = *source++;
        /* Snapshot the context left by the previous code unit; the switch below
         * may overwrite it before the Adhak check after the switch reads it. */
939
0
        tempContextFromUnicode = converterData->contextCharFromUnicode;
940
        
941
0
        targetByteUnit = missingCharMarker;
942
        
943
        /*check if input is in ASCII and C0 control codes range*/
944
0
        if (sourceChar <= ASCII_END) {
945
0
            args->converter->fromUnicodeStatus = sourceChar;
946
0
            WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,sourceChar,err);
947
0
            if (U_FAILURE(*err)) {
948
0
                break;
949
0
            }
950
0
            continue;
951
0
        }
952
0
        switch (sourceChar) {
953
0
        case ZWNJ:
954
            /* contextChar has HALANT (any non-zero context char takes this branch):
             * emit a second halant byte, giving the explicit halant <HALANT><HALANT> */
955
0
            if (converterData->contextCharFromUnicode) {
956
0
                converterData->contextCharFromUnicode = 0x00;
957
0
                targetByteUnit = ISCII_HALANT;
958
0
            } else {
959
                /* consume ZWNJ and continue */
960
0
                converterData->contextCharFromUnicode = 0x00;
961
0
                continue;
962
0
            }
963
0
            break;
964
0
        case ZWJ:
965
            /* contextChar has HALANT (any non-zero context char takes this branch):
             * emit a nukta byte, giving the soft halant <HALANT><NUKTA>;
             * without context, ZWJ maps to the ISCII INV byte */
966
0
            if (converterData->contextCharFromUnicode) {
967
0
                targetByteUnit = ISCII_NUKTA;
968
0
            } else {
969
0
                targetByteUnit =ISCII_INV;
970
0
            }
971
0
            converterData->contextCharFromUnicode = 0x00;
972
0
            break;
973
0
        default:
974
            /* is the sourceChar in the INDIC_RANGE?
             * The uint16_t cast wraps for values above INDIC_BLOCK_END, so this one
             * comparison tests INDIC_BLOCK_BEGIN <= sourceChar <= INDIC_BLOCK_END. */
975
0
            if ((uint16_t)(INDIC_BLOCK_END-sourceChar) <= INDIC_RANGE) {
976
                /* Danda and Double Danda are valid in Northern scripts.. since Unicode
977
                 * does not include these codepoints in all Northern scripts we need to
978
                 * filter them out
979
                 */
980
0
                if (sourceChar!= DANDA && sourceChar != DOUBLE_DANDA) {
981
                    /* find out to which block the sourceChar belongs*/
982
0
                    range =(uint16_t)((sourceChar-INDIC_BLOCK_BEGIN)/DELTA);
983
0
                    newDelta =(uint16_t)(range*DELTA);
984
985
                    /* Now are we in the same block as the previous? */
986
0
                    if (newDelta!= converterData->currentDeltaFromUnicode || converterData->isFirstBuffer) {
987
0
                        converterData->currentDeltaFromUnicode = newDelta;
988
0
                        converterData->currentMaskFromUnicode = lookupInitialData[range].maskEnum;
989
0
                        deltaChanged =TRUE;
990
0
                        converterData->isFirstBuffer=FALSE;
991
0
                    }
992
                    
993
0
                    if (converterData->currentDeltaFromUnicode == PNJ_DELTA) { 
994
0
                        if (sourceChar == PNJ_TIPPI) {
995
                            /* Make sure Tippi is converted to Bindi. */
996
0
                            sourceChar = PNJ_BINDI;
997
0
                        } else if (sourceChar == PNJ_ADHAK) {
998
                            /* This is for consonant cluster handling. */
999
0
                            converterData->contextCharFromUnicode = PNJ_ADHAK;
1000
0
                        }
1001
                        
1002
0
                    }
1003
                    /* Normalize all Indic codepoints to Devanagari and map them to ISCII */
1004
                    /* now subtract the new delta from sourceChar*/
1005
0
                    sourceChar -= converterData->currentDeltaFromUnicode;
1006
0
                }
1007
1008
                /* get the target byte unit; the low byte of the normalized code
                 * point is the table index */
1009
0
                targetByteUnit=fromUnicodeTable[(uint8_t)sourceChar];
1010
1011
                /* is the code point valid in current script? */
1012
0
                if ((validityTable[(uint8_t)sourceChar] & converterData->currentMaskFromUnicode)==0) {
1013
                    /* Vocalic RR is assigned in ISCII Telugu and Unicode */
1014
0
                    if (converterData->currentDeltaFromUnicode!=(TELUGU_DELTA) || sourceChar!=VOCALLIC_RR) {
1015
0
                        targetByteUnit=missingCharMarker;
1016
0
                    }
1017
0
                }
1018
1019
0
                if (deltaChanged) {
1020
                    /* we are in a script block which is different than
1021
                     * previous sourceChar's script block write ATR and language codes
1022
                     */
1023
0
                    uint32_t temp=0;
1024
0
                    temp =(uint16_t)(ATR<<8);
1025
0
                    temp += (uint16_t)((uint8_t) lookupInitialData[range].isciiLang);
1026
                    /* reset */
1027
0
                    deltaChanged=FALSE;
1028
                    /* now append ATR and language code */
1029
0
                    WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,temp,err);
                    /* NOTE: the break below leaves the switch, not the while loop;
                     * the code after the switch still runs for this code unit. */
1030
0
                    if (U_FAILURE(*err)) {
1031
0
                        break;
1032
0
                    }
1033
0
                }
1034
                
                /* Adhak itself produces no output; continuing here also skips the
                 * context reset below, so the next code unit sees PNJ_ADHAK. */
1035
0
                if (converterData->currentDeltaFromUnicode == PNJ_DELTA && (sourceChar + PNJ_DELTA) == PNJ_ADHAK) {
1036
0
                    continue;
1037
0
                }
1038
0
            }
1039
            /* reset context char */
1040
0
            converterData->contextCharFromUnicode = 0x00;
1041
0
            break;
1042
0
        }
1043
0
        if (converterData->currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && isPNJConsonant((sourceChar + PNJ_DELTA))) {
1044
            /* If the previous codepoint is Adhak and the current codepoint is a consonant, the targetByteUnit should be C + Halant + C. */
1045
            /* reset context char */
1046
0
            converterData->contextCharFromUnicode = 0x0000;
            /* pack the three bytes C, ISCII_HALANT, C into one value, first byte highest */
1047
0
            targetByteUnit = targetByteUnit << 16 | ISCII_HALANT << 8 | targetByteUnit;
1048
            /* write targetByteUnit to target */
1049
0
            WRITE_TO_TARGET_FROM_U(args, offsets, source, target, targetLimit, targetByteUnit,err);
1050
0
            if (U_FAILURE(*err)) {
1051
0
                break;
1052
0
            }
1053
0
        } else if (targetByteUnit != missingCharMarker) {
            /* remember a halant so that a following ZWNJ/ZWJ can be interpreted */
1054
0
            if (targetByteUnit==ISCII_HALANT) {
1055
0
                converterData->contextCharFromUnicode = (UChar)targetByteUnit;
1056
0
            }
1057
            /* write targetByteUnit to target*/
1058
0
            WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err);
1059
0
            if (U_FAILURE(*err)) {
1060
0
                break;
1061
0
            }
1062
0
        } else {
1063
            /* oops.. the code point is unassigned */
1064
            /*check if the char is a First surrogate*/
1065
0
            if (U16_IS_SURROGATE(sourceChar)) {
1066
0
                if (U16_IS_SURROGATE_LEAD(sourceChar)) {
1067
0
getTrail:
1068
                    /*look ahead to find the trail surrogate*/
1069
0
                    if (source < sourceLimit) {
1070
                        /* test the following code unit */
1071
0
                        UChar trail= (*source);
1072
0
                        if (U16_IS_TRAIL(trail)) {
1073
0
                            source++;
1074
0
                            sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
                            /* the combined supplementary code point is reported as unassigned */
1075
0
                            *err =U_INVALID_CHAR_FOUND;
1076
                            /* convert this surrogate code point */
1077
                            /* exit this condition tree */
1078
0
                        } else {
1079
                            /* this is an unmatched lead code unit (1st surrogate) */
1080
                            /* callback(illegal) */
1081
0
                            *err=U_ILLEGAL_CHAR_FOUND;
1082
0
                        }
1083
0
                    } else {
1084
                        /* no more input */
1085
0
                        *err = U_ZERO_ERROR;
1086
0
                    }
1087
0
                } else {
1088
                    /* this is an unmatched trail code unit (2nd surrogate) */
1089
                    /* callback(illegal) */
1090
0
                    *err=U_ILLEGAL_CHAR_FOUND;
1091
0
                }
1092
0
            } else {
1093
                /* callback(unassigned) for a BMP code point */
1094
0
                *err = U_INVALID_CHAR_FOUND;
1095
0
            }
1096
1097
0
            args->converter->fromUChar32=sourceChar;
1098
0
            break;
1099
0
        }
1100
0
    }/* end while (source < sourceLimit) */
1101
1102
    /*save the state and return */
1103
0
    args->source = source;
1104
0
    args->target = (char*)target;
1105
0
}
1106
1107
/*
 * Rows of { script, validity mask } used when switching scripts during ISCII to
 * Unicode conversion. The to-Unicode converter indexes this table with
 * (sourceChar & 0x0F) for the byte that follows ATR: column 0 multiplied by
 * DELTA becomes currentDeltaToUnicode and column 1, cast to MaskEnum, becomes
 * currentMaskToUnicode.
 * Rows 3 and 6 both select BENGALI, and the TELUGU row uses KND_MASK, the same
 * mask as KANNADA.
 */
static const uint16_t lookupTable[][2]={
1108
    { ZERO,       ZERO     },     /*DEFAULT*/
1109
    { ZERO,       ZERO     },     /*ROMAN*/
1110
    { DEVANAGARI, DEV_MASK },
1111
    { BENGALI,    BNG_MASK },
1112
    { TAMIL,      TML_MASK },
1113
    { TELUGU,     KND_MASK },
1114
    { BENGALI,    BNG_MASK },
1115
    { ORIYA,      ORI_MASK },
1116
    { KANNADA,    KND_MASK },
1117
    { MALAYALAM,  MLM_MASK },
1118
    { GUJARATI,   GJR_MASK },
1119
    { GURMUKHI,   PNJ_MASK }
1120
};
1121
1122
0
/*
 * Writes one UTF-16 code unit to the Unicode output.
 *
 *  - targetUniChar is first increased by delta (modified in place) unless it
 *    is <= ASCII_END or equals ZWJ, ZWNJ, DANDA or DOUBLE_DANDA.
 *  - If target is below args->targetLimit the value is stored there and, when
 *    offsets is non-NULL, offset is stored through offsets.
 *  - Otherwise the value is appended to args->converter->UCharErrorBuffer and
 *    *err is set to U_BUFFER_OVERFLOW_ERROR.
 *
 * target, offsets and targetUniChar must be modifiable lvalues. The source
 * parameter is not used by the macro body.
 */
#define WRITE_TO_TARGET_TO_U(args,source,target,offsets,offset,targetUniChar,delta, err) UPRV_BLOCK_MACRO_BEGIN { \
1123
0
    /* add offset to current Indic Block */                                              \
1124
0
    if(targetUniChar>ASCII_END &&                                                        \
1125
0
           targetUniChar != ZWJ &&                                                       \
1126
0
           targetUniChar != ZWNJ &&                                                      \
1127
0
           targetUniChar != DANDA &&                                                     \
1128
0
           targetUniChar != DOUBLE_DANDA){                                               \
1129
0
                                                                                         \
1130
0
           targetUniChar+=(uint16_t)(delta);                                             \
1131
0
    }                                                                                    \
1132
0
    /* now write the targetUniChar */                                                    \
1133
0
    if(target<args->targetLimit){                                                        \
1134
0
        *(target)++ = (UChar)targetUniChar;                                              \
1135
0
        if(offsets){                                                                     \
1136
0
            *(offsets)++ = (int32_t)(offset);                                            \
1137
0
        }                                                                                \
1138
0
    }else{                                                                               \
1139
0
        args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++] =   \
1140
0
            (UChar)targetUniChar;                                                        \
1141
0
        *err = U_BUFFER_OVERFLOW_ERROR;                                                  \
1142
0
    }                                                                                    \
1143
0
} UPRV_BLOCK_MACRO_END
1144
1145
0
/*
 * Looks up the Unicode value for the ISCII byte sourceChar in toUnicodeTable
 * and stores it in targetUniChar.
 * For bytes above ASCII_END the result is then checked against validityTable
 * (indexed by the low 7 bits of the mapped value) and data->currentMaskToUnicode.
 * If the mask test fails, targetUniChar is replaced by missingCharMarker,
 * except when data->currentDeltaToUnicode is TELUGU_DELTA and the mapped value
 * is VOCALLIC_RR.
 * targetUniChar must be a modifiable lvalue; data must be a pointer expression
 * usable with "->".
 */
#define GET_MAPPING(sourceChar,targetUniChar,data) UPRV_BLOCK_MACRO_BEGIN {              \
1146
0
    targetUniChar = toUnicodeTable[(sourceChar)] ;                                       \
1147
0
    /* is the code point valid in current script? */                                     \
1148
0
    if(sourceChar> ASCII_END &&                                                          \
1149
0
            (validityTable[(targetUniChar & 0x7F)] & data->currentMaskToUnicode)==0){    \
1150
0
        /* Vocallic RR is assigned in ISCII Telugu and Unicode */                         \
1151
0
        if(data->currentDeltaToUnicode!=(TELUGU_DELTA) ||                                \
1152
0
                    targetUniChar!=VOCALLIC_RR){                                         \
1153
0
            targetUniChar=missingCharMarker;                                             \
1154
0
        }                                                                                \
1155
0
    }                                                                                    \
1156
0
} UPRV_BLOCK_MACRO_END
1157
1158
/***********
1159
 *  Rules for ISCII to Unicode converter
1160
 *  ISCII is a stateful encoding. To convert ISCII bytes to Unicode,
1161
 *  which has both precomposed and decomposed forms of characters,
1162
 *  pre-context and post-context need to be considered.
1163
 *
1164
 *  Post context
1165
 *  i)  ATR : Attribute code is used to declare font and script switching.
1166
 *      Currently we only switch scripts; font codes are consumed without generating an error
1167
 *  ii) EXT : Extension code is used to declare switching to Sanskrit and for obscure,
1168
 *      obsolete characters
1169
 *  Pre context
1170
 *  i)  Halant: if preceded by a halant then it is an explicit halant
1171
 *  ii) Nukta :
1172
 *       a) if preceded by a halant then it is a soft halant
1173
 *       b) if preceded by specific consonants and the ligatures have pre-composed
1174
 *          characters in Unicode then convert to pre-composed characters
1175
 *  iii) Danda: If Danda is preceded by a Danda then convert to Double Danda
1176
 *
1177
 */
1178
1179
static void U_CALLCONV
1180
0
UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, UErrorCode* err) {
1181
0
    const char *source = ( char *) args->source;
1182
0
    UChar *target = args->target;
1183
0
    const char *sourceLimit = args->sourceLimit;
1184
0
    const UChar* targetLimit = args->targetLimit;
1185
0
    uint32_t targetUniChar = 0x0000;
1186
0
    uint8_t sourceChar = 0x0000;
1187
0
    UConverterDataISCII* data;
1188
0
    UChar32* toUnicodeStatus=NULL;
1189
0
    UChar32 tempTargetUniChar = 0x0000;
1190
0
    UChar* contextCharToUnicode= NULL;
1191
0
    UBool found;
1192
0
    int i; 
1193
0
    int offset = 0;
1194
1195
0
    if ((args->converter == NULL) || (target < args->target) || (source < args->source)) {
1196
0
        *err = U_ILLEGAL_ARGUMENT_ERROR;
1197
0
        return;
1198
0
    }
1199
1200
0
    data = (UConverterDataISCII*)(args->converter->extraInfo);
1201
0
    contextCharToUnicode = &data->contextCharToUnicode; /* contains previous ISCII codepoint visited */
1202
0
    toUnicodeStatus = (UChar32*)&args->converter->toUnicodeStatus;/* contains the mapping to Unicode of the above codepoint*/
1203
1204
0
    while (U_SUCCESS(*err) && source<sourceLimit) {
1205
1206
0
        targetUniChar = missingCharMarker;
1207
1208
0
        if (target < targetLimit) {
1209
0
            sourceChar = (unsigned char)*(source)++;
1210
1211
            /* look at the post-context perform special processing */
1212
0
            if (*contextCharToUnicode==ATR) {
1213
1214
                /* If we have ATR in *contextCharToUnicode then we need to change our
1215
                 * state to the Indic Script specified by sourceChar
1216
                 */
1217
1218
                /* check if the sourceChar is supported script range*/
1219
0
                if ((uint8_t)(PNJ-sourceChar)<=PNJ-DEV) {
1220
0
                    data->currentDeltaToUnicode = (uint16_t)(lookupTable[sourceChar & 0x0F][0] * DELTA);
1221
0
                    data->currentMaskToUnicode = (MaskEnum)lookupTable[sourceChar & 0x0F][1];
1222
0
                } else if (sourceChar==DEF) {
1223
                    /* switch back to default */
1224
0
                    data->currentDeltaToUnicode = data->defDeltaToUnicode;
1225
0
                    data->currentMaskToUnicode = data->defMaskToUnicode;
1226
0
                } else {
1227
0
                    if ((sourceChar >= 0x21 && sourceChar <= 0x3F)) {
1228
                        /* these are display codes consume and continue */
1229
0
                    } else {
1230
0
                        *err =U_ILLEGAL_CHAR_FOUND;
1231
                        /* reset */
1232
0
                        *contextCharToUnicode=NO_CHAR_MARKER;
1233
0
                        goto CALLBACK;
1234
0
                    }
1235
0
                }
1236
1237
                /* reset */
1238
0
                *contextCharToUnicode=NO_CHAR_MARKER;
1239
1240
0
                continue;
1241
1242
0
            } else if (*contextCharToUnicode==EXT) {
1243
                /* check if sourceChar is in 0xA1-0xEE range */
1244
0
                if ((uint8_t) (EXT_RANGE_END - sourceChar) <= (EXT_RANGE_END - EXT_RANGE_BEGIN)) {
1245
                    /* We currently support only Anudatta and Devanagari abbreviation sign */
1246
0
                    if (sourceChar==0xBF || sourceChar == 0xB8) {
1247
0
                        targetUniChar = (sourceChar==0xBF) ? DEV_ABBR_SIGN : DEV_ANUDATTA;
1248
                        
1249
                        /* find out if the mapping is valid in this state */
1250
0
                        if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
1251
0
                            *contextCharToUnicode= NO_CHAR_MARKER;
1252
1253
                            /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
1254
0
                            if (data->prevToUnicodeStatus) {
1255
0
                                WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
1256
0
                                data->prevToUnicodeStatus = 0x0000;
1257
0
                            }
1258
                            /* write to target */
1259
0
                            WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
1260
1261
0
                            continue;
1262
0
                        }
1263
0
                    }
1264
                    /* byte unit is unassigned */
1265
0
                    targetUniChar = missingCharMarker;
1266
0
                    *err= U_INVALID_CHAR_FOUND;
1267
0
                } else {
1268
                    /* only 0xA1 - 0xEE are legal after EXT char */
1269
0
                    *contextCharToUnicode= NO_CHAR_MARKER;
1270
0
                    *err = U_ILLEGAL_CHAR_FOUND;
1271
0
                }
1272
0
                goto CALLBACK;
1273
0
            } else if (*contextCharToUnicode==ISCII_INV) {
1274
0
                if (sourceChar==ISCII_HALANT) {
1275
0
                    targetUniChar = 0x0020; /* replace with space according to Indic FAQ */
1276
0
                } else {
1277
0
                    targetUniChar = ZWJ;
1278
0
                }
1279
1280
                /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
1281
0
                if (data->prevToUnicodeStatus) {
1282
0
                    WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
1283
0
                    data->prevToUnicodeStatus = 0x0000;
1284
0
                }
1285
                /* write to target */
1286
0
                WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
1287
                /* reset */
1288
0
                *contextCharToUnicode=NO_CHAR_MARKER;
1289
0
            }
1290
1291
            /* look at the pre-context and perform special processing */
1292
0
            switch (sourceChar) {
1293
0
            case ISCII_INV:
1294
0
            case EXT:
1295
0
            case ATR:
1296
0
                *contextCharToUnicode = (UChar)sourceChar;
1297
1298
0
                if (*toUnicodeStatus != missingCharMarker) {
1299
                    /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
1300
0
                    if (data->prevToUnicodeStatus) {
1301
0
                        WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
1302
0
                        data->prevToUnicodeStatus = 0x0000;
1303
0
                    }
1304
0
                    WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err);
1305
0
                    *toUnicodeStatus = missingCharMarker;
1306
0
                }
1307
0
                continue;
1308
0
            case ISCII_DANDA:
1309
                /* handle double danda*/
1310
0
                if (*contextCharToUnicode== ISCII_DANDA) {
1311
0
                    targetUniChar = DOUBLE_DANDA;
1312
                    /* clear the context */
1313
0
                    *contextCharToUnicode = NO_CHAR_MARKER;
1314
0
                    *toUnicodeStatus = missingCharMarker;
1315
0
                } else {
1316
0
                    GET_MAPPING(sourceChar,targetUniChar,data);
1317
0
                    *contextCharToUnicode = sourceChar;
1318
0
                }
1319
0
                break;
1320
0
            case ISCII_HALANT:
1321
                /* handle explicit halant */
1322
0
                if (*contextCharToUnicode == ISCII_HALANT) {
1323
0
                    targetUniChar = ZWNJ;
1324
                    /* clear the context */
1325
0
                    *contextCharToUnicode = NO_CHAR_MARKER;
1326
0
                } else {
1327
0
                    GET_MAPPING(sourceChar,targetUniChar,data);
1328
0
                    *contextCharToUnicode = sourceChar;
1329
0
                }
1330
0
                break;
1331
0
            case 0x0A:
1332
0
            case 0x0D:
1333
0
                data->resetToDefaultToUnicode = TRUE;
1334
0
                GET_MAPPING(sourceChar,targetUniChar,data)
1335
0
                ;
1336
0
                *contextCharToUnicode = sourceChar;
1337
0
                break;
1338
1339
0
            case ISCII_VOWEL_SIGN_E:
1340
0
                i=1;
1341
0
                found=FALSE;
1342
0
                for (; i<vowelSignESpecialCases[0][0]; i++) {
1343
0
                    U_ASSERT(i<UPRV_LENGTHOF(vowelSignESpecialCases));
1344
0
                    if (vowelSignESpecialCases[i][0]==(uint8_t)*contextCharToUnicode) {
1345
0
                        targetUniChar=vowelSignESpecialCases[i][1];
1346
0
                        found=TRUE;
1347
0
                        break;
1348
0
                    }
1349
0
                }
1350
0
                if (found) {
1351
                    /* find out if the mapping is valid in this state */
1352
0
                    if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
1353
                        /*targetUniChar += data->currentDeltaToUnicode ;*/
1354
0
                        *contextCharToUnicode= NO_CHAR_MARKER;
1355
0
                        *toUnicodeStatus = missingCharMarker;
1356
0
                        break;
1357
0
                    }
1358
0
                }
1359
0
                GET_MAPPING(sourceChar,targetUniChar,data);
1360
0
                *contextCharToUnicode = sourceChar;
1361
0
                break;
1362
1363
0
            case ISCII_NUKTA:
1364
                /* handle soft halant */
1365
0
                if (*contextCharToUnicode == ISCII_HALANT) {
1366
0
                    targetUniChar = ZWJ;
1367
                    /* clear the context */
1368
0
                    *contextCharToUnicode = NO_CHAR_MARKER;
1369
0
                    break;
1370
0
                } else if (data->currentDeltaToUnicode == PNJ_DELTA && data->contextCharToUnicode == 0xc0) {
1371
                    /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
1372
0
                    if (data->prevToUnicodeStatus) {
1373
0
                        WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
1374
0
                        data->prevToUnicodeStatus = 0x0000;
1375
0
                    }
1376
                    /* We got here because ISCII_NUKTA was preceded by 0xc0 and we are converting Gurmukhi.
1377
                     * In that case we must convert (0xc0 0xe9) to (\u0a5c\u0a4d\u0a39).
1378
                     */
1379
0
                    targetUniChar = PNJ_RRA;
1380
0
                    WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
1381
0
                    if (U_SUCCESS(*err)) {
1382
0
                        targetUniChar = PNJ_SIGN_VIRAMA;
1383
0
                        WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
1384
0
                        if (U_SUCCESS(*err)) {
1385
0
                            targetUniChar = PNJ_HA;
1386
0
                            WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
1387
0
                        } else {
1388
0
                            args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA;
1389
0
                        }
1390
0
                    } else {
1391
0
                        args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_SIGN_VIRAMA;
1392
0
                        args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA;
1393
0
                    }
1394
0
                    *toUnicodeStatus = missingCharMarker;
1395
0
                    data->contextCharToUnicode = NO_CHAR_MARKER;
1396
0
                    continue;
1397
0
                } else {
1398
                    /* try to handle <CHAR> + ISCII_NUKTA special mappings */
1399
0
                    i=1;
1400
0
                    found =FALSE;
1401
0
                    for (; i<nuktaSpecialCases[0][0]; i++) {
1402
0
                        if (nuktaSpecialCases[i][0]==(uint8_t)
1403
0
                                *contextCharToUnicode) {
1404
0
                            targetUniChar=nuktaSpecialCases[i][1];
1405
0
                            found =TRUE;
1406
0
                            break;
1407
0
                        }
1408
0
                    }
1409
0
                    if (found) {
1410
                        /* find out if the mapping is valid in this state */
1411
0
                        if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
1412
                            /*targetUniChar += data->currentDeltaToUnicode ;*/
1413
0
                            *contextCharToUnicode= NO_CHAR_MARKER;
1414
0
                            *toUnicodeStatus = missingCharMarker;
1415
0
                            if (data->currentDeltaToUnicode == PNJ_DELTA) {
1416
                                /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
1417
0
                                if (data->prevToUnicodeStatus) {
1418
0
                                    WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
1419
0
                                    data->prevToUnicodeStatus = 0x0000;
1420
0
                                }
1421
0
                                WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
1422
0
                                continue;
1423
0
                            }
1424
0
                            break;
1425
0
                        }
1426
                        /* else fall through to default */
1427
0
                    }
1428
                    /* else fall through to default */
1429
0
                    U_FALLTHROUGH;
1430
0
                }
1431
0
            default:GET_MAPPING(sourceChar,targetUniChar,data)
1432
0
                ;
1433
0
                *contextCharToUnicode = sourceChar;
1434
0
                break;
1435
0
            }
1436
1437
0
            if (*toUnicodeStatus != missingCharMarker) {
1438
                /* Check to make sure that consonant clusters are handled correct for Gurmukhi script. */
1439
0
                if (data->currentDeltaToUnicode == PNJ_DELTA && data->prevToUnicodeStatus != 0 && isPNJConsonant(data->prevToUnicodeStatus) &&
1440
0
                        (*toUnicodeStatus + PNJ_DELTA) == PNJ_SIGN_VIRAMA && ((UChar32)(targetUniChar + PNJ_DELTA) == data->prevToUnicodeStatus)) {
1441
                    /* Consonant clusters C + HALANT + C should be encoded as ADHAK + C */
1442
0
                    offset = (int)(source-args->source - 3);
1443
0
                    tempTargetUniChar = PNJ_ADHAK; /* This is necessary to avoid some compiler warnings. */
1444
0
                    WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,tempTargetUniChar,0,err);
1445
0
                    WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,data->prevToUnicodeStatus,0,err);
1446
0
                    data->prevToUnicodeStatus = 0x0000; /* reset the previous unicode code point */
1447
0
                    *toUnicodeStatus = missingCharMarker;
1448
0
                    continue;
1449
0
                } else {
1450
                    /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
1451
0
                    if (data->prevToUnicodeStatus) {
1452
0
                        WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
1453
0
                        data->prevToUnicodeStatus = 0x0000;
1454
0
                    }
1455
                    /* Check to make sure that Bindi and Tippi are handled correctly for Gurmukhi script. 
1456
                     * If 0xA2 is preceded by a codepoint in the PNJ_BINDI_TIPPI_SET then the target codepoint should be Tippi instead of Bindi.
1457
                     */
1458
0
                    if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && isPNJBindiTippi((*toUnicodeStatus + PNJ_DELTA))) {
1459
0
                        targetUniChar = PNJ_TIPPI - PNJ_DELTA;
1460
0
                        WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,PNJ_DELTA,err);
1461
0
                    } else if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && isPNJConsonant((*toUnicodeStatus + PNJ_DELTA))) {
1462
                        /* Store the current toUnicodeStatus code point for later handling of consonant cluster in Gurmukhi. */
1463
0
                        data->prevToUnicodeStatus = *toUnicodeStatus + PNJ_DELTA;
1464
0
                    } else {
1465
                        /* write the previously mapped codepoint */
1466
0
                        WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err);
1467
0
                    }
1468
0
                }
1469
0
                *toUnicodeStatus = missingCharMarker;
1470
0
            }
1471
1472
0
            if (targetUniChar != missingCharMarker) {
1473
                /* now save the targetUniChar for delayed write */
1474
0
                *toUnicodeStatus = (UChar) targetUniChar;
1475
0
                if (data->resetToDefaultToUnicode==TRUE) {
1476
0
                    data->currentDeltaToUnicode = data->defDeltaToUnicode;
1477
0
                    data->currentMaskToUnicode = data->defMaskToUnicode;
1478
0
                    data->resetToDefaultToUnicode=FALSE;
1479
0
                }
1480
0
            } else {
1481
1482
                /* we reach here only if targetUniChar == missingCharMarker
1483
                 * so assign codes to reason and err
1484
                 */
1485
0
                *err = U_INVALID_CHAR_FOUND;
1486
0
CALLBACK:
1487
0
                args->converter->toUBytes[0] = (uint8_t) sourceChar;
1488
0
                args->converter->toULength = 1;
1489
0
                break;
1490
0
            }
1491
1492
0
        } else {
1493
0
            *err =U_BUFFER_OVERFLOW_ERROR;
1494
0
            break;
1495
0
        }
1496
0
    }
1497
1498
0
    if (U_SUCCESS(*err) && args->flush && source == sourceLimit) {
1499
        /* end of the input stream */
1500
0
        UConverter *cnv = args->converter;
1501
1502
0
        if (*contextCharToUnicode==ATR || *contextCharToUnicode==EXT || *contextCharToUnicode==ISCII_INV) {
1503
            /* set toUBytes[] */
1504
0
            cnv->toUBytes[0] = (uint8_t)*contextCharToUnicode;
1505
0
            cnv->toULength = 1;
1506
1507
            /* avoid looping on truncated sequences */
1508
0
            *contextCharToUnicode = NO_CHAR_MARKER;
1509
0
        } else {
1510
0
            cnv->toULength = 0;
1511
0
        }
1512
1513
0
        if (*toUnicodeStatus != missingCharMarker) {
1514
            /* output a remaining target character */
1515
0
            WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source - args->source -1),*toUnicodeStatus,data->currentDeltaToUnicode,err);
1516
0
            *toUnicodeStatus = missingCharMarker;
1517
0
        }
1518
0
    }
1519
1520
0
    args->target = target;
1521
0
    args->source = source;
1522
0
}
1523
1524
/* structure for SafeClone calculations */
1525
/* Layout of the caller-supplied clone buffer used by _ISCII_SafeClone():
 * its sizeof is the buffer size reported during preflighting, and the
 * buffer is cast to this type when the clone is populated.
 */
struct cloneISCIIStruct {
1526
    UConverter cnv;              /* the cloned converter; its extraInfo is pointed at mydata */
1527
    UConverterDataISCII mydata;  /* copy of the source converter's ISCII state (extraInfo) */
1528
};
1529
1530
/*
 * Clone hook for the ISCII converter.
 *
 * Returns NULL without touching the buffer when *status already signals failure.
 * When *pBufferSize is 0 this is a preflighting request: the required size is
 * stored in *pBufferSize and NULL is returned.
 * Otherwise stackBuffer is treated as a cloneISCIIStruct, the ISCII state behind
 * cnv->extraInfo is copied into it, and a pointer to the embedded converter is returned.
 */
static UConverter * U_CALLCONV
_ISCII_SafeClone(const UConverter *cnv,
              void *stackBuffer,
              int32_t *pBufferSize,
              UErrorCode *status)
{
    if (U_FAILURE(*status)) {
        return NULL;
    }

    if (*pBufferSize == 0) {
        /* preflighting: only report how much room a clone needs */
        const int32_t requiredSize = sizeof(struct cloneISCIIStruct);
        *pBufferSize = requiredSize;
        return NULL;
    }

    /* ucnv.c/ucnv_safeClone() copied the main UConverter already,
     * so only the ISCII-specific state has to be duplicated here. */
    struct cloneISCIIStruct *clone = (struct cloneISCIIStruct *)stackBuffer;
    UConverterDataISCII *clonedState = &clone->mydata;

    uprv_memcpy(clonedState, cnv->extraInfo, sizeof(UConverterDataISCII));

    /* the state lives inside the clone buffer itself */
    clone->cnv.extraInfo = clonedState;
    clone->cnv.isExtraLocal = TRUE;

    return &clone->cnv;
}
1557
1558
static void U_CALLCONV
1559
_ISCIIGetUnicodeSet(const UConverter *cnv,
1560
                    const USetAdder *sa,
1561
                    UConverterUnicodeSet which,
1562
                    UErrorCode *pErrorCode)
1563
0
{
1564
0
    (void)cnv;
1565
0
    (void)which;
1566
0
    (void)pErrorCode;
1567
0
    int32_t idx, script;
1568
0
    uint8_t mask;
1569
1570
    /* Since all ISCII versions allow switching to other ISCII
1571
    scripts, we add all roundtrippable characters to this set. */
1572
0
    sa->addRange(sa->set, 0, ASCII_END);
1573
0
    for (script = DEVANAGARI; script <= MALAYALAM; script++) {
1574
0
        mask = (uint8_t)(lookupInitialData[script].maskEnum);
1575
0
        for (idx = 0; idx < DELTA; idx++) {
1576
            /* added check for TELUGU character */
1577
0
            if ((validityTable[idx] & mask) || (script==TELUGU && idx==0x31)) {
1578
0
                sa->add(sa->set, idx + (script * DELTA) + INDIC_BLOCK_BEGIN);
1579
0
            }
1580
0
        }
1581
0
    }
1582
0
    sa->add(sa->set, DANDA);
1583
0
    sa->add(sa->set, DOUBLE_DANDA);
1584
0
    sa->add(sa->set, ZWNJ);
1585
0
    sa->add(sa->set, ZWJ);
1586
0
}
1587
U_CDECL_END
1588
/* Implementation table for the ISCII converter: the converter type tag followed
 * by the entry points defined in this file. NULL marks a slot for which this
 * converter supplies no function.
 * NOTE(review): slot order follows the UConverterImpl declaration, which is not
 * visible here; the per-slot notes below are inferred from the function names and
 * should be confirmed against that declaration.
 */
static const UConverterImpl _ISCIIImpl={
1589
1590
    UCNV_ISCII,
1591
1592
    NULL,
1593
    NULL,
1594
1595
    _ISCIIOpen,     /* lifecycle hooks: open / close / reset */
1596
    _ISCIIClose,
1597
    _ISCIIReset,
1598
1599
    /* The same to-Unicode routine fills two consecutive slots, as does the
     * from-Unicode routine; presumably the plain and the with-offsets variants
     * (both routines handle args->offsets themselves) -- TODO confirm. */
    UConverter_toUnicode_ISCII_OFFSETS_LOGIC,
1600
    UConverter_toUnicode_ISCII_OFFSETS_LOGIC,
1601
    UConverter_fromUnicode_ISCII_OFFSETS_LOGIC,
1602
    UConverter_fromUnicode_ISCII_OFFSETS_LOGIC,
1603
    NULL,
1604
1605
    NULL,
1606
    _ISCIIgetName,
1607
    NULL,
1608
    _ISCII_SafeClone,
1609
    _ISCIIGetUnicodeSet,
1610
    NULL,
1611
    NULL
1612
};
1613
1614
/* Static description of the ISCII converter, referenced by _ISCIIData below.
 * NOTE(review): field meanings are determined by the UConverterStaticData
 * declaration, which is not visible here; the hedged notes below should be
 * confirmed against it.
 */
static const UConverterStaticData _ISCIIStaticData={
1615
    sizeof(UConverterStaticData), /* size of this structure */
1616
        "ISCII",                  /* converter name */
1617
         0,
1618
         UCNV_IBM,
1619
         UCNV_ISCII,              /* same type tag as the first entry of _ISCIIImpl */
1620
         1,                       /* presumably minimum bytes per character -- TODO confirm */
1621
         4,                       /* presumably maximum bytes per character -- TODO confirm */
1622
        { 0x1a, 0, 0, 0 },        /* presumably the substitution byte sequence -- TODO confirm */
1623
        0x1,                      /* presumably the length of that sequence -- TODO confirm */
1624
        FALSE,
1625
        FALSE,
1626
        0x0,
1627
        0x0,
1628
        { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */
1629
1630
};
1631
1632
/* Shared-data object for the ISCII converter (the only non-static definition in
 * this part of the file), initialized from the static description and the
 * implementation table defined above.
 */
const UConverterSharedData _ISCIIData=
1633
        UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISCIIStaticData, &_ISCIIImpl);
1634
1635
#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */