Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/intl/icu/source/i18n/csrsbcs.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
 **********************************************************************
5
 *   Copyright (C) 2005-2016, International Business Machines
6
 *   Corporation and others.  All Rights Reserved.
7
 **********************************************************************
8
 */
9
10
#include "unicode/utypes.h"
11
12
#include "cmemory.h"
13
14
#if !UCONFIG_NO_CONVERSION
15
#include "csrsbcs.h"
16
#include "csmatch.h"
17
18
#define N_GRAM_SIZE 3
19
0
#define N_GRAM_MASK 0xFFFFFF
20
21
U_NAMESPACE_BEGIN
22
23
/*
 * Builds a parser bound to one n-gram table and one byte-mapping table.
 *
 * theNgramList  table that lookup() hands to search(); search() expects it
 *               to hold exactly 64 entries.
 * theCharMap    table indexed by raw input byte value in parseCharacters().
 *
 * Neither pointer is copied or freed here; the parser only stores them.
 */
NGramParser::NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap)
    : ngram(0), byteIndex(0)
{
    // Both counters start at zero; lookup() increments them per n-gram.
    hitCount   = 0;
    ngramCount = 0;

    charMap   = theCharMap;
    ngramList = theNgramList;
}
31
32
/* The parser only stores the table pointers it was given; nothing to release. */
NGramParser::~NGramParser()
{
    // intentionally empty
}
35
36
/*
37
 * Binary search for value in table, which must have exactly 64 entries.
38
 */
39
40
int32_t NGramParser::search(const int32_t *table, int32_t value)
41
0
{
42
0
    int32_t index = 0;
43
0
44
0
    if (table[index + 32] <= value) {
45
0
        index += 32;
46
0
    }
47
0
48
0
    if (table[index + 16] <= value) {
49
0
        index += 16;
50
0
    }
51
0
52
0
    if (table[index + 8] <= value) {
53
0
        index += 8;
54
0
    }
55
0
56
0
    if (table[index + 4] <= value) {
57
0
        index += 4;
58
0
    }
59
0
60
0
    if (table[index + 2] <= value) {
61
0
        index += 2;
62
0
    }
63
0
64
0
    if (table[index + 1] <= value) {
65
0
        index += 1;
66
0
    }
67
0
68
0
    if (table[index] > value) {
69
0
        index -= 1;
70
0
    }
71
0
72
0
    if (index < 0 || table[index] != value) {
73
0
        return -1;
74
0
    }
75
0
76
0
    return index;
77
0
}
78
79
void NGramParser::lookup(int32_t thisNgram)
80
0
{
81
0
    ngramCount += 1;
82
0
83
0
    if (search(ngramList, thisNgram) >= 0) {
84
0
        hitCount += 1;
85
0
    }
86
0
87
0
}
88
89
void NGramParser::addByte(int32_t b)
90
0
{
91
0
    ngram = ((ngram << 8) + b) & N_GRAM_MASK;
92
0
    lookup(ngram);
93
0
}
94
95
/*
 * Returns the byte at det->fInputBytes[byteIndex] and advances byteIndex,
 * or returns -1 once byteIndex has reached det->fInputLen.
 */
int32_t NGramParser::nextByte(InputText *det)
{
    if (byteIndex < det->fInputLen) {
        return det->fInputBytes[byteIndex++];
    }

    return -1;
}
103
104
/*
 * Feeds every input byte through charMap and into addByte().
 *
 * Bytes that map to 0 are dropped entirely. A mapped 0x20 is added only when
 * the previously kept byte was not also 0x20, so runs of 0x20 collapse to one.
 */
void NGramParser::parseCharacters(InputText *det)
{
    bool ignoreSpace = false;

    for (int32_t b = nextByte(det); b >= 0; b = nextByte(det)) {
        const uint8_t mb = charMap[b];

        if (mb == 0) {
            continue;   // dropped bytes do not affect ignoreSpace
        }

        // TODO: 0x20 might not be a space in all character sets...
        const bool isSpace = (mb == 0x20);

        if (!isSpace || !ignoreSpace) {
            addByte(mb);
        }

        ignoreSpace = isSpace;
    }
}
122
123
/*
 * Runs the parser over det and converts the n-gram hit ratio to a score.
 *
 * Returns 98 when more than 0.33 of the n-grams were found in ngramList;
 * otherwise returns the ratio multiplied by 300, truncated to an integer.
 */
int32_t NGramParser::parse(InputText *det)
{
    parseCharacters(det);

    // TODO: Is this OK? The buffer could have ended in the middle of a word...
    // This call also guarantees ngramCount >= 1 for the division below,
    // because addByte() always goes through lookup().
    addByte(0x20);

    const double rawPercent = static_cast<double>(hitCount) / static_cast<double>(ngramCount);

    // Disabled lower threshold, kept for reference:
    //            if (rawPercent <= 2.0) {
    //                return 0;
    //            }

    // TODO - This is a bit of a hack to take care of a case
    // where we were getting a confidence of 135...
    if (rawPercent > 0.33) {
        return 98;
    }

    return static_cast<int32_t>(rawPercent * 300.0);
}
144
145
#if !UCONFIG_ONLY_HTML_CONVERSION
146
static const uint8_t unshapeMap_IBM420[] = {
147
/*           -0    -1    -2    -3    -4    -5    -6    -7    -8    -9    -A    -B    -C    -D    -E    -F   */
148
/* 0- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
149
/* 1- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
150
/* 2- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
151
/* 3- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
152
/* 4- */    0x40, 0x40, 0x42, 0x42, 0x44, 0x45, 0x46, 0x47, 0x47, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 
153
/* 5- */    0x50, 0x49, 0x52, 0x53, 0x54, 0x55, 0x56, 0x56, 0x58, 0x58, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 
154
/* 6- */    0x60, 0x61, 0x62, 0x63, 0x63, 0x65, 0x65, 0x67, 0x67, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
155
/* 7- */    0x69, 0x71, 0x71, 0x73, 0x74, 0x75, 0x76, 0x77, 0x77, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 
156
/* 8- */    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x80, 0x8B, 0x8B, 0x8D, 0x8D, 0x8F, 
157
/* 9- */    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9A, 0x9A, 0x9A, 0x9E, 0x9E, 
158
/* A- */    0x9E, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x9E, 0xAB, 0xAB, 0xAD, 0xAD, 0xAF, 
159
/* B- */    0xAF, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xB1, 0xBB, 0xBB, 0xBD, 0xBD, 0xBF, 
160
/* C- */    0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xBF, 0xCC, 0xBF, 0xCE, 0xCF, 
161
/* D- */    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDA, 0xDC, 0xDC, 0xDC, 0xDF, 
162
/* E- */    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
163
/* F- */    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 
164
};
165
166
/*
 * Forwards both tables to the NGramParser constructor and clears alef, the
 * slot that nextByte() fills with the result of isLamAlef().
 */
NGramParser_IBM420::NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap)
    : NGramParser(theNgramList, theCharMap)
{
    alef = 0x00;
}
170
171
/* No state of its own to release. */
NGramParser_IBM420::~NGramParser_IBM420()
{
}
172
173
/*
 * Maps six specific byte values, in three pairs, to a replacement byte:
 * 0xB2/0xB3 -> 0x47, 0xB4/0xB5 -> 0x49, 0xB8/0xB9 -> 0x56.
 * Any other value yields 0x00.
 *
 * NOTE(review): judging by the name and by how nextByte() uses the result,
 * the inputs are presumably lam-alef ligature bytes and the outputs the alef
 * component - confirm against the IBM420 code page.
 */
int32_t NGramParser_IBM420::isLamAlef(int32_t b)
{
    switch (b) {
    case 0xB2:
    case 0xB3:
        return 0x47;

    case 0xB4:
    case 0xB5:
        return 0x49;

    case 0xB8:
    case 0xB9:
        return 0x56;

    default:
        return 0x00;
    }
}
184
185
/*
186
* Arabic shaping needs to be done manually. Cannot call ArabicShaping class
187
* because CharsetDetector is dealing with bytes not Unicode code points. We could
188
* convert the bytes to Unicode code points but that would leave us dependent
189
* on CharsetICU which we try to avoid. IBM420 converter amongst different versions
190
* of JDK can produce different results and therefore is also avoided.
191
*/ 
192
int32_t NGramParser_IBM420::nextByte(InputText *det)
193
0
{
194
0
  
195
0
    if (byteIndex >= det->fInputLen || det->fInputBytes[byteIndex] == 0) {
196
0
        return -1;
197
0
    }              
198
0
    int next;
199
0
             
200
0
    alef = isLamAlef(det->fInputBytes[byteIndex]);
201
0
    if(alef != 0x00)
202
0
        next = 0xB1 & 0xFF;
203
0
    else
204
0
        next = unshapeMap_IBM420[det->fInputBytes[byteIndex]& 0xFF] & 0xFF;
205
0
            
206
0
    byteIndex++;
207
0
             
208
0
    return next;
209
0
}
210
211
/*
 * IBM420 variant of the character loop. Each call to nextByte() yields one
 * byte and may additionally leave a second byte in alef; both go through
 * charMap and addByte() under the same rules: bytes mapping to 0 are dropped,
 * and a mapped 0x20 is added only if the previously kept byte was not 0x20.
 */
void NGramParser_IBM420::parseCharacters(InputText *det)
{
    bool ignoreSpace = false;

    for (int32_t b = nextByte(det); b >= 0; b = nextByte(det)) {
        // First the byte nextByte() returned, then (only if set) the byte it
        // left in alef. alef is not modified by addByte(), so reading it up
        // front is equivalent to reading it after the first byte is handled.
        const int32_t pending[2]   = { b, alef & 0xFF };
        const int32_t pendingCount = (alef != 0x00) ? 2 : 1;

        for (int32_t i = 0; i < pendingCount; i += 1) {
            const uint8_t mb = charMap[pending[i]];

            if (mb == 0) {
                continue;   // dropped bytes do not affect ignoreSpace
            }

            // TODO: 0x20 might not be a space in all character sets...
            const bool isSpace = (mb == 0x20);

            if (!isSpace || !ignoreSpace) {
                addByte(mb);
            }

            ignoreSpace = isSpace;
        }
    }
}
242
#endif
243
244
/* Default construction; this class sets up nothing of its own here. */
CharsetRecog_sbcs::CharsetRecog_sbcs()
{
    // intentionally empty
}
248
249
/* Nothing to release. */
CharsetRecog_sbcs::~CharsetRecog_sbcs()
{
    // intentionally empty
}
253
254
/*
 * Scores det against one single-byte charset description.
 *
 * ngrams   n-gram table handed to a temporary NGramParser.
 * byteMap  byte-mapping table handed to the same parser.
 *
 * Returns whatever NGramParser::parse() computes for det.
 */
int32_t CharsetRecog_sbcs::match_sbcs(InputText *det, const int32_t ngrams[],  const uint8_t byteMap[]) const
{
    NGramParser parser(ngrams, byteMap);

    return parser.parse(det);
}
263
264
static const uint8_t charMap_8859_1[] = {
265
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
266
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
267
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
268
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
269
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 
270
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
271
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
272
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
273
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
274
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
275
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
276
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
277
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
278
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
279
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
280
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
281
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
282
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
283
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
284
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
285
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
286
    0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20, 
287
    0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, 
288
    0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20, 
289
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
290
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
291
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 
292
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF, 
293
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
294
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
295
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 
296
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 
297
};
298
299
static const uint8_t charMap_8859_2[] = {
300
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
301
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
302
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
303
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
304
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 
305
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
306
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
307
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
308
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
309
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
310
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
311
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
312
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
313
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
314
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
315
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
316
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
317
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
318
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
319
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
320
    0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0x20, 
321
    0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF, 
322
    0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0xB7, 
323
    0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF, 
324
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
325
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
326
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 
327
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF, 
328
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
329
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
330
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 
331
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20, 
332
};
333
334
static const uint8_t charMap_8859_5[] = {
335
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
336
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
337
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
338
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
339
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 
340
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
341
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
342
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
343
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
344
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
345
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
346
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
347
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
348
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
349
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
350
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
351
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
352
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
353
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
354
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
355
    0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 
356
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF, 
357
    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 
358
    0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 
359
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
360
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
361
    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 
362
    0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 
363
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
364
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
365
    0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 
366
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF, 
367
};
368
369
static const uint8_t charMap_8859_6[] = {
370
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
371
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
372
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
373
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
374
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 
375
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
376
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
377
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
378
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
379
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
380
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
381
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
382
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
383
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
384
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
385
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
386
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
387
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
388
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
389
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
390
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
391
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
392
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
393
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
394
    0x20, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 
395
    0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 
396
    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 
397
    0xD8, 0xD9, 0xDA, 0x20, 0x20, 0x20, 0x20, 0x20, 
398
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
399
    0xE8, 0xE9, 0xEA, 0x20, 0x20, 0x20, 0x20, 0x20, 
400
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
401
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
402
};
403
404
static const uint8_t charMap_8859_7[] = {
405
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
406
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
407
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
408
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
409
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 
410
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
411
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
412
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
413
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
414
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
415
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
416
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
417
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
418
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
419
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
420
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
421
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
422
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
423
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
424
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
425
    0x20, 0xA1, 0xA2, 0x20, 0x20, 0x20, 0x20, 0x20, 
426
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
427
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xDC, 0x20, 
428
    0xDD, 0xDE, 0xDF, 0x20, 0xFC, 0x20, 0xFD, 0xFE, 
429
    0xC0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
430
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
431
    0xF0, 0xF1, 0x20, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 
432
    0xF8, 0xF9, 0xFA, 0xFB, 0xDC, 0xDD, 0xDE, 0xDF, 
433
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
434
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
435
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 
436
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20, 
437
};
438
439
static const uint8_t charMap_8859_8[] = {
440
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
441
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
442
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
443
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
444
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 
445
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
446
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
447
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
448
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
449
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
450
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
451
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
452
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
453
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
454
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
455
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
456
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
457
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
458
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
459
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
460
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
461
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
462
    0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, 
463
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
464
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
465
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
466
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
467
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
468
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
469
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
470
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 
471
    0xF8, 0xF9, 0xFA, 0x20, 0x20, 0x20, 0x20, 0x20, 
472
};
473
474
static const uint8_t charMap_8859_9[] = {
475
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
476
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
477
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
478
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
479
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 
480
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
481
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
482
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
483
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
484
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
485
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
486
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
487
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
488
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
489
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
490
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
491
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
492
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
493
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
494
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
495
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
496
    0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20, 
497
    0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, 
498
    0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20, 
499
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
500
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
501
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 
502
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x69, 0xFE, 0xDF, 
503
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
504
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
505
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 
506
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 
507
};
508
509
/*
 * N-gram table for windows-1251: 64 values, each three bytes packed into the
 * low 24 bits, listed in ascending order. NGramParser::search() relies on
 * both the entry count and the ordering, so keep them when editing.
 * NOTE(review): presumably used together with charMap_windows_1251 by a
 * recognizer defined further down the file - confirm.
 */
static const int32_t ngrams_windows_1251[] = {
510
    0x20E220, 0x20E2EE, 0x20E4EE, 0x20E7E0, 0x20E820, 0x20EAE0, 0x20EAEE, 0x20EDE0, 0x20EDE5, 0x20EEE1, 0x20EFEE, 0x20EFF0, 0x20F0E0, 0x20F1EE, 0x20F1F2, 0x20F2EE, 
511
    0x20F7F2, 0x20FDF2, 0xE0EDE8, 0xE0F2FC, 0xE3EE20, 0xE5EBFC, 0xE5EDE8, 0xE5F1F2, 0xE5F220, 0xE820EF, 0xE8E520, 0xE8E820, 0xE8FF20, 0xEBE5ED, 0xEBE820, 0xEBFCED, 
512
    0xEDE020, 0xEDE520, 0xEDE8E5, 0xEDE8FF, 0xEDEE20, 0xEDEEE2, 0xEE20E2, 0xEE20EF, 0xEE20F1, 0xEEE220, 0xEEE2E0, 0xEEE3EE, 0xEEE920, 0xEEEBFC, 0xEEEC20, 0xEEF1F2, 
513
    0xEFEEEB, 0xEFF0E5, 0xEFF0E8, 0xEFF0EE, 0xF0E0E2, 0xF0E5E4, 0xF1F2E0, 0xF1F2E2, 0xF1F2E8, 0xF1FF20, 0xF2E5EB, 0xF2EE20, 0xF2EEF0, 0xF2FC20, 0xF7F2EE, 0xFBF520, 
514
};
515
516
static const uint8_t charMap_windows_1251[] = {
517
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
518
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
519
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
520
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
521
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 
522
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
523
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
524
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
525
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
526
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
527
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
528
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
529
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
530
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
531
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
532
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
533
    0x90, 0x83, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20, 
534
    0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F, 
535
    0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
536
    0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F, 
537
    0x20, 0xA2, 0xA2, 0xBC, 0x20, 0xB4, 0x20, 0x20, 
538
    0xB8, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0xBF, 
539
    0x20, 0x20, 0xB3, 0xB3, 0xB4, 0xB5, 0x20, 0x20, 
540
    0xB8, 0x20, 0xBA, 0x20, 0xBC, 0xBE, 0xBE, 0xBF, 
541
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
542
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
543
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 
544
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 
545
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
546
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
547
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 
548
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 
549
};
550
551
/*
 * N-gram table for windows-1256: 64 values, each three bytes packed into the
 * low 24 bits, listed in ascending order. NGramParser::search() relies on
 * both the entry count and the ordering, so keep them when editing.
 * NOTE(review): presumably used together with charMap_windows_1256 by a
 * recognizer defined further down the file - confirm.
 */
static const int32_t ngrams_windows_1256[] = {
552
    0x20C7E1, 0x20C7E4, 0x20C8C7, 0x20DAE1, 0x20DDED, 0x20E1E1, 0x20E3E4, 0x20E6C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E120, 0xC7E1C3, 0xC7E1C7, 0xC7E1C8, 
553
    0xC7E1CA, 0xC7E1CC, 0xC7E1CD, 0xC7E1CF, 0xC7E1D3, 0xC7E1DA, 0xC7E1DE, 0xC7E1E3, 0xC7E1E6, 0xC7E1ED, 0xC7E320, 0xC7E420, 0xC7E4CA, 0xC820C7, 0xC920C7, 0xC920DD, 
554
    0xC920E1, 0xC920E3, 0xC920E6, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xDA20C7, 0xDAE1EC, 0xDDED20, 0xE120C7, 0xE1C920, 0xE1EC20, 0xE1ED20, 
555
    0xE320C7, 0xE3C720, 0xE3C920, 0xE3E420, 0xE420C7, 0xE520C7, 0xE5C720, 0xE6C7E1, 0xE6E420, 0xEC20C7, 0xED20C7, 0xED20E3, 0xED20E6, 0xEDC920, 0xEDD120, 0xEDE420, 
556
};
557
558
static const uint8_t charMap_windows_1256[] = {
559
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
560
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
561
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
562
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
563
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 
564
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
565
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
566
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
567
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
568
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
569
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
570
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
571
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
572
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
573
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
574
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
575
    0x20, 0x81, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20, 
576
    0x88, 0x20, 0x8A, 0x20, 0x9C, 0x8D, 0x8E, 0x8F, 
577
    0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
578
    0x98, 0x20, 0x9A, 0x20, 0x9C, 0x20, 0x20, 0x9F, 
579
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
580
    0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20, 
581
    0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, 
582
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
583
    0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 
584
    0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 
585
    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0x20, 
586
    0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 
587
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
588
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
589
    0x20, 0x20, 0x20, 0x20, 0xF4, 0x20, 0x20, 0x20, 
590
    0x20, 0xF9, 0x20, 0xFB, 0xFC, 0x20, 0x20, 0xFF, 
591
};
592
593
/*
 * N-gram table for KOI8-R: 64 values, each three bytes packed into the low
 * 24 bits, listed in ascending order. NGramParser::search() relies on both
 * the entry count and the ordering, so keep them when editing.
 * NOTE(review): presumably used together with charMap_KOI8_R by a recognizer
 * defined further down the file - confirm.
 */
static const int32_t ngrams_KOI8_R[] = {
594
    0x20C4CF, 0x20C920, 0x20CBC1, 0x20CBCF, 0x20CEC1, 0x20CEC5, 0x20CFC2, 0x20D0CF, 0x20D0D2, 0x20D2C1, 0x20D3CF, 0x20D3D4, 0x20D4CF, 0x20D720, 0x20D7CF, 0x20DAC1, 
595
    0x20DCD4, 0x20DED4, 0xC1CEC9, 0xC1D4D8, 0xC5CCD8, 0xC5CEC9, 0xC5D3D4, 0xC5D420, 0xC7CF20, 0xC920D0, 0xC9C520, 0xC9C920, 0xC9D120, 0xCCC5CE, 0xCCC920, 0xCCD8CE, 
596
    0xCEC120, 0xCEC520, 0xCEC9C5, 0xCEC9D1, 0xCECF20, 0xCECFD7, 0xCF20D0, 0xCF20D3, 0xCF20D7, 0xCFC7CF, 0xCFCA20, 0xCFCCD8, 0xCFCD20, 0xCFD3D4, 0xCFD720, 0xCFD7C1, 
597
    0xD0CFCC, 0xD0D2C5, 0xD0D2C9, 0xD0D2CF, 0xD2C1D7, 0xD2C5C4, 0xD3D120, 0xD3D4C1, 0xD3D4C9, 0xD3D4D7, 0xD4C5CC, 0xD4CF20, 0xD4CFD2, 0xD4D820, 0xD9C820, 0xDED4CF, 
598
};
599
600
static const uint8_t charMap_KOI8_R[] = {
601
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
602
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
603
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
604
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
605
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 
606
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
607
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
608
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
609
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
610
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
611
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
612
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
613
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
614
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
615
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
616
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
617
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
618
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
619
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
620
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
621
    0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20, 
622
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
623
    0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20, 
624
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
625
    0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 
626
    0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 
627
    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 
628
    0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 
629
    0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 
630
    0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 
631
    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 
632
    0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 
633
};
634
635
#if !UCONFIG_ONLY_HTML_CONVERSION
636
/*
 * N-gram table for IBM424, "he_rtl" variant: 64 values, each three bytes
 * packed into the low 24 bits, listed in ascending order.
 * NGramParser::search() relies on both the entry count and the ordering, so
 * keep them when editing.
 * NOTE(review): "he_rtl" presumably means Hebrew text stored right-to-left,
 * paired with charMap_IBM424_he - confirm against the recognizer that uses it.
 */
static const int32_t ngrams_IBM424_he_rtl[] = {
637
    0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641, 
638
    0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045, 
639
    0x514540, 0x514671, 0x515155, 0x515540, 0x515740, 0x516840, 0x517140, 0x544041, 0x544045, 0x544140, 0x544540, 0x554041, 0x554042, 0x554045, 0x554054, 0x554056, 
640
    0x554069, 0x564540, 0x574045, 0x584540, 0x585140, 0x585155, 0x625440, 0x684045, 0x685155, 0x695440, 0x714041, 0x714042, 0x714045, 0x714054, 0x714056, 0x714069, 
641
};
642
643
/*
 * N-gram table for IBM424, "he_ltr" variant: 64 values, each three bytes
 * packed into the low 24 bits, listed in ascending order.
 * NGramParser::search() relies on both the entry count and the ordering, so
 * keep them when editing.
 * NOTE(review): "he_ltr" presumably means Hebrew text stored left-to-right,
 * paired with charMap_IBM424_he - confirm against the recognizer that uses it.
 */
static const int32_t ngrams_IBM424_he_ltr[] = {
644
    0x404146, 0x404154, 0x404551, 0x404554, 0x404556, 0x404558, 0x405158, 0x405462, 0x405469, 0x405546, 0x405551, 0x405746, 0x405751, 0x406846, 0x406851, 0x407141,
645
    0x407146, 0x407151, 0x414045, 0x414054, 0x414055, 0x414071, 0x414540, 0x414645, 0x415440, 0x415640, 0x424045, 0x424055, 0x424071, 0x454045, 0x454051, 0x454054,
646
    0x454055, 0x454057, 0x454068, 0x454071, 0x455440, 0x464140, 0x464540, 0x484140, 0x514140, 0x514240, 0x514540, 0x544045, 0x544055, 0x544071, 0x546240, 0x546940,
647
    0x555151, 0x555158, 0x555168, 0x564045, 0x564055, 0x564071, 0x564240, 0x564540, 0x624540, 0x694045, 0x694055, 0x694071, 0x694540, 0x714140, 0x714540, 0x714651,
648
};
649
650
// Byte map passed to match_sbcs() together with both IBM424 Hebrew n-gram
// tables. 256 entries laid out as 16 rows of 16; the row comment gives the
// high nibble and the column header the low nibble of the index.
// NOTE(review): presumably indexed by the raw input byte to obtain the byte
// used when building n-grams - confirm against NGramParser.
static const uint8_t charMap_IBM424_he[] = {
651
/*           -0    -1    -2    -3    -4    -5    -6    -7    -8    -9    -A    -B    -C    -D    -E    -F   */
652
/* 0- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
653
/* 1- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
654
/* 2- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
655
/* 3- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
656
/* 4- */    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
657
/* 5- */    0x40, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
658
/* 6- */    0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
659
/* 7- */    0x40, 0x71, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x00, 0x40, 0x40, 
660
/* 8- */    0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
661
/* 9- */    0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
662
/* A- */    0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
663
/* B- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
664
/* C- */    0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
665
/* D- */    0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
666
/* E- */    0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
667
/* F- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
668
};
669
670
// N-gram table handed to match_sbcs() by CharsetRecog_IBM420_ar_rtl::match().
// 64 values (4 rows of 16); each fits in 24 bits (see N_GRAM_MASK).
// Keep the values in ascending order: NGramParser::search() binary-searches
// a table of exactly 64 entries.
static const int32_t ngrams_IBM420_ar_rtl[] = {
671
    0x4056B1, 0x4056BD, 0x405856, 0x409AB1, 0x40ABDC, 0x40B1B1, 0x40BBBD, 0x40CF56, 0x564056, 0x564640, 0x566340, 0x567540, 0x56B140, 0x56B149, 0x56B156, 0x56B158,
672
    0x56B163, 0x56B167, 0x56B169, 0x56B173, 0x56B178, 0x56B19A, 0x56B1AD, 0x56B1BB, 0x56B1CF, 0x56B1DC, 0x56BB40, 0x56BD40, 0x56BD63, 0x584056, 0x624056, 0x6240AB,
673
    0x6240B1, 0x6240BB, 0x6240CF, 0x634056, 0x734056, 0x736240, 0x754056, 0x756240, 0x784056, 0x9A4056, 0x9AB1DA, 0xABDC40, 0xB14056, 0xB16240, 0xB1DA40, 0xB1DC40,
674
    0xBB4056, 0xBB5640, 0xBB6240, 0xBBBD40, 0xBD4056, 0xBF4056, 0xBF5640, 0xCF56B1, 0xCFBD40, 0xDA4056, 0xDC4056, 0xDC40BB, 0xDC40CF, 0xDC6240, 0xDC7540, 0xDCBD40,
675
};
676
677
// N-gram table handed to match_sbcs() by CharsetRecog_IBM420_ar_ltr::match().
// 64 values (4 rows of 16); each fits in 24 bits (see N_GRAM_MASK).
// Keep the values in ascending order: NGramParser::search() binary-searches
// a table of exactly 64 entries.
static const int32_t ngrams_IBM420_ar_ltr[] = {
678
    0x404656, 0x4056BB, 0x4056BF, 0x406273, 0x406275, 0x4062B1, 0x4062BB, 0x4062DC, 0x406356, 0x407556, 0x4075DC, 0x40B156, 0x40BB56, 0x40BD56, 0x40BDBB, 0x40BDCF, 
679
    0x40BDDC, 0x40DAB1, 0x40DCAB, 0x40DCB1, 0x49B156, 0x564056, 0x564058, 0x564062, 0x564063, 0x564073, 0x564075, 0x564078, 0x56409A, 0x5640B1, 0x5640BB, 0x5640BD,
680
    0x5640BF, 0x5640DA, 0x5640DC, 0x565840, 0x56B156, 0x56CF40, 0x58B156, 0x63B156, 0x63BD56, 0x67B156, 0x69B156, 0x73B156, 0x78B156, 0x9AB156, 0xAB4062, 0xADB156,
681
    0xB14062, 0xB15640, 0xB156CF, 0xB19A40, 0xB1B140, 0xBB4062, 0xBB40DC, 0xBBB156, 0xBD5640, 0xBDBB40, 0xCF4062, 0xCF40DC, 0xCFB156, 0xDAB19A, 0xDCAB40, 0xDCB156
682
};
683
684
// Byte map passed to match_sbcs() together with both IBM420 Arabic n-gram
// tables. 256 entries laid out as 16 rows of 16; the row comment gives the
// high nibble and the column header the low nibble of the index.
// NOTE(review): presumably indexed by the raw input byte to obtain the byte
// used when building n-grams - confirm against NGramParser_IBM420.
static const uint8_t charMap_IBM420_ar[]= {
685
/*           -0    -1    -2    -3    -4    -5    -6    -7    -8    -9    -A    -B    -C    -D    -E    -F   */
686
/* 0- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
687
/* 1- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
688
/* 2- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
689
/* 3- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
690
/* 4- */    0x40, 0x40, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
691
/* 5- */    0x40, 0x51, 0x52, 0x40, 0x40, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
692
/* 6- */    0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
693
/* 7- */    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
694
/* 8- */    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 
695
/* 9- */    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, 
696
/* A- */    0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 
697
/* B- */    0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0x40, 0x40, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, 
698
/* C- */    0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0xCB, 0x40, 0xCD, 0x40, 0xCF, 
699
/* D- */    0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 
700
/* E- */    0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xEB, 0x40, 0xED, 0xEE, 0xEF, 
701
/* F- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xFB, 0xFC, 0xFD, 0xFE, 0x40, 
702
};
703
#endif
704
705
// N-gram tables for ISO-8859-1, -2, -5, -6, -7, -8 and -9
706
707
// One n-gram table bundled with the language string it belongs to. Used for
// the charsets that are tried against several languages (ngrams_8859_1 and
// ngrams_8859_2).
struct NGramsPlusLang {
708
    const int32_t ngrams[64];  // n-gram values; 64 is the table size NGramParser::search() requires
709
    const char *  lang;        // language string passed to CharsetMatch::set() on a match, e.g. "en"
710
};
711
712
// Per-language n-gram tables tried by CharsetRecog_8859_1::match(), in this
// order: en, da, de, es, fr, it, nl, no, pt, sv. Each table holds 64 values
// that fit in 24 bits (see N_GRAM_MASK). Keep each table in ascending order:
// NGramParser::search() binary-searches a table of exactly 64 entries.
static const NGramsPlusLang ngrams_8859_1[] =  {
713
  { 
714
    {
715
    0x206120, 0x20616E, 0x206265, 0x20636F, 0x20666F, 0x206861, 0x206865, 0x20696E, 0x206D61, 0x206F66, 0x207072, 0x207265, 0x207361, 0x207374, 0x207468, 0x20746F, 
716
    0x207768, 0x616964, 0x616C20, 0x616E20, 0x616E64, 0x617320, 0x617420, 0x617465, 0x617469, 0x642061, 0x642074, 0x652061, 0x652073, 0x652074, 0x656420, 0x656E74, 
717
    0x657220, 0x657320, 0x666F72, 0x686174, 0x686520, 0x686572, 0x696420, 0x696E20, 0x696E67, 0x696F6E, 0x697320, 0x6E2061, 0x6E2074, 0x6E6420, 0x6E6720, 0x6E7420, 
718
    0x6F6620, 0x6F6E20, 0x6F7220, 0x726520, 0x727320, 0x732061, 0x732074, 0x736169, 0x737420, 0x742074, 0x746572, 0x746861, 0x746865, 0x74696F, 0x746F20, 0x747320, 
719
    },
720
    "en"
721
  },
722
  { 
723
    {
724
    0x206166, 0x206174, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207369, 0x207374, 0x207469, 0x207669, 0x616620, 
725
    0x616E20, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646572, 0x646574, 0x652073, 0x656420, 0x656465, 0x656E20, 0x656E64, 0x657220, 0x657265, 0x657320, 
726
    0x657420, 0x666F72, 0x676520, 0x67656E, 0x676572, 0x696765, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6572, 0x6C6967, 0x6C6C65, 0x6D6564, 0x6E6465, 0x6E6520, 
727
    0x6E6720, 0x6E6765, 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722064, 0x722065, 0x722073, 0x726520, 0x737465, 0x742073, 0x746520, 0x746572, 0x74696C, 0x766572, 
728
    },
729
    "da"
730
  },
731
  { 
732
    {
733
    0x20616E, 0x206175, 0x206265, 0x206461, 0x206465, 0x206469, 0x206569, 0x206765, 0x206861, 0x20696E, 0x206D69, 0x207363, 0x207365, 0x20756E, 0x207665, 0x20766F, 
734
    0x207765, 0x207A75, 0x626572, 0x636820, 0x636865, 0x636874, 0x646173, 0x64656E, 0x646572, 0x646965, 0x652064, 0x652073, 0x65696E, 0x656974, 0x656E20, 0x657220, 
735
    0x657320, 0x67656E, 0x68656E, 0x687420, 0x696368, 0x696520, 0x696E20, 0x696E65, 0x697420, 0x6C6963, 0x6C6C65, 0x6E2061, 0x6E2064, 0x6E2073, 0x6E6420, 0x6E6465, 
736
    0x6E6520, 0x6E6720, 0x6E6765, 0x6E7465, 0x722064, 0x726465, 0x726569, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x756E64, 0x756E67, 0x766572, 
737
    },
738
    "de"
739
  },
740
  {
741
    {
742
    0x206120, 0x206361, 0x20636F, 0x206465, 0x20656C, 0x20656E, 0x206573, 0x20696E, 0x206C61, 0x206C6F, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365, 
743
    0x20756E, 0x207920, 0x612063, 0x612064, 0x612065, 0x61206C, 0x612070, 0x616369, 0x61646F, 0x616C20, 0x617220, 0x617320, 0x6369F3, 0x636F6E, 0x646520, 0x64656C, 
744
    0x646F20, 0x652064, 0x652065, 0x65206C, 0x656C20, 0x656E20, 0x656E74, 0x657320, 0x657374, 0x69656E, 0x69F36E, 0x6C6120, 0x6C6F73, 0x6E2065, 0x6E7465, 0x6F2064, 
745
    0x6F2065, 0x6F6E20, 0x6F7220, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732064, 0x732065, 0x732070, 0x736520, 0x746520, 0x746F20, 0x756520, 0xF36E20, 
746
    },
747
    "es"
748
  },
749
  {
750
    {
751
    0x206175, 0x20636F, 0x206461, 0x206465, 0x206475, 0x20656E, 0x206574, 0x206C61, 0x206C65, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207365, 0x20736F, 0x20756E,
752
    0x20E020, 0x616E74, 0x617469, 0x636520, 0x636F6E, 0x646520, 0x646573, 0x647520, 0x652061, 0x652063, 0x652064, 0x652065, 0x65206C, 0x652070, 0x652073, 0x656E20,
753
    0x656E74, 0x657220, 0x657320, 0x657420, 0x657572, 0x696F6E, 0x697320, 0x697420, 0x6C6120, 0x6C6520, 0x6C6573, 0x6D656E, 0x6E2064, 0x6E6520, 0x6E7320, 0x6E7420,
754
    0x6F6E20, 0x6F6E74, 0x6F7572, 0x717565, 0x72206C, 0x726520, 0x732061, 0x732064, 0x732065, 0x73206C, 0x732070, 0x742064, 0x746520, 0x74696F, 0x756520, 0x757220,
755
    },
756
    "fr"
757
  },
758
  {
759
    {
760
    0x20616C, 0x206368, 0x20636F, 0x206465, 0x206469, 0x206520, 0x20696C, 0x20696E, 0x206C61, 0x207065, 0x207072, 0x20756E, 0x612063, 0x612064, 0x612070, 0x612073,
761
    0x61746F, 0x636865, 0x636F6E, 0x64656C, 0x646920, 0x652061, 0x652063, 0x652064, 0x652069, 0x65206C, 0x652070, 0x652073, 0x656C20, 0x656C6C, 0x656E74, 0x657220,
762
    0x686520, 0x692061, 0x692063, 0x692064, 0x692073, 0x696120, 0x696C20, 0x696E20, 0x696F6E, 0x6C6120, 0x6C6520, 0x6C6920, 0x6C6C61, 0x6E6520, 0x6E6920, 0x6E6F20,
763
    0x6E7465, 0x6F2061, 0x6F2064, 0x6F2069, 0x6F2073, 0x6F6E20, 0x6F6E65, 0x706572, 0x726120, 0x726520, 0x736920, 0x746120, 0x746520, 0x746920, 0x746F20, 0x7A696F,
764
    },
765
    "it"
766
  },
767
  {
768
    {
769
    0x20616C, 0x206265, 0x206461, 0x206465, 0x206469, 0x206565, 0x20656E, 0x206765, 0x206865, 0x20696E, 0x206D61, 0x206D65, 0x206F70, 0x207465, 0x207661, 0x207665,
770
    0x20766F, 0x207765, 0x207A69, 0x61616E, 0x616172, 0x616E20, 0x616E64, 0x617220, 0x617420, 0x636874, 0x646520, 0x64656E, 0x646572, 0x652062, 0x652076, 0x65656E,
771
    0x656572, 0x656E20, 0x657220, 0x657273, 0x657420, 0x67656E, 0x686574, 0x696520, 0x696E20, 0x696E67, 0x697320, 0x6E2062, 0x6E2064, 0x6E2065, 0x6E2068, 0x6E206F,
772
    0x6E2076, 0x6E6465, 0x6E6720, 0x6F6E64, 0x6F6F72, 0x6F7020, 0x6F7220, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x76616E, 0x766572, 0x766F6F,
773
    },
774
    "nl"
775
  },
776
  {
777
    {
778
    0x206174, 0x206176, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207365, 0x20736B, 0x20736F, 0x207374, 0x207469,
779
    0x207669, 0x20E520, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646574, 0x652073, 0x656420, 0x656E20, 0x656E65, 0x657220, 0x657265, 0x657420, 0x657474,
780
    0x666F72, 0x67656E, 0x696B6B, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6520, 0x6C6C65, 0x6D6564, 0x6D656E, 0x6E2073, 0x6E6520, 0x6E6720, 0x6E6765, 0x6E6E65,
781
    0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722073, 0x726520, 0x736F6D, 0x737465, 0x742073, 0x746520, 0x74656E, 0x746572, 0x74696C, 0x747420, 0x747465, 0x766572,
782
    },
783
    "no"
784
  },
785
  {
786
    {
787
    0x206120, 0x20636F, 0x206461, 0x206465, 0x20646F, 0x206520, 0x206573, 0x206D61, 0x206E6F, 0x206F20, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
788
    0x20756D, 0x612061, 0x612063, 0x612064, 0x612070, 0x616465, 0x61646F, 0x616C20, 0x617220, 0x617261, 0x617320, 0x636F6D, 0x636F6E, 0x646120, 0x646520, 0x646F20,
789
    0x646F73, 0x652061, 0x652064, 0x656D20, 0x656E74, 0x657320, 0x657374, 0x696120, 0x696361, 0x6D656E, 0x6E7465, 0x6E746F, 0x6F2061, 0x6F2063, 0x6F2064, 0x6F2065,
790
    0x6F2070, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732061, 0x732064, 0x732065, 0x732070, 0x737461, 0x746520, 0x746F20, 0x756520, 0xE36F20, 0xE7E36F,
791
    },
792
    "pt"
793
  },
794
  {
795
    {
796
    0x206174, 0x206176, 0x206465, 0x20656E, 0x2066F6, 0x206861, 0x206920, 0x20696E, 0x206B6F, 0x206D65, 0x206F63, 0x2070E5, 0x20736B, 0x20736F, 0x207374, 0x207469,
797
    0x207661, 0x207669, 0x20E472, 0x616465, 0x616E20, 0x616E64, 0x617220, 0x617474, 0x636820, 0x646520, 0x64656E, 0x646572, 0x646574, 0x656420, 0x656E20, 0x657220,
798
    0x657420, 0x66F672, 0x67656E, 0x696C6C, 0x696E67, 0x6B6120, 0x6C6C20, 0x6D6564, 0x6E2073, 0x6E6120, 0x6E6465, 0x6E6720, 0x6E6765, 0x6E696E, 0x6F6368, 0x6F6D20,
799
    0x6F6E20, 0x70E520, 0x722061, 0x722073, 0x726120, 0x736B61, 0x736F6D, 0x742073, 0x746120, 0x746520, 0x746572, 0x74696C, 0x747420, 0x766172, 0xE47220, 0xF67220,
800
    },
801
    "sv"
802
  }
803
};
804
805
806
// Per-language n-gram tables tried by CharsetRecog_8859_2::match(), in this
// order: cs, hu, pl, ro. Each table holds 64 values that fit in 24 bits (see
// N_GRAM_MASK). Keep each table in ascending order: NGramParser::search()
// binary-searches a table of exactly 64 entries.
static const NGramsPlusLang ngrams_8859_2[] =  {
807
  {
808
    {
809
    0x206120, 0x206279, 0x20646F, 0x206A65, 0x206E61, 0x206E65, 0x206F20, 0x206F64, 0x20706F, 0x207072, 0x2070F8, 0x20726F, 0x207365, 0x20736F, 0x207374, 0x20746F,
810
    0x207620, 0x207679, 0x207A61, 0x612070, 0x636520, 0x636820, 0x652070, 0x652073, 0x652076, 0x656D20, 0x656EED, 0x686F20, 0x686F64, 0x697374, 0x6A6520, 0x6B7465,
811
    0x6C6520, 0x6C6920, 0x6E6120, 0x6EE920, 0x6EEC20, 0x6EED20, 0x6F2070, 0x6F646E, 0x6F6A69, 0x6F7374, 0x6F7520, 0x6F7661, 0x706F64, 0x706F6A, 0x70726F, 0x70F865,
812
    0x736520, 0x736F75, 0x737461, 0x737469, 0x73746E, 0x746572, 0x746EED, 0x746F20, 0x752070, 0xBE6520, 0xE16EED, 0xE9686F, 0xED2070, 0xED2073, 0xED6D20, 0xF86564,
813
    },
814
    "cs"
815
  },
816
  {
817
    {
818
    0x206120, 0x20617A, 0x206265, 0x206567, 0x20656C, 0x206665, 0x206861, 0x20686F, 0x206973, 0x206B65, 0x206B69, 0x206BF6, 0x206C65, 0x206D61, 0x206D65, 0x206D69,
819
    0x206E65, 0x20737A, 0x207465, 0x20E973, 0x612061, 0x61206B, 0x61206D, 0x612073, 0x616B20, 0x616E20, 0x617A20, 0x62616E, 0x62656E, 0x656779, 0x656B20, 0x656C20,
820
    0x656C65, 0x656D20, 0x656E20, 0x657265, 0x657420, 0x657465, 0x657474, 0x677920, 0x686F67, 0x696E74, 0x697320, 0x6B2061, 0x6BF67A, 0x6D6567, 0x6D696E, 0x6E2061,
821
    0x6E616B, 0x6E656B, 0x6E656D, 0x6E7420, 0x6F6779, 0x732061, 0x737A65, 0x737A74, 0x737AE1, 0x73E967, 0x742061, 0x747420, 0x74E173, 0x7A6572, 0xE16E20, 0xE97320,
822
    },
823
    "hu"
824
  },
825
  {
826
    {
827
    0x20637A, 0x20646F, 0x206920, 0x206A65, 0x206B6F, 0x206D61, 0x206D69, 0x206E61, 0x206E69, 0x206F64, 0x20706F, 0x207072, 0x207369, 0x207720, 0x207769, 0x207779,
828
    0x207A20, 0x207A61, 0x612070, 0x612077, 0x616E69, 0x636820, 0x637A65, 0x637A79, 0x646F20, 0x647A69, 0x652070, 0x652073, 0x652077, 0x65207A, 0x65676F, 0x656A20,
829
    0x656D20, 0x656E69, 0x676F20, 0x696120, 0x696520, 0x69656A, 0x6B6120, 0x6B6920, 0x6B6965, 0x6D6965, 0x6E6120, 0x6E6961, 0x6E6965, 0x6F2070, 0x6F7761, 0x6F7769,
830
    0x706F6C, 0x707261, 0x70726F, 0x70727A, 0x727A65, 0x727A79, 0x7369EA, 0x736B69, 0x737461, 0x776965, 0x796368, 0x796D20, 0x7A6520, 0x7A6965, 0x7A7920, 0xF37720,
831
    },
832
    "pl"
833
  },
834
  {
835
    {
836
    0x206120, 0x206163, 0x206361, 0x206365, 0x20636F, 0x206375, 0x206465, 0x206469, 0x206C61, 0x206D61, 0x207065, 0x207072, 0x207365, 0x2073E3, 0x20756E, 0x20BA69,
837
    0x20EE6E, 0x612063, 0x612064, 0x617265, 0x617420, 0x617465, 0x617520, 0x636172, 0x636F6E, 0x637520, 0x63E320, 0x646520, 0x652061, 0x652063, 0x652064, 0x652070,
838
    0x652073, 0x656120, 0x656920, 0x656C65, 0x656E74, 0x657374, 0x692061, 0x692063, 0x692064, 0x692070, 0x696520, 0x696920, 0x696E20, 0x6C6120, 0x6C6520, 0x6C6F72,
839
    0x6C7569, 0x6E6520, 0x6E7472, 0x6F7220, 0x70656E, 0x726520, 0x726561, 0x727520, 0x73E320, 0x746520, 0x747275, 0x74E320, 0x756920, 0x756C20, 0xBA6920, 0xEE6E20,
840
    },
841
    "ro"
842
  }
843
};
844
845
// N-gram table handed to match_sbcs() by CharsetRecog_8859_5_ru::match().
// 64 values (4 rows of 16); each fits in 24 bits (see N_GRAM_MASK).
// Keep the values in ascending order: NGramParser::search() binary-searches
// a table of exactly 64 entries.
static const int32_t ngrams_8859_5_ru[] = {
846
    0x20D220, 0x20D2DE, 0x20D4DE, 0x20D7D0, 0x20D820, 0x20DAD0, 0x20DADE, 0x20DDD0, 0x20DDD5, 0x20DED1, 0x20DFDE, 0x20DFE0, 0x20E0D0, 0x20E1DE, 0x20E1E2, 0x20E2DE,
847
    0x20E7E2, 0x20EDE2, 0xD0DDD8, 0xD0E2EC, 0xD3DE20, 0xD5DBEC, 0xD5DDD8, 0xD5E1E2, 0xD5E220, 0xD820DF, 0xD8D520, 0xD8D820, 0xD8EF20, 0xDBD5DD, 0xDBD820, 0xDBECDD,
848
    0xDDD020, 0xDDD520, 0xDDD8D5, 0xDDD8EF, 0xDDDE20, 0xDDDED2, 0xDE20D2, 0xDE20DF, 0xDE20E1, 0xDED220, 0xDED2D0, 0xDED3DE, 0xDED920, 0xDEDBEC, 0xDEDC20, 0xDEE1E2,
849
    0xDFDEDB, 0xDFE0D5, 0xDFE0D8, 0xDFE0DE, 0xE0D0D2, 0xE0D5D4, 0xE1E2D0, 0xE1E2D2, 0xE1E2D8, 0xE1EF20, 0xE2D5DB, 0xE2DE20, 0xE2DEE0, 0xE2EC20, 0xE7E2DE, 0xEBE520,
850
};
851
852
// N-gram table handed to match_sbcs() by CharsetRecog_8859_6_ar::match().
// 64 values (4 rows of 16); each fits in 24 bits (see N_GRAM_MASK).
// Keep the values in ascending order: NGramParser::search() binary-searches
// a table of exactly 64 entries.
static const int32_t ngrams_8859_6_ar[] = {
853
    0x20C7E4, 0x20C7E6, 0x20C8C7, 0x20D9E4, 0x20E1EA, 0x20E4E4, 0x20E5E6, 0x20E8C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E420, 0xC7E4C3, 0xC7E4C7, 0xC7E4C8,
854
    0xC7E4CA, 0xC7E4CC, 0xC7E4CD, 0xC7E4CF, 0xC7E4D3, 0xC7E4D9, 0xC7E4E2, 0xC7E4E5, 0xC7E4E8, 0xC7E4EA, 0xC7E520, 0xC7E620, 0xC7E6CA, 0xC820C7, 0xC920C7, 0xC920E1,
855
    0xC920E4, 0xC920E5, 0xC920E8, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xD920C7, 0xD9E4E9, 0xE1EA20, 0xE420C7, 0xE4C920, 0xE4E920, 0xE4EA20,
856
    0xE520C7, 0xE5C720, 0xE5C920, 0xE5E620, 0xE620C7, 0xE720C7, 0xE7C720, 0xE8C7E4, 0xE8E620, 0xE920C7, 0xEA20C7, 0xEA20E5, 0xEA20E8, 0xEAC920, 0xEAD120, 0xEAE620,
857
};
858
859
// N-gram table handed to match_sbcs() by CharsetRecog_8859_7_el::match().
// 64 values (4 rows of 16); each fits in 24 bits (see N_GRAM_MASK).
// Keep the values in ascending order: NGramParser::search() binary-searches
// a table of exactly 64 entries.
static const int32_t ngrams_8859_7_el[] = {
860
    0x20E1ED, 0x20E1F0, 0x20E3E9, 0x20E4E9, 0x20E5F0, 0x20E720, 0x20EAE1, 0x20ECE5, 0x20EDE1, 0x20EF20, 0x20F0E1, 0x20F0EF, 0x20F0F1, 0x20F3F4, 0x20F3F5, 0x20F4E7,
861
    0x20F4EF, 0xDFE120, 0xE120E1, 0xE120F4, 0xE1E920, 0xE1ED20, 0xE1F0FC, 0xE1F220, 0xE3E9E1, 0xE5E920, 0xE5F220, 0xE720F4, 0xE7ED20, 0xE7F220, 0xE920F4, 0xE9E120,
862
    0xE9EADE, 0xE9F220, 0xEAE1E9, 0xEAE1F4, 0xECE520, 0xED20E1, 0xED20E5, 0xED20F0, 0xEDE120, 0xEFF220, 0xEFF520, 0xF0EFF5, 0xF0F1EF, 0xF0FC20, 0xF220E1, 0xF220E5,
863
    0xF220EA, 0xF220F0, 0xF220F4, 0xF3E520, 0xF3E720, 0xF3F4EF, 0xF4E120, 0xF4E1E9, 0xF4E7ED, 0xF4E7F2, 0xF4E9EA, 0xF4EF20, 0xF4EFF5, 0xF4F9ED, 0xF9ED20, 0xFEED20,
864
};
865
866
// N-gram table handed to match_sbcs() by CharsetRecog_8859_8_I_he::match().
// 64 values (4 rows of 16); each fits in 24 bits (see N_GRAM_MASK).
// Keep the values in ascending order: NGramParser::search() binary-searches
// a table of exactly 64 entries.
static const int32_t ngrams_8859_8_I_he[] = {
867
    0x20E0E5, 0x20E0E7, 0x20E0E9, 0x20E0FA, 0x20E1E9, 0x20E1EE, 0x20E4E0, 0x20E4E5, 0x20E4E9, 0x20E4EE, 0x20E4F2, 0x20E4F9, 0x20E4FA, 0x20ECE0, 0x20ECE4, 0x20EEE0,
868
    0x20F2EC, 0x20F9EC, 0xE0FA20, 0xE420E0, 0xE420E1, 0xE420E4, 0xE420EC, 0xE420EE, 0xE420F9, 0xE4E5E0, 0xE5E020, 0xE5ED20, 0xE5EF20, 0xE5F820, 0xE5FA20, 0xE920E4,
869
    0xE9E420, 0xE9E5FA, 0xE9E9ED, 0xE9ED20, 0xE9EF20, 0xE9F820, 0xE9FA20, 0xEC20E0, 0xEC20E4, 0xECE020, 0xECE420, 0xED20E0, 0xED20E1, 0xED20E4, 0xED20EC, 0xED20EE,
870
    0xED20F9, 0xEEE420, 0xEF20E4, 0xF0E420, 0xF0E920, 0xF0E9ED, 0xF2EC20, 0xF820E4, 0xF8E9ED, 0xF9EC20, 0xFA20E0, 0xFA20E1, 0xFA20E4, 0xFA20EC, 0xFA20EE, 0xFA20F9,
871
};
872
873
// N-gram table handed to match_sbcs() by CharsetRecog_8859_8_he::match().
// 64 values (4 rows of 16); each fits in 24 bits (see N_GRAM_MASK).
// Keep the values in ascending order: NGramParser::search() binary-searches
// a table of exactly 64 entries.
static const int32_t ngrams_8859_8_he[] = {
874
    0x20E0E5, 0x20E0EC, 0x20E4E9, 0x20E4EC, 0x20E4EE, 0x20E4F0, 0x20E9F0, 0x20ECF2, 0x20ECF9, 0x20EDE5, 0x20EDE9, 0x20EFE5, 0x20EFE9, 0x20F8E5, 0x20F8E9, 0x20FAE0,
875
    0x20FAE5, 0x20FAE9, 0xE020E4, 0xE020EC, 0xE020ED, 0xE020FA, 0xE0E420, 0xE0E5E4, 0xE0EC20, 0xE0EE20, 0xE120E4, 0xE120ED, 0xE120FA, 0xE420E4, 0xE420E9, 0xE420EC,
876
    0xE420ED, 0xE420EF, 0xE420F8, 0xE420FA, 0xE4EC20, 0xE5E020, 0xE5E420, 0xE7E020, 0xE9E020, 0xE9E120, 0xE9E420, 0xEC20E4, 0xEC20ED, 0xEC20FA, 0xECF220, 0xECF920,
877
    0xEDE9E9, 0xEDE9F0, 0xEDE9F8, 0xEE20E4, 0xEE20ED, 0xEE20FA, 0xEEE120, 0xEEE420, 0xF2E420, 0xF920E4, 0xF920ED, 0xF920FA, 0xF9E420, 0xFAE020, 0xFAE420, 0xFAE5E9,
878
};
879
880
// N-gram table handed to match_sbcs() by CharsetRecog_8859_9_tr::match().
// 64 values (4 rows of 16); each fits in 24 bits (see N_GRAM_MASK).
// Keep the values in ascending order: NGramParser::search() binary-searches
// a table of exactly 64 entries.
static const int32_t ngrams_8859_9_tr[] = {
881
    0x206261, 0x206269, 0x206275, 0x206461, 0x206465, 0x206765, 0x206861, 0x20696C, 0x206B61, 0x206B6F, 0x206D61, 0x206F6C, 0x207361, 0x207461, 0x207665, 0x207961,
882
    0x612062, 0x616B20, 0x616C61, 0x616D61, 0x616E20, 0x616EFD, 0x617220, 0x617261, 0x6172FD, 0x6173FD, 0x617961, 0x626972, 0x646120, 0x646520, 0x646920, 0x652062,
883
    0x65206B, 0x656469, 0x656E20, 0x657220, 0x657269, 0x657369, 0x696C65, 0x696E20, 0x696E69, 0x697220, 0x6C616E, 0x6C6172, 0x6C6520, 0x6C6572, 0x6E2061, 0x6E2062,
884
    0x6E206B, 0x6E6461, 0x6E6465, 0x6E6520, 0x6E6920, 0x6E696E, 0x6EFD20, 0x72696E, 0x72FD6E, 0x766520, 0x796120, 0x796F72, 0xFD6E20, 0xFD6E64, 0xFD6EFD, 0xFDF0FD,
885
};
886
887
/** Destructor; the body is empty, no cleanup is performed here. */
CharsetRecog_8859_1::~CharsetRecog_8859_1() {
    // intentionally empty
}
891
892
0
/**
 * Scores the input against every language table in ngrams_8859_1 and records
 * the best-scoring language in results.
 *
 * The reported charset name is "windows-1252" when textIn->fC1Bytes is set
 * and "ISO-8859-1" otherwise. results is overwritten only when a table scores
 * strictly higher than all earlier ones, so the first of equal scores wins.
 *
 * @param textIn  input text to examine
 * @param results receives recognizer, confidence, charset name and language
 * @return TRUE when the best confidence is greater than zero
 */
UBool CharsetRecog_8859_1::match(InputText *textIn, CharsetMatch *results) const {
    const char *charsetName = "ISO-8859-1";
    if (textIn->fC1Bytes) {
        charsetName = "windows-1252";
    }

    int32_t topScore = -1;
    const NGramsPlusLang *const tableEnd = ngrams_8859_1 + UPRV_LENGTHOF(ngrams_8859_1);

    for (const NGramsPlusLang *entry = ngrams_8859_1; entry != tableEnd; ++entry) {
        const int32_t score = match_sbcs(textIn, entry->ngrams, charMap_8859_1);

        if (score <= topScore) {
            continue;  // not better than what has already been recorded
        }

        results->set(textIn, this, score, charsetName, entry->lang);
        topScore = score;
    }

    return topScore > 0;
}
907
908
const char *CharsetRecog_8859_1::getName() const
909
0
{
910
0
    return "ISO-8859-1";
911
0
}
912
913
914
/** Destructor; the body is empty, no cleanup is performed here. */
CharsetRecog_8859_2::~CharsetRecog_8859_2() {
    // intentionally empty
}
918
919
0
/**
 * Scores the input against every language table in ngrams_8859_2 and records
 * the best-scoring language in results.
 *
 * The reported charset name is "windows-1250" when textIn->fC1Bytes is set
 * and "ISO-8859-2" otherwise. results is overwritten only when a table scores
 * strictly higher than all earlier ones, so the first of equal scores wins.
 *
 * @param textIn  input text to examine
 * @param results receives recognizer, confidence, charset name and language
 * @return TRUE when the best confidence is greater than zero
 */
UBool CharsetRecog_8859_2::match(InputText *textIn, CharsetMatch *results) const {
    const char *charsetName = "ISO-8859-2";
    if (textIn->fC1Bytes) {
        charsetName = "windows-1250";
    }

    int32_t topScore = -1;
    const NGramsPlusLang *const tableEnd = ngrams_8859_2 + UPRV_LENGTHOF(ngrams_8859_2);

    for (const NGramsPlusLang *entry = ngrams_8859_2; entry != tableEnd; ++entry) {
        const int32_t score = match_sbcs(textIn, entry->ngrams, charMap_8859_2);

        if (score <= topScore) {
            continue;  // not better than what has already been recorded
        }

        results->set(textIn, this, score, charsetName, entry->lang);
        topScore = score;
    }

    return topScore > 0;
}
934
935
const char *CharsetRecog_8859_2::getName() const
936
0
{
937
0
    return "ISO-8859-2";
938
0
}
939
940
941
/** Destructor; the body is empty, no cleanup is performed here. */
CharsetRecog_8859_5::~CharsetRecog_8859_5() {
    // intentionally empty
}
945
946
const char *CharsetRecog_8859_5::getName() const
947
0
{
948
0
    return "ISO-8859-5";
949
0
}
950
951
/** Destructor; the body is empty, no cleanup is performed here. */
CharsetRecog_8859_5_ru::~CharsetRecog_8859_5_ru() {
    // intentionally empty
}
955
956
const char *CharsetRecog_8859_5_ru::getLanguage() const
957
0
{
958
0
    return "ru";
959
0
}
960
961
/**
 * Scores the input with ngrams_8859_5_ru / charMap_8859_5 and stores the
 * outcome in results (always, whatever the score).
 *
 * @param textIn  input text to examine
 * @param results receives this recognizer and the computed confidence
 * @return TRUE when the confidence is greater than zero
 */
UBool CharsetRecog_8859_5_ru::match(InputText *textIn, CharsetMatch *results) const {
    const int32_t score = match_sbcs(textIn, ngrams_8859_5_ru, charMap_8859_5);

    results->set(textIn, this, score);
    return score > 0;
}
967
968
/** Destructor; the body is empty, no cleanup is performed here. */
CharsetRecog_8859_6::~CharsetRecog_8859_6() {
    // intentionally empty
}
972
973
const char *CharsetRecog_8859_6::getName() const
974
0
{
975
0
    return "ISO-8859-6";
976
0
}
977
978
/** Destructor; the body is empty, no cleanup is performed here. */
CharsetRecog_8859_6_ar::~CharsetRecog_8859_6_ar() {
    // intentionally empty
}
982
983
const char *CharsetRecog_8859_6_ar::getLanguage() const
984
0
{
985
0
    return "ar";
986
0
}
987
988
/**
 * Scores the input with ngrams_8859_6_ar / charMap_8859_6 and stores the
 * outcome in results (always, whatever the score).
 *
 * @param textIn  input text to examine
 * @param results receives this recognizer and the computed confidence
 * @return TRUE when the confidence is greater than zero
 */
UBool CharsetRecog_8859_6_ar::match(InputText *textIn, CharsetMatch *results) const {
    const int32_t score = match_sbcs(textIn, ngrams_8859_6_ar, charMap_8859_6);

    results->set(textIn, this, score);
    return score > 0;
}
994
995
/** Destructor; the body is empty, no cleanup is performed here. */
CharsetRecog_8859_7::~CharsetRecog_8859_7() {
    // intentionally empty
}
999
1000
const char *CharsetRecog_8859_7::getName() const
1001
0
{
1002
0
    return "ISO-8859-7";
1003
0
}
1004
1005
/** Destructor; the body is empty, no cleanup is performed here. */
CharsetRecog_8859_7_el::~CharsetRecog_8859_7_el() {
    // intentionally empty
}
1009
1010
const char *CharsetRecog_8859_7_el::getLanguage() const
1011
0
{
1012
0
    return "el";
1013
0
}
1014
1015
/**
 * Scores the input with ngrams_8859_7_el / charMap_8859_7 and stores the
 * outcome in results with language "el".
 *
 * The reported charset name is "windows-1253" when textIn->fC1Bytes is set
 * and "ISO-8859-7" otherwise.
 *
 * @param textIn  input text to examine
 * @param results receives recognizer, confidence, charset name and language
 * @return TRUE when the confidence is greater than zero
 */
UBool CharsetRecog_8859_7_el::match(InputText *textIn, CharsetMatch *results) const {
    const char *charsetName = "ISO-8859-7";
    if (textIn->fC1Bytes) {
        charsetName = "windows-1253";
    }

    const int32_t score = match_sbcs(textIn, ngrams_8859_7_el, charMap_8859_7);

    results->set(textIn, this, score, charsetName, "el");
    return score > 0;
}
1022
1023
/** Destructor; the body is empty, no cleanup is performed here. */
CharsetRecog_8859_8::~CharsetRecog_8859_8() {
    // intentionally empty
}
1027
1028
const char *CharsetRecog_8859_8::getName() const 
1029
0
{
1030
0
    return "ISO-8859-8";
1031
0
}
1032
1033
/** Destructor; the body is empty, no cleanup is performed here. */
CharsetRecog_8859_8_I_he::~CharsetRecog_8859_8_I_he() {
    // intentionally empty
}
1037
1038
const char *CharsetRecog_8859_8_I_he::getName() const
1039
0
{
1040
0
    return "ISO-8859-8-I";
1041
0
}
1042
1043
const char *CharsetRecog_8859_8_I_he::getLanguage() const
1044
0
{
1045
0
    return "he";
1046
0
}
1047
1048
/**
 * Scores the input with ngrams_8859_8_I_he / charMap_8859_8 and stores the
 * outcome in results with language "he".
 *
 * The reported charset name is "windows-1255" when textIn->fC1Bytes is set
 * and "ISO-8859-8-I" otherwise.
 *
 * @param textIn  input text to examine
 * @param results receives recognizer, confidence, charset name and language
 * @return TRUE when the confidence is greater than zero
 */
UBool CharsetRecog_8859_8_I_he::match(InputText *textIn, CharsetMatch *results) const {
    const char *charsetName = "ISO-8859-8-I";
    if (textIn->fC1Bytes) {
        charsetName = "windows-1255";
    }

    const int32_t score = match_sbcs(textIn, ngrams_8859_8_I_he, charMap_8859_8);

    results->set(textIn, this, score, charsetName, "he");
    return score > 0;
}
1055
1056
/** Destructor; the body is empty, no cleanup is performed here. */
CharsetRecog_8859_8_he::~CharsetRecog_8859_8_he() {
    // nothing to do
}
1060
1061
const char *CharsetRecog_8859_8_he::getLanguage() const
1062
0
{
1063
0
    return "he";
1064
0
}
1065
1066
/**
 * Scores the input with ngrams_8859_8_he / charMap_8859_8 and stores the
 * outcome in results with language "he".
 *
 * The reported charset name is "windows-1255" when textIn->fC1Bytes is set
 * and "ISO-8859-8" otherwise.
 *
 * @param textIn  input text to examine
 * @param results receives recognizer, confidence, charset name and language
 * @return TRUE when the confidence is greater than zero
 */
UBool CharsetRecog_8859_8_he::match(InputText *textIn, CharsetMatch *results) const {
    const char *charsetName = "ISO-8859-8";
    if (textIn->fC1Bytes) {
        charsetName = "windows-1255";
    }

    const int32_t score = match_sbcs(textIn, ngrams_8859_8_he, charMap_8859_8);

    results->set(textIn, this, score, charsetName, "he");
    return score > 0;
}
1073
1074
/** Destructor; the body is empty, no cleanup is performed here. */
CharsetRecog_8859_9::~CharsetRecog_8859_9() {
    // intentionally empty
}
1078
1079
const char *CharsetRecog_8859_9::getName() const
1080
0
{
1081
0
    return "ISO-8859-9";
1082
0
}
1083
1084
/** Destructor; the body is empty, no cleanup is performed here. */
CharsetRecog_8859_9_tr::~CharsetRecog_8859_9_tr() {
    // intentionally empty
}
1088
1089
const char *CharsetRecog_8859_9_tr::getLanguage() const
1090
0
{
1091
0
    return "tr";
1092
0
}
1093
1094
/**
 * Scores the input with ngrams_8859_9_tr / charMap_8859_9 and stores the
 * outcome in results with language "tr".
 *
 * The reported charset name is "windows-1254" when textIn->fC1Bytes is set
 * and "ISO-8859-9" otherwise.
 *
 * @param textIn  input text to examine
 * @param results receives recognizer, confidence, charset name and language
 * @return TRUE when the confidence is greater than zero
 */
UBool CharsetRecog_8859_9_tr::match(InputText *textIn, CharsetMatch *results) const {
    const char *charsetName = "ISO-8859-9";
    if (textIn->fC1Bytes) {
        charsetName = "windows-1254";
    }

    const int32_t score = match_sbcs(textIn, ngrams_8859_9_tr, charMap_8859_9);

    results->set(textIn, this, score, charsetName, "tr");
    return score > 0;
}
1101
1102
/** Destructor; the body is empty, no cleanup is performed here. */
CharsetRecog_windows_1256::~CharsetRecog_windows_1256() {
    // intentionally empty
}
1106
1107
const char *CharsetRecog_windows_1256::getName() const
1108
0
{
1109
0
    return  "windows-1256";
1110
0
}
1111
1112
const char *CharsetRecog_windows_1256::getLanguage() const
1113
0
{
1114
0
    return "ar";
1115
0
}
1116
1117
/**
 * Scores the input with ngrams_windows_1256 / charMap_windows_1256 and stores
 * the outcome in results (always, whatever the score).
 *
 * @param textIn  input text to examine
 * @param results receives this recognizer and the computed confidence
 * @return TRUE when the confidence is greater than zero
 */
UBool CharsetRecog_windows_1256::match(InputText *textIn, CharsetMatch *results) const {
    const int32_t score = match_sbcs(textIn, ngrams_windows_1256, charMap_windows_1256);

    results->set(textIn, this, score);
    return score > 0;
}
1123
1124
/** Destructor; the body is empty, no cleanup is performed here. */
CharsetRecog_windows_1251::~CharsetRecog_windows_1251() {
    // intentionally empty
}
1128
1129
const char *CharsetRecog_windows_1251::getName() const
1130
0
{
1131
0
    return  "windows-1251";
1132
0
}
1133
1134
const char *CharsetRecog_windows_1251::getLanguage() const
1135
0
{
1136
0
    return "ru";
1137
0
}
1138
1139
/**
 * Scores the input with ngrams_windows_1251 / charMap_windows_1251 and stores
 * the outcome in results (always, whatever the score).
 *
 * @param textIn  input text to examine
 * @param results receives this recognizer and the computed confidence
 * @return TRUE when the confidence is greater than zero
 */
UBool CharsetRecog_windows_1251::match(InputText *textIn, CharsetMatch *results) const {
    const int32_t score = match_sbcs(textIn, ngrams_windows_1251, charMap_windows_1251);

    results->set(textIn, this, score);
    return score > 0;
}
1145
1146
/** Destructor; the body is empty, no cleanup is performed here. */
CharsetRecog_KOI8_R::~CharsetRecog_KOI8_R() {
    // intentionally empty
}
1150
1151
const char *CharsetRecog_KOI8_R::getName() const
1152
0
{
1153
0
    return  "KOI8-R";
1154
0
}
1155
1156
const char *CharsetRecog_KOI8_R::getLanguage() const
1157
0
{
1158
0
    return "ru";
1159
0
}
1160
1161
/**
 * Scores the input with ngrams_KOI8_R / charMap_KOI8_R and stores the outcome
 * in results (always, whatever the score).
 *
 * @param textIn  input text to examine
 * @param results receives this recognizer and the computed confidence
 * @return TRUE when the confidence is greater than zero
 */
UBool CharsetRecog_KOI8_R::match(InputText *textIn, CharsetMatch *results) const {
    const int32_t score = match_sbcs(textIn, ngrams_KOI8_R, charMap_KOI8_R);

    results->set(textIn, this, score);
    return score > 0;
}
1167
1168
#if !UCONFIG_ONLY_HTML_CONVERSION
1169
/** Destructor; the body is empty, no cleanup is performed here. */
CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he() {
    // intentionally empty
}
1173
1174
const char *CharsetRecog_IBM424_he::getLanguage() const
1175
0
{
1176
0
    return "he";
1177
0
}
1178
1179
/** Destructor; the body is empty, no cleanup is performed here. */
CharsetRecog_IBM424_he_rtl::~CharsetRecog_IBM424_he_rtl() {
    // intentionally empty
}
1183
1184
const char *CharsetRecog_IBM424_he_rtl::getName() const
1185
0
{
1186
0
    return  "IBM424_rtl";
1187
0
}
1188
1189
/**
 * Scores the input with ngrams_IBM424_he_rtl / charMap_IBM424_he and stores
 * the outcome in results (always, whatever the score).
 *
 * @param textIn  input text to examine
 * @param results receives this recognizer and the computed confidence
 * @return TRUE when the confidence is greater than zero
 */
UBool CharsetRecog_IBM424_he_rtl::match(InputText *textIn, CharsetMatch *results) const {
    const int32_t score = match_sbcs(textIn, ngrams_IBM424_he_rtl, charMap_IBM424_he);

    results->set(textIn, this, score);
    return score > 0;
}
1195
1196
/** Destructor; the body is empty, no cleanup is performed here. */
CharsetRecog_IBM424_he_ltr::~CharsetRecog_IBM424_he_ltr() {
    // intentionally empty
}
1200
1201
const char *CharsetRecog_IBM424_he_ltr::getName() const
1202
0
{
1203
0
    return  "IBM424_ltr";
1204
0
}
1205
1206
/**
 * Scores the input with ngrams_IBM424_he_ltr / charMap_IBM424_he and stores
 * the outcome in results (always, whatever the score).
 *
 * @param textIn  input text to examine
 * @param results receives this recognizer and the computed confidence
 * @return TRUE when the confidence is greater than zero
 */
UBool CharsetRecog_IBM424_he_ltr::match(InputText *textIn, CharsetMatch *results) const {
    const int32_t score = match_sbcs(textIn, ngrams_IBM424_he_ltr, charMap_IBM424_he);

    results->set(textIn, this, score);
    return score > 0;
}
1212
1213
/** Destructor; the body is empty, no cleanup is performed here. */
CharsetRecog_IBM420_ar::~CharsetRecog_IBM420_ar() {
    // intentionally empty
}
1217
1218
const char *CharsetRecog_IBM420_ar::getLanguage() const
1219
0
{
1220
0
    return "ar";
1221
0
}
1222
1223
    
1224
/**
 * IBM420 variant of match_sbcs: runs an NGramParser_IBM420 built from the
 * given table and byte map over the input.
 *
 * @param det     input text to parse
 * @param ngrams  n-gram table handed to the parser
 * @param byteMap byte map handed to the parser
 * @return the value returned by NGramParser_IBM420::parse()
 */
int32_t CharsetRecog_IBM420_ar::match_sbcs(InputText *det, const int32_t ngrams[],  const uint8_t byteMap[]) const {
    NGramParser_IBM420 parser(ngrams, byteMap);

    return parser.parse(det);
}
1233
1234
/** Destructor; the body is empty, no cleanup is performed here. */
CharsetRecog_IBM420_ar_rtl::~CharsetRecog_IBM420_ar_rtl() {
    // intentionally empty
}
1238
1239
const char *CharsetRecog_IBM420_ar_rtl::getName() const
1240
0
{
1241
0
    return  "IBM420_rtl";
1242
0
}
1243
1244
/**
 * Scores the input with ngrams_IBM420_ar_rtl / charMap_IBM420_ar and stores
 * the outcome in results (always, whatever the score).
 *
 * @param textIn  input text to examine
 * @param results receives this recognizer and the computed confidence
 * @return TRUE when the confidence is greater than zero
 */
UBool CharsetRecog_IBM420_ar_rtl::match(InputText *textIn, CharsetMatch *results) const {
    const int32_t score = match_sbcs(textIn, ngrams_IBM420_ar_rtl, charMap_IBM420_ar);

    results->set(textIn, this, score);
    return score > 0;
}
1250
1251
/** Destructor; the body is empty, no cleanup is performed here. */
CharsetRecog_IBM420_ar_ltr::~CharsetRecog_IBM420_ar_ltr() {
    // intentionally empty
}
1255
1256
const char *CharsetRecog_IBM420_ar_ltr::getName() const
1257
0
{
1258
0
    return  "IBM420_ltr";
1259
0
}
1260
1261
/**
 * Scores the input with ngrams_IBM420_ar_ltr / charMap_IBM420_ar and stores
 * the outcome in results (always, whatever the score).
 *
 * @param textIn  input text to examine
 * @param results receives this recognizer and the computed confidence
 * @return TRUE when the confidence is greater than zero
 */
UBool CharsetRecog_IBM420_ar_ltr::match(InputText *textIn, CharsetMatch *results) const {
    const int32_t score = match_sbcs(textIn, ngrams_IBM420_ar_ltr, charMap_IBM420_ar);

    results->set(textIn, this, score);
    return score > 0;
}
1267
#endif
1268
1269
U_NAMESPACE_END
1270
#endif
1271