Coverage Report

Created: 2022-02-19 20:31

/src/php-src/oniguruma/src/regenc.c
Line
Count
Source (jump to first uncovered line)
1
/**********************************************************************
2
  regenc.c -  Oniguruma (regular expression library)
3
**********************************************************************/
4
/*-
5
 * Copyright (c) 2002-2020  K.Kosako
6
 * All rights reserved.
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
 * SUCH DAMAGE.
28
 */
29
30
#include "regint.h"
31
32
0
#define LARGE_S   0x53
33
0
#define SMALL_S   0x73
34
35
OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
36
37
6.08k
#define INITED_LIST_SIZE  20
38
39
static int InitedListNum;
40
41
static struct {
42
  OnigEncoding enc;
43
  int          inited;
44
} InitedList[INITED_LIST_SIZE];
45
46
static int
47
enc_inited_entry(OnigEncoding enc)
48
6.08k
{
49
6.08k
  int i;
50
51
6.08k
  for (i = 0; i < InitedListNum; i++) {
52
0
    if (InitedList[i].enc == enc) {
53
0
      InitedList[i].inited = 1;
54
0
      return i;
55
0
    }
56
0
  }
57
58
6.08k
  i = InitedListNum;
59
6.08k
  if (i < INITED_LIST_SIZE - 1) {
60
6.08k
    InitedList[i].enc    = enc;
61
6.08k
    InitedList[i].inited = 1;
62
6.08k
    InitedListNum++;
63
6.08k
    return i;
64
6.08k
  }
65
66
0
  return -1;
67
0
}
68
69
static int
70
enc_is_inited(OnigEncoding enc)
71
6.08k
{
72
6.08k
  int i;
73
74
6.08k
  for (i = 0; i < InitedListNum; i++) {
75
0
    if (InitedList[i].enc == enc) {
76
0
      return InitedList[i].inited;
77
0
    }
78
0
  }
79
80
6.08k
  return 0;
81
6.08k
}
82
83
/* Non-zero once onigenc_init() has run (reset by onigenc_end()). */
static int OnigEncInited;

/*
 * Mark the encoding subsystem as initialized.  Calling it again is a no-op.
 * Always returns 0.
 */
extern int
onigenc_init(void)
{
  if (OnigEncInited == 0)
    OnigEncInited = 1;

  return 0;
}
93
94
extern int
95
onigenc_end(void)
96
0
{
97
0
  int i;
98
99
0
  for (i = 0; i < InitedListNum; i++) {
100
0
    InitedList[i].enc    = 0;
101
0
    InitedList[i].inited = 0;
102
0
  }
103
0
  InitedListNum = 0;
104
105
0
  OnigEncInited = 0;
106
0
  return ONIG_NORMAL;
107
0
}
108
109
extern int
110
onig_initialize_encoding(OnigEncoding enc)
111
6.08k
{
112
6.08k
  int r;
113
114
6.08k
  if (enc != ONIG_ENCODING_ASCII &&
115
0
      ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc)) {
116
0
    OnigEncoding ascii = ONIG_ENCODING_ASCII;
117
0
    if (ascii->init != 0 && enc_is_inited(ascii) == 0) {
118
0
      r = ascii->init();
119
0
      if (r != ONIG_NORMAL) return r;
120
0
      enc_inited_entry(ascii);
121
0
    }
122
0
  }
123
124
6.08k
  if (enc->init != 0 &&
125
6.08k
      enc_is_inited(enc) == 0) {
126
6.08k
    r = (enc->init)();
127
6.08k
    if (r == ONIG_NORMAL)
128
6.08k
      enc_inited_entry(enc);
129
6.08k
    return r;
130
6.08k
  }
131
132
0
  return 0;
133
0
}
134
135
extern OnigEncoding
136
onigenc_get_default_encoding(void)
137
0
{
138
0
  return OnigEncDefaultCharEncoding;
139
0
}
140
141
extern int
142
onigenc_set_default_encoding(OnigEncoding enc)
143
0
{
144
0
  OnigEncDefaultCharEncoding = enc;
145
0
  return 0;
146
0
}
147
148
extern UChar*
149
onigenc_strdup(OnigEncoding enc, const UChar* s, const UChar* end)
150
42.6k
{
151
42.6k
  int slen, term_len, i;
152
42.6k
  UChar *r;
153
154
42.6k
  slen = (int )(end - s);
155
42.6k
  term_len = ONIGENC_MBC_MINLEN(enc);
156
157
42.6k
  r = (UChar* )xmalloc(slen + term_len);
158
42.6k
  CHECK_NULL_RETURN(r);
159
42.6k
  xmemcpy(r, s, slen);
160
161
85.2k
  for (i = 0; i < term_len; i++)
162
42.6k
    r[slen + i] = (UChar )0;
163
164
42.6k
  return r;
165
42.6k
}
166
167
extern UChar*
168
onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
169
0
{
170
0
  UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
171
0
  if (p < s) {
172
0
    p += enclen(enc, p);
173
0
  }
174
0
  return p;
175
0
}
176
177
extern UChar*
178
onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
179
            const UChar* start, const UChar* s, const UChar** prev)
180
0
{
181
0
  UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
182
183
0
  if (p < s) {
184
0
    if (prev) *prev = (const UChar* )p;
185
0
    p += enclen(enc, p);
186
0
  }
187
0
  else {
188
0
    if (prev)
189
0
      *prev = onigenc_get_prev_char_head(enc, start, p);
190
0
  }
191
0
  return p;
192
0
}
193
194
extern UChar*
195
onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
196
0
{
197
0
  if (s <= start)
198
0
    return (UChar* )NULL;
199
200
0
  return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
201
0
}
202
203
extern UChar*
204
onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n)
205
0
{
206
0
  while (ONIG_IS_NOT_NULL(s) && n-- > 0) {
207
0
    if (s <= start)
208
0
      return (UChar* )NULL;
209
210
0
    s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
211
0
  }
212
0
  return (UChar* )s;
213
0
}
214
215
extern UChar*
216
onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)
217
0
{
218
0
  UChar* q = (UChar* )p;
219
0
  while (n-- > 0) {
220
0
    q += ONIGENC_MBC_ENC_LEN(enc, q);
221
0
  }
222
0
  return (q <= end ? q : NULL);
223
0
}
224
225
extern int
226
onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
227
0
{
228
0
  int n = 0;
229
0
  UChar* q = (UChar* )p;
230
231
0
  while (q < end) {
232
0
    q += ONIGENC_MBC_ENC_LEN(enc, q);
233
0
    n++;
234
0
  }
235
0
  return n;
236
0
}
237
238
extern int
239
onigenc_strlen_null(OnigEncoding enc, const UChar* s)
240
0
{
241
0
  int n = 0;
242
0
  UChar* p = (UChar* )s;
243
244
0
  while (1) {
245
0
    if (*p == '\0') {
246
0
      UChar* q;
247
0
      int len = ONIGENC_MBC_MINLEN(enc);
248
249
0
      if (len == 1) return n;
250
0
      q = p + 1;
251
0
      while (len > 1) {
252
0
        if (*q != '\0') break;
253
0
        q++;
254
0
        len--;
255
0
      }
256
0
      if (len == 1) return n;
257
0
    }
258
0
    p += ONIGENC_MBC_ENC_LEN(enc, p);
259
0
    n++;
260
0
  }
261
0
}
262
263
extern int
264
onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
265
42.6k
{
266
42.6k
  const UChar* start = s;
267
42.6k
  const UChar* p = s;
268
269
280k
  while (1) {
270
280k
    if (*p == '\0') {
271
42.6k
      const UChar* q;
272
42.6k
      int len = ONIGENC_MBC_MINLEN(enc);
273
274
42.6k
      if (len == 1) return (int )(p - start);
275
0
      q = p + 1;
276
0
      while (len > 1) {
277
0
        if (*q != '\0') break;
278
0
        q++;
279
0
        len--;
280
0
      }
281
0
      if (len == 1) return (int )(p - start);
282
237k
    }
283
237k
    p += ONIGENC_MBC_ENC_LEN(enc, p);
284
237k
  }
285
42.6k
}
286
287
/*
 * Byte -> lower-case byte, ASCII rules only: 'A'..'Z' (0101-0132) map to
 * 'a'..'z' (0141-0172); every other byte maps to itself.
 */
const UChar OnigEncAsciiToLowerCaseTable[] = {
  /* 0x00 - 0x3f: unchanged */
  '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
  '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
  '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
  '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
  '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
  '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
  '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
  '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
  /* 0x40 - 0x5f: upper-case letters folded to lower case */
  '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
  '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
  '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
  '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
  /* 0x60 - 0x7f: unchanged */
  '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
  '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
  '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
  '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
  /* 0x80 - 0xff: unchanged */
  '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
  '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
  '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
  '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
  '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
  '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
  '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
  '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
  '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
  '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
  '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
  '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
  '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
  '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
  '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
  '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
};
321
322
#ifdef USE_UPPER_CASE_TABLE
/*
 * Byte -> upper-case byte, ASCII rules only: 'a'..'z' (0141-0172) map to
 * 'A'..'Z' (0101-0132); every other byte maps to itself.
 */
const UChar OnigEncAsciiToUpperCaseTable[256] = {
  /* 0x00 - 0x5f: unchanged */
  '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
  '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
  '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
  '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
  '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
  '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
  '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
  '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
  '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
  '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
  /* 0x60 - 0x7f: lower-case letters folded to upper case */
  '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
  '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
  /* 0x80 - 0xff: unchanged */
  '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
  '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
  '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
  '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
  '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
  '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
  '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
  '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
  '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
  '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
  '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
  '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
  '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
  '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
  '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
  '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
};
#endif
358
359
/*
 * Character-type bit mask for each byte value, indexed by the byte.
 * Entries 0x80-0xff are all zero, i.e. no type bit is set for non-ASCII
 * bytes.
 */
const unsigned short OnigEncAsciiCtypeTable[256] = {
  /* 0x00 - 0x1f */
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x20 - 0x3f */
  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x40 - 0x5f */
  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
  /* 0x60 - 0x7f */
  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
  /* 0x80 - 0xff */
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
};
393
394
/*
 * Byte -> lower-case byte for ISO-8859-1.  Besides 'A'..'Z', the bytes
 * 0300-0336 map to 0340-0376, except 0327 which maps to itself; 0337 also
 * maps to itself.
 */
const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
  /* 0x00 - 0x3f: unchanged */
  '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
  '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
  '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
  '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
  '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
  '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
  '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
  '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
  /* 0x40 - 0x5f: ASCII upper-case letters folded to lower case */
  '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
  '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
  '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
  '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
  /* 0x60 - 0xbf: unchanged */
  '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
  '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
  '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
  '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
  '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
  '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
  '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
  '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
  '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
  '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
  '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
  '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
  /* 0xc0 - 0xdf: folded to 0xe0 - 0xfe, except 0xd7 and 0xdf */
  '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
  '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
  '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
  '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
  /* 0xe0 - 0xff: unchanged */
  '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
  '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
  '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
  '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
428
429
#ifdef USE_UPPER_CASE_TABLE
/*
 * Byte -> upper-case byte for ISO-8859-1.  Besides 'a'..'z', the bytes
 * 0340-0376 map to 0300-0336, except 0367 which maps to itself; 0377 also
 * maps to itself.
 */
const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
  /* 0x00 - 0x5f: unchanged */
  '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
  '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
  '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
  '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
  '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
  '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
  '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
  '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
  '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
  '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
  /* 0x60 - 0x7f: ASCII lower-case letters folded to upper case */
  '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
  '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
  /* 0x80 - 0xdf: unchanged */
  '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
  '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
  '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
  '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
  '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
  '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
  '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
  '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
  '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
  '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
  '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
  '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
  /* 0xe0 - 0xff: folded to 0xc0 - 0xde, except 0xf7 and 0xff */
  '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
  '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
  '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367',
  '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377',
};
#endif
465
466
extern void
467
onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED)
468
0
{
469
  /* nothing */
470
  /* obsoleted. */
471
0
}
472
473
extern UChar*
474
onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
475
0
{
476
0
  return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
477
0
}
478
479
/*
 * { from, to } pairs for the 26 ASCII letters: each upper-case code
 * (0x41-0x5a) paired with its lower-case code (0x61-0x7a).
 */
const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = {
  { 0x41, 0x61 },  /* A a */
  { 0x42, 0x62 },  /* B b */
  { 0x43, 0x63 },  /* C c */
  { 0x44, 0x64 },  /* D d */
  { 0x45, 0x65 },  /* E e */
  { 0x46, 0x66 },  /* F f */
  { 0x47, 0x67 },  /* G g */
  { 0x48, 0x68 },  /* H h */
  { 0x49, 0x69 },  /* I i */
  { 0x4a, 0x6a },  /* J j */
  { 0x4b, 0x6b },  /* K k */
  { 0x4c, 0x6c },  /* L l */
  { 0x4d, 0x6d },  /* M m */
  { 0x4e, 0x6e },  /* N n */
  { 0x4f, 0x6f },  /* O o */
  { 0x50, 0x70 },  /* P p */
  { 0x51, 0x71 },  /* Q q */
  { 0x52, 0x72 },  /* R r */
  { 0x53, 0x73 },  /* S s */
  { 0x54, 0x74 },  /* T t */
  { 0x55, 0x75 },  /* U u */
  { 0x56, 0x76 },  /* V v */
  { 0x57, 0x77 },  /* W w */
  { 0x58, 0x78 },  /* X x */
  { 0x59, 0x79 },  /* Y y */
  { 0x5a, 0x7a }   /* Z z */
};
507
508
extern int
509
onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
510
                                  OnigApplyAllCaseFoldFunc f, void* arg)
511
0
{
512
0
  OnigCodePoint code;
513
0
  int i, r;
514
515
0
  for (i = 0;
516
0
       i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes));
517
0
       i++) {
518
0
    code = OnigAsciiLowerMap[i].to;
519
0
    r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg);
520
0
    if (r != 0) return r;
521
522
0
    code = OnigAsciiLowerMap[i].from;
523
0
    r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg);
524
0
    if (r != 0) return r;
525
0
  }
526
527
0
  return 0;
528
0
}
529
530
extern int
531
onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
532
    const OnigUChar* p, const OnigUChar* end ARG_UNUSED,
533
    OnigCaseFoldCodeItem items[])
534
158k
{
535
158k
  if (0x41 <= *p && *p <= 0x5a) {
536
0
    items[0].byte_len = 1;
537
0
    items[0].code_len = 1;
538
0
    items[0].code[0] = (OnigCodePoint )(*p + 0x20);
539
0
    return 1;
540
0
  }
541
158k
  else if (0x61 <= *p && *p <= 0x7a) {
542
140k
    items[0].byte_len = 1;
543
140k
    items[0].code_len = 1;
544
140k
    items[0].code[0] = (OnigCodePoint )(*p - 0x20);
545
140k
    return 1;
546
140k
  }
547
18.2k
  else
548
18.2k
    return 0;
549
158k
}
550
551
static int
552
ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
553
                       OnigApplyAllCaseFoldFunc f, void* arg)
554
0
{
555
0
  static OnigCodePoint ss[] = { SMALL_S, SMALL_S };
556
557
0
  return (*f)((OnigCodePoint )0xdf, ss, 2, arg);
558
0
}
559
560
extern int
561
onigenc_apply_all_case_fold_with_map(int map_size,
562
    const OnigPairCaseFoldCodes map[],
563
    int ess_tsett_flag, OnigCaseFoldType flag,
564
    OnigApplyAllCaseFoldFunc f, void* arg)
565
0
{
566
0
  OnigCodePoint code;
567
0
  int i, r;
568
569
0
  r = onigenc_ascii_apply_all_case_fold(flag, f, arg);
570
0
  if (r != 0) return r;
571
572
0
  if (CASE_FOLD_IS_ASCII_ONLY(flag))
573
0
    return 0;
574
575
0
  for (i = 0; i < map_size; i++) {
576
0
    code = map[i].to;
577
0
    r = (*f)(map[i].from, &code, 1, arg);
578
0
    if (r != 0) return r;
579
580
0
    code = map[i].from;
581
0
    r = (*f)(map[i].to, &code, 1, arg);
582
0
    if (r != 0) return r;
583
0
  }
584
585
0
  if (ess_tsett_flag != 0)
586
0
    return ss_apply_all_case_fold(flag, f, arg);
587
588
0
  return 0;
589
0
}
590
591
extern int
592
onigenc_get_case_fold_codes_by_str_with_map(int map_size,
593
    const OnigPairCaseFoldCodes map[],
594
    int ess_tsett_flag, OnigCaseFoldType flag,
595
    const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
596
0
{
597
0
  int i, j, n;
598
0
  static OnigUChar sa[] = { LARGE_S, SMALL_S };
599
600
0
  if (0x41 <= *p && *p <= 0x5a) { /* A - Z */
601
0
    if (*p == LARGE_S && ess_tsett_flag != 0 && end > p + 1
602
0
        && (*(p+1) == LARGE_S || *(p+1) == SMALL_S) /* SS */
603
0
        && CASE_FOLD_IS_NOT_ASCII_ONLY(flag)) {
604
0
    ss_combination:
605
0
      items[0].byte_len = 2;
606
0
      items[0].code_len = 1;
607
0
      items[0].code[0] = (OnigCodePoint )0xdf;
608
609
0
      n = 1;
610
0
      for (i = 0; i < 2; i++) {
611
0
        for (j = 0; j < 2; j++) {
612
0
          if (sa[i] == *p && sa[j] == *(p+1))
613
0
            continue;
614
615
0
          items[n].byte_len = 2;
616
0
          items[n].code_len = 2;
617
0
          items[n].code[0] = (OnigCodePoint )sa[i];
618
0
          items[n].code[1] = (OnigCodePoint )sa[j];
619
0
          n++;
620
0
        }
621
0
      }
622
0
      return 4;
623
0
    }
624
625
0
    items[0].byte_len = 1;
626
0
    items[0].code_len = 1;
627
0
    items[0].code[0] = (OnigCodePoint )(*p + 0x20);
628
0
    return 1;
629
0
  }
630
0
  else if (0x61 <= *p && *p <= 0x7a) { /* a - z */
631
0
    if (*p == SMALL_S && ess_tsett_flag != 0 && end > p + 1
632
0
        && (*(p+1) == SMALL_S || *(p+1) == LARGE_S)
633
0
        && CASE_FOLD_IS_NOT_ASCII_ONLY(flag)) {
634
0
      goto ss_combination;
635
0
    }
636
637
0
    items[0].byte_len = 1;
638
0
    items[0].code_len = 1;
639
0
    items[0].code[0] = (OnigCodePoint )(*p - 0x20);
640
0
    return 1;
641
0
  }
642
0
  else if (*p == 0xdf && ess_tsett_flag != 0
643
0
           && CASE_FOLD_IS_NOT_ASCII_ONLY(flag)) {
644
0
    items[0].byte_len = 1;
645
0
    items[0].code_len = 2;
646
0
    items[0].code[0] = (OnigCodePoint )'s';
647
0
    items[0].code[1] = (OnigCodePoint )'s';
648
649
0
    items[1].byte_len = 1;
650
0
    items[1].code_len = 2;
651
0
    items[1].code[0] = (OnigCodePoint )'S';
652
0
    items[1].code[1] = (OnigCodePoint )'S';
653
654
0
    items[2].byte_len = 1;
655
0
    items[2].code_len = 2;
656
0
    items[2].code[0] = (OnigCodePoint )'s';
657
0
    items[2].code[1] = (OnigCodePoint )'S';
658
659
0
    items[3].byte_len = 1;
660
0
    items[3].code_len = 2;
661
0
    items[3].code[0] = (OnigCodePoint )'S';
662
0
    items[3].code[1] = (OnigCodePoint )'s';
663
664
0
    return 4;
665
0
  }
666
0
  else {
667
0
    int i;
668
669
0
    if (CASE_FOLD_IS_ASCII_ONLY(flag))
670
0
      return 0;
671
672
0
    for (i = 0; i < map_size; i++) {
673
0
      if (*p == map[i].from) {
674
0
        items[0].byte_len = 1;
675
0
        items[0].code_len = 1;
676
0
        items[0].code[0] = map[i].to;
677
0
        return 1;
678
0
      }
679
0
      else if (*p == map[i].to) {
680
0
        items[0].byte_len = 1;
681
0
        items[0].code_len = 1;
682
0
        items[0].code[0] = map[i].from;
683
0
        return 1;
684
0
      }
685
0
    }
686
0
  }
687
688
0
  return 0;
689
0
}
690
691
692
extern int
693
onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED,
694
                                         OnigCodePoint* sb_out ARG_UNUSED,
695
                                         const OnigCodePoint* ranges[] ARG_UNUSED)
696
0
{
697
0
  return ONIG_NO_SUPPORT_CONFIG;
698
0
}
699
700
extern int
701
onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)
702
0
{
703
0
  if (p < end) {
704
0
    if (*p == NEWLINE_CODE) return 1;
705
0
  }
706
0
  return 0;
707
0
}
708
709
/* for single byte encodings */
710
extern int
711
onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,
712
                            const UChar*end ARG_UNUSED, UChar* lower)
713
0
{
714
0
  *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
715
716
0
  (*p)++;
717
0
  return 1; /* return byte length of converted char to lower */
718
0
}
719
720
extern int
721
onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED)
722
895k
{
723
895k
  return 1;
724
895k
}
725
726
extern OnigCodePoint
727
onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
728
578k
{
729
578k
  return (OnigCodePoint )(*p);
730
578k
}
731
732
extern int
733
onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED)
734
280k
{
735
280k
  return (code < 0x100 ? 1 : ONIGERR_INVALID_CODE_POINT_VALUE);
736
280k
}
737
738
extern int
739
onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf)
740
0
{
741
0
  *buf = (UChar )(code & 0xff);
742
0
  return 1;
743
0
}
744
745
extern UChar*
746
onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED,
747
                                          const UChar* s)
748
0
{
749
0
  return (UChar* )s;
750
0
}
751
752
extern int
753
onigenc_always_true_is_allowed_reverse_match(const UChar* s   ARG_UNUSED,
754
                                             const UChar* end ARG_UNUSED)
755
0
{
756
0
  return TRUE;
757
0
}
758
759
extern int
760
onigenc_always_false_is_allowed_reverse_match(const UChar* s   ARG_UNUSED,
761
                                              const UChar* end ARG_UNUSED)
762
0
{
763
0
  return FALSE;
764
0
}
765
766
extern int
767
onigenc_always_true_is_valid_mbc_string(const UChar* s   ARG_UNUSED,
768
                                        const UChar* end ARG_UNUSED)
769
18.2k
{
770
18.2k
  return TRUE;
771
18.2k
}
772
773
extern int
774
onigenc_length_check_is_valid_mbc_string(OnigEncoding enc,
775
                                         const UChar* p, const UChar* end)
776
0
{
777
0
  while (p < end) {
778
0
    p += enclen(enc, p);
779
0
  }
780
781
0
  if (p != end)
782
0
    return FALSE;
783
0
  else
784
0
    return TRUE;
785
0
}
786
787
extern int
788
onigenc_is_valid_mbc_string(OnigEncoding enc, const UChar* s, const UChar* end)
789
0
{
790
0
  return ONIGENC_IS_VALID_MBC_STRING(enc, s, end);
791
0
}
792
793
extern OnigCodePoint
794
onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
795
0
{
796
0
  int c, i, len;
797
0
  OnigCodePoint n;
798
799
0
  len = enclen(enc, p);
800
0
  n = (OnigCodePoint )(*p++);
801
0
  if (len == 1) return n;
802
803
0
  for (i = 1; i < len; i++) {
804
0
    if (p >= end) break;
805
0
    c = *p++;
806
0
    n <<= 8;  n += c;
807
0
  }
808
0
  return n;
809
0
}
810
811
extern int
812
onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
813
                          const UChar** pp, const UChar* end ARG_UNUSED,
814
                          UChar* lower)
815
0
{
816
0
  int len;
817
0
  const UChar *p = *pp;
818
819
0
  if (ONIGENC_IS_MBC_ASCII(p)) {
820
0
    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
821
0
    (*pp)++;
822
0
    return 1;
823
0
  }
824
0
  else {
825
0
    int i;
826
827
0
    len = enclen(enc, p);
828
0
    for (i = 0; i < len; i++) {
829
0
      *lower++ = *p++;
830
0
    }
831
0
    (*pp) += len;
832
0
    return len; /* return byte length of converted to lower char */
833
0
  }
834
0
}
835
836
extern int
837
onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
838
0
{
839
0
  UChar *p = buf;
840
841
0
  if ((code & 0xff00) != 0) {
842
0
    *p++ = (UChar )((code >>  8) & 0xff);
843
0
  }
844
0
  *p++ = (UChar )(code & 0xff);
845
846
0
#if 1
847
0
  if (enclen(enc, buf) != (p - buf))
848
0
    return ONIGERR_INVALID_CODE_POINT_VALUE;
849
0
#endif
850
0
  return (int )(p - buf);
851
0
}
852
853
extern int
854
onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
855
0
{
856
0
  UChar *p = buf;
857
858
0
  if ((code & 0xff000000) != 0) {
859
0
    *p++ = (UChar )((code >> 24) & 0xff);
860
0
  }
861
0
  if ((code & 0xff0000) != 0 || p != buf) {
862
0
    *p++ = (UChar )((code >> 16) & 0xff);
863
0
  }
864
0
  if ((code & 0xff00) != 0 || p != buf) {
865
0
    *p++ = (UChar )((code >> 8) & 0xff);
866
0
  }
867
0
  *p++ = (UChar )(code & 0xff);
868
869
0
#if 1
870
0
  if (enclen(enc, buf) != (p - buf))
871
0
    return ONIGERR_INVALID_CODE_POINT_VALUE;
872
0
#endif
873
0
  return (int )(p - buf);
874
0
}
875
876
extern int
877
onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
878
0
{
879
0
  static PosixBracketEntryType PBS[] = {
880
0
    { (UChar* )"Alnum",  ONIGENC_CTYPE_ALNUM,  5 },
881
0
    { (UChar* )"Alpha",  ONIGENC_CTYPE_ALPHA,  5 },
882
0
    { (UChar* )"Blank",  ONIGENC_CTYPE_BLANK,  5 },
883
0
    { (UChar* )"Cntrl",  ONIGENC_CTYPE_CNTRL,  5 },
884
0
    { (UChar* )"Digit",  ONIGENC_CTYPE_DIGIT,  5 },
885
0
    { (UChar* )"Graph",  ONIGENC_CTYPE_GRAPH,  5 },
886
0
    { (UChar* )"Lower",  ONIGENC_CTYPE_LOWER,  5 },
887
0
    { (UChar* )"Print",  ONIGENC_CTYPE_PRINT,  5 },
888
0
    { (UChar* )"Punct",  ONIGENC_CTYPE_PUNCT,  5 },
889
0
    { (UChar* )"Space",  ONIGENC_CTYPE_SPACE,  5 },
890
0
    { (UChar* )"Upper",  ONIGENC_CTYPE_UPPER,  5 },
891
0
    { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 },
892
0
    { (UChar* )"ASCII",  ONIGENC_CTYPE_ASCII,  5 },
893
0
    { (UChar* )"Word",   ONIGENC_CTYPE_WORD,   4 },
894
0
    { (UChar* )NULL, -1, 0 }
895
0
  };
896
897
0
  PosixBracketEntryType *pb;
898
0
  int len;
899
900
0
  len = onigenc_strlen(enc, p, end);
901
0
  for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
902
0
    if (len == pb->len &&
903
0
        onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0)
904
0
      return pb->ctype;
905
0
  }
906
907
0
  return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
908
0
}
909
910
extern int
911
onigenc_is_mbc_word_ascii(OnigEncoding enc, UChar* s, const UChar* end)
912
0
{
913
0
  OnigCodePoint code = ONIGENC_MBC_TO_CODE(enc, s, end);
914
915
0
  if (code > ASCII_LIMIT) return 0;
916
917
0
  return ONIGENC_IS_ASCII_CODE_WORD(code);
918
0
}
919
920
extern int
921
onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
922
                          unsigned int ctype)
923
0
{
924
0
  if (code < 128)
925
0
    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
926
0
  else {
927
0
    if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
928
0
      return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
929
0
    }
930
0
  }
931
932
0
  return FALSE;
933
0
}
934
935
extern int
936
onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
937
                          unsigned int ctype)
938
0
{
939
0
  if (code < 128)
940
0
    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
941
0
  else {
942
0
    if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
943
0
      return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
944
0
    }
945
0
  }
946
947
0
  return FALSE;
948
0
}
949
950
extern int
951
onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,
952
                           const UChar* sascii /* ascii */, int n)
953
0
{
954
0
  int x, c;
955
956
0
  while (n-- > 0) {
957
0
    if (p >= end) return (int )(*sascii);
958
959
0
    c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
960
0
    x = *sascii - c;
961
0
    if (x) return x;
962
963
0
    sascii++;
964
0
    p += enclen(enc, p);
965
0
  }
966
0
  return 0;
967
0
}
968
969
extern int
970
onig_codes_cmp(OnigCodePoint a[], OnigCodePoint b[], int n)
971
0
{
972
0
  int i;
973
974
0
  for (i = 0; i < n; i++) {
975
0
    if (a[i] != b[i])
976
0
      return -1;
977
0
  }
978
979
0
  return 0;
980
0
}
981
982
extern int
983
onig_codes_byte_at(OnigCodePoint codes[], int at)
984
0
{
985
0
  int index;
986
0
  int b;
987
0
  OnigCodePoint code;
988
989
0
  index = at / 3;
990
0
  b     = at % 3;
991
0
  code = codes[index];
992
993
0
  return ((code >> ((2 - b) * 8)) & 0xff);
994
0
}