Coverage Report

Created: 2023-03-29 06:15

/src/icu/icu4c/source/i18n/uregex.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
*   Copyright (C) 2004-2015, International Business Machines
6
*   Corporation and others.  All Rights Reserved.
7
*******************************************************************************
8
*   file name:  uregex.cpp
9
*/
10
11
#include "unicode/utypes.h"
12
13
#if !UCONFIG_NO_REGULAR_EXPRESSIONS
14
15
#include "unicode/regex.h"
16
#include "unicode/uregex.h"
17
#include "unicode/unistr.h"
18
#include "unicode/ustring.h"
19
#include "unicode/uchar.h"
20
#include "unicode/uobject.h"
21
#include "unicode/utf16.h"
22
#include "cmemory.h"
23
#include "uassert.h"
24
#include "uhash.h"
25
#include "umutex.h"
26
#include "uvectr32.h"
27
28
#include "regextxt.h"
29
30
U_NAMESPACE_BEGIN
31
32
0
// Evaluates to (len - idx) when that difference is positive, otherwise to 0.
#define REMAINING_CAPACITY(idx,len) ((((len)-(idx))>0)?((len)-(idx)):0)
33
34
struct RegularExpression: public UMemory {
35
public:
36
    RegularExpression();
37
    ~RegularExpression();
38
    int32_t           fMagic;
39
    RegexPattern     *fPat;
40
    u_atomic_int32_t *fPatRefCount;
41
    char16_t         *fPatString;
42
    int32_t           fPatStringLen;
43
    RegexMatcher     *fMatcher;
44
    const char16_t   *fText;         // Text from setText()
45
    int32_t           fTextLength;   // Length provided by user with setText(), which
46
                                     //  may be -1.
47
    UBool             fOwnsText;
48
};
49
50
// Sentinel stored in RegularExpression::fMagic; validateRE() rejects any
// handle whose fMagic differs from it.
static const int32_t REXP_MAGIC = 0x72657870; // "rexp" in ASCII
51
52
11.5k
// Builds an empty expression object: magic number set, every pointer null,
// lengths zero, and no ownership of input text.
RegularExpression::RegularExpression()
    : fMagic(REXP_MAGIC),
      fPat(nullptr),
      fPatRefCount(nullptr),
      fPatString(nullptr),
      fPatStringLen(0),
      fMatcher(nullptr),
      fText(nullptr),
      fTextLength(0),
      fOwnsText(false) {
}
63
64
11.5k
// Releases the matcher, drops one reference on the pattern data (freeing it
// when this was the last reference), frees owned input text, and clears the
// magic number so a stale handle fails validateRE().
RegularExpression::~RegularExpression() {
    delete fMatcher;
    fMatcher = nullptr;

    if (fPatRefCount != nullptr) {
        const bool wasLastReference = (umtx_atomic_dec(fPatRefCount) == 0);
        if (wasLastReference) {
            delete fPat;
            uprv_free(fPatString);
            uprv_free((void *)fPatRefCount);
        }
    }

    if (fOwnsText) {
        if (fText != nullptr) {
            uprv_free(const_cast<char16_t *>(fText));
        }
    }

    fMagic = 0;
}
77
78
U_NAMESPACE_END
79
80
U_NAMESPACE_USE
81
82
//----------------------------------------------------------------------------------------
83
//
84
//   validateRE    Do boilerplate style checks on API function parameters.
85
//                 Return true if they look OK.
86
//----------------------------------------------------------------------------------------
87
4.44k
// Common argument check for the API functions.
//   re            handle to check; must be non-null and carry REXP_MAGIC.
//   requiresText  when true, also require that input text has been set.
//   status        in/out error code; an incoming failure short-circuits.
// Returns true when the call may proceed. Otherwise returns false, setting
// U_ILLEGAL_ARGUMENT_ERROR for a bad handle or U_REGEX_INVALID_STATE for
// missing text.
static UBool validateRE(const RegularExpression *re, UBool requiresText, UErrorCode *status) {
    if (U_FAILURE(*status)) {
        return false;
    }

    const bool handleLooksValid = (re != nullptr) && (re->fMagic == REXP_MAGIC);
    if (!handleLooksValid) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return false;
    }

    // !!! Open question carried over from the original code: how this check
    //     should account for UText-backed input, which lives in re->fMatcher.
    if (requiresText) {
        const bool noTextSet = (re->fText == nullptr) && !re->fOwnsText;
        if (noTextSet) {
            *status = U_REGEX_INVALID_STATE;
            return false;
        }
    }
    return true;
}
102
103
//----------------------------------------------------------------------------------------
104
//
105
//    uregex_open
106
//
107
//----------------------------------------------------------------------------------------
108
// Compiles a UTF-16 pattern and returns a new expression handle.
//   pattern        pattern text; must not be null.
//   patternLength  length in code units, or -1 for NUL-terminated; 0 and
//                  values below -1 are rejected.
//   flags          passed through to RegexPattern::compile().
//   pe             optional parse-error output; may be null.
//   status         in/out error code.
// Returns the new handle, or nullptr with *status set on failure.
U_CAPI URegularExpression *  U_EXPORT2
uregex_open( const  char16_t       *pattern,
                    int32_t         patternLength,
                    uint32_t        flags,
                    UParseError    *pe,
                    UErrorCode     *status) {

    if (U_FAILURE(*status)) {
        return nullptr;
    }
    const bool badLength = (patternLength < -1) || (patternLength == 0);
    if (pattern == nullptr || badLength) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return nullptr;
    }
    const int32_t actualPatLen = (patternLength == -1) ? u_strlen(pattern) : patternLength;

    RegularExpression  *re     = new RegularExpression;
    u_atomic_int32_t   *refC   = (u_atomic_int32_t *)uprv_malloc(sizeof(int32_t));
    char16_t           *patBuf = (char16_t *)uprv_malloc(sizeof(char16_t)*(actualPatLen+1));
    const bool allAllocated = (re != nullptr) && (refC != nullptr) && (patBuf != nullptr);
    if (!allAllocated) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        delete re;
        uprv_free((void *)refC);
        uprv_free(patBuf);
        return nullptr;
    }
    re->fPatRefCount = refC;
    *re->fPatRefCount = 1;

    // Keep a NUL-terminated private copy of the pattern so it can be handed
    // back later; the compiler reads it through a UText wrapper rather than
    // copying it again. Note that the caller's length (possibly -1) is what
    // gets recorded and passed to the UText.
    re->fPatString    = patBuf;
    re->fPatStringLen = patternLength;
    u_memcpy(patBuf, pattern, actualPatLen);
    patBuf[actualPatLen] = 0;

    UText patText = UTEXT_INITIALIZER;
    utext_openUChars(&patText, patBuf, patternLength, status);

    // Compile, with or without parse-error reporting.
    if (pe == nullptr) {
        re->fPat = RegexPattern::compile(&patText, flags, *status);
    } else {
        re->fPat = RegexPattern::compile(&patText, flags, *pe, *status);
    }
    utext_close(&patText);

    // Only build the matcher when compilation succeeded.
    if (U_SUCCESS(*status)) {
        re->fMatcher = re->fPat->matcher(*status);
    }
    if (U_FAILURE(*status)) {
        delete re;
        return nullptr;
    }
    return reinterpret_cast<URegularExpression *>(re);
}
180
181
//----------------------------------------------------------------------------------------
182
//
183
//    uregex_openUText
184
//
185
//----------------------------------------------------------------------------------------
186
// Compiles a pattern supplied as a UText and returns a new expression handle.
//   pattern  pattern text; must be non-null and have non-zero native length.
//   flags    passed through to RegexPattern::compile().
//   pe       optional parse-error output; may be null.
//   status   in/out error code.
// Returns the new handle, or nullptr with *status set on failure.
U_CAPI URegularExpression *  U_EXPORT2
uregex_openUText(UText          *pattern,
                 uint32_t        flags,
                 UParseError    *pe,
                 UErrorCode     *status) {

    if (U_FAILURE(*status)) {
        return nullptr;
    }
    if (pattern == nullptr) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return nullptr;
    }

    const int64_t patternNativeLength = utext_nativeLength(pattern);
    if (patternNativeLength == 0) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return nullptr;
    }

    RegularExpression *re = new RegularExpression;

    // Preflight call: null destination, zero capacity, returns the UTF-16
    // length. Its status is kept separate from the caller's.
    UErrorCode lengthStatus = U_ZERO_ERROR;
    const int32_t pattern16Length = utext_extract(pattern, 0, patternNativeLength, nullptr, 0, &lengthStatus);

    u_atomic_int32_t   *refC   = (u_atomic_int32_t *)uprv_malloc(sizeof(int32_t));
    char16_t           *patBuf = (char16_t *)uprv_malloc(sizeof(char16_t)*(pattern16Length+1));
    const bool allAllocated = (re != nullptr) && (refC != nullptr) && (patBuf != nullptr);
    if (!allAllocated) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        delete re;
        uprv_free((void *)refC);
        uprv_free(patBuf);
        return nullptr;
    }
    re->fPatRefCount = refC;
    *re->fPatRefCount = 1;

    // Keep a private UTF-16 copy of the pattern so it can be handed back
    // later; the compiler reads it through a UText wrapper around this copy.
    re->fPatString    = patBuf;
    re->fPatStringLen = pattern16Length;
    utext_extract(pattern, 0, patternNativeLength, patBuf, pattern16Length+1, status);

    UText patText = UTEXT_INITIALIZER;
    utext_openUChars(&patText, patBuf, pattern16Length, status);

    // Compile, with or without parse-error reporting.
    if (pe == nullptr) {
        re->fPat = RegexPattern::compile(&patText, flags, *status);
    } else {
        re->fPat = RegexPattern::compile(&patText, flags, *pe, *status);
    }
    utext_close(&patText);

    // Only build the matcher when compilation succeeded.
    if (U_SUCCESS(*status)) {
        re->fMatcher = re->fPat->matcher(*status);
    }
    if (U_FAILURE(*status)) {
        delete re;
        return nullptr;
    }
    return reinterpret_cast<URegularExpression *>(re);
}
263
264
//----------------------------------------------------------------------------------------
265
//
266
//    uregex_close
267
//
268
//----------------------------------------------------------------------------------------
269
// Destroys an expression handle. A handle that fails validation (null or
// wrong magic number) is ignored.
U_CAPI void  U_EXPORT2
uregex_close(URegularExpression  *re2) {
    RegularExpression *re = reinterpret_cast<RegularExpression *>(re2);
    UErrorCode localStatus = U_ZERO_ERROR;
    if (validateRE(re, false, &localStatus)) {
        delete re;
    }
}
278
279
280
//----------------------------------------------------------------------------------------
281
//
282
//    uregex_clone
283
//
284
//----------------------------------------------------------------------------------------
285
// Creates a new handle that shares the source's compiled pattern and pattern
// string (bumping their reference count) but has its own matcher. The
// source's input text is not carried over.
// Returns nullptr with *status set on failure.
U_CAPI URegularExpression * U_EXPORT2
uregex_clone(const URegularExpression *source2, UErrorCode *status)  {
    const RegularExpression *source = reinterpret_cast<const RegularExpression *>(source2);
    if (!validateRE(source, false, status)) {
        return nullptr;
    }

    RegularExpression *copy = new RegularExpression;
    if (copy == nullptr) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }

    // Create the matcher before sharing anything, so that a failure here
    // leaves the source's reference count untouched.
    copy->fMatcher = source->fPat->matcher(*status);
    if (U_FAILURE(*status)) {
        delete copy;
        return nullptr;
    }

    copy->fPat          = source->fPat;
    copy->fPatRefCount  = source->fPatRefCount;
    copy->fPatString    = source->fPatString;
    copy->fPatStringLen = source->fPatStringLen;
    umtx_atomic_inc(source->fPatRefCount);
    // Note:  fText is not cloned.

    return reinterpret_cast<URegularExpression *>(copy);
}
313
314
315
316
317
//------------------------------------------------------------------------------
318
//
319
//    uregex_pattern
320
//
321
//------------------------------------------------------------------------------
322
// Returns the stored copy of the pattern string.
//   patLength  optional output; receives the recorded pattern length.
// Returns nullptr when validation fails.
U_CAPI const char16_t * U_EXPORT2
uregex_pattern(const  URegularExpression *regexp2,
                      int32_t            *patLength,
                      UErrorCode         *status)  {
    const RegularExpression *regexp = reinterpret_cast<const RegularExpression *>(regexp2);

    if (!validateRE(regexp, false, status)) {
        return nullptr;
    }
    if (patLength) {
        *patLength = regexp->fPatStringLen;
    }
    return regexp->fPatString;
}
336
337
338
//------------------------------------------------------------------------------
339
//
340
//    uregex_patternUText
341
//
342
//------------------------------------------------------------------------------
343
// Returns the pattern text as a UText, as provided by
// RegexPattern::patternText().
// The handle is validated first, like every other entry point in this file:
// a null or invalid handle yields nullptr with U_ILLEGAL_ARGUMENT_ERROR, and
// an incoming failure status yields nullptr without touching the handle.
U_CAPI UText * U_EXPORT2
uregex_patternUText(const URegularExpression *regexp2,
                          UErrorCode         *status)  {
    RegularExpression *regexp = (RegularExpression*)regexp2;
    if (validateRE(regexp, false, status) == false) {
        return nullptr;
    }
    return regexp->fPat->patternText(*status);
}
349
350
351
//------------------------------------------------------------------------------
352
//
353
//    uregex_flags
354
//
355
//------------------------------------------------------------------------------
356
// Returns the flags of the compiled pattern, or 0 when validation fails.
U_CAPI int32_t U_EXPORT2
uregex_flags(const URegularExpression *regexp2, UErrorCode *status)  {
    const RegularExpression *regexp = reinterpret_cast<const RegularExpression *>(regexp2);
    if (!validateRE(regexp, false, status)) {
        return 0;
    }
    return static_cast<int32_t>(regexp->fPat->flags());
}
365
366
367
//------------------------------------------------------------------------------
368
//
369
//    uregex_setText
370
//
371
//------------------------------------------------------------------------------
372
// Sets the UTF-16 text to be matched. The caller's buffer is referenced, not
// copied, and is never freed by this object.
//   text        must not be null.
//   textLength  length in code units, or -1; values below -1 are rejected.
U_CAPI void U_EXPORT2
uregex_setText(URegularExpression *regexp2,
               const char16_t     *text,
               int32_t             textLength,
               UErrorCode         *status)  {
    RegularExpression *regexp = reinterpret_cast<RegularExpression *>(regexp2);
    if (!validateRE(regexp, false, status)) {
        return;
    }
    if (text == nullptr || textLength < -1) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    // Release any text buffer this object allocated for itself earlier.
    if (regexp->fOwnsText && regexp->fText != nullptr) {
        uprv_free(const_cast<char16_t *>(regexp->fText));
    }

    regexp->fText       = text;
    regexp->fTextLength = textLength;
    regexp->fOwnsText   = false;

    UText wrapper = UTEXT_INITIALIZER;
    utext_openUChars(&wrapper, text, textLength, status);
    regexp->fMatcher->reset(&wrapper);
    utext_close(&wrapper); // reset() made a shallow clone, so we don't need this copy
}
399
400
401
//------------------------------------------------------------------------------
402
//
403
//    uregex_setUText
404
//
405
//------------------------------------------------------------------------------
406
// Sets the text to be matched from a UText. No UTF-16 copy is made here;
// fText stays null until uregex_getText() asks for one.
//   text  must not be null.
U_CAPI void U_EXPORT2
uregex_setUText(URegularExpression *regexp2,
                UText              *text,
                UErrorCode         *status) {
    RegularExpression *regexp = reinterpret_cast<RegularExpression *>(regexp2);
    if (!validateRE(regexp, false, status)) {
        return;
    }
    if (text == nullptr) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    // Release any text buffer this object allocated for itself earlier.
    if (regexp->fOwnsText && regexp->fText != nullptr) {
        uprv_free(const_cast<char16_t *>(regexp->fText));
    }

    regexp->fText       = nullptr; // only fill it in on request
    regexp->fTextLength = -1;
    regexp->fOwnsText   = true;
    regexp->fMatcher->reset(text);
}
428
429
430
431
//------------------------------------------------------------------------------
432
//
433
//    uregex_getText
434
//
435
//------------------------------------------------------------------------------
436
// Returns the input text as UTF-16.
//   textLength  optional output; receives the recorded text length.
// When no UTF-16 pointer is cached yet (fText is null), the text is taken
// from the matcher's UText: either by pointing at the UText's chunk when
// the whole text is in that chunk, or by extracting into a newly allocated
// buffer that this object then owns.
// Returns nullptr when validation fails, or with U_MEMORY_ALLOCATION_ERROR
// when the extraction buffer cannot be allocated; in the latter case the
// object's state and *textLength are left unchanged.
U_CAPI const char16_t * U_EXPORT2
uregex_getText(URegularExpression *regexp2,
               int32_t            *textLength,
               UErrorCode         *status)  {
    RegularExpression *regexp = (RegularExpression*)regexp2;
    if (validateRE(regexp, false, status) == false) {
        return nullptr;
    }

    if (regexp->fText == nullptr) {
        // need to fill in the text
        UText *inputText = regexp->fMatcher->inputText();
        int64_t inputNativeLength = utext_nativeLength(inputText);
        if (UTEXT_FULL_TEXT_IN_CHUNK(inputText, inputNativeLength)) {
            regexp->fText = inputText->chunkContents;
            regexp->fTextLength = (int32_t)inputNativeLength;
            regexp->fOwnsText = false; // because the UText owns it
        } else {
            // Preflight with a null buffer to learn the UTF-16 length; the
            // status of this call is kept separate from the caller's.
            UErrorCode lengthStatus = U_ZERO_ERROR;
            int32_t length16 = utext_extract(inputText, 0, inputNativeLength, nullptr, 0, &lengthStatus);
            char16_t *inputChars = (char16_t *)uprv_malloc(sizeof(char16_t)*(length16+1));
            if (inputChars == nullptr) {
                // Report the failure without recording a length for text we
                // do not have.
                *status = U_MEMORY_ALLOCATION_ERROR;
                return nullptr;
            }

            utext_extract(inputText, 0, inputNativeLength, inputChars, length16+1, status);
            regexp->fText = inputChars;
            regexp->fTextLength = length16;
            regexp->fOwnsText = true; // should already be set but just in case
        }
    }

    if (textLength != nullptr) {
        *textLength = regexp->fTextLength;
    }
    return regexp->fText;
}
469
470
471
//------------------------------------------------------------------------------
472
//
473
//    uregex_getUText
474
//
475
//------------------------------------------------------------------------------
476
// Returns the matcher's input via RegexMatcher::getInput(dest, status).
// When validation fails, dest is returned unchanged.
U_CAPI UText * U_EXPORT2
uregex_getUText(URegularExpression *regexp2,
                UText              *dest,
                UErrorCode         *status)  {
    RegularExpression *regexp = reinterpret_cast<RegularExpression *>(regexp2);
    if (validateRE(regexp, false, status)) {
        return regexp->fMatcher->getInput(dest, *status);
    }
    return dest;
}
486
487
488
//------------------------------------------------------------------------------
489
//
490
//    uregex_refreshUText
491
//
492
//------------------------------------------------------------------------------
493
// Forwards to RegexMatcher::refreshInputText() after validating the handle.
U_CAPI void U_EXPORT2
uregex_refreshUText(URegularExpression *regexp2,
                    UText              *text,
                    UErrorCode         *status) {
    RegularExpression *regexp = reinterpret_cast<RegularExpression *>(regexp2);
    if (validateRE(regexp, false, status)) {
        regexp->fMatcher->refreshInputText(text, *status);
    }
}
503
504
505
//------------------------------------------------------------------------------
506
//
507
//    uregex_matches
508
//
509
//------------------------------------------------------------------------------
510
// 32-bit index wrapper: widens startIndex and defers to uregex_matches64().
U_CAPI UBool U_EXPORT2
uregex_matches(URegularExpression *regexp2,
               int32_t            startIndex,
               UErrorCode        *status)  {
    const int64_t wideStart = startIndex;
    return uregex_matches64(regexp2, wideStart, status);
}
516
517
// Calls RegexMatcher::matches(). A startIndex of -1 selects the overload
// that takes no index; any other value is passed through as the start index.
// Requires input text to be set; returns false when validation fails.
U_CAPI UBool U_EXPORT2
uregex_matches64(URegularExpression *regexp2,
                 int64_t            startIndex,
                 UErrorCode        *status)  {
    RegularExpression *regexp = reinterpret_cast<RegularExpression *>(regexp2);
    if (!validateRE(regexp, true, status)) {
        return false;
    }
    if (startIndex != -1) {
        return regexp->fMatcher->matches(startIndex, *status);
    }
    return regexp->fMatcher->matches(*status);
}
533
534
535
//------------------------------------------------------------------------------
536
//
537
//    uregex_lookingAt
538
//
539
//------------------------------------------------------------------------------
540
// 32-bit index wrapper: widens startIndex and defers to uregex_lookingAt64().
U_CAPI UBool U_EXPORT2
uregex_lookingAt(URegularExpression *regexp2,
                 int32_t             startIndex,
                 UErrorCode         *status)  {
    const int64_t wideStart = startIndex;
    return uregex_lookingAt64(regexp2, wideStart, status);
}
546
547
// Calls RegexMatcher::lookingAt(). A startIndex of -1 selects the overload
// that takes no index; any other value is passed through as the start index.
// Requires input text to be set; returns false when validation fails.
U_CAPI UBool U_EXPORT2
uregex_lookingAt64(URegularExpression *regexp2,
                   int64_t             startIndex,
                   UErrorCode         *status)  {
    RegularExpression *regexp = reinterpret_cast<RegularExpression *>(regexp2);
    if (!validateRE(regexp, true, status)) {
        return false;
    }
    if (startIndex != -1) {
        return regexp->fMatcher->lookingAt(startIndex, *status);
    }
    return regexp->fMatcher->lookingAt(*status);
}
563
564
565
566
//------------------------------------------------------------------------------
567
//
568
//    uregex_find
569
//
570
//------------------------------------------------------------------------------
571
// 32-bit index wrapper: widens startIndex and defers to uregex_find64().
U_CAPI UBool U_EXPORT2
uregex_find(URegularExpression *regexp2,
            int32_t             startIndex,
            UErrorCode         *status)  {
    const int64_t wideStart = startIndex;
    return uregex_find64(regexp2, wideStart, status);
}
577
578
// Searches for a match. With startIndex == -1 the matcher is first reset via
// resetPreserveRegion() and then find(status) is called; otherwise
// find(startIndex, status) is called directly.
// Requires input text to be set; returns false when validation fails.
U_CAPI UBool U_EXPORT2
uregex_find64(URegularExpression *regexp2,
              int64_t             startIndex,
              UErrorCode         *status)  {
    RegularExpression *regexp = reinterpret_cast<RegularExpression *>(regexp2);
    if (!validateRE(regexp, true, status)) {
        return false;
    }
    if (startIndex != -1) {
        return regexp->fMatcher->find(startIndex, *status);
    }
    regexp->fMatcher->resetPreserveRegion();
    return regexp->fMatcher->find(*status);
}
595
596
597
//------------------------------------------------------------------------------
598
//
599
//    uregex_findNext
600
//
601
//------------------------------------------------------------------------------
602
// Calls RegexMatcher::find(status) without resetting the matcher.
// Requires input text to be set; returns false when validation fails.
U_CAPI UBool U_EXPORT2
uregex_findNext(URegularExpression *regexp2,
                UErrorCode         *status)  {
    RegularExpression *regexp = reinterpret_cast<RegularExpression *>(regexp2);
    if (validateRE(regexp, true, status)) {
        return regexp->fMatcher->find(*status);
    }
    return false;
}
612
613
//------------------------------------------------------------------------------
614
//
615
//    uregex_groupCount
616
//
617
//------------------------------------------------------------------------------
618
// Returns RegexMatcher::groupCount() for this expression, or 0 when
// validation fails.
U_CAPI int32_t U_EXPORT2
uregex_groupCount(URegularExpression *regexp2,
                  UErrorCode         *status)  {
    RegularExpression *regexp = reinterpret_cast<RegularExpression *>(regexp2);
    if (validateRE(regexp, false, status)) {
        return regexp->fMatcher->groupCount();
    }
    return 0;
}
628
629
630
//------------------------------------------------------------------------------
631
//
632
//    uregex_groupNumberFromName
633
//
634
//------------------------------------------------------------------------------
635
// Looks up a capture group number from a UTF-16 group name by wrapping the
// name in a UnicodeString and calling RegexPattern::groupNumberFromName().
// Returns 0 when validation fails.
int32_t
uregex_groupNumberFromName(URegularExpression *regexp2,
                           const char16_t     *groupName,
                           int32_t             nameLength,
                           UErrorCode          *status) {
    RegularExpression *regexp = reinterpret_cast<RegularExpression *>(regexp2);
    if (!validateRE(regexp, false, status)) {
        return 0;
    }
    const UnicodeString name(groupName, nameLength);
    return regexp->fPat->groupNumberFromName(name, *status);
}
647
648
// Looks up a capture group number from a char* group name via
// RegexPattern::groupNumberFromName(). Returns 0 when validation fails.
int32_t
uregex_groupNumberFromCName(URegularExpression *regexp2,
                            const char         *groupName,
                            int32_t             nameLength,
                            UErrorCode          *status) {
    RegularExpression *regexp = reinterpret_cast<RegularExpression *>(regexp2);
    if (validateRE(regexp, false, status)) {
        return regexp->fPat->groupNumberFromName(groupName, nameLength, *status);
    }
    return 0;
}
659
660
//------------------------------------------------------------------------------
661
//
662
//    uregex_group
663
//
664
//------------------------------------------------------------------------------
665
// Copies the text of a capture group into the caller's UTF-16 buffer.
//   groupNum      capture group number, passed to the matcher.
//   dest          output buffer; may be null only when destCapacity is 0.
//   destCapacity  size of dest in code units; must not be negative.
// Returns the group length, or 0 on error. Requires input text to be set.
// In the direct-copy path the result is NUL-terminated only when it is
// shorter than destCapacity; an exact fit sets
// U_STRING_NOT_TERMINATED_WARNING and a truncated copy sets
// U_BUFFER_OVERFLOW_ERROR.
U_CAPI int32_t U_EXPORT2
uregex_group(URegularExpression *regexp2,
             int32_t             groupNum,
             char16_t           *dest,
             int32_t             destCapacity,
             UErrorCode          *status)  {
    RegularExpression *regexp = reinterpret_cast<RegularExpression *>(regexp2);
    if (!validateRE(regexp, true, status)) {
        return 0;
    }
    const bool missingBuffer = (destCapacity > 0) && (dest == nullptr);
    if (destCapacity < 0 || missingBuffer) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    if (destCapacity != 0 && regexp->fText == nullptr) {
        // No UTF-16 copy of the text is cached and the caller wants data:
        // extract the group straight from the matcher's UText.
        const int64_t start = regexp->fMatcher->start64(groupNum, *status);
        const int64_t limit = regexp->fMatcher->end64(groupNum, *status);
        if (U_FAILURE(*status)) {
            return 0;
        }
        // Note edge cases:
        //   Group didn't match: start == end == -1. UText trims to 0, UText gives zero length result.
        //   Zero Length Match: start == end.
        return utext_extract(regexp->fMatcher->inputText(), start, limit, dest, destCapacity, status);
    }

    // Preflighting, or the text is already available as UChars: work from
    // the match indexes, which is a little cheaper than going through UText.
    const int32_t startIx = regexp->fMatcher->start(groupNum, *status);
    const int32_t endIx   = regexp->fMatcher->end  (groupNum, *status);
    if (U_FAILURE(*status)) {
        return 0;
    }

    // Decide how much fits and which status to report.
    const int32_t fullLength = endIx - startIx;
    int32_t charsToCopy = fullLength;
    if (fullLength < destCapacity) {
        dest[fullLength] = 0;
    } else if (fullLength == destCapacity) {
        *status = U_STRING_NOT_TERMINATED_WARNING;
    } else {
        charsToCopy = destCapacity;
        *status = U_BUFFER_OVERFLOW_ERROR;
    }

    // Copy the (possibly truncated) group text to the caller's buffer.
    if (charsToCopy > 0) {
        u_memcpy(dest, &regexp->fText[startIx], charsToCopy);
    }
    return fullLength;
}
728
729
730
//------------------------------------------------------------------------------
731
//
732
//    uregex_groupUText
733
//
734
//------------------------------------------------------------------------------
735
// Returns a capture group as a UText via RegexMatcher::group().
//   groupNum     capture group number, passed to the matcher.
//   dest         optional UText to reuse for the result.
//   groupLength  output for the group length. A null pointer is tolerated
//                (the length is then discarded), matching how the other
//                length outputs in this file treat null.
// Requires input text to be set. When validation fails, dest is returned if
// non-null; otherwise a new empty UText is opened and returned.
U_CAPI UText * U_EXPORT2
uregex_groupUText(URegularExpression *regexp2,
                  int32_t             groupNum,
                  UText              *dest,
                  int64_t            *groupLength,
                  UErrorCode         *status)  {
    RegularExpression *regexp = (RegularExpression*)regexp2;
    if (validateRE(regexp, true, status) == false) {
        UErrorCode emptyTextStatus = U_ZERO_ERROR;
        return (dest ? dest : utext_openUChars(nullptr, nullptr, 0, &emptyTextStatus));
    }

    // RegexMatcher::group() takes the length by reference, so give it a
    // local to write into when the caller did not supply one.
    int64_t discardedLength = 0;
    int64_t &lengthOut = (groupLength != nullptr) ? *groupLength : discardedLength;
    return regexp->fMatcher->group(groupNum, dest, lengthOut, *status);
}
749
750
//------------------------------------------------------------------------------
751
//
752
//    uregex_start
753
//
754
//------------------------------------------------------------------------------
755
// 32-bit wrapper: narrows the result of uregex_start64() to int32_t.
U_CAPI int32_t U_EXPORT2
uregex_start(URegularExpression *regexp2,
             int32_t             groupNum,
             UErrorCode          *status)  {
    const int64_t wideResult = uregex_start64(regexp2, groupNum, status);
    return static_cast<int32_t>(wideResult);
}
761
762
// Returns RegexMatcher::start64() for the given group, or 0 when validation
// fails. Requires input text to be set.
U_CAPI int64_t U_EXPORT2
uregex_start64(URegularExpression *regexp2,
               int32_t             groupNum,
               UErrorCode          *status)  {
    RegularExpression *regexp = reinterpret_cast<RegularExpression *>(regexp2);
    if (validateRE(regexp, true, status)) {
        return regexp->fMatcher->start64(groupNum, *status);
    }
    return 0;
}
773
774
//------------------------------------------------------------------------------
775
//
776
//    uregex_end
777
//
778
//------------------------------------------------------------------------------
779
// 32-bit wrapper: narrows the result of uregex_end64() to int32_t.
U_CAPI int32_t U_EXPORT2
uregex_end(URegularExpression   *regexp2,
           int32_t               groupNum,
           UErrorCode           *status)  {
    const int64_t wideResult = uregex_end64(regexp2, groupNum, status);
    return static_cast<int32_t>(wideResult);
}
785
786
// Returns RegexMatcher::end64() for the given group, or 0 when validation
// fails. Requires input text to be set.
U_CAPI int64_t U_EXPORT2
uregex_end64(URegularExpression   *regexp2,
             int32_t               groupNum,
             UErrorCode           *status)  {
    RegularExpression *regexp = reinterpret_cast<RegularExpression *>(regexp2);
    if (validateRE(regexp, true, status)) {
        return regexp->fMatcher->end64(groupNum, *status);
    }
    return 0;
}
797
798
//------------------------------------------------------------------------------
799
//
800
//    uregex_reset
801
//
802
//------------------------------------------------------------------------------
803
// 32-bit index wrapper: widens index and defers to uregex_reset64().
U_CAPI void U_EXPORT2
uregex_reset(URegularExpression    *regexp2,
             int32_t               index,
             UErrorCode            *status)  {
    const int64_t wideIndex = index;
    uregex_reset64(regexp2, wideIndex, status);
}
809
810
// Forwards to RegexMatcher::reset(index, status) after validating the
// handle. Requires input text to be set.
U_CAPI void U_EXPORT2
uregex_reset64(URegularExpression    *regexp2,
               int64_t               index,
               UErrorCode            *status)  {
    RegularExpression *regexp = reinterpret_cast<RegularExpression *>(regexp2);
    if (validateRE(regexp, true, status)) {
        regexp->fMatcher->reset(index, *status);
    }
}
820
821
822
//------------------------------------------------------------------------------
823
//
824
//    uregex_setRegion
825
//
826
//------------------------------------------------------------------------------
827
// 32-bit index wrapper: widens both bounds and defers to uregex_setRegion64().
U_CAPI void U_EXPORT2
uregex_setRegion(URegularExpression   *regexp2,
                 int32_t               regionStart,
                 int32_t               regionLimit,
                 UErrorCode           *status)  {
    const int64_t wideStart = regionStart;
    const int64_t wideLimit = regionLimit;
    uregex_setRegion64(regexp2, wideStart, wideLimit, status);
}
834
835
// Forwards to RegexMatcher::region(regionStart, regionLimit, status) after
// validating the handle. Requires input text to be set.
U_CAPI void U_EXPORT2
uregex_setRegion64(URegularExpression   *regexp2,
                   int64_t               regionStart,
                   int64_t               regionLimit,
                   UErrorCode           *status)  {
    RegularExpression *regexp = reinterpret_cast<RegularExpression *>(regexp2);
    if (validateRE(regexp, true, status)) {
        regexp->fMatcher->region(regionStart, regionLimit, *status);
    }
}
846
847
848
//------------------------------------------------------------------------------
849
//
850
//    uregex_setRegionAndStart
851
//
852
//------------------------------------------------------------------------------
853
// Sets the matching region and the start index in a single call by
// forwarding to the four-argument RegexMatcher::region() overload.
// Nothing happens when validateRE() rejects the handle.
U_CAPI void U_EXPORT2
uregex_setRegionAndStart(URegularExpression *regexp2,
                         int64_t             regionStart,
                         int64_t             regionLimit,
                         int64_t             startIndex,
                         UErrorCode         *status) {
    RegularExpression *re = (RegularExpression *)regexp2;
    if (validateRE(re, true, status)) {
        re->fMatcher->region(regionStart, regionLimit, startIndex, *status);
    }
}
865
866
//------------------------------------------------------------------------------
867
//
868
//    uregex_regionStart
869
//
870
//------------------------------------------------------------------------------
871
// 32-bit convenience entry point: returns uregex_regionStart64()
// narrowed to int32_t.
U_CAPI int32_t U_EXPORT2
uregex_regionStart(const URegularExpression *regexp2,
                         UErrorCode         *status) {
    const int64_t start64 = uregex_regionStart64(regexp2, status);
    return (int32_t)start64;
}
876
877
// Returns the start of the wrapped matcher's region, or 0 when
// validateRE() rejects the handle.
U_CAPI int64_t U_EXPORT2
uregex_regionStart64(const URegularExpression *regexp2,
                           UErrorCode         *status) {
    RegularExpression *re = (RegularExpression *)regexp2;
    if (!validateRE(re, true, status)) {
        return 0;
    }
    return re->fMatcher->regionStart();
}
886
887
888
//------------------------------------------------------------------------------
889
//
890
//    uregex_regionEnd
891
//
892
//------------------------------------------------------------------------------
893
// 32-bit convenience entry point: returns uregex_regionEnd64()
// narrowed to int32_t.
U_CAPI int32_t U_EXPORT2
uregex_regionEnd(const URegularExpression *regexp2,
                       UErrorCode         *status) {
    const int64_t end64 = uregex_regionEnd64(regexp2, status);
    return (int32_t)end64;
}
898
899
// Returns the end of the wrapped matcher's region, or 0 when
// validateRE() rejects the handle.
U_CAPI int64_t U_EXPORT2
uregex_regionEnd64(const URegularExpression *regexp2,
                         UErrorCode         *status) {
    RegularExpression *re = (RegularExpression *)regexp2;
    if (!validateRE(re, true, status)) {
        return 0;
    }
    return re->fMatcher->regionEnd();
}
908
909
910
//------------------------------------------------------------------------------
911
//
912
//    uregex_hasTransparentBounds
913
//
914
//------------------------------------------------------------------------------
915
// Reports the wrapped matcher's transparent-bounds setting.
// Returns false when validateRE() rejects the handle.
U_CAPI UBool U_EXPORT2
uregex_hasTransparentBounds(const URegularExpression *regexp2,
                                  UErrorCode         *status) {
    RegularExpression *re = (RegularExpression *)regexp2;
    if (!validateRE(re, false, status)) {
        return false;
    }
    return re->fMatcher->hasTransparentBounds();
}
924
925
926
//------------------------------------------------------------------------------
927
//
928
//    uregex_useTransparentBounds
929
//
930
//------------------------------------------------------------------------------
931
// Passes `b` to RegexMatcher::useTransparentBounds().
// Nothing happens when validateRE() rejects the handle.
U_CAPI void U_EXPORT2
uregex_useTransparentBounds(URegularExpression *regexp2,
                            UBool               b,
                            UErrorCode         *status) {
    RegularExpression *re = (RegularExpression *)regexp2;
    if (validateRE(re, false, status)) {
        re->fMatcher->useTransparentBounds(b);
    }
}
941
942
943
//------------------------------------------------------------------------------
944
//
945
//    uregex_hasAnchoringBounds
946
//
947
//------------------------------------------------------------------------------
948
// Reports the wrapped matcher's anchoring-bounds setting.
// Returns false when validateRE() rejects the handle.
U_CAPI UBool U_EXPORT2
uregex_hasAnchoringBounds(const URegularExpression *regexp2,
                                UErrorCode         *status) {
    RegularExpression *re = (RegularExpression *)regexp2;
    if (!validateRE(re, false, status)) {
        return false;
    }
    return re->fMatcher->hasAnchoringBounds();
}
957
958
959
//------------------------------------------------------------------------------
960
//
961
//    uregex_useAnchoringBounds
962
//
963
//------------------------------------------------------------------------------
964
// Passes `b` to RegexMatcher::useAnchoringBounds().
// Nothing happens when validateRE() rejects the handle.
U_CAPI void U_EXPORT2
uregex_useAnchoringBounds(URegularExpression *regexp2,
                          UBool               b,
                          UErrorCode         *status) {
    RegularExpression *re = (RegularExpression *)regexp2;
    if (validateRE(re, false, status)) {
        re->fMatcher->useAnchoringBounds(b);
    }
}
974
975
976
//------------------------------------------------------------------------------
977
//
978
//    uregex_hitEnd
979
//
980
//------------------------------------------------------------------------------
981
// Returns RegexMatcher::hitEnd() for the wrapped matcher, or false when
// validateRE() rejects the handle.
U_CAPI UBool U_EXPORT2
uregex_hitEnd(const URegularExpression *regexp2,
                    UErrorCode         *status) {
    RegularExpression *re = (RegularExpression *)regexp2;
    if (!validateRE(re, true, status)) {
        return false;
    }
    return re->fMatcher->hitEnd();
}
990
991
992
//------------------------------------------------------------------------------
993
//
994
//    uregex_requireEnd
995
//
996
//------------------------------------------------------------------------------
997
// Returns RegexMatcher::requireEnd() for the wrapped matcher, or false when
// validateRE() rejects the handle.
U_CAPI UBool U_EXPORT2
uregex_requireEnd(const URegularExpression *regexp2,
                        UErrorCode         *status) {
    RegularExpression *re = (RegularExpression *)regexp2;
    if (!validateRE(re, true, status)) {
        return false;
    }
    return re->fMatcher->requireEnd();
}
1006
1007
1008
//------------------------------------------------------------------------------
1009
//
1010
//    uregex_setTimeLimit
1011
//
1012
//------------------------------------------------------------------------------
1013
// Passes `limit` to RegexMatcher::setTimeLimit().
// Nothing happens when validateRE() rejects the handle.
U_CAPI void U_EXPORT2
uregex_setTimeLimit(URegularExpression *regexp2,
                    int32_t             limit,
                    UErrorCode         *status) {
    RegularExpression *re = (RegularExpression *)regexp2;
    if (!validateRE(re, false, status)) {
        return;
    }
    re->fMatcher->setTimeLimit(limit, *status);
}
1022
1023
1024
1025
//------------------------------------------------------------------------------
1026
//
1027
//    uregex_getTimeLimit
1028
//
1029
//------------------------------------------------------------------------------
1030
// Returns the wrapped matcher's time limit, or 0 when validateRE()
// rejects the handle.
U_CAPI int32_t U_EXPORT2
uregex_getTimeLimit(const URegularExpression *regexp2,
                          UErrorCode         *status) {
    RegularExpression *re = (RegularExpression *)regexp2;
    if (!validateRE(re, false, status)) {
        return 0;
    }
    return re->fMatcher->getTimeLimit();
}
1040
1041
1042
1043
//------------------------------------------------------------------------------
1044
//
1045
//    uregex_setStackLimit
1046
//
1047
//------------------------------------------------------------------------------
1048
// Passes `limit` to RegexMatcher::setStackLimit().
// Nothing happens when validateRE() rejects the handle.
U_CAPI void U_EXPORT2
uregex_setStackLimit(URegularExpression *regexp2,
                     int32_t             limit,
                     UErrorCode         *status) {
    RegularExpression *re = (RegularExpression *)regexp2;
    if (!validateRE(re, false, status)) {
        return;
    }
    re->fMatcher->setStackLimit(limit, *status);
}
1057
1058
1059
1060
//------------------------------------------------------------------------------
1061
//
1062
//    uregex_getStackLimit
1063
//
1064
//------------------------------------------------------------------------------
1065
// Returns the wrapped matcher's stack limit, or 0 when validateRE()
// rejects the handle.
U_CAPI int32_t U_EXPORT2
uregex_getStackLimit(const URegularExpression *regexp2,
                           UErrorCode         *status) {
    RegularExpression *re = (RegularExpression *)regexp2;
    if (!validateRE(re, false, status)) {
        return 0;
    }
    return re->fMatcher->getStackLimit();
}
1075
1076
1077
//------------------------------------------------------------------------------
1078
//
1079
//    uregex_setMatchCallback
1080
//
1081
//------------------------------------------------------------------------------
1082
// Installs a match callback and its context pointer on the wrapped matcher.
// Nothing happens when validateRE() rejects the handle.
U_CAPI void U_EXPORT2
uregex_setMatchCallback(URegularExpression  *regexp2,
                        URegexMatchCallback *callback,
                        const void          *context,
                        UErrorCode          *status) {
    RegularExpression *re = (RegularExpression *)regexp2;
    if (!validateRE(re, false, status)) {
        return;
    }
    re->fMatcher->setMatchCallback(callback, context, *status);
}
1092
1093
1094
//------------------------------------------------------------------------------
1095
//
1096
//    uregex_getMatchCallback
1097
//
1098
//------------------------------------------------------------------------------
1099
// Retrieves the match callback and context currently set on the wrapped
// matcher.
//
//   regexp2   the regular expression handle.
//   callback  out: receives the callback function pointer. Must not be null.
//   context   out: receives the callback's context pointer. Must not be null.
//   status    error code; set to U_ILLEGAL_ARGUMENT_ERROR when either output
//             pointer is null.
//
// Nothing is written to the outputs when validateRE() rejects the handle.
U_CAPI void U_EXPORT2
uregex_getMatchCallback(const URegularExpression    *regexp2,
                        URegexMatchCallback        **callback,
                        const void                 **context,
                        UErrorCode                  *status) {
    RegularExpression *regexp = (RegularExpression*)regexp2;
    if (validateRE(regexp, false, status) == false) {
        return;
    }
    // Both outputs are dereferenced below to form references, so a null
    // pointer must be rejected here rather than crash.
    if (callback == nullptr || context == nullptr) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    regexp->fMatcher->getMatchCallback(*callback, *context, *status);
}
1109
1110
1111
//------------------------------------------------------------------------------
1112
//
1113
//    uregex_setFindProgressCallback
1114
//
1115
//------------------------------------------------------------------------------
1116
// Installs a find-progress callback and its context pointer on the wrapped
// matcher. Nothing happens when validateRE() rejects the handle.
U_CAPI void U_EXPORT2
uregex_setFindProgressCallback(URegularExpression         *regexp2,
                               URegexFindProgressCallback *callback,
                               const void                 *context,
                               UErrorCode                 *status) {
    RegularExpression *re = (RegularExpression *)regexp2;
    if (!validateRE(re, false, status)) {
        return;
    }
    re->fMatcher->setFindProgressCallback(callback, context, *status);
}
1126
1127
1128
//------------------------------------------------------------------------------
1129
//
1130
//    uregex_getFindProgressCallback
1131
//
1132
//------------------------------------------------------------------------------
1133
// Retrieves the find-progress callback and context currently set on the
// wrapped matcher.
//
//   regexp2   the regular expression handle.
//   callback  out: receives the callback function pointer. Must not be null.
//   context   out: receives the callback's context pointer. Must not be null.
//   status    error code; set to U_ILLEGAL_ARGUMENT_ERROR when either output
//             pointer is null.
//
// Nothing is written to the outputs when validateRE() rejects the handle.
U_CAPI void U_EXPORT2
uregex_getFindProgressCallback(const URegularExpression          *regexp2,
                                URegexFindProgressCallback        **callback,
                                const void                        **context,
                                UErrorCode                        *status) {
    RegularExpression *regexp = (RegularExpression*)regexp2;
    if (validateRE(regexp, false, status) == false) {
        return;
    }
    // Both outputs are dereferenced below to form references, so a null
    // pointer must be rejected here rather than crash.
    if (callback == nullptr || context == nullptr) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    regexp->fMatcher->getFindProgressCallback(*callback, *context, *status);
}
1143
1144
1145
//------------------------------------------------------------------------------
1146
//
1147
//    uregex_replaceAll
1148
//
1149
//------------------------------------------------------------------------------
1150
// Replaces every match in the input with `replacementText`, writing the
// result into destBuf (at most destCapacity code units).
//
// Returns the sum of the lengths reported by uregex_appendReplacement() for
// each match plus the length reported by uregex_appendTail().
// Returns 0 when validateRE() rejects the handle or an argument is illegal
// (the latter sets U_ILLEGAL_ARGUMENT_ERROR).
U_CAPI int32_t U_EXPORT2
uregex_replaceAll(URegularExpression *regexp2,
                  const char16_t     *replacementText,
                  int32_t             replacementLength,
                  char16_t           *destBuf,
                  int32_t             destCapacity,
                  UErrorCode         *status) {
    RegularExpression *re = (RegularExpression *)regexp2;
    if (!validateRE(re, true, status)) {
        return 0;
    }

    const bool badReplacement = (replacementText == nullptr || replacementLength < -1);
    const bool badDestination = (destCapacity < 0 || (destBuf == nullptr && destCapacity > 0));
    if (badReplacement || badDestination) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    uregex_reset(regexp2, 0, status);

    // findNext() gets its own error code so that a destination-buffer
    // overflow recorded in *status by the append functions does not stop
    // the search. The append functions special-case an incoming overflow
    // and keep returning the correct length.
    UErrorCode findStatus = *status;
    int32_t    total      = 0;
    for (;;) {
        if (!uregex_findNext(regexp2, &findStatus)) {
            break;
        }
        total += uregex_appendReplacement(regexp2, replacementText, replacementLength,
                                          &destBuf, &destCapacity, status);
    }
    total += uregex_appendTail(regexp2, &destBuf, &destCapacity, status);

    // An error from findNext() is not expected; if there is one, it takes
    // precedence over whatever the append operations reported.
    if (U_FAILURE(findStatus)) {
        *status = findStatus;
    }

    return total;
}
1193
1194
1195
//------------------------------------------------------------------------------
1196
//
1197
//    uregex_replaceAllUText
1198
//
1199
//------------------------------------------------------------------------------
1200
// UText flavor of replace-all: hands the work to RegexMatcher::replaceAll()
// and returns whatever UText it returns.
// Returns null when validateRE() rejects the handle, or when replacementText
// is null (which also sets U_ILLEGAL_ARGUMENT_ERROR).
U_CAPI UText * U_EXPORT2
uregex_replaceAllUText(URegularExpression *regexp2,
                       UText              *replacementText,
                       UText              *dest,
                       UErrorCode         *status) {
    RegularExpression *re = (RegularExpression *)regexp2;
    if (!validateRE(re, true, status)) {
        return nullptr;
    }
    if (replacementText == nullptr) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return nullptr;
    }

    return re->fMatcher->replaceAll(replacementText, dest, *status);
}
1217
1218
1219
//------------------------------------------------------------------------------
1220
//
1221
//    uregex_replaceFirst
1222
//
1223
//------------------------------------------------------------------------------
1224
// Replaces the first match in the input with `replacementText`, writing the
// result into destBuf (at most destCapacity code units).
//
// Returns the length reported by uregex_appendReplacement() (when a match
// was found) plus the length reported by uregex_appendTail().
// Returns 0 when validateRE() rejects the handle or an argument is illegal
// (the latter sets U_ILLEGAL_ARGUMENT_ERROR).
U_CAPI int32_t U_EXPORT2
uregex_replaceFirst(URegularExpression *regexp2,
                    const char16_t     *replacementText,
                    int32_t             replacementLength,
                    char16_t           *destBuf,
                    int32_t             destCapacity,
                    UErrorCode         *status) {
    RegularExpression *re = (RegularExpression *)regexp2;
    if (!validateRE(re, true, status)) {
        return 0;
    }

    const bool badReplacement = (replacementText == nullptr || replacementLength < -1);
    const bool badDestination = (destCapacity < 0 || (destBuf == nullptr && destCapacity > 0));
    if (badReplacement || badDestination) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    uregex_reset(regexp2, 0, status);

    int32_t total = 0;
    if (uregex_find(regexp2, 0, status)) {
        total = uregex_appendReplacement(regexp2, replacementText, replacementLength,
                                         &destBuf, &destCapacity, status);
    }
    total += uregex_appendTail(regexp2, &destBuf, &destCapacity, status);

    return total;
}
1254
1255
1256
//------------------------------------------------------------------------------
1257
//
1258
//    uregex_replaceFirstUText
1259
//
1260
//------------------------------------------------------------------------------
1261
// UText flavor of replace-first: hands the work to
// RegexMatcher::replaceFirst() and returns whatever UText it returns.
// Returns null when validateRE() rejects the handle, or when replacementText
// is null (which also sets U_ILLEGAL_ARGUMENT_ERROR).
U_CAPI UText * U_EXPORT2
uregex_replaceFirstUText(URegularExpression *regexp2,
                         UText              *replacementText,
                         UText              *dest,
                         UErrorCode         *status) {
    RegularExpression *re = (RegularExpression *)regexp2;
    if (!validateRE(re, true, status)) {
        return nullptr;
    }
    if (replacementText == nullptr) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return nullptr;
    }

    return re->fMatcher->replaceFirst(replacementText, dest, *status);
}
1278
1279
1280
//------------------------------------------------------------------------------
1281
//
1282
//    uregex_appendReplacement
1283
//
1284
//------------------------------------------------------------------------------
1285
1286
U_NAMESPACE_BEGIN
1287
//
1288
//  Dummy class, because these functions need to be friends of class RegexMatcher,
1289
//               and stand-alone C functions don't work as friends
1290
//
1291
class RegexCImpl {
1292
 public:
1293
   inline static  int32_t appendReplacement(RegularExpression    *regexp,
1294
                      const char16_t        *replacementText,
1295
                      int32_t                replacementLength,
1296
                      char16_t             **destBuf,
1297
                      int32_t               *destCapacity,
1298
                      UErrorCode            *status);
1299
1300
   inline static int32_t appendTail(RegularExpression    *regexp,
1301
        char16_t             **destBuf,
1302
        int32_t               *destCapacity,
1303
        UErrorCode            *status);
1304
1305
    inline static int32_t split(RegularExpression    *regexp,
1306
        char16_t              *destBuf,
1307
        int32_t                destCapacity,
1308
        int32_t               *requiredCapacity,
1309
        char16_t              *destFields[],
1310
        int32_t                destFieldsCapacity,
1311
        UErrorCode            *status);
1312
};
1313
1314
U_NAMESPACE_END
1315
1316
1317
1318
// Characters with special meaning in replacement text.
static const char16_t BACKSLASH    = u'\\';  // 0x5c
static const char16_t DOLLARSIGN   = u'$';   // 0x24
static const char16_t LEFTBRACKET  = u'{';   // 0x7b
static const char16_t RIGHTBRACKET = u'}';   // 0x7d
1322
1323
//
1324
//  Store one code unit at buf[*idx] when that slot lies within bufCapacity,
//      then advance *idx.
//      The index advances even when nothing is stored, so callers can keep
//      counting past the end of the buffer for preflight size computations.
//
static inline void appendToBuf(char16_t c, int32_t *idx, char16_t *buf, int32_t bufCapacity) {
    const int32_t slot = (*idx)++;
    if (slot < bufCapacity) {
        buf[slot] = c;
    }
}
1334
1335
1336
//
1337
//  appendReplacement, the actual implementation.
1338
//
1339
//
//  Appends to the destination buffer:
//    1. the input text lying between the end of the previous match and the
//       start of the current match, then
//    2. the replacement text, with backslash escapes resolved and $n / ${name}
//       references replaced by the corresponding capture group contents.
//
//  regexp             the regular expression; its matcher must hold a match.
//  replacementText    replacement pattern; must not be null.
//  replacementLength  length of replacementText, or -1 if NUL terminated.
//  destBuf            in/out: pointer to the write position. Advanced past
//                     whatever was written.
//  destCapacity       in/out: remaining capacity at *destBuf. Reduced by the
//                     amount written; becomes 0 once the buffer is full.
//  status             in/out error code. An incoming U_BUFFER_OVERFLOW_ERROR
//                     with *destCapacity == 0 does not suppress the operation.
//                     On output: U_ILLEGAL_ARGUMENT_ERROR for bad arguments,
//                     U_REGEX_INVALID_STATE when there is no current match,
//                     U_INDEX_OUTOFBOUNDS_ERROR or
//                     U_REGEX_INVALID_CAPTURE_GROUP_NAME for a bad group
//                     reference, U_STRING_NOT_TERMINATED_WARNING when the
//                     output exactly fills the buffer, and
//                     U_BUFFER_OVERFLOW_ERROR when it does not fit.
//
//  Returns the number of code units this call produced, counted even when
//  they did not fit in the buffer; returns 0 on argument or state errors.
//
int32_t RegexCImpl::appendReplacement(RegularExpression    *regexp,
                                      const char16_t        *replacementText,
                                      int32_t                replacementLength,
                                      char16_t             **destBuf,
                                      int32_t               *destCapacity,
                                      UErrorCode            *status)  {

    // If we come in with a buffer overflow error, don't suppress the operation.
    //  A series of appendReplacements, appendTail need to correctly preflight
    //  the buffer size when an overflow happens somewhere in the middle.
    UBool pendingBufferOverflow = false;
    if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != nullptr && *destCapacity == 0) {
        pendingBufferOverflow = true;
        *status = U_ZERO_ERROR;
    }

    //
    // Validate all parameters
    //
    if (validateRE(regexp, true, status) == false) {
        return 0;
    }
    if (replacementText == nullptr || replacementLength < -1 ||
        destCapacity == nullptr || destBuf == nullptr ||
        (*destBuf == nullptr && *destCapacity > 0) ||
        *destCapacity < 0) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    RegexMatcher *m = regexp->fMatcher;
    if (m->fMatch == false) {
        *status = U_REGEX_INVALID_STATE;
        return 0;
    }

    char16_t *dest             = *destBuf;
    int32_t   capacity         = *destCapacity;
    int32_t   destIdx          =  0;

    // If it wasn't supplied by the caller,  get the length of the replacement text.
    //   TODO:  slightly smarter logic in the copy loop could watch for the NUL on
    //          the fly and avoid this step.
    if (replacementLength == -1) {
        replacementLength = u_strlen(replacementText);
    }

    // Copy input string from the end of previous match to start of current match
    if (regexp->fText != nullptr) {
        int32_t matchStart;
        int32_t lastMatchEnd;
        if (UTEXT_USES_U16(m->fInputText)) {
            lastMatchEnd = (int32_t)m->fLastMatchEnd;
            matchStart = (int32_t)m->fMatchStart;
        } else {
            // !!!: Would like a better way to do this!
            // Preflight extractions (null buffer, zero capacity) are used only
            // for the lengths they return; their status is discarded.
            UErrorCode tempStatus = U_ZERO_ERROR;
            lastMatchEnd = utext_extract(m->fInputText, 0, m->fLastMatchEnd, nullptr, 0, &tempStatus);
            tempStatus = U_ZERO_ERROR;
            matchStart = lastMatchEnd + utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart, nullptr, 0, &tempStatus);
        }
        for (int32_t i=lastMatchEnd; i<matchStart; i++) {
            appendToBuf(regexp->fText[i], &destIdx, dest, capacity);
        }
    } else {
        UErrorCode possibleOverflowError = U_ZERO_ERROR; // ignore
        destIdx += utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart,
                                 dest==nullptr?nullptr:&dest[destIdx], REMAINING_CAPACITY(destIdx, capacity),
                                 &possibleOverflowError);
    }
    U_ASSERT(destIdx >= 0);

    // scan the replacement text, looking for substitutions ($n) and \escapes.
    int32_t  replIdx = 0;
    while (replIdx < replacementLength && U_SUCCESS(*status)) {
        char16_t  c = replacementText[replIdx];
        replIdx++;
        if (c != DOLLARSIGN && c != BACKSLASH) {
            // Common case, no substitution, no escaping,
            //  just copy the char to the dest buf.
            appendToBuf(c, &destIdx, dest, capacity);
            continue;
        }

        if (c == BACKSLASH) {
            // Backslash Escape.  Copy the following char out without further checks.
            //                    Note:  Surrogate pairs don't need any special handling
            //                           The second half won't be a '$' or a '\', and
            //                           will move to the dest normally on the next
            //                           loop iteration.
            if (replIdx >= replacementLength) {
                break;
            }
            c = replacementText[replIdx];

            if (c==0x55/*U*/ || c==0x75/*u*/) {
                // We have a \udddd or \Udddddddd escape sequence.
                UChar32 escapedChar =
                    u_unescapeAt(uregex_ucstr_unescape_charAt,
                       &replIdx,                   // Index is updated by unescapeAt
                       replacementLength,          // Length of replacement text
                       (void *)replacementText);

                if (escapedChar != (UChar32)0xFFFFFFFF) {
                    if (escapedChar <= 0xffff) {
                        appendToBuf((char16_t)escapedChar, &destIdx, dest, capacity);
                    } else {
                        appendToBuf(U16_LEAD(escapedChar), &destIdx, dest, capacity);
                        appendToBuf(U16_TRAIL(escapedChar), &destIdx, dest, capacity);
                    }
                    continue;
                }
                // Note:  if the \u escape was invalid, just fall through and
                //        treat it as a plain \<anything> escape.
            }

            // Plain backslash escape.  Just put out the escaped character.
            appendToBuf(c, &destIdx, dest, capacity);

            replIdx++;
            continue;
        }

        // We've got a $.  Pick up the following capture group name or number.
        // For numbers, consume only digits that produce a valid capture group for the pattern.

        int32_t groupNum  = 0;
        U_ASSERT(c == DOLLARSIGN);
        UChar32 c32 = -1;
        if (replIdx < replacementLength) {
            U16_GET(replacementText, 0, replIdx, replacementLength, c32);
        }
        if (u_isdigit(c32)) {
            int32_t numDigits = 0;
            int32_t numCaptureGroups = m->fPattern->fGroupMap->size();
            for (;;) {
                if (replIdx >= replacementLength) {
                    break;
                }
                U16_GET(replacementText, 0, replIdx, replacementLength, c32);
                if (u_isdigit(c32) == false) {
                    break;
                }

                int32_t digitVal = u_charDigitValue(c32);
                if (groupNum * 10 + digitVal <= numCaptureGroups) {
                    groupNum = groupNum * 10 + digitVal;
                    U16_FWD_1(replacementText, replIdx, replacementLength);
                    numDigits++;
                } else {
                    // The very first digit already exceeds the group count:
                    // there is no valid group number to use.
                    if (numDigits == 0) {
                        *status = U_INDEX_OUTOFBOUNDS_ERROR;
                    }
                    break;
                }
            }
        } else if (c32 == LEFTBRACKET) {
            // Scan for Named Capture Group, ${name}.
            UnicodeString groupName;
            U16_FWD_1(replacementText, replIdx, replacementLength);
            while (U_SUCCESS(*status) && c32 != RIGHTBRACKET) {
                if (replIdx >= replacementLength) {
                    *status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
                    break;
                }
                U16_NEXT(replacementText, replIdx, replacementLength, c32);
                if ((c32 >= 0x41 && c32 <= 0x5a) ||           // A..Z
                        (c32 >= 0x61 && c32 <= 0x7a) ||       // a..z
                        (c32 >= 0x30 && c32 <= 0x39)) {       // 0..9  ('0' is 0x30)
                    groupName.append(c32);
                } else if (c32 == RIGHTBRACKET) {
                    groupNum = regexp->fPat->fNamedCaptureMap ?
                            uhash_geti(regexp->fPat->fNamedCaptureMap, &groupName) : 0;
                    if (groupNum == 0) {
                        // Name not defined by pattern.
                        *status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
                    }
                } else {
                    // Character was something other than a name char or a closing '}'
                    *status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
                }
            }
        } else {
            // $ not followed by {name} or digits.
            *status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
        }


        // Finally, append the capture group data to the destination.
        if (U_SUCCESS(*status)) {
            destIdx += uregex_group((URegularExpression*)regexp, groupNum,
                                    dest==nullptr?nullptr:&dest[destIdx], REMAINING_CAPACITY(destIdx, capacity), status);
            if (*status == U_BUFFER_OVERFLOW_ERROR) {
                // Ignore buffer overflow when extracting the group.  We need to
                //   continue on to get full size of the untruncated result.  We will
                //   raise our own buffer overflow error at the end.
                *status = U_ZERO_ERROR;
            }
        }

        if (U_FAILURE(*status)) {
            // bad group number or name.
            break;
        }
    }

    //
    //  Nul Terminate the dest buffer if possible.
    //  Set the appropriate buffer overflow or not terminated error, if needed.
    //
    if (destIdx < capacity) {
        dest[destIdx] = 0;
    } else if (U_SUCCESS(*status)) {
        if (destIdx == *destCapacity) {
            *status = U_STRING_NOT_TERMINATED_WARNING;
        } else {
            *status = U_BUFFER_OVERFLOW_ERROR;
        }
    }

    //
    // Return an updated dest buffer and capacity to the caller.
    //
    if (destIdx > 0 &&  *destCapacity > 0) {
        if (destIdx < capacity) {
            *destBuf      += destIdx;
            *destCapacity -= destIdx;
        } else {
            *destBuf      += capacity;
            *destCapacity =  0;
        }
    }

    // If we came in with a buffer overflow, make sure we go out with one also.
    //   (A zero length match right at the end of the previous match could
    //    make this function succeed even though a previous call had overflowed the buf)
    if (pendingBufferOverflow && U_SUCCESS(*status)) {
        *status = U_BUFFER_OVERFLOW_ERROR;
    }

    return destIdx;
}
1582
1583
//
1584
//   appendReplacement   the actual API function,
1585
//
1586
// Public C entry point: casts the opaque handle and forwards every argument
// unchanged to RegexCImpl::appendReplacement(), returning its result.
U_CAPI int32_t U_EXPORT2
uregex_appendReplacement(URegularExpression  *regexp2,
                         const char16_t      *replacementText,
                         int32_t              replacementLength,
                         char16_t           **destBuf,
                         int32_t             *destCapacity,
                         UErrorCode          *status) {
    RegularExpression *re = (RegularExpression *)regexp2;
    const int32_t appended = RegexCImpl::appendReplacement(
        re, replacementText, replacementLength, destBuf, destCapacity, status);
    return appended;
}
1598
1599
//
1600
//   uregex_appendReplacementUText...can just use the normal C++ method
1601
//
1602
U_CAPI void U_EXPORT2
1603
uregex_appendReplacementUText(URegularExpression    *regexp2,
1604
                              UText                 *replText,
1605
                              UText                 *dest,
1606
0
                              UErrorCode            *status)  {
1607
0
    RegularExpression *regexp = (RegularExpression*)regexp2;
1608
0
    regexp->fMatcher->appendReplacement(dest, replText, *status);
1609
0
}
1610
1611
1612
//------------------------------------------------------------------------------
1613
//
1614
//    uregex_appendTail
1615
//
1616
//------------------------------------------------------------------------------
1617
int32_t RegexCImpl::appendTail(RegularExpression    *regexp,
1618
                               char16_t             **destBuf,
1619
                               int32_t               *destCapacity,
1620
                               UErrorCode            *status)
1621
0
{
1622
1623
    // If we come in with a buffer overflow error, don't suppress the operation.
1624
    //  A series of appendReplacements, appendTail need to correctly preflight
1625
    //  the buffer size when an overflow happens somewhere in the middle.
1626
0
    UBool pendingBufferOverflow = false;
1627
0
    if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != nullptr && *destCapacity == 0) {
1628
0
        pendingBufferOverflow = true;
1629
0
        *status = U_ZERO_ERROR;
1630
0
    }
1631
1632
0
    if (validateRE(regexp, true, status) == false) {
1633
0
        return 0;
1634
0
    }
1635
1636
0
    if (destCapacity == nullptr || destBuf == nullptr ||
1637
0
        (*destBuf == nullptr && *destCapacity > 0) ||
1638
0
        *destCapacity < 0)
1639
0
    {
1640
0
        *status = U_ILLEGAL_ARGUMENT_ERROR;
1641
0
        return 0;
1642
0
    }
1643
1644
0
    RegexMatcher *m = regexp->fMatcher;
1645
1646
0
    int32_t  destIdx     = 0;
1647
0
    int32_t  destCap     = *destCapacity;
1648
0
    char16_t *dest       = *destBuf;
1649
1650
0
    if (regexp->fText != nullptr) {
1651
0
        int32_t srcIdx;
1652
0
        int64_t nativeIdx = (m->fMatch ? m->fMatchEnd : m->fLastMatchEnd);
1653
0
        if (nativeIdx == -1) {
1654
0
            srcIdx = 0;
1655
0
        } else if (UTEXT_USES_U16(m->fInputText)) {
1656
0
            srcIdx = (int32_t)nativeIdx;
1657
0
        } else {
1658
0
            UErrorCode newStatus = U_ZERO_ERROR;
1659
0
            srcIdx = utext_extract(m->fInputText, 0, nativeIdx, nullptr, 0, &newStatus);
1660
0
        }
1661
1662
0
        for (;;) {
1663
0
            U_ASSERT(destIdx >= 0);
1664
1665
0
            if (srcIdx == regexp->fTextLength) {
1666
0
                break;
1667
0
            }
1668
0
            char16_t c = regexp->fText[srcIdx];
1669
0
            if (c == 0 && regexp->fTextLength == -1) {
1670
0
                regexp->fTextLength = srcIdx;
1671
0
                break;
1672
0
            }
1673
1674
0
            if (destIdx < destCap) {
1675
0
                dest[destIdx] = c;
1676
0
            } else {
1677
                // We've overflowed the dest buffer.
1678
                //  If the total input string length is known, we can
1679
                //    compute the total buffer size needed without scanning through the string.
1680
0
                if (regexp->fTextLength > 0) {
1681
0
                    destIdx += (regexp->fTextLength - srcIdx);
1682
0
                    break;
1683
0
                }
1684
0
            }
1685
0
            srcIdx++;
1686
0
            destIdx++;
1687
0
        }
1688
0
    } else {
1689
0
        int64_t  srcIdx;
1690
0
        if (m->fMatch) {
1691
            // The most recent call to find() succeeded.
1692
0
            srcIdx = m->fMatchEnd;
1693
0
        } else {
1694
            // The last call to find() on this matcher failed().
1695
            //   Look back to the end of the last find() that succeeded for src index.
1696
0
            srcIdx = m->fLastMatchEnd;
1697
0
            if (srcIdx == -1)  {
1698
                // There has been no successful match with this matcher.
1699
                //   We want to copy the whole string.
1700
0
                srcIdx = 0;
1701
0
            }
1702
0
        }
1703
1704
0
        destIdx = utext_extract(m->fInputText, srcIdx, m->fInputLength, dest, destCap, status);
1705
0
    }
1706
1707
    //
1708
    //  NUL terminate the output string, if possible, otherwise issue the
1709
    //   appropriate error or warning.
1710
    //
1711
0
    if (destIdx < destCap) {
1712
0
        dest[destIdx] = 0;
1713
0
    } else  if (destIdx == destCap) {
1714
0
        *status = U_STRING_NOT_TERMINATED_WARNING;
1715
0
    } else {
1716
0
        *status = U_BUFFER_OVERFLOW_ERROR;
1717
0
    }
1718
1719
    //
1720
    // Update the user's buffer ptr and capacity vars to reflect the
1721
    //   amount used.
1722
    //
1723
0
    if (destIdx < destCap) {
1724
0
        *destBuf      += destIdx;
1725
0
        *destCapacity -= destIdx;
1726
0
    } else if (*destBuf != nullptr) {
1727
0
        *destBuf      += destCap;
1728
0
        *destCapacity  = 0;
1729
0
    }
1730
1731
0
    if (pendingBufferOverflow && U_SUCCESS(*status)) {
1732
0
        *status = U_BUFFER_OVERFLOW_ERROR;
1733
0
    }
1734
1735
0
    return destIdx;
1736
0
}
1737
1738
1739
//
1740
//   appendTail   the actual API function
1741
//
1742
U_CAPI int32_t U_EXPORT2
1743
uregex_appendTail(URegularExpression    *regexp2,
1744
                  char16_t             **destBuf,
1745
                  int32_t               *destCapacity,
1746
0
                  UErrorCode            *status)  {
1747
0
    RegularExpression *regexp = (RegularExpression*)regexp2;
1748
0
    return RegexCImpl::appendTail(regexp, destBuf, destCapacity, status);
1749
0
}
1750
1751
1752
//
1753
//   uregex_appendTailUText...can just use the normal C++ method
1754
//
1755
U_CAPI UText * U_EXPORT2
1756
uregex_appendTailUText(URegularExpression    *regexp2,
1757
                       UText                 *dest,
1758
0
                       UErrorCode            *status)  {
1759
0
    RegularExpression *regexp = (RegularExpression*)regexp2;
1760
0
    return regexp->fMatcher->appendTail(dest, *status);
1761
0
}
1762
1763
1764
//------------------------------------------------------------------------------
1765
//
1766
//    copyString     Internal utility to copy a string to an output buffer,
1767
//                   while managing buffer overflow and preflight size
1768
//                   computation.  NUL termination is added to destination,
1769
//                   and the NUL is counted in the output size.
1770
//
1771
//------------------------------------------------------------------------------
1772
#if 0
1773
static void copyString(char16_t     *destBuffer,    //  Destination buffer.
                       int32_t       destCapacity,  //  Total capacity of dest buffer
                       int32_t      *destIndex,     //  Index into dest buffer.  Updated on return.
                                                    //    Update not clipped to destCapacity.
                       const char16_t  *srcPtr,        //  Pointer to source string
                       int32_t       srcLen)        //  Source string len.
{
    int32_t  outIdx = *destIndex;
    int32_t  copied = 0;

    // Copy code units for as long as both source and destination have room.
    while (copied < srcLen && outIdx < destCapacity) {
        destBuffer[outIdx] = srcPtr[copied];
        ++outIdx;
        ++copied;
    }

    // Anything that did not fit is still counted, so that the caller
    //   learns the full size required.
    if (copied < srcLen) {
        outIdx += srcLen - copied;
    }

    // Terminate if there is room.  The NUL is counted either way.
    if (outIdx < destCapacity) {
        destBuffer[outIdx] = 0;
    }
    *destIndex = outIdx + 1;
}
1800
#endif
1801
1802
//------------------------------------------------------------------------------
1803
//
1804
//    uregex_split
1805
//
1806
//------------------------------------------------------------------------------
1807
int32_t RegexCImpl::split(RegularExpression     *regexp,
1808
                          char16_t              *destBuf,
1809
                          int32_t                destCapacity,
1810
                          int32_t               *requiredCapacity,
1811
                          char16_t              *destFields[],
1812
                          int32_t                destFieldsCapacity,
1813
0
                          UErrorCode            *status) {
1814
    //
1815
    // Reset for the input text
1816
    //
1817
0
    regexp->fMatcher->reset();
1818
0
    UText *inputText = regexp->fMatcher->fInputText;
1819
0
    int64_t   nextOutputStringStart = 0;
1820
0
    int64_t   inputLen = regexp->fMatcher->fInputLength;
1821
0
    if (inputLen == 0) {
1822
0
        return 0;
1823
0
    }
1824
1825
    //
1826
    // Loop through the input text, searching for the delimiter pattern
1827
    //
1828
0
    int32_t   i;             // Index of the field being processed.
1829
0
    int32_t   destIdx = 0;   // Next available position in destBuf;
1830
0
    int32_t   numCaptureGroups = regexp->fMatcher->groupCount();
1831
0
    UErrorCode  tStatus = U_ZERO_ERROR;   // Want to ignore any buffer overflow errors so that the strings are still counted
1832
0
    for (i=0; ; i++) {
1833
0
        if (i>=destFieldsCapacity-1) {
1834
            // There are one or zero output strings left.
1835
            // Fill the last output string with whatever is left from the input, then exit the loop.
1836
            //  ( i will be == destFieldsCapacity if we filled the output array while processing
1837
            //    capture groups of the delimiter expression, in which case we will discard the
1838
            //    last capture group saved in favor of the unprocessed remainder of the
1839
            //    input string.)
1840
0
            if (inputLen > nextOutputStringStart) {
1841
0
                if (i != destFieldsCapacity-1) {
1842
                    // No fields are left.  Recycle the last one for holding the trailing part of
1843
                    //   the input string.
1844
0
                    i = destFieldsCapacity-1;
1845
0
                    destIdx = (int32_t)(destFields[i] - destFields[0]);
1846
0
                }
1847
1848
0
                destFields[i] = (destBuf == nullptr) ? nullptr :  &destBuf[destIdx];
1849
0
                destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen,
1850
0
                                             destFields[i], REMAINING_CAPACITY(destIdx, destCapacity), status);
1851
0
            }
1852
0
            break;
1853
0
        }
1854
1855
0
        if (regexp->fMatcher->find()) {
1856
            // We found another delimiter.  Move everything from where we started looking
1857
            //  up until the start of the delimiter into the next output string.
1858
0
            destFields[i] = (destBuf == nullptr) ? nullptr :  &destBuf[destIdx];
1859
1860
0
            destIdx += 1 + utext_extract(inputText, nextOutputStringStart, regexp->fMatcher->fMatchStart,
1861
0
                                         destFields[i], REMAINING_CAPACITY(destIdx, destCapacity), &tStatus);
1862
0
            if (tStatus == U_BUFFER_OVERFLOW_ERROR) {
1863
0
                tStatus = U_ZERO_ERROR;
1864
0
            } else {
1865
0
                *status = tStatus;
1866
0
            }
1867
0
            nextOutputStringStart = regexp->fMatcher->fMatchEnd;
1868
1869
            // If the delimiter pattern has capturing parentheses, the captured
1870
            //  text goes out into the next n destination strings.
1871
0
            int32_t groupNum;
1872
0
            for (groupNum=1; groupNum<=numCaptureGroups; groupNum++) {
1873
                // If we've run out of output string slots, bail out.
1874
0
                if (i==destFieldsCapacity-1) {
1875
0
                    break;
1876
0
                }
1877
0
                i++;
1878
1879
                // Set up to extract the capture group contents into the dest buffer.
1880
0
                destFields[i] = &destBuf[destIdx];
1881
0
                tStatus = U_ZERO_ERROR;
1882
0
                int32_t t = uregex_group((URegularExpression*)regexp,
1883
0
                                         groupNum,
1884
0
                                         destFields[i],
1885
0
                                         REMAINING_CAPACITY(destIdx, destCapacity),
1886
0
                                         &tStatus);
1887
0
                destIdx += t + 1;    // Record the space used in the output string buffer.
1888
                                     //  +1 for the NUL that terminates the string.
1889
0
                if (tStatus == U_BUFFER_OVERFLOW_ERROR) {
1890
0
                    tStatus = U_ZERO_ERROR;
1891
0
                } else {
1892
0
                    *status = tStatus;
1893
0
                }
1894
0
            }
1895
1896
0
            if (nextOutputStringStart == inputLen) {
1897
                // The delimiter was at the end of the string.
1898
                // Output an empty string, and then we are done.
1899
0
                if (destIdx < destCapacity) {
1900
0
                    destBuf[destIdx] = 0;
1901
0
                }
1902
0
                if (i < destFieldsCapacity-1) {
1903
0
                   ++i;
1904
0
                }
1905
0
                if (destIdx < destCapacity) {
1906
0
                    destFields[i] = destBuf + destIdx;
1907
0
                }
1908
0
                ++destIdx;
1909
0
                break;
1910
0
            }
1911
1912
0
        }
1913
0
        else
1914
0
        {
1915
            // We ran off the end of the input while looking for the next delimiter.
1916
            // All the remaining text goes into the current output string.
1917
0
            destFields[i] = (destBuf == nullptr) ? nullptr : &destBuf[destIdx];
1918
0
            destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen,
1919
0
                                         destFields[i], REMAINING_CAPACITY(destIdx, destCapacity), status);
1920
0
            break;
1921
0
        }
1922
0
    }
1923
1924
    // Zero out any unused portion of the destFields array
1925
0
    int j;
1926
0
    for (j=i+1; j<destFieldsCapacity; j++) {
1927
0
        destFields[j] = nullptr;
1928
0
    }
1929
1930
0
    if (requiredCapacity != nullptr) {
1931
0
        *requiredCapacity = destIdx;
1932
0
    }
1933
0
    if (destIdx > destCapacity) {
1934
0
        *status = U_BUFFER_OVERFLOW_ERROR;
1935
0
    }
1936
0
    return i+1;
1937
0
}
1938
1939
//
1940
//   uregex_split   The actual API function
1941
//
1942
U_CAPI int32_t U_EXPORT2
1943
uregex_split(URegularExpression      *regexp2,
1944
             char16_t                *destBuf,
1945
             int32_t                  destCapacity,
1946
             int32_t                 *requiredCapacity,
1947
             char16_t                *destFields[],
1948
             int32_t                  destFieldsCapacity,
1949
0
             UErrorCode              *status) {
1950
0
    RegularExpression *regexp = (RegularExpression*)regexp2;
1951
0
    if (validateRE(regexp, true, status) == false) {
1952
0
        return 0;
1953
0
    }
1954
0
    if ((destBuf == nullptr && destCapacity > 0) ||
1955
0
        destCapacity < 0 ||
1956
0
        destFields == nullptr ||
1957
0
        destFieldsCapacity < 1 ) {
1958
0
        *status = U_ILLEGAL_ARGUMENT_ERROR;
1959
0
        return 0;
1960
0
    }
1961
1962
0
    return RegexCImpl::split(regexp, destBuf, destCapacity, requiredCapacity, destFields, destFieldsCapacity, status);
1963
0
}
1964
1965
1966
//
1967
//   uregex_splitUText...can just use the normal C++ method
1968
//
1969
U_CAPI int32_t U_EXPORT2
1970
uregex_splitUText(URegularExpression    *regexp2,
1971
                  UText                 *destFields[],
1972
                  int32_t                destFieldsCapacity,
1973
0
                  UErrorCode            *status) {
1974
0
    RegularExpression *regexp = (RegularExpression*)regexp2;
1975
0
    return regexp->fMatcher->split(regexp->fMatcher->inputText(), destFields, destFieldsCapacity, *status);
1976
0
}
1977
1978
1979
#endif   // !UCONFIG_NO_REGULAR_EXPRESSIONS
1980