Coverage Report

Created: 2025-06-24 06:54

/src/icu/icu4c/source/i18n/repattrn.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
//
4
//  file:  repattrn.cpp
5
//
6
/*
7
***************************************************************************
8
*   Copyright (C) 2002-2016 International Business Machines Corporation
9
*   and others. All rights reserved.
10
***************************************************************************
11
*/
12
13
#include "unicode/utypes.h"
14
15
#if !UCONFIG_NO_REGULAR_EXPRESSIONS
16
17
#include "unicode/regex.h"
18
#include "unicode/uclean.h"
19
#include "cmemory.h"
20
#include "cstr.h"
21
#include "uassert.h"
22
#include "uhash.h"
23
#include "uvector.h"
24
#include "uvectr32.h"
25
#include "uvectr64.h"
26
#include "regexcmp.h"
27
#include "regeximp.h"
28
#include "regexst.h"
29
30
U_NAMESPACE_BEGIN
31
32
//--------------------------------------------------------------------------
33
//
34
//    RegexPattern    Default Constructor
35
//
36
//--------------------------------------------------------------------------
37
25.1k
// Build an empty pattern object. All member setup is delegated to init(),
// which leaves any allocation failure in fDeferredStatus.
RegexPattern::RegexPattern() {
    init();
}
41
42
43
//--------------------------------------------------------------------------
44
//
45
//   Copy Constructor        Note:  This is a rather inefficient implementation,
46
//                                  but it probably doesn't matter.
47
//
48
//--------------------------------------------------------------------------
49
0
// Copy-construct by first bringing this object to the default state and then
// letting the assignment operator perform the actual copy.
RegexPattern::RegexPattern(const RegexPattern &other) :  UObject(other) {
    init();
    operator=(other);
}
53
54
55
56
//--------------------------------------------------------------------------
57
//
58
//    Assignment Operator
59
//
60
//--------------------------------------------------------------------------
61
0
// Assignment operator.
//   Discards everything this pattern currently owns and replaces it with a
//   deep copy of `other`.  Failures are recorded in fDeferredStatus and the
//   copy is abandoned at that point; *this is returned in every case.
RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
    if (this == &other) {
        // Source and destination are the same.  Don't do anything.
        return *this;
    }

    // Clean out any previous contents of object being assigned to,
    // then give the target object a default initialization.
    zap();
    init();
    if (U_FAILURE(fDeferredStatus)) {
        // init() could not allocate its containers.  Keep that error rather than
        // overwriting it below; the members dereferenced later may be null.
        return *this;
    }

    // A source pattern that is itself in error is copied as "in error" only.
    fDeferredStatus   = other.fDeferredStatus;
    if (U_FAILURE(fDeferredStatus)) {
        return *this;
    }

    // Copy the pattern text.
    if (other.fPatternString != nullptr) {
        fPatternString = new UnicodeString(*(other.fPatternString));
        if (fPatternString == nullptr) {
            fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
        } else {
            fPattern = utext_openConstUnicodeString(nullptr, fPatternString, &fDeferredStatus);
        }
    } else if (other.fPattern != nullptr) {
        fPattern = utext_clone(fPattern, other.fPattern, false, true, &fDeferredStatus);
    }
    // else: the source has no pattern text at all (e.g. it was default
    // constructed); fPattern and fPatternString stay nullptr as set by init().
    if (U_FAILURE(fDeferredStatus)) {
        return *this;
    }

    // Copy simple fields
    fFlags            = other.fFlags;
    fLiteralText      = other.fLiteralText;
    fMinMatchLen      = other.fMinMatchLen;
    fFrameSize        = other.fFrameSize;
    fDataSize         = other.fDataSize;

    fStartType        = other.fStartType;
    fInitialStringIdx = other.fInitialStringIdx;
    fInitialStringLen = other.fInitialStringLen;
    *fInitialChars    = *other.fInitialChars;
    fInitialChar      = other.fInitialChar;
    *fInitialChars8   = *other.fInitialChars8;
    fNeedsAltInput    = other.fNeedsAltInput;

    //  Copy the pattern.  It's just values, nothing deep to copy.
    fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus);
    fGroupMap->assign(*other.fGroupMap, fDeferredStatus);

    //  Copy the Unicode Sets.
    //    Could be made more efficient if the sets were reference counted and shared,
    //    but I doubt that pattern copying will be particularly common.
    //    Note:  init() already added an empty element zero to fSets
    const int32_t numSets = other.fSets->size();
    fSets8 = new Regex8BitSet[numSets];
    if (fSets8 == nullptr) {
        fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
        return *this;
    }
    for (int32_t i = 1; i < numSets; i++) {
        if (U_FAILURE(fDeferredStatus)) {
            return *this;
        }
        const UnicodeSet *sourceSet = static_cast<UnicodeSet*>(other.fSets->elementAt(i));
        UnicodeSet *newSet = new UnicodeSet(*sourceSet);
        if (newSet == nullptr) {
            fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
            break;
        }
        fSets->addElement(newSet, fDeferredStatus);
        if (U_FAILURE(fDeferredStatus)) {
            // The set was not stored in fSets, so zap() will never see it;
            // free it here to avoid a leak.
            delete newSet;
            break;
        }
        fSets8[i] = other.fSets8[i];
    }

    // Copy the named capture group hash map.
    if (other.fNamedCaptureMap != nullptr && initNamedCaptureMap()) {
        int32_t hashPos = UHASH_FIRST;
        while (const UHashElement *hashEl = uhash_nextElement(other.fNamedCaptureMap, &hashPos)) {
            if (U_FAILURE(fDeferredStatus)) {
                break;
            }
            const UnicodeString *name = static_cast<const UnicodeString*>(hashEl->key.pointer);
            UnicodeString *key = new UnicodeString(*name);
            const int32_t val = hashEl->value.integer;
            if (key == nullptr) {
                fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
            } else {
                uhash_puti(fNamedCaptureMap, key, val, &fDeferredStatus);
            }
        }
    }
    return *this;
}
157
158
159
//--------------------------------------------------------------------------
160
//
161
//    init        Shared initialization for use by constructors.
162
//                Bring an uninitialized RegexPattern up to a default state.
163
//
164
//--------------------------------------------------------------------------
165
25.1k
void RegexPattern::init() {
166
25.1k
    fFlags            = 0;
167
25.1k
    fCompiledPat      = nullptr;
168
25.1k
    fLiteralText.remove();
169
25.1k
    fSets             = nullptr;
170
25.1k
    fSets8            = nullptr;
171
25.1k
    fDeferredStatus   = U_ZERO_ERROR;
172
25.1k
    fMinMatchLen      = 0;
173
25.1k
    fFrameSize        = 0;
174
25.1k
    fDataSize         = 0;
175
25.1k
    fGroupMap         = nullptr;
176
25.1k
    fStartType        = START_NO_INFO;
177
25.1k
    fInitialStringIdx = 0;
178
25.1k
    fInitialStringLen = 0;
179
25.1k
    fInitialChars     = nullptr;
180
25.1k
    fInitialChar      = 0;
181
25.1k
    fInitialChars8    = nullptr;
182
25.1k
    fNeedsAltInput    = false;
183
25.1k
    fNamedCaptureMap  = nullptr;
184
185
25.1k
    fPattern          = nullptr; // will be set later
186
25.1k
    fPatternString    = nullptr; // may be set later
187
25.1k
    fCompiledPat      = new UVector64(fDeferredStatus);
188
25.1k
    fGroupMap         = new UVector32(fDeferredStatus);
189
25.1k
    fSets             = new UVector(fDeferredStatus);
190
25.1k
    fInitialChars     = new UnicodeSet;
191
25.1k
    fInitialChars8    = new Regex8BitSet;
192
25.1k
    if (U_FAILURE(fDeferredStatus)) {
193
0
        return;
194
0
    }
195
25.1k
    if (fCompiledPat == nullptr  || fGroupMap == nullptr || fSets == nullptr ||
196
25.1k
            fInitialChars == nullptr || fInitialChars8 == nullptr) {
197
0
        fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
198
0
        return;
199
0
    }
200
201
    // Slot zero of the vector of sets is reserved.  Fill it here.
202
25.1k
    fSets->addElement(static_cast<int32_t>(0), fDeferredStatus);
203
25.1k
}
204
205
206
803
bool RegexPattern::initNamedCaptureMap() {
207
803
    if (fNamedCaptureMap) {
208
471
        return true;
209
471
    }
210
332
    fNamedCaptureMap  = uhash_openSize(uhash_hashUnicodeString,     // Key hash function
211
332
                                       uhash_compareUnicodeString,  // Key comparator function
212
332
                                       uhash_compareLong,           // Value comparator function
213
332
                                       7,                           // Initial table capacity
214
332
                                       &fDeferredStatus);
215
332
    if (U_FAILURE(fDeferredStatus)) {
216
0
        return false;
217
0
    }
218
219
    // fNamedCaptureMap owns its key strings, type (UnicodeString *)
220
332
    uhash_setKeyDeleter(fNamedCaptureMap, uprv_deleteUObject);
221
332
    return true;
222
332
}
223
224
//--------------------------------------------------------------------------
225
//
226
//   zap            Delete everything owned by this RegexPattern.
227
//
228
//--------------------------------------------------------------------------
229
25.1k
void RegexPattern::zap() {
230
25.1k
    delete fCompiledPat;
231
25.1k
    fCompiledPat = nullptr;
232
25.1k
    int i;
233
404k
    for (i=1; i<fSets->size(); i++) {
234
379k
        UnicodeSet *s;
235
379k
        s = static_cast<UnicodeSet*>(fSets->elementAt(i));
236
379k
        delete s;
237
379k
    }
238
25.1k
    delete fSets;
239
25.1k
    fSets = nullptr;
240
25.1k
    delete[] fSets8;
241
25.1k
    fSets8 = nullptr;
242
25.1k
    delete fGroupMap;
243
25.1k
    fGroupMap = nullptr;
244
25.1k
    delete fInitialChars;
245
25.1k
    fInitialChars = nullptr;
246
25.1k
    delete fInitialChars8;
247
25.1k
    fInitialChars8 = nullptr;
248
25.1k
    if (fPattern != nullptr) {
249
25.1k
        utext_close(fPattern);
250
25.1k
        fPattern = nullptr;
251
25.1k
    }
252
25.1k
    if (fPatternString != nullptr) {
253
12.8k
        delete fPatternString;
254
12.8k
        fPatternString = nullptr;
255
12.8k
    }
256
25.1k
    if (fNamedCaptureMap != nullptr) {
257
332
        uhash_close(fNamedCaptureMap);
258
332
        fNamedCaptureMap = nullptr;
259
332
    }
260
25.1k
}
261
262
263
//--------------------------------------------------------------------------
264
//
265
//   Destructor
266
//
267
//--------------------------------------------------------------------------
268
25.1k
// All owned resources are released by zap().
RegexPattern::~RegexPattern() {
    zap();
}
271
272
273
//--------------------------------------------------------------------------
274
//
275
//   Clone
276
//
277
//--------------------------------------------------------------------------
278
0
// Return a heap-allocated copy of this pattern, made with the copy constructor.
RegexPattern  *RegexPattern::clone() const {
    return new RegexPattern(*this);
}
282
283
284
//--------------------------------------------------------------------------
285
//
286
//   operator ==   (comparison)    Consider two patterns to be == if the
287
//                                 pattern strings and the flags are the same.
288
//                                 Note that pattern strings with the same
289
//                                 characters can still be considered different.
290
//
291
//--------------------------------------------------------------------------
292
0
// Two patterns compare equal when their flags and deferred status match and
// their pattern text is the same (or both have no pattern text).
bool    RegexPattern::operator ==(const RegexPattern &other) const {
    if (fFlags != other.fFlags || fDeferredStatus != other.fDeferredStatus) {
        return false;
    }

    // Both sides hold a UnicodeString: compare the strings directly.
    if (fPatternString != nullptr && other.fPatternString != nullptr) {
        return *fPatternString == *other.fPatternString;
    }

    // Otherwise fall back to the UText form of the pattern.
    if (fPattern == nullptr) {
        return other.fPattern == nullptr;
    }
    if (other.fPattern == nullptr) {
        return false;
    }
    UTEXT_SETNATIVEINDEX(fPattern, 0);
    UTEXT_SETNATIVEINDEX(other.fPattern, 0);
    return utext_equals(fPattern, other.fPattern);
}
308
309
//---------------------------------------------------------------------
310
//
311
//   compile
312
//
313
//---------------------------------------------------------------------
314
// Compile a pattern supplied as a UnicodeString.
// Returns the new pattern, or nullptr with `status` set on any failure.
RegexPattern * U_EXPORT2
RegexPattern::compile(const UnicodeString &regex,
                      uint32_t             flags,
                      UParseError          &pe,
                      UErrorCode           &status)
{
    if (U_FAILURE(status)) {
        return nullptr;
    }

    // Every option bit a caller is allowed to pass.
    const uint32_t knownFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
                                UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
                                UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
    if ((flags & ~knownFlags) != 0) {
        status = U_REGEX_INVALID_FLAG;
        return nullptr;
    }

    // Canonical equivalence is accepted as a flag value but not implemented.
    if ((flags & UREGEX_CANON_EQ) != 0) {
        status = U_REGEX_UNIMPLEMENTED;
        return nullptr;
    }

    RegexPattern *newPattern = new RegexPattern;
    if (newPattern == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    if (U_FAILURE(newPattern->fDeferredStatus)) {
        status = newPattern->fDeferredStatus;
        delete newPattern;
        return nullptr;
    }
    newPattern->fFlags = flags;

    RegexCompile     compiler(newPattern, status);
    compiler.compile(regex, pe, status);

    if (U_SUCCESS(status)) {
        return newPattern;
    }
    delete newPattern;
    return nullptr;
}
360
361
362
//
363
//   compile, UText mode
364
//
365
// Compile a pattern supplied as a UText.
// Returns the new pattern, or nullptr with `status` set on any failure.
RegexPattern * U_EXPORT2
RegexPattern::compile(UText                *regex,
                      uint32_t             flags,
                      UParseError          &pe,
                      UErrorCode           &status)
{
    if (U_FAILURE(status)) {
        return nullptr;
    }

    // Every option bit a caller is allowed to pass.
    const uint32_t knownFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
                                UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
                                UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
    if ((flags & ~knownFlags) != 0) {
        status = U_REGEX_INVALID_FLAG;
        return nullptr;
    }

    // Canonical equivalence is accepted as a flag value but not implemented.
    if ((flags & UREGEX_CANON_EQ) != 0) {
        status = U_REGEX_UNIMPLEMENTED;
        return nullptr;
    }

    RegexPattern *newPattern = new RegexPattern;
    if (newPattern == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    if (U_FAILURE(newPattern->fDeferredStatus)) {
        status = newPattern->fDeferredStatus;
        delete newPattern;
        return nullptr;
    }
    newPattern->fFlags = flags;

    RegexCompile     compiler(newPattern, status);
    compiler.compile(regex, pe, status);

    if (U_SUCCESS(status)) {
        return newPattern;
    }
    delete newPattern;
    return nullptr;
}
411
412
//
413
//   compile with default flags.
414
//
415
// Convenience overload: forwards to the full compile() with a flags value of 0.
RegexPattern * U_EXPORT2
RegexPattern::compile(const UnicodeString &regex,
                      UParseError         &pe,
                      UErrorCode          &err)
{
    const uint32_t noFlags = 0;
    return compile(regex, noFlags, pe, err);
}
422
423
424
//
425
//   compile with default flags, UText mode
426
//
427
// Convenience overload (UText): forwards to the full compile() with a flags value of 0.
RegexPattern * U_EXPORT2
RegexPattern::compile(UText               *regex,
                      UParseError         &pe,
                      UErrorCode          &err)
{
    const uint32_t noFlags = 0;
    return compile(regex, noFlags, pe, err);
}
434
435
436
//
437
//   compile with no UParseError parameter.
438
//
439
// Convenience overload for callers that do not want parse-error details:
// a local UParseError is passed to the full compile() and then discarded.
RegexPattern * U_EXPORT2
RegexPattern::compile(const UnicodeString &regex,
                      uint32_t             flags,
                      UErrorCode          &err)
{
    UParseError discardedParseError;
    return compile(regex, flags, discardedParseError, err);
}
447
448
449
//
450
//   compile with no UParseError parameter, UText mode
451
//
452
// Convenience overload (UText) for callers that do not want parse-error details:
// a local UParseError is passed to the full compile() and then discarded.
RegexPattern * U_EXPORT2
RegexPattern::compile(UText                *regex,
                      uint32_t             flags,
                      UErrorCode           &err)
{
    UParseError discardedParseError;
    return compile(regex, flags, discardedParseError, err);
}
460
461
462
//---------------------------------------------------------------------
463
//
464
//   flags
465
//
466
//---------------------------------------------------------------------
467
0
// Accessor for the option flags stored in fFlags.
uint32_t RegexPattern::flags() const {
    return fFlags;
}
470
471
472
//---------------------------------------------------------------------
473
//
474
//   matcher(UnicodeString, err)
475
//
476
//---------------------------------------------------------------------
477
// Create a matcher for this pattern and point it at `input`.
// Returns nullptr when the matcher could not be created.
RegexMatcher *RegexPattern::matcher(const UnicodeString &input,
                                    UErrorCode          &status)  const {
    RegexMatcher *newMatcher = matcher(status);
    if (newMatcher == nullptr) {
        return nullptr;
    }
    newMatcher->fDeferredStatus = status;
    newMatcher->reset(input);
    return newMatcher;
}
486
487
488
//---------------------------------------------------------------------
489
//
490
//   matcher(status)
491
//
492
//---------------------------------------------------------------------
493
16.0k
// Create a matcher for this pattern with no input attached.
// Returns nullptr, with `status` set, when the incoming status is a failure,
// when this pattern carries a deferred error, or when allocation fails.
RegexMatcher *RegexPattern::matcher(UErrorCode &status)  const {
    if (U_FAILURE(status)) {
        return nullptr;
    }
    if (U_FAILURE(fDeferredStatus)) {
        status = fDeferredStatus;
        return nullptr;
    }

    RegexMatcher *newMatcher = new RegexMatcher(this);
    if (newMatcher == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
    }
    return newMatcher;
}
511
512
513
514
//---------------------------------------------------------------------
515
//
516
//   matches        Convenience function to test for a match, starting
517
//                  with a pattern string and a data string.
518
//
519
//---------------------------------------------------------------------
520
// Convenience function: compile `regex` with a flags value of 0 and test
// whether it matches `input`.
//   pe      receives parse error details from compile().
//   status  receives any error; on error the result is false.
// The temporary pattern and matcher are deleted before returning.
UBool U_EXPORT2 RegexPattern::matches(const UnicodeString   &regex,
              const UnicodeString   &input,
                    UParseError     &pe,
                    UErrorCode      &status) {

    if (U_FAILURE(status)) {return false;}

    RegexPattern *pat = RegexPattern::compile(regex, 0, pe, status);
    if (pat == nullptr) {
        // compile() returns nullptr only after setting a failure in status.
        return false;
    }

    UBool retVal = false;
    RegexMatcher *m = pat->matcher(input, status);
    if (m != nullptr) {
        // Never call through a null matcher; a null result means status is set.
        retVal = m->matches(status);
    }

    delete m;
    delete pat;
    return retVal;
}
539
540
541
//
542
//   matches, UText mode
543
//
544
// Convenience function, UText mode: compile `regex` with a flags value of 0
// and test whether it matches `input`.
//   pe      receives parse error details from compile().
//   status  receives any error; on error the result is false.
// The temporary pattern and matcher are deleted before returning.
UBool U_EXPORT2 RegexPattern::matches(UText                *regex,
                    UText           *input,
                    UParseError     &pe,
                    UErrorCode      &status) {

    if (U_FAILURE(status)) {return false;}

    RegexPattern *pat = RegexPattern::compile(regex, 0, pe, status);
    if (pat == nullptr) {
        // compile() returns nullptr only after setting a failure in status.
        return false;
    }

    UBool retVal = false;
    RegexMatcher *m = pat->matcher(status);
    if (U_SUCCESS(status) && m != nullptr) {
        m->reset(input);
        retVal = m->matches(status);
    }

    delete m;
    delete pat;
    return retVal;
}
566
567
568
569
570
571
//---------------------------------------------------------------------
572
//
573
//   pattern
574
//
575
//---------------------------------------------------------------------
576
0
// Return the pattern text as a UnicodeString: a copy of fPatternString when
// one exists, an empty string when there is no pattern text at all, and
// otherwise the UTF-16 contents extracted from the fPattern UText.
UnicodeString RegexPattern::pattern() const {
    if (fPatternString != nullptr) {
        return *fPatternString;
    }
    if (fPattern == nullptr) {
        return {};
    }

    const int64_t nativeLen = utext_nativeLength(fPattern);

    // First pass, with no destination buffer, only measures the UTF-16 length.
    // The status it produces is deliberately thrown away (buffer overflow error).
    UErrorCode status = U_ZERO_ERROR;
    const int32_t len16 = utext_extract(fPattern, 0, nativeLen, nullptr, 0, &status);

    // Second pass fills the string's own buffer (unterminated warning is ignored).
    status = U_ZERO_ERROR;
    UnicodeString result;
    char16_t *resultChars = result.getBuffer(len16);
    utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status);
    result.releaseBuffer(len16);

    return result;
}
595
596
597
598
599
//---------------------------------------------------------------------
600
//
601
//   patternText
602
//
603
//---------------------------------------------------------------------
604
0
// Return the pattern text as a UText.  The returned UText is not a copy:
// it is either this pattern's own fPattern or, when the pattern has no text,
// the shared empty text held by RegexStaticSets.
//   status  on entry, a failure causes an immediate nullptr return;
//           on exit, holds any error from initializing the static sets.
// Returns nullptr on failure.
UText *RegexPattern::patternText(UErrorCode      &status) const {
    if (U_FAILURE(status)) {return nullptr;}
    status = U_ZERO_ERROR;

    if (fPattern != nullptr) {
        return fPattern;
    }

    RegexStaticSets::initGlobals(&status);
    if (U_FAILURE(status)) {
        // Do not touch gStaticSets when its initialization reported an error.
        return nullptr;
    }
    return RegexStaticSets::gStaticSets->fEmptyText;
}
615
616
617
//--------------------------------------------------------------------------------
618
//
619
//  groupNumberFromName()
620
//
621
//--------------------------------------------------------------------------------
622
0
// Look up a named capture group and return its group number.
// Sets U_REGEX_INVALID_CAPTURE_GROUP_NAME and returns 0 when the name is unknown.
int32_t RegexPattern::groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const {
    if (U_FAILURE(status)) {
        return 0;
    }

    // No need to explicitly check for syntactically valid names.
    // Invalid ones will never be in the map, and the lookup will fail.
    int32_t groupNumber = 0;
    if (fNamedCaptureMap != nullptr) {
        groupNumber = uhash_geti(fNamedCaptureMap, &groupName);
    }
    if (groupNumber == 0) {
        status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
    }
    return groupNumber;
}
636
637
0
// char* flavor: wraps the name in a UnicodeString (US_INV conversion) and
// defers to the UnicodeString overload.
int32_t RegexPattern::groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const {
    if (U_FAILURE(status)) {
        return 0;
    }
    const UnicodeString unicodeName(groupName, nameLength, US_INV);
    return groupNumberFromName(unicodeName, status);
}
644
645
646
//---------------------------------------------------------------------
647
//
648
//   split
649
//
650
//---------------------------------------------------------------------
651
// Split `input` into fields using this pattern as the delimiter, by
// delegating to a temporary RegexMatcher.
//   dest, destCapacity  output array and its size, passed through to the matcher.
//   status              receives any error, including a failure to set up
//                       the temporary matcher.
// Returns the value produced by RegexMatcher::split(), or 0 on failure.
int32_t  RegexPattern::split(const UnicodeString &input,
        UnicodeString    dest[],
        int32_t          destCapacity,
        UErrorCode      &status) const
{
    if (U_FAILURE(status)) {
        return 0;
    }

    RegexMatcher  m(this);
    // Check m's status to make sure all is ok.  Report a construction failure
    // to the caller instead of returning 0 with a success status.
    if (U_FAILURE(m.fDeferredStatus)) {
        status = m.fDeferredStatus;
        return 0;
    }
    return m.split(input, dest, destCapacity, status);
}
668
669
//
670
//   split, UText mode
671
//
672
// Split `input` into fields using this pattern as the delimiter, UText mode,
// by delegating to a temporary RegexMatcher.
//   dest, destCapacity  output array and its size, passed through to the matcher.
//   status              receives any error, including a failure to set up
//                       the temporary matcher.
// Returns the value produced by RegexMatcher::split(), or 0 on failure.
int32_t  RegexPattern::split(UText *input,
        UText           *dest[],
        int32_t          destCapacity,
        UErrorCode      &status) const
{
    if (U_FAILURE(status)) {
        return 0;
    }

    RegexMatcher  m(this);
    // Check m's status to make sure all is ok.  Report a construction failure
    // to the caller instead of returning 0 with a success status.
    if (U_FAILURE(m.fDeferredStatus)) {
        status = m.fDeferredStatus;
        return 0;
    }
    return m.split(input, dest, destCapacity, status);
}
689
690
691
//---------------------------------------------------------------------
692
//
693
//   dump    Output the compiled form of the pattern.
694
//           Debugging function only.
695
//
696
//---------------------------------------------------------------------
697
0
// Print one compiled-pattern operation (the element of fCompiledPat at `index`)
// to stdout.  Does nothing unless REGEX_DEBUG is defined.
void   RegexPattern::dumpOp(int32_t index) const {
    (void)index;  // Suppress warnings in non-debug build.
#if defined(REGEX_DEBUG)
    static const char * const opNames[] = {URX_OPCODE_NAMES};
    int32_t op   = fCompiledPat->elementAti(index);
    int32_t val  = URX_VAL(op);
    int32_t type = URX_TYPE(op);

    // Index into opNames; an opcode outside the table is shown with entry zero.
    const int32_t nameIdx =
        (static_cast<uint32_t>(type) >= UPRV_LENGTHOF(opNames)) ? 0 : type;

    printf("%4d   %08x    %-15s  ", index, op, opNames[nameIdx]);
    switch (type) {
    // Types with no operand field of interest.
    case URX_NOP:
    case URX_DOTANY:
    case URX_DOTANY_ALL:
    case URX_FAIL:
    case URX_CARET:
    case URX_DOLLAR:
    case URX_BACKSLASH_G:
    case URX_BACKSLASH_X:
    case URX_END:
    case URX_DOLLAR_M:
    case URX_CARET_M:
        break;

    // Types with an integer operand field.
    case URX_RESERVED_OP:
    case URX_START_CAPTURE:
    case URX_END_CAPTURE:
    case URX_STATE_SAVE:
    case URX_JMP:
    case URX_JMP_SAV:
    case URX_JMP_SAV_X:
    case URX_BACKSLASH_B:
    case URX_BACKSLASH_BU:
    case URX_BACKSLASH_D:
    case URX_BACKSLASH_Z:
    case URX_STRING_LEN:
    case URX_CTR_INIT:
    case URX_CTR_INIT_NG:
    case URX_CTR_LOOP:
    case URX_CTR_LOOP_NG:
    case URX_RELOC_OPRND:
    case URX_STO_SP:
    case URX_LD_SP:
    case URX_BACKREF:
    case URX_STO_INP_LOC:
    case URX_JMPX:
    case URX_LA_START:
    case URX_LA_END:
    case URX_BACKREF_I:
    case URX_LB_START:
    case URX_LB_CONT:
    case URX_LB_END:
    case URX_LBN_CONT:
    case URX_LBN_END:
    case URX_LOOP_C:
    case URX_LOOP_DOT_I:
    case URX_BACKSLASH_H:
    case URX_BACKSLASH_R:
    case URX_BACKSLASH_V:
        printf("%d", val);
        break;

    // Single character operand: hex for values below 0x20, else the character.
    case URX_ONECHAR:
    case URX_ONECHAR_I:
        if (val < 0x20) {
            printf("%#x", val);
        } else {
            printf("'%s'", CStr(UnicodeString(val))());
        }
        break;

    // String operand: val indexes fLiteralText, the following op carries the length.
    case URX_STRING:
    case URX_STRING_I:
        {
            int32_t lengthOp = fCompiledPat->elementAti(index+1);
            U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
            int32_t length = URX_VAL(lengthOp);
            UnicodeString str(fLiteralText, val, length);
            printf("%s", CStr(str)());
        }
        break;

    // Operand is an index into this pattern's fSets.
    case URX_SETREF:
    case URX_LOOP_SR_I:
        {
            UnicodeString s;
            UnicodeSet *set = static_cast<UnicodeSet *>(fSets->elementAt(val));
            set->toPattern(s, true);
            printf("%s", CStr(s)());
        }
        break;

    // Operand is an index into the shared static property sets, optionally negated.
    case URX_STATIC_SETREF:
    case URX_STAT_SETREF_N:
        {
            UnicodeString s;
            if (val & URX_NEG_SET) {
                printf("NOT ");
                val &= ~URX_NEG_SET;
            }
            UnicodeSet &set = RegexStaticSets::gStaticSets->fPropSets[val];
            set.toPattern(s, true);
            printf("%s", CStr(s)());
        }
        break;

    default:
        printf("??????");
        break;
    }
    printf("\n");
#endif
}
816
817
818
0
// Print the original pattern, its match-start information, its named capture
// groups and every compiled operation to stdout.
// Does nothing unless REGEX_DEBUG is defined.
void RegexPattern::dumpPattern() const {
#if defined(REGEX_DEBUG)
    // Rebuild the pattern text from the UText, one code point at a time.
    UnicodeString patStr;
    UChar32 c = utext_next32From(fPattern, 0);
    while (c != U_SENTINEL) {
        patStr.append(c);
        c = utext_next32(fPattern);
    }
    printf("Original Pattern:  \"%s\"\n", CStr(patStr)());
    printf("   Min Match Length:  %d\n", fMinMatchLen);
    printf("   Match Start Type:  %s\n", START_OF_MATCH_STR(fStartType));

    // Extra detail for the start types that carry data.
    if (fStartType == START_STRING) {
        UnicodeString initialString(fLiteralText,fInitialStringIdx, fInitialStringLen);
        printf("   Initial match string: \"%s\"\n", CStr(initialString)());
    } else if (fStartType == START_SET) {
        UnicodeString s;
        fInitialChars->toPattern(s, true);
        printf("    Match First Chars: %s\n", CStr(s)());
    } else if (fStartType == START_CHAR) {
        printf("    First char of Match: ");
        if (fInitialChar > 0x20) {
            printf("'%s'\n", CStr(UnicodeString(fInitialChar))());
        } else {
            printf("%#x\n", fInitialChar);
        }
    }

    printf("Named Capture Groups:\n");
    const bool haveNamedGroups = fNamedCaptureMap && uhash_count(fNamedCaptureMap) != 0;
    if (!haveNamedGroups) {
        printf("   None\n");
    } else {
        int32_t pos = UHASH_FIRST;
        for (const UHashElement *el = uhash_nextElement(fNamedCaptureMap, &pos);
             el != nullptr;
             el = uhash_nextElement(fNamedCaptureMap, &pos)) {
            const UnicodeString *name = static_cast<const UnicodeString *>(el->key.pointer);
            int32_t number = el->value.integer;
            printf("   %d\t%s\n", number, CStr(*name)());
        }
    }

    printf("\nIndex   Binary     Type             Operand\n"
           "-------------------------------------------\n");
    for (int index = 0; index<fCompiledPat->size(); index++) {
        dumpOp(index);
    }
    printf("\n\n");
#endif
}
867
868
869
870
// RTTI support for RegexPattern.  NOTE(review): the macro is defined outside
// this file; presumably it supplies the class-ID functions -- confirm there.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern)
871
872
U_NAMESPACE_END
873
#endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS