Coverage Report

Created: 2022-11-20 06:20

/src/icu/icu4c/source/i18n/repattrn.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
//
4
//  file:  repattrn.cpp
5
//
6
/*
7
***************************************************************************
8
*   Copyright (C) 2002-2016 International Business Machines Corporation
9
*   and others. All rights reserved.
10
***************************************************************************
11
*/
12
13
#include "unicode/utypes.h"
14
15
#if !UCONFIG_NO_REGULAR_EXPRESSIONS
16
17
#include "unicode/regex.h"
18
#include "unicode/uclean.h"
19
#include "cmemory.h"
20
#include "cstr.h"
21
#include "uassert.h"
22
#include "uhash.h"
23
#include "uvector.h"
24
#include "uvectr32.h"
25
#include "uvectr64.h"
26
#include "regexcmp.h"
27
#include "regeximp.h"
28
#include "regexst.h"
29
30
U_NAMESPACE_BEGIN
31
32
//--------------------------------------------------------------------------
33
//
34
//    RegexPattern    Default Constructor
35
//
36
//--------------------------------------------------------------------------
37
10.4k
// Default constructor.  All member set-up is delegated to init(); any
// failure during that set-up is recorded in fDeferredStatus.
RegexPattern::RegexPattern()
{
    init();
}
41
42
43
//--------------------------------------------------------------------------
44
//
45
//   Copy Constructor        Note:  This is a rather inefficient implementation,
46
//                                  but it probably doesn't matter.
47
//
48
//--------------------------------------------------------------------------
49
0
// Copy constructor.  Brings the new object to the default state and then
// reuses the assignment operator to duplicate `other`.
RegexPattern::RegexPattern(const RegexPattern &other)
    : UObject(other)
{
    init();
    operator=(other);
}
53
54
55
56
//--------------------------------------------------------------------------
57
//
58
//    Assignment Operator
59
//
60
//--------------------------------------------------------------------------
61
0
// Assignment operator: discards the current contents of this pattern and
// replaces them with a deep copy of `other`.
//
// Failures are not reported to the caller directly; they are left in
// fDeferredStatus, and the object is returned in a partially copied state.
//
// @param other  the pattern to copy from.
// @return       *this.
RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
    if (this == &other) {
        // Source and destination are the same.  Don't do anything.
        return *this;
    }

    // Clean out any previous contents of the object being assigned to,
    // then give it a default initialization.
    zap();
    init();
    if (U_FAILURE(fDeferredStatus)) {
        // init() could not build the default state, so some of the members
        // dereferenced below may be NULL.  Keep init()'s error instead of
        // overwriting it with other's status.
        return *this;
    }

    // A source pattern that is itself in error is copied as "in error" only.
    fDeferredStatus   = other.fDeferredStatus;
    if (U_FAILURE(fDeferredStatus)) {
        return *this;
    }

    // Copy the pattern text.
    if (other.fPatternString == NULL) {
        fPatternString = NULL;
        // A default-constructed (never compiled) pattern has no UText;
        // utext_clone() must not be handed a NULL source.
        if (other.fPattern != NULL) {
            fPattern = utext_clone(fPattern, other.fPattern, false, true, &fDeferredStatus);
        }
    } else {
        fPatternString = new UnicodeString(*(other.fPatternString));
        if (fPatternString == NULL) {
            fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
        } else {
            fPattern = utext_openConstUnicodeString(NULL, fPatternString, &fDeferredStatus);
        }
    }
    if (U_FAILURE(fDeferredStatus)) {
        return *this;
    }

    // Copy simple fields.
    fFlags            = other.fFlags;
    fLiteralText      = other.fLiteralText;
    fMinMatchLen      = other.fMinMatchLen;
    fFrameSize        = other.fFrameSize;
    fDataSize         = other.fDataSize;

    fStartType        = other.fStartType;
    fInitialStringIdx = other.fInitialStringIdx;
    fInitialStringLen = other.fInitialStringLen;
    *fInitialChars    = *other.fInitialChars;
    fInitialChar      = other.fInitialChar;
    *fInitialChars8   = *other.fInitialChars8;
    fNeedsAltInput    = other.fNeedsAltInput;

    //  Copy the compiled pattern.  It's just values, nothing deep to copy.
    fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus);
    fGroupMap->assign(*other.fGroupMap, fDeferredStatus);

    //  Copy the Unicode Sets.
    //    Could be made more efficient if the sets were reference counted and shared,
    //    but pattern copying is not expected to be particularly common.
    //    Note:  init() already added an empty element zero to fSets.
    const int32_t numSets = other.fSets->size();
    fSets8 = new Regex8BitSet[numSets];
    if (fSets8 == NULL) {
        fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
        return *this;
    }
    for (int32_t i = 1; i < numSets; i++) {
        if (U_FAILURE(fDeferredStatus)) {
            return *this;
        }
        UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i);
        UnicodeSet *newSet    = new UnicodeSet(*sourceSet);
        if (newSet == NULL) {
            fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
            return *this;
        }
        fSets->addElement(newSet, fDeferredStatus);
        if (U_FAILURE(fDeferredStatus)) {
            // The set was not stored in fSets, so zap() will never see it;
            // free it here to avoid a leak.
            delete newSet;
            return *this;
        }
        fSets8[i] = other.fSets8[i];
    }

    // Copy the named capture group hash map.  The map owns its keys, so each
    // name is duplicated before insertion.
    if (other.fNamedCaptureMap != nullptr && initNamedCaptureMap()) {
        int32_t hashPos = UHASH_FIRST;
        while (const UHashElement *hashEl = uhash_nextElement(other.fNamedCaptureMap, &hashPos)) {
            if (U_FAILURE(fDeferredStatus)) {
                break;
            }
            const UnicodeString *name = (const UnicodeString *)hashEl->key.pointer;
            UnicodeString *key = new UnicodeString(*name);
            int32_t val = hashEl->value.integer;
            if (key == NULL) {
                fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
            } else {
                uhash_puti(fNamedCaptureMap, key, val, &fDeferredStatus);
            }
        }
    }
    return *this;
}
157
158
159
//--------------------------------------------------------------------------
160
//
161
//    init        Shared initialization for use by constructors.
162
//                Bring an uninitialized RegexPattern up to a default state.
163
//
164
//--------------------------------------------------------------------------
165
10.4k
void RegexPattern::init() {
166
10.4k
    fFlags            = 0;
167
10.4k
    fCompiledPat      = 0;
168
10.4k
    fLiteralText.remove();
169
10.4k
    fSets             = NULL;
170
10.4k
    fSets8            = NULL;
171
10.4k
    fDeferredStatus   = U_ZERO_ERROR;
172
10.4k
    fMinMatchLen      = 0;
173
10.4k
    fFrameSize        = 0;
174
10.4k
    fDataSize         = 0;
175
10.4k
    fGroupMap         = NULL;
176
10.4k
    fStartType        = START_NO_INFO;
177
10.4k
    fInitialStringIdx = 0;
178
10.4k
    fInitialStringLen = 0;
179
10.4k
    fInitialChars     = NULL;
180
10.4k
    fInitialChar      = 0;
181
10.4k
    fInitialChars8    = NULL;
182
10.4k
    fNeedsAltInput    = false;
183
10.4k
    fNamedCaptureMap  = NULL;
184
185
10.4k
    fPattern          = NULL; // will be set later
186
10.4k
    fPatternString    = NULL; // may be set later
187
10.4k
    fCompiledPat      = new UVector64(fDeferredStatus);
188
10.4k
    fGroupMap         = new UVector32(fDeferredStatus);
189
10.4k
    fSets             = new UVector(fDeferredStatus);
190
10.4k
    fInitialChars     = new UnicodeSet;
191
10.4k
    fInitialChars8    = new Regex8BitSet;
192
10.4k
    if (U_FAILURE(fDeferredStatus)) {
193
0
        return;
194
0
    }
195
10.4k
    if (fCompiledPat == NULL  || fGroupMap == NULL || fSets == NULL ||
196
10.4k
            fInitialChars == NULL || fInitialChars8 == NULL) {
197
0
        fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
198
0
        return;
199
0
    }
200
201
    // Slot zero of the vector of sets is reserved.  Fill it here.
202
10.4k
    fSets->addElement((int32_t)0, fDeferredStatus);
203
10.4k
}
204
205
206
507
bool RegexPattern::initNamedCaptureMap() {
207
507
    if (fNamedCaptureMap) {
208
319
        return true;
209
319
    }
210
188
    fNamedCaptureMap  = uhash_openSize(uhash_hashUnicodeString,     // Key hash function
211
188
                                       uhash_compareUnicodeString,  // Key comparator function
212
188
                                       uhash_compareLong,           // Value comparator function
213
188
                                       7,                           // Initial table capacity
214
188
                                       &fDeferredStatus);
215
188
    if (U_FAILURE(fDeferredStatus)) {
216
0
        return false;
217
0
    }
218
219
    // fNamedCaptureMap owns its key strings, type (UnicodeString *)
220
188
    uhash_setKeyDeleter(fNamedCaptureMap, uprv_deleteUObject);
221
188
    return true;
222
188
}
223
224
//--------------------------------------------------------------------------
225
//
226
//   zap            Delete everything owned by this RegexPattern.
227
//
228
//--------------------------------------------------------------------------
229
10.4k
void RegexPattern::zap() {
230
10.4k
    delete fCompiledPat;
231
10.4k
    fCompiledPat = NULL;
232
10.4k
    int i;
233
146k
    for (i=1; i<fSets->size(); i++) {
234
135k
        UnicodeSet *s;
235
135k
        s = (UnicodeSet *)fSets->elementAt(i);
236
135k
        if (s != NULL) {
237
135k
            delete s;
238
135k
        }
239
135k
    }
240
10.4k
    delete fSets;
241
10.4k
    fSets = NULL;
242
10.4k
    delete[] fSets8;
243
10.4k
    fSets8 = NULL;
244
10.4k
    delete fGroupMap;
245
10.4k
    fGroupMap = NULL;
246
10.4k
    delete fInitialChars;
247
10.4k
    fInitialChars = NULL;
248
10.4k
    delete fInitialChars8;
249
10.4k
    fInitialChars8 = NULL;
250
10.4k
    if (fPattern != NULL) {
251
10.4k
        utext_close(fPattern);
252
10.4k
        fPattern = NULL;
253
10.4k
    }
254
10.4k
    if (fPatternString != NULL) {
255
0
        delete fPatternString;
256
0
        fPatternString = NULL;
257
0
    }
258
10.4k
    if (fNamedCaptureMap != NULL) {
259
188
        uhash_close(fNamedCaptureMap);
260
188
        fNamedCaptureMap = NULL;
261
188
    }
262
10.4k
}
263
264
265
//--------------------------------------------------------------------------
266
//
267
//   Destructor
268
//
269
//--------------------------------------------------------------------------
270
10.4k
// Destructor.  zap() releases everything this pattern owns.
RegexPattern::~RegexPattern()
{
    zap();
}
273
274
275
//--------------------------------------------------------------------------
276
//
277
//   Clone
278
//
279
//--------------------------------------------------------------------------
280
0
// Returns a heap-allocated copy of this pattern made with the copy
// constructor.  The caller receives the raw pointer.
RegexPattern  *RegexPattern::clone() const {
    return new RegexPattern(*this);
}
284
285
286
//--------------------------------------------------------------------------
287
//
288
//   operator ==   (comparison)    Consider two patterns to be == if the
289
//                                 pattern strings and the flags are the same.
290
//                                 Note that pattern strings with the same
291
//                                 characters can still be considered different.
292
//
293
//--------------------------------------------------------------------------
294
0
// Equality: two patterns compare equal when their flags and deferred status
// match and their pattern text is the same.  The text is compared through the
// UnicodeString copies when both sides have one, otherwise through the UTexts.
bool    RegexPattern::operator ==(const RegexPattern &other) const {
    if (fFlags != other.fFlags || fDeferredStatus != other.fDeferredStatus) {
        return false;
    }

    if (fPatternString != NULL && other.fPatternString != NULL) {
        return *fPatternString == *other.fPatternString;
    }

    if (fPattern == NULL) {
        // No text on this side: equal only if the other side has none either.
        return other.fPattern == NULL;
    }
    if (other.fPattern == NULL) {
        return false;
    }

    // Rewind both texts before comparing them.
    UTEXT_SETNATIVEINDEX(fPattern, 0);
    UTEXT_SETNATIVEINDEX(other.fPattern, 0);
    return utext_equals(fPattern, other.fPattern);
}
310
311
//---------------------------------------------------------------------
312
//
313
//   compile
314
//
315
//---------------------------------------------------------------------
316
// Compiles `regex` with the given flags.
//
// @param regex   the pattern source.
// @param flags   bit set of UREGEX_* options; unknown bits are rejected with
//                U_REGEX_INVALID_FLAG and UREGEX_CANON_EQ with
//                U_REGEX_UNIMPLEMENTED.
// @param pe      passed through to the compiler for parse-error details.
// @param status  in/out error code; nothing is done if it already indicates failure.
// @return        the new pattern, or NULL on any failure.
RegexPattern * U_EXPORT2
RegexPattern::compile(const UnicodeString &regex,
                      uint32_t             flags,
                      UParseError          &pe,
                      UErrorCode           &status)
{
    if (U_FAILURE(status)) {
        return NULL;
    }

    const uint32_t supportedFlags =
        UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
        UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
        UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;

    if ((flags & ~supportedFlags) != 0) {
        status = U_REGEX_INVALID_FLAG;
        return NULL;
    }
    if ((flags & UREGEX_CANON_EQ) != 0) {
        status = U_REGEX_UNIMPLEMENTED;
        return NULL;
    }

    RegexPattern *compiled = new RegexPattern;
    if (compiled == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    // The constructor cannot report errors directly; pick up any deferred one.
    if (U_FAILURE(compiled->fDeferredStatus)) {
        status = compiled->fDeferredStatus;
        delete compiled;
        return NULL;
    }
    compiled->fFlags = flags;

    RegexCompile     compiler(compiled, status);
    compiler.compile(regex, pe, status);

    if (U_SUCCESS(status)) {
        return compiled;
    }
    delete compiled;
    return NULL;
}
362
363
364
//
365
//   compile, UText mode
366
//
367
// Compiles the pattern held in the UText `regex` with the given flags.
//
// @param regex   the pattern source.
// @param flags   bit set of UREGEX_* options; unknown bits are rejected with
//                U_REGEX_INVALID_FLAG and UREGEX_CANON_EQ with
//                U_REGEX_UNIMPLEMENTED.
// @param pe      passed through to the compiler for parse-error details.
// @param status  in/out error code; nothing is done if it already indicates failure.
// @return        the new pattern, or NULL on any failure.
RegexPattern * U_EXPORT2
RegexPattern::compile(UText                *regex,
                      uint32_t             flags,
                      UParseError          &pe,
                      UErrorCode           &status)
{
    if (U_FAILURE(status)) {
        return NULL;
    }

    const uint32_t supportedFlags =
        UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
        UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
        UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;

    if ((flags & ~supportedFlags) != 0) {
        status = U_REGEX_INVALID_FLAG;
        return NULL;
    }
    if ((flags & UREGEX_CANON_EQ) != 0) {
        status = U_REGEX_UNIMPLEMENTED;
        return NULL;
    }

    RegexPattern *compiled = new RegexPattern;
    if (compiled == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    // The constructor cannot report errors directly; pick up any deferred one.
    if (U_FAILURE(compiled->fDeferredStatus)) {
        status = compiled->fDeferredStatus;
        delete compiled;
        return NULL;
    }
    compiled->fFlags = flags;

    RegexCompile     compiler(compiled, status);
    compiler.compile(regex, pe, status);

    if (U_SUCCESS(status)) {
        return compiled;
    }
    delete compiled;
    return NULL;
}
413
414
//
415
//   compile with default flags.
416
//
417
// Convenience overload: compiles `regex` with no option flags set.
RegexPattern * U_EXPORT2
RegexPattern::compile(const UnicodeString &regex,
                      UParseError         &pe,
                      UErrorCode          &err)
{
    const uint32_t noFlags = 0;
    return compile(regex, noFlags, pe, err);
}
424
425
426
//
427
//   compile with default flags, UText mode
428
//
429
// Convenience overload, UText mode: compiles `regex` with no option flags set.
RegexPattern * U_EXPORT2
RegexPattern::compile(UText               *regex,
                      UParseError         &pe,
                      UErrorCode          &err)
{
    const uint32_t noFlags = 0;
    return compile(regex, noFlags, pe, err);
}
436
437
438
//
439
//   compile with no UParseErr parameter.
440
//
441
// Convenience overload for callers that do not want parse-error details:
// a local UParseError receives them and is discarded.
RegexPattern * U_EXPORT2
RegexPattern::compile(const UnicodeString &regex,
                      uint32_t             flags,
                      UErrorCode          &err)
{
    UParseError discardedParseError;
    return compile(regex, flags, discardedParseError, err);
}
449
450
451
//
452
//   compile with no UParseErr parameter, UText mode
453
//
454
// Convenience overload, UText mode, for callers that do not want parse-error
// details: a local UParseError receives them and is discarded.
RegexPattern * U_EXPORT2
RegexPattern::compile(UText                *regex,
                      uint32_t             flags,
                      UErrorCode           &err)
{
    UParseError discardedParseError;
    return compile(regex, flags, discardedParseError, err);
}
462
463
464
//---------------------------------------------------------------------
465
//
466
//   flags
467
//
468
//---------------------------------------------------------------------
469
0
// Returns the option flags stored in this pattern.
uint32_t RegexPattern::flags() const
{
    return fFlags;
}
472
473
474
//---------------------------------------------------------------------
475
//
476
//   matcher(UnicodeString, err)
477
//
478
//---------------------------------------------------------------------
479
// Creates a matcher for this pattern and points it at `input`.
// Returns NULL (with `status` set by matcher(status)) if no matcher could be
// created.
RegexMatcher *RegexPattern::matcher(const UnicodeString &input,
                                    UErrorCode          &status)  const {
    RegexMatcher *newMatcher = matcher(status);
    if (newMatcher == NULL) {
        return NULL;
    }
    newMatcher->fDeferredStatus = status;
    newMatcher->reset(input);
    return newMatcher;
}
488
489
490
//---------------------------------------------------------------------
491
//
492
//   matcher(status)
493
//
494
//---------------------------------------------------------------------
495
3.94k
// Creates a matcher for this pattern with no input attached.
// Returns NULL if `status` already indicates failure, if this pattern carries
// a deferred error (which is then copied into `status`), or if allocation fails.
RegexMatcher *RegexPattern::matcher(UErrorCode &status)  const {
    if (U_FAILURE(status)) {
        return NULL;
    }
    if (U_FAILURE(fDeferredStatus)) {
        status = fDeferredStatus;
        return NULL;
    }

    RegexMatcher *newMatcher = new RegexMatcher(this);
    if (newMatcher == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
    }
    return newMatcher;
}
513
514
515
516
//---------------------------------------------------------------------
517
//
518
//   matches        Convenience function to test for a match, starting
519
//                  with a pattern string and a data string.
520
//
521
//---------------------------------------------------------------------
522
// Convenience function: compiles `regex` with no flags and tests whether it
// matches the whole of `input`.
//
// @param regex   the pattern source.
// @param input   the text to match against.
// @param pe      receives parse-error details from compilation.
// @param status  in/out error code.
// @return        the result of RegexMatcher::matches(), or false if the
//                pattern or matcher could not be created.
UBool U_EXPORT2 RegexPattern::matches(const UnicodeString   &regex,
              const UnicodeString   &input,
                    UParseError     &pe,
                    UErrorCode      &status) {

    if (U_FAILURE(status)) {return false;}

    UBool         retVal  = false;

    RegexPattern *pat = RegexPattern::compile(regex, 0, pe, status);
    if (pat == NULL) {
        // compile() has already set status; do not call through a NULL pattern.
        return false;
    }

    RegexMatcher *matcher = pat->matcher(input, status);
    if (matcher != NULL) {
        retVal = matcher->matches(status);
        delete matcher;
    }

    delete pat;
    return retVal;
}
541
542
543
//
544
//   matches, UText mode
545
//
546
// Convenience function, UText mode: compiles `regex` with no flags and tests
// whether it matches the whole of `input`.
//
// @param regex   the pattern source.
// @param input   the text to match against.
// @param pe      receives parse-error details from compilation.
// @param status  in/out error code.
// @return        the result of RegexMatcher::matches(), or false if the
//                pattern or matcher could not be created.
UBool U_EXPORT2 RegexPattern::matches(UText                *regex,
                    UText           *input,
                    UParseError     &pe,
                    UErrorCode      &status) {

    if (U_FAILURE(status)) {return false;}

    UBool         retVal  = false;

    RegexPattern *pat = RegexPattern::compile(regex, 0, pe, status);
    if (pat == NULL) {
        // compile() has already set status; do not call through a NULL pattern.
        return false;
    }

    RegexMatcher *matcher = pat->matcher(status);
    if (matcher != NULL && U_SUCCESS(status)) {
        matcher->reset(input);
        retVal  = matcher->matches(status);
    }

    delete matcher;
    delete pat;
    return retVal;
}
568
569
570
571
572
573
//---------------------------------------------------------------------
574
//
575
//   pattern
576
//
577
//---------------------------------------------------------------------
578
0
// Returns the pattern source as a UnicodeString.
// Uses the stored UnicodeString copy when there is one; otherwise extracts
// the text from the UText.  Returns an empty string when the pattern has no
// text at all.
UnicodeString RegexPattern::pattern() const {
    if (fPatternString != NULL) {
        return *fPatternString;
    }
    if (fPattern == NULL) {
        return UnicodeString();
    }

    UErrorCode status = U_ZERO_ERROR;
    int64_t nativeLen = utext_nativeLength(fPattern);

    // Preflight with a zero-capacity buffer to learn the UTF-16 length.
    // This is expected to report a buffer overflow error, which is ignored.
    int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status);

    UnicodeString result;
    UChar *resultChars = result.getBuffer(len16);
    if (resultChars == NULL) {
        // No writable buffer was obtained; return the string as it stands
        // rather than extracting through a NULL pointer.
        return result;
    }

    status = U_ZERO_ERROR;
    // The buffer is exactly len16 long, so an "unterminated" warning is
    // expected here and ignored.
    utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status);
    result.releaseBuffer(len16);

    return result;
}
597
598
599
600
601
//---------------------------------------------------------------------
602
//
603
//   patternText
604
//
605
//---------------------------------------------------------------------
606
0
// Returns the UText holding the pattern source.  The returned pointer is the
// pattern's own member (or the shared empty text), not a copy.
//
// @param status  in/out error code; nothing is done if it already indicates
//                failure.  Otherwise it is reset to U_ZERO_ERROR, and set by
//                RegexStaticSets::initGlobals() if the shared empty text is needed.
// @return        fPattern when present; otherwise the shared empty UText, or
//                NULL if the static sets could not be initialized.
UText *RegexPattern::patternText(UErrorCode      &status) const {
    if (U_FAILURE(status)) {return NULL;}
    status = U_ZERO_ERROR;

    if (fPattern != NULL) {
        return fPattern;
    }

    RegexStaticSets::initGlobals(&status);
    if (U_FAILURE(status) || RegexStaticSets::gStaticSets == NULL) {
        // Initialization failed; gStaticSets must not be dereferenced.
        return NULL;
    }
    return RegexStaticSets::gStaticSets->fEmptyText;
}
617
618
619
//--------------------------------------------------------------------------------
620
//
621
//  groupNumberFromName()
622
//
623
//--------------------------------------------------------------------------------
624
0
// Looks up the capture group number for `groupName`.
// Sets `status` to U_REGEX_INVALID_CAPTURE_GROUP_NAME and returns 0 when the
// name is not in the map (or there is no map).
int32_t RegexPattern::groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const {
    if (U_FAILURE(status)) {
        return 0;
    }

    // Names are not validated syntactically here: an invalid name can never
    // be in the map, so the lookup simply fails.
    int32_t number = 0;
    if (fNamedCaptureMap != NULL) {
        number = uhash_geti(fNamedCaptureMap, &groupName);
    }

    if (number == 0) {
        status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
    }
    return number;
}
638
639
0
// char* overload: converts the invariant-character name to a UnicodeString
// and forwards to the UnicodeString overload.
int32_t RegexPattern::groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const {
    if (U_FAILURE(status)) {
        return 0;
    }
    return groupNumberFromName(UnicodeString(groupName, nameLength, US_INV), status);
}
646
647
648
//---------------------------------------------------------------------
649
//
650
//   split
651
//
652
//---------------------------------------------------------------------
653
// Splits `input` using this pattern as the delimiter, via a temporary matcher.
//
// @param input         the text to split.
// @param dest          array receiving the fields.
// @param destCapacity  number of elements in `dest`.
// @param status        in/out error code.  If the temporary matcher could not
//                      be set up, its deferred error is reported here instead
//                      of silently returning 0.
// @return              the value returned by RegexMatcher::split(), or 0 on failure.
int32_t  RegexPattern::split(const UnicodeString &input,
        UnicodeString    dest[],
        int32_t          destCapacity,
        UErrorCode      &status) const
{
    if (U_FAILURE(status)) {
        return 0;
    }

    RegexMatcher  m(this);
    // Check m's status to make sure all is ok.
    if (U_FAILURE(m.fDeferredStatus)) {
        status = m.fDeferredStatus;
        return 0;
    }
    return m.split(input, dest, destCapacity, status);
}
670
671
//
672
//   split, UText mode
673
//
674
// Splits `input` using this pattern as the delimiter, UText mode, via a
// temporary matcher.
//
// @param input         the text to split.
// @param dest          array receiving the fields.
// @param destCapacity  number of elements in `dest`.
// @param status        in/out error code.  If the temporary matcher could not
//                      be set up, its deferred error is reported here instead
//                      of silently returning 0.
// @return              the value returned by RegexMatcher::split(), or 0 on failure.
int32_t  RegexPattern::split(UText *input,
        UText           *dest[],
        int32_t          destCapacity,
        UErrorCode      &status) const
{
    if (U_FAILURE(status)) {
        return 0;
    }

    RegexMatcher  m(this);
    // Check m's status to make sure all is ok.
    if (U_FAILURE(m.fDeferredStatus)) {
        status = m.fDeferredStatus;
        return 0;
    }
    return m.split(input, dest, destCapacity, status);
}
691
692
693
//---------------------------------------------------------------------
694
//
695
//   dump    Output the compiled form of the pattern.
696
//           Debugging function only.
697
//
698
//---------------------------------------------------------------------
699
0
// Debugging aid: prints one instruction of the compiled pattern to stdout.
// Does nothing unless REGEX_DEBUG is defined.
void   RegexPattern::dumpOp(int32_t index) const {
    (void)index;  // Suppress warnings in non-debug build.
#if defined(REGEX_DEBUG)
    static const char * const opNames[] = {URX_OPCODE_NAMES};
    int32_t instruction = fCompiledPat->elementAti(index);
    int32_t operand     = URX_VAL(instruction);
    int32_t opType      = URX_TYPE(instruction);

    // Out-of-range types are printed under the name in slot zero.
    int32_t nameSlot = opType;
    if ((uint32_t)nameSlot >= UPRV_LENGTHOF(opNames)) {
        nameSlot = 0;
    }

    printf("%4d   %08x    %-15s  ", index, instruction, opNames[nameSlot]);
    switch (opType) {
    // Types with no operand field of interest.
    case URX_NOP:
    case URX_DOTANY:
    case URX_DOTANY_ALL:
    case URX_FAIL:
    case URX_CARET:
    case URX_DOLLAR:
    case URX_BACKSLASH_G:
    case URX_BACKSLASH_X:
    case URX_END:
    case URX_DOLLAR_M:
    case URX_CARET_M:
        break;

    // Types with an integer operand field.
    case URX_RESERVED_OP:
    case URX_START_CAPTURE:
    case URX_END_CAPTURE:
    case URX_STATE_SAVE:
    case URX_JMP:
    case URX_JMP_SAV:
    case URX_JMP_SAV_X:
    case URX_BACKSLASH_B:
    case URX_BACKSLASH_BU:
    case URX_BACKSLASH_D:
    case URX_BACKSLASH_Z:
    case URX_STRING_LEN:
    case URX_CTR_INIT:
    case URX_CTR_INIT_NG:
    case URX_CTR_LOOP:
    case URX_CTR_LOOP_NG:
    case URX_RELOC_OPRND:
    case URX_STO_SP:
    case URX_LD_SP:
    case URX_BACKREF:
    case URX_STO_INP_LOC:
    case URX_JMPX:
    case URX_LA_START:
    case URX_LA_END:
    case URX_BACKREF_I:
    case URX_LB_START:
    case URX_LB_CONT:
    case URX_LB_END:
    case URX_LBN_CONT:
    case URX_LBN_END:
    case URX_LOOP_C:
    case URX_LOOP_DOT_I:
    case URX_BACKSLASH_H:
    case URX_BACKSLASH_R:
    case URX_BACKSLASH_V:
        printf("%d", operand);
        break;

    // Single character: control characters are shown in hex.
    case URX_ONECHAR:
    case URX_ONECHAR_I:
        if (operand < 0x20) {
            printf("%#x", operand);
        } else {
            printf("'%s'", CStr(UnicodeString(operand))());
        }
        break;

    // Literal string: the operand indexes fLiteralText and the following
    // instruction carries the length.
    case URX_STRING:
    case URX_STRING_I:
        {
            int32_t lengthOp = fCompiledPat->elementAti(index+1);
            U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
            int32_t length = URX_VAL(lengthOp);
            UnicodeString literal(fLiteralText, operand, length);
            printf("%s", CStr(literal)());
        }
        break;

    // Reference to one of this pattern's own sets.
    case URX_SETREF:
    case URX_LOOP_SR_I:
        {
            UnicodeString setPattern;
            UnicodeSet *set = (UnicodeSet *)fSets->elementAt(operand);
            set->toPattern(setPattern, true);
            printf("%s", CStr(setPattern)());
        }
        break;

    // Reference to one of the shared static sets, possibly negated.
    case URX_STATIC_SETREF:
    case URX_STAT_SETREF_N:
        {
            UnicodeString setPattern;
            if (operand & URX_NEG_SET) {
                printf("NOT ");
                operand &= ~URX_NEG_SET;
            }
            UnicodeSet &set = RegexStaticSets::gStaticSets->fPropSets[operand];
            set.toPattern(setPattern, true);
            printf("%s", CStr(setPattern)());
        }
        break;


    default:
        printf("??????");
        break;
    }
    printf("\n");
#endif
}
818
819
820
0
// Debugging aid: prints the original pattern, the match-start information,
// the named capture groups and every compiled instruction to stdout.
// Does nothing unless REGEX_DEBUG is defined.
void RegexPattern::dumpPattern() const {
#if defined(REGEX_DEBUG)
    // Rebuild the pattern text one code point at a time from the UText.
    UnicodeString patStr;
    UChar32 c = utext_next32From(fPattern, 0);
    while (c != U_SENTINEL) {
        patStr.append(c);
        c = utext_next32(fPattern);
    }
    printf("Original Pattern:  \"%s\"\n", CStr(patStr)());
    printf("   Min Match Length:  %d\n", fMinMatchLen);
    printf("   Match Start Type:  %s\n", START_OF_MATCH_STR(fStartType));

    switch (fStartType) {
    case START_STRING:
        {
            UnicodeString initialString(fLiteralText,fInitialStringIdx, fInitialStringLen);
            printf("   Initial match string: \"%s\"\n", CStr(initialString)());
        }
        break;

    case START_SET:
        {
            UnicodeString setPattern;
            fInitialChars->toPattern(setPattern, true);
            printf("    Match First Chars: %s\n", CStr(setPattern)());
        }
        break;

    case START_CHAR:
        printf("    First char of Match: ");
        if (fInitialChar > 0x20) {
            printf("'%s'\n", CStr(UnicodeString(fInitialChar))());
        } else {
            printf("%#x\n", fInitialChar);
        }
        break;

    default:
        break;
    }

    printf("Named Capture Groups:\n");
    if (fNamedCaptureMap != NULL && uhash_count(fNamedCaptureMap) != 0) {
        int32_t pos = UHASH_FIRST;
        for (const UHashElement *el = uhash_nextElement(fNamedCaptureMap, &pos);
             el != NULL;
             el = uhash_nextElement(fNamedCaptureMap, &pos)) {
            const UnicodeString *name = (const UnicodeString *)el->key.pointer;
            int32_t number = el->value.integer;
            printf("   %d\t%s\n", number, CStr(*name)());
        }
    } else {
        printf("   None\n");
    }

    printf("\nIndex   Binary     Type             Operand\n"
           "-------------------------------------------\n");
    for (int opIndex = 0; opIndex < fCompiledPat->size(); opIndex++) {
        dumpOp(opIndex);
    }
    printf("\n\n");
#endif
}
869
870
871
872
// NOTE(review): presumably expands to the out-of-line class-ID / RTTI members
// for RegexPattern; the macro is defined outside this file — confirm there.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern)
873
874
U_NAMESPACE_END
875
#endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS