Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/intl/icu/source/i18n/stsearch.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
**********************************************************************
5
*   Copyright (C) 2001-2014 IBM and others. All rights reserved.
6
**********************************************************************
7
*   Date        Name        Description
8
*  03/22/2000   helena      Creation.
9
**********************************************************************
10
*/
11
12
#include "unicode/utypes.h"
13
14
#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
15
16
#include "unicode/stsearch.h"
17
#include "usrchimp.h"
18
#include "cmemory.h"
19
20
U_NAMESPACE_BEGIN
21
22
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch)
23
24
// public constructors and destructors -----------------------------------
25
26
// Constructs a searcher for `pattern` over `text`, opening the underlying
// UStringSearch with usearch_open() using the name of `locale`.
//
// pattern    copied into m_pattern_.
// text       forwarded to the SearchIterator base constructor; the search
//            itself is opened over m_text_.
// locale     its getName() result is passed to usearch_open().
// breakiter  forwarded to the base constructor and to usearch_open().
// status     in/out error code. If it already signals failure on entry,
//            nothing is opened and m_strsrch_ is left NULL.
StringSearch::StringSearch(const UnicodeString &pattern,
                           const UnicodeString &text,
                           const Locale        &locale,
                                 BreakIterator *breakiter,
                                 UErrorCode    &status) :
                           SearchIterator(text, breakiter),
                           m_pattern_(pattern)
{
    m_strsrch_ = NULL;
    if (U_FAILURE(status)) {
        return;
    }

    m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
                              m_text_.getBuffer(), m_text_.length(),
                              locale.getName(), (UBreakIterator *)breakiter,
                              &status);

    // m_search_ was created by the base SearchIterator class; drop it and,
    // when the open succeeded, use the record held by m_strsrch_ instead.
    uprv_free(m_search_);
    m_search_ = U_SUCCESS(status) ? m_strsrch_->search : NULL;
}
51
52
// Constructs a searcher for `pattern` over `text`, opening the underlying
// UStringSearch with usearch_openFromCollator() on the caller's collator.
//
// pattern    copied into m_pattern_.
// text       forwarded to the SearchIterator base constructor; the search
//            itself is opened over m_text_.
// coll       collator to search with; NULL is rejected with
//            U_ILLEGAL_ARGUMENT_ERROR.
// breakiter  forwarded to the base constructor and to the open call.
// status     in/out error code. On entry failure or a NULL collator nothing
//            is opened and m_strsrch_ is left NULL.
StringSearch::StringSearch(const UnicodeString     &pattern,
                           const UnicodeString     &text,
                                 RuleBasedCollator *coll,
                                 BreakIterator     *breakiter,
                                 UErrorCode        &status) :
                           SearchIterator(text, breakiter),
                           m_pattern_(pattern)
{
    m_strsrch_ = NULL;
    if (U_FAILURE(status)) {
        return;
    }
    if (coll == NULL) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
                                          m_pattern_.length(),
                                          m_text_.getBuffer(),
                                          m_text_.length(),
                                          coll->toUCollator(),
                                          (UBreakIterator *)breakiter,
                                          &status);

    // m_search_ was created by the base SearchIterator class; drop it and,
    // when the open succeeded, use the record held by m_strsrch_ instead.
    uprv_free(m_search_);
    m_search_ = U_SUCCESS(status) ? m_strsrch_->search : NULL;
}
83
84
// Constructs a searcher for `pattern` over the text supplied through a
// CharacterIterator, opening the underlying UStringSearch with
// usearch_open() using the name of `locale`.
//
// pattern    copied into m_pattern_.
// text       forwarded to the SearchIterator base constructor; the search
//            is opened over m_text_, not over the iterator itself.
// locale     its getName() result is passed to usearch_open().
// breakiter  forwarded to the base constructor and to usearch_open().
// status     in/out error code. If it already signals failure on entry,
//            nothing is opened and m_strsrch_ is left NULL.
StringSearch::StringSearch(const UnicodeString     &pattern,
                                 CharacterIterator &text,
                           const Locale            &locale,
                                 BreakIterator     *breakiter,
                                 UErrorCode        &status) :
                           SearchIterator(text, breakiter),
                           m_pattern_(pattern)
{
    m_strsrch_ = NULL;
    if (U_FAILURE(status)) {
        return;
    }

    m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
                              m_text_.getBuffer(), m_text_.length(),
                              locale.getName(), (UBreakIterator *)breakiter,
                              &status);

    // m_search_ was created by the base SearchIterator class; drop it and,
    // when the open succeeded, use the record held by m_strsrch_ instead.
    uprv_free(m_search_);
    m_search_ = U_SUCCESS(status) ? m_strsrch_->search : NULL;
}
108
109
// Constructs a searcher for `pattern` over the text supplied through a
// CharacterIterator, opening the underlying UStringSearch with
// usearch_openFromCollator() on the caller's collator.
//
// pattern    copied into m_pattern_.
// text       forwarded to the SearchIterator base constructor; the search
//            is opened over m_text_, not over the iterator itself.
// coll       collator to search with; NULL is rejected with
//            U_ILLEGAL_ARGUMENT_ERROR.
// breakiter  forwarded to the base constructor and to the open call.
// status     in/out error code. On entry failure or a NULL collator nothing
//            is opened and m_strsrch_ is left NULL.
StringSearch::StringSearch(const UnicodeString     &pattern,
                                 CharacterIterator &text,
                                 RuleBasedCollator *coll,
                                 BreakIterator     *breakiter,
                                 UErrorCode        &status) :
                           SearchIterator(text, breakiter),
                           m_pattern_(pattern)
{
    m_strsrch_ = NULL;
    if (U_FAILURE(status)) {
        return;
    }
    if (coll == NULL) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
                                          m_pattern_.length(),
                                          m_text_.getBuffer(),
                                          m_text_.length(),
                                          coll->toUCollator(),
                                          (UBreakIterator *)breakiter,
                                          &status);

    // m_search_ was created by the base SearchIterator class; drop it and,
    // when the open succeeded, use the record held by m_strsrch_ instead.
    uprv_free(m_search_);
    m_search_ = U_SUCCESS(status) ? m_strsrch_->search : NULL;
}
140
141
// Copy constructor. Copies the text, break iterator pointer and pattern of
// `that`, then opens a fresh UStringSearch over this object's own copies
// using the collator held by that.m_strsrch_.
//
// If `that` has no open search (m_strsrch_ == NULL), or the open fails, the
// new object is left with m_search_ == NULL. Errors are not reported to the
// caller: the status is local to this constructor.
StringSearch::StringSearch(const StringSearch &that) :
                       SearchIterator(that.m_text_, that.m_breakiterator_),
                       m_pattern_(that.m_pattern_)
{
    // Free m_search_ from the superclass
    uprv_free(m_search_);
    m_search_  = NULL;
    m_strsrch_ = NULL;

    if (that.m_strsrch_ == NULL) {
        // The source was not a good object; there is nothing to deep-copy.
        return;
    }

    // Make a deep copy
    UErrorCode status = U_ZERO_ERROR;
    m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
                                          m_pattern_.length(),
                                          m_text_.getBuffer(),
                                          m_text_.length(),
                                          that.m_strsrch_->collator,
                                          (UBreakIterator *)that.m_breakiterator_,
                                          &status);
    if (U_SUCCESS(status)) {
        m_search_ = m_strsrch_->search;
    }
}
170
171
// Destructor. Closes the underlying UStringSearch if one was opened.
StringSearch::~StringSearch()
{
    if (m_strsrch_ == NULL) {
        return;
    }
    usearch_close(m_strsrch_);
    // m_search_ was pointing at m_strsrch_->search; clear it after the close
    // (presumably so the base class does not release it again -- confirm
    // against the SearchIterator destructor).
    m_search_ = NULL;
}
178
179
// Returns a new heap-allocated StringSearch made with the copy constructor.
StringSearch *
StringSearch::clone() const {
    StringSearch *duplicate = new StringSearch(*this);
    return duplicate;
}
183
184
// operator overloading ---------------------------------------------
185
// Assignment. Copies text, break iterator pointer and pattern from `that`
// and reopens the underlying UStringSearch over this object's own copies,
// using the collator held by that.m_strsrch_.
//
// Returns *this. Errors from the reopen are not reported; on failure (or
// when `that` has no open search) this object is left with
// m_strsrch_ == NULL and m_search_ == NULL.
StringSearch & StringSearch::operator=(const StringSearch &that)
{
    // Guard on identity, not on value equality: a value comparison goes
    // through operator==, which dereferences m_strsrch_ on both sides, and
    // it would also skip assignment between distinct but equal objects.
    if (this == &that) {
        return *this;
    }

    UErrorCode status = U_ZERO_ERROR;
    m_text_          = that.m_text_;
    m_breakiterator_ = that.m_breakiterator_;
    m_pattern_       = that.m_pattern_;

    if (m_strsrch_ != NULL) {
        // all m_search_ in the parent class is linked up with m_strsrch_,
        // so it must not be used (or freed separately) after the close.
        usearch_close(m_strsrch_);
        m_strsrch_ = NULL;
    }
    else {
        // No open search: m_search_ is either NULL or still the record
        // created by the base class, which the constructors release with
        // uprv_free().
        uprv_free(m_search_);
    }
    m_search_ = NULL;

    if (that.m_strsrch_ == NULL) {
        // Source has no open search to copy a collator from; stay closed,
        // mirroring the copy constructor.
        return *this;
    }

    // NOTE(review): the copy constructor passes that.m_breakiterator_ here,
    // whereas assignment passes NULL -- confirm which one is intended.
    m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
                                          m_pattern_.length(),
                                          m_text_.getBuffer(),
                                          m_text_.length(),
                                          that.m_strsrch_->collator,
                                          NULL, &status);
    // Check null pointer
    if (m_strsrch_ != NULL) {
        m_search_ = m_strsrch_->search;
    }
    return *this;
}
207
208
// Equality. Two searchers are equal when the base-class comparison
// succeeds, `that` really is a StringSearch, the patterns match and both
// use the same collator object (pointer comparison).
//
// Returns TRUE if equal, FALSE otherwise.
UBool StringSearch::operator==(const SearchIterator &that) const
{
    if (this == &that) {
        return TRUE;
    }
    if (!SearchIterator::operator ==(that)) {
        return FALSE;
    }

    // `that` may be any SearchIterator subclass; check the dynamic type
    // before touching StringSearch-only members.
    const StringSearch *thatsrch = dynamic_cast<const StringSearch *>(&that);
    if (thatsrch == NULL) {
        return FALSE;
    }
    if (this->m_pattern_ != thatsrch->m_pattern_) {
        return FALSE;
    }
    if (this->m_strsrch_ == NULL || thatsrch->m_strsrch_ == NULL) {
        // At least one side has no open search, so there is no collator to
        // compare; equal only if both sides are in that state.
        return this->m_strsrch_ == thatsrch->m_strsrch_;
    }
    return this->m_strsrch_->collator == thatsrch->m_strsrch_->collator;
}
220
221
// public get and set methods ----------------------------------------
222
223
// Sets the search offset by delegating to usearch_setOffset().
//
// position  offset to pass on.
// status    in/out error code; it is checked inside usearch_setOffset(),
//           not here.
void StringSearch::setOffset(int32_t position, UErrorCode &status)
{
    UErrorCode *errorCode = &status;
    usearch_setOffset(m_strsrch_, position, errorCode);
}
228
229
// Returns the current offset as reported by usearch_getOffset().
int32_t StringSearch::getOffset(void) const
{
    const int32_t currentOffset = usearch_getOffset(m_strsrch_);
    return currentOffset;
}
233
234
void StringSearch::setText(const UnicodeString &text, UErrorCode &status)
235
0
{
236
0
    if (U_SUCCESS(status)) {
237
0
        m_text_ = text;
238
0
        usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status);
239
0
    }
240
0
}
241
242
// Replaces the text to be searched with the contents of a
// CharacterIterator.
//
// text    iterator whose getText() fills m_text_.
// status  in/out error code; nothing happens if it already signals failure.
void StringSearch::setText(CharacterIterator &text, UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return;
    }
    text.getText(m_text_);
    usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status);
}
249
250
// Returns the collator of the underlying search as a RuleBasedCollator,
// obtained through RuleBasedCollator::rbcFromUCollator().
RuleBasedCollator * StringSearch::getCollator() const
{
    // The collator is stored as const, but this const method hands out a
    // non-const pointer, hence the const_cast. Returning a const collator
    // would be cleaner.
    UCollator *mutableCollator = const_cast<UCollator *>(m_strsrch_->collator);
    return RuleBasedCollator::rbcFromUCollator(mutableCollator);
}
255
256
// Sets the collator used by the underlying search.
//
// coll    collator to use; NULL is rejected with U_ILLEGAL_ARGUMENT_ERROR,
//         matching the collator-taking constructors.
// status  in/out error code; nothing happens if it already signals failure.
void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return;
    }
    if (coll == NULL) {
        // Do not call a member function through a NULL pointer.
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    usearch_setCollator(m_strsrch_, coll->toUCollator(), &status);
}
262
263
void StringSearch::setPattern(const UnicodeString &pattern,
264
                                    UErrorCode    &status)
265
0
{
266
0
    if (U_SUCCESS(status)) {
267
0
        m_pattern_ = pattern;
268
0
        usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(),
269
0
                           &status);
270
0
    }
271
0
}
272
273
// Returns a reference to the stored pattern (m_pattern_).
const UnicodeString & StringSearch::getPattern() const
{
    const UnicodeString &storedPattern = m_pattern_;
    return storedPattern;
}
277
278
// public methods ----------------------------------------------------
279
280
void StringSearch::reset()
281
0
{
282
0
    usearch_reset(m_strsrch_);
283
0
}
284
285
// Creates an independent copy of this searcher with the same pattern, text,
// collator and break iterator, positioned at the same offset and carrying
// the same current match start and length.
//
// Returns the new object, or NULL if this object has no open search, if
// allocation fails, or if constructing or positioning the copy fails. No
// partially built copy is leaked on failure.
SearchIterator * StringSearch::safeClone(void) const
{
    if (m_strsrch_ == NULL) {
        // Nothing to clone from: getCollator() and the match fields below
        // all read through m_strsrch_.
        return NULL;
    }

    UErrorCode status = U_ZERO_ERROR;
    StringSearch *result = new StringSearch(m_pattern_, m_text_,
                                            getCollator(),
                                            m_breakiterator_,
                                            status);
    /* test for NULL */
    if (result == NULL) {
        return NULL;
    }
    if (U_FAILURE(status)) {
        // Construction failed; do not touch the copy's search state.
        delete result;
        return NULL;
    }

    result->setOffset(getOffset(), status);
    if (U_FAILURE(status)) {
        delete result;
        return NULL;
    }
    result->setMatchStart(m_strsrch_->search->matchedIndex);
    result->setMatchLength(m_strsrch_->search->matchedLength);
    return result;
}
305
306
// protected method -------------------------------------------------
307
308
// Advances the search forward from `position`.
//
// position  text offset to search from; values passed here are already in
//           the pre-shift position.
// status    in/out error code.
//
// Returns m_search_->matchedIndex after a non-empty-pattern search, or
// USEARCH_DONE when status signals failure. When pattern.cesLength is 0 the
// outcome is recorded in m_search_ only and the return value is always
// USEARCH_DONE.
int32_t StringSearch::handleNext(int32_t position, UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return USEARCH_DONE;
    }

    if (m_strsrch_->pattern.cesLength == 0) {
        // Step one position at a time: start at the current offset if there
        // is no current match, otherwise move one past the current match.
        if (m_search_->matchedIndex == USEARCH_DONE) {
            m_search_->matchedIndex = getOffset();
        }
        else {
            m_search_->matchedIndex = m_search_->matchedIndex + 1;
        }
        m_search_->matchedLength = 0;
        ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
                       &status);
        if (m_search_->matchedIndex == m_search_->textLength) {
            m_search_->matchedIndex = USEARCH_DONE;
        }
        // NOTE(review): unlike handlePrev(), this branch does not return
        // matchedIndex -- confirm callers read m_search_ rather than the
        // return value.
        return USEARCH_DONE;
    }

    // looking at usearch.cpp, this part is shifted out to
    // StringSearch instead of SearchIterator because m_strsrch_ is
    // not accessible in SearchIterator
#if 0
    if (position + m_strsrch_->pattern.defaultShiftSize
        > m_search_->textLength) {
        setMatchNotFound();
        return USEARCH_DONE;
    }
#endif
    if (m_search_->matchedLength <= 0) {
        // the flipping direction issue has already been handled
        // in next()
        // for boundary check purposes. this will ensure that the
        // next match will not precede the current offset
        // note search->matchedIndex will always be set to something
        // in the code
        m_search_->matchedIndex = position - 1;
    }

    ucol_setOffset(m_strsrch_->textIter, position, &status);

#if 0
    for (;;) {
        if (m_search_->isCanonicalMatch) {
            // can't use exact here since extra accents are allowed.
            usearch_handleNextCanonical(m_strsrch_, &status);
        }
        else {
            usearch_handleNextExact(m_strsrch_, &status);
        }
        if (U_FAILURE(status)) {
            return USEARCH_DONE;
        }
        if (m_breakiterator_ == NULL
#if !UCONFIG_NO_BREAK_ITERATION
            ||
            m_search_->matchedIndex == USEARCH_DONE ||
            (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
             m_breakiterator_->isBoundary(m_search_->matchedIndex +
                                          m_search_->matchedLength))
#endif
        ) {
            if (m_search_->matchedIndex == USEARCH_DONE) {
                ucol_setOffset(m_strsrch_->textIter,
                               m_search_->textLength, &status);
            }
            else {
                ucol_setOffset(m_strsrch_->textIter,
                               m_search_->matchedIndex, &status);
            }
            return m_search_->matchedIndex;
        }
    }
#else
    // if m_strsrch_->breakIter is always the same as m_breakiterator_
    // then we don't need to check the match boundaries here because
    // usearch_handleNextXXX will already have done it.
    if (m_search_->isCanonicalMatch) {
        // *could* actually use exact here 'cause no extra accents allowed...
        usearch_handleNextCanonical(m_strsrch_, &status);
    }
    else {
        usearch_handleNextExact(m_strsrch_, &status);
    }

    if (U_FAILURE(status)) {
        return USEARCH_DONE;
    }

    // Leave the text iterator at the match, or at the end of the text when
    // nothing was found.
    const int32_t iteratorOffset =
        (m_search_->matchedIndex == USEARCH_DONE) ? m_search_->textLength
                                                  : m_search_->matchedIndex;
    ucol_setOffset(m_strsrch_->textIter, iteratorOffset, &status);

    return m_search_->matchedIndex;
#endif
}
405
406
// Advances the search backward from `position`.
//
// position  text offset to search from; values passed here are already in
//           the pre-shift position.
// status    in/out error code.
//
// Returns m_search_->matchedIndex after the step, or USEARCH_DONE when
// status signals failure on entry or after the backward search call.
int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return USEARCH_DONE;
    }

    if (m_strsrch_->pattern.cesLength == 0) {
        // Step back one position at a time, starting from the current
        // offset if there is no current match.
        if (m_search_->matchedIndex == USEARCH_DONE) {
            m_search_->matchedIndex = getOffset();
        }
        if (m_search_->matchedIndex == 0) {
            setMatchNotFound();
        }
        else {
            m_search_->matchedIndex --;
            ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
                           &status);
            m_search_->matchedLength = 0;
        }
        return m_search_->matchedIndex;
    }

    // looking at usearch.cpp, this part is shifted out to
    // StringSearch instead of SearchIterator because m_strsrch_ is
    // not accessible in SearchIterator
#if 0
    if (!m_search_->isOverlap &&
        position - m_strsrch_->pattern.defaultShiftSize < 0) {
        setMatchNotFound();
        return USEARCH_DONE;
    }

    for (;;) {
        if (m_search_->isCanonicalMatch) {
            // can't use exact here since extra accents are allowed.
            usearch_handlePreviousCanonical(m_strsrch_, &status);
        }
        else {
            usearch_handlePreviousExact(m_strsrch_, &status);
        }
        if (U_FAILURE(status)) {
            return USEARCH_DONE;
        }
        if (m_breakiterator_ == NULL
#if !UCONFIG_NO_BREAK_ITERATION
            ||
            m_search_->matchedIndex == USEARCH_DONE ||
            (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
             m_breakiterator_->isBoundary(m_search_->matchedIndex +
                                          m_search_->matchedLength))
#endif
        ) {
            return m_search_->matchedIndex;
        }
    }
#else
    ucol_setOffset(m_strsrch_->textIter, position, &status);

    if (m_search_->isCanonicalMatch) {
        // *could* use exact match here since extra accents *not* allowed!
        usearch_handlePreviousCanonical(m_strsrch_, &status);
    }
    else {
        usearch_handlePreviousExact(m_strsrch_, &status);
    }

    if (U_FAILURE(status)) {
        return USEARCH_DONE;
    }

    return m_search_->matchedIndex;
#endif
}
480
481
U_NAMESPACE_END
482
483
#endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */