Coverage Report

Created: 2025-06-24 06:54

/src/icu/icu4c/source/i18n/uitercollationiterator.h
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
* Copyright (C) 2012-2016, International Business Machines
6
* Corporation and others.  All Rights Reserved.
7
*******************************************************************************
8
* uitercollationiterator.h
9
*
10
* created on: 2012sep23 (from utf16collationiterator.h)
11
* created by: Markus W. Scherer
12
*/
13
14
#ifndef __UITERCOLLATIONITERATOR_H__
15
#define __UITERCOLLATIONITERATOR_H__
16
17
#include "unicode/utypes.h"
18
19
#if !UCONFIG_NO_COLLATION
20
21
#include "unicode/uiter.h"
22
#include "cmemory.h"
23
#include "collation.h"
24
#include "collationdata.h"
25
#include "collationiterator.h"
26
#include "normalizer2impl.h"
27
28
U_NAMESPACE_BEGIN
29
30
/**
31
 * UCharIterator-based collation element and character iterator.
32
 * Handles normalized text inline, with length or NUL-terminated.
33
 * Unnormalized text is handled by a subclass.
 *
 * The text source is held by reference (see the iter member), not copied:
 * the UCharIterator passed to the constructor must stay valid for as long
 * as this object is in use.
 *
 * NOTE(review): the "with length or NUL-terminated" wording may be inherited
 * from the UTF-16 pointer-based iterator this file was derived from; confirm
 * that it is meaningful for a UCharIterator source.
34
 */
35
class U_I18N_API UIterCollationIterator : public CollationIterator {
36
public:
37
    /**
     * Passes d and numeric through to the CollationIterator base constructor
     * and binds the iter member to ui.
     * @param d       collation data, forwarded unchanged to the base class
     * @param numeric forwarded unchanged to the base class
     * @param ui      text iterator; stored by reference, so it must outlive this object
     */
    UIterCollationIterator(const CollationData *d, UBool numeric, UCharIterator &ui)
38
0
            : CollationIterator(d, numeric), iter(ui) {}
39
40
    virtual ~UIterCollationIterator();
41
42
    // The public functions below override inherited virtuals.
    // They are only declared here; their definitions are not part of this header.
    virtual void resetToOffset(int32_t newOffset) override;
43
44
    virtual int32_t getOffset() const override;
45
46
    virtual UChar32 nextCodePoint(UErrorCode &errorCode) override;
47
48
    virtual UChar32 previousCodePoint(UErrorCode &errorCode) override;
49
50
protected:
51
    // Protected overrides of inherited virtuals; declared here, defined elsewhere.
    // handleNextCE32() takes c by non-const reference, so it can write to the caller's variable.
    virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode) override;
52
53
    virtual char16_t handleGetTrailSurrogate() override;
54
55
    virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode) override;
56
57
    virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode) override;
58
59
    // Reference to the caller's iterator (bound in the constructor); this class does not own it.
    // Protected, so subclasses can use it directly.
    UCharIterator &iter;
60
};
61
62
/**
63
 * Incrementally checks the input text for FCD and normalizes where necessary.
 *
 * The object is always in one of the states of the private State enum;
 * which of start, pos and limit are meaningful depends on that state
 * (see the per-state documentation below).
64
 */
65
class U_I18N_API FCDUIterCollationIterator : public UIterCollationIterator {
66
public:
67
    /**
     * Forwards data, numeric and ui to the UIterCollationIterator constructor,
     * then sets state to ITER_CHECK_FWD and start to startIndex, and binds
     * nfcImpl to data->nfcImpl.
     *
     * pos and limit are intentionally not initialized here: the ITER_CHECK_FWD
     * documentation below declares them undefined in that state.
     * normalized is default-constructed.
     *
     * @param data       collation data; dereferenced unconditionally, so it must not be null.
     *                   nfcImpl is a reference member bound to data->nfcImpl, so that
     *                   object must outlive this iterator.
     * @param numeric    forwarded unchanged to the base class
     * @param ui         text iterator, forwarded unchanged to the base class
     * @param startIndex initial value of start
     */
    FCDUIterCollationIterator(const CollationData *data, UBool numeric, UCharIterator &ui, int32_t startIndex)
68
0
            : UIterCollationIterator(data, numeric, ui),
69
0
              state(ITER_CHECK_FWD), start(startIndex),
70
0
              nfcImpl(data->nfcImpl) {}
71
72
    virtual ~FCDUIterCollationIterator();
73
74
    // The public functions below override inherited virtuals.
    // They are only declared here; their definitions are not part of this header.
    virtual void resetToOffset(int32_t newOffset) override;
75
76
    virtual int32_t getOffset() const override;
77
78
    virtual UChar32 nextCodePoint(UErrorCode &errorCode) override;
79
80
    virtual UChar32 previousCodePoint(UErrorCode &errorCode) override;
81
82
protected:
83
    // Protected overrides of inherited virtuals; declared here, defined elsewhere.
    virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode) override;
84
85
    virtual char16_t handleGetTrailSurrogate() override;
86
87
88
    virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode) override;
89
90
    virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode) override;
91
92
private:
93
    /**
94
     * Switches to forward checking if possible.
95
     */
96
    void switchToForward();
97
98
    /**
99
     * Extends the FCD text segment forward or normalizes around pos.
100
     * @return true if success
101
     */
102
    UBool nextSegment(UErrorCode &errorCode);
103
104
    /**
105
     * Switches to backward checking.
106
     */
107
    void switchToBackward();
108
109
    /**
110
     * Extends the FCD text segment backward or normalizes around pos.
111
     * @return true if success
112
     */
113
    UBool previousSegment(UErrorCode &errorCode);
114
115
    // NOTE(review): undocumented in the original. Presumably normalizes s into the
    // `normalized` member and returns true on success, like the segment functions
    // above; confirm against the implementation file.
    UBool normalize(const UnicodeString &s, UErrorCode &errorCode);
116
117
    /**
     * States of the iterator. Each state defines which of start, pos and limit
     * are valid and what they refer to. Ranges are written [a..b[, i.e. the
     * closing bracket points outward to mark the end index as exclusive.
     */
    enum State {
118
        /**
119
         * The input text [start..(iter index)[ passes the FCD check.
120
         * Moving forward checks incrementally.
121
         * pos & limit are undefined.
122
         */
123
        ITER_CHECK_FWD,
124
        /**
125
         * The input text [(iter index)..limit[ passes the FCD check.
126
         * Moving backward checks incrementally.
127
         * start & pos are undefined.
128
         */
129
        ITER_CHECK_BWD,
130
        /**
131
         * The input text [start..limit[ passes the FCD check.
132
         * pos tracks the current text index.
133
         */
134
        ITER_IN_FCD_SEGMENT,
135
        /**
136
         * The input text [start..limit[ failed the FCD check and was normalized.
137
         * pos tracks the current index in the normalized string.
138
         * The text iterator is at the limit index.
139
         */
140
        IN_NORM_ITER_AT_LIMIT,
141
        /**
142
         * The input text [start..limit[ failed the FCD check and was normalized.
143
         * pos tracks the current index in the normalized string.
144
         * The text iterator is at the start index.
145
         */
146
        IN_NORM_ITER_AT_START
147
    };
148
149
    // Current state; set to ITER_CHECK_FWD by the constructor.
    State state;
150
151
    // Index fields; their validity and meaning depend on state (see State above).
    // Only start is initialized by the constructor.
    int32_t start;
152
    int32_t pos;
153
    int32_t limit;
154
155
    // Bound to data->nfcImpl in the constructor; not owned by this object.
    const Normalizer2Impl &nfcImpl;
156
    // Presumably the normalized text that pos indexes in the IN_NORM_* states;
    // confirm against the implementation file.
    UnicodeString normalized;
157
};
158
159
U_NAMESPACE_END
160
161
#endif  // !UCONFIG_NO_COLLATION
162
#endif  // __UITERCOLLATIONITERATOR_H__