Coverage Report

Created: 2025-07-11 06:23

/src/icu/source/common/unicode/edits.h
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
4
// edits.h
5
// created: 2016dec30 Markus W. Scherer
6
7
#ifndef __EDITS_H__
8
#define __EDITS_H__
9
10
#include "unicode/utypes.h"
11
#include "unicode/uobject.h"
12
13
/**
14
 * \file
15
 * \brief C++ API: C++ class Edits for low-level string transformations on styled text.
16
 */
17
18
U_NAMESPACE_BEGIN
19
20
#ifndef U_HIDE_DRAFT_API
21
22
/**
23
 * Records lengths of string edits but not replacement text.
24
 * Supports replacements, insertions, deletions in linear progression.
25
 * Does not support moving/reordering of text.
26
 *
27
 * An Edits object tracks a separate UErrorCode, but ICU string transformation functions
28
 * (e.g., case mapping functions) merge any such errors into their API's UErrorCode.
29
 *
30
 * @draft ICU 59
31
 */
32
class U_COMMON_API Edits U_FINAL : public UMemory {
33
public:
34
    /**
35
     * Constructs an empty object.
     * The object starts out using its inline stackArray (STACK_CAPACITY units)
     * as storage, with length 0, delta 0 and errorCode U_ZERO_ERROR.
36
     * @draft ICU 59
37
     */
38
    Edits() :
39
            array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0),
40
0
            errorCode(U_ZERO_ERROR) {}
41
    /**
42
     * Destructor.
43
     * @draft ICU 59
44
     */
45
    ~Edits();
46
47
    /**
48
     * Resets the data but may not release memory.
49
     * @draft ICU 59
50
     */
51
    void reset();
52
53
    /**
54
     * Adds a record for an unchanged segment of text.
55
     * Normally called from inside ICU string transformation functions, not user code.
     * @param unchangedLength length of the unchanged text segment
56
     * @draft ICU 59
57
     */
58
    void addUnchanged(int32_t unchangedLength);
59
    /**
60
     * Adds a record for a text replacement/insertion/deletion.
61
     * Normally called from inside ICU string transformation functions, not user code.
     * @param oldLength length of the original text that is replaced
     * @param newLength length of the replacement text
62
     * @draft ICU 59
63
     */
64
    void addReplace(int32_t oldLength, int32_t newLength);
65
    /**
66
     * Sets the UErrorCode if an error occurred while recording edits.
67
     * Preserves older error codes in the outErrorCode.
68
     * Normally called from inside ICU string transformation functions, not user code.
     * @param outErrorCode in/out error code; receives the error recorded by this object,
     *                     unless it already holds an older error
69
     * @return TRUE if U_FAILURE(outErrorCode)
70
     * @draft ICU 59
71
     */
72
    UBool copyErrorTo(UErrorCode &outErrorCode);
73
74
    /**
75
     * How much longer is the new text compared with the old text?
76
     * @return new length minus old length
77
     * @draft ICU 59
78
     */
79
0
    int32_t lengthDelta() const { return delta; }
80
    /**
81
     * @return TRUE if there are any change edits
82
     * @draft ICU 59
83
     */
84
    UBool hasChanges() const;
85
86
    /**
87
     * Access to the list of edits.
88
     * @see getCoarseIterator
89
     * @see getFineIterator
     * @see getCoarseChangesIterator
     * @see getFineChangesIterator
90
     * @draft ICU 59
91
     */
92
    struct U_COMMON_API Iterator U_FINAL : public UMemory {
93
        /**
94
         * Copy constructor.
95
         * @draft ICU 59
96
         */
97
        Iterator(const Iterator &other) = default;
98
        /**
99
         * Assignment operator.
100
         * @draft ICU 59
101
         */
102
        Iterator &operator=(const Iterator &other) = default;
103
104
        /**
105
         * Advances to the next edit.
         * Delegates to the private two-argument next(), passing this
         * iterator's onlyChanges_ flag.
         * @param errorCode ICU error code, forwarded unchanged to the private next()
         *                  (NOTE(review): presumably the usual ICU in/out UErrorCode
         *                  convention; confirm against the implementation)
106
         * @return TRUE if there is another edit
107
         * @draft ICU 59
108
         */
109
0
        UBool next(UErrorCode &errorCode) { return next(onlyChanges_, errorCode); }
110
111
        /**
112
         * Finds the edit that contains the source index.
113
         * The source index may be found in a non-change
114
         * even if normal iteration would skip non-changes.
115
         * Normal iteration can continue from a found edit.
116
         *
117
         * The iterator state before this search logically does not matter.
118
         * (It may affect the performance of the search.)
119
         *
120
         * The iterator state after this search is undefined
121
         * if the source index is out of bounds for the source string.
122
         *
123
         * @param i source index
         * @param errorCode ICU error code
         *                  (NOTE(review): presumably the usual ICU in/out UErrorCode
         *                  convention; confirm against the implementation)
124
         * @return TRUE if the edit for the source index was found
125
         * @draft ICU 59
126
         */
127
        UBool findSourceIndex(int32_t i, UErrorCode &errorCode);
128
129
        /**
130
         * @return TRUE if this edit replaces oldLength() units with newLength() different ones.
131
         *         FALSE if oldLength units remain unchanged.
132
         * @draft ICU 59
133
         */
134
0
        UBool hasChange() const { return changed; }
135
        /**
136
         * @return the number of units in the original string which are replaced or remain unchanged.
137
         * @draft ICU 59
138
         */
139
0
        int32_t oldLength() const { return oldLength_; }
140
        /**
141
         * @return the number of units in the modified string, if hasChange() is TRUE.
142
         *         Same as oldLength if hasChange() is FALSE.
143
         * @draft ICU 59
144
         */
145
0
        int32_t newLength() const { return newLength_; }
146
147
        /**
148
         * @return the current index into the source string
149
         * @draft ICU 59
150
         */
151
0
        int32_t sourceIndex() const { return srcIndex; }
152
        /**
153
         * @return the current index into the replacement-characters-only string,
154
         *         not counting unchanged spans
155
         * @draft ICU 59
156
         */
157
0
        int32_t replacementIndex() const { return replIndex; }
158
        /**
159
         * @return the current index into the full destination string
160
         * @draft ICU 59
161
         */
162
0
        int32_t destinationIndex() const { return destIndex; }
163
164
    private:
165
        // Edits needs access to the private constructor below: its get*Iterator()
        // factory methods are the only visible callers of it.
        friend class Edits;
166
167
        // Called by the Edits factories as Iterator(array, length, <only changes?>, <coarse?>).
        // NOTE(review): oc and crs presumably initialize onlyChanges_ and coarse;
        // the definition is not in this header, so confirm there.
        Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs);
168
169
        // Internal helpers; declared here, defined outside this header.
        int32_t readLength(int32_t head);
170
        void updateIndexes();
171
        UBool noNext();
172
        // Worker for the public next(UErrorCode &), which passes onlyChanges_ as the first argument.
        UBool next(UBool onlyChanges, UErrorCode &errorCode);
173
174
        // Read-only view of 16-bit edit units; the Edits factories pass Edits::array here.
        const uint16_t *array;
175
        int32_t index, length;
176
        int32_t remaining;
177
        UBool onlyChanges_, coarse;
178
179
        // State of the current edit, exposed through hasChange(), oldLength() and newLength().
        UBool changed;
180
        int32_t oldLength_, newLength_;
181
        // Current positions, exposed through sourceIndex(), replacementIndex() and destinationIndex().
        int32_t srcIndex, replIndex, destIndex;
182
    };
183
184
    /**
185
     * Returns an Iterator for coarse-grained changes for simple string updates.
186
     * Skips non-changes.
187
     * @return an Iterator that merges adjacent changes.
188
     * @draft ICU 59
189
     */
190
0
    Iterator getCoarseChangesIterator() const {
191
0
        return Iterator(array, length, TRUE, TRUE);
192
0
    }
193
194
    /**
195
     * Returns an Iterator for coarse-grained changes and non-changes for simple string updates.
196
     * @return an Iterator that merges adjacent changes.
197
     * @draft ICU 59
198
     */
199
0
    Iterator getCoarseIterator() const {
200
0
        return Iterator(array, length, FALSE, TRUE);
201
0
    }
202
203
    /**
204
     * Returns an Iterator for fine-grained changes for modifying styled text.
205
     * Skips non-changes.
206
     * @return an Iterator that separates adjacent changes.
207
     * @draft ICU 59
208
     */
209
0
    Iterator getFineChangesIterator() const {
210
0
        return Iterator(array, length, TRUE, FALSE);
211
0
    }
212
213
    /**
214
     * Returns an Iterator for fine-grained changes and non-changes for modifying styled text.
215
     * @return an Iterator that separates adjacent changes.
216
     * @draft ICU 59
217
     */
218
0
    Iterator getFineIterator() const {
219
0
        return Iterator(array, length, FALSE, FALSE);
220
0
    }
221
222
private:
223
    // Not copyable: copy construction and copy assignment are deleted.
    Edits(const Edits &) = delete;
224
    Edits &operator=(const Edits &) = delete;
225
226
0
    // Overwrites the last recorded unit with `last` converted to uint16_t.
    // There is no bounds check, so this must only be called when length > 0.
    void setLastUnit(int32_t last) { array[length - 1] = (uint16_t)last; }
227
0
    // Returns the last recorded unit, or 0xffff when nothing has been recorded (length == 0).
    int32_t lastUnit() const { return length > 0 ? array[length - 1] : 0xffff; }
228
229
    // Internal helpers; declared here, defined outside this header.
    void append(int32_t r);
230
    UBool growArray();
231
232
    // Number of uint16_t units in the inline stackArray; also the initial capacity.
    static const int32_t STACK_CAPACITY = 100;
233
    // Current storage for the edit units; the constructor points it at stackArray.
    uint16_t *array;
234
    int32_t capacity;
235
    // Number of units currently in use in array (see lastUnit()).
    int32_t length;
236
    // Value returned by lengthDelta().
    int32_t delta;
237
    // Initialized to U_ZERO_ERROR by the constructor.
    UErrorCode errorCode;
238
    uint16_t stackArray[STACK_CAPACITY];
239
};
240
241
#endif  // U_HIDE_DRAFT_API
242
243
U_NAMESPACE_END
244
245
#endif  // __EDITS_H__