/src/skia/third_party/externals/icu/source/common/ubrk.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // © 2016 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | /* |
4 | | ******************************************************************************** |
5 | | * Copyright (C) 1996-2015, International Business Machines |
6 | | * Corporation and others. All Rights Reserved. |
7 | | ******************************************************************************** |
8 | | */ |
9 | | |
10 | | #include "unicode/utypes.h" |
11 | | |
12 | | #if !UCONFIG_NO_BREAK_ITERATION |
13 | | |
14 | | #include "unicode/ubrk.h" |
15 | | |
16 | | #include "unicode/brkiter.h" |
17 | | #include "unicode/uloc.h" |
18 | | #include "unicode/ustring.h" |
19 | | #include "unicode/uchriter.h" |
20 | | #include "unicode/rbbi.h" |
21 | | #include "rbbirb.h" |
22 | | #include "uassert.h" |
23 | | #include "cmemory.h" |
24 | | |
25 | | U_NAMESPACE_USE |
26 | | |
27 | | //------------------------------------------------------------------------------ |
28 | | // |
29 | | // ubrk_open Create a canned type of break iterator based on type (word, line, etc.) |
30 | | // and locale. |
31 | | // |
32 | | //------------------------------------------------------------------------------ |
33 | | U_CAPI UBreakIterator* U_EXPORT2 |
34 | | ubrk_open(UBreakIteratorType type, |
35 | | const char *locale, |
36 | | const UChar *text, |
37 | | int32_t textLength, |
38 | | UErrorCode *status) |
39 | 39.5k | { |
40 | | |
41 | 39.5k | if(U_FAILURE(*status)) return 0; |
42 | | |
43 | 39.5k | BreakIterator *result = 0; |
44 | | |
45 | 39.5k | switch(type) { |
46 | | |
47 | 19.7k | case UBRK_CHARACTER: |
48 | 19.7k | result = BreakIterator::createCharacterInstance(Locale(locale), *status); |
49 | 19.7k | break; |
50 | | |
51 | 0 | case UBRK_WORD: |
52 | 0 | result = BreakIterator::createWordInstance(Locale(locale), *status); |
53 | 0 | break; |
54 | | |
55 | 19.7k | case UBRK_LINE: |
56 | 19.7k | result = BreakIterator::createLineInstance(Locale(locale), *status); |
57 | 19.7k | break; |
58 | | |
59 | 0 | case UBRK_SENTENCE: |
60 | 0 | result = BreakIterator::createSentenceInstance(Locale(locale), *status); |
61 | 0 | break; |
62 | | |
63 | 0 | case UBRK_TITLE: |
64 | 0 | result = BreakIterator::createTitleInstance(Locale(locale), *status); |
65 | 0 | break; |
66 | | |
67 | 0 | default: |
68 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
69 | 39.5k | } |
70 | | |
71 | | // check for allocation error |
72 | 39.5k | if (U_FAILURE(*status)) { |
73 | 0 | return 0; |
74 | 0 | } |
75 | 39.5k | if(result == 0) { |
76 | 0 | *status = U_MEMORY_ALLOCATION_ERROR; |
77 | 0 | return 0; |
78 | 0 | } |
79 | | |
80 | | |
81 | 39.5k | UBreakIterator *uBI = (UBreakIterator *)result; |
82 | 39.5k | if (text != NULL) { |
83 | 0 | ubrk_setText(uBI, text, textLength, status); |
84 | 0 | } |
85 | 39.5k | return uBI; |
86 | 39.5k | } |
87 | | |
88 | | |
89 | | |
90 | | //------------------------------------------------------------------------------ |
91 | | // |
92 | | // ubrk_openRules open a break iterator from a set of break rules. |
93 | | // Invokes the rule builder. |
94 | | // |
95 | | //------------------------------------------------------------------------------ |
96 | | U_CAPI UBreakIterator* U_EXPORT2 |
97 | | ubrk_openRules( const UChar *rules, |
98 | | int32_t rulesLength, |
99 | | const UChar *text, |
100 | | int32_t textLength, |
101 | | UParseError *parseErr, |
102 | 0 | UErrorCode *status) { |
103 | |
|
104 | 0 | if (status == NULL || U_FAILURE(*status)){ |
105 | 0 | return 0; |
106 | 0 | } |
107 | | |
108 | 0 | BreakIterator *result = 0; |
109 | 0 | UnicodeString ruleString(rules, rulesLength); |
110 | 0 | result = RBBIRuleBuilder::createRuleBasedBreakIterator(ruleString, parseErr, *status); |
111 | 0 | if(U_FAILURE(*status)) { |
112 | 0 | return 0; |
113 | 0 | } |
114 | | |
115 | 0 | UBreakIterator *uBI = (UBreakIterator *)result; |
116 | 0 | if (text != NULL) { |
117 | 0 | ubrk_setText(uBI, text, textLength, status); |
118 | 0 | } |
119 | 0 | return uBI; |
120 | 0 | } |
121 | | |
122 | | |
123 | | U_CAPI UBreakIterator* U_EXPORT2 |
124 | | ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength, |
125 | | const UChar * text, int32_t textLength, |
126 | | UErrorCode * status) |
127 | 0 | { |
128 | 0 | if (U_FAILURE(*status)) { |
129 | 0 | return NULL; |
130 | 0 | } |
131 | 0 | if (rulesLength < 0) { |
132 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
133 | 0 | return NULL; |
134 | 0 | } |
135 | 0 | LocalPointer<RuleBasedBreakIterator> lpRBBI(new RuleBasedBreakIterator(binaryRules, rulesLength, *status), *status); |
136 | 0 | if (U_FAILURE(*status)) { |
137 | 0 | return NULL; |
138 | 0 | } |
139 | 0 | UBreakIterator *uBI = reinterpret_cast<UBreakIterator *>(lpRBBI.orphan()); |
140 | 0 | if (text != NULL) { |
141 | 0 | ubrk_setText(uBI, text, textLength, status); |
142 | 0 | } |
143 | 0 | return uBI; |
144 | 0 | } |
145 | | |
146 | | |
147 | | U_CAPI UBreakIterator * U_EXPORT2 |
148 | | ubrk_safeClone( |
149 | | const UBreakIterator *bi, |
150 | | void * /*stackBuffer*/, |
151 | | int32_t *pBufferSize, |
152 | | UErrorCode *status) |
153 | 0 | { |
154 | 0 | if (status == NULL || U_FAILURE(*status)){ |
155 | 0 | return NULL; |
156 | 0 | } |
157 | 0 | if (bi == NULL) { |
158 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
159 | 0 | return NULL; |
160 | 0 | } |
161 | 0 | if (pBufferSize != NULL) { |
162 | 0 | int32_t inputSize = *pBufferSize; |
163 | 0 | *pBufferSize = 1; |
164 | 0 | if (inputSize == 0) { |
165 | 0 | return NULL; // preflighting for deprecated functionality |
166 | 0 | } |
167 | 0 | } |
168 | 0 | BreakIterator *newBI = ((BreakIterator *)bi)->clone(); |
169 | 0 | if (newBI == NULL) { |
170 | 0 | *status = U_MEMORY_ALLOCATION_ERROR; |
171 | 0 | } else { |
172 | 0 | *status = U_SAFECLONE_ALLOCATED_WARNING; |
173 | 0 | } |
174 | 0 | return (UBreakIterator *)newBI; |
175 | 0 | } |
176 | | |
177 | | U_CAPI UBreakIterator * U_EXPORT2 |
178 | 7.52k | ubrk_clone(const UBreakIterator *bi, UErrorCode *status) { |
179 | 7.52k | if (U_FAILURE(*status)) { |
180 | 0 | return nullptr; |
181 | 0 | } |
182 | 7.52k | BreakIterator *newBI = ((BreakIterator *)bi)->clone(); |
183 | 7.52k | if (newBI == nullptr) { |
184 | 0 | *status = U_MEMORY_ALLOCATION_ERROR; |
185 | 0 | return nullptr; |
186 | 0 | } |
187 | 7.52k | return (UBreakIterator *)newBI; |
188 | 7.52k | } |
189 | | |
190 | | |
191 | | U_CAPI void U_EXPORT2 |
192 | | ubrk_close(UBreakIterator *bi) |
193 | 47.1k | { |
194 | 47.1k | delete (BreakIterator *)bi; |
195 | 47.1k | } |
196 | | |
197 | | U_CAPI void U_EXPORT2 |
198 | | ubrk_setText(UBreakIterator* bi, |
199 | | const UChar* text, |
200 | | int32_t textLength, |
201 | | UErrorCode* status) |
202 | 0 | { |
203 | 0 | UText ut = UTEXT_INITIALIZER; |
204 | 0 | utext_openUChars(&ut, text, textLength, status); |
205 | 0 | ((BreakIterator*)bi)->setText(&ut, *status); |
206 | | // A stack allocated UText wrapping a UChar * string |
207 | | // can be dumped without explicitly closing it. |
208 | 0 | } |
209 | | |
210 | | |
211 | | |
212 | | U_CAPI void U_EXPORT2 |
213 | | ubrk_setUText(UBreakIterator *bi, |
214 | | UText *text, |
215 | | UErrorCode *status) |
216 | 426k | { |
217 | 426k | ((BreakIterator*)bi)->setText(text, *status); |
218 | 426k | } |
219 | | |
220 | | |
221 | | |
222 | | |
223 | | |
224 | | U_CAPI int32_t U_EXPORT2 |
225 | | ubrk_current(const UBreakIterator *bi) |
226 | 0 | { |
227 | |
|
228 | 0 | return ((BreakIterator*)bi)->current(); |
229 | 0 | } |
230 | | |
231 | | U_CAPI int32_t U_EXPORT2 |
232 | | ubrk_next(UBreakIterator *bi) |
233 | 1.84M | { |
234 | | |
235 | 1.84M | return ((BreakIterator*)bi)->next(); |
236 | 1.84M | } |
237 | | |
238 | | U_CAPI int32_t U_EXPORT2 |
239 | | ubrk_previous(UBreakIterator *bi) |
240 | 0 | { |
241 | |
|
242 | 0 | return ((BreakIterator*)bi)->previous(); |
243 | 0 | } |
244 | | |
245 | | U_CAPI int32_t U_EXPORT2 |
246 | | ubrk_first(UBreakIterator *bi) |
247 | 7.52k | { |
248 | | |
249 | 7.52k | return ((BreakIterator*)bi)->first(); |
250 | 7.52k | } |
251 | | |
252 | | U_CAPI int32_t U_EXPORT2 |
253 | | ubrk_last(UBreakIterator *bi) |
254 | 0 | { |
255 | |
|
256 | 0 | return ((BreakIterator*)bi)->last(); |
257 | 0 | } |
258 | | |
259 | | U_CAPI int32_t U_EXPORT2 |
260 | | ubrk_preceding(UBreakIterator *bi, |
261 | | int32_t offset) |
262 | 0 | { |
263 | |
|
264 | 0 | return ((BreakIterator*)bi)->preceding(offset); |
265 | 0 | } |
266 | | |
267 | | U_CAPI int32_t U_EXPORT2 |
268 | | ubrk_following(UBreakIterator *bi, |
269 | | int32_t offset) |
270 | 0 | { |
271 | |
|
272 | 0 | return ((BreakIterator*)bi)->following(offset); |
273 | 0 | } |
274 | | |
275 | | U_CAPI const char* U_EXPORT2 |
276 | | ubrk_getAvailable(int32_t index) |
277 | 0 | { |
278 | |
|
279 | 0 | return uloc_getAvailable(index); |
280 | 0 | } |
281 | | |
282 | | U_CAPI int32_t U_EXPORT2 |
283 | | ubrk_countAvailable() |
284 | 0 | { |
285 | |
|
286 | 0 | return uloc_countAvailable(); |
287 | 0 | } |
288 | | |
289 | | |
290 | | U_CAPI UBool U_EXPORT2 |
291 | | ubrk_isBoundary(UBreakIterator *bi, int32_t offset) |
292 | 0 | { |
293 | 0 | return ((BreakIterator*)bi)->isBoundary(offset); |
294 | 0 | } |
295 | | |
296 | | |
297 | | U_CAPI int32_t U_EXPORT2 |
298 | | ubrk_getRuleStatus(UBreakIterator *bi) |
299 | 599k | { |
300 | 599k | return ((BreakIterator*)bi)->getRuleStatus(); |
301 | 599k | } |
302 | | |
303 | | U_CAPI int32_t U_EXPORT2 |
304 | | ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status) |
305 | 0 | { |
306 | 0 | return ((BreakIterator*)bi)->getRuleStatusVec(fillInVec, capacity, *status); |
307 | 0 | } |
308 | | |
309 | | |
310 | | U_CAPI const char* U_EXPORT2 |
311 | | ubrk_getLocaleByType(const UBreakIterator *bi, |
312 | | ULocDataLocaleType type, |
313 | | UErrorCode* status) |
314 | 0 | { |
315 | 0 | if (bi == NULL) { |
316 | 0 | if (U_SUCCESS(*status)) { |
317 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
318 | 0 | } |
319 | 0 | return NULL; |
320 | 0 | } |
321 | 0 | return ((BreakIterator*)bi)->getLocaleID(type, *status); |
322 | 0 | } |
323 | | |
324 | | |
325 | | U_CAPI void U_EXPORT2 |
326 | | ubrk_refreshUText(UBreakIterator *bi, |
327 | | UText *text, |
328 | | UErrorCode *status) |
329 | 0 | { |
330 | 0 | BreakIterator *bii = reinterpret_cast<BreakIterator *>(bi); |
331 | 0 | bii->refreshInputText(text, *status); |
332 | 0 | } |
333 | | |
334 | | U_CAPI int32_t U_EXPORT2 |
335 | | ubrk_getBinaryRules(UBreakIterator *bi, |
336 | | uint8_t * binaryRules, int32_t rulesCapacity, |
337 | | UErrorCode * status) |
338 | 0 | { |
339 | 0 | if (U_FAILURE(*status)) { |
340 | 0 | return 0; |
341 | 0 | } |
342 | 0 | if ((binaryRules == NULL && rulesCapacity > 0) || rulesCapacity < 0) { |
343 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
344 | 0 | return 0; |
345 | 0 | } |
346 | 0 | RuleBasedBreakIterator* rbbi; |
347 | 0 | if ((rbbi = dynamic_cast<RuleBasedBreakIterator*>(reinterpret_cast<BreakIterator*>(bi))) == NULL) { |
348 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
349 | 0 | return 0; |
350 | 0 | } |
351 | 0 | uint32_t rulesLength; |
352 | 0 | const uint8_t * returnedRules = rbbi->getBinaryRules(rulesLength); |
353 | 0 | if (rulesLength > INT32_MAX) { |
354 | 0 | *status = U_INDEX_OUTOFBOUNDS_ERROR; |
355 | 0 | return 0; |
356 | 0 | } |
357 | 0 | if (binaryRules != NULL) { // if not preflighting |
358 | | // Here we know rulesLength <= INT32_MAX and rulesCapacity >= 0, can cast safely |
359 | 0 | if ((int32_t)rulesLength > rulesCapacity) { |
360 | 0 | *status = U_BUFFER_OVERFLOW_ERROR; |
361 | 0 | } else { |
362 | 0 | uprv_memcpy(binaryRules, returnedRules, rulesLength); |
363 | 0 | } |
364 | 0 | } |
365 | 0 | return (int32_t)rulesLength; |
366 | 0 | } |
367 | | |
368 | | |
369 | | #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |