/src/icu/source/common/unicode/brkiter.h
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // © 2016 and later: Unicode, Inc. and others.  | 
2  |  | // License & terms of use: http://www.unicode.org/copyright.html  | 
3  |  | /*  | 
4  |  | ********************************************************************************  | 
5  |  | *   Copyright (C) 1997-2016, International Business Machines  | 
6  |  | *   Corporation and others.  All Rights Reserved.  | 
7  |  | ********************************************************************************  | 
8  |  | *  | 
9  |  | * File brkiter.h  | 
10  |  | *  | 
11  |  | * Modification History:  | 
12  |  | *  | 
13  |  | *   Date        Name        Description  | 
14  |  | *   02/18/97    aliu        Added typedef for TextCount.  Made DONE const.  | 
15  |  | *   05/07/97    aliu        Fixed DLL declaration.  | 
16  |  | *   07/09/97    jfitz       Renamed BreakIterator and interface synced with JDK  | 
17  |  | *   08/11/98    helena      Sync-up JDK1.2.  | 
18  |  | *   01/13/2000  helena      Added UErrorCode parameter to createXXXInstance methods.  | 
19  |  | ********************************************************************************  | 
20  |  | */  | 
21  |  |  | 
22  |  | #ifndef BRKITER_H  | 
23  |  | #define BRKITER_H  | 
24  |  |  | 
25  |  | #include "unicode/utypes.h"  | 
26  |  |  | 
27  |  | /**  | 
28  |  |  * \file  | 
29  |  |  * \brief C++ API: Break Iterator.  | 
30  |  |  */  | 
31  |  |  | 
32  |  | #include "unicode/utypes.h"  | 
33  |  |  | 
34  |  | #if U_SHOW_CPLUSPLUS_API  | 
35  |  |  | 
36  |  | #if UCONFIG_NO_BREAK_ITERATION  | 
37  |  |  | 
38  |  | U_NAMESPACE_BEGIN  | 
39  |  |  | 
40  |  | /*  | 
41  |  |  * Allow the declaration of APIs with pointers to BreakIterator  | 
42  |  |  * even when break iteration is removed from the build.  | 
43  |  |  */  | 
44  |  | class BreakIterator;  | 
45  |  |  | 
46  |  | U_NAMESPACE_END  | 
47  |  |  | 
48  |  | #else  | 
49  |  |  | 
50  |  | #include "unicode/uobject.h"  | 
51  |  | #include "unicode/unistr.h"  | 
52  |  | #include "unicode/chariter.h"  | 
53  |  | #include "unicode/locid.h"  | 
54  |  | #include "unicode/ubrk.h"  | 
55  |  | #include "unicode/strenum.h"  | 
56  |  | #include "unicode/utext.h"  | 
57  |  | #include "unicode/umisc.h"  | 
58  |  |  | 
59  |  | U_NAMESPACE_BEGIN  | 
60  |  |  | 
61  |  | /**  | 
62  |  |  * The BreakIterator class implements methods for finding the location  | 
63  |  |  * of boundaries in text. BreakIterator is an abstract base class.  | 
64  |  |  * Instances of BreakIterator maintain a current position and scan over  | 
65  |  |  * text returning the index of characters where boundaries occur.  | 
66  |  |  * <p>  | 
67  |  |  * Line boundary analysis determines where a text string can be broken  | 
68  |  |  * when line-wrapping. The mechanism correctly handles punctuation and  | 
69  |  |  * hyphenated words.  | 
70  |  |  * <p>  | 
71  |  |  * Sentence boundary analysis allows selection with correct  | 
72  |  |  * interpretation of periods within numbers and abbreviations, and  | 
73  |  |  * trailing punctuation marks such as quotation marks and parentheses.  | 
74  |  |  * <p>  | 
75  |  |  * Word boundary analysis is used by search and replace functions, as  | 
76  |  |  * well as within text editing applications that allow the user to  | 
77  |  |  * select words with a double click. Word selection provides correct  | 
78  |  |  * interpretation of punctuation marks within and following  | 
79  |  |  * words. Characters that are not part of a word, such as symbols or  | 
80  |  |  * punctuation marks, have word-breaks on both sides.  | 
81  |  |  * <p>  | 
82  |  |  * Character boundary analysis allows users to interact with  | 
83  |  |  * characters as they expect to, for example, when moving the cursor  | 
84  |  |  * through a text string. Character boundary analysis provides correct  | 
85  |  |  * navigation through character strings, regardless of how the  | 
86  |  |  * character is stored.  For example, an accented character might be  | 
87  |  |  * stored as a base character and a diacritical mark. What users  | 
88  |  |  * consider to be a character can differ between languages.  | 
89  |  |  * <p>  | 
90  |  |  * The text boundary positions are found according to the rules  | 
91  |  |  * described in Unicode Standard Annex #29, Text Boundaries, and  | 
92  |  |  * Unicode Standard Annex #14, Line Breaking Properties.  These  | 
93  |  |  * are available at http://www.unicode.org/reports/tr14/ and  | 
94  |  |  * http://www.unicode.org/reports/tr29/.  | 
95  |  |  * <p>  | 
96  |  |  * In addition to the C++ API defined in this header file, a  | 
97  |  |  * plain C API with equivalent functionality is defined in the  | 
98  |  |  * file ubrk.h  | 
99  |  |  * <p>  | 
100  |  |  * Code snippets illustrating the use of the Break Iterator APIs  | 
101  |  |  * are available in the ICU User Guide,  | 
102  |  |  * http://icu-project.org/userguide/boundaryAnalysis.html  | 
103  |  |  * and in the sample program icu/source/samples/break/break.cpp  | 
104  |  |  *  | 
105  |  |  */  | 
106  |  | class U_COMMON_API BreakIterator : public UObject { | 
107  |  | public:  | 
108  |  |     /**  | 
109  |  |      *  destructor  | 
110  |  |      *  @stable ICU 2.0  | 
111  |  |      */  | 
112  |  |     virtual ~BreakIterator();  | 
113  |  |  | 
114  |  |     /**  | 
115  |  |      * Return true if another object is semantically equal to this  | 
116  |  |      * one. The other object should be an instance of the same subclass of  | 
117  |  |      * BreakIterator. Objects of different subclasses are considered  | 
118  |  |      * unequal.  | 
119  |  |      * <P>  | 
120  |  |      * Return true if this BreakIterator is at the same position in the  | 
121  |  |      * same text, and is the same class and type (word, line, etc.) of  | 
122  |  |      * BreakIterator, as the argument.  Text is considered the same if  | 
123  |  |      * it contains the same characters, it need not be the same  | 
124  |  |      * object, and styles are not considered.  | 
125  |  |      * @stable ICU 2.0  | 
126  |  |      */  | 
127  |  |     virtual bool operator==(const BreakIterator&) const = 0;  | 
128  |  |  | 
129  |  |     /**  | 
130  |  |      * Returns the complement of the result of operator==. This forwards to the virtual operator==, so the answer always agrees with the subclass's equality test.  | 
131  |  |      * @param rhs The BreakIterator to be compared for inequality  | 
132  |  |      * @return the complement of the result of operator==  | 
133  |  |      * @stable ICU 2.0  | 
134  |  |      */  | 
135  | 0  |     bool operator!=(const BreakIterator& rhs) const { return !operator==(rhs); } | 
136  |  |  | 
137  |  |     /**  | 
138  |  |      * Return a polymorphic copy of this object.  This is an abstract  | 
139  |  |      * method which subclasses implement.  | 
140  |  |      * @stable ICU 2.0  | 
141  |  |      */  | 
142  |  |     virtual BreakIterator* clone() const = 0;  | 
143  |  |  | 
144  |  |     /**  | 
145  |  |      * Return a polymorphic class ID for this object. Different subclasses  | 
146  |  |      * will return distinct unequal values.  | 
147  |  |      * @stable ICU 2.0  | 
148  |  |      */  | 
149  |  |     virtual UClassID getDynamicClassID(void) const = 0;  | 
150  |  |  | 
151  |  |     /**  | 
152  |  |      * Return a CharacterIterator over the text being analyzed.  | 
153  |  |      * @stable ICU 2.0  | 
154  |  |      */  | 
155  |  |     virtual CharacterIterator& getText(void) const = 0;  | 
156  |  |  | 
157  |  |  | 
158  |  |     /**  | 
159  |  |       *  Get a UText for the text being analyzed.  | 
160  |  |       *  The returned UText is a shallow clone of the UText used internally  | 
161  |  |       *  by the break iterator implementation.  It can safely be used to  | 
162  |  |       *  access the text without impacting any break iterator operations,  | 
163  |  |       *  but the underlying text itself must not be altered.  | 
164  |  |       *  | 
165  |  |       * @param fillIn A UText to be filled in.  If NULL, a new UText will be  | 
166  |  |       *           allocated to hold the result.  | 
167  |  |       * @param status receives any error codes.  | 
168  |  |       * @return   The current UText for this break iterator.  If an input  | 
169  |  |       *           UText was provided, it will always be returned.  | 
170  |  |       * @stable ICU 3.4  | 
171  |  |       */  | 
172  |  |      virtual UText *getUText(UText *fillIn, UErrorCode &status) const = 0;  | 
173  |  |  | 
174  |  |     /**  | 
175  |  |      * Change the text over which this operates. The text boundary is  | 
176  |  |      * reset to the start.  | 
177  |  |      *  | 
178  |  |      * The BreakIterator will retain a reference to the supplied string.  | 
179  |  |      * The caller must not modify or delete the text while the BreakIterator  | 
180  |  |      * retains the reference.  | 
181  |  |      *  | 
182  |  |      * @param text The UnicodeString used to change the text.  | 
183  |  |      * @stable ICU 2.0  | 
184  |  |      */  | 
185  |  |     virtual void  setText(const UnicodeString &text) = 0;  | 
186  |  |  | 
187  |  |     /**  | 
188  |  |      * Reset the break iterator to operate over the text represented by  | 
189  |  |      * the UText.  The iterator position is reset to the start.  | 
190  |  |      *  | 
191  |  |      * This function makes a shallow clone of the supplied UText.  This means  | 
192  |  |      * that the caller is free to immediately close or otherwise reuse the  | 
193  |  |      * UText that was passed as a parameter, but that the underlying text itself  | 
194  |  |      * must not be altered while being referenced by the break iterator.  | 
195  |  |      *  | 
196  |  |      * All index positions returned by break iterator functions are  | 
197  |  |      * native indices from the UText. For example, when breaking UTF-8  | 
198  |  |      * encoded text, the break positions returned by next(), previous(), etc.  | 
199  |  |      * will be UTF-8 string indices, not UTF-16 positions.  | 
200  |  |      *  | 
201  |  |      * @param text The UText used to change the text.  | 
202  |  |      * @param status receives any error codes.  | 
203  |  |      * @stable ICU 3.4  | 
204  |  |      */  | 
205  |  |     virtual void  setText(UText *text, UErrorCode &status) = 0;  | 
206  |  |  | 
207  |  |     /**  | 
208  |  |      * Change the text over which this operates. The text boundary is  | 
209  |  |      * reset to the start.  | 
210  |  |      * Note that setText(UText *) provides similar functionality to this function,  | 
211  |  |      * and is more efficient.  | 
212  |  |      * @param it The CharacterIterator used to change the text.  | 
213  |  |      * @stable ICU 2.0  | 
214  |  |      */  | 
215  |  |     virtual void  adoptText(CharacterIterator* it) = 0;  | 
216  |  |  | 
217  |  |     enum { | 
218  |  |         /**  | 
219  |  |          * DONE is returned by previous(), next(), next(n), following() and preceding()  | 
220  |  |          * when there is no further boundary to return in the requested direction.  | 
221  |  |          * @stable ICU 2.0  | 
222  |  |          */  | 
223  |  |         DONE = (int32_t)-1  | 
224  |  |     };  | 
225  |  |  | 
226  |  |     /**  | 
227  |  |      * Sets the current iteration position to the beginning of the text, position zero.  | 
228  |  |      * @return The offset of the beginning of the text, zero.  | 
229  |  |      * @stable ICU 2.0  | 
230  |  |      */  | 
231  |  |     virtual int32_t first(void) = 0;  | 
232  |  |  | 
233  |  |     /**  | 
234  |  |      * Set the iterator position to the index immediately BEYOND the last character in the text being scanned.  | 
235  |  |      * @return The index immediately BEYOND the last character in the text being scanned.  | 
236  |  |      * @stable ICU 2.0  | 
237  |  |      */  | 
238  |  |     virtual int32_t last(void) = 0;  | 
239  |  |  | 
240  |  |     /**  | 
241  |  |      * Set the iterator position to the boundary preceding the current boundary.  | 
242  |  |      * @return The character index of the previous text boundary or DONE if all  | 
243  |  |      * boundaries have been returned.  | 
244  |  |      * @stable ICU 2.0  | 
245  |  |      */  | 
246  |  |     virtual int32_t previous(void) = 0;  | 
247  |  |  | 
248  |  |     /**  | 
249  |  |      * Advance the iterator to the boundary following the current boundary.  | 
250  |  |      * @return The character index of the next text boundary or DONE if all  | 
251  |  |      * boundaries have been returned.  | 
252  |  |      * @stable ICU 2.0  | 
253  |  |      */  | 
254  |  |     virtual int32_t next(void) = 0;  | 
255  |  |  | 
256  |  |     /**  | 
257  |  |      * Return character index of the current iterator position within the text.  | 
258  |  |      * @return The boundary most recently returned.  | 
259  |  |      * @stable ICU 2.0  | 
260  |  |      */  | 
261  |  |     virtual int32_t current(void) const = 0;  | 
262  |  |  | 
263  |  |     /**  | 
264  |  |      * Advance the iterator to the first boundary following the specified offset.  | 
265  |  |      * The value returned is always greater than the offset or  | 
266  |  |      * the value BreakIterator::DONE  | 
267  |  |      * @param offset the offset to begin scanning.  | 
268  |  |      * @return The first boundary after the specified offset.  | 
269  |  |      * @stable ICU 2.0  | 
270  |  |      */  | 
271  |  |     virtual int32_t following(int32_t offset) = 0;  | 
272  |  |  | 
273  |  |     /**  | 
274  |  |      * Set the iterator position to the first boundary preceding the specified offset.  | 
275  |  |      * The value returned is always smaller than the offset or  | 
276  |  |      * the value BreakIterator::DONE  | 
277  |  |      * @param offset the offset to begin scanning.  | 
278  |  |      * @return The first boundary before the specified offset.  | 
279  |  |      * @stable ICU 2.0  | 
280  |  |      */  | 
281  |  |     virtual int32_t preceding(int32_t offset) = 0;  | 
282  |  |  | 
283  |  |     /**  | 
284  |  |      * Return true if the specified position is a boundary position.  | 
285  |  |      * As a side effect, the current position of the iterator is set  | 
286  |  |      * to the first boundary position at or following the specified offset.  | 
287  |  |      * @param offset the offset to check.  | 
288  |  |      * @return True if "offset" is a boundary position.  | 
289  |  |      * @stable ICU 2.0  | 
290  |  |      */  | 
291  |  |     virtual UBool isBoundary(int32_t offset) = 0;  | 
292  |  |  | 
293  |  |     /**  | 
294  |  |      * Set the iterator position to the nth boundary from the current boundary  | 
295  |  |      * @param n the number of boundaries to move by.  A value of 0  | 
296  |  |      * does nothing.  Negative values move to previous boundaries  | 
297  |  |      * and positive values move to later boundaries.  | 
298  |  |      * @return The new iterator position, or  | 
299  |  |      * DONE if there are fewer than |n| boundaries in the specified direction.  | 
300  |  |      * @stable ICU 2.0  | 
301  |  |      */  | 
302  |  |     virtual int32_t next(int32_t n) = 0;  | 
303  |  |  | 
304  |  |    /**  | 
305  |  |      * For RuleBasedBreakIterators, return the status tag from the break rule  | 
306  |  |      * that determined the boundary at the current iteration position.  | 
307  |  |      * <p>  | 
308  |  |      * For break iterator types that do not support a rule status,  | 
309  |  |      * a default value of 0 is returned.  | 
310  |  |      * <p>  | 
311  |  |      * @return the status from the break rule that determined the boundary at  | 
312  |  |      *         the current iteration position.  | 
313  |  |      * @see RuleBasedBreakIterator::getRuleStatus()  | 
314  |  |      * @see UWordBreak  | 
315  |  |      * @stable ICU 52  | 
316  |  |      */  | 
317  |  |     virtual int32_t getRuleStatus() const;  | 
318  |  |  | 
319  |  |    /**  | 
320  |  |     * For RuleBasedBreakIterators, get the status (tag) values from the break rule(s)  | 
321  |  |     * that determined the boundary at the current iteration position.  | 
322  |  |     * <p>  | 
323  |  |     * For break iterator types that do not support rule status,  | 
324  |  |     * no values are returned.  | 
325  |  |     * <p>  | 
326  |  |     * The returned status value(s) are stored into an array provided by the caller.  | 
327  |  |     * The values are stored in sorted (ascending) order.  | 
328  |  |     * If the capacity of the output array is insufficient to hold the data,  | 
329  |  |     *  the output will be truncated to the available length, and a  | 
330  |  |     *  U_BUFFER_OVERFLOW_ERROR will be signaled.  | 
331  |  |     * <p>  | 
332  |  |     * @see RuleBasedBreakIterator::getRuleStatusVec  | 
333  |  |     *  | 
334  |  |     * @param fillInVec an array to be filled in with the status values.  | 
335  |  |     * @param capacity  the length of the supplied vector.  A length of zero causes  | 
336  |  |     *                  the function to return the number of status values, in the  | 
337  |  |     *                  normal way, without attempting to store any values.  | 
338  |  |     * @param status    receives error codes.  | 
339  |  |     * @return          The number of rule status values from rules that determined  | 
340  |  |     *                  the boundary at the current iteration position.  | 
341  |  |     *                  In the event of a U_BUFFER_OVERFLOW_ERROR, the return value  | 
342  |  |     *                  is the total number of status values that were available,  | 
343  |  |     *                  not the reduced number that were actually returned.  | 
344  |  |     * @see getRuleStatus  | 
345  |  |     * @stable ICU 52  | 
346  |  |     */  | 
347  |  |     virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);  | 
348  |  |  | 
349  |  |     /**  | 
350  |  |      * Create BreakIterator for word-breaks using the given locale.  | 
351  |  |      * Returns an instance of a BreakIterator implementing word breaks.  | 
352  |  |      * WordBreak is useful for word selection (ex. double click)  | 
353  |  |      * @param where the locale.  | 
354  |  |      * @param status the error code  | 
355  |  |      * @return A BreakIterator for word-breaks.  The UErrorCode& status  | 
356  |  |      * parameter is used to return status information to the user.  | 
357  |  |      * To check whether the construction succeeded or not, you should check  | 
358  |  |      * the value of U_SUCCESS(status).  If you wish more detailed information, you  | 
359  |  |      * can check for informational error results which still indicate success.  | 
360  |  |      * U_USING_FALLBACK_WARNING indicates that a fall back locale was used.  For  | 
361  |  |      * example, 'de_CH' was requested, but nothing was found there, so 'de' was  | 
362  |  |      * used.  U_USING_DEFAULT_WARNING indicates that the default locale data was  | 
363  |  |      * used; neither the requested locale nor any of its fall back locales  | 
364  |  |      * could be found.  | 
365  |  |      * The caller owns the returned object and is responsible for deleting it.  | 
366  |  |      * @stable ICU 2.0  | 
367  |  |      */  | 
368  |  |     static BreakIterator* U_EXPORT2  | 
369  |  |     createWordInstance(const Locale& where, UErrorCode& status);  | 
370  |  |  | 
371  |  |     /**  | 
372  |  |      * Create BreakIterator for line-breaks using specified locale.  | 
373  |  |      * Returns an instance of a BreakIterator implementing line breaks. Line  | 
374  |  |      * breaks are logically possible line breaks, actual line breaks are  | 
375  |  |      * usually determined based on display width.  | 
376  |  |      * LineBreak is useful for word wrapping text.  | 
377  |  |      * @param where the locale.  | 
378  |  |      * @param status The error code.  | 
379  |  |      * @return A BreakIterator for line-breaks.  The UErrorCode& status  | 
380  |  |      * parameter is used to return status information to the user.  | 
381  |  |      * To check whether the construction succeeded or not, you should check  | 
382  |  |      * the value of U_SUCCESS(status).  If you wish more detailed information, you  | 
383  |  |      * can check for informational error results which still indicate success.  | 
384  |  |      * U_USING_FALLBACK_WARNING indicates that a fall back locale was used.  For  | 
385  |  |      * example, 'de_CH' was requested, but nothing was found there, so 'de' was  | 
386  |  |      * used.  U_USING_DEFAULT_WARNING indicates that the default locale data was  | 
387  |  |      * used; neither the requested locale nor any of its fall back locales  | 
388  |  |      * could be found.  | 
389  |  |      * The caller owns the returned object and is responsible for deleting it.  | 
390  |  |      * @stable ICU 2.0  | 
391  |  |      */  | 
392  |  |     static BreakIterator* U_EXPORT2  | 
393  |  |     createLineInstance(const Locale& where, UErrorCode& status);  | 
394  |  |  | 
395  |  |     /**  | 
396  |  |      * Create BreakIterator for character-breaks using specified locale  | 
397  |  |      * Returns an instance of a BreakIterator implementing character breaks.  | 
398  |  |      * Character breaks are boundaries of combining character sequences.  | 
399  |  |      * @param where the locale.  | 
400  |  |      * @param status The error code.  | 
401  |  |      * @return A BreakIterator for character-breaks.  The UErrorCode& status  | 
402  |  |      * parameter is used to return status information to the user.  | 
403  |  |      * To check whether the construction succeeded or not, you should check  | 
404  |  |      * the value of U_SUCCESS(status).  If you wish more detailed information, you  | 
405  |  |      * can check for informational error results which still indicate success.  | 
406  |  |      * U_USING_FALLBACK_WARNING indicates that a fall back locale was used.  For  | 
407  |  |      * example, 'de_CH' was requested, but nothing was found there, so 'de' was  | 
408  |  |      * used.  U_USING_DEFAULT_WARNING indicates that the default locale data was  | 
409  |  |      * used; neither the requested locale nor any of its fall back locales  | 
410  |  |      * could be found.  | 
411  |  |      * The caller owns the returned object and is responsible for deleting it.  | 
412  |  |      * @stable ICU 2.0  | 
413  |  |      */  | 
414  |  |     static BreakIterator* U_EXPORT2  | 
415  |  |     createCharacterInstance(const Locale& where, UErrorCode& status);  | 
416  |  |  | 
417  |  |     /**  | 
418  |  |      * Create BreakIterator for sentence-breaks using specified locale  | 
419  |  |      * Returns an instance of a BreakIterator implementing sentence breaks.  | 
420  |  |      * @param where the locale.  | 
421  |  |      * @param status The error code.  | 
422  |  |      * @return A BreakIterator for sentence-breaks.  The UErrorCode& status  | 
423  |  |      * parameter is used to return status information to the user.  | 
424  |  |      * To check whether the construction succeeded or not, you should check  | 
425  |  |      * the value of U_SUCCESS(err).  If you wish more detailed information, you  | 
426  |  |      * can check for informational error results which still indicate success.  | 
427  |  |      * U_USING_FALLBACK_WARNING indicates that a fall back locale was used.  For  | 
428  |  |      * example, 'de_CH' was requested, but nothing was found there, so 'de' was  | 
429  |  |      * used.  U_USING_DEFAULT_WARNING indicates that the default locale data was  | 
430  |  |      * used; neither the requested locale nor any of its fall back locales  | 
431  |  |      * could be found.  | 
432  |  |      * The caller owns the returned object and is responsible for deleting it.  | 
433  |  |      * @stable ICU 2.0  | 
434  |  |      */  | 
435  |  |     static BreakIterator* U_EXPORT2  | 
436  |  |     createSentenceInstance(const Locale& where, UErrorCode& status);  | 
437  |  |  | 
438  |  | #ifndef U_HIDE_DEPRECATED_API  | 
439  |  |     /**  | 
440  |  |      * Create BreakIterator for title-casing breaks using the specified locale  | 
441  |  |      * Returns an instance of a BreakIterator implementing title breaks.  | 
442  |  |      * The iterator returned locates title boundaries as described for  | 
443  |  |      * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,  | 
444  |  |      * please use a word boundary iterator. See {@link #createWordInstance }. | 
445  |  |      *  | 
446  |  |      * @param where the locale.  | 
447  |  |      * @param status The error code.  | 
448  |  |      * @return A BreakIterator for title-breaks.  The UErrorCode& status  | 
449  |  |      * parameter is used to return status information to the user.  | 
450  |  |      * To check whether the construction succeeded or not, you should check  | 
451  |  |      * the value of U_SUCCESS(status).  If you wish more detailed information, you  | 
452  |  |      * can check for informational error results which still indicate success.  | 
453  |  |      * U_USING_FALLBACK_WARNING indicates that a fall back locale was used.  For  | 
454  |  |      * example, 'de_CH' was requested, but nothing was found there, so 'de' was  | 
455  |  |      * used.  U_USING_DEFAULT_WARNING indicates that the default locale data was  | 
456  |  |      * used; neither the requested locale nor any of its fall back locales  | 
457  |  |      * could be found.  | 
458  |  |      * The caller owns the returned object and is responsible for deleting it.  | 
459  |  |      * @deprecated ICU 64 Use createWordInstance instead.  | 
460  |  |      */  | 
461  |  |     static BreakIterator* U_EXPORT2  | 
462  |  |     createTitleInstance(const Locale& where, UErrorCode& status);  | 
463  |  | #endif /* U_HIDE_DEPRECATED_API */  | 
464  |  |  | 
465  |  |     /**  | 
466  |  |      * Get the set of Locales for which TextBoundaries are installed.  | 
467  |  |      * <p><b>Note:</b> this will not return locales added through the register  | 
468  |  |      * call. To see the registered locales too, use the getAvailableLocales  | 
469  |  |      * function that returns a StringEnumeration object </p>  | 
470  |  |      * @param count the output parameter of number of elements in the locale list  | 
471  |  |      * @return available locales  | 
472  |  |      * @stable ICU 2.0  | 
473  |  |      */  | 
474  |  |     static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);  | 
475  |  |  | 
476  |  |     /**  | 
477  |  |      * Get name of the object for the desired Locale, in the desired language.  | 
478  |  |      * @param objectLocale must be from getAvailableLocales.  | 
479  |  |      * @param displayLocale specifies the desired locale for output.  | 
480  |  |      * @param name the fill-in parameter of the return value  | 
481  |  |      * Uses best match.  | 
482  |  |      * @return user-displayable name  | 
483  |  |      * @stable ICU 2.0  | 
484  |  |      */  | 
485  |  |     static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,  | 
486  |  |                                          const Locale& displayLocale,  | 
487  |  |                                          UnicodeString& name);  | 
488  |  |  | 
489  |  |     /**  | 
490  |  |      * Get name of the object for the desired Locale, in the language of the  | 
491  |  |      * default locale.  | 
492  |  |      * @param objectLocale must be from getAvailableLocales  | 
493  |  |      * @param name the fill-in parameter of the return value  | 
494  |  |      * @return user-displayable name  | 
495  |  |      * @stable ICU 2.0  | 
496  |  |      */  | 
497  |  |     static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,  | 
498  |  |                                          UnicodeString& name);  | 
499  |  |  | 
500  |  | #ifndef U_FORCE_HIDE_DEPRECATED_API  | 
501  |  |     /**  | 
502  |  |      * Deprecated functionality. Use clone() instead.  | 
503  |  |      *  | 
504  |  |      * Thread safe client-buffer-based cloning operation  | 
505  |  |      *    Do NOT call delete on a safeclone, since 'new' is not used to create it.  | 
506  |  |      * @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated.  | 
507  |  |      * If buffer is not large enough, new memory will be allocated.  | 
508  |  |      * @param BufferSize reference to size of allocated space.  | 
509  |  |      * If BufferSize == 0, a sufficient size for use in cloning will  | 
510  |  |      * be returned ('pre-flighting') | 
511  |  |      * If BufferSize is not enough for a stack-based safe clone,  | 
512  |  |      * new memory will be allocated.  | 
513  |  |      * @param status to indicate whether the operation went on smoothly or there were errors  | 
514  |  |      *  An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were  | 
515  |  |      *  necessary.  | 
516  |  |      * @return pointer to the new clone  | 
517  |  |      *  | 
518  |  |      * @deprecated ICU 52. Use clone() instead.  | 
519  |  |      */  | 
520  |  |     virtual BreakIterator *  createBufferClone(void *stackBuffer,  | 
521  |  |                                                int32_t &BufferSize,  | 
522  |  |                                                UErrorCode &status) = 0;  | 
523  |  | #endif  // U_FORCE_HIDE_DEPRECATED_API  | 
524  |  |  | 
525  |  | #ifndef U_HIDE_DEPRECATED_API  | 
526  |  |  | 
527  |  |     /**  | 
528  |  |      *   Determine whether the BreakIterator was created in user memory by  | 
529  |  |      *   createBufferClone(), and thus should not be deleted.  Such objects  | 
530  |  |      *   must be closed by an explicit call to the destructor (not delete).  | 
531  |  |      * @deprecated ICU 52. Always delete the BreakIterator.  | 
532  |  |      */  | 
533  |  |     inline UBool isBufferClone(void);  | 
534  |  |  | 
535  |  | #endif /* U_HIDE_DEPRECATED_API */  | 
536  |  |  | 
537  |  | #if !UCONFIG_NO_SERVICE  | 
538  |  |     /**  | 
539  |  |      * Register a new break iterator of the indicated kind, to use in the given locale.  | 
540  |  |      * The break iterator will be adopted.  Clones of the iterator will be returned  | 
541  |  |      * if a request for a break iterator of the given kind matches or falls back to  | 
542  |  |      * this locale.  | 
543  |  |      * Because ICU may choose to cache BreakIterators internally, this must  | 
544  |  |      * be called at application startup, prior to any calls to  | 
545  |  |      * BreakIterator::createXXXInstance to avoid undefined behavior.  | 
546  |  |      * @param toAdopt the BreakIterator instance to be adopted  | 
547  |  |      * @param locale the Locale for which this instance is to be registered  | 
548  |  |      * @param kind the type of iterator for which this instance is to be registered  | 
549  |  |      * @param status the in/out status code, no special meanings are assigned  | 
550  |  |      * @return a registry key that can be used to unregister this instance  | 
551  |  |      * @stable ICU 2.4  | 
552  |  |      */  | 
553  |  |     static URegistryKey U_EXPORT2 registerInstance(BreakIterator* toAdopt,  | 
554  |  |                                         const Locale& locale,  | 
555  |  |                                         UBreakIteratorType kind,  | 
556  |  |                                         UErrorCode& status);  | 
557  |  |  | 
558  |  |     /**  | 
559  |  |      * Unregister a previously-registered BreakIterator using the key returned from the  | 
560  |  |      * register call.  Key becomes invalid after a successful call and should not be used again.  | 
561  |  |      * The BreakIterator corresponding to the key will be deleted.  | 
562  |  |      * Because ICU may choose to cache BreakIterators internally, this should  | 
563  |  |      * be called during application shutdown, after all calls to  | 
564  |  |      * BreakIterator::createXXXInstance to avoid undefined behavior.  | 
565  |  |      * @param key the registry key returned by a previous call to registerInstance  | 
566  |  |      * @param status the in/out status code, no special meanings are assigned  | 
567  |  |      * @return true if the iterator for the key was successfully unregistered  | 
568  |  |      * @stable ICU 2.4  | 
569  |  |      */  | 
570  |  |     static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);  | 
571  |  |  | 
572  |  |     /**  | 
573  |  |      * Return a StringEnumeration over the locales available at the time of the call,  | 
574  |  |      * including registered locales.  | 
575  |  |      * @return a StringEnumeration over the locales available at the time of the call  | 
576  |  |      * @stable ICU 2.4  | 
577  |  |      */  | 
578  |  |     static StringEnumeration* U_EXPORT2 getAvailableLocales(void);  | 
579  |  | #endif  | 
580  |  |  | 
581  |  |     /**  | 
582  |  |      * Returns the locale for this break iterator. Two flavors are available: valid and  | 
583  |  |      * actual locale. The type argument selects the flavor; status is the error code for the call.  | 
584  |  |      * @stable ICU 2.8  | 
585  |  |      */  | 
586  |  |     Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;  | 
587  |  |  | 
588  |  | #ifndef U_HIDE_INTERNAL_API  | 
589  |  |     /** Get the locale for this break iterator object. You can choose between valid and actual locale.  | 
590  |  |      *  @param type type of the locale we're looking for (valid or actual)  | 
591  |  |      *  @param status error code for the operation  | 
592  |  |      *  @return the locale  | 
593  |  |      *  @internal  | 
594  |  |      */  | 
595  |  |     const char *getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;  | 
596  |  | #endif  /* U_HIDE_INTERNAL_API */  | 
597  |  |  | 
598  |  |     /**  | 
599  |  |      *  Set the subject text string upon which the break iterator is operating  | 
600  |  |      *  without changing any other aspect of the matching state.  | 
601  |  |      *  The new and previous text strings must have the same content.  | 
602  |  |      *  | 
603  |  |      *  This function is intended for use in environments where ICU is operating on  | 
604  |  |      *  strings that may move around in memory.  It provides a mechanism for notifying  | 
605  |  |      *  ICU that the string has been relocated, and providing a new UText to access the  | 
606  |  |      *  string in its new position.  | 
607  |  |      *  | 
608  |  |      *  Note that the break iterator implementation never copies the underlying text  | 
609  |  |      *  of a string being processed, but always operates directly on the original text  | 
610  |  |      *  provided by the user. Refreshing simply drops the references to the old text  | 
611  |  |      *  and replaces them with references to the new.  | 
612  |  |      *  | 
613  |  |      *  Caution:  this function is normally used only by very specialized,  | 
614  |  |      *  system-level code.  One example use case is with garbage collection that moves  | 
615  |  |      *  the text in memory.  | 
616  |  |      *  | 
617  |  |      * @param input      The new (moved) text string.  | 
618  |  |      * @param status     Receives errors detected by this function.  | 
619  |  |      * @return           *this  | 
620  |  |      *  | 
621  |  |      * @stable ICU 49  | 
622  |  |      */  | 
623  |  |     virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) = 0;  | 
624  |  |  | 
625  |  |  private:  /* static helpers returning BreakIterator*; usable only by this class and the friends below */  | 
626  |  |     static BreakIterator* buildInstance(const Locale& loc, const char *type, UErrorCode& status);  | 
627  |  |     static BreakIterator* createInstance(const Locale& loc, int32_t kind, UErrorCode& status);  /* NOTE(review): kind is int32_t here but UBreakIteratorType in registerInstance -- presumably the same values; confirm */  | 
628  |  |     static BreakIterator* makeInstance(const Locale& loc, int32_t kind, UErrorCode& status);  | 
629  |  |  | 
630  |  |     friend class ICUBreakIteratorFactory;  /* friends get access to the private members of this class */  | 
631  |  |     friend class ICUBreakIteratorService;  | 
632  |  |  | 
633  |  | protected:  | 
634  |  |     // Do not enclose the protected default/copy constructors in #ifndef U_HIDE_INTERNAL_API,  | 
635  |  |     // or else the compiler will generate implicit public ones.  | 
636  |  |     /** @internal Default constructor. */  | 
637  |  |     BreakIterator();  | 
638  |  |     /** @internal Copy constructor. */  | 
639  |  |     BreakIterator (const BreakIterator &other);  | 
640  |  | #ifndef U_HIDE_INTERNAL_API  | 
641  |  |     /** @internal Constructor taking the valid and the actual locale. */  | 
642  |  |     BreakIterator (const Locale& valid, const Locale &actual);  | 
643  |  |     /** @internal. Assignment Operator, used by RuleBasedBreakIterator. */  | 
644  |  |     BreakIterator &operator = (const BreakIterator &other);  | 
645  |  | #endif  /* U_HIDE_INTERNAL_API */  | 
646  |  |  | 
647  |  | private:  | 
648  |  |  | 
649  |  |     /** @internal (private) Fixed-size char buffers for the actual and the valid locale, ULOC_FULLNAME_CAPACITY chars each. */  | 
650  |  |     char actualLocale[ULOC_FULLNAME_CAPACITY];  | 
651  |  |     char validLocale[ULOC_FULLNAME_CAPACITY];  | 
652  |  | };  | 
653  |  |  | 
654  |  | #ifndef U_HIDE_DEPRECATED_API  | 
655  |  |  | 
656  |  | inline UBool BreakIterator::isBufferClone()  /* deprecated API, compiled only when U_HIDE_DEPRECATED_API is not defined */  | 
657  | 0  | { | 
658  | 0  |     return false;  /* unconditional: this implementation never reports a buffer clone */  | 
659  | 0  | }  | 
660  |  |  | 
661  |  | #endif /* U_HIDE_DEPRECATED_API */  | 
662  |  |  | 
663  |  | U_NAMESPACE_END  | 
664  |  |  | 
665  |  | #endif /* #if !UCONFIG_NO_BREAK_ITERATION */  | 
666  |  |  | 
667  |  | #endif /* U_SHOW_CPLUSPLUS_API */  | 
668  |  |  | 
669  |  | #endif // BRKITER_H  | 
670  |  | //eof  |