/src/mozilla-central/intl/icu/source/common/normlzr.cpp

Source (jump to first uncovered line)
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 *************************************************************************
 * COPYRIGHT: 
 * Copyright (c) 1996-2012, International Business Machines Corporation and
 * others. All Rights Reserved.
 *************************************************************************
 */

#include "unicode/utypes.h"

#if !UCONFIG_NO_NORMALIZATION

#include "unicode/uniset.h"
#include "unicode/unistr.h"
#include "unicode/chariter.h"
#include "unicode/schriter.h"
#include "unicode/uchriter.h"
#include "unicode/normlzr.h"
#include "unicode/utf16.h"
#include "cmemory.h"
#include "normalizer2impl.h"
#include "uprops.h"  // for uniset_getUnicode32Instance()

#if defined(_ARM64_) && defined(move32)
 // System can define move32 intrinsics, but the char iters define move32 method
 // using same undef trick in headers, so undef here to re-enable the method.
#undef move32
#endif

U_NAMESPACE_BEGIN

// NOTE(review): presumably expands to the ICU class-ID (RTTI) boilerplate for
// Normalizer; the macro is defined outside this file -- confirm in uobject.h.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Normalizer)

//-------------------------------------------------------------------------
// Constructors and other boilerplate
//-------------------------------------------------------------------------

/**
 * Constructs a Normalizer that iterates over the contents of a UnicodeString.
 *
 * The string is wrapped in a newly allocated StringCharacterIterator that is
 * stored in `text`.  No options are set, both input indexes start at 0 and
 * the normalized-output buffer starts out empty.
 *
 * @param str  the input text
 * @param mode the normalization mode to apply while iterating
 */
Normalizer::Normalizer(const UnicodeString& str, UNormalizationMode mode)
    : UObject(),
      fFilteredNorm2(NULL),
      fNorm2(NULL),
      fUMode(mode),
      fOptions(0),
      text(new StringCharacterIterator(str)),
      currentIndex(0),
      nextIndex(0),
      buffer(),
      bufferPos(0)
{
    // Select fNorm2 for the mode/options chosen above.
    init();
}

/**
 * Constructs a Normalizer that iterates over a UTF-16 buffer.
 *
 * The buffer is wrapped in a newly allocated UCharCharacterIterator that is
 * stored in `text`.  No options are set, both input indexes start at 0 and
 * the normalized-output buffer starts out empty.
 * NOTE(review): whether the iterator copies or aliases the caller's buffer is
 * not visible in this file -- confirm before freeing `str` early.
 *
 * @param str    the input text
 * @param length the length value passed to the UCharCharacterIterator
 * @param mode   the normalization mode to apply while iterating
 */
Normalizer::Normalizer(ConstChar16Ptr str, int32_t length, UNormalizationMode mode)
    : UObject(),
      fFilteredNorm2(NULL),
      fNorm2(NULL),
      fUMode(mode),
      fOptions(0),
      text(new UCharCharacterIterator(str, length)),
      currentIndex(0),
      nextIndex(0),
      buffer(),
      bufferPos(0)
{
    // Select fNorm2 for the mode/options chosen above.
    init();
}

/**
 * Constructs a Normalizer that iterates over the text of a CharacterIterator.
 *
 * The iterator is not used directly; a clone of it is stored in `text`.
 * No options are set, both input indexes start at 0 and the
 * normalized-output buffer starts out empty.
 *
 * @param iter the iterator whose clone supplies the input text
 * @param mode the normalization mode to apply while iterating
 */
Normalizer::Normalizer(const CharacterIterator& iter, UNormalizationMode mode)
    : UObject(),
      fFilteredNorm2(NULL),
      fNorm2(NULL),
      fUMode(mode),
      fOptions(0),
      text(iter.clone()),
      currentIndex(0),
      nextIndex(0),
      buffer(),
      bufferPos(0)
{
    // Select fNorm2 for the mode/options chosen above.
    init();
}

/**
 * Copy constructor.
 *
 * Copies the mode, options, both input indexes, the normalized buffer and the
 * buffer position, and clones the source's text iterator.  The normalizer
 * pointers are not copied: they start out NULL and init() selects them again
 * from the copied mode and options.
 *
 * @param copy the Normalizer to duplicate
 */
Normalizer::Normalizer(const Normalizer &copy)
    : UObject(copy),
      fFilteredNorm2(NULL),
      fNorm2(NULL),
      fUMode(copy.fUMode),
      fOptions(copy.fOptions),
      text(copy.text->clone()),
      currentIndex(copy.currentIndex),
      nextIndex(copy.nextIndex),
      buffer(copy.buffer),
      bufferPos(copy.bufferPos)
{
    // Re-derive fNorm2 / fFilteredNorm2 for this object.
    init();
}

void
Normalizer::init() {
    UErrorCode errorCode=U_ZERO_ERROR;
    fNorm2=Normalizer2Factory::getInstance(fUMode, errorCode);
    if(fOptions&UNORM_UNICODE_3_2) {
        delete fFilteredNorm2;
        fNorm2=fFilteredNorm2=
            new FilteredNormalizer2(*fNorm2, *uniset_getUnicode32Instance(errorCode));
    }
    if(U_FAILURE(errorCode)) {
        errorCode=U_ZERO_ERROR;
        fNorm2=Normalizer2Factory::getNoopInstance(errorCode);
    }
}

/**
 * Destructor.  Releases the two objects this Normalizer deletes: the filtered
 * normalizer wrapper (if any) and the text iterator.  fNorm2 is not deleted
 * here; it is either the same object as fFilteredNorm2 or an instance handed
 * out by Normalizer2Factory.
 */
Normalizer::~Normalizer()
{
    delete fFilteredNorm2;  // may be NULL
    delete text;
}

/**
 * Creates a heap-allocated copy of this Normalizer via the copy constructor.
 *
 * @return the new copy, allocated with `new`
 */
Normalizer*
Normalizer::clone() const
{
    Normalizer *duplicate=new Normalizer(*this);
    return duplicate;
}

/**
 * Generates a hash code for this iterator.
 *
 * The hash is the sum of the text iterator's hash, the mode, the options,
 * the normalized buffer's hash, the buffer position and both input indexes.
 * The sum is accumulated in unsigned arithmetic so that wrap-around is well
 * defined; adding the signed terms directly could overflow, which is
 * undefined behavior for signed integers.
 *
 * @return the combined hash value
 */
int32_t Normalizer::hashCode() const
{
    uint32_t hash=static_cast<uint32_t>(text->hashCode());
    hash+=static_cast<uint32_t>(fUMode);
    hash+=static_cast<uint32_t>(fOptions);
    hash+=static_cast<uint32_t>(buffer.hashCode());
    hash+=static_cast<uint32_t>(bufferPos);
    hash+=static_cast<uint32_t>(currentIndex);
    hash+=static_cast<uint32_t>(nextIndex);
    return static_cast<int32_t>(hash);
}
    
/**
 * Compares two Normalizers for equality.
 *
 * An object always equals itself.  Otherwise two objects are equal when they
 * have the same mode and options, their text iterators compare equal, and
 * their normalized buffer, buffer position and nextIndex match.
 *
 * NOTE(review): currentIndex is not compared here although hashCode()
 * includes it -- confirm whether that is intentional.
 *
 * @param that the Normalizer to compare with
 * @return nonzero if the two objects are equal
 */
UBool Normalizer::operator==(const Normalizer& that) const
{
    UBool same=(this==&that);
    if(!same) {
        const UBool sameSettings=
            fUMode==that.fUMode && fOptions==that.fOptions;
        same=
            sameSettings &&
            *text==*that.text &&
            buffer==that.buffer &&
            bufferPos==that.bufferPos &&
            nextIndex==that.nextIndex;
    }
    return same;
}

//-------------------------------------------------------------------------
// Static utility methods
//-------------------------------------------------------------------------

/**
 * Normalizes `source` into `result` using the given mode.
 *
 * If `source` is bogus or `status` already indicates a failure, `result` is
 * set to bogus (and `status` to U_ILLEGAL_ARGUMENT_ERROR if it was not
 * already a failure).  When UNORM_UNICODE_3_2 is set in `options`, the
 * normalizer is wrapped in a FilteredNormalizer2 built on the Unicode 3.2 set.
 *
 * The Unicode 3.2 set is checked before it is dereferenced, so a failed
 * lookup leaves `status` set instead of forming a reference from a null
 * pointer.
 *
 * @param source  the text to normalize
 * @param mode    the normalization mode
 * @param options bit set of options; only UNORM_UNICODE_3_2 is examined here
 * @param result  receives the normalized text; may be the same object as source
 * @param status  in/out error code
 */
void U_EXPORT2
Normalizer::normalize(const UnicodeString& source, 
                      UNormalizationMode mode, int32_t options,
                      UnicodeString& result, 
                      UErrorCode &status) {
    if(source.isBogus() || U_FAILURE(status)) {
        result.setToBogus();
        if(U_SUCCESS(status)) {
            status=U_ILLEGAL_ARGUMENT_ERROR;
        }
        return;
    }

    // If source and result are the same object, normalize into a temporary
    // and copy it back at the end.
    UnicodeString localDest;
    UnicodeString *dest=(&source!=&result) ? &result : &localDest;

    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
    if(U_SUCCESS(status)) {
        if(options&UNORM_UNICODE_3_2) {
            const UnicodeSet *uni32=uniset_getUnicode32Instance(status);
            if(uni32==NULL && U_SUCCESS(status)) {
                // Defensive: no set and no reported error; never dereference NULL.
                status=U_INTERNAL_PROGRAM_ERROR;
            }
            if(U_SUCCESS(status)) {
                FilteredNormalizer2(*n2, *uni32).normalize(source, *dest, status);
            }
        } else {
            n2->normalize(source, *dest, status);
        }
    }
    if(dest==&localDest && U_SUCCESS(status)) {
        result=*dest;
    }
}

/**
 * Composes `source` into `result`.  Forwards to normalize() with UNORM_NFKC
 * when `compat` is set and UNORM_NFC otherwise.
 *
 * @param source  the text to compose
 * @param compat  selects the compatibility form (NFKC) instead of NFC
 * @param options passed through to normalize()
 * @param result  receives the output
 * @param status  in/out error code, passed through to normalize()
 */
void U_EXPORT2
Normalizer::compose(const UnicodeString& source, 
                    UBool compat, int32_t options,
                    UnicodeString& result, 
                    UErrorCode &status) {
    const UNormalizationMode composeMode=compat ? UNORM_NFKC : UNORM_NFC;
    normalize(source, composeMode, options, result, status);
}

/**
 * Decomposes `source` into `result`.  Forwards to normalize() with UNORM_NFKD
 * when `compat` is set and UNORM_NFD otherwise.
 *
 * @param source  the text to decompose
 * @param compat  selects the compatibility form (NFKD) instead of NFD
 * @param options passed through to normalize()
 * @param result  receives the output
 * @param status  in/out error code, passed through to normalize()
 */
void U_EXPORT2
Normalizer::decompose(const UnicodeString& source, 
                      UBool compat, int32_t options,
                      UnicodeString& result, 
                      UErrorCode &status) {
    const UNormalizationMode decomposeMode=compat ? UNORM_NFKD : UNORM_NFD;
    normalize(source, decomposeMode, options, result, status);
}

/**
 * Runs the quick check of the normalizer selected by `mode` on `source`.
 *
 * When UNORM_UNICODE_3_2 is set in `options`, the check is run through a
 * FilteredNormalizer2 built on the Unicode 3.2 set.  The set is checked
 * before it is dereferenced, so a failed lookup is reported through `status`
 * instead of forming a reference from a null pointer.
 *
 * @param source  the text to check
 * @param mode    the normalization mode
 * @param options bit set of options; only UNORM_UNICODE_3_2 is examined here
 * @param status  in/out error code
 * @return the normalizer's quick-check result, or UNORM_MAYBE if the
 *         normalizer (or the Unicode 3.2 set) could not be obtained
 */
UNormalizationCheckResult
Normalizer::quickCheck(const UnicodeString& source,
                       UNormalizationMode mode, int32_t options,
                       UErrorCode &status) {
    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
    if(U_FAILURE(status)) {
        return UNORM_MAYBE;
    }
    if((options&UNORM_UNICODE_3_2)==0) {
        return n2->quickCheck(source, status);
    }
    const UnicodeSet *uni32=uniset_getUnicode32Instance(status);
    if(uni32==NULL && U_SUCCESS(status)) {
        // Defensive: no set and no reported error; never dereference NULL.
        status=U_INTERNAL_PROGRAM_ERROR;
    }
    if(U_FAILURE(status)) {
        return UNORM_MAYBE;
    }
    return FilteredNormalizer2(*n2, *uni32).quickCheck(source, status);
}

/**
 * Tests whether `source` is normalized according to `mode`.
 *
 * When UNORM_UNICODE_3_2 is set in `options`, the test is run through a
 * FilteredNormalizer2 built on the Unicode 3.2 set.  The set is checked
 * before it is dereferenced, so a failed lookup is reported through `status`
 * instead of forming a reference from a null pointer.
 *
 * @param source  the text to test
 * @param mode    the normalization mode
 * @param options bit set of options; only UNORM_UNICODE_3_2 is examined here
 * @param status  in/out error code
 * @return the normalizer's isNormalized() result, or FALSE if the normalizer
 *         (or the Unicode 3.2 set) could not be obtained
 */
UBool
Normalizer::isNormalized(const UnicodeString& source,
                         UNormalizationMode mode, int32_t options,
                         UErrorCode &status) {
    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
    if(U_FAILURE(status)) {
        return FALSE;
    }
    if((options&UNORM_UNICODE_3_2)==0) {
        return n2->isNormalized(source, status);
    }
    const UnicodeSet *uni32=uniset_getUnicode32Instance(status);
    if(uni32==NULL && U_SUCCESS(status)) {
        // Defensive: no set and no reported error; never dereference NULL.
        status=U_INTERNAL_PROGRAM_ERROR;
    }
    if(U_FAILURE(status)) {
        return FALSE;
    }
    return FilteredNormalizer2(*n2, *uni32).isNormalized(source, status);
}

/**
 * Concatenates `left` and `right` into `result` by copying `left` and then
 * appending `right` with the append() of the normalizer selected by `mode`.
 *
 * If either input is bogus or `errorCode` already indicates a failure,
 * `result` is set to bogus (and `errorCode` to U_ILLEGAL_ARGUMENT_ERROR if it
 * was not already a failure).  When UNORM_UNICODE_3_2 is set in `options`,
 * the append goes through a FilteredNormalizer2 built on the Unicode 3.2 set.
 * The set is checked before it is dereferenced, so a failed lookup is
 * reported through `errorCode` instead of forming a reference from a null
 * pointer.
 *
 * @param left      the first string
 * @param right     the second string; may be the same object as result
 * @param result    receives the concatenation
 * @param mode      the normalization mode
 * @param options   bit set of options; only UNORM_UNICODE_3_2 is examined here
 * @param errorCode in/out error code
 * @return result
 */
UnicodeString & U_EXPORT2
Normalizer::concatenate(const UnicodeString &left, const UnicodeString &right,
                        UnicodeString &result,
                        UNormalizationMode mode, int32_t options,
                        UErrorCode &errorCode) {
    if(left.isBogus() || right.isBogus() || U_FAILURE(errorCode)) {
        result.setToBogus();
        if(U_SUCCESS(errorCode)) {
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
        }
        return result;
    }

    // If right and result are the same object, build into a temporary:
    // copying left into result first would overwrite right before it is read.
    UnicodeString localDest;
    UnicodeString *dest=(&right!=&result) ? &result : &localDest;

    *dest=left;
    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, errorCode);
    if(U_SUCCESS(errorCode)) {
        if(options&UNORM_UNICODE_3_2) {
            const UnicodeSet *uni32=uniset_getUnicode32Instance(errorCode);
            if(uni32==NULL && U_SUCCESS(errorCode)) {
                // Defensive: no set and no reported error; never dereference NULL.
                errorCode=U_INTERNAL_PROGRAM_ERROR;
            }
            if(U_SUCCESS(errorCode)) {
                FilteredNormalizer2(*n2, *uni32).append(*dest, right, errorCode);
            }
        } else {
            n2->append(*dest, right, errorCode);
        }
    }
    if(dest==&localDest && U_SUCCESS(errorCode)) {
        result=*dest;
    }
    return result;
}

//-------------------------------------------------------------------------
// Iteration API
//-------------------------------------------------------------------------

/**
 * Returns the code point at the current position of the normalized text
 * without advancing.  If the buffer has been fully consumed, the next segment
 * is normalized first; DONE is returned when that produces nothing.
 */
UChar32 Normalizer::current() {
    if(bufferPos>=buffer.length() && !nextNormalize()) {
        return DONE;
    }
    return buffer.char32At(bufferPos);
}

/**
 * Returns the next code point of the normalized text and advances the
 * buffer position past it.  If the buffer has been fully consumed, the next
 * segment is normalized first; {@link #DONE} is returned when that produces
 * nothing.
 */
UChar32 Normalizer::next() {
    if(bufferPos>=buffer.length() && !nextNormalize()) {
        return DONE;
    }
    const UChar32 codePoint=buffer.char32At(bufferPos);
    // Step over one or two UTF-16 code units, depending on the code point.
    bufferPos+=U16_LENGTH(codePoint);
    return codePoint;
}

/**
 * Returns the previous code point of the normalized text and moves the
 * buffer position back over it.  If the position is at the start of the
 * buffer, the preceding segment is normalized first; {@link #DONE} is
 * returned when that produces nothing.
 */
UChar32 Normalizer::previous() {
    if(bufferPos<=0 && !previousNormalize()) {
        return DONE;
    }
    const UChar32 codePoint=buffer.char32At(bufferPos-1);
    // Step back over one or two UTF-16 code units, depending on the code point.
    bufferPos-=U16_LENGTH(codePoint);
    return codePoint;
}

void Normalizer::reset() {
    currentIndex=nextIndex=text->setToStart();
    clearBuffer();
}

/**
 * Repositions the iteration in the input text without normalizing anything.
 * Both input indexes are set to the index the text iterator reports after
 * the move, and any buffered normalized output is discarded.
 *
 * @param index the requested input index
 */
void
Normalizer::setIndexOnly(int32_t index) {
    text->setIndex(index);  // pins index
    const int32_t pinned=text->getIndex();
    nextIndex=pinned;
    currentIndex=pinned;
    clearBuffer();
}

/**
 * Returns the first character of the normalized text.  The
 * <tt>Normalizer</tt> is reset to the beginning of the input and the result
 * of next() is returned.
 */
UChar32 Normalizer::first() {
    reset();
    const UChar32 firstChar=next();
    return firstChar;
}

/**
 * Returns the last character of the normalized text.  The text iterator is
 * moved to its end, both input indexes are set to the position it reports,
 * the buffer is cleared, and the result of previous() is returned.
 */
UChar32 Normalizer::last() {
    const int32_t end=text->setToEnd();
    nextIndex=end;
    currentIndex=end;
    clearBuffer();
    return previous();
}

/**
 * Retrieves the current iteration position in the input text that is
 * being normalized.  This method is useful in applications such as
 * searching, where you need to be able to determine the position in
 * the input text that corresponds to a given normalized output character.
 * <p>
 * <b>Note:</b> This method reports a position in the <em>input</em>, while
 * {@link #next} and {@link #previous} iterate through characters in the
 * <em>output</em>.  This means that there is not necessarily a one-to-one
 * correspondence between characters returned by <tt>next</tt> and
 * <tt>previous</tt> and the indices passed to and returned from
 * <tt>setIndex</tt> and {@link #getIndex}.
 * <p>
 * While unread characters remain in the buffer, currentIndex is returned;
 * once the buffer has been consumed, nextIndex is returned.
 */
int32_t Normalizer::getIndex() const {
    const UBool bufferConsumed=(bufferPos>=buffer.length());
    return bufferConsumed ? nextIndex : currentIndex;
}

/**
 * Retrieves the index of the start of the input text, as reported by the
 * underlying text iterator.  This is the begin index of the
 * <tt>CharacterIterator</tt> or the start (i.e. 0) of the <tt>String</tt>
 * over which this <tt>Normalizer</tt> is iterating.
 */
int32_t Normalizer::startIndex() const {
    const int32_t begin=text->startIndex();
    return begin;
}

/**
 * Retrieves the index of the end of the input text, as reported by the
 * underlying text iterator.  This is the end index of the
 * <tt>CharacterIterator</tt> or the length of the <tt>String</tt>
 * over which this <tt>Normalizer</tt> is iterating.
 */
int32_t Normalizer::endIndex() const {
    const int32_t limit=text->endIndex();
    return limit;
}

//-------------------------------------------------------------------------
// Property access methods
//-------------------------------------------------------------------------

/**
 * Changes the normalization mode and re-selects the normalizer
 * implementation for it via init().
 *
 * @param newMode the normalization mode to use from now on
 */
void
Normalizer::setMode(UNormalizationMode newMode)
{
    fUMode=newMode;
    // fNorm2 depends on the mode, so it has to be chosen again.
    init();
}

/**
 * Returns the normalization mode currently stored in this object.
 */
UNormalizationMode
Normalizer::getUMode() const
{
    const UNormalizationMode mode=fUMode;
    return mode;
}

/**
 * Sets or clears option bits and re-selects the normalizer implementation
 * via init().
 *
 * @param option the option bit(s) to change
 * @param value  nonzero to set the bits in fOptions, zero to clear them
 */
void
Normalizer::setOption(int32_t option, 
                      UBool value) 
{
    fOptions=value ? (fOptions | option) : (fOptions & ~option);
    // fNorm2 depends on the options, so it has to be chosen again.
    init();
}

/**
 * Reports whether any of the given option bits is currently set.
 *
 * @param option the option bit(s) to test
 * @return nonzero if fOptions and option have at least one bit in common
 */
UBool
Normalizer::getOption(int32_t option) const
{
    const int32_t matching=fOptions & option;
    return matching != 0;
}

/**
 * Set the input text over which this <tt>Normalizer</tt> will iterate.
 * The iteration position is set to the beginning of the input text.
 */
void
Normalizer::setText(const UnicodeString& newText, 
                    UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return;
    }
    CharacterIterator *newIter = new StringCharacterIterator(newText);
    if (newIter == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    delete text;
    text = newIter;
    reset();
}

/**
 * Set the input text over which this <tt>Normalizer</tt> will iterate.
 * The iteration position is set to the beginning of the string.
 */
void
Normalizer::setText(const CharacterIterator& newText, 
                    UErrorCode &status) 
{
    if (U_FAILURE(status)) {
        return;
    }
    CharacterIterator *newIter = newText.clone();
    if (newIter == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    delete text;
    text = newIter;
    reset();
}

/**
 * Set the input text over which this <tt>Normalizer</tt> will iterate.
 * The iteration position is set to the beginning of the text.
 *
 * Does nothing if status already indicates a failure.  The current text
 * iterator is only replaced once the new UCharCharacterIterator has been
 * allocated; otherwise status is set to U_MEMORY_ALLOCATION_ERROR.
 *
 * @param newText the new input text
 * @param length  the length value passed to the UCharCharacterIterator
 * @param status  in/out error code
 */
void
Normalizer::setText(ConstChar16Ptr newText,
                    int32_t length,
                    UErrorCode &status)
{
    if (U_SUCCESS(status)) {
        CharacterIterator *replacement = new UCharCharacterIterator(newText, length);
        if (replacement != NULL) {
            delete text;
            text = replacement;
            reset();
        } else {
            status = U_MEMORY_ALLOCATION_ERROR;
        }
    }
}

/**
 * Copies the text under iteration into the UnicodeString referred to by
 * "result", by delegating to the underlying text iterator.
 * @param result Receives a copy of the text under iteration.
 */
void
Normalizer::getText(UnicodeString&  result) 
{
    CharacterIterator *source=text;
    source->getText(result);
}

//-------------------------------------------------------------------------
// Private utility methods
//-------------------------------------------------------------------------

/**
 * Empties the normalized-output buffer and moves the buffer position back
 * to its start.
 */
void Normalizer::clearBuffer() {
    bufferPos=0;
    buffer.remove();
}

/**
 * Normalizes the next segment of input into the buffer.
 *
 * Starting at nextIndex, reads one code point unconditionally and then keeps
 * reading until the normalizer reports a boundary before the code point just
 * read (that code point is pushed back) or the input ends.  On return
 * currentIndex is the start of the segment, nextIndex is the input position
 * after it, and the buffer position is at the start of the buffer.
 *
 * @return FALSE if there was no more input, if normalization failed, or if
 *         the normalized segment is empty; nonzero otherwise
 */
UBool
Normalizer::nextNormalize() {
    clearBuffer();
    currentIndex=nextIndex;
    text->setIndex(nextIndex);
    if(!text->hasNext()) {
        return FALSE;
    }
    // Skip at least one character so we make progress.
    UnicodeString segment(text->next32PostInc());
    for(;;) {
        if(!text->hasNext()) {
            break;
        }
        const UChar32 codePoint=text->next32PostInc();
        if(fNorm2->hasBoundaryBefore(codePoint)) {
            // This code point starts the following segment; un-read it.
            text->move32(-1, CharacterIterator::kCurrent);
            break;
        }
        segment.append(codePoint);
    }
    nextIndex=text->getIndex();
    UErrorCode errorCode=U_ZERO_ERROR;
    fNorm2->normalize(segment, buffer, errorCode);
    if(U_FAILURE(errorCode)) {
        return FALSE;
    }
    return !buffer.isEmpty();
}

/**
 * Normalizes the segment of input that precedes currentIndex into the buffer.
 *
 * Walks backwards from currentIndex, prepending each code point to the
 * segment, and stops after the first code point that the normalizer reports
 * a boundary before (or at the start of the input).  On return nextIndex is
 * the old currentIndex, currentIndex is the start of the segment, and the
 * buffer position is at the end of the buffer.
 *
 * @return FALSE if there was no preceding input, if normalization failed, or
 *         if the normalized segment is empty; nonzero otherwise
 */
UBool
Normalizer::previousNormalize() {
    clearBuffer();
    nextIndex=currentIndex;
    text->setIndex(currentIndex);
    if(!text->hasPrevious()) {
        return FALSE;
    }
    UnicodeString segment;
    for(;;) {
        if(!text->hasPrevious()) {
            break;
        }
        const UChar32 codePoint=text->previous32();
        segment.insert(0, codePoint);
        if(fNorm2->hasBoundaryBefore(codePoint)) {
            // The segment starts at this code point.
            break;
        }
    }
    currentIndex=text->getIndex();
    UErrorCode errorCode=U_ZERO_ERROR;
    fNorm2->normalize(segment, buffer, errorCode);
    // Backward iteration consumes the buffer from its end.
    bufferPos=buffer.length();
    if(U_FAILURE(errorCode)) {
        return FALSE;
    }
    return !buffer.isEmpty();
}

U_NAMESPACE_END

#endif /* #if !UCONFIG_NO_NORMALIZATION */

Coverage Report

Created: 2018-09-25 14:53

Line	Count	Source (jump to first uncovered line)
1		// © 2016 and later: Unicode, Inc. and others.
2		// License & terms of use: http://www.unicode.org/copyright.html
3		/*
4		*************************************************************************
5		* COPYRIGHT:
6		* Copyright (c) 1996-2012, International Business Machines Corporation and
7		* others. All Rights Reserved.
8		*************************************************************************
9		*/
10
11		#include "unicode/utypes.h"
12
13		#if !UCONFIG_NO_NORMALIZATION
14
15		#include "unicode/uniset.h"
16		#include "unicode/unistr.h"
17		#include "unicode/chariter.h"
18		#include "unicode/schriter.h"
19		#include "unicode/uchriter.h"
20		#include "unicode/normlzr.h"
21		#include "unicode/utf16.h"
22		#include "cmemory.h"
23		#include "normalizer2impl.h"
24		#include "uprops.h" // for uniset_getUnicode32Instance()
25
26		#if defined(_ARM64_) && defined(move32)
27		// System can define move32 intrinsics, but the char iters define move32 method
28		// using same undef trick in headers, so undef here to re-enable the method.
29		#undef move32
30		#endif
31
32		U_NAMESPACE_BEGIN
33
34		UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Normalizer)
35
36		//-------------------------------------------------------------------------
37		// Constructors and other boilerplate
38		//-------------------------------------------------------------------------
39
40		Normalizer::Normalizer(const UnicodeString& str, UNormalizationMode mode) :
41		UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0),
42		text(new StringCharacterIterator(str)),
43		currentIndex(0), nextIndex(0),
44		buffer(), bufferPos(0)
45	0	{
46	0	init();
47	0	}
48
49		Normalizer::Normalizer(ConstChar16Ptr str, int32_t length, UNormalizationMode mode) :
50		UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0),
51		text(new UCharCharacterIterator(str, length)),
52		currentIndex(0), nextIndex(0),
53		buffer(), bufferPos(0)
54	0	{
55	0	init();
56	0	}
57
58		Normalizer::Normalizer(const CharacterIterator& iter, UNormalizationMode mode) :
59		UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0),
60		text(iter.clone()),
61		currentIndex(0), nextIndex(0),
62		buffer(), bufferPos(0)
63	0	{
64	0	init();
65	0	}
66
67		Normalizer::Normalizer(const Normalizer &copy) :
68		UObject(copy), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(copy.fUMode), fOptions(copy.fOptions),
69		text(copy.text->clone()),
70		currentIndex(copy.currentIndex), nextIndex(copy.nextIndex),
71		buffer(copy.buffer), bufferPos(copy.bufferPos)
72	0	{
73	0	init();
74	0	}
75
76		void
77	0	Normalizer::init() {
78	0	UErrorCode errorCode=U_ZERO_ERROR;
79	0	fNorm2=Normalizer2Factory::getInstance(fUMode, errorCode);
80	0	if(fOptions&UNORM_UNICODE_3_2) {
81	0	delete fFilteredNorm2;
82	0	fNorm2=fFilteredNorm2=
83	0	new FilteredNormalizer2(fNorm2, uniset_getUnicode32Instance(errorCode));
84	0	}
85	0	if(U_FAILURE(errorCode)) {
86	0	errorCode=U_ZERO_ERROR;
87	0	fNorm2=Normalizer2Factory::getNoopInstance(errorCode);
88	0	}
89	0	}
90
91		Normalizer::~Normalizer()
92	0	{
93	0	delete fFilteredNorm2;
94	0	delete text;
95	0	}
96
97		Normalizer*
98		Normalizer::clone() const
99	0	{
100	0	return new Normalizer(*this);
101	0	}
102
103		/**
104		* Generates a hash code for this iterator.
105		*/
106		int32_t Normalizer::hashCode() const
107	0	{
108	0	return text->hashCode() + fUMode + fOptions + buffer.hashCode() + bufferPos + currentIndex + nextIndex;
109	0	}
110
111		UBool Normalizer::operator==(const Normalizer& that) const
112	0	{
113	0	return
114	0	this==&that \|\|
115	0	(fUMode==that.fUMode &&
116	0	fOptions==that.fOptions &&
117	0	text==that.text &&
118	0	buffer==that.buffer &&
119	0	bufferPos==that.bufferPos &&
120	0	nextIndex==that.nextIndex);
121	0	}
122
123		//-------------------------------------------------------------------------
124		// Static utility methods
125		//-------------------------------------------------------------------------
126
127		void U_EXPORT2
128		Normalizer::normalize(const UnicodeString& source,
129		UNormalizationMode mode, int32_t options,
130		UnicodeString& result,
131	0	UErrorCode &status) {
132	0	if(source.isBogus() \|\| U_FAILURE(status)) {
133	0	result.setToBogus();
134	0	if(U_SUCCESS(status)) {
135	0	status=U_ILLEGAL_ARGUMENT_ERROR;
136	0	}
137	0	} else {
138	0	UnicodeString localDest;
139	0	UnicodeString *dest;
140	0
141	0	if(&source!=&result) {
142	0	dest=&result;
143	0	} else {
144	0	// the source and result strings are the same object, use a temporary one
145	0	dest=&localDest;
146	0	}
147	0	const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
148	0	if(U_SUCCESS(status)) {
149	0	if(options&UNORM_UNICODE_3_2) {
150	0	FilteredNormalizer2(n2, uniset_getUnicode32Instance(status)).
151	0	normalize(source, *dest, status);
152	0	} else {
153	0	n2->normalize(source, *dest, status);
154	0	}
155	0	}
156	0	if(dest==&localDest && U_SUCCESS(status)) {
157	0	result=*dest;
158	0	}
159	0	}
160	0	}
161
162		void U_EXPORT2
163		Normalizer::compose(const UnicodeString& source,
164		UBool compat, int32_t options,
165		UnicodeString& result,
166	0	UErrorCode &status) {
167	0	normalize(source, compat ? UNORM_NFKC : UNORM_NFC, options, result, status);
168	0	}
169
170		void U_EXPORT2
171		Normalizer::decompose(const UnicodeString& source,
172		UBool compat, int32_t options,
173		UnicodeString& result,
174	0	UErrorCode &status) {
175	0	normalize(source, compat ? UNORM_NFKD : UNORM_NFD, options, result, status);
176	0	}
177
178		UNormalizationCheckResult
179		Normalizer::quickCheck(const UnicodeString& source,
180		UNormalizationMode mode, int32_t options,
181	0	UErrorCode &status) {
182	0	const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
183	0	if(U_SUCCESS(status)) {
184	0	if(options&UNORM_UNICODE_3_2) {
185	0	return FilteredNormalizer2(n2, uniset_getUnicode32Instance(status)).
186	0	quickCheck(source, status);
187	0	} else {
188	0	return n2->quickCheck(source, status);
189	0	}
190	0	} else {
191	0	return UNORM_MAYBE;
192	0	}
193	0	}
194
195		UBool
196		Normalizer::isNormalized(const UnicodeString& source,
197		UNormalizationMode mode, int32_t options,
198	0	UErrorCode &status) {
199	0	const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
200	0	if(U_SUCCESS(status)) {
201	0	if(options&UNORM_UNICODE_3_2) {
202	0	return FilteredNormalizer2(n2, uniset_getUnicode32Instance(status)).
203	0	isNormalized(source, status);
204	0	} else {
205	0	return n2->isNormalized(source, status);
206	0	}
207	0	} else {
208	0	return FALSE;
209	0	}
210	0	}
211
212		UnicodeString & U_EXPORT2
213		Normalizer::concatenate(const UnicodeString &left, const UnicodeString &right,
214		UnicodeString &result,
215		UNormalizationMode mode, int32_t options,
216	0	UErrorCode &errorCode) {
217	0	if(left.isBogus() \|\| right.isBogus() \|\| U_FAILURE(errorCode)) {
218	0	result.setToBogus();
219	0	if(U_SUCCESS(errorCode)) {
220	0	errorCode=U_ILLEGAL_ARGUMENT_ERROR;
221	0	}
222	0	} else {
223	0	UnicodeString localDest;
224	0	UnicodeString *dest;
225	0
226	0	if(&right!=&result) {
227	0	dest=&result;
228	0	} else {
229	0	// the right and result strings are the same object, use a temporary one
230	0	dest=&localDest;
231	0	}
232	0	*dest=left;
233	0	const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, errorCode);
234	0	if(U_SUCCESS(errorCode)) {
235	0	if(options&UNORM_UNICODE_3_2) {
236	0	FilteredNormalizer2(n2, uniset_getUnicode32Instance(errorCode)).
237	0	append(*dest, right, errorCode);
238	0	} else {
239	0	n2->append(*dest, right, errorCode);
240	0	}
241	0	}
242	0	if(dest==&localDest && U_SUCCESS(errorCode)) {
243	0	result=*dest;
244	0	}
245	0	}
246	0	return result;
247	0	}
248
249		//-------------------------------------------------------------------------
250		// Iteration API
251		//-------------------------------------------------------------------------
252
253		/**
254		* Return the current character in the normalized text.
255		*/
256	0	UChar32 Normalizer::current() {
257	0	if(bufferPos<buffer.length() \|\| nextNormalize()) {
258	0	return buffer.char32At(bufferPos);
259	0	} else {
260	0	return DONE;
261	0	}
262	0	}
263
264		/**
265		* Return the next character in the normalized text and advance
266		* the iteration position by one. If the end
267		* of the text has already been reached, {@link #DONE} is returned.
268		*/
269	0	UChar32 Normalizer::next() {
270	0	if(bufferPos<buffer.length() \|\| nextNormalize()) {
271	0	UChar32 c=buffer.char32At(bufferPos);
272	0	bufferPos+=U16_LENGTH(c);
273	0	return c;
274	0	} else {
275	0	return DONE;
276	0	}
277	0	}
278
279		/**
280		* Return the previous character in the normalized text and decrement
281		* the iteration position by one. If the beginning
282		* of the text has already been reached, {@link #DONE} is returned.
283		*/
284	0	UChar32 Normalizer::previous() {
285	0	if(bufferPos>0 \|\| previousNormalize()) {
286	0	UChar32 c=buffer.char32At(bufferPos-1);
287	0	bufferPos-=U16_LENGTH(c);
288	0	return c;
289	0	} else {
290	0	return DONE;
291	0	}
292	0	}
293
294	0	void Normalizer::reset() {
295	0	currentIndex=nextIndex=text->setToStart();
296	0	clearBuffer();
297	0	}
298
299		void
300	0	Normalizer::setIndexOnly(int32_t index) {
301	0	text->setIndex(index); // pins index
302	0	currentIndex=nextIndex=text->getIndex();
303	0	clearBuffer();
304	0	}
305
306		/**
307		* Return the first character in the normalized text. This resets
308		* the <tt>Normalizer's</tt> position to the beginning of the text.
309		*/
310	0	UChar32 Normalizer::first() {
311	0	reset();
312	0	return next();
313	0	}
314
315		/**
316		* Return the last character in the normalized text. This resets
317		* the <tt>Normalizer's</tt> position to be just before the
318		* the input text corresponding to that normalized character.
319		*/
320	0	UChar32 Normalizer::last() {
321	0	currentIndex=nextIndex=text->setToEnd();
322	0	clearBuffer();
323	0	return previous();
324	0	}
325
326		/**
327		* Retrieve the current iteration position in the input text that is
328		* being normalized. This method is useful in applications such as
329		* searching, where you need to be able to determine the position in
330		* the input text that corresponds to a given normalized output character.
331		* <p>
332		* <b>Note:</b> This method sets the position in the <em>input</em>, while
333		* {@link #next} and {@link #previous} iterate through characters in the
334		* <em>output</em>. This means that there is not necessarily a one-to-one
335		* correspondence between characters returned by <tt>next</tt> and
336		* <tt>previous</tt> and the indices passed to and returned from
337		* <tt>setIndex</tt> and {@link #getIndex}.
338		*
339		*/
340	0	int32_t Normalizer::getIndex() const {
341	0	if(bufferPos<buffer.length()) {
342	0	return currentIndex;
343	0	} else {
344	0	return nextIndex;
345	0	}
346	0	}
347
348		/**
349		* Retrieve the index of the start of the input text. This is the begin index
350		* of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the <tt>String</tt>
351		* over which this <tt>Normalizer</tt> is iterating
352		*/
353	0	int32_t Normalizer::startIndex() const {
354	0	return text->startIndex();
355	0	}
356
357		/**
358		* Retrieve the index of the end of the input text. This is the end index
359		* of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt>
360		* over which this <tt>Normalizer</tt> is iterating
361		*/
362	0	int32_t Normalizer::endIndex() const {
363	0	return text->endIndex();
364	0	}
365
366		//-------------------------------------------------------------------------
367		// Property access methods
368		//-------------------------------------------------------------------------
369
370		void
371		Normalizer::setMode(UNormalizationMode newMode)
372	0	{
373	0	fUMode = newMode;
374	0	init();
375	0	}
376
377		UNormalizationMode
378		Normalizer::getUMode() const
379	0	{
380	0	return fUMode;
381	0	}
382
383		void
384		Normalizer::setOption(int32_t option,
385		UBool value)
386	0	{
387	0	if (value) {
388	0	fOptions \|= option;
389	0	} else {
390	0	fOptions &= (~option);
391	0	}
392	0	init();
393	0	}
394
395		UBool
396		Normalizer::getOption(int32_t option) const
397	0	{
398	0	return (fOptions & option) != 0;
399	0	}
400
401		/**
402		* Set the input text over which this <tt>Normalizer</tt> will iterate.
403		* The iteration position is set to the beginning of the input text.
404		*/
405		void
406		Normalizer::setText(const UnicodeString& newText,
407		UErrorCode &status)
408	0	{
409	0	if (U_FAILURE(status)) {
410	0	return;
411	0	}
412	0	CharacterIterator *newIter = new StringCharacterIterator(newText);
413	0	if (newIter == NULL) {
414	0	status = U_MEMORY_ALLOCATION_ERROR;
415	0	return;
416	0	}
417	0	delete text;
418	0	text = newIter;
419	0	reset();
420	0	}
421
422		/**
423		* Set the input text over which this <tt>Normalizer</tt> will iterate.
424		* The iteration position is set to the beginning of the string.
425		*/
426		void
427		Normalizer::setText(const CharacterIterator& newText,
428		UErrorCode &status)
429	0	{
430	0	if (U_FAILURE(status)) {
431	0	return;
432	0	}
433	0	CharacterIterator *newIter = newText.clone();
434	0	if (newIter == NULL) {
435	0	status = U_MEMORY_ALLOCATION_ERROR;
436	0	return;
437	0	}
438	0	delete text;
439	0	text = newIter;
440	0	reset();
441	0	}
442
443		void
444		Normalizer::setText(ConstChar16Ptr newText,
445		int32_t length,
446		UErrorCode &status)
447	0	{
448	0	if (U_FAILURE(status)) {
449	0	return;
450	0	}
451	0	CharacterIterator *newIter = new UCharCharacterIterator(newText, length);
452	0	if (newIter == NULL) {
453	0	status = U_MEMORY_ALLOCATION_ERROR;
454	0	return;
455	0	}
456	0	delete text;
457	0	text = newIter;
458	0	reset();
459	0	}
460
461		/**
462		* Copies the text under iteration into the UnicodeString referred to by "result".
463		* @param result Receives a copy of the text under iteration.
464		*/
465		void
466		Normalizer::getText(UnicodeString& result)
467	0	{
468	0	text->getText(result);
469	0	}
470
471		//-------------------------------------------------------------------------
472		// Private utility methods
473		//-------------------------------------------------------------------------
474
475	0	void Normalizer::clearBuffer() {
476	0	buffer.remove();
477	0	bufferPos=0;
478	0	}
479
480		UBool
481	0	Normalizer::nextNormalize() {
482	0	clearBuffer();
483	0	currentIndex=nextIndex;
484	0	text->setIndex(nextIndex);
485	0	if(!text->hasNext()) {
486	0	return FALSE;
487	0	}
488	0	// Skip at least one character so we make progress.
489	0	UnicodeString segment(text->next32PostInc());
490	0	while(text->hasNext()) {
491	0	UChar32 c;
492	0	if(fNorm2->hasBoundaryBefore(c=text->next32PostInc())) {
493	0	text->move32(-1, CharacterIterator::kCurrent);
494	0	break;
495	0	}
496	0	segment.append(c);
497	0	}
498	0	nextIndex=text->getIndex();
499	0	UErrorCode errorCode=U_ZERO_ERROR;
500	0	fNorm2->normalize(segment, buffer, errorCode);
501	0	return U_SUCCESS(errorCode) && !buffer.isEmpty();
502	0	}
503
504		UBool
505	0	Normalizer::previousNormalize() {
506	0	clearBuffer();
507	0	nextIndex=currentIndex;
508	0	text->setIndex(currentIndex);
509	0	if(!text->hasPrevious()) {
510	0	return FALSE;
511	0	}
512	0	UnicodeString segment;
513	0	while(text->hasPrevious()) {
514	0	UChar32 c=text->previous32();
515	0	segment.insert(0, c);
516	0	if(fNorm2->hasBoundaryBefore(c)) {
517	0	break;
518	0	}
519	0	}
520	0	currentIndex=text->getIndex();
521	0	UErrorCode errorCode=U_ZERO_ERROR;
522	0	fNorm2->normalize(segment, buffer, errorCode);
523	0	bufferPos=buffer.length();
524	0	return U_SUCCESS(errorCode) && !buffer.isEmpty();
525	0	}
526
527		U_NAMESPACE_END
528
529		#endif /* #if !UCONFIG_NO_NORMALIZATION */