/src/icu/source/i18n/collationdata.cpp

Source (jump to first uncovered line)
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2012-2015, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* collationdata.cpp
*
* created on: 2012jul28
* created by: Markus W. Scherer
*/

#include "unicode/utypes.h"

#if !UCONFIG_NO_COLLATION

#include "unicode/ucol.h"
#include "unicode/udata.h"
#include "unicode/uscript.h"
#include "cmemory.h"
#include "collation.h"
#include "collationdata.h"
#include "uassert.h"
#include "utrie2.h"
#include "uvectr32.h"

U_NAMESPACE_BEGIN

// Resolves one level of indirection for a special CE32.
// Only three tags are redirected here; a special CE32 with any other tag
// is handed back unchanged.
uint32_t
CollationData::getIndirectCE32(uint32_t ce32) const {
    U_ASSERT(Collation::isSpecialCE32(ce32));
    switch(Collation::tagFromCE32(ce32)) {
    case Collation::DIGIT_TAG:
        // The non-numeric-collation CE32 is stored in the ce32s table.
        return ce32s[Collation::indexFromCE32(ce32)];
    case Collation::LEAD_SURROGATE_TAG:
        return Collation::UNASSIGNED_CE32;
    case Collation::U0000_TAG:
        // The normal ce32 for U+0000 is the first ce32s entry.
        return ce32s[0];
    default:
        return ce32;
    }
}

// Returns ce32 itself when it is not special;
// otherwise returns the result of getIndirectCE32() for it.
uint32_t
CollationData::getFinalCE32(uint32_t ce32) const {
    return Collation::isSpecialCE32(ce32) ? getIndirectCE32(ce32) : ce32;
}

// Returns the single CE that c maps to.
// Sets U_UNSUPPORTED_ERROR (and returns 0) when the mapping is a Latin expansion,
// builder data, a prefix, a contraction, Hangul, a lead surrogate,
// or an expansion whose length is not 1.
// Sets U_INTERNAL_PROGRAM_ERROR (and returns 0) for a fallback or reserved tag.
int64_t
CollationData::getSingleCE(UChar32 c, UErrorCode &errorCode) const {
    if(U_FAILURE(errorCode)) { return 0; }
    // Keep parallel with CollationDataBuilder::getSingleCE().
    // Start with this object's data; a fallback value redirects both the lookup
    // and all later table accesses to the base data.
    const CollationData *source = this;
    uint32_t value = getCE32(c);
    if(value == Collation::FALLBACK_CE32) {
        source = base;
        value = base->getCE32(c);
    }
    while(Collation::isSpecialCE32(value)) {
        switch(Collation::tagFromCE32(value)) {
        // Tags from which a CE is computed directly.
        case Collation::LONG_PRIMARY_TAG:
            return Collation::ceFromLongPrimaryCE32(value);
        case Collation::LONG_SECONDARY_TAG:
            return Collation::ceFromLongSecondaryCE32(value);
        case Collation::OFFSET_TAG:
            return source->getCEFromOffsetCE32(c, value);
        case Collation::IMPLICIT_TAG:
            return Collation::unassignedCEFromCodePoint(c);

        // Expansions are acceptable only if they hold exactly one element.
        case Collation::EXPANSION_TAG:
            if(Collation::lengthFromCE32(value) != 1) {
                errorCode = U_UNSUPPORTED_ERROR;
                return 0;
            }
            return source->ces[Collation::indexFromCE32(value)];
        case Collation::EXPANSION32_TAG:
            if(Collation::lengthFromCE32(value) != 1) {
                errorCode = U_UNSUPPORTED_ERROR;
                return 0;
            }
            // The single element is itself a CE32: examine it in the next iteration.
            value = source->ce32s[Collation::indexFromCE32(value)];
            break;

        // Indirections: replace the value and loop again.
        case Collation::DIGIT_TAG:
            // Use the non-numeric-collation CE32.
            value = source->ce32s[Collation::indexFromCE32(value)];
            break;
        case Collation::U0000_TAG:
            U_ASSERT(c == 0);
            // Use the normal ce32 for U+0000.
            value = source->ce32s[0];
            break;

        // Mappings that cannot be expressed as one CE.
        case Collation::LATIN_EXPANSION_TAG:
        case Collation::BUILDER_DATA_TAG:
        case Collation::PREFIX_TAG:
        case Collation::CONTRACTION_TAG:
        case Collation::HANGUL_TAG:
        case Collation::LEAD_SURROGATE_TAG:
            errorCode = U_UNSUPPORTED_ERROR;
            return 0;

        // Tags that must not show up at this point.
        case Collation::FALLBACK_TAG:
        case Collation::RESERVED_TAG_3:
            errorCode = U_INTERNAL_PROGRAM_ERROR;
            return 0;
        }
    }
    return Collation::ceFromSimpleCE32(value);
}

// Returns the scriptStarts entry for the script's range, moved into the upper 16 bits,
// or 0 if getScriptIndex() reports index 0 for this script.
uint32_t
CollationData::getFirstPrimaryForGroup(int32_t script) const {
    const int32_t rangeIndex = getScriptIndex(script);
    if(rangeIndex == 0) {
        return 0;
    }
    return (uint32_t)scriptStarts[rangeIndex] << 16;
}

// Returns the value just below the start of the following range
// (that start moved into the upper 16 bits, minus 1),
// or 0 if getScriptIndex() reports index 0 for this script.
uint32_t
CollationData::getLastPrimaryForGroup(int32_t script) const {
    const int32_t rangeIndex = getScriptIndex(script);
    if(rangeIndex != 0) {
        const uint32_t nextStart = scriptStarts[rangeIndex + 1];
        return (nextStart << 16) - 1;
    }
    return 0;
}

// Maps a primary weight to the first script code, or else the first special reorder code,
// whose scriptsIndex entry names the range containing the upper 16 bits of p.
// Returns -1 if p lies outside [scriptStarts[1], scriptStarts[scriptStartsLength - 1])
// or if no code refers to the range.
int32_t
CollationData::getGroupForPrimary(uint32_t p) const {
    const uint32_t top16 = p >> 16;
    if(!(scriptStarts[1] <= top16 && top16 < scriptStarts[scriptStartsLength - 1])) {
        return -1;
    }
    // Find the range whose limit is above top16.
    int32_t rangeIndex = 1;
    while(scriptStarts[rangeIndex + 1] <= top16) {
        ++rangeIndex;
    }
    // scriptsIndex holds the numScripts script entries first,
    // followed by the entries for the special reorder codes.
    const int32_t slotCount = numScripts + MAX_NUM_SPECIAL_REORDER_CODES;
    for(int32_t slot = 0; slot < slotCount; ++slot) {
        if(scriptsIndex[slot] != rangeIndex) { continue; }
        if(slot < numScripts) {
            return slot;
        }
        return UCOL_REORDER_CODE_FIRST + (slot - numScripts);
    }
    return -1;
}

// Looks up the scriptsIndex entry for a script code or a special reorder code.
// Returns 0 for negative codes, for codes between numScripts and
// UCOL_REORDER_CODE_FIRST, and for codes beyond the supported special reorder codes.
int32_t
CollationData::getScriptIndex(int32_t script) const {
    if(script < 0) { return 0; }
    if(script < numScripts) { return scriptsIndex[script]; }
    // Special reorder codes are stored after the script entries.
    const int32_t special = script - UCOL_REORDER_CODE_FIRST;
    if(0 <= special && special < MAX_NUM_SPECIAL_REORDER_CODES) {
        return scriptsIndex[numScripts + special];
    }
    return 0;
}

// Writes into dest all script codes that share a range with the given script
// and returns how many there are.
// Returns 0 if errorCode already indicates a failure or if getScriptIndex() yields 0.
// If the result does not fit into capacity, errorCode is set to U_BUFFER_OVERFLOW_ERROR
// and the full count is still returned.
int32_t
CollationData::getEquivalentScripts(int32_t script,
                                    int32_t dest[], int32_t capacity,
                                    UErrorCode &errorCode) const {
    if(U_FAILURE(errorCode)) { return 0; }
    const int32_t rangeIndex = getScriptIndex(script);
    if(rangeIndex == 0) { return 0; }
    if(script >= UCOL_REORDER_CODE_FIRST) {
        // A special group has no aliases: the answer is the group itself.
        if(capacity <= 0) {
            errorCode = U_BUFFER_OVERFLOW_ERROR;
        } else {
            dest[0] = script;
        }
        return 1;
    }

    int32_t count = 0;
    for(int32_t candidate = 0; candidate < numScripts; ++candidate) {
        if(scriptsIndex[candidate] != rangeIndex) { continue; }
        // Keep counting past the capacity so that the caller learns the needed size.
        if(count < capacity) {
            dest[count] = candidate;
        }
        ++count;
    }
    if(count > capacity) {
        errorCode = U_BUFFER_OVERFLOW_ERROR;
    }
    return count;
}

void
CollationData::makeReorderRanges(const int32_t *reorder, int32_t length,
                                 UVector32 &ranges, UErrorCode &errorCode) const {
    makeReorderRanges(reorder, length, FALSE, ranges, errorCode);
}

// Computes the reordering ranges for a list of script and special reorder codes.
//
// ranges is always emptied first. It stays empty if length is 0 or if the list
// consists of USCRIPT_UNKNOWN alone. Otherwise each element appended to ranges
// encodes one (limit, offset) pair: upper 16 bits = limit taken from scriptStarts,
// lower 16 bits = signed lead byte offset.
//
// reorder/length: the list of codes. USCRIPT_UNKNOWN may occur at most once;
//     the codes before it are placed from the bottom of the primary range upward,
//     the codes after it are placed at the top.
// latinMustMove: if FALSE, and Latin is the first code and the list contains
//     no special reorder code, placement starts at Latin's original start.
//     If that attempt runs out of room, this function calls itself once with TRUE.
// errorCode: nothing is done if it already indicates a failure.
//     Set to U_ILLEGAL_ARGUMENT_ERROR for UCOL_REORDER_CODE_DEFAULT, for a second
//     USCRIPT_UNKNOWN, or for a code whose range already has a table entry.
//     Set to U_BUFFER_OVERFLOW_ERROR if the ranges do not fit below highLimit.
void
CollationData::makeReorderRanges(const int32_t *reorder, int32_t length,
                                 UBool latinMustMove,
                                 UVector32 &ranges, UErrorCode &errorCode) const {
    if(U_FAILURE(errorCode)) { return; }
    ranges.removeAllElements();
    if(length == 0 || (length == 1 && reorder[0] == USCRIPT_UNKNOWN)) {
        return;
    }

    // Maps each script-or-group range to a new lead byte.
    // 0 = not yet assigned, 0xff = "don't care" (reserved range).
    uint8_t table[MAX_NUM_SCRIPT_RANGES];
    uprv_memset(table, 0, sizeof(table));

    {
        // Set "don't care" values for reserved ranges.
        int32_t index = scriptsIndex[
                numScripts + REORDER_RESERVED_BEFORE_LATIN - UCOL_REORDER_CODE_FIRST];
        if(index != 0) {
            table[index] = 0xff;
        }
        index = scriptsIndex[
                numScripts + REORDER_RESERVED_AFTER_LATIN - UCOL_REORDER_CODE_FIRST];
        if(index != 0) {
            table[index] = 0xff;
        }
    }

    // Never reorder special low and high primary lead bytes.
    // lowStart is the next free position at the bottom,
    // highLimit is the limit of the free space at the top.
    U_ASSERT(scriptStartsLength >= 2);
    U_ASSERT(scriptStarts[0] == 0);
    int32_t lowStart = scriptStarts[1];
    U_ASSERT(lowStart == ((Collation::MERGE_SEPARATOR_BYTE + 1) << 8));
    int32_t highLimit = scriptStarts[scriptStartsLength - 1];
    U_ASSERT(highLimit == (Collation::TRAIL_WEIGHT_BYTE << 8));

    // Get the set of special reorder codes in the input list.
    // This supports a fixed number of special reorder codes;
    // it works for data with codes beyond UCOL_REORDER_CODE_LIMIT.
    // Bit i of specials is set if the code UCOL_REORDER_CODE_FIRST + i is in the list.
    uint32_t specials = 0;
    for(int32_t i = 0; i < length; ++i) {
        int32_t reorderCode = reorder[i] - UCOL_REORDER_CODE_FIRST;
        if(0 <= reorderCode && reorderCode < MAX_NUM_SPECIAL_REORDER_CODES) {
            specials |= (uint32_t)1 << reorderCode;
        }
    }

    // Start the reordering with the special low reorder codes that do not occur in the input.
    for(int32_t i = 0; i < MAX_NUM_SPECIAL_REORDER_CODES; ++i) {
        int32_t index = scriptsIndex[numScripts + i];
        if(index != 0 && (specials & ((uint32_t)1 << i)) == 0) {
            lowStart = addLowScriptRange(table, index, lowStart);
        }
    }

    // Skip the reserved range before Latin if Latin is the first script,
    // so that we do not move it unnecessarily.
    // skippedReserved remembers how much was skipped, for the overflow check further below.
    int32_t skippedReserved = 0;
    if(specials == 0 && reorder[0] == USCRIPT_LATIN && !latinMustMove) {
        int32_t index = scriptsIndex[USCRIPT_LATIN];
        U_ASSERT(index != 0);
        int32_t start = scriptStarts[index];
        U_ASSERT(lowStart <= start);
        skippedReserved = start - lowStart;
        lowStart = start;
    }

    // Reorder according to the input scripts, continuing from the bottom of the primary range.
    int32_t originalLength = length;  // length will be decremented if "others" is in the list.
    UBool hasReorderToEnd = FALSE;
    for(int32_t i = 0; i < length;) {
        int32_t script = reorder[i++];
        if(script == USCRIPT_UNKNOWN) {
            // Put the remaining scripts at the top.
            // They are taken from the end of the list backwards, each one placed
            // directly below the previous one, so the last code ends up highest.
            hasReorderToEnd = TRUE;
            while(i < length) {
                script = reorder[--length];
                if(script == USCRIPT_UNKNOWN ||  // Must occur at most once.
                        script == UCOL_REORDER_CODE_DEFAULT) {
                    errorCode = U_ILLEGAL_ARGUMENT_ERROR;
                    return;
                }
                int32_t index = getScriptIndex(script);
                // Codes without a range in this data are ignored.
                if(index == 0) { continue; }
                if(table[index] != 0) {  // Duplicate or equivalent script.
                    errorCode = U_ILLEGAL_ARGUMENT_ERROR;
                    return;
                }
                highLimit = addHighScriptRange(table, index, highLimit);
            }
            break;
        }
        if(script == UCOL_REORDER_CODE_DEFAULT) {
            // The default code must be the only one in the list, and that is handled by the caller.
            // Otherwise it must not be used.
            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
            return;
        }
        int32_t index = getScriptIndex(script);
        // Codes without a range in this data are ignored.
        if(index == 0) { continue; }
        if(table[index] != 0) {  // Duplicate or equivalent script.
            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
            return;
        }
        lowStart = addLowScriptRange(table, index, lowStart);
    }

    // Put all remaining scripts into the middle.
    for(int32_t i = 1; i < scriptStartsLength - 1; ++i) {
        int32_t leadByte = table[i];
        // Already placed above, or marked as "don't care".
        if(leadByte != 0) { continue; }
        int32_t start = scriptStarts[i];
        if(!hasReorderToEnd && start > lowStart) {
            // No need to move this script.
            lowStart = start;
        }
        lowStart = addLowScriptRange(table, i, lowStart);
    }
    if(lowStart > highLimit) {
        // Would the ranges fit if the skipped lead bytes before Latin were used after all?
        // In the retry, skippedReserved remains 0, so this condition cannot be true again
        // and the recursion is at most one level deep.
        if((lowStart - (skippedReserved & 0xff00)) <= highLimit) {
            // Try not skipping the before-Latin reserved range.
            makeReorderRanges(reorder, originalLength, TRUE, ranges, errorCode);
            return;
        }
        // We need more primary lead bytes than available, despite the reserved ranges.
        errorCode = U_BUFFER_OVERFLOW_ERROR;
        return;
    }

    // Turn lead bytes into a list of (limit, offset) pairs.
    // Encode each pair in one list element:
    // Upper 16 bits = limit, lower 16 = signed lead byte offset.
    int32_t offset = 0;
    for(int32_t i = 1;; ++i) {
        int32_t nextOffset = offset;
        // Advance i over all consecutive ranges that share the current offset.
        while(i < scriptStartsLength - 1) {
            int32_t newLeadByte = table[i];
            if(newLeadByte == 0xff) {
                // "Don't care" lead byte for reserved range, continue with current offset.
            } else {
                nextOffset = newLeadByte - (scriptStarts[i] >> 8);
                if(nextOffset != offset) { break; }
            }
            ++i;
        }
        // No element is added for a final run of ranges whose offset is 0.
        if(offset != 0 || i < scriptStartsLength - 1) {
            ranges.addElement(((int32_t)scriptStarts[i] << 16) | (offset & 0xffff), errorCode);
        }
        if(i == scriptStartsLength - 1) { break; }
        offset = nextOffset;
    }
}

// Places the range with the given index at lowStart, growing upward.
// Records the new lead byte (bits 15..8 of the placement) in table[index]
// and returns the position right after the placed range.
int32_t
CollationData::addLowScriptRange(uint8_t table[], int32_t index, int32_t lowStart) const {
    const int32_t rangeStart = scriptStarts[index];
    // If the range begins at a lower second byte than where lowStart currently stands,
    // it cannot share the current lead byte: move on to the next one.
    if((lowStart & 0xff) > (rangeStart & 0xff)) {
        lowStart += 0x100;
    }
    table[index] = (uint8_t)(lowStart >> 8);
    const int32_t rangeLimit = scriptStarts[index + 1];
    // The range keeps its extent in lead bytes; the low byte is taken over from its limit.
    const int32_t leadSpan = (rangeLimit & 0xff00) - (rangeStart & 0xff00);
    return ((lowStart & 0xff00) + leadSpan) | (rangeLimit & 0xff);
}

// Places the range with the given index directly below highLimit, growing downward.
// Records the new lead byte (bits 15..8 of the new start) in table[index]
// and returns that new start, which is the limit for the next range placed at the top.
int32_t
CollationData::addHighScriptRange(uint8_t table[], int32_t index, int32_t highLimit) const {
    const int32_t rangeLimit = scriptStarts[index + 1];
    // If the range ends at a higher second byte than highLimit allows,
    // it cannot share the current lead byte: step down to the previous one.
    if((highLimit & 0xff) < (rangeLimit & 0xff)) {
        highLimit -= 0x100;
    }
    const int32_t rangeStart = scriptStarts[index];
    // The range keeps its extent in lead bytes; the low byte is taken over from its start.
    const int32_t leadSpan = (rangeLimit & 0xff00) - (rangeStart & 0xff00);
    const int32_t newStart = ((highLimit & 0xff00) - leadSpan) | (rangeStart & 0xff);
    table[index] = (uint8_t)(newStart >> 8);
    return newStart;
}

U_NAMESPACE_END

#endif  // !UCONFIG_NO_COLLATION

Coverage Report

Created: 2023-02-22 06:51

Line	Count	Source (jump to first uncovered line)
1		// © 2016 and later: Unicode, Inc. and others.
2		// License & terms of use: http://www.unicode.org/copyright.html
3		/*
4		*******************************************************************************
5		* Copyright (C) 2012-2015, International Business Machines
6		* Corporation and others. All Rights Reserved.
7		*******************************************************************************
8		* collationdata.cpp
9		*
10		* created on: 2012jul28
11		* created by: Markus W. Scherer
12		*/
13
14		#include "unicode/utypes.h"
15
16		#if !UCONFIG_NO_COLLATION
17
18		#include "unicode/ucol.h"
19		#include "unicode/udata.h"
20		#include "unicode/uscript.h"
21		#include "cmemory.h"
22		#include "collation.h"
23		#include "collationdata.h"
24		#include "uassert.h"
25		#include "utrie2.h"
26		#include "uvectr32.h"
27
28		U_NAMESPACE_BEGIN
29
30		uint32_t
31	0	CollationData::getIndirectCE32(uint32_t ce32) const {
32	0	U_ASSERT(Collation::isSpecialCE32(ce32));
33	0	int32_t tag = Collation::tagFromCE32(ce32);
34	0	if(tag == Collation::DIGIT_TAG) {
35		// Fetch the non-numeric-collation CE32.
36	0	ce32 = ce32s[Collation::indexFromCE32(ce32)];
37	0	} else if(tag == Collation::LEAD_SURROGATE_TAG) {
38	0	ce32 = Collation::UNASSIGNED_CE32;
39	0	} else if(tag == Collation::U0000_TAG) {
40		// Fetch the normal ce32 for U+0000.
41	0	ce32 = ce32s[0];
42	0	}
43	0	return ce32;
44	0	}
45
46		uint32_t
47	0	CollationData::getFinalCE32(uint32_t ce32) const {
48	0	if(Collation::isSpecialCE32(ce32)) {
49	0	ce32 = getIndirectCE32(ce32);
50	0	}
51	0	return ce32;
52	0	}
53
54		int64_t
55	0	CollationData::getSingleCE(UChar32 c, UErrorCode &errorCode) const {
56	0	if(U_FAILURE(errorCode)) { return 0; }
57		// Keep parallel with CollationDataBuilder::getSingleCE().
58	0	const CollationData *d;
59	0	uint32_t ce32 = getCE32(c);
60	0	if(ce32 == Collation::FALLBACK_CE32) {
61	0	d = base;
62	0	ce32 = base->getCE32(c);
63	0	} else {
64	0	d = this;
65	0	}
66	0	while(Collation::isSpecialCE32(ce32)) {
67	0	switch(Collation::tagFromCE32(ce32)) {
68	0	case Collation::LATIN_EXPANSION_TAG:
69	0	case Collation::BUILDER_DATA_TAG:
70	0	case Collation::PREFIX_TAG:
71	0	case Collation::CONTRACTION_TAG:
72	0	case Collation::HANGUL_TAG:
73	0	case Collation::LEAD_SURROGATE_TAG:
74	0	errorCode = U_UNSUPPORTED_ERROR;
75	0	return 0;
76	0	case Collation::FALLBACK_TAG:
77	0	case Collation::RESERVED_TAG_3:
78	0	errorCode = U_INTERNAL_PROGRAM_ERROR;
79	0	return 0;
80	0	case Collation::LONG_PRIMARY_TAG:
81	0	return Collation::ceFromLongPrimaryCE32(ce32);
82	0	case Collation::LONG_SECONDARY_TAG:
83	0	return Collation::ceFromLongSecondaryCE32(ce32);
84	0	case Collation::EXPANSION32_TAG:
85	0	if(Collation::lengthFromCE32(ce32) == 1) {
86	0	ce32 = d->ce32s[Collation::indexFromCE32(ce32)];
87	0	break;
88	0	} else {
89	0	errorCode = U_UNSUPPORTED_ERROR;
90	0	return 0;
91	0	}
92	0	case Collation::EXPANSION_TAG: {
93	0	if(Collation::lengthFromCE32(ce32) == 1) {
94	0	return d->ces[Collation::indexFromCE32(ce32)];
95	0	} else {
96	0	errorCode = U_UNSUPPORTED_ERROR;
97	0	return 0;
98	0	}
99	0	}
100	0	case Collation::DIGIT_TAG:
101		// Fetch the non-numeric-collation CE32 and continue.
102	0	ce32 = d->ce32s[Collation::indexFromCE32(ce32)];
103	0	break;
104	0	case Collation::U0000_TAG:
105	0	U_ASSERT(c == 0);
106		// Fetch the normal ce32 for U+0000 and continue.
107	0	ce32 = d->ce32s[0];
108	0	break;
109	0	case Collation::OFFSET_TAG:
110	0	return d->getCEFromOffsetCE32(c, ce32);
111	0	case Collation::IMPLICIT_TAG:
112	0	return Collation::unassignedCEFromCodePoint(c);
113	0	}
114	0	}
115	0	return Collation::ceFromSimpleCE32(ce32);
116	0	}
117
118		uint32_t
119	0	CollationData::getFirstPrimaryForGroup(int32_t script) const {
120	0	int32_t index = getScriptIndex(script);
121	0	return index == 0 ? 0 : (uint32_t)scriptStarts[index] << 16;
122	0	}
123
124		uint32_t
125	0	CollationData::getLastPrimaryForGroup(int32_t script) const {
126	0	int32_t index = getScriptIndex(script);
127	0	if(index == 0) {
128	0	return 0;
129	0	}
130	0	uint32_t limit = scriptStarts[index + 1];
131	0	return (limit << 16) - 1;
132	0	}
133
134		int32_t
135	0	CollationData::getGroupForPrimary(uint32_t p) const {
136	0	p >>= 16;
137	0	if(p < scriptStarts[1] \|\| scriptStarts[scriptStartsLength - 1] <= p) {
138	0	return -1;
139	0	}
140	0	int32_t index = 1;
141	0	while(p >= scriptStarts[index + 1]) { ++index; }
142	0	for(int32_t i = 0; i < numScripts; ++i) {
143	0	if(scriptsIndex[i] == index) {
144	0	return i;
145	0	}
146	0	}
147	0	for(int32_t i = 0; i < MAX_NUM_SPECIAL_REORDER_CODES; ++i) {
148	0	if(scriptsIndex[numScripts + i] == index) {
149	0	return UCOL_REORDER_CODE_FIRST + i;
150	0	}
151	0	}
152	0	return -1;
153	0	}
154
155		int32_t
156	0	CollationData::getScriptIndex(int32_t script) const {
157	0	if(script < 0) {
158	0	return 0;
159	0	} else if(script < numScripts) {
160	0	return scriptsIndex[script];
161	0	} else if(script < UCOL_REORDER_CODE_FIRST) {
162	0	return 0;
163	0	} else {
164	0	script -= UCOL_REORDER_CODE_FIRST;
165	0	if(script < MAX_NUM_SPECIAL_REORDER_CODES) {
166	0	return scriptsIndex[numScripts + script];
167	0	} else {
168	0	return 0;
169	0	}
170	0	}
171	0	}
172
173		int32_t
174		CollationData::getEquivalentScripts(int32_t script,
175		int32_t dest[], int32_t capacity,
176	0	UErrorCode &errorCode) const {
177	0	if(U_FAILURE(errorCode)) { return 0; }
178	0	int32_t index = getScriptIndex(script);
179	0	if(index == 0) { return 0; }
180	0	if(script >= UCOL_REORDER_CODE_FIRST) {
181		// Special groups have no aliases.
182	0	if(capacity > 0) {
183	0	dest[0] = script;
184	0	} else {
185	0	errorCode = U_BUFFER_OVERFLOW_ERROR;
186	0	}
187	0	return 1;
188	0	}
189
190	0	int32_t length = 0;
191	0	for(int32_t i = 0; i < numScripts; ++i) {
192	0	if(scriptsIndex[i] == index) {
193	0	if(length < capacity) {
194	0	dest[length] = i;
195	0	}
196	0	++length;
197	0	}
198	0	}
199	0	if(length > capacity) {
200	0	errorCode = U_BUFFER_OVERFLOW_ERROR;
201	0	}
202	0	return length;
203	0	}
204
205		void
206		CollationData::makeReorderRanges(const int32_t *reorder, int32_t length,
207	0	UVector32 &ranges, UErrorCode &errorCode) const {
208	0	makeReorderRanges(reorder, length, FALSE, ranges, errorCode);
209	0	}
210
211		void
212		CollationData::makeReorderRanges(const int32_t *reorder, int32_t length,
213		UBool latinMustMove,
214	0	UVector32 &ranges, UErrorCode &errorCode) const {
215	0	if(U_FAILURE(errorCode)) { return; }
216	0	ranges.removeAllElements();
217	0	if(length == 0 \|\| (length == 1 && reorder[0] == USCRIPT_UNKNOWN)) {
218	0	return;
219	0	}
220
221		// Maps each script-or-group range to a new lead byte.
222	0	uint8_t table[MAX_NUM_SCRIPT_RANGES];
223	0	uprv_memset(table, 0, sizeof(table));
224
225	0	{
226		// Set "don't care" values for reserved ranges.
227	0	int32_t index = scriptsIndex[
228	0	numScripts + REORDER_RESERVED_BEFORE_LATIN - UCOL_REORDER_CODE_FIRST];
229	0	if(index != 0) {
230	0	table[index] = 0xff;
231	0	}
232	0	index = scriptsIndex[
233	0	numScripts + REORDER_RESERVED_AFTER_LATIN - UCOL_REORDER_CODE_FIRST];
234	0	if(index != 0) {
235	0	table[index] = 0xff;
236	0	}
237	0	}
238
239		// Never reorder special low and high primary lead bytes.
240	0	U_ASSERT(scriptStartsLength >= 2);
241	0	U_ASSERT(scriptStarts[0] == 0);
242	0	int32_t lowStart = scriptStarts[1];
243	0	U_ASSERT(lowStart == ((Collation::MERGE_SEPARATOR_BYTE + 1) << 8));
244	0	int32_t highLimit = scriptStarts[scriptStartsLength - 1];
245	0	U_ASSERT(highLimit == (Collation::TRAIL_WEIGHT_BYTE << 8));
246
247		// Get the set of special reorder codes in the input list.
248		// This supports a fixed number of special reorder codes;
249		// it works for data with codes beyond UCOL_REORDER_CODE_LIMIT.
250	0	uint32_t specials = 0;
251	0	for(int32_t i = 0; i < length; ++i) {
252	0	int32_t reorderCode = reorder[i] - UCOL_REORDER_CODE_FIRST;
253	0	if(0 <= reorderCode && reorderCode < MAX_NUM_SPECIAL_REORDER_CODES) {
254	0	specials \|= (uint32_t)1 << reorderCode;
255	0	}
256	0	}
257
258		// Start the reordering with the special low reorder codes that do not occur in the input.
259	0	for(int32_t i = 0; i < MAX_NUM_SPECIAL_REORDER_CODES; ++i) {
260	0	int32_t index = scriptsIndex[numScripts + i];
261	0	if(index != 0 && (specials & ((uint32_t)1 << i)) == 0) {
262	0	lowStart = addLowScriptRange(table, index, lowStart);
263	0	}
264	0	}
265
266		// Skip the reserved range before Latin if Latin is the first script,
267		// so that we do not move it unnecessarily.
268	0	int32_t skippedReserved = 0;
269	0	if(specials == 0 && reorder[0] == USCRIPT_LATIN && !latinMustMove) {
270	0	int32_t index = scriptsIndex[USCRIPT_LATIN];
271	0	U_ASSERT(index != 0);
272	0	int32_t start = scriptStarts[index];
273	0	U_ASSERT(lowStart <= start);
274	0	skippedReserved = start - lowStart;
275	0	lowStart = start;
276	0	}
277
278		// Reorder according to the input scripts, continuing from the bottom of the primary range.
279	0	int32_t originalLength = length; // length will be decremented if "others" is in the list.
280	0	UBool hasReorderToEnd = FALSE;
281	0	for(int32_t i = 0; i < length;) {
282	0	int32_t script = reorder[i++];
283	0	if(script == USCRIPT_UNKNOWN) {
284		// Put the remaining scripts at the top.
285	0	hasReorderToEnd = TRUE;
286	0	while(i < length) {
287	0	script = reorder[--length];
288	0	if(script == USCRIPT_UNKNOWN \|\| // Must occur at most once.
289	0	script == UCOL_REORDER_CODE_DEFAULT) {
290	0	errorCode = U_ILLEGAL_ARGUMENT_ERROR;
291	0	return;
292	0	}
293	0	int32_t index = getScriptIndex(script);
294	0	if(index == 0) { continue; }
295	0	if(table[index] != 0) { // Duplicate or equivalent script.
296	0	errorCode = U_ILLEGAL_ARGUMENT_ERROR;
297	0	return;
298	0	}
299	0	highLimit = addHighScriptRange(table, index, highLimit);
300	0	}
301	0	break;
302	0	}
303	0	if(script == UCOL_REORDER_CODE_DEFAULT) {
304		// The default code must be the only one in the list, and that is handled by the caller.
305		// Otherwise it must not be used.
306	0	errorCode = U_ILLEGAL_ARGUMENT_ERROR;
307	0	return;
308	0	}
309	0	int32_t index = getScriptIndex(script);
310	0	if(index == 0) { continue; }
311	0	if(table[index] != 0) { // Duplicate or equivalent script.
312	0	errorCode = U_ILLEGAL_ARGUMENT_ERROR;
313	0	return;
314	0	}
315	0	lowStart = addLowScriptRange(table, index, lowStart);
316	0	}
317
318		// Put all remaining scripts into the middle.
319	0	for(int32_t i = 1; i < scriptStartsLength - 1; ++i) {
320	0	int32_t leadByte = table[i];
321	0	if(leadByte != 0) { continue; }
322	0	int32_t start = scriptStarts[i];
323	0	if(!hasReorderToEnd && start > lowStart) {
324		// No need to move this script.
325	0	lowStart = start;
326	0	}
327	0	lowStart = addLowScriptRange(table, i, lowStart);
328	0	}
329	0	if(lowStart > highLimit) {
330	0	if((lowStart - (skippedReserved & 0xff00)) <= highLimit) {
331		// Try not skipping the before-Latin reserved range.
332	0	makeReorderRanges(reorder, originalLength, TRUE, ranges, errorCode);
333	0	return;
334	0	}
335		// We need more primary lead bytes than available, despite the reserved ranges.
336	0	errorCode = U_BUFFER_OVERFLOW_ERROR;
337	0	return;
338	0	}
339
340		// Turn lead bytes into a list of (limit, offset) pairs.
341		// Encode each pair in one list element:
342		// Upper 16 bits = limit, lower 16 = signed lead byte offset.
343	0	int32_t offset = 0;
344	0	for(int32_t i = 1;; ++i) {
345	0	int32_t nextOffset = offset;
346	0	while(i < scriptStartsLength - 1) {
347	0	int32_t newLeadByte = table[i];
348	0	if(newLeadByte == 0xff) {
349		// "Don't care" lead byte for reserved range, continue with current offset.
350	0	} else {
351	0	nextOffset = newLeadByte - (scriptStarts[i] >> 8);
352	0	if(nextOffset != offset) { break; }
353	0	}
354	0	++i;
355	0	}
356	0	if(offset != 0 \|\| i < scriptStartsLength - 1) {
357	0	ranges.addElement(((int32_t)scriptStarts[i] << 16) \| (offset & 0xffff), errorCode);
358	0	}
359	0	if(i == scriptStartsLength - 1) { break; }
360	0	offset = nextOffset;
361	0	}
362	0	}
363
364		int32_t
365	0	CollationData::addLowScriptRange(uint8_t table[], int32_t index, int32_t lowStart) const {
366	0	int32_t start = scriptStarts[index];
367	0	if((start & 0xff) < (lowStart & 0xff)) {
368	0	lowStart += 0x100;
369	0	}
370	0	table[index] = (uint8_t)(lowStart >> 8);
371	0	int32_t limit = scriptStarts[index + 1];
372	0	lowStart = ((lowStart & 0xff00) + ((limit & 0xff00) - (start & 0xff00))) \| (limit & 0xff);
373	0	return lowStart;
374	0	}
375
376		int32_t
377	0	CollationData::addHighScriptRange(uint8_t table[], int32_t index, int32_t highLimit) const {
378	0	int32_t limit = scriptStarts[index + 1];
379	0	if((limit & 0xff) > (highLimit & 0xff)) {
380	0	highLimit -= 0x100;
381	0	}
382	0	int32_t start = scriptStarts[index];
383	0	highLimit = ((highLimit & 0xff00) - ((limit & 0xff00) - (start & 0xff00))) \| (start & 0xff);
384	0	table[index] = (uint8_t)(highLimit >> 8);
385	0	return highLimit;
386	0	}
387
388		U_NAMESPACE_END
389
390		#endif // !UCONFIG_NO_COLLATION