/src/icu/icu4c/source/i18n/collationdatawriter.cpp

Source
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2013-2015, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* collationdatawriter.cpp
*
* created on: 2013aug06
* created by: Markus W. Scherer
*/

#include "unicode/utypes.h"

#if !UCONFIG_NO_COLLATION

#include "unicode/tblcoll.h"
#include "unicode/udata.h"
#include "unicode/uniset.h"
#include "cmemory.h"
#include "collationdata.h"
#include "collationdatabuilder.h"
#include "collationdatareader.h"
#include "collationdatawriter.h"
#include "collationfastlatin.h"
#include "collationsettings.h"
#include "collationtailoring.h"
#include "uassert.h"
#include "ucmndata.h"

U_NAMESPACE_BEGIN

uint8_t *
RuleBasedCollator::cloneRuleData(int32_t &length, UErrorCode &errorCode) const {
    if(U_FAILURE(errorCode)) { return nullptr; }
    LocalMemory<uint8_t> buffer(static_cast<uint8_t*>(uprv_malloc(20000)));
    if(buffer.isNull()) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    UErrorCode bufferStatus = U_ZERO_ERROR;
    length = cloneBinary(buffer.getAlias(), 20000, bufferStatus);
    if(bufferStatus == U_BUFFER_OVERFLOW_ERROR) {
        if(buffer.allocateInsteadAndCopy(length, 0) == nullptr) {
            errorCode = U_MEMORY_ALLOCATION_ERROR;
            return nullptr;
        }
        bufferStatus = U_ZERO_ERROR;
        length = cloneBinary(buffer.getAlias(), length, bufferStatus);
    }
    if(U_FAILURE(bufferStatus)) {
        errorCode = bufferStatus;
        return nullptr;
    }
    return buffer.orphan();
}

int32_t
RuleBasedCollator::cloneBinary(uint8_t *dest, int32_t capacity, UErrorCode &errorCode) const {
    int32_t indexes[CollationDataReader::IX_TOTAL_SIZE + 1];
    return CollationDataWriter::writeTailoring(
            *tailoring, *settings, indexes, dest, capacity,
            errorCode);
}

// Template UDataInfo for serialized collation data.
// write() copies this into the header it builds for tailoring (non-base) data
// and then overwrites the dataVersion field with the caller-supplied version.
// Field names in the comments below follow the UDataInfo declaration in
// unicode/udata.h.
static const UDataInfo dataInfo = {
    sizeof(UDataInfo),                  // size
    0,                                  // reservedWord

    U_IS_BIG_ENDIAN,                    // isBigEndian: byte order of the build platform
    U_CHARSET_FAMILY,                   // charsetFamily: charset family of the build platform
    U_SIZEOF_UCHAR,                     // sizeofUChar
    0,                                  // reservedByte

    // The format bytes are written as numeric values, not character literals.
    { 0x55, 0x43, 0x6f, 0x6c },         // dataFormat="UCol"
    { 5, 0, 0, 0 },                     // formatVersion
    { 6, 3, 0, 0 }                      // dataVersion
};

int32_t
CollationDataWriter::writeBase(const CollationData &data, const CollationSettings &settings,
                               const void *rootElements, int32_t rootElementsLength,
                               int32_t indexes[], uint8_t *dest, int32_t capacity,
                               UErrorCode &errorCode) {
    return write(true, nullptr,
                 data, settings,
                 rootElements, rootElementsLength,
                 indexes, dest, capacity, errorCode);
}

int32_t
CollationDataWriter::writeTailoring(const CollationTailoring &t, const CollationSettings &settings,
                                    int32_t indexes[], uint8_t *dest, int32_t capacity,
                                    UErrorCode &errorCode) {
    return write(false, t.version,
                 *t.data, settings,
                 nullptr, 0,
                 indexes, dest, capacity, errorCode);
}

/**
 * Serializes collation data and settings into dest and fills in indexes[].
 *
 * The output consists of an optional data header (tailorings only), the
 * index table, and then the data items in the order in which their offsets
 * are computed below. All offsets stored in indexes[] are byte offsets
 * relative to the start of the index table, not the start of the header.
 *
 * @param isBase             true for base data (no header written here, all
 *                           data items written); false for a tailoring.
 * @param dataVersion        Version copied into the header's dataVersion;
 *                           only read when !isBase.
 * @param data               Collation data to serialize.
 * @param settings           Source of the options, reorder codes and reorder table.
 * @param rootElements       Root elements table; copied as rootElementsLength
 *                           4-byte units.
 * @param rootElementsLength Number of 4-byte root element units.
 * @param indexes            Caller-provided index array. Entries up to and
 *                           including IX_TOTAL_SIZE are assigned here, so it
 *                           must hold at least IX_TOTAL_SIZE + 1 values; only
 *                           the first indexesLength entries are copied to dest.
 * @param dest               Output buffer; may be nullptr only if capacity is 0.
 * @param capacity           Size of dest in bytes; must not be negative.
 * @param errorCode          In/out ICU error code. Set to
 *                           U_ILLEGAL_ARGUMENT_ERROR for a bad dest/capacity
 *                           combination and to U_BUFFER_OVERFLOW_ERROR when
 *                           the output does not fit.
 * @return headerSize + totalSize, i.e. the number of bytes written or, with
 *         U_BUFFER_OVERFLOW_ERROR, the number of bytes needed. Returns 0 for
 *         all other failures.
 */
int32_t
CollationDataWriter::write(UBool isBase, const UVersionInfo dataVersion,
                           const CollationData &data, const CollationSettings &settings,
                           const void *rootElements, int32_t rootElementsLength,
                           int32_t indexes[], uint8_t *dest, int32_t capacity,
                           UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return 0; }
    if(capacity < 0 || (capacity > 0 && dest == nullptr)) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    // Figure out which data items to write before settling on
    // the indexes length and writing offsets.
    // For any data item, we need to write the start and limit offsets,
    // so the indexes length must be at least index-of-start-offset + 2.
    int32_t indexesLength;
    UBool hasMappings;
    UnicodeSet unsafeBackwardSet;
    const CollationData *baseData = data.base;

    // The fast Latin version is stored in the bits above the low 16 bits of
    // the options word; it is 0 when there is no fast Latin table.
    int32_t fastLatinVersion;
    if(data.fastLatinTable != nullptr) {
        fastLatinVersion = static_cast<int32_t>(CollationFastLatin::VERSION) << 16;
    } else {
        fastLatinVersion = 0;
    }
    int32_t fastLatinTableLength = 0;

    if(isBase) {
        // For the root collator, we write an even number of indexes
        // so that we start with an 8-aligned offset.
        indexesLength = CollationDataReader::IX_TOTAL_SIZE + 1;
        U_ASSERT(settings.reorderCodesLength == 0);
        hasMappings = true;
        unsafeBackwardSet = *data.unsafeBackwardSet;
        fastLatinTableLength = data.fastLatinTableLength;
    } else if(baseData == nullptr) {
        // Not base data and no base to tailor: no mappings are written,
        // only the options and possibly the reordering data.
        hasMappings = false;
        if(settings.reorderCodesLength == 0) {
            // only options
            indexesLength = CollationDataReader::IX_OPTIONS + 1;  // no limit offset here
        } else {
            // only options, reorder codes, and the reorder table
            indexesLength = CollationDataReader::IX_REORDER_TABLE_OFFSET + 2;
        }
    } else {
        hasMappings = true;
        // Tailored mappings, and what else?
        // Check in ascending order of optional tailoring data items.
        indexesLength = CollationDataReader::IX_CE32S_OFFSET + 2;
        if(data.contextsLength != 0) {
            indexesLength = CollationDataReader::IX_CONTEXTS_OFFSET + 2;
        }
        // Only the code points that are in this data's set but not in the
        // base's set are written for a tailoring.
        unsafeBackwardSet.addAll(*data.unsafeBackwardSet).removeAll(*baseData->unsafeBackwardSet);
        if(!unsafeBackwardSet.isEmpty()) {
            indexesLength = CollationDataReader::IX_UNSAFE_BWD_OFFSET + 2;
        }
        // The fast Latin table is written only if it is not the base's table.
        if(data.fastLatinTable != baseData->fastLatinTable) {
            fastLatinTableLength = data.fastLatinTableLength;
            indexesLength = CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET + 2;
        }
    }

    UVector32 codesAndRanges(errorCode);
    const int32_t *reorderCodes = settings.reorderCodes;
    int32_t reorderCodesLength = settings.reorderCodesLength;
    if(settings.hasReordering() &&
            CollationSettings::reorderTableHasSplitBytes(settings.reorderTable)) {
        // Rebuild the full list of reorder ranges.
        // The list in the settings is truncated for efficiency.
        data.makeReorderRanges(reorderCodes, reorderCodesLength, codesAndRanges, errorCode);
        // Write the codes, then the ranges.
        for(int32_t i = 0; i < reorderCodesLength; ++i) {
            codesAndRanges.insertElementAt(reorderCodes[i], i, errorCode);
        }
        if(U_FAILURE(errorCode)) { return 0; }
        // From here on, reorderCodes points at codes followed by ranges.
        reorderCodes = codesAndRanges.getBuffer();
        reorderCodesLength = codesAndRanges.size();
    }

    int32_t headerSize;
    if(isBase) {
        headerSize = 0;  // udata_create() writes the header
    } else {
        DataHeader header;
        header.dataHeader.magic1 = 0xda;
        header.dataHeader.magic2 = 0x27;
        uprv_memcpy(&header.info, &dataInfo, sizeof(UDataInfo));
        // Replace the template's dataVersion with the caller's version.
        uprv_memcpy(header.info.dataVersion, dataVersion, sizeof(UVersionInfo));
        headerSize = static_cast<int32_t>(sizeof(header));
        U_ASSERT((headerSize & 3) == 0);  // multiple of 4 bytes
        if(hasMappings && data.cesLength != 0) {
            // Sum of the sizes of the data items which are
            // not automatically multiples of 8 bytes and which are placed before the CEs.
            int32_t sum = headerSize + (indexesLength + reorderCodesLength) * 4;
            if((sum & 7) != 0) {
                // We need to add padding somewhere so that the 64-bit CEs are 8-aligned.
                // We add to the header size here.
                // Alternatively, we could increment the indexesLength
                // or add a few bytes to the reorderTable.
                headerSize += 4;
            }
        }
        header.dataHeader.headerSize = static_cast<uint16_t>(headerSize);
        if(headerSize <= capacity) {
            uprv_memcpy(dest, &header, sizeof(header));
            // Write 00 bytes so that the padding is not mistaken for a copyright string.
            uprv_memset(dest + sizeof(header), 0, headerSize - (int32_t)sizeof(header));
            // From here on, dest and capacity describe the space after the header.
            dest += headerSize;
            capacity -= headerSize;
        } else {
            // The header alone does not fit: continue in preflight mode so
            // that the total required size is still computed.
            dest = nullptr;
            capacity = 0;
        }
    }

    indexes[CollationDataReader::IX_INDEXES_LENGTH] = indexesLength;
    U_ASSERT((settings.options & ~0xffff) == 0);
    indexes[CollationDataReader::IX_OPTIONS] =
            data.numericPrimary | fastLatinVersion | settings.options;
    indexes[CollationDataReader::IX_RESERVED2] = 0;
    indexes[CollationDataReader::IX_RESERVED3] = 0;

    // Byte offsets of data items all start from the start of the indexes.
    // We add the headerSize at the very end.
    int32_t totalSize = indexesLength * 4;

    // -1 means that no jamo CE32s start index is stored.
    if(hasMappings && (isBase || data.jamoCE32s != baseData->jamoCE32s)) {
        indexes[CollationDataReader::IX_JAMO_CE32S_START] = static_cast<int32_t>(data.jamoCE32s - data.ce32s);
    } else {
        indexes[CollationDataReader::IX_JAMO_CE32S_START] = -1;
    }

    // Each item below records its start offset and then advances totalSize by
    // the item's byte length; an absent item leaves totalSize unchanged, so
    // its start offset equals the next item's start offset.
    indexes[CollationDataReader::IX_REORDER_CODES_OFFSET] = totalSize;
    totalSize += reorderCodesLength * 4;

    indexes[CollationDataReader::IX_REORDER_TABLE_OFFSET] = totalSize;
    if(settings.reorderTable != nullptr) {
        totalSize += 256;
    }

    indexes[CollationDataReader::IX_TRIE_OFFSET] = totalSize;
    if(hasMappings) {
        // Use a separate error code: an overflow here is not fatal, it is
        // detected again by the final totalSize/capacity check.
        UErrorCode errorCode2 = U_ZERO_ERROR;
        int32_t length;
        if(totalSize < capacity) {
            // Serialize the trie directly into its final position.
            length = utrie2_serialize(data.trie, dest + totalSize,
                                      capacity - totalSize, &errorCode2);
        } else {
            // No room left: preflight to get only the length.
            length = utrie2_serialize(data.trie, nullptr, 0, &errorCode2);
        }
        if(U_FAILURE(errorCode2) && errorCode2 != U_BUFFER_OVERFLOW_ERROR) {
            errorCode = errorCode2;
            return 0;
        }
        // The trie size should be a multiple of 8 bytes due to the way
        // compactIndex2(UNewTrie2 *trie) currently works.
        U_ASSERT((length & 7) == 0);
        totalSize += length;
    }

    indexes[CollationDataReader::IX_RESERVED8_OFFSET] = totalSize;
    indexes[CollationDataReader::IX_CES_OFFSET] = totalSize;
    if(hasMappings && data.cesLength != 0) {
        // The CEs are 8 bytes each and must start at an 8-aligned position
        // counted from the start of the header.
        U_ASSERT(((headerSize + totalSize) & 7) == 0);
        totalSize += data.cesLength * 8;
    }

    indexes[CollationDataReader::IX_RESERVED10_OFFSET] = totalSize;
    indexes[CollationDataReader::IX_CE32S_OFFSET] = totalSize;
    if(hasMappings) {
        totalSize += data.ce32sLength * 4;
    }

    indexes[CollationDataReader::IX_ROOT_ELEMENTS_OFFSET] = totalSize;
    totalSize += rootElementsLength * 4;

    indexes[CollationDataReader::IX_CONTEXTS_OFFSET] = totalSize;
    if(hasMappings) {
        totalSize += data.contextsLength * 2;
    }

    indexes[CollationDataReader::IX_UNSAFE_BWD_OFFSET] = totalSize;
    if(hasMappings && !unsafeBackwardSet.isEmpty()) {
        // Same pattern as for the trie: serialize in place if there is room,
        // otherwise preflight; an overflow is handled by the final check.
        UErrorCode errorCode2 = U_ZERO_ERROR;
        int32_t length;
        if(totalSize < capacity) {
            uint16_t *p = reinterpret_cast<uint16_t *>(dest + totalSize);
            // The set is serialized in 16-bit units, so the remaining byte
            // capacity is halved.
            length = unsafeBackwardSet.serialize(
                    p, (capacity - totalSize) / 2, errorCode2);
        } else {
            length = unsafeBackwardSet.serialize(nullptr, 0, errorCode2);
        }
        if(U_FAILURE(errorCode2) && errorCode2 != U_BUFFER_OVERFLOW_ERROR) {
            errorCode = errorCode2;
            return 0;
        }
        totalSize += length * 2;
    }

    indexes[CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET] = totalSize;
    totalSize += fastLatinTableLength * 2;

    UnicodeString scripts;
    indexes[CollationDataReader::IX_SCRIPTS_OFFSET] = totalSize;
    if(isBase) {
        // Script data, base only: the script count, then numScripts + 16
        // scriptsIndex units, then the scriptStarts, all as 16-bit units.
        scripts.append(static_cast<char16_t>(data.numScripts));
        scripts.append(reinterpret_cast<const char16_t *>(data.scriptsIndex), data.numScripts + 16);
        scripts.append(reinterpret_cast<const char16_t *>(data.scriptStarts), data.scriptStartsLength);
        totalSize += scripts.length() * 2;
    }

    indexes[CollationDataReader::IX_COMPRESSIBLE_BYTES_OFFSET] = totalSize;
    if(isBase) {
        totalSize += 256;
    }

    indexes[CollationDataReader::IX_RESERVED18_OFFSET] = totalSize;
    indexes[CollationDataReader::IX_TOTAL_SIZE] = totalSize;

    // capacity no longer includes the header bytes (see above),
    // so it is compared with totalSize alone.
    if(totalSize > capacity) {
        errorCode = U_BUFFER_OVERFLOW_ERROR;
        return headerSize + totalSize;
    }

    // Everything fits: copy the index table and the items that were not
    // already serialized in place. copyData() skips items with start >= limit.
    uprv_memcpy(dest, indexes, indexesLength * 4);
    copyData(indexes, CollationDataReader::IX_REORDER_CODES_OFFSET, reorderCodes, dest);
    copyData(indexes, CollationDataReader::IX_REORDER_TABLE_OFFSET, settings.reorderTable, dest);
    // The trie has already been serialized into the dest buffer.
    copyData(indexes, CollationDataReader::IX_CES_OFFSET, data.ces, dest);
    copyData(indexes, CollationDataReader::IX_CE32S_OFFSET, data.ce32s, dest);
    copyData(indexes, CollationDataReader::IX_ROOT_ELEMENTS_OFFSET, rootElements, dest);
    copyData(indexes, CollationDataReader::IX_CONTEXTS_OFFSET, data.contexts, dest);
    // The unsafeBackwardSet has already been serialized into the dest buffer.
    copyData(indexes, CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET, data.fastLatinTable, dest);
    copyData(indexes, CollationDataReader::IX_SCRIPTS_OFFSET, scripts.getBuffer(), dest);
    copyData(indexes, CollationDataReader::IX_COMPRESSIBLE_BYTES_OFFSET, data.compressibleBytes, dest);

    return headerSize + totalSize;
}

/**
 * Copies one data item into the output buffer.
 *
 * The item's byte range is [indexes[startIndex], indexes[startIndex + 1]),
 * taken as offsets into dest. Nothing is copied when the range is empty or
 * inverted.
 *
 * @param indexes    Index table holding the byte offsets.
 * @param startIndex Index of the item's start offset; the limit offset is
 *                   read from the following entry.
 * @param src        Source bytes of the item.
 * @param dest       Start of the output area that the offsets refer to.
 */
void
CollationDataWriter::copyData(const int32_t indexes[], int32_t startIndex,
                              const void *src, uint8_t *dest) {
    const int32_t begin = indexes[startIndex];
    const int32_t end = indexes[startIndex + 1];
    if(begin >= end) {
        return;  // item not present
    }
    uprv_memcpy(dest + begin, src, end - begin);
}

U_NAMESPACE_END

#endif  // !UCONFIG_NO_COLLATION

Coverage Report

Created: 2026-03-31 06:12

Line	Count	Source
1		// © 2016 and later: Unicode, Inc. and others.
2		// License & terms of use: http://www.unicode.org/copyright.html
3		/*
4		*******************************************************************************
5		* Copyright (C) 2013-2015, International Business Machines
6		* Corporation and others. All Rights Reserved.
7		*******************************************************************************
8		* collationdatawriter.cpp
9		*
10		* created on: 2013aug06
11		* created by: Markus W. Scherer
12		*/
13
14		#include "unicode/utypes.h"
15
16		#if !UCONFIG_NO_COLLATION
17
18		#include "unicode/tblcoll.h"
19		#include "unicode/udata.h"
20		#include "unicode/uniset.h"
21		#include "cmemory.h"
22		#include "collationdata.h"
23		#include "collationdatabuilder.h"
24		#include "collationdatareader.h"
25		#include "collationdatawriter.h"
26		#include "collationfastlatin.h"
27		#include "collationsettings.h"
28		#include "collationtailoring.h"
29		#include "uassert.h"
30		#include "ucmndata.h"
31
32		U_NAMESPACE_BEGIN
33
34		uint8_t *
35	0	RuleBasedCollator::cloneRuleData(int32_t &length, UErrorCode &errorCode) const {
36	0	if(U_FAILURE(errorCode)) { return nullptr; }
37	0	LocalMemory<uint8_t> buffer(static_cast<uint8_t*>(uprv_malloc(20000)));
38	0	if(buffer.isNull()) {
39	0	errorCode = U_MEMORY_ALLOCATION_ERROR;
40	0	return nullptr;
41	0	}
42	0	UErrorCode bufferStatus = U_ZERO_ERROR;
43	0	length = cloneBinary(buffer.getAlias(), 20000, bufferStatus);
44	0	if(bufferStatus == U_BUFFER_OVERFLOW_ERROR) {
45	0	if(buffer.allocateInsteadAndCopy(length, 0) == nullptr) {
46	0	errorCode = U_MEMORY_ALLOCATION_ERROR;
47	0	return nullptr;
48	0	}
49	0	bufferStatus = U_ZERO_ERROR;
50	0	length = cloneBinary(buffer.getAlias(), length, bufferStatus);
51	0	}
52	0	if(U_FAILURE(bufferStatus)) {
53	0	errorCode = bufferStatus;
54	0	return nullptr;
55	0	}
56	0	return buffer.orphan();
57	0	}
58
59		int32_t
60	0	RuleBasedCollator::cloneBinary(uint8_t *dest, int32_t capacity, UErrorCode &errorCode) const {
61	0	int32_t indexes[CollationDataReader::IX_TOTAL_SIZE + 1];
62	0	return CollationDataWriter::writeTailoring(
63	0	*tailoring, *settings, indexes, dest, capacity,
64	0	errorCode);
65	0	}
66
67		static const UDataInfo dataInfo = {
68		sizeof(UDataInfo),
69		0,
70
71		U_IS_BIG_ENDIAN,
72		U_CHARSET_FAMILY,
73		U_SIZEOF_UCHAR,
74		0,
75
76		{ 0x55, 0x43, 0x6f, 0x6c }, // dataFormat="UCol"
77		{ 5, 0, 0, 0 }, // formatVersion
78		{ 6, 3, 0, 0 } // dataVersion
79		};
80
81		int32_t
82		CollationDataWriter::writeBase(const CollationData &data, const CollationSettings &settings,
83		const void *rootElements, int32_t rootElementsLength,
84		int32_t indexes[], uint8_t *dest, int32_t capacity,
85	0	UErrorCode &errorCode) {
86	0	return write(true, nullptr,
87	0	data, settings,
88	0	rootElements, rootElementsLength,
89	0	indexes, dest, capacity, errorCode);
90	0	}
91
92		int32_t
93		CollationDataWriter::writeTailoring(const CollationTailoring &t, const CollationSettings &settings,
94		int32_t indexes[], uint8_t *dest, int32_t capacity,
95	0	UErrorCode &errorCode) {
96	0	return write(false, t.version,
97	0	*t.data, settings,
98	0	nullptr, 0,
99	0	indexes, dest, capacity, errorCode);
100	0	}
101
102		int32_t
103		CollationDataWriter::write(UBool isBase, const UVersionInfo dataVersion,
104		const CollationData &data, const CollationSettings &settings,
105		const void *rootElements, int32_t rootElementsLength,
106		int32_t indexes[], uint8_t *dest, int32_t capacity,
107	0	UErrorCode &errorCode) {
108	0	if(U_FAILURE(errorCode)) { return 0; }
109	0	if(capacity < 0 || (capacity > 0 && dest == nullptr)) {
110	0	errorCode = U_ILLEGAL_ARGUMENT_ERROR;
111	0	return 0;
112	0	}
113
114		// Figure out which data items to write before settling on
115		// the indexes length and writing offsets.
116		// For any data item, we need to write the start and limit offsets,
117		// so the indexes length must be at least index-of-start-offset + 2.
118	0	int32_t indexesLength;
119	0	UBool hasMappings;
120	0	UnicodeSet unsafeBackwardSet;
121	0	const CollationData *baseData = data.base;
122
123	0	int32_t fastLatinVersion;
124	0	if(data.fastLatinTable != nullptr) {
125	0	fastLatinVersion = static_cast<int32_t>(CollationFastLatin::VERSION) << 16;
126	0	} else {
127	0	fastLatinVersion = 0;
128	0	}
129	0	int32_t fastLatinTableLength = 0;
130
131	0	if(isBase) {
132		// For the root collator, we write an even number of indexes
133		// so that we start with an 8-aligned offset.
134	0	indexesLength = CollationDataReader::IX_TOTAL_SIZE + 1;
135	0	U_ASSERT(settings.reorderCodesLength == 0);
136	0	hasMappings = true;
137	0	unsafeBackwardSet = *data.unsafeBackwardSet;
138	0	fastLatinTableLength = data.fastLatinTableLength;
139	0	} else if(baseData == nullptr) {
140	0	hasMappings = false;
141	0	if(settings.reorderCodesLength == 0) {
142		// only options
143	0	indexesLength = CollationDataReader::IX_OPTIONS + 1; // no limit offset here
144	0	} else {
145		// only options, reorder codes, and the reorder table
146	0	indexesLength = CollationDataReader::IX_REORDER_TABLE_OFFSET + 2;
147	0	}
148	0	} else {
149	0	hasMappings = true;
150		// Tailored mappings, and what else?
151		// Check in ascending order of optional tailoring data items.
152	0	indexesLength = CollationDataReader::IX_CE32S_OFFSET + 2;
153	0	if(data.contextsLength != 0) {
154	0	indexesLength = CollationDataReader::IX_CONTEXTS_OFFSET + 2;
155	0	}
156	0	unsafeBackwardSet.addAll(*data.unsafeBackwardSet).removeAll(*baseData->unsafeBackwardSet);
157	0	if(!unsafeBackwardSet.isEmpty()) {
158	0	indexesLength = CollationDataReader::IX_UNSAFE_BWD_OFFSET + 2;
159	0	}
160	0	if(data.fastLatinTable != baseData->fastLatinTable) {
161	0	fastLatinTableLength = data.fastLatinTableLength;
162	0	indexesLength = CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET + 2;
163	0	}
164	0	}
165
166	0	UVector32 codesAndRanges(errorCode);
167	0	const int32_t *reorderCodes = settings.reorderCodes;
168	0	int32_t reorderCodesLength = settings.reorderCodesLength;
169	0	if(settings.hasReordering() &&
170	0	CollationSettings::reorderTableHasSplitBytes(settings.reorderTable)) {
171		// Rebuild the full list of reorder ranges.
172		// The list in the settings is truncated for efficiency.
173	0	data.makeReorderRanges(reorderCodes, reorderCodesLength, codesAndRanges, errorCode);
174		// Write the codes, then the ranges.
175	0	for(int32_t i = 0; i < reorderCodesLength; ++i) {
176	0	codesAndRanges.insertElementAt(reorderCodes[i], i, errorCode);
177	0	}
178	0	if(U_FAILURE(errorCode)) { return 0; }
179	0	reorderCodes = codesAndRanges.getBuffer();
180	0	reorderCodesLength = codesAndRanges.size();
181	0	}
182
183	0	int32_t headerSize;
184	0	if(isBase) {
185	0	headerSize = 0; // udata_create() writes the header
186	0	} else {
187	0	DataHeader header;
188	0	header.dataHeader.magic1 = 0xda;
189	0	header.dataHeader.magic2 = 0x27;
190	0	uprv_memcpy(&header.info, &dataInfo, sizeof(UDataInfo));
191	0	uprv_memcpy(header.info.dataVersion, dataVersion, sizeof(UVersionInfo));
192	0	headerSize = static_cast<int32_t>(sizeof(header));
193	0	U_ASSERT((headerSize & 3) == 0); // multiple of 4 bytes
194	0	if(hasMappings && data.cesLength != 0) {
195		// Sum of the sizes of the data items which are
196		// not automatically multiples of 8 bytes and which are placed before the CEs.
197	0	int32_t sum = headerSize + (indexesLength + reorderCodesLength) * 4;
198	0	if((sum & 7) != 0) {
199		// We need to add padding somewhere so that the 64-bit CEs are 8-aligned.
200		// We add to the header size here.
201		// Alternatively, we could increment the indexesLength
202		// or add a few bytes to the reorderTable.
203	0	headerSize += 4;
204	0	}
205	0	}
206	0	header.dataHeader.headerSize = static_cast<uint16_t>(headerSize);
207	0	if(headerSize <= capacity) {
208	0	uprv_memcpy(dest, &header, sizeof(header));
209		// Write 00 bytes so that the padding is not mistaken for a copyright string.
210	0	uprv_memset(dest + sizeof(header), 0, headerSize - (int32_t)sizeof(header));
211	0	dest += headerSize;
212	0	capacity -= headerSize;
213	0	} else {
214	0	dest = nullptr;
215	0	capacity = 0;
216	0	}
217	0	}
218
219	0	indexes[CollationDataReader::IX_INDEXES_LENGTH] = indexesLength;
220	0	U_ASSERT((settings.options & ~0xffff) == 0);
221	0	indexes[CollationDataReader::IX_OPTIONS] =
222	0	data.numericPrimary | fastLatinVersion | settings.options;
223	0	indexes[CollationDataReader::IX_RESERVED2] = 0;
224	0	indexes[CollationDataReader::IX_RESERVED3] = 0;
225
226		// Byte offsets of data items all start from the start of the indexes.
227		// We add the headerSize at the very end.
228	0	int32_t totalSize = indexesLength * 4;
229
230	0	if(hasMappings && (isBase || data.jamoCE32s != baseData->jamoCE32s)) {
231	0	indexes[CollationDataReader::IX_JAMO_CE32S_START] = static_cast<int32_t>(data.jamoCE32s - data.ce32s);
232	0	} else {
233	0	indexes[CollationDataReader::IX_JAMO_CE32S_START] = -1;
234	0	}
235
236	0	indexes[CollationDataReader::IX_REORDER_CODES_OFFSET] = totalSize;
237	0	totalSize += reorderCodesLength * 4;
238
239	0	indexes[CollationDataReader::IX_REORDER_TABLE_OFFSET] = totalSize;
240	0	if(settings.reorderTable != nullptr) {
241	0	totalSize += 256;
242	0	}
243
244	0	indexes[CollationDataReader::IX_TRIE_OFFSET] = totalSize;
245	0	if(hasMappings) {
246	0	UErrorCode errorCode2 = U_ZERO_ERROR;
247	0	int32_t length;
248	0	if(totalSize < capacity) {
249	0	length = utrie2_serialize(data.trie, dest + totalSize,
250	0	capacity - totalSize, &errorCode2);
251	0	} else {
252	0	length = utrie2_serialize(data.trie, nullptr, 0, &errorCode2);
253	0	}
254	0	if(U_FAILURE(errorCode2) && errorCode2 != U_BUFFER_OVERFLOW_ERROR) {
255	0	errorCode = errorCode2;
256	0	return 0;
257	0	}
258		// The trie size should be a multiple of 8 bytes due to the way
259		// compactIndex2(UNewTrie2 *trie) currently works.
260	0	U_ASSERT((length & 7) == 0);
261	0	totalSize += length;
262	0	}
263
264	0	indexes[CollationDataReader::IX_RESERVED8_OFFSET] = totalSize;
265	0	indexes[CollationDataReader::IX_CES_OFFSET] = totalSize;
266	0	if(hasMappings && data.cesLength != 0) {
267	0	U_ASSERT(((headerSize + totalSize) & 7) == 0);
268	0	totalSize += data.cesLength * 8;
269	0	}
270
271	0	indexes[CollationDataReader::IX_RESERVED10_OFFSET] = totalSize;
272	0	indexes[CollationDataReader::IX_CE32S_OFFSET] = totalSize;
273	0	if(hasMappings) {
274	0	totalSize += data.ce32sLength * 4;
275	0	}
276
277	0	indexes[CollationDataReader::IX_ROOT_ELEMENTS_OFFSET] = totalSize;
278	0	totalSize += rootElementsLength * 4;
279
280	0	indexes[CollationDataReader::IX_CONTEXTS_OFFSET] = totalSize;
281	0	if(hasMappings) {
282	0	totalSize += data.contextsLength * 2;
283	0	}
284
285	0	indexes[CollationDataReader::IX_UNSAFE_BWD_OFFSET] = totalSize;
286	0	if(hasMappings && !unsafeBackwardSet.isEmpty()) {
287	0	UErrorCode errorCode2 = U_ZERO_ERROR;
288	0	int32_t length;
289	0	if(totalSize < capacity) {
290	0	uint16_t *p = reinterpret_cast<uint16_t *>(dest + totalSize);
291	0	length = unsafeBackwardSet.serialize(
292	0	p, (capacity - totalSize) / 2, errorCode2);
293	0	} else {
294	0	length = unsafeBackwardSet.serialize(nullptr, 0, errorCode2);
295	0	}
296	0	if(U_FAILURE(errorCode2) && errorCode2 != U_BUFFER_OVERFLOW_ERROR) {
297	0	errorCode = errorCode2;
298	0	return 0;
299	0	}
300	0	totalSize += length * 2;
301	0	}
302
303	0	indexes[CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET] = totalSize;
304	0	totalSize += fastLatinTableLength * 2;
305
306	0	UnicodeString scripts;
307	0	indexes[CollationDataReader::IX_SCRIPTS_OFFSET] = totalSize;
308	0	if(isBase) {
309	0	scripts.append(static_cast<char16_t>(data.numScripts));
310	0	scripts.append(reinterpret_cast<const char16_t *>(data.scriptsIndex), data.numScripts + 16);
311	0	scripts.append(reinterpret_cast<const char16_t *>(data.scriptStarts), data.scriptStartsLength);
312	0	totalSize += scripts.length() * 2;
313	0	}
314
315	0	indexes[CollationDataReader::IX_COMPRESSIBLE_BYTES_OFFSET] = totalSize;
316	0	if(isBase) {
317	0	totalSize += 256;
318	0	}
319
320	0	indexes[CollationDataReader::IX_RESERVED18_OFFSET] = totalSize;
321	0	indexes[CollationDataReader::IX_TOTAL_SIZE] = totalSize;
322
323	0	if(totalSize > capacity) {
324	0	errorCode = U_BUFFER_OVERFLOW_ERROR;
325	0	return headerSize + totalSize;
326	0	}
327
328	0	uprv_memcpy(dest, indexes, indexesLength * 4);
329	0	copyData(indexes, CollationDataReader::IX_REORDER_CODES_OFFSET, reorderCodes, dest);
330	0	copyData(indexes, CollationDataReader::IX_REORDER_TABLE_OFFSET, settings.reorderTable, dest);
331		// The trie has already been serialized into the dest buffer.
332	0	copyData(indexes, CollationDataReader::IX_CES_OFFSET, data.ces, dest);
333	0	copyData(indexes, CollationDataReader::IX_CE32S_OFFSET, data.ce32s, dest);
334	0	copyData(indexes, CollationDataReader::IX_ROOT_ELEMENTS_OFFSET, rootElements, dest);
335	0	copyData(indexes, CollationDataReader::IX_CONTEXTS_OFFSET, data.contexts, dest);
336		// The unsafeBackwardSet has already been serialized into the dest buffer.
337	0	copyData(indexes, CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET, data.fastLatinTable, dest);
338	0	copyData(indexes, CollationDataReader::IX_SCRIPTS_OFFSET, scripts.getBuffer(), dest);
339	0	copyData(indexes, CollationDataReader::IX_COMPRESSIBLE_BYTES_OFFSET, data.compressibleBytes, dest);
340
341	0	return headerSize + totalSize;
342	0	}
343
344		void
345		CollationDataWriter::copyData(const int32_t indexes[], int32_t startIndex,
346	0	const void *src, uint8_t *dest) {
347	0	int32_t start = indexes[startIndex];
348	0	int32_t limit = indexes[startIndex + 1];
349	0	if(start < limit) {
350	0	uprv_memcpy(dest + start, src, limit - start);
351	0	}
352	0	}
353
354		U_NAMESPACE_END
355
356		#endif // !UCONFIG_NO_COLLATION