/src/libreoffice/i18npool/source/indexentry/indexentrysupplier_default.cxx
Line | Count | Source |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* |
3 | | * This file is part of the LibreOffice project. |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | * |
9 | | * This file incorporates work covered by the following license notice: |
10 | | * |
11 | | * Licensed to the Apache Software Foundation (ASF) under one or more |
12 | | * contributor license agreements. See the NOTICE file distributed |
13 | | * with this work for additional information regarding copyright |
14 | | * ownership. The ASF licenses this file to you under the Apache |
15 | | * License, Version 2.0 (the "License"); you may not use this file |
16 | | * except in compliance with the License. You may obtain a copy of |
17 | | * the License at http://www.apache.org/licenses/LICENSE-2.0 . |
18 | | */ |
19 | | |
20 | | #include <indexentrysupplier_default.hxx> |
21 | | #include <collatorImpl.hxx> |
22 | | #include <localedata.hxx> |
23 | | #include <i18nutil/unicode.hxx> |
24 | | #include <com/sun/star/i18n/CollatorOptions.hpp> |
25 | | #include <o3tl/temporary.hxx> |
26 | | |
27 | | using namespace ::com::sun::star; |
28 | | using namespace ::com::sun::star::uno; |
29 | | using namespace ::com::sun::star::i18n; |
30 | | using namespace ::com::sun::star::lang; |
31 | | |
32 | | namespace i18npool { |
33 | | |
34 | | IndexEntrySupplier_Unicode::IndexEntrySupplier_Unicode( |
35 | | const css::uno::Reference < css::uno::XComponentContext >& rxContext ) : |
36 | 0 | IndexEntrySupplier_Common(rxContext) |
37 | 0 | { |
38 | 0 | implementationName = "com.sun.star.i18n.IndexEntrySupplier_Unicode"; |
39 | 0 | index.reset( new Index(rxContext) ); |
40 | 0 | } |
41 | | |
42 | | IndexEntrySupplier_Unicode::~IndexEntrySupplier_Unicode() |
43 | 0 | { |
44 | 0 | } |
45 | | |
46 | | sal_Bool SAL_CALL IndexEntrySupplier_Unicode::loadAlgorithm( const lang::Locale& rLocale, |
47 | | const OUString& rAlgorithm, sal_Int32 collatorOptions ) |
48 | 0 | { |
49 | 0 | index->init(rLocale, rAlgorithm); |
50 | 0 | return IndexEntrySupplier_Common::loadAlgorithm(rLocale, rAlgorithm, collatorOptions); |
51 | 0 | } |
52 | | |
53 | | OUString SAL_CALL IndexEntrySupplier_Unicode::getIndexKey( const OUString& rIndexEntry, |
54 | | const OUString& rPhoneticEntry, const lang::Locale& rLocale ) |
55 | 0 | { |
56 | 0 | return index->getIndexDescription(getEntry(rIndexEntry, rPhoneticEntry, rLocale)); |
57 | 0 | } |
58 | | |
59 | | sal_Int16 SAL_CALL IndexEntrySupplier_Unicode::compareIndexEntry( |
60 | | const OUString& rIndexEntry1, const OUString& rPhoneticEntry1, const lang::Locale& rLocale1, |
61 | | const OUString& rIndexEntry2, const OUString& rPhoneticEntry2, const lang::Locale& rLocale2 ) |
62 | 0 | { |
63 | 0 | sal_Int16 result = |
64 | 0 | index->getIndexWeight(getEntry(rIndexEntry1, rPhoneticEntry1, rLocale1)) - |
65 | 0 | index->getIndexWeight(getEntry(rIndexEntry2, rPhoneticEntry2, rLocale2)); |
66 | 0 | if (result == 0) |
67 | 0 | return IndexEntrySupplier_Common::compareIndexEntry( |
68 | 0 | rIndexEntry1, rPhoneticEntry1, rLocale1, |
69 | 0 | rIndexEntry2, rPhoneticEntry2, rLocale2); |
70 | 0 | return result > 0 ? 1 : -1; |
71 | 0 | } |
72 | | |
73 | | OUString SAL_CALL IndexEntrySupplier_Unicode::getIndexCharacter( const OUString& rIndexEntry, |
74 | 0 | const lang::Locale& rLocale, const OUString& rAlgorithm ) { |
75 | |
|
76 | 0 | if (loadAlgorithm( rLocale, rAlgorithm, CollatorOptions::CollatorOptions_IGNORE_CASE_ACCENT)) |
77 | 0 | return index->getIndexDescription(rIndexEntry); |
78 | 0 | else |
79 | 0 | return IndexEntrySupplier_Common::getIndexCharacter(rIndexEntry, rLocale, rAlgorithm); |
80 | 0 | } |
81 | | |
82 | | IndexTable::IndexTable() |
83 | 0 | : start(0) |
84 | 0 | , end(0) |
85 | 0 | , table(nullptr) |
86 | 0 | { |
87 | 0 | } |
88 | | |
89 | | IndexTable::~IndexTable() |
90 | 0 | { |
91 | 0 | if (table) free(table); |
92 | 0 | } |
93 | | |
94 | | void IndexTable::init(sal_Unicode start_, sal_Unicode end_, IndexKey const *keys, sal_Int16 key_count, Index *index) |
95 | 0 | { |
96 | 0 | start=start_; |
97 | 0 | end=end_; |
98 | 0 | table = static_cast<sal_uInt8*>(malloc((end-start+1)*sizeof(sal_uInt8))); |
99 | 0 | assert(table && "Don't handle OOM conditions"); |
100 | 0 | for (sal_Unicode i = start; i <= end; i++) { |
101 | 0 | sal_Int16 j; |
102 | 0 | for (j = 0; j < key_count; j++) { |
103 | 0 | if (keys[j].key > 0 && (i == keys[j].key || index->compare(i, keys[j].key) == 0)) { |
104 | 0 | table[i-start] = sal::static_int_cast<sal_uInt8>(j); |
105 | 0 | break; |
106 | 0 | } |
107 | 0 | } |
108 | 0 | if (j == key_count) |
109 | 0 | table[i-start] = 0xFF; |
110 | 0 | } |
111 | 0 | } |
112 | | |
113 | | Index::Index(const css::uno::Reference < css::uno::XComponentContext >& rxContext) |
114 | 0 | : table_count(0) |
115 | 0 | , key_count(0) |
116 | 0 | , mkey_count(0) |
117 | 0 | , collator( new CollatorImpl(rxContext) ) |
118 | 0 | { |
119 | 0 | } |
120 | | |
121 | | Index::~Index() |
122 | 0 | { |
123 | 0 | } |
124 | | |
125 | | sal_Int16 Index::compare(sal_Unicode c1, sal_Unicode c2) |
126 | 0 | { |
127 | 0 | return sal::static_int_cast<sal_Int16>( collator->compareString(OUString(&c1, 1), OUString(&c2, 1)) ); |
128 | 0 | } |
129 | | |
130 | | sal_Int16 Index::getIndexWeight(const OUString& rIndexEntry) |
131 | 0 | { |
132 | 0 | sal_Int32 startPos=0; |
133 | 0 | if (!skipping_chars.isEmpty()) |
134 | 0 | while (skipping_chars.indexOf(rIndexEntry[startPos]) >= 0) |
135 | 0 | startPos++; |
136 | 0 | if (mkey_count > 0) { |
137 | 0 | for (sal_Int16 i = 0; i < mkey_count; i++) { |
138 | 0 | sal_Int32 len = keys[mkeys[i]].mkey.getLength(); |
139 | 0 | if (collator->compareSubstring(rIndexEntry, startPos, len, |
140 | 0 | keys[mkeys[i]].mkey, 0, len) == 0) |
141 | 0 | return mkeys[i]; |
142 | 0 | } |
143 | 0 | } |
144 | 0 | sal_Unicode code = startPos < rIndexEntry.getLength() ? rIndexEntry[startPos] : 0; |
145 | 0 | for (sal_Int16 i = 0; i < table_count; i++) { |
146 | 0 | if (tables[i].start <= code && code <= tables[i].end) |
147 | 0 | return tables[i].table[code-tables[i].start]; |
148 | 0 | } |
149 | 0 | return 0xFF; |
150 | 0 | } |
151 | | |
152 | | OUString Index::getIndexDescription(const OUString& rIndexEntry) |
153 | 0 | { |
154 | 0 | sal_Int16 wgt = getIndexWeight(rIndexEntry); |
155 | 0 | if (wgt < MAX_KEYS) { |
156 | 0 | if (!keys[wgt].desc.isEmpty()) |
157 | 0 | return keys[wgt].desc; |
158 | 0 | else if (keys[wgt].key > 0) |
159 | 0 | return OUString(&keys[wgt].key, 1); |
160 | 0 | else |
161 | 0 | return keys[wgt].mkey; |
162 | 0 | } |
163 | 0 | sal_uInt32 indexChar=rIndexEntry.iterateCodePoints(&o3tl::temporary(sal_Int32(0)), 0); |
164 | 0 | return OUString(&indexChar, 1); |
165 | 0 | } |
166 | | |
// Locale built from "en" and two empty strings; used below as the fallback
// when the requested locale yields no index keys or no script list.
#define LOCALE_EN lang::Locale(u"en"_ustr, OUString(), OUString())
168 | | |
169 | | void Index::makeIndexKeys(const lang::Locale &rLocale, std::u16string_view algorithm) |
170 | 0 | { |
171 | 0 | OUString keyStr = LocaleDataImpl::get()->getIndexKeysByAlgorithm(rLocale, algorithm); |
172 | |
|
173 | 0 | if (keyStr.isEmpty()) { |
174 | 0 | keyStr = LocaleDataImpl::get()->getIndexKeysByAlgorithm(LOCALE_EN, |
175 | 0 | LocaleDataImpl::get()->getDefaultIndexAlgorithm(LOCALE_EN)); |
176 | 0 | if (keyStr.isEmpty()) |
177 | 0 | throw RuntimeException( |
178 | 0 | u"Index::makeIndexKeys: No index keys returned by algorithm"_ustr); |
179 | 0 | } |
180 | | |
181 | 0 | sal_Int16 len = sal::static_int_cast<sal_Int16>( keyStr.getLength() ); |
182 | 0 | mkey_count=key_count=0; |
183 | 0 | skipping_chars=OUString(); |
184 | 0 | sal_Int16 i, j; |
185 | |
|
186 | 0 | for (i = 0; i < len && key_count < MAX_KEYS; i++) |
187 | 0 | { |
188 | 0 | sal_Unicode curr = keyStr[i]; |
189 | 0 | sal_Unicode close = ')'; |
190 | |
|
191 | 0 | if (unicode::isWhiteSpace(curr)) |
192 | 0 | continue; |
193 | | |
194 | 0 | switch(curr) { |
195 | 0 | case u'-': { |
196 | 0 | if (key_count <= 0 || i + 1 >= len) |
197 | 0 | throw RuntimeException(u"Index::makeIndexKeys: key_count<=0||" |
198 | 0 | "'-' is the last char of KeyString"_ustr); |
199 | 0 | for (curr = keyStr[++i]; key_count < MAX_KEYS && keys[key_count-1].key < curr; key_count++) { |
200 | 0 | keys[key_count].key = keys[key_count-1].key+1; |
201 | 0 | keys[key_count].desc.clear(); |
202 | 0 | } |
203 | 0 | break; |
204 | 0 | } |
205 | 0 | case u'[': |
206 | 0 | for (i++; i < len && keyStr[i] != ']'; i++) { |
207 | 0 | if (unicode::isWhiteSpace(keyStr[i])) { |
208 | 0 | continue; |
209 | 0 | } else if (keyStr[i] == '_') { |
210 | 0 | for (curr=keyStr[i-1]+1; curr <= keyStr[i+1]; curr++) |
211 | 0 | skipping_chars+=OUStringChar(curr); |
212 | 0 | i+=2; |
213 | 0 | } else { |
214 | 0 | skipping_chars+=OUStringChar(keyStr[i]); |
215 | 0 | } |
216 | 0 | } |
217 | 0 | break; |
218 | 0 | case u'{': |
219 | 0 | close = '}'; |
220 | 0 | [[fallthrough]]; |
221 | 0 | case u'(': { |
222 | 0 | if (key_count <= 0) |
223 | 0 | throw RuntimeException(u"Index::makeIndexKeys: key_count<=0"_ustr); |
224 | | |
225 | 0 | sal_Int16 end = i+1; |
226 | 0 | for (; end < len && keyStr[end] != close; end++) ; |
227 | |
|
228 | 0 | if (end >= len) // no found |
229 | 0 | throw RuntimeException(u"Index::makeIndexKeys: Closing bracket not found"_ustr); |
230 | 0 | if (close == ')') |
231 | 0 | keys[key_count-1].desc = keyStr.copy(i+1, end-i-1); |
232 | 0 | else { |
233 | 0 | mkeys[mkey_count++]=key_count; |
234 | 0 | keys[key_count].key = 0; |
235 | 0 | keys[key_count].mkey = keyStr.copy(i+1, end-i-1); |
236 | 0 | keys[key_count++].desc.clear(); |
237 | 0 | } |
238 | 0 | i=end+1; |
239 | 0 | break; |
240 | 0 | } |
241 | 0 | default: |
242 | 0 | keys[key_count].key = curr; |
243 | 0 | keys[key_count++].desc.clear(); |
244 | 0 | break; |
245 | 0 | } |
246 | 0 | } |
247 | 0 | for (i = 0; i < mkey_count; i++) { |
248 | 0 | for (j=i+1; j < mkey_count; j++) { |
249 | 0 | if (keys[mkeys[i]].mkey.getLength() < keys[mkeys[j]].mkey.getLength()) { |
250 | 0 | std::swap(mkeys[i], mkeys[j]); |
251 | 0 | } |
252 | 0 | } |
253 | 0 | } |
254 | 0 | } |
255 | | |
256 | | void Index::init(const lang::Locale &rLocale, const OUString& algorithm) |
257 | 0 | { |
258 | 0 | makeIndexKeys(rLocale, algorithm); |
259 | |
|
260 | 0 | Sequence< UnicodeScript > scriptList = LocaleDataImpl::get()->getUnicodeScripts( rLocale ); |
261 | |
|
262 | 0 | if (!scriptList.hasElements()) { |
263 | 0 | scriptList = LocaleDataImpl::get()->getUnicodeScripts(LOCALE_EN); |
264 | 0 | if (!scriptList.hasElements()) |
265 | 0 | throw RuntimeException(u"Index::init: scriptList is empty"_ustr); |
266 | 0 | } |
267 | | |
268 | 0 | table_count = sal::static_int_cast<sal_Int16>( scriptList.getLength() ); |
269 | 0 | if (table_count > MAX_TABLES) |
270 | 0 | throw RuntimeException(u"Index::init: Length of scriptList is too big"_ustr); |
271 | | |
272 | 0 | collator->loadCollatorAlgorithm(algorithm, rLocale, CollatorOptions::CollatorOptions_IGNORE_CASE_ACCENT); |
273 | 0 | sal_Int16 j=0; |
274 | 0 | sal_Unicode start = unicode::getUnicodeScriptStart(UnicodeScript(0)); |
275 | 0 | sal_Unicode end = unicode::getUnicodeScriptEnd(UnicodeScript(0)); |
276 | 0 | for (sal_Int32 i= (scriptList[0] == UnicodeScript(0)) ? 1 : 0; i< scriptList.getLength(); i++) { |
277 | 0 | if (unicode::getUnicodeScriptStart(scriptList[i]) != end+1) { |
278 | 0 | tables[j++].init(start, end, keys, key_count, this); |
279 | 0 | start = unicode::getUnicodeScriptStart(scriptList[i]); |
280 | 0 | } |
281 | 0 | end = unicode::getUnicodeScriptEnd(scriptList[i]); |
282 | 0 | } |
283 | 0 | tables[j++].init(start, end, keys, key_count, this); |
284 | 0 | table_count = j; |
285 | 0 | } |
286 | | |
287 | | } |
288 | | |
289 | | /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |