Coverage Report

Created: 2026-05-16 09:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libreoffice/i18nutil/source/utility/casefolding.cxx
Line
Count
Source
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
/*
3
 * This file is part of the LibreOffice project.
4
 *
5
 * This Source Code Form is subject to the terms of the Mozilla Public
6
 * License, v. 2.0. If a copy of the MPL was not distributed with this
7
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
 *
9
 * This file incorporates work covered by the following license notice:
10
 *
11
 *   Licensed to the Apache Software Foundation (ASF) under one or more
12
 *   contributor license agreements. See the NOTICE file distributed
13
 *   with this work for additional information regarding copyright
14
 *   ownership. The ASF licenses this file to you under the Apache
15
 *   License, Version 2.0 (the "License"); you may not use this file
16
 *   except in compliance with the License. You may obtain a copy of
17
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18
 */
19
20
#include <i18nutil/casefolding.hxx>
21
#include "casefolding_data.h"
22
#include <i18nutil/oneToOneMapping.hxx>
23
#include <i18nutil/widthfolding.hxx>
24
#include <i18nutil/transliteration.hxx>
25
#include <com/sun/star/lang/Locale.hpp>
26
#include <com/sun/star/uno/RuntimeException.hpp>
27
#include <rtl/character.hxx>
28
29
#include <unicode/uchar.h>
30
31
using namespace com::sun::star::lang;
32
using namespace com::sun::star::uno;
33
34
namespace i18nutil {
35
36
// Alternative mappings for the code points whose case conversion depends on
// context or locale.  casefolding::getConditionalValue() selects one entry per
// table.  Each initializer is { <first field, always 0 here>, nmap, map[3] },
// where nmap is the number of valid entries in map.

// U+03A3 GREEK CAPITAL LETTER SIGMA: [0] final sigma, [1] ordinary small sigma.
const Mapping mapping_03a3[] = {
    { 0, 1, { 0x03c2, 0, 0 } },
    { 0, 1, { 0x03c3, 0, 0 } }
};
// U+0307 COMBINING DOT ABOVE: [0] dropped (empty mapping), [1] kept as is.
const Mapping mapping_0307[] = {
    { 0, 0, { 0, 0, 0 } },
    { 0, 1, { 0x0307, 0, 0 } }
};
// U+004A 'J': [0] j + combining dot above, [1] plain j.
const Mapping mapping_004a[] = {
    { 0, 2, { 0x006a, 0x0307, 0 } },
    { 0, 1, { 0x006a, 0, 0 } }
};
// U+012E 'I' with ogonek: [0] small letter + combining dot above, [1] small letter only.
const Mapping mapping_012e[] = {
    { 0, 2, { 0x012f, 0x0307, 0 } },
    { 0, 1, { 0x012f, 0, 0 } }
};
// U+00CC 'I' with grave: [0] i + dot above + combining grave, [1] precomposed small letter.
const Mapping mapping_00cc[] = {
    { 0, 3, { 0x0069, 0x0307, 0x0300 } },
    { 0, 1, { 0x00ec, 0, 0 } }
};
// U+00CD 'I' with acute: [0] i + dot above + combining acute, [1] precomposed small letter.
const Mapping mapping_00cd[] = {
    { 0, 3, { 0x0069, 0x0307, 0x0301 } },
    { 0, 1, { 0x00ed, 0, 0 } }
};
// U+0128 'I' with tilde: [0] i + dot above + combining tilde, [1] precomposed small letter.
const Mapping mapping_0128[] = {
    { 0, 3, { 0x0069, 0x0307, 0x0303 } },
    { 0, 1, { 0x0129, 0, 0 } }
};
// U+0049 'I': [0] i + combining dot above, [1] dotless i (U+0131), [2] plain i.
const Mapping mapping_0049[] = {
    { 0, 2, { 0x0069, 0x0307, 0 } },
    { 0, 1, { 0x0131, 0, 0 } },
    { 0, 1, { 0x0069, 0, 0 } }
};
// U+0069 'i': [0] capital I with dot above (U+0130), [1] plain capital I.
const Mapping mapping_0069[] = {
    { 0, 1, { 0x0130, 0, 0 } },
    { 0, 1, { 0x0049, 0, 0 } }
};
// U+0130 capital I with dot above: [0] plain i, [1] unchanged.
const Mapping mapping_0130[] = {
    { 0, 1, { 0x0069, 0, 0 } },
    { 0, 1, { 0x0130, 0, 0 } }
};
46
47
55.9M
// True when the language of the Locale named `aLocale` in the expanding scope
// equals `lang`.  The argument is parenthesised so that an expression argument
// (e.g. a conditional) is compared as a whole.
#define langIs(lang) (aLocale.Language == (lang))
48
49
// Only the simple cases (plain 'i' and 'j') are checked here; the more
// complicated cases are not handled yet.
#define type_i(ch) (0x0069 == (ch) || 0x006a == (ch))
51
52
// Returns true when the case mapping tables flag ch with CasedLetterMask.
// Characters whose high byte has no table page (negative index entry) are
// reported as not cased.
static bool cased_letter(sal_Unicode ch)
{
    const int page = CaseMappingIndex[ch >> 8];
    if (page < 0)
        return false;

    const int slot = (page << 8) + (ch & 0xff);
    const MappingType flags = static_cast<MappingType>(CaseMappingValue[slot].type);
    return bool(flags & MappingType::CasedLetterMask);
}
61
62
// Lithuanian: when a capital I or J is followed by further accents above, the
// lowercased letter gets an explicit dot above.  This macro recognises the
// combining marks that count as such an accent.
#define accent_above(ch) \
    ((0x0300 <= (ch) && (ch) <= 0x0314) || \
     (0x033D <= (ch) && (ch) <= 0x0344) || \
     0x0346 == (ch) || \
     (0x034A <= (ch) && (ch) <= 0x034C))
65
66
/** Select the context- and locale-dependent mapping for str[pos].
 *
 *  Only the code points that have a conditional mapping table in this file are
 *  handled: U+03A3, U+0307, U+0130, U+0069, U+0049, U+004A, U+012E, U+00CC,
 *  U+00CD and U+0128.
 *
 *  @param str          text being converted
 *  @param pos          index of the character to map; must be a valid index
 *  @param len          number of characters in str; neighbours are only
 *                      inspected when they lie inside [0, len)
 *  @param aLocale      locale whose Language selects the Lithuanian ("lt") and
 *                      Turkish/Azerbaijani ("tr"/"az") special cases
 *  @param nMappingType requested conversion; only consulted for U+0307
 *  @return reference to one entry of the static mapping_* tables
 *  @throws RuntimeException if str[pos] is none of the handled code points
 */
const Mapping& casefolding::getConditionalValue(const sal_Unicode* str, sal_Int32 pos, sal_Int32 len, Locale const & aLocale, MappingType nMappingType)
{
    // Locale checks are lambdas so that the string comparison is only paid
    // for by the cases that need it (this function is on a hot path).
    const auto isTurkic = [&aLocale] { return langIs("tr") || langIs("az"); };
    const auto isLithuanian = [&aLocale] { return langIs("lt"); };

    const bool bHasPrev = pos > 0;
    // A following character exists only when pos + 1 is still inside the text.
    const bool bHasNext = pos + 1 < len;

    // "more above": the next character is a combining accent above.
    const auto accentFollows = [&] { return bHasNext && accent_above(str[pos + 1]); };

    switch (str[pos]) {
    case 0x03a3: {
        // final_sigma (not followed by cased and preceded by cased character)
        // DOES NOT check ignorable sequence yet (more complicated implementation).
        const bool bBeforeCased = bHasNext && cased_letter(str[pos + 1]);
        const bool bAfterCased = bHasPrev && cased_letter(str[pos - 1]);
        return (!bBeforeCased && bAfterCased) ? mapping_03a3[0] : mapping_03a3[1];
    }
    case 0x0307: {
        // The combining dot is dropped only directly after an i/j (after_i),
        // for Lithuanian uppercasing or Turkish/Azerbaijani lowercasing.
        const bool bLocaleDropsDot =
            (nMappingType == MappingType::LowerToUpper && isLithuanian()) ||
            (nMappingType == MappingType::UpperToLower && isTurkic());
        const bool bAfterI = bLocaleDropsDot && bHasPrev && type_i(str[pos - 1]);
        return bAfterI ? mapping_0307[0] : mapping_0307[1];
    }
    case 0x0130:
        return isTurkic() ? mapping_0130[0] : mapping_0130[1];
    case 0x0069:
        return isTurkic() ? mapping_0069[0] : mapping_0069[1];
    case 0x0049:
        if (isLithuanian() && accentFollows())
            return mapping_0049[0];
        return isTurkic() ? mapping_0049[1] : mapping_0049[2];
    case 0x004a:
        return (isLithuanian() && accentFollows()) ? mapping_004a[0] : mapping_004a[1];
    case 0x012e:
        return (isLithuanian() && accentFollows()) ? mapping_012e[0] : mapping_012e[1];
    case 0x00cc:
        return isLithuanian() ? mapping_00cc[0] : mapping_00cc[1];
    case 0x00cd:
        return isLithuanian() ? mapping_00cd[0] : mapping_00cd[1];
    case 0x0128:
        return isLithuanian() ? mapping_0128[0] : mapping_0128[1];
    }

    // Should not come here
    throw RuntimeException();
}
94
95
/** Look up the case mapping of the character at str[pos].
 *
 *  The trailing half of a surrogate pair yields an empty mapping (nmap == 0),
 *  because the pair is mapped as a whole when its leading half is visited.
 *  Code points are first looked up in the CaseMapping* tables; anything those
 *  tables do not map for nMappingType falls back to ICU's per-character
 *  functions.
 *
 *  @return the mapping by value
 *  @throws RuntimeException if the tables announce an extra mapping but none
 *          of the extra entries matches nMappingType
 */
Mapping casefolding::getValue(const sal_Unicode* str, sal_Int32 pos, sal_Int32 len, Locale const & aLocale, MappingType nMappingType)
{
    // Second half of a surrogate pair: already covered by the first half.
    if (pos > 0 && rtl::isHighSurrogate(str[pos-1]) && rtl::isLowSurrogate(str[pos]))
        return { 0, 0, { 0, 0, 0 } };

    // Default result: the character maps to itself.
    Mapping aResult = { 0, 1, { str[pos], 0, 0 } };

    // Decode the code point, joining a surrogate pair when one starts here.
    const bool bStartsPair = pos + 1 < len
        && rtl::isHighSurrogate(str[pos]) && rtl::isLowSurrogate(str[pos + 1]);
    sal_uInt32 nCodePoint = str[pos];
    if (bStartsPair)
        nCodePoint = rtl::combineSurrogates(str[pos], str[pos + 1]);

    // Page of the mapping table for this code point, or -1 if there is none.
    sal_Int16 nPage = -1;
    if (nCodePoint < SAL_N_ELEMENTS(CaseMappingIndex) * 256)
        nPage = CaseMappingIndex[nCodePoint >> 8];

    if (nPage >= 0) {
        const sal_Int16 nSlot = (nPage << 8) + (nCodePoint & 0xFF);
        const MappingType eSlotType = static_cast<MappingType>(CaseMappingValue[nSlot].type);
        if (eSlotType & nMappingType) {
            if (eSlotType & MappingType::NotValue) {
                // A zero value marks a purely conditional mapping.
                if (CaseMappingValue[nSlot].value == 0)
                    return getConditionalValue(str, pos, len, aLocale, nMappingType);

                // Otherwise the value is the first index of a run of extra
                // mappings; take the first one that matches the request.
                for (int nExtra = CaseMappingValue[nSlot].value;
                        nExtra < CaseMappingValue[nSlot].value + MaxCaseMappingExtras; ++nExtra) {
                    const MappingType eExtraType = static_cast<MappingType>(CaseMappingExtra[nExtra].type);
                    if (eExtraType & nMappingType) {
                        if (eExtraType & MappingType::NotValue)
                            return getConditionalValue(str, pos, len, aLocale, nMappingType);
                        return CaseMappingExtra[nExtra];
                    }
                }
                // Should not come here
                throw RuntimeException();
            }

            // Simple one-to-one mapping stored directly in the table.
            aResult.map[0] = CaseMappingValue[nSlot].value;
            return aResult;
        }
    }

    // If the code point is not supported by our case mapping tables,
    // fallback to ICU functions.
    // TODO: this does not handle special case mapping as these require
    // using ustring.h APIs, which work on the whole string not character
    // by character.
    // TODO: what is the difference between ToLower and UpperToLower etc.?
    sal_uInt32 nMapped = nCodePoint;
    switch (nMappingType)
    {
        case MappingType::ToLower:
        case MappingType::UpperToLower:
            nMapped = u_tolower(nCodePoint);
            break;
        case MappingType::ToUpper:
        case MappingType::LowerToUpper:
            nMapped = u_toupper(nCodePoint);
            break;
        case MappingType::ToTitle:
            nMapped = u_totitle(nCodePoint);
            break;
        case MappingType::SimpleFolding:
        case MappingType::FullFolding:
            nMapped = u_foldCase(nCodePoint, U_FOLD_CASE_DEFAULT);
            break;
        default:
            break;
    }

    // Store the result as UTF-16; nmap becomes the number of code units.
    aResult.nmap = rtl::splitSurrogates(nMapped, aResult.map);

    return aResult;
}
172
173
static bool
174
is_ja_voice_sound_mark(sal_Unicode& current, sal_Unicode next)
175
0
{
176
0
        if (next != 0x3099 && next != 0x309a)
177
0
            return false;
178
0
        sal_Unicode c = widthfolding::getCompositionChar(current, next);
179
0
        if (c != 0)
180
0
            current = c;
181
0
        return c != 0;
182
0
}
183
184
/** Return the next character of str after applying the transliteration
 *  modules selected in moduleLoaded, advancing idx as input is consumed.
 *
 *  With IGNORE_CASE, one input character may expand to several output
 *  characters; e keeps the pending expansion between calls.  When idx is
 *  already at or past len, e is reset and 0 is returned.
 */
sal_Unicode casefolding::getNextChar(const sal_Unicode *str, sal_Int32& idx, sal_Int32 len, MappingElement& e, Locale const & aLocale, MappingType nMappingType, TransliterationFlags moduleLoaded)
{
    if (len <= idx)
    {
        e = MappingElement();
        return 0;
    }

    sal_Unicode cOut;

    if (moduleLoaded & TransliterationFlags::IGNORE_CASE) {
        // Fetch a new mapping once the pending one is used up.
        if (e.element.nmap <= e.current) {
            e.element = getValue(str, idx++, len, aLocale, nMappingType);
            e.current = 0;
        }
        cOut = e.element.map[e.current];
        e.current++;
    } else {
        cOut = str[idx];
        idx++;
    }

    if (moduleLoaded & TransliterationFlags::IGNORE_KANA) {
        // Shift the ranges U+3040..U+3094 and U+309D..U+309F up by 0x60.
        const bool bInFirstRange = 0x3040 <= cOut && cOut <= 0x3094;
        const bool bInSecondRange = 0x309d <= cOut && cOut <= 0x309f;
        if (bInFirstRange || bInSecondRange)
            cOut += 0x60;
    }

    // composition: KA + voice-mark --> GA. see halfwidthToFullwidth.cxx for detail
    if (moduleLoaded & TransliterationFlags::IGNORE_WIDTH) {
        static oneToOneMapping& rHalfToFull = widthfolding::gethalf2fullTable();
        cOut = rHalfToFull[cOut];
        const bool bKana = 0x3040 <= cOut && cOut <= 0x30ff;
        // Consume the following voice mark when it was merged into cOut.
        if (bKana && idx < len &&
                is_ja_voice_sound_mark(cOut, rHalfToFull[str[idx]]))
            idx++;
    }

    return cOut;
}
220
221
}
222
223
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */