Coverage Report

Created: 2026-02-14 09:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libreoffice/vcl/source/gdi/scrptrun.cxx
Line
Count
Source
1
/*
2
 *******************************************************************************
3
 *
4
 *   Copyright (c) 1995-2013 International Business Machines Corporation and others
5
 *
6
 *   All rights reserved.
7
 *
8
 *   Permission is hereby granted, free of charge, to any person obtaining a copy of
9
 *   this software and associated documentation files (the "Software"), to deal in
10
 *   the Software without restriction, including without limitation the rights to
11
 *   use, copy, modify, merge, publish, distribute, and/or sell copies of the
12
 *   Software, and to permit persons to whom the Software is furnished to do so,
13
 *   provided that the above copyright notice(s) and this permission notice appear
14
 *   in all copies of the Software and that both the above copyright notice(s) and
15
 *   this permission notice appear in supporting documentation.
16
 *
17
 *   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
 *   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
 *   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN
20
 *   NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE
21
 *   LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY
22
 *   DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
23
 *   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
24
 *   CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
25
 *
26
 *   Except as contained in this notice, the name of a copyright holder shall not be
27
 *   used in advertising or otherwise to promote the sale, use or other dealings in
28
 *   this Software without prior written authorization of the copyright holder.
29
 *
30
 *******************************************************************************
31
 *   file name:  scrptrun.cpp
32
 *
33
 *   created on: 10/17/2001
34
 *   created by: Eric R. Mader
35
 */
36
/**
37
  * This file is largely copied from the ICU project,
38
  * under folder source/extra/scrptrun/scrptrun.cpp
39
  */
40
41
#include <sal/config.h>
42
43
#include <rtl/character.hxx>
44
#include <unicode/uchar.h>
45
#include <unicode/utypes.h>
46
#include <unicode/uscript.h>
47
48
#include <scrptrun.h>
49
#include <algorithm>
50
51
namespace {
52
53
struct PairIndices
54
{
55
    int8_t ma00[0xff];
56
    int8_t ma20[0x7f];
57
    int8_t ma30[0x7f];
58
59
    PairIndices()
60
110
    {
61
110
        std::fill_n(ma00, 0xff, -1);
62
110
        std::fill_n(ma20, 0x7f, -1);
63
110
        std::fill_n(ma30, 0x7f, -1);
64
65
        // characters in the range 0x0000 - 0x007e (inclusive)
66
        // ascii paired punctuation
67
110
        ma00[0x28] =  0;
68
110
        ma00[0x29] =  1;
69
110
        ma00[0x3c] =  2;
70
110
        ma00[0x3e] =  3;
71
110
        ma00[0x5b] =  4;
72
110
        ma00[0x5d] =  5;
73
110
        ma00[0x7b] =  6;
74
110
        ma00[0x7d] =  7;
75
        // guillemets
76
110
        ma00[0xab] =  8;
77
110
        ma00[0xbb] =  9;
78
79
        // characters in the range 0x2000 - 0x207e (inclusive)
80
        // general punctuation
81
110
        ma20[0x18] = 10;
82
110
        ma20[0x19] = 11;
83
110
        ma20[0x1c] = 12;
84
110
        ma20[0x1d] = 13;
85
110
        ma20[0x39] = 14;
86
110
        ma20[0x3a] = 15;
87
88
        // characters in the range 0x3000 - 0x307e (inclusive)
89
        // chinese paired punctuation
90
110
        ma30[0x08] = 16;
91
110
        ma30[0x09] = 17;
92
110
        ma30[0x0a] = 18;
93
110
        ma30[0x0b] = 19;
94
110
        ma30[0x0c] = 20;
95
110
        ma30[0x0d] = 21;
96
110
        ma30[0x0e] = 22;
97
110
        ma30[0x0f] = 23;
98
110
        ma30[0x10] = 24;
99
110
        ma30[0x11] = 25;
100
110
        ma30[0x14] = 26;
101
110
        ma30[0x15] = 27;
102
110
        ma30[0x16] = 28;
103
110
        ma30[0x17] = 29;
104
110
        ma30[0x18] = 30;
105
110
        ma30[0x19] = 31;
106
110
        ma30[0x1a] = 32;
107
110
        ma30[0x1b] = 33;
108
110
    }
109
110
    int32_t getPairIndex(UChar32 ch) const
111
280M
    {
112
280M
        if (ch < 0xff)
113
252M
            return ma00[ch];
114
27.9M
        if (ch >= 0x2000 && ch < 0x207f)
115
1.01M
            return ma20[ch - 0x2000];
116
26.9M
        if (ch >= 0x3000 && ch < 0x307f)
117
75.3k
            return ma30[ch - 0x3000];
118
26.8M
        return -1;
119
26.9M
    }
120
121
};
122
123
/**
 * Determines the script used for run segmentation of a single code point.
 *
 * @param ch     code point to classify
 * @param status ICU error code, passed through to uscript_getScript()
 * @return USCRIPT_INHERITED for non-spacing marks, USCRIPT_HIRAGANA for any
 *         of the Japanese kana scripts, otherwise whatever
 *         uscript_getScript() reports (also when it fails)
 */
UScriptCode getScript(UChar32 ch, UErrorCode* status)
{
    // tdf#154549
    // A non-spacing mark always takes over the script of its base character,
    // no matter which script Unicode assigns to the mark itself.
    const bool bNonSpacingMark
        = u_getIntPropertyValue(ch, UCHAR_GENERAL_CATEGORY) == U_NON_SPACING_MARK;
    if (bNonSpacingMark)
        return USCRIPT_INHERITED;

    const UScriptCode eResult = uscript_getScript(ch, status);
    if (U_FAILURE(*status))
        return eResult;

    // Unicode knows three script codes for Japanese text whereas OpenType has
    // a single script tag for them; splitting runs between them would gain
    // nothing for shaping, so fold the kana codes into one.
    const bool bKana
        = (eResult == USCRIPT_KATAKANA_OR_HIRAGANA || eResult == USCRIPT_KATAKANA);
    return bKana ? USCRIPT_HIRAGANA : eResult;
}
142
143
}
144
145
// File-level, read-only instance of the paired punctuation lookup tables;
// ScriptRun::next() queries it through getPairIndex().
const PairIndices gPairIndices;
146
147
148
namespace vcl {
149
150
// Out-of-class definition of ScriptRun's static fgClassID member.
// NOTE(review): the declaration is not visible here (presumably in
// scrptrun.h); it looks like an ICU-style class ID whose address, not its
// value, is what matters - confirm against the header.
const char ScriptRun::fgClassID=0;
151
152
static bool sameScript(int32_t scriptOne, int32_t scriptTwo)
153
280M
{
154
280M
    return scriptOne <= USCRIPT_INHERITED || scriptTwo <= USCRIPT_INHERITED || scriptOne == scriptTwo;
155
280M
}
156
157
/**
 * Advances to the next script run.
 *
 * Scans charArray from the end of the previous run and stops in front of the
 * first character whose script is incompatible with the run's script. On
 * return scriptStart / scriptEnd delimit the run and scriptCode holds its
 * script. Paired punctuation is tracked on parenStack so that a closing
 * character takes the script that was current at its matching opener.
 *
 * @return false when the text is already exhausted, true when a run was found
 */
UBool ScriptRun::next()
{
    // nothing left to scan: there are no more runs
    if (scriptEnd >= charLimit)
        return false;

    // stack top on entry; entries above it are openers whose script code may
    // still have to be fixed up once the run's script is known
    int32_t nFixupSP = parenSP;
    UErrorCode eStatus = U_ZERO_ERROR;

    scriptCode = USCRIPT_COMMON;
    scriptStart = scriptEnd;

    for (; scriptEnd < charLimit; ++scriptEnd)
    {
        const UChar cLead = charArray[scriptEnd];
        UChar32 nCodePoint = cLead;

        // a high surrogate that is not the last code unit of the text may be
        // the first half of a surrogate pair
        if (rtl::isHighSurrogate(cLead) && scriptEnd < charLimit - 1)
        {
            const UChar cTrail = charArray[scriptEnd + 1];
            if (rtl::isLowSurrogate(cTrail))
            {
                // consume the low surrogate too and decode the full character
                nCodePoint = rtl::combineSurrogates(cLead, cTrail);
                ++scriptEnd;
            }
        }

        UScriptCode eCharScript = getScript(nCodePoint, &eStatus);
        const int32_t nPair = gPairIndices.getPairIndex(nCodePoint);
        const bool bOpener = nPair >= 0 && (nPair & 1) == 0;
        const bool bCloser = nPair >= 0 && (nPair & 1) != 0;

        // Paired character handling:
        // an opener is pushed together with the current run script; a closer
        // looks for its opener on the stack, dropping any unmatched openers
        // above it, and adopts the script stored with that opener.
        if (bOpener)
        {
            ++parenSP;
            const int32_t nStackSize = parenStack.size();
            if (parenSP == nStackSize)
                parenStack.resize(nStackSize + 128);
            parenStack[parenSP].pairIndex = nPair;
            parenStack[parenSP].scriptCode = scriptCode;
        }
        else if (bCloser && parenSP >= 0)
        {
            const int32_t nWantedOpener = nPair & ~1;
            while (parenSP >= 0 && parenStack[parenSP].pairIndex != nWantedOpener)
                --parenSP;

            if (parenSP < nFixupSP)
                nFixupSP = parenSP;

            if (parenSP >= 0)
                eCharScript = parenStack[parenSP].scriptCode;
        }

        if (!sameScript(scriptCode, eCharScript))
        {
            // the run ends in front of this character; if that character is
            // a surrogate pair, step back to before its high surrogate
            if (nCodePoint >= 0x10000)
                --scriptEnd;
            break;
        }

        if (scriptCode <= USCRIPT_INHERITED && eCharScript > USCRIPT_INHERITED)
        {
            scriptCode = eCharScript;

            // the run's script is settled now: patch the openers that were
            // pushed before it was known
            while (nFixupSP < parenSP)
                parenStack[++nFixupSP].scriptCode = scriptCode;
        }

        // a closer that stayed in the run removes its opener from the stack
        if (bCloser && parenSP >= 0)
        {
            --parenSP;
            // keep nFixupSP from dropping below -1: otherwise the fix-up loop
            // above would write in front of the start of the stack
            if (nFixupSP >= 0)
                --nFixupSP;
        }
    }

    return true;
}
258
259
}