/src/mozilla-central/gfx/thebes/gfxScriptItemizer.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
3 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
5 | | |
6 | | /* |
7 | | * This file is based on usc_impl.c from ICU 4.2.0.1, slightly adapted |
8 | | * for use within Mozilla Gecko, separate from a standard ICU build. |
9 | | * |
10 | | * The original ICU license of the code follows: |
11 | | * |
12 | | * ICU License - ICU 1.8.1 and later |
13 | | * |
14 | | * COPYRIGHT AND PERMISSION NOTICE |
15 | | * |
16 | | * Copyright (c) 1995-2009 International Business Machines Corporation and |
17 | | * others |
18 | | * |
19 | | * All rights reserved. |
20 | | * |
21 | | * Permission is hereby granted, free of charge, to any person obtaining a |
22 | | * copy of this software and associated documentation files (the "Software"), |
23 | | * to deal in the Software without restriction, including without limitation |
24 | | * the rights to use, copy, modify, merge, publish, distribute, and/or sell |
25 | | * copies of the Software, and to permit persons to whom the Software is |
26 | | * furnished to do so, provided that the above copyright notice(s) and this |
27 | | * permission notice appear in all copies of the Software and that both the |
28 | | * above copyright notice(s) and this permission notice appear in supporting |
29 | | * documentation. |
30 | | * |
31 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
32 | | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
33 | | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. |
34 | | * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE |
35 | | * BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, |
36 | | * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, |
37 | | * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, |
38 | | * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS |
39 | | * SOFTWARE. |
40 | | * |
41 | | * Except as contained in this notice, the name of a copyright holder shall |
42 | | * not be used in advertising or otherwise to promote the sale, use or other |
43 | | * dealings in this Software without prior written authorization of the |
44 | | * copyright holder. |
45 | | * |
46 | | * All trademarks and registered trademarks mentioned herein are the property |
47 | | * of their respective owners. |
48 | | */ |
49 | | |
50 | | #include "gfxScriptItemizer.h" |
51 | | #include "nsUnicodeProperties.h" |
52 | | #include "nsCharTraits.h" |
53 | | #include "harfbuzz/hb.h" |
54 | | |
55 | | using namespace mozilla::unicode; |
56 | | |
/*
 * Helpers for the paren stack, which is addressed as a ring of
 * PAREN_STACK_DEPTH slots. The macros that take no stack-pointer argument
 * refer directly to the pushCount, fixupCount, parenStack and parenSP
 * names that are in scope where they are expanded.
 */

/* Wrap a slot index into the ring. */
#define MOD(sp) ((sp) % PAREN_STACK_DEPTH)

/* Count upwards by one, but never beyond PAREN_STACK_DEPTH. */
#define LIMIT_INC(sp) (((sp) >= PAREN_STACK_DEPTH) ? PAREN_STACK_DEPTH : (sp) + 1)

/* Step a slot index forwards by count (or by one), wrapping around. */
#define INC(sp, count) (MOD((sp) + (count)))
#define INC1(sp) (MOD((sp) + 1))

/*
 * Step a slot index backwards by count (or by one), wrapping around.
 * PAREN_STACK_DEPTH is added before subtracting so that the left operand
 * of % is not negative for the index values used in this file.
 */
#define DEC(sp, count) (MOD((sp) + PAREN_STACK_DEPTH - (count)))
#define DEC1(sp) (MOD((sp) + PAREN_STACK_DEPTH - 1))

/* Emptiness is decided by the push counter, not by the slot index. */
#define STACK_IS_EMPTY() (pushCount <= 0)
#define STACK_IS_NOT_EMPTY() (pushCount > 0)

/* The slot most recently written by a push. */
#define TOP() (parenStack[parenSP])

/* Forget any stack entries that were waiting for a script fixup. */
#define SYNC_FIXUP() (fixupCount = 0)
67 | | |
68 | | void |
69 | | gfxScriptItemizer::push(uint32_t endPairChar, Script newScriptCode) |
70 | 0 | { |
71 | 0 | pushCount = LIMIT_INC(pushCount); |
72 | 0 | fixupCount = LIMIT_INC(fixupCount); |
73 | 0 |
|
74 | 0 | parenSP = INC1(parenSP); |
75 | 0 | parenStack[parenSP].endPairChar = endPairChar; |
76 | 0 | parenStack[parenSP].scriptCode = newScriptCode; |
77 | 0 | } |
78 | | |
79 | | void |
80 | | gfxScriptItemizer::pop() |
81 | 0 | { |
82 | 0 | if (STACK_IS_EMPTY()) { |
83 | 0 | return; |
84 | 0 | } |
85 | 0 | |
86 | 0 | if (fixupCount > 0) { |
87 | 0 | fixupCount -= 1; |
88 | 0 | } |
89 | 0 |
|
90 | 0 | pushCount -= 1; |
91 | 0 | parenSP = DEC1(parenSP); |
92 | 0 | |
93 | 0 | /* If the stack is now empty, reset the stack |
94 | 0 | pointers to their initial values. |
95 | 0 | */ |
96 | 0 | if (STACK_IS_EMPTY()) { |
97 | 0 | parenSP = -1; |
98 | 0 | } |
99 | 0 | } |
100 | | |
101 | | void |
102 | | gfxScriptItemizer::fixup(Script newScriptCode) |
103 | 0 | { |
104 | 0 | int32_t fixupSP = DEC(parenSP, fixupCount); |
105 | 0 |
|
106 | 0 | while (fixupCount-- > 0) { |
107 | 0 | fixupSP = INC1(fixupSP); |
108 | 0 | parenStack[fixupSP].scriptCode = newScriptCode; |
109 | 0 | } |
110 | 0 | } |
111 | | |
112 | | // We regard the current char as having the same script as the in-progress run |
113 | | // if either script code is Common or Inherited, or if the run script appears |
114 | | // in the character's ScriptExtensions, or if the char is a cluster extender. |
115 | | static inline bool |
116 | | SameScript(Script runScript, Script currCharScript, uint32_t aCurrCh) |
117 | 0 | { |
118 | 0 | return runScript <= Script::INHERITED || |
119 | 0 | currCharScript <= Script::INHERITED || |
120 | 0 | currCharScript == runScript || |
121 | 0 | IsClusterExtender(aCurrCh) || |
122 | 0 | HasScript(aCurrCh, runScript); |
123 | 0 | } |
124 | | |
125 | | gfxScriptItemizer::gfxScriptItemizer(const char16_t *src, uint32_t length) |
126 | | : textPtr(src), textLength(length) |
127 | 0 | { |
128 | 0 | reset(); |
129 | 0 | } |
130 | | |
131 | | void |
132 | | gfxScriptItemizer::SetText(const char16_t *src, uint32_t length) |
133 | 0 | { |
134 | 0 | textPtr = src; |
135 | 0 | textLength = length; |
136 | 0 |
|
137 | 0 | reset(); |
138 | 0 | } |
139 | | |
140 | | bool |
141 | | gfxScriptItemizer::Next(uint32_t& aRunStart, uint32_t& aRunLimit, |
142 | | Script& aRunScript) |
143 | 0 | { |
144 | 0 | /* if we've fallen off the end of the text, we're done */ |
145 | 0 | if (scriptLimit >= textLength) { |
146 | 0 | return false; |
147 | 0 | } |
148 | 0 | |
149 | 0 | SYNC_FIXUP(); |
150 | 0 | scriptCode = Script::COMMON; |
151 | 0 |
|
152 | 0 | for (scriptStart = scriptLimit; scriptLimit < textLength; scriptLimit += 1) { |
153 | 0 | uint32_t ch; |
154 | 0 | Script sc; |
155 | 0 | uint32_t startOfChar = scriptLimit; |
156 | 0 |
|
157 | 0 | ch = textPtr[scriptLimit]; |
158 | 0 |
|
159 | 0 | /* decode UTF-16 (may be surrogate pair) */ |
160 | 0 | if (NS_IS_HIGH_SURROGATE(ch) && scriptLimit < textLength - 1) { |
161 | 0 | uint32_t low = textPtr[scriptLimit + 1]; |
162 | 0 | if (NS_IS_LOW_SURROGATE(low)) { |
163 | 0 | ch = SURROGATE_TO_UCS4(ch, low); |
164 | 0 | scriptLimit += 1; |
165 | 0 | } |
166 | 0 | } |
167 | 0 |
|
168 | 0 | // Initialize gc to UNASSIGNED; we'll only set it to the true GC |
169 | 0 | // if the character has script=COMMON, otherwise we don't care. |
170 | 0 | uint8_t gc = HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED; |
171 | 0 |
|
172 | 0 | sc = GetScriptCode(ch); |
173 | 0 | if (sc == Script::COMMON) { |
174 | 0 | /* |
175 | 0 | * Paired character handling: |
176 | 0 | * |
177 | 0 | * if it's an open character, push it onto the stack. |
178 | 0 | * if it's a close character, find the matching open on the |
179 | 0 | * stack, and use that script code. Any non-matching open |
180 | 0 | * characters above it on the stack will be popped. |
181 | 0 | * |
182 | 0 | * We only do this if the script is COMMON; for chars with |
183 | 0 | * specific script assignments, we just use them as-is. |
184 | 0 | */ |
185 | 0 | gc = GetGeneralCategory(ch); |
186 | 0 | if (gc == HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION) { |
187 | 0 | uint32_t endPairChar = mozilla::unicode::GetMirroredChar(ch); |
188 | 0 | if (endPairChar != ch) { |
189 | 0 | push(endPairChar, scriptCode); |
190 | 0 | } |
191 | 0 | } else if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION && |
192 | 0 | HasMirroredChar(ch)) |
193 | 0 | { |
194 | 0 | while (STACK_IS_NOT_EMPTY() && TOP().endPairChar != ch) { |
195 | 0 | pop(); |
196 | 0 | } |
197 | 0 |
|
198 | 0 | if (STACK_IS_NOT_EMPTY()) { |
199 | 0 | sc = TOP().scriptCode; |
200 | 0 | } |
201 | 0 | } |
202 | 0 | } |
203 | 0 |
|
204 | 0 | if (SameScript(scriptCode, sc, ch)) { |
205 | 0 | if (scriptCode <= Script::INHERITED && |
206 | 0 | sc > Script::INHERITED) |
207 | 0 | { |
208 | 0 | scriptCode = sc; |
209 | 0 | fixup(scriptCode); |
210 | 0 | } |
211 | 0 |
|
212 | 0 | /* |
213 | 0 | * if this character is a close paired character, |
214 | 0 | * pop the matching open character from the stack |
215 | 0 | */ |
216 | 0 | if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION && |
217 | 0 | HasMirroredChar(ch)) { |
218 | 0 | pop(); |
219 | 0 | } |
220 | 0 | } else { |
221 | 0 | /* |
222 | 0 | * reset scriptLimit in case it was advanced during reading a |
223 | 0 | * multiple-code-unit character |
224 | 0 | */ |
225 | 0 | scriptLimit = startOfChar; |
226 | 0 |
|
227 | 0 | break; |
228 | 0 | } |
229 | 0 | } |
230 | 0 |
|
231 | 0 | aRunStart = scriptStart; |
232 | 0 | aRunLimit = scriptLimit; |
233 | 0 | aRunScript = scriptCode; |
234 | 0 |
|
235 | 0 | return true; |
236 | 0 | } |