/src/mozilla-central/accessible/atk/DOMtoATK.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
2 | | /* vim: set ts=2 et sw=2 tw=80: */ |
3 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
4 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
5 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
6 | | |
7 | | #include "AccessibleWrap.h" |
8 | | #include "nsString.h" |
9 | | #include "nsMai.h" |
10 | | |
11 | | /** |
12 | | * ATK offsets are counted in unicode codepoints, while DOM offsets are counted |
13 | | * in UTF-16 code units. That makes a difference for non-BMP characters, |
14 | | * which need two UTF-16 code units to be represented (a pair of surrogates), |
15 | | * while they are just one unicode character. |
16 | | * |
17 | | * To keep synchronization between ATK offsets (unicode codepoints) and DOM |
18 | | * offsets (UTF-16 code units), after translation from UTF-16 to UTF-8 we add a |
19 | | * BOM after each non-BMP character (which would otherwise use 2 UTF-16 |
20 | | * code units for only 1 unicode codepoint). |
21 | | * |
22 | | * BOMs (Byte Order Marks, U+FEFF, also known as ZERO WIDTH NO-BREAK SPACE, but |
23 | | * that usage is deprecated) normally only appear at the beginning of Unicode |
24 | | * files, but their occurrence within text (notably after cut&paste) is not |
25 | | * uncommon, and they are thus treated as non-text. |
26 | | * |
27 | | * Since the selection requested through ATK may not contain both surrogates |
28 | | * at the ends of the selection, we need to fetch one more UTF-16 code unit |
29 | | * on each side, and get rid of it before returning the string to ATK. The |
30 | | * ATKStringConverterHelper class takes care of this; NewATKString should be |
31 | | * used to invoke it properly. |
32 | | * |
33 | | * In the end, |
34 | | * - if the start is between the high and low surrogates, the UTF-8 result |
35 | | * includes a BOM from it but not the character |
36 | | * - if the end is between the high and low surrogates, the UTF-8 result |
37 | | * includes the character but *not* the BOM |
38 | | * - all non-BMP characters that are fully in the string are in the UTF-8 result |
39 | | * as character followed by BOM |
40 | | */ |
41 | | namespace mozilla { |
42 | | namespace a11y { |
43 | | |
44 | | namespace DOMtoATK |
45 | | { |
46 | | |
47 | | /** |
48 | | * Converts aStr, a string of accessible text, into an ATK gchar* string (by |
49 | | * adding BOMs). This can be used when offsets do not need to be adjusted, |
50 | | * because the ends of the string cannot fall between surrogates. |
51 | | */ |
52 | | gchar* Convert(const nsAString& aStr); |
53 | | |
54 | | /** |
55 | | * Add a BOM (U+FEFF) after each non-BMP character. NOTE(review): presumably aSource is the input and aDest receives the result -- confirm against the definition. |
56 | | */ |
57 | | void AddBOMs(nsACString& aDest, const nsACString& aSource); |
58 | | |
59 | | /** |
60 | | * Replace all characters of aString with asterisks (e.g. for password fields); aString is taken by non-const reference and nothing is returned. |
61 | | */ |
62 | | void ConvertTexttoAsterisks(nsAString& aString); |
63 | | |
64 | | /** |
65 | | * Parameterize conversion. |
66 | | */ |
67 | | enum class AtkStringConvertFlags : uint32_t { |
68 | | None = 0, |
69 | | ConvertTextToAsterisks = 1 << 0, |
70 | | }; |
71 | | |
72 | | MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS(AtkStringConvertFlags) |
73 | | |
74 | | class ATKStringConverterHelper { |
75 | | public: |
76 | | ATKStringConverterHelper(void) : |
77 | | #ifdef DEBUG |
78 | | mAdjusted (false), |
79 | | #endif |
80 | | mStartShifted (false), |
81 | 0 | mEndShifted (false) { } |
82 | | |
83 | | /** |
84 | | * In order to properly get non-BMP values, offsets need to be changed |
85 | | * to get one character more on each end, so that ConvertUTF16toUTF8 can |
86 | | * convert surrogates even if the originally requested offsets fall between |
87 | | * them. |
88 | | */ |
89 | | void AdjustOffsets(gint* aStartOffset, gint* aEndOffset, gint count); |
90 | | |
91 | | /** |
92 | | * Converts a string of accessible text with adjusted offsets into ATK |
93 | | * gchar* string (by adding BOMs). Note, AdjustOffsets has to be called |
94 | | * before getting the text passed to this. |
95 | | */ |
96 | | gchar* ConvertAdjusted(const nsAString& aStr); |
97 | | |
98 | | private: |
99 | | /** |
100 | | * Remove the additional characters requested by PrepareUTF16toUTF8. |
101 | | */ |
102 | | gchar* FinishUTF16toUTF8(nsCString& aStr); |
103 | | |
104 | | #ifdef DEBUG |
105 | | bool mAdjusted; |
106 | | #endif |
107 | | bool mStartShifted; |
108 | | bool mEndShifted; |
109 | | }; |
110 | | |
111 | | /** |
112 | | * Get text from aAccessible, using ATKStringConverterHelper to properly |
113 | | * introduce appropriate BOMs. |
114 | | */ |
115 | | template <class AccessibleOrProxy> |
116 | | gchar* NewATKString(AccessibleOrProxy* aAccessible, |
117 | | gint aStartOffset, gint aEndOffset, |
118 | | AtkStringConvertFlags aFlags) |
119 | 0 | { |
120 | 0 | gint startOffset = aStartOffset, endOffset = aEndOffset; |
121 | 0 | ATKStringConverterHelper converter; |
122 | 0 | converter.AdjustOffsets(&startOffset, &endOffset, |
123 | 0 | gint(aAccessible->CharacterCount())); |
124 | 0 | nsAutoString str; |
125 | 0 | aAccessible->TextSubstring(startOffset, endOffset, str); |
126 | 0 | if (aFlags & AtkStringConvertFlags::ConvertTextToAsterisks) |
127 | 0 | ConvertTexttoAsterisks(str); |
128 | 0 | return converter.ConvertAdjusted(str); |
129 | 0 | } Unexecuted instantiation: char* mozilla::a11y::DOMtoATK::NewATKString<mozilla::a11y::HyperTextAccessible>(mozilla::a11y::HyperTextAccessible*, int, int, mozilla::a11y::DOMtoATK::AtkStringConvertFlags) Unexecuted instantiation: char* mozilla::a11y::DOMtoATK::NewATKString<mozilla::a11y::ProxyAccessible>(mozilla::a11y::ProxyAccessible*, int, int, mozilla::a11y::DOMtoATK::AtkStringConvertFlags) |
130 | | |
131 | | /** |
132 | | * Get a character from aAccessible, fetching more data as appropriate to |
133 | | * properly get non-BMP characters or a BOM as appropriate. |
134 | | */ |
135 | | template <class AccessibleCharAt> |
136 | | gunichar ATKCharacter(AccessibleCharAt* aAccessible, gint aOffset) |
137 | 0 | { |
138 | 0 | // char16_t is unsigned short in Mozilla, gnuichar is guint32 in glib. |
139 | 0 | gunichar character = static_cast<gunichar>(aAccessible->CharAt(aOffset)); |
140 | 0 |
|
141 | 0 | if (NS_IS_LOW_SURROGATE(character)) { |
142 | 0 | // Trailing surrogate, return BOM instead. |
143 | 0 | return 0xFEFF; |
144 | 0 | } |
145 | 0 | |
146 | 0 | if (NS_IS_HIGH_SURROGATE(character)) { |
147 | 0 | // Heading surrogate, get the trailing surrogate and combine them. |
148 | 0 | gunichar characterLow = static_cast<gunichar>(aAccessible->CharAt(aOffset + 1)); |
149 | 0 |
|
150 | 0 | if (!NS_IS_LOW_SURROGATE(characterLow)) { |
151 | 0 | // It should have been a trailing surrogate... Flag the error. |
152 | 0 | return 0xFFFD; |
153 | 0 | } |
154 | 0 | return SURROGATE_TO_UCS4(character, characterLow); |
155 | 0 | } |
156 | 0 |
|
157 | 0 | return character; |
158 | 0 | } Unexecuted instantiation: unsigned int mozilla::a11y::DOMtoATK::ATKCharacter<mozilla::a11y::HyperTextAccessible>(mozilla::a11y::HyperTextAccessible*, int) Unexecuted instantiation: unsigned int mozilla::a11y::DOMtoATK::ATKCharacter<mozilla::a11y::ProxyAccessible>(mozilla::a11y::ProxyAccessible*, int) |
159 | | |
160 | | } |
161 | | |
162 | | } // namespace a11y |
163 | | } // namespace mozilla |