/work/obj-fuzz/dist/include/mozilla/Utf8.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
2 | | /* vim: set ts=8 sts=2 et sw=2 tw=80: */ |
3 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
4 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
5 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
6 | | |
7 | | /* |
8 | | * UTF-8-related functionality, including a type-safe structure representing a |
9 | | * UTF-8 code unit. |
10 | | */ |
11 | | |
12 | | #ifndef mozilla_Utf8_h |
13 | | #define mozilla_Utf8_h |
14 | | |
15 | | #include "mozilla/Casting.h" // for mozilla::AssertedCast |
16 | | #include "mozilla/Likely.h" // for MOZ_UNLIKELY |
17 | | #include "mozilla/Maybe.h" // for mozilla::Maybe |
18 | | #include "mozilla/TextUtils.h" // for mozilla::IsAscii |
19 | | #include "mozilla/Types.h" // for MFBT_API |
20 | | |
21 | | #include <limits.h> // for CHAR_BIT |
22 | | #include <stddef.h> // for size_t |
23 | | #include <stdint.h> // for uint8_t |
24 | | |
25 | | namespace mozilla { |
26 | | |
27 | | union Utf8Unit; |
28 | | |
29 | | static_assert(CHAR_BIT == 8, |
30 | | "Utf8Unit won't work so well with non-octet chars"); |
31 | | |
/**
 * One code unit of a UTF-8 encoded string.  (A code unit is the smallest piece
 * of a string's Unicode encoding: an 8-bit number for UTF-8, where it would be
 * a 16-bit number for UTF-16.)
 *
 * A code unit is *not* a code point: in UTF-8 every non-ASCII code point is
 * spelled using several code units.
 */
union Utf8Unit
{
private:
  // Why is this a union around a bare |char|?  The C++ object model, together
  // with C++'s rules about which types may be used to access an object, leaves
  // (almost?) no other choice.
  //
  // What a UTF-8 code unit type must provide:
  //
  //   1. "Compatibility" with UTF-8 encoded C++ character/string literals.  A
  //      properly encoded literal must be accessible through |Utf8Unit| and
  //      friends, and the contents of |Utf8Unit| and friends (particularly
  //      UnicodeData) must be accessible as C++ character types.
  //   2. Conversion between |Utf8Unit| and friends and |char|/|char*| happens
  //      only when explicitly requested.
  //   3. In overload resolution and in template type equivalence (whether
  //      |X<T>| and |X<U>| are the same type, for |template<class> class X|),
  //      |Utf8Unit| is distinct from the C++ character types.
  //
  // What would be nice to have (at least for the moment):
  //
  //   4. An unsigned representation, both to dodge the undefined behavior that
  //      signed types can trigger and because Unicode code points and code
  //      units are unsigned.
  //   5. Convertibility to/from |unsigned char| and |unsigned char*|, for APIs
  //      that (because of #4) treat those as the "natural" UTF-8 types.
  //
  // From #1: |Utf8Unit| has to "incorporate" a C++ character type, i.e. one of
  // |{,{un,}signed} char|.[0]  |uint8_t| is ruled out because it might not be
  // a C++ character type.
  //
  // From #2 and #3: |Utf8Unit| cannot *be* such a type, nor a typedef to one
  // (typedefs only create aliases, never *new* types).  So a compound type is
  // required.
  //
  // The final shape (and the character type inside it) is pinned down by C++14
  // [basic.lval]p10, which states how an object may be accessed given its
  // dynamic type in memory and the type actually used for the access:
  //
  //     If a program attempts to access the stored value of an object
  //     through a glvalue of other than one of the following types the
  //     behavior is undefined:
  //
  //       1. the dynamic type of the object,
  //       2. a cv-qualified version of the dynamic type of the object,
  //       ...other types irrelevant here...
  //       3. an aggregate or union type that includes one of the
  //          aforementioned types among its elements or non-static data
  //          members (including, recursively, an element or non-static
  //          data member of a subaggregate or contained union),
  //       ...more irrelevant types...
  //       4. a char or unsigned char type.
  //
  // Reading (wrapped) UTF-8 data as |char|/|unsigned char| is therefore always
  // permitted, whatever the representation, thanks to #4.  (Which *values* are
  // observed is a separate matter, discussed next.)  (#2 additionally permits
  // |const| on the dynamic type or on the accessing type.)  (|signed char| is
  // ignored: it is only really useful for small signed numbers, not for
  // characters.)
  //
  // What is observed when contents are read as |char|/|unsigned char| although
  // a different type is actually stored?  C++14 [basic.fundamental]p1 requires
  // the character types to share size and alignment, and C++14
  // [basic.fundamental]p3 requires |signed char| and |unsigned char| to have
  // the same value representation.  Identical *bitwise* representation is not
  // required by C++, though.  Assuming it would be fine in practice, yet this
  // is the kind of spec corner that is best not *relied* on for correctness
  // when avoidable.
  //
  // Consequently |Utf8Unit|'s contents are exposed only as |char| and |char*|,
  // never as |unsigned char*|.  |unsigned char| is offered safely through
  // fully-defined *integral conversion* (C++14 [conv.integral]p2) instead.
  // The reverse integral conversion, |unsigned char| → |char|, has merely
  // implementation-defined behavior.  Not depending on that would be nicer,
  // but since twos-complement won, it should be okay.  (Besides, |unsigned
  // char*| is awkward enough for string work that it probably shows up rarely
  // in string manipulation, and mostly in places that ought to use |Utf8Unit|
  // directly.)
  //
  // Going the other way -- viewing |char| or |char*| data through |Utf8Unit|
  // -- is unproblematic provided |Utf8Unit| contains a |char| (as settled
  // above), by way of #3: any "aggregate or union" containing a |char| works.
  // Curiously, an aggregate is unavailable: C++14 [dcl.init.aggr]p1 demands
  // that aggregates have "no private or protected non-static data members",
  // and the inner |char| is meant to stay hidden.  That excludes |struct| and
  // leaves only |union|.
  //
  // (An enum is not "an aggregate or union type" either.  So, perhaps
  // surprisingly, |Utf8Unit| cannot be an enum class whose underlying type is
  // |char|: nothing licenses treating |char| memory as that |enum|'s memory.)
  //
  // Conclusion: |Utf8Unit| is a union with one |char| non-static data member.
  // All requirements are met, as are the nice-to-haves of constructing a
  // |Utf8Unit| from an |unsigned char| and converting one to |unsigned char|.
  // Not met are the nice-to-haves of storing an |unsigned char| internally and
  // of wrapping an existing |unsigned char| or a pointer to one.  Those
  // probably *could* be done by leaning harder on implementation-defined
  // behavior, but for now C++'s main character type is privileged over some
  // conceptual purity.
  //
  // 0. A UTF-8 character type distinct from the existing C++ narrow character
  //    types has been proposed:
  //
  //      http://open-std.org/JTC1/SC22/WG21/docs/papers/2016/p0482r0.html
  //
  //    When this was written it had not been standardized (and it was unclear
  //    it ever would be), and none of the compilers we really care about had
  //    implemented it.  Perhaps someday this implementation can switch to it
  //    without too much trouble, with luck...
  char mValue;

public:
  /** Wrap a raw |char| code unit. */
  explicit constexpr Utf8Unit(char aUnit) : mValue{ aUnit } {}

  /**
   * Wrap a raw |unsigned char| code unit.
   *
   * As explained in the rationale above, the cast used here is an integral
   * conversion whose semantics are implementation-defined; regretfully but
   * unavoidably it is assumed to do what we want.
   */
  explicit constexpr Utf8Unit(unsigned char aUnit)
    : mValue{ static_cast<char>(aUnit) }
  {}

  /** Two units are equal exactly when their stored |char| values are equal. */
  constexpr bool operator==(const Utf8Unit& aOther) const
  {
    return mValue == aOther.mValue;
  }

  /** The negation of |operator==|. */
  constexpr bool operator!=(const Utf8Unit& aOther) const
  {
    return mValue != aOther.mValue;
  }

  /**
   * Convert a UTF-8 code unit to a raw char.
   *
   * Nothing but a |char| is ever written into this storage, so reading it
   * back as |char| is both permitted and yields the desired value.
   */
  constexpr char toChar() const { return mValue; }

  /**
   * Convert a UTF-8 code unit to a raw unsigned char.
   *
   * As explained in the rationale above, this is a well-defined integral
   * conversion.
   */
  constexpr unsigned char toUnsignedChar() const
  {
    return static_cast<unsigned char>(mValue);
  }

  /**
   * Convert a UTF-8 code unit to a uint8_t.
   *
   * Like |toUnsignedChar|, this relies only on well-defined integral
   * conversion.
   */
  constexpr uint8_t toUint8() const
  {
    return static_cast<uint8_t>(toUnsignedChar());
  }

  // |&mValue| is deliberately not exposed at present.  |UnicodeData| sort of
  // exposes it, but that is a somewhat separate concern which is justified in
  // comments accompanying that other code.
};
200 | | |
201 | | /** |
202 | | * Reinterpret the address of a UTF-8 code unit as |const unsigned char*|. |
203 | | * |
204 | | * Assuming proper backing has been set up, the resulting |const unsigned char*| |
205 | | * may validly be dereferenced. |
206 | | * |
207 | | * No access is provided to mutate this underlying memory as |unsigned char|. |
208 | | * Presently memory inside |Utf8Unit| is *only* stored as |char|, and we are |
209 | | * loath to offer a way to write non-|char| data until absolutely necessary. |
210 | | */ |
211 | | inline const unsigned char* |
212 | | Utf8AsUnsignedChars(const Utf8Unit* aUnits) |
213 | 0 | { |
214 | 0 | static_assert(sizeof(Utf8Unit) == sizeof(unsigned char), |
215 | 0 | "sizes must match to permissibly reinterpret_cast<>"); |
216 | 0 | static_assert(alignof(Utf8Unit) == alignof(unsigned char), |
217 | 0 | "alignment must match to permissibly reinterpret_cast<>"); |
218 | 0 |
|
219 | 0 | // The static_asserts above only enable the reinterpret_cast<> to occur. |
220 | 0 | // |
221 | 0 | // Dereferencing the resulting pointer is a separate question. Any object's |
222 | 0 | // memory may be interpreted as |unsigned char| per C++11 [basic.lval]p10, but |
223 | 0 | // this doesn't guarantee what values will be observed. If |char| is |
224 | 0 | // implemented to act like |unsigned char|, we're good to go: memory for the |
225 | 0 | // |char| in |Utf8Unit| acts as we need. But if |char| is implemented to act |
226 | 0 | // like |signed char|, dereferencing produces the right value only if the |
227 | 0 | // |char| types all use two's-complement representation. Every modern |
228 | 0 | // compiler does this, and there's a C++ proposal to standardize it. |
229 | 0 | // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0907r0.html So |
230 | 0 | // *technically* this is implementation-defined -- but everyone does it and |
231 | 0 | // this behavior is being standardized. |
232 | 0 | return reinterpret_cast<const unsigned char*>(aUnits); |
233 | 0 | } |
234 | | |
235 | | /** Returns true iff |aUnit| is an ASCII value. */ |
236 | | inline bool |
237 | | IsAscii(Utf8Unit aUnit) |
238 | 0 | { |
239 | 0 | return IsAscii(aUnit.toUint8()); |
240 | 0 | } |
241 | | |
/**
 * Returns true if the given length-delimited memory consists of a valid UTF-8
 * string, false otherwise.
 *
 * A valid UTF-8 string contains no overlong-encoded code points (as one would
 * expect) and contains no code unit sequence encoding a UTF-16 surrogate.  The
 * string *may* contain U+0000 NULL code points.
 *
 * Only the declaration appears here; the function is exported via MFBT_API and
 * its definition is not part of the visible portion of this header.
 *
 * NOTE(review): |aCodeUnits| presumably points at the first code unit and
 * |aCount| is presumably the number of code units (bytes) to examine --
 * confirm against the definition.
 */
extern MFBT_API bool
IsValidUtf8(const void* aCodeUnits, size_t aCount);
252 | | |
253 | | /** |
254 | | * Returns true iff |aUnit| is a UTF-8 trailing code unit matching the pattern |
255 | | * 0b10xx'xxxx. |
256 | | */ |
257 | | inline bool |
258 | | IsTrailingUnit(Utf8Unit aUnit) |
259 | 0 | { |
260 | 0 | return (aUnit.toUint8() & 0b1100'0000) == 0b1000'0000; |
261 | 0 | } |
262 | | |
263 | | /** |
264 | | * Given |aLeadUnit| that is a non-ASCII code unit, a pointer to an |Iter aIter| |
265 | | * that (initially) itself points one unit past |aLeadUnit|, and |
266 | | * |const EndIter& aEnd| that denotes the end of the UTF-8 data when compared |
267 | | * against |*aIter| using |aEnd - *aIter|: |
268 | | * |
269 | | * If |aLeadUnit| and subsequent code units computed using |*aIter| (up to |
270 | | * |aEnd|) encode a valid code point -- not exceeding Unicode's range, not a |
271 | | * surrogate, in shortest form -- then return Some(that code point) and advance |
272 | | * |*aIter| past those code units. |
273 | | * |
274 | | * Otherwise decrement |*aIter| (so that it points at |aLeadUnit|) and return |
275 | | * Nothing(). |
276 | | * |
277 | | * |Iter| and |EndIter| are generalized concepts most easily understood as if |
278 | | * they were |const char*|, |const unsigned char*|, or |const Utf8Unit*|: |
279 | | * iterators that when dereferenced can be used to construct a |Utf8Unit| and |
280 | | * that can be compared and modified in certain limited ways. (Carefully note |
281 | | * that this function mutates |*aIter|.) |Iter| and |EndIter| are template |
282 | | * parameters to support more-complicated adaptor iterators. |
283 | | * |
284 | | * The template parameters after |Iter| allow users to implement custom handling |
285 | | * for various forms of invalid UTF-8. A version of this function that defaults |
286 | | * all such handling to no-ops is defined below this function. To learn how to |
287 | | * define your own custom handling, consult the implementation of that function, |
288 | | * which documents exactly how custom handler functors are invoked. |
289 | | * |
290 | | * This function is MOZ_ALWAYS_INLINE: if you don't need that, use the version |
291 | | * of this function without the "Inline" suffix on the name. |
292 | | */ |
293 | | template<typename Iter, |
294 | | typename EndIter, |
295 | | class OnBadLeadUnit, |
296 | | class OnNotEnoughUnits, |
297 | | class OnBadTrailingUnit, |
298 | | class OnBadCodePoint, |
299 | | class OnNotShortestForm> |
300 | | MOZ_ALWAYS_INLINE Maybe<char32_t> |
301 | | DecodeOneUtf8CodePointInline(const Utf8Unit aLeadUnit, |
302 | | Iter* aIter, const EndIter& aEnd, |
303 | | OnBadLeadUnit aOnBadLeadUnit, |
304 | | OnNotEnoughUnits aOnNotEnoughUnits, |
305 | | OnBadTrailingUnit aOnBadTrailingUnit, |
306 | | OnBadCodePoint aOnBadCodePoint, |
307 | | OnNotShortestForm aOnNotShortestForm) |
308 | 0 | { |
309 | 0 | MOZ_ASSERT(Utf8Unit((*aIter)[-1]) == aLeadUnit); |
310 | 0 |
|
311 | 0 | char32_t n = aLeadUnit.toUint8(); |
312 | 0 | MOZ_ASSERT(!IsAscii(n)); |
313 | 0 |
|
314 | 0 | // |aLeadUnit| determines the number of trailing code units in the code point |
315 | 0 | // and the bits of |aLeadUnit| that contribute to the code point's value. |
316 | 0 | uint8_t remaining; |
317 | 0 | uint32_t min; |
318 | 0 | if ((n & 0b1110'0000) == 0b1100'0000) { |
319 | 0 | remaining = 1; |
320 | 0 | min = 0x80; |
321 | 0 | n &= 0b0001'1111; |
322 | 0 | } else if ((n & 0b1111'0000) == 0b1110'0000) { |
323 | 0 | remaining = 2; |
324 | 0 | min = 0x800; |
325 | 0 | n &= 0b0000'1111; |
326 | 0 | } else if ((n & 0b1111'1000) == 0b1111'0000) { |
327 | 0 | remaining = 3; |
328 | 0 | min = 0x10000; |
329 | 0 | n &= 0b0000'0111; |
330 | 0 | } else { |
331 | 0 | *aIter -= 1; |
332 | 0 | aOnBadLeadUnit(); |
333 | 0 | return Nothing(); |
334 | 0 | } |
335 | 0 | |
336 | 0 | // If the code point would require more code units than remain, the encoding |
337 | 0 | // is invalid. |
338 | 0 | auto actual = aEnd - *aIter; |
339 | 0 | if (MOZ_UNLIKELY(actual < remaining)) { |
340 | 0 | *aIter -= 1; |
341 | 0 | aOnNotEnoughUnits(AssertedCast<uint8_t>(actual + 1), remaining + 1); |
342 | 0 | return Nothing(); |
343 | 0 | } |
344 | 0 | |
345 | 0 | for (uint8_t i = 0; i < remaining; i++) { |
346 | 0 | const Utf8Unit unit(*(*aIter)++); |
347 | 0 |
|
348 | 0 | // Every non-leading code unit in properly encoded UTF-8 has its high |
349 | 0 | // bit set and the next-highest bit unset. |
350 | 0 | if (MOZ_UNLIKELY(!IsTrailingUnit(unit))) { |
351 | 0 | uint8_t unitsObserved = i + 1 + 1; |
352 | 0 | *aIter -= unitsObserved; |
353 | 0 | aOnBadTrailingUnit(unitsObserved); |
354 | 0 | return Nothing(); |
355 | 0 | } |
356 | 0 | |
357 | 0 | // The code point being encoded is the concatenation of all the |
358 | 0 | // unconstrained bits. |
359 | 0 | n = (n << 6) | (unit.toUint8() & 0b0011'1111); |
360 | 0 | } |
361 | 0 |
|
362 | 0 | // UTF-16 surrogates and values outside the Unicode range are invalid. |
363 | 0 | if (MOZ_UNLIKELY(n > 0x10FFFF || (0xD800 <= n && n <= 0xDFFF))) { |
364 | 0 | uint8_t unitsObserved = remaining + 1; |
365 | 0 | *aIter -= unitsObserved; |
366 | 0 | aOnBadCodePoint(n, unitsObserved); |
367 | 0 | return Nothing(); |
368 | 0 | } |
369 | 0 | |
370 | 0 | // Overlong code points are also invalid. |
371 | 0 | if (MOZ_UNLIKELY(n < min)) { |
372 | 0 | uint8_t unitsObserved = remaining + 1; |
373 | 0 | *aIter -= unitsObserved; |
374 | 0 | aOnNotShortestForm(n, unitsObserved); |
375 | 0 | return Nothing(); |
376 | 0 | } |
377 | 0 | |
378 | 0 | return Some(n); |
379 | 0 | } Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<unsigned char const*, unsigned char const*, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<unsigned char const*, unsigned char const*>(mozilla::Utf8Unit, unsigned char const**, unsigned char const* const&)::{lambda()#1}, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<unsigned char const*, unsigned char const*>(mozilla::Utf8Unit, unsigned char const**, unsigned char const* const&)::{lambda(unsigned char, unsigned char)#1}, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<unsigned char const*, unsigned char const*>(mozilla::Utf8Unit, unsigned char const**, unsigned char const* const&)::{lambda(unsigned char)#1}, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<unsigned char const*, unsigned char const*>(mozilla::Utf8Unit, unsigned char const**, unsigned char const* const&)::{lambda(char32_t, unsigned char)#1}, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<unsigned char const*, unsigned char const*>(mozilla::Utf8Unit, unsigned char const**, unsigned char const* const&)::{lambda(char32_t, unsigned char)#2}>(mozilla::Utf8Unit, unsigned char const**, unsigned char const* const&, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<unsigned char const*, unsigned char const*>(mozilla::Utf8Unit, unsigned char const**, unsigned char const* const&)::{lambda()#1}, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<unsigned char const*, unsigned char const*>(mozilla::Utf8Unit, unsigned char const**, unsigned char const* const&)::{lambda(unsigned char, unsigned char)#1}, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<unsigned char const*, unsigned char const*>(mozilla::Utf8Unit, unsigned char const**, unsigned char const* const&)::{lambda(unsigned char)#1}, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<unsigned char const*, unsigned char 
const*>(mozilla::Utf8Unit, unsigned char const**, unsigned char const* const&)::{lambda(char32_t, unsigned char)#1}, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<unsigned char const*, unsigned char const*>(mozilla::Utf8Unit, unsigned char const**, unsigned char const* const&)::{lambda(char32_t, unsigned char)#2}) Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<mozilla::Utf8Unit const*, mozilla::Utf8Unit const*, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<mozilla::Utf8Unit const*, mozilla::Utf8Unit const*>(mozilla::Utf8Unit, mozilla::Utf8Unit const**, mozilla::Utf8Unit const* const&)::{lambda()#1}, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<mozilla::Utf8Unit const*, mozilla::Utf8Unit const*>(mozilla::Utf8Unit, mozilla::Utf8Unit const**, mozilla::Utf8Unit const* const&)::{lambda(unsigned char, unsigned char)#1}, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<mozilla::Utf8Unit const*, mozilla::Utf8Unit const*>(mozilla::Utf8Unit, mozilla::Utf8Unit const**, mozilla::Utf8Unit const* const&)::{lambda(unsigned char)#1}, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<mozilla::Utf8Unit const*, mozilla::Utf8Unit const*>(mozilla::Utf8Unit, mozilla::Utf8Unit const**, mozilla::Utf8Unit const* const&)::{lambda(char32_t, unsigned char)#1}, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<mozilla::Utf8Unit const*, mozilla::Utf8Unit const*>(mozilla::Utf8Unit, mozilla::Utf8Unit const**, mozilla::Utf8Unit const* const&)::{lambda(char32_t, unsigned char)#2}>(mozilla::Utf8Unit, mozilla::Utf8Unit const**, mozilla::Utf8Unit const* const&, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<mozilla::Utf8Unit const*, mozilla::Utf8Unit const*>(mozilla::Utf8Unit, mozilla::Utf8Unit const**, mozilla::Utf8Unit const* const&)::{lambda()#1}, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<mozilla::Utf8Unit const*, 
mozilla::Utf8Unit const*>(mozilla::Utf8Unit, mozilla::Utf8Unit const**, mozilla::Utf8Unit const* const&)::{lambda(unsigned char, unsigned char)#1}, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<mozilla::Utf8Unit const*, mozilla::Utf8Unit const*>(mozilla::Utf8Unit, mozilla::Utf8Unit const**, mozilla::Utf8Unit const* const&)::{lambda(unsigned char)#1}, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<mozilla::Utf8Unit const*, mozilla::Utf8Unit const*>(mozilla::Utf8Unit, mozilla::Utf8Unit const**, mozilla::Utf8Unit const* const&)::{lambda(char32_t, unsigned char)#1}, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<mozilla::Utf8Unit const*, mozilla::Utf8Unit const*>(mozilla::Utf8Unit, mozilla::Utf8Unit const**, mozilla::Utf8Unit const* const&)::{lambda(char32_t, unsigned char)#2}) Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsIterator, js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsEnd, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda()#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda(unsigned char, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda(unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, 
js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda(char32_t, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda(char32_t, unsigned char)#2}>(mozilla::Utf8Unit, js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsIterator*, js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsEnd const&, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda()#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda(unsigned char, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda(unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda(char32_t, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda(char32_t, 
unsigned char)#2}) Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsIterator, js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsEnd, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda()#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(unsigned char, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(char32_t, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(char32_t, unsigned char)#2}>(mozilla::Utf8Unit, js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsIterator*, js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsEnd const&, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda()#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, 
js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(unsigned char, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(char32_t, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(char32_t, unsigned char)#2}) Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsIterator, js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsEnd, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda()#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda(unsigned char, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda(unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, 
js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda(char32_t, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda(char32_t, unsigned char)#2}>(mozilla::Utf8Unit, js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsIterator*, js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsEnd const&, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda()#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda(unsigned char, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda(unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda(char32_t, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, 
char32_t*)::{lambda(char32_t, unsigned char)#2}) Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsIterator, js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsEnd, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda()#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(unsigned char, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(char32_t, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(char32_t, unsigned char)#2}>(mozilla::Utf8Unit, js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsIterator*, js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsEnd const&, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda()#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, 
js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(unsigned char, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(char32_t, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(char32_t, unsigned char)#2}) |
380 | | |
381 | | /** |
382 | | * Identical to the above function, but not forced to be instantiated inline -- |
383 | | * the compiler is permitted to common up separate invocations if it chooses. |
384 | | */ |
385 | | template<typename Iter, |
386 | | typename EndIter, |
387 | | class OnBadLeadUnit, |
388 | | class OnNotEnoughUnits, |
389 | | class OnBadTrailingUnit, |
390 | | class OnBadCodePoint, |
391 | | class OnNotShortestForm> |
392 | | inline Maybe<char32_t> |
393 | | DecodeOneUtf8CodePoint(const Utf8Unit aLeadUnit, |
394 | | Iter* aIter, const EndIter& aEnd, |
395 | | OnBadLeadUnit aOnBadLeadUnit, |
396 | | OnNotEnoughUnits aOnNotEnoughUnits, |
397 | | OnBadTrailingUnit aOnBadTrailingUnit, |
398 | | OnBadCodePoint aOnBadCodePoint, |
399 | | OnNotShortestForm aOnNotShortestForm) |
400 | 0 | { |
401 | 0 | return DecodeOneUtf8CodePointInline(aLeadUnit, aIter, aEnd, |
402 | 0 | aOnBadLeadUnit, aOnNotEnoughUnits, |
403 | 0 | aOnBadTrailingUnit, aOnBadCodePoint, |
404 | 0 | aOnNotShortestForm); |
405 | 0 | } Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePoint<js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsIterator, js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsEnd, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda()#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(unsigned char, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(char32_t, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(char32_t, unsigned char)#2}>(mozilla::Utf8Unit, js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsIterator*, js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsEnd const&, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda()#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, 
mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(unsigned char, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(char32_t, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(char32_t, unsigned char)#2}) Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePoint<js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsIterator, js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsEnd, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda()#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(unsigned char, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(char32_t, unsigned char)#1}, 
js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(char32_t, unsigned char)#2}>(mozilla::Utf8Unit, js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsIterator*, js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsEnd const&, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda()#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(unsigned char, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(char32_t, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(char32_t, unsigned char)#2}) |
406 | | |
407 | | /** |
408 | | * Like the always-inlined function above, but with no-op behavior from all |
409 | | * trailing if-invalid notifier functors. |
410 | | * |
411 | | * This function is MOZ_ALWAYS_INLINE: if you don't need that, use the version |
412 | | * of this function without the "Inline" suffix on the name. |
413 | | */ |
414 | | template<typename Iter, typename EndIter> |
415 | | MOZ_ALWAYS_INLINE Maybe<char32_t> |
416 | | DecodeOneUtf8CodePointInline(const Utf8Unit aLeadUnit, |
417 | | Iter* aIter, const EndIter& aEnd) |
418 | 0 | { |
419 | 0 | // aOnBadLeadUnit is called when |aLeadUnit| itself is an invalid lead unit in |
420 | 0 | // a multi-unit code point. It is passed no arguments: the caller already has |
421 | 0 | // |aLeadUnit| on hand, so no need to provide it again. |
422 | 0 | auto onBadLeadUnit = []() {}; Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<unsigned char const*, unsigned char const*>(mozilla::Utf8Unit, unsigned char const**, unsigned char const* const&)::{lambda()#1}::operator()() const Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<mozilla::Utf8Unit const*, mozilla::Utf8Unit const*>(mozilla::Utf8Unit, mozilla::Utf8Unit const**, mozilla::Utf8Unit const* const&)::{lambda()#1}::operator()() const |
423 | 0 |
|
424 | 0 | // aOnNotEnoughUnits is called when |aLeadUnit| properly indicates a code |
425 | 0 | // point length, but there aren't enough units from |*aIter| to |aEnd| to |
426 | 0 | // satisfy that length. It is passed the number of code units actually |
427 | 0 | // available (according to |aEnd - *aIter|) and the number of code units that |
428 | 0 | // |aLeadUnit| indicates are needed. Both numbers include the contribution |
429 | 0 | // of |aLeadUnit| itself: so |aUnitsAvailable <= 3|, |aUnitsNeeded <= 4|, and |
430 | 0 | // |aUnitsAvailable < aUnitsNeeded|. As above, it also is not passed the lead |
431 | 0 | // code unit. |
432 | 0 | auto onNotEnoughUnits = [](uint8_t aUnitsAvailable, uint8_t aUnitsNeeded) {}; Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<unsigned char const*, unsigned char const*>(mozilla::Utf8Unit, unsigned char const**, unsigned char const* const&)::{lambda(unsigned char, unsigned char)#1}::operator()(unsigned char, unsigned char) const Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<mozilla::Utf8Unit const*, mozilla::Utf8Unit const*>(mozilla::Utf8Unit, mozilla::Utf8Unit const**, mozilla::Utf8Unit const* const&)::{lambda(unsigned char, unsigned char)#1}::operator()(unsigned char, unsigned char) const |
433 | 0 |
|
434 | 0 | // aOnBadTrailingUnit is called when one of the trailing code units implied by |
435 | 0 | // |aLeadUnit| doesn't match the 0b10xx'xxxx bit pattern that all UTF-8 |
436 | 0 | // trailing code units must satisfy. It is passed the total count of units |
437 | 0 | // observed (including |aLeadUnit|). The bad trailing code unit will |
438 | 0 | // conceptually be at |(*aIter)[aUnitsObserved - 1]| if this functor is |
439 | 0 | // called, and so |aUnitsObserved <= 4|. |
440 | 0 | auto onBadTrailingUnit = [](uint8_t aUnitsObserved) {}; Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<unsigned char const*, unsigned char const*>(mozilla::Utf8Unit, unsigned char const**, unsigned char const* const&)::{lambda(unsigned char)#1}::operator()(unsigned char) const Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<mozilla::Utf8Unit const*, mozilla::Utf8Unit const*>(mozilla::Utf8Unit, mozilla::Utf8Unit const**, mozilla::Utf8Unit const* const&)::{lambda(unsigned char)#1}::operator()(unsigned char) const |
441 | 0 |
|
442 | 0 | // aOnBadCodePoint is called when a structurally-correct code point encoding |
443 | 0 | // is found, but the *value* that is encoded is not a valid code point: either |
444 | 0 | // because it exceeded the U+10FFFF Unicode maximum code point, or because it |
445 | 0 | // was a UTF-16 surrogate. It is passed the non-code point value and the |
446 | 0 | // number of code units used to encode it. |
447 | 0 | auto onBadCodePoint = [](char32_t aBadCodePoint, uint8_t aUnitsObserved) {}; Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<unsigned char const*, unsigned char const*>(mozilla::Utf8Unit, unsigned char const**, unsigned char const* const&)::{lambda(char32_t, unsigned char)#1}::operator()(char32_t, unsigned char) const Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<mozilla::Utf8Unit const*, mozilla::Utf8Unit const*>(mozilla::Utf8Unit, mozilla::Utf8Unit const**, mozilla::Utf8Unit const* const&)::{lambda(char32_t, unsigned char)#1}::operator()(char32_t, unsigned char) const |
448 | 0 |
|
449 | 0 | // aOnNotShortestForm is called when structurally-correct encoding is found, |
450 | 0 | // but the encoded value should have been encoded in fewer code units (e.g. |
451 | 0 | // mis-encoding U+0000 as 0b1100'0000 0b1000'0000 in two code units instead of |
452 | 0 | // as 0b0000'0000). It is passed the mis-encoded code point (which will be |
453 | 0 | // valid and not a surrogate) and the count of code units that mis-encoded it. |
454 | 0 | auto onNotShortestForm = [](char32_t aBadCodePoint, uint8_t aUnitsObserved) {}; Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<unsigned char const*, unsigned char const*>(mozilla::Utf8Unit, unsigned char const**, unsigned char const* const&)::{lambda(char32_t, unsigned char)#2}::operator()(char32_t, unsigned char) const Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<mozilla::Utf8Unit const*, mozilla::Utf8Unit const*>(mozilla::Utf8Unit, mozilla::Utf8Unit const**, mozilla::Utf8Unit const* const&)::{lambda(char32_t, unsigned char)#2}::operator()(char32_t, unsigned char) const |
455 | 0 |
|
456 | 0 | return DecodeOneUtf8CodePointInline(aLeadUnit, aIter, aEnd, |
457 | 0 | onBadLeadUnit, onNotEnoughUnits, |
458 | 0 | onBadTrailingUnit, onBadCodePoint, |
459 | 0 | onNotShortestForm); |
460 | 0 | } Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<unsigned char const*, unsigned char const*>(mozilla::Utf8Unit, unsigned char const**, unsigned char const* const&) Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<mozilla::Utf8Unit const*, mozilla::Utf8Unit const*>(mozilla::Utf8Unit, mozilla::Utf8Unit const**, mozilla::Utf8Unit const* const&) |
461 | | |
462 | | /** |
463 | | * Identical to the above function, but not forced to be instantiated inline -- |
464 | | * the compiler/linker are allowed to common up separate invocations. |
465 | | */ |
466 | | template<typename Iter, typename EndIter> |
467 | | inline Maybe<char32_t> |
468 | | DecodeOneUtf8CodePoint(const Utf8Unit aLeadUnit, |
469 | | Iter* aIter, const EndIter& aEnd) |
470 | 0 | { |
471 | 0 | return DecodeOneUtf8CodePointInline(aLeadUnit, aIter, aEnd); |
472 | 0 | } Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePoint<unsigned char const*, unsigned char const*>(mozilla::Utf8Unit, unsigned char const**, unsigned char const* const&) Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePoint<mozilla::Utf8Unit const*, mozilla::Utf8Unit const*>(mozilla::Utf8Unit, mozilla::Utf8Unit const**, mozilla::Utf8Unit const* const&) |
473 | | |
474 | | } // namespace mozilla |
475 | | |
476 | | #endif /* mozilla_Utf8_h */ |