Coverage Report

Created: 2018-09-25 14:53

/work/obj-fuzz/dist/include/mozilla/Utf8.h
Line
Count
Source (jump to first uncovered line)
1
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
3
/* This Source Code Form is subject to the terms of the Mozilla Public
4
 * License, v. 2.0. If a copy of the MPL was not distributed with this
5
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7
/*
8
 * UTF-8-related functionality, including a type-safe structure representing a
9
 * UTF-8 code unit.
10
 */
11
12
#ifndef mozilla_Utf8_h
13
#define mozilla_Utf8_h
14
15
#include "mozilla/Casting.h" // for mozilla::AssertedCast
16
#include "mozilla/Likely.h" // for MOZ_UNLIKELY
17
#include "mozilla/Maybe.h" // for mozilla::Maybe
18
#include "mozilla/TextUtils.h" // for mozilla::IsAscii
19
#include "mozilla/Types.h" // for MFBT_API
20
21
#include <limits.h> // for CHAR_BIT
22
#include <stddef.h> // for size_t
23
#include <stdint.h> // for uint8_t
24
25
namespace mozilla {
26
27
// Forward declaration of the UTF-8 code unit type.
union Utf8Unit;

// |Utf8Unit| treats one |char| as one 8-bit UTF-8 code unit, so refuse to
// compile on any platform whose |char| is not exactly an octet.
static_assert(
  CHAR_BIT == 8,
  "Utf8Unit won't work so well with non-octet chars");
31
32
/**
 * A single code unit of UTF-8 encoded text.  (A code unit is the smallest unit
 * of a Unicode encoding form: 8 bits wide in UTF-8, 16 bits wide in UTF-16.)
 *
 * Do not confuse this with a code point: any non-ASCII code point is spelled
 * using several UTF-8 code units.
 */
union Utf8Unit
{
private:
  // Why |Utf8Unit| is a union holding exactly one raw |char|
  // ========================================================
  //
  // The C++ object model, and its rules about which types may be used to
  // access an object, (almost?) force this representation.
  //
  // Hard requirements for a UTF-8 code unit type:
  //
  //   1. It interoperates with UTF-8 encoded C++ character/string literals:
  //      a properly encoded literal must be accessible through |Utf8Unit| and
  //      friends, and data held by |Utf8Unit| and friends (UnicodeData in
  //      particular) must be accessible as C++ character types.
  //   2. Conversion between |Utf8Unit| and friends and |char|/|char*| happens
  //      only through an explicit operation.
  //   3. |Utf8Unit| is distinct from every C++ character type for overload
  //      resolution and for template type equivalence (that is, whether
  //      |X<T>| and |X<U>| are the same type given |template<class> class X|).
  //
  // Nice-to-haves, at least for now:
  //
  //   4. An unsigned representation, both to dodge the undefined behavior
  //      signed types can run into and because Unicode code points and code
  //      units are unsigned.
  //   5. Conversion to/from |unsigned char| and |unsigned char*|, for APIs
  //      that (because of #4) consider those the "natural" UTF-8 types.
  //
  // What follows from these
  // -----------------------
  //
  // #1 means |Utf8Unit| has to "incorporate" a C++ character type, i.e. one of
  // |{,{un,}signed} char|.[0]  |uint8_t| is no good: it might not be a C++
  // character type.
  //
  // #2 and #3 mean |Utf8Unit| cannot itself *be* such a type, nor a typedef of
  // one (a typedef only introduces an alias, never a new type).  A compound
  // type is therefore required.
  //
  // The exact representation, and the character type inside it, is pinned
  // down by C++14 [basic.lval]p10, which relates the dynamic type of an object
  // in memory to the type used to access it:
  //
  //     If a program attempts to access the stored value of an object
  //     through a glvalue of other than one of the following types the
  //     behavior is undefined:
  //
  //       1. the dynamic type of the object,
  //       2. a cv-qualified version of the dynamic type of the object,
  //       ...other types irrelevant here...
  //       3. an aggregate or union type that includes one of the
  //          aforementioned types among its elements or non-static data
  //          members (including, recursively, an element or non-static
  //          data member of a subaggregate or contained union),
  //       ...more irrelevant types...
  //       4. a char or unsigned char type.
  //
  // Reading (wrapped) UTF-8 data as |char| or |unsigned char| is permitted
  // whatever the representation, by item 4 of that list -- leaving aside, for
  // a moment, which values are observed.  Item 2 additionally allows |const|
  // on the dynamic type or on the accessing type.  |signed char| is ignored
  // here: it is really only useful for small signed numbers, not characters.
  //
  // What values are observed if contents are read as |char|/|unsigned char|
  // contrary to the type actually stored?  C++14 [basic.fundamental]p1
  // requires the character types to share size and alignment, and C++14
  // [basic.fundamental]p3 requires |signed char| and |unsigned char| to have
  // the same value representation -- but C++ stops short of requiring
  // identical bitwise representation.  In practice that could be assumed, yet
  // it is the kind of spec corner best not *relied* on for correctness where
  // avoidable.
  //
  // Hence |Utf8Unit|'s contents are exposed only as |char| and |char*|, never
  // as |unsigned char*|.  |unsigned char| values are instead exposed through
  // fully-defined *integral conversion* (C++14 [conv.integral]p2).  The
  // opposite conversion, |unsigned char| → |char|, has only
  // implementation-defined behavior; depending on it is not ideal, but given
  // that twos-complement won, it should be okay.  (|unsigned char*| is also
  // awkward enough for string work that it probably shows up rarely in string
  // manipulation, and mostly in places that ought to use |Utf8Unit| directly.)
  //
  // Going the other way -- viewing |char| or |char*| data through |Utf8Unit|
  // -- is unproblematic provided |Utf8Unit| contains a |char|, by item 3 of
  // the list: any "aggregate or union" containing a |char| qualifies.  Oddly,
  // an aggregate is not actually available: C++14 [dcl.init.aggr]p1 says an
  // aggregate has "no private or protected non-static data members", and the
  // inner |char| is meant to stay hidden.  That eliminates |struct| and leaves
  // only |union|.
  //
  // (An enum is not "an aggregate or union type" either, so -- perhaps
  // surprisingly -- an |enum class| with |char| underlying type is ruled out
  // too: nothing licenses treating |char| memory as that enum's memory.)
  //
  // Conclusion: |Utf8Unit| is a union with one |char| non-static data member.
  // That meets every hard requirement, and it supports the nice-to-haves of
  // constructing from |unsigned char| and converting to |unsigned char|.  It
  // does not meet the nice-to-haves of storing an |unsigned char| internally
  // or of wrapping an existing |unsigned char| (or pointer to one).  Those
  // probably *could* be had by leaning harder on implementation-defined
  // behavior, but for now C++'s main character type is favored over
  // conceptual purity.
  //
  // 0. A UTF-8 character type distinct from the existing C++ narrow character
  //    types has been proposed:
  //
  //      http://open-std.org/JTC1/SC22/WG21/docs/papers/2016/p0482r0.html
  //
  //    but it hasn't been standardized (and might never be), and none of the
  //    compilers we really care about have implemented it.  With luck the
  //    implementation here could someday switch to it without much trouble.
  char mValue;

public:
  /** Wrap a raw |char| holding one UTF-8 code unit. */
  explicit constexpr Utf8Unit(char aUnit) : mValue(aUnit) {}

  /** Wrap an |unsigned char| holding one UTF-8 code unit. */
  explicit constexpr Utf8Unit(unsigned char aUnit)
    : mValue(static_cast<char>(aUnit))
  {
    // The cast in the initializer is an integral conversion whose result is
    // implementation-defined (see the rationale on |mValue|).  We regretfully
    // but unavoidably assume it yields the value we want.
  }

  /** Two code units are equal exactly when their stored |char|s are equal. */
  constexpr bool operator==(const Utf8Unit& aOther) const
  {
    return mValue == aOther.mValue;
  }

  /** Negation of |operator==|. */
  constexpr bool operator!=(const Utf8Unit& aOther) const
  {
    return mValue != aOther.mValue;
  }

  /** Convert a UTF-8 code unit to a raw char. */
  constexpr char toChar() const
  {
    // Nothing but a |char| is ever written to this location, so reading it
    // back as |char| is both permitted and yields the desired value.
    return mValue;
  }

  /** Convert a UTF-8 code unit to a raw unsigned char. */
  constexpr unsigned char toUnsignedChar() const
  {
    // Well-defined integral conversion; see the rationale on |mValue|.
    return static_cast<unsigned char>(mValue);
  }

  /** Convert a UTF-8 code unit to a uint8_t. */
  constexpr uint8_t toUint8() const
  {
    // Well-defined integral conversion; see the rationale on |mValue|.
    return static_cast<uint8_t>(mValue);
  }

  // |&mValue| is deliberately not exposed at present.  |UnicodeData| sort of
  // exposes it, but that is a somewhat separate concern, justified by
  // comments in that other code.
};
200
201
/**
202
 * Reinterpret the address of a UTF-8 code unit as |const unsigned char*|.
203
 *
204
 * Assuming proper backing has been set up, the resulting |const unsigned char*|
205
 * may validly be dereferenced.
206
 *
207
 * No access is provided to mutate this underlying memory as |unsigned char|.
208
 * Presently memory inside |Utf8Unit| is *only* stored as |char|, and we are
209
 * loath to offer a way to write non-|char| data until absolutely necessary.
210
 */
211
inline const unsigned char*
212
Utf8AsUnsignedChars(const Utf8Unit* aUnits)
213
0
{
214
0
  static_assert(sizeof(Utf8Unit) == sizeof(unsigned char),
215
0
                "sizes must match to permissibly reinterpret_cast<>");
216
0
  static_assert(alignof(Utf8Unit) == alignof(unsigned char),
217
0
                "alignment must match to permissibly reinterpret_cast<>");
218
0
219
0
  // The static_asserts above only enable the reinterpret_cast<> to occur.
220
0
  //
221
0
  // Dereferencing the resulting pointer is a separate question.  Any object's
222
0
  // memory may be interpreted as |unsigned char| per C++11 [basic.lval]p10, but
223
0
  // this doesn't guarantee what values will be observed.  If |char| is
224
0
  // implemented to act like |unsigned char|, we're good to go: memory for the
225
0
  // |char| in |Utf8Unit| acts as we need.  But if |char| is implemented to act
226
0
  // like |signed char|, dereferencing produces the right value only if the
227
0
  // |char| types all use two's-complement representation.  Every modern
228
0
  // compiler does this, and there's a C++ proposal to standardize it.
229
0
  // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0907r0.html   So
230
0
  // *technically* this is implementation-defined -- but everyone does it and
231
0
  // this behavior is being standardized.
232
0
  return reinterpret_cast<const unsigned char*>(aUnits);
233
0
}
234
235
/** Returns true iff |aUnit| is an ASCII value. */
236
inline bool
237
IsAscii(Utf8Unit aUnit)
238
0
{
239
0
  return IsAscii(aUnit.toUint8());
240
0
}
241
242
/**
243
 * Returns true if the given length-delimited memory consists of a valid UTF-8
244
 * string, false otherwise.
245
 *
246
 * A valid UTF-8 string contains no overlong-encoded code points (as one would
247
 * expect) and contains no code unit sequence encoding a UTF-16 surrogate.  The
248
 * string *may* contain U+0000 NULL code points.
249
 */
250
extern MFBT_API bool
251
IsValidUtf8(const void* aCodeUnits, size_t aCount);
252
253
/**
254
 * Returns true iff |aUnit| is a UTF-8 trailing code unit matching the pattern
255
 * 0b10xx'xxxx.
256
 */
257
inline bool
258
IsTrailingUnit(Utf8Unit aUnit)
259
0
{
260
0
  return (aUnit.toUint8() & 0b1100'0000) == 0b1000'0000;
261
0
}
262
263
/**
264
 * Given |aLeadUnit| that is a non-ASCII code unit, a pointer to an |Iter aIter|
265
 * that (initially) itself points one unit past |aLeadUnit|, and
266
 * |const EndIter& aEnd| that denotes the end of the UTF-8 data when compared
267
 * against |*aIter| using |aEnd - *aIter|:
268
 *
269
 * If |aLeadUnit| and subsequent code units computed using |*aIter| (up to
270
 * |aEnd|) encode a valid code point -- not exceeding Unicode's range, not a
271
 * surrogate, in shortest form -- then return Some(that code point) and advance
272
 * |*aIter| past those code units.
273
 *
274
 * Otherwise decrement |*aIter| (so that it points at |aLeadUnit|) and return
275
 * Nothing().
276
 *
277
 * |Iter| and |EndIter| are generalized concepts most easily understood as if
278
 * they were |const char*|, |const unsigned char*|, or |const Utf8Unit*|:
279
 * iterators that when dereferenced can be used to construct a |Utf8Unit| and
280
 * that can be compared and modified in certain limited ways.  (Carefully note
281
 * that this function mutates |*aIter|.)  |Iter| and |EndIter| are template
282
 * parameters to support more-complicated adaptor iterators.
283
 *
284
 * The template parameters after |Iter| allow users to implement custom handling
285
 * for various forms of invalid UTF-8.  A version of this function that defaults
286
 * all such handling to no-ops is defined below this function.  To learn how to
287
 * define your own custom handling, consult the implementation of that function,
288
 * which documents exactly how custom handler functors are invoked.
289
 *
290
 * This function is MOZ_ALWAYS_INLINE: if you don't need that, use the version
291
 * of this function without the "Inline" suffix on the name.
292
 */
293
template<typename Iter,
294
         typename EndIter,
295
         class OnBadLeadUnit,
296
         class OnNotEnoughUnits,
297
         class OnBadTrailingUnit,
298
         class OnBadCodePoint,
299
         class OnNotShortestForm>
300
MOZ_ALWAYS_INLINE Maybe<char32_t>
301
DecodeOneUtf8CodePointInline(const Utf8Unit aLeadUnit,
302
                             Iter* aIter, const EndIter& aEnd,
303
                             OnBadLeadUnit aOnBadLeadUnit,
304
                             OnNotEnoughUnits aOnNotEnoughUnits,
305
                             OnBadTrailingUnit aOnBadTrailingUnit,
306
                             OnBadCodePoint aOnBadCodePoint,
307
                             OnNotShortestForm aOnNotShortestForm)
308
0
{
309
0
  MOZ_ASSERT(Utf8Unit((*aIter)[-1]) == aLeadUnit);
310
0
311
0
  char32_t n = aLeadUnit.toUint8();
312
0
  MOZ_ASSERT(!IsAscii(n));
313
0
314
0
  // |aLeadUnit| determines the number of trailing code units in the code point
315
0
  // and the bits of |aLeadUnit| that contribute to the code point's value.
316
0
  uint8_t remaining;
317
0
  uint32_t min;
318
0
  if ((n & 0b1110'0000) == 0b1100'0000) {
319
0
    remaining = 1;
320
0
    min = 0x80;
321
0
    n &= 0b0001'1111;
322
0
  } else if ((n & 0b1111'0000) == 0b1110'0000) {
323
0
    remaining = 2;
324
0
    min = 0x800;
325
0
    n &= 0b0000'1111;
326
0
  } else if ((n & 0b1111'1000) == 0b1111'0000) {
327
0
    remaining = 3;
328
0
    min = 0x10000;
329
0
    n &= 0b0000'0111;
330
0
  } else {
331
0
    *aIter -= 1;
332
0
    aOnBadLeadUnit();
333
0
    return Nothing();
334
0
  }
335
0
336
0
  // If the code point would require more code units than remain, the encoding
337
0
  // is invalid.
338
0
  auto actual = aEnd - *aIter;
339
0
  if (MOZ_UNLIKELY(actual < remaining)) {
340
0
    *aIter -= 1;
341
0
    aOnNotEnoughUnits(AssertedCast<uint8_t>(actual + 1), remaining + 1);
342
0
    return Nothing();
343
0
  }
344
0
345
0
  for (uint8_t i = 0; i < remaining; i++) {
346
0
    const Utf8Unit unit(*(*aIter)++);
347
0
348
0
    // Every non-leading code unit in properly encoded UTF-8 has its high
349
0
    // bit set and the next-highest bit unset.
350
0
    if (MOZ_UNLIKELY(!IsTrailingUnit(unit))) {
351
0
      uint8_t unitsObserved = i + 1 + 1;
352
0
      *aIter -= unitsObserved;
353
0
      aOnBadTrailingUnit(unitsObserved);
354
0
      return Nothing();
355
0
    }
356
0
357
0
    // The code point being encoded is the concatenation of all the
358
0
    // unconstrained bits.
359
0
    n = (n << 6) | (unit.toUint8() & 0b0011'1111);
360
0
  }
361
0
362
0
  // UTF-16 surrogates and values outside the Unicode range are invalid.
363
0
  if (MOZ_UNLIKELY(n > 0x10FFFF || (0xD800 <= n && n <= 0xDFFF))) {
364
0
    uint8_t unitsObserved = remaining + 1;
365
0
    *aIter -= unitsObserved;
366
0
    aOnBadCodePoint(n, unitsObserved);
367
0
    return Nothing();
368
0
  }
369
0
370
0
  // Overlong code points are also invalid.
371
0
  if (MOZ_UNLIKELY(n < min)) {
372
0
    uint8_t unitsObserved = remaining + 1;
373
0
    *aIter -= unitsObserved;
374
0
    aOnNotShortestForm(n, unitsObserved);
375
0
    return Nothing();
376
0
  }
377
0
378
0
  return Some(n);
379
0
}
Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<unsigned char const*, unsigned char const*, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<unsigned char const*, unsigned char const*>(mozilla::Utf8Unit, unsigned char const**, unsigned char const* const&)::{lambda()#1}, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<unsigned char const*, unsigned char const*>(mozilla::Utf8Unit, unsigned char const**, unsigned char const* const&)::{lambda(unsigned char, unsigned char)#1}, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<unsigned char const*, unsigned char const*>(mozilla::Utf8Unit, unsigned char const**, unsigned char const* const&)::{lambda(unsigned char)#1}, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<unsigned char const*, unsigned char const*>(mozilla::Utf8Unit, unsigned char const**, unsigned char const* const&)::{lambda(char32_t, unsigned char)#1}, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<unsigned char const*, unsigned char const*>(mozilla::Utf8Unit, unsigned char const**, unsigned char const* const&)::{lambda(char32_t, unsigned char)#2}>(mozilla::Utf8Unit, unsigned char const**, unsigned char const* const&, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<unsigned char const*, unsigned char const*>(mozilla::Utf8Unit, unsigned char const**, unsigned char const* const&)::{lambda()#1}, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<unsigned char const*, unsigned char const*>(mozilla::Utf8Unit, unsigned char const**, unsigned char const* const&)::{lambda(unsigned char, unsigned char)#1}, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<unsigned char const*, unsigned char const*>(mozilla::Utf8Unit, unsigned char const**, unsigned char const* const&)::{lambda(unsigned char)#1}, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<unsigned char const*, unsigned char 
const*>(mozilla::Utf8Unit, unsigned char const**, unsigned char const* const&)::{lambda(char32_t, unsigned char)#1}, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<unsigned char const*, unsigned char const*>(mozilla::Utf8Unit, unsigned char const**, unsigned char const* const&)::{lambda(char32_t, unsigned char)#2})
Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<mozilla::Utf8Unit const*, mozilla::Utf8Unit const*, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<mozilla::Utf8Unit const*, mozilla::Utf8Unit const*>(mozilla::Utf8Unit, mozilla::Utf8Unit const**, mozilla::Utf8Unit const* const&)::{lambda()#1}, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<mozilla::Utf8Unit const*, mozilla::Utf8Unit const*>(mozilla::Utf8Unit, mozilla::Utf8Unit const**, mozilla::Utf8Unit const* const&)::{lambda(unsigned char, unsigned char)#1}, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<mozilla::Utf8Unit const*, mozilla::Utf8Unit const*>(mozilla::Utf8Unit, mozilla::Utf8Unit const**, mozilla::Utf8Unit const* const&)::{lambda(unsigned char)#1}, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<mozilla::Utf8Unit const*, mozilla::Utf8Unit const*>(mozilla::Utf8Unit, mozilla::Utf8Unit const**, mozilla::Utf8Unit const* const&)::{lambda(char32_t, unsigned char)#1}, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<mozilla::Utf8Unit const*, mozilla::Utf8Unit const*>(mozilla::Utf8Unit, mozilla::Utf8Unit const**, mozilla::Utf8Unit const* const&)::{lambda(char32_t, unsigned char)#2}>(mozilla::Utf8Unit, mozilla::Utf8Unit const**, mozilla::Utf8Unit const* const&, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<mozilla::Utf8Unit const*, mozilla::Utf8Unit const*>(mozilla::Utf8Unit, mozilla::Utf8Unit const**, mozilla::Utf8Unit const* const&)::{lambda()#1}, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<mozilla::Utf8Unit const*, mozilla::Utf8Unit const*>(mozilla::Utf8Unit, mozilla::Utf8Unit const**, mozilla::Utf8Unit const* const&)::{lambda(unsigned char, unsigned char)#1}, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<mozilla::Utf8Unit const*, mozilla::Utf8Unit const*>(mozilla::Utf8Unit, mozilla::Utf8Unit const**, mozilla::Utf8Unit const* 
const&)::{lambda(unsigned char)#1}, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<mozilla::Utf8Unit const*, mozilla::Utf8Unit const*>(mozilla::Utf8Unit, mozilla::Utf8Unit const**, mozilla::Utf8Unit const* const&)::{lambda(char32_t, unsigned char)#1}, mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<mozilla::Utf8Unit const*, mozilla::Utf8Unit const*>(mozilla::Utf8Unit, mozilla::Utf8Unit const**, mozilla::Utf8Unit const* const&)::{lambda(char32_t, unsigned char)#2})
Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsIterator, js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsEnd, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda()#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda(unsigned char, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda(unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda(char32_t, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda(char32_t, unsigned char)#2}>(mozilla::Utf8Unit, js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsIterator*, js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsEnd const&, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > 
>::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda()#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda(unsigned char, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda(unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda(char32_t, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda(char32_t, unsigned char)#2})
Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsIterator, js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsEnd, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda()#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(unsigned char, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(char32_t, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(char32_t, unsigned char)#2}>(mozilla::Utf8Unit, js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsIterator*, js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsEnd const&, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda()#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, 
mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(unsigned char, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(char32_t, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(char32_t, unsigned char)#2})
Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsIterator, js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsEnd, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda()#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda(unsigned char, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda(unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda(char32_t, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda(char32_t, unsigned char)#2}>(mozilla::Utf8Unit, js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsIterator*, js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsEnd const&, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > 
>::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda()#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda(unsigned char, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda(unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda(char32_t, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePointDontNormalize(mozilla::Utf8Unit, char32_t*)::{lambda(char32_t, unsigned char)#2})
Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsIterator, js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsEnd, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda()#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(unsigned char, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(char32_t, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(char32_t, unsigned char)#2}>(mozilla::Utf8Unit, js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsIterator*, js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsEnd const&, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda()#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, 
mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(unsigned char, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(char32_t, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(char32_t, unsigned char)#2})
380
381
/**
382
 * Identical to the above function, but not forced to be instantiated inline --
383
 * the compiler is permitted to common up separate invocations if it chooses.
 *
 * This overload adds no logic of its own: it passes |aLeadUnit|, |aIter|,
 * |aEnd|, and the five notifier functors, in the same order, straight to
 * DecodeOneUtf8CodePointInline and returns that call's result.  The functors
 * are taken by value, as declared in the parameter list.
384
 */
385
template<typename Iter,
386
         typename EndIter,
387
         class OnBadLeadUnit,
388
         class OnNotEnoughUnits,
389
         class OnBadTrailingUnit,
390
         class OnBadCodePoint,
391
         class OnNotShortestForm>
392
inline Maybe<char32_t>
393
DecodeOneUtf8CodePoint(const Utf8Unit aLeadUnit,
394
                       Iter* aIter, const EndIter& aEnd,
395
                       OnBadLeadUnit aOnBadLeadUnit,
396
                       OnNotEnoughUnits aOnNotEnoughUnits,
397
                       OnBadTrailingUnit aOnBadTrailingUnit,
398
                       OnBadCodePoint aOnBadCodePoint,
399
                       OnNotShortestForm aOnNotShortestForm)
400
0
{
401
0
  return DecodeOneUtf8CodePointInline(aLeadUnit, aIter, aEnd,
402
0
                                      aOnBadLeadUnit, aOnNotEnoughUnits,
403
0
                                      aOnBadTrailingUnit, aOnBadCodePoint,
404
0
                                      aOnNotShortestForm);
405
0
}
Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePoint<js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsIterator, js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsEnd, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda()#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(unsigned char, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(char32_t, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(char32_t, unsigned char)#2}>(mozilla::Utf8Unit, js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsIterator*, js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsEnd const&, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda()#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > 
>::getNonAsciiCodePoint(int, int*)::{lambda(unsigned char, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(char32_t, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::FullParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(char32_t, unsigned char)#2})
Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePoint<js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsIterator, js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsEnd, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda()#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(unsigned char, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(char32_t, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(char32_t, unsigned char)#2}>(mozilla::Utf8Unit, js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsIterator*, js::frontend::SpecializedTokenStreamCharsBase<mozilla::Utf8Unit>::SourceUnitsEnd const&, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda()#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, 
mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(unsigned char, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(char32_t, unsigned char)#1}, js::frontend::TokenStreamChars<mozilla::Utf8Unit, js::frontend::ParserAnyCharsAccess<js::frontend::GeneralParser<js::frontend::SyntaxParseHandler, mozilla::Utf8Unit> > >::getNonAsciiCodePoint(int, int*)::{lambda(char32_t, unsigned char)#2})
406
407
/**
408
 * Like the always-inlined function above, but with no-op behavior from all
409
 * trailing if-invalid notifier functors.
410
 *
 * Only |aLeadUnit|, |aIter|, and |aEnd| are supplied by the caller.  The five
 * notifiers are empty lambdas defined in the body, so no notification is
 * delivered for invalid input.  The result of the delegated call is returned
 * unchanged.
 *
411
 * This function is MOZ_ALWAYS_INLINE: if you don't need that, use the version
412
 * of this function without the "Inline" suffix on the name.
413
 */
414
template<typename Iter, typename EndIter>
415
MOZ_ALWAYS_INLINE Maybe<char32_t>
416
DecodeOneUtf8CodePointInline(const Utf8Unit aLeadUnit,
417
                             Iter* aIter, const EndIter& aEnd)
418
0
{
  // Each lambda below has an empty body and is passed, in order, as one of the
  // five trailing notifier arguments of the delegated call at the end of this
  // function.  The |aOn...| names used in the comments are the names the
  // functor-taking DecodeOneUtf8CodePoint overload gives to those arguments;
  // each comment describes when that notifier is invoked and what it is passed.
419
0
  // aOnBadLeadUnit is called when |aLeadUnit| itself is an invalid lead unit in
420
0
  // a multi-unit code point.  It is passed no arguments: the caller already has
421
0
  // |aLeadUnit| on hand, so no need to provide it again.
422
0
  auto onBadLeadUnit = []() {};
Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<unsigned char const*, unsigned char const*>(mozilla::Utf8Unit, unsigned char const**, unsigned char const* const&)::{lambda()#1}::operator()() const
Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<mozilla::Utf8Unit const*, mozilla::Utf8Unit const*>(mozilla::Utf8Unit, mozilla::Utf8Unit const**, mozilla::Utf8Unit const* const&)::{lambda()#1}::operator()() const
423
0
424
0
  // aOnNotEnoughUnits is called when |aLeadUnit| properly indicates a code
425
0
  // point length, but there aren't enough units from |*aIter| to |aEnd| to
426
0
  // satisfy that length.  It is passed the number of code units actually
427
0
  // available (according to |aEnd - *aIter|) and the number of code units that
428
0
  // |aLeadUnit| indicates are needed.  Both numbers include the contribution
429
0
  // of |aLeadUnit| itself: so |aUnitsAvailable <= 3|, |aUnitsNeeded <= 4|, and
430
0
  // |aUnitsAvailable < aUnitsNeeded|.  As above, it also is not passed the lead
431
0
  // code unit.
432
0
  auto onNotEnoughUnits = [](uint8_t aUnitsAvailable, uint8_t aUnitsNeeded) {};
Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<unsigned char const*, unsigned char const*>(mozilla::Utf8Unit, unsigned char const**, unsigned char const* const&)::{lambda(unsigned char, unsigned char)#1}::operator()(unsigned char, unsigned char) const
Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<mozilla::Utf8Unit const*, mozilla::Utf8Unit const*>(mozilla::Utf8Unit, mozilla::Utf8Unit const**, mozilla::Utf8Unit const* const&)::{lambda(unsigned char, unsigned char)#1}::operator()(unsigned char, unsigned char) const
433
0
434
0
  // aOnBadTrailingUnit is called when one of the trailing code units implied by
435
0
  // |aLeadUnit| doesn't match the 0b10xx'xxxx bit pattern that all UTF-8
436
0
  // trailing code units must satisfy.  It is passed the total count of units
437
0
  // observed (including |aLeadUnit|).  The bad trailing code unit will
438
0
  // conceptually be at |(*aIter)[aUnitsObserved - 1]| if this functor is
439
0
  // called, and so |aUnitsObserved <= 4|.
440
0
  auto onBadTrailingUnit = [](uint8_t aUnitsObserved) {};
Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<unsigned char const*, unsigned char const*>(mozilla::Utf8Unit, unsigned char const**, unsigned char const* const&)::{lambda(unsigned char)#1}::operator()(unsigned char) const
Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<mozilla::Utf8Unit const*, mozilla::Utf8Unit const*>(mozilla::Utf8Unit, mozilla::Utf8Unit const**, mozilla::Utf8Unit const* const&)::{lambda(unsigned char)#1}::operator()(unsigned char) const
441
0
442
0
  // aOnBadCodePoint is called when a structurally-correct code point encoding
443
0
  // is found, but the *value* that is encoded is not a valid code point: either
444
0
  // because it exceeded the U+10FFFF Unicode maximum code point, or because it
445
0
  // was a UTF-16 surrogate.  It is passed the non-code point value and the
446
0
  // number of code units used to encode it.
447
0
  auto onBadCodePoint = [](char32_t aBadCodePoint, uint8_t aUnitsObserved) {};
Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<unsigned char const*, unsigned char const*>(mozilla::Utf8Unit, unsigned char const**, unsigned char const* const&)::{lambda(char32_t, unsigned char)#1}::operator()(char32_t, unsigned char) const
Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<mozilla::Utf8Unit const*, mozilla::Utf8Unit const*>(mozilla::Utf8Unit, mozilla::Utf8Unit const**, mozilla::Utf8Unit const* const&)::{lambda(char32_t, unsigned char)#1}::operator()(char32_t, unsigned char) const
448
0
449
0
  // aOnNotShortestForm is called when structurally-correct encoding is found,
450
0
  // but the encoded value should have been encoded in fewer code units (e.g.
451
0
  // mis-encoding U+0000 as 0b1100'0000 0b1000'0000 in two code units instead of
452
0
  // as 0b0000'0000).  It is passed the mis-encoded code point (which will be
453
0
  // valid and not a surrogate) and the count of code units that mis-encoded it.
454
0
  auto onNotShortestForm = [](char32_t aBadCodePoint, uint8_t aUnitsObserved) {};
Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<unsigned char const*, unsigned char const*>(mozilla::Utf8Unit, unsigned char const**, unsigned char const* const&)::{lambda(char32_t, unsigned char)#2}::operator()(char32_t, unsigned char) const
Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<mozilla::Utf8Unit const*, mozilla::Utf8Unit const*>(mozilla::Utf8Unit, mozilla::Utf8Unit const**, mozilla::Utf8Unit const* const&)::{lambda(char32_t, unsigned char)#2}::operator()(char32_t, unsigned char) const
455
0
  // Delegate to the functor-taking overload, supplying the no-op notifiers
  // defined above in the order it expects them.
456
0
  return DecodeOneUtf8CodePointInline(aLeadUnit, aIter, aEnd,
457
0
                                      onBadLeadUnit, onNotEnoughUnits,
458
0
                                      onBadTrailingUnit, onBadCodePoint,
459
0
                                      onNotShortestForm);
460
0
}
Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<unsigned char const*, unsigned char const*>(mozilla::Utf8Unit, unsigned char const**, unsigned char const* const&)
Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePointInline<mozilla::Utf8Unit const*, mozilla::Utf8Unit const*>(mozilla::Utf8Unit, mozilla::Utf8Unit const**, mozilla::Utf8Unit const* const&)
461
462
/**
463
 * Identical to the above function, but not forced to be instantiated inline --
464
 * the compiler/linker are allowed to common up separate invocations.
 *
 * This overload takes no notifier functors.  It passes |aLeadUnit|, |aIter|,
 * and |aEnd| to the three-argument DecodeOneUtf8CodePointInline and returns
 * that call's result unchanged.
465
 */
466
template<typename Iter, typename EndIter>
467
inline Maybe<char32_t>
468
DecodeOneUtf8CodePoint(const Utf8Unit aLeadUnit,
469
                       Iter* aIter, const EndIter& aEnd)
470
0
{
471
0
  return DecodeOneUtf8CodePointInline(aLeadUnit, aIter, aEnd);
472
0
}
Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePoint<unsigned char const*, unsigned char const*>(mozilla::Utf8Unit, unsigned char const**, unsigned char const* const&)
Unexecuted instantiation: mozilla::Maybe<char32_t> mozilla::DecodeOneUtf8CodePoint<mozilla::Utf8Unit const*, mozilla::Utf8Unit const*>(mozilla::Utf8Unit, mozilla::Utf8Unit const**, mozilla::Utf8Unit const* const&)
473
474
} // namespace mozilla
475
476
#endif /* mozilla_Utf8_h */