Coverage Report

Created: 2018-09-25 14:53

/work/obj-fuzz/dist/include/nsCharTraits.h
Line
Count
Source (jump to first uncovered line)
1
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
3
/* This Source Code Form is subject to the terms of the Mozilla Public
4
 * License, v. 2.0. If a copy of the MPL was not distributed with this
5
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7
#ifndef nsCharTraits_h___
8
#define nsCharTraits_h___
9
10
#include <ctype.h> // for |EOF|, |WEOF|
11
#include <string.h> // for |memcpy|, et al
12
#include "mozilla/MemoryChecking.h"
13
14
// This file may be used (through nsUTF8Utils.h) from non-XPCOM code, in
15
// particular the standalone software updater. In that case stub out
16
// the macros provided by nsDebug.h which are only usable when linking XPCOM
17
18
#ifdef NS_NO_XPCOM
19
#define NS_WARNING(msg)
20
#define NS_ASSERTION(cond, msg)
21
#define NS_ERROR(msg)
22
#else
23
#include "nsDebug.h"  // for NS_ASSERTION
24
#endif
25
26
/*
27
 * Some macros for converting char16_t (UTF-16) to and from Unicode scalar
28
 * values.
29
 *
30
 * Note that UTF-16 represents all Unicode scalar values up to U+10FFFF by
31
 * using "surrogate pairs". These consist of a high surrogate, i.e. a code
32
 * point in the range U+D800 - U+DBFF, and a low surrogate, i.e. a code point
33
 * in the range U+DC00 - U+DFFF, like this:
34
 *
35
 *  U+D800 U+DC00 =  U+10000
36
 *  U+D800 U+DC01 =  U+10001
37
 *  ...
38
 *  U+DBFF U+DFFE = U+10FFFE
39
 *  U+DBFF U+DFFF = U+10FFFF
40
 *
41
 * These surrogate code points U+D800 - U+DFFF are not themselves valid Unicode
42
 * scalar values and are not well-formed UTF-16 except as high-surrogate /
43
 * low-surrogate pairs.
44
 */
45
46
105k
// First Unicode scalar value that lies outside the Basic Multilingual Plane.
#define PLANE1_BASE          static_cast<uint32_t>(0x00010000)

// High surrogates are in the range 0xD800 -- 0xDBFF
#define NS_IS_HIGH_SURROGATE(u) \
  ((static_cast<uint32_t>(u) & 0xFFFFFC00) == 0xD800)

// Low surrogates are in the range 0xDC00 -- 0xDFFF
#define NS_IS_LOW_SURROGATE(u) \
  ((static_cast<uint32_t>(u) & 0xFFFFFC00) == 0xDC00)

// Accepts both kinds of surrogate with a single mask-and-compare, which is
// faster than testing NS_IS_HIGH_SURROGATE || NS_IS_LOW_SURROGATE
#define IS_SURROGATE(u) \
  ((static_cast<uint32_t>(u) & 0xFFFFF800) == 0xD800)

// Everything else is not a surrogate: 0x0000 -- 0xD7FF, 0xE000 -- 0xFFFF

// N = (H - 0xD800) * 0x400 + 0x10000 + (L - 0xDC00)
// Nothing here verifies that h is a high surrogate and l is a low surrogate;
// that is left to the caller.
#define SURROGATE_TO_UCS4(h, l) \
  (((static_cast<uint32_t>(h) & 0x03FF) << 10) + \
   (static_cast<uint32_t>(l) & 0x03FF) + PLANE1_BASE)

// Extract surrogates from a UCS4 char
// Reference: the Unicode standard 4.0, section 3.9
// Since (c - 0x10000) >> 10 == (c >> 10) - 0x0080 and
// 0xD7C0 == 0xD800 - 0x0080,
// ((c - 0x10000) >> 10) + 0xD800 can be simplified to (c >> 10) + 0xD7C0.
// Note that 0xD7C0 is added, not bitwise-OR'd.
#define H_SURROGATE(c) \
  static_cast<char16_t>( \
    static_cast<char16_t>(static_cast<uint32_t>(c) >> 10) + \
    static_cast<char16_t>(0xD7C0))

// Since 0x10000 & 0x03FF == 0,
// (c - 0x10000) & 0x03FF == c & 0x03FF so that
// ((c - 0x10000) & 0x03FF) | 0xDC00 is equivalent to (c & 0x03FF) | 0xDC00.
#define L_SURROGATE(c) \
  static_cast<char16_t>( \
    static_cast<char16_t>(static_cast<uint32_t>(c) & \
                          static_cast<uint32_t>(0x03FF)) | \
    static_cast<char16_t>(0xDC00))

// True when ucs fits in a single UTF-16 code unit (it is below PLANE1_BASE).
#define IS_IN_BMP(ucs) (static_cast<uint32_t>(ucs) < PLANE1_BASE)

// U+FFFD, substituted by ENSURE_VALID_CHAR for invalid values.
#define UCS2_REPLACEMENT_CHAR static_cast<char16_t>(0xFFFD)

// One past the largest Unicode scalar value (U+10FFFF).
#define UCS_END static_cast<uint32_t>(0x00110000)

// A valid character is below UCS_END and is not a surrogate code point.
#define IS_VALID_CHAR(c) \
  ((static_cast<uint32_t>(c) < UCS_END) && !IS_SURROGATE(c))

// Yields c when it is valid and UCS2_REPLACEMENT_CHAR otherwise. The argument
// appears more than once in the expansion, so it must be free of side effects.
#define ENSURE_VALID_CHAR(c) \
  (IS_VALID_CHAR(c) ? (c) : UCS2_REPLACEMENT_CHAR)
84
85
// Primary template. It declares no members; the usable traits live in the
// explicit specializations for |char16_t| and |char|.
template <typename CharT>
struct nsCharTraits
{
};
89
90
template <>
91
struct nsCharTraits<char16_t>
92
{
93
  typedef char16_t char_type;
94
  typedef uint16_t  unsigned_char_type;
95
  typedef char      incompatible_char_type;
96
97
  static char_type* const sEmptyBuffer;
98
99
  // integer representation of characters:
100
  typedef int int_type;
101
102
  static char_type
103
  to_char_type(int_type aChar)
104
0
  {
105
0
    return char_type(aChar);
106
0
  }
107
108
  static int_type
109
  to_int_type(char_type aChar)
110
18.3M
  {
111
18.3M
    return int_type(static_cast<unsigned_char_type>(aChar));
112
18.3M
  }
113
114
  static bool
115
  eq_int_type(int_type aLhs, int_type aRhs)
116
620k
  {
117
620k
    return aLhs == aRhs;
118
620k
  }
119
120
121
  // |char_type| comparisons:
122
123
  static bool
124
  eq(char_type aLhs, char_type aRhs)
125
2.21M
  {
126
2.21M
    return aLhs == aRhs;
127
2.21M
  }
128
129
  static bool
130
  lt(char_type aLhs, char_type aRhs)
131
0
  {
132
0
    return aLhs < aRhs;
133
0
  }
134
135
136
  // operations on s[n] arrays:
137
138
  static char_type*
139
  move(char_type* aStr1, const char_type* aStr2, size_t aN)
140
36.4M
  {
141
36.4M
    return static_cast<char_type*>(memmove(aStr1, aStr2,
142
36.4M
                                           aN * sizeof(char_type)));
143
36.4M
  }
144
145
  static char_type*
146
  copy(char_type* aStr1, const char_type* aStr2, size_t aN)
147
25.1M
  {
148
25.1M
    return static_cast<char_type*>(memcpy(aStr1, aStr2,
149
25.1M
                                          aN * sizeof(char_type)));
150
25.1M
  }
151
152
  static void uninitialize(char_type* aStr, size_t aN)
153
36.4M
  {
154
#ifdef DEBUG
155
    memset(aStr, 0xE4, aN * sizeof(char_type));
156
#endif
157
36.4M
    MOZ_MAKE_MEM_UNDEFINED(aStr, aN * sizeof(char_type));
158
36.4M
  }
159
160
  static char_type*
161
  copyASCII(char_type* aStr1, const char* aStr2, size_t aN)
162
1.61M
  {
163
17.3M
    for (char_type* s = aStr1; aN--; ++s, ++aStr2) {
164
15.7M
      NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character");
165
15.7M
      *s = static_cast<char_type>(*aStr2);
166
15.7M
    }
167
1.61M
    return aStr1;
168
1.61M
  }
169
170
  static int
171
  compare(const char_type* aStr1, const char_type* aStr2, size_t aN)
172
5.71k
  {
173
15.0k
    for (; aN--; ++aStr1, ++aStr2) {
174
14.2k
      if (!eq(*aStr1, *aStr2)) {
175
4.83k
        return to_int_type(*aStr1) - to_int_type(*aStr2);
176
4.83k
      }
177
14.2k
    }
178
5.71k
179
5.71k
    return 0;
180
5.71k
  }
181
182
  static int
183
  compareASCII(const char_type* aStr1, const char* aStr2, size_t aN)
184
0
  {
185
0
    for (; aN--; ++aStr1, ++aStr2) {
186
0
      NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character");
187
0
      if (!eq_int_type(to_int_type(*aStr1),
188
0
                       to_int_type(static_cast<char_type>(*aStr2)))) {
189
0
        return to_int_type(*aStr1) -
190
0
               to_int_type(static_cast<char_type>(*aStr2));
191
0
      }
192
0
    }
193
0
194
0
    return 0;
195
0
  }
196
197
  // this version assumes that s2 is null-terminated and s1 has length n.
198
  // if s1 is shorter than s2 then we return -1; if s1 is longer than s2,
199
  // we return 1.
200
  static int
201
  compareASCIINullTerminated(const char_type* aStr1, size_t aN,
202
                             const char* aStr2)
203
432k
  {
204
646k
    for (; aN--; ++aStr1, ++aStr2) {
205
624k
      if (!*aStr2) {
206
3.90k
        return 1;
207
3.90k
      }
208
620k
      NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character");
209
620k
      if (!eq_int_type(to_int_type(*aStr1),
210
620k
                       to_int_type(static_cast<char_type>(*aStr2)))) {
211
406k
        return to_int_type(*aStr1) -
212
406k
               to_int_type(static_cast<char_type>(*aStr2));
213
406k
      }
214
620k
    }
215
432k
216
432k
    if (*aStr2) {
217
0
      return -1;
218
0
    }
219
21.7k
220
21.7k
    return 0;
221
21.7k
  }
222
223
  /**
224
   * Convert c to its lower-case form, but only if c is in the ASCII
225
   * range. Otherwise leave it alone.
226
   */
227
  static char_type
228
  ASCIIToLower(char_type aChar)
229
8.65M
  {
230
8.65M
    if (aChar >= 'A' && aChar <= 'Z') {
231
1.03M
      return char_type(aChar + ('a' - 'A'));
232
1.03M
    }
233
7.61M
234
7.61M
    return aChar;
235
7.61M
  }
236
237
  static int
238
  compareLowerCaseToASCII(const char_type* aStr1, const char* aStr2, size_t aN)
239
0
  {
240
0
    for (; aN--; ++aStr1, ++aStr2) {
241
0
      NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character");
242
0
      NS_ASSERTION(!(*aStr2 >= 'A' && *aStr2 <= 'Z'),
243
0
                   "Unexpected uppercase character");
244
0
      char_type lower_s1 = ASCIIToLower(*aStr1);
245
0
      if (lower_s1 != static_cast<char_type>(*aStr2)) {
246
0
        return to_int_type(lower_s1) -
247
0
               to_int_type(static_cast<char_type>(*aStr2));
248
0
      }
249
0
    }
250
0
251
0
    return 0;
252
0
  }
253
254
  // this version assumes that s2 is null-terminated and s1 has length n.
255
  // if s1 is shorter than s2 then we return -1; if s1 is longer than s2,
256
  // we return 1.
257
  static int
258
  compareLowerCaseToASCIINullTerminated(const char_type* aStr1,
259
                                        size_t aN, const char* aStr2)
260
8.15M
  {
261
8.67M
    for (; aN--; ++aStr1, ++aStr2) {
262
8.65M
      if (!*aStr2) {
263
340
        return 1;
264
340
      }
265
8.65M
      NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character");
266
8.65M
      NS_ASSERTION(!(*aStr2 >= 'A' && *aStr2 <= 'Z'),
267
8.65M
                   "Unexpected uppercase character");
268
8.65M
      char_type lower_s1 = ASCIIToLower(*aStr1);
269
8.65M
      if (lower_s1 != static_cast<char_type>(*aStr2)) {
270
8.12M
        return to_int_type(lower_s1) -
271
8.12M
               to_int_type(static_cast<char_type>(*aStr2));
272
8.12M
      }
273
8.65M
    }
274
8.15M
275
8.15M
    if (*aStr2) {
276
5.91k
      return -1;
277
5.91k
    }
278
22.5k
279
22.5k
    return 0;
280
22.5k
  }
281
282
  static size_t
283
  length(const char_type* aStr)
284
218k
  {
285
218k
    size_t result = 0;
286
2.17M
    while (!eq(*aStr++, char_type(0))) {
287
1.96M
      ++result;
288
1.96M
    }
289
218k
    return result;
290
218k
  }
291
292
  static const char_type*
293
  find(const char_type* aStr, size_t aN, char_type aChar)
294
3.68k
  {
295
22.0k
    while (aN--) {
296
21.3k
      if (eq(*aStr, aChar)) {
297
3.01k
        return aStr;
298
3.01k
      }
299
18.3k
      ++aStr;
300
18.3k
    }
301
3.68k
302
3.68k
    return 0;
303
3.68k
  }
304
};
305
306
template <>
307
struct nsCharTraits<char>
308
{
309
  typedef char           char_type;
310
  typedef unsigned char  unsigned_char_type;
311
  typedef char16_t      incompatible_char_type;
312
313
  static char_type* const sEmptyBuffer;
314
315
  // integer representation of characters:
316
317
  typedef int int_type;
318
319
  static char_type
320
  to_char_type(int_type aChar)
321
0
  {
322
0
    return char_type(aChar);
323
0
  }
324
325
  static int_type
326
  to_int_type(char_type aChar)
327
13.7M
  {
328
13.7M
    return int_type(static_cast<unsigned_char_type>(aChar));
329
13.7M
  }
330
331
  static bool
332
  eq_int_type(int_type aLhs, int_type aRhs)
333
0
  {
334
0
    return aLhs == aRhs;
335
0
  }
336
337
338
  // |char_type| comparisons:
339
340
  static bool eq(char_type aLhs, char_type aRhs)
341
0
  {
342
0
    return aLhs == aRhs;
343
0
  }
344
345
  static bool
346
  lt(char_type aLhs, char_type aRhs)
347
0
  {
348
0
    return aLhs < aRhs;
349
0
  }
350
351
352
  // operations on s[n] arrays:
353
354
  static char_type*
355
  move(char_type* aStr1, const char_type* aStr2, size_t aN)
356
29.4M
  {
357
29.4M
    return static_cast<char_type*>(memmove(aStr1, aStr2,
358
29.4M
                                           aN * sizeof(char_type)));
359
29.4M
  }
360
361
  static char_type*
362
  copy(char_type* aStr1, const char_type* aStr2, size_t aN)
363
68.6M
  {
364
68.6M
    return static_cast<char_type*>(memcpy(aStr1, aStr2,
365
68.6M
                                          aN * sizeof(char_type)));
366
68.6M
  }
367
368
  static void uninitialize(char_type* aStr, size_t aN)
369
29.4M
  {
370
#ifdef DEBUG
371
    memset(aStr, 0xE4, aN * sizeof(char_type));
372
#endif
373
29.4M
    MOZ_MAKE_MEM_UNDEFINED(aStr, aN * sizeof(char_type));
374
29.4M
  }
375
376
  static char_type*
377
  copyASCII(char_type* aStr1, const char* aStr2, size_t aN)
378
28.3k
  {
379
28.3k
    return copy(aStr1, aStr2, aN);
380
28.3k
  }
381
382
  static int
383
  compare(const char_type* aStr1, const char_type* aStr2, size_t aN)
384
3.00M
  {
385
3.00M
    return memcmp(aStr1, aStr2, aN);
386
3.00M
  }
387
388
  static int
389
  compareASCII(const char_type* aStr1, const char* aStr2, size_t aN)
390
7.23k
  {
391
#ifdef DEBUG
392
    for (size_t i = 0; i < aN; ++i) {
393
      NS_ASSERTION(!(aStr2[i] & ~0x7F), "Unexpected non-ASCII character");
394
    }
395
#endif
396
    return compare(aStr1, aStr2, aN);
397
7.23k
  }
398
399
  // this version assumes that s2 is null-terminated and s1 has length n.
400
  // if s1 is shorter than s2 then we return -1; if s1 is longer than s2,
401
  // we return 1.
402
  static int
403
  compareASCIINullTerminated(const char_type* aStr1, size_t aN,
404
                             const char* aStr2)
405
866
  {
406
866
    // can't use strcmp here because we don't want to stop when aStr1
407
866
    // contains a null
408
5.33k
    for (; aN--; ++aStr1, ++aStr2) {
409
4.47k
      if (!*aStr2) {
410
0
        return 1;
411
0
      }
412
4.47k
      NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character");
413
4.47k
      if (*aStr1 != *aStr2) {
414
10
        return to_int_type(*aStr1) - to_int_type(*aStr2);
415
10
      }
416
4.47k
    }
417
866
418
866
    if (*aStr2) {
419
0
      return -1;
420
0
    }
421
856
422
856
    return 0;
423
856
  }
424
425
  /**
426
   * Convert c to its lower-case form, but only if c is ASCII.
427
   */
428
  static char_type
429
  ASCIIToLower(char_type aChar)
430
12.0M
  {
431
12.0M
    if (aChar >= 'A' && aChar <= 'Z') {
432
1.80k
      return char_type(aChar + ('a' - 'A'));
433
1.80k
    }
434
12.0M
435
12.0M
    return aChar;
436
12.0M
  }
437
438
  static int
439
  compareLowerCaseToASCII(const char_type* aStr1, const char* aStr2, size_t aN)
440
5.49M
  {
441
14.1M
    for (; aN--; ++aStr1, ++aStr2) {
442
12.0M
      NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character");
443
12.0M
      NS_ASSERTION(!(*aStr2 >= 'A' && *aStr2 <= 'Z'),
444
12.0M
                   "Unexpected uppercase character");
445
12.0M
      char_type lower_s1 = ASCIIToLower(*aStr1);
446
12.0M
      if (lower_s1 != *aStr2) {
447
3.30M
        return to_int_type(lower_s1) - to_int_type(*aStr2);
448
3.30M
      }
449
12.0M
    }
450
5.49M
    return 0;
451
5.49M
  }
452
453
  // this version assumes that s2 is null-terminated and s1 has length n.
454
  // if s1 is shorter than s2 then we return -1; if s1 is longer than s2,
455
  // we return 1.
456
  static int
457
  compareLowerCaseToASCIINullTerminated(const char_type* aStr1, size_t aN,
458
                                        const char* aStr2)
459
81
  {
460
81
    for (; aN--; ++aStr1, ++aStr2) {
461
81
      if (!*aStr2) {
462
0
        return 1;
463
0
      }
464
81
      NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character");
465
81
      NS_ASSERTION(!(*aStr2 >= 'A' && *aStr2 <= 'Z'),
466
81
                   "Unexpected uppercase character");
467
81
      char_type lower_s1 = ASCIIToLower(*aStr1);
468
81
      if (lower_s1 != *aStr2) {
469
81
        return to_int_type(lower_s1) - to_int_type(*aStr2);
470
81
      }
471
81
    }
472
81
473
81
    if (*aStr2) {
474
0
      return -1;
475
0
    }
476
0
477
0
    return 0;
478
0
  }
479
480
  static size_t
481
  length(const char_type* aStr)
482
20.6M
  {
483
20.6M
    return strlen(aStr);
484
20.6M
  }
485
486
  static const char_type*
487
  find(const char_type* aStr, size_t aN, char_type aChar)
488
7.18M
  {
489
7.18M
    return reinterpret_cast<const char_type*>(memchr(aStr, to_int_type(aChar),
490
7.18M
                                                     aN));
491
7.18M
  }
492
};
493
494
// Generic read-side traits for an iterator type that provides |get()|,
// |advance()|, |difference_type| and |value_type|.
template <typename InputIterator>
struct nsCharSourceTraits
{
  typedef typename InputIterator::difference_type difference_type;

  // Number of elements between the two iterators, computed from the
  // pointers returned by |get()| and truncated to 32 bits.
  static uint32_t
  readable_distance(const InputIterator& aFirst, const InputIterator& aLast)
  {
    // assumes single fragment
    return static_cast<uint32_t>(aLast.get() - aFirst.get());
  }

  // Pointer to the element the iterator currently refers to.
  static const typename InputIterator::value_type*
  read(const InputIterator& aIter)
  {
    const typename InputIterator::value_type* current = aIter.get();
    return current;
  }

  // Moves the iterator by aN elements via its own |advance()|.
  static void
  advance(InputIterator& aStr, difference_type aN)
  {
    aStr.advance(aN);
  }
};
518
519
template <class CharT>
520
struct nsCharSourceTraits<CharT*>
521
{
522
  typedef ptrdiff_t difference_type;
523
524
  static uint32_t
525
  readable_distance(CharT* aStr)
526
  {
527
    return uint32_t(nsCharTraits<CharT>::length(aStr));
528
    // return numeric_limits<uint32_t>::max();
529
  }
530
531
  static uint32_t
532
  readable_distance(CharT* aFirst, CharT* aLast)
533
  {
534
    return uint32_t(aLast - aFirst);
535
  }
536
537
  static const CharT*
538
  read(CharT* aStr)
539
  {
540
    return aStr;
541
  }
542
543
  static void
544
  advance(CharT*& aStr, difference_type aN)
545
  {
546
    aStr += aN;
547
  }
548
};
549
550
// Generic write-side traits for an iterator type that provides |write()|
// and |value_type|.
template <typename OutputIterator>
struct nsCharSinkTraits
{
  typedef typename OutputIterator::value_type sink_value_type;

  // Forwards aN elements starting at aStr to the iterator's own |write()|.
  static void
  write(OutputIterator& aIter, const sink_value_type* aStr, uint32_t aN)
  {
    aIter.write(aStr, aN);
  }
};
560
561
template <class CharT>
562
struct nsCharSinkTraits<CharT*>
563
{
564
  static void
565
  write(CharT*& aIter, const CharT* aStr, uint32_t aN)
566
0
  {
567
0
    nsCharTraits<CharT>::move(aIter, aStr, aN);
568
0
    aIter += aN;
569
0
  }
570
};
571
572
#endif // !defined(nsCharTraits_h___)