Coverage Report

Created: 2025-12-12 07:27

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/hermes/include/hermes/VM/StringView.h
Line
Count
Source
1
/*
2
 * Copyright (c) Meta Platforms, Inc. and affiliates.
3
 *
4
 * This source code is licensed under the MIT license found in the
5
 * LICENSE file in the root directory of this source tree.
6
 */
7
8
#ifndef HERMES_VM_STRINGVIEW_H
9
#define HERMES_VM_STRINGVIEW_H
10
11
#include "SmallXString.h"
12
#include "hermes/VM/Runtime.h"
13
#include "hermes/VM/StringPrimitive.h"
14
#include "hermes/VM/StringRefUtils.h"
15
#include "hermes/VM/TwineChar16.h"
16
#pragma GCC diagnostic push
17
18
#ifdef HERMES_COMPILER_SUPPORTS_WSHORTEN_64_TO_32
19
#pragma GCC diagnostic ignored "-Wshorten-64-to-32"
20
#endif
21
namespace hermes {
22
namespace vm {
23
24
/// StringView is a view to the string content from StringPrimitive.
25
/// It hides the difference between ASCII string and UTF16 string, and hence
26
/// allows you to iterate through a string without worrying about the type.
27
/// Internally, it's a char pointer and a char16 pointer (only one is valid).
28
///
29
/// Performance: Iterating from StringView is slightly slower than normal
30
/// iterations: every operation has one extra conditional check on the type.
31
/// If you are in an extremely performance-sensitive setting, consider getting
32
/// raw pointers directly out of StringPrimitive and explicitly duplicate code
33
/// to handle char and char16 strings separately.
34
///
35
/// Alternatively, if you know the string is very likely to be UTF16, or the
36
/// string is short, consider calling getUTF16Ref (which may invoke a string copy
37
/// if it turns out to be an ASCII string).
38
class StringView {
39
  friend class StringPrimitive;
40
  friend class IdentifierTable;
41
42
  union {
43
    /// StringView can be used to represent a view to a non-GC-managed string,
44
    /// a.k.a persistent identifiers whose string content is from a static
45
    /// memory address (either a C++ literal or a persistent bytecode module).
46
    const void *nonManagedStringPtr_;
47
48
    /// Handle pointing to the actual string. We need a handle to allow a
49
    /// StringView to survive allocations, so that we can have multiple
50
    /// StringViews around at the same time. Note that the StringPrimitive
51
    /// must have been resolved if it's a rope, i.e. we should be able to obtain
52
    /// a char/char16 pointer directly from strPrim_.
53
    ///
54
    /// NOTE: we are using \c llvh::AlignedCharArrayUnion to avoid constructing
55
    /// the handle (which doesn't have a default constructor).
56
    llvh::AlignedCharArrayUnion<Handle<StringPrimitive>> strPrim_;
57
  };
58
59
  /// Starting index in the StringPrimitive as the beginning of this view.
60
  uint32_t startIndex_ : 30;
61
62
  /// Whether we are storing a handle or a non-managed pointer.
63
  uint32_t isHandle_ : 1;
64
65
  /// Whether the string is ASCII.
66
  uint32_t isASCII_ : 1;
67
68
  /// Length of the string.
69
  uint32_t length_;
70
71
 public:
72
  /// Iterator for StringView. It's mostly standard except that operator* does not
73
  /// return a reference, which disables certain things such as creating a
74
  /// reverse_iterator using std::reverse_iterator.
75
  class const_iterator {
76
    friend class StringView;
77
78
    /// Current pointer position if the underlying string is char string.
79
    const char *charPtr_{nullptr};
80
81
    /// Current pointer position if the underlying string is char16 string.
82
    const char16_t *char16Ptr_{nullptr};
83
84
    const_iterator(const char *charPtr, const char16_t *char16Ptr)
85
252k
        : charPtr_(charPtr), char16Ptr_(char16Ptr) {
86
252k
      assert(
87
252k
          ((!charPtr_) ^ (!char16Ptr_)) &&
88
252k
          "Must provide one of char or char16 pointer");
89
252k
    }
90
91
2.05k
    explicit const_iterator(const char *ptr) : const_iterator(ptr, nullptr) {}
92
93
    explicit const_iterator(const char16_t *ptr)
94
249k
        : const_iterator(nullptr, ptr) {}
95
96
   public:
97
    using iterator_category = std::random_access_iterator_tag;
98
    using value_type = char16_t;
99
    using pointer = char16_t *;
100
    using difference_type = std::ptrdiff_t;
101
    using reference = char16_t;
102
103
    const_iterator() = default;
104
105
    /// Allows for copying.
106
    const_iterator(const const_iterator &other) = default;
107
    const_iterator &operator=(const const_iterator &other) = default;
108
109
1.80M
    const_iterator &operator++() {
110
1.80M
      if (charPtr_) {
111
104k
        ++charPtr_;
112
1.69M
      } else {
113
1.69M
        ++char16Ptr_;
114
1.69M
      }
115
1.80M
      return *this;
116
1.80M
    }
117
0
    const_iterator &operator--() {
118
0
      if (charPtr_) {
119
0
        --charPtr_;
120
0
      } else {
121
0
        --char16Ptr_;
122
0
      }
123
0
      return *this;
124
0
    }
125
0
    const_iterator &operator+=(difference_type rhs) {
126
0
      if (charPtr_) {
127
0
        charPtr_ += rhs;
128
0
      } else {
129
0
        char16Ptr_ += rhs;
130
0
      }
131
0
      return *this;
132
0
    }
133
0
    const_iterator &operator-=(difference_type rhs) {
134
0
      if (charPtr_) {
135
0
        charPtr_ -= rhs;
136
0
      } else {
137
0
        char16Ptr_ -= rhs;
138
0
      }
139
0
      return *this;
140
0
    }
141
0
    const_iterator operator++(int) {
142
0
      const_iterator tmp(charPtr_, char16Ptr_);
143
0
      if (charPtr_) {
144
0
        ++charPtr_;
145
0
      } else {
146
0
        ++char16Ptr_;
147
0
      }
148
0
      return tmp;
149
0
    }
150
0
    const_iterator operator--(int) {
151
0
      const_iterator tmp(charPtr_, char16Ptr_);
152
0
      if (charPtr_) {
153
0
        --charPtr_;
154
0
      } else {
155
0
        --char16Ptr_;
156
0
      }
157
0
      return tmp;
158
0
    }
159
160
287
    difference_type operator-(const const_iterator &rhs) const {
161
287
      if (charPtr_) {
162
287
        return charPtr_ - rhs.charPtr_;
163
287
      }
164
0
      return char16Ptr_ - rhs.char16Ptr_;
165
287
    }
166
167
174
    const_iterator operator-(difference_type rhs) const {
168
174
      if (charPtr_) {
169
174
        return const_iterator(charPtr_ - rhs, char16Ptr_);
170
174
      }
171
0
      return const_iterator(charPtr_, char16Ptr_ - rhs);
172
174
    }
173
0
    const_iterator operator+(difference_type rhs) const {
174
0
      if (charPtr_) {
175
0
        return const_iterator(charPtr_ + rhs, char16Ptr_);
176
0
      }
177
0
      return const_iterator(charPtr_, char16Ptr_ + rhs);
178
0
    }
179
180
    /// Const dereference. Note that we cannot return a reference here (without
181
    /// losing efficiency), hence making this iterator non-standard.
182
1.80M
    char16_t operator*() const {
183
1.80M
      return charPtr_ ? *charPtr_ : *char16Ptr_;
184
1.80M
    }
185
186
    /// Comparisons.
187
1.92M
    bool operator==(const const_iterator &rhs) const {
188
1.92M
      if (charPtr_) {
189
106k
        return charPtr_ == rhs.charPtr_;
190
106k
      }
191
1.82M
      return char16Ptr_ == rhs.char16Ptr_;
192
1.92M
    }
193
1.92M
    bool operator!=(const const_iterator &rhs) const {
194
1.92M
      return !(*this == rhs);
195
1.92M
    }
196
0
    bool operator>(const const_iterator &rhs) const {
197
0
      if (charPtr_) {
198
0
        return charPtr_ > rhs.charPtr_;
199
0
      }
200
0
      return char16Ptr_ > rhs.char16Ptr_;
201
0
    }
202
0
    bool operator<(const const_iterator &rhs) const {
203
0
      if (charPtr_) {
204
0
        return charPtr_ < rhs.charPtr_;
205
0
      }
206
0
      return char16Ptr_ < rhs.char16Ptr_;
207
0
    }
208
0
    bool operator>=(const const_iterator &rhs) const {
209
0
      return !(*this < rhs);
210
0
    }
211
0
    bool operator<=(const const_iterator &rhs) const {
212
0
      return !(*this > rhs);
213
0
    }
214
  };
215
216
  /// Reverse iterator type.
217
  using const_reverse_iterator = std::reverse_iterator<const_iterator>;
218
219
// In debug mode the handle is non-trivial, which makes us non-trivial too and
220
// we need to invoke its copy constructor and destructor.
221
// We could also deal with this using templates, by inheriting from a different
222
// base class depending on std::is_trivially_copyable<>, but the complexity is
223
// probably not worth it.
224
#ifndef NDEBUG
225
511k
  StringView(const StringView &other) {
226
511k
    ::memcpy(this, &other, sizeof(*this));
227
511k
    if (isHandle_)
228
47.4k
      new (strPrim_.buffer) Handle<StringPrimitive>(other.strPrim());
229
511k
  }
230
231
0
  StringView &operator=(const StringView &other) {
232
0
    if (this != &other) {
233
0
      if (isHandle_)
234
0
        strPrim().~Handle<StringPrimitive>();
235
0
      ::memcpy(this, &other, sizeof(*this));
236
0
      if (isHandle_)
237
0
        new (strPrim_.buffer) Handle<StringPrimitive>(other.strPrim());
238
0
    }
239
0
    return *this;
240
0
  }
241
242
1.53M
  ~StringView() {
243
1.53M
    if (isHandle_)
244
581k
      strPrim().~Handle<StringPrimitive>();
245
1.53M
  }
246
#else
247
  StringView(const StringView &other) = default;
248
  ~StringView() = default;
249
#endif
250
251
0
  StringView(const char *ptr) : StringView(ASCIIRef(ptr, strlen(ptr))) {}
252
253
  /// \return an iterator pointing at the beginning of the string.
254
126k
  const_iterator begin() const {
255
126k
    if (isASCII()) {
256
1.07k
      return const_iterator(castToCharPtr());
257
1.07k
    }
258
124k
    return const_iterator(castToChar16Ptr());
259
126k
  }
260
261
  /// \return an iterator pointing at one past the end of the string.
262
125k
  const_iterator end() const {
263
125k
    if (isASCII()) {
264
984
      return const_iterator(castToCharPtr() + length_);
265
984
    }
266
124k
    return const_iterator(castToChar16Ptr() + length_);
267
125k
  }
268
269
  /// \return a reverse iterator pointing at the end of the string.
270
0
  const_reverse_iterator rbegin() const {
271
0
    return const_reverse_iterator(end());
272
0
  }
273
274
  /// \return a reverse iterator pointing one before the start of the string.
275
0
  const_reverse_iterator rend() const {
276
0
    return const_reverse_iterator(begin());
277
0
  }
278
279
  /// \return the length.
280
1.12M
  size_t length() const {
281
1.12M
    return length_;
282
1.12M
  }
283
284
  /// \return whether this string is empty.
285
0
  bool empty() const {
286
0
    return !length_;
287
0
  }
288
289
  /// \return whether this is a char string.
290
2.30M
  bool isASCII() const {
291
2.30M
    return isASCII_;
292
2.30M
  }
293
294
  /// Direct indexing, \return character at \p index.
295
255
  char16_t operator[](uint32_t index) const {
296
255
    assert(index < length_ && "Out of bound indexing");
297
255
    if (isASCII()) {
298
255
      return castToCharPtr()[index];
299
255
    }
300
0
    return castToChar16Ptr()[index];
301
255
  }
302
303
  /// \return a new StringView with the string sliced from \p start with
304
  /// length \p length.
305
87
  StringView slice(uint32_t start, uint32_t length) const {
306
87
    assert(start + length <= length_ && "Out of bound slicing");
307
87
    auto newStringView = *this;
308
87
    newStringView.startIndex_ += start;
309
87
    newStringView.length_ = length;
310
87
    return newStringView;
311
87
  }
312
313
  /// \return a new StringView with the string sliced from \p start till
314
  /// the end of the string.
315
0
  StringView slice(uint32_t start) const {
316
0
    assert(start <= length_ && "Out of bound slicing");
317
0
    return slice(start, length_ - start);
318
0
  }
319
320
  /// \return a new StringView with the string sliced between [first, last).
321
87
  StringView slice(const_iterator first, const_iterator last) const {
322
87
    return slice(first - begin(), last - first);
323
87
  }
324
325
  /// \return a UTF16Ref pointing at the beginning of the string.
326
  /// If the string is already UTF16, we return the pointer directly;
327
  /// otherwise (it's ASCII) we copy the string into the end of \p allocator,
328
  /// and \return a pointer to the beginning of this string in the allocator.
329
  /// \pre allocator must be empty when passed in.
330
104k
  UTF16Ref getUTF16Ref(llvh::SmallVectorImpl<char16_t> &allocator) const {
331
104k
    assert(allocator.empty() && "Shouldn't use a non-empty allocator");
332
104k
    return getUTF16Ref(allocator, false);
333
104k
  }
334
335
  /// Append the string into \p allocator, even though the string may already be
336
  /// UTF16.
337
0
  void appendUTF16String(llvh::SmallVectorImpl<char16_t> &allocator) const {
338
0
    (void)getUTF16Ref(allocator, true);
339
0
  }
340
341
  /// Assuming the StringView represents a char string, \return the pointer.
342
899k
  const char *castToCharPtr() const {
343
899k
    assert(isASCII() && "Cannot cast char16_t pointer to char pointer");
344
899k
    if (!isHandle_) {
345
488k
      return static_cast<const char *>(nonManagedStringPtr_) + startIndex_;
346
488k
    }
347
899k
    assert(isHandle_ && "StringView does not contain a valid string");
348
411k
    return (*strPrim())->castToASCIIPointer() + startIndex_;
349
411k
  }
350
351
  /// Assuming the StringView represents a char16 string, \return the pointer.
352
250k
  const char16_t *castToChar16Ptr() const {
353
250k
    assert(!isASCII() && "Cannot cast char pointer to char16 pointer");
354
250k
    if (!isHandle_) {
355
0
      return static_cast<const char16_t *>(nonManagedStringPtr_) + startIndex_;
356
0
    }
357
250k
    assert(isHandle_ && "StringView does not contain a valid string");
358
250k
    return (*strPrim())->castToUTF16Pointer() + startIndex_;
359
250k
  }
360
361
  /// Check if two StringViews are equal.
362
348
  bool equals(const StringView &other) const {
363
348
    if (other.isASCII()) {
364
348
      return equals(ASCIIRef(other.castToCharPtr(), other.length()));
365
348
    }
366
0
    return equals(UTF16Ref(other.castToChar16Ptr(), other.length()));
367
348
  }
368
369
  /// Check if a StringView is equal to an ArrayRef.
370
  template <typename T>
371
1.67k
  bool equals(const llvh::ArrayRef<T> &other) const {
372
1.67k
    if (isASCII()) {
373
1.40k
      return stringRefEquals(ASCIIRef(castToCharPtr(), length()), other);
374
1.40k
    }
375
263
    return stringRefEquals(UTF16Ref(castToChar16Ptr(), length()), other);
376
1.67k
  }
bool hermes::vm::StringView::equals<char>(llvh::ArrayRef<char> const&) const
Line
Count
Source
371
704
  bool equals(const llvh::ArrayRef<T> &other) const {
372
704
    if (isASCII()) {
373
704
      return stringRefEquals(ASCIIRef(castToCharPtr(), length()), other);
374
704
    }
375
0
    return stringRefEquals(UTF16Ref(castToChar16Ptr(), length()), other);
376
704
  }
bool hermes::vm::StringView::equals<char16_t>(llvh::ArrayRef<char16_t> const&) const
Line
Count
Source
371
966
  bool equals(const llvh::ArrayRef<T> &other) const {
372
966
    if (isASCII()) {
373
703
      return stringRefEquals(ASCIIRef(castToCharPtr(), length()), other);
374
703
    }
375
263
    return stringRefEquals(UTF16Ref(castToChar16Ptr(), length()), other);
376
966
  }
Unexecuted instantiation: bool hermes::vm::StringView::equals<unsigned char>(llvh::ArrayRef<unsigned char> const&) const
377
378
36
  TwineChar16 toTwine() const {
379
36
    if (isASCII()) {
380
36
      return TwineChar16(llvh::StringRef(castToCharPtr(), length()));
381
36
    }
382
0
    return TwineChar16(UTF16Ref(castToChar16Ptr(), length()));
383
36
  }
384
385
36
  operator TwineChar16() const {
386
36
    return toTwine();
387
36
  }
388
389
 private:
390
  /// These constructors should only be called from self or from
391
  /// StringPrimitive.
392
393
  // Create a StringView from a StringPrimitive
394
  explicit StringView(Handle<StringPrimitive> str)
395
534k
      : startIndex_(0),
396
534k
        isHandle_(true),
397
534k
        isASCII_(str->isASCII()),
398
534k
        length_(str->getStringLength()) {
399
534k
    new (strPrim_.buffer) Handle<StringPrimitive>(str);
400
534k
  }
401
402
  /// Create a StringView from a lazy identifier.
403
  explicit StringView(ASCIIRef asciiRef)
404
488k
      : nonManagedStringPtr_(asciiRef.data()),
405
488k
        startIndex_(0),
406
488k
        isHandle_(false),
407
488k
        isASCII_(true),
408
488k
        length_(asciiRef.size()) {}
409
  explicit StringView(UTF16Ref utf16Ref)
410
0
      : nonManagedStringPtr_(utf16Ref.data()),
411
0
        startIndex_(0),
412
0
        isHandle_(false),
413
0
        isASCII_(false),
414
0
        length_(utf16Ref.size()) {}
415
416
  /// Helper function for getUTF16Ref and appendUTF16String.
417
  UTF16Ref getUTF16Ref(
418
      llvh::SmallVectorImpl<char16_t> &allocator,
419
      bool alwaysCopy) const;
420
421
581k
  Handle<StringPrimitive> &strPrim() {
422
581k
    assert(isHandle_ && "must be a handle");
423
    // Need to go through a variable to placate gcc4.9.
424
581k
    char *buffer = strPrim_.buffer;
425
581k
    return *reinterpret_cast<Handle<StringPrimitive> *>(buffer);
426
581k
  }
427
709k
  const Handle<StringPrimitive> &strPrim() const {
428
709k
    assert(isHandle_ && "must be a handle");
429
    // Need to go through a variable to placate gcc4.9.
430
709k
    const char *buffer = strPrim_.buffer;
431
709k
    return *reinterpret_cast<const Handle<StringPrimitive> *>(buffer);
432
709k
  }
433
};
434
435
llvh::raw_ostream &operator<<(llvh::raw_ostream &os, const StringView &sv);
436
437
} // namespace vm
438
} // namespace hermes
439
440
#pragma GCC diagnostic pop
441
#endif // HERMES_VM_STRINGVIEW_H