Line data Source code
1 : // Copyright 2014 the V8 project authors. All rights reserved.
2 : // Redistribution and use in source and binary forms, with or without
3 : // modification, are permitted provided that the following conditions are
4 : // met:
5 : //
6 : // * Redistributions of source code must retain the above copyright
7 : // notice, this list of conditions and the following disclaimer.
8 : // * Redistributions in binary form must reproduce the above
9 : // copyright notice, this list of conditions and the following
10 : // disclaimer in the documentation and/or other materials provided
11 : // with the distribution.
12 : // * Neither the name of Google Inc. nor the names of its
13 : // contributors may be used to endorse or promote products derived
14 : // from this software without specific prior written permission.
15 : //
16 : // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 : // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 : // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 : // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 : // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 : // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 : // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 : // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 : // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 : // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 : // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 :
28 : #ifndef V8_AST_AST_VALUE_FACTORY_H_
29 : #define V8_AST_AST_VALUE_FACTORY_H_
30 :
31 : #include "src/base/hashmap.h"
32 : #include "src/conversions.h"
33 : #include "src/factory.h"
34 : #include "src/globals.h"
35 : #include "src/isolate.h"
36 : #include "src/utils.h"
37 :
38 : // Ast(Raw|Cons)String, AstValue and AstValueFactory are for storing strings and
39 : // values independent of the V8 heap and internalizing them later. During
40 : // parsing, they are created and stored outside the heap, in AstValueFactory.
41 : // After parsing, the strings and values are internalized (moved into the V8
42 : // heap).
43 : namespace v8 {
44 : namespace internal {
45 :
46 : class AstRawString final : public ZoneObject {  // A string literal held as raw bytes plus a precomputed hash.
47 : public:
48 75227716 : bool IsEmpty() const { return literal_bytes_.length() == 0; }  // True when there are no literal bytes.
49 173870568 : int length() const {  // Byte count for one-byte strings, half the byte count otherwise.
50 259094746 : return is_one_byte() ? literal_bytes_.length()
51 259243250 : : literal_bytes_.length() / 2;
52 : }
53 : bool AsArrayIndex(uint32_t* index) const;
54 : bool IsOneByteEqualTo(const char* data) const;
55 : uint16_t FirstCharacter() const;
56 :
57 : void Internalize(Isolate* isolate);
58 :
59 : // Access the physical representation:
60 : bool is_one_byte() const { return is_one_byte_; }
61 0 : int byte_length() const { return literal_bytes_.length(); }  // Size of the literal in bytes, regardless of encoding.
62 2473106 : const unsigned char* raw_data() const {  // Start of the literal bytes.
63 87784755 : return literal_bytes_.start();
64 : }
65 :
66 : // For storing AstRawStrings in a hash map.
67 : uint32_t hash() const {
68 : return hash_;
69 : }
70 :
71 : // This function can be called after internalizing, i.e. once set_string()
72 : V8_INLINE Handle<String> string() const {  // has stored a location in string_.
73 : DCHECK_NOT_NULL(string_);
74 : DCHECK(has_string_);
75 : return Handle<String>(string_);
76 : }
77 :
78 : private:
79 : friend class AstRawStringInternalizationKey;
80 : friend class AstStringConstants;
81 : friend class AstValueFactory;
82 :
83 : // Members accessed only by the AstValueFactory & related classes:
84 : static bool Compare(void* a, void* b);  // Passed as the matcher of the string hash map by AstStringConstants.
85 : AstRawString(bool is_one_byte, const Vector<const byte>& literal_bytes,
86 : uint32_t hash)
87 : : next_(nullptr),
88 : literal_bytes_(literal_bytes),
89 : hash_(hash),
90 169849802 : is_one_byte_(is_one_byte) {}
91 : AstRawString* next() {  // Only valid before set_string(): next_ shares storage with string_.
92 : DCHECK(!has_string_);
93 : return next_;
94 : }
95 : AstRawString** next_location() {  // Address of the link field, used to append to a list; same restriction as next().
96 : DCHECK(!has_string_);
97 : return &next_;
98 : }
99 :
100 : void set_string(Handle<String> string) {  // Records the handle's location; this overwrites next_ through the union.
101 : DCHECK(!string.is_null());
102 : DCHECK(!has_string_);
103 43860720 : string_ = string.location();
104 : #ifdef DEBUG
105 : has_string_ = true;
106 : #endif
107 : }
108 :
109 : // {string_} is stored as String** instead of a Handle<String> so it can be
110 : // stored in a union with {next_}.
111 : union {
112 : AstRawString* next_;
113 : String** string_;
114 : };
115 :
116 : Vector<const byte> literal_bytes_; // Memory owned by Zone.
117 : uint32_t hash_;
118 : bool is_one_byte_;
119 : #ifdef DEBUG
120 : // (Debug-only:) Verify the object life-cycle: Some functions may only be
121 : // called after internalization (that is, after a v8::internal::String has
122 : // been set); some only before.
123 : bool has_string_ = false;
124 : #endif
125 : };
126 :
127 : class AstConsString final : public ZoneObject {  // A string made of AstRawString segments kept in a singly linked list.
128 : public:
129 33339846 : AstConsString* AddString(Zone* zone, const AstRawString* s) {  // Makes s the head segment; empty strings are ignored. Returns this.
130 19686028 : if (s->IsEmpty()) return this;
131 13653818 : if (!IsEmpty()) {
132 : // We're putting the new string to the head of the list, meaning
133 : // the string segments will be in reverse order.
134 5938986 : Segment* tmp = new (zone->New(sizeof(Segment))) Segment;
135 5938986 : *tmp = segment_;  // Copy the current head into the zone-allocated node...
136 5938986 : segment_.next = tmp;  // ...and chain it behind the embedded head.
137 : }
138 13653818 : segment_.string = s;
139 13653818 : return this;
140 : }
141 :
142 : bool IsEmpty() const {  // Empty means the embedded head segment holds no string.
143 : DCHECK_IMPLIES(segment_.string == nullptr, segment_.next == nullptr);
144 : DCHECK_IMPLIES(segment_.string != nullptr, !segment_.string->IsEmpty());
145 : return segment_.string == nullptr;
146 : }
147 :
148 : void Internalize(Isolate* isolate);
149 :
150 : V8_INLINE Handle<String> string() const {  // Requires that set_string() has stored a location in string_.
151 : DCHECK_NOT_NULL(string_);
152 : return Handle<String>(string_);
153 : }
154 :
155 : private:
156 : friend class AstValueFactory;
157 :
158 17714991 : AstConsString() : next_(nullptr), segment_({nullptr, nullptr}) {}  // Starts empty and unlinked.
159 :
160 : AstConsString* next() const { return next_; }
161 : AstConsString** next_location() { return &next_; }
162 :
163 : // {string_} is stored as String** instead of a Handle<String> so it can be
164 : // stored in a union with {next_}.
165 17232962 : void set_string(Handle<String> string) { string_ = string.location(); }
166 : union {
167 : AstConsString* next_;
168 : String** string_;
169 : };
170 :
171 : struct Segment {  // List node: one raw string plus a link to the following node.
172 : const AstRawString* string;
173 : AstConsString::Segment* next;
174 : };
175 : Segment segment_;  // Head of the list, embedded in the object.
176 : };
177 :
178 : enum class AstSymbol : uint8_t { kHomeObjectSymbol };  // Payload of an AstValue whose type is SYMBOL.
179 :
180 : // AstValue is either a string, a symbol, a number, a string array, a boolean,
181 : // or a special value (null, undefined, the hole). NOTE(review): the Type enum below has no string-array entry; confirm that "a string array" is still accurate.
182 : class AstValue : public ZoneObject {
183 : public:
184 5824426 : bool IsString() const {
185 5824426 : return type_ == STRING;
186 : }
187 :
188 : bool IsSymbol() const { return type_ == SYMBOL; }
189 :
190 14345314 : bool IsNumber() const { return IsSmi() || IsHeapNumber(); }  // Any of the four numeric types.
191 :
192 : bool ContainsDot() const {  // True for the two *_WITH_DOT numeric types.
193 3431887 : return type_ == NUMBER_WITH_DOT || type_ == SMI_WITH_DOT;
194 : }
195 :
196 3990425 : const AstRawString* AsString() const {  // CHECK-fails unless the type is STRING.
197 16942834 : CHECK_EQ(STRING, type_);
198 16942834 : return string_;
199 : }
200 :
201 : AstSymbol AsSymbol() const {  // CHECK-fails unless the type is SYMBOL.
202 2200 : CHECK_EQ(SYMBOL, type_);
203 2200 : return symbol_;
204 : }
205 :
206 3268998 : double AsNumber() const {  // The stored double, or the stored int converted to double; other types are unreachable.
207 3268998 : if (IsHeapNumber()) return number_;
208 3093302 : if (IsSmi()) return smi_;
209 0 : UNREACHABLE();
210 : return 0;
211 : }
212 :
213 5697565 : Smi* AsSmi() const {  // CHECK-fails unless IsSmi().
214 5697565 : CHECK(IsSmi());
215 11395130 : return Smi::FromInt(smi_);
216 : }
217 :
218 2524732 : bool ToUint32(uint32_t* value) const {  // Returns false (leaving *value alone) for negative smis and non-numbers.
219 2524732 : if (IsSmi()) {
220 15972 : int num = smi_;
221 15972 : if (num < 0) return false;
222 15972 : *value = static_cast<uint32_t>(num);
223 15972 : return true;
224 : }
225 2508760 : if (IsHeapNumber()) {
226 42 : return DoubleToUint32IfEqualToSelf(number_, value);  // Result is decided entirely by this helper.
227 : }
228 : return false;
229 : }
230 :
231 : bool EqualsString(const AstRawString* string) const {  // Compares the AstRawString pointers, not the contents.
232 4681468 : return type_ == STRING && string_ == string;
233 : }
234 :
235 : bool IsPropertyName() const;
236 :
237 : bool BooleanValue() const;
238 :
239 26874283 : bool IsSmi() const { return type_ == SMI || type_ == SMI_WITH_DOT; }
240 : bool IsHeapNumber() const {
241 8103140 : return type_ == NUMBER || type_ == NUMBER_WITH_DOT;
242 : }
243 11318372 : bool IsFalse() const { return type_ == BOOLEAN && !bool_; }
244 11555451 : bool IsTrue() const { return type_ == BOOLEAN && bool_; }
245 : bool IsUndefined() const { return type_ == UNDEFINED; }
246 176 : bool IsTheHole() const { return type_ == THE_HOLE; }
247 1156723 : bool IsNull() const { return type_ == NULL_TYPE; }
248 :
249 : void Internalize(Isolate* isolate);
250 :
251 : // Can be called after Internalize has been called.
252 : V8_INLINE Handle<Object> value() const {  // Strings delegate to their AstRawString; everything else reads value_.
253 30734304 : if (type_ == STRING) {
254 12286167 : return string_->string();
255 : }
256 : DCHECK_NOT_NULL(value_);
257 18448137 : return Handle<Object>(value_);
258 : }
259 : AstValue* next() const { return next_; }
260 57350888 : void set_next(AstValue* next) { next_ = next; }
261 :
262 : private:
263 20710782 : void set_value(Handle<Object> object) { value_ = object.location(); }  // Overwrites next_ through the union.
264 : friend class AstValueFactory;
265 :
266 : enum Type {
267 : STRING,
268 : SYMBOL,
269 : NUMBER,
270 : NUMBER_WITH_DOT,
271 : SMI,
272 : SMI_WITH_DOT,
273 : BOOLEAN,
274 : NULL_TYPE,
275 : UNDEFINED,
276 : THE_HOLE
277 : };
278 :
279 36639061 : explicit AstValue(const AstRawString* s) : type_(STRING), next_(nullptr) {
280 36639061 : string_ = s;
281 : }
282 :
283 6119 : explicit AstValue(AstSymbol symbol) : type_(SYMBOL), next_(nullptr) {
284 6119 : symbol_ = symbol;
285 : }
286 :
287 5080760 : explicit AstValue(double n, bool with_dot) : next_(nullptr) {  // Stored as SMI* when DoubleToSmiInteger succeeds, as NUMBER* otherwise.
288 : int int_value;
289 5080760 : if (DoubleToSmiInteger(n, &int_value)) {
290 2847604 : type_ = with_dot ? SMI_WITH_DOT : SMI;
291 2847604 : smi_ = int_value;
292 : } else {
293 2233156 : type_ = with_dot ? NUMBER_WITH_DOT : NUMBER;
294 2233156 : number_ = n;
295 : }
296 5080760 : }
297 :
298 14085141 : AstValue(Type t, int i) : type_(t), next_(nullptr) {  // t must be SMI.
299 : DCHECK(type_ == SMI);
300 14085141 : smi_ = i;
301 : }
302 :
303 655471 : explicit AstValue(bool b) : type_(BOOLEAN), next_(nullptr) { bool_ = b; }
304 :
305 884336 : explicit AstValue(Type t) : type_(t), next_(nullptr) {  // For the payload-free types only.
306 : DCHECK(t == NULL_TYPE || t == UNDEFINED || t == THE_HOLE);
307 : }
308 :
309 : Type type_;
310 :
311 : // {value_} is stored as Object** instead of a Handle<Object> so it can be
312 : // stored in a union with {next_}.
313 : union {
314 : Object** value_; // if internalized
315 : AstValue* next_; // if !internalized
316 : };
317 :
318 : // Uninternalized value. The active member is selected by type_.
319 : union {
320 : const AstRawString* string_;
321 : double number_;
322 : int smi_;
323 : bool bool_;
324 : AstSymbol symbol_;
325 : };
326 : };
327 :
328 : // For generating constants: invokes F(name, literal) once per predefined string.
329 : #define STRING_CONSTANTS(F) \
330 : F(anonymous_function, "(anonymous function)") \
331 : F(arguments, "arguments") \
332 : F(async, "async") \
333 : F(await, "await") \
334 : F(boolean, "boolean") \
335 : F(constructor, "constructor") \
336 : F(default, "default") \
337 : F(done, "done") \
338 : F(dot, ".") \
339 : F(dot_for, ".for") \
340 : F(dot_generator_object, ".generator_object") \
341 : F(dot_iterator, ".iterator") \
342 : F(dot_result, ".result") \
343 : F(dot_switch_tag, ".switch_tag") \
344 : F(dot_catch, ".catch") \
345 : F(empty, "") \
346 : F(eval, "eval") \
347 : F(function, "function") \
348 : F(get_space, "get ") \
349 : F(length, "length") \
350 : F(let, "let") \
351 : F(name, "name") \
352 : F(native, "native") \
353 : F(new_target, ".new.target") \
354 : F(next, "next") \
355 : F(number, "number") \
356 : F(object, "object") \
357 : F(proto, "__proto__") \
358 : F(prototype, "prototype") \
359 : F(return, "return") \
360 : F(set_space, "set ") \
361 : F(star_default_star, "*default*") \
362 : F(string, "string") \
363 : F(symbol, "symbol") \
364 : F(this, "this") \
365 : F(this_function, ".this_function") \
366 : F(throw, "throw") \
367 : F(undefined, "undefined") \
368 : F(use_asm, "use asm") \
369 : F(use_strict, "use strict") \
370 : F(value, "value")
371 :
372 59285 : class AstStringConstants final {  // Holds one AstRawString per STRING_CONSTANTS entry, plus a hash map containing them.
373 : public:
374 60782 : AstStringConstants(Isolate* isolate, uint32_t hash_seed)  // Builds every constant in zone_ and registers it in string_table_.
375 : : zone_(isolate->allocator(), ZONE_NAME),
376 : string_table_(AstRawString::Compare),
377 121564 : hash_seed_(hash_seed) {
378 : DCHECK(ThreadId::Current().Equals(isolate->thread_id()));  // Must be constructed on the isolate's thread.
379 : #define F(name, str) \
380 : { \
381 : const char* data = str; \
382 : Vector<const uint8_t> literal(reinterpret_cast<const uint8_t*>(data), \
383 : static_cast<int>(strlen(data))); \
384 : uint32_t hash = StringHasher::HashSequentialString<uint8_t>( \
385 : literal.start(), literal.length(), hash_seed_); \
386 : name##_string_ = new (&zone_) AstRawString(true, literal, hash); \
387 : /* The Handle returned by the factory is located on the roots */ \
388 : /* array, not on the temporary HandleScope, so this is safe. */ \
389 : name##_string_->set_string(isolate->factory()->name##_string()); \
390 : base::HashMap::Entry* entry = \
391 : string_table_.InsertNew(name##_string_, name##_string_->hash()); \
392 : DCHECK(entry->value == nullptr); \
393 : entry->value = reinterpret_cast<void*>(1); \
394 : }
395 5044906 : STRING_CONSTANTS(F)  // Runs the block above once per constant.
396 : #undef F
397 60782 : }
398 :
399 : #define F(name, str) \
400 : const AstRawString* name##_string() const { return name##_string_; }
401 : STRING_CONSTANTS(F)  // One <name>_string() accessor per constant.
402 : #undef F
403 :
404 : uint32_t hash_seed() const { return hash_seed_; }  // The seed the constants' hashes were computed with.
405 : const base::CustomMatcherHashMap* string_table() const {
406 : return &string_table_;
407 : }
408 :
409 : private:
410 : Zone zone_;  // Allocates the AstRawString objects created in the constructor.
411 : base::CustomMatcherHashMap string_table_;
412 : uint32_t hash_seed_;
413 :
414 : #define F(name, str) AstRawString* name##_string_;
415 : STRING_CONSTANTS(F)  // One <name>_string_ member per constant.
416 : #undef F
417 :
418 : DISALLOW_COPY_AND_ASSIGN(AstStringConstants);
419 : };
420 :
421 : #define OTHER_CONSTANTS(F) \
422 : F(true_value) \
423 : F(false_value) \
424 : F(null_value) \
425 : F(undefined_value) \
426 : F(the_hole_value)  // Each entry names an AstValue* member (<name>_) of AstValueFactory.
427 :
428 : class AstValueFactory {  // Creates AstRawStrings, AstConsStrings and AstValues and keeps lists of them for later internalization.
429 : public:
430 3429793 : AstValueFactory(Zone* zone, const AstStringConstants* string_constants,
431 : uint32_t hash_seed)
432 : : string_table_(string_constants->string_table()),
433 : values_(nullptr),
434 : strings_(nullptr),
435 : strings_end_(&strings_),
436 : cons_strings_(nullptr),
437 : cons_strings_end_(&cons_strings_),
438 : string_constants_(string_constants),
439 : empty_cons_string_(nullptr),
440 : zone_(zone),
441 6859576 : hash_seed_(hash_seed) {
442 : #define F(name) name##_ = nullptr;
443 3429783 : OTHER_CONSTANTS(F)  // Sets true_value_, false_value_, etc. to nullptr.
444 : #undef F
445 : DCHECK_EQ(hash_seed, string_constants->hash_seed());  // The shared constants must have been hashed with the same seed.
446 3429783 : std::fill(smis_, smis_ + arraysize(smis_), nullptr);  // Clear the smi cache.
447 : std::fill(one_character_strings_,
448 : one_character_strings_ + arraysize(one_character_strings_),
449 3429783 : nullptr);
450 3429783 : empty_cons_string_ = NewConsString();
451 3429790 : }
452 :
453 : Zone* zone() const { return zone_; }
454 :
455 : const AstRawString* GetOneByteString(Vector<const uint8_t> literal) {
456 143518055 : return GetOneByteStringInternal(literal);
457 : }
458 817419 : const AstRawString* GetOneByteString(const char* string) {  // Convenience overload; the length comes from StrLength(string).
459 : return GetOneByteString(Vector<const uint8_t>(
460 817419 : reinterpret_cast<const uint8_t*>(string), StrLength(string)));
461 : }
462 : const AstRawString* GetTwoByteString(Vector<const uint16_t> literal) {
463 66191 : return GetTwoByteStringInternal(literal);
464 : }
465 : const AstRawString* GetString(Handle<String> literal);
466 : V8_EXPORT_PRIVATE AstConsString* NewConsString();
467 : AstConsString* NewConsString(const AstRawString* str);
468 : AstConsString* NewConsString(const AstRawString* str1,
469 : const AstRawString* str2);
470 :
471 : V8_EXPORT_PRIVATE void Internalize(Isolate* isolate);
472 :
473 : #define F(name, str) \
474 : const AstRawString* name##_string() const { \
475 : return string_constants_->name##_string(); \
476 : }
477 422692695 : STRING_CONSTANTS(F)  // One forwarding <name>_string() accessor per shared constant.
478 : #undef F
479 1133 : const AstConsString* empty_cons_string() const { return empty_cons_string_; }  // The cons string created in the constructor.
480 :
481 : V8_EXPORT_PRIVATE const AstValue* NewString(const AstRawString* string);
482 : // A JavaScript symbol (ECMA-262 edition 6).
483 : const AstValue* NewSymbol(AstSymbol symbol);
484 : V8_EXPORT_PRIVATE const AstValue* NewNumber(double number,
485 : bool with_dot = false);
486 : const AstValue* NewSmi(uint32_t number);
487 : const AstValue* NewBoolean(bool b);
488 : const AstValue* NewStringList(ZoneList<const AstRawString*>* strings);
489 : const AstValue* NewNull();
490 : const AstValue* NewUndefined();
491 : const AstValue* NewTheHole();
492 :
493 : private:
494 : static const uint32_t kMaxCachedSmi = 1 << 10;  // smis_ has kMaxCachedSmi + 1 slots.
495 :
496 : STATIC_ASSERT(kMaxCachedSmi <= Smi::kMaxValue);
497 :
498 : AstValue* AddValue(AstValue* value) {  // Pushes value onto the front of the values_ list.
499 57350888 : value->set_next(values_);
500 57350888 : values_ = value;
501 : return value;
502 : }
503 : AstRawString* AddString(AstRawString* string) {  // Appends at the tail of strings_, so creation order is kept.
504 41441090 : *strings_end_ = string;
505 41441090 : strings_end_ = string->next_location();
506 : return string;
507 : }
508 : AstConsString* AddConsString(AstConsString* string) {  // Appends at the tail of cons_strings_.
509 17714991 : *cons_strings_end_ = string;
510 17714991 : cons_strings_end_ = string->next_location();
511 : return string;
512 : }
513 : void ResetStrings() {  // Empties both string lists; values_ is left untouched.
514 5591153 : strings_ = nullptr;
515 5591153 : strings_end_ = &strings_;
516 5591153 : cons_strings_ = nullptr;
517 5591153 : cons_strings_end_ = &cons_strings_;
518 : }
519 : V8_EXPORT_PRIVATE AstRawString* GetOneByteStringInternal(
520 : Vector<const uint8_t> literal);
521 : AstRawString* GetTwoByteStringInternal(Vector<const uint16_t> literal);
522 : AstRawString* GetString(uint32_t hash, bool is_one_byte,
523 : Vector<const byte> literal_bytes);
524 :
525 : // All strings are copied here, one after another (no NULLs in between).
526 : base::CustomMatcherHashMap string_table_;
527 : // For keeping track of all AstValues and AstRawStrings we've created (so that
528 : // they can be internalized later).
529 : AstValue* values_;
530 :
531 : // We need to keep track of strings_ in order since cons strings require their
532 : // members to be internalized first.
533 : AstRawString* strings_;
534 : AstRawString** strings_end_;  // Points at the link to write on the next append (initially &strings_).
535 : AstConsString* cons_strings_;
536 : AstConsString** cons_strings_end_;  // Same role as strings_end_, for cons_strings_.
537 :
538 : // Holds constant string values which are shared across the isolate.
539 : const AstStringConstants* string_constants_;
540 : const AstConsString* empty_cons_string_;
541 :
542 : // Caches for faster access: small numbers, one character lowercase strings
543 : // (for minified code).
544 : AstValue* smis_[kMaxCachedSmi + 1];
545 : AstRawString* one_character_strings_[26];
546 :
547 : Zone* zone_;
548 :
549 : uint32_t hash_seed_;
550 :
551 : #define F(name) AstValue* name##_;
552 : OTHER_CONSTANTS(F)  // Declares true_value_, false_value_, null_value_, undefined_value_, the_hole_value_.
553 : #undef F
554 : };
555 : } // namespace internal
556 : } // namespace v8
557 :
558 : #undef STRING_CONSTANTS
559 : #undef OTHER_CONSTANTS
560 :
561 : #endif // V8_AST_AST_VALUE_FACTORY_H_
|