/src/hermes/external/llvh/include/llvh/ADT/Twine.h
Line | Count | Source (jump to first uncovered line) |
1 | | //===- Twine.h - Fast Temporary String Concatenation ------------*- C++ -*-===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | |
10 | | #ifndef LLVM_ADT_TWINE_H |
11 | | #define LLVM_ADT_TWINE_H |
12 | | |
13 | | #include "llvh/ADT/SmallVector.h" |
14 | | #include "llvh/ADT/StringRef.h" |
15 | | #include "llvh/Support/ErrorHandling.h" |
16 | | #include <cassert> |
17 | | #include <cstdint> |
18 | | #include <string> |
19 | | |
20 | | namespace llvh { |
21 | | |
22 | | class formatv_object_base; |
23 | | class raw_ostream; |
24 | | |
25 | | /// Twine - A lightweight data structure for efficiently representing the |
26 | | /// concatenation of temporary values as strings. |
27 | | /// |
28 | | /// A Twine is a kind of rope, it represents a concatenated string using a |
29 | | /// binary-tree, where the string is the preorder of the nodes. Since the |
30 | | /// Twine can be efficiently rendered into a buffer when its result is used, |
31 | | /// it avoids the cost of generating temporary values for intermediate string |
32 | | /// results -- particularly in cases when the Twine result is never |
33 | | /// required. By explicitly tracking the type of leaf nodes, we can also avoid |
34 | | /// the creation of temporary strings for conversion operations (such as |
35 | | /// appending an integer to a string). |
36 | | /// |
37 | | /// A Twine is not intended for use directly and should not be stored, its |
38 | | /// implementation relies on the ability to store pointers to temporary stack |
39 | | /// objects which may be deallocated at the end of a statement. Twines should |
40 | | /// only be accepted as const references in arguments, when an API wishes |
41 | | /// to accept possibly-concatenated strings. |
42 | | /// |
43 | | /// Twines support a special 'null' value, which always concatenates to form |
44 | | /// itself, and renders as an empty string. This can be returned from APIs to |
45 | | /// effectively nullify any concatenations performed on the result. |
46 | | /// |
47 | | /// \b Implementation |
48 | | /// |
49 | | /// Given the nature of a Twine, it is not possible for the Twine's |
50 | | /// concatenation method to construct interior nodes; the result must be |
51 | | /// represented inside the returned value. For this reason a Twine object |
52 | | /// actually holds two values, the left- and right-hand sides of a |
53 | | /// concatenation. We also have nullary Twine objects, which are effectively |
54 | | /// sentinel values that represent empty strings. |
55 | | /// |
56 | | /// Thus, a Twine can effectively have zero, one, or two children. The \see |
57 | | /// isNullary(), \see isUnary(), and \see isBinary() predicates exist for |
58 | | /// testing the number of children. |
59 | | /// |
60 | | /// We maintain a number of invariants on Twine objects (FIXME: Why): |
61 | | /// - Nullary twines are always represented with their Kind on the left-hand |
62 | | /// side, and the Empty kind on the right-hand side. |
63 | | /// - Unary twines are always represented with the value on the left-hand |
64 | | /// side, and the Empty kind on the right-hand side. |
65 | | /// - If a Twine has another Twine as a child, that child should always be |
66 | | /// binary (otherwise it could have been folded into the parent). |
67 | | /// |
68 | | /// These invariants are checked by \see isValid(). |
69 | | /// |
70 | | /// \b Efficiency Considerations |
71 | | /// |
72 | | /// The Twine is designed to yield efficient and small code for common |
73 | | /// situations. For this reason, the concat() method is inlined so that |
74 | | /// concatenations of leaf nodes can be optimized into stores directly into a |
75 | | /// single stack allocated object. |
76 | | /// |
77 | | /// In practice, not all compilers can be trusted to optimize concat() fully, |
78 | | /// so we provide two additional methods (and accompanying operator+ |
79 | | /// overloads) to guarantee that particularly important cases (cstring plus |
80 | | /// StringRef) codegen as desired. |
81 | | class Twine { |
82 | | /// NodeKind - Represent the type of an argument. |
83 | | enum NodeKind : unsigned char { |
84 | | /// The null twine: it renders as an empty string, and concatenating |
85 | | /// anything with it yields the null twine again. |
86 | | NullKind, |
87 | | |
88 | | /// The empty string. |
89 | | EmptyKind, |
90 | | |
91 | | /// A pointer to a Twine instance. |
92 | | TwineKind, |
93 | | |
94 | | /// A pointer to a C string instance. |
95 | | CStringKind, |
96 | | |
97 | | /// A pointer to an std::string instance. |
98 | | StdStringKind, |
99 | | |
100 | | /// A pointer to a StringRef instance. |
101 | | StringRefKind, |
102 | | |
103 | | /// A pointer to a SmallString instance. |
104 | | SmallStringKind, |
105 | | |
106 | | /// A pointer to a formatv_object_base instance. |
107 | | FormatvObjectKind, |
108 | | |
109 | | /// A char value, to render as a character. |
110 | | CharKind, |
111 | | |
112 | | /// An unsigned int value, to render as an unsigned decimal integer. |
113 | | DecUIKind, |
114 | | |
115 | | /// An int value, to render as a signed decimal integer. |
116 | | DecIKind, |
117 | | |
118 | | /// A pointer to an unsigned long value, to render as an unsigned decimal |
119 | | /// integer. |
120 | | DecULKind, |
121 | | |
122 | | /// A pointer to a long value, to render as a signed decimal integer. |
123 | | DecLKind, |
124 | | |
125 | | /// A pointer to an unsigned long long value, to render as an unsigned |
126 | | /// decimal integer. |
127 | | DecULLKind, |
128 | | |
129 | | /// A pointer to a long long value, to render as a signed decimal integer. |
130 | | DecLLKind, |
131 | | |
132 | | /// A pointer to a uint64_t value, to render as an unsigned hexadecimal |
133 | | /// integer. |
134 | | UHexKind |
135 | | }; |
136 | | |
137 | | union Child |
138 | | { |
139 | | const Twine *twine; |
140 | | const char *cString; |
141 | | const std::string *stdString; |
142 | | const StringRef *stringRef; |
143 | | const SmallVectorImpl<char> *smallString; |
144 | | const formatv_object_base *formatvObject; |
145 | | char character; |
146 | | unsigned int decUI; |
147 | | int decI; |
148 | | const unsigned long *decUL; |
149 | | const long *decL; |
150 | | const unsigned long long *decULL; |
151 | | const long long *decLL; |
152 | | const uint64_t *uHex; |
153 | | }; |
154 | | |
155 | | /// LHS - The prefix in the concatenation, which may be uninitialized for |
156 | | /// Null or Empty kinds. |
157 | | Child LHS; |
158 | | |
159 | | /// RHS - The suffix in the concatenation, which may be uninitialized for |
160 | | /// Null or Empty kinds. |
161 | | Child RHS; |
162 | | |
163 | | /// LHSKind - The NodeKind of the left hand side, \see getLHSKind(). |
164 | | NodeKind LHSKind = EmptyKind; |
165 | | |
166 | | /// RHSKind - The NodeKind of the right hand side, \see getRHSKind(). |
167 | | NodeKind RHSKind = EmptyKind; |
168 | | |
169 | | /// Construct a nullary twine; the kind must be NullKind or EmptyKind. |
170 | 0 | explicit Twine(NodeKind Kind) : LHSKind(Kind) { |
171 | 0 | assert(isNullary() && "Invalid kind!"); |
172 | 0 | } |
173 | | |
174 | | /// Construct a binary twine. |
175 | | explicit Twine(const Twine &LHS, const Twine &RHS) |
176 | 0 | : LHSKind(TwineKind), RHSKind(TwineKind) { |
177 | 0 | this->LHS.twine = &LHS; |
178 | 0 | this->RHS.twine = &RHS; |
179 | 0 | assert(isValid() && "Invalid twine!"); |
180 | 0 | } |
181 | | |
182 | | /// Construct a twine from explicit values. |
183 | | explicit Twine(Child LHS, NodeKind LHSKind, Child RHS, NodeKind RHSKind) |
184 | 64 | : LHS(LHS), RHS(RHS), LHSKind(LHSKind), RHSKind(RHSKind) { |
185 | 64 | assert(isValid() && "Invalid twine!"); |
186 | 64 | } |
187 | | |
188 | | /// Check for the null twine. |
189 | 808 | bool isNull() const { |
190 | 808 | return getLHSKind() == NullKind; |
191 | 808 | } |
192 | | |
193 | | /// Check for the empty twine. |
194 | 808 | bool isEmpty() const { |
195 | 808 | return getLHSKind() == EmptyKind; |
196 | 808 | } |
197 | | |
198 | | /// Check if this is a nullary twine (null or empty). |
199 | 692 | bool isNullary() const { |
200 | 692 | return isNull() || isEmpty(); |
201 | 692 | } |
202 | | |
203 | | /// Check if this is a unary twine. |
204 | 116 | bool isUnary() const { |
205 | 116 | return getRHSKind() == EmptyKind && !isNullary(); |
206 | 116 | } |
207 | | |
208 | | /// Check if this is a binary twine. |
209 | 39 | bool isBinary() const { |
210 | 39 | return getLHSKind() != NullKind && getRHSKind() != EmptyKind; |
211 | 39 | } |
212 | | |
213 | | /// Check if this is a valid twine (satisfying the invariants on |
214 | | /// order and number of arguments). |
215 | 615 | bool isValid() const { |
216 | | // Nullary twines always have Empty on the RHS. |
217 | 615 | if (isNullary() && getRHSKind() != EmptyKind) |
218 | 0 | return false; |
219 | | |
220 | | // Null should never appear on the RHS. |
221 | 615 | if (getRHSKind() == NullKind) |
222 | 0 | return false; |
223 | | |
224 | | // The RHS cannot be non-empty if the LHS is empty. |
225 | 615 | if (getRHSKind() != EmptyKind && getLHSKind() == EmptyKind) |
226 | 0 | return false; |
227 | | |
228 | | // A twine child should always be binary. |
229 | 615 | if (getLHSKind() == TwineKind && |
230 | 615 | !LHS.twine->isBinary()) |
231 | 0 | return false; |
232 | 615 | if (getRHSKind() == TwineKind && |
233 | 615 | !RHS.twine->isBinary()) |
234 | 0 | return false; |
235 | | |
236 | 615 | return true; |
237 | 615 | } |
238 | | |
239 | | /// Get the NodeKind of the left-hand side. |
240 | 3.95k | NodeKind getLHSKind() const { return LHSKind; } |
241 | | |
242 | | /// Get the NodeKind of the right-hand side. |
243 | 3.07k | NodeKind getRHSKind() const { return RHSKind; } |
244 | | |
245 | | /// Print one child from a twine. |
246 | | void printOneChild(raw_ostream &OS, Child Ptr, NodeKind Kind) const; |
247 | | |
248 | | /// Print the representation of one child from a twine. |
249 | | void printOneChildRepr(raw_ostream &OS, Child Ptr, |
250 | | NodeKind Kind) const; |
251 | | |
252 | | public: |
253 | | /// @name Constructors |
254 | | /// @{ |
255 | | |
256 | | /// Construct from an empty string. |
257 | 0 | /*implicit*/ Twine() { |
258 | 0 | assert(isValid() && "Invalid twine!"); |
259 | 0 | } |
260 | | |
261 | | Twine(const Twine &) = default; |
262 | | |
263 | | /// Construct from a C string. |
264 | | /// |
265 | | /// We take care here to optimize "" into the empty twine -- this will be |
266 | | /// optimized out for string constants. This allows Twine arguments to have |
267 | | /// default "" values, without introducing unnecessary string constants. |
268 | 294 | /*implicit*/ Twine(const char *Str) { |
269 | 294 | if (Str[0] != '\0') { |
270 | 281 | LHS.cString = Str; |
271 | 281 | LHSKind = CStringKind; |
272 | 281 | } else |
273 | 13 | LHSKind = EmptyKind; |
274 | | |
275 | 294 | assert(isValid() && "Invalid twine!"); |
276 | 294 | } |
277 | | |
278 | | /// Construct from an std::string. |
279 | 8 | /*implicit*/ Twine(const std::string &Str) : LHSKind(StdStringKind) { |
280 | 8 | LHS.stdString = &Str; |
281 | 8 | assert(isValid() && "Invalid twine!"); |
282 | 8 | } |
283 | | |
284 | | /// Construct from a StringRef. |
285 | 249 | /*implicit*/ Twine(const StringRef &Str) : LHSKind(StringRefKind) { |
286 | 249 | LHS.stringRef = &Str; |
287 | 249 | assert(isValid() && "Invalid twine!"); |
288 | 249 | } |
289 | | |
290 | | /// Construct from a SmallString. |
291 | | /*implicit*/ Twine(const SmallVectorImpl<char> &Str) |
292 | 0 | : LHSKind(SmallStringKind) { |
293 | 0 | LHS.smallString = &Str; |
294 | 0 | assert(isValid() && "Invalid twine!"); |
295 | 0 | } |
296 | | |
297 | | /// Construct from a formatv_object_base. |
298 | | /*implicit*/ Twine(const formatv_object_base &Fmt) |
299 | 0 | : LHSKind(FormatvObjectKind) { |
300 | 0 | LHS.formatvObject = &Fmt; |
301 | 0 | assert(isValid() && "Invalid twine!"); |
302 | 0 | } |
303 | | |
304 | | /// Construct from a char. |
305 | 0 | explicit Twine(char Val) : LHSKind(CharKind) { |
306 | 0 | LHS.character = Val; |
307 | 0 | } |
308 | | |
309 | | /// Construct from a signed char. |
310 | 0 | explicit Twine(signed char Val) : LHSKind(CharKind) { |
311 | 0 | LHS.character = static_cast<char>(Val); |
312 | 0 | } |
313 | | |
314 | | /// Construct from an unsigned char. |
315 | 0 | explicit Twine(unsigned char Val) : LHSKind(CharKind) { |
316 | 0 | LHS.character = static_cast<char>(Val); |
317 | 0 | } |
318 | | |
319 | | /// Construct a twine to print \p Val as an unsigned decimal integer. |
320 | 0 | explicit Twine(unsigned Val) : LHSKind(DecUIKind) { |
321 | 0 | LHS.decUI = Val; |
322 | 0 | } |
323 | | |
324 | | /// Construct a twine to print \p Val as a signed decimal integer. |
325 | 26 | explicit Twine(int Val) : LHSKind(DecIKind) { |
326 | 26 | LHS.decI = Val; |
327 | 26 | } |
328 | | |
329 | | /// Construct a twine to print \p Val as an unsigned decimal integer. |
330 | 0 | explicit Twine(const unsigned long &Val) : LHSKind(DecULKind) { |
331 | 0 | LHS.decUL = &Val; |
332 | 0 | } |
333 | | |
334 | | /// Construct a twine to print \p Val as a signed decimal integer. |
335 | 0 | explicit Twine(const long &Val) : LHSKind(DecLKind) { |
336 | 0 | LHS.decL = &Val; |
337 | 0 | } |
338 | | |
339 | | /// Construct a twine to print \p Val as an unsigned decimal integer. |
340 | 0 | explicit Twine(const unsigned long long &Val) : LHSKind(DecULLKind) { |
341 | 0 | LHS.decULL = &Val; |
342 | 0 | } |
343 | | |
344 | | /// Construct a twine to print \p Val as a signed decimal integer. |
345 | 0 | explicit Twine(const long long &Val) : LHSKind(DecLLKind) { |
346 | 0 | LHS.decLL = &Val; |
347 | 0 | } |
348 | | |
349 | | // FIXME: Unfortunately, to make sure this is as efficient as possible we |
350 | | // need extra binary constructors from particular types. We can't rely on |
351 | | // the compiler to be smart enough to fold operator+()/concat() down to the |
352 | | // right thing. Yet. |
353 | | |
354 | | /// Construct as the concatenation of a C string and a StringRef. |
355 | | /*implicit*/ Twine(const char *LHS, const StringRef &RHS) |
356 | 0 | : LHSKind(CStringKind), RHSKind(StringRefKind) { |
357 | 0 | this->LHS.cString = LHS; |
358 | 0 | this->RHS.stringRef = &RHS; |
359 | 0 | assert(isValid() && "Invalid twine!"); |
360 | 0 | } |
361 | | |
362 | | /// Construct as the concatenation of a StringRef and a C string. |
363 | | /*implicit*/ Twine(const StringRef &LHS, const char *RHS) |
364 | 0 | : LHSKind(StringRefKind), RHSKind(CStringKind) { |
365 | 0 | this->LHS.stringRef = &LHS; |
366 | 0 | this->RHS.cString = RHS; |
367 | 0 | assert(isValid() && "Invalid twine!"); |
368 | 0 | } |
369 | | |
370 | | /// Since the intended use of twines is as temporary objects, assignments |
371 | | /// when concatenating might cause undefined behavior or stack corruption. |
372 | | Twine &operator=(const Twine &) = delete; |
373 | | |
374 | | /// Create a 'null' string, which is an empty string that always |
375 | | /// concatenates to form another empty string. |
376 | 0 | static Twine createNull() { |
377 | 0 | return Twine(NullKind); |
378 | 0 | } |
379 | | |
380 | | /// @} |
381 | | /// @name Numeric Conversions |
382 | | /// @{ |
383 | | |
384 | | /// Construct a twine to print \p Val as an unsigned hexadecimal integer. |
385 | 6 | static Twine utohexstr(const uint64_t &Val) { |
386 | 6 | Child LHS, RHS; |
387 | 6 | LHS.uHex = &Val; |
388 | 6 | RHS.twine = nullptr; |
389 | 6 | return Twine(LHS, UHexKind, RHS, EmptyKind); |
390 | 6 | } |
391 | | |
392 | | /// @} |
393 | | /// @name Predicate Operations |
394 | | /// @{ |
395 | | |
396 | | /// Check if this twine is trivially empty; a false return value does not |
397 | | /// necessarily mean the twine is non-empty. |
398 | 0 | bool isTriviallyEmpty() const { |
399 | 0 | return isNullary(); |
400 | 0 | } |
401 | | |
402 | | /// Return true if this twine can be dynamically accessed as a single |
403 | | /// StringRef value with getSingleStringRef(). |
404 | 1.00k | bool isSingleStringRef() const { |
405 | 1.00k | if (getRHSKind() != EmptyKind) return false; |
406 | | |
407 | 994 | switch (getLHSKind()) { |
408 | 26 | case EmptyKind: |
409 | 524 | case CStringKind: |
410 | 524 | case StdStringKind: |
411 | 994 | case StringRefKind: |
412 | 994 | case SmallStringKind: |
413 | 994 | return true; |
414 | 0 | default: |
415 | 0 | return false; |
416 | 994 | } |
417 | 994 | } |
418 | | |
419 | | /// @} |
420 | | /// @name String Operations |
421 | | /// @{ |
422 | | |
423 | | Twine concat(const Twine &Suffix) const; |
424 | | |
425 | | /// @} |
426 | | /// @name Output & Conversion. |
427 | | /// @{ |
428 | | |
429 | | /// Return the twine contents as a std::string. |
430 | | std::string str() const; |
431 | | |
432 | | /// Append the concatenated string into the given SmallString or SmallVector. |
433 | | void toVector(SmallVectorImpl<char> &Out) const; |
434 | | |
435 | | /// This returns the twine as a single StringRef. This method is only valid |
436 | | /// if isSingleStringRef() is true. |
437 | 497 | StringRef getSingleStringRef() const { |
438 | 497 | assert(isSingleStringRef() &&"This cannot be had as a single stringref!"); |
439 | 497 | switch (getLHSKind()) { |
440 | 0 | default: llvm_unreachable("Out of sync with isSingleStringRef"); |
441 | 13 | case EmptyKind: return StringRef(); |
442 | 249 | case CStringKind: return StringRef(LHS.cString); |
443 | 0 | case StdStringKind: return StringRef(*LHS.stdString); |
444 | 235 | case StringRefKind: return *LHS.stringRef; |
445 | 0 | case SmallStringKind: |
446 | 0 | return StringRef(LHS.smallString->data(), LHS.smallString->size()); |
447 | 497 | } |
448 | 497 | } |
449 | | |
450 | | /// This returns the twine as a single StringRef if it can be |
451 | | /// represented as such. Otherwise the twine is written into the given |
452 | | /// SmallVector and a StringRef to the SmallVector's data is returned. |
453 | 512 | StringRef toStringRef(SmallVectorImpl<char> &Out) const { |
454 | 512 | if (isSingleStringRef()) |
455 | 497 | return getSingleStringRef(); |
456 | 15 | toVector(Out); |
457 | 15 | return StringRef(Out.data(), Out.size()); |
458 | 512 | } |
459 | | |
460 | | /// This returns the twine as a single null terminated StringRef if it |
461 | | /// can be represented as such. Otherwise the twine is written into the |
462 | | /// given SmallVector and a StringRef to the SmallVector's data is returned. |
463 | | /// |
464 | | /// The returned StringRef's size does not include the null terminator. |
465 | | StringRef toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const; |
466 | | |
467 | | /// Write the concatenated string represented by this twine to the |
468 | | /// stream \p OS. |
469 | | void print(raw_ostream &OS) const; |
470 | | |
471 | | /// Dump the concatenated string represented by this twine to stderr. |
472 | | void dump() const; |
473 | | |
474 | | /// Write the representation of this twine to the stream \p OS. |
475 | | void printRepr(raw_ostream &OS) const; |
476 | | |
477 | | /// Dump the representation of this twine to stderr. |
478 | | void dumpRepr() const; |
479 | | |
480 | | /// @} |
481 | | }; |
482 | | |
483 | | /// @name Twine Inline Implementations |
484 | | /// @{ |
485 | | |
486 | 58 | inline Twine Twine::concat(const Twine &Suffix) const { |
487 | | // Concatenation with null is null. |
488 | 58 | if (isNull() || Suffix.isNull()) |
489 | 0 | return Twine(NullKind); |
490 | | |
491 | | // Concatenation with empty yields the other side. |
492 | 58 | if (isEmpty()) |
493 | 0 | return Suffix; |
494 | 58 | if (Suffix.isEmpty()) |
495 | 0 | return *this; |
496 | | |
497 | | // Otherwise we need to create a new node, taking care to fold in unary |
498 | | // twines. |
499 | 58 | Child NewLHS, NewRHS; |
500 | 58 | NewLHS.twine = this; |
501 | 58 | NewRHS.twine = &Suffix; |
502 | 58 | NodeKind NewLHSKind = TwineKind, NewRHSKind = TwineKind; |
503 | 58 | if (isUnary()) { |
504 | 19 | NewLHS = LHS; |
505 | 19 | NewLHSKind = getLHSKind(); |
506 | 19 | } |
507 | 58 | if (Suffix.isUnary()) { |
508 | 58 | NewRHS = Suffix.LHS; |
509 | 58 | NewRHSKind = Suffix.getLHSKind(); |
510 | 58 | } |
511 | | |
512 | 58 | return Twine(NewLHS, NewLHSKind, NewRHS, NewRHSKind); |
513 | 58 | } |
514 | | |
515 | 58 | inline Twine operator+(const Twine &LHS, const Twine &RHS) { |
516 | 58 | return LHS.concat(RHS); |
517 | 58 | } |
518 | | |
519 | | /// Additional overload to guarantee simplified codegen; this is equivalent to |
520 | | /// concat(). |
521 | | |
522 | 0 | inline Twine operator+(const char *LHS, const StringRef &RHS) { |
523 | 0 | return Twine(LHS, RHS); |
524 | 0 | } |
525 | | |
526 | | /// Additional overload to guarantee simplified codegen; this is equivalent to |
527 | | /// concat(). |
528 | | |
529 | 0 | inline Twine operator+(const StringRef &LHS, const char *RHS) { |
530 | 0 | return Twine(LHS, RHS); |
531 | 0 | } |
532 | | |
533 | 0 | inline raw_ostream &operator<<(raw_ostream &OS, const Twine &RHS) { |
534 | 0 | RHS.print(OS); |
535 | 0 | return OS; |
536 | 0 | } |
537 | | |
538 | | /// @} |
539 | | |
540 | | } // end namespace llvh |
541 | | |
542 | | #endif // LLVM_ADT_TWINE_H |