xref: /freebsd/contrib/llvm-project/llvm/include/llvm/ADT/Twine.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- Twine.h - Fast Temporary String Concatenation ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_ADT_TWINE_H
10 #define LLVM_ADT_TWINE_H
11 
12 #include "llvm/ADT/SmallVector.h"
13 #include "llvm/ADT/StringRef.h"
14 #include "llvm/Support/Compiler.h"
15 #include "llvm/Support/ErrorHandling.h"
16 #include <cassert>
17 #include <cstdint>
18 #include <string>
19 #include <string_view>
20 
21 namespace llvm {
22 
23   class formatv_object_base;
24   class raw_ostream;
25 
26   /// Twine - A lightweight data structure for efficiently representing the
27   /// concatenation of temporary values as strings.
28   ///
29   /// A Twine is a kind of rope, it represents a concatenated string using a
30   /// binary-tree, where the string is the preorder of the nodes. Since the
31   /// Twine can be efficiently rendered into a buffer when its result is used,
32   /// it avoids the cost of generating temporary values for intermediate string
33   /// results -- particularly in cases when the Twine result is never
34   /// required. By explicitly tracking the type of leaf nodes, we can also avoid
35   /// the creation of temporary strings for conversions operations (such as
36   /// appending an integer to a string).
37   ///
38   /// A Twine is not intended for use directly and should not be stored, its
39   /// implementation relies on the ability to store pointers to temporary stack
40   /// objects which may be deallocated at the end of a statement. Twines should
41   /// only be used as const references in arguments, when an API wishes
42   /// to accept possibly-concatenated strings.
43   ///
44   /// Twines support a special 'null' value, which always concatenates to form
45   /// itself, and renders as an empty string. This can be returned from APIs to
46   /// effectively nullify any concatenations performed on the result.
47   ///
48   /// \b Implementation
49   ///
50   /// Given the nature of a Twine, it is not possible for the Twine's
51   /// concatenation method to construct interior nodes; the result must be
52   /// represented inside the returned value. For this reason a Twine object
53   /// actually holds two values, the left- and right-hand sides of a
54   /// concatenation. We also have nullary Twine objects, which are effectively
55   /// sentinel values that represent empty strings.
56   ///
57   /// Thus, a Twine can effectively have zero, one, or two children. The \see
58   /// isNullary(), \see isUnary(), and \see isBinary() predicates exist for
59   /// testing the number of children.
60   ///
61   /// We maintain a number of invariants on Twine objects (FIXME: Why):
62   ///  - Nullary twines are always represented with their Kind on the left-hand
63   ///    side, and the Empty kind on the right-hand side.
64   ///  - Unary twines are always represented with the value on the left-hand
65   ///    side, and the Empty kind on the right-hand side.
66   ///  - If a Twine has another Twine as a child, that child should always be
67   ///    binary (otherwise it could have been folded into the parent).
68   ///
69   /// These invariants are check by \see isValid().
70   ///
71   /// \b Efficiency Considerations
72   ///
73   /// The Twine is designed to yield efficient and small code for common
74   /// situations. For this reason, the concat() method is inlined so that
75   /// concatenations of leaf nodes can be optimized into stores directly into a
76   /// single stack allocated object.
77   ///
78   /// In practice, not all compilers can be trusted to optimize concat() fully,
79   /// so we provide two additional methods (and accompanying operator+
80   /// overloads) to guarantee that particularly important cases (cstring plus
81   /// StringRef) codegen as desired.
82   class Twine {
83     /// NodeKind - Represent the type of an argument.
84     enum NodeKind : unsigned char {
85       /// An empty string; the result of concatenating anything with it is also
86       /// empty.
87       NullKind,
88 
89       /// The empty string.
90       EmptyKind,
91 
92       /// A pointer to a Twine instance.
93       TwineKind,
94 
95       /// A pointer to a C string instance.
96       CStringKind,
97 
98       /// A pointer to an std::string instance.
99       StdStringKind,
100 
101       /// A Pointer and Length representation. Used for std::string_view,
102       /// StringRef, and SmallString.  Can't use a StringRef here
103       /// because they are not trivally constructible.
104       PtrAndLengthKind,
105 
106       /// A pointer and length representation that's also null-terminated.
107       /// Guaranteed to be constructed from a compile-time string literal.
108       StringLiteralKind,
109 
110       /// A pointer to a formatv_object_base instance.
111       FormatvObjectKind,
112 
113       /// A char value, to render as a character.
114       CharKind,
115 
116       /// An unsigned int value, to render as an unsigned decimal integer.
117       DecUIKind,
118 
119       /// An int value, to render as a signed decimal integer.
120       DecIKind,
121 
122       /// A pointer to an unsigned long value, to render as an unsigned decimal
123       /// integer.
124       DecULKind,
125 
126       /// A pointer to a long value, to render as a signed decimal integer.
127       DecLKind,
128 
129       /// A pointer to an unsigned long long value, to render as an unsigned
130       /// decimal integer.
131       DecULLKind,
132 
133       /// A pointer to a long long value, to render as a signed decimal integer.
134       DecLLKind,
135 
136       /// A pointer to a uint64_t value, to render as an unsigned hexadecimal
137       /// integer.
138       UHexKind
139     };
140 
141     union Child
142     {
143       const Twine *twine;
144       const char *cString;
145       const std::string *stdString;
146       struct {
147         const char *ptr;
148         size_t length;
149       } ptrAndLength;
150       const formatv_object_base *formatvObject;
151       char character;
152       unsigned int decUI;
153       int decI;
154       const unsigned long *decUL;
155       const long *decL;
156       const unsigned long long *decULL;
157       const long long *decLL;
158       const uint64_t *uHex;
159     };
160 
161     /// LHS - The prefix in the concatenation, which may be uninitialized for
162     /// Null or Empty kinds.
163     Child LHS;
164 
165     /// RHS - The suffix in the concatenation, which may be uninitialized for
166     /// Null or Empty kinds.
167     Child RHS;
168 
169     /// LHSKind - The NodeKind of the left hand side, \see getLHSKind().
170     NodeKind LHSKind = EmptyKind;
171 
172     /// RHSKind - The NodeKind of the right hand side, \see getRHSKind().
173     NodeKind RHSKind = EmptyKind;
174 
175     /// Construct a nullary twine; the kind must be NullKind or EmptyKind.
Twine(NodeKind Kind)176     explicit Twine(NodeKind Kind) : LHSKind(Kind) {
177       assert(isNullary() && "Invalid kind!");
178     }
179 
180     /// Construct a binary twine.
Twine(const Twine & LHS,const Twine & RHS)181     explicit Twine(const Twine &LHS, const Twine &RHS)
182         : LHSKind(TwineKind), RHSKind(TwineKind) {
183       this->LHS.twine = &LHS;
184       this->RHS.twine = &RHS;
185       assert(isValid() && "Invalid twine!");
186     }
187 
188     /// Construct a twine from explicit values.
Twine(Child LHS,NodeKind LHSKind,Child RHS,NodeKind RHSKind)189     explicit Twine(Child LHS, NodeKind LHSKind, Child RHS, NodeKind RHSKind)
190         : LHS(LHS), RHS(RHS), LHSKind(LHSKind), RHSKind(RHSKind) {
191       assert(isValid() && "Invalid twine!");
192     }
193 
194     /// Check for the null twine.
isNull()195     bool isNull() const {
196       return getLHSKind() == NullKind;
197     }
198 
199     /// Check for the empty twine.
isEmpty()200     bool isEmpty() const {
201       return getLHSKind() == EmptyKind;
202     }
203 
204     /// Check if this is a nullary twine (null or empty).
isNullary()205     bool isNullary() const {
206       return isNull() || isEmpty();
207     }
208 
209     /// Check if this is a unary twine.
isUnary()210     bool isUnary() const {
211       return getRHSKind() == EmptyKind && !isNullary();
212     }
213 
214     /// Check if this is a binary twine.
isBinary()215     bool isBinary() const {
216       return getLHSKind() != NullKind && getRHSKind() != EmptyKind;
217     }
218 
219     /// Check if this is a valid twine (satisfying the invariants on
220     /// order and number of arguments).
isValid()221     bool isValid() const {
222       // Nullary twines always have Empty on the RHS.
223       if (isNullary() && getRHSKind() != EmptyKind)
224         return false;
225 
226       // Null should never appear on the RHS.
227       if (getRHSKind() == NullKind)
228         return false;
229 
230       // The RHS cannot be non-empty if the LHS is empty.
231       if (getRHSKind() != EmptyKind && getLHSKind() == EmptyKind)
232         return false;
233 
234       // A twine child should always be binary.
235       if (getLHSKind() == TwineKind &&
236           !LHS.twine->isBinary())
237         return false;
238       if (getRHSKind() == TwineKind &&
239           !RHS.twine->isBinary())
240         return false;
241 
242       return true;
243     }
244 
245     /// Get the NodeKind of the left-hand side.
getLHSKind()246     NodeKind getLHSKind() const { return LHSKind; }
247 
248     /// Get the NodeKind of the right-hand side.
getRHSKind()249     NodeKind getRHSKind() const { return RHSKind; }
250 
251     /// Print one child from a twine.
252     void printOneChild(raw_ostream &OS, Child Ptr, NodeKind Kind) const;
253 
254     /// Print the representation of one child from a twine.
255     void printOneChildRepr(raw_ostream &OS, Child Ptr,
256                            NodeKind Kind) const;
257 
258   public:
259     /// @name Constructors
260     /// @{
261 
262     /// Construct from an empty string.
Twine()263     /*implicit*/ Twine() {
264       assert(isValid() && "Invalid twine!");
265     }
266 
267     Twine(const Twine &) = default;
268 
269     /// Construct from a C string.
270     ///
271     /// We take care here to optimize "" into the empty twine -- this will be
272     /// optimized out for string constants. This allows Twine arguments have
273     /// default "" values, without introducing unnecessary string constants.
Twine(const char * Str)274     /*implicit*/ Twine(const char *Str) {
275       if (Str[0] != '\0') {
276         LHS.cString = Str;
277         LHSKind = CStringKind;
278       } else {
279         LHSKind = EmptyKind;
280       }
281 
282       assert(isValid() && "Invalid twine!");
283     }
284     /// Delete the implicit conversion from nullptr as Twine(const char *)
285     /// cannot take nullptr.
286     /*implicit*/ Twine(std::nullptr_t) = delete;
287 
288     /// Construct from an std::string.
Twine(const std::string & Str)289     /*implicit*/ Twine(const std::string &Str) : LHSKind(StdStringKind) {
290       LHS.stdString = &Str;
291       assert(isValid() && "Invalid twine!");
292     }
293 
294     /// Construct from an std::string_view by converting it to a pointer and
295     /// length.  This handles string_views on a pure API basis, and avoids
296     /// storing one (or a pointer to one) inside a Twine, which avoids problems
297     /// when mixing code compiled under various C++ standards.
Twine(const std::string_view & Str)298     /*implicit*/ Twine(const std::string_view &Str)
299         : LHSKind(PtrAndLengthKind) {
300       LHS.ptrAndLength.ptr = Str.data();
301       LHS.ptrAndLength.length = Str.length();
302       assert(isValid() && "Invalid twine!");
303     }
304 
305     /// Construct from a StringRef.
Twine(const StringRef & Str)306     /*implicit*/ Twine(const StringRef &Str) : LHSKind(PtrAndLengthKind) {
307       LHS.ptrAndLength.ptr = Str.data();
308       LHS.ptrAndLength.length = Str.size();
309       assert(isValid() && "Invalid twine!");
310     }
311 
312     /// Construct from a StringLiteral.
Twine(const StringLiteral & Str)313     /*implicit*/ Twine(const StringLiteral &Str)
314         : LHSKind(StringLiteralKind) {
315       LHS.ptrAndLength.ptr = Str.data();
316       LHS.ptrAndLength.length = Str.size();
317       assert(isValid() && "Invalid twine!");
318     }
319 
320     /// Construct from a SmallString.
Twine(const SmallVectorImpl<char> & Str)321     /*implicit*/ Twine(const SmallVectorImpl<char> &Str)
322         : LHSKind(PtrAndLengthKind) {
323       LHS.ptrAndLength.ptr = Str.data();
324       LHS.ptrAndLength.length = Str.size();
325       assert(isValid() && "Invalid twine!");
326     }
327 
328     /// Construct from a formatv_object_base.
Twine(const formatv_object_base & Fmt)329     /*implicit*/ Twine(const formatv_object_base &Fmt)
330         : LHSKind(FormatvObjectKind) {
331       LHS.formatvObject = &Fmt;
332       assert(isValid() && "Invalid twine!");
333     }
334 
335     /// Construct from a char.
Twine(char Val)336     explicit Twine(char Val) : LHSKind(CharKind) {
337       LHS.character = Val;
338     }
339 
340     /// Construct from a signed char.
Twine(signed char Val)341     explicit Twine(signed char Val) : LHSKind(CharKind) {
342       LHS.character = static_cast<char>(Val);
343     }
344 
345     /// Construct from an unsigned char.
Twine(unsigned char Val)346     explicit Twine(unsigned char Val) : LHSKind(CharKind) {
347       LHS.character = static_cast<char>(Val);
348     }
349 
350     /// Construct a twine to print \p Val as an unsigned decimal integer.
Twine(unsigned Val)351     explicit Twine(unsigned Val) : LHSKind(DecUIKind) {
352       LHS.decUI = Val;
353     }
354 
355     /// Construct a twine to print \p Val as a signed decimal integer.
Twine(int Val)356     explicit Twine(int Val) : LHSKind(DecIKind) {
357       LHS.decI = Val;
358     }
359 
360     /// Construct a twine to print \p Val as an unsigned decimal integer.
Twine(const unsigned long & Val)361     explicit Twine(const unsigned long &Val) : LHSKind(DecULKind) {
362       LHS.decUL = &Val;
363     }
364 
365     /// Construct a twine to print \p Val as a signed decimal integer.
Twine(const long & Val)366     explicit Twine(const long &Val) : LHSKind(DecLKind) {
367       LHS.decL = &Val;
368     }
369 
370     /// Construct a twine to print \p Val as an unsigned decimal integer.
Twine(const unsigned long long & Val)371     explicit Twine(const unsigned long long &Val) : LHSKind(DecULLKind) {
372       LHS.decULL = &Val;
373     }
374 
375     /// Construct a twine to print \p Val as a signed decimal integer.
Twine(const long long & Val)376     explicit Twine(const long long &Val) : LHSKind(DecLLKind) {
377       LHS.decLL = &Val;
378     }
379 
380     // FIXME: Unfortunately, to make sure this is as efficient as possible we
381     // need extra binary constructors from particular types. We can't rely on
382     // the compiler to be smart enough to fold operator+()/concat() down to the
383     // right thing. Yet.
384 
385     /// Construct as the concatenation of a C string and a StringRef.
Twine(const char * LHS,const StringRef & RHS)386     /*implicit*/ Twine(const char *LHS, const StringRef &RHS)
387         : LHSKind(CStringKind), RHSKind(PtrAndLengthKind) {
388       this->LHS.cString = LHS;
389       this->RHS.ptrAndLength.ptr = RHS.data();
390       this->RHS.ptrAndLength.length = RHS.size();
391       assert(isValid() && "Invalid twine!");
392     }
393 
394     /// Construct as the concatenation of a StringRef and a C string.
Twine(const StringRef & LHS,const char * RHS)395     /*implicit*/ Twine(const StringRef &LHS, const char *RHS)
396         : LHSKind(PtrAndLengthKind), RHSKind(CStringKind) {
397       this->LHS.ptrAndLength.ptr = LHS.data();
398       this->LHS.ptrAndLength.length = LHS.size();
399       this->RHS.cString = RHS;
400       assert(isValid() && "Invalid twine!");
401     }
402 
403     /// Since the intended use of twines is as temporary objects, assignments
404     /// when concatenating might cause undefined behavior or stack corruptions
405     Twine &operator=(const Twine &) = delete;
406 
407     /// Create a 'null' string, which is an empty string that always
408     /// concatenates to form another empty string.
createNull()409     static Twine createNull() {
410       return Twine(NullKind);
411     }
412 
413     /// @}
414     /// @name Numeric Conversions
415     /// @{
416 
417     // Construct a twine to print \p Val as an unsigned hexadecimal integer.
utohexstr(const uint64_t & Val)418     static Twine utohexstr(const uint64_t &Val) {
419       Child LHS, RHS;
420       LHS.uHex = &Val;
421       RHS.twine = nullptr;
422       return Twine(LHS, UHexKind, RHS, EmptyKind);
423     }
424 
425     /// @}
426     /// @name Predicate Operations
427     /// @{
428 
429     /// Check if this twine is trivially empty; a false return value does not
430     /// necessarily mean the twine is empty.
isTriviallyEmpty()431     bool isTriviallyEmpty() const {
432       return isNullary();
433     }
434 
435     /// Check if this twine is guaranteed to refer to single string literal.
isSingleStringLiteral()436     bool isSingleStringLiteral() const {
437       return isUnary() && getLHSKind() == StringLiteralKind;
438     }
439 
440     /// Return true if this twine can be dynamically accessed as a single
441     /// StringRef value with getSingleStringRef().
isSingleStringRef()442     bool isSingleStringRef() const {
443       if (getRHSKind() != EmptyKind) return false;
444 
445       switch (getLHSKind()) {
446       case EmptyKind:
447       case CStringKind:
448       case StdStringKind:
449       case PtrAndLengthKind:
450       case StringLiteralKind:
451         return true;
452       default:
453         return false;
454       }
455     }
456 
457     /// @}
458     /// @name String Operations
459     /// @{
460 
461     Twine concat(const Twine &Suffix) const;
462 
463     /// @}
464     /// @name Output & Conversion.
465     /// @{
466 
467     /// Return the twine contents as a std::string.
468     LLVM_ABI std::string str() const;
469 
470     /// Append the concatenated string into the given SmallString or SmallVector.
471     LLVM_ABI void toVector(SmallVectorImpl<char> &Out) const;
472 
473     /// This returns the twine as a single StringRef.  This method is only valid
474     /// if isSingleStringRef() is true.
getSingleStringRef()475     StringRef getSingleStringRef() const {
476       assert(isSingleStringRef() &&"This cannot be had as a single stringref!");
477       switch (getLHSKind()) {
478       default: llvm_unreachable("Out of sync with isSingleStringRef");
479       case EmptyKind:
480         return StringRef();
481       case CStringKind:
482         return StringRef(LHS.cString);
483       case StdStringKind:
484         return StringRef(*LHS.stdString);
485       case PtrAndLengthKind:
486       case StringLiteralKind:
487         return StringRef(LHS.ptrAndLength.ptr, LHS.ptrAndLength.length);
488       }
489     }
490 
491     /// This returns the twine as a single StringRef if it can be
492     /// represented as such. Otherwise the twine is written into the given
493     /// SmallVector and a StringRef to the SmallVector's data is returned.
toStringRef(SmallVectorImpl<char> & Out)494     StringRef toStringRef(SmallVectorImpl<char> &Out) const {
495       if (isSingleStringRef())
496         return getSingleStringRef();
497       toVector(Out);
498       return StringRef(Out.data(), Out.size());
499     }
500 
501     /// This returns the twine as a single null terminated StringRef if it
502     /// can be represented as such. Otherwise the twine is written into the
503     /// given SmallVector and a StringRef to the SmallVector's data is returned.
504     ///
505     /// The returned StringRef's size does not include the null terminator.
506     LLVM_ABI StringRef
507     toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const;
508 
509     /// Write the concatenated string represented by this twine to the
510     /// stream \p OS.
511     LLVM_ABI void print(raw_ostream &OS) const;
512 
513     /// Write the representation of this twine to the stream \p OS.
514     LLVM_ABI void printRepr(raw_ostream &OS) const;
515 
516 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
517     /// Dump the concatenated string represented by this twine to stderr.
518     LLVM_DUMP_METHOD void dump() const;
519 
520     /// Dump the representation of this twine to stderr.
521     LLVM_DUMP_METHOD void dumpRepr() const;
522 #endif
523 
524     /// @}
525   };
526 
527   /// @name Twine Inline Implementations
528   /// @{
529 
concat(const Twine & Suffix)530   inline Twine Twine::concat(const Twine &Suffix) const {
531     // Concatenation with null is null.
532     if (isNull() || Suffix.isNull())
533       return Twine(NullKind);
534 
535     // Concatenation with empty yields the other side.
536     if (isEmpty())
537       return Suffix;
538     if (Suffix.isEmpty())
539       return *this;
540 
541     // Otherwise we need to create a new node, taking care to fold in unary
542     // twines.
543     Child NewLHS, NewRHS;
544     NewLHS.twine = this;
545     NewRHS.twine = &Suffix;
546     NodeKind NewLHSKind = TwineKind, NewRHSKind = TwineKind;
547     if (isUnary()) {
548       NewLHS = LHS;
549       NewLHSKind = getLHSKind();
550     }
551     if (Suffix.isUnary()) {
552       NewRHS = Suffix.LHS;
553       NewRHSKind = Suffix.getLHSKind();
554     }
555 
556     return Twine(NewLHS, NewLHSKind, NewRHS, NewRHSKind);
557   }
558 
559   inline Twine operator+(const Twine &LHS, const Twine &RHS) {
560     return LHS.concat(RHS);
561   }
562 
563   /// Additional overload to guarantee simplified codegen; this is equivalent to
564   /// concat().
565 
566   inline Twine operator+(const char *LHS, const StringRef &RHS) {
567     return Twine(LHS, RHS);
568   }
569 
570   /// Additional overload to guarantee simplified codegen; this is equivalent to
571   /// concat().
572 
573   inline Twine operator+(const StringRef &LHS, const char *RHS) {
574     return Twine(LHS, RHS);
575   }
576 
577   inline raw_ostream &operator<<(raw_ostream &OS, const Twine &RHS) {
578     RHS.print(OS);
579     return OS;
580   }
581 
582   /// @}
583 
584 } // end namespace llvm
585 
586 #endif // LLVM_ADT_TWINE_H
587