xref: /freebsd/contrib/llvm-project/llvm/include/llvm/ADT/Twine.h (revision cb14a3fe5122c879eae1fb480ed7ce82a699ddb6)
1  //===- Twine.h - Fast Temporary String Concatenation ------------*- C++ -*-===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  
9  #ifndef LLVM_ADT_TWINE_H
10  #define LLVM_ADT_TWINE_H
11  
12  #include "llvm/ADT/SmallVector.h"
13  #include "llvm/ADT/StringRef.h"
14  #include "llvm/Support/ErrorHandling.h"
15  #include <cassert>
16  #include <cstdint>
17  #include <string>
18  #include <string_view>
19  
20  namespace llvm {
21  
22    class formatv_object_base;
23    class raw_ostream;
24  
25    /// Twine - A lightweight data structure for efficiently representing the
26    /// concatenation of temporary values as strings.
27    ///
28    /// A Twine is a kind of rope, it represents a concatenated string using a
29    /// binary-tree, where the string is the preorder of the nodes. Since the
30    /// Twine can be efficiently rendered into a buffer when its result is used,
31    /// it avoids the cost of generating temporary values for intermediate string
32    /// results -- particularly in cases when the Twine result is never
33    /// required. By explicitly tracking the type of leaf nodes, we can also avoid
34    /// the creation of temporary strings for conversions operations (such as
35    /// appending an integer to a string).
36    ///
37    /// A Twine is not intended for use directly and should not be stored, its
38    /// implementation relies on the ability to store pointers to temporary stack
39    /// objects which may be deallocated at the end of a statement. Twines should
40    /// only be used as const references in arguments, when an API wishes
41    /// to accept possibly-concatenated strings.
42    ///
43    /// Twines support a special 'null' value, which always concatenates to form
44    /// itself, and renders as an empty string. This can be returned from APIs to
45    /// effectively nullify any concatenations performed on the result.
46    ///
47    /// \b Implementation
48    ///
49    /// Given the nature of a Twine, it is not possible for the Twine's
50    /// concatenation method to construct interior nodes; the result must be
51    /// represented inside the returned value. For this reason a Twine object
52    /// actually holds two values, the left- and right-hand sides of a
53    /// concatenation. We also have nullary Twine objects, which are effectively
54    /// sentinel values that represent empty strings.
55    ///
56    /// Thus, a Twine can effectively have zero, one, or two children. The \see
57    /// isNullary(), \see isUnary(), and \see isBinary() predicates exist for
58    /// testing the number of children.
59    ///
60    /// We maintain a number of invariants on Twine objects (FIXME: Why):
61    ///  - Nullary twines are always represented with their Kind on the left-hand
62    ///    side, and the Empty kind on the right-hand side.
63    ///  - Unary twines are always represented with the value on the left-hand
64    ///    side, and the Empty kind on the right-hand side.
65    ///  - If a Twine has another Twine as a child, that child should always be
66    ///    binary (otherwise it could have been folded into the parent).
67    ///
68    /// These invariants are check by \see isValid().
69    ///
70    /// \b Efficiency Considerations
71    ///
72    /// The Twine is designed to yield efficient and small code for common
73    /// situations. For this reason, the concat() method is inlined so that
74    /// concatenations of leaf nodes can be optimized into stores directly into a
75    /// single stack allocated object.
76    ///
77    /// In practice, not all compilers can be trusted to optimize concat() fully,
78    /// so we provide two additional methods (and accompanying operator+
79    /// overloads) to guarantee that particularly important cases (cstring plus
80    /// StringRef) codegen as desired.
81    class Twine {
82      /// NodeKind - Represent the type of an argument.
83      enum NodeKind : unsigned char {
84        /// An empty string; the result of concatenating anything with it is also
85        /// empty.
86        NullKind,
87  
88        /// The empty string.
89        EmptyKind,
90  
91        /// A pointer to a Twine instance.
92        TwineKind,
93  
94        /// A pointer to a C string instance.
95        CStringKind,
96  
97        /// A pointer to an std::string instance.
98        StdStringKind,
99  
100        /// A Pointer and Length representation. Used for std::string_view,
101        /// StringRef, and SmallString.  Can't use a StringRef here
102        /// because they are not trivally constructible.
103        PtrAndLengthKind,
104  
105        /// A pointer and length representation that's also null-terminated.
106        /// Guaranteed to be constructed from a compile-time string literal.
107        StringLiteralKind,
108  
109        /// A pointer to a formatv_object_base instance.
110        FormatvObjectKind,
111  
112        /// A char value, to render as a character.
113        CharKind,
114  
115        /// An unsigned int value, to render as an unsigned decimal integer.
116        DecUIKind,
117  
118        /// An int value, to render as a signed decimal integer.
119        DecIKind,
120  
121        /// A pointer to an unsigned long value, to render as an unsigned decimal
122        /// integer.
123        DecULKind,
124  
125        /// A pointer to a long value, to render as a signed decimal integer.
126        DecLKind,
127  
128        /// A pointer to an unsigned long long value, to render as an unsigned
129        /// decimal integer.
130        DecULLKind,
131  
132        /// A pointer to a long long value, to render as a signed decimal integer.
133        DecLLKind,
134  
135        /// A pointer to a uint64_t value, to render as an unsigned hexadecimal
136        /// integer.
137        UHexKind
138      };
139  
140      union Child
141      {
142        const Twine *twine;
143        const char *cString;
144        const std::string *stdString;
145        struct {
146          const char *ptr;
147          size_t length;
148        } ptrAndLength;
149        const formatv_object_base *formatvObject;
150        char character;
151        unsigned int decUI;
152        int decI;
153        const unsigned long *decUL;
154        const long *decL;
155        const unsigned long long *decULL;
156        const long long *decLL;
157        const uint64_t *uHex;
158      };
159  
160      /// LHS - The prefix in the concatenation, which may be uninitialized for
161      /// Null or Empty kinds.
162      Child LHS;
163  
164      /// RHS - The suffix in the concatenation, which may be uninitialized for
165      /// Null or Empty kinds.
166      Child RHS;
167  
168      /// LHSKind - The NodeKind of the left hand side, \see getLHSKind().
169      NodeKind LHSKind = EmptyKind;
170  
171      /// RHSKind - The NodeKind of the right hand side, \see getRHSKind().
172      NodeKind RHSKind = EmptyKind;
173  
174      /// Construct a nullary twine; the kind must be NullKind or EmptyKind.
175      explicit Twine(NodeKind Kind) : LHSKind(Kind) {
176        assert(isNullary() && "Invalid kind!");
177      }
178  
179      /// Construct a binary twine.
180      explicit Twine(const Twine &LHS, const Twine &RHS)
181          : LHSKind(TwineKind), RHSKind(TwineKind) {
182        this->LHS.twine = &LHS;
183        this->RHS.twine = &RHS;
184        assert(isValid() && "Invalid twine!");
185      }
186  
187      /// Construct a twine from explicit values.
188      explicit Twine(Child LHS, NodeKind LHSKind, Child RHS, NodeKind RHSKind)
189          : LHS(LHS), RHS(RHS), LHSKind(LHSKind), RHSKind(RHSKind) {
190        assert(isValid() && "Invalid twine!");
191      }
192  
193      /// Check for the null twine.
194      bool isNull() const {
195        return getLHSKind() == NullKind;
196      }
197  
198      /// Check for the empty twine.
199      bool isEmpty() const {
200        return getLHSKind() == EmptyKind;
201      }
202  
203      /// Check if this is a nullary twine (null or empty).
204      bool isNullary() const {
205        return isNull() || isEmpty();
206      }
207  
208      /// Check if this is a unary twine.
209      bool isUnary() const {
210        return getRHSKind() == EmptyKind && !isNullary();
211      }
212  
213      /// Check if this is a binary twine.
214      bool isBinary() const {
215        return getLHSKind() != NullKind && getRHSKind() != EmptyKind;
216      }
217  
218      /// Check if this is a valid twine (satisfying the invariants on
219      /// order and number of arguments).
220      bool isValid() const {
221        // Nullary twines always have Empty on the RHS.
222        if (isNullary() && getRHSKind() != EmptyKind)
223          return false;
224  
225        // Null should never appear on the RHS.
226        if (getRHSKind() == NullKind)
227          return false;
228  
229        // The RHS cannot be non-empty if the LHS is empty.
230        if (getRHSKind() != EmptyKind && getLHSKind() == EmptyKind)
231          return false;
232  
233        // A twine child should always be binary.
234        if (getLHSKind() == TwineKind &&
235            !LHS.twine->isBinary())
236          return false;
237        if (getRHSKind() == TwineKind &&
238            !RHS.twine->isBinary())
239          return false;
240  
241        return true;
242      }
243  
244      /// Get the NodeKind of the left-hand side.
245      NodeKind getLHSKind() const { return LHSKind; }
246  
247      /// Get the NodeKind of the right-hand side.
248      NodeKind getRHSKind() const { return RHSKind; }
249  
250      /// Print one child from a twine.
251      void printOneChild(raw_ostream &OS, Child Ptr, NodeKind Kind) const;
252  
253      /// Print the representation of one child from a twine.
254      void printOneChildRepr(raw_ostream &OS, Child Ptr,
255                             NodeKind Kind) const;
256  
257    public:
258      /// @name Constructors
259      /// @{
260  
261      /// Construct from an empty string.
262      /*implicit*/ Twine() {
263        assert(isValid() && "Invalid twine!");
264      }
265  
266      Twine(const Twine &) = default;
267  
268      /// Construct from a C string.
269      ///
270      /// We take care here to optimize "" into the empty twine -- this will be
271      /// optimized out for string constants. This allows Twine arguments have
272      /// default "" values, without introducing unnecessary string constants.
273      /*implicit*/ Twine(const char *Str) {
274        if (Str[0] != '\0') {
275          LHS.cString = Str;
276          LHSKind = CStringKind;
277        } else
278          LHSKind = EmptyKind;
279  
280        assert(isValid() && "Invalid twine!");
281      }
282      /// Delete the implicit conversion from nullptr as Twine(const char *)
283      /// cannot take nullptr.
284      /*implicit*/ Twine(std::nullptr_t) = delete;
285  
286      /// Construct from an std::string.
287      /*implicit*/ Twine(const std::string &Str) : LHSKind(StdStringKind) {
288        LHS.stdString = &Str;
289        assert(isValid() && "Invalid twine!");
290      }
291  
292      /// Construct from an std::string_view by converting it to a pointer and
293      /// length.  This handles string_views on a pure API basis, and avoids
294      /// storing one (or a pointer to one) inside a Twine, which avoids problems
295      /// when mixing code compiled under various C++ standards.
296      /*implicit*/ Twine(const std::string_view &Str)
297          : LHSKind(PtrAndLengthKind) {
298        LHS.ptrAndLength.ptr = Str.data();
299        LHS.ptrAndLength.length = Str.length();
300        assert(isValid() && "Invalid twine!");
301      }
302  
303      /// Construct from a StringRef.
304      /*implicit*/ Twine(const StringRef &Str) : LHSKind(PtrAndLengthKind) {
305        LHS.ptrAndLength.ptr = Str.data();
306        LHS.ptrAndLength.length = Str.size();
307        assert(isValid() && "Invalid twine!");
308      }
309  
310      /// Construct from a StringLiteral.
311      /*implicit*/ Twine(const StringLiteral &Str)
312          : LHSKind(StringLiteralKind) {
313        LHS.ptrAndLength.ptr = Str.data();
314        LHS.ptrAndLength.length = Str.size();
315        assert(isValid() && "Invalid twine!");
316      }
317  
318      /// Construct from a SmallString.
319      /*implicit*/ Twine(const SmallVectorImpl<char> &Str)
320          : LHSKind(PtrAndLengthKind) {
321        LHS.ptrAndLength.ptr = Str.data();
322        LHS.ptrAndLength.length = Str.size();
323        assert(isValid() && "Invalid twine!");
324      }
325  
326      /// Construct from a formatv_object_base.
327      /*implicit*/ Twine(const formatv_object_base &Fmt)
328          : LHSKind(FormatvObjectKind) {
329        LHS.formatvObject = &Fmt;
330        assert(isValid() && "Invalid twine!");
331      }
332  
333      /// Construct from a char.
334      explicit Twine(char Val) : LHSKind(CharKind) {
335        LHS.character = Val;
336      }
337  
338      /// Construct from a signed char.
339      explicit Twine(signed char Val) : LHSKind(CharKind) {
340        LHS.character = static_cast<char>(Val);
341      }
342  
343      /// Construct from an unsigned char.
344      explicit Twine(unsigned char Val) : LHSKind(CharKind) {
345        LHS.character = static_cast<char>(Val);
346      }
347  
348      /// Construct a twine to print \p Val as an unsigned decimal integer.
349      explicit Twine(unsigned Val) : LHSKind(DecUIKind) {
350        LHS.decUI = Val;
351      }
352  
353      /// Construct a twine to print \p Val as a signed decimal integer.
354      explicit Twine(int Val) : LHSKind(DecIKind) {
355        LHS.decI = Val;
356      }
357  
358      /// Construct a twine to print \p Val as an unsigned decimal integer.
359      explicit Twine(const unsigned long &Val) : LHSKind(DecULKind) {
360        LHS.decUL = &Val;
361      }
362  
363      /// Construct a twine to print \p Val as a signed decimal integer.
364      explicit Twine(const long &Val) : LHSKind(DecLKind) {
365        LHS.decL = &Val;
366      }
367  
368      /// Construct a twine to print \p Val as an unsigned decimal integer.
369      explicit Twine(const unsigned long long &Val) : LHSKind(DecULLKind) {
370        LHS.decULL = &Val;
371      }
372  
373      /// Construct a twine to print \p Val as a signed decimal integer.
374      explicit Twine(const long long &Val) : LHSKind(DecLLKind) {
375        LHS.decLL = &Val;
376      }
377  
378      // FIXME: Unfortunately, to make sure this is as efficient as possible we
379      // need extra binary constructors from particular types. We can't rely on
380      // the compiler to be smart enough to fold operator+()/concat() down to the
381      // right thing. Yet.
382  
383      /// Construct as the concatenation of a C string and a StringRef.
384      /*implicit*/ Twine(const char *LHS, const StringRef &RHS)
385          : LHSKind(CStringKind), RHSKind(PtrAndLengthKind) {
386        this->LHS.cString = LHS;
387        this->RHS.ptrAndLength.ptr = RHS.data();
388        this->RHS.ptrAndLength.length = RHS.size();
389        assert(isValid() && "Invalid twine!");
390      }
391  
392      /// Construct as the concatenation of a StringRef and a C string.
393      /*implicit*/ Twine(const StringRef &LHS, const char *RHS)
394          : LHSKind(PtrAndLengthKind), RHSKind(CStringKind) {
395        this->LHS.ptrAndLength.ptr = LHS.data();
396        this->LHS.ptrAndLength.length = LHS.size();
397        this->RHS.cString = RHS;
398        assert(isValid() && "Invalid twine!");
399      }
400  
401      /// Since the intended use of twines is as temporary objects, assignments
402      /// when concatenating might cause undefined behavior or stack corruptions
403      Twine &operator=(const Twine &) = delete;
404  
405      /// Create a 'null' string, which is an empty string that always
406      /// concatenates to form another empty string.
407      static Twine createNull() {
408        return Twine(NullKind);
409      }
410  
411      /// @}
412      /// @name Numeric Conversions
413      /// @{
414  
415      // Construct a twine to print \p Val as an unsigned hexadecimal integer.
416      static Twine utohexstr(const uint64_t &Val) {
417        Child LHS, RHS;
418        LHS.uHex = &Val;
419        RHS.twine = nullptr;
420        return Twine(LHS, UHexKind, RHS, EmptyKind);
421      }
422  
423      /// @}
424      /// @name Predicate Operations
425      /// @{
426  
427      /// Check if this twine is trivially empty; a false return value does not
428      /// necessarily mean the twine is empty.
429      bool isTriviallyEmpty() const {
430        return isNullary();
431      }
432  
433      /// Check if this twine is guaranteed to refer to single string literal.
434      bool isSingleStringLiteral() const {
435        return isUnary() && getLHSKind() == StringLiteralKind;
436      }
437  
438      /// Return true if this twine can be dynamically accessed as a single
439      /// StringRef value with getSingleStringRef().
440      bool isSingleStringRef() const {
441        if (getRHSKind() != EmptyKind) return false;
442  
443        switch (getLHSKind()) {
444        case EmptyKind:
445        case CStringKind:
446        case StdStringKind:
447        case PtrAndLengthKind:
448        case StringLiteralKind:
449          return true;
450        default:
451          return false;
452        }
453      }
454  
455      /// @}
456      /// @name String Operations
457      /// @{
458  
459      Twine concat(const Twine &Suffix) const;
460  
461      /// @}
462      /// @name Output & Conversion.
463      /// @{
464  
465      /// Return the twine contents as a std::string.
466      std::string str() const;
467  
468      /// Append the concatenated string into the given SmallString or SmallVector.
469      void toVector(SmallVectorImpl<char> &Out) const;
470  
471      /// This returns the twine as a single StringRef.  This method is only valid
472      /// if isSingleStringRef() is true.
473      StringRef getSingleStringRef() const {
474        assert(isSingleStringRef() &&"This cannot be had as a single stringref!");
475        switch (getLHSKind()) {
476        default: llvm_unreachable("Out of sync with isSingleStringRef");
477        case EmptyKind:
478          return StringRef();
479        case CStringKind:
480          return StringRef(LHS.cString);
481        case StdStringKind:
482          return StringRef(*LHS.stdString);
483        case PtrAndLengthKind:
484        case StringLiteralKind:
485          return StringRef(LHS.ptrAndLength.ptr, LHS.ptrAndLength.length);
486        }
487      }
488  
489      /// This returns the twine as a single StringRef if it can be
490      /// represented as such. Otherwise the twine is written into the given
491      /// SmallVector and a StringRef to the SmallVector's data is returned.
492      StringRef toStringRef(SmallVectorImpl<char> &Out) const {
493        if (isSingleStringRef())
494          return getSingleStringRef();
495        toVector(Out);
496        return StringRef(Out.data(), Out.size());
497      }
498  
499      /// This returns the twine as a single null terminated StringRef if it
500      /// can be represented as such. Otherwise the twine is written into the
501      /// given SmallVector and a StringRef to the SmallVector's data is returned.
502      ///
503      /// The returned StringRef's size does not include the null terminator.
504      StringRef toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const;
505  
506      /// Write the concatenated string represented by this twine to the
507      /// stream \p OS.
508      void print(raw_ostream &OS) const;
509  
510      /// Dump the concatenated string represented by this twine to stderr.
511      void dump() const;
512  
513      /// Write the representation of this twine to the stream \p OS.
514      void printRepr(raw_ostream &OS) const;
515  
516      /// Dump the representation of this twine to stderr.
517      void dumpRepr() const;
518  
519      /// @}
520    };
521  
522    /// @name Twine Inline Implementations
523    /// @{
524  
525    inline Twine Twine::concat(const Twine &Suffix) const {
526      // Concatenation with null is null.
527      if (isNull() || Suffix.isNull())
528        return Twine(NullKind);
529  
530      // Concatenation with empty yields the other side.
531      if (isEmpty())
532        return Suffix;
533      if (Suffix.isEmpty())
534        return *this;
535  
536      // Otherwise we need to create a new node, taking care to fold in unary
537      // twines.
538      Child NewLHS, NewRHS;
539      NewLHS.twine = this;
540      NewRHS.twine = &Suffix;
541      NodeKind NewLHSKind = TwineKind, NewRHSKind = TwineKind;
542      if (isUnary()) {
543        NewLHS = LHS;
544        NewLHSKind = getLHSKind();
545      }
546      if (Suffix.isUnary()) {
547        NewRHS = Suffix.LHS;
548        NewRHSKind = Suffix.getLHSKind();
549      }
550  
551      return Twine(NewLHS, NewLHSKind, NewRHS, NewRHSKind);
552    }
553  
554    inline Twine operator+(const Twine &LHS, const Twine &RHS) {
555      return LHS.concat(RHS);
556    }
557  
558    /// Additional overload to guarantee simplified codegen; this is equivalent to
559    /// concat().
560  
561    inline Twine operator+(const char *LHS, const StringRef &RHS) {
562      return Twine(LHS, RHS);
563    }
564  
565    /// Additional overload to guarantee simplified codegen; this is equivalent to
566    /// concat().
567  
568    inline Twine operator+(const StringRef &LHS, const char *RHS) {
569      return Twine(LHS, RHS);
570    }
571  
572    inline raw_ostream &operator<<(raw_ostream &OS, const Twine &RHS) {
573      RHS.print(OS);
574      return OS;
575    }
576  
577    /// @}
578  
579  } // end namespace llvm
580  
581  #endif // LLVM_ADT_TWINE_H
582