1 //===- Twine.h - Fast Temporary String Concatenation ------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_ADT_TWINE_H 10 #define LLVM_ADT_TWINE_H 11 12 #include "llvm/ADT/SmallVector.h" 13 #include "llvm/ADT/StringRef.h" 14 #include "llvm/Support/Compiler.h" 15 #include "llvm/Support/ErrorHandling.h" 16 #include <cassert> 17 #include <cstdint> 18 #include <string> 19 #include <string_view> 20 21 namespace llvm { 22 23 class formatv_object_base; 24 class raw_ostream; 25 26 /// Twine - A lightweight data structure for efficiently representing the 27 /// concatenation of temporary values as strings. 28 /// 29 /// A Twine is a kind of rope, it represents a concatenated string using a 30 /// binary-tree, where the string is the preorder of the nodes. Since the 31 /// Twine can be efficiently rendered into a buffer when its result is used, 32 /// it avoids the cost of generating temporary values for intermediate string 33 /// results -- particularly in cases when the Twine result is never 34 /// required. By explicitly tracking the type of leaf nodes, we can also avoid 35 /// the creation of temporary strings for conversions operations (such as 36 /// appending an integer to a string). 37 /// 38 /// A Twine is not intended for use directly and should not be stored, its 39 /// implementation relies on the ability to store pointers to temporary stack 40 /// objects which may be deallocated at the end of a statement. Twines should 41 /// only be used as const references in arguments, when an API wishes 42 /// to accept possibly-concatenated strings. 43 /// 44 /// Twines support a special 'null' value, which always concatenates to form 45 /// itself, and renders as an empty string. This can be returned from APIs to 46 /// effectively nullify any concatenations performed on the result. 47 /// 48 /// \b Implementation 49 /// 50 /// Given the nature of a Twine, it is not possible for the Twine's 51 /// concatenation method to construct interior nodes; the result must be 52 /// represented inside the returned value. For this reason a Twine object 53 /// actually holds two values, the left- and right-hand sides of a 54 /// concatenation. We also have nullary Twine objects, which are effectively 55 /// sentinel values that represent empty strings. 56 /// 57 /// Thus, a Twine can effectively have zero, one, or two children. The \see 58 /// isNullary(), \see isUnary(), and \see isBinary() predicates exist for 59 /// testing the number of children. 60 /// 61 /// We maintain a number of invariants on Twine objects (FIXME: Why): 62 /// - Nullary twines are always represented with their Kind on the left-hand 63 /// side, and the Empty kind on the right-hand side. 64 /// - Unary twines are always represented with the value on the left-hand 65 /// side, and the Empty kind on the right-hand side. 66 /// - If a Twine has another Twine as a child, that child should always be 67 /// binary (otherwise it could have been folded into the parent). 68 /// 69 /// These invariants are check by \see isValid(). 70 /// 71 /// \b Efficiency Considerations 72 /// 73 /// The Twine is designed to yield efficient and small code for common 74 /// situations. For this reason, the concat() method is inlined so that 75 /// concatenations of leaf nodes can be optimized into stores directly into a 76 /// single stack allocated object. 77 /// 78 /// In practice, not all compilers can be trusted to optimize concat() fully, 79 /// so we provide two additional methods (and accompanying operator+ 80 /// overloads) to guarantee that particularly important cases (cstring plus 81 /// StringRef) codegen as desired. 82 class Twine { 83 /// NodeKind - Represent the type of an argument. 84 enum NodeKind : unsigned char { 85 /// An empty string; the result of concatenating anything with it is also 86 /// empty. 87 NullKind, 88 89 /// The empty string. 90 EmptyKind, 91 92 /// A pointer to a Twine instance. 93 TwineKind, 94 95 /// A pointer to a C string instance. 96 CStringKind, 97 98 /// A pointer to an std::string instance. 99 StdStringKind, 100 101 /// A Pointer and Length representation. Used for std::string_view, 102 /// StringRef, and SmallString. Can't use a StringRef here 103 /// because they are not trivally constructible. 104 PtrAndLengthKind, 105 106 /// A pointer and length representation that's also null-terminated. 107 /// Guaranteed to be constructed from a compile-time string literal. 108 StringLiteralKind, 109 110 /// A pointer to a formatv_object_base instance. 111 FormatvObjectKind, 112 113 /// A char value, to render as a character. 114 CharKind, 115 116 /// An unsigned int value, to render as an unsigned decimal integer. 117 DecUIKind, 118 119 /// An int value, to render as a signed decimal integer. 120 DecIKind, 121 122 /// A pointer to an unsigned long value, to render as an unsigned decimal 123 /// integer. 124 DecULKind, 125 126 /// A pointer to a long value, to render as a signed decimal integer. 127 DecLKind, 128 129 /// A pointer to an unsigned long long value, to render as an unsigned 130 /// decimal integer. 131 DecULLKind, 132 133 /// A pointer to a long long value, to render as a signed decimal integer. 134 DecLLKind, 135 136 /// A pointer to a uint64_t value, to render as an unsigned hexadecimal 137 /// integer. 138 UHexKind 139 }; 140 141 union Child 142 { 143 const Twine *twine; 144 const char *cString; 145 const std::string *stdString; 146 struct { 147 const char *ptr; 148 size_t length; 149 } ptrAndLength; 150 const formatv_object_base *formatvObject; 151 char character; 152 unsigned int decUI; 153 int decI; 154 const unsigned long *decUL; 155 const long *decL; 156 const unsigned long long *decULL; 157 const long long *decLL; 158 const uint64_t *uHex; 159 }; 160 161 /// LHS - The prefix in the concatenation, which may be uninitialized for 162 /// Null or Empty kinds. 163 Child LHS; 164 165 /// RHS - The suffix in the concatenation, which may be uninitialized for 166 /// Null or Empty kinds. 167 Child RHS; 168 169 /// LHSKind - The NodeKind of the left hand side, \see getLHSKind(). 170 NodeKind LHSKind = EmptyKind; 171 172 /// RHSKind - The NodeKind of the right hand side, \see getRHSKind(). 173 NodeKind RHSKind = EmptyKind; 174 175 /// Construct a nullary twine; the kind must be NullKind or EmptyKind. Twine(NodeKind Kind)176 explicit Twine(NodeKind Kind) : LHSKind(Kind) { 177 assert(isNullary() && "Invalid kind!"); 178 } 179 180 /// Construct a binary twine. Twine(const Twine & LHS,const Twine & RHS)181 explicit Twine(const Twine &LHS, const Twine &RHS) 182 : LHSKind(TwineKind), RHSKind(TwineKind) { 183 this->LHS.twine = &LHS; 184 this->RHS.twine = &RHS; 185 assert(isValid() && "Invalid twine!"); 186 } 187 188 /// Construct a twine from explicit values. Twine(Child LHS,NodeKind LHSKind,Child RHS,NodeKind RHSKind)189 explicit Twine(Child LHS, NodeKind LHSKind, Child RHS, NodeKind RHSKind) 190 : LHS(LHS), RHS(RHS), LHSKind(LHSKind), RHSKind(RHSKind) { 191 assert(isValid() && "Invalid twine!"); 192 } 193 194 /// Check for the null twine. isNull()195 bool isNull() const { 196 return getLHSKind() == NullKind; 197 } 198 199 /// Check for the empty twine. isEmpty()200 bool isEmpty() const { 201 return getLHSKind() == EmptyKind; 202 } 203 204 /// Check if this is a nullary twine (null or empty). isNullary()205 bool isNullary() const { 206 return isNull() || isEmpty(); 207 } 208 209 /// Check if this is a unary twine. isUnary()210 bool isUnary() const { 211 return getRHSKind() == EmptyKind && !isNullary(); 212 } 213 214 /// Check if this is a binary twine. isBinary()215 bool isBinary() const { 216 return getLHSKind() != NullKind && getRHSKind() != EmptyKind; 217 } 218 219 /// Check if this is a valid twine (satisfying the invariants on 220 /// order and number of arguments). isValid()221 bool isValid() const { 222 // Nullary twines always have Empty on the RHS. 223 if (isNullary() && getRHSKind() != EmptyKind) 224 return false; 225 226 // Null should never appear on the RHS. 227 if (getRHSKind() == NullKind) 228 return false; 229 230 // The RHS cannot be non-empty if the LHS is empty. 231 if (getRHSKind() != EmptyKind && getLHSKind() == EmptyKind) 232 return false; 233 234 // A twine child should always be binary. 235 if (getLHSKind() == TwineKind && 236 !LHS.twine->isBinary()) 237 return false; 238 if (getRHSKind() == TwineKind && 239 !RHS.twine->isBinary()) 240 return false; 241 242 return true; 243 } 244 245 /// Get the NodeKind of the left-hand side. getLHSKind()246 NodeKind getLHSKind() const { return LHSKind; } 247 248 /// Get the NodeKind of the right-hand side. getRHSKind()249 NodeKind getRHSKind() const { return RHSKind; } 250 251 /// Print one child from a twine. 252 void printOneChild(raw_ostream &OS, Child Ptr, NodeKind Kind) const; 253 254 /// Print the representation of one child from a twine. 255 void printOneChildRepr(raw_ostream &OS, Child Ptr, 256 NodeKind Kind) const; 257 258 public: 259 /// @name Constructors 260 /// @{ 261 262 /// Construct from an empty string. Twine()263 /*implicit*/ Twine() { 264 assert(isValid() && "Invalid twine!"); 265 } 266 267 Twine(const Twine &) = default; 268 269 /// Construct from a C string. 270 /// 271 /// We take care here to optimize "" into the empty twine -- this will be 272 /// optimized out for string constants. This allows Twine arguments have 273 /// default "" values, without introducing unnecessary string constants. Twine(const char * Str)274 /*implicit*/ Twine(const char *Str) { 275 if (Str[0] != '\0') { 276 LHS.cString = Str; 277 LHSKind = CStringKind; 278 } else { 279 LHSKind = EmptyKind; 280 } 281 282 assert(isValid() && "Invalid twine!"); 283 } 284 /// Delete the implicit conversion from nullptr as Twine(const char *) 285 /// cannot take nullptr. 286 /*implicit*/ Twine(std::nullptr_t) = delete; 287 288 /// Construct from an std::string. Twine(const std::string & Str)289 /*implicit*/ Twine(const std::string &Str) : LHSKind(StdStringKind) { 290 LHS.stdString = &Str; 291 assert(isValid() && "Invalid twine!"); 292 } 293 294 /// Construct from an std::string_view by converting it to a pointer and 295 /// length. This handles string_views on a pure API basis, and avoids 296 /// storing one (or a pointer to one) inside a Twine, which avoids problems 297 /// when mixing code compiled under various C++ standards. Twine(const std::string_view & Str)298 /*implicit*/ Twine(const std::string_view &Str) 299 : LHSKind(PtrAndLengthKind) { 300 LHS.ptrAndLength.ptr = Str.data(); 301 LHS.ptrAndLength.length = Str.length(); 302 assert(isValid() && "Invalid twine!"); 303 } 304 305 /// Construct from a StringRef. Twine(const StringRef & Str)306 /*implicit*/ Twine(const StringRef &Str) : LHSKind(PtrAndLengthKind) { 307 LHS.ptrAndLength.ptr = Str.data(); 308 LHS.ptrAndLength.length = Str.size(); 309 assert(isValid() && "Invalid twine!"); 310 } 311 312 /// Construct from a StringLiteral. Twine(const StringLiteral & Str)313 /*implicit*/ Twine(const StringLiteral &Str) 314 : LHSKind(StringLiteralKind) { 315 LHS.ptrAndLength.ptr = Str.data(); 316 LHS.ptrAndLength.length = Str.size(); 317 assert(isValid() && "Invalid twine!"); 318 } 319 320 /// Construct from a SmallString. Twine(const SmallVectorImpl<char> & Str)321 /*implicit*/ Twine(const SmallVectorImpl<char> &Str) 322 : LHSKind(PtrAndLengthKind) { 323 LHS.ptrAndLength.ptr = Str.data(); 324 LHS.ptrAndLength.length = Str.size(); 325 assert(isValid() && "Invalid twine!"); 326 } 327 328 /// Construct from a formatv_object_base. Twine(const formatv_object_base & Fmt)329 /*implicit*/ Twine(const formatv_object_base &Fmt) 330 : LHSKind(FormatvObjectKind) { 331 LHS.formatvObject = &Fmt; 332 assert(isValid() && "Invalid twine!"); 333 } 334 335 /// Construct from a char. Twine(char Val)336 explicit Twine(char Val) : LHSKind(CharKind) { 337 LHS.character = Val; 338 } 339 340 /// Construct from a signed char. Twine(signed char Val)341 explicit Twine(signed char Val) : LHSKind(CharKind) { 342 LHS.character = static_cast<char>(Val); 343 } 344 345 /// Construct from an unsigned char. Twine(unsigned char Val)346 explicit Twine(unsigned char Val) : LHSKind(CharKind) { 347 LHS.character = static_cast<char>(Val); 348 } 349 350 /// Construct a twine to print \p Val as an unsigned decimal integer. Twine(unsigned Val)351 explicit Twine(unsigned Val) : LHSKind(DecUIKind) { 352 LHS.decUI = Val; 353 } 354 355 /// Construct a twine to print \p Val as a signed decimal integer. Twine(int Val)356 explicit Twine(int Val) : LHSKind(DecIKind) { 357 LHS.decI = Val; 358 } 359 360 /// Construct a twine to print \p Val as an unsigned decimal integer. Twine(const unsigned long & Val)361 explicit Twine(const unsigned long &Val) : LHSKind(DecULKind) { 362 LHS.decUL = &Val; 363 } 364 365 /// Construct a twine to print \p Val as a signed decimal integer. Twine(const long & Val)366 explicit Twine(const long &Val) : LHSKind(DecLKind) { 367 LHS.decL = &Val; 368 } 369 370 /// Construct a twine to print \p Val as an unsigned decimal integer. Twine(const unsigned long long & Val)371 explicit Twine(const unsigned long long &Val) : LHSKind(DecULLKind) { 372 LHS.decULL = &Val; 373 } 374 375 /// Construct a twine to print \p Val as a signed decimal integer. Twine(const long long & Val)376 explicit Twine(const long long &Val) : LHSKind(DecLLKind) { 377 LHS.decLL = &Val; 378 } 379 380 // FIXME: Unfortunately, to make sure this is as efficient as possible we 381 // need extra binary constructors from particular types. We can't rely on 382 // the compiler to be smart enough to fold operator+()/concat() down to the 383 // right thing. Yet. 384 385 /// Construct as the concatenation of a C string and a StringRef. Twine(const char * LHS,const StringRef & RHS)386 /*implicit*/ Twine(const char *LHS, const StringRef &RHS) 387 : LHSKind(CStringKind), RHSKind(PtrAndLengthKind) { 388 this->LHS.cString = LHS; 389 this->RHS.ptrAndLength.ptr = RHS.data(); 390 this->RHS.ptrAndLength.length = RHS.size(); 391 assert(isValid() && "Invalid twine!"); 392 } 393 394 /// Construct as the concatenation of a StringRef and a C string. Twine(const StringRef & LHS,const char * RHS)395 /*implicit*/ Twine(const StringRef &LHS, const char *RHS) 396 : LHSKind(PtrAndLengthKind), RHSKind(CStringKind) { 397 this->LHS.ptrAndLength.ptr = LHS.data(); 398 this->LHS.ptrAndLength.length = LHS.size(); 399 this->RHS.cString = RHS; 400 assert(isValid() && "Invalid twine!"); 401 } 402 403 /// Since the intended use of twines is as temporary objects, assignments 404 /// when concatenating might cause undefined behavior or stack corruptions 405 Twine &operator=(const Twine &) = delete; 406 407 /// Create a 'null' string, which is an empty string that always 408 /// concatenates to form another empty string. createNull()409 static Twine createNull() { 410 return Twine(NullKind); 411 } 412 413 /// @} 414 /// @name Numeric Conversions 415 /// @{ 416 417 // Construct a twine to print \p Val as an unsigned hexadecimal integer. utohexstr(const uint64_t & Val)418 static Twine utohexstr(const uint64_t &Val) { 419 Child LHS, RHS; 420 LHS.uHex = &Val; 421 RHS.twine = nullptr; 422 return Twine(LHS, UHexKind, RHS, EmptyKind); 423 } 424 425 /// @} 426 /// @name Predicate Operations 427 /// @{ 428 429 /// Check if this twine is trivially empty; a false return value does not 430 /// necessarily mean the twine is empty. isTriviallyEmpty()431 bool isTriviallyEmpty() const { 432 return isNullary(); 433 } 434 435 /// Check if this twine is guaranteed to refer to single string literal. isSingleStringLiteral()436 bool isSingleStringLiteral() const { 437 return isUnary() && getLHSKind() == StringLiteralKind; 438 } 439 440 /// Return true if this twine can be dynamically accessed as a single 441 /// StringRef value with getSingleStringRef(). isSingleStringRef()442 bool isSingleStringRef() const { 443 if (getRHSKind() != EmptyKind) return false; 444 445 switch (getLHSKind()) { 446 case EmptyKind: 447 case CStringKind: 448 case StdStringKind: 449 case PtrAndLengthKind: 450 case StringLiteralKind: 451 return true; 452 default: 453 return false; 454 } 455 } 456 457 /// @} 458 /// @name String Operations 459 /// @{ 460 461 Twine concat(const Twine &Suffix) const; 462 463 /// @} 464 /// @name Output & Conversion. 465 /// @{ 466 467 /// Return the twine contents as a std::string. 468 LLVM_ABI std::string str() const; 469 470 /// Append the concatenated string into the given SmallString or SmallVector. 471 LLVM_ABI void toVector(SmallVectorImpl<char> &Out) const; 472 473 /// This returns the twine as a single StringRef. This method is only valid 474 /// if isSingleStringRef() is true. getSingleStringRef()475 StringRef getSingleStringRef() const { 476 assert(isSingleStringRef() &&"This cannot be had as a single stringref!"); 477 switch (getLHSKind()) { 478 default: llvm_unreachable("Out of sync with isSingleStringRef"); 479 case EmptyKind: 480 return StringRef(); 481 case CStringKind: 482 return StringRef(LHS.cString); 483 case StdStringKind: 484 return StringRef(*LHS.stdString); 485 case PtrAndLengthKind: 486 case StringLiteralKind: 487 return StringRef(LHS.ptrAndLength.ptr, LHS.ptrAndLength.length); 488 } 489 } 490 491 /// This returns the twine as a single StringRef if it can be 492 /// represented as such. Otherwise the twine is written into the given 493 /// SmallVector and a StringRef to the SmallVector's data is returned. toStringRef(SmallVectorImpl<char> & Out)494 StringRef toStringRef(SmallVectorImpl<char> &Out) const { 495 if (isSingleStringRef()) 496 return getSingleStringRef(); 497 toVector(Out); 498 return StringRef(Out.data(), Out.size()); 499 } 500 501 /// This returns the twine as a single null terminated StringRef if it 502 /// can be represented as such. Otherwise the twine is written into the 503 /// given SmallVector and a StringRef to the SmallVector's data is returned. 504 /// 505 /// The returned StringRef's size does not include the null terminator. 506 LLVM_ABI StringRef 507 toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const; 508 509 /// Write the concatenated string represented by this twine to the 510 /// stream \p OS. 511 LLVM_ABI void print(raw_ostream &OS) const; 512 513 /// Write the representation of this twine to the stream \p OS. 514 LLVM_ABI void printRepr(raw_ostream &OS) const; 515 516 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 517 /// Dump the concatenated string represented by this twine to stderr. 518 LLVM_DUMP_METHOD void dump() const; 519 520 /// Dump the representation of this twine to stderr. 521 LLVM_DUMP_METHOD void dumpRepr() const; 522 #endif 523 524 /// @} 525 }; 526 527 /// @name Twine Inline Implementations 528 /// @{ 529 concat(const Twine & Suffix)530 inline Twine Twine::concat(const Twine &Suffix) const { 531 // Concatenation with null is null. 532 if (isNull() || Suffix.isNull()) 533 return Twine(NullKind); 534 535 // Concatenation with empty yields the other side. 536 if (isEmpty()) 537 return Suffix; 538 if (Suffix.isEmpty()) 539 return *this; 540 541 // Otherwise we need to create a new node, taking care to fold in unary 542 // twines. 543 Child NewLHS, NewRHS; 544 NewLHS.twine = this; 545 NewRHS.twine = &Suffix; 546 NodeKind NewLHSKind = TwineKind, NewRHSKind = TwineKind; 547 if (isUnary()) { 548 NewLHS = LHS; 549 NewLHSKind = getLHSKind(); 550 } 551 if (Suffix.isUnary()) { 552 NewRHS = Suffix.LHS; 553 NewRHSKind = Suffix.getLHSKind(); 554 } 555 556 return Twine(NewLHS, NewLHSKind, NewRHS, NewRHSKind); 557 } 558 559 inline Twine operator+(const Twine &LHS, const Twine &RHS) { 560 return LHS.concat(RHS); 561 } 562 563 /// Additional overload to guarantee simplified codegen; this is equivalent to 564 /// concat(). 565 566 inline Twine operator+(const char *LHS, const StringRef &RHS) { 567 return Twine(LHS, RHS); 568 } 569 570 /// Additional overload to guarantee simplified codegen; this is equivalent to 571 /// concat(). 572 573 inline Twine operator+(const StringRef &LHS, const char *RHS) { 574 return Twine(LHS, RHS); 575 } 576 577 inline raw_ostream &operator<<(raw_ostream &OS, const Twine &RHS) { 578 RHS.print(OS); 579 return OS; 580 } 581 582 /// @} 583 584 } // end namespace llvm 585 586 #endif // LLVM_ADT_TWINE_H 587