1 //===- Twine.h - Fast Temporary String Concatenation ------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_ADT_TWINE_H 10 #define LLVM_ADT_TWINE_H 11 12 #include "llvm/ADT/SmallVector.h" 13 #include "llvm/ADT/StringRef.h" 14 #include "llvm/Support/ErrorHandling.h" 15 #include <cassert> 16 #include <cstdint> 17 #include <string> 18 #include <string_view> 19 20 namespace llvm { 21 22 class formatv_object_base; 23 class raw_ostream; 24 25 /// Twine - A lightweight data structure for efficiently representing the 26 /// concatenation of temporary values as strings. 27 /// 28 /// A Twine is a kind of rope, it represents a concatenated string using a 29 /// binary-tree, where the string is the preorder of the nodes. Since the 30 /// Twine can be efficiently rendered into a buffer when its result is used, 31 /// it avoids the cost of generating temporary values for intermediate string 32 /// results -- particularly in cases when the Twine result is never 33 /// required. By explicitly tracking the type of leaf nodes, we can also avoid 34 /// the creation of temporary strings for conversions operations (such as 35 /// appending an integer to a string). 36 /// 37 /// A Twine is not intended for use directly and should not be stored, its 38 /// implementation relies on the ability to store pointers to temporary stack 39 /// objects which may be deallocated at the end of a statement. Twines should 40 /// only be used as const references in arguments, when an API wishes 41 /// to accept possibly-concatenated strings. 42 /// 43 /// Twines support a special 'null' value, which always concatenates to form 44 /// itself, and renders as an empty string. This can be returned from APIs to 45 /// effectively nullify any concatenations performed on the result. 46 /// 47 /// \b Implementation 48 /// 49 /// Given the nature of a Twine, it is not possible for the Twine's 50 /// concatenation method to construct interior nodes; the result must be 51 /// represented inside the returned value. For this reason a Twine object 52 /// actually holds two values, the left- and right-hand sides of a 53 /// concatenation. We also have nullary Twine objects, which are effectively 54 /// sentinel values that represent empty strings. 55 /// 56 /// Thus, a Twine can effectively have zero, one, or two children. The \see 57 /// isNullary(), \see isUnary(), and \see isBinary() predicates exist for 58 /// testing the number of children. 59 /// 60 /// We maintain a number of invariants on Twine objects (FIXME: Why): 61 /// - Nullary twines are always represented with their Kind on the left-hand 62 /// side, and the Empty kind on the right-hand side. 63 /// - Unary twines are always represented with the value on the left-hand 64 /// side, and the Empty kind on the right-hand side. 65 /// - If a Twine has another Twine as a child, that child should always be 66 /// binary (otherwise it could have been folded into the parent). 67 /// 68 /// These invariants are check by \see isValid(). 69 /// 70 /// \b Efficiency Considerations 71 /// 72 /// The Twine is designed to yield efficient and small code for common 73 /// situations. For this reason, the concat() method is inlined so that 74 /// concatenations of leaf nodes can be optimized into stores directly into a 75 /// single stack allocated object. 76 /// 77 /// In practice, not all compilers can be trusted to optimize concat() fully, 78 /// so we provide two additional methods (and accompanying operator+ 79 /// overloads) to guarantee that particularly important cases (cstring plus 80 /// StringRef) codegen as desired. 81 class Twine { 82 /// NodeKind - Represent the type of an argument. 83 enum NodeKind : unsigned char { 84 /// An empty string; the result of concatenating anything with it is also 85 /// empty. 86 NullKind, 87 88 /// The empty string. 89 EmptyKind, 90 91 /// A pointer to a Twine instance. 92 TwineKind, 93 94 /// A pointer to a C string instance. 95 CStringKind, 96 97 /// A pointer to an std::string instance. 98 StdStringKind, 99 100 /// A Pointer and Length representation. Used for std::string_view, 101 /// StringRef, and SmallString. Can't use a StringRef here 102 /// because they are not trivally constructible. 103 PtrAndLengthKind, 104 105 /// A pointer and length representation that's also null-terminated. 106 /// Guaranteed to be constructed from a compile-time string literal. 107 StringLiteralKind, 108 109 /// A pointer to a formatv_object_base instance. 110 FormatvObjectKind, 111 112 /// A char value, to render as a character. 113 CharKind, 114 115 /// An unsigned int value, to render as an unsigned decimal integer. 116 DecUIKind, 117 118 /// An int value, to render as a signed decimal integer. 119 DecIKind, 120 121 /// A pointer to an unsigned long value, to render as an unsigned decimal 122 /// integer. 123 DecULKind, 124 125 /// A pointer to a long value, to render as a signed decimal integer. 126 DecLKind, 127 128 /// A pointer to an unsigned long long value, to render as an unsigned 129 /// decimal integer. 130 DecULLKind, 131 132 /// A pointer to a long long value, to render as a signed decimal integer. 133 DecLLKind, 134 135 /// A pointer to a uint64_t value, to render as an unsigned hexadecimal 136 /// integer. 137 UHexKind 138 }; 139 140 union Child 141 { 142 const Twine *twine; 143 const char *cString; 144 const std::string *stdString; 145 struct { 146 const char *ptr; 147 size_t length; 148 } ptrAndLength; 149 const formatv_object_base *formatvObject; 150 char character; 151 unsigned int decUI; 152 int decI; 153 const unsigned long *decUL; 154 const long *decL; 155 const unsigned long long *decULL; 156 const long long *decLL; 157 const uint64_t *uHex; 158 }; 159 160 /// LHS - The prefix in the concatenation, which may be uninitialized for 161 /// Null or Empty kinds. 162 Child LHS; 163 164 /// RHS - The suffix in the concatenation, which may be uninitialized for 165 /// Null or Empty kinds. 166 Child RHS; 167 168 /// LHSKind - The NodeKind of the left hand side, \see getLHSKind(). 169 NodeKind LHSKind = EmptyKind; 170 171 /// RHSKind - The NodeKind of the right hand side, \see getRHSKind(). 172 NodeKind RHSKind = EmptyKind; 173 174 /// Construct a nullary twine; the kind must be NullKind or EmptyKind. 175 explicit Twine(NodeKind Kind) : LHSKind(Kind) { 176 assert(isNullary() && "Invalid kind!"); 177 } 178 179 /// Construct a binary twine. 180 explicit Twine(const Twine &LHS, const Twine &RHS) 181 : LHSKind(TwineKind), RHSKind(TwineKind) { 182 this->LHS.twine = &LHS; 183 this->RHS.twine = &RHS; 184 assert(isValid() && "Invalid twine!"); 185 } 186 187 /// Construct a twine from explicit values. 188 explicit Twine(Child LHS, NodeKind LHSKind, Child RHS, NodeKind RHSKind) 189 : LHS(LHS), RHS(RHS), LHSKind(LHSKind), RHSKind(RHSKind) { 190 assert(isValid() && "Invalid twine!"); 191 } 192 193 /// Check for the null twine. 194 bool isNull() const { 195 return getLHSKind() == NullKind; 196 } 197 198 /// Check for the empty twine. 199 bool isEmpty() const { 200 return getLHSKind() == EmptyKind; 201 } 202 203 /// Check if this is a nullary twine (null or empty). 204 bool isNullary() const { 205 return isNull() || isEmpty(); 206 } 207 208 /// Check if this is a unary twine. 209 bool isUnary() const { 210 return getRHSKind() == EmptyKind && !isNullary(); 211 } 212 213 /// Check if this is a binary twine. 214 bool isBinary() const { 215 return getLHSKind() != NullKind && getRHSKind() != EmptyKind; 216 } 217 218 /// Check if this is a valid twine (satisfying the invariants on 219 /// order and number of arguments). 220 bool isValid() const { 221 // Nullary twines always have Empty on the RHS. 222 if (isNullary() && getRHSKind() != EmptyKind) 223 return false; 224 225 // Null should never appear on the RHS. 226 if (getRHSKind() == NullKind) 227 return false; 228 229 // The RHS cannot be non-empty if the LHS is empty. 230 if (getRHSKind() != EmptyKind && getLHSKind() == EmptyKind) 231 return false; 232 233 // A twine child should always be binary. 234 if (getLHSKind() == TwineKind && 235 !LHS.twine->isBinary()) 236 return false; 237 if (getRHSKind() == TwineKind && 238 !RHS.twine->isBinary()) 239 return false; 240 241 return true; 242 } 243 244 /// Get the NodeKind of the left-hand side. 245 NodeKind getLHSKind() const { return LHSKind; } 246 247 /// Get the NodeKind of the right-hand side. 248 NodeKind getRHSKind() const { return RHSKind; } 249 250 /// Print one child from a twine. 251 void printOneChild(raw_ostream &OS, Child Ptr, NodeKind Kind) const; 252 253 /// Print the representation of one child from a twine. 254 void printOneChildRepr(raw_ostream &OS, Child Ptr, 255 NodeKind Kind) const; 256 257 public: 258 /// @name Constructors 259 /// @{ 260 261 /// Construct from an empty string. 262 /*implicit*/ Twine() { 263 assert(isValid() && "Invalid twine!"); 264 } 265 266 Twine(const Twine &) = default; 267 268 /// Construct from a C string. 269 /// 270 /// We take care here to optimize "" into the empty twine -- this will be 271 /// optimized out for string constants. This allows Twine arguments have 272 /// default "" values, without introducing unnecessary string constants. 273 /*implicit*/ Twine(const char *Str) { 274 if (Str[0] != '\0') { 275 LHS.cString = Str; 276 LHSKind = CStringKind; 277 } else 278 LHSKind = EmptyKind; 279 280 assert(isValid() && "Invalid twine!"); 281 } 282 /// Delete the implicit conversion from nullptr as Twine(const char *) 283 /// cannot take nullptr. 284 /*implicit*/ Twine(std::nullptr_t) = delete; 285 286 /// Construct from an std::string. 287 /*implicit*/ Twine(const std::string &Str) : LHSKind(StdStringKind) { 288 LHS.stdString = &Str; 289 assert(isValid() && "Invalid twine!"); 290 } 291 292 /// Construct from an std::string_view by converting it to a pointer and 293 /// length. This handles string_views on a pure API basis, and avoids 294 /// storing one (or a pointer to one) inside a Twine, which avoids problems 295 /// when mixing code compiled under various C++ standards. 296 /*implicit*/ Twine(const std::string_view &Str) 297 : LHSKind(PtrAndLengthKind) { 298 LHS.ptrAndLength.ptr = Str.data(); 299 LHS.ptrAndLength.length = Str.length(); 300 assert(isValid() && "Invalid twine!"); 301 } 302 303 /// Construct from a StringRef. 304 /*implicit*/ Twine(const StringRef &Str) : LHSKind(PtrAndLengthKind) { 305 LHS.ptrAndLength.ptr = Str.data(); 306 LHS.ptrAndLength.length = Str.size(); 307 assert(isValid() && "Invalid twine!"); 308 } 309 310 /// Construct from a StringLiteral. 311 /*implicit*/ Twine(const StringLiteral &Str) 312 : LHSKind(StringLiteralKind) { 313 LHS.ptrAndLength.ptr = Str.data(); 314 LHS.ptrAndLength.length = Str.size(); 315 assert(isValid() && "Invalid twine!"); 316 } 317 318 /// Construct from a SmallString. 319 /*implicit*/ Twine(const SmallVectorImpl<char> &Str) 320 : LHSKind(PtrAndLengthKind) { 321 LHS.ptrAndLength.ptr = Str.data(); 322 LHS.ptrAndLength.length = Str.size(); 323 assert(isValid() && "Invalid twine!"); 324 } 325 326 /// Construct from a formatv_object_base. 327 /*implicit*/ Twine(const formatv_object_base &Fmt) 328 : LHSKind(FormatvObjectKind) { 329 LHS.formatvObject = &Fmt; 330 assert(isValid() && "Invalid twine!"); 331 } 332 333 /// Construct from a char. 334 explicit Twine(char Val) : LHSKind(CharKind) { 335 LHS.character = Val; 336 } 337 338 /// Construct from a signed char. 339 explicit Twine(signed char Val) : LHSKind(CharKind) { 340 LHS.character = static_cast<char>(Val); 341 } 342 343 /// Construct from an unsigned char. 344 explicit Twine(unsigned char Val) : LHSKind(CharKind) { 345 LHS.character = static_cast<char>(Val); 346 } 347 348 /// Construct a twine to print \p Val as an unsigned decimal integer. 349 explicit Twine(unsigned Val) : LHSKind(DecUIKind) { 350 LHS.decUI = Val; 351 } 352 353 /// Construct a twine to print \p Val as a signed decimal integer. 354 explicit Twine(int Val) : LHSKind(DecIKind) { 355 LHS.decI = Val; 356 } 357 358 /// Construct a twine to print \p Val as an unsigned decimal integer. 359 explicit Twine(const unsigned long &Val) : LHSKind(DecULKind) { 360 LHS.decUL = &Val; 361 } 362 363 /// Construct a twine to print \p Val as a signed decimal integer. 364 explicit Twine(const long &Val) : LHSKind(DecLKind) { 365 LHS.decL = &Val; 366 } 367 368 /// Construct a twine to print \p Val as an unsigned decimal integer. 369 explicit Twine(const unsigned long long &Val) : LHSKind(DecULLKind) { 370 LHS.decULL = &Val; 371 } 372 373 /// Construct a twine to print \p Val as a signed decimal integer. 374 explicit Twine(const long long &Val) : LHSKind(DecLLKind) { 375 LHS.decLL = &Val; 376 } 377 378 // FIXME: Unfortunately, to make sure this is as efficient as possible we 379 // need extra binary constructors from particular types. We can't rely on 380 // the compiler to be smart enough to fold operator+()/concat() down to the 381 // right thing. Yet. 382 383 /// Construct as the concatenation of a C string and a StringRef. 384 /*implicit*/ Twine(const char *LHS, const StringRef &RHS) 385 : LHSKind(CStringKind), RHSKind(PtrAndLengthKind) { 386 this->LHS.cString = LHS; 387 this->RHS.ptrAndLength.ptr = RHS.data(); 388 this->RHS.ptrAndLength.length = RHS.size(); 389 assert(isValid() && "Invalid twine!"); 390 } 391 392 /// Construct as the concatenation of a StringRef and a C string. 393 /*implicit*/ Twine(const StringRef &LHS, const char *RHS) 394 : LHSKind(PtrAndLengthKind), RHSKind(CStringKind) { 395 this->LHS.ptrAndLength.ptr = LHS.data(); 396 this->LHS.ptrAndLength.length = LHS.size(); 397 this->RHS.cString = RHS; 398 assert(isValid() && "Invalid twine!"); 399 } 400 401 /// Since the intended use of twines is as temporary objects, assignments 402 /// when concatenating might cause undefined behavior or stack corruptions 403 Twine &operator=(const Twine &) = delete; 404 405 /// Create a 'null' string, which is an empty string that always 406 /// concatenates to form another empty string. 407 static Twine createNull() { 408 return Twine(NullKind); 409 } 410 411 /// @} 412 /// @name Numeric Conversions 413 /// @{ 414 415 // Construct a twine to print \p Val as an unsigned hexadecimal integer. 416 static Twine utohexstr(const uint64_t &Val) { 417 Child LHS, RHS; 418 LHS.uHex = &Val; 419 RHS.twine = nullptr; 420 return Twine(LHS, UHexKind, RHS, EmptyKind); 421 } 422 423 /// @} 424 /// @name Predicate Operations 425 /// @{ 426 427 /// Check if this twine is trivially empty; a false return value does not 428 /// necessarily mean the twine is empty. 429 bool isTriviallyEmpty() const { 430 return isNullary(); 431 } 432 433 /// Check if this twine is guaranteed to refer to single string literal. 434 bool isSingleStringLiteral() const { 435 return isUnary() && getLHSKind() == StringLiteralKind; 436 } 437 438 /// Return true if this twine can be dynamically accessed as a single 439 /// StringRef value with getSingleStringRef(). 440 bool isSingleStringRef() const { 441 if (getRHSKind() != EmptyKind) return false; 442 443 switch (getLHSKind()) { 444 case EmptyKind: 445 case CStringKind: 446 case StdStringKind: 447 case PtrAndLengthKind: 448 case StringLiteralKind: 449 return true; 450 default: 451 return false; 452 } 453 } 454 455 /// @} 456 /// @name String Operations 457 /// @{ 458 459 Twine concat(const Twine &Suffix) const; 460 461 /// @} 462 /// @name Output & Conversion. 463 /// @{ 464 465 /// Return the twine contents as a std::string. 466 std::string str() const; 467 468 /// Append the concatenated string into the given SmallString or SmallVector. 469 void toVector(SmallVectorImpl<char> &Out) const; 470 471 /// This returns the twine as a single StringRef. This method is only valid 472 /// if isSingleStringRef() is true. 473 StringRef getSingleStringRef() const { 474 assert(isSingleStringRef() &&"This cannot be had as a single stringref!"); 475 switch (getLHSKind()) { 476 default: llvm_unreachable("Out of sync with isSingleStringRef"); 477 case EmptyKind: 478 return StringRef(); 479 case CStringKind: 480 return StringRef(LHS.cString); 481 case StdStringKind: 482 return StringRef(*LHS.stdString); 483 case PtrAndLengthKind: 484 case StringLiteralKind: 485 return StringRef(LHS.ptrAndLength.ptr, LHS.ptrAndLength.length); 486 } 487 } 488 489 /// This returns the twine as a single StringRef if it can be 490 /// represented as such. Otherwise the twine is written into the given 491 /// SmallVector and a StringRef to the SmallVector's data is returned. 492 StringRef toStringRef(SmallVectorImpl<char> &Out) const { 493 if (isSingleStringRef()) 494 return getSingleStringRef(); 495 toVector(Out); 496 return StringRef(Out.data(), Out.size()); 497 } 498 499 /// This returns the twine as a single null terminated StringRef if it 500 /// can be represented as such. Otherwise the twine is written into the 501 /// given SmallVector and a StringRef to the SmallVector's data is returned. 502 /// 503 /// The returned StringRef's size does not include the null terminator. 504 StringRef toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const; 505 506 /// Write the concatenated string represented by this twine to the 507 /// stream \p OS. 508 void print(raw_ostream &OS) const; 509 510 /// Dump the concatenated string represented by this twine to stderr. 511 void dump() const; 512 513 /// Write the representation of this twine to the stream \p OS. 514 void printRepr(raw_ostream &OS) const; 515 516 /// Dump the representation of this twine to stderr. 517 void dumpRepr() const; 518 519 /// @} 520 }; 521 522 /// @name Twine Inline Implementations 523 /// @{ 524 525 inline Twine Twine::concat(const Twine &Suffix) const { 526 // Concatenation with null is null. 527 if (isNull() || Suffix.isNull()) 528 return Twine(NullKind); 529 530 // Concatenation with empty yields the other side. 531 if (isEmpty()) 532 return Suffix; 533 if (Suffix.isEmpty()) 534 return *this; 535 536 // Otherwise we need to create a new node, taking care to fold in unary 537 // twines. 538 Child NewLHS, NewRHS; 539 NewLHS.twine = this; 540 NewRHS.twine = &Suffix; 541 NodeKind NewLHSKind = TwineKind, NewRHSKind = TwineKind; 542 if (isUnary()) { 543 NewLHS = LHS; 544 NewLHSKind = getLHSKind(); 545 } 546 if (Suffix.isUnary()) { 547 NewRHS = Suffix.LHS; 548 NewRHSKind = Suffix.getLHSKind(); 549 } 550 551 return Twine(NewLHS, NewLHSKind, NewRHS, NewRHSKind); 552 } 553 554 inline Twine operator+(const Twine &LHS, const Twine &RHS) { 555 return LHS.concat(RHS); 556 } 557 558 /// Additional overload to guarantee simplified codegen; this is equivalent to 559 /// concat(). 560 561 inline Twine operator+(const char *LHS, const StringRef &RHS) { 562 return Twine(LHS, RHS); 563 } 564 565 /// Additional overload to guarantee simplified codegen; this is equivalent to 566 /// concat(). 567 568 inline Twine operator+(const StringRef &LHS, const char *RHS) { 569 return Twine(LHS, RHS); 570 } 571 572 inline raw_ostream &operator<<(raw_ostream &OS, const Twine &RHS) { 573 RHS.print(OS); 574 return OS; 575 } 576 577 /// @} 578 579 } // end namespace llvm 580 581 #endif // LLVM_ADT_TWINE_H 582