1 //===-- ConstString.h -------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLDB_UTILITY_CONSTSTRING_H 10 #define LLDB_UTILITY_CONSTSTRING_H 11 12 #include "llvm/ADT/DenseMapInfo.h" 13 #include "llvm/ADT/StringRef.h" 14 #include "llvm/Support/FormatVariadic.h" 15 16 #include <cstddef> 17 #include <string_view> 18 19 namespace lldb_private { 20 class Stream; 21 } 22 namespace llvm { 23 class raw_ostream; 24 } 25 26 namespace lldb_private { 27 28 /// \class ConstString ConstString.h "lldb/Utility/ConstString.h" 29 /// A uniqued constant string class. 30 /// 31 /// Provides an efficient way to store strings as uniqued strings. After the 32 /// strings are uniqued, finding strings that are equal to one another is very 33 /// fast as just the pointers need to be compared. It also allows for many 34 /// common strings from many different sources to be shared to keep the memory 35 /// footprint low. 36 /// 37 /// No reference counting is done on strings that are added to the string 38 /// pool, once strings are added they are in the string pool for the life of 39 /// the program. 40 class ConstString { 41 public: 42 /// Default constructor 43 /// 44 /// Initializes the string to an empty string. 45 ConstString() = default; 46 47 explicit ConstString(llvm::StringRef s); 48 49 /// Construct with C String value 50 /// 51 /// Constructs this object with a C string by looking to see if the 52 /// C string already exists in the global string pool. If it doesn't 53 /// exist, it is added to the string pool. 54 /// 55 /// \param[in] cstr 56 /// A NULL terminated C string to add to the string pool. 57 explicit ConstString(const char *cstr); 58 59 /// Construct with C String value with max length 60 /// 61 /// Constructs this object with a C string with a length. If \a max_cstr_len 62 /// is greater than the actual length of the string, the string length will 63 /// be truncated. This allows substrings to be created without the need to 64 /// NULL terminate the string as it is passed into this function. 65 /// 66 /// \param[in] cstr 67 /// A pointer to the first character in the C string. The C 68 /// string can be NULL terminated in a buffer that contains 69 /// more characters than the length of the string, or the 70 /// string can be part of another string and a new substring 71 /// can be created. 72 /// 73 /// \param[in] max_cstr_len 74 /// The max length of \a cstr. If the string length of \a cstr 75 /// is less than \a max_cstr_len, then the string will be 76 /// truncated. If the string length of \a cstr is greater than 77 /// \a max_cstr_len, then only max_cstr_len bytes will be used 78 /// from \a cstr. 79 explicit ConstString(const char *cstr, size_t max_cstr_len); 80 81 /// Convert to bool operator. 82 /// 83 /// This allows code to check a ConstString object to see if it contains a 84 /// valid string using code such as: 85 /// 86 /// \code 87 /// ConstString str(...); 88 /// if (str) 89 /// { ... 90 /// \endcode 91 /// 92 /// \return 93 /// /b True this object contains a valid non-empty C string, \b 94 /// false otherwise. 95 explicit operator bool() const { return !IsEmpty(); } 96 97 /// Equal to operator 98 /// 99 /// Returns true if this string is equal to the string in \a rhs. This 100 /// operation is very fast as it results in a pointer comparison since all 101 /// strings are in a uniqued in a global string pool. 102 /// 103 /// \param[in] rhs 104 /// Another string object to compare this object to. 105 /// 106 /// \return 107 /// true if this object is equal to \a rhs. 108 /// false if this object is not equal to \a rhs. 109 bool operator==(ConstString rhs) const { 110 // We can do a pointer compare to compare these strings since they must 111 // come from the same pool in order to be equal. 112 return m_string == rhs.m_string; 113 } 114 115 /// Equal to operator against a non-ConstString value. 116 /// 117 /// Returns true if this string is equal to the string in \a rhs. This 118 /// overload is usually slower than comparing against a ConstString value. 119 /// However, if the rhs string not already a ConstString and it is impractical 120 /// to turn it into a non-temporary variable, then this overload is faster. 121 /// 122 /// \param[in] rhs 123 /// Another string object to compare this object to. 124 /// 125 /// \return 126 /// \b true if this object is equal to \a rhs. 127 /// \b false if this object is not equal to \a rhs. 128 bool operator==(const char *rhs) const { 129 // ConstString differentiates between empty strings and nullptr strings, but 130 // StringRef doesn't. Therefore we have to do this check manually now. 131 if (m_string == nullptr && rhs != nullptr) 132 return false; 133 if (m_string != nullptr && rhs == nullptr) 134 return false; 135 136 return GetStringRef() == rhs; 137 } 138 139 /// Not equal to operator 140 /// 141 /// Returns true if this string is not equal to the string in \a rhs. This 142 /// operation is very fast as it results in a pointer comparison since all 143 /// strings are in a uniqued in a global string pool. 144 /// 145 /// \param[in] rhs 146 /// Another string object to compare this object to. 147 /// 148 /// \return 149 /// \b true if this object is not equal to \a rhs. 150 /// \b false if this object is equal to \a rhs. 151 bool operator!=(ConstString rhs) const { return m_string != rhs.m_string; } 152 153 /// Not equal to operator against a non-ConstString value. 154 /// 155 /// Returns true if this string is not equal to the string in \a rhs. This 156 /// overload is usually slower than comparing against a ConstString value. 157 /// However, if the rhs string not already a ConstString and it is impractical 158 /// to turn it into a non-temporary variable, then this overload is faster. 159 /// 160 /// \param[in] rhs 161 /// Another string object to compare this object to. 162 /// 163 /// \return \b true if this object is not equal to \a rhs, false otherwise. 164 bool operator!=(const char *rhs) const { return !(*this == rhs); } 165 166 bool operator<(ConstString rhs) const; 167 168 // Implicitly convert \class ConstString instances to \class StringRef. StringRef()169 operator llvm::StringRef() const { return GetStringRef(); } 170 171 // Explicitly convert \class ConstString instances to \class std::string_view. string_view()172 explicit operator std::string_view() const { 173 return std::string_view(m_string, GetLength()); 174 } 175 176 // Explicitly convert \class ConstString instances to \class std::string. string()177 explicit operator std::string() const { return GetString(); } 178 179 /// Get the string value as a C string. 180 /// 181 /// Get the value of the contained string as a NULL terminated C string 182 /// value. 183 /// 184 /// If \a value_if_empty is nullptr, then nullptr will be returned. 185 /// 186 /// \return Returns \a value_if_empty if the string is empty, otherwise 187 /// the C string value contained in this object. 188 const char *AsCString(const char *value_if_empty = nullptr) const { 189 return (IsEmpty() ? value_if_empty : m_string); 190 } 191 192 /// Get the string value as a llvm::StringRef 193 /// 194 /// \return 195 /// Returns a new llvm::StringRef object filled in with the 196 /// needed data. GetStringRef()197 llvm::StringRef GetStringRef() const { 198 return llvm::StringRef(m_string, GetLength()); 199 } 200 201 /// Get the string value as a std::string GetString()202 std::string GetString() const { 203 return std::string(AsCString(""), GetLength()); 204 } 205 206 /// Get the string value as a C string. 207 /// 208 /// Get the value of the contained string as a NULL terminated C string 209 /// value. Similar to the ConstString::AsCString() function, yet this 210 /// function will always return nullptr if the string is not valid. So this 211 /// function is a direct accessor to the string pointer value. 212 /// 213 /// \return 214 /// Returns nullptr the string is invalid, otherwise the C string 215 /// value contained in this object. GetCString()216 const char *GetCString() const { return m_string; } 217 218 /// Get the length in bytes of string value. 219 /// 220 /// The string pool stores the length of the string, so we can avoid calling 221 /// strlen() on the pointer value with this function. 222 /// 223 /// \return 224 /// Returns the number of bytes that this string occupies in 225 /// memory, not including the NULL termination byte. 226 size_t GetLength() const; 227 228 /// Clear this object's state. 229 /// 230 /// Clear any contained string and reset the value to the empty string 231 /// value. Clear()232 void Clear() { m_string = nullptr; } 233 234 /// Equal to operator 235 /// 236 /// Returns true if this string is equal to the string in \a rhs. If case 237 /// sensitive equality is tested, this operation is very fast as it results 238 /// in a pointer comparison since all strings are in a uniqued in a global 239 /// string pool. 240 /// 241 /// \param[in] lhs 242 /// The Left Hand Side const ConstString object reference. 243 /// 244 /// \param[in] rhs 245 /// The Right Hand Side const ConstString object reference. 246 /// 247 /// \param[in] case_sensitive 248 /// Case sensitivity. If true, case sensitive equality 249 /// will be tested, otherwise character case will be ignored 250 /// 251 /// \return \b true if this object is equal to \a rhs, \b false otherwise. 252 static bool Equals(ConstString lhs, ConstString rhs, 253 const bool case_sensitive = true); 254 255 /// Compare two string objects. 256 /// 257 /// Compares the C string values contained in \a lhs and \a rhs and returns 258 /// an integer result. 259 /// 260 /// NOTE: only call this function when you want a true string 261 /// comparison. If you want string equality use the, use the == operator as 262 /// it is much more efficient. Also if you want string inequality, use the 263 /// != operator for the same reasons. 264 /// 265 /// \param[in] lhs 266 /// The Left Hand Side const ConstString object reference. 267 /// 268 /// \param[in] rhs 269 /// The Right Hand Side const ConstString object reference. 270 /// 271 /// \param[in] case_sensitive 272 /// Case sensitivity of compare. If true, case sensitive compare 273 /// will be performed, otherwise character case will be ignored 274 /// 275 /// \return -1 if lhs < rhs, 0 if lhs == rhs, 1 if lhs > rhs 276 static int Compare(ConstString lhs, ConstString rhs, 277 const bool case_sensitive = true); 278 279 /// Dump the object description to a stream. 280 /// 281 /// Dump the string value to the stream \a s. If the contained string is 282 /// empty, print \a value_if_empty to the stream instead. If \a 283 /// value_if_empty is nullptr, then nothing will be dumped to the stream. 284 /// 285 /// \param[in] s 286 /// The stream that will be used to dump the object description. 287 /// 288 /// \param[in] value_if_empty 289 /// The value to dump if the string is empty. If nullptr, nothing 290 /// will be output to the stream. 291 void Dump(Stream *s, const char *value_if_empty = nullptr) const; 292 293 /// Dump the object debug description to a stream. 294 /// 295 /// \param[in] s 296 /// The stream that will be used to dump the object description. 297 void DumpDebug(Stream *s) const; 298 299 /// Test for empty string. 300 /// 301 /// \return 302 /// \b true if the contained string is empty. 303 /// \b false if the contained string is not empty. IsEmpty()304 bool IsEmpty() const { return m_string == nullptr || m_string[0] == '\0'; } 305 306 /// Test for null string. 307 /// 308 /// \return 309 /// \b true if there is no string associated with this instance. 310 /// \b false if there is a string associated with this instance. IsNull()311 bool IsNull() const { return m_string == nullptr; } 312 313 /// Set the C string value. 314 /// 315 /// Set the string value in the object by uniquing the \a cstr string value 316 /// in our global string pool. 317 /// 318 /// If the C string already exists in the global string pool, it finds the 319 /// current entry and returns the existing value. If it doesn't exist, it is 320 /// added to the string pool. 321 /// 322 /// \param[in] cstr 323 /// A NULL terminated C string to add to the string pool. 324 void SetCString(const char *cstr); 325 326 void SetString(llvm::StringRef s); 327 328 /// Set the C string value and its mangled counterpart. 329 /// 330 /// Object files and debug symbols often use mangled string to represent the 331 /// linkage name for a symbol, function or global. The string pool can 332 /// efficiently store these values and their counterparts so when we run 333 /// into another instance of a mangled name, we can avoid calling the name 334 /// demangler over and over on the same strings and then trying to unique 335 /// them. 336 /// 337 /// \param[in] demangled 338 /// The demangled string to correlate with the \a mangled name. 339 /// 340 /// \param[in] mangled 341 /// The already uniqued mangled ConstString to correlate the 342 /// soon to be uniqued version of \a demangled. 343 void SetStringWithMangledCounterpart(llvm::StringRef demangled, 344 ConstString mangled); 345 346 /// Retrieve the mangled or demangled counterpart for a mangled or demangled 347 /// ConstString. 348 /// 349 /// Object files and debug symbols often use mangled string to represent the 350 /// linkage name for a symbol, function or global. The string pool can 351 /// efficiently store these values and their counterparts so when we run 352 /// into another instance of a mangled name, we can avoid calling the name 353 /// demangler over and over on the same strings and then trying to unique 354 /// them. 355 /// 356 /// \param[in] counterpart 357 /// A reference to a ConstString object that might get filled in 358 /// with the demangled/mangled counterpart. 359 /// 360 /// \return 361 /// /b True if \a counterpart was filled in with the counterpart 362 /// /b false otherwise. 363 bool GetMangledCounterpart(ConstString &counterpart) const; 364 365 /// Set the C string value with length. 366 /// 367 /// Set the string value in the object by uniquing \a cstr_len bytes 368 /// starting at the \a cstr string value in our global string pool. If trim 369 /// is true, then \a cstr_len indicates a maximum length of the CString and 370 /// if the actual length of the string is less, then it will be trimmed. 371 /// 372 /// If the C string already exists in the global string pool, it finds the 373 /// current entry and returns the existing value. If it doesn't exist, it is 374 /// added to the string pool. 375 /// 376 /// \param[in] cstr 377 /// A NULL terminated C string to add to the string pool. 378 /// 379 /// \param[in] cstr_len 380 /// The maximum length of the C string. 381 void SetCStringWithLength(const char *cstr, size_t cstr_len); 382 383 /// Set the C string value with the minimum length between \a fixed_cstr_len 384 /// and the actual length of the C string. This can be used for data 385 /// structures that have a fixed length to store a C string where the string 386 /// might not be NULL terminated if the string takes the entire buffer. 387 void SetTrimmedCStringWithLength(const char *cstr, size_t fixed_cstr_len); 388 389 /// Get the memory cost of this object. 390 /// 391 /// Return the size in bytes that this object takes in memory. This returns 392 /// the size in bytes of this object, which does not include any the shared 393 /// string values it may refer to. 394 /// 395 /// \return 396 /// The number of bytes that this object occupies in memory. MemorySize()397 size_t MemorySize() const { return sizeof(ConstString); } 398 399 struct MemoryStats { GetBytesTotalMemoryStats400 size_t GetBytesTotal() const { return bytes_total; } GetBytesUsedMemoryStats401 size_t GetBytesUsed() const { return bytes_used; } GetBytesUnusedMemoryStats402 size_t GetBytesUnused() const { return bytes_total - bytes_used; } 403 size_t bytes_total = 0; 404 size_t bytes_used = 0; 405 }; 406 407 static MemoryStats GetMemoryStats(); 408 409 protected: 410 template <typename T, typename Enable> friend struct ::llvm::DenseMapInfo; 411 /// Only used by DenseMapInfo. FromStringPoolPointer(const char * ptr)412 static ConstString FromStringPoolPointer(const char *ptr) { 413 ConstString s; 414 s.m_string = ptr; 415 return s; 416 }; 417 418 const char *m_string = nullptr; 419 }; 420 421 /// Stream the string value \a str to the stream \a s 422 Stream &operator<<(Stream &s, ConstString str); 423 424 } // namespace lldb_private 425 426 namespace llvm { 427 template <> struct format_provider<lldb_private::ConstString> { 428 static void format(const lldb_private::ConstString &CS, llvm::raw_ostream &OS, 429 llvm::StringRef Options); 430 }; 431 432 /// DenseMapInfo implementation. 433 /// \{ 434 template <> struct DenseMapInfo<lldb_private::ConstString> { 435 static inline lldb_private::ConstString getEmptyKey() { 436 return lldb_private::ConstString::FromStringPoolPointer( 437 DenseMapInfo<const char *>::getEmptyKey()); 438 } 439 static inline lldb_private::ConstString getTombstoneKey() { 440 return lldb_private::ConstString::FromStringPoolPointer( 441 DenseMapInfo<const char *>::getTombstoneKey()); 442 } 443 static unsigned getHashValue(lldb_private::ConstString val) { 444 return DenseMapInfo<const char *>::getHashValue(val.m_string); 445 } 446 static bool isEqual(lldb_private::ConstString LHS, 447 lldb_private::ConstString RHS) { 448 return LHS == RHS; 449 } 450 }; 451 /// \} 452 453 inline raw_ostream &operator<<(raw_ostream &os, lldb_private::ConstString s) { 454 os << s.GetStringRef(); 455 return os; 456 } 457 } // namespace llvm 458 459 #endif // LLDB_UTILITY_CONSTSTRING_H 460