xref: /freebsd/contrib/llvm-project/llvm/include/llvm/ADT/StringRef.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- StringRef.h - Constant String Reference Wrapper ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_ADT_STRINGREF_H
10 #define LLVM_ADT_STRINGREF_H
11 
12 #include "llvm/ADT/DenseMapInfo.h"
13 #include "llvm/ADT/STLFunctionalExtras.h"
14 #include "llvm/ADT/iterator_range.h"
15 #include "llvm/Support/Compiler.h"
16 #include <algorithm>
17 #include <cassert>
18 #include <cstddef>
19 #include <cstring>
20 #include <iterator>
21 #include <limits>
22 #include <string>
23 #include <string_view>
24 #include <type_traits>
25 #include <utility>
26 
27 namespace llvm {
28 
29   class APInt;
30   class hash_code;
31   template <typename T> class SmallVectorImpl;
32   class StringRef;
33 
34   /// Helper functions for StringRef::getAsInteger.
35   LLVM_ABI bool getAsUnsignedInteger(StringRef Str, unsigned Radix,
36                                      unsigned long long &Result);
37 
38   LLVM_ABI bool getAsSignedInteger(StringRef Str, unsigned Radix,
39                                    long long &Result);
40 
41   LLVM_ABI bool consumeUnsignedInteger(StringRef &Str, unsigned Radix,
42                                        unsigned long long &Result);
43   LLVM_ABI bool consumeSignedInteger(StringRef &Str, unsigned Radix,
44                                      long long &Result);
45 
46   /// StringRef - Represent a constant reference to a string, i.e. a character
47   /// array and a length, which need not be null terminated.
48   ///
49   /// This class does not own the string data, it is expected to be used in
50   /// situations where the character data resides in some other buffer, whose
51   /// lifetime extends past that of the StringRef. For this reason, it is not in
52   /// general safe to store a StringRef.
53   class LLVM_GSL_POINTER StringRef {
54   public:
55     static constexpr size_t npos = ~size_t(0);
56 
57     using iterator = const char *;
58     using const_iterator = const char *;
59     using size_type = size_t;
60     using value_type = char;
61     using reverse_iterator = std::reverse_iterator<iterator>;
62     using const_reverse_iterator = std::reverse_iterator<const_iterator>;
63 
64   private:
65     /// The start of the string, in an external buffer.
66     const char *Data = nullptr;
67 
68     /// The length of the string.
69     size_t Length = 0;
70 
71     // Workaround memcmp issue with null pointers (undefined behavior)
72     // by providing a specialized version
compareMemory(const char * Lhs,const char * Rhs,size_t Length)73     static int compareMemory(const char *Lhs, const char *Rhs, size_t Length) {
74       if (Length == 0) { return 0; }
75       return ::memcmp(Lhs,Rhs,Length);
76     }
77 
78   public:
79     /// @name Constructors
80     /// @{
81 
82     /// Construct an empty string ref.
83     /*implicit*/ StringRef() = default;
84 
85     /// Disable conversion from nullptr.  This prevents things like
86     /// if (S == nullptr)
87     StringRef(std::nullptr_t) = delete;
88 
89     /// Construct a string ref from a cstring.
StringRef(const char * Str LLVM_LIFETIME_BOUND)90     /*implicit*/ constexpr StringRef(const char *Str LLVM_LIFETIME_BOUND)
91         : Data(Str), Length(Str ?
92     // GCC 7 doesn't have constexpr char_traits. Fall back to __builtin_strlen.
93 #if defined(_GLIBCXX_RELEASE) && _GLIBCXX_RELEASE < 8
94                                 __builtin_strlen(Str)
95 #else
96                                 std::char_traits<char>::length(Str)
97 #endif
98                                 : 0) {
99     }
100 
101     /// Construct a string ref from a pointer and length.
StringRef(const char * data LLVM_LIFETIME_BOUND,size_t length)102     /*implicit*/ constexpr StringRef(const char *data LLVM_LIFETIME_BOUND,
103                                      size_t length)
104         : Data(data), Length(length) {}
105 
106     /// Construct a string ref from an std::string.
StringRef(const std::string & Str)107     /*implicit*/ StringRef(const std::string &Str)
108         : Data(Str.data()), Length(Str.length()) {}
109 
110     /// Construct a string ref from an std::string_view.
StringRef(std::string_view Str)111     /*implicit*/ constexpr StringRef(std::string_view Str)
112         : Data(Str.data()), Length(Str.size()) {}
113 
114     /// @}
115     /// @name Iterators
116     /// @{
117 
begin()118     iterator begin() const { return data(); }
119 
end()120     iterator end() const { return data() + size(); }
121 
rbegin()122     reverse_iterator rbegin() const {
123       return std::make_reverse_iterator(end());
124     }
125 
rend()126     reverse_iterator rend() const {
127       return std::make_reverse_iterator(begin());
128     }
129 
bytes_begin()130     const unsigned char *bytes_begin() const {
131       return reinterpret_cast<const unsigned char *>(begin());
132     }
bytes_end()133     const unsigned char *bytes_end() const {
134       return reinterpret_cast<const unsigned char *>(end());
135     }
bytes()136     iterator_range<const unsigned char *> bytes() const {
137       return make_range(bytes_begin(), bytes_end());
138     }
139 
140     /// @}
141     /// @name String Operations
142     /// @{
143 
144     /// data - Get a pointer to the start of the string (which may not be null
145     /// terminated).
data()146     [[nodiscard]] constexpr const char *data() const { return Data; }
147 
148     /// empty - Check if the string is empty.
empty()149     [[nodiscard]] constexpr bool empty() const { return size() == 0; }
150 
151     /// size - Get the string size.
size()152     [[nodiscard]] constexpr size_t size() const { return Length; }
153 
154     /// front - Get the first character in the string.
front()155     [[nodiscard]] char front() const {
156       assert(!empty());
157       return data()[0];
158     }
159 
160     /// back - Get the last character in the string.
back()161     [[nodiscard]] char back() const {
162       assert(!empty());
163       return data()[size() - 1];
164     }
165 
    /// copy - Allocate a copy in \p A and return a StringRef to it.
167     template <typename Allocator>
copy(Allocator & A)168     [[nodiscard]] StringRef copy(Allocator &A) const {
169       // Don't request a length 0 copy from the allocator.
170       if (empty())
171         return StringRef();
172       char *S = A.template Allocate<char>(size());
173       std::copy(begin(), end(), S);
174       return StringRef(S, size());
175     }
176 
177     /// Check for string equality, ignoring case.
equals_insensitive(StringRef RHS)178     [[nodiscard]] bool equals_insensitive(StringRef RHS) const {
179       return size() == RHS.size() && compare_insensitive(RHS) == 0;
180     }
181 
182     /// compare - Compare two strings; the result is negative, zero, or positive
183     /// if this string is lexicographically less than, equal to, or greater than
184     /// the \p RHS.
compare(StringRef RHS)185     [[nodiscard]] int compare(StringRef RHS) const {
186       // Check the prefix for a mismatch.
187       if (int Res =
188               compareMemory(data(), RHS.data(), std::min(size(), RHS.size())))
189         return Res < 0 ? -1 : 1;
190 
191       // Otherwise the prefixes match, so we only need to check the lengths.
192       if (size() == RHS.size())
193         return 0;
194       return size() < RHS.size() ? -1 : 1;
195     }
196 
197     /// Compare two strings, ignoring case.
198     [[nodiscard]] LLVM_ABI int compare_insensitive(StringRef RHS) const;
199 
200     /// compare_numeric - Compare two strings, treating sequences of digits as
201     /// numbers.
202     [[nodiscard]] LLVM_ABI int compare_numeric(StringRef RHS) const;
203 
204     /// Determine the edit distance between this string and another
205     /// string.
206     ///
207     /// \param Other the string to compare this string against.
208     ///
209     /// \param AllowReplacements whether to allow character
210     /// replacements (change one character into another) as a single
211     /// operation, rather than as two operations (an insertion and a
212     /// removal).
213     ///
214     /// \param MaxEditDistance If non-zero, the maximum edit distance that
215     /// this routine is allowed to compute. If the edit distance will exceed
216     /// that maximum, returns \c MaxEditDistance+1.
217     ///
218     /// \returns the minimum number of character insertions, removals,
219     /// or (if \p AllowReplacements is \c true) replacements needed to
220     /// transform one of the given strings into the other. If zero,
221     /// the strings are identical.
222     [[nodiscard]] LLVM_ABI unsigned
223     edit_distance(StringRef Other, bool AllowReplacements = true,
224                   unsigned MaxEditDistance = 0) const;
225 
226     [[nodiscard]] LLVM_ABI unsigned
227     edit_distance_insensitive(StringRef Other, bool AllowReplacements = true,
228                               unsigned MaxEditDistance = 0) const;
229 
230     /// str - Get the contents as an std::string.
str()231     [[nodiscard]] std::string str() const {
232       if (!data())
233         return std::string();
234       return std::string(data(), size());
235     }
236 
237     /// @}
238     /// @name Operator Overloads
239     /// @{
240 
241     [[nodiscard]] char operator[](size_t Index) const {
242       assert(Index < size() && "Invalid index!");
243       return data()[Index];
244     }
245 
246     /// Disallow accidental assignment from a temporary std::string.
247     ///
248     /// The declaration here is extra complicated so that `stringRef = {}`
249     /// and `stringRef = "abc"` continue to select the move assignment operator.
250     template <typename T>
251     std::enable_if_t<std::is_same<T, std::string>::value, StringRef> &
252     operator=(T &&Str) = delete;
253 
254     /// @}
255     /// @name Type Conversions
256     /// @{
257 
string_view()258     constexpr operator std::string_view() const {
259       return std::string_view(data(), size());
260     }
261 
262     /// @}
263     /// @name String Predicates
264     /// @{
265 
266     /// Check if this string starts with the given \p Prefix.
starts_with(StringRef Prefix)267     [[nodiscard]] bool starts_with(StringRef Prefix) const {
268       return size() >= Prefix.size() &&
269              compareMemory(data(), Prefix.data(), Prefix.size()) == 0;
270     }
starts_with(char Prefix)271     [[nodiscard]] bool starts_with(char Prefix) const {
272       return !empty() && front() == Prefix;
273     }
274 
275     /// Check if this string starts with the given \p Prefix, ignoring case.
276     [[nodiscard]] LLVM_ABI bool starts_with_insensitive(StringRef Prefix) const;
277 
278     /// Check if this string ends with the given \p Suffix.
ends_with(StringRef Suffix)279     [[nodiscard]] bool ends_with(StringRef Suffix) const {
280       return size() >= Suffix.size() &&
281              compareMemory(end() - Suffix.size(), Suffix.data(),
282                            Suffix.size()) == 0;
283     }
ends_with(char Suffix)284     [[nodiscard]] bool ends_with(char Suffix) const {
285       return !empty() && back() == Suffix;
286     }
287 
288     /// Check if this string ends with the given \p Suffix, ignoring case.
289     [[nodiscard]] LLVM_ABI bool ends_with_insensitive(StringRef Suffix) const;
290 
291     /// @}
292     /// @name String Searching
293     /// @{
294 
295     /// Search for the first character \p C in the string.
296     ///
297     /// \returns The index of the first occurrence of \p C, or npos if not
298     /// found.
299     [[nodiscard]] size_t find(char C, size_t From = 0) const {
300       return std::string_view(*this).find(C, From);
301     }
302 
303     /// Search for the first character \p C in the string, ignoring case.
304     ///
305     /// \returns The index of the first occurrence of \p C, or npos if not
306     /// found.
307     [[nodiscard]] LLVM_ABI size_t find_insensitive(char C,
308                                                    size_t From = 0) const;
309 
310     /// Search for the first character satisfying the predicate \p F
311     ///
312     /// \returns The index of the first character satisfying \p F starting from
313     /// \p From, or npos if not found.
314     [[nodiscard]] size_t find_if(function_ref<bool(char)> F,
315                                  size_t From = 0) const {
316       StringRef S = drop_front(From);
317       while (!S.empty()) {
318         if (F(S.front()))
319           return size() - S.size();
320         S = S.drop_front();
321       }
322       return npos;
323     }
324 
325     /// Search for the first character not satisfying the predicate \p F
326     ///
327     /// \returns The index of the first character not satisfying \p F starting
328     /// from \p From, or npos if not found.
329     [[nodiscard]] size_t find_if_not(function_ref<bool(char)> F,
330                                      size_t From = 0) const {
331       return find_if([F](char c) { return !F(c); }, From);
332     }
333 
334     /// Search for the first string \p Str in the string.
335     ///
336     /// \returns The index of the first occurrence of \p Str, or npos if not
337     /// found.
338     [[nodiscard]] LLVM_ABI size_t find(StringRef Str, size_t From = 0) const;
339 
340     /// Search for the first string \p Str in the string, ignoring case.
341     ///
342     /// \returns The index of the first occurrence of \p Str, or npos if not
343     /// found.
344     [[nodiscard]] LLVM_ABI size_t find_insensitive(StringRef Str,
345                                                    size_t From = 0) const;
346 
347     /// Search for the last character \p C in the string.
348     ///
349     /// \returns The index of the last occurrence of \p C, or npos if not
350     /// found.
351     [[nodiscard]] size_t rfind(char C, size_t From = npos) const {
352       size_t I = std::min(From, size());
353       while (I) {
354         --I;
355         if (data()[I] == C)
356           return I;
357       }
358       return npos;
359     }
360 
361     /// Search for the last character \p C in the string, ignoring case.
362     ///
363     /// \returns The index of the last occurrence of \p C, or npos if not
364     /// found.
365     [[nodiscard]] LLVM_ABI size_t rfind_insensitive(char C,
366                                                     size_t From = npos) const;
367 
368     /// Search for the last string \p Str in the string.
369     ///
370     /// \returns The index of the last occurrence of \p Str, or npos if not
371     /// found.
372     [[nodiscard]] LLVM_ABI size_t rfind(StringRef Str) const;
373 
374     /// Search for the last string \p Str in the string, ignoring case.
375     ///
376     /// \returns The index of the last occurrence of \p Str, or npos if not
377     /// found.
378     [[nodiscard]] LLVM_ABI size_t rfind_insensitive(StringRef Str) const;
379 
380     /// Find the first character in the string that is \p C, or npos if not
381     /// found. Same as find.
382     [[nodiscard]] size_t find_first_of(char C, size_t From = 0) const {
383       return find(C, From);
384     }
385 
386     /// Find the first character in the string that is in \p Chars, or npos if
387     /// not found.
388     ///
389     /// Complexity: O(size() + Chars.size())
390     [[nodiscard]] LLVM_ABI size_t find_first_of(StringRef Chars,
391                                                 size_t From = 0) const;
392 
393     /// Find the first character in the string that is not \p C or npos if not
394     /// found.
395     [[nodiscard]] LLVM_ABI size_t find_first_not_of(char C,
396                                                     size_t From = 0) const;
397 
398     /// Find the first character in the string that is not in the string
399     /// \p Chars, or npos if not found.
400     ///
401     /// Complexity: O(size() + Chars.size())
402     [[nodiscard]] LLVM_ABI size_t find_first_not_of(StringRef Chars,
403                                                     size_t From = 0) const;
404 
405     /// Find the last character in the string that is \p C, or npos if not
406     /// found.
407     [[nodiscard]] size_t find_last_of(char C, size_t From = npos) const {
408       return rfind(C, From);
409     }
410 
    /// Find the last character in the string that is in \p Chars, or npos if
    /// not found.
413     ///
414     /// Complexity: O(size() + Chars.size())
415     [[nodiscard]] LLVM_ABI size_t find_last_of(StringRef Chars,
416                                                size_t From = npos) const;
417 
418     /// Find the last character in the string that is not \p C, or npos if not
419     /// found.
420     [[nodiscard]] LLVM_ABI size_t find_last_not_of(char C,
421                                                    size_t From = npos) const;
422 
423     /// Find the last character in the string that is not in \p Chars, or
424     /// npos if not found.
425     ///
426     /// Complexity: O(size() + Chars.size())
427     [[nodiscard]] LLVM_ABI size_t find_last_not_of(StringRef Chars,
428                                                    size_t From = npos) const;
429 
430     /// Return true if the given string is a substring of *this, and false
431     /// otherwise.
contains(StringRef Other)432     [[nodiscard]] bool contains(StringRef Other) const {
433       return find(Other) != npos;
434     }
435 
436     /// Return true if the given character is contained in *this, and false
437     /// otherwise.
contains(char C)438     [[nodiscard]] bool contains(char C) const {
439       return find_first_of(C) != npos;
440     }
441 
    /// Return true if the given string is a substring of *this, ignoring case,
    /// and false otherwise.
contains_insensitive(StringRef Other)444     [[nodiscard]] bool contains_insensitive(StringRef Other) const {
445       return find_insensitive(Other) != npos;
446     }
447 
    /// Return true if the given character is contained in *this, ignoring
    /// case, and false otherwise.
contains_insensitive(char C)450     [[nodiscard]] bool contains_insensitive(char C) const {
451       return find_insensitive(C) != npos;
452     }
453 
454     /// @}
455     /// @name Helpful Algorithms
456     /// @{
457 
458     /// Return the number of occurrences of \p C in the string.
count(char C)459     [[nodiscard]] size_t count(char C) const {
460       size_t Count = 0;
461       for (size_t I = 0; I != size(); ++I)
462         if (data()[I] == C)
463           ++Count;
464       return Count;
465     }
466 
467     /// Return the number of non-overlapped occurrences of \p Str in
468     /// the string.
469     LLVM_ABI size_t count(StringRef Str) const;
470 
471     /// Parse the current string as an integer of the specified radix.  If
472     /// \p Radix is specified as zero, this does radix autosensing using
473     /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
474     ///
475     /// If the string is invalid or if only a subset of the string is valid,
476     /// this returns true to signify the error.  The string is considered
477     /// erroneous if empty or if it overflows T.
getAsInteger(unsigned Radix,T & Result)478     template <typename T> bool getAsInteger(unsigned Radix, T &Result) const {
479       if constexpr (std::numeric_limits<T>::is_signed) {
480         long long LLVal;
481         if (getAsSignedInteger(*this, Radix, LLVal) ||
482             static_cast<T>(LLVal) != LLVal)
483           return true;
484         Result = LLVal;
485       } else {
486         unsigned long long ULLVal;
487         // The additional cast to unsigned long long is required to avoid the
488         // Visual C++ warning C4805: '!=' : unsafe mix of type 'bool' and type
489         // 'unsigned __int64' when instantiating getAsInteger with T = bool.
490         if (getAsUnsignedInteger(*this, Radix, ULLVal) ||
491             static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
492           return true;
493         Result = ULLVal;
494       }
495       return false;
496     }
497 
498     /// Parse the current string as an integer of the specified radix.  If
499     /// \p Radix is specified as zero, this does radix autosensing using
500     /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
501     ///
502     /// If the string does not begin with a number of the specified radix,
503     /// this returns true to signify the error. The string is considered
504     /// erroneous if empty or if it overflows T.
505     /// The portion of the string representing the discovered numeric value
506     /// is removed from the beginning of the string.
consumeInteger(unsigned Radix,T & Result)507     template <typename T> bool consumeInteger(unsigned Radix, T &Result) {
508       if constexpr (std::numeric_limits<T>::is_signed) {
509         long long LLVal;
510         if (consumeSignedInteger(*this, Radix, LLVal) ||
511             static_cast<long long>(static_cast<T>(LLVal)) != LLVal)
512           return true;
513         Result = LLVal;
514       } else {
515         unsigned long long ULLVal;
516         if (consumeUnsignedInteger(*this, Radix, ULLVal) ||
517             static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
518           return true;
519         Result = ULLVal;
520       }
521       return false;
522     }
523 
524     /// Parse the current string as an integer of the specified \p Radix, or of
525     /// an autosensed radix if the \p Radix given is 0.  The current value in
526     /// \p Result is discarded, and the storage is changed to be wide enough to
527     /// store the parsed integer.
528     ///
529     /// \returns true if the string does not solely consist of a valid
530     /// non-empty number in the appropriate base.
531     ///
532     /// APInt::fromString is superficially similar but assumes the
533     /// string is well-formed in the given radix.
534     LLVM_ABI bool getAsInteger(unsigned Radix, APInt &Result) const;
535 
536     /// Parse the current string as an integer of the specified \p Radix.  If
537     /// \p Radix is specified as zero, this does radix autosensing using
538     /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
539     ///
540     /// If the string does not begin with a number of the specified radix,
541     /// this returns true to signify the error. The string is considered
542     /// erroneous if empty.
543     /// The portion of the string representing the discovered numeric value
544     /// is removed from the beginning of the string.
545     LLVM_ABI bool consumeInteger(unsigned Radix, APInt &Result);
546 
547     /// Parse the current string as an IEEE double-precision floating
548     /// point value.  The string must be a well-formed double.
549     ///
550     /// If \p AllowInexact is false, the function will fail if the string
551     /// cannot be represented exactly.  Otherwise, the function only fails
552     /// in case of an overflow or underflow, or an invalid floating point
553     /// representation.
554     LLVM_ABI bool getAsDouble(double &Result, bool AllowInexact = true) const;
555 
556     /// @}
557     /// @name String Operations
558     /// @{
559 
    /// Convert the given ASCII string to lowercase.
561     [[nodiscard]] LLVM_ABI std::string lower() const;
562 
563     /// Convert the given ASCII string to uppercase.
564     [[nodiscard]] LLVM_ABI std::string upper() const;
565 
566     /// @}
567     /// @name Substring Operations
568     /// @{
569 
570     /// Return a reference to the substring from [Start, Start + N).
571     ///
572     /// \param Start The index of the starting character in the substring; if
573     /// the index is npos or greater than the length of the string then the
574     /// empty substring will be returned.
575     ///
    /// \param N The number of characters to include in the substring. If N
577     /// exceeds the number of characters remaining in the string, the string
578     /// suffix (starting with \p Start) will be returned.
579     [[nodiscard]] constexpr StringRef substr(size_t Start,
580                                              size_t N = npos) const {
581       Start = std::min(Start, size());
582       return StringRef(data() + Start, std::min(N, size() - Start));
583     }
584 
585     /// Return a StringRef equal to 'this' but with only the first \p N
586     /// elements remaining.  If \p N is greater than the length of the
587     /// string, the entire string is returned.
588     [[nodiscard]] StringRef take_front(size_t N = 1) const {
589       if (N >= size())
590         return *this;
591       return drop_back(size() - N);
592     }
593 
594     /// Return a StringRef equal to 'this' but with only the last \p N
595     /// elements remaining.  If \p N is greater than the length of the
596     /// string, the entire string is returned.
597     [[nodiscard]] StringRef take_back(size_t N = 1) const {
598       if (N >= size())
599         return *this;
600       return drop_front(size() - N);
601     }
602 
603     /// Return the longest prefix of 'this' such that every character
604     /// in the prefix satisfies the given predicate.
take_while(function_ref<bool (char)> F)605     [[nodiscard]] StringRef take_while(function_ref<bool(char)> F) const {
606       return substr(0, find_if_not(F));
607     }
608 
609     /// Return the longest prefix of 'this' such that no character in
610     /// the prefix satisfies the given predicate.
take_until(function_ref<bool (char)> F)611     [[nodiscard]] StringRef take_until(function_ref<bool(char)> F) const {
612       return substr(0, find_if(F));
613     }
614 
615     /// Return a StringRef equal to 'this' but with the first \p N elements
616     /// dropped.
617     [[nodiscard]] StringRef drop_front(size_t N = 1) const {
618       assert(size() >= N && "Dropping more elements than exist");
619       return substr(N);
620     }
621 
622     /// Return a StringRef equal to 'this' but with the last \p N elements
623     /// dropped.
624     [[nodiscard]] StringRef drop_back(size_t N = 1) const {
625       assert(size() >= N && "Dropping more elements than exist");
626       return substr(0, size()-N);
627     }
628 
629     /// Return a StringRef equal to 'this', but with all characters satisfying
630     /// the given predicate dropped from the beginning of the string.
drop_while(function_ref<bool (char)> F)631     [[nodiscard]] StringRef drop_while(function_ref<bool(char)> F) const {
632       return substr(find_if_not(F));
633     }
634 
635     /// Return a StringRef equal to 'this', but with all characters not
636     /// satisfying the given predicate dropped from the beginning of the string.
drop_until(function_ref<bool (char)> F)637     [[nodiscard]] StringRef drop_until(function_ref<bool(char)> F) const {
638       return substr(find_if(F));
639     }
640 
641     /// Returns true if this StringRef has the given prefix and removes that
642     /// prefix.
consume_front(StringRef Prefix)643     bool consume_front(StringRef Prefix) {
644       if (!starts_with(Prefix))
645         return false;
646 
647       *this = substr(Prefix.size());
648       return true;
649     }
650 
651     /// Returns true if this StringRef has the given prefix, ignoring case,
652     /// and removes that prefix.
consume_front_insensitive(StringRef Prefix)653     bool consume_front_insensitive(StringRef Prefix) {
654       if (!starts_with_insensitive(Prefix))
655         return false;
656 
657       *this = substr(Prefix.size());
658       return true;
659     }
660 
661     /// Returns true if this StringRef has the given suffix and removes that
662     /// suffix.
consume_back(StringRef Suffix)663     bool consume_back(StringRef Suffix) {
664       if (!ends_with(Suffix))
665         return false;
666 
667       *this = substr(0, size() - Suffix.size());
668       return true;
669     }
670 
671     /// Returns true if this StringRef has the given suffix, ignoring case,
672     /// and removes that suffix.
consume_back_insensitive(StringRef Suffix)673     bool consume_back_insensitive(StringRef Suffix) {
674       if (!ends_with_insensitive(Suffix))
675         return false;
676 
677       *this = substr(0, size() - Suffix.size());
678       return true;
679     }
680 
681     /// Return a reference to the substring from [Start, End).
682     ///
683     /// \param Start The index of the starting character in the substring; if
684     /// the index is npos or greater than the length of the string then the
685     /// empty substring will be returned.
686     ///
687     /// \param End The index following the last character to include in the
688     /// substring. If this is npos or exceeds the number of characters
689     /// remaining in the string, the string suffix (starting with \p Start)
690     /// will be returned. If this is less than \p Start, an empty string will
691     /// be returned.
slice(size_t Start,size_t End)692     [[nodiscard]] StringRef slice(size_t Start, size_t End) const {
693       Start = std::min(Start, size());
694       End = std::clamp(End, Start, size());
695       return StringRef(data() + Start, End - Start);
696     }
697 
698     /// Split into two substrings around the first occurrence of a separator
699     /// character.
700     ///
701     /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
702     /// such that (*this == LHS + Separator + RHS) is true and RHS is
703     /// maximal. If \p Separator is not in the string, then the result is a
704     /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
705     ///
706     /// \param Separator The character to split on.
707     /// \returns The split substrings.
split(char Separator)708     [[nodiscard]] std::pair<StringRef, StringRef> split(char Separator) const {
709       return split(StringRef(&Separator, 1));
710     }
711 
712     /// Split into two substrings around the first occurrence of a separator
713     /// string.
714     ///
715     /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
716     /// such that (*this == LHS + Separator + RHS) is true and RHS is
717     /// maximal. If \p Separator is not in the string, then the result is a
718     /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
719     ///
720     /// \param Separator - The string to split on.
721     /// \return - The split substrings.
722     [[nodiscard]] std::pair<StringRef, StringRef>
split(StringRef Separator)723     split(StringRef Separator) const {
724       size_t Idx = find(Separator);
725       if (Idx == npos)
726         return std::make_pair(*this, StringRef());
727       return std::make_pair(slice(0, Idx), substr(Idx + Separator.size()));
728     }
729 
730     /// Split into two substrings around the last occurrence of a separator
731     /// string.
732     ///
733     /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
734     /// such that (*this == LHS + Separator + RHS) is true and RHS is
735     /// minimal. If \p Separator is not in the string, then the result is a
736     /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
737     ///
738     /// \param Separator - The string to split on.
739     /// \return - The split substrings.
740     [[nodiscard]] std::pair<StringRef, StringRef>
rsplit(StringRef Separator)741     rsplit(StringRef Separator) const {
742       size_t Idx = rfind(Separator);
743       if (Idx == npos)
744         return std::make_pair(*this, StringRef());
745       return std::make_pair(slice(0, Idx), substr(Idx + Separator.size()));
746     }
747 
748     /// Split into substrings around the occurrences of a separator string.
749     ///
750     /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
751     /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
752     /// elements are added to A.
753     /// If \p KeepEmpty is false, empty strings are not added to \p A. They
754     /// still count when considering \p MaxSplit.
755     /// A useful invariant is that
756     /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true.
757     ///
758     /// \param A - Where to put the substrings.
759     /// \param Separator - The string to split on.
760     /// \param MaxSplit - The maximum number of times the string is split.
761     /// \param KeepEmpty - True if empty substring should be added.
762     LLVM_ABI void split(SmallVectorImpl<StringRef> &A, StringRef Separator,
763                         int MaxSplit = -1, bool KeepEmpty = true) const;
764 
765     /// Split into substrings around the occurrences of a separator character.
766     ///
767     /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
768     /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
769     /// elements are added to A.
770     /// If \p KeepEmpty is false, empty strings are not added to \p A. They
771     /// still count when considering \p MaxSplit.
772     /// A useful invariant is that
773     /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true.
774     ///
775     /// \param A - Where to put the substrings.
776     /// \param Separator - The character to split on.
777     /// \param MaxSplit - The maximum number of times the string is split.
778     /// \param KeepEmpty - True if empty substring should be added.
779     LLVM_ABI void split(SmallVectorImpl<StringRef> &A, char Separator,
780                         int MaxSplit = -1, bool KeepEmpty = true) const;
781 
782     /// Split into two substrings around the last occurrence of a separator
783     /// character.
784     ///
785     /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
786     /// such that (*this == LHS + Separator + RHS) is true and RHS is
787     /// minimal. If \p Separator is not in the string, then the result is a
788     /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
789     ///
790     /// \param Separator - The character to split on.
791     /// \return - The split substrings.
rsplit(char Separator)792     [[nodiscard]] std::pair<StringRef, StringRef> rsplit(char Separator) const {
793       return rsplit(StringRef(&Separator, 1));
794     }
795 
796     /// Return string with consecutive \p Char characters starting from the
797     /// the left removed.
ltrim(char Char)798     [[nodiscard]] StringRef ltrim(char Char) const {
799       return drop_front(std::min(size(), find_first_not_of(Char)));
800     }
801 
802     /// Return string with consecutive characters in \p Chars starting from
803     /// the left removed.
804     [[nodiscard]] StringRef ltrim(StringRef Chars = " \t\n\v\f\r") const {
805       return drop_front(std::min(size(), find_first_not_of(Chars)));
806     }
807 
808     /// Return string with consecutive \p Char characters starting from the
809     /// right removed.
rtrim(char Char)810     [[nodiscard]] StringRef rtrim(char Char) const {
811       return drop_back(size() - std::min(size(), find_last_not_of(Char) + 1));
812     }
813 
814     /// Return string with consecutive characters in \p Chars starting from
815     /// the right removed.
816     [[nodiscard]] StringRef rtrim(StringRef Chars = " \t\n\v\f\r") const {
817       return drop_back(size() - std::min(size(), find_last_not_of(Chars) + 1));
818     }
819 
820     /// Return string with consecutive \p Char characters starting from the
821     /// left and right removed.
trim(char Char)822     [[nodiscard]] StringRef trim(char Char) const {
823       return ltrim(Char).rtrim(Char);
824     }
825 
826     /// Return string with consecutive characters in \p Chars starting from
827     /// the left and right removed.
828     [[nodiscard]] StringRef trim(StringRef Chars = " \t\n\v\f\r") const {
829       return ltrim(Chars).rtrim(Chars);
830     }
831 
832     /// Detect the line ending style of the string.
833     ///
834     /// If the string contains a line ending, return the line ending character
835     /// sequence that is detected. Otherwise return '\n' for unix line endings.
836     ///
837     /// \return - The line ending character sequence.
detectEOL()838     [[nodiscard]] StringRef detectEOL() const {
839       size_t Pos = find('\r');
840       if (Pos == npos) {
841         // If there is no carriage return, assume unix
842         return "\n";
843       }
844       if (Pos + 1 < size() && data()[Pos + 1] == '\n')
845         return "\r\n"; // Windows
846       if (Pos > 0 && data()[Pos - 1] == '\n')
847         return "\n\r"; // You monster!
848       return "\r";     // Classic Mac
849     }
850     /// @}
851   };
852 
853   /// A wrapper around a string literal that serves as a proxy for constructing
854   /// global tables of StringRefs with the length computed at compile time.
855   /// In order to avoid the invocation of a global constructor, StringLiteral
856   /// should *only* be used in a constexpr context, as such:
857   ///
858   /// constexpr StringLiteral S("test");
859   ///
860   class StringLiteral : public StringRef {
861   private:
StringLiteral(const char * Str,size_t N)862     constexpr StringLiteral(const char *Str, size_t N) : StringRef(Str, N) {
863     }
864 
865   public:
866     template <size_t N>
StringLiteral(const char (& Str)[N])867     constexpr StringLiteral(const char (&Str)[N])
868 #if defined(__clang__) && __has_attribute(enable_if)
869 #pragma clang diagnostic push
870 #pragma clang diagnostic ignored "-Wgcc-compat"
871         __attribute((enable_if(__builtin_strlen(Str) == N - 1,
872                                "invalid string literal")))
873 #pragma clang diagnostic pop
874 #endif
875         : StringRef(Str, N - 1) {
876     }
877 
878     // Explicit construction for strings like "foo\0bar".
879     template <size_t N>
withInnerNUL(const char (& Str)[N])880     static constexpr StringLiteral withInnerNUL(const char (&Str)[N]) {
881       return StringLiteral(Str, N - 1);
882     }
883   };
884 
885   /// @name StringRef Comparison Operators
886   /// @{
887 
888   inline bool operator==(StringRef LHS, StringRef RHS) {
889     if (LHS.size() != RHS.size())
890       return false;
891     if (LHS.empty())
892       return true;
893     return ::memcmp(LHS.data(), RHS.data(), LHS.size()) == 0;
894   }
895 
896   inline bool operator!=(StringRef LHS, StringRef RHS) { return !(LHS == RHS); }
897 
898   inline bool operator<(StringRef LHS, StringRef RHS) {
899     return LHS.compare(RHS) < 0;
900   }
901 
902   inline bool operator<=(StringRef LHS, StringRef RHS) {
903     return LHS.compare(RHS) <= 0;
904   }
905 
906   inline bool operator>(StringRef LHS, StringRef RHS) {
907     return LHS.compare(RHS) > 0;
908   }
909 
910   inline bool operator>=(StringRef LHS, StringRef RHS) {
911     return LHS.compare(RHS) >= 0;
912   }
913 
914   inline std::string &operator+=(std::string &buffer, StringRef string) {
915     return buffer.append(string.data(), string.size());
916   }
917 
918   /// @}
919 
920   /// Compute a hash_code for a StringRef.
921   [[nodiscard]] LLVM_ABI hash_code hash_value(StringRef S);
922 
923   // Provide DenseMapInfo for StringRefs.
924   template <> struct DenseMapInfo<StringRef, void> {
925     static inline StringRef getEmptyKey() {
926       return StringRef(
927           reinterpret_cast<const char *>(~static_cast<uintptr_t>(0)), 0);
928     }
929 
930     static inline StringRef getTombstoneKey() {
931       return StringRef(
932           reinterpret_cast<const char *>(~static_cast<uintptr_t>(1)), 0);
933     }
934 
935     LLVM_ABI static unsigned getHashValue(StringRef Val);
936 
937     static bool isEqual(StringRef LHS, StringRef RHS) {
938       if (RHS.data() == getEmptyKey().data())
939         return LHS.data() == getEmptyKey().data();
940       if (RHS.data() == getTombstoneKey().data())
941         return LHS.data() == getTombstoneKey().data();
942       return LHS == RHS;
943     }
944   };
945 
946 } // end namespace llvm
947 
948 #endif // LLVM_ADT_STRINGREF_H
949