//===-- StringExtras.cpp - Implement the StringExtras header --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the StringExtras.h header
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/raw_ostream.h"
#include <cctype>

using namespace llvm;

/// StrInStrNoCase - Portable version of strcasestr.  Locates the first
/// occurrence of string 's2' in string 's1', ignoring case.  Returns
/// the offset of s2 in s1 or npos if s2 cannot be found.
230b57cec5SDimitry Andric StringRef::size_type llvm::StrInStrNoCase(StringRef s1, StringRef s2) { 240b57cec5SDimitry Andric size_t N = s2.size(), M = s1.size(); 250b57cec5SDimitry Andric if (N > M) 260b57cec5SDimitry Andric return StringRef::npos; 270b57cec5SDimitry Andric for (size_t i = 0, e = M - N + 1; i != e; ++i) 28fe6060f1SDimitry Andric if (s1.substr(i, N).equals_insensitive(s2)) 290b57cec5SDimitry Andric return i; 300b57cec5SDimitry Andric return StringRef::npos; 310b57cec5SDimitry Andric } 320b57cec5SDimitry Andric 330b57cec5SDimitry Andric /// getToken - This function extracts one token from source, ignoring any 340b57cec5SDimitry Andric /// leading characters that appear in the Delimiters string, and ending the 350b57cec5SDimitry Andric /// token at any of the characters that appear in the Delimiters string. If 360b57cec5SDimitry Andric /// there are no tokens in the source string, an empty string is returned. 370b57cec5SDimitry Andric /// The function returns a pair containing the extracted token and the 380b57cec5SDimitry Andric /// remaining tail string. 390b57cec5SDimitry Andric std::pair<StringRef, StringRef> llvm::getToken(StringRef Source, 400b57cec5SDimitry Andric StringRef Delimiters) { 410b57cec5SDimitry Andric // Figure out where the token starts. 420b57cec5SDimitry Andric StringRef::size_type Start = Source.find_first_not_of(Delimiters); 430b57cec5SDimitry Andric 440b57cec5SDimitry Andric // Find the next occurrence of the delimiter. 450b57cec5SDimitry Andric StringRef::size_type End = Source.find_first_of(Delimiters, Start); 460b57cec5SDimitry Andric 470b57cec5SDimitry Andric return std::make_pair(Source.slice(Start, End), Source.substr(End)); 480b57cec5SDimitry Andric } 490b57cec5SDimitry Andric 500b57cec5SDimitry Andric /// SplitString - Split up the specified string according to the specified 510b57cec5SDimitry Andric /// delimiters, appending the result fragments to the output list. 
520b57cec5SDimitry Andric void llvm::SplitString(StringRef Source, 530b57cec5SDimitry Andric SmallVectorImpl<StringRef> &OutFragments, 540b57cec5SDimitry Andric StringRef Delimiters) { 550b57cec5SDimitry Andric std::pair<StringRef, StringRef> S = getToken(Source, Delimiters); 560b57cec5SDimitry Andric while (!S.first.empty()) { 570b57cec5SDimitry Andric OutFragments.push_back(S.first); 580b57cec5SDimitry Andric S = getToken(S.second, Delimiters); 590b57cec5SDimitry Andric } 600b57cec5SDimitry Andric } 610b57cec5SDimitry Andric 620b57cec5SDimitry Andric void llvm::printEscapedString(StringRef Name, raw_ostream &Out) { 634824e7fdSDimitry Andric for (unsigned char C : Name) { 648bcb0991SDimitry Andric if (C == '\\') 658bcb0991SDimitry Andric Out << '\\' << C; 668bcb0991SDimitry Andric else if (isPrint(C) && C != '"') 670b57cec5SDimitry Andric Out << C; 680b57cec5SDimitry Andric else 690b57cec5SDimitry Andric Out << '\\' << hexdigit(C >> 4) << hexdigit(C & 0x0F); 700b57cec5SDimitry Andric } 710b57cec5SDimitry Andric } 720b57cec5SDimitry Andric 730b57cec5SDimitry Andric void llvm::printHTMLEscaped(StringRef String, raw_ostream &Out) { 740b57cec5SDimitry Andric for (char C : String) { 750b57cec5SDimitry Andric if (C == '&') 760b57cec5SDimitry Andric Out << "&"; 770b57cec5SDimitry Andric else if (C == '<') 780b57cec5SDimitry Andric Out << "<"; 790b57cec5SDimitry Andric else if (C == '>') 800b57cec5SDimitry Andric Out << ">"; 810b57cec5SDimitry Andric else if (C == '\"') 820b57cec5SDimitry Andric Out << """; 830b57cec5SDimitry Andric else if (C == '\'') 840b57cec5SDimitry Andric Out << "'"; 850b57cec5SDimitry Andric else 860b57cec5SDimitry Andric Out << C; 870b57cec5SDimitry Andric } 880b57cec5SDimitry Andric } 890b57cec5SDimitry Andric 900b57cec5SDimitry Andric void llvm::printLowerCase(StringRef String, raw_ostream &Out) { 910b57cec5SDimitry Andric for (const char C : String) 920b57cec5SDimitry Andric Out << toLower(C); 930b57cec5SDimitry Andric } 945ffd83dbSDimitry 
Andric 955ffd83dbSDimitry Andric std::string llvm::convertToSnakeFromCamelCase(StringRef input) { 965ffd83dbSDimitry Andric if (input.empty()) 975ffd83dbSDimitry Andric return ""; 985ffd83dbSDimitry Andric 995ffd83dbSDimitry Andric std::string snakeCase; 1005ffd83dbSDimitry Andric snakeCase.reserve(input.size()); 101*5f757f3fSDimitry Andric auto check = [&input](size_t j, function_ref<bool(int)> predicate) { 102*5f757f3fSDimitry Andric return j < input.size() && predicate(input[j]); 103*5f757f3fSDimitry Andric }; 104*5f757f3fSDimitry Andric for (size_t i = 0; i < input.size(); ++i) { 105*5f757f3fSDimitry Andric snakeCase.push_back(tolower(input[i])); 106*5f757f3fSDimitry Andric // Handles "runs" of capitals, such as in OPName -> op_name. 107*5f757f3fSDimitry Andric if (check(i, isupper) && check(i + 1, isupper) && check(i + 2, islower)) 1085ffd83dbSDimitry Andric snakeCase.push_back('_'); 109*5f757f3fSDimitry Andric if ((check(i, islower) || check(i, isdigit)) && check(i + 1, isupper)) 110*5f757f3fSDimitry Andric snakeCase.push_back('_'); 1115ffd83dbSDimitry Andric } 1125ffd83dbSDimitry Andric return snakeCase; 1135ffd83dbSDimitry Andric } 1145ffd83dbSDimitry Andric 1155ffd83dbSDimitry Andric std::string llvm::convertToCamelFromSnakeCase(StringRef input, 1165ffd83dbSDimitry Andric bool capitalizeFirst) { 1175ffd83dbSDimitry Andric if (input.empty()) 1185ffd83dbSDimitry Andric return ""; 1195ffd83dbSDimitry Andric 1205ffd83dbSDimitry Andric std::string output; 1215ffd83dbSDimitry Andric output.reserve(input.size()); 1225ffd83dbSDimitry Andric 1235ffd83dbSDimitry Andric // Push the first character, capatilizing if necessary. 
1245ffd83dbSDimitry Andric if (capitalizeFirst && std::islower(input.front())) 1255ffd83dbSDimitry Andric output.push_back(llvm::toUpper(input.front())); 1265ffd83dbSDimitry Andric else 1275ffd83dbSDimitry Andric output.push_back(input.front()); 1285ffd83dbSDimitry Andric 1295ffd83dbSDimitry Andric // Walk the input converting any `*_[a-z]` snake case into `*[A-Z]` camelCase. 1305ffd83dbSDimitry Andric for (size_t pos = 1, e = input.size(); pos < e; ++pos) { 1315ffd83dbSDimitry Andric if (input[pos] == '_' && pos != (e - 1) && std::islower(input[pos + 1])) 1325ffd83dbSDimitry Andric output.push_back(llvm::toUpper(input[++pos])); 1335ffd83dbSDimitry Andric else 1345ffd83dbSDimitry Andric output.push_back(input[pos]); 1355ffd83dbSDimitry Andric } 1365ffd83dbSDimitry Andric return output; 1375ffd83dbSDimitry Andric } 138