1 //===- IdentifierTable.cpp - Hash table for identifier lookup -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the IdentifierInfo, IdentifierVisitor, and 10 // IdentifierTable interfaces. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "clang/Basic/IdentifierTable.h" 15 #include "clang/Basic/CharInfo.h" 16 #include "clang/Basic/DiagnosticLex.h" 17 #include "clang/Basic/LangOptions.h" 18 #include "clang/Basic/OperatorKinds.h" 19 #include "clang/Basic/Specifiers.h" 20 #include "clang/Basic/TargetBuiltins.h" 21 #include "clang/Basic/TokenKinds.h" 22 #include "llvm/ADT/DenseMapInfo.h" 23 #include "llvm/ADT/FoldingSet.h" 24 #include "llvm/ADT/SmallString.h" 25 #include "llvm/ADT/StringMap.h" 26 #include "llvm/ADT/StringRef.h" 27 #include "llvm/Support/Allocator.h" 28 #include "llvm/Support/ErrorHandling.h" 29 #include "llvm/Support/raw_ostream.h" 30 #include <cassert> 31 #include <cstdio> 32 #include <cstring> 33 #include <string> 34 35 using namespace clang; 36 37 // A check to make sure the ObjCOrBuiltinID has sufficient room to store the 38 // largest possible target/aux-target combination. If we exceed this, we likely 39 // need to just change the ObjCOrBuiltinIDBits value in IdentifierTable.h. 40 static_assert(2 * LargestBuiltinID < (2 << (ObjCOrBuiltinIDBits - 1)), 41 "Insufficient ObjCOrBuiltinID Bits"); 42 43 //===----------------------------------------------------------------------===// 44 // IdentifierTable Implementation 45 //===----------------------------------------------------------------------===// 46 47 IdentifierIterator::~IdentifierIterator() = default; 48 49 IdentifierInfoLookup::~IdentifierInfoLookup() = default; 50 51 namespace { 52 53 /// A simple identifier lookup iterator that represents an 54 /// empty sequence of identifiers. 55 class EmptyLookupIterator : public IdentifierIterator 56 { 57 public: 58 StringRef Next() override { return StringRef(); } 59 }; 60 61 } // namespace 62 63 IdentifierIterator *IdentifierInfoLookup::getIdentifiers() { 64 return new EmptyLookupIterator(); 65 } 66 67 IdentifierTable::IdentifierTable(IdentifierInfoLookup *ExternalLookup) 68 : HashTable(8192), // Start with space for 8K identifiers. 69 ExternalLookup(ExternalLookup) {} 70 71 IdentifierTable::IdentifierTable(const LangOptions &LangOpts, 72 IdentifierInfoLookup *ExternalLookup) 73 : IdentifierTable(ExternalLookup) { 74 // Populate the identifier table with info about keywords for the current 75 // language. 76 AddKeywords(LangOpts); 77 } 78 79 //===----------------------------------------------------------------------===// 80 // Language Keyword Implementation 81 //===----------------------------------------------------------------------===// 82 83 // Constants for TokenKinds.def 84 namespace { 85 86 enum TokenKey : unsigned { 87 KEYC99 = 0x1, 88 KEYCXX = 0x2, 89 KEYCXX11 = 0x4, 90 KEYGNU = 0x8, 91 KEYMS = 0x10, 92 BOOLSUPPORT = 0x20, 93 KEYALTIVEC = 0x40, 94 KEYNOCXX = 0x80, 95 KEYBORLAND = 0x100, 96 KEYOPENCLC = 0x200, 97 KEYC2X = 0x400, 98 KEYNOMS18 = 0x800, 99 KEYNOOPENCL = 0x1000, 100 WCHARSUPPORT = 0x2000, 101 HALFSUPPORT = 0x4000, 102 CHAR8SUPPORT = 0x8000, 103 KEYOBJC = 0x10000, 104 KEYZVECTOR = 0x20000, 105 KEYCOROUTINES = 0x40000, 106 KEYMODULES = 0x80000, 107 KEYCXX20 = 0x100000, 108 KEYOPENCLCXX = 0x200000, 109 KEYMSCOMPAT = 0x400000, 110 KEYSYCL = 0x800000, 111 KEYCUDA = 0x1000000, 112 KEYHLSL = 0x2000000, 113 KEYMAX = KEYHLSL, // The maximum key 114 KEYALLCXX = KEYCXX | KEYCXX11 | KEYCXX20, 115 KEYALL = (KEYMAX | (KEYMAX-1)) & ~KEYNOMS18 & 116 ~KEYNOOPENCL // KEYNOMS18 and KEYNOOPENCL are used to exclude. 117 }; 118 119 /// How a keyword is treated in the selected standard. This enum is ordered 120 /// intentionally so that the value that 'wins' is the most 'permissive'. 121 enum KeywordStatus { 122 KS_Unknown, // Not yet calculated. Used when figuring out the status. 123 KS_Disabled, // Disabled 124 KS_Future, // Is a keyword in future standard 125 KS_Extension, // Is an extension 126 KS_Enabled, // Enabled 127 }; 128 129 } // namespace 130 131 // This works on a single TokenKey flag and checks the LangOpts to get the 132 // KeywordStatus based exclusively on this flag, so that it can be merged in 133 // getKeywordStatus. Most should be enabled/disabled, but some might imply 134 // 'future' versions, or extensions. Returns 'unknown' unless this is KNOWN to 135 // be disabled, and the calling function makes it 'disabled' if no other flag 136 // changes it. This is necessary for the KEYNOCXX and KEYNOOPENCL flags. 137 static KeywordStatus getKeywordStatusHelper(const LangOptions &LangOpts, 138 TokenKey Flag) { 139 // Flag is a single bit version of TokenKey (that is, not 140 // KEYALL/KEYALLCXX/etc), so we can check with == throughout this function. 141 assert((Flag & ~(Flag - 1)) == Flag && "Multiple bits set?"); 142 143 switch (Flag) { 144 case KEYC99: 145 if (LangOpts.C99) 146 return KS_Enabled; 147 return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown; 148 case KEYC2X: 149 if (LangOpts.C2x) 150 return KS_Enabled; 151 return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown; 152 case KEYCXX: 153 return LangOpts.CPlusPlus ? KS_Enabled : KS_Unknown; 154 case KEYCXX11: 155 if (LangOpts.CPlusPlus11) 156 return KS_Enabled; 157 return LangOpts.CPlusPlus ? KS_Future : KS_Unknown; 158 case KEYCXX20: 159 if (LangOpts.CPlusPlus20) 160 return KS_Enabled; 161 return LangOpts.CPlusPlus ? KS_Future : KS_Unknown; 162 case KEYGNU: 163 return LangOpts.GNUKeywords ? KS_Extension : KS_Unknown; 164 case KEYMS: 165 return LangOpts.MicrosoftExt ? KS_Extension : KS_Unknown; 166 case BOOLSUPPORT: 167 if (LangOpts.Bool) return KS_Enabled; 168 return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown; 169 case KEYALTIVEC: 170 return LangOpts.AltiVec ? KS_Enabled : KS_Unknown; 171 case KEYBORLAND: 172 return LangOpts.Borland ? KS_Extension : KS_Unknown; 173 case KEYOPENCLC: 174 return LangOpts.OpenCL && !LangOpts.OpenCLCPlusPlus ? KS_Enabled 175 : KS_Unknown; 176 case WCHARSUPPORT: 177 return LangOpts.WChar ? KS_Enabled : KS_Unknown; 178 case HALFSUPPORT: 179 return LangOpts.Half ? KS_Enabled : KS_Unknown; 180 case CHAR8SUPPORT: 181 if (LangOpts.Char8) return KS_Enabled; 182 if (LangOpts.CPlusPlus20) return KS_Unknown; 183 if (LangOpts.CPlusPlus) return KS_Future; 184 return KS_Unknown; 185 case KEYOBJC: 186 // We treat bridge casts as objective-C keywords so we can warn on them 187 // in non-arc mode. 188 return LangOpts.ObjC ? KS_Enabled : KS_Unknown; 189 case KEYZVECTOR: 190 return LangOpts.ZVector ? KS_Enabled : KS_Unknown; 191 case KEYCOROUTINES: 192 return LangOpts.Coroutines ? KS_Enabled : KS_Unknown; 193 case KEYMODULES: 194 return LangOpts.ModulesTS ? KS_Enabled : KS_Unknown; 195 case KEYOPENCLCXX: 196 return LangOpts.OpenCLCPlusPlus ? KS_Enabled : KS_Unknown; 197 case KEYMSCOMPAT: 198 return LangOpts.MSVCCompat ? KS_Enabled : KS_Unknown; 199 case KEYSYCL: 200 return LangOpts.isSYCL() ? KS_Enabled : KS_Unknown; 201 case KEYCUDA: 202 return LangOpts.CUDA ? KS_Enabled : KS_Unknown; 203 case KEYHLSL: 204 return LangOpts.HLSL ? KS_Enabled : KS_Unknown; 205 case KEYNOCXX: 206 // This is enabled in all non-C++ modes, but might be enabled for other 207 // reasons as well. 208 return LangOpts.CPlusPlus ? KS_Unknown : KS_Enabled; 209 case KEYNOOPENCL: 210 // The disable behavior for this is handled in getKeywordStatus. 211 return KS_Unknown; 212 case KEYNOMS18: 213 // The disable behavior for this is handled in getKeywordStatus. 214 return KS_Unknown; 215 default: 216 llvm_unreachable("Unknown KeywordStatus flag"); 217 } 218 } 219 220 /// Translates flags as specified in TokenKinds.def into keyword status 221 /// in the given language standard. 222 static KeywordStatus getKeywordStatus(const LangOptions &LangOpts, 223 unsigned Flags) { 224 // KEYALL means always enabled, so special case this one. 225 if (Flags == KEYALL) return KS_Enabled; 226 // These are tests that need to 'always win', as they are special in that they 227 // disable based on certain conditions. 228 if (LangOpts.OpenCL && (Flags & KEYNOOPENCL)) return KS_Disabled; 229 if (LangOpts.MSVCCompat && (Flags & KEYNOMS18) && 230 !LangOpts.isCompatibleWithMSVC(LangOptions::MSVC2015)) 231 return KS_Disabled; 232 233 KeywordStatus CurStatus = KS_Unknown; 234 235 while (Flags != 0) { 236 unsigned CurFlag = Flags & ~(Flags - 1); 237 Flags = Flags & ~CurFlag; 238 CurStatus = std::max( 239 CurStatus, 240 getKeywordStatusHelper(LangOpts, static_cast<TokenKey>(CurFlag))); 241 } 242 243 if (CurStatus == KS_Unknown) 244 return KS_Disabled; 245 return CurStatus; 246 } 247 248 /// AddKeyword - This method is used to associate a token ID with specific 249 /// identifiers because they are language keywords. This causes the lexer to 250 /// automatically map matching identifiers to specialized token codes. 251 static void AddKeyword(StringRef Keyword, 252 tok::TokenKind TokenCode, unsigned Flags, 253 const LangOptions &LangOpts, IdentifierTable &Table) { 254 KeywordStatus AddResult = getKeywordStatus(LangOpts, Flags); 255 256 // Don't add this keyword if disabled in this language. 257 if (AddResult == KS_Disabled) return; 258 259 IdentifierInfo &Info = 260 Table.get(Keyword, AddResult == KS_Future ? tok::identifier : TokenCode); 261 Info.setIsExtensionToken(AddResult == KS_Extension); 262 Info.setIsFutureCompatKeyword(AddResult == KS_Future); 263 } 264 265 /// AddCXXOperatorKeyword - Register a C++ operator keyword alternative 266 /// representations. 267 static void AddCXXOperatorKeyword(StringRef Keyword, 268 tok::TokenKind TokenCode, 269 IdentifierTable &Table) { 270 IdentifierInfo &Info = Table.get(Keyword, TokenCode); 271 Info.setIsCPlusPlusOperatorKeyword(); 272 } 273 274 /// AddObjCKeyword - Register an Objective-C \@keyword like "class" "selector" 275 /// or "property". 276 static void AddObjCKeyword(StringRef Name, 277 tok::ObjCKeywordKind ObjCID, 278 IdentifierTable &Table) { 279 Table.get(Name).setObjCKeywordID(ObjCID); 280 } 281 282 /// AddKeywords - Add all keywords to the symbol table. 283 /// 284 void IdentifierTable::AddKeywords(const LangOptions &LangOpts) { 285 // Add keywords and tokens for the current language. 286 #define KEYWORD(NAME, FLAGS) \ 287 AddKeyword(StringRef(#NAME), tok::kw_ ## NAME, \ 288 FLAGS, LangOpts, *this); 289 #define ALIAS(NAME, TOK, FLAGS) \ 290 AddKeyword(StringRef(NAME), tok::kw_ ## TOK, \ 291 FLAGS, LangOpts, *this); 292 #define CXX_KEYWORD_OPERATOR(NAME, ALIAS) \ 293 if (LangOpts.CXXOperatorNames) \ 294 AddCXXOperatorKeyword(StringRef(#NAME), tok::ALIAS, *this); 295 #define OBJC_AT_KEYWORD(NAME) \ 296 if (LangOpts.ObjC) \ 297 AddObjCKeyword(StringRef(#NAME), tok::objc_##NAME, *this); 298 #define TESTING_KEYWORD(NAME, FLAGS) 299 #include "clang/Basic/TokenKinds.def" 300 301 if (LangOpts.ParseUnknownAnytype) 302 AddKeyword("__unknown_anytype", tok::kw___unknown_anytype, KEYALL, 303 LangOpts, *this); 304 305 if (LangOpts.DeclSpecKeyword) 306 AddKeyword("__declspec", tok::kw___declspec, KEYALL, LangOpts, *this); 307 308 if (LangOpts.IEEE128) 309 AddKeyword("__ieee128", tok::kw___float128, KEYALL, LangOpts, *this); 310 311 // Add the 'import' contextual keyword. 312 get("import").setModulesImport(true); 313 } 314 315 /// Checks if the specified token kind represents a keyword in the 316 /// specified language. 317 /// \returns Status of the keyword in the language. 318 static KeywordStatus getTokenKwStatus(const LangOptions &LangOpts, 319 tok::TokenKind K) { 320 switch (K) { 321 #define KEYWORD(NAME, FLAGS) \ 322 case tok::kw_##NAME: return getKeywordStatus(LangOpts, FLAGS); 323 #include "clang/Basic/TokenKinds.def" 324 default: return KS_Disabled; 325 } 326 } 327 328 /// Returns true if the identifier represents a keyword in the 329 /// specified language. 330 bool IdentifierInfo::isKeyword(const LangOptions &LangOpts) const { 331 switch (getTokenKwStatus(LangOpts, getTokenID())) { 332 case KS_Enabled: 333 case KS_Extension: 334 return true; 335 default: 336 return false; 337 } 338 } 339 340 /// Returns true if the identifier represents a C++ keyword in the 341 /// specified language. 342 bool IdentifierInfo::isCPlusPlusKeyword(const LangOptions &LangOpts) const { 343 if (!LangOpts.CPlusPlus || !isKeyword(LangOpts)) 344 return false; 345 // This is a C++ keyword if this identifier is not a keyword when checked 346 // using LangOptions without C++ support. 347 LangOptions LangOptsNoCPP = LangOpts; 348 LangOptsNoCPP.CPlusPlus = false; 349 LangOptsNoCPP.CPlusPlus11 = false; 350 LangOptsNoCPP.CPlusPlus20 = false; 351 return !isKeyword(LangOptsNoCPP); 352 } 353 354 ReservedIdentifierStatus 355 IdentifierInfo::isReserved(const LangOptions &LangOpts) const { 356 StringRef Name = getName(); 357 358 // '_' is a reserved identifier, but its use is so common (e.g. to store 359 // ignored values) that we don't warn on it. 360 if (Name.size() <= 1) 361 return ReservedIdentifierStatus::NotReserved; 362 363 // [lex.name] p3 364 if (Name[0] == '_') { 365 366 // Each name that begins with an underscore followed by an uppercase letter 367 // or another underscore is reserved. 368 if (Name[1] == '_') 369 return ReservedIdentifierStatus::StartsWithDoubleUnderscore; 370 371 if ('A' <= Name[1] && Name[1] <= 'Z') 372 return ReservedIdentifierStatus:: 373 StartsWithUnderscoreFollowedByCapitalLetter; 374 375 // This is a bit misleading: it actually means it's only reserved if we're 376 // at global scope because it starts with an underscore. 377 return ReservedIdentifierStatus::StartsWithUnderscoreAtGlobalScope; 378 } 379 380 // Each name that contains a double underscore (__) is reserved. 381 if (LangOpts.CPlusPlus && Name.contains("__")) 382 return ReservedIdentifierStatus::ContainsDoubleUnderscore; 383 384 return ReservedIdentifierStatus::NotReserved; 385 } 386 387 StringRef IdentifierInfo::deuglifiedName() const { 388 StringRef Name = getName(); 389 if (Name.size() >= 2 && Name.front() == '_' && 390 (Name[1] == '_' || (Name[1] >= 'A' && Name[1] <= 'Z'))) 391 return Name.ltrim('_'); 392 return Name; 393 } 394 395 tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const { 396 // We use a perfect hash function here involving the length of the keyword, 397 // the first and third character. For preprocessor ID's there are no 398 // collisions (if there were, the switch below would complain about duplicate 399 // case values). Note that this depends on 'if' being null terminated. 400 401 #define HASH(LEN, FIRST, THIRD) \ 402 (LEN << 5) + (((FIRST-'a') + (THIRD-'a')) & 31) 403 #define CASE(LEN, FIRST, THIRD, NAME) \ 404 case HASH(LEN, FIRST, THIRD): \ 405 return memcmp(Name, #NAME, LEN) ? tok::pp_not_keyword : tok::pp_ ## NAME 406 407 unsigned Len = getLength(); 408 if (Len < 2) return tok::pp_not_keyword; 409 const char *Name = getNameStart(); 410 switch (HASH(Len, Name[0], Name[2])) { 411 default: return tok::pp_not_keyword; 412 CASE( 2, 'i', '\0', if); 413 CASE( 4, 'e', 'i', elif); 414 CASE( 4, 'e', 's', else); 415 CASE( 4, 'l', 'n', line); 416 CASE( 4, 's', 'c', sccs); 417 CASE( 5, 'e', 'd', endif); 418 CASE( 5, 'e', 'r', error); 419 CASE( 5, 'i', 'e', ident); 420 CASE( 5, 'i', 'd', ifdef); 421 CASE( 5, 'u', 'd', undef); 422 423 CASE( 6, 'a', 's', assert); 424 CASE( 6, 'd', 'f', define); 425 CASE( 6, 'i', 'n', ifndef); 426 CASE( 6, 'i', 'p', import); 427 CASE( 6, 'p', 'a', pragma); 428 429 CASE( 7, 'd', 'f', defined); 430 CASE( 7, 'e', 'i', elifdef); 431 CASE( 7, 'i', 'c', include); 432 CASE( 7, 'w', 'r', warning); 433 434 CASE( 8, 'e', 'i', elifndef); 435 CASE( 8, 'u', 'a', unassert); 436 CASE(12, 'i', 'c', include_next); 437 438 CASE(14, '_', 'p', __public_macro); 439 440 CASE(15, '_', 'p', __private_macro); 441 442 CASE(16, '_', 'i', __include_macros); 443 #undef CASE 444 #undef HASH 445 } 446 } 447 448 //===----------------------------------------------------------------------===// 449 // Stats Implementation 450 //===----------------------------------------------------------------------===// 451 452 /// PrintStats - Print statistics about how well the identifier table is doing 453 /// at hashing identifiers. 454 void IdentifierTable::PrintStats() const { 455 unsigned NumBuckets = HashTable.getNumBuckets(); 456 unsigned NumIdentifiers = HashTable.getNumItems(); 457 unsigned NumEmptyBuckets = NumBuckets-NumIdentifiers; 458 unsigned AverageIdentifierSize = 0; 459 unsigned MaxIdentifierLength = 0; 460 461 // TODO: Figure out maximum times an identifier had to probe for -stats. 462 for (llvm::StringMap<IdentifierInfo*, llvm::BumpPtrAllocator>::const_iterator 463 I = HashTable.begin(), E = HashTable.end(); I != E; ++I) { 464 unsigned IdLen = I->getKeyLength(); 465 AverageIdentifierSize += IdLen; 466 if (MaxIdentifierLength < IdLen) 467 MaxIdentifierLength = IdLen; 468 } 469 470 fprintf(stderr, "\n*** Identifier Table Stats:\n"); 471 fprintf(stderr, "# Identifiers: %d\n", NumIdentifiers); 472 fprintf(stderr, "# Empty Buckets: %d\n", NumEmptyBuckets); 473 fprintf(stderr, "Hash density (#identifiers per bucket): %f\n", 474 NumIdentifiers/(double)NumBuckets); 475 fprintf(stderr, "Ave identifier length: %f\n", 476 (AverageIdentifierSize/(double)NumIdentifiers)); 477 fprintf(stderr, "Max identifier length: %d\n", MaxIdentifierLength); 478 479 // Compute statistics about the memory allocated for identifiers. 480 HashTable.getAllocator().PrintStats(); 481 } 482 483 //===----------------------------------------------------------------------===// 484 // SelectorTable Implementation 485 //===----------------------------------------------------------------------===// 486 487 unsigned llvm::DenseMapInfo<clang::Selector>::getHashValue(clang::Selector S) { 488 return DenseMapInfo<void*>::getHashValue(S.getAsOpaquePtr()); 489 } 490 491 namespace clang { 492 493 /// One of these variable length records is kept for each 494 /// selector containing more than one keyword. We use a folding set 495 /// to unique aggregate names (keyword selectors in ObjC parlance). Access to 496 /// this class is provided strictly through Selector. 497 class alignas(IdentifierInfoAlignment) MultiKeywordSelector 498 : public detail::DeclarationNameExtra, 499 public llvm::FoldingSetNode { 500 MultiKeywordSelector(unsigned nKeys) : DeclarationNameExtra(nKeys) {} 501 502 public: 503 // Constructor for keyword selectors. 504 MultiKeywordSelector(unsigned nKeys, IdentifierInfo **IIV) 505 : DeclarationNameExtra(nKeys) { 506 assert((nKeys > 1) && "not a multi-keyword selector"); 507 508 // Fill in the trailing keyword array. 509 IdentifierInfo **KeyInfo = reinterpret_cast<IdentifierInfo **>(this + 1); 510 for (unsigned i = 0; i != nKeys; ++i) 511 KeyInfo[i] = IIV[i]; 512 } 513 514 // getName - Derive the full selector name and return it. 515 std::string getName() const; 516 517 using DeclarationNameExtra::getNumArgs; 518 519 using keyword_iterator = IdentifierInfo *const *; 520 521 keyword_iterator keyword_begin() const { 522 return reinterpret_cast<keyword_iterator>(this + 1); 523 } 524 525 keyword_iterator keyword_end() const { 526 return keyword_begin() + getNumArgs(); 527 } 528 529 IdentifierInfo *getIdentifierInfoForSlot(unsigned i) const { 530 assert(i < getNumArgs() && "getIdentifierInfoForSlot(): illegal index"); 531 return keyword_begin()[i]; 532 } 533 534 static void Profile(llvm::FoldingSetNodeID &ID, keyword_iterator ArgTys, 535 unsigned NumArgs) { 536 ID.AddInteger(NumArgs); 537 for (unsigned i = 0; i != NumArgs; ++i) 538 ID.AddPointer(ArgTys[i]); 539 } 540 541 void Profile(llvm::FoldingSetNodeID &ID) { 542 Profile(ID, keyword_begin(), getNumArgs()); 543 } 544 }; 545 546 } // namespace clang. 547 548 bool Selector::isKeywordSelector(ArrayRef<StringRef> Names) const { 549 assert(!Names.empty() && "must have >= 1 selector slots"); 550 if (getNumArgs() != Names.size()) 551 return false; 552 for (unsigned I = 0, E = Names.size(); I != E; ++I) { 553 if (getNameForSlot(I) != Names[I]) 554 return false; 555 } 556 return true; 557 } 558 559 bool Selector::isUnarySelector(StringRef Name) const { 560 return isUnarySelector() && getNameForSlot(0) == Name; 561 } 562 563 unsigned Selector::getNumArgs() const { 564 unsigned IIF = getIdentifierInfoFlag(); 565 if (IIF <= ZeroArg) 566 return 0; 567 if (IIF == OneArg) 568 return 1; 569 // We point to a MultiKeywordSelector. 570 MultiKeywordSelector *SI = getMultiKeywordSelector(); 571 return SI->getNumArgs(); 572 } 573 574 IdentifierInfo *Selector::getIdentifierInfoForSlot(unsigned argIndex) const { 575 if (getIdentifierInfoFlag() < MultiArg) { 576 assert(argIndex == 0 && "illegal keyword index"); 577 return getAsIdentifierInfo(); 578 } 579 580 // We point to a MultiKeywordSelector. 581 MultiKeywordSelector *SI = getMultiKeywordSelector(); 582 return SI->getIdentifierInfoForSlot(argIndex); 583 } 584 585 StringRef Selector::getNameForSlot(unsigned int argIndex) const { 586 IdentifierInfo *II = getIdentifierInfoForSlot(argIndex); 587 return II ? II->getName() : StringRef(); 588 } 589 590 std::string MultiKeywordSelector::getName() const { 591 SmallString<256> Str; 592 llvm::raw_svector_ostream OS(Str); 593 for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) { 594 if (*I) 595 OS << (*I)->getName(); 596 OS << ':'; 597 } 598 599 return std::string(OS.str()); 600 } 601 602 std::string Selector::getAsString() const { 603 if (InfoPtr == 0) 604 return "<null selector>"; 605 606 if (getIdentifierInfoFlag() < MultiArg) { 607 IdentifierInfo *II = getAsIdentifierInfo(); 608 609 if (getNumArgs() == 0) { 610 assert(II && "If the number of arguments is 0 then II is guaranteed to " 611 "not be null."); 612 return std::string(II->getName()); 613 } 614 615 if (!II) 616 return ":"; 617 618 return II->getName().str() + ":"; 619 } 620 621 // We have a multiple keyword selector. 622 return getMultiKeywordSelector()->getName(); 623 } 624 625 void Selector::print(llvm::raw_ostream &OS) const { 626 OS << getAsString(); 627 } 628 629 LLVM_DUMP_METHOD void Selector::dump() const { print(llvm::errs()); } 630 631 /// Interpreting the given string using the normal CamelCase 632 /// conventions, determine whether the given string starts with the 633 /// given "word", which is assumed to end in a lowercase letter. 634 static bool startsWithWord(StringRef name, StringRef word) { 635 if (name.size() < word.size()) return false; 636 return ((name.size() == word.size() || !isLowercase(name[word.size()])) && 637 name.startswith(word)); 638 } 639 640 ObjCMethodFamily Selector::getMethodFamilyImpl(Selector sel) { 641 IdentifierInfo *first = sel.getIdentifierInfoForSlot(0); 642 if (!first) return OMF_None; 643 644 StringRef name = first->getName(); 645 if (sel.isUnarySelector()) { 646 if (name == "autorelease") return OMF_autorelease; 647 if (name == "dealloc") return OMF_dealloc; 648 if (name == "finalize") return OMF_finalize; 649 if (name == "release") return OMF_release; 650 if (name == "retain") return OMF_retain; 651 if (name == "retainCount") return OMF_retainCount; 652 if (name == "self") return OMF_self; 653 if (name == "initialize") return OMF_initialize; 654 } 655 656 if (name == "performSelector" || name == "performSelectorInBackground" || 657 name == "performSelectorOnMainThread") 658 return OMF_performSelector; 659 660 // The other method families may begin with a prefix of underscores. 661 while (!name.empty() && name.front() == '_') 662 name = name.substr(1); 663 664 if (name.empty()) return OMF_None; 665 switch (name.front()) { 666 case 'a': 667 if (startsWithWord(name, "alloc")) return OMF_alloc; 668 break; 669 case 'c': 670 if (startsWithWord(name, "copy")) return OMF_copy; 671 break; 672 case 'i': 673 if (startsWithWord(name, "init")) return OMF_init; 674 break; 675 case 'm': 676 if (startsWithWord(name, "mutableCopy")) return OMF_mutableCopy; 677 break; 678 case 'n': 679 if (startsWithWord(name, "new")) return OMF_new; 680 break; 681 default: 682 break; 683 } 684 685 return OMF_None; 686 } 687 688 ObjCInstanceTypeFamily Selector::getInstTypeMethodFamily(Selector sel) { 689 IdentifierInfo *first = sel.getIdentifierInfoForSlot(0); 690 if (!first) return OIT_None; 691 692 StringRef name = first->getName(); 693 694 if (name.empty()) return OIT_None; 695 switch (name.front()) { 696 case 'a': 697 if (startsWithWord(name, "array")) return OIT_Array; 698 break; 699 case 'd': 700 if (startsWithWord(name, "default")) return OIT_ReturnsSelf; 701 if (startsWithWord(name, "dictionary")) return OIT_Dictionary; 702 break; 703 case 's': 704 if (startsWithWord(name, "shared")) return OIT_ReturnsSelf; 705 if (startsWithWord(name, "standard")) return OIT_Singleton; 706 break; 707 case 'i': 708 if (startsWithWord(name, "init")) return OIT_Init; 709 break; 710 default: 711 break; 712 } 713 return OIT_None; 714 } 715 716 ObjCStringFormatFamily Selector::getStringFormatFamilyImpl(Selector sel) { 717 IdentifierInfo *first = sel.getIdentifierInfoForSlot(0); 718 if (!first) return SFF_None; 719 720 StringRef name = first->getName(); 721 722 switch (name.front()) { 723 case 'a': 724 if (name == "appendFormat") return SFF_NSString; 725 break; 726 727 case 'i': 728 if (name == "initWithFormat") return SFF_NSString; 729 break; 730 731 case 'l': 732 if (name == "localizedStringWithFormat") return SFF_NSString; 733 break; 734 735 case 's': 736 if (name == "stringByAppendingFormat" || 737 name == "stringWithFormat") return SFF_NSString; 738 break; 739 } 740 return SFF_None; 741 } 742 743 namespace { 744 745 struct SelectorTableImpl { 746 llvm::FoldingSet<MultiKeywordSelector> Table; 747 llvm::BumpPtrAllocator Allocator; 748 }; 749 750 } // namespace 751 752 static SelectorTableImpl &getSelectorTableImpl(void *P) { 753 return *static_cast<SelectorTableImpl*>(P); 754 } 755 756 SmallString<64> 757 SelectorTable::constructSetterName(StringRef Name) { 758 SmallString<64> SetterName("set"); 759 SetterName += Name; 760 SetterName[3] = toUppercase(SetterName[3]); 761 return SetterName; 762 } 763 764 Selector 765 SelectorTable::constructSetterSelector(IdentifierTable &Idents, 766 SelectorTable &SelTable, 767 const IdentifierInfo *Name) { 768 IdentifierInfo *SetterName = 769 &Idents.get(constructSetterName(Name->getName())); 770 return SelTable.getUnarySelector(SetterName); 771 } 772 773 std::string SelectorTable::getPropertyNameFromSetterSelector(Selector Sel) { 774 StringRef Name = Sel.getNameForSlot(0); 775 assert(Name.startswith("set") && "invalid setter name"); 776 return (Twine(toLowercase(Name[3])) + Name.drop_front(4)).str(); 777 } 778 779 size_t SelectorTable::getTotalMemory() const { 780 SelectorTableImpl &SelTabImpl = getSelectorTableImpl(Impl); 781 return SelTabImpl.Allocator.getTotalMemory(); 782 } 783 784 Selector SelectorTable::getSelector(unsigned nKeys, IdentifierInfo **IIV) { 785 if (nKeys < 2) 786 return Selector(IIV[0], nKeys); 787 788 SelectorTableImpl &SelTabImpl = getSelectorTableImpl(Impl); 789 790 // Unique selector, to guarantee there is one per name. 791 llvm::FoldingSetNodeID ID; 792 MultiKeywordSelector::Profile(ID, IIV, nKeys); 793 794 void *InsertPos = nullptr; 795 if (MultiKeywordSelector *SI = 796 SelTabImpl.Table.FindNodeOrInsertPos(ID, InsertPos)) 797 return Selector(SI); 798 799 // MultiKeywordSelector objects are not allocated with new because they have a 800 // variable size array (for parameter types) at the end of them. 801 unsigned Size = sizeof(MultiKeywordSelector) + nKeys*sizeof(IdentifierInfo *); 802 MultiKeywordSelector *SI = 803 (MultiKeywordSelector *)SelTabImpl.Allocator.Allocate( 804 Size, alignof(MultiKeywordSelector)); 805 new (SI) MultiKeywordSelector(nKeys, IIV); 806 SelTabImpl.Table.InsertNode(SI, InsertPos); 807 return Selector(SI); 808 } 809 810 SelectorTable::SelectorTable() { 811 Impl = new SelectorTableImpl(); 812 } 813 814 SelectorTable::~SelectorTable() { 815 delete &getSelectorTableImpl(Impl); 816 } 817 818 const char *clang::getOperatorSpelling(OverloadedOperatorKind Operator) { 819 switch (Operator) { 820 case OO_None: 821 case NUM_OVERLOADED_OPERATORS: 822 return nullptr; 823 824 #define OVERLOADED_OPERATOR(Name,Spelling,Token,Unary,Binary,MemberOnly) \ 825 case OO_##Name: return Spelling; 826 #include "clang/Basic/OperatorKinds.def" 827 } 828 829 llvm_unreachable("Invalid OverloadedOperatorKind!"); 830 } 831 832 StringRef clang::getNullabilitySpelling(NullabilityKind kind, 833 bool isContextSensitive) { 834 switch (kind) { 835 case NullabilityKind::NonNull: 836 return isContextSensitive ? "nonnull" : "_Nonnull"; 837 838 case NullabilityKind::Nullable: 839 return isContextSensitive ? "nullable" : "_Nullable"; 840 841 case NullabilityKind::NullableResult: 842 assert(!isContextSensitive && 843 "_Nullable_result isn't supported as context-sensitive keyword"); 844 return "_Nullable_result"; 845 846 case NullabilityKind::Unspecified: 847 return isContextSensitive ? "null_unspecified" : "_Null_unspecified"; 848 } 849 llvm_unreachable("Unknown nullability kind."); 850 } 851 852 diag::kind 853 IdentifierTable::getFutureCompatDiagKind(const IdentifierInfo &II, 854 const LangOptions &LangOpts) { 855 assert(II.isFutureCompatKeyword() && "diagnostic should not be needed"); 856 857 unsigned Flags = llvm::StringSwitch<unsigned>(II.getName()) 858 #define KEYWORD(NAME, FLAGS) .Case(#NAME, FLAGS) 859 #include "clang/Basic/TokenKinds.def" 860 #undef KEYWORD 861 ; 862 863 if (LangOpts.CPlusPlus) { 864 if ((Flags & KEYCXX11) == KEYCXX11) 865 return diag::warn_cxx11_keyword; 866 867 // char8_t is not modeled as a CXX20_KEYWORD because it's not 868 // unconditionally enabled in C++20 mode. (It can be disabled 869 // by -fno-char8_t.) 870 if (((Flags & KEYCXX20) == KEYCXX20) || 871 ((Flags & CHAR8SUPPORT) == CHAR8SUPPORT)) 872 return diag::warn_cxx20_keyword; 873 } else { 874 if ((Flags & KEYC99) == KEYC99) 875 return diag::warn_c99_keyword; 876 if ((Flags & KEYC2X) == KEYC2X) 877 return diag::warn_c2x_keyword; 878 } 879 880 llvm_unreachable( 881 "Keyword not known to come from a newer Standard or proposed Standard"); 882 } 883