xref: /freebsd/contrib/llvm-project/clang/lib/Basic/IdentifierTable.cpp (revision 770cf0a5f02dc8983a89c6568d741fbc25baa999)
1 //===- IdentifierTable.cpp - Hash table for identifier lookup -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the IdentifierInfo, IdentifierVisitor, and
10 // IdentifierTable interfaces.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Basic/IdentifierTable.h"
15 #include "clang/Basic/CharInfo.h"
16 #include "clang/Basic/DiagnosticLex.h"
17 #include "clang/Basic/LangOptions.h"
18 #include "clang/Basic/OperatorKinds.h"
19 #include "clang/Basic/Specifiers.h"
20 #include "clang/Basic/TargetBuiltins.h"
21 #include "clang/Basic/TokenKinds.h"
22 #include "llvm/ADT/DenseMapInfo.h"
23 #include "llvm/ADT/FoldingSet.h"
24 #include "llvm/ADT/StringMap.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/Support/Allocator.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <cassert>
29 #include <cstdio>
30 #include <cstring>
31 #include <string>
32 
33 using namespace clang;
34 
// A check to make sure the identifier-ID bit-field has sufficient room to
// store the largest possible target/aux-target combination: twice the largest
// builtin ID must be representable in InterestingIdentifierBits bits. If this
// fires, the InterestingIdentifierBits value (presumably declared in
// IdentifierTable.h -- confirm) likely just needs to be increased.
static_assert(2 * LargestBuiltinID < (2 << (InterestingIdentifierBits - 1)),
              "Insufficient ObjCOrBuiltinID Bits");
40 
41 //===----------------------------------------------------------------------===//
42 // IdentifierTable Implementation
43 //===----------------------------------------------------------------------===//
44 
// Out-of-line, defaulted destructor for the identifier iterator interface.
IdentifierIterator::~IdentifierIterator() = default;

// Out-of-line, defaulted destructor for the external lookup interface.
IdentifierInfoLookup::~IdentifierInfoLookup() = default;
48 
49 namespace {
50 
51 /// A simple identifier lookup iterator that represents an
52 /// empty sequence of identifiers.
53 class EmptyLookupIterator : public IdentifierIterator {
54 public:
55   StringRef Next() override { return StringRef(); }
56 };
57 
58 } // namespace
59 
/// Default implementation: returns a freshly heap-allocated iterator that
/// represents an empty sequence of identifiers.
/// NOTE(review): the iterator is created with `new`, so the caller is
/// presumably responsible for deleting it -- confirm against the header.
IdentifierIterator *IdentifierInfoLookup::getIdentifiers() {
  return new EmptyLookupIterator();
}
63 
/// Create an identifier table with no keywords registered.
///
/// \param ExternalLookup optional external identifier lookup source; stored
/// as-is in the ExternalLookup member.
IdentifierTable::IdentifierTable(IdentifierInfoLookup *ExternalLookup)
    : HashTable(8192), // Start with space for 8K identifiers.
      ExternalLookup(ExternalLookup) {}
67 
/// Create an identifier table and register the keywords of the language
/// described by \p LangOpts.
///
/// Delegates to the single-argument constructor for the basic setup, then
/// calls AddKeywords().
IdentifierTable::IdentifierTable(const LangOptions &LangOpts,
                                 IdentifierInfoLookup *ExternalLookup)
    : IdentifierTable(ExternalLookup) {
  // Populate the identifier table with info about keywords for the current
  // language.
  AddKeywords(LangOpts);
}
75 
76 //===----------------------------------------------------------------------===//
77 // Language Keyword Implementation
78 //===----------------------------------------------------------------------===//
79 
// Flag constants referenced by the keyword entries in TokenKinds.def.
namespace {

/// Bit flags describing under which language modes a keyword is available.
/// Every plain flag occupies exactly one bit; KEYALLCXX and KEYALL are
/// combinations of several flags.
enum TokenKey : unsigned {
  KEYC99 = 1u << 0,
  KEYCXX = 1u << 1,
  KEYCXX11 = 1u << 2,
  KEYGNU = 1u << 3,
  KEYMS = 1u << 4,
  BOOLSUPPORT = 1u << 5,
  KEYALTIVEC = 1u << 6,
  KEYNOCXX = 1u << 7,
  KEYBORLAND = 1u << 8,
  KEYOPENCLC = 1u << 9,
  KEYC23 = 1u << 10,
  KEYNOMS18 = 1u << 11,
  KEYNOOPENCL = 1u << 12,
  WCHARSUPPORT = 1u << 13,
  HALFSUPPORT = 1u << 14,
  CHAR8SUPPORT = 1u << 15,
  KEYOBJC = 1u << 16,
  KEYZVECTOR = 1u << 17,
  KEYCOROUTINES = 1u << 18,
  KEYMODULES = 1u << 19,
  KEYCXX20 = 1u << 20,
  KEYOPENCLCXX = 1u << 21,
  KEYMSCOMPAT = 1u << 22,
  KEYSYCL = 1u << 23,
  KEYCUDA = 1u << 24,
  KEYZOS = 1u << 25,
  KEYNOZOS = 1u << 26,
  KEYHLSL = 1u << 27,
  KEYFIXEDPOINT = 1u << 28,

  // The highest single-bit flag.
  KEYMAX = KEYFIXEDPOINT,

  // Every C++ standard-version flag.
  KEYALLCXX = KEYCXX | KEYCXX11 | KEYCXX20,

  // Every bit up to and including KEYMAX, except for the "disable" flags
  // KEYNOMS18, KEYNOOPENCL and KEYNOZOS.
  KEYALL = (KEYMAX | (KEYMAX - 1)) & ~KEYNOMS18 & ~KEYNOOPENCL & ~KEYNOZOS
};

/// How a keyword is treated in the selected standard. The enumerators are
/// deliberately ordered from least to most permissive so that, when several
/// statuses are merged, the larger value wins.
enum KeywordStatus {
  KS_Unknown,   // Not determined yet; intermediate value while merging.
  KS_Disabled,  // Not a keyword in this mode.
  KS_Future,    // A keyword in a future standard.
  KS_Extension, // Available as an extension.
  KS_Enabled,   // A keyword in this mode.
};

} // namespace
130 
131 // This works on a single TokenKey flag and checks the LangOpts to get the
132 // KeywordStatus based exclusively on this flag, so that it can be merged in
133 // getKeywordStatus. Most should be enabled/disabled, but some might imply
134 // 'future' versions, or extensions. Returns 'unknown' unless this is KNOWN to
135 // be disabled, and the calling function makes it 'disabled' if no other flag
136 // changes it. This is necessary for the KEYNOCXX and KEYNOOPENCL flags.
137 static KeywordStatus getKeywordStatusHelper(const LangOptions &LangOpts,
138                                             TokenKey Flag) {
139   // Flag is a single bit version of TokenKey (that is, not
140   // KEYALL/KEYALLCXX/etc), so we can check with == throughout this function.
141   assert((Flag & ~(Flag - 1)) == Flag && "Multiple bits set?");
142 
143   switch (Flag) {
144   case KEYC99:
145     if (LangOpts.C99)
146       return KS_Enabled;
147     return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
148   case KEYC23:
149     if (LangOpts.C23)
150       return KS_Enabled;
151     return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
152   case KEYCXX:
153     return LangOpts.CPlusPlus ? KS_Enabled : KS_Unknown;
154   case KEYCXX11:
155     if (LangOpts.CPlusPlus11)
156       return KS_Enabled;
157     return LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
158   case KEYCXX20:
159     if (LangOpts.CPlusPlus20)
160       return KS_Enabled;
161     return LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
162   case KEYGNU:
163     return LangOpts.GNUKeywords ? KS_Extension : KS_Unknown;
164   case KEYMS:
165     return LangOpts.MicrosoftExt ? KS_Extension : KS_Unknown;
166   case BOOLSUPPORT:
167     if (LangOpts.Bool)      return KS_Enabled;
168     return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
169   case KEYALTIVEC:
170     return LangOpts.AltiVec ? KS_Enabled : KS_Unknown;
171   case KEYBORLAND:
172     return LangOpts.Borland ? KS_Extension : KS_Unknown;
173   case KEYOPENCLC:
174     return LangOpts.OpenCL && !LangOpts.OpenCLCPlusPlus ? KS_Enabled
175                                                         : KS_Unknown;
176   case WCHARSUPPORT:
177     return LangOpts.WChar ? KS_Enabled : KS_Unknown;
178   case HALFSUPPORT:
179     return LangOpts.Half ? KS_Enabled : KS_Unknown;
180   case CHAR8SUPPORT:
181     if (LangOpts.Char8) return KS_Enabled;
182     if (LangOpts.CPlusPlus20) return KS_Unknown;
183     if (LangOpts.CPlusPlus) return KS_Future;
184     return KS_Unknown;
185   case KEYOBJC:
186     // We treat bridge casts as objective-C keywords so we can warn on them
187     // in non-arc mode.
188     return LangOpts.ObjC ? KS_Enabled : KS_Unknown;
189   case KEYZVECTOR:
190     return LangOpts.ZVector ? KS_Enabled : KS_Unknown;
191   case KEYCOROUTINES:
192     return LangOpts.Coroutines ? KS_Enabled : KS_Unknown;
193   case KEYMODULES:
194     return KS_Unknown;
195   case KEYOPENCLCXX:
196     return LangOpts.OpenCLCPlusPlus ? KS_Enabled : KS_Unknown;
197   case KEYMSCOMPAT:
198     return LangOpts.MSVCCompat ? KS_Enabled : KS_Unknown;
199   case KEYSYCL:
200     return LangOpts.isSYCL() ? KS_Enabled : KS_Unknown;
201   case KEYCUDA:
202     return LangOpts.CUDA ? KS_Enabled : KS_Unknown;
203   case KEYZOS:
204     return LangOpts.ZOSExt ? KS_Enabled : KS_Unknown;
205   case KEYHLSL:
206     return LangOpts.HLSL ? KS_Enabled : KS_Unknown;
207   case KEYNOCXX:
208     // This is enabled in all non-C++ modes, but might be enabled for other
209     // reasons as well.
210     return LangOpts.CPlusPlus ? KS_Unknown : KS_Enabled;
211   case KEYNOOPENCL:
212   case KEYNOMS18:
213   case KEYNOZOS:
214     // The disable behavior for this is handled in getKeywordStatus.
215     return KS_Unknown;
216   case KEYFIXEDPOINT:
217     return LangOpts.FixedPoint ? KS_Enabled : KS_Disabled;
218   default:
219     llvm_unreachable("Unknown KeywordStatus flag");
220   }
221 }
222 
223 /// Translates flags as specified in TokenKinds.def into keyword status
224 /// in the given language standard.
225 static KeywordStatus getKeywordStatus(const LangOptions &LangOpts,
226                                       unsigned Flags) {
227   // KEYALL means always enabled, so special case this one.
228   if (Flags == KEYALL) return KS_Enabled;
229   // These are tests that need to 'always win', as they are special in that they
230   // disable based on certain conditions.
231   if (LangOpts.OpenCL && (Flags & KEYNOOPENCL)) return KS_Disabled;
232   if (LangOpts.MSVCCompat && (Flags & KEYNOMS18) &&
233       !LangOpts.isCompatibleWithMSVC(LangOptions::MSVC2015))
234     return KS_Disabled;
235   if (LangOpts.ZOSExt && (Flags & KEYNOZOS))
236     return KS_Disabled;
237   KeywordStatus CurStatus = KS_Unknown;
238 
239   while (Flags != 0) {
240     unsigned CurFlag = Flags & ~(Flags - 1);
241     Flags = Flags & ~CurFlag;
242     CurStatus = std::max(
243         CurStatus,
244         getKeywordStatusHelper(LangOpts, static_cast<TokenKey>(CurFlag)));
245   }
246 
247   if (CurStatus == KS_Unknown)
248     return KS_Disabled;
249   return CurStatus;
250 }
251 
252 static bool IsKeywordInCpp(unsigned Flags) {
253   return (Flags & (KEYCXX | KEYCXX11 | KEYCXX20 | BOOLSUPPORT | WCHARSUPPORT |
254                    CHAR8SUPPORT)) != 0;
255 }
256 
257 static void MarkIdentifierAsKeywordInCpp(IdentifierTable &Table,
258                                          StringRef Name) {
259   IdentifierInfo &II = Table.get(Name, tok::identifier);
260   II.setIsKeywordInCPlusPlus();
261   II.setHandleIdentifierCase();
262 }
263 
264 /// AddKeyword - This method is used to associate a token ID with specific
265 /// identifiers because they are language keywords.  This causes the lexer to
266 /// automatically map matching identifiers to specialized token codes.
267 static void AddKeyword(StringRef Keyword,
268                        tok::TokenKind TokenCode, unsigned Flags,
269                        const LangOptions &LangOpts, IdentifierTable &Table) {
270   KeywordStatus AddResult = getKeywordStatus(LangOpts, Flags);
271 
272   // Don't add this keyword if disabled in this language and isn't otherwise
273   // special.
274   if (AddResult == KS_Disabled) {
275     // We do not consider any identifiers to be C++ keywords when in
276     // Objective-C because @ effectively introduces a custom grammar where C++
277     // keywords can be used (and similar for selectors). We could enable this
278     // for Objective-C, but it would require more logic to ensure we do not
279     // issue compatibility diagnostics in these cases.
280     if (!LangOpts.ObjC && IsKeywordInCpp(Flags))
281       MarkIdentifierAsKeywordInCpp(Table, Keyword);
282     return;
283   }
284 
285   IdentifierInfo &Info =
286       Table.get(Keyword, AddResult == KS_Future ? tok::identifier : TokenCode);
287   Info.setIsExtensionToken(AddResult == KS_Extension);
288   Info.setIsFutureCompatKeyword(AddResult == KS_Future);
289 }
290 
291 /// AddCXXOperatorKeyword - Register a C++ operator keyword alternative
292 /// representations.
293 static void AddCXXOperatorKeyword(StringRef Keyword,
294                                   tok::TokenKind TokenCode,
295                                   IdentifierTable &Table) {
296   IdentifierInfo &Info = Table.get(Keyword, TokenCode);
297   Info.setIsCPlusPlusOperatorKeyword();
298 }
299 
300 /// AddObjCKeyword - Register an Objective-C \@keyword like "class" "selector"
301 /// or "property".
302 static void AddObjCKeyword(StringRef Name,
303                            tok::ObjCKeywordKind ObjCID,
304                            IdentifierTable &Table) {
305   Table.get(Name).setObjCKeywordID(ObjCID);
306 }
307 
308 static void AddNotableIdentifier(StringRef Name,
309                                  tok::NotableIdentifierKind BTID,
310                                  IdentifierTable &Table) {
311   // Don't add 'not_notable' identifier.
312   if (BTID != tok::not_notable) {
313     IdentifierInfo &Info = Table.get(Name, tok::identifier);
314     Info.setNotableIdentifierID(BTID);
315   }
316 }
317 
/// AddKeywords - Add all keywords to the symbol table.
///
/// The bulk of the work is done by expanding TokenKinds.def with the macros
/// defined below: each KEYWORD/ALIAS entry goes through AddKeyword (which
/// decides from the entry's flags and \p LangOpts whether and how to register
/// it), C++ operator keywords and Objective-C \@keywords are registered
/// depending on the corresponding language options, and notable identifiers
/// are tagged. A few option-controlled keywords that are handled outside of
/// the .def expansion are added afterwards.
void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
  // Add keywords and tokens for the current language.
#define KEYWORD(NAME, FLAGS) \
  AddKeyword(StringRef(#NAME), tok::kw_ ## NAME,  \
             FLAGS, LangOpts, *this);
#define ALIAS(NAME, TOK, FLAGS) \
  AddKeyword(StringRef(NAME), tok::kw_ ## TOK,  \
             FLAGS, LangOpts, *this);
#define CXX_KEYWORD_OPERATOR(NAME, ALIAS)                                      \
  if (LangOpts.CXXOperatorNames)                                               \
    AddCXXOperatorKeyword(StringRef(#NAME), tok::ALIAS, *this);                \
  else                                                                         \
    MarkIdentifierAsKeywordInCpp(*this, StringRef(#NAME));
#define OBJC_AT_KEYWORD(NAME)  \
  if (LangOpts.ObjC)           \
    AddObjCKeyword(StringRef(#NAME), tok::objc_##NAME, *this);
#define NOTABLE_IDENTIFIER(NAME)                                               \
  AddNotableIdentifier(StringRef(#NAME), tok::NAME, *this);

  // TESTING_KEYWORD entries expand to nothing here, so they are not
  // registered by this function.
#define TESTING_KEYWORD(NAME, FLAGS)
#include "clang/Basic/TokenKinds.def"

  // The following keywords are passed KEYALL, so AddKeyword enables them
  // unconditionally; whether they exist at all is controlled by the
  // respective language option.
  if (LangOpts.ParseUnknownAnytype)
    AddKeyword("__unknown_anytype", tok::kw___unknown_anytype, KEYALL,
               LangOpts, *this);

  if (LangOpts.DeclSpecKeyword)
    AddKeyword("__declspec", tok::kw___declspec, KEYALL, LangOpts, *this);

  // "__ieee128" is registered with the same token kind as __float128.
  if (LangOpts.IEEE128)
    AddKeyword("__ieee128", tok::kw___float128, KEYALL, LangOpts, *this);

  // Add the 'import' contextual keyword.
  get("import").setModulesImport(true);
}
355 
/// Checks if the specified token kind represents a keyword in the
/// specified language.
///
/// The switch is generated from TokenKinds.def: every KEYWORD entry becomes a
/// case that evaluates that keyword's flags with getKeywordStatus. Token
/// kinds without a KEYWORD entry report KS_Disabled.
/// \returns Status of the keyword in the language.
static KeywordStatus getTokenKwStatus(const LangOptions &LangOpts,
                                      tok::TokenKind K) {
  switch (K) {
#define KEYWORD(NAME, FLAGS) \
  case tok::kw_##NAME: return getKeywordStatus(LangOpts, FLAGS);
#include "clang/Basic/TokenKinds.def"
  default: return KS_Disabled;
  }
}
368 
369 /// Returns true if the identifier represents a keyword in the
370 /// specified language.
371 bool IdentifierInfo::isKeyword(const LangOptions &LangOpts) const {
372   switch (getTokenKwStatus(LangOpts, getTokenID())) {
373   case KS_Enabled:
374   case KS_Extension:
375     return true;
376   default:
377     return false;
378   }
379 }
380 
381 /// Returns true if the identifier represents a C++ keyword in the
382 /// specified language.
383 bool IdentifierInfo::isCPlusPlusKeyword(const LangOptions &LangOpts) const {
384   if (!LangOpts.CPlusPlus || !isKeyword(LangOpts))
385     return false;
386   // This is a C++ keyword if this identifier is not a keyword when checked
387   // using LangOptions without C++ support.
388   LangOptions LangOptsNoCPP = LangOpts;
389   LangOptsNoCPP.CPlusPlus = false;
390   LangOptsNoCPP.CPlusPlus11 = false;
391   LangOptsNoCPP.CPlusPlus20 = false;
392   return !isKeyword(LangOptsNoCPP);
393 }
394 
395 ReservedIdentifierStatus
396 IdentifierInfo::isReserved(const LangOptions &LangOpts) const {
397   StringRef Name = getName();
398 
399   // '_' is a reserved identifier, but its use is so common (e.g. to store
400   // ignored values) that we don't warn on it.
401   if (Name.size() <= 1)
402     return ReservedIdentifierStatus::NotReserved;
403 
404   // [lex.name] p3
405   if (Name[0] == '_') {
406 
407     // Each name that begins with an underscore followed by an uppercase letter
408     // or another underscore is reserved.
409     if (Name[1] == '_')
410       return ReservedIdentifierStatus::StartsWithDoubleUnderscore;
411 
412     if ('A' <= Name[1] && Name[1] <= 'Z')
413       return ReservedIdentifierStatus::
414           StartsWithUnderscoreFollowedByCapitalLetter;
415 
416     // This is a bit misleading: it actually means it's only reserved if we're
417     // at global scope because it starts with an underscore.
418     return ReservedIdentifierStatus::StartsWithUnderscoreAtGlobalScope;
419   }
420 
421   // Each name that contains a double underscore (__) is reserved.
422   if (LangOpts.CPlusPlus && Name.contains("__"))
423     return ReservedIdentifierStatus::ContainsDoubleUnderscore;
424 
425   return ReservedIdentifierStatus::NotReserved;
426 }
427 
428 ReservedLiteralSuffixIdStatus
429 IdentifierInfo::isReservedLiteralSuffixId() const {
430   StringRef Name = getName();
431 
432   // Note: the diag::warn_deprecated_literal_operator_id diagnostic depends on
433   // this being the first check we do, so if this order changes, we have to fix
434   // that as well.
435   if (Name[0] != '_')
436     return ReservedLiteralSuffixIdStatus::NotStartsWithUnderscore;
437 
438   if (Name.contains("__"))
439     return ReservedLiteralSuffixIdStatus::ContainsDoubleUnderscore;
440 
441   return ReservedLiteralSuffixIdStatus::NotReserved;
442 }
443 
444 StringRef IdentifierInfo::deuglifiedName() const {
445   StringRef Name = getName();
446   if (Name.size() >= 2 && Name.front() == '_' &&
447       (Name[1] == '_' || (Name[1] >= 'A' && Name[1] <= 'Z')))
448     return Name.ltrim('_');
449   return Name;
450 }
451 
/// Returns the preprocessor keyword kind spelled by this identifier, or
/// tok::pp_not_keyword if the identifier is not a preprocessor keyword.
tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
  // We use a perfect hash function here involving the length of the keyword,
  // the first and third character.  For preprocessor ID's there are no
  // collisions (if there were, the switch below would complain about duplicate
  // case values).  Note that this depends on 'if' being null terminated.

  // HASH combines the length (shifted left by 6) with the difference between
  // the first and third character, masked to its low 6 bits.
#define HASH(LEN, FIRST, THIRD)                                                \
  (LEN << 6) + (((FIRST - 'a') - (THIRD - 'a')) & 63)
  // CASE emits one switch case for a keyword's hash value; since different
  // identifiers can share a hash, the full spelling is confirmed with memcmp
  // before the keyword kind is returned.
#define CASE(LEN, FIRST, THIRD, NAME) \
  case HASH(LEN, FIRST, THIRD): \
    return memcmp(Name, #NAME, LEN) ? tok::pp_not_keyword : tok::pp_ ## NAME

  unsigned Len = getLength();
  // No preprocessor keyword is shorter than two characters.
  if (Len < 2) return tok::pp_not_keyword;
  const char *Name = getNameStart();
  // For a two-character name, Name[2] reads the character just past the name
  // (expected to be the null terminator, see the note above).
  switch (HASH(Len, Name[0], Name[2])) {
  default: return tok::pp_not_keyword;
  CASE( 2, 'i', '\0', if);
  CASE( 4, 'e', 'i', elif);
  CASE( 4, 'e', 's', else);
  CASE( 4, 'l', 'n', line);
  CASE( 4, 's', 'c', sccs);
  CASE( 5, 'e', 'b', embed);
  CASE( 5, 'e', 'd', endif);
  CASE( 5, 'e', 'r', error);
  CASE( 5, 'i', 'e', ident);
  CASE( 5, 'i', 'd', ifdef);
  CASE( 5, 'u', 'd', undef);

  CASE( 6, 'a', 's', assert);
  CASE( 6, 'd', 'f', define);
  CASE( 6, 'i', 'n', ifndef);
  CASE( 6, 'i', 'p', import);
  CASE( 6, 'p', 'a', pragma);

  CASE( 7, 'd', 'f', defined);
  CASE( 7, 'e', 'i', elifdef);
  CASE( 7, 'i', 'c', include);
  CASE( 7, 'w', 'r', warning);

  CASE( 8, 'e', 'i', elifndef);
  CASE( 8, 'u', 'a', unassert);
  CASE(12, 'i', 'c', include_next);

  CASE(14, '_', 'p', __public_macro);

  CASE(15, '_', 'p', __private_macro);

  CASE(16, '_', 'i', __include_macros);
#undef CASE
#undef HASH
  }
}
505 
506 //===----------------------------------------------------------------------===//
507 // Stats Implementation
508 //===----------------------------------------------------------------------===//
509 
510 /// PrintStats - Print statistics about how well the identifier table is doing
511 /// at hashing identifiers.
512 void IdentifierTable::PrintStats() const {
513   unsigned NumBuckets = HashTable.getNumBuckets();
514   unsigned NumIdentifiers = HashTable.getNumItems();
515   unsigned NumEmptyBuckets = NumBuckets-NumIdentifiers;
516   unsigned AverageIdentifierSize = 0;
517   unsigned MaxIdentifierLength = 0;
518 
519   // TODO: Figure out maximum times an identifier had to probe for -stats.
520   for (llvm::StringMap<IdentifierInfo*, llvm::BumpPtrAllocator>::const_iterator
521        I = HashTable.begin(), E = HashTable.end(); I != E; ++I) {
522     unsigned IdLen = I->getKeyLength();
523     AverageIdentifierSize += IdLen;
524     if (MaxIdentifierLength < IdLen)
525       MaxIdentifierLength = IdLen;
526   }
527 
528   fprintf(stderr, "\n*** Identifier Table Stats:\n");
529   fprintf(stderr, "# Identifiers:   %d\n", NumIdentifiers);
530   fprintf(stderr, "# Empty Buckets: %d\n", NumEmptyBuckets);
531   fprintf(stderr, "Hash density (#identifiers per bucket): %f\n",
532           NumIdentifiers/(double)NumBuckets);
533   fprintf(stderr, "Ave identifier length: %f\n",
534           (AverageIdentifierSize/(double)NumIdentifiers));
535   fprintf(stderr, "Max identifier length: %d\n", MaxIdentifierLength);
536 
537   // Compute statistics about the memory allocated for identifiers.
538   HashTable.getAllocator().PrintStats();
539 }
540 
541 //===----------------------------------------------------------------------===//
542 // SelectorTable Implementation
543 //===----------------------------------------------------------------------===//
544 
545 unsigned llvm::DenseMapInfo<clang::Selector>::getHashValue(clang::Selector S) {
546   return DenseMapInfo<void*>::getHashValue(S.getAsOpaquePtr());
547 }
548 
549 bool Selector::isKeywordSelector(ArrayRef<StringRef> Names) const {
550   assert(!Names.empty() && "must have >= 1 selector slots");
551   if (getNumArgs() != Names.size())
552     return false;
553   for (unsigned I = 0, E = Names.size(); I != E; ++I) {
554     if (getNameForSlot(I) != Names[I])
555       return false;
556   }
557   return true;
558 }
559 
560 bool Selector::isUnarySelector(StringRef Name) const {
561   return isUnarySelector() && getNameForSlot(0) == Name;
562 }
563 
564 unsigned Selector::getNumArgs() const {
565   unsigned IIF = getIdentifierInfoFlag();
566   if (IIF <= ZeroArg)
567     return 0;
568   if (IIF == OneArg)
569     return 1;
570   // We point to a MultiKeywordSelector.
571   MultiKeywordSelector *SI = getMultiKeywordSelector();
572   return SI->getNumArgs();
573 }
574 
575 const IdentifierInfo *
576 Selector::getIdentifierInfoForSlot(unsigned argIndex) const {
577   if (getIdentifierInfoFlag() < MultiArg) {
578     assert(argIndex == 0 && "illegal keyword index");
579     return getAsIdentifierInfo();
580   }
581 
582   // We point to a MultiKeywordSelector.
583   MultiKeywordSelector *SI = getMultiKeywordSelector();
584   return SI->getIdentifierInfoForSlot(argIndex);
585 }
586 
587 StringRef Selector::getNameForSlot(unsigned int argIndex) const {
588   const IdentifierInfo *II = getIdentifierInfoForSlot(argIndex);
589   return II ? II->getName() : StringRef();
590 }
591 
592 std::string MultiKeywordSelector::getName() const {
593   SmallString<256> Str;
594   llvm::raw_svector_ostream OS(Str);
595   for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) {
596     if (*I)
597       OS << (*I)->getName();
598     OS << ':';
599   }
600 
601   return std::string(OS.str());
602 }
603 
604 std::string Selector::getAsString() const {
605   if (isNull())
606     return "<null selector>";
607 
608   if (getIdentifierInfoFlag() < MultiArg) {
609     const IdentifierInfo *II = getAsIdentifierInfo();
610 
611     if (getNumArgs() == 0) {
612       assert(II && "If the number of arguments is 0 then II is guaranteed to "
613                    "not be null.");
614       return std::string(II->getName());
615     }
616 
617     if (!II)
618       return ":";
619 
620     return II->getName().str() + ":";
621   }
622 
623   // We have a multiple keyword selector.
624   return getMultiKeywordSelector()->getName();
625 }
626 
/// Writes the string form of this selector (see getAsString) to \p OS.
void Selector::print(llvm::raw_ostream &OS) const {
  OS << getAsString();
}
630 
/// Debugging aid: prints this selector to llvm::errs().
LLVM_DUMP_METHOD void Selector::dump() const { print(llvm::errs()); }
632 
633 /// Interpreting the given string using the normal CamelCase
634 /// conventions, determine whether the given string starts with the
635 /// given "word", which is assumed to end in a lowercase letter.
636 static bool startsWithWord(StringRef name, StringRef word) {
637   if (name.size() < word.size()) return false;
638   return ((name.size() == word.size() || !isLowercase(name[word.size()])) &&
639           name.starts_with(word));
640 }
641 
642 ObjCMethodFamily Selector::getMethodFamilyImpl(Selector sel) {
643   const IdentifierInfo *first = sel.getIdentifierInfoForSlot(0);
644   if (!first) return OMF_None;
645 
646   StringRef name = first->getName();
647   if (sel.isUnarySelector()) {
648     if (name == "autorelease") return OMF_autorelease;
649     if (name == "dealloc") return OMF_dealloc;
650     if (name == "finalize") return OMF_finalize;
651     if (name == "release") return OMF_release;
652     if (name == "retain") return OMF_retain;
653     if (name == "retainCount") return OMF_retainCount;
654     if (name == "self") return OMF_self;
655     if (name == "initialize") return OMF_initialize;
656   }
657 
658   if (name == "performSelector" || name == "performSelectorInBackground" ||
659       name == "performSelectorOnMainThread")
660     return OMF_performSelector;
661 
662   // The other method families may begin with a prefix of underscores.
663   name = name.ltrim('_');
664 
665   if (name.empty()) return OMF_None;
666   switch (name.front()) {
667   case 'a':
668     if (startsWithWord(name, "alloc")) return OMF_alloc;
669     break;
670   case 'c':
671     if (startsWithWord(name, "copy")) return OMF_copy;
672     break;
673   case 'i':
674     if (startsWithWord(name, "init")) return OMF_init;
675     break;
676   case 'm':
677     if (startsWithWord(name, "mutableCopy")) return OMF_mutableCopy;
678     break;
679   case 'n':
680     if (startsWithWord(name, "new")) return OMF_new;
681     break;
682   default:
683     break;
684   }
685 
686   return OMF_None;
687 }
688 
689 ObjCInstanceTypeFamily Selector::getInstTypeMethodFamily(Selector sel) {
690   const IdentifierInfo *first = sel.getIdentifierInfoForSlot(0);
691   if (!first) return OIT_None;
692 
693   StringRef name = first->getName();
694 
695   if (name.empty()) return OIT_None;
696   switch (name.front()) {
697     case 'a':
698       if (startsWithWord(name, "array")) return OIT_Array;
699       break;
700     case 'd':
701       if (startsWithWord(name, "default")) return OIT_ReturnsSelf;
702       if (startsWithWord(name, "dictionary")) return OIT_Dictionary;
703       break;
704     case 's':
705       if (startsWithWord(name, "shared")) return OIT_ReturnsSelf;
706       if (startsWithWord(name, "standard")) return OIT_Singleton;
707       break;
708     case 'i':
709       if (startsWithWord(name, "init")) return OIT_Init;
710       break;
711     default:
712       break;
713   }
714   return OIT_None;
715 }
716 
717 ObjCStringFormatFamily Selector::getStringFormatFamilyImpl(Selector sel) {
718   const IdentifierInfo *first = sel.getIdentifierInfoForSlot(0);
719   if (!first) return SFF_None;
720 
721   StringRef name = first->getName();
722 
723   switch (name.front()) {
724     case 'a':
725       if (name == "appendFormat") return SFF_NSString;
726       break;
727 
728     case 'i':
729       if (name == "initWithFormat") return SFF_NSString;
730       break;
731 
732     case 'l':
733       if (name == "localizedStringWithFormat") return SFF_NSString;
734       break;
735 
736     case 's':
737       if (name == "stringByAppendingFormat" ||
738           name == "stringWithFormat") return SFF_NSString;
739       break;
740   }
741   return SFF_None;
742 }
743 
namespace {

/// Private state behind SelectorTable's opaque Impl pointer.
struct SelectorTableImpl {
  /// Uniquing set of all multi-keyword selectors created so far.
  llvm::FoldingSet<MultiKeywordSelector> Table;
  /// Allocator providing the storage for the MultiKeywordSelector objects.
  llvm::BumpPtrAllocator Allocator;
};

} // namespace
752 
753 static SelectorTableImpl &getSelectorTableImpl(void *P) {
754   return *static_cast<SelectorTableImpl*>(P);
755 }
756 
757 SmallString<64>
758 SelectorTable::constructSetterName(StringRef Name) {
759   SmallString<64> SetterName("set");
760   SetterName += Name;
761   SetterName[3] = toUppercase(SetterName[3]);
762   return SetterName;
763 }
764 
765 Selector
766 SelectorTable::constructSetterSelector(IdentifierTable &Idents,
767                                        SelectorTable &SelTable,
768                                        const IdentifierInfo *Name) {
769   IdentifierInfo *SetterName =
770     &Idents.get(constructSetterName(Name->getName()));
771   return SelTable.getUnarySelector(SetterName);
772 }
773 
774 std::string SelectorTable::getPropertyNameFromSetterSelector(Selector Sel) {
775   StringRef Name = Sel.getNameForSlot(0);
776   assert(Name.starts_with("set") && "invalid setter name");
777   return (Twine(toLowercase(Name[3])) + Name.drop_front(4)).str();
778 }
779 
780 size_t SelectorTable::getTotalMemory() const {
781   SelectorTableImpl &SelTabImpl = getSelectorTableImpl(Impl);
782   return SelTabImpl.Allocator.getTotalMemory();
783 }
784 
785 Selector SelectorTable::getSelector(unsigned nKeys,
786                                     const IdentifierInfo **IIV) {
787   if (nKeys < 2)
788     return Selector(IIV[0], nKeys);
789 
790   SelectorTableImpl &SelTabImpl = getSelectorTableImpl(Impl);
791 
792   // Unique selector, to guarantee there is one per name.
793   llvm::FoldingSetNodeID ID;
794   MultiKeywordSelector::Profile(ID, IIV, nKeys);
795 
796   void *InsertPos = nullptr;
797   if (MultiKeywordSelector *SI =
798         SelTabImpl.Table.FindNodeOrInsertPos(ID, InsertPos))
799     return Selector(SI);
800 
801   // MultiKeywordSelector objects are not allocated with new because they have a
802   // variable size array (for parameter types) at the end of them.
803   unsigned Size = sizeof(MultiKeywordSelector) + nKeys*sizeof(IdentifierInfo *);
804   MultiKeywordSelector *SI =
805       (MultiKeywordSelector *)SelTabImpl.Allocator.Allocate(
806           Size, alignof(MultiKeywordSelector));
807   new (SI) MultiKeywordSelector(nKeys, IIV);
808   SelTabImpl.Table.InsertNode(SI, InsertPos);
809   return Selector(SI);
810 }
811 
/// Creates an empty selector table by allocating its private implementation
/// object; the destructor releases it again.
SelectorTable::SelectorTable() {
  Impl = new SelectorTableImpl();
}
815 
/// Destroys the private implementation object allocated by the constructor.
SelectorTable::~SelectorTable() {
  delete &getSelectorTableImpl(Impl);
}
819 
/// Returns the spelling of the overloaded operator \p Operator, or nullptr
/// for OO_None and NUM_OVERLOADED_OPERATORS.
///
/// The remaining cases are generated from OperatorKinds.def, one per
/// OVERLOADED_OPERATOR entry.
const char *clang::getOperatorSpelling(OverloadedOperatorKind Operator) {
  switch (Operator) {
  case OO_None:
  case NUM_OVERLOADED_OPERATORS:
    return nullptr;

#define OVERLOADED_OPERATOR(Name,Spelling,Token,Unary,Binary,MemberOnly) \
  case OO_##Name: return Spelling;
#include "clang/Basic/OperatorKinds.def"
  }

  llvm_unreachable("Invalid OverloadedOperatorKind!");
}
833 
834 StringRef clang::getNullabilitySpelling(NullabilityKind kind,
835                                         bool isContextSensitive) {
836   switch (kind) {
837   case NullabilityKind::NonNull:
838     return isContextSensitive ? "nonnull" : "_Nonnull";
839 
840   case NullabilityKind::Nullable:
841     return isContextSensitive ? "nullable" : "_Nullable";
842 
843   case NullabilityKind::NullableResult:
844     assert(!isContextSensitive &&
845            "_Nullable_result isn't supported as context-sensitive keyword");
846     return "_Nullable_result";
847 
848   case NullabilityKind::Unspecified:
849     return isContextSensitive ? "null_unspecified" : "_Null_unspecified";
850   }
851   llvm_unreachable("Unknown nullability kind.");
852 }
853 
854 llvm::raw_ostream &clang::operator<<(llvm::raw_ostream &OS,
855                                      NullabilityKind NK) {
856   switch (NK) {
857   case NullabilityKind::NonNull:
858     return OS << "NonNull";
859   case NullabilityKind::Nullable:
860     return OS << "Nullable";
861   case NullabilityKind::NullableResult:
862     return OS << "NullableResult";
863   case NullabilityKind::Unspecified:
864     return OS << "Unspecified";
865   }
866   llvm_unreachable("Unknown nullability kind.");
867 }
868 
869 diag::kind
870 IdentifierTable::getFutureCompatDiagKind(const IdentifierInfo &II,
871                                          const LangOptions &LangOpts) {
872   assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");
873 
874   unsigned Flags = llvm::StringSwitch<unsigned>(II.getName())
875 #define KEYWORD(NAME, FLAGS) .Case(#NAME, FLAGS)
876 #include "clang/Basic/TokenKinds.def"
877 #undef KEYWORD
878       ;
879 
880   if (LangOpts.CPlusPlus) {
881     if ((Flags & KEYCXX11) == KEYCXX11)
882       return diag::warn_cxx11_keyword;
883 
884     // char8_t is not modeled as a CXX20_KEYWORD because it's not
885     // unconditionally enabled in C++20 mode. (It can be disabled
886     // by -fno-char8_t.)
887     if (((Flags & KEYCXX20) == KEYCXX20) ||
888         ((Flags & CHAR8SUPPORT) == CHAR8SUPPORT))
889       return diag::warn_cxx20_keyword;
890   } else {
891     if ((Flags & KEYC99) == KEYC99)
892       return diag::warn_c99_keyword;
893     if ((Flags & KEYC23) == KEYC23)
894       return diag::warn_c23_keyword;
895   }
896 
897   llvm_unreachable(
898       "Keyword not known to come from a newer Standard or proposed Standard");
899 }
900