xref: /freebsd/contrib/llvm-project/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.cpp (revision 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e)
1 //===-- CPlusPlusNameParser.cpp -------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "CPlusPlusNameParser.h"
10 
11 #include "clang/Basic/IdentifierTable.h"
12 #include "clang/Basic/TokenKinds.h"
13 #include "llvm/ADT/StringMap.h"
14 #include "llvm/Support/Threading.h"
15 #include <optional>
16 
17 using namespace lldb;
18 using namespace lldb_private;
19 using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction;
20 using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName;
21 namespace tok = clang::tok;
22 
ParseAsFunctionDefinition()23 std::optional<ParsedFunction> CPlusPlusNameParser::ParseAsFunctionDefinition() {
24   m_next_token_index = 0;
25   std::optional<ParsedFunction> result(std::nullopt);
26 
27   // Try to parse the name as function without a return type specified e.g.
28   // main(int, char*[])
29   {
30     Bookmark start_position = SetBookmark();
31     result = ParseFunctionImpl(false);
32     if (result && !HasMoreTokens())
33       return result;
34   }
35 
36   // Try to parse the name as function with function pointer return type e.g.
37   // void (*get_func(const char*))()
38   result = ParseFuncPtr(true);
39   if (result)
40     return result;
41 
42   // Finally try to parse the name as a function with non-function return type
43   // e.g. int main(int, char*[])
44   result = ParseFunctionImpl(true);
45   if (HasMoreTokens())
46     return std::nullopt;
47   return result;
48 }
49 
ParseAsFullName()50 std::optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() {
51   m_next_token_index = 0;
52   std::optional<ParsedNameRanges> name_ranges = ParseFullNameImpl();
53   if (!name_ranges)
54     return std::nullopt;
55   if (HasMoreTokens())
56     return std::nullopt;
57   ParsedName result;
58   result.basename = GetTextForRange(name_ranges->basename_range);
59   result.context = GetTextForRange(name_ranges->context_range);
60   return result;
61 }
62 
HasMoreTokens()63 bool CPlusPlusNameParser::HasMoreTokens() {
64   return m_next_token_index < m_tokens.size();
65 }
66 
Advance()67 void CPlusPlusNameParser::Advance() { ++m_next_token_index; }
68 
TakeBack()69 void CPlusPlusNameParser::TakeBack() { --m_next_token_index; }
70 
ConsumeToken(tok::TokenKind kind)71 bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) {
72   if (!HasMoreTokens())
73     return false;
74 
75   if (!Peek().is(kind))
76     return false;
77 
78   Advance();
79   return true;
80 }
81 
ConsumeToken(Ts...kinds)82 template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) {
83   if (!HasMoreTokens())
84     return false;
85 
86   if (!Peek().isOneOf(kinds...))
87     return false;
88 
89   Advance();
90   return true;
91 }
92 
SetBookmark()93 CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() {
94   return Bookmark(m_next_token_index);
95 }
96 
GetCurrentPosition()97 size_t CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; }
98 
Peek()99 clang::Token &CPlusPlusNameParser::Peek() {
100   assert(HasMoreTokens());
101   return m_tokens[m_next_token_index];
102 }
103 
104 std::optional<ParsedFunction>
ParseFunctionImpl(bool expect_return_type)105 CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) {
106   Bookmark start_position = SetBookmark();
107 
108   ParsedFunction result;
109   if (expect_return_type) {
110     size_t return_start = GetCurrentPosition();
111     // Consume return type if it's expected.
112     if (!ConsumeToken(tok::kw_auto) && !ConsumeTypename())
113       return std::nullopt;
114 
115     size_t return_end = GetCurrentPosition();
116     result.return_type = GetTextForRange(Range(return_start, return_end));
117   }
118 
119   auto maybe_name = ParseFullNameImpl();
120   if (!maybe_name) {
121     return std::nullopt;
122   }
123 
124   size_t argument_start = GetCurrentPosition();
125   if (!ConsumeArguments()) {
126     return std::nullopt;
127   }
128 
129   size_t qualifiers_start = GetCurrentPosition();
130   SkipFunctionQualifiers();
131   size_t end_position = GetCurrentPosition();
132 
133   result.name.basename = GetTextForRange(maybe_name->basename_range);
134   result.name.context = GetTextForRange(maybe_name->context_range);
135   result.arguments = GetTextForRange(Range(argument_start, qualifiers_start));
136   result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position));
137   start_position.Remove();
138   return result;
139 }
140 
141 std::optional<ParsedFunction>
ParseFuncPtr(bool expect_return_type)142 CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) {
143   // This function parses a function definition
144   // that returns a pointer type.
145   // E.g., double (*(*func(long))(int))(float)
146 
147   // Step 1:
148   // Remove the return type of the innermost
149   // function pointer type.
150   //
151   // Leaves us with:
152   //   (*(*func(long))(int))(float)
153   Bookmark start_position = SetBookmark();
154   if (expect_return_type) {
155     // Consume return type.
156     if (!ConsumeTypename())
157       return std::nullopt;
158   }
159 
160   // Step 2:
161   //
162   // Skip a pointer and parenthesis pair.
163   //
164   // Leaves us with:
165   //   (*func(long))(int))(float)
166   if (!ConsumeToken(tok::l_paren))
167     return std::nullopt;
168   if (!ConsumePtrsAndRefs())
169     return std::nullopt;
170 
171   // Step 3:
172   //
173   // Consume inner function name. This will fail unless
174   // we stripped all the pointers on the left hand side
175   // of the function name.
176   {
177     Bookmark before_inner_function_pos = SetBookmark();
178     auto maybe_inner_function_name = ParseFunctionImpl(false);
179     if (maybe_inner_function_name)
180       if (ConsumeToken(tok::r_paren))
181         if (ConsumeArguments()) {
182           SkipFunctionQualifiers();
183           start_position.Remove();
184           before_inner_function_pos.Remove();
185           return maybe_inner_function_name;
186         }
187   }
188 
189   // Step 4:
190   //
191   // Parse the remaining string as a function pointer again.
192   // This time don't consume the inner-most typename since
193   // we're left with pointers only. This will strip another
194   // layer of pointers until we're left with the innermost
195   // function name/argument. I.e., func(long))(int))(float)
196   //
197   // Once we successfully stripped all pointers and gotten
198   // the innermost function name from ParseFunctionImpl above,
199   // we consume a single ')' and the arguments '(...)' that follows.
200   //
201   // Leaves us with:
202   //   )(float)
203   //
204   // This is the remnant of the outer function pointers' arguments.
205   // Unwinding the recursive calls will remove the remaining
206   // arguments.
207   auto maybe_inner_function_ptr_name = ParseFuncPtr(false);
208   if (maybe_inner_function_ptr_name)
209     if (ConsumeToken(tok::r_paren))
210       if (ConsumeArguments()) {
211         SkipFunctionQualifiers();
212         start_position.Remove();
213         return maybe_inner_function_ptr_name;
214       }
215 
216   return std::nullopt;
217 }
218 
ConsumeArguments()219 bool CPlusPlusNameParser::ConsumeArguments() {
220   return ConsumeBrackets(tok::l_paren, tok::r_paren);
221 }
222 
ConsumeTemplateArgs()223 bool CPlusPlusNameParser::ConsumeTemplateArgs() {
224   Bookmark start_position = SetBookmark();
225   if (!HasMoreTokens() || Peek().getKind() != tok::less)
226     return false;
227   Advance();
228 
229   // Consuming template arguments is a bit trickier than consuming function
230   // arguments, because '<' '>' brackets are not always trivially balanced. In
231   // some rare cases tokens '<' and '>' can appear inside template arguments as
232   // arithmetic or shift operators not as template brackets. Examples:
233   // std::enable_if<(10u)<(64), bool>
234   //           f<A<operator<(X,Y)::Subclass>>
235   // Good thing that compiler makes sure that really ambiguous cases of '>'
236   // usage should be enclosed within '()' brackets.
237   int template_counter = 1;
238   bool can_open_template = false;
239   while (HasMoreTokens() && template_counter > 0) {
240     tok::TokenKind kind = Peek().getKind();
241     switch (kind) {
242     case tok::greatergreater:
243       template_counter -= 2;
244       can_open_template = false;
245       Advance();
246       break;
247     case tok::greater:
248       --template_counter;
249       can_open_template = false;
250       Advance();
251       break;
252     case tok::less:
253       // '<' is an attempt to open a subteamplte
254       // check if parser is at the point where it's actually possible,
255       // otherwise it's just a part of an expression like 'sizeof(T)<(10)'. No
256       // need to do the same for '>' because compiler actually makes sure that
257       // '>' always surrounded by brackets to avoid ambiguity.
258       if (can_open_template)
259         ++template_counter;
260       can_open_template = false;
261       Advance();
262       break;
263     case tok::kw_operator: // C++ operator overloading.
264       if (!ConsumeOperator())
265         return false;
266       can_open_template = true;
267       break;
268     case tok::raw_identifier:
269       can_open_template = true;
270       Advance();
271       break;
272     case tok::l_square:
273       // Handle templates tagged with an ABI tag.
274       // An example demangled/prettified version is:
275       //   func[abi:tag1][abi:tag2]<type[abi:tag3]>(int)
276       if (ConsumeAbiTag())
277         can_open_template = true;
278       else if (ConsumeBrackets(tok::l_square, tok::r_square))
279         can_open_template = false;
280       else
281         return false;
282       break;
283     case tok::l_paren:
284       if (!ConsumeArguments())
285         return false;
286       can_open_template = false;
287       break;
288     default:
289       can_open_template = false;
290       Advance();
291       break;
292     }
293   }
294 
295   if (template_counter != 0) {
296     return false;
297   }
298   start_position.Remove();
299   return true;
300 }
301 
ConsumeAbiTag()302 bool CPlusPlusNameParser::ConsumeAbiTag() {
303   Bookmark start_position = SetBookmark();
304   if (!ConsumeToken(tok::l_square))
305     return false;
306 
307   if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
308       Peek().getRawIdentifier() == "abi")
309     Advance();
310   else
311     return false;
312 
313   if (!ConsumeToken(tok::colon))
314     return false;
315 
316   // Consume the actual tag string (and allow some special characters)
317   while (ConsumeToken(tok::raw_identifier, tok::comma, tok::period,
318                       tok::numeric_constant))
319     ;
320 
321   if (!ConsumeToken(tok::r_square))
322     return false;
323 
324   start_position.Remove();
325   return true;
326 }
327 
ConsumeAnonymousNamespace()328 bool CPlusPlusNameParser::ConsumeAnonymousNamespace() {
329   Bookmark start_position = SetBookmark();
330   if (!ConsumeToken(tok::l_paren)) {
331     return false;
332   }
333   constexpr llvm::StringLiteral g_anonymous("anonymous");
334   if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
335       Peek().getRawIdentifier() == g_anonymous) {
336     Advance();
337   } else {
338     return false;
339   }
340 
341   if (!ConsumeToken(tok::kw_namespace)) {
342     return false;
343   }
344 
345   if (!ConsumeToken(tok::r_paren)) {
346     return false;
347   }
348   start_position.Remove();
349   return true;
350 }
351 
ConsumeLambda()352 bool CPlusPlusNameParser::ConsumeLambda() {
353   Bookmark start_position = SetBookmark();
354   if (!ConsumeToken(tok::l_brace)) {
355     return false;
356   }
357   constexpr llvm::StringLiteral g_lambda("lambda");
358   if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
359       Peek().getRawIdentifier() == g_lambda) {
360     // Put the matched brace back so we can use ConsumeBrackets
361     TakeBack();
362   } else {
363     return false;
364   }
365 
366   if (!ConsumeBrackets(tok::l_brace, tok::r_brace)) {
367     return false;
368   }
369 
370   start_position.Remove();
371   return true;
372 }
373 
ConsumeBrackets(tok::TokenKind left,tok::TokenKind right)374 bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left,
375                                           tok::TokenKind right) {
376   Bookmark start_position = SetBookmark();
377   if (!HasMoreTokens() || Peek().getKind() != left)
378     return false;
379   Advance();
380 
381   int counter = 1;
382   while (HasMoreTokens() && counter > 0) {
383     tok::TokenKind kind = Peek().getKind();
384     if (kind == right)
385       --counter;
386     else if (kind == left)
387       ++counter;
388     Advance();
389   }
390 
391   assert(counter >= 0);
392   if (counter > 0) {
393     return false;
394   }
395   start_position.Remove();
396   return true;
397 }
398 
ConsumeOperator()399 bool CPlusPlusNameParser::ConsumeOperator() {
400   Bookmark start_position = SetBookmark();
401   if (!ConsumeToken(tok::kw_operator))
402     return false;
403 
404   if (!HasMoreTokens()) {
405     return false;
406   }
407 
408   const auto &token = Peek();
409 
410   // When clang generates debug info it adds template parameters to names.
411   // Since clang doesn't add a space between the name and the template parameter
412   // in some cases we are not generating valid C++ names e.g.:
413   //
414   //   operator<<A::B>
415   //
416   // In some of these cases we will not parse them correctly. This fixes the
417   // issue by detecting this case and inserting tok::less in place of
418   // tok::lessless and returning successfully that we consumed the operator.
419   if (token.getKind() == tok::lessless) {
420     // Make sure we have more tokens before attempting to look ahead one more.
421     if (m_next_token_index + 1 < m_tokens.size()) {
422       // Look ahead two tokens.
423       clang::Token n_token = m_tokens[m_next_token_index + 1];
424       // If we find ( or < then this is indeed operator<< no need for fix.
425       if (n_token.getKind() != tok::l_paren && n_token.getKind() != tok::less) {
426         clang::Token tmp_tok;
427         tmp_tok.startToken();
428         tmp_tok.setLength(1);
429         tmp_tok.setLocation(token.getLocation().getLocWithOffset(1));
430         tmp_tok.setKind(tok::less);
431 
432         m_tokens[m_next_token_index] = tmp_tok;
433 
434         start_position.Remove();
435         return true;
436       }
437     }
438   }
439 
440   switch (token.getKind()) {
441   case tok::kw_new:
442   case tok::kw_delete:
443     // This is 'new' or 'delete' operators.
444     Advance();
445     // Check for array new/delete.
446     if (HasMoreTokens() && Peek().is(tok::l_square)) {
447       // Consume the '[' and ']'.
448       if (!ConsumeBrackets(tok::l_square, tok::r_square))
449         return false;
450     }
451     break;
452 
453 #define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly)  \
454   case tok::Token:                                                             \
455     Advance();                                                                 \
456     break;
457 #define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly)
458 #include "clang/Basic/OperatorKinds.def"
459 #undef OVERLOADED_OPERATOR
460 #undef OVERLOADED_OPERATOR_MULTI
461 
462   case tok::l_paren:
463     // Call operator consume '(' ... ')'.
464     if (ConsumeBrackets(tok::l_paren, tok::r_paren))
465       break;
466     return false;
467 
468   case tok::l_square:
469     // This is a [] operator.
470     // Consume the '[' and ']'.
471     if (ConsumeBrackets(tok::l_square, tok::r_square))
472       break;
473     return false;
474 
475   default:
476     // This might be a cast operator.
477     if (ConsumeTypename())
478       break;
479     return false;
480   }
481   start_position.Remove();
482   return true;
483 }
484 
SkipTypeQualifiers()485 void CPlusPlusNameParser::SkipTypeQualifiers() {
486   while (ConsumeToken(tok::kw_const, tok::kw_volatile))
487     ;
488 }
489 
SkipFunctionQualifiers()490 void CPlusPlusNameParser::SkipFunctionQualifiers() {
491   while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp))
492     ;
493 }
494 
ConsumeBuiltinType()495 bool CPlusPlusNameParser::ConsumeBuiltinType() {
496   bool result = false;
497   bool continue_parsing = true;
498   // Built-in types can be made of a few keywords like 'unsigned long long
499   // int'. This function consumes all built-in type keywords without checking
500   // if they make sense like 'unsigned char void'.
501   while (continue_parsing && HasMoreTokens()) {
502     switch (Peek().getKind()) {
503     case tok::kw_short:
504     case tok::kw_long:
505     case tok::kw___int64:
506     case tok::kw___int128:
507     case tok::kw_signed:
508     case tok::kw_unsigned:
509     case tok::kw_void:
510     case tok::kw_char:
511     case tok::kw_int:
512     case tok::kw_half:
513     case tok::kw_float:
514     case tok::kw_double:
515     case tok::kw___float128:
516     case tok::kw_wchar_t:
517     case tok::kw_bool:
518     case tok::kw_char16_t:
519     case tok::kw_char32_t:
520       result = true;
521       Advance();
522       break;
523     default:
524       continue_parsing = false;
525       break;
526     }
527   }
528   return result;
529 }
530 
SkipPtrsAndRefs()531 void CPlusPlusNameParser::SkipPtrsAndRefs() {
532   // Ignoring result.
533   ConsumePtrsAndRefs();
534 }
535 
ConsumePtrsAndRefs()536 bool CPlusPlusNameParser::ConsumePtrsAndRefs() {
537   bool found = false;
538   SkipTypeQualifiers();
539   while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const,
540                       tok::kw_volatile)) {
541     found = true;
542     SkipTypeQualifiers();
543   }
544   return found;
545 }
546 
ConsumeDecltype()547 bool CPlusPlusNameParser::ConsumeDecltype() {
548   Bookmark start_position = SetBookmark();
549   if (!ConsumeToken(tok::kw_decltype))
550     return false;
551 
552   if (!ConsumeArguments())
553     return false;
554 
555   start_position.Remove();
556   return true;
557 }
558 
ConsumeTypename()559 bool CPlusPlusNameParser::ConsumeTypename() {
560   Bookmark start_position = SetBookmark();
561   SkipTypeQualifiers();
562   if (!ConsumeBuiltinType() && !ConsumeDecltype()) {
563     if (!ParseFullNameImpl())
564       return false;
565   }
566   SkipPtrsAndRefs();
567   start_position.Remove();
568   return true;
569 }
570 
571 std::optional<CPlusPlusNameParser::ParsedNameRanges>
ParseFullNameImpl()572 CPlusPlusNameParser::ParseFullNameImpl() {
573   // Name parsing state machine.
574   enum class State {
575     Beginning,       // start of the name
576     AfterTwoColons,  // right after ::
577     AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+)
578     AfterTemplate,   // right after template brackets (<something>)
579     AfterOperator,   // right after name of C++ operator
580   };
581 
582   Bookmark start_position = SetBookmark();
583   State state = State::Beginning;
584   bool continue_parsing = true;
585   std::optional<size_t> last_coloncolon_position;
586 
587   while (continue_parsing && HasMoreTokens()) {
588     const auto &token = Peek();
589     switch (token.getKind()) {
590     case tok::raw_identifier: // Just a name.
591       if (state != State::Beginning && state != State::AfterTwoColons) {
592         continue_parsing = false;
593         break;
594       }
595       Advance();
596       state = State::AfterIdentifier;
597       break;
598     case tok::l_square: {
599       // Handles types or functions that were tagged
600       // with, e.g.,
601       //   [[gnu::abi_tag("tag1","tag2")]] func()
602       // and demangled/prettified into:
603       //   func[abi:tag1][abi:tag2]()
604 
605       // ABI tags only appear after a method or type name
606       const bool valid_state =
607           state == State::AfterIdentifier || state == State::AfterOperator;
608       if (!valid_state || !ConsumeAbiTag()) {
609         continue_parsing = false;
610       }
611 
612       break;
613     }
614     case tok::l_paren: {
615       if (state == State::Beginning || state == State::AfterTwoColons) {
616         // (anonymous namespace)
617         if (ConsumeAnonymousNamespace()) {
618           state = State::AfterIdentifier;
619           break;
620         }
621       }
622 
623       // Type declared inside a function 'func()::Type'
624       if (state != State::AfterIdentifier && state != State::AfterTemplate &&
625           state != State::AfterOperator) {
626         continue_parsing = false;
627         break;
628       }
629       Bookmark l_paren_position = SetBookmark();
630       // Consume the '(' ... ') [const]'.
631       if (!ConsumeArguments()) {
632         continue_parsing = false;
633         break;
634       }
635       SkipFunctionQualifiers();
636 
637       // Consume '::'
638       size_t coloncolon_position = GetCurrentPosition();
639       if (!ConsumeToken(tok::coloncolon)) {
640         continue_parsing = false;
641         break;
642       }
643       l_paren_position.Remove();
644       last_coloncolon_position = coloncolon_position;
645       state = State::AfterTwoColons;
646       break;
647     }
648     case tok::l_brace:
649       if (state == State::Beginning || state == State::AfterTwoColons) {
650         if (ConsumeLambda()) {
651           state = State::AfterIdentifier;
652           break;
653         }
654       }
655       continue_parsing = false;
656       break;
657     case tok::coloncolon: // Type nesting delimiter.
658       if (state != State::Beginning && state != State::AfterIdentifier &&
659           state != State::AfterTemplate) {
660         continue_parsing = false;
661         break;
662       }
663       last_coloncolon_position = GetCurrentPosition();
664       Advance();
665       state = State::AfterTwoColons;
666       break;
667     case tok::less: // Template brackets.
668       if (state != State::AfterIdentifier && state != State::AfterOperator) {
669         continue_parsing = false;
670         break;
671       }
672       if (!ConsumeTemplateArgs()) {
673         continue_parsing = false;
674         break;
675       }
676       state = State::AfterTemplate;
677       break;
678     case tok::kw_operator: // C++ operator overloading.
679       if (state != State::Beginning && state != State::AfterTwoColons) {
680         continue_parsing = false;
681         break;
682       }
683       if (!ConsumeOperator()) {
684         continue_parsing = false;
685         break;
686       }
687       state = State::AfterOperator;
688       break;
689     case tok::tilde: // Destructor.
690       if (state != State::Beginning && state != State::AfterTwoColons) {
691         continue_parsing = false;
692         break;
693       }
694       Advance();
695       if (ConsumeToken(tok::raw_identifier)) {
696         state = State::AfterIdentifier;
697       } else {
698         TakeBack();
699         continue_parsing = false;
700       }
701       break;
702     default:
703       continue_parsing = false;
704       break;
705     }
706   }
707 
708   if (state == State::AfterIdentifier || state == State::AfterOperator ||
709       state == State::AfterTemplate) {
710     ParsedNameRanges result;
711     if (last_coloncolon_position) {
712       result.context_range =
713           Range(start_position.GetSavedPosition(), *last_coloncolon_position);
714       result.basename_range =
715           Range(*last_coloncolon_position + 1, GetCurrentPosition());
716     } else {
717       result.basename_range =
718           Range(start_position.GetSavedPosition(), GetCurrentPosition());
719     }
720     start_position.Remove();
721     return result;
722   } else {
723     return std::nullopt;
724   }
725 }
726 
GetTextForRange(const Range & range)727 llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) {
728   if (range.empty())
729     return llvm::StringRef();
730   assert(range.begin_index < range.end_index);
731   assert(range.begin_index < m_tokens.size());
732   assert(range.end_index <= m_tokens.size());
733   clang::Token &first_token = m_tokens[range.begin_index];
734   clang::Token &last_token = m_tokens[range.end_index - 1];
735   clang::SourceLocation start_loc = first_token.getLocation();
736   clang::SourceLocation end_loc = last_token.getLocation();
737   unsigned start_pos = start_loc.getRawEncoding();
738   unsigned end_pos = end_loc.getRawEncoding() + last_token.getLength();
739   return m_text.take_front(end_pos).drop_front(start_pos);
740 }
741 
GetLangOptions()742 static const clang::LangOptions &GetLangOptions() {
743   static clang::LangOptions g_options;
744   static llvm::once_flag g_once_flag;
745   llvm::call_once(g_once_flag, []() {
746     g_options.LineComment = true;
747     g_options.C99 = true;
748     g_options.C11 = true;
749     g_options.CPlusPlus = true;
750     g_options.CPlusPlus11 = true;
751     g_options.CPlusPlus14 = true;
752     g_options.CPlusPlus17 = true;
753     g_options.CPlusPlus20 = true;
754   });
755   return g_options;
756 }
757 
GetKeywordsMap()758 static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() {
759   static llvm::StringMap<tok::TokenKind> g_map{
760 #define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name},
761 #include "clang/Basic/TokenKinds.def"
762 #undef KEYWORD
763   };
764   return g_map;
765 }
766 
ExtractTokens()767 void CPlusPlusNameParser::ExtractTokens() {
768   if (m_text.empty())
769     return;
770   clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(),
771                      m_text.data(), m_text.data() + m_text.size());
772   const auto &kw_map = GetKeywordsMap();
773   clang::Token token;
774   for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof);
775        lexer.LexFromRawLexer(token)) {
776     if (token.is(clang::tok::raw_identifier)) {
777       auto it = kw_map.find(token.getRawIdentifier());
778       if (it != kw_map.end()) {
779         token.setKind(it->getValue());
780       }
781     }
782 
783     m_tokens.push_back(token);
784   }
785 }
786