//===- DependencyDirectivesScanner.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This is the interface for scanning header and source files to get the
/// minimum necessary preprocessor directives for evaluating includes. It
/// reduces the source down to #define, #include, #import, @import, and any
/// conditional preprocessor logic that contains one of those.
///
//===----------------------------------------------------------------------===//

#include "clang/Lex/DependencyDirectivesScanner.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Lex/LexDiagnostic.h"
#include "clang/Lex/Lexer.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringSwitch.h"

using namespace clang;
using namespace clang::dependency_directives_scan;
using namespace llvm;

namespace {

struct DirectiveWithTokens {
  DirectiveKind Kind;
  unsigned NumTokens;

  DirectiveWithTokens(DirectiveKind Kind, unsigned NumTokens)
      : Kind(Kind), NumTokens(NumTokens) {}
};

/// Does an efficient "scan" of the sources to detect the presence of
/// preprocessor (or module import) directives and collects the raw lexed tokens
/// for those directives so that the \p Lexer can "replay" them when the file is
/// included.
///
/// Note that the behavior of the raw lexer is affected by the language mode,
/// while at this point we want to do a scan and collect tokens once,
/// irrespective of the language mode that the file will get included in. To
/// compensate for that the \p Lexer, while "replaying", will adjust a token
/// where appropriate, when it could affect the preprocessor's state.
/// For example in a directive like
///
/// \code
///   #if __has_cpp_attribute(clang::fallthrough)
/// \endcode
///
/// The preprocessor needs to see '::' as 'tok::coloncolon' instead of 2
/// 'tok::colon'. The \p Lexer will adjust if it sees consecutive 'tok::colon'
/// while in C++ mode.
struct Scanner {
  Scanner(StringRef Input,
          SmallVectorImpl<dependency_directives_scan::Token> &Tokens,
          DiagnosticsEngine *Diags, SourceLocation InputSourceLoc)
      : Input(Input), Tokens(Tokens), Diags(Diags),
        InputSourceLoc(InputSourceLoc), LangOpts(getLangOptsForDepScanning()),
        TheLexer(InputSourceLoc, LangOpts, Input.begin(), Input.begin(),
                 Input.end()) {}

  static LangOptions getLangOptsForDepScanning() {
    LangOptions LangOpts;
    // Set the lexer to use 'tok::at' for '@', instead of 'tok::unknown'.
    LangOpts.ObjC = true;
    LangOpts.LineComment = true;
    return LangOpts;
  }

  /// Lex the provided source and emit the directive tokens.
  ///
  /// \returns True on error.
  bool scan(SmallVectorImpl<Directive> &Directives);

private:
  /// Lexes next token and advances \p First and the \p Lexer.
  LLVM_NODISCARD dependency_directives_scan::Token &
  lexToken(const char *&First, const char *const End);

  dependency_directives_scan::Token &lexIncludeFilename(const char *&First,
                                                        const char *const End);

  /// Lexes next token and if it is an identifier returns its string, otherwise
  /// it skips the current line and returns \p None.
  ///
  /// In any case (whatever the token kind) \p First and the \p Lexer will
  /// advance beyond the token.
  LLVM_NODISCARD Optional<StringRef>
  tryLexIdentifierOrSkipLine(const char *&First, const char *const End);

  /// Used when it is certain that the next token is an identifier.
  LLVM_NODISCARD StringRef lexIdentifier(const char *&First,
                                         const char *const End);

  /// Lexes next token and returns true iff it is an identifier that matches \p
  /// Id, otherwise it skips the current line and returns false.
  ///
  /// In any case (whatever the token kind) \p First and the \p Lexer will
  /// advance beyond the token.
  LLVM_NODISCARD bool isNextIdentifierOrSkipLine(StringRef Id,
                                                 const char *&First,
                                                 const char *const End);

  LLVM_NODISCARD bool scanImpl(const char *First, const char *const End);
  LLVM_NODISCARD bool lexPPLine(const char *&First, const char *const End);
  LLVM_NODISCARD bool lexAt(const char *&First, const char *const End);
  LLVM_NODISCARD bool lexModule(const char *&First, const char *const End);
  LLVM_NODISCARD bool lexDefine(const char *HashLoc, const char *&First,
                                const char *const End);
  LLVM_NODISCARD bool lexPragma(const char *&First, const char *const End);
  LLVM_NODISCARD bool lexEndif(const char *&First, const char *const End);
  LLVM_NODISCARD bool lexDefault(DirectiveKind Kind, const char *&First,
                                 const char *const End);
  LLVM_NODISCARD bool lexModuleDirectiveBody(DirectiveKind Kind,
                                             const char *&First,
                                             const char *const End);
  void lexPPDirectiveBody(const char *&First, const char *const End);

  DirectiveWithTokens &pushDirective(DirectiveKind Kind) {
    Tokens.append(CurDirToks);
    DirsWithToks.emplace_back(Kind, CurDirToks.size());
    CurDirToks.clear();
    return DirsWithToks.back();
  }
  void popDirective() {
    Tokens.pop_back_n(DirsWithToks.pop_back_val().NumTokens);
  }
  DirectiveKind topDirective() const {
    return DirsWithToks.empty() ? pp_none : DirsWithToks.back().Kind;
  }

  unsigned getOffsetAt(const char *CurPtr) const {
    return CurPtr - Input.data();
  }

  /// Reports a diagnostic if the diagnostic engine is provided. Always returns
  /// true at the end.
  bool reportError(const char *CurPtr, unsigned Err);

  StringMap<char> SplitIds;
  StringRef Input;
  SmallVectorImpl<dependency_directives_scan::Token> &Tokens;
  DiagnosticsEngine *Diags;
  SourceLocation InputSourceLoc;

  /// Keeps track of the tokens for the currently lexed directive. Once a
  /// directive is fully lexed and "committed" then the tokens get appended to
  /// \p Tokens and \p CurDirToks is cleared for the next directive.
  SmallVector<dependency_directives_scan::Token, 32> CurDirToks;
  /// The directives that were lexed along with the number of tokens that each
  /// directive contains. The tokens of all the directives are kept in \p Tokens
  /// vector, in the same order as the directives order in \p DirsWithToks.
  SmallVector<DirectiveWithTokens, 64> DirsWithToks;
  LangOptions LangOpts;
  Lexer TheLexer;
};

} // end anonymous namespace

bool Scanner::reportError(const char *CurPtr, unsigned Err) {
  if (!Diags)
    return true;
  assert(CurPtr >= Input.data() && "invalid buffer ptr");
  Diags->Report(InputSourceLoc.getLocWithOffset(getOffsetAt(CurPtr)), Err);
  return true;
}

static void skipOverSpaces(const char *&First, const char *const End) {
  while (First != End && isHorizontalWhitespace(*First))
    ++First;
}

LLVM_NODISCARD static bool isRawStringLiteral(const char *First,
                                              const char *Current) {
  assert(First <= Current);

  // Check if we can even back up.
  if (*Current != '"' || First == Current)
    return false;

  // Check for an "R".
  --Current;
  if (*Current != 'R')
    return false;
  if (First == Current || !isAsciiIdentifierContinue(*--Current))
    return true;

  // Check for a prefix of "u", "U", or "L".
  if (*Current == 'u' || *Current == 'U' || *Current == 'L')
    return First == Current || !isAsciiIdentifierContinue(*--Current);

  // Check for a prefix of "u8".
  if (*Current != '8' || First == Current || *Current-- != 'u')
    return false;
  return First == Current || !isAsciiIdentifierContinue(*--Current);
}

static void skipRawString(const char *&First, const char *const End) {
  assert(First[0] == '"');
  assert(First[-1] == 'R');

  const char *Last = ++First;
  while (Last != End && *Last != '(')
    ++Last;
  if (Last == End) {
    First = Last; // Hit the end... just give up.
    return;
  }

  StringRef Terminator(First, Last - First);
  for (;;) {
    // Move First to just past the next ")".
    First = Last;
    while (First != End && *First != ')')
      ++First;
    if (First == End)
      return;
    ++First;

    // Look ahead for the terminator sequence.
    Last = First;
    while (Last != End && size_t(Last - First) < Terminator.size() &&
           Terminator[Last - First] == *Last)
      ++Last;

    // Check if we hit it (or the end of the file).
    if (Last == End) {
      First = Last;
      return;
    }
    if (size_t(Last - First) < Terminator.size())
      continue;
    if (*Last != '"')
      continue;
    First = Last + 1;
    return;
  }
}

// Returns the length of EOL, either 0 (no end-of-line), 1 (\n) or 2 (\r\n)
static unsigned isEOL(const char *First, const char *const End) {
  if (First == End)
    return 0;
  if (End - First > 1 && isVerticalWhitespace(First[0]) &&
      isVerticalWhitespace(First[1]) && First[0] != First[1])
    return 2;
  return !!isVerticalWhitespace(First[0]);
}

static void skipString(const char *&First, const char *const End) {
  assert(*First == '\'' || *First == '"' || *First == '<');
  const char Terminator = *First == '<' ? '>' : *First;
  for (++First; First != End && *First != Terminator; ++First) {
    // String and character literals don't extend past the end of the line.
    if (isVerticalWhitespace(*First))
      return;
    if (*First != '\\')
      continue;
    // Skip past backslash to the next character. This ensures that the
    // character right after it is skipped as well, which matters if it's
    // the terminator.
    if (++First == End)
      return;
    if (!isWhitespace(*First))
      continue;
    // Whitespace after the backslash might indicate a line continuation.
    const char *FirstAfterBackslashPastSpace = First;
    skipOverSpaces(FirstAfterBackslashPastSpace, End);
    if (unsigned NLSize = isEOL(FirstAfterBackslashPastSpace, End)) {
      // Advance the character pointer to the next line for the next
      // iteration.
      First = FirstAfterBackslashPastSpace + NLSize - 1;
    }
  }
  if (First != End)
    ++First; // Finish off the string.
}

// Returns the length of the skipped newline
static unsigned skipNewline(const char *&First, const char *End) {
  if (First == End)
    return 0;
  assert(isVerticalWhitespace(*First));
  unsigned Len = isEOL(First, End);
  assert(Len && "expected newline");
  First += Len;
  return Len;
}

static bool wasLineContinuation(const char *First, unsigned EOLLen) {
  return *(First - (int)EOLLen - 1) == '\\';
}

static void skipToNewlineRaw(const char *&First, const char *const End) {
  for (;;) {
    if (First == End)
      return;

    unsigned Len = isEOL(First, End);
    if (Len)
      return;

    do {
      if (++First == End)
        return;
      Len = isEOL(First, End);
    } while (!Len);

    if (First[-1] != '\\')
      return;

    First += Len;
    // Keep skipping lines...
  }
}

static void skipLineComment(const char *&First, const char *const End) {
  assert(First[0] == '/' && First[1] == '/');
  First += 2;
  skipToNewlineRaw(First, End);
}

static void skipBlockComment(const char *&First, const char *const End) {
  assert(First[0] == '/' && First[1] == '*');
  if (End - First < 4) {
    First = End;
    return;
  }
  for (First += 3; First != End; ++First)
    if (First[-1] == '*' && First[0] == '/') {
      ++First;
      return;
    }
}

/// \returns True if the current single quotation mark character is a C++14
/// digit separator.
static bool isQuoteCppDigitSeparator(const char *const Start,
                                     const char *const Cur,
                                     const char *const End) {
  assert(*Cur == '\'' && "expected quotation character");
  // skipLine is called in places where we don't expect a valid number
  // body before `start` on the same line, so always return false at the start.
  if (Start == Cur)
    return false;
  // The previous character must be a valid PP number character.
  // Make sure that the L, u, U, u8 prefixes don't get marked as a
  // separator though.
  char Prev = *(Cur - 1);
  if (Prev == 'L' || Prev == 'U' || Prev == 'u')
    return false;
  if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u')
    return false;
  if (!isPreprocessingNumberBody(Prev))
    return false;
  // The next character should be a valid identifier body character.
  return (Cur + 1) < End && isAsciiIdentifierContinue(*(Cur + 1));
}

static void skipLine(const char *&First, const char *const End) {
  for (;;) {
    assert(First <= End);
    if (First == End)
      return;

    if (isVerticalWhitespace(*First)) {
      skipNewline(First, End);
      return;
    }
    const char *Start = First;
    while (First != End && !isVerticalWhitespace(*First)) {
      // Iterate over strings correctly to avoid comments and newlines.
      if (*First == '"' ||
          (*First == '\'' && !isQuoteCppDigitSeparator(Start, First, End))) {
        if (isRawStringLiteral(Start, First))
          skipRawString(First, End);
        else
          skipString(First, End);
        continue;
      }

      // Iterate over comments correctly.
      if (*First != '/' || End - First < 2) {
        ++First;
        continue;
      }

      if (First[1] == '/') {
        // "//...".
        skipLineComment(First, End);
        continue;
      }

      if (First[1] != '*') {
        ++First;
        continue;
      }

      // "/*...*/".
      skipBlockComment(First, End);
    }
    if (First == End)
      return;

    // Skip over the newline.
    unsigned Len = skipNewline(First, End);
    if (!wasLineContinuation(First, Len)) // Continue past line-continuations.
      break;
  }
}

static void skipDirective(StringRef Name, const char *&First,
                          const char *const End) {
  if (llvm::StringSwitch<bool>(Name)
          .Case("warning", true)
          .Case("error", true)
          .Default(false))
    // Do not process quotes or comments.
    skipToNewlineRaw(First, End);
  else
    skipLine(First, End);
}

static void skipWhitespace(const char *&First, const char *const End) {
  for (;;) {
    assert(First <= End);
    skipOverSpaces(First, End);

    if (End - First < 2)
      return;

    if (First[0] == '\\' && isVerticalWhitespace(First[1])) {
      skipNewline(++First, End);
      continue;
    }

    // Check for a non-comment character.
    if (First[0] != '/')
      return;

    // "// ...".
    if (First[1] == '/') {
      skipLineComment(First, End);
      return;
    }

    // Cannot be a comment.
    if (First[1] != '*')
      return;

    // "/*...*/".
    skipBlockComment(First, End);
  }
}

bool Scanner::lexModuleDirectiveBody(DirectiveKind Kind, const char *&First,
                                     const char *const End) {
  const char *DirectiveLoc = Input.data() + CurDirToks.front().Offset;
  for (;;) {
    const dependency_directives_scan::Token &Tok = lexToken(First, End);
    if (Tok.is(tok::eof))
      return reportError(
          DirectiveLoc,
          diag::err_dep_source_scanner_missing_semi_after_at_import);
    if (Tok.is(tok::semi))
      break;
  }
  pushDirective(Kind);
  skipWhitespace(First, End);
  if (First == End)
    return false;
  if (!isVerticalWhitespace(*First))
    return reportError(
        DirectiveLoc, diag::err_dep_source_scanner_unexpected_tokens_at_import);
  skipNewline(First, End);
  return false;
}

dependency_directives_scan::Token &Scanner::lexToken(const char *&First,
                                                     const char *const End) {
  clang::Token Tok;
  TheLexer.LexFromRawLexer(Tok);
  First = Input.data() + TheLexer.getCurrentBufferOffset();
  assert(First <= End);

  unsigned Offset = TheLexer.getCurrentBufferOffset() - Tok.getLength();
  CurDirToks.emplace_back(Offset, Tok.getLength(), Tok.getKind(),
                          Tok.getFlags());
  return CurDirToks.back();
}

dependency_directives_scan::Token &
Scanner::lexIncludeFilename(const char *&First, const char *const End) {
  clang::Token Tok;
  TheLexer.LexIncludeFilename(Tok);
  First = Input.data() + TheLexer.getCurrentBufferOffset();
  assert(First <= End);

  unsigned Offset = TheLexer.getCurrentBufferOffset() - Tok.getLength();
  CurDirToks.emplace_back(Offset, Tok.getLength(), Tok.getKind(),
                          Tok.getFlags());
  return CurDirToks.back();
}

void Scanner::lexPPDirectiveBody(const char *&First, const char *const End) {
  while (true) {
    const dependency_directives_scan::Token &Tok = lexToken(First, End);
    if (Tok.is(tok::eod))
      break;
  }
}

LLVM_NODISCARD Optional<StringRef>
Scanner::tryLexIdentifierOrSkipLine(const char *&First, const char *const End) {
  const dependency_directives_scan::Token &Tok = lexToken(First, End);
  if (Tok.isNot(tok::raw_identifier)) {
    if (!Tok.is(tok::eod))
      skipLine(First, End);
    return None;
  }

  bool NeedsCleaning = Tok.Flags & clang::Token::NeedsCleaning;
  if (LLVM_LIKELY(!NeedsCleaning))
    return Input.slice(Tok.Offset, Tok.getEnd());

  SmallString<64> Spelling;
  Spelling.resize(Tok.Length);

  unsigned SpellingLength = 0;
  const char *BufPtr = Input.begin() + Tok.Offset;
  const char *AfterIdent = Input.begin() + Tok.getEnd();
  while (BufPtr < AfterIdent) {
    unsigned Size;
    Spelling[SpellingLength++] =
        Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts);
    BufPtr += Size;
  }

  return SplitIds.try_emplace(StringRef(Spelling.begin(), SpellingLength), 0)
      .first->first();
}

StringRef Scanner::lexIdentifier(const char *&First, const char *const End) {
  Optional<StringRef> Id = tryLexIdentifierOrSkipLine(First, End);
  assert(Id && "expected identifier token");
  return Id.value();
}

bool Scanner::isNextIdentifierOrSkipLine(StringRef Id, const char *&First,
                                         const char *const End) {
  if (Optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End)) {
    if (*FoundId == Id)
      return true;
    skipLine(First, End);
  }
  return false;
}

bool Scanner::lexAt(const char *&First, const char *const End) {
  // Handle "@import".

  // Lex '@'.
  const dependency_directives_scan::Token &AtTok = lexToken(First, End);
  assert(AtTok.is(tok::at));
  (void)AtTok;

  if (!isNextIdentifierOrSkipLine("import", First, End))
    return false;
  return lexModuleDirectiveBody(decl_at_import, First, End);
}

bool Scanner::lexModule(const char *&First, const char *const End) {
  StringRef Id = lexIdentifier(First, End);
  bool Export = false;
  if (Id == "export") {
    Export = true;
    Optional<StringRef> NextId = tryLexIdentifierOrSkipLine(First, End);
    if (!NextId)
      return false;
    Id = *NextId;
  }

  if (Id != "module" && Id != "import") {
    skipLine(First, End);
    return false;
  }

  skipWhitespace(First, End);

  // Ignore this as a module directive if the next character can't be part of
  // an import.

  switch (*First) {
  case ':':
  case '<':
  case '"':
    break;
  default:
    if (!isAsciiIdentifierContinue(*First)) {
      skipLine(First, End);
      return false;
    }
  }

  TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ false);

  DirectiveKind Kind;
  if (Id == "module")
    Kind = Export ? cxx_export_module_decl : cxx_module_decl;
  else
    Kind = Export ? cxx_export_import_decl : cxx_import_decl;

  return lexModuleDirectiveBody(Kind, First, End);
}

bool Scanner::lexPragma(const char *&First, const char *const End) {
  Optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End);
  if (!FoundId)
    return false;

  StringRef Id = *FoundId;
  auto Kind = llvm::StringSwitch<DirectiveKind>(Id)
                  .Case("once", pp_pragma_once)
                  .Case("push_macro", pp_pragma_push_macro)
                  .Case("pop_macro", pp_pragma_pop_macro)
                  .Case("include_alias", pp_pragma_include_alias)
                  .Default(pp_none);
  if (Kind != pp_none) {
    lexPPDirectiveBody(First, End);
    pushDirective(Kind);
    return false;
  }

  if (Id != "clang") {
    skipLine(First, End);
    return false;
  }

  // #pragma clang.
  if (!isNextIdentifierOrSkipLine("module", First, End))
    return false;

  // #pragma clang module.
  if (!isNextIdentifierOrSkipLine("import", First, End))
    return false;

  // #pragma clang module import.
  lexPPDirectiveBody(First, End);
  pushDirective(pp_pragma_import);
  return false;
}

bool Scanner::lexEndif(const char *&First, const char *const End) {
  // Strip out "#else" if it's empty.
  if (topDirective() == pp_else)
    popDirective();

  // If "#ifdef" is empty, strip it and skip the "#endif".
  //
  // FIXME: Once/if Clang starts disallowing __has_include in macro expansions,
  // we can skip empty `#if` and `#elif` blocks as well after scanning for a
  // literal __has_include in the condition. Even without that rule we could
  // drop the tokens if we scan for identifiers in the condition and find none.
  if (topDirective() == pp_ifdef || topDirective() == pp_ifndef) {
    popDirective();
    skipLine(First, End);
    return false;
  }

  return lexDefault(pp_endif, First, End);
}

bool Scanner::lexDefault(DirectiveKind Kind, const char *&First,
                         const char *const End) {
  lexPPDirectiveBody(First, End);
  pushDirective(Kind);
  return false;
}

static bool isStartOfRelevantLine(char First) {
  switch (First) {
  case '#':
  case '@':
  case 'i':
  case 'e':
  case 'm':
    return true;
  }
  return false;
}

bool Scanner::lexPPLine(const char *&First, const char *const End) {
  assert(First != End);

  skipWhitespace(First, End);
  assert(First <= End);
  if (First == End)
    return false;

  if (!isStartOfRelevantLine(*First)) {
    skipLine(First, End);
    assert(First <= End);
    return false;
  }

  TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ true);

  auto ScEx1 = make_scope_exit([&]() {
    /// Clear Scanner's CurDirToks before returning, in case we didn't push a
    /// new directive.
    CurDirToks.clear();
  });

  // Handle "@import".
  if (*First == '@')
    return lexAt(First, End);

  if (*First == 'i' || *First == 'e' || *First == 'm')
    return lexModule(First, End);

  // Handle preprocessing directives.

  TheLexer.setParsingPreprocessorDirective(true);
  auto ScEx2 = make_scope_exit(
      [&]() { TheLexer.setParsingPreprocessorDirective(false); });

  // Lex '#'.
  const dependency_directives_scan::Token &HashTok = lexToken(First, End);
  assert(HashTok.is(tok::hash));
  (void)HashTok;

  Optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End);
  if (!FoundId)
    return false;

  StringRef Id = *FoundId;

  if (Id == "pragma")
    return lexPragma(First, End);

  auto Kind = llvm::StringSwitch<DirectiveKind>(Id)
                  .Case("include", pp_include)
                  .Case("__include_macros", pp___include_macros)
                  .Case("define", pp_define)
                  .Case("undef", pp_undef)
                  .Case("import", pp_import)
                  .Case("include_next", pp_include_next)
                  .Case("if", pp_if)
                  .Case("ifdef", pp_ifdef)
                  .Case("ifndef", pp_ifndef)
                  .Case("elif", pp_elif)
                  .Case("elifdef", pp_elifdef)
                  .Case("elifndef", pp_elifndef)
                  .Case("else", pp_else)
                  .Case("endif", pp_endif)
                  .Default(pp_none);
  if (Kind == pp_none) {
    skipDirective(Id, First, End);
    return false;
  }

  if (Kind == pp_endif)
    return lexEndif(First, End);

  switch (Kind) {
  case pp_include:
  case pp___include_macros:
  case pp_include_next:
  case pp_import:
    lexIncludeFilename(First, End);
    break;
  default:
    break;
  }

  // Everything else.
  return lexDefault(Kind, First, End);
}

static void skipUTF8ByteOrderMark(const char *&First, const char *const End) {
  if ((End - First) >= 3 && First[0] == '\xef' && First[1] == '\xbb' &&
      First[2] == '\xbf')
    First += 3;
}

bool Scanner::scanImpl(const char *First, const char *const End) {
  skipUTF8ByteOrderMark(First, End);
  while (First != End)
    if (lexPPLine(First, End))
      return true;
  return false;
}

bool Scanner::scan(SmallVectorImpl<Directive> &Directives) {
  bool Error = scanImpl(Input.begin(), Input.end());

  if (!Error) {
    // Add an EOF on success.
    pushDirective(pp_eof);
  }

  ArrayRef<dependency_directives_scan::Token> RemainingTokens = Tokens;
  for (const DirectiveWithTokens &DirWithToks : DirsWithToks) {
    assert(RemainingTokens.size() >= DirWithToks.NumTokens);
    Directives.emplace_back(DirWithToks.Kind,
                            RemainingTokens.take_front(DirWithToks.NumTokens));
    RemainingTokens = RemainingTokens.drop_front(DirWithToks.NumTokens);
  }
  assert(RemainingTokens.empty());

  return Error;
}

bool clang::scanSourceForDependencyDirectives(
    StringRef Input, SmallVectorImpl<dependency_directives_scan::Token> &Tokens,
    SmallVectorImpl<Directive> &Directives, DiagnosticsEngine *Diags,
    SourceLocation InputSourceLoc) {
  return Scanner(Input, Tokens, Diags, InputSourceLoc).scan(Directives);
}

void clang::printDependencyDirectivesAsSource(
    StringRef Source,
    ArrayRef<dependency_directives_scan::Directive> Directives,
    llvm::raw_ostream &OS) {
  // Add a space separator where it is convenient for testing purposes.
  auto needsSpaceSeparator =
      [](tok::TokenKind Prev,
         const dependency_directives_scan::Token &Tok) -> bool {
    if (Prev == Tok.Kind)
      return !Tok.isOneOf(tok::l_paren, tok::r_paren, tok::l_square,
                          tok::r_square);
    if (Prev == tok::raw_identifier &&
        Tok.isOneOf(tok::hash, tok::numeric_constant, tok::string_literal,
                    tok::char_constant, tok::header_name))
      return true;
    if (Prev == tok::r_paren &&
        Tok.isOneOf(tok::raw_identifier, tok::hash, tok::string_literal,
                    tok::char_constant, tok::unknown))
      return true;
    if (Prev == tok::comma &&
        Tok.isOneOf(tok::l_paren, tok::string_literal, tok::less))
      return true;
    return false;
  };

  for (const dependency_directives_scan::Directive &Directive : Directives) {
    Optional<tok::TokenKind> PrevTokenKind;
    for (const dependency_directives_scan::Token &Tok : Directive.Tokens) {
      if (PrevTokenKind && needsSpaceSeparator(*PrevTokenKind, Tok))
        OS << ' ';
      PrevTokenKind = Tok.Kind;
      OS << Source.slice(Tok.Offset, Tok.getEnd());
    }
  }
}
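
// Illustrative usage sketch (not part of the upstream file): it shows how a
// client might drive the scanner over an in-memory buffer via the public entry
// points defined above and then print the minimized directives back out. The
// sample source string and the null DiagnosticsEngine are assumptions made
// purely for this example.
//
// \code
//   using namespace clang;
//   StringRef Source = "#include <vector>\n"
//                      "int x = 0; // dropped by the scan\n"
//                      "#define FOO 1\n";
//   SmallVector<dependency_directives_scan::Token, 16> Tokens;
//   SmallVector<dependency_directives_scan::Directive, 8> Directives;
//   // scanSourceForDependencyDirectives() returns true on error.
//   if (!scanSourceForDependencyDirectives(Source, Tokens, Directives,
//                                          /*Diags=*/nullptr,
//                                          SourceLocation()))
//     // Prints only the directive tokens that survived the scan.
//     printDependencyDirectivesAsSource(Source, Directives, llvm::outs());
// \endcode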