1 //===- TGLexer.cpp - Lexer for TableGen -----------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Implement the Lexer for TableGen. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "TGLexer.h" 14 #include "llvm/ADT/ArrayRef.h" 15 #include "llvm/ADT/StringSwitch.h" 16 #include "llvm/ADT/Twine.h" 17 #include "llvm/Config/config.h" // for strtoull()/strtoll() define 18 #include "llvm/Support/Compiler.h" 19 #include "llvm/Support/MemoryBuffer.h" 20 #include "llvm/Support/SourceMgr.h" 21 #include "llvm/TableGen/Error.h" 22 #include <algorithm> 23 #include <cctype> 24 #include <cerrno> 25 #include <cstdint> 26 #include <cstdio> 27 #include <cstdlib> 28 #include <cstring> 29 30 using namespace llvm; 31 32 namespace { 33 // A list of supported preprocessing directives with their 34 // internal token kinds and names. 35 struct { 36 tgtok::TokKind Kind; 37 const char *Word; 38 } PreprocessorDirs[] = { 39 { tgtok::Ifdef, "ifdef" }, 40 { tgtok::Ifndef, "ifndef" }, 41 { tgtok::Else, "else" }, 42 { tgtok::Endif, "endif" }, 43 { tgtok::Define, "define" } 44 }; 45 } // end anonymous namespace 46 47 TGLexer::TGLexer(SourceMgr &SM, ArrayRef<std::string> Macros) : SrcMgr(SM) { 48 CurBuffer = SrcMgr.getMainFileID(); 49 CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(); 50 CurPtr = CurBuf.begin(); 51 TokStart = nullptr; 52 53 // Pretend that we enter the "top-level" include file. 54 PrepIncludeStack.push_back( 55 std::make_unique<std::vector<PreprocessorControlDesc>>()); 56 57 // Put all macros defined in the command line into the DefinedMacros set. 58 std::for_each(Macros.begin(), Macros.end(), 59 [this](const std::string &MacroName) { 60 DefinedMacros.insert(MacroName); 61 }); 62 } 63 64 SMLoc TGLexer::getLoc() const { 65 return SMLoc::getFromPointer(TokStart); 66 } 67 68 /// ReturnError - Set the error to the specified string at the specified 69 /// location. This is defined to always return tgtok::Error. 70 tgtok::TokKind TGLexer::ReturnError(SMLoc Loc, const Twine &Msg) { 71 PrintError(Loc, Msg); 72 return tgtok::Error; 73 } 74 75 tgtok::TokKind TGLexer::ReturnError(const char *Loc, const Twine &Msg) { 76 return ReturnError(SMLoc::getFromPointer(Loc), Msg); 77 } 78 79 bool TGLexer::processEOF() { 80 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer); 81 if (ParentIncludeLoc != SMLoc()) { 82 // If prepExitInclude() detects a problem with the preprocessing 83 // control stack, it will return false. Pretend that we reached 84 // the final EOF and stop lexing more tokens by returning false 85 // to LexToken(). 86 if (!prepExitInclude(false)) 87 return false; 88 89 CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc); 90 CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(); 91 CurPtr = ParentIncludeLoc.getPointer(); 92 // Make sure TokStart points into the parent file's buffer. 93 // LexToken() assigns to it before calling getNextChar(), 94 // so it is pointing into the included file now. 95 TokStart = CurPtr; 96 return true; 97 } 98 99 // Pretend that we exit the "top-level" include file. 100 // Note that in case of an error (e.g. control stack imbalance) 101 // the routine will issue a fatal error. 102 prepExitInclude(true); 103 return false; 104 } 105 106 int TGLexer::getNextChar() { 107 char CurChar = *CurPtr++; 108 switch (CurChar) { 109 default: 110 return (unsigned char)CurChar; 111 case 0: { 112 // A nul character in the stream is either the end of the current buffer or 113 // a random nul in the file. Disambiguate that here. 114 if (CurPtr-1 != CurBuf.end()) 115 return 0; // Just whitespace. 116 117 // Otherwise, return end of file. 118 --CurPtr; // Another call to lex will return EOF again. 119 return EOF; 120 } 121 case '\n': 122 case '\r': 123 // Handle the newline character by ignoring it and incrementing the line 124 // count. However, be careful about 'dos style' files with \n\r in them. 125 // Only treat a \n\r or \r\n as a single line. 126 if ((*CurPtr == '\n' || (*CurPtr == '\r')) && 127 *CurPtr != CurChar) 128 ++CurPtr; // Eat the two char newline sequence. 129 return '\n'; 130 } 131 } 132 133 int TGLexer::peekNextChar(int Index) const { 134 return *(CurPtr + Index); 135 } 136 137 tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) { 138 TokStart = CurPtr; 139 // This always consumes at least one character. 140 int CurChar = getNextChar(); 141 142 switch (CurChar) { 143 default: 144 // Handle letters: [a-zA-Z_] 145 if (isalpha(CurChar) || CurChar == '_') 146 return LexIdentifier(); 147 148 // Unknown character, emit an error. 149 return ReturnError(TokStart, "Unexpected character"); 150 case EOF: 151 // Lex next token, if we just left an include file. 152 // Note that leaving an include file means that the next 153 // symbol is located at the end of 'include "..."' 154 // construct, so LexToken() is called with default 155 // false parameter. 156 if (processEOF()) 157 return LexToken(); 158 159 // Return EOF denoting the end of lexing. 160 return tgtok::Eof; 161 162 case ':': return tgtok::colon; 163 case ';': return tgtok::semi; 164 case '.': return tgtok::period; 165 case ',': return tgtok::comma; 166 case '<': return tgtok::less; 167 case '>': return tgtok::greater; 168 case ']': return tgtok::r_square; 169 case '{': return tgtok::l_brace; 170 case '}': return tgtok::r_brace; 171 case '(': return tgtok::l_paren; 172 case ')': return tgtok::r_paren; 173 case '=': return tgtok::equal; 174 case '?': return tgtok::question; 175 case '#': 176 if (FileOrLineStart) { 177 tgtok::TokKind Kind = prepIsDirective(); 178 if (Kind != tgtok::Error) 179 return lexPreprocessor(Kind); 180 } 181 182 return tgtok::paste; 183 184 case '\r': 185 PrintFatalError("getNextChar() must never return '\r'"); 186 return tgtok::Error; 187 188 case 0: 189 case ' ': 190 case '\t': 191 // Ignore whitespace. 192 return LexToken(FileOrLineStart); 193 case '\n': 194 // Ignore whitespace, and identify the new line. 195 return LexToken(true); 196 case '/': 197 // If this is the start of a // comment, skip until the end of the line or 198 // the end of the buffer. 199 if (*CurPtr == '/') 200 SkipBCPLComment(); 201 else if (*CurPtr == '*') { 202 if (SkipCComment()) 203 return tgtok::Error; 204 } else // Otherwise, this is an error. 205 return ReturnError(TokStart, "Unexpected character"); 206 return LexToken(FileOrLineStart); 207 case '-': case '+': 208 case '0': case '1': case '2': case '3': case '4': case '5': case '6': 209 case '7': case '8': case '9': { 210 int NextChar = 0; 211 if (isdigit(CurChar)) { 212 // Allow identifiers to start with a number if it is followed by 213 // an identifier. This can happen with paste operations like 214 // foo#8i. 215 int i = 0; 216 do { 217 NextChar = peekNextChar(i++); 218 } while (isdigit(NextChar)); 219 220 if (NextChar == 'x' || NextChar == 'b') { 221 // If this is [0-9]b[01] or [0-9]x[0-9A-fa-f] this is most 222 // likely a number. 223 int NextNextChar = peekNextChar(i); 224 switch (NextNextChar) { 225 default: 226 break; 227 case '0': case '1': 228 if (NextChar == 'b') 229 return LexNumber(); 230 LLVM_FALLTHROUGH; 231 case '2': case '3': case '4': case '5': 232 case '6': case '7': case '8': case '9': 233 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 234 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 235 if (NextChar == 'x') 236 return LexNumber(); 237 break; 238 } 239 } 240 } 241 242 if (isalpha(NextChar) || NextChar == '_') 243 return LexIdentifier(); 244 245 return LexNumber(); 246 } 247 case '"': return LexString(); 248 case '$': return LexVarName(); 249 case '[': return LexBracket(); 250 case '!': return LexExclaim(); 251 } 252 } 253 254 /// LexString - Lex "[^"]*" 255 tgtok::TokKind TGLexer::LexString() { 256 const char *StrStart = CurPtr; 257 258 CurStrVal = ""; 259 260 while (*CurPtr != '"') { 261 // If we hit the end of the buffer, report an error. 262 if (*CurPtr == 0 && CurPtr == CurBuf.end()) 263 return ReturnError(StrStart, "End of file in string literal"); 264 265 if (*CurPtr == '\n' || *CurPtr == '\r') 266 return ReturnError(StrStart, "End of line in string literal"); 267 268 if (*CurPtr != '\\') { 269 CurStrVal += *CurPtr++; 270 continue; 271 } 272 273 ++CurPtr; 274 275 switch (*CurPtr) { 276 case '\\': case '\'': case '"': 277 // These turn into their literal character. 278 CurStrVal += *CurPtr++; 279 break; 280 case 't': 281 CurStrVal += '\t'; 282 ++CurPtr; 283 break; 284 case 'n': 285 CurStrVal += '\n'; 286 ++CurPtr; 287 break; 288 289 case '\n': 290 case '\r': 291 return ReturnError(CurPtr, "escaped newlines not supported in tblgen"); 292 293 // If we hit the end of the buffer, report an error. 294 case '\0': 295 if (CurPtr == CurBuf.end()) 296 return ReturnError(StrStart, "End of file in string literal"); 297 LLVM_FALLTHROUGH; 298 default: 299 return ReturnError(CurPtr, "invalid escape in string literal"); 300 } 301 } 302 303 ++CurPtr; 304 return tgtok::StrVal; 305 } 306 307 tgtok::TokKind TGLexer::LexVarName() { 308 if (!isalpha(CurPtr[0]) && CurPtr[0] != '_') 309 return ReturnError(TokStart, "Invalid variable name"); 310 311 // Otherwise, we're ok, consume the rest of the characters. 312 const char *VarNameStart = CurPtr++; 313 314 while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_') 315 ++CurPtr; 316 317 CurStrVal.assign(VarNameStart, CurPtr); 318 return tgtok::VarName; 319 } 320 321 tgtok::TokKind TGLexer::LexIdentifier() { 322 // The first letter is [a-zA-Z_]. 323 const char *IdentStart = TokStart; 324 325 // Match the rest of the identifier regex: [0-9a-zA-Z_]* 326 while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_') 327 ++CurPtr; 328 329 // Check to see if this identifier is a keyword. 330 StringRef Str(IdentStart, CurPtr-IdentStart); 331 332 if (Str == "include") { 333 if (LexInclude()) return tgtok::Error; 334 return Lex(); 335 } 336 337 tgtok::TokKind Kind = StringSwitch<tgtok::TokKind>(Str) 338 .Case("int", tgtok::Int) 339 .Case("bit", tgtok::Bit) 340 .Case("bits", tgtok::Bits) 341 .Case("string", tgtok::String) 342 .Case("list", tgtok::List) 343 .Case("code", tgtok::Code) 344 .Case("dag", tgtok::Dag) 345 .Case("class", tgtok::Class) 346 .Case("def", tgtok::Def) 347 .Case("foreach", tgtok::Foreach) 348 .Case("defm", tgtok::Defm) 349 .Case("defset", tgtok::Defset) 350 .Case("multiclass", tgtok::MultiClass) 351 .Case("field", tgtok::Field) 352 .Case("let", tgtok::Let) 353 .Case("in", tgtok::In) 354 .Case("defvar", tgtok::Defvar) 355 .Case("if", tgtok::If) 356 .Case("then", tgtok::Then) 357 .Case("else", tgtok::ElseKW) 358 .Default(tgtok::Id); 359 360 if (Kind == tgtok::Id) 361 CurStrVal.assign(Str.begin(), Str.end()); 362 return Kind; 363 } 364 365 /// LexInclude - We just read the "include" token. Get the string token that 366 /// comes next and enter the include. 367 bool TGLexer::LexInclude() { 368 // The token after the include must be a string. 369 tgtok::TokKind Tok = LexToken(); 370 if (Tok == tgtok::Error) return true; 371 if (Tok != tgtok::StrVal) { 372 PrintError(getLoc(), "Expected filename after include"); 373 return true; 374 } 375 376 // Get the string. 377 std::string Filename = CurStrVal; 378 std::string IncludedFile; 379 380 CurBuffer = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr), 381 IncludedFile); 382 if (!CurBuffer) { 383 PrintError(getLoc(), "Could not find include file '" + Filename + "'"); 384 return true; 385 } 386 387 Dependencies.insert(IncludedFile); 388 // Save the line number and lex buffer of the includer. 389 CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(); 390 CurPtr = CurBuf.begin(); 391 392 PrepIncludeStack.push_back( 393 std::make_unique<std::vector<PreprocessorControlDesc>>()); 394 return false; 395 } 396 397 void TGLexer::SkipBCPLComment() { 398 ++CurPtr; // skip the second slash. 399 while (true) { 400 switch (*CurPtr) { 401 case '\n': 402 case '\r': 403 return; // Newline is end of comment. 404 case 0: 405 // If this is the end of the buffer, end the comment. 406 if (CurPtr == CurBuf.end()) 407 return; 408 break; 409 } 410 // Otherwise, skip the character. 411 ++CurPtr; 412 } 413 } 414 415 /// SkipCComment - This skips C-style /**/ comments. The only difference from C 416 /// is that we allow nesting. 417 bool TGLexer::SkipCComment() { 418 ++CurPtr; // skip the star. 419 unsigned CommentDepth = 1; 420 421 while (true) { 422 int CurChar = getNextChar(); 423 switch (CurChar) { 424 case EOF: 425 PrintError(TokStart, "Unterminated comment!"); 426 return true; 427 case '*': 428 // End of the comment? 429 if (CurPtr[0] != '/') break; 430 431 ++CurPtr; // End the */. 432 if (--CommentDepth == 0) 433 return false; 434 break; 435 case '/': 436 // Start of a nested comment? 437 if (CurPtr[0] != '*') break; 438 ++CurPtr; 439 ++CommentDepth; 440 break; 441 } 442 } 443 } 444 445 /// LexNumber - Lex: 446 /// [-+]?[0-9]+ 447 /// 0x[0-9a-fA-F]+ 448 /// 0b[01]+ 449 tgtok::TokKind TGLexer::LexNumber() { 450 if (CurPtr[-1] == '0') { 451 if (CurPtr[0] == 'x') { 452 ++CurPtr; 453 const char *NumStart = CurPtr; 454 while (isxdigit(CurPtr[0])) 455 ++CurPtr; 456 457 // Requires at least one hex digit. 458 if (CurPtr == NumStart) 459 return ReturnError(TokStart, "Invalid hexadecimal number"); 460 461 errno = 0; 462 CurIntVal = strtoll(NumStart, nullptr, 16); 463 if (errno == EINVAL) 464 return ReturnError(TokStart, "Invalid hexadecimal number"); 465 if (errno == ERANGE) { 466 errno = 0; 467 CurIntVal = (int64_t)strtoull(NumStart, nullptr, 16); 468 if (errno == EINVAL) 469 return ReturnError(TokStart, "Invalid hexadecimal number"); 470 if (errno == ERANGE) 471 return ReturnError(TokStart, "Hexadecimal number out of range"); 472 } 473 return tgtok::IntVal; 474 } else if (CurPtr[0] == 'b') { 475 ++CurPtr; 476 const char *NumStart = CurPtr; 477 while (CurPtr[0] == '0' || CurPtr[0] == '1') 478 ++CurPtr; 479 480 // Requires at least one binary digit. 481 if (CurPtr == NumStart) 482 return ReturnError(CurPtr-2, "Invalid binary number"); 483 CurIntVal = strtoll(NumStart, nullptr, 2); 484 return tgtok::BinaryIntVal; 485 } 486 } 487 488 // Check for a sign without a digit. 489 if (!isdigit(CurPtr[0])) { 490 if (CurPtr[-1] == '-') 491 return tgtok::minus; 492 else if (CurPtr[-1] == '+') 493 return tgtok::plus; 494 } 495 496 while (isdigit(CurPtr[0])) 497 ++CurPtr; 498 CurIntVal = strtoll(TokStart, nullptr, 10); 499 return tgtok::IntVal; 500 } 501 502 /// LexBracket - We just read '['. If this is a code block, return it, 503 /// otherwise return the bracket. Match: '[' and '[{ ( [^}]+ | }[^]] )* }]' 504 tgtok::TokKind TGLexer::LexBracket() { 505 if (CurPtr[0] != '{') 506 return tgtok::l_square; 507 ++CurPtr; 508 const char *CodeStart = CurPtr; 509 while (true) { 510 int Char = getNextChar(); 511 if (Char == EOF) break; 512 513 if (Char != '}') continue; 514 515 Char = getNextChar(); 516 if (Char == EOF) break; 517 if (Char == ']') { 518 CurStrVal.assign(CodeStart, CurPtr-2); 519 return tgtok::CodeFragment; 520 } 521 } 522 523 return ReturnError(CodeStart-2, "Unterminated Code Block"); 524 } 525 526 /// LexExclaim - Lex '!' and '![a-zA-Z]+'. 527 tgtok::TokKind TGLexer::LexExclaim() { 528 if (!isalpha(*CurPtr)) 529 return ReturnError(CurPtr - 1, "Invalid \"!operator\""); 530 531 const char *Start = CurPtr++; 532 while (isalpha(*CurPtr)) 533 ++CurPtr; 534 535 // Check to see which operator this is. 536 tgtok::TokKind Kind = 537 StringSwitch<tgtok::TokKind>(StringRef(Start, CurPtr - Start)) 538 .Case("eq", tgtok::XEq) 539 .Case("ne", tgtok::XNe) 540 .Case("le", tgtok::XLe) 541 .Case("lt", tgtok::XLt) 542 .Case("ge", tgtok::XGe) 543 .Case("gt", tgtok::XGt) 544 .Case("if", tgtok::XIf) 545 .Case("cond", tgtok::XCond) 546 .Case("isa", tgtok::XIsA) 547 .Case("head", tgtok::XHead) 548 .Case("tail", tgtok::XTail) 549 .Case("size", tgtok::XSize) 550 .Case("con", tgtok::XConcat) 551 .Case("dag", tgtok::XDag) 552 .Case("add", tgtok::XADD) 553 .Case("mul", tgtok::XMUL) 554 .Case("and", tgtok::XAND) 555 .Case("or", tgtok::XOR) 556 .Case("shl", tgtok::XSHL) 557 .Case("sra", tgtok::XSRA) 558 .Case("srl", tgtok::XSRL) 559 .Case("cast", tgtok::XCast) 560 .Case("empty", tgtok::XEmpty) 561 .Case("subst", tgtok::XSubst) 562 .Case("foldl", tgtok::XFoldl) 563 .Case("foreach", tgtok::XForEach) 564 .Case("listconcat", tgtok::XListConcat) 565 .Case("listsplat", tgtok::XListSplat) 566 .Case("strconcat", tgtok::XStrConcat) 567 .Case("setop", tgtok::XSetOp) 568 .Case("getop", tgtok::XGetOp) 569 .Default(tgtok::Error); 570 571 return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator"); 572 } 573 574 bool TGLexer::prepExitInclude(bool IncludeStackMustBeEmpty) { 575 // Report an error, if preprocessor control stack for the current 576 // file is not empty. 577 if (!PrepIncludeStack.back()->empty()) { 578 prepReportPreprocessorStackError(); 579 580 return false; 581 } 582 583 // Pop the preprocessing controls from the include stack. 584 if (PrepIncludeStack.empty()) { 585 PrintFatalError("Preprocessor include stack is empty"); 586 } 587 588 PrepIncludeStack.pop_back(); 589 590 if (IncludeStackMustBeEmpty) { 591 if (!PrepIncludeStack.empty()) 592 PrintFatalError("Preprocessor include stack is not empty"); 593 } else { 594 if (PrepIncludeStack.empty()) 595 PrintFatalError("Preprocessor include stack is empty"); 596 } 597 598 return true; 599 } 600 601 tgtok::TokKind TGLexer::prepIsDirective() const { 602 for (unsigned ID = 0; ID < llvm::array_lengthof(PreprocessorDirs); ++ID) { 603 int NextChar = *CurPtr; 604 bool Match = true; 605 unsigned I = 0; 606 for (; I < strlen(PreprocessorDirs[ID].Word); ++I) { 607 if (NextChar != PreprocessorDirs[ID].Word[I]) { 608 Match = false; 609 break; 610 } 611 612 NextChar = peekNextChar(I + 1); 613 } 614 615 // Check for whitespace after the directive. If there is no whitespace, 616 // then we do not recognize it as a preprocessing directive. 617 if (Match) { 618 tgtok::TokKind Kind = PreprocessorDirs[ID].Kind; 619 620 // New line and EOF may follow only #else/#endif. It will be reported 621 // as an error for #ifdef/#define after the call to prepLexMacroName(). 622 if (NextChar == ' ' || NextChar == '\t' || NextChar == EOF || 623 NextChar == '\n' || 624 // It looks like TableGen does not support '\r' as the actual 625 // carriage return, e.g. getNextChar() treats a single '\r' 626 // as '\n'. So we do the same here. 627 NextChar == '\r') 628 return Kind; 629 630 // Allow comments after some directives, e.g.: 631 // #else// OR #else/**/ 632 // #endif// OR #endif/**/ 633 // 634 // Note that we do allow comments after #ifdef/#define here, e.g. 635 // #ifdef/**/ AND #ifdef// 636 // #define/**/ AND #define// 637 // 638 // These cases will be reported as incorrect after calling 639 // prepLexMacroName(). We could have supported C-style comments 640 // after #ifdef/#define, but this would complicate the code 641 // for little benefit. 642 if (NextChar == '/') { 643 NextChar = peekNextChar(I + 1); 644 645 if (NextChar == '*' || NextChar == '/') 646 return Kind; 647 648 // Pretend that we do not recognize the directive. 649 } 650 } 651 } 652 653 return tgtok::Error; 654 } 655 656 bool TGLexer::prepEatPreprocessorDirective(tgtok::TokKind Kind) { 657 TokStart = CurPtr; 658 659 for (unsigned ID = 0; ID < llvm::array_lengthof(PreprocessorDirs); ++ID) 660 if (PreprocessorDirs[ID].Kind == Kind) { 661 // Advance CurPtr to the end of the preprocessing word. 662 CurPtr += strlen(PreprocessorDirs[ID].Word); 663 return true; 664 } 665 666 PrintFatalError("Unsupported preprocessing token in " 667 "prepEatPreprocessorDirective()"); 668 return false; 669 } 670 671 tgtok::TokKind TGLexer::lexPreprocessor( 672 tgtok::TokKind Kind, bool ReturnNextLiveToken) { 673 674 // We must be looking at a preprocessing directive. Eat it! 675 if (!prepEatPreprocessorDirective(Kind)) 676 PrintFatalError("lexPreprocessor() called for unknown " 677 "preprocessor directive"); 678 679 if (Kind == tgtok::Ifdef || Kind == tgtok::Ifndef) { 680 StringRef MacroName = prepLexMacroName(); 681 StringRef IfTokName = Kind == tgtok::Ifdef ? "#ifdef" : "#ifndef"; 682 if (MacroName.empty()) 683 return ReturnError(TokStart, "Expected macro name after " + IfTokName); 684 685 bool MacroIsDefined = DefinedMacros.count(MacroName) != 0; 686 687 // Canonicalize ifndef to ifdef equivalent 688 if (Kind == tgtok::Ifndef) { 689 MacroIsDefined = !MacroIsDefined; 690 Kind = tgtok::Ifdef; 691 } 692 693 // Regardless of whether we are processing tokens or not, 694 // we put the #ifdef control on stack. 695 PrepIncludeStack.back()->push_back( 696 {Kind, MacroIsDefined, SMLoc::getFromPointer(TokStart)}); 697 698 if (!prepSkipDirectiveEnd()) 699 return ReturnError(CurPtr, "Only comments are supported after " + 700 IfTokName + " NAME"); 701 702 // If we were not processing tokens before this #ifdef, 703 // then just return back to the lines skipping code. 704 if (!ReturnNextLiveToken) 705 return Kind; 706 707 // If we were processing tokens before this #ifdef, 708 // and the macro is defined, then just return the next token. 709 if (MacroIsDefined) 710 return LexToken(); 711 712 // We were processing tokens before this #ifdef, and the macro 713 // is not defined, so we have to start skipping the lines. 714 // If the skipping is successful, it will return the token following 715 // either #else or #endif corresponding to this #ifdef. 716 if (prepSkipRegion(ReturnNextLiveToken)) 717 return LexToken(); 718 719 return tgtok::Error; 720 } else if (Kind == tgtok::Else) { 721 // Check if this #else is correct before calling prepSkipDirectiveEnd(), 722 // which will move CurPtr away from the beginning of #else. 723 if (PrepIncludeStack.back()->empty()) 724 return ReturnError(TokStart, "#else without #ifdef or #ifndef"); 725 726 PreprocessorControlDesc IfdefEntry = PrepIncludeStack.back()->back(); 727 728 if (IfdefEntry.Kind != tgtok::Ifdef) { 729 PrintError(TokStart, "double #else"); 730 return ReturnError(IfdefEntry.SrcPos, "Previous #else is here"); 731 } 732 733 // Replace the corresponding #ifdef's control with its negation 734 // on the control stack. 735 PrepIncludeStack.back()->pop_back(); 736 PrepIncludeStack.back()->push_back( 737 {Kind, !IfdefEntry.IsDefined, SMLoc::getFromPointer(TokStart)}); 738 739 if (!prepSkipDirectiveEnd()) 740 return ReturnError(CurPtr, "Only comments are supported after #else"); 741 742 // If we were processing tokens before this #else, 743 // we have to start skipping lines until the matching #endif. 744 if (ReturnNextLiveToken) { 745 if (prepSkipRegion(ReturnNextLiveToken)) 746 return LexToken(); 747 748 return tgtok::Error; 749 } 750 751 // Return to the lines skipping code. 752 return Kind; 753 } else if (Kind == tgtok::Endif) { 754 // Check if this #endif is correct before calling prepSkipDirectiveEnd(), 755 // which will move CurPtr away from the beginning of #endif. 756 if (PrepIncludeStack.back()->empty()) 757 return ReturnError(TokStart, "#endif without #ifdef"); 758 759 auto &IfdefOrElseEntry = PrepIncludeStack.back()->back(); 760 761 if (IfdefOrElseEntry.Kind != tgtok::Ifdef && 762 IfdefOrElseEntry.Kind != tgtok::Else) { 763 PrintFatalError("Invalid preprocessor control on the stack"); 764 return tgtok::Error; 765 } 766 767 if (!prepSkipDirectiveEnd()) 768 return ReturnError(CurPtr, "Only comments are supported after #endif"); 769 770 PrepIncludeStack.back()->pop_back(); 771 772 // If we were processing tokens before this #endif, then 773 // we should continue it. 774 if (ReturnNextLiveToken) { 775 return LexToken(); 776 } 777 778 // Return to the lines skipping code. 779 return Kind; 780 } else if (Kind == tgtok::Define) { 781 StringRef MacroName = prepLexMacroName(); 782 if (MacroName.empty()) 783 return ReturnError(TokStart, "Expected macro name after #define"); 784 785 if (!DefinedMacros.insert(MacroName).second) 786 PrintWarning(getLoc(), 787 "Duplicate definition of macro: " + Twine(MacroName)); 788 789 if (!prepSkipDirectiveEnd()) 790 return ReturnError(CurPtr, 791 "Only comments are supported after #define NAME"); 792 793 if (!ReturnNextLiveToken) { 794 PrintFatalError("#define must be ignored during the lines skipping"); 795 return tgtok::Error; 796 } 797 798 return LexToken(); 799 } 800 801 PrintFatalError("Preprocessing directive is not supported"); 802 return tgtok::Error; 803 } 804 805 bool TGLexer::prepSkipRegion(bool MustNeverBeFalse) { 806 if (!MustNeverBeFalse) 807 PrintFatalError("Invalid recursion."); 808 809 do { 810 // Skip all symbols to the line end. 811 prepSkipToLineEnd(); 812 813 // Find the first non-whitespace symbol in the next line(s). 814 if (!prepSkipLineBegin()) 815 return false; 816 817 // If the first non-blank/comment symbol on the line is '#', 818 // it may be a start of preprocessing directive. 819 // 820 // If it is not '#' just go to the next line. 821 if (*CurPtr == '#') 822 ++CurPtr; 823 else 824 continue; 825 826 tgtok::TokKind Kind = prepIsDirective(); 827 828 // If we did not find a preprocessing directive or it is #define, 829 // then just skip to the next line. We do not have to do anything 830 // for #define in the line-skipping mode. 831 if (Kind == tgtok::Error || Kind == tgtok::Define) 832 continue; 833 834 tgtok::TokKind ProcessedKind = lexPreprocessor(Kind, false); 835 836 // If lexPreprocessor() encountered an error during lexing this 837 // preprocessor idiom, then return false to the calling lexPreprocessor(). 838 // This will force tgtok::Error to be returned to the tokens processing. 839 if (ProcessedKind == tgtok::Error) 840 return false; 841 842 if (Kind != ProcessedKind) 843 PrintFatalError("prepIsDirective() and lexPreprocessor() " 844 "returned different token kinds"); 845 846 // If this preprocessing directive enables tokens processing, 847 // then return to the lexPreprocessor() and get to the next token. 848 // We can move from line-skipping mode to processing tokens only 849 // due to #else or #endif. 850 if (prepIsProcessingEnabled()) { 851 if (Kind != tgtok::Else && Kind != tgtok::Endif) { 852 PrintFatalError("Tokens processing was enabled by an unexpected " 853 "preprocessing directive"); 854 return false; 855 } 856 857 return true; 858 } 859 } while (CurPtr != CurBuf.end()); 860 861 // We have reached the end of the file, but never left the lines-skipping 862 // mode. This means there is no matching #endif. 863 prepReportPreprocessorStackError(); 864 return false; 865 } 866 867 StringRef TGLexer::prepLexMacroName() { 868 // Skip whitespaces between the preprocessing directive and the macro name. 869 while (*CurPtr == ' ' || *CurPtr == '\t') 870 ++CurPtr; 871 872 TokStart = CurPtr; 873 // Macro names start with [a-zA-Z_]. 874 if (*CurPtr != '_' && !isalpha(*CurPtr)) 875 return ""; 876 877 // Match the rest of the identifier regex: [0-9a-zA-Z_]* 878 while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_') 879 ++CurPtr; 880 881 return StringRef(TokStart, CurPtr - TokStart); 882 } 883 884 bool TGLexer::prepSkipLineBegin() { 885 while (CurPtr != CurBuf.end()) { 886 switch (*CurPtr) { 887 case ' ': 888 case '\t': 889 case '\n': 890 case '\r': 891 break; 892 893 case '/': { 894 int NextChar = peekNextChar(1); 895 if (NextChar == '*') { 896 // Skip C-style comment. 897 // Note that we do not care about skipping the C++-style comments. 898 // If the line contains "//", it may not contain any processable 899 // preprocessing directive. Just return CurPtr pointing to 900 // the first '/' in this case. We also do not care about 901 // incorrect symbols after the first '/' - we are in lines-skipping 902 // mode, so incorrect code is allowed to some extent. 903 904 // Set TokStart to the beginning of the comment to enable proper 905 // diagnostic printing in case of error in SkipCComment(). 906 TokStart = CurPtr; 907 908 // CurPtr must point to '*' before call to SkipCComment(). 909 ++CurPtr; 910 if (SkipCComment()) 911 return false; 912 } else { 913 // CurPtr points to the non-whitespace '/'. 914 return true; 915 } 916 917 // We must not increment CurPtr after the comment was lexed. 918 continue; 919 } 920 921 default: 922 return true; 923 } 924 925 ++CurPtr; 926 } 927 928 // We have reached the end of the file. Return to the lines skipping 929 // code, and allow it to handle the EOF as needed. 930 return true; 931 } 932 933 bool TGLexer::prepSkipDirectiveEnd() { 934 while (CurPtr != CurBuf.end()) { 935 switch (*CurPtr) { 936 case ' ': 937 case '\t': 938 break; 939 940 case '\n': 941 case '\r': 942 return true; 943 944 case '/': { 945 int NextChar = peekNextChar(1); 946 if (NextChar == '/') { 947 // Skip C++-style comment. 948 // We may just return true now, but let's skip to the line/buffer end 949 // to simplify the method specification. 950 ++CurPtr; 951 SkipBCPLComment(); 952 } else if (NextChar == '*') { 953 // When we are skipping C-style comment at the end of a preprocessing 954 // directive, we can skip several lines. If any meaningful TD token 955 // follows the end of the C-style comment on the same line, it will 956 // be considered as an invalid usage of TD token. 957 // For example, we want to forbid usages like this one: 958 // #define MACRO class Class {} 959 // But with C-style comments we also disallow the following: 960 // #define MACRO /* This macro is used 961 // to ... */ class Class {} 962 // One can argue that this should be allowed, but it does not seem 963 // to be worth of the complication. Moreover, this matches 964 // the C preprocessor behavior. 965 966 // Set TokStart to the beginning of the comment to enable proper 967 // diagnostic printer in case of error in SkipCComment(). 968 TokStart = CurPtr; 969 ++CurPtr; 970 if (SkipCComment()) 971 return false; 972 } else { 973 TokStart = CurPtr; 974 PrintError(CurPtr, "Unexpected character"); 975 return false; 976 } 977 978 // We must not increment CurPtr after the comment was lexed. 979 continue; 980 } 981 982 default: 983 // Do not allow any non-whitespaces after the directive. 984 TokStart = CurPtr; 985 return false; 986 } 987 988 ++CurPtr; 989 } 990 991 return true; 992 } 993 994 void TGLexer::prepSkipToLineEnd() { 995 while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) 996 ++CurPtr; 997 } 998 999 bool TGLexer::prepIsProcessingEnabled() { 1000 for (auto I = PrepIncludeStack.back()->rbegin(), 1001 E = PrepIncludeStack.back()->rend(); 1002 I != E; ++I) { 1003 if (!I->IsDefined) 1004 return false; 1005 } 1006 1007 return true; 1008 } 1009 1010 void TGLexer::prepReportPreprocessorStackError() { 1011 if (PrepIncludeStack.back()->empty()) 1012 PrintFatalError("prepReportPreprocessorStackError() called with " 1013 "empty control stack"); 1014 1015 auto &PrepControl = PrepIncludeStack.back()->back(); 1016 PrintError(CurBuf.end(), "Reached EOF without matching #endif"); 1017 PrintError(PrepControl.SrcPos, "The latest preprocessor control is here"); 1018 1019 TokStart = CurPtr; 1020 } 1021