1 //===- ExtractAPI/ExtractAPIConsumer.cpp ------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements the ExtractAPIAction, and ASTConsumer to collect API 11 /// information. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/AST/ASTConcept.h" 16 #include "clang/AST/ASTConsumer.h" 17 #include "clang/AST/ASTContext.h" 18 #include "clang/AST/DeclObjC.h" 19 #include "clang/Basic/DiagnosticFrontend.h" 20 #include "clang/Basic/SourceLocation.h" 21 #include "clang/Basic/SourceManager.h" 22 #include "clang/Basic/TargetInfo.h" 23 #include "clang/ExtractAPI/API.h" 24 #include "clang/ExtractAPI/APIIgnoresList.h" 25 #include "clang/ExtractAPI/ExtractAPIVisitor.h" 26 #include "clang/ExtractAPI/FrontendActions.h" 27 #include "clang/ExtractAPI/Serialization/SymbolGraphSerializer.h" 28 #include "clang/Frontend/ASTConsumers.h" 29 #include "clang/Frontend/CompilerInstance.h" 30 #include "clang/Frontend/FrontendOptions.h" 31 #include "clang/Frontend/MultiplexConsumer.h" 32 #include "clang/Lex/MacroInfo.h" 33 #include "clang/Lex/PPCallbacks.h" 34 #include "clang/Lex/Preprocessor.h" 35 #include "clang/Lex/PreprocessorOptions.h" 36 #include "llvm/ADT/DenseSet.h" 37 #include "llvm/ADT/STLExtras.h" 38 #include "llvm/ADT/SmallString.h" 39 #include "llvm/ADT/SmallVector.h" 40 #include "llvm/Support/Casting.h" 41 #include "llvm/Support/Error.h" 42 #include "llvm/Support/FileSystem.h" 43 #include "llvm/Support/MemoryBuffer.h" 44 #include "llvm/Support/Path.h" 45 #include "llvm/Support/Regex.h" 46 #include "llvm/Support/raw_ostream.h" 47 #include <memory> 48 #include <optional> 49 #include <utility> 50 51 using namespace clang; 52 using namespace extractapi; 53 54 namespace { 55 56 std::optional<std::string> getRelativeIncludeName(const CompilerInstance &CI, 57 StringRef File, 58 bool *IsQuoted = nullptr) { 59 assert(CI.hasFileManager() && 60 "CompilerInstance does not have a FileNamager!"); 61 62 using namespace llvm::sys; 63 // Matches framework include patterns 64 const llvm::Regex Rule("/(.+)\\.framework/(.+)?Headers/(.+)"); 65 66 const auto &FS = CI.getVirtualFileSystem(); 67 68 SmallString<128> FilePath(File.begin(), File.end()); 69 FS.makeAbsolute(FilePath); 70 path::remove_dots(FilePath, true); 71 FilePath = path::convert_to_slash(FilePath); 72 File = FilePath; 73 74 // Checks whether `Dir` is a strict path prefix of `File`. If so returns 75 // the prefix length. Otherwise return 0. 76 auto CheckDir = [&](llvm::StringRef Dir) -> unsigned { 77 llvm::SmallString<32> DirPath(Dir.begin(), Dir.end()); 78 FS.makeAbsolute(DirPath); 79 path::remove_dots(DirPath, true); 80 Dir = DirPath; 81 for (auto NI = path::begin(File), NE = path::end(File), 82 DI = path::begin(Dir), DE = path::end(Dir); 83 /*termination condition in loop*/; ++NI, ++DI) { 84 // '.' components in File are ignored. 85 while (NI != NE && *NI == ".") 86 ++NI; 87 if (NI == NE) 88 break; 89 90 // '.' components in Dir are ignored. 91 while (DI != DE && *DI == ".") 92 ++DI; 93 94 // Dir is a prefix of File, up to '.' components and choice of path 95 // separators. 96 if (DI == DE) 97 return NI - path::begin(File); 98 99 // Consider all path separators equal. 100 if (NI->size() == 1 && DI->size() == 1 && 101 path::is_separator(NI->front()) && path::is_separator(DI->front())) 102 continue; 103 104 // Special case Apple .sdk folders since the search path is typically a 105 // symlink like `iPhoneSimulator14.5.sdk` while the file is instead 106 // located in `iPhoneSimulator.sdk` (the real folder). 107 if (NI->endswith(".sdk") && DI->endswith(".sdk")) { 108 StringRef NBasename = path::stem(*NI); 109 StringRef DBasename = path::stem(*DI); 110 if (DBasename.startswith(NBasename)) 111 continue; 112 } 113 114 if (*NI != *DI) 115 break; 116 } 117 return 0; 118 }; 119 120 unsigned PrefixLength = 0; 121 122 // Go through the search paths and find the first one that is a prefix of 123 // the header. 124 for (const auto &Entry : CI.getHeaderSearchOpts().UserEntries) { 125 // Note whether the match is found in a quoted entry. 126 if (IsQuoted) 127 *IsQuoted = Entry.Group == frontend::Quoted; 128 129 if (auto EntryFile = CI.getFileManager().getOptionalFileRef(Entry.Path)) { 130 if (auto HMap = HeaderMap::Create(*EntryFile, CI.getFileManager())) { 131 // If this is a headermap entry, try to reverse lookup the full path 132 // for a spelled name before mapping. 133 StringRef SpelledFilename = HMap->reverseLookupFilename(File); 134 if (!SpelledFilename.empty()) 135 return SpelledFilename.str(); 136 137 // No matching mapping in this headermap, try next search entry. 138 continue; 139 } 140 } 141 142 // Entry is a directory search entry, try to check if it's a prefix of File. 143 PrefixLength = CheckDir(Entry.Path); 144 if (PrefixLength > 0) { 145 // The header is found in a framework path, construct the framework-style 146 // include name `<Framework/Header.h>` 147 if (Entry.IsFramework) { 148 SmallVector<StringRef, 4> Matches; 149 Rule.match(File, &Matches); 150 // Returned matches are always in stable order. 151 if (Matches.size() != 4) 152 return std::nullopt; 153 154 return path::convert_to_slash( 155 (Matches[1].drop_front(Matches[1].rfind('/') + 1) + "/" + 156 Matches[3]) 157 .str()); 158 } 159 160 // The header is found in a normal search path, strip the search path 161 // prefix to get an include name. 162 return path::convert_to_slash(File.drop_front(PrefixLength)); 163 } 164 } 165 166 // Couldn't determine a include name, use full path instead. 167 return std::nullopt; 168 } 169 170 struct LocationFileChecker { 171 bool operator()(SourceLocation Loc) { 172 // If the loc refers to a macro expansion we need to first get the file 173 // location of the expansion. 174 auto &SM = CI.getSourceManager(); 175 auto FileLoc = SM.getFileLoc(Loc); 176 FileID FID = SM.getFileID(FileLoc); 177 if (FID.isInvalid()) 178 return false; 179 180 const auto *File = SM.getFileEntryForID(FID); 181 if (!File) 182 return false; 183 184 if (KnownFileEntries.count(File)) 185 return true; 186 187 if (ExternalFileEntries.count(File)) 188 return false; 189 190 StringRef FileName = File->tryGetRealPathName().empty() 191 ? File->getName() 192 : File->tryGetRealPathName(); 193 194 // Try to reduce the include name the same way we tried to include it. 195 bool IsQuoted = false; 196 if (auto IncludeName = getRelativeIncludeName(CI, FileName, &IsQuoted)) 197 if (llvm::any_of(KnownFiles, 198 [&IsQuoted, &IncludeName](const auto &KnownFile) { 199 return KnownFile.first.equals(*IncludeName) && 200 KnownFile.second == IsQuoted; 201 })) { 202 KnownFileEntries.insert(File); 203 return true; 204 } 205 206 // Record that the file was not found to avoid future reverse lookup for 207 // the same file. 208 ExternalFileEntries.insert(File); 209 return false; 210 } 211 212 LocationFileChecker(const CompilerInstance &CI, 213 SmallVector<std::pair<SmallString<32>, bool>> &KnownFiles) 214 : CI(CI), KnownFiles(KnownFiles), ExternalFileEntries() { 215 for (const auto &KnownFile : KnownFiles) 216 if (auto FileEntry = CI.getFileManager().getFile(KnownFile.first)) 217 KnownFileEntries.insert(*FileEntry); 218 } 219 220 private: 221 const CompilerInstance &CI; 222 SmallVector<std::pair<SmallString<32>, bool>> &KnownFiles; 223 llvm::DenseSet<const FileEntry *> KnownFileEntries; 224 llvm::DenseSet<const FileEntry *> ExternalFileEntries; 225 }; 226 227 struct BatchExtractAPIVisitor : ExtractAPIVisitor<BatchExtractAPIVisitor> { 228 bool shouldDeclBeIncluded(const Decl *D) const { 229 bool ShouldBeIncluded = true; 230 // Check that we have the definition for redeclarable types. 231 if (auto *TD = llvm::dyn_cast<TagDecl>(D)) 232 ShouldBeIncluded = TD->isThisDeclarationADefinition(); 233 else if (auto *Interface = llvm::dyn_cast<ObjCInterfaceDecl>(D)) 234 ShouldBeIncluded = Interface->isThisDeclarationADefinition(); 235 else if (auto *Protocol = llvm::dyn_cast<ObjCProtocolDecl>(D)) 236 ShouldBeIncluded = Protocol->isThisDeclarationADefinition(); 237 238 ShouldBeIncluded = ShouldBeIncluded && LCF(D->getLocation()); 239 return ShouldBeIncluded; 240 } 241 242 BatchExtractAPIVisitor(LocationFileChecker &LCF, ASTContext &Context, 243 APISet &API) 244 : ExtractAPIVisitor<BatchExtractAPIVisitor>(Context, API), LCF(LCF) {} 245 246 private: 247 LocationFileChecker &LCF; 248 }; 249 250 class WrappingExtractAPIConsumer : public ASTConsumer { 251 public: 252 WrappingExtractAPIConsumer(ASTContext &Context, APISet &API) 253 : Visitor(Context, API) {} 254 255 void HandleTranslationUnit(ASTContext &Context) override { 256 // Use ExtractAPIVisitor to traverse symbol declarations in the context. 257 Visitor.TraverseDecl(Context.getTranslationUnitDecl()); 258 } 259 260 private: 261 ExtractAPIVisitor<> Visitor; 262 }; 263 264 class ExtractAPIConsumer : public ASTConsumer { 265 public: 266 ExtractAPIConsumer(ASTContext &Context, 267 std::unique_ptr<LocationFileChecker> LCF, APISet &API) 268 : Visitor(*LCF, Context, API), LCF(std::move(LCF)) {} 269 270 void HandleTranslationUnit(ASTContext &Context) override { 271 // Use ExtractAPIVisitor to traverse symbol declarations in the context. 272 Visitor.TraverseDecl(Context.getTranslationUnitDecl()); 273 } 274 275 private: 276 BatchExtractAPIVisitor Visitor; 277 std::unique_ptr<LocationFileChecker> LCF; 278 }; 279 280 class MacroCallback : public PPCallbacks { 281 public: 282 MacroCallback(const SourceManager &SM, APISet &API, Preprocessor &PP) 283 : SM(SM), API(API), PP(PP) {} 284 285 void MacroDefined(const Token &MacroNameToken, 286 const MacroDirective *MD) override { 287 auto *MacroInfo = MD->getMacroInfo(); 288 289 if (MacroInfo->isBuiltinMacro()) 290 return; 291 292 auto SourceLoc = MacroNameToken.getLocation(); 293 if (SM.isWrittenInBuiltinFile(SourceLoc) || 294 SM.isWrittenInCommandLineFile(SourceLoc)) 295 return; 296 297 PendingMacros.emplace_back(MacroNameToken, MD); 298 } 299 300 // If a macro gets undefined at some point during preprocessing of the inputs 301 // it means that it isn't an exposed API and we should therefore not add a 302 // macro definition for it. 303 void MacroUndefined(const Token &MacroNameToken, const MacroDefinition &MD, 304 const MacroDirective *Undef) override { 305 // If this macro wasn't previously defined we don't need to do anything 306 // here. 307 if (!Undef) 308 return; 309 310 llvm::erase_if(PendingMacros, [&MD, this](const PendingMacro &PM) { 311 return MD.getMacroInfo()->isIdenticalTo(*PM.MD->getMacroInfo(), PP, 312 /*Syntactically*/ false); 313 }); 314 } 315 316 void EndOfMainFile() override { 317 for (auto &PM : PendingMacros) { 318 // `isUsedForHeaderGuard` is only set when the preprocessor leaves the 319 // file so check for it here. 320 if (PM.MD->getMacroInfo()->isUsedForHeaderGuard()) 321 continue; 322 323 if (!shouldMacroBeIncluded(PM)) 324 continue; 325 326 StringRef Name = PM.MacroNameToken.getIdentifierInfo()->getName(); 327 PresumedLoc Loc = SM.getPresumedLoc(PM.MacroNameToken.getLocation()); 328 StringRef USR = 329 API.recordUSRForMacro(Name, PM.MacroNameToken.getLocation(), SM); 330 331 API.addMacroDefinition( 332 Name, USR, Loc, 333 DeclarationFragmentsBuilder::getFragmentsForMacro(Name, PM.MD), 334 DeclarationFragmentsBuilder::getSubHeadingForMacro(Name), 335 SM.isInSystemHeader(PM.MacroNameToken.getLocation())); 336 } 337 338 PendingMacros.clear(); 339 } 340 341 protected: 342 struct PendingMacro { 343 Token MacroNameToken; 344 const MacroDirective *MD; 345 346 PendingMacro(const Token &MacroNameToken, const MacroDirective *MD) 347 : MacroNameToken(MacroNameToken), MD(MD) {} 348 }; 349 350 virtual bool shouldMacroBeIncluded(const PendingMacro &PM) { return true; } 351 352 const SourceManager &SM; 353 APISet &API; 354 Preprocessor &PP; 355 llvm::SmallVector<PendingMacro> PendingMacros; 356 }; 357 358 class APIMacroCallback : public MacroCallback { 359 public: 360 APIMacroCallback(const SourceManager &SM, APISet &API, Preprocessor &PP, 361 LocationFileChecker &LCF) 362 : MacroCallback(SM, API, PP), LCF(LCF) {} 363 364 bool shouldMacroBeIncluded(const PendingMacro &PM) override { 365 // Do not include macros from external files 366 return LCF(PM.MacroNameToken.getLocation()); 367 } 368 369 private: 370 LocationFileChecker &LCF; 371 }; 372 373 } // namespace 374 375 void ExtractAPIActionBase::ImplEndSourceFileAction() { 376 if (!OS) 377 return; 378 379 // Setup a SymbolGraphSerializer to write out collected API information in 380 // the Symbol Graph format. 381 // FIXME: Make the kind of APISerializer configurable. 382 SymbolGraphSerializer SGSerializer(*API, IgnoresList); 383 SGSerializer.serialize(*OS); 384 OS.reset(); 385 } 386 387 std::unique_ptr<raw_pwrite_stream> 388 ExtractAPIAction::CreateOutputFile(CompilerInstance &CI, StringRef InFile) { 389 std::unique_ptr<raw_pwrite_stream> OS; 390 OS = CI.createDefaultOutputFile(/*Binary=*/false, InFile, 391 /*Extension=*/"json", 392 /*RemoveFileOnSignal=*/false); 393 if (!OS) 394 return nullptr; 395 return OS; 396 } 397 398 std::unique_ptr<ASTConsumer> 399 ExtractAPIAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { 400 OS = CreateOutputFile(CI, InFile); 401 402 if (!OS) 403 return nullptr; 404 405 auto ProductName = CI.getFrontendOpts().ProductName; 406 407 // Now that we have enough information about the language options and the 408 // target triple, let's create the APISet before anyone uses it. 409 API = std::make_unique<APISet>( 410 CI.getTarget().getTriple(), 411 CI.getFrontendOpts().Inputs.back().getKind().getLanguage(), ProductName); 412 413 auto LCF = std::make_unique<LocationFileChecker>(CI, KnownInputFiles); 414 415 CI.getPreprocessor().addPPCallbacks(std::make_unique<APIMacroCallback>( 416 CI.getSourceManager(), *API, CI.getPreprocessor(), *LCF)); 417 418 // Do not include location in anonymous decls. 419 PrintingPolicy Policy = CI.getASTContext().getPrintingPolicy(); 420 Policy.AnonymousTagLocations = false; 421 CI.getASTContext().setPrintingPolicy(Policy); 422 423 if (!CI.getFrontendOpts().ExtractAPIIgnoresFileList.empty()) { 424 llvm::handleAllErrors( 425 APIIgnoresList::create(CI.getFrontendOpts().ExtractAPIIgnoresFileList, 426 CI.getFileManager()) 427 .moveInto(IgnoresList), 428 [&CI](const IgnoresFileNotFound &Err) { 429 CI.getDiagnostics().Report( 430 diag::err_extract_api_ignores_file_not_found) 431 << Err.Path; 432 }); 433 } 434 435 return std::make_unique<ExtractAPIConsumer>(CI.getASTContext(), 436 std::move(LCF), *API); 437 } 438 439 bool ExtractAPIAction::PrepareToExecuteAction(CompilerInstance &CI) { 440 auto &Inputs = CI.getFrontendOpts().Inputs; 441 if (Inputs.empty()) 442 return true; 443 444 if (!CI.hasFileManager()) 445 if (!CI.createFileManager()) 446 return false; 447 448 auto Kind = Inputs[0].getKind(); 449 450 // Convert the header file inputs into a single input buffer. 451 SmallString<256> HeaderContents; 452 bool IsQuoted = false; 453 for (const FrontendInputFile &FIF : Inputs) { 454 if (Kind.isObjectiveC()) 455 HeaderContents += "#import"; 456 else 457 HeaderContents += "#include"; 458 459 StringRef FilePath = FIF.getFile(); 460 if (auto RelativeName = getRelativeIncludeName(CI, FilePath, &IsQuoted)) { 461 if (IsQuoted) 462 HeaderContents += " \""; 463 else 464 HeaderContents += " <"; 465 466 HeaderContents += *RelativeName; 467 468 if (IsQuoted) 469 HeaderContents += "\"\n"; 470 else 471 HeaderContents += ">\n"; 472 KnownInputFiles.emplace_back(static_cast<SmallString<32>>(*RelativeName), 473 IsQuoted); 474 } else { 475 HeaderContents += " \""; 476 HeaderContents += FilePath; 477 HeaderContents += "\"\n"; 478 KnownInputFiles.emplace_back(FilePath, true); 479 } 480 } 481 482 if (CI.getHeaderSearchOpts().Verbose) 483 CI.getVerboseOutputStream() << getInputBufferName() << ":\n" 484 << HeaderContents << "\n"; 485 486 Buffer = llvm::MemoryBuffer::getMemBufferCopy(HeaderContents, 487 getInputBufferName()); 488 489 // Set that buffer up as our "real" input in the CompilerInstance. 490 Inputs.clear(); 491 Inputs.emplace_back(Buffer->getMemBufferRef(), Kind, /*IsSystem*/ false); 492 493 return true; 494 } 495 496 void ExtractAPIAction::EndSourceFileAction() { ImplEndSourceFileAction(); } 497 498 std::unique_ptr<ASTConsumer> 499 WrappingExtractAPIAction::CreateASTConsumer(CompilerInstance &CI, 500 StringRef InFile) { 501 auto OtherConsumer = WrapperFrontendAction::CreateASTConsumer(CI, InFile); 502 if (!OtherConsumer) 503 return nullptr; 504 505 CreatedASTConsumer = true; 506 507 OS = CreateOutputFile(CI, InFile); 508 if (!OS) 509 return nullptr; 510 511 auto ProductName = CI.getFrontendOpts().ProductName; 512 513 // Now that we have enough information about the language options and the 514 // target triple, let's create the APISet before anyone uses it. 515 API = std::make_unique<APISet>( 516 CI.getTarget().getTriple(), 517 CI.getFrontendOpts().Inputs.back().getKind().getLanguage(), ProductName); 518 519 CI.getPreprocessor().addPPCallbacks(std::make_unique<MacroCallback>( 520 CI.getSourceManager(), *API, CI.getPreprocessor())); 521 522 // Do not include location in anonymous decls. 523 PrintingPolicy Policy = CI.getASTContext().getPrintingPolicy(); 524 Policy.AnonymousTagLocations = false; 525 CI.getASTContext().setPrintingPolicy(Policy); 526 527 if (!CI.getFrontendOpts().ExtractAPIIgnoresFileList.empty()) { 528 llvm::handleAllErrors( 529 APIIgnoresList::create(CI.getFrontendOpts().ExtractAPIIgnoresFileList, 530 CI.getFileManager()) 531 .moveInto(IgnoresList), 532 [&CI](const IgnoresFileNotFound &Err) { 533 CI.getDiagnostics().Report( 534 diag::err_extract_api_ignores_file_not_found) 535 << Err.Path; 536 }); 537 } 538 539 auto WrappingConsumer = 540 std::make_unique<WrappingExtractAPIConsumer>(CI.getASTContext(), *API); 541 std::vector<std::unique_ptr<ASTConsumer>> Consumers; 542 Consumers.push_back(std::move(OtherConsumer)); 543 Consumers.push_back(std::move(WrappingConsumer)); 544 545 return std::make_unique<MultiplexConsumer>(std::move(Consumers)); 546 } 547 548 void WrappingExtractAPIAction::EndSourceFileAction() { 549 // Invoke wrapped action's method. 550 WrapperFrontendAction::EndSourceFileAction(); 551 552 if (CreatedASTConsumer) { 553 ImplEndSourceFileAction(); 554 } 555 } 556 557 std::unique_ptr<raw_pwrite_stream> 558 WrappingExtractAPIAction::CreateOutputFile(CompilerInstance &CI, 559 StringRef InFile) { 560 std::unique_ptr<raw_pwrite_stream> OS; 561 std::string OutputDir = CI.getFrontendOpts().SymbolGraphOutputDir; 562 563 // The symbol graphs need to be generated as a side effect of regular 564 // compilation so the output should be dumped in the directory provided with 565 // the command line option. 566 llvm::SmallString<128> OutFilePath(OutputDir); 567 auto Seperator = llvm::sys::path::get_separator(); 568 auto Infilename = llvm::sys::path::filename(InFile); 569 OutFilePath.append({Seperator, Infilename}); 570 llvm::sys::path::replace_extension(OutFilePath, "json"); 571 // StringRef outputFilePathref = *OutFilePath; 572 573 // don't use the default output file 574 OS = CI.createOutputFile(/*OutputPath=*/OutFilePath, /*Binary=*/false, 575 /*RemoveFileOnSignal=*/true, 576 /*UseTemporary=*/true, 577 /*CreateMissingDirectories=*/true); 578 if (!OS) 579 return nullptr; 580 return OS; 581 } 582