1 //===- ExtractAPI/ExtractAPIConsumer.cpp ------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements the ExtractAPIAction, and ASTConsumer to collect API 11 /// information. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/AST/ASTConcept.h" 16 #include "clang/AST/ASTConsumer.h" 17 #include "clang/AST/ASTContext.h" 18 #include "clang/AST/DeclObjC.h" 19 #include "clang/Basic/DiagnosticFrontend.h" 20 #include "clang/Basic/FileEntry.h" 21 #include "clang/Basic/SourceLocation.h" 22 #include "clang/Basic/SourceManager.h" 23 #include "clang/Basic/TargetInfo.h" 24 #include "clang/ExtractAPI/API.h" 25 #include "clang/ExtractAPI/APIIgnoresList.h" 26 #include "clang/ExtractAPI/ExtractAPIVisitor.h" 27 #include "clang/ExtractAPI/FrontendActions.h" 28 #include "clang/ExtractAPI/Serialization/SymbolGraphSerializer.h" 29 #include "clang/Frontend/ASTConsumers.h" 30 #include "clang/Frontend/CompilerInstance.h" 31 #include "clang/Frontend/FrontendOptions.h" 32 #include "clang/Frontend/MultiplexConsumer.h" 33 #include "clang/Lex/MacroInfo.h" 34 #include "clang/Lex/PPCallbacks.h" 35 #include "clang/Lex/Preprocessor.h" 36 #include "clang/Lex/PreprocessorOptions.h" 37 #include "llvm/ADT/DenseSet.h" 38 #include "llvm/ADT/STLExtras.h" 39 #include "llvm/ADT/SmallString.h" 40 #include "llvm/ADT/SmallVector.h" 41 #include "llvm/Support/Casting.h" 42 #include "llvm/Support/Error.h" 43 #include "llvm/Support/FileSystem.h" 44 #include "llvm/Support/MemoryBuffer.h" 45 #include "llvm/Support/Path.h" 46 #include "llvm/Support/Regex.h" 47 #include "llvm/Support/raw_ostream.h" 48 #include <memory> 49 #include <optional> 50 #include <utility> 51 52 using namespace clang; 53 using namespace extractapi; 54 55 namespace { 56 57 std::optional<std::string> getRelativeIncludeName(const CompilerInstance &CI, 58 StringRef File, 59 bool *IsQuoted = nullptr) { 60 assert(CI.hasFileManager() && 61 "CompilerInstance does not have a FileNamager!"); 62 63 using namespace llvm::sys; 64 // Matches framework include patterns 65 const llvm::Regex Rule("/(.+)\\.framework/(.+)?Headers/(.+)"); 66 67 const auto &FS = CI.getVirtualFileSystem(); 68 69 SmallString<128> FilePath(File.begin(), File.end()); 70 FS.makeAbsolute(FilePath); 71 path::remove_dots(FilePath, true); 72 FilePath = path::convert_to_slash(FilePath); 73 File = FilePath; 74 75 // Checks whether `Dir` is a strict path prefix of `File`. If so returns 76 // the prefix length. Otherwise return 0. 77 auto CheckDir = [&](llvm::StringRef Dir) -> unsigned { 78 llvm::SmallString<32> DirPath(Dir.begin(), Dir.end()); 79 FS.makeAbsolute(DirPath); 80 path::remove_dots(DirPath, true); 81 Dir = DirPath; 82 for (auto NI = path::begin(File), NE = path::end(File), 83 DI = path::begin(Dir), DE = path::end(Dir); 84 /*termination condition in loop*/; ++NI, ++DI) { 85 // '.' components in File are ignored. 86 while (NI != NE && *NI == ".") 87 ++NI; 88 if (NI == NE) 89 break; 90 91 // '.' components in Dir are ignored. 92 while (DI != DE && *DI == ".") 93 ++DI; 94 95 // Dir is a prefix of File, up to '.' components and choice of path 96 // separators. 97 if (DI == DE) 98 return NI - path::begin(File); 99 100 // Consider all path separators equal. 101 if (NI->size() == 1 && DI->size() == 1 && 102 path::is_separator(NI->front()) && path::is_separator(DI->front())) 103 continue; 104 105 // Special case Apple .sdk folders since the search path is typically a 106 // symlink like `iPhoneSimulator14.5.sdk` while the file is instead 107 // located in `iPhoneSimulator.sdk` (the real folder). 108 if (NI->ends_with(".sdk") && DI->ends_with(".sdk")) { 109 StringRef NBasename = path::stem(*NI); 110 StringRef DBasename = path::stem(*DI); 111 if (DBasename.starts_with(NBasename)) 112 continue; 113 } 114 115 if (*NI != *DI) 116 break; 117 } 118 return 0; 119 }; 120 121 unsigned PrefixLength = 0; 122 123 // Go through the search paths and find the first one that is a prefix of 124 // the header. 125 for (const auto &Entry : CI.getHeaderSearchOpts().UserEntries) { 126 // Note whether the match is found in a quoted entry. 127 if (IsQuoted) 128 *IsQuoted = Entry.Group == frontend::Quoted; 129 130 if (auto EntryFile = CI.getFileManager().getOptionalFileRef(Entry.Path)) { 131 if (auto HMap = HeaderMap::Create(*EntryFile, CI.getFileManager())) { 132 // If this is a headermap entry, try to reverse lookup the full path 133 // for a spelled name before mapping. 134 StringRef SpelledFilename = HMap->reverseLookupFilename(File); 135 if (!SpelledFilename.empty()) 136 return SpelledFilename.str(); 137 138 // No matching mapping in this headermap, try next search entry. 139 continue; 140 } 141 } 142 143 // Entry is a directory search entry, try to check if it's a prefix of File. 144 PrefixLength = CheckDir(Entry.Path); 145 if (PrefixLength > 0) { 146 // The header is found in a framework path, construct the framework-style 147 // include name `<Framework/Header.h>` 148 if (Entry.IsFramework) { 149 SmallVector<StringRef, 4> Matches; 150 Rule.match(File, &Matches); 151 // Returned matches are always in stable order. 152 if (Matches.size() != 4) 153 return std::nullopt; 154 155 return path::convert_to_slash( 156 (Matches[1].drop_front(Matches[1].rfind('/') + 1) + "/" + 157 Matches[3]) 158 .str()); 159 } 160 161 // The header is found in a normal search path, strip the search path 162 // prefix to get an include name. 163 return path::convert_to_slash(File.drop_front(PrefixLength)); 164 } 165 } 166 167 // Couldn't determine a include name, use full path instead. 168 return std::nullopt; 169 } 170 171 std::optional<std::string> getRelativeIncludeName(const CompilerInstance &CI, 172 FileEntryRef FE, 173 bool *IsQuoted = nullptr) { 174 return getRelativeIncludeName(CI, FE.getNameAsRequested(), IsQuoted); 175 } 176 177 struct LocationFileChecker { 178 bool operator()(SourceLocation Loc) { 179 // If the loc refers to a macro expansion we need to first get the file 180 // location of the expansion. 181 auto &SM = CI.getSourceManager(); 182 auto FileLoc = SM.getFileLoc(Loc); 183 FileID FID = SM.getFileID(FileLoc); 184 if (FID.isInvalid()) 185 return false; 186 187 OptionalFileEntryRef File = SM.getFileEntryRefForID(FID); 188 if (!File) 189 return false; 190 191 if (KnownFileEntries.count(*File)) 192 return true; 193 194 if (ExternalFileEntries.count(*File)) 195 return false; 196 197 // Try to reduce the include name the same way we tried to include it. 198 bool IsQuoted = false; 199 if (auto IncludeName = getRelativeIncludeName(CI, *File, &IsQuoted)) 200 if (llvm::any_of(KnownFiles, 201 [&IsQuoted, &IncludeName](const auto &KnownFile) { 202 return KnownFile.first.equals(*IncludeName) && 203 KnownFile.second == IsQuoted; 204 })) { 205 KnownFileEntries.insert(*File); 206 return true; 207 } 208 209 // Record that the file was not found to avoid future reverse lookup for 210 // the same file. 211 ExternalFileEntries.insert(*File); 212 return false; 213 } 214 215 LocationFileChecker(const CompilerInstance &CI, 216 SmallVector<std::pair<SmallString<32>, bool>> &KnownFiles) 217 : CI(CI), KnownFiles(KnownFiles), ExternalFileEntries() { 218 for (const auto &KnownFile : KnownFiles) 219 if (auto FileEntry = CI.getFileManager().getFile(KnownFile.first)) 220 KnownFileEntries.insert(*FileEntry); 221 } 222 223 private: 224 const CompilerInstance &CI; 225 SmallVector<std::pair<SmallString<32>, bool>> &KnownFiles; 226 llvm::DenseSet<const FileEntry *> KnownFileEntries; 227 llvm::DenseSet<const FileEntry *> ExternalFileEntries; 228 }; 229 230 struct BatchExtractAPIVisitor : ExtractAPIVisitor<BatchExtractAPIVisitor> { 231 bool shouldDeclBeIncluded(const Decl *D) const { 232 bool ShouldBeIncluded = true; 233 // Check that we have the definition for redeclarable types. 234 if (auto *TD = llvm::dyn_cast<TagDecl>(D)) 235 ShouldBeIncluded = TD->isThisDeclarationADefinition(); 236 else if (auto *Interface = llvm::dyn_cast<ObjCInterfaceDecl>(D)) 237 ShouldBeIncluded = Interface->isThisDeclarationADefinition(); 238 else if (auto *Protocol = llvm::dyn_cast<ObjCProtocolDecl>(D)) 239 ShouldBeIncluded = Protocol->isThisDeclarationADefinition(); 240 241 ShouldBeIncluded = ShouldBeIncluded && LCF(D->getLocation()); 242 return ShouldBeIncluded; 243 } 244 245 BatchExtractAPIVisitor(LocationFileChecker &LCF, ASTContext &Context, 246 APISet &API) 247 : ExtractAPIVisitor<BatchExtractAPIVisitor>(Context, API), LCF(LCF) {} 248 249 private: 250 LocationFileChecker &LCF; 251 }; 252 253 class WrappingExtractAPIConsumer : public ASTConsumer { 254 public: 255 WrappingExtractAPIConsumer(ASTContext &Context, APISet &API) 256 : Visitor(Context, API) {} 257 258 void HandleTranslationUnit(ASTContext &Context) override { 259 // Use ExtractAPIVisitor to traverse symbol declarations in the context. 260 Visitor.TraverseDecl(Context.getTranslationUnitDecl()); 261 } 262 263 private: 264 ExtractAPIVisitor<> Visitor; 265 }; 266 267 class ExtractAPIConsumer : public ASTConsumer { 268 public: 269 ExtractAPIConsumer(ASTContext &Context, 270 std::unique_ptr<LocationFileChecker> LCF, APISet &API) 271 : Visitor(*LCF, Context, API), LCF(std::move(LCF)) {} 272 273 void HandleTranslationUnit(ASTContext &Context) override { 274 // Use ExtractAPIVisitor to traverse symbol declarations in the context. 275 Visitor.TraverseDecl(Context.getTranslationUnitDecl()); 276 } 277 278 private: 279 BatchExtractAPIVisitor Visitor; 280 std::unique_ptr<LocationFileChecker> LCF; 281 }; 282 283 class MacroCallback : public PPCallbacks { 284 public: 285 MacroCallback(const SourceManager &SM, APISet &API, Preprocessor &PP) 286 : SM(SM), API(API), PP(PP) {} 287 288 void MacroDefined(const Token &MacroNameToken, 289 const MacroDirective *MD) override { 290 auto *MacroInfo = MD->getMacroInfo(); 291 292 if (MacroInfo->isBuiltinMacro()) 293 return; 294 295 auto SourceLoc = MacroNameToken.getLocation(); 296 if (SM.isWrittenInBuiltinFile(SourceLoc) || 297 SM.isWrittenInCommandLineFile(SourceLoc)) 298 return; 299 300 PendingMacros.emplace_back(MacroNameToken, MD); 301 } 302 303 // If a macro gets undefined at some point during preprocessing of the inputs 304 // it means that it isn't an exposed API and we should therefore not add a 305 // macro definition for it. 306 void MacroUndefined(const Token &MacroNameToken, const MacroDefinition &MD, 307 const MacroDirective *Undef) override { 308 // If this macro wasn't previously defined we don't need to do anything 309 // here. 310 if (!Undef) 311 return; 312 313 llvm::erase_if(PendingMacros, [&MD, this](const PendingMacro &PM) { 314 return MD.getMacroInfo()->isIdenticalTo(*PM.MD->getMacroInfo(), PP, 315 /*Syntactically*/ false); 316 }); 317 } 318 319 void EndOfMainFile() override { 320 for (auto &PM : PendingMacros) { 321 // `isUsedForHeaderGuard` is only set when the preprocessor leaves the 322 // file so check for it here. 323 if (PM.MD->getMacroInfo()->isUsedForHeaderGuard()) 324 continue; 325 326 if (!shouldMacroBeIncluded(PM)) 327 continue; 328 329 StringRef Name = PM.MacroNameToken.getIdentifierInfo()->getName(); 330 PresumedLoc Loc = SM.getPresumedLoc(PM.MacroNameToken.getLocation()); 331 StringRef USR = 332 API.recordUSRForMacro(Name, PM.MacroNameToken.getLocation(), SM); 333 334 API.addMacroDefinition( 335 Name, USR, Loc, 336 DeclarationFragmentsBuilder::getFragmentsForMacro(Name, PM.MD), 337 DeclarationFragmentsBuilder::getSubHeadingForMacro(Name), 338 SM.isInSystemHeader(PM.MacroNameToken.getLocation())); 339 } 340 341 PendingMacros.clear(); 342 } 343 344 protected: 345 struct PendingMacro { 346 Token MacroNameToken; 347 const MacroDirective *MD; 348 349 PendingMacro(const Token &MacroNameToken, const MacroDirective *MD) 350 : MacroNameToken(MacroNameToken), MD(MD) {} 351 }; 352 353 virtual bool shouldMacroBeIncluded(const PendingMacro &PM) { return true; } 354 355 const SourceManager &SM; 356 APISet &API; 357 Preprocessor &PP; 358 llvm::SmallVector<PendingMacro> PendingMacros; 359 }; 360 361 class APIMacroCallback : public MacroCallback { 362 public: 363 APIMacroCallback(const SourceManager &SM, APISet &API, Preprocessor &PP, 364 LocationFileChecker &LCF) 365 : MacroCallback(SM, API, PP), LCF(LCF) {} 366 367 bool shouldMacroBeIncluded(const PendingMacro &PM) override { 368 // Do not include macros from external files 369 return LCF(PM.MacroNameToken.getLocation()); 370 } 371 372 private: 373 LocationFileChecker &LCF; 374 }; 375 376 } // namespace 377 378 void ExtractAPIActionBase::ImplEndSourceFileAction() { 379 if (!OS) 380 return; 381 382 // Setup a SymbolGraphSerializer to write out collected API information in 383 // the Symbol Graph format. 384 // FIXME: Make the kind of APISerializer configurable. 385 SymbolGraphSerializer SGSerializer(*API, IgnoresList); 386 SGSerializer.serialize(*OS); 387 OS.reset(); 388 } 389 390 std::unique_ptr<raw_pwrite_stream> 391 ExtractAPIAction::CreateOutputFile(CompilerInstance &CI, StringRef InFile) { 392 std::unique_ptr<raw_pwrite_stream> OS; 393 OS = CI.createDefaultOutputFile(/*Binary=*/false, InFile, 394 /*Extension=*/"json", 395 /*RemoveFileOnSignal=*/false); 396 if (!OS) 397 return nullptr; 398 return OS; 399 } 400 401 std::unique_ptr<ASTConsumer> 402 ExtractAPIAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { 403 OS = CreateOutputFile(CI, InFile); 404 405 if (!OS) 406 return nullptr; 407 408 auto ProductName = CI.getFrontendOpts().ProductName; 409 410 // Now that we have enough information about the language options and the 411 // target triple, let's create the APISet before anyone uses it. 412 API = std::make_unique<APISet>( 413 CI.getTarget().getTriple(), 414 CI.getFrontendOpts().Inputs.back().getKind().getLanguage(), ProductName); 415 416 auto LCF = std::make_unique<LocationFileChecker>(CI, KnownInputFiles); 417 418 CI.getPreprocessor().addPPCallbacks(std::make_unique<APIMacroCallback>( 419 CI.getSourceManager(), *API, CI.getPreprocessor(), *LCF)); 420 421 // Do not include location in anonymous decls. 422 PrintingPolicy Policy = CI.getASTContext().getPrintingPolicy(); 423 Policy.AnonymousTagLocations = false; 424 CI.getASTContext().setPrintingPolicy(Policy); 425 426 if (!CI.getFrontendOpts().ExtractAPIIgnoresFileList.empty()) { 427 llvm::handleAllErrors( 428 APIIgnoresList::create(CI.getFrontendOpts().ExtractAPIIgnoresFileList, 429 CI.getFileManager()) 430 .moveInto(IgnoresList), 431 [&CI](const IgnoresFileNotFound &Err) { 432 CI.getDiagnostics().Report( 433 diag::err_extract_api_ignores_file_not_found) 434 << Err.Path; 435 }); 436 } 437 438 return std::make_unique<ExtractAPIConsumer>(CI.getASTContext(), 439 std::move(LCF), *API); 440 } 441 442 bool ExtractAPIAction::PrepareToExecuteAction(CompilerInstance &CI) { 443 auto &Inputs = CI.getFrontendOpts().Inputs; 444 if (Inputs.empty()) 445 return true; 446 447 if (!CI.hasFileManager()) 448 if (!CI.createFileManager()) 449 return false; 450 451 auto Kind = Inputs[0].getKind(); 452 453 // Convert the header file inputs into a single input buffer. 454 SmallString<256> HeaderContents; 455 bool IsQuoted = false; 456 for (const FrontendInputFile &FIF : Inputs) { 457 if (Kind.isObjectiveC()) 458 HeaderContents += "#import"; 459 else 460 HeaderContents += "#include"; 461 462 StringRef FilePath = FIF.getFile(); 463 if (auto RelativeName = getRelativeIncludeName(CI, FilePath, &IsQuoted)) { 464 if (IsQuoted) 465 HeaderContents += " \""; 466 else 467 HeaderContents += " <"; 468 469 HeaderContents += *RelativeName; 470 471 if (IsQuoted) 472 HeaderContents += "\"\n"; 473 else 474 HeaderContents += ">\n"; 475 KnownInputFiles.emplace_back(static_cast<SmallString<32>>(*RelativeName), 476 IsQuoted); 477 } else { 478 HeaderContents += " \""; 479 HeaderContents += FilePath; 480 HeaderContents += "\"\n"; 481 KnownInputFiles.emplace_back(FilePath, true); 482 } 483 } 484 485 if (CI.getHeaderSearchOpts().Verbose) 486 CI.getVerboseOutputStream() << getInputBufferName() << ":\n" 487 << HeaderContents << "\n"; 488 489 Buffer = llvm::MemoryBuffer::getMemBufferCopy(HeaderContents, 490 getInputBufferName()); 491 492 // Set that buffer up as our "real" input in the CompilerInstance. 493 Inputs.clear(); 494 Inputs.emplace_back(Buffer->getMemBufferRef(), Kind, /*IsSystem*/ false); 495 496 return true; 497 } 498 499 void ExtractAPIAction::EndSourceFileAction() { ImplEndSourceFileAction(); } 500 501 std::unique_ptr<ASTConsumer> 502 WrappingExtractAPIAction::CreateASTConsumer(CompilerInstance &CI, 503 StringRef InFile) { 504 auto OtherConsumer = WrapperFrontendAction::CreateASTConsumer(CI, InFile); 505 if (!OtherConsumer) 506 return nullptr; 507 508 CreatedASTConsumer = true; 509 510 OS = CreateOutputFile(CI, InFile); 511 if (!OS) 512 return nullptr; 513 514 auto ProductName = CI.getFrontendOpts().ProductName; 515 516 // Now that we have enough information about the language options and the 517 // target triple, let's create the APISet before anyone uses it. 518 API = std::make_unique<APISet>( 519 CI.getTarget().getTriple(), 520 CI.getFrontendOpts().Inputs.back().getKind().getLanguage(), ProductName); 521 522 CI.getPreprocessor().addPPCallbacks(std::make_unique<MacroCallback>( 523 CI.getSourceManager(), *API, CI.getPreprocessor())); 524 525 // Do not include location in anonymous decls. 526 PrintingPolicy Policy = CI.getASTContext().getPrintingPolicy(); 527 Policy.AnonymousTagLocations = false; 528 CI.getASTContext().setPrintingPolicy(Policy); 529 530 if (!CI.getFrontendOpts().ExtractAPIIgnoresFileList.empty()) { 531 llvm::handleAllErrors( 532 APIIgnoresList::create(CI.getFrontendOpts().ExtractAPIIgnoresFileList, 533 CI.getFileManager()) 534 .moveInto(IgnoresList), 535 [&CI](const IgnoresFileNotFound &Err) { 536 CI.getDiagnostics().Report( 537 diag::err_extract_api_ignores_file_not_found) 538 << Err.Path; 539 }); 540 } 541 542 auto WrappingConsumer = 543 std::make_unique<WrappingExtractAPIConsumer>(CI.getASTContext(), *API); 544 std::vector<std::unique_ptr<ASTConsumer>> Consumers; 545 Consumers.push_back(std::move(OtherConsumer)); 546 Consumers.push_back(std::move(WrappingConsumer)); 547 548 return std::make_unique<MultiplexConsumer>(std::move(Consumers)); 549 } 550 551 void WrappingExtractAPIAction::EndSourceFileAction() { 552 // Invoke wrapped action's method. 553 WrapperFrontendAction::EndSourceFileAction(); 554 555 if (CreatedASTConsumer) { 556 ImplEndSourceFileAction(); 557 } 558 } 559 560 std::unique_ptr<raw_pwrite_stream> 561 WrappingExtractAPIAction::CreateOutputFile(CompilerInstance &CI, 562 StringRef InFile) { 563 std::unique_ptr<raw_pwrite_stream> OS; 564 std::string OutputDir = CI.getFrontendOpts().SymbolGraphOutputDir; 565 566 // The symbol graphs need to be generated as a side effect of regular 567 // compilation so the output should be dumped in the directory provided with 568 // the command line option. 569 llvm::SmallString<128> OutFilePath(OutputDir); 570 auto Seperator = llvm::sys::path::get_separator(); 571 auto Infilename = llvm::sys::path::filename(InFile); 572 OutFilePath.append({Seperator, Infilename}); 573 llvm::sys::path::replace_extension(OutFilePath, "json"); 574 // StringRef outputFilePathref = *OutFilePath; 575 576 // don't use the default output file 577 OS = CI.createOutputFile(/*OutputPath=*/OutFilePath, /*Binary=*/false, 578 /*RemoveFileOnSignal=*/true, 579 /*UseTemporary=*/true, 580 /*CreateMissingDirectories=*/true); 581 if (!OS) 582 return nullptr; 583 return OS; 584 } 585