1 //===- ExtractAPI/ExtractAPIConsumer.cpp ------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements the ExtractAPIAction, and ASTConsumer to collect API 11 /// information. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/AST/ASTConcept.h" 16 #include "clang/AST/ASTConsumer.h" 17 #include "clang/AST/ASTContext.h" 18 #include "clang/AST/DeclObjC.h" 19 #include "clang/Basic/DiagnosticFrontend.h" 20 #include "clang/Basic/FileEntry.h" 21 #include "clang/Basic/SourceLocation.h" 22 #include "clang/Basic/SourceManager.h" 23 #include "clang/Basic/TargetInfo.h" 24 #include "clang/ExtractAPI/API.h" 25 #include "clang/ExtractAPI/APIIgnoresList.h" 26 #include "clang/ExtractAPI/ExtractAPIVisitor.h" 27 #include "clang/ExtractAPI/FrontendActions.h" 28 #include "clang/ExtractAPI/Serialization/SymbolGraphSerializer.h" 29 #include "clang/Frontend/ASTConsumers.h" 30 #include "clang/Frontend/CompilerInstance.h" 31 #include "clang/Frontend/FrontendOptions.h" 32 #include "clang/Frontend/MultiplexConsumer.h" 33 #include "clang/Index/USRGeneration.h" 34 #include "clang/InstallAPI/HeaderFile.h" 35 #include "clang/Lex/MacroInfo.h" 36 #include "clang/Lex/PPCallbacks.h" 37 #include "clang/Lex/Preprocessor.h" 38 #include "clang/Lex/PreprocessorOptions.h" 39 #include "llvm/ADT/DenseSet.h" 40 #include "llvm/ADT/STLExtras.h" 41 #include "llvm/ADT/SmallString.h" 42 #include "llvm/ADT/SmallVector.h" 43 #include "llvm/ADT/StringRef.h" 44 #include "llvm/Support/Casting.h" 45 #include "llvm/Support/Error.h" 46 #include "llvm/Support/FileSystem.h" 47 #include "llvm/Support/MemoryBuffer.h" 48 #include "llvm/Support/Path.h" 49 #include "llvm/Support/Regex.h" 50 #include "llvm/Support/raw_ostream.h" 51 #include <memory> 52 #include <optional> 53 #include <utility> 54 55 using namespace clang; 56 using namespace extractapi; 57 58 namespace { 59 60 std::optional<std::string> getRelativeIncludeName(const CompilerInstance &CI, 61 StringRef File, 62 bool *IsQuoted = nullptr) { 63 assert(CI.hasFileManager() && 64 "CompilerInstance does not have a FileNamager!"); 65 66 using namespace llvm::sys; 67 const auto &FS = CI.getVirtualFileSystem(); 68 69 SmallString<128> FilePath(File.begin(), File.end()); 70 FS.makeAbsolute(FilePath); 71 path::remove_dots(FilePath, true); 72 FilePath = path::convert_to_slash(FilePath); 73 File = FilePath; 74 75 // Checks whether `Dir` is a strict path prefix of `File`. If so returns 76 // the prefix length. Otherwise return 0. 77 auto CheckDir = [&](llvm::StringRef Dir) -> unsigned { 78 llvm::SmallString<32> DirPath(Dir.begin(), Dir.end()); 79 FS.makeAbsolute(DirPath); 80 path::remove_dots(DirPath, true); 81 Dir = DirPath; 82 for (auto NI = path::begin(File), NE = path::end(File), 83 DI = path::begin(Dir), DE = path::end(Dir); 84 /*termination condition in loop*/; ++NI, ++DI) { 85 // '.' components in File are ignored. 86 while (NI != NE && *NI == ".") 87 ++NI; 88 if (NI == NE) 89 break; 90 91 // '.' components in Dir are ignored. 92 while (DI != DE && *DI == ".") 93 ++DI; 94 95 // Dir is a prefix of File, up to '.' components and choice of path 96 // separators. 97 if (DI == DE) 98 return NI - path::begin(File); 99 100 // Consider all path separators equal. 101 if (NI->size() == 1 && DI->size() == 1 && 102 path::is_separator(NI->front()) && path::is_separator(DI->front())) 103 continue; 104 105 // Special case Apple .sdk folders since the search path is typically a 106 // symlink like `iPhoneSimulator14.5.sdk` while the file is instead 107 // located in `iPhoneSimulator.sdk` (the real folder). 108 if (NI->ends_with(".sdk") && DI->ends_with(".sdk")) { 109 StringRef NBasename = path::stem(*NI); 110 StringRef DBasename = path::stem(*DI); 111 if (DBasename.starts_with(NBasename)) 112 continue; 113 } 114 115 if (*NI != *DI) 116 break; 117 } 118 return 0; 119 }; 120 121 unsigned PrefixLength = 0; 122 123 // Go through the search paths and find the first one that is a prefix of 124 // the header. 125 for (const auto &Entry : CI.getHeaderSearchOpts().UserEntries) { 126 // Note whether the match is found in a quoted entry. 127 if (IsQuoted) 128 *IsQuoted = Entry.Group == frontend::Quoted; 129 130 if (auto EntryFile = CI.getFileManager().getOptionalFileRef(Entry.Path)) { 131 if (auto HMap = HeaderMap::Create(*EntryFile, CI.getFileManager())) { 132 // If this is a headermap entry, try to reverse lookup the full path 133 // for a spelled name before mapping. 134 StringRef SpelledFilename = HMap->reverseLookupFilename(File); 135 if (!SpelledFilename.empty()) 136 return SpelledFilename.str(); 137 138 // No matching mapping in this headermap, try next search entry. 139 continue; 140 } 141 } 142 143 // Entry is a directory search entry, try to check if it's a prefix of File. 144 PrefixLength = CheckDir(Entry.Path); 145 if (PrefixLength > 0) { 146 // The header is found in a framework path, construct the framework-style 147 // include name `<Framework/Header.h>` 148 if (Entry.IsFramework) { 149 SmallVector<StringRef, 4> Matches; 150 clang::installapi::HeaderFile::getFrameworkIncludeRule().match( 151 File, &Matches); 152 // Returned matches are always in stable order. 153 if (Matches.size() != 4) 154 return std::nullopt; 155 156 return path::convert_to_slash( 157 (Matches[1].drop_front(Matches[1].rfind('/') + 1) + "/" + 158 Matches[3]) 159 .str()); 160 } 161 162 // The header is found in a normal search path, strip the search path 163 // prefix to get an include name. 164 return path::convert_to_slash(File.drop_front(PrefixLength)); 165 } 166 } 167 168 // Couldn't determine a include name, use full path instead. 169 return std::nullopt; 170 } 171 172 std::optional<std::string> getRelativeIncludeName(const CompilerInstance &CI, 173 FileEntryRef FE, 174 bool *IsQuoted = nullptr) { 175 return getRelativeIncludeName(CI, FE.getNameAsRequested(), IsQuoted); 176 } 177 178 struct LocationFileChecker { 179 bool operator()(SourceLocation Loc) { 180 // If the loc refers to a macro expansion we need to first get the file 181 // location of the expansion. 182 auto &SM = CI.getSourceManager(); 183 auto FileLoc = SM.getFileLoc(Loc); 184 FileID FID = SM.getFileID(FileLoc); 185 if (FID.isInvalid()) 186 return false; 187 188 OptionalFileEntryRef File = SM.getFileEntryRefForID(FID); 189 if (!File) 190 return false; 191 192 if (KnownFileEntries.count(*File)) 193 return true; 194 195 if (ExternalFileEntries.count(*File)) 196 return false; 197 198 // Try to reduce the include name the same way we tried to include it. 199 bool IsQuoted = false; 200 if (auto IncludeName = getRelativeIncludeName(CI, *File, &IsQuoted)) 201 if (llvm::any_of(KnownFiles, 202 [&IsQuoted, &IncludeName](const auto &KnownFile) { 203 return KnownFile.first.equals(*IncludeName) && 204 KnownFile.second == IsQuoted; 205 })) { 206 KnownFileEntries.insert(*File); 207 return true; 208 } 209 210 // Record that the file was not found to avoid future reverse lookup for 211 // the same file. 212 ExternalFileEntries.insert(*File); 213 return false; 214 } 215 216 LocationFileChecker(const CompilerInstance &CI, 217 SmallVector<std::pair<SmallString<32>, bool>> &KnownFiles) 218 : CI(CI), KnownFiles(KnownFiles), ExternalFileEntries() { 219 for (const auto &KnownFile : KnownFiles) 220 if (auto FileEntry = CI.getFileManager().getFile(KnownFile.first)) 221 KnownFileEntries.insert(*FileEntry); 222 } 223 224 private: 225 const CompilerInstance &CI; 226 SmallVector<std::pair<SmallString<32>, bool>> &KnownFiles; 227 llvm::DenseSet<const FileEntry *> KnownFileEntries; 228 llvm::DenseSet<const FileEntry *> ExternalFileEntries; 229 }; 230 231 struct BatchExtractAPIVisitor : ExtractAPIVisitor<BatchExtractAPIVisitor> { 232 bool shouldDeclBeIncluded(const Decl *D) const { 233 bool ShouldBeIncluded = true; 234 // Check that we have the definition for redeclarable types. 235 if (auto *TD = llvm::dyn_cast<TagDecl>(D)) 236 ShouldBeIncluded = TD->isThisDeclarationADefinition(); 237 else if (auto *Interface = llvm::dyn_cast<ObjCInterfaceDecl>(D)) 238 ShouldBeIncluded = Interface->isThisDeclarationADefinition(); 239 else if (auto *Protocol = llvm::dyn_cast<ObjCProtocolDecl>(D)) 240 ShouldBeIncluded = Protocol->isThisDeclarationADefinition(); 241 242 ShouldBeIncluded = ShouldBeIncluded && LCF(D->getLocation()); 243 return ShouldBeIncluded; 244 } 245 246 BatchExtractAPIVisitor(LocationFileChecker &LCF, ASTContext &Context, 247 APISet &API) 248 : ExtractAPIVisitor<BatchExtractAPIVisitor>(Context, API), LCF(LCF) {} 249 250 private: 251 LocationFileChecker &LCF; 252 }; 253 254 class WrappingExtractAPIConsumer : public ASTConsumer { 255 public: 256 WrappingExtractAPIConsumer(ASTContext &Context, APISet &API) 257 : Visitor(Context, API) {} 258 259 void HandleTranslationUnit(ASTContext &Context) override { 260 // Use ExtractAPIVisitor to traverse symbol declarations in the context. 261 Visitor.TraverseDecl(Context.getTranslationUnitDecl()); 262 } 263 264 private: 265 ExtractAPIVisitor<> Visitor; 266 }; 267 268 class ExtractAPIConsumer : public ASTConsumer { 269 public: 270 ExtractAPIConsumer(ASTContext &Context, 271 std::unique_ptr<LocationFileChecker> LCF, APISet &API) 272 : Visitor(*LCF, Context, API), LCF(std::move(LCF)) {} 273 274 void HandleTranslationUnit(ASTContext &Context) override { 275 // Use ExtractAPIVisitor to traverse symbol declarations in the context. 276 Visitor.TraverseDecl(Context.getTranslationUnitDecl()); 277 } 278 279 private: 280 BatchExtractAPIVisitor Visitor; 281 std::unique_ptr<LocationFileChecker> LCF; 282 }; 283 284 class MacroCallback : public PPCallbacks { 285 public: 286 MacroCallback(const SourceManager &SM, APISet &API, Preprocessor &PP) 287 : SM(SM), API(API), PP(PP) {} 288 289 void MacroDefined(const Token &MacroNameToken, 290 const MacroDirective *MD) override { 291 auto *MacroInfo = MD->getMacroInfo(); 292 293 if (MacroInfo->isBuiltinMacro()) 294 return; 295 296 auto SourceLoc = MacroNameToken.getLocation(); 297 if (SM.isWrittenInBuiltinFile(SourceLoc) || 298 SM.isWrittenInCommandLineFile(SourceLoc)) 299 return; 300 301 PendingMacros.emplace_back(MacroNameToken, MD); 302 } 303 304 // If a macro gets undefined at some point during preprocessing of the inputs 305 // it means that it isn't an exposed API and we should therefore not add a 306 // macro definition for it. 307 void MacroUndefined(const Token &MacroNameToken, const MacroDefinition &MD, 308 const MacroDirective *Undef) override { 309 // If this macro wasn't previously defined we don't need to do anything 310 // here. 311 if (!Undef) 312 return; 313 314 llvm::erase_if(PendingMacros, [&MD, this](const PendingMacro &PM) { 315 return MD.getMacroInfo()->isIdenticalTo(*PM.MD->getMacroInfo(), PP, 316 /*Syntactically*/ false); 317 }); 318 } 319 320 void EndOfMainFile() override { 321 for (auto &PM : PendingMacros) { 322 // `isUsedForHeaderGuard` is only set when the preprocessor leaves the 323 // file so check for it here. 324 if (PM.MD->getMacroInfo()->isUsedForHeaderGuard()) 325 continue; 326 327 if (!shouldMacroBeIncluded(PM)) 328 continue; 329 330 StringRef Name = PM.MacroNameToken.getIdentifierInfo()->getName(); 331 PresumedLoc Loc = SM.getPresumedLoc(PM.MacroNameToken.getLocation()); 332 SmallString<128> USR; 333 index::generateUSRForMacro(Name, PM.MacroNameToken.getLocation(), SM, 334 USR); 335 336 API.createRecord<extractapi::MacroDefinitionRecord>( 337 USR, Name, SymbolReference(), Loc, 338 DeclarationFragmentsBuilder::getFragmentsForMacro(Name, PM.MD), 339 DeclarationFragmentsBuilder::getSubHeadingForMacro(Name), 340 SM.isInSystemHeader(PM.MacroNameToken.getLocation())); 341 } 342 343 PendingMacros.clear(); 344 } 345 346 protected: 347 struct PendingMacro { 348 Token MacroNameToken; 349 const MacroDirective *MD; 350 351 PendingMacro(const Token &MacroNameToken, const MacroDirective *MD) 352 : MacroNameToken(MacroNameToken), MD(MD) {} 353 }; 354 355 virtual bool shouldMacroBeIncluded(const PendingMacro &PM) { return true; } 356 357 const SourceManager &SM; 358 APISet &API; 359 Preprocessor &PP; 360 llvm::SmallVector<PendingMacro> PendingMacros; 361 }; 362 363 class APIMacroCallback : public MacroCallback { 364 public: 365 APIMacroCallback(const SourceManager &SM, APISet &API, Preprocessor &PP, 366 LocationFileChecker &LCF) 367 : MacroCallback(SM, API, PP), LCF(LCF) {} 368 369 bool shouldMacroBeIncluded(const PendingMacro &PM) override { 370 // Do not include macros from external files 371 return LCF(PM.MacroNameToken.getLocation()); 372 } 373 374 private: 375 LocationFileChecker &LCF; 376 }; 377 378 std::unique_ptr<llvm::raw_pwrite_stream> 379 createAdditionalSymbolGraphFile(CompilerInstance &CI, Twine BaseName) { 380 auto OutputDirectory = CI.getFrontendOpts().SymbolGraphOutputDir; 381 382 SmallString<256> FileName; 383 llvm::sys::path::append(FileName, OutputDirectory, 384 BaseName + ".symbols.json"); 385 return CI.createOutputFile( 386 FileName, /*Binary*/ false, /*RemoveFileOnSignal*/ false, 387 /*UseTemporary*/ true, /*CreateMissingDirectories*/ true); 388 } 389 390 } // namespace 391 392 void ExtractAPIActionBase::ImplEndSourceFileAction(CompilerInstance &CI) { 393 SymbolGraphSerializerOption SerializationOptions; 394 SerializationOptions.Compact = !CI.getFrontendOpts().EmitPrettySymbolGraphs; 395 SerializationOptions.EmitSymbolLabelsForTesting = 396 CI.getFrontendOpts().EmitSymbolGraphSymbolLabelsForTesting; 397 398 if (CI.getFrontendOpts().EmitExtensionSymbolGraphs) { 399 auto ConstructOutputFile = [&CI](Twine BaseName) { 400 return createAdditionalSymbolGraphFile(CI, BaseName); 401 }; 402 403 SymbolGraphSerializer::serializeWithExtensionGraphs( 404 *OS, *API, IgnoresList, ConstructOutputFile, SerializationOptions); 405 } else { 406 SymbolGraphSerializer::serializeMainSymbolGraph(*OS, *API, IgnoresList, 407 SerializationOptions); 408 } 409 410 // Flush the stream and close the main output stream. 411 OS.reset(); 412 } 413 414 std::unique_ptr<ASTConsumer> 415 ExtractAPIAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { 416 auto ProductName = CI.getFrontendOpts().ProductName; 417 418 if (CI.getFrontendOpts().SymbolGraphOutputDir.empty()) 419 OS = CI.createDefaultOutputFile(/*Binary*/ false, InFile, 420 /*Extension*/ "symbols.json", 421 /*RemoveFileOnSignal*/ false, 422 /*CreateMissingDirectories*/ true); 423 else 424 OS = createAdditionalSymbolGraphFile(CI, ProductName); 425 426 if (!OS) 427 return nullptr; 428 429 // Now that we have enough information about the language options and the 430 // target triple, let's create the APISet before anyone uses it. 431 API = std::make_unique<APISet>( 432 CI.getTarget().getTriple(), 433 CI.getFrontendOpts().Inputs.back().getKind().getLanguage(), ProductName); 434 435 auto LCF = std::make_unique<LocationFileChecker>(CI, KnownInputFiles); 436 437 CI.getPreprocessor().addPPCallbacks(std::make_unique<APIMacroCallback>( 438 CI.getSourceManager(), *API, CI.getPreprocessor(), *LCF)); 439 440 // Do not include location in anonymous decls. 441 PrintingPolicy Policy = CI.getASTContext().getPrintingPolicy(); 442 Policy.AnonymousTagLocations = false; 443 CI.getASTContext().setPrintingPolicy(Policy); 444 445 if (!CI.getFrontendOpts().ExtractAPIIgnoresFileList.empty()) { 446 llvm::handleAllErrors( 447 APIIgnoresList::create(CI.getFrontendOpts().ExtractAPIIgnoresFileList, 448 CI.getFileManager()) 449 .moveInto(IgnoresList), 450 [&CI](const IgnoresFileNotFound &Err) { 451 CI.getDiagnostics().Report( 452 diag::err_extract_api_ignores_file_not_found) 453 << Err.Path; 454 }); 455 } 456 457 return std::make_unique<ExtractAPIConsumer>(CI.getASTContext(), 458 std::move(LCF), *API); 459 } 460 461 bool ExtractAPIAction::PrepareToExecuteAction(CompilerInstance &CI) { 462 auto &Inputs = CI.getFrontendOpts().Inputs; 463 if (Inputs.empty()) 464 return true; 465 466 if (!CI.hasFileManager()) 467 if (!CI.createFileManager()) 468 return false; 469 470 auto Kind = Inputs[0].getKind(); 471 472 // Convert the header file inputs into a single input buffer. 473 SmallString<256> HeaderContents; 474 bool IsQuoted = false; 475 for (const FrontendInputFile &FIF : Inputs) { 476 if (Kind.isObjectiveC()) 477 HeaderContents += "#import"; 478 else 479 HeaderContents += "#include"; 480 481 StringRef FilePath = FIF.getFile(); 482 if (auto RelativeName = getRelativeIncludeName(CI, FilePath, &IsQuoted)) { 483 if (IsQuoted) 484 HeaderContents += " \""; 485 else 486 HeaderContents += " <"; 487 488 HeaderContents += *RelativeName; 489 490 if (IsQuoted) 491 HeaderContents += "\"\n"; 492 else 493 HeaderContents += ">\n"; 494 KnownInputFiles.emplace_back(static_cast<SmallString<32>>(*RelativeName), 495 IsQuoted); 496 } else { 497 HeaderContents += " \""; 498 HeaderContents += FilePath; 499 HeaderContents += "\"\n"; 500 KnownInputFiles.emplace_back(FilePath, true); 501 } 502 } 503 504 if (CI.getHeaderSearchOpts().Verbose) 505 CI.getVerboseOutputStream() << getInputBufferName() << ":\n" 506 << HeaderContents << "\n"; 507 508 Buffer = llvm::MemoryBuffer::getMemBufferCopy(HeaderContents, 509 getInputBufferName()); 510 511 // Set that buffer up as our "real" input in the CompilerInstance. 512 Inputs.clear(); 513 Inputs.emplace_back(Buffer->getMemBufferRef(), Kind, /*IsSystem*/ false); 514 515 return true; 516 } 517 518 void ExtractAPIAction::EndSourceFileAction() { 519 ImplEndSourceFileAction(getCompilerInstance()); 520 } 521 522 std::unique_ptr<ASTConsumer> 523 WrappingExtractAPIAction::CreateASTConsumer(CompilerInstance &CI, 524 StringRef InFile) { 525 auto OtherConsumer = WrapperFrontendAction::CreateASTConsumer(CI, InFile); 526 if (!OtherConsumer) 527 return nullptr; 528 529 CreatedASTConsumer = true; 530 531 ProductName = CI.getFrontendOpts().ProductName; 532 auto InputFilename = llvm::sys::path::filename(InFile); 533 OS = createAdditionalSymbolGraphFile(CI, InputFilename); 534 535 // Now that we have enough information about the language options and the 536 // target triple, let's create the APISet before anyone uses it. 537 API = std::make_unique<APISet>( 538 CI.getTarget().getTriple(), 539 CI.getFrontendOpts().Inputs.back().getKind().getLanguage(), ProductName); 540 541 CI.getPreprocessor().addPPCallbacks(std::make_unique<MacroCallback>( 542 CI.getSourceManager(), *API, CI.getPreprocessor())); 543 544 // Do not include location in anonymous decls. 545 PrintingPolicy Policy = CI.getASTContext().getPrintingPolicy(); 546 Policy.AnonymousTagLocations = false; 547 CI.getASTContext().setPrintingPolicy(Policy); 548 549 if (!CI.getFrontendOpts().ExtractAPIIgnoresFileList.empty()) { 550 llvm::handleAllErrors( 551 APIIgnoresList::create(CI.getFrontendOpts().ExtractAPIIgnoresFileList, 552 CI.getFileManager()) 553 .moveInto(IgnoresList), 554 [&CI](const IgnoresFileNotFound &Err) { 555 CI.getDiagnostics().Report( 556 diag::err_extract_api_ignores_file_not_found) 557 << Err.Path; 558 }); 559 } 560 561 auto WrappingConsumer = 562 std::make_unique<WrappingExtractAPIConsumer>(CI.getASTContext(), *API); 563 std::vector<std::unique_ptr<ASTConsumer>> Consumers; 564 Consumers.push_back(std::move(OtherConsumer)); 565 Consumers.push_back(std::move(WrappingConsumer)); 566 567 return std::make_unique<MultiplexConsumer>(std::move(Consumers)); 568 } 569 570 void WrappingExtractAPIAction::EndSourceFileAction() { 571 // Invoke wrapped action's method. 572 WrapperFrontendAction::EndSourceFileAction(); 573 574 if (CreatedASTConsumer) { 575 ImplEndSourceFileAction(getCompilerInstance()); 576 } 577 } 578