1 //===- ExtractAPI/ExtractAPIConsumer.cpp ------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements the ExtractAPIAction, and ASTConsumer to collect API 11 /// information. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/AST/ASTConsumer.h" 16 #include "clang/AST/ASTContext.h" 17 #include "clang/Basic/DiagnosticFrontend.h" 18 #include "clang/Basic/SourceLocation.h" 19 #include "clang/Basic/SourceManager.h" 20 #include "clang/Basic/TargetInfo.h" 21 #include "clang/ExtractAPI/API.h" 22 #include "clang/ExtractAPI/APIIgnoresList.h" 23 #include "clang/ExtractAPI/ExtractAPIVisitor.h" 24 #include "clang/ExtractAPI/FrontendActions.h" 25 #include "clang/ExtractAPI/Serialization/SymbolGraphSerializer.h" 26 #include "clang/Frontend/ASTConsumers.h" 27 #include "clang/Frontend/CompilerInstance.h" 28 #include "clang/Frontend/FrontendOptions.h" 29 #include "clang/Lex/MacroInfo.h" 30 #include "clang/Lex/PPCallbacks.h" 31 #include "clang/Lex/Preprocessor.h" 32 #include "clang/Lex/PreprocessorOptions.h" 33 #include "llvm/ADT/DenseSet.h" 34 #include "llvm/ADT/STLExtras.h" 35 #include "llvm/ADT/SmallVector.h" 36 #include "llvm/Support/Error.h" 37 #include "llvm/Support/FileSystem.h" 38 #include "llvm/Support/MemoryBuffer.h" 39 #include "llvm/Support/Path.h" 40 #include "llvm/Support/Regex.h" 41 #include "llvm/Support/raw_ostream.h" 42 #include <memory> 43 #include <optional> 44 #include <utility> 45 46 using namespace clang; 47 using namespace extractapi; 48 49 namespace { 50 51 std::optional<std::string> getRelativeIncludeName(const CompilerInstance &CI, 52 StringRef File, 53 bool *IsQuoted = nullptr) { 54 assert(CI.hasFileManager() && 55 "CompilerInstance does not have a FileNamager!"); 56 57 using namespace llvm::sys; 58 // Matches framework include patterns 59 const llvm::Regex Rule("/(.+)\\.framework/(.+)?Headers/(.+)"); 60 61 const auto &FS = CI.getVirtualFileSystem(); 62 63 SmallString<128> FilePath(File.begin(), File.end()); 64 FS.makeAbsolute(FilePath); 65 path::remove_dots(FilePath, true); 66 FilePath = path::convert_to_slash(FilePath); 67 File = FilePath; 68 69 // Checks whether `Dir` is a strict path prefix of `File`. If so returns 70 // the prefix length. Otherwise return 0. 71 auto CheckDir = [&](llvm::StringRef Dir) -> unsigned { 72 llvm::SmallString<32> DirPath(Dir.begin(), Dir.end()); 73 FS.makeAbsolute(DirPath); 74 path::remove_dots(DirPath, true); 75 Dir = DirPath; 76 for (auto NI = path::begin(File), NE = path::end(File), 77 DI = path::begin(Dir), DE = path::end(Dir); 78 /*termination condition in loop*/; ++NI, ++DI) { 79 // '.' components in File are ignored. 80 while (NI != NE && *NI == ".") 81 ++NI; 82 if (NI == NE) 83 break; 84 85 // '.' components in Dir are ignored. 86 while (DI != DE && *DI == ".") 87 ++DI; 88 89 // Dir is a prefix of File, up to '.' components and choice of path 90 // separators. 91 if (DI == DE) 92 return NI - path::begin(File); 93 94 // Consider all path separators equal. 95 if (NI->size() == 1 && DI->size() == 1 && 96 path::is_separator(NI->front()) && path::is_separator(DI->front())) 97 continue; 98 99 // Special case Apple .sdk folders since the search path is typically a 100 // symlink like `iPhoneSimulator14.5.sdk` while the file is instead 101 // located in `iPhoneSimulator.sdk` (the real folder). 102 if (NI->endswith(".sdk") && DI->endswith(".sdk")) { 103 StringRef NBasename = path::stem(*NI); 104 StringRef DBasename = path::stem(*DI); 105 if (DBasename.startswith(NBasename)) 106 continue; 107 } 108 109 if (*NI != *DI) 110 break; 111 } 112 return 0; 113 }; 114 115 unsigned PrefixLength = 0; 116 117 // Go through the search paths and find the first one that is a prefix of 118 // the header. 119 for (const auto &Entry : CI.getHeaderSearchOpts().UserEntries) { 120 // Note whether the match is found in a quoted entry. 121 if (IsQuoted) 122 *IsQuoted = Entry.Group == frontend::Quoted; 123 124 if (auto EntryFile = CI.getFileManager().getOptionalFileRef(Entry.Path)) { 125 if (auto HMap = HeaderMap::Create(*EntryFile, CI.getFileManager())) { 126 // If this is a headermap entry, try to reverse lookup the full path 127 // for a spelled name before mapping. 128 StringRef SpelledFilename = HMap->reverseLookupFilename(File); 129 if (!SpelledFilename.empty()) 130 return SpelledFilename.str(); 131 132 // No matching mapping in this headermap, try next search entry. 133 continue; 134 } 135 } 136 137 // Entry is a directory search entry, try to check if it's a prefix of File. 138 PrefixLength = CheckDir(Entry.Path); 139 if (PrefixLength > 0) { 140 // The header is found in a framework path, construct the framework-style 141 // include name `<Framework/Header.h>` 142 if (Entry.IsFramework) { 143 SmallVector<StringRef, 4> Matches; 144 Rule.match(File, &Matches); 145 // Returned matches are always in stable order. 146 if (Matches.size() != 4) 147 return std::nullopt; 148 149 return path::convert_to_slash( 150 (Matches[1].drop_front(Matches[1].rfind('/') + 1) + "/" + 151 Matches[3]) 152 .str()); 153 } 154 155 // The header is found in a normal search path, strip the search path 156 // prefix to get an include name. 157 return path::convert_to_slash(File.drop_front(PrefixLength)); 158 } 159 } 160 161 // Couldn't determine a include name, use full path instead. 162 return std::nullopt; 163 } 164 165 struct LocationFileChecker { 166 bool operator()(SourceLocation Loc) { 167 // If the loc refers to a macro expansion we need to first get the file 168 // location of the expansion. 169 auto &SM = CI.getSourceManager(); 170 auto FileLoc = SM.getFileLoc(Loc); 171 FileID FID = SM.getFileID(FileLoc); 172 if (FID.isInvalid()) 173 return false; 174 175 const auto *File = SM.getFileEntryForID(FID); 176 if (!File) 177 return false; 178 179 if (KnownFileEntries.count(File)) 180 return true; 181 182 if (ExternalFileEntries.count(File)) 183 return false; 184 185 StringRef FileName = File->tryGetRealPathName().empty() 186 ? File->getName() 187 : File->tryGetRealPathName(); 188 189 // Try to reduce the include name the same way we tried to include it. 190 bool IsQuoted = false; 191 if (auto IncludeName = getRelativeIncludeName(CI, FileName, &IsQuoted)) 192 if (llvm::any_of(KnownFiles, 193 [&IsQuoted, &IncludeName](const auto &KnownFile) { 194 return KnownFile.first.equals(*IncludeName) && 195 KnownFile.second == IsQuoted; 196 })) { 197 KnownFileEntries.insert(File); 198 return true; 199 } 200 201 // Record that the file was not found to avoid future reverse lookup for 202 // the same file. 203 ExternalFileEntries.insert(File); 204 return false; 205 } 206 207 LocationFileChecker(const CompilerInstance &CI, 208 SmallVector<std::pair<SmallString<32>, bool>> &KnownFiles) 209 : CI(CI), KnownFiles(KnownFiles), ExternalFileEntries() { 210 for (const auto &KnownFile : KnownFiles) 211 if (auto FileEntry = CI.getFileManager().getFile(KnownFile.first)) 212 KnownFileEntries.insert(*FileEntry); 213 } 214 215 private: 216 const CompilerInstance &CI; 217 SmallVector<std::pair<SmallString<32>, bool>> &KnownFiles; 218 llvm::DenseSet<const FileEntry *> KnownFileEntries; 219 llvm::DenseSet<const FileEntry *> ExternalFileEntries; 220 }; 221 222 class ExtractAPIConsumer : public ASTConsumer { 223 public: 224 ExtractAPIConsumer(ASTContext &Context, 225 std::unique_ptr<LocationFileChecker> LCF, APISet &API) 226 : Visitor(Context, *LCF, API), LCF(std::move(LCF)) {} 227 228 void HandleTranslationUnit(ASTContext &Context) override { 229 // Use ExtractAPIVisitor to traverse symbol declarations in the context. 230 Visitor.TraverseDecl(Context.getTranslationUnitDecl()); 231 } 232 233 private: 234 ExtractAPIVisitor Visitor; 235 std::unique_ptr<LocationFileChecker> LCF; 236 }; 237 238 class MacroCallback : public PPCallbacks { 239 public: 240 MacroCallback(const SourceManager &SM, LocationFileChecker &LCF, APISet &API, 241 Preprocessor &PP) 242 : SM(SM), LCF(LCF), API(API), PP(PP) {} 243 244 void MacroDefined(const Token &MacroNameToken, 245 const MacroDirective *MD) override { 246 auto *MacroInfo = MD->getMacroInfo(); 247 248 if (MacroInfo->isBuiltinMacro()) 249 return; 250 251 auto SourceLoc = MacroNameToken.getLocation(); 252 if (SM.isWrittenInBuiltinFile(SourceLoc) || 253 SM.isWrittenInCommandLineFile(SourceLoc)) 254 return; 255 256 PendingMacros.emplace_back(MacroNameToken, MD); 257 } 258 259 // If a macro gets undefined at some point during preprocessing of the inputs 260 // it means that it isn't an exposed API and we should therefore not add a 261 // macro definition for it. 262 void MacroUndefined(const Token &MacroNameToken, const MacroDefinition &MD, 263 const MacroDirective *Undef) override { 264 // If this macro wasn't previously defined we don't need to do anything 265 // here. 266 if (!Undef) 267 return; 268 269 llvm::erase_if(PendingMacros, [&MD, this](const PendingMacro &PM) { 270 return MD.getMacroInfo()->isIdenticalTo(*PM.MD->getMacroInfo(), PP, 271 /*Syntactically*/ false); 272 }); 273 } 274 275 void EndOfMainFile() override { 276 for (auto &PM : PendingMacros) { 277 // `isUsedForHeaderGuard` is only set when the preprocessor leaves the 278 // file so check for it here. 279 if (PM.MD->getMacroInfo()->isUsedForHeaderGuard()) 280 continue; 281 282 if (!LCF(PM.MacroNameToken.getLocation())) 283 continue; 284 285 StringRef Name = PM.MacroNameToken.getIdentifierInfo()->getName(); 286 PresumedLoc Loc = SM.getPresumedLoc(PM.MacroNameToken.getLocation()); 287 StringRef USR = 288 API.recordUSRForMacro(Name, PM.MacroNameToken.getLocation(), SM); 289 290 API.addMacroDefinition( 291 Name, USR, Loc, 292 DeclarationFragmentsBuilder::getFragmentsForMacro(Name, PM.MD), 293 DeclarationFragmentsBuilder::getSubHeadingForMacro(Name), 294 SM.isInSystemHeader(PM.MacroNameToken.getLocation())); 295 } 296 297 PendingMacros.clear(); 298 } 299 300 private: 301 struct PendingMacro { 302 Token MacroNameToken; 303 const MacroDirective *MD; 304 305 PendingMacro(const Token &MacroNameToken, const MacroDirective *MD) 306 : MacroNameToken(MacroNameToken), MD(MD) {} 307 }; 308 309 const SourceManager &SM; 310 LocationFileChecker &LCF; 311 APISet &API; 312 Preprocessor &PP; 313 llvm::SmallVector<PendingMacro> PendingMacros; 314 }; 315 316 } // namespace 317 318 std::unique_ptr<ASTConsumer> 319 ExtractAPIAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { 320 OS = CreateOutputFile(CI, InFile); 321 if (!OS) 322 return nullptr; 323 324 auto ProductName = CI.getFrontendOpts().ProductName; 325 326 // Now that we have enough information about the language options and the 327 // target triple, let's create the APISet before anyone uses it. 328 API = std::make_unique<APISet>( 329 CI.getTarget().getTriple(), 330 CI.getFrontendOpts().Inputs.back().getKind().getLanguage(), ProductName); 331 332 auto LCF = std::make_unique<LocationFileChecker>(CI, KnownInputFiles); 333 334 CI.getPreprocessor().addPPCallbacks(std::make_unique<MacroCallback>( 335 CI.getSourceManager(), *LCF, *API, CI.getPreprocessor())); 336 337 // Do not include location in anonymous decls. 338 PrintingPolicy Policy = CI.getASTContext().getPrintingPolicy(); 339 Policy.AnonymousTagLocations = false; 340 CI.getASTContext().setPrintingPolicy(Policy); 341 342 if (!CI.getFrontendOpts().ExtractAPIIgnoresFile.empty()) { 343 llvm::handleAllErrors( 344 APIIgnoresList::create(CI.getFrontendOpts().ExtractAPIIgnoresFile, 345 CI.getFileManager()) 346 .moveInto(IgnoresList), 347 [&CI](const IgnoresFileNotFound &Err) { 348 CI.getDiagnostics().Report( 349 diag::err_extract_api_ignores_file_not_found) 350 << Err.Path; 351 }); 352 } 353 354 return std::make_unique<ExtractAPIConsumer>(CI.getASTContext(), 355 std::move(LCF), *API); 356 } 357 358 bool ExtractAPIAction::PrepareToExecuteAction(CompilerInstance &CI) { 359 auto &Inputs = CI.getFrontendOpts().Inputs; 360 if (Inputs.empty()) 361 return true; 362 363 if (!CI.hasFileManager()) 364 if (!CI.createFileManager()) 365 return false; 366 367 auto Kind = Inputs[0].getKind(); 368 369 // Convert the header file inputs into a single input buffer. 370 SmallString<256> HeaderContents; 371 bool IsQuoted = false; 372 for (const FrontendInputFile &FIF : Inputs) { 373 if (Kind.isObjectiveC()) 374 HeaderContents += "#import"; 375 else 376 HeaderContents += "#include"; 377 378 StringRef FilePath = FIF.getFile(); 379 if (auto RelativeName = getRelativeIncludeName(CI, FilePath, &IsQuoted)) { 380 if (IsQuoted) 381 HeaderContents += " \""; 382 else 383 HeaderContents += " <"; 384 385 HeaderContents += *RelativeName; 386 387 if (IsQuoted) 388 HeaderContents += "\"\n"; 389 else 390 HeaderContents += ">\n"; 391 KnownInputFiles.emplace_back(static_cast<SmallString<32>>(*RelativeName), 392 IsQuoted); 393 } else { 394 HeaderContents += " \""; 395 HeaderContents += FilePath; 396 HeaderContents += "\"\n"; 397 KnownInputFiles.emplace_back(FilePath, true); 398 } 399 } 400 401 if (CI.getHeaderSearchOpts().Verbose) 402 CI.getVerboseOutputStream() << getInputBufferName() << ":\n" 403 << HeaderContents << "\n"; 404 405 Buffer = llvm::MemoryBuffer::getMemBufferCopy(HeaderContents, 406 getInputBufferName()); 407 408 // Set that buffer up as our "real" input in the CompilerInstance. 409 Inputs.clear(); 410 Inputs.emplace_back(Buffer->getMemBufferRef(), Kind, /*IsSystem*/ false); 411 412 return true; 413 } 414 415 void ExtractAPIAction::EndSourceFileAction() { 416 if (!OS) 417 return; 418 419 // Setup a SymbolGraphSerializer to write out collected API information in 420 // the Symbol Graph format. 421 // FIXME: Make the kind of APISerializer configurable. 422 SymbolGraphSerializer SGSerializer(*API, IgnoresList); 423 SGSerializer.serialize(*OS); 424 OS.reset(); 425 } 426 427 std::unique_ptr<raw_pwrite_stream> 428 ExtractAPIAction::CreateOutputFile(CompilerInstance &CI, StringRef InFile) { 429 std::unique_ptr<raw_pwrite_stream> OS = 430 CI.createDefaultOutputFile(/*Binary=*/false, InFile, /*Extension=*/"json", 431 /*RemoveFileOnSignal=*/false); 432 if (!OS) 433 return nullptr; 434 return OS; 435 } 436