xref: /freebsd/contrib/llvm-project/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp (revision 7ebc7d1ab76b9d06be9400d6c9fc74fcc43603a1)
1 //===- ExtractAPI/ExtractAPIConsumer.cpp ------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements the ExtractAPIAction, and ASTConsumer to collect API
11 /// information.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/AST/ASTConcept.h"
16 #include "clang/AST/ASTConsumer.h"
17 #include "clang/AST/ASTContext.h"
18 #include "clang/AST/DeclObjC.h"
19 #include "clang/Basic/DiagnosticFrontend.h"
20 #include "clang/Basic/FileEntry.h"
21 #include "clang/Basic/SourceLocation.h"
22 #include "clang/Basic/SourceManager.h"
23 #include "clang/Basic/TargetInfo.h"
24 #include "clang/ExtractAPI/API.h"
25 #include "clang/ExtractAPI/APIIgnoresList.h"
26 #include "clang/ExtractAPI/ExtractAPIVisitor.h"
27 #include "clang/ExtractAPI/FrontendActions.h"
28 #include "clang/ExtractAPI/Serialization/SymbolGraphSerializer.h"
29 #include "clang/Frontend/ASTConsumers.h"
30 #include "clang/Frontend/CompilerInstance.h"
31 #include "clang/Frontend/FrontendOptions.h"
32 #include "clang/Frontend/MultiplexConsumer.h"
33 #include "clang/Index/USRGeneration.h"
34 #include "clang/InstallAPI/HeaderFile.h"
35 #include "clang/Lex/MacroInfo.h"
36 #include "clang/Lex/PPCallbacks.h"
37 #include "clang/Lex/Preprocessor.h"
38 #include "clang/Lex/PreprocessorOptions.h"
39 #include "llvm/ADT/DenseSet.h"
40 #include "llvm/ADT/STLExtras.h"
41 #include "llvm/ADT/SmallString.h"
42 #include "llvm/ADT/SmallVector.h"
43 #include "llvm/ADT/StringRef.h"
44 #include "llvm/Support/Casting.h"
45 #include "llvm/Support/Error.h"
46 #include "llvm/Support/FileSystem.h"
47 #include "llvm/Support/MemoryBuffer.h"
48 #include "llvm/Support/Path.h"
49 #include "llvm/Support/Regex.h"
50 #include "llvm/Support/raw_ostream.h"
51 #include <memory>
52 #include <optional>
53 #include <utility>
54 
55 using namespace clang;
56 using namespace extractapi;
57 
58 namespace {
59 
60 std::optional<std::string> getRelativeIncludeName(const CompilerInstance &CI,
61                                                   StringRef File,
62                                                   bool *IsQuoted = nullptr) {
63   assert(CI.hasFileManager() &&
64          "CompilerInstance does not have a FileNamager!");
65 
66   using namespace llvm::sys;
67   const auto &FS = CI.getVirtualFileSystem();
68 
69   SmallString<128> FilePath(File.begin(), File.end());
70   FS.makeAbsolute(FilePath);
71   path::remove_dots(FilePath, true);
72   FilePath = path::convert_to_slash(FilePath);
73   File = FilePath;
74 
75   // Checks whether `Dir` is a strict path prefix of `File`. If so returns
76   // the prefix length. Otherwise return 0.
77   auto CheckDir = [&](llvm::StringRef Dir) -> unsigned {
78     llvm::SmallString<32> DirPath(Dir.begin(), Dir.end());
79     FS.makeAbsolute(DirPath);
80     path::remove_dots(DirPath, true);
81     Dir = DirPath;
82     for (auto NI = path::begin(File), NE = path::end(File),
83               DI = path::begin(Dir), DE = path::end(Dir);
84          /*termination condition in loop*/; ++NI, ++DI) {
85       // '.' components in File are ignored.
86       while (NI != NE && *NI == ".")
87         ++NI;
88       if (NI == NE)
89         break;
90 
91       // '.' components in Dir are ignored.
92       while (DI != DE && *DI == ".")
93         ++DI;
94 
95       // Dir is a prefix of File, up to '.' components and choice of path
96       // separators.
97       if (DI == DE)
98         return NI - path::begin(File);
99 
100       // Consider all path separators equal.
101       if (NI->size() == 1 && DI->size() == 1 &&
102           path::is_separator(NI->front()) && path::is_separator(DI->front()))
103         continue;
104 
105       // Special case Apple .sdk folders since the search path is typically a
106       // symlink like `iPhoneSimulator14.5.sdk` while the file is instead
107       // located in `iPhoneSimulator.sdk` (the real folder).
108       if (NI->ends_with(".sdk") && DI->ends_with(".sdk")) {
109         StringRef NBasename = path::stem(*NI);
110         StringRef DBasename = path::stem(*DI);
111         if (DBasename.starts_with(NBasename))
112           continue;
113       }
114 
115       if (*NI != *DI)
116         break;
117     }
118     return 0;
119   };
120 
121   unsigned PrefixLength = 0;
122 
123   // Go through the search paths and find the first one that is a prefix of
124   // the header.
125   for (const auto &Entry : CI.getHeaderSearchOpts().UserEntries) {
126     // Note whether the match is found in a quoted entry.
127     if (IsQuoted)
128       *IsQuoted = Entry.Group == frontend::Quoted;
129 
130     if (auto EntryFile = CI.getFileManager().getOptionalFileRef(Entry.Path)) {
131       if (auto HMap = HeaderMap::Create(*EntryFile, CI.getFileManager())) {
132         // If this is a headermap entry, try to reverse lookup the full path
133         // for a spelled name before mapping.
134         StringRef SpelledFilename = HMap->reverseLookupFilename(File);
135         if (!SpelledFilename.empty())
136           return SpelledFilename.str();
137 
138         // No matching mapping in this headermap, try next search entry.
139         continue;
140       }
141     }
142 
143     // Entry is a directory search entry, try to check if it's a prefix of File.
144     PrefixLength = CheckDir(Entry.Path);
145     if (PrefixLength > 0) {
146       // The header is found in a framework path, construct the framework-style
147       // include name `<Framework/Header.h>`
148       if (Entry.IsFramework) {
149         SmallVector<StringRef, 4> Matches;
150         clang::installapi::HeaderFile::getFrameworkIncludeRule().match(
151             File, &Matches);
152         // Returned matches are always in stable order.
153         if (Matches.size() != 4)
154           return std::nullopt;
155 
156         return path::convert_to_slash(
157             (Matches[1].drop_front(Matches[1].rfind('/') + 1) + "/" +
158              Matches[3])
159                 .str());
160       }
161 
162       // The header is found in a normal search path, strip the search path
163       // prefix to get an include name.
164       return path::convert_to_slash(File.drop_front(PrefixLength));
165     }
166   }
167 
168   // Couldn't determine a include name, use full path instead.
169   return std::nullopt;
170 }
171 
172 std::optional<std::string> getRelativeIncludeName(const CompilerInstance &CI,
173                                                   FileEntryRef FE,
174                                                   bool *IsQuoted = nullptr) {
175   return getRelativeIncludeName(CI, FE.getNameAsRequested(), IsQuoted);
176 }
177 
178 struct LocationFileChecker {
179   bool operator()(SourceLocation Loc) {
180     // If the loc refers to a macro expansion we need to first get the file
181     // location of the expansion.
182     auto &SM = CI.getSourceManager();
183     auto FileLoc = SM.getFileLoc(Loc);
184     FileID FID = SM.getFileID(FileLoc);
185     if (FID.isInvalid())
186       return false;
187 
188     OptionalFileEntryRef File = SM.getFileEntryRefForID(FID);
189     if (!File)
190       return false;
191 
192     if (KnownFileEntries.count(*File))
193       return true;
194 
195     if (ExternalFileEntries.count(*File))
196       return false;
197 
198     // Try to reduce the include name the same way we tried to include it.
199     bool IsQuoted = false;
200     if (auto IncludeName = getRelativeIncludeName(CI, *File, &IsQuoted))
201       if (llvm::any_of(KnownFiles,
202                        [&IsQuoted, &IncludeName](const auto &KnownFile) {
203                          return KnownFile.first.equals(*IncludeName) &&
204                                 KnownFile.second == IsQuoted;
205                        })) {
206         KnownFileEntries.insert(*File);
207         return true;
208       }
209 
210     // Record that the file was not found to avoid future reverse lookup for
211     // the same file.
212     ExternalFileEntries.insert(*File);
213     return false;
214   }
215 
216   LocationFileChecker(const CompilerInstance &CI,
217                       SmallVector<std::pair<SmallString<32>, bool>> &KnownFiles)
218       : CI(CI), KnownFiles(KnownFiles), ExternalFileEntries() {
219     for (const auto &KnownFile : KnownFiles)
220       if (auto FileEntry = CI.getFileManager().getFile(KnownFile.first))
221         KnownFileEntries.insert(*FileEntry);
222   }
223 
224 private:
225   const CompilerInstance &CI;
226   SmallVector<std::pair<SmallString<32>, bool>> &KnownFiles;
227   llvm::DenseSet<const FileEntry *> KnownFileEntries;
228   llvm::DenseSet<const FileEntry *> ExternalFileEntries;
229 };
230 
231 struct BatchExtractAPIVisitor : ExtractAPIVisitor<BatchExtractAPIVisitor> {
232   bool shouldDeclBeIncluded(const Decl *D) const {
233     bool ShouldBeIncluded = true;
234     // Check that we have the definition for redeclarable types.
235     if (auto *TD = llvm::dyn_cast<TagDecl>(D))
236       ShouldBeIncluded = TD->isThisDeclarationADefinition();
237     else if (auto *Interface = llvm::dyn_cast<ObjCInterfaceDecl>(D))
238       ShouldBeIncluded = Interface->isThisDeclarationADefinition();
239     else if (auto *Protocol = llvm::dyn_cast<ObjCProtocolDecl>(D))
240       ShouldBeIncluded = Protocol->isThisDeclarationADefinition();
241 
242     ShouldBeIncluded = ShouldBeIncluded && LCF(D->getLocation());
243     return ShouldBeIncluded;
244   }
245 
246   BatchExtractAPIVisitor(LocationFileChecker &LCF, ASTContext &Context,
247                          APISet &API)
248       : ExtractAPIVisitor<BatchExtractAPIVisitor>(Context, API), LCF(LCF) {}
249 
250 private:
251   LocationFileChecker &LCF;
252 };
253 
254 class WrappingExtractAPIConsumer : public ASTConsumer {
255 public:
256   WrappingExtractAPIConsumer(ASTContext &Context, APISet &API)
257       : Visitor(Context, API) {}
258 
259   void HandleTranslationUnit(ASTContext &Context) override {
260     // Use ExtractAPIVisitor to traverse symbol declarations in the context.
261     Visitor.TraverseDecl(Context.getTranslationUnitDecl());
262   }
263 
264 private:
265   ExtractAPIVisitor<> Visitor;
266 };
267 
268 class ExtractAPIConsumer : public ASTConsumer {
269 public:
270   ExtractAPIConsumer(ASTContext &Context,
271                      std::unique_ptr<LocationFileChecker> LCF, APISet &API)
272       : Visitor(*LCF, Context, API), LCF(std::move(LCF)) {}
273 
274   void HandleTranslationUnit(ASTContext &Context) override {
275     // Use ExtractAPIVisitor to traverse symbol declarations in the context.
276     Visitor.TraverseDecl(Context.getTranslationUnitDecl());
277   }
278 
279 private:
280   BatchExtractAPIVisitor Visitor;
281   std::unique_ptr<LocationFileChecker> LCF;
282 };
283 
284 class MacroCallback : public PPCallbacks {
285 public:
286   MacroCallback(const SourceManager &SM, APISet &API, Preprocessor &PP)
287       : SM(SM), API(API), PP(PP) {}
288 
289   void MacroDefined(const Token &MacroNameToken,
290                     const MacroDirective *MD) override {
291     auto *MacroInfo = MD->getMacroInfo();
292 
293     if (MacroInfo->isBuiltinMacro())
294       return;
295 
296     auto SourceLoc = MacroNameToken.getLocation();
297     if (SM.isWrittenInBuiltinFile(SourceLoc) ||
298         SM.isWrittenInCommandLineFile(SourceLoc))
299       return;
300 
301     PendingMacros.emplace_back(MacroNameToken, MD);
302   }
303 
304   // If a macro gets undefined at some point during preprocessing of the inputs
305   // it means that it isn't an exposed API and we should therefore not add a
306   // macro definition for it.
307   void MacroUndefined(const Token &MacroNameToken, const MacroDefinition &MD,
308                       const MacroDirective *Undef) override {
309     // If this macro wasn't previously defined we don't need to do anything
310     // here.
311     if (!Undef)
312       return;
313 
314     llvm::erase_if(PendingMacros, [&MD, this](const PendingMacro &PM) {
315       return MD.getMacroInfo()->isIdenticalTo(*PM.MD->getMacroInfo(), PP,
316                                               /*Syntactically*/ false);
317     });
318   }
319 
320   void EndOfMainFile() override {
321     for (auto &PM : PendingMacros) {
322       // `isUsedForHeaderGuard` is only set when the preprocessor leaves the
323       // file so check for it here.
324       if (PM.MD->getMacroInfo()->isUsedForHeaderGuard())
325         continue;
326 
327       if (!shouldMacroBeIncluded(PM))
328         continue;
329 
330       StringRef Name = PM.MacroNameToken.getIdentifierInfo()->getName();
331       PresumedLoc Loc = SM.getPresumedLoc(PM.MacroNameToken.getLocation());
332       SmallString<128> USR;
333       index::generateUSRForMacro(Name, PM.MacroNameToken.getLocation(), SM,
334                                  USR);
335 
336       API.createRecord<extractapi::MacroDefinitionRecord>(
337           USR, Name, SymbolReference(), Loc,
338           DeclarationFragmentsBuilder::getFragmentsForMacro(Name, PM.MD),
339           DeclarationFragmentsBuilder::getSubHeadingForMacro(Name),
340           SM.isInSystemHeader(PM.MacroNameToken.getLocation()));
341     }
342 
343     PendingMacros.clear();
344   }
345 
346 protected:
347   struct PendingMacro {
348     Token MacroNameToken;
349     const MacroDirective *MD;
350 
351     PendingMacro(const Token &MacroNameToken, const MacroDirective *MD)
352         : MacroNameToken(MacroNameToken), MD(MD) {}
353   };
354 
355   virtual bool shouldMacroBeIncluded(const PendingMacro &PM) { return true; }
356 
357   const SourceManager &SM;
358   APISet &API;
359   Preprocessor &PP;
360   llvm::SmallVector<PendingMacro> PendingMacros;
361 };
362 
363 class APIMacroCallback : public MacroCallback {
364 public:
365   APIMacroCallback(const SourceManager &SM, APISet &API, Preprocessor &PP,
366                    LocationFileChecker &LCF)
367       : MacroCallback(SM, API, PP), LCF(LCF) {}
368 
369   bool shouldMacroBeIncluded(const PendingMacro &PM) override {
370     // Do not include macros from external files
371     return LCF(PM.MacroNameToken.getLocation());
372   }
373 
374 private:
375   LocationFileChecker &LCF;
376 };
377 
378 std::unique_ptr<llvm::raw_pwrite_stream>
379 createAdditionalSymbolGraphFile(CompilerInstance &CI, Twine BaseName) {
380   auto OutputDirectory = CI.getFrontendOpts().SymbolGraphOutputDir;
381 
382   SmallString<256> FileName;
383   llvm::sys::path::append(FileName, OutputDirectory,
384                           BaseName + ".symbols.json");
385   return CI.createOutputFile(
386       FileName, /*Binary*/ false, /*RemoveFileOnSignal*/ false,
387       /*UseTemporary*/ true, /*CreateMissingDirectories*/ true);
388 }
389 
390 } // namespace
391 
392 void ExtractAPIActionBase::ImplEndSourceFileAction(CompilerInstance &CI) {
393   SymbolGraphSerializerOption SerializationOptions;
394   SerializationOptions.Compact = !CI.getFrontendOpts().EmitPrettySymbolGraphs;
395   SerializationOptions.EmitSymbolLabelsForTesting =
396       CI.getFrontendOpts().EmitSymbolGraphSymbolLabelsForTesting;
397 
398   if (CI.getFrontendOpts().EmitExtensionSymbolGraphs) {
399     auto ConstructOutputFile = [&CI](Twine BaseName) {
400       return createAdditionalSymbolGraphFile(CI, BaseName);
401     };
402 
403     SymbolGraphSerializer::serializeWithExtensionGraphs(
404         *OS, *API, IgnoresList, ConstructOutputFile, SerializationOptions);
405   } else {
406     SymbolGraphSerializer::serializeMainSymbolGraph(*OS, *API, IgnoresList,
407                                                     SerializationOptions);
408   }
409 
410   // Flush the stream and close the main output stream.
411   OS.reset();
412 }
413 
414 std::unique_ptr<ASTConsumer>
415 ExtractAPIAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
416   auto ProductName = CI.getFrontendOpts().ProductName;
417 
418   if (CI.getFrontendOpts().SymbolGraphOutputDir.empty())
419     OS = CI.createDefaultOutputFile(/*Binary*/ false, InFile,
420                                     /*Extension*/ "symbols.json",
421                                     /*RemoveFileOnSignal*/ false,
422                                     /*CreateMissingDirectories*/ true);
423   else
424     OS = createAdditionalSymbolGraphFile(CI, ProductName);
425 
426   if (!OS)
427     return nullptr;
428 
429   // Now that we have enough information about the language options and the
430   // target triple, let's create the APISet before anyone uses it.
431   API = std::make_unique<APISet>(
432       CI.getTarget().getTriple(),
433       CI.getFrontendOpts().Inputs.back().getKind().getLanguage(), ProductName);
434 
435   auto LCF = std::make_unique<LocationFileChecker>(CI, KnownInputFiles);
436 
437   CI.getPreprocessor().addPPCallbacks(std::make_unique<APIMacroCallback>(
438       CI.getSourceManager(), *API, CI.getPreprocessor(), *LCF));
439 
440   // Do not include location in anonymous decls.
441   PrintingPolicy Policy = CI.getASTContext().getPrintingPolicy();
442   Policy.AnonymousTagLocations = false;
443   CI.getASTContext().setPrintingPolicy(Policy);
444 
445   if (!CI.getFrontendOpts().ExtractAPIIgnoresFileList.empty()) {
446     llvm::handleAllErrors(
447         APIIgnoresList::create(CI.getFrontendOpts().ExtractAPIIgnoresFileList,
448                                CI.getFileManager())
449             .moveInto(IgnoresList),
450         [&CI](const IgnoresFileNotFound &Err) {
451           CI.getDiagnostics().Report(
452               diag::err_extract_api_ignores_file_not_found)
453               << Err.Path;
454         });
455   }
456 
457   return std::make_unique<ExtractAPIConsumer>(CI.getASTContext(),
458                                               std::move(LCF), *API);
459 }
460 
461 bool ExtractAPIAction::PrepareToExecuteAction(CompilerInstance &CI) {
462   auto &Inputs = CI.getFrontendOpts().Inputs;
463   if (Inputs.empty())
464     return true;
465 
466   if (!CI.hasFileManager())
467     if (!CI.createFileManager())
468       return false;
469 
470   auto Kind = Inputs[0].getKind();
471 
472   // Convert the header file inputs into a single input buffer.
473   SmallString<256> HeaderContents;
474   bool IsQuoted = false;
475   for (const FrontendInputFile &FIF : Inputs) {
476     if (Kind.isObjectiveC())
477       HeaderContents += "#import";
478     else
479       HeaderContents += "#include";
480 
481     StringRef FilePath = FIF.getFile();
482     if (auto RelativeName = getRelativeIncludeName(CI, FilePath, &IsQuoted)) {
483       if (IsQuoted)
484         HeaderContents += " \"";
485       else
486         HeaderContents += " <";
487 
488       HeaderContents += *RelativeName;
489 
490       if (IsQuoted)
491         HeaderContents += "\"\n";
492       else
493         HeaderContents += ">\n";
494       KnownInputFiles.emplace_back(static_cast<SmallString<32>>(*RelativeName),
495                                    IsQuoted);
496     } else {
497       HeaderContents += " \"";
498       HeaderContents += FilePath;
499       HeaderContents += "\"\n";
500       KnownInputFiles.emplace_back(FilePath, true);
501     }
502   }
503 
504   if (CI.getHeaderSearchOpts().Verbose)
505     CI.getVerboseOutputStream() << getInputBufferName() << ":\n"
506                                 << HeaderContents << "\n";
507 
508   Buffer = llvm::MemoryBuffer::getMemBufferCopy(HeaderContents,
509                                                 getInputBufferName());
510 
511   // Set that buffer up as our "real" input in the CompilerInstance.
512   Inputs.clear();
513   Inputs.emplace_back(Buffer->getMemBufferRef(), Kind, /*IsSystem*/ false);
514 
515   return true;
516 }
517 
518 void ExtractAPIAction::EndSourceFileAction() {
519   ImplEndSourceFileAction(getCompilerInstance());
520 }
521 
522 std::unique_ptr<ASTConsumer>
523 WrappingExtractAPIAction::CreateASTConsumer(CompilerInstance &CI,
524                                             StringRef InFile) {
525   auto OtherConsumer = WrapperFrontendAction::CreateASTConsumer(CI, InFile);
526   if (!OtherConsumer)
527     return nullptr;
528 
529   CreatedASTConsumer = true;
530 
531   ProductName = CI.getFrontendOpts().ProductName;
532   auto InputFilename = llvm::sys::path::filename(InFile);
533   OS = createAdditionalSymbolGraphFile(CI, InputFilename);
534 
535   // Now that we have enough information about the language options and the
536   // target triple, let's create the APISet before anyone uses it.
537   API = std::make_unique<APISet>(
538       CI.getTarget().getTriple(),
539       CI.getFrontendOpts().Inputs.back().getKind().getLanguage(), ProductName);
540 
541   CI.getPreprocessor().addPPCallbacks(std::make_unique<MacroCallback>(
542       CI.getSourceManager(), *API, CI.getPreprocessor()));
543 
544   // Do not include location in anonymous decls.
545   PrintingPolicy Policy = CI.getASTContext().getPrintingPolicy();
546   Policy.AnonymousTagLocations = false;
547   CI.getASTContext().setPrintingPolicy(Policy);
548 
549   if (!CI.getFrontendOpts().ExtractAPIIgnoresFileList.empty()) {
550     llvm::handleAllErrors(
551         APIIgnoresList::create(CI.getFrontendOpts().ExtractAPIIgnoresFileList,
552                                CI.getFileManager())
553             .moveInto(IgnoresList),
554         [&CI](const IgnoresFileNotFound &Err) {
555           CI.getDiagnostics().Report(
556               diag::err_extract_api_ignores_file_not_found)
557               << Err.Path;
558         });
559   }
560 
561   auto WrappingConsumer =
562       std::make_unique<WrappingExtractAPIConsumer>(CI.getASTContext(), *API);
563   std::vector<std::unique_ptr<ASTConsumer>> Consumers;
564   Consumers.push_back(std::move(OtherConsumer));
565   Consumers.push_back(std::move(WrappingConsumer));
566 
567   return std::make_unique<MultiplexConsumer>(std::move(Consumers));
568 }
569 
570 void WrappingExtractAPIAction::EndSourceFileAction() {
571   // Invoke wrapped action's method.
572   WrapperFrontendAction::EndSourceFileAction();
573 
574   if (CreatedASTConsumer) {
575     ImplEndSourceFileAction(getCompilerInstance());
576   }
577 }
578