xref: /freebsd/contrib/llvm-project/clang/lib/Basic/Sarif.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1  //===-- clang/Basic/Sarif.cpp - SarifDocumentWriter class definition ------===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  ///
9  /// \file
10  /// This file contains the definition of the SarifDocumentWriter class, and
11  /// associated builders such as:
12  /// - \ref SarifArtifact
13  /// - \ref SarifArtifactLocation
14  /// - \ref SarifRule
15  /// - \ref SarifResult
16  //===----------------------------------------------------------------------===//
17  #include "clang/Basic/Sarif.h"
18  #include "clang/Basic/SourceLocation.h"
19  #include "clang/Basic/SourceManager.h"
20  #include "llvm/ADT/ArrayRef.h"
21  #include "llvm/ADT/STLExtras.h"
22  #include "llvm/ADT/StringExtras.h"
23  #include "llvm/ADT/StringRef.h"
24  #include "llvm/Support/ConvertUTF.h"
25  #include "llvm/Support/JSON.h"
26  #include "llvm/Support/Path.h"
27  
28  #include <optional>
29  #include <string>
30  #include <utility>
31  
32  using namespace clang;
33  using namespace llvm;
34  
35  using clang::detail::SarifArtifact;
36  using clang::detail::SarifArtifactLocation;
37  
getFileName(FileEntryRef FE)38  static StringRef getFileName(FileEntryRef FE) {
39    StringRef Filename = FE.getFileEntry().tryGetRealPathName();
40    if (Filename.empty())
41      Filename = FE.getName();
42    return Filename;
43  }
44  /// \name URI
45  /// @{
46  
47  /// \internal
48  /// \brief
49  /// Return the RFC3986 encoding of the input character.
50  ///
51  /// \param C Character to encode to RFC3986.
52  ///
53  /// \return The RFC3986 representation of \c C.
percentEncodeURICharacter(char C)54  static std::string percentEncodeURICharacter(char C) {
55    // RFC 3986 claims alpha, numeric, and this handful of
56    // characters are not reserved for the path component and
57    // should be written out directly. Otherwise, percent
58    // encode the character and write that out instead of the
59    // reserved character.
60    if (llvm::isAlnum(C) || StringRef("-._~:@!$&'()*+,;=").contains(C))
61      return std::string(&C, 1);
62    return "%" + llvm::toHex(StringRef(&C, 1));
63  }
64  
65  /// \internal
66  /// \brief Return a URI representing the given file name.
67  ///
68  /// \param Filename The filename to be represented as URI.
69  ///
70  /// \return RFC3986 URI representing the input file name.
fileNameToURI(StringRef Filename)71  static std::string fileNameToURI(StringRef Filename) {
72    SmallString<32> Ret = StringRef("file://");
73  
74    // Get the root name to see if it has a URI authority.
75    StringRef Root = sys::path::root_name(Filename);
76    if (Root.starts_with("//")) {
77      // There is an authority, so add it to the URI.
78      Ret += Root.drop_front(2).str();
79    } else if (!Root.empty()) {
80      // There is no authority, so end the component and add the root to the URI.
81      Ret += Twine("/" + Root).str();
82    }
83  
84    auto Iter = sys::path::begin(Filename), End = sys::path::end(Filename);
85    assert(Iter != End && "Expected there to be a non-root path component.");
86    // Add the rest of the path components, encoding any reserved characters;
87    // we skip past the first path component, as it was handled it above.
88    for (StringRef Component : llvm::make_range(++Iter, End)) {
89      // For reasons unknown to me, we may get a backslash with Windows native
90      // paths for the initial backslash following the drive component, which
91      // we need to ignore as a URI path part.
92      if (Component == "\\")
93        continue;
94  
95      // Add the separator between the previous path part and the one being
96      // currently processed.
97      Ret += "/";
98  
99      // URI encode the part.
100      for (char C : Component) {
101        Ret += percentEncodeURICharacter(C);
102      }
103    }
104  
105    return std::string(Ret);
106  }
107  ///  @}
108  
109  /// \brief Calculate the column position expressed in the number of UTF-8 code
110  /// points from column start to the source location
111  ///
112  /// \param Loc The source location whose column needs to be calculated.
113  /// \param TokenLen Optional hint for when the token is multiple bytes long.
114  ///
115  /// \return The column number as a UTF-8 aware byte offset from column start to
116  /// the effective source location.
adjustColumnPos(FullSourceLoc Loc,unsigned int TokenLen=0)117  static unsigned int adjustColumnPos(FullSourceLoc Loc,
118                                      unsigned int TokenLen = 0) {
119    assert(!Loc.isInvalid() && "invalid Loc when adjusting column position");
120  
121    std::pair<FileID, unsigned> LocInfo = Loc.getDecomposedExpansionLoc();
122    std::optional<MemoryBufferRef> Buf =
123        Loc.getManager().getBufferOrNone(LocInfo.first);
124    assert(Buf && "got an invalid buffer for the location's file");
125    assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) &&
126           "token extends past end of buffer?");
127  
128    // Adjust the offset to be the start of the line, since we'll be counting
129    // Unicode characters from there until our column offset.
130    unsigned int Off = LocInfo.second - (Loc.getExpansionColumnNumber() - 1);
131    unsigned int Ret = 1;
132    while (Off < (LocInfo.second + TokenLen)) {
133      Off += getNumBytesForUTF8(Buf->getBuffer()[Off]);
134      Ret++;
135    }
136  
137    return Ret;
138  }
139  
140  /// \name SARIF Utilities
141  /// @{
142  
143  /// \internal
createMessage(StringRef Text)144  json::Object createMessage(StringRef Text) {
145    return json::Object{{"text", Text.str()}};
146  }
147  
148  /// \internal
149  /// \pre CharSourceRange must be a token range
createTextRegion(const SourceManager & SM,const CharSourceRange & R)150  static json::Object createTextRegion(const SourceManager &SM,
151                                       const CharSourceRange &R) {
152    FullSourceLoc BeginCharLoc{R.getBegin(), SM};
153    FullSourceLoc EndCharLoc{R.getEnd(), SM};
154    json::Object Region{{"startLine", BeginCharLoc.getExpansionLineNumber()},
155                        {"startColumn", adjustColumnPos(BeginCharLoc)}};
156  
157    if (BeginCharLoc == EndCharLoc) {
158      Region["endColumn"] = adjustColumnPos(BeginCharLoc);
159    } else {
160      Region["endLine"] = EndCharLoc.getExpansionLineNumber();
161      Region["endColumn"] = adjustColumnPos(EndCharLoc);
162    }
163    return Region;
164  }
165  
createLocation(json::Object && PhysicalLocation,StringRef Message="")166  static json::Object createLocation(json::Object &&PhysicalLocation,
167                                     StringRef Message = "") {
168    json::Object Ret{{"physicalLocation", std::move(PhysicalLocation)}};
169    if (!Message.empty())
170      Ret.insert({"message", createMessage(Message)});
171    return Ret;
172  }
173  
importanceToStr(ThreadFlowImportance I)174  static StringRef importanceToStr(ThreadFlowImportance I) {
175    switch (I) {
176    case ThreadFlowImportance::Important:
177      return "important";
178    case ThreadFlowImportance::Essential:
179      return "essential";
180    case ThreadFlowImportance::Unimportant:
181      return "unimportant";
182    }
183    llvm_unreachable("Fully covered switch is not so fully covered");
184  }
185  
resultLevelToStr(SarifResultLevel R)186  static StringRef resultLevelToStr(SarifResultLevel R) {
187    switch (R) {
188    case SarifResultLevel::None:
189      return "none";
190    case SarifResultLevel::Note:
191      return "note";
192    case SarifResultLevel::Warning:
193      return "warning";
194    case SarifResultLevel::Error:
195      return "error";
196    }
197    llvm_unreachable("Potentially un-handled SarifResultLevel. "
198                     "Is the switch not fully covered?");
199  }
200  
201  static json::Object
createThreadFlowLocation(json::Object && Location,const ThreadFlowImportance & Importance)202  createThreadFlowLocation(json::Object &&Location,
203                           const ThreadFlowImportance &Importance) {
204    return json::Object{{"location", std::move(Location)},
205                        {"importance", importanceToStr(Importance)}};
206  }
207  ///  @}
208  
209  json::Object
createPhysicalLocation(const CharSourceRange & R)210  SarifDocumentWriter::createPhysicalLocation(const CharSourceRange &R) {
211    assert(R.isValid() &&
212           "Cannot create a physicalLocation from invalid SourceRange!");
213    assert(R.isCharRange() &&
214           "Cannot create a physicalLocation from a token range!");
215    FullSourceLoc Start{R.getBegin(), SourceMgr};
216    OptionalFileEntryRef FE = Start.getExpansionLoc().getFileEntryRef();
217    assert(FE && "Diagnostic does not exist within a valid file!");
218  
219    const std::string &FileURI = fileNameToURI(getFileName(*FE));
220    auto I = CurrentArtifacts.find(FileURI);
221  
222    if (I == CurrentArtifacts.end()) {
223      uint32_t Idx = static_cast<uint32_t>(CurrentArtifacts.size());
224      const SarifArtifactLocation &Location =
225          SarifArtifactLocation::create(FileURI).setIndex(Idx);
226      const SarifArtifact &Artifact = SarifArtifact::create(Location)
227                                          .setRoles({"resultFile"})
228                                          .setLength(FE->getSize())
229                                          .setMimeType("text/plain");
230      auto StatusIter = CurrentArtifacts.insert({FileURI, Artifact});
231      // If inserted, ensure the original iterator points to the newly inserted
232      // element, so it can be used downstream.
233      if (StatusIter.second)
234        I = StatusIter.first;
235    }
236    assert(I != CurrentArtifacts.end() && "Failed to insert new artifact");
237    const SarifArtifactLocation &Location = I->second.Location;
238    json::Object ArtifactLocationObject{{"uri", Location.URI}};
239    if (Location.Index.has_value())
240      ArtifactLocationObject["index"] = *Location.Index;
241    return json::Object{{{"artifactLocation", std::move(ArtifactLocationObject)},
242                         {"region", createTextRegion(SourceMgr, R)}}};
243  }
244  
getCurrentTool()245  json::Object &SarifDocumentWriter::getCurrentTool() {
246    assert(!Closed && "SARIF Document is closed. "
247                      "Need to call createRun() before using getcurrentTool!");
248  
249    // Since Closed = false here, expect there to be at least 1 Run, anything
250    // else is an invalid state.
251    assert(!Runs.empty() && "There are no runs associated with the document!");
252  
253    return *Runs.back().getAsObject()->get("tool")->getAsObject();
254  }
255  
reset()256  void SarifDocumentWriter::reset() {
257    CurrentRules.clear();
258    CurrentArtifacts.clear();
259  }
260  
endRun()261  void SarifDocumentWriter::endRun() {
262    // Exit early if trying to close a closed Document.
263    if (Closed) {
264      reset();
265      return;
266    }
267  
268    // Since Closed = false here, expect there to be at least 1 Run, anything
269    // else is an invalid state.
270    assert(!Runs.empty() && "There are no runs associated with the document!");
271  
272    // Flush all the rules.
273    json::Object &Tool = getCurrentTool();
274    json::Array Rules;
275    for (const SarifRule &R : CurrentRules) {
276      json::Object Config{
277          {"enabled", R.DefaultConfiguration.Enabled},
278          {"level", resultLevelToStr(R.DefaultConfiguration.Level)},
279          {"rank", R.DefaultConfiguration.Rank}};
280      json::Object Rule{
281          {"name", R.Name},
282          {"id", R.Id},
283          {"fullDescription", json::Object{{"text", R.Description}}},
284          {"defaultConfiguration", std::move(Config)}};
285      if (!R.HelpURI.empty())
286        Rule["helpUri"] = R.HelpURI;
287      Rules.emplace_back(std::move(Rule));
288    }
289    json::Object &Driver = *Tool.getObject("driver");
290    Driver["rules"] = std::move(Rules);
291  
292    // Flush all the artifacts.
293    json::Object &Run = getCurrentRun();
294    json::Array *Artifacts = Run.getArray("artifacts");
295    SmallVector<std::pair<StringRef, SarifArtifact>, 0> Vec;
296    for (const auto &[K, V] : CurrentArtifacts)
297      Vec.emplace_back(K, V);
298    llvm::sort(Vec, llvm::less_first());
299    for (const auto &[_, A] : Vec) {
300      json::Object Loc{{"uri", A.Location.URI}};
301      if (A.Location.Index.has_value()) {
302        Loc["index"] = static_cast<int64_t>(*A.Location.Index);
303      }
304      json::Object Artifact;
305      Artifact["location"] = std::move(Loc);
306      if (A.Length.has_value())
307        Artifact["length"] = static_cast<int64_t>(*A.Length);
308      if (!A.Roles.empty())
309        Artifact["roles"] = json::Array(A.Roles);
310      if (!A.MimeType.empty())
311        Artifact["mimeType"] = A.MimeType;
312      if (A.Offset.has_value())
313        Artifact["offset"] = *A.Offset;
314      Artifacts->push_back(json::Value(std::move(Artifact)));
315    }
316  
317    // Clear, reset temporaries before next run.
318    reset();
319  
320    // Mark the document as closed.
321    Closed = true;
322  }
323  
324  json::Array
createThreadFlows(ArrayRef<ThreadFlow> ThreadFlows)325  SarifDocumentWriter::createThreadFlows(ArrayRef<ThreadFlow> ThreadFlows) {
326    json::Object Ret{{"locations", json::Array{}}};
327    json::Array Locs;
328    for (const auto &ThreadFlow : ThreadFlows) {
329      json::Object PLoc = createPhysicalLocation(ThreadFlow.Range);
330      json::Object Loc = createLocation(std::move(PLoc), ThreadFlow.Message);
331      Locs.emplace_back(
332          createThreadFlowLocation(std::move(Loc), ThreadFlow.Importance));
333    }
334    Ret["locations"] = std::move(Locs);
335    return json::Array{std::move(Ret)};
336  }
337  
338  json::Object
createCodeFlow(ArrayRef<ThreadFlow> ThreadFlows)339  SarifDocumentWriter::createCodeFlow(ArrayRef<ThreadFlow> ThreadFlows) {
340    return json::Object{{"threadFlows", createThreadFlows(ThreadFlows)}};
341  }
342  
createRun(StringRef ShortToolName,StringRef LongToolName,StringRef ToolVersion)343  void SarifDocumentWriter::createRun(StringRef ShortToolName,
344                                      StringRef LongToolName,
345                                      StringRef ToolVersion) {
346    // Clear resources associated with a previous run.
347    endRun();
348  
349    // Signify a new run has begun.
350    Closed = false;
351  
352    json::Object Tool{
353        {"driver",
354         json::Object{{"name", ShortToolName},
355                      {"fullName", LongToolName},
356                      {"language", "en-US"},
357                      {"version", ToolVersion},
358                      {"informationUri",
359                       "https://clang.llvm.org/docs/UsersManual.html"}}}};
360    json::Object TheRun{{"tool", std::move(Tool)},
361                        {"results", {}},
362                        {"artifacts", {}},
363                        {"columnKind", "unicodeCodePoints"}};
364    Runs.emplace_back(std::move(TheRun));
365  }
366  
getCurrentRun()367  json::Object &SarifDocumentWriter::getCurrentRun() {
368    assert(!Closed &&
369           "SARIF Document is closed. "
370           "Can only getCurrentRun() if document is opened via createRun(), "
371           "create a run first");
372  
373    // Since Closed = false here, expect there to be at least 1 Run, anything
374    // else is an invalid state.
375    assert(!Runs.empty() && "There are no runs associated with the document!");
376    return *Runs.back().getAsObject();
377  }
378  
createRule(const SarifRule & Rule)379  size_t SarifDocumentWriter::createRule(const SarifRule &Rule) {
380    size_t Ret = CurrentRules.size();
381    CurrentRules.emplace_back(Rule);
382    return Ret;
383  }
384  
appendResult(const SarifResult & Result)385  void SarifDocumentWriter::appendResult(const SarifResult &Result) {
386    size_t RuleIdx = Result.RuleIdx;
387    assert(RuleIdx < CurrentRules.size() &&
388           "Trying to reference a rule that doesn't exist");
389    const SarifRule &Rule = CurrentRules[RuleIdx];
390    assert(Rule.DefaultConfiguration.Enabled &&
391           "Cannot add a result referencing a disabled Rule");
392    json::Object Ret{{"message", createMessage(Result.DiagnosticMessage)},
393                     {"ruleIndex", static_cast<int64_t>(RuleIdx)},
394                     {"ruleId", Rule.Id}};
395    if (!Result.Locations.empty()) {
396      json::Array Locs;
397      for (auto &Range : Result.Locations) {
398        Locs.emplace_back(createLocation(createPhysicalLocation(Range)));
399      }
400      Ret["locations"] = std::move(Locs);
401    }
402    if (!Result.ThreadFlows.empty())
403      Ret["codeFlows"] = json::Array{createCodeFlow(Result.ThreadFlows)};
404  
405    Ret["level"] = resultLevelToStr(
406        Result.LevelOverride.value_or(Rule.DefaultConfiguration.Level));
407  
408    json::Object &Run = getCurrentRun();
409    json::Array *Results = Run.getArray("results");
410    Results->emplace_back(std::move(Ret));
411  }
412  
createDocument()413  json::Object SarifDocumentWriter::createDocument() {
414    // Flush all temporaries to their destinations if needed.
415    endRun();
416  
417    json::Object Doc{
418        {"$schema", SchemaURI},
419        {"version", SchemaVersion},
420    };
421    if (!Runs.empty())
422      Doc["runs"] = json::Array(Runs);
423    return Doc;
424  }
425