xref: /freebsd/contrib/llvm-project/clang/lib/Basic/Sarif.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===-- clang/Basic/Sarif.cpp - SarifDocumentWriter class definition ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the definition of the SarifDocumentWriter class, which
11 /// consumes the associated builders such as:
12 /// - \ref SarifArtifact
13 /// - \ref SarifArtifactLocation
14 /// - \ref SarifRule
15 /// - \ref SarifResult
16 //===----------------------------------------------------------------------===//
17 #include "clang/Basic/Sarif.h"
18 #include "clang/Basic/SourceLocation.h"
19 #include "clang/Basic/SourceManager.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/StringExtras.h"
22 #include "llvm/ADT/StringRef.h"
23 #include "llvm/Support/ConvertUTF.h"
24 #include "llvm/Support/JSON.h"
25 #include "llvm/Support/Path.h"
26 
27 #include <optional>
28 #include <string>
29 #include <utility>
30 
31 using namespace clang;
32 using namespace llvm;
33 
34 using clang::detail::SarifArtifact;
35 using clang::detail::SarifArtifactLocation;
36 
getFileName(FileEntryRef FE)37 static StringRef getFileName(FileEntryRef FE) {
38   StringRef Filename = FE.getFileEntry().tryGetRealPathName();
39   if (Filename.empty())
40     Filename = FE.getName();
41   return Filename;
42 }
43 /// \name URI
44 /// @{
45 
46 /// \internal
47 /// \brief
48 /// Return the RFC3986 encoding of the input character.
49 ///
50 /// \param C Character to encode to RFC3986.
51 ///
52 /// \return The RFC3986 representation of \c C.
percentEncodeURICharacter(char C)53 static std::string percentEncodeURICharacter(char C) {
54   // RFC 3986 claims alpha, numeric, and this handful of
55   // characters are not reserved for the path component and
56   // should be written out directly. Otherwise, percent
57   // encode the character and write that out instead of the
58   // reserved character.
59   if (llvm::isAlnum(C) || StringRef("-._~:@!$&'()*+,;=").contains(C))
60     return std::string(&C, 1);
61   return "%" + llvm::toHex(StringRef(&C, 1));
62 }
63 
64 /// \internal
65 /// \brief Return a URI representing the given file name.
66 ///
67 /// \param Filename The filename to be represented as URI.
68 ///
69 /// \return RFC3986 URI representing the input file name.
fileNameToURI(StringRef Filename)70 static std::string fileNameToURI(StringRef Filename) {
71   SmallString<32> Ret = StringRef("file://");
72 
73   // Get the root name to see if it has a URI authority.
74   StringRef Root = sys::path::root_name(Filename);
75   if (Root.starts_with("//")) {
76     // There is an authority, so add it to the URI.
77     Ret += Root.drop_front(2).str();
78   } else if (!Root.empty()) {
79     // There is no authority, so end the component and add the root to the URI.
80     Ret += Twine("/" + Root).str();
81   }
82 
83   auto Iter = sys::path::begin(Filename), End = sys::path::end(Filename);
84   assert(Iter != End && "Expected there to be a non-root path component.");
85   // Add the rest of the path components, encoding any reserved characters;
86   // we skip past the first path component, as it was handled it above.
87   for (StringRef Component : llvm::make_range(++Iter, End)) {
88     // For reasons unknown to me, we may get a backslash with Windows native
89     // paths for the initial backslash following the drive component, which
90     // we need to ignore as a URI path part.
91     if (Component == "\\")
92       continue;
93 
94     // Add the separator between the previous path part and the one being
95     // currently processed.
96     Ret += "/";
97 
98     // URI encode the part.
99     for (char C : Component) {
100       Ret += percentEncodeURICharacter(C);
101     }
102   }
103 
104   return std::string(Ret);
105 }
106 ///  @}
107 
108 /// \brief Calculate the column position expressed in the number of UTF-8 code
109 /// points from column start to the source location
110 ///
111 /// \param Loc The source location whose column needs to be calculated.
112 /// \param TokenLen Optional hint for when the token is multiple bytes long.
113 ///
114 /// \return The column number as a UTF-8 aware byte offset from column start to
115 /// the effective source location.
adjustColumnPos(FullSourceLoc Loc,unsigned int TokenLen=0)116 static unsigned int adjustColumnPos(FullSourceLoc Loc,
117                                     unsigned int TokenLen = 0) {
118   assert(!Loc.isInvalid() && "invalid Loc when adjusting column position");
119 
120   FileIDAndOffset LocInfo = Loc.getDecomposedExpansionLoc();
121   std::optional<MemoryBufferRef> Buf =
122       Loc.getManager().getBufferOrNone(LocInfo.first);
123   assert(Buf && "got an invalid buffer for the location's file");
124   assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) &&
125          "token extends past end of buffer?");
126 
127   // Adjust the offset to be the start of the line, since we'll be counting
128   // Unicode characters from there until our column offset.
129   unsigned int Off = LocInfo.second - (Loc.getExpansionColumnNumber() - 1);
130   unsigned int Ret = 1;
131   while (Off < (LocInfo.second + TokenLen)) {
132     Off += getNumBytesForUTF8(Buf->getBuffer()[Off]);
133     Ret++;
134   }
135 
136   return Ret;
137 }
138 
139 /// \name SARIF Utilities
140 /// @{
141 
142 /// \internal
createMessage(StringRef Text)143 static json::Object createMessage(StringRef Text) {
144   return json::Object{{"text", Text.str()}};
145 }
146 
147 /// \internal
148 /// \pre CharSourceRange must be a token range
createTextRegion(const SourceManager & SM,const CharSourceRange & R)149 static json::Object createTextRegion(const SourceManager &SM,
150                                      const CharSourceRange &R) {
151   FullSourceLoc BeginCharLoc{R.getBegin(), SM};
152   FullSourceLoc EndCharLoc{R.getEnd(), SM};
153   json::Object Region{{"startLine", BeginCharLoc.getExpansionLineNumber()},
154                       {"startColumn", adjustColumnPos(BeginCharLoc)}};
155 
156   if (BeginCharLoc == EndCharLoc) {
157     Region["endColumn"] = adjustColumnPos(BeginCharLoc);
158   } else {
159     Region["endLine"] = EndCharLoc.getExpansionLineNumber();
160     Region["endColumn"] = adjustColumnPos(EndCharLoc);
161   }
162   return Region;
163 }
164 
createLocation(json::Object && PhysicalLocation,StringRef Message="")165 static json::Object createLocation(json::Object &&PhysicalLocation,
166                                    StringRef Message = "") {
167   json::Object Ret{{"physicalLocation", std::move(PhysicalLocation)}};
168   if (!Message.empty())
169     Ret.insert({"message", createMessage(Message)});
170   return Ret;
171 }
172 
importanceToStr(ThreadFlowImportance I)173 static StringRef importanceToStr(ThreadFlowImportance I) {
174   switch (I) {
175   case ThreadFlowImportance::Important:
176     return "important";
177   case ThreadFlowImportance::Essential:
178     return "essential";
179   case ThreadFlowImportance::Unimportant:
180     return "unimportant";
181   }
182   llvm_unreachable("Fully covered switch is not so fully covered");
183 }
184 
resultLevelToStr(SarifResultLevel R)185 static StringRef resultLevelToStr(SarifResultLevel R) {
186   switch (R) {
187   case SarifResultLevel::None:
188     return "none";
189   case SarifResultLevel::Note:
190     return "note";
191   case SarifResultLevel::Warning:
192     return "warning";
193   case SarifResultLevel::Error:
194     return "error";
195   }
196   llvm_unreachable("Potentially un-handled SarifResultLevel. "
197                    "Is the switch not fully covered?");
198 }
199 
200 static json::Object
createThreadFlowLocation(json::Object && Location,const ThreadFlowImportance & Importance)201 createThreadFlowLocation(json::Object &&Location,
202                          const ThreadFlowImportance &Importance) {
203   return json::Object{{"location", std::move(Location)},
204                       {"importance", importanceToStr(Importance)}};
205 }
206 ///  @}
207 
208 json::Object
createPhysicalLocation(const CharSourceRange & R)209 SarifDocumentWriter::createPhysicalLocation(const CharSourceRange &R) {
210   assert(R.isValid() &&
211          "Cannot create a physicalLocation from invalid SourceRange!");
212   assert(R.isCharRange() &&
213          "Cannot create a physicalLocation from a token range!");
214   FullSourceLoc Start{R.getBegin(), SourceMgr};
215   OptionalFileEntryRef FE = Start.getExpansionLoc().getFileEntryRef();
216   assert(FE && "Diagnostic does not exist within a valid file!");
217 
218   const std::string &FileURI = fileNameToURI(getFileName(*FE));
219   auto I = CurrentArtifacts.find(FileURI);
220 
221   if (I == CurrentArtifacts.end()) {
222     uint32_t Idx = static_cast<uint32_t>(CurrentArtifacts.size());
223     const SarifArtifactLocation &Location =
224         SarifArtifactLocation::create(FileURI).setIndex(Idx);
225     const SarifArtifact &Artifact = SarifArtifact::create(Location)
226                                         .setRoles({"resultFile"})
227                                         .setLength(FE->getSize())
228                                         .setMimeType("text/plain");
229     auto StatusIter = CurrentArtifacts.insert({FileURI, Artifact});
230     // If inserted, ensure the original iterator points to the newly inserted
231     // element, so it can be used downstream.
232     if (StatusIter.second)
233       I = StatusIter.first;
234   }
235   assert(I != CurrentArtifacts.end() && "Failed to insert new artifact");
236   const SarifArtifactLocation &Location = I->second.Location;
237   json::Object ArtifactLocationObject{{"uri", Location.URI}};
238   if (Location.Index.has_value())
239     ArtifactLocationObject["index"] = *Location.Index;
240   return json::Object{{{"artifactLocation", std::move(ArtifactLocationObject)},
241                        {"region", createTextRegion(SourceMgr, R)}}};
242 }
243 
getCurrentTool()244 json::Object &SarifDocumentWriter::getCurrentTool() {
245   assert(!Closed && "SARIF Document is closed. "
246                     "Need to call createRun() before using getcurrentTool!");
247 
248   // Since Closed = false here, expect there to be at least 1 Run, anything
249   // else is an invalid state.
250   assert(!Runs.empty() && "There are no runs associated with the document!");
251 
252   return *Runs.back().getAsObject()->get("tool")->getAsObject();
253 }
254 
reset()255 void SarifDocumentWriter::reset() {
256   CurrentRules.clear();
257   CurrentArtifacts.clear();
258 }
259 
endRun()260 void SarifDocumentWriter::endRun() {
261   // Exit early if trying to close a closed Document.
262   if (Closed) {
263     reset();
264     return;
265   }
266 
267   // Since Closed = false here, expect there to be at least 1 Run, anything
268   // else is an invalid state.
269   assert(!Runs.empty() && "There are no runs associated with the document!");
270 
271   // Flush all the rules.
272   json::Object &Tool = getCurrentTool();
273   json::Array Rules;
274   for (const SarifRule &R : CurrentRules) {
275     json::Object Config{
276         {"enabled", R.DefaultConfiguration.Enabled},
277         {"level", resultLevelToStr(R.DefaultConfiguration.Level)},
278         {"rank", R.DefaultConfiguration.Rank}};
279     json::Object Rule{
280         {"name", R.Name},
281         {"id", R.Id},
282         {"fullDescription", json::Object{{"text", R.Description}}},
283         {"defaultConfiguration", std::move(Config)}};
284     if (!R.HelpURI.empty())
285       Rule["helpUri"] = R.HelpURI;
286     Rules.emplace_back(std::move(Rule));
287   }
288   json::Object &Driver = *Tool.getObject("driver");
289   Driver["rules"] = std::move(Rules);
290 
291   // Flush all the artifacts.
292   json::Object &Run = getCurrentRun();
293   json::Array *Artifacts = Run.getArray("artifacts");
294   SmallVector<std::pair<StringRef, SarifArtifact>, 0> Vec;
295   for (const auto &[K, V] : CurrentArtifacts)
296     Vec.emplace_back(K, V);
297   llvm::sort(Vec, llvm::less_first());
298   for (const auto &[_, A] : Vec) {
299     json::Object Loc{{"uri", A.Location.URI}};
300     if (A.Location.Index.has_value()) {
301       Loc["index"] = static_cast<int64_t>(*A.Location.Index);
302     }
303     json::Object Artifact;
304     Artifact["location"] = std::move(Loc);
305     if (A.Length.has_value())
306       Artifact["length"] = static_cast<int64_t>(*A.Length);
307     if (!A.Roles.empty())
308       Artifact["roles"] = json::Array(A.Roles);
309     if (!A.MimeType.empty())
310       Artifact["mimeType"] = A.MimeType;
311     if (A.Offset.has_value())
312       Artifact["offset"] = *A.Offset;
313     Artifacts->push_back(json::Value(std::move(Artifact)));
314   }
315 
316   // Clear, reset temporaries before next run.
317   reset();
318 
319   // Mark the document as closed.
320   Closed = true;
321 }
322 
323 json::Array
createThreadFlows(ArrayRef<ThreadFlow> ThreadFlows)324 SarifDocumentWriter::createThreadFlows(ArrayRef<ThreadFlow> ThreadFlows) {
325   json::Object Ret{{"locations", json::Array{}}};
326   json::Array Locs;
327   for (const auto &ThreadFlow : ThreadFlows) {
328     json::Object PLoc = createPhysicalLocation(ThreadFlow.Range);
329     json::Object Loc = createLocation(std::move(PLoc), ThreadFlow.Message);
330     Locs.emplace_back(
331         createThreadFlowLocation(std::move(Loc), ThreadFlow.Importance));
332   }
333   Ret["locations"] = std::move(Locs);
334   return json::Array{std::move(Ret)};
335 }
336 
337 json::Object
createCodeFlow(ArrayRef<ThreadFlow> ThreadFlows)338 SarifDocumentWriter::createCodeFlow(ArrayRef<ThreadFlow> ThreadFlows) {
339   return json::Object{{"threadFlows", createThreadFlows(ThreadFlows)}};
340 }
341 
createRun(StringRef ShortToolName,StringRef LongToolName,StringRef ToolVersion)342 void SarifDocumentWriter::createRun(StringRef ShortToolName,
343                                     StringRef LongToolName,
344                                     StringRef ToolVersion) {
345   // Clear resources associated with a previous run.
346   endRun();
347 
348   // Signify a new run has begun.
349   Closed = false;
350 
351   json::Object Tool{
352       {"driver",
353        json::Object{{"name", ShortToolName},
354                     {"fullName", LongToolName},
355                     {"language", "en-US"},
356                     {"version", ToolVersion},
357                     {"informationUri",
358                      "https://clang.llvm.org/docs/UsersManual.html"}}}};
359   json::Object TheRun{{"tool", std::move(Tool)},
360                       {"results", {}},
361                       {"artifacts", {}},
362                       {"columnKind", "unicodeCodePoints"}};
363   Runs.emplace_back(std::move(TheRun));
364 }
365 
getCurrentRun()366 json::Object &SarifDocumentWriter::getCurrentRun() {
367   assert(!Closed &&
368          "SARIF Document is closed. "
369          "Can only getCurrentRun() if document is opened via createRun(), "
370          "create a run first");
371 
372   // Since Closed = false here, expect there to be at least 1 Run, anything
373   // else is an invalid state.
374   assert(!Runs.empty() && "There are no runs associated with the document!");
375   return *Runs.back().getAsObject();
376 }
377 
createRule(const SarifRule & Rule)378 size_t SarifDocumentWriter::createRule(const SarifRule &Rule) {
379   size_t Ret = CurrentRules.size();
380   CurrentRules.emplace_back(Rule);
381   return Ret;
382 }
383 
appendResult(const SarifResult & Result)384 void SarifDocumentWriter::appendResult(const SarifResult &Result) {
385   size_t RuleIdx = Result.RuleIdx;
386   assert(RuleIdx < CurrentRules.size() &&
387          "Trying to reference a rule that doesn't exist");
388   const SarifRule &Rule = CurrentRules[RuleIdx];
389   assert(Rule.DefaultConfiguration.Enabled &&
390          "Cannot add a result referencing a disabled Rule");
391   json::Object Ret{{"message", createMessage(Result.DiagnosticMessage)},
392                    {"ruleIndex", static_cast<int64_t>(RuleIdx)},
393                    {"ruleId", Rule.Id}};
394   if (!Result.Locations.empty()) {
395     json::Array Locs;
396     for (auto &Range : Result.Locations) {
397       Locs.emplace_back(createLocation(createPhysicalLocation(Range)));
398     }
399     Ret["locations"] = std::move(Locs);
400   }
401   if (!Result.ThreadFlows.empty())
402     Ret["codeFlows"] = json::Array{createCodeFlow(Result.ThreadFlows)};
403 
404   Ret["level"] = resultLevelToStr(
405       Result.LevelOverride.value_or(Rule.DefaultConfiguration.Level));
406 
407   json::Object &Run = getCurrentRun();
408   json::Array *Results = Run.getArray("results");
409   Results->emplace_back(std::move(Ret));
410 }
411 
createDocument()412 json::Object SarifDocumentWriter::createDocument() {
413   // Flush all temporaries to their destinations if needed.
414   endRun();
415 
416   json::Object Doc{
417       {"$schema", SchemaURI},
418       {"version", SchemaVersion},
419   };
420   if (!Runs.empty())
421     Doc["runs"] = json::Array(Runs);
422   return Doc;
423 }
424