1 //===-- clang/Basic/Sarif.cpp - SarifDocumentWriter class definition ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the declaration of the SARIFDocumentWriter class, and 11 /// associated builders such as: 12 /// - \ref SarifArtifact 13 /// - \ref SarifArtifactLocation 14 /// - \ref SarifRule 15 /// - \ref SarifResult 16 //===----------------------------------------------------------------------===// 17 #include "clang/Basic/Sarif.h" 18 #include "clang/Basic/SourceLocation.h" 19 #include "clang/Basic/SourceManager.h" 20 #include "llvm/ADT/ArrayRef.h" 21 #include "llvm/ADT/STLExtras.h" 22 #include "llvm/ADT/StringMap.h" 23 #include "llvm/ADT/StringRef.h" 24 #include "llvm/Support/ConvertUTF.h" 25 #include "llvm/Support/JSON.h" 26 #include "llvm/Support/Path.h" 27 28 #include <string> 29 #include <utility> 30 31 using namespace clang; 32 using namespace llvm; 33 34 using clang::detail::SarifArtifact; 35 using clang::detail::SarifArtifactLocation; 36 37 static StringRef getFileName(const FileEntry &FE) { 38 StringRef Filename = FE.tryGetRealPathName(); 39 if (Filename.empty()) 40 Filename = FE.getName(); 41 return Filename; 42 } 43 /// \name URI 44 /// @{ 45 46 /// \internal 47 /// \brief 48 /// Return the RFC3986 encoding of the input character. 49 /// 50 /// \param C Character to encode to RFC3986. 51 /// 52 /// \return The RFC3986 representation of \c C. 53 static std::string percentEncodeURICharacter(char C) { 54 // RFC 3986 claims alpha, numeric, and this handful of 55 // characters are not reserved for the path component and 56 // should be written out directly. Otherwise, percent 57 // encode the character and write that out instead of the 58 // reserved character. 59 if (llvm::isAlnum(C) || 60 StringRef::npos != StringRef("-._~:@!$&'()*+,;=").find(C)) 61 return std::string(&C, 1); 62 return "%" + llvm::toHex(StringRef(&C, 1)); 63 } 64 65 /// \internal 66 /// \brief Return a URI representing the given file name. 67 /// 68 /// \param Filename The filename to be represented as URI. 69 /// 70 /// \return RFC3986 URI representing the input file name. 71 static std::string fileNameToURI(StringRef Filename) { 72 SmallString<32> Ret = StringRef("file://"); 73 74 // Get the root name to see if it has a URI authority. 75 StringRef Root = sys::path::root_name(Filename); 76 if (Root.startswith("//")) { 77 // There is an authority, so add it to the URI. 78 Ret += Root.drop_front(2).str(); 79 } else if (!Root.empty()) { 80 // There is no authority, so end the component and add the root to the URI. 81 Ret += Twine("/" + Root).str(); 82 } 83 84 auto Iter = sys::path::begin(Filename), End = sys::path::end(Filename); 85 assert(Iter != End && "Expected there to be a non-root path component."); 86 // Add the rest of the path components, encoding any reserved characters; 87 // we skip past the first path component, as it was handled it above. 88 std::for_each(++Iter, End, [&Ret](StringRef Component) { 89 // For reasons unknown to me, we may get a backslash with Windows native 90 // paths for the initial backslash following the drive component, which 91 // we need to ignore as a URI path part. 92 if (Component == "\\") 93 return; 94 95 // Add the separator between the previous path part and the one being 96 // currently processed. 97 Ret += "/"; 98 99 // URI encode the part. 100 for (char C : Component) { 101 Ret += percentEncodeURICharacter(C); 102 } 103 }); 104 105 return std::string(Ret); 106 } 107 /// @} 108 109 /// \brief Calculate the column position expressed in the number of UTF-8 code 110 /// points from column start to the source location 111 /// 112 /// \param Loc The source location whose column needs to be calculated. 113 /// \param TokenLen Optional hint for when the token is multiple bytes long. 114 /// 115 /// \return The column number as a UTF-8 aware byte offset from column start to 116 /// the effective source location. 117 static unsigned int adjustColumnPos(FullSourceLoc Loc, 118 unsigned int TokenLen = 0) { 119 assert(!Loc.isInvalid() && "invalid Loc when adjusting column position"); 120 121 std::pair<FileID, unsigned> LocInfo = Loc.getDecomposedLoc(); 122 Optional<MemoryBufferRef> Buf = 123 Loc.getManager().getBufferOrNone(LocInfo.first); 124 assert(Buf && "got an invalid buffer for the location's file"); 125 assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) && 126 "token extends past end of buffer?"); 127 128 // Adjust the offset to be the start of the line, since we'll be counting 129 // Unicode characters from there until our column offset. 130 unsigned int Off = LocInfo.second - (Loc.getExpansionColumnNumber() - 1); 131 unsigned int Ret = 1; 132 while (Off < (LocInfo.second + TokenLen)) { 133 Off += getNumBytesForUTF8(Buf->getBuffer()[Off]); 134 Ret++; 135 } 136 137 return Ret; 138 } 139 140 /// \name SARIF Utilities 141 /// @{ 142 143 /// \internal 144 json::Object createMessage(StringRef Text) { 145 return json::Object{{"text", Text.str()}}; 146 } 147 148 /// \internal 149 /// \pre CharSourceRange must be a token range 150 static json::Object createTextRegion(const SourceManager &SM, 151 const CharSourceRange &R) { 152 FullSourceLoc FirstTokenLoc{R.getBegin(), SM}; 153 FullSourceLoc LastTokenLoc{R.getEnd(), SM}; 154 json::Object Region{{"startLine", FirstTokenLoc.getExpansionLineNumber()}, 155 {"startColumn", adjustColumnPos(FirstTokenLoc)}, 156 {"endColumn", adjustColumnPos(LastTokenLoc)}}; 157 if (FirstTokenLoc != LastTokenLoc) { 158 Region["endLine"] = LastTokenLoc.getExpansionLineNumber(); 159 } 160 return Region; 161 } 162 163 static json::Object createLocation(json::Object &&PhysicalLocation, 164 StringRef Message = "") { 165 json::Object Ret{{"physicalLocation", std::move(PhysicalLocation)}}; 166 if (!Message.empty()) 167 Ret.insert({"message", createMessage(Message)}); 168 return Ret; 169 } 170 171 static StringRef importanceToStr(ThreadFlowImportance I) { 172 switch (I) { 173 case ThreadFlowImportance::Important: 174 return "important"; 175 case ThreadFlowImportance::Essential: 176 return "essential"; 177 case ThreadFlowImportance::Unimportant: 178 return "unimportant"; 179 } 180 llvm_unreachable("Fully covered switch is not so fully covered"); 181 } 182 183 static json::Object 184 createThreadFlowLocation(json::Object &&Location, 185 const ThreadFlowImportance &Importance) { 186 return json::Object{{"location", std::move(Location)}, 187 {"importance", importanceToStr(Importance)}}; 188 } 189 /// @} 190 191 json::Object 192 SarifDocumentWriter::createPhysicalLocation(const CharSourceRange &R) { 193 assert(R.isValid() && 194 "Cannot create a physicalLocation from invalid SourceRange!"); 195 assert(R.isCharRange() && 196 "Cannot create a physicalLocation from a token range!"); 197 FullSourceLoc Start{R.getBegin(), SourceMgr}; 198 const FileEntry *FE = Start.getExpansionLoc().getFileEntry(); 199 assert(FE != nullptr && "Diagnostic does not exist within a valid file!"); 200 201 const std::string &FileURI = fileNameToURI(getFileName(*FE)); 202 auto I = CurrentArtifacts.find(FileURI); 203 204 if (I == CurrentArtifacts.end()) { 205 uint32_t Idx = static_cast<uint32_t>(CurrentArtifacts.size()); 206 const SarifArtifactLocation &Location = 207 SarifArtifactLocation::create(FileURI).setIndex(Idx); 208 const SarifArtifact &Artifact = SarifArtifact::create(Location) 209 .setRoles({"resultFile"}) 210 .setLength(FE->getSize()) 211 .setMimeType("text/plain"); 212 auto StatusIter = CurrentArtifacts.insert({FileURI, Artifact}); 213 // If inserted, ensure the original iterator points to the newly inserted 214 // element, so it can be used downstream. 215 if (StatusIter.second) 216 I = StatusIter.first; 217 } 218 assert(I != CurrentArtifacts.end() && "Failed to insert new artifact"); 219 const SarifArtifactLocation &Location = I->second.Location; 220 uint32_t Idx = Location.Index.value(); 221 return json::Object{{{"artifactLocation", json::Object{{{"index", Idx}}}}, 222 {"region", createTextRegion(SourceMgr, R)}}}; 223 } 224 225 json::Object &SarifDocumentWriter::getCurrentTool() { 226 assert(!Closed && "SARIF Document is closed. " 227 "Need to call createRun() before using getcurrentTool!"); 228 229 // Since Closed = false here, expect there to be at least 1 Run, anything 230 // else is an invalid state. 231 assert(!Runs.empty() && "There are no runs associated with the document!"); 232 233 return *Runs.back().getAsObject()->get("tool")->getAsObject(); 234 } 235 236 void SarifDocumentWriter::reset() { 237 CurrentRules.clear(); 238 CurrentArtifacts.clear(); 239 } 240 241 void SarifDocumentWriter::endRun() { 242 // Exit early if trying to close a closed Document. 243 if (Closed) { 244 reset(); 245 return; 246 } 247 248 // Since Closed = false here, expect there to be at least 1 Run, anything 249 // else is an invalid state. 250 assert(!Runs.empty() && "There are no runs associated with the document!"); 251 252 // Flush all the rules. 253 json::Object &Tool = getCurrentTool(); 254 json::Array Rules; 255 for (const SarifRule &R : CurrentRules) { 256 json::Object Rule{ 257 {"name", R.Name}, 258 {"id", R.Id}, 259 {"fullDescription", json::Object{{"text", R.Description}}}}; 260 if (!R.HelpURI.empty()) 261 Rule["helpUri"] = R.HelpURI; 262 Rules.emplace_back(std::move(Rule)); 263 } 264 json::Object &Driver = *Tool.getObject("driver"); 265 Driver["rules"] = std::move(Rules); 266 267 // Flush all the artifacts. 268 json::Object &Run = getCurrentRun(); 269 json::Array *Artifacts = Run.getArray("artifacts"); 270 for (const auto &Pair : CurrentArtifacts) { 271 const SarifArtifact &A = Pair.getValue(); 272 json::Object Loc{{"uri", A.Location.URI}}; 273 if (A.Location.Index.has_value()) { 274 Loc["index"] = static_cast<int64_t>(A.Location.Index.value()); 275 } 276 json::Object Artifact; 277 Artifact["location"] = std::move(Loc); 278 if (A.Length.has_value()) 279 Artifact["length"] = static_cast<int64_t>(A.Length.value()); 280 if (!A.Roles.empty()) 281 Artifact["roles"] = json::Array(A.Roles); 282 if (!A.MimeType.empty()) 283 Artifact["mimeType"] = A.MimeType; 284 if (A.Offset.has_value()) 285 Artifact["offset"] = A.Offset; 286 Artifacts->push_back(json::Value(std::move(Artifact))); 287 } 288 289 // Clear, reset temporaries before next run. 290 reset(); 291 292 // Mark the document as closed. 293 Closed = true; 294 } 295 296 json::Array 297 SarifDocumentWriter::createThreadFlows(ArrayRef<ThreadFlow> ThreadFlows) { 298 json::Object Ret{{"locations", json::Array{}}}; 299 json::Array Locs; 300 for (const auto &ThreadFlow : ThreadFlows) { 301 json::Object PLoc = createPhysicalLocation(ThreadFlow.Range); 302 json::Object Loc = createLocation(std::move(PLoc), ThreadFlow.Message); 303 Locs.emplace_back( 304 createThreadFlowLocation(std::move(Loc), ThreadFlow.Importance)); 305 } 306 Ret["locations"] = std::move(Locs); 307 return json::Array{std::move(Ret)}; 308 } 309 310 json::Object 311 SarifDocumentWriter::createCodeFlow(ArrayRef<ThreadFlow> ThreadFlows) { 312 return json::Object{{"threadFlows", createThreadFlows(ThreadFlows)}}; 313 } 314 315 void SarifDocumentWriter::createRun(StringRef ShortToolName, 316 StringRef LongToolName, 317 StringRef ToolVersion) { 318 // Clear resources associated with a previous run. 319 endRun(); 320 321 // Signify a new run has begun. 322 Closed = false; 323 324 json::Object Tool{ 325 {"driver", 326 json::Object{{"name", ShortToolName}, 327 {"fullName", LongToolName}, 328 {"language", "en-US"}, 329 {"version", ToolVersion}, 330 {"informationUri", 331 "https://clang.llvm.org/docs/UsersManual.html"}}}}; 332 json::Object TheRun{{"tool", std::move(Tool)}, 333 {"results", {}}, 334 {"artifacts", {}}, 335 {"columnKind", "unicodeCodePoints"}}; 336 Runs.emplace_back(std::move(TheRun)); 337 } 338 339 json::Object &SarifDocumentWriter::getCurrentRun() { 340 assert(!Closed && 341 "SARIF Document is closed. " 342 "Can only getCurrentRun() if document is opened via createRun(), " 343 "create a run first"); 344 345 // Since Closed = false here, expect there to be at least 1 Run, anything 346 // else is an invalid state. 347 assert(!Runs.empty() && "There are no runs associated with the document!"); 348 return *Runs.back().getAsObject(); 349 } 350 351 size_t SarifDocumentWriter::createRule(const SarifRule &Rule) { 352 size_t Ret = CurrentRules.size(); 353 CurrentRules.emplace_back(Rule); 354 return Ret; 355 } 356 357 void SarifDocumentWriter::appendResult(const SarifResult &Result) { 358 size_t RuleIdx = Result.RuleIdx; 359 assert(RuleIdx < CurrentRules.size() && 360 "Trying to reference a rule that doesn't exist"); 361 json::Object Ret{{"message", createMessage(Result.DiagnosticMessage)}, 362 {"ruleIndex", static_cast<int64_t>(RuleIdx)}, 363 {"ruleId", CurrentRules[RuleIdx].Id}}; 364 if (!Result.Locations.empty()) { 365 json::Array Locs; 366 for (auto &Range : Result.Locations) { 367 Locs.emplace_back(createLocation(createPhysicalLocation(Range))); 368 } 369 Ret["locations"] = std::move(Locs); 370 } 371 if (!Result.ThreadFlows.empty()) 372 Ret["codeFlows"] = json::Array{createCodeFlow(Result.ThreadFlows)}; 373 json::Object &Run = getCurrentRun(); 374 json::Array *Results = Run.getArray("results"); 375 Results->emplace_back(std::move(Ret)); 376 } 377 378 json::Object SarifDocumentWriter::createDocument() { 379 // Flush all temporaries to their destinations if needed. 380 endRun(); 381 382 json::Object Doc{ 383 {"$schema", SchemaURI}, 384 {"version", SchemaVersion}, 385 }; 386 if (!Runs.empty()) 387 Doc["runs"] = json::Array(Runs); 388 return Doc; 389 } 390