1 //===-- clang/Basic/Sarif.cpp - SarifDocumentWriter class definition ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the declaration of the SARIFDocumentWriter class, and 11 /// associated builders such as: 12 /// - \ref SarifArtifact 13 /// - \ref SarifArtifactLocation 14 /// - \ref SarifRule 15 /// - \ref SarifResult 16 //===----------------------------------------------------------------------===// 17 #include "clang/Basic/Sarif.h" 18 #include "clang/Basic/SourceLocation.h" 19 #include "clang/Basic/SourceManager.h" 20 #include "llvm/ADT/ArrayRef.h" 21 #include "llvm/ADT/STLExtras.h" 22 #include "llvm/ADT/StringExtras.h" 23 #include "llvm/ADT/StringRef.h" 24 #include "llvm/Support/ConvertUTF.h" 25 #include "llvm/Support/JSON.h" 26 #include "llvm/Support/Path.h" 27 28 #include <optional> 29 #include <string> 30 #include <utility> 31 32 using namespace clang; 33 using namespace llvm; 34 35 using clang::detail::SarifArtifact; 36 using clang::detail::SarifArtifactLocation; 37 38 static StringRef getFileName(FileEntryRef FE) { 39 StringRef Filename = FE.getFileEntry().tryGetRealPathName(); 40 if (Filename.empty()) 41 Filename = FE.getName(); 42 return Filename; 43 } 44 /// \name URI 45 /// @{ 46 47 /// \internal 48 /// \brief 49 /// Return the RFC3986 encoding of the input character. 50 /// 51 /// \param C Character to encode to RFC3986. 52 /// 53 /// \return The RFC3986 representation of \c C. 54 static std::string percentEncodeURICharacter(char C) { 55 // RFC 3986 claims alpha, numeric, and this handful of 56 // characters are not reserved for the path component and 57 // should be written out directly. Otherwise, percent 58 // encode the character and write that out instead of the 59 // reserved character. 60 if (llvm::isAlnum(C) || StringRef("-._~:@!$&'()*+,;=").contains(C)) 61 return std::string(&C, 1); 62 return "%" + llvm::toHex(StringRef(&C, 1)); 63 } 64 65 /// \internal 66 /// \brief Return a URI representing the given file name. 67 /// 68 /// \param Filename The filename to be represented as URI. 69 /// 70 /// \return RFC3986 URI representing the input file name. 71 static std::string fileNameToURI(StringRef Filename) { 72 SmallString<32> Ret = StringRef("file://"); 73 74 // Get the root name to see if it has a URI authority. 75 StringRef Root = sys::path::root_name(Filename); 76 if (Root.starts_with("//")) { 77 // There is an authority, so add it to the URI. 78 Ret += Root.drop_front(2).str(); 79 } else if (!Root.empty()) { 80 // There is no authority, so end the component and add the root to the URI. 81 Ret += Twine("/" + Root).str(); 82 } 83 84 auto Iter = sys::path::begin(Filename), End = sys::path::end(Filename); 85 assert(Iter != End && "Expected there to be a non-root path component."); 86 // Add the rest of the path components, encoding any reserved characters; 87 // we skip past the first path component, as it was handled it above. 88 for (StringRef Component : llvm::make_range(++Iter, End)) { 89 // For reasons unknown to me, we may get a backslash with Windows native 90 // paths for the initial backslash following the drive component, which 91 // we need to ignore as a URI path part. 92 if (Component == "\\") 93 continue; 94 95 // Add the separator between the previous path part and the one being 96 // currently processed. 97 Ret += "/"; 98 99 // URI encode the part. 100 for (char C : Component) { 101 Ret += percentEncodeURICharacter(C); 102 } 103 } 104 105 return std::string(Ret); 106 } 107 /// @} 108 109 /// \brief Calculate the column position expressed in the number of UTF-8 code 110 /// points from column start to the source location 111 /// 112 /// \param Loc The source location whose column needs to be calculated. 113 /// \param TokenLen Optional hint for when the token is multiple bytes long. 114 /// 115 /// \return The column number as a UTF-8 aware byte offset from column start to 116 /// the effective source location. 117 static unsigned int adjustColumnPos(FullSourceLoc Loc, 118 unsigned int TokenLen = 0) { 119 assert(!Loc.isInvalid() && "invalid Loc when adjusting column position"); 120 121 std::pair<FileID, unsigned> LocInfo = Loc.getDecomposedExpansionLoc(); 122 std::optional<MemoryBufferRef> Buf = 123 Loc.getManager().getBufferOrNone(LocInfo.first); 124 assert(Buf && "got an invalid buffer for the location's file"); 125 assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) && 126 "token extends past end of buffer?"); 127 128 // Adjust the offset to be the start of the line, since we'll be counting 129 // Unicode characters from there until our column offset. 130 unsigned int Off = LocInfo.second - (Loc.getExpansionColumnNumber() - 1); 131 unsigned int Ret = 1; 132 while (Off < (LocInfo.second + TokenLen)) { 133 Off += getNumBytesForUTF8(Buf->getBuffer()[Off]); 134 Ret++; 135 } 136 137 return Ret; 138 } 139 140 /// \name SARIF Utilities 141 /// @{ 142 143 /// \internal 144 json::Object createMessage(StringRef Text) { 145 return json::Object{{"text", Text.str()}}; 146 } 147 148 /// \internal 149 /// \pre CharSourceRange must be a token range 150 static json::Object createTextRegion(const SourceManager &SM, 151 const CharSourceRange &R) { 152 FullSourceLoc BeginCharLoc{R.getBegin(), SM}; 153 FullSourceLoc EndCharLoc{R.getEnd(), SM}; 154 json::Object Region{{"startLine", BeginCharLoc.getExpansionLineNumber()}, 155 {"startColumn", adjustColumnPos(BeginCharLoc)}}; 156 157 if (BeginCharLoc == EndCharLoc) { 158 Region["endColumn"] = adjustColumnPos(BeginCharLoc); 159 } else { 160 Region["endLine"] = EndCharLoc.getExpansionLineNumber(); 161 Region["endColumn"] = adjustColumnPos(EndCharLoc); 162 } 163 return Region; 164 } 165 166 static json::Object createLocation(json::Object &&PhysicalLocation, 167 StringRef Message = "") { 168 json::Object Ret{{"physicalLocation", std::move(PhysicalLocation)}}; 169 if (!Message.empty()) 170 Ret.insert({"message", createMessage(Message)}); 171 return Ret; 172 } 173 174 static StringRef importanceToStr(ThreadFlowImportance I) { 175 switch (I) { 176 case ThreadFlowImportance::Important: 177 return "important"; 178 case ThreadFlowImportance::Essential: 179 return "essential"; 180 case ThreadFlowImportance::Unimportant: 181 return "unimportant"; 182 } 183 llvm_unreachable("Fully covered switch is not so fully covered"); 184 } 185 186 static StringRef resultLevelToStr(SarifResultLevel R) { 187 switch (R) { 188 case SarifResultLevel::None: 189 return "none"; 190 case SarifResultLevel::Note: 191 return "note"; 192 case SarifResultLevel::Warning: 193 return "warning"; 194 case SarifResultLevel::Error: 195 return "error"; 196 } 197 llvm_unreachable("Potentially un-handled SarifResultLevel. " 198 "Is the switch not fully covered?"); 199 } 200 201 static json::Object 202 createThreadFlowLocation(json::Object &&Location, 203 const ThreadFlowImportance &Importance) { 204 return json::Object{{"location", std::move(Location)}, 205 {"importance", importanceToStr(Importance)}}; 206 } 207 /// @} 208 209 json::Object 210 SarifDocumentWriter::createPhysicalLocation(const CharSourceRange &R) { 211 assert(R.isValid() && 212 "Cannot create a physicalLocation from invalid SourceRange!"); 213 assert(R.isCharRange() && 214 "Cannot create a physicalLocation from a token range!"); 215 FullSourceLoc Start{R.getBegin(), SourceMgr}; 216 OptionalFileEntryRef FE = Start.getExpansionLoc().getFileEntryRef(); 217 assert(FE && "Diagnostic does not exist within a valid file!"); 218 219 const std::string &FileURI = fileNameToURI(getFileName(*FE)); 220 auto I = CurrentArtifacts.find(FileURI); 221 222 if (I == CurrentArtifacts.end()) { 223 uint32_t Idx = static_cast<uint32_t>(CurrentArtifacts.size()); 224 const SarifArtifactLocation &Location = 225 SarifArtifactLocation::create(FileURI).setIndex(Idx); 226 const SarifArtifact &Artifact = SarifArtifact::create(Location) 227 .setRoles({"resultFile"}) 228 .setLength(FE->getSize()) 229 .setMimeType("text/plain"); 230 auto StatusIter = CurrentArtifacts.insert({FileURI, Artifact}); 231 // If inserted, ensure the original iterator points to the newly inserted 232 // element, so it can be used downstream. 233 if (StatusIter.second) 234 I = StatusIter.first; 235 } 236 assert(I != CurrentArtifacts.end() && "Failed to insert new artifact"); 237 const SarifArtifactLocation &Location = I->second.Location; 238 json::Object ArtifactLocationObject{{"uri", Location.URI}}; 239 if (Location.Index.has_value()) 240 ArtifactLocationObject["index"] = *Location.Index; 241 return json::Object{{{"artifactLocation", std::move(ArtifactLocationObject)}, 242 {"region", createTextRegion(SourceMgr, R)}}}; 243 } 244 245 json::Object &SarifDocumentWriter::getCurrentTool() { 246 assert(!Closed && "SARIF Document is closed. " 247 "Need to call createRun() before using getcurrentTool!"); 248 249 // Since Closed = false here, expect there to be at least 1 Run, anything 250 // else is an invalid state. 251 assert(!Runs.empty() && "There are no runs associated with the document!"); 252 253 return *Runs.back().getAsObject()->get("tool")->getAsObject(); 254 } 255 256 void SarifDocumentWriter::reset() { 257 CurrentRules.clear(); 258 CurrentArtifacts.clear(); 259 } 260 261 void SarifDocumentWriter::endRun() { 262 // Exit early if trying to close a closed Document. 263 if (Closed) { 264 reset(); 265 return; 266 } 267 268 // Since Closed = false here, expect there to be at least 1 Run, anything 269 // else is an invalid state. 270 assert(!Runs.empty() && "There are no runs associated with the document!"); 271 272 // Flush all the rules. 273 json::Object &Tool = getCurrentTool(); 274 json::Array Rules; 275 for (const SarifRule &R : CurrentRules) { 276 json::Object Config{ 277 {"enabled", R.DefaultConfiguration.Enabled}, 278 {"level", resultLevelToStr(R.DefaultConfiguration.Level)}, 279 {"rank", R.DefaultConfiguration.Rank}}; 280 json::Object Rule{ 281 {"name", R.Name}, 282 {"id", R.Id}, 283 {"fullDescription", json::Object{{"text", R.Description}}}, 284 {"defaultConfiguration", std::move(Config)}}; 285 if (!R.HelpURI.empty()) 286 Rule["helpUri"] = R.HelpURI; 287 Rules.emplace_back(std::move(Rule)); 288 } 289 json::Object &Driver = *Tool.getObject("driver"); 290 Driver["rules"] = std::move(Rules); 291 292 // Flush all the artifacts. 293 json::Object &Run = getCurrentRun(); 294 json::Array *Artifacts = Run.getArray("artifacts"); 295 SmallVector<std::pair<StringRef, SarifArtifact>, 0> Vec; 296 for (const auto &[K, V] : CurrentArtifacts) 297 Vec.emplace_back(K, V); 298 llvm::sort(Vec, llvm::less_first()); 299 for (const auto &[_, A] : Vec) { 300 json::Object Loc{{"uri", A.Location.URI}}; 301 if (A.Location.Index.has_value()) { 302 Loc["index"] = static_cast<int64_t>(*A.Location.Index); 303 } 304 json::Object Artifact; 305 Artifact["location"] = std::move(Loc); 306 if (A.Length.has_value()) 307 Artifact["length"] = static_cast<int64_t>(*A.Length); 308 if (!A.Roles.empty()) 309 Artifact["roles"] = json::Array(A.Roles); 310 if (!A.MimeType.empty()) 311 Artifact["mimeType"] = A.MimeType; 312 if (A.Offset.has_value()) 313 Artifact["offset"] = *A.Offset; 314 Artifacts->push_back(json::Value(std::move(Artifact))); 315 } 316 317 // Clear, reset temporaries before next run. 318 reset(); 319 320 // Mark the document as closed. 321 Closed = true; 322 } 323 324 json::Array 325 SarifDocumentWriter::createThreadFlows(ArrayRef<ThreadFlow> ThreadFlows) { 326 json::Object Ret{{"locations", json::Array{}}}; 327 json::Array Locs; 328 for (const auto &ThreadFlow : ThreadFlows) { 329 json::Object PLoc = createPhysicalLocation(ThreadFlow.Range); 330 json::Object Loc = createLocation(std::move(PLoc), ThreadFlow.Message); 331 Locs.emplace_back( 332 createThreadFlowLocation(std::move(Loc), ThreadFlow.Importance)); 333 } 334 Ret["locations"] = std::move(Locs); 335 return json::Array{std::move(Ret)}; 336 } 337 338 json::Object 339 SarifDocumentWriter::createCodeFlow(ArrayRef<ThreadFlow> ThreadFlows) { 340 return json::Object{{"threadFlows", createThreadFlows(ThreadFlows)}}; 341 } 342 343 void SarifDocumentWriter::createRun(StringRef ShortToolName, 344 StringRef LongToolName, 345 StringRef ToolVersion) { 346 // Clear resources associated with a previous run. 347 endRun(); 348 349 // Signify a new run has begun. 350 Closed = false; 351 352 json::Object Tool{ 353 {"driver", 354 json::Object{{"name", ShortToolName}, 355 {"fullName", LongToolName}, 356 {"language", "en-US"}, 357 {"version", ToolVersion}, 358 {"informationUri", 359 "https://clang.llvm.org/docs/UsersManual.html"}}}}; 360 json::Object TheRun{{"tool", std::move(Tool)}, 361 {"results", {}}, 362 {"artifacts", {}}, 363 {"columnKind", "unicodeCodePoints"}}; 364 Runs.emplace_back(std::move(TheRun)); 365 } 366 367 json::Object &SarifDocumentWriter::getCurrentRun() { 368 assert(!Closed && 369 "SARIF Document is closed. " 370 "Can only getCurrentRun() if document is opened via createRun(), " 371 "create a run first"); 372 373 // Since Closed = false here, expect there to be at least 1 Run, anything 374 // else is an invalid state. 375 assert(!Runs.empty() && "There are no runs associated with the document!"); 376 return *Runs.back().getAsObject(); 377 } 378 379 size_t SarifDocumentWriter::createRule(const SarifRule &Rule) { 380 size_t Ret = CurrentRules.size(); 381 CurrentRules.emplace_back(Rule); 382 return Ret; 383 } 384 385 void SarifDocumentWriter::appendResult(const SarifResult &Result) { 386 size_t RuleIdx = Result.RuleIdx; 387 assert(RuleIdx < CurrentRules.size() && 388 "Trying to reference a rule that doesn't exist"); 389 const SarifRule &Rule = CurrentRules[RuleIdx]; 390 assert(Rule.DefaultConfiguration.Enabled && 391 "Cannot add a result referencing a disabled Rule"); 392 json::Object Ret{{"message", createMessage(Result.DiagnosticMessage)}, 393 {"ruleIndex", static_cast<int64_t>(RuleIdx)}, 394 {"ruleId", Rule.Id}}; 395 if (!Result.Locations.empty()) { 396 json::Array Locs; 397 for (auto &Range : Result.Locations) { 398 Locs.emplace_back(createLocation(createPhysicalLocation(Range))); 399 } 400 Ret["locations"] = std::move(Locs); 401 } 402 if (!Result.ThreadFlows.empty()) 403 Ret["codeFlows"] = json::Array{createCodeFlow(Result.ThreadFlows)}; 404 405 Ret["level"] = resultLevelToStr( 406 Result.LevelOverride.value_or(Rule.DefaultConfiguration.Level)); 407 408 json::Object &Run = getCurrentRun(); 409 json::Array *Results = Run.getArray("results"); 410 Results->emplace_back(std::move(Ret)); 411 } 412 413 json::Object SarifDocumentWriter::createDocument() { 414 // Flush all temporaries to their destinations if needed. 415 endRun(); 416 417 json::Object Doc{ 418 {"$schema", SchemaURI}, 419 {"version", SchemaVersion}, 420 }; 421 if (!Runs.empty()) 422 Doc["runs"] = json::Array(Runs); 423 return Doc; 424 } 425