1 //===-- clang/Basic/Sarif.cpp - SarifDocumentWriter class definition ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the declaration of the SARIFDocumentWriter class, and 11 /// associated builders such as: 12 /// - \ref SarifArtifact 13 /// - \ref SarifArtifactLocation 14 /// - \ref SarifRule 15 /// - \ref SarifResult 16 //===----------------------------------------------------------------------===// 17 #include "clang/Basic/Sarif.h" 18 #include "clang/Basic/SourceLocation.h" 19 #include "clang/Basic/SourceManager.h" 20 #include "llvm/ADT/ArrayRef.h" 21 #include "llvm/ADT/STLExtras.h" 22 #include "llvm/ADT/StringExtras.h" 23 #include "llvm/ADT/StringRef.h" 24 #include "llvm/Support/ConvertUTF.h" 25 #include "llvm/Support/JSON.h" 26 #include "llvm/Support/Path.h" 27 28 #include <optional> 29 #include <string> 30 #include <utility> 31 32 using namespace clang; 33 using namespace llvm; 34 35 using clang::detail::SarifArtifact; 36 using clang::detail::SarifArtifactLocation; 37 38 static StringRef getFileName(FileEntryRef FE) { 39 StringRef Filename = FE.getFileEntry().tryGetRealPathName(); 40 if (Filename.empty()) 41 Filename = FE.getName(); 42 return Filename; 43 } 44 /// \name URI 45 /// @{ 46 47 /// \internal 48 /// \brief 49 /// Return the RFC3986 encoding of the input character. 50 /// 51 /// \param C Character to encode to RFC3986. 52 /// 53 /// \return The RFC3986 representation of \c C. 54 static std::string percentEncodeURICharacter(char C) { 55 // RFC 3986 claims alpha, numeric, and this handful of 56 // characters are not reserved for the path component and 57 // should be written out directly. Otherwise, percent 58 // encode the character and write that out instead of the 59 // reserved character. 60 if (llvm::isAlnum(C) || 61 StringRef::npos != StringRef("-._~:@!$&'()*+,;=").find(C)) 62 return std::string(&C, 1); 63 return "%" + llvm::toHex(StringRef(&C, 1)); 64 } 65 66 /// \internal 67 /// \brief Return a URI representing the given file name. 68 /// 69 /// \param Filename The filename to be represented as URI. 70 /// 71 /// \return RFC3986 URI representing the input file name. 72 static std::string fileNameToURI(StringRef Filename) { 73 SmallString<32> Ret = StringRef("file://"); 74 75 // Get the root name to see if it has a URI authority. 76 StringRef Root = sys::path::root_name(Filename); 77 if (Root.starts_with("//")) { 78 // There is an authority, so add it to the URI. 79 Ret += Root.drop_front(2).str(); 80 } else if (!Root.empty()) { 81 // There is no authority, so end the component and add the root to the URI. 82 Ret += Twine("/" + Root).str(); 83 } 84 85 auto Iter = sys::path::begin(Filename), End = sys::path::end(Filename); 86 assert(Iter != End && "Expected there to be a non-root path component."); 87 // Add the rest of the path components, encoding any reserved characters; 88 // we skip past the first path component, as it was handled it above. 89 for (StringRef Component : llvm::make_range(++Iter, End)) { 90 // For reasons unknown to me, we may get a backslash with Windows native 91 // paths for the initial backslash following the drive component, which 92 // we need to ignore as a URI path part. 93 if (Component == "\\") 94 continue; 95 96 // Add the separator between the previous path part and the one being 97 // currently processed. 98 Ret += "/"; 99 100 // URI encode the part. 101 for (char C : Component) { 102 Ret += percentEncodeURICharacter(C); 103 } 104 } 105 106 return std::string(Ret); 107 } 108 /// @} 109 110 /// \brief Calculate the column position expressed in the number of UTF-8 code 111 /// points from column start to the source location 112 /// 113 /// \param Loc The source location whose column needs to be calculated. 114 /// \param TokenLen Optional hint for when the token is multiple bytes long. 115 /// 116 /// \return The column number as a UTF-8 aware byte offset from column start to 117 /// the effective source location. 118 static unsigned int adjustColumnPos(FullSourceLoc Loc, 119 unsigned int TokenLen = 0) { 120 assert(!Loc.isInvalid() && "invalid Loc when adjusting column position"); 121 122 std::pair<FileID, unsigned> LocInfo = Loc.getDecomposedExpansionLoc(); 123 std::optional<MemoryBufferRef> Buf = 124 Loc.getManager().getBufferOrNone(LocInfo.first); 125 assert(Buf && "got an invalid buffer for the location's file"); 126 assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) && 127 "token extends past end of buffer?"); 128 129 // Adjust the offset to be the start of the line, since we'll be counting 130 // Unicode characters from there until our column offset. 131 unsigned int Off = LocInfo.second - (Loc.getExpansionColumnNumber() - 1); 132 unsigned int Ret = 1; 133 while (Off < (LocInfo.second + TokenLen)) { 134 Off += getNumBytesForUTF8(Buf->getBuffer()[Off]); 135 Ret++; 136 } 137 138 return Ret; 139 } 140 141 /// \name SARIF Utilities 142 /// @{ 143 144 /// \internal 145 json::Object createMessage(StringRef Text) { 146 return json::Object{{"text", Text.str()}}; 147 } 148 149 /// \internal 150 /// \pre CharSourceRange must be a token range 151 static json::Object createTextRegion(const SourceManager &SM, 152 const CharSourceRange &R) { 153 FullSourceLoc BeginCharLoc{R.getBegin(), SM}; 154 FullSourceLoc EndCharLoc{R.getEnd(), SM}; 155 json::Object Region{{"startLine", BeginCharLoc.getExpansionLineNumber()}, 156 {"startColumn", adjustColumnPos(BeginCharLoc)}}; 157 158 if (BeginCharLoc == EndCharLoc) { 159 Region["endColumn"] = adjustColumnPos(BeginCharLoc); 160 } else { 161 Region["endLine"] = EndCharLoc.getExpansionLineNumber(); 162 Region["endColumn"] = adjustColumnPos(EndCharLoc); 163 } 164 return Region; 165 } 166 167 static json::Object createLocation(json::Object &&PhysicalLocation, 168 StringRef Message = "") { 169 json::Object Ret{{"physicalLocation", std::move(PhysicalLocation)}}; 170 if (!Message.empty()) 171 Ret.insert({"message", createMessage(Message)}); 172 return Ret; 173 } 174 175 static StringRef importanceToStr(ThreadFlowImportance I) { 176 switch (I) { 177 case ThreadFlowImportance::Important: 178 return "important"; 179 case ThreadFlowImportance::Essential: 180 return "essential"; 181 case ThreadFlowImportance::Unimportant: 182 return "unimportant"; 183 } 184 llvm_unreachable("Fully covered switch is not so fully covered"); 185 } 186 187 static StringRef resultLevelToStr(SarifResultLevel R) { 188 switch (R) { 189 case SarifResultLevel::None: 190 return "none"; 191 case SarifResultLevel::Note: 192 return "note"; 193 case SarifResultLevel::Warning: 194 return "warning"; 195 case SarifResultLevel::Error: 196 return "error"; 197 } 198 llvm_unreachable("Potentially un-handled SarifResultLevel. " 199 "Is the switch not fully covered?"); 200 } 201 202 static json::Object 203 createThreadFlowLocation(json::Object &&Location, 204 const ThreadFlowImportance &Importance) { 205 return json::Object{{"location", std::move(Location)}, 206 {"importance", importanceToStr(Importance)}}; 207 } 208 /// @} 209 210 json::Object 211 SarifDocumentWriter::createPhysicalLocation(const CharSourceRange &R) { 212 assert(R.isValid() && 213 "Cannot create a physicalLocation from invalid SourceRange!"); 214 assert(R.isCharRange() && 215 "Cannot create a physicalLocation from a token range!"); 216 FullSourceLoc Start{R.getBegin(), SourceMgr}; 217 OptionalFileEntryRef FE = Start.getExpansionLoc().getFileEntryRef(); 218 assert(FE && "Diagnostic does not exist within a valid file!"); 219 220 const std::string &FileURI = fileNameToURI(getFileName(*FE)); 221 auto I = CurrentArtifacts.find(FileURI); 222 223 if (I == CurrentArtifacts.end()) { 224 uint32_t Idx = static_cast<uint32_t>(CurrentArtifacts.size()); 225 const SarifArtifactLocation &Location = 226 SarifArtifactLocation::create(FileURI).setIndex(Idx); 227 const SarifArtifact &Artifact = SarifArtifact::create(Location) 228 .setRoles({"resultFile"}) 229 .setLength(FE->getSize()) 230 .setMimeType("text/plain"); 231 auto StatusIter = CurrentArtifacts.insert({FileURI, Artifact}); 232 // If inserted, ensure the original iterator points to the newly inserted 233 // element, so it can be used downstream. 234 if (StatusIter.second) 235 I = StatusIter.first; 236 } 237 assert(I != CurrentArtifacts.end() && "Failed to insert new artifact"); 238 const SarifArtifactLocation &Location = I->second.Location; 239 json::Object ArtifactLocationObject{{"uri", Location.URI}}; 240 if (Location.Index.has_value()) 241 ArtifactLocationObject["index"] = *Location.Index; 242 return json::Object{{{"artifactLocation", std::move(ArtifactLocationObject)}, 243 {"region", createTextRegion(SourceMgr, R)}}}; 244 } 245 246 json::Object &SarifDocumentWriter::getCurrentTool() { 247 assert(!Closed && "SARIF Document is closed. " 248 "Need to call createRun() before using getcurrentTool!"); 249 250 // Since Closed = false here, expect there to be at least 1 Run, anything 251 // else is an invalid state. 252 assert(!Runs.empty() && "There are no runs associated with the document!"); 253 254 return *Runs.back().getAsObject()->get("tool")->getAsObject(); 255 } 256 257 void SarifDocumentWriter::reset() { 258 CurrentRules.clear(); 259 CurrentArtifacts.clear(); 260 } 261 262 void SarifDocumentWriter::endRun() { 263 // Exit early if trying to close a closed Document. 264 if (Closed) { 265 reset(); 266 return; 267 } 268 269 // Since Closed = false here, expect there to be at least 1 Run, anything 270 // else is an invalid state. 271 assert(!Runs.empty() && "There are no runs associated with the document!"); 272 273 // Flush all the rules. 274 json::Object &Tool = getCurrentTool(); 275 json::Array Rules; 276 for (const SarifRule &R : CurrentRules) { 277 json::Object Config{ 278 {"enabled", R.DefaultConfiguration.Enabled}, 279 {"level", resultLevelToStr(R.DefaultConfiguration.Level)}, 280 {"rank", R.DefaultConfiguration.Rank}}; 281 json::Object Rule{ 282 {"name", R.Name}, 283 {"id", R.Id}, 284 {"fullDescription", json::Object{{"text", R.Description}}}, 285 {"defaultConfiguration", std::move(Config)}}; 286 if (!R.HelpURI.empty()) 287 Rule["helpUri"] = R.HelpURI; 288 Rules.emplace_back(std::move(Rule)); 289 } 290 json::Object &Driver = *Tool.getObject("driver"); 291 Driver["rules"] = std::move(Rules); 292 293 // Flush all the artifacts. 294 json::Object &Run = getCurrentRun(); 295 json::Array *Artifacts = Run.getArray("artifacts"); 296 SmallVector<std::pair<StringRef, SarifArtifact>, 0> Vec; 297 for (const auto &[K, V] : CurrentArtifacts) 298 Vec.emplace_back(K, V); 299 llvm::sort(Vec, llvm::less_first()); 300 for (const auto &[_, A] : Vec) { 301 json::Object Loc{{"uri", A.Location.URI}}; 302 if (A.Location.Index.has_value()) { 303 Loc["index"] = static_cast<int64_t>(*A.Location.Index); 304 } 305 json::Object Artifact; 306 Artifact["location"] = std::move(Loc); 307 if (A.Length.has_value()) 308 Artifact["length"] = static_cast<int64_t>(*A.Length); 309 if (!A.Roles.empty()) 310 Artifact["roles"] = json::Array(A.Roles); 311 if (!A.MimeType.empty()) 312 Artifact["mimeType"] = A.MimeType; 313 if (A.Offset.has_value()) 314 Artifact["offset"] = *A.Offset; 315 Artifacts->push_back(json::Value(std::move(Artifact))); 316 } 317 318 // Clear, reset temporaries before next run. 319 reset(); 320 321 // Mark the document as closed. 322 Closed = true; 323 } 324 325 json::Array 326 SarifDocumentWriter::createThreadFlows(ArrayRef<ThreadFlow> ThreadFlows) { 327 json::Object Ret{{"locations", json::Array{}}}; 328 json::Array Locs; 329 for (const auto &ThreadFlow : ThreadFlows) { 330 json::Object PLoc = createPhysicalLocation(ThreadFlow.Range); 331 json::Object Loc = createLocation(std::move(PLoc), ThreadFlow.Message); 332 Locs.emplace_back( 333 createThreadFlowLocation(std::move(Loc), ThreadFlow.Importance)); 334 } 335 Ret["locations"] = std::move(Locs); 336 return json::Array{std::move(Ret)}; 337 } 338 339 json::Object 340 SarifDocumentWriter::createCodeFlow(ArrayRef<ThreadFlow> ThreadFlows) { 341 return json::Object{{"threadFlows", createThreadFlows(ThreadFlows)}}; 342 } 343 344 void SarifDocumentWriter::createRun(StringRef ShortToolName, 345 StringRef LongToolName, 346 StringRef ToolVersion) { 347 // Clear resources associated with a previous run. 348 endRun(); 349 350 // Signify a new run has begun. 351 Closed = false; 352 353 json::Object Tool{ 354 {"driver", 355 json::Object{{"name", ShortToolName}, 356 {"fullName", LongToolName}, 357 {"language", "en-US"}, 358 {"version", ToolVersion}, 359 {"informationUri", 360 "https://clang.llvm.org/docs/UsersManual.html"}}}}; 361 json::Object TheRun{{"tool", std::move(Tool)}, 362 {"results", {}}, 363 {"artifacts", {}}, 364 {"columnKind", "unicodeCodePoints"}}; 365 Runs.emplace_back(std::move(TheRun)); 366 } 367 368 json::Object &SarifDocumentWriter::getCurrentRun() { 369 assert(!Closed && 370 "SARIF Document is closed. " 371 "Can only getCurrentRun() if document is opened via createRun(), " 372 "create a run first"); 373 374 // Since Closed = false here, expect there to be at least 1 Run, anything 375 // else is an invalid state. 376 assert(!Runs.empty() && "There are no runs associated with the document!"); 377 return *Runs.back().getAsObject(); 378 } 379 380 size_t SarifDocumentWriter::createRule(const SarifRule &Rule) { 381 size_t Ret = CurrentRules.size(); 382 CurrentRules.emplace_back(Rule); 383 return Ret; 384 } 385 386 void SarifDocumentWriter::appendResult(const SarifResult &Result) { 387 size_t RuleIdx = Result.RuleIdx; 388 assert(RuleIdx < CurrentRules.size() && 389 "Trying to reference a rule that doesn't exist"); 390 const SarifRule &Rule = CurrentRules[RuleIdx]; 391 assert(Rule.DefaultConfiguration.Enabled && 392 "Cannot add a result referencing a disabled Rule"); 393 json::Object Ret{{"message", createMessage(Result.DiagnosticMessage)}, 394 {"ruleIndex", static_cast<int64_t>(RuleIdx)}, 395 {"ruleId", Rule.Id}}; 396 if (!Result.Locations.empty()) { 397 json::Array Locs; 398 for (auto &Range : Result.Locations) { 399 Locs.emplace_back(createLocation(createPhysicalLocation(Range))); 400 } 401 Ret["locations"] = std::move(Locs); 402 } 403 if (!Result.ThreadFlows.empty()) 404 Ret["codeFlows"] = json::Array{createCodeFlow(Result.ThreadFlows)}; 405 406 Ret["level"] = resultLevelToStr( 407 Result.LevelOverride.value_or(Rule.DefaultConfiguration.Level)); 408 409 json::Object &Run = getCurrentRun(); 410 json::Array *Results = Run.getArray("results"); 411 Results->emplace_back(std::move(Ret)); 412 } 413 414 json::Object SarifDocumentWriter::createDocument() { 415 // Flush all temporaries to their destinations if needed. 416 endRun(); 417 418 json::Object Doc{ 419 {"$schema", SchemaURI}, 420 {"version", SchemaVersion}, 421 }; 422 if (!Runs.empty()) 423 Doc["runs"] = json::Array(Runs); 424 return Doc; 425 } 426