1 //===-- clang/Basic/Sarif.cpp - SarifDocumentWriter class definition ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the declaration of the SARIFDocumentWriter class, and 11 /// associated builders such as: 12 /// - \ref SarifArtifact 13 /// - \ref SarifArtifactLocation 14 /// - \ref SarifRule 15 /// - \ref SarifResult 16 //===----------------------------------------------------------------------===// 17 #include "clang/Basic/Sarif.h" 18 #include "clang/Basic/SourceLocation.h" 19 #include "clang/Basic/SourceManager.h" 20 #include "llvm/ADT/STLExtras.h" 21 #include "llvm/ADT/StringExtras.h" 22 #include "llvm/ADT/StringRef.h" 23 #include "llvm/Support/ConvertUTF.h" 24 #include "llvm/Support/JSON.h" 25 #include "llvm/Support/Path.h" 26 27 #include <optional> 28 #include <string> 29 #include <utility> 30 31 using namespace clang; 32 using namespace llvm; 33 34 using clang::detail::SarifArtifact; 35 using clang::detail::SarifArtifactLocation; 36 37 static StringRef getFileName(FileEntryRef FE) { 38 StringRef Filename = FE.getFileEntry().tryGetRealPathName(); 39 if (Filename.empty()) 40 Filename = FE.getName(); 41 return Filename; 42 } 43 /// \name URI 44 /// @{ 45 46 /// \internal 47 /// \brief 48 /// Return the RFC3986 encoding of the input character. 49 /// 50 /// \param C Character to encode to RFC3986. 51 /// 52 /// \return The RFC3986 representation of \c C. 53 static std::string percentEncodeURICharacter(char C) { 54 // RFC 3986 claims alpha, numeric, and this handful of 55 // characters are not reserved for the path component and 56 // should be written out directly. Otherwise, percent 57 // encode the character and write that out instead of the 58 // reserved character. 59 if (llvm::isAlnum(C) || StringRef("-._~:@!$&'()*+,;=").contains(C)) 60 return std::string(&C, 1); 61 return "%" + llvm::toHex(StringRef(&C, 1)); 62 } 63 64 /// \internal 65 /// \brief Return a URI representing the given file name. 66 /// 67 /// \param Filename The filename to be represented as URI. 68 /// 69 /// \return RFC3986 URI representing the input file name. 70 static std::string fileNameToURI(StringRef Filename) { 71 SmallString<32> Ret = StringRef("file://"); 72 73 // Get the root name to see if it has a URI authority. 74 StringRef Root = sys::path::root_name(Filename); 75 if (Root.starts_with("//")) { 76 // There is an authority, so add it to the URI. 77 Ret += Root.drop_front(2).str(); 78 } else if (!Root.empty()) { 79 // There is no authority, so end the component and add the root to the URI. 80 Ret += Twine("/" + Root).str(); 81 } 82 83 auto Iter = sys::path::begin(Filename), End = sys::path::end(Filename); 84 assert(Iter != End && "Expected there to be a non-root path component."); 85 // Add the rest of the path components, encoding any reserved characters; 86 // we skip past the first path component, as it was handled it above. 87 for (StringRef Component : llvm::make_range(++Iter, End)) { 88 // For reasons unknown to me, we may get a backslash with Windows native 89 // paths for the initial backslash following the drive component, which 90 // we need to ignore as a URI path part. 91 if (Component == "\\") 92 continue; 93 94 // Add the separator between the previous path part and the one being 95 // currently processed. 96 Ret += "/"; 97 98 // URI encode the part. 99 for (char C : Component) { 100 Ret += percentEncodeURICharacter(C); 101 } 102 } 103 104 return std::string(Ret); 105 } 106 /// @} 107 108 /// \brief Calculate the column position expressed in the number of UTF-8 code 109 /// points from column start to the source location 110 /// 111 /// \param Loc The source location whose column needs to be calculated. 112 /// \param TokenLen Optional hint for when the token is multiple bytes long. 113 /// 114 /// \return The column number as a UTF-8 aware byte offset from column start to 115 /// the effective source location. 116 static unsigned int adjustColumnPos(FullSourceLoc Loc, 117 unsigned int TokenLen = 0) { 118 assert(!Loc.isInvalid() && "invalid Loc when adjusting column position"); 119 120 FileIDAndOffset LocInfo = Loc.getDecomposedExpansionLoc(); 121 std::optional<MemoryBufferRef> Buf = 122 Loc.getManager().getBufferOrNone(LocInfo.first); 123 assert(Buf && "got an invalid buffer for the location's file"); 124 assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) && 125 "token extends past end of buffer?"); 126 127 // Adjust the offset to be the start of the line, since we'll be counting 128 // Unicode characters from there until our column offset. 129 unsigned int Off = LocInfo.second - (Loc.getExpansionColumnNumber() - 1); 130 unsigned int Ret = 1; 131 while (Off < (LocInfo.second + TokenLen)) { 132 Off += getNumBytesForUTF8(Buf->getBuffer()[Off]); 133 Ret++; 134 } 135 136 return Ret; 137 } 138 139 /// \name SARIF Utilities 140 /// @{ 141 142 /// \internal 143 static json::Object createMessage(StringRef Text) { 144 return json::Object{{"text", Text.str()}}; 145 } 146 147 /// \internal 148 /// \pre CharSourceRange must be a token range 149 static json::Object createTextRegion(const SourceManager &SM, 150 const CharSourceRange &R) { 151 FullSourceLoc BeginCharLoc{R.getBegin(), SM}; 152 FullSourceLoc EndCharLoc{R.getEnd(), SM}; 153 json::Object Region{{"startLine", BeginCharLoc.getExpansionLineNumber()}, 154 {"startColumn", adjustColumnPos(BeginCharLoc)}}; 155 156 if (BeginCharLoc == EndCharLoc) { 157 Region["endColumn"] = adjustColumnPos(BeginCharLoc); 158 } else { 159 Region["endLine"] = EndCharLoc.getExpansionLineNumber(); 160 Region["endColumn"] = adjustColumnPos(EndCharLoc); 161 } 162 return Region; 163 } 164 165 static json::Object createLocation(json::Object &&PhysicalLocation, 166 StringRef Message = "") { 167 json::Object Ret{{"physicalLocation", std::move(PhysicalLocation)}}; 168 if (!Message.empty()) 169 Ret.insert({"message", createMessage(Message)}); 170 return Ret; 171 } 172 173 static StringRef importanceToStr(ThreadFlowImportance I) { 174 switch (I) { 175 case ThreadFlowImportance::Important: 176 return "important"; 177 case ThreadFlowImportance::Essential: 178 return "essential"; 179 case ThreadFlowImportance::Unimportant: 180 return "unimportant"; 181 } 182 llvm_unreachable("Fully covered switch is not so fully covered"); 183 } 184 185 static StringRef resultLevelToStr(SarifResultLevel R) { 186 switch (R) { 187 case SarifResultLevel::None: 188 return "none"; 189 case SarifResultLevel::Note: 190 return "note"; 191 case SarifResultLevel::Warning: 192 return "warning"; 193 case SarifResultLevel::Error: 194 return "error"; 195 } 196 llvm_unreachable("Potentially un-handled SarifResultLevel. " 197 "Is the switch not fully covered?"); 198 } 199 200 static json::Object 201 createThreadFlowLocation(json::Object &&Location, 202 const ThreadFlowImportance &Importance) { 203 return json::Object{{"location", std::move(Location)}, 204 {"importance", importanceToStr(Importance)}}; 205 } 206 /// @} 207 208 json::Object 209 SarifDocumentWriter::createPhysicalLocation(const CharSourceRange &R) { 210 assert(R.isValid() && 211 "Cannot create a physicalLocation from invalid SourceRange!"); 212 assert(R.isCharRange() && 213 "Cannot create a physicalLocation from a token range!"); 214 FullSourceLoc Start{R.getBegin(), SourceMgr}; 215 OptionalFileEntryRef FE = Start.getExpansionLoc().getFileEntryRef(); 216 assert(FE && "Diagnostic does not exist within a valid file!"); 217 218 const std::string &FileURI = fileNameToURI(getFileName(*FE)); 219 auto I = CurrentArtifacts.find(FileURI); 220 221 if (I == CurrentArtifacts.end()) { 222 uint32_t Idx = static_cast<uint32_t>(CurrentArtifacts.size()); 223 const SarifArtifactLocation &Location = 224 SarifArtifactLocation::create(FileURI).setIndex(Idx); 225 const SarifArtifact &Artifact = SarifArtifact::create(Location) 226 .setRoles({"resultFile"}) 227 .setLength(FE->getSize()) 228 .setMimeType("text/plain"); 229 auto StatusIter = CurrentArtifacts.insert({FileURI, Artifact}); 230 // If inserted, ensure the original iterator points to the newly inserted 231 // element, so it can be used downstream. 232 if (StatusIter.second) 233 I = StatusIter.first; 234 } 235 assert(I != CurrentArtifacts.end() && "Failed to insert new artifact"); 236 const SarifArtifactLocation &Location = I->second.Location; 237 json::Object ArtifactLocationObject{{"uri", Location.URI}}; 238 if (Location.Index.has_value()) 239 ArtifactLocationObject["index"] = *Location.Index; 240 return json::Object{{{"artifactLocation", std::move(ArtifactLocationObject)}, 241 {"region", createTextRegion(SourceMgr, R)}}}; 242 } 243 244 json::Object &SarifDocumentWriter::getCurrentTool() { 245 assert(!Closed && "SARIF Document is closed. " 246 "Need to call createRun() before using getcurrentTool!"); 247 248 // Since Closed = false here, expect there to be at least 1 Run, anything 249 // else is an invalid state. 250 assert(!Runs.empty() && "There are no runs associated with the document!"); 251 252 return *Runs.back().getAsObject()->get("tool")->getAsObject(); 253 } 254 255 void SarifDocumentWriter::reset() { 256 CurrentRules.clear(); 257 CurrentArtifacts.clear(); 258 } 259 260 void SarifDocumentWriter::endRun() { 261 // Exit early if trying to close a closed Document. 262 if (Closed) { 263 reset(); 264 return; 265 } 266 267 // Since Closed = false here, expect there to be at least 1 Run, anything 268 // else is an invalid state. 269 assert(!Runs.empty() && "There are no runs associated with the document!"); 270 271 // Flush all the rules. 272 json::Object &Tool = getCurrentTool(); 273 json::Array Rules; 274 for (const SarifRule &R : CurrentRules) { 275 json::Object Config{ 276 {"enabled", R.DefaultConfiguration.Enabled}, 277 {"level", resultLevelToStr(R.DefaultConfiguration.Level)}, 278 {"rank", R.DefaultConfiguration.Rank}}; 279 json::Object Rule{ 280 {"name", R.Name}, 281 {"id", R.Id}, 282 {"fullDescription", json::Object{{"text", R.Description}}}, 283 {"defaultConfiguration", std::move(Config)}}; 284 if (!R.HelpURI.empty()) 285 Rule["helpUri"] = R.HelpURI; 286 Rules.emplace_back(std::move(Rule)); 287 } 288 json::Object &Driver = *Tool.getObject("driver"); 289 Driver["rules"] = std::move(Rules); 290 291 // Flush all the artifacts. 292 json::Object &Run = getCurrentRun(); 293 json::Array *Artifacts = Run.getArray("artifacts"); 294 SmallVector<std::pair<StringRef, SarifArtifact>, 0> Vec; 295 for (const auto &[K, V] : CurrentArtifacts) 296 Vec.emplace_back(K, V); 297 llvm::sort(Vec, llvm::less_first()); 298 for (const auto &[_, A] : Vec) { 299 json::Object Loc{{"uri", A.Location.URI}}; 300 if (A.Location.Index.has_value()) { 301 Loc["index"] = static_cast<int64_t>(*A.Location.Index); 302 } 303 json::Object Artifact; 304 Artifact["location"] = std::move(Loc); 305 if (A.Length.has_value()) 306 Artifact["length"] = static_cast<int64_t>(*A.Length); 307 if (!A.Roles.empty()) 308 Artifact["roles"] = json::Array(A.Roles); 309 if (!A.MimeType.empty()) 310 Artifact["mimeType"] = A.MimeType; 311 if (A.Offset.has_value()) 312 Artifact["offset"] = *A.Offset; 313 Artifacts->push_back(json::Value(std::move(Artifact))); 314 } 315 316 // Clear, reset temporaries before next run. 317 reset(); 318 319 // Mark the document as closed. 320 Closed = true; 321 } 322 323 json::Array 324 SarifDocumentWriter::createThreadFlows(ArrayRef<ThreadFlow> ThreadFlows) { 325 json::Object Ret{{"locations", json::Array{}}}; 326 json::Array Locs; 327 for (const auto &ThreadFlow : ThreadFlows) { 328 json::Object PLoc = createPhysicalLocation(ThreadFlow.Range); 329 json::Object Loc = createLocation(std::move(PLoc), ThreadFlow.Message); 330 Locs.emplace_back( 331 createThreadFlowLocation(std::move(Loc), ThreadFlow.Importance)); 332 } 333 Ret["locations"] = std::move(Locs); 334 return json::Array{std::move(Ret)}; 335 } 336 337 json::Object 338 SarifDocumentWriter::createCodeFlow(ArrayRef<ThreadFlow> ThreadFlows) { 339 return json::Object{{"threadFlows", createThreadFlows(ThreadFlows)}}; 340 } 341 342 void SarifDocumentWriter::createRun(StringRef ShortToolName, 343 StringRef LongToolName, 344 StringRef ToolVersion) { 345 // Clear resources associated with a previous run. 346 endRun(); 347 348 // Signify a new run has begun. 349 Closed = false; 350 351 json::Object Tool{ 352 {"driver", 353 json::Object{{"name", ShortToolName}, 354 {"fullName", LongToolName}, 355 {"language", "en-US"}, 356 {"version", ToolVersion}, 357 {"informationUri", 358 "https://clang.llvm.org/docs/UsersManual.html"}}}}; 359 json::Object TheRun{{"tool", std::move(Tool)}, 360 {"results", {}}, 361 {"artifacts", {}}, 362 {"columnKind", "unicodeCodePoints"}}; 363 Runs.emplace_back(std::move(TheRun)); 364 } 365 366 json::Object &SarifDocumentWriter::getCurrentRun() { 367 assert(!Closed && 368 "SARIF Document is closed. " 369 "Can only getCurrentRun() if document is opened via createRun(), " 370 "create a run first"); 371 372 // Since Closed = false here, expect there to be at least 1 Run, anything 373 // else is an invalid state. 374 assert(!Runs.empty() && "There are no runs associated with the document!"); 375 return *Runs.back().getAsObject(); 376 } 377 378 size_t SarifDocumentWriter::createRule(const SarifRule &Rule) { 379 size_t Ret = CurrentRules.size(); 380 CurrentRules.emplace_back(Rule); 381 return Ret; 382 } 383 384 void SarifDocumentWriter::appendResult(const SarifResult &Result) { 385 size_t RuleIdx = Result.RuleIdx; 386 assert(RuleIdx < CurrentRules.size() && 387 "Trying to reference a rule that doesn't exist"); 388 const SarifRule &Rule = CurrentRules[RuleIdx]; 389 assert(Rule.DefaultConfiguration.Enabled && 390 "Cannot add a result referencing a disabled Rule"); 391 json::Object Ret{{"message", createMessage(Result.DiagnosticMessage)}, 392 {"ruleIndex", static_cast<int64_t>(RuleIdx)}, 393 {"ruleId", Rule.Id}}; 394 if (!Result.Locations.empty()) { 395 json::Array Locs; 396 for (auto &Range : Result.Locations) { 397 Locs.emplace_back(createLocation(createPhysicalLocation(Range))); 398 } 399 Ret["locations"] = std::move(Locs); 400 } 401 if (!Result.ThreadFlows.empty()) 402 Ret["codeFlows"] = json::Array{createCodeFlow(Result.ThreadFlows)}; 403 404 Ret["level"] = resultLevelToStr( 405 Result.LevelOverride.value_or(Rule.DefaultConfiguration.Level)); 406 407 json::Object &Run = getCurrentRun(); 408 json::Array *Results = Run.getArray("results"); 409 Results->emplace_back(std::move(Ret)); 410 } 411 412 json::Object SarifDocumentWriter::createDocument() { 413 // Flush all temporaries to their destinations if needed. 414 endRun(); 415 416 json::Object Doc{ 417 {"$schema", SchemaURI}, 418 {"version", SchemaVersion}, 419 }; 420 if (!Runs.empty()) 421 Doc["runs"] = json::Array(Runs); 422 return Doc; 423 } 424