1 //===-- clang/Basic/Sarif.cpp - SarifDocumentWriter class definition ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the declaration of the SARIFDocumentWriter class, and 11 /// associated builders such as: 12 /// - \ref SarifArtifact 13 /// - \ref SarifArtifactLocation 14 /// - \ref SarifRule 15 /// - \ref SarifResult 16 //===----------------------------------------------------------------------===// 17 #include "clang/Basic/Sarif.h" 18 #include "clang/Basic/SourceLocation.h" 19 #include "clang/Basic/SourceManager.h" 20 #include "llvm/ADT/ArrayRef.h" 21 #include "llvm/ADT/STLExtras.h" 22 #include "llvm/ADT/StringExtras.h" 23 #include "llvm/ADT/StringMap.h" 24 #include "llvm/ADT/StringRef.h" 25 #include "llvm/Support/ConvertUTF.h" 26 #include "llvm/Support/JSON.h" 27 #include "llvm/Support/Path.h" 28 29 #include <optional> 30 #include <string> 31 #include <utility> 32 33 using namespace clang; 34 using namespace llvm; 35 36 using clang::detail::SarifArtifact; 37 using clang::detail::SarifArtifactLocation; 38 39 static StringRef getFileName(const FileEntry &FE) { 40 StringRef Filename = FE.tryGetRealPathName(); 41 if (Filename.empty()) 42 Filename = FE.getName(); 43 return Filename; 44 } 45 /// \name URI 46 /// @{ 47 48 /// \internal 49 /// \brief 50 /// Return the RFC3986 encoding of the input character. 51 /// 52 /// \param C Character to encode to RFC3986. 53 /// 54 /// \return The RFC3986 representation of \c C. 55 static std::string percentEncodeURICharacter(char C) { 56 // RFC 3986 claims alpha, numeric, and this handful of 57 // characters are not reserved for the path component and 58 // should be written out directly. Otherwise, percent 59 // encode the character and write that out instead of the 60 // reserved character. 61 if (llvm::isAlnum(C) || 62 StringRef::npos != StringRef("-._~:@!$&'()*+,;=").find(C)) 63 return std::string(&C, 1); 64 return "%" + llvm::toHex(StringRef(&C, 1)); 65 } 66 67 /// \internal 68 /// \brief Return a URI representing the given file name. 69 /// 70 /// \param Filename The filename to be represented as URI. 71 /// 72 /// \return RFC3986 URI representing the input file name. 73 static std::string fileNameToURI(StringRef Filename) { 74 SmallString<32> Ret = StringRef("file://"); 75 76 // Get the root name to see if it has a URI authority. 77 StringRef Root = sys::path::root_name(Filename); 78 if (Root.startswith("//")) { 79 // There is an authority, so add it to the URI. 80 Ret += Root.drop_front(2).str(); 81 } else if (!Root.empty()) { 82 // There is no authority, so end the component and add the root to the URI. 83 Ret += Twine("/" + Root).str(); 84 } 85 86 auto Iter = sys::path::begin(Filename), End = sys::path::end(Filename); 87 assert(Iter != End && "Expected there to be a non-root path component."); 88 // Add the rest of the path components, encoding any reserved characters; 89 // we skip past the first path component, as it was handled it above. 90 std::for_each(++Iter, End, [&Ret](StringRef Component) { 91 // For reasons unknown to me, we may get a backslash with Windows native 92 // paths for the initial backslash following the drive component, which 93 // we need to ignore as a URI path part. 94 if (Component == "\\") 95 return; 96 97 // Add the separator between the previous path part and the one being 98 // currently processed. 99 Ret += "/"; 100 101 // URI encode the part. 102 for (char C : Component) { 103 Ret += percentEncodeURICharacter(C); 104 } 105 }); 106 107 return std::string(Ret); 108 } 109 /// @} 110 111 /// \brief Calculate the column position expressed in the number of UTF-8 code 112 /// points from column start to the source location 113 /// 114 /// \param Loc The source location whose column needs to be calculated. 115 /// \param TokenLen Optional hint for when the token is multiple bytes long. 116 /// 117 /// \return The column number as a UTF-8 aware byte offset from column start to 118 /// the effective source location. 119 static unsigned int adjustColumnPos(FullSourceLoc Loc, 120 unsigned int TokenLen = 0) { 121 assert(!Loc.isInvalid() && "invalid Loc when adjusting column position"); 122 123 std::pair<FileID, unsigned> LocInfo = Loc.getDecomposedExpansionLoc(); 124 std::optional<MemoryBufferRef> Buf = 125 Loc.getManager().getBufferOrNone(LocInfo.first); 126 assert(Buf && "got an invalid buffer for the location's file"); 127 assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) && 128 "token extends past end of buffer?"); 129 130 // Adjust the offset to be the start of the line, since we'll be counting 131 // Unicode characters from there until our column offset. 132 unsigned int Off = LocInfo.second - (Loc.getExpansionColumnNumber() - 1); 133 unsigned int Ret = 1; 134 while (Off < (LocInfo.second + TokenLen)) { 135 Off += getNumBytesForUTF8(Buf->getBuffer()[Off]); 136 Ret++; 137 } 138 139 return Ret; 140 } 141 142 /// \name SARIF Utilities 143 /// @{ 144 145 /// \internal 146 json::Object createMessage(StringRef Text) { 147 return json::Object{{"text", Text.str()}}; 148 } 149 150 /// \internal 151 /// \pre CharSourceRange must be a token range 152 static json::Object createTextRegion(const SourceManager &SM, 153 const CharSourceRange &R) { 154 FullSourceLoc BeginCharLoc{R.getBegin(), SM}; 155 FullSourceLoc EndCharLoc{R.getEnd(), SM}; 156 json::Object Region{{"startLine", BeginCharLoc.getExpansionLineNumber()}, 157 {"startColumn", adjustColumnPos(BeginCharLoc)}}; 158 159 if (BeginCharLoc == EndCharLoc) { 160 Region["endColumn"] = adjustColumnPos(BeginCharLoc); 161 } else { 162 Region["endLine"] = EndCharLoc.getExpansionLineNumber(); 163 Region["endColumn"] = adjustColumnPos(EndCharLoc); 164 } 165 return Region; 166 } 167 168 static json::Object createLocation(json::Object &&PhysicalLocation, 169 StringRef Message = "") { 170 json::Object Ret{{"physicalLocation", std::move(PhysicalLocation)}}; 171 if (!Message.empty()) 172 Ret.insert({"message", createMessage(Message)}); 173 return Ret; 174 } 175 176 static StringRef importanceToStr(ThreadFlowImportance I) { 177 switch (I) { 178 case ThreadFlowImportance::Important: 179 return "important"; 180 case ThreadFlowImportance::Essential: 181 return "essential"; 182 case ThreadFlowImportance::Unimportant: 183 return "unimportant"; 184 } 185 llvm_unreachable("Fully covered switch is not so fully covered"); 186 } 187 188 static StringRef resultLevelToStr(SarifResultLevel R) { 189 switch (R) { 190 case SarifResultLevel::None: 191 return "none"; 192 case SarifResultLevel::Note: 193 return "note"; 194 case SarifResultLevel::Warning: 195 return "warning"; 196 case SarifResultLevel::Error: 197 return "error"; 198 } 199 llvm_unreachable("Potentially un-handled SarifResultLevel. " 200 "Is the switch not fully covered?"); 201 } 202 203 static json::Object 204 createThreadFlowLocation(json::Object &&Location, 205 const ThreadFlowImportance &Importance) { 206 return json::Object{{"location", std::move(Location)}, 207 {"importance", importanceToStr(Importance)}}; 208 } 209 /// @} 210 211 json::Object 212 SarifDocumentWriter::createPhysicalLocation(const CharSourceRange &R) { 213 assert(R.isValid() && 214 "Cannot create a physicalLocation from invalid SourceRange!"); 215 assert(R.isCharRange() && 216 "Cannot create a physicalLocation from a token range!"); 217 FullSourceLoc Start{R.getBegin(), SourceMgr}; 218 const FileEntry *FE = Start.getExpansionLoc().getFileEntry(); 219 assert(FE != nullptr && "Diagnostic does not exist within a valid file!"); 220 221 const std::string &FileURI = fileNameToURI(getFileName(*FE)); 222 auto I = CurrentArtifacts.find(FileURI); 223 224 if (I == CurrentArtifacts.end()) { 225 uint32_t Idx = static_cast<uint32_t>(CurrentArtifacts.size()); 226 const SarifArtifactLocation &Location = 227 SarifArtifactLocation::create(FileURI).setIndex(Idx); 228 const SarifArtifact &Artifact = SarifArtifact::create(Location) 229 .setRoles({"resultFile"}) 230 .setLength(FE->getSize()) 231 .setMimeType("text/plain"); 232 auto StatusIter = CurrentArtifacts.insert({FileURI, Artifact}); 233 // If inserted, ensure the original iterator points to the newly inserted 234 // element, so it can be used downstream. 235 if (StatusIter.second) 236 I = StatusIter.first; 237 } 238 assert(I != CurrentArtifacts.end() && "Failed to insert new artifact"); 239 const SarifArtifactLocation &Location = I->second.Location; 240 json::Object ArtifactLocationObject{{"uri", Location.URI}}; 241 if (Location.Index.has_value()) 242 ArtifactLocationObject["index"] = *Location.Index; 243 return json::Object{{{"artifactLocation", std::move(ArtifactLocationObject)}, 244 {"region", createTextRegion(SourceMgr, R)}}}; 245 } 246 247 json::Object &SarifDocumentWriter::getCurrentTool() { 248 assert(!Closed && "SARIF Document is closed. " 249 "Need to call createRun() before using getcurrentTool!"); 250 251 // Since Closed = false here, expect there to be at least 1 Run, anything 252 // else is an invalid state. 253 assert(!Runs.empty() && "There are no runs associated with the document!"); 254 255 return *Runs.back().getAsObject()->get("tool")->getAsObject(); 256 } 257 258 void SarifDocumentWriter::reset() { 259 CurrentRules.clear(); 260 CurrentArtifacts.clear(); 261 } 262 263 void SarifDocumentWriter::endRun() { 264 // Exit early if trying to close a closed Document. 265 if (Closed) { 266 reset(); 267 return; 268 } 269 270 // Since Closed = false here, expect there to be at least 1 Run, anything 271 // else is an invalid state. 272 assert(!Runs.empty() && "There are no runs associated with the document!"); 273 274 // Flush all the rules. 275 json::Object &Tool = getCurrentTool(); 276 json::Array Rules; 277 for (const SarifRule &R : CurrentRules) { 278 json::Object Config{ 279 {"enabled", R.DefaultConfiguration.Enabled}, 280 {"level", resultLevelToStr(R.DefaultConfiguration.Level)}, 281 {"rank", R.DefaultConfiguration.Rank}}; 282 json::Object Rule{ 283 {"name", R.Name}, 284 {"id", R.Id}, 285 {"fullDescription", json::Object{{"text", R.Description}}}, 286 {"defaultConfiguration", std::move(Config)}}; 287 if (!R.HelpURI.empty()) 288 Rule["helpUri"] = R.HelpURI; 289 Rules.emplace_back(std::move(Rule)); 290 } 291 json::Object &Driver = *Tool.getObject("driver"); 292 Driver["rules"] = std::move(Rules); 293 294 // Flush all the artifacts. 295 json::Object &Run = getCurrentRun(); 296 json::Array *Artifacts = Run.getArray("artifacts"); 297 SmallVector<std::pair<StringRef, SarifArtifact>, 0> Vec; 298 for (const auto &[K, V] : CurrentArtifacts) 299 Vec.emplace_back(K, V); 300 llvm::sort(Vec, llvm::less_first()); 301 for (const auto &[_, A] : Vec) { 302 json::Object Loc{{"uri", A.Location.URI}}; 303 if (A.Location.Index.has_value()) { 304 Loc["index"] = static_cast<int64_t>(*A.Location.Index); 305 } 306 json::Object Artifact; 307 Artifact["location"] = std::move(Loc); 308 if (A.Length.has_value()) 309 Artifact["length"] = static_cast<int64_t>(*A.Length); 310 if (!A.Roles.empty()) 311 Artifact["roles"] = json::Array(A.Roles); 312 if (!A.MimeType.empty()) 313 Artifact["mimeType"] = A.MimeType; 314 if (A.Offset.has_value()) 315 Artifact["offset"] = *A.Offset; 316 Artifacts->push_back(json::Value(std::move(Artifact))); 317 } 318 319 // Clear, reset temporaries before next run. 320 reset(); 321 322 // Mark the document as closed. 323 Closed = true; 324 } 325 326 json::Array 327 SarifDocumentWriter::createThreadFlows(ArrayRef<ThreadFlow> ThreadFlows) { 328 json::Object Ret{{"locations", json::Array{}}}; 329 json::Array Locs; 330 for (const auto &ThreadFlow : ThreadFlows) { 331 json::Object PLoc = createPhysicalLocation(ThreadFlow.Range); 332 json::Object Loc = createLocation(std::move(PLoc), ThreadFlow.Message); 333 Locs.emplace_back( 334 createThreadFlowLocation(std::move(Loc), ThreadFlow.Importance)); 335 } 336 Ret["locations"] = std::move(Locs); 337 return json::Array{std::move(Ret)}; 338 } 339 340 json::Object 341 SarifDocumentWriter::createCodeFlow(ArrayRef<ThreadFlow> ThreadFlows) { 342 return json::Object{{"threadFlows", createThreadFlows(ThreadFlows)}}; 343 } 344 345 void SarifDocumentWriter::createRun(StringRef ShortToolName, 346 StringRef LongToolName, 347 StringRef ToolVersion) { 348 // Clear resources associated with a previous run. 349 endRun(); 350 351 // Signify a new run has begun. 352 Closed = false; 353 354 json::Object Tool{ 355 {"driver", 356 json::Object{{"name", ShortToolName}, 357 {"fullName", LongToolName}, 358 {"language", "en-US"}, 359 {"version", ToolVersion}, 360 {"informationUri", 361 "https://clang.llvm.org/docs/UsersManual.html"}}}}; 362 json::Object TheRun{{"tool", std::move(Tool)}, 363 {"results", {}}, 364 {"artifacts", {}}, 365 {"columnKind", "unicodeCodePoints"}}; 366 Runs.emplace_back(std::move(TheRun)); 367 } 368 369 json::Object &SarifDocumentWriter::getCurrentRun() { 370 assert(!Closed && 371 "SARIF Document is closed. " 372 "Can only getCurrentRun() if document is opened via createRun(), " 373 "create a run first"); 374 375 // Since Closed = false here, expect there to be at least 1 Run, anything 376 // else is an invalid state. 377 assert(!Runs.empty() && "There are no runs associated with the document!"); 378 return *Runs.back().getAsObject(); 379 } 380 381 size_t SarifDocumentWriter::createRule(const SarifRule &Rule) { 382 size_t Ret = CurrentRules.size(); 383 CurrentRules.emplace_back(Rule); 384 return Ret; 385 } 386 387 void SarifDocumentWriter::appendResult(const SarifResult &Result) { 388 size_t RuleIdx = Result.RuleIdx; 389 assert(RuleIdx < CurrentRules.size() && 390 "Trying to reference a rule that doesn't exist"); 391 const SarifRule &Rule = CurrentRules[RuleIdx]; 392 assert(Rule.DefaultConfiguration.Enabled && 393 "Cannot add a result referencing a disabled Rule"); 394 json::Object Ret{{"message", createMessage(Result.DiagnosticMessage)}, 395 {"ruleIndex", static_cast<int64_t>(RuleIdx)}, 396 {"ruleId", Rule.Id}}; 397 if (!Result.Locations.empty()) { 398 json::Array Locs; 399 for (auto &Range : Result.Locations) { 400 Locs.emplace_back(createLocation(createPhysicalLocation(Range))); 401 } 402 Ret["locations"] = std::move(Locs); 403 } 404 if (!Result.ThreadFlows.empty()) 405 Ret["codeFlows"] = json::Array{createCodeFlow(Result.ThreadFlows)}; 406 407 Ret["level"] = resultLevelToStr( 408 Result.LevelOverride.value_or(Rule.DefaultConfiguration.Level)); 409 410 json::Object &Run = getCurrentRun(); 411 json::Array *Results = Run.getArray("results"); 412 Results->emplace_back(std::move(Ret)); 413 } 414 415 json::Object SarifDocumentWriter::createDocument() { 416 // Flush all temporaries to their destinations if needed. 417 endRun(); 418 419 json::Object Doc{ 420 {"$schema", SchemaURI}, 421 {"version", SchemaVersion}, 422 }; 423 if (!Runs.empty()) 424 Doc["runs"] = json::Array(Runs); 425 return Doc; 426 } 427