1 //===-- clang/Basic/Sarif.cpp - SarifDocumentWriter class definition ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the declaration of the SARIFDocumentWriter class, and 11 /// associated builders such as: 12 /// - \ref SarifArtifact 13 /// - \ref SarifArtifactLocation 14 /// - \ref SarifRule 15 /// - \ref SarifResult 16 //===----------------------------------------------------------------------===// 17 #include "clang/Basic/Sarif.h" 18 #include "clang/Basic/SourceLocation.h" 19 #include "clang/Basic/SourceManager.h" 20 #include "llvm/ADT/ArrayRef.h" 21 #include "llvm/ADT/STLExtras.h" 22 #include "llvm/ADT/StringMap.h" 23 #include "llvm/ADT/StringRef.h" 24 #include "llvm/Support/ConvertUTF.h" 25 #include "llvm/Support/JSON.h" 26 #include "llvm/Support/Path.h" 27 28 #include <optional> 29 #include <string> 30 #include <utility> 31 32 using namespace clang; 33 using namespace llvm; 34 35 using clang::detail::SarifArtifact; 36 using clang::detail::SarifArtifactLocation; 37 38 static StringRef getFileName(const FileEntry &FE) { 39 StringRef Filename = FE.tryGetRealPathName(); 40 if (Filename.empty()) 41 Filename = FE.getName(); 42 return Filename; 43 } 44 /// \name URI 45 /// @{ 46 47 /// \internal 48 /// \brief 49 /// Return the RFC3986 encoding of the input character. 50 /// 51 /// \param C Character to encode to RFC3986. 52 /// 53 /// \return The RFC3986 representation of \c C. 54 static std::string percentEncodeURICharacter(char C) { 55 // RFC 3986 claims alpha, numeric, and this handful of 56 // characters are not reserved for the path component and 57 // should be written out directly. Otherwise, percent 58 // encode the character and write that out instead of the 59 // reserved character. 60 if (llvm::isAlnum(C) || 61 StringRef::npos != StringRef("-._~:@!$&'()*+,;=").find(C)) 62 return std::string(&C, 1); 63 return "%" + llvm::toHex(StringRef(&C, 1)); 64 } 65 66 /// \internal 67 /// \brief Return a URI representing the given file name. 68 /// 69 /// \param Filename The filename to be represented as URI. 70 /// 71 /// \return RFC3986 URI representing the input file name. 72 static std::string fileNameToURI(StringRef Filename) { 73 SmallString<32> Ret = StringRef("file://"); 74 75 // Get the root name to see if it has a URI authority. 76 StringRef Root = sys::path::root_name(Filename); 77 if (Root.startswith("//")) { 78 // There is an authority, so add it to the URI. 79 Ret += Root.drop_front(2).str(); 80 } else if (!Root.empty()) { 81 // There is no authority, so end the component and add the root to the URI. 82 Ret += Twine("/" + Root).str(); 83 } 84 85 auto Iter = sys::path::begin(Filename), End = sys::path::end(Filename); 86 assert(Iter != End && "Expected there to be a non-root path component."); 87 // Add the rest of the path components, encoding any reserved characters; 88 // we skip past the first path component, as it was handled it above. 89 std::for_each(++Iter, End, [&Ret](StringRef Component) { 90 // For reasons unknown to me, we may get a backslash with Windows native 91 // paths for the initial backslash following the drive component, which 92 // we need to ignore as a URI path part. 93 if (Component == "\\") 94 return; 95 96 // Add the separator between the previous path part and the one being 97 // currently processed. 98 Ret += "/"; 99 100 // URI encode the part. 101 for (char C : Component) { 102 Ret += percentEncodeURICharacter(C); 103 } 104 }); 105 106 return std::string(Ret); 107 } 108 /// @} 109 110 /// \brief Calculate the column position expressed in the number of UTF-8 code 111 /// points from column start to the source location 112 /// 113 /// \param Loc The source location whose column needs to be calculated. 114 /// \param TokenLen Optional hint for when the token is multiple bytes long. 115 /// 116 /// \return The column number as a UTF-8 aware byte offset from column start to 117 /// the effective source location. 118 static unsigned int adjustColumnPos(FullSourceLoc Loc, 119 unsigned int TokenLen = 0) { 120 assert(!Loc.isInvalid() && "invalid Loc when adjusting column position"); 121 122 std::pair<FileID, unsigned> LocInfo = Loc.getDecomposedExpansionLoc(); 123 std::optional<MemoryBufferRef> Buf = 124 Loc.getManager().getBufferOrNone(LocInfo.first); 125 assert(Buf && "got an invalid buffer for the location's file"); 126 assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) && 127 "token extends past end of buffer?"); 128 129 // Adjust the offset to be the start of the line, since we'll be counting 130 // Unicode characters from there until our column offset. 131 unsigned int Off = LocInfo.second - (Loc.getExpansionColumnNumber() - 1); 132 unsigned int Ret = 1; 133 while (Off < (LocInfo.second + TokenLen)) { 134 Off += getNumBytesForUTF8(Buf->getBuffer()[Off]); 135 Ret++; 136 } 137 138 return Ret; 139 } 140 141 /// \name SARIF Utilities 142 /// @{ 143 144 /// \internal 145 json::Object createMessage(StringRef Text) { 146 return json::Object{{"text", Text.str()}}; 147 } 148 149 /// \internal 150 /// \pre CharSourceRange must be a token range 151 static json::Object createTextRegion(const SourceManager &SM, 152 const CharSourceRange &R) { 153 FullSourceLoc BeginCharLoc{R.getBegin(), SM}; 154 FullSourceLoc EndCharLoc{R.getEnd(), SM}; 155 json::Object Region{{"startLine", BeginCharLoc.getExpansionLineNumber()}, 156 {"startColumn", adjustColumnPos(BeginCharLoc)}}; 157 158 if (BeginCharLoc == EndCharLoc) { 159 Region["endColumn"] = adjustColumnPos(BeginCharLoc); 160 } else { 161 Region["endLine"] = EndCharLoc.getExpansionLineNumber(); 162 Region["endColumn"] = adjustColumnPos(EndCharLoc); 163 } 164 return Region; 165 } 166 167 static json::Object createLocation(json::Object &&PhysicalLocation, 168 StringRef Message = "") { 169 json::Object Ret{{"physicalLocation", std::move(PhysicalLocation)}}; 170 if (!Message.empty()) 171 Ret.insert({"message", createMessage(Message)}); 172 return Ret; 173 } 174 175 static StringRef importanceToStr(ThreadFlowImportance I) { 176 switch (I) { 177 case ThreadFlowImportance::Important: 178 return "important"; 179 case ThreadFlowImportance::Essential: 180 return "essential"; 181 case ThreadFlowImportance::Unimportant: 182 return "unimportant"; 183 } 184 llvm_unreachable("Fully covered switch is not so fully covered"); 185 } 186 187 static StringRef resultLevelToStr(SarifResultLevel R) { 188 switch (R) { 189 case SarifResultLevel::None: 190 return "none"; 191 case SarifResultLevel::Note: 192 return "note"; 193 case SarifResultLevel::Warning: 194 return "warning"; 195 case SarifResultLevel::Error: 196 return "error"; 197 } 198 llvm_unreachable("Potentially un-handled SarifResultLevel. " 199 "Is the switch not fully covered?"); 200 } 201 202 static json::Object 203 createThreadFlowLocation(json::Object &&Location, 204 const ThreadFlowImportance &Importance) { 205 return json::Object{{"location", std::move(Location)}, 206 {"importance", importanceToStr(Importance)}}; 207 } 208 /// @} 209 210 json::Object 211 SarifDocumentWriter::createPhysicalLocation(const CharSourceRange &R) { 212 assert(R.isValid() && 213 "Cannot create a physicalLocation from invalid SourceRange!"); 214 assert(R.isCharRange() && 215 "Cannot create a physicalLocation from a token range!"); 216 FullSourceLoc Start{R.getBegin(), SourceMgr}; 217 const FileEntry *FE = Start.getExpansionLoc().getFileEntry(); 218 assert(FE != nullptr && "Diagnostic does not exist within a valid file!"); 219 220 const std::string &FileURI = fileNameToURI(getFileName(*FE)); 221 auto I = CurrentArtifacts.find(FileURI); 222 223 if (I == CurrentArtifacts.end()) { 224 uint32_t Idx = static_cast<uint32_t>(CurrentArtifacts.size()); 225 const SarifArtifactLocation &Location = 226 SarifArtifactLocation::create(FileURI).setIndex(Idx); 227 const SarifArtifact &Artifact = SarifArtifact::create(Location) 228 .setRoles({"resultFile"}) 229 .setLength(FE->getSize()) 230 .setMimeType("text/plain"); 231 auto StatusIter = CurrentArtifacts.insert({FileURI, Artifact}); 232 // If inserted, ensure the original iterator points to the newly inserted 233 // element, so it can be used downstream. 234 if (StatusIter.second) 235 I = StatusIter.first; 236 } 237 assert(I != CurrentArtifacts.end() && "Failed to insert new artifact"); 238 const SarifArtifactLocation &Location = I->second.Location; 239 json::Object ArtifactLocationObject{{"uri", Location.URI}}; 240 if (Location.Index.has_value()) 241 ArtifactLocationObject["index"] = *Location.Index; 242 return json::Object{{{"artifactLocation", std::move(ArtifactLocationObject)}, 243 {"region", createTextRegion(SourceMgr, R)}}}; 244 } 245 246 json::Object &SarifDocumentWriter::getCurrentTool() { 247 assert(!Closed && "SARIF Document is closed. " 248 "Need to call createRun() before using getcurrentTool!"); 249 250 // Since Closed = false here, expect there to be at least 1 Run, anything 251 // else is an invalid state. 252 assert(!Runs.empty() && "There are no runs associated with the document!"); 253 254 return *Runs.back().getAsObject()->get("tool")->getAsObject(); 255 } 256 257 void SarifDocumentWriter::reset() { 258 CurrentRules.clear(); 259 CurrentArtifacts.clear(); 260 } 261 262 void SarifDocumentWriter::endRun() { 263 // Exit early if trying to close a closed Document. 264 if (Closed) { 265 reset(); 266 return; 267 } 268 269 // Since Closed = false here, expect there to be at least 1 Run, anything 270 // else is an invalid state. 271 assert(!Runs.empty() && "There are no runs associated with the document!"); 272 273 // Flush all the rules. 274 json::Object &Tool = getCurrentTool(); 275 json::Array Rules; 276 for (const SarifRule &R : CurrentRules) { 277 json::Object Config{ 278 {"enabled", R.DefaultConfiguration.Enabled}, 279 {"level", resultLevelToStr(R.DefaultConfiguration.Level)}, 280 {"rank", R.DefaultConfiguration.Rank}}; 281 json::Object Rule{ 282 {"name", R.Name}, 283 {"id", R.Id}, 284 {"fullDescription", json::Object{{"text", R.Description}}}, 285 {"defaultConfiguration", std::move(Config)}}; 286 if (!R.HelpURI.empty()) 287 Rule["helpUri"] = R.HelpURI; 288 Rules.emplace_back(std::move(Rule)); 289 } 290 json::Object &Driver = *Tool.getObject("driver"); 291 Driver["rules"] = std::move(Rules); 292 293 // Flush all the artifacts. 294 json::Object &Run = getCurrentRun(); 295 json::Array *Artifacts = Run.getArray("artifacts"); 296 for (const auto &Pair : CurrentArtifacts) { 297 const SarifArtifact &A = Pair.getValue(); 298 json::Object Loc{{"uri", A.Location.URI}}; 299 if (A.Location.Index.has_value()) { 300 Loc["index"] = static_cast<int64_t>(*A.Location.Index); 301 } 302 json::Object Artifact; 303 Artifact["location"] = std::move(Loc); 304 if (A.Length.has_value()) 305 Artifact["length"] = static_cast<int64_t>(*A.Length); 306 if (!A.Roles.empty()) 307 Artifact["roles"] = json::Array(A.Roles); 308 if (!A.MimeType.empty()) 309 Artifact["mimeType"] = A.MimeType; 310 if (A.Offset.has_value()) 311 Artifact["offset"] = *A.Offset; 312 Artifacts->push_back(json::Value(std::move(Artifact))); 313 } 314 315 // Clear, reset temporaries before next run. 316 reset(); 317 318 // Mark the document as closed. 319 Closed = true; 320 } 321 322 json::Array 323 SarifDocumentWriter::createThreadFlows(ArrayRef<ThreadFlow> ThreadFlows) { 324 json::Object Ret{{"locations", json::Array{}}}; 325 json::Array Locs; 326 for (const auto &ThreadFlow : ThreadFlows) { 327 json::Object PLoc = createPhysicalLocation(ThreadFlow.Range); 328 json::Object Loc = createLocation(std::move(PLoc), ThreadFlow.Message); 329 Locs.emplace_back( 330 createThreadFlowLocation(std::move(Loc), ThreadFlow.Importance)); 331 } 332 Ret["locations"] = std::move(Locs); 333 return json::Array{std::move(Ret)}; 334 } 335 336 json::Object 337 SarifDocumentWriter::createCodeFlow(ArrayRef<ThreadFlow> ThreadFlows) { 338 return json::Object{{"threadFlows", createThreadFlows(ThreadFlows)}}; 339 } 340 341 void SarifDocumentWriter::createRun(StringRef ShortToolName, 342 StringRef LongToolName, 343 StringRef ToolVersion) { 344 // Clear resources associated with a previous run. 345 endRun(); 346 347 // Signify a new run has begun. 348 Closed = false; 349 350 json::Object Tool{ 351 {"driver", 352 json::Object{{"name", ShortToolName}, 353 {"fullName", LongToolName}, 354 {"language", "en-US"}, 355 {"version", ToolVersion}, 356 {"informationUri", 357 "https://clang.llvm.org/docs/UsersManual.html"}}}}; 358 json::Object TheRun{{"tool", std::move(Tool)}, 359 {"results", {}}, 360 {"artifacts", {}}, 361 {"columnKind", "unicodeCodePoints"}}; 362 Runs.emplace_back(std::move(TheRun)); 363 } 364 365 json::Object &SarifDocumentWriter::getCurrentRun() { 366 assert(!Closed && 367 "SARIF Document is closed. " 368 "Can only getCurrentRun() if document is opened via createRun(), " 369 "create a run first"); 370 371 // Since Closed = false here, expect there to be at least 1 Run, anything 372 // else is an invalid state. 373 assert(!Runs.empty() && "There are no runs associated with the document!"); 374 return *Runs.back().getAsObject(); 375 } 376 377 size_t SarifDocumentWriter::createRule(const SarifRule &Rule) { 378 size_t Ret = CurrentRules.size(); 379 CurrentRules.emplace_back(Rule); 380 return Ret; 381 } 382 383 void SarifDocumentWriter::appendResult(const SarifResult &Result) { 384 size_t RuleIdx = Result.RuleIdx; 385 assert(RuleIdx < CurrentRules.size() && 386 "Trying to reference a rule that doesn't exist"); 387 const SarifRule &Rule = CurrentRules[RuleIdx]; 388 assert(Rule.DefaultConfiguration.Enabled && 389 "Cannot add a result referencing a disabled Rule"); 390 json::Object Ret{{"message", createMessage(Result.DiagnosticMessage)}, 391 {"ruleIndex", static_cast<int64_t>(RuleIdx)}, 392 {"ruleId", Rule.Id}}; 393 if (!Result.Locations.empty()) { 394 json::Array Locs; 395 for (auto &Range : Result.Locations) { 396 Locs.emplace_back(createLocation(createPhysicalLocation(Range))); 397 } 398 Ret["locations"] = std::move(Locs); 399 } 400 if (!Result.ThreadFlows.empty()) 401 Ret["codeFlows"] = json::Array{createCodeFlow(Result.ThreadFlows)}; 402 403 Ret["level"] = resultLevelToStr( 404 Result.LevelOverride.value_or(Rule.DefaultConfiguration.Level)); 405 406 json::Object &Run = getCurrentRun(); 407 json::Array *Results = Run.getArray("results"); 408 Results->emplace_back(std::move(Ret)); 409 } 410 411 json::Object SarifDocumentWriter::createDocument() { 412 // Flush all temporaries to their destinations if needed. 413 endRun(); 414 415 json::Object Doc{ 416 {"$schema", SchemaURI}, 417 {"version", SchemaVersion}, 418 }; 419 if (!Runs.empty()) 420 Doc["runs"] = json::Array(Runs); 421 return Doc; 422 } 423