//== clang/Basic/Sarif.h - SARIF Diagnostics Object Model -------*- C++ -*--==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// Defines clang::SarifDocumentWriter, clang::SarifRule, clang::SarifResult.
///
/// The document built can be accessed as a JSON Object.
/// Several value semantic types are also introduced which represent properties
/// of the SARIF standard, such as 'artifact', 'result', 'rule'.
///
/// A SARIF (Static Analysis Results Interchange Format) document is a JSON
/// document that describes in detail the results of running static analysis
/// tools on a project. Each (non-trivial) document consists of at least one
/// "run", each of which is composed of details such as:
///   * Tool: The tool that was run
///   * Rules: The rules applied during the tool run, represented by
///     \c reportingDescriptor objects in SARIF
///   * Results: The matches for the rules applied against the project(s) being
///     evaluated, represented by \c result objects in SARIF
///
/// Reference:
///   1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html">The SARIF standard</a>
///   2. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317836">SARIF<pre>reportingDescriptor</pre></a>
///   3.
<a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317638">SARIF<pre>result</pre></a> 29 //===----------------------------------------------------------------------===// 30 31 #ifndef LLVM_CLANG_BASIC_SARIF_H 32 #define LLVM_CLANG_BASIC_SARIF_H 33 34 #include "clang/Basic/SourceLocation.h" 35 #include "clang/Basic/Version.h" 36 #include "llvm/ADT/ArrayRef.h" 37 #include "llvm/ADT/SmallVector.h" 38 #include "llvm/ADT/StringMap.h" 39 #include "llvm/ADT/StringRef.h" 40 #include "llvm/Support/JSON.h" 41 #include <cassert> 42 #include <cstddef> 43 #include <cstdint> 44 #include <initializer_list> 45 #include <optional> 46 #include <string> 47 48 namespace clang { 49 50 class SarifDocumentWriter; 51 class SourceManager; 52 53 namespace detail { 54 55 /// \internal 56 /// An artifact location is SARIF's way of describing the complete location 57 /// of an artifact encountered during analysis. The \c artifactLocation object 58 /// typically consists of a URI, and/or an index to reference the artifact it 59 /// locates. 60 /// 61 /// This builder makes an additional assumption: that every artifact encountered 62 /// by \c clang will be a physical, top-level artifact. Which is why the static 63 /// creation method \ref SarifArtifactLocation::create takes a mandatory URI 64 /// parameter. The official standard states that either a \c URI or \c Index 65 /// must be available in the object, \c clang picks the \c URI as a reasonable 66 /// default, because it intends to deal in physical artifacts for now. 67 /// 68 /// Reference: 69 /// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317427">artifactLocation object</a> 70 /// 2. 
\ref SarifArtifact 71 class SarifArtifactLocation { 72 private: 73 friend class clang::SarifDocumentWriter; 74 75 std::optional<uint32_t> Index; 76 std::string URI; 77 78 SarifArtifactLocation() = delete; SarifArtifactLocation(const std::string & URI)79 explicit SarifArtifactLocation(const std::string &URI) : URI(URI) {} 80 81 public: create(llvm::StringRef URI)82 static SarifArtifactLocation create(llvm::StringRef URI) { 83 return SarifArtifactLocation{URI.str()}; 84 } 85 setIndex(uint32_t Idx)86 SarifArtifactLocation setIndex(uint32_t Idx) { 87 Index = Idx; 88 return *this; 89 } 90 }; 91 92 /// \internal 93 /// An artifact in SARIF is any object (a sequence of bytes) addressable by 94 /// a URI (RFC 3986). The most common type of artifact for clang's use-case 95 /// would be source files. SARIF's artifact object is described in detail in 96 /// section 3.24. 97 // 98 /// Since every clang artifact MUST have a location (there being no nested 99 /// artifacts), the creation method \ref SarifArtifact::create requires a 100 /// \ref SarifArtifactLocation object. 101 /// 102 /// Reference: 103 /// 1. 
<a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317611">artifact object</a> 104 class SarifArtifact { 105 private: 106 friend class clang::SarifDocumentWriter; 107 108 std::optional<uint32_t> Offset; 109 std::optional<size_t> Length; 110 std::string MimeType; 111 SarifArtifactLocation Location; 112 llvm::SmallVector<std::string, 4> Roles; 113 114 SarifArtifact() = delete; 115 SarifArtifact(const SarifArtifactLocation & Loc)116 explicit SarifArtifact(const SarifArtifactLocation &Loc) : Location(Loc) {} 117 118 public: create(const SarifArtifactLocation & Loc)119 static SarifArtifact create(const SarifArtifactLocation &Loc) { 120 return SarifArtifact{Loc}; 121 } 122 setOffset(uint32_t ArtifactOffset)123 SarifArtifact setOffset(uint32_t ArtifactOffset) { 124 Offset = ArtifactOffset; 125 return *this; 126 } 127 setLength(size_t NumBytes)128 SarifArtifact setLength(size_t NumBytes) { 129 Length = NumBytes; 130 return *this; 131 } 132 setRoles(std::initializer_list<llvm::StringRef> ArtifactRoles)133 SarifArtifact setRoles(std::initializer_list<llvm::StringRef> ArtifactRoles) { 134 Roles.assign(ArtifactRoles.begin(), ArtifactRoles.end()); 135 return *this; 136 } 137 setMimeType(llvm::StringRef ArtifactMimeType)138 SarifArtifact setMimeType(llvm::StringRef ArtifactMimeType) { 139 MimeType = ArtifactMimeType.str(); 140 return *this; 141 } 142 }; 143 144 } // namespace detail 145 146 enum class ThreadFlowImportance { Important, Essential, Unimportant }; 147 148 /// The level of severity associated with a \ref SarifResult. 149 /// 150 /// Of all the levels, \c None is the only one that is not associated with 151 /// a failure. 
///
/// A typical mapping for clang's DiagnosticKind to SarifResultLevel would look
/// like:
///   * \c None: \ref clang::DiagnosticsEngine::Level::Remark, \ref clang::DiagnosticsEngine::Level::Ignored
///   * \c Note: \ref clang::DiagnosticsEngine::Level::Note
///   * \c Warning: \ref clang::DiagnosticsEngine::Level::Warning
///   * \c Error could be generated from one of:
///     - \ref clang::DiagnosticsEngine::Level::Warning with \c -Werror
///     - \ref clang::DiagnosticsEngine::Level::Error
///     - \ref clang::DiagnosticsEngine::Level::Fatal when \ref clang::DiagnosticsEngine::ErrorsAsFatal is set.
///
/// Reference:
///   1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317648">level property</a>
enum class SarifResultLevel {
  None,
  Note,
  Warning,
  Error
};

/// A thread flow is a sequence of code locations that specify a possible path
/// through a single thread of execution.
/// A thread flow in SARIF is related to a code flow which describes
/// the progress of one or more programs through one or more thread flows.
///
/// Reference:
///   1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317744">threadFlow object</a>
///   2.
<a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317740">codeFlow object</a> 175 class ThreadFlow { 176 friend class SarifDocumentWriter; 177 178 CharSourceRange Range; 179 ThreadFlowImportance Importance; 180 std::string Message; 181 182 ThreadFlow() = default; 183 184 public: create()185 static ThreadFlow create() { return {}; } 186 setRange(const CharSourceRange & ItemRange)187 ThreadFlow setRange(const CharSourceRange &ItemRange) { 188 assert(ItemRange.isCharRange() && 189 "ThreadFlows require a character granular source range!"); 190 Range = ItemRange; 191 return *this; 192 } 193 setImportance(const ThreadFlowImportance & ItemImportance)194 ThreadFlow setImportance(const ThreadFlowImportance &ItemImportance) { 195 Importance = ItemImportance; 196 return *this; 197 } 198 setMessage(llvm::StringRef ItemMessage)199 ThreadFlow setMessage(llvm::StringRef ItemMessage) { 200 Message = ItemMessage.str(); 201 return *this; 202 } 203 }; 204 205 /// A SARIF Reporting Configuration (\c reportingConfiguration) object contains 206 /// properties for a \ref SarifRule that can be configured at runtime before 207 /// analysis begins. 208 /// 209 /// Reference: 210 /// 1. 
<a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317852">reportingConfiguration object</a> 211 class SarifReportingConfiguration { 212 friend class clang::SarifDocumentWriter; 213 214 bool Enabled = true; 215 SarifResultLevel Level = SarifResultLevel::Warning; 216 float Rank = -1.0f; 217 218 SarifReportingConfiguration() = default; 219 220 public: create()221 static SarifReportingConfiguration create() { return {}; }; 222 disable()223 SarifReportingConfiguration disable() { 224 Enabled = false; 225 return *this; 226 } 227 enable()228 SarifReportingConfiguration enable() { 229 Enabled = true; 230 return *this; 231 } 232 setLevel(SarifResultLevel TheLevel)233 SarifReportingConfiguration setLevel(SarifResultLevel TheLevel) { 234 Level = TheLevel; 235 return *this; 236 } 237 setRank(float TheRank)238 SarifReportingConfiguration setRank(float TheRank) { 239 assert(TheRank >= 0.0f && "Rule rank cannot be smaller than 0.0"); 240 assert(TheRank <= 100.0f && "Rule rank cannot be larger than 100.0"); 241 Rank = TheRank; 242 return *this; 243 } 244 }; 245 246 /// A SARIF rule (\c reportingDescriptor object) contains information that 247 /// describes a reporting item generated by a tool. A reporting item is 248 /// either a result of analysis or notification of a condition encountered by 249 /// the tool. Rules are arbitrary but are identifiable by a hierarchical 250 /// rule-id. 251 /// 252 /// This builder provides an interface to create SARIF \c reportingDescriptor 253 /// objects via the \ref SarifRule::create static method. 254 /// 255 /// Reference: 256 /// 1. 
<a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317836">reportingDescriptor object</a> 257 class SarifRule { 258 friend class clang::SarifDocumentWriter; 259 260 std::string Name; 261 std::string Id; 262 std::string Description; 263 std::string HelpURI; 264 SarifReportingConfiguration DefaultConfiguration; 265 SarifRule()266 SarifRule() : DefaultConfiguration(SarifReportingConfiguration::create()) {} 267 268 public: create()269 static SarifRule create() { return {}; } 270 setName(llvm::StringRef RuleName)271 SarifRule setName(llvm::StringRef RuleName) { 272 Name = RuleName.str(); 273 return *this; 274 } 275 setRuleId(llvm::StringRef RuleId)276 SarifRule setRuleId(llvm::StringRef RuleId) { 277 Id = RuleId.str(); 278 return *this; 279 } 280 setDescription(llvm::StringRef RuleDesc)281 SarifRule setDescription(llvm::StringRef RuleDesc) { 282 Description = RuleDesc.str(); 283 return *this; 284 } 285 setHelpURI(llvm::StringRef RuleHelpURI)286 SarifRule setHelpURI(llvm::StringRef RuleHelpURI) { 287 HelpURI = RuleHelpURI.str(); 288 return *this; 289 } 290 291 SarifRule setDefaultConfiguration(const SarifReportingConfiguration & Configuration)292 setDefaultConfiguration(const SarifReportingConfiguration &Configuration) { 293 DefaultConfiguration = Configuration; 294 return *this; 295 } 296 }; 297 298 /// A SARIF result (also called a "reporting item") is a unit of output 299 /// produced when one of the tool's \c reportingDescriptor encounters a match 300 /// on the file being analysed by the tool. 301 /// 302 /// This builder provides a \ref SarifResult::create static method that can be 303 /// used to create an empty shell onto which attributes can be added using the 304 /// \c setX(...) methods. 305 /// 306 /// For example: 307 /// \code{.cpp} 308 /// SarifResult result = SarifResult::create(...) 309 /// .setRuleId(...) 310 /// .setDiagnosticMessage(...); 311 /// \endcode 312 /// 313 /// Reference: 314 /// 1. 
<a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317638">SARIF<pre>result</pre></a> 315 class SarifResult { 316 friend class clang::SarifDocumentWriter; 317 318 // NOTE: 319 // This type cannot fit all possible indexes representable by JSON, but is 320 // chosen because it is the largest unsigned type that can be safely 321 // converted to an \c int64_t. 322 uint32_t RuleIdx; 323 std::string RuleId; 324 std::string DiagnosticMessage; 325 llvm::SmallVector<CharSourceRange, 8> Locations; 326 llvm::SmallVector<ThreadFlow, 8> ThreadFlows; 327 std::optional<SarifResultLevel> LevelOverride; 328 329 SarifResult() = delete; SarifResult(uint32_t RuleIdx)330 explicit SarifResult(uint32_t RuleIdx) : RuleIdx(RuleIdx) {} 331 332 public: create(uint32_t RuleIdx)333 static SarifResult create(uint32_t RuleIdx) { return SarifResult{RuleIdx}; } 334 setIndex(uint32_t Idx)335 SarifResult setIndex(uint32_t Idx) { 336 RuleIdx = Idx; 337 return *this; 338 } 339 setRuleId(llvm::StringRef Id)340 SarifResult setRuleId(llvm::StringRef Id) { 341 RuleId = Id.str(); 342 return *this; 343 } 344 setDiagnosticMessage(llvm::StringRef Message)345 SarifResult setDiagnosticMessage(llvm::StringRef Message) { 346 DiagnosticMessage = Message.str(); 347 return *this; 348 } 349 setLocations(llvm::ArrayRef<CharSourceRange> DiagLocs)350 SarifResult setLocations(llvm::ArrayRef<CharSourceRange> DiagLocs) { 351 #ifndef NDEBUG 352 for (const auto &Loc : DiagLocs) { 353 assert(Loc.isCharRange() && 354 "SARIF Results require character granular source ranges!"); 355 } 356 #endif 357 Locations.assign(DiagLocs.begin(), DiagLocs.end()); 358 return *this; 359 } setThreadFlows(llvm::ArrayRef<ThreadFlow> ThreadFlowResults)360 SarifResult setThreadFlows(llvm::ArrayRef<ThreadFlow> ThreadFlowResults) { 361 ThreadFlows.assign(ThreadFlowResults.begin(), ThreadFlowResults.end()); 362 return *this; 363 } 364 setDiagnosticLevel(const SarifResultLevel & TheLevel)365 SarifResult 
setDiagnosticLevel(const SarifResultLevel &TheLevel) { 366 LevelOverride = TheLevel; 367 return *this; 368 } 369 }; 370 371 /// This class handles creating a valid SARIF document given various input 372 /// attributes. However, it requires an ordering among certain method calls: 373 /// 374 /// 1. Because every SARIF document must contain at least 1 \c run, callers 375 /// must ensure that \ref SarifDocumentWriter::createRun is called before 376 /// any other methods. 377 /// 2. If SarifDocumentWriter::endRun is called, callers MUST call 378 /// SarifDocumentWriter::createRun, before invoking any of the result 379 /// aggregation methods such as SarifDocumentWriter::appendResult etc. 380 class SarifDocumentWriter { 381 private: 382 const llvm::StringRef SchemaURI{ 383 "https://docs.oasis-open.org/sarif/sarif/v2.1.0/cos02/schemas/" 384 "sarif-schema-2.1.0.json"}; 385 const llvm::StringRef SchemaVersion{"2.1.0"}; 386 387 /// \internal 388 /// Return a pointer to the current tool. Asserts that a run exists. 389 llvm::json::Object &getCurrentTool(); 390 391 /// \internal 392 /// Checks if there is a run associated with this document. 393 /// 394 /// \return true on success 395 bool hasRun() const; 396 397 /// \internal 398 /// Reset portions of the internal state so that the document is ready to 399 /// receive data for a new run. 400 void reset(); 401 402 /// \internal 403 /// Return a mutable reference to the current run, after asserting it exists. 404 /// 405 /// \note It is undefined behavior to call this if a run does not exist in 406 /// the SARIF document. 407 llvm::json::Object &getCurrentRun(); 408 409 /// Create a code flow object for the given threadflows. 410 /// See \ref ThreadFlow. 411 /// 412 /// \note It is undefined behavior to call this if a run does not exist in 413 /// the SARIF document. 
414 llvm::json::Object 415 createCodeFlow(const llvm::ArrayRef<ThreadFlow> ThreadFlows); 416 417 /// Add the given threadflows to the ones this SARIF document knows about. 418 llvm::json::Array 419 createThreadFlows(const llvm::ArrayRef<ThreadFlow> ThreadFlows); 420 421 /// Add the given \ref CharSourceRange to the SARIF document as a physical 422 /// location, with its corresponding artifact. 423 llvm::json::Object createPhysicalLocation(const CharSourceRange &R); 424 425 public: 426 SarifDocumentWriter() = delete; 427 428 /// Create a new empty SARIF document with the given source manager. SarifDocumentWriter(const SourceManager & SourceMgr)429 SarifDocumentWriter(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {} 430 431 /// Release resources held by this SARIF document. 432 ~SarifDocumentWriter() = default; 433 434 /// Create a new run with which any upcoming analysis will be associated. 435 /// Each run requires specifying the tool that is generating reporting items. 436 void createRun(const llvm::StringRef ShortToolName, 437 const llvm::StringRef LongToolName, 438 const llvm::StringRef ToolVersion = CLANG_VERSION_STRING); 439 440 /// If there is a current run, end it. 441 /// 442 /// This method collects various book-keeping required to clear and close 443 /// resources associated with the current run, but may also allocate some 444 /// for the next run. 445 /// 446 /// Calling \ref endRun before associating a run through \ref createRun leads 447 /// to undefined behaviour. 448 void endRun(); 449 450 /// Associate the given rule with the current run. 451 /// 452 /// Returns an integer rule index for the created rule that is unique within 453 /// the current run, which can then be used to create a \ref SarifResult 454 /// to add to the current run. Note that a rule must exist before being 455 /// referenced by a result. 456 /// 457 /// \pre 458 /// There must be a run associated with the document, failing to do so will 459 /// cause undefined behaviour. 
460 size_t createRule(const SarifRule &Rule); 461 462 /// Append a new result to the currently in-flight run. 463 /// 464 /// \pre 465 /// There must be a run associated with the document, failing to do so will 466 /// cause undefined behaviour. 467 /// \pre 468 /// \c RuleIdx used to create the result must correspond to a rule known by 469 /// the SARIF document. It must be the value returned by a previous call 470 /// to \ref createRule. 471 void appendResult(const SarifResult &SarifResult); 472 473 /// Return the SARIF document in its current state. 474 /// Calling this will trigger a copy of the internal state including all 475 /// reported diagnostics, resulting in an expensive call. 476 llvm::json::Object createDocument(); 477 478 private: 479 /// Source Manager to use for the current SARIF document. 480 const SourceManager &SourceMgr; 481 482 /// Flag to track the state of this document: 483 /// A closed document is one on which a new runs must be created. 484 /// This could be a document that is freshly created, or has recently 485 /// finished writing to a previous run. 486 bool Closed = true; 487 488 /// A sequence of SARIF runs. 489 /// Each run object describes a single run of an analysis tool and contains 490 /// the output of that run. 491 /// 492 /// Reference: <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317484">run object</a> 493 llvm::json::Array Runs; 494 495 /// The list of rules associated with the most recent active run. These are 496 /// defined using the diagnostics passed to the SarifDocument. Each rule 497 /// need not be unique through the result set. E.g. there may be several 498 /// 'syntax' errors throughout code under analysis, each of which has its 499 /// own specific diagnostic message (and consequently, RuleId). Rules are 500 /// also known as "reportingDescriptor" objects in SARIF. 
501 /// 502 /// Reference: <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317556">rules property</a> 503 llvm::SmallVector<SarifRule, 32> CurrentRules; 504 505 /// The list of artifacts that have been encountered on the most recent active 506 /// run. An artifact is defined in SARIF as a sequence of bytes addressable 507 /// by a URI. A common example for clang's case would be files named by 508 /// filesystem paths. 509 llvm::StringMap<detail::SarifArtifact> CurrentArtifacts; 510 }; 511 } // namespace clang 512 513 #endif // LLVM_CLANG_BASIC_SARIF_H 514