/src/llvm-project/clang/lib/Basic/Sarif.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- clang/Basic/Sarif.cpp - SarifDocumentWriter class definition ------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | /// |
9 | | /// \file |
10 | | /// This file contains the definition of the SarifDocumentWriter class, and |
11 | | /// associated builders such as: |
12 | | /// - \ref SarifArtifact |
13 | | /// - \ref SarifArtifactLocation |
14 | | /// - \ref SarifRule |
15 | | /// - \ref SarifResult |
16 | | //===----------------------------------------------------------------------===// |
17 | | #include "clang/Basic/Sarif.h" |
18 | | #include "clang/Basic/SourceLocation.h" |
19 | | #include "clang/Basic/SourceManager.h" |
20 | | #include "llvm/ADT/ArrayRef.h" |
21 | | #include "llvm/ADT/STLExtras.h" |
22 | | #include "llvm/ADT/StringExtras.h" |
23 | | #include "llvm/ADT/StringRef.h" |
24 | | #include "llvm/Support/ConvertUTF.h" |
25 | | #include "llvm/Support/JSON.h" |
26 | | #include "llvm/Support/Path.h" |
27 | | |
28 | | #include <optional> |
29 | | #include <string> |
30 | | #include <utility> |
31 | | |
32 | | using namespace clang; |
33 | | using namespace llvm; |
34 | | |
35 | | using clang::detail::SarifArtifact; |
36 | | using clang::detail::SarifArtifactLocation; |
37 | | |
38 | 0 | static StringRef getFileName(FileEntryRef FE) { |
39 | 0 | StringRef Filename = FE.getFileEntry().tryGetRealPathName(); |
40 | 0 | if (Filename.empty()) |
41 | 0 | Filename = FE.getName(); |
42 | 0 | return Filename; |
43 | 0 | } |
44 | | /// \name URI |
45 | | /// @{ |
46 | | |
47 | | /// \internal |
48 | | /// \brief |
49 | | /// Return the RFC3986 encoding of the input character. |
50 | | /// |
51 | | /// \param C Character to encode to RFC3986. |
52 | | /// |
53 | | /// \return The RFC3986 representation of \c C. |
54 | 0 | static std::string percentEncodeURICharacter(char C) { |
55 | | // RFC 3986 claims alpha, numeric, and this handful of |
56 | | // characters are not reserved for the path component and |
57 | | // should be written out directly. Otherwise, percent |
58 | | // encode the character and write that out instead of the |
59 | | // reserved character. |
60 | 0 | if (llvm::isAlnum(C) || |
61 | 0 | StringRef::npos != StringRef("-._~:@!$&'()*+,;=").find(C)) |
62 | 0 | return std::string(&C, 1); |
63 | 0 | return "%" + llvm::toHex(StringRef(&C, 1)); |
64 | 0 | } |
65 | | |
66 | | /// \internal |
67 | | /// \brief Return a URI representing the given file name. |
68 | | /// |
69 | | /// \param Filename The filename to be represented as URI. |
70 | | /// |
71 | | /// \return RFC3986 URI representing the input file name. |
72 | 0 | static std::string fileNameToURI(StringRef Filename) { |
73 | 0 | SmallString<32> Ret = StringRef("file://"); |
74 | | |
75 | | // Get the root name to see if it has a URI authority. |
76 | 0 | StringRef Root = sys::path::root_name(Filename); |
77 | 0 | if (Root.starts_with("//")) { |
78 | | // There is an authority, so add it to the URI. |
79 | 0 | Ret += Root.drop_front(2).str(); |
80 | 0 | } else if (!Root.empty()) { |
81 | | // There is no authority, so end the component and add the root to the URI. |
82 | 0 | Ret += Twine("/" + Root).str(); |
83 | 0 | } |
84 | |
|
85 | 0 | auto Iter = sys::path::begin(Filename), End = sys::path::end(Filename); |
86 | 0 | assert(Iter != End && "Expected there to be a non-root path component."); |
87 | | // Add the rest of the path components, encoding any reserved characters; |
88 | | // we skip past the first path component, as it was handled it above. |
89 | 0 | for (StringRef Component : llvm::make_range(++Iter, End)) { |
90 | | // For reasons unknown to me, we may get a backslash with Windows native |
91 | | // paths for the initial backslash following the drive component, which |
92 | | // we need to ignore as a URI path part. |
93 | 0 | if (Component == "\\") |
94 | 0 | continue; |
95 | | |
96 | | // Add the separator between the previous path part and the one being |
97 | | // currently processed. |
98 | 0 | Ret += "/"; |
99 | | |
100 | | // URI encode the part. |
101 | 0 | for (char C : Component) { |
102 | 0 | Ret += percentEncodeURICharacter(C); |
103 | 0 | } |
104 | 0 | } |
105 | |
|
106 | 0 | return std::string(Ret); |
107 | 0 | } |
108 | | /// @} |
109 | | |
110 | | /// \brief Calculate the column position expressed in the number of UTF-8 code |
111 | | /// points from column start to the source location |
112 | | /// |
113 | | /// \param Loc The source location whose column needs to be calculated. |
114 | | /// \param TokenLen Optional hint for when the token is multiple bytes long. |
115 | | /// |
116 | | /// \return The column number as a UTF-8 aware byte offset from column start to |
117 | | /// the effective source location. |
118 | | static unsigned int adjustColumnPos(FullSourceLoc Loc, |
119 | 0 | unsigned int TokenLen = 0) { |
120 | 0 | assert(!Loc.isInvalid() && "invalid Loc when adjusting column position"); |
121 | | |
122 | 0 | std::pair<FileID, unsigned> LocInfo = Loc.getDecomposedExpansionLoc(); |
123 | 0 | std::optional<MemoryBufferRef> Buf = |
124 | 0 | Loc.getManager().getBufferOrNone(LocInfo.first); |
125 | 0 | assert(Buf && "got an invalid buffer for the location's file"); |
126 | 0 | assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) && |
127 | 0 | "token extends past end of buffer?"); |
128 | | |
129 | | // Adjust the offset to be the start of the line, since we'll be counting |
130 | | // Unicode characters from there until our column offset. |
131 | 0 | unsigned int Off = LocInfo.second - (Loc.getExpansionColumnNumber() - 1); |
132 | 0 | unsigned int Ret = 1; |
133 | 0 | while (Off < (LocInfo.second + TokenLen)) { |
134 | 0 | Off += getNumBytesForUTF8(Buf->getBuffer()[Off]); |
135 | 0 | Ret++; |
136 | 0 | } |
137 | |
|
138 | 0 | return Ret; |
139 | 0 | } |
140 | | |
141 | | /// \name SARIF Utilities |
142 | | /// @{ |
143 | | |
144 | | /// \internal |
145 | 0 | json::Object createMessage(StringRef Text) { |
146 | 0 | return json::Object{{"text", Text.str()}}; |
147 | 0 | } |
148 | | |
149 | | /// \internal |
150 | | /// \pre CharSourceRange must be a token range |
151 | | static json::Object createTextRegion(const SourceManager &SM, |
152 | 0 | const CharSourceRange &R) { |
153 | 0 | FullSourceLoc BeginCharLoc{R.getBegin(), SM}; |
154 | 0 | FullSourceLoc EndCharLoc{R.getEnd(), SM}; |
155 | 0 | json::Object Region{{"startLine", BeginCharLoc.getExpansionLineNumber()}, |
156 | 0 | {"startColumn", adjustColumnPos(BeginCharLoc)}}; |
157 | |
|
158 | 0 | if (BeginCharLoc == EndCharLoc) { |
159 | 0 | Region["endColumn"] = adjustColumnPos(BeginCharLoc); |
160 | 0 | } else { |
161 | 0 | Region["endLine"] = EndCharLoc.getExpansionLineNumber(); |
162 | 0 | Region["endColumn"] = adjustColumnPos(EndCharLoc); |
163 | 0 | } |
164 | 0 | return Region; |
165 | 0 | } |
166 | | |
167 | | static json::Object createLocation(json::Object &&PhysicalLocation, |
168 | 0 | StringRef Message = "") { |
169 | 0 | json::Object Ret{{"physicalLocation", std::move(PhysicalLocation)}}; |
170 | 0 | if (!Message.empty()) |
171 | 0 | Ret.insert({"message", createMessage(Message)}); |
172 | 0 | return Ret; |
173 | 0 | } |
174 | | |
175 | 0 | static StringRef importanceToStr(ThreadFlowImportance I) { |
176 | 0 | switch (I) { |
177 | 0 | case ThreadFlowImportance::Important: |
178 | 0 | return "important"; |
179 | 0 | case ThreadFlowImportance::Essential: |
180 | 0 | return "essential"; |
181 | 0 | case ThreadFlowImportance::Unimportant: |
182 | 0 | return "unimportant"; |
183 | 0 | } |
184 | 0 | llvm_unreachable("Fully covered switch is not so fully covered"); |
185 | 0 | } |
186 | | |
187 | 0 | static StringRef resultLevelToStr(SarifResultLevel R) { |
188 | 0 | switch (R) { |
189 | 0 | case SarifResultLevel::None: |
190 | 0 | return "none"; |
191 | 0 | case SarifResultLevel::Note: |
192 | 0 | return "note"; |
193 | 0 | case SarifResultLevel::Warning: |
194 | 0 | return "warning"; |
195 | 0 | case SarifResultLevel::Error: |
196 | 0 | return "error"; |
197 | 0 | } |
198 | 0 | llvm_unreachable("Potentially un-handled SarifResultLevel. " |
199 | 0 | "Is the switch not fully covered?"); |
200 | 0 | } |
201 | | |
202 | | static json::Object |
203 | | createThreadFlowLocation(json::Object &&Location, |
204 | 0 | const ThreadFlowImportance &Importance) { |
205 | 0 | return json::Object{{"location", std::move(Location)}, |
206 | 0 | {"importance", importanceToStr(Importance)}}; |
207 | 0 | } |
208 | | /// @} |
209 | | |
210 | | json::Object |
211 | 0 | SarifDocumentWriter::createPhysicalLocation(const CharSourceRange &R) { |
212 | 0 | assert(R.isValid() && |
213 | 0 | "Cannot create a physicalLocation from invalid SourceRange!"); |
214 | 0 | assert(R.isCharRange() && |
215 | 0 | "Cannot create a physicalLocation from a token range!"); |
216 | 0 | FullSourceLoc Start{R.getBegin(), SourceMgr}; |
217 | 0 | OptionalFileEntryRef FE = Start.getExpansionLoc().getFileEntryRef(); |
218 | 0 | assert(FE && "Diagnostic does not exist within a valid file!"); |
219 | | |
220 | 0 | const std::string &FileURI = fileNameToURI(getFileName(*FE)); |
221 | 0 | auto I = CurrentArtifacts.find(FileURI); |
222 | |
|
223 | 0 | if (I == CurrentArtifacts.end()) { |
224 | 0 | uint32_t Idx = static_cast<uint32_t>(CurrentArtifacts.size()); |
225 | 0 | const SarifArtifactLocation &Location = |
226 | 0 | SarifArtifactLocation::create(FileURI).setIndex(Idx); |
227 | 0 | const SarifArtifact &Artifact = SarifArtifact::create(Location) |
228 | 0 | .setRoles({"resultFile"}) |
229 | 0 | .setLength(FE->getSize()) |
230 | 0 | .setMimeType("text/plain"); |
231 | 0 | auto StatusIter = CurrentArtifacts.insert({FileURI, Artifact}); |
232 | | // If inserted, ensure the original iterator points to the newly inserted |
233 | | // element, so it can be used downstream. |
234 | 0 | if (StatusIter.second) |
235 | 0 | I = StatusIter.first; |
236 | 0 | } |
237 | 0 | assert(I != CurrentArtifacts.end() && "Failed to insert new artifact"); |
238 | 0 | const SarifArtifactLocation &Location = I->second.Location; |
239 | 0 | json::Object ArtifactLocationObject{{"uri", Location.URI}}; |
240 | 0 | if (Location.Index.has_value()) |
241 | 0 | ArtifactLocationObject["index"] = *Location.Index; |
242 | 0 | return json::Object{{{"artifactLocation", std::move(ArtifactLocationObject)}, |
243 | 0 | {"region", createTextRegion(SourceMgr, R)}}}; |
244 | 0 | } |
245 | | |
246 | 0 | json::Object &SarifDocumentWriter::getCurrentTool() { |
247 | 0 | assert(!Closed && "SARIF Document is closed. " |
248 | 0 | "Need to call createRun() before using getcurrentTool!"); |
249 | | |
250 | | // Since Closed = false here, expect there to be at least 1 Run, anything |
251 | | // else is an invalid state. |
252 | 0 | assert(!Runs.empty() && "There are no runs associated with the document!"); |
253 | | |
254 | 0 | return *Runs.back().getAsObject()->get("tool")->getAsObject(); |
255 | 0 | } |
256 | | |
257 | 0 | void SarifDocumentWriter::reset() { |
258 | 0 | CurrentRules.clear(); |
259 | 0 | CurrentArtifacts.clear(); |
260 | 0 | } |
261 | | |
262 | 0 | void SarifDocumentWriter::endRun() { |
263 | | // Exit early if trying to close a closed Document. |
264 | 0 | if (Closed) { |
265 | 0 | reset(); |
266 | 0 | return; |
267 | 0 | } |
268 | | |
269 | | // Since Closed = false here, expect there to be at least 1 Run, anything |
270 | | // else is an invalid state. |
271 | 0 | assert(!Runs.empty() && "There are no runs associated with the document!"); |
272 | | |
273 | | // Flush all the rules. |
274 | 0 | json::Object &Tool = getCurrentTool(); |
275 | 0 | json::Array Rules; |
276 | 0 | for (const SarifRule &R : CurrentRules) { |
277 | 0 | json::Object Config{ |
278 | 0 | {"enabled", R.DefaultConfiguration.Enabled}, |
279 | 0 | {"level", resultLevelToStr(R.DefaultConfiguration.Level)}, |
280 | 0 | {"rank", R.DefaultConfiguration.Rank}}; |
281 | 0 | json::Object Rule{ |
282 | 0 | {"name", R.Name}, |
283 | 0 | {"id", R.Id}, |
284 | 0 | {"fullDescription", json::Object{{"text", R.Description}}}, |
285 | 0 | {"defaultConfiguration", std::move(Config)}}; |
286 | 0 | if (!R.HelpURI.empty()) |
287 | 0 | Rule["helpUri"] = R.HelpURI; |
288 | 0 | Rules.emplace_back(std::move(Rule)); |
289 | 0 | } |
290 | 0 | json::Object &Driver = *Tool.getObject("driver"); |
291 | 0 | Driver["rules"] = std::move(Rules); |
292 | | |
293 | | // Flush all the artifacts. |
294 | 0 | json::Object &Run = getCurrentRun(); |
295 | 0 | json::Array *Artifacts = Run.getArray("artifacts"); |
296 | 0 | SmallVector<std::pair<StringRef, SarifArtifact>, 0> Vec; |
297 | 0 | for (const auto &[K, V] : CurrentArtifacts) |
298 | 0 | Vec.emplace_back(K, V); |
299 | 0 | llvm::sort(Vec, llvm::less_first()); |
300 | 0 | for (const auto &[_, A] : Vec) { |
301 | 0 | json::Object Loc{{"uri", A.Location.URI}}; |
302 | 0 | if (A.Location.Index.has_value()) { |
303 | 0 | Loc["index"] = static_cast<int64_t>(*A.Location.Index); |
304 | 0 | } |
305 | 0 | json::Object Artifact; |
306 | 0 | Artifact["location"] = std::move(Loc); |
307 | 0 | if (A.Length.has_value()) |
308 | 0 | Artifact["length"] = static_cast<int64_t>(*A.Length); |
309 | 0 | if (!A.Roles.empty()) |
310 | 0 | Artifact["roles"] = json::Array(A.Roles); |
311 | 0 | if (!A.MimeType.empty()) |
312 | 0 | Artifact["mimeType"] = A.MimeType; |
313 | 0 | if (A.Offset.has_value()) |
314 | 0 | Artifact["offset"] = *A.Offset; |
315 | 0 | Artifacts->push_back(json::Value(std::move(Artifact))); |
316 | 0 | } |
317 | | |
318 | | // Clear, reset temporaries before next run. |
319 | 0 | reset(); |
320 | | |
321 | | // Mark the document as closed. |
322 | 0 | Closed = true; |
323 | 0 | } |
324 | | |
325 | | json::Array |
326 | 0 | SarifDocumentWriter::createThreadFlows(ArrayRef<ThreadFlow> ThreadFlows) { |
327 | 0 | json::Object Ret{{"locations", json::Array{}}}; |
328 | 0 | json::Array Locs; |
329 | 0 | for (const auto &ThreadFlow : ThreadFlows) { |
330 | 0 | json::Object PLoc = createPhysicalLocation(ThreadFlow.Range); |
331 | 0 | json::Object Loc = createLocation(std::move(PLoc), ThreadFlow.Message); |
332 | 0 | Locs.emplace_back( |
333 | 0 | createThreadFlowLocation(std::move(Loc), ThreadFlow.Importance)); |
334 | 0 | } |
335 | 0 | Ret["locations"] = std::move(Locs); |
336 | 0 | return json::Array{std::move(Ret)}; |
337 | 0 | } |
338 | | |
339 | | json::Object |
340 | 0 | SarifDocumentWriter::createCodeFlow(ArrayRef<ThreadFlow> ThreadFlows) { |
341 | 0 | return json::Object{{"threadFlows", createThreadFlows(ThreadFlows)}}; |
342 | 0 | } |
343 | | |
344 | | void SarifDocumentWriter::createRun(StringRef ShortToolName, |
345 | | StringRef LongToolName, |
346 | 0 | StringRef ToolVersion) { |
347 | | // Clear resources associated with a previous run. |
348 | 0 | endRun(); |
349 | | |
350 | | // Signify a new run has begun. |
351 | 0 | Closed = false; |
352 | |
|
353 | 0 | json::Object Tool{ |
354 | 0 | {"driver", |
355 | 0 | json::Object{{"name", ShortToolName}, |
356 | 0 | {"fullName", LongToolName}, |
357 | 0 | {"language", "en-US"}, |
358 | 0 | {"version", ToolVersion}, |
359 | 0 | {"informationUri", |
360 | 0 | "https://clang.llvm.org/docs/UsersManual.html"}}}}; |
361 | 0 | json::Object TheRun{{"tool", std::move(Tool)}, |
362 | 0 | {"results", {}}, |
363 | 0 | {"artifacts", {}}, |
364 | 0 | {"columnKind", "unicodeCodePoints"}}; |
365 | 0 | Runs.emplace_back(std::move(TheRun)); |
366 | 0 | } |
367 | | |
368 | 0 | json::Object &SarifDocumentWriter::getCurrentRun() { |
369 | 0 | assert(!Closed && |
370 | 0 | "SARIF Document is closed. " |
371 | 0 | "Can only getCurrentRun() if document is opened via createRun(), " |
372 | 0 | "create a run first"); |
373 | | |
374 | | // Since Closed = false here, expect there to be at least 1 Run, anything |
375 | | // else is an invalid state. |
376 | 0 | assert(!Runs.empty() && "There are no runs associated with the document!"); |
377 | 0 | return *Runs.back().getAsObject(); |
378 | 0 | } |
379 | | |
380 | 0 | size_t SarifDocumentWriter::createRule(const SarifRule &Rule) { |
381 | 0 | size_t Ret = CurrentRules.size(); |
382 | 0 | CurrentRules.emplace_back(Rule); |
383 | 0 | return Ret; |
384 | 0 | } |
385 | | |
386 | 0 | void SarifDocumentWriter::appendResult(const SarifResult &Result) { |
387 | 0 | size_t RuleIdx = Result.RuleIdx; |
388 | 0 | assert(RuleIdx < CurrentRules.size() && |
389 | 0 | "Trying to reference a rule that doesn't exist"); |
390 | 0 | const SarifRule &Rule = CurrentRules[RuleIdx]; |
391 | 0 | assert(Rule.DefaultConfiguration.Enabled && |
392 | 0 | "Cannot add a result referencing a disabled Rule"); |
393 | 0 | json::Object Ret{{"message", createMessage(Result.DiagnosticMessage)}, |
394 | 0 | {"ruleIndex", static_cast<int64_t>(RuleIdx)}, |
395 | 0 | {"ruleId", Rule.Id}}; |
396 | 0 | if (!Result.Locations.empty()) { |
397 | 0 | json::Array Locs; |
398 | 0 | for (auto &Range : Result.Locations) { |
399 | 0 | Locs.emplace_back(createLocation(createPhysicalLocation(Range))); |
400 | 0 | } |
401 | 0 | Ret["locations"] = std::move(Locs); |
402 | 0 | } |
403 | 0 | if (!Result.ThreadFlows.empty()) |
404 | 0 | Ret["codeFlows"] = json::Array{createCodeFlow(Result.ThreadFlows)}; |
405 | |
|
406 | 0 | Ret["level"] = resultLevelToStr( |
407 | 0 | Result.LevelOverride.value_or(Rule.DefaultConfiguration.Level)); |
408 | |
|
409 | 0 | json::Object &Run = getCurrentRun(); |
410 | 0 | json::Array *Results = Run.getArray("results"); |
411 | 0 | Results->emplace_back(std::move(Ret)); |
412 | 0 | } |
413 | | |
414 | 0 | json::Object SarifDocumentWriter::createDocument() { |
415 | | // Flush all temporaries to their destinations if needed. |
416 | 0 | endRun(); |
417 | |
|
418 | 0 | json::Object Doc{ |
419 | 0 | {"$schema", SchemaURI}, |
420 | 0 | {"version", SchemaVersion}, |
421 | 0 | }; |
422 | 0 | if (!Runs.empty()) |
423 | 0 | Doc["runs"] = json::Array(Runs); |
424 | 0 | return Doc; |
425 | 0 | } |