xref: /freebsd/contrib/llvm-project/clang/lib/Frontend/VerifyDiagnosticConsumer.cpp (revision a3266ba2697a383d2ede56803320d941866c7e76)
1 //===- VerifyDiagnosticConsumer.cpp - Verifying Diagnostic Client ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This is a concrete diagnostic client, which buffers the diagnostic messages.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "clang/Frontend/VerifyDiagnosticConsumer.h"
14 #include "clang/Basic/CharInfo.h"
15 #include "clang/Basic/Diagnostic.h"
16 #include "clang/Basic/DiagnosticOptions.h"
17 #include "clang/Basic/FileManager.h"
18 #include "clang/Basic/LLVM.h"
19 #include "clang/Basic/SourceLocation.h"
20 #include "clang/Basic/SourceManager.h"
21 #include "clang/Basic/TokenKinds.h"
22 #include "clang/Frontend/FrontendDiagnostic.h"
23 #include "clang/Frontend/TextDiagnosticBuffer.h"
24 #include "clang/Lex/HeaderSearch.h"
25 #include "clang/Lex/Lexer.h"
26 #include "clang/Lex/PPCallbacks.h"
27 #include "clang/Lex/Preprocessor.h"
28 #include "clang/Lex/Token.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/ADT/SmallPtrSet.h"
31 #include "llvm/ADT/SmallString.h"
32 #include "llvm/ADT/StringRef.h"
33 #include "llvm/ADT/Twine.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/Regex.h"
36 #include "llvm/Support/raw_ostream.h"
37 #include <algorithm>
38 #include <cassert>
39 #include <cstddef>
40 #include <cstring>
41 #include <iterator>
42 #include <memory>
43 #include <string>
44 #include <utility>
45 #include <vector>
46 
47 using namespace clang;
48 
49 using Directive = VerifyDiagnosticConsumer::Directive;
50 using DirectiveList = VerifyDiagnosticConsumer::DirectiveList;
51 using ExpectedData = VerifyDiagnosticConsumer::ExpectedData;
52 
53 #ifndef NDEBUG
54 
55 namespace {
56 
57 class VerifyFileTracker : public PPCallbacks {
58   VerifyDiagnosticConsumer &Verify;
59   SourceManager &SM;
60 
61 public:
62   VerifyFileTracker(VerifyDiagnosticConsumer &Verify, SourceManager &SM)
63       : Verify(Verify), SM(SM) {}
64 
65   /// Hook into the preprocessor and update the list of parsed
66   /// files when the preprocessor indicates a new file is entered.
67   void FileChanged(SourceLocation Loc, FileChangeReason Reason,
68                    SrcMgr::CharacteristicKind FileType,
69                    FileID PrevFID) override {
70     Verify.UpdateParsedFileStatus(SM, SM.getFileID(Loc),
71                                   VerifyDiagnosticConsumer::IsParsed);
72   }
73 };
74 
75 } // namespace
76 
77 #endif
78 
79 //===----------------------------------------------------------------------===//
80 // Checking diagnostics implementation.
81 //===----------------------------------------------------------------------===//
82 
83 using DiagList = TextDiagnosticBuffer::DiagList;
84 using const_diag_iterator = TextDiagnosticBuffer::const_iterator;
85 
86 namespace {
87 
88 /// StandardDirective - Directive with string matching.
89 class StandardDirective : public Directive {
90 public:
91   StandardDirective(SourceLocation DirectiveLoc, SourceLocation DiagnosticLoc,
92                     bool MatchAnyFileAndLine, bool MatchAnyLine, StringRef Text,
93                     unsigned Min, unsigned Max)
94       : Directive(DirectiveLoc, DiagnosticLoc, MatchAnyFileAndLine,
95                   MatchAnyLine, Text, Min, Max) {}
96 
97   bool isValid(std::string &Error) override {
98     // all strings are considered valid; even empty ones
99     return true;
100   }
101 
102   bool match(StringRef S) override {
103     return S.find(Text) != StringRef::npos;
104   }
105 };
106 
107 /// RegexDirective - Directive with regular-expression matching.
108 class RegexDirective : public Directive {
109 public:
110   RegexDirective(SourceLocation DirectiveLoc, SourceLocation DiagnosticLoc,
111                  bool MatchAnyFileAndLine, bool MatchAnyLine, StringRef Text,
112                  unsigned Min, unsigned Max, StringRef RegexStr)
113       : Directive(DirectiveLoc, DiagnosticLoc, MatchAnyFileAndLine,
114                   MatchAnyLine, Text, Min, Max),
115         Regex(RegexStr) {}
116 
117   bool isValid(std::string &Error) override {
118     return Regex.isValid(Error);
119   }
120 
121   bool match(StringRef S) override {
122     return Regex.match(S);
123   }
124 
125 private:
126   llvm::Regex Regex;
127 };
128 
129 class ParseHelper
130 {
131 public:
132   ParseHelper(StringRef S)
133       : Begin(S.begin()), End(S.end()), C(Begin), P(Begin) {}
134 
135   // Return true if string literal is next.
136   bool Next(StringRef S) {
137     P = C;
138     PEnd = C + S.size();
139     if (PEnd > End)
140       return false;
141     return memcmp(P, S.data(), S.size()) == 0;
142   }
143 
144   // Return true if number is next.
145   // Output N only if number is next.
146   bool Next(unsigned &N) {
147     unsigned TMP = 0;
148     P = C;
149     PEnd = P;
150     for (; PEnd < End && *PEnd >= '0' && *PEnd <= '9'; ++PEnd) {
151       TMP *= 10;
152       TMP += *PEnd - '0';
153     }
154     if (PEnd == C)
155       return false;
156     N = TMP;
157     return true;
158   }
159 
160   // Return true if a marker is next.
161   // A marker is the longest match for /#[A-Za-z0-9_-]+/.
162   bool NextMarker() {
163     P = C;
164     if (P == End || *P != '#')
165       return false;
166     PEnd = P;
167     ++PEnd;
168     while ((isAlphanumeric(*PEnd) || *PEnd == '-' || *PEnd == '_') &&
169            PEnd < End)
170       ++PEnd;
171     return PEnd > P + 1;
172   }
173 
174   // Return true if string literal S is matched in content.
175   // When true, P marks begin-position of the match, and calling Advance sets C
176   // to end-position of the match.
177   // If S is the empty string, then search for any letter instead (makes sense
178   // with FinishDirectiveToken=true).
179   // If EnsureStartOfWord, then skip matches that don't start a new word.
180   // If FinishDirectiveToken, then assume the match is the start of a comment
181   // directive for -verify, and extend the match to include the entire first
182   // token of that directive.
183   bool Search(StringRef S, bool EnsureStartOfWord = false,
184               bool FinishDirectiveToken = false) {
185     do {
186       if (!S.empty()) {
187         P = std::search(C, End, S.begin(), S.end());
188         PEnd = P + S.size();
189       }
190       else {
191         P = C;
192         while (P != End && !isLetter(*P))
193           ++P;
194         PEnd = P + 1;
195       }
196       if (P == End)
197         break;
198       // If not start of word but required, skip and search again.
199       if (EnsureStartOfWord
200                // Check if string literal starts a new word.
201           && !(P == Begin || isWhitespace(P[-1])
202                // Or it could be preceded by the start of a comment.
203                || (P > (Begin + 1) && (P[-1] == '/' || P[-1] == '*')
204                                    &&  P[-2] == '/')))
205         continue;
206       if (FinishDirectiveToken) {
207         while (PEnd != End && (isAlphanumeric(*PEnd)
208                                || *PEnd == '-' || *PEnd == '_'))
209           ++PEnd;
210         // Put back trailing digits and hyphens to be parsed later as a count
211         // or count range.  Because -verify prefixes must start with letters,
212         // we know the actual directive we found starts with a letter, so
213         // we won't put back the entire directive word and thus record an empty
214         // string.
215         assert(isLetter(*P) && "-verify prefix must start with a letter");
216         while (isDigit(PEnd[-1]) || PEnd[-1] == '-')
217           --PEnd;
218       }
219       return true;
220     } while (Advance());
221     return false;
222   }
223 
224   // Return true if a CloseBrace that closes the OpenBrace at the current nest
225   // level is found. When true, P marks begin-position of CloseBrace.
226   bool SearchClosingBrace(StringRef OpenBrace, StringRef CloseBrace) {
227     unsigned Depth = 1;
228     P = C;
229     while (P < End) {
230       StringRef S(P, End - P);
231       if (S.startswith(OpenBrace)) {
232         ++Depth;
233         P += OpenBrace.size();
234       } else if (S.startswith(CloseBrace)) {
235         --Depth;
236         if (Depth == 0) {
237           PEnd = P + CloseBrace.size();
238           return true;
239         }
240         P += CloseBrace.size();
241       } else {
242         ++P;
243       }
244     }
245     return false;
246   }
247 
248   // Advance 1-past previous next/search.
249   // Behavior is undefined if previous next/search failed.
250   bool Advance() {
251     C = PEnd;
252     return C < End;
253   }
254 
255   // Return the text matched by the previous next/search.
256   // Behavior is undefined if previous next/search failed.
257   StringRef Match() { return StringRef(P, PEnd - P); }
258 
259   // Skip zero or more whitespace.
260   void SkipWhitespace() {
261     for (; C < End && isWhitespace(*C); ++C)
262       ;
263   }
264 
265   // Return true if EOF reached.
266   bool Done() {
267     return !(C < End);
268   }
269 
270   // Beginning of expected content.
271   const char * const Begin;
272 
273   // End of expected content (1-past).
274   const char * const End;
275 
276   // Position of next char in content.
277   const char *C;
278 
279   // Previous next/search subject start.
280   const char *P;
281 
282 private:
283   // Previous next/search subject end (1-past).
284   const char *PEnd = nullptr;
285 };
286 
287 // The information necessary to create a directive.
288 struct UnattachedDirective {
289   DirectiveList *DL = nullptr;
290   bool RegexKind = false;
291   SourceLocation DirectivePos, ContentBegin;
292   std::string Text;
293   unsigned Min = 1, Max = 1;
294 };
295 
296 // Attach the specified directive to the line of code indicated by
297 // \p ExpectedLoc.
298 void attachDirective(DiagnosticsEngine &Diags, const UnattachedDirective &UD,
299                      SourceLocation ExpectedLoc,
300                      bool MatchAnyFileAndLine = false,
301                      bool MatchAnyLine = false) {
302   // Construct new directive.
303   std::unique_ptr<Directive> D = Directive::create(
304       UD.RegexKind, UD.DirectivePos, ExpectedLoc, MatchAnyFileAndLine,
305       MatchAnyLine, UD.Text, UD.Min, UD.Max);
306 
307   std::string Error;
308   if (!D->isValid(Error)) {
309     Diags.Report(UD.ContentBegin, diag::err_verify_invalid_content)
310       << (UD.RegexKind ? "regex" : "string") << Error;
311   }
312 
313   UD.DL->push_back(std::move(D));
314 }
315 
316 } // anonymous
317 
318 // Tracker for markers in the input files. A marker is a comment of the form
319 //
320 //   n = 123; // #123
321 //
322 // ... that can be referred to by a later expected-* directive:
323 //
324 //   // expected-error@#123 {{undeclared identifier 'n'}}
325 //
326 // Marker declarations must be at the start of a comment or preceded by
327 // whitespace to distinguish them from uses of markers in directives.
328 class VerifyDiagnosticConsumer::MarkerTracker {
329   DiagnosticsEngine &Diags;
330 
331   struct Marker {
332     SourceLocation DefLoc;
333     SourceLocation RedefLoc;
334     SourceLocation UseLoc;
335   };
336   llvm::StringMap<Marker> Markers;
337 
338   // Directives that couldn't be created yet because they name an unknown
339   // marker.
340   llvm::StringMap<llvm::SmallVector<UnattachedDirective, 2>> DeferredDirectives;
341 
342 public:
343   MarkerTracker(DiagnosticsEngine &Diags) : Diags(Diags) {}
344 
345   // Register a marker.
346   void addMarker(StringRef MarkerName, SourceLocation Pos) {
347     auto InsertResult = Markers.insert(
348         {MarkerName, Marker{Pos, SourceLocation(), SourceLocation()}});
349 
350     Marker &M = InsertResult.first->second;
351     if (!InsertResult.second) {
352       // Marker was redefined.
353       M.RedefLoc = Pos;
354     } else {
355       // First definition: build any deferred directives.
356       auto Deferred = DeferredDirectives.find(MarkerName);
357       if (Deferred != DeferredDirectives.end()) {
358         for (auto &UD : Deferred->second) {
359           if (M.UseLoc.isInvalid())
360             M.UseLoc = UD.DirectivePos;
361           attachDirective(Diags, UD, Pos);
362         }
363         DeferredDirectives.erase(Deferred);
364       }
365     }
366   }
367 
368   // Register a directive at the specified marker.
369   void addDirective(StringRef MarkerName, const UnattachedDirective &UD) {
370     auto MarkerIt = Markers.find(MarkerName);
371     if (MarkerIt != Markers.end()) {
372       Marker &M = MarkerIt->second;
373       if (M.UseLoc.isInvalid())
374         M.UseLoc = UD.DirectivePos;
375       return attachDirective(Diags, UD, M.DefLoc);
376     }
377     DeferredDirectives[MarkerName].push_back(UD);
378   }
379 
380   // Ensure we have no remaining deferred directives, and no
381   // multiply-defined-and-used markers.
382   void finalize() {
383     for (auto &MarkerInfo : Markers) {
384       StringRef Name = MarkerInfo.first();
385       Marker &M = MarkerInfo.second;
386       if (M.RedefLoc.isValid() && M.UseLoc.isValid()) {
387         Diags.Report(M.UseLoc, diag::err_verify_ambiguous_marker) << Name;
388         Diags.Report(M.DefLoc, diag::note_verify_ambiguous_marker) << Name;
389         Diags.Report(M.RedefLoc, diag::note_verify_ambiguous_marker) << Name;
390       }
391     }
392 
393     for (auto &DeferredPair : DeferredDirectives) {
394       Diags.Report(DeferredPair.second.front().DirectivePos,
395                    diag::err_verify_no_such_marker)
396           << DeferredPair.first();
397     }
398   }
399 };
400 
401 /// ParseDirective - Go through the comment and see if it indicates expected
402 /// diagnostics. If so, then put them in the appropriate directive list.
403 ///
404 /// Returns true if any valid directives were found.
405 static bool ParseDirective(StringRef S, ExpectedData *ED, SourceManager &SM,
406                            Preprocessor *PP, SourceLocation Pos,
407                            VerifyDiagnosticConsumer::DirectiveStatus &Status,
408                            VerifyDiagnosticConsumer::MarkerTracker &Markers) {
409   DiagnosticsEngine &Diags = PP ? PP->getDiagnostics() : SM.getDiagnostics();
410 
411   // First, scan the comment looking for markers.
412   for (ParseHelper PH(S); !PH.Done();) {
413     if (!PH.Search("#", true))
414       break;
415     PH.C = PH.P;
416     if (!PH.NextMarker()) {
417       PH.Next("#");
418       PH.Advance();
419       continue;
420     }
421     PH.Advance();
422     Markers.addMarker(PH.Match(), Pos);
423   }
424 
425   // A single comment may contain multiple directives.
426   bool FoundDirective = false;
427   for (ParseHelper PH(S); !PH.Done();) {
428     // Search for the initial directive token.
429     // If one prefix, save time by searching only for its directives.
430     // Otherwise, search for any potential directive token and check it later.
431     const auto &Prefixes = Diags.getDiagnosticOptions().VerifyPrefixes;
432     if (!(Prefixes.size() == 1 ? PH.Search(*Prefixes.begin(), true, true)
433                                : PH.Search("", true, true)))
434       break;
435 
436     StringRef DToken = PH.Match();
437     PH.Advance();
438 
439     // Default directive kind.
440     UnattachedDirective D;
441     const char *KindStr = "string";
442 
443     // Parse the initial directive token in reverse so we can easily determine
444     // its exact actual prefix.  If we were to parse it from the front instead,
445     // it would be harder to determine where the prefix ends because there
446     // might be multiple matching -verify prefixes because some might prefix
447     // others.
448 
449     // Regex in initial directive token: -re
450     if (DToken.endswith("-re")) {
451       D.RegexKind = true;
452       KindStr = "regex";
453       DToken = DToken.substr(0, DToken.size()-3);
454     }
455 
456     // Type in initial directive token: -{error|warning|note|no-diagnostics}
457     bool NoDiag = false;
458     StringRef DType;
459     if (DToken.endswith(DType="-error"))
460       D.DL = ED ? &ED->Errors : nullptr;
461     else if (DToken.endswith(DType="-warning"))
462       D.DL = ED ? &ED->Warnings : nullptr;
463     else if (DToken.endswith(DType="-remark"))
464       D.DL = ED ? &ED->Remarks : nullptr;
465     else if (DToken.endswith(DType="-note"))
466       D.DL = ED ? &ED->Notes : nullptr;
467     else if (DToken.endswith(DType="-no-diagnostics")) {
468       NoDiag = true;
469       if (D.RegexKind)
470         continue;
471     }
472     else
473       continue;
474     DToken = DToken.substr(0, DToken.size()-DType.size());
475 
476     // What's left in DToken is the actual prefix.  That might not be a -verify
477     // prefix even if there is only one -verify prefix (for example, the full
478     // DToken is foo-bar-warning, but foo is the only -verify prefix).
479     if (!std::binary_search(Prefixes.begin(), Prefixes.end(), DToken))
480       continue;
481 
482     if (NoDiag) {
483       if (Status == VerifyDiagnosticConsumer::HasOtherExpectedDirectives)
484         Diags.Report(Pos, diag::err_verify_invalid_no_diags)
485           << /*IsExpectedNoDiagnostics=*/true;
486       else
487         Status = VerifyDiagnosticConsumer::HasExpectedNoDiagnostics;
488       continue;
489     }
490     if (Status == VerifyDiagnosticConsumer::HasExpectedNoDiagnostics) {
491       Diags.Report(Pos, diag::err_verify_invalid_no_diags)
492         << /*IsExpectedNoDiagnostics=*/false;
493       continue;
494     }
495     Status = VerifyDiagnosticConsumer::HasOtherExpectedDirectives;
496 
497     // If a directive has been found but we're not interested
498     // in storing the directive information, return now.
499     if (!D.DL)
500       return true;
501 
502     // Next optional token: @
503     SourceLocation ExpectedLoc;
504     StringRef Marker;
505     bool MatchAnyFileAndLine = false;
506     bool MatchAnyLine = false;
507     if (!PH.Next("@")) {
508       ExpectedLoc = Pos;
509     } else {
510       PH.Advance();
511       unsigned Line = 0;
512       bool FoundPlus = PH.Next("+");
513       if (FoundPlus || PH.Next("-")) {
514         // Relative to current line.
515         PH.Advance();
516         bool Invalid = false;
517         unsigned ExpectedLine = SM.getSpellingLineNumber(Pos, &Invalid);
518         if (!Invalid && PH.Next(Line) && (FoundPlus || Line < ExpectedLine)) {
519           if (FoundPlus) ExpectedLine += Line;
520           else ExpectedLine -= Line;
521           ExpectedLoc = SM.translateLineCol(SM.getFileID(Pos), ExpectedLine, 1);
522         }
523       } else if (PH.Next(Line)) {
524         // Absolute line number.
525         if (Line > 0)
526           ExpectedLoc = SM.translateLineCol(SM.getFileID(Pos), Line, 1);
527       } else if (PH.NextMarker()) {
528         Marker = PH.Match();
529       } else if (PP && PH.Search(":")) {
530         // Specific source file.
531         StringRef Filename(PH.C, PH.P-PH.C);
532         PH.Advance();
533 
534         if (Filename == "*") {
535           MatchAnyFileAndLine = true;
536           if (!PH.Next("*")) {
537             Diags.Report(Pos.getLocWithOffset(PH.C - PH.Begin),
538                          diag::err_verify_missing_line)
539                 << "'*'";
540             continue;
541           }
542           MatchAnyLine = true;
543           ExpectedLoc = SourceLocation();
544         } else {
545           // Lookup file via Preprocessor, like a #include.
546           const DirectoryLookup *CurDir;
547           Optional<FileEntryRef> File =
548               PP->LookupFile(Pos, Filename, false, nullptr, nullptr, CurDir,
549                              nullptr, nullptr, nullptr, nullptr, nullptr);
550           if (!File) {
551             Diags.Report(Pos.getLocWithOffset(PH.C - PH.Begin),
552                          diag::err_verify_missing_file)
553                 << Filename << KindStr;
554             continue;
555           }
556 
557           FileID FID = SM.translateFile(*File);
558           if (FID.isInvalid())
559             FID = SM.createFileID(*File, Pos, SrcMgr::C_User);
560 
561           if (PH.Next(Line) && Line > 0)
562             ExpectedLoc = SM.translateLineCol(FID, Line, 1);
563           else if (PH.Next("*")) {
564             MatchAnyLine = true;
565             ExpectedLoc = SM.translateLineCol(FID, 1, 1);
566           }
567         }
568       } else if (PH.Next("*")) {
569         MatchAnyLine = true;
570         ExpectedLoc = SourceLocation();
571       }
572 
573       if (ExpectedLoc.isInvalid() && !MatchAnyLine && Marker.empty()) {
574         Diags.Report(Pos.getLocWithOffset(PH.C-PH.Begin),
575                      diag::err_verify_missing_line) << KindStr;
576         continue;
577       }
578       PH.Advance();
579     }
580 
581     // Skip optional whitespace.
582     PH.SkipWhitespace();
583 
584     // Next optional token: positive integer or a '+'.
585     if (PH.Next(D.Min)) {
586       PH.Advance();
587       // A positive integer can be followed by a '+' meaning min
588       // or more, or by a '-' meaning a range from min to max.
589       if (PH.Next("+")) {
590         D.Max = Directive::MaxCount;
591         PH.Advance();
592       } else if (PH.Next("-")) {
593         PH.Advance();
594         if (!PH.Next(D.Max) || D.Max < D.Min) {
595           Diags.Report(Pos.getLocWithOffset(PH.C-PH.Begin),
596                        diag::err_verify_invalid_range) << KindStr;
597           continue;
598         }
599         PH.Advance();
600       } else {
601         D.Max = D.Min;
602       }
603     } else if (PH.Next("+")) {
604       // '+' on its own means "1 or more".
605       D.Max = Directive::MaxCount;
606       PH.Advance();
607     }
608 
609     // Skip optional whitespace.
610     PH.SkipWhitespace();
611 
612     // Next token: {{
613     if (!PH.Next("{{")) {
614       Diags.Report(Pos.getLocWithOffset(PH.C-PH.Begin),
615                    diag::err_verify_missing_start) << KindStr;
616       continue;
617     }
618     PH.Advance();
619     const char* const ContentBegin = PH.C; // mark content begin
620     // Search for token: }}
621     if (!PH.SearchClosingBrace("{{", "}}")) {
622       Diags.Report(Pos.getLocWithOffset(PH.C-PH.Begin),
623                    diag::err_verify_missing_end) << KindStr;
624       continue;
625     }
626     const char* const ContentEnd = PH.P; // mark content end
627     PH.Advance();
628 
629     D.DirectivePos = Pos;
630     D.ContentBegin = Pos.getLocWithOffset(ContentBegin - PH.Begin);
631 
632     // Build directive text; convert \n to newlines.
633     StringRef NewlineStr = "\\n";
634     StringRef Content(ContentBegin, ContentEnd-ContentBegin);
635     size_t CPos = 0;
636     size_t FPos;
637     while ((FPos = Content.find(NewlineStr, CPos)) != StringRef::npos) {
638       D.Text += Content.substr(CPos, FPos-CPos);
639       D.Text += '\n';
640       CPos = FPos + NewlineStr.size();
641     }
642     if (D.Text.empty())
643       D.Text.assign(ContentBegin, ContentEnd);
644 
645     // Check that regex directives contain at least one regex.
646     if (D.RegexKind && D.Text.find("{{") == StringRef::npos) {
647       Diags.Report(D.ContentBegin, diag::err_verify_missing_regex) << D.Text;
648       return false;
649     }
650 
651     if (Marker.empty())
652       attachDirective(Diags, D, ExpectedLoc, MatchAnyFileAndLine, MatchAnyLine);
653     else
654       Markers.addDirective(Marker, D);
655     FoundDirective = true;
656   }
657 
658   return FoundDirective;
659 }
660 
661 VerifyDiagnosticConsumer::VerifyDiagnosticConsumer(DiagnosticsEngine &Diags_)
662     : Diags(Diags_), PrimaryClient(Diags.getClient()),
663       PrimaryClientOwner(Diags.takeClient()),
664       Buffer(new TextDiagnosticBuffer()), Markers(new MarkerTracker(Diags)),
665       Status(HasNoDirectives) {
666   if (Diags.hasSourceManager())
667     setSourceManager(Diags.getSourceManager());
668 }
669 
670 VerifyDiagnosticConsumer::~VerifyDiagnosticConsumer() {
671   assert(!ActiveSourceFiles && "Incomplete parsing of source files!");
672   assert(!CurrentPreprocessor && "CurrentPreprocessor should be invalid!");
673   SrcManager = nullptr;
674   CheckDiagnostics();
675   assert(!Diags.ownsClient() &&
676          "The VerifyDiagnosticConsumer takes over ownership of the client!");
677 }
678 
679 // DiagnosticConsumer interface.
680 
681 void VerifyDiagnosticConsumer::BeginSourceFile(const LangOptions &LangOpts,
682                                                const Preprocessor *PP) {
683   // Attach comment handler on first invocation.
684   if (++ActiveSourceFiles == 1) {
685     if (PP) {
686       CurrentPreprocessor = PP;
687       this->LangOpts = &LangOpts;
688       setSourceManager(PP->getSourceManager());
689       const_cast<Preprocessor *>(PP)->addCommentHandler(this);
690 #ifndef NDEBUG
691       // Debug build tracks parsed files.
692       const_cast<Preprocessor *>(PP)->addPPCallbacks(
693                       std::make_unique<VerifyFileTracker>(*this, *SrcManager));
694 #endif
695     }
696   }
697 
698   assert((!PP || CurrentPreprocessor == PP) && "Preprocessor changed!");
699   PrimaryClient->BeginSourceFile(LangOpts, PP);
700 }
701 
702 void VerifyDiagnosticConsumer::EndSourceFile() {
703   assert(ActiveSourceFiles && "No active source files!");
704   PrimaryClient->EndSourceFile();
705 
706   // Detach comment handler once last active source file completed.
707   if (--ActiveSourceFiles == 0) {
708     if (CurrentPreprocessor)
709       const_cast<Preprocessor *>(CurrentPreprocessor)->
710           removeCommentHandler(this);
711 
712     // Diagnose any used-but-not-defined markers.
713     Markers->finalize();
714 
715     // Check diagnostics once last file completed.
716     CheckDiagnostics();
717     CurrentPreprocessor = nullptr;
718     LangOpts = nullptr;
719   }
720 }
721 
722 void VerifyDiagnosticConsumer::HandleDiagnostic(
723       DiagnosticsEngine::Level DiagLevel, const Diagnostic &Info) {
724   if (Info.hasSourceManager()) {
725     // If this diagnostic is for a different source manager, ignore it.
726     if (SrcManager && &Info.getSourceManager() != SrcManager)
727       return;
728 
729     setSourceManager(Info.getSourceManager());
730   }
731 
732 #ifndef NDEBUG
733   // Debug build tracks unparsed files for possible
734   // unparsed expected-* directives.
735   if (SrcManager) {
736     SourceLocation Loc = Info.getLocation();
737     if (Loc.isValid()) {
738       ParsedStatus PS = IsUnparsed;
739 
740       Loc = SrcManager->getExpansionLoc(Loc);
741       FileID FID = SrcManager->getFileID(Loc);
742 
743       const FileEntry *FE = SrcManager->getFileEntryForID(FID);
744       if (FE && CurrentPreprocessor && SrcManager->isLoadedFileID(FID)) {
745         // If the file is a modules header file it shall not be parsed
746         // for expected-* directives.
747         HeaderSearch &HS = CurrentPreprocessor->getHeaderSearchInfo();
748         if (HS.findModuleForHeader(FE))
749           PS = IsUnparsedNoDirectives;
750       }
751 
752       UpdateParsedFileStatus(*SrcManager, FID, PS);
753     }
754   }
755 #endif
756 
757   // Send the diagnostic to the buffer, we will check it once we reach the end
758   // of the source file (or are destructed).
759   Buffer->HandleDiagnostic(DiagLevel, Info);
760 }
761 
762 /// HandleComment - Hook into the preprocessor and extract comments containing
763 ///  expected errors and warnings.
764 bool VerifyDiagnosticConsumer::HandleComment(Preprocessor &PP,
765                                              SourceRange Comment) {
766   SourceManager &SM = PP.getSourceManager();
767 
768   // If this comment is for a different source manager, ignore it.
769   if (SrcManager && &SM != SrcManager)
770     return false;
771 
772   SourceLocation CommentBegin = Comment.getBegin();
773 
774   const char *CommentRaw = SM.getCharacterData(CommentBegin);
775   StringRef C(CommentRaw, SM.getCharacterData(Comment.getEnd()) - CommentRaw);
776 
777   if (C.empty())
778     return false;
779 
780   // Fold any "\<EOL>" sequences
781   size_t loc = C.find('\\');
782   if (loc == StringRef::npos) {
783     ParseDirective(C, &ED, SM, &PP, CommentBegin, Status, *Markers);
784     return false;
785   }
786 
787   std::string C2;
788   C2.reserve(C.size());
789 
790   for (size_t last = 0;; loc = C.find('\\', last)) {
791     if (loc == StringRef::npos || loc == C.size()) {
792       C2 += C.substr(last);
793       break;
794     }
795     C2 += C.substr(last, loc-last);
796     last = loc + 1;
797 
798     if (C[last] == '\n' || C[last] == '\r') {
799       ++last;
800 
801       // Escape \r\n  or \n\r, but not \n\n.
802       if (last < C.size())
803         if (C[last] == '\n' || C[last] == '\r')
804           if (C[last] != C[last-1])
805             ++last;
806     } else {
807       // This was just a normal backslash.
808       C2 += '\\';
809     }
810   }
811 
812   if (!C2.empty())
813     ParseDirective(C2, &ED, SM, &PP, CommentBegin, Status, *Markers);
814   return false;
815 }
816 
817 #ifndef NDEBUG
818 /// Lex the specified source file to determine whether it contains
819 /// any expected-* directives.  As a Lexer is used rather than a full-blown
820 /// Preprocessor, directives inside skipped #if blocks will still be found.
821 ///
822 /// \return true if any directives were found.
823 static bool findDirectives(SourceManager &SM, FileID FID,
824                            const LangOptions &LangOpts) {
825   // Create a raw lexer to pull all the comments out of FID.
826   if (FID.isInvalid())
827     return false;
828 
829   // Create a lexer to lex all the tokens of the main file in raw mode.
830   llvm::MemoryBufferRef FromFile = SM.getBufferOrFake(FID);
831   Lexer RawLex(FID, FromFile, SM, LangOpts);
832 
833   // Return comments as tokens, this is how we find expected diagnostics.
834   RawLex.SetCommentRetentionState(true);
835 
836   Token Tok;
837   Tok.setKind(tok::comment);
838   VerifyDiagnosticConsumer::DirectiveStatus Status =
839     VerifyDiagnosticConsumer::HasNoDirectives;
840   while (Tok.isNot(tok::eof)) {
841     RawLex.LexFromRawLexer(Tok);
842     if (!Tok.is(tok::comment)) continue;
843 
844     std::string Comment = RawLex.getSpelling(Tok, SM, LangOpts);
845     if (Comment.empty()) continue;
846 
847     // We don't care about tracking markers for this phase.
848     VerifyDiagnosticConsumer::MarkerTracker Markers(SM.getDiagnostics());
849 
850     // Find first directive.
851     if (ParseDirective(Comment, nullptr, SM, nullptr, Tok.getLocation(),
852                        Status, Markers))
853       return true;
854   }
855   return false;
856 }
857 #endif // !NDEBUG
858 
859 /// Takes a list of diagnostics that have been generated but not matched
860 /// by an expected-* directive and produces a diagnostic to the user from this.
861 static unsigned PrintUnexpected(DiagnosticsEngine &Diags, SourceManager *SourceMgr,
862                                 const_diag_iterator diag_begin,
863                                 const_diag_iterator diag_end,
864                                 const char *Kind) {
865   if (diag_begin == diag_end) return 0;
866 
867   SmallString<256> Fmt;
868   llvm::raw_svector_ostream OS(Fmt);
869   for (const_diag_iterator I = diag_begin, E = diag_end; I != E; ++I) {
870     if (I->first.isInvalid() || !SourceMgr)
871       OS << "\n  (frontend)";
872     else {
873       OS << "\n ";
874       if (const FileEntry *File = SourceMgr->getFileEntryForID(
875                                                 SourceMgr->getFileID(I->first)))
876         OS << " File " << File->getName();
877       OS << " Line " << SourceMgr->getPresumedLineNumber(I->first);
878     }
879     OS << ": " << I->second;
880   }
881 
882   Diags.Report(diag::err_verify_inconsistent_diags).setForceEmit()
883     << Kind << /*Unexpected=*/true << OS.str();
884   return std::distance(diag_begin, diag_end);
885 }
886 
887 /// Takes a list of diagnostics that were expected to have been generated
888 /// but were not and produces a diagnostic to the user from this.
889 static unsigned PrintExpected(DiagnosticsEngine &Diags,
890                               SourceManager &SourceMgr,
891                               std::vector<Directive *> &DL, const char *Kind) {
892   if (DL.empty())
893     return 0;
894 
895   SmallString<256> Fmt;
896   llvm::raw_svector_ostream OS(Fmt);
897   for (const auto *D : DL) {
898     if (D->DiagnosticLoc.isInvalid() || D->MatchAnyFileAndLine)
899       OS << "\n  File *";
900     else
901       OS << "\n  File " << SourceMgr.getFilename(D->DiagnosticLoc);
902     if (D->MatchAnyLine)
903       OS << " Line *";
904     else
905       OS << " Line " << SourceMgr.getPresumedLineNumber(D->DiagnosticLoc);
906     if (D->DirectiveLoc != D->DiagnosticLoc)
907       OS << " (directive at "
908          << SourceMgr.getFilename(D->DirectiveLoc) << ':'
909          << SourceMgr.getPresumedLineNumber(D->DirectiveLoc) << ')';
910     OS << ": " << D->Text;
911   }
912 
913   Diags.Report(diag::err_verify_inconsistent_diags).setForceEmit()
914     << Kind << /*Unexpected=*/false << OS.str();
915   return DL.size();
916 }
917 
918 /// Determine whether two source locations come from the same file.
919 static bool IsFromSameFile(SourceManager &SM, SourceLocation DirectiveLoc,
920                            SourceLocation DiagnosticLoc) {
921   while (DiagnosticLoc.isMacroID())
922     DiagnosticLoc = SM.getImmediateMacroCallerLoc(DiagnosticLoc);
923 
924   if (SM.isWrittenInSameFile(DirectiveLoc, DiagnosticLoc))
925     return true;
926 
927   const FileEntry *DiagFile = SM.getFileEntryForID(SM.getFileID(DiagnosticLoc));
928   if (!DiagFile && SM.isWrittenInMainFile(DirectiveLoc))
929     return true;
930 
931   return (DiagFile == SM.getFileEntryForID(SM.getFileID(DirectiveLoc)));
932 }
933 
934 /// CheckLists - Compare expected to seen diagnostic lists and return the
935 /// the difference between them.
936 static unsigned CheckLists(DiagnosticsEngine &Diags, SourceManager &SourceMgr,
937                            const char *Label,
938                            DirectiveList &Left,
939                            const_diag_iterator d2_begin,
940                            const_diag_iterator d2_end,
941                            bool IgnoreUnexpected) {
942   std::vector<Directive *> LeftOnly;
943   DiagList Right(d2_begin, d2_end);
944 
945   for (auto &Owner : Left) {
946     Directive &D = *Owner;
947     unsigned LineNo1 = SourceMgr.getPresumedLineNumber(D.DiagnosticLoc);
948 
949     for (unsigned i = 0; i < D.Max; ++i) {
950       DiagList::iterator II, IE;
951       for (II = Right.begin(), IE = Right.end(); II != IE; ++II) {
952         if (!D.MatchAnyLine) {
953           unsigned LineNo2 = SourceMgr.getPresumedLineNumber(II->first);
954           if (LineNo1 != LineNo2)
955             continue;
956         }
957 
958         if (!D.DiagnosticLoc.isInvalid() && !D.MatchAnyFileAndLine &&
959             !IsFromSameFile(SourceMgr, D.DiagnosticLoc, II->first))
960           continue;
961 
962         const std::string &RightText = II->second;
963         if (D.match(RightText))
964           break;
965       }
966       if (II == IE) {
967         // Not found.
968         if (i >= D.Min) break;
969         LeftOnly.push_back(&D);
970       } else {
971         // Found. The same cannot be found twice.
972         Right.erase(II);
973       }
974     }
975   }
976   // Now all that's left in Right are those that were not matched.
977   unsigned num = PrintExpected(Diags, SourceMgr, LeftOnly, Label);
978   if (!IgnoreUnexpected)
979     num += PrintUnexpected(Diags, &SourceMgr, Right.begin(), Right.end(), Label);
980   return num;
981 }
982 
983 /// CheckResults - This compares the expected results to those that
984 /// were actually reported. It emits any discrepencies. Return "true" if there
985 /// were problems. Return "false" otherwise.
986 static unsigned CheckResults(DiagnosticsEngine &Diags, SourceManager &SourceMgr,
987                              const TextDiagnosticBuffer &Buffer,
988                              ExpectedData &ED) {
989   // We want to capture the delta between what was expected and what was
990   // seen.
991   //
992   //   Expected \ Seen - set expected but not seen
993   //   Seen \ Expected - set seen but not expected
994   unsigned NumProblems = 0;
995 
996   const DiagnosticLevelMask DiagMask =
997     Diags.getDiagnosticOptions().getVerifyIgnoreUnexpected();
998 
999   // See if there are error mismatches.
1000   NumProblems += CheckLists(Diags, SourceMgr, "error", ED.Errors,
1001                             Buffer.err_begin(), Buffer.err_end(),
1002                             bool(DiagnosticLevelMask::Error & DiagMask));
1003 
1004   // See if there are warning mismatches.
1005   NumProblems += CheckLists(Diags, SourceMgr, "warning", ED.Warnings,
1006                             Buffer.warn_begin(), Buffer.warn_end(),
1007                             bool(DiagnosticLevelMask::Warning & DiagMask));
1008 
1009   // See if there are remark mismatches.
1010   NumProblems += CheckLists(Diags, SourceMgr, "remark", ED.Remarks,
1011                             Buffer.remark_begin(), Buffer.remark_end(),
1012                             bool(DiagnosticLevelMask::Remark & DiagMask));
1013 
1014   // See if there are note mismatches.
1015   NumProblems += CheckLists(Diags, SourceMgr, "note", ED.Notes,
1016                             Buffer.note_begin(), Buffer.note_end(),
1017                             bool(DiagnosticLevelMask::Note & DiagMask));
1018 
1019   return NumProblems;
1020 }
1021 
1022 void VerifyDiagnosticConsumer::UpdateParsedFileStatus(SourceManager &SM,
1023                                                       FileID FID,
1024                                                       ParsedStatus PS) {
1025   // Check SourceManager hasn't changed.
1026   setSourceManager(SM);
1027 
1028 #ifndef NDEBUG
1029   if (FID.isInvalid())
1030     return;
1031 
1032   const FileEntry *FE = SM.getFileEntryForID(FID);
1033 
1034   if (PS == IsParsed) {
1035     // Move the FileID from the unparsed set to the parsed set.
1036     UnparsedFiles.erase(FID);
1037     ParsedFiles.insert(std::make_pair(FID, FE));
1038   } else if (!ParsedFiles.count(FID) && !UnparsedFiles.count(FID)) {
1039     // Add the FileID to the unparsed set if we haven't seen it before.
1040 
1041     // Check for directives.
1042     bool FoundDirectives;
1043     if (PS == IsUnparsedNoDirectives)
1044       FoundDirectives = false;
1045     else
1046       FoundDirectives = !LangOpts || findDirectives(SM, FID, *LangOpts);
1047 
1048     // Add the FileID to the unparsed set.
1049     UnparsedFiles.insert(std::make_pair(FID,
1050                                       UnparsedFileStatus(FE, FoundDirectives)));
1051   }
1052 #endif
1053 }
1054 
1055 void VerifyDiagnosticConsumer::CheckDiagnostics() {
1056   // Ensure any diagnostics go to the primary client.
1057   DiagnosticConsumer *CurClient = Diags.getClient();
1058   std::unique_ptr<DiagnosticConsumer> Owner = Diags.takeClient();
1059   Diags.setClient(PrimaryClient, false);
1060 
1061 #ifndef NDEBUG
1062   // In a debug build, scan through any files that may have been missed
1063   // during parsing and issue a fatal error if directives are contained
1064   // within these files.  If a fatal error occurs, this suggests that
1065   // this file is being parsed separately from the main file, in which
1066   // case consider moving the directives to the correct place, if this
1067   // is applicable.
1068   if (!UnparsedFiles.empty()) {
1069     // Generate a cache of parsed FileEntry pointers for alias lookups.
1070     llvm::SmallPtrSet<const FileEntry *, 8> ParsedFileCache;
1071     for (const auto &I : ParsedFiles)
1072       if (const FileEntry *FE = I.second)
1073         ParsedFileCache.insert(FE);
1074 
1075     // Iterate through list of unparsed files.
1076     for (const auto &I : UnparsedFiles) {
1077       const UnparsedFileStatus &Status = I.second;
1078       const FileEntry *FE = Status.getFile();
1079 
1080       // Skip files that have been parsed via an alias.
1081       if (FE && ParsedFileCache.count(FE))
1082         continue;
1083 
1084       // Report a fatal error if this file contained directives.
1085       if (Status.foundDirectives()) {
1086         llvm::report_fatal_error(Twine("-verify directives found after rather"
1087                                        " than during normal parsing of ",
1088                                  StringRef(FE ? FE->getName() : "(unknown)")));
1089       }
1090     }
1091 
1092     // UnparsedFiles has been processed now, so clear it.
1093     UnparsedFiles.clear();
1094   }
1095 #endif // !NDEBUG
1096 
1097   if (SrcManager) {
1098     // Produce an error if no expected-* directives could be found in the
1099     // source file(s) processed.
1100     if (Status == HasNoDirectives) {
1101       Diags.Report(diag::err_verify_no_directives).setForceEmit();
1102       ++NumErrors;
1103       Status = HasNoDirectivesReported;
1104     }
1105 
1106     // Check that the expected diagnostics occurred.
1107     NumErrors += CheckResults(Diags, *SrcManager, *Buffer, ED);
1108   } else {
1109     const DiagnosticLevelMask DiagMask =
1110         ~Diags.getDiagnosticOptions().getVerifyIgnoreUnexpected();
1111     if (bool(DiagnosticLevelMask::Error & DiagMask))
1112       NumErrors += PrintUnexpected(Diags, nullptr, Buffer->err_begin(),
1113                                    Buffer->err_end(), "error");
1114     if (bool(DiagnosticLevelMask::Warning & DiagMask))
1115       NumErrors += PrintUnexpected(Diags, nullptr, Buffer->warn_begin(),
1116                                    Buffer->warn_end(), "warn");
1117     if (bool(DiagnosticLevelMask::Remark & DiagMask))
1118       NumErrors += PrintUnexpected(Diags, nullptr, Buffer->remark_begin(),
1119                                    Buffer->remark_end(), "remark");
1120     if (bool(DiagnosticLevelMask::Note & DiagMask))
1121       NumErrors += PrintUnexpected(Diags, nullptr, Buffer->note_begin(),
1122                                    Buffer->note_end(), "note");
1123   }
1124 
1125   Diags.setClient(CurClient, Owner.release() != nullptr);
1126 
1127   // Reset the buffer, we have processed all the diagnostics in it.
1128   Buffer.reset(new TextDiagnosticBuffer());
1129   ED.Reset();
1130 }
1131 
1132 std::unique_ptr<Directive> Directive::create(bool RegexKind,
1133                                              SourceLocation DirectiveLoc,
1134                                              SourceLocation DiagnosticLoc,
1135                                              bool MatchAnyFileAndLine,
1136                                              bool MatchAnyLine, StringRef Text,
1137                                              unsigned Min, unsigned Max) {
1138   if (!RegexKind)
1139     return std::make_unique<StandardDirective>(DirectiveLoc, DiagnosticLoc,
1140                                                MatchAnyFileAndLine,
1141                                                MatchAnyLine, Text, Min, Max);
1142 
1143   // Parse the directive into a regular expression.
1144   std::string RegexStr;
1145   StringRef S = Text;
1146   while (!S.empty()) {
1147     if (S.startswith("{{")) {
1148       S = S.drop_front(2);
1149       size_t RegexMatchLength = S.find("}}");
1150       assert(RegexMatchLength != StringRef::npos);
1151       // Append the regex, enclosed in parentheses.
1152       RegexStr += "(";
1153       RegexStr.append(S.data(), RegexMatchLength);
1154       RegexStr += ")";
1155       S = S.drop_front(RegexMatchLength + 2);
1156     } else {
1157       size_t VerbatimMatchLength = S.find("{{");
1158       if (VerbatimMatchLength == StringRef::npos)
1159         VerbatimMatchLength = S.size();
1160       // Escape and append the fixed string.
1161       RegexStr += llvm::Regex::escape(S.substr(0, VerbatimMatchLength));
1162       S = S.drop_front(VerbatimMatchLength);
1163     }
1164   }
1165 
1166   return std::make_unique<RegexDirective>(DirectiveLoc, DiagnosticLoc,
1167                                           MatchAnyFileAndLine, MatchAnyLine,
1168                                           Text, Min, Max, RegexStr);
1169 }
1170