xref: /freebsd/contrib/llvm-project/clang/lib/Frontend/VerifyDiagnosticConsumer.cpp (revision 3ceba58a7509418b47b8fca2d2b6bbf088714e26)
1 //===- VerifyDiagnosticConsumer.cpp - Verifying Diagnostic Client ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This is a concrete diagnostic client, which buffers the diagnostic messages.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "clang/Frontend/VerifyDiagnosticConsumer.h"
14 #include "clang/Basic/CharInfo.h"
15 #include "clang/Basic/Diagnostic.h"
16 #include "clang/Basic/DiagnosticOptions.h"
17 #include "clang/Basic/FileManager.h"
18 #include "clang/Basic/LLVM.h"
19 #include "clang/Basic/SourceLocation.h"
20 #include "clang/Basic/SourceManager.h"
21 #include "clang/Basic/TokenKinds.h"
22 #include "clang/Frontend/FrontendDiagnostic.h"
23 #include "clang/Frontend/TextDiagnosticBuffer.h"
24 #include "clang/Lex/HeaderSearch.h"
25 #include "clang/Lex/Lexer.h"
26 #include "clang/Lex/PPCallbacks.h"
27 #include "clang/Lex/Preprocessor.h"
28 #include "clang/Lex/Token.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/ADT/SmallPtrSet.h"
31 #include "llvm/ADT/SmallString.h"
32 #include "llvm/ADT/StringRef.h"
33 #include "llvm/ADT/Twine.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/Regex.h"
36 #include "llvm/Support/raw_ostream.h"
37 #include <algorithm>
38 #include <cassert>
39 #include <cstddef>
40 #include <cstring>
41 #include <iterator>
42 #include <memory>
43 #include <string>
44 #include <utility>
45 #include <vector>
46 
47 using namespace clang;
48 
49 using Directive = VerifyDiagnosticConsumer::Directive;
50 using DirectiveList = VerifyDiagnosticConsumer::DirectiveList;
51 using ExpectedData = VerifyDiagnosticConsumer::ExpectedData;
52 
53 #ifndef NDEBUG
54 
55 namespace {
56 
57 class VerifyFileTracker : public PPCallbacks {
58   VerifyDiagnosticConsumer &Verify;
59   SourceManager &SM;
60 
61 public:
62   VerifyFileTracker(VerifyDiagnosticConsumer &Verify, SourceManager &SM)
63       : Verify(Verify), SM(SM) {}
64 
65   /// Hook into the preprocessor and update the list of parsed
66   /// files when the preprocessor indicates a new file is entered.
67   void FileChanged(SourceLocation Loc, FileChangeReason Reason,
68                    SrcMgr::CharacteristicKind FileType,
69                    FileID PrevFID) override {
70     Verify.UpdateParsedFileStatus(SM, SM.getFileID(Loc),
71                                   VerifyDiagnosticConsumer::IsParsed);
72   }
73 };
74 
75 } // namespace
76 
77 #endif
78 
79 //===----------------------------------------------------------------------===//
80 // Checking diagnostics implementation.
81 //===----------------------------------------------------------------------===//
82 
83 using DiagList = TextDiagnosticBuffer::DiagList;
84 using const_diag_iterator = TextDiagnosticBuffer::const_iterator;
85 
86 namespace {
87 
88 /// StandardDirective - Directive with string matching.
89 class StandardDirective : public Directive {
90 public:
91   StandardDirective(SourceLocation DirectiveLoc, SourceLocation DiagnosticLoc,
92                     bool MatchAnyFileAndLine, bool MatchAnyLine, StringRef Text,
93                     unsigned Min, unsigned Max)
94       : Directive(DirectiveLoc, DiagnosticLoc, MatchAnyFileAndLine,
95                   MatchAnyLine, Text, Min, Max) {}
96 
97   bool isValid(std::string &Error) override {
98     // all strings are considered valid; even empty ones
99     return true;
100   }
101 
102   bool match(StringRef S) override { return S.contains(Text); }
103 };
104 
105 /// RegexDirective - Directive with regular-expression matching.
106 class RegexDirective : public Directive {
107 public:
108   RegexDirective(SourceLocation DirectiveLoc, SourceLocation DiagnosticLoc,
109                  bool MatchAnyFileAndLine, bool MatchAnyLine, StringRef Text,
110                  unsigned Min, unsigned Max, StringRef RegexStr)
111       : Directive(DirectiveLoc, DiagnosticLoc, MatchAnyFileAndLine,
112                   MatchAnyLine, Text, Min, Max),
113         Regex(RegexStr) {}
114 
115   bool isValid(std::string &Error) override {
116     return Regex.isValid(Error);
117   }
118 
119   bool match(StringRef S) override {
120     return Regex.match(S);
121   }
122 
123 private:
124   llvm::Regex Regex;
125 };
126 
127 class ParseHelper
128 {
129 public:
130   ParseHelper(StringRef S)
131       : Begin(S.begin()), End(S.end()), C(Begin), P(Begin) {}
132 
133   // Return true if string literal is next.
134   bool Next(StringRef S) {
135     P = C;
136     PEnd = C + S.size();
137     if (PEnd > End)
138       return false;
139     return memcmp(P, S.data(), S.size()) == 0;
140   }
141 
142   // Return true if number is next.
143   // Output N only if number is next.
144   bool Next(unsigned &N) {
145     unsigned TMP = 0;
146     P = C;
147     PEnd = P;
148     for (; PEnd < End && *PEnd >= '0' && *PEnd <= '9'; ++PEnd) {
149       TMP *= 10;
150       TMP += *PEnd - '0';
151     }
152     if (PEnd == C)
153       return false;
154     N = TMP;
155     return true;
156   }
157 
158   // Return true if a marker is next.
159   // A marker is the longest match for /#[A-Za-z0-9_-]+/.
160   bool NextMarker() {
161     P = C;
162     if (P == End || *P != '#')
163       return false;
164     PEnd = P;
165     ++PEnd;
166     while ((isAlphanumeric(*PEnd) || *PEnd == '-' || *PEnd == '_') &&
167            PEnd < End)
168       ++PEnd;
169     return PEnd > P + 1;
170   }
171 
172   // Return true if string literal S is matched in content.
173   // When true, P marks begin-position of the match, and calling Advance sets C
174   // to end-position of the match.
175   // If S is the empty string, then search for any letter instead (makes sense
176   // with FinishDirectiveToken=true).
177   // If EnsureStartOfWord, then skip matches that don't start a new word.
178   // If FinishDirectiveToken, then assume the match is the start of a comment
179   // directive for -verify, and extend the match to include the entire first
180   // token of that directive.
181   bool Search(StringRef S, bool EnsureStartOfWord = false,
182               bool FinishDirectiveToken = false) {
183     do {
184       if (!S.empty()) {
185         P = std::search(C, End, S.begin(), S.end());
186         PEnd = P + S.size();
187       }
188       else {
189         P = C;
190         while (P != End && !isLetter(*P))
191           ++P;
192         PEnd = P + 1;
193       }
194       if (P == End)
195         break;
196       // If not start of word but required, skip and search again.
197       if (EnsureStartOfWord
198                // Check if string literal starts a new word.
199           && !(P == Begin || isWhitespace(P[-1])
200                // Or it could be preceded by the start of a comment.
201                || (P > (Begin + 1) && (P[-1] == '/' || P[-1] == '*')
202                                    &&  P[-2] == '/')))
203         continue;
204       if (FinishDirectiveToken) {
205         while (PEnd != End && (isAlphanumeric(*PEnd)
206                                || *PEnd == '-' || *PEnd == '_'))
207           ++PEnd;
208         // Put back trailing digits and hyphens to be parsed later as a count
209         // or count range.  Because -verify prefixes must start with letters,
210         // we know the actual directive we found starts with a letter, so
211         // we won't put back the entire directive word and thus record an empty
212         // string.
213         assert(isLetter(*P) && "-verify prefix must start with a letter");
214         while (isDigit(PEnd[-1]) || PEnd[-1] == '-')
215           --PEnd;
216       }
217       return true;
218     } while (Advance());
219     return false;
220   }
221 
222   // Return true if a CloseBrace that closes the OpenBrace at the current nest
223   // level is found. When true, P marks begin-position of CloseBrace.
224   bool SearchClosingBrace(StringRef OpenBrace, StringRef CloseBrace) {
225     unsigned Depth = 1;
226     P = C;
227     while (P < End) {
228       StringRef S(P, End - P);
229       if (S.starts_with(OpenBrace)) {
230         ++Depth;
231         P += OpenBrace.size();
232       } else if (S.starts_with(CloseBrace)) {
233         --Depth;
234         if (Depth == 0) {
235           PEnd = P + CloseBrace.size();
236           return true;
237         }
238         P += CloseBrace.size();
239       } else {
240         ++P;
241       }
242     }
243     return false;
244   }
245 
246   // Advance 1-past previous next/search.
247   // Behavior is undefined if previous next/search failed.
248   bool Advance() {
249     C = PEnd;
250     return C < End;
251   }
252 
253   // Return the text matched by the previous next/search.
254   // Behavior is undefined if previous next/search failed.
255   StringRef Match() { return StringRef(P, PEnd - P); }
256 
257   // Skip zero or more whitespace.
258   void SkipWhitespace() {
259     for (; C < End && isWhitespace(*C); ++C)
260       ;
261   }
262 
263   // Return true if EOF reached.
264   bool Done() {
265     return !(C < End);
266   }
267 
268   // Beginning of expected content.
269   const char * const Begin;
270 
271   // End of expected content (1-past).
272   const char * const End;
273 
274   // Position of next char in content.
275   const char *C;
276 
277   // Previous next/search subject start.
278   const char *P;
279 
280 private:
281   // Previous next/search subject end (1-past).
282   const char *PEnd = nullptr;
283 };
284 
285 // The information necessary to create a directive.
286 struct UnattachedDirective {
287   DirectiveList *DL = nullptr;
288   bool RegexKind = false;
289   SourceLocation DirectivePos, ContentBegin;
290   std::string Text;
291   unsigned Min = 1, Max = 1;
292 };
293 
294 // Attach the specified directive to the line of code indicated by
295 // \p ExpectedLoc.
296 void attachDirective(DiagnosticsEngine &Diags, const UnattachedDirective &UD,
297                      SourceLocation ExpectedLoc,
298                      bool MatchAnyFileAndLine = false,
299                      bool MatchAnyLine = false) {
300   // Construct new directive.
301   std::unique_ptr<Directive> D = Directive::create(
302       UD.RegexKind, UD.DirectivePos, ExpectedLoc, MatchAnyFileAndLine,
303       MatchAnyLine, UD.Text, UD.Min, UD.Max);
304 
305   std::string Error;
306   if (!D->isValid(Error)) {
307     Diags.Report(UD.ContentBegin, diag::err_verify_invalid_content)
308       << (UD.RegexKind ? "regex" : "string") << Error;
309   }
310 
311   UD.DL->push_back(std::move(D));
312 }
313 
314 } // anonymous
315 
316 // Tracker for markers in the input files. A marker is a comment of the form
317 //
318 //   n = 123; // #123
319 //
320 // ... that can be referred to by a later expected-* directive:
321 //
322 //   // expected-error@#123 {{undeclared identifier 'n'}}
323 //
324 // Marker declarations must be at the start of a comment or preceded by
325 // whitespace to distinguish them from uses of markers in directives.
326 class VerifyDiagnosticConsumer::MarkerTracker {
327   DiagnosticsEngine &Diags;
328 
329   struct Marker {
330     SourceLocation DefLoc;
331     SourceLocation RedefLoc;
332     SourceLocation UseLoc;
333   };
334   llvm::StringMap<Marker> Markers;
335 
336   // Directives that couldn't be created yet because they name an unknown
337   // marker.
338   llvm::StringMap<llvm::SmallVector<UnattachedDirective, 2>> DeferredDirectives;
339 
340 public:
341   MarkerTracker(DiagnosticsEngine &Diags) : Diags(Diags) {}
342 
343   // Register a marker.
344   void addMarker(StringRef MarkerName, SourceLocation Pos) {
345     auto InsertResult = Markers.insert(
346         {MarkerName, Marker{Pos, SourceLocation(), SourceLocation()}});
347 
348     Marker &M = InsertResult.first->second;
349     if (!InsertResult.second) {
350       // Marker was redefined.
351       M.RedefLoc = Pos;
352     } else {
353       // First definition: build any deferred directives.
354       auto Deferred = DeferredDirectives.find(MarkerName);
355       if (Deferred != DeferredDirectives.end()) {
356         for (auto &UD : Deferred->second) {
357           if (M.UseLoc.isInvalid())
358             M.UseLoc = UD.DirectivePos;
359           attachDirective(Diags, UD, Pos);
360         }
361         DeferredDirectives.erase(Deferred);
362       }
363     }
364   }
365 
366   // Register a directive at the specified marker.
367   void addDirective(StringRef MarkerName, const UnattachedDirective &UD) {
368     auto MarkerIt = Markers.find(MarkerName);
369     if (MarkerIt != Markers.end()) {
370       Marker &M = MarkerIt->second;
371       if (M.UseLoc.isInvalid())
372         M.UseLoc = UD.DirectivePos;
373       return attachDirective(Diags, UD, M.DefLoc);
374     }
375     DeferredDirectives[MarkerName].push_back(UD);
376   }
377 
378   // Ensure we have no remaining deferred directives, and no
379   // multiply-defined-and-used markers.
380   void finalize() {
381     for (auto &MarkerInfo : Markers) {
382       StringRef Name = MarkerInfo.first();
383       Marker &M = MarkerInfo.second;
384       if (M.RedefLoc.isValid() && M.UseLoc.isValid()) {
385         Diags.Report(M.UseLoc, diag::err_verify_ambiguous_marker) << Name;
386         Diags.Report(M.DefLoc, diag::note_verify_ambiguous_marker) << Name;
387         Diags.Report(M.RedefLoc, diag::note_verify_ambiguous_marker) << Name;
388       }
389     }
390 
391     for (auto &DeferredPair : DeferredDirectives) {
392       Diags.Report(DeferredPair.second.front().DirectivePos,
393                    diag::err_verify_no_such_marker)
394           << DeferredPair.first();
395     }
396   }
397 };
398 
399 static std::string DetailedErrorString(const DiagnosticsEngine &Diags) {
400   if (Diags.getDiagnosticOptions().VerifyPrefixes.empty())
401     return "expected";
402   return *Diags.getDiagnosticOptions().VerifyPrefixes.begin();
403 }
404 
405 /// ParseDirective - Go through the comment and see if it indicates expected
406 /// diagnostics. If so, then put them in the appropriate directive list.
407 ///
408 /// Returns true if any valid directives were found.
409 static bool ParseDirective(StringRef S, ExpectedData *ED, SourceManager &SM,
410                            Preprocessor *PP, SourceLocation Pos,
411                            VerifyDiagnosticConsumer::DirectiveStatus &Status,
412                            VerifyDiagnosticConsumer::MarkerTracker &Markers) {
413   DiagnosticsEngine &Diags = PP ? PP->getDiagnostics() : SM.getDiagnostics();
414 
415   // First, scan the comment looking for markers.
416   for (ParseHelper PH(S); !PH.Done();) {
417     if (!PH.Search("#", true))
418       break;
419     PH.C = PH.P;
420     if (!PH.NextMarker()) {
421       PH.Next("#");
422       PH.Advance();
423       continue;
424     }
425     PH.Advance();
426     Markers.addMarker(PH.Match(), Pos);
427   }
428 
429   // A single comment may contain multiple directives.
430   bool FoundDirective = false;
431   for (ParseHelper PH(S); !PH.Done();) {
432     // Search for the initial directive token.
433     // If one prefix, save time by searching only for its directives.
434     // Otherwise, search for any potential directive token and check it later.
435     const auto &Prefixes = Diags.getDiagnosticOptions().VerifyPrefixes;
436     if (!(Prefixes.size() == 1 ? PH.Search(*Prefixes.begin(), true, true)
437                                : PH.Search("", true, true)))
438       break;
439 
440     StringRef DToken = PH.Match();
441     PH.Advance();
442 
443     // Default directive kind.
444     UnattachedDirective D;
445     const char *KindStr = "string";
446 
447     // Parse the initial directive token in reverse so we can easily determine
448     // its exact actual prefix.  If we were to parse it from the front instead,
449     // it would be harder to determine where the prefix ends because there
450     // might be multiple matching -verify prefixes because some might prefix
451     // others.
452 
453     // Regex in initial directive token: -re
454     if (DToken.consume_back("-re")) {
455       D.RegexKind = true;
456       KindStr = "regex";
457     }
458 
459     // Type in initial directive token: -{error|warning|note|no-diagnostics}
460     bool NoDiag = false;
461     StringRef DType;
462     if (DToken.ends_with(DType = "-error"))
463       D.DL = ED ? &ED->Errors : nullptr;
464     else if (DToken.ends_with(DType = "-warning"))
465       D.DL = ED ? &ED->Warnings : nullptr;
466     else if (DToken.ends_with(DType = "-remark"))
467       D.DL = ED ? &ED->Remarks : nullptr;
468     else if (DToken.ends_with(DType = "-note"))
469       D.DL = ED ? &ED->Notes : nullptr;
470     else if (DToken.ends_with(DType = "-no-diagnostics")) {
471       NoDiag = true;
472       if (D.RegexKind)
473         continue;
474     } else
475       continue;
476     DToken = DToken.substr(0, DToken.size()-DType.size());
477 
478     // What's left in DToken is the actual prefix.  That might not be a -verify
479     // prefix even if there is only one -verify prefix (for example, the full
480     // DToken is foo-bar-warning, but foo is the only -verify prefix).
481     if (!std::binary_search(Prefixes.begin(), Prefixes.end(), DToken))
482       continue;
483 
484     if (NoDiag) {
485       if (Status == VerifyDiagnosticConsumer::HasOtherExpectedDirectives)
486         Diags.Report(Pos, diag::err_verify_invalid_no_diags)
487             << DetailedErrorString(Diags) << /*IsExpectedNoDiagnostics=*/true;
488       else
489         Status = VerifyDiagnosticConsumer::HasExpectedNoDiagnostics;
490       continue;
491     }
492     if (Status == VerifyDiagnosticConsumer::HasExpectedNoDiagnostics) {
493       Diags.Report(Pos, diag::err_verify_invalid_no_diags)
494           << DetailedErrorString(Diags) << /*IsExpectedNoDiagnostics=*/false;
495       continue;
496     }
497     Status = VerifyDiagnosticConsumer::HasOtherExpectedDirectives;
498 
499     // If a directive has been found but we're not interested
500     // in storing the directive information, return now.
501     if (!D.DL)
502       return true;
503 
504     // Next optional token: @
505     SourceLocation ExpectedLoc;
506     StringRef Marker;
507     bool MatchAnyFileAndLine = false;
508     bool MatchAnyLine = false;
509     if (!PH.Next("@")) {
510       ExpectedLoc = Pos;
511     } else {
512       PH.Advance();
513       unsigned Line = 0;
514       bool FoundPlus = PH.Next("+");
515       if (FoundPlus || PH.Next("-")) {
516         // Relative to current line.
517         PH.Advance();
518         bool Invalid = false;
519         unsigned ExpectedLine = SM.getSpellingLineNumber(Pos, &Invalid);
520         if (!Invalid && PH.Next(Line) && (FoundPlus || Line < ExpectedLine)) {
521           if (FoundPlus) ExpectedLine += Line;
522           else ExpectedLine -= Line;
523           ExpectedLoc = SM.translateLineCol(SM.getFileID(Pos), ExpectedLine, 1);
524         }
525       } else if (PH.Next(Line)) {
526         // Absolute line number.
527         if (Line > 0)
528           ExpectedLoc = SM.translateLineCol(SM.getFileID(Pos), Line, 1);
529       } else if (PH.NextMarker()) {
530         Marker = PH.Match();
531       } else if (PP && PH.Search(":")) {
532         // Specific source file.
533         StringRef Filename(PH.C, PH.P-PH.C);
534         PH.Advance();
535 
536         if (Filename == "*") {
537           MatchAnyFileAndLine = true;
538           if (!PH.Next("*")) {
539             Diags.Report(Pos.getLocWithOffset(PH.C - PH.Begin),
540                          diag::err_verify_missing_line)
541                 << "'*'";
542             continue;
543           }
544           MatchAnyLine = true;
545           ExpectedLoc = SourceLocation();
546         } else {
547           // Lookup file via Preprocessor, like a #include.
548           OptionalFileEntryRef File =
549               PP->LookupFile(Pos, Filename, false, nullptr, nullptr, nullptr,
550                              nullptr, nullptr, nullptr, nullptr, nullptr);
551           if (!File) {
552             Diags.Report(Pos.getLocWithOffset(PH.C - PH.Begin),
553                          diag::err_verify_missing_file)
554                 << Filename << KindStr;
555             continue;
556           }
557 
558           FileID FID = SM.translateFile(*File);
559           if (FID.isInvalid())
560             FID = SM.createFileID(*File, Pos, SrcMgr::C_User);
561 
562           if (PH.Next(Line) && Line > 0)
563             ExpectedLoc = SM.translateLineCol(FID, Line, 1);
564           else if (PH.Next("*")) {
565             MatchAnyLine = true;
566             ExpectedLoc = SM.translateLineCol(FID, 1, 1);
567           }
568         }
569       } else if (PH.Next("*")) {
570         MatchAnyLine = true;
571         ExpectedLoc = SourceLocation();
572       }
573 
574       if (ExpectedLoc.isInvalid() && !MatchAnyLine && Marker.empty()) {
575         Diags.Report(Pos.getLocWithOffset(PH.C-PH.Begin),
576                      diag::err_verify_missing_line) << KindStr;
577         continue;
578       }
579       PH.Advance();
580     }
581 
582     // Skip optional whitespace.
583     PH.SkipWhitespace();
584 
585     // Next optional token: positive integer or a '+'.
586     if (PH.Next(D.Min)) {
587       PH.Advance();
588       // A positive integer can be followed by a '+' meaning min
589       // or more, or by a '-' meaning a range from min to max.
590       if (PH.Next("+")) {
591         D.Max = Directive::MaxCount;
592         PH.Advance();
593       } else if (PH.Next("-")) {
594         PH.Advance();
595         if (!PH.Next(D.Max) || D.Max < D.Min) {
596           Diags.Report(Pos.getLocWithOffset(PH.C-PH.Begin),
597                        diag::err_verify_invalid_range) << KindStr;
598           continue;
599         }
600         PH.Advance();
601       } else {
602         D.Max = D.Min;
603       }
604     } else if (PH.Next("+")) {
605       // '+' on its own means "1 or more".
606       D.Max = Directive::MaxCount;
607       PH.Advance();
608     }
609 
610     // Skip optional whitespace.
611     PH.SkipWhitespace();
612 
613     // Next token: {{
614     if (!PH.Next("{{")) {
615       Diags.Report(Pos.getLocWithOffset(PH.C-PH.Begin),
616                    diag::err_verify_missing_start) << KindStr;
617       continue;
618     }
619     llvm::SmallString<8> CloseBrace("}}");
620     const char *const DelimBegin = PH.C;
621     PH.Advance();
622     // Count the number of opening braces for `string` kinds
623     for (; !D.RegexKind && PH.Next("{"); PH.Advance())
624       CloseBrace += '}';
625     const char* const ContentBegin = PH.C; // mark content begin
626     // Search for closing brace
627     StringRef OpenBrace(DelimBegin, ContentBegin - DelimBegin);
628     if (!PH.SearchClosingBrace(OpenBrace, CloseBrace)) {
629       Diags.Report(Pos.getLocWithOffset(PH.C - PH.Begin),
630                    diag::err_verify_missing_end)
631           << KindStr << CloseBrace;
632       continue;
633     }
634     const char* const ContentEnd = PH.P; // mark content end
635     PH.Advance();
636 
637     D.DirectivePos = Pos;
638     D.ContentBegin = Pos.getLocWithOffset(ContentBegin - PH.Begin);
639 
640     // Build directive text; convert \n to newlines.
641     StringRef NewlineStr = "\\n";
642     StringRef Content(ContentBegin, ContentEnd-ContentBegin);
643     size_t CPos = 0;
644     size_t FPos;
645     while ((FPos = Content.find(NewlineStr, CPos)) != StringRef::npos) {
646       D.Text += Content.substr(CPos, FPos-CPos);
647       D.Text += '\n';
648       CPos = FPos + NewlineStr.size();
649     }
650     if (D.Text.empty())
651       D.Text.assign(ContentBegin, ContentEnd);
652 
653     // Check that regex directives contain at least one regex.
654     if (D.RegexKind && D.Text.find("{{") == StringRef::npos) {
655       Diags.Report(D.ContentBegin, diag::err_verify_missing_regex) << D.Text;
656       return false;
657     }
658 
659     if (Marker.empty())
660       attachDirective(Diags, D, ExpectedLoc, MatchAnyFileAndLine, MatchAnyLine);
661     else
662       Markers.addDirective(Marker, D);
663     FoundDirective = true;
664   }
665 
666   return FoundDirective;
667 }
668 
669 VerifyDiagnosticConsumer::VerifyDiagnosticConsumer(DiagnosticsEngine &Diags_)
670     : Diags(Diags_), PrimaryClient(Diags.getClient()),
671       PrimaryClientOwner(Diags.takeClient()),
672       Buffer(new TextDiagnosticBuffer()), Markers(new MarkerTracker(Diags)),
673       Status(HasNoDirectives) {
674   if (Diags.hasSourceManager())
675     setSourceManager(Diags.getSourceManager());
676 }
677 
678 VerifyDiagnosticConsumer::~VerifyDiagnosticConsumer() {
679   assert(!ActiveSourceFiles && "Incomplete parsing of source files!");
680   assert(!CurrentPreprocessor && "CurrentPreprocessor should be invalid!");
681   SrcManager = nullptr;
682   CheckDiagnostics();
683   assert(!Diags.ownsClient() &&
684          "The VerifyDiagnosticConsumer takes over ownership of the client!");
685 }
686 
687 // DiagnosticConsumer interface.
688 
689 void VerifyDiagnosticConsumer::BeginSourceFile(const LangOptions &LangOpts,
690                                                const Preprocessor *PP) {
691   // Attach comment handler on first invocation.
692   if (++ActiveSourceFiles == 1) {
693     if (PP) {
694       CurrentPreprocessor = PP;
695       this->LangOpts = &LangOpts;
696       setSourceManager(PP->getSourceManager());
697       const_cast<Preprocessor *>(PP)->addCommentHandler(this);
698 #ifndef NDEBUG
699       // Debug build tracks parsed files.
700       const_cast<Preprocessor *>(PP)->addPPCallbacks(
701                       std::make_unique<VerifyFileTracker>(*this, *SrcManager));
702 #endif
703     }
704   }
705 
706   assert((!PP || CurrentPreprocessor == PP) && "Preprocessor changed!");
707   PrimaryClient->BeginSourceFile(LangOpts, PP);
708 }
709 
710 void VerifyDiagnosticConsumer::EndSourceFile() {
711   assert(ActiveSourceFiles && "No active source files!");
712   PrimaryClient->EndSourceFile();
713 
714   // Detach comment handler once last active source file completed.
715   if (--ActiveSourceFiles == 0) {
716     if (CurrentPreprocessor)
717       const_cast<Preprocessor *>(CurrentPreprocessor)->
718           removeCommentHandler(this);
719 
720     // Diagnose any used-but-not-defined markers.
721     Markers->finalize();
722 
723     // Check diagnostics once last file completed.
724     CheckDiagnostics();
725     CurrentPreprocessor = nullptr;
726     LangOpts = nullptr;
727   }
728 }
729 
730 void VerifyDiagnosticConsumer::HandleDiagnostic(
731       DiagnosticsEngine::Level DiagLevel, const Diagnostic &Info) {
732   if (Info.hasSourceManager()) {
733     // If this diagnostic is for a different source manager, ignore it.
734     if (SrcManager && &Info.getSourceManager() != SrcManager)
735       return;
736 
737     setSourceManager(Info.getSourceManager());
738   }
739 
740 #ifndef NDEBUG
741   // Debug build tracks unparsed files for possible
742   // unparsed expected-* directives.
743   if (SrcManager) {
744     SourceLocation Loc = Info.getLocation();
745     if (Loc.isValid()) {
746       ParsedStatus PS = IsUnparsed;
747 
748       Loc = SrcManager->getExpansionLoc(Loc);
749       FileID FID = SrcManager->getFileID(Loc);
750 
751       auto FE = SrcManager->getFileEntryRefForID(FID);
752       if (FE && CurrentPreprocessor && SrcManager->isLoadedFileID(FID)) {
753         // If the file is a modules header file it shall not be parsed
754         // for expected-* directives.
755         HeaderSearch &HS = CurrentPreprocessor->getHeaderSearchInfo();
756         if (HS.findModuleForHeader(*FE))
757           PS = IsUnparsedNoDirectives;
758       }
759 
760       UpdateParsedFileStatus(*SrcManager, FID, PS);
761     }
762   }
763 #endif
764 
765   // Send the diagnostic to the buffer, we will check it once we reach the end
766   // of the source file (or are destructed).
767   Buffer->HandleDiagnostic(DiagLevel, Info);
768 }
769 
770 /// HandleComment - Hook into the preprocessor and extract comments containing
771 ///  expected errors and warnings.
772 bool VerifyDiagnosticConsumer::HandleComment(Preprocessor &PP,
773                                              SourceRange Comment) {
774   SourceManager &SM = PP.getSourceManager();
775 
776   // If this comment is for a different source manager, ignore it.
777   if (SrcManager && &SM != SrcManager)
778     return false;
779 
780   SourceLocation CommentBegin = Comment.getBegin();
781 
782   const char *CommentRaw = SM.getCharacterData(CommentBegin);
783   StringRef C(CommentRaw, SM.getCharacterData(Comment.getEnd()) - CommentRaw);
784 
785   if (C.empty())
786     return false;
787 
788   // Fold any "\<EOL>" sequences
789   size_t loc = C.find('\\');
790   if (loc == StringRef::npos) {
791     ParseDirective(C, &ED, SM, &PP, CommentBegin, Status, *Markers);
792     return false;
793   }
794 
795   std::string C2;
796   C2.reserve(C.size());
797 
798   for (size_t last = 0;; loc = C.find('\\', last)) {
799     if (loc == StringRef::npos || loc == C.size()) {
800       C2 += C.substr(last);
801       break;
802     }
803     C2 += C.substr(last, loc-last);
804     last = loc + 1;
805 
806     if (C[last] == '\n' || C[last] == '\r') {
807       ++last;
808 
809       // Escape \r\n  or \n\r, but not \n\n.
810       if (last < C.size())
811         if (C[last] == '\n' || C[last] == '\r')
812           if (C[last] != C[last-1])
813             ++last;
814     } else {
815       // This was just a normal backslash.
816       C2 += '\\';
817     }
818   }
819 
820   if (!C2.empty())
821     ParseDirective(C2, &ED, SM, &PP, CommentBegin, Status, *Markers);
822   return false;
823 }
824 
825 #ifndef NDEBUG
826 /// Lex the specified source file to determine whether it contains
827 /// any expected-* directives.  As a Lexer is used rather than a full-blown
828 /// Preprocessor, directives inside skipped #if blocks will still be found.
829 ///
830 /// \return true if any directives were found.
831 static bool findDirectives(SourceManager &SM, FileID FID,
832                            const LangOptions &LangOpts) {
833   // Create a raw lexer to pull all the comments out of FID.
834   if (FID.isInvalid())
835     return false;
836 
837   // Create a lexer to lex all the tokens of the main file in raw mode.
838   llvm::MemoryBufferRef FromFile = SM.getBufferOrFake(FID);
839   Lexer RawLex(FID, FromFile, SM, LangOpts);
840 
841   // Return comments as tokens, this is how we find expected diagnostics.
842   RawLex.SetCommentRetentionState(true);
843 
844   Token Tok;
845   Tok.setKind(tok::comment);
846   VerifyDiagnosticConsumer::DirectiveStatus Status =
847     VerifyDiagnosticConsumer::HasNoDirectives;
848   while (Tok.isNot(tok::eof)) {
849     RawLex.LexFromRawLexer(Tok);
850     if (!Tok.is(tok::comment)) continue;
851 
852     std::string Comment = RawLex.getSpelling(Tok, SM, LangOpts);
853     if (Comment.empty()) continue;
854 
855     // We don't care about tracking markers for this phase.
856     VerifyDiagnosticConsumer::MarkerTracker Markers(SM.getDiagnostics());
857 
858     // Find first directive.
859     if (ParseDirective(Comment, nullptr, SM, nullptr, Tok.getLocation(),
860                        Status, Markers))
861       return true;
862   }
863   return false;
864 }
865 #endif // !NDEBUG
866 
867 /// Takes a list of diagnostics that have been generated but not matched
868 /// by an expected-* directive and produces a diagnostic to the user from this.
869 static unsigned PrintUnexpected(DiagnosticsEngine &Diags, SourceManager *SourceMgr,
870                                 const_diag_iterator diag_begin,
871                                 const_diag_iterator diag_end,
872                                 const char *Kind) {
873   if (diag_begin == diag_end) return 0;
874 
875   SmallString<256> Fmt;
876   llvm::raw_svector_ostream OS(Fmt);
877   for (const_diag_iterator I = diag_begin, E = diag_end; I != E; ++I) {
878     if (I->first.isInvalid() || !SourceMgr)
879       OS << "\n  (frontend)";
880     else {
881       OS << "\n ";
882       if (OptionalFileEntryRef File =
883               SourceMgr->getFileEntryRefForID(SourceMgr->getFileID(I->first)))
884         OS << " File " << File->getName();
885       OS << " Line " << SourceMgr->getPresumedLineNumber(I->first);
886     }
887     OS << ": " << I->second;
888   }
889 
890   std::string Prefix = *Diags.getDiagnosticOptions().VerifyPrefixes.begin();
891   std::string KindStr = Prefix + "-" + Kind;
892   Diags.Report(diag::err_verify_inconsistent_diags).setForceEmit()
893       << KindStr << /*Unexpected=*/true << OS.str();
894   return std::distance(diag_begin, diag_end);
895 }
896 
897 /// Takes a list of diagnostics that were expected to have been generated
898 /// but were not and produces a diagnostic to the user from this.
899 static unsigned PrintExpected(DiagnosticsEngine &Diags,
900                               SourceManager &SourceMgr,
901                               std::vector<Directive *> &DL, const char *Kind) {
902   if (DL.empty())
903     return 0;
904 
905   SmallString<256> Fmt;
906   llvm::raw_svector_ostream OS(Fmt);
907   for (const auto *D : DL) {
908     if (D->DiagnosticLoc.isInvalid() || D->MatchAnyFileAndLine)
909       OS << "\n  File *";
910     else
911       OS << "\n  File " << SourceMgr.getFilename(D->DiagnosticLoc);
912     if (D->MatchAnyLine)
913       OS << " Line *";
914     else
915       OS << " Line " << SourceMgr.getPresumedLineNumber(D->DiagnosticLoc);
916     if (D->DirectiveLoc != D->DiagnosticLoc)
917       OS << " (directive at "
918          << SourceMgr.getFilename(D->DirectiveLoc) << ':'
919          << SourceMgr.getPresumedLineNumber(D->DirectiveLoc) << ')';
920     OS << ": " << D->Text;
921   }
922 
923   std::string Prefix = *Diags.getDiagnosticOptions().VerifyPrefixes.begin();
924   std::string KindStr = Prefix + "-" + Kind;
925   Diags.Report(diag::err_verify_inconsistent_diags).setForceEmit()
926       << KindStr << /*Unexpected=*/false << OS.str();
927   return DL.size();
928 }
929 
930 /// Determine whether two source locations come from the same file.
931 static bool IsFromSameFile(SourceManager &SM, SourceLocation DirectiveLoc,
932                            SourceLocation DiagnosticLoc) {
933   while (DiagnosticLoc.isMacroID())
934     DiagnosticLoc = SM.getImmediateMacroCallerLoc(DiagnosticLoc);
935 
936   if (SM.isWrittenInSameFile(DirectiveLoc, DiagnosticLoc))
937     return true;
938 
939   const FileEntry *DiagFile = SM.getFileEntryForID(SM.getFileID(DiagnosticLoc));
940   if (!DiagFile && SM.isWrittenInMainFile(DirectiveLoc))
941     return true;
942 
943   return (DiagFile == SM.getFileEntryForID(SM.getFileID(DirectiveLoc)));
944 }
945 
946 /// CheckLists - Compare expected to seen diagnostic lists and return the
947 /// the difference between them.
948 static unsigned CheckLists(DiagnosticsEngine &Diags, SourceManager &SourceMgr,
949                            const char *Label,
950                            DirectiveList &Left,
951                            const_diag_iterator d2_begin,
952                            const_diag_iterator d2_end,
953                            bool IgnoreUnexpected) {
954   std::vector<Directive *> LeftOnly;
955   DiagList Right(d2_begin, d2_end);
956 
957   for (auto &Owner : Left) {
958     Directive &D = *Owner;
959     unsigned LineNo1 = SourceMgr.getPresumedLineNumber(D.DiagnosticLoc);
960 
961     for (unsigned i = 0; i < D.Max; ++i) {
962       DiagList::iterator II, IE;
963       for (II = Right.begin(), IE = Right.end(); II != IE; ++II) {
964         if (!D.MatchAnyLine) {
965           unsigned LineNo2 = SourceMgr.getPresumedLineNumber(II->first);
966           if (LineNo1 != LineNo2)
967             continue;
968         }
969 
970         if (!D.DiagnosticLoc.isInvalid() && !D.MatchAnyFileAndLine &&
971             !IsFromSameFile(SourceMgr, D.DiagnosticLoc, II->first))
972           continue;
973 
974         const std::string &RightText = II->second;
975         if (D.match(RightText))
976           break;
977       }
978       if (II == IE) {
979         // Not found.
980         if (i >= D.Min) break;
981         LeftOnly.push_back(&D);
982       } else {
983         // Found. The same cannot be found twice.
984         Right.erase(II);
985       }
986     }
987   }
988   // Now all that's left in Right are those that were not matched.
989   unsigned num = PrintExpected(Diags, SourceMgr, LeftOnly, Label);
990   if (!IgnoreUnexpected)
991     num += PrintUnexpected(Diags, &SourceMgr, Right.begin(), Right.end(), Label);
992   return num;
993 }
994 
995 /// CheckResults - This compares the expected results to those that
996 /// were actually reported. It emits any discrepencies. Return "true" if there
997 /// were problems. Return "false" otherwise.
998 static unsigned CheckResults(DiagnosticsEngine &Diags, SourceManager &SourceMgr,
999                              const TextDiagnosticBuffer &Buffer,
1000                              ExpectedData &ED) {
1001   // We want to capture the delta between what was expected and what was
1002   // seen.
1003   //
1004   //   Expected \ Seen - set expected but not seen
1005   //   Seen \ Expected - set seen but not expected
1006   unsigned NumProblems = 0;
1007 
1008   const DiagnosticLevelMask DiagMask =
1009     Diags.getDiagnosticOptions().getVerifyIgnoreUnexpected();
1010 
1011   // See if there are error mismatches.
1012   NumProblems += CheckLists(Diags, SourceMgr, "error", ED.Errors,
1013                             Buffer.err_begin(), Buffer.err_end(),
1014                             bool(DiagnosticLevelMask::Error & DiagMask));
1015 
1016   // See if there are warning mismatches.
1017   NumProblems += CheckLists(Diags, SourceMgr, "warning", ED.Warnings,
1018                             Buffer.warn_begin(), Buffer.warn_end(),
1019                             bool(DiagnosticLevelMask::Warning & DiagMask));
1020 
1021   // See if there are remark mismatches.
1022   NumProblems += CheckLists(Diags, SourceMgr, "remark", ED.Remarks,
1023                             Buffer.remark_begin(), Buffer.remark_end(),
1024                             bool(DiagnosticLevelMask::Remark & DiagMask));
1025 
1026   // See if there are note mismatches.
1027   NumProblems += CheckLists(Diags, SourceMgr, "note", ED.Notes,
1028                             Buffer.note_begin(), Buffer.note_end(),
1029                             bool(DiagnosticLevelMask::Note & DiagMask));
1030 
1031   return NumProblems;
1032 }
1033 
1034 void VerifyDiagnosticConsumer::UpdateParsedFileStatus(SourceManager &SM,
1035                                                       FileID FID,
1036                                                       ParsedStatus PS) {
1037   // Check SourceManager hasn't changed.
1038   setSourceManager(SM);
1039 
1040 #ifndef NDEBUG
1041   if (FID.isInvalid())
1042     return;
1043 
1044   OptionalFileEntryRef FE = SM.getFileEntryRefForID(FID);
1045 
1046   if (PS == IsParsed) {
1047     // Move the FileID from the unparsed set to the parsed set.
1048     UnparsedFiles.erase(FID);
1049     ParsedFiles.insert(std::make_pair(FID, FE ? &FE->getFileEntry() : nullptr));
1050   } else if (!ParsedFiles.count(FID) && !UnparsedFiles.count(FID)) {
1051     // Add the FileID to the unparsed set if we haven't seen it before.
1052 
1053     // Check for directives.
1054     bool FoundDirectives;
1055     if (PS == IsUnparsedNoDirectives)
1056       FoundDirectives = false;
1057     else
1058       FoundDirectives = !LangOpts || findDirectives(SM, FID, *LangOpts);
1059 
1060     // Add the FileID to the unparsed set.
1061     UnparsedFiles.insert(std::make_pair(FID,
1062                                       UnparsedFileStatus(FE, FoundDirectives)));
1063   }
1064 #endif
1065 }
1066 
1067 void VerifyDiagnosticConsumer::CheckDiagnostics() {
1068   // Ensure any diagnostics go to the primary client.
1069   DiagnosticConsumer *CurClient = Diags.getClient();
1070   std::unique_ptr<DiagnosticConsumer> Owner = Diags.takeClient();
1071   Diags.setClient(PrimaryClient, false);
1072 
1073 #ifndef NDEBUG
1074   // In a debug build, scan through any files that may have been missed
1075   // during parsing and issue a fatal error if directives are contained
1076   // within these files.  If a fatal error occurs, this suggests that
1077   // this file is being parsed separately from the main file, in which
1078   // case consider moving the directives to the correct place, if this
1079   // is applicable.
1080   if (!UnparsedFiles.empty()) {
1081     // Generate a cache of parsed FileEntry pointers for alias lookups.
1082     llvm::SmallPtrSet<const FileEntry *, 8> ParsedFileCache;
1083     for (const auto &I : ParsedFiles)
1084       if (const FileEntry *FE = I.second)
1085         ParsedFileCache.insert(FE);
1086 
1087     // Iterate through list of unparsed files.
1088     for (const auto &I : UnparsedFiles) {
1089       const UnparsedFileStatus &Status = I.second;
1090       OptionalFileEntryRef FE = Status.getFile();
1091 
1092       // Skip files that have been parsed via an alias.
1093       if (FE && ParsedFileCache.count(*FE))
1094         continue;
1095 
1096       // Report a fatal error if this file contained directives.
1097       if (Status.foundDirectives()) {
1098         llvm::report_fatal_error("-verify directives found after rather"
1099                                  " than during normal parsing of " +
1100                                  (FE ? FE->getName() : "(unknown)"));
1101       }
1102     }
1103 
1104     // UnparsedFiles has been processed now, so clear it.
1105     UnparsedFiles.clear();
1106   }
1107 #endif // !NDEBUG
1108 
1109   if (SrcManager) {
1110     // Produce an error if no expected-* directives could be found in the
1111     // source file(s) processed.
1112     if (Status == HasNoDirectives) {
1113       Diags.Report(diag::err_verify_no_directives).setForceEmit()
1114           << DetailedErrorString(Diags);
1115       ++NumErrors;
1116       Status = HasNoDirectivesReported;
1117     }
1118 
1119     // Check that the expected diagnostics occurred.
1120     NumErrors += CheckResults(Diags, *SrcManager, *Buffer, ED);
1121   } else {
1122     const DiagnosticLevelMask DiagMask =
1123         ~Diags.getDiagnosticOptions().getVerifyIgnoreUnexpected();
1124     if (bool(DiagnosticLevelMask::Error & DiagMask))
1125       NumErrors += PrintUnexpected(Diags, nullptr, Buffer->err_begin(),
1126                                    Buffer->err_end(), "error");
1127     if (bool(DiagnosticLevelMask::Warning & DiagMask))
1128       NumErrors += PrintUnexpected(Diags, nullptr, Buffer->warn_begin(),
1129                                    Buffer->warn_end(), "warn");
1130     if (bool(DiagnosticLevelMask::Remark & DiagMask))
1131       NumErrors += PrintUnexpected(Diags, nullptr, Buffer->remark_begin(),
1132                                    Buffer->remark_end(), "remark");
1133     if (bool(DiagnosticLevelMask::Note & DiagMask))
1134       NumErrors += PrintUnexpected(Diags, nullptr, Buffer->note_begin(),
1135                                    Buffer->note_end(), "note");
1136   }
1137 
1138   Diags.setClient(CurClient, Owner.release() != nullptr);
1139 
1140   // Reset the buffer, we have processed all the diagnostics in it.
1141   Buffer.reset(new TextDiagnosticBuffer());
1142   ED.Reset();
1143 }
1144 
1145 std::unique_ptr<Directive> Directive::create(bool RegexKind,
1146                                              SourceLocation DirectiveLoc,
1147                                              SourceLocation DiagnosticLoc,
1148                                              bool MatchAnyFileAndLine,
1149                                              bool MatchAnyLine, StringRef Text,
1150                                              unsigned Min, unsigned Max) {
1151   if (!RegexKind)
1152     return std::make_unique<StandardDirective>(DirectiveLoc, DiagnosticLoc,
1153                                                MatchAnyFileAndLine,
1154                                                MatchAnyLine, Text, Min, Max);
1155 
1156   // Parse the directive into a regular expression.
1157   std::string RegexStr;
1158   StringRef S = Text;
1159   while (!S.empty()) {
1160     if (S.consume_front("{{")) {
1161       size_t RegexMatchLength = S.find("}}");
1162       assert(RegexMatchLength != StringRef::npos);
1163       // Append the regex, enclosed in parentheses.
1164       RegexStr += "(";
1165       RegexStr.append(S.data(), RegexMatchLength);
1166       RegexStr += ")";
1167       S = S.drop_front(RegexMatchLength + 2);
1168     } else {
1169       size_t VerbatimMatchLength = S.find("{{");
1170       if (VerbatimMatchLength == StringRef::npos)
1171         VerbatimMatchLength = S.size();
1172       // Escape and append the fixed string.
1173       RegexStr += llvm::Regex::escape(S.substr(0, VerbatimMatchLength));
1174       S = S.drop_front(VerbatimMatchLength);
1175     }
1176   }
1177 
1178   return std::make_unique<RegexDirective>(DirectiveLoc, DiagnosticLoc,
1179                                           MatchAnyFileAndLine, MatchAnyLine,
1180                                           Text, Min, Max, RegexStr);
1181 }
1182