xref: /freebsd/contrib/llvm-project/clang/lib/Frontend/VerifyDiagnosticConsumer.cpp (revision 9c77fb6aaa366cbabc80ee1b834bcfe4df135491)
1 //===- VerifyDiagnosticConsumer.cpp - Verifying Diagnostic Client ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This is a concrete diagnostic client, which buffers the diagnostic messages.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "clang/Frontend/VerifyDiagnosticConsumer.h"
14 #include "clang/Basic/CharInfo.h"
15 #include "clang/Basic/Diagnostic.h"
16 #include "clang/Basic/DiagnosticOptions.h"
17 #include "clang/Basic/LLVM.h"
18 #include "clang/Basic/SourceLocation.h"
19 #include "clang/Basic/SourceManager.h"
20 #include "clang/Basic/TokenKinds.h"
21 #include "clang/Frontend/FrontendDiagnostic.h"
22 #include "clang/Frontend/TextDiagnosticBuffer.h"
23 #include "clang/Lex/HeaderSearch.h"
24 #include "clang/Lex/Lexer.h"
25 #include "clang/Lex/PPCallbacks.h"
26 #include "clang/Lex/Preprocessor.h"
27 #include "clang/Lex/Token.h"
28 #include "llvm/ADT/STLExtras.h"
29 #include "llvm/ADT/SmallPtrSet.h"
30 #include "llvm/ADT/SmallString.h"
31 #include "llvm/ADT/StringRef.h"
32 #include "llvm/ADT/Twine.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/Support/Regex.h"
35 #include "llvm/Support/raw_ostream.h"
36 #include <algorithm>
37 #include <cassert>
38 #include <cstddef>
39 #include <cstring>
40 #include <iterator>
41 #include <memory>
42 #include <string>
43 #include <utility>
44 #include <vector>
45 
46 using namespace clang;
47 
48 using Directive = VerifyDiagnosticConsumer::Directive;
49 using DirectiveList = VerifyDiagnosticConsumer::DirectiveList;
50 using ExpectedData = VerifyDiagnosticConsumer::ExpectedData;
51 
52 #ifndef NDEBUG
53 
54 namespace {
55 
56 class VerifyFileTracker : public PPCallbacks {
57   VerifyDiagnosticConsumer &Verify;
58   SourceManager &SM;
59 
60 public:
61   VerifyFileTracker(VerifyDiagnosticConsumer &Verify, SourceManager &SM)
62       : Verify(Verify), SM(SM) {}
63 
64   /// Hook into the preprocessor and update the list of parsed
65   /// files when the preprocessor indicates a new file is entered.
66   void FileChanged(SourceLocation Loc, FileChangeReason Reason,
67                    SrcMgr::CharacteristicKind FileType,
68                    FileID PrevFID) override {
69     Verify.UpdateParsedFileStatus(SM, SM.getFileID(Loc),
70                                   VerifyDiagnosticConsumer::IsParsed);
71   }
72 };
73 
74 } // namespace
75 
76 #endif
77 
78 //===----------------------------------------------------------------------===//
79 // Checking diagnostics implementation.
80 //===----------------------------------------------------------------------===//
81 
82 using DiagList = TextDiagnosticBuffer::DiagList;
83 using const_diag_iterator = TextDiagnosticBuffer::const_iterator;
84 
85 namespace {
86 
87 /// StandardDirective - Directive with string matching.
88 class StandardDirective : public Directive {
89 public:
90   StandardDirective(SourceLocation DirectiveLoc, SourceLocation DiagnosticLoc,
91                     StringRef Spelling, bool MatchAnyFileAndLine,
92                     bool MatchAnyLine, StringRef Text, unsigned Min,
93                     unsigned Max)
94       : Directive(DirectiveLoc, DiagnosticLoc, Spelling, MatchAnyFileAndLine,
95                   MatchAnyLine, Text, Min, Max) {}
96 
97   bool isValid(std::string &Error) override {
98     // all strings are considered valid; even empty ones
99     return true;
100   }
101 
102   bool match(StringRef S) override { return S.contains(Text); }
103 };
104 
105 /// RegexDirective - Directive with regular-expression matching.
106 class RegexDirective : public Directive {
107 public:
108   RegexDirective(SourceLocation DirectiveLoc, SourceLocation DiagnosticLoc,
109                  StringRef Spelling, bool MatchAnyFileAndLine,
110                  bool MatchAnyLine, StringRef Text, unsigned Min, unsigned Max,
111                  StringRef RegexStr)
112       : Directive(DirectiveLoc, DiagnosticLoc, Spelling, MatchAnyFileAndLine,
113                   MatchAnyLine, Text, Min, Max),
114         Regex(RegexStr) {}
115 
116   bool isValid(std::string &Error) override {
117     return Regex.isValid(Error);
118   }
119 
120   bool match(StringRef S) override {
121     return Regex.match(S);
122   }
123 
124 private:
125   llvm::Regex Regex;
126 };
127 
128 class ParseHelper
129 {
130 public:
131   ParseHelper(StringRef S)
132       : Begin(S.begin()), End(S.end()), C(Begin), P(Begin) {}
133 
134   // Return true if string literal is next.
135   bool Next(StringRef S) {
136     P = C;
137     PEnd = C + S.size();
138     if (PEnd > End)
139       return false;
140     return memcmp(P, S.data(), S.size()) == 0;
141   }
142 
143   // Return true if number is next.
144   // Output N only if number is next.
145   bool Next(unsigned &N) {
146     unsigned TMP = 0;
147     P = C;
148     PEnd = P;
149     for (; PEnd < End && *PEnd >= '0' && *PEnd <= '9'; ++PEnd) {
150       TMP *= 10;
151       TMP += *PEnd - '0';
152     }
153     if (PEnd == C)
154       return false;
155     N = TMP;
156     return true;
157   }
158 
159   // Return true if a marker is next.
160   // A marker is the longest match for /#[A-Za-z0-9_-]+/.
161   bool NextMarker() {
162     P = C;
163     if (P == End || *P != '#')
164       return false;
165     PEnd = P;
166     ++PEnd;
167     while ((isAlphanumeric(*PEnd) || *PEnd == '-' || *PEnd == '_') &&
168            PEnd < End)
169       ++PEnd;
170     return PEnd > P + 1;
171   }
172 
173   // Return true if string literal S is matched in content.
174   // When true, P marks begin-position of the match, and calling Advance sets C
175   // to end-position of the match.
176   // If S is the empty string, then search for any letter instead (makes sense
177   // with FinishDirectiveToken=true).
178   // If EnsureStartOfWord, then skip matches that don't start a new word.
179   // If FinishDirectiveToken, then assume the match is the start of a comment
180   // directive for -verify, and extend the match to include the entire first
181   // token of that directive.
182   bool Search(StringRef S, bool EnsureStartOfWord = false,
183               bool FinishDirectiveToken = false) {
184     do {
185       if (!S.empty()) {
186         P = std::search(C, End, S.begin(), S.end());
187         PEnd = P + S.size();
188       }
189       else {
190         P = C;
191         while (P != End && !isLetter(*P))
192           ++P;
193         PEnd = P + 1;
194       }
195       if (P == End)
196         break;
197       // If not start of word but required, skip and search again.
198       if (EnsureStartOfWord
199                // Check if string literal starts a new word.
200           && !(P == Begin || isWhitespace(P[-1])
201                // Or it could be preceded by the start of a comment.
202                || (P > (Begin + 1) && (P[-1] == '/' || P[-1] == '*')
203                                    &&  P[-2] == '/')))
204         continue;
205       if (FinishDirectiveToken) {
206         while (PEnd != End && (isAlphanumeric(*PEnd)
207                                || *PEnd == '-' || *PEnd == '_'))
208           ++PEnd;
209         // Put back trailing digits and hyphens to be parsed later as a count
210         // or count range.  Because -verify prefixes must start with letters,
211         // we know the actual directive we found starts with a letter, so
212         // we won't put back the entire directive word and thus record an empty
213         // string.
214         assert(isLetter(*P) && "-verify prefix must start with a letter");
215         while (isDigit(PEnd[-1]) || PEnd[-1] == '-')
216           --PEnd;
217       }
218       return true;
219     } while (Advance());
220     return false;
221   }
222 
223   // Return true if a CloseBrace that closes the OpenBrace at the current nest
224   // level is found. When true, P marks begin-position of CloseBrace.
225   bool SearchClosingBrace(StringRef OpenBrace, StringRef CloseBrace) {
226     unsigned Depth = 1;
227     P = C;
228     while (P < End) {
229       StringRef S(P, End - P);
230       if (S.starts_with(OpenBrace)) {
231         ++Depth;
232         P += OpenBrace.size();
233       } else if (S.starts_with(CloseBrace)) {
234         --Depth;
235         if (Depth == 0) {
236           PEnd = P + CloseBrace.size();
237           return true;
238         }
239         P += CloseBrace.size();
240       } else {
241         ++P;
242       }
243     }
244     return false;
245   }
246 
247   // Advance 1-past previous next/search.
248   // Behavior is undefined if previous next/search failed.
249   bool Advance() {
250     C = PEnd;
251     return C < End;
252   }
253 
254   // Return the text matched by the previous next/search.
255   // Behavior is undefined if previous next/search failed.
256   StringRef Match() { return StringRef(P, PEnd - P); }
257 
258   // Skip zero or more whitespace.
259   void SkipWhitespace() {
260     for (; C < End && isWhitespace(*C); ++C)
261       ;
262   }
263 
264   // Return true if EOF reached.
265   bool Done() {
266     return !(C < End);
267   }
268 
269   // Beginning of expected content.
270   const char * const Begin;
271 
272   // End of expected content (1-past).
273   const char * const End;
274 
275   // Position of next char in content.
276   const char *C;
277 
278   // Previous next/search subject start.
279   const char *P;
280 
281 private:
282   // Previous next/search subject end (1-past).
283   const char *PEnd = nullptr;
284 };
285 
286 // The information necessary to create a directive.
287 struct UnattachedDirective {
288   DirectiveList *DL = nullptr;
289   std::string Spelling;
290   bool RegexKind = false;
291   SourceLocation DirectivePos, ContentBegin;
292   std::string Text;
293   unsigned Min = 1, Max = 1;
294 };
295 
296 // Attach the specified directive to the line of code indicated by
297 // \p ExpectedLoc.
298 void attachDirective(DiagnosticsEngine &Diags, const UnattachedDirective &UD,
299                      SourceLocation ExpectedLoc,
300                      bool MatchAnyFileAndLine = false,
301                      bool MatchAnyLine = false) {
302   // Construct new directive.
303   std::unique_ptr<Directive> D = Directive::create(
304       UD.RegexKind, UD.DirectivePos, ExpectedLoc, UD.Spelling,
305       MatchAnyFileAndLine, MatchAnyLine, UD.Text, UD.Min, UD.Max);
306 
307   std::string Error;
308   if (!D->isValid(Error)) {
309     Diags.Report(UD.ContentBegin, diag::err_verify_invalid_content)
310       << (UD.RegexKind ? "regex" : "string") << Error;
311   }
312 
313   UD.DL->push_back(std::move(D));
314 }
315 
316 } // anonymous
317 
318 // Tracker for markers in the input files. A marker is a comment of the form
319 //
320 //   n = 123; // #123
321 //
322 // ... that can be referred to by a later expected-* directive:
323 //
324 //   // expected-error@#123 {{undeclared identifier 'n'}}
325 //
326 // Marker declarations must be at the start of a comment or preceded by
327 // whitespace to distinguish them from uses of markers in directives.
328 class VerifyDiagnosticConsumer::MarkerTracker {
329   DiagnosticsEngine &Diags;
330 
331   struct Marker {
332     SourceLocation DefLoc;
333     SourceLocation RedefLoc;
334     SourceLocation UseLoc;
335   };
336   llvm::StringMap<Marker> Markers;
337 
338   // Directives that couldn't be created yet because they name an unknown
339   // marker.
340   llvm::StringMap<llvm::SmallVector<UnattachedDirective, 2>> DeferredDirectives;
341 
342 public:
343   MarkerTracker(DiagnosticsEngine &Diags) : Diags(Diags) {}
344 
345   // Register a marker.
346   void addMarker(StringRef MarkerName, SourceLocation Pos) {
347     auto InsertResult = Markers.insert(
348         {MarkerName, Marker{Pos, SourceLocation(), SourceLocation()}});
349 
350     Marker &M = InsertResult.first->second;
351     if (!InsertResult.second) {
352       // Marker was redefined.
353       M.RedefLoc = Pos;
354     } else {
355       // First definition: build any deferred directives.
356       auto Deferred = DeferredDirectives.find(MarkerName);
357       if (Deferred != DeferredDirectives.end()) {
358         for (auto &UD : Deferred->second) {
359           if (M.UseLoc.isInvalid())
360             M.UseLoc = UD.DirectivePos;
361           attachDirective(Diags, UD, Pos);
362         }
363         DeferredDirectives.erase(Deferred);
364       }
365     }
366   }
367 
368   // Register a directive at the specified marker.
369   void addDirective(StringRef MarkerName, const UnattachedDirective &UD) {
370     auto MarkerIt = Markers.find(MarkerName);
371     if (MarkerIt != Markers.end()) {
372       Marker &M = MarkerIt->second;
373       if (M.UseLoc.isInvalid())
374         M.UseLoc = UD.DirectivePos;
375       return attachDirective(Diags, UD, M.DefLoc);
376     }
377     DeferredDirectives[MarkerName].push_back(UD);
378   }
379 
380   // Ensure we have no remaining deferred directives, and no
381   // multiply-defined-and-used markers.
382   void finalize() {
383     for (auto &MarkerInfo : Markers) {
384       StringRef Name = MarkerInfo.first();
385       Marker &M = MarkerInfo.second;
386       if (M.RedefLoc.isValid() && M.UseLoc.isValid()) {
387         Diags.Report(M.UseLoc, diag::err_verify_ambiguous_marker) << Name;
388         Diags.Report(M.DefLoc, diag::note_verify_ambiguous_marker) << Name;
389         Diags.Report(M.RedefLoc, diag::note_verify_ambiguous_marker) << Name;
390       }
391     }
392 
393     for (auto &DeferredPair : DeferredDirectives) {
394       Diags.Report(DeferredPair.second.front().DirectivePos,
395                    diag::err_verify_no_such_marker)
396           << DeferredPair.first();
397     }
398   }
399 };
400 
401 static std::string DetailedErrorString(const DiagnosticsEngine &Diags) {
402   if (Diags.getDiagnosticOptions().VerifyPrefixes.empty())
403     return "expected";
404   return *Diags.getDiagnosticOptions().VerifyPrefixes.begin();
405 }
406 
407 /// ParseDirective - Go through the comment and see if it indicates expected
408 /// diagnostics. If so, then put them in the appropriate directive list.
409 ///
410 /// Returns true if any valid directives were found.
411 static bool ParseDirective(StringRef S, ExpectedData *ED, SourceManager &SM,
412                            Preprocessor *PP, SourceLocation Pos,
413                            VerifyDiagnosticConsumer::ParsingState &State,
414                            VerifyDiagnosticConsumer::MarkerTracker &Markers) {
415   DiagnosticsEngine &Diags = PP ? PP->getDiagnostics() : SM.getDiagnostics();
416 
417   // First, scan the comment looking for markers.
418   for (ParseHelper PH(S); !PH.Done();) {
419     if (!PH.Search("#", true))
420       break;
421     PH.C = PH.P;
422     if (!PH.NextMarker()) {
423       PH.Next("#");
424       PH.Advance();
425       continue;
426     }
427     PH.Advance();
428     Markers.addMarker(PH.Match(), Pos);
429   }
430 
431   // A single comment may contain multiple directives.
432   bool FoundDirective = false;
433   for (ParseHelper PH(S); !PH.Done();) {
434     // Search for the initial directive token.
435     // If one prefix, save time by searching only for its directives.
436     // Otherwise, search for any potential directive token and check it later.
437     const auto &Prefixes = Diags.getDiagnosticOptions().VerifyPrefixes;
438     if (!(Prefixes.size() == 1 ? PH.Search(*Prefixes.begin(), true, true)
439                                : PH.Search("", true, true)))
440       break;
441 
442     StringRef DToken = PH.Match();
443     PH.Advance();
444 
445     UnattachedDirective D;
446     D.Spelling = DToken;
447     // Default directive kind.
448     const char *KindStr = "string";
449 
450     // Parse the initial directive token in reverse so we can easily determine
451     // its exact actual prefix.  If we were to parse it from the front instead,
452     // it would be harder to determine where the prefix ends because there
453     // might be multiple matching -verify prefixes because some might prefix
454     // others.
455 
456     // Regex in initial directive token: -re
457     if (DToken.consume_back("-re")) {
458       D.RegexKind = true;
459       KindStr = "regex";
460     }
461 
462     // Type in initial directive token: -{error|warning|note|no-diagnostics}
463     bool NoDiag = false;
464     StringRef DType;
465     if (DToken.ends_with(DType = "-error"))
466       D.DL = ED ? &ED->Errors : nullptr;
467     else if (DToken.ends_with(DType = "-warning"))
468       D.DL = ED ? &ED->Warnings : nullptr;
469     else if (DToken.ends_with(DType = "-remark"))
470       D.DL = ED ? &ED->Remarks : nullptr;
471     else if (DToken.ends_with(DType = "-note"))
472       D.DL = ED ? &ED->Notes : nullptr;
473     else if (DToken.ends_with(DType = "-no-diagnostics")) {
474       NoDiag = true;
475       if (D.RegexKind)
476         continue;
477     } else
478       continue;
479     DToken = DToken.substr(0, DToken.size()-DType.size());
480 
481     // What's left in DToken is the actual prefix.  That might not be a -verify
482     // prefix even if there is only one -verify prefix (for example, the full
483     // DToken is foo-bar-warning, but foo is the only -verify prefix).
484     if (!llvm::binary_search(Prefixes, DToken))
485       continue;
486 
487     if (NoDiag) {
488       if (State.Status ==
489           VerifyDiagnosticConsumer::HasOtherExpectedDirectives) {
490         Diags.Report(Pos, diag::err_verify_invalid_no_diags)
491             << D.Spelling << /*IsExpectedNoDiagnostics=*/true;
492       } else if (State.Status !=
493                  VerifyDiagnosticConsumer::HasExpectedNoDiagnostics) {
494         State.Status = VerifyDiagnosticConsumer::HasExpectedNoDiagnostics;
495         State.FirstNoDiagnosticsDirective = D.Spelling;
496       }
497       continue;
498     }
499     if (State.Status == VerifyDiagnosticConsumer::HasExpectedNoDiagnostics) {
500       Diags.Report(Pos, diag::err_verify_invalid_no_diags)
501           << D.Spelling << /*IsExpectedNoDiagnostics=*/false
502           << State.FirstNoDiagnosticsDirective;
503       continue;
504     }
505     State.Status = VerifyDiagnosticConsumer::HasOtherExpectedDirectives;
506 
507     // If a directive has been found but we're not interested
508     // in storing the directive information, return now.
509     if (!D.DL)
510       return true;
511 
512     // Next optional token: @
513     SourceLocation ExpectedLoc;
514     StringRef Marker;
515     bool MatchAnyFileAndLine = false;
516     bool MatchAnyLine = false;
517     if (!PH.Next("@")) {
518       ExpectedLoc = Pos;
519     } else {
520       PH.Advance();
521       unsigned Line = 0;
522       bool FoundPlus = PH.Next("+");
523       if (FoundPlus || PH.Next("-")) {
524         // Relative to current line.
525         PH.Advance();
526         bool Invalid = false;
527         unsigned ExpectedLine = SM.getSpellingLineNumber(Pos, &Invalid);
528         if (!Invalid && PH.Next(Line) && (FoundPlus || Line < ExpectedLine)) {
529           if (FoundPlus) ExpectedLine += Line;
530           else ExpectedLine -= Line;
531           ExpectedLoc = SM.translateLineCol(SM.getFileID(Pos), ExpectedLine, 1);
532         }
533       } else if (PH.Next(Line)) {
534         // Absolute line number.
535         if (Line > 0)
536           ExpectedLoc = SM.translateLineCol(SM.getFileID(Pos), Line, 1);
537       } else if (PH.NextMarker()) {
538         Marker = PH.Match();
539       } else if (PP && PH.Search(":")) {
540         // Specific source file.
541         StringRef Filename(PH.C, PH.P-PH.C);
542         PH.Advance();
543 
544         if (Filename == "*") {
545           MatchAnyFileAndLine = true;
546           if (!PH.Next("*")) {
547             Diags.Report(Pos.getLocWithOffset(PH.C - PH.Begin),
548                          diag::err_verify_missing_line)
549                 << "'*'";
550             continue;
551           }
552           MatchAnyLine = true;
553           ExpectedLoc = SourceLocation();
554         } else {
555           // Lookup file via Preprocessor, like a #include.
556           OptionalFileEntryRef File =
557               PP->LookupFile(Pos, Filename, false, nullptr, nullptr, nullptr,
558                              nullptr, nullptr, nullptr, nullptr, nullptr);
559           if (!File) {
560             Diags.Report(Pos.getLocWithOffset(PH.C - PH.Begin),
561                          diag::err_verify_missing_file)
562                 << Filename << KindStr;
563             continue;
564           }
565 
566           FileID FID = SM.translateFile(*File);
567           if (FID.isInvalid())
568             FID = SM.createFileID(*File, Pos, SrcMgr::C_User);
569 
570           if (PH.Next(Line) && Line > 0)
571             ExpectedLoc = SM.translateLineCol(FID, Line, 1);
572           else if (PH.Next("*")) {
573             MatchAnyLine = true;
574             ExpectedLoc = SM.translateLineCol(FID, 1, 1);
575           }
576         }
577       } else if (PH.Next("*")) {
578         MatchAnyLine = true;
579         ExpectedLoc = SourceLocation();
580       }
581 
582       if (ExpectedLoc.isInvalid() && !MatchAnyLine && Marker.empty()) {
583         Diags.Report(Pos.getLocWithOffset(PH.C-PH.Begin),
584                      diag::err_verify_missing_line) << KindStr;
585         continue;
586       }
587       PH.Advance();
588     }
589 
590     // Skip optional whitespace.
591     PH.SkipWhitespace();
592 
593     // Next optional token: positive integer or a '+'.
594     if (PH.Next(D.Min)) {
595       PH.Advance();
596       // A positive integer can be followed by a '+' meaning min
597       // or more, or by a '-' meaning a range from min to max.
598       if (PH.Next("+")) {
599         D.Max = Directive::MaxCount;
600         PH.Advance();
601       } else if (PH.Next("-")) {
602         PH.Advance();
603         if (!PH.Next(D.Max) || D.Max < D.Min) {
604           Diags.Report(Pos.getLocWithOffset(PH.C-PH.Begin),
605                        diag::err_verify_invalid_range) << KindStr;
606           continue;
607         }
608         PH.Advance();
609       } else {
610         D.Max = D.Min;
611       }
612     } else if (PH.Next("+")) {
613       // '+' on its own means "1 or more".
614       D.Max = Directive::MaxCount;
615       PH.Advance();
616     }
617 
618     // Skip optional whitespace.
619     PH.SkipWhitespace();
620 
621     // Next token: {{
622     if (!PH.Next("{{")) {
623       Diags.Report(Pos.getLocWithOffset(PH.C-PH.Begin),
624                    diag::err_verify_missing_start) << KindStr;
625       continue;
626     }
627     llvm::SmallString<8> CloseBrace("}}");
628     const char *const DelimBegin = PH.C;
629     PH.Advance();
630     // Count the number of opening braces for `string` kinds
631     for (; !D.RegexKind && PH.Next("{"); PH.Advance())
632       CloseBrace += '}';
633     const char* const ContentBegin = PH.C; // mark content begin
634     // Search for closing brace
635     StringRef OpenBrace(DelimBegin, ContentBegin - DelimBegin);
636     if (!PH.SearchClosingBrace(OpenBrace, CloseBrace)) {
637       Diags.Report(Pos.getLocWithOffset(PH.C - PH.Begin),
638                    diag::err_verify_missing_end)
639           << KindStr << CloseBrace;
640       continue;
641     }
642     const char* const ContentEnd = PH.P; // mark content end
643     PH.Advance();
644 
645     D.DirectivePos = Pos;
646     D.ContentBegin = Pos.getLocWithOffset(ContentBegin - PH.Begin);
647 
648     // Build directive text; convert \n to newlines.
649     StringRef NewlineStr = "\\n";
650     StringRef Content(ContentBegin, ContentEnd-ContentBegin);
651     size_t CPos = 0;
652     size_t FPos;
653     while ((FPos = Content.find(NewlineStr, CPos)) != StringRef::npos) {
654       D.Text += Content.substr(CPos, FPos-CPos);
655       D.Text += '\n';
656       CPos = FPos + NewlineStr.size();
657     }
658     if (D.Text.empty())
659       D.Text.assign(ContentBegin, ContentEnd);
660 
661     // Check that regex directives contain at least one regex.
662     if (D.RegexKind && D.Text.find("{{") == StringRef::npos) {
663       Diags.Report(D.ContentBegin, diag::err_verify_missing_regex) << D.Text;
664       return false;
665     }
666 
667     if (Marker.empty())
668       attachDirective(Diags, D, ExpectedLoc, MatchAnyFileAndLine, MatchAnyLine);
669     else
670       Markers.addDirective(Marker, D);
671     FoundDirective = true;
672   }
673 
674   return FoundDirective;
675 }
676 
677 VerifyDiagnosticConsumer::VerifyDiagnosticConsumer(DiagnosticsEngine &Diags_)
678     : Diags(Diags_), PrimaryClient(Diags.getClient()),
679       PrimaryClientOwner(Diags.takeClient()),
680       Buffer(new TextDiagnosticBuffer()), Markers(new MarkerTracker(Diags)),
681       State{HasNoDirectives, {}} {
682   if (Diags.hasSourceManager())
683     setSourceManager(Diags.getSourceManager());
684 }
685 
686 VerifyDiagnosticConsumer::~VerifyDiagnosticConsumer() {
687   assert(!ActiveSourceFiles && "Incomplete parsing of source files!");
688   assert(!CurrentPreprocessor && "CurrentPreprocessor should be invalid!");
689   SrcManager = nullptr;
690   CheckDiagnostics();
691   assert(!Diags.ownsClient() &&
692          "The VerifyDiagnosticConsumer takes over ownership of the client!");
693 }
694 
695 // DiagnosticConsumer interface.
696 
697 void VerifyDiagnosticConsumer::BeginSourceFile(const LangOptions &LangOpts,
698                                                const Preprocessor *PP) {
699   // Attach comment handler on first invocation.
700   if (++ActiveSourceFiles == 1) {
701     if (PP) {
702       CurrentPreprocessor = PP;
703       this->LangOpts = &LangOpts;
704       setSourceManager(PP->getSourceManager());
705       const_cast<Preprocessor *>(PP)->addCommentHandler(this);
706 #ifndef NDEBUG
707       // Debug build tracks parsed files.
708       const_cast<Preprocessor *>(PP)->addPPCallbacks(
709                       std::make_unique<VerifyFileTracker>(*this, *SrcManager));
710 #endif
711     }
712   }
713 
714   assert((!PP || CurrentPreprocessor == PP) && "Preprocessor changed!");
715   PrimaryClient->BeginSourceFile(LangOpts, PP);
716 }
717 
718 void VerifyDiagnosticConsumer::EndSourceFile() {
719   assert(ActiveSourceFiles && "No active source files!");
720   PrimaryClient->EndSourceFile();
721 
722   // Detach comment handler once last active source file completed.
723   if (--ActiveSourceFiles == 0) {
724     if (CurrentPreprocessor)
725       const_cast<Preprocessor *>(CurrentPreprocessor)->
726           removeCommentHandler(this);
727 
728     // Diagnose any used-but-not-defined markers.
729     Markers->finalize();
730 
731     // Check diagnostics once last file completed.
732     CheckDiagnostics();
733     CurrentPreprocessor = nullptr;
734     LangOpts = nullptr;
735   }
736 }
737 
738 void VerifyDiagnosticConsumer::HandleDiagnostic(
739       DiagnosticsEngine::Level DiagLevel, const Diagnostic &Info) {
740   if (Info.hasSourceManager()) {
741     // If this diagnostic is for a different source manager, ignore it.
742     if (SrcManager && &Info.getSourceManager() != SrcManager)
743       return;
744 
745     setSourceManager(Info.getSourceManager());
746   }
747 
748 #ifndef NDEBUG
749   // Debug build tracks unparsed files for possible
750   // unparsed expected-* directives.
751   if (SrcManager) {
752     SourceLocation Loc = Info.getLocation();
753     if (Loc.isValid()) {
754       ParsedStatus PS = IsUnparsed;
755 
756       Loc = SrcManager->getExpansionLoc(Loc);
757       FileID FID = SrcManager->getFileID(Loc);
758 
759       auto FE = SrcManager->getFileEntryRefForID(FID);
760       if (FE && CurrentPreprocessor && SrcManager->isLoadedFileID(FID)) {
761         // If the file is a modules header file it shall not be parsed
762         // for expected-* directives.
763         HeaderSearch &HS = CurrentPreprocessor->getHeaderSearchInfo();
764         if (HS.findModuleForHeader(*FE))
765           PS = IsUnparsedNoDirectives;
766       }
767 
768       UpdateParsedFileStatus(*SrcManager, FID, PS);
769     }
770   }
771 #endif
772 
773   // Send the diagnostic to the buffer, we will check it once we reach the end
774   // of the source file (or are destructed).
775   Buffer->HandleDiagnostic(DiagLevel, Info);
776 }
777 
778 /// HandleComment - Hook into the preprocessor and extract comments containing
779 ///  expected errors and warnings.
780 bool VerifyDiagnosticConsumer::HandleComment(Preprocessor &PP,
781                                              SourceRange Comment) {
782   SourceManager &SM = PP.getSourceManager();
783 
784   // If this comment is for a different source manager, ignore it.
785   if (SrcManager && &SM != SrcManager)
786     return false;
787 
788   SourceLocation CommentBegin = Comment.getBegin();
789 
790   const char *CommentRaw = SM.getCharacterData(CommentBegin);
791   StringRef C(CommentRaw, SM.getCharacterData(Comment.getEnd()) - CommentRaw);
792 
793   if (C.empty())
794     return false;
795 
796   // Fold any "\<EOL>" sequences
797   size_t loc = C.find('\\');
798   if (loc == StringRef::npos) {
799     ParseDirective(C, &ED, SM, &PP, CommentBegin, State, *Markers);
800     return false;
801   }
802 
803   std::string C2;
804   C2.reserve(C.size());
805 
806   for (size_t last = 0;; loc = C.find('\\', last)) {
807     if (loc == StringRef::npos || loc == C.size()) {
808       C2 += C.substr(last);
809       break;
810     }
811     C2 += C.substr(last, loc-last);
812     last = loc + 1;
813 
814     if (last < C.size() && (C[last] == '\n' || C[last] == '\r')) {
815       ++last;
816 
817       // Escape \r\n  or \n\r, but not \n\n.
818       if (last < C.size())
819         if (C[last] == '\n' || C[last] == '\r')
820           if (C[last] != C[last-1])
821             ++last;
822     } else {
823       // This was just a normal backslash.
824       C2 += '\\';
825     }
826   }
827 
828   if (!C2.empty())
829     ParseDirective(C2, &ED, SM, &PP, CommentBegin, State, *Markers);
830   return false;
831 }
832 
833 #ifndef NDEBUG
834 /// Lex the specified source file to determine whether it contains
835 /// any expected-* directives.  As a Lexer is used rather than a full-blown
836 /// Preprocessor, directives inside skipped #if blocks will still be found.
837 ///
838 /// \return true if any directives were found.
839 static bool findDirectives(SourceManager &SM, FileID FID,
840                            const LangOptions &LangOpts) {
841   // Create a raw lexer to pull all the comments out of FID.
842   if (FID.isInvalid())
843     return false;
844 
845   // Create a lexer to lex all the tokens of the main file in raw mode.
846   llvm::MemoryBufferRef FromFile = SM.getBufferOrFake(FID);
847   Lexer RawLex(FID, FromFile, SM, LangOpts);
848 
849   // Return comments as tokens, this is how we find expected diagnostics.
850   RawLex.SetCommentRetentionState(true);
851 
852   Token Tok;
853   Tok.setKind(tok::comment);
854   VerifyDiagnosticConsumer::ParsingState State = {
855       VerifyDiagnosticConsumer::HasNoDirectives, {}};
856   while (Tok.isNot(tok::eof)) {
857     RawLex.LexFromRawLexer(Tok);
858     if (!Tok.is(tok::comment)) continue;
859 
860     std::string Comment = RawLex.getSpelling(Tok, SM, LangOpts);
861     if (Comment.empty()) continue;
862 
863     // We don't care about tracking markers for this phase.
864     VerifyDiagnosticConsumer::MarkerTracker Markers(SM.getDiagnostics());
865 
866     // Find first directive.
867     if (ParseDirective(Comment, nullptr, SM, nullptr, Tok.getLocation(), State,
868                        Markers))
869       return true;
870   }
871   return false;
872 }
873 #endif // !NDEBUG
874 
875 /// Takes a list of diagnostics that have been generated but not matched
876 /// by an expected-* directive and produces a diagnostic to the user from this.
877 static unsigned PrintUnexpected(DiagnosticsEngine &Diags, SourceManager *SourceMgr,
878                                 const_diag_iterator diag_begin,
879                                 const_diag_iterator diag_end,
880                                 const char *Kind) {
881   if (diag_begin == diag_end) return 0;
882 
883   SmallString<256> Fmt;
884   llvm::raw_svector_ostream OS(Fmt);
885   for (const_diag_iterator I = diag_begin, E = diag_end; I != E; ++I) {
886     if (I->first.isInvalid() || !SourceMgr)
887       OS << "\n  (frontend)";
888     else {
889       OS << "\n ";
890       if (OptionalFileEntryRef File =
891               SourceMgr->getFileEntryRefForID(SourceMgr->getFileID(I->first)))
892         OS << " File " << File->getName();
893       OS << " Line " << SourceMgr->getPresumedLineNumber(I->first);
894     }
895     OS << ": " << I->second;
896   }
897 
898   const bool IsSinglePrefix =
899       Diags.getDiagnosticOptions().VerifyPrefixes.size() == 1;
900   std::string Prefix = *Diags.getDiagnosticOptions().VerifyPrefixes.begin();
901   Diags.Report(diag::err_verify_inconsistent_diags).setForceEmit()
902       << IsSinglePrefix << Prefix << Kind << /*Unexpected=*/true << OS.str();
903   return std::distance(diag_begin, diag_end);
904 }
905 
906 /// Takes a list of diagnostics that were expected to have been generated
907 /// but were not and produces a diagnostic to the user from this.
908 static unsigned PrintExpected(DiagnosticsEngine &Diags,
909                               SourceManager &SourceMgr,
910                               std::vector<Directive *> &DL, const char *Kind) {
911   if (DL.empty())
912     return 0;
913 
914   const bool IsSinglePrefix =
915       Diags.getDiagnosticOptions().VerifyPrefixes.size() == 1;
916 
917   SmallString<256> Fmt;
918   llvm::raw_svector_ostream OS(Fmt);
919   for (const auto *D : DL) {
920     if (D->DiagnosticLoc.isInvalid() || D->MatchAnyFileAndLine)
921       OS << "\n  File *";
922     else
923       OS << "\n  File " << SourceMgr.getFilename(D->DiagnosticLoc);
924     if (D->MatchAnyLine)
925       OS << " Line *";
926     else
927       OS << " Line " << SourceMgr.getPresumedLineNumber(D->DiagnosticLoc);
928     if (D->DirectiveLoc != D->DiagnosticLoc)
929       OS << " (directive at "
930          << SourceMgr.getFilename(D->DirectiveLoc) << ':'
931          << SourceMgr.getPresumedLineNumber(D->DirectiveLoc) << ')';
932     if (!IsSinglePrefix)
933       OS << " \'" << D->Spelling << '\'';
934     OS << ": " << D->Text;
935   }
936 
937   std::string Prefix = *Diags.getDiagnosticOptions().VerifyPrefixes.begin();
938   Diags.Report(diag::err_verify_inconsistent_diags).setForceEmit()
939       << IsSinglePrefix << Prefix << Kind << /*Unexpected=*/false << OS.str();
940   return DL.size();
941 }
942 
943 /// Determine whether two source locations come from the same file.
944 static bool IsFromSameFile(SourceManager &SM, SourceLocation DirectiveLoc,
945                            SourceLocation DiagnosticLoc) {
946   while (DiagnosticLoc.isMacroID())
947     DiagnosticLoc = SM.getImmediateMacroCallerLoc(DiagnosticLoc);
948 
949   if (SM.isWrittenInSameFile(DirectiveLoc, DiagnosticLoc))
950     return true;
951 
952   const FileEntry *DiagFile = SM.getFileEntryForID(SM.getFileID(DiagnosticLoc));
953   if (!DiagFile && SM.isWrittenInMainFile(DirectiveLoc))
954     return true;
955 
956   return (DiagFile == SM.getFileEntryForID(SM.getFileID(DirectiveLoc)));
957 }
958 
959 /// CheckLists - Compare expected to seen diagnostic lists and return the
960 /// the difference between them.
961 static unsigned CheckLists(DiagnosticsEngine &Diags, SourceManager &SourceMgr,
962                            const char *Label,
963                            DirectiveList &Left,
964                            const_diag_iterator d2_begin,
965                            const_diag_iterator d2_end,
966                            bool IgnoreUnexpected) {
967   std::vector<Directive *> LeftOnly;
968   DiagList Right(d2_begin, d2_end);
969 
970   for (auto &Owner : Left) {
971     Directive &D = *Owner;
972     unsigned LineNo1 = SourceMgr.getPresumedLineNumber(D.DiagnosticLoc);
973 
974     for (unsigned i = 0; i < D.Max; ++i) {
975       DiagList::iterator II, IE;
976       for (II = Right.begin(), IE = Right.end(); II != IE; ++II) {
977         if (!D.MatchAnyLine) {
978           unsigned LineNo2 = SourceMgr.getPresumedLineNumber(II->first);
979           if (LineNo1 != LineNo2)
980             continue;
981         }
982 
983         if (!D.DiagnosticLoc.isInvalid() && !D.MatchAnyFileAndLine &&
984             !IsFromSameFile(SourceMgr, D.DiagnosticLoc, II->first))
985           continue;
986 
987         const std::string &RightText = II->second;
988         if (D.match(RightText))
989           break;
990       }
991       if (II == IE) {
992         // Not found.
993         if (i >= D.Min) break;
994         LeftOnly.push_back(&D);
995       } else {
996         // Found. The same cannot be found twice.
997         Right.erase(II);
998       }
999     }
1000   }
1001   // Now all that's left in Right are those that were not matched.
1002   unsigned num = PrintExpected(Diags, SourceMgr, LeftOnly, Label);
1003   if (!IgnoreUnexpected)
1004     num += PrintUnexpected(Diags, &SourceMgr, Right.begin(), Right.end(), Label);
1005   return num;
1006 }
1007 
1008 /// CheckResults - This compares the expected results to those that
1009 /// were actually reported. It emits any discrepencies. Return "true" if there
1010 /// were problems. Return "false" otherwise.
1011 static unsigned CheckResults(DiagnosticsEngine &Diags, SourceManager &SourceMgr,
1012                              const TextDiagnosticBuffer &Buffer,
1013                              ExpectedData &ED) {
1014   // We want to capture the delta between what was expected and what was
1015   // seen.
1016   //
1017   //   Expected \ Seen - set expected but not seen
1018   //   Seen \ Expected - set seen but not expected
1019   unsigned NumProblems = 0;
1020 
1021   const DiagnosticLevelMask DiagMask =
1022     Diags.getDiagnosticOptions().getVerifyIgnoreUnexpected();
1023 
1024   // See if there are error mismatches.
1025   NumProblems += CheckLists(Diags, SourceMgr, "error", ED.Errors,
1026                             Buffer.err_begin(), Buffer.err_end(),
1027                             bool(DiagnosticLevelMask::Error & DiagMask));
1028 
1029   // See if there are warning mismatches.
1030   NumProblems += CheckLists(Diags, SourceMgr, "warning", ED.Warnings,
1031                             Buffer.warn_begin(), Buffer.warn_end(),
1032                             bool(DiagnosticLevelMask::Warning & DiagMask));
1033 
1034   // See if there are remark mismatches.
1035   NumProblems += CheckLists(Diags, SourceMgr, "remark", ED.Remarks,
1036                             Buffer.remark_begin(), Buffer.remark_end(),
1037                             bool(DiagnosticLevelMask::Remark & DiagMask));
1038 
1039   // See if there are note mismatches.
1040   NumProblems += CheckLists(Diags, SourceMgr, "note", ED.Notes,
1041                             Buffer.note_begin(), Buffer.note_end(),
1042                             bool(DiagnosticLevelMask::Note & DiagMask));
1043 
1044   return NumProblems;
1045 }
1046 
1047 void VerifyDiagnosticConsumer::UpdateParsedFileStatus(SourceManager &SM,
1048                                                       FileID FID,
1049                                                       ParsedStatus PS) {
1050   // Check SourceManager hasn't changed.
1051   setSourceManager(SM);
1052 
1053 #ifndef NDEBUG
1054   if (FID.isInvalid())
1055     return;
1056 
1057   OptionalFileEntryRef FE = SM.getFileEntryRefForID(FID);
1058 
1059   if (PS == IsParsed) {
1060     // Move the FileID from the unparsed set to the parsed set.
1061     UnparsedFiles.erase(FID);
1062     ParsedFiles.insert(std::make_pair(FID, FE ? &FE->getFileEntry() : nullptr));
1063   } else if (!ParsedFiles.count(FID) && !UnparsedFiles.count(FID)) {
1064     // Add the FileID to the unparsed set if we haven't seen it before.
1065 
1066     // Check for directives.
1067     bool FoundDirectives;
1068     if (PS == IsUnparsedNoDirectives)
1069       FoundDirectives = false;
1070     else
1071       FoundDirectives = !LangOpts || findDirectives(SM, FID, *LangOpts);
1072 
1073     // Add the FileID to the unparsed set.
1074     UnparsedFiles.insert(std::make_pair(FID,
1075                                       UnparsedFileStatus(FE, FoundDirectives)));
1076   }
1077 #endif
1078 }
1079 
1080 void VerifyDiagnosticConsumer::CheckDiagnostics() {
1081   // Ensure any diagnostics go to the primary client.
1082   DiagnosticConsumer *CurClient = Diags.getClient();
1083   std::unique_ptr<DiagnosticConsumer> Owner = Diags.takeClient();
1084   Diags.setClient(PrimaryClient, false);
1085 
1086 #ifndef NDEBUG
1087   // In a debug build, scan through any files that may have been missed
1088   // during parsing and issue a fatal error if directives are contained
1089   // within these files.  If a fatal error occurs, this suggests that
1090   // this file is being parsed separately from the main file, in which
1091   // case consider moving the directives to the correct place, if this
1092   // is applicable.
1093   if (!UnparsedFiles.empty()) {
1094     // Generate a cache of parsed FileEntry pointers for alias lookups.
1095     llvm::SmallPtrSet<const FileEntry *, 8> ParsedFileCache;
1096     for (const auto &I : ParsedFiles)
1097       if (const FileEntry *FE = I.second)
1098         ParsedFileCache.insert(FE);
1099 
1100     // Iterate through list of unparsed files.
1101     for (const auto &I : UnparsedFiles) {
1102       const UnparsedFileStatus &Status = I.second;
1103       OptionalFileEntryRef FE = Status.getFile();
1104 
1105       // Skip files that have been parsed via an alias.
1106       if (FE && ParsedFileCache.count(*FE))
1107         continue;
1108 
1109       // Report a fatal error if this file contained directives.
1110       if (Status.foundDirectives()) {
1111         llvm::report_fatal_error("-verify directives found after rather"
1112                                  " than during normal parsing of " +
1113                                  (FE ? FE->getName() : "(unknown)"));
1114       }
1115     }
1116 
1117     // UnparsedFiles has been processed now, so clear it.
1118     UnparsedFiles.clear();
1119   }
1120 #endif // !NDEBUG
1121 
1122   if (SrcManager) {
1123     // Produce an error if no expected-* directives could be found in the
1124     // source file(s) processed.
1125     if (State.Status == HasNoDirectives) {
1126       Diags.Report(diag::err_verify_no_directives).setForceEmit()
1127           << DetailedErrorString(Diags);
1128       ++NumErrors;
1129       State.Status = HasNoDirectivesReported;
1130     }
1131 
1132     // Check that the expected diagnostics occurred.
1133     NumErrors += CheckResults(Diags, *SrcManager, *Buffer, ED);
1134   } else {
1135     const DiagnosticLevelMask DiagMask =
1136         ~Diags.getDiagnosticOptions().getVerifyIgnoreUnexpected();
1137     if (bool(DiagnosticLevelMask::Error & DiagMask))
1138       NumErrors += PrintUnexpected(Diags, nullptr, Buffer->err_begin(),
1139                                    Buffer->err_end(), "error");
1140     if (bool(DiagnosticLevelMask::Warning & DiagMask))
1141       NumErrors += PrintUnexpected(Diags, nullptr, Buffer->warn_begin(),
1142                                    Buffer->warn_end(), "warn");
1143     if (bool(DiagnosticLevelMask::Remark & DiagMask))
1144       NumErrors += PrintUnexpected(Diags, nullptr, Buffer->remark_begin(),
1145                                    Buffer->remark_end(), "remark");
1146     if (bool(DiagnosticLevelMask::Note & DiagMask))
1147       NumErrors += PrintUnexpected(Diags, nullptr, Buffer->note_begin(),
1148                                    Buffer->note_end(), "note");
1149   }
1150 
1151   Diags.setClient(CurClient, Owner.release() != nullptr);
1152 
1153   // Reset the buffer, we have processed all the diagnostics in it.
1154   Buffer.reset(new TextDiagnosticBuffer());
1155   ED.Reset();
1156 }
1157 
1158 std::unique_ptr<Directive>
1159 Directive::create(bool RegexKind, SourceLocation DirectiveLoc,
1160                   SourceLocation DiagnosticLoc, StringRef Spelling,
1161                   bool MatchAnyFileAndLine, bool MatchAnyLine, StringRef Text,
1162                   unsigned Min, unsigned Max) {
1163   if (!RegexKind)
1164     return std::make_unique<StandardDirective>(DirectiveLoc, DiagnosticLoc,
1165                                                Spelling, MatchAnyFileAndLine,
1166                                                MatchAnyLine, Text, Min, Max);
1167 
1168   // Parse the directive into a regular expression.
1169   std::string RegexStr;
1170   StringRef S = Text;
1171   while (!S.empty()) {
1172     if (S.consume_front("{{")) {
1173       size_t RegexMatchLength = S.find("}}");
1174       assert(RegexMatchLength != StringRef::npos);
1175       // Append the regex, enclosed in parentheses.
1176       RegexStr += "(";
1177       RegexStr.append(S.data(), RegexMatchLength);
1178       RegexStr += ")";
1179       S = S.drop_front(RegexMatchLength + 2);
1180     } else {
1181       size_t VerbatimMatchLength = S.find("{{");
1182       if (VerbatimMatchLength == StringRef::npos)
1183         VerbatimMatchLength = S.size();
1184       // Escape and append the fixed string.
1185       RegexStr += llvm::Regex::escape(S.substr(0, VerbatimMatchLength));
1186       S = S.drop_front(VerbatimMatchLength);
1187     }
1188   }
1189 
1190   return std::make_unique<RegexDirective>(DirectiveLoc, DiagnosticLoc, Spelling,
1191                                           MatchAnyFileAndLine, MatchAnyLine,
1192                                           Text, Min, Max, RegexStr);
1193 }
1194