xref: /freebsd/contrib/llvm-project/clang/utils/TableGen/ClangDiagnosticsEmitter.cpp (revision e64bea71c21eb42e97aa615188ba91f6cce0d36d)
1 //===-- ClangDiagnosticsEmitter.cpp - Generate Clang diagnostics tables ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // These tablegen backends emit Clang diagnostics tables.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "TableGenBackends.h"
14 #include "llvm/ADT/DenseSet.h"
15 #include "llvm/ADT/PointerUnion.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallString.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringMap.h"
20 #include "llvm/ADT/StringSwitch.h"
21 #include "llvm/ADT/Twine.h"
22 #include "llvm/Support/Casting.h"
23 #include "llvm/Support/Format.h"
24 #include "llvm/TableGen/Error.h"
25 #include "llvm/TableGen/Record.h"
26 #include "llvm/TableGen/StringToOffsetTable.h"
27 #include "llvm/TableGen/TableGenBackend.h"
28 #include <algorithm>
29 #include <cctype>
30 #include <functional>
31 #include <map>
32 #include <optional>
33 #include <set>
34 using namespace llvm;
35 
36 //===----------------------------------------------------------------------===//
37 // Diagnostic category computation code.
38 //===----------------------------------------------------------------------===//
39 
40 namespace {
41 class DiagGroupParentMap {
42   const RecordKeeper &Records;
43   std::map<const Record *, std::vector<const Record *>> Mapping;
44 
45 public:
46   DiagGroupParentMap(const RecordKeeper &records) : Records(records) {
47     for (const Record *Group : Records.getAllDerivedDefinitions("DiagGroup"))
48       for (const Record *SubGroup : Group->getValueAsListOfDefs("SubGroups"))
49         Mapping[SubGroup].push_back(Group);
50   }
51 
52   ArrayRef<const Record *> getParents(const Record *Group) {
53     return Mapping[Group];
54   }
55 };
56 } // end anonymous namespace.
57 
58 static StringRef
59 getCategoryFromDiagGroup(const Record *Group,
60                          DiagGroupParentMap &DiagGroupParents) {
61   // If the DiagGroup has a category, return it.
62   StringRef CatName = Group->getValueAsString("CategoryName");
63   if (!CatName.empty()) return CatName;
64 
65   // The diag group may the subgroup of one or more other diagnostic groups,
66   // check these for a category as well.
67   for (const Record *Parent : DiagGroupParents.getParents(Group)) {
68     CatName = getCategoryFromDiagGroup(Parent, DiagGroupParents);
69     if (!CatName.empty()) return CatName;
70   }
71   return "";
72 }
73 
74 /// getDiagnosticCategory - Return the category that the specified diagnostic
75 /// lives in.
76 static StringRef getDiagnosticCategory(const Record *R,
77                                        DiagGroupParentMap &DiagGroupParents) {
78   // If the diagnostic is in a group, and that group has a category, use it.
79   if (const auto *Group = dyn_cast<DefInit>(R->getValueInit("Group"))) {
80     // Check the diagnostic's diag group for a category.
81     StringRef CatName =
82         getCategoryFromDiagGroup(Group->getDef(), DiagGroupParents);
83     if (!CatName.empty()) return CatName;
84   }
85 
86   // If the diagnostic itself has a category, get it.
87   return R->getValueAsString("CategoryName");
88 }
89 
90 namespace {
91   class DiagCategoryIDMap {
92     const RecordKeeper &Records;
93     StringMap<unsigned> CategoryIDs;
94     std::vector<StringRef> CategoryStrings;
95 
96   public:
97     DiagCategoryIDMap(const RecordKeeper &records) : Records(records) {
98       DiagGroupParentMap ParentInfo(Records);
99 
100       // The zero'th category is "".
101       CategoryStrings.push_back("");
102       CategoryIDs[""] = 0;
103 
104       for (const Record *Diag :
105            Records.getAllDerivedDefinitions("Diagnostic")) {
106         StringRef Category = getDiagnosticCategory(Diag, ParentInfo);
107         if (Category.empty()) continue;  // Skip diags with no category.
108 
109         unsigned &ID = CategoryIDs[Category];
110         if (ID != 0) continue;  // Already seen.
111 
112         ID = CategoryStrings.size();
113         CategoryStrings.push_back(Category);
114       }
115     }
116 
117     unsigned getID(StringRef CategoryString) {
118       return CategoryIDs[CategoryString];
119     }
120 
121     typedef std::vector<StringRef>::const_iterator const_iterator;
122     const_iterator begin() const { return CategoryStrings.begin(); }
123     const_iterator end() const { return CategoryStrings.end(); }
124   };
125 
  /// Everything groupDiagnostics() collects about one diagnostic group name.
  struct GroupInfo {
    // Record name (Record::getName()) of the DiagGroup definition; when the
    // same group name has several definitions the one processed last wins.
    StringRef GroupName;
    // Diagnostics whose "Group" field refers to a group with this name.
    std::vector<const Record*> DiagsInGroup;
    // "GroupName" strings of the records listed in "SubGroups".
    std::vector<StringRef> SubGroups;
    // Position of this entry in the iteration order of the DiagsInGroupTy map.
    unsigned IDNo = 0;

    // All DiagGroup records that define this group name; more than one entry
    // is reported as a duplicate definition by groupDiagnostics().
    SmallVector<const Record *, 1> Defs;

    GroupInfo() = default;
  };
136 } // end anonymous namespace.
137 
138 static bool beforeThanCompare(const Record *LHS, const Record *RHS) {
139   assert(!LHS->getLoc().empty() && !RHS->getLoc().empty());
140   return
141     LHS->getLoc().front().getPointer() < RHS->getLoc().front().getPointer();
142 }
143 
144 static bool diagGroupBeforeByName(const Record *LHS, const Record *RHS) {
145   return LHS->getValueAsString("GroupName") <
146          RHS->getValueAsString("GroupName");
147 }
148 
/// Per-group information keyed by the group's "GroupName" string. Being a
/// std::map, iteration visits the groups in sorted name order.
using DiagsInGroupTy = std::map<StringRef, GroupInfo>;
150 
151 /// Invert the 1-[0/1] mapping of diags to group into a one to many
152 /// mapping of groups to diags in the group.
153 static void groupDiagnostics(ArrayRef<const Record *> Diags,
154                              ArrayRef<const Record *> DiagGroups,
155                              DiagsInGroupTy &DiagsInGroup) {
156   for (const Record *R : Diags) {
157     const auto *DI = dyn_cast<DefInit>(R->getValueInit("Group"));
158     if (!DI)
159       continue;
160     assert(R->getValueAsDef("Class")->getName() != "CLASS_NOTE" &&
161            "Note can't be in a DiagGroup");
162     StringRef GroupName = DI->getDef()->getValueAsString("GroupName");
163     DiagsInGroup[GroupName].DiagsInGroup.push_back(R);
164   }
165 
166   // Add all DiagGroup's to the DiagsInGroup list to make sure we pick up empty
167   // groups (these are warnings that GCC supports that clang never produces).
168   for (const Record *Group : DiagGroups) {
169     GroupInfo &GI = DiagsInGroup[Group->getValueAsString("GroupName")];
170     GI.GroupName = Group->getName();
171     GI.Defs.push_back(Group);
172 
173     for (const Record *SubGroup : Group->getValueAsListOfDefs("SubGroups"))
174       GI.SubGroups.push_back(SubGroup->getValueAsString("GroupName"));
175   }
176 
177   // Assign unique ID numbers to the groups.
178   for (auto [IdNo, Iter] : enumerate(DiagsInGroup))
179     Iter.second.IDNo = IdNo;
180 
181   // Warn if the same group is defined more than once (including implicitly).
182   for (auto &Group : DiagsInGroup) {
183     if (Group.second.Defs.size() == 1 &&
184         (!Group.second.Defs.front()->isAnonymous() ||
185          Group.second.DiagsInGroup.size() <= 1))
186       continue;
187 
188     bool First = true;
189     for (const Record *Def : Group.second.Defs) {
190       // Skip implicit definitions from diagnostics; we'll report those
191       // separately below.
192       bool IsImplicit = false;
193       for (const Record *Diag : Group.second.DiagsInGroup) {
194         if (cast<DefInit>(Diag->getValueInit("Group"))->getDef() == Def) {
195           IsImplicit = true;
196           break;
197         }
198       }
199       if (IsImplicit)
200         continue;
201 
202       SMLoc Loc = Def->getLoc().front();
203       if (First) {
204         SrcMgr.PrintMessage(Loc, SourceMgr::DK_Error,
205                             Twine("group '") + Group.first +
206                                 "' is defined more than once");
207         First = false;
208       } else {
209         SrcMgr.PrintMessage(Loc, SourceMgr::DK_Note, "also defined here");
210       }
211     }
212 
213     for (const Record *Diag : Group.second.DiagsInGroup) {
214       if (!cast<DefInit>(Diag->getValueInit("Group"))->getDef()->isAnonymous())
215         continue;
216 
217       SMLoc Loc = Diag->getLoc().front();
218       if (First) {
219         SrcMgr.PrintMessage(Loc, SourceMgr::DK_Error,
220                             Twine("group '") + Group.first +
221                                 "' is implicitly defined more than once");
222         First = false;
223       } else {
224         SrcMgr.PrintMessage(Loc, SourceMgr::DK_Note,
225                             "also implicitly defined here");
226       }
227     }
228   }
229 }
230 
231 //===----------------------------------------------------------------------===//
232 // Infer members of -Wpedantic.
233 //===----------------------------------------------------------------------===//
234 
235 typedef std::vector<const Record *> RecordVec;
236 typedef DenseSet<const Record *> RecordSet;
237 typedef PointerUnion<RecordVec *, RecordSet *> VecOrSet;
238 
239 namespace {
240 class InferPedantic {
241   typedef DenseMap<const Record *, std::pair<unsigned, std::optional<unsigned>>>
242       GMap;
243 
244   DiagGroupParentMap &DiagGroupParents;
245   ArrayRef<const Record *> Diags;
246   const std::vector<const Record *> DiagGroups;
247   DiagsInGroupTy &DiagsInGroup;
248   DenseSet<const Record *> DiagsSet;
249   GMap GroupCount;
250 public:
251   InferPedantic(DiagGroupParentMap &DiagGroupParents,
252                 ArrayRef<const Record *> Diags,
253                 ArrayRef<const Record *> DiagGroups,
254                 DiagsInGroupTy &DiagsInGroup)
255       : DiagGroupParents(DiagGroupParents), Diags(Diags),
256         DiagGroups(DiagGroups), DiagsInGroup(DiagsInGroup) {}
257 
258   /// Compute the set of diagnostics and groups that are immediately
259   /// in -Wpedantic.
260   void compute(VecOrSet DiagsInPedantic,
261                VecOrSet GroupsInPedantic);
262 
263 private:
264   /// Determine whether a group is a subgroup of another group.
265   bool isSubGroupOfGroup(const Record *Group, StringRef RootGroupName);
266 
267   /// Determine if the diagnostic is an extension.
268   bool isExtension(const Record *Diag);
269 
270   /// Determine if the diagnostic is off by default.
271   bool isOffByDefault(const Record *Diag);
272 
273   /// Increment the count for a group, and transitively marked
274   /// parent groups when appropriate.
275   void markGroup(const Record *Group);
276 
277   /// Return true if the diagnostic is in a pedantic group.
278   bool groupInPedantic(const Record *Group, bool increment = false);
279 };
280 } // end anonymous namespace
281 
282 bool InferPedantic::isSubGroupOfGroup(const Record *Group, StringRef GName) {
283   StringRef GroupName = Group->getValueAsString("GroupName");
284   if (GName == GroupName)
285     return true;
286 
287   for (const Record *Parent : DiagGroupParents.getParents(Group))
288     if (isSubGroupOfGroup(Parent, GName))
289       return true;
290 
291   return false;
292 }
293 
294 /// Determine if the diagnostic is an extension.
295 bool InferPedantic::isExtension(const Record *Diag) {
296   return Diag->getValueAsDef("Class")->getName() == "CLASS_EXTENSION";
297 }
298 
299 bool InferPedantic::isOffByDefault(const Record *Diag) {
300   return Diag->getValueAsDef("DefaultSeverity")->getValueAsString("Name") ==
301          "Ignored";
302 }
303 
304 bool InferPedantic::groupInPedantic(const Record *Group, bool increment) {
305   GMap::mapped_type &V = GroupCount[Group];
306   // Lazily compute the threshold value for the group count.
307   if (!V.second) {
308     const GroupInfo &GI = DiagsInGroup[Group->getValueAsString("GroupName")];
309     V.second = GI.SubGroups.size() + GI.DiagsInGroup.size();
310   }
311 
312   if (increment)
313     ++V.first;
314 
315   // Consider a group in -Wpendatic IFF if has at least one diagnostic
316   // or subgroup AND all of those diagnostics and subgroups are covered
317   // by -Wpedantic via our computation.
318   return V.first != 0 && V.first == *V.second;
319 }
320 
321 void InferPedantic::markGroup(const Record *Group) {
322   // If all the diagnostics and subgroups have been marked as being
323   // covered by -Wpedantic, increment the count of parent groups.  Once the
324   // group's count is equal to the number of subgroups and diagnostics in
325   // that group, we can safely add this group to -Wpedantic.
326   if (groupInPedantic(Group, /* increment */ true))
327     for (const Record *Parent : DiagGroupParents.getParents(Group))
328       markGroup(Parent);
329 }
330 
331 void InferPedantic::compute(VecOrSet DiagsInPedantic,
332                             VecOrSet GroupsInPedantic) {
333   // All extensions that are not on by default are implicitly in the
334   // "pedantic" group.  For those that aren't explicitly included in -Wpedantic,
335   // mark them for consideration to be included in -Wpedantic directly.
336   for (const Record *R : Diags) {
337     if (!isExtension(R) || !isOffByDefault(R))
338       continue;
339     DiagsSet.insert(R);
340     if (const auto *Group = dyn_cast<DefInit>(R->getValueInit("Group"))) {
341       const Record *GroupRec = Group->getDef();
342       if (!isSubGroupOfGroup(GroupRec, "pedantic")) {
343         markGroup(GroupRec);
344       }
345     }
346   }
347 
348   // Compute the set of diagnostics that are directly in -Wpedantic.  We
349   // march through Diags a second time to ensure the results are emitted
350   // in deterministic order.
351   for (const Record *R : Diags) {
352     if (!DiagsSet.count(R))
353       continue;
354     // Check if the group is implicitly in -Wpedantic.  If so,
355     // the diagnostic should not be directly included in the -Wpedantic
356     // diagnostic group.
357     if (const auto *Group = dyn_cast<DefInit>(R->getValueInit("Group")))
358       if (groupInPedantic(Group->getDef()))
359         continue;
360 
361     // The diagnostic is not included in a group that is (transitively) in
362     // -Wpedantic.  Include it in -Wpedantic directly.
363     if (auto *V = dyn_cast<RecordVec *>(DiagsInPedantic))
364       V->push_back(R);
365     else
366       cast<RecordSet *>(DiagsInPedantic)->insert(R);
367   }
368 
369   if (!GroupsInPedantic)
370     return;
371 
372   // Compute the set of groups that are directly in -Wpedantic.  We
373   // march through the groups to ensure the results are emitted
374   /// in a deterministc order.
375   for (const Record *Group : DiagGroups) {
376     if (!groupInPedantic(Group))
377       continue;
378 
379     const std::vector<const Record *> &Parents =
380         DiagGroupParents.getParents(Group);
381     bool AllParentsInPedantic =
382         all_of(Parents, [&](const Record *R) { return groupInPedantic(R); });
383     // If all the parents are in -Wpedantic, this means that this diagnostic
384     // group will be indirectly included by -Wpedantic already.  In that
385     // case, do not add it directly to -Wpedantic.  If the group has no
386     // parents, obviously it should go into -Wpedantic.
387     if (Parents.size() > 0 && AllParentsInPedantic)
388       continue;
389 
390     if (auto *V = dyn_cast<RecordVec *>(GroupsInPedantic))
391       V->push_back(Group);
392     else
393       cast<RecordSet *>(GroupsInPedantic)->insert(Group);
394   }
395 }
396 
397 namespace {
/// Discriminator stored in every Piece. The classof() functions of the piece
/// types and DiagTextVisitor::Visit use it to identify the concrete type.
enum PieceKind {
  MultiPieceClass,
  TextPieceClass,
  PlaceholderPieceClass,
  SelectPieceClass,
  EnumSelectPieceClass,
  PluralPieceClass,
  DiffPieceClass,
  SubstitutionPieceClass,
};
408 
/// Kinds of modifier a piece can carry. getModifierName() gives the spelling
/// of every kind except MT_Unknown, which it treats as invalid.
enum ModifierType {
  MT_Unknown,
  MT_Placeholder,
  MT_Select,
  MT_EnumSelect,
  MT_Sub,
  MT_Plural,
  MT_Diff,
  MT_Ordinal,
  MT_Human,
  MT_S,
  MT_Q,
  MT_ObjCClass,
  MT_ObjCInstance,
  MT_Quoted,
};
425 
426 static StringRef getModifierName(ModifierType MT) {
427   switch (MT) {
428   case MT_EnumSelect:
429   case MT_Select:
430     return "select";
431   case MT_Sub:
432     return "sub";
433   case MT_Diff:
434     return "diff";
435   case MT_Plural:
436     return "plural";
437   case MT_Ordinal:
438     return "ordinal";
439   case MT_Human:
440     return "human";
441   case MT_S:
442     return "s";
443   case MT_Q:
444     return "q";
445   case MT_Placeholder:
446     return "";
447   case MT_ObjCClass:
448     return "objcclass";
449   case MT_ObjCInstance:
450     return "objcinstance";
451   case MT_Quoted:
452     return "quoted";
453   case MT_Unknown:
454     llvm_unreachable("invalid modifier type");
455   }
456   // Unhandled case
457   llvm_unreachable("invalid modifier type");
458 }
459 
460 struct Piece {
461   // This type and its derived classes are move-only.
462   Piece(PieceKind Kind) : ClassKind(Kind) {}
463   Piece(Piece const &O) = delete;
464   Piece &operator=(Piece const &) = delete;
465   virtual ~Piece() {}
466 
467   PieceKind getPieceClass() const { return ClassKind; }
468   static bool classof(const Piece *) { return true; }
469 
470 private:
471   PieceKind ClassKind;
472 };
473 
474 struct MultiPiece : Piece {
475   MultiPiece() : Piece(MultiPieceClass) {}
476   MultiPiece(std::vector<Piece *> Pieces)
477       : Piece(MultiPieceClass), Pieces(std::move(Pieces)) {}
478 
479   std::vector<Piece *> Pieces;
480 
481   static bool classof(const Piece *P) {
482     return P->getPieceClass() == MultiPieceClass;
483   }
484 };
485 
486 struct TextPiece : Piece {
487   StringRef Role;
488   std::string Text;
489   TextPiece(StringRef Text, StringRef Role = "")
490       : Piece(TextPieceClass), Role(Role), Text(Text.str()) {}
491 
492   static bool classof(const Piece *P) {
493     return P->getPieceClass() == TextPieceClass;
494   }
495 };
496 
497 struct PlaceholderPiece : Piece {
498   ModifierType Kind;
499   int Index;
500   PlaceholderPiece(ModifierType Kind, int Index)
501       : Piece(PlaceholderPieceClass), Kind(Kind), Index(Index) {}
502 
503   static bool classof(const Piece *P) {
504     return P->getPieceClass() == PlaceholderPieceClass;
505   }
506 };
507 
508 struct SelectPiece : Piece {
509 protected:
510   SelectPiece(PieceKind Kind, ModifierType ModKind)
511       : Piece(Kind), ModKind(ModKind) {}
512 
513 public:
514   SelectPiece(ModifierType ModKind) : SelectPiece(SelectPieceClass, ModKind) {}
515 
516   ModifierType ModKind;
517   std::vector<Piece *> Options;
518   int Index = 0;
519 
520   static bool classof(const Piece *P) {
521     return P->getPieceClass() == SelectPieceClass ||
522            P->getPieceClass() == EnumSelectPieceClass ||
523            P->getPieceClass() == PluralPieceClass;
524   }
525 };
526 
527 struct EnumSelectPiece : SelectPiece {
528   EnumSelectPiece() : SelectPiece(EnumSelectPieceClass, MT_EnumSelect) {}
529 
530   StringRef EnumName;
531   std::vector<StringRef> OptionEnumNames;
532 
533   static bool classof(const Piece *P) {
534     return P->getPieceClass() == EnumSelectPieceClass;
535   }
536 };
537 
// NOTE(review): this type declares no constructor although Piece has no
// default constructor, and PieceKind has no matching enumerator, so it does
// not look constructible as written; it appears to be unused in the code
// visible here -- confirm before relying on it.
struct EnumValuePiece : Piece {
  ModifierType Kind;
};
541 
542 struct PluralPiece : SelectPiece {
543   PluralPiece() : SelectPiece(PluralPieceClass, MT_Plural) {}
544 
545   std::vector<Piece *> OptionPrefixes;
546   int Index = 0;
547 
548   static bool classof(const Piece *P) {
549     return P->getPieceClass() == PluralPieceClass;
550   }
551 };
552 
553 struct DiffPiece : Piece {
554   DiffPiece() : Piece(DiffPieceClass) {}
555 
556   Piece *Parts[4] = {};
557   int Indexes[2] = {};
558 
559   static bool classof(const Piece *P) {
560     return P->getPieceClass() == DiffPieceClass;
561   }
562 };
563 
564 struct SubstitutionPiece : Piece {
565   SubstitutionPiece() : Piece(SubstitutionPieceClass) {}
566 
567   std::string Name;
568   std::vector<int> Modifiers;
569 
570   static bool classof(const Piece *P) {
571     return P->getPieceClass() == SubstitutionPieceClass;
572   }
573 };
574 
575 /// Diagnostic text, parsed into pieces.
576 
577 
578 struct DiagnosticTextBuilder {
579   DiagnosticTextBuilder(DiagnosticTextBuilder const &) = delete;
580   DiagnosticTextBuilder &operator=(DiagnosticTextBuilder const &) = delete;
581 
582   DiagnosticTextBuilder(const RecordKeeper &Records) {
583     // Build up the list of substitution records.
584     for (auto *S : Records.getAllDerivedDefinitions("TextSubstitution")) {
585       EvaluatingRecordGuard Guard(&EvaluatingRecord, S);
586       Substitutions.try_emplace(
587           S->getName(), DiagText(*this, S->getValueAsString("Substitution")));
588     }
589 
590     // Check that no diagnostic definitions have the same name as a
591     // substitution.
592     for (const Record *Diag : Records.getAllDerivedDefinitions("Diagnostic")) {
593       StringRef Name = Diag->getName();
594       if (Substitutions.count(Name))
595         llvm::PrintFatalError(
596             Diag->getLoc(),
597             "Diagnostic '" + Name +
598                 "' has same name as TextSubstitution definition");
599     }
600   }
601 
602   std::vector<std::string> buildForDocumentation(StringRef Role,
603                                                  const Record *R);
604   std::string buildForDefinition(const Record *R);
605   llvm::SmallVector<std::pair<
606       std::string, llvm::SmallVector<std::pair<unsigned, std::string>>>>
607   buildForEnum(const Record *R);
608 
609   Piece *getSubstitution(SubstitutionPiece *S) const {
610     auto It = Substitutions.find(S->Name);
611     if (It == Substitutions.end())
612       llvm::PrintFatalError("Failed to find substitution with name: " +
613                             S->Name);
614     return It->second.Root;
615   }
616 
617   [[noreturn]] void PrintFatalError(Twine const &Msg) const {
618     assert(EvaluatingRecord && "not evaluating a record?");
619     llvm::PrintFatalError(EvaluatingRecord->getLoc(), Msg);
620   }
621 
622 private:
623   struct DiagText {
624     DiagnosticTextBuilder &Builder;
625     std::vector<Piece *> AllocatedPieces;
626     Piece *Root = nullptr;
627 
628     template <class T, class... Args> T *New(Args &&... args) {
629       static_assert(std::is_base_of<Piece, T>::value, "must be piece");
630       T *Mem = new T(std::forward<Args>(args)...);
631       AllocatedPieces.push_back(Mem);
632       return Mem;
633     }
634 
635     DiagText(DiagnosticTextBuilder &Builder, StringRef Text)
636         : Builder(Builder), Root(parseDiagText(Text, StopAt::End)) {}
637 
638     enum class StopAt {
639       // Parse until the end of the string.
640       End,
641       // Additionally stop if we hit a non-nested '|' or '}'.
642       PipeOrCloseBrace,
643       // Additionally stop if we hit a non-nested '$'.
644       Dollar,
645     };
646 
647     Piece *parseDiagText(StringRef &Text, StopAt Stop);
648     int parseModifier(StringRef &) const;
649 
650   public:
651     DiagText(DiagText &&O) noexcept
652         : Builder(O.Builder), AllocatedPieces(std::move(O.AllocatedPieces)),
653           Root(O.Root) {
654       O.Root = nullptr;
655     }
656     // The move assignment operator is defined as deleted pending further
657     // motivation.
658     DiagText &operator=(DiagText &&) = delete;
659 
660     // The copy constrcutor and copy assignment operator is defined as deleted
661     // pending further motivation.
662     DiagText(const DiagText &) = delete;
663     DiagText &operator=(const DiagText &) = delete;
664 
665     ~DiagText() {
666       for (Piece *P : AllocatedPieces)
667         delete P;
668     }
669   };
670 
671 private:
672   const Record *EvaluatingRecord = nullptr;
673   struct EvaluatingRecordGuard {
674     EvaluatingRecordGuard(const Record **Dest, const Record *New)
675         : Dest(Dest), Old(*Dest) {
676       *Dest = New;
677     }
678     ~EvaluatingRecordGuard() { *Dest = Old; }
679     const Record **Dest;
680     const Record *Old;
681   };
682 
683   StringMap<DiagText> Substitutions;
684 };
685 
686 template <class Derived> struct DiagTextVisitor {
687   using ModifierMappingsType = std::optional<std::vector<int>>;
688 
689 private:
690   Derived &getDerived() { return static_cast<Derived &>(*this); }
691 
692 public:
693   std::vector<int>
694   getSubstitutionMappings(SubstitutionPiece *P,
695                           const ModifierMappingsType &Mappings) const {
696     std::vector<int> NewMappings;
697     for (int Idx : P->Modifiers)
698       NewMappings.push_back(mapIndex(Idx, Mappings));
699     return NewMappings;
700   }
701 
702   struct SubstitutionContext {
703     SubstitutionContext(DiagTextVisitor &Visitor, SubstitutionPiece *P)
704         : Visitor(Visitor) {
705       Substitution = Visitor.Builder.getSubstitution(P);
706       OldMappings = std::move(Visitor.ModifierMappings);
707       std::vector<int> NewMappings =
708           Visitor.getSubstitutionMappings(P, OldMappings);
709       Visitor.ModifierMappings = std::move(NewMappings);
710     }
711 
712     ~SubstitutionContext() {
713       Visitor.ModifierMappings = std::move(OldMappings);
714     }
715 
716   private:
717     DiagTextVisitor &Visitor;
718     std::optional<std::vector<int>> OldMappings;
719 
720   public:
721     Piece *Substitution;
722   };
723 
724 public:
725   DiagTextVisitor(DiagnosticTextBuilder &Builder) : Builder(Builder) {}
726 
727   void Visit(Piece *P) {
728     switch (P->getPieceClass()) {
729 #define CASE(T)                                                                \
730   case T##PieceClass:                                                          \
731     return getDerived().Visit##T(static_cast<T##Piece *>(P))
732       CASE(Multi);
733       CASE(Text);
734       CASE(Placeholder);
735       CASE(Select);
736       CASE(EnumSelect);
737       CASE(Plural);
738       CASE(Diff);
739       CASE(Substitution);
740 #undef CASE
741     }
742   }
743 
744   void VisitSubstitution(SubstitutionPiece *P) {
745     SubstitutionContext Guard(*this, P);
746     Visit(Guard.Substitution);
747   }
748 
749   int mapIndex(int Idx,
750                     ModifierMappingsType const &ModifierMappings) const {
751     if (!ModifierMappings)
752       return Idx;
753     if (ModifierMappings->size() <= static_cast<unsigned>(Idx))
754       Builder.PrintFatalError("Modifier value '" + std::to_string(Idx) +
755                               "' is not valid for this mapping (has " +
756                               std::to_string(ModifierMappings->size()) +
757                               " mappings)");
758     return (*ModifierMappings)[Idx];
759   }
760 
761   int mapIndex(int Idx) const {
762     return mapIndex(Idx, ModifierMappings);
763   }
764 
765 protected:
766   DiagnosticTextBuilder &Builder;
767   ModifierMappingsType ModifierMappings;
768 };
769 
770 void escapeRST(StringRef Str, std::string &Out) {
771   for (auto K : Str) {
772     if (StringRef("`*|_[]\\").count(K))
773       Out.push_back('\\');
774     Out.push_back(K);
775   }
776 }
777 
/// Pad every string in [Begin, End) on the right with spaces until all of
/// them are as long as the longest one.
template <typename It> void padToSameLength(It Begin, It End) {
  // First pass: find the widest entry.
  size_t Widest = 0;
  for (It Cur = Begin; Cur != End; ++Cur)
    if (Cur->size() > Widest)
      Widest = Cur->size();

  // Second pass: fill each entry up to that width.
  for (It Cur = Begin; Cur != End; ++Cur)
    Cur->append(Widest - Cur->size(), ' ');
}
785 
/// Turn the strings in [Begin, End) into table rows: pad them to a common
/// width, then wrap each one in '|' characters. An empty range is left alone.
template <typename It> void makeTableRows(It Begin, It End) {
  if (Begin == End)
    return;

  padToSameLength(Begin, End);
  for (It Row = Begin; Row != End; ++Row) {
    Row->insert(0, "|");
    Row->append("|");
  }
}
793 
/// Rewrite \p Str in place into a separator line: every '|' becomes '+' and
/// every other character becomes '-'.
void makeRowSeparator(std::string &Str) {
  for (size_t I = 0, E = Str.size(); I != E; ++I)
    Str[I] = (Str[I] == '|') ? '+' : '-';
}
798 
799 struct DiagTextDocPrinter : DiagTextVisitor<DiagTextDocPrinter> {
800   using BaseTy = DiagTextVisitor<DiagTextDocPrinter>;
801   DiagTextDocPrinter(DiagnosticTextBuilder &Builder,
802                      std::vector<std::string> &RST)
803       : BaseTy(Builder), RST(RST) {}
804 
805   void gatherNodes(
806       Piece *OrigP, const ModifierMappingsType &CurrentMappings,
807       std::vector<std::pair<Piece *, ModifierMappingsType>> &Pieces) const {
808     if (auto *Sub = dyn_cast<SubstitutionPiece>(OrigP)) {
809       ModifierMappingsType NewMappings =
810           getSubstitutionMappings(Sub, CurrentMappings);
811       return gatherNodes(Builder.getSubstitution(Sub), NewMappings, Pieces);
812     }
813     if (auto *MD = dyn_cast<MultiPiece>(OrigP)) {
814       for (Piece *Node : MD->Pieces)
815         gatherNodes(Node, CurrentMappings, Pieces);
816       return;
817     }
818     Pieces.push_back(std::make_pair(OrigP, CurrentMappings));
819   }
820 
821   void VisitMulti(MultiPiece *P) {
822     if (P->Pieces.empty()) {
823       RST.push_back("");
824       return;
825     }
826 
827     if (P->Pieces.size() == 1)
828       return Visit(P->Pieces[0]);
829 
830     // Flatten the list of nodes, replacing any substitution pieces with the
831     // recursively flattened substituted node.
832     std::vector<std::pair<Piece *, ModifierMappingsType>> Pieces;
833     gatherNodes(P, ModifierMappings, Pieces);
834 
835     std::string EmptyLinePrefix;
836     size_t Start = RST.size();
837     bool HasMultipleLines = true;
838     for (const std::pair<Piece *, ModifierMappingsType> &NodePair : Pieces) {
839       std::vector<std::string> Lines;
840       DiagTextDocPrinter Visitor{Builder, Lines};
841       Visitor.ModifierMappings = NodePair.second;
842       Visitor.Visit(NodePair.first);
843 
844       if (Lines.empty())
845         continue;
846 
847       // We need a vertical separator if either this or the previous piece is a
848       // multi-line piece, or this is the last piece.
849       const char *Separator = (Lines.size() > 1 || HasMultipleLines) ? "|" : "";
850       HasMultipleLines = Lines.size() > 1;
851 
852       if (Start + Lines.size() > RST.size())
853         RST.resize(Start + Lines.size(), EmptyLinePrefix);
854 
855       padToSameLength(Lines.begin(), Lines.end());
856       for (size_t I = 0; I != Lines.size(); ++I)
857         RST[Start + I] += Separator + Lines[I];
858       std::string Empty(Lines[0].size(), ' ');
859       for (size_t I = Start + Lines.size(); I != RST.size(); ++I)
860         RST[I] += Separator + Empty;
861       EmptyLinePrefix += Separator + Empty;
862     }
863     for (size_t I = Start; I != RST.size(); ++I)
864       RST[I] += "|";
865     EmptyLinePrefix += "|";
866 
867     makeRowSeparator(EmptyLinePrefix);
868     RST.insert(RST.begin() + Start, EmptyLinePrefix);
869     RST.insert(RST.end(), EmptyLinePrefix);
870   }
871 
872   void VisitText(TextPiece *P) {
873     RST.push_back("");
874     auto &S = RST.back();
875 
876     StringRef T = P->Text;
877     while (T.consume_front(" "))
878       RST.back() += " |nbsp| ";
879 
880     std::string Suffix;
881     while (T.consume_back(" "))
882       Suffix += " |nbsp| ";
883 
884     if (!T.empty()) {
885       S += ':';
886       S += P->Role;
887       S += ":`";
888       escapeRST(T, S);
889       S += '`';
890     }
891 
892     S += Suffix;
893   }
894 
895   void VisitPlaceholder(PlaceholderPiece *P) {
896     RST.push_back(std::string(":placeholder:`") +
897                   char('A' + mapIndex(P->Index)) + "`");
898   }
899 
900   void VisitSelect(SelectPiece *P) {
901     std::vector<size_t> SeparatorIndexes;
902     SeparatorIndexes.push_back(RST.size());
903     RST.emplace_back();
904     for (auto *O : P->Options) {
905       Visit(O);
906       SeparatorIndexes.push_back(RST.size());
907       RST.emplace_back();
908     }
909 
910     makeTableRows(RST.begin() + SeparatorIndexes.front(),
911                   RST.begin() + SeparatorIndexes.back() + 1);
912     for (size_t I : SeparatorIndexes)
913       makeRowSeparator(RST[I]);
914   }
915 
916   void VisitEnumSelect(EnumSelectPiece *P) {
917     // Document this as if it were a 'select', which properly prints all of the
918     // options correctly in a readable/reasonable manner. There isn't really
919     // anything valuable we could add to readers here.
920     VisitSelect(P);
921   }
922 
923   void VisitPlural(PluralPiece *P) { VisitSelect(P); }
924 
925   void VisitDiff(DiffPiece *P) {
926     // Render %diff{a $ b $ c|d}e,f as %select{a %e b %f c|d}.
927     PlaceholderPiece E(MT_Placeholder, P->Indexes[0]);
928     PlaceholderPiece F(MT_Placeholder, P->Indexes[1]);
929 
930     MultiPiece FirstOption;
931     FirstOption.Pieces.push_back(P->Parts[0]);
932     FirstOption.Pieces.push_back(&E);
933     FirstOption.Pieces.push_back(P->Parts[1]);
934     FirstOption.Pieces.push_back(&F);
935     FirstOption.Pieces.push_back(P->Parts[2]);
936 
937     SelectPiece Select(MT_Diff);
938     Select.Options.push_back(&FirstOption);
939     Select.Options.push_back(P->Parts[3]);
940 
941     VisitSelect(&Select);
942   }
943 
944   std::vector<std::string> &RST;
945 };
946 
947 struct DiagEnumPrinter : DiagTextVisitor<DiagEnumPrinter> {
948 public:
949   using BaseTy = DiagTextVisitor<DiagEnumPrinter>;
950   using EnumeratorItem = std::pair<unsigned, std::string>;
951   using EnumeratorList = llvm::SmallVector<EnumeratorItem>;
952   using ResultTy = llvm::SmallVector<std::pair<std::string, EnumeratorList>>;
953 
954   DiagEnumPrinter(DiagnosticTextBuilder &Builder, ResultTy &Result)
955       : BaseTy(Builder), Result(Result) {}
956 
957   ResultTy &Result;
958 
959   void VisitMulti(MultiPiece *P) {
960     for (auto *Child : P->Pieces)
961       Visit(Child);
962   }
963   void VisitText(TextPiece *P) {}
964   void VisitPlaceholder(PlaceholderPiece *P) {}
965   void VisitDiff(DiffPiece *P) {}
966   void VisitSelect(SelectPiece *P) {
967     for (auto *D : P->Options)
968       Visit(D);
969   }
970   void VisitPlural(PluralPiece *P) { VisitSelect(P); }
971   void VisitEnumSelect(EnumSelectPiece *P) {
972     assert(P->Options.size() == P->OptionEnumNames.size());
973 
974     if (!P->EnumName.empty()) {
975       EnumeratorList List;
976 
977       for (const auto &Tup : llvm::enumerate(P->OptionEnumNames))
978         if (!Tup.value().empty())
979           List.emplace_back(Tup.index(), Tup.value());
980 
981       Result.emplace_back(P->EnumName, List);
982     }
983 
984     VisitSelect(P);
985   }
986 };
987 
988 struct DiagTextPrinter : DiagTextVisitor<DiagTextPrinter> {
989 public:
990   using BaseTy = DiagTextVisitor<DiagTextPrinter>;
991   DiagTextPrinter(DiagnosticTextBuilder &Builder, std::string &Result)
992       : BaseTy(Builder), Result(Result) {}
993 
994   void VisitMulti(MultiPiece *P) {
995     for (auto *Child : P->Pieces)
996       Visit(Child);
997   }
998   void VisitText(TextPiece *P) { Result += P->Text; }
999   void VisitPlaceholder(PlaceholderPiece *P) {
1000     Result += "%";
1001     Result += getModifierName(P->Kind);
1002     addInt(mapIndex(P->Index));
1003   }
1004   void VisitSelect(SelectPiece *P) {
1005     Result += "%";
1006     Result += getModifierName(P->ModKind);
1007     if (P->ModKind == MT_Select || P->ModKind == MT_EnumSelect) {
1008       Result += "{";
1009       for (auto *D : P->Options) {
1010         Visit(D);
1011         Result += '|';
1012       }
1013       if (!P->Options.empty())
1014         Result.erase(--Result.end());
1015       Result += '}';
1016     }
1017     addInt(mapIndex(P->Index));
1018   }
1019 
1020   void VisitPlural(PluralPiece *P) {
1021     Result += "%plural{";
1022     assert(P->Options.size() == P->OptionPrefixes.size());
1023     for (const auto [Prefix, Option] :
1024          zip_equal(P->OptionPrefixes, P->Options)) {
1025       if (Prefix)
1026         Visit(Prefix);
1027       Visit(Option);
1028       Result += "|";
1029     }
1030     if (!P->Options.empty())
1031       Result.erase(--Result.end());
1032     Result += '}';
1033     addInt(mapIndex(P->Index));
1034   }
1035 
1036   void VisitEnumSelect(EnumSelectPiece *P) {
1037     // Print as if we are a 'select', which will result in the compiler just
1038     // treating this like a normal select.  This way we don't have to do any
1039     // special work for the compiler to consume these.
1040     VisitSelect(P);
1041   }
1042 
1043   void VisitDiff(DiffPiece *P) {
1044     Result += "%diff{";
1045     Visit(P->Parts[0]);
1046     Result += "$";
1047     Visit(P->Parts[1]);
1048     Result += "$";
1049     Visit(P->Parts[2]);
1050     Result += "|";
1051     Visit(P->Parts[3]);
1052     Result += "}";
1053     addInt(mapIndex(P->Indexes[0]));
1054     Result += ",";
1055     addInt(mapIndex(P->Indexes[1]));
1056   }
1057 
1058   void addInt(int Val) { Result += std::to_string(Val); }
1059 
1060   std::string &Result;
1061 };
1062 
1063 int DiagnosticTextBuilder::DiagText::parseModifier(StringRef &Text) const {
1064   if (Text.empty() || !isdigit(Text[0]))
1065     Builder.PrintFatalError("expected modifier in diagnostic");
1066   int Val = 0;
1067   do {
1068     Val *= 10;
1069     Val += Text[0] - '0';
1070     Text = Text.drop_front();
1071   } while (!Text.empty() && isdigit(Text[0]));
1072   return Val;
1073 }
1074 
1075 Piece *DiagnosticTextBuilder::DiagText::parseDiagText(StringRef &Text,
1076                                                       StopAt Stop) {
1077   std::vector<Piece *> Parsed;
1078 
1079   constexpr StringLiteral StopSets[] = {"%", "%|}", "%|}$"};
1080   StringRef StopSet = StopSets[static_cast<int>(Stop)];
1081 
1082   while (!Text.empty()) {
1083     size_t End = (size_t)-2;
1084     do
1085       End = Text.find_first_of(StopSet, End + 2);
1086     while (
1087         End < Text.size() - 1 && Text[End] == '%' &&
1088         (Text[End + 1] == '%' || Text[End + 1] == '|' || Text[End + 1] == '$'));
1089 
1090     if (End) {
1091       Parsed.push_back(New<TextPiece>(Text.slice(0, End), "diagtext"));
1092       Text = Text.substr(End);
1093       if (Text.empty())
1094         break;
1095     }
1096 
1097     if (Text[0] == '|' || Text[0] == '}' || Text[0] == '$')
1098       break;
1099 
1100     // Drop the '%'.
1101     Text = Text.drop_front();
1102 
1103     // Extract the (optional) modifier.
1104     size_t ModLength = Text.find_first_of("0123456789<{");
1105     StringRef Modifier = Text.slice(0, ModLength);
1106     Text = Text.substr(ModLength);
1107     ModifierType ModType = StringSwitch<ModifierType>{Modifier}
1108                                .Case("select", MT_Select)
1109                                .Case("enum_select", MT_EnumSelect)
1110                                .Case("sub", MT_Sub)
1111                                .Case("diff", MT_Diff)
1112                                .Case("plural", MT_Plural)
1113                                .Case("s", MT_S)
1114                                .Case("ordinal", MT_Ordinal)
1115                                .Case("human", MT_Human)
1116                                .Case("q", MT_Q)
1117                                .Case("objcclass", MT_ObjCClass)
1118                                .Case("objcinstance", MT_ObjCInstance)
1119                                .Case("quoted", MT_Quoted)
1120                                .Case("", MT_Placeholder)
1121                                .Default(MT_Unknown);
1122 
1123     auto ExpectAndConsume = [&](StringRef Prefix) {
1124       if (!Text.consume_front(Prefix))
1125         Builder.PrintFatalError("expected '" + Prefix + "' while parsing %" +
1126                                 Modifier);
1127     };
1128 
1129     if (ModType != MT_EnumSelect && Text[0] == '<')
1130       Builder.PrintFatalError("modifier '<' syntax not valid with %" +
1131                               Modifier);
1132 
1133     switch (ModType) {
1134     case MT_Unknown:
1135       Builder.PrintFatalError("Unknown modifier type: " + Modifier);
1136     case MT_Select: {
1137       SelectPiece *Select = New<SelectPiece>(MT_Select);
1138       do {
1139         Text = Text.drop_front(); // '{' or '|'
1140         Select->Options.push_back(
1141             parseDiagText(Text, StopAt::PipeOrCloseBrace));
1142         assert(!Text.empty() && "malformed %select");
1143       } while (Text.front() == '|');
1144       ExpectAndConsume("}");
1145       Select->Index = parseModifier(Text);
1146       Parsed.push_back(Select);
1147       continue;
1148     }
1149     case MT_EnumSelect: {
1150       EnumSelectPiece *EnumSelect = New<EnumSelectPiece>();
1151       if (Text[0] != '<')
1152         Builder.PrintFatalError("expected '<' after " + Modifier);
1153 
1154       Text = Text.drop_front(); // Drop '<'
1155       size_t EnumNameLen = Text.find_first_of('>');
1156       EnumSelect->EnumName = Text.slice(0, EnumNameLen);
1157       Text = Text.substr(EnumNameLen);
1158       ExpectAndConsume(">");
1159 
1160       if (Text[0] != '{')
1161         Builder.PrintFatalError("expected '{' after " + Modifier);
1162 
1163       do {
1164         Text = Text.drop_front(); // '{' or '|'
1165 
1166         bool BracketsRequired = false;
1167         if (Text[0] == '%') {
1168           BracketsRequired = true;
1169           Text = Text.drop_front(); // '%'
1170           size_t OptionNameLen = Text.find_first_of("{");
1171           EnumSelect->OptionEnumNames.push_back(Text.slice(0, OptionNameLen));
1172           Text = Text.substr(OptionNameLen);
1173         } else {
1174           EnumSelect->OptionEnumNames.push_back({});
1175         }
1176 
1177         if (BracketsRequired)
1178           ExpectAndConsume("{");
1179         else if (Text.front() == '{') {
1180           Text = Text.drop_front();
1181           BracketsRequired = true;
1182         }
1183 
1184         EnumSelect->Options.push_back(
1185             parseDiagText(Text, StopAt::PipeOrCloseBrace));
1186 
1187         if (BracketsRequired)
1188           ExpectAndConsume("}");
1189 
1190         assert(!Text.empty() && "malformed %select");
1191       } while (Text.front() == '|');
1192 
1193       ExpectAndConsume("}");
1194       EnumSelect->Index = parseModifier(Text);
1195       Parsed.push_back(EnumSelect);
1196       continue;
1197     }
1198     case MT_Plural: {
1199       PluralPiece *Plural = New<PluralPiece>();
1200       do {
1201         Text = Text.drop_front(); // '{' or '|'
1202         size_t End = Text.find_first_of(':');
1203         if (End == StringRef::npos)
1204           Builder.PrintFatalError("expected ':' while parsing %plural");
1205         ++End;
1206         assert(!Text.empty());
1207         Plural->OptionPrefixes.push_back(
1208             New<TextPiece>(Text.slice(0, End), "diagtext"));
1209         Text = Text.substr(End);
1210         Plural->Options.push_back(
1211             parseDiagText(Text, StopAt::PipeOrCloseBrace));
1212         assert(!Text.empty() && "malformed %plural");
1213       } while (Text.front() == '|');
1214       ExpectAndConsume("}");
1215       Plural->Index = parseModifier(Text);
1216       Parsed.push_back(Plural);
1217       continue;
1218     }
1219     case MT_Sub: {
1220       SubstitutionPiece *Sub = New<SubstitutionPiece>();
1221       ExpectAndConsume("{");
1222       size_t NameSize = Text.find_first_of('}');
1223       assert(NameSize != size_t(-1) && "failed to find the end of the name");
1224       assert(NameSize != 0 && "empty name?");
1225       Sub->Name = Text.substr(0, NameSize).str();
1226       Text = Text.drop_front(NameSize);
1227       ExpectAndConsume("}");
1228       if (!Text.empty()) {
1229         while (true) {
1230           if (!isdigit(Text[0]))
1231             break;
1232           Sub->Modifiers.push_back(parseModifier(Text));
1233           if (!Text.consume_front(","))
1234             break;
1235           assert(!Text.empty() && isdigit(Text[0]) &&
1236                  "expected another modifier");
1237         }
1238       }
1239       Parsed.push_back(Sub);
1240       continue;
1241     }
1242     case MT_Diff: {
1243       DiffPiece *Diff = New<DiffPiece>();
1244       ExpectAndConsume("{");
1245       Diff->Parts[0] = parseDiagText(Text, StopAt::Dollar);
1246       ExpectAndConsume("$");
1247       Diff->Parts[1] = parseDiagText(Text, StopAt::Dollar);
1248       ExpectAndConsume("$");
1249       Diff->Parts[2] = parseDiagText(Text, StopAt::PipeOrCloseBrace);
1250       ExpectAndConsume("|");
1251       Diff->Parts[3] = parseDiagText(Text, StopAt::PipeOrCloseBrace);
1252       ExpectAndConsume("}");
1253       Diff->Indexes[0] = parseModifier(Text);
1254       ExpectAndConsume(",");
1255       Diff->Indexes[1] = parseModifier(Text);
1256       Parsed.push_back(Diff);
1257       continue;
1258     }
1259     case MT_S: {
1260       SelectPiece *Select = New<SelectPiece>(ModType);
1261       Select->Options.push_back(New<TextPiece>(""));
1262       Select->Options.push_back(New<TextPiece>("s", "diagtext"));
1263       Select->Index = parseModifier(Text);
1264       Parsed.push_back(Select);
1265       continue;
1266     }
1267     case MT_Q:
1268     case MT_Placeholder:
1269     case MT_ObjCClass:
1270     case MT_ObjCInstance:
1271     case MT_Quoted:
1272     case MT_Ordinal:
1273     case MT_Human: {
1274       Parsed.push_back(New<PlaceholderPiece>(ModType, parseModifier(Text)));
1275       continue;
1276     }
1277     }
1278   }
1279 
1280   return New<MultiPiece>(Parsed);
1281 }
1282 
1283 std::vector<std::string>
1284 DiagnosticTextBuilder::buildForDocumentation(StringRef Severity,
1285                                              const Record *R) {
1286   EvaluatingRecordGuard Guard(&EvaluatingRecord, R);
1287   StringRef Text = R->getValueAsString("Summary");
1288 
1289   DiagText D(*this, Text);
1290   TextPiece *Prefix = D.New<TextPiece>(Severity, Severity);
1291   Prefix->Text += ": ";
1292   auto *MP = dyn_cast<MultiPiece>(D.Root);
1293   if (!MP) {
1294     MP = D.New<MultiPiece>();
1295     MP->Pieces.push_back(D.Root);
1296     D.Root = MP;
1297   }
1298   MP->Pieces.insert(MP->Pieces.begin(), Prefix);
1299   std::vector<std::string> Result;
1300   DiagTextDocPrinter{*this, Result}.Visit(D.Root);
1301   return Result;
1302 }
1303 
1304 DiagEnumPrinter::ResultTy DiagnosticTextBuilder::buildForEnum(const Record *R) {
1305   EvaluatingRecordGuard Guard(&EvaluatingRecord, R);
1306   StringRef Text = R->getValueAsString("Summary");
1307   DiagText D(*this, Text);
1308   DiagEnumPrinter::ResultTy Result;
1309   DiagEnumPrinter{*this, Result}.Visit(D.Root);
1310   return Result;
1311 }
1312 
1313 std::string DiagnosticTextBuilder::buildForDefinition(const Record *R) {
1314   EvaluatingRecordGuard Guard(&EvaluatingRecord, R);
1315   StringRef Text = R->getValueAsString("Summary");
1316   DiagText D(*this, Text);
1317   std::string Result;
1318   DiagTextPrinter{*this, Result}.Visit(D.Root);
1319   return Result;
1320 }
1321 
1322 } // namespace
1323 
1324 //===----------------------------------------------------------------------===//
1325 // Warning Tables (.inc file) generation.
1326 //===----------------------------------------------------------------------===//
1327 
1328 static bool isError(const Record &Diag) {
1329   return Diag.getValueAsDef("Class")->getName() == "CLASS_ERROR";
1330 }
1331 
1332 static bool isRemark(const Record &Diag) {
1333   return Diag.getValueAsDef("Class")->getName() == "CLASS_REMARK";
1334 }
1335 
1336 // Presumes the text has been split at the first whitespace or hyphen.
1337 static bool isExemptAtStart(StringRef Text) {
1338   // Fast path, the first character is lowercase or not alphanumeric.
1339   if (Text.empty() || isLower(Text[0]) || !isAlnum(Text[0]))
1340     return true;
1341 
1342   // If the text is all uppercase (or numbers, +, or _), then we assume it's an
1343   // acronym and that's allowed. This covers cases like ISO, C23, C++14, and
1344   // OBJECT_MODE. However, if there's only a single letter other than "C", we
1345   // do not exempt it so that we catch a case like "A really bad idea" while
1346   // still allowing a case like "C does not allow...".
1347   if (all_of(Text, [](char C) {
1348         return isUpper(C) || isDigit(C) || C == '+' || C == '_';
1349       }))
1350     return Text.size() > 1 || Text[0] == 'C';
1351 
1352   // Otherwise, there are a few other exemptions.
1353   return StringSwitch<bool>(Text)
1354       .Case("AddressSanitizer", true)
1355       .Case("CFString", true)
1356       .Case("Clang", true)
1357       .Case("Fuchsia", true)
1358       .Case("GNUstep", true)
1359       .Case("IBOutletCollection", true)
1360       .Case("Microsoft", true)
1361       .Case("Neon", true)
1362       .StartsWith("NSInvocation", true) // NSInvocation, NSInvocation's
1363       .Case("Objective", true) // Objective-C (hyphen is a word boundary)
1364       .Case("OpenACC", true)
1365       .Case("OpenCL", true)
1366       .Case("OpenMP", true)
1367       .Case("Pascal", true)
1368       .Case("Swift", true)
1369       .Case("Unicode", true)
1370       .Case("Vulkan", true)
1371       .Case("WebAssembly", true)
1372       .Default(false);
1373 }
1374 
1375 // Does not presume the text has been split at all.
1376 static bool isExemptAtEnd(StringRef Text) {
1377   // Rather than come up with a list of characters that are allowed, we go the
1378   // other way and look only for characters that are not allowed.
1379   switch (Text.back()) {
1380   default:
1381     return true;
1382   case '?':
1383     // Explicitly allowed to support "; did you mean?".
1384     return true;
1385   case '.':
1386   case '!':
1387     return false;
1388   }
1389 }
1390 
1391 static void verifyDiagnosticWording(const Record &Diag) {
1392   StringRef FullDiagText = Diag.getValueAsString("Summary");
1393 
1394   auto DiagnoseStart = [&](StringRef Text) {
1395     // Verify that the text does not start with a capital letter, except for
1396     // special cases that are exempt like ISO and C++. Find the first word
1397     // by looking for a word breaking character.
1398     char Separators[] = {' ', '-', ',', '}'};
1399     auto Iter = std::find_first_of(
1400         Text.begin(), Text.end(), std::begin(Separators), std::end(Separators));
1401 
1402     StringRef First = Text.substr(0, Iter - Text.begin());
1403     if (!isExemptAtStart(First)) {
1404       PrintError(&Diag,
1405                  "Diagnostics should not start with a capital letter; '" +
1406                      First + "' is invalid");
1407     }
1408   };
1409 
1410   auto DiagnoseEnd = [&](StringRef Text) {
1411     // Verify that the text does not end with punctuation like '.' or '!'.
1412     if (!isExemptAtEnd(Text)) {
1413       PrintError(&Diag, "Diagnostics should not end with punctuation; '" +
1414                             Text.substr(Text.size() - 1, 1) + "' is invalid");
1415     }
1416   };
1417 
1418   // If the diagnostic starts with %select, look through it to see whether any
1419   // of the options will cause a problem.
1420   if (FullDiagText.starts_with("%select{")) {
1421     // Do a balanced delimiter scan from the start of the text to find the
1422     // closing '}', skipping intermediary {} pairs.
1423 
1424     size_t BraceCount = 1;
1425     constexpr size_t PercentSelectBraceLen = sizeof("%select{") - 1;
1426     auto Iter = FullDiagText.begin() + PercentSelectBraceLen;
1427     for (auto End = FullDiagText.end(); Iter != End; ++Iter) {
1428       char Ch = *Iter;
1429       if (Ch == '{')
1430         ++BraceCount;
1431       else if (Ch == '}')
1432         --BraceCount;
1433       if (!BraceCount)
1434         break;
1435     }
1436     // Defending against a malformed diagnostic string.
1437     if (BraceCount != 0)
1438       return;
1439 
1440     StringRef SelectText =
1441         FullDiagText.substr(PercentSelectBraceLen, Iter - FullDiagText.begin() -
1442                                                        PercentSelectBraceLen);
1443     SmallVector<StringRef, 4> SelectPieces;
1444     SelectText.split(SelectPieces, '|');
1445 
1446     // Walk over all of the individual pieces of select text to see if any of
1447     // them start with an invalid character. If any of the select pieces is
1448     // empty, we need to look at the first word after the %select to see
1449     // whether that is invalid or not. If all of the pieces are fine, then we
1450     // don't need to check anything else about the start of the diagnostic.
1451     bool CheckSecondWord = false;
1452     for (StringRef Piece : SelectPieces) {
1453       if (Piece.empty())
1454         CheckSecondWord = true;
1455       else
1456         DiagnoseStart(Piece);
1457     }
1458 
1459     if (CheckSecondWord) {
1460       // There was an empty select piece, so we need to check the second
1461       // word. This catches situations like '%select{|fine}0 Not okay'. Add
1462       // two to account for the closing curly brace and the number after it.
1463       StringRef AfterSelect =
1464           FullDiagText.substr(Iter - FullDiagText.begin() + 2).ltrim();
1465       DiagnoseStart(AfterSelect);
1466     }
1467   } else {
1468     // If the start of the diagnostic is not %select, we can check the first
1469     // word and be done with it.
1470     DiagnoseStart(FullDiagText);
1471   }
1472 
1473   // If the last character in the diagnostic is a number preceded by a }, scan
1474   // backwards to see if this is for a %select{...}0. If it is, we need to look
1475   // at each piece to see whether it ends in punctuation or not.
1476   bool StillNeedToDiagEnd = true;
1477   if (isDigit(FullDiagText.back()) && *(FullDiagText.end() - 2) == '}') {
1478     // Scan backwards to find the opening curly brace.
1479     size_t BraceCount = 1;
1480     auto Iter = FullDiagText.end() - sizeof("}0");
1481     for (auto End = FullDiagText.begin(); Iter != End; --Iter) {
1482       char Ch = *Iter;
1483       if (Ch == '}')
1484         ++BraceCount;
1485       else if (Ch == '{')
1486         --BraceCount;
1487       if (!BraceCount)
1488         break;
1489     }
1490     // Defending against a malformed diagnostic string.
1491     if (BraceCount != 0)
1492       return;
1493 
1494     // Continue the backwards scan to find the word before the '{' to see if it
1495     // is 'select'.
1496     constexpr size_t SelectLen = sizeof("select") - 1;
1497     bool IsSelect =
1498         (FullDiagText.substr(Iter - SelectLen - FullDiagText.begin(),
1499                              SelectLen) == "select");
1500     if (IsSelect) {
1501       // Gather the content between the {} for the select in question so we can
1502       // split it into pieces.
1503       StillNeedToDiagEnd = false; // No longer need to handle the end.
1504       StringRef SelectText =
1505           FullDiagText.substr(Iter - FullDiagText.begin() + /*{*/ 1,
1506                               FullDiagText.end() - Iter - /*pos before }0*/ 3);
1507       SmallVector<StringRef, 4> SelectPieces;
1508       SelectText.split(SelectPieces, '|');
1509       for (StringRef Piece : SelectPieces) {
1510         // Not worrying about a situation like: "this is bar. %select{foo|}0".
1511         if (!Piece.empty())
1512           DiagnoseEnd(Piece);
1513       }
1514     }
1515   }
1516 
1517   // If we didn't already cover the diagnostic because of a %select, handle it
1518   // now.
1519   if (StillNeedToDiagEnd)
1520     DiagnoseEnd(FullDiagText);
1521 
1522   // FIXME: This could also be improved by looking for instances of clang or
1523   // gcc in the diagnostic and recommend Clang or GCC instead. However, this
1524   // runs into odd situations like [[clang::warn_unused_result]],
1525   // #pragma clang, or --unwindlib=libgcc.
1526 }
1527 
1528 /// ClangDiagsCompatIDsEmitter - Emit a set of 'compatibility diagnostic ids'
1529 /// that map to a set of 2 regular diagnostic ids each and which are used to
1530 /// simplify emitting compatibility warnings.
1531 void clang::EmitClangDiagsCompatIDs(const llvm::RecordKeeper &Records,
1532                                     llvm::raw_ostream &OS,
1533                                     const std::string &Component) {
1534   ArrayRef<const Record *> Ids =
1535       Records.getAllDerivedDefinitions("CompatWarningId");
1536 
1537   StringRef PrevComponent = "";
1538   for (auto [I, R] : enumerate(make_pointee_range(Ids))) {
1539     StringRef DiagComponent = R.getValueAsString("Component");
1540     if (!Component.empty() && Component != DiagComponent)
1541       continue;
1542 
1543     StringRef CompatDiagName = R.getValueAsString("Name");
1544     StringRef Diag = R.getValueAsString("Diag");
1545     StringRef DiagPre = R.getValueAsString("DiagPre");
1546     int64_t CXXStdVer = R.getValueAsInt("Std");
1547 
1548     // We don't want to create empty enums since some compilers (including
1549     // Clang) warn about that, so these macros are used to avoid having to
1550     // unconditionally write 'enum {' and '};' in the headers.
1551     if (PrevComponent != DiagComponent) {
1552       if (!PrevComponent.empty())
1553         OS << "DIAG_COMPAT_IDS_END()\n";
1554       OS << "DIAG_COMPAT_IDS_BEGIN()\n";
1555       PrevComponent = DiagComponent;
1556     }
1557 
1558     // FIXME: We sometimes define multiple compat diagnostics with the same
1559     // name, e.g. 'constexpr_body_invalid_stmt' exists for C++14/20/23. It would
1560     // be nice if we could combine all of them into a single compatibility diag
1561     // id.
1562     OS << "DIAG_COMPAT_ID(" << I << ",";
1563     OS << CompatDiagName << "," << CXXStdVer << "," << Diag << "," << DiagPre;
1564     OS << ")\n";
1565   }
1566 
1567   if (!PrevComponent.empty())
1568     OS << "DIAG_COMPAT_IDS_END()\n";
1569 }
1570 
1571 /// ClangDiagsIntefaceEmitter - Emit the diagnostics interface header for
1572 /// a Clang component.
1573 void clang::EmitClangDiagsInterface(llvm::raw_ostream &OS,
1574                                     const std::string &Component) {
1575   if (Component.empty())
1576     PrintFatalError("'-gen-clang-diags-iface' requires a component name");
1577 
1578   std::string ComponentUpper = StringRef(Component).upper();
1579   const char *Comp = Component.c_str();
1580   const char *Upper = ComponentUpper.c_str();
1581 
1582   OS << llvm::format(R"c++(
1583 namespace clang {
1584 namespace diag {
1585 enum {
1586 #define DIAG(ENUM, FLAGS, DEFAULT_MAPPING, DESC, GROUP, SFINAE, NOWERROR,      \
1587              SHOWINSYSHEADER, SHOWINSYSMACRO, DEFERRABLE, CATEGORY)            \
1588   ENUM,
1589 #define %sSTART
1590 #include "clang/Basic/Diagnostic%sKinds.inc"
1591 #undef DIAG
1592   NUM_BUILTIN_%s_DIAGNOSTICS
1593 };
1594 
1595 #define DIAG_ENUM(ENUM_NAME)                                                   \
1596   namespace ENUM_NAME {                                                        \
1597   enum {
1598 #define DIAG_ENUM_ITEM(IDX, NAME) NAME = IDX,
1599 #define DIAG_ENUM_END()                                                        \
1600   }                                                                            \
1601   ;                                                                            \
1602   }
1603 #include "clang/Basic/Diagnostic%sEnums.inc"
1604 #undef DIAG_ENUM_END
1605 #undef DIAG_ENUM_ITEM
1606 #undef DIAG_ENUM
1607 } // end namespace diag
1608 
1609 namespace diag_compat {
1610 #define DIAG_COMPAT_IDS_BEGIN() enum {
1611 #define DIAG_COMPAT_IDS_END()                                                  \
1612   }                                                                            \
1613   ;
1614 #define DIAG_COMPAT_ID(IDX, NAME, ...) NAME = IDX,
1615 #include "clang/Basic/Diagnostic%sCompatIDs.inc"
1616 #undef DIAG_COMPAT_ID
1617 #undef DIAG_COMPAT_IDS_BEGIN
1618 #undef DIAG_COMPAT_IDS_END
1619 } // end namespace diag_compat
1620 } // end namespace clang
1621 )c++",
1622                      Upper, Comp, Upper, Comp, Comp);
1623 }
1624 
1625 /// ClangDiagsEnumsEmitter - The top-level class emits .def files containing
1626 /// declarations of Clang diagnostic enums for selects.
1627 void clang::EmitClangDiagsEnums(const RecordKeeper &Records, raw_ostream &OS,
1628                                 const std::string &Component) {
1629   DiagnosticTextBuilder DiagTextBuilder(Records);
1630   ArrayRef<const Record *> Diags =
1631       Records.getAllDerivedDefinitions("Diagnostic");
1632 
1633   llvm::SmallVector<std::pair<const Record *, std::string>> EnumerationNames;
1634 
1635   for (const Record &R : make_pointee_range(Diags)) {
1636     DiagEnumPrinter::ResultTy Enums = DiagTextBuilder.buildForEnum(&R);
1637 
1638     for (auto &Enumeration : Enums) {
1639       bool ShouldPrint =
1640           Component.empty() || Component == R.getValueAsString("Component");
1641 
1642       auto PreviousByName = llvm::find_if(EnumerationNames, [&](auto &Prev) {
1643         return Prev.second == Enumeration.first;
1644       });
1645 
1646       if (PreviousByName != EnumerationNames.end()) {
1647         PrintError(&R,
1648                    "Duplicate enumeration name '" + Enumeration.first + "'");
1649         PrintNote(PreviousByName->first->getLoc(),
1650                   "Previous diagnostic is here");
1651       }
1652 
1653       EnumerationNames.emplace_back(&R, Enumeration.first);
1654 
1655       if (ShouldPrint)
1656         OS << "DIAG_ENUM(" << Enumeration.first << ")\n";
1657 
1658       llvm::SmallVector<std::string> EnumeratorNames;
1659       for (auto &Enumerator : Enumeration.second) {
1660         if (llvm::is_contained(EnumeratorNames, Enumerator.second))
1661           PrintError(&R,
1662                      "Duplicate enumerator name '" + Enumerator.second + "'");
1663         EnumeratorNames.push_back(Enumerator.second);
1664 
1665         if (ShouldPrint)
1666           OS << "DIAG_ENUM_ITEM(" << Enumerator.first << ", "
1667              << Enumerator.second << ")\n";
1668       }
1669       if (ShouldPrint)
1670         OS << "DIAG_ENUM_END()\n";
1671     }
1672   }
1673 }
1674 
1675 /// ClangDiagsDefsEmitter - The top-level class emits .def files containing
1676 /// declarations of Clang diagnostics.
1677 void clang::EmitClangDiagsDefs(const RecordKeeper &Records, raw_ostream &OS,
1678                                const std::string &Component) {
1679   // Write the #if guard
1680   if (!Component.empty()) {
1681     std::string ComponentName = StringRef(Component).upper();
1682     OS << "#ifdef " << ComponentName << "START\n";
1683     OS << "__" << ComponentName << "START = DIAG_START_" << ComponentName
1684        << ",\n";
1685     OS << "#undef " << ComponentName << "START\n";
1686     OS << "#endif\n\n";
1687   }
1688 
1689   DiagnosticTextBuilder DiagTextBuilder(Records);
1690 
1691   ArrayRef<const Record *> Diags =
1692       Records.getAllDerivedDefinitions("Diagnostic");
1693 
1694   ArrayRef<const Record *> DiagGroups =
1695       Records.getAllDerivedDefinitions("DiagGroup");
1696 
1697   DiagsInGroupTy DiagsInGroup;
1698   groupDiagnostics(Diags, DiagGroups, DiagsInGroup);
1699 
1700   DiagCategoryIDMap CategoryIDs(Records);
1701   DiagGroupParentMap DGParentMap(Records);
1702 
1703   // Compute the set of diagnostics that are in -Wpedantic.
1704   RecordSet DiagsInPedantic;
1705   InferPedantic inferPedantic(DGParentMap, Diags, DiagGroups, DiagsInGroup);
1706   inferPedantic.compute(&DiagsInPedantic, (RecordVec*)nullptr);
1707 
1708   for (const Record &R : make_pointee_range(Diags)) {
1709     // Check if this is an error that is accidentally in a warning
1710     // group.
1711     if (isError(R)) {
1712       if (const auto *Group = dyn_cast<DefInit>(R.getValueInit("Group"))) {
1713         const Record *GroupRec = Group->getDef();
1714         StringRef GroupName = GroupRec->getValueAsString("GroupName");
1715         PrintFatalError(R.getLoc(), "Error " + R.getName() +
1716                       " cannot be in a warning group [" + GroupName + "]");
1717       }
1718     }
1719 
1720     // Check that all remarks have an associated diagnostic group.
1721     if (isRemark(R)) {
1722       if (!isa<DefInit>(R.getValueInit("Group"))) {
1723         PrintFatalError(R.getLoc(), "Error " + R.getName() +
1724                                         " not in any diagnostic group");
1725       }
1726     }
1727 
1728     // Filter by component.
1729     if (!Component.empty() && Component != R.getValueAsString("Component"))
1730       continue;
1731 
1732     // Validate diagnostic wording for common issues.
1733     verifyDiagnosticWording(R);
1734 
1735     OS << "DIAG(" << R.getName() << ", ";
1736     OS << R.getValueAsDef("Class")->getName();
1737     OS << ", (unsigned)diag::Severity::"
1738        << R.getValueAsDef("DefaultSeverity")->getValueAsString("Name");
1739 
1740     // Description string.
1741     OS << ", \"";
1742     OS.write_escaped(DiagTextBuilder.buildForDefinition(&R)) << '"';
1743 
1744     // Warning group associated with the diagnostic. This is stored as an index
1745     // into the alphabetically sorted warning group table.
1746     if (const auto *DI = dyn_cast<DefInit>(R.getValueInit("Group"))) {
1747       auto I = DiagsInGroup.find(DI->getDef()->getValueAsString("GroupName"));
1748       assert(I != DiagsInGroup.end());
1749       OS << ", " << I->second.IDNo;
1750     } else if (DiagsInPedantic.count(&R)) {
1751       auto I = DiagsInGroup.find("pedantic");
1752       assert(I != DiagsInGroup.end() && "pedantic group not defined");
1753       OS << ", " << I->second.IDNo;
1754     } else {
1755       OS << ", 0";
1756     }
1757 
1758     // SFINAE response.
1759     OS << ", " << R.getValueAsDef("SFINAE")->getName();
1760 
1761     // Default warning has no Werror bit.
1762     if (R.getValueAsBit("WarningNoWerror"))
1763       OS << ", true";
1764     else
1765       OS << ", false";
1766 
1767     if (R.getValueAsBit("ShowInSystemHeader"))
1768       OS << ", true";
1769     else
1770       OS << ", false";
1771 
1772     if (R.getValueAsBit("ShowInSystemMacro"))
1773       OS << ", true";
1774     else
1775       OS << ", false";
1776 
1777     if (R.getValueAsBit("Deferrable"))
1778       OS << ", true";
1779     else
1780       OS << ", false";
1781 
1782     // Category number.
1783     OS << ", " << CategoryIDs.getID(getDiagnosticCategory(&R, DGParentMap));
1784     OS << ")\n";
1785   }
1786 }
1787 
1788 //===----------------------------------------------------------------------===//
1789 // Warning Group Tables generation
1790 //===----------------------------------------------------------------------===//
1791 
1792 static std::string getDiagCategoryEnum(StringRef name) {
1793   if (name.empty())
1794     return "DiagCat_None";
1795   SmallString<256> enumName = StringRef("DiagCat_");
1796   for (char C : name)
1797     enumName += isalnum(C) ? C : '_';
1798   return std::string(enumName);
1799 }
1800 
1801 /// Emit the array of diagnostic subgroups.
1802 ///
1803 /// The array of diagnostic subgroups contains for each group a list of its
1804 /// subgroups. The individual lists are separated by '-1'. Groups with no
1805 /// subgroups are skipped.
1806 ///
1807 /// \code
1808 ///   static const int16_t DiagSubGroups[] = {
1809 ///     /* Empty */ -1,
1810 ///     /* DiagSubGroup0 */ 142, -1,
1811 ///     /* DiagSubGroup13 */ 265, 322, 399, -1
1812 ///   }
1813 /// \endcode
1814 ///
1815 static void emitDiagSubGroups(DiagsInGroupTy &DiagsInGroup,
1816                               RecordVec &GroupsInPedantic, raw_ostream &OS) {
1817   OS << "static const int16_t DiagSubGroups[] = {\n"
1818      << "  /* Empty */ -1,\n";
1819   for (auto const &[Name, Group] : DiagsInGroup) {
1820     const bool IsPedantic = Name == "pedantic";
1821     const std::vector<StringRef> &SubGroups = Group.SubGroups;
1822     if (!SubGroups.empty() || (IsPedantic && !GroupsInPedantic.empty())) {
1823       OS << "  /* DiagSubGroup" << Group.IDNo << " */ ";
1824       for (StringRef SubGroup : SubGroups) {
1825         auto RI = DiagsInGroup.find(SubGroup);
1826         assert(RI != DiagsInGroup.end() && "Referenced without existing?");
1827         OS << RI->second.IDNo << ", ";
1828       }
1829       // Emit the groups implicitly in "pedantic".
1830       if (IsPedantic) {
1831         for (auto const &Group : GroupsInPedantic) {
1832           StringRef GroupName = Group->getValueAsString("GroupName");
1833           auto RI = DiagsInGroup.find(GroupName);
1834           assert(RI != DiagsInGroup.end() && "Referenced without existing?");
1835           OS << RI->second.IDNo << ", ";
1836         }
1837       }
1838 
1839       OS << "-1,\n";
1840     }
1841   }
1842   OS << "};\n\n";
1843 }
1844 
1845 /// Emit the list of diagnostic arrays.
1846 ///
1847 /// This data structure is a large array that contains itself arrays of varying
1848 /// size. Each array represents a list of diagnostics. The different arrays are
1849 /// separated by the value '-1'.
1850 ///
1851 /// \code
1852 ///   static const int16_t DiagArrays[] = {
1853 ///     /* Empty */ -1,
1854 ///     /* DiagArray1 */ diag::warn_pragma_message,
1855 ///                      -1,
1856 ///     /* DiagArray2 */ diag::warn_abs_too_small,
1857 ///                      diag::warn_unsigned_abs,
1858 ///                      diag::warn_wrong_absolute_value_type,
1859 ///                      -1
1860 ///   };
1861 /// \endcode
1862 ///
1863 static void emitDiagArrays(DiagsInGroupTy &DiagsInGroup,
1864                            RecordVec &DiagsInPedantic, raw_ostream &OS) {
1865   OS << "static const int16_t DiagArrays[] = {\n"
1866      << "  /* Empty */ -1,\n";
1867   for (const auto &[Name, Group] : DiagsInGroup) {
1868     const bool IsPedantic = Name == "pedantic";
1869 
1870     const std::vector<const Record *> &V = Group.DiagsInGroup;
1871     if (!V.empty() || (IsPedantic && !DiagsInPedantic.empty())) {
1872       OS << "  /* DiagArray" << Group.IDNo << " */ ";
1873       for (auto *Record : V)
1874         OS << "diag::" << Record->getName() << ", ";
1875       // Emit the diagnostics implicitly in "pedantic".
1876       if (IsPedantic) {
1877         for (auto const &Diag : DiagsInPedantic)
1878           OS << "diag::" << Diag->getName() << ", ";
1879       }
1880       OS << "-1,\n";
1881     }
1882   }
1883   OS << "};\n\n";
1884 }
1885 
1886 /// Emit a list of group names.
1887 ///
1888 /// This creates an `llvm::StringTable` of all the diagnostic group names.
1889 static void emitDiagGroupNames(const StringToOffsetTable &GroupNames,
1890                                raw_ostream &OS) {
1891   GroupNames.EmitStringTableDef(OS, "DiagGroupNames");
1892   OS << "\n";
1893 }
1894 
1895 /// Emit diagnostic arrays and related data structures.
1896 ///
1897 /// This creates the actual diagnostic array, an array of diagnostic subgroups
1898 /// and an array of subgroup names.
1899 ///
1900 /// \code
1901 ///  #ifdef GET_DIAG_ARRAYS
1902 ///     static const int16_t DiagArrays[];
1903 ///     static const int16_t DiagSubGroups[];
1904 ///     static constexpr llvm::StringTable DiagGroupNames;
1905 ///  #endif
1906 ///  \endcode
1907 static void emitAllDiagArrays(DiagsInGroupTy &DiagsInGroup,
1908                               RecordVec &DiagsInPedantic,
1909                               RecordVec &GroupsInPedantic,
1910                               const StringToOffsetTable &GroupNames,
1911                               raw_ostream &OS) {
1912   OS << "\n#ifdef GET_DIAG_ARRAYS\n";
1913   emitDiagArrays(DiagsInGroup, DiagsInPedantic, OS);
1914   emitDiagSubGroups(DiagsInGroup, GroupsInPedantic, OS);
1915   emitDiagGroupNames(GroupNames, OS);
1916   OS << "#endif // GET_DIAG_ARRAYS\n\n";
1917 }
1918 
1919 /// Emit diagnostic table.
1920 ///
1921 /// The table is sorted by the name of the diagnostic group. Each element
1922 /// consists of the name of the diagnostic group (given as offset in the
1923 /// group name table), a reference to a list of diagnostics (optional) and a
1924 /// reference to a set of subgroups (optional).
1925 ///
1926 /// \code
1927 /// #ifdef GET_DIAG_TABLE
1928 ///  {/* abi */              159, /* DiagArray11 */ 19, /* Empty */          0},
1929 ///  {/* aggregate-return */ 180, /* Empty */        0, /* Empty */          0},
1930 ///  {/* all */              197, /* Empty */        0, /* DiagSubGroup13 */ 3},
1931 ///  {/* deprecated */       1981,/* DiagArray1 */ 348, /* DiagSubGroup3 */  9},
1932 /// #endif
1933 /// \endcode
1934 static void emitDiagTable(DiagsInGroupTy &DiagsInGroup,
1935                           RecordVec &DiagsInPedantic,
1936                           RecordVec &GroupsInPedantic,
1937                           const StringToOffsetTable &GroupNames,
1938                           raw_ostream &OS) {
1939   unsigned MaxLen = 0;
1940 
1941   for (auto const &I: DiagsInGroup)
1942     MaxLen = std::max(MaxLen, (unsigned)I.first.size());
1943 
1944   OS << "\n#ifdef DIAG_ENTRY\n";
1945   unsigned SubGroupIndex = 1, DiagArrayIndex = 1;
1946   for (auto const &[Name, GroupInfo] : DiagsInGroup) {
1947     // Group option string.
1948     OS << "DIAG_ENTRY(";
1949     OS << GroupInfo.GroupName << " /* ";
1950 
1951     if (Name.find_first_not_of("abcdefghijklmnopqrstuvwxyz"
1952                                "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
1953                                "0123456789!@#$%^*-+=:?") != std::string::npos)
1954       PrintFatalError("Invalid character in diagnostic group '" + Name + "'");
1955     OS << Name << " */, ";
1956     OS << *GroupNames.GetStringOffset(Name) << ", ";
1957 
1958     // Special handling for 'pedantic'.
1959     const bool IsPedantic = Name == "pedantic";
1960 
1961     // Diagnostics in the group.
1962     const std::vector<const Record *> &V = GroupInfo.DiagsInGroup;
1963     const bool hasDiags =
1964         !V.empty() || (IsPedantic && !DiagsInPedantic.empty());
1965     if (hasDiags) {
1966       OS << "/* DiagArray" << GroupInfo.IDNo << " */ " << DiagArrayIndex
1967          << ", ";
1968       if (IsPedantic)
1969         DiagArrayIndex += DiagsInPedantic.size();
1970       DiagArrayIndex += V.size() + 1;
1971     } else {
1972       OS << "0, ";
1973     }
1974 
1975     // Subgroups.
1976     const std::vector<StringRef> &SubGroups = GroupInfo.SubGroups;
1977     const bool hasSubGroups =
1978         !SubGroups.empty() || (IsPedantic && !GroupsInPedantic.empty());
1979     if (hasSubGroups) {
1980       OS << "/* DiagSubGroup" << GroupInfo.IDNo << " */ " << SubGroupIndex
1981          << ", ";
1982       if (IsPedantic)
1983         SubGroupIndex += GroupsInPedantic.size();
1984       SubGroupIndex += SubGroups.size() + 1;
1985     } else {
1986       OS << "0, ";
1987     }
1988 
1989     std::string Documentation = GroupInfo.Defs.back()
1990                                     ->getValue("Documentation")
1991                                     ->getValue()
1992                                     ->getAsUnquotedString();
1993 
1994     OS << "R\"(" << StringRef(Documentation).trim() << ")\"";
1995 
1996     OS << ")\n";
1997   }
1998   OS << "#endif // DIAG_ENTRY\n\n";
1999 }
2000 
2001 /// Emit the table of diagnostic categories.
2002 ///
2003 /// The table has the form of macro calls that have two parameters. The
2004 /// category's name as well as an enum that represents the category. The
2005 /// table can be used by defining the macro 'CATEGORY' and including this
2006 /// table right after.
2007 ///
2008 /// \code
2009 /// #ifdef GET_CATEGORY_TABLE
2010 ///   CATEGORY("Semantic Issue", DiagCat_Semantic_Issue)
2011 ///   CATEGORY("Lambda Issue", DiagCat_Lambda_Issue)
2012 /// #endif
2013 /// \endcode
2014 static void emitCategoryTable(const RecordKeeper &Records, raw_ostream &OS) {
2015   DiagCategoryIDMap CategoriesByID(Records);
2016   OS << "\n#ifdef GET_CATEGORY_TABLE\n";
2017   for (auto const &C : CategoriesByID)
2018     OS << "CATEGORY(\"" << C << "\", " << getDiagCategoryEnum(C) << ")\n";
2019   OS << "#endif // GET_CATEGORY_TABLE\n\n";
2020 }
2021 
2022 void clang::EmitClangDiagGroups(const RecordKeeper &Records, raw_ostream &OS) {
2023   // Compute a mapping from a DiagGroup to all of its parents.
2024   DiagGroupParentMap DGParentMap(Records);
2025 
2026   ArrayRef<const Record *> Diags =
2027       Records.getAllDerivedDefinitions("Diagnostic");
2028 
2029   ArrayRef<const Record *> DiagGroups =
2030       Records.getAllDerivedDefinitions("DiagGroup");
2031 
2032   DiagsInGroupTy DiagsInGroup;
2033   groupDiagnostics(Diags, DiagGroups, DiagsInGroup);
2034 
2035   // All extensions are implicitly in the "pedantic" group.  Record the
2036   // implicit set of groups in the "pedantic" group, and use this information
2037   // later when emitting the group information for Pedantic.
2038   RecordVec DiagsInPedantic;
2039   RecordVec GroupsInPedantic;
2040   InferPedantic inferPedantic(DGParentMap, Diags, DiagGroups, DiagsInGroup);
2041   inferPedantic.compute(&DiagsInPedantic, &GroupsInPedantic);
2042 
2043   StringToOffsetTable GroupNames;
2044   for (const auto &[Name, Group] : DiagsInGroup) {
2045     GroupNames.GetOrAddStringOffset(Name);
2046   }
2047 
2048   emitAllDiagArrays(DiagsInGroup, DiagsInPedantic, GroupsInPedantic, GroupNames,
2049                     OS);
2050   emitDiagTable(DiagsInGroup, DiagsInPedantic, GroupsInPedantic, GroupNames,
2051                 OS);
2052   emitCategoryTable(Records, OS);
2053 }
2054 
2055 //===----------------------------------------------------------------------===//
2056 // Diagnostic name index generation
2057 //===----------------------------------------------------------------------===//
2058 
2059 void clang::EmitClangDiagsIndexName(const RecordKeeper &Records,
2060                                     raw_ostream &OS) {
2061   std::vector<const Record *> Diags =
2062       Records.getAllDerivedDefinitions("Diagnostic");
2063 
2064   sort(Diags, [](const Record *LHS, const Record *RHS) {
2065     return LHS->getName() < RHS->getName();
2066   });
2067 
2068   for (const Record *Elem : Diags)
2069     OS << "DIAG_NAME_INDEX(" << Elem->getName() << ")\n";
2070 }
2071 
2072 //===----------------------------------------------------------------------===//
2073 // Diagnostic documentation generation
2074 //===----------------------------------------------------------------------===//
2075 
2076 namespace docs {
2077 namespace {
2078 
2079 bool isRemarkGroup(const Record *DiagGroup,
2080                    const DiagsInGroupTy &DiagsInGroup) {
2081   bool AnyRemarks = false, AnyNonRemarks = false;
2082 
2083   std::function<void(StringRef)> Visit = [&](StringRef GroupName) {
2084     auto &GroupInfo = DiagsInGroup.find(GroupName)->second;
2085     for (const Record *Diag : GroupInfo.DiagsInGroup)
2086       (isRemark(*Diag) ? AnyRemarks : AnyNonRemarks) = true;
2087     for (StringRef Name : GroupInfo.SubGroups)
2088       Visit(Name);
2089   };
2090   Visit(DiagGroup->getValueAsString("GroupName"));
2091 
2092   if (AnyRemarks && AnyNonRemarks)
2093     PrintFatalError(
2094         DiagGroup->getLoc(),
2095         "Diagnostic group contains both remark and non-remark diagnostics");
2096   return AnyRemarks;
2097 }
2098 
2099 std::string getDefaultSeverity(const Record *Diag) {
2100   return std::string(
2101       Diag->getValueAsDef("DefaultSeverity")->getValueAsString("Name"));
2102 }
2103 
2104 std::set<std::string> getDefaultSeverities(const Record *DiagGroup,
2105                                            const DiagsInGroupTy &DiagsInGroup) {
2106   std::set<std::string> States;
2107 
2108   std::function<void(StringRef)> Visit = [&](StringRef GroupName) {
2109     auto &GroupInfo = DiagsInGroup.find(GroupName)->second;
2110     for (const Record *Diag : GroupInfo.DiagsInGroup)
2111       States.insert(getDefaultSeverity(Diag));
2112     for (const auto &Name : GroupInfo.SubGroups)
2113       Visit(Name);
2114   };
2115   Visit(DiagGroup->getValueAsString("GroupName"));
2116   return States;
2117 }
2118 
2119 void writeHeader(StringRef Str, raw_ostream &OS, char Kind = '-') {
2120   OS << Str << "\n" << std::string(Str.size(), Kind) << "\n";
2121 }
2122 
2123 void writeDiagnosticText(DiagnosticTextBuilder &Builder, const Record *R,
2124                          StringRef Role, raw_ostream &OS) {
2125   StringRef Text = R->getValueAsString("Summary");
2126   if (Text == "%0")
2127     OS << "The text of this diagnostic is not controlled by Clang.\n\n";
2128   else {
2129     std::vector<std::string> Out = Builder.buildForDocumentation(Role, R);
2130     for (auto &Line : Out)
2131       OS << Line << "\n";
2132     OS << "\n";
2133   }
2134 }
2135 
2136 }  // namespace
2137 }  // namespace docs
2138 
2139 void clang::EmitClangDiagDocs(const RecordKeeper &Records, raw_ostream &OS) {
2140   using namespace docs;
2141 
2142   // Get the documentation introduction paragraph.
2143   const Record *Documentation = Records.getDef("GlobalDocumentation");
2144   if (!Documentation) {
2145     PrintFatalError("The Documentation top-level definition is missing, "
2146                     "no documentation will be generated.");
2147     return;
2148   }
2149 
2150   OS << Documentation->getValueAsString("Intro") << "\n";
2151 
2152   DiagnosticTextBuilder Builder(Records);
2153 
2154   ArrayRef<const Record *> Diags =
2155       Records.getAllDerivedDefinitions("Diagnostic");
2156 
2157   std::vector<const Record *> DiagGroups =
2158       Records.getAllDerivedDefinitions("DiagGroup");
2159   sort(DiagGroups, diagGroupBeforeByName);
2160 
2161   DiagGroupParentMap DGParentMap(Records);
2162 
2163   DiagsInGroupTy DiagsInGroup;
2164   groupDiagnostics(Diags, DiagGroups, DiagsInGroup);
2165 
2166   // Compute the set of diagnostics that are in -Wpedantic.
2167   {
2168     RecordSet DiagsInPedanticSet;
2169     RecordSet GroupsInPedanticSet;
2170     InferPedantic inferPedantic(DGParentMap, Diags, DiagGroups, DiagsInGroup);
2171     inferPedantic.compute(&DiagsInPedanticSet, &GroupsInPedanticSet);
2172     auto &PedDiags = DiagsInGroup["pedantic"];
2173     // Put the diagnostics into a deterministic order.
2174     RecordVec DiagsInPedantic(DiagsInPedanticSet.begin(),
2175                               DiagsInPedanticSet.end());
2176     RecordVec GroupsInPedantic(GroupsInPedanticSet.begin(),
2177                                GroupsInPedanticSet.end());
2178     sort(DiagsInPedantic, beforeThanCompare);
2179     sort(GroupsInPedantic, beforeThanCompare);
2180     PedDiags.DiagsInGroup.insert(PedDiags.DiagsInGroup.end(),
2181                                  DiagsInPedantic.begin(),
2182                                  DiagsInPedantic.end());
2183     for (auto *Group : GroupsInPedantic)
2184       PedDiags.SubGroups.push_back(Group->getValueAsString("GroupName"));
2185   }
2186 
2187   // FIXME: Write diagnostic categories and link to diagnostic groups in each.
2188 
2189   // Write out the diagnostic groups.
2190   for (const Record *G : DiagGroups) {
2191     bool IsRemarkGroup = isRemarkGroup(G, DiagsInGroup);
2192     auto &GroupInfo = DiagsInGroup[G->getValueAsString("GroupName")];
2193     bool IsSynonym = GroupInfo.DiagsInGroup.empty() &&
2194                      GroupInfo.SubGroups.size() == 1;
2195 
2196     writeHeader(((IsRemarkGroup ? "-R" : "-W") +
2197                     G->getValueAsString("GroupName")).str(),
2198                 OS);
2199 
2200     if (!IsSynonym) {
2201       // FIXME: Ideally, all the diagnostics in a group should have the same
2202       // default state, but that is not currently the case.
2203       auto DefaultSeverities = getDefaultSeverities(G, DiagsInGroup);
2204       if (!DefaultSeverities.empty() && !DefaultSeverities.count("Ignored")) {
2205         bool AnyNonErrors = DefaultSeverities.count("Warning") ||
2206                             DefaultSeverities.count("Remark");
2207         if (!AnyNonErrors)
2208           OS << "This diagnostic is an error by default, but the flag ``-Wno-"
2209              << G->getValueAsString("GroupName") << "`` can be used to disable "
2210              << "the error.\n\n";
2211         else
2212           OS << "This diagnostic is enabled by default.\n\n";
2213       } else if (DefaultSeverities.size() > 1) {
2214         OS << "Some of the diagnostics controlled by this flag are enabled "
2215            << "by default.\n\n";
2216       }
2217     }
2218 
2219     if (!GroupInfo.SubGroups.empty()) {
2220       if (IsSynonym)
2221         OS << "Synonym for ";
2222       else if (GroupInfo.DiagsInGroup.empty())
2223         OS << "Controls ";
2224       else
2225         OS << "Also controls ";
2226 
2227       sort(GroupInfo.SubGroups);
2228       ListSeparator LS;
2229       for (StringRef Name : GroupInfo.SubGroups)
2230         OS << LS << "`" << (IsRemarkGroup ? "-R" : "-W") << Name << "`_";
2231       OS << ".\n\n";
2232     }
2233 
2234     if (!GroupInfo.DiagsInGroup.empty()) {
2235       OS << "**Diagnostic text:**\n\n";
2236       for (const Record *D : GroupInfo.DiagsInGroup) {
2237         auto Severity = getDefaultSeverity(D);
2238         Severity[0] = tolower(Severity[0]);
2239         if (Severity == "ignored")
2240           Severity = IsRemarkGroup ? "remark" : "warning";
2241 
2242         writeDiagnosticText(Builder, D, Severity, OS);
2243       }
2244     }
2245 
2246     auto Doc = G->getValueAsString("Documentation");
2247     if (!Doc.empty())
2248       OS << Doc;
2249     else if (GroupInfo.SubGroups.empty() && GroupInfo.DiagsInGroup.empty())
2250       OS << "This diagnostic flag exists for GCC compatibility, and has no "
2251             "effect in Clang.\n";
2252     OS << "\n";
2253   }
2254 }
2255