xref: /freebsd/contrib/llvm-project/llvm/lib/Support/SpecialCaseList.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===-- SpecialCaseList.cpp - special case list for sanitizers ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This is a utility class for instrumentation passes (like AddressSanitizer
10 // or ThreadSanitizer) to avoid instrumenting some functions or global
11 // variables, or to instrument some functions or global variables in a specific
12 // way, based on a user-supplied list.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/Support/SpecialCaseList.h"
17 #include "llvm/Support/LineIterator.h"
18 #include "llvm/Support/MemoryBuffer.h"
19 #include "llvm/Support/VirtualFileSystem.h"
20 #include <stdio.h>
21 #include <string>
22 #include <system_error>
23 #include <utility>
24 
25 namespace llvm {
26 
27 Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber,
28                                        bool UseGlobs) {
29   if (Pattern.empty())
30     return createStringError(errc::invalid_argument,
31                              Twine("Supplied ") +
32                                  (UseGlobs ? "glob" : "regex") + " was blank");
33 
34   if (!UseGlobs) {
35     // Replace * with .*
36     auto Regexp = Pattern.str();
37     for (size_t pos = 0; (pos = Regexp.find('*', pos)) != std::string::npos;
38          pos += strlen(".*")) {
39       Regexp.replace(pos, strlen("*"), ".*");
40     }
41 
42     Regexp = (Twine("^(") + StringRef(Regexp) + ")$").str();
43 
44     // Check that the regexp is valid.
45     Regex CheckRE(Regexp);
46     std::string REError;
47     if (!CheckRE.isValid(REError))
48       return createStringError(errc::invalid_argument, REError);
49 
50     RegExes.emplace_back(std::make_pair(
51         std::make_unique<Regex>(std::move(CheckRE)), LineNumber));
52 
53     return Error::success();
54   }
55 
56   auto [It, DidEmplace] = Globs.try_emplace(Pattern);
57   if (DidEmplace) {
58     // We must be sure to use the string in the map rather than the provided
59     // reference which could be destroyed before match() is called
60     Pattern = It->getKey();
61     auto &Pair = It->getValue();
62     if (auto Err = GlobPattern::create(Pattern, /*MaxSubPatterns=*/1024)
63                        .moveInto(Pair.first))
64       return Err;
65     Pair.second = LineNumber;
66   }
67   return Error::success();
68 }
69 
70 unsigned SpecialCaseList::Matcher::match(StringRef Query) const {
71   for (const auto &[Pattern, Pair] : Globs)
72     if (Pair.first.match(Query))
73       return Pair.second;
74   for (const auto &[Regex, LineNumber] : RegExes)
75     if (Regex->match(Query))
76       return LineNumber;
77   return 0;
78 }
79 
80 // TODO: Refactor this to return Expected<...>
81 std::unique_ptr<SpecialCaseList>
82 SpecialCaseList::create(const std::vector<std::string> &Paths,
83                         llvm::vfs::FileSystem &FS, std::string &Error) {
84   std::unique_ptr<SpecialCaseList> SCL(new SpecialCaseList());
85   if (SCL->createInternal(Paths, FS, Error))
86     return SCL;
87   return nullptr;
88 }
89 
90 std::unique_ptr<SpecialCaseList> SpecialCaseList::create(const MemoryBuffer *MB,
91                                                          std::string &Error) {
92   std::unique_ptr<SpecialCaseList> SCL(new SpecialCaseList());
93   if (SCL->createInternal(MB, Error))
94     return SCL;
95   return nullptr;
96 }
97 
98 std::unique_ptr<SpecialCaseList>
99 SpecialCaseList::createOrDie(const std::vector<std::string> &Paths,
100                              llvm::vfs::FileSystem &FS) {
101   std::string Error;
102   if (auto SCL = create(Paths, FS, Error))
103     return SCL;
104   report_fatal_error(Twine(Error));
105 }
106 
107 bool SpecialCaseList::createInternal(const std::vector<std::string> &Paths,
108                                      vfs::FileSystem &VFS, std::string &Error) {
109   for (const auto &Path : Paths) {
110     ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
111         VFS.getBufferForFile(Path);
112     if (std::error_code EC = FileOrErr.getError()) {
113       Error = (Twine("can't open file '") + Path + "': " + EC.message()).str();
114       return false;
115     }
116     std::string ParseError;
117     if (!parse(FileOrErr.get().get(), ParseError)) {
118       Error = (Twine("error parsing file '") + Path + "': " + ParseError).str();
119       return false;
120     }
121   }
122   return true;
123 }
124 
125 bool SpecialCaseList::createInternal(const MemoryBuffer *MB,
126                                      std::string &Error) {
127   if (!parse(MB, Error))
128     return false;
129   return true;
130 }
131 
132 Expected<SpecialCaseList::Section *>
133 SpecialCaseList::addSection(StringRef SectionStr, unsigned LineNo,
134                             bool UseGlobs) {
135   auto [It, DidEmplace] = Sections.try_emplace(SectionStr);
136   auto &Section = It->getValue();
137   if (DidEmplace)
138     if (auto Err = Section.SectionMatcher->insert(SectionStr, LineNo, UseGlobs))
139       return createStringError(errc::invalid_argument,
140                                "malformed section at line " + Twine(LineNo) +
141                                    ": '" + SectionStr +
142                                    "': " + toString(std::move(Err)));
143   return &Section;
144 }
145 
146 bool SpecialCaseList::parse(const MemoryBuffer *MB, std::string &Error) {
147   Section *CurrentSection;
148   if (auto Err = addSection("*", 1).moveInto(CurrentSection)) {
149     Error = toString(std::move(Err));
150     return false;
151   }
152 
153   // In https://reviews.llvm.org/D154014 we added glob support and planned to
154   // remove regex support in patterns. We temporarily support the original
155   // behavior using regexes if "#!special-case-list-v1" is the first line of the
156   // file. For more details, see
157   // https://discourse.llvm.org/t/use-glob-instead-of-regex-for-specialcaselists/71666
158   bool UseGlobs = !MB->getBuffer().starts_with("#!special-case-list-v1\n");
159 
160   for (line_iterator LineIt(*MB, /*SkipBlanks=*/true, /*CommentMarker=*/'#');
161        !LineIt.is_at_eof(); LineIt++) {
162     unsigned LineNo = LineIt.line_number();
163     StringRef Line = LineIt->trim();
164     if (Line.empty())
165       continue;
166 
167     // Save section names
168     if (Line.starts_with("[")) {
169       if (!Line.ends_with("]")) {
170         Error =
171             ("malformed section header on line " + Twine(LineNo) + ": " + Line)
172                 .str();
173         return false;
174       }
175 
176       if (auto Err = addSection(Line.drop_front().drop_back(), LineNo, UseGlobs)
177                          .moveInto(CurrentSection)) {
178         Error = toString(std::move(Err));
179         return false;
180       }
181       continue;
182     }
183 
184     // Get our prefix and unparsed glob.
185     auto [Prefix, Postfix] = Line.split(":");
186     if (Postfix.empty()) {
187       // Missing ':' in the line.
188       Error = ("malformed line " + Twine(LineNo) + ": '" + Line + "'").str();
189       return false;
190     }
191 
192     auto [Pattern, Category] = Postfix.split("=");
193     auto &Entry = CurrentSection->Entries[Prefix][Category];
194     if (auto Err = Entry.insert(Pattern, LineNo, UseGlobs)) {
195       Error =
196           (Twine("malformed ") + (UseGlobs ? "glob" : "regex") + " in line " +
197            Twine(LineNo) + ": '" + Pattern + "': " + toString(std::move(Err)))
198               .str();
199       return false;
200     }
201   }
202   return true;
203 }
204 
205 SpecialCaseList::~SpecialCaseList() = default;
206 
207 bool SpecialCaseList::inSection(StringRef Section, StringRef Prefix,
208                                 StringRef Query, StringRef Category) const {
209   return inSectionBlame(Section, Prefix, Query, Category);
210 }
211 
212 unsigned SpecialCaseList::inSectionBlame(StringRef Section, StringRef Prefix,
213                                          StringRef Query,
214                                          StringRef Category) const {
215   for (const auto &It : Sections) {
216     const auto &S = It.getValue();
217     if (S.SectionMatcher->match(Section)) {
218       unsigned Blame = inSectionBlame(S.Entries, Prefix, Query, Category);
219       if (Blame)
220         return Blame;
221     }
222   }
223   return 0;
224 }
225 
226 unsigned SpecialCaseList::inSectionBlame(const SectionEntries &Entries,
227                                          StringRef Prefix, StringRef Query,
228                                          StringRef Category) const {
229   SectionEntries::const_iterator I = Entries.find(Prefix);
230   if (I == Entries.end()) return 0;
231   StringMap<Matcher>::const_iterator II = I->second.find(Category);
232   if (II == I->second.end()) return 0;
233 
234   return II->getValue().match(Query);
235 }
236 
237 } // namespace llvm
238