xref: /freebsd/contrib/llvm-project/lldb/source/Utility/FileSpec.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===-- FileSpec.cpp ------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "lldb/Utility/FileSpec.h"
10 #include "lldb/Utility/RegularExpression.h"
11 #include "lldb/Utility/Stream.h"
12 
13 #include "llvm/ADT/SmallString.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringExtras.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/Twine.h"
18 #include "llvm/Support/ErrorOr.h"
19 #include "llvm/Support/FileSystem.h"
20 #include "llvm/Support/Program.h"
21 #include "llvm/Support/raw_ostream.h"
22 #include "llvm/TargetParser/Triple.h"
23 
24 #include <algorithm>
25 #include <optional>
26 #include <system_error>
27 #include <vector>
28 
29 #include <cassert>
30 #include <climits>
31 #include <cstdio>
32 #include <cstring>
33 
34 using namespace lldb;
35 using namespace lldb_private;
36 
37 namespace {
38 
GetNativeStyle()39 static constexpr FileSpec::Style GetNativeStyle() {
40 #if defined(_WIN32)
41   return FileSpec::Style::windows;
42 #else
43   return FileSpec::Style::posix;
44 #endif
45 }
46 
PathStyleIsPosix(FileSpec::Style style)47 bool PathStyleIsPosix(FileSpec::Style style) {
48   return llvm::sys::path::is_style_posix(style);
49 }
50 
GetPathSeparators(FileSpec::Style style)51 const char *GetPathSeparators(FileSpec::Style style) {
52   return llvm::sys::path::get_separator(style).data();
53 }
54 
GetPreferredPathSeparator(FileSpec::Style style)55 char GetPreferredPathSeparator(FileSpec::Style style) {
56   return GetPathSeparators(style)[0];
57 }
58 
Denormalize(llvm::SmallVectorImpl<char> & path,FileSpec::Style style)59 void Denormalize(llvm::SmallVectorImpl<char> &path, FileSpec::Style style) {
60   if (PathStyleIsPosix(style))
61     return;
62 
63   llvm::replace(path, '/', '\\');
64 }
65 
66 } // end anonymous namespace
67 
FileSpec()68 FileSpec::FileSpec() : m_style(GetNativeStyle()) {}
69 
70 // Default constructor that can take an optional full path to a file on disk.
FileSpec(llvm::StringRef path,Style style)71 FileSpec::FileSpec(llvm::StringRef path, Style style) : m_style(style) {
72   SetFile(path, style);
73 }
74 
FileSpec(llvm::StringRef path,const llvm::Triple & triple)75 FileSpec::FileSpec(llvm::StringRef path, const llvm::Triple &triple)
76     : FileSpec{path, triple.isOSWindows() ? Style::windows : Style::posix} {}
77 
78 namespace {
79 /// Safely get a character at the specified index.
80 ///
81 /// \param[in] path
82 ///     A full, partial, or relative path to a file.
83 ///
84 /// \param[in] i
85 ///     An index into path which may or may not be valid.
86 ///
87 /// \return
88 ///   The character at index \a i if the index is valid, or 0 if
89 ///   the index is not valid.
safeCharAtIndex(const llvm::StringRef & path,size_t i)90 inline char safeCharAtIndex(const llvm::StringRef &path, size_t i) {
91   if (i < path.size())
92     return path[i];
93   return 0;
94 }
95 
96 /// Check if a path needs to be normalized.
97 ///
98 /// Check if a path needs to be normalized. We currently consider a
99 /// path to need normalization if any of the following are true
100 ///  - path contains "/./"
101 ///  - path contains "/../"
102 ///  - path contains "//"
103 ///  - path ends with "/"
104 /// Paths that start with "./" or with "../" are not considered to
105 /// need normalization since we aren't trying to resolve the path,
106 /// we are just trying to remove redundant things from the path.
107 ///
108 /// \param[in] path
109 ///     A full, partial, or relative path to a file.
110 ///
111 /// \return
112 ///   Returns \b true if the path needs to be normalized.
needsNormalization(const llvm::StringRef & path)113 bool needsNormalization(const llvm::StringRef &path) {
114   if (path.empty())
115     return false;
116   // We strip off leading "." values so these paths need to be normalized
117   if (path[0] == '.')
118     return true;
119   for (auto i = path.find_first_of("\\/"); i != llvm::StringRef::npos;
120        i = path.find_first_of("\\/", i + 1)) {
121     const auto next = safeCharAtIndex(path, i+1);
122     switch (next) {
123       case 0:
124         // path separator char at the end of the string which should be
125         // stripped unless it is the one and only character
126         return i > 0;
127       case '/':
128       case '\\':
129         // two path separator chars in the middle of a path needs to be
130         // normalized
131         if (i > 0)
132           return true;
133         ++i;
134         break;
135 
136       case '.': {
137           const auto next_next = safeCharAtIndex(path, i+2);
138           switch (next_next) {
139             default: break;
140             case 0: return true; // ends with "/."
141             case '/':
142             case '\\':
143               return true; // contains "/./"
144             case '.': {
145               const auto next_next_next = safeCharAtIndex(path, i+3);
146               switch (next_next_next) {
147                 default: break;
148                 case 0: return true; // ends with "/.."
149                 case '/':
150                 case '\\':
151                   return true; // contains "/../"
152               }
153               break;
154             }
155           }
156         }
157         break;
158 
159       default:
160         break;
161     }
162   }
163   return false;
164 }
165 
166 
167 }
168 
SetFile(llvm::StringRef pathname)169 void FileSpec::SetFile(llvm::StringRef pathname) { SetFile(pathname, m_style); }
170 
171 // Update the contents of this object with a new path. The path will be split
172 // up into a directory and filename and stored as uniqued string values for
173 // quick comparison and efficient memory usage.
SetFile(llvm::StringRef pathname,Style style)174 void FileSpec::SetFile(llvm::StringRef pathname, Style style) {
175   Clear();
176   m_style = (style == Style::native) ? GetNativeStyle() : style;
177 
178   if (pathname.empty())
179     return;
180 
181   llvm::SmallString<128> resolved(pathname);
182 
183   // Normalize the path by removing ".", ".." and other redundant components.
184   if (needsNormalization(resolved))
185     llvm::sys::path::remove_dots(resolved, true, m_style);
186 
187   // Normalize back slashes to forward slashes
188   if (m_style == Style::windows)
189     llvm::replace(resolved, '\\', '/');
190 
191   if (resolved.empty()) {
192     // If we have no path after normalization set the path to the current
193     // directory. This matches what python does and also a few other path
194     // utilities.
195     m_filename.SetString(".");
196     return;
197   }
198 
199   // Split path into filename and directory. We rely on the underlying char
200   // pointer to be nullptr when the components are empty.
201   llvm::StringRef filename = llvm::sys::path::filename(resolved, m_style);
202   if(!filename.empty())
203     m_filename.SetString(filename);
204 
205   llvm::StringRef directory = llvm::sys::path::parent_path(resolved, m_style);
206   if(!directory.empty())
207     m_directory.SetString(directory);
208 }
209 
SetFile(llvm::StringRef path,const llvm::Triple & triple)210 void FileSpec::SetFile(llvm::StringRef path, const llvm::Triple &triple) {
211   return SetFile(path, triple.isOSWindows() ? Style::windows : Style::posix);
212 }
213 
214 // Convert to pointer operator. This allows code to check any FileSpec objects
215 // to see if they contain anything valid using code such as:
216 //
217 //  if (file_spec)
218 //  {}
operator bool() const219 FileSpec::operator bool() const { return m_filename || m_directory; }
220 
221 // Logical NOT operator. This allows code to check any FileSpec objects to see
222 // if they are invalid using code such as:
223 //
224 //  if (!file_spec)
225 //  {}
operator !() const226 bool FileSpec::operator!() const { return !m_directory && !m_filename; }
227 
DirectoryEquals(const FileSpec & rhs) const228 bool FileSpec::DirectoryEquals(const FileSpec &rhs) const {
229   const bool case_sensitive = IsCaseSensitive() || rhs.IsCaseSensitive();
230   return ConstString::Equals(m_directory, rhs.m_directory, case_sensitive);
231 }
232 
FileEquals(const FileSpec & rhs) const233 bool FileSpec::FileEquals(const FileSpec &rhs) const {
234   const bool case_sensitive = IsCaseSensitive() || rhs.IsCaseSensitive();
235   return ConstString::Equals(m_filename, rhs.m_filename, case_sensitive);
236 }
237 
238 // Equal to operator
operator ==(const FileSpec & rhs) const239 bool FileSpec::operator==(const FileSpec &rhs) const {
240   return FileEquals(rhs) && DirectoryEquals(rhs);
241 }
242 
243 // Not equal to operator
operator !=(const FileSpec & rhs) const244 bool FileSpec::operator!=(const FileSpec &rhs) const { return !(*this == rhs); }
245 
246 // Less than operator
operator <(const FileSpec & rhs) const247 bool FileSpec::operator<(const FileSpec &rhs) const {
248   return FileSpec::Compare(*this, rhs, true) < 0;
249 }
250 
251 // Dump a FileSpec object to a stream
operator <<(Stream & s,const FileSpec & f)252 Stream &lldb_private::operator<<(Stream &s, const FileSpec &f) {
253   f.Dump(s.AsRawOstream());
254   return s;
255 }
256 
257 // Clear this object by releasing both the directory and filename string values
258 // and making them both the empty string.
Clear()259 void FileSpec::Clear() {
260   m_directory.Clear();
261   m_filename.Clear();
262   PathWasModified();
263 }
264 
265 // Compare two FileSpec objects. If "full" is true, then both the directory and
266 // the filename must match. If "full" is false, then the directory names for
267 // "a" and "b" are only compared if they are both non-empty. This allows a
268 // FileSpec object to only contain a filename and it can match FileSpec objects
269 // that have matching filenames with different paths.
270 //
271 // Return -1 if the "a" is less than "b", 0 if "a" is equal to "b" and "1" if
272 // "a" is greater than "b".
Compare(const FileSpec & a,const FileSpec & b,bool full)273 int FileSpec::Compare(const FileSpec &a, const FileSpec &b, bool full) {
274   int result = 0;
275 
276   // case sensitivity of compare
277   const bool case_sensitive = a.IsCaseSensitive() || b.IsCaseSensitive();
278 
279   // If full is true, then we must compare both the directory and filename.
280 
281   // If full is false, then if either directory is empty, then we match on the
282   // basename only, and if both directories have valid values, we still do a
283   // full compare. This allows for matching when we just have a filename in one
284   // of the FileSpec objects.
285 
286   if (full || (a.m_directory && b.m_directory)) {
287     result = ConstString::Compare(a.m_directory, b.m_directory, case_sensitive);
288     if (result)
289       return result;
290   }
291   return ConstString::Compare(a.m_filename, b.m_filename, case_sensitive);
292 }
293 
Equal(const FileSpec & a,const FileSpec & b,bool full)294 bool FileSpec::Equal(const FileSpec &a, const FileSpec &b, bool full) {
295   if (full || (a.GetDirectory() && b.GetDirectory()))
296     return a == b;
297 
298   return a.FileEquals(b);
299 }
300 
Match(const FileSpec & pattern,const FileSpec & file)301 bool FileSpec::Match(const FileSpec &pattern, const FileSpec &file) {
302   if (pattern.GetDirectory())
303     return pattern == file;
304   if (pattern.GetFilename())
305     return pattern.FileEquals(file);
306   return true;
307 }
308 
309 std::optional<FileSpec::Style>
GuessPathStyle(llvm::StringRef absolute_path)310 FileSpec::GuessPathStyle(llvm::StringRef absolute_path) {
311   if (absolute_path.starts_with("/"))
312     return Style::posix;
313   if (absolute_path.starts_with(R"(\\)"))
314     return Style::windows;
315   if (absolute_path.size() >= 3 && llvm::isAlpha(absolute_path[0]) &&
316       (absolute_path.substr(1, 2) == R"(:\)" ||
317        absolute_path.substr(1, 2) == R"(:/)"))
318     return Style::windows;
319   return std::nullopt;
320 }
321 
322 // Dump the object to the supplied stream. If the object contains a valid
323 // directory name, it will be displayed followed by a directory delimiter, and
324 // the filename.
Dump(llvm::raw_ostream & s) const325 void FileSpec::Dump(llvm::raw_ostream &s) const {
326   std::string path{GetPath(true)};
327   s << path;
328   char path_separator = GetPreferredPathSeparator(m_style);
329   if (!m_filename && !path.empty() && path.back() != path_separator)
330     s << path_separator;
331 }
332 
ToJSON() const333 llvm::json::Value FileSpec::ToJSON() const {
334   std::string str;
335   llvm::raw_string_ostream stream(str);
336   this->Dump(stream);
337   return llvm::json::Value(std::move(str));
338 }
339 
GetPathStyle() const340 FileSpec::Style FileSpec::GetPathStyle() const { return m_style; }
341 
SetDirectory(ConstString directory)342 void FileSpec::SetDirectory(ConstString directory) {
343   m_directory = directory;
344   PathWasModified();
345 }
346 
SetDirectory(llvm::StringRef directory)347 void FileSpec::SetDirectory(llvm::StringRef directory) {
348   m_directory = ConstString(directory);
349   PathWasModified();
350 }
351 
SetFilename(ConstString filename)352 void FileSpec::SetFilename(ConstString filename) {
353   m_filename = filename;
354   PathWasModified();
355 }
356 
SetFilename(llvm::StringRef filename)357 void FileSpec::SetFilename(llvm::StringRef filename) {
358   m_filename = ConstString(filename);
359   PathWasModified();
360 }
361 
ClearFilename()362 void FileSpec::ClearFilename() {
363   m_filename.Clear();
364   PathWasModified();
365 }
366 
ClearDirectory()367 void FileSpec::ClearDirectory() {
368   m_directory.Clear();
369   PathWasModified();
370 }
371 
372 // Extract the directory and path into a fixed buffer. This is needed as the
373 // directory and path are stored in separate string values.
GetPath(char * path,size_t path_max_len,bool denormalize) const374 size_t FileSpec::GetPath(char *path, size_t path_max_len,
375                          bool denormalize) const {
376   if (!path)
377     return 0;
378 
379   std::string result = GetPath(denormalize);
380   ::snprintf(path, path_max_len, "%s", result.c_str());
381   return std::min(path_max_len - 1, result.length());
382 }
383 
GetPath(bool denormalize) const384 std::string FileSpec::GetPath(bool denormalize) const {
385   llvm::SmallString<64> result;
386   GetPath(result, denormalize);
387   return static_cast<std::string>(result);
388 }
389 
GetPathAsConstString(bool denormalize) const390 ConstString FileSpec::GetPathAsConstString(bool denormalize) const {
391   return ConstString{GetPath(denormalize)};
392 }
393 
GetPath(llvm::SmallVectorImpl<char> & path,bool denormalize) const394 void FileSpec::GetPath(llvm::SmallVectorImpl<char> &path,
395                        bool denormalize) const {
396   path.append(m_directory.GetStringRef().begin(),
397               m_directory.GetStringRef().end());
398   // Since the path was normalized and all paths use '/' when stored in these
399   // objects, we don't need to look for the actual syntax specific path
400   // separator, we just look for and insert '/'.
401   if (m_directory && m_filename && m_directory.GetStringRef().back() != '/' &&
402       m_filename.GetStringRef().back() != '/')
403     path.insert(path.end(), '/');
404   path.append(m_filename.GetStringRef().begin(),
405               m_filename.GetStringRef().end());
406   if (denormalize && !path.empty())
407     Denormalize(path, m_style);
408 }
409 
GetFileNameExtension() const410 llvm::StringRef FileSpec::GetFileNameExtension() const {
411   return llvm::sys::path::extension(m_filename.GetStringRef(), m_style);
412 }
413 
GetFileNameStrippingExtension() const414 ConstString FileSpec::GetFileNameStrippingExtension() const {
415   return ConstString(llvm::sys::path::stem(m_filename.GetStringRef(), m_style));
416 }
417 
418 // Return the size in bytes that this object takes in memory. This returns the
419 // size in bytes of this object, not any shared string values it may refer to.
MemorySize() const420 size_t FileSpec::MemorySize() const {
421   return m_filename.MemorySize() + m_directory.MemorySize();
422 }
423 
424 FileSpec
CopyByAppendingPathComponent(llvm::StringRef component) const425 FileSpec::CopyByAppendingPathComponent(llvm::StringRef component) const {
426   FileSpec ret = *this;
427   ret.AppendPathComponent(component);
428   return ret;
429 }
430 
CopyByRemovingLastPathComponent() const431 FileSpec FileSpec::CopyByRemovingLastPathComponent() const {
432   llvm::SmallString<64> current_path;
433   GetPath(current_path, false);
434   if (llvm::sys::path::has_parent_path(current_path, m_style))
435     return FileSpec(llvm::sys::path::parent_path(current_path, m_style),
436                     m_style);
437   return *this;
438 }
439 
PrependPathComponent(llvm::StringRef component)440 void FileSpec::PrependPathComponent(llvm::StringRef component) {
441   llvm::SmallString<64> new_path(component);
442   llvm::SmallString<64> current_path;
443   GetPath(current_path, false);
444   llvm::sys::path::append(new_path,
445                           llvm::sys::path::begin(current_path, m_style),
446                           llvm::sys::path::end(current_path), m_style);
447   SetFile(new_path, m_style);
448 }
449 
PrependPathComponent(const FileSpec & new_path)450 void FileSpec::PrependPathComponent(const FileSpec &new_path) {
451   return PrependPathComponent(new_path.GetPath(false));
452 }
453 
AppendPathComponent(llvm::StringRef component)454 void FileSpec::AppendPathComponent(llvm::StringRef component) {
455   llvm::SmallString<64> current_path;
456   GetPath(current_path, false);
457   llvm::sys::path::append(current_path, m_style, component);
458   SetFile(current_path, m_style);
459 }
460 
AppendPathComponent(const FileSpec & new_path)461 void FileSpec::AppendPathComponent(const FileSpec &new_path) {
462   return AppendPathComponent(new_path.GetPath(false));
463 }
464 
RemoveLastPathComponent()465 bool FileSpec::RemoveLastPathComponent() {
466   llvm::SmallString<64> current_path;
467   GetPath(current_path, false);
468   if (llvm::sys::path::has_parent_path(current_path, m_style)) {
469     SetFile(llvm::sys::path::parent_path(current_path, m_style));
470     return true;
471   }
472   return false;
473 }
474 
GetComponents() const475 std::vector<llvm::StringRef> FileSpec::GetComponents() const {
476   std::vector<llvm::StringRef> components;
477 
478   auto dir_begin = llvm::sys::path::begin(m_directory.GetStringRef(), m_style);
479   auto dir_end = llvm::sys::path::end(m_directory.GetStringRef());
480 
481   for (auto iter = dir_begin; iter != dir_end; ++iter) {
482     if (*iter == "/" || *iter == ".")
483       continue;
484 
485     components.push_back(*iter);
486   }
487 
488   if (!m_filename.IsEmpty() && m_filename != "/" && m_filename != ".")
489     components.push_back(m_filename.GetStringRef());
490 
491   return components;
492 }
493 
494 /// Returns true if the filespec represents an implementation source
495 /// file (files with a ".c", ".cpp", ".m", ".mm" (many more)
496 /// extension).
497 ///
498 /// \return
499 ///     \b true if the filespec represents an implementation source
500 ///     file, \b false otherwise.
IsSourceImplementationFile() const501 bool FileSpec::IsSourceImplementationFile() const {
502   llvm::StringRef extension = GetFileNameExtension();
503   if (extension.empty())
504     return false;
505 
506   static RegularExpression g_source_file_regex(llvm::StringRef(
507       "^.([cC]|[mM]|[mM][mM]|[cC][pP][pP]|[cC]\\+\\+|[cC][xX][xX]|[cC][cC]|["
508       "cC][pP]|[sS]|[aA][sS][mM]|[fF]|[fF]77|[fF]90|[fF]95|[fF]03|[fF][oO]["
509       "rR]|[fF][tT][nN]|[fF][pP][pP]|[aA][dD][aA]|[aA][dD][bB]|[aA][dD][sS])"
510       "$"));
511   return g_source_file_regex.Execute(extension);
512 }
513 
IsRelative() const514 bool FileSpec::IsRelative() const {
515   return !IsAbsolute();
516 }
517 
IsAbsolute() const518 bool FileSpec::IsAbsolute() const {
519   // Check if we have cached if this path is absolute to avoid recalculating.
520   if (m_absolute != Absolute::Calculate)
521     return m_absolute == Absolute::Yes;
522 
523   m_absolute = Absolute::No;
524 
525   llvm::SmallString<64> path;
526   GetPath(path, false);
527 
528   if (!path.empty()) {
529     // We consider paths starting with ~ to be absolute.
530     if (path[0] == '~' || llvm::sys::path::is_absolute(path, m_style))
531       m_absolute = Absolute::Yes;
532   }
533 
534   return m_absolute == Absolute::Yes;
535 }
536 
MakeAbsolute(const FileSpec & dir)537 void FileSpec::MakeAbsolute(const FileSpec &dir) {
538   if (IsRelative())
539     PrependPathComponent(dir);
540 }
541 
format(const FileSpec & F,raw_ostream & Stream,StringRef Style)542 void llvm::format_provider<FileSpec>::format(const FileSpec &F,
543                                              raw_ostream &Stream,
544                                              StringRef Style) {
545   assert((Style.empty() || Style.equals_insensitive("F") ||
546           Style.equals_insensitive("D")) &&
547          "Invalid FileSpec style!");
548 
549   StringRef dir = F.GetDirectory().GetStringRef();
550   StringRef file = F.GetFilename().GetStringRef();
551 
552   if (dir.empty() && file.empty()) {
553     Stream << "(empty)";
554     return;
555   }
556 
557   if (Style.equals_insensitive("F")) {
558     Stream << (file.empty() ? "(empty)" : file);
559     return;
560   }
561 
562   // Style is either D or empty, either way we need to print the directory.
563   if (!dir.empty()) {
564     // Directory is stored in normalized form, which might be different than
565     // preferred form.  In order to handle this, we need to cut off the
566     // filename, then denormalize, then write the entire denorm'ed directory.
567     llvm::SmallString<64> denormalized_dir = dir;
568     Denormalize(denormalized_dir, F.GetPathStyle());
569     Stream << denormalized_dir;
570     Stream << GetPreferredPathSeparator(F.GetPathStyle());
571   }
572 
573   if (Style.equals_insensitive("D")) {
574     // We only want to print the directory, so now just exit.
575     if (dir.empty())
576       Stream << "(empty)";
577     return;
578   }
579 
580   if (!file.empty())
581     Stream << file;
582 }
583