1 //===- VirtualFileSystem.h - Virtual File System Layer ----------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Defines the virtual file system interface vfs::FileSystem. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_SUPPORT_VIRTUALFILESYSTEM_H 15 #define LLVM_SUPPORT_VIRTUALFILESYSTEM_H 16 17 #include "llvm/ADT/IntrusiveRefCntPtr.h" 18 #include "llvm/ADT/STLFunctionalExtras.h" 19 #include "llvm/ADT/SmallVector.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/Support/Chrono.h" 22 #include "llvm/Support/Errc.h" 23 #include "llvm/Support/Error.h" 24 #include "llvm/Support/ErrorOr.h" 25 #include "llvm/Support/ExtensibleRTTI.h" 26 #include "llvm/Support/FileSystem.h" 27 #include "llvm/Support/Path.h" 28 #include "llvm/Support/SourceMgr.h" 29 #include <cassert> 30 #include <cstdint> 31 #include <ctime> 32 #include <memory> 33 #include <optional> 34 #include <string> 35 #include <system_error> 36 #include <utility> 37 #include <vector> 38 39 namespace llvm { 40 41 class MemoryBuffer; 42 class MemoryBufferRef; 43 class Twine; 44 45 namespace vfs { 46 47 /// The result of a \p status operation. 48 class Status { 49 std::string Name; 50 llvm::sys::fs::UniqueID UID; 51 llvm::sys::TimePoint<> MTime; 52 uint32_t User; 53 uint32_t Group; 54 uint64_t Size; 55 llvm::sys::fs::file_type Type = llvm::sys::fs::file_type::status_error; 56 llvm::sys::fs::perms Perms; 57 58 public: 59 /// Whether this entity has an external path different from the virtual path, 60 /// and the external path is exposed by leaking it through the abstraction. 
61 /// For example, a RedirectingFileSystem will set this for paths where 62 /// UseExternalName is true. 63 /// 64 /// FIXME: Currently the external path is exposed by replacing the virtual 65 /// path in this Status object. Instead, we should leave the path in the 66 /// Status intact (matching the requested virtual path) - see 67 /// FileManager::getFileRef for how we plan to fix this. 68 bool ExposesExternalVFSPath = false; 69 70 Status() = default; 71 Status(const llvm::sys::fs::file_status &Status); 72 Status(const Twine &Name, llvm::sys::fs::UniqueID UID, 73 llvm::sys::TimePoint<> MTime, uint32_t User, uint32_t Group, 74 uint64_t Size, llvm::sys::fs::file_type Type, 75 llvm::sys::fs::perms Perms); 76 77 /// Get a copy of a Status with a different size. 78 static Status copyWithNewSize(const Status &In, uint64_t NewSize); 79 /// Get a copy of a Status with a different name. 80 static Status copyWithNewName(const Status &In, const Twine &NewName); 81 static Status copyWithNewName(const llvm::sys::fs::file_status &In, 82 const Twine &NewName); 83 84 /// Returns the name that should be used for this file or directory. getName()85 StringRef getName() const { return Name; } 86 87 /// @name Status interface from llvm::sys::fs 88 /// @{ getType()89 llvm::sys::fs::file_type getType() const { return Type; } getPermissions()90 llvm::sys::fs::perms getPermissions() const { return Perms; } getLastModificationTime()91 llvm::sys::TimePoint<> getLastModificationTime() const { return MTime; } getUniqueID()92 llvm::sys::fs::UniqueID getUniqueID() const { return UID; } getUser()93 uint32_t getUser() const { return User; } getGroup()94 uint32_t getGroup() const { return Group; } getSize()95 uint64_t getSize() const { return Size; } 96 /// @} 97 /// @name Status queries 98 /// These are static queries in llvm::sys::fs. 
99 /// @{ 100 bool equivalent(const Status &Other) const; 101 bool isDirectory() const; 102 bool isRegularFile() const; 103 bool isOther() const; 104 bool isSymlink() const; 105 bool isStatusKnown() const; 106 bool exists() const; 107 /// @} 108 }; 109 110 /// Represents an open file. 111 class File { 112 public: 113 /// Destroy the file after closing it (if open). 114 /// Sub-classes should generally call close() inside their destructors. We 115 /// cannot do that from the base class, since close is virtual. 116 virtual ~File(); 117 118 /// Get the status of the file. 119 virtual llvm::ErrorOr<Status> status() = 0; 120 121 /// Get the name of the file getName()122 virtual llvm::ErrorOr<std::string> getName() { 123 if (auto Status = status()) 124 return Status->getName().str(); 125 else 126 return Status.getError(); 127 } 128 129 /// Get the contents of the file as a \p MemoryBuffer. 130 virtual llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> 131 getBuffer(const Twine &Name, int64_t FileSize = -1, 132 bool RequiresNullTerminator = true, bool IsVolatile = false) = 0; 133 134 /// Closes the file. 135 virtual std::error_code close() = 0; 136 137 // Get the same file with a different path. 138 static ErrorOr<std::unique_ptr<File>> 139 getWithPath(ErrorOr<std::unique_ptr<File>> Result, const Twine &P); 140 141 protected: 142 // Set the file's underlying path. setPath(const Twine & Path)143 virtual void setPath(const Twine &Path) {} 144 }; 145 146 /// A member of a directory, yielded by a directory_iterator. 147 /// Only information available on most platforms is included. 
148 class directory_entry { 149 std::string Path; 150 llvm::sys::fs::file_type Type = llvm::sys::fs::file_type::type_unknown; 151 152 public: 153 directory_entry() = default; directory_entry(std::string Path,llvm::sys::fs::file_type Type)154 directory_entry(std::string Path, llvm::sys::fs::file_type Type) 155 : Path(std::move(Path)), Type(Type) {} 156 path()157 llvm::StringRef path() const { return Path; } type()158 llvm::sys::fs::file_type type() const { return Type; } 159 }; 160 161 namespace detail { 162 163 /// An interface for virtual file systems to provide an iterator over the 164 /// (non-recursive) contents of a directory. 165 struct DirIterImpl { 166 virtual ~DirIterImpl(); 167 168 /// Sets \c CurrentEntry to the next entry in the directory on success, 169 /// to directory_entry() at end, or returns a system-defined \c error_code. 170 virtual std::error_code increment() = 0; 171 172 directory_entry CurrentEntry; 173 }; 174 175 } // namespace detail 176 177 /// An input iterator over the entries in a virtual path, similar to 178 /// llvm::sys::fs::directory_iterator. 179 class directory_iterator { 180 std::shared_ptr<detail::DirIterImpl> Impl; // Input iterator semantics on copy 181 182 public: directory_iterator(std::shared_ptr<detail::DirIterImpl> I)183 directory_iterator(std::shared_ptr<detail::DirIterImpl> I) 184 : Impl(std::move(I)) { 185 assert(Impl.get() != nullptr && "requires non-null implementation"); 186 if (Impl->CurrentEntry.path().empty()) 187 Impl.reset(); // Normalize the end iterator to Impl == nullptr. 188 } 189 190 /// Construct an 'end' iterator. 191 directory_iterator() = default; 192 193 /// Equivalent to operator++, with an error code. increment(std::error_code & EC)194 directory_iterator &increment(std::error_code &EC) { 195 assert(Impl && "attempting to increment past end"); 196 EC = Impl->increment(); 197 if (Impl->CurrentEntry.path().empty()) 198 Impl.reset(); // Normalize the end iterator to Impl == nullptr. 
199 return *this; 200 } 201 202 const directory_entry &operator*() const { return Impl->CurrentEntry; } 203 const directory_entry *operator->() const { return &Impl->CurrentEntry; } 204 205 bool operator==(const directory_iterator &RHS) const { 206 if (Impl && RHS.Impl) 207 return Impl->CurrentEntry.path() == RHS.Impl->CurrentEntry.path(); 208 return !Impl && !RHS.Impl; 209 } 210 bool operator!=(const directory_iterator &RHS) const { 211 return !(*this == RHS); 212 } 213 }; 214 215 class FileSystem; 216 217 namespace detail { 218 219 /// Keeps state for the recursive_directory_iterator. 220 struct RecDirIterState { 221 std::vector<directory_iterator> Stack; 222 bool HasNoPushRequest = false; 223 }; 224 225 } // end namespace detail 226 227 /// An input iterator over the recursive contents of a virtual path, 228 /// similar to llvm::sys::fs::recursive_directory_iterator. 229 class recursive_directory_iterator { 230 FileSystem *FS; 231 std::shared_ptr<detail::RecDirIterState> 232 State; // Input iterator semantics on copy. 233 234 public: 235 recursive_directory_iterator(FileSystem &FS, const Twine &Path, 236 std::error_code &EC); 237 238 /// Construct an 'end' iterator. 239 recursive_directory_iterator() = default; 240 241 /// Equivalent to operator++, with an error code. 242 recursive_directory_iterator &increment(std::error_code &EC); 243 244 const directory_entry &operator*() const { return *State->Stack.back(); } 245 const directory_entry *operator->() const { return &*State->Stack.back(); } 246 247 bool operator==(const recursive_directory_iterator &Other) const { 248 return State == Other.State; // identity 249 } 250 bool operator!=(const recursive_directory_iterator &RHS) const { 251 return !(*this == RHS); 252 } 253 254 /// Gets the current level. Starting path is at level 0. 
level()255 int level() const { 256 assert(!State->Stack.empty() && 257 "Cannot get level without any iteration state"); 258 return State->Stack.size() - 1; 259 } 260 no_push()261 void no_push() { State->HasNoPushRequest = true; } 262 }; 263 264 /// The virtual file system interface. 265 class FileSystem : public llvm::ThreadSafeRefCountedBase<FileSystem>, 266 public RTTIExtends<FileSystem, RTTIRoot> { 267 public: 268 static const char ID; 269 virtual ~FileSystem(); 270 271 /// Get the status of the entry at \p Path, if one exists. 272 virtual llvm::ErrorOr<Status> status(const Twine &Path) = 0; 273 274 /// Get a \p File object for the file at \p Path, if one exists. 275 virtual llvm::ErrorOr<std::unique_ptr<File>> 276 openFileForRead(const Twine &Path) = 0; 277 278 /// This is a convenience method that opens a file, gets its content and then 279 /// closes the file. 280 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> 281 getBufferForFile(const Twine &Name, int64_t FileSize = -1, 282 bool RequiresNullTerminator = true, bool IsVolatile = false); 283 284 /// Get a directory_iterator for \p Dir. 285 /// \note The 'end' iterator is directory_iterator(). 286 virtual directory_iterator dir_begin(const Twine &Dir, 287 std::error_code &EC) = 0; 288 289 /// Set the working directory. This will affect all following operations on 290 /// this file system and may propagate down for nested file systems. 291 virtual std::error_code setCurrentWorkingDirectory(const Twine &Path) = 0; 292 293 /// Get the working directory of this file system. 294 virtual llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const = 0; 295 296 /// Gets real path of \p Path e.g. collapse all . and .. patterns, resolve 297 /// symlinks. For real file system, this uses `llvm::sys::fs::real_path`. 298 /// This returns errc::operation_not_permitted if not implemented by subclass. 
299 virtual std::error_code getRealPath(const Twine &Path, 300 SmallVectorImpl<char> &Output); 301 302 /// Check whether \p Path exists. By default this uses \c status(), but 303 /// filesystems may provide a more efficient implementation if available. 304 virtual bool exists(const Twine &Path); 305 306 /// Is the file mounted on a local filesystem? 307 virtual std::error_code isLocal(const Twine &Path, bool &Result); 308 309 /// Make \a Path an absolute path. 310 /// 311 /// Makes \a Path absolute using the current directory if it is not already. 312 /// An empty \a Path will result in the current directory. 313 /// 314 /// /absolute/path => /absolute/path 315 /// relative/../path => <current-directory>/relative/../path 316 /// 317 /// \param Path A path that is modified to be an absolute path. 318 /// \returns success if \a path has been made absolute, otherwise a 319 /// platform-specific error_code. 320 virtual std::error_code makeAbsolute(SmallVectorImpl<char> &Path) const; 321 322 /// \returns true if \p A and \p B represent the same file, or an error or 323 /// false if they do not. 
324 llvm::ErrorOr<bool> equivalent(const Twine &A, const Twine &B); 325 326 enum class PrintType { Summary, Contents, RecursiveContents }; 327 void print(raw_ostream &OS, PrintType Type = PrintType::Contents, 328 unsigned IndentLevel = 0) const { 329 printImpl(OS, Type, IndentLevel); 330 } 331 332 using VisitCallbackTy = llvm::function_ref<void(FileSystem &)>; visitChildFileSystems(VisitCallbackTy Callback)333 virtual void visitChildFileSystems(VisitCallbackTy Callback) {} visit(VisitCallbackTy Callback)334 void visit(VisitCallbackTy Callback) { 335 Callback(*this); 336 visitChildFileSystems(Callback); 337 } 338 339 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 340 LLVM_DUMP_METHOD void dump() const; 341 #endif 342 343 protected: printImpl(raw_ostream & OS,PrintType Type,unsigned IndentLevel)344 virtual void printImpl(raw_ostream &OS, PrintType Type, 345 unsigned IndentLevel) const { 346 printIndent(OS, IndentLevel); 347 OS << "FileSystem\n"; 348 } 349 printIndent(raw_ostream & OS,unsigned IndentLevel)350 void printIndent(raw_ostream &OS, unsigned IndentLevel) const { 351 for (unsigned i = 0; i < IndentLevel; ++i) 352 OS << " "; 353 } 354 }; 355 356 /// Gets an \p vfs::FileSystem for the 'real' file system, as seen by 357 /// the operating system. 358 /// The working directory is linked to the process's working directory. 359 /// (This is usually thread-hostile). 360 IntrusiveRefCntPtr<FileSystem> getRealFileSystem(); 361 362 /// Create an \p vfs::FileSystem for the 'real' file system, as seen by 363 /// the operating system. 364 /// It has its own working directory, independent of (but initially equal to) 365 /// that of the process. 366 std::unique_ptr<FileSystem> createPhysicalFileSystem(); 367 368 /// A file system that allows overlaying one \p AbstractFileSystem on top 369 /// of another. 370 /// 371 /// Consists of a stack of >=1 \p FileSystem objects, which are treated as being 372 /// one merged file system. 
When there is a directory that exists in more than 373 /// one file system, the \p OverlayFileSystem contains a directory containing 374 /// the union of their contents. The attributes (permissions, etc.) of the 375 /// top-most (most recently added) directory are used. When there is a file 376 /// that exists in more than one file system, the file in the top-most file 377 /// system overrides the other(s). 378 class OverlayFileSystem : public RTTIExtends<OverlayFileSystem, FileSystem> { 379 using FileSystemList = SmallVector<IntrusiveRefCntPtr<FileSystem>, 1>; 380 381 /// The stack of file systems, implemented as a list in order of 382 /// their addition. 383 FileSystemList FSList; 384 385 public: 386 static const char ID; 387 OverlayFileSystem(IntrusiveRefCntPtr<FileSystem> Base); 388 389 /// Pushes a file system on top of the stack. 390 void pushOverlay(IntrusiveRefCntPtr<FileSystem> FS); 391 392 llvm::ErrorOr<Status> status(const Twine &Path) override; 393 bool exists(const Twine &Path) override; 394 llvm::ErrorOr<std::unique_ptr<File>> 395 openFileForRead(const Twine &Path) override; 396 directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override; 397 llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override; 398 std::error_code setCurrentWorkingDirectory(const Twine &Path) override; 399 std::error_code isLocal(const Twine &Path, bool &Result) override; 400 std::error_code getRealPath(const Twine &Path, 401 SmallVectorImpl<char> &Output) override; 402 403 using iterator = FileSystemList::reverse_iterator; 404 using const_iterator = FileSystemList::const_reverse_iterator; 405 using reverse_iterator = FileSystemList::iterator; 406 using const_reverse_iterator = FileSystemList::const_iterator; 407 using range = iterator_range<iterator>; 408 using const_range = iterator_range<const_iterator>; 409 410 /// Get an iterator pointing to the most recently added file system. 
overlays_begin()411 iterator overlays_begin() { return FSList.rbegin(); } overlays_begin()412 const_iterator overlays_begin() const { return FSList.rbegin(); } 413 414 /// Get an iterator pointing one-past the least recently added file system. overlays_end()415 iterator overlays_end() { return FSList.rend(); } overlays_end()416 const_iterator overlays_end() const { return FSList.rend(); } 417 418 /// Get an iterator pointing to the least recently added file system. overlays_rbegin()419 reverse_iterator overlays_rbegin() { return FSList.begin(); } overlays_rbegin()420 const_reverse_iterator overlays_rbegin() const { return FSList.begin(); } 421 422 /// Get an iterator pointing one-past the most recently added file system. overlays_rend()423 reverse_iterator overlays_rend() { return FSList.end(); } overlays_rend()424 const_reverse_iterator overlays_rend() const { return FSList.end(); } 425 overlays_range()426 range overlays_range() { return llvm::reverse(FSList); } overlays_range()427 const_range overlays_range() const { return llvm::reverse(FSList); } 428 429 protected: 430 void printImpl(raw_ostream &OS, PrintType Type, 431 unsigned IndentLevel) const override; 432 void visitChildFileSystems(VisitCallbackTy Callback) override; 433 }; 434 435 /// By default, this delegates all calls to the underlying file system. This 436 /// is useful when derived file systems want to override some calls and still 437 /// proxy other calls. 
438 class ProxyFileSystem : public RTTIExtends<ProxyFileSystem, FileSystem> { 439 public: 440 static const char ID; ProxyFileSystem(IntrusiveRefCntPtr<FileSystem> FS)441 explicit ProxyFileSystem(IntrusiveRefCntPtr<FileSystem> FS) 442 : FS(std::move(FS)) {} 443 status(const Twine & Path)444 llvm::ErrorOr<Status> status(const Twine &Path) override { 445 return FS->status(Path); 446 } exists(const Twine & Path)447 bool exists(const Twine &Path) override { return FS->exists(Path); } 448 llvm::ErrorOr<std::unique_ptr<File>> openFileForRead(const Twine & Path)449 openFileForRead(const Twine &Path) override { 450 return FS->openFileForRead(Path); 451 } dir_begin(const Twine & Dir,std::error_code & EC)452 directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override { 453 return FS->dir_begin(Dir, EC); 454 } getCurrentWorkingDirectory()455 llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override { 456 return FS->getCurrentWorkingDirectory(); 457 } setCurrentWorkingDirectory(const Twine & Path)458 std::error_code setCurrentWorkingDirectory(const Twine &Path) override { 459 return FS->setCurrentWorkingDirectory(Path); 460 } getRealPath(const Twine & Path,SmallVectorImpl<char> & Output)461 std::error_code getRealPath(const Twine &Path, 462 SmallVectorImpl<char> &Output) override { 463 return FS->getRealPath(Path, Output); 464 } isLocal(const Twine & Path,bool & Result)465 std::error_code isLocal(const Twine &Path, bool &Result) override { 466 return FS->isLocal(Path, Result); 467 } 468 469 protected: getUnderlyingFS()470 FileSystem &getUnderlyingFS() const { return *FS; } visitChildFileSystems(VisitCallbackTy Callback)471 void visitChildFileSystems(VisitCallbackTy Callback) override { 472 if (FS) { 473 Callback(*FS); 474 FS->visitChildFileSystems(Callback); 475 } 476 } 477 478 private: 479 IntrusiveRefCntPtr<FileSystem> FS; 480 481 virtual void anchor() override; 482 }; 483 484 namespace detail { 485 486 class InMemoryDirectory; 487 class 
InMemoryNode; 488 489 struct NewInMemoryNodeInfo { 490 llvm::sys::fs::UniqueID DirUID; 491 StringRef Path; 492 StringRef Name; 493 time_t ModificationTime; 494 std::unique_ptr<llvm::MemoryBuffer> Buffer; 495 uint32_t User; 496 uint32_t Group; 497 llvm::sys::fs::file_type Type; 498 llvm::sys::fs::perms Perms; 499 500 Status makeStatus() const; 501 }; 502 503 class NamedNodeOrError { 504 ErrorOr<std::pair<llvm::SmallString<128>, const detail::InMemoryNode *>> 505 Value; 506 507 public: NamedNodeOrError(llvm::SmallString<128> Name,const detail::InMemoryNode * Node)508 NamedNodeOrError(llvm::SmallString<128> Name, 509 const detail::InMemoryNode *Node) 510 : Value(std::make_pair(Name, Node)) {} NamedNodeOrError(std::error_code EC)511 NamedNodeOrError(std::error_code EC) : Value(EC) {} NamedNodeOrError(llvm::errc EC)512 NamedNodeOrError(llvm::errc EC) : Value(EC) {} 513 getName()514 StringRef getName() const { return (*Value).first; } 515 explicit operator bool() const { return static_cast<bool>(Value); } error_code()516 operator std::error_code() const { return Value.getError(); } getError()517 std::error_code getError() const { return Value.getError(); } 518 const detail::InMemoryNode *operator*() const { return (*Value).second; } 519 }; 520 521 } // namespace detail 522 523 /// An in-memory file system. 524 class InMemoryFileSystem : public RTTIExtends<InMemoryFileSystem, FileSystem> { 525 std::unique_ptr<detail::InMemoryDirectory> Root; 526 std::string WorkingDirectory; 527 bool UseNormalizedPaths = true; 528 529 public: 530 static const char ID; 531 532 private: 533 using MakeNodeFn = llvm::function_ref<std::unique_ptr<detail::InMemoryNode>( 534 detail::NewInMemoryNodeInfo)>; 535 536 /// Create node with \p MakeNode and add it into this filesystem at \p Path. 
537 bool addFile(const Twine &Path, time_t ModificationTime, 538 std::unique_ptr<llvm::MemoryBuffer> Buffer, 539 std::optional<uint32_t> User, std::optional<uint32_t> Group, 540 std::optional<llvm::sys::fs::file_type> Type, 541 std::optional<llvm::sys::fs::perms> Perms, MakeNodeFn MakeNode); 542 543 /// Looks up the in-memory node for the path \p P. 544 /// If \p FollowFinalSymlink is true, the returned node is guaranteed to 545 /// not be a symlink and its path may differ from \p P. 546 detail::NamedNodeOrError lookupNode(const Twine &P, bool FollowFinalSymlink, 547 size_t SymlinkDepth = 0) const; 548 549 class DirIterator; 550 551 public: 552 explicit InMemoryFileSystem(bool UseNormalizedPaths = true); 553 ~InMemoryFileSystem() override; 554 555 /// Add a file containing a buffer or a directory to the VFS with a 556 /// path. The VFS owns the buffer. If present, User, Group, Type 557 /// and Perms apply to the newly-created file or directory. 558 /// \return true if the file or directory was successfully added, 559 /// false if the file or directory already exists in the file system with 560 /// different contents. 561 bool addFile(const Twine &Path, time_t ModificationTime, 562 std::unique_ptr<llvm::MemoryBuffer> Buffer, 563 std::optional<uint32_t> User = std::nullopt, 564 std::optional<uint32_t> Group = std::nullopt, 565 std::optional<llvm::sys::fs::file_type> Type = std::nullopt, 566 std::optional<llvm::sys::fs::perms> Perms = std::nullopt); 567 568 /// Add a hard link to a file. 569 /// 570 /// Here hard links are not intended to be fully equivalent to the classical 571 /// filesystem. Both the hard link and the file share the same buffer and 572 /// status (and thus have the same UniqueID). Because of this there is no way 573 /// to distinguish between the link and the file after the link has been 574 /// added. 575 /// 576 /// The \p Target path must be an existing file or a hardlink. The 577 /// \p NewLink file must not have been added before. 
The \p Target 578 /// path must not be a directory. The \p NewLink node is added as a hard 579 /// link which points to the resolved file of \p Target node. 580 /// \return true if the above condition is satisfied and hardlink was 581 /// successfully created, false otherwise. 582 bool addHardLink(const Twine &NewLink, const Twine &Target); 583 584 /// Arbitrary max depth to search through symlinks. We can get into problems 585 /// if a link links to a link that links back to the link, for example. 586 static constexpr size_t MaxSymlinkDepth = 16; 587 588 /// Add a symbolic link. Unlike a HardLink, because \p Target doesn't need 589 /// to refer to a file (or refer to anything, as it happens). Also, an 590 /// in-memory directory for \p Target isn't automatically created. 591 bool 592 addSymbolicLink(const Twine &NewLink, const Twine &Target, 593 time_t ModificationTime, 594 std::optional<uint32_t> User = std::nullopt, 595 std::optional<uint32_t> Group = std::nullopt, 596 std::optional<llvm::sys::fs::perms> Perms = std::nullopt); 597 598 /// Add a buffer to the VFS with a path. The VFS does not own the buffer. 599 /// If present, User, Group, Type and Perms apply to the newly-created file 600 /// or directory. 601 /// \return true if the file or directory was successfully added, 602 /// false if the file or directory already exists in the file system with 603 /// different contents. 604 bool addFileNoOwn(const Twine &Path, time_t ModificationTime, 605 const llvm::MemoryBufferRef &Buffer, 606 std::optional<uint32_t> User = std::nullopt, 607 std::optional<uint32_t> Group = std::nullopt, 608 std::optional<llvm::sys::fs::file_type> Type = std::nullopt, 609 std::optional<llvm::sys::fs::perms> Perms = std::nullopt); 610 611 std::string toString() const; 612 613 /// Return true if this file system normalizes . and .. in paths. 
useNormalizedPaths()614 bool useNormalizedPaths() const { return UseNormalizedPaths; } 615 616 llvm::ErrorOr<Status> status(const Twine &Path) override; 617 llvm::ErrorOr<std::unique_ptr<File>> 618 openFileForRead(const Twine &Path) override; 619 directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override; 620 getCurrentWorkingDirectory()621 llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override { 622 return WorkingDirectory; 623 } 624 /// Canonicalizes \p Path by combining with the current working 625 /// directory and normalizing the path (e.g. remove dots). If the current 626 /// working directory is not set, this returns errc::operation_not_permitted. 627 /// 628 /// This doesn't resolve symlinks as they are not supported in in-memory file 629 /// system. 630 std::error_code getRealPath(const Twine &Path, 631 SmallVectorImpl<char> &Output) override; 632 std::error_code isLocal(const Twine &Path, bool &Result) override; 633 std::error_code setCurrentWorkingDirectory(const Twine &Path) override; 634 635 protected: 636 void printImpl(raw_ostream &OS, PrintType Type, 637 unsigned IndentLevel) const override; 638 }; 639 640 /// Get a globally unique ID for a virtual file or directory. 641 llvm::sys::fs::UniqueID getNextVirtualUniqueID(); 642 643 /// Gets a \p FileSystem for a virtual file system described in YAML 644 /// format. 
645 std::unique_ptr<FileSystem> 646 getVFSFromYAML(std::unique_ptr<llvm::MemoryBuffer> Buffer, 647 llvm::SourceMgr::DiagHandlerTy DiagHandler, 648 StringRef YAMLFilePath, void *DiagContext = nullptr, 649 IntrusiveRefCntPtr<FileSystem> ExternalFS = getRealFileSystem()); 650 651 struct YAMLVFSEntry { 652 template <typename T1, typename T2> 653 YAMLVFSEntry(T1 &&VPath, T2 &&RPath, bool IsDirectory = false) VPathYAMLVFSEntry654 : VPath(std::forward<T1>(VPath)), RPath(std::forward<T2>(RPath)), 655 IsDirectory(IsDirectory) {} 656 std::string VPath; 657 std::string RPath; 658 bool IsDirectory = false; 659 }; 660 661 class RedirectingFSDirIterImpl; 662 class RedirectingFileSystemParser; 663 664 /// A virtual file system parsed from a YAML file. 665 /// 666 /// Currently, this class allows creating virtual files and directories. Virtual 667 /// files map to existing external files in \c ExternalFS, and virtual 668 /// directories may either map to existing directories in \c ExternalFS or list 669 /// their contents in the form of other virtual directories and/or files. 670 /// 671 /// The basic structure of the parsed file is: 672 /// \verbatim 673 /// { 674 /// 'version': <version number>, 675 /// <optional configuration> 676 /// 'roots': [ 677 /// <directory entries> 678 /// ] 679 /// } 680 /// \endverbatim 681 /// The roots may be absolute or relative. If relative they will be made 682 /// absolute against either current working directory or the directory where 683 /// the Overlay YAML file is located, depending on the 'root-relative' 684 /// configuration. 685 /// 686 /// All configuration options are optional. 
///   'case-sensitive': <boolean, default=(true for Posix, false for Windows)>
///   'use-external-names': <boolean, default=true>
///   'root-relative': <string, one of 'cwd' or 'overlay-dir', default='cwd'>
///   'overlay-relative': <boolean, default=false>
///   'fallthrough': <boolean, default=true, deprecated - use 'redirecting-with'
///                   instead>
///   'redirecting-with': <string, one of 'fallthrough', 'fallback', or
///                        'redirect-only', default='fallthrough'>
///
/// To clarify, 'root-relative' option will prepend the current working
/// directory, or the overlay directory to the 'roots->name' field only if
/// 'roots->name' is a relative path. On the other hand, when 'overlay-relative'
/// is set to 'true', external paths will always be prepended with the overlay
/// directory, even if external paths are not relative paths. The
/// 'root-relative' option has no interaction with the 'overlay-relative'
/// option.
///
/// Virtual directories that list their contents are represented as
/// \verbatim
/// {
///   'type': 'directory',
///   'name': <string>,
///   'contents': [ <file or directory entries> ]
/// }
/// \endverbatim
/// The default attributes for such virtual directories are:
/// \verbatim
/// MTime = now() when created
/// Perms = 0777
/// User = Group = 0
/// Size = 0
/// UniqueID = unspecified unique value
/// \endverbatim
/// When a path prefix matches such a directory, the next component in the path
/// is matched against the entries in the 'contents' array.
///
/// Re-mapped directories, on the other hand, are represented as
/// \verbatim
/// {
///   'type': 'directory-remap',
///   'name': <string>,
///   'use-external-name': <boolean>, # Optional
///   'external-contents': <path to external directory>
/// }
/// \endverbatim
/// and inherit their attributes from the external directory. When a path
/// prefix matches such an entry, the unmatched components are appended to the
/// 'external-contents' path, and the resulting path is looked up in the
/// external file system instead.
///
/// Re-mapped files are represented as
/// \verbatim
/// {
///   'type': 'file',
///   'name': <string>,
///   'use-external-name': <boolean>, # Optional
///   'external-contents': <path to external file>
/// }
/// \endverbatim
/// Their attributes and file contents are determined by looking up the file at
/// their 'external-contents' path in the external file system.
///
/// For 'file', 'directory' and 'directory-remap' entries the 'name' field may
/// contain multiple path components (e.g. /path/to/file). However, any
/// directory in such a path that contains more than one child must be uniquely
/// represented by a 'directory' entry.
///
/// When the 'use-external-name' field is set, calls to \a vfs::File::status()
/// give the external (remapped) filesystem name instead of the name the file
/// was accessed by. This is an intentional leak through the \a
/// RedirectingFileSystem abstraction layer. It enables clients to discover
/// (and use) the external file location when communicating with users or tools
/// that don't use the same VFS overlay.
///
/// FIXME: 'use-external-name' causes behaviour that's inconsistent with how
/// "real" filesystems behave. Maybe there should be a separate channel for
/// this information.
764 class RedirectingFileSystem 765 : public RTTIExtends<RedirectingFileSystem, vfs::FileSystem> { 766 public: 767 static const char ID; 768 enum EntryKind { EK_Directory, EK_DirectoryRemap, EK_File }; 769 enum NameKind { NK_NotSet, NK_External, NK_Virtual }; 770 771 /// The type of redirection to perform. 772 enum class RedirectKind { 773 /// Lookup the redirected path first (ie. the one specified in 774 /// 'external-contents') and if that fails "fallthrough" to a lookup of the 775 /// originally provided path. 776 Fallthrough, 777 /// Lookup the provided path first and if that fails, "fallback" to a 778 /// lookup of the redirected path. 779 Fallback, 780 /// Only lookup the redirected path, do not lookup the originally provided 781 /// path. 782 RedirectOnly 783 }; 784 785 /// The type of relative path used by Roots. 786 enum class RootRelativeKind { 787 /// The roots are relative to the current working directory. 788 CWD, 789 /// The roots are relative to the directory where the Overlay YAML file 790 // locates. 791 OverlayDir 792 }; 793 794 /// A single file or directory in the VFS. 795 class Entry { 796 EntryKind Kind; 797 std::string Name; 798 799 public: Entry(EntryKind K,StringRef Name)800 Entry(EntryKind K, StringRef Name) : Kind(K), Name(Name) {} 801 virtual ~Entry() = default; 802 getName()803 StringRef getName() const { return Name; } getKind()804 EntryKind getKind() const { return Kind; } 805 }; 806 807 /// A directory in the vfs with explicitly specified contents. 808 class DirectoryEntry : public Entry { 809 std::vector<std::unique_ptr<Entry>> Contents; 810 Status S; 811 812 public: 813 /// Constructs a directory entry with explicitly specified contents. 
DirectoryEntry(StringRef Name,std::vector<std::unique_ptr<Entry>> Contents,Status S)814 DirectoryEntry(StringRef Name, std::vector<std::unique_ptr<Entry>> Contents, 815 Status S) 816 : Entry(EK_Directory, Name), Contents(std::move(Contents)), 817 S(std::move(S)) {} 818 819 /// Constructs an empty directory entry. DirectoryEntry(StringRef Name,Status S)820 DirectoryEntry(StringRef Name, Status S) 821 : Entry(EK_Directory, Name), S(std::move(S)) {} 822 getStatus()823 Status getStatus() { return S; } 824 addContent(std::unique_ptr<Entry> Content)825 void addContent(std::unique_ptr<Entry> Content) { 826 Contents.push_back(std::move(Content)); 827 } 828 getLastContent()829 Entry *getLastContent() const { return Contents.back().get(); } 830 831 using iterator = decltype(Contents)::iterator; 832 contents_begin()833 iterator contents_begin() { return Contents.begin(); } contents_end()834 iterator contents_end() { return Contents.end(); } 835 classof(const Entry * E)836 static bool classof(const Entry *E) { return E->getKind() == EK_Directory; } 837 }; 838 839 /// A file or directory in the vfs that is mapped to a file or directory in 840 /// the external filesystem. 841 class RemapEntry : public Entry { 842 std::string ExternalContentsPath; 843 NameKind UseName; 844 845 protected: RemapEntry(EntryKind K,StringRef Name,StringRef ExternalContentsPath,NameKind UseName)846 RemapEntry(EntryKind K, StringRef Name, StringRef ExternalContentsPath, 847 NameKind UseName) 848 : Entry(K, Name), ExternalContentsPath(ExternalContentsPath), 849 UseName(UseName) {} 850 851 public: getExternalContentsPath()852 StringRef getExternalContentsPath() const { return ExternalContentsPath; } 853 854 /// Whether to use the external path as the name for this file or directory. useExternalName(bool GlobalUseExternalName)855 bool useExternalName(bool GlobalUseExternalName) const { 856 return UseName == NK_NotSet ? 
GlobalUseExternalName 857 : (UseName == NK_External); 858 } 859 getUseName()860 NameKind getUseName() const { return UseName; } 861 classof(const Entry * E)862 static bool classof(const Entry *E) { 863 switch (E->getKind()) { 864 case EK_DirectoryRemap: 865 [[fallthrough]]; 866 case EK_File: 867 return true; 868 case EK_Directory: 869 return false; 870 } 871 llvm_unreachable("invalid entry kind"); 872 } 873 }; 874 875 /// A directory in the vfs that maps to a directory in the external file 876 /// system. 877 class DirectoryRemapEntry : public RemapEntry { 878 public: DirectoryRemapEntry(StringRef Name,StringRef ExternalContentsPath,NameKind UseName)879 DirectoryRemapEntry(StringRef Name, StringRef ExternalContentsPath, 880 NameKind UseName) 881 : RemapEntry(EK_DirectoryRemap, Name, ExternalContentsPath, UseName) {} 882 classof(const Entry * E)883 static bool classof(const Entry *E) { 884 return E->getKind() == EK_DirectoryRemap; 885 } 886 }; 887 888 /// A file in the vfs that maps to a file in the external file system. 889 class FileEntry : public RemapEntry { 890 public: FileEntry(StringRef Name,StringRef ExternalContentsPath,NameKind UseName)891 FileEntry(StringRef Name, StringRef ExternalContentsPath, NameKind UseName) 892 : RemapEntry(EK_File, Name, ExternalContentsPath, UseName) {} 893 classof(const Entry * E)894 static bool classof(const Entry *E) { return E->getKind() == EK_File; } 895 }; 896 897 /// Represents the result of a path lookup into the RedirectingFileSystem. 898 struct LookupResult { 899 /// Chain of parent directory entries for \c E. 900 llvm::SmallVector<Entry *, 32> Parents; 901 902 /// The entry the looked-up path corresponds to. 903 Entry *E; 904 905 private: 906 /// When the found Entry is a DirectoryRemapEntry, stores the path in the 907 /// external file system that the looked-up path in the virtual file system 908 // corresponds to. 
909 std::optional<std::string> ExternalRedirect; 910 911 public: 912 LookupResult(Entry *E, sys::path::const_iterator Start, 913 sys::path::const_iterator End); 914 915 /// If the found Entry maps the input path to a path in the external 916 /// file system (i.e. it is a FileEntry or DirectoryRemapEntry), returns 917 /// that path. getExternalRedirectLookupResult918 std::optional<StringRef> getExternalRedirect() const { 919 if (isa<DirectoryRemapEntry>(E)) 920 return StringRef(*ExternalRedirect); 921 if (auto *FE = dyn_cast<FileEntry>(E)) 922 return FE->getExternalContentsPath(); 923 return std::nullopt; 924 } 925 926 /// Get the (canonical) path of the found entry. This uses the as-written 927 /// path components from the VFS specification. 928 void getPath(llvm::SmallVectorImpl<char> &Path) const; 929 }; 930 931 private: 932 friend class RedirectingFSDirIterImpl; 933 friend class RedirectingFileSystemParser; 934 935 /// Canonicalize path by removing ".", "..", "./", components. This is 936 /// a VFS request, do not bother about symlinks in the path components 937 /// but canonicalize in order to perform the correct entry search. 938 std::error_code makeCanonicalForLookup(SmallVectorImpl<char> &Path) const; 939 940 /// Get the File status, or error, from the underlying external file system. 941 /// This returns the status with the originally requested name, while looking 942 /// up the entry using a potentially different path. 943 ErrorOr<Status> getExternalStatus(const Twine &LookupPath, 944 const Twine &OriginalPath) const; 945 946 /// Make \a Path an absolute path. 947 /// 948 /// Makes \a Path absolute using the \a WorkingDir if it is not already. 949 /// 950 /// /absolute/path => /absolute/path 951 /// relative/../path => <WorkingDir>/relative/../path 952 /// 953 /// \param WorkingDir A path that will be used as the base Dir if \a Path 954 /// is not already absolute. 955 /// \param Path A path that is modified to be an absolute path. 
956 /// \returns success if \a path has been made absolute, otherwise a 957 /// platform-specific error_code. 958 std::error_code makeAbsolute(StringRef WorkingDir, 959 SmallVectorImpl<char> &Path) const; 960 961 // In a RedirectingFileSystem, keys can be specified in Posix or Windows 962 // style (or even a mixture of both), so this comparison helper allows 963 // slashes (representing a root) to match backslashes (and vice versa). Note 964 // that, other than the root, path components should not contain slashes or 965 // backslashes. pathComponentMatches(llvm::StringRef lhs,llvm::StringRef rhs)966 bool pathComponentMatches(llvm::StringRef lhs, llvm::StringRef rhs) const { 967 if ((CaseSensitive ? lhs == rhs : lhs.equals_insensitive(rhs))) 968 return true; 969 return (lhs == "/" && rhs == "\\") || (lhs == "\\" && rhs == "/"); 970 } 971 972 /// The root(s) of the virtual file system. 973 std::vector<std::unique_ptr<Entry>> Roots; 974 975 /// The current working directory of the file system. 976 std::string WorkingDirectory; 977 978 /// The file system to use for external references. 979 IntrusiveRefCntPtr<FileSystem> ExternalFS; 980 981 /// This represents the directory path that the YAML file is located. 982 /// This will be prefixed to each 'external-contents' if IsRelativeOverlay 983 /// is set. This will also be prefixed to each 'roots->name' if RootRelative 984 /// is set to RootRelativeKind::OverlayDir and the path is relative. 985 std::string OverlayFileDir; 986 987 /// @name Configuration 988 /// @{ 989 990 /// Whether to perform case-sensitive comparisons. 991 /// 992 /// Currently, case-insensitive matching only works correctly with ASCII. 993 bool CaseSensitive = is_style_posix(sys::path::Style::native); 994 995 /// IsRelativeOverlay marks whether a OverlayFileDir path must 996 /// be prefixed in every 'external-contents' when reading from YAML files. 
997 bool IsRelativeOverlay = false; 998 999 /// Whether to use to use the value of 'external-contents' for the 1000 /// names of files. This global value is overridable on a per-file basis. 1001 bool UseExternalNames = true; 1002 1003 /// True if this FS has redirected a lookup. This does not include 1004 /// fallthrough. 1005 mutable bool HasBeenUsed = false; 1006 1007 /// Used to enable or disable updating `HasBeenUsed`. 1008 bool UsageTrackingActive = false; 1009 1010 /// Determines the lookups to perform, as well as their order. See 1011 /// \c RedirectKind for details. 1012 RedirectKind Redirection = RedirectKind::Fallthrough; 1013 1014 /// Determine the prefix directory if the roots are relative paths. See 1015 /// \c RootRelativeKind for details. 1016 RootRelativeKind RootRelative = RootRelativeKind::CWD; 1017 /// @} 1018 1019 RedirectingFileSystem(IntrusiveRefCntPtr<FileSystem> ExternalFS); 1020 1021 /// Looks up the path <tt>[Start, End)</tt> in \p From, possibly recursing 1022 /// into the contents of \p From if it is a directory. Returns a LookupResult 1023 /// giving the matched entry and, if that entry is a FileEntry or 1024 /// DirectoryRemapEntry, the path it redirects to in the external file system. 1025 ErrorOr<LookupResult> 1026 lookupPathImpl(llvm::sys::path::const_iterator Start, 1027 llvm::sys::path::const_iterator End, Entry *From, 1028 llvm::SmallVectorImpl<Entry *> &Entries) const; 1029 1030 /// Get the status for a path with the provided \c LookupResult. 1031 ErrorOr<Status> status(const Twine &LookupPath, const Twine &OriginalPath, 1032 const LookupResult &Result); 1033 1034 public: 1035 /// Looks up \p Path in \c Roots and returns a LookupResult giving the 1036 /// matched entry and, if the entry was a FileEntry or DirectoryRemapEntry, 1037 /// the path it redirects to in the external file system. 
1038 ErrorOr<LookupResult> lookupPath(StringRef Path) const; 1039 1040 /// Parses \p Buffer, which is expected to be in YAML format and 1041 /// returns a virtual file system representing its contents. 1042 static std::unique_ptr<RedirectingFileSystem> 1043 create(std::unique_ptr<MemoryBuffer> Buffer, 1044 SourceMgr::DiagHandlerTy DiagHandler, StringRef YAMLFilePath, 1045 void *DiagContext, IntrusiveRefCntPtr<FileSystem> ExternalFS); 1046 1047 /// Redirect each of the remapped files from first to second. 1048 static std::unique_ptr<RedirectingFileSystem> 1049 create(ArrayRef<std::pair<std::string, std::string>> RemappedFiles, 1050 bool UseExternalNames, FileSystem &ExternalFS); 1051 1052 ErrorOr<Status> status(const Twine &Path) override; 1053 bool exists(const Twine &Path) override; 1054 ErrorOr<std::unique_ptr<File>> openFileForRead(const Twine &Path) override; 1055 1056 std::error_code getRealPath(const Twine &Path, 1057 SmallVectorImpl<char> &Output) override; 1058 1059 llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override; 1060 1061 std::error_code setCurrentWorkingDirectory(const Twine &Path) override; 1062 1063 std::error_code isLocal(const Twine &Path, bool &Result) override; 1064 1065 std::error_code makeAbsolute(SmallVectorImpl<char> &Path) const override; 1066 1067 directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override; 1068 1069 void setOverlayFileDir(StringRef PrefixDir); 1070 1071 StringRef getOverlayFileDir() const; 1072 1073 /// Sets the redirection kind to \c Fallthrough if true or \c RedirectOnly 1074 /// otherwise. Will removed in the future, use \c setRedirection instead. 
1075 void setFallthrough(bool Fallthrough); 1076 1077 void setRedirection(RedirectingFileSystem::RedirectKind Kind); 1078 1079 std::vector<llvm::StringRef> getRoots() const; 1080 hasBeenUsed()1081 bool hasBeenUsed() const { return HasBeenUsed; }; clearHasBeenUsed()1082 void clearHasBeenUsed() { HasBeenUsed = false; } 1083 setUsageTrackingActive(bool Active)1084 void setUsageTrackingActive(bool Active) { UsageTrackingActive = Active; } 1085 1086 void printEntry(raw_ostream &OS, Entry *E, unsigned IndentLevel = 0) const; 1087 1088 protected: 1089 void printImpl(raw_ostream &OS, PrintType Type, 1090 unsigned IndentLevel) const override; 1091 void visitChildFileSystems(VisitCallbackTy Callback) override; 1092 }; 1093 1094 /// Collect all pairs of <virtual path, real path> entries from the 1095 /// \p YAMLFilePath. This is used by the module dependency collector to forward 1096 /// the entries into the reproducer output VFS YAML file. 1097 void collectVFSFromYAML( 1098 std::unique_ptr<llvm::MemoryBuffer> Buffer, 1099 llvm::SourceMgr::DiagHandlerTy DiagHandler, StringRef YAMLFilePath, 1100 SmallVectorImpl<YAMLVFSEntry> &CollectedEntries, 1101 void *DiagContext = nullptr, 1102 IntrusiveRefCntPtr<FileSystem> ExternalFS = getRealFileSystem()); 1103 1104 class YAMLVFSWriter { 1105 std::vector<YAMLVFSEntry> Mappings; 1106 std::optional<bool> IsCaseSensitive; 1107 std::optional<bool> IsOverlayRelative; 1108 std::optional<bool> UseExternalNames; 1109 std::string OverlayDir; 1110 1111 void addEntry(StringRef VirtualPath, StringRef RealPath, bool IsDirectory); 1112 1113 public: 1114 YAMLVFSWriter() = default; 1115 1116 void addFileMapping(StringRef VirtualPath, StringRef RealPath); 1117 void addDirectoryMapping(StringRef VirtualPath, StringRef RealPath); 1118 setCaseSensitivity(bool CaseSensitive)1119 void setCaseSensitivity(bool CaseSensitive) { 1120 IsCaseSensitive = CaseSensitive; 1121 } 1122 setUseExternalNames(bool UseExtNames)1123 void setUseExternalNames(bool UseExtNames) 
{ UseExternalNames = UseExtNames; } 1124 setOverlayDir(StringRef OverlayDirectory)1125 void setOverlayDir(StringRef OverlayDirectory) { 1126 IsOverlayRelative = true; 1127 OverlayDir.assign(OverlayDirectory.str()); 1128 } 1129 getMappings()1130 const std::vector<YAMLVFSEntry> &getMappings() const { return Mappings; } 1131 1132 void write(llvm::raw_ostream &OS); 1133 }; 1134 1135 } // namespace vfs 1136 } // namespace llvm 1137 1138 #endif // LLVM_SUPPORT_VIRTUALFILESYSTEM_H 1139