xref: /freebsd/contrib/llvm-project/libcxx/src/filesystem/path.cpp (revision 3ceba58a7509418b47b8fca2d2b6bbf088714e26)
1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include <__config>
10 #include <filesystem>
11 #include <vector>
12 
13 #include "error.h"
14 #include "path_parser.h"
15 
16 _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
17 
18 using detail::ErrorHandler;
19 using parser::createView;
20 using parser::PathParser;
21 using parser::string_view_t;
22 
23 ///////////////////////////////////////////////////////////////////////////////
24 //                            path definitions
25 ///////////////////////////////////////////////////////////////////////////////
26 
27 constexpr path::value_type path::preferred_separator;
28 
29 path& path::replace_extension(path const& replacement) {
30   path p = extension();
31   if (not p.empty()) {
32     __pn_.erase(__pn_.size() - p.native().size());
33   }
34   if (!replacement.empty()) {
35     if (replacement.native()[0] != '.') {
36       __pn_ += PATHSTR(".");
37     }
38     __pn_.append(replacement.__pn_);
39   }
40   return *this;
41 }
42 
43 ///////////////////////////////////////////////////////////////////////////////
44 // path.decompose
45 
46 string_view_t path::__root_name() const {
47   auto PP = PathParser::CreateBegin(__pn_);
48   if (PP.State_ == PathParser::PS_InRootName)
49     return *PP;
50   return {};
51 }
52 
53 string_view_t path::__root_directory() const {
54   auto PP = PathParser::CreateBegin(__pn_);
55   if (PP.State_ == PathParser::PS_InRootName)
56     ++PP;
57   if (PP.State_ == PathParser::PS_InRootDir)
58     return *PP;
59   return {};
60 }
61 
62 string_view_t path::__root_path_raw() const {
63   auto PP = PathParser::CreateBegin(__pn_);
64   if (PP.State_ == PathParser::PS_InRootName) {
65     auto NextCh = PP.peek();
66     if (NextCh && isSeparator(*NextCh)) {
67       ++PP;
68       return createView(__pn_.data(), &PP.RawEntry.back());
69     }
70     return PP.RawEntry;
71   }
72   if (PP.State_ == PathParser::PS_InRootDir)
73     return *PP;
74   return {};
75 }
76 
77 static bool ConsumeRootName(PathParser* PP) {
78   static_assert(PathParser::PS_BeforeBegin == 1 && PathParser::PS_InRootName == 2, "Values for enums are incorrect");
79   while (PP->State_ <= PathParser::PS_InRootName)
80     ++(*PP);
81   return PP->State_ == PathParser::PS_AtEnd;
82 }
83 
84 static bool ConsumeRootDir(PathParser* PP) {
85   static_assert(PathParser::PS_BeforeBegin == 1 && PathParser::PS_InRootName == 2 && PathParser::PS_InRootDir == 3,
86                 "Values for enums are incorrect");
87   while (PP->State_ <= PathParser::PS_InRootDir)
88     ++(*PP);
89   return PP->State_ == PathParser::PS_AtEnd;
90 }
91 
92 string_view_t path::__relative_path() const {
93   auto PP = PathParser::CreateBegin(__pn_);
94   if (ConsumeRootDir(&PP))
95     return {};
96   return createView(PP.RawEntry.data(), &__pn_.back());
97 }
98 
99 string_view_t path::__parent_path() const {
100   if (empty())
101     return {};
102   // Determine if we have a root path but not a relative path. In that case
103   // return *this.
104   {
105     auto PP = PathParser::CreateBegin(__pn_);
106     if (ConsumeRootDir(&PP))
107       return __pn_;
108   }
109   // Otherwise remove a single element from the end of the path, and return
110   // a string representing that path
111   {
112     auto PP = PathParser::CreateEnd(__pn_);
113     --PP;
114     if (PP.RawEntry.data() == __pn_.data())
115       return {};
116     --PP;
117     return createView(__pn_.data(), &PP.RawEntry.back());
118   }
119 }
120 
121 string_view_t path::__filename() const {
122   if (empty())
123     return {};
124   {
125     PathParser PP = PathParser::CreateBegin(__pn_);
126     if (ConsumeRootDir(&PP))
127       return {};
128   }
129   return *(--PathParser::CreateEnd(__pn_));
130 }
131 
132 string_view_t path::__stem() const { return parser::separate_filename(__filename()).first; }
133 
134 string_view_t path::__extension() const { return parser::separate_filename(__filename()).second; }
135 
136 ////////////////////////////////////////////////////////////////////////////
137 // path.gen
138 
139 enum PathPartKind : unsigned char { PK_None, PK_RootSep, PK_Filename, PK_Dot, PK_DotDot, PK_TrailingSep };
140 
141 static PathPartKind ClassifyPathPart(string_view_t Part) {
142   if (Part.empty())
143     return PK_TrailingSep;
144   if (Part == PATHSTR("."))
145     return PK_Dot;
146   if (Part == PATHSTR(".."))
147     return PK_DotDot;
148   if (Part == PATHSTR("/"))
149     return PK_RootSep;
150 #if defined(_LIBCPP_WIN32API)
151   if (Part == PATHSTR("\\"))
152     return PK_RootSep;
153 #endif
154   return PK_Filename;
155 }
156 
157 path path::lexically_normal() const {
158   if (__pn_.empty())
159     return *this;
160 
161   using PartKindPair = pair<string_view_t, PathPartKind>;
162   vector<PartKindPair> Parts;
163   // Guess as to how many elements the path has to avoid reallocating.
164   Parts.reserve(32);
165 
166   // Track the total size of the parts as we collect them. This allows the
167   // resulting path to reserve the correct amount of memory.
168   size_t NewPathSize = 0;
169   auto AddPart       = [&](PathPartKind K, string_view_t P) {
170     NewPathSize += P.size();
171     Parts.emplace_back(P, K);
172   };
173   auto LastPartKind = [&]() {
174     if (Parts.empty())
175       return PK_None;
176     return Parts.back().second;
177   };
178 
179   bool MaybeNeedTrailingSep = false;
180   // Build a stack containing the remaining elements of the path, popping off
181   // elements which occur before a '..' entry.
182   for (auto PP = PathParser::CreateBegin(__pn_); PP; ++PP) {
183     auto Part         = *PP;
184     PathPartKind Kind = ClassifyPathPart(Part);
185     switch (Kind) {
186     case PK_Filename:
187     case PK_RootSep: {
188       // Add all non-dot and non-dot-dot elements to the stack of elements.
189       AddPart(Kind, Part);
190       MaybeNeedTrailingSep = false;
191       break;
192     }
193     case PK_DotDot: {
194       // Only push a ".." element if there are no elements preceding the "..",
195       // or if the preceding element is itself "..".
196       auto LastKind = LastPartKind();
197       if (LastKind == PK_Filename) {
198         NewPathSize -= Parts.back().first.size();
199         Parts.pop_back();
200       } else if (LastKind != PK_RootSep)
201         AddPart(PK_DotDot, PATHSTR(".."));
202       MaybeNeedTrailingSep = LastKind == PK_Filename;
203       break;
204     }
205     case PK_Dot:
206     case PK_TrailingSep: {
207       MaybeNeedTrailingSep = true;
208       break;
209     }
210     case PK_None:
211       __libcpp_unreachable();
212     }
213   }
214   // [fs.path.generic]p6.8: If the path is empty, add a dot.
215   if (Parts.empty())
216     return PATHSTR(".");
217 
218   // [fs.path.generic]p6.7: If the last filename is dot-dot, remove any
219   // trailing directory-separator.
220   bool NeedTrailingSep = MaybeNeedTrailingSep && LastPartKind() == PK_Filename;
221 
222   path Result;
223   Result.__pn_.reserve(Parts.size() + NewPathSize + NeedTrailingSep);
224   for (auto& PK : Parts)
225     Result /= PK.first;
226 
227   if (NeedTrailingSep)
228     Result /= PATHSTR("");
229 
230   Result.make_preferred();
231   return Result;
232 }
233 
234 static int DetermineLexicalElementCount(PathParser PP) {
235   int Count = 0;
236   for (; PP; ++PP) {
237     auto Elem = *PP;
238     if (Elem == PATHSTR(".."))
239       --Count;
240     else if (Elem != PATHSTR(".") && Elem != PATHSTR(""))
241       ++Count;
242   }
243   return Count;
244 }
245 
246 path path::lexically_relative(const path& base) const {
247   { // perform root-name/root-directory mismatch checks
248     auto PP                      = PathParser::CreateBegin(__pn_);
249     auto PPBase                  = PathParser::CreateBegin(base.__pn_);
250     auto CheckIterMismatchAtBase = [&]() {
251       return PP.State_ != PPBase.State_ && (PP.inRootPath() || PPBase.inRootPath());
252     };
253     if (PP.inRootName() && PPBase.inRootName()) {
254       if (*PP != *PPBase)
255         return {};
256     } else if (CheckIterMismatchAtBase())
257       return {};
258 
259     if (PP.inRootPath())
260       ++PP;
261     if (PPBase.inRootPath())
262       ++PPBase;
263     if (CheckIterMismatchAtBase())
264       return {};
265   }
266 
267   // Find the first mismatching element
268   auto PP     = PathParser::CreateBegin(__pn_);
269   auto PPBase = PathParser::CreateBegin(base.__pn_);
270   while (PP && PPBase && PP.State_ == PPBase.State_ && *PP == *PPBase) {
271     ++PP;
272     ++PPBase;
273   }
274 
275   // If there is no mismatch, return ".".
276   if (!PP && !PPBase)
277     return ".";
278 
279   // Otherwise, determine the number of elements, 'n', which are not dot or
280   // dot-dot minus the number of dot-dot elements.
281   int ElemCount = DetermineLexicalElementCount(PPBase);
282   if (ElemCount < 0)
283     return {};
284 
285   // if n == 0 and (a == end() || a->empty()), returns path("."); otherwise
286   if (ElemCount == 0 && (PP.atEnd() || *PP == PATHSTR("")))
287     return PATHSTR(".");
288 
289   // return a path constructed with 'n' dot-dot elements, followed by the
290   // elements of '*this' after the mismatch.
291   path Result;
292   // FIXME: Reserve enough room in Result that it won't have to re-allocate.
293   while (ElemCount--)
294     Result /= PATHSTR("..");
295   for (; PP; ++PP)
296     Result /= *PP;
297   return Result;
298 }
299 
300 ////////////////////////////////////////////////////////////////////////////
301 // path.comparisons
302 static int CompareRootName(PathParser* LHS, PathParser* RHS) {
303   if (!LHS->inRootName() && !RHS->inRootName())
304     return 0;
305 
306   auto GetRootName = [](PathParser* Parser) -> string_view_t { return Parser->inRootName() ? **Parser : PATHSTR(""); };
307   int res          = GetRootName(LHS).compare(GetRootName(RHS));
308   ConsumeRootName(LHS);
309   ConsumeRootName(RHS);
310   return res;
311 }
312 
313 static int CompareRootDir(PathParser* LHS, PathParser* RHS) {
314   if (!LHS->inRootDir() && RHS->inRootDir())
315     return -1;
316   else if (LHS->inRootDir() && !RHS->inRootDir())
317     return 1;
318   else {
319     ConsumeRootDir(LHS);
320     ConsumeRootDir(RHS);
321     return 0;
322   }
323 }
324 
325 static int CompareRelative(PathParser* LHSPtr, PathParser* RHSPtr) {
326   auto& LHS = *LHSPtr;
327   auto& RHS = *RHSPtr;
328 
329   int res;
330   while (LHS && RHS) {
331     if ((res = (*LHS).compare(*RHS)) != 0)
332       return res;
333     ++LHS;
334     ++RHS;
335   }
336   return 0;
337 }
338 
339 static int CompareEndState(PathParser* LHS, PathParser* RHS) {
340   if (LHS->atEnd() && !RHS->atEnd())
341     return -1;
342   else if (!LHS->atEnd() && RHS->atEnd())
343     return 1;
344   return 0;
345 }
346 
347 int path::__compare(string_view_t __s) const {
348   auto LHS = PathParser::CreateBegin(__pn_);
349   auto RHS = PathParser::CreateBegin(__s);
350   int res;
351 
352   if ((res = CompareRootName(&LHS, &RHS)) != 0)
353     return res;
354 
355   if ((res = CompareRootDir(&LHS, &RHS)) != 0)
356     return res;
357 
358   if ((res = CompareRelative(&LHS, &RHS)) != 0)
359     return res;
360 
361   return CompareEndState(&LHS, &RHS);
362 }
363 
364 ////////////////////////////////////////////////////////////////////////////
365 // path.nonmembers
366 size_t hash_value(const path& __p) noexcept {
367   auto PP           = PathParser::CreateBegin(__p.native());
368   size_t hash_value = 0;
369   hash<string_view_t> hasher;
370   while (PP) {
371     hash_value = __hash_combine(hash_value, hasher(*PP));
372     ++PP;
373   }
374   return hash_value;
375 }
376 
377 ////////////////////////////////////////////////////////////////////////////
378 // path.itr
379 path::iterator path::begin() const {
380   auto PP = PathParser::CreateBegin(__pn_);
381   iterator it;
382   it.__path_ptr_ = this;
383   it.__state_    = static_cast<path::iterator::_ParserState>(PP.State_);
384   it.__entry_    = PP.RawEntry;
385   it.__stashed_elem_.__assign_view(*PP);
386   return it;
387 }
388 
389 path::iterator path::end() const {
390   iterator it{};
391   it.__state_    = path::iterator::_AtEnd;
392   it.__path_ptr_ = this;
393   return it;
394 }
395 
396 path::iterator& path::iterator::__increment() {
397   PathParser PP(__path_ptr_->native(), __entry_, __state_);
398   ++PP;
399   __state_ = static_cast<_ParserState>(PP.State_);
400   __entry_ = PP.RawEntry;
401   __stashed_elem_.__assign_view(*PP);
402   return *this;
403 }
404 
405 path::iterator& path::iterator::__decrement() {
406   PathParser PP(__path_ptr_->native(), __entry_, __state_);
407   --PP;
408   __state_ = static_cast<_ParserState>(PP.State_);
409   __entry_ = PP.RawEntry;
410   __stashed_elem_.__assign_view(*PP);
411   return *this;
412 }
413 
414 #if defined(_LIBCPP_WIN32API)
415 ////////////////////////////////////////////////////////////////////////////
416 // Windows path conversions
417 size_t __wide_to_char(const wstring& str, char* out, size_t outlen) {
418   if (str.empty())
419     return 0;
420   ErrorHandler<size_t> err("__wide_to_char", nullptr);
421   UINT codepage     = AreFileApisANSI() ? CP_ACP : CP_OEMCP;
422   BOOL used_default = FALSE;
423   int ret           = WideCharToMultiByte(codepage, 0, str.data(), str.size(), out, outlen, nullptr, &used_default);
424   if (ret <= 0 || used_default)
425     return err.report(errc::illegal_byte_sequence);
426   return ret;
427 }
428 
429 size_t __char_to_wide(const string& str, wchar_t* out, size_t outlen) {
430   if (str.empty())
431     return 0;
432   ErrorHandler<size_t> err("__char_to_wide", nullptr);
433   UINT codepage = AreFileApisANSI() ? CP_ACP : CP_OEMCP;
434   int ret       = MultiByteToWideChar(codepage, MB_ERR_INVALID_CHARS, str.data(), str.size(), out, outlen);
435   if (ret <= 0)
436     return err.report(errc::illegal_byte_sequence);
437   return ret;
438 }
439 #endif
440 
441 _LIBCPP_END_NAMESPACE_FILESYSTEM
442