xref: /freebsd/contrib/llvm-project/libcxx/src/filesystem/path.cpp (revision 5ca8e32633c4ffbbcd6762e5888b6a4ba0708c6c)
1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include <__config>
10 #include <filesystem>
11 #include <vector>
12 
13 #include "error.h"
14 #include "path_parser.h"
15 
16 _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
17 
18 using detail::ErrorHandler;
19 using parser::createView;
20 using parser::PathParser;
21 using parser::string_view_t;
22 
23 ///////////////////////////////////////////////////////////////////////////////
24 //                            path definitions
25 ///////////////////////////////////////////////////////////////////////////////
26 
27 constexpr path::value_type path::preferred_separator;
28 
29 path& path::replace_extension(path const& replacement) {
30   path p = extension();
31   if (not p.empty()) {
32     __pn_.erase(__pn_.size() - p.native().size());
33   }
34   if (!replacement.empty()) {
35     if (replacement.native()[0] != '.') {
36       __pn_ += PATHSTR(".");
37     }
38     __pn_.append(replacement.__pn_);
39   }
40   return *this;
41 }
42 
43 ///////////////////////////////////////////////////////////////////////////////
44 // path.decompose
45 
46 string_view_t path::__root_name() const {
47   auto PP = PathParser::CreateBegin(__pn_);
48   if (PP.State == PathParser::PS_InRootName)
49     return *PP;
50   return {};
51 }
52 
53 string_view_t path::__root_directory() const {
54   auto PP = PathParser::CreateBegin(__pn_);
55   if (PP.State == PathParser::PS_InRootName)
56     ++PP;
57   if (PP.State == PathParser::PS_InRootDir)
58     return *PP;
59   return {};
60 }
61 
62 string_view_t path::__root_path_raw() const {
63   auto PP = PathParser::CreateBegin(__pn_);
64   if (PP.State == PathParser::PS_InRootName) {
65     auto NextCh = PP.peek();
66     if (NextCh && isSeparator(*NextCh)) {
67       ++PP;
68       return createView(__pn_.data(), &PP.RawEntry.back());
69     }
70     return PP.RawEntry;
71   }
72   if (PP.State == PathParser::PS_InRootDir)
73     return *PP;
74   return {};
75 }
76 
77 static bool ConsumeRootName(PathParser *PP) {
78   static_assert(PathParser::PS_BeforeBegin == 1 &&
79       PathParser::PS_InRootName == 2,
80       "Values for enums are incorrect");
81   while (PP->State <= PathParser::PS_InRootName)
82     ++(*PP);
83   return PP->State == PathParser::PS_AtEnd;
84 }
85 
86 static bool ConsumeRootDir(PathParser* PP) {
87   static_assert(PathParser::PS_BeforeBegin == 1 &&
88                 PathParser::PS_InRootName == 2 &&
89                 PathParser::PS_InRootDir == 3, "Values for enums are incorrect");
90   while (PP->State <= PathParser::PS_InRootDir)
91     ++(*PP);
92   return PP->State == PathParser::PS_AtEnd;
93 }
94 
95 string_view_t path::__relative_path() const {
96   auto PP = PathParser::CreateBegin(__pn_);
97   if (ConsumeRootDir(&PP))
98     return {};
99   return createView(PP.RawEntry.data(), &__pn_.back());
100 }
101 
102 string_view_t path::__parent_path() const {
103   if (empty())
104     return {};
105   // Determine if we have a root path but not a relative path. In that case
106   // return *this.
107   {
108     auto PP = PathParser::CreateBegin(__pn_);
109     if (ConsumeRootDir(&PP))
110       return __pn_;
111   }
112   // Otherwise remove a single element from the end of the path, and return
113   // a string representing that path
114   {
115     auto PP = PathParser::CreateEnd(__pn_);
116     --PP;
117     if (PP.RawEntry.data() == __pn_.data())
118       return {};
119     --PP;
120     return createView(__pn_.data(), &PP.RawEntry.back());
121   }
122 }
123 
124 string_view_t path::__filename() const {
125   if (empty())
126     return {};
127   {
128     PathParser PP = PathParser::CreateBegin(__pn_);
129     if (ConsumeRootDir(&PP))
130       return {};
131   }
132   return *(--PathParser::CreateEnd(__pn_));
133 }
134 
135 string_view_t path::__stem() const {
136   return parser::separate_filename(__filename()).first;
137 }
138 
139 string_view_t path::__extension() const {
140   return parser::separate_filename(__filename()).second;
141 }
142 
143 ////////////////////////////////////////////////////////////////////////////
144 // path.gen
145 
146 enum PathPartKind : unsigned char {
147   PK_None,
148   PK_RootSep,
149   PK_Filename,
150   PK_Dot,
151   PK_DotDot,
152   PK_TrailingSep
153 };
154 
155 static PathPartKind ClassifyPathPart(string_view_t Part) {
156   if (Part.empty())
157     return PK_TrailingSep;
158   if (Part == PATHSTR("."))
159     return PK_Dot;
160   if (Part == PATHSTR(".."))
161     return PK_DotDot;
162   if (Part == PATHSTR("/"))
163     return PK_RootSep;
164 #if defined(_LIBCPP_WIN32API)
165   if (Part == PATHSTR("\\"))
166     return PK_RootSep;
167 #endif
168   return PK_Filename;
169 }
170 
171 path path::lexically_normal() const {
172   if (__pn_.empty())
173     return *this;
174 
175   using PartKindPair = pair<string_view_t, PathPartKind>;
176   vector<PartKindPair> Parts;
177   // Guess as to how many elements the path has to avoid reallocating.
178   Parts.reserve(32);
179 
180   // Track the total size of the parts as we collect them. This allows the
181   // resulting path to reserve the correct amount of memory.
182   size_t NewPathSize = 0;
183   auto AddPart = [&](PathPartKind K, string_view_t P) {
184     NewPathSize += P.size();
185     Parts.emplace_back(P, K);
186   };
187   auto LastPartKind = [&]() {
188     if (Parts.empty())
189       return PK_None;
190     return Parts.back().second;
191   };
192 
193   bool MaybeNeedTrailingSep = false;
194   // Build a stack containing the remaining elements of the path, popping off
195   // elements which occur before a '..' entry.
196   for (auto PP = PathParser::CreateBegin(__pn_); PP; ++PP) {
197     auto Part = *PP;
198     PathPartKind Kind = ClassifyPathPart(Part);
199     switch (Kind) {
200     case PK_Filename:
201     case PK_RootSep: {
202       // Add all non-dot and non-dot-dot elements to the stack of elements.
203       AddPart(Kind, Part);
204       MaybeNeedTrailingSep = false;
205       break;
206     }
207     case PK_DotDot: {
208       // Only push a ".." element if there are no elements preceding the "..",
209       // or if the preceding element is itself "..".
210       auto LastKind = LastPartKind();
211       if (LastKind == PK_Filename) {
212         NewPathSize -= Parts.back().first.size();
213         Parts.pop_back();
214       } else if (LastKind != PK_RootSep)
215         AddPart(PK_DotDot, PATHSTR(".."));
216       MaybeNeedTrailingSep = LastKind == PK_Filename;
217       break;
218     }
219     case PK_Dot:
220     case PK_TrailingSep: {
221       MaybeNeedTrailingSep = true;
222       break;
223     }
224     case PK_None:
225       __libcpp_unreachable();
226     }
227   }
228   // [fs.path.generic]p6.8: If the path is empty, add a dot.
229   if (Parts.empty())
230     return PATHSTR(".");
231 
232   // [fs.path.generic]p6.7: If the last filename is dot-dot, remove any
233   // trailing directory-separator.
234   bool NeedTrailingSep = MaybeNeedTrailingSep && LastPartKind() == PK_Filename;
235 
236   path Result;
237   Result.__pn_.reserve(Parts.size() + NewPathSize + NeedTrailingSep);
238   for (auto& PK : Parts)
239     Result /= PK.first;
240 
241   if (NeedTrailingSep)
242     Result /= PATHSTR("");
243 
244   Result.make_preferred();
245   return Result;
246 }
247 
248 static int DetermineLexicalElementCount(PathParser PP) {
249   int Count = 0;
250   for (; PP; ++PP) {
251     auto Elem = *PP;
252     if (Elem == PATHSTR(".."))
253       --Count;
254     else if (Elem != PATHSTR(".") && Elem != PATHSTR(""))
255       ++Count;
256   }
257   return Count;
258 }
259 
260 path path::lexically_relative(const path& base) const {
261   { // perform root-name/root-directory mismatch checks
262     auto PP = PathParser::CreateBegin(__pn_);
263     auto PPBase = PathParser::CreateBegin(base.__pn_);
264     auto CheckIterMismatchAtBase = [&]() {
265       return PP.State != PPBase.State &&
266              (PP.inRootPath() || PPBase.inRootPath());
267     };
268     if (PP.inRootName() && PPBase.inRootName()) {
269       if (*PP != *PPBase)
270         return {};
271     } else if (CheckIterMismatchAtBase())
272       return {};
273 
274     if (PP.inRootPath())
275       ++PP;
276     if (PPBase.inRootPath())
277       ++PPBase;
278     if (CheckIterMismatchAtBase())
279       return {};
280   }
281 
282   // Find the first mismatching element
283   auto PP = PathParser::CreateBegin(__pn_);
284   auto PPBase = PathParser::CreateBegin(base.__pn_);
285   while (PP && PPBase && PP.State == PPBase.State && *PP == *PPBase) {
286     ++PP;
287     ++PPBase;
288   }
289 
290   // If there is no mismatch, return ".".
291   if (!PP && !PPBase)
292     return ".";
293 
294   // Otherwise, determine the number of elements, 'n', which are not dot or
295   // dot-dot minus the number of dot-dot elements.
296   int ElemCount = DetermineLexicalElementCount(PPBase);
297   if (ElemCount < 0)
298     return {};
299 
300   // if n == 0 and (a == end() || a->empty()), returns path("."); otherwise
301   if (ElemCount == 0 && (PP.atEnd() || *PP == PATHSTR("")))
302     return PATHSTR(".");
303 
304   // return a path constructed with 'n' dot-dot elements, followed by the
305   // elements of '*this' after the mismatch.
306   path Result;
307   // FIXME: Reserve enough room in Result that it won't have to re-allocate.
308   while (ElemCount--)
309     Result /= PATHSTR("..");
310   for (; PP; ++PP)
311     Result /= *PP;
312   return Result;
313 }
314 
315 ////////////////////////////////////////////////////////////////////////////
316 // path.comparisons
317 static int CompareRootName(PathParser *LHS, PathParser *RHS) {
318   if (!LHS->inRootName() && !RHS->inRootName())
319     return 0;
320 
321   auto GetRootName = [](PathParser *Parser) -> string_view_t {
322     return Parser->inRootName() ? **Parser : PATHSTR("");
323   };
324   int res = GetRootName(LHS).compare(GetRootName(RHS));
325   ConsumeRootName(LHS);
326   ConsumeRootName(RHS);
327   return res;
328 }
329 
330 static int CompareRootDir(PathParser *LHS, PathParser *RHS) {
331   if (!LHS->inRootDir() && RHS->inRootDir())
332     return -1;
333   else if (LHS->inRootDir() && !RHS->inRootDir())
334     return 1;
335   else {
336     ConsumeRootDir(LHS);
337     ConsumeRootDir(RHS);
338     return 0;
339   }
340 }
341 
342 static int CompareRelative(PathParser *LHSPtr, PathParser *RHSPtr) {
343   auto &LHS = *LHSPtr;
344   auto &RHS = *RHSPtr;
345 
346   int res;
347   while (LHS && RHS) {
348     if ((res = (*LHS).compare(*RHS)) != 0)
349       return res;
350     ++LHS;
351     ++RHS;
352   }
353   return 0;
354 }
355 
356 static int CompareEndState(PathParser *LHS, PathParser *RHS) {
357   if (LHS->atEnd() && !RHS->atEnd())
358     return -1;
359   else if (!LHS->atEnd() && RHS->atEnd())
360     return 1;
361   return 0;
362 }
363 
364 int path::__compare(string_view_t __s) const {
365   auto LHS = PathParser::CreateBegin(__pn_);
366   auto RHS = PathParser::CreateBegin(__s);
367   int res;
368 
369   if ((res = CompareRootName(&LHS, &RHS)) != 0)
370     return res;
371 
372   if ((res = CompareRootDir(&LHS, &RHS)) != 0)
373     return res;
374 
375   if ((res = CompareRelative(&LHS, &RHS)) != 0)
376     return res;
377 
378   return CompareEndState(&LHS, &RHS);
379 }
380 
381 ////////////////////////////////////////////////////////////////////////////
382 // path.nonmembers
383 size_t hash_value(const path& __p) noexcept {
384   auto PP = PathParser::CreateBegin(__p.native());
385   size_t hash_value = 0;
386   hash<string_view_t> hasher;
387   while (PP) {
388     hash_value = __hash_combine(hash_value, hasher(*PP));
389     ++PP;
390   }
391   return hash_value;
392 }
393 
394 ////////////////////////////////////////////////////////////////////////////
395 // path.itr
396 path::iterator path::begin() const {
397   auto PP = PathParser::CreateBegin(__pn_);
398   iterator it;
399   it.__path_ptr_ = this;
400   it.__state_ = static_cast<path::iterator::_ParserState>(PP.State);
401   it.__entry_ = PP.RawEntry;
402   it.__stashed_elem_.__assign_view(*PP);
403   return it;
404 }
405 
406 path::iterator path::end() const {
407   iterator it{};
408   it.__state_ = path::iterator::_AtEnd;
409   it.__path_ptr_ = this;
410   return it;
411 }
412 
413 path::iterator& path::iterator::__increment() {
414   PathParser PP(__path_ptr_->native(), __entry_, __state_);
415   ++PP;
416   __state_ = static_cast<_ParserState>(PP.State);
417   __entry_ = PP.RawEntry;
418   __stashed_elem_.__assign_view(*PP);
419   return *this;
420 }
421 
422 path::iterator& path::iterator::__decrement() {
423   PathParser PP(__path_ptr_->native(), __entry_, __state_);
424   --PP;
425   __state_ = static_cast<_ParserState>(PP.State);
426   __entry_ = PP.RawEntry;
427   __stashed_elem_.__assign_view(*PP);
428   return *this;
429 }
430 
431 #if defined(_LIBCPP_WIN32API)
432 ////////////////////////////////////////////////////////////////////////////
433 // Windows path conversions
434 size_t __wide_to_char(const wstring &str, char *out, size_t outlen) {
435   if (str.empty())
436     return 0;
437   ErrorHandler<size_t> err("__wide_to_char", nullptr);
438   UINT codepage = AreFileApisANSI() ? CP_ACP : CP_OEMCP;
439   BOOL used_default = FALSE;
440   int ret = WideCharToMultiByte(codepage, 0, str.data(), str.size(), out,
441                                 outlen, nullptr, &used_default);
442   if (ret <= 0 || used_default)
443     return err.report(errc::illegal_byte_sequence);
444   return ret;
445 }
446 
447 size_t __char_to_wide(const string &str, wchar_t *out, size_t outlen) {
448   if (str.empty())
449     return 0;
450   ErrorHandler<size_t> err("__char_to_wide", nullptr);
451   UINT codepage = AreFileApisANSI() ? CP_ACP : CP_OEMCP;
452   int ret = MultiByteToWideChar(codepage, MB_ERR_INVALID_CHARS, str.data(),
453                                 str.size(), out, outlen);
454   if (ret <= 0)
455     return err.report(errc::illegal_byte_sequence);
456   return ret;
457 }
458 #endif
459 
460 _LIBCPP_END_NAMESPACE_FILESYSTEM
461