xref: /freebsd/contrib/llvm-project/libcxx/src/filesystem/path_parser.h (revision 3ceba58a7509418b47b8fca2d2b6bbf088714e26)
1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef PATH_PARSER_H
10 #define PATH_PARSER_H
11 
12 #include <__config>
13 #include <__utility/unreachable.h>
14 #include <cstddef>
15 #include <filesystem>
16 #include <utility>
17 
18 #include "format_string.h"
19 
20 _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
21 
/// \brief Tell whether \p C is a directory separator.
///
/// '/' is always a separator; '\\' is one as well when the library is built
/// against the Windows API (_LIBCPP_WIN32API).
inline bool isSeparator(path::value_type C) {
#if defined(_LIBCPP_WIN32API)
  return C == '/' || C == '\\';
#else
  return C == '/';
#endif
}
31 
/// \brief Tell whether \p C is an ASCII letter ('a'-'z' or 'A'-'Z'), i.e. a
///   character that may appear as the letter of a "X:" drive specification.
inline bool isDriveLetter(path::value_type C) {
  const bool IsLowerCase = 'a' <= C && C <= 'z';
  const bool IsUpperCase = 'A' <= C && C <= 'Z';
  return IsLowerCase || IsUpperCase;
}
33 
34 namespace parser {
35 
// View over the characters of a path, using the path's own string view type.
using string_view_t    = path::__string_view;
// Two views returned together; separate_filename() uses it for the part
// before the extension and the extension itself.
using string_view_pair = pair<string_view_t, string_view_t>;
// Read-only cursor into a path's character sequence; the parser uses it for
// token boundaries and scan positions.
using PosPtr           = path::value_type const*;
39 
40 struct PathParser {
41   enum ParserState : unsigned char {
42     // Zero is a special sentinel value used by default constructed iterators.
43     PS_BeforeBegin   = path::iterator::_BeforeBegin,
44     PS_InRootName    = path::iterator::_InRootName,
45     PS_InRootDir     = path::iterator::_InRootDir,
46     PS_InFilenames   = path::iterator::_InFilenames,
47     PS_InTrailingSep = path::iterator::_InTrailingSep,
48     PS_AtEnd         = path::iterator::_AtEnd
49   };
50 
51   const string_view_t Path;
52   string_view_t RawEntry;
53   ParserState State_;
54 
55 private:
56   PathParser(string_view_t P, ParserState State) noexcept : Path(P), State_(State) {}
57 
58 public:
59   PathParser(string_view_t P, string_view_t E, unsigned char S)
60       : Path(P), RawEntry(E), State_(static_cast<ParserState>(S)) {
61     // S cannot be '0' or PS_BeforeBegin.
62   }
63 
64   static PathParser CreateBegin(string_view_t P) noexcept {
65     PathParser PP(P, PS_BeforeBegin);
66     PP.increment();
67     return PP;
68   }
69 
70   static PathParser CreateEnd(string_view_t P) noexcept {
71     PathParser PP(P, PS_AtEnd);
72     return PP;
73   }
74 
75   PosPtr peek() const noexcept {
76     auto TkEnd = getNextTokenStartPos();
77     auto End   = getAfterBack();
78     return TkEnd == End ? nullptr : TkEnd;
79   }
80 
81   void increment() noexcept {
82     const PosPtr End   = getAfterBack();
83     const PosPtr Start = getNextTokenStartPos();
84     if (Start == End)
85       return makeState(PS_AtEnd);
86 
87     switch (State_) {
88     case PS_BeforeBegin: {
89       PosPtr TkEnd = consumeRootName(Start, End);
90       if (TkEnd)
91         return makeState(PS_InRootName, Start, TkEnd);
92     }
93       _LIBCPP_FALLTHROUGH();
94     case PS_InRootName: {
95       PosPtr TkEnd = consumeAllSeparators(Start, End);
96       if (TkEnd)
97         return makeState(PS_InRootDir, Start, TkEnd);
98       else
99         return makeState(PS_InFilenames, Start, consumeName(Start, End));
100     }
101     case PS_InRootDir:
102       return makeState(PS_InFilenames, Start, consumeName(Start, End));
103 
104     case PS_InFilenames: {
105       PosPtr SepEnd = consumeAllSeparators(Start, End);
106       if (SepEnd != End) {
107         PosPtr TkEnd = consumeName(SepEnd, End);
108         if (TkEnd)
109           return makeState(PS_InFilenames, SepEnd, TkEnd);
110       }
111       return makeState(PS_InTrailingSep, Start, SepEnd);
112     }
113 
114     case PS_InTrailingSep:
115       return makeState(PS_AtEnd);
116 
117     case PS_AtEnd:
118       __libcpp_unreachable();
119     }
120   }
121 
122   void decrement() noexcept {
123     const PosPtr REnd   = getBeforeFront();
124     const PosPtr RStart = getCurrentTokenStartPos() - 1;
125     if (RStart == REnd) // we're decrementing the begin
126       return makeState(PS_BeforeBegin);
127 
128     switch (State_) {
129     case PS_AtEnd: {
130       // Try to consume a trailing separator or root directory first.
131       if (PosPtr SepEnd = consumeAllSeparators(RStart, REnd)) {
132         if (SepEnd == REnd)
133           return makeState(PS_InRootDir, Path.data(), RStart + 1);
134         PosPtr TkStart = consumeRootName(SepEnd, REnd);
135         if (TkStart == REnd)
136           return makeState(PS_InRootDir, RStart, RStart + 1);
137         return makeState(PS_InTrailingSep, SepEnd + 1, RStart + 1);
138       } else {
139         PosPtr TkStart = consumeRootName(RStart, REnd);
140         if (TkStart == REnd)
141           return makeState(PS_InRootName, TkStart + 1, RStart + 1);
142         TkStart = consumeName(RStart, REnd);
143         return makeState(PS_InFilenames, TkStart + 1, RStart + 1);
144       }
145     }
146     case PS_InTrailingSep:
147       return makeState(PS_InFilenames, consumeName(RStart, REnd) + 1, RStart + 1);
148     case PS_InFilenames: {
149       PosPtr SepEnd = consumeAllSeparators(RStart, REnd);
150       if (SepEnd == REnd)
151         return makeState(PS_InRootDir, Path.data(), RStart + 1);
152       PosPtr TkStart = consumeRootName(SepEnd ? SepEnd : RStart, REnd);
153       if (TkStart == REnd) {
154         if (SepEnd)
155           return makeState(PS_InRootDir, SepEnd + 1, RStart + 1);
156         return makeState(PS_InRootName, TkStart + 1, RStart + 1);
157       }
158       TkStart = consumeName(SepEnd, REnd);
159       return makeState(PS_InFilenames, TkStart + 1, SepEnd + 1);
160     }
161     case PS_InRootDir:
162       return makeState(PS_InRootName, Path.data(), RStart + 1);
163     case PS_InRootName:
164     case PS_BeforeBegin:
165       __libcpp_unreachable();
166     }
167   }
168 
169   /// \brief Return a view with the "preferred representation" of the current
170   ///   element. For example trailing separators are represented as a '.'
171   string_view_t operator*() const noexcept {
172     switch (State_) {
173     case PS_BeforeBegin:
174     case PS_AtEnd:
175       return PATHSTR("");
176     case PS_InRootDir:
177       if (RawEntry[0] == '\\')
178         return PATHSTR("\\");
179       else
180         return PATHSTR("/");
181     case PS_InTrailingSep:
182       return PATHSTR("");
183     case PS_InRootName:
184     case PS_InFilenames:
185       return RawEntry;
186     }
187     __libcpp_unreachable();
188   }
189 
190   explicit operator bool() const noexcept { return State_ != PS_BeforeBegin && State_ != PS_AtEnd; }
191 
192   PathParser& operator++() noexcept {
193     increment();
194     return *this;
195   }
196 
197   PathParser& operator--() noexcept {
198     decrement();
199     return *this;
200   }
201 
202   bool atEnd() const noexcept { return State_ == PS_AtEnd; }
203 
204   bool inRootDir() const noexcept { return State_ == PS_InRootDir; }
205 
206   bool inRootName() const noexcept { return State_ == PS_InRootName; }
207 
208   bool inRootPath() const noexcept { return inRootName() || inRootDir(); }
209 
210 private:
211   void makeState(ParserState NewState, PosPtr Start, PosPtr End) noexcept {
212     State_    = NewState;
213     RawEntry = string_view_t(Start, End - Start);
214   }
215   void makeState(ParserState NewState) noexcept {
216     State_    = NewState;
217     RawEntry = {};
218   }
219 
220   PosPtr getAfterBack() const noexcept { return Path.data() + Path.size(); }
221 
222   PosPtr getBeforeFront() const noexcept { return Path.data() - 1; }
223 
224   /// \brief Return a pointer to the first character after the currently
225   ///   lexed element.
226   PosPtr getNextTokenStartPos() const noexcept {
227     switch (State_) {
228     case PS_BeforeBegin:
229       return Path.data();
230     case PS_InRootName:
231     case PS_InRootDir:
232     case PS_InFilenames:
233       return &RawEntry.back() + 1;
234     case PS_InTrailingSep:
235     case PS_AtEnd:
236       return getAfterBack();
237     }
238     __libcpp_unreachable();
239   }
240 
241   /// \brief Return a pointer to the first character in the currently lexed
242   ///   element.
243   PosPtr getCurrentTokenStartPos() const noexcept {
244     switch (State_) {
245     case PS_BeforeBegin:
246     case PS_InRootName:
247       return &Path.front();
248     case PS_InRootDir:
249     case PS_InFilenames:
250     case PS_InTrailingSep:
251       return &RawEntry.front();
252     case PS_AtEnd:
253       return &Path.back() + 1;
254     }
255     __libcpp_unreachable();
256   }
257 
258   // Consume all consecutive separators.
259   PosPtr consumeAllSeparators(PosPtr P, PosPtr End) const noexcept {
260     if (P == nullptr || P == End || !isSeparator(*P))
261       return nullptr;
262     const int Inc = P < End ? 1 : -1;
263     P += Inc;
264     while (P != End && isSeparator(*P))
265       P += Inc;
266     return P;
267   }
268 
269   // Consume exactly N separators, or return nullptr.
270   PosPtr consumeNSeparators(PosPtr P, PosPtr End, int N) const noexcept {
271     PosPtr Ret = consumeAllSeparators(P, End);
272     if (Ret == nullptr)
273       return nullptr;
274     if (P < End) {
275       if (Ret == P + N)
276         return Ret;
277     } else {
278       if (Ret == P - N)
279         return Ret;
280     }
281     return nullptr;
282   }
283 
284   PosPtr consumeName(PosPtr P, PosPtr End) const noexcept {
285     PosPtr Start = P;
286     if (P == nullptr || P == End || isSeparator(*P))
287       return nullptr;
288     const int Inc = P < End ? 1 : -1;
289     P += Inc;
290     while (P != End && !isSeparator(*P))
291       P += Inc;
292     if (P == End && Inc < 0) {
293       // Iterating backwards and consumed all the rest of the input.
294       // Check if the start of the string would have been considered
295       // a root name.
296       PosPtr RootEnd = consumeRootName(End + 1, Start);
297       if (RootEnd)
298         return RootEnd - 1;
299     }
300     return P;
301   }
302 
303   PosPtr consumeDriveLetter(PosPtr P, PosPtr End) const noexcept {
304     if (P == End)
305       return nullptr;
306     if (P < End) {
307       if (P + 1 == End || !isDriveLetter(P[0]) || P[1] != ':')
308         return nullptr;
309       return P + 2;
310     } else {
311       if (P - 1 == End || !isDriveLetter(P[-1]) || P[0] != ':')
312         return nullptr;
313       return P - 2;
314     }
315   }
316 
317   PosPtr consumeNetworkRoot(PosPtr P, PosPtr End) const noexcept {
318     if (P == End)
319       return nullptr;
320     if (P < End)
321       return consumeName(consumeNSeparators(P, End, 2), End);
322     else
323       return consumeNSeparators(consumeName(P, End), End, 2);
324   }
325 
326   PosPtr consumeRootName(PosPtr P, PosPtr End) const noexcept {
327 #if defined(_LIBCPP_WIN32API)
328     if (PosPtr Ret = consumeDriveLetter(P, End))
329       return Ret;
330     if (PosPtr Ret = consumeNetworkRoot(P, End))
331       return Ret;
332 #endif
333     return nullptr;
334   }
335 };
336 
337 inline string_view_pair separate_filename(string_view_t const& s) {
338   if (s == PATHSTR(".") || s == PATHSTR("..") || s.empty())
339     return string_view_pair{s, PATHSTR("")};
340   auto pos = s.find_last_of('.');
341   if (pos == string_view_t::npos || pos == 0)
342     return string_view_pair{s, string_view_t{}};
343   return string_view_pair{s.substr(0, pos), s.substr(pos)};
344 }
345 
346 inline string_view_t createView(PosPtr S, PosPtr E) noexcept { return {S, static_cast<size_t>(E - S) + 1}; }
347 
348 } // namespace parser
349 
350 _LIBCPP_END_NAMESPACE_FILESYSTEM
351 
352 #endif // PATH_PARSER_H
353