xref: /freebsd/contrib/llvm-project/libcxx/src/filesystem/path_parser.h (revision 5ca8e32633c4ffbbcd6762e5888b6a4ba0708c6c)
1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef PATH_PARSER_H
10 #define PATH_PARSER_H
11 
12 #include <__config>
13 #include <__utility/unreachable.h>
14 #include <cstddef>
15 #include <filesystem>
16 #include <utility>
17 
18 #include "format_string.h"
19 
20 _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
21 
/// \brief Tell whether \p C is a directory separator on the target platform.
///
/// '/' is always a separator; when _LIBCPP_WIN32API is defined the backslash
/// is accepted as well.
inline bool isSeparator(path::value_type C) {
#if defined(_LIBCPP_WIN32API)
  return C == '/' || C == '\\';
#else
  return C == '/';
#endif
}
31 
/// \brief Tell whether \p C is an ASCII letter, i.e. a character that may
///   appear as the letter of a "X:" drive specification.
inline bool isDriveLetter(path::value_type C) {
  const bool IsLowerCase = 'a' <= C && C <= 'z';
  const bool IsUpperCase = 'A' <= C && C <= 'Z';
  return IsLowerCase || IsUpperCase;
}
35 
36 namespace parser {
37 
38 using string_view_t = path::__string_view;
39 using string_view_pair = pair<string_view_t, string_view_t>;
40 using PosPtr = path::value_type const*;
41 
42 struct PathParser {
43   enum ParserState : unsigned char {
44     // Zero is a special sentinel value used by default constructed iterators.
45     PS_BeforeBegin = path::iterator::_BeforeBegin,
46     PS_InRootName = path::iterator::_InRootName,
47     PS_InRootDir = path::iterator::_InRootDir,
48     PS_InFilenames = path::iterator::_InFilenames,
49     PS_InTrailingSep = path::iterator::_InTrailingSep,
50     PS_AtEnd = path::iterator::_AtEnd
51   };
52 
53   const string_view_t Path;
54   string_view_t RawEntry;
55   ParserState State;
56 
57 private:
58   PathParser(string_view_t P, ParserState State) noexcept : Path(P),
59                                                             State(State) {}
60 
61 public:
62   PathParser(string_view_t P, string_view_t E, unsigned char S)
63       : Path(P), RawEntry(E), State(static_cast<ParserState>(S)) {
64     // S cannot be '0' or PS_BeforeBegin.
65   }
66 
67   static PathParser CreateBegin(string_view_t P) noexcept {
68     PathParser PP(P, PS_BeforeBegin);
69     PP.increment();
70     return PP;
71   }
72 
73   static PathParser CreateEnd(string_view_t P) noexcept {
74     PathParser PP(P, PS_AtEnd);
75     return PP;
76   }
77 
78   PosPtr peek() const noexcept {
79     auto TkEnd = getNextTokenStartPos();
80     auto End = getAfterBack();
81     return TkEnd == End ? nullptr : TkEnd;
82   }
83 
84   void increment() noexcept {
85     const PosPtr End = getAfterBack();
86     const PosPtr Start = getNextTokenStartPos();
87     if (Start == End)
88       return makeState(PS_AtEnd);
89 
90     switch (State) {
91     case PS_BeforeBegin: {
92       PosPtr TkEnd = consumeRootName(Start, End);
93       if (TkEnd)
94         return makeState(PS_InRootName, Start, TkEnd);
95     }
96       _LIBCPP_FALLTHROUGH();
97     case PS_InRootName: {
98       PosPtr TkEnd = consumeAllSeparators(Start, End);
99       if (TkEnd)
100         return makeState(PS_InRootDir, Start, TkEnd);
101       else
102         return makeState(PS_InFilenames, Start, consumeName(Start, End));
103     }
104     case PS_InRootDir:
105       return makeState(PS_InFilenames, Start, consumeName(Start, End));
106 
107     case PS_InFilenames: {
108       PosPtr SepEnd = consumeAllSeparators(Start, End);
109       if (SepEnd != End) {
110         PosPtr TkEnd = consumeName(SepEnd, End);
111         if (TkEnd)
112           return makeState(PS_InFilenames, SepEnd, TkEnd);
113       }
114       return makeState(PS_InTrailingSep, Start, SepEnd);
115     }
116 
117     case PS_InTrailingSep:
118       return makeState(PS_AtEnd);
119 
120     case PS_AtEnd:
121       __libcpp_unreachable();
122     }
123   }
124 
125   void decrement() noexcept {
126     const PosPtr REnd = getBeforeFront();
127     const PosPtr RStart = getCurrentTokenStartPos() - 1;
128     if (RStart == REnd) // we're decrementing the begin
129       return makeState(PS_BeforeBegin);
130 
131     switch (State) {
132     case PS_AtEnd: {
133       // Try to consume a trailing separator or root directory first.
134       if (PosPtr SepEnd = consumeAllSeparators(RStart, REnd)) {
135         if (SepEnd == REnd)
136           return makeState(PS_InRootDir, Path.data(), RStart + 1);
137         PosPtr TkStart = consumeRootName(SepEnd, REnd);
138         if (TkStart == REnd)
139           return makeState(PS_InRootDir, RStart, RStart + 1);
140         return makeState(PS_InTrailingSep, SepEnd + 1, RStart + 1);
141       } else {
142         PosPtr TkStart = consumeRootName(RStart, REnd);
143         if (TkStart == REnd)
144           return makeState(PS_InRootName, TkStart + 1, RStart + 1);
145         TkStart = consumeName(RStart, REnd);
146         return makeState(PS_InFilenames, TkStart + 1, RStart + 1);
147       }
148     }
149     case PS_InTrailingSep:
150       return makeState(PS_InFilenames, consumeName(RStart, REnd) + 1,
151                        RStart + 1);
152     case PS_InFilenames: {
153       PosPtr SepEnd = consumeAllSeparators(RStart, REnd);
154       if (SepEnd == REnd)
155         return makeState(PS_InRootDir, Path.data(), RStart + 1);
156       PosPtr TkStart = consumeRootName(SepEnd ? SepEnd : RStart, REnd);
157       if (TkStart == REnd) {
158         if (SepEnd)
159           return makeState(PS_InRootDir, SepEnd + 1, RStart + 1);
160         return makeState(PS_InRootName, TkStart + 1, RStart + 1);
161       }
162       TkStart = consumeName(SepEnd, REnd);
163       return makeState(PS_InFilenames, TkStart + 1, SepEnd + 1);
164     }
165     case PS_InRootDir:
166       return makeState(PS_InRootName, Path.data(), RStart + 1);
167     case PS_InRootName:
168     case PS_BeforeBegin:
169       __libcpp_unreachable();
170     }
171   }
172 
173   /// \brief Return a view with the "preferred representation" of the current
174   ///   element. For example trailing separators are represented as a '.'
175   string_view_t operator*() const noexcept {
176     switch (State) {
177     case PS_BeforeBegin:
178     case PS_AtEnd:
179       return PATHSTR("");
180     case PS_InRootDir:
181       if (RawEntry[0] == '\\')
182         return PATHSTR("\\");
183       else
184         return PATHSTR("/");
185     case PS_InTrailingSep:
186       return PATHSTR("");
187     case PS_InRootName:
188     case PS_InFilenames:
189       return RawEntry;
190     }
191     __libcpp_unreachable();
192   }
193 
194   explicit operator bool() const noexcept {
195     return State != PS_BeforeBegin && State != PS_AtEnd;
196   }
197 
198   PathParser& operator++() noexcept {
199     increment();
200     return *this;
201   }
202 
203   PathParser& operator--() noexcept {
204     decrement();
205     return *this;
206   }
207 
208   bool atEnd() const noexcept {
209     return State == PS_AtEnd;
210   }
211 
212   bool inRootDir() const noexcept {
213     return State == PS_InRootDir;
214   }
215 
216   bool inRootName() const noexcept {
217     return State == PS_InRootName;
218   }
219 
220   bool inRootPath() const noexcept {
221     return inRootName() || inRootDir();
222   }
223 
224 private:
225   void makeState(ParserState NewState, PosPtr Start, PosPtr End) noexcept {
226     State = NewState;
227     RawEntry = string_view_t(Start, End - Start);
228   }
229   void makeState(ParserState NewState) noexcept {
230     State = NewState;
231     RawEntry = {};
232   }
233 
234   PosPtr getAfterBack() const noexcept { return Path.data() + Path.size(); }
235 
236   PosPtr getBeforeFront() const noexcept { return Path.data() - 1; }
237 
238   /// \brief Return a pointer to the first character after the currently
239   ///   lexed element.
240   PosPtr getNextTokenStartPos() const noexcept {
241     switch (State) {
242     case PS_BeforeBegin:
243       return Path.data();
244     case PS_InRootName:
245     case PS_InRootDir:
246     case PS_InFilenames:
247       return &RawEntry.back() + 1;
248     case PS_InTrailingSep:
249     case PS_AtEnd:
250       return getAfterBack();
251     }
252     __libcpp_unreachable();
253   }
254 
255   /// \brief Return a pointer to the first character in the currently lexed
256   ///   element.
257   PosPtr getCurrentTokenStartPos() const noexcept {
258     switch (State) {
259     case PS_BeforeBegin:
260     case PS_InRootName:
261       return &Path.front();
262     case PS_InRootDir:
263     case PS_InFilenames:
264     case PS_InTrailingSep:
265       return &RawEntry.front();
266     case PS_AtEnd:
267       return &Path.back() + 1;
268     }
269     __libcpp_unreachable();
270   }
271 
272   // Consume all consecutive separators.
273   PosPtr consumeAllSeparators(PosPtr P, PosPtr End) const noexcept {
274     if (P == nullptr || P == End || !isSeparator(*P))
275       return nullptr;
276     const int Inc = P < End ? 1 : -1;
277     P += Inc;
278     while (P != End && isSeparator(*P))
279       P += Inc;
280     return P;
281   }
282 
283   // Consume exactly N separators, or return nullptr.
284   PosPtr consumeNSeparators(PosPtr P, PosPtr End, int N) const noexcept {
285     PosPtr Ret = consumeAllSeparators(P, End);
286     if (Ret == nullptr)
287       return nullptr;
288     if (P < End) {
289       if (Ret == P + N)
290         return Ret;
291     } else {
292       if (Ret == P - N)
293         return Ret;
294     }
295     return nullptr;
296   }
297 
298   PosPtr consumeName(PosPtr P, PosPtr End) const noexcept {
299     PosPtr Start = P;
300     if (P == nullptr || P == End || isSeparator(*P))
301       return nullptr;
302     const int Inc = P < End ? 1 : -1;
303     P += Inc;
304     while (P != End && !isSeparator(*P))
305       P += Inc;
306     if (P == End && Inc < 0) {
307       // Iterating backwards and consumed all the rest of the input.
308       // Check if the start of the string would have been considered
309       // a root name.
310       PosPtr RootEnd = consumeRootName(End + 1, Start);
311       if (RootEnd)
312         return RootEnd - 1;
313     }
314     return P;
315   }
316 
317   PosPtr consumeDriveLetter(PosPtr P, PosPtr End) const noexcept {
318     if (P == End)
319       return nullptr;
320     if (P < End) {
321       if (P + 1 == End || !isDriveLetter(P[0]) || P[1] != ':')
322         return nullptr;
323       return P + 2;
324     } else {
325       if (P - 1 == End || !isDriveLetter(P[-1]) || P[0] != ':')
326         return nullptr;
327       return P - 2;
328     }
329   }
330 
331   PosPtr consumeNetworkRoot(PosPtr P, PosPtr End) const noexcept {
332     if (P == End)
333       return nullptr;
334     if (P < End)
335       return consumeName(consumeNSeparators(P, End, 2), End);
336     else
337       return consumeNSeparators(consumeName(P, End), End, 2);
338   }
339 
340   PosPtr consumeRootName(PosPtr P, PosPtr End) const noexcept {
341 #if defined(_LIBCPP_WIN32API)
342     if (PosPtr Ret = consumeDriveLetter(P, End))
343       return Ret;
344     if (PosPtr Ret = consumeNetworkRoot(P, End))
345       return Ret;
346 #endif
347     return nullptr;
348   }
349 };
350 
351 inline string_view_pair separate_filename(string_view_t const& s) {
352   if (s == PATHSTR(".") || s == PATHSTR("..") || s.empty())
353     return string_view_pair{s, PATHSTR("")};
354   auto pos = s.find_last_of('.');
355   if (pos == string_view_t::npos || pos == 0)
356     return string_view_pair{s, string_view_t{}};
357   return string_view_pair{s.substr(0, pos), s.substr(pos)};
358 }
359 
360 inline string_view_t createView(PosPtr S, PosPtr E) noexcept {
361   return {S, static_cast<size_t>(E - S) + 1};
362 }
363 
364 } // namespace parser
365 
366 _LIBCPP_END_NAMESPACE_FILESYSTEM
367 
368 #endif // PATH_PARSER_H
369