1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #ifndef PATH_PARSER_H
10 #define PATH_PARSER_H
11
12 #include <__config>
13 #include <__utility/unreachable.h>
14 #include <cstddef>
15 #include <filesystem>
16 #include <utility>
17
18 #include "format_string.h"
19
20 _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
21
isSeparator(path::value_type C)22 inline bool isSeparator(path::value_type C) {
23 if (C == '/')
24 return true;
25 #if defined(_LIBCPP_WIN32API)
26 if (C == '\\')
27 return true;
28 #endif
29 return false;
30 }
31
isDriveLetter(path::value_type C)32 inline bool isDriveLetter(path::value_type C) { return (C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z'); }
33
34 namespace parser {
35
36 using string_view_t = path::__string_view;
37 using string_view_pair = pair<string_view_t, string_view_t>;
38 using PosPtr = path::value_type const*;
39
40 struct PathParser {
41 enum ParserState : unsigned char {
42 // Zero is a special sentinel value used by default constructed iterators.
43 PS_BeforeBegin = path::iterator::_BeforeBegin,
44 PS_InRootName = path::iterator::_InRootName,
45 PS_InRootDir = path::iterator::_InRootDir,
46 PS_InFilenames = path::iterator::_InFilenames,
47 PS_InTrailingSep = path::iterator::_InTrailingSep,
48 PS_AtEnd = path::iterator::_AtEnd
49 };
50
51 const string_view_t Path;
52 string_view_t RawEntry;
53 ParserState State_;
54
55 private:
PathParserPathParser56 PathParser(string_view_t P, ParserState State) noexcept : Path(P), State_(State) {}
57
58 public:
PathParserPathParser59 PathParser(string_view_t P, string_view_t E, unsigned char S)
60 : Path(P), RawEntry(E), State_(static_cast<ParserState>(S)) {
61 // S cannot be '0' or PS_BeforeBegin.
62 }
63
CreateBeginPathParser64 static PathParser CreateBegin(string_view_t P) noexcept {
65 PathParser PP(P, PS_BeforeBegin);
66 PP.increment();
67 return PP;
68 }
69
CreateEndPathParser70 static PathParser CreateEnd(string_view_t P) noexcept {
71 PathParser PP(P, PS_AtEnd);
72 return PP;
73 }
74
peekPathParser75 PosPtr peek() const noexcept {
76 auto TkEnd = getNextTokenStartPos();
77 auto End = getAfterBack();
78 return TkEnd == End ? nullptr : TkEnd;
79 }
80
incrementPathParser81 void increment() noexcept {
82 const PosPtr End = getAfterBack();
83 const PosPtr Start = getNextTokenStartPos();
84 if (Start == End)
85 return makeState(PS_AtEnd);
86
87 switch (State_) {
88 case PS_BeforeBegin: {
89 PosPtr TkEnd = consumeRootName(Start, End);
90 if (TkEnd)
91 return makeState(PS_InRootName, Start, TkEnd);
92 }
93 _LIBCPP_FALLTHROUGH();
94 case PS_InRootName: {
95 PosPtr TkEnd = consumeAllSeparators(Start, End);
96 if (TkEnd)
97 return makeState(PS_InRootDir, Start, TkEnd);
98 else
99 return makeState(PS_InFilenames, Start, consumeName(Start, End));
100 }
101 case PS_InRootDir:
102 return makeState(PS_InFilenames, Start, consumeName(Start, End));
103
104 case PS_InFilenames: {
105 PosPtr SepEnd = consumeAllSeparators(Start, End);
106 if (SepEnd != End) {
107 PosPtr TkEnd = consumeName(SepEnd, End);
108 if (TkEnd)
109 return makeState(PS_InFilenames, SepEnd, TkEnd);
110 }
111 return makeState(PS_InTrailingSep, Start, SepEnd);
112 }
113
114 case PS_InTrailingSep:
115 return makeState(PS_AtEnd);
116
117 case PS_AtEnd:
118 __libcpp_unreachable();
119 }
120 }
121
decrementPathParser122 void decrement() noexcept {
123 const PosPtr REnd = getBeforeFront();
124 const PosPtr RStart = getCurrentTokenStartPos() - 1;
125 if (RStart == REnd) // we're decrementing the begin
126 return makeState(PS_BeforeBegin);
127
128 switch (State_) {
129 case PS_AtEnd: {
130 // Try to consume a trailing separator or root directory first.
131 if (PosPtr SepEnd = consumeAllSeparators(RStart, REnd)) {
132 if (SepEnd == REnd)
133 return makeState(PS_InRootDir, Path.data(), RStart + 1);
134 PosPtr TkStart = consumeRootName(SepEnd, REnd);
135 if (TkStart == REnd)
136 return makeState(PS_InRootDir, RStart, RStart + 1);
137 return makeState(PS_InTrailingSep, SepEnd + 1, RStart + 1);
138 } else {
139 PosPtr TkStart = consumeRootName(RStart, REnd);
140 if (TkStart == REnd)
141 return makeState(PS_InRootName, TkStart + 1, RStart + 1);
142 TkStart = consumeName(RStart, REnd);
143 return makeState(PS_InFilenames, TkStart + 1, RStart + 1);
144 }
145 }
146 case PS_InTrailingSep:
147 return makeState(PS_InFilenames, consumeName(RStart, REnd) + 1, RStart + 1);
148 case PS_InFilenames: {
149 PosPtr SepEnd = consumeAllSeparators(RStart, REnd);
150 if (SepEnd == REnd)
151 return makeState(PS_InRootDir, Path.data(), RStart + 1);
152 PosPtr TkStart = consumeRootName(SepEnd ? SepEnd : RStart, REnd);
153 if (TkStart == REnd) {
154 if (SepEnd)
155 return makeState(PS_InRootDir, SepEnd + 1, RStart + 1);
156 return makeState(PS_InRootName, TkStart + 1, RStart + 1);
157 }
158 TkStart = consumeName(SepEnd, REnd);
159 return makeState(PS_InFilenames, TkStart + 1, SepEnd + 1);
160 }
161 case PS_InRootDir:
162 return makeState(PS_InRootName, Path.data(), RStart + 1);
163 case PS_InRootName:
164 case PS_BeforeBegin:
165 __libcpp_unreachable();
166 }
167 }
168
169 /// \brief Return a view with the "preferred representation" of the current
170 /// element. For example trailing separators are represented as a '.'
171 string_view_t operator*() const noexcept {
172 switch (State_) {
173 case PS_BeforeBegin:
174 case PS_AtEnd:
175 return PATHSTR("");
176 case PS_InRootDir:
177 if (RawEntry[0] == '\\')
178 return PATHSTR("\\");
179 else
180 return PATHSTR("/");
181 case PS_InTrailingSep:
182 return PATHSTR("");
183 case PS_InRootName:
184 case PS_InFilenames:
185 return RawEntry;
186 }
187 __libcpp_unreachable();
188 }
189
190 explicit operator bool() const noexcept { return State_ != PS_BeforeBegin && State_ != PS_AtEnd; }
191
192 PathParser& operator++() noexcept {
193 increment();
194 return *this;
195 }
196
197 PathParser& operator--() noexcept {
198 decrement();
199 return *this;
200 }
201
atEndPathParser202 bool atEnd() const noexcept { return State_ == PS_AtEnd; }
203
inRootDirPathParser204 bool inRootDir() const noexcept { return State_ == PS_InRootDir; }
205
inRootNamePathParser206 bool inRootName() const noexcept { return State_ == PS_InRootName; }
207
inRootPathPathParser208 bool inRootPath() const noexcept { return inRootName() || inRootDir(); }
209
210 private:
makeStatePathParser211 void makeState(ParserState NewState, PosPtr Start, PosPtr End) noexcept {
212 State_ = NewState;
213 RawEntry = string_view_t(Start, End - Start);
214 }
makeStatePathParser215 void makeState(ParserState NewState) noexcept {
216 State_ = NewState;
217 RawEntry = {};
218 }
219
getAfterBackPathParser220 PosPtr getAfterBack() const noexcept { return Path.data() + Path.size(); }
221
getBeforeFrontPathParser222 PosPtr getBeforeFront() const noexcept { return Path.data() - 1; }
223
224 /// \brief Return a pointer to the first character after the currently
225 /// lexed element.
getNextTokenStartPosPathParser226 PosPtr getNextTokenStartPos() const noexcept {
227 switch (State_) {
228 case PS_BeforeBegin:
229 return Path.data();
230 case PS_InRootName:
231 case PS_InRootDir:
232 case PS_InFilenames:
233 return &RawEntry.back() + 1;
234 case PS_InTrailingSep:
235 case PS_AtEnd:
236 return getAfterBack();
237 }
238 __libcpp_unreachable();
239 }
240
241 /// \brief Return a pointer to the first character in the currently lexed
242 /// element.
getCurrentTokenStartPosPathParser243 PosPtr getCurrentTokenStartPos() const noexcept {
244 switch (State_) {
245 case PS_BeforeBegin:
246 case PS_InRootName:
247 return &Path.front();
248 case PS_InRootDir:
249 case PS_InFilenames:
250 case PS_InTrailingSep:
251 return &RawEntry.front();
252 case PS_AtEnd:
253 return &Path.back() + 1;
254 }
255 __libcpp_unreachable();
256 }
257
258 // Consume all consecutive separators.
consumeAllSeparatorsPathParser259 PosPtr consumeAllSeparators(PosPtr P, PosPtr End) const noexcept {
260 if (P == nullptr || P == End || !isSeparator(*P))
261 return nullptr;
262 const int Inc = P < End ? 1 : -1;
263 P += Inc;
264 while (P != End && isSeparator(*P))
265 P += Inc;
266 return P;
267 }
268
269 // Consume exactly N separators, or return nullptr.
consumeNSeparatorsPathParser270 PosPtr consumeNSeparators(PosPtr P, PosPtr End, int N) const noexcept {
271 PosPtr Ret = consumeAllSeparators(P, End);
272 if (Ret == nullptr)
273 return nullptr;
274 if (P < End) {
275 if (Ret == P + N)
276 return Ret;
277 } else {
278 if (Ret == P - N)
279 return Ret;
280 }
281 return nullptr;
282 }
283
consumeNamePathParser284 PosPtr consumeName(PosPtr P, PosPtr End) const noexcept {
285 PosPtr Start = P;
286 if (P == nullptr || P == End || isSeparator(*P))
287 return nullptr;
288 const int Inc = P < End ? 1 : -1;
289 P += Inc;
290 while (P != End && !isSeparator(*P))
291 P += Inc;
292 if (P == End && Inc < 0) {
293 // Iterating backwards and consumed all the rest of the input.
294 // Check if the start of the string would have been considered
295 // a root name.
296 PosPtr RootEnd = consumeRootName(End + 1, Start);
297 if (RootEnd)
298 return RootEnd - 1;
299 }
300 return P;
301 }
302
consumeDriveLetterPathParser303 PosPtr consumeDriveLetter(PosPtr P, PosPtr End) const noexcept {
304 if (P == End)
305 return nullptr;
306 if (P < End) {
307 if (P + 1 == End || !isDriveLetter(P[0]) || P[1] != ':')
308 return nullptr;
309 return P + 2;
310 } else {
311 if (P - 1 == End || !isDriveLetter(P[-1]) || P[0] != ':')
312 return nullptr;
313 return P - 2;
314 }
315 }
316
consumeNetworkRootPathParser317 PosPtr consumeNetworkRoot(PosPtr P, PosPtr End) const noexcept {
318 if (P == End)
319 return nullptr;
320 if (P < End)
321 return consumeName(consumeNSeparators(P, End, 2), End);
322 else
323 return consumeNSeparators(consumeName(P, End), End, 2);
324 }
325
consumeRootNamePathParser326 PosPtr consumeRootName(PosPtr P, PosPtr End) const noexcept {
327 #if defined(_LIBCPP_WIN32API)
328 if (PosPtr Ret = consumeDriveLetter(P, End))
329 return Ret;
330 if (PosPtr Ret = consumeNetworkRoot(P, End))
331 return Ret;
332 #endif
333 return nullptr;
334 }
335 };
336
separate_filename(string_view_t const & s)337 inline string_view_pair separate_filename(string_view_t const& s) {
338 if (s == PATHSTR(".") || s == PATHSTR("..") || s.empty())
339 return string_view_pair{s, PATHSTR("")};
340 auto pos = s.find_last_of('.');
341 if (pos == string_view_t::npos || pos == 0)
342 return string_view_pair{s, string_view_t{}};
343 return string_view_pair{s.substr(0, pos), s.substr(pos)};
344 }
345
createView(PosPtr S,PosPtr E)346 inline string_view_t createView(PosPtr S, PosPtr E) noexcept { return {S, static_cast<size_t>(E - S) + 1}; }
347
348 } // namespace parser
349
350 _LIBCPP_END_NAMESPACE_FILESYSTEM
351
352 #endif // PATH_PARSER_H
353