1 //===----------------------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef PATH_PARSER_H 10 #define PATH_PARSER_H 11 12 #include <__config> 13 #include <__utility/unreachable.h> 14 #include <cstddef> 15 #include <filesystem> 16 #include <utility> 17 18 #include "format_string.h" 19 20 _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM 21 22 inline bool isSeparator(path::value_type C) { 23 if (C == '/') 24 return true; 25 #if defined(_LIBCPP_WIN32API) 26 if (C == '\\') 27 return true; 28 #endif 29 return false; 30 } 31 32 inline bool isDriveLetter(path::value_type C) { return (C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z'); } 33 34 namespace parser { 35 36 using string_view_t = path::__string_view; 37 using string_view_pair = pair<string_view_t, string_view_t>; 38 using PosPtr = path::value_type const*; 39 40 struct PathParser { 41 enum ParserState : unsigned char { 42 // Zero is a special sentinel value used by default constructed iterators. 43 PS_BeforeBegin = path::iterator::_BeforeBegin, 44 PS_InRootName = path::iterator::_InRootName, 45 PS_InRootDir = path::iterator::_InRootDir, 46 PS_InFilenames = path::iterator::_InFilenames, 47 PS_InTrailingSep = path::iterator::_InTrailingSep, 48 PS_AtEnd = path::iterator::_AtEnd 49 }; 50 51 const string_view_t Path; 52 string_view_t RawEntry; 53 ParserState State; 54 55 private: 56 PathParser(string_view_t P, ParserState State) noexcept : Path(P), State(State) {} 57 58 public: 59 PathParser(string_view_t P, string_view_t E, unsigned char S) 60 : Path(P), RawEntry(E), State(static_cast<ParserState>(S)) { 61 // S cannot be '0' or PS_BeforeBegin. 62 } 63 64 static PathParser CreateBegin(string_view_t P) noexcept { 65 PathParser PP(P, PS_BeforeBegin); 66 PP.increment(); 67 return PP; 68 } 69 70 static PathParser CreateEnd(string_view_t P) noexcept { 71 PathParser PP(P, PS_AtEnd); 72 return PP; 73 } 74 75 PosPtr peek() const noexcept { 76 auto TkEnd = getNextTokenStartPos(); 77 auto End = getAfterBack(); 78 return TkEnd == End ? nullptr : TkEnd; 79 } 80 81 void increment() noexcept { 82 const PosPtr End = getAfterBack(); 83 const PosPtr Start = getNextTokenStartPos(); 84 if (Start == End) 85 return makeState(PS_AtEnd); 86 87 switch (State) { 88 case PS_BeforeBegin: { 89 PosPtr TkEnd = consumeRootName(Start, End); 90 if (TkEnd) 91 return makeState(PS_InRootName, Start, TkEnd); 92 } 93 _LIBCPP_FALLTHROUGH(); 94 case PS_InRootName: { 95 PosPtr TkEnd = consumeAllSeparators(Start, End); 96 if (TkEnd) 97 return makeState(PS_InRootDir, Start, TkEnd); 98 else 99 return makeState(PS_InFilenames, Start, consumeName(Start, End)); 100 } 101 case PS_InRootDir: 102 return makeState(PS_InFilenames, Start, consumeName(Start, End)); 103 104 case PS_InFilenames: { 105 PosPtr SepEnd = consumeAllSeparators(Start, End); 106 if (SepEnd != End) { 107 PosPtr TkEnd = consumeName(SepEnd, End); 108 if (TkEnd) 109 return makeState(PS_InFilenames, SepEnd, TkEnd); 110 } 111 return makeState(PS_InTrailingSep, Start, SepEnd); 112 } 113 114 case PS_InTrailingSep: 115 return makeState(PS_AtEnd); 116 117 case PS_AtEnd: 118 __libcpp_unreachable(); 119 } 120 } 121 122 void decrement() noexcept { 123 const PosPtr REnd = getBeforeFront(); 124 const PosPtr RStart = getCurrentTokenStartPos() - 1; 125 if (RStart == REnd) // we're decrementing the begin 126 return makeState(PS_BeforeBegin); 127 128 switch (State) { 129 case PS_AtEnd: { 130 // Try to consume a trailing separator or root directory first. 131 if (PosPtr SepEnd = consumeAllSeparators(RStart, REnd)) { 132 if (SepEnd == REnd) 133 return makeState(PS_InRootDir, Path.data(), RStart + 1); 134 PosPtr TkStart = consumeRootName(SepEnd, REnd); 135 if (TkStart == REnd) 136 return makeState(PS_InRootDir, RStart, RStart + 1); 137 return makeState(PS_InTrailingSep, SepEnd + 1, RStart + 1); 138 } else { 139 PosPtr TkStart = consumeRootName(RStart, REnd); 140 if (TkStart == REnd) 141 return makeState(PS_InRootName, TkStart + 1, RStart + 1); 142 TkStart = consumeName(RStart, REnd); 143 return makeState(PS_InFilenames, TkStart + 1, RStart + 1); 144 } 145 } 146 case PS_InTrailingSep: 147 return makeState(PS_InFilenames, consumeName(RStart, REnd) + 1, RStart + 1); 148 case PS_InFilenames: { 149 PosPtr SepEnd = consumeAllSeparators(RStart, REnd); 150 if (SepEnd == REnd) 151 return makeState(PS_InRootDir, Path.data(), RStart + 1); 152 PosPtr TkStart = consumeRootName(SepEnd ? SepEnd : RStart, REnd); 153 if (TkStart == REnd) { 154 if (SepEnd) 155 return makeState(PS_InRootDir, SepEnd + 1, RStart + 1); 156 return makeState(PS_InRootName, TkStart + 1, RStart + 1); 157 } 158 TkStart = consumeName(SepEnd, REnd); 159 return makeState(PS_InFilenames, TkStart + 1, SepEnd + 1); 160 } 161 case PS_InRootDir: 162 return makeState(PS_InRootName, Path.data(), RStart + 1); 163 case PS_InRootName: 164 case PS_BeforeBegin: 165 __libcpp_unreachable(); 166 } 167 } 168 169 /// \brief Return a view with the "preferred representation" of the current 170 /// element. For example trailing separators are represented as a '.' 171 string_view_t operator*() const noexcept { 172 switch (State) { 173 case PS_BeforeBegin: 174 case PS_AtEnd: 175 return PATHSTR(""); 176 case PS_InRootDir: 177 if (RawEntry[0] == '\\') 178 return PATHSTR("\\"); 179 else 180 return PATHSTR("/"); 181 case PS_InTrailingSep: 182 return PATHSTR(""); 183 case PS_InRootName: 184 case PS_InFilenames: 185 return RawEntry; 186 } 187 __libcpp_unreachable(); 188 } 189 190 explicit operator bool() const noexcept { return State != PS_BeforeBegin && State != PS_AtEnd; } 191 192 PathParser& operator++() noexcept { 193 increment(); 194 return *this; 195 } 196 197 PathParser& operator--() noexcept { 198 decrement(); 199 return *this; 200 } 201 202 bool atEnd() const noexcept { return State == PS_AtEnd; } 203 204 bool inRootDir() const noexcept { return State == PS_InRootDir; } 205 206 bool inRootName() const noexcept { return State == PS_InRootName; } 207 208 bool inRootPath() const noexcept { return inRootName() || inRootDir(); } 209 210 private: 211 void makeState(ParserState NewState, PosPtr Start, PosPtr End) noexcept { 212 State = NewState; 213 RawEntry = string_view_t(Start, End - Start); 214 } 215 void makeState(ParserState NewState) noexcept { 216 State = NewState; 217 RawEntry = {}; 218 } 219 220 PosPtr getAfterBack() const noexcept { return Path.data() + Path.size(); } 221 222 PosPtr getBeforeFront() const noexcept { return Path.data() - 1; } 223 224 /// \brief Return a pointer to the first character after the currently 225 /// lexed element. 226 PosPtr getNextTokenStartPos() const noexcept { 227 switch (State) { 228 case PS_BeforeBegin: 229 return Path.data(); 230 case PS_InRootName: 231 case PS_InRootDir: 232 case PS_InFilenames: 233 return &RawEntry.back() + 1; 234 case PS_InTrailingSep: 235 case PS_AtEnd: 236 return getAfterBack(); 237 } 238 __libcpp_unreachable(); 239 } 240 241 /// \brief Return a pointer to the first character in the currently lexed 242 /// element. 243 PosPtr getCurrentTokenStartPos() const noexcept { 244 switch (State) { 245 case PS_BeforeBegin: 246 case PS_InRootName: 247 return &Path.front(); 248 case PS_InRootDir: 249 case PS_InFilenames: 250 case PS_InTrailingSep: 251 return &RawEntry.front(); 252 case PS_AtEnd: 253 return &Path.back() + 1; 254 } 255 __libcpp_unreachable(); 256 } 257 258 // Consume all consecutive separators. 259 PosPtr consumeAllSeparators(PosPtr P, PosPtr End) const noexcept { 260 if (P == nullptr || P == End || !isSeparator(*P)) 261 return nullptr; 262 const int Inc = P < End ? 1 : -1; 263 P += Inc; 264 while (P != End && isSeparator(*P)) 265 P += Inc; 266 return P; 267 } 268 269 // Consume exactly N separators, or return nullptr. 270 PosPtr consumeNSeparators(PosPtr P, PosPtr End, int N) const noexcept { 271 PosPtr Ret = consumeAllSeparators(P, End); 272 if (Ret == nullptr) 273 return nullptr; 274 if (P < End) { 275 if (Ret == P + N) 276 return Ret; 277 } else { 278 if (Ret == P - N) 279 return Ret; 280 } 281 return nullptr; 282 } 283 284 PosPtr consumeName(PosPtr P, PosPtr End) const noexcept { 285 PosPtr Start = P; 286 if (P == nullptr || P == End || isSeparator(*P)) 287 return nullptr; 288 const int Inc = P < End ? 1 : -1; 289 P += Inc; 290 while (P != End && !isSeparator(*P)) 291 P += Inc; 292 if (P == End && Inc < 0) { 293 // Iterating backwards and consumed all the rest of the input. 294 // Check if the start of the string would have been considered 295 // a root name. 296 PosPtr RootEnd = consumeRootName(End + 1, Start); 297 if (RootEnd) 298 return RootEnd - 1; 299 } 300 return P; 301 } 302 303 PosPtr consumeDriveLetter(PosPtr P, PosPtr End) const noexcept { 304 if (P == End) 305 return nullptr; 306 if (P < End) { 307 if (P + 1 == End || !isDriveLetter(P[0]) || P[1] != ':') 308 return nullptr; 309 return P + 2; 310 } else { 311 if (P - 1 == End || !isDriveLetter(P[-1]) || P[0] != ':') 312 return nullptr; 313 return P - 2; 314 } 315 } 316 317 PosPtr consumeNetworkRoot(PosPtr P, PosPtr End) const noexcept { 318 if (P == End) 319 return nullptr; 320 if (P < End) 321 return consumeName(consumeNSeparators(P, End, 2), End); 322 else 323 return consumeNSeparators(consumeName(P, End), End, 2); 324 } 325 326 PosPtr consumeRootName(PosPtr P, PosPtr End) const noexcept { 327 #if defined(_LIBCPP_WIN32API) 328 if (PosPtr Ret = consumeDriveLetter(P, End)) 329 return Ret; 330 if (PosPtr Ret = consumeNetworkRoot(P, End)) 331 return Ret; 332 #endif 333 return nullptr; 334 } 335 }; 336 337 inline string_view_pair separate_filename(string_view_t const& s) { 338 if (s == PATHSTR(".") || s == PATHSTR("..") || s.empty()) 339 return string_view_pair{s, PATHSTR("")}; 340 auto pos = s.find_last_of('.'); 341 if (pos == string_view_t::npos || pos == 0) 342 return string_view_pair{s, string_view_t{}}; 343 return string_view_pair{s.substr(0, pos), s.substr(pos)}; 344 } 345 346 inline string_view_t createView(PosPtr S, PosPtr E) noexcept { return {S, static_cast<size_t>(E - S) + 1}; } 347 348 } // namespace parser 349 350 _LIBCPP_END_NAMESPACE_FILESYSTEM 351 352 #endif // PATH_PARSER_H 353