1 //===----------------------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef PATH_PARSER_H 10 #define PATH_PARSER_H 11 12 #include <__config> 13 #include <__utility/unreachable.h> 14 #include <cstddef> 15 #include <filesystem> 16 #include <utility> 17 18 #include "format_string.h" 19 20 _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM 21 22 inline bool isSeparator(path::value_type C) { 23 if (C == '/') 24 return true; 25 #if defined(_LIBCPP_WIN32API) 26 if (C == '\\') 27 return true; 28 #endif 29 return false; 30 } 31 32 inline bool isDriveLetter(path::value_type C) { 33 return (C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z'); 34 } 35 36 namespace parser { 37 38 using string_view_t = path::__string_view; 39 using string_view_pair = pair<string_view_t, string_view_t>; 40 using PosPtr = path::value_type const*; 41 42 struct PathParser { 43 enum ParserState : unsigned char { 44 // Zero is a special sentinel value used by default constructed iterators. 45 PS_BeforeBegin = path::iterator::_BeforeBegin, 46 PS_InRootName = path::iterator::_InRootName, 47 PS_InRootDir = path::iterator::_InRootDir, 48 PS_InFilenames = path::iterator::_InFilenames, 49 PS_InTrailingSep = path::iterator::_InTrailingSep, 50 PS_AtEnd = path::iterator::_AtEnd 51 }; 52 53 const string_view_t Path; 54 string_view_t RawEntry; 55 ParserState State; 56 57 private: 58 PathParser(string_view_t P, ParserState State) noexcept : Path(P), 59 State(State) {} 60 61 public: 62 PathParser(string_view_t P, string_view_t E, unsigned char S) 63 : Path(P), RawEntry(E), State(static_cast<ParserState>(S)) { 64 // S cannot be '0' or PS_BeforeBegin. 65 } 66 67 static PathParser CreateBegin(string_view_t P) noexcept { 68 PathParser PP(P, PS_BeforeBegin); 69 PP.increment(); 70 return PP; 71 } 72 73 static PathParser CreateEnd(string_view_t P) noexcept { 74 PathParser PP(P, PS_AtEnd); 75 return PP; 76 } 77 78 PosPtr peek() const noexcept { 79 auto TkEnd = getNextTokenStartPos(); 80 auto End = getAfterBack(); 81 return TkEnd == End ? nullptr : TkEnd; 82 } 83 84 void increment() noexcept { 85 const PosPtr End = getAfterBack(); 86 const PosPtr Start = getNextTokenStartPos(); 87 if (Start == End) 88 return makeState(PS_AtEnd); 89 90 switch (State) { 91 case PS_BeforeBegin: { 92 PosPtr TkEnd = consumeRootName(Start, End); 93 if (TkEnd) 94 return makeState(PS_InRootName, Start, TkEnd); 95 } 96 _LIBCPP_FALLTHROUGH(); 97 case PS_InRootName: { 98 PosPtr TkEnd = consumeAllSeparators(Start, End); 99 if (TkEnd) 100 return makeState(PS_InRootDir, Start, TkEnd); 101 else 102 return makeState(PS_InFilenames, Start, consumeName(Start, End)); 103 } 104 case PS_InRootDir: 105 return makeState(PS_InFilenames, Start, consumeName(Start, End)); 106 107 case PS_InFilenames: { 108 PosPtr SepEnd = consumeAllSeparators(Start, End); 109 if (SepEnd != End) { 110 PosPtr TkEnd = consumeName(SepEnd, End); 111 if (TkEnd) 112 return makeState(PS_InFilenames, SepEnd, TkEnd); 113 } 114 return makeState(PS_InTrailingSep, Start, SepEnd); 115 } 116 117 case PS_InTrailingSep: 118 return makeState(PS_AtEnd); 119 120 case PS_AtEnd: 121 __libcpp_unreachable(); 122 } 123 } 124 125 void decrement() noexcept { 126 const PosPtr REnd = getBeforeFront(); 127 const PosPtr RStart = getCurrentTokenStartPos() - 1; 128 if (RStart == REnd) // we're decrementing the begin 129 return makeState(PS_BeforeBegin); 130 131 switch (State) { 132 case PS_AtEnd: { 133 // Try to consume a trailing separator or root directory first. 134 if (PosPtr SepEnd = consumeAllSeparators(RStart, REnd)) { 135 if (SepEnd == REnd) 136 return makeState(PS_InRootDir, Path.data(), RStart + 1); 137 PosPtr TkStart = consumeRootName(SepEnd, REnd); 138 if (TkStart == REnd) 139 return makeState(PS_InRootDir, RStart, RStart + 1); 140 return makeState(PS_InTrailingSep, SepEnd + 1, RStart + 1); 141 } else { 142 PosPtr TkStart = consumeRootName(RStart, REnd); 143 if (TkStart == REnd) 144 return makeState(PS_InRootName, TkStart + 1, RStart + 1); 145 TkStart = consumeName(RStart, REnd); 146 return makeState(PS_InFilenames, TkStart + 1, RStart + 1); 147 } 148 } 149 case PS_InTrailingSep: 150 return makeState(PS_InFilenames, consumeName(RStart, REnd) + 1, 151 RStart + 1); 152 case PS_InFilenames: { 153 PosPtr SepEnd = consumeAllSeparators(RStart, REnd); 154 if (SepEnd == REnd) 155 return makeState(PS_InRootDir, Path.data(), RStart + 1); 156 PosPtr TkStart = consumeRootName(SepEnd ? SepEnd : RStart, REnd); 157 if (TkStart == REnd) { 158 if (SepEnd) 159 return makeState(PS_InRootDir, SepEnd + 1, RStart + 1); 160 return makeState(PS_InRootName, TkStart + 1, RStart + 1); 161 } 162 TkStart = consumeName(SepEnd, REnd); 163 return makeState(PS_InFilenames, TkStart + 1, SepEnd + 1); 164 } 165 case PS_InRootDir: 166 return makeState(PS_InRootName, Path.data(), RStart + 1); 167 case PS_InRootName: 168 case PS_BeforeBegin: 169 __libcpp_unreachable(); 170 } 171 } 172 173 /// \brief Return a view with the "preferred representation" of the current 174 /// element. For example trailing separators are represented as a '.' 175 string_view_t operator*() const noexcept { 176 switch (State) { 177 case PS_BeforeBegin: 178 case PS_AtEnd: 179 return PATHSTR(""); 180 case PS_InRootDir: 181 if (RawEntry[0] == '\\') 182 return PATHSTR("\\"); 183 else 184 return PATHSTR("/"); 185 case PS_InTrailingSep: 186 return PATHSTR(""); 187 case PS_InRootName: 188 case PS_InFilenames: 189 return RawEntry; 190 } 191 __libcpp_unreachable(); 192 } 193 194 explicit operator bool() const noexcept { 195 return State != PS_BeforeBegin && State != PS_AtEnd; 196 } 197 198 PathParser& operator++() noexcept { 199 increment(); 200 return *this; 201 } 202 203 PathParser& operator--() noexcept { 204 decrement(); 205 return *this; 206 } 207 208 bool atEnd() const noexcept { 209 return State == PS_AtEnd; 210 } 211 212 bool inRootDir() const noexcept { 213 return State == PS_InRootDir; 214 } 215 216 bool inRootName() const noexcept { 217 return State == PS_InRootName; 218 } 219 220 bool inRootPath() const noexcept { 221 return inRootName() || inRootDir(); 222 } 223 224 private: 225 void makeState(ParserState NewState, PosPtr Start, PosPtr End) noexcept { 226 State = NewState; 227 RawEntry = string_view_t(Start, End - Start); 228 } 229 void makeState(ParserState NewState) noexcept { 230 State = NewState; 231 RawEntry = {}; 232 } 233 234 PosPtr getAfterBack() const noexcept { return Path.data() + Path.size(); } 235 236 PosPtr getBeforeFront() const noexcept { return Path.data() - 1; } 237 238 /// \brief Return a pointer to the first character after the currently 239 /// lexed element. 240 PosPtr getNextTokenStartPos() const noexcept { 241 switch (State) { 242 case PS_BeforeBegin: 243 return Path.data(); 244 case PS_InRootName: 245 case PS_InRootDir: 246 case PS_InFilenames: 247 return &RawEntry.back() + 1; 248 case PS_InTrailingSep: 249 case PS_AtEnd: 250 return getAfterBack(); 251 } 252 __libcpp_unreachable(); 253 } 254 255 /// \brief Return a pointer to the first character in the currently lexed 256 /// element. 257 PosPtr getCurrentTokenStartPos() const noexcept { 258 switch (State) { 259 case PS_BeforeBegin: 260 case PS_InRootName: 261 return &Path.front(); 262 case PS_InRootDir: 263 case PS_InFilenames: 264 case PS_InTrailingSep: 265 return &RawEntry.front(); 266 case PS_AtEnd: 267 return &Path.back() + 1; 268 } 269 __libcpp_unreachable(); 270 } 271 272 // Consume all consecutive separators. 273 PosPtr consumeAllSeparators(PosPtr P, PosPtr End) const noexcept { 274 if (P == nullptr || P == End || !isSeparator(*P)) 275 return nullptr; 276 const int Inc = P < End ? 1 : -1; 277 P += Inc; 278 while (P != End && isSeparator(*P)) 279 P += Inc; 280 return P; 281 } 282 283 // Consume exactly N separators, or return nullptr. 284 PosPtr consumeNSeparators(PosPtr P, PosPtr End, int N) const noexcept { 285 PosPtr Ret = consumeAllSeparators(P, End); 286 if (Ret == nullptr) 287 return nullptr; 288 if (P < End) { 289 if (Ret == P + N) 290 return Ret; 291 } else { 292 if (Ret == P - N) 293 return Ret; 294 } 295 return nullptr; 296 } 297 298 PosPtr consumeName(PosPtr P, PosPtr End) const noexcept { 299 PosPtr Start = P; 300 if (P == nullptr || P == End || isSeparator(*P)) 301 return nullptr; 302 const int Inc = P < End ? 1 : -1; 303 P += Inc; 304 while (P != End && !isSeparator(*P)) 305 P += Inc; 306 if (P == End && Inc < 0) { 307 // Iterating backwards and consumed all the rest of the input. 308 // Check if the start of the string would have been considered 309 // a root name. 310 PosPtr RootEnd = consumeRootName(End + 1, Start); 311 if (RootEnd) 312 return RootEnd - 1; 313 } 314 return P; 315 } 316 317 PosPtr consumeDriveLetter(PosPtr P, PosPtr End) const noexcept { 318 if (P == End) 319 return nullptr; 320 if (P < End) { 321 if (P + 1 == End || !isDriveLetter(P[0]) || P[1] != ':') 322 return nullptr; 323 return P + 2; 324 } else { 325 if (P - 1 == End || !isDriveLetter(P[-1]) || P[0] != ':') 326 return nullptr; 327 return P - 2; 328 } 329 } 330 331 PosPtr consumeNetworkRoot(PosPtr P, PosPtr End) const noexcept { 332 if (P == End) 333 return nullptr; 334 if (P < End) 335 return consumeName(consumeNSeparators(P, End, 2), End); 336 else 337 return consumeNSeparators(consumeName(P, End), End, 2); 338 } 339 340 PosPtr consumeRootName(PosPtr P, PosPtr End) const noexcept { 341 #if defined(_LIBCPP_WIN32API) 342 if (PosPtr Ret = consumeDriveLetter(P, End)) 343 return Ret; 344 if (PosPtr Ret = consumeNetworkRoot(P, End)) 345 return Ret; 346 #endif 347 return nullptr; 348 } 349 }; 350 351 inline string_view_pair separate_filename(string_view_t const& s) { 352 if (s == PATHSTR(".") || s == PATHSTR("..") || s.empty()) 353 return string_view_pair{s, PATHSTR("")}; 354 auto pos = s.find_last_of('.'); 355 if (pos == string_view_t::npos || pos == 0) 356 return string_view_pair{s, string_view_t{}}; 357 return string_view_pair{s.substr(0, pos), s.substr(pos)}; 358 } 359 360 inline string_view_t createView(PosPtr S, PosPtr E) noexcept { 361 return {S, static_cast<size_t>(E - S) + 1}; 362 } 363 364 } // namespace parser 365 366 _LIBCPP_END_NAMESPACE_FILESYSTEM 367 368 #endif // PATH_PARSER_H 369