xref: /freebsd/contrib/llvm-project/libcxx/include/__locale_dir/scan_keyword.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef _LIBCPP___LOCALE_DIR_SCAN_KEYWORD_H
10 #define _LIBCPP___LOCALE_DIR_SCAN_KEYWORD_H
11 
12 #include <__config>
13 #include <__memory/unique_ptr.h>
14 #include <ios>
15 
16 #if _LIBCPP_HAS_LOCALIZATION
17 
18 #  if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
19 #    pragma GCC system_header
20 #  endif
21 
22 _LIBCPP_BEGIN_NAMESPACE_STD
23 
24 // __scan_keyword
25 // Scans [__b, __e) until a match is found in the basic_strings range
26 //  [__kb, __ke) or until it can be shown that there is no match in [__kb, __ke).
27 //  __b will be incremented (visibly), consuming CharT until a match is found
28 //  or proved to not exist.  A keyword may be "", in which will match anything.
29 //  If one keyword is a prefix of another, and the next CharT in the input
30 //  might match another keyword, the algorithm will attempt to find the longest
31 //  matching keyword.  If the longer matching keyword ends up not matching, then
32 //  no keyword match is found.  If no keyword match is found, __ke is returned
33 //  and failbit is set in __err.
34 //  Else an iterator pointing to the matching keyword is found.  If more than
35 //  one keyword matches, an iterator to the first matching keyword is returned.
36 //  If on exit __b == __e, eofbit is set in __err.  If __case_sensitive is false,
37 //  __ct is used to force to lower case before comparing characters.
38 //  Examples:
39 //  Keywords:  "a", "abb"
40 //  If the input is "a", the first keyword matches and eofbit is set.
41 //  If the input is "abc", no match is found and "ab" are consumed.
42 template <class _InputIterator, class _ForwardIterator, class _Ctype>
43 _LIBCPP_HIDE_FROM_ABI _ForwardIterator __scan_keyword(
44     _InputIterator& __b,
45     _InputIterator __e,
46     _ForwardIterator __kb,
47     _ForwardIterator __ke,
48     const _Ctype& __ct,
49     ios_base::iostate& __err,
50     bool __case_sensitive = true) {
51   typedef typename iterator_traits<_InputIterator>::value_type _CharT;
52   size_t __nkw                       = static_cast<size_t>(std::distance(__kb, __ke));
53   const unsigned char __doesnt_match = '\0';
54   const unsigned char __might_match  = '\1';
55   const unsigned char __does_match   = '\2';
56   unsigned char __statbuf[100];
57   unsigned char* __status = __statbuf;
58   unique_ptr<unsigned char, void (*)(void*)> __stat_hold(nullptr, free);
59   if (__nkw > sizeof(__statbuf)) {
60     __status = (unsigned char*)malloc(__nkw);
61     if (__status == nullptr)
62       std::__throw_bad_alloc();
63     __stat_hold.reset(__status);
64   }
65   size_t __n_might_match = __nkw; // At this point, any keyword might match
66   size_t __n_does_match  = 0;     // but none of them definitely do
67   // Initialize all statuses to __might_match, except for "" keywords are __does_match
68   unsigned char* __st = __status;
69   for (_ForwardIterator __ky = __kb; __ky != __ke; ++__ky, (void)++__st) {
70     if (!__ky->empty())
71       *__st = __might_match;
72     else {
73       *__st = __does_match;
74       --__n_might_match;
75       ++__n_does_match;
76     }
77   }
78   // While there might be a match, test keywords against the next CharT
79   for (size_t __indx = 0; __b != __e && __n_might_match > 0; ++__indx) {
80     // Peek at the next CharT but don't consume it
81     _CharT __c = *__b;
82     if (!__case_sensitive)
83       __c = __ct.toupper(__c);
84     bool __consume = false;
85     // For each keyword which might match, see if the __indx character is __c
86     // If a match if found, consume __c
87     // If a match is found, and that is the last character in the keyword,
88     //    then that keyword matches.
89     // If the keyword doesn't match this character, then change the keyword
90     //    to doesn't match
91     __st = __status;
92     for (_ForwardIterator __ky = __kb; __ky != __ke; ++__ky, (void)++__st) {
93       if (*__st == __might_match) {
94         _CharT __kc = (*__ky)[__indx];
95         if (!__case_sensitive)
96           __kc = __ct.toupper(__kc);
97         if (__c == __kc) {
98           __consume = true;
99           if (__ky->size() == __indx + 1) {
100             *__st = __does_match;
101             --__n_might_match;
102             ++__n_does_match;
103           }
104         } else {
105           *__st = __doesnt_match;
106           --__n_might_match;
107         }
108       }
109     }
110     // consume if we matched a character
111     if (__consume) {
112       ++__b;
113       // If we consumed a character and there might be a matched keyword that
114       //   was marked matched on a previous iteration, then such keywords
115       //   which are now marked as not matching.
116       if (__n_might_match + __n_does_match > 1) {
117         __st = __status;
118         for (_ForwardIterator __ky = __kb; __ky != __ke; ++__ky, (void)++__st) {
119           if (*__st == __does_match && __ky->size() != __indx + 1) {
120             *__st = __doesnt_match;
121             --__n_does_match;
122           }
123         }
124       }
125     }
126   }
127   // We've exited the loop because we hit eof and/or we have no more "might matches".
128   if (__b == __e)
129     __err |= ios_base::eofbit;
130   // Return the first matching result
131   for (__st = __status; __kb != __ke; ++__kb, (void)++__st)
132     if (*__st == __does_match)
133       break;
134   if (__kb == __ke)
135     __err |= ios_base::failbit;
136   return __kb;
137 }
138 
139 _LIBCPP_END_NAMESPACE_STD
140 
141 #endif // _LIBCPP_HAS_LOCALIZATION
142 
143 #endif // _LIBCPP___LOCALE_DIR_SCAN_KEYWORD_H
144