xref: /freebsd/contrib/llvm-project/libcxx/include/__functional/boyer_moore_searcher.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H
10 #define _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H
11 
12 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
13 #  pragma GCC system_header
14 #endif
15 
16 #include <__algorithm/fill_n.h>
17 #include <__config>
18 #include <__functional/hash.h>
19 #include <__functional/operations.h>
20 #include <__iterator/iterator_traits.h>
21 #include <__memory/shared_ptr.h>
22 #include <__type_traits/make_unsigned.h>
23 #include <__utility/pair.h>
24 #include <array>
25 #include <limits>
26 #include <unordered_map>
27 
28 #if _LIBCPP_STD_VER >= 17
29 
30 _LIBCPP_PUSH_MACROS
31 #  include <__undef_macros>
32 
33 _LIBCPP_BEGIN_NAMESPACE_STD
34 
35 template <class _Key, class _Value, class _Hash, class _BinaryPredicate, bool /*useArray*/>
36 class _BMSkipTable;
37 
38 // General case for BM data searching; use a map
39 template <class _Key, class _Value, class _Hash, class _BinaryPredicate>
40 class _BMSkipTable<_Key, _Value, _Hash, _BinaryPredicate, false> {
41 private:
42   using value_type = _Value;
43   using key_type   = _Key;
44 
45   const value_type __default_value_;
46   unordered_map<_Key, _Value, _Hash, _BinaryPredicate> __table_;
47 
48 public:
_BMSkipTable(size_t __sz,value_type __default_value,_Hash __hash,_BinaryPredicate __pred)49   _LIBCPP_HIDE_FROM_ABI explicit _BMSkipTable(
50       size_t __sz, value_type __default_value, _Hash __hash, _BinaryPredicate __pred)
51       : __default_value_(__default_value), __table_(__sz, __hash, __pred) {}
52 
insert(const key_type & __key,value_type __val)53   _LIBCPP_HIDE_FROM_ABI void insert(const key_type& __key, value_type __val) { __table_[__key] = __val; }
54 
55   _LIBCPP_HIDE_FROM_ABI value_type operator[](const key_type& __key) const {
56     auto __it = __table_.find(__key);
57     return __it == __table_.end() ? __default_value_ : __it->second;
58   }
59 };
60 
61 // Special case small numeric values; use an array
62 template <class _Key, class _Value, class _Hash, class _BinaryPredicate>
63 class _BMSkipTable<_Key, _Value, _Hash, _BinaryPredicate, true> {
64 private:
65   using value_type = _Value;
66   using key_type   = _Key;
67 
68   using unsigned_key_type = make_unsigned_t<key_type>;
69   std::array<value_type, 256> __table_;
70   static_assert(numeric_limits<unsigned_key_type>::max() < 256);
71 
72 public:
_BMSkipTable(size_t,value_type __default_value,_Hash,_BinaryPredicate)73   _LIBCPP_HIDE_FROM_ABI explicit _BMSkipTable(size_t, value_type __default_value, _Hash, _BinaryPredicate) {
74     std::fill_n(__table_.data(), __table_.size(), __default_value);
75   }
76 
insert(key_type __key,value_type __val)77   _LIBCPP_HIDE_FROM_ABI void insert(key_type __key, value_type __val) {
78     __table_[static_cast<unsigned_key_type>(__key)] = __val;
79   }
80 
81   _LIBCPP_HIDE_FROM_ABI value_type operator[](key_type __key) const {
82     return __table_[static_cast<unsigned_key_type>(__key)];
83   }
84 };
85 
86 template <class _RandomAccessIterator1,
87           class _Hash            = hash<typename iterator_traits<_RandomAccessIterator1>::value_type>,
88           class _BinaryPredicate = equal_to<>>
89 class boyer_moore_searcher {
90 private:
91   using difference_type = typename std::iterator_traits<_RandomAccessIterator1>::difference_type;
92   using value_type      = typename std::iterator_traits<_RandomAccessIterator1>::value_type;
93   using __skip_table_type _LIBCPP_NODEBUG =
94       _BMSkipTable<value_type,
95                    difference_type,
96                    _Hash,
97                    _BinaryPredicate,
98                    is_integral_v<value_type> && sizeof(value_type) == 1 && is_same_v<_Hash, hash<value_type>> &&
99                        is_same_v<_BinaryPredicate, equal_to<>>>;
100 
101 public:
102   _LIBCPP_HIDE_FROM_ABI boyer_moore_searcher(
103       _RandomAccessIterator1 __first,
104       _RandomAccessIterator1 __last,
105       _Hash __hash            = _Hash(),
106       _BinaryPredicate __pred = _BinaryPredicate())
__first_(__first)107       : __first_(__first),
108         __last_(__last),
109         __pred_(__pred),
110         __pattern_length_(__last - __first),
111         __skip_table_(std::make_shared<__skip_table_type>(__pattern_length_, -1, __hash, __pred_)),
112         __suffix_(std::__allocate_shared_unbounded_array<difference_type[]>(
113             allocator<difference_type>(), __pattern_length_ + 1)) {
114     difference_type __i = 0;
115     while (__first != __last) {
116       __skip_table_->insert(*__first, __i);
117       ++__first;
118       ++__i;
119     }
120     __build_suffix_table(__first_, __last_, __pred_);
121   }
122 
123   template <class _RandomAccessIterator2>
124   _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2>
operator()125   operator()(_RandomAccessIterator2 __first, _RandomAccessIterator2 __last) const {
126     static_assert(is_same_v<__remove_cvref_t<typename iterator_traits<_RandomAccessIterator1>::value_type>,
127                             __remove_cvref_t<typename iterator_traits<_RandomAccessIterator2>::value_type>>,
128                   "Corpus and Pattern iterators must point to the same type");
129     if (__first == __last)
130       return std::make_pair(__last, __last);
131     if (__first_ == __last_)
132       return std::make_pair(__first, __first);
133 
134     if (__pattern_length_ > (__last - __first))
135       return std::make_pair(__last, __last);
136     return __search(__first, __last);
137   }
138 
139 private:
140   _RandomAccessIterator1 __first_;
141   _RandomAccessIterator1 __last_;
142   _BinaryPredicate __pred_;
143   difference_type __pattern_length_;
144   shared_ptr<__skip_table_type> __skip_table_;
145   shared_ptr<difference_type[]> __suffix_;
146 
147   template <class _RandomAccessIterator2>
148   _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2>
__search(_RandomAccessIterator2 __f,_RandomAccessIterator2 __l)149   __search(_RandomAccessIterator2 __f, _RandomAccessIterator2 __l) const {
150     _RandomAccessIterator2 __current      = __f;
151     const _RandomAccessIterator2 __last   = __l - __pattern_length_;
152     const __skip_table_type& __skip_table = *__skip_table_;
153 
154     while (__current <= __last) {
155       difference_type __j = __pattern_length_;
156       while (__pred_(__first_[__j - 1], __current[__j - 1])) {
157         --__j;
158         if (__j == 0)
159           return std::make_pair(__current, __current + __pattern_length_);
160       }
161 
162       difference_type __k = __skip_table[__current[__j - 1]];
163       difference_type __m = __j - __k - 1;
164       if (__k < __j && __m > __suffix_[__j])
165         __current += __m;
166       else
167         __current += __suffix_[__j];
168     }
169     return std::make_pair(__l, __l);
170   }
171 
172   template <class _Iterator, class _Container>
173   _LIBCPP_HIDE_FROM_ABI void
__compute_bm_prefix(_Iterator __first,_Iterator __last,_BinaryPredicate __pred,_Container & __prefix)174   __compute_bm_prefix(_Iterator __first, _Iterator __last, _BinaryPredicate __pred, _Container& __prefix) {
175     const size_t __count = __last - __first;
176 
177     __prefix[0] = 0;
178     size_t __k  = 0;
179 
180     for (size_t __i = 1; __i != __count; ++__i) {
181       while (__k > 0 && !__pred(__first[__k], __first[__i]))
182         __k = __prefix[__k - 1];
183 
184       if (__pred(__first[__k], __first[__i]))
185         ++__k;
186       __prefix[__i] = __k;
187     }
188   }
189 
190   _LIBCPP_HIDE_FROM_ABI void
__build_suffix_table(_RandomAccessIterator1 __first,_RandomAccessIterator1 __last,_BinaryPredicate __pred)191   __build_suffix_table(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _BinaryPredicate __pred) {
192     const size_t __count = __last - __first;
193 
194     if (__count == 0)
195       return;
196 
197     auto __scratch = std::make_unique<difference_type[]>(__count);
198 
199     __compute_bm_prefix(__first, __last, __pred, __scratch);
200     for (size_t __i = 0; __i <= __count; ++__i)
201       __suffix_[__i] = __count - __scratch[__count - 1];
202 
203     using _ReverseIter = reverse_iterator<_RandomAccessIterator1>;
204     __compute_bm_prefix(_ReverseIter(__last), _ReverseIter(__first), __pred, __scratch);
205 
206     for (size_t __i = 0; __i != __count; ++__i) {
207       const size_t __j          = __count - __scratch[__i];
208       const difference_type __k = __i - __scratch[__i] + 1;
209 
210       if (__suffix_[__j] > __k)
211         __suffix_[__j] = __k;
212     }
213   }
214 };
215 _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(boyer_moore_searcher);
216 
217 template <class _RandomAccessIterator1,
218           class _Hash            = hash<typename iterator_traits<_RandomAccessIterator1>::value_type>,
219           class _BinaryPredicate = equal_to<>>
220 class boyer_moore_horspool_searcher {
221 private:
222   using difference_type = typename iterator_traits<_RandomAccessIterator1>::difference_type;
223   using value_type      = typename iterator_traits<_RandomAccessIterator1>::value_type;
224   using __skip_table_type _LIBCPP_NODEBUG =
225       _BMSkipTable<value_type,
226                    difference_type,
227                    _Hash,
228                    _BinaryPredicate,
229                    is_integral_v<value_type> && sizeof(value_type) == 1 && is_same_v<_Hash, hash<value_type>> &&
230                        is_same_v<_BinaryPredicate, equal_to<>>>;
231 
232 public:
233   _LIBCPP_HIDE_FROM_ABI boyer_moore_horspool_searcher(
234       _RandomAccessIterator1 __first,
235       _RandomAccessIterator1 __last,
236       _Hash __hash            = _Hash(),
237       _BinaryPredicate __pred = _BinaryPredicate())
__first_(__first)238       : __first_(__first),
239         __last_(__last),
240         __pred_(__pred),
241         __pattern_length_(__last - __first),
242         __skip_table_(std::make_shared<__skip_table_type>(__pattern_length_, __pattern_length_, __hash, __pred_)) {
243     if (__first == __last)
244       return;
245     --__last;
246     difference_type __i = 0;
247     while (__first != __last) {
248       __skip_table_->insert(*__first, __pattern_length_ - 1 - __i);
249       ++__first;
250       ++__i;
251     }
252   }
253 
254   template <class _RandomAccessIterator2>
255   _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2>
operator()256   operator()(_RandomAccessIterator2 __first, _RandomAccessIterator2 __last) const {
257     static_assert(is_same_v<__remove_cvref_t<typename std::iterator_traits<_RandomAccessIterator1>::value_type>,
258                             __remove_cvref_t<typename std::iterator_traits<_RandomAccessIterator2>::value_type>>,
259                   "Corpus and Pattern iterators must point to the same type");
260     if (__first == __last)
261       return std::make_pair(__last, __last);
262     if (__first_ == __last_)
263       return std::make_pair(__first, __first);
264 
265     if (__pattern_length_ > __last - __first)
266       return std::make_pair(__last, __last);
267 
268     return __search(__first, __last);
269   }
270 
271 private:
272   _RandomAccessIterator1 __first_;
273   _RandomAccessIterator1 __last_;
274   _BinaryPredicate __pred_;
275   difference_type __pattern_length_;
276   shared_ptr<__skip_table_type> __skip_table_;
277 
278   template <class _RandomAccessIterator2>
279   _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2>
__search(_RandomAccessIterator2 __f,_RandomAccessIterator2 __l)280   __search(_RandomAccessIterator2 __f, _RandomAccessIterator2 __l) const {
281     _RandomAccessIterator2 __current      = __f;
282     const _RandomAccessIterator2 __last   = __l - __pattern_length_;
283     const __skip_table_type& __skip_table = *__skip_table_;
284 
285     while (__current <= __last) {
286       difference_type __j = __pattern_length_;
287       while (__pred_(__first_[__j - 1], __current[__j - 1])) {
288         --__j;
289         if (__j == 0)
290           return std::make_pair(__current, __current + __pattern_length_);
291       }
292       __current += __skip_table[__current[__pattern_length_ - 1]];
293     }
294     return std::make_pair(__l, __l);
295   }
296 };
297 _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(boyer_moore_horspool_searcher);
298 
299 _LIBCPP_END_NAMESPACE_STD
300 
301 _LIBCPP_POP_MACROS
302 
303 #endif // _LIBCPP_STD_VER >= 17
304 
305 #endif // _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H
306