xref: /freebsd/contrib/llvm-project/libc/src/__support/HashTable/generic/bitmask_impl.inc (revision bb722a7d0f1642bff6487f943ad0427799a6e5bf)
1//===-- HashTable BitMasks Generic Implementation ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "src/__support/CPP/bit.h"
10#include "src/__support/common.h"
11#include "src/__support/endian_internal.h"
12#include "src/__support/macros/config.h"
13
14namespace LIBC_NAMESPACE_DECL {
15namespace internal {
16
17// GPU architectures are 64-bit but use 32-bit general purpose registers.
18#ifdef LIBC_TARGET_ARCH_IS_GPU
19using bitmask_t = uint32_t;
20#else
21using bitmask_t = uintptr_t;
22#endif
23
24// Helper function to spread a byte across the whole word.
25// Accumutively, the procedure looks like:
26//    byte                  = 0x00000000000000ff
27//    byte | (byte << 8)    = 0x000000000000ffff
28//    byte | (byte << 16)   = 0x00000000ffffffff
29//    byte | (byte << 32)   = 0xffffffffffffffff
30LIBC_INLINE constexpr bitmask_t repeat_byte(bitmask_t byte) {
31  size_t shift_amount = 8;
32  while (shift_amount < sizeof(bitmask_t) * 8) {
33    byte |= byte << shift_amount;
34    shift_amount <<= 1;
35  }
36  return byte;
37}
38
39using BitMask = BitMaskAdaptor<bitmask_t, 0x8ul>;
40using IteratableBitMask = IteratableBitMaskAdaptor<BitMask>;
41
42struct Group {
43  LIBC_INLINE_VAR static constexpr bitmask_t MASK = repeat_byte(0x80ul);
44  bitmask_t data;
45
46  // Load a group of control words from an arbitary address.
47  LIBC_INLINE static Group load(const void *addr) {
48    char bytes[sizeof(bitmask_t)];
49
50    for (size_t i = 0; i < sizeof(bitmask_t); ++i)
51      bytes[i] = static_cast<const char *>(addr)[i];
52    return Group{cpp::bit_cast<bitmask_t>(bytes)};
53  }
54
55  // Load a group of control words from an aligned address.
56  LIBC_INLINE static Group load_aligned(const void *addr) {
57    return *static_cast<const Group *>(addr);
58  }
59
60  // Find out the lanes equal to the given byte and return the bitmask
61  // with corresponding bits set.
62  LIBC_INLINE IteratableBitMask match_byte(uint8_t byte) const {
63    // Given byte = 0x10, suppose the data is:
64    //
65    //       data = [ 0x10 | 0x10 | 0x00 | 0xF1 | ... ]
66    //
67    // First, we compare the byte using XOR operation:
68    //
69    //        [ 0x10 | 0x10 | 0x10 | 0x10 | ... ]   (0)
70    //      ^ [ 0x10 | 0x10 | 0x00 | 0xF1 | ... ]   (1)
71    //      = [ 0x00 | 0x00 | 0x10 | 0xE1 | ... ]   (2)
72    //
73    // Notice that the equal positions will now be 0x00, so if we substract 0x01
74    // respective to every byte, it will need to carry the substraction to upper
75    // bits (assume no carry from the hidden parts)
76    //        [ 0x00 | 0x00 | 0x10 | 0xE1 | ... ]   (2)
77    //      - [ 0x01 | 0x01 | 0x01 | 0x01 | ... ]   (3)
78    //      = [ 0xFE | 0xFF | 0x0F | 0xE0 | ... ]   (4)
79    //
80    // But there may be some bytes whose highest bit is already set after the
81    // xor operation. To rule out these positions, we AND them with the NOT
82    // of the XOR result:
83    //
84    //        [ 0xFF | 0xFF | 0xEF | 0x1E | ... ]   (5, NOT (2))
85    //      & [ 0xFE | 0xFF | 0x0F | 0xE0 | ... ]   (4)
86    //      = [ 0xFE | 0xFF | 0x0F | 0x10 | ... ]   (6)
87    //
88    // To make the bitmask iteratable, only one bit can be set in each stride.
89    // So we AND each byte with 0x80 and keep only the highest bit:
90    //
91    //        [ 0xFE | 0xFF | 0x0F | 0x10 | ... ]   (6)
92    //      & [ 0x80 | 0x80 | 0x80 | 0x80 | ... ]   (7)
93    //      = [ 0x80 | 0x80 | 0x00 | 0x00 | ... ]   (8)
94    //
95    // However, there are possitbilites for false positives. For example, if the
96    // data is [ 0x10 | 0x11 | 0x10 | 0xF1 | ... ]. This only happens when there
97    // is a key only differs from the searched by the lowest bit. The claims
98    // are:
99    //
100    //  - This never happens for `EMPTY` and `DELETED`, only full entries.
101    //  - The check for key equality will catch these.
102    //  - This only happens if there is at least 1 true match.
103    //  - The chance of this happening is very low (< 1% chance per byte).
104    static constexpr bitmask_t ONES = repeat_byte(0x01ul);
105    auto cmp = data ^ repeat_byte(static_cast<bitmask_t>(byte) & 0xFFul);
106    auto result =
107        LIBC_NAMESPACE::Endian::to_little_endian((cmp - ONES) & ~cmp & MASK);
108    return {BitMask{result}};
109  }
110
111  // Find out the lanes equal to EMPTY or DELETE (highest bit set) and
112  // return the bitmask with corresponding bits set.
113  LIBC_INLINE BitMask mask_available() const {
114    bitmask_t le_data = LIBC_NAMESPACE::Endian::to_little_endian(data);
115    return {le_data & MASK};
116  }
117
118  LIBC_INLINE IteratableBitMask occupied() const {
119    bitmask_t available = mask_available().word;
120    return {BitMask{available ^ MASK}};
121  }
122};
123} // namespace internal
124} // namespace LIBC_NAMESPACE_DECL
125