1//===-- HashTable BitMasks Generic Implementation ---------------*- C++ -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8 9#include "src/__support/CPP/bit.h" 10#include "src/__support/common.h" 11#include "src/__support/endian_internal.h" 12#include "src/__support/macros/config.h" 13 14namespace LIBC_NAMESPACE_DECL { 15namespace internal { 16 17// GPU architectures are 64-bit but use 32-bit general purpose registers. 18#ifdef LIBC_TARGET_ARCH_IS_GPU 19using bitmask_t = uint32_t; 20#else 21using bitmask_t = uintptr_t; 22#endif 23 24// Helper function to spread a byte across the whole word. 25// Accumutively, the procedure looks like: 26// byte = 0x00000000000000ff 27// byte | (byte << 8) = 0x000000000000ffff 28// byte | (byte << 16) = 0x00000000ffffffff 29// byte | (byte << 32) = 0xffffffffffffffff 30LIBC_INLINE constexpr bitmask_t repeat_byte(bitmask_t byte) { 31 size_t shift_amount = 8; 32 while (shift_amount < sizeof(bitmask_t) * 8) { 33 byte |= byte << shift_amount; 34 shift_amount <<= 1; 35 } 36 return byte; 37} 38 39using BitMask = BitMaskAdaptor<bitmask_t, 0x8ul>; 40using IteratableBitMask = IteratableBitMaskAdaptor<BitMask>; 41 42struct Group { 43 LIBC_INLINE_VAR static constexpr bitmask_t MASK = repeat_byte(0x80ul); 44 bitmask_t data; 45 46 // Load a group of control words from an arbitary address. 47 LIBC_INLINE static Group load(const void *addr) { 48 char bytes[sizeof(bitmask_t)]; 49 50 for (size_t i = 0; i < sizeof(bitmask_t); ++i) 51 bytes[i] = static_cast<const char *>(addr)[i]; 52 return Group{cpp::bit_cast<bitmask_t>(bytes)}; 53 } 54 55 // Load a group of control words from an aligned address. 56 LIBC_INLINE static Group load_aligned(const void *addr) { 57 return *static_cast<const Group *>(addr); 58 } 59 60 // Find out the lanes equal to the given byte and return the bitmask 61 // with corresponding bits set. 62 LIBC_INLINE IteratableBitMask match_byte(uint8_t byte) const { 63 // Given byte = 0x10, suppose the data is: 64 // 65 // data = [ 0x10 | 0x10 | 0x00 | 0xF1 | ... ] 66 // 67 // First, we compare the byte using XOR operation: 68 // 69 // [ 0x10 | 0x10 | 0x10 | 0x10 | ... ] (0) 70 // ^ [ 0x10 | 0x10 | 0x00 | 0xF1 | ... ] (1) 71 // = [ 0x00 | 0x00 | 0x10 | 0xE1 | ... ] (2) 72 // 73 // Notice that the equal positions will now be 0x00, so if we substract 0x01 74 // respective to every byte, it will need to carry the substraction to upper 75 // bits (assume no carry from the hidden parts) 76 // [ 0x00 | 0x00 | 0x10 | 0xE1 | ... ] (2) 77 // - [ 0x01 | 0x01 | 0x01 | 0x01 | ... ] (3) 78 // = [ 0xFE | 0xFF | 0x0F | 0xE0 | ... ] (4) 79 // 80 // But there may be some bytes whose highest bit is already set after the 81 // xor operation. To rule out these positions, we AND them with the NOT 82 // of the XOR result: 83 // 84 // [ 0xFF | 0xFF | 0xEF | 0x1E | ... ] (5, NOT (2)) 85 // & [ 0xFE | 0xFF | 0x0F | 0xE0 | ... ] (4) 86 // = [ 0xFE | 0xFF | 0x0F | 0x10 | ... ] (6) 87 // 88 // To make the bitmask iteratable, only one bit can be set in each stride. 89 // So we AND each byte with 0x80 and keep only the highest bit: 90 // 91 // [ 0xFE | 0xFF | 0x0F | 0x10 | ... ] (6) 92 // & [ 0x80 | 0x80 | 0x80 | 0x80 | ... ] (7) 93 // = [ 0x80 | 0x80 | 0x00 | 0x00 | ... ] (8) 94 // 95 // However, there are possitbilites for false positives. For example, if the 96 // data is [ 0x10 | 0x11 | 0x10 | 0xF1 | ... ]. This only happens when there 97 // is a key only differs from the searched by the lowest bit. The claims 98 // are: 99 // 100 // - This never happens for `EMPTY` and `DELETED`, only full entries. 101 // - The check for key equality will catch these. 102 // - This only happens if there is at least 1 true match. 103 // - The chance of this happening is very low (< 1% chance per byte). 104 static constexpr bitmask_t ONES = repeat_byte(0x01ul); 105 auto cmp = data ^ repeat_byte(static_cast<bitmask_t>(byte) & 0xFFul); 106 auto result = 107 LIBC_NAMESPACE::Endian::to_little_endian((cmp - ONES) & ~cmp & MASK); 108 return {BitMask{result}}; 109 } 110 111 // Find out the lanes equal to EMPTY or DELETE (highest bit set) and 112 // return the bitmask with corresponding bits set. 113 LIBC_INLINE BitMask mask_available() const { 114 bitmask_t le_data = LIBC_NAMESPACE::Endian::to_little_endian(data); 115 return {le_data & MASK}; 116 } 117 118 LIBC_INLINE IteratableBitMask occupied() const { 119 bitmask_t available = mask_available().word; 120 return {BitMask{available ^ MASK}}; 121 } 122}; 123} // namespace internal 124} // namespace LIBC_NAMESPACE_DECL 125