/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * PowerPC atomic bit operations.
 *
 * Merged version by David Gibson <david@gibson.dropbear.id.au>.
 * Based on ppc64 versions by: Dave Engebretsen, Todd Inglett, Don
 * Reed, Pat McCarthy, Peter Bergner, Anton Blanchard.  They
 * originally took it from the ppc32 code.
 *
 * Within a word, bits are numbered LSB first.  Lots of places make
 * this assumption by directly testing bits with (val & (1<<nr)).
 * This can cause confusion for large (> 1 word) bitmaps on a
 * big-endian system because, unlike little endian, the number of each
 * bit depends on the word size.
 *
 * The bitop functions are defined to work on unsigned longs, so for a
 * ppc64 system the bits end up numbered:
 *   |63..............0|127............64|191...........128|255...........192|
 * and on ppc32:
 *   |31.....0|63....32|95....64|127...96|159..128|191..160|223..192|255..224|
 *
 * There are a few little-endian macros used mostly for filesystem
 * bitmaps, these work on similar bit array layouts, but
 * byte-oriented:
 *   |7...0|15...8|23...16|31...24|39...32|47...40|55...48|63...56|
 *
 * The main difference is that bits 3-5 (64b) or 3-4 (32b) in the bit
 * number field need to be reversed compared to the big-endian bit
 * fields. This can be achieved by XOR with 0x38 (64b) or 0x18 (32b).
 */

#ifndef _ASM_POWERPC_BITOPS_H
#define _ASM_POWERPC_BITOPS_H

#ifdef __KERNEL__

/* Refuse direct inclusion: this header must be reached via <linux/bitops.h>. */
#ifndef _LINUX_BITOPS_H
#error only <linux/bitops.h> can be included directly
#endif

#include <linux/compiler.h>
#include <asm/asm-compat.h>
#include <asm/synch.h>

/*
 * PPC bit number conversion
 *
 * PPC_BITLSHIFT(be) turns a big-endian bit number (0 == most significant
 * bit of an unsigned long) into the equivalent left-shift count.
 * PPC_BIT(bit) is the single-bit mask for that big-endian bit number.
 * PPC_BITMASK(bs, be) is the mask with every bit from big-endian bit 'bs'
 * through big-endian bit 'be' set, inclusive; the expression only yields
 * that contiguous run when bs <= be.
 */
#define PPC_BITLSHIFT(be)	(BITS_PER_LONG - 1 - (be))
#define PPC_BIT(bit)		(1UL << PPC_BITLSHIFT(bit))
#define PPC_BITMASK(bs, be)	((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs))

/*
 * Put a PPC bit into a "normal" bit position
 *
 * Extracts big-endian bit 'ppc_bit' of 'bits' as 0 or 1 and shifts it left
 * to LSB-first position 'dst_bit'.
 */
#define PPC_BITEXTRACT(bits, ppc_bit, dst_bit)			\
	((((bits) >> PPC_BITLSHIFT(ppc_bit)) & 1) << (dst_bit))

/* Same conversions as above, for a fixed 32-bit field width. */
#define PPC_BITLSHIFT32(be)	(32 - 1 - (be))
#define PPC_BIT32(bit)		(1UL << PPC_BITLSHIFT32(bit))
#define PPC_BITMASK32(bs, be)	((PPC_BIT32(bs) - PPC_BIT32(be))|PPC_BIT32(bs))

/* Same conversions as above, for a fixed 8-bit field width. */
#define PPC_BITLSHIFT8(be)	(8 - 1 - (be))
#define PPC_BIT8(bit)		(1UL << PPC_BITLSHIFT8(bit))
#define PPC_BITMASK8(bs, be)	((PPC_BIT8(bs) - PPC_BIT8(be))|PPC_BIT8(bs))

#include <asm/barrier.h>

/*
 * Macro for generating the ***_bits() functions
 *
 * Emits: static inline void fn(unsigned long mask, volatile unsigned long *_p)
 *
 * The generated function applies 'op' between the word at *_p and 'mask'
 * inside a retry loop: PPC_LLARX loads the word into 'old', 'op' combines
 * it with 'mask' in place, PPC_STLCX attempts the store, and "bne- 1b"
 * branches back to label 1 when the store did not succeed.  'prefix' is an
 * assembler string pasted before the loop (a barrier, or "" for none).
 *
 * Asm operands: %0 = old (early-clobber scratch), %1 = *p as a read/write
 * memory operand, %2 = mask (register, or constant accepted by "K"),
 * %3 = p.
 *
 * NOTE(review): "%I2" presumably appends 'i' to the mnemonic when the mask
 * was matched as an immediate, selecting the immediate form of 'op' --
 * confirm against GCC's PowerPC operand modifiers.  PPC_LLARX/PPC_STLCX
 * are defined outside this file (presumably the word-size-appropriate
 * load-reserve / store-conditional instructions).
 */
#define DEFINE_BITOP(fn, op, prefix)		\
static inline void fn(unsigned long mask,	\
		volatile unsigned long *_p)	\
{						\
	unsigned long old;			\
	unsigned long *p = (unsigned long *)_p;	\
	__asm__ __volatile__ (			\
	prefix					\
"1:"	PPC_LLARX "%0,0,%3,0\n"			\
	#op "%I2 %0,%0,%2\n"			\
	PPC_STLCX "%0,0,%3\n"			\
	"bne- 1b\n"				\
	: "=&r" (old), "+m" (*p)		\
	: "rK" (mask), "r" (p)			\
	: "cc", "memory");			\
}

/* set_bits(): OR 'mask' into *_p.  change_bits(): XOR 'mask' into *_p. */
/* Both are instantiated with an empty prefix, so the macro adds no barrier. */
DEFINE_BITOP(set_bits, or, "")
DEFINE_BITOP(change_bits, xor, "")

/*
 * Return true when 'x' is non-zero and its set bits form one contiguous
 * run, where the run is allowed to wrap around from bit 0 to the top bit.
 * DEFINE_CLROP() and test_and_clear_bits() use this to decide whether a
 * constant mask can be handed to the rlwinm instruction as an immediate.
 */
static __always_inline bool is_rlwinm_mask_valid(unsigned long x)
{
	if (!x)
		return false;
	if (x & 1)
		x = ~x;	// make the mask non-wrapping
	x += x & -x;	// adding the low set bit results in at most one bit set

	/* Zero or a single remaining bit means the run was contiguous. */
	return !(x & (x - 1));
}

/*
 * Macro for generating the bit-clearing ***_bits() functions.
 *
 * Emits: static inline void fn(unsigned long mask, volatile unsigned long *_p)
 *
 * The generated function clears the bits of 'mask' in the word at *_p using
 * the same load / modify / store-conditional / retry loop as
 * DEFINE_BITOP().  'prefix' is an assembler string pasted before the loop.
 *
 * Two code paths:
 *  - CONFIG_PPC32 with a compile-time constant 'mask' whose complement
 *    passes is_rlwinm_mask_valid(): the loop uses lwarx/stwcx. and a single
 *    "rlwinm" with ~mask supplied as an immediate ("n" constraint).
 *  - otherwise: PPC_LLARX/PPC_STLCX with "andc" and 'mask' in a register.
 */
#define DEFINE_CLROP(fn, prefix)					\
static inline void fn(unsigned long mask, volatile unsigned long *_p)	\
{									\
	unsigned long old;						\
	unsigned long *p = (unsigned long *)_p;				\
									\
	if (IS_ENABLED(CONFIG_PPC32) &&					\
	    __builtin_constant_p(mask) && is_rlwinm_mask_valid(~mask)) {\
		asm volatile (						\
			prefix						\
		"1:"	"lwarx %0,0,%3\n"				\
			"rlwinm %0,%0,0,%2\n"				\
			"stwcx. %0,0,%3\n"				\
			"bne- 1b\n"					\
			: "=&r" (old), "+m" (*p)			\
			: "n" (~mask), "r" (p)				\
			: "cc", "memory");				\
	} else {							\
		asm volatile (						\
			prefix						\
		"1:"	PPC_LLARX "%0,0,%3,0\n"				\
			"andc %0,%0,%2\n"				\
			PPC_STLCX "%0,0,%3\n"				\
			"bne- 1b\n"					\
			: "=&r" (old), "+m" (*p)			\
			: "r" (mask), "r" (p)				\
			: "cc", "memory");				\
	}								\
}

/* clear_bits(): no prefix.  clear_bits_unlock(): PPC_RELEASE_BARRIER first. */
DEFINE_CLROP(clear_bits, "")
DEFINE_CLROP(clear_bits_unlock, PPC_RELEASE_BARRIER)

/*
 * Single-bit wrappers: each one forwards to the matching ***_bits() helper
 * with BIT_MASK(nr) as the mask and addr + BIT_WORD(nr) as the target word.
 * BIT_MASK()/BIT_WORD() are defined outside this file (presumably the bit
 * within a word and the word index within the bitmap, respectively).
 */

/* Set bit 'nr' of the bitmap at 'addr' via set_bits(). */
static inline void arch_set_bit(int nr, volatile unsigned long *addr)
{
	set_bits(BIT_MASK(nr), addr + BIT_WORD(nr));
}

/* Clear bit 'nr' of the bitmap at 'addr' via clear_bits(). */
static inline void arch_clear_bit(int nr, volatile unsigned long *addr)
{
	clear_bits(BIT_MASK(nr), addr + BIT_WORD(nr));
}

/*
 * Clear bit 'nr' of the bitmap at 'addr' via clear_bits_unlock(), i.e. with
 * PPC_RELEASE_BARRIER emitted ahead of the update.
 */
static inline void arch_clear_bit_unlock(int nr, volatile unsigned long *addr)
{
	clear_bits_unlock(BIT_MASK(nr), addr + BIT_WORD(nr));
}

/* Toggle bit 'nr' of the bitmap at 'addr' via change_bits(). */
static inline void arch_change_bit(int nr, volatile unsigned long *addr)
{
	change_bits(BIT_MASK(nr), addr + BIT_WORD(nr));
}

/*
 * Like DEFINE_BITOP(), with changes to the arguments to 'op' and the output
 * operands.
 *
 * Emits: static inline unsigned long fn(unsigned long mask,
 *                                       volatile unsigned long *_p)
 *
 * The loaded word is kept in 'old' and the modified value is built in a
 * second scratch register 't', so the function can return (old & mask):
 * non-zero iff any bit of 'mask' was set before the update.
 *
 * 'prefix' and 'postfix' are assembler strings pasted before and after the
 * retry loop.  'eh' must be a compile-time constant ("n" constraint) and is
 * passed as the last argument of PPC_LLARX.
 *
 * Asm operands: %0 = old, %1 = t, %2 = mask, %3 = p, %4 = eh.
 *
 * NOTE(review): 'eh' is presumably the load-reserve "exclusive access
 * hint" -- confirm against the PPC_LLARX definition.
 */
#define DEFINE_TESTOP(fn, op, prefix, postfix, eh)	\
static inline unsigned long fn(			\
		unsigned long mask,			\
		volatile unsigned long *_p)		\
{							\
	unsigned long old, t;				\
	unsigned long *p = (unsigned long *)_p;		\
	__asm__ __volatile__ (				\
	prefix						\
"1:"	PPC_LLARX "%0,0,%3,%4\n"			\
	#op "%I2 %1,%0,%2\n"				\
	PPC_STLCX "%1,0,%3\n"				\
	"bne- 1b\n"					\
	postfix						\
	: "=&r" (old), "=&r" (t)			\
	: "rK" (mask), "r" (p), "n" (eh)		\
	: "cc", "memory");				\
	return (old & mask);				\
}

/*
 * test_and_set_bits / test_and_change_bits: entry and exit barriers, eh = 0.
 * test_and_set_bits_lock: no prefix, PPC_ACQUIRE_BARRIER after the loop, and
 * eh = 1 only when CONFIG_PPC64 is enabled.
 */
DEFINE_TESTOP(test_and_set_bits, or, PPC_ATOMIC_ENTRY_BARRIER,
	      PPC_ATOMIC_EXIT_BARRIER, 0)
DEFINE_TESTOP(test_and_set_bits_lock, or, "",
	      PPC_ACQUIRE_BARRIER, IS_ENABLED(CONFIG_PPC64))
DEFINE_TESTOP(test_and_change_bits, xor, PPC_ATOMIC_ENTRY_BARRIER,
	      PPC_ATOMIC_EXIT_BARRIER, 0)

/*
 * Clear the bits of 'mask' in the word at *_p and return (old & mask),
 * i.e. the subset of 'mask' that was set beforehand.
 *
 * The loop is bracketed by PPC_ATOMIC_ENTRY_BARRIER and
 * PPC_ATOMIC_EXIT_BARRIER.  As in DEFINE_CLROP(), a compile-time constant
 * mask whose complement passes is_rlwinm_mask_valid() takes the CONFIG_PPC32
 * "rlwinm" immediate path; every other case uses "andc" with the mask in a
 * register.
 */
static inline unsigned long test_and_clear_bits(unsigned long mask, volatile unsigned long *_p)
{
	unsigned long old, t;
	unsigned long *p = (unsigned long *)_p;

	if (IS_ENABLED(CONFIG_PPC32) &&
	    __builtin_constant_p(mask) && is_rlwinm_mask_valid(~mask)) {
		asm volatile (
			PPC_ATOMIC_ENTRY_BARRIER
		"1:"	"lwarx %0,0,%3\n"
			"rlwinm %1,%0,0,%2\n"
			"stwcx. %1,0,%3\n"
			"bne- 1b\n"
			PPC_ATOMIC_EXIT_BARRIER
			: "=&r" (old), "=&r" (t)
			: "n" (~mask), "r" (p)
			: "cc", "memory");
	} else {
		asm volatile (
			PPC_ATOMIC_ENTRY_BARRIER
		"1:"	PPC_LLARX "%0,0,%3,0\n"
			"andc %1,%0,%2\n"
			PPC_STLCX "%1,0,%3\n"
			"bne- 1b\n"
			PPC_ATOMIC_EXIT_BARRIER
			: "=&r" (old), "=&r" (t)
			: "r" (mask), "r" (p)
			: "cc", "memory");
	}

	return (old & mask);
}

/*
 * Single-bit test-and-modify wrappers.  Each returns 1 if bit 'nr' was set
 * before the operation and 0 otherwise (the helper's result compared != 0).
 */

/* Set bit 'nr' via test_and_set_bits(); return its previous state. */
static inline int arch_test_and_set_bit(unsigned long nr,
					volatile unsigned long *addr)
{
	return test_and_set_bits(BIT_MASK(nr), addr + BIT_WORD(nr)) != 0;
}

/* Set bit 'nr' via test_and_set_bits_lock(); return its previous state. */
static inline int arch_test_and_set_bit_lock(unsigned long nr,
					     volatile unsigned long *addr)
{
	return test_and_set_bits_lock(BIT_MASK(nr),
					addr + BIT_WORD(nr)) != 0;
}

/* Clear bit 'nr' via test_and_clear_bits(); return its previous state. */
static inline int arch_test_and_clear_bit(unsigned long nr,
					  volatile unsigned long *addr)
{
	return test_and_clear_bits(BIT_MASK(nr), addr + BIT_WORD(nr)) != 0;
}

/* Toggle bit 'nr' via test_and_change_bits(); return its previous state. */
static inline int arch_test_and_change_bit(unsigned long nr,
					   volatile unsigned long *addr)
{
	return test_and_change_bits(BIT_MASK(nr), addr + BIT_WORD(nr)) != 0;
}

/*
 * XOR 'mask' into the word at 'p', with PPC_RELEASE_BARRIER emitted ahead
 * of the retry loop, and return true when bit 7 (BIT_MASK(7)) was set in
 * the value read before the XOR.
 *
 * Unlike DEFINE_BITOP(), *p is not listed as a memory operand here; the
 * "memory" clobber is what tells the compiler that memory is modified.
 */
static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask,
		volatile unsigned long *p)
{
	unsigned long old, t;

	__asm__ __volatile__ (
	PPC_RELEASE_BARRIER
"1:"	PPC_LLARX "%0,0,%3,0\n"
	"xor %1,%0,%2\n"
	PPC_STLCX "%1,0,%3\n"
	"bne- 1b\n"
	: "=&r" (old), "=&r" (t)
	: "r" (mask), "r" (p)
	: "cc", "memory");

	return (old & BIT_MASK(7)) != 0;
}
/* Self-referential define so other code can test for this override with #ifdef. */
#define arch_xor_unlock_is_negative_byte arch_xor_unlock_is_negative_byte

#include <asm-generic/bitops/non-atomic.h>

/*
 * Clear bit 'nr' with __clear_bit() after emitting PPC_RELEASE_BARRIER.
 * No load-reserve/store-conditional loop is used on this path.
 * NOTE(review): __clear_bit() presumably comes from the non-atomic header
 * included just above -- confirm.
 */
static inline void arch___clear_bit_unlock(int nr, volatile unsigned long *addr)
{
	__asm__ __volatile__(PPC_RELEASE_BARRIER "" ::: "memory");
	__clear_bit(nr, addr);
}

/*
 * Return the zero-based bit position (LE, not IBM bit numbering) of
 * the most significant 1-bit in a double word.
 */
#define __ilog2(x)	ilog2(x)

#include <asm-generic/bitops/ffz.h>

#include <asm-generic/bitops/builtin-__ffs.h>

#include <asm-generic/bitops/builtin-ffs.h>

/*
 * fls: find last (most-significant) bit set.
 * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
 *
 * Compile-time constants are folded with __builtin_clz() (guarding the
 * x == 0 case explicitly); run-time values use the cntlzw instruction and
 * return 32 minus the leading-zero count.
 */
static __always_inline int fls(unsigned int x)
{
	int lz;

	if (__builtin_constant_p(x))
		return x ? 32 - __builtin_clz(x) : 0;
	asm("cntlzw %0,%1" : "=r" (lz) : "r" (x));
	return 32 - lz;
}

#include <asm-generic/bitops/builtin-__fls.h>

/*
 * 64-bit can do this using one cntlzd (count leading zeroes doubleword)
 * instruction; for 32-bit we use the generic version, which does two
 * 32-bit fls calls.
 */
#ifdef CONFIG_PPC64
/* Same structure as fls(), for 64-bit values: constant folding or cntlzd. */
static __always_inline int fls64(__u64 x)
{
	int lz;

	if (__builtin_constant_p(x))
		return x ? 64 - __builtin_clzll(x) : 0;
	asm("cntlzd %0,%1" : "=r" (lz) : "r" (x));
	return 64 - lz;
}
#else
#include <asm-generic/bitops/fls64.h>
#endif

/*
 * Population-count helpers: 64-bit builds declare out-of-line
 * __arch_hweight*() functions (defined outside this header) and pull in the
 * const_hweight wrappers; other builds use the generic hweight header.
 */
#ifdef CONFIG_PPC64
unsigned int __arch_hweight8(unsigned int w);
unsigned int __arch_hweight16(unsigned int w);
unsigned int __arch_hweight32(unsigned int w);
unsigned long __arch_hweight64(__u64 w);
#include <asm-generic/bitops/const_hweight.h>
#else
#include <asm-generic/bitops/hweight.h>
#endif

/* wrappers that deal with KASAN instrumentation */
#include <asm-generic/bitops/instrumented-atomic.h>
#include <asm-generic/bitops/instrumented-lock.h>

/* Little-endian versions */
#include <asm-generic/bitops/le.h>

/* Bitmap functions for the ext2 filesystem */

#include <asm-generic/bitops/ext2-atomic-setbit.h>

#include <asm-generic/bitops/sched.h>

#endif /* __KERNEL__ */

#endif /* _ASM_POWERPC_BITOPS_H */