/* SPDX-License-Identifier: GPL-2.0 */

#ifndef _LINUX_SIX_H
#define _LINUX_SIX_H

/**
 * DOC: SIX locks overview
 *
 * Shared/intent/exclusive locks: sleepable read/write locks, like rw semaphores
 * but with an additional state: the three lock states are read/shared, intent,
 * and exclusive/write.
 *
 * The purpose of the intent state is to allow for greater concurrency on tree
 * structures without deadlocking. In general, a read can't be upgraded to a
 * write lock without deadlocking, so an operation that updates multiple nodes
 * will have to take write locks for the full duration of the operation.
 *
 * But by adding an intent state, which is exclusive with other intent locks but
 * not with readers, we can take intent locks at the start of the operation,
 * and then take write locks only for the actual update to each individual
 * node, without deadlocking.
 *
 * Example usage:
 *   six_lock_read(&foo->lock);
 *   six_unlock_read(&foo->lock);
 *
 * An intent lock must be held before taking a write lock:
 *   six_lock_intent(&foo->lock);
 *   six_lock_write(&foo->lock);
 *   six_unlock_write(&foo->lock);
 *   six_unlock_intent(&foo->lock);
 *
 * Other operations:
 *   six_trylock_read()
 *   six_trylock_intent()
 *   six_trylock_write()
 *
 *   six_lock_downgrade()		convert from intent to read
 *   six_lock_tryupgrade()		attempt to convert from read to intent, may fail
 *
 * There are also interfaces that take the lock type as an enum:
 *
 *   six_lock_type(&foo->lock, SIX_LOCK_read);
 *   six_trylock_convert(&foo->lock, SIX_LOCK_read, SIX_LOCK_intent);
 *   six_lock_type(&foo->lock, SIX_LOCK_write);
 *   six_unlock_type(&foo->lock, SIX_LOCK_write);
 *   six_unlock_type(&foo->lock, SIX_LOCK_intent);
 *
 * Lock sequence numbers - unlock(), relock():
 *
 * Locks embed sequence numbers, which are incremented on write lock/unlock.
 * This allows locks to be dropped and then retaken iff the state they protect
 * hasn't changed; this makes it much easier to avoid holding locks while e.g.
 * doing IO or allocating memory.
 *
 * Example usage:
 *   six_lock_read(&foo->lock);
 *   u32 seq = six_lock_seq(&foo->lock);
 *   six_unlock_read(&foo->lock);
 *
 *   some_operation_that_may_block();
 *
 *   if (six_relock_read(&foo->lock, seq)) { ... }
 *
 * If the relock operation succeeds, it is as if the lock was never unlocked.
 *
 * Reentrancy:
 *
 * Six locks are not by themselves reentrant, but have counters for both the
 * read and intent states that can be used to provide reentrancy by an upper
 * layer that tracks held locks. If a lock is known to already be held in the
 * read or intent state, six_lock_increment() can be used to bump the "lock
 * held in this state" counter, increasing the number of unlock calls that
 * will be required to fully unlock it.
 *
 * Example usage:
 *   six_lock_read(&foo->lock);
 *   six_lock_increment(&foo->lock, SIX_LOCK_read);
 *   six_unlock_read(&foo->lock);
 *   six_unlock_read(&foo->lock);
 *   foo->lock is now fully unlocked.
 *
 * Since the intent state supersedes read, it's legal to increment the read
 * counter when holding an intent lock, but not the reverse.
 *
 * A lock may only be held once for write: six_lock_increment(.., SIX_LOCK_write)
 * is not legal.
 *
 * should_sleep_fn:
 *
 * There is a six_lock() variant that takes a function pointer that is called
 * immediately prior to schedule() when blocking, and may return an error to
 * abort.
 *
 * One possible use for this feature is when objects being locked are part of
 * a cache and may be reused, and lock ordering is based on a property of the
 * object that will change when the object is reused - i.e. logical key order.
 *
 * If looking up an object in the cache may race with object reuse, and lock
 * ordering is required to prevent deadlock, object reuse may change the
 * correct lock order for that object and cause a deadlock. should_sleep_fn
 * can be used to check if the object is still the object we want and avoid
 * this deadlock.
 *
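 * A minimal sketch of such a callback, assuming the locked objects are
 * "struct bar"s looked up by key (struct bar, bar->key, bar_key_cmp() and the
 * search key passed via the void * argument are illustrative, not part of
 * this API):
 *
 *   static int bar_lock_should_sleep(struct six_lock *lock, void *p)
 *   {
 *           struct bar *bar = container_of(lock, struct bar, lock);
 *           struct bar_key *search = p;
 *
 *           return bar_key_cmp(&bar->key, search) ? -EAGAIN : 0;
 *   }
 *
 *   ret = six_lock_read(&bar->lock, bar_lock_should_sleep, &search);
 *
 * If the object was reused for a different key while we were waiting,
 * six_lock_read() returns the callback's error instead of blocking on a lock
 * whose order may no longer be correct, and the caller can restart the lookup.
 *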
 * Wait list entry interface:
 *
 * There is a six_lock() variant, six_lock_waiter(), that takes a pointer to a
 * wait list entry. By embedding six_lock_waiter into another object, and by
 * traversing lock waitlists, it is then possible for an upper layer to
 * implement full cycle detection for deadlock avoidance.
 *
 * should_sleep_fn should be used for invoking the cycle detector, walking the
 * graph of held locks to check for a deadlock. The upper layer must track
 * held locks for each thread, and each thread's held locks must be reachable
 * from its six_lock_waiter object.
 *
 * six_lock_waiter() will add the wait object to the waitlist before retrying
 * to take the lock and before calling should_sleep_fn; the wait object will
 * not be removed from the waitlist until either the lock has been successfully
 * acquired, or we aborted because should_sleep_fn returned an error.
 *
 * Also, six_lock_waiter contains a timestamp, and waiters on a waitlist will
 * have timestamps in strictly ascending order - this is so the timestamp can
 * be used as a cursor for lock graph traversal.
 */

#include <linux/lockdep.h>
#include <linux/sched.h>
#include <linux/types.h>

enum six_lock_type {
	SIX_LOCK_read,
	SIX_LOCK_intent,
	SIX_LOCK_write,
};

struct six_lock {
	atomic_t		state;
	u32			seq;
	unsigned		intent_lock_recurse;
	struct task_struct	*owner;
	unsigned __percpu	*readers;
	raw_spinlock_t		wait_lock;
	struct list_head	wait_list;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	struct lockdep_map	dep_map;
#endif
};

struct six_lock_waiter {
	struct list_head	list;
	struct task_struct	*task;
	enum six_lock_type	lock_want;
	bool			lock_acquired;
	u64			start_time;
};

typedef int (*six_lock_should_sleep_fn)(struct six_lock *lock, void *);

void six_lock_exit(struct six_lock *lock);

enum six_lock_init_flags {
	SIX_LOCK_INIT_PCPU	= 1U << 0,
};

void __six_lock_init(struct six_lock *lock, const char *name,
		     struct lock_class_key *key, enum six_lock_init_flags flags);

/**
 * six_lock_init - initialize a six lock
 * @lock: lock to initialize
 * @flags: optional flags, i.e. SIX_LOCK_INIT_PCPU
 */
#define six_lock_init(lock, flags)					\
do {									\
	static struct lock_class_key __key;				\
									\
	__six_lock_init((lock), #lock, &__key, flags);			\
} while (0)
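
/*
 * Example usage of six_lock_init() (a sketch; "struct foo" is illustrative,
 * not part of this API):
 *
 *	struct foo {
 *		struct six_lock		lock;
 *	};
 *
 *	six_lock_init(&foo->lock, 0);
 *
 * Passing SIX_LOCK_INIT_PCPU makes the lock use the percpu reader counts (the
 * readers member above), making read locking cheaper at the cost of more
 * expensive write locking; six_lock_exit() frees the percpu allocation at the
 * end of the lock's lifetime:
 *
 *	six_lock_init(&foo->lock, SIX_LOCK_INIT_PCPU);
 *	...
 *	six_lock_exit(&foo->lock);
 */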

/**
 * six_lock_seq - obtain current lock sequence number
 * @lock: six_lock to obtain sequence number for
 *
 * @lock should be held for read or intent, and not write
 *
 * By saving the lock sequence number, we can unlock @lock and then (typically
 * after some blocking operation) attempt to relock it: the relock will succeed
 * if the sequence number hasn't changed, meaning no write locks have been taken
 * and state corresponding to what @lock protects is still valid.
 */
static inline u32 six_lock_seq(const struct six_lock *lock)
{
	return lock->seq;
}

bool six_trylock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip);

/**
 * six_trylock_type - attempt to take a six lock without blocking
 * @lock: lock to take
 * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
 *
 * Return: true on success, false on failure.
 */
static inline bool six_trylock_type(struct six_lock *lock, enum six_lock_type type)
{
	return six_trylock_ip(lock, type, _THIS_IP_);
}

int six_lock_ip_waiter(struct six_lock *lock, enum six_lock_type type,
		       struct six_lock_waiter *wait,
		       six_lock_should_sleep_fn should_sleep_fn, void *p,
		       unsigned long ip);

/**
 * six_lock_waiter - take a lock, with full waitlist interface
 * @lock: lock to take
 * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
 * @wait: pointer to wait object, which will be added to lock's waitlist
 * @should_sleep_fn: callback run after adding to waitlist, immediately prior
 *		to scheduling
 * @p: passed through to @should_sleep_fn
 *
 * This is a convenience wrapper around six_lock_ip_waiter(), see that function
 * for full documentation.
 *
 * Return: 0 on success, or the return code from @should_sleep_fn on failure.
 */
static inline int six_lock_waiter(struct six_lock *lock, enum six_lock_type type,
				  struct six_lock_waiter *wait,
				  six_lock_should_sleep_fn should_sleep_fn, void *p)
{
	return six_lock_ip_waiter(lock, type, wait, should_sleep_fn, p, _THIS_IP_);
}

/**
 * six_lock_ip - take a six lock
 * @lock: lock to take
 * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
 * @should_sleep_fn: callback run after adding to waitlist, immediately prior
 *		to scheduling
 * @p: passed through to @should_sleep_fn
 * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_
 *
 * Return: 0 on success, or the return code from @should_sleep_fn on failure.
 */
static inline int six_lock_ip(struct six_lock *lock, enum six_lock_type type,
			      six_lock_should_sleep_fn should_sleep_fn, void *p,
			      unsigned long ip)
{
	struct six_lock_waiter wait;

	return six_lock_ip_waiter(lock, type, &wait, should_sleep_fn, p, ip);
}

/**
 * six_lock_type - take a six lock
 * @lock: lock to take
 * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
 * @should_sleep_fn: callback run after adding to waitlist, immediately prior
 *		to scheduling
 * @p: passed through to @should_sleep_fn
 *
 * Return: 0 on success, or the return code from @should_sleep_fn on failure.
 */
static inline int six_lock_type(struct six_lock *lock, enum six_lock_type type,
				six_lock_should_sleep_fn should_sleep_fn, void *p)
{
	struct six_lock_waiter wait;

	return six_lock_ip_waiter(lock, type, &wait, should_sleep_fn, p, _THIS_IP_);
}

bool six_relock_ip(struct six_lock *lock, enum six_lock_type type,
		   unsigned seq, unsigned long ip);

/**
 * six_relock_type - attempt to re-take a lock that was held previously
 * @lock: lock to take
 * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
 * @seq: lock sequence number obtained from six_lock_seq() while lock was
 *		held previously
 *
 * Return: true on success, false on failure.
 */
static inline bool six_relock_type(struct six_lock *lock, enum six_lock_type type,
				   unsigned seq)
{
	return six_relock_ip(lock, type, seq, _THIS_IP_);
}
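
/*
 * Example usage of the unlock/relock pattern (a sketch; "foo" and
 * do_blocking_work() are illustrative, not part of this API):
 *
 *	u32 seq = six_lock_seq(&foo->lock);
 *	six_unlock_read(&foo->lock);
 *
 *	do_blocking_work();
 *
 *	if (!six_relock_read(&foo->lock, seq))
 *		six_lock_read(&foo->lock, NULL, NULL);
 *
 * If the relock fails and the lock is retaken with six_lock_read(), the state
 * it protects may have changed and must be revalidated.
 */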

void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip);

/**
 * six_unlock_type - drop a six lock
 * @lock: lock to unlock
 * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
 *
 * When a lock is held multiple times (because six_lock_increment() was used),
 * this decrements the 'lock held' counter by one.
 *
 * For example:
 *   six_lock_read(&foo->lock);				read count 1
 *   six_lock_increment(&foo->lock, SIX_LOCK_read);	read count 2
 *   six_unlock_type(&foo->lock, SIX_LOCK_read);	read count 1
 *   six_unlock_type(&foo->lock, SIX_LOCK_read);	read count 0
 */
static inline void six_unlock_type(struct six_lock *lock, enum six_lock_type type)
{
	six_unlock_ip(lock, type, _THIS_IP_);
}

/*
 * __SIX_LOCK() generates the per-type wrappers for each lock type:
 * six_trylock_read(), six_lock_intent(), six_relock_read(), six_unlock_write(),
 * etc., along with the _ip variants that take an explicit ip for
 * lockdep/lockstat.
 */
#define __SIX_LOCK(type)						\
static inline bool six_trylock_ip_##type(struct six_lock *lock, unsigned long ip)\
{									\
	return six_trylock_ip(lock, SIX_LOCK_##type, ip);		\
}									\
									\
static inline bool six_trylock_##type(struct six_lock *lock)		\
{									\
	return six_trylock_ip(lock, SIX_LOCK_##type, _THIS_IP_);	\
}									\
									\
static inline int six_lock_ip_waiter_##type(struct six_lock *lock,	\
			   struct six_lock_waiter *wait,		\
			   six_lock_should_sleep_fn should_sleep_fn, void *p,\
			   unsigned long ip)				\
{									\
	return six_lock_ip_waiter(lock, SIX_LOCK_##type, wait, should_sleep_fn, p, ip);\
}									\
									\
static inline int six_lock_ip_##type(struct six_lock *lock,		\
		    six_lock_should_sleep_fn should_sleep_fn, void *p,	\
		    unsigned long ip)					\
{									\
	return six_lock_ip(lock, SIX_LOCK_##type, should_sleep_fn, p, ip);\
}									\
									\
static inline bool six_relock_ip_##type(struct six_lock *lock, u32 seq, unsigned long ip)\
{									\
	return six_relock_ip(lock, SIX_LOCK_##type, seq, ip);		\
}									\
									\
static inline bool six_relock_##type(struct six_lock *lock, u32 seq)	\
{									\
	return six_relock_ip(lock, SIX_LOCK_##type, seq, _THIS_IP_);	\
}									\
									\
static inline int six_lock_##type(struct six_lock *lock,		\
				  six_lock_should_sleep_fn fn, void *p)\
{									\
	return six_lock_ip_##type(lock, fn, p, _THIS_IP_);		\
}									\
									\
static inline void six_unlock_ip_##type(struct six_lock *lock, unsigned long ip)\
{									\
	six_unlock_ip(lock, SIX_LOCK_##type, ip);			\
}									\
									\
static inline void six_unlock_##type(struct six_lock *lock)		\
{									\
	six_unlock_ip(lock, SIX_LOCK_##type, _THIS_IP_);		\
}

__SIX_LOCK(read)
__SIX_LOCK(intent)
__SIX_LOCK(write)
#undef __SIX_LOCK

void six_lock_downgrade(struct six_lock *);
bool six_lock_tryupgrade(struct six_lock *);
bool six_trylock_convert(struct six_lock *, enum six_lock_type,
			 enum six_lock_type);
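
/*
 * Example usage of six_lock_tryupgrade() (a sketch; "foo" is illustrative, not
 * part of this API):
 *
 *	six_lock_read(&foo->lock, NULL, NULL);
 *	...
 *	if (!six_lock_tryupgrade(&foo->lock)) {
 *		six_unlock_read(&foo->lock);
 *		six_lock_intent(&foo->lock, NULL, NULL);
 *	}
 *
 * If the upgrade fails, typically because another thread holds the intent
 * lock, the read lock must be dropped before blocking on the intent lock, and
 * any state observed under the read lock must be revalidated afterwards.
 */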

void six_lock_increment(struct six_lock *, enum six_lock_type);

void six_lock_wakeup_all(struct six_lock *);

struct six_lock_count {
	unsigned n[3];		/* indexed by enum six_lock_type */
};

struct six_lock_count six_lock_counts(struct six_lock *);
void six_lock_readers_add(struct six_lock *, int);

#endif /* _LINUX_SIX_H */