// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2008 Oracle.  All rights reserved.
 */

#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/page-flags.h>
#include <asm/bug.h>
#include "misc.h"
#include "ctree.h"
#include "extent_io.h"
#include "locking.h"

/*
 * Extent buffer locking
 * =====================
 *
 * We use a rw_semaphore for tree locking, and the semantics are exactly the
 * same:
 *
 * - reader/writer exclusion
 * - writer/writer exclusion
 * - reader/reader sharing
 * - try-lock semantics for readers and writers
 *
 * The rwsem implementation does opportunistic spinning which reduces the
 * number of times the locking task needs to sleep.
 */

/*
 * __btrfs_tree_read_lock - lock extent buffer for read
 * @eb:   the eb to be locked
 * @nest: the nesting level to be used for lockdep
 *
 * This takes the read lock on the extent buffer, using the specified nesting
 * level for lockdep purposes.
 */
void __btrfs_tree_read_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
{
	u64 start_ns = 0;

	if (trace_btrfs_tree_read_lock_enabled())
		start_ns = ktime_get_ns();

	down_read_nested(&eb->lock, nest);
	trace_btrfs_tree_read_lock(eb, start_ns);
}

void btrfs_tree_read_lock(struct extent_buffer *eb)
{
	__btrfs_tree_read_lock(eb, BTRFS_NESTING_NORMAL);
}

/*
 * Try-lock for read.
 *
 * Return 1 if the lock has been taken, 0 otherwise.
 */
int btrfs_try_tree_read_lock(struct extent_buffer *eb)
{
	if (down_read_trylock(&eb->lock)) {
		trace_btrfs_try_tree_read_lock(eb);
		return 1;
	}
	return 0;
}

/*
 * Try-lock for write.
 *
 * Return 1 if the lock has been taken, 0 otherwise.
 */
int btrfs_try_tree_write_lock(struct extent_buffer *eb)
{
	if (down_write_trylock(&eb->lock)) {
		eb->lock_owner = current->pid;
		trace_btrfs_try_tree_write_lock(eb);
		return 1;
	}
	return 0;
}

/*
 * Release the read lock.
 */
void btrfs_tree_read_unlock(struct extent_buffer *eb)
{
	trace_btrfs_tree_read_unlock(eb);
	up_read(&eb->lock);
}

/*
 * __btrfs_tree_lock - lock eb for write
 * @eb:   the eb to lock
 * @nest: the nesting to use for the lock
 *
 * Returns with the eb->lock write locked.
 */
void __btrfs_tree_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
	__acquires(&eb->lock)
{
	u64 start_ns = 0;

	if (trace_btrfs_tree_lock_enabled())
		start_ns = ktime_get_ns();

	down_write_nested(&eb->lock, nest);
	eb->lock_owner = current->pid;
	trace_btrfs_tree_lock(eb, start_ns);
}

void btrfs_tree_lock(struct extent_buffer *eb)
{
	__btrfs_tree_lock(eb, BTRFS_NESTING_NORMAL);
}

/*
 * Release the write lock.
 */
void btrfs_tree_unlock(struct extent_buffer *eb)
{
	trace_btrfs_tree_unlock(eb);
	eb->lock_owner = 0;
	up_write(&eb->lock);
}
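
/*
 * Usage sketch (illustrative only, not part of the btrfs API): the typical
 * pairing of the helpers above.  demo_eb_read_locking() is a hypothetical
 * name used just for this example; it documents the calling convention of
 * trying the non-blocking read lock first and falling back to the blocking
 * variant, then releasing the lock when done.
 */
static inline void demo_eb_read_locking(struct extent_buffer *eb)
{
	if (!btrfs_try_tree_read_lock(eb))
		btrfs_tree_read_lock(eb);

	/* ... read items from @eb while the read lock is held ... */

	btrfs_tree_read_unlock(eb);
}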

/*
 * This releases any locks held in the path starting at level and going all the
 * way up to the root.
 *
 * btrfs_search_slot will keep the lock held on higher nodes in a few corner
 * cases, such as COW of the block at slot zero in the node.  This ignores
 * those rules, and it should only be called when there are no more updates to
 * be done higher up in the tree.
 */
void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
{
	int i;

	if (path->keep_locks)
		return;

	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
		if (!path->nodes[i])
			continue;
		if (!path->locks[i])
			continue;
		btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
		path->locks[i] = 0;
	}
}

/*
 * Loop around taking references on and locking the root node of the tree until
 * we end up with a lock on the root node.
 *
 * Return: root extent buffer with write lock held
 */
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
{
	struct extent_buffer *eb;

	while (1) {
		eb = btrfs_root_node(root);
		btrfs_tree_lock(eb);
		if (eb == root->node)
			break;
		btrfs_tree_unlock(eb);
		free_extent_buffer(eb);
	}
	return eb;
}

/*
 * Loop around taking references on and locking the root node of the tree until
 * we end up with a lock on the root node.
 *
 * Return: root extent buffer with read lock held
 */
struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
{
	struct extent_buffer *eb;

	while (1) {
		eb = btrfs_root_node(root);
		btrfs_tree_read_lock(eb);
		if (eb == root->node)
			break;
		btrfs_tree_read_unlock(eb);
		free_extent_buffer(eb);
	}
	return eb;
}
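
/*
 * Usage sketch (illustrative only, not called anywhere): locking the current
 * root and releasing it again.  demo_root_locking() is a hypothetical name
 * used just for this example; btrfs_lock_root_node() returns the root extent
 * buffer with a reference and the write lock held, so the caller must both
 * unlock it and drop the reference.
 */
static inline void demo_root_locking(struct btrfs_root *root)
{
	struct extent_buffer *root_eb;

	root_eb = btrfs_lock_root_node(root);

	/* ... work on the write locked root node ... */

	btrfs_tree_unlock(root_eb);
	free_extent_buffer(root_eb);
}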

/*
 * DREW locks
 * ==========
 *
 * DREW stands for double-reader-writer-exclusion lock.  It's used in
 * situations where you want to provide A-B exclusion but not AA or BB.
 *
 * The current implementation gives more priority to readers.  If a reader and
 * a writer both race to acquire their respective sides of the lock, the writer
 * yields its lock as soon as it detects a concurrent reader.  Additionally, if
 * there are pending readers, no new writers are allowed to come in and acquire
 * the lock.
 */

int btrfs_drew_lock_init(struct btrfs_drew_lock *lock)
{
	int ret;

	ret = percpu_counter_init(&lock->writers, 0, GFP_KERNEL);
	if (ret)
		return ret;

	atomic_set(&lock->readers, 0);
	init_waitqueue_head(&lock->pending_readers);
	init_waitqueue_head(&lock->pending_writers);

	return 0;
}

void btrfs_drew_lock_destroy(struct btrfs_drew_lock *lock)
{
	percpu_counter_destroy(&lock->writers);
}

/* Return true if acquisition is successful, false otherwise */
bool btrfs_drew_try_write_lock(struct btrfs_drew_lock *lock)
{
	if (atomic_read(&lock->readers))
		return false;

	percpu_counter_inc(&lock->writers);

	/* Ensure writers count is updated before we check for pending readers */
	smp_mb();
	if (atomic_read(&lock->readers)) {
		btrfs_drew_write_unlock(lock);
		return false;
	}

	return true;
}

void btrfs_drew_write_lock(struct btrfs_drew_lock *lock)
{
	while (true) {
		if (btrfs_drew_try_write_lock(lock))
			return;
		wait_event(lock->pending_writers, !atomic_read(&lock->readers));
	}
}

void btrfs_drew_write_unlock(struct btrfs_drew_lock *lock)
{
	percpu_counter_dec(&lock->writers);
	cond_wake_up(&lock->pending_readers);
}

void btrfs_drew_read_lock(struct btrfs_drew_lock *lock)
{
	atomic_inc(&lock->readers);

	/*
	 * Ensure the pending reader count is perceived BEFORE this reader
	 * goes to sleep in case of active writers.  This guarantees new
	 * writers won't be allowed and that the current reader will be woken
	 * up when the last active writer finishes its job.
	 */
	smp_mb__after_atomic();

	wait_event(lock->pending_readers,
		   percpu_counter_sum(&lock->writers) == 0);
}

void btrfs_drew_read_unlock(struct btrfs_drew_lock *lock)
{
	/*
	 * atomic_dec_and_test() implies a full barrier, so woken up writers
	 * are guaranteed to see the decrement.
	 */
	if (atomic_dec_and_test(&lock->readers))
		wake_up(&lock->pending_writers);
}
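
/*
 * Usage sketch (illustrative only, not called anywhere): the two sides of the
 * DREW lock.  demo_drew_writer() and demo_drew_reader() are hypothetical
 * names used just for this example; they show that A-type operations take the
 * write side, B-type operations take the read side, and operations of the
 * same type may run concurrently with each other.
 */
static inline void demo_drew_writer(struct btrfs_drew_lock *lock)
{
	btrfs_drew_write_lock(lock);

	/* ... A-type operation, excluded from all readers ... */

	btrfs_drew_write_unlock(lock);
}

static inline void demo_drew_reader(struct btrfs_drew_lock *lock)
{
	btrfs_drew_read_lock(lock);

	/* ... B-type operation, excluded from writers but not from other readers ... */

	btrfs_drew_read_unlock(lock);
}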