xref: /linux/fs/btrfs/locking.c (revision 987b741c52c7c6c68d46fbaeb95b8d1087f10b7f)
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2008 Oracle.  All rights reserved.
 */

#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/page-flags.h>
#include <asm/bug.h>
#include "misc.h"
#include "ctree.h"
#include "extent_io.h"
#include "locking.h"

/*
 * Extent buffer locking
 * =====================
 *
 * We use a rw_semaphore for tree locking, and the semantics are exactly the
 * same:
 *
 * - reader/writer exclusion
 * - writer/writer exclusion
 * - reader/reader sharing
 * - try-lock semantics for readers and writers
 *
 * The rwsem implementation does opportunistic spinning, which reduces the
 * number of times the locking task needs to sleep.
 */
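
/*
 * Illustrative sketch only (not part of btrfs): the basic pattern callers
 * follow with the eb locking API below. Assumes the caller already holds a
 * reference on @eb. The function name is hypothetical.
 */
static inline void example_eb_locking_pattern(struct extent_buffer *eb)
{
	/* Shared access: multiple readers may hold the lock concurrently. */
	btrfs_tree_read_lock(eb);
	/* ... read-only examination of the eb ... */
	btrfs_tree_read_unlock(eb);

	/* Exclusive access: excludes both readers and other writers. */
	btrfs_tree_lock(eb);
	/* ... modify the eb ... */
	btrfs_tree_unlock(eb);
}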

/*
 * __btrfs_tree_read_lock - lock extent buffer for read
 * @eb:		the eb to be locked
 * @nest:	the nesting level to be used for lockdep
 *
 * This takes the read lock on the extent buffer, using the specified nesting
 * level for lockdep purposes.
 */
void __btrfs_tree_read_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
{
	u64 start_ns = 0;

	if (trace_btrfs_tree_read_lock_enabled())
		start_ns = ktime_get_ns();

	down_read_nested(&eb->lock, nest);
	eb->lock_owner = current->pid;
	trace_btrfs_tree_read_lock(eb, start_ns);
}

void btrfs_tree_read_lock(struct extent_buffer *eb)
{
	__btrfs_tree_read_lock(eb, BTRFS_NESTING_NORMAL);
}

/*
 * Try-lock for read.
 *
 * Return 1 if the lock has been taken, 0 otherwise.
 */
int btrfs_try_tree_read_lock(struct extent_buffer *eb)
{
	if (down_read_trylock(&eb->lock)) {
		eb->lock_owner = current->pid;
		trace_btrfs_try_tree_read_lock(eb);
		return 1;
	}
	return 0;
}

/*
 * Try-lock for write.
 *
 * Return 1 if the lock has been taken, 0 otherwise.
 */
int btrfs_try_tree_write_lock(struct extent_buffer *eb)
{
	if (down_write_trylock(&eb->lock)) {
		eb->lock_owner = current->pid;
		trace_btrfs_try_tree_write_lock(eb);
		return 1;
	}
	return 0;
}
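
/*
 * Illustrative sketch only (not part of btrfs): how a caller might use the
 * try-lock above to avoid sleeping on a contended eb. The function name is
 * hypothetical.
 */
static inline bool example_modify_if_uncontended(struct extent_buffer *eb)
{
	/* Based on down_write_trylock(), so this never sleeps. */
	if (!btrfs_try_tree_write_lock(eb))
		return false;
	/* ... modify the eb while holding the write lock ... */
	btrfs_tree_unlock(eb);
	return true;
}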

/*
 * Release read lock.
 */
void btrfs_tree_read_unlock(struct extent_buffer *eb)
{
	trace_btrfs_tree_read_unlock(eb);
	eb->lock_owner = 0;
	up_read(&eb->lock);
}

/*
 * __btrfs_tree_lock - lock eb for write
 * @eb:		the eb to lock
 * @nest:	the nesting to use for the lock
 *
 * Returns with the eb->lock write locked.
 */
void __btrfs_tree_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
	__acquires(&eb->lock)
{
	u64 start_ns = 0;

	if (trace_btrfs_tree_lock_enabled())
		start_ns = ktime_get_ns();

	down_write_nested(&eb->lock, nest);
	eb->lock_owner = current->pid;
	trace_btrfs_tree_lock(eb, start_ns);
}

void btrfs_tree_lock(struct extent_buffer *eb)
{
	__btrfs_tree_lock(eb, BTRFS_NESTING_NORMAL);
}

/*
 * Release the write lock.
 */
void btrfs_tree_unlock(struct extent_buffer *eb)
{
	trace_btrfs_tree_unlock(eb);
	eb->lock_owner = 0;
	up_write(&eb->lock);
}

/*
 * This releases any locks held in the path starting at level and going all the
 * way up to the root.
 *
 * btrfs_search_slot will keep the lock held on higher nodes in a few corner
 * cases, such as COW of the block at slot zero in the node.  This ignores
 * those rules, and it should only be called when there are no more updates to
 * be done higher up in the tree.
 */
void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
{
	int i;

	if (path->keep_locks)
		return;

	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
		if (!path->nodes[i])
			continue;
		if (!path->locks[i])
			continue;
		btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
		path->locks[i] = 0;
	}
}

/*
 * Loop around taking references on and locking the root node of the tree until
 * we end up with a lock on the root node.
 *
 * Return: root extent buffer with write lock held
 */
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
{
	struct extent_buffer *eb;

	while (1) {
		eb = btrfs_root_node(root);
		btrfs_tree_lock(eb);
		if (eb == root->node)
			break;
		btrfs_tree_unlock(eb);
		free_extent_buffer(eb);
	}
	return eb;
}

/*
 * Loop around taking references on and locking the root node of the tree until
 * we end up with a lock on the root node.
 *
 * Return: root extent buffer with read lock held
 */
struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
{
	struct extent_buffer *eb;

	while (1) {
		eb = btrfs_root_node(root);
		btrfs_tree_read_lock(eb);
		if (eb == root->node)
			break;
		btrfs_tree_read_unlock(eb);
		free_extent_buffer(eb);
	}
	return eb;
}

/*
 * DREW locks
 * ==========
 *
 * DREW stands for double-reader-writer-exclusion lock. It's used in
 * situations where you want to provide A-B exclusion but not AA or BB.
 *
 * The current implementation gives priority to readers. If a reader and a
 * writer both race to acquire their respective sides of the lock, the writer
 * yields its lock as soon as it detects a concurrent reader. Additionally, if
 * there are pending readers, no new writers are allowed to come in and
 * acquire the lock.
 */
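
/*
 * Illustrative sketch only (not part of btrfs): the two sides of a DREW
 * lock used for A-B exclusion. The "read" and "write" names just
 * distinguish the two classes being excluded from each other; holders of
 * the same side may run concurrently. The function name is hypothetical.
 */
static inline void example_drew_usage(struct btrfs_drew_lock *lock)
{
	/* Side A: waits until no side-B holder is active. */
	btrfs_drew_read_lock(lock);
	/* ... perform a side-A operation ... */
	btrfs_drew_read_unlock(lock);

	/* Side B: the try-lock variant backs off instead of sleeping. */
	if (btrfs_drew_try_write_lock(lock)) {
		/* ... perform a side-B operation ... */
		btrfs_drew_write_unlock(lock);
	}
}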

int btrfs_drew_lock_init(struct btrfs_drew_lock *lock)
{
	int ret;

	ret = percpu_counter_init(&lock->writers, 0, GFP_KERNEL);
	if (ret)
		return ret;

	atomic_set(&lock->readers, 0);
	init_waitqueue_head(&lock->pending_readers);
	init_waitqueue_head(&lock->pending_writers);

	return 0;
}

void btrfs_drew_lock_destroy(struct btrfs_drew_lock *lock)
{
	percpu_counter_destroy(&lock->writers);
}

/* Return true if acquisition is successful, false otherwise */
bool btrfs_drew_try_write_lock(struct btrfs_drew_lock *lock)
{
	if (atomic_read(&lock->readers))
		return false;

	percpu_counter_inc(&lock->writers);

	/*
	 * Ensure the writers count is updated before we check for pending
	 * readers below. This pairs with the smp_mb__after_atomic() in
	 * btrfs_drew_read_lock().
	 */
	smp_mb();
	if (atomic_read(&lock->readers)) {
		btrfs_drew_write_unlock(lock);
		return false;
	}

	return true;
}

void btrfs_drew_write_lock(struct btrfs_drew_lock *lock)
{
	while (true) {
		if (btrfs_drew_try_write_lock(lock))
			return;
		wait_event(lock->pending_writers, !atomic_read(&lock->readers));
	}
}

void btrfs_drew_write_unlock(struct btrfs_drew_lock *lock)
{
	percpu_counter_dec(&lock->writers);
	cond_wake_up(&lock->pending_readers);
}

void btrfs_drew_read_lock(struct btrfs_drew_lock *lock)
{
	atomic_inc(&lock->readers);

	/*
	 * Ensure the pending reader count is perceived BEFORE this reader
	 * goes to sleep in case of active writers. This guarantees new writers
	 * won't be allowed and that the current reader will be woken up when
	 * the last active writer finishes its job. This pairs with the
	 * smp_mb() in btrfs_drew_try_write_lock().
	 */
	smp_mb__after_atomic();

	wait_event(lock->pending_readers,
		   percpu_counter_sum(&lock->writers) == 0);
}

void btrfs_drew_read_unlock(struct btrfs_drew_lock *lock)
{
	/*
	 * atomic_dec_and_test implies a full barrier, so woken up writers
	 * are guaranteed to see the decrement
	 */
	if (atomic_dec_and_test(&lock->readers))
		wake_up(&lock->pending_writers);
}