1 // SPDX-License-Identifier: GPL-2.0
2
3 #include "bcachefs.h"
4 #include "btree_locking.h"
5 #include "btree_types.h"
6
/*
 * Lockdep class key handed to __six_lock_init() by bch2_btree_lock_init(), so
 * every lock initialised through that function shares this one key.
 */
static struct lock_class_key bch2_btree_node_lock_key;
8
bch2_btree_lock_init(struct btree_bkey_cached_common * b,enum six_lock_init_flags flags)9 void bch2_btree_lock_init(struct btree_bkey_cached_common *b,
10 enum six_lock_init_flags flags)
11 {
12 __six_lock_init(&b->lock, "b->c.lock", &bch2_btree_node_lock_key, flags);
13 lockdep_set_notrack_class(&b->lock);
14 }
15
16 /* Btree node locking: */
17
bch2_btree_node_lock_counts(struct btree_trans * trans,struct btree_path * skip,struct btree_bkey_cached_common * b,unsigned level)18 struct six_lock_count bch2_btree_node_lock_counts(struct btree_trans *trans,
19 struct btree_path *skip,
20 struct btree_bkey_cached_common *b,
21 unsigned level)
22 {
23 struct btree_path *path;
24 struct six_lock_count ret;
25 unsigned i;
26
27 memset(&ret, 0, sizeof(ret));
28
29 if (IS_ERR_OR_NULL(b))
30 return ret;
31
32 trans_for_each_path(trans, path, i)
33 if (path != skip && &path->l[level].b->c == b) {
34 int t = btree_node_locked_type(path, level);
35
36 if (t != BTREE_NODE_UNLOCKED)
37 ret.n[t]++;
38 }
39
40 return ret;
41 }
42
43 /* unlock */
44
/*
 * Out-of-line entry point for dropping a write lock on @b: simply forwards
 * all three arguments to bch2_btree_node_unlock_write_inlined().
 */
void bch2_btree_node_unlock_write(struct btree_trans *trans,
				  struct btree_path *path,
				  struct btree *b)
{
	bch2_btree_node_unlock_write_inlined(trans, path, b);
}
50
51 /* lock */
52
/*
 * One entry of the deadlock-detection stack: @trans wants to lock @node_want
 * with lock type @lock_want.
 *
 * The remaining fields are a cursor recording how far bch2_check_for_deadlock()
 * has got in walking the locks that @trans itself holds.
 */
struct trans_waiting_for_lock {
	/* transaction this entry describes */
	struct btree_trans		*trans;
	/* copy of trans->locking taken when the entry was pushed */
	struct btree_bkey_cached_common	*node_want;
	/* copy of trans->locking_wait.lock_want taken when the entry was pushed */
	enum six_lock_type		lock_want;

	/* for iterating over held locks: */
	/* index of the path of @trans currently being examined */
	u8				path_idx;
	/* level within that path currently being examined */
	u8				level;
	/*
	 * locking_wait.start_time of the last waiter looked at on the current
	 * node; reset to 0 whenever the walk moves to another level or path
	 */
	u64				lock_start_time;
};
66
/*
 * Fixed-depth stack of transactions used while searching for a lock cycle.
 * g[0] is the transaction the search started from; lock_graph_descend()
 * refuses to push once all ARRAY_SIZE(g) slots are in use.
 */
struct lock_graph {
	struct trans_waiting_for_lock	g[8];
	/* number of entries of g[] currently in use */
	unsigned			nr;
};
71
print_cycle(struct printbuf * out,struct lock_graph * g)72 static noinline void print_cycle(struct printbuf *out, struct lock_graph *g)
73 {
74 struct trans_waiting_for_lock *i;
75
76 prt_printf(out, "Found lock cycle (%u entries):\n", g->nr);
77
78 for (i = g->g; i < g->g + g->nr; i++) {
79 struct task_struct *task = READ_ONCE(i->trans->locking_wait.task);
80 if (!task)
81 continue;
82
83 bch2_btree_trans_to_text(out, i->trans);
84 bch2_prt_task_backtrace(out, task, i == g->g ? 5 : 1, GFP_NOWAIT);
85 }
86 }
87
print_chain(struct printbuf * out,struct lock_graph * g)88 static noinline void print_chain(struct printbuf *out, struct lock_graph *g)
89 {
90 struct trans_waiting_for_lock *i;
91
92 for (i = g->g; i != g->g + g->nr; i++) {
93 struct task_struct *task = i->trans->locking_wait.task;
94 if (i != g->g)
95 prt_str(out, "<- ");
96 prt_printf(out, "%u ", task ?task->pid : 0);
97 }
98 prt_newline(out);
99 }
100
lock_graph_up(struct lock_graph * g)101 static void lock_graph_up(struct lock_graph *g)
102 {
103 closure_put(&g->g[--g->nr].trans->ref);
104 }
105
lock_graph_pop_all(struct lock_graph * g)106 static noinline void lock_graph_pop_all(struct lock_graph *g)
107 {
108 while (g->nr)
109 lock_graph_up(g);
110 }
111
__lock_graph_down(struct lock_graph * g,struct btree_trans * trans)112 static void __lock_graph_down(struct lock_graph *g, struct btree_trans *trans)
113 {
114 g->g[g->nr++] = (struct trans_waiting_for_lock) {
115 .trans = trans,
116 .node_want = trans->locking,
117 .lock_want = trans->locking_wait.lock_want,
118 };
119 }
120
lock_graph_down(struct lock_graph * g,struct btree_trans * trans)121 static void lock_graph_down(struct lock_graph *g, struct btree_trans *trans)
122 {
123 closure_get(&trans->ref);
124 __lock_graph_down(g, trans);
125 }
126
lock_graph_remove_non_waiters(struct lock_graph * g)127 static bool lock_graph_remove_non_waiters(struct lock_graph *g)
128 {
129 struct trans_waiting_for_lock *i;
130
131 for (i = g->g + 1; i < g->g + g->nr; i++)
132 if (i->trans->locking != i->node_want ||
133 i->trans->locking_wait.start_time != i[-1].lock_start_time) {
134 while (g->g + g->nr > i)
135 lock_graph_up(g);
136 return true;
137 }
138
139 return false;
140 }
141
trace_would_deadlock(struct lock_graph * g,struct btree_trans * trans)142 static void trace_would_deadlock(struct lock_graph *g, struct btree_trans *trans)
143 {
144 struct bch_fs *c = trans->c;
145
146 count_event(c, trans_restart_would_deadlock);
147
148 if (trace_trans_restart_would_deadlock_enabled()) {
149 struct printbuf buf = PRINTBUF;
150
151 buf.atomic++;
152 print_cycle(&buf, g);
153
154 trace_trans_restart_would_deadlock(trans, buf.buf);
155 printbuf_exit(&buf);
156 }
157 }
158
abort_lock(struct lock_graph * g,struct trans_waiting_for_lock * i)159 static int abort_lock(struct lock_graph *g, struct trans_waiting_for_lock *i)
160 {
161 if (i == g->g) {
162 trace_would_deadlock(g, i->trans);
163 return btree_trans_restart(i->trans, BCH_ERR_transaction_restart_would_deadlock);
164 } else {
165 i->trans->lock_must_abort = true;
166 wake_up_process(i->trans->locking_wait.task);
167 return 0;
168 }
169 }
170
btree_trans_abort_preference(struct btree_trans * trans)171 static int btree_trans_abort_preference(struct btree_trans *trans)
172 {
173 if (trans->lock_may_not_fail)
174 return 0;
175 if (trans->locking_wait.lock_want == SIX_LOCK_write)
176 return 1;
177 if (!trans->in_traverse_all)
178 return 2;
179 return 3;
180 }
181
break_cycle(struct lock_graph * g,struct printbuf * cycle)182 static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle)
183 {
184 struct trans_waiting_for_lock *i, *abort = NULL;
185 unsigned best = 0, pref;
186 int ret;
187
188 if (lock_graph_remove_non_waiters(g))
189 return 0;
190
191 /* Only checking, for debugfs: */
192 if (cycle) {
193 print_cycle(cycle, g);
194 ret = -1;
195 goto out;
196 }
197
198 for (i = g->g; i < g->g + g->nr; i++) {
199 pref = btree_trans_abort_preference(i->trans);
200 if (pref > best) {
201 abort = i;
202 best = pref;
203 }
204 }
205
206 if (unlikely(!best)) {
207 struct printbuf buf = PRINTBUF;
208 buf.atomic++;
209
210 prt_printf(&buf, bch2_fmt(g->g->trans->c, "cycle of nofail locks"));
211
212 for (i = g->g; i < g->g + g->nr; i++) {
213 struct btree_trans *trans = i->trans;
214
215 bch2_btree_trans_to_text(&buf, trans);
216
217 prt_printf(&buf, "backtrace:\n");
218 printbuf_indent_add(&buf, 2);
219 bch2_prt_task_backtrace(&buf, trans->locking_wait.task, 2, GFP_NOWAIT);
220 printbuf_indent_sub(&buf, 2);
221 prt_newline(&buf);
222 }
223
224 bch2_print_string_as_lines_nonblocking(KERN_ERR, buf.buf);
225 printbuf_exit(&buf);
226 BUG();
227 }
228
229 ret = abort_lock(g, abort);
230 out:
231 if (ret)
232 while (g->nr)
233 lock_graph_up(g);
234 return ret;
235 }
236
lock_graph_descend(struct lock_graph * g,struct btree_trans * trans,struct printbuf * cycle)237 static int lock_graph_descend(struct lock_graph *g, struct btree_trans *trans,
238 struct printbuf *cycle)
239 {
240 struct btree_trans *orig_trans = g->g->trans;
241 struct trans_waiting_for_lock *i;
242
243 for (i = g->g; i < g->g + g->nr; i++)
244 if (i->trans == trans) {
245 closure_put(&trans->ref);
246 return break_cycle(g, cycle);
247 }
248
249 if (g->nr == ARRAY_SIZE(g->g)) {
250 closure_put(&trans->ref);
251
252 if (orig_trans->lock_may_not_fail)
253 return 0;
254
255 while (g->nr)
256 lock_graph_up(g);
257
258 if (cycle)
259 return 0;
260
261 trace_and_count(trans->c, trans_restart_would_deadlock_recursion_limit, trans, _RET_IP_);
262 return btree_trans_restart(orig_trans, BCH_ERR_transaction_restart_deadlock_recursion_limit);
263 }
264
265 __lock_graph_down(g, trans);
266 return 0;
267 }
268
/*
 * Do lock types @t1 and @t2 conflict?
 *
 * True when the numeric values of the two types add up to at least 2.
 * NOTE(review): presumably relies on the enum being ordered
 * read < intent < write starting at 0 - confirm against six.h.
 */
static bool lock_type_conflicts(enum six_lock_type t1, enum six_lock_type t2)
{
	return (t1 + t2) >= 2;
}
273
/*
 * Search for a lock cycle starting from @trans.
 *
 * A stack (struct lock_graph) of transactions is maintained, with @trans at
 * the bottom.  For the transaction on top of the stack, every node it holds
 * locked is examined; for each waiter on such a node whose wanted lock type
 * conflicts with the held type, the waiter is pushed via lock_graph_descend()
 * and the walk restarts from the new top.  Each stack entry keeps a cursor
 * (path_idx, level, lock_start_time) so the walk resumes where it left off
 * once the entries above it have been popped.
 *
 * @cycle: if non-NULL we are only checking (e.g. for debugfs): cycles and
 *         chains are printed to it instead of anything being aborted.
 *
 * Returns 0 if nothing needs to be done by the caller, otherwise the non-zero
 * value produced by lock_graph_descend() / btree_trans_restart(), or -1 when
 * checking and @trans has lock_must_abort set.
 */
int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle)
{
	struct lock_graph g;
	struct trans_waiting_for_lock *top;
	struct btree_bkey_cached_common *b;
	btree_path_idx_t path_idx;
	int ret = 0;

	g.nr = 0;

	/* Another thread already decided that this transaction must abort: */
	if (trans->lock_must_abort) {
		if (cycle)
			return -1;

		trace_would_deadlock(&g, trans);
		return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock);
	}

	lock_graph_down(&g, trans);

	/* trans->paths is rcu protected vs. freeing */
	rcu_read_lock();
	if (cycle)
		cycle->atomic++;
next:
	/* Graph exhausted (or emptied by a helper): nothing more to look at */
	if (!g.nr)
		goto out;

	top = &g.g[g.nr - 1];

	struct btree_path *paths = rcu_dereference(top->trans->paths);
	if (!paths)
		goto up;

	unsigned long *paths_allocated = trans_paths_allocated(paths);

	/* Resume iterating over top's paths from where its cursor points: */
	trans_for_each_path_idx_from(paths_allocated, *trans_paths_nr(paths),
				     path_idx, top->path_idx) {
		struct btree_path *path = paths + path_idx;
		if (!path->nodes_locked)
			continue;

		/* Moved on to a different path: restart the per-path cursor */
		if (path_idx != top->path_idx) {
			top->path_idx		= path_idx;
			top->level		= 0;
			top->lock_start_time	= 0;
		}

		for (;
		     top->level < BTREE_MAX_DEPTH;
		     top->level++, top->lock_start_time = 0) {
			int lock_held = btree_node_locked_type(path, top->level);

			if (lock_held == BTREE_NODE_UNLOCKED)
				continue;

			b = &READ_ONCE(path->l[top->level].b)->c;

			if (IS_ERR_OR_NULL(b)) {
				/*
				 * If we get here, it means we raced with the
				 * other thread updating its btree_path
				 * structures - which means it can't be blocked
				 * waiting on a lock:
				 */
				if (!lock_graph_remove_non_waiters(&g)) {
					/*
					 * If lock_graph_remove_non_waiters()
					 * didn't do anything, it must be
					 * because we're being called by debugfs
					 * checking for lock cycles, which
					 * invokes us on btree_transactions that
					 * aren't actually waiting on anything.
					 * Just bail out:
					 */
					lock_graph_pop_all(&g);
				}

				goto next;
			}

			/* Nobody is waiting on this node - skip taking wait_lock */
			if (list_empty_careful(&b->lock.wait_list))
				continue;

			raw_spin_lock(&b->lock.wait_lock);
			list_for_each_entry(trans, &b->lock.wait_list, locking_wait.list) {
				BUG_ON(b != trans->locking);

				/*
				 * Skip waiters already handled on an earlier
				 * visit: lock_start_time remembers the start
				 * time of the last waiter we looked at.
				 */
				if (top->lock_start_time &&
				    time_after_eq64(top->lock_start_time, trans->locking_wait.start_time))
					continue;

				top->lock_start_time = trans->locking_wait.start_time;

				/* Don't check for self deadlock: */
				if (trans == top->trans ||
				    !lock_type_conflicts(lock_held, trans->locking_wait.lock_want))
					continue;

				/*
				 * Take the reference before dropping wait_lock;
				 * lock_graph_descend() either keeps it (waiter
				 * pushed) or drops it.
				 */
				closure_get(&trans->ref);
				raw_spin_unlock(&b->lock.wait_lock);

				ret = lock_graph_descend(&g, trans, cycle);
				if (ret)
					goto out;
				goto next;

			}
			raw_spin_unlock(&b->lock.wait_lock);
		}
	}
up:
	/* Done with everything top holds: report the chain if checking, then pop */
	if (g.nr > 1 && cycle)
		print_chain(cycle, &g);
	lock_graph_up(&g);
	goto next;
out:
	if (cycle)
		--cycle->atomic;
	rcu_read_unlock();
	return ret;
}
396
bch2_six_check_for_deadlock(struct six_lock * lock,void * p)397 int bch2_six_check_for_deadlock(struct six_lock *lock, void *p)
398 {
399 struct btree_trans *trans = p;
400
401 return bch2_check_for_deadlock(trans, NULL);
402 }
403
__bch2_btree_node_lock_write(struct btree_trans * trans,struct btree_path * path,struct btree_bkey_cached_common * b,bool lock_may_not_fail)404 int __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree_path *path,
405 struct btree_bkey_cached_common *b,
406 bool lock_may_not_fail)
407 {
408 int readers = bch2_btree_node_lock_counts(trans, NULL, b, b->level).n[SIX_LOCK_read];
409 int ret;
410
411 /*
412 * Must drop our read locks before calling six_lock_write() -
413 * six_unlock() won't do wakeups until the reader count
414 * goes to 0, and it's safe because we have the node intent
415 * locked:
416 */
417 six_lock_readers_add(&b->lock, -readers);
418 ret = __btree_node_lock_nopath(trans, b, SIX_LOCK_write,
419 lock_may_not_fail, _RET_IP_);
420 six_lock_readers_add(&b->lock, readers);
421
422 if (ret)
423 mark_btree_node_locked_noreset(path, b->level, BTREE_NODE_INTENT_LOCKED);
424
425 return ret;
426 }
427
bch2_btree_node_lock_write_nofail(struct btree_trans * trans,struct btree_path * path,struct btree_bkey_cached_common * b)428 void bch2_btree_node_lock_write_nofail(struct btree_trans *trans,
429 struct btree_path *path,
430 struct btree_bkey_cached_common *b)
431 {
432 int ret = __btree_node_lock_write(trans, path, b, true);
433 BUG_ON(ret);
434 }
435
436 /* relock */
437
btree_path_get_locks(struct btree_trans * trans,struct btree_path * path,bool upgrade,struct get_locks_fail * f)438 static inline bool btree_path_get_locks(struct btree_trans *trans,
439 struct btree_path *path,
440 bool upgrade,
441 struct get_locks_fail *f)
442 {
443 unsigned l = path->level;
444 int fail_idx = -1;
445
446 do {
447 if (!btree_path_node(path, l))
448 break;
449
450 if (!(upgrade
451 ? bch2_btree_node_upgrade(trans, path, l)
452 : bch2_btree_node_relock(trans, path, l))) {
453 fail_idx = l;
454
455 if (f) {
456 f->l = l;
457 f->b = path->l[l].b;
458 }
459 }
460
461 l++;
462 } while (l < path->locks_want);
463
464 /*
465 * When we fail to get a lock, we have to ensure that any child nodes
466 * can't be relocked so bch2_btree_path_traverse has to walk back up to
467 * the node that we failed to relock:
468 */
469 if (fail_idx >= 0) {
470 __bch2_btree_path_unlock(trans, path);
471 btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
472
473 do {
474 path->l[fail_idx].b = upgrade
475 ? ERR_PTR(-BCH_ERR_no_btree_node_upgrade)
476 : ERR_PTR(-BCH_ERR_no_btree_node_relock);
477 --fail_idx;
478 } while (fail_idx >= 0);
479 }
480
481 if (path->uptodate == BTREE_ITER_NEED_RELOCK)
482 path->uptodate = BTREE_ITER_UPTODATE;
483
484 return path->uptodate < BTREE_ITER_NEED_RELOCK;
485 }
486
__bch2_btree_node_relock(struct btree_trans * trans,struct btree_path * path,unsigned level,bool trace)487 bool __bch2_btree_node_relock(struct btree_trans *trans,
488 struct btree_path *path, unsigned level,
489 bool trace)
490 {
491 struct btree *b = btree_path_node(path, level);
492 int want = __btree_lock_want(path, level);
493
494 if (race_fault())
495 goto fail;
496
497 if (six_relock_type(&b->c.lock, want, path->l[level].lock_seq) ||
498 (btree_node_lock_seq_matches(path, b, level) &&
499 btree_node_lock_increment(trans, &b->c, level, want))) {
500 mark_btree_node_locked(trans, path, level, want);
501 return true;
502 }
503 fail:
504 if (trace && !trans->notrace_relock_fail)
505 trace_and_count(trans->c, btree_path_relock_fail, trans, _RET_IP_, path, level);
506 return false;
507 }
508
509 /* upgrade */
510
bch2_btree_node_upgrade(struct btree_trans * trans,struct btree_path * path,unsigned level)511 bool bch2_btree_node_upgrade(struct btree_trans *trans,
512 struct btree_path *path, unsigned level)
513 {
514 struct btree *b = path->l[level].b;
515 struct six_lock_count count = bch2_btree_node_lock_counts(trans, path, &b->c, level);
516
517 if (!is_btree_node(path, level))
518 return false;
519
520 switch (btree_lock_want(path, level)) {
521 case BTREE_NODE_UNLOCKED:
522 BUG_ON(btree_node_locked(path, level));
523 return true;
524 case BTREE_NODE_READ_LOCKED:
525 BUG_ON(btree_node_intent_locked(path, level));
526 return bch2_btree_node_relock(trans, path, level);
527 case BTREE_NODE_INTENT_LOCKED:
528 break;
529 case BTREE_NODE_WRITE_LOCKED:
530 BUG();
531 }
532
533 if (btree_node_intent_locked(path, level))
534 return true;
535
536 if (race_fault())
537 return false;
538
539 if (btree_node_locked(path, level)) {
540 bool ret;
541
542 six_lock_readers_add(&b->c.lock, -count.n[SIX_LOCK_read]);
543 ret = six_lock_tryupgrade(&b->c.lock);
544 six_lock_readers_add(&b->c.lock, count.n[SIX_LOCK_read]);
545
546 if (ret)
547 goto success;
548 } else {
549 if (six_relock_type(&b->c.lock, SIX_LOCK_intent, path->l[level].lock_seq))
550 goto success;
551 }
552
553 /*
554 * Do we already have an intent lock via another path? If so, just bump
555 * lock count:
556 */
557 if (btree_node_lock_seq_matches(path, b, level) &&
558 btree_node_lock_increment(trans, &b->c, level, BTREE_NODE_INTENT_LOCKED)) {
559 btree_node_unlock(trans, path, level);
560 goto success;
561 }
562
563 trace_and_count(trans->c, btree_path_upgrade_fail, trans, _RET_IP_, path, level);
564 return false;
565 success:
566 mark_btree_node_locked_noreset(path, level, BTREE_NODE_INTENT_LOCKED);
567 return true;
568 }
569
570 /* Btree path locking: */
571
572 /*
573 * Only for btree_cache.c - only relocks intent locks
574 */
bch2_btree_path_relock_intent(struct btree_trans * trans,struct btree_path * path)575 int bch2_btree_path_relock_intent(struct btree_trans *trans,
576 struct btree_path *path)
577 {
578 unsigned l;
579
580 for (l = path->level;
581 l < path->locks_want && btree_path_node(path, l);
582 l++) {
583 if (!bch2_btree_node_relock(trans, path, l)) {
584 __bch2_btree_path_unlock(trans, path);
585 btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
586 trace_and_count(trans->c, trans_restart_relock_path_intent, trans, _RET_IP_, path);
587 return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path_intent);
588 }
589 }
590
591 return 0;
592 }
593
594 __flatten
bch2_btree_path_relock_norestart(struct btree_trans * trans,struct btree_path * path)595 bool bch2_btree_path_relock_norestart(struct btree_trans *trans, struct btree_path *path)
596 {
597 struct get_locks_fail f;
598
599 bool ret = btree_path_get_locks(trans, path, false, &f);
600 bch2_trans_verify_locks(trans);
601 return ret;
602 }
603
__bch2_btree_path_relock(struct btree_trans * trans,struct btree_path * path,unsigned long trace_ip)604 int __bch2_btree_path_relock(struct btree_trans *trans,
605 struct btree_path *path, unsigned long trace_ip)
606 {
607 if (!bch2_btree_path_relock_norestart(trans, path)) {
608 trace_and_count(trans->c, trans_restart_relock_path, trans, trace_ip, path);
609 return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path);
610 }
611
612 return 0;
613 }
614
bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans * trans,struct btree_path * path,unsigned new_locks_want,struct get_locks_fail * f)615 bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *trans,
616 struct btree_path *path,
617 unsigned new_locks_want,
618 struct get_locks_fail *f)
619 {
620 EBUG_ON(path->locks_want >= new_locks_want);
621
622 path->locks_want = new_locks_want;
623
624 bool ret = btree_path_get_locks(trans, path, true, f);
625 bch2_trans_verify_locks(trans);
626 return ret;
627 }
628
__bch2_btree_path_upgrade(struct btree_trans * trans,struct btree_path * path,unsigned new_locks_want,struct get_locks_fail * f)629 bool __bch2_btree_path_upgrade(struct btree_trans *trans,
630 struct btree_path *path,
631 unsigned new_locks_want,
632 struct get_locks_fail *f)
633 {
634 bool ret = bch2_btree_path_upgrade_noupgrade_sibs(trans, path, new_locks_want, f);
635 if (ret)
636 goto out;
637
638 /*
639 * XXX: this is ugly - we'd prefer to not be mucking with other
640 * iterators in the btree_trans here.
641 *
642 * On failure to upgrade the iterator, setting iter->locks_want and
643 * calling get_locks() is sufficient to make bch2_btree_path_traverse()
644 * get the locks we want on transaction restart.
645 *
646 * But if this iterator was a clone, on transaction restart what we did
647 * to this iterator isn't going to be preserved.
648 *
649 * Possibly we could add an iterator field for the parent iterator when
650 * an iterator is a copy - for now, we'll just upgrade any other
651 * iterators with the same btree id.
652 *
653 * The code below used to be needed to ensure ancestor nodes get locked
654 * before interior nodes - now that's handled by
655 * bch2_btree_path_traverse_all().
656 */
657 if (!path->cached && !trans->in_traverse_all) {
658 struct btree_path *linked;
659 unsigned i;
660
661 trans_for_each_path(trans, linked, i)
662 if (linked != path &&
663 linked->cached == path->cached &&
664 linked->btree_id == path->btree_id &&
665 linked->locks_want < new_locks_want) {
666 linked->locks_want = new_locks_want;
667 btree_path_get_locks(trans, linked, true, NULL);
668 }
669 }
670 out:
671 bch2_trans_verify_locks(trans);
672 return ret;
673 }
674
__bch2_btree_path_downgrade(struct btree_trans * trans,struct btree_path * path,unsigned new_locks_want)675 void __bch2_btree_path_downgrade(struct btree_trans *trans,
676 struct btree_path *path,
677 unsigned new_locks_want)
678 {
679 unsigned l, old_locks_want = path->locks_want;
680
681 if (trans->restarted)
682 return;
683
684 EBUG_ON(path->locks_want < new_locks_want);
685
686 path->locks_want = new_locks_want;
687
688 while (path->nodes_locked &&
689 (l = btree_path_highest_level_locked(path)) >= path->locks_want) {
690 if (l > path->level) {
691 btree_node_unlock(trans, path, l);
692 } else {
693 if (btree_node_intent_locked(path, l)) {
694 six_lock_downgrade(&path->l[l].b->c.lock);
695 mark_btree_node_locked_noreset(path, l, BTREE_NODE_READ_LOCKED);
696 }
697 break;
698 }
699 }
700
701 bch2_btree_path_verify_locks(path);
702
703 trace_path_downgrade(trans, _RET_IP_, path, old_locks_want);
704 }
705
706 /* Btree transaction locking: */
707
bch2_trans_downgrade(struct btree_trans * trans)708 void bch2_trans_downgrade(struct btree_trans *trans)
709 {
710 struct btree_path *path;
711 unsigned i;
712
713 if (trans->restarted)
714 return;
715
716 trans_for_each_path(trans, path, i)
717 if (path->ref)
718 bch2_btree_path_downgrade(trans, path);
719 }
720
__bch2_trans_unlock(struct btree_trans * trans)721 static inline void __bch2_trans_unlock(struct btree_trans *trans)
722 {
723 struct btree_path *path;
724 unsigned i;
725
726 trans_for_each_path(trans, path, i)
727 __bch2_btree_path_unlock(trans, path);
728 }
729
bch2_trans_relock_fail(struct btree_trans * trans,struct btree_path * path,struct get_locks_fail * f,bool trace)730 static noinline __cold int bch2_trans_relock_fail(struct btree_trans *trans, struct btree_path *path,
731 struct get_locks_fail *f, bool trace)
732 {
733 if (!trace)
734 goto out;
735
736 if (trace_trans_restart_relock_enabled()) {
737 struct printbuf buf = PRINTBUF;
738
739 bch2_bpos_to_text(&buf, path->pos);
740 prt_printf(&buf, " l=%u seq=%u node seq=", f->l, path->l[f->l].lock_seq);
741 if (IS_ERR_OR_NULL(f->b)) {
742 prt_str(&buf, bch2_err_str(PTR_ERR(f->b)));
743 } else {
744 prt_printf(&buf, "%u", f->b->c.lock.seq);
745
746 struct six_lock_count c =
747 bch2_btree_node_lock_counts(trans, NULL, &f->b->c, f->l);
748 prt_printf(&buf, " self locked %u.%u.%u", c.n[0], c.n[1], c.n[2]);
749
750 c = six_lock_counts(&f->b->c.lock);
751 prt_printf(&buf, " total locked %u.%u.%u", c.n[0], c.n[1], c.n[2]);
752 }
753
754 trace_trans_restart_relock(trans, _RET_IP_, buf.buf);
755 printbuf_exit(&buf);
756 }
757
758 count_event(trans->c, trans_restart_relock);
759 out:
760 __bch2_trans_unlock(trans);
761 bch2_trans_verify_locks(trans);
762 return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock);
763 }
764
__bch2_trans_relock(struct btree_trans * trans,bool trace)765 static inline int __bch2_trans_relock(struct btree_trans *trans, bool trace)
766 {
767 bch2_trans_verify_locks(trans);
768
769 if (unlikely(trans->restarted))
770 return -((int) trans->restarted);
771 if (unlikely(trans->locked))
772 goto out;
773
774 struct btree_path *path;
775 unsigned i;
776
777 trans_for_each_path(trans, path, i) {
778 struct get_locks_fail f;
779
780 if (path->should_be_locked &&
781 !btree_path_get_locks(trans, path, false, &f))
782 return bch2_trans_relock_fail(trans, path, &f, trace);
783 }
784
785 trans_set_locked(trans);
786 out:
787 bch2_trans_verify_locks(trans);
788 return 0;
789 }
790
/* Relock @trans, with tracing of relock failures enabled. */
int bch2_trans_relock(struct btree_trans *trans)
{
	const bool trace = true;

	return __bch2_trans_relock(trans, trace);
}
795
/* Relock @trans, with tracing of relock failures disabled. */
int bch2_trans_relock_notrace(struct btree_trans *trans)
{
	const bool trace = false;

	return __bch2_trans_relock(trans, trace);
}
800
/*
 * Unlock every path of @trans, then mark the transaction itself unlocked.
 *
 * NOTE(review): as written this is identical to bch2_trans_unlock(); the
 * "noassert" in the name is not reflected by any difference in this file.
 */
void bch2_trans_unlock_noassert(struct btree_trans *trans)
{
	__bch2_trans_unlock(trans);
	trans_set_unlocked(trans);
}
807
/* Unlock every path of @trans, then mark the transaction itself unlocked. */
void bch2_trans_unlock(struct btree_trans *trans)
{
	__bch2_trans_unlock(trans);
	trans_set_unlocked(trans);
}
814
/*
 * Unlock @trans and additionally call bch2_trans_srcu_unlock() on it, in
 * that order.
 */
void bch2_trans_unlock_long(struct btree_trans *trans)
{
	bch2_trans_unlock(trans);

	bch2_trans_srcu_unlock(trans);
}
820
/*
 * Take @lock via drop_locks_do().
 *
 * NOTE(review): drop_locks_do() presumably drops the transaction's btree
 * locks, runs the expression and then relocks - confirm against the macro.
 *
 * Returns 0 with @lock held on success.  On a non-zero result @lock has been
 * released again before returning.
 */
int __bch2_trans_mutex_lock(struct btree_trans *trans,
			    struct mutex *lock)
{
	int err = drop_locks_do(trans, (mutex_lock(lock), 0));

	if (!err)
		return 0;

	/* don't leave the mutex held on the error path */
	mutex_unlock(lock);
	return err;
}
830
831 /* Debug */
832
833 #ifdef CONFIG_BCACHEFS_DEBUG
834
bch2_btree_path_verify_locks(struct btree_path * path)835 void bch2_btree_path_verify_locks(struct btree_path *path)
836 {
837 /*
838 * A path may be uptodate and yet have nothing locked if and only if
839 * there is no node at path->level, which generally means we were
840 * iterating over all nodes and got to the end of the btree
841 */
842 BUG_ON(path->uptodate == BTREE_ITER_UPTODATE &&
843 btree_path_node(path, path->level) &&
844 !path->nodes_locked);
845
846 if (!path->nodes_locked)
847 return;
848
849 for (unsigned l = 0; l < BTREE_MAX_DEPTH; l++) {
850 int want = btree_lock_want(path, l);
851 int have = btree_node_locked_type(path, l);
852
853 BUG_ON(!is_btree_node(path, l) && have != BTREE_NODE_UNLOCKED);
854
855 BUG_ON(is_btree_node(path, l) &&
856 (want == BTREE_NODE_UNLOCKED ||
857 have != BTREE_NODE_WRITE_LOCKED) &&
858 want != have);
859 }
860 }
861
bch2_trans_locked(struct btree_trans * trans)862 static bool bch2_trans_locked(struct btree_trans *trans)
863 {
864 struct btree_path *path;
865 unsigned i;
866
867 trans_for_each_path(trans, path, i)
868 if (path->nodes_locked)
869 return true;
870 return false;
871 }
872
bch2_trans_verify_locks(struct btree_trans * trans)873 void bch2_trans_verify_locks(struct btree_trans *trans)
874 {
875 if (!trans->locked) {
876 BUG_ON(bch2_trans_locked(trans));
877 return;
878 }
879
880 struct btree_path *path;
881 unsigned i;
882
883 trans_for_each_path(trans, path, i)
884 bch2_btree_path_verify_locks(path);
885 }
886
887 #endif
888