xref: /linux/fs/dlm/recover.c (revision 8fa5723aa7e053d498336b48448b292fc2e0458b)
1 /******************************************************************************
2 *******************************************************************************
3 **
4 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
5 **  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
6 **
7 **  This copyrighted material is made available to anyone wishing to use,
8 **  modify, copy, or redistribute it subject to the terms and conditions
9 **  of the GNU General Public License v.2.
10 **
11 *******************************************************************************
12 ******************************************************************************/
13 
14 #include "dlm_internal.h"
15 #include "lockspace.h"
16 #include "dir.h"
17 #include "config.h"
18 #include "ast.h"
19 #include "memory.h"
20 #include "rcom.h"
21 #include "lock.h"
22 #include "lowcomms.h"
23 #include "member.h"
24 #include "recover.h"
25 
26 
27 /*
28  * Recovery waiting routines: these functions wait for a particular reply from
29  * a remote node, or for the remote node to report a certain status.  They need
30  * to abort if the lockspace is stopped indicating a node has failed (perhaps
31  * the one being waited for).
32  */
33 
34 /*
35  * Wait until given function returns non-zero or lockspace is stopped
36  * (LS_RECOVERY_STOP set due to failure of a node in ls_nodes).  When another
37  * function thinks it could have completed the waited-on task, they should wake
38  * up ls_wait_general to get an immediate response rather than waiting for the
39  * timer to detect the result.  A timer wakes us up periodically while waiting
40  * to see if we should abort due to a node failure.  This should only be called
41  * by the dlm_recoverd thread.
42  */
43 
44 static void dlm_wait_timer_fn(unsigned long data)
45 {
46 	struct dlm_ls *ls = (struct dlm_ls *) data;
47 	mod_timer(&ls->ls_timer, jiffies + (dlm_config.ci_recover_timer * HZ));
48 	wake_up(&ls->ls_wait_general);
49 }
50 
51 int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls))
52 {
53 	int error = 0;
54 
55 	init_timer(&ls->ls_timer);
56 	ls->ls_timer.function = dlm_wait_timer_fn;
57 	ls->ls_timer.data = (long) ls;
58 	ls->ls_timer.expires = jiffies + (dlm_config.ci_recover_timer * HZ);
59 	add_timer(&ls->ls_timer);
60 
61 	wait_event(ls->ls_wait_general, testfn(ls) || dlm_recovery_stopped(ls));
62 	del_timer_sync(&ls->ls_timer);
63 
64 	if (dlm_recovery_stopped(ls)) {
65 		log_debug(ls, "dlm_wait_function aborted");
66 		error = -EINTR;
67 	}
68 	return error;
69 }
70 
71 /*
72  * An efficient way for all nodes to wait for all others to have a certain
73  * status.  The node with the lowest nodeid polls all the others for their
74  * status (wait_status_all) and all the others poll the node with the low id
75  * for its accumulated result (wait_status_low).  When all nodes have set
76  * status flag X, then status flag X_ALL will be set on the low nodeid.
77  */
78 
79 uint32_t dlm_recover_status(struct dlm_ls *ls)
80 {
81 	uint32_t status;
82 	spin_lock(&ls->ls_recover_lock);
83 	status = ls->ls_recover_status;
84 	spin_unlock(&ls->ls_recover_lock);
85 	return status;
86 }
87 
88 void dlm_set_recover_status(struct dlm_ls *ls, uint32_t status)
89 {
90 	spin_lock(&ls->ls_recover_lock);
91 	ls->ls_recover_status |= status;
92 	spin_unlock(&ls->ls_recover_lock);
93 }
94 
95 static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status)
96 {
97 	struct dlm_rcom *rc = ls->ls_recover_buf;
98 	struct dlm_member *memb;
99 	int error = 0, delay;
100 
101 	list_for_each_entry(memb, &ls->ls_nodes, list) {
102 		delay = 0;
103 		for (;;) {
104 			if (dlm_recovery_stopped(ls)) {
105 				error = -EINTR;
106 				goto out;
107 			}
108 
109 			error = dlm_rcom_status(ls, memb->nodeid);
110 			if (error)
111 				goto out;
112 
113 			if (rc->rc_result & wait_status)
114 				break;
115 			if (delay < 1000)
116 				delay += 20;
117 			msleep(delay);
118 		}
119 	}
120  out:
121 	return error;
122 }
123 
124 static int wait_status_low(struct dlm_ls *ls, uint32_t wait_status)
125 {
126 	struct dlm_rcom *rc = ls->ls_recover_buf;
127 	int error = 0, delay = 0, nodeid = ls->ls_low_nodeid;
128 
129 	for (;;) {
130 		if (dlm_recovery_stopped(ls)) {
131 			error = -EINTR;
132 			goto out;
133 		}
134 
135 		error = dlm_rcom_status(ls, nodeid);
136 		if (error)
137 			break;
138 
139 		if (rc->rc_result & wait_status)
140 			break;
141 		if (delay < 1000)
142 			delay += 20;
143 		msleep(delay);
144 	}
145  out:
146 	return error;
147 }
148 
149 static int wait_status(struct dlm_ls *ls, uint32_t status)
150 {
151 	uint32_t status_all = status << 1;
152 	int error;
153 
154 	if (ls->ls_low_nodeid == dlm_our_nodeid()) {
155 		error = wait_status_all(ls, status);
156 		if (!error)
157 			dlm_set_recover_status(ls, status_all);
158 	} else
159 		error = wait_status_low(ls, status_all);
160 
161 	return error;
162 }
163 
164 int dlm_recover_members_wait(struct dlm_ls *ls)
165 {
166 	return wait_status(ls, DLM_RS_NODES);
167 }
168 
169 int dlm_recover_directory_wait(struct dlm_ls *ls)
170 {
171 	return wait_status(ls, DLM_RS_DIR);
172 }
173 
174 int dlm_recover_locks_wait(struct dlm_ls *ls)
175 {
176 	return wait_status(ls, DLM_RS_LOCKS);
177 }
178 
179 int dlm_recover_done_wait(struct dlm_ls *ls)
180 {
181 	return wait_status(ls, DLM_RS_DONE);
182 }
183 
184 /*
185  * The recover_list contains all the rsb's for which we've requested the new
186  * master nodeid.  As replies are returned from the resource directories the
187  * rsb's are removed from the list.  When the list is empty we're done.
188  *
189  * The recover_list is later similarly used for all rsb's for which we've sent
190  * new lkb's and need to receive new corresponding lkid's.
191  *
192  * We use the address of the rsb struct as a simple local identifier for the
193  * rsb so we can match an rcom reply with the rsb it was sent for.
194  */
195 
196 static int recover_list_empty(struct dlm_ls *ls)
197 {
198 	int empty;
199 
200 	spin_lock(&ls->ls_recover_list_lock);
201 	empty = list_empty(&ls->ls_recover_list);
202 	spin_unlock(&ls->ls_recover_list_lock);
203 
204 	return empty;
205 }
206 
207 static void recover_list_add(struct dlm_rsb *r)
208 {
209 	struct dlm_ls *ls = r->res_ls;
210 
211 	spin_lock(&ls->ls_recover_list_lock);
212 	if (list_empty(&r->res_recover_list)) {
213 		list_add_tail(&r->res_recover_list, &ls->ls_recover_list);
214 		ls->ls_recover_list_count++;
215 		dlm_hold_rsb(r);
216 	}
217 	spin_unlock(&ls->ls_recover_list_lock);
218 }
219 
220 static void recover_list_del(struct dlm_rsb *r)
221 {
222 	struct dlm_ls *ls = r->res_ls;
223 
224 	spin_lock(&ls->ls_recover_list_lock);
225 	list_del_init(&r->res_recover_list);
226 	ls->ls_recover_list_count--;
227 	spin_unlock(&ls->ls_recover_list_lock);
228 
229 	dlm_put_rsb(r);
230 }
231 
232 static struct dlm_rsb *recover_list_find(struct dlm_ls *ls, uint64_t id)
233 {
234 	struct dlm_rsb *r = NULL;
235 
236 	spin_lock(&ls->ls_recover_list_lock);
237 
238 	list_for_each_entry(r, &ls->ls_recover_list, res_recover_list) {
239 		if (id == (unsigned long) r)
240 			goto out;
241 	}
242 	r = NULL;
243  out:
244 	spin_unlock(&ls->ls_recover_list_lock);
245 	return r;
246 }
247 
248 static void recover_list_clear(struct dlm_ls *ls)
249 {
250 	struct dlm_rsb *r, *s;
251 
252 	spin_lock(&ls->ls_recover_list_lock);
253 	list_for_each_entry_safe(r, s, &ls->ls_recover_list, res_recover_list) {
254 		list_del_init(&r->res_recover_list);
255 		r->res_recover_locks_count = 0;
256 		dlm_put_rsb(r);
257 		ls->ls_recover_list_count--;
258 	}
259 
260 	if (ls->ls_recover_list_count != 0) {
261 		log_error(ls, "warning: recover_list_count %d",
262 			  ls->ls_recover_list_count);
263 		ls->ls_recover_list_count = 0;
264 	}
265 	spin_unlock(&ls->ls_recover_list_lock);
266 }
267 
268 
269 /* Master recovery: find new master node for rsb's that were
270    mastered on nodes that have been removed.
271 
272    dlm_recover_masters
273    recover_master
274    dlm_send_rcom_lookup            ->  receive_rcom_lookup
275                                        dlm_dir_lookup
276    receive_rcom_lookup_reply       <-
277    dlm_recover_master_reply
278    set_new_master
279    set_master_lkbs
280    set_lock_master
281 */
282 
283 /*
284  * Set the lock master for all LKBs in a lock queue
285  * If we are the new master of the rsb, we may have received new
286  * MSTCPY locks from other nodes already which we need to ignore
287  * when setting the new nodeid.
288  */
289 
290 static void set_lock_master(struct list_head *queue, int nodeid)
291 {
292 	struct dlm_lkb *lkb;
293 
294 	list_for_each_entry(lkb, queue, lkb_statequeue)
295 		if (!(lkb->lkb_flags & DLM_IFL_MSTCPY))
296 			lkb->lkb_nodeid = nodeid;
297 }
298 
299 static void set_master_lkbs(struct dlm_rsb *r)
300 {
301 	set_lock_master(&r->res_grantqueue, r->res_nodeid);
302 	set_lock_master(&r->res_convertqueue, r->res_nodeid);
303 	set_lock_master(&r->res_waitqueue, r->res_nodeid);
304 }
305 
306 /*
307  * Propogate the new master nodeid to locks
308  * The NEW_MASTER flag tells dlm_recover_locks() which rsb's to consider.
309  * The NEW_MASTER2 flag tells recover_lvb() and set_locks_purged() which
310  * rsb's to consider.
311  */
312 
313 static void set_new_master(struct dlm_rsb *r, int nodeid)
314 {
315 	lock_rsb(r);
316 	r->res_nodeid = nodeid;
317 	set_master_lkbs(r);
318 	rsb_set_flag(r, RSB_NEW_MASTER);
319 	rsb_set_flag(r, RSB_NEW_MASTER2);
320 	unlock_rsb(r);
321 }
322 
323 /*
324  * We do async lookups on rsb's that need new masters.  The rsb's
325  * waiting for a lookup reply are kept on the recover_list.
326  */
327 
328 static int recover_master(struct dlm_rsb *r)
329 {
330 	struct dlm_ls *ls = r->res_ls;
331 	int error, dir_nodeid, ret_nodeid, our_nodeid = dlm_our_nodeid();
332 
333 	dir_nodeid = dlm_dir_nodeid(r);
334 
335 	if (dir_nodeid == our_nodeid) {
336 		error = dlm_dir_lookup(ls, our_nodeid, r->res_name,
337 				       r->res_length, &ret_nodeid);
338 		if (error)
339 			log_error(ls, "recover dir lookup error %d", error);
340 
341 		if (ret_nodeid == our_nodeid)
342 			ret_nodeid = 0;
343 		set_new_master(r, ret_nodeid);
344 	} else {
345 		recover_list_add(r);
346 		error = dlm_send_rcom_lookup(r, dir_nodeid);
347 	}
348 
349 	return error;
350 }
351 
352 /*
353  * When not using a directory, most resource names will hash to a new static
354  * master nodeid and the resource will need to be remastered.
355  */
356 
357 static int recover_master_static(struct dlm_rsb *r)
358 {
359 	int master = dlm_dir_nodeid(r);
360 
361 	if (master == dlm_our_nodeid())
362 		master = 0;
363 
364 	if (r->res_nodeid != master) {
365 		if (is_master(r))
366 			dlm_purge_mstcpy_locks(r);
367 		set_new_master(r, master);
368 		return 1;
369 	}
370 	return 0;
371 }
372 
373 /*
374  * Go through local root resources and for each rsb which has a master which
375  * has departed, get the new master nodeid from the directory.  The dir will
376  * assign mastery to the first node to look up the new master.  That means
377  * we'll discover in this lookup if we're the new master of any rsb's.
378  *
379  * We fire off all the dir lookup requests individually and asynchronously to
380  * the correct dir node.
381  */
382 
383 int dlm_recover_masters(struct dlm_ls *ls)
384 {
385 	struct dlm_rsb *r;
386 	int error = 0, count = 0;
387 
388 	log_debug(ls, "dlm_recover_masters");
389 
390 	down_read(&ls->ls_root_sem);
391 	list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
392 		if (dlm_recovery_stopped(ls)) {
393 			up_read(&ls->ls_root_sem);
394 			error = -EINTR;
395 			goto out;
396 		}
397 
398 		if (dlm_no_directory(ls))
399 			count += recover_master_static(r);
400 		else if (!is_master(r) &&
401 			 (dlm_is_removed(ls, r->res_nodeid) ||
402 			  rsb_flag(r, RSB_NEW_MASTER))) {
403 			recover_master(r);
404 			count++;
405 		}
406 
407 		schedule();
408 	}
409 	up_read(&ls->ls_root_sem);
410 
411 	log_debug(ls, "dlm_recover_masters %d resources", count);
412 
413 	error = dlm_wait_function(ls, &recover_list_empty);
414  out:
415 	if (error)
416 		recover_list_clear(ls);
417 	return error;
418 }
419 
420 int dlm_recover_master_reply(struct dlm_ls *ls, struct dlm_rcom *rc)
421 {
422 	struct dlm_rsb *r;
423 	int nodeid;
424 
425 	r = recover_list_find(ls, rc->rc_id);
426 	if (!r) {
427 		log_error(ls, "dlm_recover_master_reply no id %llx",
428 			  (unsigned long long)rc->rc_id);
429 		goto out;
430 	}
431 
432 	nodeid = rc->rc_result;
433 	if (nodeid == dlm_our_nodeid())
434 		nodeid = 0;
435 
436 	set_new_master(r, nodeid);
437 	recover_list_del(r);
438 
439 	if (recover_list_empty(ls))
440 		wake_up(&ls->ls_wait_general);
441  out:
442 	return 0;
443 }
444 
445 
446 /* Lock recovery: rebuild the process-copy locks we hold on a
447    remastered rsb on the new rsb master.
448 
449    dlm_recover_locks
450    recover_locks
451    recover_locks_queue
452    dlm_send_rcom_lock              ->  receive_rcom_lock
453                                        dlm_recover_master_copy
454    receive_rcom_lock_reply         <-
455    dlm_recover_process_copy
456 */
457 
458 
459 /*
460  * keep a count of the number of lkb's we send to the new master; when we get
461  * an equal number of replies then recovery for the rsb is done
462  */
463 
464 static int recover_locks_queue(struct dlm_rsb *r, struct list_head *head)
465 {
466 	struct dlm_lkb *lkb;
467 	int error = 0;
468 
469 	list_for_each_entry(lkb, head, lkb_statequeue) {
470 	   	error = dlm_send_rcom_lock(r, lkb);
471 		if (error)
472 			break;
473 		r->res_recover_locks_count++;
474 	}
475 
476 	return error;
477 }
478 
479 static int recover_locks(struct dlm_rsb *r)
480 {
481 	int error = 0;
482 
483 	lock_rsb(r);
484 
485 	DLM_ASSERT(!r->res_recover_locks_count, dlm_dump_rsb(r););
486 
487 	error = recover_locks_queue(r, &r->res_grantqueue);
488 	if (error)
489 		goto out;
490 	error = recover_locks_queue(r, &r->res_convertqueue);
491 	if (error)
492 		goto out;
493 	error = recover_locks_queue(r, &r->res_waitqueue);
494 	if (error)
495 		goto out;
496 
497 	if (r->res_recover_locks_count)
498 		recover_list_add(r);
499 	else
500 		rsb_clear_flag(r, RSB_NEW_MASTER);
501  out:
502 	unlock_rsb(r);
503 	return error;
504 }
505 
506 int dlm_recover_locks(struct dlm_ls *ls)
507 {
508 	struct dlm_rsb *r;
509 	int error, count = 0;
510 
511 	log_debug(ls, "dlm_recover_locks");
512 
513 	down_read(&ls->ls_root_sem);
514 	list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
515 		if (is_master(r)) {
516 			rsb_clear_flag(r, RSB_NEW_MASTER);
517 			continue;
518 		}
519 
520 		if (!rsb_flag(r, RSB_NEW_MASTER))
521 			continue;
522 
523 		if (dlm_recovery_stopped(ls)) {
524 			error = -EINTR;
525 			up_read(&ls->ls_root_sem);
526 			goto out;
527 		}
528 
529 		error = recover_locks(r);
530 		if (error) {
531 			up_read(&ls->ls_root_sem);
532 			goto out;
533 		}
534 
535 		count += r->res_recover_locks_count;
536 	}
537 	up_read(&ls->ls_root_sem);
538 
539 	log_debug(ls, "dlm_recover_locks %d locks", count);
540 
541 	error = dlm_wait_function(ls, &recover_list_empty);
542  out:
543 	if (error)
544 		recover_list_clear(ls);
545 	else
546 		dlm_set_recover_status(ls, DLM_RS_LOCKS);
547 	return error;
548 }
549 
550 void dlm_recovered_lock(struct dlm_rsb *r)
551 {
552 	DLM_ASSERT(rsb_flag(r, RSB_NEW_MASTER), dlm_dump_rsb(r););
553 
554 	r->res_recover_locks_count--;
555 	if (!r->res_recover_locks_count) {
556 		rsb_clear_flag(r, RSB_NEW_MASTER);
557 		recover_list_del(r);
558 	}
559 
560 	if (recover_list_empty(r->res_ls))
561 		wake_up(&r->res_ls->ls_wait_general);
562 }
563 
564 /*
565  * The lvb needs to be recovered on all master rsb's.  This includes setting
566  * the VALNOTVALID flag if necessary, and determining the correct lvb contents
567  * based on the lvb's of the locks held on the rsb.
568  *
569  * RSB_VALNOTVALID is set if there are only NL/CR locks on the rsb.  If it
570  * was already set prior to recovery, it's not cleared, regardless of locks.
571  *
572  * The LVB contents are only considered for changing when this is a new master
573  * of the rsb (NEW_MASTER2).  Then, the rsb's lvb is taken from any lkb with
574  * mode > CR.  If no lkb's exist with mode above CR, the lvb contents are taken
575  * from the lkb with the largest lvb sequence number.
576  */
577 
578 static void recover_lvb(struct dlm_rsb *r)
579 {
580 	struct dlm_lkb *lkb, *high_lkb = NULL;
581 	uint32_t high_seq = 0;
582 	int lock_lvb_exists = 0;
583 	int big_lock_exists = 0;
584 	int lvblen = r->res_ls->ls_lvblen;
585 
586 	list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) {
587 		if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
588 			continue;
589 
590 		lock_lvb_exists = 1;
591 
592 		if (lkb->lkb_grmode > DLM_LOCK_CR) {
593 			big_lock_exists = 1;
594 			goto setflag;
595 		}
596 
597 		if (((int)lkb->lkb_lvbseq - (int)high_seq) >= 0) {
598 			high_lkb = lkb;
599 			high_seq = lkb->lkb_lvbseq;
600 		}
601 	}
602 
603 	list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) {
604 		if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
605 			continue;
606 
607 		lock_lvb_exists = 1;
608 
609 		if (lkb->lkb_grmode > DLM_LOCK_CR) {
610 			big_lock_exists = 1;
611 			goto setflag;
612 		}
613 
614 		if (((int)lkb->lkb_lvbseq - (int)high_seq) >= 0) {
615 			high_lkb = lkb;
616 			high_seq = lkb->lkb_lvbseq;
617 		}
618 	}
619 
620  setflag:
621 	if (!lock_lvb_exists)
622 		goto out;
623 
624 	if (!big_lock_exists)
625 		rsb_set_flag(r, RSB_VALNOTVALID);
626 
627 	/* don't mess with the lvb unless we're the new master */
628 	if (!rsb_flag(r, RSB_NEW_MASTER2))
629 		goto out;
630 
631 	if (!r->res_lvbptr) {
632 		r->res_lvbptr = dlm_allocate_lvb(r->res_ls);
633 		if (!r->res_lvbptr)
634 			goto out;
635 	}
636 
637 	if (big_lock_exists) {
638 		r->res_lvbseq = lkb->lkb_lvbseq;
639 		memcpy(r->res_lvbptr, lkb->lkb_lvbptr, lvblen);
640 	} else if (high_lkb) {
641 		r->res_lvbseq = high_lkb->lkb_lvbseq;
642 		memcpy(r->res_lvbptr, high_lkb->lkb_lvbptr, lvblen);
643 	} else {
644 		r->res_lvbseq = 0;
645 		memset(r->res_lvbptr, 0, lvblen);
646 	}
647  out:
648 	return;
649 }
650 
651 /* All master rsb's flagged RECOVER_CONVERT need to be looked at.  The locks
652    converting PR->CW or CW->PR need to have their lkb_grmode set. */
653 
654 static void recover_conversion(struct dlm_rsb *r)
655 {
656 	struct dlm_lkb *lkb;
657 	int grmode = -1;
658 
659 	list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) {
660 		if (lkb->lkb_grmode == DLM_LOCK_PR ||
661 		    lkb->lkb_grmode == DLM_LOCK_CW) {
662 			grmode = lkb->lkb_grmode;
663 			break;
664 		}
665 	}
666 
667 	list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) {
668 		if (lkb->lkb_grmode != DLM_LOCK_IV)
669 			continue;
670 		if (grmode == -1)
671 			lkb->lkb_grmode = lkb->lkb_rqmode;
672 		else
673 			lkb->lkb_grmode = grmode;
674 	}
675 }
676 
677 /* We've become the new master for this rsb and waiting/converting locks may
678    need to be granted in dlm_grant_after_purge() due to locks that may have
679    existed from a removed node. */
680 
681 static void set_locks_purged(struct dlm_rsb *r)
682 {
683 	if (!list_empty(&r->res_waitqueue) || !list_empty(&r->res_convertqueue))
684 		rsb_set_flag(r, RSB_LOCKS_PURGED);
685 }
686 
687 void dlm_recover_rsbs(struct dlm_ls *ls)
688 {
689 	struct dlm_rsb *r;
690 	int count = 0;
691 
692 	log_debug(ls, "dlm_recover_rsbs");
693 
694 	down_read(&ls->ls_root_sem);
695 	list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
696 		lock_rsb(r);
697 		if (is_master(r)) {
698 			if (rsb_flag(r, RSB_RECOVER_CONVERT))
699 				recover_conversion(r);
700 			if (rsb_flag(r, RSB_NEW_MASTER2))
701 				set_locks_purged(r);
702 			recover_lvb(r);
703 			count++;
704 		}
705 		rsb_clear_flag(r, RSB_RECOVER_CONVERT);
706 		rsb_clear_flag(r, RSB_NEW_MASTER2);
707 		unlock_rsb(r);
708 	}
709 	up_read(&ls->ls_root_sem);
710 
711 	log_debug(ls, "dlm_recover_rsbs %d rsbs", count);
712 }
713 
714 /* Create a single list of all root rsb's to be used during recovery */
715 
716 int dlm_create_root_list(struct dlm_ls *ls)
717 {
718 	struct dlm_rsb *r;
719 	int i, error = 0;
720 
721 	down_write(&ls->ls_root_sem);
722 	if (!list_empty(&ls->ls_root_list)) {
723 		log_error(ls, "root list not empty");
724 		error = -EINVAL;
725 		goto out;
726 	}
727 
728 	for (i = 0; i < ls->ls_rsbtbl_size; i++) {
729 		read_lock(&ls->ls_rsbtbl[i].lock);
730 		list_for_each_entry(r, &ls->ls_rsbtbl[i].list, res_hashchain) {
731 			list_add(&r->res_root_list, &ls->ls_root_list);
732 			dlm_hold_rsb(r);
733 		}
734 
735 		/* If we're using a directory, add tossed rsbs to the root
736 		   list; they'll have entries created in the new directory,
737 		   but no other recovery steps should do anything with them. */
738 
739 		if (dlm_no_directory(ls)) {
740 			read_unlock(&ls->ls_rsbtbl[i].lock);
741 			continue;
742 		}
743 
744 		list_for_each_entry(r, &ls->ls_rsbtbl[i].toss, res_hashchain) {
745 			list_add(&r->res_root_list, &ls->ls_root_list);
746 			dlm_hold_rsb(r);
747 		}
748 		read_unlock(&ls->ls_rsbtbl[i].lock);
749 	}
750  out:
751 	up_write(&ls->ls_root_sem);
752 	return error;
753 }
754 
755 void dlm_release_root_list(struct dlm_ls *ls)
756 {
757 	struct dlm_rsb *r, *safe;
758 
759 	down_write(&ls->ls_root_sem);
760 	list_for_each_entry_safe(r, safe, &ls->ls_root_list, res_root_list) {
761 		list_del_init(&r->res_root_list);
762 		dlm_put_rsb(r);
763 	}
764 	up_write(&ls->ls_root_sem);
765 }
766 
767 /* If not using a directory, clear the entire toss list, there's no benefit to
768    caching the master value since it's fixed.  If we are using a dir, keep the
769    rsb's we're the master of.  Recovery will add them to the root list and from
770    there they'll be entered in the rebuilt directory. */
771 
772 void dlm_clear_toss_list(struct dlm_ls *ls)
773 {
774 	struct dlm_rsb *r, *safe;
775 	int i;
776 
777 	for (i = 0; i < ls->ls_rsbtbl_size; i++) {
778 		write_lock(&ls->ls_rsbtbl[i].lock);
779 		list_for_each_entry_safe(r, safe, &ls->ls_rsbtbl[i].toss,
780 					 res_hashchain) {
781 			if (dlm_no_directory(ls) || !is_master(r)) {
782 				list_del(&r->res_hashchain);
783 				dlm_free_rsb(r);
784 			}
785 		}
786 		write_unlock(&ls->ls_rsbtbl[i].lock);
787 	}
788 }
789 
790