xref: /linux/fs/dlm/recover.c (revision da8c66638ae684c99abcb30e89d2803402e7ca20)
1e7fd4179SDavid Teigland /******************************************************************************
2e7fd4179SDavid Teigland *******************************************************************************
3e7fd4179SDavid Teigland **
4e7fd4179SDavid Teigland **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
5e7fd4179SDavid Teigland **  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
6e7fd4179SDavid Teigland **
7e7fd4179SDavid Teigland **  This copyrighted material is made available to anyone wishing to use,
8e7fd4179SDavid Teigland **  modify, copy, or redistribute it subject to the terms and conditions
9e7fd4179SDavid Teigland **  of the GNU General Public License v.2.
10e7fd4179SDavid Teigland **
11e7fd4179SDavid Teigland *******************************************************************************
12e7fd4179SDavid Teigland ******************************************************************************/
13e7fd4179SDavid Teigland 
14e7fd4179SDavid Teigland #include "dlm_internal.h"
15e7fd4179SDavid Teigland #include "lockspace.h"
16e7fd4179SDavid Teigland #include "dir.h"
17e7fd4179SDavid Teigland #include "config.h"
18e7fd4179SDavid Teigland #include "ast.h"
19e7fd4179SDavid Teigland #include "memory.h"
20e7fd4179SDavid Teigland #include "rcom.h"
21e7fd4179SDavid Teigland #include "lock.h"
22e7fd4179SDavid Teigland #include "lowcomms.h"
23e7fd4179SDavid Teigland #include "member.h"
24e7fd4179SDavid Teigland #include "recover.h"
25e7fd4179SDavid Teigland 
26e7fd4179SDavid Teigland 
27e7fd4179SDavid Teigland /*
28e7fd4179SDavid Teigland  * Recovery waiting routines: these functions wait for a particular reply from
29e7fd4179SDavid Teigland  * a remote node, or for the remote node to report a certain status.  They need
30e7fd4179SDavid Teigland  * to abort if the lockspace is stopped indicating a node has failed (perhaps
31e7fd4179SDavid Teigland  * the one being waited for).
32e7fd4179SDavid Teigland  */
33e7fd4179SDavid Teigland 
34e7fd4179SDavid Teigland /*
35e7fd4179SDavid Teigland  * Wait until given function returns non-zero or lockspace is stopped
36e7fd4179SDavid Teigland  * (LS_RECOVERY_STOP set due to failure of a node in ls_nodes).  When another
37e7fd4179SDavid Teigland  * function thinks it could have completed the waited-on task, they should wake
38e7fd4179SDavid Teigland  * up ls_wait_general to get an immediate response rather than waiting for the
396d768177SDavid Teigland  * timeout.  This uses a timeout so it can check periodically if the wait
406d768177SDavid Teigland  * should abort due to node failure (which doesn't cause a wake_up).
416d768177SDavid Teigland  * This should only be called by the dlm_recoverd thread.
42e7fd4179SDavid Teigland  */
43e7fd4179SDavid Teigland 
44e7fd4179SDavid Teigland int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls))
45e7fd4179SDavid Teigland {
46e7fd4179SDavid Teigland 	int error = 0;
476d768177SDavid Teigland 	int rv;
48e7fd4179SDavid Teigland 
496d768177SDavid Teigland 	while (1) {
506d768177SDavid Teigland 		rv = wait_event_timeout(ls->ls_wait_general,
516d768177SDavid Teigland 					testfn(ls) || dlm_recovery_stopped(ls),
526d768177SDavid Teigland 					dlm_config.ci_recover_timer * HZ);
536d768177SDavid Teigland 		if (rv)
546d768177SDavid Teigland 			break;
556d768177SDavid Teigland 	}
56e7fd4179SDavid Teigland 
57e7fd4179SDavid Teigland 	if (dlm_recovery_stopped(ls)) {
58e7fd4179SDavid Teigland 		log_debug(ls, "dlm_wait_function aborted");
59e7fd4179SDavid Teigland 		error = -EINTR;
60e7fd4179SDavid Teigland 	}
61e7fd4179SDavid Teigland 	return error;
62e7fd4179SDavid Teigland }
63e7fd4179SDavid Teigland 
64e7fd4179SDavid Teigland /*
65e7fd4179SDavid Teigland  * An efficient way for all nodes to wait for all others to have a certain
66e7fd4179SDavid Teigland  * status.  The node with the lowest nodeid polls all the others for their
67e7fd4179SDavid Teigland  * status (wait_status_all) and all the others poll the node with the low id
68e7fd4179SDavid Teigland  * for its accumulated result (wait_status_low).  When all nodes have set
69e7fd4179SDavid Teigland  * status flag X, then status flag X_ALL will be set on the low nodeid.
70e7fd4179SDavid Teigland  */
71e7fd4179SDavid Teigland 
72e7fd4179SDavid Teigland uint32_t dlm_recover_status(struct dlm_ls *ls)
73e7fd4179SDavid Teigland {
74e7fd4179SDavid Teigland 	uint32_t status;
75e7fd4179SDavid Teigland 	spin_lock(&ls->ls_recover_lock);
76e7fd4179SDavid Teigland 	status = ls->ls_recover_status;
77e7fd4179SDavid Teigland 	spin_unlock(&ls->ls_recover_lock);
78e7fd4179SDavid Teigland 	return status;
79e7fd4179SDavid Teigland }
80e7fd4179SDavid Teigland 
81757a4271SDavid Teigland static void _set_recover_status(struct dlm_ls *ls, uint32_t status)
82757a4271SDavid Teigland {
83757a4271SDavid Teigland 	ls->ls_recover_status |= status;
84757a4271SDavid Teigland }
85757a4271SDavid Teigland 
86e7fd4179SDavid Teigland void dlm_set_recover_status(struct dlm_ls *ls, uint32_t status)
87e7fd4179SDavid Teigland {
88e7fd4179SDavid Teigland 	spin_lock(&ls->ls_recover_lock);
89757a4271SDavid Teigland 	_set_recover_status(ls, status);
90e7fd4179SDavid Teigland 	spin_unlock(&ls->ls_recover_lock);
91e7fd4179SDavid Teigland }
92e7fd4179SDavid Teigland 
93757a4271SDavid Teigland static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status,
94757a4271SDavid Teigland 			   int save_slots)
95e7fd4179SDavid Teigland {
964007685cSAl Viro 	struct dlm_rcom *rc = ls->ls_recover_buf;
97e7fd4179SDavid Teigland 	struct dlm_member *memb;
98e7fd4179SDavid Teigland 	int error = 0, delay;
99e7fd4179SDavid Teigland 
100e7fd4179SDavid Teigland 	list_for_each_entry(memb, &ls->ls_nodes, list) {
101e7fd4179SDavid Teigland 		delay = 0;
102e7fd4179SDavid Teigland 		for (;;) {
103e7fd4179SDavid Teigland 			if (dlm_recovery_stopped(ls)) {
104e7fd4179SDavid Teigland 				error = -EINTR;
105e7fd4179SDavid Teigland 				goto out;
106e7fd4179SDavid Teigland 			}
107e7fd4179SDavid Teigland 
108757a4271SDavid Teigland 			error = dlm_rcom_status(ls, memb->nodeid, 0);
109e7fd4179SDavid Teigland 			if (error)
110e7fd4179SDavid Teigland 				goto out;
111e7fd4179SDavid Teigland 
112757a4271SDavid Teigland 			if (save_slots)
113757a4271SDavid Teigland 				dlm_slot_save(ls, rc, memb);
114757a4271SDavid Teigland 
115e7fd4179SDavid Teigland 			if (rc->rc_result & wait_status)
116e7fd4179SDavid Teigland 				break;
117e7fd4179SDavid Teigland 			if (delay < 1000)
118e7fd4179SDavid Teigland 				delay += 20;
119e7fd4179SDavid Teigland 			msleep(delay);
120e7fd4179SDavid Teigland 		}
121e7fd4179SDavid Teigland 	}
122e7fd4179SDavid Teigland  out:
123e7fd4179SDavid Teigland 	return error;
124e7fd4179SDavid Teigland }
125e7fd4179SDavid Teigland 
126757a4271SDavid Teigland static int wait_status_low(struct dlm_ls *ls, uint32_t wait_status,
127757a4271SDavid Teigland 			   uint32_t status_flags)
128e7fd4179SDavid Teigland {
1294007685cSAl Viro 	struct dlm_rcom *rc = ls->ls_recover_buf;
130e7fd4179SDavid Teigland 	int error = 0, delay = 0, nodeid = ls->ls_low_nodeid;
131e7fd4179SDavid Teigland 
132e7fd4179SDavid Teigland 	for (;;) {
133e7fd4179SDavid Teigland 		if (dlm_recovery_stopped(ls)) {
134e7fd4179SDavid Teigland 			error = -EINTR;
135e7fd4179SDavid Teigland 			goto out;
136e7fd4179SDavid Teigland 		}
137e7fd4179SDavid Teigland 
138757a4271SDavid Teigland 		error = dlm_rcom_status(ls, nodeid, status_flags);
139e7fd4179SDavid Teigland 		if (error)
140e7fd4179SDavid Teigland 			break;
141e7fd4179SDavid Teigland 
142e7fd4179SDavid Teigland 		if (rc->rc_result & wait_status)
143e7fd4179SDavid Teigland 			break;
144e7fd4179SDavid Teigland 		if (delay < 1000)
145e7fd4179SDavid Teigland 			delay += 20;
146e7fd4179SDavid Teigland 		msleep(delay);
147e7fd4179SDavid Teigland 	}
148e7fd4179SDavid Teigland  out:
149e7fd4179SDavid Teigland 	return error;
150e7fd4179SDavid Teigland }
151e7fd4179SDavid Teigland 
152e7fd4179SDavid Teigland static int wait_status(struct dlm_ls *ls, uint32_t status)
153e7fd4179SDavid Teigland {
154e7fd4179SDavid Teigland 	uint32_t status_all = status << 1;
155e7fd4179SDavid Teigland 	int error;
156e7fd4179SDavid Teigland 
157e7fd4179SDavid Teigland 	if (ls->ls_low_nodeid == dlm_our_nodeid()) {
158757a4271SDavid Teigland 		error = wait_status_all(ls, status, 0);
159e7fd4179SDavid Teigland 		if (!error)
160e7fd4179SDavid Teigland 			dlm_set_recover_status(ls, status_all);
161e7fd4179SDavid Teigland 	} else
162757a4271SDavid Teigland 		error = wait_status_low(ls, status_all, 0);
163e7fd4179SDavid Teigland 
164e7fd4179SDavid Teigland 	return error;
165e7fd4179SDavid Teigland }
166e7fd4179SDavid Teigland 
167e7fd4179SDavid Teigland int dlm_recover_members_wait(struct dlm_ls *ls)
168e7fd4179SDavid Teigland {
169757a4271SDavid Teigland 	struct dlm_member *memb;
170757a4271SDavid Teigland 	struct dlm_slot *slots;
171757a4271SDavid Teigland 	int num_slots, slots_size;
172757a4271SDavid Teigland 	int error, rv;
173757a4271SDavid Teigland 	uint32_t gen;
174757a4271SDavid Teigland 
175757a4271SDavid Teigland 	list_for_each_entry(memb, &ls->ls_nodes, list) {
176757a4271SDavid Teigland 		memb->slot = -1;
177757a4271SDavid Teigland 		memb->generation = 0;
178757a4271SDavid Teigland 	}
179757a4271SDavid Teigland 
180757a4271SDavid Teigland 	if (ls->ls_low_nodeid == dlm_our_nodeid()) {
181757a4271SDavid Teigland 		error = wait_status_all(ls, DLM_RS_NODES, 1);
182757a4271SDavid Teigland 		if (error)
183757a4271SDavid Teigland 			goto out;
184757a4271SDavid Teigland 
185757a4271SDavid Teigland 		/* slots array is sparse, slots_size may be > num_slots */
186757a4271SDavid Teigland 
187757a4271SDavid Teigland 		rv = dlm_slots_assign(ls, &num_slots, &slots_size, &slots, &gen);
188757a4271SDavid Teigland 		if (!rv) {
189757a4271SDavid Teigland 			spin_lock(&ls->ls_recover_lock);
190757a4271SDavid Teigland 			_set_recover_status(ls, DLM_RS_NODES_ALL);
191757a4271SDavid Teigland 			ls->ls_num_slots = num_slots;
192757a4271SDavid Teigland 			ls->ls_slots_size = slots_size;
193757a4271SDavid Teigland 			ls->ls_slots = slots;
194757a4271SDavid Teigland 			ls->ls_generation = gen;
195757a4271SDavid Teigland 			spin_unlock(&ls->ls_recover_lock);
196757a4271SDavid Teigland 		} else {
197757a4271SDavid Teigland 			dlm_set_recover_status(ls, DLM_RS_NODES_ALL);
198757a4271SDavid Teigland 		}
199757a4271SDavid Teigland 	} else {
200757a4271SDavid Teigland 		error = wait_status_low(ls, DLM_RS_NODES_ALL, DLM_RSF_NEED_SLOTS);
201757a4271SDavid Teigland 		if (error)
202757a4271SDavid Teigland 			goto out;
203757a4271SDavid Teigland 
204757a4271SDavid Teigland 		dlm_slots_copy_in(ls);
205757a4271SDavid Teigland 	}
206757a4271SDavid Teigland  out:
207757a4271SDavid Teigland 	return error;
208e7fd4179SDavid Teigland }
209e7fd4179SDavid Teigland 
210e7fd4179SDavid Teigland int dlm_recover_directory_wait(struct dlm_ls *ls)
211e7fd4179SDavid Teigland {
212e7fd4179SDavid Teigland 	return wait_status(ls, DLM_RS_DIR);
213e7fd4179SDavid Teigland }
214e7fd4179SDavid Teigland 
215e7fd4179SDavid Teigland int dlm_recover_locks_wait(struct dlm_ls *ls)
216e7fd4179SDavid Teigland {
217e7fd4179SDavid Teigland 	return wait_status(ls, DLM_RS_LOCKS);
218e7fd4179SDavid Teigland }
219e7fd4179SDavid Teigland 
220e7fd4179SDavid Teigland int dlm_recover_done_wait(struct dlm_ls *ls)
221e7fd4179SDavid Teigland {
222e7fd4179SDavid Teigland 	return wait_status(ls, DLM_RS_DONE);
223e7fd4179SDavid Teigland }
224e7fd4179SDavid Teigland 
225e7fd4179SDavid Teigland /*
226e7fd4179SDavid Teigland  * The recover_list contains all the rsb's for which we've requested the new
227e7fd4179SDavid Teigland  * master nodeid.  As replies are returned from the resource directories the
228e7fd4179SDavid Teigland  * rsb's are removed from the list.  When the list is empty we're done.
229e7fd4179SDavid Teigland  *
230e7fd4179SDavid Teigland  * The recover_list is later similarly used for all rsb's for which we've sent
231e7fd4179SDavid Teigland  * new lkb's and need to receive new corresponding lkid's.
232e7fd4179SDavid Teigland  *
233e7fd4179SDavid Teigland  * We use the address of the rsb struct as a simple local identifier for the
234e7fd4179SDavid Teigland  * rsb so we can match an rcom reply with the rsb it was sent for.
235e7fd4179SDavid Teigland  */
236e7fd4179SDavid Teigland 
237e7fd4179SDavid Teigland static int recover_list_empty(struct dlm_ls *ls)
238e7fd4179SDavid Teigland {
239e7fd4179SDavid Teigland 	int empty;
240e7fd4179SDavid Teigland 
241e7fd4179SDavid Teigland 	spin_lock(&ls->ls_recover_list_lock);
242e7fd4179SDavid Teigland 	empty = list_empty(&ls->ls_recover_list);
243e7fd4179SDavid Teigland 	spin_unlock(&ls->ls_recover_list_lock);
244e7fd4179SDavid Teigland 
245e7fd4179SDavid Teigland 	return empty;
246e7fd4179SDavid Teigland }
247e7fd4179SDavid Teigland 
248e7fd4179SDavid Teigland static void recover_list_add(struct dlm_rsb *r)
249e7fd4179SDavid Teigland {
250e7fd4179SDavid Teigland 	struct dlm_ls *ls = r->res_ls;
251e7fd4179SDavid Teigland 
252e7fd4179SDavid Teigland 	spin_lock(&ls->ls_recover_list_lock);
253e7fd4179SDavid Teigland 	if (list_empty(&r->res_recover_list)) {
254e7fd4179SDavid Teigland 		list_add_tail(&r->res_recover_list, &ls->ls_recover_list);
255e7fd4179SDavid Teigland 		ls->ls_recover_list_count++;
256e7fd4179SDavid Teigland 		dlm_hold_rsb(r);
257e7fd4179SDavid Teigland 	}
258e7fd4179SDavid Teigland 	spin_unlock(&ls->ls_recover_list_lock);
259e7fd4179SDavid Teigland }
260e7fd4179SDavid Teigland 
261e7fd4179SDavid Teigland static void recover_list_del(struct dlm_rsb *r)
262e7fd4179SDavid Teigland {
263e7fd4179SDavid Teigland 	struct dlm_ls *ls = r->res_ls;
264e7fd4179SDavid Teigland 
265e7fd4179SDavid Teigland 	spin_lock(&ls->ls_recover_list_lock);
266e7fd4179SDavid Teigland 	list_del_init(&r->res_recover_list);
267e7fd4179SDavid Teigland 	ls->ls_recover_list_count--;
268e7fd4179SDavid Teigland 	spin_unlock(&ls->ls_recover_list_lock);
269e7fd4179SDavid Teigland 
270e7fd4179SDavid Teigland 	dlm_put_rsb(r);
271e7fd4179SDavid Teigland }
272e7fd4179SDavid Teigland 
273e7fd4179SDavid Teigland static void recover_list_clear(struct dlm_ls *ls)
274e7fd4179SDavid Teigland {
275e7fd4179SDavid Teigland 	struct dlm_rsb *r, *s;
276e7fd4179SDavid Teigland 
277e7fd4179SDavid Teigland 	spin_lock(&ls->ls_recover_list_lock);
278e7fd4179SDavid Teigland 	list_for_each_entry_safe(r, s, &ls->ls_recover_list, res_recover_list) {
279e7fd4179SDavid Teigland 		list_del_init(&r->res_recover_list);
28052069809SDavid Teigland 		r->res_recover_locks_count = 0;
281e7fd4179SDavid Teigland 		dlm_put_rsb(r);
282e7fd4179SDavid Teigland 		ls->ls_recover_list_count--;
283e7fd4179SDavid Teigland 	}
284e7fd4179SDavid Teigland 
285e7fd4179SDavid Teigland 	if (ls->ls_recover_list_count != 0) {
286e7fd4179SDavid Teigland 		log_error(ls, "warning: recover_list_count %d",
287e7fd4179SDavid Teigland 			  ls->ls_recover_list_count);
288e7fd4179SDavid Teigland 		ls->ls_recover_list_count = 0;
289e7fd4179SDavid Teigland 	}
290e7fd4179SDavid Teigland 	spin_unlock(&ls->ls_recover_list_lock);
291e7fd4179SDavid Teigland }
292e7fd4179SDavid Teigland 
2931d7c484eSDavid Teigland static int recover_idr_empty(struct dlm_ls *ls)
2941d7c484eSDavid Teigland {
2951d7c484eSDavid Teigland 	int empty = 1;
2961d7c484eSDavid Teigland 
2971d7c484eSDavid Teigland 	spin_lock(&ls->ls_recover_idr_lock);
2981d7c484eSDavid Teigland 	if (ls->ls_recover_list_count)
2991d7c484eSDavid Teigland 		empty = 0;
3001d7c484eSDavid Teigland 	spin_unlock(&ls->ls_recover_idr_lock);
3011d7c484eSDavid Teigland 
3021d7c484eSDavid Teigland 	return empty;
3031d7c484eSDavid Teigland }
3041d7c484eSDavid Teigland 
3051d7c484eSDavid Teigland static int recover_idr_add(struct dlm_rsb *r)
3061d7c484eSDavid Teigland {
3071d7c484eSDavid Teigland 	struct dlm_ls *ls = r->res_ls;
3081d7c484eSDavid Teigland 	int rv, id;
3091d7c484eSDavid Teigland 
3101d7c484eSDavid Teigland 	rv = idr_pre_get(&ls->ls_recover_idr, GFP_NOFS);
3111d7c484eSDavid Teigland 	if (!rv)
3121d7c484eSDavid Teigland 		return -ENOMEM;
3131d7c484eSDavid Teigland 
3141d7c484eSDavid Teigland 	spin_lock(&ls->ls_recover_idr_lock);
3151d7c484eSDavid Teigland 	if (r->res_id) {
3161d7c484eSDavid Teigland 		spin_unlock(&ls->ls_recover_idr_lock);
3171d7c484eSDavid Teigland 		return -1;
3181d7c484eSDavid Teigland 	}
3191d7c484eSDavid Teigland 	rv = idr_get_new_above(&ls->ls_recover_idr, r, 1, &id);
3201d7c484eSDavid Teigland 	if (rv) {
3211d7c484eSDavid Teigland 		spin_unlock(&ls->ls_recover_idr_lock);
3221d7c484eSDavid Teigland 		return rv;
3231d7c484eSDavid Teigland 	}
3241d7c484eSDavid Teigland 	r->res_id = id;
3251d7c484eSDavid Teigland 	ls->ls_recover_list_count++;
3261d7c484eSDavid Teigland 	dlm_hold_rsb(r);
3271d7c484eSDavid Teigland 	spin_unlock(&ls->ls_recover_idr_lock);
3281d7c484eSDavid Teigland 	return 0;
3291d7c484eSDavid Teigland }
3301d7c484eSDavid Teigland 
3311d7c484eSDavid Teigland static void recover_idr_del(struct dlm_rsb *r)
3321d7c484eSDavid Teigland {
3331d7c484eSDavid Teigland 	struct dlm_ls *ls = r->res_ls;
3341d7c484eSDavid Teigland 
3351d7c484eSDavid Teigland 	spin_lock(&ls->ls_recover_idr_lock);
3361d7c484eSDavid Teigland 	idr_remove(&ls->ls_recover_idr, r->res_id);
3371d7c484eSDavid Teigland 	r->res_id = 0;
3381d7c484eSDavid Teigland 	ls->ls_recover_list_count--;
3391d7c484eSDavid Teigland 	spin_unlock(&ls->ls_recover_idr_lock);
3401d7c484eSDavid Teigland 
3411d7c484eSDavid Teigland 	dlm_put_rsb(r);
3421d7c484eSDavid Teigland }
3431d7c484eSDavid Teigland 
3441d7c484eSDavid Teigland static struct dlm_rsb *recover_idr_find(struct dlm_ls *ls, uint64_t id)
3451d7c484eSDavid Teigland {
3461d7c484eSDavid Teigland 	struct dlm_rsb *r;
3471d7c484eSDavid Teigland 
3481d7c484eSDavid Teigland 	spin_lock(&ls->ls_recover_idr_lock);
3491d7c484eSDavid Teigland 	r = idr_find(&ls->ls_recover_idr, (int)id);
3501d7c484eSDavid Teigland 	spin_unlock(&ls->ls_recover_idr_lock);
3511d7c484eSDavid Teigland 	return r;
3521d7c484eSDavid Teigland }
3531d7c484eSDavid Teigland 
3541d7c484eSDavid Teigland static int recover_idr_clear_rsb(int id, void *p, void *data)
3551d7c484eSDavid Teigland {
3561d7c484eSDavid Teigland 	struct dlm_ls *ls = data;
3571d7c484eSDavid Teigland 	struct dlm_rsb *r = p;
3581d7c484eSDavid Teigland 
3591d7c484eSDavid Teigland 	r->res_id = 0;
3601d7c484eSDavid Teigland 	r->res_recover_locks_count = 0;
3611d7c484eSDavid Teigland 	ls->ls_recover_list_count--;
3621d7c484eSDavid Teigland 
3631d7c484eSDavid Teigland 	dlm_put_rsb(r);
3641d7c484eSDavid Teigland 	return 0;
3651d7c484eSDavid Teigland }
3661d7c484eSDavid Teigland 
3671d7c484eSDavid Teigland static void recover_idr_clear(struct dlm_ls *ls)
3681d7c484eSDavid Teigland {
3691d7c484eSDavid Teigland 	spin_lock(&ls->ls_recover_idr_lock);
3701d7c484eSDavid Teigland 	idr_for_each(&ls->ls_recover_idr, recover_idr_clear_rsb, ls);
3711d7c484eSDavid Teigland 	idr_remove_all(&ls->ls_recover_idr);
3721d7c484eSDavid Teigland 
3731d7c484eSDavid Teigland 	if (ls->ls_recover_list_count != 0) {
3741d7c484eSDavid Teigland 		log_error(ls, "warning: recover_list_count %d",
3751d7c484eSDavid Teigland 			  ls->ls_recover_list_count);
3761d7c484eSDavid Teigland 		ls->ls_recover_list_count = 0;
3771d7c484eSDavid Teigland 	}
3781d7c484eSDavid Teigland 	spin_unlock(&ls->ls_recover_idr_lock);
3791d7c484eSDavid Teigland }
3801d7c484eSDavid Teigland 
381e7fd4179SDavid Teigland 
382e7fd4179SDavid Teigland /* Master recovery: find new master node for rsb's that were
383e7fd4179SDavid Teigland    mastered on nodes that have been removed.
384e7fd4179SDavid Teigland 
385e7fd4179SDavid Teigland    dlm_recover_masters
386e7fd4179SDavid Teigland    recover_master
387e7fd4179SDavid Teigland    dlm_send_rcom_lookup            ->  receive_rcom_lookup
388e7fd4179SDavid Teigland                                        dlm_dir_lookup
389e7fd4179SDavid Teigland    receive_rcom_lookup_reply       <-
390e7fd4179SDavid Teigland    dlm_recover_master_reply
391e7fd4179SDavid Teigland    set_new_master
392e7fd4179SDavid Teigland    set_master_lkbs
393e7fd4179SDavid Teigland    set_lock_master
394e7fd4179SDavid Teigland */
395e7fd4179SDavid Teigland 
396e7fd4179SDavid Teigland /*
397e7fd4179SDavid Teigland  * Set the lock master for all LKBs in a lock queue
398e7fd4179SDavid Teigland  * If we are the new master of the rsb, we may have received new
399e7fd4179SDavid Teigland  * MSTCPY locks from other nodes already which we need to ignore
400e7fd4179SDavid Teigland  * when setting the new nodeid.
401e7fd4179SDavid Teigland  */
402e7fd4179SDavid Teigland 
403e7fd4179SDavid Teigland static void set_lock_master(struct list_head *queue, int nodeid)
404e7fd4179SDavid Teigland {
405e7fd4179SDavid Teigland 	struct dlm_lkb *lkb;
406e7fd4179SDavid Teigland 
4074875647aSDavid Teigland 	list_for_each_entry(lkb, queue, lkb_statequeue) {
4084875647aSDavid Teigland 		if (!(lkb->lkb_flags & DLM_IFL_MSTCPY)) {
409e7fd4179SDavid Teigland 			lkb->lkb_nodeid = nodeid;
4104875647aSDavid Teigland 			lkb->lkb_remid = 0;
4114875647aSDavid Teigland 		}
4124875647aSDavid Teigland 	}
413e7fd4179SDavid Teigland }
414e7fd4179SDavid Teigland 
415e7fd4179SDavid Teigland static void set_master_lkbs(struct dlm_rsb *r)
416e7fd4179SDavid Teigland {
417e7fd4179SDavid Teigland 	set_lock_master(&r->res_grantqueue, r->res_nodeid);
418e7fd4179SDavid Teigland 	set_lock_master(&r->res_convertqueue, r->res_nodeid);
419e7fd4179SDavid Teigland 	set_lock_master(&r->res_waitqueue, r->res_nodeid);
420e7fd4179SDavid Teigland }
421e7fd4179SDavid Teigland 
422e7fd4179SDavid Teigland /*
42325985edcSLucas De Marchi  * Propagate the new master nodeid to locks
424e7fd4179SDavid Teigland  * The NEW_MASTER flag tells dlm_recover_locks() which rsb's to consider.
4254875647aSDavid Teigland  * The NEW_MASTER2 flag tells recover_lvb() and recover_grant() which
426f7da790dSDavid Teigland  * rsb's to consider.
427e7fd4179SDavid Teigland  */
428e7fd4179SDavid Teigland 
429c04fecb4SDavid Teigland static void set_new_master(struct dlm_rsb *r)
430e7fd4179SDavid Teigland {
431e7fd4179SDavid Teigland 	set_master_lkbs(r);
432e7fd4179SDavid Teigland 	rsb_set_flag(r, RSB_NEW_MASTER);
433e7fd4179SDavid Teigland 	rsb_set_flag(r, RSB_NEW_MASTER2);
434e7fd4179SDavid Teigland }
435e7fd4179SDavid Teigland 
436e7fd4179SDavid Teigland /*
437e7fd4179SDavid Teigland  * We do async lookups on rsb's that need new masters.  The rsb's
438e7fd4179SDavid Teigland  * waiting for a lookup reply are kept on the recover_list.
439c04fecb4SDavid Teigland  *
440c04fecb4SDavid Teigland  * Another node recovering the master may have sent us a rcom lookup,
441c04fecb4SDavid Teigland  * and our dlm_master_lookup() set it as the new master, along with
442c04fecb4SDavid Teigland  * NEW_MASTER so that we'll recover it here (this implies dir_nodeid
443c04fecb4SDavid Teigland  * equals our_nodeid below).
444e7fd4179SDavid Teigland  */
445e7fd4179SDavid Teigland 
446c04fecb4SDavid Teigland static int recover_master(struct dlm_rsb *r, unsigned int *count)
447e7fd4179SDavid Teigland {
448e7fd4179SDavid Teigland 	struct dlm_ls *ls = r->res_ls;
449c04fecb4SDavid Teigland 	int our_nodeid, dir_nodeid;
450c04fecb4SDavid Teigland 	int is_removed = 0;
451c04fecb4SDavid Teigland 	int error;
452c04fecb4SDavid Teigland 
453c04fecb4SDavid Teigland 	if (is_master(r))
454c04fecb4SDavid Teigland 		return 0;
455c04fecb4SDavid Teigland 
456c04fecb4SDavid Teigland 	is_removed = dlm_is_removed(ls, r->res_nodeid);
457c04fecb4SDavid Teigland 
458c04fecb4SDavid Teigland 	if (!is_removed && !rsb_flag(r, RSB_NEW_MASTER))
459c04fecb4SDavid Teigland 		return 0;
460c04fecb4SDavid Teigland 
461c04fecb4SDavid Teigland 	our_nodeid = dlm_our_nodeid();
462c04fecb4SDavid Teigland 	dir_nodeid = dlm_dir_nodeid(r);
463e7fd4179SDavid Teigland 
464e7fd4179SDavid Teigland 	if (dir_nodeid == our_nodeid) {
465c04fecb4SDavid Teigland 		if (is_removed) {
466c04fecb4SDavid Teigland 			r->res_master_nodeid = our_nodeid;
467c04fecb4SDavid Teigland 			r->res_nodeid = 0;
468c04fecb4SDavid Teigland 		}
469e7fd4179SDavid Teigland 
470c04fecb4SDavid Teigland 		/* set master of lkbs to ourself when is_removed, or to
471c04fecb4SDavid Teigland 		   another new master which we set along with NEW_MASTER
472c04fecb4SDavid Teigland 		   in dlm_master_lookup */
473c04fecb4SDavid Teigland 		set_new_master(r);
474c04fecb4SDavid Teigland 		error = 0;
475e7fd4179SDavid Teigland 	} else {
4761d7c484eSDavid Teigland 		recover_idr_add(r);
477e7fd4179SDavid Teigland 		error = dlm_send_rcom_lookup(r, dir_nodeid);
478e7fd4179SDavid Teigland 	}
479e7fd4179SDavid Teigland 
480c04fecb4SDavid Teigland 	(*count)++;
481e7fd4179SDavid Teigland 	return error;
482e7fd4179SDavid Teigland }
483e7fd4179SDavid Teigland 
484e7fd4179SDavid Teigland /*
4854875647aSDavid Teigland  * All MSTCPY locks are purged and rebuilt, even if the master stayed the same.
4864875647aSDavid Teigland  * This is necessary because recovery can be started, aborted and restarted,
4874875647aSDavid Teigland  * causing the master nodeid to briefly change during the aborted recovery, and
4884875647aSDavid Teigland  * change back to the original value in the second recovery.  The MSTCPY locks
4894875647aSDavid Teigland  * may or may not have been purged during the aborted recovery.  Another node
4904875647aSDavid Teigland  * with an outstanding request in waiters list and a request reply saved in the
4914875647aSDavid Teigland  * requestqueue, cannot know whether it should ignore the reply and resend the
4924875647aSDavid Teigland  * request, or accept the reply and complete the request.  It must do the
 * former if the remote node purged MSTCPY locks, and it must do the latter if
4944875647aSDavid Teigland  * the remote node did not.  This is solved by always purging MSTCPY locks, in
4954875647aSDavid Teigland  * which case, the request reply would always be ignored and the request
4964875647aSDavid Teigland  * resent.
497e7fd4179SDavid Teigland  */
498e7fd4179SDavid Teigland 
499c04fecb4SDavid Teigland static int recover_master_static(struct dlm_rsb *r, unsigned int *count)
500e7fd4179SDavid Teigland {
5014875647aSDavid Teigland 	int dir_nodeid = dlm_dir_nodeid(r);
5024875647aSDavid Teigland 	int new_master = dir_nodeid;
503e7fd4179SDavid Teigland 
5044875647aSDavid Teigland 	if (dir_nodeid == dlm_our_nodeid())
5054875647aSDavid Teigland 		new_master = 0;
506e7fd4179SDavid Teigland 
507e7fd4179SDavid Teigland 	dlm_purge_mstcpy_locks(r);
508c04fecb4SDavid Teigland 	r->res_master_nodeid = dir_nodeid;
509c04fecb4SDavid Teigland 	r->res_nodeid = new_master;
510c04fecb4SDavid Teigland 	set_new_master(r);
511c04fecb4SDavid Teigland 	(*count)++;
512c04fecb4SDavid Teigland 	return 0;
513e7fd4179SDavid Teigland }
514e7fd4179SDavid Teigland 
515e7fd4179SDavid Teigland /*
516e7fd4179SDavid Teigland  * Go through local root resources and for each rsb which has a master which
517e7fd4179SDavid Teigland  * has departed, get the new master nodeid from the directory.  The dir will
518e7fd4179SDavid Teigland  * assign mastery to the first node to look up the new master.  That means
519e7fd4179SDavid Teigland  * we'll discover in this lookup if we're the new master of any rsb's.
520e7fd4179SDavid Teigland  *
521e7fd4179SDavid Teigland  * We fire off all the dir lookup requests individually and asynchronously to
522e7fd4179SDavid Teigland  * the correct dir node.
523e7fd4179SDavid Teigland  */
524e7fd4179SDavid Teigland 
525e7fd4179SDavid Teigland int dlm_recover_masters(struct dlm_ls *ls)
526e7fd4179SDavid Teigland {
527e7fd4179SDavid Teigland 	struct dlm_rsb *r;
528c04fecb4SDavid Teigland 	unsigned int total = 0;
529c04fecb4SDavid Teigland 	unsigned int count = 0;
530c04fecb4SDavid Teigland 	int nodir = dlm_no_directory(ls);
531c04fecb4SDavid Teigland 	int error;
532e7fd4179SDavid Teigland 
533e7fd4179SDavid Teigland 	log_debug(ls, "dlm_recover_masters");
534e7fd4179SDavid Teigland 
535e7fd4179SDavid Teigland 	down_read(&ls->ls_root_sem);
536e7fd4179SDavid Teigland 	list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
537e7fd4179SDavid Teigland 		if (dlm_recovery_stopped(ls)) {
538e7fd4179SDavid Teigland 			up_read(&ls->ls_root_sem);
539e7fd4179SDavid Teigland 			error = -EINTR;
540e7fd4179SDavid Teigland 			goto out;
541e7fd4179SDavid Teigland 		}
542e7fd4179SDavid Teigland 
543c04fecb4SDavid Teigland 		lock_rsb(r);
544c04fecb4SDavid Teigland 		if (nodir)
545c04fecb4SDavid Teigland 			error = recover_master_static(r, &count);
546c04fecb4SDavid Teigland 		else
547c04fecb4SDavid Teigland 			error = recover_master(r, &count);
548c04fecb4SDavid Teigland 		unlock_rsb(r);
549c04fecb4SDavid Teigland 		cond_resched();
550c04fecb4SDavid Teigland 		total++;
551e7fd4179SDavid Teigland 
552c04fecb4SDavid Teigland 		if (error) {
553c04fecb4SDavid Teigland 			up_read(&ls->ls_root_sem);
554c04fecb4SDavid Teigland 			goto out;
555c04fecb4SDavid Teigland 		}
556e7fd4179SDavid Teigland 	}
557e7fd4179SDavid Teigland 	up_read(&ls->ls_root_sem);
558e7fd4179SDavid Teigland 
559c04fecb4SDavid Teigland 	log_debug(ls, "dlm_recover_masters %u of %u", count, total);
560e7fd4179SDavid Teigland 
5611d7c484eSDavid Teigland 	error = dlm_wait_function(ls, &recover_idr_empty);
562e7fd4179SDavid Teigland  out:
563e7fd4179SDavid Teigland 	if (error)
5641d7c484eSDavid Teigland 		recover_idr_clear(ls);
565e7fd4179SDavid Teigland 	return error;
566e7fd4179SDavid Teigland }
567e7fd4179SDavid Teigland 
568e7fd4179SDavid Teigland int dlm_recover_master_reply(struct dlm_ls *ls, struct dlm_rcom *rc)
569e7fd4179SDavid Teigland {
570e7fd4179SDavid Teigland 	struct dlm_rsb *r;
571c04fecb4SDavid Teigland 	int ret_nodeid, new_master;
572e7fd4179SDavid Teigland 
5731d7c484eSDavid Teigland 	r = recover_idr_find(ls, rc->rc_id);
574e7fd4179SDavid Teigland 	if (!r) {
57590135925SDavid Teigland 		log_error(ls, "dlm_recover_master_reply no id %llx",
5769229f013SDavid Teigland 			  (unsigned long long)rc->rc_id);
577e7fd4179SDavid Teigland 		goto out;
578e7fd4179SDavid Teigland 	}
579e7fd4179SDavid Teigland 
580c04fecb4SDavid Teigland 	ret_nodeid = rc->rc_result;
581c04fecb4SDavid Teigland 
582c04fecb4SDavid Teigland 	if (ret_nodeid == dlm_our_nodeid())
583c04fecb4SDavid Teigland 		new_master = 0;
584c04fecb4SDavid Teigland 	else
585c04fecb4SDavid Teigland 		new_master = ret_nodeid;
586e7fd4179SDavid Teigland 
5874875647aSDavid Teigland 	lock_rsb(r);
588c04fecb4SDavid Teigland 	r->res_master_nodeid = ret_nodeid;
589c04fecb4SDavid Teigland 	r->res_nodeid = new_master;
590c04fecb4SDavid Teigland 	set_new_master(r);
5914875647aSDavid Teigland 	unlock_rsb(r);
5921d7c484eSDavid Teigland 	recover_idr_del(r);
593e7fd4179SDavid Teigland 
5941d7c484eSDavid Teigland 	if (recover_idr_empty(ls))
595e7fd4179SDavid Teigland 		wake_up(&ls->ls_wait_general);
596e7fd4179SDavid Teigland  out:
597e7fd4179SDavid Teigland 	return 0;
598e7fd4179SDavid Teigland }
599e7fd4179SDavid Teigland 
600e7fd4179SDavid Teigland 
601e7fd4179SDavid Teigland /* Lock recovery: rebuild the process-copy locks we hold on a
602e7fd4179SDavid Teigland    remastered rsb on the new rsb master.
603e7fd4179SDavid Teigland 
604e7fd4179SDavid Teigland    dlm_recover_locks
605e7fd4179SDavid Teigland    recover_locks
606e7fd4179SDavid Teigland    recover_locks_queue
607e7fd4179SDavid Teigland    dlm_send_rcom_lock              ->  receive_rcom_lock
608e7fd4179SDavid Teigland                                        dlm_recover_master_copy
609e7fd4179SDavid Teigland    receive_rcom_lock_reply         <-
610e7fd4179SDavid Teigland    dlm_recover_process_copy
611e7fd4179SDavid Teigland */
612e7fd4179SDavid Teigland 
613e7fd4179SDavid Teigland 
614e7fd4179SDavid Teigland /*
615e7fd4179SDavid Teigland  * keep a count of the number of lkb's we send to the new master; when we get
616e7fd4179SDavid Teigland  * an equal number of replies then recovery for the rsb is done
617e7fd4179SDavid Teigland  */
618e7fd4179SDavid Teigland 
619e7fd4179SDavid Teigland static int recover_locks_queue(struct dlm_rsb *r, struct list_head *head)
620e7fd4179SDavid Teigland {
621e7fd4179SDavid Teigland 	struct dlm_lkb *lkb;
622e7fd4179SDavid Teigland 	int error = 0;
623e7fd4179SDavid Teigland 
624e7fd4179SDavid Teigland 	list_for_each_entry(lkb, head, lkb_statequeue) {
625e7fd4179SDavid Teigland 	   	error = dlm_send_rcom_lock(r, lkb);
626e7fd4179SDavid Teigland 		if (error)
627e7fd4179SDavid Teigland 			break;
628e7fd4179SDavid Teigland 		r->res_recover_locks_count++;
629e7fd4179SDavid Teigland 	}
630e7fd4179SDavid Teigland 
631e7fd4179SDavid Teigland 	return error;
632e7fd4179SDavid Teigland }
633e7fd4179SDavid Teigland 
634e7fd4179SDavid Teigland static int recover_locks(struct dlm_rsb *r)
635e7fd4179SDavid Teigland {
636e7fd4179SDavid Teigland 	int error = 0;
637e7fd4179SDavid Teigland 
638e7fd4179SDavid Teigland 	lock_rsb(r);
639e7fd4179SDavid Teigland 
640a345da3eSDavid Teigland 	DLM_ASSERT(!r->res_recover_locks_count, dlm_dump_rsb(r););
641e7fd4179SDavid Teigland 
642e7fd4179SDavid Teigland 	error = recover_locks_queue(r, &r->res_grantqueue);
643e7fd4179SDavid Teigland 	if (error)
644e7fd4179SDavid Teigland 		goto out;
645e7fd4179SDavid Teigland 	error = recover_locks_queue(r, &r->res_convertqueue);
646e7fd4179SDavid Teigland 	if (error)
647e7fd4179SDavid Teigland 		goto out;
648e7fd4179SDavid Teigland 	error = recover_locks_queue(r, &r->res_waitqueue);
649e7fd4179SDavid Teigland 	if (error)
650e7fd4179SDavid Teigland 		goto out;
651e7fd4179SDavid Teigland 
652e7fd4179SDavid Teigland 	if (r->res_recover_locks_count)
653e7fd4179SDavid Teigland 		recover_list_add(r);
654e7fd4179SDavid Teigland 	else
655e7fd4179SDavid Teigland 		rsb_clear_flag(r, RSB_NEW_MASTER);
656e7fd4179SDavid Teigland  out:
657e7fd4179SDavid Teigland 	unlock_rsb(r);
658e7fd4179SDavid Teigland 	return error;
659e7fd4179SDavid Teigland }
660e7fd4179SDavid Teigland 
661e7fd4179SDavid Teigland int dlm_recover_locks(struct dlm_ls *ls)
662e7fd4179SDavid Teigland {
663e7fd4179SDavid Teigland 	struct dlm_rsb *r;
664e7fd4179SDavid Teigland 	int error, count = 0;
665e7fd4179SDavid Teigland 
666e7fd4179SDavid Teigland 	down_read(&ls->ls_root_sem);
667e7fd4179SDavid Teigland 	list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
668e7fd4179SDavid Teigland 		if (is_master(r)) {
669e7fd4179SDavid Teigland 			rsb_clear_flag(r, RSB_NEW_MASTER);
670e7fd4179SDavid Teigland 			continue;
671e7fd4179SDavid Teigland 		}
672e7fd4179SDavid Teigland 
673e7fd4179SDavid Teigland 		if (!rsb_flag(r, RSB_NEW_MASTER))
674e7fd4179SDavid Teigland 			continue;
675e7fd4179SDavid Teigland 
676e7fd4179SDavid Teigland 		if (dlm_recovery_stopped(ls)) {
677e7fd4179SDavid Teigland 			error = -EINTR;
678e7fd4179SDavid Teigland 			up_read(&ls->ls_root_sem);
679e7fd4179SDavid Teigland 			goto out;
680e7fd4179SDavid Teigland 		}
681e7fd4179SDavid Teigland 
682e7fd4179SDavid Teigland 		error = recover_locks(r);
683e7fd4179SDavid Teigland 		if (error) {
684e7fd4179SDavid Teigland 			up_read(&ls->ls_root_sem);
685e7fd4179SDavid Teigland 			goto out;
686e7fd4179SDavid Teigland 		}
687e7fd4179SDavid Teigland 
688e7fd4179SDavid Teigland 		count += r->res_recover_locks_count;
689e7fd4179SDavid Teigland 	}
690e7fd4179SDavid Teigland 	up_read(&ls->ls_root_sem);
691e7fd4179SDavid Teigland 
6924875647aSDavid Teigland 	log_debug(ls, "dlm_recover_locks %d out", count);
693e7fd4179SDavid Teigland 
694e7fd4179SDavid Teigland 	error = dlm_wait_function(ls, &recover_list_empty);
695e7fd4179SDavid Teigland  out:
696e7fd4179SDavid Teigland 	if (error)
697e7fd4179SDavid Teigland 		recover_list_clear(ls);
698e7fd4179SDavid Teigland 	return error;
699e7fd4179SDavid Teigland }
700e7fd4179SDavid Teigland 
701e7fd4179SDavid Teigland void dlm_recovered_lock(struct dlm_rsb *r)
702e7fd4179SDavid Teigland {
703a345da3eSDavid Teigland 	DLM_ASSERT(rsb_flag(r, RSB_NEW_MASTER), dlm_dump_rsb(r););
704e7fd4179SDavid Teigland 
705e7fd4179SDavid Teigland 	r->res_recover_locks_count--;
706e7fd4179SDavid Teigland 	if (!r->res_recover_locks_count) {
707e7fd4179SDavid Teigland 		rsb_clear_flag(r, RSB_NEW_MASTER);
708e7fd4179SDavid Teigland 		recover_list_del(r);
709e7fd4179SDavid Teigland 	}
710e7fd4179SDavid Teigland 
711e7fd4179SDavid Teigland 	if (recover_list_empty(r->res_ls))
712e7fd4179SDavid Teigland 		wake_up(&r->res_ls->ls_wait_general);
713e7fd4179SDavid Teigland }
714e7fd4179SDavid Teigland 
715e7fd4179SDavid Teigland /*
716e7fd4179SDavid Teigland  * The lvb needs to be recovered on all master rsb's.  This includes setting
717e7fd4179SDavid Teigland  * the VALNOTVALID flag if necessary, and determining the correct lvb contents
718e7fd4179SDavid Teigland  * based on the lvb's of the locks held on the rsb.
719e7fd4179SDavid Teigland  *
720*da8c6663SDavid Teigland  * RSB_VALNOTVALID is set in two cases:
721*da8c6663SDavid Teigland  *
722*da8c6663SDavid Teigland  * 1. we are master, but not new, and we purged an EX/PW lock held by a
723*da8c6663SDavid Teigland  * failed node (in dlm_recover_purge which set RSB_RECOVER_LVB_INVAL)
724*da8c6663SDavid Teigland  *
725*da8c6663SDavid Teigland  * 2. we are a new master, and there are only NL/CR locks left.
726*da8c6663SDavid Teigland  * (We could probably improve this by only invaliding in this way when
727*da8c6663SDavid Teigland  * the previous master left uncleanly.  VMS docs mention that.)
728e7fd4179SDavid Teigland  *
729e7fd4179SDavid Teigland  * The LVB contents are only considered for changing when this is a new master
730e7fd4179SDavid Teigland  * of the rsb (NEW_MASTER2).  Then, the rsb's lvb is taken from any lkb with
731e7fd4179SDavid Teigland  * mode > CR.  If no lkb's exist with mode above CR, the lvb contents are taken
732e7fd4179SDavid Teigland  * from the lkb with the largest lvb sequence number.
733e7fd4179SDavid Teigland  */
734e7fd4179SDavid Teigland 
735e7fd4179SDavid Teigland static void recover_lvb(struct dlm_rsb *r)
736e7fd4179SDavid Teigland {
737e7fd4179SDavid Teigland 	struct dlm_lkb *lkb, *high_lkb = NULL;
738e7fd4179SDavid Teigland 	uint32_t high_seq = 0;
73990135925SDavid Teigland 	int lock_lvb_exists = 0;
74090135925SDavid Teigland 	int big_lock_exists = 0;
741e7fd4179SDavid Teigland 	int lvblen = r->res_ls->ls_lvblen;
742e7fd4179SDavid Teigland 
743*da8c6663SDavid Teigland 	if (!rsb_flag(r, RSB_NEW_MASTER2) &&
744*da8c6663SDavid Teigland 	    rsb_flag(r, RSB_RECOVER_LVB_INVAL)) {
745*da8c6663SDavid Teigland 		/* case 1 above */
746*da8c6663SDavid Teigland 		rsb_set_flag(r, RSB_VALNOTVALID);
747*da8c6663SDavid Teigland 		return;
748*da8c6663SDavid Teigland 	}
749*da8c6663SDavid Teigland 
750*da8c6663SDavid Teigland 	if (!rsb_flag(r, RSB_NEW_MASTER2))
751*da8c6663SDavid Teigland 		return;
752*da8c6663SDavid Teigland 
753*da8c6663SDavid Teigland 	/* we are the new master, so figure out if VALNOTVALID should
754*da8c6663SDavid Teigland 	   be set, and set the rsb lvb from the best lkb available. */
755*da8c6663SDavid Teigland 
756e7fd4179SDavid Teigland 	list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) {
757e7fd4179SDavid Teigland 		if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
758e7fd4179SDavid Teigland 			continue;
759e7fd4179SDavid Teigland 
76090135925SDavid Teigland 		lock_lvb_exists = 1;
761e7fd4179SDavid Teigland 
762e7fd4179SDavid Teigland 		if (lkb->lkb_grmode > DLM_LOCK_CR) {
76390135925SDavid Teigland 			big_lock_exists = 1;
764e7fd4179SDavid Teigland 			goto setflag;
765e7fd4179SDavid Teigland 		}
766e7fd4179SDavid Teigland 
767e7fd4179SDavid Teigland 		if (((int)lkb->lkb_lvbseq - (int)high_seq) >= 0) {
768e7fd4179SDavid Teigland 			high_lkb = lkb;
769e7fd4179SDavid Teigland 			high_seq = lkb->lkb_lvbseq;
770e7fd4179SDavid Teigland 		}
771e7fd4179SDavid Teigland 	}
772e7fd4179SDavid Teigland 
773e7fd4179SDavid Teigland 	list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) {
774e7fd4179SDavid Teigland 		if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
775e7fd4179SDavid Teigland 			continue;
776e7fd4179SDavid Teigland 
77790135925SDavid Teigland 		lock_lvb_exists = 1;
778e7fd4179SDavid Teigland 
779e7fd4179SDavid Teigland 		if (lkb->lkb_grmode > DLM_LOCK_CR) {
78090135925SDavid Teigland 			big_lock_exists = 1;
781e7fd4179SDavid Teigland 			goto setflag;
782e7fd4179SDavid Teigland 		}
783e7fd4179SDavid Teigland 
784e7fd4179SDavid Teigland 		if (((int)lkb->lkb_lvbseq - (int)high_seq) >= 0) {
785e7fd4179SDavid Teigland 			high_lkb = lkb;
786e7fd4179SDavid Teigland 			high_seq = lkb->lkb_lvbseq;
787e7fd4179SDavid Teigland 		}
788e7fd4179SDavid Teigland 	}
789e7fd4179SDavid Teigland 
790e7fd4179SDavid Teigland  setflag:
791e7fd4179SDavid Teigland 	if (!lock_lvb_exists)
792e7fd4179SDavid Teigland 		goto out;
793e7fd4179SDavid Teigland 
794*da8c6663SDavid Teigland 	/* lvb is invalidated if only NL/CR locks remain */
795e7fd4179SDavid Teigland 	if (!big_lock_exists)
796e7fd4179SDavid Teigland 		rsb_set_flag(r, RSB_VALNOTVALID);
797e7fd4179SDavid Teigland 
798e7fd4179SDavid Teigland 	if (!r->res_lvbptr) {
79952bda2b5SDavid Teigland 		r->res_lvbptr = dlm_allocate_lvb(r->res_ls);
800e7fd4179SDavid Teigland 		if (!r->res_lvbptr)
801e7fd4179SDavid Teigland 			goto out;
802e7fd4179SDavid Teigland 	}
803e7fd4179SDavid Teigland 
804e7fd4179SDavid Teigland 	if (big_lock_exists) {
805e7fd4179SDavid Teigland 		r->res_lvbseq = lkb->lkb_lvbseq;
806e7fd4179SDavid Teigland 		memcpy(r->res_lvbptr, lkb->lkb_lvbptr, lvblen);
807e7fd4179SDavid Teigland 	} else if (high_lkb) {
808e7fd4179SDavid Teigland 		r->res_lvbseq = high_lkb->lkb_lvbseq;
809e7fd4179SDavid Teigland 		memcpy(r->res_lvbptr, high_lkb->lkb_lvbptr, lvblen);
810e7fd4179SDavid Teigland 	} else {
811e7fd4179SDavid Teigland 		r->res_lvbseq = 0;
812e7fd4179SDavid Teigland 		memset(r->res_lvbptr, 0, lvblen);
813e7fd4179SDavid Teigland 	}
814e7fd4179SDavid Teigland  out:
815e7fd4179SDavid Teigland 	return;
816e7fd4179SDavid Teigland }
817e7fd4179SDavid Teigland 
818e7fd4179SDavid Teigland /* All master rsb's flagged RECOVER_CONVERT need to be looked at.  The locks
819e7fd4179SDavid Teigland    converting PR->CW or CW->PR need to have their lkb_grmode set. */
820e7fd4179SDavid Teigland 
821e7fd4179SDavid Teigland static void recover_conversion(struct dlm_rsb *r)
822e7fd4179SDavid Teigland {
823c503a621SDavid Teigland 	struct dlm_ls *ls = r->res_ls;
824e7fd4179SDavid Teigland 	struct dlm_lkb *lkb;
825e7fd4179SDavid Teigland 	int grmode = -1;
826e7fd4179SDavid Teigland 
827e7fd4179SDavid Teigland 	list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) {
828e7fd4179SDavid Teigland 		if (lkb->lkb_grmode == DLM_LOCK_PR ||
829e7fd4179SDavid Teigland 		    lkb->lkb_grmode == DLM_LOCK_CW) {
830e7fd4179SDavid Teigland 			grmode = lkb->lkb_grmode;
831e7fd4179SDavid Teigland 			break;
832e7fd4179SDavid Teigland 		}
833e7fd4179SDavid Teigland 	}
834e7fd4179SDavid Teigland 
835e7fd4179SDavid Teigland 	list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) {
836e7fd4179SDavid Teigland 		if (lkb->lkb_grmode != DLM_LOCK_IV)
837e7fd4179SDavid Teigland 			continue;
838c503a621SDavid Teigland 		if (grmode == -1) {
839c503a621SDavid Teigland 			log_debug(ls, "recover_conversion %x set gr to rq %d",
840c503a621SDavid Teigland 				  lkb->lkb_id, lkb->lkb_rqmode);
841e7fd4179SDavid Teigland 			lkb->lkb_grmode = lkb->lkb_rqmode;
842c503a621SDavid Teigland 		} else {
843c503a621SDavid Teigland 			log_debug(ls, "recover_conversion %x set gr %d",
844c503a621SDavid Teigland 				  lkb->lkb_id, grmode);
845e7fd4179SDavid Teigland 			lkb->lkb_grmode = grmode;
846e7fd4179SDavid Teigland 		}
847e7fd4179SDavid Teigland 	}
848c503a621SDavid Teigland }
849e7fd4179SDavid Teigland 
850f7da790dSDavid Teigland /* We've become the new master for this rsb and waiting/converting locks may
8514875647aSDavid Teigland    need to be granted in dlm_recover_grant() due to locks that may have
852f7da790dSDavid Teigland    existed from a removed node. */
853f7da790dSDavid Teigland 
8544875647aSDavid Teigland static void recover_grant(struct dlm_rsb *r)
855f7da790dSDavid Teigland {
856f7da790dSDavid Teigland 	if (!list_empty(&r->res_waitqueue) || !list_empty(&r->res_convertqueue))
8574875647aSDavid Teigland 		rsb_set_flag(r, RSB_RECOVER_GRANT);
858f7da790dSDavid Teigland }
859f7da790dSDavid Teigland 
860e7fd4179SDavid Teigland void dlm_recover_rsbs(struct dlm_ls *ls)
861e7fd4179SDavid Teigland {
862e7fd4179SDavid Teigland 	struct dlm_rsb *r;
8634875647aSDavid Teigland 	unsigned int count = 0;
864e7fd4179SDavid Teigland 
865e7fd4179SDavid Teigland 	down_read(&ls->ls_root_sem);
866e7fd4179SDavid Teigland 	list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
867e7fd4179SDavid Teigland 		lock_rsb(r);
868e7fd4179SDavid Teigland 		if (is_master(r)) {
869e7fd4179SDavid Teigland 			if (rsb_flag(r, RSB_RECOVER_CONVERT))
870e7fd4179SDavid Teigland 				recover_conversion(r);
871*da8c6663SDavid Teigland 
872*da8c6663SDavid Teigland 			/* recover lvb before granting locks so the updated
873*da8c6663SDavid Teigland 			   lvb/VALNOTVALID is presented in the completion */
874*da8c6663SDavid Teigland 			recover_lvb(r);
875*da8c6663SDavid Teigland 
876f7da790dSDavid Teigland 			if (rsb_flag(r, RSB_NEW_MASTER2))
8774875647aSDavid Teigland 				recover_grant(r);
878e7fd4179SDavid Teigland 			count++;
879*da8c6663SDavid Teigland 		} else {
880*da8c6663SDavid Teigland 			rsb_clear_flag(r, RSB_VALNOTVALID);
881e7fd4179SDavid Teigland 		}
882e7fd4179SDavid Teigland 		rsb_clear_flag(r, RSB_RECOVER_CONVERT);
883*da8c6663SDavid Teigland 		rsb_clear_flag(r, RSB_RECOVER_LVB_INVAL);
884f7da790dSDavid Teigland 		rsb_clear_flag(r, RSB_NEW_MASTER2);
885e7fd4179SDavid Teigland 		unlock_rsb(r);
886e7fd4179SDavid Teigland 	}
887e7fd4179SDavid Teigland 	up_read(&ls->ls_root_sem);
888e7fd4179SDavid Teigland 
8894875647aSDavid Teigland 	if (count)
8904875647aSDavid Teigland 		log_debug(ls, "dlm_recover_rsbs %d done", count);
891e7fd4179SDavid Teigland }
892e7fd4179SDavid Teigland 
893e7fd4179SDavid Teigland /* Create a single list of all root rsb's to be used during recovery */
894e7fd4179SDavid Teigland 
895e7fd4179SDavid Teigland int dlm_create_root_list(struct dlm_ls *ls)
896e7fd4179SDavid Teigland {
8979beb3bf5SBob Peterson 	struct rb_node *n;
898e7fd4179SDavid Teigland 	struct dlm_rsb *r;
899e7fd4179SDavid Teigland 	int i, error = 0;
900e7fd4179SDavid Teigland 
901e7fd4179SDavid Teigland 	down_write(&ls->ls_root_sem);
902e7fd4179SDavid Teigland 	if (!list_empty(&ls->ls_root_list)) {
903e7fd4179SDavid Teigland 		log_error(ls, "root list not empty");
904e7fd4179SDavid Teigland 		error = -EINVAL;
905e7fd4179SDavid Teigland 		goto out;
906e7fd4179SDavid Teigland 	}
907e7fd4179SDavid Teigland 
908e7fd4179SDavid Teigland 	for (i = 0; i < ls->ls_rsbtbl_size; i++) {
909c7be761aSDavid Teigland 		spin_lock(&ls->ls_rsbtbl[i].lock);
9109beb3bf5SBob Peterson 		for (n = rb_first(&ls->ls_rsbtbl[i].keep); n; n = rb_next(n)) {
9119beb3bf5SBob Peterson 			r = rb_entry(n, struct dlm_rsb, res_hashnode);
912e7fd4179SDavid Teigland 			list_add(&r->res_root_list, &ls->ls_root_list);
913e7fd4179SDavid Teigland 			dlm_hold_rsb(r);
914e7fd4179SDavid Teigland 		}
91585f0379aSDavid Teigland 
916c04fecb4SDavid Teigland 		if (!RB_EMPTY_ROOT(&ls->ls_rsbtbl[i].toss))
917c04fecb4SDavid Teigland 			log_error(ls, "dlm_create_root_list toss not empty");
918c7be761aSDavid Teigland 		spin_unlock(&ls->ls_rsbtbl[i].lock);
919e7fd4179SDavid Teigland 	}
920e7fd4179SDavid Teigland  out:
921e7fd4179SDavid Teigland 	up_write(&ls->ls_root_sem);
922e7fd4179SDavid Teigland 	return error;
923e7fd4179SDavid Teigland }
924e7fd4179SDavid Teigland 
925e7fd4179SDavid Teigland void dlm_release_root_list(struct dlm_ls *ls)
926e7fd4179SDavid Teigland {
927e7fd4179SDavid Teigland 	struct dlm_rsb *r, *safe;
928e7fd4179SDavid Teigland 
929e7fd4179SDavid Teigland 	down_write(&ls->ls_root_sem);
930e7fd4179SDavid Teigland 	list_for_each_entry_safe(r, safe, &ls->ls_root_list, res_root_list) {
931e7fd4179SDavid Teigland 		list_del_init(&r->res_root_list);
932e7fd4179SDavid Teigland 		dlm_put_rsb(r);
933e7fd4179SDavid Teigland 	}
934e7fd4179SDavid Teigland 	up_write(&ls->ls_root_sem);
935e7fd4179SDavid Teigland }
936e7fd4179SDavid Teigland 
937c04fecb4SDavid Teigland void dlm_clear_toss(struct dlm_ls *ls)
938e7fd4179SDavid Teigland {
9399beb3bf5SBob Peterson 	struct rb_node *n, *next;
940c04fecb4SDavid Teigland 	struct dlm_rsb *r;
941c04fecb4SDavid Teigland 	unsigned int count = 0;
942e7fd4179SDavid Teigland 	int i;
943e7fd4179SDavid Teigland 
944e7fd4179SDavid Teigland 	for (i = 0; i < ls->ls_rsbtbl_size; i++) {
945c7be761aSDavid Teigland 		spin_lock(&ls->ls_rsbtbl[i].lock);
9469beb3bf5SBob Peterson 		for (n = rb_first(&ls->ls_rsbtbl[i].toss); n; n = next) {
947c04fecb4SDavid Teigland 			next = rb_next(n);
948c04fecb4SDavid Teigland 			r = rb_entry(n, struct dlm_rsb, res_hashnode);
9499beb3bf5SBob Peterson 			rb_erase(n, &ls->ls_rsbtbl[i].toss);
950c04fecb4SDavid Teigland 			dlm_free_rsb(r);
951c04fecb4SDavid Teigland 			count++;
95285f0379aSDavid Teigland 		}
953c7be761aSDavid Teigland 		spin_unlock(&ls->ls_rsbtbl[i].lock);
954e7fd4179SDavid Teigland 	}
955c04fecb4SDavid Teigland 
956c04fecb4SDavid Teigland 	if (count)
957c04fecb4SDavid Teigland 		log_debug(ls, "dlm_clear_toss %u done", count);
958e7fd4179SDavid Teigland }
959e7fd4179SDavid Teigland 
960