1e7fd4179SDavid Teigland /****************************************************************************** 2e7fd4179SDavid Teigland ******************************************************************************* 3e7fd4179SDavid Teigland ** 4e7fd4179SDavid Teigland ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 5e7fd4179SDavid Teigland ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. 6e7fd4179SDavid Teigland ** 7e7fd4179SDavid Teigland ** This copyrighted material is made available to anyone wishing to use, 8e7fd4179SDavid Teigland ** modify, copy, or redistribute it subject to the terms and conditions 9e7fd4179SDavid Teigland ** of the GNU General Public License v.2. 10e7fd4179SDavid Teigland ** 11e7fd4179SDavid Teigland ******************************************************************************* 12e7fd4179SDavid Teigland ******************************************************************************/ 13e7fd4179SDavid Teigland 14e7fd4179SDavid Teigland #include "dlm_internal.h" 15e7fd4179SDavid Teigland #include "lockspace.h" 16e7fd4179SDavid Teigland #include "dir.h" 17e7fd4179SDavid Teigland #include "config.h" 18e7fd4179SDavid Teigland #include "ast.h" 19e7fd4179SDavid Teigland #include "memory.h" 20e7fd4179SDavid Teigland #include "rcom.h" 21e7fd4179SDavid Teigland #include "lock.h" 22e7fd4179SDavid Teigland #include "lowcomms.h" 23e7fd4179SDavid Teigland #include "member.h" 24e7fd4179SDavid Teigland #include "recover.h" 25e7fd4179SDavid Teigland 26e7fd4179SDavid Teigland 27e7fd4179SDavid Teigland /* 28e7fd4179SDavid Teigland * Recovery waiting routines: these functions wait for a particular reply from 29e7fd4179SDavid Teigland * a remote node, or for the remote node to report a certain status. They need 30e7fd4179SDavid Teigland * to abort if the lockspace is stopped indicating a node has failed (perhaps 31e7fd4179SDavid Teigland * the one being waited for). 32e7fd4179SDavid Teigland */ 33e7fd4179SDavid Teigland 34e7fd4179SDavid Teigland /* 35e7fd4179SDavid Teigland * Wait until given function returns non-zero or lockspace is stopped 36e7fd4179SDavid Teigland * (LS_RECOVERY_STOP set due to failure of a node in ls_nodes). When another 37e7fd4179SDavid Teigland * function thinks it could have completed the waited-on task, they should wake 38e7fd4179SDavid Teigland * up ls_wait_general to get an immediate response rather than waiting for the 396d768177SDavid Teigland * timeout. This uses a timeout so it can check periodically if the wait 406d768177SDavid Teigland * should abort due to node failure (which doesn't cause a wake_up). 416d768177SDavid Teigland * This should only be called by the dlm_recoverd thread. 42e7fd4179SDavid Teigland */ 43e7fd4179SDavid Teigland 44e7fd4179SDavid Teigland int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls)) 45e7fd4179SDavid Teigland { 46e7fd4179SDavid Teigland int error = 0; 476d768177SDavid Teigland int rv; 48e7fd4179SDavid Teigland 496d768177SDavid Teigland while (1) { 506d768177SDavid Teigland rv = wait_event_timeout(ls->ls_wait_general, 516d768177SDavid Teigland testfn(ls) || dlm_recovery_stopped(ls), 526d768177SDavid Teigland dlm_config.ci_recover_timer * HZ); 536d768177SDavid Teigland if (rv) 546d768177SDavid Teigland break; 556d768177SDavid Teigland } 56e7fd4179SDavid Teigland 57e7fd4179SDavid Teigland if (dlm_recovery_stopped(ls)) { 58e7fd4179SDavid Teigland log_debug(ls, "dlm_wait_function aborted"); 59e7fd4179SDavid Teigland error = -EINTR; 60e7fd4179SDavid Teigland } 61e7fd4179SDavid Teigland return error; 62e7fd4179SDavid Teigland } 63e7fd4179SDavid Teigland 64e7fd4179SDavid Teigland /* 65e7fd4179SDavid Teigland * An efficient way for all nodes to wait for all others to have a certain 66e7fd4179SDavid Teigland * status. The node with the lowest nodeid polls all the others for their 67e7fd4179SDavid Teigland * status (wait_status_all) and all the others poll the node with the low id 68e7fd4179SDavid Teigland * for its accumulated result (wait_status_low). When all nodes have set 69e7fd4179SDavid Teigland * status flag X, then status flag X_ALL will be set on the low nodeid. 70e7fd4179SDavid Teigland */ 71e7fd4179SDavid Teigland 72e7fd4179SDavid Teigland uint32_t dlm_recover_status(struct dlm_ls *ls) 73e7fd4179SDavid Teigland { 74e7fd4179SDavid Teigland uint32_t status; 75e7fd4179SDavid Teigland spin_lock(&ls->ls_recover_lock); 76e7fd4179SDavid Teigland status = ls->ls_recover_status; 77e7fd4179SDavid Teigland spin_unlock(&ls->ls_recover_lock); 78e7fd4179SDavid Teigland return status; 79e7fd4179SDavid Teigland } 80e7fd4179SDavid Teigland 81757a4271SDavid Teigland static void _set_recover_status(struct dlm_ls *ls, uint32_t status) 82757a4271SDavid Teigland { 83757a4271SDavid Teigland ls->ls_recover_status |= status; 84757a4271SDavid Teigland } 85757a4271SDavid Teigland 86e7fd4179SDavid Teigland void dlm_set_recover_status(struct dlm_ls *ls, uint32_t status) 87e7fd4179SDavid Teigland { 88e7fd4179SDavid Teigland spin_lock(&ls->ls_recover_lock); 89757a4271SDavid Teigland _set_recover_status(ls, status); 90e7fd4179SDavid Teigland spin_unlock(&ls->ls_recover_lock); 91e7fd4179SDavid Teigland } 92e7fd4179SDavid Teigland 93757a4271SDavid Teigland static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status, 94757a4271SDavid Teigland int save_slots) 95e7fd4179SDavid Teigland { 964007685cSAl Viro struct dlm_rcom *rc = ls->ls_recover_buf; 97e7fd4179SDavid Teigland struct dlm_member *memb; 98e7fd4179SDavid Teigland int error = 0, delay; 99e7fd4179SDavid Teigland 100e7fd4179SDavid Teigland list_for_each_entry(memb, &ls->ls_nodes, list) { 101e7fd4179SDavid Teigland delay = 0; 102e7fd4179SDavid Teigland for (;;) { 103e7fd4179SDavid Teigland if (dlm_recovery_stopped(ls)) { 104e7fd4179SDavid Teigland error = -EINTR; 105e7fd4179SDavid Teigland goto out; 106e7fd4179SDavid Teigland } 107e7fd4179SDavid Teigland 108757a4271SDavid Teigland error = dlm_rcom_status(ls, memb->nodeid, 0); 109e7fd4179SDavid Teigland if (error) 110e7fd4179SDavid Teigland goto out; 111e7fd4179SDavid Teigland 112757a4271SDavid Teigland if (save_slots) 113757a4271SDavid Teigland dlm_slot_save(ls, rc, memb); 114757a4271SDavid Teigland 115e7fd4179SDavid Teigland if (rc->rc_result & wait_status) 116e7fd4179SDavid Teigland break; 117e7fd4179SDavid Teigland if (delay < 1000) 118e7fd4179SDavid Teigland delay += 20; 119e7fd4179SDavid Teigland msleep(delay); 120e7fd4179SDavid Teigland } 121e7fd4179SDavid Teigland } 122e7fd4179SDavid Teigland out: 123e7fd4179SDavid Teigland return error; 124e7fd4179SDavid Teigland } 125e7fd4179SDavid Teigland 126757a4271SDavid Teigland static int wait_status_low(struct dlm_ls *ls, uint32_t wait_status, 127757a4271SDavid Teigland uint32_t status_flags) 128e7fd4179SDavid Teigland { 1294007685cSAl Viro struct dlm_rcom *rc = ls->ls_recover_buf; 130e7fd4179SDavid Teigland int error = 0, delay = 0, nodeid = ls->ls_low_nodeid; 131e7fd4179SDavid Teigland 132e7fd4179SDavid Teigland for (;;) { 133e7fd4179SDavid Teigland if (dlm_recovery_stopped(ls)) { 134e7fd4179SDavid Teigland error = -EINTR; 135e7fd4179SDavid Teigland goto out; 136e7fd4179SDavid Teigland } 137e7fd4179SDavid Teigland 138757a4271SDavid Teigland error = dlm_rcom_status(ls, nodeid, status_flags); 139e7fd4179SDavid Teigland if (error) 140e7fd4179SDavid Teigland break; 141e7fd4179SDavid Teigland 142e7fd4179SDavid Teigland if (rc->rc_result & wait_status) 143e7fd4179SDavid Teigland break; 144e7fd4179SDavid Teigland if (delay < 1000) 145e7fd4179SDavid Teigland delay += 20; 146e7fd4179SDavid Teigland msleep(delay); 147e7fd4179SDavid Teigland } 148e7fd4179SDavid Teigland out: 149e7fd4179SDavid Teigland return error; 150e7fd4179SDavid Teigland } 151e7fd4179SDavid Teigland 152e7fd4179SDavid Teigland static int wait_status(struct dlm_ls *ls, uint32_t status) 153e7fd4179SDavid Teigland { 154e7fd4179SDavid Teigland uint32_t status_all = status << 1; 155e7fd4179SDavid Teigland int error; 156e7fd4179SDavid Teigland 157e7fd4179SDavid Teigland if (ls->ls_low_nodeid == dlm_our_nodeid()) { 158757a4271SDavid Teigland error = wait_status_all(ls, status, 0); 159e7fd4179SDavid Teigland if (!error) 160e7fd4179SDavid Teigland dlm_set_recover_status(ls, status_all); 161e7fd4179SDavid Teigland } else 162757a4271SDavid Teigland error = wait_status_low(ls, status_all, 0); 163e7fd4179SDavid Teigland 164e7fd4179SDavid Teigland return error; 165e7fd4179SDavid Teigland } 166e7fd4179SDavid Teigland 167e7fd4179SDavid Teigland int dlm_recover_members_wait(struct dlm_ls *ls) 168e7fd4179SDavid Teigland { 169757a4271SDavid Teigland struct dlm_member *memb; 170757a4271SDavid Teigland struct dlm_slot *slots; 171757a4271SDavid Teigland int num_slots, slots_size; 172757a4271SDavid Teigland int error, rv; 173757a4271SDavid Teigland uint32_t gen; 174757a4271SDavid Teigland 175757a4271SDavid Teigland list_for_each_entry(memb, &ls->ls_nodes, list) { 176757a4271SDavid Teigland memb->slot = -1; 177757a4271SDavid Teigland memb->generation = 0; 178757a4271SDavid Teigland } 179757a4271SDavid Teigland 180757a4271SDavid Teigland if (ls->ls_low_nodeid == dlm_our_nodeid()) { 181757a4271SDavid Teigland error = wait_status_all(ls, DLM_RS_NODES, 1); 182757a4271SDavid Teigland if (error) 183757a4271SDavid Teigland goto out; 184757a4271SDavid Teigland 185757a4271SDavid Teigland /* slots array is sparse, slots_size may be > num_slots */ 186757a4271SDavid Teigland 187757a4271SDavid Teigland rv = dlm_slots_assign(ls, &num_slots, &slots_size, &slots, &gen); 188757a4271SDavid Teigland if (!rv) { 189757a4271SDavid Teigland spin_lock(&ls->ls_recover_lock); 190757a4271SDavid Teigland _set_recover_status(ls, DLM_RS_NODES_ALL); 191757a4271SDavid Teigland ls->ls_num_slots = num_slots; 192757a4271SDavid Teigland ls->ls_slots_size = slots_size; 193757a4271SDavid Teigland ls->ls_slots = slots; 194757a4271SDavid Teigland ls->ls_generation = gen; 195757a4271SDavid Teigland spin_unlock(&ls->ls_recover_lock); 196757a4271SDavid Teigland } else { 197757a4271SDavid Teigland dlm_set_recover_status(ls, DLM_RS_NODES_ALL); 198757a4271SDavid Teigland } 199757a4271SDavid Teigland } else { 200757a4271SDavid Teigland error = wait_status_low(ls, DLM_RS_NODES_ALL, DLM_RSF_NEED_SLOTS); 201757a4271SDavid Teigland if (error) 202757a4271SDavid Teigland goto out; 203757a4271SDavid Teigland 204757a4271SDavid Teigland dlm_slots_copy_in(ls); 205757a4271SDavid Teigland } 206757a4271SDavid Teigland out: 207757a4271SDavid Teigland return error; 208e7fd4179SDavid Teigland } 209e7fd4179SDavid Teigland 210e7fd4179SDavid Teigland int dlm_recover_directory_wait(struct dlm_ls *ls) 211e7fd4179SDavid Teigland { 212e7fd4179SDavid Teigland return wait_status(ls, DLM_RS_DIR); 213e7fd4179SDavid Teigland } 214e7fd4179SDavid Teigland 215e7fd4179SDavid Teigland int dlm_recover_locks_wait(struct dlm_ls *ls) 216e7fd4179SDavid Teigland { 217e7fd4179SDavid Teigland return wait_status(ls, DLM_RS_LOCKS); 218e7fd4179SDavid Teigland } 219e7fd4179SDavid Teigland 220e7fd4179SDavid Teigland int dlm_recover_done_wait(struct dlm_ls *ls) 221e7fd4179SDavid Teigland { 222e7fd4179SDavid Teigland return wait_status(ls, DLM_RS_DONE); 223e7fd4179SDavid Teigland } 224e7fd4179SDavid Teigland 225e7fd4179SDavid Teigland /* 226e7fd4179SDavid Teigland * The recover_list contains all the rsb's for which we've requested the new 227e7fd4179SDavid Teigland * master nodeid. As replies are returned from the resource directories the 228e7fd4179SDavid Teigland * rsb's are removed from the list. When the list is empty we're done. 229e7fd4179SDavid Teigland * 230e7fd4179SDavid Teigland * The recover_list is later similarly used for all rsb's for which we've sent 231e7fd4179SDavid Teigland * new lkb's and need to receive new corresponding lkid's. 232e7fd4179SDavid Teigland * 233e7fd4179SDavid Teigland * We use the address of the rsb struct as a simple local identifier for the 234e7fd4179SDavid Teigland * rsb so we can match an rcom reply with the rsb it was sent for. 235e7fd4179SDavid Teigland */ 236e7fd4179SDavid Teigland 237e7fd4179SDavid Teigland static int recover_list_empty(struct dlm_ls *ls) 238e7fd4179SDavid Teigland { 239e7fd4179SDavid Teigland int empty; 240e7fd4179SDavid Teigland 241e7fd4179SDavid Teigland spin_lock(&ls->ls_recover_list_lock); 242e7fd4179SDavid Teigland empty = list_empty(&ls->ls_recover_list); 243e7fd4179SDavid Teigland spin_unlock(&ls->ls_recover_list_lock); 244e7fd4179SDavid Teigland 245e7fd4179SDavid Teigland return empty; 246e7fd4179SDavid Teigland } 247e7fd4179SDavid Teigland 248e7fd4179SDavid Teigland static void recover_list_add(struct dlm_rsb *r) 249e7fd4179SDavid Teigland { 250e7fd4179SDavid Teigland struct dlm_ls *ls = r->res_ls; 251e7fd4179SDavid Teigland 252e7fd4179SDavid Teigland spin_lock(&ls->ls_recover_list_lock); 253e7fd4179SDavid Teigland if (list_empty(&r->res_recover_list)) { 254e7fd4179SDavid Teigland list_add_tail(&r->res_recover_list, &ls->ls_recover_list); 255e7fd4179SDavid Teigland ls->ls_recover_list_count++; 256e7fd4179SDavid Teigland dlm_hold_rsb(r); 257e7fd4179SDavid Teigland } 258e7fd4179SDavid Teigland spin_unlock(&ls->ls_recover_list_lock); 259e7fd4179SDavid Teigland } 260e7fd4179SDavid Teigland 261e7fd4179SDavid Teigland static void recover_list_del(struct dlm_rsb *r) 262e7fd4179SDavid Teigland { 263e7fd4179SDavid Teigland struct dlm_ls *ls = r->res_ls; 264e7fd4179SDavid Teigland 265e7fd4179SDavid Teigland spin_lock(&ls->ls_recover_list_lock); 266e7fd4179SDavid Teigland list_del_init(&r->res_recover_list); 267e7fd4179SDavid Teigland ls->ls_recover_list_count--; 268e7fd4179SDavid Teigland spin_unlock(&ls->ls_recover_list_lock); 269e7fd4179SDavid Teigland 270e7fd4179SDavid Teigland dlm_put_rsb(r); 271e7fd4179SDavid Teigland } 272e7fd4179SDavid Teigland 273e7fd4179SDavid Teigland static void recover_list_clear(struct dlm_ls *ls) 274e7fd4179SDavid Teigland { 275e7fd4179SDavid Teigland struct dlm_rsb *r, *s; 276e7fd4179SDavid Teigland 277e7fd4179SDavid Teigland spin_lock(&ls->ls_recover_list_lock); 278e7fd4179SDavid Teigland list_for_each_entry_safe(r, s, &ls->ls_recover_list, res_recover_list) { 279e7fd4179SDavid Teigland list_del_init(&r->res_recover_list); 28052069809SDavid Teigland r->res_recover_locks_count = 0; 281e7fd4179SDavid Teigland dlm_put_rsb(r); 282e7fd4179SDavid Teigland ls->ls_recover_list_count--; 283e7fd4179SDavid Teigland } 284e7fd4179SDavid Teigland 285e7fd4179SDavid Teigland if (ls->ls_recover_list_count != 0) { 286e7fd4179SDavid Teigland log_error(ls, "warning: recover_list_count %d", 287e7fd4179SDavid Teigland ls->ls_recover_list_count); 288e7fd4179SDavid Teigland ls->ls_recover_list_count = 0; 289e7fd4179SDavid Teigland } 290e7fd4179SDavid Teigland spin_unlock(&ls->ls_recover_list_lock); 291e7fd4179SDavid Teigland } 292e7fd4179SDavid Teigland 2931d7c484eSDavid Teigland static int recover_idr_empty(struct dlm_ls *ls) 2941d7c484eSDavid Teigland { 2951d7c484eSDavid Teigland int empty = 1; 2961d7c484eSDavid Teigland 2971d7c484eSDavid Teigland spin_lock(&ls->ls_recover_idr_lock); 2981d7c484eSDavid Teigland if (ls->ls_recover_list_count) 2991d7c484eSDavid Teigland empty = 0; 3001d7c484eSDavid Teigland spin_unlock(&ls->ls_recover_idr_lock); 3011d7c484eSDavid Teigland 3021d7c484eSDavid Teigland return empty; 3031d7c484eSDavid Teigland } 3041d7c484eSDavid Teigland 3051d7c484eSDavid Teigland static int recover_idr_add(struct dlm_rsb *r) 3061d7c484eSDavid Teigland { 3071d7c484eSDavid Teigland struct dlm_ls *ls = r->res_ls; 3081d7c484eSDavid Teigland int rv, id; 3091d7c484eSDavid Teigland 3101d7c484eSDavid Teigland rv = idr_pre_get(&ls->ls_recover_idr, GFP_NOFS); 3111d7c484eSDavid Teigland if (!rv) 3121d7c484eSDavid Teigland return -ENOMEM; 3131d7c484eSDavid Teigland 3141d7c484eSDavid Teigland spin_lock(&ls->ls_recover_idr_lock); 3151d7c484eSDavid Teigland if (r->res_id) { 3161d7c484eSDavid Teigland spin_unlock(&ls->ls_recover_idr_lock); 3171d7c484eSDavid Teigland return -1; 3181d7c484eSDavid Teigland } 3191d7c484eSDavid Teigland rv = idr_get_new_above(&ls->ls_recover_idr, r, 1, &id); 3201d7c484eSDavid Teigland if (rv) { 3211d7c484eSDavid Teigland spin_unlock(&ls->ls_recover_idr_lock); 3221d7c484eSDavid Teigland return rv; 3231d7c484eSDavid Teigland } 3241d7c484eSDavid Teigland r->res_id = id; 3251d7c484eSDavid Teigland ls->ls_recover_list_count++; 3261d7c484eSDavid Teigland dlm_hold_rsb(r); 3271d7c484eSDavid Teigland spin_unlock(&ls->ls_recover_idr_lock); 3281d7c484eSDavid Teigland return 0; 3291d7c484eSDavid Teigland } 3301d7c484eSDavid Teigland 3311d7c484eSDavid Teigland static void recover_idr_del(struct dlm_rsb *r) 3321d7c484eSDavid Teigland { 3331d7c484eSDavid Teigland struct dlm_ls *ls = r->res_ls; 3341d7c484eSDavid Teigland 3351d7c484eSDavid Teigland spin_lock(&ls->ls_recover_idr_lock); 3361d7c484eSDavid Teigland idr_remove(&ls->ls_recover_idr, r->res_id); 3371d7c484eSDavid Teigland r->res_id = 0; 3381d7c484eSDavid Teigland ls->ls_recover_list_count--; 3391d7c484eSDavid Teigland spin_unlock(&ls->ls_recover_idr_lock); 3401d7c484eSDavid Teigland 3411d7c484eSDavid Teigland dlm_put_rsb(r); 3421d7c484eSDavid Teigland } 3431d7c484eSDavid Teigland 3441d7c484eSDavid Teigland static struct dlm_rsb *recover_idr_find(struct dlm_ls *ls, uint64_t id) 3451d7c484eSDavid Teigland { 3461d7c484eSDavid Teigland struct dlm_rsb *r; 3471d7c484eSDavid Teigland 3481d7c484eSDavid Teigland spin_lock(&ls->ls_recover_idr_lock); 3491d7c484eSDavid Teigland r = idr_find(&ls->ls_recover_idr, (int)id); 3501d7c484eSDavid Teigland spin_unlock(&ls->ls_recover_idr_lock); 3511d7c484eSDavid Teigland return r; 3521d7c484eSDavid Teigland } 3531d7c484eSDavid Teigland 3541d7c484eSDavid Teigland static int recover_idr_clear_rsb(int id, void *p, void *data) 3551d7c484eSDavid Teigland { 3561d7c484eSDavid Teigland struct dlm_ls *ls = data; 3571d7c484eSDavid Teigland struct dlm_rsb *r = p; 3581d7c484eSDavid Teigland 3591d7c484eSDavid Teigland r->res_id = 0; 3601d7c484eSDavid Teigland r->res_recover_locks_count = 0; 3611d7c484eSDavid Teigland ls->ls_recover_list_count--; 3621d7c484eSDavid Teigland 3631d7c484eSDavid Teigland dlm_put_rsb(r); 3641d7c484eSDavid Teigland return 0; 3651d7c484eSDavid Teigland } 3661d7c484eSDavid Teigland 3671d7c484eSDavid Teigland static void recover_idr_clear(struct dlm_ls *ls) 3681d7c484eSDavid Teigland { 3691d7c484eSDavid Teigland spin_lock(&ls->ls_recover_idr_lock); 3701d7c484eSDavid Teigland idr_for_each(&ls->ls_recover_idr, recover_idr_clear_rsb, ls); 3711d7c484eSDavid Teigland idr_remove_all(&ls->ls_recover_idr); 3721d7c484eSDavid Teigland 3731d7c484eSDavid Teigland if (ls->ls_recover_list_count != 0) { 3741d7c484eSDavid Teigland log_error(ls, "warning: recover_list_count %d", 3751d7c484eSDavid Teigland ls->ls_recover_list_count); 3761d7c484eSDavid Teigland ls->ls_recover_list_count = 0; 3771d7c484eSDavid Teigland } 3781d7c484eSDavid Teigland spin_unlock(&ls->ls_recover_idr_lock); 3791d7c484eSDavid Teigland } 3801d7c484eSDavid Teigland 381e7fd4179SDavid Teigland 382e7fd4179SDavid Teigland /* Master recovery: find new master node for rsb's that were 383e7fd4179SDavid Teigland mastered on nodes that have been removed. 384e7fd4179SDavid Teigland 385e7fd4179SDavid Teigland dlm_recover_masters 386e7fd4179SDavid Teigland recover_master 387e7fd4179SDavid Teigland dlm_send_rcom_lookup -> receive_rcom_lookup 388e7fd4179SDavid Teigland dlm_dir_lookup 389e7fd4179SDavid Teigland receive_rcom_lookup_reply <- 390e7fd4179SDavid Teigland dlm_recover_master_reply 391e7fd4179SDavid Teigland set_new_master 392e7fd4179SDavid Teigland set_master_lkbs 393e7fd4179SDavid Teigland set_lock_master 394e7fd4179SDavid Teigland */ 395e7fd4179SDavid Teigland 396e7fd4179SDavid Teigland /* 397e7fd4179SDavid Teigland * Set the lock master for all LKBs in a lock queue 398e7fd4179SDavid Teigland * If we are the new master of the rsb, we may have received new 399e7fd4179SDavid Teigland * MSTCPY locks from other nodes already which we need to ignore 400e7fd4179SDavid Teigland * when setting the new nodeid. 401e7fd4179SDavid Teigland */ 402e7fd4179SDavid Teigland 403e7fd4179SDavid Teigland static void set_lock_master(struct list_head *queue, int nodeid) 404e7fd4179SDavid Teigland { 405e7fd4179SDavid Teigland struct dlm_lkb *lkb; 406e7fd4179SDavid Teigland 4074875647aSDavid Teigland list_for_each_entry(lkb, queue, lkb_statequeue) { 4084875647aSDavid Teigland if (!(lkb->lkb_flags & DLM_IFL_MSTCPY)) { 409e7fd4179SDavid Teigland lkb->lkb_nodeid = nodeid; 4104875647aSDavid Teigland lkb->lkb_remid = 0; 4114875647aSDavid Teigland } 4124875647aSDavid Teigland } 413e7fd4179SDavid Teigland } 414e7fd4179SDavid Teigland 415e7fd4179SDavid Teigland static void set_master_lkbs(struct dlm_rsb *r) 416e7fd4179SDavid Teigland { 417e7fd4179SDavid Teigland set_lock_master(&r->res_grantqueue, r->res_nodeid); 418e7fd4179SDavid Teigland set_lock_master(&r->res_convertqueue, r->res_nodeid); 419e7fd4179SDavid Teigland set_lock_master(&r->res_waitqueue, r->res_nodeid); 420e7fd4179SDavid Teigland } 421e7fd4179SDavid Teigland 422e7fd4179SDavid Teigland /* 42325985edcSLucas De Marchi * Propagate the new master nodeid to locks 424e7fd4179SDavid Teigland * The NEW_MASTER flag tells dlm_recover_locks() which rsb's to consider. 4254875647aSDavid Teigland * The NEW_MASTER2 flag tells recover_lvb() and recover_grant() which 426f7da790dSDavid Teigland * rsb's to consider. 427e7fd4179SDavid Teigland */ 428e7fd4179SDavid Teigland 429c04fecb4SDavid Teigland static void set_new_master(struct dlm_rsb *r) 430e7fd4179SDavid Teigland { 431e7fd4179SDavid Teigland set_master_lkbs(r); 432e7fd4179SDavid Teigland rsb_set_flag(r, RSB_NEW_MASTER); 433e7fd4179SDavid Teigland rsb_set_flag(r, RSB_NEW_MASTER2); 434e7fd4179SDavid Teigland } 435e7fd4179SDavid Teigland 436e7fd4179SDavid Teigland /* 437e7fd4179SDavid Teigland * We do async lookups on rsb's that need new masters. The rsb's 438e7fd4179SDavid Teigland * waiting for a lookup reply are kept on the recover_list. 439c04fecb4SDavid Teigland * 440c04fecb4SDavid Teigland * Another node recovering the master may have sent us a rcom lookup, 441c04fecb4SDavid Teigland * and our dlm_master_lookup() set it as the new master, along with 442c04fecb4SDavid Teigland * NEW_MASTER so that we'll recover it here (this implies dir_nodeid 443c04fecb4SDavid Teigland * equals our_nodeid below). 444e7fd4179SDavid Teigland */ 445e7fd4179SDavid Teigland 446c04fecb4SDavid Teigland static int recover_master(struct dlm_rsb *r, unsigned int *count) 447e7fd4179SDavid Teigland { 448e7fd4179SDavid Teigland struct dlm_ls *ls = r->res_ls; 449c04fecb4SDavid Teigland int our_nodeid, dir_nodeid; 450c04fecb4SDavid Teigland int is_removed = 0; 451c04fecb4SDavid Teigland int error; 452c04fecb4SDavid Teigland 453c04fecb4SDavid Teigland if (is_master(r)) 454c04fecb4SDavid Teigland return 0; 455c04fecb4SDavid Teigland 456c04fecb4SDavid Teigland is_removed = dlm_is_removed(ls, r->res_nodeid); 457c04fecb4SDavid Teigland 458c04fecb4SDavid Teigland if (!is_removed && !rsb_flag(r, RSB_NEW_MASTER)) 459c04fecb4SDavid Teigland return 0; 460c04fecb4SDavid Teigland 461c04fecb4SDavid Teigland our_nodeid = dlm_our_nodeid(); 462c04fecb4SDavid Teigland dir_nodeid = dlm_dir_nodeid(r); 463e7fd4179SDavid Teigland 464e7fd4179SDavid Teigland if (dir_nodeid == our_nodeid) { 465c04fecb4SDavid Teigland if (is_removed) { 466c04fecb4SDavid Teigland r->res_master_nodeid = our_nodeid; 467c04fecb4SDavid Teigland r->res_nodeid = 0; 468c04fecb4SDavid Teigland } 469e7fd4179SDavid Teigland 470c04fecb4SDavid Teigland /* set master of lkbs to ourself when is_removed, or to 471c04fecb4SDavid Teigland another new master which we set along with NEW_MASTER 472c04fecb4SDavid Teigland in dlm_master_lookup */ 473c04fecb4SDavid Teigland set_new_master(r); 474c04fecb4SDavid Teigland error = 0; 475e7fd4179SDavid Teigland } else { 4761d7c484eSDavid Teigland recover_idr_add(r); 477e7fd4179SDavid Teigland error = dlm_send_rcom_lookup(r, dir_nodeid); 478e7fd4179SDavid Teigland } 479e7fd4179SDavid Teigland 480c04fecb4SDavid Teigland (*count)++; 481e7fd4179SDavid Teigland return error; 482e7fd4179SDavid Teigland } 483e7fd4179SDavid Teigland 484e7fd4179SDavid Teigland /* 4854875647aSDavid Teigland * All MSTCPY locks are purged and rebuilt, even if the master stayed the same. 4864875647aSDavid Teigland * This is necessary because recovery can be started, aborted and restarted, 4874875647aSDavid Teigland * causing the master nodeid to briefly change during the aborted recovery, and 4884875647aSDavid Teigland * change back to the original value in the second recovery. The MSTCPY locks 4894875647aSDavid Teigland * may or may not have been purged during the aborted recovery. Another node 4904875647aSDavid Teigland * with an outstanding request in waiters list and a request reply saved in the 4914875647aSDavid Teigland * requestqueue, cannot know whether it should ignore the reply and resend the 4924875647aSDavid Teigland * request, or accept the reply and complete the request. It must do the 4934875647aSDavid Teigland * former if the remote node purged MSTCPY locks, and it must do the later if 4944875647aSDavid Teigland * the remote node did not. This is solved by always purging MSTCPY locks, in 4954875647aSDavid Teigland * which case, the request reply would always be ignored and the request 4964875647aSDavid Teigland * resent. 497e7fd4179SDavid Teigland */ 498e7fd4179SDavid Teigland 499c04fecb4SDavid Teigland static int recover_master_static(struct dlm_rsb *r, unsigned int *count) 500e7fd4179SDavid Teigland { 5014875647aSDavid Teigland int dir_nodeid = dlm_dir_nodeid(r); 5024875647aSDavid Teigland int new_master = dir_nodeid; 503e7fd4179SDavid Teigland 5044875647aSDavid Teigland if (dir_nodeid == dlm_our_nodeid()) 5054875647aSDavid Teigland new_master = 0; 506e7fd4179SDavid Teigland 507e7fd4179SDavid Teigland dlm_purge_mstcpy_locks(r); 508c04fecb4SDavid Teigland r->res_master_nodeid = dir_nodeid; 509c04fecb4SDavid Teigland r->res_nodeid = new_master; 510c04fecb4SDavid Teigland set_new_master(r); 511c04fecb4SDavid Teigland (*count)++; 512c04fecb4SDavid Teigland return 0; 513e7fd4179SDavid Teigland } 514e7fd4179SDavid Teigland 515e7fd4179SDavid Teigland /* 516e7fd4179SDavid Teigland * Go through local root resources and for each rsb which has a master which 517e7fd4179SDavid Teigland * has departed, get the new master nodeid from the directory. The dir will 518e7fd4179SDavid Teigland * assign mastery to the first node to look up the new master. That means 519e7fd4179SDavid Teigland * we'll discover in this lookup if we're the new master of any rsb's. 520e7fd4179SDavid Teigland * 521e7fd4179SDavid Teigland * We fire off all the dir lookup requests individually and asynchronously to 522e7fd4179SDavid Teigland * the correct dir node. 523e7fd4179SDavid Teigland */ 524e7fd4179SDavid Teigland 525e7fd4179SDavid Teigland int dlm_recover_masters(struct dlm_ls *ls) 526e7fd4179SDavid Teigland { 527e7fd4179SDavid Teigland struct dlm_rsb *r; 528c04fecb4SDavid Teigland unsigned int total = 0; 529c04fecb4SDavid Teigland unsigned int count = 0; 530c04fecb4SDavid Teigland int nodir = dlm_no_directory(ls); 531c04fecb4SDavid Teigland int error; 532e7fd4179SDavid Teigland 533e7fd4179SDavid Teigland log_debug(ls, "dlm_recover_masters"); 534e7fd4179SDavid Teigland 535e7fd4179SDavid Teigland down_read(&ls->ls_root_sem); 536e7fd4179SDavid Teigland list_for_each_entry(r, &ls->ls_root_list, res_root_list) { 537e7fd4179SDavid Teigland if (dlm_recovery_stopped(ls)) { 538e7fd4179SDavid Teigland up_read(&ls->ls_root_sem); 539e7fd4179SDavid Teigland error = -EINTR; 540e7fd4179SDavid Teigland goto out; 541e7fd4179SDavid Teigland } 542e7fd4179SDavid Teigland 543c04fecb4SDavid Teigland lock_rsb(r); 544c04fecb4SDavid Teigland if (nodir) 545c04fecb4SDavid Teigland error = recover_master_static(r, &count); 546c04fecb4SDavid Teigland else 547c04fecb4SDavid Teigland error = recover_master(r, &count); 548c04fecb4SDavid Teigland unlock_rsb(r); 549c04fecb4SDavid Teigland cond_resched(); 550c04fecb4SDavid Teigland total++; 551e7fd4179SDavid Teigland 552c04fecb4SDavid Teigland if (error) { 553c04fecb4SDavid Teigland up_read(&ls->ls_root_sem); 554c04fecb4SDavid Teigland goto out; 555c04fecb4SDavid Teigland } 556e7fd4179SDavid Teigland } 557e7fd4179SDavid Teigland up_read(&ls->ls_root_sem); 558e7fd4179SDavid Teigland 559c04fecb4SDavid Teigland log_debug(ls, "dlm_recover_masters %u of %u", count, total); 560e7fd4179SDavid Teigland 5611d7c484eSDavid Teigland error = dlm_wait_function(ls, &recover_idr_empty); 562e7fd4179SDavid Teigland out: 563e7fd4179SDavid Teigland if (error) 5641d7c484eSDavid Teigland recover_idr_clear(ls); 565e7fd4179SDavid Teigland return error; 566e7fd4179SDavid Teigland } 567e7fd4179SDavid Teigland 568e7fd4179SDavid Teigland int dlm_recover_master_reply(struct dlm_ls *ls, struct dlm_rcom *rc) 569e7fd4179SDavid Teigland { 570e7fd4179SDavid Teigland struct dlm_rsb *r; 571c04fecb4SDavid Teigland int ret_nodeid, new_master; 572e7fd4179SDavid Teigland 5731d7c484eSDavid Teigland r = recover_idr_find(ls, rc->rc_id); 574e7fd4179SDavid Teigland if (!r) { 57590135925SDavid Teigland log_error(ls, "dlm_recover_master_reply no id %llx", 5769229f013SDavid Teigland (unsigned long long)rc->rc_id); 577e7fd4179SDavid Teigland goto out; 578e7fd4179SDavid Teigland } 579e7fd4179SDavid Teigland 580c04fecb4SDavid Teigland ret_nodeid = rc->rc_result; 581c04fecb4SDavid Teigland 582c04fecb4SDavid Teigland if (ret_nodeid == dlm_our_nodeid()) 583c04fecb4SDavid Teigland new_master = 0; 584c04fecb4SDavid Teigland else 585c04fecb4SDavid Teigland new_master = ret_nodeid; 586e7fd4179SDavid Teigland 5874875647aSDavid Teigland lock_rsb(r); 588c04fecb4SDavid Teigland r->res_master_nodeid = ret_nodeid; 589c04fecb4SDavid Teigland r->res_nodeid = new_master; 590c04fecb4SDavid Teigland set_new_master(r); 5914875647aSDavid Teigland unlock_rsb(r); 5921d7c484eSDavid Teigland recover_idr_del(r); 593e7fd4179SDavid Teigland 5941d7c484eSDavid Teigland if (recover_idr_empty(ls)) 595e7fd4179SDavid Teigland wake_up(&ls->ls_wait_general); 596e7fd4179SDavid Teigland out: 597e7fd4179SDavid Teigland return 0; 598e7fd4179SDavid Teigland } 599e7fd4179SDavid Teigland 600e7fd4179SDavid Teigland 601e7fd4179SDavid Teigland /* Lock recovery: rebuild the process-copy locks we hold on a 602e7fd4179SDavid Teigland remastered rsb on the new rsb master. 603e7fd4179SDavid Teigland 604e7fd4179SDavid Teigland dlm_recover_locks 605e7fd4179SDavid Teigland recover_locks 606e7fd4179SDavid Teigland recover_locks_queue 607e7fd4179SDavid Teigland dlm_send_rcom_lock -> receive_rcom_lock 608e7fd4179SDavid Teigland dlm_recover_master_copy 609e7fd4179SDavid Teigland receive_rcom_lock_reply <- 610e7fd4179SDavid Teigland dlm_recover_process_copy 611e7fd4179SDavid Teigland */ 612e7fd4179SDavid Teigland 613e7fd4179SDavid Teigland 614e7fd4179SDavid Teigland /* 615e7fd4179SDavid Teigland * keep a count of the number of lkb's we send to the new master; when we get 616e7fd4179SDavid Teigland * an equal number of replies then recovery for the rsb is done 617e7fd4179SDavid Teigland */ 618e7fd4179SDavid Teigland 619e7fd4179SDavid Teigland static int recover_locks_queue(struct dlm_rsb *r, struct list_head *head) 620e7fd4179SDavid Teigland { 621e7fd4179SDavid Teigland struct dlm_lkb *lkb; 622e7fd4179SDavid Teigland int error = 0; 623e7fd4179SDavid Teigland 624e7fd4179SDavid Teigland list_for_each_entry(lkb, head, lkb_statequeue) { 625e7fd4179SDavid Teigland error = dlm_send_rcom_lock(r, lkb); 626e7fd4179SDavid Teigland if (error) 627e7fd4179SDavid Teigland break; 628e7fd4179SDavid Teigland r->res_recover_locks_count++; 629e7fd4179SDavid Teigland } 630e7fd4179SDavid Teigland 631e7fd4179SDavid Teigland return error; 632e7fd4179SDavid Teigland } 633e7fd4179SDavid Teigland 634e7fd4179SDavid Teigland static int recover_locks(struct dlm_rsb *r) 635e7fd4179SDavid Teigland { 636e7fd4179SDavid Teigland int error = 0; 637e7fd4179SDavid Teigland 638e7fd4179SDavid Teigland lock_rsb(r); 639e7fd4179SDavid Teigland 640a345da3eSDavid Teigland DLM_ASSERT(!r->res_recover_locks_count, dlm_dump_rsb(r);); 641e7fd4179SDavid Teigland 642e7fd4179SDavid Teigland error = recover_locks_queue(r, &r->res_grantqueue); 643e7fd4179SDavid Teigland if (error) 644e7fd4179SDavid Teigland goto out; 645e7fd4179SDavid Teigland error = recover_locks_queue(r, &r->res_convertqueue); 646e7fd4179SDavid Teigland if (error) 647e7fd4179SDavid Teigland goto out; 648e7fd4179SDavid Teigland error = recover_locks_queue(r, &r->res_waitqueue); 649e7fd4179SDavid Teigland if (error) 650e7fd4179SDavid Teigland goto out; 651e7fd4179SDavid Teigland 652e7fd4179SDavid Teigland if (r->res_recover_locks_count) 653e7fd4179SDavid Teigland recover_list_add(r); 654e7fd4179SDavid Teigland else 655e7fd4179SDavid Teigland rsb_clear_flag(r, RSB_NEW_MASTER); 656e7fd4179SDavid Teigland out: 657e7fd4179SDavid Teigland unlock_rsb(r); 658e7fd4179SDavid Teigland return error; 659e7fd4179SDavid Teigland } 660e7fd4179SDavid Teigland 661e7fd4179SDavid Teigland int dlm_recover_locks(struct dlm_ls *ls) 662e7fd4179SDavid Teigland { 663e7fd4179SDavid Teigland struct dlm_rsb *r; 664e7fd4179SDavid Teigland int error, count = 0; 665e7fd4179SDavid Teigland 666e7fd4179SDavid Teigland down_read(&ls->ls_root_sem); 667e7fd4179SDavid Teigland list_for_each_entry(r, &ls->ls_root_list, res_root_list) { 668e7fd4179SDavid Teigland if (is_master(r)) { 669e7fd4179SDavid Teigland rsb_clear_flag(r, RSB_NEW_MASTER); 670e7fd4179SDavid Teigland continue; 671e7fd4179SDavid Teigland } 672e7fd4179SDavid Teigland 673e7fd4179SDavid Teigland if (!rsb_flag(r, RSB_NEW_MASTER)) 674e7fd4179SDavid Teigland continue; 675e7fd4179SDavid Teigland 676e7fd4179SDavid Teigland if (dlm_recovery_stopped(ls)) { 677e7fd4179SDavid Teigland error = -EINTR; 678e7fd4179SDavid Teigland up_read(&ls->ls_root_sem); 679e7fd4179SDavid Teigland goto out; 680e7fd4179SDavid Teigland } 681e7fd4179SDavid Teigland 682e7fd4179SDavid Teigland error = recover_locks(r); 683e7fd4179SDavid Teigland if (error) { 684e7fd4179SDavid Teigland up_read(&ls->ls_root_sem); 685e7fd4179SDavid Teigland goto out; 686e7fd4179SDavid Teigland } 687e7fd4179SDavid Teigland 688e7fd4179SDavid Teigland count += r->res_recover_locks_count; 689e7fd4179SDavid Teigland } 690e7fd4179SDavid Teigland up_read(&ls->ls_root_sem); 691e7fd4179SDavid Teigland 6924875647aSDavid Teigland log_debug(ls, "dlm_recover_locks %d out", count); 693e7fd4179SDavid Teigland 694e7fd4179SDavid Teigland error = dlm_wait_function(ls, &recover_list_empty); 695e7fd4179SDavid Teigland out: 696e7fd4179SDavid Teigland if (error) 697e7fd4179SDavid Teigland recover_list_clear(ls); 698e7fd4179SDavid Teigland return error; 699e7fd4179SDavid Teigland } 700e7fd4179SDavid Teigland 701e7fd4179SDavid Teigland void dlm_recovered_lock(struct dlm_rsb *r) 702e7fd4179SDavid Teigland { 703a345da3eSDavid Teigland DLM_ASSERT(rsb_flag(r, RSB_NEW_MASTER), dlm_dump_rsb(r);); 704e7fd4179SDavid Teigland 705e7fd4179SDavid Teigland r->res_recover_locks_count--; 706e7fd4179SDavid Teigland if (!r->res_recover_locks_count) { 707e7fd4179SDavid Teigland rsb_clear_flag(r, RSB_NEW_MASTER); 708e7fd4179SDavid Teigland recover_list_del(r); 709e7fd4179SDavid Teigland } 710e7fd4179SDavid Teigland 711e7fd4179SDavid Teigland if (recover_list_empty(r->res_ls)) 712e7fd4179SDavid Teigland wake_up(&r->res_ls->ls_wait_general); 713e7fd4179SDavid Teigland } 714e7fd4179SDavid Teigland 715e7fd4179SDavid Teigland /* 716e7fd4179SDavid Teigland * The lvb needs to be recovered on all master rsb's. This includes setting 717e7fd4179SDavid Teigland * the VALNOTVALID flag if necessary, and determining the correct lvb contents 718e7fd4179SDavid Teigland * based on the lvb's of the locks held on the rsb. 719e7fd4179SDavid Teigland * 720*da8c6663SDavid Teigland * RSB_VALNOTVALID is set in two cases: 721*da8c6663SDavid Teigland * 722*da8c6663SDavid Teigland * 1. we are master, but not new, and we purged an EX/PW lock held by a 723*da8c6663SDavid Teigland * failed node (in dlm_recover_purge which set RSB_RECOVER_LVB_INVAL) 724*da8c6663SDavid Teigland * 725*da8c6663SDavid Teigland * 2. we are a new master, and there are only NL/CR locks left. 726*da8c6663SDavid Teigland * (We could probably improve this by only invaliding in this way when 727*da8c6663SDavid Teigland * the previous master left uncleanly. VMS docs mention that.) 728e7fd4179SDavid Teigland * 729e7fd4179SDavid Teigland * The LVB contents are only considered for changing when this is a new master 730e7fd4179SDavid Teigland * of the rsb (NEW_MASTER2). Then, the rsb's lvb is taken from any lkb with 731e7fd4179SDavid Teigland * mode > CR. If no lkb's exist with mode above CR, the lvb contents are taken 732e7fd4179SDavid Teigland * from the lkb with the largest lvb sequence number. 733e7fd4179SDavid Teigland */ 734e7fd4179SDavid Teigland 735e7fd4179SDavid Teigland static void recover_lvb(struct dlm_rsb *r) 736e7fd4179SDavid Teigland { 737e7fd4179SDavid Teigland struct dlm_lkb *lkb, *high_lkb = NULL; 738e7fd4179SDavid Teigland uint32_t high_seq = 0; 73990135925SDavid Teigland int lock_lvb_exists = 0; 74090135925SDavid Teigland int big_lock_exists = 0; 741e7fd4179SDavid Teigland int lvblen = r->res_ls->ls_lvblen; 742e7fd4179SDavid Teigland 743*da8c6663SDavid Teigland if (!rsb_flag(r, RSB_NEW_MASTER2) && 744*da8c6663SDavid Teigland rsb_flag(r, RSB_RECOVER_LVB_INVAL)) { 745*da8c6663SDavid Teigland /* case 1 above */ 746*da8c6663SDavid Teigland rsb_set_flag(r, RSB_VALNOTVALID); 747*da8c6663SDavid Teigland return; 748*da8c6663SDavid Teigland } 749*da8c6663SDavid Teigland 750*da8c6663SDavid Teigland if (!rsb_flag(r, RSB_NEW_MASTER2)) 751*da8c6663SDavid Teigland return; 752*da8c6663SDavid Teigland 753*da8c6663SDavid Teigland /* we are the new master, so figure out if VALNOTVALID should 754*da8c6663SDavid Teigland be set, and set the rsb lvb from the best lkb available. */ 755*da8c6663SDavid Teigland 756e7fd4179SDavid Teigland list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) { 757e7fd4179SDavid Teigland if (!(lkb->lkb_exflags & DLM_LKF_VALBLK)) 758e7fd4179SDavid Teigland continue; 759e7fd4179SDavid Teigland 76090135925SDavid Teigland lock_lvb_exists = 1; 761e7fd4179SDavid Teigland 762e7fd4179SDavid Teigland if (lkb->lkb_grmode > DLM_LOCK_CR) { 76390135925SDavid Teigland big_lock_exists = 1; 764e7fd4179SDavid Teigland goto setflag; 765e7fd4179SDavid Teigland } 766e7fd4179SDavid Teigland 767e7fd4179SDavid Teigland if (((int)lkb->lkb_lvbseq - (int)high_seq) >= 0) { 768e7fd4179SDavid Teigland high_lkb = lkb; 769e7fd4179SDavid Teigland high_seq = lkb->lkb_lvbseq; 770e7fd4179SDavid Teigland } 771e7fd4179SDavid Teigland } 772e7fd4179SDavid Teigland 773e7fd4179SDavid Teigland list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) { 774e7fd4179SDavid Teigland if (!(lkb->lkb_exflags & DLM_LKF_VALBLK)) 775e7fd4179SDavid Teigland continue; 776e7fd4179SDavid Teigland 77790135925SDavid Teigland lock_lvb_exists = 1; 778e7fd4179SDavid Teigland 779e7fd4179SDavid Teigland if (lkb->lkb_grmode > DLM_LOCK_CR) { 78090135925SDavid Teigland big_lock_exists = 1; 781e7fd4179SDavid Teigland goto setflag; 782e7fd4179SDavid Teigland } 783e7fd4179SDavid Teigland 784e7fd4179SDavid Teigland if (((int)lkb->lkb_lvbseq - (int)high_seq) >= 0) { 785e7fd4179SDavid Teigland high_lkb = lkb; 786e7fd4179SDavid Teigland high_seq = lkb->lkb_lvbseq; 787e7fd4179SDavid Teigland } 788e7fd4179SDavid Teigland } 789e7fd4179SDavid Teigland 790e7fd4179SDavid Teigland setflag: 791e7fd4179SDavid Teigland if (!lock_lvb_exists) 792e7fd4179SDavid Teigland goto out; 793e7fd4179SDavid Teigland 794*da8c6663SDavid Teigland /* lvb is invalidated if only NL/CR locks remain */ 795e7fd4179SDavid Teigland if (!big_lock_exists) 796e7fd4179SDavid Teigland rsb_set_flag(r, RSB_VALNOTVALID); 797e7fd4179SDavid Teigland 798e7fd4179SDavid Teigland if (!r->res_lvbptr) { 79952bda2b5SDavid Teigland r->res_lvbptr = dlm_allocate_lvb(r->res_ls); 800e7fd4179SDavid Teigland if (!r->res_lvbptr) 801e7fd4179SDavid Teigland goto out; 802e7fd4179SDavid Teigland } 803e7fd4179SDavid Teigland 804e7fd4179SDavid Teigland if (big_lock_exists) { 805e7fd4179SDavid Teigland r->res_lvbseq = lkb->lkb_lvbseq; 806e7fd4179SDavid Teigland memcpy(r->res_lvbptr, lkb->lkb_lvbptr, lvblen); 807e7fd4179SDavid Teigland } else if (high_lkb) { 808e7fd4179SDavid Teigland r->res_lvbseq = high_lkb->lkb_lvbseq; 809e7fd4179SDavid Teigland memcpy(r->res_lvbptr, high_lkb->lkb_lvbptr, lvblen); 810e7fd4179SDavid Teigland } else { 811e7fd4179SDavid Teigland r->res_lvbseq = 0; 812e7fd4179SDavid Teigland memset(r->res_lvbptr, 0, lvblen); 813e7fd4179SDavid Teigland } 814e7fd4179SDavid Teigland out: 815e7fd4179SDavid Teigland return; 816e7fd4179SDavid Teigland } 817e7fd4179SDavid Teigland 818e7fd4179SDavid Teigland /* All master rsb's flagged RECOVER_CONVERT need to be looked at. The locks 819e7fd4179SDavid Teigland converting PR->CW or CW->PR need to have their lkb_grmode set. */ 820e7fd4179SDavid Teigland 821e7fd4179SDavid Teigland static void recover_conversion(struct dlm_rsb *r) 822e7fd4179SDavid Teigland { 823c503a621SDavid Teigland struct dlm_ls *ls = r->res_ls; 824e7fd4179SDavid Teigland struct dlm_lkb *lkb; 825e7fd4179SDavid Teigland int grmode = -1; 826e7fd4179SDavid Teigland 827e7fd4179SDavid Teigland list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) { 828e7fd4179SDavid Teigland if (lkb->lkb_grmode == DLM_LOCK_PR || 829e7fd4179SDavid Teigland lkb->lkb_grmode == DLM_LOCK_CW) { 830e7fd4179SDavid Teigland grmode = lkb->lkb_grmode; 831e7fd4179SDavid Teigland break; 832e7fd4179SDavid Teigland } 833e7fd4179SDavid Teigland } 834e7fd4179SDavid Teigland 835e7fd4179SDavid Teigland list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) { 836e7fd4179SDavid Teigland if (lkb->lkb_grmode != DLM_LOCK_IV) 837e7fd4179SDavid Teigland continue; 838c503a621SDavid Teigland if (grmode == -1) { 839c503a621SDavid Teigland log_debug(ls, "recover_conversion %x set gr to rq %d", 840c503a621SDavid Teigland lkb->lkb_id, lkb->lkb_rqmode); 841e7fd4179SDavid Teigland lkb->lkb_grmode = lkb->lkb_rqmode; 842c503a621SDavid Teigland } else { 843c503a621SDavid Teigland log_debug(ls, "recover_conversion %x set gr %d", 844c503a621SDavid Teigland lkb->lkb_id, grmode); 845e7fd4179SDavid Teigland lkb->lkb_grmode = grmode; 846e7fd4179SDavid Teigland } 847e7fd4179SDavid Teigland } 848c503a621SDavid Teigland } 849e7fd4179SDavid Teigland 850f7da790dSDavid Teigland /* We've become the new master for this rsb and waiting/converting locks may 8514875647aSDavid Teigland need to be granted in dlm_recover_grant() due to locks that may have 852f7da790dSDavid Teigland existed from a removed node. */ 853f7da790dSDavid Teigland 8544875647aSDavid Teigland static void recover_grant(struct dlm_rsb *r) 855f7da790dSDavid Teigland { 856f7da790dSDavid Teigland if (!list_empty(&r->res_waitqueue) || !list_empty(&r->res_convertqueue)) 8574875647aSDavid Teigland rsb_set_flag(r, RSB_RECOVER_GRANT); 858f7da790dSDavid Teigland } 859f7da790dSDavid Teigland 860e7fd4179SDavid Teigland void dlm_recover_rsbs(struct dlm_ls *ls) 861e7fd4179SDavid Teigland { 862e7fd4179SDavid Teigland struct dlm_rsb *r; 8634875647aSDavid Teigland unsigned int count = 0; 864e7fd4179SDavid Teigland 865e7fd4179SDavid Teigland down_read(&ls->ls_root_sem); 866e7fd4179SDavid Teigland list_for_each_entry(r, &ls->ls_root_list, res_root_list) { 867e7fd4179SDavid Teigland lock_rsb(r); 868e7fd4179SDavid Teigland if (is_master(r)) { 869e7fd4179SDavid Teigland if (rsb_flag(r, RSB_RECOVER_CONVERT)) 870e7fd4179SDavid Teigland recover_conversion(r); 871*da8c6663SDavid Teigland 872*da8c6663SDavid Teigland /* recover lvb before granting locks so the updated 873*da8c6663SDavid Teigland lvb/VALNOTVALID is presented in the completion */ 874*da8c6663SDavid Teigland recover_lvb(r); 875*da8c6663SDavid Teigland 876f7da790dSDavid Teigland if (rsb_flag(r, RSB_NEW_MASTER2)) 8774875647aSDavid Teigland recover_grant(r); 878e7fd4179SDavid Teigland count++; 879*da8c6663SDavid Teigland } else { 880*da8c6663SDavid Teigland rsb_clear_flag(r, RSB_VALNOTVALID); 881e7fd4179SDavid Teigland } 882e7fd4179SDavid Teigland rsb_clear_flag(r, RSB_RECOVER_CONVERT); 883*da8c6663SDavid Teigland rsb_clear_flag(r, RSB_RECOVER_LVB_INVAL); 884f7da790dSDavid Teigland rsb_clear_flag(r, RSB_NEW_MASTER2); 885e7fd4179SDavid Teigland unlock_rsb(r); 886e7fd4179SDavid Teigland } 887e7fd4179SDavid Teigland up_read(&ls->ls_root_sem); 888e7fd4179SDavid Teigland 8894875647aSDavid Teigland if (count) 8904875647aSDavid Teigland log_debug(ls, "dlm_recover_rsbs %d done", count); 891e7fd4179SDavid Teigland } 892e7fd4179SDavid Teigland 893e7fd4179SDavid Teigland /* Create a single list of all root rsb's to be used during recovery */ 894e7fd4179SDavid Teigland 895e7fd4179SDavid Teigland int dlm_create_root_list(struct dlm_ls *ls) 896e7fd4179SDavid Teigland { 8979beb3bf5SBob Peterson struct rb_node *n; 898e7fd4179SDavid Teigland struct dlm_rsb *r; 899e7fd4179SDavid Teigland int i, error = 0; 900e7fd4179SDavid Teigland 901e7fd4179SDavid Teigland down_write(&ls->ls_root_sem); 902e7fd4179SDavid Teigland if (!list_empty(&ls->ls_root_list)) { 903e7fd4179SDavid Teigland log_error(ls, "root list not empty"); 904e7fd4179SDavid Teigland error = -EINVAL; 905e7fd4179SDavid Teigland goto out; 906e7fd4179SDavid Teigland } 907e7fd4179SDavid Teigland 908e7fd4179SDavid Teigland for (i = 0; i < ls->ls_rsbtbl_size; i++) { 909c7be761aSDavid Teigland spin_lock(&ls->ls_rsbtbl[i].lock); 9109beb3bf5SBob Peterson for (n = rb_first(&ls->ls_rsbtbl[i].keep); n; n = rb_next(n)) { 9119beb3bf5SBob Peterson r = rb_entry(n, struct dlm_rsb, res_hashnode); 912e7fd4179SDavid Teigland list_add(&r->res_root_list, &ls->ls_root_list); 913e7fd4179SDavid Teigland dlm_hold_rsb(r); 914e7fd4179SDavid Teigland } 91585f0379aSDavid Teigland 916c04fecb4SDavid Teigland if (!RB_EMPTY_ROOT(&ls->ls_rsbtbl[i].toss)) 917c04fecb4SDavid Teigland log_error(ls, "dlm_create_root_list toss not empty"); 918c7be761aSDavid Teigland spin_unlock(&ls->ls_rsbtbl[i].lock); 919e7fd4179SDavid Teigland } 920e7fd4179SDavid Teigland out: 921e7fd4179SDavid Teigland up_write(&ls->ls_root_sem); 922e7fd4179SDavid Teigland return error; 923e7fd4179SDavid Teigland } 924e7fd4179SDavid Teigland 925e7fd4179SDavid Teigland void dlm_release_root_list(struct dlm_ls *ls) 926e7fd4179SDavid Teigland { 927e7fd4179SDavid Teigland struct dlm_rsb *r, *safe; 928e7fd4179SDavid Teigland 929e7fd4179SDavid Teigland down_write(&ls->ls_root_sem); 930e7fd4179SDavid Teigland list_for_each_entry_safe(r, safe, &ls->ls_root_list, res_root_list) { 931e7fd4179SDavid Teigland list_del_init(&r->res_root_list); 932e7fd4179SDavid Teigland dlm_put_rsb(r); 933e7fd4179SDavid Teigland } 934e7fd4179SDavid Teigland up_write(&ls->ls_root_sem); 935e7fd4179SDavid Teigland } 936e7fd4179SDavid Teigland 937c04fecb4SDavid Teigland void dlm_clear_toss(struct dlm_ls *ls) 938e7fd4179SDavid Teigland { 9399beb3bf5SBob Peterson struct rb_node *n, *next; 940c04fecb4SDavid Teigland struct dlm_rsb *r; 941c04fecb4SDavid Teigland unsigned int count = 0; 942e7fd4179SDavid Teigland int i; 943e7fd4179SDavid Teigland 944e7fd4179SDavid Teigland for (i = 0; i < ls->ls_rsbtbl_size; i++) { 945c7be761aSDavid Teigland spin_lock(&ls->ls_rsbtbl[i].lock); 9469beb3bf5SBob Peterson for (n = rb_first(&ls->ls_rsbtbl[i].toss); n; n = next) { 947c04fecb4SDavid Teigland next = rb_next(n); 948c04fecb4SDavid Teigland r = rb_entry(n, struct dlm_rsb, res_hashnode); 9499beb3bf5SBob Peterson rb_erase(n, &ls->ls_rsbtbl[i].toss); 950c04fecb4SDavid Teigland dlm_free_rsb(r); 951c04fecb4SDavid Teigland count++; 95285f0379aSDavid Teigland } 953c7be761aSDavid Teigland spin_unlock(&ls->ls_rsbtbl[i].lock); 954e7fd4179SDavid Teigland } 955c04fecb4SDavid Teigland 956c04fecb4SDavid Teigland if (count) 957c04fecb4SDavid Teigland log_debug(ls, "dlm_clear_toss %u done", count); 958e7fd4179SDavid Teigland } 959e7fd4179SDavid Teigland 960