// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/buffer_head.h>
#include <linux/delay.h>
#include <linux/sort.h>
#include <linux/hash.h>
#include <linux/jhash.h>
#include <linux/kallsyms.h>
#include <linux/gfs2_ondisk.h>
#include <linux/list.h>
#include <linux/wait.h>
#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include <linux/rcupdate.h>
#include <linux/rculist_bl.h>
#include <linux/bit_spinlock.h>
#include <linux/percpu.h>
#include <linux/list_sort.h>
#include <linux/lockref.h>
#include <linux/rhashtable.h>
#include <linux/pid_namespace.h>
#include <linux/file.h>
#include <linux/random.h>

#include "gfs2.h"
#include "incore.h"
#include "glock.h"
#include "glops.h"
#include "inode.h"
#include "lops.h"
#include "meta_io.h"
#include "quota.h"
#include "super.h"
#include "util.h"
#include "bmap.h"
#define CREATE_TRACE_POINTS
#include "trace_gfs2.h"

struct gfs2_glock_iter {
	struct gfs2_sbd *sdp;		/* incore superblock */
	struct rhashtable_iter hti;	/* rhashtable iterator */
	struct gfs2_glock *gl;		/* current glock struct */
	loff_t last_pos;		/* last position */
};

typedef void (*glock_examiner) (struct gfs2_glock * gl);

static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target);
static void request_demote(struct gfs2_glock *gl, unsigned int state,
			   unsigned long delay, bool remote);

static struct dentry *gfs2_root;
static LIST_HEAD(lru_list);
static atomic_t lru_count = ATOMIC_INIT(0);
static DEFINE_SPINLOCK(lru_lock);

#define GFS2_GL_HASH_SHIFT      15
#define GFS2_GL_HASH_SIZE       BIT(GFS2_GL_HASH_SHIFT)

static const struct rhashtable_params ht_parms = {
	.nelem_hint = GFS2_GL_HASH_SIZE * 3 / 4,
	.key_len = offsetofend(struct lm_lockname, ln_type),
	.key_offset = offsetof(struct gfs2_glock, gl_name),
	.head_offset = offsetof(struct gfs2_glock, gl_node),
};

static struct rhashtable gl_hash_table;

#define GLOCK_WAIT_TABLE_BITS 12
#define GLOCK_WAIT_TABLE_SIZE (1 << GLOCK_WAIT_TABLE_BITS)
static wait_queue_head_t glock_wait_table[GLOCK_WAIT_TABLE_SIZE] __cacheline_aligned;

struct wait_glock_queue {
	struct lm_lockname *name;
	wait_queue_entry_t wait;
};

static int glock_wake_function(wait_queue_entry_t *wait, unsigned int mode,
			       int sync, void *key)
{
	struct wait_glock_queue *wait_glock =
		container_of(wait, struct wait_glock_queue, wait);
	struct lm_lockname *wait_name = wait_glock->name;
	struct lm_lockname *wake_name = key;

	if (wake_name->ln_sbd != wait_name->ln_sbd ||
	    wake_name->ln_number != wait_name->ln_number ||
	    wake_name->ln_type != wait_name->ln_type)
		return 0;
	return autoremove_wake_function(wait, mode, sync, key);
}

static wait_queue_head_t *glock_waitqueue(struct lm_lockname *name)
{
	u32 hash = jhash2((u32 *)name, ht_parms.key_len / 4, 0);

	return glock_wait_table + hash_32(hash, GLOCK_WAIT_TABLE_BITS);
}

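/*
 * Glocks do not embed their own wait queues.  Instead, waiters hash the lock
 * name into the shared glock_wait_table above, and glock_wake_function()
 * filters wakeups by comparing the full lm_lockname, so unrelated glocks that
 * happen to hash to the same bucket are not woken spuriously.
 */
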
/**
 * wake_up_glock - Wake up waiters on a glock
 * @gl: the glock
 */
static void wake_up_glock(struct gfs2_glock *gl)
{
	wait_queue_head_t *wq = glock_waitqueue(&gl->gl_name);

	if (waitqueue_active(wq))
		__wake_up(wq, TASK_NORMAL, 1, &gl->gl_name);
}

static void gfs2_glock_dealloc(struct rcu_head *rcu)
{
	struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);

	kfree(gl->gl_lksb.sb_lvbptr);
	if (gl->gl_ops->go_flags & GLOF_ASPACE) {
		struct gfs2_glock_aspace *gla =
			container_of(gl, struct gfs2_glock_aspace, glock);
		kmem_cache_free(gfs2_glock_aspace_cachep, gla);
	} else
		kmem_cache_free(gfs2_glock_cachep, gl);
}

/**
 * glock_blocked_by_withdraw - determine if we can still use a glock
 * @gl: the glock
 *
 * We need to allow some glocks to be enqueued, dequeued, promoted, and demoted
 * when we're withdrawn. For example, to maintain metadata integrity, we should
 * disallow the use of inode and rgrp glocks when withdrawn. Other glocks like
 * the iopen or freeze glock may be safely used because none of their
 * metadata goes through the journal. So in general, we should disallow all
 * glocks that are journaled, and allow all the others. One exception is:
 * we need to allow our active journal to be promoted and demoted so others
 * may recover it and we can reacquire it when they're done.
 */
static bool glock_blocked_by_withdraw(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

	if (!gfs2_withdrawing_or_withdrawn(sdp))
		return false;
	if (gl->gl_ops->go_flags & GLOF_NONDISK)
		return false;
	if (!sdp->sd_jdesc ||
	    gl->gl_name.ln_number == sdp->sd_jdesc->jd_no_addr)
		return false;
	return true;
}

static void __gfs2_glock_free(struct gfs2_glock *gl)
{
	rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms);
	smp_mb();
	wake_up_glock(gl);
	call_rcu(&gl->gl_rcu, gfs2_glock_dealloc);
}

void gfs2_glock_free(struct gfs2_glock *gl) {
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

	__gfs2_glock_free(gl);
	if (atomic_dec_and_test(&sdp->sd_glock_disposal))
		wake_up(&sdp->sd_kill_wait);
}

void gfs2_glock_free_later(struct gfs2_glock *gl) {
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

	spin_lock(&lru_lock);
	list_add(&gl->gl_lru, &sdp->sd_dead_glocks);
	spin_unlock(&lru_lock);
	if (atomic_dec_and_test(&sdp->sd_glock_disposal))
		wake_up(&sdp->sd_kill_wait);
}

static void gfs2_free_dead_glocks(struct gfs2_sbd *sdp)
{
	struct list_head *list = &sdp->sd_dead_glocks;

	while(!list_empty(list)) {
		struct gfs2_glock *gl;

		gl = list_first_entry(list, struct gfs2_glock, gl_lru);
		list_del_init(&gl->gl_lru);
		__gfs2_glock_free(gl);
	}
}

/**
 * gfs2_glock_hold() - increment reference count on glock
 * @gl: The glock to hold
 *
 */

struct gfs2_glock *gfs2_glock_hold(struct gfs2_glock *gl)
{
	GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref));
	lockref_get(&gl->gl_lockref);
	return gl;
}

static void gfs2_glock_add_to_lru(struct gfs2_glock *gl)
{
	spin_lock(&lru_lock);
	list_move_tail(&gl->gl_lru, &lru_list);

	if (!test_bit(GLF_LRU, &gl->gl_flags)) {
		set_bit(GLF_LRU, &gl->gl_flags);
		atomic_inc(&lru_count);
	}

	spin_unlock(&lru_lock);
}

static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
{
	spin_lock(&lru_lock);
	if (test_bit(GLF_LRU, &gl->gl_flags)) {
		list_del_init(&gl->gl_lru);
		atomic_dec(&lru_count);
		clear_bit(GLF_LRU, &gl->gl_flags);
	}
	spin_unlock(&lru_lock);
}

/*
 * Enqueue the glock on the work queue.  Passes one glock reference on to the
 * work queue.
 */
static void gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay) {
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

	if (!queue_delayed_work(sdp->sd_glock_wq, &gl->gl_work, delay)) {
		/*
		 * We are holding the lockref spinlock, and the work was still
		 * queued above.  The queued work (glock_work_func) takes that
		 * spinlock before dropping its glock reference(s), so it
		 * cannot have dropped them in the meantime.
		 */
		GLOCK_BUG_ON(gl, gl->gl_lockref.count < 2);
		gl->gl_lockref.count--;
	}
}

static void __gfs2_glock_put(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct address_space *mapping = gfs2_glock2aspace(gl);

	lockref_mark_dead(&gl->gl_lockref);
	spin_unlock(&gl->gl_lockref.lock);
	gfs2_glock_remove_from_lru(gl);
	GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
	if (mapping) {
		truncate_inode_pages_final(mapping);
		if (!gfs2_withdrawing_or_withdrawn(sdp))
			GLOCK_BUG_ON(gl, !mapping_empty(mapping));
	}
	trace_gfs2_glock_put(gl);
	sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
}

static bool __gfs2_glock_put_or_lock(struct gfs2_glock *gl)
{
	if (lockref_put_or_lock(&gl->gl_lockref))
		return true;
	GLOCK_BUG_ON(gl, gl->gl_lockref.count != 1);
	if (gl->gl_state != LM_ST_UNLOCKED) {
		gl->gl_lockref.count--;
		gfs2_glock_add_to_lru(gl);
		spin_unlock(&gl->gl_lockref.lock);
		return true;
	}
	return false;
}

/**
 * gfs2_glock_put() - Decrement reference count on glock
 * @gl: The glock to put
 *
 */

void gfs2_glock_put(struct gfs2_glock *gl)
{
	if (__gfs2_glock_put_or_lock(gl))
		return;

	__gfs2_glock_put(gl);
}

/*
 * gfs2_glock_put_async - Decrement reference count without sleeping
 * @gl: The glock to put
 *
 * Decrement the reference count on glock immediately unless it is the last
 * reference.  Defer putting the last reference to work queue context.
 */
void gfs2_glock_put_async(struct gfs2_glock *gl)
{
	if (__gfs2_glock_put_or_lock(gl))
		return;

	gfs2_glock_queue_work(gl, 0);
	spin_unlock(&gl->gl_lockref.lock);
}

/**
 * may_grant - check if it's ok to grant a new lock
 * @gl: The glock
 * @current_gh: One of the current holders of @gl
 * @gh: The lock request which we wish to grant
 *
 * With our current compatibility rules, if a glock has one or more active
 * holders (HIF_HOLDER flag set), any of those holders can be passed in as
 * @current_gh; they are all the same as far as compatibility with the new @gh
 * goes.
 *
 * Returns true if it's ok to grant the lock.
 */

static inline bool may_grant(struct gfs2_glock *gl,
			     struct gfs2_holder *current_gh,
			     struct gfs2_holder *gh)
{
	if (current_gh) {
		GLOCK_BUG_ON(gl, !test_bit(HIF_HOLDER, &current_gh->gh_iflags));

		switch(current_gh->gh_state) {
		case LM_ST_EXCLUSIVE:
			/*
			 * Here we make a special exception to grant holders
			 * who agree to share the EX lock with other holders
			 * who also have the bit set.
			 * If the original holder
			 * has the LM_FLAG_NODE_SCOPE bit set, we grant more
			 * holders with the bit set.
			 */
			return gh->gh_state == LM_ST_EXCLUSIVE &&
			       (current_gh->gh_flags & LM_FLAG_NODE_SCOPE) &&
			       (gh->gh_flags & LM_FLAG_NODE_SCOPE);

		case LM_ST_SHARED:
		case LM_ST_DEFERRED:
			return gh->gh_state == current_gh->gh_state;

		default:
			return false;
		}
	}

	if (gl->gl_state == gh->gh_state)
		return true;
	if (gh->gh_flags & GL_EXACT)
		return false;
	if (gl->gl_state == LM_ST_EXCLUSIVE) {
		return gh->gh_state == LM_ST_SHARED ||
		       gh->gh_state == LM_ST_DEFERRED;
	}
	if (gh->gh_flags & LM_FLAG_ANY)
		return gl->gl_state != LM_ST_UNLOCKED;
	return false;
}

static void gfs2_holder_wake(struct gfs2_holder *gh)
{
	clear_bit(HIF_WAIT, &gh->gh_iflags);
	smp_mb__after_atomic();
	wake_up_bit(&gh->gh_iflags, HIF_WAIT);
	if (gh->gh_flags & GL_ASYNC) {
		struct gfs2_sbd *sdp = gh->gh_gl->gl_name.ln_sbd;

		wake_up(&sdp->sd_async_glock_wait);
	}
}

/**
 * do_error - Something unexpected has happened during a lock request
 * @gl: The glock
 * @ret: The status from the DLM
 */

static void do_error(struct gfs2_glock *gl, const int ret)
{
	struct gfs2_holder *gh, *tmp;

	list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
		if (test_bit(HIF_HOLDER, &gh->gh_iflags))
			continue;
		if (ret & LM_OUT_ERROR)
			gh->gh_error = -EIO;
		else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))
			gh->gh_error = GLR_TRYFAILED;
		else
			continue;
		list_del_init(&gh->gh_list);
		trace_gfs2_glock_queue(gh, 0);
		gfs2_holder_wake(gh);
	}
}

/**
 * find_first_holder - find the first "holder" gh
 * @gl: the glock
 */

static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl)
{
	struct gfs2_holder *gh;

	if (!list_empty(&gl->gl_holders)) {
		gh = list_first_entry(&gl->gl_holders, struct gfs2_holder,
				      gh_list);
		if (test_bit(HIF_HOLDER, &gh->gh_iflags))
			return gh;
	}
	return NULL;
}

/*
 * gfs2_instantiate - Call the glops instantiate function
 * @gh: The glock holder
 *
 * Returns: 0 if instantiate was successful, or error.
 */
int gfs2_instantiate(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	const struct gfs2_glock_operations *glops = gl->gl_ops;
	int ret;

again:
	if (!test_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags))
		goto done;

	/*
	 * Since we unlock the lockref lock, we set a flag to indicate
	 * instantiate is in progress.
	 */
	if (test_and_set_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags)) {
		wait_on_bit(&gl->gl_flags, GLF_INSTANTIATE_IN_PROG,
			    TASK_UNINTERRUPTIBLE);
		/*
		 * Here we just waited for a different instantiate to finish.
		 * But that may not have been successful, as when a process
		 * locks an inode glock _before_ it has an actual inode to
		 * instantiate into. So we check again. This process might
		 * have an inode to instantiate, so might be successful.
		 */
		goto again;
	}

	ret = glops->go_instantiate(gl);
	if (!ret)
		clear_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags);
	clear_and_wake_up_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags);
	if (ret)
		return ret;

done:
	if (glops->go_held)
		return glops->go_held(gh);
	return 0;
}

/**
 * do_promote - promote as many requests as possible on the current queue
 * @gl: The glock
 *
 * Returns true on success (i.e., progress was made or there are no waiters).
 */

static bool do_promote(struct gfs2_glock *gl)
{
	struct gfs2_holder *gh, *current_gh;

	current_gh = find_first_holder(gl);
	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
		if (test_bit(HIF_HOLDER, &gh->gh_iflags))
			continue;
		if (!may_grant(gl, current_gh, gh)) {
			/*
			 * If we get here, it means we may not grant this
			 * holder for some reason. If this holder is at the
			 * head of the list, it means we have a blocked holder
			 * at the head, so return false.
			 */
			if (list_is_first(&gh->gh_list, &gl->gl_holders))
				return false;
			do_error(gl, 0);
			break;
		}
		set_bit(HIF_HOLDER, &gh->gh_iflags);
		trace_gfs2_promote(gh);
		gfs2_holder_wake(gh);
		if (!current_gh)
			current_gh = gh;
	}
	return true;
}

/**
 * find_first_waiter - find the first gh that's waiting for the glock
 * @gl: the glock
 */

static inline struct gfs2_holder *find_first_waiter(const struct gfs2_glock *gl)
{
	struct gfs2_holder *gh;

	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
		if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
			return gh;
	}
	return NULL;
}

/**
 * find_last_waiter - find the last gh that's waiting for the glock
 * @gl: the glock
 *
 * This also is a fast way of finding out if there are any waiters.
 */

static inline struct gfs2_holder *find_last_waiter(const struct gfs2_glock *gl)
{
	struct gfs2_holder *gh;

	if (list_empty(&gl->gl_holders))
		return NULL;
	gh = list_last_entry(&gl->gl_holders, struct gfs2_holder, gh_list);
	return test_bit(HIF_HOLDER, &gh->gh_iflags) ?
		NULL : gh;
}

/**
 * state_change - record that the glock is now in a different state
 * @gl: the glock
 * @new_state: the new state
 */

static void state_change(struct gfs2_glock *gl, unsigned int new_state)
{
	if (new_state != gl->gl_target)
		/* shorten our minimum hold time */
		gl->gl_hold_time = max(gl->gl_hold_time - GL_GLOCK_HOLD_DECR,
				       GL_GLOCK_MIN_HOLD);
	gl->gl_state = new_state;
	gl->gl_tchange = jiffies;
}

static void gfs2_set_demote(int nr, struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

	set_bit(nr, &gl->gl_flags);
	smp_mb();
	wake_up(&sdp->sd_async_glock_wait);
}

static void gfs2_demote_wake(struct gfs2_glock *gl)
{
	gl->gl_demote_state = LM_ST_EXCLUSIVE;
	clear_bit(GLF_DEMOTE, &gl->gl_flags);
	smp_mb__after_atomic();
	wake_up_bit(&gl->gl_flags, GLF_DEMOTE);
}

/**
 * finish_xmote - The DLM has replied to one of our lock requests
 * @gl: The glock
 * @ret: The status from the DLM
 *
 */

static void finish_xmote(struct gfs2_glock *gl, unsigned int ret)
{
	const struct gfs2_glock_operations *glops = gl->gl_ops;
	struct gfs2_holder *gh;
	unsigned state = ret & LM_OUT_ST_MASK;

	trace_gfs2_glock_state_change(gl, state);
	state_change(gl, state);
	gh = find_first_waiter(gl);

	/* Demote to UN request arrived during demote to SH or DF */
	if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
	    state != LM_ST_UNLOCKED && gl->gl_demote_state == LM_ST_UNLOCKED)
		gl->gl_target = LM_ST_UNLOCKED;

	/* Check for state != intended state */
	if (unlikely(state != gl->gl_target)) {
		if (gh && (ret & LM_OUT_CANCELED))
			gfs2_holder_wake(gh);
		if (gh && !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) {
			if (ret & LM_OUT_CANCELED) {
				list_del_init(&gh->gh_list);
				trace_gfs2_glock_queue(gh, 0);
				gl->gl_target = gl->gl_state;
				gh = find_first_waiter(gl);
				if (gh) {
					gl->gl_target = gh->gh_state;
					if (do_promote(gl))
						goto out;
					do_xmote(gl, gh, gl->gl_target);
					return;
				}
				goto out;
			}
			/* Some error or failed "try lock" - report it */
			if ((ret & LM_OUT_ERROR) ||
			    (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) {
				gl->gl_target = gl->gl_state;
				do_error(gl, ret);
				goto out;
			}
		}
		switch(state) {
		/* Unlocked due to conversion deadlock, try again */
		case LM_ST_UNLOCKED:
			do_xmote(gl, gh, gl->gl_target);
			break;
		/* Conversion fails, unlock and try again */
		case LM_ST_SHARED:
		case LM_ST_DEFERRED:
			do_xmote(gl, gh, LM_ST_UNLOCKED);
			break;
		default: /* Everything else */
			fs_err(gl->gl_name.ln_sbd, "wanted %u got %u\n",
			       gl->gl_target, state);
			GLOCK_BUG_ON(gl, 1);
		}
		return;
	}

	/* Fast path - we got what we asked for */
	if (test_and_clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags))
		gfs2_demote_wake(gl);
	if (state != LM_ST_UNLOCKED) {
		if (glops->go_xmote_bh) {
			int rv;

			spin_unlock(&gl->gl_lockref.lock);
			rv = glops->go_xmote_bh(gl);
			spin_lock(&gl->gl_lockref.lock);
			if (rv) {
				do_error(gl, rv);
				goto out;
			}
		}
		do_promote(gl);
	}
out:
	if (!test_bit(GLF_CANCELING, &gl->gl_flags))
		clear_bit(GLF_LOCK, &gl->gl_flags);
}

static bool is_system_glock(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);

	if (gl == m_ip->i_gl)
		return true;
	return false;
}

/**
 * do_xmote - Calls the DLM to change the state of a lock
 * @gl: The lock state
 * @gh: The holder (only for promotes)
 * @target: The target lock state
 *
 */

static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh,
		     unsigned int target)
__releases(&gl->gl_lockref.lock)
__acquires(&gl->gl_lockref.lock)
{
	const struct gfs2_glock_operations *glops = gl->gl_ops;
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	unsigned int lck_flags = (unsigned int)(gh ? gh->gh_flags : 0);
	int ret;

	if (target != LM_ST_UNLOCKED && glock_blocked_by_withdraw(gl) &&
	    gh && !(gh->gh_flags & LM_FLAG_NOEXP))
		goto skip_inval;

	lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP);
	GLOCK_BUG_ON(gl, gl->gl_state == target);
	GLOCK_BUG_ON(gl, gl->gl_state == gl->gl_target);
	if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) &&
	    glops->go_inval) {
		/*
		 * If another process is already doing the invalidate, let that
		 * finish first.  The glock state machine will get back to this
		 * holder again later.
		 */
		if (test_and_set_bit(GLF_INVALIDATE_IN_PROGRESS,
				     &gl->gl_flags))
			return;
		do_error(gl, 0); /* Fail queued try locks */
	}
	gl->gl_req = target;
	set_bit(GLF_BLOCKING, &gl->gl_flags);
	if ((gl->gl_req == LM_ST_UNLOCKED) ||
	    (gl->gl_state == LM_ST_EXCLUSIVE) ||
	    (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB)))
		clear_bit(GLF_BLOCKING, &gl->gl_flags);
	if (!glops->go_inval && !glops->go_sync)
		goto skip_inval;

	spin_unlock(&gl->gl_lockref.lock);
	if (glops->go_sync) {
		ret = glops->go_sync(gl);
		/* If we had a problem syncing (due to io errors or whatever),
		 * we should not invalidate the metadata or tell dlm to
		 * release the glock to other nodes.
		 */
		if (ret) {
			if (cmpxchg(&sdp->sd_log_error, 0, ret)) {
				fs_err(sdp, "Error %d syncing glock \n", ret);
				gfs2_dump_glock(NULL, gl, true);
			}
			spin_lock(&gl->gl_lockref.lock);
			goto skip_inval;
		}
	}
	if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) {
		/*
		 * The call to go_sync should have cleared out the ail list.
		 * If there are still items, we have a problem. We ought to
		 * withdraw, but we can't because the withdraw code also uses
		 * glocks.  Warn about the error, dump the glock, then fall
		 * through and wait for logd to do the withdraw for us.
		 */
		if ((atomic_read(&gl->gl_ail_count) != 0) &&
		    (!cmpxchg(&sdp->sd_log_error, 0, -EIO))) {
			gfs2_glock_assert_warn(gl,
					       !atomic_read(&gl->gl_ail_count));
			gfs2_dump_glock(NULL, gl, true);
		}
		glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA);
		clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
	}
	spin_lock(&gl->gl_lockref.lock);

skip_inval:
	gl->gl_lockref.count++;
	/*
	 * Check for an error encountered since we called go_sync and go_inval.
	 * If so, we can't withdraw from the glock code because the withdraw
	 * code itself uses glocks (see function signal_our_withdraw) to
	 * change the mount to read-only.
	 * Most importantly, we must not call
	 * dlm to unlock the glock until the journal is in a known good state
	 * (after journal replay) otherwise other nodes may use the object
	 * (rgrp or dinode) and then later, journal replay will corrupt the
	 * file system. The best we can do here is wait for the logd daemon
	 * to see sd_log_error and withdraw, and in the meantime, requeue the
	 * work for later.
	 *
	 * We make a special exception for some system glocks, such as the
	 * system statfs inode glock, which needs to be granted before the
	 * gfs2_quotad daemon can exit, and that exit needs to finish before
	 * we can unmount the withdrawn file system.
	 *
	 * However, if we're just unlocking the lock (say, for unmount, when
	 * gfs2_gl_hash_clear calls clear_glock) and recovery is complete
	 * then it's okay to tell dlm to unlock it.
	 */
	if (unlikely(sdp->sd_log_error) && !gfs2_withdrawing_or_withdrawn(sdp))
		gfs2_withdraw_delayed(sdp);
	if (glock_blocked_by_withdraw(gl) &&
	    (target != LM_ST_UNLOCKED ||
	     test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags))) {
		if (!is_system_glock(gl)) {
			request_demote(gl, LM_ST_UNLOCKED, 0, false);
			/*
			 * Ordinarily, we would call dlm and its callback would call
			 * finish_xmote, which would call state_change() to the new state.
			 * Since we withdrew, we won't call dlm, so call state_change
			 * manually, but to the UNLOCKED state we desire.
			 */
			state_change(gl, LM_ST_UNLOCKED);
			/*
			 * We skip telling dlm to do the locking, so we won't get a
			 * reply that would otherwise clear GLF_LOCK. So we clear it here.
			 */
			clear_bit(GLF_LOCK, &gl->gl_flags);
			clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
			gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD);
			return;
		} else {
			clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
		}
	}

	if (ls->ls_ops->lm_lock) {
		set_bit(GLF_PENDING_REPLY, &gl->gl_flags);
		spin_unlock(&gl->gl_lockref.lock);
		ret = ls->ls_ops->lm_lock(gl, target, lck_flags);
		spin_lock(&gl->gl_lockref.lock);

		if (ret == -EINVAL && gl->gl_target == LM_ST_UNLOCKED &&
		    target == LM_ST_UNLOCKED &&
		    test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) {
			/*
			 * The lockspace has been released and the lock has
			 * been unlocked implicitly.
			 */
		} else if (ret) {
			fs_err(sdp, "lm_lock ret %d\n", ret);
			target = gl->gl_state | LM_OUT_ERROR;
		} else {
			/* The operation will be completed asynchronously. */
			return;
		}
		clear_bit(GLF_PENDING_REPLY, &gl->gl_flags);
	}

	/* Complete the operation now. */
	finish_xmote(gl, target);
	gfs2_glock_queue_work(gl, 0);
}

/**
 * run_queue - do all outstanding tasks related to a glock
 * @gl: The glock in question
 * @nonblock: True if we must not block in run_queue
 *
 */

static void run_queue(struct gfs2_glock *gl, const int nonblock)
__releases(&gl->gl_lockref.lock)
__acquires(&gl->gl_lockref.lock)
{
	struct gfs2_holder *gh;

	if (test_bit(GLF_LOCK, &gl->gl_flags))
		return;
	set_bit(GLF_LOCK, &gl->gl_flags);

	/* While a demote is in progress, the GLF_LOCK flag must be set. */
	GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags));

	if (test_bit(GLF_DEMOTE, &gl->gl_flags) &&
	    gl->gl_demote_state != gl->gl_state) {
		if (find_first_holder(gl))
			goto out_unlock;
		if (nonblock)
			goto out_sched;
		set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
		GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE);
		gl->gl_target = gl->gl_demote_state;
		do_xmote(gl, NULL, gl->gl_target);
		return;
	} else {
		if (test_bit(GLF_DEMOTE, &gl->gl_flags))
			gfs2_demote_wake(gl);
		if (do_promote(gl))
			goto out_unlock;
		gh = find_first_waiter(gl);
		if (!gh)
			goto out_unlock;
		gl->gl_target = gh->gh_state;
		if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
			do_error(gl, 0); /* Fail queued try locks */
		do_xmote(gl, gh, gl->gl_target);
		return;
	}

out_sched:
	clear_bit(GLF_LOCK, &gl->gl_flags);
	smp_mb__after_atomic();
	gl->gl_lockref.count++;
	gfs2_glock_queue_work(gl, 0);
	return;

out_unlock:
	clear_bit(GLF_LOCK, &gl->gl_flags);
	smp_mb__after_atomic();
}

/**
 * glock_set_object - set the gl_object field of a glock
 * @gl: the glock
 * @object: the object
 */
void glock_set_object(struct gfs2_glock *gl, void *object)
{
	void *prev_object;

	spin_lock(&gl->gl_lockref.lock);
	prev_object = gl->gl_object;
	gl->gl_object = object;
	spin_unlock(&gl->gl_lockref.lock);
	if (gfs2_assert_warn(gl->gl_name.ln_sbd, prev_object == NULL))
		gfs2_dump_glock(NULL, gl, true);
}

/**
 * glock_clear_object - clear the gl_object field of a glock
 * @gl: the glock
 * @object: object the glock currently points at
 */
void glock_clear_object(struct gfs2_glock *gl, void *object)
{
	void *prev_object;

	spin_lock(&gl->gl_lockref.lock);
	prev_object = gl->gl_object;
	gl->gl_object = NULL;
	spin_unlock(&gl->gl_lockref.lock);
	if (gfs2_assert_warn(gl->gl_name.ln_sbd, prev_object == object))
		gfs2_dump_glock(NULL, gl, true);
}

void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation)
{
	struct gfs2_inode_lvb *ri = (void *)gl->gl_lksb.sb_lvbptr;

	if (ri->ri_magic == 0)
		ri->ri_magic = cpu_to_be32(GFS2_MAGIC);
	if (ri->ri_magic == cpu_to_be32(GFS2_MAGIC))
		ri->ri_generation_deleted = cpu_to_be64(generation);
}

bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation)
{
	struct gfs2_inode_lvb *ri = (void *)gl->gl_lksb.sb_lvbptr;

	if (ri->ri_magic != cpu_to_be32(GFS2_MAGIC))
		return false;
	return generation <= be64_to_cpu(ri->ri_generation_deleted);
}

static void gfs2_glock_poke(struct gfs2_glock *gl)
{
	int flags = LM_FLAG_TRY_1CB | LM_FLAG_ANY | GL_SKIP;
	struct gfs2_holder gh;
	int error;

	__gfs2_holder_init(gl, LM_ST_SHARED, flags, &gh, _RET_IP_);
	error = gfs2_glock_nq(&gh);
	if (!error)
		gfs2_glock_dq(&gh);
	gfs2_holder_uninit(&gh);
}

static struct gfs2_inode *gfs2_grab_existing_inode(struct gfs2_glock *gl)
{
	struct gfs2_inode *ip;

	spin_lock(&gl->gl_lockref.lock);
	ip = gl->gl_object;
	if (ip && !igrab(&ip->i_inode))
		ip = NULL;
	spin_unlock(&gl->gl_lockref.lock);
	if (ip) {
		wait_on_inode(&ip->i_inode);
		if (is_bad_inode(&ip->i_inode)) {
			iput(&ip->i_inode);
			ip = NULL;
		}
	}
	return ip;
}

static void gfs2_try_evict(struct gfs2_glock *gl)
{
	struct gfs2_inode *ip;

	/*
	 * If there is contention on the iopen glock and we have an inode, try
	 * to grab and release the inode so that it can be evicted.  The
	 * GIF_DEFER_DELETE flag indicates to gfs2_evict_inode() that the inode
	 * should not be deleted locally.  This will allow the remote node to
	 * go ahead and delete the inode without us having to do it, which will
	 * avoid rgrp glock thrashing.
	 *
	 * The remote node is likely still holding the corresponding inode
	 * glock, so it will run before we get to verify that the delete has
	 * happened below.  (Verification is triggered by the call to
	 * gfs2_queue_verify_delete() in gfs2_evict_inode().)
	 */
	ip = gfs2_grab_existing_inode(gl);
	if (ip) {
		set_bit(GLF_DEFER_DELETE, &gl->gl_flags);
		d_prune_aliases(&ip->i_inode);
		iput(&ip->i_inode);
		clear_bit(GLF_DEFER_DELETE, &gl->gl_flags);

		/* If the inode was evicted, gl->gl_object will now be NULL. */
		ip = gfs2_grab_existing_inode(gl);
		if (ip) {
			gfs2_glock_poke(ip->i_gl);
			iput(&ip->i_inode);
		}
	}
}

bool gfs2_queue_try_to_evict(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

	if (test_and_set_bit(GLF_TRY_TO_EVICT, &gl->gl_flags))
		return false;
	return !mod_delayed_work(sdp->sd_delete_wq, &gl->gl_delete, 0);
}

bool gfs2_queue_verify_delete(struct gfs2_glock *gl, bool later)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	unsigned long delay;

	if (test_and_set_bit(GLF_VERIFY_DELETE, &gl->gl_flags))
		return false;
	delay = later ? HZ + get_random_long() % (HZ * 9) : 0;
	return queue_delayed_work(sdp->sd_delete_wq, &gl->gl_delete, delay);
}

static void delete_work_func(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct gfs2_glock *gl = container_of(dwork, struct gfs2_glock, gl_delete);
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	bool verify_delete = test_and_clear_bit(GLF_VERIFY_DELETE, &gl->gl_flags);

	if (test_and_clear_bit(GLF_TRY_TO_EVICT, &gl->gl_flags))
		gfs2_try_evict(gl);

	if (verify_delete) {
		u64 no_addr = gl->gl_name.ln_number;
		struct inode *inode;

		inode = gfs2_lookup_by_inum(sdp, no_addr, gl->gl_no_formal_ino,
					    GFS2_BLKST_UNLINKED);
		if (IS_ERR(inode)) {
			if (PTR_ERR(inode) == -EAGAIN &&
			    !test_bit(SDF_KILL, &sdp->sd_flags) &&
			    gfs2_queue_verify_delete(gl, true))
				return;
		} else {
			d_prune_aliases(inode);
			iput(inode);
		}
	}

	gfs2_glock_put(gl);
}

static void glock_work_func(struct work_struct *work)
{
	unsigned long delay = 0;
	struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work);
	unsigned int drop_refs = 1;

	spin_lock(&gl->gl_lockref.lock);
	if (test_bit(GLF_HAVE_REPLY, &gl->gl_flags)) {
		clear_bit(GLF_HAVE_REPLY, &gl->gl_flags);
		finish_xmote(gl, gl->gl_reply);
		drop_refs++;
	}
	if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
	    gl->gl_state != LM_ST_UNLOCKED &&
	    gl->gl_demote_state != LM_ST_EXCLUSIVE) {
		if (gl->gl_name.ln_type == LM_TYPE_INODE) {
			unsigned long holdtime, now = jiffies;

			holdtime = gl->gl_tchange + gl->gl_hold_time;
			if (time_before(now, holdtime))
				delay = holdtime - now;
		}

		if (!delay) {
			clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags);
			gfs2_set_demote(GLF_DEMOTE, gl);
		}
	}
	run_queue(gl, 0);
	if (delay) {
		/* Keep one glock reference for the work we requeue. */
		drop_refs--;
		gfs2_glock_queue_work(gl, delay);
	}

	/* Drop the remaining glock references manually. */
	GLOCK_BUG_ON(gl, gl->gl_lockref.count < drop_refs);
	gl->gl_lockref.count -= drop_refs;
	if (!gl->gl_lockref.count) {
		if (gl->gl_state == LM_ST_UNLOCKED) {
			__gfs2_glock_put(gl);
			return;
		}
		gfs2_glock_add_to_lru(gl);
	}
	spin_unlock(&gl->gl_lockref.lock);
}

static struct gfs2_glock *find_insert_glock(struct lm_lockname *name,
					    struct gfs2_glock *new)
{
	struct wait_glock_queue wait;
	wait_queue_head_t *wq = glock_waitqueue(name);
	struct gfs2_glock *gl;

	wait.name = name;
	init_wait(&wait.wait);
	wait.wait.func = glock_wake_function;

again:
	prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
	rcu_read_lock();
	if (new) {
		gl = rhashtable_lookup_get_insert_fast(&gl_hash_table,
						       &new->gl_node, ht_parms);
		if (IS_ERR(gl))
			goto out;
	} else {
		gl = rhashtable_lookup_fast(&gl_hash_table,
					    name, ht_parms);
	}
	if (gl && !lockref_get_not_dead(&gl->gl_lockref)) {
		rcu_read_unlock();
		schedule();
		goto again;
	}
out:
	rcu_read_unlock();
	finish_wait(wq, &wait.wait);
	if (gl)
		gfs2_glock_remove_from_lru(gl);
	return gl;
}

/**
 * gfs2_glock_get() - Get a glock, or create one if one doesn't exist
 * @sdp: The GFS2 superblock
 * @number: the lock number
 * @glops: The glock_operations to use
 * @create: If 0, don't create the glock if it doesn't exist
 * @glp: the glock is returned here
 *
 * This does not lock a glock, just finds/creates structures for one.
 *
 * Returns: errno
 */

int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
		   const struct gfs2_glock_operations *glops, int create,
		   struct gfs2_glock **glp)
{
	struct lm_lockname name = { .ln_number = number,
				    .ln_type = glops->go_type,
				    .ln_sbd = sdp };
	struct gfs2_glock *gl, *tmp;
	struct address_space *mapping;

	gl = find_insert_glock(&name, NULL);
	if (gl)
		goto found;
	if (!create)
		return -ENOENT;

	if (glops->go_flags & GLOF_ASPACE) {
		struct gfs2_glock_aspace *gla =
			kmem_cache_alloc(gfs2_glock_aspace_cachep, GFP_NOFS);
		if (!gla)
			return -ENOMEM;
		gl = &gla->glock;
	} else {
		gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_NOFS);
		if (!gl)
			return -ENOMEM;
	}
	memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb));
	gl->gl_ops = glops;

	if (glops->go_flags & GLOF_LVB) {
		gl->gl_lksb.sb_lvbptr = kzalloc(GDLM_LVB_SIZE, GFP_NOFS);
		if (!gl->gl_lksb.sb_lvbptr) {
			gfs2_glock_dealloc(&gl->gl_rcu);
			return -ENOMEM;
		}
	}

	atomic_inc(&sdp->sd_glock_disposal);
	gl->gl_node.next = NULL;
	gl->gl_flags = BIT(GLF_INITIAL);
	if (glops->go_instantiate)
		gl->gl_flags |= BIT(GLF_INSTANTIATE_NEEDED);
	gl->gl_name = name;
	lockref_init(&gl->gl_lockref);
	lockdep_set_subclass(&gl->gl_lockref.lock, glops->go_subclass);
	gl->gl_state = LM_ST_UNLOCKED;
	gl->gl_target = LM_ST_UNLOCKED;
	gl->gl_demote_state = LM_ST_EXCLUSIVE;
	gl->gl_dstamp = 0;
	preempt_disable();
	/* We use the global stats to estimate the initial per-glock stats */
	gl->gl_stats = this_cpu_ptr(sdp->sd_lkstats)->lkstats[glops->go_type];
	preempt_enable();
	gl->gl_stats.stats[GFS2_LKS_DCOUNT] = 0;
	gl->gl_stats.stats[GFS2_LKS_QCOUNT] = 0;
	gl->gl_tchange = jiffies;
	gl->gl_object = NULL;
	gl->gl_hold_time = GL_GLOCK_DFT_HOLD;
	INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
	if (gl->gl_name.ln_type == LM_TYPE_IOPEN)
		INIT_DELAYED_WORK(&gl->gl_delete, delete_work_func);

	mapping = gfs2_glock2aspace(gl);
	if (mapping) {
		mapping->a_ops = &gfs2_meta_aops;
		mapping->host = sdp->sd_inode;
		mapping->flags = 0;
		mapping_set_gfp_mask(mapping, GFP_NOFS);
		mapping->i_private_data = NULL;
		mapping->writeback_index = 0;
	}

	tmp = find_insert_glock(&name, gl);
	if (tmp) {
		gfs2_glock_dealloc(&gl->gl_rcu);
		if (atomic_dec_and_test(&sdp->sd_glock_disposal))
			wake_up(&sdp->sd_kill_wait);

		if (IS_ERR(tmp))
			return PTR_ERR(tmp);
		gl = tmp;
	}

found:
	*glp = gl;
	return 0;
}

/**
 * __gfs2_holder_init - initialize a struct gfs2_holder in the default way
 * @gl: the glock
 * @state: the state we're requesting
 * @flags: the modifier flags
 * @gh: the holder structure
 *
 */

void __gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags,
			struct gfs2_holder *gh, unsigned long ip)
{
	INIT_LIST_HEAD(&gh->gh_list);
	gh->gh_gl = gfs2_glock_hold(gl);
	gh->gh_ip = ip;
	gh->gh_owner_pid = get_pid(task_pid(current));
	gh->gh_state = state;
	gh->gh_flags = flags;
	gh->gh_iflags = 0;
}

/**
 * gfs2_holder_reinit - reinitialize a struct gfs2_holder so we can requeue it
 * @state: the state we're requesting
 * @flags: the modifier flags
 * @gh: the holder structure
 *
 * Don't mess with the glock.
 *
 */

void gfs2_holder_reinit(unsigned int state, u16 flags, struct gfs2_holder *gh)
{
	gh->gh_state = state;
	gh->gh_flags = flags;
	gh->gh_iflags = 0;
	gh->gh_ip = _RET_IP_;
	put_pid(gh->gh_owner_pid);
	gh->gh_owner_pid = get_pid(task_pid(current));
}

/**
 * gfs2_holder_uninit - uninitialize a holder structure (drop glock reference)
 * @gh: the holder structure
 *
 */

void gfs2_holder_uninit(struct gfs2_holder *gh)
{
	put_pid(gh->gh_owner_pid);
	gfs2_glock_put(gh->gh_gl);
	gfs2_holder_mark_uninitialized(gh);
	gh->gh_ip = 0;
}

static void gfs2_glock_update_hold_time(struct gfs2_glock *gl,
					unsigned long start_time)
{
	/* Have we waited longer than a second? */
	if (time_after(jiffies, start_time + HZ)) {
		/* Lengthen the minimum hold time. */
		gl->gl_hold_time = min(gl->gl_hold_time + GL_GLOCK_HOLD_INCR,
				       GL_GLOCK_MAX_HOLD);
	}
}

/**
 * gfs2_glock_holder_ready - holder is ready and its error code can be collected
 * @gh: the glock holder
 *
 * Called when a glock holder no longer needs to be waited for because it is
 * now either held (HIF_HOLDER set; gh_error == 0), or acquiring the lock has
 * failed (gh_error != 0).
 */

int gfs2_glock_holder_ready(struct gfs2_holder *gh)
{
	if (gh->gh_error || (gh->gh_flags & GL_SKIP))
		return gh->gh_error;
	gh->gh_error = gfs2_instantiate(gh);
	if (gh->gh_error)
		gfs2_glock_dq(gh);
	return gh->gh_error;
}

/**
 * gfs2_glock_wait - wait on a glock acquisition
 * @gh: the glock holder
 *
 * Returns: 0 on success
 */

int gfs2_glock_wait(struct gfs2_holder *gh)
{
	unsigned long start_time = jiffies;

	might_sleep();
	wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE);
	gfs2_glock_update_hold_time(gh->gh_gl, start_time);
	return gfs2_glock_holder_ready(gh);
}

static int glocks_pending(unsigned int num_gh, struct gfs2_holder *ghs)
{
	int i;

	for (i = 0; i < num_gh; i++)
		if (test_bit(HIF_WAIT, &ghs[i].gh_iflags))
			return 1;
	return 0;
}

/**
 * gfs2_glock_async_wait - wait on multiple asynchronous glock acquisitions
 * @num_gh: the number of holders in the array
 * @ghs: the glock holder array
 *
 * Returns: 0 on success, meaning all glocks have been granted and are held.
 *          -ESTALE if the request timed out, meaning all glocks were released,
 *          and the caller should retry the operation.
 */

int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs)
{
	struct gfs2_sbd *sdp = ghs[0].gh_gl->gl_name.ln_sbd;
	int i, ret = 0, timeout = 0;
	unsigned long start_time = jiffies;

	might_sleep();
	/*
	 * Total up the (minimum hold time * 2) of all glocks and use that to
	 * determine the max amount of time we should wait.
	 */
	for (i = 0; i < num_gh; i++)
		timeout += ghs[i].gh_gl->gl_hold_time << 1;

	if (!wait_event_timeout(sdp->sd_async_glock_wait,
				!glocks_pending(num_gh, ghs), timeout)) {
		ret = -ESTALE; /* request timed out. */
		goto out;
	}

	for (i = 0; i < num_gh; i++) {
		struct gfs2_holder *gh = &ghs[i];
		int ret2;

		if (test_bit(HIF_HOLDER, &gh->gh_iflags)) {
			gfs2_glock_update_hold_time(gh->gh_gl,
						    start_time);
		}
		ret2 = gfs2_glock_holder_ready(gh);
		if (!ret)
			ret = ret2;
	}

out:
	if (ret) {
		for (i = 0; i < num_gh; i++) {
			struct gfs2_holder *gh = &ghs[i];

			gfs2_glock_dq(gh);
		}
	}
	return ret;
}

/**
 * request_demote - process a demote request
 * @gl: the glock
 * @state: the state the caller wants us to change to
 * @delay: zero to demote immediately; otherwise pending demote
 * @remote: true if this came from a different cluster node
 *
 * There are only two requests that we are going to see in actual
 * practice: LM_ST_SHARED and LM_ST_UNLOCKED
 */

static void request_demote(struct gfs2_glock *gl, unsigned int state,
			   unsigned long delay, bool remote)
{
	gfs2_set_demote(delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE, gl);
	if (gl->gl_demote_state == LM_ST_EXCLUSIVE) {
		gl->gl_demote_state = state;
		gl->gl_demote_time = jiffies;
	} else if (gl->gl_demote_state != LM_ST_UNLOCKED &&
		   gl->gl_demote_state != state) {
		gl->gl_demote_state = LM_ST_UNLOCKED;
	}
	if (gl->gl_ops->go_callback)
		gl->gl_ops->go_callback(gl, remote);
	trace_gfs2_demote_rq(gl, remote);
}

void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);

	if (seq) {
		seq_vprintf(seq, fmt, args);
	} else {
		vaf.fmt = fmt;
		vaf.va = &args;

		pr_err("%pV", &vaf);
	}

	va_end(args);
}

static inline bool pid_is_meaningful(const struct gfs2_holder *gh)
{
	if (!(gh->gh_flags & GL_NOPID))
		return true;
	return !test_bit(HIF_HOLDER, &gh->gh_iflags);
}

/**
 * add_to_queue - Add a holder to the wait queue (but look for recursion)
 * @gh: the holder structure to add
 *
 * Eventually we should move the recursive locking trap to a
 * debugging option or something like that. This is the fast
 * path and needs to have the minimum number of distractions.
 *
 */

static inline void add_to_queue(struct gfs2_holder *gh)
__releases(&gl->gl_lockref.lock)
__acquires(&gl->gl_lockref.lock)
{
	struct gfs2_glock *gl = gh->gh_gl;
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct gfs2_holder *gh2;
	int try_futile = 0;

	GLOCK_BUG_ON(gl, gh->gh_owner_pid == NULL);
	if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags))
		GLOCK_BUG_ON(gl, true);

	if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
		if (test_bit(GLF_LOCK, &gl->gl_flags)) {
			struct gfs2_holder *current_gh;

			current_gh = find_first_holder(gl);
			try_futile = !may_grant(gl, current_gh, gh);
		}
		if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
			goto fail;
	}

	list_for_each_entry(gh2, &gl->gl_holders, gh_list) {
		if (likely(gh2->gh_owner_pid != gh->gh_owner_pid))
			continue;
		if (gh->gh_gl->gl_ops->go_type == LM_TYPE_FLOCK)
			continue;
		if (!pid_is_meaningful(gh2))
			continue;
		goto trap_recursive;
	}
	list_for_each_entry(gh2, &gl->gl_holders, gh_list) {
		if (try_futile &&
		    !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) {
fail:
			gh->gh_error = GLR_TRYFAILED;
			gfs2_holder_wake(gh);
			return;
		}
	}
	trace_gfs2_glock_queue(gh, 1);
	gfs2_glstats_inc(gl, GFS2_LKS_QCOUNT);
	gfs2_sbstats_inc(gl, GFS2_LKS_QCOUNT);
	list_add_tail(&gh->gh_list, &gl->gl_holders);
	return;

trap_recursive:
	fs_err(sdp, "original: %pSR\n", (void *)gh2->gh_ip);
	fs_err(sdp, "pid: %d\n", pid_nr(gh2->gh_owner_pid));
	fs_err(sdp, "lock type: %d req lock state : %d\n",
	       gh2->gh_gl->gl_name.ln_type, gh2->gh_state);
	fs_err(sdp, "new: %pSR\n", (void *)gh->gh_ip);
	fs_err(sdp, "pid: %d\n", pid_nr(gh->gh_owner_pid));
	fs_err(sdp, "lock type: %d req lock state : %d\n",
	       gh->gh_gl->gl_name.ln_type, gh->gh_state);
	gfs2_dump_glock(NULL, gl, true);
	BUG();
}

/**
 * gfs2_glock_nq - enqueue a struct gfs2_holder onto a glock (acquire a glock)
 * @gh: the holder structure
 *
 * if (gh->gh_flags & GL_ASYNC), this never returns an error
 *
 * Returns: 0, GLR_TRYFAILED, or errno on failure
 */

int gfs2_glock_nq(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	int error;

	if (glock_blocked_by_withdraw(gl) && !(gh->gh_flags & LM_FLAG_NOEXP))
		return -EIO;

	if (gh->gh_flags & GL_NOBLOCK) {
		struct gfs2_holder *current_gh;

		error = -ECHILD;
		spin_lock(&gl->gl_lockref.lock);
		if (find_last_waiter(gl))
			goto unlock;
		current_gh = find_first_holder(gl);
		if (!may_grant(gl, current_gh, gh))
			goto unlock;
		set_bit(HIF_HOLDER, &gh->gh_iflags);
		list_add_tail(&gh->gh_list, &gl->gl_holders);
		trace_gfs2_promote(gh);
		error = 0;
unlock:
		spin_unlock(&gl->gl_lockref.lock);
		return error;
	}

	gh->gh_error = 0;
	spin_lock(&gl->gl_lockref.lock);
	add_to_queue(gh);
	if (unlikely((LM_FLAG_NOEXP & gh->gh_flags) &&
		     test_and_clear_bit(GLF_HAVE_FROZEN_REPLY, &gl->gl_flags))) {
		set_bit(GLF_HAVE_REPLY, &gl->gl_flags);
		gl->gl_lockref.count++;
		gfs2_glock_queue_work(gl, 0);
	}
	run_queue(gl, 1);
	spin_unlock(&gl->gl_lockref.lock);

	error = 0;
	if (!(gh->gh_flags & GL_ASYNC))
		error = gfs2_glock_wait(gh);

	return error;
}

/**
 * gfs2_glock_poll - poll to see if an async request has been completed
 * @gh: the holder
 *
 * Returns: 1 if the request is ready to be gfs2_glock_wait()ed on
 */

int gfs2_glock_poll(struct gfs2_holder *gh)
{
	return test_bit(HIF_WAIT, &gh->gh_iflags) ? 0 : 1;
}

static void __gfs2_glock_dq(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	unsigned delay = 0;
	int fast_path = 0;

	/*
	 * This holder should not be cached, so mark it for demote.
	 * Note: this should be done before the glock_needs_demote
	 * check below.
	 */
	if (gh->gh_flags & GL_NOCACHE)
		request_demote(gl, LM_ST_UNLOCKED, 0, false);

	list_del_init(&gh->gh_list);
	clear_bit(HIF_HOLDER, &gh->gh_iflags);
	trace_gfs2_glock_queue(gh, 0);

	/*
	 * If there hasn't been a demote request we are done.
	 * (Let the remaining holders, if any, keep holding it.)
	 */
	if (!glock_needs_demote(gl)) {
		if (list_empty(&gl->gl_holders))
			fast_path = 1;
	}

	if (unlikely(!fast_path)) {
		gl->gl_lockref.count++;
		if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
		    !test_bit(GLF_DEMOTE, &gl->gl_flags) &&
		    gl->gl_name.ln_type == LM_TYPE_INODE)
			delay = gl->gl_hold_time;
		gfs2_glock_queue_work(gl, delay);
	}
}

/**
 * gfs2_glock_dq - dequeue a struct gfs2_holder from a glock (release a glock)
 * @gh: the glock holder
 *
 */
void gfs2_glock_dq(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

	spin_lock(&gl->gl_lockref.lock);
	if (!gfs2_holder_queued(gh)) {
		/*
		 * May have already been dequeued because the locking request
		 * was GL_ASYNC and it has failed in the meantime.
		 */
		goto out;
	}

	if (list_is_first(&gh->gh_list, &gl->gl_holders) &&
	    !test_bit(HIF_HOLDER, &gh->gh_iflags) &&
	    test_bit(GLF_LOCK, &gl->gl_flags) &&
	    !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
	    !test_bit(GLF_CANCELING, &gl->gl_flags)) {
		set_bit(GLF_CANCELING, &gl->gl_flags);
		spin_unlock(&gl->gl_lockref.lock);
		gl->gl_name.ln_sbd->sd_lockstruct.ls_ops->lm_cancel(gl);
		wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE);
		spin_lock(&gl->gl_lockref.lock);
		clear_bit(GLF_CANCELING, &gl->gl_flags);
		clear_bit(GLF_LOCK, &gl->gl_flags);
		if (!gfs2_holder_queued(gh))
			goto out;
	}

	/*
	 * If we're in the process of file system withdraw, we cannot just
	 * dequeue any glocks until our journal is recovered, lest we introduce
	 * file system corruption. We need two exceptions to this rule: We need
	 * to allow unlocking of nondisk glocks and the glock for our own
	 * journal that needs recovery.
	 */
	if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) &&
	    glock_blocked_by_withdraw(gl) &&
	    gh->gh_gl != sdp->sd_jinode_gl) {
		sdp->sd_glock_dqs_held++;
		spin_unlock(&gl->gl_lockref.lock);
		might_sleep();
		wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY,
			    TASK_UNINTERRUPTIBLE);
		spin_lock(&gl->gl_lockref.lock);
	}

	__gfs2_glock_dq(gh);
out:
	spin_unlock(&gl->gl_lockref.lock);
}

void gfs2_glock_dq_wait(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	gfs2_glock_dq(gh);
	might_sleep();
	wait_on_bit(&gl->gl_flags, GLF_DEMOTE, TASK_UNINTERRUPTIBLE);
}

/**
 * gfs2_glock_dq_uninit - dequeue a holder from a glock and initialize it
 * @gh: the holder structure
 *
 */

void gfs2_glock_dq_uninit(struct gfs2_holder *gh)
{
	gfs2_glock_dq(gh);
	gfs2_holder_uninit(gh);
}

/**
 * gfs2_glock_nq_num - acquire a glock based on lock number
 * @sdp: the filesystem
 * @number: the lock number
 * @glops: the glock operations for the type of glock
 * @state: the state to acquire the glock in
 * @flags: modifier flags for the acquisition
 * @gh: the struct gfs2_holder
 *
 * Returns: errno
 */

int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number,
		      const struct gfs2_glock_operations *glops,
		      unsigned int state, u16 flags, struct gfs2_holder *gh)
{
	struct gfs2_glock *gl;
	int error;

	error = gfs2_glock_get(sdp, number, glops, CREATE, &gl);
	if (!error) {
		error = gfs2_glock_nq_init(gl, state, flags, gh);
		gfs2_glock_put(gl);
	}

	return error;
}

/**
 * glock_compare - Compare two struct gfs2_glock structures for sorting
 * @arg_a: the first structure
 * @arg_b: the second structure
 *
 */

static int glock_compare(const void *arg_a, const void *arg_b)
{
	const struct gfs2_holder *gh_a = *(const struct gfs2_holder **)arg_a;
	const struct gfs2_holder *gh_b = *(const struct gfs2_holder **)arg_b;
	const struct lm_lockname *a = &gh_a->gh_gl->gl_name;
	const struct lm_lockname *b = &gh_b->gh_gl->gl_name;

	if (a->ln_number > b->ln_number)
		return 1;
	if (a->ln_number < b->ln_number)
		return -1;
	BUG_ON(gh_a->gh_gl->gl_ops->go_type == gh_b->gh_gl->gl_ops->go_type);
	return 0;
}

/**
 * nq_m_sync - synchronously acquire more than one glock in deadlock free order
 * @num_gh: the number of structures
 * @ghs: an array of struct gfs2_holder structures
 * @p: placeholder for the holder structure to pass back
 *
 * Returns: 0 on success (all glocks acquired),
 *          errno on failure (no glocks acquired)
 */

static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs,
		     struct gfs2_holder **p)
{
	unsigned int x;
	int error = 0;

	for (x = 0; x < num_gh; x++)
		p[x] = &ghs[x];

	sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare, NULL);

	for (x = 0; x < num_gh; x++) {
		error = gfs2_glock_nq(p[x]);
		if (error) {
			while (x--)
				gfs2_glock_dq(p[x]);
			break;
		}
	}

	return error;
}

/**
 * gfs2_glock_nq_m - acquire multiple glocks
 * @num_gh: the number of structures
 * @ghs: an array of struct gfs2_holder structures
 *
 * Returns: 0 on success (all glocks acquired),
 *          errno on failure (no glocks acquired)
 */

int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs)
{
	struct gfs2_holder *tmp[4];
	struct gfs2_holder **pph = tmp;
	int error = 0;

	switch(num_gh) {
	case 0:
		return 0;
	case 1:
		return gfs2_glock_nq(ghs);
	default:
		if (num_gh <= 4)
			break;
		pph = kmalloc_array(num_gh, sizeof(struct gfs2_holder *),
				    GFP_NOFS);
		if (!pph)
			return -ENOMEM;
	}

	error = nq_m_sync(num_gh, ghs, pph);

	if (pph != tmp)
		kfree(pph);

	return error;
}

/**
 * gfs2_glock_dq_m - release multiple glocks
 * @num_gh: the number of structures
 * @ghs: an array of struct gfs2_holder structures
 *
 */

void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
{
	while (num_gh--)
		gfs2_glock_dq(&ghs[num_gh]);
}

void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
{
	unsigned long delay = 0;

	gfs2_glock_hold(gl);
	spin_lock(&gl->gl_lockref.lock);
	if (!list_empty(&gl->gl_holders) &&
	    gl->gl_name.ln_type == LM_TYPE_INODE) {
		unsigned long now = jiffies;
		unsigned long holdtime;

		holdtime = gl->gl_tchange + gl->gl_hold_time;

		if (time_before(now, holdtime))
			delay = holdtime - now;
		if (test_bit(GLF_HAVE_REPLY, &gl->gl_flags))
			delay = gl->gl_hold_time;
	}
	request_demote(gl, state, delay, true);
	gfs2_glock_queue_work(gl, delay);
	spin_unlock(&gl->gl_lockref.lock);
}

/**
 * gfs2_should_freeze - Figure out if glock should be frozen
 * @gl: The glock in question
 *
 * Glocks are not frozen if (a) the result of the dlm operation is
 * an error, (b) the locking operation was an unlock operation or
 * (c) if there is a "noexp" flagged request anywhere in the queue
 *
 * Returns: 1 if freezing should occur, 0 otherwise
 */

static int gfs2_should_freeze(const struct gfs2_glock *gl)
{
	const struct gfs2_holder *gh;

	if (gl->gl_reply & ~LM_OUT_ST_MASK)
		return 0;
	if (gl->gl_target == LM_ST_UNLOCKED)
		return 0;

	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
		if (test_bit(HIF_HOLDER, &gh->gh_iflags))
			continue;
		if (LM_FLAG_NOEXP & gh->gh_flags)
			return 0;
	}

	return 1;
}

/**
 * gfs2_glock_complete - Callback used by locking
 * @gl: Pointer to the glock
 * @ret: The return value from the dlm
 *
 * The gl_reply field is under the gl_lockref.lock lock so that it is ok
 * to use a bitfield shared with other glock state fields.
1919 */ 1920 1921 void gfs2_glock_complete(struct gfs2_glock *gl, int ret) 1922 { 1923 struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct; 1924 1925 spin_lock(&gl->gl_lockref.lock); 1926 clear_bit(GLF_PENDING_REPLY, &gl->gl_flags); 1927 gl->gl_reply = ret; 1928 1929 if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))) { 1930 if (gfs2_should_freeze(gl)) { 1931 set_bit(GLF_HAVE_FROZEN_REPLY, &gl->gl_flags); 1932 spin_unlock(&gl->gl_lockref.lock); 1933 return; 1934 } 1935 } 1936 1937 gl->gl_lockref.count++; 1938 set_bit(GLF_HAVE_REPLY, &gl->gl_flags); 1939 gfs2_glock_queue_work(gl, 0); 1940 spin_unlock(&gl->gl_lockref.lock); 1941 } 1942 1943 static int glock_cmp(void *priv, const struct list_head *a, 1944 const struct list_head *b) 1945 { 1946 struct gfs2_glock *gla, *glb; 1947 1948 gla = list_entry(a, struct gfs2_glock, gl_lru); 1949 glb = list_entry(b, struct gfs2_glock, gl_lru); 1950 1951 if (gla->gl_name.ln_number > glb->gl_name.ln_number) 1952 return 1; 1953 if (gla->gl_name.ln_number < glb->gl_name.ln_number) 1954 return -1; 1955 1956 return 0; 1957 } 1958 1959 static bool can_free_glock(struct gfs2_glock *gl) 1960 { 1961 struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; 1962 1963 return !test_bit(GLF_LOCK, &gl->gl_flags) && 1964 !gl->gl_lockref.count && 1965 (!test_bit(GLF_LFLUSH, &gl->gl_flags) || 1966 test_bit(SDF_KILL, &sdp->sd_flags)); 1967 } 1968 1969 /** 1970 * gfs2_dispose_glock_lru - Demote a list of glocks 1971 * @list: The list to dispose of 1972 * 1973 * Disposing of glocks may involve disk accesses, so that here we sort 1974 * the glocks by number (i.e. disk location of the inodes) so that if 1975 * there are any such accesses, they'll be sent in order (mostly). 1976 * 1977 * Must be called under the lru_lock, but may drop and retake this 1978 * lock. While the lru_lock is dropped, entries may vanish from the 1979 * list, but no new entries will appear on the list (since it is 1980 * private) 1981 */ 1982 1983 static unsigned long gfs2_dispose_glock_lru(struct list_head *list) 1984 __releases(&lru_lock) 1985 __acquires(&lru_lock) 1986 { 1987 struct gfs2_glock *gl; 1988 unsigned long freed = 0; 1989 1990 list_sort(NULL, list, glock_cmp); 1991 1992 while(!list_empty(list)) { 1993 gl = list_first_entry(list, struct gfs2_glock, gl_lru); 1994 if (!spin_trylock(&gl->gl_lockref.lock)) { 1995 add_back_to_lru: 1996 list_move(&gl->gl_lru, &lru_list); 1997 continue; 1998 } 1999 if (!can_free_glock(gl)) { 2000 spin_unlock(&gl->gl_lockref.lock); 2001 goto add_back_to_lru; 2002 } 2003 list_del_init(&gl->gl_lru); 2004 atomic_dec(&lru_count); 2005 clear_bit(GLF_LRU, &gl->gl_flags); 2006 freed++; 2007 gl->gl_lockref.count++; 2008 if (gl->gl_state != LM_ST_UNLOCKED) 2009 request_demote(gl, LM_ST_UNLOCKED, 0, false); 2010 gfs2_glock_queue_work(gl, 0); 2011 spin_unlock(&gl->gl_lockref.lock); 2012 cond_resched_lock(&lru_lock); 2013 } 2014 return freed; 2015 } 2016 2017 /** 2018 * gfs2_scan_glock_lru - Scan the LRU looking for locks to demote 2019 * @nr: The number of entries to scan 2020 * 2021 * This function selects the entries on the LRU which are able to 2022 * be demoted, and then kicks off the process by calling 2023 * gfs2_dispose_glock_lru() above. 
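 *
 * Returns: the number of glocks removed from the LRU and queued for demotion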
2024 */
2025 
2026 static unsigned long gfs2_scan_glock_lru(unsigned long nr)
2027 {
2028 struct gfs2_glock *gl, *next;
2029 LIST_HEAD(dispose);
2030 unsigned long freed = 0;
2031 
2032 spin_lock(&lru_lock);
2033 list_for_each_entry_safe(gl, next, &lru_list, gl_lru) {
2034 if (!nr--)
2035 break;
2036 if (can_free_glock(gl))
2037 list_move(&gl->gl_lru, &dispose);
2038 }
2039 if (!list_empty(&dispose))
2040 freed = gfs2_dispose_glock_lru(&dispose);
2041 spin_unlock(&lru_lock);
2042 
2043 return freed;
2044 }
2045 
2046 static unsigned long gfs2_glock_shrink_scan(struct shrinker *shrink,
2047 struct shrink_control *sc)
2048 {
2049 if (!(sc->gfp_mask & __GFP_FS))
2050 return SHRINK_STOP;
2051 return gfs2_scan_glock_lru(sc->nr_to_scan);
2052 }
2053 
2054 static unsigned long gfs2_glock_shrink_count(struct shrinker *shrink,
2055 struct shrink_control *sc)
2056 {
2057 return vfs_pressure_ratio(atomic_read(&lru_count));
2058 }
2059 
2060 static struct shrinker *glock_shrinker;
2061 
2062 /**
2063 * glock_hash_walk - Call a function for each glock in a hash bucket
2064 * @examiner: the function
2065 * @sdp: the filesystem
2066 *
2067 * Note that the function can be called multiple times on the same
2068 * object. So the user must ensure that the function can cope with
2069 * that.
2070 */
2071 
2072 static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
2073 {
2074 struct gfs2_glock *gl;
2075 struct rhashtable_iter iter;
2076 
2077 rhashtable_walk_enter(&gl_hash_table, &iter);
2078 
2079 do {
2080 rhashtable_walk_start(&iter);
2081 
2082 while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl)) {
2083 if (gl->gl_name.ln_sbd == sdp)
2084 examiner(gl);
2085 }
2086 
2087 rhashtable_walk_stop(&iter);
2088 } while (cond_resched(), gl == ERR_PTR(-EAGAIN));
2089 
2090 rhashtable_walk_exit(&iter);
2091 }
2092 
2093 void gfs2_cancel_delete_work(struct gfs2_glock *gl)
2094 {
2095 clear_bit(GLF_TRY_TO_EVICT, &gl->gl_flags);
2096 clear_bit(GLF_VERIFY_DELETE, &gl->gl_flags);
2097 if (cancel_delayed_work(&gl->gl_delete))
2098 gfs2_glock_put(gl);
2099 }
2100 
2101 static void flush_delete_work(struct gfs2_glock *gl)
2102 {
2103 if (gl->gl_name.ln_type == LM_TYPE_IOPEN) {
2104 struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
2105 
2106 if (cancel_delayed_work(&gl->gl_delete)) {
2107 queue_delayed_work(sdp->sd_delete_wq,
2108 &gl->gl_delete, 0);
2109 }
2110 }
2111 }
2112 
2113 void gfs2_flush_delete_work(struct gfs2_sbd *sdp)
2114 {
2115 glock_hash_walk(flush_delete_work, sdp);
2116 flush_workqueue(sdp->sd_delete_wq);
2117 }
2118 
2119 /**
2120 * thaw_glock - thaw out a glock which has an unprocessed reply waiting
2121 * @gl: The glock to thaw
2122 *
2123 */
2124 
2125 static void thaw_glock(struct gfs2_glock *gl)
2126 {
2127 if (!test_and_clear_bit(GLF_HAVE_FROZEN_REPLY, &gl->gl_flags))
2128 return;
2129 if (!lockref_get_not_dead(&gl->gl_lockref))
2130 return;
2131 
2132 gfs2_glock_remove_from_lru(gl);
2133 spin_lock(&gl->gl_lockref.lock);
2134 set_bit(GLF_HAVE_REPLY, &gl->gl_flags);
2135 gfs2_glock_queue_work(gl, 0);
2136 spin_unlock(&gl->gl_lockref.lock);
2137 }
2138 
2139 /**
2140 * clear_glock - look at a glock and see if we can free it from the glock cache
2141 * @gl: the glock to look at
2142 *
2143 */
2144 
2145 static void clear_glock(struct gfs2_glock *gl)
2146 {
2147 gfs2_glock_remove_from_lru(gl);
2148 
2149 spin_lock(&gl->gl_lockref.lock);
2150 if (!__lockref_is_dead(&gl->gl_lockref)) {
2151 gl->gl_lockref.count++;
2152 if (gl->gl_state != LM_ST_UNLOCKED)
2153 request_demote(gl, LM_ST_UNLOCKED, 0, false);
2154 
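/* Kick the glock workqueue so the requested demote is acted on. */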
gfs2_glock_queue_work(gl, 0); 2155 } 2156 spin_unlock(&gl->gl_lockref.lock); 2157 } 2158 2159 /** 2160 * gfs2_glock_thaw - Thaw any frozen glocks 2161 * @sdp: The super block 2162 * 2163 */ 2164 2165 void gfs2_glock_thaw(struct gfs2_sbd *sdp) 2166 { 2167 glock_hash_walk(thaw_glock, sdp); 2168 } 2169 2170 static void dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid) 2171 { 2172 spin_lock(&gl->gl_lockref.lock); 2173 gfs2_dump_glock(seq, gl, fsid); 2174 spin_unlock(&gl->gl_lockref.lock); 2175 } 2176 2177 static void dump_glock_func(struct gfs2_glock *gl) 2178 { 2179 dump_glock(NULL, gl, true); 2180 } 2181 2182 static void withdraw_dq(struct gfs2_glock *gl) 2183 { 2184 spin_lock(&gl->gl_lockref.lock); 2185 if (!__lockref_is_dead(&gl->gl_lockref) && 2186 glock_blocked_by_withdraw(gl)) 2187 do_error(gl, LM_OUT_ERROR); /* remove pending waiters */ 2188 spin_unlock(&gl->gl_lockref.lock); 2189 } 2190 2191 void gfs2_gl_dq_holders(struct gfs2_sbd *sdp) 2192 { 2193 glock_hash_walk(withdraw_dq, sdp); 2194 } 2195 2196 /** 2197 * gfs2_gl_hash_clear - Empty out the glock hash table 2198 * @sdp: the filesystem 2199 * 2200 * Called when unmounting the filesystem. 2201 */ 2202 2203 void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) 2204 { 2205 unsigned long start = jiffies; 2206 bool timed_out = false; 2207 2208 set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags); 2209 flush_workqueue(sdp->sd_glock_wq); 2210 glock_hash_walk(clear_glock, sdp); 2211 flush_workqueue(sdp->sd_glock_wq); 2212 2213 while (!timed_out) { 2214 wait_event_timeout(sdp->sd_kill_wait, 2215 !atomic_read(&sdp->sd_glock_disposal), 2216 HZ * 60); 2217 if (!atomic_read(&sdp->sd_glock_disposal)) 2218 break; 2219 timed_out = time_after(jiffies, start + (HZ * 600)); 2220 fs_warn(sdp, "%u glocks left after %u seconds%s\n", 2221 atomic_read(&sdp->sd_glock_disposal), 2222 jiffies_to_msecs(jiffies - start) / 1000, 2223 timed_out ? 
":" : "; still waiting"); 2224 } 2225 gfs2_lm_unmount(sdp); 2226 gfs2_free_dead_glocks(sdp); 2227 glock_hash_walk(dump_glock_func, sdp); 2228 destroy_workqueue(sdp->sd_glock_wq); 2229 sdp->sd_glock_wq = NULL; 2230 } 2231 2232 static const char *state2str(unsigned state) 2233 { 2234 switch(state) { 2235 case LM_ST_UNLOCKED: 2236 return "UN"; 2237 case LM_ST_SHARED: 2238 return "SH"; 2239 case LM_ST_DEFERRED: 2240 return "DF"; 2241 case LM_ST_EXCLUSIVE: 2242 return "EX"; 2243 } 2244 return "??"; 2245 } 2246 2247 static const char *hflags2str(char *buf, u16 flags, unsigned long iflags) 2248 { 2249 char *p = buf; 2250 if (flags & LM_FLAG_TRY) 2251 *p++ = 't'; 2252 if (flags & LM_FLAG_TRY_1CB) 2253 *p++ = 'T'; 2254 if (flags & LM_FLAG_NOEXP) 2255 *p++ = 'e'; 2256 if (flags & LM_FLAG_ANY) 2257 *p++ = 'A'; 2258 if (flags & LM_FLAG_NODE_SCOPE) 2259 *p++ = 'n'; 2260 if (flags & GL_ASYNC) 2261 *p++ = 'a'; 2262 if (flags & GL_EXACT) 2263 *p++ = 'E'; 2264 if (flags & GL_NOCACHE) 2265 *p++ = 'c'; 2266 if (test_bit(HIF_HOLDER, &iflags)) 2267 *p++ = 'H'; 2268 if (test_bit(HIF_WAIT, &iflags)) 2269 *p++ = 'W'; 2270 if (flags & GL_SKIP) 2271 *p++ = 's'; 2272 *p = 0; 2273 return buf; 2274 } 2275 2276 /** 2277 * dump_holder - print information about a glock holder 2278 * @seq: the seq_file struct 2279 * @gh: the glock holder 2280 * @fs_id_buf: pointer to file system id (if requested) 2281 * 2282 */ 2283 2284 static void dump_holder(struct seq_file *seq, const struct gfs2_holder *gh, 2285 const char *fs_id_buf) 2286 { 2287 const char *comm = "(none)"; 2288 pid_t owner_pid = 0; 2289 char flags_buf[32]; 2290 2291 rcu_read_lock(); 2292 if (pid_is_meaningful(gh)) { 2293 struct task_struct *gh_owner; 2294 2295 comm = "(ended)"; 2296 owner_pid = pid_nr(gh->gh_owner_pid); 2297 gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID); 2298 if (gh_owner) 2299 comm = gh_owner->comm; 2300 } 2301 gfs2_print_dbg(seq, "%s H: s:%s f:%s e:%d p:%ld [%s] %pS\n", 2302 fs_id_buf, state2str(gh->gh_state), 2303 hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags), 2304 gh->gh_error, (long)owner_pid, comm, (void *)gh->gh_ip); 2305 rcu_read_unlock(); 2306 } 2307 2308 static const char *gflags2str(char *buf, const struct gfs2_glock *gl) 2309 { 2310 const unsigned long *gflags = &gl->gl_flags; 2311 char *p = buf; 2312 2313 if (test_bit(GLF_LOCK, gflags)) 2314 *p++ = 'l'; 2315 if (test_bit(GLF_DEMOTE, gflags)) 2316 *p++ = 'D'; 2317 if (test_bit(GLF_PENDING_DEMOTE, gflags)) 2318 *p++ = 'd'; 2319 if (test_bit(GLF_DEMOTE_IN_PROGRESS, gflags)) 2320 *p++ = 'p'; 2321 if (test_bit(GLF_DIRTY, gflags)) 2322 *p++ = 'y'; 2323 if (test_bit(GLF_LFLUSH, gflags)) 2324 *p++ = 'f'; 2325 if (test_bit(GLF_INVALIDATE_IN_PROGRESS, gflags)) 2326 *p++ = 'i'; 2327 if (test_bit(GLF_PENDING_REPLY, gflags)) 2328 *p++ = 'R'; 2329 if (test_bit(GLF_HAVE_REPLY, gflags)) 2330 *p++ = 'r'; 2331 if (test_bit(GLF_INITIAL, gflags)) 2332 *p++ = 'a'; 2333 if (test_bit(GLF_HAVE_FROZEN_REPLY, gflags)) 2334 *p++ = 'F'; 2335 if (!list_empty(&gl->gl_holders)) 2336 *p++ = 'q'; 2337 if (test_bit(GLF_LRU, gflags)) 2338 *p++ = 'L'; 2339 if (gl->gl_object) 2340 *p++ = 'o'; 2341 if (test_bit(GLF_BLOCKING, gflags)) 2342 *p++ = 'b'; 2343 if (test_bit(GLF_UNLOCKED, gflags)) 2344 *p++ = 'x'; 2345 if (test_bit(GLF_INSTANTIATE_NEEDED, gflags)) 2346 *p++ = 'n'; 2347 if (test_bit(GLF_INSTANTIATE_IN_PROG, gflags)) 2348 *p++ = 'N'; 2349 if (test_bit(GLF_TRY_TO_EVICT, gflags)) 2350 *p++ = 'e'; 2351 if (test_bit(GLF_VERIFY_DELETE, gflags)) 2352 *p++ = 'E'; 2353 if (test_bit(GLF_DEFER_DELETE, gflags)) 2354 
*p++ = 's';
2355 if (test_bit(GLF_CANCELING, gflags))
2356 *p++ = 'C';
2357 *p = 0;
2358 return buf;
2359 }
2360 
2361 /**
2362 * gfs2_dump_glock - print information about a glock
2363 * @seq: The seq_file struct
2364 * @gl: the glock
2365 * @fsid: If true, also dump the file system id
2366 *
2367 * The file format is as follows:
2368 * One line per object; capital letters are used to indicate objects:
2369 * G = glock, I = Inode, R = rgrp, H = holder. Glocks are not indented,
2370 * other objects are indented by a single space and follow the glock to
2371 * which they are related. Fields are indicated by lower case letters
2372 * followed by a colon and the field value, except for strings, which are in
2373 * [] so that it's possible to see if they are composed of spaces, for
2374 * example. The fields are n = number (id of the object), f = flags,
2375 * t = type, s = state, r = refcount, e = error, p = pid.
2376 *
2377 */
2378 
2379 void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid)
2380 {
2381 const struct gfs2_glock_operations *glops = gl->gl_ops;
2382 unsigned long long dtime;
2383 const struct gfs2_holder *gh;
2384 char gflags_buf[32];
2385 struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
2386 char fs_id_buf[sizeof(sdp->sd_fsname) + 7];
2387 unsigned long nrpages = 0;
2388 
2389 if (gl->gl_ops->go_flags & GLOF_ASPACE) {
2390 struct address_space *mapping = gfs2_glock2aspace(gl);
2391 
2392 nrpages = mapping->nrpages;
2393 }
2394 memset(fs_id_buf, 0, sizeof(fs_id_buf));
2395 if (fsid && sdp) /* safety precaution */
2396 sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
2397 dtime = jiffies - gl->gl_demote_time;
2398 dtime *= 1000000/HZ; /* demote time in uSec */
2399 if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
2400 dtime = 0;
2401 gfs2_print_dbg(seq, "%sG: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d "
2402 "v:%d r:%d m:%ld p:%lu\n",
2403 fs_id_buf, state2str(gl->gl_state),
2404 gl->gl_name.ln_type,
2405 (unsigned long long)gl->gl_name.ln_number,
2406 gflags2str(gflags_buf, gl),
2407 state2str(gl->gl_target),
2408 state2str(gl->gl_demote_state), dtime,
2409 atomic_read(&gl->gl_ail_count),
2410 atomic_read(&gl->gl_revokes),
2411 (int)gl->gl_lockref.count, gl->gl_hold_time, nrpages);
2412 
2413 list_for_each_entry(gh, &gl->gl_holders, gh_list)
2414 dump_holder(seq, gh, fs_id_buf);
2415 
2416 if (gl->gl_state != LM_ST_UNLOCKED && glops->go_dump)
2417 glops->go_dump(seq, gl, fs_id_buf);
2418 }
2419 
2420 static int gfs2_glstats_seq_show(struct seq_file *seq, void *iter_ptr)
2421 {
2422 struct gfs2_glock *gl = iter_ptr;
2423 
2424 seq_printf(seq, "G: n:%u/%llx rtt:%llu/%llu rttb:%llu/%llu irt:%llu/%llu dcnt: %llu qcnt: %llu\n",
2425 gl->gl_name.ln_type,
2426 (unsigned long long)gl->gl_name.ln_number,
2427 (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTT],
2428 (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTVAR],
2429 (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTB],
2430 (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTVARB],
2431 (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SIRT],
2432 (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SIRTVAR],
2433 (unsigned long long)gl->gl_stats.stats[GFS2_LKS_DCOUNT],
2434 (unsigned long long)gl->gl_stats.stats[GFS2_LKS_QCOUNT]);
2435 return 0;
2436 }
2437 
2438 static const char *gfs2_gltype[] = {
2439 "type",
2440 "reserved",
2441 "nondisk",
2442 "inode",
2443 "rgrp",
2444 "meta",
2445 "iopen",
2446 "flock",
2447 "plock",
2448 "quota",
2449 "journal",
2450 };
2451 
2452 static const char *gfs2_stype[] = {
2453 [GFS2_LKS_SRTT] = 
"srtt", 2454 [GFS2_LKS_SRTTVAR] = "srttvar", 2455 [GFS2_LKS_SRTTB] = "srttb", 2456 [GFS2_LKS_SRTTVARB] = "srttvarb", 2457 [GFS2_LKS_SIRT] = "sirt", 2458 [GFS2_LKS_SIRTVAR] = "sirtvar", 2459 [GFS2_LKS_DCOUNT] = "dlm", 2460 [GFS2_LKS_QCOUNT] = "queue", 2461 }; 2462 2463 #define GFS2_NR_SBSTATS (ARRAY_SIZE(gfs2_gltype) * ARRAY_SIZE(gfs2_stype)) 2464 2465 static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr) 2466 { 2467 struct gfs2_sbd *sdp = seq->private; 2468 loff_t pos = *(loff_t *)iter_ptr; 2469 unsigned index = pos >> 3; 2470 unsigned subindex = pos & 0x07; 2471 int i; 2472 2473 if (index == 0 && subindex != 0) 2474 return 0; 2475 2476 seq_printf(seq, "%-10s %8s:", gfs2_gltype[index], 2477 (index == 0) ? "cpu": gfs2_stype[subindex]); 2478 2479 for_each_possible_cpu(i) { 2480 const struct gfs2_pcpu_lkstats *lkstats = per_cpu_ptr(sdp->sd_lkstats, i); 2481 2482 if (index == 0) 2483 seq_printf(seq, " %15u", i); 2484 else 2485 seq_printf(seq, " %15llu", (unsigned long long)lkstats-> 2486 lkstats[index - 1].stats[subindex]); 2487 } 2488 seq_putc(seq, '\n'); 2489 return 0; 2490 } 2491 2492 int __init gfs2_glock_init(void) 2493 { 2494 int i, ret; 2495 2496 ret = rhashtable_init(&gl_hash_table, &ht_parms); 2497 if (ret < 0) 2498 return ret; 2499 2500 glock_shrinker = shrinker_alloc(0, "gfs2-glock"); 2501 if (!glock_shrinker) { 2502 rhashtable_destroy(&gl_hash_table); 2503 return -ENOMEM; 2504 } 2505 2506 glock_shrinker->count_objects = gfs2_glock_shrink_count; 2507 glock_shrinker->scan_objects = gfs2_glock_shrink_scan; 2508 2509 shrinker_register(glock_shrinker); 2510 2511 for (i = 0; i < GLOCK_WAIT_TABLE_SIZE; i++) 2512 init_waitqueue_head(glock_wait_table + i); 2513 2514 return 0; 2515 } 2516 2517 void gfs2_glock_exit(void) 2518 { 2519 shrinker_free(glock_shrinker); 2520 rhashtable_destroy(&gl_hash_table); 2521 } 2522 2523 static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi, loff_t n) 2524 { 2525 struct gfs2_glock *gl = gi->gl; 2526 2527 if (gl) { 2528 if (n == 0) 2529 return; 2530 gfs2_glock_put_async(gl); 2531 } 2532 for (;;) { 2533 gl = rhashtable_walk_next(&gi->hti); 2534 if (IS_ERR_OR_NULL(gl)) { 2535 if (gl == ERR_PTR(-EAGAIN)) { 2536 n = 1; 2537 continue; 2538 } 2539 gl = NULL; 2540 break; 2541 } 2542 if (gl->gl_name.ln_sbd != gi->sdp) 2543 continue; 2544 if (n <= 1) { 2545 if (!lockref_get_not_dead(&gl->gl_lockref)) 2546 continue; 2547 break; 2548 } else { 2549 if (__lockref_is_dead(&gl->gl_lockref)) 2550 continue; 2551 n--; 2552 } 2553 } 2554 gi->gl = gl; 2555 } 2556 2557 static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos) 2558 __acquires(RCU) 2559 { 2560 struct gfs2_glock_iter *gi = seq->private; 2561 loff_t n; 2562 2563 /* 2564 * We can either stay where we are, skip to the next hash table 2565 * entry, or start from the beginning. 
2566 */ 2567 if (*pos < gi->last_pos) { 2568 rhashtable_walk_exit(&gi->hti); 2569 rhashtable_walk_enter(&gl_hash_table, &gi->hti); 2570 n = *pos + 1; 2571 } else { 2572 n = *pos - gi->last_pos; 2573 } 2574 2575 rhashtable_walk_start(&gi->hti); 2576 2577 gfs2_glock_iter_next(gi, n); 2578 gi->last_pos = *pos; 2579 return gi->gl; 2580 } 2581 2582 static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr, 2583 loff_t *pos) 2584 { 2585 struct gfs2_glock_iter *gi = seq->private; 2586 2587 (*pos)++; 2588 gi->last_pos = *pos; 2589 gfs2_glock_iter_next(gi, 1); 2590 return gi->gl; 2591 } 2592 2593 static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr) 2594 __releases(RCU) 2595 { 2596 struct gfs2_glock_iter *gi = seq->private; 2597 2598 rhashtable_walk_stop(&gi->hti); 2599 } 2600 2601 static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) 2602 { 2603 dump_glock(seq, iter_ptr, false); 2604 return 0; 2605 } 2606 2607 static void *gfs2_sbstats_seq_start(struct seq_file *seq, loff_t *pos) 2608 { 2609 preempt_disable(); 2610 if (*pos >= GFS2_NR_SBSTATS) 2611 return NULL; 2612 return pos; 2613 } 2614 2615 static void *gfs2_sbstats_seq_next(struct seq_file *seq, void *iter_ptr, 2616 loff_t *pos) 2617 { 2618 (*pos)++; 2619 if (*pos >= GFS2_NR_SBSTATS) 2620 return NULL; 2621 return pos; 2622 } 2623 2624 static void gfs2_sbstats_seq_stop(struct seq_file *seq, void *iter_ptr) 2625 { 2626 preempt_enable(); 2627 } 2628 2629 static const struct seq_operations gfs2_glock_seq_ops = { 2630 .start = gfs2_glock_seq_start, 2631 .next = gfs2_glock_seq_next, 2632 .stop = gfs2_glock_seq_stop, 2633 .show = gfs2_glock_seq_show, 2634 }; 2635 2636 static const struct seq_operations gfs2_glstats_seq_ops = { 2637 .start = gfs2_glock_seq_start, 2638 .next = gfs2_glock_seq_next, 2639 .stop = gfs2_glock_seq_stop, 2640 .show = gfs2_glstats_seq_show, 2641 }; 2642 2643 static const struct seq_operations gfs2_sbstats_sops = { 2644 .start = gfs2_sbstats_seq_start, 2645 .next = gfs2_sbstats_seq_next, 2646 .stop = gfs2_sbstats_seq_stop, 2647 .show = gfs2_sbstats_seq_show, 2648 }; 2649 2650 #define GFS2_SEQ_GOODSIZE min(PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER, 65536UL) 2651 2652 static int __gfs2_glocks_open(struct inode *inode, struct file *file, 2653 const struct seq_operations *ops) 2654 { 2655 int ret = seq_open_private(file, ops, sizeof(struct gfs2_glock_iter)); 2656 if (ret == 0) { 2657 struct seq_file *seq = file->private_data; 2658 struct gfs2_glock_iter *gi = seq->private; 2659 2660 gi->sdp = inode->i_private; 2661 seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN); 2662 if (seq->buf) 2663 seq->size = GFS2_SEQ_GOODSIZE; 2664 /* 2665 * Initially, we are "before" the first hash table entry; the 2666 * first call to rhashtable_walk_next gets us the first entry. 
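 * Setting last_pos to -1 keeps gfs2_glock_seq_start() consistent with
 * that: the first read at *pos == 0 then advances the walk by exactly
 * one entry, i.e. to the first glock.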
2667 */ 2668 gi->last_pos = -1; 2669 gi->gl = NULL; 2670 rhashtable_walk_enter(&gl_hash_table, &gi->hti); 2671 } 2672 return ret; 2673 } 2674 2675 static int gfs2_glocks_open(struct inode *inode, struct file *file) 2676 { 2677 return __gfs2_glocks_open(inode, file, &gfs2_glock_seq_ops); 2678 } 2679 2680 static int gfs2_glocks_release(struct inode *inode, struct file *file) 2681 { 2682 struct seq_file *seq = file->private_data; 2683 struct gfs2_glock_iter *gi = seq->private; 2684 2685 if (gi->gl) 2686 gfs2_glock_put(gi->gl); 2687 rhashtable_walk_exit(&gi->hti); 2688 return seq_release_private(inode, file); 2689 } 2690 2691 static int gfs2_glstats_open(struct inode *inode, struct file *file) 2692 { 2693 return __gfs2_glocks_open(inode, file, &gfs2_glstats_seq_ops); 2694 } 2695 2696 static const struct file_operations gfs2_glocks_fops = { 2697 .owner = THIS_MODULE, 2698 .open = gfs2_glocks_open, 2699 .read = seq_read, 2700 .llseek = seq_lseek, 2701 .release = gfs2_glocks_release, 2702 }; 2703 2704 static const struct file_operations gfs2_glstats_fops = { 2705 .owner = THIS_MODULE, 2706 .open = gfs2_glstats_open, 2707 .read = seq_read, 2708 .llseek = seq_lseek, 2709 .release = gfs2_glocks_release, 2710 }; 2711 2712 struct gfs2_glockfd_iter { 2713 struct super_block *sb; 2714 unsigned int tgid; 2715 struct task_struct *task; 2716 unsigned int fd; 2717 struct file *file; 2718 }; 2719 2720 static struct task_struct *gfs2_glockfd_next_task(struct gfs2_glockfd_iter *i) 2721 { 2722 struct pid_namespace *ns = task_active_pid_ns(current); 2723 struct pid *pid; 2724 2725 if (i->task) 2726 put_task_struct(i->task); 2727 2728 rcu_read_lock(); 2729 retry: 2730 i->task = NULL; 2731 pid = find_ge_pid(i->tgid, ns); 2732 if (pid) { 2733 i->tgid = pid_nr_ns(pid, ns); 2734 i->task = pid_task(pid, PIDTYPE_TGID); 2735 if (!i->task) { 2736 i->tgid++; 2737 goto retry; 2738 } 2739 get_task_struct(i->task); 2740 } 2741 rcu_read_unlock(); 2742 return i->task; 2743 } 2744 2745 static struct file *gfs2_glockfd_next_file(struct gfs2_glockfd_iter *i) 2746 { 2747 if (i->file) { 2748 fput(i->file); 2749 i->file = NULL; 2750 } 2751 2752 for(;; i->fd++) { 2753 i->file = fget_task_next(i->task, &i->fd); 2754 if (!i->file) { 2755 i->fd = 0; 2756 break; 2757 } 2758 2759 if (file_inode(i->file)->i_sb == i->sb) 2760 break; 2761 2762 fput(i->file); 2763 } 2764 return i->file; 2765 } 2766 2767 static void *gfs2_glockfd_seq_start(struct seq_file *seq, loff_t *pos) 2768 { 2769 struct gfs2_glockfd_iter *i = seq->private; 2770 2771 if (*pos) 2772 return NULL; 2773 while (gfs2_glockfd_next_task(i)) { 2774 if (gfs2_glockfd_next_file(i)) 2775 return i; 2776 i->tgid++; 2777 } 2778 return NULL; 2779 } 2780 2781 static void *gfs2_glockfd_seq_next(struct seq_file *seq, void *iter_ptr, 2782 loff_t *pos) 2783 { 2784 struct gfs2_glockfd_iter *i = seq->private; 2785 2786 (*pos)++; 2787 i->fd++; 2788 do { 2789 if (gfs2_glockfd_next_file(i)) 2790 return i; 2791 i->tgid++; 2792 } while (gfs2_glockfd_next_task(i)); 2793 return NULL; 2794 } 2795 2796 static void gfs2_glockfd_seq_stop(struct seq_file *seq, void *iter_ptr) 2797 { 2798 struct gfs2_glockfd_iter *i = seq->private; 2799 2800 if (i->file) 2801 fput(i->file); 2802 if (i->task) 2803 put_task_struct(i->task); 2804 } 2805 2806 static void gfs2_glockfd_seq_show_flock(struct seq_file *seq, 2807 struct gfs2_glockfd_iter *i) 2808 { 2809 struct gfs2_file *fp = i->file->private_data; 2810 struct gfs2_holder *fl_gh = &fp->f_fl_gh; 2811 struct lm_lockname gl_name = { .ln_type = LM_TYPE_RESERVED }; 2812 
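/* Lockless pre-check; the holder is checked again under f_lock below. */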
2813 if (!READ_ONCE(fl_gh->gh_gl)) 2814 return; 2815 2816 spin_lock(&i->file->f_lock); 2817 if (gfs2_holder_initialized(fl_gh)) 2818 gl_name = fl_gh->gh_gl->gl_name; 2819 spin_unlock(&i->file->f_lock); 2820 2821 if (gl_name.ln_type != LM_TYPE_RESERVED) { 2822 seq_printf(seq, "%d %u %u/%llx\n", 2823 i->tgid, i->fd, gl_name.ln_type, 2824 (unsigned long long)gl_name.ln_number); 2825 } 2826 } 2827 2828 static int gfs2_glockfd_seq_show(struct seq_file *seq, void *iter_ptr) 2829 { 2830 struct gfs2_glockfd_iter *i = seq->private; 2831 struct inode *inode = file_inode(i->file); 2832 struct gfs2_glock *gl; 2833 2834 inode_lock_shared(inode); 2835 gl = GFS2_I(inode)->i_iopen_gh.gh_gl; 2836 if (gl) { 2837 seq_printf(seq, "%d %u %u/%llx\n", 2838 i->tgid, i->fd, gl->gl_name.ln_type, 2839 (unsigned long long)gl->gl_name.ln_number); 2840 } 2841 gfs2_glockfd_seq_show_flock(seq, i); 2842 inode_unlock_shared(inode); 2843 return 0; 2844 } 2845 2846 static const struct seq_operations gfs2_glockfd_seq_ops = { 2847 .start = gfs2_glockfd_seq_start, 2848 .next = gfs2_glockfd_seq_next, 2849 .stop = gfs2_glockfd_seq_stop, 2850 .show = gfs2_glockfd_seq_show, 2851 }; 2852 2853 static int gfs2_glockfd_open(struct inode *inode, struct file *file) 2854 { 2855 struct gfs2_glockfd_iter *i; 2856 struct gfs2_sbd *sdp = inode->i_private; 2857 2858 i = __seq_open_private(file, &gfs2_glockfd_seq_ops, 2859 sizeof(struct gfs2_glockfd_iter)); 2860 if (!i) 2861 return -ENOMEM; 2862 i->sb = sdp->sd_vfs; 2863 return 0; 2864 } 2865 2866 static const struct file_operations gfs2_glockfd_fops = { 2867 .owner = THIS_MODULE, 2868 .open = gfs2_glockfd_open, 2869 .read = seq_read, 2870 .llseek = seq_lseek, 2871 .release = seq_release_private, 2872 }; 2873 2874 DEFINE_SEQ_ATTRIBUTE(gfs2_sbstats); 2875 2876 void gfs2_create_debugfs_file(struct gfs2_sbd *sdp) 2877 { 2878 sdp->debugfs_dir = debugfs_create_dir(sdp->sd_table_name, gfs2_root); 2879 2880 debugfs_create_file("glocks", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp, 2881 &gfs2_glocks_fops); 2882 2883 debugfs_create_file("glockfd", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp, 2884 &gfs2_glockfd_fops); 2885 2886 debugfs_create_file("glstats", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp, 2887 &gfs2_glstats_fops); 2888 2889 debugfs_create_file("sbstats", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp, 2890 &gfs2_sbstats_fops); 2891 } 2892 2893 void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp) 2894 { 2895 debugfs_remove_recursive(sdp->debugfs_dir); 2896 sdp->debugfs_dir = NULL; 2897 } 2898 2899 void gfs2_register_debugfs(void) 2900 { 2901 gfs2_root = debugfs_create_dir("gfs2", NULL); 2902 } 2903 2904 void gfs2_unregister_debugfs(void) 2905 { 2906 debugfs_remove(gfs2_root); 2907 gfs2_root = NULL; 2908 } 2909