// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/buffer_head.h>
#include <linux/delay.h>
#include <linux/sort.h>
#include <linux/hash.h>
#include <linux/jhash.h>
#include <linux/kallsyms.h>
#include <linux/gfs2_ondisk.h>
#include <linux/list.h>
#include <linux/wait.h>
#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include <linux/rcupdate.h>
#include <linux/rculist_bl.h>
#include <linux/bit_spinlock.h>
#include <linux/percpu.h>
#include <linux/list_sort.h>
#include <linux/lockref.h>
#include <linux/rhashtable.h>
#include <linux/pid_namespace.h>
#include <linux/file.h>
#include <linux/random.h>

#include "gfs2.h"
#include "incore.h"
#include "glock.h"
#include "glops.h"
#include "inode.h"
#include "lops.h"
#include "meta_io.h"
#include "quota.h"
#include "super.h"
#include "util.h"
#include "bmap.h"
#define CREATE_TRACE_POINTS
#include "trace_gfs2.h"

struct gfs2_glock_iter {
	struct gfs2_sbd *sdp;		/* incore superblock */
	struct rhashtable_iter hti;	/* rhashtable iterator */
	struct gfs2_glock *gl;		/* current glock struct */
	loff_t last_pos;		/* last position */
};

typedef void (*glock_examiner) (struct gfs2_glock * gl);

static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target);
static void request_demote(struct gfs2_glock *gl, unsigned int state,
			   unsigned long delay, bool remote);

static struct dentry *gfs2_root;
static LIST_HEAD(lru_list);
static atomic_t lru_count = ATOMIC_INIT(0);
static DEFINE_SPINLOCK(lru_lock);

#define GFS2_GL_HASH_SHIFT	15
#define GFS2_GL_HASH_SIZE	BIT(GFS2_GL_HASH_SHIFT)

static const struct rhashtable_params ht_parms = {
	.nelem_hint = GFS2_GL_HASH_SIZE * 3 / 4,
	.key_len = offsetofend(struct lm_lockname, ln_type),
	.key_offset = offsetof(struct gfs2_glock, gl_name),
	.head_offset = offsetof(struct gfs2_glock, gl_node),
};

static struct rhashtable gl_hash_table;

#define GLOCK_WAIT_TABLE_BITS 12
#define GLOCK_WAIT_TABLE_SIZE (1 << GLOCK_WAIT_TABLE_BITS)
static wait_queue_head_t glock_wait_table[GLOCK_WAIT_TABLE_SIZE] __cacheline_aligned;

struct wait_glock_queue {
	struct lm_lockname *name;
	wait_queue_entry_t wait;
};

static int glock_wake_function(wait_queue_entry_t *wait, unsigned int mode,
			       int sync, void *key)
{
	struct wait_glock_queue *wait_glock =
		container_of(wait, struct wait_glock_queue, wait);
	struct lm_lockname *wait_name = wait_glock->name;
	struct lm_lockname *wake_name = key;

	if (wake_name->ln_sbd != wait_name->ln_sbd ||
	    wake_name->ln_number != wait_name->ln_number ||
	    wake_name->ln_type != wait_name->ln_type)
		return 0;
	return autoremove_wake_function(wait, mode, sync, key);
}

static wait_queue_head_t *glock_waitqueue(struct lm_lockname *name)
{
	u32 hash = jhash2((u32 *)name, ht_parms.key_len / 4, 0);

	return glock_wait_table + hash_32(hash, GLOCK_WAIT_TABLE_BITS);
}

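/*
 * Illustrative sketch (not built): how the wait table above is used. A
 * waiter hashes the lock name into one of the GLOCK_WAIT_TABLE_SIZE wait
 * queues, and wake-ups pass the name as the key, so glock_wake_function()
 * only wakes waiters for that exact name even if unrelated names collide
 * in the same bucket:
 *
 *	struct lm_lockname name = { .ln_number = number,
 *				    .ln_type = LM_TYPE_INODE,
 *				    .ln_sbd = sdp };
 *	wait_queue_head_t *wq = glock_waitqueue(&name);
 *
 *	prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
 *	...
 *	__wake_up(wq, TASK_NORMAL, 1, &name);	// as in wake_up_glock()
 */
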
/**
 * wake_up_glock - Wake up waiters on a glock
 * @gl: the glock
 */
static void wake_up_glock(struct gfs2_glock *gl)
{
	wait_queue_head_t *wq = glock_waitqueue(&gl->gl_name);

	if (waitqueue_active(wq))
		__wake_up(wq, TASK_NORMAL, 1, &gl->gl_name);
}

static void gfs2_glock_dealloc(struct rcu_head *rcu)
{
	struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);

	kfree(gl->gl_lksb.sb_lvbptr);
	if (gl->gl_ops->go_flags & GLOF_ASPACE) {
		struct gfs2_glock_aspace *gla =
			container_of(gl, struct gfs2_glock_aspace, glock);
		kmem_cache_free(gfs2_glock_aspace_cachep, gla);
	} else
		kmem_cache_free(gfs2_glock_cachep, gl);
}

/**
 * glock_blocked_by_withdraw - determine if we can still use a glock
 * @gl: the glock
 *
 * We need to allow some glocks to be enqueued, dequeued, promoted, and demoted
 * when we're withdrawn. For example, to maintain metadata integrity, we should
 * disallow the use of inode and rgrp glocks when withdrawn. Other glocks like
 * the iopen or freeze glock may be safely used because none of their
 * metadata goes through the journal. So in general, we should disallow all
 * glocks that are journaled, and allow all the others. One exception is:
 * we need to allow our active journal to be promoted and demoted so others
 * may recover it and we can reacquire it when they're done.
 */
static bool glock_blocked_by_withdraw(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

	if (!gfs2_withdrawing_or_withdrawn(sdp))
		return false;
	if (gl->gl_ops->go_flags & GLOF_NONDISK)
		return false;
	if (!sdp->sd_jdesc ||
	    gl->gl_name.ln_number == sdp->sd_jdesc->jd_no_addr)
		return false;
	return true;
}

static void __gfs2_glock_free(struct gfs2_glock *gl)
{
	rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms);
	smp_mb();
	wake_up_glock(gl);
	call_rcu(&gl->gl_rcu, gfs2_glock_dealloc);
}

void gfs2_glock_free(struct gfs2_glock *gl) {
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

	__gfs2_glock_free(gl);
	if (atomic_dec_and_test(&sdp->sd_glock_disposal))
		wake_up(&sdp->sd_kill_wait);
}

void gfs2_glock_free_later(struct gfs2_glock *gl) {
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

	spin_lock(&lru_lock);
	list_add(&gl->gl_lru, &sdp->sd_dead_glocks);
	spin_unlock(&lru_lock);
	if (atomic_dec_and_test(&sdp->sd_glock_disposal))
		wake_up(&sdp->sd_kill_wait);
}

static void gfs2_free_dead_glocks(struct gfs2_sbd *sdp)
{
	struct list_head *list = &sdp->sd_dead_glocks;

	while(!list_empty(list)) {
		struct gfs2_glock *gl;

		gl = list_first_entry(list, struct gfs2_glock, gl_lru);
		list_del_init(&gl->gl_lru);
		__gfs2_glock_free(gl);
	}
}

/**
 * gfs2_glock_hold() - increment reference count on glock
 * @gl: The glock to hold
 *
 */

struct gfs2_glock *gfs2_glock_hold(struct gfs2_glock *gl)
{
	GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref));
	lockref_get(&gl->gl_lockref);
	return gl;
}

static void gfs2_glock_add_to_lru(struct gfs2_glock *gl)
{
	spin_lock(&lru_lock);
	list_move_tail(&gl->gl_lru, &lru_list);

	if (!test_bit(GLF_LRU, &gl->gl_flags)) {
		set_bit(GLF_LRU, &gl->gl_flags);
		atomic_inc(&lru_count);
	}

	spin_unlock(&lru_lock);
}

static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
{
	spin_lock(&lru_lock);
	if (test_bit(GLF_LRU, &gl->gl_flags)) {
		list_del_init(&gl->gl_lru);
		atomic_dec(&lru_count);
		clear_bit(GLF_LRU, &gl->gl_flags);
	}
	spin_unlock(&lru_lock);
}

/*
 * Enqueue the glock on the work queue. Passes one glock reference on to the
 * work queue.
 */
static void gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay) {
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

	if (!queue_delayed_work(sdp->sd_glock_wq, &gl->gl_work, delay)) {
		/*
		 * We are holding the lockref spinlock, and the work was still
		 * queued above. The queued work (glock_work_func) takes that
		 * spinlock before dropping its glock reference(s), so it
		 * cannot have dropped them in the meantime.
		 */
		GLOCK_BUG_ON(gl, gl->gl_lockref.count < 2);
		gl->gl_lockref.count--;
	}
}

static void __gfs2_glock_put(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct address_space *mapping = gfs2_glock2aspace(gl);

	lockref_mark_dead(&gl->gl_lockref);
	spin_unlock(&gl->gl_lockref.lock);
	gfs2_glock_remove_from_lru(gl);
	GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
	if (mapping) {
		truncate_inode_pages_final(mapping);
		if (!gfs2_withdrawing_or_withdrawn(sdp))
			GLOCK_BUG_ON(gl, !mapping_empty(mapping));
	}
	trace_gfs2_glock_put(gl);
	sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
}

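/*
 * __gfs2_glock_put_or_lock - drop a reference unless it is the last one
 * @gl: the glock
 *
 * Drops a glock reference. Returns true if this wasn't the last reference,
 * or if it was but the glock is still locked, in which case the glock is
 * moved to the LRU list instead of being freed; in both cases
 * gl_lockref.lock is released. Returns false with gl_lockref.lock held if
 * the caller must dispose of the glock itself.
 */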
static bool __gfs2_glock_put_or_lock(struct gfs2_glock *gl)
{
	if (lockref_put_or_lock(&gl->gl_lockref))
		return true;
	GLOCK_BUG_ON(gl, gl->gl_lockref.count != 1);
	if (gl->gl_state != LM_ST_UNLOCKED) {
		gl->gl_lockref.count--;
		gfs2_glock_add_to_lru(gl);
		spin_unlock(&gl->gl_lockref.lock);
		return true;
	}
	return false;
}

/**
 * gfs2_glock_put() - Decrement reference count on glock
 * @gl: The glock to put
 *
 */

void gfs2_glock_put(struct gfs2_glock *gl)
{
	if (__gfs2_glock_put_or_lock(gl))
		return;

	__gfs2_glock_put(gl);
}

/*
 * gfs2_glock_put_async - Decrement reference count without sleeping
 * @gl: The glock to put
 *
 * Decrement the reference count on glock immediately unless it is the last
 * reference. Defer putting the last reference to work queue context.
 */
void gfs2_glock_put_async(struct gfs2_glock *gl)
{
	if (__gfs2_glock_put_or_lock(gl))
		return;

	gfs2_glock_queue_work(gl, 0);
	spin_unlock(&gl->gl_lockref.lock);
}

/**
 * may_grant - check if it's ok to grant a new lock
 * @gl: The glock
 * @current_gh: One of the current holders of @gl
 * @gh: The lock request which we wish to grant
 *
 * With our current compatibility rules, if a glock has one or more active
 * holders (HIF_HOLDER flag set), any of those holders can be passed in as
 * @current_gh; they are all the same as far as compatibility with the new @gh
 * goes.
 *
 * Returns true if it's ok to grant the lock.
 */

static inline bool may_grant(struct gfs2_glock *gl,
			     struct gfs2_holder *current_gh,
			     struct gfs2_holder *gh)
{
	if (current_gh) {
		GLOCK_BUG_ON(gl, !test_bit(HIF_HOLDER, &current_gh->gh_iflags));

		switch(current_gh->gh_state) {
		case LM_ST_EXCLUSIVE:
			/*
			 * Here we make a special exception to grant holders
			 * who agree to share the EX lock with other holders
			 * who also have the bit set. If the original holder
			 * has the LM_FLAG_NODE_SCOPE bit set, we grant more
			 * holders with the bit set.
			 */
			return gh->gh_state == LM_ST_EXCLUSIVE &&
			       (current_gh->gh_flags & LM_FLAG_NODE_SCOPE) &&
			       (gh->gh_flags & LM_FLAG_NODE_SCOPE);

		case LM_ST_SHARED:
		case LM_ST_DEFERRED:
			return gh->gh_state == current_gh->gh_state;

		default:
			return false;
		}
	}

	if (gl->gl_state == gh->gh_state)
		return true;
	if (gh->gh_flags & GL_EXACT)
		return false;
	if (gl->gl_state == LM_ST_EXCLUSIVE) {
		return gh->gh_state == LM_ST_SHARED ||
		       gh->gh_state == LM_ST_DEFERRED;
	}
	if (gh->gh_flags & LM_FLAG_ANY)
		return gl->gl_state != LM_ST_UNLOCKED;
	return false;
}
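
/*
 * Quick reference for the rules implemented by may_grant() above:
 *
 *  - With an active holder: EX is only shared with further EX requests when
 *    both sides carry LM_FLAG_NODE_SCOPE; SH and DF are only shared with
 *    requests for the same state.
 *  - With no active holder: a request is granted if it matches the current
 *    glock state, or (without GL_EXACT) if the glock is in EX and SH or DF
 *    is wanted, or if LM_FLAG_ANY is set and the glock is not unlocked.
 */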

static void gfs2_holder_wake(struct gfs2_holder *gh)
{
	clear_bit(HIF_WAIT, &gh->gh_iflags);
	smp_mb__after_atomic();
	wake_up_bit(&gh->gh_iflags, HIF_WAIT);
	if (gh->gh_flags & GL_ASYNC) {
		struct gfs2_sbd *sdp = gh->gh_gl->gl_name.ln_sbd;

		wake_up(&sdp->sd_async_glock_wait);
	}
}

/**
 * do_error - Something unexpected has happened during a lock request
 * @gl: The glock
 * @ret: The status from the DLM
 */

static void do_error(struct gfs2_glock *gl, const int ret)
{
	struct gfs2_holder *gh, *tmp;

	list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
		if (test_bit(HIF_HOLDER, &gh->gh_iflags))
			continue;
		if (ret & LM_OUT_ERROR)
			gh->gh_error = -EIO;
		else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))
			gh->gh_error = GLR_TRYFAILED;
		else
			continue;
		list_del_init(&gh->gh_list);
		trace_gfs2_glock_queue(gh, 0);
		gfs2_holder_wake(gh);
	}
}

/**
 * find_first_holder - find the first "holder" gh
 * @gl: the glock
 */

static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl)
{
	struct gfs2_holder *gh;

	if (!list_empty(&gl->gl_holders)) {
		gh = list_first_entry(&gl->gl_holders, struct gfs2_holder,
				      gh_list);
		if (test_bit(HIF_HOLDER, &gh->gh_iflags))
			return gh;
	}
	return NULL;
}

/*
 * gfs2_instantiate - Call the glops instantiate function
 * @gh: The glock holder
 *
 * Returns: 0 if instantiate was successful, or error.
 */
int gfs2_instantiate(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	const struct gfs2_glock_operations *glops = gl->gl_ops;
	int ret;

again:
	if (!test_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags))
		goto done;

	/*
	 * Since we unlock the lockref lock, we set a flag to indicate
	 * instantiate is in progress.
	 */
	if (test_and_set_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags)) {
		wait_on_bit(&gl->gl_flags, GLF_INSTANTIATE_IN_PROG,
			    TASK_UNINTERRUPTIBLE);
		/*
		 * Here we just waited for a different instantiate to finish.
		 * But that may not have been successful, as when a process
		 * locks an inode glock _before_ it has an actual inode to
		 * instantiate into. So we check again. This process might
		 * have an inode to instantiate, so might be successful.
		 */
		goto again;
	}

	ret = glops->go_instantiate(gl);
	if (!ret)
		clear_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags);
	clear_and_wake_up_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags);
	if (ret)
		return ret;

done:
	if (glops->go_held)
		return glops->go_held(gh);
	return 0;
}

/**
 * do_promote - promote as many requests as possible on the current queue
 * @gl: The glock
 *
 * Returns true on success (i.e., progress was made or there are no waiters).
 */

static bool do_promote(struct gfs2_glock *gl)
{
	struct gfs2_holder *gh, *current_gh;

	current_gh = find_first_holder(gl);
	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
		if (test_bit(HIF_HOLDER, &gh->gh_iflags))
			continue;
		if (!may_grant(gl, current_gh, gh)) {
			/*
			 * If we get here, it means we may not grant this
			 * holder for some reason. If this holder is at the
			 * head of the list, it means we have a blocked holder
			 * at the head, so return false.
			 */
			if (list_is_first(&gh->gh_list, &gl->gl_holders))
				return false;
			do_error(gl, 0);
			break;
		}
		set_bit(HIF_HOLDER, &gh->gh_iflags);
		trace_gfs2_promote(gh);
		gfs2_holder_wake(gh);
		if (!current_gh)
			current_gh = gh;
	}
	return true;
}

/**
 * find_first_waiter - find the first gh that's waiting for the glock
 * @gl: the glock
 */

static inline struct gfs2_holder *find_first_waiter(const struct gfs2_glock *gl)
{
	struct gfs2_holder *gh;

	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
		if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
			return gh;
	}
	return NULL;
}

/**
 * find_last_waiter - find the last gh that's waiting for the glock
 * @gl: the glock
 *
 * This also is a fast way of finding out if there are any waiters.
 */

static inline struct gfs2_holder *find_last_waiter(const struct gfs2_glock *gl)
{
	struct gfs2_holder *gh;

	if (list_empty(&gl->gl_holders))
		return NULL;
	gh = list_last_entry(&gl->gl_holders, struct gfs2_holder, gh_list);
	return test_bit(HIF_HOLDER, &gh->gh_iflags) ? NULL : gh;
}

/**
 * state_change - record that the glock is now in a different state
 * @gl: the glock
 * @new_state: the new state
 */

static void state_change(struct gfs2_glock *gl, unsigned int new_state)
{
	if (new_state != gl->gl_target)
		/* shorten our minimum hold time */
		gl->gl_hold_time = max(gl->gl_hold_time - GL_GLOCK_HOLD_DECR,
				       GL_GLOCK_MIN_HOLD);
	gl->gl_state = new_state;
	gl->gl_tchange = jiffies;
}

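/*
 * gfs2_set_demote - set a demote flag on a glock
 * @nr: GLF_DEMOTE or GLF_PENDING_DEMOTE
 * @gl: the glock
 *
 * Also wakes up the superblock's async glock waiters so that
 * gfs2_glock_async_wait() can re-check its holders.
 */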
static void gfs2_set_demote(int nr, struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

	set_bit(nr, &gl->gl_flags);
	smp_mb();
	wake_up(&sdp->sd_async_glock_wait);
}

static void gfs2_demote_wake(struct gfs2_glock *gl)
{
	gl->gl_demote_state = LM_ST_EXCLUSIVE;
	clear_bit(GLF_DEMOTE, &gl->gl_flags);
	smp_mb__after_atomic();
	wake_up_bit(&gl->gl_flags, GLF_DEMOTE);
}

/**
 * finish_xmote - The DLM has replied to one of our lock requests
 * @gl: The glock
 * @ret: The status from the DLM
 *
 */

static void finish_xmote(struct gfs2_glock *gl, unsigned int ret)
{
	const struct gfs2_glock_operations *glops = gl->gl_ops;
	struct gfs2_holder *gh;
	unsigned state = ret & LM_OUT_ST_MASK;

	trace_gfs2_glock_state_change(gl, state);
	state_change(gl, state);
	gh = find_first_waiter(gl);

	/* Demote to UN request arrived during demote to SH or DF */
	if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
	    state != LM_ST_UNLOCKED && gl->gl_demote_state == LM_ST_UNLOCKED)
		gl->gl_target = LM_ST_UNLOCKED;

	/* Check for state != intended state */
	if (unlikely(state != gl->gl_target)) {
		if (gh && (ret & LM_OUT_CANCELED))
			gfs2_holder_wake(gh);
		if (gh && !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) {
			if (ret & LM_OUT_CANCELED) {
				list_del_init(&gh->gh_list);
				trace_gfs2_glock_queue(gh, 0);
				gl->gl_target = gl->gl_state;
				gh = find_first_waiter(gl);
				if (gh) {
					gl->gl_target = gh->gh_state;
					if (do_promote(gl))
						goto out;
					do_xmote(gl, gh, gl->gl_target);
					return;
				}
				goto out;
			}
			/* Some error or failed "try lock" - report it */
			if ((ret & LM_OUT_ERROR) ||
			    (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) {
				gl->gl_target = gl->gl_state;
				do_error(gl, ret);
				goto out;
			}
		}
		switch(state) {
		/* Unlocked due to conversion deadlock, try again */
		case LM_ST_UNLOCKED:
			do_xmote(gl, gh, gl->gl_target);
			break;
		/* Conversion fails, unlock and try again */
		case LM_ST_SHARED:
		case LM_ST_DEFERRED:
			do_xmote(gl, gh, LM_ST_UNLOCKED);
			break;
		default: /* Everything else */
			fs_err(gl->gl_name.ln_sbd, "wanted %u got %u\n",
			       gl->gl_target, state);
			GLOCK_BUG_ON(gl, 1);
		}
		return;
	}

	/* Fast path - we got what we asked for */
	if (test_and_clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags))
		gfs2_demote_wake(gl);
	if (state != LM_ST_UNLOCKED) {
		if (glops->go_xmote_bh) {
			int rv;

			spin_unlock(&gl->gl_lockref.lock);
			rv = glops->go_xmote_bh(gl);
			spin_lock(&gl->gl_lockref.lock);
			if (rv) {
				do_error(gl, rv);
				goto out;
			}
		}
		do_promote(gl);
	}
out:
	if (!test_bit(GLF_CANCELING, &gl->gl_flags))
		clear_bit(GLF_LOCK, &gl->gl_flags);
}

static bool is_system_glock(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);

	if (gl == m_ip->i_gl)
		return true;
	return false;
}

/**
 * do_xmote - Calls the DLM to change the state of a lock
 * @gl: The lock state
 * @gh: The holder (only for promotes)
 * @target: The target lock state
 *
 */

static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh,
		     unsigned int target)
__releases(&gl->gl_lockref.lock)
__acquires(&gl->gl_lockref.lock)
{
	const struct gfs2_glock_operations *glops = gl->gl_ops;
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	unsigned int lck_flags = (unsigned int)(gh ? gh->gh_flags : 0);
	int ret;

	if (target != LM_ST_UNLOCKED && glock_blocked_by_withdraw(gl) &&
	    gh && !(gh->gh_flags & LM_FLAG_NOEXP))
		goto skip_inval;

	lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP);
	GLOCK_BUG_ON(gl, gl->gl_state == target);
	GLOCK_BUG_ON(gl, gl->gl_state == gl->gl_target);
	if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) &&
	    glops->go_inval) {
		/*
		 * If another process is already doing the invalidate, let that
		 * finish first. The glock state machine will get back to this
		 * holder again later.
		 */
		if (test_and_set_bit(GLF_INVALIDATE_IN_PROGRESS,
				     &gl->gl_flags))
			return;
		do_error(gl, 0); /* Fail queued try locks */
	}
	gl->gl_req = target;
	set_bit(GLF_BLOCKING, &gl->gl_flags);
	if ((gl->gl_req == LM_ST_UNLOCKED) ||
	    (gl->gl_state == LM_ST_EXCLUSIVE) ||
	    (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB)))
		clear_bit(GLF_BLOCKING, &gl->gl_flags);
	if (!glops->go_inval && !glops->go_sync)
		goto skip_inval;

	spin_unlock(&gl->gl_lockref.lock);
	if (glops->go_sync) {
		ret = glops->go_sync(gl);
		/* If we had a problem syncing (due to io errors or whatever),
		 * we should not invalidate the metadata or tell dlm to
		 * release the glock to other nodes.
		 */
		if (ret) {
			if (cmpxchg(&sdp->sd_log_error, 0, ret)) {
				fs_err(sdp, "Error %d syncing glock \n", ret);
				gfs2_dump_glock(NULL, gl, true);
			}
			spin_lock(&gl->gl_lockref.lock);
			goto skip_inval;
		}
	}
	if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) {
		/*
		 * The call to go_sync should have cleared out the ail list.
		 * If there are still items, we have a problem. We ought to
		 * withdraw, but we can't because the withdraw code also uses
		 * glocks. Warn about the error, dump the glock, then fall
		 * through and wait for logd to do the withdraw for us.
		 */
		if ((atomic_read(&gl->gl_ail_count) != 0) &&
		    (!cmpxchg(&sdp->sd_log_error, 0, -EIO))) {
			gfs2_glock_assert_warn(gl,
					       !atomic_read(&gl->gl_ail_count));
			gfs2_dump_glock(NULL, gl, true);
		}
		glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA);
		clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
	}
	spin_lock(&gl->gl_lockref.lock);

skip_inval:
	gl->gl_lockref.count++;
	/*
	 * Check for an error encountered since we called go_sync and go_inval.
	 * If so, we can't withdraw from the glock code because the withdraw
	 * code itself uses glocks (see function signal_our_withdraw) to
	 * change the mount to read-only. Most importantly, we must not call
	 * dlm to unlock the glock until the journal is in a known good state
	 * (after journal replay) otherwise other nodes may use the object
	 * (rgrp or dinode) and then later, journal replay will corrupt the
	 * file system. The best we can do here is wait for the logd daemon
	 * to see sd_log_error and withdraw, and in the meantime, requeue the
	 * work for later.
	 *
	 * We make a special exception for some system glocks, such as the
	 * system statfs inode glock, which needs to be granted before the
	 * gfs2_quotad daemon can exit, and that exit needs to finish before
	 * we can unmount the withdrawn file system.
	 *
	 * However, if we're just unlocking the lock (say, for unmount, when
	 * gfs2_gl_hash_clear calls clear_glock) and recovery is complete
	 * then it's okay to tell dlm to unlock it.
	 */
	if (unlikely(sdp->sd_log_error) && !gfs2_withdrawing_or_withdrawn(sdp))
		gfs2_withdraw_delayed(sdp);
	if (glock_blocked_by_withdraw(gl) &&
	    (target != LM_ST_UNLOCKED ||
	     test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags))) {
		if (!is_system_glock(gl)) {
			request_demote(gl, LM_ST_UNLOCKED, 0, false);
			/*
			 * Ordinarily, we would call dlm and its callback would call
			 * finish_xmote, which would call state_change() to the new state.
			 * Since we withdrew, we won't call dlm, so call state_change
			 * manually, but to the UNLOCKED state we desire.
			 */
			state_change(gl, LM_ST_UNLOCKED);
			/*
			 * We skip telling dlm to do the locking, so we won't get a
			 * reply that would otherwise clear GLF_LOCK. So we clear it here.
			 */
			clear_bit(GLF_LOCK, &gl->gl_flags);
			clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
			gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD);
			return;
		} else {
			clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
		}
	}

	if (ls->ls_ops->lm_lock) {
		set_bit(GLF_PENDING_REPLY, &gl->gl_flags);
		spin_unlock(&gl->gl_lockref.lock);
		ret = ls->ls_ops->lm_lock(gl, target, lck_flags);
		spin_lock(&gl->gl_lockref.lock);

		if (ret == -EINVAL && gl->gl_target == LM_ST_UNLOCKED &&
		    target == LM_ST_UNLOCKED &&
		    test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) {
			/*
			 * The lockspace has been released and the lock has
			 * been unlocked implicitly.
			 */
		} else if (ret) {
			fs_err(sdp, "lm_lock ret %d\n", ret);
			target = gl->gl_state | LM_OUT_ERROR;
		} else {
			/* The operation will be completed asynchronously. */
			return;
		}
		clear_bit(GLF_PENDING_REPLY, &gl->gl_flags);
	}

	/* Complete the operation now. */
	finish_xmote(gl, target);
	gfs2_glock_queue_work(gl, 0);
}

/**
 * run_queue - do all outstanding tasks related to a glock
 * @gl: The glock in question
 * @nonblock: True if we must not block in run_queue
 *
 */

static void run_queue(struct gfs2_glock *gl, const int nonblock)
__releases(&gl->gl_lockref.lock)
__acquires(&gl->gl_lockref.lock)
{
	struct gfs2_holder *gh;

	if (test_bit(GLF_LOCK, &gl->gl_flags))
		return;
	set_bit(GLF_LOCK, &gl->gl_flags);

	/* While a demote is in progress, the GLF_LOCK flag must be set. */
	GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags));

	if (test_bit(GLF_DEMOTE, &gl->gl_flags) &&
	    gl->gl_demote_state != gl->gl_state) {
		if (find_first_holder(gl))
			goto out_unlock;
		if (nonblock)
			goto out_sched;
		set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
		GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE);
		gl->gl_target = gl->gl_demote_state;
		do_xmote(gl, NULL, gl->gl_target);
		return;
	} else {
		if (test_bit(GLF_DEMOTE, &gl->gl_flags))
			gfs2_demote_wake(gl);
		if (do_promote(gl))
			goto out_unlock;
		gh = find_first_waiter(gl);
		if (!gh)
			goto out_unlock;
		gl->gl_target = gh->gh_state;
		if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
			do_error(gl, 0); /* Fail queued try locks */
		do_xmote(gl, gh, gl->gl_target);
		return;
	}

out_sched:
	clear_bit(GLF_LOCK, &gl->gl_flags);
	smp_mb__after_atomic();
	gl->gl_lockref.count++;
	gfs2_glock_queue_work(gl, 0);
	return;

out_unlock:
	clear_bit(GLF_LOCK, &gl->gl_flags);
	smp_mb__after_atomic();
}

/**
 * glock_set_object - set the gl_object field of a glock
 * @gl: the glock
 * @object: the object
 */
void glock_set_object(struct gfs2_glock *gl, void *object)
{
	void *prev_object;

	spin_lock(&gl->gl_lockref.lock);
	prev_object = gl->gl_object;
	gl->gl_object = object;
	spin_unlock(&gl->gl_lockref.lock);
	if (gfs2_assert_warn(gl->gl_name.ln_sbd, prev_object == NULL))
		gfs2_dump_glock(NULL, gl, true);
}

/**
 * glock_clear_object - clear the gl_object field of a glock
 * @gl: the glock
 * @object: object the glock currently points at
 */
void glock_clear_object(struct gfs2_glock *gl, void *object)
{
	void *prev_object;

	spin_lock(&gl->gl_lockref.lock);
	prev_object = gl->gl_object;
	gl->gl_object = NULL;
	spin_unlock(&gl->gl_lockref.lock);
	if (gfs2_assert_warn(gl->gl_name.ln_sbd, prev_object == object))
		gfs2_dump_glock(NULL, gl, true);
}

void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation)
{
	struct gfs2_inode_lvb *ri = (void *)gl->gl_lksb.sb_lvbptr;

	if (ri->ri_magic == 0)
		ri->ri_magic = cpu_to_be32(GFS2_MAGIC);
	if (ri->ri_magic == cpu_to_be32(GFS2_MAGIC))
		ri->ri_generation_deleted = cpu_to_be64(generation);
}

bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation)
{
	struct gfs2_inode_lvb *ri = (void *)gl->gl_lksb.sb_lvbptr;

	if (ri->ri_magic != cpu_to_be32(GFS2_MAGIC))
		return false;
	return generation <= be64_to_cpu(ri->ri_generation_deleted);
}

static void gfs2_glock_poke(struct gfs2_glock *gl)
{
	int flags = LM_FLAG_TRY_1CB | LM_FLAG_ANY | GL_SKIP;
	struct gfs2_holder gh;
	int error;

	__gfs2_holder_init(gl, LM_ST_SHARED, flags, &gh, _RET_IP_);
	error = gfs2_glock_nq(&gh);
	if (!error)
		gfs2_glock_dq(&gh);
	gfs2_holder_uninit(&gh);
}

static struct gfs2_inode *gfs2_grab_existing_inode(struct gfs2_glock *gl)
{
	struct gfs2_inode *ip;

	spin_lock(&gl->gl_lockref.lock);
	ip = gl->gl_object;
	if (ip && !igrab(&ip->i_inode))
		ip = NULL;
	spin_unlock(&gl->gl_lockref.lock);
	if (ip) {
		wait_on_inode(&ip->i_inode);
		if (is_bad_inode(&ip->i_inode)) {
			iput(&ip->i_inode);
			ip = NULL;
		}
	}
	return ip;
}

static void gfs2_try_evict(struct gfs2_glock *gl)
{
	struct gfs2_inode *ip;

	/*
	 * If there is contention on the iopen glock and we have an inode, try
	 * to grab and release the inode so that it can be evicted. The
	 * GIF_DEFER_DELETE flag indicates to gfs2_evict_inode() that the inode
	 * should not be deleted locally. This will allow the remote node to
	 * go ahead and delete the inode without us having to do it, which will
	 * avoid rgrp glock thrashing.
	 *
	 * The remote node is likely still holding the corresponding inode
	 * glock, so it will run before we get to verify that the delete has
	 * happened below. (Verification is triggered by the call to
	 * gfs2_queue_verify_delete() in gfs2_evict_inode().)
	 */
	ip = gfs2_grab_existing_inode(gl);
	if (ip) {
		set_bit(GLF_DEFER_DELETE, &gl->gl_flags);
		d_prune_aliases(&ip->i_inode);
		iput(&ip->i_inode);
		clear_bit(GLF_DEFER_DELETE, &gl->gl_flags);

		/* If the inode was evicted, gl->gl_object will now be NULL. */
		ip = gfs2_grab_existing_inode(gl);
		if (ip) {
			gfs2_glock_poke(ip->i_gl);
			iput(&ip->i_inode);
		}
	}
}

bool gfs2_queue_try_to_evict(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

	if (test_and_set_bit(GLF_TRY_TO_EVICT, &gl->gl_flags))
		return false;
	return !mod_delayed_work(sdp->sd_delete_wq, &gl->gl_delete, 0);
}

bool gfs2_queue_verify_delete(struct gfs2_glock *gl, bool later)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	unsigned long delay;

	if (test_and_set_bit(GLF_VERIFY_DELETE, &gl->gl_flags))
		return false;
	delay = later ? HZ + get_random_long() % (HZ * 9) : 0;
	return queue_delayed_work(sdp->sd_delete_wq, &gl->gl_delete, delay);
}

static void delete_work_func(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct gfs2_glock *gl = container_of(dwork, struct gfs2_glock, gl_delete);
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	bool verify_delete = test_and_clear_bit(GLF_VERIFY_DELETE, &gl->gl_flags);

	if (test_and_clear_bit(GLF_TRY_TO_EVICT, &gl->gl_flags))
		gfs2_try_evict(gl);

	if (verify_delete) {
		u64 no_addr = gl->gl_name.ln_number;
		struct inode *inode;

		inode = gfs2_lookup_by_inum(sdp, no_addr, gl->gl_no_formal_ino,
					    GFS2_BLKST_UNLINKED);
		if (IS_ERR(inode)) {
			if (PTR_ERR(inode) == -EAGAIN &&
			    !test_bit(SDF_KILL, &sdp->sd_flags) &&
			    gfs2_queue_verify_delete(gl, true))
				return;
		} else {
			d_prune_aliases(inode);
			iput(inode);
		}
	}

	gfs2_glock_put(gl);
}

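/*
 * glock_work_func - the delayed-work handler for a glock
 * @work: the gl_work member of the glock
 *
 * Processes a pending DLM reply (GLF_HAVE_REPLY), turns a pending demote
 * into an actual demote request once the minimum hold time has expired,
 * and then runs the queue of waiting holders. Glock references that were
 * handed to the work queue and not requeued are dropped at the end.
 */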
static void glock_work_func(struct work_struct *work)
{
	unsigned long delay = 0;
	struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work);
	unsigned int drop_refs = 1;

	spin_lock(&gl->gl_lockref.lock);
	if (test_bit(GLF_HAVE_REPLY, &gl->gl_flags)) {
		clear_bit(GLF_HAVE_REPLY, &gl->gl_flags);
		finish_xmote(gl, gl->gl_reply);
		drop_refs++;
	}
	if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
	    gl->gl_state != LM_ST_UNLOCKED &&
	    gl->gl_demote_state != LM_ST_EXCLUSIVE) {
		if (gl->gl_name.ln_type == LM_TYPE_INODE) {
			unsigned long holdtime, now = jiffies;

			holdtime = gl->gl_tchange + gl->gl_hold_time;
			if (time_before(now, holdtime))
				delay = holdtime - now;
		}

		if (!delay) {
			clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags);
			gfs2_set_demote(GLF_DEMOTE, gl);
		}
	}
	run_queue(gl, 0);
	if (delay) {
		/* Keep one glock reference for the work we requeue. */
		drop_refs--;
		gfs2_glock_queue_work(gl, delay);
	}

	/* Drop the remaining glock references manually. */
	GLOCK_BUG_ON(gl, gl->gl_lockref.count < drop_refs);
	gl->gl_lockref.count -= drop_refs;
	if (!gl->gl_lockref.count) {
		if (gl->gl_state == LM_ST_UNLOCKED) {
			__gfs2_glock_put(gl);
			return;
		}
		gfs2_glock_add_to_lru(gl);
	}
	spin_unlock(&gl->gl_lockref.lock);
}

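/*
 * find_insert_glock - look up a glock, optionally inserting a new one
 * @name: the lock name
 * @new: a new glock to insert, or NULL for a pure lookup
 *
 * Returns an existing glock (with a reference taken) if one is found, NULL
 * if there is none (in which case @new, if given, has been inserted), or an
 * error pointer if the insert fails. If a matching glock is found but is
 * already dead, wait for it to be freed and retry; the per-name wait table
 * above provides the wait queues for this.
 */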
static struct gfs2_glock *find_insert_glock(struct lm_lockname *name,
					    struct gfs2_glock *new)
{
	struct wait_glock_queue wait;
	wait_queue_head_t *wq = glock_waitqueue(name);
	struct gfs2_glock *gl;

	wait.name = name;
	init_wait(&wait.wait);
	wait.wait.func = glock_wake_function;

again:
	prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
	rcu_read_lock();
	if (new) {
		gl = rhashtable_lookup_get_insert_fast(&gl_hash_table,
						       &new->gl_node, ht_parms);
		if (IS_ERR(gl))
			goto out;
	} else {
		gl = rhashtable_lookup_fast(&gl_hash_table,
					    name, ht_parms);
	}
	if (gl && !lockref_get_not_dead(&gl->gl_lockref)) {
		rcu_read_unlock();
		schedule();
		goto again;
	}
out:
	rcu_read_unlock();
	finish_wait(wq, &wait.wait);
	if (gl)
		gfs2_glock_remove_from_lru(gl);
	return gl;
}

/**
 * gfs2_glock_get() - Get a glock, or create one if one doesn't exist
 * @sdp: The GFS2 superblock
 * @number: the lock number
 * @glops: The glock_operations to use
 * @create: If 0, don't create the glock if it doesn't exist
 * @glp: the glock is returned here
 *
 * This does not lock a glock, just finds/creates structures for one.
 *
 * Returns: errno
 */

int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
		   const struct gfs2_glock_operations *glops, int create,
		   struct gfs2_glock **glp)
{
	struct super_block *s = sdp->sd_vfs;
	struct lm_lockname name = { .ln_number = number,
				    .ln_type = glops->go_type,
				    .ln_sbd = sdp };
	struct gfs2_glock *gl, *tmp;
	struct address_space *mapping;

	gl = find_insert_glock(&name, NULL);
	if (gl)
		goto found;
	if (!create)
		return -ENOENT;

	if (glops->go_flags & GLOF_ASPACE) {
		struct gfs2_glock_aspace *gla =
			kmem_cache_alloc(gfs2_glock_aspace_cachep, GFP_NOFS);
		if (!gla)
			return -ENOMEM;
		gl = &gla->glock;
	} else {
		gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_NOFS);
		if (!gl)
			return -ENOMEM;
	}
	memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb));
	gl->gl_ops = glops;

	if (glops->go_flags & GLOF_LVB) {
		gl->gl_lksb.sb_lvbptr = kzalloc(GDLM_LVB_SIZE, GFP_NOFS);
		if (!gl->gl_lksb.sb_lvbptr) {
			gfs2_glock_dealloc(&gl->gl_rcu);
			return -ENOMEM;
		}
	}

	atomic_inc(&sdp->sd_glock_disposal);
	gl->gl_node.next = NULL;
	gl->gl_flags = BIT(GLF_INITIAL);
	if (glops->go_instantiate)
		gl->gl_flags |= BIT(GLF_INSTANTIATE_NEEDED);
	gl->gl_name = name;
	lockref_init(&gl->gl_lockref);
	lockdep_set_subclass(&gl->gl_lockref.lock, glops->go_subclass);
	gl->gl_state = LM_ST_UNLOCKED;
	gl->gl_target = LM_ST_UNLOCKED;
	gl->gl_demote_state = LM_ST_EXCLUSIVE;
	gl->gl_dstamp = 0;
	preempt_disable();
	/* We use the global stats to estimate the initial per-glock stats */
	gl->gl_stats = this_cpu_ptr(sdp->sd_lkstats)->lkstats[glops->go_type];
	preempt_enable();
	gl->gl_stats.stats[GFS2_LKS_DCOUNT] = 0;
	gl->gl_stats.stats[GFS2_LKS_QCOUNT] = 0;
	gl->gl_tchange = jiffies;
	gl->gl_object = NULL;
	gl->gl_hold_time = GL_GLOCK_DFT_HOLD;
	INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
	if (gl->gl_name.ln_type == LM_TYPE_IOPEN)
		INIT_DELAYED_WORK(&gl->gl_delete, delete_work_func);

	mapping = gfs2_glock2aspace(gl);
	if (mapping) {
		mapping->a_ops = &gfs2_meta_aops;
		mapping->host = s->s_bdev->bd_mapping->host;
		mapping->flags = 0;
		mapping_set_gfp_mask(mapping, GFP_NOFS);
		mapping->i_private_data = NULL;
		mapping->writeback_index = 0;
	}

	tmp = find_insert_glock(&name, gl);
	if (tmp) {
		gfs2_glock_dealloc(&gl->gl_rcu);
		if (atomic_dec_and_test(&sdp->sd_glock_disposal))
			wake_up(&sdp->sd_kill_wait);

		if (IS_ERR(tmp))
			return PTR_ERR(tmp);
		gl = tmp;
	}

found:
	*glp = gl;
	return 0;
}

/**
 * __gfs2_holder_init - initialize a struct gfs2_holder in the default way
 * @gl: the glock
 * @state: the state we're requesting
 * @flags: the modifier flags
 * @gh: the holder structure
 *
 */

void __gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags,
			struct gfs2_holder *gh, unsigned long ip)
{
	INIT_LIST_HEAD(&gh->gh_list);
	gh->gh_gl = gfs2_glock_hold(gl);
	gh->gh_ip = ip;
	gh->gh_owner_pid = get_pid(task_pid(current));
	gh->gh_state = state;
	gh->gh_flags = flags;
	gh->gh_iflags = 0;
}

/**
 * gfs2_holder_reinit - reinitialize a struct gfs2_holder so we can requeue it
 * @state: the state we're requesting
 * @flags: the modifier flags
 * @gh: the holder structure
 *
 * Don't mess with the glock.
 *
 */

void gfs2_holder_reinit(unsigned int state, u16 flags, struct gfs2_holder *gh)
{
	gh->gh_state = state;
	gh->gh_flags = flags;
	gh->gh_iflags = 0;
	gh->gh_ip = _RET_IP_;
	put_pid(gh->gh_owner_pid);
	gh->gh_owner_pid = get_pid(task_pid(current));
}

/**
 * gfs2_holder_uninit - uninitialize a holder structure (drop glock reference)
 * @gh: the holder structure
 *
 */

void gfs2_holder_uninit(struct gfs2_holder *gh)
{
	put_pid(gh->gh_owner_pid);
	gfs2_glock_put(gh->gh_gl);
	gfs2_holder_mark_uninitialized(gh);
	gh->gh_ip = 0;
}

static void gfs2_glock_update_hold_time(struct gfs2_glock *gl,
					unsigned long start_time)
{
	/* Have we waited longer than a second? */
	if (time_after(jiffies, start_time + HZ)) {
		/* Lengthen the minimum hold time. */
		gl->gl_hold_time = min(gl->gl_hold_time + GL_GLOCK_HOLD_INCR,
				       GL_GLOCK_MAX_HOLD);
	}
}

/**
 * gfs2_glock_holder_ready - holder is ready and its error code can be collected
 * @gh: the glock holder
 *
 * Called when a glock holder no longer needs to be waited for because it is
 * now either held (HIF_HOLDER set; gh_error == 0), or acquiring the lock has
 * failed (gh_error != 0).
 */

int gfs2_glock_holder_ready(struct gfs2_holder *gh)
{
	if (gh->gh_error || (gh->gh_flags & GL_SKIP))
		return gh->gh_error;
	gh->gh_error = gfs2_instantiate(gh);
	if (gh->gh_error)
		gfs2_glock_dq(gh);
	return gh->gh_error;
}

/**
 * gfs2_glock_wait - wait on a glock acquisition
 * @gh: the glock holder
 *
 * Returns: 0 on success
 */

int gfs2_glock_wait(struct gfs2_holder *gh)
{
	unsigned long start_time = jiffies;

	might_sleep();
	wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE);
	gfs2_glock_update_hold_time(gh->gh_gl, start_time);
	return gfs2_glock_holder_ready(gh);
}

static int glocks_pending(unsigned int num_gh, struct gfs2_holder *ghs)
{
	int i;

	for (i = 0; i < num_gh; i++)
		if (test_bit(HIF_WAIT, &ghs[i].gh_iflags))
			return 1;
	return 0;
}

/**
 * gfs2_glock_async_wait - wait on multiple asynchronous glock acquisitions
 * @num_gh: the number of holders in the array
 * @ghs: the glock holder array
 *
 * Returns: 0 on success, meaning all glocks have been granted and are held.
 *          -ESTALE if the request timed out, meaning all glocks were released,
 *          and the caller should retry the operation.
 */

int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs)
{
	struct gfs2_sbd *sdp = ghs[0].gh_gl->gl_name.ln_sbd;
	int i, ret = 0, timeout = 0;
	unsigned long start_time = jiffies;

	might_sleep();
	/*
	 * Total up the (minimum hold time * 2) of all glocks and use that to
	 * determine the max amount of time we should wait.
	 */
	for (i = 0; i < num_gh; i++)
		timeout += ghs[i].gh_gl->gl_hold_time << 1;

	if (!wait_event_timeout(sdp->sd_async_glock_wait,
				!glocks_pending(num_gh, ghs), timeout)) {
		ret = -ESTALE; /* request timed out. */
		goto out;
	}

	for (i = 0; i < num_gh; i++) {
		struct gfs2_holder *gh = &ghs[i];
		int ret2;

		if (test_bit(HIF_HOLDER, &gh->gh_iflags)) {
			gfs2_glock_update_hold_time(gh->gh_gl,
						    start_time);
		}
		ret2 = gfs2_glock_holder_ready(gh);
		if (!ret)
			ret = ret2;
	}

out:
	if (ret) {
		for (i = 0; i < num_gh; i++) {
			struct gfs2_holder *gh = &ghs[i];

			gfs2_glock_dq(gh);
		}
	}
	return ret;
}
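
/*
 * Illustrative sketch (not built): how a caller typically combines GL_ASYNC
 * with gfs2_glock_async_wait(). The gfs2_holder_init() wrapper is assumed
 * to be the one declared in glock.h; error handling is abbreviated.
 *
 *	struct gfs2_holder ghs[2];
 *	int error;
 *
 *	gfs2_holder_init(gl1, LM_ST_EXCLUSIVE, GL_ASYNC, &ghs[0]);
 *	gfs2_holder_init(gl2, LM_ST_EXCLUSIVE, GL_ASYNC, &ghs[1]);
 *	error = gfs2_glock_nq(&ghs[0]);
 *	if (!error)
 *		error = gfs2_glock_nq(&ghs[1]);
 *	if (!error)
 *		error = gfs2_glock_async_wait(2, ghs);	// -ESTALE means retry
 */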

/**
 * request_demote - process a demote request
 * @gl: the glock
 * @state: the state the caller wants us to change to
 * @delay: zero to demote immediately; otherwise pending demote
 * @remote: true if this came from a different cluster node
 *
 * There are only two requests that we are going to see in actual
 * practice: LM_ST_SHARED and LM_ST_UNLOCKED
 */

static void request_demote(struct gfs2_glock *gl, unsigned int state,
			   unsigned long delay, bool remote)
{
	gfs2_set_demote(delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE, gl);
	if (gl->gl_demote_state == LM_ST_EXCLUSIVE) {
		gl->gl_demote_state = state;
		gl->gl_demote_time = jiffies;
	} else if (gl->gl_demote_state != LM_ST_UNLOCKED &&
		   gl->gl_demote_state != state) {
		gl->gl_demote_state = LM_ST_UNLOCKED;
	}
	if (gl->gl_ops->go_callback)
		gl->gl_ops->go_callback(gl, remote);
	trace_gfs2_demote_rq(gl, remote);
}

void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);

	if (seq) {
		seq_vprintf(seq, fmt, args);
	} else {
		vaf.fmt = fmt;
		vaf.va = &args;

		pr_err("%pV", &vaf);
	}

	va_end(args);
}

static inline bool pid_is_meaningful(const struct gfs2_holder *gh)
{
	if (!(gh->gh_flags & GL_NOPID))
		return true;
	return !test_bit(HIF_HOLDER, &gh->gh_iflags);
}

/**
 * add_to_queue - Add a holder to the wait queue (but look for recursion)
 * @gh: the holder structure to add
 *
 * Eventually we should move the recursive locking trap to a
 * debugging option or something like that. This is the fast
 * path and needs to have the minimum number of distractions.
 *
 */

static inline void add_to_queue(struct gfs2_holder *gh)
__releases(&gl->gl_lockref.lock)
__acquires(&gl->gl_lockref.lock)
{
	struct gfs2_glock *gl = gh->gh_gl;
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct gfs2_holder *gh2;
	int try_futile = 0;

	GLOCK_BUG_ON(gl, gh->gh_owner_pid == NULL);
	if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags))
		GLOCK_BUG_ON(gl, true);

	if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
		if (test_bit(GLF_LOCK, &gl->gl_flags)) {
			struct gfs2_holder *current_gh;

			current_gh = find_first_holder(gl);
			try_futile = !may_grant(gl, current_gh, gh);
		}
		if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
			goto fail;
	}

	list_for_each_entry(gh2, &gl->gl_holders, gh_list) {
		if (likely(gh2->gh_owner_pid != gh->gh_owner_pid))
			continue;
		if (gh->gh_gl->gl_ops->go_type == LM_TYPE_FLOCK)
			continue;
		if (!pid_is_meaningful(gh2))
			continue;
		goto trap_recursive;
	}
	list_for_each_entry(gh2, &gl->gl_holders, gh_list) {
		if (try_futile &&
		    !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) {
fail:
			gh->gh_error = GLR_TRYFAILED;
			gfs2_holder_wake(gh);
			return;
		}
	}
	trace_gfs2_glock_queue(gh, 1);
	gfs2_glstats_inc(gl, GFS2_LKS_QCOUNT);
	gfs2_sbstats_inc(gl, GFS2_LKS_QCOUNT);
	list_add_tail(&gh->gh_list, &gl->gl_holders);
	return;

trap_recursive:
	fs_err(sdp, "original: %pSR\n", (void *)gh2->gh_ip);
	fs_err(sdp, "pid: %d\n", pid_nr(gh2->gh_owner_pid));
	fs_err(sdp, "lock type: %d req lock state : %d\n",
	       gh2->gh_gl->gl_name.ln_type, gh2->gh_state);
	fs_err(sdp, "new: %pSR\n", (void *)gh->gh_ip);
	fs_err(sdp, "pid: %d\n", pid_nr(gh->gh_owner_pid));
	fs_err(sdp, "lock type: %d req lock state : %d\n",
	       gh->gh_gl->gl_name.ln_type, gh->gh_state);
	gfs2_dump_glock(NULL, gl, true);
	BUG();
}

/**
 * gfs2_glock_nq - enqueue a struct gfs2_holder onto a glock (acquire a glock)
 * @gh: the holder structure
 *
 * if (gh->gh_flags & GL_ASYNC), this never returns an error
 *
 * Returns: 0, GLR_TRYFAILED, or errno on failure
 */

int gfs2_glock_nq(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	int error;

	if (glock_blocked_by_withdraw(gl) && !(gh->gh_flags & LM_FLAG_NOEXP))
		return -EIO;

	if (gh->gh_flags & GL_NOBLOCK) {
		struct gfs2_holder *current_gh;

		error = -ECHILD;
		spin_lock(&gl->gl_lockref.lock);
		if (find_last_waiter(gl))
			goto unlock;
		current_gh = find_first_holder(gl);
		if (!may_grant(gl, current_gh, gh))
			goto unlock;
		set_bit(HIF_HOLDER, &gh->gh_iflags);
		list_add_tail(&gh->gh_list, &gl->gl_holders);
		trace_gfs2_promote(gh);
		error = 0;
unlock:
		spin_unlock(&gl->gl_lockref.lock);
		return error;
	}

	gh->gh_error = 0;
	spin_lock(&gl->gl_lockref.lock);
	add_to_queue(gh);
	if (unlikely((LM_FLAG_NOEXP & gh->gh_flags) &&
		     test_and_clear_bit(GLF_HAVE_FROZEN_REPLY, &gl->gl_flags))) {
		set_bit(GLF_HAVE_REPLY, &gl->gl_flags);
		gl->gl_lockref.count++;
		gfs2_glock_queue_work(gl, 0);
	}
	run_queue(gl, 1);
	spin_unlock(&gl->gl_lockref.lock);

	error = 0;
	if (!(gh->gh_flags & GL_ASYNC))
		error = gfs2_glock_wait(gh);

	return error;
}
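
/*
 * Illustrative sketch (not built): the usual synchronous locking pattern
 * built on gfs2_glock_nq()/gfs2_glock_dq(). gfs2_glock_nq_init() is assumed
 * to be the wrapper declared in glock.h that initializes the holder and
 * enqueues it in one call.
 *
 *	struct gfs2_holder gh;
 *	int error;
 *
 *	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
 *	if (error)
 *		return error;
 *	// ... access the object protected by the glock ...
 *	gfs2_glock_dq_uninit(&gh);
 */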

/**
 * gfs2_glock_poll - poll to see if an async request has been completed
 * @gh: the holder
 *
 * Returns: 1 if the request is ready to be gfs2_glock_wait()ed on
 */

int gfs2_glock_poll(struct gfs2_holder *gh)
{
	return test_bit(HIF_WAIT, &gh->gh_iflags) ? 0 : 1;
}

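/*
 * __gfs2_glock_dq - dequeue a holder, with gl->gl_lockref.lock held
 * @gh: the glock holder
 *
 * Removes @gh from the list of holders. A GL_NOCACHE holder triggers a
 * demote to UNLOCKED first. Unless no demote is needed and no holders
 * remain, the glock work is queued (with a delay for inode glocks whose
 * demote is merely pending) so that the state machine can make progress.
 */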
static void __gfs2_glock_dq(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	unsigned delay = 0;
	int fast_path = 0;

	/*
	 * This holder should not be cached, so mark it for demote.
	 * Note: this should be done before the glock_needs_demote
	 * check below.
	 */
	if (gh->gh_flags & GL_NOCACHE)
		request_demote(gl, LM_ST_UNLOCKED, 0, false);

	list_del_init(&gh->gh_list);
	clear_bit(HIF_HOLDER, &gh->gh_iflags);
	trace_gfs2_glock_queue(gh, 0);

	/*
	 * If there hasn't been a demote request we are done.
	 * (Let the remaining holders, if any, keep holding it.)
	 */
	if (!glock_needs_demote(gl)) {
		if (list_empty(&gl->gl_holders))
			fast_path = 1;
	}

	if (unlikely(!fast_path)) {
		gl->gl_lockref.count++;
		if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
		    !test_bit(GLF_DEMOTE, &gl->gl_flags) &&
		    gl->gl_name.ln_type == LM_TYPE_INODE)
			delay = gl->gl_hold_time;
		gfs2_glock_queue_work(gl, delay);
	}
}

/**
 * gfs2_glock_dq - dequeue a struct gfs2_holder from a glock (release a glock)
 * @gh: the glock holder
 *
 */
void gfs2_glock_dq(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

	spin_lock(&gl->gl_lockref.lock);
	if (!gfs2_holder_queued(gh)) {
		/*
		 * May have already been dequeued because the locking request
		 * was GL_ASYNC and it has failed in the meantime.
		 */
		goto out;
	}

	if (list_is_first(&gh->gh_list, &gl->gl_holders) &&
	    !test_bit(HIF_HOLDER, &gh->gh_iflags) &&
	    test_bit(GLF_LOCK, &gl->gl_flags) &&
	    !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
	    !test_bit(GLF_CANCELING, &gl->gl_flags)) {
		set_bit(GLF_CANCELING, &gl->gl_flags);
		spin_unlock(&gl->gl_lockref.lock);
		gl->gl_name.ln_sbd->sd_lockstruct.ls_ops->lm_cancel(gl);
		wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE);
		spin_lock(&gl->gl_lockref.lock);
		clear_bit(GLF_CANCELING, &gl->gl_flags);
		clear_bit(GLF_LOCK, &gl->gl_flags);
		if (!gfs2_holder_queued(gh))
			goto out;
	}

	/*
	 * If we're in the process of file system withdraw, we cannot just
	 * dequeue any glocks until our journal is recovered, lest we introduce
	 * file system corruption. We need two exceptions to this rule: We need
	 * to allow unlocking of nondisk glocks and the glock for our own
	 * journal that needs recovery.
	 */
	if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) &&
	    glock_blocked_by_withdraw(gl) &&
	    gh->gh_gl != sdp->sd_jinode_gl) {
		sdp->sd_glock_dqs_held++;
		spin_unlock(&gl->gl_lockref.lock);
		might_sleep();
		wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY,
			    TASK_UNINTERRUPTIBLE);
		spin_lock(&gl->gl_lockref.lock);
	}

	__gfs2_glock_dq(gh);
out:
	spin_unlock(&gl->gl_lockref.lock);
}

void gfs2_glock_dq_wait(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	gfs2_glock_dq(gh);
	might_sleep();
	wait_on_bit(&gl->gl_flags, GLF_DEMOTE, TASK_UNINTERRUPTIBLE);
}

/**
 * gfs2_glock_dq_uninit - dequeue a holder from a glock and initialize it
 * @gh: the holder structure
 *
 */

void gfs2_glock_dq_uninit(struct gfs2_holder *gh)
{
	gfs2_glock_dq(gh);
	gfs2_holder_uninit(gh);
}

/**
 * gfs2_glock_nq_num - acquire a glock based on lock number
 * @sdp: the filesystem
 * @number: the lock number
 * @glops: the glock operations for the type of glock
 * @state: the state to acquire the glock in
 * @flags: modifier flags for the acquisition
 * @gh: the struct gfs2_holder
 *
 * Returns: errno
 */

int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number,
		      const struct gfs2_glock_operations *glops,
		      unsigned int state, u16 flags, struct gfs2_holder *gh)
{
	struct gfs2_glock *gl;
	int error;

	error = gfs2_glock_get(sdp, number, glops, CREATE, &gl);
	if (!error) {
		error = gfs2_glock_nq_init(gl, state, flags, gh);
		gfs2_glock_put(gl);
	}

	return error;
}

/**
 * glock_compare - Compare two struct gfs2_glock structures for sorting
 * @arg_a: the first structure
 * @arg_b: the second structure
 *
 */

static int glock_compare(const void *arg_a, const void *arg_b)
{
	const struct gfs2_holder *gh_a = *(const struct gfs2_holder **)arg_a;
	const struct gfs2_holder *gh_b = *(const struct gfs2_holder **)arg_b;
	const struct lm_lockname *a = &gh_a->gh_gl->gl_name;
	const struct lm_lockname *b = &gh_b->gh_gl->gl_name;

	if (a->ln_number > b->ln_number)
		return 1;
	if (a->ln_number < b->ln_number)
		return -1;
	BUG_ON(gh_a->gh_gl->gl_ops->go_type == gh_b->gh_gl->gl_ops->go_type);
	return 0;
}

/**
 * nq_m_sync - synchronously acquire more than one glock in deadlock free order
 * @num_gh: the number of structures
 * @ghs: an array of struct gfs2_holder structures
 * @p: placeholder for the holder structure to pass back
 *
 * Returns: 0 on success (all glocks acquired),
 *          errno on failure (no glocks acquired)
 */

static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs,
		     struct gfs2_holder **p)
{
	unsigned int x;
	int error = 0;

	for (x = 0; x < num_gh; x++)
		p[x] = &ghs[x];

	sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare, NULL);

	for (x = 0; x < num_gh; x++) {
		error = gfs2_glock_nq(p[x]);
		if (error) {
			while (x--)
				gfs2_glock_dq(p[x]);
			break;
		}
	}

	return error;
}

/**
 * gfs2_glock_nq_m - acquire multiple glocks
 * @num_gh: the number of structures
 * @ghs: an array of struct gfs2_holder structures
 *
 * Returns: 0 on success (all glocks acquired),
 *          errno on failure (no glocks acquired)
 */

int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs)
{
	struct gfs2_holder *tmp[4];
	struct gfs2_holder **pph = tmp;
	int error = 0;

	switch(num_gh) {
	case 0:
		return 0;
	case 1:
		return gfs2_glock_nq(ghs);
	default:
		if (num_gh <= 4)
			break;
		pph = kmalloc_array(num_gh, sizeof(struct gfs2_holder *),
				    GFP_NOFS);
		if (!pph)
			return -ENOMEM;
	}

	error = nq_m_sync(num_gh, ghs, pph);

	if (pph != tmp)
		kfree(pph);

	return error;
}

/**
 * gfs2_glock_dq_m - release multiple glocks
 * @num_gh: the number of structures
 * @ghs: an array of struct gfs2_holder structures
 *
 */

void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
{
	while (num_gh--)
		gfs2_glock_dq(&ghs[num_gh]);
}

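/*
 * gfs2_glock_cb - handle a (remote) demote request against a glock
 * @gl: the glock
 * @state: the state the remote node wants us to demote to
 *
 * For inode glocks that still have holders, the demote is delayed until the
 * minimum hold time has expired, so that a burst of local activity is not
 * immediately interrupted by remote requests.
 */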
1920 */ 1921 1922 void gfs2_glock_complete(struct gfs2_glock *gl, int ret) 1923 { 1924 struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct; 1925 1926 spin_lock(&gl->gl_lockref.lock); 1927 clear_bit(GLF_PENDING_REPLY, &gl->gl_flags); 1928 gl->gl_reply = ret; 1929 1930 if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))) { 1931 if (gfs2_should_freeze(gl)) { 1932 set_bit(GLF_HAVE_FROZEN_REPLY, &gl->gl_flags); 1933 spin_unlock(&gl->gl_lockref.lock); 1934 return; 1935 } 1936 } 1937 1938 gl->gl_lockref.count++; 1939 set_bit(GLF_HAVE_REPLY, &gl->gl_flags); 1940 gfs2_glock_queue_work(gl, 0); 1941 spin_unlock(&gl->gl_lockref.lock); 1942 } 1943 1944 static int glock_cmp(void *priv, const struct list_head *a, 1945 const struct list_head *b) 1946 { 1947 struct gfs2_glock *gla, *glb; 1948 1949 gla = list_entry(a, struct gfs2_glock, gl_lru); 1950 glb = list_entry(b, struct gfs2_glock, gl_lru); 1951 1952 if (gla->gl_name.ln_number > glb->gl_name.ln_number) 1953 return 1; 1954 if (gla->gl_name.ln_number < glb->gl_name.ln_number) 1955 return -1; 1956 1957 return 0; 1958 } 1959 1960 static bool can_free_glock(struct gfs2_glock *gl) 1961 { 1962 struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; 1963 1964 return !test_bit(GLF_LOCK, &gl->gl_flags) && 1965 !gl->gl_lockref.count && 1966 (!test_bit(GLF_LFLUSH, &gl->gl_flags) || 1967 test_bit(SDF_KILL, &sdp->sd_flags)); 1968 } 1969 1970 /** 1971 * gfs2_dispose_glock_lru - Demote a list of glocks 1972 * @list: The list to dispose of 1973 * 1974 * Disposing of glocks may involve disk accesses, so that here we sort 1975 * the glocks by number (i.e. disk location of the inodes) so that if 1976 * there are any such accesses, they'll be sent in order (mostly). 1977 * 1978 * Must be called under the lru_lock, but may drop and retake this 1979 * lock. While the lru_lock is dropped, entries may vanish from the 1980 * list, but no new entries will appear on the list (since it is 1981 * private) 1982 */ 1983 1984 static unsigned long gfs2_dispose_glock_lru(struct list_head *list) 1985 __releases(&lru_lock) 1986 __acquires(&lru_lock) 1987 { 1988 struct gfs2_glock *gl; 1989 unsigned long freed = 0; 1990 1991 list_sort(NULL, list, glock_cmp); 1992 1993 while(!list_empty(list)) { 1994 gl = list_first_entry(list, struct gfs2_glock, gl_lru); 1995 if (!spin_trylock(&gl->gl_lockref.lock)) { 1996 add_back_to_lru: 1997 list_move(&gl->gl_lru, &lru_list); 1998 continue; 1999 } 2000 if (!can_free_glock(gl)) { 2001 spin_unlock(&gl->gl_lockref.lock); 2002 goto add_back_to_lru; 2003 } 2004 list_del_init(&gl->gl_lru); 2005 atomic_dec(&lru_count); 2006 clear_bit(GLF_LRU, &gl->gl_flags); 2007 freed++; 2008 gl->gl_lockref.count++; 2009 if (gl->gl_state != LM_ST_UNLOCKED) 2010 request_demote(gl, LM_ST_UNLOCKED, 0, false); 2011 gfs2_glock_queue_work(gl, 0); 2012 spin_unlock(&gl->gl_lockref.lock); 2013 cond_resched_lock(&lru_lock); 2014 } 2015 return freed; 2016 } 2017 2018 /** 2019 * gfs2_scan_glock_lru - Scan the LRU looking for locks to demote 2020 * @nr: The number of entries to scan 2021 * 2022 * This function selects the entries on the LRU which are able to 2023 * be demoted, and then kicks off the process by calling 2024 * gfs2_dispose_glock_lru() above. 
2025 */ 2026 2027 static unsigned long gfs2_scan_glock_lru(unsigned long nr) 2028 { 2029 struct gfs2_glock *gl, *next; 2030 LIST_HEAD(dispose); 2031 unsigned long freed = 0; 2032 2033 spin_lock(&lru_lock); 2034 list_for_each_entry_safe(gl, next, &lru_list, gl_lru) { 2035 if (!nr--) 2036 break; 2037 if (can_free_glock(gl)) 2038 list_move(&gl->gl_lru, &dispose); 2039 } 2040 if (!list_empty(&dispose)) 2041 freed = gfs2_dispose_glock_lru(&dispose); 2042 spin_unlock(&lru_lock); 2043 2044 return freed; 2045 } 2046 2047 static unsigned long gfs2_glock_shrink_scan(struct shrinker *shrink, 2048 struct shrink_control *sc) 2049 { 2050 if (!(sc->gfp_mask & __GFP_FS)) 2051 return SHRINK_STOP; 2052 return gfs2_scan_glock_lru(sc->nr_to_scan); 2053 } 2054 2055 static unsigned long gfs2_glock_shrink_count(struct shrinker *shrink, 2056 struct shrink_control *sc) 2057 { 2058 return vfs_pressure_ratio(atomic_read(&lru_count)); 2059 } 2060 2061 static struct shrinker *glock_shrinker; 2062 2063 /** 2064 * glock_hash_walk - Call a function for glock in a hash bucket 2065 * @examiner: the function 2066 * @sdp: the filesystem 2067 * 2068 * Note that the function can be called multiple times on the same 2069 * object. So the user must ensure that the function can cope with 2070 * that. 2071 */ 2072 2073 static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp) 2074 { 2075 struct gfs2_glock *gl; 2076 struct rhashtable_iter iter; 2077 2078 rhashtable_walk_enter(&gl_hash_table, &iter); 2079 2080 do { 2081 rhashtable_walk_start(&iter); 2082 2083 while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl)) { 2084 if (gl->gl_name.ln_sbd == sdp) 2085 examiner(gl); 2086 } 2087 2088 rhashtable_walk_stop(&iter); 2089 } while (cond_resched(), gl == ERR_PTR(-EAGAIN)); 2090 2091 rhashtable_walk_exit(&iter); 2092 } 2093 2094 void gfs2_cancel_delete_work(struct gfs2_glock *gl) 2095 { 2096 clear_bit(GLF_TRY_TO_EVICT, &gl->gl_flags); 2097 clear_bit(GLF_VERIFY_DELETE, &gl->gl_flags); 2098 if (cancel_delayed_work(&gl->gl_delete)) 2099 gfs2_glock_put(gl); 2100 } 2101 2102 static void flush_delete_work(struct gfs2_glock *gl) 2103 { 2104 if (gl->gl_name.ln_type == LM_TYPE_IOPEN) { 2105 struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; 2106 2107 if (cancel_delayed_work(&gl->gl_delete)) { 2108 queue_delayed_work(sdp->sd_delete_wq, 2109 &gl->gl_delete, 0); 2110 } 2111 } 2112 } 2113 2114 void gfs2_flush_delete_work(struct gfs2_sbd *sdp) 2115 { 2116 glock_hash_walk(flush_delete_work, sdp); 2117 flush_workqueue(sdp->sd_delete_wq); 2118 } 2119 2120 /** 2121 * thaw_glock - thaw out a glock which has an unprocessed reply waiting 2122 * @gl: The glock to thaw 2123 * 2124 */ 2125 2126 static void thaw_glock(struct gfs2_glock *gl) 2127 { 2128 if (!test_and_clear_bit(GLF_HAVE_FROZEN_REPLY, &gl->gl_flags)) 2129 return; 2130 if (!lockref_get_not_dead(&gl->gl_lockref)) 2131 return; 2132 2133 gfs2_glock_remove_from_lru(gl); 2134 spin_lock(&gl->gl_lockref.lock); 2135 set_bit(GLF_HAVE_REPLY, &gl->gl_flags); 2136 gfs2_glock_queue_work(gl, 0); 2137 spin_unlock(&gl->gl_lockref.lock); 2138 } 2139 2140 /** 2141 * clear_glock - look at a glock and see if we can free it from glock cache 2142 * @gl: the glock to look at 2143 * 2144 */ 2145 2146 static void clear_glock(struct gfs2_glock *gl) 2147 { 2148 gfs2_glock_remove_from_lru(gl); 2149 2150 spin_lock(&gl->gl_lockref.lock); 2151 if (!__lockref_is_dead(&gl->gl_lockref)) { 2152 gl->gl_lockref.count++; 2153 if (gl->gl_state != LM_ST_UNLOCKED) 2154 request_demote(gl, LM_ST_UNLOCKED, 0, false); 2155 
gfs2_glock_queue_work(gl, 0); 2156 } 2157 spin_unlock(&gl->gl_lockref.lock); 2158 } 2159 2160 /** 2161 * gfs2_glock_thaw - Thaw any frozen glocks 2162 * @sdp: The super block 2163 * 2164 */ 2165 2166 void gfs2_glock_thaw(struct gfs2_sbd *sdp) 2167 { 2168 glock_hash_walk(thaw_glock, sdp); 2169 } 2170 2171 static void dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid) 2172 { 2173 spin_lock(&gl->gl_lockref.lock); 2174 gfs2_dump_glock(seq, gl, fsid); 2175 spin_unlock(&gl->gl_lockref.lock); 2176 } 2177 2178 static void dump_glock_func(struct gfs2_glock *gl) 2179 { 2180 dump_glock(NULL, gl, true); 2181 } 2182 2183 static void withdraw_dq(struct gfs2_glock *gl) 2184 { 2185 spin_lock(&gl->gl_lockref.lock); 2186 if (!__lockref_is_dead(&gl->gl_lockref) && 2187 glock_blocked_by_withdraw(gl)) 2188 do_error(gl, LM_OUT_ERROR); /* remove pending waiters */ 2189 spin_unlock(&gl->gl_lockref.lock); 2190 } 2191 2192 void gfs2_gl_dq_holders(struct gfs2_sbd *sdp) 2193 { 2194 glock_hash_walk(withdraw_dq, sdp); 2195 } 2196 2197 /** 2198 * gfs2_gl_hash_clear - Empty out the glock hash table 2199 * @sdp: the filesystem 2200 * 2201 * Called when unmounting the filesystem. 2202 */ 2203 2204 void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) 2205 { 2206 unsigned long start = jiffies; 2207 bool timed_out = false; 2208 2209 set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags); 2210 flush_workqueue(sdp->sd_glock_wq); 2211 glock_hash_walk(clear_glock, sdp); 2212 flush_workqueue(sdp->sd_glock_wq); 2213 2214 while (!timed_out) { 2215 wait_event_timeout(sdp->sd_kill_wait, 2216 !atomic_read(&sdp->sd_glock_disposal), 2217 HZ * 60); 2218 if (!atomic_read(&sdp->sd_glock_disposal)) 2219 break; 2220 timed_out = time_after(jiffies, start + (HZ * 600)); 2221 fs_warn(sdp, "%u glocks left after %u seconds%s\n", 2222 atomic_read(&sdp->sd_glock_disposal), 2223 jiffies_to_msecs(jiffies - start) / 1000, 2224 timed_out ? 
":" : "; still waiting"); 2225 } 2226 gfs2_lm_unmount(sdp); 2227 gfs2_free_dead_glocks(sdp); 2228 glock_hash_walk(dump_glock_func, sdp); 2229 destroy_workqueue(sdp->sd_glock_wq); 2230 sdp->sd_glock_wq = NULL; 2231 } 2232 2233 static const char *state2str(unsigned state) 2234 { 2235 switch(state) { 2236 case LM_ST_UNLOCKED: 2237 return "UN"; 2238 case LM_ST_SHARED: 2239 return "SH"; 2240 case LM_ST_DEFERRED: 2241 return "DF"; 2242 case LM_ST_EXCLUSIVE: 2243 return "EX"; 2244 } 2245 return "??"; 2246 } 2247 2248 static const char *hflags2str(char *buf, u16 flags, unsigned long iflags) 2249 { 2250 char *p = buf; 2251 if (flags & LM_FLAG_TRY) 2252 *p++ = 't'; 2253 if (flags & LM_FLAG_TRY_1CB) 2254 *p++ = 'T'; 2255 if (flags & LM_FLAG_NOEXP) 2256 *p++ = 'e'; 2257 if (flags & LM_FLAG_ANY) 2258 *p++ = 'A'; 2259 if (flags & LM_FLAG_NODE_SCOPE) 2260 *p++ = 'n'; 2261 if (flags & GL_ASYNC) 2262 *p++ = 'a'; 2263 if (flags & GL_EXACT) 2264 *p++ = 'E'; 2265 if (flags & GL_NOCACHE) 2266 *p++ = 'c'; 2267 if (test_bit(HIF_HOLDER, &iflags)) 2268 *p++ = 'H'; 2269 if (test_bit(HIF_WAIT, &iflags)) 2270 *p++ = 'W'; 2271 if (flags & GL_SKIP) 2272 *p++ = 's'; 2273 *p = 0; 2274 return buf; 2275 } 2276 2277 /** 2278 * dump_holder - print information about a glock holder 2279 * @seq: the seq_file struct 2280 * @gh: the glock holder 2281 * @fs_id_buf: pointer to file system id (if requested) 2282 * 2283 */ 2284 2285 static void dump_holder(struct seq_file *seq, const struct gfs2_holder *gh, 2286 const char *fs_id_buf) 2287 { 2288 const char *comm = "(none)"; 2289 pid_t owner_pid = 0; 2290 char flags_buf[32]; 2291 2292 rcu_read_lock(); 2293 if (pid_is_meaningful(gh)) { 2294 struct task_struct *gh_owner; 2295 2296 comm = "(ended)"; 2297 owner_pid = pid_nr(gh->gh_owner_pid); 2298 gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID); 2299 if (gh_owner) 2300 comm = gh_owner->comm; 2301 } 2302 gfs2_print_dbg(seq, "%s H: s:%s f:%s e:%d p:%ld [%s] %pS\n", 2303 fs_id_buf, state2str(gh->gh_state), 2304 hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags), 2305 gh->gh_error, (long)owner_pid, comm, (void *)gh->gh_ip); 2306 rcu_read_unlock(); 2307 } 2308 2309 static const char *gflags2str(char *buf, const struct gfs2_glock *gl) 2310 { 2311 const unsigned long *gflags = &gl->gl_flags; 2312 char *p = buf; 2313 2314 if (test_bit(GLF_LOCK, gflags)) 2315 *p++ = 'l'; 2316 if (test_bit(GLF_DEMOTE, gflags)) 2317 *p++ = 'D'; 2318 if (test_bit(GLF_PENDING_DEMOTE, gflags)) 2319 *p++ = 'd'; 2320 if (test_bit(GLF_DEMOTE_IN_PROGRESS, gflags)) 2321 *p++ = 'p'; 2322 if (test_bit(GLF_DIRTY, gflags)) 2323 *p++ = 'y'; 2324 if (test_bit(GLF_LFLUSH, gflags)) 2325 *p++ = 'f'; 2326 if (test_bit(GLF_INVALIDATE_IN_PROGRESS, gflags)) 2327 *p++ = 'i'; 2328 if (test_bit(GLF_PENDING_REPLY, gflags)) 2329 *p++ = 'R'; 2330 if (test_bit(GLF_HAVE_REPLY, gflags)) 2331 *p++ = 'r'; 2332 if (test_bit(GLF_INITIAL, gflags)) 2333 *p++ = 'a'; 2334 if (test_bit(GLF_HAVE_FROZEN_REPLY, gflags)) 2335 *p++ = 'F'; 2336 if (!list_empty(&gl->gl_holders)) 2337 *p++ = 'q'; 2338 if (test_bit(GLF_LRU, gflags)) 2339 *p++ = 'L'; 2340 if (gl->gl_object) 2341 *p++ = 'o'; 2342 if (test_bit(GLF_BLOCKING, gflags)) 2343 *p++ = 'b'; 2344 if (test_bit(GLF_UNLOCKED, gflags)) 2345 *p++ = 'x'; 2346 if (test_bit(GLF_INSTANTIATE_NEEDED, gflags)) 2347 *p++ = 'n'; 2348 if (test_bit(GLF_INSTANTIATE_IN_PROG, gflags)) 2349 *p++ = 'N'; 2350 if (test_bit(GLF_TRY_TO_EVICT, gflags)) 2351 *p++ = 'e'; 2352 if (test_bit(GLF_VERIFY_DELETE, gflags)) 2353 *p++ = 'E'; 2354 if (test_bit(GLF_DEFER_DELETE, gflags)) 2355 
		*p++ = 's';
2356 	if (test_bit(GLF_CANCELING, gflags))
2357 		*p++ = 'C';
2358 	*p = 0;
2359 	return buf;
2360 }
2361 
2362 /**
2363  * gfs2_dump_glock - print information about a glock
2364  * @seq: The seq_file struct
2365  * @gl: the glock
2366  * @fsid: If true, also dump the file system id
2367  *
2368  * The file format is as follows:
2369  * One line per object; capital letters are used to indicate objects:
2370  * G = glock, I = Inode, R = rgrp, H = holder. Glocks are not indented,
2371  * other objects are indented by a single space and follow the glock to
2372  * which they are related. Fields are indicated by lower case letters
2373  * followed by a colon and the field value, except for strings which are in
2374  * [] so that it's possible to see if they are composed of spaces for
2375  * example. The fields are n = number (id of the object), f = flags,
2376  * t = type, s = state, r = refcount, e = error, p = pid.
2377  *
2378  */
2379 
2380 void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid)
2381 {
2382 	const struct gfs2_glock_operations *glops = gl->gl_ops;
2383 	unsigned long long dtime;
2384 	const struct gfs2_holder *gh;
2385 	char gflags_buf[32];
2386 	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
2387 	char fs_id_buf[sizeof(sdp->sd_fsname) + 7];
2388 	unsigned long nrpages = 0;
2389 
2390 	if (gl->gl_ops->go_flags & GLOF_ASPACE) {
2391 		struct address_space *mapping = gfs2_glock2aspace(gl);
2392 
2393 		nrpages = mapping->nrpages;
2394 	}
2395 	memset(fs_id_buf, 0, sizeof(fs_id_buf));
2396 	if (fsid && sdp) /* safety precaution */
2397 		sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
2398 	dtime = jiffies - gl->gl_demote_time;
2399 	dtime *= 1000000/HZ; /* demote time in uSec */
2400 	if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
2401 		dtime = 0;
2402 	gfs2_print_dbg(seq, "%sG: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d "
2403 		       "v:%d r:%d m:%ld p:%lu\n",
2404 		       fs_id_buf, state2str(gl->gl_state),
2405 		       gl->gl_name.ln_type,
2406 		       (unsigned long long)gl->gl_name.ln_number,
2407 		       gflags2str(gflags_buf, gl),
2408 		       state2str(gl->gl_target),
2409 		       state2str(gl->gl_demote_state), dtime,
2410 		       atomic_read(&gl->gl_ail_count),
2411 		       atomic_read(&gl->gl_revokes),
2412 		       (int)gl->gl_lockref.count, gl->gl_hold_time, nrpages);
2413 
2414 	list_for_each_entry(gh, &gl->gl_holders, gh_list)
2415 		dump_holder(seq, gh, fs_id_buf);
2416 
2417 	if (gl->gl_state != LM_ST_UNLOCKED && glops->go_dump)
2418 		glops->go_dump(seq, gl, fs_id_buf);
2419 }
2420 
2421 static int gfs2_glstats_seq_show(struct seq_file *seq, void *iter_ptr)
2422 {
2423 	struct gfs2_glock *gl = iter_ptr;
2424 
2425 	seq_printf(seq, "G: n:%u/%llx rtt:%llu/%llu rttb:%llu/%llu irt:%llu/%llu dcnt: %llu qcnt: %llu\n",
2426 		   gl->gl_name.ln_type,
2427 		   (unsigned long long)gl->gl_name.ln_number,
2428 		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTT],
2429 		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTVAR],
2430 		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTB],
2431 		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTVARB],
2432 		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SIRT],
2433 		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SIRTVAR],
2434 		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_DCOUNT],
2435 		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_QCOUNT]);
2436 	return 0;
2437 }
2438 
2439 static const char *gfs2_gltype[] = {
2440 	"type",
2441 	"reserved",
2442 	"nondisk",
2443 	"inode",
2444 	"rgrp",
2445 	"meta",
2446 	"iopen",
2447 	"flock",
2448 	"plock",
2449 	"quota",
2450 	"journal",
2451 };
2452 
2453 static const char *gfs2_stype[] = {
2454 	[GFS2_LKS_SRTT] =
"srtt", 2455 [GFS2_LKS_SRTTVAR] = "srttvar", 2456 [GFS2_LKS_SRTTB] = "srttb", 2457 [GFS2_LKS_SRTTVARB] = "srttvarb", 2458 [GFS2_LKS_SIRT] = "sirt", 2459 [GFS2_LKS_SIRTVAR] = "sirtvar", 2460 [GFS2_LKS_DCOUNT] = "dlm", 2461 [GFS2_LKS_QCOUNT] = "queue", 2462 }; 2463 2464 #define GFS2_NR_SBSTATS (ARRAY_SIZE(gfs2_gltype) * ARRAY_SIZE(gfs2_stype)) 2465 2466 static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr) 2467 { 2468 struct gfs2_sbd *sdp = seq->private; 2469 loff_t pos = *(loff_t *)iter_ptr; 2470 unsigned index = pos >> 3; 2471 unsigned subindex = pos & 0x07; 2472 int i; 2473 2474 if (index == 0 && subindex != 0) 2475 return 0; 2476 2477 seq_printf(seq, "%-10s %8s:", gfs2_gltype[index], 2478 (index == 0) ? "cpu": gfs2_stype[subindex]); 2479 2480 for_each_possible_cpu(i) { 2481 const struct gfs2_pcpu_lkstats *lkstats = per_cpu_ptr(sdp->sd_lkstats, i); 2482 2483 if (index == 0) 2484 seq_printf(seq, " %15u", i); 2485 else 2486 seq_printf(seq, " %15llu", (unsigned long long)lkstats-> 2487 lkstats[index - 1].stats[subindex]); 2488 } 2489 seq_putc(seq, '\n'); 2490 return 0; 2491 } 2492 2493 int __init gfs2_glock_init(void) 2494 { 2495 int i, ret; 2496 2497 ret = rhashtable_init(&gl_hash_table, &ht_parms); 2498 if (ret < 0) 2499 return ret; 2500 2501 glock_shrinker = shrinker_alloc(0, "gfs2-glock"); 2502 if (!glock_shrinker) { 2503 rhashtable_destroy(&gl_hash_table); 2504 return -ENOMEM; 2505 } 2506 2507 glock_shrinker->count_objects = gfs2_glock_shrink_count; 2508 glock_shrinker->scan_objects = gfs2_glock_shrink_scan; 2509 2510 shrinker_register(glock_shrinker); 2511 2512 for (i = 0; i < GLOCK_WAIT_TABLE_SIZE; i++) 2513 init_waitqueue_head(glock_wait_table + i); 2514 2515 return 0; 2516 } 2517 2518 void gfs2_glock_exit(void) 2519 { 2520 shrinker_free(glock_shrinker); 2521 rhashtable_destroy(&gl_hash_table); 2522 } 2523 2524 static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi, loff_t n) 2525 { 2526 struct gfs2_glock *gl = gi->gl; 2527 2528 if (gl) { 2529 if (n == 0) 2530 return; 2531 gfs2_glock_put_async(gl); 2532 } 2533 for (;;) { 2534 gl = rhashtable_walk_next(&gi->hti); 2535 if (IS_ERR_OR_NULL(gl)) { 2536 if (gl == ERR_PTR(-EAGAIN)) { 2537 n = 1; 2538 continue; 2539 } 2540 gl = NULL; 2541 break; 2542 } 2543 if (gl->gl_name.ln_sbd != gi->sdp) 2544 continue; 2545 if (n <= 1) { 2546 if (!lockref_get_not_dead(&gl->gl_lockref)) 2547 continue; 2548 break; 2549 } else { 2550 if (__lockref_is_dead(&gl->gl_lockref)) 2551 continue; 2552 n--; 2553 } 2554 } 2555 gi->gl = gl; 2556 } 2557 2558 static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos) 2559 __acquires(RCU) 2560 { 2561 struct gfs2_glock_iter *gi = seq->private; 2562 loff_t n; 2563 2564 /* 2565 * We can either stay where we are, skip to the next hash table 2566 * entry, or start from the beginning. 
2567 */ 2568 if (*pos < gi->last_pos) { 2569 rhashtable_walk_exit(&gi->hti); 2570 rhashtable_walk_enter(&gl_hash_table, &gi->hti); 2571 n = *pos + 1; 2572 } else { 2573 n = *pos - gi->last_pos; 2574 } 2575 2576 rhashtable_walk_start(&gi->hti); 2577 2578 gfs2_glock_iter_next(gi, n); 2579 gi->last_pos = *pos; 2580 return gi->gl; 2581 } 2582 2583 static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr, 2584 loff_t *pos) 2585 { 2586 struct gfs2_glock_iter *gi = seq->private; 2587 2588 (*pos)++; 2589 gi->last_pos = *pos; 2590 gfs2_glock_iter_next(gi, 1); 2591 return gi->gl; 2592 } 2593 2594 static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr) 2595 __releases(RCU) 2596 { 2597 struct gfs2_glock_iter *gi = seq->private; 2598 2599 rhashtable_walk_stop(&gi->hti); 2600 } 2601 2602 static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) 2603 { 2604 dump_glock(seq, iter_ptr, false); 2605 return 0; 2606 } 2607 2608 static void *gfs2_sbstats_seq_start(struct seq_file *seq, loff_t *pos) 2609 { 2610 preempt_disable(); 2611 if (*pos >= GFS2_NR_SBSTATS) 2612 return NULL; 2613 return pos; 2614 } 2615 2616 static void *gfs2_sbstats_seq_next(struct seq_file *seq, void *iter_ptr, 2617 loff_t *pos) 2618 { 2619 (*pos)++; 2620 if (*pos >= GFS2_NR_SBSTATS) 2621 return NULL; 2622 return pos; 2623 } 2624 2625 static void gfs2_sbstats_seq_stop(struct seq_file *seq, void *iter_ptr) 2626 { 2627 preempt_enable(); 2628 } 2629 2630 static const struct seq_operations gfs2_glock_seq_ops = { 2631 .start = gfs2_glock_seq_start, 2632 .next = gfs2_glock_seq_next, 2633 .stop = gfs2_glock_seq_stop, 2634 .show = gfs2_glock_seq_show, 2635 }; 2636 2637 static const struct seq_operations gfs2_glstats_seq_ops = { 2638 .start = gfs2_glock_seq_start, 2639 .next = gfs2_glock_seq_next, 2640 .stop = gfs2_glock_seq_stop, 2641 .show = gfs2_glstats_seq_show, 2642 }; 2643 2644 static const struct seq_operations gfs2_sbstats_sops = { 2645 .start = gfs2_sbstats_seq_start, 2646 .next = gfs2_sbstats_seq_next, 2647 .stop = gfs2_sbstats_seq_stop, 2648 .show = gfs2_sbstats_seq_show, 2649 }; 2650 2651 #define GFS2_SEQ_GOODSIZE min(PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER, 65536UL) 2652 2653 static int __gfs2_glocks_open(struct inode *inode, struct file *file, 2654 const struct seq_operations *ops) 2655 { 2656 int ret = seq_open_private(file, ops, sizeof(struct gfs2_glock_iter)); 2657 if (ret == 0) { 2658 struct seq_file *seq = file->private_data; 2659 struct gfs2_glock_iter *gi = seq->private; 2660 2661 gi->sdp = inode->i_private; 2662 seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN); 2663 if (seq->buf) 2664 seq->size = GFS2_SEQ_GOODSIZE; 2665 /* 2666 * Initially, we are "before" the first hash table entry; the 2667 * first call to rhashtable_walk_next gets us the first entry. 
2668 */ 2669 gi->last_pos = -1; 2670 gi->gl = NULL; 2671 rhashtable_walk_enter(&gl_hash_table, &gi->hti); 2672 } 2673 return ret; 2674 } 2675 2676 static int gfs2_glocks_open(struct inode *inode, struct file *file) 2677 { 2678 return __gfs2_glocks_open(inode, file, &gfs2_glock_seq_ops); 2679 } 2680 2681 static int gfs2_glocks_release(struct inode *inode, struct file *file) 2682 { 2683 struct seq_file *seq = file->private_data; 2684 struct gfs2_glock_iter *gi = seq->private; 2685 2686 if (gi->gl) 2687 gfs2_glock_put(gi->gl); 2688 rhashtable_walk_exit(&gi->hti); 2689 return seq_release_private(inode, file); 2690 } 2691 2692 static int gfs2_glstats_open(struct inode *inode, struct file *file) 2693 { 2694 return __gfs2_glocks_open(inode, file, &gfs2_glstats_seq_ops); 2695 } 2696 2697 static const struct file_operations gfs2_glocks_fops = { 2698 .owner = THIS_MODULE, 2699 .open = gfs2_glocks_open, 2700 .read = seq_read, 2701 .llseek = seq_lseek, 2702 .release = gfs2_glocks_release, 2703 }; 2704 2705 static const struct file_operations gfs2_glstats_fops = { 2706 .owner = THIS_MODULE, 2707 .open = gfs2_glstats_open, 2708 .read = seq_read, 2709 .llseek = seq_lseek, 2710 .release = gfs2_glocks_release, 2711 }; 2712 2713 struct gfs2_glockfd_iter { 2714 struct super_block *sb; 2715 unsigned int tgid; 2716 struct task_struct *task; 2717 unsigned int fd; 2718 struct file *file; 2719 }; 2720 2721 static struct task_struct *gfs2_glockfd_next_task(struct gfs2_glockfd_iter *i) 2722 { 2723 struct pid_namespace *ns = task_active_pid_ns(current); 2724 struct pid *pid; 2725 2726 if (i->task) 2727 put_task_struct(i->task); 2728 2729 rcu_read_lock(); 2730 retry: 2731 i->task = NULL; 2732 pid = find_ge_pid(i->tgid, ns); 2733 if (pid) { 2734 i->tgid = pid_nr_ns(pid, ns); 2735 i->task = pid_task(pid, PIDTYPE_TGID); 2736 if (!i->task) { 2737 i->tgid++; 2738 goto retry; 2739 } 2740 get_task_struct(i->task); 2741 } 2742 rcu_read_unlock(); 2743 return i->task; 2744 } 2745 2746 static struct file *gfs2_glockfd_next_file(struct gfs2_glockfd_iter *i) 2747 { 2748 if (i->file) { 2749 fput(i->file); 2750 i->file = NULL; 2751 } 2752 2753 for(;; i->fd++) { 2754 i->file = fget_task_next(i->task, &i->fd); 2755 if (!i->file) { 2756 i->fd = 0; 2757 break; 2758 } 2759 2760 if (file_inode(i->file)->i_sb == i->sb) 2761 break; 2762 2763 fput(i->file); 2764 } 2765 return i->file; 2766 } 2767 2768 static void *gfs2_glockfd_seq_start(struct seq_file *seq, loff_t *pos) 2769 { 2770 struct gfs2_glockfd_iter *i = seq->private; 2771 2772 if (*pos) 2773 return NULL; 2774 while (gfs2_glockfd_next_task(i)) { 2775 if (gfs2_glockfd_next_file(i)) 2776 return i; 2777 i->tgid++; 2778 } 2779 return NULL; 2780 } 2781 2782 static void *gfs2_glockfd_seq_next(struct seq_file *seq, void *iter_ptr, 2783 loff_t *pos) 2784 { 2785 struct gfs2_glockfd_iter *i = seq->private; 2786 2787 (*pos)++; 2788 i->fd++; 2789 do { 2790 if (gfs2_glockfd_next_file(i)) 2791 return i; 2792 i->tgid++; 2793 } while (gfs2_glockfd_next_task(i)); 2794 return NULL; 2795 } 2796 2797 static void gfs2_glockfd_seq_stop(struct seq_file *seq, void *iter_ptr) 2798 { 2799 struct gfs2_glockfd_iter *i = seq->private; 2800 2801 if (i->file) 2802 fput(i->file); 2803 if (i->task) 2804 put_task_struct(i->task); 2805 } 2806 2807 static void gfs2_glockfd_seq_show_flock(struct seq_file *seq, 2808 struct gfs2_glockfd_iter *i) 2809 { 2810 struct gfs2_file *fp = i->file->private_data; 2811 struct gfs2_holder *fl_gh = &fp->f_fl_gh; 2812 struct lm_lockname gl_name = { .ln_type = LM_TYPE_RESERVED }; 2813 
2814 if (!READ_ONCE(fl_gh->gh_gl)) 2815 return; 2816 2817 spin_lock(&i->file->f_lock); 2818 if (gfs2_holder_initialized(fl_gh)) 2819 gl_name = fl_gh->gh_gl->gl_name; 2820 spin_unlock(&i->file->f_lock); 2821 2822 if (gl_name.ln_type != LM_TYPE_RESERVED) { 2823 seq_printf(seq, "%d %u %u/%llx\n", 2824 i->tgid, i->fd, gl_name.ln_type, 2825 (unsigned long long)gl_name.ln_number); 2826 } 2827 } 2828 2829 static int gfs2_glockfd_seq_show(struct seq_file *seq, void *iter_ptr) 2830 { 2831 struct gfs2_glockfd_iter *i = seq->private; 2832 struct inode *inode = file_inode(i->file); 2833 struct gfs2_glock *gl; 2834 2835 inode_lock_shared(inode); 2836 gl = GFS2_I(inode)->i_iopen_gh.gh_gl; 2837 if (gl) { 2838 seq_printf(seq, "%d %u %u/%llx\n", 2839 i->tgid, i->fd, gl->gl_name.ln_type, 2840 (unsigned long long)gl->gl_name.ln_number); 2841 } 2842 gfs2_glockfd_seq_show_flock(seq, i); 2843 inode_unlock_shared(inode); 2844 return 0; 2845 } 2846 2847 static const struct seq_operations gfs2_glockfd_seq_ops = { 2848 .start = gfs2_glockfd_seq_start, 2849 .next = gfs2_glockfd_seq_next, 2850 .stop = gfs2_glockfd_seq_stop, 2851 .show = gfs2_glockfd_seq_show, 2852 }; 2853 2854 static int gfs2_glockfd_open(struct inode *inode, struct file *file) 2855 { 2856 struct gfs2_glockfd_iter *i; 2857 struct gfs2_sbd *sdp = inode->i_private; 2858 2859 i = __seq_open_private(file, &gfs2_glockfd_seq_ops, 2860 sizeof(struct gfs2_glockfd_iter)); 2861 if (!i) 2862 return -ENOMEM; 2863 i->sb = sdp->sd_vfs; 2864 return 0; 2865 } 2866 2867 static const struct file_operations gfs2_glockfd_fops = { 2868 .owner = THIS_MODULE, 2869 .open = gfs2_glockfd_open, 2870 .read = seq_read, 2871 .llseek = seq_lseek, 2872 .release = seq_release_private, 2873 }; 2874 2875 DEFINE_SEQ_ATTRIBUTE(gfs2_sbstats); 2876 2877 void gfs2_create_debugfs_file(struct gfs2_sbd *sdp) 2878 { 2879 sdp->debugfs_dir = debugfs_create_dir(sdp->sd_table_name, gfs2_root); 2880 2881 debugfs_create_file("glocks", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp, 2882 &gfs2_glocks_fops); 2883 2884 debugfs_create_file("glockfd", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp, 2885 &gfs2_glockfd_fops); 2886 2887 debugfs_create_file("glstats", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp, 2888 &gfs2_glstats_fops); 2889 2890 debugfs_create_file("sbstats", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp, 2891 &gfs2_sbstats_fops); 2892 } 2893 2894 void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp) 2895 { 2896 debugfs_remove_recursive(sdp->debugfs_dir); 2897 sdp->debugfs_dir = NULL; 2898 } 2899 2900 void gfs2_register_debugfs(void) 2901 { 2902 gfs2_root = debugfs_create_dir("gfs2", NULL); 2903 } 2904 2905 void gfs2_unregister_debugfs(void) 2906 { 2907 debugfs_remove(gfs2_root); 2908 gfs2_root = NULL; 2909 } 2910
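/*
 * Editor's illustrative sketch (not part of the original file, never called):
 * the typical pairing of gfs2_glock_nq_num() with gfs2_glock_dq_uninit() as
 * documented earlier in this file.  The choice of gfs2_inode_glops and
 * LM_ST_SHARED here is an assumption made purely for the example.
 */
static int __maybe_unused example_read_under_inode_glock(struct gfs2_sbd *sdp,
							  u64 ino_number)
{
	struct gfs2_holder gh;
	int error;

	/* Look up (or create) the glock and queue a shared holder on it. */
	error = gfs2_glock_nq_num(sdp, ino_number, &gfs2_inode_glops,
				  LM_ST_SHARED, 0, &gh);
	if (error)
		return error;

	/* ... the caller would read under the shared lock here ... */

	/* Drop the holder and release the holder's glock reference. */
	gfs2_glock_dq_uninit(&gh);
	return 0;
}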
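/*
 * Editor's illustrative sketch (not part of the original file, never called):
 * acquiring several glocks in deadlock-free order with gfs2_glock_nq_m() and
 * releasing them with gfs2_glock_dq_m().  It assumes the caller already holds
 * references to two distinct glocks and that gfs2_holder_init() from glock.h
 * is available with its usual (gl, state, flags, gh) arguments.
 */
static int __maybe_unused example_lock_two_glocks(struct gfs2_glock *gl1,
						  struct gfs2_glock *gl2)
{
	struct gfs2_holder ghs[2];
	int error;

	/* One holder per glock; gfs2_glock_nq_m() sorts and queues them. */
	gfs2_holder_init(gl1, LM_ST_EXCLUSIVE, 0, &ghs[0]);
	gfs2_holder_init(gl2, LM_ST_EXCLUSIVE, 0, &ghs[1]);

	error = gfs2_glock_nq_m(2, ghs);
	if (error)
		goto out_uninit;

	/* ... work while holding both locks exclusively ... */

	gfs2_glock_dq_m(2, ghs);
out_uninit:
	gfs2_holder_uninit(&ghs[0]);
	gfs2_holder_uninit(&ghs[1]);
	return error;
}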