// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/buffer_head.h>
#include <linux/delay.h>
#include <linux/sort.h>
#include <linux/hash.h>
#include <linux/jhash.h>
#include <linux/kallsyms.h>
#include <linux/gfs2_ondisk.h>
#include <linux/list.h>
#include <linux/wait.h>
#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include <linux/rcupdate.h>
#include <linux/rculist_bl.h>
#include <linux/bit_spinlock.h>
#include <linux/percpu.h>
#include <linux/list_sort.h>
#include <linux/lockref.h>
#include <linux/rhashtable.h>
#include <linux/pid_namespace.h>
#include <linux/file.h>
#include <linux/random.h>

#include "gfs2.h"
#include "incore.h"
#include "glock.h"
#include "glops.h"
#include "inode.h"
#include "lops.h"
#include "meta_io.h"
#include "quota.h"
#include "super.h"
#include "util.h"
#include "bmap.h"
#define CREATE_TRACE_POINTS
#include "trace_gfs2.h"

struct gfs2_glock_iter {
	struct gfs2_sbd *sdp;		/* incore superblock */
	struct rhashtable_iter hti;	/* rhashtable iterator */
	struct gfs2_glock *gl;		/* current glock struct */
	loff_t last_pos;		/* last position */
};

typedef void (*glock_examiner) (struct gfs2_glock * gl);

static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target);
static void request_demote(struct gfs2_glock *gl, unsigned int state,
			   unsigned long delay, bool remote);

static struct dentry *gfs2_root;
static LIST_HEAD(lru_list);
static atomic_t lru_count = ATOMIC_INIT(0);
static DEFINE_SPINLOCK(lru_lock);

#define GFS2_GL_HASH_SHIFT      15
#define GFS2_GL_HASH_SIZE       BIT(GFS2_GL_HASH_SHIFT)

static const struct rhashtable_params ht_parms = {
	.nelem_hint = GFS2_GL_HASH_SIZE * 3 / 4,
	.key_len = offsetofend(struct lm_lockname, ln_type),
	.key_offset = offsetof(struct gfs2_glock, gl_name),
	.head_offset = offsetof(struct gfs2_glock, gl_node),
};

static struct rhashtable gl_hash_table;

#define GLOCK_WAIT_TABLE_BITS 12
#define GLOCK_WAIT_TABLE_SIZE (1 << GLOCK_WAIT_TABLE_BITS)
static wait_queue_head_t glock_wait_table[GLOCK_WAIT_TABLE_SIZE] __cacheline_aligned;

struct wait_glock_queue {
	struct lm_lockname *name;
	wait_queue_entry_t wait;
};

static int glock_wake_function(wait_queue_entry_t *wait, unsigned int mode,
			       int sync, void *key)
{
	struct wait_glock_queue *wait_glock =
		container_of(wait, struct wait_glock_queue, wait);
	struct lm_lockname *wait_name = wait_glock->name;
	struct lm_lockname *wake_name = key;

	if (wake_name->ln_sbd != wait_name->ln_sbd ||
	    wake_name->ln_number != wait_name->ln_number ||
	    wake_name->ln_type != wait_name->ln_type)
		return 0;
	return autoremove_wake_function(wait, mode, sync, key);
}

static wait_queue_head_t *glock_waitqueue(struct lm_lockname *name)
{
	u32 hash = jhash2((u32 *)name, ht_parms.key_len / 4, 0);

	return glock_wait_table + hash_32(hash, GLOCK_WAIT_TABLE_BITS);
}

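/*
 * Waiters are not given per-glock wait queues.  glock_waitqueue() hashes the
 * lm_lockname into the shared glock_wait_table, and glock_wake_function()
 * compares the full name so that only waiters for the matching glock are
 * woken when two names hash to the same bucket.
 */
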
/**
 * wake_up_glock - Wake up waiters on a glock
 * @gl: the glock
 */
static void wake_up_glock(struct gfs2_glock *gl)
{
	wait_queue_head_t *wq = glock_waitqueue(&gl->gl_name);

	if (waitqueue_active(wq))
		__wake_up(wq, TASK_NORMAL, 1, &gl->gl_name);
}

static void gfs2_glock_dealloc(struct rcu_head *rcu)
{
	struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);

	kfree(gl->gl_lksb.sb_lvbptr);
	if (gl->gl_ops->go_flags & GLOF_ASPACE) {
		struct gfs2_glock_aspace *gla =
			container_of(gl, struct gfs2_glock_aspace, glock);
		kmem_cache_free(gfs2_glock_aspace_cachep, gla);
	} else
		kmem_cache_free(gfs2_glock_cachep, gl);
}

/**
 * glock_blocked_by_withdraw - determine if we can still use a glock
 * @gl: the glock
 *
 * We need to allow some glocks to be enqueued, dequeued, promoted, and demoted
 * when we're withdrawn. For example, to maintain metadata integrity, we should
 * disallow the use of inode and rgrp glocks when withdrawn. Other glocks like
 * the iopen or freeze glock may be safely used because none of their
 * metadata goes through the journal. So in general, we should disallow all
 * glocks that are journaled, and allow all the others. One exception is:
 * we need to allow our active journal to be promoted and demoted so others
 * may recover it and we can reacquire it when they're done.
 */
static bool glock_blocked_by_withdraw(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

	if (!gfs2_withdrawing_or_withdrawn(sdp))
		return false;
	if (gl->gl_ops->go_flags & GLOF_NONDISK)
		return false;
	if (!sdp->sd_jdesc ||
	    gl->gl_name.ln_number == sdp->sd_jdesc->jd_no_addr)
		return false;
	return true;
}

static void __gfs2_glock_free(struct gfs2_glock *gl)
{
	rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms);
	smp_mb();
	wake_up_glock(gl);
	call_rcu(&gl->gl_rcu, gfs2_glock_dealloc);
}

void gfs2_glock_free(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

	__gfs2_glock_free(gl);
	if (atomic_dec_and_test(&sdp->sd_glock_disposal))
		wake_up(&sdp->sd_kill_wait);
}

void gfs2_glock_free_later(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

	spin_lock(&lru_lock);
	list_add(&gl->gl_lru, &sdp->sd_dead_glocks);
	spin_unlock(&lru_lock);
	if (atomic_dec_and_test(&sdp->sd_glock_disposal))
		wake_up(&sdp->sd_kill_wait);
}

static void gfs2_free_dead_glocks(struct gfs2_sbd *sdp)
{
	struct list_head *list = &sdp->sd_dead_glocks;

	while (!list_empty(list)) {
		struct gfs2_glock *gl;

		gl = list_first_entry(list, struct gfs2_glock, gl_lru);
		list_del_init(&gl->gl_lru);
		__gfs2_glock_free(gl);
	}
}

/**
 * gfs2_glock_hold() - increment reference count on glock
 * @gl: The glock to hold
 *
 */
struct gfs2_glock *gfs2_glock_hold(struct gfs2_glock *gl)
{
	GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref));
	lockref_get(&gl->gl_lockref);
	return gl;
}

static void gfs2_glock_add_to_lru(struct gfs2_glock *gl)
{
	spin_lock(&lru_lock);
	list_move_tail(&gl->gl_lru, &lru_list);

	if (!test_bit(GLF_LRU, &gl->gl_flags)) {
		set_bit(GLF_LRU, &gl->gl_flags);
		atomic_inc(&lru_count);
	}

	spin_unlock(&lru_lock);
}

static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
{
	spin_lock(&lru_lock);
	if (test_bit(GLF_LRU, &gl->gl_flags)) {
		list_del_init(&gl->gl_lru);
		atomic_dec(&lru_count);
		clear_bit(GLF_LRU, &gl->gl_flags);
	}
	spin_unlock(&lru_lock);
}

/*
 * Enqueue the glock on the work queue.  Passes one glock reference on to the
 * work queue.
 */
static void gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

	if (!queue_delayed_work(sdp->sd_glock_wq, &gl->gl_work, delay)) {
		/*
		 * We are holding the lockref spinlock, and the work was still
		 * queued above.  The queued work (glock_work_func) takes that
		 * spinlock before dropping its glock reference(s), so it
		 * cannot have dropped them in the meantime.
		 */
		GLOCK_BUG_ON(gl, gl->gl_lockref.count < 2);
		gl->gl_lockref.count--;
	}
}

static void __gfs2_glock_put(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct address_space *mapping = gfs2_glock2aspace(gl);

	lockref_mark_dead(&gl->gl_lockref);
	spin_unlock(&gl->gl_lockref.lock);
	gfs2_glock_remove_from_lru(gl);
	GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
	if (mapping) {
		truncate_inode_pages_final(mapping);
		if (!gfs2_withdrawing_or_withdrawn(sdp))
			GLOCK_BUG_ON(gl, !mapping_empty(mapping));
	}
	trace_gfs2_glock_put(gl);
	sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
}

static bool __gfs2_glock_put_or_lock(struct gfs2_glock *gl)
{
	if (lockref_put_or_lock(&gl->gl_lockref))
		return true;
	GLOCK_BUG_ON(gl, gl->gl_lockref.count != 1);
	if (gl->gl_state != LM_ST_UNLOCKED) {
		gl->gl_lockref.count--;
		gfs2_glock_add_to_lru(gl);
		spin_unlock(&gl->gl_lockref.lock);
		return true;
	}
	return false;
}

/**
 * gfs2_glock_put() - Decrement reference count on glock
 * @gl: The glock to put
 *
 */
void gfs2_glock_put(struct gfs2_glock *gl)
{
	if (__gfs2_glock_put_or_lock(gl))
		return;

	__gfs2_glock_put(gl);
}

/*
 * gfs2_glock_put_async - Decrement reference count without sleeping
 * @gl: The glock to put
 *
 * Decrement the reference count on glock immediately unless it is the last
 * reference.  Defer putting the last reference to work queue context.
 */
void gfs2_glock_put_async(struct gfs2_glock *gl)
{
	if (__gfs2_glock_put_or_lock(gl))
		return;

	gfs2_glock_queue_work(gl, 0);
	spin_unlock(&gl->gl_lockref.lock);
}

/**
 * may_grant - check if it's ok to grant a new lock
 * @gl: The glock
 * @current_gh: One of the current holders of @gl
 * @gh: The lock request which we wish to grant
 *
 * With our current compatibility rules, if a glock has one or more active
 * holders (HIF_HOLDER flag set), any of those holders can be passed in as
 * @current_gh; they are all the same as far as compatibility with the new @gh
 * goes.
 *
 * Returns true if it's ok to grant the lock.
 */
static inline bool may_grant(struct gfs2_glock *gl,
			     struct gfs2_holder *current_gh,
			     struct gfs2_holder *gh)
{
	if (current_gh) {
		GLOCK_BUG_ON(gl, !test_bit(HIF_HOLDER, &current_gh->gh_iflags));

		switch(current_gh->gh_state) {
		case LM_ST_EXCLUSIVE:
			/*
			 * Here we make a special exception to grant holders
			 * who agree to share the EX lock with other holders
			 * who also have the bit set.
			 * If the original holder has the LM_FLAG_NODE_SCOPE
			 * bit set, we grant more holders with the bit set.
			 */
			return gh->gh_state == LM_ST_EXCLUSIVE &&
			       (current_gh->gh_flags & LM_FLAG_NODE_SCOPE) &&
			       (gh->gh_flags & LM_FLAG_NODE_SCOPE);

		case LM_ST_SHARED:
		case LM_ST_DEFERRED:
			return gh->gh_state == current_gh->gh_state;

		default:
			return false;
		}
	}

	if (gl->gl_state == gh->gh_state)
		return true;
	if (gh->gh_flags & GL_EXACT)
		return false;
	if (gl->gl_state == LM_ST_EXCLUSIVE) {
		return gh->gh_state == LM_ST_SHARED ||
		       gh->gh_state == LM_ST_DEFERRED;
	}
	if (gh->gh_flags & LM_FLAG_ANY)
		return gl->gl_state != LM_ST_UNLOCKED;
	return false;
}

static void gfs2_holder_wake(struct gfs2_holder *gh)
{
	clear_bit(HIF_WAIT, &gh->gh_iflags);
	smp_mb__after_atomic();
	wake_up_bit(&gh->gh_iflags, HIF_WAIT);
	if (gh->gh_flags & GL_ASYNC) {
		struct gfs2_sbd *sdp = gh->gh_gl->gl_name.ln_sbd;

		wake_up(&sdp->sd_async_glock_wait);
	}
}

/**
 * do_error - Something unexpected has happened during a lock request
 * @gl: The glock
 * @ret: The status from the DLM
 */
static void do_error(struct gfs2_glock *gl, const int ret)
{
	struct gfs2_holder *gh, *tmp;

	list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
		if (test_bit(HIF_HOLDER, &gh->gh_iflags))
			continue;
		if (ret & LM_OUT_ERROR)
			gh->gh_error = -EIO;
		else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))
			gh->gh_error = GLR_TRYFAILED;
		else
			continue;
		list_del_init(&gh->gh_list);
		trace_gfs2_glock_queue(gh, 0);
		gfs2_holder_wake(gh);
	}
}

/**
 * find_first_holder - find the first "holder" gh
 * @gl: the glock
 */
static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl)
{
	struct gfs2_holder *gh;

	if (!list_empty(&gl->gl_holders)) {
		gh = list_first_entry(&gl->gl_holders, struct gfs2_holder,
				      gh_list);
		if (test_bit(HIF_HOLDER, &gh->gh_iflags))
			return gh;
	}
	return NULL;
}

/*
 * gfs2_instantiate - Call the glops instantiate function
 * @gh: The glock holder
 *
 * Returns: 0 if instantiate was successful, or error.
 */
int gfs2_instantiate(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	const struct gfs2_glock_operations *glops = gl->gl_ops;
	int ret;

again:
	if (!test_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags))
		goto done;

	/*
	 * Since we unlock the lockref lock, we set a flag to indicate
	 * instantiate is in progress.
	 */
	if (test_and_set_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags)) {
		wait_on_bit(&gl->gl_flags, GLF_INSTANTIATE_IN_PROG,
			    TASK_UNINTERRUPTIBLE);
		/*
		 * Here we just waited for a different instantiate to finish.
		 * But that may not have been successful, as when a process
		 * locks an inode glock _before_ it has an actual inode to
		 * instantiate into. So we check again. This process might
		 * have an inode to instantiate, so might be successful.
		 */
		goto again;
	}

	ret = glops->go_instantiate(gl);
	if (!ret)
		clear_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags);
	clear_and_wake_up_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags);
	if (ret)
		return ret;

done:
	if (glops->go_held)
		return glops->go_held(gh);
	return 0;
}

/**
 * do_promote - promote as many requests as possible on the current queue
 * @gl: The glock
 */
static void do_promote(struct gfs2_glock *gl)
{
	struct gfs2_holder *gh, *current_gh;

	current_gh = find_first_holder(gl);
	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
		if (test_bit(HIF_HOLDER, &gh->gh_iflags))
			continue;
		if (!may_grant(gl, current_gh, gh)) {
			/*
			 * If we get here, it means we may not grant this
			 * holder for some reason.
			 */
			if (current_gh)
				do_error(gl, 0); /* Fail queued try locks */
			break;
		}
		set_bit(HIF_HOLDER, &gh->gh_iflags);
		trace_gfs2_promote(gh);
		gfs2_holder_wake(gh);
		if (!current_gh)
			current_gh = gh;
	}
}

/**
 * find_first_waiter - find the first gh that's waiting for the glock
 * @gl: the glock
 */
static inline struct gfs2_holder *find_first_waiter(const struct gfs2_glock *gl)
{
	struct gfs2_holder *gh;

	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
		if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
			return gh;
	}
	return NULL;
}

/**
 * find_last_waiter - find the last gh that's waiting for the glock
 * @gl: the glock
 *
 * This also is a fast way of finding out if there are any waiters.
 */
static inline struct gfs2_holder *find_last_waiter(const struct gfs2_glock *gl)
{
	struct gfs2_holder *gh;

	if (list_empty(&gl->gl_holders))
		return NULL;
	gh = list_last_entry(&gl->gl_holders, struct gfs2_holder, gh_list);
	return test_bit(HIF_HOLDER, &gh->gh_iflags) ?
	       NULL : gh;
}

/**
 * state_change - record that the glock is now in a different state
 * @gl: the glock
 * @new_state: the new state
 */
static void state_change(struct gfs2_glock *gl, unsigned int new_state)
{
	if (new_state != gl->gl_target)
		/* shorten our minimum hold time */
		gl->gl_hold_time = max(gl->gl_hold_time - GL_GLOCK_HOLD_DECR,
				       GL_GLOCK_MIN_HOLD);
	gl->gl_state = new_state;
	gl->gl_tchange = jiffies;
}

static void gfs2_set_demote(int nr, struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

	set_bit(nr, &gl->gl_flags);
	smp_mb();
	wake_up(&sdp->sd_async_glock_wait);
}

static void gfs2_demote_wake(struct gfs2_glock *gl)
{
	gl->gl_demote_state = LM_ST_EXCLUSIVE;
	clear_bit(GLF_DEMOTE, &gl->gl_flags);
	smp_mb__after_atomic();
	wake_up_bit(&gl->gl_flags, GLF_DEMOTE);
}

/**
 * finish_xmote - The DLM has replied to one of our lock requests
 * @gl: The glock
 * @ret: The status from the DLM
 *
 */
static void finish_xmote(struct gfs2_glock *gl, unsigned int ret)
{
	const struct gfs2_glock_operations *glops = gl->gl_ops;

	if (!(ret & ~LM_OUT_ST_MASK)) {
		unsigned state = ret & LM_OUT_ST_MASK;

		trace_gfs2_glock_state_change(gl, state);
		state_change(gl, state);
	}

	/* Demote to UN request arrived during demote to SH or DF */
	if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
	    gl->gl_state != LM_ST_UNLOCKED &&
	    gl->gl_demote_state == LM_ST_UNLOCKED)
		gl->gl_target = LM_ST_UNLOCKED;

	/* Check for state != intended state */
	if (unlikely(gl->gl_state != gl->gl_target)) {
		struct gfs2_holder *gh = find_first_waiter(gl);

		if (gh && !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) {
			if (ret & LM_OUT_CANCELED) {
				list_del_init(&gh->gh_list);
				trace_gfs2_glock_queue(gh, 0);
				gfs2_holder_wake(gh);
				gl->gl_target = gl->gl_state;
				goto out;
			}
			/* Some error or failed "try lock" - report it */
			if ((ret & LM_OUT_ERROR) ||
			    (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) {
				gl->gl_target = gl->gl_state;
				do_error(gl, ret);
				goto out;
			}
		}
		switch(gl->gl_state) {
		/* Unlocked due to conversion deadlock, try again */
		case LM_ST_UNLOCKED:
			do_xmote(gl, gh, gl->gl_target);
			break;
		/* Conversion fails, unlock and try again */
		case LM_ST_SHARED:
		case LM_ST_DEFERRED:
			do_xmote(gl, gh, LM_ST_UNLOCKED);
			break;
		default: /* Everything else */
			fs_err(gl->gl_name.ln_sbd,
			       "glock %u:%llu requested=%u ret=%u\n",
			       gl->gl_name.ln_type, gl->gl_name.ln_number,
			       gl->gl_req, ret);
			GLOCK_BUG_ON(gl, 1);
		}
		return;
	}

	/* Fast path - we got what we asked for */
	if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) {
		clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
		gfs2_demote_wake(gl);
	}
	if (gl->gl_state != LM_ST_UNLOCKED) {
		if (glops->go_xmote_bh) {
			int rv;

			spin_unlock(&gl->gl_lockref.lock);
			rv = glops->go_xmote_bh(gl);
			spin_lock(&gl->gl_lockref.lock);
			if (rv) {
				do_error(gl, rv);
				goto out;
			}
		}
		do_promote(gl);
	}
out:
	if (!test_bit(GLF_CANCELING, &gl->gl_flags))
		clear_bit(GLF_LOCK, &gl->gl_flags);
}

static bool is_system_glock(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);

	if (gl == m_ip->i_gl)
		return true;
	return false;
}

/**
 * do_xmote - Calls the DLM to change the state of a lock
 * @gl: The lock state
 * @gh: The holder (only for promotes)
 * @target: The target lock state
 *
 */
static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh,
		     unsigned int target)
__releases(&gl->gl_lockref.lock)
__acquires(&gl->gl_lockref.lock)
{
	const struct gfs2_glock_operations *glops = gl->gl_ops;
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	int ret;

	if (target != LM_ST_UNLOCKED && glock_blocked_by_withdraw(gl) &&
	    gh && !(gh->gh_flags & LM_FLAG_NOEXP))
		goto skip_inval;

	GLOCK_BUG_ON(gl, gl->gl_state == target);
	GLOCK_BUG_ON(gl, gl->gl_state == gl->gl_target);
	if (!glops->go_inval || !glops->go_sync)
		goto skip_inval;

	spin_unlock(&gl->gl_lockref.lock);
	ret = glops->go_sync(gl);
	/* If we had a problem syncing (due to io errors or whatever),
	 * we should not invalidate the metadata or tell dlm to
	 * release the glock to other nodes.
	 */
	if (ret) {
		if (cmpxchg(&sdp->sd_log_error, 0, ret)) {
			fs_err(sdp, "Error %d syncing glock\n", ret);
			gfs2_dump_glock(NULL, gl, true);
		}
		spin_lock(&gl->gl_lockref.lock);
		goto skip_inval;
	}

	if (target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) {
		/*
		 * The call to go_sync should have cleared out the ail list.
		 * If there are still items, we have a problem. We ought to
		 * withdraw, but we can't because the withdraw code also uses
		 * glocks. Warn about the error, dump the glock, then fall
		 * through and wait for logd to do the withdraw for us.
		 */
		if ((atomic_read(&gl->gl_ail_count) != 0) &&
		    (!cmpxchg(&sdp->sd_log_error, 0, -EIO))) {
			gfs2_glock_assert_warn(gl,
					       !atomic_read(&gl->gl_ail_count));
			gfs2_dump_glock(NULL, gl, true);
		}
		glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA);
	}
	spin_lock(&gl->gl_lockref.lock);

skip_inval:
	/*
	 * Check for an error encountered since we called go_sync and go_inval.
	 * If so, we can't withdraw from the glock code because the withdraw
	 * code itself uses glocks (see function signal_our_withdraw) to
	 * change the mount to read-only. Most importantly, we must not call
	 * dlm to unlock the glock until the journal is in a known good state
	 * (after journal replay) otherwise other nodes may use the object
	 * (rgrp or dinode) and then later, journal replay will corrupt the
	 * file system. The best we can do here is wait for the logd daemon
	 * to see sd_log_error and withdraw, and in the meantime, requeue the
	 * work for later.
	 *
	 * We make a special exception for some system glocks, such as the
	 * system statfs inode glock, which needs to be granted before the
	 * gfs2_quotad daemon can exit, and that exit needs to finish before
	 * we can unmount the withdrawn file system.
	 *
	 * However, if we're just unlocking the lock (say, for unmount, when
	 * gfs2_gl_hash_clear calls clear_glock) and recovery is complete
	 * then it's okay to tell dlm to unlock it.
	 */
	if (unlikely(sdp->sd_log_error) && !gfs2_withdrawing_or_withdrawn(sdp))
		gfs2_withdraw_delayed(sdp);
	if (glock_blocked_by_withdraw(gl) &&
	    (target != LM_ST_UNLOCKED ||
	     test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags))) {
		if (!is_system_glock(gl)) {
			request_demote(gl, LM_ST_UNLOCKED, 0, false);
			/*
			 * Ordinarily, we would call dlm and its callback would call
			 * finish_xmote, which would call state_change() to the new state.
			 * Since we withdrew, we won't call dlm, so call state_change
			 * manually, but to the UNLOCKED state we desire.
			 */
			state_change(gl, LM_ST_UNLOCKED);
			/*
			 * We skip telling dlm to do the locking, so we won't get a
			 * reply that would otherwise clear GLF_LOCK. So we clear it here.
			 */
			if (!test_bit(GLF_CANCELING, &gl->gl_flags))
				clear_bit(GLF_LOCK, &gl->gl_flags);
			clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
			gl->gl_lockref.count++;
			gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD);
			return;
		}
	}

	if (ls->ls_ops->lm_lock) {
		set_bit(GLF_PENDING_REPLY, &gl->gl_flags);
		spin_unlock(&gl->gl_lockref.lock);
		ret = ls->ls_ops->lm_lock(gl, target, gh ? gh->gh_flags : 0);
		spin_lock(&gl->gl_lockref.lock);

		if (!ret) {
			/* The operation will be completed asynchronously. */
			gl->gl_lockref.count++;
			return;
		}
		clear_bit(GLF_PENDING_REPLY, &gl->gl_flags);

		if (ret == -ENODEV && gl->gl_target == LM_ST_UNLOCKED &&
		    target == LM_ST_UNLOCKED) {
			/*
			 * The lockspace has been released and the lock has
			 * been unlocked implicitly.
			 */
		} else {
			fs_err(sdp, "lm_lock ret %d\n", ret);
			GLOCK_BUG_ON(gl, !gfs2_withdrawing_or_withdrawn(sdp));
			return;
		}
	}

	/* Complete the operation now. */
	finish_xmote(gl, target);
	gl->gl_lockref.count++;
	gfs2_glock_queue_work(gl, 0);
}

/**
 * run_queue - do all outstanding tasks related to a glock
 * @gl: The glock in question
 * @nonblock: True if we must not block in run_queue
 *
 */
static void run_queue(struct gfs2_glock *gl, const int nonblock)
__releases(&gl->gl_lockref.lock)
__acquires(&gl->gl_lockref.lock)
{
	struct gfs2_holder *gh;

	if (test_bit(GLF_LOCK, &gl->gl_flags))
		return;
	set_bit(GLF_LOCK, &gl->gl_flags);

	/*
	 * The GLF_DEMOTE_IN_PROGRESS flag is only set intermittently during
	 * locking operations. We have just started a locking operation by
	 * setting the GLF_LOCK flag, so the GLF_DEMOTE_IN_PROGRESS flag must
	 * be cleared.
	 */
	GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags));

	if (test_bit(GLF_DEMOTE, &gl->gl_flags)) {
		if (gl->gl_demote_state == gl->gl_state) {
			gfs2_demote_wake(gl);
			goto promote;
		}

		if (find_first_holder(gl))
			goto out_unlock;
		if (nonblock)
			goto out_sched;
		set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
		GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE);
		gl->gl_target = gl->gl_demote_state;
		do_xmote(gl, NULL, gl->gl_target);
		return;
	}

promote:
	do_promote(gl);
	if (find_first_holder(gl))
		goto out_unlock;
	gh = find_first_waiter(gl);
	if (!gh)
		goto out_unlock;
	if (nonblock)
		goto out_sched;
	gl->gl_target = gh->gh_state;
	if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
		do_error(gl, 0); /* Fail queued try locks */
	do_xmote(gl, gh, gl->gl_target);
	return;

out_sched:
	clear_bit(GLF_LOCK, &gl->gl_flags);
	gl->gl_lockref.count++;
	gfs2_glock_queue_work(gl, 0);
	return;

out_unlock:
	clear_bit(GLF_LOCK, &gl->gl_flags);
}

/**
 * glock_set_object - set the gl_object field of a glock
 * @gl: the glock
 * @object: the object
 */
void glock_set_object(struct gfs2_glock *gl, void *object)
{
	void *prev_object;

	spin_lock(&gl->gl_lockref.lock);
	prev_object = gl->gl_object;
	gl->gl_object = object;
	spin_unlock(&gl->gl_lockref.lock);
	if (gfs2_assert_warn(gl->gl_name.ln_sbd, prev_object == NULL))
		gfs2_dump_glock(NULL, gl, true);
}

/**
 * glock_clear_object - clear the gl_object field of a glock
 * @gl: the glock
 * @object: object the glock currently points at
 */
void glock_clear_object(struct gfs2_glock *gl, void *object)
{
	void *prev_object;

	spin_lock(&gl->gl_lockref.lock);
	prev_object = gl->gl_object;
	gl->gl_object = NULL;
	spin_unlock(&gl->gl_lockref.lock);
	if (gfs2_assert_warn(gl->gl_name.ln_sbd, prev_object == object))
		gfs2_dump_glock(NULL, gl, true);
}

void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation)
{
	struct gfs2_inode_lvb *ri = (void *)gl->gl_lksb.sb_lvbptr;

	if (ri->ri_magic == 0)
		ri->ri_magic = cpu_to_be32(GFS2_MAGIC);
	if (ri->ri_magic == cpu_to_be32(GFS2_MAGIC))
		ri->ri_generation_deleted = cpu_to_be64(generation);
}

bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation)
{
	struct gfs2_inode_lvb *ri = (void *)gl->gl_lksb.sb_lvbptr;

	if (ri->ri_magic != cpu_to_be32(GFS2_MAGIC))
		return false;
	return generation <= be64_to_cpu(ri->ri_generation_deleted);
}

static void gfs2_glock_poke(struct gfs2_glock *gl)
{
	int flags = LM_FLAG_TRY_1CB | LM_FLAG_ANY | GL_SKIP;
	struct gfs2_holder gh;
	int error;

	__gfs2_holder_init(gl, LM_ST_SHARED, flags, &gh, _RET_IP_);
	error = gfs2_glock_nq(&gh);
	if (!error)
		gfs2_glock_dq(&gh);
	gfs2_holder_uninit(&gh);
}

static struct gfs2_inode *gfs2_grab_existing_inode(struct gfs2_glock *gl)
{
	struct gfs2_inode *ip;

	spin_lock(&gl->gl_lockref.lock);
	ip = gl->gl_object;
	if (ip && !igrab(&ip->i_inode))
		ip = NULL;
	spin_unlock(&gl->gl_lockref.lock);
	if (ip) {
		wait_on_inode(&ip->i_inode);
		if (is_bad_inode(&ip->i_inode)) {
			iput(&ip->i_inode);
			ip = NULL;
		}
	}
	return ip;
}

static void gfs2_try_evict(struct gfs2_glock *gl)
{
	struct gfs2_inode *ip;

	/*
	 * If there is contention on the iopen glock and we have an inode, try
	 * to grab and release the inode so that it can be evicted. The
	 * GIF_DEFER_DELETE flag indicates to gfs2_evict_inode() that the inode
	 * should not be deleted locally.  This will allow the remote node to
	 * go ahead and delete the inode without us having to do it, which will
	 * avoid rgrp glock thrashing.
	 *
	 * The remote node is likely still holding the corresponding inode
	 * glock, so it will run before we get to verify that the delete has
	 * happened below.  (Verification is triggered by the call to
	 * gfs2_queue_verify_delete() in gfs2_evict_inode().)
	 */
	ip = gfs2_grab_existing_inode(gl);
	if (ip) {
		set_bit(GLF_DEFER_DELETE, &gl->gl_flags);
		d_prune_aliases(&ip->i_inode);
		iput(&ip->i_inode);
		clear_bit(GLF_DEFER_DELETE, &gl->gl_flags);

		/* If the inode was evicted, gl->gl_object will now be NULL. */
		ip = gfs2_grab_existing_inode(gl);
		if (ip) {
			gfs2_glock_poke(ip->i_gl);
			iput(&ip->i_inode);
		}
	}
}

bool gfs2_queue_try_to_evict(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

	if (test_and_set_bit(GLF_TRY_TO_EVICT, &gl->gl_flags))
		return false;
	return !mod_delayed_work(sdp->sd_delete_wq, &gl->gl_delete, 0);
}

bool gfs2_queue_verify_delete(struct gfs2_glock *gl, bool later)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	unsigned long delay;

	if (test_and_set_bit(GLF_VERIFY_DELETE, &gl->gl_flags))
		return false;
	delay = later ? HZ + get_random_long() % (HZ * 9) : 0;
	return queue_delayed_work(sdp->sd_delete_wq, &gl->gl_delete, delay);
}

static void delete_work_func(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct gfs2_glock *gl = container_of(dwork, struct gfs2_glock, gl_delete);
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	bool verify_delete = test_and_clear_bit(GLF_VERIFY_DELETE, &gl->gl_flags);

	if (test_and_clear_bit(GLF_TRY_TO_EVICT, &gl->gl_flags))
		gfs2_try_evict(gl);

	if (verify_delete) {
		u64 no_addr = gl->gl_name.ln_number;
		struct inode *inode;

		inode = gfs2_lookup_by_inum(sdp, no_addr, gl->gl_no_formal_ino,
					    GFS2_BLKST_UNLINKED);
		if (IS_ERR(inode)) {
			if (PTR_ERR(inode) == -EAGAIN &&
			    !test_bit(SDF_KILL, &sdp->sd_flags) &&
			    gfs2_queue_verify_delete(gl, true))
				return;
		} else {
			d_prune_aliases(inode);
			iput(inode);
		}
	}

	gfs2_glock_put(gl);
}

static void glock_work_func(struct work_struct *work)
{
	unsigned long delay = 0;
	struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work);
	unsigned int drop_refs = 1;

	spin_lock(&gl->gl_lockref.lock);
	if (test_bit(GLF_HAVE_REPLY, &gl->gl_flags)) {
		clear_bit(GLF_HAVE_REPLY, &gl->gl_flags);
		finish_xmote(gl, gl->gl_reply);
		drop_refs++;
	}
	if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
	    gl->gl_state != LM_ST_UNLOCKED &&
	    gl->gl_demote_state != LM_ST_EXCLUSIVE) {
		if (gl->gl_name.ln_type == LM_TYPE_INODE) {
			unsigned long holdtime, now = jiffies;

			holdtime = gl->gl_tchange + gl->gl_hold_time;
			if (time_before(now, holdtime))
				delay = holdtime - now;
		}

		if (!delay) {
			clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags);
			gfs2_set_demote(GLF_DEMOTE, gl);
		}
	}
	run_queue(gl, 0);
	if (delay) {
		/* Keep one glock reference for the work we requeue. */
		drop_refs--;
		gfs2_glock_queue_work(gl, delay);
	}

	/* Drop the remaining glock references manually. */
	GLOCK_BUG_ON(gl, gl->gl_lockref.count < drop_refs);
	gl->gl_lockref.count -= drop_refs;
	if (!gl->gl_lockref.count) {
		if (gl->gl_state == LM_ST_UNLOCKED) {
			__gfs2_glock_put(gl);
			return;
		}
		gfs2_glock_add_to_lru(gl);
	}
	spin_unlock(&gl->gl_lockref.lock);
}

static struct gfs2_glock *find_insert_glock(struct lm_lockname *name,
					    struct gfs2_glock *new)
{
	struct wait_glock_queue wait;
	wait_queue_head_t *wq = glock_waitqueue(name);
	struct gfs2_glock *gl;

	wait.name = name;
	init_wait(&wait.wait);
	wait.wait.func = glock_wake_function;

again:
	prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
	rcu_read_lock();
	if (new) {
		gl = rhashtable_lookup_get_insert_fast(&gl_hash_table,
						       &new->gl_node, ht_parms);
		if (IS_ERR(gl))
			goto out;
	} else {
		gl = rhashtable_lookup_fast(&gl_hash_table,
					    name, ht_parms);
	}
	if (gl && !lockref_get_not_dead(&gl->gl_lockref)) {
		rcu_read_unlock();
		schedule();
		goto again;
	}
out:
	rcu_read_unlock();
	finish_wait(wq, &wait.wait);
	if (gl)
		gfs2_glock_remove_from_lru(gl);
	return gl;
}

/**
 * gfs2_glock_get() - Get a glock, or create one if one doesn't exist
 * @sdp: The GFS2 superblock
 * @number: the lock number
 * @glops: The glock_operations to use
 * @create: If 0, don't create the glock if it doesn't exist
 * @glp: the glock is returned here
 *
 * This does not lock a glock, just finds/creates structures for one.
 *
 * Returns: errno
 */
int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
		   const struct gfs2_glock_operations *glops, int create,
		   struct gfs2_glock **glp)
{
	struct lm_lockname name = { .ln_number = number,
				    .ln_type = glops->go_type,
				    .ln_sbd = sdp };
	struct gfs2_glock *gl, *tmp;
	struct address_space *mapping;

	gl = find_insert_glock(&name, NULL);
	if (gl)
		goto found;
	if (!create)
		return -ENOENT;

	if (glops->go_flags & GLOF_ASPACE) {
		struct gfs2_glock_aspace *gla =
			kmem_cache_alloc(gfs2_glock_aspace_cachep, GFP_NOFS);
		if (!gla)
			return -ENOMEM;
		gl = &gla->glock;
	} else {
		gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_NOFS);
		if (!gl)
			return -ENOMEM;
	}
	memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb));
	gl->gl_ops = glops;

	if (glops->go_flags & GLOF_LVB) {
		gl->gl_lksb.sb_lvbptr = kzalloc(GDLM_LVB_SIZE, GFP_NOFS);
		if (!gl->gl_lksb.sb_lvbptr) {
			gfs2_glock_dealloc(&gl->gl_rcu);
			return -ENOMEM;
		}
	}

	atomic_inc(&sdp->sd_glock_disposal);
	gl->gl_node.next = NULL;
	gl->gl_flags = BIT(GLF_INITIAL);
	if (glops->go_instantiate)
		gl->gl_flags |= BIT(GLF_INSTANTIATE_NEEDED);
	gl->gl_name = name;
	lockref_init(&gl->gl_lockref);
	lockdep_set_subclass(&gl->gl_lockref.lock, glops->go_subclass);
	gl->gl_state = LM_ST_UNLOCKED;
	gl->gl_target = LM_ST_UNLOCKED;
	gl->gl_demote_state = LM_ST_EXCLUSIVE;
	gl->gl_dstamp = 0;
	preempt_disable();
	/* We use the global stats to estimate the initial per-glock stats */
	gl->gl_stats = this_cpu_ptr(sdp->sd_lkstats)->lkstats[glops->go_type];
	preempt_enable();
	gl->gl_stats.stats[GFS2_LKS_DCOUNT] = 0;
	gl->gl_stats.stats[GFS2_LKS_QCOUNT] = 0;
	gl->gl_tchange = jiffies;
	gl->gl_object = NULL;
	gl->gl_hold_time = GL_GLOCK_DFT_HOLD;
	INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
	if (gl->gl_name.ln_type == LM_TYPE_IOPEN)
		INIT_DELAYED_WORK(&gl->gl_delete, delete_work_func);

	mapping = gfs2_glock2aspace(gl);
	if (mapping) {
		mapping->a_ops = &gfs2_meta_aops;
		mapping->host = sdp->sd_inode;
		mapping->flags = 0;
		mapping_set_gfp_mask(mapping, GFP_NOFS);
		mapping->i_private_data = NULL;
		mapping->writeback_index = 0;
	}

	tmp = find_insert_glock(&name, gl);
	if (tmp) {
		gfs2_glock_dealloc(&gl->gl_rcu);
		if (atomic_dec_and_test(&sdp->sd_glock_disposal))
			wake_up(&sdp->sd_kill_wait);

		if (IS_ERR(tmp))
			return PTR_ERR(tmp);
		gl = tmp;
	}

found:
	*glp = gl;
	return 0;
}

/**
 * __gfs2_holder_init - initialize a struct gfs2_holder in the default way
 * @gl: the glock
 * @state: the state we're requesting
 * @flags: the modifier flags
 * @gh: the holder structure
 *
 */
void __gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags,
			struct gfs2_holder *gh, unsigned long ip)
{
	INIT_LIST_HEAD(&gh->gh_list);
	gh->gh_gl = gfs2_glock_hold(gl);
	gh->gh_ip = ip;
	gh->gh_owner_pid = get_pid(task_pid(current));
	gh->gh_state = state;
	gh->gh_flags = flags;
	gh->gh_iflags = 0;
}

/**
 * gfs2_holder_reinit - reinitialize a struct gfs2_holder so we can requeue it
 * @state: the state we're requesting
 * @flags: the modifier flags
 * @gh: the holder structure
 *
 * Don't mess with the glock.
 *
 */
void gfs2_holder_reinit(unsigned int state, u16 flags, struct gfs2_holder *gh)
{
	gh->gh_state = state;
	gh->gh_flags = flags;
	gh->gh_iflags = 0;
	gh->gh_ip = _RET_IP_;
	put_pid(gh->gh_owner_pid);
	gh->gh_owner_pid = get_pid(task_pid(current));
}

/**
 * gfs2_holder_uninit - uninitialize a holder structure (drop glock reference)
 * @gh: the holder structure
 *
 */
void gfs2_holder_uninit(struct gfs2_holder *gh)
{
	put_pid(gh->gh_owner_pid);
	gfs2_glock_put(gh->gh_gl);
	gfs2_holder_mark_uninitialized(gh);
	gh->gh_ip = 0;
}

static void gfs2_glock_update_hold_time(struct gfs2_glock *gl,
					unsigned long start_time)
{
	/* Have we waited longer than a second? */
	if (time_after(jiffies, start_time + HZ)) {
		/* Lengthen the minimum hold time. */
		gl->gl_hold_time = min(gl->gl_hold_time + GL_GLOCK_HOLD_INCR,
				       GL_GLOCK_MAX_HOLD);
	}
}

/**
 * gfs2_glock_holder_ready - holder is ready and its error code can be collected
 * @gh: the glock holder
 *
 * Called when a glock holder no longer needs to be waited for because it is
 * now either held (HIF_HOLDER set; gh_error == 0), or acquiring the lock has
 * failed (gh_error != 0).
 */
int gfs2_glock_holder_ready(struct gfs2_holder *gh)
{
	if (gh->gh_error || (gh->gh_flags & GL_SKIP))
		return gh->gh_error;
	gh->gh_error = gfs2_instantiate(gh);
	if (gh->gh_error)
		gfs2_glock_dq(gh);
	return gh->gh_error;
}

/**
 * gfs2_glock_wait - wait on a glock acquisition
 * @gh: the glock holder
 *
 * Returns: 0 on success
 */
int gfs2_glock_wait(struct gfs2_holder *gh)
{
	unsigned long start_time = jiffies;

	might_sleep();
	wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE);
	gfs2_glock_update_hold_time(gh->gh_gl, start_time);
	return gfs2_glock_holder_ready(gh);
}

static int glocks_pending(unsigned int num_gh, struct gfs2_holder *ghs)
{
	int i;

	for (i = 0; i < num_gh; i++)
		if (test_bit(HIF_WAIT, &ghs[i].gh_iflags))
			return 1;
	return 0;
}

/**
 * gfs2_glock_async_wait - wait on multiple asynchronous glock acquisitions
 * @num_gh: the number of holders in the array
 * @ghs: the glock holder array
 *
 * Returns: 0 on success, meaning all glocks have been granted and are held.
 *          -ESTALE if the request timed out, meaning all glocks were released,
 *          and the caller should retry the operation.
 */
int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs)
{
	struct gfs2_sbd *sdp = ghs[0].gh_gl->gl_name.ln_sbd;
	int i, ret = 0, timeout = 0;
	unsigned long start_time = jiffies;

	might_sleep();
	/*
	 * Total up the (minimum hold time * 2) of all glocks and use that to
	 * determine the max amount of time we should wait.
	 */
	for (i = 0; i < num_gh; i++)
		timeout += ghs[i].gh_gl->gl_hold_time << 1;

	if (!wait_event_timeout(sdp->sd_async_glock_wait,
				!glocks_pending(num_gh, ghs), timeout)) {
		ret = -ESTALE; /* request timed out. */
		goto out;
	}

	for (i = 0; i < num_gh; i++) {
		struct gfs2_holder *gh = &ghs[i];
		int ret2;

		if (test_bit(HIF_HOLDER, &gh->gh_iflags)) {
			gfs2_glock_update_hold_time(gh->gh_gl,
						    start_time);
		}
		ret2 = gfs2_glock_holder_ready(gh);
		if (!ret)
			ret = ret2;
	}

out:
	if (ret) {
		for (i = 0; i < num_gh; i++) {
			struct gfs2_holder *gh = &ghs[i];

			gfs2_glock_dq(gh);
		}
	}
	return ret;
}

/**
 * request_demote - process a demote request
 * @gl: the glock
 * @state: the state the caller wants us to change to
 * @delay: zero to demote immediately; otherwise pending demote
 * @remote: true if this came from a different cluster node
 *
 * There are only two requests that we are going to see in actual
 * practice: LM_ST_SHARED and LM_ST_UNLOCKED
 */
static void request_demote(struct gfs2_glock *gl, unsigned int state,
			   unsigned long delay, bool remote)
{
	gfs2_set_demote(delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE, gl);
	if (gl->gl_demote_state == LM_ST_EXCLUSIVE) {
		gl->gl_demote_state = state;
		gl->gl_demote_time = jiffies;
	} else if (gl->gl_demote_state != LM_ST_UNLOCKED &&
			gl->gl_demote_state != state) {
		gl->gl_demote_state = LM_ST_UNLOCKED;
	}
	if (gl->gl_ops->go_callback)
		gl->gl_ops->go_callback(gl, remote);
	trace_gfs2_demote_rq(gl, remote);
}

void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);

	if (seq) {
		seq_vprintf(seq, fmt, args);
	} else {
		vaf.fmt = fmt;
		vaf.va = &args;

		pr_err("%pV", &vaf);
	}

	va_end(args);
}

static bool gfs2_should_queue_trylock(struct gfs2_glock *gl,
				      struct gfs2_holder *gh)
{
	struct gfs2_holder *current_gh, *gh2;

	current_gh = find_first_holder(gl);
	if (current_gh && !may_grant(gl, current_gh, gh))
		return false;

	list_for_each_entry(gh2, &gl->gl_holders, gh_list) {
		if (test_bit(HIF_HOLDER, &gh2->gh_iflags))
			continue;
		if (!(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
			return false;
	}
	return true;
}

static inline bool pid_is_meaningful(const struct gfs2_holder *gh)
{
	if (!(gh->gh_flags & GL_NOPID))
		return true;
	return !test_bit(HIF_HOLDER, &gh->gh_iflags);
}

/**
 * add_to_queue - Add a holder to the wait queue (but look for recursion)
 * @gh: the holder structure to add
 *
 * Eventually we should move the recursive locking trap to a
 * debugging option or something like that. This is the fast
 * path and needs to have the minimum number of distractions.
 *
 */
static inline void add_to_queue(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct gfs2_holder *gh2;

	GLOCK_BUG_ON(gl, gh->gh_owner_pid == NULL);
	if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags))
		GLOCK_BUG_ON(gl, true);

	if ((gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) &&
	    !gfs2_should_queue_trylock(gl, gh)) {
		gh->gh_error = GLR_TRYFAILED;
		gfs2_holder_wake(gh);
		return;
	}

	list_for_each_entry(gh2, &gl->gl_holders, gh_list) {
		if (likely(gh2->gh_owner_pid != gh->gh_owner_pid))
			continue;
		if (gh->gh_gl->gl_ops->go_type == LM_TYPE_FLOCK)
			continue;
		if (!pid_is_meaningful(gh2))
			continue;
		goto trap_recursive;
	}
	trace_gfs2_glock_queue(gh, 1);
	gfs2_glstats_inc(gl, GFS2_LKS_QCOUNT);
	gfs2_sbstats_inc(gl, GFS2_LKS_QCOUNT);
	list_add_tail(&gh->gh_list, &gl->gl_holders);
	return;

trap_recursive:
	fs_err(sdp, "original: %pSR\n", (void *)gh2->gh_ip);
	fs_err(sdp, "pid: %d\n", pid_nr(gh2->gh_owner_pid));
	fs_err(sdp, "lock type: %d req lock state : %d\n",
	       gh2->gh_gl->gl_name.ln_type, gh2->gh_state);
	fs_err(sdp, "new: %pSR\n", (void *)gh->gh_ip);
	fs_err(sdp, "pid: %d\n", pid_nr(gh->gh_owner_pid));
	fs_err(sdp, "lock type: %d req lock state : %d\n",
	       gh->gh_gl->gl_name.ln_type, gh->gh_state);
	gfs2_dump_glock(NULL, gl, true);
	BUG();
}

/**
 * gfs2_glock_nq - enqueue a struct gfs2_holder onto a glock (acquire a glock)
 * @gh: the holder structure
 *
 * if (gh->gh_flags & GL_ASYNC), this never returns an error
 *
 * Returns: 0, GLR_TRYFAILED, or errno on failure
 */
int gfs2_glock_nq(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	int error;

	if (glock_blocked_by_withdraw(gl) && !(gh->gh_flags & LM_FLAG_NOEXP))
		return -EIO;

	if (gh->gh_flags & GL_NOBLOCK) {
		struct gfs2_holder *current_gh;

		error = -ECHILD;
		spin_lock(&gl->gl_lockref.lock);
		if (find_last_waiter(gl))
			goto unlock;
		current_gh = find_first_holder(gl);
		if (!may_grant(gl, current_gh, gh))
			goto unlock;
		set_bit(HIF_HOLDER, &gh->gh_iflags);
		list_add_tail(&gh->gh_list, &gl->gl_holders);
		trace_gfs2_promote(gh);
		error = 0;
unlock:
		spin_unlock(&gl->gl_lockref.lock);
		return error;
	}

	gh->gh_error = 0;
	spin_lock(&gl->gl_lockref.lock);
	add_to_queue(gh);
	if (unlikely((LM_FLAG_NOEXP & gh->gh_flags) &&
		     test_and_clear_bit(GLF_HAVE_FROZEN_REPLY, &gl->gl_flags))) {
		set_bit(GLF_HAVE_REPLY, &gl->gl_flags);
		gl->gl_lockref.count++;
		gfs2_glock_queue_work(gl, 0);
	}
	run_queue(gl, 1);
	spin_unlock(&gl->gl_lockref.lock);

	error = 0;
	if (!(gh->gh_flags & GL_ASYNC))
		error = gfs2_glock_wait(gh);

	return error;
}

/**
 * gfs2_glock_poll - poll to see if an async request has been completed
 * @gh: the holder
 *
 * Returns: 1 if the request is ready to be gfs2_glock_wait()ed on
 */
int gfs2_glock_poll(struct gfs2_holder *gh)
{
	return test_bit(HIF_WAIT, &gh->gh_iflags) ?
	       0 : 1;
}

static void __gfs2_glock_dq(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	unsigned delay = 0;
	int fast_path = 0;

	/*
	 * This holder should not be cached, so mark it for demote.
	 * Note: this should be done before the glock_needs_demote
	 * check below.
	 */
	if (gh->gh_flags & GL_NOCACHE)
		request_demote(gl, LM_ST_UNLOCKED, 0, false);

	list_del_init(&gh->gh_list);
	clear_bit(HIF_HOLDER, &gh->gh_iflags);
	trace_gfs2_glock_queue(gh, 0);

	/*
	 * If there hasn't been a demote request we are done.
	 * (Let the remaining holders, if any, keep holding it.)
	 */
	if (!glock_needs_demote(gl)) {
		if (list_empty(&gl->gl_holders))
			fast_path = 1;
	}

	if (unlikely(!fast_path)) {
		gl->gl_lockref.count++;
		if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
		    !test_bit(GLF_DEMOTE, &gl->gl_flags) &&
		    gl->gl_name.ln_type == LM_TYPE_INODE)
			delay = gl->gl_hold_time;
		gfs2_glock_queue_work(gl, delay);
	}
}

/**
 * gfs2_glock_dq - dequeue a struct gfs2_holder from a glock (release a glock)
 * @gh: the glock holder
 *
 */
void gfs2_glock_dq(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

	spin_lock(&gl->gl_lockref.lock);
	if (!gfs2_holder_queued(gh)) {
		/*
		 * May have already been dequeued because the locking request
		 * was GL_ASYNC and it has failed in the meantime.
		 */
		goto out;
	}

	if (list_is_first(&gh->gh_list, &gl->gl_holders) &&
	    !test_bit(HIF_HOLDER, &gh->gh_iflags) &&
	    test_bit(GLF_LOCK, &gl->gl_flags) &&
	    !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
	    !test_bit(GLF_CANCELING, &gl->gl_flags)) {
		set_bit(GLF_CANCELING, &gl->gl_flags);
		spin_unlock(&gl->gl_lockref.lock);
		gl->gl_name.ln_sbd->sd_lockstruct.ls_ops->lm_cancel(gl);
		wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE);
		spin_lock(&gl->gl_lockref.lock);
		clear_bit(GLF_CANCELING, &gl->gl_flags);
		clear_bit(GLF_LOCK, &gl->gl_flags);
		if (!gfs2_holder_queued(gh))
			goto out;
	}

	/*
	 * If we're in the process of file system withdraw, we cannot just
	 * dequeue any glocks until our journal is recovered, lest we introduce
	 * file system corruption. We need two exceptions to this rule: We need
	 * to allow unlocking of nondisk glocks and the glock for our own
	 * journal that needs recovery.
	 */
	if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) &&
	    glock_blocked_by_withdraw(gl) &&
	    gh->gh_gl != sdp->sd_jinode_gl) {
		sdp->sd_glock_dqs_held++;
		spin_unlock(&gl->gl_lockref.lock);
		might_sleep();
		wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY,
			    TASK_UNINTERRUPTIBLE);
		spin_lock(&gl->gl_lockref.lock);
	}

	__gfs2_glock_dq(gh);
out:
	spin_unlock(&gl->gl_lockref.lock);
}

void gfs2_glock_dq_wait(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	gfs2_glock_dq(gh);
	might_sleep();
	wait_on_bit(&gl->gl_flags, GLF_DEMOTE, TASK_UNINTERRUPTIBLE);
}

/**
 * gfs2_glock_dq_uninit - dequeue a holder from a glock and initialize it
 * @gh: the holder structure
 *
 */
void gfs2_glock_dq_uninit(struct gfs2_holder *gh)
{
	gfs2_glock_dq(gh);
	gfs2_holder_uninit(gh);
}

/**
 * gfs2_glock_nq_num - acquire a glock based on lock number
 * @sdp: the filesystem
 * @number: the lock number
 * @glops: the glock operations for the type of glock
 * @state: the state to acquire the glock in
 * @flags: modifier flags for the acquisition
 * @gh: the struct gfs2_holder
 *
 * Returns: errno
 */
int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number,
		      const struct gfs2_glock_operations *glops,
		      unsigned int state, u16 flags, struct gfs2_holder *gh)
{
	struct gfs2_glock *gl;
	int error;

	error = gfs2_glock_get(sdp, number, glops, CREATE, &gl);
	if (!error) {
		error = gfs2_glock_nq_init(gl, state, flags, gh);
		gfs2_glock_put(gl);
	}

	return error;
}

/**
 * glock_compare - Compare two struct gfs2_glock structures for sorting
 * @arg_a: the first structure
 * @arg_b: the second structure
 *
 */
static int glock_compare(const void *arg_a, const void *arg_b)
{
	const struct gfs2_holder *gh_a = *(const struct gfs2_holder **)arg_a;
	const struct gfs2_holder *gh_b = *(const struct gfs2_holder **)arg_b;
	const struct lm_lockname *a = &gh_a->gh_gl->gl_name;
	const struct lm_lockname *b = &gh_b->gh_gl->gl_name;

	if (a->ln_number > b->ln_number)
		return 1;
	if (a->ln_number < b->ln_number)
		return -1;
	BUG_ON(gh_a->gh_gl->gl_ops->go_type == gh_b->gh_gl->gl_ops->go_type);
	return 0;
}

/**
 * nq_m_sync - synchronously acquire more than one glock in deadlock free order
 * @num_gh: the number of structures
 * @ghs: an array of struct gfs2_holder structures
 * @p: placeholder for the holder structure to pass back
 *
 * Returns: 0 on success (all glocks acquired),
 *          errno on failure (no glocks acquired)
 */
static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs,
		     struct gfs2_holder **p)
{
	unsigned int x;
	int error = 0;

	for (x = 0; x < num_gh; x++)
		p[x] = &ghs[x];

	sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare, NULL);

	for (x = 0; x < num_gh; x++) {
		error = gfs2_glock_nq(p[x]);
		if (error) {
			while (x--)
				gfs2_glock_dq(p[x]);
			break;
		}
	}

	return error;
}

/**
 * gfs2_glock_nq_m - acquire multiple glocks
 * @num_gh: the number of structures
 * @ghs: an array of struct gfs2_holder structures
 *
 * Returns: 0 on success (all glocks acquired),
 *          errno on failure (no glocks acquired)
 */
int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs)
{
	struct gfs2_holder *tmp[4];
	struct gfs2_holder **pph = tmp;
	int error = 0;

	switch(num_gh) {
	case 0:
		return 0;
	case 1:
		return gfs2_glock_nq(ghs);
	default:
		if (num_gh <= 4)
			break;
		pph = kmalloc_array(num_gh, sizeof(struct gfs2_holder *),
				    GFP_NOFS);
		if (!pph)
			return -ENOMEM;
	}

	error = nq_m_sync(num_gh, ghs, pph);

	if (pph != tmp)
		kfree(pph);

	return error;
}

/**
 * gfs2_glock_dq_m - release multiple glocks
 * @num_gh: the number of structures
 * @ghs: an array of struct gfs2_holder structures
 *
 */
void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
{
	while (num_gh--)
		gfs2_glock_dq(&ghs[num_gh]);
}

void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
{
	unsigned long delay = 0;

	gfs2_glock_hold(gl);
	spin_lock(&gl->gl_lockref.lock);
	if (!list_empty(&gl->gl_holders) &&
	    gl->gl_name.ln_type == LM_TYPE_INODE) {
		unsigned long now = jiffies;
		unsigned long holdtime;

		holdtime = gl->gl_tchange + gl->gl_hold_time;

		if (time_before(now, holdtime))
			delay = holdtime - now;
		if (test_bit(GLF_HAVE_REPLY, &gl->gl_flags))
			delay = gl->gl_hold_time;
	}
	request_demote(gl, state, delay, true);
	gfs2_glock_queue_work(gl, delay);
	spin_unlock(&gl->gl_lockref.lock);
}

/**
 * gfs2_should_freeze - Figure out if glock should be frozen
 * @gl: The glock in question
 *
 * Glocks are not frozen if (a) the result of the dlm operation is
 * an error, (b) the locking operation was an unlock operation or
 * (c) if there is a "noexp" flagged request anywhere in the queue
 *
 * Returns: 1 if freezing should occur, 0 otherwise
 */
static int gfs2_should_freeze(const struct gfs2_glock *gl)
{
	const struct gfs2_holder *gh;

	if (gl->gl_reply & ~LM_OUT_ST_MASK)
		return 0;
	if (gl->gl_target == LM_ST_UNLOCKED)
		return 0;

	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
		if (test_bit(HIF_HOLDER, &gh->gh_iflags))
			continue;
		if (LM_FLAG_NOEXP & gh->gh_flags)
			return 0;
	}

	return 1;
}

/**
 * gfs2_glock_complete - Callback used by locking
 * @gl: Pointer to the glock
 * @ret: The return value from the dlm
 *
 * The gl_reply field is under the gl_lockref.lock lock so that it is ok
 * to use a bitfield shared with other glock state fields.
 */
void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
{
	struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct;

	spin_lock(&gl->gl_lockref.lock);
	clear_bit(GLF_PENDING_REPLY, &gl->gl_flags);
	gl->gl_reply = ret;

	if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))) {
		if (gfs2_should_freeze(gl)) {
			set_bit(GLF_HAVE_FROZEN_REPLY, &gl->gl_flags);
			spin_unlock(&gl->gl_lockref.lock);
			return;
		}
	}

	gl->gl_lockref.count++;
	set_bit(GLF_HAVE_REPLY, &gl->gl_flags);
	gfs2_glock_queue_work(gl, 0);
	spin_unlock(&gl->gl_lockref.lock);
}

static int glock_cmp(void *priv, const struct list_head *a,
		     const struct list_head *b)
{
	struct gfs2_glock *gla, *glb;

	gla = list_entry(a, struct gfs2_glock, gl_lru);
	glb = list_entry(b, struct gfs2_glock, gl_lru);

	if (gla->gl_name.ln_number > glb->gl_name.ln_number)
		return 1;
	if (gla->gl_name.ln_number < glb->gl_name.ln_number)
		return -1;

	return 0;
}

static bool can_free_glock(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

	return !test_bit(GLF_LOCK, &gl->gl_flags) &&
	       !gl->gl_lockref.count &&
	       (!test_bit(GLF_LFLUSH, &gl->gl_flags) ||
		test_bit(SDF_KILL, &sdp->sd_flags));
}

/**
 * gfs2_dispose_glock_lru - Demote a list of glocks
 * @list: The list to dispose of
 *
 * Disposing of glocks may involve disk accesses, so that here we sort
 * the glocks by number (i.e. disk location of the inodes) so that if
 * there are any such accesses, they'll be sent in order (mostly).
 *
 * Must be called under the lru_lock, but may drop and retake this
 * lock. While the lru_lock is dropped, entries may vanish from the
 * list, but no new entries will appear on the list (since it is
 * private)
 */
static unsigned long gfs2_dispose_glock_lru(struct list_head *list)
__releases(&lru_lock)
__acquires(&lru_lock)
{
	struct gfs2_glock *gl;
	unsigned long freed = 0;

	list_sort(NULL, list, glock_cmp);

	while (!list_empty(list)) {
		gl = list_first_entry(list, struct gfs2_glock, gl_lru);
		if (!spin_trylock(&gl->gl_lockref.lock)) {
add_back_to_lru:
			list_move(&gl->gl_lru, &lru_list);
			continue;
		}
		if (!can_free_glock(gl)) {
			spin_unlock(&gl->gl_lockref.lock);
			goto add_back_to_lru;
		}
		list_del_init(&gl->gl_lru);
		atomic_dec(&lru_count);
		clear_bit(GLF_LRU, &gl->gl_flags);
		freed++;
		gl->gl_lockref.count++;
		if (gl->gl_state != LM_ST_UNLOCKED)
			request_demote(gl, LM_ST_UNLOCKED, 0, false);
		gfs2_glock_queue_work(gl, 0);
		spin_unlock(&gl->gl_lockref.lock);
		cond_resched_lock(&lru_lock);
	}
	return freed;
}

/**
 * gfs2_scan_glock_lru - Scan the LRU looking for locks to demote
 * @nr: The number of entries to scan
 *
 * This function selects the entries on the LRU which are able to
 * be demoted, and then kicks off the process by calling
 * gfs2_dispose_glock_lru() above.
 */

static unsigned long gfs2_scan_glock_lru(unsigned long nr)
{
	struct gfs2_glock *gl, *next;
	LIST_HEAD(dispose);
	unsigned long freed = 0;

	spin_lock(&lru_lock);
	list_for_each_entry_safe(gl, next, &lru_list, gl_lru) {
		if (!nr--)
			break;
		if (can_free_glock(gl))
			list_move(&gl->gl_lru, &dispose);
	}
	if (!list_empty(&dispose))
		freed = gfs2_dispose_glock_lru(&dispose);
	spin_unlock(&lru_lock);

	return freed;
}

static unsigned long gfs2_glock_shrink_scan(struct shrinker *shrink,
					    struct shrink_control *sc)
{
	if (!(sc->gfp_mask & __GFP_FS))
		return SHRINK_STOP;
	return gfs2_scan_glock_lru(sc->nr_to_scan);
}

static unsigned long gfs2_glock_shrink_count(struct shrinker *shrink,
					     struct shrink_control *sc)
{
	return vfs_pressure_ratio(atomic_read(&lru_count));
}

static struct shrinker *glock_shrinker;

/**
 * glock_hash_walk - Call a function for each glock in a hash bucket
 * @examiner: the function
 * @sdp: the filesystem
 *
 * Note that the function can be called multiple times on the same
 * object. So the user must ensure that the function can cope with
 * that.
 */

static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
{
	struct gfs2_glock *gl;
	struct rhashtable_iter iter;

	rhashtable_walk_enter(&gl_hash_table, &iter);

	do {
		rhashtable_walk_start(&iter);

		while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl)) {
			if (gl->gl_name.ln_sbd == sdp)
				examiner(gl);
		}

		rhashtable_walk_stop(&iter);
	} while (cond_resched(), gl == ERR_PTR(-EAGAIN));

	rhashtable_walk_exit(&iter);
}

void gfs2_cancel_delete_work(struct gfs2_glock *gl)
{
	clear_bit(GLF_TRY_TO_EVICT, &gl->gl_flags);
	clear_bit(GLF_VERIFY_DELETE, &gl->gl_flags);
	if (cancel_delayed_work(&gl->gl_delete))
		gfs2_glock_put(gl);
}

static void flush_delete_work(struct gfs2_glock *gl)
{
	if (gl->gl_name.ln_type == LM_TYPE_IOPEN) {
		struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

		if (cancel_delayed_work(&gl->gl_delete)) {
			queue_delayed_work(sdp->sd_delete_wq,
					   &gl->gl_delete, 0);
		}
	}
}

void gfs2_flush_delete_work(struct gfs2_sbd *sdp)
{
	glock_hash_walk(flush_delete_work, sdp);
	flush_workqueue(sdp->sd_delete_wq);
}

/**
 * thaw_glock - thaw out a glock which has an unprocessed reply waiting
 * @gl: The glock to thaw
 *
 */

static void thaw_glock(struct gfs2_glock *gl)
{
	if (!test_and_clear_bit(GLF_HAVE_FROZEN_REPLY, &gl->gl_flags))
		return;
	if (!lockref_get_not_dead(&gl->gl_lockref))
		return;

	gfs2_glock_remove_from_lru(gl);
	spin_lock(&gl->gl_lockref.lock);
	set_bit(GLF_HAVE_REPLY, &gl->gl_flags);
	gfs2_glock_queue_work(gl, 0);
	spin_unlock(&gl->gl_lockref.lock);
}

/**
 * clear_glock - look at a glock and see if we can free it from glock cache
 * @gl: the glock to look at
 *
 */

static void clear_glock(struct gfs2_glock *gl)
{
	gfs2_glock_remove_from_lru(gl);

	spin_lock(&gl->gl_lockref.lock);
	if (!__lockref_is_dead(&gl->gl_lockref)) {
		gl->gl_lockref.count++;
		if (gl->gl_state != LM_ST_UNLOCKED)
			request_demote(gl, LM_ST_UNLOCKED, 0, false);
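		/*
		 * The reference taken above keeps the glock alive until the
		 * queued glock work has processed the demote request.
		 */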
		gfs2_glock_queue_work(gl, 0);
	}
	spin_unlock(&gl->gl_lockref.lock);
}

/**
 * gfs2_glock_thaw - Thaw any frozen glocks
 * @sdp: The super block
 *
 */

void gfs2_glock_thaw(struct gfs2_sbd *sdp)
{
	glock_hash_walk(thaw_glock, sdp);
}

static void dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid)
{
	spin_lock(&gl->gl_lockref.lock);
	gfs2_dump_glock(seq, gl, fsid);
	spin_unlock(&gl->gl_lockref.lock);
}

static void dump_glock_func(struct gfs2_glock *gl)
{
	dump_glock(NULL, gl, true);
}

static void withdraw_dq(struct gfs2_glock *gl)
{
	spin_lock(&gl->gl_lockref.lock);
	if (!__lockref_is_dead(&gl->gl_lockref) &&
	    glock_blocked_by_withdraw(gl))
		do_error(gl, LM_OUT_ERROR); /* remove pending waiters */
	spin_unlock(&gl->gl_lockref.lock);
}

void gfs2_gl_dq_holders(struct gfs2_sbd *sdp)
{
	glock_hash_walk(withdraw_dq, sdp);
}

/**
 * gfs2_gl_hash_clear - Empty out the glock hash table
 * @sdp: the filesystem
 *
 * Called when unmounting the filesystem.
 */

void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
{
	unsigned long start = jiffies;
	bool timed_out = false;

	set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags);
	flush_workqueue(sdp->sd_glock_wq);
	glock_hash_walk(clear_glock, sdp);
	flush_workqueue(sdp->sd_glock_wq);

	while (!timed_out) {
		wait_event_timeout(sdp->sd_kill_wait,
				   !atomic_read(&sdp->sd_glock_disposal),
				   HZ * 60);
		if (!atomic_read(&sdp->sd_glock_disposal))
			break;
		timed_out = time_after(jiffies, start + (HZ * 600));
		fs_warn(sdp, "%u glocks left after %u seconds%s\n",
			atomic_read(&sdp->sd_glock_disposal),
			jiffies_to_msecs(jiffies - start) / 1000,
			timed_out ? ":" : "; still waiting");
	}
	gfs2_lm_unmount(sdp);
	gfs2_free_dead_glocks(sdp);
	glock_hash_walk(dump_glock_func, sdp);
	destroy_workqueue(sdp->sd_glock_wq);
	sdp->sd_glock_wq = NULL;
}

static const char *state2str(unsigned state)
{
	switch (state) {
	case LM_ST_UNLOCKED:
		return "UN";
	case LM_ST_SHARED:
		return "SH";
	case LM_ST_DEFERRED:
		return "DF";
	case LM_ST_EXCLUSIVE:
		return "EX";
	}
	return "??";
}

static const char *hflags2str(char *buf, u16 flags, unsigned long iflags)
{
	char *p = buf;

	if (flags & LM_FLAG_TRY)
		*p++ = 't';
	if (flags & LM_FLAG_TRY_1CB)
		*p++ = 'T';
	if (flags & LM_FLAG_NOEXP)
		*p++ = 'e';
	if (flags & LM_FLAG_ANY)
		*p++ = 'A';
	if (flags & LM_FLAG_NODE_SCOPE)
		*p++ = 'n';
	if (flags & GL_ASYNC)
		*p++ = 'a';
	if (flags & GL_EXACT)
		*p++ = 'E';
	if (flags & GL_NOCACHE)
		*p++ = 'c';
	if (test_bit(HIF_HOLDER, &iflags))
		*p++ = 'H';
	if (test_bit(HIF_WAIT, &iflags))
		*p++ = 'W';
	if (flags & GL_SKIP)
		*p++ = 's';
	*p = 0;
	return buf;
}

/**
 * dump_holder - print information about a glock holder
 * @seq: the seq_file struct
 * @gh: the glock holder
 * @fs_id_buf: pointer to file system id (if requested)
 *
 */

static void dump_holder(struct seq_file *seq, const struct gfs2_holder *gh,
			const char *fs_id_buf)
{
	const char *comm = "(none)";
	pid_t owner_pid = 0;
	char flags_buf[32];

	rcu_read_lock();
	if (pid_is_meaningful(gh)) {
		struct task_struct *gh_owner;

		comm = "(ended)";
		owner_pid = pid_nr(gh->gh_owner_pid);
		gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID);
		if (gh_owner)
			comm = gh_owner->comm;
	}
	gfs2_print_dbg(seq, "%s H: s:%s f:%s e:%d p:%ld [%s] %pS\n",
		       fs_id_buf, state2str(gh->gh_state),
		       hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags),
		       gh->gh_error, (long)owner_pid, comm, (void *)gh->gh_ip);
	rcu_read_unlock();
}

static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
{
	const unsigned long *gflags = &gl->gl_flags;
	char *p = buf;

	if (test_bit(GLF_LOCK, gflags))
		*p++ = 'l';
	if (test_bit(GLF_DEMOTE, gflags))
		*p++ = 'D';
	if (test_bit(GLF_PENDING_DEMOTE, gflags))
		*p++ = 'd';
	if (test_bit(GLF_DEMOTE_IN_PROGRESS, gflags))
		*p++ = 'p';
	if (test_bit(GLF_DIRTY, gflags))
		*p++ = 'y';
	if (test_bit(GLF_LFLUSH, gflags))
		*p++ = 'f';
	if (test_bit(GLF_PENDING_REPLY, gflags))
		*p++ = 'R';
	if (test_bit(GLF_HAVE_REPLY, gflags))
		*p++ = 'r';
	if (test_bit(GLF_INITIAL, gflags))
		*p++ = 'a';
	if (test_bit(GLF_HAVE_FROZEN_REPLY, gflags))
		*p++ = 'F';
	if (!list_empty(&gl->gl_holders))
		*p++ = 'q';
	if (test_bit(GLF_LRU, gflags))
		*p++ = 'L';
	if (gl->gl_object)
		*p++ = 'o';
	if (test_bit(GLF_BLOCKING, gflags))
		*p++ = 'b';
	if (test_bit(GLF_UNLOCKED, gflags))
		*p++ = 'x';
	if (test_bit(GLF_INSTANTIATE_NEEDED, gflags))
		*p++ = 'n';
	if (test_bit(GLF_INSTANTIATE_IN_PROG, gflags))
		*p++ = 'N';
	if (test_bit(GLF_TRY_TO_EVICT, gflags))
		*p++ = 'e';
	if (test_bit(GLF_VERIFY_DELETE, gflags))
		*p++ = 'E';
	if (test_bit(GLF_DEFER_DELETE, gflags))
		*p++ = 's';
	if (test_bit(GLF_CANCELING, gflags))
		*p++ = 'C';
	/* NUL-terminate the flag string; gfs2_dump_glock() passes a 32-byte buffer. */
	*p = 0;
	return buf;
}

/**
 * gfs2_dump_glock - print information about a glock
 * @seq: The seq_file struct
 * @gl: the glock
 * @fsid: If true, also dump the file system id
 *
 * The file format is as follows:
 * One line per object; capital letters indicate the object type:
 * G = glock, I = Inode, R = rgrp, H = holder. Glocks are not indented,
 * other objects are indented by a single space and follow the glock to
 * which they are related. Fields are indicated by lower case letters
 * followed by a colon and the field value, except for strings, which are
 * enclosed in [] so that it's possible to see if they are composed of
 * spaces, for example. The fields are n = number (id of the object),
 * f = flags, t = type, s = state, r = refcount, e = error, p = pid.
 *
 */

void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid)
{
	const struct gfs2_glock_operations *glops = gl->gl_ops;
	unsigned long long dtime;
	const struct gfs2_holder *gh;
	char gflags_buf[32];
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	char fs_id_buf[sizeof(sdp->sd_fsname) + 7];
	unsigned long nrpages = 0;

	if (gl->gl_ops->go_flags & GLOF_ASPACE) {
		struct address_space *mapping = gfs2_glock2aspace(gl);

		nrpages = mapping->nrpages;
	}
	memset(fs_id_buf, 0, sizeof(fs_id_buf));
	if (fsid && sdp) /* safety precaution */
		sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
	dtime = jiffies - gl->gl_demote_time;
	dtime *= 1000000/HZ; /* demote time in uSec */
	if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
		dtime = 0;
	gfs2_print_dbg(seq, "%sG: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d "
		       "v:%d r:%d m:%ld p:%lu\n",
		       fs_id_buf, state2str(gl->gl_state),
		       gl->gl_name.ln_type,
		       (unsigned long long)gl->gl_name.ln_number,
		       gflags2str(gflags_buf, gl),
		       state2str(gl->gl_target),
		       state2str(gl->gl_demote_state), dtime,
		       atomic_read(&gl->gl_ail_count),
		       atomic_read(&gl->gl_revokes),
		       (int)gl->gl_lockref.count, gl->gl_hold_time, nrpages);

	list_for_each_entry(gh, &gl->gl_holders, gh_list)
		dump_holder(seq, gh, fs_id_buf);

	if (gl->gl_state != LM_ST_UNLOCKED && glops->go_dump)
		glops->go_dump(seq, gl, fs_id_buf);
}

static int gfs2_glstats_seq_show(struct seq_file *seq, void *iter_ptr)
{
	struct gfs2_glock *gl = iter_ptr;

	seq_printf(seq, "G: n:%u/%llx rtt:%llu/%llu rttb:%llu/%llu irt:%llu/%llu dcnt: %llu qcnt: %llu\n",
		   gl->gl_name.ln_type,
		   (unsigned long long)gl->gl_name.ln_number,
		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTT],
		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTVAR],
		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTB],
		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTVARB],
		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SIRT],
		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SIRTVAR],
		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_DCOUNT],
		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_QCOUNT]);
	return 0;
}

static const char *gfs2_gltype[] = {
	"type",
	"reserved",
	"nondisk",
	"inode",
	"rgrp",
	"meta",
	"iopen",
	"flock",
	"plock",
	"quota",
	"journal",
};

static const char *gfs2_stype[] = {
	[GFS2_LKS_SRTT]		= "srtt",
	[GFS2_LKS_SRTTVAR]	= "srttvar",
	[GFS2_LKS_SRTTB]	= "srttb",
	[GFS2_LKS_SRTTVARB]	= "srttvarb",
	[GFS2_LKS_SIRT]		= "sirt",
	[GFS2_LKS_SIRTVAR]	= "sirtvar",
	[GFS2_LKS_DCOUNT]	= "dlm",
	[GFS2_LKS_QCOUNT]	= "queue",
};

#define GFS2_NR_SBSTATS (ARRAY_SIZE(gfs2_gltype) * ARRAY_SIZE(gfs2_stype))

static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr)
{
	struct gfs2_sbd *sdp = seq->private;
	loff_t pos = *(loff_t *)iter_ptr;
	unsigned index = pos >> 3;
	unsigned subindex = pos & 0x07;
	int i;

	if (index == 0 && subindex != 0)
		return 0;

	seq_printf(seq, "%-10s %8s:", gfs2_gltype[index],
		   (index == 0) ? "cpu": gfs2_stype[subindex]);

	for_each_possible_cpu(i) {
		const struct gfs2_pcpu_lkstats *lkstats = per_cpu_ptr(sdp->sd_lkstats, i);

		if (index == 0)
			seq_printf(seq, " %15u", i);
		else
			seq_printf(seq, " %15llu", (unsigned long long)lkstats->
				   lkstats[index - 1].stats[subindex]);
	}
	seq_putc(seq, '\n');
	return 0;
}

int __init gfs2_glock_init(void)
{
	int i, ret;

	ret = rhashtable_init(&gl_hash_table, &ht_parms);
	if (ret < 0)
		return ret;

	glock_shrinker = shrinker_alloc(0, "gfs2-glock");
	if (!glock_shrinker) {
		rhashtable_destroy(&gl_hash_table);
		return -ENOMEM;
	}

	glock_shrinker->count_objects = gfs2_glock_shrink_count;
	glock_shrinker->scan_objects = gfs2_glock_shrink_scan;

	shrinker_register(glock_shrinker);

	for (i = 0; i < GLOCK_WAIT_TABLE_SIZE; i++)
		init_waitqueue_head(glock_wait_table + i);

	return 0;
}

void gfs2_glock_exit(void)
{
	shrinker_free(glock_shrinker);
	rhashtable_destroy(&gl_hash_table);
}

static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi, loff_t n)
{
	struct gfs2_glock *gl = gi->gl;

	if (gl) {
		if (n == 0)
			return;
		gfs2_glock_put_async(gl);
	}
	for (;;) {
		gl = rhashtable_walk_next(&gi->hti);
		if (IS_ERR_OR_NULL(gl)) {
			if (gl == ERR_PTR(-EAGAIN)) {
				n = 1;
				continue;
			}
			gl = NULL;
			break;
		}
		if (gl->gl_name.ln_sbd != gi->sdp)
			continue;
		if (n <= 1) {
			if (!lockref_get_not_dead(&gl->gl_lockref))
				continue;
			break;
		} else {
			if (__lockref_is_dead(&gl->gl_lockref))
				continue;
			n--;
		}
	}
	gi->gl = gl;
}

static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	struct gfs2_glock_iter *gi = seq->private;
	loff_t n;

	/*
	 * We can either stay where we are, skip to the next hash table
	 * entry, or start from the beginning.
	 * The rhashtable walk only moves forward, so to seek backwards
	 * (*pos < last_pos) we must restart the walk from the beginning
	 * and advance *pos + 1 entries; otherwise we simply advance by
	 * the difference between *pos and last_pos.
	 */
	if (*pos < gi->last_pos) {
		rhashtable_walk_exit(&gi->hti);
		rhashtable_walk_enter(&gl_hash_table, &gi->hti);
		n = *pos + 1;
	} else {
		n = *pos - gi->last_pos;
	}

	rhashtable_walk_start(&gi->hti);

	gfs2_glock_iter_next(gi, n);
	gi->last_pos = *pos;
	return gi->gl;
}

static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
				 loff_t *pos)
{
	struct gfs2_glock_iter *gi = seq->private;

	(*pos)++;
	gi->last_pos = *pos;
	gfs2_glock_iter_next(gi, 1);
	return gi->gl;
}

static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
	__releases(RCU)
{
	struct gfs2_glock_iter *gi = seq->private;

	rhashtable_walk_stop(&gi->hti);
}

static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
{
	dump_glock(seq, iter_ptr, false);
	return 0;
}

static void *gfs2_sbstats_seq_start(struct seq_file *seq, loff_t *pos)
{
	preempt_disable();
	if (*pos >= GFS2_NR_SBSTATS)
		return NULL;
	return pos;
}

static void *gfs2_sbstats_seq_next(struct seq_file *seq, void *iter_ptr,
				   loff_t *pos)
{
	(*pos)++;
	if (*pos >= GFS2_NR_SBSTATS)
		return NULL;
	return pos;
}

static void gfs2_sbstats_seq_stop(struct seq_file *seq, void *iter_ptr)
{
	preempt_enable();
}

static const struct seq_operations gfs2_glock_seq_ops = {
	.start = gfs2_glock_seq_start,
	.next  = gfs2_glock_seq_next,
	.stop  = gfs2_glock_seq_stop,
	.show  = gfs2_glock_seq_show,
};

static const struct seq_operations gfs2_glstats_seq_ops = {
	.start = gfs2_glock_seq_start,
	.next  = gfs2_glock_seq_next,
	.stop  = gfs2_glock_seq_stop,
	.show  = gfs2_glstats_seq_show,
};

static const struct seq_operations gfs2_sbstats_sops = {
	.start = gfs2_sbstats_seq_start,
	.next  = gfs2_sbstats_seq_next,
	.stop  = gfs2_sbstats_seq_stop,
	.show  = gfs2_sbstats_seq_show,
};

#define GFS2_SEQ_GOODSIZE min(PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER, 65536UL)

static int __gfs2_glocks_open(struct inode *inode, struct file *file,
			      const struct seq_operations *ops)
{
	int ret = seq_open_private(file, ops, sizeof(struct gfs2_glock_iter));

	if (ret == 0) {
		struct seq_file *seq = file->private_data;
		struct gfs2_glock_iter *gi = seq->private;

		gi->sdp = inode->i_private;
		seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
		if (seq->buf)
			seq->size = GFS2_SEQ_GOODSIZE;
		/*
		 * Initially, we are "before" the first hash table entry; the
		 * first call to rhashtable_walk_next gets us the first entry.
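		 * Starting last_pos at -1 means the first call to
		 * gfs2_glock_seq_start() advances by exactly one entry and
		 * so lands on the first matching glock.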
		 */
		gi->last_pos = -1;
		gi->gl = NULL;
		rhashtable_walk_enter(&gl_hash_table, &gi->hti);
	}
	return ret;
}

static int gfs2_glocks_open(struct inode *inode, struct file *file)
{
	return __gfs2_glocks_open(inode, file, &gfs2_glock_seq_ops);
}

static int gfs2_glocks_release(struct inode *inode, struct file *file)
{
	struct seq_file *seq = file->private_data;
	struct gfs2_glock_iter *gi = seq->private;

	if (gi->gl)
		gfs2_glock_put(gi->gl);
	rhashtable_walk_exit(&gi->hti);
	return seq_release_private(inode, file);
}

static int gfs2_glstats_open(struct inode *inode, struct file *file)
{
	return __gfs2_glocks_open(inode, file, &gfs2_glstats_seq_ops);
}

static const struct file_operations gfs2_glocks_fops = {
	.owner   = THIS_MODULE,
	.open    = gfs2_glocks_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = gfs2_glocks_release,
};

static const struct file_operations gfs2_glstats_fops = {
	.owner   = THIS_MODULE,
	.open    = gfs2_glstats_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = gfs2_glocks_release,
};

struct gfs2_glockfd_iter {
	struct super_block *sb;
	unsigned int tgid;
	struct task_struct *task;
	unsigned int fd;
	struct file *file;
};

static struct task_struct *gfs2_glockfd_next_task(struct gfs2_glockfd_iter *i)
{
	struct pid_namespace *ns = task_active_pid_ns(current);
	struct pid *pid;

	if (i->task)
		put_task_struct(i->task);

	rcu_read_lock();
retry:
	i->task = NULL;
	pid = find_ge_pid(i->tgid, ns);
	if (pid) {
		i->tgid = pid_nr_ns(pid, ns);
		i->task = pid_task(pid, PIDTYPE_TGID);
		if (!i->task) {
			i->tgid++;
			goto retry;
		}
		get_task_struct(i->task);
	}
	rcu_read_unlock();
	return i->task;
}

static struct file *gfs2_glockfd_next_file(struct gfs2_glockfd_iter *i)
{
	if (i->file) {
		fput(i->file);
		i->file = NULL;
	}

	for (;; i->fd++) {
		i->file = fget_task_next(i->task, &i->fd);
		if (!i->file) {
			i->fd = 0;
			break;
		}

		if (file_inode(i->file)->i_sb == i->sb)
			break;

		fput(i->file);
	}
	return i->file;
}

static void *gfs2_glockfd_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct gfs2_glockfd_iter *i = seq->private;

	if (*pos)
		return NULL;
	while (gfs2_glockfd_next_task(i)) {
		if (gfs2_glockfd_next_file(i))
			return i;
		i->tgid++;
	}
	return NULL;
}

static void *gfs2_glockfd_seq_next(struct seq_file *seq, void *iter_ptr,
				   loff_t *pos)
{
	struct gfs2_glockfd_iter *i = seq->private;

	(*pos)++;
	i->fd++;
	do {
		if (gfs2_glockfd_next_file(i))
			return i;
		i->tgid++;
	} while (gfs2_glockfd_next_task(i));
	return NULL;
}

static void gfs2_glockfd_seq_stop(struct seq_file *seq, void *iter_ptr)
{
	struct gfs2_glockfd_iter *i = seq->private;

	if (i->file)
		fput(i->file);
	if (i->task)
		put_task_struct(i->task);
}

static void gfs2_glockfd_seq_show_flock(struct seq_file *seq,
					struct gfs2_glockfd_iter *i)
{
	struct gfs2_file *fp = i->file->private_data;
	struct gfs2_holder *fl_gh = &fp->f_fl_gh;
	struct lm_lockname gl_name = { .ln_type = LM_TYPE_RESERVED };

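	/*
	 * LM_TYPE_RESERVED is only left in gl_name if no flock glock name is
	 * copied below, so it serves as a "no flock lock held" sentinel.
	 */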
	if (!READ_ONCE(fl_gh->gh_gl))
		return;

	spin_lock(&i->file->f_lock);
	if (gfs2_holder_initialized(fl_gh))
		gl_name = fl_gh->gh_gl->gl_name;
	spin_unlock(&i->file->f_lock);

	if (gl_name.ln_type != LM_TYPE_RESERVED) {
		seq_printf(seq, "%d %u %u/%llx\n",
			   i->tgid, i->fd, gl_name.ln_type,
			   (unsigned long long)gl_name.ln_number);
	}
}

static int gfs2_glockfd_seq_show(struct seq_file *seq, void *iter_ptr)
{
	struct gfs2_glockfd_iter *i = seq->private;
	struct inode *inode = file_inode(i->file);
	struct gfs2_glock *gl;

	inode_lock_shared(inode);
	gl = GFS2_I(inode)->i_iopen_gh.gh_gl;
	if (gl) {
		seq_printf(seq, "%d %u %u/%llx\n",
			   i->tgid, i->fd, gl->gl_name.ln_type,
			   (unsigned long long)gl->gl_name.ln_number);
	}
	gfs2_glockfd_seq_show_flock(seq, i);
	inode_unlock_shared(inode);
	return 0;
}

static const struct seq_operations gfs2_glockfd_seq_ops = {
	.start = gfs2_glockfd_seq_start,
	.next  = gfs2_glockfd_seq_next,
	.stop  = gfs2_glockfd_seq_stop,
	.show  = gfs2_glockfd_seq_show,
};

static int gfs2_glockfd_open(struct inode *inode, struct file *file)
{
	struct gfs2_glockfd_iter *i;
	struct gfs2_sbd *sdp = inode->i_private;

	i = __seq_open_private(file, &gfs2_glockfd_seq_ops,
			       sizeof(struct gfs2_glockfd_iter));
	if (!i)
		return -ENOMEM;
	i->sb = sdp->sd_vfs;
	return 0;
}

static const struct file_operations gfs2_glockfd_fops = {
	.owner   = THIS_MODULE,
	.open    = gfs2_glockfd_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_private,
};

DEFINE_SEQ_ATTRIBUTE(gfs2_sbstats);

void gfs2_create_debugfs_file(struct gfs2_sbd *sdp)
{
	sdp->debugfs_dir = debugfs_create_dir(sdp->sd_table_name, gfs2_root);

	debugfs_create_file("glocks", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp,
			    &gfs2_glocks_fops);

	debugfs_create_file("glockfd", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp,
			    &gfs2_glockfd_fops);

	debugfs_create_file("glstats", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp,
			    &gfs2_glstats_fops);

	debugfs_create_file("sbstats", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp,
			    &gfs2_sbstats_fops);
}

void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp)
{
	debugfs_remove_recursive(sdp->debugfs_dir);
	sdp->debugfs_dir = NULL;
}

void gfs2_register_debugfs(void)
{
	gfs2_root = debugfs_create_dir("gfs2", NULL);
}

void gfs2_unregister_debugfs(void)
{
	debugfs_remove(gfs2_root);
	gfs2_root = NULL;
}