1 /* 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 4 * 5 * This copyrighted material is made available to anyone wishing to use, 6 * modify, copy, or redistribute it subject to the terms and conditions 7 * of the GNU General Public License version 2. 8 */ 9 10 #include <linux/spinlock.h> 11 #include <linux/completion.h> 12 #include <linux/buffer_head.h> 13 #include <linux/gfs2_ondisk.h> 14 #include <linux/bio.h> 15 #include <linux/posix_acl.h> 16 17 #include "gfs2.h" 18 #include "incore.h" 19 #include "bmap.h" 20 #include "glock.h" 21 #include "glops.h" 22 #include "inode.h" 23 #include "log.h" 24 #include "meta_io.h" 25 #include "recovery.h" 26 #include "rgrp.h" 27 #include "util.h" 28 #include "trans.h" 29 #include "dir.h" 30 31 static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh) 32 { 33 fs_err(gl->gl_sbd, "AIL buffer %p: blocknr %llu state 0x%08lx mapping %p page state 0x%lx\n", 34 bh, (unsigned long long)bh->b_blocknr, bh->b_state, 35 bh->b_page->mapping, bh->b_page->flags); 36 fs_err(gl->gl_sbd, "AIL glock %u:%llu mapping %p\n", 37 gl->gl_name.ln_type, gl->gl_name.ln_number, 38 gfs2_glock2aspace(gl)); 39 gfs2_lm_withdraw(gl->gl_sbd, "AIL error\n"); 40 } 41 42 /** 43 * __gfs2_ail_flush - remove all buffers for a given lock from the AIL 44 * @gl: the glock 45 * @fsync: set when called from fsync (not all buffers will be clean) 46 * 47 * None of the buffers should be dirty, locked, or pinned. 48 */ 49 50 static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) 51 { 52 struct gfs2_sbd *sdp = gl->gl_sbd; 53 struct list_head *head = &gl->gl_ail_list; 54 struct gfs2_bufdata *bd, *tmp; 55 struct buffer_head *bh; 56 const unsigned long b_state = (1UL << BH_Dirty)|(1UL << BH_Pinned)|(1UL << BH_Lock); 57 sector_t blocknr; 58 59 gfs2_log_lock(sdp); 60 spin_lock(&sdp->sd_ail_lock); 61 list_for_each_entry_safe(bd, tmp, head, bd_ail_gl_list) { 62 bh = bd->bd_bh; 63 if (bh->b_state & b_state) { 64 if (fsync) 65 continue; 66 gfs2_ail_error(gl, bh); 67 } 68 blocknr = bh->b_blocknr; 69 bh->b_private = NULL; 70 gfs2_remove_from_ail(bd); /* drops ref on bh */ 71 72 bd->bd_bh = NULL; 73 bd->bd_blkno = blocknr; 74 75 gfs2_trans_add_revoke(sdp, bd); 76 } 77 GLOCK_BUG_ON(gl, !fsync && atomic_read(&gl->gl_ail_count)); 78 spin_unlock(&sdp->sd_ail_lock); 79 gfs2_log_unlock(sdp); 80 } 81 82 83 static void gfs2_ail_empty_gl(struct gfs2_glock *gl) 84 { 85 struct gfs2_sbd *sdp = gl->gl_sbd; 86 struct gfs2_trans tr; 87 88 memset(&tr, 0, sizeof(tr)); 89 tr.tr_revokes = atomic_read(&gl->gl_ail_count); 90 91 if (!tr.tr_revokes) 92 return; 93 94 /* A shortened, inline version of gfs2_trans_begin() */ 95 tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64)); 96 tr.tr_ip = (unsigned long)__builtin_return_address(0); 97 sb_start_intwrite(sdp->sd_vfs); 98 gfs2_log_reserve(sdp, tr.tr_reserved); 99 WARN_ON_ONCE(current->journal_info); 100 current->journal_info = &tr; 101 102 __gfs2_ail_flush(gl, 0); 103 104 gfs2_trans_end(sdp); 105 gfs2_log_flush(sdp, NULL); 106 } 107 108 void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) 109 { 110 struct gfs2_sbd *sdp = gl->gl_sbd; 111 unsigned int revokes = atomic_read(&gl->gl_ail_count); 112 int ret; 113 114 if (!revokes) 115 return; 116 117 ret = gfs2_trans_begin(sdp, 0, revokes); 118 if (ret) 119 return; 120 __gfs2_ail_flush(gl, fsync); 121 gfs2_trans_end(sdp); 122 gfs2_log_flush(sdp, NULL); 123 } 124 125 /** 126 * rgrp_go_sync - sync out the metadata for this glock 127 * @gl: the glock 128 * 129 * Called when demoting or unlocking an EX glock. We must flush 130 * to disk all dirty buffers/pages relating to this glock, and must not 131 * not return to caller to demote/unlock the glock until I/O is complete. 132 */ 133 134 static void rgrp_go_sync(struct gfs2_glock *gl) 135 { 136 struct address_space *metamapping = gfs2_glock2aspace(gl); 137 struct gfs2_rgrpd *rgd; 138 int error; 139 140 if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) 141 return; 142 GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE); 143 144 gfs2_log_flush(gl->gl_sbd, gl); 145 filemap_fdatawrite(metamapping); 146 error = filemap_fdatawait(metamapping); 147 mapping_set_error(metamapping, error); 148 gfs2_ail_empty_gl(gl); 149 150 spin_lock(&gl->gl_spin); 151 rgd = gl->gl_object; 152 if (rgd) 153 gfs2_free_clones(rgd); 154 spin_unlock(&gl->gl_spin); 155 } 156 157 /** 158 * rgrp_go_inval - invalidate the metadata for this glock 159 * @gl: the glock 160 * @flags: 161 * 162 * We never used LM_ST_DEFERRED with resource groups, so that we 163 * should always see the metadata flag set here. 164 * 165 */ 166 167 static void rgrp_go_inval(struct gfs2_glock *gl, int flags) 168 { 169 struct address_space *mapping = gfs2_glock2aspace(gl); 170 171 WARN_ON_ONCE(!(flags & DIO_METADATA)); 172 gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count)); 173 truncate_inode_pages(mapping, 0); 174 175 if (gl->gl_object) { 176 struct gfs2_rgrpd *rgd = (struct gfs2_rgrpd *)gl->gl_object; 177 rgd->rd_flags &= ~GFS2_RDF_UPTODATE; 178 } 179 } 180 181 /** 182 * inode_go_sync - Sync the dirty data and/or metadata for an inode glock 183 * @gl: the glock protecting the inode 184 * 185 */ 186 187 static void inode_go_sync(struct gfs2_glock *gl) 188 { 189 struct gfs2_inode *ip = gl->gl_object; 190 struct address_space *metamapping = gfs2_glock2aspace(gl); 191 int error; 192 193 if (ip && !S_ISREG(ip->i_inode.i_mode)) 194 ip = NULL; 195 if (ip && test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags)) 196 unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0); 197 if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) 198 return; 199 200 GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE); 201 202 gfs2_log_flush(gl->gl_sbd, gl); 203 filemap_fdatawrite(metamapping); 204 if (ip) { 205 struct address_space *mapping = ip->i_inode.i_mapping; 206 filemap_fdatawrite(mapping); 207 error = filemap_fdatawait(mapping); 208 mapping_set_error(mapping, error); 209 } 210 error = filemap_fdatawait(metamapping); 211 mapping_set_error(metamapping, error); 212 gfs2_ail_empty_gl(gl); 213 /* 214 * Writeback of the data mapping may cause the dirty flag to be set 215 * so we have to clear it again here. 216 */ 217 smp_mb__before_clear_bit(); 218 clear_bit(GLF_DIRTY, &gl->gl_flags); 219 } 220 221 /** 222 * inode_go_inval - prepare a inode glock to be released 223 * @gl: the glock 224 * @flags: 225 * 226 * Normally we invlidate everything, but if we are moving into 227 * LM_ST_DEFERRED from LM_ST_SHARED or LM_ST_EXCLUSIVE then we 228 * can keep hold of the metadata, since it won't have changed. 229 * 230 */ 231 232 static void inode_go_inval(struct gfs2_glock *gl, int flags) 233 { 234 struct gfs2_inode *ip = gl->gl_object; 235 236 gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count)); 237 238 if (flags & DIO_METADATA) { 239 struct address_space *mapping = gfs2_glock2aspace(gl); 240 truncate_inode_pages(mapping, 0); 241 if (ip) { 242 set_bit(GIF_INVALID, &ip->i_flags); 243 forget_all_cached_acls(&ip->i_inode); 244 gfs2_dir_hash_inval(ip); 245 } 246 } 247 248 if (ip == GFS2_I(gl->gl_sbd->sd_rindex)) { 249 gfs2_log_flush(gl->gl_sbd, NULL); 250 gl->gl_sbd->sd_rindex_uptodate = 0; 251 } 252 if (ip && S_ISREG(ip->i_inode.i_mode)) 253 truncate_inode_pages(ip->i_inode.i_mapping, 0); 254 } 255 256 /** 257 * inode_go_demote_ok - Check to see if it's ok to unlock an inode glock 258 * @gl: the glock 259 * 260 * Returns: 1 if it's ok 261 */ 262 263 static int inode_go_demote_ok(const struct gfs2_glock *gl) 264 { 265 struct gfs2_sbd *sdp = gl->gl_sbd; 266 struct gfs2_holder *gh; 267 268 if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object) 269 return 0; 270 271 if (!list_empty(&gl->gl_holders)) { 272 gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); 273 if (gh->gh_list.next != &gl->gl_holders) 274 return 0; 275 } 276 277 return 1; 278 } 279 280 /** 281 * gfs2_set_nlink - Set the inode's link count based on on-disk info 282 * @inode: The inode in question 283 * @nlink: The link count 284 * 285 * If the link count has hit zero, it must never be raised, whatever the 286 * on-disk inode might say. When new struct inodes are created the link 287 * count is set to 1, so that we can safely use this test even when reading 288 * in on disk information for the first time. 289 */ 290 291 static void gfs2_set_nlink(struct inode *inode, u32 nlink) 292 { 293 /* 294 * We will need to review setting the nlink count here in the 295 * light of the forthcoming ro bind mount work. This is a reminder 296 * to do that. 297 */ 298 if ((inode->i_nlink != nlink) && (inode->i_nlink != 0)) { 299 if (nlink == 0) 300 clear_nlink(inode); 301 else 302 set_nlink(inode, nlink); 303 } 304 } 305 306 static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) 307 { 308 const struct gfs2_dinode *str = buf; 309 struct timespec atime; 310 u16 height, depth; 311 312 if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) 313 goto corrupt; 314 ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino); 315 ip->i_inode.i_mode = be32_to_cpu(str->di_mode); 316 ip->i_inode.i_rdev = 0; 317 switch (ip->i_inode.i_mode & S_IFMT) { 318 case S_IFBLK: 319 case S_IFCHR: 320 ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major), 321 be32_to_cpu(str->di_minor)); 322 break; 323 }; 324 325 i_uid_write(&ip->i_inode, be32_to_cpu(str->di_uid)); 326 i_gid_write(&ip->i_inode, be32_to_cpu(str->di_gid)); 327 gfs2_set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink)); 328 i_size_write(&ip->i_inode, be64_to_cpu(str->di_size)); 329 gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); 330 atime.tv_sec = be64_to_cpu(str->di_atime); 331 atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); 332 if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0) 333 ip->i_inode.i_atime = atime; 334 ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); 335 ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); 336 ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); 337 ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec); 338 339 ip->i_goal = be64_to_cpu(str->di_goal_meta); 340 ip->i_generation = be64_to_cpu(str->di_generation); 341 342 ip->i_diskflags = be32_to_cpu(str->di_flags); 343 ip->i_eattr = be64_to_cpu(str->di_eattr); 344 /* i_diskflags and i_eattr must be set before gfs2_set_inode_flags() */ 345 gfs2_set_inode_flags(&ip->i_inode); 346 height = be16_to_cpu(str->di_height); 347 if (unlikely(height > GFS2_MAX_META_HEIGHT)) 348 goto corrupt; 349 ip->i_height = (u8)height; 350 351 depth = be16_to_cpu(str->di_depth); 352 if (unlikely(depth > GFS2_DIR_MAX_DEPTH)) 353 goto corrupt; 354 ip->i_depth = (u8)depth; 355 ip->i_entries = be32_to_cpu(str->di_entries); 356 357 if (S_ISREG(ip->i_inode.i_mode)) 358 gfs2_set_aops(&ip->i_inode); 359 360 return 0; 361 corrupt: 362 gfs2_consist_inode(ip); 363 return -EIO; 364 } 365 366 /** 367 * gfs2_inode_refresh - Refresh the incore copy of the dinode 368 * @ip: The GFS2 inode 369 * 370 * Returns: errno 371 */ 372 373 int gfs2_inode_refresh(struct gfs2_inode *ip) 374 { 375 struct buffer_head *dibh; 376 int error; 377 378 error = gfs2_meta_inode_buffer(ip, &dibh); 379 if (error) 380 return error; 381 382 error = gfs2_dinode_in(ip, dibh->b_data); 383 brelse(dibh); 384 clear_bit(GIF_INVALID, &ip->i_flags); 385 386 return error; 387 } 388 389 /** 390 * inode_go_lock - operation done after an inode lock is locked by a process 391 * @gl: the glock 392 * @flags: 393 * 394 * Returns: errno 395 */ 396 397 static int inode_go_lock(struct gfs2_holder *gh) 398 { 399 struct gfs2_glock *gl = gh->gh_gl; 400 struct gfs2_sbd *sdp = gl->gl_sbd; 401 struct gfs2_inode *ip = gl->gl_object; 402 int error = 0; 403 404 if (!ip || (gh->gh_flags & GL_SKIP)) 405 return 0; 406 407 if (test_bit(GIF_INVALID, &ip->i_flags)) { 408 error = gfs2_inode_refresh(ip); 409 if (error) 410 return error; 411 } 412 413 if ((ip->i_diskflags & GFS2_DIF_TRUNC_IN_PROG) && 414 (gl->gl_state == LM_ST_EXCLUSIVE) && 415 (gh->gh_state == LM_ST_EXCLUSIVE)) { 416 spin_lock(&sdp->sd_trunc_lock); 417 if (list_empty(&ip->i_trunc_list)) 418 list_add(&sdp->sd_trunc_list, &ip->i_trunc_list); 419 spin_unlock(&sdp->sd_trunc_lock); 420 wake_up(&sdp->sd_quota_wait); 421 return 1; 422 } 423 424 return error; 425 } 426 427 /** 428 * inode_go_dump - print information about an inode 429 * @seq: The iterator 430 * @ip: the inode 431 * 432 * Returns: 0 on success, -ENOBUFS when we run out of space 433 */ 434 435 static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl) 436 { 437 const struct gfs2_inode *ip = gl->gl_object; 438 if (ip == NULL) 439 return 0; 440 gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu\n", 441 (unsigned long long)ip->i_no_formal_ino, 442 (unsigned long long)ip->i_no_addr, 443 IF2DT(ip->i_inode.i_mode), ip->i_flags, 444 (unsigned int)ip->i_diskflags, 445 (unsigned long long)i_size_read(&ip->i_inode)); 446 return 0; 447 } 448 449 /** 450 * trans_go_sync - promote/demote the transaction glock 451 * @gl: the glock 452 * @state: the requested state 453 * @flags: 454 * 455 */ 456 457 static void trans_go_sync(struct gfs2_glock *gl) 458 { 459 struct gfs2_sbd *sdp = gl->gl_sbd; 460 461 if (gl->gl_state != LM_ST_UNLOCKED && 462 test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { 463 gfs2_meta_syncfs(sdp); 464 gfs2_log_shutdown(sdp); 465 } 466 } 467 468 /** 469 * trans_go_xmote_bh - After promoting/demoting the transaction glock 470 * @gl: the glock 471 * 472 */ 473 474 static int trans_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh) 475 { 476 struct gfs2_sbd *sdp = gl->gl_sbd; 477 struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode); 478 struct gfs2_glock *j_gl = ip->i_gl; 479 struct gfs2_log_header_host head; 480 int error; 481 482 if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { 483 j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); 484 485 error = gfs2_find_jhead(sdp->sd_jdesc, &head); 486 if (error) 487 gfs2_consist(sdp); 488 if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) 489 gfs2_consist(sdp); 490 491 /* Initialize some head of the log stuff */ 492 if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) { 493 sdp->sd_log_sequence = head.lh_sequence + 1; 494 gfs2_log_pointers_init(sdp, head.lh_blkno); 495 } 496 } 497 return 0; 498 } 499 500 /** 501 * trans_go_demote_ok 502 * @gl: the glock 503 * 504 * Always returns 0 505 */ 506 507 static int trans_go_demote_ok(const struct gfs2_glock *gl) 508 { 509 return 0; 510 } 511 512 /** 513 * iopen_go_callback - schedule the dcache entry for the inode to be deleted 514 * @gl: the glock 515 * 516 * gl_spin lock is held while calling this 517 */ 518 static void iopen_go_callback(struct gfs2_glock *gl, bool remote) 519 { 520 struct gfs2_inode *ip = (struct gfs2_inode *)gl->gl_object; 521 struct gfs2_sbd *sdp = gl->gl_sbd; 522 523 if (!remote || (sdp->sd_vfs->s_flags & MS_RDONLY)) 524 return; 525 526 if (gl->gl_demote_state == LM_ST_UNLOCKED && 527 gl->gl_state == LM_ST_SHARED && ip) { 528 gfs2_glock_hold(gl); 529 if (queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0) 530 gfs2_glock_put_nolock(gl); 531 } 532 } 533 534 const struct gfs2_glock_operations gfs2_meta_glops = { 535 .go_type = LM_TYPE_META, 536 }; 537 538 const struct gfs2_glock_operations gfs2_inode_glops = { 539 .go_sync = inode_go_sync, 540 .go_inval = inode_go_inval, 541 .go_demote_ok = inode_go_demote_ok, 542 .go_lock = inode_go_lock, 543 .go_dump = inode_go_dump, 544 .go_type = LM_TYPE_INODE, 545 .go_flags = GLOF_ASPACE, 546 }; 547 548 const struct gfs2_glock_operations gfs2_rgrp_glops = { 549 .go_sync = rgrp_go_sync, 550 .go_inval = rgrp_go_inval, 551 .go_lock = gfs2_rgrp_go_lock, 552 .go_unlock = gfs2_rgrp_go_unlock, 553 .go_dump = gfs2_rgrp_dump, 554 .go_type = LM_TYPE_RGRP, 555 .go_flags = GLOF_ASPACE | GLOF_LVB, 556 }; 557 558 const struct gfs2_glock_operations gfs2_trans_glops = { 559 .go_sync = trans_go_sync, 560 .go_xmote_bh = trans_go_xmote_bh, 561 .go_demote_ok = trans_go_demote_ok, 562 .go_type = LM_TYPE_NONDISK, 563 }; 564 565 const struct gfs2_glock_operations gfs2_iopen_glops = { 566 .go_type = LM_TYPE_IOPEN, 567 .go_callback = iopen_go_callback, 568 }; 569 570 const struct gfs2_glock_operations gfs2_flock_glops = { 571 .go_type = LM_TYPE_FLOCK, 572 }; 573 574 const struct gfs2_glock_operations gfs2_nondisk_glops = { 575 .go_type = LM_TYPE_NONDISK, 576 }; 577 578 const struct gfs2_glock_operations gfs2_quota_glops = { 579 .go_type = LM_TYPE_QUOTA, 580 .go_flags = GLOF_LVB, 581 }; 582 583 const struct gfs2_glock_operations gfs2_journal_glops = { 584 .go_type = LM_TYPE_JOURNAL, 585 }; 586 587 const struct gfs2_glock_operations *gfs2_glops_list[] = { 588 [LM_TYPE_META] = &gfs2_meta_glops, 589 [LM_TYPE_INODE] = &gfs2_inode_glops, 590 [LM_TYPE_RGRP] = &gfs2_rgrp_glops, 591 [LM_TYPE_IOPEN] = &gfs2_iopen_glops, 592 [LM_TYPE_FLOCK] = &gfs2_flock_glops, 593 [LM_TYPE_NONDISK] = &gfs2_nondisk_glops, 594 [LM_TYPE_QUOTA] = &gfs2_quota_glops, 595 [LM_TYPE_JOURNAL] = &gfs2_journal_glops, 596 }; 597 598