1 /* 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 4 * 5 * This copyrighted material is made available to anyone wishing to use, 6 * modify, copy, or redistribute it subject to the terms and conditions 7 * of the GNU General Public License version 2. 8 */ 9 10 #include <linux/spinlock.h> 11 #include <linux/completion.h> 12 #include <linux/buffer_head.h> 13 #include <linux/gfs2_ondisk.h> 14 #include <linux/bio.h> 15 #include <linux/posix_acl.h> 16 17 #include "gfs2.h" 18 #include "incore.h" 19 #include "bmap.h" 20 #include "glock.h" 21 #include "glops.h" 22 #include "inode.h" 23 #include "log.h" 24 #include "meta_io.h" 25 #include "recovery.h" 26 #include "rgrp.h" 27 #include "util.h" 28 #include "trans.h" 29 #include "dir.h" 30 31 static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh) 32 { 33 fs_err(gl->gl_sbd, "AIL buffer %p: blocknr %llu state 0x%08lx mapping %p page state 0x%lx\n", 34 bh, (unsigned long long)bh->b_blocknr, bh->b_state, 35 bh->b_page->mapping, bh->b_page->flags); 36 fs_err(gl->gl_sbd, "AIL glock %u:%llu mapping %p\n", 37 gl->gl_name.ln_type, gl->gl_name.ln_number, 38 gfs2_glock2aspace(gl)); 39 gfs2_lm_withdraw(gl->gl_sbd, "AIL error\n"); 40 } 41 42 /** 43 * __gfs2_ail_flush - remove all buffers for a given lock from the AIL 44 * @gl: the glock 45 * @fsync: set when called from fsync (not all buffers will be clean) 46 * 47 * None of the buffers should be dirty, locked, or pinned. 48 */ 49 50 static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) 51 { 52 struct gfs2_sbd *sdp = gl->gl_sbd; 53 struct list_head *head = &gl->gl_ail_list; 54 struct gfs2_bufdata *bd, *tmp; 55 struct buffer_head *bh; 56 const unsigned long b_state = (1UL << BH_Dirty)|(1UL << BH_Pinned)|(1UL << BH_Lock); 57 58 gfs2_log_lock(sdp); 59 spin_lock(&sdp->sd_ail_lock); 60 list_for_each_entry_safe(bd, tmp, head, bd_ail_gl_list) { 61 bh = bd->bd_bh; 62 if (bh->b_state & b_state) { 63 if (fsync) 64 continue; 65 gfs2_ail_error(gl, bh); 66 } 67 gfs2_trans_add_revoke(sdp, bd); 68 } 69 GLOCK_BUG_ON(gl, !fsync && atomic_read(&gl->gl_ail_count)); 70 spin_unlock(&sdp->sd_ail_lock); 71 gfs2_log_unlock(sdp); 72 } 73 74 75 static void gfs2_ail_empty_gl(struct gfs2_glock *gl) 76 { 77 struct gfs2_sbd *sdp = gl->gl_sbd; 78 struct gfs2_trans tr; 79 80 memset(&tr, 0, sizeof(tr)); 81 tr.tr_revokes = atomic_read(&gl->gl_ail_count); 82 83 if (!tr.tr_revokes) 84 return; 85 86 /* A shortened, inline version of gfs2_trans_begin() */ 87 tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64)); 88 tr.tr_ip = (unsigned long)__builtin_return_address(0); 89 sb_start_intwrite(sdp->sd_vfs); 90 gfs2_log_reserve(sdp, tr.tr_reserved); 91 WARN_ON_ONCE(current->journal_info); 92 current->journal_info = &tr; 93 94 __gfs2_ail_flush(gl, 0); 95 96 gfs2_trans_end(sdp); 97 gfs2_log_flush(sdp, NULL); 98 } 99 100 void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) 101 { 102 struct gfs2_sbd *sdp = gl->gl_sbd; 103 unsigned int revokes = atomic_read(&gl->gl_ail_count); 104 int ret; 105 106 if (!revokes) 107 return; 108 109 ret = gfs2_trans_begin(sdp, 0, revokes); 110 if (ret) 111 return; 112 __gfs2_ail_flush(gl, fsync); 113 gfs2_trans_end(sdp); 114 gfs2_log_flush(sdp, NULL); 115 } 116 117 /** 118 * rgrp_go_sync - sync out the metadata for this glock 119 * @gl: the glock 120 * 121 * Called when demoting or unlocking an EX glock. We must flush 122 * to disk all dirty buffers/pages relating to this glock, and must not 123 * not return to caller to demote/unlock the glock until I/O is complete. 124 */ 125 126 static void rgrp_go_sync(struct gfs2_glock *gl) 127 { 128 struct address_space *metamapping = gfs2_glock2aspace(gl); 129 struct gfs2_rgrpd *rgd; 130 int error; 131 132 if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) 133 return; 134 GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE); 135 136 gfs2_log_flush(gl->gl_sbd, gl); 137 filemap_fdatawrite(metamapping); 138 error = filemap_fdatawait(metamapping); 139 mapping_set_error(metamapping, error); 140 gfs2_ail_empty_gl(gl); 141 142 spin_lock(&gl->gl_spin); 143 rgd = gl->gl_object; 144 if (rgd) 145 gfs2_free_clones(rgd); 146 spin_unlock(&gl->gl_spin); 147 } 148 149 /** 150 * rgrp_go_inval - invalidate the metadata for this glock 151 * @gl: the glock 152 * @flags: 153 * 154 * We never used LM_ST_DEFERRED with resource groups, so that we 155 * should always see the metadata flag set here. 156 * 157 */ 158 159 static void rgrp_go_inval(struct gfs2_glock *gl, int flags) 160 { 161 struct address_space *mapping = gfs2_glock2aspace(gl); 162 163 WARN_ON_ONCE(!(flags & DIO_METADATA)); 164 gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count)); 165 truncate_inode_pages(mapping, 0); 166 167 if (gl->gl_object) { 168 struct gfs2_rgrpd *rgd = (struct gfs2_rgrpd *)gl->gl_object; 169 rgd->rd_flags &= ~GFS2_RDF_UPTODATE; 170 } 171 } 172 173 /** 174 * inode_go_sync - Sync the dirty data and/or metadata for an inode glock 175 * @gl: the glock protecting the inode 176 * 177 */ 178 179 static void inode_go_sync(struct gfs2_glock *gl) 180 { 181 struct gfs2_inode *ip = gl->gl_object; 182 struct address_space *metamapping = gfs2_glock2aspace(gl); 183 int error; 184 185 if (ip && !S_ISREG(ip->i_inode.i_mode)) 186 ip = NULL; 187 if (ip && test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags)) 188 unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0); 189 if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) 190 return; 191 192 GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE); 193 194 gfs2_log_flush(gl->gl_sbd, gl); 195 filemap_fdatawrite(metamapping); 196 if (ip) { 197 struct address_space *mapping = ip->i_inode.i_mapping; 198 filemap_fdatawrite(mapping); 199 error = filemap_fdatawait(mapping); 200 mapping_set_error(mapping, error); 201 } 202 error = filemap_fdatawait(metamapping); 203 mapping_set_error(metamapping, error); 204 gfs2_ail_empty_gl(gl); 205 /* 206 * Writeback of the data mapping may cause the dirty flag to be set 207 * so we have to clear it again here. 208 */ 209 smp_mb__before_clear_bit(); 210 clear_bit(GLF_DIRTY, &gl->gl_flags); 211 } 212 213 /** 214 * inode_go_inval - prepare a inode glock to be released 215 * @gl: the glock 216 * @flags: 217 * 218 * Normally we invlidate everything, but if we are moving into 219 * LM_ST_DEFERRED from LM_ST_SHARED or LM_ST_EXCLUSIVE then we 220 * can keep hold of the metadata, since it won't have changed. 221 * 222 */ 223 224 static void inode_go_inval(struct gfs2_glock *gl, int flags) 225 { 226 struct gfs2_inode *ip = gl->gl_object; 227 228 gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count)); 229 230 if (flags & DIO_METADATA) { 231 struct address_space *mapping = gfs2_glock2aspace(gl); 232 truncate_inode_pages(mapping, 0); 233 if (ip) { 234 set_bit(GIF_INVALID, &ip->i_flags); 235 forget_all_cached_acls(&ip->i_inode); 236 gfs2_dir_hash_inval(ip); 237 } 238 } 239 240 if (ip == GFS2_I(gl->gl_sbd->sd_rindex)) { 241 gfs2_log_flush(gl->gl_sbd, NULL); 242 gl->gl_sbd->sd_rindex_uptodate = 0; 243 } 244 if (ip && S_ISREG(ip->i_inode.i_mode)) 245 truncate_inode_pages(ip->i_inode.i_mapping, 0); 246 } 247 248 /** 249 * inode_go_demote_ok - Check to see if it's ok to unlock an inode glock 250 * @gl: the glock 251 * 252 * Returns: 1 if it's ok 253 */ 254 255 static int inode_go_demote_ok(const struct gfs2_glock *gl) 256 { 257 struct gfs2_sbd *sdp = gl->gl_sbd; 258 struct gfs2_holder *gh; 259 260 if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object) 261 return 0; 262 263 if (!list_empty(&gl->gl_holders)) { 264 gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); 265 if (gh->gh_list.next != &gl->gl_holders) 266 return 0; 267 } 268 269 return 1; 270 } 271 272 /** 273 * gfs2_set_nlink - Set the inode's link count based on on-disk info 274 * @inode: The inode in question 275 * @nlink: The link count 276 * 277 * If the link count has hit zero, it must never be raised, whatever the 278 * on-disk inode might say. When new struct inodes are created the link 279 * count is set to 1, so that we can safely use this test even when reading 280 * in on disk information for the first time. 281 */ 282 283 static void gfs2_set_nlink(struct inode *inode, u32 nlink) 284 { 285 /* 286 * We will need to review setting the nlink count here in the 287 * light of the forthcoming ro bind mount work. This is a reminder 288 * to do that. 289 */ 290 if ((inode->i_nlink != nlink) && (inode->i_nlink != 0)) { 291 if (nlink == 0) 292 clear_nlink(inode); 293 else 294 set_nlink(inode, nlink); 295 } 296 } 297 298 static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) 299 { 300 const struct gfs2_dinode *str = buf; 301 struct timespec atime; 302 u16 height, depth; 303 304 if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) 305 goto corrupt; 306 ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino); 307 ip->i_inode.i_mode = be32_to_cpu(str->di_mode); 308 ip->i_inode.i_rdev = 0; 309 switch (ip->i_inode.i_mode & S_IFMT) { 310 case S_IFBLK: 311 case S_IFCHR: 312 ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major), 313 be32_to_cpu(str->di_minor)); 314 break; 315 }; 316 317 i_uid_write(&ip->i_inode, be32_to_cpu(str->di_uid)); 318 i_gid_write(&ip->i_inode, be32_to_cpu(str->di_gid)); 319 gfs2_set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink)); 320 i_size_write(&ip->i_inode, be64_to_cpu(str->di_size)); 321 gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); 322 atime.tv_sec = be64_to_cpu(str->di_atime); 323 atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); 324 if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0) 325 ip->i_inode.i_atime = atime; 326 ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); 327 ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); 328 ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); 329 ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec); 330 331 ip->i_goal = be64_to_cpu(str->di_goal_meta); 332 ip->i_generation = be64_to_cpu(str->di_generation); 333 334 ip->i_diskflags = be32_to_cpu(str->di_flags); 335 ip->i_eattr = be64_to_cpu(str->di_eattr); 336 /* i_diskflags and i_eattr must be set before gfs2_set_inode_flags() */ 337 gfs2_set_inode_flags(&ip->i_inode); 338 height = be16_to_cpu(str->di_height); 339 if (unlikely(height > GFS2_MAX_META_HEIGHT)) 340 goto corrupt; 341 ip->i_height = (u8)height; 342 343 depth = be16_to_cpu(str->di_depth); 344 if (unlikely(depth > GFS2_DIR_MAX_DEPTH)) 345 goto corrupt; 346 ip->i_depth = (u8)depth; 347 ip->i_entries = be32_to_cpu(str->di_entries); 348 349 if (S_ISREG(ip->i_inode.i_mode)) 350 gfs2_set_aops(&ip->i_inode); 351 352 return 0; 353 corrupt: 354 gfs2_consist_inode(ip); 355 return -EIO; 356 } 357 358 /** 359 * gfs2_inode_refresh - Refresh the incore copy of the dinode 360 * @ip: The GFS2 inode 361 * 362 * Returns: errno 363 */ 364 365 int gfs2_inode_refresh(struct gfs2_inode *ip) 366 { 367 struct buffer_head *dibh; 368 int error; 369 370 error = gfs2_meta_inode_buffer(ip, &dibh); 371 if (error) 372 return error; 373 374 error = gfs2_dinode_in(ip, dibh->b_data); 375 brelse(dibh); 376 clear_bit(GIF_INVALID, &ip->i_flags); 377 378 return error; 379 } 380 381 /** 382 * inode_go_lock - operation done after an inode lock is locked by a process 383 * @gl: the glock 384 * @flags: 385 * 386 * Returns: errno 387 */ 388 389 static int inode_go_lock(struct gfs2_holder *gh) 390 { 391 struct gfs2_glock *gl = gh->gh_gl; 392 struct gfs2_sbd *sdp = gl->gl_sbd; 393 struct gfs2_inode *ip = gl->gl_object; 394 int error = 0; 395 396 if (!ip || (gh->gh_flags & GL_SKIP)) 397 return 0; 398 399 if (test_bit(GIF_INVALID, &ip->i_flags)) { 400 error = gfs2_inode_refresh(ip); 401 if (error) 402 return error; 403 } 404 405 if ((ip->i_diskflags & GFS2_DIF_TRUNC_IN_PROG) && 406 (gl->gl_state == LM_ST_EXCLUSIVE) && 407 (gh->gh_state == LM_ST_EXCLUSIVE)) { 408 spin_lock(&sdp->sd_trunc_lock); 409 if (list_empty(&ip->i_trunc_list)) 410 list_add(&sdp->sd_trunc_list, &ip->i_trunc_list); 411 spin_unlock(&sdp->sd_trunc_lock); 412 wake_up(&sdp->sd_quota_wait); 413 return 1; 414 } 415 416 return error; 417 } 418 419 /** 420 * inode_go_dump - print information about an inode 421 * @seq: The iterator 422 * @ip: the inode 423 * 424 * Returns: 0 on success, -ENOBUFS when we run out of space 425 */ 426 427 static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl) 428 { 429 const struct gfs2_inode *ip = gl->gl_object; 430 if (ip == NULL) 431 return 0; 432 gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu\n", 433 (unsigned long long)ip->i_no_formal_ino, 434 (unsigned long long)ip->i_no_addr, 435 IF2DT(ip->i_inode.i_mode), ip->i_flags, 436 (unsigned int)ip->i_diskflags, 437 (unsigned long long)i_size_read(&ip->i_inode)); 438 return 0; 439 } 440 441 /** 442 * trans_go_sync - promote/demote the transaction glock 443 * @gl: the glock 444 * @state: the requested state 445 * @flags: 446 * 447 */ 448 449 static void trans_go_sync(struct gfs2_glock *gl) 450 { 451 struct gfs2_sbd *sdp = gl->gl_sbd; 452 453 if (gl->gl_state != LM_ST_UNLOCKED && 454 test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { 455 gfs2_meta_syncfs(sdp); 456 gfs2_log_shutdown(sdp); 457 } 458 } 459 460 /** 461 * trans_go_xmote_bh - After promoting/demoting the transaction glock 462 * @gl: the glock 463 * 464 */ 465 466 static int trans_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh) 467 { 468 struct gfs2_sbd *sdp = gl->gl_sbd; 469 struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode); 470 struct gfs2_glock *j_gl = ip->i_gl; 471 struct gfs2_log_header_host head; 472 int error; 473 474 if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { 475 j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); 476 477 error = gfs2_find_jhead(sdp->sd_jdesc, &head); 478 if (error) 479 gfs2_consist(sdp); 480 if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) 481 gfs2_consist(sdp); 482 483 /* Initialize some head of the log stuff */ 484 if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) { 485 sdp->sd_log_sequence = head.lh_sequence + 1; 486 gfs2_log_pointers_init(sdp, head.lh_blkno); 487 } 488 } 489 return 0; 490 } 491 492 /** 493 * trans_go_demote_ok 494 * @gl: the glock 495 * 496 * Always returns 0 497 */ 498 499 static int trans_go_demote_ok(const struct gfs2_glock *gl) 500 { 501 return 0; 502 } 503 504 /** 505 * iopen_go_callback - schedule the dcache entry for the inode to be deleted 506 * @gl: the glock 507 * 508 * gl_spin lock is held while calling this 509 */ 510 static void iopen_go_callback(struct gfs2_glock *gl, bool remote) 511 { 512 struct gfs2_inode *ip = (struct gfs2_inode *)gl->gl_object; 513 struct gfs2_sbd *sdp = gl->gl_sbd; 514 515 if (!remote || (sdp->sd_vfs->s_flags & MS_RDONLY)) 516 return; 517 518 if (gl->gl_demote_state == LM_ST_UNLOCKED && 519 gl->gl_state == LM_ST_SHARED && ip) { 520 gfs2_glock_hold(gl); 521 if (queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0) 522 gfs2_glock_put_nolock(gl); 523 } 524 } 525 526 const struct gfs2_glock_operations gfs2_meta_glops = { 527 .go_type = LM_TYPE_META, 528 }; 529 530 const struct gfs2_glock_operations gfs2_inode_glops = { 531 .go_sync = inode_go_sync, 532 .go_inval = inode_go_inval, 533 .go_demote_ok = inode_go_demote_ok, 534 .go_lock = inode_go_lock, 535 .go_dump = inode_go_dump, 536 .go_type = LM_TYPE_INODE, 537 .go_flags = GLOF_ASPACE, 538 }; 539 540 const struct gfs2_glock_operations gfs2_rgrp_glops = { 541 .go_sync = rgrp_go_sync, 542 .go_inval = rgrp_go_inval, 543 .go_lock = gfs2_rgrp_go_lock, 544 .go_unlock = gfs2_rgrp_go_unlock, 545 .go_dump = gfs2_rgrp_dump, 546 .go_type = LM_TYPE_RGRP, 547 .go_flags = GLOF_ASPACE | GLOF_LVB, 548 }; 549 550 const struct gfs2_glock_operations gfs2_trans_glops = { 551 .go_sync = trans_go_sync, 552 .go_xmote_bh = trans_go_xmote_bh, 553 .go_demote_ok = trans_go_demote_ok, 554 .go_type = LM_TYPE_NONDISK, 555 }; 556 557 const struct gfs2_glock_operations gfs2_iopen_glops = { 558 .go_type = LM_TYPE_IOPEN, 559 .go_callback = iopen_go_callback, 560 }; 561 562 const struct gfs2_glock_operations gfs2_flock_glops = { 563 .go_type = LM_TYPE_FLOCK, 564 }; 565 566 const struct gfs2_glock_operations gfs2_nondisk_glops = { 567 .go_type = LM_TYPE_NONDISK, 568 }; 569 570 const struct gfs2_glock_operations gfs2_quota_glops = { 571 .go_type = LM_TYPE_QUOTA, 572 .go_flags = GLOF_LVB, 573 }; 574 575 const struct gfs2_glock_operations gfs2_journal_glops = { 576 .go_type = LM_TYPE_JOURNAL, 577 }; 578 579 const struct gfs2_glock_operations *gfs2_glops_list[] = { 580 [LM_TYPE_META] = &gfs2_meta_glops, 581 [LM_TYPE_INODE] = &gfs2_inode_glops, 582 [LM_TYPE_RGRP] = &gfs2_rgrp_glops, 583 [LM_TYPE_IOPEN] = &gfs2_iopen_glops, 584 [LM_TYPE_FLOCK] = &gfs2_flock_glops, 585 [LM_TYPE_NONDISK] = &gfs2_nondisk_glops, 586 [LM_TYPE_QUOTA] = &gfs2_quota_glops, 587 [LM_TYPE_JOURNAL] = &gfs2_journal_glops, 588 }; 589 590