1 /* -*- mode: c; c-basic-offset: 8; -*- 2 * vim: noexpandtab sw=8 ts=8 sts=0: 3 * 4 * inode.c 5 * 6 * vfs' aops, fops, dops and iops 7 * 8 * Copyright (C) 2002, 2004 Oracle. All rights reserved. 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public 12 * License as published by the Free Software Foundation; either 13 * version 2 of the License, or (at your option) any later version. 14 * 15 * This program is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * General Public License for more details. 19 * 20 * You should have received a copy of the GNU General Public 21 * License along with this program; if not, write to the 22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 23 * Boston, MA 021110-1307, USA. 24 */ 25 26 #include <linux/fs.h> 27 #include <linux/types.h> 28 #include <linux/highmem.h> 29 #include <linux/pagemap.h> 30 #include <linux/quotaops.h> 31 #include <linux/iversion.h> 32 33 #include <asm/byteorder.h> 34 35 #include <cluster/masklog.h> 36 37 #include "ocfs2.h" 38 39 #include "alloc.h" 40 #include "dir.h" 41 #include "blockcheck.h" 42 #include "dlmglue.h" 43 #include "extent_map.h" 44 #include "file.h" 45 #include "heartbeat.h" 46 #include "inode.h" 47 #include "journal.h" 48 #include "namei.h" 49 #include "suballoc.h" 50 #include "super.h" 51 #include "symlink.h" 52 #include "sysfile.h" 53 #include "uptodate.h" 54 #include "xattr.h" 55 #include "refcounttree.h" 56 #include "ocfs2_trace.h" 57 #include "filecheck.h" 58 59 #include "buffer_head_io.h" 60 61 struct ocfs2_find_inode_args 62 { 63 u64 fi_blkno; 64 unsigned long fi_ino; 65 unsigned int fi_flags; 66 unsigned int fi_sysfile_type; 67 }; 68 69 static struct lock_class_key ocfs2_sysfile_lock_key[NUM_SYSTEM_INODES]; 70 71 static int ocfs2_read_locked_inode(struct inode 
*inode, 72 struct ocfs2_find_inode_args *args); 73 static int ocfs2_init_locked_inode(struct inode *inode, void *opaque); 74 static int ocfs2_find_actor(struct inode *inode, void *opaque); 75 static int ocfs2_truncate_for_delete(struct ocfs2_super *osb, 76 struct inode *inode, 77 struct buffer_head *fe_bh); 78 79 static int ocfs2_filecheck_read_inode_block_full(struct inode *inode, 80 struct buffer_head **bh, 81 int flags, int type); 82 static int ocfs2_filecheck_validate_inode_block(struct super_block *sb, 83 struct buffer_head *bh); 84 static int ocfs2_filecheck_repair_inode_block(struct super_block *sb, 85 struct buffer_head *bh); 86 87 void ocfs2_set_inode_flags(struct inode *inode) 88 { 89 unsigned int flags = OCFS2_I(inode)->ip_attr; 90 91 inode->i_flags &= ~(S_IMMUTABLE | 92 S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC); 93 94 if (flags & OCFS2_IMMUTABLE_FL) 95 inode->i_flags |= S_IMMUTABLE; 96 97 if (flags & OCFS2_SYNC_FL) 98 inode->i_flags |= S_SYNC; 99 if (flags & OCFS2_APPEND_FL) 100 inode->i_flags |= S_APPEND; 101 if (flags & OCFS2_NOATIME_FL) 102 inode->i_flags |= S_NOATIME; 103 if (flags & OCFS2_DIRSYNC_FL) 104 inode->i_flags |= S_DIRSYNC; 105 } 106 107 /* Propagate flags from i_flags to OCFS2_I(inode)->ip_attr */ 108 void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi) 109 { 110 unsigned int flags = oi->vfs_inode.i_flags; 111 112 oi->ip_attr &= ~(OCFS2_SYNC_FL|OCFS2_APPEND_FL| 113 OCFS2_IMMUTABLE_FL|OCFS2_NOATIME_FL|OCFS2_DIRSYNC_FL); 114 if (flags & S_SYNC) 115 oi->ip_attr |= OCFS2_SYNC_FL; 116 if (flags & S_APPEND) 117 oi->ip_attr |= OCFS2_APPEND_FL; 118 if (flags & S_IMMUTABLE) 119 oi->ip_attr |= OCFS2_IMMUTABLE_FL; 120 if (flags & S_NOATIME) 121 oi->ip_attr |= OCFS2_NOATIME_FL; 122 if (flags & S_DIRSYNC) 123 oi->ip_attr |= OCFS2_DIRSYNC_FL; 124 } 125 126 struct inode *ocfs2_ilookup(struct super_block *sb, u64 blkno) 127 { 128 struct ocfs2_find_inode_args args; 129 130 args.fi_blkno = blkno; 131 args.fi_flags = 0; 132 args.fi_ino = 
ino_from_blkno(sb, blkno); 133 args.fi_sysfile_type = 0; 134 135 return ilookup5(sb, blkno, ocfs2_find_actor, &args); 136 } 137 struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags, 138 int sysfile_type) 139 { 140 int rc = -ESTALE; 141 struct inode *inode = NULL; 142 struct super_block *sb = osb->sb; 143 struct ocfs2_find_inode_args args; 144 journal_t *journal = OCFS2_SB(sb)->journal->j_journal; 145 146 trace_ocfs2_iget_begin((unsigned long long)blkno, flags, 147 sysfile_type); 148 149 /* Ok. By now we've either got the offsets passed to us by the 150 * caller, or we just pulled them off the bh. Lets do some 151 * sanity checks to make sure they're OK. */ 152 if (blkno == 0) { 153 inode = ERR_PTR(-EINVAL); 154 mlog_errno(PTR_ERR(inode)); 155 goto bail; 156 } 157 158 args.fi_blkno = blkno; 159 args.fi_flags = flags; 160 args.fi_ino = ino_from_blkno(sb, blkno); 161 args.fi_sysfile_type = sysfile_type; 162 163 inode = iget5_locked(sb, args.fi_ino, ocfs2_find_actor, 164 ocfs2_init_locked_inode, &args); 165 /* inode was *not* in the inode cache. 2.6.x requires 166 * us to do our own read_inode call and unlock it 167 * afterwards. */ 168 if (inode == NULL) { 169 inode = ERR_PTR(-ENOMEM); 170 mlog_errno(PTR_ERR(inode)); 171 goto bail; 172 } 173 trace_ocfs2_iget5_locked(inode->i_state); 174 if (inode->i_state & I_NEW) { 175 rc = ocfs2_read_locked_inode(inode, &args); 176 unlock_new_inode(inode); 177 } 178 if (is_bad_inode(inode)) { 179 iput(inode); 180 inode = ERR_PTR(rc); 181 goto bail; 182 } 183 184 /* 185 * Set transaction id's of transactions that have to be committed 186 * to finish f[data]sync. We set them to currently running transaction 187 * as we cannot be sure that the inode or some of its metadata isn't 188 * part of the transaction - the inode could have been reclaimed and 189 * now it is reread from disk. 
190 */ 191 if (journal) { 192 transaction_t *transaction; 193 tid_t tid; 194 struct ocfs2_inode_info *oi = OCFS2_I(inode); 195 196 read_lock(&journal->j_state_lock); 197 if (journal->j_running_transaction) 198 transaction = journal->j_running_transaction; 199 else 200 transaction = journal->j_committing_transaction; 201 if (transaction) 202 tid = transaction->t_tid; 203 else 204 tid = journal->j_commit_sequence; 205 read_unlock(&journal->j_state_lock); 206 oi->i_sync_tid = tid; 207 oi->i_datasync_tid = tid; 208 } 209 210 bail: 211 if (!IS_ERR(inode)) { 212 trace_ocfs2_iget_end(inode, 213 (unsigned long long)OCFS2_I(inode)->ip_blkno); 214 } 215 216 return inode; 217 } 218 219 220 /* 221 * here's how inodes get read from disk: 222 * iget5_locked -> find_actor -> OCFS2_FIND_ACTOR 223 * found? : return the in-memory inode 224 * not found? : get_new_inode -> OCFS2_INIT_LOCKED_INODE 225 */ 226 227 static int ocfs2_find_actor(struct inode *inode, void *opaque) 228 { 229 struct ocfs2_find_inode_args *args = NULL; 230 struct ocfs2_inode_info *oi = OCFS2_I(inode); 231 int ret = 0; 232 233 args = opaque; 234 235 mlog_bug_on_msg(!inode, "No inode in find actor!\n"); 236 237 trace_ocfs2_find_actor(inode, inode->i_ino, opaque, args->fi_blkno); 238 239 if (oi->ip_blkno != args->fi_blkno) 240 goto bail; 241 242 ret = 1; 243 bail: 244 return ret; 245 } 246 247 /* 248 * initialize the new inode, but don't do anything that would cause 249 * us to sleep. 
250 * return 0 on success, 1 on failure 251 */ 252 static int ocfs2_init_locked_inode(struct inode *inode, void *opaque) 253 { 254 struct ocfs2_find_inode_args *args = opaque; 255 static struct lock_class_key ocfs2_quota_ip_alloc_sem_key, 256 ocfs2_file_ip_alloc_sem_key; 257 258 inode->i_ino = args->fi_ino; 259 OCFS2_I(inode)->ip_blkno = args->fi_blkno; 260 if (args->fi_sysfile_type != 0) 261 lockdep_set_class(&inode->i_rwsem, 262 &ocfs2_sysfile_lock_key[args->fi_sysfile_type]); 263 if (args->fi_sysfile_type == USER_QUOTA_SYSTEM_INODE || 264 args->fi_sysfile_type == GROUP_QUOTA_SYSTEM_INODE || 265 args->fi_sysfile_type == LOCAL_USER_QUOTA_SYSTEM_INODE || 266 args->fi_sysfile_type == LOCAL_GROUP_QUOTA_SYSTEM_INODE) 267 lockdep_set_class(&OCFS2_I(inode)->ip_alloc_sem, 268 &ocfs2_quota_ip_alloc_sem_key); 269 else 270 lockdep_set_class(&OCFS2_I(inode)->ip_alloc_sem, 271 &ocfs2_file_ip_alloc_sem_key); 272 273 return 0; 274 } 275 276 void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, 277 int create_ino) 278 { 279 struct super_block *sb; 280 struct ocfs2_super *osb; 281 int use_plocks = 1; 282 283 sb = inode->i_sb; 284 osb = OCFS2_SB(sb); 285 286 if ((osb->s_mount_opt & OCFS2_MOUNT_LOCALFLOCKS) || 287 ocfs2_mount_local(osb) || !ocfs2_stack_supports_plocks()) 288 use_plocks = 0; 289 290 /* 291 * These have all been checked by ocfs2_read_inode_block() or set 292 * by ocfs2_mknod_locked(), so a failure is a code bug. 293 */ 294 BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); /* This means that read_inode 295 cannot create a superblock 296 inode today. change if 297 that is needed. 
*/ 298 BUG_ON(!(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL))); 299 BUG_ON(le32_to_cpu(fe->i_fs_generation) != osb->fs_generation); 300 301 302 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); 303 OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr); 304 OCFS2_I(inode)->ip_dyn_features = le16_to_cpu(fe->i_dyn_features); 305 306 inode_set_iversion(inode, 1); 307 inode->i_generation = le32_to_cpu(fe->i_generation); 308 inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); 309 inode->i_mode = le16_to_cpu(fe->i_mode); 310 i_uid_write(inode, le32_to_cpu(fe->i_uid)); 311 i_gid_write(inode, le32_to_cpu(fe->i_gid)); 312 313 /* Fast symlinks will have i_size but no allocated clusters. */ 314 if (S_ISLNK(inode->i_mode) && !fe->i_clusters) { 315 inode->i_blocks = 0; 316 inode->i_mapping->a_ops = &ocfs2_fast_symlink_aops; 317 } else { 318 inode->i_blocks = ocfs2_inode_sector_count(inode); 319 inode->i_mapping->a_ops = &ocfs2_aops; 320 } 321 inode->i_atime.tv_sec = le64_to_cpu(fe->i_atime); 322 inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec); 323 inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime); 324 inode->i_mtime.tv_nsec = le32_to_cpu(fe->i_mtime_nsec); 325 inode->i_ctime.tv_sec = le64_to_cpu(fe->i_ctime); 326 inode->i_ctime.tv_nsec = le32_to_cpu(fe->i_ctime_nsec); 327 328 if (OCFS2_I(inode)->ip_blkno != le64_to_cpu(fe->i_blkno)) 329 mlog(ML_ERROR, 330 "ip_blkno %llu != i_blkno %llu!\n", 331 (unsigned long long)OCFS2_I(inode)->ip_blkno, 332 (unsigned long long)le64_to_cpu(fe->i_blkno)); 333 334 set_nlink(inode, ocfs2_read_links_count(fe)); 335 336 trace_ocfs2_populate_inode(OCFS2_I(inode)->ip_blkno, 337 le32_to_cpu(fe->i_flags)); 338 if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) { 339 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE; 340 inode->i_flags |= S_NOQUOTA; 341 } 342 343 if (fe->i_flags & cpu_to_le32(OCFS2_LOCAL_ALLOC_FL)) { 344 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP; 345 } else if (fe->i_flags & cpu_to_le32(OCFS2_BITMAP_FL)) { 346 
OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP; 347 } else if (fe->i_flags & cpu_to_le32(OCFS2_QUOTA_FL)) { 348 inode->i_flags |= S_NOQUOTA; 349 } else if (fe->i_flags & cpu_to_le32(OCFS2_SUPER_BLOCK_FL)) { 350 /* we can't actually hit this as read_inode can't 351 * handle superblocks today ;-) */ 352 BUG(); 353 } 354 355 switch (inode->i_mode & S_IFMT) { 356 case S_IFREG: 357 if (use_plocks) 358 inode->i_fop = &ocfs2_fops; 359 else 360 inode->i_fop = &ocfs2_fops_no_plocks; 361 inode->i_op = &ocfs2_file_iops; 362 i_size_write(inode, le64_to_cpu(fe->i_size)); 363 break; 364 case S_IFDIR: 365 inode->i_op = &ocfs2_dir_iops; 366 if (use_plocks) 367 inode->i_fop = &ocfs2_dops; 368 else 369 inode->i_fop = &ocfs2_dops_no_plocks; 370 i_size_write(inode, le64_to_cpu(fe->i_size)); 371 OCFS2_I(inode)->ip_dir_lock_gen = 1; 372 break; 373 case S_IFLNK: 374 inode->i_op = &ocfs2_symlink_inode_operations; 375 inode_nohighmem(inode); 376 i_size_write(inode, le64_to_cpu(fe->i_size)); 377 break; 378 default: 379 inode->i_op = &ocfs2_special_file_iops; 380 init_special_inode(inode, inode->i_mode, 381 inode->i_rdev); 382 break; 383 } 384 385 if (create_ino) { 386 inode->i_ino = ino_from_blkno(inode->i_sb, 387 le64_to_cpu(fe->i_blkno)); 388 389 /* 390 * If we ever want to create system files from kernel, 391 * the generation argument to 392 * ocfs2_inode_lock_res_init() will have to change. 
393 */ 394 BUG_ON(le32_to_cpu(fe->i_flags) & OCFS2_SYSTEM_FL); 395 396 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_inode_lockres, 397 OCFS2_LOCK_TYPE_META, 0, inode); 398 399 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_open_lockres, 400 OCFS2_LOCK_TYPE_OPEN, 0, inode); 401 } 402 403 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_rw_lockres, 404 OCFS2_LOCK_TYPE_RW, inode->i_generation, 405 inode); 406 407 ocfs2_set_inode_flags(inode); 408 409 OCFS2_I(inode)->ip_last_used_slot = 0; 410 OCFS2_I(inode)->ip_last_used_group = 0; 411 412 if (S_ISDIR(inode->i_mode)) 413 ocfs2_resv_set_type(&OCFS2_I(inode)->ip_la_data_resv, 414 OCFS2_RESV_FLAG_DIR); 415 } 416 417 static int ocfs2_read_locked_inode(struct inode *inode, 418 struct ocfs2_find_inode_args *args) 419 { 420 struct super_block *sb; 421 struct ocfs2_super *osb; 422 struct ocfs2_dinode *fe; 423 struct buffer_head *bh = NULL; 424 int status, can_lock, lock_level = 0; 425 u32 generation = 0; 426 427 status = -EINVAL; 428 sb = inode->i_sb; 429 osb = OCFS2_SB(sb); 430 431 /* 432 * To improve performance of cold-cache inode stats, we take 433 * the cluster lock here if possible. 434 * 435 * Generally, OCFS2 never trusts the contents of an inode 436 * unless it's holding a cluster lock, so taking it here isn't 437 * a correctness issue as much as it is a performance 438 * improvement. 439 * 440 * There are three times when taking the lock is not a good idea: 441 * 442 * 1) During startup, before we have initialized the DLM. 443 * 444 * 2) If we are reading certain system files which never get 445 * cluster locks (local alloc, truncate log). 446 * 447 * 3) If the process doing the iget() is responsible for 448 * orphan dir recovery. We're holding the orphan dir lock and 449 * can get into a deadlock with another process on another 450 * node in ->delete_inode(). 451 * 452 * #1 and #2 can be simply solved by never taking the lock 453 * here for system files (which are the only type we read 454 * during mount). 
It's a heavier approach, but our main 455 * concern is user-accessible files anyway. 456 * 457 * #3 works itself out because we'll eventually take the 458 * cluster lock before trusting anything anyway. 459 */ 460 can_lock = !(args->fi_flags & OCFS2_FI_FLAG_SYSFILE) 461 && !(args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY) 462 && !ocfs2_mount_local(osb); 463 464 trace_ocfs2_read_locked_inode( 465 (unsigned long long)OCFS2_I(inode)->ip_blkno, can_lock); 466 467 /* 468 * To maintain backwards compatibility with older versions of 469 * ocfs2-tools, we still store the generation value for system 470 * files. The only ones that actually matter to userspace are 471 * the journals, but it's easier and inexpensive to just flag 472 * all system files similarly. 473 */ 474 if (args->fi_flags & OCFS2_FI_FLAG_SYSFILE) 475 generation = osb->fs_generation; 476 477 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_inode_lockres, 478 OCFS2_LOCK_TYPE_META, 479 generation, inode); 480 481 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_open_lockres, 482 OCFS2_LOCK_TYPE_OPEN, 483 0, inode); 484 485 if (can_lock) { 486 status = ocfs2_open_lock(inode); 487 if (status) { 488 make_bad_inode(inode); 489 mlog_errno(status); 490 return status; 491 } 492 status = ocfs2_inode_lock(inode, NULL, lock_level); 493 if (status) { 494 make_bad_inode(inode); 495 mlog_errno(status); 496 return status; 497 } 498 } 499 500 if (args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY) { 501 status = ocfs2_try_open_lock(inode, 0); 502 if (status) { 503 make_bad_inode(inode); 504 return status; 505 } 506 } 507 508 if (can_lock) { 509 if (args->fi_flags & OCFS2_FI_FLAG_FILECHECK_CHK) 510 status = ocfs2_filecheck_read_inode_block_full(inode, 511 &bh, OCFS2_BH_IGNORE_CACHE, 0); 512 else if (args->fi_flags & OCFS2_FI_FLAG_FILECHECK_FIX) 513 status = ocfs2_filecheck_read_inode_block_full(inode, 514 &bh, OCFS2_BH_IGNORE_CACHE, 1); 515 else 516 status = ocfs2_read_inode_block_full(inode, 517 &bh, OCFS2_BH_IGNORE_CACHE); 518 } 
else { 519 status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh); 520 /* 521 * If buffer is in jbd, then its checksum may not have been 522 * computed as yet. 523 */ 524 if (!status && !buffer_jbd(bh)) { 525 if (args->fi_flags & OCFS2_FI_FLAG_FILECHECK_CHK) 526 status = ocfs2_filecheck_validate_inode_block( 527 osb->sb, bh); 528 else if (args->fi_flags & OCFS2_FI_FLAG_FILECHECK_FIX) 529 status = ocfs2_filecheck_repair_inode_block( 530 osb->sb, bh); 531 else 532 status = ocfs2_validate_inode_block( 533 osb->sb, bh); 534 } 535 } 536 if (status < 0) { 537 mlog_errno(status); 538 goto bail; 539 } 540 541 status = -EINVAL; 542 fe = (struct ocfs2_dinode *) bh->b_data; 543 544 /* 545 * This is a code bug. Right now the caller needs to 546 * understand whether it is asking for a system file inode or 547 * not so the proper lock names can be built. 548 */ 549 mlog_bug_on_msg(!!(fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) != 550 !!(args->fi_flags & OCFS2_FI_FLAG_SYSFILE), 551 "Inode %llu: system file state is ambigous\n", 552 (unsigned long long)args->fi_blkno); 553 554 if (S_ISCHR(le16_to_cpu(fe->i_mode)) || 555 S_ISBLK(le16_to_cpu(fe->i_mode))) 556 inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); 557 558 ocfs2_populate_inode(inode, fe, 0); 559 560 BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno)); 561 562 if (buffer_dirty(bh) && !buffer_jbd(bh)) { 563 if (can_lock) { 564 ocfs2_inode_unlock(inode, lock_level); 565 lock_level = 1; 566 ocfs2_inode_lock(inode, NULL, lock_level); 567 } 568 status = ocfs2_write_block(osb, bh, INODE_CACHE(inode)); 569 if (status < 0) { 570 mlog_errno(status); 571 goto bail; 572 } 573 } 574 575 status = 0; 576 577 bail: 578 if (can_lock) 579 ocfs2_inode_unlock(inode, lock_level); 580 581 if (status < 0) 582 make_bad_inode(inode); 583 584 brelse(bh); 585 586 return status; 587 } 588 589 void ocfs2_sync_blockdev(struct super_block *sb) 590 { 591 sync_blockdev(sb->s_bdev); 592 } 593 594 static int 
ocfs2_truncate_for_delete(struct ocfs2_super *osb, 595 struct inode *inode, 596 struct buffer_head *fe_bh) 597 { 598 int status = 0; 599 struct ocfs2_dinode *fe; 600 handle_t *handle = NULL; 601 602 fe = (struct ocfs2_dinode *) fe_bh->b_data; 603 604 /* 605 * This check will also skip truncate of inodes with inline 606 * data and fast symlinks. 607 */ 608 if (fe->i_clusters) { 609 if (ocfs2_should_order_data(inode)) 610 ocfs2_begin_ordered_truncate(inode, 0); 611 612 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); 613 if (IS_ERR(handle)) { 614 status = PTR_ERR(handle); 615 handle = NULL; 616 mlog_errno(status); 617 goto out; 618 } 619 620 status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), 621 fe_bh, 622 OCFS2_JOURNAL_ACCESS_WRITE); 623 if (status < 0) { 624 mlog_errno(status); 625 goto out; 626 } 627 628 i_size_write(inode, 0); 629 630 status = ocfs2_mark_inode_dirty(handle, inode, fe_bh); 631 if (status < 0) { 632 mlog_errno(status); 633 goto out; 634 } 635 636 ocfs2_commit_trans(osb, handle); 637 handle = NULL; 638 639 status = ocfs2_commit_truncate(osb, inode, fe_bh); 640 if (status < 0) { 641 mlog_errno(status); 642 goto out; 643 } 644 } 645 646 out: 647 if (handle) 648 ocfs2_commit_trans(osb, handle); 649 return status; 650 } 651 652 static int ocfs2_remove_inode(struct inode *inode, 653 struct buffer_head *di_bh, 654 struct inode *orphan_dir_inode, 655 struct buffer_head *orphan_dir_bh) 656 { 657 int status; 658 struct inode *inode_alloc_inode = NULL; 659 struct buffer_head *inode_alloc_bh = NULL; 660 handle_t *handle; 661 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 662 struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; 663 664 inode_alloc_inode = 665 ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE, 666 le16_to_cpu(di->i_suballoc_slot)); 667 if (!inode_alloc_inode) { 668 status = -ENOENT; 669 mlog_errno(status); 670 goto bail; 671 } 672 673 inode_lock(inode_alloc_inode); 674 status = 
ocfs2_inode_lock(inode_alloc_inode, &inode_alloc_bh, 1); 675 if (status < 0) { 676 inode_unlock(inode_alloc_inode); 677 678 mlog_errno(status); 679 goto bail; 680 } 681 682 handle = ocfs2_start_trans(osb, OCFS2_DELETE_INODE_CREDITS + 683 ocfs2_quota_trans_credits(inode->i_sb)); 684 if (IS_ERR(handle)) { 685 status = PTR_ERR(handle); 686 mlog_errno(status); 687 goto bail_unlock; 688 } 689 690 if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) { 691 status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode, 692 orphan_dir_bh, false); 693 if (status < 0) { 694 mlog_errno(status); 695 goto bail_commit; 696 } 697 } 698 699 /* set the inodes dtime */ 700 status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, 701 OCFS2_JOURNAL_ACCESS_WRITE); 702 if (status < 0) { 703 mlog_errno(status); 704 goto bail_commit; 705 } 706 707 di->i_dtime = cpu_to_le64(ktime_get_real_seconds()); 708 di->i_flags &= cpu_to_le32(~(OCFS2_VALID_FL | OCFS2_ORPHANED_FL)); 709 ocfs2_journal_dirty(handle, di_bh); 710 711 ocfs2_remove_from_cache(INODE_CACHE(inode), di_bh); 712 dquot_free_inode(inode); 713 714 status = ocfs2_free_dinode(handle, inode_alloc_inode, 715 inode_alloc_bh, di); 716 if (status < 0) 717 mlog_errno(status); 718 719 bail_commit: 720 ocfs2_commit_trans(osb, handle); 721 bail_unlock: 722 ocfs2_inode_unlock(inode_alloc_inode, 1); 723 inode_unlock(inode_alloc_inode); 724 brelse(inode_alloc_bh); 725 bail: 726 iput(inode_alloc_inode); 727 728 return status; 729 } 730 731 /* 732 * Serialize with orphan dir recovery. If the process doing 733 * recovery on this orphan dir does an iget() with the dir 734 * i_mutex held, we'll deadlock here. Instead we detect this 735 * and exit early - recovery will wipe this inode for us. 
736 */ 737 static int ocfs2_check_orphan_recovery_state(struct ocfs2_super *osb, 738 int slot) 739 { 740 int ret = 0; 741 742 spin_lock(&osb->osb_lock); 743 if (ocfs2_node_map_test_bit(osb, &osb->osb_recovering_orphan_dirs, slot)) { 744 ret = -EDEADLK; 745 goto out; 746 } 747 /* This signals to the orphan recovery process that it should 748 * wait for us to handle the wipe. */ 749 osb->osb_orphan_wipes[slot]++; 750 out: 751 spin_unlock(&osb->osb_lock); 752 trace_ocfs2_check_orphan_recovery_state(slot, ret); 753 return ret; 754 } 755 756 static void ocfs2_signal_wipe_completion(struct ocfs2_super *osb, 757 int slot) 758 { 759 spin_lock(&osb->osb_lock); 760 osb->osb_orphan_wipes[slot]--; 761 spin_unlock(&osb->osb_lock); 762 763 wake_up(&osb->osb_wipe_event); 764 } 765 766 static int ocfs2_wipe_inode(struct inode *inode, 767 struct buffer_head *di_bh) 768 { 769 int status, orphaned_slot = -1; 770 struct inode *orphan_dir_inode = NULL; 771 struct buffer_head *orphan_dir_bh = NULL; 772 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 773 struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; 774 775 if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) { 776 orphaned_slot = le16_to_cpu(di->i_orphaned_slot); 777 778 status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot); 779 if (status) 780 return status; 781 782 orphan_dir_inode = ocfs2_get_system_file_inode(osb, 783 ORPHAN_DIR_SYSTEM_INODE, 784 orphaned_slot); 785 if (!orphan_dir_inode) { 786 status = -ENOENT; 787 mlog_errno(status); 788 goto bail; 789 } 790 791 /* Lock the orphan dir. The lock will be held for the entire 792 * delete_inode operation. We do this now to avoid races with 793 * recovery completion on other nodes. 
*/ 794 inode_lock(orphan_dir_inode); 795 status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1); 796 if (status < 0) { 797 inode_unlock(orphan_dir_inode); 798 799 mlog_errno(status); 800 goto bail; 801 } 802 } 803 804 /* we do this while holding the orphan dir lock because we 805 * don't want recovery being run from another node to try an 806 * inode delete underneath us -- this will result in two nodes 807 * truncating the same file! */ 808 status = ocfs2_truncate_for_delete(osb, inode, di_bh); 809 if (status < 0) { 810 mlog_errno(status); 811 goto bail_unlock_dir; 812 } 813 814 /* Remove any dir index tree */ 815 if (S_ISDIR(inode->i_mode)) { 816 status = ocfs2_dx_dir_truncate(inode, di_bh); 817 if (status) { 818 mlog_errno(status); 819 goto bail_unlock_dir; 820 } 821 } 822 823 /*Free extended attribute resources associated with this inode.*/ 824 status = ocfs2_xattr_remove(inode, di_bh); 825 if (status < 0) { 826 mlog_errno(status); 827 goto bail_unlock_dir; 828 } 829 830 status = ocfs2_remove_refcount_tree(inode, di_bh); 831 if (status < 0) { 832 mlog_errno(status); 833 goto bail_unlock_dir; 834 } 835 836 status = ocfs2_remove_inode(inode, di_bh, orphan_dir_inode, 837 orphan_dir_bh); 838 if (status < 0) 839 mlog_errno(status); 840 841 bail_unlock_dir: 842 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR) 843 return status; 844 845 ocfs2_inode_unlock(orphan_dir_inode, 1); 846 inode_unlock(orphan_dir_inode); 847 brelse(orphan_dir_bh); 848 bail: 849 iput(orphan_dir_inode); 850 ocfs2_signal_wipe_completion(osb, orphaned_slot); 851 852 return status; 853 } 854 855 /* There is a series of simple checks that should be done before a 856 * trylock is even considered. Encapsulate those in this function. 
*/
static int ocfs2_inode_is_valid_to_delete(struct inode *inode)
{
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	int deletable = 0;

	trace_ocfs2_inode_is_valid_to_delete(current, osb->dc_task,
					     (unsigned long long)oi->ip_blkno,
					     oi->ip_flags);

	/* We shouldn't be getting here for the root directory
	 * inode.. */
	if (inode == osb->root_inode) {
		mlog(ML_ERROR, "Skipping delete of root inode.\n");
		return 0;
	}

	/*
	 * If we're coming from downconvert_thread we can't go into our own
	 * voting [hello, deadlock city!] so we cannot delete the inode. But
	 * since we dropped last inode ref when downconverting dentry lock,
	 * we cannot have the file open and thus the node doing unlink will
	 * take care of deleting the inode.
	 */
	if (current == osb->dc_task)
		return 0;

	spin_lock(&oi->ip_lock);
	if (oi->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
		/* OCFS2 *never* deletes system files. This should technically
		 * never get here as system file inodes should always have a
		 * positive link count. */
		mlog(ML_ERROR, "Skipping delete of system file %llu\n",
		     (unsigned long long)oi->ip_blkno);
	} else {
		deletable = 1;
	}
	spin_unlock(&oi->ip_lock);

	return deletable;
}

/* Query the cluster to determine whether we should wipe an inode from
 * disk or not.
 *
 * Requires the inode to have the cluster lock.
*/ 905 static int ocfs2_query_inode_wipe(struct inode *inode, 906 struct buffer_head *di_bh, 907 int *wipe) 908 { 909 int status = 0, reason = 0; 910 struct ocfs2_inode_info *oi = OCFS2_I(inode); 911 struct ocfs2_dinode *di; 912 913 *wipe = 0; 914 915 trace_ocfs2_query_inode_wipe_begin((unsigned long long)oi->ip_blkno, 916 inode->i_nlink); 917 918 /* While we were waiting for the cluster lock in 919 * ocfs2_delete_inode, another node might have asked to delete 920 * the inode. Recheck our flags to catch this. */ 921 if (!ocfs2_inode_is_valid_to_delete(inode)) { 922 reason = 1; 923 goto bail; 924 } 925 926 /* Now that we have an up to date inode, we can double check 927 * the link count. */ 928 if (inode->i_nlink) 929 goto bail; 930 931 /* Do some basic inode verification... */ 932 di = (struct ocfs2_dinode *) di_bh->b_data; 933 if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL)) && 934 !(oi->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) { 935 /* 936 * Inodes in the orphan dir must have ORPHANED_FL. The only 937 * inodes that come back out of the orphan dir are reflink 938 * targets. A reflink target may be moved out of the orphan 939 * dir between the time we scan the directory and the time we 940 * process it. This would lead to HAS_REFCOUNT_FL being set but 941 * ORPHANED_FL not. 942 */ 943 if (di->i_dyn_features & cpu_to_le16(OCFS2_HAS_REFCOUNT_FL)) { 944 reason = 2; 945 goto bail; 946 } 947 948 /* for lack of a better error? */ 949 status = -EEXIST; 950 mlog(ML_ERROR, 951 "Inode %llu (on-disk %llu) not orphaned! " 952 "Disk flags 0x%x, inode flags 0x%x\n", 953 (unsigned long long)oi->ip_blkno, 954 (unsigned long long)le64_to_cpu(di->i_blkno), 955 le32_to_cpu(di->i_flags), oi->ip_flags); 956 goto bail; 957 } 958 959 /* has someone already deleted us?! baaad... */ 960 if (di->i_dtime) { 961 status = -EEXIST; 962 mlog_errno(status); 963 goto bail; 964 } 965 966 /* 967 * This is how ocfs2 determines whether an inode is still live 968 * within the cluster. 
Every node takes a shared read lock on 969 * the inode open lock in ocfs2_read_locked_inode(). When we 970 * get to ->delete_inode(), each node tries to convert it's 971 * lock to an exclusive. Trylocks are serialized by the inode 972 * meta data lock. If the upconvert succeeds, we know the inode 973 * is no longer live and can be deleted. 974 * 975 * Though we call this with the meta data lock held, the 976 * trylock keeps us from ABBA deadlock. 977 */ 978 status = ocfs2_try_open_lock(inode, 1); 979 if (status == -EAGAIN) { 980 status = 0; 981 reason = 3; 982 goto bail; 983 } 984 if (status < 0) { 985 mlog_errno(status); 986 goto bail; 987 } 988 989 *wipe = 1; 990 trace_ocfs2_query_inode_wipe_succ(le16_to_cpu(di->i_orphaned_slot)); 991 992 bail: 993 trace_ocfs2_query_inode_wipe_end(status, reason); 994 return status; 995 } 996 997 /* Support function for ocfs2_delete_inode. Will help us keep the 998 * inode data in a consistent state for clear_inode. Always truncates 999 * pages, optionally sync's them first. */ 1000 static void ocfs2_cleanup_delete_inode(struct inode *inode, 1001 int sync_data) 1002 { 1003 trace_ocfs2_cleanup_delete_inode( 1004 (unsigned long long)OCFS2_I(inode)->ip_blkno, sync_data); 1005 if (sync_data) 1006 filemap_write_and_wait(inode->i_mapping); 1007 truncate_inode_pages_final(&inode->i_data); 1008 } 1009 1010 static void ocfs2_delete_inode(struct inode *inode) 1011 { 1012 int wipe, status; 1013 sigset_t oldset; 1014 struct buffer_head *di_bh = NULL; 1015 struct ocfs2_dinode *di = NULL; 1016 1017 trace_ocfs2_delete_inode(inode->i_ino, 1018 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1019 is_bad_inode(inode)); 1020 1021 /* When we fail in read_inode() we mark inode as bad. The second test 1022 * catches the case when inode allocation fails before allocating 1023 * a block for inode. 
*/ 1024 if (is_bad_inode(inode) || !OCFS2_I(inode)->ip_blkno) 1025 goto bail; 1026 1027 if (!ocfs2_inode_is_valid_to_delete(inode)) { 1028 /* It's probably not necessary to truncate_inode_pages 1029 * here but we do it for safety anyway (it will most 1030 * likely be a no-op anyway) */ 1031 ocfs2_cleanup_delete_inode(inode, 0); 1032 goto bail; 1033 } 1034 1035 dquot_initialize(inode); 1036 1037 /* We want to block signals in delete_inode as the lock and 1038 * messaging paths may return us -ERESTARTSYS. Which would 1039 * cause us to exit early, resulting in inodes being orphaned 1040 * forever. */ 1041 ocfs2_block_signals(&oldset); 1042 1043 /* 1044 * Synchronize us against ocfs2_get_dentry. We take this in 1045 * shared mode so that all nodes can still concurrently 1046 * process deletes. 1047 */ 1048 status = ocfs2_nfs_sync_lock(OCFS2_SB(inode->i_sb), 0); 1049 if (status < 0) { 1050 mlog(ML_ERROR, "getting nfs sync lock(PR) failed %d\n", status); 1051 ocfs2_cleanup_delete_inode(inode, 0); 1052 goto bail_unblock; 1053 } 1054 /* Lock down the inode. This gives us an up to date view of 1055 * it's metadata (for verification), and allows us to 1056 * serialize delete_inode on multiple nodes. 1057 * 1058 * Even though we might be doing a truncate, we don't take the 1059 * allocation lock here as it won't be needed - nobody will 1060 * have the file open. 1061 */ 1062 status = ocfs2_inode_lock(inode, &di_bh, 1); 1063 if (status < 0) { 1064 if (status != -ENOENT) 1065 mlog_errno(status); 1066 ocfs2_cleanup_delete_inode(inode, 0); 1067 goto bail_unlock_nfs_sync; 1068 } 1069 1070 di = (struct ocfs2_dinode *)di_bh->b_data; 1071 /* Skip inode deletion and wait for dio orphan entry recovered 1072 * first */ 1073 if (unlikely(di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL))) { 1074 ocfs2_cleanup_delete_inode(inode, 0); 1075 goto bail_unlock_inode; 1076 } 1077 1078 /* Query the cluster. This will be the final decision made 1079 * before we go ahead and wipe the inode. 
*/ 1080 status = ocfs2_query_inode_wipe(inode, di_bh, &wipe); 1081 if (!wipe || status < 0) { 1082 /* Error and remote inode busy both mean we won't be 1083 * removing the inode, so they take almost the same 1084 * path. */ 1085 if (status < 0) 1086 mlog_errno(status); 1087 1088 /* Someone in the cluster has disallowed a wipe of 1089 * this inode, or it was never completely 1090 * orphaned. Write out the pages and exit now. */ 1091 ocfs2_cleanup_delete_inode(inode, 1); 1092 goto bail_unlock_inode; 1093 } 1094 1095 ocfs2_cleanup_delete_inode(inode, 0); 1096 1097 status = ocfs2_wipe_inode(inode, di_bh); 1098 if (status < 0) { 1099 if (status != -EDEADLK) 1100 mlog_errno(status); 1101 goto bail_unlock_inode; 1102 } 1103 1104 /* 1105 * Mark the inode as successfully deleted. 1106 * 1107 * This is important for ocfs2_clear_inode() as it will check 1108 * this flag and skip any checkpointing work 1109 * 1110 * ocfs2_stuff_meta_lvb() also uses this flag to invalidate 1111 * the LVB for other nodes. 1112 */ 1113 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED; 1114 1115 bail_unlock_inode: 1116 ocfs2_inode_unlock(inode, 1); 1117 brelse(di_bh); 1118 1119 bail_unlock_nfs_sync: 1120 ocfs2_nfs_sync_unlock(OCFS2_SB(inode->i_sb), 0); 1121 1122 bail_unblock: 1123 ocfs2_unblock_signals(&oldset); 1124 bail: 1125 return; 1126 } 1127 1128 static void ocfs2_clear_inode(struct inode *inode) 1129 { 1130 int status; 1131 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1132 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1133 1134 clear_inode(inode); 1135 trace_ocfs2_clear_inode((unsigned long long)oi->ip_blkno, 1136 inode->i_nlink); 1137 1138 mlog_bug_on_msg(osb == NULL, 1139 "Inode=%lu\n", inode->i_ino); 1140 1141 dquot_drop(inode); 1142 1143 /* To preven remote deletes we hold open lock before, now it 1144 * is time to unlock PR and EX open locks. 
*/ 1145 ocfs2_open_unlock(inode); 1146 1147 /* Do these before all the other work so that we don't bounce 1148 * the downconvert thread while waiting to destroy the locks. */ 1149 ocfs2_mark_lockres_freeing(osb, &oi->ip_rw_lockres); 1150 ocfs2_mark_lockres_freeing(osb, &oi->ip_inode_lockres); 1151 ocfs2_mark_lockres_freeing(osb, &oi->ip_open_lockres); 1152 1153 ocfs2_resv_discard(&osb->osb_la_resmap, 1154 &oi->ip_la_data_resv); 1155 ocfs2_resv_init_once(&oi->ip_la_data_resv); 1156 1157 /* We very well may get a clear_inode before all an inodes 1158 * metadata has hit disk. Of course, we can't drop any cluster 1159 * locks until the journal has finished with it. The only 1160 * exception here are successfully wiped inodes - their 1161 * metadata can now be considered to be part of the system 1162 * inodes from which it came. */ 1163 if (!(oi->ip_flags & OCFS2_INODE_DELETED)) 1164 ocfs2_checkpoint_inode(inode); 1165 1166 mlog_bug_on_msg(!list_empty(&oi->ip_io_markers), 1167 "Clear inode of %llu, inode has io markers\n", 1168 (unsigned long long)oi->ip_blkno); 1169 mlog_bug_on_msg(!list_empty(&oi->ip_unwritten_list), 1170 "Clear inode of %llu, inode has unwritten extents\n", 1171 (unsigned long long)oi->ip_blkno); 1172 1173 ocfs2_extent_map_trunc(inode, 0); 1174 1175 status = ocfs2_drop_inode_locks(inode); 1176 if (status < 0) 1177 mlog_errno(status); 1178 1179 ocfs2_lock_res_free(&oi->ip_rw_lockres); 1180 ocfs2_lock_res_free(&oi->ip_inode_lockres); 1181 ocfs2_lock_res_free(&oi->ip_open_lockres); 1182 1183 ocfs2_metadata_cache_exit(INODE_CACHE(inode)); 1184 1185 mlog_bug_on_msg(INODE_CACHE(inode)->ci_num_cached, 1186 "Clear inode of %llu, inode has %u cache items\n", 1187 (unsigned long long)oi->ip_blkno, 1188 INODE_CACHE(inode)->ci_num_cached); 1189 1190 mlog_bug_on_msg(!(INODE_CACHE(inode)->ci_flags & OCFS2_CACHE_FL_INLINE), 1191 "Clear inode of %llu, inode has a bad flag\n", 1192 (unsigned long long)oi->ip_blkno); 1193 1194 
mlog_bug_on_msg(spin_is_locked(&oi->ip_lock), 1195 "Clear inode of %llu, inode is locked\n", 1196 (unsigned long long)oi->ip_blkno); 1197 1198 mlog_bug_on_msg(!mutex_trylock(&oi->ip_io_mutex), 1199 "Clear inode of %llu, io_mutex is locked\n", 1200 (unsigned long long)oi->ip_blkno); 1201 mutex_unlock(&oi->ip_io_mutex); 1202 1203 /* 1204 * down_trylock() returns 0, down_write_trylock() returns 1 1205 * kernel 1, world 0 1206 */ 1207 mlog_bug_on_msg(!down_write_trylock(&oi->ip_alloc_sem), 1208 "Clear inode of %llu, alloc_sem is locked\n", 1209 (unsigned long long)oi->ip_blkno); 1210 up_write(&oi->ip_alloc_sem); 1211 1212 mlog_bug_on_msg(oi->ip_open_count, 1213 "Clear inode of %llu has open count %d\n", 1214 (unsigned long long)oi->ip_blkno, oi->ip_open_count); 1215 1216 /* Clear all other flags. */ 1217 oi->ip_flags = 0; 1218 oi->ip_dir_start_lookup = 0; 1219 oi->ip_blkno = 0ULL; 1220 1221 /* 1222 * ip_jinode is used to track txns against this inode. We ensure that 1223 * the journal is flushed before journal shutdown. Thus it is safe to 1224 * have inodes get cleaned up after journal shutdown. 1225 */ 1226 jbd2_journal_release_jbd_inode(osb->journal->j_journal, 1227 &oi->ip_jinode); 1228 } 1229 1230 void ocfs2_evict_inode(struct inode *inode) 1231 { 1232 if (!inode->i_nlink || 1233 (OCFS2_I(inode)->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)) { 1234 ocfs2_delete_inode(inode); 1235 } else { 1236 truncate_inode_pages_final(&inode->i_data); 1237 } 1238 ocfs2_clear_inode(inode); 1239 } 1240 1241 /* Called under inode_lock, with no more references on the 1242 * struct inode, so it's safe here to check the flags field 1243 * and to manipulate i_nlink without any other locks. 
*/ 1244 int ocfs2_drop_inode(struct inode *inode) 1245 { 1246 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1247 1248 trace_ocfs2_drop_inode((unsigned long long)oi->ip_blkno, 1249 inode->i_nlink, oi->ip_flags); 1250 1251 assert_spin_locked(&inode->i_lock); 1252 inode->i_state |= I_WILL_FREE; 1253 spin_unlock(&inode->i_lock); 1254 write_inode_now(inode, 1); 1255 spin_lock(&inode->i_lock); 1256 WARN_ON(inode->i_state & I_NEW); 1257 inode->i_state &= ~I_WILL_FREE; 1258 1259 return 1; 1260 } 1261 1262 /* 1263 * This is called from our getattr. 1264 */ 1265 int ocfs2_inode_revalidate(struct dentry *dentry) 1266 { 1267 struct inode *inode = d_inode(dentry); 1268 int status = 0; 1269 1270 trace_ocfs2_inode_revalidate(inode, 1271 inode ? (unsigned long long)OCFS2_I(inode)->ip_blkno : 0ULL, 1272 inode ? (unsigned long long)OCFS2_I(inode)->ip_flags : 0); 1273 1274 if (!inode) { 1275 status = -ENOENT; 1276 goto bail; 1277 } 1278 1279 spin_lock(&OCFS2_I(inode)->ip_lock); 1280 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) { 1281 spin_unlock(&OCFS2_I(inode)->ip_lock); 1282 status = -ENOENT; 1283 goto bail; 1284 } 1285 spin_unlock(&OCFS2_I(inode)->ip_lock); 1286 1287 /* Let ocfs2_inode_lock do the work of updating our struct 1288 * inode for us. */ 1289 status = ocfs2_inode_lock(inode, NULL, 0); 1290 if (status < 0) { 1291 if (status != -ENOENT) 1292 mlog_errno(status); 1293 goto bail; 1294 } 1295 ocfs2_inode_unlock(inode, 0); 1296 bail: 1297 return status; 1298 } 1299 1300 /* 1301 * Updates a disk inode from a 1302 * struct inode. 1303 * Only takes ip_lock. 
1304 */ 1305 int ocfs2_mark_inode_dirty(handle_t *handle, 1306 struct inode *inode, 1307 struct buffer_head *bh) 1308 { 1309 int status; 1310 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data; 1311 1312 trace_ocfs2_mark_inode_dirty((unsigned long long)OCFS2_I(inode)->ip_blkno); 1313 1314 status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh, 1315 OCFS2_JOURNAL_ACCESS_WRITE); 1316 if (status < 0) { 1317 mlog_errno(status); 1318 goto leave; 1319 } 1320 1321 spin_lock(&OCFS2_I(inode)->ip_lock); 1322 fe->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters); 1323 ocfs2_get_inode_flags(OCFS2_I(inode)); 1324 fe->i_attr = cpu_to_le32(OCFS2_I(inode)->ip_attr); 1325 fe->i_dyn_features = cpu_to_le16(OCFS2_I(inode)->ip_dyn_features); 1326 spin_unlock(&OCFS2_I(inode)->ip_lock); 1327 1328 fe->i_size = cpu_to_le64(i_size_read(inode)); 1329 ocfs2_set_links_count(fe, inode->i_nlink); 1330 fe->i_uid = cpu_to_le32(i_uid_read(inode)); 1331 fe->i_gid = cpu_to_le32(i_gid_read(inode)); 1332 fe->i_mode = cpu_to_le16(inode->i_mode); 1333 fe->i_atime = cpu_to_le64(inode->i_atime.tv_sec); 1334 fe->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec); 1335 fe->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); 1336 fe->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); 1337 fe->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec); 1338 fe->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); 1339 1340 ocfs2_journal_dirty(handle, bh); 1341 ocfs2_update_inode_fsync_trans(handle, inode, 1); 1342 leave: 1343 return status; 1344 } 1345 1346 /* 1347 * 1348 * Updates a struct inode from a disk inode. 1349 * does no i/o, only takes ip_lock. 
1350 */ 1351 void ocfs2_refresh_inode(struct inode *inode, 1352 struct ocfs2_dinode *fe) 1353 { 1354 spin_lock(&OCFS2_I(inode)->ip_lock); 1355 1356 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); 1357 OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr); 1358 OCFS2_I(inode)->ip_dyn_features = le16_to_cpu(fe->i_dyn_features); 1359 ocfs2_set_inode_flags(inode); 1360 i_size_write(inode, le64_to_cpu(fe->i_size)); 1361 set_nlink(inode, ocfs2_read_links_count(fe)); 1362 i_uid_write(inode, le32_to_cpu(fe->i_uid)); 1363 i_gid_write(inode, le32_to_cpu(fe->i_gid)); 1364 inode->i_mode = le16_to_cpu(fe->i_mode); 1365 if (S_ISLNK(inode->i_mode) && le32_to_cpu(fe->i_clusters) == 0) 1366 inode->i_blocks = 0; 1367 else 1368 inode->i_blocks = ocfs2_inode_sector_count(inode); 1369 inode->i_atime.tv_sec = le64_to_cpu(fe->i_atime); 1370 inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec); 1371 inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime); 1372 inode->i_mtime.tv_nsec = le32_to_cpu(fe->i_mtime_nsec); 1373 inode->i_ctime.tv_sec = le64_to_cpu(fe->i_ctime); 1374 inode->i_ctime.tv_nsec = le32_to_cpu(fe->i_ctime_nsec); 1375 1376 spin_unlock(&OCFS2_I(inode)->ip_lock); 1377 } 1378 1379 int ocfs2_validate_inode_block(struct super_block *sb, 1380 struct buffer_head *bh) 1381 { 1382 int rc; 1383 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; 1384 1385 trace_ocfs2_validate_inode_block((unsigned long long)bh->b_blocknr); 1386 1387 BUG_ON(!buffer_uptodate(bh)); 1388 1389 /* 1390 * If the ecc fails, we return the error but otherwise 1391 * leave the filesystem running. We know any error is 1392 * local to this block. 1393 */ 1394 rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &di->i_check); 1395 if (rc) { 1396 mlog(ML_ERROR, "Checksum failed for dinode %llu\n", 1397 (unsigned long long)bh->b_blocknr); 1398 goto bail; 1399 } 1400 1401 /* 1402 * Errors after here are fatal. 
1403 */ 1404 1405 rc = -EINVAL; 1406 1407 if (!OCFS2_IS_VALID_DINODE(di)) { 1408 rc = ocfs2_error(sb, "Invalid dinode #%llu: signature = %.*s\n", 1409 (unsigned long long)bh->b_blocknr, 7, 1410 di->i_signature); 1411 goto bail; 1412 } 1413 1414 if (le64_to_cpu(di->i_blkno) != bh->b_blocknr) { 1415 rc = ocfs2_error(sb, "Invalid dinode #%llu: i_blkno is %llu\n", 1416 (unsigned long long)bh->b_blocknr, 1417 (unsigned long long)le64_to_cpu(di->i_blkno)); 1418 goto bail; 1419 } 1420 1421 if (!(di->i_flags & cpu_to_le32(OCFS2_VALID_FL))) { 1422 rc = ocfs2_error(sb, 1423 "Invalid dinode #%llu: OCFS2_VALID_FL not set\n", 1424 (unsigned long long)bh->b_blocknr); 1425 goto bail; 1426 } 1427 1428 if (le32_to_cpu(di->i_fs_generation) != 1429 OCFS2_SB(sb)->fs_generation) { 1430 rc = ocfs2_error(sb, 1431 "Invalid dinode #%llu: fs_generation is %u\n", 1432 (unsigned long long)bh->b_blocknr, 1433 le32_to_cpu(di->i_fs_generation)); 1434 goto bail; 1435 } 1436 1437 rc = 0; 1438 1439 bail: 1440 return rc; 1441 } 1442 1443 static int ocfs2_filecheck_validate_inode_block(struct super_block *sb, 1444 struct buffer_head *bh) 1445 { 1446 int rc = 0; 1447 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; 1448 1449 trace_ocfs2_filecheck_validate_inode_block( 1450 (unsigned long long)bh->b_blocknr); 1451 1452 BUG_ON(!buffer_uptodate(bh)); 1453 1454 /* 1455 * Call ocfs2_validate_meta_ecc() first since it has ecc repair 1456 * function, but we should not return error immediately when ecc 1457 * validation fails, because the reason is quite likely the invalid 1458 * inode number inputed. 
1459 */ 1460 rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &di->i_check); 1461 if (rc) { 1462 mlog(ML_ERROR, 1463 "Filecheck: checksum failed for dinode %llu\n", 1464 (unsigned long long)bh->b_blocknr); 1465 rc = -OCFS2_FILECHECK_ERR_BLOCKECC; 1466 } 1467 1468 if (!OCFS2_IS_VALID_DINODE(di)) { 1469 mlog(ML_ERROR, 1470 "Filecheck: invalid dinode #%llu: signature = %.*s\n", 1471 (unsigned long long)bh->b_blocknr, 7, di->i_signature); 1472 rc = -OCFS2_FILECHECK_ERR_INVALIDINO; 1473 goto bail; 1474 } else if (rc) 1475 goto bail; 1476 1477 if (le64_to_cpu(di->i_blkno) != bh->b_blocknr) { 1478 mlog(ML_ERROR, 1479 "Filecheck: invalid dinode #%llu: i_blkno is %llu\n", 1480 (unsigned long long)bh->b_blocknr, 1481 (unsigned long long)le64_to_cpu(di->i_blkno)); 1482 rc = -OCFS2_FILECHECK_ERR_BLOCKNO; 1483 goto bail; 1484 } 1485 1486 if (!(di->i_flags & cpu_to_le32(OCFS2_VALID_FL))) { 1487 mlog(ML_ERROR, 1488 "Filecheck: invalid dinode #%llu: OCFS2_VALID_FL " 1489 "not set\n", 1490 (unsigned long long)bh->b_blocknr); 1491 rc = -OCFS2_FILECHECK_ERR_VALIDFLAG; 1492 goto bail; 1493 } 1494 1495 if (le32_to_cpu(di->i_fs_generation) != 1496 OCFS2_SB(sb)->fs_generation) { 1497 mlog(ML_ERROR, 1498 "Filecheck: invalid dinode #%llu: fs_generation is %u\n", 1499 (unsigned long long)bh->b_blocknr, 1500 le32_to_cpu(di->i_fs_generation)); 1501 rc = -OCFS2_FILECHECK_ERR_GENERATION; 1502 goto bail; 1503 } 1504 1505 bail: 1506 return rc; 1507 } 1508 1509 static int ocfs2_filecheck_repair_inode_block(struct super_block *sb, 1510 struct buffer_head *bh) 1511 { 1512 int changed = 0; 1513 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; 1514 1515 if (!ocfs2_filecheck_validate_inode_block(sb, bh)) 1516 return 0; 1517 1518 trace_ocfs2_filecheck_repair_inode_block( 1519 (unsigned long long)bh->b_blocknr); 1520 1521 if (ocfs2_is_hard_readonly(OCFS2_SB(sb)) || 1522 ocfs2_is_soft_readonly(OCFS2_SB(sb))) { 1523 mlog(ML_ERROR, 1524 "Filecheck: cannot repair dinode #%llu " 1525 "on readonly 
filesystem\n", 1526 (unsigned long long)bh->b_blocknr); 1527 return -OCFS2_FILECHECK_ERR_READONLY; 1528 } 1529 1530 if (buffer_jbd(bh)) { 1531 mlog(ML_ERROR, 1532 "Filecheck: cannot repair dinode #%llu, " 1533 "its buffer is in jbd\n", 1534 (unsigned long long)bh->b_blocknr); 1535 return -OCFS2_FILECHECK_ERR_INJBD; 1536 } 1537 1538 if (!OCFS2_IS_VALID_DINODE(di)) { 1539 /* Cannot fix invalid inode block */ 1540 return -OCFS2_FILECHECK_ERR_INVALIDINO; 1541 } 1542 1543 if (!(di->i_flags & cpu_to_le32(OCFS2_VALID_FL))) { 1544 /* Cannot just add VALID_FL flag back as a fix, 1545 * need more things to check here. 1546 */ 1547 return -OCFS2_FILECHECK_ERR_VALIDFLAG; 1548 } 1549 1550 if (le64_to_cpu(di->i_blkno) != bh->b_blocknr) { 1551 di->i_blkno = cpu_to_le64(bh->b_blocknr); 1552 changed = 1; 1553 mlog(ML_ERROR, 1554 "Filecheck: reset dinode #%llu: i_blkno to %llu\n", 1555 (unsigned long long)bh->b_blocknr, 1556 (unsigned long long)le64_to_cpu(di->i_blkno)); 1557 } 1558 1559 if (le32_to_cpu(di->i_fs_generation) != 1560 OCFS2_SB(sb)->fs_generation) { 1561 di->i_fs_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation); 1562 changed = 1; 1563 mlog(ML_ERROR, 1564 "Filecheck: reset dinode #%llu: fs_generation to %u\n", 1565 (unsigned long long)bh->b_blocknr, 1566 le32_to_cpu(di->i_fs_generation)); 1567 } 1568 1569 if (changed || ocfs2_validate_meta_ecc(sb, bh->b_data, &di->i_check)) { 1570 ocfs2_compute_meta_ecc(sb, bh->b_data, &di->i_check); 1571 mark_buffer_dirty(bh); 1572 mlog(ML_ERROR, 1573 "Filecheck: reset dinode #%llu: compute meta ecc\n", 1574 (unsigned long long)bh->b_blocknr); 1575 } 1576 1577 return 0; 1578 } 1579 1580 static int 1581 ocfs2_filecheck_read_inode_block_full(struct inode *inode, 1582 struct buffer_head **bh, 1583 int flags, int type) 1584 { 1585 int rc; 1586 struct buffer_head *tmp = *bh; 1587 1588 if (!type) /* Check inode block */ 1589 rc = ocfs2_read_blocks(INODE_CACHE(inode), 1590 OCFS2_I(inode)->ip_blkno, 1591 1, &tmp, flags, 1592 
ocfs2_filecheck_validate_inode_block); 1593 else /* Repair inode block */ 1594 rc = ocfs2_read_blocks(INODE_CACHE(inode), 1595 OCFS2_I(inode)->ip_blkno, 1596 1, &tmp, flags, 1597 ocfs2_filecheck_repair_inode_block); 1598 1599 /* If ocfs2_read_blocks() got us a new bh, pass it up. */ 1600 if (!rc && !*bh) 1601 *bh = tmp; 1602 1603 return rc; 1604 } 1605 1606 int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh, 1607 int flags) 1608 { 1609 int rc; 1610 struct buffer_head *tmp = *bh; 1611 1612 rc = ocfs2_read_blocks(INODE_CACHE(inode), OCFS2_I(inode)->ip_blkno, 1613 1, &tmp, flags, ocfs2_validate_inode_block); 1614 1615 /* If ocfs2_read_blocks() got us a new bh, pass it up. */ 1616 if (!rc && !*bh) 1617 *bh = tmp; 1618 1619 return rc; 1620 } 1621 1622 int ocfs2_read_inode_block(struct inode *inode, struct buffer_head **bh) 1623 { 1624 return ocfs2_read_inode_block_full(inode, bh, 0); 1625 } 1626 1627 1628 static u64 ocfs2_inode_cache_owner(struct ocfs2_caching_info *ci) 1629 { 1630 struct ocfs2_inode_info *oi = cache_info_to_inode(ci); 1631 1632 return oi->ip_blkno; 1633 } 1634 1635 static struct super_block *ocfs2_inode_cache_get_super(struct ocfs2_caching_info *ci) 1636 { 1637 struct ocfs2_inode_info *oi = cache_info_to_inode(ci); 1638 1639 return oi->vfs_inode.i_sb; 1640 } 1641 1642 static void ocfs2_inode_cache_lock(struct ocfs2_caching_info *ci) 1643 { 1644 struct ocfs2_inode_info *oi = cache_info_to_inode(ci); 1645 1646 spin_lock(&oi->ip_lock); 1647 } 1648 1649 static void ocfs2_inode_cache_unlock(struct ocfs2_caching_info *ci) 1650 { 1651 struct ocfs2_inode_info *oi = cache_info_to_inode(ci); 1652 1653 spin_unlock(&oi->ip_lock); 1654 } 1655 1656 static void ocfs2_inode_cache_io_lock(struct ocfs2_caching_info *ci) 1657 { 1658 struct ocfs2_inode_info *oi = cache_info_to_inode(ci); 1659 1660 mutex_lock(&oi->ip_io_mutex); 1661 } 1662 1663 static void ocfs2_inode_cache_io_unlock(struct ocfs2_caching_info *ci) 1664 { 1665 struct 
ocfs2_inode_info *oi = cache_info_to_inode(ci); 1666 1667 mutex_unlock(&oi->ip_io_mutex); 1668 } 1669 1670 const struct ocfs2_caching_operations ocfs2_inode_caching_ops = { 1671 .co_owner = ocfs2_inode_cache_owner, 1672 .co_get_super = ocfs2_inode_cache_get_super, 1673 .co_cache_lock = ocfs2_inode_cache_lock, 1674 .co_cache_unlock = ocfs2_inode_cache_unlock, 1675 .co_io_lock = ocfs2_inode_cache_io_lock, 1676 .co_io_unlock = ocfs2_inode_cache_io_unlock, 1677 }; 1678 1679