1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * super.c 4 * 5 * load/unload driver, mount/dismount volumes 6 * 7 * Copyright (C) 2002, 2004 Oracle. All rights reserved. 8 */ 9 10 #include <linux/module.h> 11 #include <linux/fs.h> 12 #include <linux/types.h> 13 #include <linux/slab.h> 14 #include <linux/highmem.h> 15 #include <linux/init.h> 16 #include <linux/random.h> 17 #include <linux/statfs.h> 18 #include <linux/moduleparam.h> 19 #include <linux/blkdev.h> 20 #include <linux/socket.h> 21 #include <linux/inet.h> 22 #include <linux/fs_parser.h> 23 #include <linux/fs_context.h> 24 #include <linux/crc32.h> 25 #include <linux/debugfs.h> 26 #include <linux/seq_file.h> 27 #include <linux/quotaops.h> 28 #include <linux/signal.h> 29 30 #define CREATE_TRACE_POINTS 31 #include "ocfs2_trace.h" 32 33 #include <cluster/masklog.h> 34 35 #include "ocfs2.h" 36 37 /* this should be the only file to include a version 1 header */ 38 #include "ocfs1_fs_compat.h" 39 40 #include "alloc.h" 41 #include "aops.h" 42 #include "blockcheck.h" 43 #include "dlmglue.h" 44 #include "export.h" 45 #include "extent_map.h" 46 #include "heartbeat.h" 47 #include "inode.h" 48 #include "journal.h" 49 #include "localalloc.h" 50 #include "namei.h" 51 #include "slot_map.h" 52 #include "super.h" 53 #include "sysfile.h" 54 #include "uptodate.h" 55 #include "xattr.h" 56 #include "quota.h" 57 #include "refcounttree.h" 58 #include "suballoc.h" 59 60 #include "buffer_head_io.h" 61 #include "filecheck.h" 62 63 static struct kmem_cache *ocfs2_inode_cachep; 64 struct kmem_cache *ocfs2_dquot_cachep; 65 struct kmem_cache *ocfs2_qf_chunk_cachep; 66 67 static struct dentry *ocfs2_debugfs_root; 68 69 MODULE_AUTHOR("Oracle"); 70 MODULE_LICENSE("GPL"); 71 MODULE_DESCRIPTION("OCFS2 cluster file system"); 72 73 struct mount_options 74 { 75 unsigned long commit_interval; 76 unsigned long mount_opt; 77 unsigned int atime_quantum; 78 unsigned short slot; 79 int localalloc_opt; 80 unsigned int resv_level; 81 int dir_resv_level; 82 char cluster_stack[OCFS2_STACK_LABEL_LEN + 1]; 83 bool user_stack; 84 }; 85 86 static int ocfs2_parse_param(struct fs_context *fc, struct fs_parameter *param); 87 static int ocfs2_check_set_options(struct super_block *sb, 88 struct mount_options *options); 89 static int ocfs2_show_options(struct seq_file *s, struct dentry *root); 90 static void ocfs2_put_super(struct super_block *sb); 91 static int ocfs2_mount_volume(struct super_block *sb); 92 static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err); 93 static int ocfs2_initialize_mem_caches(void); 94 static void ocfs2_free_mem_caches(void); 95 static void ocfs2_delete_osb(struct ocfs2_super *osb); 96 97 static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf); 98 99 static int ocfs2_sync_fs(struct super_block *sb, int wait); 100 101 static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb); 102 static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb); 103 static void ocfs2_release_system_inodes(struct ocfs2_super *osb); 104 static int ocfs2_check_volume(struct ocfs2_super *osb); 105 static int ocfs2_verify_volume(struct ocfs2_dinode *di, 106 struct buffer_head *bh, 107 u32 sectsize, 108 struct ocfs2_blockcheck_stats *stats); 109 static int ocfs2_initialize_super(struct super_block *sb, 110 struct buffer_head *bh, 111 int sector_size, 112 struct ocfs2_blockcheck_stats *stats); 113 static int ocfs2_get_sector(struct super_block *sb, 114 struct buffer_head **bh, 115 int block, 116 int sect_size); 117 static struct inode *ocfs2_alloc_inode(struct super_block *sb); 118 static void ocfs2_free_inode(struct inode *inode); 119 static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend); 120 static int ocfs2_enable_quotas(struct ocfs2_super *osb); 121 static void ocfs2_disable_quotas(struct ocfs2_super *osb); 122 123 static struct dquot __rcu **ocfs2_get_dquots(struct inode *inode) 124 { 125 return OCFS2_I(inode)->i_dquot; 126 } 127 128 static const struct super_operations ocfs2_sops = { 129 .statfs = ocfs2_statfs, 130 .alloc_inode = ocfs2_alloc_inode, 131 .free_inode = ocfs2_free_inode, 132 .drop_inode = ocfs2_drop_inode, 133 .evict_inode = ocfs2_evict_inode, 134 .sync_fs = ocfs2_sync_fs, 135 .put_super = ocfs2_put_super, 136 .show_options = ocfs2_show_options, 137 .quota_read = ocfs2_quota_read, 138 .quota_write = ocfs2_quota_write, 139 .get_dquots = ocfs2_get_dquots, 140 }; 141 142 enum { 143 Opt_barrier, 144 Opt_errors, 145 Opt_intr, 146 Opt_heartbeat, 147 Opt_data, 148 Opt_atime_quantum, 149 Opt_slot, 150 Opt_commit, 151 Opt_localalloc, 152 Opt_localflocks, 153 Opt_stack, 154 Opt_user_xattr, 155 Opt_inode64, 156 Opt_acl, 157 Opt_usrquota, 158 Opt_grpquota, 159 Opt_coherency, 160 Opt_resv_level, 161 Opt_dir_resv_level, 162 Opt_journal_async_commit, 163 }; 164 165 static const struct constant_table ocfs2_param_errors[] = { 166 {"panic", OCFS2_MOUNT_ERRORS_PANIC}, 167 {"remount-ro", OCFS2_MOUNT_ERRORS_ROFS}, 168 {"continue", OCFS2_MOUNT_ERRORS_CONT}, 169 {} 170 }; 171 172 static const struct constant_table ocfs2_param_heartbeat[] = { 173 {"local", OCFS2_MOUNT_HB_LOCAL}, 174 {"none", OCFS2_MOUNT_HB_NONE}, 175 {"global", OCFS2_MOUNT_HB_GLOBAL}, 176 {} 177 }; 178 179 static const struct constant_table ocfs2_param_data[] = { 180 {"writeback", OCFS2_MOUNT_DATA_WRITEBACK}, 181 {"ordered", 0}, 182 {} 183 }; 184 185 static const struct constant_table ocfs2_param_coherency[] = { 186 {"buffered", OCFS2_MOUNT_COHERENCY_BUFFERED}, 187 {"full", 0}, 188 {} 189 }; 190 191 static const struct fs_parameter_spec ocfs2_param_spec[] = { 192 fsparam_u32 ("barrier", Opt_barrier), 193 fsparam_enum ("errors", Opt_errors, ocfs2_param_errors), 194 fsparam_flag_no ("intr", Opt_intr), 195 fsparam_enum ("heartbeat", Opt_heartbeat, ocfs2_param_heartbeat), 196 fsparam_enum ("data", Opt_data, ocfs2_param_data), 197 fsparam_u32 ("atime_quantum", Opt_atime_quantum), 198 fsparam_u32 ("preferred_slot", Opt_slot), 199 fsparam_u32 ("commit", Opt_commit), 200 fsparam_s32 ("localalloc", Opt_localalloc), 201 fsparam_flag ("localflocks", Opt_localflocks), 202 fsparam_string ("cluster_stack", Opt_stack), 203 fsparam_flag_no ("user_xattr", Opt_user_xattr), 204 fsparam_flag ("inode64", Opt_inode64), 205 fsparam_flag_no ("acl", Opt_acl), 206 fsparam_flag ("usrquota", Opt_usrquota), 207 fsparam_flag ("grpquota", Opt_grpquota), 208 fsparam_enum ("coherency", Opt_coherency, ocfs2_param_coherency), 209 fsparam_u32 ("resv_level", Opt_resv_level), 210 fsparam_u32 ("dir_resv_level", Opt_dir_resv_level), 211 fsparam_flag ("journal_async_commit", Opt_journal_async_commit), 212 {} 213 }; 214 215 #ifdef CONFIG_DEBUG_FS 216 static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) 217 { 218 struct ocfs2_cluster_connection *cconn = osb->cconn; 219 struct ocfs2_recovery_map *rm = osb->recovery_map; 220 struct ocfs2_orphan_scan *os = &osb->osb_orphan_scan; 221 int i, out = 0; 222 unsigned long flags; 223 224 out += scnprintf(buf + out, len - out, 225 "%10s => Id: %-s Uuid: %-s Gen: 0x%X Label: %-s\n", 226 "Device", osb->dev_str, osb->uuid_str, 227 osb->fs_generation, osb->vol_label); 228 229 out += scnprintf(buf + out, len - out, 230 "%10s => State: %d Flags: 0x%lX\n", "Volume", 231 atomic_read(&osb->vol_state), osb->osb_flags); 232 233 out += scnprintf(buf + out, len - out, 234 "%10s => Block: %lu Cluster: %d\n", "Sizes", 235 osb->sb->s_blocksize, osb->s_clustersize); 236 237 out += scnprintf(buf + out, len - out, 238 "%10s => Compat: 0x%X Incompat: 0x%X " 239 "ROcompat: 0x%X\n", 240 "Features", osb->s_feature_compat, 241 osb->s_feature_incompat, osb->s_feature_ro_compat); 242 243 out += scnprintf(buf + out, len - out, 244 "%10s => Opts: 0x%lX AtimeQuanta: %u\n", "Mount", 245 osb->s_mount_opt, osb->s_atime_quantum); 246 247 if (cconn) { 248 out += scnprintf(buf + out, len - out, 249 "%10s => Stack: %s Name: %*s " 250 "Version: %d.%d\n", "Cluster", 251 (*osb->osb_cluster_stack == '\0' ? 252 "o2cb" : osb->osb_cluster_stack), 253 cconn->cc_namelen, cconn->cc_name, 254 cconn->cc_version.pv_major, 255 cconn->cc_version.pv_minor); 256 } 257 258 spin_lock_irqsave(&osb->dc_task_lock, flags); 259 out += scnprintf(buf + out, len - out, 260 "%10s => Pid: %d Count: %lu WakeSeq: %lu " 261 "WorkSeq: %lu\n", "DownCnvt", 262 (osb->dc_task ? task_pid_nr(osb->dc_task) : -1), 263 osb->blocked_lock_count, osb->dc_wake_sequence, 264 osb->dc_work_sequence); 265 spin_unlock_irqrestore(&osb->dc_task_lock, flags); 266 267 spin_lock(&osb->osb_lock); 268 out += scnprintf(buf + out, len - out, "%10s => Pid: %d Nodes:", 269 "Recovery", 270 (osb->recovery_thread_task ? 271 task_pid_nr(osb->recovery_thread_task) : -1)); 272 if (rm->rm_used == 0) 273 out += scnprintf(buf + out, len - out, " None\n"); 274 else { 275 for (i = 0; i < rm->rm_used; i++) 276 out += scnprintf(buf + out, len - out, " %d", 277 rm->rm_entries[i]); 278 out += scnprintf(buf + out, len - out, "\n"); 279 } 280 spin_unlock(&osb->osb_lock); 281 282 out += scnprintf(buf + out, len - out, 283 "%10s => Pid: %d Interval: %lu\n", "Commit", 284 (osb->commit_task ? task_pid_nr(osb->commit_task) : -1), 285 osb->osb_commit_interval); 286 287 out += scnprintf(buf + out, len - out, 288 "%10s => State: %d TxnId: %lu NumTxns: %d\n", 289 "Journal", osb->journal->j_state, 290 osb->journal->j_trans_id, 291 atomic_read(&osb->journal->j_num_trans)); 292 293 out += scnprintf(buf + out, len - out, 294 "%10s => GlobalAllocs: %d LocalAllocs: %d " 295 "SubAllocs: %d LAWinMoves: %d SAExtends: %d\n", 296 "Stats", 297 atomic_read(&osb->alloc_stats.bitmap_data), 298 atomic_read(&osb->alloc_stats.local_data), 299 atomic_read(&osb->alloc_stats.bg_allocs), 300 atomic_read(&osb->alloc_stats.moves), 301 atomic_read(&osb->alloc_stats.bg_extends)); 302 303 out += scnprintf(buf + out, len - out, 304 "%10s => State: %u Descriptor: %llu Size: %u bits " 305 "Default: %u bits\n", 306 "LocalAlloc", osb->local_alloc_state, 307 (unsigned long long)osb->la_last_gd, 308 osb->local_alloc_bits, osb->local_alloc_default_bits); 309 310 spin_lock(&osb->osb_lock); 311 out += scnprintf(buf + out, len - out, 312 "%10s => InodeSlot: %d StolenInodes: %d, " 313 "MetaSlot: %d StolenMeta: %d\n", "Steal", 314 osb->s_inode_steal_slot, 315 atomic_read(&osb->s_num_inodes_stolen), 316 osb->s_meta_steal_slot, 317 atomic_read(&osb->s_num_meta_stolen)); 318 spin_unlock(&osb->osb_lock); 319 320 out += scnprintf(buf + out, len - out, "OrphanScan => "); 321 out += scnprintf(buf + out, len - out, "Local: %u Global: %u ", 322 os->os_count, os->os_seqno); 323 out += scnprintf(buf + out, len - out, " Last Scan: "); 324 if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE) 325 out += scnprintf(buf + out, len - out, "Disabled\n"); 326 else 327 out += scnprintf(buf + out, len - out, "%lu seconds ago\n", 328 (unsigned long)(ktime_get_seconds() - os->os_scantime)); 329 330 out += scnprintf(buf + out, len - out, "%10s => %3s %10s\n", 331 "Slots", "Num", "RecoGen"); 332 for (i = 0; i < osb->max_slots; ++i) { 333 out += scnprintf(buf + out, len - out, 334 "%10s %c %3d %10d\n", 335 " ", 336 (i == osb->slot_num ? '*' : ' '), 337 i, osb->slot_recovery_generations[i]); 338 } 339 340 return out; 341 } 342 343 static int ocfs2_osb_debug_open(struct inode *inode, struct file *file) 344 { 345 struct ocfs2_super *osb = inode->i_private; 346 char *buf = NULL; 347 348 buf = kmalloc(PAGE_SIZE, GFP_KERNEL); 349 if (!buf) 350 goto bail; 351 352 i_size_write(inode, ocfs2_osb_dump(osb, buf, PAGE_SIZE)); 353 354 file->private_data = buf; 355 356 return 0; 357 bail: 358 return -ENOMEM; 359 } 360 361 static int ocfs2_debug_release(struct inode *inode, struct file *file) 362 { 363 kfree(file->private_data); 364 return 0; 365 } 366 367 static ssize_t ocfs2_debug_read(struct file *file, char __user *buf, 368 size_t nbytes, loff_t *ppos) 369 { 370 return simple_read_from_buffer(buf, nbytes, ppos, file->private_data, 371 i_size_read(file->f_mapping->host)); 372 } 373 #else 374 static int ocfs2_osb_debug_open(struct inode *inode, struct file *file) 375 { 376 return 0; 377 } 378 static int ocfs2_debug_release(struct inode *inode, struct file *file) 379 { 380 return 0; 381 } 382 static ssize_t ocfs2_debug_read(struct file *file, char __user *buf, 383 size_t nbytes, loff_t *ppos) 384 { 385 return 0; 386 } 387 #endif /* CONFIG_DEBUG_FS */ 388 389 static const struct file_operations ocfs2_osb_debug_fops = { 390 .open = ocfs2_osb_debug_open, 391 .release = ocfs2_debug_release, 392 .read = ocfs2_debug_read, 393 .llseek = generic_file_llseek, 394 }; 395 396 static int ocfs2_sync_fs(struct super_block *sb, int wait) 397 { 398 int status; 399 tid_t target; 400 struct ocfs2_super *osb = OCFS2_SB(sb); 401 402 if (ocfs2_is_hard_readonly(osb)) 403 return -EROFS; 404 405 if (wait) { 406 status = ocfs2_flush_truncate_log(osb); 407 if (status < 0) 408 mlog_errno(status); 409 } else { 410 ocfs2_schedule_truncate_log_flush(osb, 0); 411 } 412 413 if (jbd2_journal_start_commit(osb->journal->j_journal, 414 &target)) { 415 if (wait) 416 jbd2_log_wait_commit(osb->journal->j_journal, 417 target); 418 } 419 return 0; 420 } 421 422 static int ocfs2_need_system_inode(struct ocfs2_super *osb, int ino) 423 { 424 if (!OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, OCFS2_FEATURE_RO_COMPAT_USRQUOTA) 425 && (ino == USER_QUOTA_SYSTEM_INODE 426 || ino == LOCAL_USER_QUOTA_SYSTEM_INODE)) 427 return 0; 428 if (!OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, OCFS2_FEATURE_RO_COMPAT_GRPQUOTA) 429 && (ino == GROUP_QUOTA_SYSTEM_INODE 430 || ino == LOCAL_GROUP_QUOTA_SYSTEM_INODE)) 431 return 0; 432 return 1; 433 } 434 435 static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb) 436 { 437 struct inode *new = NULL; 438 int status = 0; 439 int i; 440 441 new = ocfs2_iget(osb, osb->root_blkno, OCFS2_FI_FLAG_SYSFILE, 0); 442 if (IS_ERR(new)) { 443 status = PTR_ERR(new); 444 mlog_errno(status); 445 goto bail; 446 } 447 osb->root_inode = new; 448 449 new = ocfs2_iget(osb, osb->system_dir_blkno, OCFS2_FI_FLAG_SYSFILE, 0); 450 if (IS_ERR(new)) { 451 status = PTR_ERR(new); 452 mlog_errno(status); 453 goto bail; 454 } 455 osb->sys_root_inode = new; 456 457 for (i = OCFS2_FIRST_ONLINE_SYSTEM_INODE; 458 i <= OCFS2_LAST_GLOBAL_SYSTEM_INODE; i++) { 459 if (!ocfs2_need_system_inode(osb, i)) 460 continue; 461 new = ocfs2_get_system_file_inode(osb, i, osb->slot_num); 462 if (!new) { 463 ocfs2_release_system_inodes(osb); 464 status = ocfs2_is_soft_readonly(osb) ? -EROFS : -EINVAL; 465 mlog_errno(status); 466 mlog(ML_ERROR, "Unable to load system inode %d, " 467 "possibly corrupt fs?", i); 468 goto bail; 469 } 470 // the array now has one ref, so drop this one 471 iput(new); 472 } 473 474 bail: 475 if (status) 476 mlog_errno(status); 477 return status; 478 } 479 480 static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb) 481 { 482 struct inode *new = NULL; 483 int status = 0; 484 int i; 485 486 for (i = OCFS2_LAST_GLOBAL_SYSTEM_INODE + 1; 487 i < NUM_SYSTEM_INODES; 488 i++) { 489 if (!ocfs2_need_system_inode(osb, i)) 490 continue; 491 new = ocfs2_get_system_file_inode(osb, i, osb->slot_num); 492 if (!new) { 493 ocfs2_release_system_inodes(osb); 494 status = ocfs2_is_soft_readonly(osb) ? -EROFS : -EINVAL; 495 mlog(ML_ERROR, "status=%d, sysfile=%d, slot=%d\n", 496 status, i, osb->slot_num); 497 goto bail; 498 } 499 /* the array now has one ref, so drop this one */ 500 iput(new); 501 } 502 503 bail: 504 if (status) 505 mlog_errno(status); 506 return status; 507 } 508 509 static void ocfs2_release_system_inodes(struct ocfs2_super *osb) 510 { 511 int i; 512 struct inode *inode; 513 514 for (i = 0; i < NUM_GLOBAL_SYSTEM_INODES; i++) { 515 inode = osb->global_system_inodes[i]; 516 if (inode) { 517 iput(inode); 518 osb->global_system_inodes[i] = NULL; 519 } 520 } 521 522 inode = osb->sys_root_inode; 523 if (inode) { 524 iput(inode); 525 osb->sys_root_inode = NULL; 526 } 527 528 inode = osb->root_inode; 529 if (inode) { 530 iput(inode); 531 osb->root_inode = NULL; 532 } 533 534 if (!osb->local_system_inodes) 535 return; 536 537 for (i = 0; i < NUM_LOCAL_SYSTEM_INODES * osb->max_slots; i++) { 538 if (osb->local_system_inodes[i]) { 539 iput(osb->local_system_inodes[i]); 540 osb->local_system_inodes[i] = NULL; 541 } 542 } 543 544 kfree(osb->local_system_inodes); 545 osb->local_system_inodes = NULL; 546 } 547 548 /* We're allocating fs objects, use GFP_NOFS */ 549 static struct inode *ocfs2_alloc_inode(struct super_block *sb) 550 { 551 struct ocfs2_inode_info *oi; 552 553 oi = alloc_inode_sb(sb, ocfs2_inode_cachep, GFP_NOFS); 554 if (!oi) 555 return NULL; 556 557 oi->i_sync_tid = 0; 558 oi->i_datasync_tid = 0; 559 memset(&oi->i_dquot, 0, sizeof(oi->i_dquot)); 560 561 jbd2_journal_init_jbd_inode(&oi->ip_jinode, &oi->vfs_inode); 562 return &oi->vfs_inode; 563 } 564 565 static void ocfs2_free_inode(struct inode *inode) 566 { 567 kmem_cache_free(ocfs2_inode_cachep, OCFS2_I(inode)); 568 } 569 570 static unsigned long long ocfs2_max_file_offset(unsigned int bbits, 571 unsigned int cbits) 572 { 573 unsigned int bytes = 1 << cbits; 574 unsigned int trim = bytes; 575 unsigned int bitshift = 32; 576 577 /* 578 * i_size and all block offsets in ocfs2 are always 64 bits 579 * wide. i_clusters is 32 bits, in cluster-sized units. So on 580 * 64 bit platforms, cluster size will be the limiting factor. 581 */ 582 583 #if BITS_PER_LONG == 32 584 BUILD_BUG_ON(sizeof(sector_t) != 8); 585 /* 586 * We might be limited by page cache size. 587 */ 588 if (bytes > PAGE_SIZE) { 589 bytes = PAGE_SIZE; 590 trim = 1; 591 /* 592 * Shift by 31 here so that we don't get larger than 593 * MAX_LFS_FILESIZE 594 */ 595 bitshift = 31; 596 } 597 #endif 598 599 /* 600 * Trim by a whole cluster when we can actually approach the 601 * on-disk limits. Otherwise we can overflow i_clusters when 602 * an extent start is at the max offset. 603 */ 604 return (((unsigned long long)bytes) << bitshift) - trim; 605 } 606 607 static int ocfs2_reconfigure(struct fs_context *fc) 608 { 609 int incompat_features; 610 int ret = 0; 611 struct mount_options *parsed_options = fc->fs_private; 612 struct super_block *sb = fc->root->d_sb; 613 struct ocfs2_super *osb = OCFS2_SB(sb); 614 u32 tmp; 615 616 sync_filesystem(sb); 617 618 if (!ocfs2_check_set_options(sb, parsed_options)) { 619 ret = -EINVAL; 620 goto out; 621 } 622 623 tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL | 624 OCFS2_MOUNT_HB_NONE; 625 if ((osb->s_mount_opt & tmp) != (parsed_options->mount_opt & tmp)) { 626 ret = -EINVAL; 627 mlog(ML_ERROR, "Cannot change heartbeat mode on remount\n"); 628 goto out; 629 } 630 631 if ((osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK) != 632 (parsed_options->mount_opt & OCFS2_MOUNT_DATA_WRITEBACK)) { 633 ret = -EINVAL; 634 mlog(ML_ERROR, "Cannot change data mode on remount\n"); 635 goto out; 636 } 637 638 /* Probably don't want this on remount; it might 639 * mess with other nodes */ 640 if (!(osb->s_mount_opt & OCFS2_MOUNT_INODE64) && 641 (parsed_options->mount_opt & OCFS2_MOUNT_INODE64)) { 642 ret = -EINVAL; 643 mlog(ML_ERROR, "Cannot enable inode64 on remount\n"); 644 goto out; 645 } 646 647 /* We're going to/from readonly mode. */ 648 if ((bool)(fc->sb_flags & SB_RDONLY) != sb_rdonly(sb)) { 649 /* Disable quota accounting before remounting RO */ 650 if (fc->sb_flags & SB_RDONLY) { 651 ret = ocfs2_susp_quotas(osb, 0); 652 if (ret < 0) 653 goto out; 654 } 655 /* Lock here so the check of HARD_RO and the potential 656 * setting of SOFT_RO is atomic. */ 657 spin_lock(&osb->osb_lock); 658 if (osb->osb_flags & OCFS2_OSB_HARD_RO) { 659 mlog(ML_ERROR, "Remount on readonly device is forbidden.\n"); 660 ret = -EROFS; 661 goto unlock_osb; 662 } 663 664 if (fc->sb_flags & SB_RDONLY) { 665 sb->s_flags |= SB_RDONLY; 666 osb->osb_flags |= OCFS2_OSB_SOFT_RO; 667 } else { 668 if (osb->osb_flags & OCFS2_OSB_ERROR_FS) { 669 mlog(ML_ERROR, "Cannot remount RDWR " 670 "filesystem due to previous errors.\n"); 671 ret = -EROFS; 672 goto unlock_osb; 673 } 674 incompat_features = OCFS2_HAS_RO_COMPAT_FEATURE(sb, ~OCFS2_FEATURE_RO_COMPAT_SUPP); 675 if (incompat_features) { 676 mlog(ML_ERROR, "Cannot remount RDWR because " 677 "of unsupported optional features " 678 "(%x).\n", incompat_features); 679 ret = -EINVAL; 680 goto unlock_osb; 681 } 682 sb->s_flags &= ~SB_RDONLY; 683 osb->osb_flags &= ~OCFS2_OSB_SOFT_RO; 684 } 685 trace_ocfs2_remount(sb->s_flags, osb->osb_flags, fc->sb_flags); 686 unlock_osb: 687 spin_unlock(&osb->osb_lock); 688 /* Enable quota accounting after remounting RW */ 689 if (!ret && !(fc->sb_flags & SB_RDONLY)) { 690 if (sb_any_quota_suspended(sb)) 691 ret = ocfs2_susp_quotas(osb, 1); 692 else 693 ret = ocfs2_enable_quotas(osb); 694 if (ret < 0) { 695 /* Return back changes... */ 696 spin_lock(&osb->osb_lock); 697 sb->s_flags |= SB_RDONLY; 698 osb->osb_flags |= OCFS2_OSB_SOFT_RO; 699 spin_unlock(&osb->osb_lock); 700 goto out; 701 } 702 } 703 } 704 705 if (!ret) { 706 /* Only save off the new mount options in case of a successful 707 * remount. */ 708 osb->s_mount_opt = parsed_options->mount_opt; 709 osb->s_atime_quantum = parsed_options->atime_quantum; 710 osb->preferred_slot = parsed_options->slot; 711 if (parsed_options->commit_interval) 712 osb->osb_commit_interval = parsed_options->commit_interval; 713 714 if (!ocfs2_is_hard_readonly(osb)) 715 ocfs2_set_journal_params(osb); 716 717 sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | 718 ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? 719 SB_POSIXACL : 0); 720 } 721 out: 722 return ret; 723 } 724 725 static int ocfs2_sb_probe(struct super_block *sb, 726 struct buffer_head **bh, 727 int *sector_size, 728 struct ocfs2_blockcheck_stats *stats) 729 { 730 int status, tmpstat; 731 struct ocfs1_vol_disk_hdr *hdr; 732 struct ocfs2_dinode *di; 733 int blksize; 734 735 *bh = NULL; 736 737 /* may be > 512 */ 738 *sector_size = bdev_logical_block_size(sb->s_bdev); 739 if (*sector_size > OCFS2_MAX_BLOCKSIZE) { 740 mlog(ML_ERROR, "Hardware sector size too large: %d (max=%d)\n", 741 *sector_size, OCFS2_MAX_BLOCKSIZE); 742 status = -EINVAL; 743 goto bail; 744 } 745 746 /* Can this really happen? */ 747 if (*sector_size < OCFS2_MIN_BLOCKSIZE) 748 *sector_size = OCFS2_MIN_BLOCKSIZE; 749 750 /* check block zero for old format */ 751 status = ocfs2_get_sector(sb, bh, 0, *sector_size); 752 if (status < 0) { 753 mlog_errno(status); 754 goto bail; 755 } 756 hdr = (struct ocfs1_vol_disk_hdr *) (*bh)->b_data; 757 if (hdr->major_version == OCFS1_MAJOR_VERSION) { 758 mlog(ML_ERROR, "incompatible version: %u.%u\n", 759 hdr->major_version, hdr->minor_version); 760 status = -EINVAL; 761 } 762 if (memcmp(hdr->signature, OCFS1_VOLUME_SIGNATURE, 763 strlen(OCFS1_VOLUME_SIGNATURE)) == 0) { 764 mlog(ML_ERROR, "incompatible volume signature: %8s\n", 765 hdr->signature); 766 status = -EINVAL; 767 } 768 brelse(*bh); 769 *bh = NULL; 770 if (status < 0) { 771 mlog(ML_ERROR, "This is an ocfs v1 filesystem which must be " 772 "upgraded before mounting with ocfs v2\n"); 773 goto bail; 774 } 775 776 /* 777 * Now check at magic offset for 512, 1024, 2048, 4096 778 * blocksizes. 4096 is the maximum blocksize because it is 779 * the minimum clustersize. 780 */ 781 status = -EINVAL; 782 for (blksize = *sector_size; 783 blksize <= OCFS2_MAX_BLOCKSIZE; 784 blksize <<= 1) { 785 tmpstat = ocfs2_get_sector(sb, bh, 786 OCFS2_SUPER_BLOCK_BLKNO, 787 blksize); 788 if (tmpstat < 0) { 789 status = tmpstat; 790 mlog_errno(status); 791 break; 792 } 793 di = (struct ocfs2_dinode *) (*bh)->b_data; 794 memset(stats, 0, sizeof(struct ocfs2_blockcheck_stats)); 795 spin_lock_init(&stats->b_lock); 796 tmpstat = ocfs2_verify_volume(di, *bh, blksize, stats); 797 if (tmpstat < 0) { 798 brelse(*bh); 799 *bh = NULL; 800 } 801 if (tmpstat != -EAGAIN) { 802 status = tmpstat; 803 break; 804 } 805 } 806 807 bail: 808 return status; 809 } 810 811 static int ocfs2_verify_heartbeat(struct ocfs2_super *osb) 812 { 813 u32 hb_enabled = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL; 814 815 if (osb->s_mount_opt & hb_enabled) { 816 if (ocfs2_mount_local(osb)) { 817 mlog(ML_ERROR, "Cannot heartbeat on a locally " 818 "mounted device.\n"); 819 return -EINVAL; 820 } 821 if (ocfs2_userspace_stack(osb)) { 822 mlog(ML_ERROR, "Userspace stack expected, but " 823 "o2cb heartbeat arguments passed to mount\n"); 824 return -EINVAL; 825 } 826 if (((osb->s_mount_opt & OCFS2_MOUNT_HB_GLOBAL) && 827 !ocfs2_cluster_o2cb_global_heartbeat(osb)) || 828 ((osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) && 829 ocfs2_cluster_o2cb_global_heartbeat(osb))) { 830 mlog(ML_ERROR, "Mismatching o2cb heartbeat modes\n"); 831 return -EINVAL; 832 } 833 } 834 835 if (!(osb->s_mount_opt & hb_enabled)) { 836 if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb) && 837 !ocfs2_userspace_stack(osb)) { 838 mlog(ML_ERROR, "Heartbeat has to be started to mount " 839 "a read-write clustered device.\n"); 840 return -EINVAL; 841 } 842 } 843 844 return 0; 845 } 846 847 /* 848 * If we're using a userspace stack, mount should have passed 849 * a name that matches the disk. If not, mount should not 850 * have passed a stack. 851 */ 852 static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb, 853 struct mount_options *mopt) 854 { 855 if (!ocfs2_userspace_stack(osb) && mopt->cluster_stack[0]) { 856 mlog(ML_ERROR, 857 "cluster stack passed to mount, but this filesystem " 858 "does not support it\n"); 859 return -EINVAL; 860 } 861 862 if (ocfs2_userspace_stack(osb) && 863 strncmp(osb->osb_cluster_stack, mopt->cluster_stack, 864 OCFS2_STACK_LABEL_LEN)) { 865 mlog(ML_ERROR, 866 "cluster stack passed to mount (\"%s\") does not " 867 "match the filesystem (\"%s\")\n", 868 mopt->cluster_stack, 869 osb->osb_cluster_stack); 870 return -EINVAL; 871 } 872 873 return 0; 874 } 875 876 static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend) 877 { 878 int type; 879 struct super_block *sb = osb->sb; 880 unsigned int feature[OCFS2_MAXQUOTAS] = { 881 OCFS2_FEATURE_RO_COMPAT_USRQUOTA, 882 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA}; 883 int status = 0; 884 885 for (type = 0; type < OCFS2_MAXQUOTAS; type++) { 886 if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type])) 887 continue; 888 if (unsuspend) 889 status = dquot_resume(sb, type); 890 else { 891 struct ocfs2_mem_dqinfo *oinfo; 892 893 /* Cancel periodic syncing before suspending */ 894 oinfo = sb_dqinfo(sb, type)->dqi_priv; 895 cancel_delayed_work_sync(&oinfo->dqi_sync_work); 896 status = dquot_suspend(sb, type); 897 } 898 if (status < 0) 899 break; 900 } 901 if (status < 0) 902 mlog(ML_ERROR, "Failed to suspend/unsuspend quotas on " 903 "remount (error = %d).\n", status); 904 return status; 905 } 906 907 static int ocfs2_enable_quotas(struct ocfs2_super *osb) 908 { 909 struct inode *inode[OCFS2_MAXQUOTAS] = { NULL, NULL }; 910 struct super_block *sb = osb->sb; 911 unsigned int feature[OCFS2_MAXQUOTAS] = { 912 OCFS2_FEATURE_RO_COMPAT_USRQUOTA, 913 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA}; 914 unsigned int ino[OCFS2_MAXQUOTAS] = { 915 LOCAL_USER_QUOTA_SYSTEM_INODE, 916 LOCAL_GROUP_QUOTA_SYSTEM_INODE }; 917 int status; 918 int type; 919 920 sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NEGATIVE_USAGE; 921 for (type = 0; type < OCFS2_MAXQUOTAS; type++) { 922 if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type])) 923 continue; 924 inode[type] = ocfs2_get_system_file_inode(osb, ino[type], 925 osb->slot_num); 926 if (!inode[type]) { 927 status = -ENOENT; 928 goto out_quota_off; 929 } 930 status = dquot_load_quota_inode(inode[type], type, QFMT_OCFS2, 931 DQUOT_USAGE_ENABLED); 932 if (status < 0) 933 goto out_quota_off; 934 } 935 936 for (type = 0; type < OCFS2_MAXQUOTAS; type++) 937 iput(inode[type]); 938 return 0; 939 out_quota_off: 940 ocfs2_disable_quotas(osb); 941 for (type = 0; type < OCFS2_MAXQUOTAS; type++) 942 iput(inode[type]); 943 mlog_errno(status); 944 return status; 945 } 946 947 static void ocfs2_disable_quotas(struct ocfs2_super *osb) 948 { 949 int type; 950 struct inode *inode; 951 struct super_block *sb = osb->sb; 952 struct ocfs2_mem_dqinfo *oinfo; 953 954 /* We mostly ignore errors in this function because there's not much 955 * we can do when we see them */ 956 for (type = 0; type < OCFS2_MAXQUOTAS; type++) { 957 if (!sb_has_quota_loaded(sb, type)) 958 continue; 959 if (!sb_has_quota_suspended(sb, type)) { 960 oinfo = sb_dqinfo(sb, type)->dqi_priv; 961 cancel_delayed_work_sync(&oinfo->dqi_sync_work); 962 } 963 inode = igrab(sb->s_dquot.files[type]); 964 /* Turn off quotas. This will remove all dquot structures from 965 * memory and so they will be automatically synced to global 966 * quota files */ 967 dquot_disable(sb, type, DQUOT_USAGE_ENABLED | 968 DQUOT_LIMITS_ENABLED); 969 iput(inode); 970 } 971 } 972 973 static int ocfs2_fill_super(struct super_block *sb, struct fs_context *fc) 974 { 975 struct dentry *root; 976 int status, sector_size; 977 struct mount_options *parsed_options = fc->fs_private; 978 struct inode *inode = NULL; 979 struct ocfs2_super *osb = NULL; 980 struct buffer_head *bh = NULL; 981 char nodestr[12]; 982 struct ocfs2_blockcheck_stats stats; 983 984 trace_ocfs2_fill_super(sb, fc, fc->sb_flags & SB_SILENT); 985 986 /* probe for superblock */ 987 status = ocfs2_sb_probe(sb, &bh, §or_size, &stats); 988 if (status < 0) { 989 mlog(ML_ERROR, "superblock probe failed!\n"); 990 goto out; 991 } 992 993 status = ocfs2_initialize_super(sb, bh, sector_size, &stats); 994 brelse(bh); 995 bh = NULL; 996 if (status < 0) 997 goto out; 998 999 osb = OCFS2_SB(sb); 1000 1001 if (!ocfs2_check_set_options(sb, parsed_options)) { 1002 status = -EINVAL; 1003 goto out_super; 1004 } 1005 osb->s_mount_opt = parsed_options->mount_opt; 1006 osb->s_atime_quantum = parsed_options->atime_quantum; 1007 osb->preferred_slot = parsed_options->slot; 1008 osb->osb_commit_interval = parsed_options->commit_interval; 1009 1010 ocfs2_la_set_sizes(osb, parsed_options->localalloc_opt); 1011 osb->osb_resv_level = parsed_options->resv_level; 1012 osb->osb_dir_resv_level = parsed_options->resv_level; 1013 if (parsed_options->dir_resv_level == -1) 1014 osb->osb_dir_resv_level = parsed_options->resv_level; 1015 else 1016 osb->osb_dir_resv_level = parsed_options->dir_resv_level; 1017 1018 status = ocfs2_verify_userspace_stack(osb, parsed_options); 1019 if (status) 1020 goto out_super; 1021 1022 sb->s_magic = OCFS2_SUPER_MAGIC; 1023 1024 sb->s_flags = (sb->s_flags & ~(SB_POSIXACL | SB_NOSEC)) | 1025 ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? SB_POSIXACL : 0); 1026 1027 /* Hard readonly mode only if: bdev_read_only, SB_RDONLY, 1028 * heartbeat=none */ 1029 if (bdev_read_only(sb->s_bdev)) { 1030 if (!sb_rdonly(sb)) { 1031 status = -EACCES; 1032 mlog(ML_ERROR, "Readonly device detected but readonly " 1033 "mount was not specified.\n"); 1034 goto out_super; 1035 } 1036 1037 /* You should not be able to start a local heartbeat 1038 * on a readonly device. */ 1039 if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) { 1040 status = -EROFS; 1041 mlog(ML_ERROR, "Local heartbeat specified on readonly " 1042 "device.\n"); 1043 goto out_super; 1044 } 1045 1046 status = ocfs2_check_journals_nolocks(osb); 1047 if (status < 0) { 1048 if (status == -EROFS) 1049 mlog(ML_ERROR, "Recovery required on readonly " 1050 "file system, but write access is " 1051 "unavailable.\n"); 1052 goto out_super; 1053 } 1054 1055 ocfs2_set_ro_flag(osb, 1); 1056 1057 printk(KERN_NOTICE "ocfs2: Readonly device (%s) detected. " 1058 "Cluster services will not be used for this mount. " 1059 "Recovery will be skipped.\n", osb->dev_str); 1060 } 1061 1062 if (!ocfs2_is_hard_readonly(osb)) { 1063 if (sb_rdonly(sb)) 1064 ocfs2_set_ro_flag(osb, 0); 1065 } 1066 1067 status = ocfs2_verify_heartbeat(osb); 1068 if (status < 0) 1069 goto out_super; 1070 1071 osb->osb_debug_root = debugfs_create_dir(osb->uuid_str, 1072 ocfs2_debugfs_root); 1073 1074 debugfs_create_file("fs_state", S_IFREG|S_IRUSR, osb->osb_debug_root, 1075 osb, &ocfs2_osb_debug_fops); 1076 1077 if (ocfs2_meta_ecc(osb)) { 1078 ocfs2_initialize_journal_triggers(sb, osb->s_journal_triggers); 1079 ocfs2_blockcheck_stats_debugfs_install( &osb->osb_ecc_stats, 1080 osb->osb_debug_root); 1081 } 1082 1083 status = ocfs2_mount_volume(sb); 1084 if (status < 0) 1085 goto out_debugfs; 1086 1087 if (osb->root_inode) 1088 inode = igrab(osb->root_inode); 1089 1090 if (!inode) { 1091 status = -EIO; 1092 goto out_dismount; 1093 } 1094 1095 osb->osb_dev_kset = kset_create_and_add(sb->s_id, NULL, 1096 &ocfs2_kset->kobj); 1097 if (!osb->osb_dev_kset) { 1098 status = -ENOMEM; 1099 mlog(ML_ERROR, "Unable to create device kset %s.\n", sb->s_id); 1100 goto out_dismount; 1101 } 1102 1103 /* Create filecheck sysfs related directories/files at 1104 * /sys/fs/ocfs2/<devname>/filecheck */ 1105 if (ocfs2_filecheck_create_sysfs(osb)) { 1106 status = -ENOMEM; 1107 mlog(ML_ERROR, "Unable to create filecheck sysfs directory at " 1108 "/sys/fs/ocfs2/%s/filecheck.\n", sb->s_id); 1109 goto out_dismount; 1110 } 1111 1112 root = d_make_root(inode); 1113 if (!root) { 1114 status = -ENOMEM; 1115 goto out_dismount; 1116 } 1117 1118 sb->s_root = root; 1119 1120 ocfs2_complete_mount_recovery(osb); 1121 1122 if (ocfs2_mount_local(osb)) 1123 snprintf(nodestr, sizeof(nodestr), "local"); 1124 else 1125 snprintf(nodestr, sizeof(nodestr), "%u", osb->node_num); 1126 1127 printk(KERN_INFO "ocfs2: Mounting device (%s) on (node %s, slot %d) " 1128 "with %s data mode.\n", 1129 osb->dev_str, nodestr, osb->slot_num, 1130 osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? "writeback" : 1131 "ordered"); 1132 1133 atomic_set(&osb->vol_state, VOLUME_MOUNTED); 1134 wake_up(&osb->osb_mount_event); 1135 1136 /* Now we can initialize quotas because we can afford to wait 1137 * for cluster locks recovery now. That also means that truncation 1138 * log recovery can happen but that waits for proper quota setup */ 1139 if (!sb_rdonly(sb)) { 1140 status = ocfs2_enable_quotas(osb); 1141 if (status < 0) { 1142 /* We have to err-out specially here because 1143 * s_root is already set */ 1144 mlog_errno(status); 1145 atomic_set(&osb->vol_state, VOLUME_DISABLED); 1146 wake_up(&osb->osb_mount_event); 1147 return status; 1148 } 1149 } 1150 1151 ocfs2_complete_quota_recovery(osb); 1152 1153 /* Now we wake up again for processes waiting for quotas */ 1154 atomic_set(&osb->vol_state, VOLUME_MOUNTED_QUOTAS); 1155 wake_up(&osb->osb_mount_event); 1156 1157 /* Start this when the mount is almost sure of being successful */ 1158 ocfs2_orphan_scan_start(osb); 1159 1160 return status; 1161 1162 out_dismount: 1163 atomic_set(&osb->vol_state, VOLUME_DISABLED); 1164 wake_up(&osb->osb_mount_event); 1165 ocfs2_free_replay_slots(osb); 1166 ocfs2_dismount_volume(sb, 1); 1167 goto out; 1168 1169 out_debugfs: 1170 debugfs_remove_recursive(osb->osb_debug_root); 1171 out_super: 1172 ocfs2_release_system_inodes(osb); 1173 kfree(osb->recovery_map); 1174 ocfs2_delete_osb(osb); 1175 kfree(osb); 1176 out: 1177 mlog_errno(status); 1178 1179 return status; 1180 } 1181 1182 static int ocfs2_get_tree(struct fs_context *fc) 1183 { 1184 return get_tree_bdev(fc, ocfs2_fill_super); 1185 } 1186 1187 static void ocfs2_free_fc(struct fs_context *fc) 1188 { 1189 kfree(fc->fs_private); 1190 } 1191 1192 static const struct fs_context_operations ocfs2_context_ops = { 1193 .parse_param = ocfs2_parse_param, 1194 .get_tree = ocfs2_get_tree, 1195 .reconfigure = ocfs2_reconfigure, 1196 .free = ocfs2_free_fc, 1197 }; 1198 1199 static int ocfs2_init_fs_context(struct fs_context *fc) 1200 { 1201 struct mount_options *mopt; 1202 1203 mopt = kzalloc(sizeof(struct mount_options), GFP_KERNEL); 1204 if (!mopt) 1205 return -EINVAL; 1206 1207 mopt->commit_interval = 0; 1208 mopt->mount_opt = OCFS2_MOUNT_NOINTR; 1209 mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; 1210 mopt->slot = OCFS2_INVALID_SLOT; 1211 mopt->localalloc_opt = -1; 1212 mopt->cluster_stack[0] = '\0'; 1213 mopt->resv_level = OCFS2_DEFAULT_RESV_LEVEL; 1214 mopt->dir_resv_level = -1; 1215 1216 fc->fs_private = mopt; 1217 fc->ops = &ocfs2_context_ops; 1218 1219 return 0; 1220 } 1221 1222 static struct file_system_type ocfs2_fs_type = { 1223 .owner = THIS_MODULE, 1224 .name = "ocfs2", 1225 .kill_sb = kill_block_super, 1226 .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE, 1227 .next = NULL, 1228 .init_fs_context = ocfs2_init_fs_context, 1229 .parameters = ocfs2_param_spec, 1230 }; 1231 MODULE_ALIAS_FS("ocfs2"); 1232 1233 static int ocfs2_check_set_options(struct super_block *sb, 1234 struct mount_options *options) 1235 { 1236 if (options->user_stack == 0) { 1237 u32 tmp; 1238 1239 /* Ensure only one heartbeat mode */ 1240 tmp = options->mount_opt & (OCFS2_MOUNT_HB_LOCAL | 1241 OCFS2_MOUNT_HB_GLOBAL | 1242 OCFS2_MOUNT_HB_NONE); 1243 if (hweight32(tmp) != 1) { 1244 mlog(ML_ERROR, "Invalid heartbeat mount options\n"); 1245 return 0; 1246 } 1247 } 1248 if (options->mount_opt & OCFS2_MOUNT_USRQUOTA && 1249 !OCFS2_HAS_RO_COMPAT_FEATURE(sb, 1250 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { 1251 mlog(ML_ERROR, "User quotas were requested, but this " 1252 "filesystem does not have the feature enabled.\n"); 1253 return 0; 1254 } 1255 if (options->mount_opt & OCFS2_MOUNT_GRPQUOTA && 1256 !OCFS2_HAS_RO_COMPAT_FEATURE(sb, 1257 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { 1258 mlog(ML_ERROR, "Group quotas were requested, but this " 1259 "filesystem does not have the feature enabled.\n"); 1260 return 0; 1261 } 1262 if (options->mount_opt & OCFS2_MOUNT_POSIX_ACL && 1263 !OCFS2_HAS_INCOMPAT_FEATURE(sb, OCFS2_FEATURE_INCOMPAT_XATTR)) { 1264 mlog(ML_ERROR, "ACL support requested but extended attributes " 1265 "feature is not enabled\n"); 1266 return 0; 1267 } 1268 /* No ACL setting specified? Use XATTR feature... */ 1269 if (!(options->mount_opt & (OCFS2_MOUNT_POSIX_ACL | 1270 OCFS2_MOUNT_NO_POSIX_ACL))) { 1271 if (OCFS2_HAS_INCOMPAT_FEATURE(sb, OCFS2_FEATURE_INCOMPAT_XATTR)) 1272 options->mount_opt |= OCFS2_MOUNT_POSIX_ACL; 1273 else 1274 options->mount_opt |= OCFS2_MOUNT_NO_POSIX_ACL; 1275 } 1276 return 1; 1277 } 1278 1279 static int ocfs2_parse_param(struct fs_context *fc, struct fs_parameter *param) 1280 { 1281 struct fs_parse_result result; 1282 int opt; 1283 struct mount_options *mopt = fc->fs_private; 1284 bool is_remount = (fc->purpose & FS_CONTEXT_FOR_RECONFIGURE); 1285 1286 trace_ocfs2_parse_options(is_remount, param->key); 1287 1288 opt = fs_parse(fc, ocfs2_param_spec, param, &result); 1289 if (opt < 0) 1290 return opt; 1291 1292 switch (opt) { 1293 case Opt_heartbeat: 1294 mopt->mount_opt |= result.uint_32; 1295 break; 1296 case Opt_barrier: 1297 if (result.uint_32) 1298 mopt->mount_opt |= OCFS2_MOUNT_BARRIER; 1299 else 1300 mopt->mount_opt &= ~OCFS2_MOUNT_BARRIER; 1301 break; 1302 case Opt_intr: 1303 if (result.negated) 1304 mopt->mount_opt |= OCFS2_MOUNT_NOINTR; 1305 else 1306 mopt->mount_opt &= ~OCFS2_MOUNT_NOINTR; 1307 break; 1308 case Opt_errors: 1309 mopt->mount_opt &= ~(OCFS2_MOUNT_ERRORS_CONT | 1310 OCFS2_MOUNT_ERRORS_ROFS | 1311 OCFS2_MOUNT_ERRORS_PANIC); 1312 mopt->mount_opt |= result.uint_32; 1313 break; 1314 case Opt_data: 1315 mopt->mount_opt &= ~OCFS2_MOUNT_DATA_WRITEBACK; 1316 mopt->mount_opt |= result.uint_32; 1317 break; 1318 case Opt_user_xattr: 1319 if (result.negated) 1320 mopt->mount_opt |= OCFS2_MOUNT_NOUSERXATTR; 1321 else 1322 mopt->mount_opt &= ~OCFS2_MOUNT_NOUSERXATTR; 1323 break; 1324 case Opt_atime_quantum: 1325 mopt->atime_quantum = result.uint_32; 1326 break; 1327 case Opt_slot: 1328 if (result.uint_32) 1329 mopt->slot = (u16)result.uint_32; 1330 break; 1331 case Opt_commit: 1332 if (result.uint_32 == 0) 1333 mopt->commit_interval = HZ * JBD2_DEFAULT_MAX_COMMIT_AGE; 1334 else 1335 mopt->commit_interval = HZ * result.uint_32; 1336 break; 1337 case Opt_localalloc: 1338 if (result.int_32 >= 0) 1339 mopt->localalloc_opt = result.int_32; 1340 break; 1341 case Opt_localflocks: 1342 /* 1343 * Changing this during remount could race flock() requests, or 1344 * "unbalance" existing ones (e.g., a lock is taken in one mode 1345 * but dropped in the other). If users care enough to flip 1346 * locking modes during remount, we could add a "local" flag to 1347 * individual flock structures for proper tracking of state. 1348 */ 1349 if (!is_remount) 1350 mopt->mount_opt |= OCFS2_MOUNT_LOCALFLOCKS; 1351 break; 1352 case Opt_stack: 1353 /* Check both that the option we were passed is of the right 1354 * length and that it is a proper string of the right length. 1355 */ 1356 if (strlen(param->string) != OCFS2_STACK_LABEL_LEN) { 1357 mlog(ML_ERROR, "Invalid cluster_stack option\n"); 1358 return -EINVAL; 1359 } 1360 memcpy(mopt->cluster_stack, param->string, OCFS2_STACK_LABEL_LEN); 1361 mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0'; 1362 /* 1363 * Open code the memcmp here as we don't have an osb to pass 1364 * to ocfs2_userspace_stack(). 1365 */ 1366 if (memcmp(mopt->cluster_stack, 1367 OCFS2_CLASSIC_CLUSTER_STACK, 1368 OCFS2_STACK_LABEL_LEN)) 1369 mopt->user_stack = 1; 1370 break; 1371 case Opt_inode64: 1372 mopt->mount_opt |= OCFS2_MOUNT_INODE64; 1373 break; 1374 case Opt_usrquota: 1375 mopt->mount_opt |= OCFS2_MOUNT_USRQUOTA; 1376 break; 1377 case Opt_grpquota: 1378 mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA; 1379 break; 1380 case Opt_coherency: 1381 mopt->mount_opt &= ~OCFS2_MOUNT_COHERENCY_BUFFERED; 1382 mopt->mount_opt |= result.uint_32; 1383 break; 1384 case Opt_acl: 1385 if (result.negated) { 1386 mopt->mount_opt |= OCFS2_MOUNT_NO_POSIX_ACL; 1387 mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL; 1388 } else { 1389 mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL; 1390 mopt->mount_opt &= ~OCFS2_MOUNT_NO_POSIX_ACL; 1391 } 1392 break; 1393 case Opt_resv_level: 1394 if (is_remount) 1395 break; 1396 if (result.uint_32 >= OCFS2_MIN_RESV_LEVEL && 1397 result.uint_32 < OCFS2_MAX_RESV_LEVEL) 1398 mopt->resv_level = result.uint_32; 1399 break; 1400 case Opt_dir_resv_level: 1401 if (is_remount) 1402 break; 1403 if (result.uint_32 >= OCFS2_MIN_RESV_LEVEL && 1404 result.uint_32 < OCFS2_MAX_RESV_LEVEL) 1405 mopt->dir_resv_level = result.uint_32; 1406 break; 1407 case Opt_journal_async_commit: 1408 mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT; 1409 break; 1410 default: 1411 return -EINVAL; 1412 } 1413 1414 return 0; 1415 } 1416 1417 static int ocfs2_show_options(struct seq_file *s, struct dentry *root) 1418 { 1419 struct ocfs2_super *osb = OCFS2_SB(root->d_sb); 1420 unsigned long opts = osb->s_mount_opt; 1421 unsigned int local_alloc_megs; 1422 1423 if (opts & (OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL)) { 1424 seq_printf(s, ",_netdev"); 1425 if (opts & OCFS2_MOUNT_HB_LOCAL) 1426 seq_printf(s, ",%s", OCFS2_HB_LOCAL); 1427 else 1428 seq_printf(s, ",%s", OCFS2_HB_GLOBAL); 1429 } else 1430 seq_printf(s, ",%s", OCFS2_HB_NONE); 1431 1432 if (opts & OCFS2_MOUNT_NOINTR) 1433 seq_printf(s, ",nointr"); 1434 1435 if (opts & OCFS2_MOUNT_DATA_WRITEBACK) 1436 seq_printf(s, ",data=writeback"); 1437 else 1438 seq_printf(s, ",data=ordered"); 1439 1440 if (opts & OCFS2_MOUNT_BARRIER) 1441 seq_printf(s, ",barrier=1"); 1442 1443 if (opts & OCFS2_MOUNT_ERRORS_PANIC) 1444 seq_printf(s, ",errors=panic"); 1445 else if (opts & OCFS2_MOUNT_ERRORS_CONT) 1446 seq_printf(s, ",errors=continue"); 1447 else 1448 seq_printf(s, ",errors=remount-ro"); 1449 1450 if (osb->preferred_slot != OCFS2_INVALID_SLOT) 1451 seq_printf(s, ",preferred_slot=%d", osb->preferred_slot); 1452 1453 seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum); 1454 1455 if (osb->osb_commit_interval) 1456 seq_printf(s, ",commit=%u", 1457 (unsigned) (osb->osb_commit_interval / HZ)); 1458 1459 local_alloc_megs = osb->local_alloc_bits >> (20 - osb->s_clustersize_bits); 1460 if (local_alloc_megs != ocfs2_la_default_mb(osb)) 1461 seq_printf(s, ",localalloc=%d", local_alloc_megs); 1462 1463 if (opts & OCFS2_MOUNT_LOCALFLOCKS) 1464 seq_printf(s, ",localflocks,"); 1465 1466 if (osb->osb_cluster_stack[0]) 1467 seq_show_option(s, "cluster_stack", osb->osb_cluster_stack); 1468 if (opts & OCFS2_MOUNT_USRQUOTA) 1469 seq_printf(s, ",usrquota"); 1470 if (opts & OCFS2_MOUNT_GRPQUOTA) 1471 seq_printf(s, ",grpquota"); 1472 1473 if (opts & OCFS2_MOUNT_COHERENCY_BUFFERED) 1474 seq_printf(s, ",coherency=buffered"); 1475 else 1476 seq_printf(s, ",coherency=full"); 1477 1478 if (opts & OCFS2_MOUNT_NOUSERXATTR) 1479 seq_printf(s, ",nouser_xattr"); 1480 else 1481 seq_printf(s, ",user_xattr"); 1482 1483 if (opts & OCFS2_MOUNT_INODE64) 1484 seq_printf(s, ",inode64"); 1485 1486 if (opts & OCFS2_MOUNT_POSIX_ACL) 1487 seq_printf(s, ",acl"); 1488 else 1489 seq_printf(s, ",noacl"); 1490 1491 if (osb->osb_resv_level != OCFS2_DEFAULT_RESV_LEVEL) 1492 seq_printf(s, ",resv_level=%d", osb->osb_resv_level); 1493 1494 if (osb->osb_dir_resv_level != osb->osb_resv_level) 1495 seq_printf(s, ",dir_resv_level=%d", osb->osb_resv_level); 1496 1497 if (opts & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT) 1498 seq_printf(s, ",journal_async_commit"); 1499 1500 return 0; 1501 } 1502 1503 static int __init ocfs2_init(void) 1504 { 1505 int status; 1506 1507 status = init_ocfs2_uptodate_cache(); 1508 if (status < 0) 1509 goto out1; 1510 1511 status = ocfs2_initialize_mem_caches(); 1512 if (status < 0) 1513 goto out2; 1514 1515 ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL); 1516 1517 ocfs2_set_locking_protocol(); 1518 1519 register_quota_format(&ocfs2_quota_format); 1520 1521 status = register_filesystem(&ocfs2_fs_type); 1522 if (!status) 1523 return 0; 1524 1525 unregister_quota_format(&ocfs2_quota_format); 1526 debugfs_remove(ocfs2_debugfs_root); 1527 ocfs2_free_mem_caches(); 1528 out2: 1529 exit_ocfs2_uptodate_cache(); 1530 out1: 1531 mlog_errno(status); 1532 return status; 1533 } 1534 1535 static void __exit ocfs2_exit(void) 1536 { 1537 unregister_quota_format(&ocfs2_quota_format); 1538 1539 debugfs_remove(ocfs2_debugfs_root); 1540 1541 ocfs2_free_mem_caches(); 1542 1543 unregister_filesystem(&ocfs2_fs_type); 1544 1545 exit_ocfs2_uptodate_cache(); 1546 } 1547 1548 static void ocfs2_put_super(struct super_block *sb) 1549 { 1550 trace_ocfs2_put_super(sb); 1551 1552 ocfs2_sync_blockdev(sb); 1553 ocfs2_dismount_volume(sb, 0); 1554 } 1555 1556 static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf) 1557 { 1558 struct ocfs2_super *osb; 1559 u32 numbits, freebits; 1560 int status; 1561 struct ocfs2_dinode *bm_lock; 1562 struct buffer_head *bh = NULL; 1563 struct inode *inode = NULL; 1564 1565 trace_ocfs2_statfs(dentry->d_sb, buf); 1566 1567 osb = OCFS2_SB(dentry->d_sb); 1568 1569 inode = ocfs2_get_system_file_inode(osb, 1570 GLOBAL_BITMAP_SYSTEM_INODE, 1571 OCFS2_INVALID_SLOT); 1572 if (!inode) { 1573 mlog(ML_ERROR, "failed to get bitmap inode\n"); 1574 status = -EIO; 1575 goto bail; 1576 } 1577 1578 status = ocfs2_inode_lock(inode, &bh, 0); 1579 if (status < 0) { 1580 mlog_errno(status); 1581 goto bail; 1582 } 1583 1584 bm_lock = (struct ocfs2_dinode *) bh->b_data; 1585 1586 numbits = le32_to_cpu(bm_lock->id1.bitmap1.i_total); 1587 freebits = numbits - le32_to_cpu(bm_lock->id1.bitmap1.i_used); 1588 1589 buf->f_type = OCFS2_SUPER_MAGIC; 1590 buf->f_bsize = dentry->d_sb->s_blocksize; 1591 buf->f_namelen = OCFS2_MAX_FILENAME_LEN; 1592 buf->f_blocks = ((sector_t) numbits) * 1593 (osb->s_clustersize >> osb->sb->s_blocksize_bits); 1594 buf->f_bfree = ((sector_t) freebits) * 1595 (osb->s_clustersize >> osb->sb->s_blocksize_bits); 1596 buf->f_bavail = buf->f_bfree; 1597 buf->f_files = numbits; 1598 buf->f_ffree = freebits; 1599 buf->f_fsid.val[0] = crc32_le(0, osb->uuid_str, OCFS2_VOL_UUID_LEN) 1600 & 0xFFFFFFFFUL; 1601 buf->f_fsid.val[1] = crc32_le(0, osb->uuid_str + OCFS2_VOL_UUID_LEN, 1602 OCFS2_VOL_UUID_LEN) & 0xFFFFFFFFUL; 1603 1604 brelse(bh); 1605 1606 ocfs2_inode_unlock(inode, 0); 1607 status = 0; 1608 bail: 1609 iput(inode); 1610 1611 if (status) 1612 mlog_errno(status); 1613 1614 return status; 1615 } 1616 1617 static void ocfs2_inode_init_once(void *data) 1618 { 1619 struct ocfs2_inode_info *oi = data; 1620 1621 oi->ip_flags = 0; 1622 oi->ip_open_count = 0; 1623 spin_lock_init(&oi->ip_lock); 1624 ocfs2_extent_map_init(&oi->vfs_inode); 1625 INIT_LIST_HEAD(&oi->ip_io_markers); 1626 INIT_LIST_HEAD(&oi->ip_unwritten_list); 1627 oi->ip_dir_start_lookup = 0; 1628 init_rwsem(&oi->ip_alloc_sem); 1629 init_rwsem(&oi->ip_xattr_sem); 1630 mutex_init(&oi->ip_io_mutex); 1631 1632 oi->ip_blkno = 0ULL; 1633 oi->ip_clusters = 0; 1634 oi->ip_next_orphan = NULL; 1635 1636 ocfs2_resv_init_once(&oi->ip_la_data_resv); 1637 1638 ocfs2_lock_res_init_once(&oi->ip_rw_lockres); 1639 ocfs2_lock_res_init_once(&oi->ip_inode_lockres); 1640 ocfs2_lock_res_init_once(&oi->ip_open_lockres); 1641 1642 ocfs2_metadata_cache_init(INODE_CACHE(&oi->vfs_inode), 1643 &ocfs2_inode_caching_ops); 1644 1645 inode_init_once(&oi->vfs_inode); 1646 } 1647 1648 static int ocfs2_initialize_mem_caches(void) 1649 { 1650 ocfs2_inode_cachep = kmem_cache_create("ocfs2_inode_cache", 1651 sizeof(struct ocfs2_inode_info), 1652 0, 1653 (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| 1654 SLAB_ACCOUNT), 1655 ocfs2_inode_init_once); 1656 ocfs2_dquot_cachep = kmem_cache_create("ocfs2_dquot_cache", 1657 sizeof(struct ocfs2_dquot), 1658 0, 1659 SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT, 1660 NULL); 1661 ocfs2_qf_chunk_cachep = kmem_cache_create("ocfs2_qf_chunk_cache", 1662 sizeof(struct ocfs2_quota_chunk), 1663 0, 1664 SLAB_RECLAIM_ACCOUNT, 1665 NULL); 1666 if (!ocfs2_inode_cachep || !ocfs2_dquot_cachep || 1667 !ocfs2_qf_chunk_cachep) { 1668 kmem_cache_destroy(ocfs2_inode_cachep); 1669 kmem_cache_destroy(ocfs2_dquot_cachep); 1670 kmem_cache_destroy(ocfs2_qf_chunk_cachep); 1671 return -ENOMEM; 1672 } 1673 1674 return 0; 1675 } 1676 1677 static void ocfs2_free_mem_caches(void) 1678 { 1679 /* 1680 * Make sure all delayed rcu free inodes are flushed before we 1681 * destroy cache. 1682 */ 1683 rcu_barrier(); 1684 kmem_cache_destroy(ocfs2_inode_cachep); 1685 ocfs2_inode_cachep = NULL; 1686 1687 kmem_cache_destroy(ocfs2_dquot_cachep); 1688 ocfs2_dquot_cachep = NULL; 1689 1690 kmem_cache_destroy(ocfs2_qf_chunk_cachep); 1691 ocfs2_qf_chunk_cachep = NULL; 1692 } 1693 1694 static int ocfs2_get_sector(struct super_block *sb, 1695 struct buffer_head **bh, 1696 int block, 1697 int sect_size) 1698 { 1699 if (!sb_set_blocksize(sb, sect_size)) { 1700 mlog(ML_ERROR, "unable to set blocksize\n"); 1701 return -EIO; 1702 } 1703 1704 *bh = sb_getblk(sb, block); 1705 if (!*bh) { 1706 mlog_errno(-ENOMEM); 1707 return -ENOMEM; 1708 } 1709 lock_buffer(*bh); 1710 if (!buffer_dirty(*bh)) 1711 clear_buffer_uptodate(*bh); 1712 unlock_buffer(*bh); 1713 if (bh_read(*bh, 0) < 0) { 1714 mlog_errno(-EIO); 1715 brelse(*bh); 1716 *bh = NULL; 1717 return -EIO; 1718 } 1719 1720 return 0; 1721 } 1722 1723 static int ocfs2_mount_volume(struct super_block *sb) 1724 { 1725 int status = 0; 1726 struct ocfs2_super *osb = OCFS2_SB(sb); 1727 1728 if (ocfs2_is_hard_readonly(osb)) 1729 goto out; 1730 1731 mutex_init(&osb->obs_trim_fs_mutex); 1732 1733 status = ocfs2_dlm_init(osb); 1734 if (status < 0) { 1735 mlog_errno(status); 1736 if (status == -EBADR && ocfs2_userspace_stack(osb)) 1737 mlog(ML_ERROR, "couldn't mount because cluster name on" 1738 " disk does not match the running cluster name.\n"); 1739 goto out; 1740 } 1741 1742 status = ocfs2_super_lock(osb, 1); 1743 if (status < 0) { 1744 mlog_errno(status); 1745 goto out_dlm; 1746 } 1747 1748 /* This will load up the node map and add ourselves to it. */ 1749 status = ocfs2_find_slot(osb); 1750 if (status < 0) { 1751 mlog_errno(status); 1752 goto out_super_lock; 1753 } 1754 1755 /* load all node-local system inodes */ 1756 status = ocfs2_init_local_system_inodes(osb); 1757 if (status < 0) { 1758 mlog_errno(status); 1759 goto out_super_lock; 1760 } 1761 1762 status = ocfs2_check_volume(osb); 1763 if (status < 0) { 1764 mlog_errno(status); 1765 goto out_system_inodes; 1766 } 1767 1768 status = ocfs2_truncate_log_init(osb); 1769 if (status < 0) { 1770 mlog_errno(status); 1771 goto out_check_volume; 1772 } 1773 1774 ocfs2_super_unlock(osb, 1); 1775 return 0; 1776 1777 out_check_volume: 1778 ocfs2_free_replay_slots(osb); 1779 out_system_inodes: 1780 if (osb->local_alloc_state == OCFS2_LA_ENABLED) 1781 ocfs2_shutdown_local_alloc(osb); 1782 ocfs2_release_system_inodes(osb); 1783 /* before journal shutdown, we should release slot_info */ 1784 ocfs2_free_slot_info(osb); 1785 ocfs2_journal_shutdown(osb); 1786 out_super_lock: 1787 ocfs2_super_unlock(osb, 1); 1788 out_dlm: 1789 ocfs2_dlm_shutdown(osb, 0); 1790 out: 1791 return status; 1792 } 1793 1794 static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) 1795 { 1796 int tmp, hangup_needed = 0; 1797 struct ocfs2_super *osb = NULL; 1798 char nodestr[12]; 1799 1800 trace_ocfs2_dismount_volume(sb); 1801 1802 BUG_ON(!sb); 1803 osb = OCFS2_SB(sb); 1804 BUG_ON(!osb); 1805 1806 /* Remove file check sysfs related directories/files, 1807 * and wait for the pending file check operations */ 1808 ocfs2_filecheck_remove_sysfs(osb); 1809 1810 kset_unregister(osb->osb_dev_kset); 1811 1812 /* Orphan scan should be stopped as early as possible */ 1813 ocfs2_orphan_scan_stop(osb); 1814 1815 ocfs2_disable_quotas(osb); 1816 1817 /* All dquots should be freed by now */ 1818 WARN_ON(!llist_empty(&osb->dquot_drop_list)); 1819 /* Wait for worker to be done with the work structure in osb */ 1820 cancel_work_sync(&osb->dquot_drop_work); 1821 1822 ocfs2_shutdown_local_alloc(osb); 1823 1824 ocfs2_truncate_log_shutdown(osb); 1825 1826 /* This will disable recovery and flush any recovery work. */ 1827 ocfs2_recovery_exit(osb); 1828 1829 ocfs2_sync_blockdev(sb); 1830 1831 ocfs2_purge_refcount_trees(osb); 1832 1833 /* No cluster connection means we've failed during mount, so skip 1834 * all the steps which depended on that to complete. */ 1835 if (osb->cconn) { 1836 tmp = ocfs2_super_lock(osb, 1); 1837 if (tmp < 0) { 1838 mlog_errno(tmp); 1839 return; 1840 } 1841 } 1842 1843 if (osb->slot_num != OCFS2_INVALID_SLOT) 1844 ocfs2_put_slot(osb); 1845 1846 if (osb->cconn) 1847 ocfs2_super_unlock(osb, 1); 1848 1849 ocfs2_release_system_inodes(osb); 1850 1851 ocfs2_journal_shutdown(osb); 1852 1853 /* 1854 * If we're dismounting due to mount error, mount.ocfs2 will clean 1855 * up heartbeat. If we're a local mount, there is no heartbeat. 1856 * If we failed before we got a uuid_str yet, we can't stop 1857 * heartbeat. Otherwise, do it. 1858 */ 1859 if (!mnt_err && !ocfs2_mount_local(osb) && osb->uuid_str && 1860 !ocfs2_is_hard_readonly(osb)) 1861 hangup_needed = 1; 1862 1863 ocfs2_dlm_shutdown(osb, hangup_needed); 1864 1865 ocfs2_blockcheck_stats_debugfs_remove(&osb->osb_ecc_stats); 1866 debugfs_remove_recursive(osb->osb_debug_root); 1867 1868 if (hangup_needed) 1869 ocfs2_cluster_hangup(osb->uuid_str, strlen(osb->uuid_str)); 1870 1871 atomic_set(&osb->vol_state, VOLUME_DISMOUNTED); 1872 1873 if (ocfs2_mount_local(osb)) 1874 snprintf(nodestr, sizeof(nodestr), "local"); 1875 else 1876 snprintf(nodestr, sizeof(nodestr), "%u", osb->node_num); 1877 1878 printk(KERN_INFO "ocfs2: Unmounting device (%s) on (node %s)\n", 1879 osb->dev_str, nodestr); 1880 1881 ocfs2_delete_osb(osb); 1882 kfree(osb); 1883 sb->s_dev = 0; 1884 sb->s_fs_info = NULL; 1885 } 1886 1887 static int ocfs2_setup_osb_uuid(struct ocfs2_super *osb, const unsigned char *uuid, 1888 unsigned uuid_bytes) 1889 { 1890 int i, ret; 1891 char *ptr; 1892 1893 BUG_ON(uuid_bytes != OCFS2_VOL_UUID_LEN); 1894 1895 osb->uuid_str = kzalloc(OCFS2_VOL_UUID_LEN * 2 + 1, GFP_KERNEL); 1896 if (osb->uuid_str == NULL) 1897 return -ENOMEM; 1898 1899 for (i = 0, ptr = osb->uuid_str; i < OCFS2_VOL_UUID_LEN; i++) { 1900 /* print with null */ 1901 ret = snprintf(ptr, 3, "%02X", uuid[i]); 1902 if (ret != 2) /* drop super cleans up */ 1903 return -EINVAL; 1904 /* then only advance past the last char */ 1905 ptr += 2; 1906 } 1907 1908 return 0; 1909 } 1910 1911 /* Make sure entire volume is addressable by our journal. Requires 1912 osb_clusters_at_boot to be valid and for the journal to have been 1913 initialized by ocfs2_journal_init(). */ 1914 static int ocfs2_journal_addressable(struct ocfs2_super *osb) 1915 { 1916 int status = 0; 1917 u64 max_block = 1918 ocfs2_clusters_to_blocks(osb->sb, 1919 osb->osb_clusters_at_boot) - 1; 1920 1921 /* 32-bit block number is always OK. */ 1922 if (max_block <= (u32)~0ULL) 1923 goto out; 1924 1925 /* Volume is "huge", so see if our journal is new enough to 1926 support it. */ 1927 if (!(OCFS2_HAS_COMPAT_FEATURE(osb->sb, 1928 OCFS2_FEATURE_COMPAT_JBD2_SB) && 1929 jbd2_journal_check_used_features(osb->journal->j_journal, 0, 0, 1930 JBD2_FEATURE_INCOMPAT_64BIT))) { 1931 mlog(ML_ERROR, "The journal cannot address the entire volume. " 1932 "Enable the 'block64' journal option with tunefs.ocfs2"); 1933 status = -EFBIG; 1934 goto out; 1935 } 1936 1937 out: 1938 return status; 1939 } 1940 1941 static int ocfs2_initialize_super(struct super_block *sb, 1942 struct buffer_head *bh, 1943 int sector_size, 1944 struct ocfs2_blockcheck_stats *stats) 1945 { 1946 int status; 1947 int i, cbits, bbits; 1948 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; 1949 struct inode *inode = NULL; 1950 struct ocfs2_super *osb; 1951 u64 total_blocks; 1952 1953 osb = kzalloc(sizeof(struct ocfs2_super), GFP_KERNEL); 1954 if (!osb) { 1955 status = -ENOMEM; 1956 mlog_errno(status); 1957 goto out; 1958 } 1959 1960 sb->s_fs_info = osb; 1961 sb->s_op = &ocfs2_sops; 1962 sb->s_d_op = &ocfs2_dentry_ops; 1963 sb->s_export_op = &ocfs2_export_ops; 1964 sb->s_qcop = &dquot_quotactl_sysfile_ops; 1965 sb->dq_op = &ocfs2_quota_operations; 1966 sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; 1967 sb->s_xattr = ocfs2_xattr_handlers; 1968 sb->s_time_gran = 1; 1969 sb->s_flags |= SB_NOATIME; 1970 /* this is needed to support O_LARGEFILE */ 1971 cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits); 1972 bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits); 1973 sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits); 1974 super_set_uuid(sb, di->id2.i_super.s_uuid, 1975 sizeof(di->id2.i_super.s_uuid)); 1976 1977 osb->osb_dx_mask = (1 << (cbits - bbits)) - 1; 1978 1979 for (i = 0; i < 3; i++) 1980 osb->osb_dx_seed[i] = le32_to_cpu(di->id2.i_super.s_dx_seed[i]); 1981 osb->osb_dx_seed[3] = le32_to_cpu(di->id2.i_super.s_uuid_hash); 1982 1983 osb->sb = sb; 1984 osb->s_sectsize_bits = blksize_bits(sector_size); 1985 BUG_ON(!osb->s_sectsize_bits); 1986 1987 spin_lock_init(&osb->dc_task_lock); 1988 init_waitqueue_head(&osb->dc_event); 1989 osb->dc_work_sequence = 0; 1990 osb->dc_wake_sequence = 0; 1991 INIT_LIST_HEAD(&osb->blocked_lock_list); 1992 osb->blocked_lock_count = 0; 1993 spin_lock_init(&osb->osb_lock); 1994 spin_lock_init(&osb->osb_xattr_lock); 1995 ocfs2_init_steal_slots(osb); 1996 1997 mutex_init(&osb->system_file_mutex); 1998 1999 atomic_set(&osb->alloc_stats.moves, 0); 2000 atomic_set(&osb->alloc_stats.local_data, 0); 2001 atomic_set(&osb->alloc_stats.bitmap_data, 0); 2002 atomic_set(&osb->alloc_stats.bg_allocs, 0); 2003 atomic_set(&osb->alloc_stats.bg_extends, 0); 2004 2005 /* Copy the blockcheck stats from the superblock probe */ 2006 osb->osb_ecc_stats = *stats; 2007 2008 ocfs2_init_node_maps(osb); 2009 2010 snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", 2011 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); 2012 2013 osb->max_slots = le16_to_cpu(di->id2.i_super.s_max_slots); 2014 if (osb->max_slots > OCFS2_MAX_SLOTS || osb->max_slots == 0) { 2015 mlog(ML_ERROR, "Invalid number of node slots (%u)\n", 2016 osb->max_slots); 2017 status = -EINVAL; 2018 goto out; 2019 } 2020 2021 ocfs2_orphan_scan_init(osb); 2022 2023 status = ocfs2_recovery_init(osb); 2024 if (status) { 2025 mlog(ML_ERROR, "Unable to initialize recovery state\n"); 2026 mlog_errno(status); 2027 goto out; 2028 } 2029 2030 init_waitqueue_head(&osb->checkpoint_event); 2031 2032 osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; 2033 2034 osb->slot_num = OCFS2_INVALID_SLOT; 2035 2036 osb->s_xattr_inline_size = le16_to_cpu( 2037 di->id2.i_super.s_xattr_inline_size); 2038 2039 osb->local_alloc_state = OCFS2_LA_UNUSED; 2040 osb->local_alloc_bh = NULL; 2041 INIT_DELAYED_WORK(&osb->la_enable_wq, ocfs2_la_enable_worker); 2042 2043 init_waitqueue_head(&osb->osb_mount_event); 2044 2045 ocfs2_resmap_init(osb, &osb->osb_la_resmap); 2046 2047 osb->vol_label = kmalloc(OCFS2_MAX_VOL_LABEL_LEN, GFP_KERNEL); 2048 if (!osb->vol_label) { 2049 mlog(ML_ERROR, "unable to alloc vol label\n"); 2050 status = -ENOMEM; 2051 goto out_recovery_map; 2052 } 2053 2054 osb->slot_recovery_generations = 2055 kcalloc(osb->max_slots, sizeof(*osb->slot_recovery_generations), 2056 GFP_KERNEL); 2057 if (!osb->slot_recovery_generations) { 2058 status = -ENOMEM; 2059 mlog_errno(status); 2060 goto out_vol_label; 2061 } 2062 2063 init_waitqueue_head(&osb->osb_wipe_event); 2064 osb->osb_orphan_wipes = kcalloc(osb->max_slots, 2065 sizeof(*osb->osb_orphan_wipes), 2066 GFP_KERNEL); 2067 if (!osb->osb_orphan_wipes) { 2068 status = -ENOMEM; 2069 mlog_errno(status); 2070 goto out_slot_recovery_gen; 2071 } 2072 2073 osb->osb_rf_lock_tree = RB_ROOT; 2074 2075 osb->s_feature_compat = 2076 le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_compat); 2077 osb->s_feature_ro_compat = 2078 le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_ro_compat); 2079 osb->s_feature_incompat = 2080 le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_incompat); 2081 2082 if ((i = OCFS2_HAS_INCOMPAT_FEATURE(osb->sb, ~OCFS2_FEATURE_INCOMPAT_SUPP))) { 2083 mlog(ML_ERROR, "couldn't mount because of unsupported " 2084 "optional features (%x).\n", i); 2085 status = -EINVAL; 2086 goto out_orphan_wipes; 2087 } 2088 if (!sb_rdonly(osb->sb) && (i = OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, ~OCFS2_FEATURE_RO_COMPAT_SUPP))) { 2089 mlog(ML_ERROR, "couldn't mount RDWR because of " 2090 "unsupported optional features (%x).\n", i); 2091 status = -EINVAL; 2092 goto out_orphan_wipes; 2093 } 2094 2095 if (ocfs2_clusterinfo_valid(osb)) { 2096 /* 2097 * ci_stack and ci_cluster in ocfs2_cluster_info may not be null 2098 * terminated, so make sure no overflow happens here by using 2099 * memcpy. Destination strings will always be null terminated 2100 * because osb is allocated using kzalloc. 2101 */ 2102 osb->osb_stackflags = 2103 OCFS2_RAW_SB(di)->s_cluster_info.ci_stackflags; 2104 memcpy(osb->osb_cluster_stack, 2105 OCFS2_RAW_SB(di)->s_cluster_info.ci_stack, 2106 OCFS2_STACK_LABEL_LEN); 2107 if (strlen(osb->osb_cluster_stack) != OCFS2_STACK_LABEL_LEN) { 2108 mlog(ML_ERROR, 2109 "couldn't mount because of an invalid " 2110 "cluster stack label (%s) \n", 2111 osb->osb_cluster_stack); 2112 status = -EINVAL; 2113 goto out_orphan_wipes; 2114 } 2115 memcpy(osb->osb_cluster_name, 2116 OCFS2_RAW_SB(di)->s_cluster_info.ci_cluster, 2117 OCFS2_CLUSTER_NAME_LEN); 2118 } else { 2119 /* The empty string is identical with classic tools that 2120 * don't know about s_cluster_info. */ 2121 osb->osb_cluster_stack[0] = '\0'; 2122 } 2123 2124 get_random_bytes(&osb->s_next_generation, sizeof(u32)); 2125 2126 /* 2127 * FIXME 2128 * This should be done in ocfs2_journal_init(), but any inode 2129 * writes back operation will cause the filesystem to crash. 2130 */ 2131 status = ocfs2_journal_alloc(osb); 2132 if (status < 0) 2133 goto out_orphan_wipes; 2134 2135 INIT_WORK(&osb->dquot_drop_work, ocfs2_drop_dquot_refs); 2136 init_llist_head(&osb->dquot_drop_list); 2137 2138 /* get some pseudo constants for clustersize bits */ 2139 osb->s_clustersize_bits = 2140 le32_to_cpu(di->id2.i_super.s_clustersize_bits); 2141 osb->s_clustersize = 1 << osb->s_clustersize_bits; 2142 2143 if (osb->s_clustersize < OCFS2_MIN_CLUSTERSIZE || 2144 osb->s_clustersize > OCFS2_MAX_CLUSTERSIZE) { 2145 mlog(ML_ERROR, "Volume has invalid cluster size (%d)\n", 2146 osb->s_clustersize); 2147 status = -EINVAL; 2148 goto out_journal; 2149 } 2150 2151 total_blocks = ocfs2_clusters_to_blocks(osb->sb, 2152 le32_to_cpu(di->i_clusters)); 2153 2154 status = generic_check_addressable(osb->sb->s_blocksize_bits, 2155 total_blocks); 2156 if (status) { 2157 mlog(ML_ERROR, "Volume too large " 2158 "to mount safely on this system"); 2159 status = -EFBIG; 2160 goto out_journal; 2161 } 2162 2163 if (ocfs2_setup_osb_uuid(osb, di->id2.i_super.s_uuid, 2164 sizeof(di->id2.i_super.s_uuid))) { 2165 mlog(ML_ERROR, "Out of memory trying to setup our uuid.\n"); 2166 status = -ENOMEM; 2167 goto out_journal; 2168 } 2169 2170 strscpy(osb->vol_label, di->id2.i_super.s_label, 2171 OCFS2_MAX_VOL_LABEL_LEN); 2172 osb->root_blkno = le64_to_cpu(di->id2.i_super.s_root_blkno); 2173 osb->system_dir_blkno = le64_to_cpu(di->id2.i_super.s_system_dir_blkno); 2174 osb->first_cluster_group_blkno = 2175 le64_to_cpu(di->id2.i_super.s_first_cluster_group); 2176 osb->fs_generation = le32_to_cpu(di->i_fs_generation); 2177 osb->uuid_hash = le32_to_cpu(di->id2.i_super.s_uuid_hash); 2178 trace_ocfs2_initialize_super(osb->vol_label, osb->uuid_str, 2179 (unsigned long long)osb->root_blkno, 2180 (unsigned long long)osb->system_dir_blkno, 2181 osb->s_clustersize_bits); 2182 2183 osb->osb_dlm_debug = ocfs2_new_dlm_debug(); 2184 if (!osb->osb_dlm_debug) { 2185 status = -ENOMEM; 2186 mlog_errno(status); 2187 goto out_uuid_str; 2188 } 2189 2190 atomic_set(&osb->vol_state, VOLUME_INIT); 2191 2192 /* load root, system_dir, and all global system inodes */ 2193 status = ocfs2_init_global_system_inodes(osb); 2194 if (status < 0) { 2195 mlog_errno(status); 2196 goto out_dlm_out; 2197 } 2198 2199 /* 2200 * global bitmap 2201 */ 2202 inode = ocfs2_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, 2203 OCFS2_INVALID_SLOT); 2204 if (!inode) { 2205 status = -EINVAL; 2206 mlog_errno(status); 2207 goto out_system_inodes; 2208 } 2209 2210 osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno; 2211 osb->osb_clusters_at_boot = OCFS2_I(inode)->ip_clusters; 2212 iput(inode); 2213 2214 osb->bitmap_cpg = ocfs2_group_bitmap_size(sb, 0, 2215 osb->s_feature_incompat) * 8; 2216 2217 status = ocfs2_init_slot_info(osb); 2218 if (status < 0) { 2219 mlog_errno(status); 2220 goto out_system_inodes; 2221 } 2222 2223 osb->ocfs2_wq = alloc_ordered_workqueue("ocfs2_wq", WQ_MEM_RECLAIM); 2224 if (!osb->ocfs2_wq) { 2225 status = -ENOMEM; 2226 mlog_errno(status); 2227 goto out_slot_info; 2228 } 2229 2230 return status; 2231 2232 out_slot_info: 2233 ocfs2_free_slot_info(osb); 2234 out_system_inodes: 2235 ocfs2_release_system_inodes(osb); 2236 out_dlm_out: 2237 ocfs2_put_dlm_debug(osb->osb_dlm_debug); 2238 out_uuid_str: 2239 kfree(osb->uuid_str); 2240 out_journal: 2241 kfree(osb->journal); 2242 out_orphan_wipes: 2243 kfree(osb->osb_orphan_wipes); 2244 out_slot_recovery_gen: 2245 kfree(osb->slot_recovery_generations); 2246 out_vol_label: 2247 kfree(osb->vol_label); 2248 out_recovery_map: 2249 kfree(osb->recovery_map); 2250 out: 2251 kfree(osb); 2252 sb->s_fs_info = NULL; 2253 return status; 2254 } 2255 2256 /* 2257 * will return: -EAGAIN if it is ok to keep searching for superblocks 2258 * -EINVAL if there is a bad superblock 2259 * 0 on success 2260 */ 2261 static int ocfs2_verify_volume(struct ocfs2_dinode *di, 2262 struct buffer_head *bh, 2263 u32 blksz, 2264 struct ocfs2_blockcheck_stats *stats) 2265 { 2266 int status = -EAGAIN; 2267 u32 blksz_bits; 2268 2269 if (memcmp(di->i_signature, OCFS2_SUPER_BLOCK_SIGNATURE, 2270 strlen(OCFS2_SUPER_BLOCK_SIGNATURE)) == 0) { 2271 /* We have to do a raw check of the feature here */ 2272 if (le32_to_cpu(di->id2.i_super.s_feature_incompat) & 2273 OCFS2_FEATURE_INCOMPAT_META_ECC) { 2274 status = ocfs2_block_check_validate(bh->b_data, 2275 bh->b_size, 2276 &di->i_check, 2277 stats); 2278 if (status) 2279 goto out; 2280 } 2281 status = -EINVAL; 2282 /* Acceptable block sizes are 512 bytes, 1K, 2K and 4K. */ 2283 blksz_bits = le32_to_cpu(di->id2.i_super.s_blocksize_bits); 2284 if (blksz_bits < 9 || blksz_bits > 12) { 2285 mlog(ML_ERROR, "found superblock with incorrect block " 2286 "size bits: found %u, should be 9, 10, 11, or 12\n", 2287 blksz_bits); 2288 } else if ((1 << blksz_bits) != blksz) { 2289 mlog(ML_ERROR, "found superblock with incorrect block " 2290 "size: found %u, should be %u\n", 1 << blksz_bits, blksz); 2291 } else if (le16_to_cpu(di->id2.i_super.s_major_rev_level) != 2292 OCFS2_MAJOR_REV_LEVEL || 2293 le16_to_cpu(di->id2.i_super.s_minor_rev_level) != 2294 OCFS2_MINOR_REV_LEVEL) { 2295 mlog(ML_ERROR, "found superblock with bad version: " 2296 "found %u.%u, should be %u.%u\n", 2297 le16_to_cpu(di->id2.i_super.s_major_rev_level), 2298 le16_to_cpu(di->id2.i_super.s_minor_rev_level), 2299 OCFS2_MAJOR_REV_LEVEL, 2300 OCFS2_MINOR_REV_LEVEL); 2301 } else if (bh->b_blocknr != le64_to_cpu(di->i_blkno)) { 2302 mlog(ML_ERROR, "bad block number on superblock: " 2303 "found %llu, should be %llu\n", 2304 (unsigned long long)le64_to_cpu(di->i_blkno), 2305 (unsigned long long)bh->b_blocknr); 2306 } else if (le32_to_cpu(di->id2.i_super.s_clustersize_bits) < 12 || 2307 le32_to_cpu(di->id2.i_super.s_clustersize_bits) > 20) { 2308 mlog(ML_ERROR, "bad cluster size bit found: %u\n", 2309 le32_to_cpu(di->id2.i_super.s_clustersize_bits)); 2310 } else if (!le64_to_cpu(di->id2.i_super.s_root_blkno)) { 2311 mlog(ML_ERROR, "bad root_blkno: 0\n"); 2312 } else if (!le64_to_cpu(di->id2.i_super.s_system_dir_blkno)) { 2313 mlog(ML_ERROR, "bad system_dir_blkno: 0\n"); 2314 } else if (le16_to_cpu(di->id2.i_super.s_max_slots) > OCFS2_MAX_SLOTS) { 2315 mlog(ML_ERROR, 2316 "Superblock slots found greater than file system " 2317 "maximum: found %u, max %u\n", 2318 le16_to_cpu(di->id2.i_super.s_max_slots), 2319 OCFS2_MAX_SLOTS); 2320 } else { 2321 /* found it! */ 2322 status = 0; 2323 } 2324 } 2325 2326 out: 2327 if (status && status != -EAGAIN) 2328 mlog_errno(status); 2329 return status; 2330 } 2331 2332 static int ocfs2_check_volume(struct ocfs2_super *osb) 2333 { 2334 int status; 2335 int dirty; 2336 int local; 2337 struct ocfs2_dinode *local_alloc = NULL; /* only used if we 2338 * recover 2339 * ourselves. */ 2340 2341 /* Init our journal object. */ 2342 status = ocfs2_journal_init(osb, &dirty); 2343 if (status < 0) { 2344 mlog(ML_ERROR, "Could not initialize journal!\n"); 2345 goto finally; 2346 } 2347 2348 /* Now that journal has been initialized, check to make sure 2349 entire volume is addressable. */ 2350 status = ocfs2_journal_addressable(osb); 2351 if (status) 2352 goto finally; 2353 2354 /* If the journal was unmounted cleanly then we don't want to 2355 * recover anything. Otherwise, journal_load will do that 2356 * dirty work for us :) */ 2357 if (!dirty) { 2358 status = ocfs2_journal_wipe(osb->journal, 0); 2359 if (status < 0) { 2360 mlog_errno(status); 2361 goto finally; 2362 } 2363 } else { 2364 printk(KERN_NOTICE "ocfs2: File system on device (%s) was not " 2365 "unmounted cleanly, recovering it.\n", osb->dev_str); 2366 } 2367 2368 local = ocfs2_mount_local(osb); 2369 2370 /* will play back anything left in the journal. */ 2371 status = ocfs2_journal_load(osb->journal, local, dirty); 2372 if (status < 0) { 2373 mlog(ML_ERROR, "ocfs2 journal load failed! %d\n", status); 2374 goto finally; 2375 } 2376 2377 if (osb->s_mount_opt & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT) 2378 jbd2_journal_set_features(osb->journal->j_journal, 2379 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 2380 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 2381 else 2382 jbd2_journal_clear_features(osb->journal->j_journal, 2383 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 2384 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 2385 2386 if (dirty) { 2387 /* recover my local alloc if we didn't unmount cleanly. */ 2388 status = ocfs2_begin_local_alloc_recovery(osb, 2389 osb->slot_num, 2390 &local_alloc); 2391 if (status < 0) { 2392 mlog_errno(status); 2393 goto finally; 2394 } 2395 /* we complete the recovery process after we've marked 2396 * ourselves as mounted. */ 2397 } 2398 2399 status = ocfs2_load_local_alloc(osb); 2400 if (status < 0) { 2401 mlog_errno(status); 2402 goto finally; 2403 } 2404 2405 if (dirty) { 2406 /* Recovery will be completed after we've mounted the 2407 * rest of the volume. */ 2408 osb->local_alloc_copy = local_alloc; 2409 local_alloc = NULL; 2410 } 2411 2412 /* go through each journal, trylock it and if you get the 2413 * lock, and it's marked as dirty, set the bit in the recover 2414 * map and launch a recovery thread for it. */ 2415 status = ocfs2_mark_dead_nodes(osb); 2416 if (status < 0) { 2417 mlog_errno(status); 2418 goto finally; 2419 } 2420 2421 status = ocfs2_compute_replay_slots(osb); 2422 if (status < 0) 2423 mlog_errno(status); 2424 2425 finally: 2426 kfree(local_alloc); 2427 2428 if (status) 2429 mlog_errno(status); 2430 return status; 2431 } 2432 2433 /* 2434 * The routine gets called from dismount or close whenever a dismount on 2435 * volume is requested and the osb open count becomes 1. 2436 * It will remove the osb from the global list and also free up all the 2437 * initialized resources and fileobject. 2438 */ 2439 static void ocfs2_delete_osb(struct ocfs2_super *osb) 2440 { 2441 /* This function assumes that the caller has the main osb resource */ 2442 2443 /* ocfs2_initializer_super have already created this workqueue */ 2444 if (osb->ocfs2_wq) 2445 destroy_workqueue(osb->ocfs2_wq); 2446 2447 ocfs2_free_slot_info(osb); 2448 2449 kfree(osb->osb_orphan_wipes); 2450 kfree(osb->slot_recovery_generations); 2451 /* FIXME 2452 * This belongs in journal shutdown, but because we have to 2453 * allocate osb->journal at the middle of ocfs2_initialize_super(), 2454 * we free it here. 2455 */ 2456 kfree(osb->journal); 2457 kfree(osb->local_alloc_copy); 2458 kfree(osb->uuid_str); 2459 kfree(osb->vol_label); 2460 ocfs2_put_dlm_debug(osb->osb_dlm_debug); 2461 memset(osb, 0, sizeof(struct ocfs2_super)); 2462 } 2463 2464 /* Depending on the mount option passed, perform one of the following: 2465 * Put OCFS2 into a readonly state (default) 2466 * Return EIO so that only the process errs 2467 * Fix the error as if fsck.ocfs2 -y 2468 * panic 2469 */ 2470 static int ocfs2_handle_error(struct super_block *sb) 2471 { 2472 struct ocfs2_super *osb = OCFS2_SB(sb); 2473 int rv = 0; 2474 2475 ocfs2_set_osb_flag(osb, OCFS2_OSB_ERROR_FS); 2476 pr_crit("On-disk corruption discovered. " 2477 "Please run fsck.ocfs2 once the filesystem is unmounted.\n"); 2478 2479 if (osb->s_mount_opt & OCFS2_MOUNT_ERRORS_PANIC) { 2480 panic("OCFS2: (device %s): panic forced after error\n", 2481 sb->s_id); 2482 } else if (osb->s_mount_opt & OCFS2_MOUNT_ERRORS_CONT) { 2483 pr_crit("OCFS2: Returning error to the calling process.\n"); 2484 rv = -EIO; 2485 } else { /* default option */ 2486 rv = -EROFS; 2487 if (sb_rdonly(sb) && (ocfs2_is_soft_readonly(osb) || ocfs2_is_hard_readonly(osb))) 2488 return rv; 2489 2490 pr_crit("OCFS2: File system is now read-only.\n"); 2491 sb->s_flags |= SB_RDONLY; 2492 ocfs2_set_ro_flag(osb, 0); 2493 } 2494 2495 return rv; 2496 } 2497 2498 int __ocfs2_error(struct super_block *sb, const char *function, 2499 const char *fmt, ...) 2500 { 2501 struct va_format vaf; 2502 va_list args; 2503 2504 va_start(args, fmt); 2505 vaf.fmt = fmt; 2506 vaf.va = &args; 2507 2508 /* Not using mlog here because we want to show the actual 2509 * function the error came from. */ 2510 printk(KERN_CRIT "OCFS2: ERROR (device %s): %s: %pV", 2511 sb->s_id, function, &vaf); 2512 2513 va_end(args); 2514 2515 return ocfs2_handle_error(sb); 2516 } 2517 2518 /* Handle critical errors. This is intentionally more drastic than 2519 * ocfs2_handle_error, so we only use for things like journal errors, 2520 * etc. */ 2521 void __ocfs2_abort(struct super_block *sb, const char *function, 2522 const char *fmt, ...) 2523 { 2524 struct va_format vaf; 2525 va_list args; 2526 2527 va_start(args, fmt); 2528 2529 vaf.fmt = fmt; 2530 vaf.va = &args; 2531 2532 printk(KERN_CRIT "OCFS2: abort (device %s): %s: %pV", 2533 sb->s_id, function, &vaf); 2534 2535 va_end(args); 2536 2537 /* We don't have the cluster support yet to go straight to 2538 * hard readonly in here. Until then, we want to keep 2539 * ocfs2_abort() so that we can at least mark critical 2540 * errors. 2541 * 2542 * TODO: This should abort the journal and alert other nodes 2543 * that our slot needs recovery. */ 2544 2545 /* Force a panic(). This stinks, but it's better than letting 2546 * things continue without having a proper hard readonly 2547 * here. */ 2548 if (!ocfs2_mount_local(OCFS2_SB(sb))) 2549 OCFS2_SB(sb)->s_mount_opt |= OCFS2_MOUNT_ERRORS_PANIC; 2550 ocfs2_handle_error(sb); 2551 } 2552 2553 /* 2554 * Void signal blockers, because in-kernel sigprocmask() only fails 2555 * when SIG_* is wrong. 2556 */ 2557 void ocfs2_block_signals(sigset_t *oldset) 2558 { 2559 int rc; 2560 sigset_t blocked; 2561 2562 sigfillset(&blocked); 2563 rc = sigprocmask(SIG_BLOCK, &blocked, oldset); 2564 BUG_ON(rc); 2565 } 2566 2567 void ocfs2_unblock_signals(sigset_t *oldset) 2568 { 2569 int rc = sigprocmask(SIG_SETMASK, oldset, NULL); 2570 BUG_ON(rc); 2571 } 2572 2573 module_init(ocfs2_init); 2574 module_exit(ocfs2_exit); 2575