1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Simple file system for zoned block devices exposing zones as files. 4 * 5 * Copyright (C) 2019 Western Digital Corporation or its affiliates. 6 */ 7 #include <linux/module.h> 8 #include <linux/pagemap.h> 9 #include <linux/magic.h> 10 #include <linux/iomap.h> 11 #include <linux/init.h> 12 #include <linux/slab.h> 13 #include <linux/blkdev.h> 14 #include <linux/statfs.h> 15 #include <linux/writeback.h> 16 #include <linux/quotaops.h> 17 #include <linux/seq_file.h> 18 #include <linux/parser.h> 19 #include <linux/uio.h> 20 #include <linux/mman.h> 21 #include <linux/sched/mm.h> 22 #include <linux/crc32.h> 23 #include <linux/task_io_accounting_ops.h> 24 25 #include "zonefs.h" 26 27 #define CREATE_TRACE_POINTS 28 #include "trace.h" 29 30 /* 31 * Get the name of a zone group directory. 32 */ 33 static const char *zonefs_zgroup_name(enum zonefs_ztype ztype) 34 { 35 switch (ztype) { 36 case ZONEFS_ZTYPE_CNV: 37 return "cnv"; 38 case ZONEFS_ZTYPE_SEQ: 39 return "seq"; 40 default: 41 WARN_ON_ONCE(1); 42 return "???"; 43 } 44 } 45 46 /* 47 * Manage the active zone count. 48 */ 49 static void zonefs_account_active(struct super_block *sb, 50 struct zonefs_zone *z) 51 { 52 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 53 54 if (zonefs_zone_is_cnv(z)) 55 return; 56 57 /* 58 * For zones that transitioned to the offline or readonly condition, 59 * we only need to clear the active state. 60 */ 61 if (z->z_flags & (ZONEFS_ZONE_OFFLINE | ZONEFS_ZONE_READONLY)) 62 goto out; 63 64 /* 65 * If the zone is active, that is, if it is explicitly open or 66 * partially written, check if it was already accounted as active. 67 */ 68 if ((z->z_flags & ZONEFS_ZONE_OPEN) || 69 (z->z_wpoffset > 0 && z->z_wpoffset < z->z_capacity)) { 70 if (!(z->z_flags & ZONEFS_ZONE_ACTIVE)) { 71 z->z_flags |= ZONEFS_ZONE_ACTIVE; 72 atomic_inc(&sbi->s_active_seq_files); 73 } 74 return; 75 } 76 77 out: 78 /* The zone is not active. If it was, update the active count */ 79 if (z->z_flags & ZONEFS_ZONE_ACTIVE) { 80 z->z_flags &= ~ZONEFS_ZONE_ACTIVE; 81 atomic_dec(&sbi->s_active_seq_files); 82 } 83 } 84 85 /* 86 * Manage the active zone count. Called with zi->i_truncate_mutex held. 87 */ 88 void zonefs_inode_account_active(struct inode *inode) 89 { 90 lockdep_assert_held(&ZONEFS_I(inode)->i_truncate_mutex); 91 92 return zonefs_account_active(inode->i_sb, zonefs_inode_zone(inode)); 93 } 94 95 /* 96 * Execute a zone management operation. 97 */ 98 static int zonefs_zone_mgmt(struct super_block *sb, 99 struct zonefs_zone *z, enum req_op op) 100 { 101 int ret; 102 103 /* 104 * With ZNS drives, closing an explicitly open zone that has not been 105 * written will change the zone state to "closed", that is, the zone 106 * will remain active. Since this can then cause failure of explicit 107 * open operation on other zones if the drive active zone resources 108 * are exceeded, make sure that the zone does not remain active by 109 * resetting it. 110 */ 111 if (op == REQ_OP_ZONE_CLOSE && !z->z_wpoffset) 112 op = REQ_OP_ZONE_RESET; 113 114 trace_zonefs_zone_mgmt(sb, z, op); 115 ret = blkdev_zone_mgmt(sb->s_bdev, op, z->z_sector, 116 z->z_size >> SECTOR_SHIFT, GFP_NOFS); 117 if (ret) { 118 zonefs_err(sb, 119 "Zone management operation %s at %llu failed %d\n", 120 blk_op_str(op), z->z_sector, ret); 121 return ret; 122 } 123 124 return 0; 125 } 126 127 int zonefs_inode_zone_mgmt(struct inode *inode, enum req_op op) 128 { 129 lockdep_assert_held(&ZONEFS_I(inode)->i_truncate_mutex); 130 131 return zonefs_zone_mgmt(inode->i_sb, zonefs_inode_zone(inode), op); 132 } 133 134 void zonefs_i_size_write(struct inode *inode, loff_t isize) 135 { 136 struct zonefs_zone *z = zonefs_inode_zone(inode); 137 138 i_size_write(inode, isize); 139 140 /* 141 * A full zone is no longer open/active and does not need 142 * explicit closing. 143 */ 144 if (isize >= z->z_capacity) { 145 struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb); 146 147 if (z->z_flags & ZONEFS_ZONE_ACTIVE) 148 atomic_dec(&sbi->s_active_seq_files); 149 z->z_flags &= ~(ZONEFS_ZONE_OPEN | ZONEFS_ZONE_ACTIVE); 150 } 151 } 152 153 void zonefs_update_stats(struct inode *inode, loff_t new_isize) 154 { 155 struct super_block *sb = inode->i_sb; 156 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 157 loff_t old_isize = i_size_read(inode); 158 loff_t nr_blocks; 159 160 if (new_isize == old_isize) 161 return; 162 163 spin_lock(&sbi->s_lock); 164 165 /* 166 * This may be called for an update after an IO error. 167 * So beware of the values seen. 168 */ 169 if (new_isize < old_isize) { 170 nr_blocks = (old_isize - new_isize) >> sb->s_blocksize_bits; 171 if (sbi->s_used_blocks > nr_blocks) 172 sbi->s_used_blocks -= nr_blocks; 173 else 174 sbi->s_used_blocks = 0; 175 } else { 176 sbi->s_used_blocks += 177 (new_isize - old_isize) >> sb->s_blocksize_bits; 178 if (sbi->s_used_blocks > sbi->s_blocks) 179 sbi->s_used_blocks = sbi->s_blocks; 180 } 181 182 spin_unlock(&sbi->s_lock); 183 } 184 185 /* 186 * Check a zone condition. Return the amount of written (and still readable) 187 * data in the zone. 188 */ 189 static loff_t zonefs_check_zone_condition(struct super_block *sb, 190 struct zonefs_zone *z, 191 struct blk_zone *zone) 192 { 193 switch (zone->cond) { 194 case BLK_ZONE_COND_OFFLINE: 195 zonefs_warn(sb, "Zone %llu: offline zone\n", 196 z->z_sector); 197 z->z_flags |= ZONEFS_ZONE_OFFLINE; 198 return 0; 199 case BLK_ZONE_COND_READONLY: 200 /* 201 * The write pointer of read-only zones is invalid, so we cannot 202 * determine the zone wpoffset (inode size). We thus keep the 203 * zone wpoffset as is, which leads to an empty file 204 * (wpoffset == 0) on mount. For a runtime error, this keeps 205 * the inode size as it was when last updated so that the user 206 * can recover data. 207 */ 208 zonefs_warn(sb, "Zone %llu: read-only zone\n", 209 z->z_sector); 210 z->z_flags |= ZONEFS_ZONE_READONLY; 211 if (zonefs_zone_is_cnv(z)) 212 return z->z_capacity; 213 return z->z_wpoffset; 214 case BLK_ZONE_COND_FULL: 215 /* The write pointer of full zones is invalid. */ 216 return z->z_capacity; 217 default: 218 if (zonefs_zone_is_cnv(z)) 219 return z->z_capacity; 220 return (zone->wp - zone->start) << SECTOR_SHIFT; 221 } 222 } 223 224 /* 225 * Check a zone condition and adjust its inode access permissions for 226 * offline and readonly zones. 227 */ 228 static void zonefs_inode_update_mode(struct inode *inode) 229 { 230 struct zonefs_zone *z = zonefs_inode_zone(inode); 231 232 if (z->z_flags & ZONEFS_ZONE_OFFLINE) { 233 /* Offline zones cannot be read nor written */ 234 inode->i_flags |= S_IMMUTABLE; 235 inode->i_mode &= ~0777; 236 } else if (z->z_flags & ZONEFS_ZONE_READONLY) { 237 /* Readonly zones cannot be written */ 238 inode->i_flags |= S_IMMUTABLE; 239 if (z->z_flags & ZONEFS_ZONE_INIT_MODE) 240 inode->i_mode &= ~0777; 241 else 242 inode->i_mode &= ~0222; 243 } 244 245 z->z_flags &= ~ZONEFS_ZONE_INIT_MODE; 246 z->z_mode = inode->i_mode; 247 } 248 249 static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, 250 void *data) 251 { 252 struct blk_zone *z = data; 253 254 *z = *zone; 255 return 0; 256 } 257 258 static void zonefs_handle_io_error(struct inode *inode, struct blk_zone *zone, 259 bool write) 260 { 261 struct zonefs_zone *z = zonefs_inode_zone(inode); 262 struct super_block *sb = inode->i_sb; 263 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 264 loff_t isize, data_size; 265 266 /* 267 * Check the zone condition: if the zone is not "bad" (offline or 268 * read-only), read errors are simply signaled to the IO issuer as long 269 * as there is no inconsistency between the inode size and the amount of 270 * data writen in the zone (data_size). 271 */ 272 data_size = zonefs_check_zone_condition(sb, z, zone); 273 isize = i_size_read(inode); 274 if (!(z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)) && 275 !write && isize == data_size) 276 return; 277 278 /* 279 * At this point, we detected either a bad zone or an inconsistency 280 * between the inode size and the amount of data written in the zone. 281 * For the latter case, the cause may be a write IO error or an external 282 * action on the device. Two error patterns exist: 283 * 1) The inode size is lower than the amount of data in the zone: 284 * a write operation partially failed and data was writen at the end 285 * of the file. This can happen in the case of a large direct IO 286 * needing several BIOs and/or write requests to be processed. 287 * 2) The inode size is larger than the amount of data in the zone: 288 * this can happen with a deferred write error with the use of the 289 * device side write cache after getting successful write IO 290 * completions. Other possibilities are (a) an external corruption, 291 * e.g. an application reset the zone directly, or (b) the device 292 * has a serious problem (e.g. firmware bug). 293 * 294 * In all cases, warn about inode size inconsistency and handle the 295 * IO error according to the zone condition and to the mount options. 296 */ 297 if (isize != data_size) 298 zonefs_warn(sb, 299 "inode %lu: invalid size %lld (should be %lld)\n", 300 inode->i_ino, isize, data_size); 301 302 /* 303 * First handle bad zones signaled by hardware. The mount options 304 * errors=zone-ro and errors=zone-offline result in changing the 305 * zone condition to read-only and offline respectively, as if the 306 * condition was signaled by the hardware. 307 */ 308 if ((z->z_flags & ZONEFS_ZONE_OFFLINE) || 309 (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL)) { 310 zonefs_warn(sb, "inode %lu: read/write access disabled\n", 311 inode->i_ino); 312 if (!(z->z_flags & ZONEFS_ZONE_OFFLINE)) 313 z->z_flags |= ZONEFS_ZONE_OFFLINE; 314 zonefs_inode_update_mode(inode); 315 data_size = 0; 316 } else if ((z->z_flags & ZONEFS_ZONE_READONLY) || 317 (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO)) { 318 zonefs_warn(sb, "inode %lu: write access disabled\n", 319 inode->i_ino); 320 if (!(z->z_flags & ZONEFS_ZONE_READONLY)) 321 z->z_flags |= ZONEFS_ZONE_READONLY; 322 zonefs_inode_update_mode(inode); 323 data_size = isize; 324 } else if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO && 325 data_size > isize) { 326 /* Do not expose garbage data */ 327 data_size = isize; 328 } 329 330 /* 331 * If the filesystem is mounted with the explicit-open mount option, we 332 * need to clear the ZONEFS_ZONE_OPEN flag if the zone transitioned to 333 * the read-only or offline condition, to avoid attempting an explicit 334 * close of the zone when the inode file is closed. 335 */ 336 if ((sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) && 337 (z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE))) 338 z->z_flags &= ~ZONEFS_ZONE_OPEN; 339 340 /* 341 * If error=remount-ro was specified, any error result in remounting 342 * the volume as read-only. 343 */ 344 if ((sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO) && !sb_rdonly(sb)) { 345 zonefs_warn(sb, "remounting filesystem read-only\n"); 346 sb->s_flags |= SB_RDONLY; 347 } 348 349 /* 350 * Update block usage stats and the inode size to prevent access to 351 * invalid data. 352 */ 353 zonefs_update_stats(inode, data_size); 354 zonefs_i_size_write(inode, data_size); 355 z->z_wpoffset = data_size; 356 zonefs_inode_account_active(inode); 357 } 358 359 /* 360 * When an file IO error occurs, check the file zone to see if there is a change 361 * in the zone condition (e.g. offline or read-only). For a failed write to a 362 * sequential zone, the zone write pointer position must also be checked to 363 * eventually correct the file size and zonefs inode write pointer offset 364 * (which can be out of sync with the drive due to partial write failures). 365 */ 366 void __zonefs_io_error(struct inode *inode, bool write) 367 { 368 struct zonefs_zone *z = zonefs_inode_zone(inode); 369 struct super_block *sb = inode->i_sb; 370 unsigned int noio_flag; 371 struct blk_zone zone; 372 int ret; 373 374 /* 375 * Conventional zone have no write pointer and cannot become read-only 376 * or offline. So simply fake a report for a single or aggregated zone 377 * and let zonefs_handle_io_error() correct the zone inode information 378 * according to the mount options. 379 */ 380 if (!zonefs_zone_is_seq(z)) { 381 zone.start = z->z_sector; 382 zone.len = z->z_size >> SECTOR_SHIFT; 383 zone.wp = zone.start + zone.len; 384 zone.type = BLK_ZONE_TYPE_CONVENTIONAL; 385 zone.cond = BLK_ZONE_COND_NOT_WP; 386 zone.capacity = zone.len; 387 goto handle_io_error; 388 } 389 390 /* 391 * Memory allocations in blkdev_report_zones() can trigger a memory 392 * reclaim which may in turn cause a recursion into zonefs as well as 393 * struct request allocations for the same device. The former case may 394 * end up in a deadlock on the inode truncate mutex, while the latter 395 * may prevent IO forward progress. Executing the report zones under 396 * the GFP_NOIO context avoids both problems. 397 */ 398 noio_flag = memalloc_noio_save(); 399 ret = blkdev_report_zones(sb->s_bdev, z->z_sector, 1, 400 zonefs_io_error_cb, &zone); 401 memalloc_noio_restore(noio_flag); 402 403 if (ret != 1) { 404 zonefs_err(sb, "Get inode %lu zone information failed %d\n", 405 inode->i_ino, ret); 406 zonefs_warn(sb, "remounting filesystem read-only\n"); 407 sb->s_flags |= SB_RDONLY; 408 return; 409 } 410 411 handle_io_error: 412 zonefs_handle_io_error(inode, &zone, write); 413 } 414 415 static struct kmem_cache *zonefs_inode_cachep; 416 417 static struct inode *zonefs_alloc_inode(struct super_block *sb) 418 { 419 struct zonefs_inode_info *zi; 420 421 zi = alloc_inode_sb(sb, zonefs_inode_cachep, GFP_KERNEL); 422 if (!zi) 423 return NULL; 424 425 inode_init_once(&zi->i_vnode); 426 mutex_init(&zi->i_truncate_mutex); 427 zi->i_wr_refcnt = 0; 428 429 return &zi->i_vnode; 430 } 431 432 static void zonefs_free_inode(struct inode *inode) 433 { 434 kmem_cache_free(zonefs_inode_cachep, ZONEFS_I(inode)); 435 } 436 437 /* 438 * File system stat. 439 */ 440 static int zonefs_statfs(struct dentry *dentry, struct kstatfs *buf) 441 { 442 struct super_block *sb = dentry->d_sb; 443 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 444 enum zonefs_ztype t; 445 446 buf->f_type = ZONEFS_MAGIC; 447 buf->f_bsize = sb->s_blocksize; 448 buf->f_namelen = ZONEFS_NAME_MAX; 449 450 spin_lock(&sbi->s_lock); 451 452 buf->f_blocks = sbi->s_blocks; 453 if (WARN_ON(sbi->s_used_blocks > sbi->s_blocks)) 454 buf->f_bfree = 0; 455 else 456 buf->f_bfree = buf->f_blocks - sbi->s_used_blocks; 457 buf->f_bavail = buf->f_bfree; 458 459 for (t = 0; t < ZONEFS_ZTYPE_MAX; t++) { 460 if (sbi->s_zgroup[t].g_nr_zones) 461 buf->f_files += sbi->s_zgroup[t].g_nr_zones + 1; 462 } 463 buf->f_ffree = 0; 464 465 spin_unlock(&sbi->s_lock); 466 467 buf->f_fsid = uuid_to_fsid(sbi->s_uuid.b); 468 469 return 0; 470 } 471 472 enum { 473 Opt_errors_ro, Opt_errors_zro, Opt_errors_zol, Opt_errors_repair, 474 Opt_explicit_open, Opt_err, 475 }; 476 477 static const match_table_t tokens = { 478 { Opt_errors_ro, "errors=remount-ro"}, 479 { Opt_errors_zro, "errors=zone-ro"}, 480 { Opt_errors_zol, "errors=zone-offline"}, 481 { Opt_errors_repair, "errors=repair"}, 482 { Opt_explicit_open, "explicit-open" }, 483 { Opt_err, NULL} 484 }; 485 486 static int zonefs_parse_options(struct super_block *sb, char *options) 487 { 488 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 489 substring_t args[MAX_OPT_ARGS]; 490 char *p; 491 492 if (!options) 493 return 0; 494 495 while ((p = strsep(&options, ",")) != NULL) { 496 int token; 497 498 if (!*p) 499 continue; 500 501 token = match_token(p, tokens, args); 502 switch (token) { 503 case Opt_errors_ro: 504 sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK; 505 sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_RO; 506 break; 507 case Opt_errors_zro: 508 sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK; 509 sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_ZRO; 510 break; 511 case Opt_errors_zol: 512 sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK; 513 sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_ZOL; 514 break; 515 case Opt_errors_repair: 516 sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK; 517 sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_REPAIR; 518 break; 519 case Opt_explicit_open: 520 sbi->s_mount_opts |= ZONEFS_MNTOPT_EXPLICIT_OPEN; 521 break; 522 default: 523 return -EINVAL; 524 } 525 } 526 527 return 0; 528 } 529 530 static int zonefs_show_options(struct seq_file *seq, struct dentry *root) 531 { 532 struct zonefs_sb_info *sbi = ZONEFS_SB(root->d_sb); 533 534 if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO) 535 seq_puts(seq, ",errors=remount-ro"); 536 if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO) 537 seq_puts(seq, ",errors=zone-ro"); 538 if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL) 539 seq_puts(seq, ",errors=zone-offline"); 540 if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_REPAIR) 541 seq_puts(seq, ",errors=repair"); 542 543 return 0; 544 } 545 546 static int zonefs_remount(struct super_block *sb, int *flags, char *data) 547 { 548 sync_filesystem(sb); 549 550 return zonefs_parse_options(sb, data); 551 } 552 553 static int zonefs_inode_setattr(struct mnt_idmap *idmap, 554 struct dentry *dentry, struct iattr *iattr) 555 { 556 struct inode *inode = d_inode(dentry); 557 int ret; 558 559 if (unlikely(IS_IMMUTABLE(inode))) 560 return -EPERM; 561 562 ret = setattr_prepare(&nop_mnt_idmap, dentry, iattr); 563 if (ret) 564 return ret; 565 566 /* 567 * Since files and directories cannot be created nor deleted, do not 568 * allow setting any write attributes on the sub-directories grouping 569 * files by zone type. 570 */ 571 if ((iattr->ia_valid & ATTR_MODE) && S_ISDIR(inode->i_mode) && 572 (iattr->ia_mode & 0222)) 573 return -EPERM; 574 575 if (((iattr->ia_valid & ATTR_UID) && 576 !uid_eq(iattr->ia_uid, inode->i_uid)) || 577 ((iattr->ia_valid & ATTR_GID) && 578 !gid_eq(iattr->ia_gid, inode->i_gid))) { 579 ret = dquot_transfer(&nop_mnt_idmap, inode, iattr); 580 if (ret) 581 return ret; 582 } 583 584 if (iattr->ia_valid & ATTR_SIZE) { 585 ret = zonefs_file_truncate(inode, iattr->ia_size); 586 if (ret) 587 return ret; 588 } 589 590 setattr_copy(&nop_mnt_idmap, inode, iattr); 591 592 if (S_ISREG(inode->i_mode)) { 593 struct zonefs_zone *z = zonefs_inode_zone(inode); 594 595 z->z_mode = inode->i_mode; 596 z->z_uid = inode->i_uid; 597 z->z_gid = inode->i_gid; 598 } 599 600 return 0; 601 } 602 603 static const struct inode_operations zonefs_file_inode_operations = { 604 .setattr = zonefs_inode_setattr, 605 }; 606 607 static long zonefs_fname_to_fno(const struct qstr *fname) 608 { 609 const char *name = fname->name; 610 unsigned int len = fname->len; 611 long fno = 0, shift = 1; 612 const char *rname; 613 char c = *name; 614 unsigned int i; 615 616 /* 617 * File names are always a base-10 number string without any 618 * leading 0s. 619 */ 620 if (!isdigit(c)) 621 return -ENOENT; 622 623 if (len > 1 && c == '0') 624 return -ENOENT; 625 626 if (len == 1) 627 return c - '0'; 628 629 for (i = 0, rname = name + len - 1; i < len; i++, rname--) { 630 c = *rname; 631 if (!isdigit(c)) 632 return -ENOENT; 633 fno += (c - '0') * shift; 634 shift *= 10; 635 } 636 637 return fno; 638 } 639 640 static struct inode *zonefs_get_file_inode(struct inode *dir, 641 struct dentry *dentry) 642 { 643 struct zonefs_zone_group *zgroup = dir->i_private; 644 struct super_block *sb = dir->i_sb; 645 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 646 struct zonefs_zone *z; 647 struct inode *inode; 648 ino_t ino; 649 long fno; 650 651 /* Get the file number from the file name */ 652 fno = zonefs_fname_to_fno(&dentry->d_name); 653 if (fno < 0) 654 return ERR_PTR(fno); 655 656 if (!zgroup->g_nr_zones || fno >= zgroup->g_nr_zones) 657 return ERR_PTR(-ENOENT); 658 659 z = &zgroup->g_zones[fno]; 660 ino = z->z_sector >> sbi->s_zone_sectors_shift; 661 inode = iget_locked(sb, ino); 662 if (!inode) 663 return ERR_PTR(-ENOMEM); 664 if (!(inode->i_state & I_NEW)) { 665 WARN_ON_ONCE(inode->i_private != z); 666 return inode; 667 } 668 669 inode->i_ino = ino; 670 inode->i_mode = z->z_mode; 671 inode_set_mtime_to_ts(inode, 672 inode_set_atime_to_ts(inode, inode_set_ctime_to_ts(inode, inode_get_ctime(dir)))); 673 inode->i_uid = z->z_uid; 674 inode->i_gid = z->z_gid; 675 inode->i_size = z->z_wpoffset; 676 inode->i_blocks = z->z_capacity >> SECTOR_SHIFT; 677 inode->i_private = z; 678 679 inode->i_op = &zonefs_file_inode_operations; 680 inode->i_fop = &zonefs_file_operations; 681 inode->i_mapping->a_ops = &zonefs_file_aops; 682 683 /* Update the inode access rights depending on the zone condition */ 684 zonefs_inode_update_mode(inode); 685 686 unlock_new_inode(inode); 687 688 return inode; 689 } 690 691 static struct inode *zonefs_get_zgroup_inode(struct super_block *sb, 692 enum zonefs_ztype ztype) 693 { 694 struct inode *root = d_inode(sb->s_root); 695 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 696 struct inode *inode; 697 ino_t ino = bdev_nr_zones(sb->s_bdev) + ztype + 1; 698 699 inode = iget_locked(sb, ino); 700 if (!inode) 701 return ERR_PTR(-ENOMEM); 702 if (!(inode->i_state & I_NEW)) 703 return inode; 704 705 inode->i_ino = ino; 706 inode_init_owner(&nop_mnt_idmap, inode, root, S_IFDIR | 0555); 707 inode->i_size = sbi->s_zgroup[ztype].g_nr_zones; 708 inode_set_mtime_to_ts(inode, 709 inode_set_atime_to_ts(inode, inode_set_ctime_to_ts(inode, inode_get_ctime(root)))); 710 inode->i_private = &sbi->s_zgroup[ztype]; 711 set_nlink(inode, 2); 712 713 inode->i_op = &zonefs_dir_inode_operations; 714 inode->i_fop = &zonefs_dir_operations; 715 716 unlock_new_inode(inode); 717 718 return inode; 719 } 720 721 722 static struct inode *zonefs_get_dir_inode(struct inode *dir, 723 struct dentry *dentry) 724 { 725 struct super_block *sb = dir->i_sb; 726 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 727 const char *name = dentry->d_name.name; 728 enum zonefs_ztype ztype; 729 730 /* 731 * We only need to check for the "seq" directory and 732 * the "cnv" directory if we have conventional zones. 733 */ 734 if (dentry->d_name.len != 3) 735 return ERR_PTR(-ENOENT); 736 737 for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) { 738 if (sbi->s_zgroup[ztype].g_nr_zones && 739 memcmp(name, zonefs_zgroup_name(ztype), 3) == 0) 740 break; 741 } 742 if (ztype == ZONEFS_ZTYPE_MAX) 743 return ERR_PTR(-ENOENT); 744 745 return zonefs_get_zgroup_inode(sb, ztype); 746 } 747 748 static struct dentry *zonefs_lookup(struct inode *dir, struct dentry *dentry, 749 unsigned int flags) 750 { 751 struct inode *inode; 752 753 if (dentry->d_name.len > ZONEFS_NAME_MAX) 754 return ERR_PTR(-ENAMETOOLONG); 755 756 if (dir == d_inode(dir->i_sb->s_root)) 757 inode = zonefs_get_dir_inode(dir, dentry); 758 else 759 inode = zonefs_get_file_inode(dir, dentry); 760 761 return d_splice_alias(inode, dentry); 762 } 763 764 static int zonefs_readdir_root(struct file *file, struct dir_context *ctx) 765 { 766 struct inode *inode = file_inode(file); 767 struct super_block *sb = inode->i_sb; 768 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 769 enum zonefs_ztype ztype = ZONEFS_ZTYPE_CNV; 770 ino_t base_ino = bdev_nr_zones(sb->s_bdev) + 1; 771 772 if (ctx->pos >= inode->i_size) 773 return 0; 774 775 if (!dir_emit_dots(file, ctx)) 776 return 0; 777 778 if (ctx->pos == 2) { 779 if (!sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones) 780 ztype = ZONEFS_ZTYPE_SEQ; 781 782 if (!dir_emit(ctx, zonefs_zgroup_name(ztype), 3, 783 base_ino + ztype, DT_DIR)) 784 return 0; 785 ctx->pos++; 786 } 787 788 if (ctx->pos == 3 && ztype != ZONEFS_ZTYPE_SEQ) { 789 ztype = ZONEFS_ZTYPE_SEQ; 790 if (!dir_emit(ctx, zonefs_zgroup_name(ztype), 3, 791 base_ino + ztype, DT_DIR)) 792 return 0; 793 ctx->pos++; 794 } 795 796 return 0; 797 } 798 799 static int zonefs_readdir_zgroup(struct file *file, 800 struct dir_context *ctx) 801 { 802 struct inode *inode = file_inode(file); 803 struct zonefs_zone_group *zgroup = inode->i_private; 804 struct super_block *sb = inode->i_sb; 805 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 806 struct zonefs_zone *z; 807 int fname_len; 808 char *fname; 809 ino_t ino; 810 int f; 811 812 /* 813 * The size of zone group directories is equal to the number 814 * of zone files in the group and does note include the "." and 815 * ".." entries. Hence the "+ 2" here. 816 */ 817 if (ctx->pos >= inode->i_size + 2) 818 return 0; 819 820 if (!dir_emit_dots(file, ctx)) 821 return 0; 822 823 fname = kmalloc(ZONEFS_NAME_MAX, GFP_KERNEL); 824 if (!fname) 825 return -ENOMEM; 826 827 for (f = ctx->pos - 2; f < zgroup->g_nr_zones; f++) { 828 z = &zgroup->g_zones[f]; 829 ino = z->z_sector >> sbi->s_zone_sectors_shift; 830 fname_len = snprintf(fname, ZONEFS_NAME_MAX - 1, "%u", f); 831 if (!dir_emit(ctx, fname, fname_len, ino, DT_REG)) 832 break; 833 ctx->pos++; 834 } 835 836 kfree(fname); 837 838 return 0; 839 } 840 841 static int zonefs_readdir(struct file *file, struct dir_context *ctx) 842 { 843 struct inode *inode = file_inode(file); 844 845 if (inode == d_inode(inode->i_sb->s_root)) 846 return zonefs_readdir_root(file, ctx); 847 848 return zonefs_readdir_zgroup(file, ctx); 849 } 850 851 const struct inode_operations zonefs_dir_inode_operations = { 852 .lookup = zonefs_lookup, 853 .setattr = zonefs_inode_setattr, 854 }; 855 856 const struct file_operations zonefs_dir_operations = { 857 .llseek = generic_file_llseek, 858 .read = generic_read_dir, 859 .iterate_shared = zonefs_readdir, 860 }; 861 862 struct zonefs_zone_data { 863 struct super_block *sb; 864 unsigned int nr_zones[ZONEFS_ZTYPE_MAX]; 865 sector_t cnv_zone_start; 866 struct blk_zone *zones; 867 }; 868 869 static int zonefs_get_zone_info_cb(struct blk_zone *zone, unsigned int idx, 870 void *data) 871 { 872 struct zonefs_zone_data *zd = data; 873 struct super_block *sb = zd->sb; 874 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 875 876 /* 877 * We do not care about the first zone: it contains the super block 878 * and not exposed as a file. 879 */ 880 if (!idx) 881 return 0; 882 883 /* 884 * Count the number of zones that will be exposed as files. 885 * For sequential zones, we always have as many files as zones. 886 * FOr conventional zones, the number of files depends on if we have 887 * conventional zones aggregation enabled. 888 */ 889 switch (zone->type) { 890 case BLK_ZONE_TYPE_CONVENTIONAL: 891 if (sbi->s_features & ZONEFS_F_AGGRCNV) { 892 /* One file per set of contiguous conventional zones */ 893 if (!(sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones) || 894 zone->start != zd->cnv_zone_start) 895 sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones++; 896 zd->cnv_zone_start = zone->start + zone->len; 897 } else { 898 /* One file per zone */ 899 sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones++; 900 } 901 break; 902 case BLK_ZONE_TYPE_SEQWRITE_REQ: 903 case BLK_ZONE_TYPE_SEQWRITE_PREF: 904 sbi->s_zgroup[ZONEFS_ZTYPE_SEQ].g_nr_zones++; 905 break; 906 default: 907 zonefs_err(zd->sb, "Unsupported zone type 0x%x\n", 908 zone->type); 909 return -EIO; 910 } 911 912 memcpy(&zd->zones[idx], zone, sizeof(struct blk_zone)); 913 914 return 0; 915 } 916 917 static int zonefs_get_zone_info(struct zonefs_zone_data *zd) 918 { 919 struct block_device *bdev = zd->sb->s_bdev; 920 int ret; 921 922 zd->zones = kvcalloc(bdev_nr_zones(bdev), sizeof(struct blk_zone), 923 GFP_KERNEL); 924 if (!zd->zones) 925 return -ENOMEM; 926 927 /* Get zones information from the device */ 928 ret = blkdev_report_zones(bdev, 0, BLK_ALL_ZONES, 929 zonefs_get_zone_info_cb, zd); 930 if (ret < 0) { 931 zonefs_err(zd->sb, "Zone report failed %d\n", ret); 932 return ret; 933 } 934 935 if (ret != bdev_nr_zones(bdev)) { 936 zonefs_err(zd->sb, "Invalid zone report (%d/%u zones)\n", 937 ret, bdev_nr_zones(bdev)); 938 return -EIO; 939 } 940 941 return 0; 942 } 943 944 static inline void zonefs_free_zone_info(struct zonefs_zone_data *zd) 945 { 946 kvfree(zd->zones); 947 } 948 949 /* 950 * Create a zone group and populate it with zone files. 951 */ 952 static int zonefs_init_zgroup(struct super_block *sb, 953 struct zonefs_zone_data *zd, 954 enum zonefs_ztype ztype) 955 { 956 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 957 struct zonefs_zone_group *zgroup = &sbi->s_zgroup[ztype]; 958 struct blk_zone *zone, *next, *end; 959 struct zonefs_zone *z; 960 unsigned int n = 0; 961 int ret; 962 963 /* Allocate the zone group. If it is empty, we have nothing to do. */ 964 if (!zgroup->g_nr_zones) 965 return 0; 966 967 zgroup->g_zones = kvcalloc(zgroup->g_nr_zones, 968 sizeof(struct zonefs_zone), GFP_KERNEL); 969 if (!zgroup->g_zones) 970 return -ENOMEM; 971 972 /* 973 * Initialize the zone groups using the device zone information. 974 * We always skip the first zone as it contains the super block 975 * and is not use to back a file. 976 */ 977 end = zd->zones + bdev_nr_zones(sb->s_bdev); 978 for (zone = &zd->zones[1]; zone < end; zone = next) { 979 980 next = zone + 1; 981 if (zonefs_zone_type(zone) != ztype) 982 continue; 983 984 if (WARN_ON_ONCE(n >= zgroup->g_nr_zones)) 985 return -EINVAL; 986 987 /* 988 * For conventional zones, contiguous zones can be aggregated 989 * together to form larger files. Note that this overwrites the 990 * length of the first zone of the set of contiguous zones 991 * aggregated together. If one offline or read-only zone is 992 * found, assume that all zones aggregated have the same 993 * condition. 994 */ 995 if (ztype == ZONEFS_ZTYPE_CNV && 996 (sbi->s_features & ZONEFS_F_AGGRCNV)) { 997 for (; next < end; next++) { 998 if (zonefs_zone_type(next) != ztype) 999 break; 1000 zone->len += next->len; 1001 zone->capacity += next->capacity; 1002 if (next->cond == BLK_ZONE_COND_READONLY && 1003 zone->cond != BLK_ZONE_COND_OFFLINE) 1004 zone->cond = BLK_ZONE_COND_READONLY; 1005 else if (next->cond == BLK_ZONE_COND_OFFLINE) 1006 zone->cond = BLK_ZONE_COND_OFFLINE; 1007 } 1008 } 1009 1010 z = &zgroup->g_zones[n]; 1011 if (ztype == ZONEFS_ZTYPE_CNV) 1012 z->z_flags |= ZONEFS_ZONE_CNV; 1013 z->z_sector = zone->start; 1014 z->z_size = zone->len << SECTOR_SHIFT; 1015 if (z->z_size > bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT && 1016 !(sbi->s_features & ZONEFS_F_AGGRCNV)) { 1017 zonefs_err(sb, 1018 "Invalid zone size %llu (device zone sectors %llu)\n", 1019 z->z_size, 1020 bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT); 1021 return -EINVAL; 1022 } 1023 1024 z->z_capacity = min_t(loff_t, MAX_LFS_FILESIZE, 1025 zone->capacity << SECTOR_SHIFT); 1026 z->z_wpoffset = zonefs_check_zone_condition(sb, z, zone); 1027 1028 z->z_mode = S_IFREG | sbi->s_perm; 1029 z->z_uid = sbi->s_uid; 1030 z->z_gid = sbi->s_gid; 1031 1032 /* 1033 * Let zonefs_inode_update_mode() know that we will need 1034 * special initialization of the inode mode the first time 1035 * it is accessed. 1036 */ 1037 z->z_flags |= ZONEFS_ZONE_INIT_MODE; 1038 1039 sb->s_maxbytes = max(z->z_capacity, sb->s_maxbytes); 1040 sbi->s_blocks += z->z_capacity >> sb->s_blocksize_bits; 1041 sbi->s_used_blocks += z->z_wpoffset >> sb->s_blocksize_bits; 1042 1043 /* 1044 * For sequential zones, make sure that any open zone is closed 1045 * first to ensure that the initial number of open zones is 0, 1046 * in sync with the open zone accounting done when the mount 1047 * option ZONEFS_MNTOPT_EXPLICIT_OPEN is used. 1048 */ 1049 if (ztype == ZONEFS_ZTYPE_SEQ && 1050 (zone->cond == BLK_ZONE_COND_IMP_OPEN || 1051 zone->cond == BLK_ZONE_COND_EXP_OPEN)) { 1052 ret = zonefs_zone_mgmt(sb, z, REQ_OP_ZONE_CLOSE); 1053 if (ret) 1054 return ret; 1055 } 1056 1057 zonefs_account_active(sb, z); 1058 1059 n++; 1060 } 1061 1062 if (WARN_ON_ONCE(n != zgroup->g_nr_zones)) 1063 return -EINVAL; 1064 1065 zonefs_info(sb, "Zone group \"%s\" has %u file%s\n", 1066 zonefs_zgroup_name(ztype), 1067 zgroup->g_nr_zones, 1068 zgroup->g_nr_zones > 1 ? "s" : ""); 1069 1070 return 0; 1071 } 1072 1073 static void zonefs_free_zgroups(struct super_block *sb) 1074 { 1075 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 1076 enum zonefs_ztype ztype; 1077 1078 if (!sbi) 1079 return; 1080 1081 for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) { 1082 kvfree(sbi->s_zgroup[ztype].g_zones); 1083 sbi->s_zgroup[ztype].g_zones = NULL; 1084 } 1085 } 1086 1087 /* 1088 * Create a zone group and populate it with zone files. 1089 */ 1090 static int zonefs_init_zgroups(struct super_block *sb) 1091 { 1092 struct zonefs_zone_data zd; 1093 enum zonefs_ztype ztype; 1094 int ret; 1095 1096 /* First get the device zone information */ 1097 memset(&zd, 0, sizeof(struct zonefs_zone_data)); 1098 zd.sb = sb; 1099 ret = zonefs_get_zone_info(&zd); 1100 if (ret) 1101 goto cleanup; 1102 1103 /* Allocate and initialize the zone groups */ 1104 for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) { 1105 ret = zonefs_init_zgroup(sb, &zd, ztype); 1106 if (ret) { 1107 zonefs_info(sb, 1108 "Zone group \"%s\" initialization failed\n", 1109 zonefs_zgroup_name(ztype)); 1110 break; 1111 } 1112 } 1113 1114 cleanup: 1115 zonefs_free_zone_info(&zd); 1116 if (ret) 1117 zonefs_free_zgroups(sb); 1118 1119 return ret; 1120 } 1121 1122 /* 1123 * Read super block information from the device. 1124 */ 1125 static int zonefs_read_super(struct super_block *sb) 1126 { 1127 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 1128 struct zonefs_super *super; 1129 u32 crc, stored_crc; 1130 struct page *page; 1131 struct bio_vec bio_vec; 1132 struct bio bio; 1133 int ret; 1134 1135 page = alloc_page(GFP_KERNEL); 1136 if (!page) 1137 return -ENOMEM; 1138 1139 bio_init(&bio, sb->s_bdev, &bio_vec, 1, REQ_OP_READ); 1140 bio.bi_iter.bi_sector = 0; 1141 __bio_add_page(&bio, page, PAGE_SIZE, 0); 1142 1143 ret = submit_bio_wait(&bio); 1144 if (ret) 1145 goto free_page; 1146 1147 super = page_address(page); 1148 1149 ret = -EINVAL; 1150 if (le32_to_cpu(super->s_magic) != ZONEFS_MAGIC) 1151 goto free_page; 1152 1153 stored_crc = le32_to_cpu(super->s_crc); 1154 super->s_crc = 0; 1155 crc = crc32(~0U, (unsigned char *)super, sizeof(struct zonefs_super)); 1156 if (crc != stored_crc) { 1157 zonefs_err(sb, "Invalid checksum (Expected 0x%08x, got 0x%08x)", 1158 crc, stored_crc); 1159 goto free_page; 1160 } 1161 1162 sbi->s_features = le64_to_cpu(super->s_features); 1163 if (sbi->s_features & ~ZONEFS_F_DEFINED_FEATURES) { 1164 zonefs_err(sb, "Unknown features set 0x%llx\n", 1165 sbi->s_features); 1166 goto free_page; 1167 } 1168 1169 if (sbi->s_features & ZONEFS_F_UID) { 1170 sbi->s_uid = make_kuid(current_user_ns(), 1171 le32_to_cpu(super->s_uid)); 1172 if (!uid_valid(sbi->s_uid)) { 1173 zonefs_err(sb, "Invalid UID feature\n"); 1174 goto free_page; 1175 } 1176 } 1177 1178 if (sbi->s_features & ZONEFS_F_GID) { 1179 sbi->s_gid = make_kgid(current_user_ns(), 1180 le32_to_cpu(super->s_gid)); 1181 if (!gid_valid(sbi->s_gid)) { 1182 zonefs_err(sb, "Invalid GID feature\n"); 1183 goto free_page; 1184 } 1185 } 1186 1187 if (sbi->s_features & ZONEFS_F_PERM) 1188 sbi->s_perm = le32_to_cpu(super->s_perm); 1189 1190 if (memchr_inv(super->s_reserved, 0, sizeof(super->s_reserved))) { 1191 zonefs_err(sb, "Reserved area is being used\n"); 1192 goto free_page; 1193 } 1194 1195 import_uuid(&sbi->s_uuid, super->s_uuid); 1196 ret = 0; 1197 1198 free_page: 1199 __free_page(page); 1200 1201 return ret; 1202 } 1203 1204 static const struct super_operations zonefs_sops = { 1205 .alloc_inode = zonefs_alloc_inode, 1206 .free_inode = zonefs_free_inode, 1207 .statfs = zonefs_statfs, 1208 .remount_fs = zonefs_remount, 1209 .show_options = zonefs_show_options, 1210 }; 1211 1212 static int zonefs_get_zgroup_inodes(struct super_block *sb) 1213 { 1214 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 1215 struct inode *dir_inode; 1216 enum zonefs_ztype ztype; 1217 1218 for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) { 1219 if (!sbi->s_zgroup[ztype].g_nr_zones) 1220 continue; 1221 1222 dir_inode = zonefs_get_zgroup_inode(sb, ztype); 1223 if (IS_ERR(dir_inode)) 1224 return PTR_ERR(dir_inode); 1225 1226 sbi->s_zgroup[ztype].g_inode = dir_inode; 1227 } 1228 1229 return 0; 1230 } 1231 1232 static void zonefs_release_zgroup_inodes(struct super_block *sb) 1233 { 1234 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 1235 enum zonefs_ztype ztype; 1236 1237 if (!sbi) 1238 return; 1239 1240 for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) { 1241 if (sbi->s_zgroup[ztype].g_inode) { 1242 iput(sbi->s_zgroup[ztype].g_inode); 1243 sbi->s_zgroup[ztype].g_inode = NULL; 1244 } 1245 } 1246 } 1247 1248 /* 1249 * Check that the device is zoned. If it is, get the list of zones and create 1250 * sub-directories and files according to the device zone configuration and 1251 * format options. 1252 */ 1253 static int zonefs_fill_super(struct super_block *sb, void *data, int silent) 1254 { 1255 struct zonefs_sb_info *sbi; 1256 struct inode *inode; 1257 enum zonefs_ztype ztype; 1258 int ret; 1259 1260 if (!bdev_is_zoned(sb->s_bdev)) { 1261 zonefs_err(sb, "Not a zoned block device\n"); 1262 return -EINVAL; 1263 } 1264 1265 /* 1266 * Initialize super block information: the maximum file size is updated 1267 * when the zone files are created so that the format option 1268 * ZONEFS_F_AGGRCNV which increases the maximum file size of a file 1269 * beyond the zone size is taken into account. 1270 */ 1271 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 1272 if (!sbi) 1273 return -ENOMEM; 1274 1275 spin_lock_init(&sbi->s_lock); 1276 sb->s_fs_info = sbi; 1277 sb->s_magic = ZONEFS_MAGIC; 1278 sb->s_maxbytes = 0; 1279 sb->s_op = &zonefs_sops; 1280 sb->s_time_gran = 1; 1281 1282 /* 1283 * The block size is set to the device zone write granularity to ensure 1284 * that write operations are always aligned according to the device 1285 * interface constraints. 1286 */ 1287 sb_set_blocksize(sb, bdev_zone_write_granularity(sb->s_bdev)); 1288 sbi->s_zone_sectors_shift = ilog2(bdev_zone_sectors(sb->s_bdev)); 1289 sbi->s_uid = GLOBAL_ROOT_UID; 1290 sbi->s_gid = GLOBAL_ROOT_GID; 1291 sbi->s_perm = 0640; 1292 sbi->s_mount_opts = ZONEFS_MNTOPT_ERRORS_RO; 1293 1294 atomic_set(&sbi->s_wro_seq_files, 0); 1295 sbi->s_max_wro_seq_files = bdev_max_open_zones(sb->s_bdev); 1296 atomic_set(&sbi->s_active_seq_files, 0); 1297 sbi->s_max_active_seq_files = bdev_max_active_zones(sb->s_bdev); 1298 1299 ret = zonefs_read_super(sb); 1300 if (ret) 1301 return ret; 1302 1303 ret = zonefs_parse_options(sb, data); 1304 if (ret) 1305 return ret; 1306 1307 zonefs_info(sb, "Mounting %u zones", bdev_nr_zones(sb->s_bdev)); 1308 1309 if (!sbi->s_max_wro_seq_files && 1310 !sbi->s_max_active_seq_files && 1311 sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) { 1312 zonefs_info(sb, 1313 "No open and active zone limits. Ignoring explicit_open mount option\n"); 1314 sbi->s_mount_opts &= ~ZONEFS_MNTOPT_EXPLICIT_OPEN; 1315 } 1316 1317 /* Initialize the zone groups */ 1318 ret = zonefs_init_zgroups(sb); 1319 if (ret) 1320 goto cleanup; 1321 1322 /* Create the root directory inode */ 1323 ret = -ENOMEM; 1324 inode = new_inode(sb); 1325 if (!inode) 1326 goto cleanup; 1327 1328 inode->i_ino = bdev_nr_zones(sb->s_bdev); 1329 inode->i_mode = S_IFDIR | 0555; 1330 simple_inode_init_ts(inode); 1331 inode->i_op = &zonefs_dir_inode_operations; 1332 inode->i_fop = &zonefs_dir_operations; 1333 inode->i_size = 2; 1334 set_nlink(inode, 2); 1335 for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) { 1336 if (sbi->s_zgroup[ztype].g_nr_zones) { 1337 inc_nlink(inode); 1338 inode->i_size++; 1339 } 1340 } 1341 1342 sb->s_root = d_make_root(inode); 1343 if (!sb->s_root) 1344 goto cleanup; 1345 1346 /* 1347 * Take a reference on the zone groups directory inodes 1348 * to keep them in the inode cache. 1349 */ 1350 ret = zonefs_get_zgroup_inodes(sb); 1351 if (ret) 1352 goto cleanup; 1353 1354 ret = zonefs_sysfs_register(sb); 1355 if (ret) 1356 goto cleanup; 1357 1358 return 0; 1359 1360 cleanup: 1361 zonefs_release_zgroup_inodes(sb); 1362 zonefs_free_zgroups(sb); 1363 1364 return ret; 1365 } 1366 1367 static struct dentry *zonefs_mount(struct file_system_type *fs_type, 1368 int flags, const char *dev_name, void *data) 1369 { 1370 return mount_bdev(fs_type, flags, dev_name, data, zonefs_fill_super); 1371 } 1372 1373 static void zonefs_kill_super(struct super_block *sb) 1374 { 1375 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 1376 1377 /* Release the reference on the zone group directory inodes */ 1378 zonefs_release_zgroup_inodes(sb); 1379 1380 kill_block_super(sb); 1381 1382 zonefs_sysfs_unregister(sb); 1383 zonefs_free_zgroups(sb); 1384 kfree(sbi); 1385 } 1386 1387 /* 1388 * File system definition and registration. 1389 */ 1390 static struct file_system_type zonefs_type = { 1391 .owner = THIS_MODULE, 1392 .name = "zonefs", 1393 .mount = zonefs_mount, 1394 .kill_sb = zonefs_kill_super, 1395 .fs_flags = FS_REQUIRES_DEV, 1396 }; 1397 1398 static int __init zonefs_init_inodecache(void) 1399 { 1400 zonefs_inode_cachep = kmem_cache_create("zonefs_inode_cache", 1401 sizeof(struct zonefs_inode_info), 0, 1402 (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT), 1403 NULL); 1404 if (zonefs_inode_cachep == NULL) 1405 return -ENOMEM; 1406 return 0; 1407 } 1408 1409 static void zonefs_destroy_inodecache(void) 1410 { 1411 /* 1412 * Make sure all delayed rcu free inodes are flushed before we 1413 * destroy the inode cache. 1414 */ 1415 rcu_barrier(); 1416 kmem_cache_destroy(zonefs_inode_cachep); 1417 } 1418 1419 static int __init zonefs_init(void) 1420 { 1421 int ret; 1422 1423 BUILD_BUG_ON(sizeof(struct zonefs_super) != ZONEFS_SUPER_SIZE); 1424 1425 ret = zonefs_init_inodecache(); 1426 if (ret) 1427 return ret; 1428 1429 ret = zonefs_sysfs_init(); 1430 if (ret) 1431 goto destroy_inodecache; 1432 1433 ret = register_filesystem(&zonefs_type); 1434 if (ret) 1435 goto sysfs_exit; 1436 1437 return 0; 1438 1439 sysfs_exit: 1440 zonefs_sysfs_exit(); 1441 destroy_inodecache: 1442 zonefs_destroy_inodecache(); 1443 1444 return ret; 1445 } 1446 1447 static void __exit zonefs_exit(void) 1448 { 1449 unregister_filesystem(&zonefs_type); 1450 zonefs_sysfs_exit(); 1451 zonefs_destroy_inodecache(); 1452 } 1453 1454 MODULE_AUTHOR("Damien Le Moal"); 1455 MODULE_DESCRIPTION("Zone file system for zoned block devices"); 1456 MODULE_LICENSE("GPL"); 1457 MODULE_ALIAS_FS("zonefs"); 1458 module_init(zonefs_init); 1459 module_exit(zonefs_exit); 1460