// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2025, Christoph Hellwig.
 * Copyright (c) 2025, Western Digital Corporation or its affiliates.
 *
 * Zoned Loop Device driver - exports a zoned block device using one file per
 * zone as backing storage.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/blk-mq.h>
#include <linux/blkzoned.h>
#include <linux/pagemap.h>
#include <linux/miscdevice.h>
#include <linux/falloc.h>
#include <linux/mutex.h>
#include <linux/parser.h>
#include <linux/seq_file.h>

/*
 * Options for adding (and removing) a device. Each value is a single bit so
 * that zloop_options.mask can record which options were explicitly given.
 */
enum {
	ZLOOP_OPT_ERR			= 0,
	ZLOOP_OPT_ID			= (1 << 0),
	ZLOOP_OPT_CAPACITY		= (1 << 1),
	ZLOOP_OPT_ZONE_SIZE		= (1 << 2),
	ZLOOP_OPT_ZONE_CAPACITY		= (1 << 3),
	ZLOOP_OPT_NR_CONV_ZONES		= (1 << 4),
	ZLOOP_OPT_BASE_DIR		= (1 << 5),
	ZLOOP_OPT_NR_QUEUES		= (1 << 6),
	ZLOOP_OPT_QUEUE_DEPTH		= (1 << 7),
	ZLOOP_OPT_BUFFERED_IO		= (1 << 8),
	ZLOOP_OPT_ZONE_APPEND		= (1 << 9),
	ZLOOP_OPT_ORDERED_ZONE_APPEND	= (1 << 10),
};

static const match_table_t zloop_opt_tokens = {
	{ ZLOOP_OPT_ID,			"id=%d"			},
	{ ZLOOP_OPT_CAPACITY,		"capacity_mb=%u"	},
	{ ZLOOP_OPT_ZONE_SIZE,		"zone_size_mb=%u"	},
	{ ZLOOP_OPT_ZONE_CAPACITY,	"zone_capacity_mb=%u"	},
	{ ZLOOP_OPT_NR_CONV_ZONES,	"conv_zones=%u"		},
	{ ZLOOP_OPT_BASE_DIR,		"base_dir=%s"		},
	{ ZLOOP_OPT_NR_QUEUES,		"nr_queues=%u"		},
	{ ZLOOP_OPT_QUEUE_DEPTH,	"queue_depth=%u"	},
	{ ZLOOP_OPT_BUFFERED_IO,	"buffered_io"		},
	{ ZLOOP_OPT_ZONE_APPEND,	"zone_append=%u"	},
	{ ZLOOP_OPT_ORDERED_ZONE_APPEND, "ordered_zone_append"	},
	{ ZLOOP_OPT_ERR,		NULL			}
};

/* Default values for the "add" operation. */
#define ZLOOP_DEF_ID			-1
#define ZLOOP_DEF_ZONE_SIZE		((256ULL * SZ_1M) >> SECTOR_SHIFT)
#define ZLOOP_DEF_NR_ZONES		64
#define ZLOOP_DEF_NR_CONV_ZONES		8
#define ZLOOP_DEF_BASE_DIR		"/var/local/zloop"
#define ZLOOP_DEF_NR_QUEUES		1
#define ZLOOP_DEF_QUEUE_DEPTH		128
#define ZLOOP_DEF_BUFFERED_IO		false
#define ZLOOP_DEF_ZONE_APPEND		true
#define ZLOOP_DEF_ORDERED_ZONE_APPEND	false

/* Arbitrary limit on the zone size (16GB). */
#define ZLOOP_MAX_ZONE_SIZE_MB	16384

/* Parsed "add"/"remove" options; mask holds the ZLOOP_OPT_* bits seen. */
struct zloop_options {
	unsigned int		mask;
	int			id;
	sector_t		capacity;
	sector_t		zone_size;
	sector_t		zone_capacity;
	unsigned int		nr_conv_zones;
	char			*base_dir;
	unsigned int		nr_queues;
	unsigned int		queue_depth;
	bool			buffered_io;
	bool			zone_append;
	bool			ordered_zone_append;
};

/*
 * Device states.
 */
enum {
	Zlo_creating = 0,
	Zlo_live,
	Zlo_deleting,
};

enum zloop_zone_flags {
	ZLOOP_ZONE_CONV = 0,	/* conventional zone (no write pointer) */
	ZLOOP_ZONE_SEQ_ERROR,	/* zone state must be re-read from its file */
};

struct zloop_zone {
	struct file		*file;	/* backing file for this zone */

	unsigned long		flags;
	struct mutex		lock;	/* serializes writes and zone ops */
	spinlock_t		wp_lock;	/* protects cond and wp */
	enum blk_zone_cond	cond;
	sector_t		start;
	sector_t		wp;	/* write pointer; ULLONG_MAX when full */

	gfp_t			old_gfp_mask;
};

struct zloop_device {
	unsigned int		id;
	unsigned int		state;

	struct blk_mq_tag_set	tag_set;
	struct gendisk		*disk;

	struct workqueue_struct *workqueue;
	bool			buffered_io;
	bool			zone_append;
	bool			ordered_zone_append;

	const char		*base_dir;
	struct file		*data_dir;

	unsigned int		zone_shift;
	sector_t		zone_size;
	sector_t		zone_capacity;
	unsigned int		nr_zones;
	unsigned int		nr_conv_zones;
	unsigned int		block_size;

	struct zloop_zone	zones[] __counted_by(nr_zones);
};

/* Per-request driver state, stored in the blk-mq request PDU. */
struct zloop_cmd {
	struct work_struct	work;
	atomic_t		ref;
	sector_t		sector;
	sector_t		nr_sectors;
	long			ret;
	struct kiocb		iocb;
	struct bio_vec		*bvec;
};

static
DEFINE_IDR(zloop_index_idr);
static DEFINE_MUTEX(zloop_ctl_mutex);

/* Number of the zone containing the first sector of @rq. */
static unsigned int rq_zone_no(struct request *rq)
{
	struct zloop_device *zlo = rq->q->queuedata;

	return blk_rq_pos(rq) >> zlo->zone_shift;
}

/*
 * Re-derive a sequential zone condition and write pointer from the size of
 * its backing file, e.g. after a failed write marked the zone with
 * ZLOOP_ZONE_SEQ_ERROR. Called with the zone mutex held.
 */
static int zloop_update_seq_zone(struct zloop_device *zlo, unsigned int zone_no)
{
	struct zloop_zone *zone = &zlo->zones[zone_no];
	struct kstat stat;
	sector_t file_sectors;
	unsigned long flags;
	int ret;

	lockdep_assert_held(&zone->lock);

	ret = vfs_getattr(&zone->file->f_path, &stat, STATX_SIZE, 0);
	if (ret < 0) {
		pr_err("Failed to get zone %u file stat (err=%d)\n",
		       zone_no, ret);
		set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
		return ret;
	}

	file_sectors = stat.size >> SECTOR_SHIFT;
	if (file_sectors > zlo->zone_capacity) {
		pr_err("Zone %u file too large (%llu sectors > %llu)\n",
		       zone_no, file_sectors, zlo->zone_capacity);
		return -EINVAL;
	}

	if (file_sectors & ((zlo->block_size >> SECTOR_SHIFT) - 1)) {
		pr_err("Zone %u file size not aligned to block size %u\n",
		       zone_no, zlo->block_size);
		return -EINVAL;
	}

	/* Map the file size to a zone condition and write pointer. */
	spin_lock_irqsave(&zone->wp_lock, flags);
	if (!file_sectors) {
		zone->cond = BLK_ZONE_COND_EMPTY;
		zone->wp = zone->start;
	} else if (file_sectors == zlo->zone_capacity) {
		zone->cond = BLK_ZONE_COND_FULL;
		zone->wp = ULLONG_MAX;
	} else {
		zone->cond = BLK_ZONE_COND_CLOSED;
		zone->wp = zone->start + file_sectors;
	}
	spin_unlock_irqrestore(&zone->wp_lock, flags);

	return 0;
}

/* Explicitly open a sequential zone (REQ_OP_ZONE_OPEN). */
static int zloop_open_zone(struct zloop_device *zlo, unsigned int zone_no)
{
	struct zloop_zone *zone = &zlo->zones[zone_no];
	int ret = 0;

	if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
		return -EIO;

	mutex_lock(&zone->lock);

	if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
		ret = zloop_update_seq_zone(zlo, zone_no);
		if (ret)
			goto unlock;
	}

	switch (zone->cond) {
	case BLK_ZONE_COND_EXP_OPEN:
		/* Already explicitly open: nothing to do. */
		break;
	case BLK_ZONE_COND_EMPTY:
	case BLK_ZONE_COND_CLOSED:
	case BLK_ZONE_COND_IMP_OPEN:
		zone->cond = BLK_ZONE_COND_EXP_OPEN;
		break;
	case BLK_ZONE_COND_FULL:
	default:
		ret = -EIO;
		break;
	}

unlock:
	mutex_unlock(&zone->lock);

	return ret;
}

/* Close an open sequential zone (REQ_OP_ZONE_CLOSE). */
static int zloop_close_zone(struct zloop_device *zlo, unsigned int zone_no)
{
	struct zloop_zone *zone = &zlo->zones[zone_no];
	unsigned long flags;
	int ret = 0;

	if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
		return -EIO;

	mutex_lock(&zone->lock);

	if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
		ret = zloop_update_seq_zone(zlo, zone_no);
		if (ret)
			goto unlock;
	}

	switch (zone->cond) {
	case BLK_ZONE_COND_CLOSED:
		/* Already closed: nothing to do. */
		break;
	case BLK_ZONE_COND_IMP_OPEN:
	case BLK_ZONE_COND_EXP_OPEN:
		/* A closed zone with wp at its start becomes empty. */
		spin_lock_irqsave(&zone->wp_lock, flags);
		if (zone->wp == zone->start)
			zone->cond = BLK_ZONE_COND_EMPTY;
		else
			zone->cond = BLK_ZONE_COND_CLOSED;
		spin_unlock_irqrestore(&zone->wp_lock, flags);
		break;
	case BLK_ZONE_COND_EMPTY:
	case BLK_ZONE_COND_FULL:
	default:
		ret = -EIO;
		break;
	}

unlock:
	mutex_unlock(&zone->lock);

	return ret;
}

/*
 * Reset a sequential zone: truncate the backing file to 0 and return the
 * write pointer to the zone start (REQ_OP_ZONE_RESET).
 */
static int zloop_reset_zone(struct zloop_device *zlo, unsigned int zone_no)
{
	struct zloop_zone *zone = &zlo->zones[zone_no];
	unsigned long flags;
	int ret = 0;

	if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
		return -EIO;

	mutex_lock(&zone->lock);

	/* Nothing to do for an already-empty zone without a pending error. */
	if (!test_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags) &&
	    zone->cond == BLK_ZONE_COND_EMPTY)
		goto unlock;

	if (vfs_truncate(&zone->file->f_path, 0)) {
		set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
		ret = -EIO;
		goto unlock;
	}

	spin_lock_irqsave(&zone->wp_lock, flags);
	zone->cond = BLK_ZONE_COND_EMPTY;
	zone->wp = zone->start;
	clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
	spin_unlock_irqrestore(&zone->wp_lock, flags);

unlock:
	mutex_unlock(&zone->lock);

	return ret;
}

/* Reset every sequential zone (REQ_OP_ZONE_RESET_ALL). */
static int zloop_reset_all_zones(struct zloop_device *zlo)
{
	unsigned int i;
	int ret;

	for (i = zlo->nr_conv_zones; i < zlo->nr_zones; i++) {
		ret = zloop_reset_zone(zlo, i);
		if (ret)
			return ret;
	}

	return 0;
}

/*
 * Finish a sequential zone: truncate the backing file up to the zone size,
 * transitioning the zone to the full condition (REQ_OP_ZONE_FINISH).
 */
static int zloop_finish_zone(struct zloop_device *zlo, unsigned int zone_no)
{
	struct zloop_zone *zone = &zlo->zones[zone_no];
	unsigned long flags;
	int ret = 0;

	if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
		return -EIO;

	mutex_lock(&zone->lock);

	/* Nothing to do for an already-full zone without a pending error. */
	if (!test_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags) &&
	    zone->cond == BLK_ZONE_COND_FULL)
		goto unlock;

	if (vfs_truncate(&zone->file->f_path, zlo->zone_size << SECTOR_SHIFT)) {
		set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
		ret = -EIO;
		goto unlock;
	}

	spin_lock_irqsave(&zone->wp_lock, flags);
	zone->cond = BLK_ZONE_COND_FULL;
	zone->wp = ULLONG_MAX;
	clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
	spin_unlock_irqrestore(&zone->wp_lock, flags);

unlock:
	mutex_unlock(&zone->lock);

	return ret;
}

/* Drop a command reference; the last put completes the blk-mq request. */
static void zloop_put_cmd(struct zloop_cmd *cmd)
{
	struct request *rq = blk_mq_rq_from_pdu(cmd);

	if (!atomic_dec_and_test(&cmd->ref))
		return;
	kfree(cmd->bvec);
	cmd->bvec = NULL;
	if (likely(!blk_should_fake_timeout(rq->q)))
		blk_mq_complete_request(rq);
}

/* AIO completion callback for the read/write kiocb issued by zloop_rw(). */
static void zloop_rw_complete(struct kiocb *iocb, long ret)
{
	struct zloop_cmd *cmd = container_of(iocb, struct zloop_cmd, iocb);

	cmd->ret = ret;
	zloop_put_cmd(cmd);
}

static void zloop_rw(struct zloop_cmd *cmd)
{
	struct request *rq = blk_mq_rq_from_pdu(cmd);
	struct zloop_device *zlo =
rq->q->queuedata;
	unsigned int zone_no = rq_zone_no(rq);
	sector_t sector = blk_rq_pos(rq);
	sector_t nr_sectors = blk_rq_sectors(rq);
	bool is_append = req_op(rq) == REQ_OP_ZONE_APPEND;
	bool is_write = req_op(rq) == REQ_OP_WRITE || is_append;
	int rw = is_write ? ITER_SOURCE : ITER_DEST;
	struct req_iterator rq_iter;
	struct zloop_zone *zone;
	struct iov_iter iter;
	struct bio_vec tmp;
	unsigned long flags;
	sector_t zone_end;
	unsigned int nr_bvec;
	int ret;

	/* One reference for the issuer, one for the AIO completion. */
	atomic_set(&cmd->ref, 2);
	cmd->sector = sector;
	cmd->nr_sectors = nr_sectors;
	cmd->ret = 0;

	if (WARN_ON_ONCE(is_append && !zlo->zone_append)) {
		ret = -EIO;
		goto out;
	}

	/* We should never get an I/O beyond the device capacity. */
	if (WARN_ON_ONCE(zone_no >= zlo->nr_zones)) {
		ret = -EIO;
		goto out;
	}
	zone = &zlo->zones[zone_no];
	zone_end = zone->start + zlo->zone_capacity;

	/*
	 * The block layer should never send requests that are not fully
	 * contained within the zone.
	 */
	if (WARN_ON_ONCE(sector + nr_sectors > zone->start + zlo->zone_size)) {
		ret = -EIO;
		goto out;
	}

	/* Recover the zone state from its file after a previous write error. */
	if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
		mutex_lock(&zone->lock);
		ret = zloop_update_seq_zone(zlo, zone_no);
		mutex_unlock(&zone->lock);
		if (ret)
			goto out;
	}

	if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write) {
		mutex_lock(&zone->lock);

		spin_lock_irqsave(&zone->wp_lock, flags);

		/*
		 * Zone append operations always go at the current write
		 * pointer, but regular write operations must already be
		 * aligned to the write pointer when submitted.
		 */
		if (is_append) {
			/*
			 * If ordered zone append is in use, we already checked
			 * and set the target sector in zloop_queue_rq().
			 */
			if (!zlo->ordered_zone_append) {
				if (zone->cond == BLK_ZONE_COND_FULL ||
				    zone->wp + nr_sectors > zone_end) {
					spin_unlock_irqrestore(&zone->wp_lock,
							       flags);
					ret = -EIO;
					goto unlock;
				}
				sector = zone->wp;
			}
			cmd->sector = sector;
		} else if (sector != zone->wp) {
			spin_unlock_irqrestore(&zone->wp_lock, flags);
			pr_err("Zone %u: unaligned write: sect %llu, wp %llu\n",
			       zone_no, sector, zone->wp);
			ret = -EIO;
			goto unlock;
		}

		/* Implicitly open the target zone. */
		if (zone->cond == BLK_ZONE_COND_CLOSED ||
		    zone->cond == BLK_ZONE_COND_EMPTY)
			zone->cond = BLK_ZONE_COND_IMP_OPEN;

		/*
		 * Advance the write pointer, unless ordered zone append is in
		 * use. If the write fails, the write pointer position will be
		 * corrected when the next I/O starts execution.
		 */
		if (!is_append || !zlo->ordered_zone_append) {
			zone->wp += nr_sectors;
			if (zone->wp == zone_end) {
				zone->cond = BLK_ZONE_COND_FULL;
				zone->wp = ULLONG_MAX;
			}
		}

		spin_unlock_irqrestore(&zone->wp_lock, flags);
	}

	nr_bvec = blk_rq_nr_bvec(rq);

	if (rq->bio != rq->biotail) {
		struct bio_vec *bvec;

		cmd->bvec = kmalloc_objs(*cmd->bvec, nr_bvec, GFP_NOIO);
		if (!cmd->bvec) {
			ret = -EIO;
			goto unlock;
		}

		/*
		 * The bios of the request may be started from the middle of
		 * the 'bvec' because of bio splitting, so we can't directly
		 * copy bio->bi_iov_vec to new bvec. The rq_for_each_bvec
		 * API will take care of all details for us.
		 */
		bvec = cmd->bvec;
		rq_for_each_bvec(tmp, rq, rq_iter) {
			*bvec = tmp;
			bvec++;
		}
		iov_iter_bvec(&iter, rw, cmd->bvec, nr_bvec, blk_rq_bytes(rq));
	} else {
		/*
		 * Same here, this bio may be started from the middle of the
		 * 'bvec' because of bio splitting, so offset from the bvec
		 * must be passed to iov iterator
		 */
		iov_iter_bvec(&iter, rw,
			__bvec_iter_bvec(rq->bio->bi_io_vec, rq->bio->bi_iter),
			nr_bvec, blk_rq_bytes(rq));
		iter.iov_offset = rq->bio->bi_iter.bi_bvec_done;
	}

	/* The file offset is relative to the zone start. */
	cmd->iocb.ki_pos = (sector - zone->start) << SECTOR_SHIFT;
	cmd->iocb.ki_filp = zone->file;
	cmd->iocb.ki_complete = zloop_rw_complete;
	if (!zlo->buffered_io)
		cmd->iocb.ki_flags = IOCB_DIRECT;
	cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);

	if (rw == ITER_SOURCE)
		ret = zone->file->f_op->write_iter(&cmd->iocb, &iter);
	else
		ret = zone->file->f_op->read_iter(&cmd->iocb, &iter);
unlock:
	if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write)
		mutex_unlock(&zone->lock);
out:
	/* Anything but -EIOCBQUEUED means the I/O completed synchronously. */
	if (ret != -EIOCBQUEUED)
		zloop_rw_complete(&cmd->iocb, ret);
	zloop_put_cmd(cmd);
}

/*
 * Sync the entire FS containing the zone files instead of walking all files.
 */
static int zloop_flush(struct zloop_device *zlo)
{
	struct super_block *sb = file_inode(zlo->data_dir)->i_sb;
	int ret;

	down_read(&sb->s_umount);
	ret = sync_filesystem(sb);
	up_read(&sb->s_umount);

	return ret;
}

/* Dispatch one request; runs in workqueue context. */
static void zloop_handle_cmd(struct zloop_cmd *cmd)
{
	struct request *rq = blk_mq_rq_from_pdu(cmd);
	struct zloop_device *zlo = rq->q->queuedata;

	/* We can block in this context, so ignore REQ_NOWAIT.
*/
	if (rq->cmd_flags & REQ_NOWAIT)
		rq->cmd_flags &= ~REQ_NOWAIT;

	switch (req_op(rq)) {
	case REQ_OP_READ:
	case REQ_OP_WRITE:
	case REQ_OP_ZONE_APPEND:
		/*
		 * zloop_rw() always executes asynchronously or completes
		 * directly.
		 */
		zloop_rw(cmd);
		return;
	case REQ_OP_FLUSH:
		cmd->ret = zloop_flush(zlo);
		break;
	case REQ_OP_ZONE_RESET:
		cmd->ret = zloop_reset_zone(zlo, rq_zone_no(rq));
		break;
	case REQ_OP_ZONE_RESET_ALL:
		cmd->ret = zloop_reset_all_zones(zlo);
		break;
	case REQ_OP_ZONE_FINISH:
		cmd->ret = zloop_finish_zone(zlo, rq_zone_no(rq));
		break;
	case REQ_OP_ZONE_OPEN:
		cmd->ret = zloop_open_zone(zlo, rq_zone_no(rq));
		break;
	case REQ_OP_ZONE_CLOSE:
		cmd->ret = zloop_close_zone(zlo, rq_zone_no(rq));
		break;
	default:
		WARN_ON_ONCE(1);
		pr_err("Unsupported operation %d\n", req_op(rq));
		cmd->ret = -EOPNOTSUPP;
		break;
	}

	blk_mq_complete_request(rq);
}

/* Work item: run a command with local throttling and NOIO allocations. */
static void zloop_cmd_workfn(struct work_struct *work)
{
	struct zloop_cmd *cmd = container_of(work, struct zloop_cmd, work);
	int orig_flags = current->flags;

	current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
	zloop_handle_cmd(cmd);
	current->flags = orig_flags;
}

/* blk-mq ->complete handler: translate cmd->ret into a request status. */
static void zloop_complete_rq(struct request *rq)
{
	struct zloop_cmd *cmd = blk_mq_rq_to_pdu(rq);
	struct zloop_device *zlo = rq->q->queuedata;
	unsigned int zone_no = cmd->sector >> zlo->zone_shift;
	struct zloop_zone *zone = &zlo->zones[zone_no];
	blk_status_t sts = BLK_STS_OK;

	switch (req_op(rq)) {
	case REQ_OP_READ:
		if (cmd->ret < 0)
			pr_err("Zone %u: failed read sector %llu, %llu sectors\n",
			       zone_no, cmd->sector, cmd->nr_sectors);

		if (cmd->ret >= 0 && cmd->ret != blk_rq_bytes(rq)) {
			/* short read */
			struct bio *bio;

			__rq_for_each_bio(bio, rq)
				zero_fill_bio(bio);
		}
		break;
	case REQ_OP_WRITE:
	case REQ_OP_ZONE_APPEND:
		if (cmd->ret < 0)
			pr_err("Zone %u: failed %swrite sector %llu, %llu sectors\n",
			       zone_no,
			       req_op(rq) == REQ_OP_WRITE ? "" : "append ",
			       cmd->sector, cmd->nr_sectors);

		if (cmd->ret >= 0 && cmd->ret != blk_rq_bytes(rq)) {
			/* A partial write is treated as an I/O error. */
			pr_err("Zone %u: partial write %ld/%u B\n",
			       zone_no, cmd->ret, blk_rq_bytes(rq));
			cmd->ret = -EIO;
		}

		if (cmd->ret < 0 && !test_bit(ZLOOP_ZONE_CONV, &zone->flags)) {
			/*
			 * A write to a sequential zone file failed: mark the
			 * zone as having an error. This will be corrected and
			 * cleared when the next IO is submitted.
			 */
			set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
			break;
		}
		/* Report the actual append location back to the caller. */
		if (req_op(rq) == REQ_OP_ZONE_APPEND)
			rq->__sector = cmd->sector;

		break;
	default:
		break;
	}

	if (cmd->ret < 0)
		sts = errno_to_blk_status(cmd->ret);
	blk_mq_end_request(rq, sts);
}

/*
 * Reserve the target sector of a zone append at submission time so that
 * appends complete in submission order (ordered_zone_append mode). Returns
 * false if the zone is full or the append does not fit.
 */
static bool zloop_set_zone_append_sector(struct request *rq)
{
	struct zloop_device *zlo = rq->q->queuedata;
	unsigned int zone_no = rq_zone_no(rq);
	struct zloop_zone *zone = &zlo->zones[zone_no];
	sector_t zone_end = zone->start + zlo->zone_capacity;
	sector_t nr_sectors = blk_rq_sectors(rq);
	unsigned long flags;

	spin_lock_irqsave(&zone->wp_lock, flags);

	if (zone->cond == BLK_ZONE_COND_FULL ||
	    zone->wp + nr_sectors > zone_end) {
		spin_unlock_irqrestore(&zone->wp_lock, flags);
		return false;
	}

	rq->__sector = zone->wp;
	zone->wp += blk_rq_sectors(rq);
	if (zone->wp >= zone_end) {
		zone->cond = BLK_ZONE_COND_FULL;
		zone->wp = ULLONG_MAX;
	}

	spin_unlock_irqrestore(&zone->wp_lock, flags);

	return true;
}

static blk_status_t zloop_queue_rq(struct blk_mq_hw_ctx *hctx,
				   const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;
	struct zloop_cmd *cmd = blk_mq_rq_to_pdu(rq);
	struct zloop_device *zlo = rq->q->queuedata;

	if (data_race(READ_ONCE(zlo->state)) == Zlo_deleting)
		return BLK_STS_IOERR;

	/*
	 * If we need to strongly order zone append operations, set the request
	 * sector to the zone write pointer location now instead of when the
	 * command work runs.
	 */
	if (zlo->ordered_zone_append && req_op(rq) == REQ_OP_ZONE_APPEND) {
		if (!zloop_set_zone_append_sector(rq))
			return BLK_STS_IOERR;
	}

	blk_mq_start_request(rq);

	/* All commands execute from the device workqueue. */
	INIT_WORK(&cmd->work, zloop_cmd_workfn);
	queue_work(zlo->workqueue, &cmd->work);

	return BLK_STS_OK;
}

static const struct blk_mq_ops zloop_mq_ops = {
	.queue_rq	= zloop_queue_rq,
	.complete	= zloop_complete_rq,
};

/* Block device ->open: only allow opening a live (fully created) device. */
static int zloop_open(struct gendisk *disk, blk_mode_t mode)
{
	struct zloop_device *zlo = disk->private_data;
	int ret;

	ret = mutex_lock_killable(&zloop_ctl_mutex);
	if (ret)
		return ret;

	if (zlo->state != Zlo_live)
		ret = -ENXIO;
	mutex_unlock(&zloop_ctl_mutex);
	return ret;
}

/* Block device ->report_zones: report up to @nr_zones zones from @sector. */
static int zloop_report_zones(struct gendisk *disk, sector_t sector,
		unsigned int nr_zones, struct blk_report_zones_args *args)
{
	struct zloop_device *zlo = disk->private_data;
	struct blk_zone blkz = {};
	unsigned int first, i;
	unsigned long flags;
	int ret;

	first = disk_zone_no(disk, sector);
	if (first >= zlo->nr_zones)
		return 0;
	nr_zones = min(nr_zones, zlo->nr_zones - first);

	for (i = 0; i < nr_zones; i++) {
		unsigned int zone_no = first + i;
		struct zloop_zone *zone = &zlo->zones[zone_no];

		mutex_lock(&zone->lock);

		/* Resync the zone state from its file after a write error. */
		if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
			ret = zloop_update_seq_zone(zlo, zone_no);
			if (ret) {
				mutex_unlock(&zone->lock);
				return ret;
			}
		}

		blkz.start = zone->start;
		blkz.len = zlo->zone_size;
		spin_lock_irqsave(&zone->wp_lock, flags);
		blkz.wp = zone->wp;
		spin_unlock_irqrestore(&zone->wp_lock, flags);
		blkz.cond = zone->cond;
		if (test_bit(ZLOOP_ZONE_CONV, &zone->flags)) {
			blkz.type = BLK_ZONE_TYPE_CONVENTIONAL;
			blkz.capacity = zlo->zone_size;
		} else {
			blkz.type = BLK_ZONE_TYPE_SEQWRITE_REQ;
			blkz.capacity = zlo->zone_capacity;
		}

		mutex_unlock(&zone->lock);

		ret = disk_report_zone(disk, &blkz, i, args);
		if (ret)
			return ret;
	}

	return nr_zones;
}

/* ->free_disk: release all device resources when the last reference drops. */
static void zloop_free_disk(struct gendisk *disk)
{
	struct zloop_device *zlo = disk->private_data;
	unsigned int i;

	blk_mq_free_tag_set(&zlo->tag_set);

	for (i = 0; i < zlo->nr_zones; i++) {
		struct zloop_zone *zone = &zlo->zones[i];

		/* Restore the gfp mask saved when the file was set up. */
		mapping_set_gfp_mask(zone->file->f_mapping,
				     zone->old_gfp_mask);
		fput(zone->file);
	}

	fput(zlo->data_dir);
	destroy_workqueue(zlo->workqueue);
	kfree(zlo->base_dir);
	kvfree(zlo);
}

static const struct block_device_operations zloop_fops = {
	.owner		= THIS_MODULE,
	.open		= zloop_open,
	.report_zones	= zloop_report_zones,
	.free_disk	= zloop_free_disk,
};

/* filp_open() with a printf-style path. */
__printf(3, 4)
static struct file *zloop_filp_open_fmt(int oflags, umode_t mode,
		const char *fmt, ...)
{
	struct file *file;
	va_list ap;
	char *p;

	va_start(ap, fmt);
	p = kvasprintf(GFP_KERNEL, fmt, ap);
	va_end(ap);

	if (!p)
		return ERR_PTR(-ENOMEM);
	file = filp_open(p, oflags, mode);
	kfree(p);
	return file;
}

/* Derive the device block size from the backing file of @zone. */
static int zloop_get_block_size(struct zloop_device *zlo,
		struct zloop_zone *zone)
{
	struct block_device *sb_bdev = zone->file->f_mapping->host->i_sb->s_bdev;
	struct kstat st;

	/*
	 * If the FS block size is lower than or equal to 4K, use that as the
	 * device block size.
Otherwise, fallback to the FS direct IO alignment 860 * constraint if that is provided, and to the FS underlying device 861 * physical block size if the direct IO alignment is unknown. 862 */ 863 if (file_inode(zone->file)->i_sb->s_blocksize <= SZ_4K) 864 zlo->block_size = file_inode(zone->file)->i_sb->s_blocksize; 865 else if (!vfs_getattr(&zone->file->f_path, &st, STATX_DIOALIGN, 0) && 866 (st.result_mask & STATX_DIOALIGN)) 867 zlo->block_size = st.dio_offset_align; 868 else if (sb_bdev) 869 zlo->block_size = bdev_physical_block_size(sb_bdev); 870 else 871 zlo->block_size = SECTOR_SIZE; 872 873 if (zlo->zone_capacity & ((zlo->block_size >> SECTOR_SHIFT) - 1)) { 874 pr_err("Zone capacity is not aligned to block size %u\n", 875 zlo->block_size); 876 return -EINVAL; 877 } 878 879 return 0; 880 } 881 882 static int zloop_init_zone(struct zloop_device *zlo, struct zloop_options *opts, 883 unsigned int zone_no, bool restore) 884 { 885 struct zloop_zone *zone = &zlo->zones[zone_no]; 886 int oflags = O_RDWR; 887 struct kstat stat; 888 sector_t file_sectors; 889 int ret; 890 891 mutex_init(&zone->lock); 892 spin_lock_init(&zone->wp_lock); 893 zone->start = (sector_t)zone_no << zlo->zone_shift; 894 895 if (!restore) 896 oflags |= O_CREAT; 897 898 if (!opts->buffered_io) 899 oflags |= O_DIRECT; 900 901 if (zone_no < zlo->nr_conv_zones) { 902 /* Conventional zone file. 
*/ 903 set_bit(ZLOOP_ZONE_CONV, &zone->flags); 904 zone->cond = BLK_ZONE_COND_NOT_WP; 905 zone->wp = U64_MAX; 906 907 zone->file = zloop_filp_open_fmt(oflags, 0600, "%s/%u/cnv-%06u", 908 zlo->base_dir, zlo->id, zone_no); 909 if (IS_ERR(zone->file)) { 910 pr_err("Failed to open zone %u file %s/%u/cnv-%06u (err=%ld)", 911 zone_no, zlo->base_dir, zlo->id, zone_no, 912 PTR_ERR(zone->file)); 913 return PTR_ERR(zone->file); 914 } 915 916 if (!zlo->block_size) { 917 ret = zloop_get_block_size(zlo, zone); 918 if (ret) 919 return ret; 920 } 921 922 ret = vfs_getattr(&zone->file->f_path, &stat, STATX_SIZE, 0); 923 if (ret < 0) { 924 pr_err("Failed to get zone %u file stat\n", zone_no); 925 return ret; 926 } 927 file_sectors = stat.size >> SECTOR_SHIFT; 928 929 if (restore && file_sectors != zlo->zone_size) { 930 pr_err("Invalid conventional zone %u file size (%llu sectors != %llu)\n", 931 zone_no, file_sectors, zlo->zone_capacity); 932 return ret; 933 } 934 935 ret = vfs_truncate(&zone->file->f_path, 936 zlo->zone_size << SECTOR_SHIFT); 937 if (ret < 0) { 938 pr_err("Failed to truncate zone %u file (err=%d)\n", 939 zone_no, ret); 940 return ret; 941 } 942 943 return 0; 944 } 945 946 /* Sequential zone file. 
*/ 947 zone->file = zloop_filp_open_fmt(oflags, 0600, "%s/%u/seq-%06u", 948 zlo->base_dir, zlo->id, zone_no); 949 if (IS_ERR(zone->file)) { 950 pr_err("Failed to open zone %u file %s/%u/seq-%06u (err=%ld)", 951 zone_no, zlo->base_dir, zlo->id, zone_no, 952 PTR_ERR(zone->file)); 953 return PTR_ERR(zone->file); 954 } 955 956 if (!zlo->block_size) { 957 ret = zloop_get_block_size(zlo, zone); 958 if (ret) 959 return ret; 960 } 961 962 zloop_get_block_size(zlo, zone); 963 964 mutex_lock(&zone->lock); 965 ret = zloop_update_seq_zone(zlo, zone_no); 966 mutex_unlock(&zone->lock); 967 968 return ret; 969 } 970 971 static bool zloop_dev_exists(struct zloop_device *zlo) 972 { 973 struct file *cnv, *seq; 974 bool exists; 975 976 cnv = zloop_filp_open_fmt(O_RDONLY, 0600, "%s/%u/cnv-%06u", 977 zlo->base_dir, zlo->id, 0); 978 seq = zloop_filp_open_fmt(O_RDONLY, 0600, "%s/%u/seq-%06u", 979 zlo->base_dir, zlo->id, 0); 980 exists = !IS_ERR(cnv) || !IS_ERR(seq); 981 982 if (!IS_ERR(cnv)) 983 fput(cnv); 984 if (!IS_ERR(seq)) 985 fput(seq); 986 987 return exists; 988 } 989 990 static int zloop_ctl_add(struct zloop_options *opts) 991 { 992 struct queue_limits lim = { 993 .max_hw_sectors = SZ_1M >> SECTOR_SHIFT, 994 .chunk_sectors = opts->zone_size, 995 .features = BLK_FEAT_ZONED | BLK_FEAT_WRITE_CACHE, 996 997 }; 998 unsigned int nr_zones, i, j; 999 struct zloop_device *zlo; 1000 int ret = -EINVAL; 1001 bool restore; 1002 1003 __module_get(THIS_MODULE); 1004 1005 nr_zones = opts->capacity >> ilog2(opts->zone_size); 1006 if (opts->nr_conv_zones >= nr_zones) { 1007 pr_err("Invalid number of conventional zones %u\n", 1008 opts->nr_conv_zones); 1009 goto out; 1010 } 1011 1012 zlo = kvzalloc_flex(*zlo, zones, nr_zones); 1013 if (!zlo) { 1014 ret = -ENOMEM; 1015 goto out; 1016 } 1017 WRITE_ONCE(zlo->state, Zlo_creating); 1018 1019 ret = mutex_lock_killable(&zloop_ctl_mutex); 1020 if (ret) 1021 goto out_free_dev; 1022 1023 /* Allocate id, if @opts->id >= 0, we're requesting that specific id */ 
1024 if (opts->id >= 0) { 1025 ret = idr_alloc(&zloop_index_idr, zlo, 1026 opts->id, opts->id + 1, GFP_KERNEL); 1027 if (ret == -ENOSPC) 1028 ret = -EEXIST; 1029 } else { 1030 ret = idr_alloc(&zloop_index_idr, zlo, 0, 0, GFP_KERNEL); 1031 } 1032 mutex_unlock(&zloop_ctl_mutex); 1033 if (ret < 0) 1034 goto out_free_dev; 1035 1036 zlo->id = ret; 1037 zlo->zone_shift = ilog2(opts->zone_size); 1038 zlo->zone_size = opts->zone_size; 1039 if (opts->zone_capacity) 1040 zlo->zone_capacity = opts->zone_capacity; 1041 else 1042 zlo->zone_capacity = zlo->zone_size; 1043 zlo->nr_zones = nr_zones; 1044 zlo->nr_conv_zones = opts->nr_conv_zones; 1045 zlo->buffered_io = opts->buffered_io; 1046 zlo->zone_append = opts->zone_append; 1047 if (zlo->zone_append) 1048 zlo->ordered_zone_append = opts->ordered_zone_append; 1049 1050 zlo->workqueue = alloc_workqueue("zloop%d", WQ_UNBOUND | WQ_FREEZABLE, 1051 opts->nr_queues * opts->queue_depth, zlo->id); 1052 if (!zlo->workqueue) { 1053 ret = -ENOMEM; 1054 goto out_free_idr; 1055 } 1056 1057 if (opts->base_dir) 1058 zlo->base_dir = kstrdup(opts->base_dir, GFP_KERNEL); 1059 else 1060 zlo->base_dir = kstrdup(ZLOOP_DEF_BASE_DIR, GFP_KERNEL); 1061 if (!zlo->base_dir) { 1062 ret = -ENOMEM; 1063 goto out_destroy_workqueue; 1064 } 1065 1066 zlo->data_dir = zloop_filp_open_fmt(O_RDONLY | O_DIRECTORY, 0, "%s/%u", 1067 zlo->base_dir, zlo->id); 1068 if (IS_ERR(zlo->data_dir)) { 1069 ret = PTR_ERR(zlo->data_dir); 1070 pr_warn("Failed to open directory %s/%u (err=%d)\n", 1071 zlo->base_dir, zlo->id, ret); 1072 goto out_free_base_dir; 1073 } 1074 1075 /* 1076 * If we already have zone files, we are restoring a device created by a 1077 * previous add operation. In this case, zloop_init_zone() will check 1078 * that the zone files are consistent with the zone configuration given. 
1079 */ 1080 restore = zloop_dev_exists(zlo); 1081 for (i = 0; i < nr_zones; i++) { 1082 ret = zloop_init_zone(zlo, opts, i, restore); 1083 if (ret) 1084 goto out_close_files; 1085 } 1086 1087 lim.physical_block_size = zlo->block_size; 1088 lim.logical_block_size = zlo->block_size; 1089 if (zlo->zone_append) 1090 lim.max_hw_zone_append_sectors = lim.max_hw_sectors; 1091 1092 zlo->tag_set.ops = &zloop_mq_ops; 1093 zlo->tag_set.nr_hw_queues = opts->nr_queues; 1094 zlo->tag_set.queue_depth = opts->queue_depth; 1095 zlo->tag_set.numa_node = NUMA_NO_NODE; 1096 zlo->tag_set.cmd_size = sizeof(struct zloop_cmd); 1097 zlo->tag_set.driver_data = zlo; 1098 1099 ret = blk_mq_alloc_tag_set(&zlo->tag_set); 1100 if (ret) { 1101 pr_err("blk_mq_alloc_tag_set failed (err=%d)\n", ret); 1102 goto out_close_files; 1103 } 1104 1105 zlo->disk = blk_mq_alloc_disk(&zlo->tag_set, &lim, zlo); 1106 if (IS_ERR(zlo->disk)) { 1107 pr_err("blk_mq_alloc_disk failed (err=%d)\n", ret); 1108 ret = PTR_ERR(zlo->disk); 1109 goto out_cleanup_tags; 1110 } 1111 zlo->disk->flags = GENHD_FL_NO_PART; 1112 zlo->disk->fops = &zloop_fops; 1113 zlo->disk->private_data = zlo; 1114 sprintf(zlo->disk->disk_name, "zloop%d", zlo->id); 1115 set_capacity(zlo->disk, (u64)lim.chunk_sectors * zlo->nr_zones); 1116 1117 ret = blk_revalidate_disk_zones(zlo->disk); 1118 if (ret) 1119 goto out_cleanup_disk; 1120 1121 ret = add_disk(zlo->disk); 1122 if (ret) { 1123 pr_err("add_disk failed (err=%d)\n", ret); 1124 goto out_cleanup_disk; 1125 } 1126 1127 mutex_lock(&zloop_ctl_mutex); 1128 WRITE_ONCE(zlo->state, Zlo_live); 1129 mutex_unlock(&zloop_ctl_mutex); 1130 1131 pr_info("zloop: device %d, %u zones of %llu MiB, %u B block size\n", 1132 zlo->id, zlo->nr_zones, 1133 ((sector_t)zlo->zone_size << SECTOR_SHIFT) >> 20, 1134 zlo->block_size); 1135 pr_info("zloop%d: using %s%s zone append\n", 1136 zlo->id, 1137 zlo->ordered_zone_append ? "ordered " : "", 1138 zlo->zone_append ? 
"native" : "emulated"); 1139 1140 return 0; 1141 1142 out_cleanup_disk: 1143 put_disk(zlo->disk); 1144 out_cleanup_tags: 1145 blk_mq_free_tag_set(&zlo->tag_set); 1146 out_close_files: 1147 for (j = 0; j < i; j++) { 1148 struct zloop_zone *zone = &zlo->zones[j]; 1149 1150 if (!IS_ERR_OR_NULL(zone->file)) 1151 fput(zone->file); 1152 } 1153 fput(zlo->data_dir); 1154 out_free_base_dir: 1155 kfree(zlo->base_dir); 1156 out_destroy_workqueue: 1157 destroy_workqueue(zlo->workqueue); 1158 out_free_idr: 1159 mutex_lock(&zloop_ctl_mutex); 1160 idr_remove(&zloop_index_idr, zlo->id); 1161 mutex_unlock(&zloop_ctl_mutex); 1162 out_free_dev: 1163 kvfree(zlo); 1164 out: 1165 module_put(THIS_MODULE); 1166 if (ret == -ENOENT) 1167 ret = -EINVAL; 1168 return ret; 1169 } 1170 1171 static int zloop_ctl_remove(struct zloop_options *opts) 1172 { 1173 struct zloop_device *zlo; 1174 int ret; 1175 1176 if (!(opts->mask & ZLOOP_OPT_ID)) { 1177 pr_err("No ID specified for remove\n"); 1178 return -EINVAL; 1179 } 1180 1181 if (opts->mask & ~ZLOOP_OPT_ID) { 1182 pr_err("Invalid option specified for remove\n"); 1183 return -EINVAL; 1184 } 1185 1186 ret = mutex_lock_killable(&zloop_ctl_mutex); 1187 if (ret) 1188 return ret; 1189 1190 zlo = idr_find(&zloop_index_idr, opts->id); 1191 if (!zlo || zlo->state == Zlo_creating) { 1192 ret = -ENODEV; 1193 } else if (zlo->state == Zlo_deleting) { 1194 ret = -EINVAL; 1195 } else { 1196 idr_remove(&zloop_index_idr, zlo->id); 1197 WRITE_ONCE(zlo->state, Zlo_deleting); 1198 } 1199 1200 mutex_unlock(&zloop_ctl_mutex); 1201 if (ret) 1202 return ret; 1203 1204 del_gendisk(zlo->disk); 1205 put_disk(zlo->disk); 1206 1207 pr_info("Removed device %d\n", opts->id); 1208 1209 module_put(THIS_MODULE); 1210 1211 return 0; 1212 } 1213 1214 static int zloop_parse_options(struct zloop_options *opts, const char *buf) 1215 { 1216 substring_t args[MAX_OPT_ARGS]; 1217 char *options, *o, *p; 1218 unsigned int token; 1219 int ret = 0; 1220 1221 /* Set defaults. 
*/ 1222 opts->mask = 0; 1223 opts->id = ZLOOP_DEF_ID; 1224 opts->capacity = ZLOOP_DEF_ZONE_SIZE * ZLOOP_DEF_NR_ZONES; 1225 opts->zone_size = ZLOOP_DEF_ZONE_SIZE; 1226 opts->nr_conv_zones = ZLOOP_DEF_NR_CONV_ZONES; 1227 opts->nr_queues = ZLOOP_DEF_NR_QUEUES; 1228 opts->queue_depth = ZLOOP_DEF_QUEUE_DEPTH; 1229 opts->buffered_io = ZLOOP_DEF_BUFFERED_IO; 1230 opts->zone_append = ZLOOP_DEF_ZONE_APPEND; 1231 opts->ordered_zone_append = ZLOOP_DEF_ORDERED_ZONE_APPEND; 1232 1233 if (!buf) 1234 return 0; 1235 1236 /* Skip leading spaces before the options. */ 1237 while (isspace(*buf)) 1238 buf++; 1239 1240 options = o = kstrdup(buf, GFP_KERNEL); 1241 if (!options) 1242 return -ENOMEM; 1243 1244 /* Parse the options, doing only some light invalid value checks. */ 1245 while ((p = strsep(&o, ",\n")) != NULL) { 1246 if (!*p) 1247 continue; 1248 1249 token = match_token(p, zloop_opt_tokens, args); 1250 opts->mask |= token; 1251 switch (token) { 1252 case ZLOOP_OPT_ID: 1253 if (match_int(args, &opts->id)) { 1254 ret = -EINVAL; 1255 goto out; 1256 } 1257 break; 1258 case ZLOOP_OPT_CAPACITY: 1259 if (match_uint(args, &token)) { 1260 ret = -EINVAL; 1261 goto out; 1262 } 1263 if (!token) { 1264 pr_err("Invalid capacity\n"); 1265 ret = -EINVAL; 1266 goto out; 1267 } 1268 opts->capacity = 1269 ((sector_t)token * SZ_1M) >> SECTOR_SHIFT; 1270 break; 1271 case ZLOOP_OPT_ZONE_SIZE: 1272 if (match_uint(args, &token)) { 1273 ret = -EINVAL; 1274 goto out; 1275 } 1276 if (!token || token > ZLOOP_MAX_ZONE_SIZE_MB || 1277 !is_power_of_2(token)) { 1278 pr_err("Invalid zone size %u\n", token); 1279 ret = -EINVAL; 1280 goto out; 1281 } 1282 opts->zone_size = 1283 ((sector_t)token * SZ_1M) >> SECTOR_SHIFT; 1284 break; 1285 case ZLOOP_OPT_ZONE_CAPACITY: 1286 if (match_uint(args, &token)) { 1287 ret = -EINVAL; 1288 goto out; 1289 } 1290 if (!token) { 1291 pr_err("Invalid zone capacity\n"); 1292 ret = -EINVAL; 1293 goto out; 1294 } 1295 opts->zone_capacity = 1296 ((sector_t)token * SZ_1M) >> 
SECTOR_SHIFT; 1297 break; 1298 case ZLOOP_OPT_NR_CONV_ZONES: 1299 if (match_uint(args, &token)) { 1300 ret = -EINVAL; 1301 goto out; 1302 } 1303 opts->nr_conv_zones = token; 1304 break; 1305 case ZLOOP_OPT_BASE_DIR: 1306 p = match_strdup(args); 1307 if (!p) { 1308 ret = -ENOMEM; 1309 goto out; 1310 } 1311 kfree(opts->base_dir); 1312 opts->base_dir = p; 1313 break; 1314 case ZLOOP_OPT_NR_QUEUES: 1315 if (match_uint(args, &token)) { 1316 ret = -EINVAL; 1317 goto out; 1318 } 1319 if (!token) { 1320 pr_err("Invalid number of queues\n"); 1321 ret = -EINVAL; 1322 goto out; 1323 } 1324 opts->nr_queues = min(token, num_online_cpus()); 1325 break; 1326 case ZLOOP_OPT_QUEUE_DEPTH: 1327 if (match_uint(args, &token)) { 1328 ret = -EINVAL; 1329 goto out; 1330 } 1331 if (!token) { 1332 pr_err("Invalid queue depth\n"); 1333 ret = -EINVAL; 1334 goto out; 1335 } 1336 opts->queue_depth = token; 1337 break; 1338 case ZLOOP_OPT_BUFFERED_IO: 1339 opts->buffered_io = true; 1340 break; 1341 case ZLOOP_OPT_ZONE_APPEND: 1342 if (match_uint(args, &token)) { 1343 ret = -EINVAL; 1344 goto out; 1345 } 1346 if (token != 0 && token != 1) { 1347 pr_err("Invalid zone_append value\n"); 1348 ret = -EINVAL; 1349 goto out; 1350 } 1351 opts->zone_append = token; 1352 break; 1353 case ZLOOP_OPT_ORDERED_ZONE_APPEND: 1354 opts->ordered_zone_append = true; 1355 break; 1356 case ZLOOP_OPT_ERR: 1357 default: 1358 pr_warn("unknown parameter or missing value '%s'\n", p); 1359 ret = -EINVAL; 1360 goto out; 1361 } 1362 } 1363 1364 ret = -EINVAL; 1365 if (opts->capacity <= opts->zone_size) { 1366 pr_err("Invalid capacity\n"); 1367 goto out; 1368 } 1369 1370 if (opts->zone_capacity > opts->zone_size) { 1371 pr_err("Invalid zone capacity\n"); 1372 goto out; 1373 } 1374 1375 ret = 0; 1376 out: 1377 kfree(options); 1378 return ret; 1379 } 1380 1381 enum { 1382 ZLOOP_CTL_ADD, 1383 ZLOOP_CTL_REMOVE, 1384 }; 1385 1386 static struct zloop_ctl_op { 1387 int code; 1388 const char *name; 1389 } zloop_ctl_ops[] = { 1390 { 
ZLOOP_CTL_ADD, "add" }, 1391 { ZLOOP_CTL_REMOVE, "remove" }, 1392 { -1, NULL }, 1393 }; 1394 1395 static ssize_t zloop_ctl_write(struct file *file, const char __user *ubuf, 1396 size_t count, loff_t *pos) 1397 { 1398 struct zloop_options opts = { }; 1399 struct zloop_ctl_op *op; 1400 const char *buf, *opts_buf; 1401 int i, ret; 1402 1403 if (count > PAGE_SIZE) 1404 return -ENOMEM; 1405 1406 buf = memdup_user_nul(ubuf, count); 1407 if (IS_ERR(buf)) 1408 return PTR_ERR(buf); 1409 1410 for (i = 0; i < ARRAY_SIZE(zloop_ctl_ops); i++) { 1411 op = &zloop_ctl_ops[i]; 1412 if (!op->name) { 1413 pr_err("Invalid operation\n"); 1414 ret = -EINVAL; 1415 goto out; 1416 } 1417 if (!strncmp(buf, op->name, strlen(op->name))) 1418 break; 1419 } 1420 1421 if (count <= strlen(op->name)) 1422 opts_buf = NULL; 1423 else 1424 opts_buf = buf + strlen(op->name); 1425 1426 ret = zloop_parse_options(&opts, opts_buf); 1427 if (ret) { 1428 pr_err("Failed to parse options\n"); 1429 goto out; 1430 } 1431 1432 switch (op->code) { 1433 case ZLOOP_CTL_ADD: 1434 ret = zloop_ctl_add(&opts); 1435 break; 1436 case ZLOOP_CTL_REMOVE: 1437 ret = zloop_ctl_remove(&opts); 1438 break; 1439 default: 1440 pr_err("Invalid operation\n"); 1441 ret = -EINVAL; 1442 goto out; 1443 } 1444 1445 out: 1446 kfree(opts.base_dir); 1447 kfree(buf); 1448 return ret ? 
ret : count; 1449 } 1450 1451 static int zloop_ctl_show(struct seq_file *seq_file, void *private) 1452 { 1453 const struct match_token *tok; 1454 int i; 1455 1456 /* Add operation */ 1457 seq_printf(seq_file, "%s ", zloop_ctl_ops[0].name); 1458 for (i = 0; i < ARRAY_SIZE(zloop_opt_tokens); i++) { 1459 tok = &zloop_opt_tokens[i]; 1460 if (!tok->pattern) 1461 break; 1462 if (i) 1463 seq_putc(seq_file, ','); 1464 seq_puts(seq_file, tok->pattern); 1465 } 1466 seq_putc(seq_file, '\n'); 1467 1468 /* Remove operation */ 1469 seq_puts(seq_file, zloop_ctl_ops[1].name); 1470 seq_puts(seq_file, " id=%d\n"); 1471 1472 return 0; 1473 } 1474 1475 static int zloop_ctl_open(struct inode *inode, struct file *file) 1476 { 1477 file->private_data = NULL; 1478 return single_open(file, zloop_ctl_show, NULL); 1479 } 1480 1481 static int zloop_ctl_release(struct inode *inode, struct file *file) 1482 { 1483 return single_release(inode, file); 1484 } 1485 1486 static const struct file_operations zloop_ctl_fops = { 1487 .owner = THIS_MODULE, 1488 .open = zloop_ctl_open, 1489 .release = zloop_ctl_release, 1490 .write = zloop_ctl_write, 1491 .read = seq_read, 1492 }; 1493 1494 static struct miscdevice zloop_misc = { 1495 .minor = MISC_DYNAMIC_MINOR, 1496 .name = "zloop-control", 1497 .fops = &zloop_ctl_fops, 1498 }; 1499 1500 static int __init zloop_init(void) 1501 { 1502 int ret; 1503 1504 ret = misc_register(&zloop_misc); 1505 if (ret) { 1506 pr_err("Failed to register misc device: %d\n", ret); 1507 return ret; 1508 } 1509 pr_info("Module loaded\n"); 1510 1511 return 0; 1512 } 1513 1514 static void __exit zloop_exit(void) 1515 { 1516 misc_deregister(&zloop_misc); 1517 idr_destroy(&zloop_index_idr); 1518 } 1519 1520 module_init(zloop_init); 1521 module_exit(zloop_exit); 1522 1523 MODULE_DESCRIPTION("Zoned loopback device"); 1524 MODULE_LICENSE("GPL"); 1525