1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (c) 2025, Christoph Hellwig. 4 * Copyright (c) 2025, Western Digital Corporation or its affiliates. 5 * 6 * Zoned Loop Device driver - exports a zoned block device using one file per 7 * zone as backing storage. 8 */ 9 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 10 11 #include <linux/module.h> 12 #include <linux/blk-mq.h> 13 #include <linux/blkzoned.h> 14 #include <linux/pagemap.h> 15 #include <linux/miscdevice.h> 16 #include <linux/falloc.h> 17 #include <linux/mutex.h> 18 #include <linux/parser.h> 19 #include <linux/seq_file.h> 20 21 /* 22 * Options for adding (and removing) a device. 23 */ 24 enum { 25 ZLOOP_OPT_ERR = 0, 26 ZLOOP_OPT_ID = (1 << 0), 27 ZLOOP_OPT_CAPACITY = (1 << 1), 28 ZLOOP_OPT_ZONE_SIZE = (1 << 2), 29 ZLOOP_OPT_ZONE_CAPACITY = (1 << 3), 30 ZLOOP_OPT_NR_CONV_ZONES = (1 << 4), 31 ZLOOP_OPT_BASE_DIR = (1 << 5), 32 ZLOOP_OPT_NR_QUEUES = (1 << 6), 33 ZLOOP_OPT_QUEUE_DEPTH = (1 << 7), 34 ZLOOP_OPT_BUFFERED_IO = (1 << 8), 35 ZLOOP_OPT_ZONE_APPEND = (1 << 9), 36 ZLOOP_OPT_ORDERED_ZONE_APPEND = (1 << 10), 37 }; 38 39 static const match_table_t zloop_opt_tokens = { 40 { ZLOOP_OPT_ID, "id=%d" }, 41 { ZLOOP_OPT_CAPACITY, "capacity_mb=%u" }, 42 { ZLOOP_OPT_ZONE_SIZE, "zone_size_mb=%u" }, 43 { ZLOOP_OPT_ZONE_CAPACITY, "zone_capacity_mb=%u" }, 44 { ZLOOP_OPT_NR_CONV_ZONES, "conv_zones=%u" }, 45 { ZLOOP_OPT_BASE_DIR, "base_dir=%s" }, 46 { ZLOOP_OPT_NR_QUEUES, "nr_queues=%u" }, 47 { ZLOOP_OPT_QUEUE_DEPTH, "queue_depth=%u" }, 48 { ZLOOP_OPT_BUFFERED_IO, "buffered_io" }, 49 { ZLOOP_OPT_ZONE_APPEND, "zone_append=%u" }, 50 { ZLOOP_OPT_ORDERED_ZONE_APPEND, "ordered_zone_append" }, 51 { ZLOOP_OPT_ERR, NULL } 52 }; 53 54 /* Default values for the "add" operation. */ 55 #define ZLOOP_DEF_ID -1 56 #define ZLOOP_DEF_ZONE_SIZE ((256ULL * SZ_1M) >> SECTOR_SHIFT) 57 #define ZLOOP_DEF_NR_ZONES 64 58 #define ZLOOP_DEF_NR_CONV_ZONES 8 59 #define ZLOOP_DEF_BASE_DIR "/var/local/zloop" 60 #define ZLOOP_DEF_NR_QUEUES 1 61 #define ZLOOP_DEF_QUEUE_DEPTH 128 62 #define ZLOOP_DEF_BUFFERED_IO false 63 #define ZLOOP_DEF_ZONE_APPEND true 64 #define ZLOOP_DEF_ORDERED_ZONE_APPEND false 65 66 /* Arbitrary limit on the zone size (16GB). */ 67 #define ZLOOP_MAX_ZONE_SIZE_MB 16384 68 69 struct zloop_options { 70 unsigned int mask; 71 int id; 72 sector_t capacity; 73 sector_t zone_size; 74 sector_t zone_capacity; 75 unsigned int nr_conv_zones; 76 char *base_dir; 77 unsigned int nr_queues; 78 unsigned int queue_depth; 79 bool buffered_io; 80 bool zone_append; 81 bool ordered_zone_append; 82 }; 83 84 /* 85 * Device states. 86 */ 87 enum { 88 Zlo_creating = 0, 89 Zlo_live, 90 Zlo_deleting, 91 }; 92 93 enum zloop_zone_flags { 94 ZLOOP_ZONE_CONV = 0, 95 ZLOOP_ZONE_SEQ_ERROR, 96 }; 97 98 struct zloop_zone { 99 struct file *file; 100 101 unsigned long flags; 102 struct mutex lock; 103 spinlock_t wp_lock; 104 enum blk_zone_cond cond; 105 sector_t start; 106 sector_t wp; 107 108 gfp_t old_gfp_mask; 109 }; 110 111 struct zloop_device { 112 unsigned int id; 113 unsigned int state; 114 115 struct blk_mq_tag_set tag_set; 116 struct gendisk *disk; 117 118 struct workqueue_struct *workqueue; 119 bool buffered_io; 120 bool zone_append; 121 bool ordered_zone_append; 122 123 const char *base_dir; 124 struct file *data_dir; 125 126 unsigned int zone_shift; 127 sector_t zone_size; 128 sector_t zone_capacity; 129 unsigned int nr_zones; 130 unsigned int nr_conv_zones; 131 unsigned int block_size; 132 133 struct zloop_zone zones[] __counted_by(nr_zones); 134 }; 135 136 struct zloop_cmd { 137 struct work_struct work; 138 atomic_t ref; 139 sector_t sector; 140 sector_t nr_sectors; 141 long ret; 142 struct kiocb iocb; 143 struct bio_vec *bvec; 144 }; 145 146 static DEFINE_IDR(zloop_index_idr); 147 static DEFINE_MUTEX(zloop_ctl_mutex); 148 149 static unsigned int rq_zone_no(struct request *rq) 150 { 151 struct zloop_device *zlo = rq->q->queuedata; 152 153 return blk_rq_pos(rq) >> zlo->zone_shift; 154 } 155 156 static int zloop_update_seq_zone(struct zloop_device *zlo, unsigned int zone_no) 157 { 158 struct zloop_zone *zone = &zlo->zones[zone_no]; 159 struct kstat stat; 160 sector_t file_sectors; 161 unsigned long flags; 162 int ret; 163 164 lockdep_assert_held(&zone->lock); 165 166 ret = vfs_getattr(&zone->file->f_path, &stat, STATX_SIZE, 0); 167 if (ret < 0) { 168 pr_err("Failed to get zone %u file stat (err=%d)\n", 169 zone_no, ret); 170 set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags); 171 return ret; 172 } 173 174 file_sectors = stat.size >> SECTOR_SHIFT; 175 if (file_sectors > zlo->zone_capacity) { 176 pr_err("Zone %u file too large (%llu sectors > %llu)\n", 177 zone_no, file_sectors, zlo->zone_capacity); 178 return -EINVAL; 179 } 180 181 if (file_sectors & ((zlo->block_size >> SECTOR_SHIFT) - 1)) { 182 pr_err("Zone %u file size not aligned to block size %u\n", 183 zone_no, zlo->block_size); 184 return -EINVAL; 185 } 186 187 spin_lock_irqsave(&zone->wp_lock, flags); 188 if (!file_sectors) { 189 zone->cond = BLK_ZONE_COND_EMPTY; 190 zone->wp = zone->start; 191 } else if (file_sectors == zlo->zone_capacity) { 192 zone->cond = BLK_ZONE_COND_FULL; 193 zone->wp = ULLONG_MAX; 194 } else { 195 zone->cond = BLK_ZONE_COND_CLOSED; 196 zone->wp = zone->start + file_sectors; 197 } 198 spin_unlock_irqrestore(&zone->wp_lock, flags); 199 200 return 0; 201 } 202 203 static int zloop_open_zone(struct zloop_device *zlo, unsigned int zone_no) 204 { 205 struct zloop_zone *zone = &zlo->zones[zone_no]; 206 int ret = 0; 207 208 if (test_bit(ZLOOP_ZONE_CONV, &zone->flags)) 209 return -EIO; 210 211 mutex_lock(&zone->lock); 212 213 if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) { 214 ret = zloop_update_seq_zone(zlo, zone_no); 215 if (ret) 216 goto unlock; 217 } 218 219 switch (zone->cond) { 220 case BLK_ZONE_COND_EXP_OPEN: 221 break; 222 case BLK_ZONE_COND_EMPTY: 223 case BLK_ZONE_COND_CLOSED: 224 case BLK_ZONE_COND_IMP_OPEN: 225 zone->cond = BLK_ZONE_COND_EXP_OPEN; 226 break; 227 case BLK_ZONE_COND_FULL: 228 default: 229 ret = -EIO; 230 break; 231 } 232 233 unlock: 234 mutex_unlock(&zone->lock); 235 236 return ret; 237 } 238 239 static int zloop_close_zone(struct zloop_device *zlo, unsigned int zone_no) 240 { 241 struct zloop_zone *zone = &zlo->zones[zone_no]; 242 unsigned long flags; 243 int ret = 0; 244 245 if (test_bit(ZLOOP_ZONE_CONV, &zone->flags)) 246 return -EIO; 247 248 mutex_lock(&zone->lock); 249 250 if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) { 251 ret = zloop_update_seq_zone(zlo, zone_no); 252 if (ret) 253 goto unlock; 254 } 255 256 switch (zone->cond) { 257 case BLK_ZONE_COND_CLOSED: 258 break; 259 case BLK_ZONE_COND_IMP_OPEN: 260 case BLK_ZONE_COND_EXP_OPEN: 261 spin_lock_irqsave(&zone->wp_lock, flags); 262 if (zone->wp == zone->start) 263 zone->cond = BLK_ZONE_COND_EMPTY; 264 else 265 zone->cond = BLK_ZONE_COND_CLOSED; 266 spin_unlock_irqrestore(&zone->wp_lock, flags); 267 break; 268 case BLK_ZONE_COND_EMPTY: 269 case BLK_ZONE_COND_FULL: 270 default: 271 ret = -EIO; 272 break; 273 } 274 275 unlock: 276 mutex_unlock(&zone->lock); 277 278 return ret; 279 } 280 281 static int zloop_reset_zone(struct zloop_device *zlo, unsigned int zone_no) 282 { 283 struct zloop_zone *zone = &zlo->zones[zone_no]; 284 unsigned long flags; 285 int ret = 0; 286 287 if (test_bit(ZLOOP_ZONE_CONV, &zone->flags)) 288 return -EIO; 289 290 mutex_lock(&zone->lock); 291 292 if (!test_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags) && 293 zone->cond == BLK_ZONE_COND_EMPTY) 294 goto unlock; 295 296 if (vfs_truncate(&zone->file->f_path, 0)) { 297 set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags); 298 ret = -EIO; 299 goto unlock; 300 } 301 302 spin_lock_irqsave(&zone->wp_lock, flags); 303 zone->cond = BLK_ZONE_COND_EMPTY; 304 zone->wp = zone->start; 305 clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags); 306 spin_unlock_irqrestore(&zone->wp_lock, flags); 307 308 unlock: 309 mutex_unlock(&zone->lock); 310 311 return ret; 312 } 313 314 static int zloop_reset_all_zones(struct zloop_device *zlo) 315 { 316 unsigned int i; 317 int ret; 318 319 for (i = zlo->nr_conv_zones; i < zlo->nr_zones; i++) { 320 ret = zloop_reset_zone(zlo, i); 321 if (ret) 322 return ret; 323 } 324 325 return 0; 326 } 327 328 static int zloop_finish_zone(struct zloop_device *zlo, unsigned int zone_no) 329 { 330 struct zloop_zone *zone = &zlo->zones[zone_no]; 331 unsigned long flags; 332 int ret = 0; 333 334 if (test_bit(ZLOOP_ZONE_CONV, &zone->flags)) 335 return -EIO; 336 337 mutex_lock(&zone->lock); 338 339 if (!test_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags) && 340 zone->cond == BLK_ZONE_COND_FULL) 341 goto unlock; 342 343 if (vfs_truncate(&zone->file->f_path, zlo->zone_size << SECTOR_SHIFT)) { 344 set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags); 345 ret = -EIO; 346 goto unlock; 347 } 348 349 spin_lock_irqsave(&zone->wp_lock, flags); 350 zone->cond = BLK_ZONE_COND_FULL; 351 zone->wp = ULLONG_MAX; 352 clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags); 353 spin_unlock_irqrestore(&zone->wp_lock, flags); 354 355 unlock: 356 mutex_unlock(&zone->lock); 357 358 return ret; 359 } 360 361 static void zloop_put_cmd(struct zloop_cmd *cmd) 362 { 363 struct request *rq = blk_mq_rq_from_pdu(cmd); 364 365 if (!atomic_dec_and_test(&cmd->ref)) 366 return; 367 kfree(cmd->bvec); 368 cmd->bvec = NULL; 369 if (likely(!blk_should_fake_timeout(rq->q))) 370 blk_mq_complete_request(rq); 371 } 372 373 static void zloop_rw_complete(struct kiocb *iocb, long ret) 374 { 375 struct zloop_cmd *cmd = container_of(iocb, struct zloop_cmd, iocb); 376 377 cmd->ret = ret; 378 zloop_put_cmd(cmd); 379 } 380 381 static void zloop_rw(struct zloop_cmd *cmd) 382 { 383 struct request *rq = blk_mq_rq_from_pdu(cmd); 384 struct zloop_device *zlo = rq->q->queuedata; 385 unsigned int zone_no = rq_zone_no(rq); 386 sector_t sector = blk_rq_pos(rq); 387 sector_t nr_sectors = blk_rq_sectors(rq); 388 bool is_append = req_op(rq) == REQ_OP_ZONE_APPEND; 389 bool is_write = req_op(rq) == REQ_OP_WRITE || is_append; 390 int rw = is_write ? ITER_SOURCE : ITER_DEST; 391 struct req_iterator rq_iter; 392 struct zloop_zone *zone; 393 struct iov_iter iter; 394 struct bio_vec tmp; 395 unsigned long flags; 396 sector_t zone_end; 397 int nr_bvec = 0; 398 int ret; 399 400 atomic_set(&cmd->ref, 2); 401 cmd->sector = sector; 402 cmd->nr_sectors = nr_sectors; 403 cmd->ret = 0; 404 405 if (WARN_ON_ONCE(is_append && !zlo->zone_append)) { 406 ret = -EIO; 407 goto out; 408 } 409 410 /* We should never get an I/O beyond the device capacity. */ 411 if (WARN_ON_ONCE(zone_no >= zlo->nr_zones)) { 412 ret = -EIO; 413 goto out; 414 } 415 zone = &zlo->zones[zone_no]; 416 zone_end = zone->start + zlo->zone_capacity; 417 418 /* 419 * The block layer should never send requests that are not fully 420 * contained within the zone. 421 */ 422 if (WARN_ON_ONCE(sector + nr_sectors > zone->start + zlo->zone_size)) { 423 ret = -EIO; 424 goto out; 425 } 426 427 if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) { 428 mutex_lock(&zone->lock); 429 ret = zloop_update_seq_zone(zlo, zone_no); 430 mutex_unlock(&zone->lock); 431 if (ret) 432 goto out; 433 } 434 435 if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write) { 436 mutex_lock(&zone->lock); 437 438 spin_lock_irqsave(&zone->wp_lock, flags); 439 440 /* 441 * Zone append operations always go at the current write 442 * pointer, but regular write operations must already be 443 * aligned to the write pointer when submitted. 444 */ 445 if (is_append) { 446 /* 447 * If ordered zone append is in use, we already checked 448 * and set the target sector in zloop_queue_rq(). 449 */ 450 if (!zlo->ordered_zone_append) { 451 if (zone->cond == BLK_ZONE_COND_FULL || 452 zone->wp + nr_sectors > zone_end) { 453 spin_unlock_irqrestore(&zone->wp_lock, 454 flags); 455 ret = -EIO; 456 goto unlock; 457 } 458 sector = zone->wp; 459 } 460 cmd->sector = sector; 461 } else if (sector != zone->wp) { 462 spin_unlock_irqrestore(&zone->wp_lock, flags); 463 pr_err("Zone %u: unaligned write: sect %llu, wp %llu\n", 464 zone_no, sector, zone->wp); 465 ret = -EIO; 466 goto unlock; 467 } 468 469 /* Implicitly open the target zone. */ 470 if (zone->cond == BLK_ZONE_COND_CLOSED || 471 zone->cond == BLK_ZONE_COND_EMPTY) 472 zone->cond = BLK_ZONE_COND_IMP_OPEN; 473 474 /* 475 * Advance the write pointer, unless ordered zone append is in 476 * use. If the write fails, the write pointer position will be 477 * corrected when the next I/O starts execution. 478 */ 479 if (!is_append || !zlo->ordered_zone_append) { 480 zone->wp += nr_sectors; 481 if (zone->wp == zone_end) { 482 zone->cond = BLK_ZONE_COND_FULL; 483 zone->wp = ULLONG_MAX; 484 } 485 } 486 487 spin_unlock_irqrestore(&zone->wp_lock, flags); 488 } 489 490 rq_for_each_bvec(tmp, rq, rq_iter) 491 nr_bvec++; 492 493 if (rq->bio != rq->biotail) { 494 struct bio_vec *bvec; 495 496 cmd->bvec = kmalloc_array(nr_bvec, sizeof(*cmd->bvec), GFP_NOIO); 497 if (!cmd->bvec) { 498 ret = -EIO; 499 goto unlock; 500 } 501 502 /* 503 * The bios of the request may be started from the middle of 504 * the 'bvec' because of bio splitting, so we can't directly 505 * copy bio->bi_iov_vec to new bvec. The rq_for_each_bvec 506 * API will take care of all details for us. 507 */ 508 bvec = cmd->bvec; 509 rq_for_each_bvec(tmp, rq, rq_iter) { 510 *bvec = tmp; 511 bvec++; 512 } 513 iov_iter_bvec(&iter, rw, cmd->bvec, nr_bvec, blk_rq_bytes(rq)); 514 } else { 515 /* 516 * Same here, this bio may be started from the middle of the 517 * 'bvec' because of bio splitting, so offset from the bvec 518 * must be passed to iov iterator 519 */ 520 iov_iter_bvec(&iter, rw, 521 __bvec_iter_bvec(rq->bio->bi_io_vec, rq->bio->bi_iter), 522 nr_bvec, blk_rq_bytes(rq)); 523 iter.iov_offset = rq->bio->bi_iter.bi_bvec_done; 524 } 525 526 cmd->iocb.ki_pos = (sector - zone->start) << SECTOR_SHIFT; 527 cmd->iocb.ki_filp = zone->file; 528 cmd->iocb.ki_complete = zloop_rw_complete; 529 if (!zlo->buffered_io) 530 cmd->iocb.ki_flags = IOCB_DIRECT; 531 cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0); 532 533 if (rw == ITER_SOURCE) 534 ret = zone->file->f_op->write_iter(&cmd->iocb, &iter); 535 else 536 ret = zone->file->f_op->read_iter(&cmd->iocb, &iter); 537 unlock: 538 if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write) 539 mutex_unlock(&zone->lock); 540 out: 541 if (ret != -EIOCBQUEUED) 542 zloop_rw_complete(&cmd->iocb, ret); 543 zloop_put_cmd(cmd); 544 } 545 546 static void zloop_handle_cmd(struct zloop_cmd *cmd) 547 { 548 struct request *rq = blk_mq_rq_from_pdu(cmd); 549 struct zloop_device *zlo = rq->q->queuedata; 550 551 /* We can block in this context, so ignore REQ_NOWAIT. */ 552 if (rq->cmd_flags & REQ_NOWAIT) 553 rq->cmd_flags &= ~REQ_NOWAIT; 554 555 switch (req_op(rq)) { 556 case REQ_OP_READ: 557 case REQ_OP_WRITE: 558 case REQ_OP_ZONE_APPEND: 559 /* 560 * zloop_rw() always executes asynchronously or completes 561 * directly. 562 */ 563 zloop_rw(cmd); 564 return; 565 case REQ_OP_FLUSH: 566 /* 567 * Sync the entire FS containing the zone files instead of 568 * walking all files 569 */ 570 cmd->ret = sync_filesystem(file_inode(zlo->data_dir)->i_sb); 571 break; 572 case REQ_OP_ZONE_RESET: 573 cmd->ret = zloop_reset_zone(zlo, rq_zone_no(rq)); 574 break; 575 case REQ_OP_ZONE_RESET_ALL: 576 cmd->ret = zloop_reset_all_zones(zlo); 577 break; 578 case REQ_OP_ZONE_FINISH: 579 cmd->ret = zloop_finish_zone(zlo, rq_zone_no(rq)); 580 break; 581 case REQ_OP_ZONE_OPEN: 582 cmd->ret = zloop_open_zone(zlo, rq_zone_no(rq)); 583 break; 584 case REQ_OP_ZONE_CLOSE: 585 cmd->ret = zloop_close_zone(zlo, rq_zone_no(rq)); 586 break; 587 default: 588 WARN_ON_ONCE(1); 589 pr_err("Unsupported operation %d\n", req_op(rq)); 590 cmd->ret = -EOPNOTSUPP; 591 break; 592 } 593 594 blk_mq_complete_request(rq); 595 } 596 597 static void zloop_cmd_workfn(struct work_struct *work) 598 { 599 struct zloop_cmd *cmd = container_of(work, struct zloop_cmd, work); 600 int orig_flags = current->flags; 601 602 current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO; 603 zloop_handle_cmd(cmd); 604 current->flags = orig_flags; 605 } 606 607 static void zloop_complete_rq(struct request *rq) 608 { 609 struct zloop_cmd *cmd = blk_mq_rq_to_pdu(rq); 610 struct zloop_device *zlo = rq->q->queuedata; 611 unsigned int zone_no = cmd->sector >> zlo->zone_shift; 612 struct zloop_zone *zone = &zlo->zones[zone_no]; 613 blk_status_t sts = BLK_STS_OK; 614 615 switch (req_op(rq)) { 616 case REQ_OP_READ: 617 if (cmd->ret < 0) 618 pr_err("Zone %u: failed read sector %llu, %llu sectors\n", 619 zone_no, cmd->sector, cmd->nr_sectors); 620 621 if (cmd->ret >= 0 && cmd->ret != blk_rq_bytes(rq)) { 622 /* short read */ 623 struct bio *bio; 624 625 __rq_for_each_bio(bio, rq) 626 zero_fill_bio(bio); 627 } 628 break; 629 case REQ_OP_WRITE: 630 case REQ_OP_ZONE_APPEND: 631 if (cmd->ret < 0) 632 pr_err("Zone %u: failed %swrite sector %llu, %llu sectors\n", 633 zone_no, 634 req_op(rq) == REQ_OP_WRITE ? "" : "append ", 635 cmd->sector, cmd->nr_sectors); 636 637 if (cmd->ret >= 0 && cmd->ret != blk_rq_bytes(rq)) { 638 pr_err("Zone %u: partial write %ld/%u B\n", 639 zone_no, cmd->ret, blk_rq_bytes(rq)); 640 cmd->ret = -EIO; 641 } 642 643 if (cmd->ret < 0 && !test_bit(ZLOOP_ZONE_CONV, &zone->flags)) { 644 /* 645 * A write to a sequential zone file failed: mark the 646 * zone as having an error. This will be corrected and 647 * cleared when the next IO is submitted. 648 */ 649 set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags); 650 break; 651 } 652 if (req_op(rq) == REQ_OP_ZONE_APPEND) 653 rq->__sector = cmd->sector; 654 655 break; 656 default: 657 break; 658 } 659 660 if (cmd->ret < 0) 661 sts = errno_to_blk_status(cmd->ret); 662 blk_mq_end_request(rq, sts); 663 } 664 665 static bool zloop_set_zone_append_sector(struct request *rq) 666 { 667 struct zloop_device *zlo = rq->q->queuedata; 668 unsigned int zone_no = rq_zone_no(rq); 669 struct zloop_zone *zone = &zlo->zones[zone_no]; 670 sector_t zone_end = zone->start + zlo->zone_capacity; 671 sector_t nr_sectors = blk_rq_sectors(rq); 672 unsigned long flags; 673 674 spin_lock_irqsave(&zone->wp_lock, flags); 675 676 if (zone->cond == BLK_ZONE_COND_FULL || 677 zone->wp + nr_sectors > zone_end) { 678 spin_unlock_irqrestore(&zone->wp_lock, flags); 679 return false; 680 } 681 682 rq->__sector = zone->wp; 683 zone->wp += blk_rq_sectors(rq); 684 if (zone->wp >= zone_end) { 685 zone->cond = BLK_ZONE_COND_FULL; 686 zone->wp = ULLONG_MAX; 687 } 688 689 spin_unlock_irqrestore(&zone->wp_lock, flags); 690 691 return true; 692 } 693 694 static blk_status_t zloop_queue_rq(struct blk_mq_hw_ctx *hctx, 695 const struct blk_mq_queue_data *bd) 696 { 697 struct request *rq = bd->rq; 698 struct zloop_cmd *cmd = blk_mq_rq_to_pdu(rq); 699 struct zloop_device *zlo = rq->q->queuedata; 700 701 if (zlo->state == Zlo_deleting) 702 return BLK_STS_IOERR; 703 704 /* 705 * If we need to strongly order zone append operations, set the request 706 * sector to the zone write pointer location now instead of when the 707 * command work runs. 708 */ 709 if (zlo->ordered_zone_append && req_op(rq) == REQ_OP_ZONE_APPEND) { 710 if (!zloop_set_zone_append_sector(rq)) 711 return BLK_STS_IOERR; 712 } 713 714 blk_mq_start_request(rq); 715 716 INIT_WORK(&cmd->work, zloop_cmd_workfn); 717 queue_work(zlo->workqueue, &cmd->work); 718 719 return BLK_STS_OK; 720 } 721 722 static const struct blk_mq_ops zloop_mq_ops = { 723 .queue_rq = zloop_queue_rq, 724 .complete = zloop_complete_rq, 725 }; 726 727 static int zloop_open(struct gendisk *disk, blk_mode_t mode) 728 { 729 struct zloop_device *zlo = disk->private_data; 730 int ret; 731 732 ret = mutex_lock_killable(&zloop_ctl_mutex); 733 if (ret) 734 return ret; 735 736 if (zlo->state != Zlo_live) 737 ret = -ENXIO; 738 mutex_unlock(&zloop_ctl_mutex); 739 return ret; 740 } 741 742 static int zloop_report_zones(struct gendisk *disk, sector_t sector, 743 unsigned int nr_zones, struct blk_report_zones_args *args) 744 { 745 struct zloop_device *zlo = disk->private_data; 746 struct blk_zone blkz = {}; 747 unsigned int first, i; 748 unsigned long flags; 749 int ret; 750 751 first = disk_zone_no(disk, sector); 752 if (first >= zlo->nr_zones) 753 return 0; 754 nr_zones = min(nr_zones, zlo->nr_zones - first); 755 756 for (i = 0; i < nr_zones; i++) { 757 unsigned int zone_no = first + i; 758 struct zloop_zone *zone = &zlo->zones[zone_no]; 759 760 mutex_lock(&zone->lock); 761 762 if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) { 763 ret = zloop_update_seq_zone(zlo, zone_no); 764 if (ret) { 765 mutex_unlock(&zone->lock); 766 return ret; 767 } 768 } 769 770 blkz.start = zone->start; 771 blkz.len = zlo->zone_size; 772 spin_lock_irqsave(&zone->wp_lock, flags); 773 blkz.wp = zone->wp; 774 spin_unlock_irqrestore(&zone->wp_lock, flags); 775 blkz.cond = zone->cond; 776 if (test_bit(ZLOOP_ZONE_CONV, &zone->flags)) { 777 blkz.type = BLK_ZONE_TYPE_CONVENTIONAL; 778 blkz.capacity = zlo->zone_size; 779 } else { 780 blkz.type = BLK_ZONE_TYPE_SEQWRITE_REQ; 781 blkz.capacity = zlo->zone_capacity; 782 } 783 784 mutex_unlock(&zone->lock); 785 786 ret = disk_report_zone(disk, &blkz, i, args); 787 if (ret) 788 return ret; 789 } 790 791 return nr_zones; 792 } 793 794 static void zloop_free_disk(struct gendisk *disk) 795 { 796 struct zloop_device *zlo = disk->private_data; 797 unsigned int i; 798 799 blk_mq_free_tag_set(&zlo->tag_set); 800 801 for (i = 0; i < zlo->nr_zones; i++) { 802 struct zloop_zone *zone = &zlo->zones[i]; 803 804 mapping_set_gfp_mask(zone->file->f_mapping, 805 zone->old_gfp_mask); 806 fput(zone->file); 807 } 808 809 fput(zlo->data_dir); 810 destroy_workqueue(zlo->workqueue); 811 kfree(zlo->base_dir); 812 kvfree(zlo); 813 } 814 815 static const struct block_device_operations zloop_fops = { 816 .owner = THIS_MODULE, 817 .open = zloop_open, 818 .report_zones = zloop_report_zones, 819 .free_disk = zloop_free_disk, 820 }; 821 822 __printf(3, 4) 823 static struct file *zloop_filp_open_fmt(int oflags, umode_t mode, 824 const char *fmt, ...) 825 { 826 struct file *file; 827 va_list ap; 828 char *p; 829 830 va_start(ap, fmt); 831 p = kvasprintf(GFP_KERNEL, fmt, ap); 832 va_end(ap); 833 834 if (!p) 835 return ERR_PTR(-ENOMEM); 836 file = filp_open(p, oflags, mode); 837 kfree(p); 838 return file; 839 } 840 841 static int zloop_get_block_size(struct zloop_device *zlo, 842 struct zloop_zone *zone) 843 { 844 struct block_device *sb_bdev = zone->file->f_mapping->host->i_sb->s_bdev; 845 struct kstat st; 846 847 /* 848 * If the FS block size is lower than or equal to 4K, use that as the 849 * device block size. Otherwise, fallback to the FS direct IO alignment 850 * constraint if that is provided, and to the FS underlying device 851 * physical block size if the direct IO alignment is unknown. 852 */ 853 if (file_inode(zone->file)->i_sb->s_blocksize <= SZ_4K) 854 zlo->block_size = file_inode(zone->file)->i_sb->s_blocksize; 855 else if (!vfs_getattr(&zone->file->f_path, &st, STATX_DIOALIGN, 0) && 856 (st.result_mask & STATX_DIOALIGN)) 857 zlo->block_size = st.dio_offset_align; 858 else if (sb_bdev) 859 zlo->block_size = bdev_physical_block_size(sb_bdev); 860 else 861 zlo->block_size = SECTOR_SIZE; 862 863 if (zlo->zone_capacity & ((zlo->block_size >> SECTOR_SHIFT) - 1)) { 864 pr_err("Zone capacity is not aligned to block size %u\n", 865 zlo->block_size); 866 return -EINVAL; 867 } 868 869 return 0; 870 } 871 872 static int zloop_init_zone(struct zloop_device *zlo, struct zloop_options *opts, 873 unsigned int zone_no, bool restore) 874 { 875 struct zloop_zone *zone = &zlo->zones[zone_no]; 876 int oflags = O_RDWR; 877 struct kstat stat; 878 sector_t file_sectors; 879 int ret; 880 881 mutex_init(&zone->lock); 882 spin_lock_init(&zone->wp_lock); 883 zone->start = (sector_t)zone_no << zlo->zone_shift; 884 885 if (!restore) 886 oflags |= O_CREAT; 887 888 if (!opts->buffered_io) 889 oflags |= O_DIRECT; 890 891 if (zone_no < zlo->nr_conv_zones) { 892 /* Conventional zone file. */ 893 set_bit(ZLOOP_ZONE_CONV, &zone->flags); 894 zone->cond = BLK_ZONE_COND_NOT_WP; 895 zone->wp = U64_MAX; 896 897 zone->file = zloop_filp_open_fmt(oflags, 0600, "%s/%u/cnv-%06u", 898 zlo->base_dir, zlo->id, zone_no); 899 if (IS_ERR(zone->file)) { 900 pr_err("Failed to open zone %u file %s/%u/cnv-%06u (err=%ld)", 901 zone_no, zlo->base_dir, zlo->id, zone_no, 902 PTR_ERR(zone->file)); 903 return PTR_ERR(zone->file); 904 } 905 906 if (!zlo->block_size) { 907 ret = zloop_get_block_size(zlo, zone); 908 if (ret) 909 return ret; 910 } 911 912 ret = vfs_getattr(&zone->file->f_path, &stat, STATX_SIZE, 0); 913 if (ret < 0) { 914 pr_err("Failed to get zone %u file stat\n", zone_no); 915 return ret; 916 } 917 file_sectors = stat.size >> SECTOR_SHIFT; 918 919 if (restore && file_sectors != zlo->zone_size) { 920 pr_err("Invalid conventional zone %u file size (%llu sectors != %llu)\n", 921 zone_no, file_sectors, zlo->zone_capacity); 922 return ret; 923 } 924 925 ret = vfs_truncate(&zone->file->f_path, 926 zlo->zone_size << SECTOR_SHIFT); 927 if (ret < 0) { 928 pr_err("Failed to truncate zone %u file (err=%d)\n", 929 zone_no, ret); 930 return ret; 931 } 932 933 return 0; 934 } 935 936 /* Sequential zone file. */ 937 zone->file = zloop_filp_open_fmt(oflags, 0600, "%s/%u/seq-%06u", 938 zlo->base_dir, zlo->id, zone_no); 939 if (IS_ERR(zone->file)) { 940 pr_err("Failed to open zone %u file %s/%u/seq-%06u (err=%ld)", 941 zone_no, zlo->base_dir, zlo->id, zone_no, 942 PTR_ERR(zone->file)); 943 return PTR_ERR(zone->file); 944 } 945 946 if (!zlo->block_size) { 947 ret = zloop_get_block_size(zlo, zone); 948 if (ret) 949 return ret; 950 } 951 952 zloop_get_block_size(zlo, zone); 953 954 mutex_lock(&zone->lock); 955 ret = zloop_update_seq_zone(zlo, zone_no); 956 mutex_unlock(&zone->lock); 957 958 return ret; 959 } 960 961 static bool zloop_dev_exists(struct zloop_device *zlo) 962 { 963 struct file *cnv, *seq; 964 bool exists; 965 966 cnv = zloop_filp_open_fmt(O_RDONLY, 0600, "%s/%u/cnv-%06u", 967 zlo->base_dir, zlo->id, 0); 968 seq = zloop_filp_open_fmt(O_RDONLY, 0600, "%s/%u/seq-%06u", 969 zlo->base_dir, zlo->id, 0); 970 exists = !IS_ERR(cnv) || !IS_ERR(seq); 971 972 if (!IS_ERR(cnv)) 973 fput(cnv); 974 if (!IS_ERR(seq)) 975 fput(seq); 976 977 return exists; 978 } 979 980 static int zloop_ctl_add(struct zloop_options *opts) 981 { 982 struct queue_limits lim = { 983 .max_hw_sectors = SZ_1M >> SECTOR_SHIFT, 984 .chunk_sectors = opts->zone_size, 985 .features = BLK_FEAT_ZONED, 986 }; 987 unsigned int nr_zones, i, j; 988 struct zloop_device *zlo; 989 int ret = -EINVAL; 990 bool restore; 991 992 __module_get(THIS_MODULE); 993 994 nr_zones = opts->capacity >> ilog2(opts->zone_size); 995 if (opts->nr_conv_zones >= nr_zones) { 996 pr_err("Invalid number of conventional zones %u\n", 997 opts->nr_conv_zones); 998 goto out; 999 } 1000 1001 zlo = kvzalloc(struct_size(zlo, zones, nr_zones), GFP_KERNEL); 1002 if (!zlo) { 1003 ret = -ENOMEM; 1004 goto out; 1005 } 1006 zlo->state = Zlo_creating; 1007 1008 ret = mutex_lock_killable(&zloop_ctl_mutex); 1009 if (ret) 1010 goto out_free_dev; 1011 1012 /* Allocate id, if @opts->id >= 0, we're requesting that specific id */ 1013 if (opts->id >= 0) { 1014 ret = idr_alloc(&zloop_index_idr, zlo, 1015 opts->id, opts->id + 1, GFP_KERNEL); 1016 if (ret == -ENOSPC) 1017 ret = -EEXIST; 1018 } else { 1019 ret = idr_alloc(&zloop_index_idr, zlo, 0, 0, GFP_KERNEL); 1020 } 1021 mutex_unlock(&zloop_ctl_mutex); 1022 if (ret < 0) 1023 goto out_free_dev; 1024 1025 zlo->id = ret; 1026 zlo->zone_shift = ilog2(opts->zone_size); 1027 zlo->zone_size = opts->zone_size; 1028 if (opts->zone_capacity) 1029 zlo->zone_capacity = opts->zone_capacity; 1030 else 1031 zlo->zone_capacity = zlo->zone_size; 1032 zlo->nr_zones = nr_zones; 1033 zlo->nr_conv_zones = opts->nr_conv_zones; 1034 zlo->buffered_io = opts->buffered_io; 1035 zlo->zone_append = opts->zone_append; 1036 if (zlo->zone_append) 1037 zlo->ordered_zone_append = opts->ordered_zone_append; 1038 1039 zlo->workqueue = alloc_workqueue("zloop%d", WQ_UNBOUND | WQ_FREEZABLE, 1040 opts->nr_queues * opts->queue_depth, zlo->id); 1041 if (!zlo->workqueue) { 1042 ret = -ENOMEM; 1043 goto out_free_idr; 1044 } 1045 1046 if (opts->base_dir) 1047 zlo->base_dir = kstrdup(opts->base_dir, GFP_KERNEL); 1048 else 1049 zlo->base_dir = kstrdup(ZLOOP_DEF_BASE_DIR, GFP_KERNEL); 1050 if (!zlo->base_dir) { 1051 ret = -ENOMEM; 1052 goto out_destroy_workqueue; 1053 } 1054 1055 zlo->data_dir = zloop_filp_open_fmt(O_RDONLY | O_DIRECTORY, 0, "%s/%u", 1056 zlo->base_dir, zlo->id); 1057 if (IS_ERR(zlo->data_dir)) { 1058 ret = PTR_ERR(zlo->data_dir); 1059 pr_warn("Failed to open directory %s/%u (err=%d)\n", 1060 zlo->base_dir, zlo->id, ret); 1061 goto out_free_base_dir; 1062 } 1063 1064 /* 1065 * If we already have zone files, we are restoring a device created by a 1066 * previous add operation. In this case, zloop_init_zone() will check 1067 * that the zone files are consistent with the zone configuration given. 1068 */ 1069 restore = zloop_dev_exists(zlo); 1070 for (i = 0; i < nr_zones; i++) { 1071 ret = zloop_init_zone(zlo, opts, i, restore); 1072 if (ret) 1073 goto out_close_files; 1074 } 1075 1076 lim.physical_block_size = zlo->block_size; 1077 lim.logical_block_size = zlo->block_size; 1078 if (zlo->zone_append) 1079 lim.max_hw_zone_append_sectors = lim.max_hw_sectors; 1080 1081 zlo->tag_set.ops = &zloop_mq_ops; 1082 zlo->tag_set.nr_hw_queues = opts->nr_queues; 1083 zlo->tag_set.queue_depth = opts->queue_depth; 1084 zlo->tag_set.numa_node = NUMA_NO_NODE; 1085 zlo->tag_set.cmd_size = sizeof(struct zloop_cmd); 1086 zlo->tag_set.driver_data = zlo; 1087 1088 ret = blk_mq_alloc_tag_set(&zlo->tag_set); 1089 if (ret) { 1090 pr_err("blk_mq_alloc_tag_set failed (err=%d)\n", ret); 1091 goto out_close_files; 1092 } 1093 1094 zlo->disk = blk_mq_alloc_disk(&zlo->tag_set, &lim, zlo); 1095 if (IS_ERR(zlo->disk)) { 1096 pr_err("blk_mq_alloc_disk failed (err=%d)\n", ret); 1097 ret = PTR_ERR(zlo->disk); 1098 goto out_cleanup_tags; 1099 } 1100 zlo->disk->flags = GENHD_FL_NO_PART; 1101 zlo->disk->fops = &zloop_fops; 1102 zlo->disk->private_data = zlo; 1103 sprintf(zlo->disk->disk_name, "zloop%d", zlo->id); 1104 set_capacity(zlo->disk, (u64)lim.chunk_sectors * zlo->nr_zones); 1105 1106 ret = blk_revalidate_disk_zones(zlo->disk); 1107 if (ret) 1108 goto out_cleanup_disk; 1109 1110 ret = add_disk(zlo->disk); 1111 if (ret) { 1112 pr_err("add_disk failed (err=%d)\n", ret); 1113 goto out_cleanup_disk; 1114 } 1115 1116 mutex_lock(&zloop_ctl_mutex); 1117 zlo->state = Zlo_live; 1118 mutex_unlock(&zloop_ctl_mutex); 1119 1120 pr_info("zloop: device %d, %u zones of %llu MiB, %u B block size\n", 1121 zlo->id, zlo->nr_zones, 1122 ((sector_t)zlo->zone_size << SECTOR_SHIFT) >> 20, 1123 zlo->block_size); 1124 pr_info("zloop%d: using %s%s zone append\n", 1125 zlo->id, 1126 zlo->ordered_zone_append ? "ordered " : "", 1127 zlo->zone_append ? "native" : "emulated"); 1128 1129 return 0; 1130 1131 out_cleanup_disk: 1132 put_disk(zlo->disk); 1133 out_cleanup_tags: 1134 blk_mq_free_tag_set(&zlo->tag_set); 1135 out_close_files: 1136 for (j = 0; j < i; j++) { 1137 struct zloop_zone *zone = &zlo->zones[j]; 1138 1139 if (!IS_ERR_OR_NULL(zone->file)) 1140 fput(zone->file); 1141 } 1142 fput(zlo->data_dir); 1143 out_free_base_dir: 1144 kfree(zlo->base_dir); 1145 out_destroy_workqueue: 1146 destroy_workqueue(zlo->workqueue); 1147 out_free_idr: 1148 mutex_lock(&zloop_ctl_mutex); 1149 idr_remove(&zloop_index_idr, zlo->id); 1150 mutex_unlock(&zloop_ctl_mutex); 1151 out_free_dev: 1152 kvfree(zlo); 1153 out: 1154 module_put(THIS_MODULE); 1155 if (ret == -ENOENT) 1156 ret = -EINVAL; 1157 return ret; 1158 } 1159 1160 static int zloop_ctl_remove(struct zloop_options *opts) 1161 { 1162 struct zloop_device *zlo; 1163 int ret; 1164 1165 if (!(opts->mask & ZLOOP_OPT_ID)) { 1166 pr_err("No ID specified\n"); 1167 return -EINVAL; 1168 } 1169 1170 ret = mutex_lock_killable(&zloop_ctl_mutex); 1171 if (ret) 1172 return ret; 1173 1174 zlo = idr_find(&zloop_index_idr, opts->id); 1175 if (!zlo || zlo->state == Zlo_creating) { 1176 ret = -ENODEV; 1177 } else if (zlo->state == Zlo_deleting) { 1178 ret = -EINVAL; 1179 } else { 1180 idr_remove(&zloop_index_idr, zlo->id); 1181 zlo->state = Zlo_deleting; 1182 } 1183 1184 mutex_unlock(&zloop_ctl_mutex); 1185 if (ret) 1186 return ret; 1187 1188 del_gendisk(zlo->disk); 1189 put_disk(zlo->disk); 1190 1191 pr_info("Removed device %d\n", opts->id); 1192 1193 module_put(THIS_MODULE); 1194 1195 return 0; 1196 } 1197 1198 static int zloop_parse_options(struct zloop_options *opts, const char *buf) 1199 { 1200 substring_t args[MAX_OPT_ARGS]; 1201 char *options, *o, *p; 1202 unsigned int token; 1203 int ret = 0; 1204 1205 /* Set defaults. */ 1206 opts->mask = 0; 1207 opts->id = ZLOOP_DEF_ID; 1208 opts->capacity = ZLOOP_DEF_ZONE_SIZE * ZLOOP_DEF_NR_ZONES; 1209 opts->zone_size = ZLOOP_DEF_ZONE_SIZE; 1210 opts->nr_conv_zones = ZLOOP_DEF_NR_CONV_ZONES; 1211 opts->nr_queues = ZLOOP_DEF_NR_QUEUES; 1212 opts->queue_depth = ZLOOP_DEF_QUEUE_DEPTH; 1213 opts->buffered_io = ZLOOP_DEF_BUFFERED_IO; 1214 opts->zone_append = ZLOOP_DEF_ZONE_APPEND; 1215 opts->ordered_zone_append = ZLOOP_DEF_ORDERED_ZONE_APPEND; 1216 1217 if (!buf) 1218 return 0; 1219 1220 /* Skip leading spaces before the options. */ 1221 while (isspace(*buf)) 1222 buf++; 1223 1224 options = o = kstrdup(buf, GFP_KERNEL); 1225 if (!options) 1226 return -ENOMEM; 1227 1228 /* Parse the options, doing only some light invalid value checks. */ 1229 while ((p = strsep(&o, ",\n")) != NULL) { 1230 if (!*p) 1231 continue; 1232 1233 token = match_token(p, zloop_opt_tokens, args); 1234 opts->mask |= token; 1235 switch (token) { 1236 case ZLOOP_OPT_ID: 1237 if (match_int(args, &opts->id)) { 1238 ret = -EINVAL; 1239 goto out; 1240 } 1241 break; 1242 case ZLOOP_OPT_CAPACITY: 1243 if (match_uint(args, &token)) { 1244 ret = -EINVAL; 1245 goto out; 1246 } 1247 if (!token) { 1248 pr_err("Invalid capacity\n"); 1249 ret = -EINVAL; 1250 goto out; 1251 } 1252 opts->capacity = 1253 ((sector_t)token * SZ_1M) >> SECTOR_SHIFT; 1254 break; 1255 case ZLOOP_OPT_ZONE_SIZE: 1256 if (match_uint(args, &token)) { 1257 ret = -EINVAL; 1258 goto out; 1259 } 1260 if (!token || token > ZLOOP_MAX_ZONE_SIZE_MB || 1261 !is_power_of_2(token)) { 1262 pr_err("Invalid zone size %u\n", token); 1263 ret = -EINVAL; 1264 goto out; 1265 } 1266 opts->zone_size = 1267 ((sector_t)token * SZ_1M) >> SECTOR_SHIFT; 1268 break; 1269 case ZLOOP_OPT_ZONE_CAPACITY: 1270 if (match_uint(args, &token)) { 1271 ret = -EINVAL; 1272 goto out; 1273 } 1274 if (!token) { 1275 pr_err("Invalid zone capacity\n"); 1276 ret = -EINVAL; 1277 goto out; 1278 } 1279 opts->zone_capacity = 1280 ((sector_t)token * SZ_1M) >> SECTOR_SHIFT; 1281 break; 1282 case ZLOOP_OPT_NR_CONV_ZONES: 1283 if (match_uint(args, &token)) { 1284 ret = -EINVAL; 1285 goto out; 1286 } 1287 opts->nr_conv_zones = token; 1288 break; 1289 case ZLOOP_OPT_BASE_DIR: 1290 p = match_strdup(args); 1291 if (!p) { 1292 ret = -ENOMEM; 1293 goto out; 1294 } 1295 kfree(opts->base_dir); 1296 opts->base_dir = p; 1297 break; 1298 case ZLOOP_OPT_NR_QUEUES: 1299 if (match_uint(args, &token)) { 1300 ret = -EINVAL; 1301 goto out; 1302 } 1303 if (!token) { 1304 pr_err("Invalid number of queues\n"); 1305 ret = -EINVAL; 1306 goto out; 1307 } 1308 opts->nr_queues = min(token, num_online_cpus()); 1309 break; 1310 case ZLOOP_OPT_QUEUE_DEPTH: 1311 if (match_uint(args, &token)) { 1312 ret = -EINVAL; 1313 goto out; 1314 } 1315 if (!token) { 1316 pr_err("Invalid queue depth\n"); 1317 ret = -EINVAL; 1318 goto out; 1319 } 1320 opts->queue_depth = token; 1321 break; 1322 case ZLOOP_OPT_BUFFERED_IO: 1323 opts->buffered_io = true; 1324 break; 1325 case ZLOOP_OPT_ZONE_APPEND: 1326 if (match_uint(args, &token)) { 1327 ret = -EINVAL; 1328 goto out; 1329 } 1330 if (token != 0 && token != 1) { 1331 pr_err("Invalid zone_append value\n"); 1332 ret = -EINVAL; 1333 goto out; 1334 } 1335 opts->zone_append = token; 1336 break; 1337 case ZLOOP_OPT_ORDERED_ZONE_APPEND: 1338 opts->ordered_zone_append = true; 1339 break; 1340 case ZLOOP_OPT_ERR: 1341 default: 1342 pr_warn("unknown parameter or missing value '%s'\n", p); 1343 ret = -EINVAL; 1344 goto out; 1345 } 1346 } 1347 1348 ret = -EINVAL; 1349 if (opts->capacity <= opts->zone_size) { 1350 pr_err("Invalid capacity\n"); 1351 goto out; 1352 } 1353 1354 if (opts->zone_capacity > opts->zone_size) { 1355 pr_err("Invalid zone capacity\n"); 1356 goto out; 1357 } 1358 1359 ret = 0; 1360 out: 1361 kfree(options); 1362 return ret; 1363 } 1364 1365 enum { 1366 ZLOOP_CTL_ADD, 1367 ZLOOP_CTL_REMOVE, 1368 }; 1369 1370 static struct zloop_ctl_op { 1371 int code; 1372 const char *name; 1373 } zloop_ctl_ops[] = { 1374 { ZLOOP_CTL_ADD, "add" }, 1375 { ZLOOP_CTL_REMOVE, "remove" }, 1376 { -1, NULL }, 1377 }; 1378 1379 static ssize_t zloop_ctl_write(struct file *file, const char __user *ubuf, 1380 size_t count, loff_t *pos) 1381 { 1382 struct zloop_options opts = { }; 1383 struct zloop_ctl_op *op; 1384 const char *buf, *opts_buf; 1385 int i, ret; 1386 1387 if (count > PAGE_SIZE) 1388 return -ENOMEM; 1389 1390 buf = memdup_user_nul(ubuf, count); 1391 if (IS_ERR(buf)) 1392 return PTR_ERR(buf); 1393 1394 for (i = 0; i < ARRAY_SIZE(zloop_ctl_ops); i++) { 1395 op = &zloop_ctl_ops[i]; 1396 if (!op->name) { 1397 pr_err("Invalid operation\n"); 1398 ret = -EINVAL; 1399 goto out; 1400 } 1401 if (!strncmp(buf, op->name, strlen(op->name))) 1402 break; 1403 } 1404 1405 if (count <= strlen(op->name)) 1406 opts_buf = NULL; 1407 else 1408 opts_buf = buf + strlen(op->name); 1409 1410 ret = zloop_parse_options(&opts, opts_buf); 1411 if (ret) { 1412 pr_err("Failed to parse options\n"); 1413 goto out; 1414 } 1415 1416 switch (op->code) { 1417 case ZLOOP_CTL_ADD: 1418 ret = zloop_ctl_add(&opts); 1419 break; 1420 case ZLOOP_CTL_REMOVE: 1421 ret = zloop_ctl_remove(&opts); 1422 break; 1423 default: 1424 pr_err("Invalid operation\n"); 1425 ret = -EINVAL; 1426 goto out; 1427 } 1428 1429 out: 1430 kfree(opts.base_dir); 1431 kfree(buf); 1432 return ret ? ret : count; 1433 } 1434 1435 static int zloop_ctl_show(struct seq_file *seq_file, void *private) 1436 { 1437 const struct match_token *tok; 1438 int i; 1439 1440 /* Add operation */ 1441 seq_printf(seq_file, "%s ", zloop_ctl_ops[0].name); 1442 for (i = 0; i < ARRAY_SIZE(zloop_opt_tokens); i++) { 1443 tok = &zloop_opt_tokens[i]; 1444 if (!tok->pattern) 1445 break; 1446 if (i) 1447 seq_putc(seq_file, ','); 1448 seq_puts(seq_file, tok->pattern); 1449 } 1450 seq_putc(seq_file, '\n'); 1451 1452 /* Remove operation */ 1453 seq_puts(seq_file, zloop_ctl_ops[1].name); 1454 seq_puts(seq_file, " id=%d\n"); 1455 1456 return 0; 1457 } 1458 1459 static int zloop_ctl_open(struct inode *inode, struct file *file) 1460 { 1461 file->private_data = NULL; 1462 return single_open(file, zloop_ctl_show, NULL); 1463 } 1464 1465 static int zloop_ctl_release(struct inode *inode, struct file *file) 1466 { 1467 return single_release(inode, file); 1468 } 1469 1470 static const struct file_operations zloop_ctl_fops = { 1471 .owner = THIS_MODULE, 1472 .open = zloop_ctl_open, 1473 .release = zloop_ctl_release, 1474 .write = zloop_ctl_write, 1475 .read = seq_read, 1476 }; 1477 1478 static struct miscdevice zloop_misc = { 1479 .minor = MISC_DYNAMIC_MINOR, 1480 .name = "zloop-control", 1481 .fops = &zloop_ctl_fops, 1482 }; 1483 1484 static int __init zloop_init(void) 1485 { 1486 int ret; 1487 1488 ret = misc_register(&zloop_misc); 1489 if (ret) { 1490 pr_err("Failed to register misc device: %d\n", ret); 1491 return ret; 1492 } 1493 pr_info("Module loaded\n"); 1494 1495 return 0; 1496 } 1497 1498 static void __exit zloop_exit(void) 1499 { 1500 misc_deregister(&zloop_misc); 1501 idr_destroy(&zloop_index_idr); 1502 } 1503 1504 module_init(zloop_init); 1505 module_exit(zloop_exit); 1506 1507 MODULE_DESCRIPTION("Zoned loopback device"); 1508 MODULE_LICENSE("GPL"); 1509