// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2025, Christoph Hellwig.
 * Copyright (c) 2025, Western Digital Corporation or its affiliates.
 *
 * Zoned Loop Device driver - exports a zoned block device using one file per
 * zone as backing storage.
 */
#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/blk-mq.h>
#include <linux/blkzoned.h>
#include <linux/pagemap.h>
#include <linux/miscdevice.h>
#include <linux/falloc.h>
#include <linux/mutex.h>
#include <linux/parser.h>
#include <linux/seq_file.h>

/*
 * Options for adding (and removing) a device.
 */
enum {
	ZLOOP_OPT_ERR			= 0,
	ZLOOP_OPT_ID			= (1 << 0),
	ZLOOP_OPT_CAPACITY		= (1 << 1),
	ZLOOP_OPT_ZONE_SIZE		= (1 << 2),
	ZLOOP_OPT_ZONE_CAPACITY		= (1 << 3),
	ZLOOP_OPT_NR_CONV_ZONES		= (1 << 4),
	ZLOOP_OPT_BASE_DIR		= (1 << 5),
	ZLOOP_OPT_NR_QUEUES		= (1 << 6),
	ZLOOP_OPT_QUEUE_DEPTH		= (1 << 7),
	ZLOOP_OPT_BUFFERED_IO		= (1 << 8),
	ZLOOP_OPT_ZONE_APPEND		= (1 << 9),
	ZLOOP_OPT_ORDERED_ZONE_APPEND	= (1 << 10),
};

static const match_table_t zloop_opt_tokens = {
	{ ZLOOP_OPT_ID,			"id=%d"			},
	{ ZLOOP_OPT_CAPACITY,		"capacity_mb=%u"	},
	{ ZLOOP_OPT_ZONE_SIZE,		"zone_size_mb=%u"	},
	{ ZLOOP_OPT_ZONE_CAPACITY,	"zone_capacity_mb=%u"	},
	{ ZLOOP_OPT_NR_CONV_ZONES,	"conv_zones=%u"		},
	{ ZLOOP_OPT_BASE_DIR,		"base_dir=%s"		},
	{ ZLOOP_OPT_NR_QUEUES,		"nr_queues=%u"		},
	{ ZLOOP_OPT_QUEUE_DEPTH,	"queue_depth=%u"	},
	{ ZLOOP_OPT_BUFFERED_IO,	"buffered_io"		},
	{ ZLOOP_OPT_ZONE_APPEND,	"zone_append=%u"	},
	{ ZLOOP_OPT_ORDERED_ZONE_APPEND, "ordered_zone_append"	},
	{ ZLOOP_OPT_ERR,		NULL			}
};

/* Default values for the "add" operation. */
#define ZLOOP_DEF_ID			-1
#define ZLOOP_DEF_ZONE_SIZE		((256ULL * SZ_1M) >> SECTOR_SHIFT)
#define ZLOOP_DEF_NR_ZONES		64
#define ZLOOP_DEF_NR_CONV_ZONES		8
#define ZLOOP_DEF_BASE_DIR		"/var/local/zloop"
#define ZLOOP_DEF_NR_QUEUES		1
#define ZLOOP_DEF_QUEUE_DEPTH		128
#define ZLOOP_DEF_BUFFERED_IO		false
#define ZLOOP_DEF_ZONE_APPEND		true
#define ZLOOP_DEF_ORDERED_ZONE_APPEND	false

/* Arbitrary limit on the zone size (16GB). */
#define ZLOOP_MAX_ZONE_SIZE_MB	16384

struct zloop_options {
	unsigned int	mask;
	int		id;
	sector_t	capacity;
	sector_t	zone_size;
	sector_t	zone_capacity;
	unsigned int	nr_conv_zones;
	char		*base_dir;
	unsigned int	nr_queues;
	unsigned int	queue_depth;
	bool		buffered_io;
	bool		zone_append;
	bool		ordered_zone_append;
};

/*
 * Device states.
 */
enum {
	Zlo_creating = 0,
	Zlo_live,
	Zlo_deleting,
};

enum zloop_zone_flags {
	ZLOOP_ZONE_CONV = 0,
	ZLOOP_ZONE_SEQ_ERROR,
};

struct zloop_zone {
	struct file		*file;

	unsigned long		flags;
	struct mutex		lock;
	spinlock_t		wp_lock;
	enum blk_zone_cond	cond;
	sector_t		start;
	sector_t		wp;

	gfp_t			old_gfp_mask;
};

struct zloop_device {
	unsigned int		id;
	unsigned int		state;

	struct blk_mq_tag_set	tag_set;
	struct gendisk		*disk;

	struct workqueue_struct *workqueue;
	bool			buffered_io;
	bool			zone_append;
	bool			ordered_zone_append;

	const char		*base_dir;
	struct file		*data_dir;

	unsigned int		zone_shift;
	sector_t		zone_size;
	sector_t		zone_capacity;
	unsigned int		nr_zones;
	unsigned int		nr_conv_zones;
	unsigned int		block_size;

	struct zloop_zone	zones[] __counted_by(nr_zones);
};

struct zloop_cmd {
	struct work_struct	work;
	atomic_t		ref;
	sector_t		sector;
	sector_t		nr_sectors;
	long			ret;
	struct kiocb		iocb;
	struct bio_vec		*bvec;
};

static DEFINE_IDR(zloop_index_idr);
static DEFINE_MUTEX(zloop_ctl_mutex);

static unsigned int rq_zone_no(struct request *rq)
{
	struct zloop_device *zlo = rq->q->queuedata;

	return blk_rq_pos(rq) >> zlo->zone_shift;
}

static int zloop_update_seq_zone(struct zloop_device *zlo, unsigned int zone_no)
{
	struct zloop_zone *zone = &zlo->zones[zone_no];
	struct kstat stat;
	sector_t file_sectors;
	unsigned long flags;
	int ret;

	lockdep_assert_held(&zone->lock);

	ret = vfs_getattr(&zone->file->f_path, &stat, STATX_SIZE, 0);
	if (ret < 0) {
		pr_err("Failed to get zone %u file stat (err=%d)\n",
		       zone_no, ret);
		set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
		return ret;
	}

	file_sectors = stat.size >> SECTOR_SHIFT;
	if (file_sectors > zlo->zone_capacity) {
		pr_err("Zone %u file too large (%llu sectors > %llu)\n",
		       zone_no, file_sectors, zlo->zone_capacity);
		return -EINVAL;
	}

	if (file_sectors & ((zlo->block_size >> SECTOR_SHIFT) - 1)) {
		pr_err("Zone %u file size not aligned to block size %u\n",
		       zone_no, zlo->block_size);
		return -EINVAL;
	}

	spin_lock_irqsave(&zone->wp_lock, flags);
	if (!file_sectors) {
		zone->cond = BLK_ZONE_COND_EMPTY;
		zone->wp = zone->start;
	} else if (file_sectors == zlo->zone_capacity) {
		zone->cond = BLK_ZONE_COND_FULL;
		zone->wp = ULLONG_MAX;
	} else {
		zone->cond = BLK_ZONE_COND_CLOSED;
		zone->wp = zone->start + file_sectors;
	}
	spin_unlock_irqrestore(&zone->wp_lock, flags);

	return 0;
}

static int zloop_open_zone(struct zloop_device *zlo, unsigned int zone_no)
{
	struct zloop_zone *zone = &zlo->zones[zone_no];
	int ret = 0;

	if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
		return -EIO;

	mutex_lock(&zone->lock);

	if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
		ret = zloop_update_seq_zone(zlo, zone_no);
		if (ret)
			goto unlock;
	}

	switch (zone->cond) {
	case BLK_ZONE_COND_EXP_OPEN:
		break;
	case BLK_ZONE_COND_EMPTY:
	case BLK_ZONE_COND_CLOSED:
	case BLK_ZONE_COND_IMP_OPEN:
		zone->cond = BLK_ZONE_COND_EXP_OPEN;
		break;
	case BLK_ZONE_COND_FULL:
	default:
		ret = -EIO;
		break;
	}

unlock:
	mutex_unlock(&zone->lock);

	return ret;
}

static int zloop_close_zone(struct zloop_device *zlo, unsigned int zone_no)
{
	struct zloop_zone *zone = &zlo->zones[zone_no];
	unsigned long flags;
	int ret = 0;

	if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
		return -EIO;

	mutex_lock(&zone->lock);

	if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
		ret = zloop_update_seq_zone(zlo, zone_no);
		if (ret)
			goto unlock;
	}

	switch (zone->cond) {
	case BLK_ZONE_COND_CLOSED:
		break;
	case BLK_ZONE_COND_IMP_OPEN:
	case BLK_ZONE_COND_EXP_OPEN:
		spin_lock_irqsave(&zone->wp_lock, flags);
		if (zone->wp == zone->start)
			zone->cond = BLK_ZONE_COND_EMPTY;
		else
			zone->cond = BLK_ZONE_COND_CLOSED;
		spin_unlock_irqrestore(&zone->wp_lock, flags);
		break;
	case BLK_ZONE_COND_EMPTY:
	case BLK_ZONE_COND_FULL:
	default:
		ret = -EIO;
		break;
	}

unlock:
	mutex_unlock(&zone->lock);

	return ret;
}

static int zloop_reset_zone(struct zloop_device *zlo, unsigned int zone_no)
{
	struct zloop_zone *zone = &zlo->zones[zone_no];
	unsigned long flags;
	int ret = 0;

	if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
		return -EIO;

	mutex_lock(&zone->lock);

	if (!test_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags) &&
	    zone->cond == BLK_ZONE_COND_EMPTY)
		goto unlock;

	if (vfs_truncate(&zone->file->f_path, 0)) {
		set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
		ret = -EIO;
		goto unlock;
	}

	spin_lock_irqsave(&zone->wp_lock, flags);
	zone->cond = BLK_ZONE_COND_EMPTY;
	zone->wp = zone->start;
	clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
	spin_unlock_irqrestore(&zone->wp_lock, flags);

unlock:
	mutex_unlock(&zone->lock);

	return ret;
}

static int zloop_reset_all_zones(struct zloop_device *zlo)
{
	unsigned int i;
	int ret;

	for (i = zlo->nr_conv_zones; i < zlo->nr_zones; i++) {
		ret = zloop_reset_zone(zlo, i);
		if (ret)
			return ret;
	}

	return 0;
}

static int zloop_finish_zone(struct zloop_device *zlo, unsigned int zone_no)
{
	struct zloop_zone *zone = &zlo->zones[zone_no];
	unsigned long flags;
	int ret = 0;

	if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
		return -EIO;

	mutex_lock(&zone->lock);

	if (!test_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags) &&
	    zone->cond == BLK_ZONE_COND_FULL)
		goto unlock;

	if (vfs_truncate(&zone->file->f_path, zlo->zone_size << SECTOR_SHIFT)) {
		set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
		ret = -EIO;
		goto unlock;
	}

	spin_lock_irqsave(&zone->wp_lock, flags);
	zone->cond = BLK_ZONE_COND_FULL;
	zone->wp = ULLONG_MAX;
	clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
	spin_unlock_irqrestore(&zone->wp_lock, flags);

unlock:
	mutex_unlock(&zone->lock);

	return ret;
}

static void zloop_put_cmd(struct zloop_cmd *cmd)
{
	struct request *rq = blk_mq_rq_from_pdu(cmd);

	if (!atomic_dec_and_test(&cmd->ref))
		return;
	kfree(cmd->bvec);
	cmd->bvec = NULL;
	if (likely(!blk_should_fake_timeout(rq->q)))
		blk_mq_complete_request(rq);
}

static void zloop_rw_complete(struct kiocb *iocb, long ret)
{
	struct zloop_cmd *cmd = container_of(iocb, struct zloop_cmd, iocb);

	cmd->ret = ret;
	zloop_put_cmd(cmd);
}

static void zloop_rw(struct zloop_cmd *cmd)
{
	struct request *rq = blk_mq_rq_from_pdu(cmd);
	struct zloop_device *zlo = rq->q->queuedata;
	unsigned int zone_no = rq_zone_no(rq);
	sector_t sector = blk_rq_pos(rq);
	sector_t nr_sectors = blk_rq_sectors(rq);
	bool is_append = req_op(rq) == REQ_OP_ZONE_APPEND;
	bool is_write = req_op(rq) == REQ_OP_WRITE || is_append;
	int rw = is_write ? ITER_SOURCE : ITER_DEST;
	struct req_iterator rq_iter;
	struct zloop_zone *zone;
	struct iov_iter iter;
	struct bio_vec tmp;
	unsigned long flags;
	sector_t zone_end;
	int nr_bvec = 0;
	int ret;

	atomic_set(&cmd->ref, 2);
	cmd->sector = sector;
	cmd->nr_sectors = nr_sectors;
	cmd->ret = 0;

	if (WARN_ON_ONCE(is_append && !zlo->zone_append)) {
		ret = -EIO;
		goto out;
	}

	/* We should never get an I/O beyond the device capacity. */
	if (WARN_ON_ONCE(zone_no >= zlo->nr_zones)) {
		ret = -EIO;
		goto out;
	}
	zone = &zlo->zones[zone_no];
	zone_end = zone->start + zlo->zone_capacity;

	/*
	 * The block layer should never send requests that are not fully
	 * contained within the zone.
	 */
	if (WARN_ON_ONCE(sector + nr_sectors > zone->start + zlo->zone_size)) {
		ret = -EIO;
		goto out;
	}

	if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
		mutex_lock(&zone->lock);
		ret = zloop_update_seq_zone(zlo, zone_no);
		mutex_unlock(&zone->lock);
		if (ret)
			goto out;
	}

	if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write) {
		mutex_lock(&zone->lock);

		spin_lock_irqsave(&zone->wp_lock, flags);

		/*
		 * Zone append operations always go at the current write
		 * pointer, but regular write operations must already be
		 * aligned to the write pointer when submitted.
		 */
		if (is_append) {
			/*
			 * If ordered zone append is in use, we already checked
			 * and set the target sector in zloop_queue_rq().
			 */
			if (!zlo->ordered_zone_append) {
				if (zone->cond == BLK_ZONE_COND_FULL) {
					spin_unlock_irqrestore(&zone->wp_lock,
							       flags);
					ret = -EIO;
					goto unlock;
				}
				sector = zone->wp;
			}
			cmd->sector = sector;
		} else if (sector != zone->wp) {
			spin_unlock_irqrestore(&zone->wp_lock, flags);
			pr_err("Zone %u: unaligned write: sect %llu, wp %llu\n",
			       zone_no, sector, zone->wp);
			ret = -EIO;
			goto unlock;
		}

		/* Implicitly open the target zone. */
		if (zone->cond == BLK_ZONE_COND_CLOSED ||
		    zone->cond == BLK_ZONE_COND_EMPTY)
			zone->cond = BLK_ZONE_COND_IMP_OPEN;

		/*
		 * Advance the write pointer, unless ordered zone append is in
		 * use. If the write fails, the write pointer position will be
		 * corrected when the next I/O starts execution.
		 */
		if (!is_append || !zlo->ordered_zone_append) {
			zone->wp += nr_sectors;
			if (zone->wp == zone_end) {
				zone->cond = BLK_ZONE_COND_FULL;
				zone->wp = ULLONG_MAX;
			}
		}

		spin_unlock_irqrestore(&zone->wp_lock, flags);
	}

	rq_for_each_bvec(tmp, rq, rq_iter)
		nr_bvec++;

	if (rq->bio != rq->biotail) {
		struct bio_vec *bvec;

		cmd->bvec = kmalloc_array(nr_bvec, sizeof(*cmd->bvec), GFP_NOIO);
		if (!cmd->bvec) {
			ret = -EIO;
			goto unlock;
		}

		/*
		 * The bios of the request may be started from the middle of
		 * the 'bvec' because of bio splitting, so we can't directly
		 * copy bio->bi_iov_vec to new bvec. The rq_for_each_bvec
		 * API will take care of all details for us.
		 */
		bvec = cmd->bvec;
		rq_for_each_bvec(tmp, rq, rq_iter) {
			*bvec = tmp;
			bvec++;
		}
		iov_iter_bvec(&iter, rw, cmd->bvec, nr_bvec, blk_rq_bytes(rq));
	} else {
		/*
		 * Same here, this bio may be started from the middle of the
		 * 'bvec' because of bio splitting, so offset from the bvec
		 * must be passed to iov iterator
		 */
		iov_iter_bvec(&iter, rw,
			__bvec_iter_bvec(rq->bio->bi_io_vec, rq->bio->bi_iter),
			nr_bvec, blk_rq_bytes(rq));
		iter.iov_offset = rq->bio->bi_iter.bi_bvec_done;
	}

	cmd->iocb.ki_pos = (sector - zone->start) << SECTOR_SHIFT;
	cmd->iocb.ki_filp = zone->file;
	cmd->iocb.ki_complete = zloop_rw_complete;
	if (!zlo->buffered_io)
		cmd->iocb.ki_flags = IOCB_DIRECT;
	cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);

	if (rw == ITER_SOURCE)
		ret = zone->file->f_op->write_iter(&cmd->iocb, &iter);
	else
		ret = zone->file->f_op->read_iter(&cmd->iocb, &iter);
unlock:
	if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write)
		mutex_unlock(&zone->lock);
out:
	if (ret != -EIOCBQUEUED)
		zloop_rw_complete(&cmd->iocb, ret);
	zloop_put_cmd(cmd);
}

static void zloop_handle_cmd(struct zloop_cmd *cmd)
{
	struct request *rq = blk_mq_rq_from_pdu(cmd);
	struct zloop_device *zlo = rq->q->queuedata;

	switch (req_op(rq)) {
	case REQ_OP_READ:
	case REQ_OP_WRITE:
	case REQ_OP_ZONE_APPEND:
		/*
		 * zloop_rw() always executes asynchronously or completes
		 * directly.
		 */
		zloop_rw(cmd);
		return;
	case REQ_OP_FLUSH:
		/*
		 * Sync the entire FS containing the zone files instead of
		 * walking all files
		 */
		cmd->ret = sync_filesystem(file_inode(zlo->data_dir)->i_sb);
		break;
	case REQ_OP_ZONE_RESET:
		cmd->ret = zloop_reset_zone(zlo, rq_zone_no(rq));
		break;
	case REQ_OP_ZONE_RESET_ALL:
		cmd->ret = zloop_reset_all_zones(zlo);
		break;
	case REQ_OP_ZONE_FINISH:
		cmd->ret = zloop_finish_zone(zlo, rq_zone_no(rq));
		break;
	case REQ_OP_ZONE_OPEN:
		cmd->ret = zloop_open_zone(zlo, rq_zone_no(rq));
		break;
	case REQ_OP_ZONE_CLOSE:
		cmd->ret = zloop_close_zone(zlo, rq_zone_no(rq));
		break;
	default:
		WARN_ON_ONCE(1);
		pr_err("Unsupported operation %d\n", req_op(rq));
		cmd->ret = -EOPNOTSUPP;
		break;
	}

	blk_mq_complete_request(rq);
}

static void zloop_cmd_workfn(struct work_struct *work)
{
	struct zloop_cmd *cmd = container_of(work, struct zloop_cmd, work);
	int orig_flags = current->flags;

	current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
	zloop_handle_cmd(cmd);
	current->flags = orig_flags;
}

static void zloop_complete_rq(struct request *rq)
{
	struct zloop_cmd *cmd = blk_mq_rq_to_pdu(rq);
	struct zloop_device *zlo = rq->q->queuedata;
	unsigned int zone_no = cmd->sector >> zlo->zone_shift;
	struct zloop_zone *zone = &zlo->zones[zone_no];
	blk_status_t sts = BLK_STS_OK;

	switch (req_op(rq)) {
	case REQ_OP_READ:
		if (cmd->ret < 0)
			pr_err("Zone %u: failed read sector %llu, %llu sectors\n",
			       zone_no, cmd->sector, cmd->nr_sectors);

		if (cmd->ret >= 0 && cmd->ret != blk_rq_bytes(rq)) {
			/* short read */
			struct bio *bio;

			__rq_for_each_bio(bio, rq)
				zero_fill_bio(bio);
		}
		break;
	case REQ_OP_WRITE:
	case REQ_OP_ZONE_APPEND:
		if (cmd->ret < 0)
			pr_err("Zone %u: failed %swrite sector %llu, %llu sectors\n",
			       zone_no,
			       req_op(rq) == REQ_OP_WRITE ? "" : "append ",
			       cmd->sector, cmd->nr_sectors);

		if (cmd->ret >= 0 && cmd->ret != blk_rq_bytes(rq)) {
			pr_err("Zone %u: partial write %ld/%u B\n",
			       zone_no, cmd->ret, blk_rq_bytes(rq));
			cmd->ret = -EIO;
		}

		if (cmd->ret < 0 && !test_bit(ZLOOP_ZONE_CONV, &zone->flags)) {
			/*
			 * A write to a sequential zone file failed: mark the
			 * zone as having an error. This will be corrected and
			 * cleared when the next IO is submitted.
			 */
			set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
			break;
		}
		if (req_op(rq) == REQ_OP_ZONE_APPEND)
			rq->__sector = cmd->sector;

		break;
	default:
		break;
	}

	if (cmd->ret < 0)
		sts = errno_to_blk_status(cmd->ret);
	blk_mq_end_request(rq, sts);
}

static bool zloop_set_zone_append_sector(struct request *rq)
{
	struct zloop_device *zlo = rq->q->queuedata;
	unsigned int zone_no = rq_zone_no(rq);
	struct zloop_zone *zone = &zlo->zones[zone_no];
	sector_t zone_end = zone->start + zlo->zone_capacity;
	sector_t nr_sectors = blk_rq_sectors(rq);
	unsigned long flags;

	spin_lock_irqsave(&zone->wp_lock, flags);

	if (zone->cond == BLK_ZONE_COND_FULL ||
	    zone->wp + nr_sectors > zone_end) {
		spin_unlock_irqrestore(&zone->wp_lock, flags);
		return false;
	}

	rq->__sector = zone->wp;
	zone->wp += blk_rq_sectors(rq);
	if (zone->wp >= zone_end) {
		zone->cond = BLK_ZONE_COND_FULL;
		zone->wp = ULLONG_MAX;
	}

	spin_unlock_irqrestore(&zone->wp_lock, flags);

	return true;
}

static blk_status_t zloop_queue_rq(struct blk_mq_hw_ctx *hctx,
				   const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;
	struct zloop_cmd *cmd = blk_mq_rq_to_pdu(rq);
	struct zloop_device *zlo = rq->q->queuedata;

	if (zlo->state == Zlo_deleting)
		return BLK_STS_IOERR;

	/*
	 * If we need to strongly order zone append operations, set the request
	 * sector to the zone write pointer location now instead of when the
	 * command work runs.
	 */
	if (zlo->ordered_zone_append && req_op(rq) == REQ_OP_ZONE_APPEND) {
		if (!zloop_set_zone_append_sector(rq))
			return BLK_STS_IOERR;
	}

	blk_mq_start_request(rq);

	INIT_WORK(&cmd->work, zloop_cmd_workfn);
	queue_work(zlo->workqueue, &cmd->work);

	return BLK_STS_OK;
}

static const struct blk_mq_ops zloop_mq_ops = {
	.queue_rq	= zloop_queue_rq,
	.complete	= zloop_complete_rq,
};

static int zloop_open(struct gendisk *disk, blk_mode_t mode)
{
	struct zloop_device *zlo = disk->private_data;
	int ret;

	ret = mutex_lock_killable(&zloop_ctl_mutex);
	if (ret)
		return ret;

	if (zlo->state != Zlo_live)
		ret = -ENXIO;
	mutex_unlock(&zloop_ctl_mutex);
	return ret;
}

static int zloop_report_zones(struct gendisk *disk, sector_t sector,
		unsigned int nr_zones, struct blk_report_zones_args *args)
{
	struct zloop_device *zlo = disk->private_data;
	struct blk_zone blkz = {};
	unsigned int first, i;
	unsigned long flags;
	int ret;

	first = disk_zone_no(disk, sector);
	if (first >= zlo->nr_zones)
		return 0;
	nr_zones = min(nr_zones, zlo->nr_zones - first);

	for (i = 0; i < nr_zones; i++) {
		unsigned int zone_no = first + i;
		struct zloop_zone *zone = &zlo->zones[zone_no];

		mutex_lock(&zone->lock);

		if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
			ret = zloop_update_seq_zone(zlo, zone_no);
			if (ret) {
				mutex_unlock(&zone->lock);
				return ret;
			}
		}

		blkz.start = zone->start;
		blkz.len = zlo->zone_size;
		spin_lock_irqsave(&zone->wp_lock, flags);
		blkz.wp = zone->wp;
		spin_unlock_irqrestore(&zone->wp_lock, flags);
		blkz.cond = zone->cond;
		if (test_bit(ZLOOP_ZONE_CONV, &zone->flags)) {
			blkz.type = BLK_ZONE_TYPE_CONVENTIONAL;
			blkz.capacity = zlo->zone_size;
		} else {
			blkz.type = BLK_ZONE_TYPE_SEQWRITE_REQ;
			blkz.capacity = zlo->zone_capacity;
		}

		mutex_unlock(&zone->lock);

		ret = disk_report_zone(disk, &blkz, i, args);
		if (ret)
			return ret;
	}

	return nr_zones;
}

static void zloop_free_disk(struct gendisk *disk)
{
	struct zloop_device *zlo = disk->private_data;
	unsigned int i;

	blk_mq_free_tag_set(&zlo->tag_set);

	for (i = 0; i < zlo->nr_zones; i++) {
		struct zloop_zone *zone = &zlo->zones[i];

		mapping_set_gfp_mask(zone->file->f_mapping,
				     zone->old_gfp_mask);
		fput(zone->file);
	}

	fput(zlo->data_dir);
	destroy_workqueue(zlo->workqueue);
	kfree(zlo->base_dir);
	kvfree(zlo);
}

static const struct block_device_operations zloop_fops = {
	.owner		= THIS_MODULE,
	.open		= zloop_open,
	.report_zones	= zloop_report_zones,
	.free_disk	= zloop_free_disk,
};

__printf(3, 4)
static struct file *zloop_filp_open_fmt(int oflags, umode_t mode,
					const char *fmt, ...)
{
	struct file *file;
	va_list ap;
	char *p;

	va_start(ap, fmt);
	p = kvasprintf(GFP_KERNEL, fmt, ap);
	va_end(ap);

	if (!p)
		return ERR_PTR(-ENOMEM);
	file = filp_open(p, oflags, mode);
	kfree(p);
	return file;
}

static int zloop_get_block_size(struct zloop_device *zlo,
				struct zloop_zone *zone)
{
	struct block_device *sb_bdev = zone->file->f_mapping->host->i_sb->s_bdev;
	struct kstat st;

	/*
	 * If the FS block size is lower than or equal to 4K, use that as the
	 * device block size.
	 * Otherwise, fall back to the FS direct IO alignment
	 * constraint if that is provided, and to the FS underlying device
	 * physical block size if the direct IO alignment is unknown.
	 */
	if (file_inode(zone->file)->i_sb->s_blocksize <= SZ_4K)
		zlo->block_size = file_inode(zone->file)->i_sb->s_blocksize;
	else if (!vfs_getattr(&zone->file->f_path, &st, STATX_DIOALIGN, 0) &&
		 (st.result_mask & STATX_DIOALIGN))
		zlo->block_size = st.dio_offset_align;
	else if (sb_bdev)
		zlo->block_size = bdev_physical_block_size(sb_bdev);
	else
		zlo->block_size = SECTOR_SIZE;

	if (zlo->zone_capacity & ((zlo->block_size >> SECTOR_SHIFT) - 1)) {
		pr_err("Zone capacity is not aligned to block size %u\n",
		       zlo->block_size);
		return -EINVAL;
	}

	return 0;
}

static int zloop_init_zone(struct zloop_device *zlo, struct zloop_options *opts,
			   unsigned int zone_no, bool restore)
{
	struct zloop_zone *zone = &zlo->zones[zone_no];
	int oflags = O_RDWR;
	struct kstat stat;
	sector_t file_sectors;
	int ret;

	mutex_init(&zone->lock);
	spin_lock_init(&zone->wp_lock);
	zone->start = (sector_t)zone_no << zlo->zone_shift;

	if (!restore)
		oflags |= O_CREAT;

	if (!opts->buffered_io)
		oflags |= O_DIRECT;

	if (zone_no < zlo->nr_conv_zones) {
		/* Conventional zone file. */
		set_bit(ZLOOP_ZONE_CONV, &zone->flags);
		zone->cond = BLK_ZONE_COND_NOT_WP;
		zone->wp = U64_MAX;

		zone->file = zloop_filp_open_fmt(oflags, 0600, "%s/%u/cnv-%06u",
					zlo->base_dir, zlo->id, zone_no);
		if (IS_ERR(zone->file)) {
			pr_err("Failed to open zone %u file %s/%u/cnv-%06u (err=%ld)\n",
			       zone_no, zlo->base_dir, zlo->id, zone_no,
			       PTR_ERR(zone->file));
			return PTR_ERR(zone->file);
		}

		if (!zlo->block_size) {
			ret = zloop_get_block_size(zlo, zone);
			if (ret)
				return ret;
		}

		ret = vfs_getattr(&zone->file->f_path, &stat, STATX_SIZE, 0);
		if (ret < 0) {
			pr_err("Failed to get zone %u file stat\n", zone_no);
			return ret;
		}
		file_sectors = stat.size >> SECTOR_SHIFT;

		if (restore && file_sectors != zlo->zone_size) {
			pr_err("Invalid conventional zone %u file size (%llu sectors != %llu)\n",
			       zone_no, file_sectors, zlo->zone_size);
			return -EINVAL;
		}

		ret = vfs_truncate(&zone->file->f_path,
				   zlo->zone_size << SECTOR_SHIFT);
		if (ret < 0) {
			pr_err("Failed to truncate zone %u file (err=%d)\n",
			       zone_no, ret);
			return ret;
		}

		return 0;
	}

	/* Sequential zone file. */
	zone->file = zloop_filp_open_fmt(oflags, 0600, "%s/%u/seq-%06u",
				zlo->base_dir, zlo->id, zone_no);
	if (IS_ERR(zone->file)) {
		pr_err("Failed to open zone %u file %s/%u/seq-%06u (err=%ld)\n",
		       zone_no, zlo->base_dir, zlo->id, zone_no,
		       PTR_ERR(zone->file));
		return PTR_ERR(zone->file);
	}

	if (!zlo->block_size) {
		ret = zloop_get_block_size(zlo, zone);
		if (ret)
			return ret;
	}

	mutex_lock(&zone->lock);
	ret = zloop_update_seq_zone(zlo, zone_no);
	mutex_unlock(&zone->lock);

	return ret;
}

static bool zloop_dev_exists(struct zloop_device *zlo)
{
	struct file *cnv, *seq;
	bool exists;

	cnv = zloop_filp_open_fmt(O_RDONLY, 0600, "%s/%u/cnv-%06u",
				  zlo->base_dir, zlo->id, 0);
	seq = zloop_filp_open_fmt(O_RDONLY, 0600, "%s/%u/seq-%06u",
				  zlo->base_dir, zlo->id, 0);
	exists = !IS_ERR(cnv) || !IS_ERR(seq);

	if (!IS_ERR(cnv))
		fput(cnv);
	if (!IS_ERR(seq))
		fput(seq);

	return exists;
}

static int zloop_ctl_add(struct zloop_options *opts)
{
	struct queue_limits lim = {
		.max_hw_sectors		= SZ_1M >> SECTOR_SHIFT,
		.chunk_sectors		= opts->zone_size,
		.features		= BLK_FEAT_ZONED,
	};
	unsigned int nr_zones, i, j;
	struct zloop_device *zlo;
	int ret = -EINVAL;
	bool restore;

	__module_get(THIS_MODULE);

	nr_zones = opts->capacity >> ilog2(opts->zone_size);
	if (opts->nr_conv_zones >= nr_zones) {
		pr_err("Invalid number of conventional zones %u\n",
		       opts->nr_conv_zones);
		goto out;
	}

	zlo = kvzalloc(struct_size(zlo, zones, nr_zones), GFP_KERNEL);
	if (!zlo) {
		ret = -ENOMEM;
		goto out;
	}
	zlo->state = Zlo_creating;

	ret = mutex_lock_killable(&zloop_ctl_mutex);
	if (ret)
		goto out_free_dev;

	/* Allocate an id. If @opts->id >= 0, we are requesting that specific id. */
	if (opts->id >= 0) {
		ret = idr_alloc(&zloop_index_idr, zlo,
				opts->id, opts->id + 1, GFP_KERNEL);
		if (ret == -ENOSPC)
			ret = -EEXIST;
	} else {
		ret = idr_alloc(&zloop_index_idr, zlo, 0, 0, GFP_KERNEL);
	}
	mutex_unlock(&zloop_ctl_mutex);
	if (ret < 0)
		goto out_free_dev;

	zlo->id = ret;
	zlo->zone_shift = ilog2(opts->zone_size);
	zlo->zone_size = opts->zone_size;
	if (opts->zone_capacity)
		zlo->zone_capacity = opts->zone_capacity;
	else
		zlo->zone_capacity = zlo->zone_size;
	zlo->nr_zones = nr_zones;
	zlo->nr_conv_zones = opts->nr_conv_zones;
	zlo->buffered_io = opts->buffered_io;
	zlo->zone_append = opts->zone_append;
	if (zlo->zone_append)
		zlo->ordered_zone_append = opts->ordered_zone_append;

	zlo->workqueue = alloc_workqueue("zloop%d", WQ_UNBOUND | WQ_FREEZABLE,
					 opts->nr_queues * opts->queue_depth, zlo->id);
	if (!zlo->workqueue) {
		ret = -ENOMEM;
		goto out_free_idr;
	}

	if (opts->base_dir)
		zlo->base_dir = kstrdup(opts->base_dir, GFP_KERNEL);
	else
		zlo->base_dir = kstrdup(ZLOOP_DEF_BASE_DIR, GFP_KERNEL);
	if (!zlo->base_dir) {
		ret = -ENOMEM;
		goto out_destroy_workqueue;
	}

	zlo->data_dir = zloop_filp_open_fmt(O_RDONLY | O_DIRECTORY, 0, "%s/%u",
					    zlo->base_dir, zlo->id);
	if (IS_ERR(zlo->data_dir)) {
		ret = PTR_ERR(zlo->data_dir);
		pr_warn("Failed to open directory %s/%u (err=%d)\n",
			zlo->base_dir, zlo->id, ret);
		goto out_free_base_dir;
	}

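	/*
	 * Backing store layout sketch (illustrative only; it assumes the
	 * default base_dir, the default zone counts and a device id of 0):
	 *
	 *   /var/local/zloop/0/cnv-000000 ... cnv-000007   conventional zones
	 *   /var/local/zloop/0/seq-000008 ... seq-000063   sequential zones
	 *
	 * Each file backs exactly one zone, and the size of a sequential zone
	 * file reflects that zone's write pointer position.
	 */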
	/*
	 * If we already have zone files, we are restoring a device created by a
	 * previous add operation. In this case, zloop_init_zone() will check
	 * that the zone files are consistent with the zone configuration given.
	 */
	restore = zloop_dev_exists(zlo);
	for (i = 0; i < nr_zones; i++) {
		ret = zloop_init_zone(zlo, opts, i, restore);
		if (ret)
			goto out_close_files;
	}

	lim.physical_block_size = zlo->block_size;
	lim.logical_block_size = zlo->block_size;
	if (zlo->zone_append)
		lim.max_hw_zone_append_sectors = lim.max_hw_sectors;

	zlo->tag_set.ops = &zloop_mq_ops;
	zlo->tag_set.nr_hw_queues = opts->nr_queues;
	zlo->tag_set.queue_depth = opts->queue_depth;
	zlo->tag_set.numa_node = NUMA_NO_NODE;
	zlo->tag_set.cmd_size = sizeof(struct zloop_cmd);
	zlo->tag_set.driver_data = zlo;

	ret = blk_mq_alloc_tag_set(&zlo->tag_set);
	if (ret) {
		pr_err("blk_mq_alloc_tag_set failed (err=%d)\n", ret);
		goto out_close_files;
	}

	zlo->disk = blk_mq_alloc_disk(&zlo->tag_set, &lim, zlo);
	if (IS_ERR(zlo->disk)) {
		ret = PTR_ERR(zlo->disk);
		pr_err("blk_mq_alloc_disk failed (err=%d)\n", ret);
		goto out_cleanup_tags;
	}
	zlo->disk->flags = GENHD_FL_NO_PART;
	zlo->disk->fops = &zloop_fops;
	zlo->disk->private_data = zlo;
	sprintf(zlo->disk->disk_name, "zloop%d", zlo->id);
	set_capacity(zlo->disk, (u64)lim.chunk_sectors * zlo->nr_zones);

	ret = blk_revalidate_disk_zones(zlo->disk);
	if (ret)
		goto out_cleanup_disk;

	ret = add_disk(zlo->disk);
	if (ret) {
		pr_err("add_disk failed (err=%d)\n", ret);
		goto out_cleanup_disk;
	}

	mutex_lock(&zloop_ctl_mutex);
	zlo->state = Zlo_live;
	mutex_unlock(&zloop_ctl_mutex);

	pr_info("zloop: device %d, %u zones of %llu MiB, %u B block size\n",
		zlo->id, zlo->nr_zones,
		((sector_t)zlo->zone_size << SECTOR_SHIFT) >> 20,
		zlo->block_size);
	pr_info("zloop%d: using %s%s zone append\n",
		zlo->id,
		zlo->ordered_zone_append ? "ordered " : "",
		zlo->zone_append ? "native" : "emulated");

	return 0;

out_cleanup_disk:
	put_disk(zlo->disk);
out_cleanup_tags:
	blk_mq_free_tag_set(&zlo->tag_set);
out_close_files:
	for (j = 0; j < i; j++) {
		struct zloop_zone *zone = &zlo->zones[j];

		if (!IS_ERR_OR_NULL(zone->file))
			fput(zone->file);
	}
	fput(zlo->data_dir);
out_free_base_dir:
	kfree(zlo->base_dir);
out_destroy_workqueue:
	destroy_workqueue(zlo->workqueue);
out_free_idr:
	mutex_lock(&zloop_ctl_mutex);
	idr_remove(&zloop_index_idr, zlo->id);
	mutex_unlock(&zloop_ctl_mutex);
out_free_dev:
	kvfree(zlo);
out:
	module_put(THIS_MODULE);
	if (ret == -ENOENT)
		ret = -EINVAL;
	return ret;
}

static int zloop_ctl_remove(struct zloop_options *opts)
{
	struct zloop_device *zlo;
	int ret;

	if (!(opts->mask & ZLOOP_OPT_ID)) {
		pr_err("No ID specified\n");
		return -EINVAL;
	}

	ret = mutex_lock_killable(&zloop_ctl_mutex);
	if (ret)
		return ret;

	zlo = idr_find(&zloop_index_idr, opts->id);
	if (!zlo || zlo->state == Zlo_creating) {
		ret = -ENODEV;
	} else if (zlo->state == Zlo_deleting) {
		ret = -EINVAL;
	} else {
		idr_remove(&zloop_index_idr, zlo->id);
		zlo->state = Zlo_deleting;
	}

	mutex_unlock(&zloop_ctl_mutex);
	if (ret)
		return ret;

	del_gendisk(zlo->disk);
	put_disk(zlo->disk);

	pr_info("Removed device %d\n", opts->id);

	module_put(THIS_MODULE);

	return 0;
}

static int zloop_parse_options(struct zloop_options *opts, const char *buf)
{
	substring_t args[MAX_OPT_ARGS];
	char *options, *o, *p;
	unsigned int token;
	int ret = 0;

	/* Set defaults. */
	opts->mask = 0;
	opts->id = ZLOOP_DEF_ID;
	opts->capacity = ZLOOP_DEF_ZONE_SIZE * ZLOOP_DEF_NR_ZONES;
	opts->zone_size = ZLOOP_DEF_ZONE_SIZE;
	opts->nr_conv_zones = ZLOOP_DEF_NR_CONV_ZONES;
	opts->nr_queues = ZLOOP_DEF_NR_QUEUES;
	opts->queue_depth = ZLOOP_DEF_QUEUE_DEPTH;
	opts->buffered_io = ZLOOP_DEF_BUFFERED_IO;
	opts->zone_append = ZLOOP_DEF_ZONE_APPEND;
	opts->ordered_zone_append = ZLOOP_DEF_ORDERED_ZONE_APPEND;

	if (!buf)
		return 0;

	/* Skip leading spaces before the options. */
	while (isspace(*buf))
		buf++;

	options = o = kstrdup(buf, GFP_KERNEL);
	if (!options)
		return -ENOMEM;

	/* Parse the options, doing only some light invalid value checks. */
	while ((p = strsep(&o, ",\n")) != NULL) {
		if (!*p)
			continue;

		token = match_token(p, zloop_opt_tokens, args);
		opts->mask |= token;
		switch (token) {
		case ZLOOP_OPT_ID:
			if (match_int(args, &opts->id)) {
				ret = -EINVAL;
				goto out;
			}
			break;
		case ZLOOP_OPT_CAPACITY:
			if (match_uint(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			if (!token) {
				pr_err("Invalid capacity\n");
				ret = -EINVAL;
				goto out;
			}
			opts->capacity =
				((sector_t)token * SZ_1M) >> SECTOR_SHIFT;
			break;
		case ZLOOP_OPT_ZONE_SIZE:
			if (match_uint(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			if (!token || token > ZLOOP_MAX_ZONE_SIZE_MB ||
			    !is_power_of_2(token)) {
				pr_err("Invalid zone size %u\n", token);
				ret = -EINVAL;
				goto out;
			}
			opts->zone_size =
				((sector_t)token * SZ_1M) >> SECTOR_SHIFT;
			break;
		case ZLOOP_OPT_ZONE_CAPACITY:
			if (match_uint(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			if (!token) {
				pr_err("Invalid zone capacity\n");
				ret = -EINVAL;
				goto out;
			}
			opts->zone_capacity =
				((sector_t)token * SZ_1M) >> SECTOR_SHIFT;
			break;
		case ZLOOP_OPT_NR_CONV_ZONES:
			if (match_uint(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			opts->nr_conv_zones = token;
			break;
		case ZLOOP_OPT_BASE_DIR:
			p = match_strdup(args);
			if (!p) {
				ret = -ENOMEM;
				goto out;
			}
			kfree(opts->base_dir);
			opts->base_dir = p;
			break;
		case ZLOOP_OPT_NR_QUEUES:
			if (match_uint(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			if (!token) {
				pr_err("Invalid number of queues\n");
				ret = -EINVAL;
				goto out;
			}
			opts->nr_queues = min(token, num_online_cpus());
			break;
		case ZLOOP_OPT_QUEUE_DEPTH:
			if (match_uint(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			if (!token) {
				pr_err("Invalid queue depth\n");
				ret = -EINVAL;
				goto out;
			}
			opts->queue_depth = token;
			break;
		case ZLOOP_OPT_BUFFERED_IO:
			opts->buffered_io = true;
			break;
		case ZLOOP_OPT_ZONE_APPEND:
			if (match_uint(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			if (token != 0 && token != 1) {
				pr_err("Invalid zone_append value\n");
				ret = -EINVAL;
				goto out;
			}
			opts->zone_append = token;
			break;
		case ZLOOP_OPT_ORDERED_ZONE_APPEND:
			opts->ordered_zone_append = true;
			break;
		case ZLOOP_OPT_ERR:
		default:
			pr_warn("unknown parameter or missing value '%s'\n", p);
			ret = -EINVAL;
			goto out;
		}
	}

	ret = -EINVAL;
	if (opts->capacity <= opts->zone_size) {
		pr_err("Invalid capacity\n");
		goto out;
	}

	if (opts->zone_capacity > opts->zone_size) {
		pr_err("Invalid zone capacity\n");
		goto out;
	}

	ret = 0;
out:
	kfree(options);
	return ret;
}

enum {
	ZLOOP_CTL_ADD,
	ZLOOP_CTL_REMOVE,
};

static struct zloop_ctl_op {
	int		code;
	const char	*name;
} zloop_ctl_ops[] = {
	{ ZLOOP_CTL_ADD,	"add" },
	{ ZLOOP_CTL_REMOVE,	"remove" },
	{ -1,			NULL },
};

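/*
 * Control interface usage sketch (illustrative only; the option values below
 * are arbitrary examples, and the per-device backing directory must already
 * exist under base_dir):
 *
 *	# Create the backing directory for device id 0 and add the device.
 *	mkdir -p /var/local/zloop/0
 *	echo "add id=0,capacity_mb=16384,zone_size_mb=256,conv_zones=8" > \
 *		/dev/zloop-control
 *
 *	# Remove the device again (the zone files are left in place).
 *	echo "remove id=0" > /dev/zloop-control
 *
 * Options are comma separated; any option not given on the "add" command line
 * takes the ZLOOP_DEF_* default defined above.
 */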
static ssize_t zloop_ctl_write(struct file *file, const char __user *ubuf,
			       size_t count, loff_t *pos)
{
	struct zloop_options opts = { };
	struct zloop_ctl_op *op;
	const char *buf, *opts_buf;
	int i, ret;

	if (count > PAGE_SIZE)
		return -ENOMEM;

	buf = memdup_user_nul(ubuf, count);
	if (IS_ERR(buf))
		return PTR_ERR(buf);

	for (i = 0; i < ARRAY_SIZE(zloop_ctl_ops); i++) {
		op = &zloop_ctl_ops[i];
		if (!op->name) {
			pr_err("Invalid operation\n");
			ret = -EINVAL;
			goto out;
		}
		if (!strncmp(buf, op->name, strlen(op->name)))
			break;
	}

	if (count <= strlen(op->name))
		opts_buf = NULL;
	else
		opts_buf = buf + strlen(op->name);

	ret = zloop_parse_options(&opts, opts_buf);
	if (ret) {
		pr_err("Failed to parse options\n");
		goto out;
	}

	switch (op->code) {
	case ZLOOP_CTL_ADD:
		ret = zloop_ctl_add(&opts);
		break;
	case ZLOOP_CTL_REMOVE:
		ret = zloop_ctl_remove(&opts);
		break;
	default:
		pr_err("Invalid operation\n");
		ret = -EINVAL;
		goto out;
	}

out:
	kfree(opts.base_dir);
	kfree(buf);
	return ret ? ret : count;
}

static int zloop_ctl_show(struct seq_file *seq_file, void *private)
{
	const struct match_token *tok;
	int i;

	/* Add operation */
	seq_printf(seq_file, "%s ", zloop_ctl_ops[0].name);
	for (i = 0; i < ARRAY_SIZE(zloop_opt_tokens); i++) {
		tok = &zloop_opt_tokens[i];
		if (!tok->pattern)
			break;
		if (i)
			seq_putc(seq_file, ',');
		seq_puts(seq_file, tok->pattern);
	}
	seq_putc(seq_file, '\n');

	/* Remove operation */
	seq_puts(seq_file, zloop_ctl_ops[1].name);
	seq_puts(seq_file, " id=%d\n");

	return 0;
}

static int zloop_ctl_open(struct inode *inode, struct file *file)
{
	file->private_data = NULL;
	return single_open(file, zloop_ctl_show, NULL);
}

static int zloop_ctl_release(struct inode *inode, struct file *file)
{
	return single_release(inode, file);
}

static const struct file_operations zloop_ctl_fops = {
	.owner		= THIS_MODULE,
	.open		= zloop_ctl_open,
	.release	= zloop_ctl_release,
	.write		= zloop_ctl_write,
	.read		= seq_read,
};

static struct miscdevice zloop_misc = {
	.minor		= MISC_DYNAMIC_MINOR,
	.name		= "zloop-control",
	.fops		= &zloop_ctl_fops,
};

static int __init zloop_init(void)
{
	int ret;

	ret = misc_register(&zloop_misc);
	if (ret) {
		pr_err("Failed to register misc device: %d\n", ret);
		return ret;
	}
	pr_info("Module loaded\n");

	return 0;
}

static void __exit zloop_exit(void)
{
	misc_deregister(&zloop_misc);
	idr_destroy(&zloop_index_idr);
}

module_init(zloop_init);
module_exit(zloop_exit);

MODULE_DESCRIPTION("Zoned loopback device");
MODULE_LICENSE("GPL");