// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2025, Christoph Hellwig.
 * Copyright (c) 2025, Western Digital Corporation or its affiliates.
 *
 * Zoned Loop Device driver - exports a zoned block device using one file per
 * zone as backing storage.
 */

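/*
 * Illustrative usage (the values below are only an example):
 *
 *   # The per-device data directory must exist before adding the device.
 *   mkdir -p /var/local/zloop/0
 *   echo "add id=0 capacity_mb=16384 zone_size_mb=256 conv_zones=8" > /dev/zloop-control
 *   echo "remove id=0" > /dev/zloop-control
 *
 * Reading /dev/zloop-control prints the supported "add" and "remove" syntax.
 */
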
#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/blk-mq.h>
#include <linux/blkzoned.h>
#include <linux/pagemap.h>
#include <linux/miscdevice.h>
#include <linux/falloc.h>
#include <linux/mutex.h>
#include <linux/parser.h>
#include <linux/seq_file.h>

/*
 * Options for adding (and removing) a device.
 */
enum {
	ZLOOP_OPT_ERR			= 0,
	ZLOOP_OPT_ID			= (1 << 0),
	ZLOOP_OPT_CAPACITY		= (1 << 1),
	ZLOOP_OPT_ZONE_SIZE		= (1 << 2),
	ZLOOP_OPT_ZONE_CAPACITY		= (1 << 3),
	ZLOOP_OPT_NR_CONV_ZONES		= (1 << 4),
	ZLOOP_OPT_BASE_DIR		= (1 << 5),
	ZLOOP_OPT_NR_QUEUES		= (1 << 6),
	ZLOOP_OPT_QUEUE_DEPTH		= (1 << 7),
	ZLOOP_OPT_BUFFERED_IO		= (1 << 8),
	ZLOOP_OPT_ZONE_APPEND		= (1 << 9),
	ZLOOP_OPT_ORDERED_ZONE_APPEND	= (1 << 10),
};

static const match_table_t zloop_opt_tokens = {
	{ ZLOOP_OPT_ID,			"id=%d"			},
	{ ZLOOP_OPT_CAPACITY,		"capacity_mb=%u"	},
	{ ZLOOP_OPT_ZONE_SIZE,		"zone_size_mb=%u"	},
	{ ZLOOP_OPT_ZONE_CAPACITY,	"zone_capacity_mb=%u"	},
	{ ZLOOP_OPT_NR_CONV_ZONES,	"conv_zones=%u"		},
	{ ZLOOP_OPT_BASE_DIR,		"base_dir=%s"		},
	{ ZLOOP_OPT_NR_QUEUES,		"nr_queues=%u"		},
	{ ZLOOP_OPT_QUEUE_DEPTH,	"queue_depth=%u"	},
	{ ZLOOP_OPT_BUFFERED_IO,	"buffered_io"		},
	{ ZLOOP_OPT_ZONE_APPEND,	"zone_append=%u"	},
	{ ZLOOP_OPT_ORDERED_ZONE_APPEND, "ordered_zone_append"	},
	{ ZLOOP_OPT_ERR,		NULL			}
};

/* Default values for the "add" operation. */
#define ZLOOP_DEF_ID			-1
#define ZLOOP_DEF_ZONE_SIZE		((256ULL * SZ_1M) >> SECTOR_SHIFT)
#define ZLOOP_DEF_NR_ZONES		64
#define ZLOOP_DEF_NR_CONV_ZONES		8
#define ZLOOP_DEF_BASE_DIR		"/var/local/zloop"
#define ZLOOP_DEF_NR_QUEUES		1
#define ZLOOP_DEF_QUEUE_DEPTH		128
#define ZLOOP_DEF_BUFFERED_IO		false
#define ZLOOP_DEF_ZONE_APPEND		true
#define ZLOOP_DEF_ORDERED_ZONE_APPEND	false

/* Arbitrary limit on the zone size (16GB). */
#define ZLOOP_MAX_ZONE_SIZE_MB		16384

struct zloop_options {
	unsigned int mask;
	int id;
	sector_t capacity;
	sector_t zone_size;
	sector_t zone_capacity;
	unsigned int nr_conv_zones;
	char *base_dir;
	unsigned int nr_queues;
	unsigned int queue_depth;
	bool buffered_io;
	bool zone_append;
	bool ordered_zone_append;
};

/*
 * Device states.
 */
enum {
	Zlo_creating = 0,
	Zlo_live,
	Zlo_deleting,
};

enum zloop_zone_flags {
	ZLOOP_ZONE_CONV = 0,
	ZLOOP_ZONE_SEQ_ERROR,
};

struct zloop_zone {
	struct file *file;

	unsigned long flags;
	struct mutex lock;
	spinlock_t wp_lock;
	enum blk_zone_cond cond;
	sector_t start;
	sector_t wp;

	gfp_t old_gfp_mask;
};

struct zloop_device {
	unsigned int id;
	unsigned int state;

	struct blk_mq_tag_set tag_set;
	struct gendisk *disk;

	struct workqueue_struct *workqueue;
	bool buffered_io;
	bool zone_append;
	bool ordered_zone_append;

	const char *base_dir;
	struct file *data_dir;

	unsigned int zone_shift;
	sector_t zone_size;
	sector_t zone_capacity;
	unsigned int nr_zones;
	unsigned int nr_conv_zones;
	unsigned int block_size;

	struct zloop_zone zones[] __counted_by(nr_zones);
};

struct zloop_cmd {
	struct work_struct work;
	atomic_t ref;
	sector_t sector;
	sector_t nr_sectors;
	long ret;
	struct kiocb iocb;
	struct bio_vec *bvec;
};

static DEFINE_IDR(zloop_index_idr);
static DEFINE_MUTEX(zloop_ctl_mutex);

static unsigned int rq_zone_no(struct request *rq)
{
	struct zloop_device *zlo = rq->q->queuedata;

	return blk_rq_pos(rq) >> zlo->zone_shift;
}

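/*
 * Re-derive the state of a sequential zone from the size of its backing file
 * (used after an error and when restoring an existing device). A minimal
 * sketch of the mapping, assuming a zone capacity of 524288 sectors (256 MiB):
 * an empty file gives BLK_ZONE_COND_EMPTY with wp == start, a 524288-sector
 * file gives BLK_ZONE_COND_FULL, and any other block-aligned size gives
 * BLK_ZONE_COND_CLOSED with wp == start + file_sectors.
 */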
static int zloop_update_seq_zone(struct zloop_device *zlo, unsigned int zone_no)
{
	struct zloop_zone *zone = &zlo->zones[zone_no];
	struct kstat stat;
	sector_t file_sectors;
	unsigned long flags;
	int ret;

	lockdep_assert_held(&zone->lock);

	ret = vfs_getattr(&zone->file->f_path, &stat, STATX_SIZE, 0);
	if (ret < 0) {
		pr_err("Failed to get zone %u file stat (err=%d)\n",
		       zone_no, ret);
		set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
		return ret;
	}

	file_sectors = stat.size >> SECTOR_SHIFT;
	if (file_sectors > zlo->zone_capacity) {
		pr_err("Zone %u file too large (%llu sectors > %llu)\n",
		       zone_no, file_sectors, zlo->zone_capacity);
		return -EINVAL;
	}

	if (file_sectors & ((zlo->block_size >> SECTOR_SHIFT) - 1)) {
		pr_err("Zone %u file size not aligned to block size %u\n",
		       zone_no, zlo->block_size);
		return -EINVAL;
	}

	spin_lock_irqsave(&zone->wp_lock, flags);
	if (!file_sectors) {
		zone->cond = BLK_ZONE_COND_EMPTY;
		zone->wp = zone->start;
	} else if (file_sectors == zlo->zone_capacity) {
		zone->cond = BLK_ZONE_COND_FULL;
		zone->wp = ULLONG_MAX;
	} else {
		zone->cond = BLK_ZONE_COND_CLOSED;
		zone->wp = zone->start + file_sectors;
	}
	spin_unlock_irqrestore(&zone->wp_lock, flags);

	return 0;
}

static int zloop_open_zone(struct zloop_device *zlo, unsigned int zone_no)
{
	struct zloop_zone *zone = &zlo->zones[zone_no];
	int ret = 0;

	if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
		return -EIO;

	mutex_lock(&zone->lock);

	if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
		ret = zloop_update_seq_zone(zlo, zone_no);
		if (ret)
			goto unlock;
	}

	switch (zone->cond) {
	case BLK_ZONE_COND_EXP_OPEN:
		break;
	case BLK_ZONE_COND_EMPTY:
	case BLK_ZONE_COND_CLOSED:
	case BLK_ZONE_COND_IMP_OPEN:
		zone->cond = BLK_ZONE_COND_EXP_OPEN;
		break;
	case BLK_ZONE_COND_FULL:
	default:
		ret = -EIO;
		break;
	}

unlock:
	mutex_unlock(&zone->lock);

	return ret;
}

static int zloop_close_zone(struct zloop_device *zlo, unsigned int zone_no)
{
	struct zloop_zone *zone = &zlo->zones[zone_no];
	unsigned long flags;
	int ret = 0;

	if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
		return -EIO;

	mutex_lock(&zone->lock);

	if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
		ret = zloop_update_seq_zone(zlo, zone_no);
		if (ret)
			goto unlock;
	}

	switch (zone->cond) {
	case BLK_ZONE_COND_CLOSED:
		break;
	case BLK_ZONE_COND_IMP_OPEN:
	case BLK_ZONE_COND_EXP_OPEN:
		spin_lock_irqsave(&zone->wp_lock, flags);
		if (zone->wp == zone->start)
			zone->cond = BLK_ZONE_COND_EMPTY;
		else
			zone->cond = BLK_ZONE_COND_CLOSED;
		spin_unlock_irqrestore(&zone->wp_lock, flags);
		break;
	case BLK_ZONE_COND_EMPTY:
	case BLK_ZONE_COND_FULL:
	default:
		ret = -EIO;
		break;
	}

unlock:
	mutex_unlock(&zone->lock);

	return ret;
}

static int zloop_reset_zone(struct zloop_device *zlo, unsigned int zone_no)
{
	struct zloop_zone *zone = &zlo->zones[zone_no];
	unsigned long flags;
	int ret = 0;

	if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
		return -EIO;

	mutex_lock(&zone->lock);

	if (!test_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags) &&
	    zone->cond == BLK_ZONE_COND_EMPTY)
		goto unlock;

	if (vfs_truncate(&zone->file->f_path, 0)) {
		set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
		ret = -EIO;
		goto unlock;
	}

	spin_lock_irqsave(&zone->wp_lock, flags);
	zone->cond = BLK_ZONE_COND_EMPTY;
	zone->wp = zone->start;
	clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
	spin_unlock_irqrestore(&zone->wp_lock, flags);

unlock:
	mutex_unlock(&zone->lock);

	return ret;
}

static int zloop_reset_all_zones(struct zloop_device *zlo)
{
	unsigned int i;
	int ret;

	for (i = zlo->nr_conv_zones; i < zlo->nr_zones; i++) {
		ret = zloop_reset_zone(zlo, i);
		if (ret)
			return ret;
	}

	return 0;
}

static int zloop_finish_zone(struct zloop_device *zlo, unsigned int zone_no)
{
	struct zloop_zone *zone = &zlo->zones[zone_no];
	unsigned long flags;
	int ret = 0;

	if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
		return -EIO;

	mutex_lock(&zone->lock);

	if (!test_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags) &&
	    zone->cond == BLK_ZONE_COND_FULL)
		goto unlock;

	if (vfs_truncate(&zone->file->f_path, zlo->zone_size << SECTOR_SHIFT)) {
		set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
		ret = -EIO;
		goto unlock;
	}

	spin_lock_irqsave(&zone->wp_lock, flags);
	zone->cond = BLK_ZONE_COND_FULL;
	zone->wp = ULLONG_MAX;
	clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
	spin_unlock_irqrestore(&zone->wp_lock, flags);

unlock:
	mutex_unlock(&zone->lock);

	return ret;
}

static void zloop_put_cmd(struct zloop_cmd *cmd)
{
	struct request *rq = blk_mq_rq_from_pdu(cmd);

	if (!atomic_dec_and_test(&cmd->ref))
		return;
	kfree(cmd->bvec);
	cmd->bvec = NULL;
	if (likely(!blk_should_fake_timeout(rq->q)))
		blk_mq_complete_request(rq);
}

static void zloop_rw_complete(struct kiocb *iocb, long ret)
{
	struct zloop_cmd *cmd = container_of(iocb, struct zloop_cmd, iocb);

	cmd->ret = ret;
	zloop_put_cmd(cmd);
}

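/*
 * Handle a read, write or zone append request by translating it into a kiocb
 * on the zone's backing file. For writes to sequential zones, the write
 * pointer is checked (and, for zone append, assigned) and advanced under
 * wp_lock before the file I/O is issued.
 */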
static void zloop_rw(struct zloop_cmd *cmd)
{
	struct request *rq = blk_mq_rq_from_pdu(cmd);
	struct zloop_device *zlo = rq->q->queuedata;
	unsigned int zone_no = rq_zone_no(rq);
	sector_t sector = blk_rq_pos(rq);
	sector_t nr_sectors = blk_rq_sectors(rq);
	bool is_append = req_op(rq) == REQ_OP_ZONE_APPEND;
	bool is_write = req_op(rq) == REQ_OP_WRITE || is_append;
	int rw = is_write ? ITER_SOURCE : ITER_DEST;
	struct req_iterator rq_iter;
	struct zloop_zone *zone;
	struct iov_iter iter;
	struct bio_vec tmp;
	unsigned long flags;
	sector_t zone_end;
	unsigned int nr_bvec;
	int ret;

	atomic_set(&cmd->ref, 2);
	cmd->sector = sector;
	cmd->nr_sectors = nr_sectors;
	cmd->ret = 0;

	if (WARN_ON_ONCE(is_append && !zlo->zone_append)) {
		ret = -EIO;
		goto out;
	}

	/* We should never get an I/O beyond the device capacity. */
	if (WARN_ON_ONCE(zone_no >= zlo->nr_zones)) {
		ret = -EIO;
		goto out;
	}
	zone = &zlo->zones[zone_no];
	zone_end = zone->start + zlo->zone_capacity;

	/*
	 * The block layer should never send requests that are not fully
	 * contained within the zone.
	 */
	if (WARN_ON_ONCE(sector + nr_sectors > zone->start + zlo->zone_size)) {
		ret = -EIO;
		goto out;
	}

	if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
		mutex_lock(&zone->lock);
		ret = zloop_update_seq_zone(zlo, zone_no);
		mutex_unlock(&zone->lock);
		if (ret)
			goto out;
	}

	if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write) {
		mutex_lock(&zone->lock);

		spin_lock_irqsave(&zone->wp_lock, flags);

		/*
		 * Zone append operations always go at the current write
		 * pointer, but regular write operations must already be
		 * aligned to the write pointer when submitted.
		 */
		if (is_append) {
			/*
			 * If ordered zone append is in use, we already checked
			 * and set the target sector in zloop_queue_rq().
			 */
			if (!zlo->ordered_zone_append) {
				if (zone->cond == BLK_ZONE_COND_FULL ||
				    zone->wp + nr_sectors > zone_end) {
					spin_unlock_irqrestore(&zone->wp_lock,
							       flags);
					ret = -EIO;
					goto unlock;
				}
				sector = zone->wp;
			}
			cmd->sector = sector;
		} else if (sector != zone->wp) {
			spin_unlock_irqrestore(&zone->wp_lock, flags);
			pr_err("Zone %u: unaligned write: sect %llu, wp %llu\n",
			       zone_no, sector, zone->wp);
			ret = -EIO;
			goto unlock;
		}

		/* Implicitly open the target zone. */
		if (zone->cond == BLK_ZONE_COND_CLOSED ||
		    zone->cond == BLK_ZONE_COND_EMPTY)
			zone->cond = BLK_ZONE_COND_IMP_OPEN;

		/*
		 * Advance the write pointer, unless ordered zone append is in
		 * use. If the write fails, the write pointer position will be
		 * corrected when the next I/O starts execution.
		 */
		if (!is_append || !zlo->ordered_zone_append) {
			zone->wp += nr_sectors;
			if (zone->wp == zone_end) {
				zone->cond = BLK_ZONE_COND_FULL;
				zone->wp = ULLONG_MAX;
			}
		}

		spin_unlock_irqrestore(&zone->wp_lock, flags);
	}

	nr_bvec = blk_rq_nr_bvec(rq);

	if (rq->bio != rq->biotail) {
		struct bio_vec *bvec;

		cmd->bvec = kmalloc_array(nr_bvec, sizeof(*cmd->bvec), GFP_NOIO);
		if (!cmd->bvec) {
			ret = -EIO;
			goto unlock;
		}

		/*
		 * The bios of the request may be started from the middle of
		 * the 'bvec' because of bio splitting, so we can't directly
		 * copy bio->bi_iov_vec to new bvec. The rq_for_each_bvec
		 * API will take care of all details for us.
		 */
		bvec = cmd->bvec;
		rq_for_each_bvec(tmp, rq, rq_iter) {
			*bvec = tmp;
			bvec++;
		}
		iov_iter_bvec(&iter, rw, cmd->bvec, nr_bvec, blk_rq_bytes(rq));
	} else {
		/*
		 * Same here, this bio may be started from the middle of the
		 * 'bvec' because of bio splitting, so offset from the bvec
		 * must be passed to iov iterator
		 */
		iov_iter_bvec(&iter, rw,
			__bvec_iter_bvec(rq->bio->bi_io_vec, rq->bio->bi_iter),
			nr_bvec, blk_rq_bytes(rq));
		iter.iov_offset = rq->bio->bi_iter.bi_bvec_done;
	}

	cmd->iocb.ki_pos = (sector - zone->start) << SECTOR_SHIFT;
	cmd->iocb.ki_filp = zone->file;
	cmd->iocb.ki_complete = zloop_rw_complete;
	if (!zlo->buffered_io)
		cmd->iocb.ki_flags = IOCB_DIRECT;
	cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);

	if (rw == ITER_SOURCE)
		ret = zone->file->f_op->write_iter(&cmd->iocb, &iter);
	else
		ret = zone->file->f_op->read_iter(&cmd->iocb, &iter);
unlock:
	if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write)
		mutex_unlock(&zone->lock);
out:
	if (ret != -EIOCBQUEUED)
		zloop_rw_complete(&cmd->iocb, ret);
	zloop_put_cmd(cmd);
}

static void zloop_handle_cmd(struct zloop_cmd *cmd)
{
	struct request *rq = blk_mq_rq_from_pdu(cmd);
	struct zloop_device *zlo = rq->q->queuedata;

	/* We can block in this context, so ignore REQ_NOWAIT. */
	if (rq->cmd_flags & REQ_NOWAIT)
		rq->cmd_flags &= ~REQ_NOWAIT;

	switch (req_op(rq)) {
	case REQ_OP_READ:
	case REQ_OP_WRITE:
	case REQ_OP_ZONE_APPEND:
		/*
		 * zloop_rw() always executes asynchronously or completes
		 * directly.
		 */
		zloop_rw(cmd);
		return;
	case REQ_OP_FLUSH:
		/*
		 * Sync the entire FS containing the zone files instead of
		 * walking all files
		 */
		cmd->ret = sync_filesystem(file_inode(zlo->data_dir)->i_sb);
		break;
	case REQ_OP_ZONE_RESET:
		cmd->ret = zloop_reset_zone(zlo, rq_zone_no(rq));
		break;
	case REQ_OP_ZONE_RESET_ALL:
		cmd->ret = zloop_reset_all_zones(zlo);
		break;
	case REQ_OP_ZONE_FINISH:
		cmd->ret = zloop_finish_zone(zlo, rq_zone_no(rq));
		break;
	case REQ_OP_ZONE_OPEN:
		cmd->ret = zloop_open_zone(zlo, rq_zone_no(rq));
		break;
	case REQ_OP_ZONE_CLOSE:
		cmd->ret = zloop_close_zone(zlo, rq_zone_no(rq));
		break;
	default:
		WARN_ON_ONCE(1);
		pr_err("Unsupported operation %d\n", req_op(rq));
		cmd->ret = -EOPNOTSUPP;
		break;
	}

	blk_mq_complete_request(rq);
}

static void zloop_cmd_workfn(struct work_struct *work)
{
	struct zloop_cmd *cmd = container_of(work, struct zloop_cmd, work);
	int orig_flags = current->flags;

	current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
	zloop_handle_cmd(cmd);
	current->flags = orig_flags;
}

static void zloop_complete_rq(struct request *rq)
{
	struct zloop_cmd *cmd = blk_mq_rq_to_pdu(rq);
	struct zloop_device *zlo = rq->q->queuedata;
	unsigned int zone_no = cmd->sector >> zlo->zone_shift;
	struct zloop_zone *zone = &zlo->zones[zone_no];
	blk_status_t sts = BLK_STS_OK;

	switch (req_op(rq)) {
	case REQ_OP_READ:
		if (cmd->ret < 0)
			pr_err("Zone %u: failed read sector %llu, %llu sectors\n",
			       zone_no, cmd->sector, cmd->nr_sectors);

		if (cmd->ret >= 0 && cmd->ret != blk_rq_bytes(rq)) {
			/* short read */
			struct bio *bio;

			__rq_for_each_bio(bio, rq)
				zero_fill_bio(bio);
		}
		break;
	case REQ_OP_WRITE:
	case REQ_OP_ZONE_APPEND:
		if (cmd->ret < 0)
			pr_err("Zone %u: failed %swrite sector %llu, %llu sectors\n",
			       zone_no,
			       req_op(rq) == REQ_OP_WRITE ? "" : "append ",
			       cmd->sector, cmd->nr_sectors);

		if (cmd->ret >= 0 && cmd->ret != blk_rq_bytes(rq)) {
			pr_err("Zone %u: partial write %ld/%u B\n",
			       zone_no, cmd->ret, blk_rq_bytes(rq));
			cmd->ret = -EIO;
		}

		if (cmd->ret < 0 && !test_bit(ZLOOP_ZONE_CONV, &zone->flags)) {
			/*
			 * A write to a sequential zone file failed: mark the
			 * zone as having an error. This will be corrected and
			 * cleared when the next IO is submitted.
			 */
			set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
			break;
		}
		if (req_op(rq) == REQ_OP_ZONE_APPEND)
			rq->__sector = cmd->sector;

		break;
	default:
		break;
	}

	if (cmd->ret < 0)
		sts = errno_to_blk_status(cmd->ret);
	blk_mq_end_request(rq, sts);
}

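/*
 * With ordered zone append, the append target sector is reserved at dispatch
 * time so that appends complete in the order in which they were issued; the
 * write pointer is advanced here rather than in zloop_rw().
 */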
static bool zloop_set_zone_append_sector(struct request *rq)
{
	struct zloop_device *zlo = rq->q->queuedata;
	unsigned int zone_no = rq_zone_no(rq);
	struct zloop_zone *zone = &zlo->zones[zone_no];
	sector_t zone_end = zone->start + zlo->zone_capacity;
	sector_t nr_sectors = blk_rq_sectors(rq);
	unsigned long flags;

	spin_lock_irqsave(&zone->wp_lock, flags);

	if (zone->cond == BLK_ZONE_COND_FULL ||
	    zone->wp + nr_sectors > zone_end) {
		spin_unlock_irqrestore(&zone->wp_lock, flags);
		return false;
	}

	rq->__sector = zone->wp;
	zone->wp += blk_rq_sectors(rq);
	if (zone->wp >= zone_end) {
		zone->cond = BLK_ZONE_COND_FULL;
		zone->wp = ULLONG_MAX;
	}

	spin_unlock_irqrestore(&zone->wp_lock, flags);

	return true;
}

static blk_status_t zloop_queue_rq(struct blk_mq_hw_ctx *hctx,
				   const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;
	struct zloop_cmd *cmd = blk_mq_rq_to_pdu(rq);
	struct zloop_device *zlo = rq->q->queuedata;

	if (zlo->state == Zlo_deleting)
		return BLK_STS_IOERR;

	/*
	 * If we need to strongly order zone append operations, set the request
	 * sector to the zone write pointer location now instead of when the
	 * command work runs.
	 */
	if (zlo->ordered_zone_append && req_op(rq) == REQ_OP_ZONE_APPEND) {
		if (!zloop_set_zone_append_sector(rq))
			return BLK_STS_IOERR;
	}

	blk_mq_start_request(rq);

	INIT_WORK(&cmd->work, zloop_cmd_workfn);
	queue_work(zlo->workqueue, &cmd->work);

	return BLK_STS_OK;
}

static const struct blk_mq_ops zloop_mq_ops = {
	.queue_rq	= zloop_queue_rq,
	.complete	= zloop_complete_rq,
};

static int zloop_open(struct gendisk *disk, blk_mode_t mode)
{
	struct zloop_device *zlo = disk->private_data;
	int ret;

	ret = mutex_lock_killable(&zloop_ctl_mutex);
	if (ret)
		return ret;

	if (zlo->state != Zlo_live)
		ret = -ENXIO;
	mutex_unlock(&zloop_ctl_mutex);
	return ret;
}

static int zloop_report_zones(struct gendisk *disk, sector_t sector,
		unsigned int nr_zones, struct blk_report_zones_args *args)
{
	struct zloop_device *zlo = disk->private_data;
	struct blk_zone blkz = {};
	unsigned int first, i;
	unsigned long flags;
	int ret;

	first = disk_zone_no(disk, sector);
	if (first >= zlo->nr_zones)
		return 0;
	nr_zones = min(nr_zones, zlo->nr_zones - first);

	for (i = 0; i < nr_zones; i++) {
		unsigned int zone_no = first + i;
		struct zloop_zone *zone = &zlo->zones[zone_no];

		mutex_lock(&zone->lock);

		if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
			ret = zloop_update_seq_zone(zlo, zone_no);
			if (ret) {
				mutex_unlock(&zone->lock);
				return ret;
			}
		}

		blkz.start = zone->start;
		blkz.len = zlo->zone_size;
		spin_lock_irqsave(&zone->wp_lock, flags);
		blkz.wp = zone->wp;
		spin_unlock_irqrestore(&zone->wp_lock, flags);
		blkz.cond = zone->cond;
		if (test_bit(ZLOOP_ZONE_CONV, &zone->flags)) {
			blkz.type = BLK_ZONE_TYPE_CONVENTIONAL;
			blkz.capacity = zlo->zone_size;
		} else {
			blkz.type = BLK_ZONE_TYPE_SEQWRITE_REQ;
			blkz.capacity = zlo->zone_capacity;
		}

		mutex_unlock(&zone->lock);

		ret = disk_report_zone(disk, &blkz, i, args);
		if (ret)
			return ret;
	}

	return nr_zones;
}

static void zloop_free_disk(struct gendisk *disk)
{
	struct zloop_device *zlo = disk->private_data;
	unsigned int i;

	blk_mq_free_tag_set(&zlo->tag_set);

	for (i = 0; i < zlo->nr_zones; i++) {
		struct zloop_zone *zone = &zlo->zones[i];

		mapping_set_gfp_mask(zone->file->f_mapping,
				     zone->old_gfp_mask);
		fput(zone->file);
	}

	fput(zlo->data_dir);
	destroy_workqueue(zlo->workqueue);
	kfree(zlo->base_dir);
	kvfree(zlo);
}

static const struct block_device_operations zloop_fops = {
	.owner		= THIS_MODULE,
	.open		= zloop_open,
	.report_zones	= zloop_report_zones,
	.free_disk	= zloop_free_disk,
};

__printf(3, 4)
static struct file *zloop_filp_open_fmt(int oflags, umode_t mode,
					const char *fmt, ...)
{
	struct file *file;
	va_list ap;
	char *p;

	va_start(ap, fmt);
	p = kvasprintf(GFP_KERNEL, fmt, ap);
	va_end(ap);

	if (!p)
		return ERR_PTR(-ENOMEM);
	file = filp_open(p, oflags, mode);
	kfree(p);
	return file;
}

static int zloop_get_block_size(struct zloop_device *zlo,
				struct zloop_zone *zone)
{
	struct block_device *sb_bdev = zone->file->f_mapping->host->i_sb->s_bdev;
	struct kstat st;

	/*
	 * If the FS block size is lower than or equal to 4K, use that as the
	 * device block size. Otherwise, fallback to the FS direct IO alignment
	 * constraint if that is provided, and to the FS underlying device
	 * physical block size if the direct IO alignment is unknown.
	 */
	if (file_inode(zone->file)->i_sb->s_blocksize <= SZ_4K)
		zlo->block_size = file_inode(zone->file)->i_sb->s_blocksize;
	else if (!vfs_getattr(&zone->file->f_path, &st, STATX_DIOALIGN, 0) &&
		 (st.result_mask & STATX_DIOALIGN))
		zlo->block_size = st.dio_offset_align;
	else if (sb_bdev)
		zlo->block_size = bdev_physical_block_size(sb_bdev);
	else
		zlo->block_size = SECTOR_SIZE;

	if (zlo->zone_capacity & ((zlo->block_size >> SECTOR_SHIFT) - 1)) {
		pr_err("Zone capacity is not aligned to block size %u\n",
		       zlo->block_size);
		return -EINVAL;
	}

	return 0;
}

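/*
 * Open (or create) the backing file of a zone: "<base_dir>/<id>/cnv-NNNNNN"
 * for conventional zones and "<base_dir>/<id>/seq-NNNNNN" for sequential
 * zones. When restoring an existing device, the file sizes are checked
 * against the zone configuration.
 */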
static int zloop_init_zone(struct zloop_device *zlo, struct zloop_options *opts,
			   unsigned int zone_no, bool restore)
{
	struct zloop_zone *zone = &zlo->zones[zone_no];
	int oflags = O_RDWR;
	struct kstat stat;
	sector_t file_sectors;
	int ret;

	mutex_init(&zone->lock);
	spin_lock_init(&zone->wp_lock);
	zone->start = (sector_t)zone_no << zlo->zone_shift;

	if (!restore)
		oflags |= O_CREAT;

	if (!opts->buffered_io)
		oflags |= O_DIRECT;

	if (zone_no < zlo->nr_conv_zones) {
		/* Conventional zone file. */
		set_bit(ZLOOP_ZONE_CONV, &zone->flags);
		zone->cond = BLK_ZONE_COND_NOT_WP;
		zone->wp = U64_MAX;

		zone->file = zloop_filp_open_fmt(oflags, 0600, "%s/%u/cnv-%06u",
					zlo->base_dir, zlo->id, zone_no);
		if (IS_ERR(zone->file)) {
			pr_err("Failed to open zone %u file %s/%u/cnv-%06u (err=%ld)",
			       zone_no, zlo->base_dir, zlo->id, zone_no,
			       PTR_ERR(zone->file));
			return PTR_ERR(zone->file);
		}

		if (!zlo->block_size) {
			ret = zloop_get_block_size(zlo, zone);
			if (ret)
				return ret;
		}

		ret = vfs_getattr(&zone->file->f_path, &stat, STATX_SIZE, 0);
		if (ret < 0) {
			pr_err("Failed to get zone %u file stat\n", zone_no);
			return ret;
		}
		file_sectors = stat.size >> SECTOR_SHIFT;

		if (restore && file_sectors != zlo->zone_size) {
			pr_err("Invalid conventional zone %u file size (%llu sectors != %llu)\n",
			       zone_no, file_sectors, zlo->zone_size);
			return -EINVAL;
		}

		ret = vfs_truncate(&zone->file->f_path,
				   zlo->zone_size << SECTOR_SHIFT);
		if (ret < 0) {
			pr_err("Failed to truncate zone %u file (err=%d)\n",
			       zone_no, ret);
			return ret;
		}

		return 0;
	}

	/* Sequential zone file. */
	zone->file = zloop_filp_open_fmt(oflags, 0600, "%s/%u/seq-%06u",
					zlo->base_dir, zlo->id, zone_no);
	if (IS_ERR(zone->file)) {
		pr_err("Failed to open zone %u file %s/%u/seq-%06u (err=%ld)",
		       zone_no, zlo->base_dir, zlo->id, zone_no,
		       PTR_ERR(zone->file));
		return PTR_ERR(zone->file);
	}

	if (!zlo->block_size) {
		ret = zloop_get_block_size(zlo, zone);
		if (ret)
			return ret;
	}

	mutex_lock(&zone->lock);
	ret = zloop_update_seq_zone(zlo, zone_no);
	mutex_unlock(&zone->lock);

	return ret;
}

static bool zloop_dev_exists(struct zloop_device *zlo)
{
	struct file *cnv, *seq;
	bool exists;

	cnv = zloop_filp_open_fmt(O_RDONLY, 0600, "%s/%u/cnv-%06u",
				  zlo->base_dir, zlo->id, 0);
	seq = zloop_filp_open_fmt(O_RDONLY, 0600, "%s/%u/seq-%06u",
				  zlo->base_dir, zlo->id, 0);
	exists = !IS_ERR(cnv) || !IS_ERR(seq);

	if (!IS_ERR(cnv))
		fput(cnv);
	if (!IS_ERR(seq))
		fput(seq);

	return exists;
}

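/*
 * Handle the "add" control command: allocate an ID, open the per-zone backing
 * files under <base_dir>/<id> (restoring their state if they already exist)
 * and register the new zloopN disk.
 */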
static int zloop_ctl_add(struct zloop_options *opts)
{
	struct queue_limits lim = {
		.max_hw_sectors		= SZ_1M >> SECTOR_SHIFT,
		.chunk_sectors		= opts->zone_size,
		.features		= BLK_FEAT_ZONED,
	};
	unsigned int nr_zones, i, j;
	struct zloop_device *zlo;
	int ret = -EINVAL;
	bool restore;

	__module_get(THIS_MODULE);

	nr_zones = opts->capacity >> ilog2(opts->zone_size);
	if (opts->nr_conv_zones >= nr_zones) {
		pr_err("Invalid number of conventional zones %u\n",
		       opts->nr_conv_zones);
		goto out;
	}

	zlo = kvzalloc(struct_size(zlo, zones, nr_zones), GFP_KERNEL);
	if (!zlo) {
		ret = -ENOMEM;
		goto out;
	}
	zlo->state = Zlo_creating;

	ret = mutex_lock_killable(&zloop_ctl_mutex);
	if (ret)
		goto out_free_dev;

	/* Allocate id, if @opts->id >= 0, we're requesting that specific id */
	if (opts->id >= 0) {
		ret = idr_alloc(&zloop_index_idr, zlo,
				opts->id, opts->id + 1, GFP_KERNEL);
		if (ret == -ENOSPC)
			ret = -EEXIST;
	} else {
		ret = idr_alloc(&zloop_index_idr, zlo, 0, 0, GFP_KERNEL);
	}
	mutex_unlock(&zloop_ctl_mutex);
	if (ret < 0)
		goto out_free_dev;

	zlo->id = ret;
	zlo->zone_shift = ilog2(opts->zone_size);
	zlo->zone_size = opts->zone_size;
	if (opts->zone_capacity)
		zlo->zone_capacity = opts->zone_capacity;
	else
		zlo->zone_capacity = zlo->zone_size;
	zlo->nr_zones = nr_zones;
	zlo->nr_conv_zones = opts->nr_conv_zones;
	zlo->buffered_io = opts->buffered_io;
	zlo->zone_append = opts->zone_append;
	if (zlo->zone_append)
		zlo->ordered_zone_append = opts->ordered_zone_append;

	zlo->workqueue = alloc_workqueue("zloop%d", WQ_UNBOUND | WQ_FREEZABLE,
					 opts->nr_queues * opts->queue_depth,
					 zlo->id);
	if (!zlo->workqueue) {
		ret = -ENOMEM;
		goto out_free_idr;
	}

	if (opts->base_dir)
		zlo->base_dir = kstrdup(opts->base_dir, GFP_KERNEL);
	else
		zlo->base_dir = kstrdup(ZLOOP_DEF_BASE_DIR, GFP_KERNEL);
	if (!zlo->base_dir) {
		ret = -ENOMEM;
		goto out_destroy_workqueue;
	}

	zlo->data_dir = zloop_filp_open_fmt(O_RDONLY | O_DIRECTORY, 0, "%s/%u",
					    zlo->base_dir, zlo->id);
	if (IS_ERR(zlo->data_dir)) {
		ret = PTR_ERR(zlo->data_dir);
		pr_warn("Failed to open directory %s/%u (err=%d)\n",
			zlo->base_dir, zlo->id, ret);
		goto out_free_base_dir;
	}

	/*
	 * If we already have zone files, we are restoring a device created by a
	 * previous add operation. In this case, zloop_init_zone() will check
	 * that the zone files are consistent with the zone configuration given.
	 */
	restore = zloop_dev_exists(zlo);
	for (i = 0; i < nr_zones; i++) {
		ret = zloop_init_zone(zlo, opts, i, restore);
		if (ret)
			goto out_close_files;
	}

	lim.physical_block_size = zlo->block_size;
	lim.logical_block_size = zlo->block_size;
	if (zlo->zone_append)
		lim.max_hw_zone_append_sectors = lim.max_hw_sectors;

	zlo->tag_set.ops = &zloop_mq_ops;
	zlo->tag_set.nr_hw_queues = opts->nr_queues;
	zlo->tag_set.queue_depth = opts->queue_depth;
	zlo->tag_set.numa_node = NUMA_NO_NODE;
	zlo->tag_set.cmd_size = sizeof(struct zloop_cmd);
	zlo->tag_set.driver_data = zlo;

	ret = blk_mq_alloc_tag_set(&zlo->tag_set);
	if (ret) {
		pr_err("blk_mq_alloc_tag_set failed (err=%d)\n", ret);
		goto out_close_files;
	}

	zlo->disk = blk_mq_alloc_disk(&zlo->tag_set, &lim, zlo);
	if (IS_ERR(zlo->disk)) {
		ret = PTR_ERR(zlo->disk);
		pr_err("blk_mq_alloc_disk failed (err=%d)\n", ret);
		goto out_cleanup_tags;
	}
	zlo->disk->flags = GENHD_FL_NO_PART;
	zlo->disk->fops = &zloop_fops;
	zlo->disk->private_data = zlo;
	sprintf(zlo->disk->disk_name, "zloop%d", zlo->id);
	set_capacity(zlo->disk, (u64)lim.chunk_sectors * zlo->nr_zones);

	ret = blk_revalidate_disk_zones(zlo->disk);
	if (ret)
		goto out_cleanup_disk;

	ret = add_disk(zlo->disk);
	if (ret) {
		pr_err("add_disk failed (err=%d)\n", ret);
		goto out_cleanup_disk;
	}

	mutex_lock(&zloop_ctl_mutex);
	zlo->state = Zlo_live;
	mutex_unlock(&zloop_ctl_mutex);

	pr_info("Added device %d: %u zones of %llu MiB, %u B block size\n",
		zlo->id, zlo->nr_zones,
		((sector_t)zlo->zone_size << SECTOR_SHIFT) >> 20,
		zlo->block_size);
	pr_info("zloop%d: using %s%s zone append\n",
		zlo->id,
		zlo->ordered_zone_append ? "ordered " : "",
		zlo->zone_append ? "native" : "emulated");

	return 0;

out_cleanup_disk:
	put_disk(zlo->disk);
out_cleanup_tags:
	blk_mq_free_tag_set(&zlo->tag_set);
out_close_files:
	for (j = 0; j < i; j++) {
		struct zloop_zone *zone = &zlo->zones[j];

		if (!IS_ERR_OR_NULL(zone->file))
			fput(zone->file);
	}
	fput(zlo->data_dir);
out_free_base_dir:
	kfree(zlo->base_dir);
out_destroy_workqueue:
	destroy_workqueue(zlo->workqueue);
out_free_idr:
	mutex_lock(&zloop_ctl_mutex);
	idr_remove(&zloop_index_idr, zlo->id);
	mutex_unlock(&zloop_ctl_mutex);
out_free_dev:
	kvfree(zlo);
out:
	module_put(THIS_MODULE);
	if (ret == -ENOENT)
		ret = -EINVAL;
	return ret;
}

static int zloop_ctl_remove(struct zloop_options *opts)
{
	struct zloop_device *zlo;
	int ret;

	if (!(opts->mask & ZLOOP_OPT_ID)) {
		pr_err("No ID specified\n");
		return -EINVAL;
	}

	ret = mutex_lock_killable(&zloop_ctl_mutex);
	if (ret)
		return ret;

	zlo = idr_find(&zloop_index_idr, opts->id);
	if (!zlo || zlo->state == Zlo_creating) {
		ret = -ENODEV;
	} else if (zlo->state == Zlo_deleting) {
		ret = -EINVAL;
	} else {
		idr_remove(&zloop_index_idr, zlo->id);
		zlo->state = Zlo_deleting;
	}

	mutex_unlock(&zloop_ctl_mutex);
	if (ret)
		return ret;

	del_gendisk(zlo->disk);
	put_disk(zlo->disk);

	pr_info("Removed device %d\n", opts->id);

	module_put(THIS_MODULE);

	return 0;
}

static int zloop_parse_options(struct zloop_options *opts, const char *buf)
{
	substring_t args[MAX_OPT_ARGS];
	char *options, *o, *p;
	unsigned int token;
	int ret = 0;

	/* Set defaults. */
	opts->mask = 0;
	opts->id = ZLOOP_DEF_ID;
	opts->capacity = ZLOOP_DEF_ZONE_SIZE * ZLOOP_DEF_NR_ZONES;
	opts->zone_size = ZLOOP_DEF_ZONE_SIZE;
	opts->nr_conv_zones = ZLOOP_DEF_NR_CONV_ZONES;
	opts->nr_queues = ZLOOP_DEF_NR_QUEUES;
	opts->queue_depth = ZLOOP_DEF_QUEUE_DEPTH;
	opts->buffered_io = ZLOOP_DEF_BUFFERED_IO;
	opts->zone_append = ZLOOP_DEF_ZONE_APPEND;
	opts->ordered_zone_append = ZLOOP_DEF_ORDERED_ZONE_APPEND;

	if (!buf)
		return 0;

	/* Skip leading spaces before the options. */
	while (isspace(*buf))
		buf++;

	options = o = kstrdup(buf, GFP_KERNEL);
	if (!options)
		return -ENOMEM;

	/* Parse the options, doing only some light invalid value checks. */
	while ((p = strsep(&o, ",\n")) != NULL) {
		if (!*p)
			continue;

		token = match_token(p, zloop_opt_tokens, args);
		opts->mask |= token;
		switch (token) {
		case ZLOOP_OPT_ID:
			if (match_int(args, &opts->id)) {
				ret = -EINVAL;
				goto out;
			}
			break;
		case ZLOOP_OPT_CAPACITY:
			if (match_uint(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			if (!token) {
				pr_err("Invalid capacity\n");
				ret = -EINVAL;
				goto out;
			}
			opts->capacity =
				((sector_t)token * SZ_1M) >> SECTOR_SHIFT;
			break;
		case ZLOOP_OPT_ZONE_SIZE:
			if (match_uint(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			if (!token || token > ZLOOP_MAX_ZONE_SIZE_MB ||
			    !is_power_of_2(token)) {
				pr_err("Invalid zone size %u\n", token);
				ret = -EINVAL;
				goto out;
			}
			opts->zone_size =
				((sector_t)token * SZ_1M) >> SECTOR_SHIFT;
			break;
		case ZLOOP_OPT_ZONE_CAPACITY:
			if (match_uint(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			if (!token) {
				pr_err("Invalid zone capacity\n");
				ret = -EINVAL;
				goto out;
			}
			opts->zone_capacity =
				((sector_t)token * SZ_1M) >> SECTOR_SHIFT;
			break;
		case ZLOOP_OPT_NR_CONV_ZONES:
			if (match_uint(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			opts->nr_conv_zones = token;
			break;
		case ZLOOP_OPT_BASE_DIR:
			p = match_strdup(args);
			if (!p) {
				ret = -ENOMEM;
				goto out;
			}
			kfree(opts->base_dir);
			opts->base_dir = p;
			break;
		case ZLOOP_OPT_NR_QUEUES:
			if (match_uint(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			if (!token) {
				pr_err("Invalid number of queues\n");
				ret = -EINVAL;
				goto out;
			}
			opts->nr_queues = min(token, num_online_cpus());
			break;
		case ZLOOP_OPT_QUEUE_DEPTH:
			if (match_uint(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			if (!token) {
				pr_err("Invalid queue depth\n");
				ret = -EINVAL;
				goto out;
			}
			opts->queue_depth = token;
			break;
		case ZLOOP_OPT_BUFFERED_IO:
			opts->buffered_io = true;
			break;
		case ZLOOP_OPT_ZONE_APPEND:
			if (match_uint(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			if (token != 0 && token != 1) {
				pr_err("Invalid zone_append value\n");
				ret = -EINVAL;
				goto out;
			}
			opts->zone_append = token;
			break;
		case ZLOOP_OPT_ORDERED_ZONE_APPEND:
			opts->ordered_zone_append = true;
			break;
		case ZLOOP_OPT_ERR:
		default:
			pr_warn("unknown parameter or missing value '%s'\n", p);
			ret = -EINVAL;
			goto out;
		}
	}

	ret = -EINVAL;
	if (opts->capacity <= opts->zone_size) {
		pr_err("Invalid capacity\n");
		goto out;
	}

	if (opts->zone_capacity > opts->zone_size) {
		pr_err("Invalid zone capacity\n");
		goto out;
	}

	ret = 0;
out:
	kfree(options);
	return ret;
}

enum {
	ZLOOP_CTL_ADD,
	ZLOOP_CTL_REMOVE,
};

static struct zloop_ctl_op {
	int		code;
	const char	*name;
} zloop_ctl_ops[] = {
	{ ZLOOP_CTL_ADD,	"add" },
	{ ZLOOP_CTL_REMOVE,	"remove" },
	{ -1,			NULL },
};

static ssize_t zloop_ctl_write(struct file *file, const char __user *ubuf,
			       size_t count, loff_t *pos)
{
	struct zloop_options opts = { };
	struct zloop_ctl_op *op;
	const char *buf, *opts_buf;
	int i, ret;

	if (count > PAGE_SIZE)
		return -ENOMEM;

	buf = memdup_user_nul(ubuf, count);
	if (IS_ERR(buf))
		return PTR_ERR(buf);

	for (i = 0; i < ARRAY_SIZE(zloop_ctl_ops); i++) {
		op = &zloop_ctl_ops[i];
		if (!op->name) {
			pr_err("Invalid operation\n");
			ret = -EINVAL;
			goto out;
		}
		if (!strncmp(buf, op->name, strlen(op->name)))
			break;
	}

	if (count <= strlen(op->name))
		opts_buf = NULL;
	else
		opts_buf = buf + strlen(op->name);

	ret = zloop_parse_options(&opts, opts_buf);
	if (ret) {
		pr_err("Failed to parse options\n");
		goto out;
	}

	switch (op->code) {
	case ZLOOP_CTL_ADD:
		ret = zloop_ctl_add(&opts);
		break;
	case ZLOOP_CTL_REMOVE:
		ret = zloop_ctl_remove(&opts);
		break;
	default:
		pr_err("Invalid operation\n");
		ret = -EINVAL;
		goto out;
	}

out:
	kfree(opts.base_dir);
	kfree(buf);
	return ret ? ret : count;
}

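/*
 * Show the supported command syntax when /dev/zloop-control is read, e.g.:
 *
 *   add id=%d,capacity_mb=%u,zone_size_mb=%u,...
 *   remove id=%d
 */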
static int zloop_ctl_show(struct seq_file *seq_file, void *private)
{
	const struct match_token *tok;
	int i;

	/* Add operation */
	seq_printf(seq_file, "%s ", zloop_ctl_ops[0].name);
	for (i = 0; i < ARRAY_SIZE(zloop_opt_tokens); i++) {
		tok = &zloop_opt_tokens[i];
		if (!tok->pattern)
			break;
		if (i)
			seq_putc(seq_file, ',');
		seq_puts(seq_file, tok->pattern);
	}
	seq_putc(seq_file, '\n');

	/* Remove operation */
	seq_puts(seq_file, zloop_ctl_ops[1].name);
	seq_puts(seq_file, " id=%d\n");

	return 0;
}

static int zloop_ctl_open(struct inode *inode, struct file *file)
{
	file->private_data = NULL;
	return single_open(file, zloop_ctl_show, NULL);
}

static int zloop_ctl_release(struct inode *inode, struct file *file)
{
	return single_release(inode, file);
}

static const struct file_operations zloop_ctl_fops = {
	.owner		= THIS_MODULE,
	.open		= zloop_ctl_open,
	.release	= zloop_ctl_release,
	.write		= zloop_ctl_write,
	.read		= seq_read,
};

static struct miscdevice zloop_misc = {
	.minor		= MISC_DYNAMIC_MINOR,
	.name		= "zloop-control",
	.fops		= &zloop_ctl_fops,
};

static int __init zloop_init(void)
{
	int ret;

	ret = misc_register(&zloop_misc);
	if (ret) {
		pr_err("Failed to register misc device: %d\n", ret);
		return ret;
	}
	pr_info("Module loaded\n");

	return 0;
}

static void __exit zloop_exit(void)
{
	misc_deregister(&zloop_misc);
	idr_destroy(&zloop_index_idr);
}

module_init(zloop_init);
module_exit(zloop_exit);

MODULE_DESCRIPTION("Zoned loopback device");
MODULE_LICENSE("GPL");