1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright (c) 2025, Christoph Hellwig.
4 * Copyright (c) 2025, Western Digital Corporation or its affiliates.
5 *
6 * Zoned Loop Device driver - exports a zoned block device using one file per
7 * zone as backing storage.
8 */
9 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10
11 #include <linux/module.h>
12 #include <linux/blk-mq.h>
13 #include <linux/blkzoned.h>
14 #include <linux/pagemap.h>
15 #include <linux/miscdevice.h>
16 #include <linux/falloc.h>
17 #include <linux/mutex.h>
18 #include <linux/parser.h>
19 #include <linux/seq_file.h>
20
/*
 * Options for adding (and removing) a device. Each flag is a bit so that
 * zloop_options.mask can record which options were explicitly given.
 */
enum {
	ZLOOP_OPT_ERR = 0,
	ZLOOP_OPT_ID = (1 << 0),
	ZLOOP_OPT_CAPACITY = (1 << 1),
	ZLOOP_OPT_ZONE_SIZE = (1 << 2),
	ZLOOP_OPT_ZONE_CAPACITY = (1 << 3),
	ZLOOP_OPT_NR_CONV_ZONES = (1 << 4),
	ZLOOP_OPT_BASE_DIR = (1 << 5),
	ZLOOP_OPT_NR_QUEUES = (1 << 6),
	ZLOOP_OPT_QUEUE_DEPTH = (1 << 7),
	ZLOOP_OPT_BUFFERED_IO = (1 << 8),
	ZLOOP_OPT_ZONE_APPEND = (1 << 9),
	ZLOOP_OPT_ORDERED_ZONE_APPEND = (1 << 10),
};

/* Token patterns recognized on the control command line, one per option. */
static const match_table_t zloop_opt_tokens = {
	{ ZLOOP_OPT_ID, "id=%d" },
	{ ZLOOP_OPT_CAPACITY, "capacity_mb=%u" },
	{ ZLOOP_OPT_ZONE_SIZE, "zone_size_mb=%u" },
	{ ZLOOP_OPT_ZONE_CAPACITY, "zone_capacity_mb=%u" },
	{ ZLOOP_OPT_NR_CONV_ZONES, "conv_zones=%u" },
	{ ZLOOP_OPT_BASE_DIR, "base_dir=%s" },
	{ ZLOOP_OPT_NR_QUEUES, "nr_queues=%u" },
	{ ZLOOP_OPT_QUEUE_DEPTH, "queue_depth=%u" },
	{ ZLOOP_OPT_BUFFERED_IO, "buffered_io" },
	{ ZLOOP_OPT_ZONE_APPEND, "zone_append=%u" },
	{ ZLOOP_OPT_ORDERED_ZONE_APPEND, "ordered_zone_append" },
	{ ZLOOP_OPT_ERR, NULL }
};

/* Default values for the "add" operation. */
#define ZLOOP_DEF_ID -1
/* 256 MiB zone size, expressed in 512 B sectors. */
#define ZLOOP_DEF_ZONE_SIZE ((256ULL * SZ_1M) >> SECTOR_SHIFT)
#define ZLOOP_DEF_NR_ZONES 64
#define ZLOOP_DEF_NR_CONV_ZONES 8
#define ZLOOP_DEF_BASE_DIR "/var/local/zloop"
#define ZLOOP_DEF_NR_QUEUES 1
#define ZLOOP_DEF_QUEUE_DEPTH 128
#define ZLOOP_DEF_BUFFERED_IO false
#define ZLOOP_DEF_ZONE_APPEND true
#define ZLOOP_DEF_ORDERED_ZONE_APPEND false

/* Arbitrary limit on the zone size (16GB). */
#define ZLOOP_MAX_ZONE_SIZE_MB 16384
68
/*
 * Parsed control options for the "add" and "remove" operations.
 * All sector values are in units of 512 B sectors.
 */
struct zloop_options {
	unsigned int mask;	/* ZLOOP_OPT_* mask of options explicitly set */
	int id;			/* device ID, or ZLOOP_DEF_ID for automatic */
	sector_t capacity;	/* total device capacity */
	sector_t zone_size;	/* size of each zone */
	sector_t zone_capacity;	/* writable sectors per zone (0 = zone_size) */
	unsigned int nr_conv_zones;	/* number of leading conventional zones */
	char *base_dir;		/* parent directory for the zone files */
	unsigned int nr_queues;	/* number of blk-mq hardware queues */
	unsigned int queue_depth;	/* per-queue request depth */
	bool buffered_io;	/* use buffered instead of direct file I/O */
	bool zone_append;	/* advertise native zone append support */
	bool ordered_zone_append;	/* order appends at submission time */
};

/*
 * Device states.
 */
enum {
	Zlo_creating = 0,	/* device being set up by zloop_ctl_add() */
	Zlo_live,		/* disk added: open and I/O allowed */
	Zlo_deleting,		/* removal in progress: fail new I/O */
};

/* Per-zone flag bits (zloop_zone.flags). */
enum zloop_zone_flags {
	ZLOOP_ZONE_CONV = 0,	/* conventional zone (no write pointer) */
	ZLOOP_ZONE_SEQ_ERROR,	/* zone state stale after a failed write */
};

/*
 * A zone of the device and its backing file.
 */
struct zloop_zone {
	struct file *file;	/* backing file holding the zone data */

	unsigned long flags;	/* ZLOOP_ZONE_* flags */
	struct mutex lock;	/* serializes seq zone writes and zone ops */
	spinlock_t wp_lock;	/* protects cond and wp updates */
	enum blk_zone_cond cond;	/* current zone condition */
	sector_t start;		/* first sector of the zone on the device */
	sector_t wp;		/* write pointer (ULLONG_MAX when full) */

	gfp_t old_gfp_mask;	/* f_mapping GFP mask restored on free_disk */
};
110
/*
 * A zoned loop device instance (disk "zloop<id>").
 */
struct zloop_device {
	unsigned int id;	/* device ID, also the IDR key */
	unsigned int state;	/* Zlo_* device state */

	struct blk_mq_tag_set tag_set;
	struct gendisk *disk;

	struct workqueue_struct *workqueue;	/* runs zloop_cmd_workfn() */
	bool buffered_io;	/* buffered instead of direct zone file I/O */
	bool zone_append;	/* native zone append enabled */
	bool ordered_zone_append;	/* set append sector at submission */

	const char *base_dir;	/* parent directory of the device directory */
	struct file *data_dir;	/* "<base_dir>/<id>" directory */

	unsigned int zone_shift;	/* ilog2(zone_size) */
	sector_t zone_size;	/* zone size in sectors */
	sector_t zone_capacity;	/* writable sectors per sequential zone */
	unsigned int nr_zones;	/* total number of zones */
	unsigned int nr_conv_zones;	/* leading conventional zones */
	unsigned int block_size;	/* logical/physical block size, bytes */

	struct zloop_zone zones[] __counted_by(nr_zones);
};

/*
 * Per-request driver data (blk-mq PDU).
 */
struct zloop_cmd {
	struct work_struct work;	/* deferred execution context */
	atomic_t ref;		/* submission + async completion references */
	sector_t sector;	/* target sector (actual sector for appends) */
	sector_t nr_sectors;	/* request size in sectors */
	long ret;		/* bytes transferred or negative errno */
	struct kiocb iocb;	/* async I/O control block for file I/O */
	struct bio_vec *bvec;	/* merged bvec array for multi-bio requests */
};

/* Registered devices, indexed by device ID. */
static DEFINE_IDR(zloop_index_idr);
/* Serializes device add/remove and device state changes. */
static DEFINE_MUTEX(zloop_ctl_mutex);
148
rq_zone_no(struct request * rq)149 static unsigned int rq_zone_no(struct request *rq)
150 {
151 struct zloop_device *zlo = rq->q->queuedata;
152
153 return blk_rq_pos(rq) >> zlo->zone_shift;
154 }
155
/*
 * Derive the condition and write pointer of a sequential zone from the size
 * of its backing file. Called with the zone mutex held, typically to recover
 * the zone state after a failed write (ZLOOP_ZONE_SEQ_ERROR) or when
 * initializing a zone.
 */
static int zloop_update_seq_zone(struct zloop_device *zlo, unsigned int zone_no)
{
	struct zloop_zone *zone = &zlo->zones[zone_no];
	struct kstat stat;
	sector_t file_sectors;
	unsigned long flags;
	int ret;

	lockdep_assert_held(&zone->lock);

	ret = vfs_getattr(&zone->file->f_path, &stat, STATX_SIZE, 0);
	if (ret < 0) {
		pr_err("Failed to get zone %u file stat (err=%d)\n",
		       zone_no, ret);
		set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
		return ret;
	}

	/* The file size encodes how many sectors were written to the zone. */
	file_sectors = stat.size >> SECTOR_SHIFT;
	if (file_sectors > zlo->zone_capacity) {
		pr_err("Zone %u file too large (%llu sectors > %llu)\n",
		       zone_no, file_sectors, zlo->zone_capacity);
		return -EINVAL;
	}

	if (file_sectors & ((zlo->block_size >> SECTOR_SHIFT) - 1)) {
		pr_err("Zone %u file size not aligned to block size %u\n",
		       zone_no, zlo->block_size);
		return -EINVAL;
	}

	/* Map the file size to a zone condition and write pointer. */
	spin_lock_irqsave(&zone->wp_lock, flags);
	if (!file_sectors) {
		zone->cond = BLK_ZONE_COND_EMPTY;
		zone->wp = zone->start;
	} else if (file_sectors == zlo->zone_capacity) {
		zone->cond = BLK_ZONE_COND_FULL;
		zone->wp = ULLONG_MAX;
	} else {
		zone->cond = BLK_ZONE_COND_CLOSED;
		zone->wp = zone->start + file_sectors;
	}
	spin_unlock_irqrestore(&zone->wp_lock, flags);

	return 0;
}
202
zloop_open_zone(struct zloop_device * zlo,unsigned int zone_no)203 static int zloop_open_zone(struct zloop_device *zlo, unsigned int zone_no)
204 {
205 struct zloop_zone *zone = &zlo->zones[zone_no];
206 int ret = 0;
207
208 if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
209 return -EIO;
210
211 mutex_lock(&zone->lock);
212
213 if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
214 ret = zloop_update_seq_zone(zlo, zone_no);
215 if (ret)
216 goto unlock;
217 }
218
219 switch (zone->cond) {
220 case BLK_ZONE_COND_EXP_OPEN:
221 break;
222 case BLK_ZONE_COND_EMPTY:
223 case BLK_ZONE_COND_CLOSED:
224 case BLK_ZONE_COND_IMP_OPEN:
225 zone->cond = BLK_ZONE_COND_EXP_OPEN;
226 break;
227 case BLK_ZONE_COND_FULL:
228 default:
229 ret = -EIO;
230 break;
231 }
232
233 unlock:
234 mutex_unlock(&zone->lock);
235
236 return ret;
237 }
238
zloop_close_zone(struct zloop_device * zlo,unsigned int zone_no)239 static int zloop_close_zone(struct zloop_device *zlo, unsigned int zone_no)
240 {
241 struct zloop_zone *zone = &zlo->zones[zone_no];
242 unsigned long flags;
243 int ret = 0;
244
245 if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
246 return -EIO;
247
248 mutex_lock(&zone->lock);
249
250 if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
251 ret = zloop_update_seq_zone(zlo, zone_no);
252 if (ret)
253 goto unlock;
254 }
255
256 switch (zone->cond) {
257 case BLK_ZONE_COND_CLOSED:
258 break;
259 case BLK_ZONE_COND_IMP_OPEN:
260 case BLK_ZONE_COND_EXP_OPEN:
261 spin_lock_irqsave(&zone->wp_lock, flags);
262 if (zone->wp == zone->start)
263 zone->cond = BLK_ZONE_COND_EMPTY;
264 else
265 zone->cond = BLK_ZONE_COND_CLOSED;
266 spin_unlock_irqrestore(&zone->wp_lock, flags);
267 break;
268 case BLK_ZONE_COND_EMPTY:
269 case BLK_ZONE_COND_FULL:
270 default:
271 ret = -EIO;
272 break;
273 }
274
275 unlock:
276 mutex_unlock(&zone->lock);
277
278 return ret;
279 }
280
/*
 * Reset a sequential zone (REQ_OP_ZONE_RESET): truncate the backing file to
 * zero and return the zone to the empty condition.
 */
static int zloop_reset_zone(struct zloop_device *zlo, unsigned int zone_no)
{
	struct zloop_zone *zone = &zlo->zones[zone_no];
	unsigned long flags;
	int ret = 0;

	/* Conventional zones cannot be reset. */
	if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
		return -EIO;

	mutex_lock(&zone->lock);

	/*
	 * Nothing to do for an already empty zone, unless a previous write
	 * error left the zone state stale: in that case, still truncate the
	 * file to get back to a known good state.
	 */
	if (!test_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags) &&
	    zone->cond == BLK_ZONE_COND_EMPTY)
		goto unlock;

	if (vfs_truncate(&zone->file->f_path, 0)) {
		set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
		ret = -EIO;
		goto unlock;
	}

	spin_lock_irqsave(&zone->wp_lock, flags);
	zone->cond = BLK_ZONE_COND_EMPTY;
	zone->wp = zone->start;
	/* The truncate succeeded, so any previous error state is resolved. */
	clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
	spin_unlock_irqrestore(&zone->wp_lock, flags);

unlock:
	mutex_unlock(&zone->lock);

	return ret;
}
313
zloop_reset_all_zones(struct zloop_device * zlo)314 static int zloop_reset_all_zones(struct zloop_device *zlo)
315 {
316 unsigned int i;
317 int ret;
318
319 for (i = zlo->nr_conv_zones; i < zlo->nr_zones; i++) {
320 ret = zloop_reset_zone(zlo, i);
321 if (ret)
322 return ret;
323 }
324
325 return 0;
326 }
327
/*
 * Finish a sequential zone (REQ_OP_ZONE_FINISH): truncate the backing file
 * up to the zone size and transition the zone to the full condition.
 */
static int zloop_finish_zone(struct zloop_device *zlo, unsigned int zone_no)
{
	struct zloop_zone *zone = &zlo->zones[zone_no];
	unsigned long flags;
	int ret = 0;

	/* Conventional zones cannot be finished. */
	if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
		return -EIO;

	mutex_lock(&zone->lock);

	/* Nothing to do for a zone that is already full and in a good state. */
	if (!test_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags) &&
	    zone->cond == BLK_ZONE_COND_FULL)
		goto unlock;

	if (vfs_truncate(&zone->file->f_path, zlo->zone_size << SECTOR_SHIFT)) {
		set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
		ret = -EIO;
		goto unlock;
	}

	spin_lock_irqsave(&zone->wp_lock, flags);
	zone->cond = BLK_ZONE_COND_FULL;
	zone->wp = ULLONG_MAX;
	/* The truncate succeeded, so any previous error state is resolved. */
	clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
	spin_unlock_irqrestore(&zone->wp_lock, flags);

unlock:
	mutex_unlock(&zone->lock);

	return ret;
}
360
zloop_put_cmd(struct zloop_cmd * cmd)361 static void zloop_put_cmd(struct zloop_cmd *cmd)
362 {
363 struct request *rq = blk_mq_rq_from_pdu(cmd);
364
365 if (!atomic_dec_and_test(&cmd->ref))
366 return;
367 kfree(cmd->bvec);
368 cmd->bvec = NULL;
369 if (likely(!blk_should_fake_timeout(rq->q)))
370 blk_mq_complete_request(rq);
371 }
372
zloop_rw_complete(struct kiocb * iocb,long ret)373 static void zloop_rw_complete(struct kiocb *iocb, long ret)
374 {
375 struct zloop_cmd *cmd = container_of(iocb, struct zloop_cmd, iocb);
376
377 cmd->ret = ret;
378 zloop_put_cmd(cmd);
379 }
380
/*
 * Execute a read, write or zone append request using the backing file of the
 * target zone. The file I/O is issued through the file read_iter/write_iter
 * methods and may complete asynchronously: cmd->ref holds one reference for
 * this submission context and one for the I/O completion callback.
 */
static void zloop_rw(struct zloop_cmd *cmd)
{
	struct request *rq = blk_mq_rq_from_pdu(cmd);
	struct zloop_device *zlo = rq->q->queuedata;
	unsigned int zone_no = rq_zone_no(rq);
	sector_t sector = blk_rq_pos(rq);
	sector_t nr_sectors = blk_rq_sectors(rq);
	bool is_append = req_op(rq) == REQ_OP_ZONE_APPEND;
	bool is_write = req_op(rq) == REQ_OP_WRITE || is_append;
	int rw = is_write ? ITER_SOURCE : ITER_DEST;
	struct req_iterator rq_iter;
	struct zloop_zone *zone;
	struct iov_iter iter;
	struct bio_vec tmp;
	unsigned long flags;
	sector_t zone_end;
	unsigned int nr_bvec;
	int ret;

	/* One reference for us, one for the asynchronous I/O completion. */
	atomic_set(&cmd->ref, 2);
	cmd->sector = sector;
	cmd->nr_sectors = nr_sectors;
	cmd->ret = 0;

	/* Zone append requests require zone append support to be enabled. */
	if (WARN_ON_ONCE(is_append && !zlo->zone_append)) {
		ret = -EIO;
		goto out;
	}

	/* We should never get an I/O beyond the device capacity. */
	if (WARN_ON_ONCE(zone_no >= zlo->nr_zones)) {
		ret = -EIO;
		goto out;
	}
	zone = &zlo->zones[zone_no];
	zone_end = zone->start + zlo->zone_capacity;

	/*
	 * The block layer should never send requests that are not fully
	 * contained within the zone.
	 */
	if (WARN_ON_ONCE(sector + nr_sectors > zone->start + zlo->zone_size)) {
		ret = -EIO;
		goto out;
	}

	/* Recover the zone state if a previous write to this zone failed. */
	if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
		mutex_lock(&zone->lock);
		ret = zloop_update_seq_zone(zlo, zone_no);
		mutex_unlock(&zone->lock);
		if (ret)
			goto out;
	}

	/* Writes to a sequential zone are serialized with the zone mutex. */
	if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write) {
		mutex_lock(&zone->lock);

		spin_lock_irqsave(&zone->wp_lock, flags);

		/*
		 * Zone append operations always go at the current write
		 * pointer, but regular write operations must already be
		 * aligned to the write pointer when submitted.
		 */
		if (is_append) {
			/*
			 * If ordered zone append is in use, we already checked
			 * and set the target sector in zloop_queue_rq().
			 */
			if (!zlo->ordered_zone_append) {
				if (zone->cond == BLK_ZONE_COND_FULL ||
				    zone->wp + nr_sectors > zone_end) {
					spin_unlock_irqrestore(&zone->wp_lock,
							       flags);
					ret = -EIO;
					goto unlock;
				}
				sector = zone->wp;
			}
			/* Remember where the appended data lands. */
			cmd->sector = sector;
		} else if (sector != zone->wp) {
			spin_unlock_irqrestore(&zone->wp_lock, flags);
			pr_err("Zone %u: unaligned write: sect %llu, wp %llu\n",
			       zone_no, sector, zone->wp);
			ret = -EIO;
			goto unlock;
		}

		/* Implicitly open the target zone. */
		if (zone->cond == BLK_ZONE_COND_CLOSED ||
		    zone->cond == BLK_ZONE_COND_EMPTY)
			zone->cond = BLK_ZONE_COND_IMP_OPEN;

		/*
		 * Advance the write pointer, unless ordered zone append is in
		 * use. If the write fails, the write pointer position will be
		 * corrected when the next I/O starts execution.
		 */
		if (!is_append || !zlo->ordered_zone_append) {
			zone->wp += nr_sectors;
			if (zone->wp == zone_end) {
				zone->cond = BLK_ZONE_COND_FULL;
				zone->wp = ULLONG_MAX;
			}
		}

		spin_unlock_irqrestore(&zone->wp_lock, flags);
	}

	nr_bvec = blk_rq_nr_bvec(rq);

	if (rq->bio != rq->biotail) {
		struct bio_vec *bvec;

		/* Multi-bio request: gather all bvecs into one array. */
		cmd->bvec = kmalloc_objs(*cmd->bvec, nr_bvec, GFP_NOIO);
		if (!cmd->bvec) {
			ret = -EIO;
			goto unlock;
		}

		/*
		 * The bios of the request may be started from the middle of
		 * the 'bvec' because of bio splitting, so we can't directly
		 * copy bio->bi_iov_vec to new bvec. The rq_for_each_bvec
		 * API will take care of all details for us.
		 */
		bvec = cmd->bvec;
		rq_for_each_bvec(tmp, rq, rq_iter) {
			*bvec = tmp;
			bvec++;
		}
		iov_iter_bvec(&iter, rw, cmd->bvec, nr_bvec, blk_rq_bytes(rq));
	} else {
		/*
		 * Same here, this bio may be started from the middle of the
		 * 'bvec' because of bio splitting, so offset from the bvec
		 * must be passed to iov iterator
		 */
		iov_iter_bvec(&iter, rw,
			__bvec_iter_bvec(rq->bio->bi_io_vec, rq->bio->bi_iter),
			nr_bvec, blk_rq_bytes(rq));
		iter.iov_offset = rq->bio->bi_iter.bi_bvec_done;
	}

	/* File offsets are relative to the start of the zone file. */
	cmd->iocb.ki_pos = (sector - zone->start) << SECTOR_SHIFT;
	cmd->iocb.ki_filp = zone->file;
	cmd->iocb.ki_complete = zloop_rw_complete;
	if (!zlo->buffered_io)
		cmd->iocb.ki_flags = IOCB_DIRECT;
	cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);

	if (rw == ITER_SOURCE)
		ret = zone->file->f_op->write_iter(&cmd->iocb, &iter);
	else
		ret = zone->file->f_op->read_iter(&cmd->iocb, &iter);
unlock:
	if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write)
		mutex_unlock(&zone->lock);
out:
	/* -EIOCBQUEUED means the I/O will complete asynchronously. */
	if (ret != -EIOCBQUEUED)
		zloop_rw_complete(&cmd->iocb, ret);
	zloop_put_cmd(cmd);
}
544
545 /*
546 * Sync the entire FS containing the zone files instead of walking all files.
547 */
zloop_flush(struct zloop_device * zlo)548 static int zloop_flush(struct zloop_device *zlo)
549 {
550 struct super_block *sb = file_inode(zlo->data_dir)->i_sb;
551 int ret;
552
553 down_read(&sb->s_umount);
554 ret = sync_filesystem(sb);
555 up_read(&sb->s_umount);
556
557 return ret;
558 }
559
zloop_handle_cmd(struct zloop_cmd * cmd)560 static void zloop_handle_cmd(struct zloop_cmd *cmd)
561 {
562 struct request *rq = blk_mq_rq_from_pdu(cmd);
563 struct zloop_device *zlo = rq->q->queuedata;
564
565 /* We can block in this context, so ignore REQ_NOWAIT. */
566 if (rq->cmd_flags & REQ_NOWAIT)
567 rq->cmd_flags &= ~REQ_NOWAIT;
568
569 switch (req_op(rq)) {
570 case REQ_OP_READ:
571 case REQ_OP_WRITE:
572 case REQ_OP_ZONE_APPEND:
573 /*
574 * zloop_rw() always executes asynchronously or completes
575 * directly.
576 */
577 zloop_rw(cmd);
578 return;
579 case REQ_OP_FLUSH:
580 cmd->ret = zloop_flush(zlo);
581 break;
582 case REQ_OP_ZONE_RESET:
583 cmd->ret = zloop_reset_zone(zlo, rq_zone_no(rq));
584 break;
585 case REQ_OP_ZONE_RESET_ALL:
586 cmd->ret = zloop_reset_all_zones(zlo);
587 break;
588 case REQ_OP_ZONE_FINISH:
589 cmd->ret = zloop_finish_zone(zlo, rq_zone_no(rq));
590 break;
591 case REQ_OP_ZONE_OPEN:
592 cmd->ret = zloop_open_zone(zlo, rq_zone_no(rq));
593 break;
594 case REQ_OP_ZONE_CLOSE:
595 cmd->ret = zloop_close_zone(zlo, rq_zone_no(rq));
596 break;
597 default:
598 WARN_ON_ONCE(1);
599 pr_err("Unsupported operation %d\n", req_op(rq));
600 cmd->ret = -EOPNOTSUPP;
601 break;
602 }
603
604 blk_mq_complete_request(rq);
605 }
606
zloop_cmd_workfn(struct work_struct * work)607 static void zloop_cmd_workfn(struct work_struct *work)
608 {
609 struct zloop_cmd *cmd = container_of(work, struct zloop_cmd, work);
610 int orig_flags = current->flags;
611
612 current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
613 zloop_handle_cmd(cmd);
614 current->flags = orig_flags;
615 }
616
/*
 * Request completion handler (->complete): convert the command result to a
 * block layer status, zero-fill short reads, flag failed sequential zone
 * writes, and report the actual write location of zone append operations.
 */
static void zloop_complete_rq(struct request *rq)
{
	struct zloop_cmd *cmd = blk_mq_rq_to_pdu(rq);
	struct zloop_device *zlo = rq->q->queuedata;
	unsigned int zone_no = cmd->sector >> zlo->zone_shift;
	struct zloop_zone *zone = &zlo->zones[zone_no];
	blk_status_t sts = BLK_STS_OK;

	switch (req_op(rq)) {
	case REQ_OP_READ:
		if (cmd->ret < 0)
			pr_err("Zone %u: failed read sector %llu, %llu sectors\n",
			       zone_no, cmd->sector, cmd->nr_sectors);

		if (cmd->ret >= 0 && cmd->ret != blk_rq_bytes(rq)) {
			/* short read */
			struct bio *bio;

			__rq_for_each_bio(bio, rq)
				zero_fill_bio(bio);
		}
		break;
	case REQ_OP_WRITE:
	case REQ_OP_ZONE_APPEND:
		if (cmd->ret < 0)
			pr_err("Zone %u: failed %swrite sector %llu, %llu sectors\n",
			       zone_no,
			       req_op(rq) == REQ_OP_WRITE ? "" : "append ",
			       cmd->sector, cmd->nr_sectors);

		/* A partial write is treated as an error. */
		if (cmd->ret >= 0 && cmd->ret != blk_rq_bytes(rq)) {
			pr_err("Zone %u: partial write %ld/%u B\n",
			       zone_no, cmd->ret, blk_rq_bytes(rq));
			cmd->ret = -EIO;
		}

		if (cmd->ret < 0 && !test_bit(ZLOOP_ZONE_CONV, &zone->flags)) {
			/*
			 * A write to a sequential zone file failed: mark the
			 * zone as having an error. This will be corrected and
			 * cleared when the next IO is submitted.
			 */
			set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
			break;
		}
		/* Report where the appended data was actually written. */
		if (req_op(rq) == REQ_OP_ZONE_APPEND)
			rq->__sector = cmd->sector;

		break;
	default:
		break;
	}

	if (cmd->ret < 0)
		sts = errno_to_blk_status(cmd->ret);
	blk_mq_end_request(rq, sts);
}
674
zloop_set_zone_append_sector(struct request * rq)675 static bool zloop_set_zone_append_sector(struct request *rq)
676 {
677 struct zloop_device *zlo = rq->q->queuedata;
678 unsigned int zone_no = rq_zone_no(rq);
679 struct zloop_zone *zone = &zlo->zones[zone_no];
680 sector_t zone_end = zone->start + zlo->zone_capacity;
681 sector_t nr_sectors = blk_rq_sectors(rq);
682 unsigned long flags;
683
684 spin_lock_irqsave(&zone->wp_lock, flags);
685
686 if (zone->cond == BLK_ZONE_COND_FULL ||
687 zone->wp + nr_sectors > zone_end) {
688 spin_unlock_irqrestore(&zone->wp_lock, flags);
689 return false;
690 }
691
692 rq->__sector = zone->wp;
693 zone->wp += blk_rq_sectors(rq);
694 if (zone->wp >= zone_end) {
695 zone->cond = BLK_ZONE_COND_FULL;
696 zone->wp = ULLONG_MAX;
697 }
698
699 spin_unlock_irqrestore(&zone->wp_lock, flags);
700
701 return true;
702 }
703
zloop_queue_rq(struct blk_mq_hw_ctx * hctx,const struct blk_mq_queue_data * bd)704 static blk_status_t zloop_queue_rq(struct blk_mq_hw_ctx *hctx,
705 const struct blk_mq_queue_data *bd)
706 {
707 struct request *rq = bd->rq;
708 struct zloop_cmd *cmd = blk_mq_rq_to_pdu(rq);
709 struct zloop_device *zlo = rq->q->queuedata;
710
711 if (data_race(READ_ONCE(zlo->state)) == Zlo_deleting)
712 return BLK_STS_IOERR;
713
714 /*
715 * If we need to strongly order zone append operations, set the request
716 * sector to the zone write pointer location now instead of when the
717 * command work runs.
718 */
719 if (zlo->ordered_zone_append && req_op(rq) == REQ_OP_ZONE_APPEND) {
720 if (!zloop_set_zone_append_sector(rq))
721 return BLK_STS_IOERR;
722 }
723
724 blk_mq_start_request(rq);
725
726 INIT_WORK(&cmd->work, zloop_cmd_workfn);
727 queue_work(zlo->workqueue, &cmd->work);
728
729 return BLK_STS_OK;
730 }
731
/*
 * Block multi-queue operations: requests are queued to the device workqueue
 * by zloop_queue_rq() and finished by zloop_complete_rq().
 */
static const struct blk_mq_ops zloop_mq_ops = {
	.queue_rq = zloop_queue_rq,
	.complete = zloop_complete_rq,
};
736
/*
 * Disk ->open method: only allow opening a device that is fully created and
 * not being deleted.
 */
static int zloop_open(struct gendisk *disk, blk_mode_t mode)
{
	struct zloop_device *zlo = disk->private_data;
	int ret;

	ret = mutex_lock_killable(&zloop_ctl_mutex);
	if (ret)
		return ret;

	ret = zlo->state == Zlo_live ? 0 : -ENXIO;
	mutex_unlock(&zloop_ctl_mutex);

	return ret;
}
751
/*
 * Disk ->report_zones method: report up to @nr_zones zones starting from the
 * zone containing @sector. Returns the number of zones reported or a
 * negative error code.
 */
static int zloop_report_zones(struct gendisk *disk, sector_t sector,
		unsigned int nr_zones, struct blk_report_zones_args *args)
{
	struct zloop_device *zlo = disk->private_data;
	struct blk_zone blkz = {};
	unsigned int first, i;
	unsigned long flags;
	int ret;

	first = disk_zone_no(disk, sector);
	if (first >= zlo->nr_zones)
		return 0;
	nr_zones = min(nr_zones, zlo->nr_zones - first);

	for (i = 0; i < nr_zones; i++) {
		unsigned int zone_no = first + i;
		struct zloop_zone *zone = &zlo->zones[zone_no];

		mutex_lock(&zone->lock);

		/* Refresh the zone state if the last write to it failed. */
		if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
			ret = zloop_update_seq_zone(zlo, zone_no);
			if (ret) {
				mutex_unlock(&zone->lock);
				return ret;
			}
		}

		blkz.start = zone->start;
		blkz.len = zlo->zone_size;
		spin_lock_irqsave(&zone->wp_lock, flags);
		blkz.wp = zone->wp;
		spin_unlock_irqrestore(&zone->wp_lock, flags);
		blkz.cond = zone->cond;
		if (test_bit(ZLOOP_ZONE_CONV, &zone->flags)) {
			blkz.type = BLK_ZONE_TYPE_CONVENTIONAL;
			blkz.capacity = zlo->zone_size;
		} else {
			blkz.type = BLK_ZONE_TYPE_SEQWRITE_REQ;
			blkz.capacity = zlo->zone_capacity;
		}

		mutex_unlock(&zone->lock);

		ret = disk_report_zone(disk, &blkz, i, args);
		if (ret)
			return ret;
	}

	return nr_zones;
}
803
zloop_free_disk(struct gendisk * disk)804 static void zloop_free_disk(struct gendisk *disk)
805 {
806 struct zloop_device *zlo = disk->private_data;
807 unsigned int i;
808
809 blk_mq_free_tag_set(&zlo->tag_set);
810
811 for (i = 0; i < zlo->nr_zones; i++) {
812 struct zloop_zone *zone = &zlo->zones[i];
813
814 mapping_set_gfp_mask(zone->file->f_mapping,
815 zone->old_gfp_mask);
816 fput(zone->file);
817 }
818
819 fput(zlo->data_dir);
820 destroy_workqueue(zlo->workqueue);
821 kfree(zlo->base_dir);
822 kvfree(zlo);
823 }
824
/* Block device operations for zloop disks. */
static const struct block_device_operations zloop_fops = {
	.owner = THIS_MODULE,
	.open = zloop_open,
	.report_zones = zloop_report_zones,
	.free_disk = zloop_free_disk,
};
831
832 __printf(3, 4)
zloop_filp_open_fmt(int oflags,umode_t mode,const char * fmt,...)833 static struct file *zloop_filp_open_fmt(int oflags, umode_t mode,
834 const char *fmt, ...)
835 {
836 struct file *file;
837 va_list ap;
838 char *p;
839
840 va_start(ap, fmt);
841 p = kvasprintf(GFP_KERNEL, fmt, ap);
842 va_end(ap);
843
844 if (!p)
845 return ERR_PTR(-ENOMEM);
846 file = filp_open(p, oflags, mode);
847 kfree(p);
848 return file;
849 }
850
zloop_get_block_size(struct zloop_device * zlo,struct zloop_zone * zone)851 static int zloop_get_block_size(struct zloop_device *zlo,
852 struct zloop_zone *zone)
853 {
854 struct block_device *sb_bdev = zone->file->f_mapping->host->i_sb->s_bdev;
855 struct kstat st;
856
857 /*
858 * If the FS block size is lower than or equal to 4K, use that as the
859 * device block size. Otherwise, fallback to the FS direct IO alignment
860 * constraint if that is provided, and to the FS underlying device
861 * physical block size if the direct IO alignment is unknown.
862 */
863 if (file_inode(zone->file)->i_sb->s_blocksize <= SZ_4K)
864 zlo->block_size = file_inode(zone->file)->i_sb->s_blocksize;
865 else if (!vfs_getattr(&zone->file->f_path, &st, STATX_DIOALIGN, 0) &&
866 (st.result_mask & STATX_DIOALIGN))
867 zlo->block_size = st.dio_offset_align;
868 else if (sb_bdev)
869 zlo->block_size = bdev_physical_block_size(sb_bdev);
870 else
871 zlo->block_size = SECTOR_SIZE;
872
873 if (zlo->zone_capacity & ((zlo->block_size >> SECTOR_SHIFT) - 1)) {
874 pr_err("Zone capacity is not aligned to block size %u\n",
875 zlo->block_size);
876 return -EINVAL;
877 }
878
879 return 0;
880 }
881
/*
 * Open (and, for a new device, create) the backing file of zone @zone_no and
 * initialize the zone state from it. When @restore is true, the zone files
 * of a previously created device are reused and their sizes validated
 * against the given zone configuration.
 *
 * Fixes over the previous version: a restored conventional zone file with an
 * invalid size now fails with -EINVAL instead of silently succeeding (the
 * old code returned 'ret', which was 0 after the successful vfs_getattr()),
 * the error message now prints the compared value (zone_size, not
 * zone_capacity), and a stray duplicate zloop_get_block_size() call with an
 * ignored return value was removed from the sequential zone path.
 */
static int zloop_init_zone(struct zloop_device *zlo, struct zloop_options *opts,
			   unsigned int zone_no, bool restore)
{
	struct zloop_zone *zone = &zlo->zones[zone_no];
	int oflags = O_RDWR;
	struct kstat stat;
	sector_t file_sectors;
	int ret;

	mutex_init(&zone->lock);
	spin_lock_init(&zone->wp_lock);
	zone->start = (sector_t)zone_no << zlo->zone_shift;

	/* When restoring an existing device, the zone files must exist. */
	if (!restore)
		oflags |= O_CREAT;

	if (!opts->buffered_io)
		oflags |= O_DIRECT;

	if (zone_no < zlo->nr_conv_zones) {
		/* Conventional zone file. */
		set_bit(ZLOOP_ZONE_CONV, &zone->flags);
		zone->cond = BLK_ZONE_COND_NOT_WP;
		zone->wp = U64_MAX;

		zone->file = zloop_filp_open_fmt(oflags, 0600, "%s/%u/cnv-%06u",
					zlo->base_dir, zlo->id, zone_no);
		if (IS_ERR(zone->file)) {
			pr_err("Failed to open zone %u file %s/%u/cnv-%06u (err=%ld)",
			       zone_no, zlo->base_dir, zlo->id, zone_no,
			       PTR_ERR(zone->file));
			return PTR_ERR(zone->file);
		}

		/* The first opened zone file determines the block size. */
		if (!zlo->block_size) {
			ret = zloop_get_block_size(zlo, zone);
			if (ret)
				return ret;
		}

		ret = vfs_getattr(&zone->file->f_path, &stat, STATX_SIZE, 0);
		if (ret < 0) {
			pr_err("Failed to get zone %u file stat\n", zone_no);
			return ret;
		}
		file_sectors = stat.size >> SECTOR_SHIFT;

		/*
		 * A restored conventional zone file must already be fully
		 * provisioned to the zone size.
		 */
		if (restore && file_sectors != zlo->zone_size) {
			pr_err("Invalid conventional zone %u file size (%llu sectors != %llu)\n",
			       zone_no, file_sectors, zlo->zone_size);
			return -EINVAL;
		}

		ret = vfs_truncate(&zone->file->f_path,
				   zlo->zone_size << SECTOR_SHIFT);
		if (ret < 0) {
			pr_err("Failed to truncate zone %u file (err=%d)\n",
			       zone_no, ret);
			return ret;
		}

		return 0;
	}

	/* Sequential zone file. */
	zone->file = zloop_filp_open_fmt(oflags, 0600, "%s/%u/seq-%06u",
				zlo->base_dir, zlo->id, zone_no);
	if (IS_ERR(zone->file)) {
		pr_err("Failed to open zone %u file %s/%u/seq-%06u (err=%ld)",
		       zone_no, zlo->base_dir, zlo->id, zone_no,
		       PTR_ERR(zone->file));
		return PTR_ERR(zone->file);
	}

	/* The first opened zone file determines the block size. */
	if (!zlo->block_size) {
		ret = zloop_get_block_size(zlo, zone);
		if (ret)
			return ret;
	}

	/* Derive the zone condition and write pointer from the file size. */
	mutex_lock(&zone->lock);
	ret = zloop_update_seq_zone(zlo, zone_no);
	mutex_unlock(&zone->lock);

	return ret;
}
970
zloop_dev_exists(struct zloop_device * zlo)971 static bool zloop_dev_exists(struct zloop_device *zlo)
972 {
973 struct file *cnv, *seq;
974 bool exists;
975
976 cnv = zloop_filp_open_fmt(O_RDONLY, 0600, "%s/%u/cnv-%06u",
977 zlo->base_dir, zlo->id, 0);
978 seq = zloop_filp_open_fmt(O_RDONLY, 0600, "%s/%u/seq-%06u",
979 zlo->base_dir, zlo->id, 0);
980 exists = !IS_ERR(cnv) || !IS_ERR(seq);
981
982 if (!IS_ERR(cnv))
983 fput(cnv);
984 if (!IS_ERR(seq))
985 fput(seq);
986
987 return exists;
988 }
989
/*
 * Create a new zloop device described by @opts: allocate the device and its
 * ID, open or create the backing zone files, and register the disk with the
 * block layer.
 */
static int zloop_ctl_add(struct zloop_options *opts)
{
	struct queue_limits lim = {
		.max_hw_sectors = SZ_1M >> SECTOR_SHIFT,
		.chunk_sectors = opts->zone_size,
		.features = BLK_FEAT_ZONED | BLK_FEAT_WRITE_CACHE,

	};
	unsigned int nr_zones, i, j;
	struct zloop_device *zlo;
	int ret = -EINVAL;
	bool restore;

	/* Hold a module reference for the lifetime of the device. */
	__module_get(THIS_MODULE);

	/* The device must have at least one sequential zone. */
	nr_zones = opts->capacity >> ilog2(opts->zone_size);
	if (opts->nr_conv_zones >= nr_zones) {
		pr_err("Invalid number of conventional zones %u\n",
		       opts->nr_conv_zones);
		goto out;
	}

	zlo = kvzalloc_flex(*zlo, zones, nr_zones);
	if (!zlo) {
		ret = -ENOMEM;
		goto out;
	}
	WRITE_ONCE(zlo->state, Zlo_creating);

	ret = mutex_lock_killable(&zloop_ctl_mutex);
	if (ret)
		goto out_free_dev;

	/* Allocate id, if @opts->id >= 0, we're requesting that specific id */
	if (opts->id >= 0) {
		ret = idr_alloc(&zloop_index_idr, zlo,
				opts->id, opts->id + 1, GFP_KERNEL);
		if (ret == -ENOSPC)
			ret = -EEXIST;
	} else {
		ret = idr_alloc(&zloop_index_idr, zlo, 0, 0, GFP_KERNEL);
	}
	mutex_unlock(&zloop_ctl_mutex);
	if (ret < 0)
		goto out_free_dev;

	zlo->id = ret;
	zlo->zone_shift = ilog2(opts->zone_size);
	zlo->zone_size = opts->zone_size;
	/* Without an explicit zone capacity, zones are fully writable. */
	if (opts->zone_capacity)
		zlo->zone_capacity = opts->zone_capacity;
	else
		zlo->zone_capacity = zlo->zone_size;
	zlo->nr_zones = nr_zones;
	zlo->nr_conv_zones = opts->nr_conv_zones;
	zlo->buffered_io = opts->buffered_io;
	zlo->zone_append = opts->zone_append;
	/* Ordered zone append is only meaningful with zone append enabled. */
	if (zlo->zone_append)
		zlo->ordered_zone_append = opts->ordered_zone_append;

	zlo->workqueue = alloc_workqueue("zloop%d", WQ_UNBOUND | WQ_FREEZABLE,
				opts->nr_queues * opts->queue_depth, zlo->id);
	if (!zlo->workqueue) {
		ret = -ENOMEM;
		goto out_free_idr;
	}

	if (opts->base_dir)
		zlo->base_dir = kstrdup(opts->base_dir, GFP_KERNEL);
	else
		zlo->base_dir = kstrdup(ZLOOP_DEF_BASE_DIR, GFP_KERNEL);
	if (!zlo->base_dir) {
		ret = -ENOMEM;
		goto out_destroy_workqueue;
	}

	/* The per-device data directory must already exist. */
	zlo->data_dir = zloop_filp_open_fmt(O_RDONLY | O_DIRECTORY, 0, "%s/%u",
					    zlo->base_dir, zlo->id);
	if (IS_ERR(zlo->data_dir)) {
		ret = PTR_ERR(zlo->data_dir);
		pr_warn("Failed to open directory %s/%u (err=%d)\n",
			zlo->base_dir, zlo->id, ret);
		goto out_free_base_dir;
	}

	/*
	 * If we already have zone files, we are restoring a device created by a
	 * previous add operation. In this case, zloop_init_zone() will check
	 * that the zone files are consistent with the zone configuration given.
	 */
	restore = zloop_dev_exists(zlo);
	for (i = 0; i < nr_zones; i++) {
		ret = zloop_init_zone(zlo, opts, i, restore);
		if (ret)
			goto out_close_files;
	}

	/* The block size was determined while opening the zone files. */
	lim.physical_block_size = zlo->block_size;
	lim.logical_block_size = zlo->block_size;
	if (zlo->zone_append)
		lim.max_hw_zone_append_sectors = lim.max_hw_sectors;

	zlo->tag_set.ops = &zloop_mq_ops;
	zlo->tag_set.nr_hw_queues = opts->nr_queues;
	zlo->tag_set.queue_depth = opts->queue_depth;
	zlo->tag_set.numa_node = NUMA_NO_NODE;
	zlo->tag_set.cmd_size = sizeof(struct zloop_cmd);
	zlo->tag_set.driver_data = zlo;

	ret = blk_mq_alloc_tag_set(&zlo->tag_set);
	if (ret) {
		pr_err("blk_mq_alloc_tag_set failed (err=%d)\n", ret);
		goto out_close_files;
	}

	zlo->disk = blk_mq_alloc_disk(&zlo->tag_set, &lim, zlo);
	if (IS_ERR(zlo->disk)) {
		pr_err("blk_mq_alloc_disk failed (err=%d)\n", ret);
		ret = PTR_ERR(zlo->disk);
		goto out_cleanup_tags;
	}
	zlo->disk->flags = GENHD_FL_NO_PART;
	zlo->disk->fops = &zloop_fops;
	zlo->disk->private_data = zlo;
	sprintf(zlo->disk->disk_name, "zloop%d", zlo->id);
	set_capacity(zlo->disk, (u64)lim.chunk_sectors * zlo->nr_zones);

	ret = blk_revalidate_disk_zones(zlo->disk);
	if (ret)
		goto out_cleanup_disk;

	ret = add_disk(zlo->disk);
	if (ret) {
		pr_err("add_disk failed (err=%d)\n", ret);
		goto out_cleanup_disk;
	}

	/* The device is now fully set up: allow opens and I/O. */
	mutex_lock(&zloop_ctl_mutex);
	WRITE_ONCE(zlo->state, Zlo_live);
	mutex_unlock(&zloop_ctl_mutex);

	pr_info("zloop: device %d, %u zones of %llu MiB, %u B block size\n",
		zlo->id, zlo->nr_zones,
		((sector_t)zlo->zone_size << SECTOR_SHIFT) >> 20,
		zlo->block_size);
	pr_info("zloop%d: using %s%s zone append\n",
		zlo->id,
		zlo->ordered_zone_append ? "ordered " : "",
		zlo->zone_append ? "native" : "emulated");

	return 0;

out_cleanup_disk:
	put_disk(zlo->disk);
out_cleanup_tags:
	blk_mq_free_tag_set(&zlo->tag_set);
out_close_files:
	/* Only close the zone files that were actually opened. */
	for (j = 0; j < i; j++) {
		struct zloop_zone *zone = &zlo->zones[j];

		if (!IS_ERR_OR_NULL(zone->file))
			fput(zone->file);
	}
	fput(zlo->data_dir);
out_free_base_dir:
	kfree(zlo->base_dir);
out_destroy_workqueue:
	destroy_workqueue(zlo->workqueue);
out_free_idr:
	mutex_lock(&zloop_ctl_mutex);
	idr_remove(&zloop_index_idr, zlo->id);
	mutex_unlock(&zloop_ctl_mutex);
out_free_dev:
	kvfree(zlo);
out:
	module_put(THIS_MODULE);
	/* A missing file or directory means an invalid configuration. */
	if (ret == -ENOENT)
		ret = -EINVAL;
	return ret;
}
1170
zloop_ctl_remove(struct zloop_options * opts)1171 static int zloop_ctl_remove(struct zloop_options *opts)
1172 {
1173 struct zloop_device *zlo;
1174 int ret;
1175
1176 if (!(opts->mask & ZLOOP_OPT_ID)) {
1177 pr_err("No ID specified for remove\n");
1178 return -EINVAL;
1179 }
1180
1181 if (opts->mask & ~ZLOOP_OPT_ID) {
1182 pr_err("Invalid option specified for remove\n");
1183 return -EINVAL;
1184 }
1185
1186 ret = mutex_lock_killable(&zloop_ctl_mutex);
1187 if (ret)
1188 return ret;
1189
1190 zlo = idr_find(&zloop_index_idr, opts->id);
1191 if (!zlo || zlo->state == Zlo_creating) {
1192 ret = -ENODEV;
1193 } else if (zlo->state == Zlo_deleting) {
1194 ret = -EINVAL;
1195 } else {
1196 idr_remove(&zloop_index_idr, zlo->id);
1197 WRITE_ONCE(zlo->state, Zlo_deleting);
1198 }
1199
1200 mutex_unlock(&zloop_ctl_mutex);
1201 if (ret)
1202 return ret;
1203
1204 del_gendisk(zlo->disk);
1205 put_disk(zlo->disk);
1206
1207 pr_info("Removed device %d\n", opts->id);
1208
1209 module_put(THIS_MODULE);
1210
1211 return 0;
1212 }
1213
/*
 * Parse the option string of an "add" or "remove" operation written to the
 * control device into @opts. @buf may be NULL, in which case only the
 * defaults are set. Only light per-value checks are done here; full
 * validation of the resulting configuration is left to the operation
 * handlers. Returns 0 on success or a negative error code.
 *
 * Note: on ZLOOP_OPT_BASE_DIR, @opts->base_dir is (re)allocated; the caller
 * owns and must free it (see zloop_ctl_write()).
 */
static int zloop_parse_options(struct zloop_options *opts, const char *buf)
{
	substring_t args[MAX_OPT_ARGS];
	char *options, *o, *p;
	unsigned int token;
	int ret = 0;

	/* Set defaults. */
	opts->mask = 0;
	opts->id = ZLOOP_DEF_ID;
	opts->capacity = ZLOOP_DEF_ZONE_SIZE * ZLOOP_DEF_NR_ZONES;
	opts->zone_size = ZLOOP_DEF_ZONE_SIZE;
	opts->nr_conv_zones = ZLOOP_DEF_NR_CONV_ZONES;
	opts->nr_queues = ZLOOP_DEF_NR_QUEUES;
	opts->queue_depth = ZLOOP_DEF_QUEUE_DEPTH;
	opts->buffered_io = ZLOOP_DEF_BUFFERED_IO;
	opts->zone_append = ZLOOP_DEF_ZONE_APPEND;
	opts->ordered_zone_append = ZLOOP_DEF_ORDERED_ZONE_APPEND;

	if (!buf)
		return 0;

	/* Skip leading spaces before the options. */
	while (isspace(*buf))
		buf++;

	options = o = kstrdup(buf, GFP_KERNEL);
	if (!options)
		return -ENOMEM;

	/* Parse the options, doing only some light invalid value checks. */
	while ((p = strsep(&o, ",\n")) != NULL) {
		if (!*p)
			continue;

		/*
		 * The ZLOOP_OPT_* flags double as match_token() token values,
		 * so the parsed token can be OR-ed into the option mask
		 * directly. Note that "token" is reused below as the
		 * destination of match_uint() for value-carrying options.
		 */
		token = match_token(p, zloop_opt_tokens, args);
		opts->mask |= token;
		switch (token) {
		case ZLOOP_OPT_ID:
			if (match_int(args, &opts->id)) {
				ret = -EINVAL;
				goto out;
			}
			break;
		case ZLOOP_OPT_CAPACITY:
			if (match_uint(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			if (!token) {
				pr_err("Invalid capacity\n");
				ret = -EINVAL;
				goto out;
			}
			/* MiB to 512B sectors. */
			opts->capacity =
				((sector_t)token * SZ_1M) >> SECTOR_SHIFT;
			break;
		case ZLOOP_OPT_ZONE_SIZE:
			if (match_uint(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			/* Zone sizes must be a power of two (in MiB). */
			if (!token || token > ZLOOP_MAX_ZONE_SIZE_MB ||
			    !is_power_of_2(token)) {
				pr_err("Invalid zone size %u\n", token);
				ret = -EINVAL;
				goto out;
			}
			opts->zone_size =
				((sector_t)token * SZ_1M) >> SECTOR_SHIFT;
			break;
		case ZLOOP_OPT_ZONE_CAPACITY:
			if (match_uint(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			if (!token) {
				pr_err("Invalid zone capacity\n");
				ret = -EINVAL;
				goto out;
			}
			opts->zone_capacity =
				((sector_t)token * SZ_1M) >> SECTOR_SHIFT;
			break;
		case ZLOOP_OPT_NR_CONV_ZONES:
			if (match_uint(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			opts->nr_conv_zones = token;
			break;
		case ZLOOP_OPT_BASE_DIR:
			p = match_strdup(args);
			if (!p) {
				ret = -ENOMEM;
				goto out;
			}
			/* A repeated base_dir= option replaces the previous one. */
			kfree(opts->base_dir);
			opts->base_dir = p;
			break;
		case ZLOOP_OPT_NR_QUEUES:
			if (match_uint(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			if (!token) {
				pr_err("Invalid number of queues\n");
				ret = -EINVAL;
				goto out;
			}
			/* No point in having more queues than online CPUs. */
			opts->nr_queues = min(token, num_online_cpus());
			break;
		case ZLOOP_OPT_QUEUE_DEPTH:
			if (match_uint(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			if (!token) {
				pr_err("Invalid queue depth\n");
				ret = -EINVAL;
				goto out;
			}
			opts->queue_depth = token;
			break;
		case ZLOOP_OPT_BUFFERED_IO:
			opts->buffered_io = true;
			break;
		case ZLOOP_OPT_ZONE_APPEND:
			if (match_uint(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			/* Boolean option: only 0 and 1 are accepted. */
			if (token != 0 && token != 1) {
				pr_err("Invalid zone_append value\n");
				ret = -EINVAL;
				goto out;
			}
			opts->zone_append = token;
			break;
		case ZLOOP_OPT_ORDERED_ZONE_APPEND:
			opts->ordered_zone_append = true;
			break;
		case ZLOOP_OPT_ERR:
		default:
			pr_warn("unknown parameter or missing value '%s'\n", p);
			ret = -EINVAL;
			goto out;
		}
	}

	ret = -EINVAL;
	/* The capacity must cover more than a single zone. */
	if (opts->capacity <= opts->zone_size) {
		pr_err("Invalid capacity\n");
		goto out;
	}

	/* A zone capacity cannot exceed the zone size. */
	if (opts->zone_capacity > opts->zone_size) {
		pr_err("Invalid zone capacity\n");
		goto out;
	}

	ret = 0;
out:
	kfree(options);
	return ret;
}
1380
/* Operations supported through the zloop-control device. */
enum {
	ZLOOP_CTL_ADD,
	ZLOOP_CTL_REMOVE,
};
1385
/*
 * Mapping of operation names, as written to the control device, to the
 * operation codes above. The table is terminated by a NULL name sentinel.
 */
static struct zloop_ctl_op {
	int code;
	const char *name;
} zloop_ctl_ops[] = {
	{ ZLOOP_CTL_ADD, "add" },
	{ ZLOOP_CTL_REMOVE, "remove" },
	{ -1, NULL },
};
1394
zloop_ctl_write(struct file * file,const char __user * ubuf,size_t count,loff_t * pos)1395 static ssize_t zloop_ctl_write(struct file *file, const char __user *ubuf,
1396 size_t count, loff_t *pos)
1397 {
1398 struct zloop_options opts = { };
1399 struct zloop_ctl_op *op;
1400 const char *buf, *opts_buf;
1401 int i, ret;
1402
1403 if (count > PAGE_SIZE)
1404 return -ENOMEM;
1405
1406 buf = memdup_user_nul(ubuf, count);
1407 if (IS_ERR(buf))
1408 return PTR_ERR(buf);
1409
1410 for (i = 0; i < ARRAY_SIZE(zloop_ctl_ops); i++) {
1411 op = &zloop_ctl_ops[i];
1412 if (!op->name) {
1413 pr_err("Invalid operation\n");
1414 ret = -EINVAL;
1415 goto out;
1416 }
1417 if (!strncmp(buf, op->name, strlen(op->name)))
1418 break;
1419 }
1420
1421 if (count <= strlen(op->name))
1422 opts_buf = NULL;
1423 else
1424 opts_buf = buf + strlen(op->name);
1425
1426 ret = zloop_parse_options(&opts, opts_buf);
1427 if (ret) {
1428 pr_err("Failed to parse options\n");
1429 goto out;
1430 }
1431
1432 switch (op->code) {
1433 case ZLOOP_CTL_ADD:
1434 ret = zloop_ctl_add(&opts);
1435 break;
1436 case ZLOOP_CTL_REMOVE:
1437 ret = zloop_ctl_remove(&opts);
1438 break;
1439 default:
1440 pr_err("Invalid operation\n");
1441 ret = -EINVAL;
1442 goto out;
1443 }
1444
1445 out:
1446 kfree(opts.base_dir);
1447 kfree(buf);
1448 return ret ? ret : count;
1449 }
1450
zloop_ctl_show(struct seq_file * seq_file,void * private)1451 static int zloop_ctl_show(struct seq_file *seq_file, void *private)
1452 {
1453 const struct match_token *tok;
1454 int i;
1455
1456 /* Add operation */
1457 seq_printf(seq_file, "%s ", zloop_ctl_ops[0].name);
1458 for (i = 0; i < ARRAY_SIZE(zloop_opt_tokens); i++) {
1459 tok = &zloop_opt_tokens[i];
1460 if (!tok->pattern)
1461 break;
1462 if (i)
1463 seq_putc(seq_file, ',');
1464 seq_puts(seq_file, tok->pattern);
1465 }
1466 seq_putc(seq_file, '\n');
1467
1468 /* Remove operation */
1469 seq_puts(seq_file, zloop_ctl_ops[1].name);
1470 seq_puts(seq_file, " id=%d\n");
1471
1472 return 0;
1473 }
1474
/*
 * Open the control device: attach the seq_file that reports the supported
 * operations and options (see zloop_ctl_show()) for reads.
 */
static int zloop_ctl_open(struct inode *inode, struct file *file)
{
	/* single_open() installs its own seq_file state as private_data. */
	file->private_data = NULL;
	return single_open(file, zloop_ctl_show, NULL);
}
1480
/*
 * Release the control device: free the seq_file state allocated by
 * zloop_ctl_open().
 */
static int zloop_ctl_release(struct inode *inode, struct file *file)
{
	return single_release(inode, file);
}
1485
1486 static const struct file_operations zloop_ctl_fops = {
1487 .owner = THIS_MODULE,
1488 .open = zloop_ctl_open,
1489 .release = zloop_ctl_release,
1490 .write = zloop_ctl_write,
1491 .read = seq_read,
1492 };
1493
/* The zloop-control character device, with a dynamically allocated minor. */
static struct miscdevice zloop_misc = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = "zloop-control",
	.fops = &zloop_ctl_fops,
};
1499
zloop_init(void)1500 static int __init zloop_init(void)
1501 {
1502 int ret;
1503
1504 ret = misc_register(&zloop_misc);
1505 if (ret) {
1506 pr_err("Failed to register misc device: %d\n", ret);
1507 return ret;
1508 }
1509 pr_info("Module loaded\n");
1510
1511 return 0;
1512 }
1513
/*
 * Module unload: unregister the control device and release the device ID
 * allocator.
 */
static void __exit zloop_exit(void)
{
	misc_deregister(&zloop_misc);
	idr_destroy(&zloop_index_idr);
}
1519
/* Module entry and exit points. */
module_init(zloop_init);
module_exit(zloop_exit);

MODULE_DESCRIPTION("Zoned loopback device");
MODULE_LICENSE("GPL");
1525