1 /* SPDX-License-Identifier: MIT */
2 /*
3 * Description: uring_cmd based ublk
4 */
5
6 #include <linux/fs.h>
7 #include "kublk.h"
8
/* Maximum number of per-target command-line arguments accepted */
#define MAX_NR_TGT_ARG 64

/* Global debug-log mask; UBLK_LOG enables the default logging level */
unsigned int ublk_dbg_mask = UBLK_LOG;

/* Built-in target implementations, looked up by name */
static const struct ublk_tgt_ops *tgt_ops_list[] = {
	&null_tgt_ops,
	&loop_tgt_ops,
	&stripe_tgt_ops,
	&fault_inject_tgt_ops,
};
18
ublk_find_tgt(const char * name)19 static const struct ublk_tgt_ops *ublk_find_tgt(const char *name)
20 {
21 int i;
22
23 if (name == NULL)
24 return NULL;
25
26 for (i = 0; i < ARRAY_SIZE(tgt_ops_list); i++)
27 if (strcmp(tgt_ops_list[i]->name, name) == 0)
28 return tgt_ops_list[i];
29 return NULL;
30 }
31
ublk_setup_ring(struct io_uring * r,int depth,int cq_depth,unsigned flags)32 static inline int ublk_setup_ring(struct io_uring *r, int depth,
33 int cq_depth, unsigned flags)
34 {
35 struct io_uring_params p;
36
37 memset(&p, 0, sizeof(p));
38 p.flags = flags | IORING_SETUP_CQSIZE;
39 p.cq_entries = cq_depth;
40
41 return io_uring_queue_init_params(depth, r, &p);
42 }
43
ublk_ctrl_init_cmd(struct ublk_dev * dev,struct io_uring_sqe * sqe,struct ublk_ctrl_cmd_data * data)44 static void ublk_ctrl_init_cmd(struct ublk_dev *dev,
45 struct io_uring_sqe *sqe,
46 struct ublk_ctrl_cmd_data *data)
47 {
48 struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
49 struct ublksrv_ctrl_cmd *cmd = (struct ublksrv_ctrl_cmd *)ublk_get_sqe_cmd(sqe);
50
51 sqe->fd = dev->ctrl_fd;
52 sqe->opcode = IORING_OP_URING_CMD;
53 sqe->ioprio = 0;
54
55 if (data->flags & CTRL_CMD_HAS_BUF) {
56 cmd->addr = data->addr;
57 cmd->len = data->len;
58 }
59
60 if (data->flags & CTRL_CMD_HAS_DATA)
61 cmd->data[0] = data->data[0];
62
63 cmd->dev_id = info->dev_id;
64 cmd->queue_id = -1;
65
66 ublk_set_sqe_cmd_op(sqe, data->cmd_op);
67
68 io_uring_sqe_set_data(sqe, cmd);
69 }
70
__ublk_ctrl_cmd(struct ublk_dev * dev,struct ublk_ctrl_cmd_data * data)71 static int __ublk_ctrl_cmd(struct ublk_dev *dev,
72 struct ublk_ctrl_cmd_data *data)
73 {
74 struct io_uring_sqe *sqe;
75 struct io_uring_cqe *cqe;
76 int ret = -EINVAL;
77
78 sqe = io_uring_get_sqe(&dev->ring);
79 if (!sqe) {
80 ublk_err("%s: can't get sqe ret %d\n", __func__, ret);
81 return ret;
82 }
83
84 ublk_ctrl_init_cmd(dev, sqe, data);
85
86 ret = io_uring_submit(&dev->ring);
87 if (ret < 0) {
88 ublk_err("uring submit ret %d\n", ret);
89 return ret;
90 }
91
92 ret = io_uring_wait_cqe(&dev->ring, &cqe);
93 if (ret < 0) {
94 ublk_err("wait cqe: %s\n", strerror(-ret));
95 return ret;
96 }
97 io_uring_cqe_seen(&dev->ring, cqe);
98
99 return cqe->res;
100 }
101
ublk_ctrl_stop_dev(struct ublk_dev * dev)102 static int ublk_ctrl_stop_dev(struct ublk_dev *dev)
103 {
104 struct ublk_ctrl_cmd_data data = {
105 .cmd_op = UBLK_U_CMD_STOP_DEV,
106 };
107
108 return __ublk_ctrl_cmd(dev, &data);
109 }
110
ublk_ctrl_try_stop_dev(struct ublk_dev * dev)111 static int ublk_ctrl_try_stop_dev(struct ublk_dev *dev)
112 {
113 struct ublk_ctrl_cmd_data data = {
114 .cmd_op = UBLK_U_CMD_TRY_STOP_DEV,
115 };
116
117 return __ublk_ctrl_cmd(dev, &data);
118 }
119
ublk_ctrl_start_dev(struct ublk_dev * dev,int daemon_pid)120 static int ublk_ctrl_start_dev(struct ublk_dev *dev,
121 int daemon_pid)
122 {
123 struct ublk_ctrl_cmd_data data = {
124 .cmd_op = UBLK_U_CMD_START_DEV,
125 .flags = CTRL_CMD_HAS_DATA,
126 };
127
128 dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid;
129
130 return __ublk_ctrl_cmd(dev, &data);
131 }
132
ublk_ctrl_start_user_recovery(struct ublk_dev * dev)133 static int ublk_ctrl_start_user_recovery(struct ublk_dev *dev)
134 {
135 struct ublk_ctrl_cmd_data data = {
136 .cmd_op = UBLK_U_CMD_START_USER_RECOVERY,
137 };
138
139 return __ublk_ctrl_cmd(dev, &data);
140 }
141
ublk_ctrl_end_user_recovery(struct ublk_dev * dev,int daemon_pid)142 static int ublk_ctrl_end_user_recovery(struct ublk_dev *dev, int daemon_pid)
143 {
144 struct ublk_ctrl_cmd_data data = {
145 .cmd_op = UBLK_U_CMD_END_USER_RECOVERY,
146 .flags = CTRL_CMD_HAS_DATA,
147 };
148
149 dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid;
150
151 return __ublk_ctrl_cmd(dev, &data);
152 }
153
ublk_ctrl_add_dev(struct ublk_dev * dev)154 static int ublk_ctrl_add_dev(struct ublk_dev *dev)
155 {
156 struct ublk_ctrl_cmd_data data = {
157 .cmd_op = UBLK_U_CMD_ADD_DEV,
158 .flags = CTRL_CMD_HAS_BUF,
159 .addr = (__u64) (uintptr_t) &dev->dev_info,
160 .len = sizeof(struct ublksrv_ctrl_dev_info),
161 };
162
163 return __ublk_ctrl_cmd(dev, &data);
164 }
165
ublk_ctrl_del_dev(struct ublk_dev * dev)166 static int ublk_ctrl_del_dev(struct ublk_dev *dev)
167 {
168 struct ublk_ctrl_cmd_data data = {
169 .cmd_op = UBLK_U_CMD_DEL_DEV,
170 .flags = 0,
171 };
172
173 return __ublk_ctrl_cmd(dev, &data);
174 }
175
ublk_ctrl_get_info(struct ublk_dev * dev)176 static int ublk_ctrl_get_info(struct ublk_dev *dev)
177 {
178 struct ublk_ctrl_cmd_data data = {
179 .cmd_op = UBLK_U_CMD_GET_DEV_INFO,
180 .flags = CTRL_CMD_HAS_BUF,
181 .addr = (__u64) (uintptr_t) &dev->dev_info,
182 .len = sizeof(struct ublksrv_ctrl_dev_info),
183 };
184
185 return __ublk_ctrl_cmd(dev, &data);
186 }
187
ublk_ctrl_set_params(struct ublk_dev * dev,struct ublk_params * params)188 static int ublk_ctrl_set_params(struct ublk_dev *dev,
189 struct ublk_params *params)
190 {
191 struct ublk_ctrl_cmd_data data = {
192 .cmd_op = UBLK_U_CMD_SET_PARAMS,
193 .flags = CTRL_CMD_HAS_BUF,
194 .addr = (__u64) (uintptr_t) params,
195 .len = sizeof(*params),
196 };
197 params->len = sizeof(*params);
198 return __ublk_ctrl_cmd(dev, &data);
199 }
200
ublk_ctrl_get_params(struct ublk_dev * dev,struct ublk_params * params)201 static int ublk_ctrl_get_params(struct ublk_dev *dev,
202 struct ublk_params *params)
203 {
204 struct ublk_ctrl_cmd_data data = {
205 .cmd_op = UBLK_U_CMD_GET_PARAMS,
206 .flags = CTRL_CMD_HAS_BUF,
207 .addr = (__u64)params,
208 .len = sizeof(*params),
209 };
210
211 params->len = sizeof(*params);
212
213 return __ublk_ctrl_cmd(dev, &data);
214 }
215
/* Query the driver feature bitmask via UBLK_U_CMD_GET_FEATURES. */
static int ublk_ctrl_get_features(struct ublk_dev *dev,
		__u64 *features)
{
	struct ublk_ctrl_cmd_data data;

	memset(&data, 0, sizeof(data));
	data.cmd_op = UBLK_U_CMD_GET_FEATURES;
	data.flags = CTRL_CMD_HAS_BUF;
	data.addr = (__u64) (uintptr_t) features;
	data.len = sizeof(*features);

	return __ublk_ctrl_cmd(dev, &data);
}
228
/* Issue UBLK_U_CMD_UPDATE_SIZE with the new capacity in 512B sectors. */
static int ublk_ctrl_update_size(struct ublk_dev *dev,
		__u64 nr_sects)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op	= UBLK_U_CMD_UPDATE_SIZE,
		.flags	= CTRL_CMD_HAS_DATA,
		.data[0] = nr_sects,
	};

	return __ublk_ctrl_cmd(dev, &data);
}
240
ublk_ctrl_quiesce_dev(struct ublk_dev * dev,unsigned int timeout_ms)241 static int ublk_ctrl_quiesce_dev(struct ublk_dev *dev,
242 unsigned int timeout_ms)
243 {
244 struct ublk_ctrl_cmd_data data = {
245 .cmd_op = UBLK_U_CMD_QUIESCE_DEV,
246 .flags = CTRL_CMD_HAS_DATA,
247 };
248
249 data.data[0] = timeout_ms;
250 return __ublk_ctrl_cmd(dev, &data);
251 }
252
ublk_dev_state_desc(struct ublk_dev * dev)253 static const char *ublk_dev_state_desc(struct ublk_dev *dev)
254 {
255 switch (dev->dev_info.state) {
256 case UBLK_S_DEV_DEAD:
257 return "DEAD";
258 case UBLK_S_DEV_LIVE:
259 return "LIVE";
260 case UBLK_S_DEV_QUIESCED:
261 return "QUIESCED";
262 default:
263 return "UNKNOWN";
264 };
265 }
266
/*
 * Format the CPUs in @set into @buf as a space-separated decimal list
 * ("0 2 5 ").  The buffer is always NUL-terminated (when len > 0), and
 * formatting stops cleanly on truncation instead of overflowing.
 */
static void ublk_print_cpu_set(const cpu_set_t *set, char *buf, unsigned len)
{
	unsigned done = 0;
	int i;

	if (!len)
		return;
	/* nothing may be appended below (empty set); terminate up front */
	buf[0] = '\0';

	for (i = 0; i < CPU_SETSIZE; i++) {
		int n;

		if (!CPU_ISSET(i, set))
			continue;

		n = snprintf(&buf[done], len - done, "%d ", i);
		/*
		 * snprintf() returns the would-be length; if it doesn't
		 * fit, stop before `done` passes `len` and the unsigned
		 * `len - done` size underflows.
		 */
		if (n < 0 || (unsigned)n >= len - done)
			break;
		done += n;
	}
}
277
/*
 * Reduce @set to a single CPU.
 *
 * Just keep the 1st CPU now.
 *
 * In future, auto affinity selection can be tried.
 */
static void ublk_adjust_affinity(cpu_set_t *set)
{
	int cpu, first_seen = 0;

	for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
		if (!CPU_ISSET(cpu, set))
			continue;
		if (first_seen)
			CPU_CLR(cpu, set);
		else
			first_seen = 1;
	}
}
297
298 /* Caller must free the allocated buffer */
ublk_ctrl_get_affinity(struct ublk_dev * ctrl_dev,cpu_set_t ** ptr_buf)299 static int ublk_ctrl_get_affinity(struct ublk_dev *ctrl_dev, cpu_set_t **ptr_buf)
300 {
301 struct ublk_ctrl_cmd_data data = {
302 .cmd_op = UBLK_U_CMD_GET_QUEUE_AFFINITY,
303 .flags = CTRL_CMD_HAS_DATA | CTRL_CMD_HAS_BUF,
304 };
305 cpu_set_t *buf;
306 int i, ret;
307
308 buf = malloc(sizeof(cpu_set_t) * ctrl_dev->dev_info.nr_hw_queues);
309 if (!buf)
310 return -ENOMEM;
311
312 for (i = 0; i < ctrl_dev->dev_info.nr_hw_queues; i++) {
313 data.data[0] = i;
314 data.len = sizeof(cpu_set_t);
315 data.addr = (__u64)&buf[i];
316
317 ret = __ublk_ctrl_cmd(ctrl_dev, &data);
318 if (ret < 0) {
319 free(buf);
320 return ret;
321 }
322 ublk_adjust_affinity(&buf[i]);
323 }
324
325 *ptr_buf = buf;
326 return 0;
327 }
328
ublk_ctrl_dump(struct ublk_dev * dev)329 static void ublk_ctrl_dump(struct ublk_dev *dev)
330 {
331 struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
332 struct ublk_params p;
333 cpu_set_t *affinity;
334 int ret;
335
336 ret = ublk_ctrl_get_params(dev, &p);
337 if (ret < 0) {
338 ublk_err("failed to get params %d %s\n", ret, strerror(-ret));
339 return;
340 }
341
342 ret = ublk_ctrl_get_affinity(dev, &affinity);
343 if (ret < 0) {
344 ublk_err("failed to get affinity %m\n");
345 return;
346 }
347
348 ublk_log("dev id %d: nr_hw_queues %d queue_depth %d block size %d dev_capacity %lld\n",
349 info->dev_id, info->nr_hw_queues, info->queue_depth,
350 1 << p.basic.logical_bs_shift, p.basic.dev_sectors);
351 ublk_log("\tmax rq size %d daemon pid %d flags 0x%llx state %s\n",
352 info->max_io_buf_bytes, info->ublksrv_pid, info->flags,
353 ublk_dev_state_desc(dev));
354
355 if (affinity) {
356 char buf[512];
357 int i;
358
359 for (i = 0; i < info->nr_hw_queues; i++) {
360 ublk_print_cpu_set(&affinity[i], buf, sizeof(buf));
361 printf("\tqueue %u: affinity(%s)\n",
362 i, buf);
363 }
364 free(affinity);
365 }
366
367 fflush(stdout);
368 }
369
ublk_ctrl_deinit(struct ublk_dev * dev)370 static void ublk_ctrl_deinit(struct ublk_dev *dev)
371 {
372 close(dev->ctrl_fd);
373 free(dev);
374 }
375
ublk_ctrl_init(void)376 static struct ublk_dev *ublk_ctrl_init(void)
377 {
378 struct ublk_dev *dev = (struct ublk_dev *)calloc(1, sizeof(*dev));
379 struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
380 int ret;
381
382 dev->ctrl_fd = open(CTRL_DEV, O_RDWR);
383 if (dev->ctrl_fd < 0) {
384 free(dev);
385 return NULL;
386 }
387
388 info->max_io_buf_bytes = UBLK_IO_MAX_BYTES;
389
390 ret = ublk_setup_ring(&dev->ring, UBLK_CTRL_RING_DEPTH,
391 UBLK_CTRL_RING_DEPTH, IORING_SETUP_SQE128);
392 if (ret < 0) {
393 ublk_err("queue_init: %s\n", strerror(-ret));
394 free(dev);
395 return NULL;
396 }
397 dev->nr_fds = 1;
398
399 return dev;
400 }
401
__ublk_queue_cmd_buf_sz(unsigned depth)402 static int __ublk_queue_cmd_buf_sz(unsigned depth)
403 {
404 int size = depth * sizeof(struct ublksrv_io_desc);
405 unsigned int page_sz = getpagesize();
406
407 return round_up(size, page_sz);
408 }
409
ublk_queue_max_cmd_buf_sz(void)410 static int ublk_queue_max_cmd_buf_sz(void)
411 {
412 return __ublk_queue_cmd_buf_sz(UBLK_MAX_QUEUE_DEPTH);
413 }
414
ublk_queue_cmd_buf_sz(struct ublk_queue * q)415 static int ublk_queue_cmd_buf_sz(struct ublk_queue *q)
416 {
417 return __ublk_queue_cmd_buf_sz(q->q_depth);
418 }
419
ublk_queue_deinit(struct ublk_queue * q)420 static void ublk_queue_deinit(struct ublk_queue *q)
421 {
422 int i;
423 int nr_ios = q->q_depth;
424
425 if (q->io_cmd_buf)
426 munmap(q->io_cmd_buf, ublk_queue_cmd_buf_sz(q));
427
428 for (i = 0; i < nr_ios; i++) {
429 free(q->ios[i].buf_addr);
430 free(q->ios[i].integrity_buf);
431 }
432 }
433
ublk_thread_deinit(struct ublk_thread * t)434 static void ublk_thread_deinit(struct ublk_thread *t)
435 {
436 io_uring_unregister_buffers(&t->ring);
437
438 ublk_batch_free_buf(t);
439
440 io_uring_unregister_ring_fd(&t->ring);
441
442 if (t->ring.ring_fd > 0) {
443 io_uring_unregister_files(&t->ring);
444 close(t->ring.ring_fd);
445 t->ring.ring_fd = -1;
446 }
447 }
448
/*
 * Initialize one ublk queue: set queue flags from device flags plus
 * @extra_flags, mmap this queue's slice of the shared io-descriptor
 * buffer from the char device, and allocate per-io data (and optional
 * integrity) buffers.
 *
 * Returns 0 on success, -ENOMEM on failure; partially initialized state
 * is torn down via ublk_queue_deinit().
 */
static int ublk_queue_init(struct ublk_queue *q, unsigned long long extra_flags,
			   __u8 metadata_size)
{
	struct ublk_dev *dev = q->dev;
	int depth = dev->dev_info.queue_depth;
	int i;
	int cmd_buf_size, io_buf_size, integrity_size;
	unsigned long off;

	pthread_spin_init(&q->lock, PTHREAD_PROCESS_PRIVATE);
	q->tgt_ops = dev->tgt.ops;
	q->flags = 0;
	q->q_depth = depth;
	q->flags = dev->dev_info.flags;
	q->flags |= extra_flags;
	q->metadata_size = metadata_size;

	/* Cache fd in queue for fast path access */
	q->ublk_fd = dev->fds[0];

	/* Each queue's descriptor slice is spaced at the max-depth stride */
	cmd_buf_size = ublk_queue_cmd_buf_sz(q);
	off = UBLKSRV_CMD_BUF_OFFSET + q->q_id * ublk_queue_max_cmd_buf_sz();
	q->io_cmd_buf = mmap(0, cmd_buf_size, PROT_READ,
			MAP_SHARED | MAP_POPULATE, dev->fds[0], off);
	if (q->io_cmd_buf == MAP_FAILED) {
		ublk_err("ublk dev %d queue %d map io_cmd_buf failed %m\n",
				q->dev->dev_info.dev_id, q->q_id);
		goto fail;
	}

	io_buf_size = dev->dev_info.max_io_buf_bytes;
	/* non-zero only when the queue carries integrity metadata */
	integrity_size = ublk_integrity_len(q, io_buf_size);
	for (i = 0; i < q->q_depth; i++) {
		q->ios[i].buf_addr = NULL;
		q->ios[i].flags = UBLKS_IO_NEED_FETCH_RQ | UBLKS_IO_FREE;
		q->ios[i].tag = i;

		if (integrity_size) {
			q->ios[i].integrity_buf = malloc(integrity_size);
			if (!q->ios[i].integrity_buf) {
				ublk_err("ublk dev %d queue %d io %d malloc(%d) failed: %m\n",
					 dev->dev_info.dev_id, q->q_id, i,
					 integrity_size);
				goto fail;
			}
		}


		/* zero-copy/auto-reg modes don't need a server-side data buffer */
		if (ublk_queue_no_buf(q))
			continue;

		/* page-aligned buffer so it can back direct I/O */
		if (posix_memalign((void **)&q->ios[i].buf_addr,
					getpagesize(), io_buf_size)) {
			ublk_err("ublk dev %d queue %d io %d posix_memalign failed %m\n",
					dev->dev_info.dev_id, q->q_id, i);
			goto fail;
		}
	}

	return 0;
 fail:
	ublk_queue_deinit(q);
	ublk_err("ublk dev %d queue %d failed\n",
			dev->dev_info.dev_id, q->q_id);
	return -ENOMEM;
}
515
/*
 * Initialize one server thread's io_uring: create the ring, register
 * sparse fixed buffers when zero-copy/auto-buf-reg is enabled, prepare
 * batch-mode buffers, and register the device/backing-file fds.
 *
 * Returns 0 on success, -ENOMEM on failure (partial state is torn down
 * via ublk_thread_deinit()).
 */
static int ublk_thread_init(struct ublk_thread *t, unsigned long long extra_flags)
{
	struct ublk_dev *dev = t->dev;
	unsigned long long flags = dev->dev_info.flags | extra_flags;
	int ring_depth = dev->tgt.sq_depth, cq_depth = dev->tgt.cq_depth;
	int ret;

	/* FETCH_IO_CMDS is multishot, so increase cq depth for BATCH_IO */
	if (ublk_dev_batch_io(dev))
		cq_depth += dev->dev_info.queue_depth * 2;

	ret = ublk_setup_ring(&t->ring, ring_depth, cq_depth,
			IORING_SETUP_COOP_TASKRUN |
			IORING_SETUP_SINGLE_ISSUER |
			IORING_SETUP_DEFER_TASKRUN);
	if (ret < 0) {
		ublk_err("ublk dev %d thread %d setup io_uring failed %d\n",
				dev->dev_info.dev_id, t->idx, ret);
		goto fail;
	}

	if (dev->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_AUTO_BUF_REG)) {
		unsigned nr_ios = dev->dev_info.queue_depth * dev->dev_info.nr_hw_queues;
		/* ceil(nr_ios / nthreads): worst-case ios handled per thread */
		unsigned max_nr_ios_per_thread = nr_ios / dev->nthreads;
		max_nr_ios_per_thread += !!(nr_ios % dev->nthreads);

		t->nr_bufs = max_nr_ios_per_thread;
	} else {
		t->nr_bufs = 0;
	}

	if (ublk_dev_batch_io(dev))
		ublk_batch_prepare(t);

	if (t->nr_bufs) {
		/* sparse table: slots are bound to ios lazily, per command */
		ret = io_uring_register_buffers_sparse(&t->ring, t->nr_bufs);
		if (ret) {
			ublk_err("ublk dev %d thread %d register spare buffers failed %d\n",
					dev->dev_info.dev_id, t->idx, ret);
			goto fail;
		}
	}

	if (ublk_dev_batch_io(dev)) {
		ret = ublk_batch_alloc_buf(t);
		if (ret) {
			ublk_err("ublk dev %d thread %d alloc batch buf failed %d\n",
					dev->dev_info.dev_id, t->idx, ret);
			goto fail;
		}
	}

	io_uring_register_ring_fd(&t->ring);

	if (flags & UBLKS_Q_NO_UBLK_FIXED_FD) {
		/* Register only backing files starting from index 1, exclude ublk control device */
		if (dev->nr_fds > 1) {
			ret = io_uring_register_files(&t->ring, &dev->fds[1], dev->nr_fds - 1);
		} else {
			/* No backing files to register, skip file registration */
			ret = 0;
		}
	} else {
		ret = io_uring_register_files(&t->ring, dev->fds, dev->nr_fds);
	}
	if (ret) {
		ublk_err("ublk dev %d thread %d register files failed %d\n",
				t->dev->dev_info.dev_id, t->idx, ret);
		goto fail;
	}

	return 0;
fail:
	ublk_thread_deinit(t);
	ublk_err("ublk dev %d thread %d init failed\n",
			dev->dev_info.dev_id, t->idx);
	return -ENOMEM;
}
594
595 #define WAIT_USEC 100000
596 #define MAX_WAIT_USEC (3 * 1000000)
ublk_dev_prep(const struct dev_ctx * ctx,struct ublk_dev * dev)597 static int ublk_dev_prep(const struct dev_ctx *ctx, struct ublk_dev *dev)
598 {
599 int dev_id = dev->dev_info.dev_id;
600 unsigned int wait_usec = 0;
601 int ret = 0, fd = -1;
602 char buf[64];
603
604 snprintf(buf, 64, "%s%d", UBLKC_DEV, dev_id);
605
606 while (wait_usec < MAX_WAIT_USEC) {
607 fd = open(buf, O_RDWR);
608 if (fd >= 0)
609 break;
610 usleep(WAIT_USEC);
611 wait_usec += WAIT_USEC;
612 }
613 if (fd < 0) {
614 ublk_err("can't open %s %s\n", buf, strerror(errno));
615 return -1;
616 }
617
618 dev->fds[0] = fd;
619 if (dev->tgt.ops->init_tgt)
620 ret = dev->tgt.ops->init_tgt(ctx, dev);
621 if (ret)
622 close(dev->fds[0]);
623 return ret;
624 }
625
ublk_dev_unprep(struct ublk_dev * dev)626 static void ublk_dev_unprep(struct ublk_dev *dev)
627 {
628 if (dev->tgt.ops->deinit_tgt)
629 dev->tgt.ops->deinit_tgt(dev);
630 close(dev->fds[0]);
631 }
632
ublk_set_auto_buf_reg(const struct ublk_thread * t,const struct ublk_queue * q,struct io_uring_sqe * sqe,unsigned short tag)633 static void ublk_set_auto_buf_reg(const struct ublk_thread *t,
634 const struct ublk_queue *q,
635 struct io_uring_sqe *sqe,
636 unsigned short tag)
637 {
638 struct ublk_auto_buf_reg buf = {};
639
640 if (q->tgt_ops->buf_index)
641 buf.index = q->tgt_ops->buf_index(t, q, tag);
642 else
643 buf.index = ublk_io_buf_idx(t, q, tag);
644
645 if (ublk_queue_auto_zc_fallback(q))
646 buf.flags = UBLK_AUTO_BUF_REG_FALLBACK;
647
648 sqe->addr = ublk_auto_buf_reg_to_sqe_addr(&buf);
649 }
650
/* Copy in pieces to test the buffer offset logic */
#define UBLK_USER_COPY_LEN 2048

/*
 * UBLK_F_USER_COPY mode: move the io payload between the server buffer
 * and the kernel via pread/pwrite on the ublk char device at the io's
 * user-copy offset.  For a WRITE we read the data from the kernel; for
 * a READ we write our result back.  Only acts when the io's opcode
 * equals @match_ublk_op, and afterwards also transfers the integrity
 * metadata when the iod carries UBLK_IO_F_INTEGRITY.
 */
static void ublk_user_copy(const struct ublk_io *io, __u8 match_ublk_op)
{
	const struct ublk_queue *q = ublk_io_to_queue(io);
	const struct ublksrv_io_desc *iod = ublk_get_iod(q, io->tag);
	__u64 off = ublk_user_copy_offset(q->q_id, io->tag);
	__u8 ublk_op = ublksrv_get_op(iod);
	__u32 len = iod->nr_sectors << 9;	/* sectors -> bytes */
	void *addr = io->buf_addr;
	ssize_t copied;

	if (ublk_op != match_ublk_op)
		return;

	/* deliberately chunked (see UBLK_USER_COPY_LEN) to exercise offsets */
	while (len) {
		__u32 copy_len = min(len, UBLK_USER_COPY_LEN);

		if (ublk_op == UBLK_IO_OP_WRITE)
			copied = pread(q->ublk_fd, addr, copy_len, off);
		else if (ublk_op == UBLK_IO_OP_READ)
			copied = pwrite(q->ublk_fd, addr, copy_len, off);
		else
			assert(0);
		assert(copied == (ssize_t)copy_len);
		addr += copy_len;
		off += copy_len;
		len -= copy_len;
	}

	if (!(iod->op_flags & UBLK_IO_F_INTEGRITY))
		return;

	/* integrity metadata lives at the same offset with the flag bit set */
	len = ublk_integrity_len(q, iod->nr_sectors << 9);
	off = ublk_user_copy_offset(q->q_id, io->tag);
	off |= UBLKSRV_IO_INTEGRITY_FLAG;
	if (ublk_op == UBLK_IO_OP_WRITE)
		copied = pread(q->ublk_fd, io->integrity_buf, len, off);
	else if (ublk_op == UBLK_IO_OP_READ)
		copied = pwrite(q->ublk_fd, io->integrity_buf, len, off);
	else
		assert(0);
	assert(copied == (ssize_t)len);
}
696
/*
 * Queue the next uring_cmd for @io based on its state flags:
 * NEED_GET_DATA, COMMIT_AND_FETCH (after completing a request), or the
 * initial FETCH.  Only a free io with at least one NEED_* flag set is
 * issued.
 *
 * Returns 1 when a command was queued, 0 when the io was not eligible,
 * -1 when no SQE could be obtained.
 */
int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io)
{
	struct ublk_queue *q = ublk_io_to_queue(io);
	struct ublksrv_io_cmd *cmd;
	struct io_uring_sqe *sqe[1];
	unsigned int cmd_op = 0;
	__u64 user_data;

	/* only freed io can be issued */
	if (!(io->flags & UBLKS_IO_FREE))
		return 0;

	/*
	 * we issue because we need either fetching or committing or
	 * getting data
	 */
	if (!(io->flags &
		(UBLKS_IO_NEED_FETCH_RQ | UBLKS_IO_NEED_COMMIT_RQ_COMP | UBLKS_IO_NEED_GET_DATA)))
		return 0;

	if (io->flags & UBLKS_IO_NEED_GET_DATA)
		cmd_op = UBLK_U_IO_NEED_GET_DATA;
	else if (io->flags & UBLKS_IO_NEED_COMMIT_RQ_COMP) {
		/* push READ payload back to the kernel before committing */
		if (ublk_queue_use_user_copy(q))
			ublk_user_copy(io, UBLK_IO_OP_READ);

		cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ;
	} else if (io->flags & UBLKS_IO_NEED_FETCH_RQ)
		cmd_op = UBLK_U_IO_FETCH_REQ;

	/* make room in the SQ ring before grabbing an SQE */
	if (io_uring_sq_space_left(&t->ring) < 1)
		io_uring_submit(&t->ring);

	ublk_io_alloc_sqes(t, sqe, 1);
	if (!sqe[0]) {
		ublk_err("%s: run out of sqe. thread %u, tag %d\n",
				__func__, t->idx, io->tag);
		return -1;
	}

	cmd = (struct ublksrv_io_cmd *)ublk_get_sqe_cmd(sqe[0]);

	if (cmd_op == UBLK_U_IO_COMMIT_AND_FETCH_REQ)
		cmd->result = io->result;

	/* These fields should be written once, never change */
	ublk_set_sqe_cmd_op(sqe[0], cmd_op);
	sqe[0]->fd = ublk_get_registered_fd(q, 0);	/* dev->fds[0] */
	sqe[0]->opcode = IORING_OP_URING_CMD;
	if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD)
		sqe[0]->flags = 0;  /* Use raw FD, not fixed file */
	else
		sqe[0]->flags = IOSQE_FIXED_FILE;
	sqe[0]->rw_flags = 0;
	cmd->tag = io->tag;
	cmd->q_id = q->q_id;
	/* server buffer address only when driver copies to/from it */
	if (!ublk_queue_no_buf(q) && !ublk_queue_use_user_copy(q))
		cmd->addr = (__u64) (uintptr_t) io->buf_addr;
	else
		cmd->addr = 0;

	if (ublk_queue_use_auto_zc(q))
		ublk_set_auto_buf_reg(t, q, sqe[0], io->tag);

	user_data = build_user_data(io->tag, _IOC_NR(cmd_op), 0, q->q_id, 0);
	io_uring_sqe_set_data64(sqe[0], user_data);

	/* command is now owned by the kernel until its CQE arrives */
	io->flags = 0;

	t->cmd_inflight += 1;

	/* NOTE(review): io->flags was just cleared, so "iof" always logs 0 */
	ublk_dbg(UBLK_DBG_IO_CMD, "%s: (thread %u qid %d tag %u cmd_op %u) iof %x stopping %d\n",
			__func__, t->idx, q->q_id, io->tag, cmd_op,
			io->flags, !!(t->state & UBLKS_T_STOPPING));
	return 1;
}
773
/*
 * Issue the initial FETCH_REQ command for every io this thread owns,
 * assigning each io a thread-local fixed-buffer index as it goes.
 */
static void ublk_submit_fetch_commands(struct ublk_thread *t)
{
	struct ublk_queue *q;
	struct ublk_io *io;
	int i = 0, j = 0;

	if (t->dev->per_io_tasks) {
		/*
		 * Lexicographically order all the (qid,tag) pairs, with
		 * qid taking priority (so (1,0) > (0,1)). Then make
		 * this thread the daemon for every Nth entry in this
		 * list (N is the number of threads), starting at this
		 * thread's index. This ensures that each queue is
		 * handled by as many ublk server threads as possible,
		 * so that load that is concentrated on one or a few
		 * queues can make use of all ublk server threads.
		 */
		const struct ublksrv_ctrl_dev_info *dinfo = &t->dev->dev_info;
		int nr_ios = dinfo->nr_hw_queues * dinfo->queue_depth;
		for (i = t->idx; i < nr_ios; i += t->dev->nthreads) {
			int q_id = i / dinfo->queue_depth;
			int tag = i % dinfo->queue_depth;
			q = &t->dev->q[q_id];
			io = &q->ios[tag];
			/* dense 0..n-1 buffer indices local to this thread */
			io->buf_index = j++;
			ublk_queue_io_cmd(t, io);
		}
	} else {
		/*
		 * Service exclusively the queue whose q_id matches our
		 * thread index.
		 */
		struct ublk_queue *q = &t->dev->q[t->idx];
		for (i = 0; i < q->q_depth; i++) {
			io = &q->ios[i];
			io->buf_index = i;
			ublk_queue_io_cmd(t, io);
		}
	}
}
814
ublk_thread_is_idle(struct ublk_thread * t)815 static int ublk_thread_is_idle(struct ublk_thread *t)
816 {
817 return !io_uring_sq_ready(&t->ring) && !t->io_inflight;
818 }
819
ublk_thread_is_done(struct ublk_thread * t)820 static int ublk_thread_is_done(struct ublk_thread *t)
821 {
822 return (t->state & UBLKS_T_STOPPING) && ublk_thread_is_idle(t) && !t->cmd_inflight;
823 }
824
ublksrv_handle_tgt_cqe(struct ublk_thread * t,struct ublk_queue * q,struct io_uring_cqe * cqe)825 static inline void ublksrv_handle_tgt_cqe(struct ublk_thread *t,
826 struct ublk_queue *q,
827 struct io_uring_cqe *cqe)
828 {
829 if (cqe->res < 0 && cqe->res != -EAGAIN)
830 ublk_err("%s: failed tgt io: res %d qid %u tag %u, cmd_op %u\n",
831 __func__, cqe->res, q->q_id,
832 user_data_to_tag(cqe->user_data),
833 user_data_to_op(cqe->user_data));
834
835 if (q->tgt_ops->tgt_io_done)
836 q->tgt_ops->tgt_io_done(t, q, cqe);
837 }
838
/*
 * Completion of a ublk uring_cmd (FETCH/COMMIT/GET_DATA):
 * - UBLK_IO_RES_OK: a request arrived; dispatch it to the target.
 * - UBLK_IO_RES_NEED_GET_DATA: re-issue with a GET_DATA command.
 * - anything else (incl. UBLK_IO_RES_ABORT): mark the io free only.
 * An abort result, or the thread already stopping, stops further
 * fetching for this io.
 */
static void ublk_handle_uring_cmd(struct ublk_thread *t,
				  struct ublk_queue *q,
				  const struct io_uring_cqe *cqe)
{
	int fetch = (cqe->res != UBLK_IO_RES_ABORT) &&
		!(t->state & UBLKS_T_STOPPING);
	unsigned tag = user_data_to_tag(cqe->user_data);
	struct ublk_io *io = &q->ios[tag];

	t->cmd_inflight--;

	if (!fetch) {
		/* device is going away: don't fetch again */
		t->state |= UBLKS_T_STOPPING;
		io->flags &= ~UBLKS_IO_NEED_FETCH_RQ;
	}

	if (cqe->res == UBLK_IO_RES_OK) {
		ublk_assert(tag < q->q_depth);

		/* pull WRITE payload from the kernel before handling it */
		if (ublk_queue_use_user_copy(q))
			ublk_user_copy(io, UBLK_IO_OP_WRITE);

		if (q->tgt_ops->queue_io)
			q->tgt_ops->queue_io(t, q, tag);
	} else if (cqe->res == UBLK_IO_RES_NEED_GET_DATA) {
		io->flags |= UBLKS_IO_NEED_GET_DATA | UBLKS_IO_FREE;
		ublk_queue_io_cmd(t, io);
	} else {
		/*
		 * COMMIT_REQ will be completed immediately since no fetching
		 * piggyback is required.
		 *
		 * Marking IO_FREE only, then this io won't be issued since
		 * we only issue io with (UBLKS_IO_FREE | UBLKSRV_NEED_*)
		 *
		 * */
		io->flags = UBLKS_IO_FREE;
	}
}
878
/*
 * Dispatch one CQE: target I/O completions go to the target handler,
 * everything else is a ublk uring_cmd completion, routed to batch or
 * per-io handling depending on the thread's mode.  @data is unused.
 */
static void ublk_handle_cqe(struct ublk_thread *t,
		struct io_uring_cqe *cqe, void *data)
{
	struct ublk_dev *dev = t->dev;
	unsigned q_id = user_data_to_q_id(cqe->user_data);
	unsigned cmd_op = user_data_to_op(cqe->user_data);

	/*
	 * -ENODEV/-ENOBUFS are deliberately not logged — presumably
	 * expected during teardown / buffer exhaustion; confirm.
	 */
	if (cqe->res < 0 && cqe->res != -ENODEV && cqe->res != -ENOBUFS)
		ublk_err("%s: res %d userdata %llx thread state %x\n", __func__,
				cqe->res, cqe->user_data, t->state);

	ublk_dbg(UBLK_DBG_IO_CMD, "%s: res %d (thread %d qid %d tag %u cmd_op %x "
			"data %lx target %d/%d) stopping %d\n",
			__func__, cqe->res, t->idx, q_id,
			user_data_to_tag(cqe->user_data),
			cmd_op, cqe->user_data, is_target_io(cqe->user_data),
			user_data_to_tgt_data(cqe->user_data),
			(t->state & UBLKS_T_STOPPING));

	/* Don't retrieve io in case of target io */
	if (is_target_io(cqe->user_data)) {
		ublksrv_handle_tgt_cqe(t, &dev->q[q_id], cqe);
		return;
	}

	if (ublk_thread_batch_io(t))
		ublk_batch_compl_cmd(t, cqe);
	else
		ublk_handle_uring_cmd(t, &dev->q[q_id], cqe);
}
909
ublk_reap_events_uring(struct ublk_thread * t)910 static int ublk_reap_events_uring(struct ublk_thread *t)
911 {
912 struct io_uring_cqe *cqe;
913 unsigned head;
914 int count = 0;
915
916 io_uring_for_each_cqe(&t->ring, head, cqe) {
917 ublk_handle_cqe(t, cqe, NULL);
918 count += 1;
919 }
920 io_uring_cq_advance(&t->ring, count);
921
922 return count;
923 }
924
/*
 * One iteration of the thread's event loop: submit pending SQEs, wait
 * for at least one completion, and reap all available CQEs (wrapping
 * the reap in a commit batch when in batch-io mode).
 *
 * Returns the number of reaped CQEs, or -ENODEV once the thread has
 * fully drained after a stop request.
 */
static int ublk_process_io(struct ublk_thread *t)
{
	int ret, reapped;

	ublk_dbg(UBLK_DBG_THREAD, "dev%d-t%u: to_submit %d inflight cmd %u stopping %d\n",
			t->dev->dev_info.dev_id,
			t->idx, io_uring_sq_ready(&t->ring),
			t->cmd_inflight,
			(t->state & UBLKS_T_STOPPING));

	if (ublk_thread_is_done(t))
		return -ENODEV;

	ret = io_uring_submit_and_wait(&t->ring, 1);
	if (ublk_thread_batch_io(t)) {
		/* collect per-io completions, then commit them in one batch */
		ublk_batch_prep_commit(t);
		reapped = ublk_reap_events_uring(t);
		ublk_batch_commit_io_cmds(t);
	} else {
		reapped = ublk_reap_events_uring(t);
	}

	ublk_dbg(UBLK_DBG_THREAD, "submit result %d, reapped %d stop %d idle %d\n",
			ret, reapped, (t->state & UBLKS_T_STOPPING),
			(t->state & UBLKS_T_IDLE));

	return reapped;
}
953
/* Arguments handed to each server pthread at startup */
struct ublk_thread_info {
	struct ublk_dev		*dev;		/* owning device */
	pthread_t		thread;		/* pthread handle (joined by parent) */
	unsigned		idx;		/* thread index within the device */
	sem_t 			*ready;		/* posted once thread init completed */
	cpu_set_t 		*affinity;	/* optional CPU set to pin this thread to */
	unsigned long long	extra_flags;	/* extra UBLKS_Q_* flags for init */
	unsigned char		(*q_thread_map)[UBLK_MAX_QUEUES]; /* per-thread queue map (batch mode) */
};
963
ublk_thread_set_sched_affinity(const struct ublk_thread_info * info)964 static void ublk_thread_set_sched_affinity(const struct ublk_thread_info *info)
965 {
966 if (pthread_setaffinity_np(pthread_self(), sizeof(*info->affinity), info->affinity) < 0)
967 ublk_err("ublk dev %u thread %u set affinity failed",
968 info->dev->dev_info.dev_id, info->idx);
969 }
970
/*
 * Batch-io mode: prepare io commands for every hardware queue that is
 * mapped to this thread in q_map.
 */
static void ublk_batch_setup_queues(struct ublk_thread *t)
{
	int i;

	for (i = 0; i < t->dev->dev_info.nr_hw_queues; i++) {
		struct ublk_queue *q = &t->dev->q[i];
		int ret;

		/*
		 * Only prepare io commands in the mapped thread context,
		 * otherwise io command buffer index may not work as expected
		 */
		if (t->q_map[i] == 0)
			continue;

		ret = ublk_batch_queue_prep_io_cmds(t, q);
		ublk_assert(ret >= 0);
	}
}
990
/*
 * Body of one server thread: build the on-stack ublk_thread, initialize
 * its ring, signal readiness, seed the initial fetch commands, then run
 * the event loop until ublk_process_io() reports the thread is done.
 *
 * noinline so the large on-stack ublk_thread doesn't get merged into
 * the caller's frame — TODO confirm that is the intent.
 */
static __attribute__((noinline)) int __ublk_io_handler_fn(struct ublk_thread_info *info)
{
	struct ublk_thread t = {
		.dev = info->dev,
		.idx = info->idx,
	};
	int dev_id = info->dev->dev_info.dev_id;
	int ret;

	/* Copy per-thread queue mapping into thread-local variable */
	if (info->q_thread_map)
		memcpy(t.q_map, info->q_thread_map[info->idx], sizeof(t.q_map));

	ret = ublk_thread_init(&t, info->extra_flags);
	if (ret) {
		ublk_err("ublk dev %d thread %u init failed\n",
				dev_id, t.idx);
		return ret;
	}
	/* tell the spawning thread our ring is up */
	sem_post(info->ready);

	ublk_dbg(UBLK_DBG_THREAD, "tid %d: ublk dev %d thread %u started\n",
			gettid(), dev_id, t.idx);

	if (!ublk_thread_batch_io(&t)) {
		/* submit all io commands to ublk driver */
		ublk_submit_fetch_commands(&t);
	} else {
		ublk_batch_setup_queues(&t);
		ublk_batch_start_fetch(&t);
	}

	do {
		if (ublk_process_io(&t) < 0)
			break;
	} while (1);

	ublk_dbg(UBLK_DBG_THREAD, "tid %d: ublk dev %d thread %d exiting\n",
		 gettid(), dev_id, t.idx);
	ublk_thread_deinit(&t);
	return 0;
}
1033
ublk_io_handler_fn(void * data)1034 static void *ublk_io_handler_fn(void *data)
1035 {
1036 struct ublk_thread_info *info = data;
1037
1038 /*
1039 * IO perf is sensitive with queue pthread affinity on NUMA machine
1040 *
1041 * Set sched_affinity at beginning, so following allocated memory/pages
1042 * could be CPU/NUMA aware.
1043 */
1044 if (info->affinity)
1045 ublk_thread_set_sched_affinity(info);
1046
1047 __ublk_io_handler_fn(info);
1048
1049 return NULL;
1050 }
1051
ublk_set_parameters(struct ublk_dev * dev)1052 static void ublk_set_parameters(struct ublk_dev *dev)
1053 {
1054 int ret;
1055
1056 ret = ublk_ctrl_set_params(dev, &dev->tgt.params);
1057 if (ret)
1058 ublk_err("dev %d set basic parameter failed %d\n",
1059 dev->dev_info.dev_id, ret);
1060 }
1061
/*
 * Notify the waiter on the context eventfd: a ready device id
 * (dev_id >= 0) or startup failure (ERROR_EVTFD_DEVID).  Also copies
 * the queue state into the shared-memory shadow device when attached,
 * then closes the eventfd and detaches the shm segment.
 *
 * Returns 0 on success, -EBADF when no eventfd is set, -EINVAL on a
 * short/failed write.
 */
static int ublk_send_dev_event(const struct dev_ctx *ctx, struct ublk_dev *dev, int dev_id)
{
	uint64_t id;
	int evtfd = ctx->_evtfd;

	if (evtfd < 0)
		return -EBADF;

	/* shift by one — presumably so dev_id 0 still wakes the reader; confirm */
	if (dev_id >= 0)
		id = dev_id + 1;
	else
		id = ERROR_EVTFD_DEVID;

	if (dev && ctx->shadow_dev)
		memcpy(&ctx->shadow_dev->q, &dev->q, sizeof(dev->q));

	/* NOTE(review): evtfd is not closed on this error path — confirm intent */
	if (write(evtfd, &id, sizeof(id)) != sizeof(id))
		return -EINVAL;

	close(evtfd);
	shmdt(ctx->shadow_dev);

	return 0;
}
1086
1087
ublk_start_daemon(const struct dev_ctx * ctx,struct ublk_dev * dev)1088 static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
1089 {
1090 const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info;
1091 struct ublk_thread_info *tinfo;
1092 unsigned long long extra_flags = 0;
1093 cpu_set_t *affinity_buf;
1094 unsigned char (*q_thread_map)[UBLK_MAX_QUEUES] = NULL;
1095 void *thread_ret;
1096 sem_t ready;
1097 int ret, i;
1098
1099 ublk_dbg(UBLK_DBG_DEV, "%s enter\n", __func__);
1100
1101 tinfo = calloc(sizeof(struct ublk_thread_info), dev->nthreads);
1102 if (!tinfo)
1103 return -ENOMEM;
1104
1105 sem_init(&ready, 0, 0);
1106 ret = ublk_dev_prep(ctx, dev);
1107 if (ret)
1108 return ret;
1109
1110 ret = ublk_ctrl_get_affinity(dev, &affinity_buf);
1111 if (ret)
1112 return ret;
1113
1114 if (ublk_dev_batch_io(dev)) {
1115 q_thread_map = calloc(dev->nthreads, sizeof(*q_thread_map));
1116 if (!q_thread_map) {
1117 ret = -ENOMEM;
1118 goto fail;
1119 }
1120 ublk_batch_setup_map(q_thread_map, dev->nthreads,
1121 dinfo->nr_hw_queues);
1122 }
1123
1124 if (ctx->auto_zc_fallback)
1125 extra_flags = UBLKS_Q_AUTO_BUF_REG_FALLBACK;
1126 if (ctx->no_ublk_fixed_fd)
1127 extra_flags |= UBLKS_Q_NO_UBLK_FIXED_FD;
1128
1129 for (i = 0; i < dinfo->nr_hw_queues; i++) {
1130 dev->q[i].dev = dev;
1131 dev->q[i].q_id = i;
1132
1133 ret = ublk_queue_init(&dev->q[i], extra_flags,
1134 ctx->metadata_size);
1135 if (ret) {
1136 ublk_err("ublk dev %d queue %d init queue failed\n",
1137 dinfo->dev_id, i);
1138 goto fail;
1139 }
1140 }
1141
1142 for (i = 0; i < dev->nthreads; i++) {
1143 tinfo[i].dev = dev;
1144 tinfo[i].idx = i;
1145 tinfo[i].ready = &ready;
1146 tinfo[i].extra_flags = extra_flags;
1147 tinfo[i].q_thread_map = q_thread_map;
1148
1149 /*
1150 * If threads are not tied 1:1 to queues, setting thread
1151 * affinity based on queue affinity makes little sense.
1152 * However, thread CPU affinity has significant impact
1153 * on performance, so to compare fairly, we'll still set
1154 * thread CPU affinity based on queue affinity where
1155 * possible.
1156 */
1157 if (dev->nthreads == dinfo->nr_hw_queues)
1158 tinfo[i].affinity = &affinity_buf[i];
1159 pthread_create(&tinfo[i].thread, NULL,
1160 ublk_io_handler_fn,
1161 &tinfo[i]);
1162 }
1163
1164 for (i = 0; i < dev->nthreads; i++)
1165 sem_wait(&ready);
1166 free(affinity_buf);
1167 free(q_thread_map);
1168
1169 /* everything is fine now, start us */
1170 if (ctx->recovery)
1171 ret = ublk_ctrl_end_user_recovery(dev, getpid());
1172 else {
1173 ublk_set_parameters(dev);
1174 ret = ublk_ctrl_start_dev(dev, getpid());
1175 }
1176 if (ret < 0) {
1177 ublk_err("%s: ublk_ctrl_start_dev failed: %d\n", __func__, ret);
1178 /* stop device so that inflight uring_cmd can be cancelled */
1179 ublk_ctrl_stop_dev(dev);
1180 goto fail_start;
1181 }
1182
1183 ublk_ctrl_get_info(dev);
1184 if (ctx->fg)
1185 ublk_ctrl_dump(dev);
1186 else
1187 ublk_send_dev_event(ctx, dev, dev->dev_info.dev_id);
1188 fail_start:
1189 /* wait until we are terminated */
1190 for (i = 0; i < dev->nthreads; i++)
1191 pthread_join(tinfo[i].thread, &thread_ret);
1192 free(tinfo);
1193 fail:
1194 for (i = 0; i < dinfo->nr_hw_queues; i++)
1195 ublk_queue_deinit(&dev->q[i]);
1196 ublk_dev_unprep(dev);
1197 ublk_dbg(UBLK_DBG_DEV, "%s exit\n", __func__);
1198
1199 return ret;
1200 }
1201
wait_ublk_dev(const char * path,int evt_mask,unsigned timeout)1202 static int wait_ublk_dev(const char *path, int evt_mask, unsigned timeout)
1203 {
1204 #define EV_SIZE (sizeof(struct inotify_event))
1205 #define EV_BUF_LEN (128 * (EV_SIZE + 16))
1206 struct pollfd pfd;
1207 int fd, wd;
1208 int ret = -EINVAL;
1209 const char *dev_name = basename(path);
1210
1211 fd = inotify_init();
1212 if (fd < 0) {
1213 ublk_dbg(UBLK_DBG_DEV, "%s: inotify init failed\n", __func__);
1214 return fd;
1215 }
1216
1217 wd = inotify_add_watch(fd, "/dev", evt_mask);
1218 if (wd == -1) {
1219 ublk_dbg(UBLK_DBG_DEV, "%s: add watch for /dev failed\n", __func__);
1220 goto fail;
1221 }
1222
1223 pfd.fd = fd;
1224 pfd.events = POLL_IN;
1225 while (1) {
1226 int i = 0;
1227 char buffer[EV_BUF_LEN];
1228 ret = poll(&pfd, 1, 1000 * timeout);
1229
1230 if (ret == -1) {
1231 ublk_err("%s: poll inotify failed: %d\n", __func__, ret);
1232 goto rm_watch;
1233 } else if (ret == 0) {
1234 ublk_err("%s: poll inotify timeout\n", __func__);
1235 ret = -ETIMEDOUT;
1236 goto rm_watch;
1237 }
1238
1239 ret = read(fd, buffer, EV_BUF_LEN);
1240 if (ret < 0) {
1241 ublk_err("%s: read inotify fd failed\n", __func__);
1242 goto rm_watch;
1243 }
1244
1245 while (i < ret) {
1246 struct inotify_event *event = (struct inotify_event *)&buffer[i];
1247
1248 ublk_dbg(UBLK_DBG_DEV, "%s: inotify event %x %s\n",
1249 __func__, event->mask, event->name);
1250 if (event->mask & evt_mask) {
1251 if (!strcmp(event->name, dev_name)) {
1252 ret = 0;
1253 goto rm_watch;
1254 }
1255 }
1256 i += EV_SIZE + event->len;
1257 }
1258 }
1259 rm_watch:
1260 inotify_rm_watch(fd, wd);
1261 fail:
1262 close(fd);
1263 return ret;
1264 }
1265
ublk_stop_io_daemon(const struct ublk_dev * dev)1266 static int ublk_stop_io_daemon(const struct ublk_dev *dev)
1267 {
1268 int daemon_pid = dev->dev_info.ublksrv_pid;
1269 int dev_id = dev->dev_info.dev_id;
1270 char ublkc[64];
1271 int ret = 0;
1272
1273 if (daemon_pid < 0)
1274 return 0;
1275
1276 /* daemon may be dead already */
1277 if (kill(daemon_pid, 0) < 0)
1278 goto wait;
1279
1280 snprintf(ublkc, sizeof(ublkc), "/dev/%s%d", "ublkc", dev_id);
1281
1282 /* ublk char device may be gone already */
1283 if (access(ublkc, F_OK) != 0)
1284 goto wait;
1285
1286 /* Wait until ublk char device is closed, when the daemon is shutdown */
1287 ret = wait_ublk_dev(ublkc, IN_CLOSE, 10);
1288 /* double check and since it may be closed before starting inotify */
1289 if (ret == -ETIMEDOUT)
1290 ret = kill(daemon_pid, 0) < 0;
1291 wait:
1292 waitpid(daemon_pid, NULL, 0);
1293 ublk_dbg(UBLK_DBG_DEV, "%s: pid %d dev_id %d ret %d\n",
1294 __func__, daemon_pid, dev_id, ret);
1295
1296 return ret;
1297 }
1298
__cmd_dev_add(const struct dev_ctx * ctx)1299 static int __cmd_dev_add(const struct dev_ctx *ctx)
1300 {
1301 unsigned nthreads = ctx->nthreads;
1302 unsigned nr_queues = ctx->nr_hw_queues;
1303 const char *tgt_type = ctx->tgt_type;
1304 unsigned depth = ctx->queue_depth;
1305 __u64 features;
1306 const struct ublk_tgt_ops *ops;
1307 struct ublksrv_ctrl_dev_info *info;
1308 struct ublk_dev *dev = NULL;
1309 int dev_id = ctx->dev_id;
1310 int ret, i;
1311
1312 ops = ublk_find_tgt(tgt_type);
1313 if (!ops) {
1314 ublk_err("%s: no such tgt type, type %s\n",
1315 __func__, tgt_type);
1316 ret = -ENODEV;
1317 goto fail;
1318 }
1319
1320 if (nr_queues > UBLK_MAX_QUEUES || depth > UBLK_QUEUE_DEPTH) {
1321 ublk_err("%s: invalid nr_queues or depth queues %u depth %u\n",
1322 __func__, nr_queues, depth);
1323 ret = -EINVAL;
1324 goto fail;
1325 }
1326
1327 /* default to 1:1 threads:queues if nthreads is unspecified */
1328 if (!nthreads)
1329 nthreads = nr_queues;
1330
1331 if (nthreads > UBLK_MAX_THREADS) {
1332 ublk_err("%s: %u is too many threads (max %u)\n",
1333 __func__, nthreads, UBLK_MAX_THREADS);
1334 ret = -EINVAL;
1335 goto fail;
1336 }
1337
1338 if (nthreads != nr_queues && (!ctx->per_io_tasks &&
1339 !(ctx->flags & UBLK_F_BATCH_IO))) {
1340 ublk_err("%s: threads %u must be same as queues %u if "
1341 "not using per_io_tasks\n",
1342 __func__, nthreads, nr_queues);
1343 ret = -EINVAL;
1344 goto fail;
1345 }
1346
1347 dev = ublk_ctrl_init();
1348 if (!dev) {
1349 ublk_err("%s: can't alloc dev id %d, type %s\n",
1350 __func__, dev_id, tgt_type);
1351 ret = -ENOMEM;
1352 goto fail;
1353 }
1354
1355 /* kernel doesn't support get_features */
1356 ret = ublk_ctrl_get_features(dev, &features);
1357 if (ret < 0) {
1358 ret = -EINVAL;
1359 goto fail;
1360 }
1361
1362 if (!(features & UBLK_F_CMD_IOCTL_ENCODE)) {
1363 ret = -ENOTSUP;
1364 goto fail;
1365 }
1366
1367 info = &dev->dev_info;
1368 info->dev_id = ctx->dev_id;
1369 info->nr_hw_queues = nr_queues;
1370 info->queue_depth = depth;
1371 info->flags = ctx->flags;
1372 if ((features & UBLK_F_QUIESCE) &&
1373 (info->flags & UBLK_F_USER_RECOVERY))
1374 info->flags |= UBLK_F_QUIESCE;
1375 dev->nthreads = nthreads;
1376 dev->per_io_tasks = ctx->per_io_tasks;
1377 dev->tgt.ops = ops;
1378 dev->tgt.sq_depth = depth;
1379 dev->tgt.cq_depth = depth;
1380
1381 for (i = 0; i < MAX_BACK_FILES; i++) {
1382 if (ctx->files[i]) {
1383 strcpy(dev->tgt.backing_file[i], ctx->files[i]);
1384 dev->tgt.nr_backing_files++;
1385 }
1386 }
1387
1388 if (ctx->recovery)
1389 ret = ublk_ctrl_start_user_recovery(dev);
1390 else
1391 ret = ublk_ctrl_add_dev(dev);
1392 if (ret < 0) {
1393 ublk_err("%s: can't add dev id %d, type %s ret %d\n",
1394 __func__, dev_id, tgt_type, ret);
1395 goto fail;
1396 }
1397
1398 ret = ublk_start_daemon(ctx, dev);
1399 ublk_dbg(UBLK_DBG_DEV, "%s: daemon exit %d\n", __func__, ret);
1400 if (ret < 0)
1401 ublk_ctrl_del_dev(dev);
1402
1403 fail:
1404 if (ret < 0)
1405 ublk_send_dev_event(ctx, dev, -1);
1406 if (dev)
1407 ublk_ctrl_deinit(dev);
1408 return ret;
1409 }
1410
1411 static int __cmd_dev_list(struct dev_ctx *ctx);
1412
cmd_dev_add(struct dev_ctx * ctx)1413 static int cmd_dev_add(struct dev_ctx *ctx)
1414 {
1415 int res;
1416
1417 if (ctx->fg)
1418 goto run;
1419
1420 ctx->_shmid = shmget(IPC_PRIVATE, sizeof(struct ublk_dev), IPC_CREAT | 0666);
1421 if (ctx->_shmid < 0) {
1422 ublk_err("%s: failed to shmget %s\n", __func__, strerror(errno));
1423 exit(-1);
1424 }
1425 ctx->shadow_dev = (struct ublk_dev *)shmat(ctx->_shmid, NULL, 0);
1426 if (ctx->shadow_dev == (struct ublk_dev *)-1) {
1427 ublk_err("%s: failed to shmat %s\n", __func__, strerror(errno));
1428 exit(-1);
1429 }
1430 ctx->_evtfd = eventfd(0, 0);
1431 if (ctx->_evtfd < 0) {
1432 ublk_err("%s: failed to create eventfd %s\n", __func__, strerror(errno));
1433 exit(-1);
1434 }
1435
1436 res = fork();
1437 if (res == 0) {
1438 int res2;
1439
1440 setsid();
1441 res2 = fork();
1442 if (res2 == 0) {
1443 /* prepare for detaching */
1444 close(STDIN_FILENO);
1445 close(STDOUT_FILENO);
1446 close(STDERR_FILENO);
1447 run:
1448 res = __cmd_dev_add(ctx);
1449 return res;
1450 } else {
1451 /* detached from the foreground task */
1452 exit(EXIT_SUCCESS);
1453 }
1454 } else if (res > 0) {
1455 uint64_t id;
1456 int exit_code = EXIT_FAILURE;
1457
1458 res = read(ctx->_evtfd, &id, sizeof(id));
1459 close(ctx->_evtfd);
1460 if (res == sizeof(id) && id != ERROR_EVTFD_DEVID) {
1461 ctx->dev_id = id - 1;
1462 if (__cmd_dev_list(ctx) >= 0)
1463 exit_code = EXIT_SUCCESS;
1464 }
1465 shmdt(ctx->shadow_dev);
1466 shmctl(ctx->_shmid, IPC_RMID, NULL);
1467 /* wait for child and detach from it */
1468 wait(NULL);
1469 if (exit_code == EXIT_FAILURE)
1470 ublk_err("%s: command failed\n", __func__);
1471 exit(exit_code);
1472 } else {
1473 exit(EXIT_FAILURE);
1474 }
1475 }
1476
__cmd_dev_del(struct dev_ctx * ctx)1477 static int __cmd_dev_del(struct dev_ctx *ctx)
1478 {
1479 int number = ctx->dev_id;
1480 struct ublk_dev *dev;
1481 int ret;
1482
1483 dev = ublk_ctrl_init();
1484 dev->dev_info.dev_id = number;
1485
1486 ret = ublk_ctrl_get_info(dev);
1487 if (ret < 0)
1488 goto fail;
1489
1490 ret = ublk_ctrl_stop_dev(dev);
1491 if (ret < 0)
1492 ublk_err("%s: stop dev %d failed ret %d\n", __func__, number, ret);
1493
1494 ret = ublk_stop_io_daemon(dev);
1495 if (ret < 0)
1496 ublk_err("%s: stop daemon id %d dev %d, ret %d\n",
1497 __func__, dev->dev_info.ublksrv_pid, number, ret);
1498 ublk_ctrl_del_dev(dev);
1499 fail:
1500 ublk_ctrl_deinit(dev);
1501
1502 return (ret >= 0) ? 0 : ret;
1503 }
1504
cmd_dev_del(struct dev_ctx * ctx)1505 static int cmd_dev_del(struct dev_ctx *ctx)
1506 {
1507 int i;
1508
1509 if (ctx->dev_id >= 0 || !ctx->all)
1510 return __cmd_dev_del(ctx);
1511
1512 for (i = 0; i < 255; i++) {
1513 ctx->dev_id = i;
1514 __cmd_dev_del(ctx);
1515 }
1516 return 0;
1517 }
1518
cmd_dev_stop(struct dev_ctx * ctx)1519 static int cmd_dev_stop(struct dev_ctx *ctx)
1520 {
1521 int number = ctx->dev_id;
1522 struct ublk_dev *dev;
1523 int ret;
1524
1525 if (number < 0) {
1526 ublk_err("%s: device id is required\n", __func__);
1527 return -EINVAL;
1528 }
1529
1530 dev = ublk_ctrl_init();
1531 dev->dev_info.dev_id = number;
1532
1533 ret = ublk_ctrl_get_info(dev);
1534 if (ret < 0)
1535 goto fail;
1536
1537 if (ctx->safe_stop) {
1538 ret = ublk_ctrl_try_stop_dev(dev);
1539 if (ret < 0)
1540 ublk_err("%s: try_stop dev %d failed ret %d\n",
1541 __func__, number, ret);
1542 } else {
1543 ret = ublk_ctrl_stop_dev(dev);
1544 if (ret < 0)
1545 ublk_err("%s: stop dev %d failed ret %d\n",
1546 __func__, number, ret);
1547 }
1548
1549 fail:
1550 ublk_ctrl_deinit(dev);
1551
1552 return ret;
1553 }
1554
__cmd_dev_list(struct dev_ctx * ctx)1555 static int __cmd_dev_list(struct dev_ctx *ctx)
1556 {
1557 struct ublk_dev *dev = ublk_ctrl_init();
1558 int ret;
1559
1560 if (!dev)
1561 return -ENODEV;
1562
1563 dev->dev_info.dev_id = ctx->dev_id;
1564
1565 ret = ublk_ctrl_get_info(dev);
1566 if (ret < 0) {
1567 if (ctx->logging)
1568 ublk_err("%s: can't get dev info from %d: %d\n",
1569 __func__, ctx->dev_id, ret);
1570 } else {
1571 if (ctx->shadow_dev)
1572 memcpy(&dev->q, ctx->shadow_dev->q, sizeof(dev->q));
1573
1574 ublk_ctrl_dump(dev);
1575 }
1576
1577 ublk_ctrl_deinit(dev);
1578
1579 return ret;
1580 }
1581
cmd_dev_list(struct dev_ctx * ctx)1582 static int cmd_dev_list(struct dev_ctx *ctx)
1583 {
1584 int i;
1585
1586 if (ctx->dev_id >= 0 || !ctx->all)
1587 return __cmd_dev_list(ctx);
1588
1589 ctx->logging = false;
1590 for (i = 0; i < 255; i++) {
1591 ctx->dev_id = i;
1592 __cmd_dev_list(ctx);
1593 }
1594 return 0;
1595 }
1596
cmd_dev_get_features(void)1597 static int cmd_dev_get_features(void)
1598 {
1599 #define const_ilog2(x) (63 - __builtin_clzll(x))
1600 #define FEAT_NAME(f) [const_ilog2(f)] = #f
1601 static const char *feat_map[] = {
1602 FEAT_NAME(UBLK_F_SUPPORT_ZERO_COPY),
1603 FEAT_NAME(UBLK_F_URING_CMD_COMP_IN_TASK),
1604 FEAT_NAME(UBLK_F_NEED_GET_DATA),
1605 FEAT_NAME(UBLK_F_USER_RECOVERY),
1606 FEAT_NAME(UBLK_F_USER_RECOVERY_REISSUE),
1607 FEAT_NAME(UBLK_F_UNPRIVILEGED_DEV),
1608 FEAT_NAME(UBLK_F_CMD_IOCTL_ENCODE),
1609 FEAT_NAME(UBLK_F_USER_COPY),
1610 FEAT_NAME(UBLK_F_ZONED),
1611 FEAT_NAME(UBLK_F_USER_RECOVERY_FAIL_IO),
1612 FEAT_NAME(UBLK_F_UPDATE_SIZE),
1613 FEAT_NAME(UBLK_F_AUTO_BUF_REG),
1614 FEAT_NAME(UBLK_F_QUIESCE),
1615 FEAT_NAME(UBLK_F_PER_IO_DAEMON),
1616 FEAT_NAME(UBLK_F_BUF_REG_OFF_DAEMON),
1617 FEAT_NAME(UBLK_F_INTEGRITY),
1618 FEAT_NAME(UBLK_F_SAFE_STOP_DEV),
1619 FEAT_NAME(UBLK_F_BATCH_IO),
1620 FEAT_NAME(UBLK_F_NO_AUTO_PART_SCAN),
1621 };
1622 struct ublk_dev *dev;
1623 __u64 features = 0;
1624 int ret;
1625
1626 dev = ublk_ctrl_init();
1627 if (!dev) {
1628 fprintf(stderr, "ublksrv_ctrl_init failed id\n");
1629 return -EOPNOTSUPP;
1630 }
1631
1632 ret = ublk_ctrl_get_features(dev, &features);
1633 if (!ret) {
1634 int i;
1635
1636 printf("ublk_drv features: 0x%llx\n", features);
1637
1638 for (i = 0; i < sizeof(features) * 8; i++) {
1639 const char *feat;
1640
1641 if (!((1ULL << i) & features))
1642 continue;
1643 if (i < ARRAY_SIZE(feat_map))
1644 feat = feat_map[i];
1645 else
1646 feat = "unknown";
1647 printf("0x%-16llx: %s\n", 1ULL << i, feat);
1648 }
1649 }
1650
1651 return ret;
1652 }
1653
cmd_dev_update_size(struct dev_ctx * ctx)1654 static int cmd_dev_update_size(struct dev_ctx *ctx)
1655 {
1656 struct ublk_dev *dev = ublk_ctrl_init();
1657 struct ublk_params p;
1658 int ret = -EINVAL;
1659
1660 if (!dev)
1661 return -ENODEV;
1662
1663 if (ctx->dev_id < 0) {
1664 fprintf(stderr, "device id isn't provided\n");
1665 goto out;
1666 }
1667
1668 dev->dev_info.dev_id = ctx->dev_id;
1669 ret = ublk_ctrl_get_params(dev, &p);
1670 if (ret < 0) {
1671 ublk_err("failed to get params %d %s\n", ret, strerror(-ret));
1672 goto out;
1673 }
1674
1675 if (ctx->size & ((1 << p.basic.logical_bs_shift) - 1)) {
1676 ublk_err("size isn't aligned with logical block size\n");
1677 ret = -EINVAL;
1678 goto out;
1679 }
1680
1681 ret = ublk_ctrl_update_size(dev, ctx->size >> 9);
1682 out:
1683 ublk_ctrl_deinit(dev);
1684 return ret;
1685 }
1686
cmd_dev_quiesce(struct dev_ctx * ctx)1687 static int cmd_dev_quiesce(struct dev_ctx *ctx)
1688 {
1689 struct ublk_dev *dev = ublk_ctrl_init();
1690 int ret = -EINVAL;
1691
1692 if (!dev)
1693 return -ENODEV;
1694
1695 if (ctx->dev_id < 0) {
1696 fprintf(stderr, "device id isn't provided for quiesce\n");
1697 goto out;
1698 }
1699 dev->dev_info.dev_id = ctx->dev_id;
1700 ret = ublk_ctrl_quiesce_dev(dev, 10000);
1701
1702 out:
1703 ublk_ctrl_deinit(dev);
1704 return ret;
1705 }
1706
__cmd_create_help(char * exe,bool recovery)1707 static void __cmd_create_help(char *exe, bool recovery)
1708 {
1709 int i;
1710
1711 printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n",
1712 exe, recovery ? "recover" : "add");
1713 printf("\t[--foreground] [--quiet] [-z] [--auto_zc] [--auto_zc_fallback] [--debug_mask mask] [-r 0|1] [-g] [-u]\n");
1714 printf("\t[-e 0|1 ] [-i 0|1] [--no_ublk_fixed_fd]\n");
1715 printf("\t[--nthreads threads] [--per_io_tasks]\n");
1716 printf("\t[--integrity_capable] [--integrity_reftag] [--metadata_size SIZE] "
1717 "[--pi_offset OFFSET] [--csum_type ip|t10dif|nvme] [--tag_size SIZE]\n");
1718 printf("\t[--batch|-b] [--no_auto_part_scan]\n");
1719 printf("\t[target options] [backfile1] [backfile2] ...\n");
1720 printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n");
1721 printf("\tdefault: nthreads=nr_queues");
1722
1723 for (i = 0; i < ARRAY_SIZE(tgt_ops_list); i++) {
1724 const struct ublk_tgt_ops *ops = tgt_ops_list[i];
1725
1726 if (ops->usage)
1727 ops->usage(ops);
1728 }
1729 }
1730
cmd_add_help(char * exe)1731 static void cmd_add_help(char *exe)
1732 {
1733 __cmd_create_help(exe, false);
1734 printf("\n");
1735 }
1736
cmd_recover_help(char * exe)1737 static void cmd_recover_help(char *exe)
1738 {
1739 __cmd_create_help(exe, true);
1740 printf("\tPlease provide exact command line for creating this device with real dev_id\n");
1741 printf("\n");
1742 }
1743
cmd_dev_help(char * exe)1744 static int cmd_dev_help(char *exe)
1745 {
1746 cmd_add_help(exe);
1747 cmd_recover_help(exe);
1748
1749 printf("%s del [-n dev_id] -a \n", exe);
1750 printf("\t -a delete all devices -n delete specified device\n\n");
1751 printf("%s stop -n dev_id [--safe]\n", exe);
1752 printf("\t --safe only stop if device has no active openers\n\n");
1753 printf("%s list [-n dev_id] -a \n", exe);
1754 printf("\t -a list all devices, -n list specified device, default -a \n\n");
1755 printf("%s features\n", exe);
1756 printf("%s update_size -n dev_id -s|--size size_in_bytes \n", exe);
1757 printf("%s quiesce -n dev_id\n", exe);
1758 return 0;
1759 }
1760
main(int argc,char * argv[])1761 int main(int argc, char *argv[])
1762 {
1763 static const struct option longopts[] = {
1764 { "all", 0, NULL, 'a' },
1765 { "type", 1, NULL, 't' },
1766 { "number", 1, NULL, 'n' },
1767 { "queues", 1, NULL, 'q' },
1768 { "depth", 1, NULL, 'd' },
1769 { "debug_mask", 1, NULL, 0 },
1770 { "quiet", 0, NULL, 0 },
1771 { "zero_copy", 0, NULL, 'z' },
1772 { "foreground", 0, NULL, 0 },
1773 { "recovery", 1, NULL, 'r' },
1774 { "recovery_fail_io", 1, NULL, 'e'},
1775 { "recovery_reissue", 1, NULL, 'i'},
1776 { "get_data", 1, NULL, 'g'},
1777 { "auto_zc", 0, NULL, 0 },
1778 { "auto_zc_fallback", 0, NULL, 0 },
1779 { "user_copy", 0, NULL, 'u'},
1780 { "size", 1, NULL, 's'},
1781 { "nthreads", 1, NULL, 0 },
1782 { "per_io_tasks", 0, NULL, 0 },
1783 { "no_ublk_fixed_fd", 0, NULL, 0 },
1784 { "integrity_capable", 0, NULL, 0 },
1785 { "integrity_reftag", 0, NULL, 0 },
1786 { "metadata_size", 1, NULL, 0 },
1787 { "pi_offset", 1, NULL, 0 },
1788 { "csum_type", 1, NULL, 0 },
1789 { "tag_size", 1, NULL, 0 },
1790 { "safe", 0, NULL, 0 },
1791 { "batch", 0, NULL, 'b'},
1792 { "no_auto_part_scan", 0, NULL, 0 },
1793 { 0, 0, 0, 0 }
1794 };
1795 const struct ublk_tgt_ops *ops = NULL;
1796 int option_idx, opt;
1797 const char *cmd = argv[1];
1798 struct dev_ctx ctx = {
1799 ._evtfd = -1,
1800 .queue_depth = 128,
1801 .nr_hw_queues = 2,
1802 .dev_id = -1,
1803 .tgt_type = "unknown",
1804 .csum_type = LBMD_PI_CSUM_NONE,
1805 };
1806 int ret = -EINVAL, i;
1807 int tgt_argc = 1;
1808 char *tgt_argv[MAX_NR_TGT_ARG] = { NULL };
1809 int value;
1810
1811 if (argc == 1)
1812 return ret;
1813
1814 opterr = 0;
1815 optind = 2;
1816 while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:s:gazub",
1817 longopts, &option_idx)) != -1) {
1818 switch (opt) {
1819 case 'a':
1820 ctx.all = 1;
1821 break;
1822 case 'b':
1823 ctx.flags |= UBLK_F_BATCH_IO;
1824 break;
1825 case 'n':
1826 ctx.dev_id = strtol(optarg, NULL, 10);
1827 break;
1828 case 't':
1829 if (strlen(optarg) < sizeof(ctx.tgt_type))
1830 strcpy(ctx.tgt_type, optarg);
1831 break;
1832 case 'q':
1833 ctx.nr_hw_queues = strtol(optarg, NULL, 10);
1834 break;
1835 case 'd':
1836 ctx.queue_depth = strtol(optarg, NULL, 10);
1837 break;
1838 case 'z':
1839 ctx.flags |= UBLK_F_SUPPORT_ZERO_COPY;
1840 break;
1841 case 'r':
1842 value = strtol(optarg, NULL, 10);
1843 if (value)
1844 ctx.flags |= UBLK_F_USER_RECOVERY;
1845 break;
1846 case 'e':
1847 value = strtol(optarg, NULL, 10);
1848 if (value)
1849 ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_FAIL_IO;
1850 break;
1851 case 'i':
1852 value = strtol(optarg, NULL, 10);
1853 if (value)
1854 ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_REISSUE;
1855 break;
1856 case 'g':
1857 ctx.flags |= UBLK_F_NEED_GET_DATA;
1858 break;
1859 case 'u':
1860 ctx.flags |= UBLK_F_USER_COPY;
1861 break;
1862 case 's':
1863 ctx.size = strtoull(optarg, NULL, 10);
1864 break;
1865 case 0:
1866 if (!strcmp(longopts[option_idx].name, "debug_mask"))
1867 ublk_dbg_mask = strtol(optarg, NULL, 16);
1868 if (!strcmp(longopts[option_idx].name, "quiet"))
1869 ublk_dbg_mask = 0;
1870 if (!strcmp(longopts[option_idx].name, "foreground"))
1871 ctx.fg = 1;
1872 if (!strcmp(longopts[option_idx].name, "auto_zc"))
1873 ctx.flags |= UBLK_F_AUTO_BUF_REG;
1874 if (!strcmp(longopts[option_idx].name, "auto_zc_fallback"))
1875 ctx.auto_zc_fallback = 1;
1876 if (!strcmp(longopts[option_idx].name, "nthreads"))
1877 ctx.nthreads = strtol(optarg, NULL, 10);
1878 if (!strcmp(longopts[option_idx].name, "per_io_tasks"))
1879 ctx.per_io_tasks = 1;
1880 if (!strcmp(longopts[option_idx].name, "no_ublk_fixed_fd"))
1881 ctx.no_ublk_fixed_fd = 1;
1882 if (!strcmp(longopts[option_idx].name, "integrity_capable"))
1883 ctx.integrity_flags |= LBMD_PI_CAP_INTEGRITY;
1884 if (!strcmp(longopts[option_idx].name, "integrity_reftag"))
1885 ctx.integrity_flags |= LBMD_PI_CAP_REFTAG;
1886 if (!strcmp(longopts[option_idx].name, "metadata_size"))
1887 ctx.metadata_size = strtoul(optarg, NULL, 0);
1888 if (!strcmp(longopts[option_idx].name, "pi_offset"))
1889 ctx.pi_offset = strtoul(optarg, NULL, 0);
1890 if (!strcmp(longopts[option_idx].name, "csum_type")) {
1891 if (!strcmp(optarg, "ip")) {
1892 ctx.csum_type = LBMD_PI_CSUM_IP;
1893 } else if (!strcmp(optarg, "t10dif")) {
1894 ctx.csum_type = LBMD_PI_CSUM_CRC16_T10DIF;
1895 } else if (!strcmp(optarg, "nvme")) {
1896 ctx.csum_type = LBMD_PI_CSUM_CRC64_NVME;
1897 } else {
1898 ublk_err("invalid csum_type: %s\n", optarg);
1899 return -EINVAL;
1900 }
1901 }
1902 if (!strcmp(longopts[option_idx].name, "tag_size"))
1903 ctx.tag_size = strtoul(optarg, NULL, 0);
1904 if (!strcmp(longopts[option_idx].name, "safe"))
1905 ctx.safe_stop = 1;
1906 if (!strcmp(longopts[option_idx].name, "no_auto_part_scan"))
1907 ctx.flags |= UBLK_F_NO_AUTO_PART_SCAN;
1908 break;
1909 case '?':
1910 /*
1911 * target requires every option must have argument
1912 */
1913 if (argv[optind][0] == '-' || argv[optind - 1][0] != '-') {
1914 fprintf(stderr, "every target option requires argument: %s %s\n",
1915 argv[optind - 1], argv[optind]);
1916 exit(EXIT_FAILURE);
1917 }
1918
1919 if (tgt_argc < (MAX_NR_TGT_ARG - 1) / 2) {
1920 tgt_argv[tgt_argc++] = argv[optind - 1];
1921 tgt_argv[tgt_argc++] = argv[optind];
1922 } else {
1923 fprintf(stderr, "too many target options\n");
1924 exit(EXIT_FAILURE);
1925 }
1926 optind += 1;
1927 break;
1928 }
1929 }
1930
1931 if (ctx.per_io_tasks && (ctx.flags & UBLK_F_BATCH_IO)) {
1932 ublk_err("per_io_task and F_BATCH_IO conflict\n");
1933 return -EINVAL;
1934 }
1935
1936 /* auto_zc_fallback depends on F_AUTO_BUF_REG & F_SUPPORT_ZERO_COPY */
1937 if (ctx.auto_zc_fallback &&
1938 !((ctx.flags & UBLK_F_AUTO_BUF_REG) &&
1939 (ctx.flags & UBLK_F_SUPPORT_ZERO_COPY))) {
1940 ublk_err("%s: auto_zc_fallback is set but neither "
1941 "F_AUTO_BUF_REG nor F_SUPPORT_ZERO_COPY is enabled\n",
1942 __func__);
1943 return -EINVAL;
1944 }
1945
1946 if (!!(ctx.flags & UBLK_F_NEED_GET_DATA) +
1947 !!(ctx.flags & UBLK_F_USER_COPY) +
1948 (ctx.flags & UBLK_F_SUPPORT_ZERO_COPY && !ctx.auto_zc_fallback) +
1949 (ctx.flags & UBLK_F_AUTO_BUF_REG && !ctx.auto_zc_fallback) +
1950 ctx.auto_zc_fallback > 1) {
1951 fprintf(stderr, "too many data copy modes specified\n");
1952 return -EINVAL;
1953 }
1954
1955 if (ctx.metadata_size) {
1956 if (!(ctx.flags & UBLK_F_USER_COPY)) {
1957 ublk_err("integrity requires user_copy\n");
1958 return -EINVAL;
1959 }
1960
1961 ctx.flags |= UBLK_F_INTEGRITY;
1962 } else if (ctx.integrity_flags ||
1963 ctx.pi_offset ||
1964 ctx.csum_type != LBMD_PI_CSUM_NONE ||
1965 ctx.tag_size) {
1966 ublk_err("integrity parameters require metadata_size\n");
1967 return -EINVAL;
1968 }
1969
1970 if ((ctx.flags & UBLK_F_AUTO_BUF_REG) &&
1971 (ctx.flags & UBLK_F_BATCH_IO) &&
1972 (ctx.nthreads > ctx.nr_hw_queues)) {
1973 ublk_err("too many threads for F_AUTO_BUF_REG & F_BATCH_IO\n");
1974 return -EINVAL;
1975 }
1976
1977 i = optind;
1978 while (i < argc && ctx.nr_files < MAX_BACK_FILES) {
1979 ctx.files[ctx.nr_files++] = argv[i++];
1980 }
1981
1982 ops = ublk_find_tgt(ctx.tgt_type);
1983 if (ops && ops->parse_cmd_line) {
1984 optind = 0;
1985
1986 tgt_argv[0] = ctx.tgt_type;
1987 ops->parse_cmd_line(&ctx, tgt_argc, tgt_argv);
1988 }
1989
1990 if (!strcmp(cmd, "add"))
1991 ret = cmd_dev_add(&ctx);
1992 else if (!strcmp(cmd, "recover")) {
1993 if (ctx.dev_id < 0) {
1994 fprintf(stderr, "device id isn't provided for recovering\n");
1995 ret = -EINVAL;
1996 } else {
1997 ctx.recovery = 1;
1998 ret = cmd_dev_add(&ctx);
1999 }
2000 } else if (!strcmp(cmd, "del"))
2001 ret = cmd_dev_del(&ctx);
2002 else if (!strcmp(cmd, "stop"))
2003 ret = cmd_dev_stop(&ctx);
2004 else if (!strcmp(cmd, "list")) {
2005 ctx.all = 1;
2006 ret = cmd_dev_list(&ctx);
2007 } else if (!strcmp(cmd, "help"))
2008 ret = cmd_dev_help(argv[0]);
2009 else if (!strcmp(cmd, "features"))
2010 ret = cmd_dev_get_features();
2011 else if (!strcmp(cmd, "update_size"))
2012 ret = cmd_dev_update_size(&ctx);
2013 else if (!strcmp(cmd, "quiesce"))
2014 ret = cmd_dev_quiesce(&ctx);
2015 else
2016 cmd_dev_help(argv[0]);
2017
2018 return ret;
2019 }
2020