1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2011-2014, Intel Corporation. 4 * Copyright (c) 2017-2021 Christoph Hellwig. 5 */ 6 #include <linux/ptrace.h> /* for force_successful_syscall_return */ 7 #include <linux/nvme_ioctl.h> 8 #include <linux/io_uring.h> 9 #include "nvme.h" 10 11 /* 12 * Convert integer values from ioctl structures to user pointers, silently 13 * ignoring the upper bits in the compat case to match behaviour of 32-bit 14 * kernels. 15 */ 16 static void __user *nvme_to_user_ptr(uintptr_t ptrval) 17 { 18 if (in_compat_syscall()) 19 ptrval = (compat_uptr_t)ptrval; 20 return (void __user *)ptrval; 21 } 22 23 static void *nvme_add_user_metadata(struct request *req, void __user *ubuf, 24 unsigned len, u32 seed) 25 { 26 struct bio_integrity_payload *bip; 27 int ret = -ENOMEM; 28 void *buf; 29 struct bio *bio = req->bio; 30 31 buf = kmalloc(len, GFP_KERNEL); 32 if (!buf) 33 goto out; 34 35 ret = -EFAULT; 36 if ((req_op(req) == REQ_OP_DRV_OUT) && copy_from_user(buf, ubuf, len)) 37 goto out_free_meta; 38 39 bip = bio_integrity_alloc(bio, GFP_KERNEL, 1); 40 if (IS_ERR(bip)) { 41 ret = PTR_ERR(bip); 42 goto out_free_meta; 43 } 44 45 bip->bip_iter.bi_size = len; 46 bip->bip_iter.bi_sector = seed; 47 ret = bio_integrity_add_page(bio, virt_to_page(buf), len, 48 offset_in_page(buf)); 49 if (ret != len) { 50 ret = -ENOMEM; 51 goto out_free_meta; 52 } 53 54 req->cmd_flags |= REQ_INTEGRITY; 55 return buf; 56 out_free_meta: 57 kfree(buf); 58 out: 59 return ERR_PTR(ret); 60 } 61 62 static int nvme_finish_user_metadata(struct request *req, void __user *ubuf, 63 void *meta, unsigned len, int ret) 64 { 65 if (!ret && req_op(req) == REQ_OP_DRV_IN && 66 copy_to_user(ubuf, meta, len)) 67 ret = -EFAULT; 68 kfree(meta); 69 return ret; 70 } 71 72 static struct request *nvme_alloc_user_request(struct request_queue *q, 73 struct nvme_command *cmd, blk_opf_t rq_flags, 74 blk_mq_req_flags_t blk_flags) 75 { 76 struct request *req; 77 78 req = blk_mq_alloc_request(q, nvme_req_op(cmd) | rq_flags, blk_flags); 79 if (IS_ERR(req)) 80 return req; 81 nvme_init_request(req, cmd); 82 nvme_req(req)->flags |= NVME_REQ_USERCMD; 83 return req; 84 } 85 86 static int nvme_map_user_request(struct request *req, u64 ubuffer, 87 unsigned bufflen, void __user *meta_buffer, unsigned meta_len, 88 u32 meta_seed, void **metap, struct io_uring_cmd *ioucmd, 89 bool vec) 90 { 91 struct request_queue *q = req->q; 92 struct nvme_ns *ns = q->queuedata; 93 struct block_device *bdev = ns ? ns->disk->part0 : NULL; 94 struct bio *bio = NULL; 95 void *meta = NULL; 96 int ret; 97 98 if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) { 99 struct iov_iter iter; 100 101 /* fixedbufs is only for non-vectored io */ 102 if (WARN_ON_ONCE(vec)) 103 return -EINVAL; 104 ret = io_uring_cmd_import_fixed(ubuffer, bufflen, 105 rq_data_dir(req), &iter, ioucmd); 106 if (ret < 0) 107 goto out; 108 ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL); 109 } else { 110 ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer), 111 bufflen, GFP_KERNEL, vec, 0, 0, 112 rq_data_dir(req)); 113 } 114 115 if (ret) 116 goto out; 117 bio = req->bio; 118 if (bdev) 119 bio_set_dev(bio, bdev); 120 121 if (bdev && meta_buffer && meta_len) { 122 meta = nvme_add_user_metadata(req, meta_buffer, meta_len, 123 meta_seed); 124 if (IS_ERR(meta)) { 125 ret = PTR_ERR(meta); 126 goto out_unmap; 127 } 128 *metap = meta; 129 } 130 131 return ret; 132 133 out_unmap: 134 if (bio) 135 blk_rq_unmap_user(bio); 136 out: 137 blk_mq_free_request(req); 138 return ret; 139 } 140 141 static int nvme_submit_user_cmd(struct request_queue *q, 142 struct nvme_command *cmd, u64 ubuffer, 143 unsigned bufflen, void __user *meta_buffer, unsigned meta_len, 144 u32 meta_seed, u64 *result, unsigned timeout, bool vec) 145 { 146 struct nvme_ctrl *ctrl; 147 struct request *req; 148 void *meta = NULL; 149 struct bio *bio; 150 u32 effects; 151 int ret; 152 153 req = nvme_alloc_user_request(q, cmd, 0, 0); 154 if (IS_ERR(req)) 155 return PTR_ERR(req); 156 157 req->timeout = timeout; 158 if (ubuffer && bufflen) { 159 ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer, 160 meta_len, meta_seed, &meta, NULL, vec); 161 if (ret) 162 return ret; 163 } 164 165 bio = req->bio; 166 ctrl = nvme_req(req)->ctrl; 167 168 ret = nvme_execute_passthru_rq(req, &effects); 169 170 if (result) 171 *result = le64_to_cpu(nvme_req(req)->result.u64); 172 if (meta) 173 ret = nvme_finish_user_metadata(req, meta_buffer, meta, 174 meta_len, ret); 175 if (bio) 176 blk_rq_unmap_user(bio); 177 blk_mq_free_request(req); 178 179 if (effects) 180 nvme_passthru_end(ctrl, effects, cmd, ret); 181 182 return ret; 183 } 184 185 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) 186 { 187 struct nvme_user_io io; 188 struct nvme_command c; 189 unsigned length, meta_len; 190 void __user *metadata; 191 192 if (copy_from_user(&io, uio, sizeof(io))) 193 return -EFAULT; 194 if (io.flags) 195 return -EINVAL; 196 197 switch (io.opcode) { 198 case nvme_cmd_write: 199 case nvme_cmd_read: 200 case nvme_cmd_compare: 201 break; 202 default: 203 return -EINVAL; 204 } 205 206 length = (io.nblocks + 1) << ns->lba_shift; 207 208 if ((io.control & NVME_RW_PRINFO_PRACT) && 209 ns->ms == sizeof(struct t10_pi_tuple)) { 210 /* 211 * Protection information is stripped/inserted by the 212 * controller. 213 */ 214 if (nvme_to_user_ptr(io.metadata)) 215 return -EINVAL; 216 meta_len = 0; 217 metadata = NULL; 218 } else { 219 meta_len = (io.nblocks + 1) * ns->ms; 220 metadata = nvme_to_user_ptr(io.metadata); 221 } 222 223 if (ns->features & NVME_NS_EXT_LBAS) { 224 length += meta_len; 225 meta_len = 0; 226 } else if (meta_len) { 227 if ((io.metadata & 3) || !io.metadata) 228 return -EINVAL; 229 } 230 231 memset(&c, 0, sizeof(c)); 232 c.rw.opcode = io.opcode; 233 c.rw.flags = io.flags; 234 c.rw.nsid = cpu_to_le32(ns->head->ns_id); 235 c.rw.slba = cpu_to_le64(io.slba); 236 c.rw.length = cpu_to_le16(io.nblocks); 237 c.rw.control = cpu_to_le16(io.control); 238 c.rw.dsmgmt = cpu_to_le32(io.dsmgmt); 239 c.rw.reftag = cpu_to_le32(io.reftag); 240 c.rw.apptag = cpu_to_le16(io.apptag); 241 c.rw.appmask = cpu_to_le16(io.appmask); 242 243 return nvme_submit_user_cmd(ns->queue, &c, 244 io.addr, length, 245 metadata, meta_len, lower_32_bits(io.slba), NULL, 0, 246 false); 247 } 248 249 static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl, 250 struct nvme_ns *ns, __u32 nsid) 251 { 252 if (ns && nsid != ns->head->ns_id) { 253 dev_err(ctrl->device, 254 "%s: nsid (%u) in cmd does not match nsid (%u)" 255 "of namespace\n", 256 current->comm, nsid, ns->head->ns_id); 257 return false; 258 } 259 260 return true; 261 } 262 263 static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 264 struct nvme_passthru_cmd __user *ucmd) 265 { 266 struct nvme_passthru_cmd cmd; 267 struct nvme_command c; 268 unsigned timeout = 0; 269 u64 result; 270 int status; 271 272 if (!capable(CAP_SYS_ADMIN)) 273 return -EACCES; 274 if (copy_from_user(&cmd, ucmd, sizeof(cmd))) 275 return -EFAULT; 276 if (cmd.flags) 277 return -EINVAL; 278 if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid)) 279 return -EINVAL; 280 281 memset(&c, 0, sizeof(c)); 282 c.common.opcode = cmd.opcode; 283 c.common.flags = cmd.flags; 284 c.common.nsid = cpu_to_le32(cmd.nsid); 285 c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); 286 c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); 287 c.common.cdw10 = cpu_to_le32(cmd.cdw10); 288 c.common.cdw11 = cpu_to_le32(cmd.cdw11); 289 c.common.cdw12 = cpu_to_le32(cmd.cdw12); 290 c.common.cdw13 = cpu_to_le32(cmd.cdw13); 291 c.common.cdw14 = cpu_to_le32(cmd.cdw14); 292 c.common.cdw15 = cpu_to_le32(cmd.cdw15); 293 294 if (cmd.timeout_ms) 295 timeout = msecs_to_jiffies(cmd.timeout_ms); 296 297 status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, 298 cmd.addr, cmd.data_len, 299 nvme_to_user_ptr(cmd.metadata), cmd.metadata_len, 300 0, &result, timeout, false); 301 302 if (status >= 0) { 303 if (put_user(result, &ucmd->result)) 304 return -EFAULT; 305 } 306 307 return status; 308 } 309 310 static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 311 struct nvme_passthru_cmd64 __user *ucmd, bool vec) 312 { 313 struct nvme_passthru_cmd64 cmd; 314 struct nvme_command c; 315 unsigned timeout = 0; 316 int status; 317 318 if (!capable(CAP_SYS_ADMIN)) 319 return -EACCES; 320 if (copy_from_user(&cmd, ucmd, sizeof(cmd))) 321 return -EFAULT; 322 if (cmd.flags) 323 return -EINVAL; 324 if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid)) 325 return -EINVAL; 326 327 memset(&c, 0, sizeof(c)); 328 c.common.opcode = cmd.opcode; 329 c.common.flags = cmd.flags; 330 c.common.nsid = cpu_to_le32(cmd.nsid); 331 c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); 332 c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); 333 c.common.cdw10 = cpu_to_le32(cmd.cdw10); 334 c.common.cdw11 = cpu_to_le32(cmd.cdw11); 335 c.common.cdw12 = cpu_to_le32(cmd.cdw12); 336 c.common.cdw13 = cpu_to_le32(cmd.cdw13); 337 c.common.cdw14 = cpu_to_le32(cmd.cdw14); 338 c.common.cdw15 = cpu_to_le32(cmd.cdw15); 339 340 if (cmd.timeout_ms) 341 timeout = msecs_to_jiffies(cmd.timeout_ms); 342 343 status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, 344 cmd.addr, cmd.data_len, 345 nvme_to_user_ptr(cmd.metadata), cmd.metadata_len, 346 0, &cmd.result, timeout, vec); 347 348 if (status >= 0) { 349 if (put_user(cmd.result, &ucmd->result)) 350 return -EFAULT; 351 } 352 353 return status; 354 } 355 356 struct nvme_uring_data { 357 __u64 metadata; 358 __u64 addr; 359 __u32 data_len; 360 __u32 metadata_len; 361 __u32 timeout_ms; 362 }; 363 364 /* 365 * This overlays struct io_uring_cmd pdu. 366 * Expect build errors if this grows larger than that. 367 */ 368 struct nvme_uring_cmd_pdu { 369 union { 370 struct bio *bio; 371 struct request *req; 372 }; 373 u32 meta_len; 374 u32 nvme_status; 375 union { 376 struct { 377 void *meta; /* kernel-resident buffer */ 378 void __user *meta_buffer; 379 }; 380 u64 result; 381 } u; 382 }; 383 384 static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu( 385 struct io_uring_cmd *ioucmd) 386 { 387 return (struct nvme_uring_cmd_pdu *)&ioucmd->pdu; 388 } 389 390 static void nvme_uring_task_meta_cb(struct io_uring_cmd *ioucmd) 391 { 392 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 393 struct request *req = pdu->req; 394 int status; 395 u64 result; 396 397 if (nvme_req(req)->flags & NVME_REQ_CANCELLED) 398 status = -EINTR; 399 else 400 status = nvme_req(req)->status; 401 402 result = le64_to_cpu(nvme_req(req)->result.u64); 403 404 if (pdu->meta_len) 405 status = nvme_finish_user_metadata(req, pdu->u.meta_buffer, 406 pdu->u.meta, pdu->meta_len, status); 407 if (req->bio) 408 blk_rq_unmap_user(req->bio); 409 blk_mq_free_request(req); 410 411 io_uring_cmd_done(ioucmd, status, result); 412 } 413 414 static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd) 415 { 416 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 417 418 if (pdu->bio) 419 blk_rq_unmap_user(pdu->bio); 420 421 io_uring_cmd_done(ioucmd, pdu->nvme_status, pdu->u.result); 422 } 423 424 static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req, 425 blk_status_t err) 426 { 427 struct io_uring_cmd *ioucmd = req->end_io_data; 428 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 429 void *cookie = READ_ONCE(ioucmd->cookie); 430 431 req->bio = pdu->bio; 432 if (nvme_req(req)->flags & NVME_REQ_CANCELLED) 433 pdu->nvme_status = -EINTR; 434 else 435 pdu->nvme_status = nvme_req(req)->status; 436 pdu->u.result = le64_to_cpu(nvme_req(req)->result.u64); 437 438 /* 439 * For iopoll, complete it directly. 440 * Otherwise, move the completion to task work. 441 */ 442 if (cookie != NULL && blk_rq_is_poll(req)) 443 nvme_uring_task_cb(ioucmd); 444 else 445 io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_cb); 446 447 return RQ_END_IO_FREE; 448 } 449 450 static enum rq_end_io_ret nvme_uring_cmd_end_io_meta(struct request *req, 451 blk_status_t err) 452 { 453 struct io_uring_cmd *ioucmd = req->end_io_data; 454 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 455 void *cookie = READ_ONCE(ioucmd->cookie); 456 457 req->bio = pdu->bio; 458 pdu->req = req; 459 460 /* 461 * For iopoll, complete it directly. 462 * Otherwise, move the completion to task work. 463 */ 464 if (cookie != NULL && blk_rq_is_poll(req)) 465 nvme_uring_task_meta_cb(ioucmd); 466 else 467 io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_meta_cb); 468 469 return RQ_END_IO_NONE; 470 } 471 472 static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 473 struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec) 474 { 475 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 476 const struct nvme_uring_cmd *cmd = ioucmd->cmd; 477 struct request_queue *q = ns ? ns->queue : ctrl->admin_q; 478 struct nvme_uring_data d; 479 struct nvme_command c; 480 struct request *req; 481 blk_opf_t rq_flags = 0; 482 blk_mq_req_flags_t blk_flags = 0; 483 void *meta = NULL; 484 int ret; 485 486 if (!capable(CAP_SYS_ADMIN)) 487 return -EACCES; 488 489 c.common.opcode = READ_ONCE(cmd->opcode); 490 c.common.flags = READ_ONCE(cmd->flags); 491 if (c.common.flags) 492 return -EINVAL; 493 494 c.common.command_id = 0; 495 c.common.nsid = cpu_to_le32(cmd->nsid); 496 if (!nvme_validate_passthru_nsid(ctrl, ns, le32_to_cpu(c.common.nsid))) 497 return -EINVAL; 498 499 c.common.cdw2[0] = cpu_to_le32(READ_ONCE(cmd->cdw2)); 500 c.common.cdw2[1] = cpu_to_le32(READ_ONCE(cmd->cdw3)); 501 c.common.metadata = 0; 502 c.common.dptr.prp1 = c.common.dptr.prp2 = 0; 503 c.common.cdw10 = cpu_to_le32(READ_ONCE(cmd->cdw10)); 504 c.common.cdw11 = cpu_to_le32(READ_ONCE(cmd->cdw11)); 505 c.common.cdw12 = cpu_to_le32(READ_ONCE(cmd->cdw12)); 506 c.common.cdw13 = cpu_to_le32(READ_ONCE(cmd->cdw13)); 507 c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14)); 508 c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15)); 509 510 d.metadata = READ_ONCE(cmd->metadata); 511 d.addr = READ_ONCE(cmd->addr); 512 d.data_len = READ_ONCE(cmd->data_len); 513 d.metadata_len = READ_ONCE(cmd->metadata_len); 514 d.timeout_ms = READ_ONCE(cmd->timeout_ms); 515 516 if (issue_flags & IO_URING_F_NONBLOCK) { 517 rq_flags = REQ_NOWAIT; 518 blk_flags = BLK_MQ_REQ_NOWAIT; 519 } 520 if (issue_flags & IO_URING_F_IOPOLL) 521 rq_flags |= REQ_POLLED; 522 523 retry: 524 req = nvme_alloc_user_request(q, &c, rq_flags, blk_flags); 525 if (IS_ERR(req)) 526 return PTR_ERR(req); 527 req->timeout = d.timeout_ms ? msecs_to_jiffies(d.timeout_ms) : 0; 528 529 if (d.addr && d.data_len) { 530 ret = nvme_map_user_request(req, d.addr, 531 d.data_len, nvme_to_user_ptr(d.metadata), 532 d.metadata_len, 0, &meta, ioucmd, vec); 533 if (ret) 534 return ret; 535 } 536 537 if (issue_flags & IO_URING_F_IOPOLL && rq_flags & REQ_POLLED) { 538 if (unlikely(!req->bio)) { 539 /* we can't poll this, so alloc regular req instead */ 540 blk_mq_free_request(req); 541 rq_flags &= ~REQ_POLLED; 542 goto retry; 543 } else { 544 WRITE_ONCE(ioucmd->cookie, req->bio); 545 req->bio->bi_opf |= REQ_POLLED; 546 } 547 } 548 /* to free bio on completion, as req->bio will be null at that time */ 549 pdu->bio = req->bio; 550 pdu->meta_len = d.metadata_len; 551 req->end_io_data = ioucmd; 552 if (pdu->meta_len) { 553 pdu->u.meta = meta; 554 pdu->u.meta_buffer = nvme_to_user_ptr(d.metadata); 555 req->end_io = nvme_uring_cmd_end_io_meta; 556 } else { 557 req->end_io = nvme_uring_cmd_end_io; 558 } 559 blk_execute_rq_nowait(req, false); 560 return -EIOCBQUEUED; 561 } 562 563 static bool is_ctrl_ioctl(unsigned int cmd) 564 { 565 if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD) 566 return true; 567 if (is_sed_ioctl(cmd)) 568 return true; 569 return false; 570 } 571 572 static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd, 573 void __user *argp) 574 { 575 switch (cmd) { 576 case NVME_IOCTL_ADMIN_CMD: 577 return nvme_user_cmd(ctrl, NULL, argp); 578 case NVME_IOCTL_ADMIN64_CMD: 579 return nvme_user_cmd64(ctrl, NULL, argp, false); 580 default: 581 return sed_ioctl(ctrl->opal_dev, cmd, argp); 582 } 583 } 584 585 #ifdef COMPAT_FOR_U64_ALIGNMENT 586 struct nvme_user_io32 { 587 __u8 opcode; 588 __u8 flags; 589 __u16 control; 590 __u16 nblocks; 591 __u16 rsvd; 592 __u64 metadata; 593 __u64 addr; 594 __u64 slba; 595 __u32 dsmgmt; 596 __u32 reftag; 597 __u16 apptag; 598 __u16 appmask; 599 } __attribute__((__packed__)); 600 #define NVME_IOCTL_SUBMIT_IO32 _IOW('N', 0x42, struct nvme_user_io32) 601 #endif /* COMPAT_FOR_U64_ALIGNMENT */ 602 603 static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, 604 void __user *argp) 605 { 606 switch (cmd) { 607 case NVME_IOCTL_ID: 608 force_successful_syscall_return(); 609 return ns->head->ns_id; 610 case NVME_IOCTL_IO_CMD: 611 return nvme_user_cmd(ns->ctrl, ns, argp); 612 /* 613 * struct nvme_user_io can have different padding on some 32-bit ABIs. 614 * Just accept the compat version as all fields that are used are the 615 * same size and at the same offset. 616 */ 617 #ifdef COMPAT_FOR_U64_ALIGNMENT 618 case NVME_IOCTL_SUBMIT_IO32: 619 #endif 620 case NVME_IOCTL_SUBMIT_IO: 621 return nvme_submit_io(ns, argp); 622 case NVME_IOCTL_IO64_CMD: 623 return nvme_user_cmd64(ns->ctrl, ns, argp, false); 624 case NVME_IOCTL_IO64_CMD_VEC: 625 return nvme_user_cmd64(ns->ctrl, ns, argp, true); 626 default: 627 return -ENOTTY; 628 } 629 } 630 631 static int __nvme_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *arg) 632 { 633 if (is_ctrl_ioctl(cmd)) 634 return nvme_ctrl_ioctl(ns->ctrl, cmd, arg); 635 return nvme_ns_ioctl(ns, cmd, arg); 636 } 637 638 int nvme_ioctl(struct block_device *bdev, fmode_t mode, 639 unsigned int cmd, unsigned long arg) 640 { 641 struct nvme_ns *ns = bdev->bd_disk->private_data; 642 643 return __nvme_ioctl(ns, cmd, (void __user *)arg); 644 } 645 646 long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 647 { 648 struct nvme_ns *ns = 649 container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev); 650 651 return __nvme_ioctl(ns, cmd, (void __user *)arg); 652 } 653 654 static int nvme_uring_cmd_checks(unsigned int issue_flags) 655 { 656 657 /* NVMe passthrough requires big SQE/CQE support */ 658 if ((issue_flags & (IO_URING_F_SQE128|IO_URING_F_CQE32)) != 659 (IO_URING_F_SQE128|IO_URING_F_CQE32)) 660 return -EOPNOTSUPP; 661 return 0; 662 } 663 664 static int nvme_ns_uring_cmd(struct nvme_ns *ns, struct io_uring_cmd *ioucmd, 665 unsigned int issue_flags) 666 { 667 struct nvme_ctrl *ctrl = ns->ctrl; 668 int ret; 669 670 BUILD_BUG_ON(sizeof(struct nvme_uring_cmd_pdu) > sizeof(ioucmd->pdu)); 671 672 ret = nvme_uring_cmd_checks(issue_flags); 673 if (ret) 674 return ret; 675 676 switch (ioucmd->cmd_op) { 677 case NVME_URING_CMD_IO: 678 ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, false); 679 break; 680 case NVME_URING_CMD_IO_VEC: 681 ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, true); 682 break; 683 default: 684 ret = -ENOTTY; 685 } 686 687 return ret; 688 } 689 690 int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) 691 { 692 struct nvme_ns *ns = container_of(file_inode(ioucmd->file)->i_cdev, 693 struct nvme_ns, cdev); 694 695 return nvme_ns_uring_cmd(ns, ioucmd, issue_flags); 696 } 697 698 int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd, 699 struct io_comp_batch *iob, 700 unsigned int poll_flags) 701 { 702 struct bio *bio; 703 int ret = 0; 704 struct nvme_ns *ns; 705 struct request_queue *q; 706 707 rcu_read_lock(); 708 bio = READ_ONCE(ioucmd->cookie); 709 ns = container_of(file_inode(ioucmd->file)->i_cdev, 710 struct nvme_ns, cdev); 711 q = ns->queue; 712 if (test_bit(QUEUE_FLAG_POLL, &q->queue_flags) && bio && bio->bi_bdev) 713 ret = bio_poll(bio, iob, poll_flags); 714 rcu_read_unlock(); 715 return ret; 716 } 717 #ifdef CONFIG_NVME_MULTIPATH 718 static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, 719 void __user *argp, struct nvme_ns_head *head, int srcu_idx) 720 __releases(&head->srcu) 721 { 722 struct nvme_ctrl *ctrl = ns->ctrl; 723 int ret; 724 725 nvme_get_ctrl(ns->ctrl); 726 srcu_read_unlock(&head->srcu, srcu_idx); 727 ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp); 728 729 nvme_put_ctrl(ctrl); 730 return ret; 731 } 732 733 int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode, 734 unsigned int cmd, unsigned long arg) 735 { 736 struct nvme_ns_head *head = bdev->bd_disk->private_data; 737 void __user *argp = (void __user *)arg; 738 struct nvme_ns *ns; 739 int srcu_idx, ret = -EWOULDBLOCK; 740 741 srcu_idx = srcu_read_lock(&head->srcu); 742 ns = nvme_find_path(head); 743 if (!ns) 744 goto out_unlock; 745 746 /* 747 * Handle ioctls that apply to the controller instead of the namespace 748 * seperately and drop the ns SRCU reference early. This avoids a 749 * deadlock when deleting namespaces using the passthrough interface. 750 */ 751 if (is_ctrl_ioctl(cmd)) 752 return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx); 753 754 ret = nvme_ns_ioctl(ns, cmd, argp); 755 out_unlock: 756 srcu_read_unlock(&head->srcu, srcu_idx); 757 return ret; 758 } 759 760 long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd, 761 unsigned long arg) 762 { 763 struct cdev *cdev = file_inode(file)->i_cdev; 764 struct nvme_ns_head *head = 765 container_of(cdev, struct nvme_ns_head, cdev); 766 void __user *argp = (void __user *)arg; 767 struct nvme_ns *ns; 768 int srcu_idx, ret = -EWOULDBLOCK; 769 770 srcu_idx = srcu_read_lock(&head->srcu); 771 ns = nvme_find_path(head); 772 if (!ns) 773 goto out_unlock; 774 775 if (is_ctrl_ioctl(cmd)) 776 return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx); 777 778 ret = nvme_ns_ioctl(ns, cmd, argp); 779 out_unlock: 780 srcu_read_unlock(&head->srcu, srcu_idx); 781 return ret; 782 } 783 784 int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd, 785 unsigned int issue_flags) 786 { 787 struct cdev *cdev = file_inode(ioucmd->file)->i_cdev; 788 struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev); 789 int srcu_idx = srcu_read_lock(&head->srcu); 790 struct nvme_ns *ns = nvme_find_path(head); 791 int ret = -EINVAL; 792 793 if (ns) 794 ret = nvme_ns_uring_cmd(ns, ioucmd, issue_flags); 795 srcu_read_unlock(&head->srcu, srcu_idx); 796 return ret; 797 } 798 799 int nvme_ns_head_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd, 800 struct io_comp_batch *iob, 801 unsigned int poll_flags) 802 { 803 struct cdev *cdev = file_inode(ioucmd->file)->i_cdev; 804 struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev); 805 int srcu_idx = srcu_read_lock(&head->srcu); 806 struct nvme_ns *ns = nvme_find_path(head); 807 struct bio *bio; 808 int ret = 0; 809 struct request_queue *q; 810 811 if (ns) { 812 rcu_read_lock(); 813 bio = READ_ONCE(ioucmd->cookie); 814 q = ns->queue; 815 if (test_bit(QUEUE_FLAG_POLL, &q->queue_flags) && bio 816 && bio->bi_bdev) 817 ret = bio_poll(bio, iob, poll_flags); 818 rcu_read_unlock(); 819 } 820 srcu_read_unlock(&head->srcu, srcu_idx); 821 return ret; 822 } 823 #endif /* CONFIG_NVME_MULTIPATH */ 824 825 int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) 826 { 827 struct nvme_ctrl *ctrl = ioucmd->file->private_data; 828 int ret; 829 830 /* IOPOLL not supported yet */ 831 if (issue_flags & IO_URING_F_IOPOLL) 832 return -EOPNOTSUPP; 833 834 ret = nvme_uring_cmd_checks(issue_flags); 835 if (ret) 836 return ret; 837 838 switch (ioucmd->cmd_op) { 839 case NVME_URING_CMD_ADMIN: 840 ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, false); 841 break; 842 case NVME_URING_CMD_ADMIN_VEC: 843 ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, true); 844 break; 845 default: 846 ret = -ENOTTY; 847 } 848 849 return ret; 850 } 851 852 static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp) 853 { 854 struct nvme_ns *ns; 855 int ret; 856 857 down_read(&ctrl->namespaces_rwsem); 858 if (list_empty(&ctrl->namespaces)) { 859 ret = -ENOTTY; 860 goto out_unlock; 861 } 862 863 ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list); 864 if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) { 865 dev_warn(ctrl->device, 866 "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n"); 867 ret = -EINVAL; 868 goto out_unlock; 869 } 870 871 dev_warn(ctrl->device, 872 "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n"); 873 kref_get(&ns->kref); 874 up_read(&ctrl->namespaces_rwsem); 875 876 ret = nvme_user_cmd(ctrl, ns, argp); 877 nvme_put_ns(ns); 878 return ret; 879 880 out_unlock: 881 up_read(&ctrl->namespaces_rwsem); 882 return ret; 883 } 884 885 long nvme_dev_ioctl(struct file *file, unsigned int cmd, 886 unsigned long arg) 887 { 888 struct nvme_ctrl *ctrl = file->private_data; 889 void __user *argp = (void __user *)arg; 890 891 switch (cmd) { 892 case NVME_IOCTL_ADMIN_CMD: 893 return nvme_user_cmd(ctrl, NULL, argp); 894 case NVME_IOCTL_ADMIN64_CMD: 895 return nvme_user_cmd64(ctrl, NULL, argp, false); 896 case NVME_IOCTL_IO_CMD: 897 return nvme_dev_user_cmd(ctrl, argp); 898 case NVME_IOCTL_RESET: 899 if (!capable(CAP_SYS_ADMIN)) 900 return -EACCES; 901 dev_warn(ctrl->device, "resetting controller\n"); 902 return nvme_reset_ctrl_sync(ctrl); 903 case NVME_IOCTL_SUBSYS_RESET: 904 if (!capable(CAP_SYS_ADMIN)) 905 return -EACCES; 906 return nvme_reset_subsystem(ctrl); 907 case NVME_IOCTL_RESCAN: 908 if (!capable(CAP_SYS_ADMIN)) 909 return -EACCES; 910 nvme_queue_scan(ctrl); 911 return 0; 912 default: 913 return -ENOTTY; 914 } 915 } 916