1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2011-2014, Intel Corporation. 4 * Copyright (c) 2017-2021 Christoph Hellwig. 5 */ 6 #include <linux/blk-integrity.h> 7 #include <linux/ptrace.h> /* for force_successful_syscall_return */ 8 #include <linux/nvme_ioctl.h> 9 #include <linux/io_uring/cmd.h> 10 #include "nvme.h" 11 12 enum { 13 NVME_IOCTL_VEC = (1 << 0), 14 NVME_IOCTL_PARTITION = (1 << 1), 15 }; 16 17 static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, 18 unsigned int flags, bool open_for_write) 19 { 20 u32 effects; 21 22 /* 23 * Do not allow unprivileged passthrough on partitions, as that allows an 24 * escape from the containment of the partition. 25 */ 26 if (flags & NVME_IOCTL_PARTITION) 27 goto admin; 28 29 /* 30 * Do not allow unprivileged processes to send vendor specific or fabrics 31 * commands as we can't be sure about their effects. 32 */ 33 if (c->common.opcode >= nvme_cmd_vendor_start || 34 c->common.opcode == nvme_fabrics_command) 35 goto admin; 36 37 /* 38 * Do not allow unprivileged passthrough of admin commands except 39 * for a subset of identify commands that contain information required 40 * to form proper I/O commands in userspace and do not expose any 41 * potentially sensitive information. 42 */ 43 if (!ns) { 44 if (c->common.opcode == nvme_admin_identify) { 45 switch (c->identify.cns) { 46 case NVME_ID_CNS_NS: 47 case NVME_ID_CNS_CS_NS: 48 case NVME_ID_CNS_NS_CS_INDEP: 49 case NVME_ID_CNS_CS_CTRL: 50 case NVME_ID_CNS_CTRL: 51 return true; 52 } 53 } 54 goto admin; 55 } 56 57 /* 58 * Check if the controller provides a Commands Supported and Effects log 59 * and marks this command as supported. If not reject unprivileged 60 * passthrough. 61 */ 62 effects = nvme_command_effects(ns->ctrl, ns, c->common.opcode); 63 if (!(effects & NVME_CMD_EFFECTS_CSUPP)) 64 goto admin; 65 66 /* 67 * Don't allow passthrough for command that have intrusive (or unknown) 68 * effects. 69 */ 70 if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC | 71 NVME_CMD_EFFECTS_UUID_SEL | 72 NVME_CMD_EFFECTS_SCOPE_MASK)) 73 goto admin; 74 75 /* 76 * Only allow I/O commands that transfer data to the controller or that 77 * change the logical block contents if the file descriptor is open for 78 * writing. 79 */ 80 if ((nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC)) && 81 !open_for_write) 82 goto admin; 83 84 return true; 85 admin: 86 return capable(CAP_SYS_ADMIN); 87 } 88 89 /* 90 * Convert integer values from ioctl structures to user pointers, silently 91 * ignoring the upper bits in the compat case to match behaviour of 32-bit 92 * kernels. 93 */ 94 static void __user *nvme_to_user_ptr(uintptr_t ptrval) 95 { 96 if (in_compat_syscall()) 97 ptrval = (compat_uptr_t)ptrval; 98 return (void __user *)ptrval; 99 } 100 101 static struct request *nvme_alloc_user_request(struct request_queue *q, 102 struct nvme_command *cmd, blk_opf_t rq_flags, 103 blk_mq_req_flags_t blk_flags) 104 { 105 struct request *req; 106 107 req = blk_mq_alloc_request(q, nvme_req_op(cmd) | rq_flags, blk_flags); 108 if (IS_ERR(req)) 109 return req; 110 nvme_init_request(req, cmd); 111 nvme_req(req)->flags |= NVME_REQ_USERCMD; 112 return req; 113 } 114 115 static int nvme_map_user_request(struct request *req, u64 ubuffer, 116 unsigned bufflen, void __user *meta_buffer, unsigned meta_len, 117 struct io_uring_cmd *ioucmd, unsigned int flags) 118 { 119 struct request_queue *q = req->q; 120 struct nvme_ns *ns = q->queuedata; 121 struct block_device *bdev = ns ? ns->disk->part0 : NULL; 122 bool supports_metadata = bdev && blk_get_integrity(bdev->bd_disk); 123 struct nvme_ctrl *ctrl = nvme_req(req)->ctrl; 124 bool has_metadata = meta_buffer && meta_len; 125 struct bio *bio = NULL; 126 int ret; 127 128 if (!nvme_ctrl_sgl_supported(ctrl)) 129 dev_warn_once(ctrl->device, "using unchecked data buffer\n"); 130 if (has_metadata) { 131 if (!supports_metadata) { 132 ret = -EINVAL; 133 goto out; 134 } 135 if (!nvme_ctrl_meta_sgl_supported(ctrl)) 136 dev_warn_once(ctrl->device, 137 "using unchecked metadata buffer\n"); 138 } 139 140 if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) { 141 struct iov_iter iter; 142 143 /* fixedbufs is only for non-vectored io */ 144 if (WARN_ON_ONCE(flags & NVME_IOCTL_VEC)) { 145 ret = -EINVAL; 146 goto out; 147 } 148 ret = io_uring_cmd_import_fixed(ubuffer, bufflen, 149 rq_data_dir(req), &iter, ioucmd); 150 if (ret < 0) 151 goto out; 152 ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL); 153 } else { 154 ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer), 155 bufflen, GFP_KERNEL, flags & NVME_IOCTL_VEC, 0, 156 0, rq_data_dir(req)); 157 } 158 159 if (ret) 160 goto out; 161 162 bio = req->bio; 163 if (bdev) 164 bio_set_dev(bio, bdev); 165 166 if (has_metadata) { 167 ret = blk_rq_integrity_map_user(req, meta_buffer, meta_len); 168 if (ret) 169 goto out_unmap; 170 } 171 172 return ret; 173 174 out_unmap: 175 if (bio) 176 blk_rq_unmap_user(bio); 177 out: 178 blk_mq_free_request(req); 179 return ret; 180 } 181 182 static int nvme_submit_user_cmd(struct request_queue *q, 183 struct nvme_command *cmd, u64 ubuffer, unsigned bufflen, 184 void __user *meta_buffer, unsigned meta_len, 185 u64 *result, unsigned timeout, unsigned int flags) 186 { 187 struct nvme_ns *ns = q->queuedata; 188 struct nvme_ctrl *ctrl; 189 struct request *req; 190 struct bio *bio; 191 u32 effects; 192 int ret; 193 194 req = nvme_alloc_user_request(q, cmd, 0, 0); 195 if (IS_ERR(req)) 196 return PTR_ERR(req); 197 198 req->timeout = timeout; 199 if (ubuffer && bufflen) { 200 ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer, 201 meta_len, NULL, flags); 202 if (ret) 203 return ret; 204 } 205 206 bio = req->bio; 207 ctrl = nvme_req(req)->ctrl; 208 209 effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode); 210 ret = nvme_execute_rq(req, false); 211 if (result) 212 *result = le64_to_cpu(nvme_req(req)->result.u64); 213 if (bio) 214 blk_rq_unmap_user(bio); 215 blk_mq_free_request(req); 216 217 if (effects) 218 nvme_passthru_end(ctrl, ns, effects, cmd, ret); 219 220 return ret; 221 } 222 223 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) 224 { 225 struct nvme_user_io io; 226 struct nvme_command c; 227 unsigned length, meta_len; 228 void __user *metadata; 229 230 if (copy_from_user(&io, uio, sizeof(io))) 231 return -EFAULT; 232 if (io.flags) 233 return -EINVAL; 234 235 switch (io.opcode) { 236 case nvme_cmd_write: 237 case nvme_cmd_read: 238 case nvme_cmd_compare: 239 break; 240 default: 241 return -EINVAL; 242 } 243 244 length = (io.nblocks + 1) << ns->head->lba_shift; 245 246 if ((io.control & NVME_RW_PRINFO_PRACT) && 247 (ns->head->ms == ns->head->pi_size)) { 248 /* 249 * Protection information is stripped/inserted by the 250 * controller. 251 */ 252 if (nvme_to_user_ptr(io.metadata)) 253 return -EINVAL; 254 meta_len = 0; 255 metadata = NULL; 256 } else { 257 meta_len = (io.nblocks + 1) * ns->head->ms; 258 metadata = nvme_to_user_ptr(io.metadata); 259 } 260 261 if (ns->head->features & NVME_NS_EXT_LBAS) { 262 length += meta_len; 263 meta_len = 0; 264 } else if (meta_len) { 265 if ((io.metadata & 3) || !io.metadata) 266 return -EINVAL; 267 } 268 269 memset(&c, 0, sizeof(c)); 270 c.rw.opcode = io.opcode; 271 c.rw.flags = io.flags; 272 c.rw.nsid = cpu_to_le32(ns->head->ns_id); 273 c.rw.slba = cpu_to_le64(io.slba); 274 c.rw.length = cpu_to_le16(io.nblocks); 275 c.rw.control = cpu_to_le16(io.control); 276 c.rw.dsmgmt = cpu_to_le32(io.dsmgmt); 277 c.rw.reftag = cpu_to_le32(io.reftag); 278 c.rw.lbat = cpu_to_le16(io.apptag); 279 c.rw.lbatm = cpu_to_le16(io.appmask); 280 281 return nvme_submit_user_cmd(ns->queue, &c, io.addr, length, metadata, 282 meta_len, NULL, 0, 0); 283 } 284 285 static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl, 286 struct nvme_ns *ns, __u32 nsid) 287 { 288 if (ns && nsid != ns->head->ns_id) { 289 dev_err(ctrl->device, 290 "%s: nsid (%u) in cmd does not match nsid (%u) of namespace\n", 291 current->comm, nsid, ns->head->ns_id); 292 return false; 293 } 294 295 return true; 296 } 297 298 static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 299 struct nvme_passthru_cmd __user *ucmd, unsigned int flags, 300 bool open_for_write) 301 { 302 struct nvme_passthru_cmd cmd; 303 struct nvme_command c; 304 unsigned timeout = 0; 305 u64 result; 306 int status; 307 308 if (copy_from_user(&cmd, ucmd, sizeof(cmd))) 309 return -EFAULT; 310 if (cmd.flags) 311 return -EINVAL; 312 if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid)) 313 return -EINVAL; 314 315 memset(&c, 0, sizeof(c)); 316 c.common.opcode = cmd.opcode; 317 c.common.flags = cmd.flags; 318 c.common.nsid = cpu_to_le32(cmd.nsid); 319 c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); 320 c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); 321 c.common.cdw10 = cpu_to_le32(cmd.cdw10); 322 c.common.cdw11 = cpu_to_le32(cmd.cdw11); 323 c.common.cdw12 = cpu_to_le32(cmd.cdw12); 324 c.common.cdw13 = cpu_to_le32(cmd.cdw13); 325 c.common.cdw14 = cpu_to_le32(cmd.cdw14); 326 c.common.cdw15 = cpu_to_le32(cmd.cdw15); 327 328 if (!nvme_cmd_allowed(ns, &c, 0, open_for_write)) 329 return -EACCES; 330 331 if (cmd.timeout_ms) 332 timeout = msecs_to_jiffies(cmd.timeout_ms); 333 334 status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, 335 cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata), 336 cmd.metadata_len, &result, timeout, 0); 337 338 if (status >= 0) { 339 if (put_user(result, &ucmd->result)) 340 return -EFAULT; 341 } 342 343 return status; 344 } 345 346 static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 347 struct nvme_passthru_cmd64 __user *ucmd, unsigned int flags, 348 bool open_for_write) 349 { 350 struct nvme_passthru_cmd64 cmd; 351 struct nvme_command c; 352 unsigned timeout = 0; 353 int status; 354 355 if (copy_from_user(&cmd, ucmd, sizeof(cmd))) 356 return -EFAULT; 357 if (cmd.flags) 358 return -EINVAL; 359 if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid)) 360 return -EINVAL; 361 362 memset(&c, 0, sizeof(c)); 363 c.common.opcode = cmd.opcode; 364 c.common.flags = cmd.flags; 365 c.common.nsid = cpu_to_le32(cmd.nsid); 366 c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); 367 c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); 368 c.common.cdw10 = cpu_to_le32(cmd.cdw10); 369 c.common.cdw11 = cpu_to_le32(cmd.cdw11); 370 c.common.cdw12 = cpu_to_le32(cmd.cdw12); 371 c.common.cdw13 = cpu_to_le32(cmd.cdw13); 372 c.common.cdw14 = cpu_to_le32(cmd.cdw14); 373 c.common.cdw15 = cpu_to_le32(cmd.cdw15); 374 375 if (!nvme_cmd_allowed(ns, &c, flags, open_for_write)) 376 return -EACCES; 377 378 if (cmd.timeout_ms) 379 timeout = msecs_to_jiffies(cmd.timeout_ms); 380 381 status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, 382 cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata), 383 cmd.metadata_len, &cmd.result, timeout, flags); 384 385 if (status >= 0) { 386 if (put_user(cmd.result, &ucmd->result)) 387 return -EFAULT; 388 } 389 390 return status; 391 } 392 393 struct nvme_uring_data { 394 __u64 metadata; 395 __u64 addr; 396 __u32 data_len; 397 __u32 metadata_len; 398 __u32 timeout_ms; 399 }; 400 401 /* 402 * This overlays struct io_uring_cmd pdu. 403 * Expect build errors if this grows larger than that. 404 */ 405 struct nvme_uring_cmd_pdu { 406 struct request *req; 407 struct bio *bio; 408 u64 result; 409 int status; 410 }; 411 412 static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu( 413 struct io_uring_cmd *ioucmd) 414 { 415 return io_uring_cmd_to_pdu(ioucmd, struct nvme_uring_cmd_pdu); 416 } 417 418 static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd, 419 unsigned issue_flags) 420 { 421 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 422 423 if (pdu->bio) 424 blk_rq_unmap_user(pdu->bio); 425 io_uring_cmd_done(ioucmd, pdu->status, pdu->result, issue_flags); 426 } 427 428 static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req, 429 blk_status_t err) 430 { 431 struct io_uring_cmd *ioucmd = req->end_io_data; 432 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 433 434 if (nvme_req(req)->flags & NVME_REQ_CANCELLED) { 435 pdu->status = -EINTR; 436 } else { 437 pdu->status = nvme_req(req)->status; 438 if (!pdu->status) 439 pdu->status = blk_status_to_errno(err); 440 } 441 pdu->result = le64_to_cpu(nvme_req(req)->result.u64); 442 443 /* 444 * For iopoll, complete it directly. Note that using the uring_cmd 445 * helper for this is safe only because we check blk_rq_is_poll(). 446 * As that returns false if we're NOT on a polled queue, then it's 447 * safe to use the polled completion helper. 448 * 449 * Otherwise, move the completion to task work. 450 */ 451 if (blk_rq_is_poll(req)) { 452 if (pdu->bio) 453 blk_rq_unmap_user(pdu->bio); 454 io_uring_cmd_iopoll_done(ioucmd, pdu->result, pdu->status); 455 } else { 456 io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb); 457 } 458 459 return RQ_END_IO_FREE; 460 } 461 462 static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 463 struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec) 464 { 465 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 466 const struct nvme_uring_cmd *cmd = io_uring_sqe_cmd(ioucmd->sqe); 467 struct request_queue *q = ns ? ns->queue : ctrl->admin_q; 468 struct nvme_uring_data d; 469 struct nvme_command c; 470 struct request *req; 471 blk_opf_t rq_flags = REQ_ALLOC_CACHE; 472 blk_mq_req_flags_t blk_flags = 0; 473 int ret; 474 475 c.common.opcode = READ_ONCE(cmd->opcode); 476 c.common.flags = READ_ONCE(cmd->flags); 477 if (c.common.flags) 478 return -EINVAL; 479 480 c.common.command_id = 0; 481 c.common.nsid = cpu_to_le32(cmd->nsid); 482 if (!nvme_validate_passthru_nsid(ctrl, ns, le32_to_cpu(c.common.nsid))) 483 return -EINVAL; 484 485 c.common.cdw2[0] = cpu_to_le32(READ_ONCE(cmd->cdw2)); 486 c.common.cdw2[1] = cpu_to_le32(READ_ONCE(cmd->cdw3)); 487 c.common.metadata = 0; 488 c.common.dptr.prp1 = c.common.dptr.prp2 = 0; 489 c.common.cdw10 = cpu_to_le32(READ_ONCE(cmd->cdw10)); 490 c.common.cdw11 = cpu_to_le32(READ_ONCE(cmd->cdw11)); 491 c.common.cdw12 = cpu_to_le32(READ_ONCE(cmd->cdw12)); 492 c.common.cdw13 = cpu_to_le32(READ_ONCE(cmd->cdw13)); 493 c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14)); 494 c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15)); 495 496 if (!nvme_cmd_allowed(ns, &c, 0, ioucmd->file->f_mode & FMODE_WRITE)) 497 return -EACCES; 498 499 d.metadata = READ_ONCE(cmd->metadata); 500 d.addr = READ_ONCE(cmd->addr); 501 d.data_len = READ_ONCE(cmd->data_len); 502 d.metadata_len = READ_ONCE(cmd->metadata_len); 503 d.timeout_ms = READ_ONCE(cmd->timeout_ms); 504 505 if (issue_flags & IO_URING_F_NONBLOCK) { 506 rq_flags |= REQ_NOWAIT; 507 blk_flags = BLK_MQ_REQ_NOWAIT; 508 } 509 if (issue_flags & IO_URING_F_IOPOLL) 510 rq_flags |= REQ_POLLED; 511 512 req = nvme_alloc_user_request(q, &c, rq_flags, blk_flags); 513 if (IS_ERR(req)) 514 return PTR_ERR(req); 515 req->timeout = d.timeout_ms ? msecs_to_jiffies(d.timeout_ms) : 0; 516 517 if (d.addr && d.data_len) { 518 ret = nvme_map_user_request(req, d.addr, 519 d.data_len, nvme_to_user_ptr(d.metadata), 520 d.metadata_len, ioucmd, vec); 521 if (ret) 522 return ret; 523 } 524 525 /* to free bio on completion, as req->bio will be null at that time */ 526 pdu->bio = req->bio; 527 pdu->req = req; 528 req->end_io_data = ioucmd; 529 req->end_io = nvme_uring_cmd_end_io; 530 blk_execute_rq_nowait(req, false); 531 return -EIOCBQUEUED; 532 } 533 534 static bool is_ctrl_ioctl(unsigned int cmd) 535 { 536 if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD) 537 return true; 538 if (is_sed_ioctl(cmd)) 539 return true; 540 return false; 541 } 542 543 static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd, 544 void __user *argp, bool open_for_write) 545 { 546 switch (cmd) { 547 case NVME_IOCTL_ADMIN_CMD: 548 return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write); 549 case NVME_IOCTL_ADMIN64_CMD: 550 return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write); 551 default: 552 return sed_ioctl(ctrl->opal_dev, cmd, argp); 553 } 554 } 555 556 #ifdef COMPAT_FOR_U64_ALIGNMENT 557 struct nvme_user_io32 { 558 __u8 opcode; 559 __u8 flags; 560 __u16 control; 561 __u16 nblocks; 562 __u16 rsvd; 563 __u64 metadata; 564 __u64 addr; 565 __u64 slba; 566 __u32 dsmgmt; 567 __u32 reftag; 568 __u16 apptag; 569 __u16 appmask; 570 } __attribute__((__packed__)); 571 #define NVME_IOCTL_SUBMIT_IO32 _IOW('N', 0x42, struct nvme_user_io32) 572 #endif /* COMPAT_FOR_U64_ALIGNMENT */ 573 574 static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, 575 void __user *argp, unsigned int flags, bool open_for_write) 576 { 577 switch (cmd) { 578 case NVME_IOCTL_ID: 579 force_successful_syscall_return(); 580 return ns->head->ns_id; 581 case NVME_IOCTL_IO_CMD: 582 return nvme_user_cmd(ns->ctrl, ns, argp, flags, open_for_write); 583 /* 584 * struct nvme_user_io can have different padding on some 32-bit ABIs. 585 * Just accept the compat version as all fields that are used are the 586 * same size and at the same offset. 587 */ 588 #ifdef COMPAT_FOR_U64_ALIGNMENT 589 case NVME_IOCTL_SUBMIT_IO32: 590 #endif 591 case NVME_IOCTL_SUBMIT_IO: 592 return nvme_submit_io(ns, argp); 593 case NVME_IOCTL_IO64_CMD_VEC: 594 flags |= NVME_IOCTL_VEC; 595 fallthrough; 596 case NVME_IOCTL_IO64_CMD: 597 return nvme_user_cmd64(ns->ctrl, ns, argp, flags, 598 open_for_write); 599 default: 600 return -ENOTTY; 601 } 602 } 603 604 int nvme_ioctl(struct block_device *bdev, blk_mode_t mode, 605 unsigned int cmd, unsigned long arg) 606 { 607 struct nvme_ns *ns = bdev->bd_disk->private_data; 608 bool open_for_write = mode & BLK_OPEN_WRITE; 609 void __user *argp = (void __user *)arg; 610 unsigned int flags = 0; 611 612 if (bdev_is_partition(bdev)) 613 flags |= NVME_IOCTL_PARTITION; 614 615 if (is_ctrl_ioctl(cmd)) 616 return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); 617 return nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write); 618 } 619 620 long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 621 { 622 struct nvme_ns *ns = 623 container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev); 624 bool open_for_write = file->f_mode & FMODE_WRITE; 625 void __user *argp = (void __user *)arg; 626 627 if (is_ctrl_ioctl(cmd)) 628 return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); 629 return nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write); 630 } 631 632 static int nvme_uring_cmd_checks(unsigned int issue_flags) 633 { 634 635 /* NVMe passthrough requires big SQE/CQE support */ 636 if ((issue_flags & (IO_URING_F_SQE128|IO_URING_F_CQE32)) != 637 (IO_URING_F_SQE128|IO_URING_F_CQE32)) 638 return -EOPNOTSUPP; 639 return 0; 640 } 641 642 static int nvme_ns_uring_cmd(struct nvme_ns *ns, struct io_uring_cmd *ioucmd, 643 unsigned int issue_flags) 644 { 645 struct nvme_ctrl *ctrl = ns->ctrl; 646 int ret; 647 648 ret = nvme_uring_cmd_checks(issue_flags); 649 if (ret) 650 return ret; 651 652 switch (ioucmd->cmd_op) { 653 case NVME_URING_CMD_IO: 654 ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, false); 655 break; 656 case NVME_URING_CMD_IO_VEC: 657 ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, true); 658 break; 659 default: 660 ret = -ENOTTY; 661 } 662 663 return ret; 664 } 665 666 int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) 667 { 668 struct nvme_ns *ns = container_of(file_inode(ioucmd->file)->i_cdev, 669 struct nvme_ns, cdev); 670 671 return nvme_ns_uring_cmd(ns, ioucmd, issue_flags); 672 } 673 674 int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd, 675 struct io_comp_batch *iob, 676 unsigned int poll_flags) 677 { 678 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 679 struct request *req = pdu->req; 680 681 if (req && blk_rq_is_poll(req)) 682 return blk_rq_poll(req, iob, poll_flags); 683 return 0; 684 } 685 #ifdef CONFIG_NVME_MULTIPATH 686 static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, 687 void __user *argp, struct nvme_ns_head *head, int srcu_idx, 688 bool open_for_write) 689 __releases(&head->srcu) 690 { 691 struct nvme_ctrl *ctrl = ns->ctrl; 692 int ret; 693 694 nvme_get_ctrl(ns->ctrl); 695 srcu_read_unlock(&head->srcu, srcu_idx); 696 ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); 697 698 nvme_put_ctrl(ctrl); 699 return ret; 700 } 701 702 int nvme_ns_head_ioctl(struct block_device *bdev, blk_mode_t mode, 703 unsigned int cmd, unsigned long arg) 704 { 705 struct nvme_ns_head *head = bdev->bd_disk->private_data; 706 bool open_for_write = mode & BLK_OPEN_WRITE; 707 void __user *argp = (void __user *)arg; 708 struct nvme_ns *ns; 709 int srcu_idx, ret = -EWOULDBLOCK; 710 unsigned int flags = 0; 711 712 if (bdev_is_partition(bdev)) 713 flags |= NVME_IOCTL_PARTITION; 714 715 srcu_idx = srcu_read_lock(&head->srcu); 716 ns = nvme_find_path(head); 717 if (!ns) 718 goto out_unlock; 719 720 /* 721 * Handle ioctls that apply to the controller instead of the namespace 722 * seperately and drop the ns SRCU reference early. This avoids a 723 * deadlock when deleting namespaces using the passthrough interface. 724 */ 725 if (is_ctrl_ioctl(cmd)) 726 return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, 727 open_for_write); 728 729 ret = nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write); 730 out_unlock: 731 srcu_read_unlock(&head->srcu, srcu_idx); 732 return ret; 733 } 734 735 long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd, 736 unsigned long arg) 737 { 738 bool open_for_write = file->f_mode & FMODE_WRITE; 739 struct cdev *cdev = file_inode(file)->i_cdev; 740 struct nvme_ns_head *head = 741 container_of(cdev, struct nvme_ns_head, cdev); 742 void __user *argp = (void __user *)arg; 743 struct nvme_ns *ns; 744 int srcu_idx, ret = -EWOULDBLOCK; 745 746 srcu_idx = srcu_read_lock(&head->srcu); 747 ns = nvme_find_path(head); 748 if (!ns) 749 goto out_unlock; 750 751 if (is_ctrl_ioctl(cmd)) 752 return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, 753 open_for_write); 754 755 ret = nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write); 756 out_unlock: 757 srcu_read_unlock(&head->srcu, srcu_idx); 758 return ret; 759 } 760 761 int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd, 762 unsigned int issue_flags) 763 { 764 struct cdev *cdev = file_inode(ioucmd->file)->i_cdev; 765 struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev); 766 int srcu_idx = srcu_read_lock(&head->srcu); 767 struct nvme_ns *ns = nvme_find_path(head); 768 int ret = -EINVAL; 769 770 if (ns) 771 ret = nvme_ns_uring_cmd(ns, ioucmd, issue_flags); 772 srcu_read_unlock(&head->srcu, srcu_idx); 773 return ret; 774 } 775 #endif /* CONFIG_NVME_MULTIPATH */ 776 777 int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) 778 { 779 struct nvme_ctrl *ctrl = ioucmd->file->private_data; 780 int ret; 781 782 /* IOPOLL not supported yet */ 783 if (issue_flags & IO_URING_F_IOPOLL) 784 return -EOPNOTSUPP; 785 786 ret = nvme_uring_cmd_checks(issue_flags); 787 if (ret) 788 return ret; 789 790 switch (ioucmd->cmd_op) { 791 case NVME_URING_CMD_ADMIN: 792 ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, false); 793 break; 794 case NVME_URING_CMD_ADMIN_VEC: 795 ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, true); 796 break; 797 default: 798 ret = -ENOTTY; 799 } 800 801 return ret; 802 } 803 804 static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp, 805 bool open_for_write) 806 { 807 struct nvme_ns *ns; 808 int ret, srcu_idx; 809 810 srcu_idx = srcu_read_lock(&ctrl->srcu); 811 if (list_empty(&ctrl->namespaces)) { 812 ret = -ENOTTY; 813 goto out_unlock; 814 } 815 816 ns = list_first_or_null_rcu(&ctrl->namespaces, struct nvme_ns, list); 817 if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) { 818 dev_warn(ctrl->device, 819 "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n"); 820 ret = -EINVAL; 821 goto out_unlock; 822 } 823 824 dev_warn(ctrl->device, 825 "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n"); 826 if (!nvme_get_ns(ns)) { 827 ret = -ENXIO; 828 goto out_unlock; 829 } 830 srcu_read_unlock(&ctrl->srcu, srcu_idx); 831 832 ret = nvme_user_cmd(ctrl, ns, argp, 0, open_for_write); 833 nvme_put_ns(ns); 834 return ret; 835 836 out_unlock: 837 srcu_read_unlock(&ctrl->srcu, srcu_idx); 838 return ret; 839 } 840 841 long nvme_dev_ioctl(struct file *file, unsigned int cmd, 842 unsigned long arg) 843 { 844 bool open_for_write = file->f_mode & FMODE_WRITE; 845 struct nvme_ctrl *ctrl = file->private_data; 846 void __user *argp = (void __user *)arg; 847 848 switch (cmd) { 849 case NVME_IOCTL_ADMIN_CMD: 850 return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write); 851 case NVME_IOCTL_ADMIN64_CMD: 852 return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write); 853 case NVME_IOCTL_IO_CMD: 854 return nvme_dev_user_cmd(ctrl, argp, open_for_write); 855 case NVME_IOCTL_RESET: 856 if (!capable(CAP_SYS_ADMIN)) 857 return -EACCES; 858 dev_warn(ctrl->device, "resetting controller\n"); 859 return nvme_reset_ctrl_sync(ctrl); 860 case NVME_IOCTL_SUBSYS_RESET: 861 if (!capable(CAP_SYS_ADMIN)) 862 return -EACCES; 863 return nvme_reset_subsystem(ctrl); 864 case NVME_IOCTL_RESCAN: 865 if (!capable(CAP_SYS_ADMIN)) 866 return -EACCES; 867 nvme_queue_scan(ctrl); 868 return 0; 869 default: 870 return -ENOTTY; 871 } 872 } 873