// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2011-2014, Intel Corporation.
 * Copyright (c) 2017-2021 Christoph Hellwig.
 */
#include <linux/blk-integrity.h>
#include <linux/ptrace.h>	/* for force_successful_syscall_return */
#include <linux/nvme_ioctl.h>
#include <linux/io_uring/cmd.h>
#include "nvme.h"

enum {
	NVME_IOCTL_VEC		= (1 << 0),
	NVME_IOCTL_PARTITION	= (1 << 1),
};

static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
		unsigned int flags, bool open_for_write)
{
	u32 effects;

	/*
	 * Do not allow unprivileged passthrough on partitions, as that allows an
	 * escape from the containment of the partition.
	 */
	if (flags & NVME_IOCTL_PARTITION)
		goto admin;

	/*
	 * Do not allow unprivileged processes to send vendor specific or fabrics
	 * commands as we can't be sure about their effects.
	 */
	if (c->common.opcode >= nvme_cmd_vendor_start ||
	    c->common.opcode == nvme_fabrics_command)
		goto admin;

	/*
	 * Do not allow unprivileged passthrough of admin commands except
	 * for a subset of identify commands that contain information required
	 * to form proper I/O commands in userspace and do not expose any
	 * potentially sensitive information.
	 */
	if (!ns) {
		if (c->common.opcode == nvme_admin_identify) {
			switch (c->identify.cns) {
			case NVME_ID_CNS_NS:
			case NVME_ID_CNS_CS_NS:
			case NVME_ID_CNS_NS_CS_INDEP:
			case NVME_ID_CNS_CS_CTRL:
			case NVME_ID_CNS_CTRL:
				return true;
			}
		}
		goto admin;
	}

	/*
	 * Check if the controller provides a Commands Supported and Effects log
	 * and marks this command as supported.  If not, reject unprivileged
	 * passthrough.
	 */
	effects = nvme_command_effects(ns->ctrl, ns, c->common.opcode);
	if (!(effects & NVME_CMD_EFFECTS_CSUPP))
		goto admin;

	/*
	 * Don't allow passthrough for commands that have intrusive (or unknown)
	 * effects.
	 */
	if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC |
			NVME_CMD_EFFECTS_UUID_SEL |
			NVME_CMD_EFFECTS_SCOPE_MASK))
		goto admin;

	/*
	 * Only allow I/O commands that transfer data to the controller or that
	 * change the logical block contents if the file descriptor is open for
	 * writing.
	 */
	if ((nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC)) &&
	    !open_for_write)
		goto admin;

	return true;
admin:
	return capable(CAP_SYS_ADMIN);
}

/*
 * Convert integer values from ioctl structures to user pointers, silently
 * ignoring the upper bits in the compat case to match behaviour of 32-bit
 * kernels.
 */
static void __user *nvme_to_user_ptr(uintptr_t ptrval)
{
	if (in_compat_syscall())
		ptrval = (compat_uptr_t)ptrval;
	return (void __user *)ptrval;
}

static struct request *nvme_alloc_user_request(struct request_queue *q,
		struct nvme_command *cmd, blk_opf_t rq_flags,
		blk_mq_req_flags_t blk_flags)
{
	struct request *req;

	req = blk_mq_alloc_request(q, nvme_req_op(cmd) | rq_flags, blk_flags);
	if (IS_ERR(req))
		return req;
	nvme_init_request(req, cmd);
	nvme_req(req)->flags |= NVME_REQ_USERCMD;
	return req;
}

static int nvme_map_user_request(struct request *req, u64 ubuffer,
		unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
		struct io_uring_cmd *ioucmd, unsigned int flags)
{
	struct request_queue *q = req->q;
	struct nvme_ns *ns = q->queuedata;
	struct block_device *bdev = ns ? ns->disk->part0 : NULL;
	bool supports_metadata = bdev && blk_get_integrity(bdev->bd_disk);
	struct nvme_ctrl *ctrl = nvme_req(req)->ctrl;
	bool has_metadata = meta_buffer && meta_len;
	struct bio *bio = NULL;
	int ret;

	if (!nvme_ctrl_sgl_supported(ctrl))
		dev_warn_once(ctrl->device, "using unchecked data buffer\n");
	if (has_metadata) {
		if (!supports_metadata)
			return -EINVAL;
		if (!nvme_ctrl_meta_sgl_supported(ctrl))
			dev_warn_once(ctrl->device,
				      "using unchecked metadata buffer\n");
	}

	if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) {
		struct iov_iter iter;

		/* fixedbufs is only for non-vectored io */
		if (WARN_ON_ONCE(flags & NVME_IOCTL_VEC))
			return -EINVAL;
		ret = io_uring_cmd_import_fixed(ubuffer, bufflen,
				rq_data_dir(req), &iter, ioucmd);
		if (ret < 0)
			goto out;
		ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL);
	} else {
		ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer),
				bufflen, GFP_KERNEL, flags & NVME_IOCTL_VEC, 0,
				0, rq_data_dir(req));
	}

	if (ret)
		goto out;

	bio = req->bio;
	if (bdev)
		bio_set_dev(bio, bdev);

	if (has_metadata) {
		ret = blk_rq_integrity_map_user(req, meta_buffer, meta_len);
		if (ret)
			goto out_unmap;
	}

	return ret;

out_unmap:
	if (bio)
		blk_rq_unmap_user(bio);
out:
	blk_mq_free_request(req);
	return ret;
}

static int nvme_submit_user_cmd(struct request_queue *q,
		struct nvme_command *cmd, u64 ubuffer, unsigned bufflen,
		void __user *meta_buffer, unsigned meta_len,
		u64 *result, unsigned timeout, unsigned int flags)
{
	struct nvme_ns *ns = q->queuedata;
	struct nvme_ctrl *ctrl;
	struct request *req;
	struct bio *bio;
	u32 effects;
	int ret;

	req = nvme_alloc_user_request(q, cmd, 0, 0);
	if (IS_ERR(req))
		return PTR_ERR(req);

	req->timeout = timeout;
	if (ubuffer && bufflen) {
		ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer,
				meta_len, NULL, flags);
		if (ret)
			return ret;
	}

	bio = req->bio;
	ctrl = nvme_req(req)->ctrl;

	effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode);
	ret = nvme_execute_rq(req, false);
	if (result)
		*result = le64_to_cpu(nvme_req(req)->result.u64);
	if (bio)
		blk_rq_unmap_user(bio);
	blk_mq_free_request(req);

	if (effects)
		nvme_passthru_end(ctrl, ns, effects, cmd, ret);

	return ret;
}

static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
{
	struct nvme_user_io io;
	struct nvme_command c;
	unsigned length, meta_len;
	void __user *metadata;

	if (copy_from_user(&io, uio, sizeof(io)))
		return -EFAULT;
	if (io.flags)
		return -EINVAL;

	switch (io.opcode) {
	case nvme_cmd_write:
	case nvme_cmd_read:
	case nvme_cmd_compare:
		break;
	default:
		return -EINVAL;
	}

	length = (io.nblocks + 1) << ns->head->lba_shift;

	if ((io.control & NVME_RW_PRINFO_PRACT) &&
	    (ns->head->ms == ns->head->pi_size)) {
		/*
		 * Protection information is stripped/inserted by the
		 * controller.
		 */
		if (nvme_to_user_ptr(io.metadata))
			return -EINVAL;
		meta_len = 0;
		metadata = NULL;
	} else {
		meta_len = (io.nblocks + 1) * ns->head->ms;
		metadata = nvme_to_user_ptr(io.metadata);
	}

	if (ns->head->features & NVME_NS_EXT_LBAS) {
		length += meta_len;
		meta_len = 0;
	} else if (meta_len) {
		if ((io.metadata & 3) || !io.metadata)
			return -EINVAL;
	}

	memset(&c, 0, sizeof(c));
	c.rw.opcode = io.opcode;
	c.rw.flags = io.flags;
	c.rw.nsid = cpu_to_le32(ns->head->ns_id);
	c.rw.slba = cpu_to_le64(io.slba);
	c.rw.length = cpu_to_le16(io.nblocks);
	c.rw.control = cpu_to_le16(io.control);
	c.rw.dsmgmt = cpu_to_le32(io.dsmgmt);
	c.rw.reftag = cpu_to_le32(io.reftag);
	c.rw.lbat = cpu_to_le16(io.apptag);
	c.rw.lbatm = cpu_to_le16(io.appmask);

	return nvme_submit_user_cmd(ns->queue, &c, io.addr, length, metadata,
			meta_len, NULL, 0, 0);
}

static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl,
					struct nvme_ns *ns, __u32 nsid)
{
	if (ns && nsid != ns->head->ns_id) {
		dev_err(ctrl->device,
			"%s: nsid (%u) in cmd does not match nsid (%u) of namespace\n",
			current->comm, nsid, ns->head->ns_id);
		return false;
	}

	return true;
}

static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
		struct nvme_passthru_cmd __user *ucmd, unsigned int flags,
		bool open_for_write)
{
	struct nvme_passthru_cmd cmd;
	struct nvme_command c;
	unsigned timeout = 0;
	u64 result;
	int status;

	if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
		return -EFAULT;
	if (cmd.flags)
		return -EINVAL;
	if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid))
		return -EINVAL;

	memset(&c, 0, sizeof(c));
	c.common.opcode = cmd.opcode;
	c.common.flags = cmd.flags;
	c.common.nsid = cpu_to_le32(cmd.nsid);
	c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
	c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
	c.common.cdw10 = cpu_to_le32(cmd.cdw10);
	c.common.cdw11 = cpu_to_le32(cmd.cdw11);
	c.common.cdw12 = cpu_to_le32(cmd.cdw12);
	c.common.cdw13 = cpu_to_le32(cmd.cdw13);
	c.common.cdw14 = cpu_to_le32(cmd.cdw14);
	c.common.cdw15 = cpu_to_le32(cmd.cdw15);

	if (!nvme_cmd_allowed(ns, &c, 0, open_for_write))
		return -EACCES;

	if (cmd.timeout_ms)
		timeout = msecs_to_jiffies(cmd.timeout_ms);

	status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
			cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata),
			cmd.metadata_len, &result, timeout, 0);

	if (status >= 0) {
		if (put_user(result, &ucmd->result))
			return -EFAULT;
	}

	return status;
}

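/*
 * Illustrative only, not part of the driver: a minimal userspace sketch of
 * the synchronous admin passthrough path handled by nvme_user_cmd() above.
 * It assumes an Identify Controller command (opcode 0x06, CNS 1) sent to a
 * controller character device such as /dev/nvme0; identify_ctrl() is a
 * hypothetical helper name.  nvme-cli wraps this same ioctl.
 *
 *	#include <fcntl.h>
 *	#include <stdint.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/nvme_ioctl.h>
 *
 *	int identify_ctrl(const char *dev, void *buf)	// buf: >= 4096 bytes
 *	{
 *		struct nvme_passthru_cmd cmd;
 *		int fd, ret;
 *
 *		fd = open(dev, O_RDONLY);
 *		if (fd < 0)
 *			return -1;
 *		memset(&cmd, 0, sizeof(cmd));
 *		cmd.opcode = 0x06;			// Identify
 *		cmd.addr = (__u64)(uintptr_t)buf;
 *		cmd.data_len = 4096;
 *		cmd.cdw10 = 1;				// CNS 1: Identify Controller
 *		ret = ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd);
 *		close(fd);
 *		return ret;				// NVMe status, or -1 with errno
 *	}
 */
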
static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
		struct nvme_passthru_cmd64 __user *ucmd, unsigned int flags,
		bool open_for_write)
{
	struct nvme_passthru_cmd64 cmd;
	struct nvme_command c;
	unsigned timeout = 0;
	int status;

	if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
		return -EFAULT;
	if (cmd.flags)
		return -EINVAL;
	if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid))
		return -EINVAL;

	memset(&c, 0, sizeof(c));
	c.common.opcode = cmd.opcode;
	c.common.flags = cmd.flags;
	c.common.nsid = cpu_to_le32(cmd.nsid);
	c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
	c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
	c.common.cdw10 = cpu_to_le32(cmd.cdw10);
	c.common.cdw11 = cpu_to_le32(cmd.cdw11);
	c.common.cdw12 = cpu_to_le32(cmd.cdw12);
	c.common.cdw13 = cpu_to_le32(cmd.cdw13);
	c.common.cdw14 = cpu_to_le32(cmd.cdw14);
	c.common.cdw15 = cpu_to_le32(cmd.cdw15);

	if (!nvme_cmd_allowed(ns, &c, flags, open_for_write))
		return -EACCES;

	if (cmd.timeout_ms)
		timeout = msecs_to_jiffies(cmd.timeout_ms);

	status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
			cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata),
			cmd.metadata_len, &cmd.result, timeout, flags);

	if (status >= 0) {
		if (put_user(cmd.result, &ucmd->result))
			return -EFAULT;
	}

	return status;
}

struct nvme_uring_data {
	__u64	metadata;
	__u64	addr;
	__u32	data_len;
	__u32	metadata_len;
	__u32	timeout_ms;
};

/*
 * This overlays struct io_uring_cmd pdu.
 * Expect build errors if this grows larger than that.
 */
struct nvme_uring_cmd_pdu {
	struct request *req;
	struct bio *bio;
	u64 result;
	int status;
};

static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu(
		struct io_uring_cmd *ioucmd)
{
	return io_uring_cmd_to_pdu(ioucmd, struct nvme_uring_cmd_pdu);
}

static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd,
			       unsigned issue_flags)
{
	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);

	if (pdu->bio)
		blk_rq_unmap_user(pdu->bio);
	io_uring_cmd_done(ioucmd, pdu->status, pdu->result, issue_flags);
}

static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
						blk_status_t err)
{
	struct io_uring_cmd *ioucmd = req->end_io_data;
	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);

	if (nvme_req(req)->flags & NVME_REQ_CANCELLED) {
		pdu->status = -EINTR;
	} else {
		pdu->status = nvme_req(req)->status;
		if (!pdu->status)
			pdu->status = blk_status_to_errno(err);
	}
	pdu->result = le64_to_cpu(nvme_req(req)->result.u64);

	/*
	 * For iopoll, complete it directly.  Using the uring_cmd polled
	 * completion helper is safe only because we check blk_rq_is_poll():
	 * it returns false when we are not on a polled queue.
	 *
	 * Otherwise, move the completion to task work.
	 */
	if (blk_rq_is_poll(req)) {
		if (pdu->bio)
			blk_rq_unmap_user(pdu->bio);
		io_uring_cmd_iopoll_done(ioucmd, pdu->result, pdu->status);
	} else {
		io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb);
	}

	return RQ_END_IO_FREE;
}

static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
		struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec)
{
	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
	const struct nvme_uring_cmd *cmd = io_uring_sqe_cmd(ioucmd->sqe);
	struct request_queue *q = ns ? ns->queue : ctrl->admin_q;
	struct nvme_uring_data d;
	struct nvme_command c;
	struct request *req;
	blk_opf_t rq_flags = REQ_ALLOC_CACHE;
	blk_mq_req_flags_t blk_flags = 0;
	int ret;

	c.common.opcode = READ_ONCE(cmd->opcode);
	c.common.flags = READ_ONCE(cmd->flags);
	if (c.common.flags)
		return -EINVAL;

	c.common.command_id = 0;
	c.common.nsid = cpu_to_le32(cmd->nsid);
	if (!nvme_validate_passthru_nsid(ctrl, ns, le32_to_cpu(c.common.nsid)))
		return -EINVAL;

	c.common.cdw2[0] = cpu_to_le32(READ_ONCE(cmd->cdw2));
	c.common.cdw2[1] = cpu_to_le32(READ_ONCE(cmd->cdw3));
	c.common.metadata = 0;
	c.common.dptr.prp1 = c.common.dptr.prp2 = 0;
	c.common.cdw10 = cpu_to_le32(READ_ONCE(cmd->cdw10));
	c.common.cdw11 = cpu_to_le32(READ_ONCE(cmd->cdw11));
	c.common.cdw12 = cpu_to_le32(READ_ONCE(cmd->cdw12));
	c.common.cdw13 = cpu_to_le32(READ_ONCE(cmd->cdw13));
	c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14));
	c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15));

	if (!nvme_cmd_allowed(ns, &c, 0, ioucmd->file->f_mode & FMODE_WRITE))
		return -EACCES;

	d.metadata = READ_ONCE(cmd->metadata);
	d.addr = READ_ONCE(cmd->addr);
	d.data_len = READ_ONCE(cmd->data_len);
	d.metadata_len = READ_ONCE(cmd->metadata_len);
	d.timeout_ms = READ_ONCE(cmd->timeout_ms);

	if (issue_flags & IO_URING_F_NONBLOCK) {
		rq_flags |= REQ_NOWAIT;
		blk_flags = BLK_MQ_REQ_NOWAIT;
	}
	if (issue_flags & IO_URING_F_IOPOLL)
		rq_flags |= REQ_POLLED;

	req = nvme_alloc_user_request(q, &c, rq_flags, blk_flags);
	if (IS_ERR(req))
		return PTR_ERR(req);
	req->timeout = d.timeout_ms ? msecs_to_jiffies(d.timeout_ms) : 0;

	if (d.addr && d.data_len) {
		ret = nvme_map_user_request(req, d.addr,
			d.data_len, nvme_to_user_ptr(d.metadata),
			d.metadata_len, ioucmd, vec);
		if (ret)
			return ret;
	}

	/* to free bio on completion, as req->bio will be null at that time */
	pdu->bio = req->bio;
	pdu->req = req;
	req->end_io_data = ioucmd;
	req->end_io = nvme_uring_cmd_end_io;
	blk_execute_rq_nowait(req, false);
	return -EIOCBQUEUED;
}

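/*
 * Illustrative only, not part of the driver: a rough userspace sketch of
 * driving nvme_uring_cmd_io() via NVME_URING_CMD_IO, assuming liburing and
 * a namespace character device such as /dev/ng0n1, with fd, nsid and buf
 * already set up.  The ring must be created with big SQEs/CQEs
 * (IORING_SETUP_SQE128 | IORING_SETUP_CQE32), matching the check in
 * nvme_uring_cmd_checks() below; the passthrough command lives in the
 * SQE's command area and the 64-bit NVMe result comes back in the
 * extended CQE.
 *
 *	struct io_uring ring;
 *	struct io_uring_params p = { .flags = IORING_SETUP_SQE128 |
 *					      IORING_SETUP_CQE32 };
 *	struct io_uring_sqe *sqe;
 *	struct io_uring_cqe *cqe;
 *	struct nvme_uring_cmd *cmd;
 *
 *	io_uring_queue_init_params(8, &ring, &p);
 *	sqe = io_uring_get_sqe(&ring);
 *	sqe->opcode = IORING_OP_URING_CMD;
 *	sqe->fd = fd;				// open("/dev/ng0n1", O_RDONLY)
 *	sqe->cmd_op = NVME_URING_CMD_IO;
 *	cmd = (struct nvme_uring_cmd *)sqe->cmd;
 *	memset(cmd, 0, sizeof(*cmd));
 *	cmd->opcode = 0x02;			// NVMe Read
 *	cmd->nsid = nsid;			// must match the device's nsid
 *	cmd->addr = (__u64)(uintptr_t)buf;
 *	cmd->data_len = 4096;
 *	cmd->cdw10 = 0;				// starting LBA (low 32 bits)
 *	cmd->cdw12 = 0;				// 0's based block count; must
 *						// match data_len / LBA size
 *	io_uring_submit(&ring);
 *	io_uring_wait_cqe(&ring, &cqe);		// cqe->res: status or -errno
 */
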
static bool is_ctrl_ioctl(unsigned int cmd)
{
	if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD)
		return true;
	if (is_sed_ioctl(cmd))
		return true;
	return false;
}

static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd,
		void __user *argp, bool open_for_write)
{
	switch (cmd) {
	case NVME_IOCTL_ADMIN_CMD:
		return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write);
	case NVME_IOCTL_ADMIN64_CMD:
		return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write);
	default:
		return sed_ioctl(ctrl->opal_dev, cmd, argp);
	}
}

#ifdef COMPAT_FOR_U64_ALIGNMENT
struct nvme_user_io32 {
	__u8	opcode;
	__u8	flags;
	__u16	control;
	__u16	nblocks;
	__u16	rsvd;
	__u64	metadata;
	__u64	addr;
	__u64	slba;
	__u32	dsmgmt;
	__u32	reftag;
	__u16	apptag;
	__u16	appmask;
} __attribute__((__packed__));
#define NVME_IOCTL_SUBMIT_IO32	_IOW('N', 0x42, struct nvme_user_io32)
#endif /* COMPAT_FOR_U64_ALIGNMENT */

static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
		void __user *argp, unsigned int flags, bool open_for_write)
{
	switch (cmd) {
	case NVME_IOCTL_ID:
		force_successful_syscall_return();
		return ns->head->ns_id;
	case NVME_IOCTL_IO_CMD:
		return nvme_user_cmd(ns->ctrl, ns, argp, flags, open_for_write);
	/*
	 * struct nvme_user_io can have different padding on some 32-bit ABIs.
	 * Just accept the compat version as all fields that are used are the
	 * same size and at the same offset.
	 */
#ifdef COMPAT_FOR_U64_ALIGNMENT
	case NVME_IOCTL_SUBMIT_IO32:
#endif
	case NVME_IOCTL_SUBMIT_IO:
		return nvme_submit_io(ns, argp);
	case NVME_IOCTL_IO64_CMD_VEC:
		flags |= NVME_IOCTL_VEC;
		fallthrough;
	case NVME_IOCTL_IO64_CMD:
		return nvme_user_cmd64(ns->ctrl, ns, argp, flags,
				       open_for_write);
	default:
		return -ENOTTY;
	}
}

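/*
 * Illustrative only, not part of the driver: NVME_IOCTL_ID hands the
 * namespace ID back as the ioctl return value itself (which is why
 * nvme_ns_ioctl() calls force_successful_syscall_return() above), so a
 * userspace caller with an open namespace block or char device fd might
 * simply do:
 *
 *	int nsid = ioctl(fd, NVME_IOCTL_ID);
 *	if (nsid < 0)
 *		perror("NVME_IOCTL_ID");
 */
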
int nvme_ioctl(struct block_device *bdev, blk_mode_t mode,
		unsigned int cmd, unsigned long arg)
{
	struct nvme_ns *ns = bdev->bd_disk->private_data;
	bool open_for_write = mode & BLK_OPEN_WRITE;
	void __user *argp = (void __user *)arg;
	unsigned int flags = 0;

	if (bdev_is_partition(bdev))
		flags |= NVME_IOCTL_PARTITION;

	if (is_ctrl_ioctl(cmd))
		return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write);
	return nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write);
}

long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct nvme_ns *ns =
		container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev);
	bool open_for_write = file->f_mode & FMODE_WRITE;
	void __user *argp = (void __user *)arg;

	if (is_ctrl_ioctl(cmd))
		return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write);
	return nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write);
}

static int nvme_uring_cmd_checks(unsigned int issue_flags)
{

	/* NVMe passthrough requires big SQE/CQE support */
	if ((issue_flags & (IO_URING_F_SQE128|IO_URING_F_CQE32)) !=
	    (IO_URING_F_SQE128|IO_URING_F_CQE32))
		return -EOPNOTSUPP;
	return 0;
}

static int nvme_ns_uring_cmd(struct nvme_ns *ns, struct io_uring_cmd *ioucmd,
			     unsigned int issue_flags)
{
	struct nvme_ctrl *ctrl = ns->ctrl;
	int ret;

	ret = nvme_uring_cmd_checks(issue_flags);
	if (ret)
		return ret;

	switch (ioucmd->cmd_op) {
	case NVME_URING_CMD_IO:
		ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, false);
		break;
	case NVME_URING_CMD_IO_VEC:
		ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, true);
		break;
	default:
		ret = -ENOTTY;
	}

	return ret;
}

int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
{
	struct nvme_ns *ns = container_of(file_inode(ioucmd->file)->i_cdev,
			struct nvme_ns, cdev);

	return nvme_ns_uring_cmd(ns, ioucmd, issue_flags);
}

int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
				 struct io_comp_batch *iob,
				 unsigned int poll_flags)
{
	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
	struct request *req = pdu->req;

	if (req && blk_rq_is_poll(req))
		return blk_rq_poll(req, iob, poll_flags);
	return 0;
}

#ifdef CONFIG_NVME_MULTIPATH
static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
		void __user *argp, struct nvme_ns_head *head, int srcu_idx,
		bool open_for_write)
	__releases(&head->srcu)
{
	struct nvme_ctrl *ctrl = ns->ctrl;
	int ret;

	nvme_get_ctrl(ns->ctrl);
	srcu_read_unlock(&head->srcu, srcu_idx);
	ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write);

	nvme_put_ctrl(ctrl);
	return ret;
}

int nvme_ns_head_ioctl(struct block_device *bdev, blk_mode_t mode,
		unsigned int cmd, unsigned long arg)
{
	struct nvme_ns_head *head = bdev->bd_disk->private_data;
	bool open_for_write = mode & BLK_OPEN_WRITE;
	void __user *argp = (void __user *)arg;
	struct nvme_ns *ns;
	int srcu_idx, ret = -EWOULDBLOCK;
	unsigned int flags = 0;

	if (bdev_is_partition(bdev))
		flags |= NVME_IOCTL_PARTITION;

	srcu_idx = srcu_read_lock(&head->srcu);
	ns = nvme_find_path(head);
	if (!ns)
		goto out_unlock;

	/*
	 * Handle ioctls that apply to the controller instead of the namespace
	 * separately and drop the ns SRCU reference early.  This avoids a
	 * deadlock when deleting namespaces using the passthrough interface.
	 */
	if (is_ctrl_ioctl(cmd))
		return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx,
					       open_for_write);

	ret = nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write);
out_unlock:
	srcu_read_unlock(&head->srcu, srcu_idx);
	return ret;
}

long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
		unsigned long arg)
{
	bool open_for_write = file->f_mode & FMODE_WRITE;
	struct cdev *cdev = file_inode(file)->i_cdev;
	struct nvme_ns_head *head =
		container_of(cdev, struct nvme_ns_head, cdev);
	void __user *argp = (void __user *)arg;
	struct nvme_ns *ns;
	int srcu_idx, ret = -EWOULDBLOCK;

	srcu_idx = srcu_read_lock(&head->srcu);
	ns = nvme_find_path(head);
	if (!ns)
		goto out_unlock;

	if (is_ctrl_ioctl(cmd))
		return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx,
				open_for_write);

	ret = nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write);
out_unlock:
	srcu_read_unlock(&head->srcu, srcu_idx);
	return ret;
}

int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd,
		unsigned int issue_flags)
{
	struct cdev *cdev = file_inode(ioucmd->file)->i_cdev;
	struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev);
	int srcu_idx = srcu_read_lock(&head->srcu);
	struct nvme_ns *ns = nvme_find_path(head);
	int ret = -EINVAL;

	if (ns)
		ret = nvme_ns_uring_cmd(ns, ioucmd, issue_flags);
	srcu_read_unlock(&head->srcu, srcu_idx);
	return ret;
}
#endif /* CONFIG_NVME_MULTIPATH */

int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
{
	struct nvme_ctrl *ctrl = ioucmd->file->private_data;
	int ret;

	/* IOPOLL not supported yet */
	if (issue_flags & IO_URING_F_IOPOLL)
		return -EOPNOTSUPP;

	ret = nvme_uring_cmd_checks(issue_flags);
	if (ret)
		return ret;

	switch (ioucmd->cmd_op) {
	case NVME_URING_CMD_ADMIN:
		ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, false);
		break;
	case NVME_URING_CMD_ADMIN_VEC:
		ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, true);
		break;
	default:
		ret = -ENOTTY;
	}

	return ret;
}

static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp,
		bool open_for_write)
{
	struct nvme_ns *ns;
	int ret, srcu_idx;

	srcu_idx = srcu_read_lock(&ctrl->srcu);
	if (list_empty(&ctrl->namespaces)) {
		ret = -ENOTTY;
		goto out_unlock;
	}

	ns = list_first_or_null_rcu(&ctrl->namespaces, struct nvme_ns, list);
	if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) {
		dev_warn(ctrl->device,
			"NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n");
		ret = -EINVAL;
		goto out_unlock;
	}

	dev_warn(ctrl->device,
		"using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n");
	if (!nvme_get_ns(ns)) {
		ret = -ENXIO;
		goto out_unlock;
	}
	srcu_read_unlock(&ctrl->srcu, srcu_idx);

	ret = nvme_user_cmd(ctrl, ns, argp, 0, open_for_write);
	nvme_put_ns(ns);
	return ret;

out_unlock:
	srcu_read_unlock(&ctrl->srcu, srcu_idx);
	return ret;
}

long nvme_dev_ioctl(struct file *file, unsigned int cmd,
		unsigned long arg)
{
	bool open_for_write = file->f_mode & FMODE_WRITE;
	struct nvme_ctrl *ctrl = file->private_data;
	void __user *argp = (void __user *)arg;

	switch (cmd) {
	case NVME_IOCTL_ADMIN_CMD:
		return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write);
	case NVME_IOCTL_ADMIN64_CMD:
		return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write);
	case NVME_IOCTL_IO_CMD:
		return nvme_dev_user_cmd(ctrl, argp, open_for_write);
	case NVME_IOCTL_RESET:
		if (!capable(CAP_SYS_ADMIN))
			return -EACCES;
		dev_warn(ctrl->device, "resetting controller\n");
		return nvme_reset_ctrl_sync(ctrl);
	case NVME_IOCTL_SUBSYS_RESET:
		if (!capable(CAP_SYS_ADMIN))
			return -EACCES;
		return nvme_reset_subsystem(ctrl);
	case NVME_IOCTL_RESCAN:
		if (!capable(CAP_SYS_ADMIN))
			return -EACCES;
		nvme_queue_scan(ctrl);
		return 0;
	default:
		return -ENOTTY;
	}
}

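/*
 * Illustrative only, not part of the driver: the controller-level ioctls
 * handled by nvme_dev_ioctl() above take no argument and require
 * CAP_SYS_ADMIN, so a privileged userspace caller might trigger a reset or
 * a namespace rescan like this, assuming fd is an open controller character
 * device such as /dev/nvme0:
 *
 *	if (ioctl(fd, NVME_IOCTL_RESET) < 0)
 *		perror("NVME_IOCTL_RESET");
 *	if (ioctl(fd, NVME_IOCTL_RESCAN) < 0)
 *		perror("NVME_IOCTL_RESCAN");
 */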