// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2011-2014, Intel Corporation.
 * Copyright (c) 2017-2021 Christoph Hellwig.
 */
#include <linux/blk-integrity.h>
#include <linux/ptrace.h>	/* for force_successful_syscall_return */
#include <linux/nvme_ioctl.h>
#include <linux/io_uring/cmd.h>
#include "nvme.h"

enum {
	NVME_IOCTL_VEC		= (1 << 0),
	NVME_IOCTL_PARTITION	= (1 << 1),
};

static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
		unsigned int flags, bool open_for_write)
{
	u32 effects;

	/*
	 * Do not allow unprivileged passthrough on partitions, as that allows an
	 * escape from the containment of the partition.
	 */
	if (flags & NVME_IOCTL_PARTITION)
		goto admin;

	/*
	 * Do not allow unprivileged processes to send vendor specific or fabrics
	 * commands as we can't be sure about their effects.
	 */
	if (c->common.opcode >= nvme_cmd_vendor_start ||
	    c->common.opcode == nvme_fabrics_command)
		goto admin;

	/*
	 * Do not allow unprivileged passthrough of admin commands except
	 * for a subset of identify commands that contain information required
	 * to form proper I/O commands in userspace and do not expose any
	 * potentially sensitive information.
	 */
	if (!ns) {
		if (c->common.opcode == nvme_admin_identify) {
			switch (c->identify.cns) {
			case NVME_ID_CNS_NS:
			case NVME_ID_CNS_CS_NS:
			case NVME_ID_CNS_NS_CS_INDEP:
			case NVME_ID_CNS_CS_CTRL:
			case NVME_ID_CNS_CTRL:
				return true;
			}
		}
		goto admin;
	}

	/*
	 * Check if the controller provides a Commands Supported and Effects log
	 * and marks this command as supported.  If not, reject unprivileged
	 * passthrough.
	 */
	effects = nvme_command_effects(ns->ctrl, ns, c->common.opcode);
	if (!(effects & NVME_CMD_EFFECTS_CSUPP))
		goto admin;

	/*
	 * Don't allow passthrough for commands that have intrusive (or unknown)
	 * effects.
	 */
	if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC |
			NVME_CMD_EFFECTS_UUID_SEL |
			NVME_CMD_EFFECTS_SCOPE_MASK))
		goto admin;

	/*
	 * Only allow I/O commands that transfer data to the controller or that
	 * change the logical block contents if the file descriptor is open for
	 * writing.
	 */
	if ((nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC)) &&
	    !open_for_write)
		goto admin;

	return true;
admin:
	return capable(CAP_SYS_ADMIN);
}

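/*
 * For example, an unprivileged Identify Controller (CNS 0x01) on the
 * controller node passes the checks above, while a command whose effects
 * entry reports anything beyond CSUPP, LBCC, UUID selection or the scope
 * bits (typically e.g. Format NVM) falls through to the admin label and
 * requires CAP_SYS_ADMIN.
 */
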
/*
 * Convert integer values from ioctl structures to user pointers, silently
 * ignoring the upper bits in the compat case to match behaviour of 32-bit
 * kernels.
 */
static void __user *nvme_to_user_ptr(uintptr_t ptrval)
{
	if (in_compat_syscall())
		ptrval = (compat_uptr_t)ptrval;
	return (void __user *)ptrval;
}

static struct request *nvme_alloc_user_request(struct request_queue *q,
		struct nvme_command *cmd, blk_opf_t rq_flags,
		blk_mq_req_flags_t blk_flags)
{
	struct request *req;

	req = blk_mq_alloc_request(q, nvme_req_op(cmd) | rq_flags, blk_flags);
	if (IS_ERR(req))
		return req;
	nvme_init_request(req, cmd);
	nvme_req(req)->flags |= NVME_REQ_USERCMD;
	return req;
}

static int nvme_map_user_request(struct request *req, u64 ubuffer,
		unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
		struct io_uring_cmd *ioucmd, unsigned int flags,
		unsigned int iou_issue_flags)
{
	struct request_queue *q = req->q;
	struct nvme_ns *ns = q->queuedata;
	struct block_device *bdev = ns ? ns->disk->part0 : NULL;
	bool supports_metadata = bdev && blk_get_integrity(bdev->bd_disk);
	struct nvme_ctrl *ctrl = nvme_req(req)->ctrl;
	bool has_metadata = meta_buffer && meta_len;
	struct bio *bio = NULL;
	int ret;

	if (!nvme_ctrl_sgl_supported(ctrl))
		dev_warn_once(ctrl->device, "using unchecked data buffer\n");
	if (has_metadata) {
		if (!supports_metadata) {
			ret = -EINVAL;
			goto out;
		}
		if (!nvme_ctrl_meta_sgl_supported(ctrl))
			dev_warn_once(ctrl->device,
				      "using unchecked metadata buffer\n");
	}

	if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) {
		struct iov_iter iter;

		/* fixedbufs is only for non-vectored io */
		if (WARN_ON_ONCE(flags & NVME_IOCTL_VEC)) {
			ret = -EINVAL;
			goto out;
		}
		ret = io_uring_cmd_import_fixed(ubuffer, bufflen,
				rq_data_dir(req), &iter, ioucmd,
				iou_issue_flags);
		if (ret < 0)
			goto out;
		ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL);
	} else {
		ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer),
				bufflen, GFP_KERNEL, flags & NVME_IOCTL_VEC, 0,
				0, rq_data_dir(req));
	}

	if (ret)
		goto out;

	bio = req->bio;
	if (bdev)
		bio_set_dev(bio, bdev);

	if (has_metadata) {
		ret = blk_rq_integrity_map_user(req, meta_buffer, meta_len);
		if (ret)
			goto out_unmap;
	}

	return ret;

out_unmap:
	if (bio)
		blk_rq_unmap_user(bio);
out:
	blk_mq_free_request(req);
	return ret;
}

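/*
 * Note that nvme_map_user_request() frees the request itself on failure, so
 * its callers only propagate the error and must not call
 * blk_mq_free_request() again.
 */
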
static int nvme_submit_user_cmd(struct request_queue *q,
		struct nvme_command *cmd, u64 ubuffer, unsigned bufflen,
		void __user *meta_buffer, unsigned meta_len,
		u64 *result, unsigned timeout, unsigned int flags)
{
	struct nvme_ns *ns = q->queuedata;
	struct nvme_ctrl *ctrl;
	struct request *req;
	struct bio *bio;
	u32 effects;
	int ret;

	req = nvme_alloc_user_request(q, cmd, 0, 0);
	if (IS_ERR(req))
		return PTR_ERR(req);

	req->timeout = timeout;
	if (ubuffer && bufflen) {
		ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer,
				meta_len, NULL, flags, 0);
		if (ret)
			return ret;
	}

	bio = req->bio;
	ctrl = nvme_req(req)->ctrl;

	effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode);
	ret = nvme_execute_rq(req, false);
	if (result)
		*result = le64_to_cpu(nvme_req(req)->result.u64);
	if (bio)
		blk_rq_unmap_user(bio);
	blk_mq_free_request(req);

	if (effects)
		nvme_passthru_end(ctrl, ns, effects, cmd, ret);

	return ret;
}

static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
{
	struct nvme_user_io io;
	struct nvme_command c;
	unsigned length, meta_len;
	void __user *metadata;

	if (copy_from_user(&io, uio, sizeof(io)))
		return -EFAULT;
	if (io.flags)
		return -EINVAL;

	switch (io.opcode) {
	case nvme_cmd_write:
	case nvme_cmd_read:
	case nvme_cmd_compare:
		break;
	default:
		return -EINVAL;
	}

	length = (io.nblocks + 1) << ns->head->lba_shift;

	if ((io.control & NVME_RW_PRINFO_PRACT) &&
	    (ns->head->ms == ns->head->pi_size)) {
		/*
		 * Protection information is stripped/inserted by the
		 * controller.
		 */
		if (nvme_to_user_ptr(io.metadata))
			return -EINVAL;
		meta_len = 0;
		metadata = NULL;
	} else {
		meta_len = (io.nblocks + 1) * ns->head->ms;
		metadata = nvme_to_user_ptr(io.metadata);
	}

	if (ns->head->features & NVME_NS_EXT_LBAS) {
		length += meta_len;
		meta_len = 0;
	} else if (meta_len) {
		if ((io.metadata & 3) || !io.metadata)
			return -EINVAL;
	}

	memset(&c, 0, sizeof(c));
	c.rw.opcode = io.opcode;
	c.rw.flags = io.flags;
	c.rw.nsid = cpu_to_le32(ns->head->ns_id);
	c.rw.slba = cpu_to_le64(io.slba);
	c.rw.length = cpu_to_le16(io.nblocks);
	c.rw.control = cpu_to_le16(io.control);
	c.rw.dsmgmt = cpu_to_le32(io.dsmgmt);
	c.rw.reftag = cpu_to_le32(io.reftag);
	c.rw.lbat = cpu_to_le16(io.apptag);
	c.rw.lbatm = cpu_to_le16(io.appmask);

	return nvme_submit_user_cmd(ns->queue, &c, io.addr, length, metadata,
			meta_len, NULL, 0, 0);
}

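/*
 * Illustrative userspace sketch (not part of this driver) of the ioctl
 * handled above, assuming <linux/nvme_ioctl.h> and an open namespace block
 * device; nblocks is zero-based, so 0 means a single logical block and buf
 * must be at least one logical block in size:
 *
 *	struct nvme_user_io io = {
 *		.opcode  = 0x02,			// nvme_cmd_read
 *		.slba    = 0,
 *		.nblocks = 0,				// read one LBA
 *		.addr    = (__u64)(uintptr_t)buf,
 *	};
 *	int ret = ioctl(fd, NVME_IOCTL_SUBMIT_IO, &io);
 */
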
static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl,
					struct nvme_ns *ns, __u32 nsid)
{
	if (ns && nsid != ns->head->ns_id) {
		dev_err(ctrl->device,
			"%s: nsid (%u) in cmd does not match nsid (%u) of namespace\n",
			current->comm, nsid, ns->head->ns_id);
		return false;
	}

	return true;
}

static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
		struct nvme_passthru_cmd __user *ucmd, unsigned int flags,
		bool open_for_write)
{
	struct nvme_passthru_cmd cmd;
	struct nvme_command c;
	unsigned timeout = 0;
	u64 result;
	int status;

	if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
		return -EFAULT;
	if (cmd.flags)
		return -EINVAL;
	if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid))
		return -EINVAL;

	memset(&c, 0, sizeof(c));
	c.common.opcode = cmd.opcode;
	c.common.flags = cmd.flags;
	c.common.nsid = cpu_to_le32(cmd.nsid);
	c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
	c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
	c.common.cdw10 = cpu_to_le32(cmd.cdw10);
	c.common.cdw11 = cpu_to_le32(cmd.cdw11);
	c.common.cdw12 = cpu_to_le32(cmd.cdw12);
	c.common.cdw13 = cpu_to_le32(cmd.cdw13);
	c.common.cdw14 = cpu_to_le32(cmd.cdw14);
	c.common.cdw15 = cpu_to_le32(cmd.cdw15);

	if (!nvme_cmd_allowed(ns, &c, 0, open_for_write))
		return -EACCES;

	if (cmd.timeout_ms)
		timeout = msecs_to_jiffies(cmd.timeout_ms);

	status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
			cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata),
			cmd.metadata_len, &result, timeout, 0);

	if (status >= 0) {
		if (put_user(result, &ucmd->result))
			return -EFAULT;
	}

	return status;
}

static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
		struct nvme_passthru_cmd64 __user *ucmd, unsigned int flags,
		bool open_for_write)
{
	struct nvme_passthru_cmd64 cmd;
	struct nvme_command c;
	unsigned timeout = 0;
	int status;

	if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
		return -EFAULT;
	if (cmd.flags)
		return -EINVAL;
	if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid))
		return -EINVAL;

	memset(&c, 0, sizeof(c));
	c.common.opcode = cmd.opcode;
	c.common.flags = cmd.flags;
	c.common.nsid = cpu_to_le32(cmd.nsid);
	c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
	c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
	c.common.cdw10 = cpu_to_le32(cmd.cdw10);
	c.common.cdw11 = cpu_to_le32(cmd.cdw11);
	c.common.cdw12 = cpu_to_le32(cmd.cdw12);
	c.common.cdw13 = cpu_to_le32(cmd.cdw13);
	c.common.cdw14 = cpu_to_le32(cmd.cdw14);
	c.common.cdw15 = cpu_to_le32(cmd.cdw15);

	if (!nvme_cmd_allowed(ns, &c, flags, open_for_write))
		return -EACCES;

	if (cmd.timeout_ms)
		timeout = msecs_to_jiffies(cmd.timeout_ms);

	status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
			cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata),
			cmd.metadata_len, &cmd.result, timeout, flags);

	if (status >= 0) {
		if (put_user(cmd.result, &ucmd->result))
			return -EFAULT;
	}

	return status;
}

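/*
 * Illustrative userspace sketch (not part of this driver) of the passthrough
 * ioctl handled by nvme_user_cmd() above, issuing Identify Controller on the
 * controller character device (e.g. /dev/nvme0):
 *
 *	struct nvme_passthru_cmd cmd = {
 *		.opcode   = 0x06,			// nvme_admin_identify
 *		.addr     = (__u64)(uintptr_t)buf,	// 4096-byte buffer
 *		.data_len = 4096,
 *		.cdw10    = 1,				// CNS 0x01: Identify Controller
 *	};
 *	int ret = ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd);
 */
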
struct nvme_uring_data {
	__u64	metadata;
	__u64	addr;
	__u32	data_len;
	__u32	metadata_len;
	__u32	timeout_ms;
};

/*
 * This overlays struct io_uring_cmd pdu.
 * Expect build errors if this grows larger than that.
 */
struct nvme_uring_cmd_pdu {
	struct request *req;
	struct bio *bio;
	u64 result;
	int status;
};

static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu(
		struct io_uring_cmd *ioucmd)
{
	return io_uring_cmd_to_pdu(ioucmd, struct nvme_uring_cmd_pdu);
}

static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd,
			       unsigned issue_flags)
{
	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);

	if (pdu->bio)
		blk_rq_unmap_user(pdu->bio);
	io_uring_cmd_done(ioucmd, pdu->status, pdu->result, issue_flags);
}

static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
						blk_status_t err)
{
	struct io_uring_cmd *ioucmd = req->end_io_data;
	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);

	if (nvme_req(req)->flags & NVME_REQ_CANCELLED) {
		pdu->status = -EINTR;
	} else {
		pdu->status = nvme_req(req)->status;
		if (!pdu->status)
			pdu->status = blk_status_to_errno(err);
	}
	pdu->result = le64_to_cpu(nvme_req(req)->result.u64);

	/*
	 * For iopoll, complete it directly. Note that using the uring_cmd
	 * helper for this is safe only because we check blk_rq_is_poll().
	 * As that returns false if we're not on a polled queue, it's safe
	 * to use the polled completion helper.
	 *
	 * Otherwise, move the completion to task work.
	 */
	if (blk_rq_is_poll(req)) {
		if (pdu->bio)
			blk_rq_unmap_user(pdu->bio);
		io_uring_cmd_iopoll_done(ioucmd, pdu->result, pdu->status);
	} else {
		io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb);
	}

	return RQ_END_IO_FREE;
}

static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
		struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec)
{
	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
	const struct nvme_uring_cmd *cmd = io_uring_sqe_cmd(ioucmd->sqe);
	struct request_queue *q = ns ? ns->queue : ctrl->admin_q;
	struct nvme_uring_data d;
	struct nvme_command c;
	struct request *req;
	blk_opf_t rq_flags = REQ_ALLOC_CACHE;
	blk_mq_req_flags_t blk_flags = 0;
	int ret;

	c.common.opcode = READ_ONCE(cmd->opcode);
	c.common.flags = READ_ONCE(cmd->flags);
	if (c.common.flags)
		return -EINVAL;

	c.common.command_id = 0;
	c.common.nsid = cpu_to_le32(cmd->nsid);
	if (!nvme_validate_passthru_nsid(ctrl, ns, le32_to_cpu(c.common.nsid)))
		return -EINVAL;

	c.common.cdw2[0] = cpu_to_le32(READ_ONCE(cmd->cdw2));
	c.common.cdw2[1] = cpu_to_le32(READ_ONCE(cmd->cdw3));
	c.common.metadata = 0;
	c.common.dptr.prp1 = c.common.dptr.prp2 = 0;
	c.common.cdw10 = cpu_to_le32(READ_ONCE(cmd->cdw10));
	c.common.cdw11 = cpu_to_le32(READ_ONCE(cmd->cdw11));
	c.common.cdw12 = cpu_to_le32(READ_ONCE(cmd->cdw12));
	c.common.cdw13 = cpu_to_le32(READ_ONCE(cmd->cdw13));
	c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14));
	c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15));

	if (!nvme_cmd_allowed(ns, &c, 0, ioucmd->file->f_mode & FMODE_WRITE))
		return -EACCES;

	d.metadata = READ_ONCE(cmd->metadata);
	d.addr = READ_ONCE(cmd->addr);
	d.data_len = READ_ONCE(cmd->data_len);
	d.metadata_len = READ_ONCE(cmd->metadata_len);
	d.timeout_ms = READ_ONCE(cmd->timeout_ms);

	if (issue_flags & IO_URING_F_NONBLOCK) {
		rq_flags |= REQ_NOWAIT;
		blk_flags = BLK_MQ_REQ_NOWAIT;
	}
	if (issue_flags & IO_URING_F_IOPOLL)
		rq_flags |= REQ_POLLED;

	req = nvme_alloc_user_request(q, &c, rq_flags, blk_flags);
	if (IS_ERR(req))
		return PTR_ERR(req);
	req->timeout = d.timeout_ms ? msecs_to_jiffies(d.timeout_ms) : 0;

	if (d.data_len) {
		ret = nvme_map_user_request(req, d.addr,
			d.data_len, nvme_to_user_ptr(d.metadata),
			d.metadata_len, ioucmd, vec, issue_flags);
		if (ret)
			return ret;
	}

	/* to free bio on completion, as req->bio will be null at that time */
	pdu->bio = req->bio;
	pdu->req = req;
	req->end_io_data = ioucmd;
	req->end_io = nvme_uring_cmd_end_io;
	blk_execute_rq_nowait(req, false);
	return -EIOCBQUEUED;
}

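/*
 * Illustrative userspace sketch (not part of this driver) of submitting an
 * NVMe uring_cmd like the one handled above, assuming liburing, a ring set up
 * with IORING_SETUP_SQE128 | IORING_SETUP_CQE32 and an open namespace
 * character device (e.g. /dev/ng0n1); reads one logical block at LBA 0,
 * assuming a 4096-byte LBA format:
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	memset(sqe, 0, 2 * sizeof(*sqe));		// 128-byte SQE
 *	sqe->opcode = IORING_OP_URING_CMD;
 *	sqe->fd = fd;
 *	sqe->cmd_op = NVME_URING_CMD_IO;
 *
 *	struct nvme_uring_cmd *cmd = (struct nvme_uring_cmd *)sqe->cmd;
 *	cmd->opcode = 0x02;				// nvme_cmd_read
 *	cmd->nsid = nsid;
 *	cmd->addr = (__u64)(uintptr_t)buf;
 *	cmd->data_len = 4096;				// cdw12 NLB left at 0 -> one block
 *	io_uring_submit(&ring);
 */
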
static bool is_ctrl_ioctl(unsigned int cmd)
{
	if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD)
		return true;
	if (is_sed_ioctl(cmd))
		return true;
	return false;
}

static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd,
		void __user *argp, bool open_for_write)
{
	switch (cmd) {
	case NVME_IOCTL_ADMIN_CMD:
		return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write);
	case NVME_IOCTL_ADMIN64_CMD:
		return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write);
	default:
		return sed_ioctl(ctrl->opal_dev, cmd, argp);
	}
}

#ifdef COMPAT_FOR_U64_ALIGNMENT
struct nvme_user_io32 {
	__u8	opcode;
	__u8	flags;
	__u16	control;
	__u16	nblocks;
	__u16	rsvd;
	__u64	metadata;
	__u64	addr;
	__u64	slba;
	__u32	dsmgmt;
	__u32	reftag;
	__u16	apptag;
	__u16	appmask;
} __attribute__((__packed__));
#define NVME_IOCTL_SUBMIT_IO32	_IOW('N', 0x42, struct nvme_user_io32)
#endif /* COMPAT_FOR_U64_ALIGNMENT */

static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
		void __user *argp, unsigned int flags, bool open_for_write)
{
	switch (cmd) {
	case NVME_IOCTL_ID:
		force_successful_syscall_return();
		return ns->head->ns_id;
	case NVME_IOCTL_IO_CMD:
		return nvme_user_cmd(ns->ctrl, ns, argp, flags, open_for_write);
	/*
	 * struct nvme_user_io can have different padding on some 32-bit ABIs.
	 * Just accept the compat version as all fields that are used are the
	 * same size and at the same offset.
	 */
#ifdef COMPAT_FOR_U64_ALIGNMENT
	case NVME_IOCTL_SUBMIT_IO32:
#endif
	case NVME_IOCTL_SUBMIT_IO:
		return nvme_submit_io(ns, argp);
	case NVME_IOCTL_IO64_CMD_VEC:
		flags |= NVME_IOCTL_VEC;
		fallthrough;
	case NVME_IOCTL_IO64_CMD:
		return nvme_user_cmd64(ns->ctrl, ns, argp, flags,
				       open_for_write);
	default:
		return -ENOTTY;
	}
}

int nvme_ioctl(struct block_device *bdev, blk_mode_t mode,
		unsigned int cmd, unsigned long arg)
{
	struct nvme_ns *ns = bdev->bd_disk->private_data;
	bool open_for_write = mode & BLK_OPEN_WRITE;
	void __user *argp = (void __user *)arg;
	unsigned int flags = 0;

	if (bdev_is_partition(bdev))
		flags |= NVME_IOCTL_PARTITION;

	if (is_ctrl_ioctl(cmd))
		return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write);
	return nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write);
}

long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct nvme_ns *ns =
		container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev);
	bool open_for_write = file->f_mode & FMODE_WRITE;
	void __user *argp = (void __user *)arg;

	if (is_ctrl_ioctl(cmd))
		return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write);
	return nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write);
}

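/*
 * Illustrative userspace sketch (not part of this driver): NVME_IOCTL_ID
 * reports the namespace ID directly as the ioctl return value rather than
 * through an argument:
 *
 *	int nsid = ioctl(fd, NVME_IOCTL_ID);	// fd on /dev/nvme0n1 or /dev/ng0n1
 *	if (nsid < 0)
 *		perror("NVME_IOCTL_ID");
 */
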
static int nvme_uring_cmd_checks(unsigned int issue_flags)
{
	/* NVMe passthrough requires big SQE/CQE support */
	if ((issue_flags & (IO_URING_F_SQE128|IO_URING_F_CQE32)) !=
	    (IO_URING_F_SQE128|IO_URING_F_CQE32))
		return -EOPNOTSUPP;
	return 0;
}

static int nvme_ns_uring_cmd(struct nvme_ns *ns, struct io_uring_cmd *ioucmd,
		unsigned int issue_flags)
{
	struct nvme_ctrl *ctrl = ns->ctrl;
	int ret;

	ret = nvme_uring_cmd_checks(issue_flags);
	if (ret)
		return ret;

	switch (ioucmd->cmd_op) {
	case NVME_URING_CMD_IO:
		ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, false);
		break;
	case NVME_URING_CMD_IO_VEC:
		ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, true);
		break;
	default:
		ret = -ENOTTY;
	}

	return ret;
}

int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
{
	struct nvme_ns *ns = container_of(file_inode(ioucmd->file)->i_cdev,
			struct nvme_ns, cdev);

	return nvme_ns_uring_cmd(ns, ioucmd, issue_flags);
}

int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
				 struct io_comp_batch *iob,
				 unsigned int poll_flags)
{
	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
	struct request *req = pdu->req;

	if (req && blk_rq_is_poll(req))
		return blk_rq_poll(req, iob, poll_flags);
	return 0;
}

#ifdef CONFIG_NVME_MULTIPATH
static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
		void __user *argp, struct nvme_ns_head *head, int srcu_idx,
		bool open_for_write)
	__releases(&head->srcu)
{
	struct nvme_ctrl *ctrl = ns->ctrl;
	int ret;

	nvme_get_ctrl(ns->ctrl);
	srcu_read_unlock(&head->srcu, srcu_idx);
	ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write);

	nvme_put_ctrl(ctrl);
	return ret;
}

int nvme_ns_head_ioctl(struct block_device *bdev, blk_mode_t mode,
		unsigned int cmd, unsigned long arg)
{
	struct nvme_ns_head *head = bdev->bd_disk->private_data;
	bool open_for_write = mode & BLK_OPEN_WRITE;
	void __user *argp = (void __user *)arg;
	struct nvme_ns *ns;
	int srcu_idx, ret = -EWOULDBLOCK;
	unsigned int flags = 0;

	if (bdev_is_partition(bdev))
		flags |= NVME_IOCTL_PARTITION;

	srcu_idx = srcu_read_lock(&head->srcu);
	ns = nvme_find_path(head);
	if (!ns)
		goto out_unlock;

	/*
	 * Handle ioctls that apply to the controller instead of the namespace
	 * separately and drop the ns SRCU reference early.  This avoids a
	 * deadlock when deleting namespaces using the passthrough interface.
	 */
	if (is_ctrl_ioctl(cmd))
		return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx,
					       open_for_write);

	ret = nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write);
out_unlock:
	srcu_read_unlock(&head->srcu, srcu_idx);
	return ret;
}

long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
		unsigned long arg)
{
	bool open_for_write = file->f_mode & FMODE_WRITE;
	struct cdev *cdev = file_inode(file)->i_cdev;
	struct nvme_ns_head *head =
		container_of(cdev, struct nvme_ns_head, cdev);
	void __user *argp = (void __user *)arg;
	struct nvme_ns *ns;
	int srcu_idx, ret = -EWOULDBLOCK;

	srcu_idx = srcu_read_lock(&head->srcu);
	ns = nvme_find_path(head);
	if (!ns)
		goto out_unlock;

	if (is_ctrl_ioctl(cmd))
		return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx,
					       open_for_write);

	ret = nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write);
out_unlock:
	srcu_read_unlock(&head->srcu, srcu_idx);
	return ret;
}

int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd,
		unsigned int issue_flags)
{
	struct cdev *cdev = file_inode(ioucmd->file)->i_cdev;
	struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev);
	int srcu_idx = srcu_read_lock(&head->srcu);
	struct nvme_ns *ns = nvme_find_path(head);
	int ret = -EINVAL;

	if (ns)
		ret = nvme_ns_uring_cmd(ns, ioucmd, issue_flags);
	srcu_read_unlock(&head->srcu, srcu_idx);
	return ret;
}
#endif /* CONFIG_NVME_MULTIPATH */

int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
{
	struct nvme_ctrl *ctrl = ioucmd->file->private_data;
	int ret;

	/* IOPOLL not supported yet */
	if (issue_flags & IO_URING_F_IOPOLL)
		return -EOPNOTSUPP;

	ret = nvme_uring_cmd_checks(issue_flags);
	if (ret)
		return ret;

	switch (ioucmd->cmd_op) {
	case NVME_URING_CMD_ADMIN:
		ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, false);
		break;
	case NVME_URING_CMD_ADMIN_VEC:
		ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, true);
		break;
	default:
		ret = -ENOTTY;
	}

	return ret;
}

static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp,
		bool open_for_write)
{
	struct nvme_ns *ns;
	int ret, srcu_idx;

	srcu_idx = srcu_read_lock(&ctrl->srcu);
	if (list_empty(&ctrl->namespaces)) {
		ret = -ENOTTY;
		goto out_unlock;
	}

	ns = list_first_or_null_rcu(&ctrl->namespaces, struct nvme_ns, list);
	if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) {
		dev_warn(ctrl->device,
			"NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n");
		ret = -EINVAL;
		goto out_unlock;
	}

	dev_warn(ctrl->device,
		"using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n");
	if (!nvme_get_ns(ns)) {
		ret = -ENXIO;
		goto out_unlock;
	}
	srcu_read_unlock(&ctrl->srcu, srcu_idx);

	ret = nvme_user_cmd(ctrl, ns, argp, 0, open_for_write);
	nvme_put_ns(ns);
	return ret;

out_unlock:
	srcu_read_unlock(&ctrl->srcu, srcu_idx);
	return ret;
}

long nvme_dev_ioctl(struct file *file, unsigned int cmd,
		unsigned long arg)
{
	bool open_for_write = file->f_mode & FMODE_WRITE;
	struct nvme_ctrl *ctrl = file->private_data;
	void __user *argp = (void __user *)arg;

	switch (cmd) {
	case NVME_IOCTL_ADMIN_CMD:
		return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write);
	case NVME_IOCTL_ADMIN64_CMD:
		return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write);
	case NVME_IOCTL_IO_CMD:
		return nvme_dev_user_cmd(ctrl, argp, open_for_write);
	case NVME_IOCTL_RESET:
		if (!capable(CAP_SYS_ADMIN))
			return -EACCES;
		dev_warn(ctrl->device, "resetting controller\n");
		return nvme_reset_ctrl_sync(ctrl);
	case NVME_IOCTL_SUBSYS_RESET:
		if (!capable(CAP_SYS_ADMIN))
			return -EACCES;
		return nvme_reset_subsystem(ctrl);
	case NVME_IOCTL_RESCAN:
		if (!capable(CAP_SYS_ADMIN))
			return -EACCES;
		nvme_queue_scan(ctrl);
		return 0;
	default:
		return -ENOTTY;
	}
}

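/*
 * Illustrative userspace sketch (not part of this driver): the reset,
 * subsystem reset and rescan ioctls above take no argument and require
 * CAP_SYS_ADMIN:
 *
 *	int fd = open("/dev/nvme0", O_RDONLY);
 *	if (ioctl(fd, NVME_IOCTL_RESET) < 0)
 *		perror("NVME_IOCTL_RESET");
 */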