1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2011-2014, Intel Corporation. 4 * Copyright (c) 2017-2021 Christoph Hellwig. 5 */ 6 #include <linux/ptrace.h> /* for force_successful_syscall_return */ 7 #include <linux/nvme_ioctl.h> 8 #include <linux/io_uring/cmd.h> 9 #include "nvme.h" 10 11 enum { 12 NVME_IOCTL_VEC = (1 << 0), 13 NVME_IOCTL_PARTITION = (1 << 1), 14 }; 15 16 static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, 17 unsigned int flags, bool open_for_write) 18 { 19 u32 effects; 20 21 /* 22 * Do not allow unprivileged passthrough on partitions, as that allows an 23 * escape from the containment of the partition. 24 */ 25 if (flags & NVME_IOCTL_PARTITION) 26 goto admin; 27 28 /* 29 * Do not allow unprivileged processes to send vendor specific or fabrics 30 * commands as we can't be sure about their effects. 31 */ 32 if (c->common.opcode >= nvme_cmd_vendor_start || 33 c->common.opcode == nvme_fabrics_command) 34 goto admin; 35 36 /* 37 * Do not allow unprivileged passthrough of admin commands except 38 * for a subset of identify commands that contain information required 39 * to form proper I/O commands in userspace and do not expose any 40 * potentially sensitive information. 41 */ 42 if (!ns) { 43 if (c->common.opcode == nvme_admin_identify) { 44 switch (c->identify.cns) { 45 case NVME_ID_CNS_NS: 46 case NVME_ID_CNS_CS_NS: 47 case NVME_ID_CNS_NS_CS_INDEP: 48 case NVME_ID_CNS_CS_CTRL: 49 case NVME_ID_CNS_CTRL: 50 return true; 51 } 52 } 53 goto admin; 54 } 55 56 /* 57 * Check if the controller provides a Commands Supported and Effects log 58 * and marks this command as supported. If not reject unprivileged 59 * passthrough. 60 */ 61 effects = nvme_command_effects(ns->ctrl, ns, c->common.opcode); 62 if (!(effects & NVME_CMD_EFFECTS_CSUPP)) 63 goto admin; 64 65 /* 66 * Don't allow passthrough for command that have intrusive (or unknown) 67 * effects. 68 */ 69 if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC | 70 NVME_CMD_EFFECTS_UUID_SEL | 71 NVME_CMD_EFFECTS_SCOPE_MASK)) 72 goto admin; 73 74 /* 75 * Only allow I/O commands that transfer data to the controller or that 76 * change the logical block contents if the file descriptor is open for 77 * writing. 78 */ 79 if ((nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC)) && 80 !open_for_write) 81 goto admin; 82 83 return true; 84 admin: 85 return capable(CAP_SYS_ADMIN); 86 } 87 88 /* 89 * Convert integer values from ioctl structures to user pointers, silently 90 * ignoring the upper bits in the compat case to match behaviour of 32-bit 91 * kernels. 92 */ 93 static void __user *nvme_to_user_ptr(uintptr_t ptrval) 94 { 95 if (in_compat_syscall()) 96 ptrval = (compat_uptr_t)ptrval; 97 return (void __user *)ptrval; 98 } 99 100 static struct request *nvme_alloc_user_request(struct request_queue *q, 101 struct nvme_command *cmd, blk_opf_t rq_flags, 102 blk_mq_req_flags_t blk_flags) 103 { 104 struct request *req; 105 106 req = blk_mq_alloc_request(q, nvme_req_op(cmd) | rq_flags, blk_flags); 107 if (IS_ERR(req)) 108 return req; 109 nvme_init_request(req, cmd); 110 nvme_req(req)->flags |= NVME_REQ_USERCMD; 111 return req; 112 } 113 114 static int nvme_map_user_request(struct request *req, u64 ubuffer, 115 unsigned bufflen, void __user *meta_buffer, unsigned meta_len, 116 u32 meta_seed, struct io_uring_cmd *ioucmd, unsigned int flags) 117 { 118 struct request_queue *q = req->q; 119 struct nvme_ns *ns = q->queuedata; 120 struct block_device *bdev = ns ? ns->disk->part0 : NULL; 121 struct bio *bio = NULL; 122 int ret; 123 124 if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) { 125 struct iov_iter iter; 126 127 /* fixedbufs is only for non-vectored io */ 128 if (WARN_ON_ONCE(flags & NVME_IOCTL_VEC)) 129 return -EINVAL; 130 ret = io_uring_cmd_import_fixed(ubuffer, bufflen, 131 rq_data_dir(req), &iter, ioucmd); 132 if (ret < 0) 133 goto out; 134 ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL); 135 } else { 136 ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer), 137 bufflen, GFP_KERNEL, flags & NVME_IOCTL_VEC, 0, 138 0, rq_data_dir(req)); 139 } 140 141 if (ret) 142 goto out; 143 144 bio = req->bio; 145 if (bdev) { 146 bio_set_dev(bio, bdev); 147 if (meta_buffer && meta_len) { 148 ret = bio_integrity_map_user(bio, meta_buffer, meta_len, 149 meta_seed); 150 if (ret) 151 goto out_unmap; 152 req->cmd_flags |= REQ_INTEGRITY; 153 } 154 } 155 156 return ret; 157 158 out_unmap: 159 if (bio) 160 blk_rq_unmap_user(bio); 161 out: 162 blk_mq_free_request(req); 163 return ret; 164 } 165 166 static int nvme_submit_user_cmd(struct request_queue *q, 167 struct nvme_command *cmd, u64 ubuffer, unsigned bufflen, 168 void __user *meta_buffer, unsigned meta_len, u32 meta_seed, 169 u64 *result, unsigned timeout, unsigned int flags) 170 { 171 struct nvme_ns *ns = q->queuedata; 172 struct nvme_ctrl *ctrl; 173 struct request *req; 174 struct bio *bio; 175 u32 effects; 176 int ret; 177 178 req = nvme_alloc_user_request(q, cmd, 0, 0); 179 if (IS_ERR(req)) 180 return PTR_ERR(req); 181 182 req->timeout = timeout; 183 if (ubuffer && bufflen) { 184 ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer, 185 meta_len, meta_seed, NULL, flags); 186 if (ret) 187 return ret; 188 } 189 190 bio = req->bio; 191 ctrl = nvme_req(req)->ctrl; 192 193 effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode); 194 ret = nvme_execute_rq(req, false); 195 if (result) 196 *result = le64_to_cpu(nvme_req(req)->result.u64); 197 if (bio) 198 blk_rq_unmap_user(bio); 199 blk_mq_free_request(req); 200 201 if (effects) 202 nvme_passthru_end(ctrl, ns, effects, cmd, ret); 203 204 return ret; 205 } 206 207 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) 208 { 209 struct nvme_user_io io; 210 struct nvme_command c; 211 unsigned length, meta_len; 212 void __user *metadata; 213 214 if (copy_from_user(&io, uio, sizeof(io))) 215 return -EFAULT; 216 if (io.flags) 217 return -EINVAL; 218 219 switch (io.opcode) { 220 case nvme_cmd_write: 221 case nvme_cmd_read: 222 case nvme_cmd_compare: 223 break; 224 default: 225 return -EINVAL; 226 } 227 228 length = (io.nblocks + 1) << ns->head->lba_shift; 229 230 if ((io.control & NVME_RW_PRINFO_PRACT) && 231 (ns->head->ms == ns->head->pi_size)) { 232 /* 233 * Protection information is stripped/inserted by the 234 * controller. 235 */ 236 if (nvme_to_user_ptr(io.metadata)) 237 return -EINVAL; 238 meta_len = 0; 239 metadata = NULL; 240 } else { 241 meta_len = (io.nblocks + 1) * ns->head->ms; 242 metadata = nvme_to_user_ptr(io.metadata); 243 } 244 245 if (ns->head->features & NVME_NS_EXT_LBAS) { 246 length += meta_len; 247 meta_len = 0; 248 } else if (meta_len) { 249 if ((io.metadata & 3) || !io.metadata) 250 return -EINVAL; 251 } 252 253 memset(&c, 0, sizeof(c)); 254 c.rw.opcode = io.opcode; 255 c.rw.flags = io.flags; 256 c.rw.nsid = cpu_to_le32(ns->head->ns_id); 257 c.rw.slba = cpu_to_le64(io.slba); 258 c.rw.length = cpu_to_le16(io.nblocks); 259 c.rw.control = cpu_to_le16(io.control); 260 c.rw.dsmgmt = cpu_to_le32(io.dsmgmt); 261 c.rw.reftag = cpu_to_le32(io.reftag); 262 c.rw.apptag = cpu_to_le16(io.apptag); 263 c.rw.appmask = cpu_to_le16(io.appmask); 264 265 return nvme_submit_user_cmd(ns->queue, &c, io.addr, length, metadata, 266 meta_len, lower_32_bits(io.slba), NULL, 0, 0); 267 } 268 269 static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl, 270 struct nvme_ns *ns, __u32 nsid) 271 { 272 if (ns && nsid != ns->head->ns_id) { 273 dev_err(ctrl->device, 274 "%s: nsid (%u) in cmd does not match nsid (%u)" 275 "of namespace\n", 276 current->comm, nsid, ns->head->ns_id); 277 return false; 278 } 279 280 return true; 281 } 282 283 static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 284 struct nvme_passthru_cmd __user *ucmd, unsigned int flags, 285 bool open_for_write) 286 { 287 struct nvme_passthru_cmd cmd; 288 struct nvme_command c; 289 unsigned timeout = 0; 290 u64 result; 291 int status; 292 293 if (copy_from_user(&cmd, ucmd, sizeof(cmd))) 294 return -EFAULT; 295 if (cmd.flags) 296 return -EINVAL; 297 if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid)) 298 return -EINVAL; 299 300 memset(&c, 0, sizeof(c)); 301 c.common.opcode = cmd.opcode; 302 c.common.flags = cmd.flags; 303 c.common.nsid = cpu_to_le32(cmd.nsid); 304 c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); 305 c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); 306 c.common.cdw10 = cpu_to_le32(cmd.cdw10); 307 c.common.cdw11 = cpu_to_le32(cmd.cdw11); 308 c.common.cdw12 = cpu_to_le32(cmd.cdw12); 309 c.common.cdw13 = cpu_to_le32(cmd.cdw13); 310 c.common.cdw14 = cpu_to_le32(cmd.cdw14); 311 c.common.cdw15 = cpu_to_le32(cmd.cdw15); 312 313 if (!nvme_cmd_allowed(ns, &c, 0, open_for_write)) 314 return -EACCES; 315 316 if (cmd.timeout_ms) 317 timeout = msecs_to_jiffies(cmd.timeout_ms); 318 319 status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, 320 cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata), 321 cmd.metadata_len, 0, &result, timeout, 0); 322 323 if (status >= 0) { 324 if (put_user(result, &ucmd->result)) 325 return -EFAULT; 326 } 327 328 return status; 329 } 330 331 static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 332 struct nvme_passthru_cmd64 __user *ucmd, unsigned int flags, 333 bool open_for_write) 334 { 335 struct nvme_passthru_cmd64 cmd; 336 struct nvme_command c; 337 unsigned timeout = 0; 338 int status; 339 340 if (copy_from_user(&cmd, ucmd, sizeof(cmd))) 341 return -EFAULT; 342 if (cmd.flags) 343 return -EINVAL; 344 if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid)) 345 return -EINVAL; 346 347 memset(&c, 0, sizeof(c)); 348 c.common.opcode = cmd.opcode; 349 c.common.flags = cmd.flags; 350 c.common.nsid = cpu_to_le32(cmd.nsid); 351 c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); 352 c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); 353 c.common.cdw10 = cpu_to_le32(cmd.cdw10); 354 c.common.cdw11 = cpu_to_le32(cmd.cdw11); 355 c.common.cdw12 = cpu_to_le32(cmd.cdw12); 356 c.common.cdw13 = cpu_to_le32(cmd.cdw13); 357 c.common.cdw14 = cpu_to_le32(cmd.cdw14); 358 c.common.cdw15 = cpu_to_le32(cmd.cdw15); 359 360 if (!nvme_cmd_allowed(ns, &c, flags, open_for_write)) 361 return -EACCES; 362 363 if (cmd.timeout_ms) 364 timeout = msecs_to_jiffies(cmd.timeout_ms); 365 366 status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, 367 cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata), 368 cmd.metadata_len, 0, &cmd.result, timeout, flags); 369 370 if (status >= 0) { 371 if (put_user(cmd.result, &ucmd->result)) 372 return -EFAULT; 373 } 374 375 return status; 376 } 377 378 struct nvme_uring_data { 379 __u64 metadata; 380 __u64 addr; 381 __u32 data_len; 382 __u32 metadata_len; 383 __u32 timeout_ms; 384 }; 385 386 /* 387 * This overlays struct io_uring_cmd pdu. 388 * Expect build errors if this grows larger than that. 389 */ 390 struct nvme_uring_cmd_pdu { 391 struct request *req; 392 struct bio *bio; 393 u64 result; 394 int status; 395 }; 396 397 static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu( 398 struct io_uring_cmd *ioucmd) 399 { 400 return (struct nvme_uring_cmd_pdu *)&ioucmd->pdu; 401 } 402 403 static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd, 404 unsigned issue_flags) 405 { 406 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 407 408 if (pdu->bio) 409 blk_rq_unmap_user(pdu->bio); 410 io_uring_cmd_done(ioucmd, pdu->status, pdu->result, issue_flags); 411 } 412 413 static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req, 414 blk_status_t err) 415 { 416 struct io_uring_cmd *ioucmd = req->end_io_data; 417 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 418 419 if (nvme_req(req)->flags & NVME_REQ_CANCELLED) 420 pdu->status = -EINTR; 421 else 422 pdu->status = nvme_req(req)->status; 423 pdu->result = le64_to_cpu(nvme_req(req)->result.u64); 424 425 /* 426 * For iopoll, complete it directly. Note that using the uring_cmd 427 * helper for this is safe only because we check blk_rq_is_poll(). 428 * As that returns false if we're NOT on a polled queue, then it's 429 * safe to use the polled completion helper. 430 * 431 * Otherwise, move the completion to task work. 432 */ 433 if (blk_rq_is_poll(req)) { 434 if (pdu->bio) 435 blk_rq_unmap_user(pdu->bio); 436 io_uring_cmd_iopoll_done(ioucmd, pdu->result, pdu->status); 437 } else { 438 io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb); 439 } 440 441 return RQ_END_IO_FREE; 442 } 443 444 static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 445 struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec) 446 { 447 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 448 const struct nvme_uring_cmd *cmd = io_uring_sqe_cmd(ioucmd->sqe); 449 struct request_queue *q = ns ? ns->queue : ctrl->admin_q; 450 struct nvme_uring_data d; 451 struct nvme_command c; 452 struct request *req; 453 blk_opf_t rq_flags = REQ_ALLOC_CACHE; 454 blk_mq_req_flags_t blk_flags = 0; 455 int ret; 456 457 c.common.opcode = READ_ONCE(cmd->opcode); 458 c.common.flags = READ_ONCE(cmd->flags); 459 if (c.common.flags) 460 return -EINVAL; 461 462 c.common.command_id = 0; 463 c.common.nsid = cpu_to_le32(cmd->nsid); 464 if (!nvme_validate_passthru_nsid(ctrl, ns, le32_to_cpu(c.common.nsid))) 465 return -EINVAL; 466 467 c.common.cdw2[0] = cpu_to_le32(READ_ONCE(cmd->cdw2)); 468 c.common.cdw2[1] = cpu_to_le32(READ_ONCE(cmd->cdw3)); 469 c.common.metadata = 0; 470 c.common.dptr.prp1 = c.common.dptr.prp2 = 0; 471 c.common.cdw10 = cpu_to_le32(READ_ONCE(cmd->cdw10)); 472 c.common.cdw11 = cpu_to_le32(READ_ONCE(cmd->cdw11)); 473 c.common.cdw12 = cpu_to_le32(READ_ONCE(cmd->cdw12)); 474 c.common.cdw13 = cpu_to_le32(READ_ONCE(cmd->cdw13)); 475 c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14)); 476 c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15)); 477 478 if (!nvme_cmd_allowed(ns, &c, 0, ioucmd->file->f_mode & FMODE_WRITE)) 479 return -EACCES; 480 481 d.metadata = READ_ONCE(cmd->metadata); 482 d.addr = READ_ONCE(cmd->addr); 483 d.data_len = READ_ONCE(cmd->data_len); 484 d.metadata_len = READ_ONCE(cmd->metadata_len); 485 d.timeout_ms = READ_ONCE(cmd->timeout_ms); 486 487 if (issue_flags & IO_URING_F_NONBLOCK) { 488 rq_flags |= REQ_NOWAIT; 489 blk_flags = BLK_MQ_REQ_NOWAIT; 490 } 491 if (issue_flags & IO_URING_F_IOPOLL) 492 rq_flags |= REQ_POLLED; 493 494 req = nvme_alloc_user_request(q, &c, rq_flags, blk_flags); 495 if (IS_ERR(req)) 496 return PTR_ERR(req); 497 req->timeout = d.timeout_ms ? msecs_to_jiffies(d.timeout_ms) : 0; 498 499 if (d.addr && d.data_len) { 500 ret = nvme_map_user_request(req, d.addr, 501 d.data_len, nvme_to_user_ptr(d.metadata), 502 d.metadata_len, 0, ioucmd, vec); 503 if (ret) 504 return ret; 505 } 506 507 /* to free bio on completion, as req->bio will be null at that time */ 508 pdu->bio = req->bio; 509 pdu->req = req; 510 req->end_io_data = ioucmd; 511 req->end_io = nvme_uring_cmd_end_io; 512 blk_execute_rq_nowait(req, false); 513 return -EIOCBQUEUED; 514 } 515 516 static bool is_ctrl_ioctl(unsigned int cmd) 517 { 518 if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD) 519 return true; 520 if (is_sed_ioctl(cmd)) 521 return true; 522 return false; 523 } 524 525 static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd, 526 void __user *argp, bool open_for_write) 527 { 528 switch (cmd) { 529 case NVME_IOCTL_ADMIN_CMD: 530 return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write); 531 case NVME_IOCTL_ADMIN64_CMD: 532 return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write); 533 default: 534 return sed_ioctl(ctrl->opal_dev, cmd, argp); 535 } 536 } 537 538 #ifdef COMPAT_FOR_U64_ALIGNMENT 539 struct nvme_user_io32 { 540 __u8 opcode; 541 __u8 flags; 542 __u16 control; 543 __u16 nblocks; 544 __u16 rsvd; 545 __u64 metadata; 546 __u64 addr; 547 __u64 slba; 548 __u32 dsmgmt; 549 __u32 reftag; 550 __u16 apptag; 551 __u16 appmask; 552 } __attribute__((__packed__)); 553 #define NVME_IOCTL_SUBMIT_IO32 _IOW('N', 0x42, struct nvme_user_io32) 554 #endif /* COMPAT_FOR_U64_ALIGNMENT */ 555 556 static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, 557 void __user *argp, unsigned int flags, bool open_for_write) 558 { 559 switch (cmd) { 560 case NVME_IOCTL_ID: 561 force_successful_syscall_return(); 562 return ns->head->ns_id; 563 case NVME_IOCTL_IO_CMD: 564 return nvme_user_cmd(ns->ctrl, ns, argp, flags, open_for_write); 565 /* 566 * struct nvme_user_io can have different padding on some 32-bit ABIs. 567 * Just accept the compat version as all fields that are used are the 568 * same size and at the same offset. 569 */ 570 #ifdef COMPAT_FOR_U64_ALIGNMENT 571 case NVME_IOCTL_SUBMIT_IO32: 572 #endif 573 case NVME_IOCTL_SUBMIT_IO: 574 return nvme_submit_io(ns, argp); 575 case NVME_IOCTL_IO64_CMD_VEC: 576 flags |= NVME_IOCTL_VEC; 577 fallthrough; 578 case NVME_IOCTL_IO64_CMD: 579 return nvme_user_cmd64(ns->ctrl, ns, argp, flags, 580 open_for_write); 581 default: 582 return -ENOTTY; 583 } 584 } 585 586 int nvme_ioctl(struct block_device *bdev, blk_mode_t mode, 587 unsigned int cmd, unsigned long arg) 588 { 589 struct nvme_ns *ns = bdev->bd_disk->private_data; 590 bool open_for_write = mode & BLK_OPEN_WRITE; 591 void __user *argp = (void __user *)arg; 592 unsigned int flags = 0; 593 594 if (bdev_is_partition(bdev)) 595 flags |= NVME_IOCTL_PARTITION; 596 597 if (is_ctrl_ioctl(cmd)) 598 return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); 599 return nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write); 600 } 601 602 long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 603 { 604 struct nvme_ns *ns = 605 container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev); 606 bool open_for_write = file->f_mode & FMODE_WRITE; 607 void __user *argp = (void __user *)arg; 608 609 if (is_ctrl_ioctl(cmd)) 610 return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); 611 return nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write); 612 } 613 614 static int nvme_uring_cmd_checks(unsigned int issue_flags) 615 { 616 617 /* NVMe passthrough requires big SQE/CQE support */ 618 if ((issue_flags & (IO_URING_F_SQE128|IO_URING_F_CQE32)) != 619 (IO_URING_F_SQE128|IO_URING_F_CQE32)) 620 return -EOPNOTSUPP; 621 return 0; 622 } 623 624 static int nvme_ns_uring_cmd(struct nvme_ns *ns, struct io_uring_cmd *ioucmd, 625 unsigned int issue_flags) 626 { 627 struct nvme_ctrl *ctrl = ns->ctrl; 628 int ret; 629 630 BUILD_BUG_ON(sizeof(struct nvme_uring_cmd_pdu) > sizeof(ioucmd->pdu)); 631 632 ret = nvme_uring_cmd_checks(issue_flags); 633 if (ret) 634 return ret; 635 636 switch (ioucmd->cmd_op) { 637 case NVME_URING_CMD_IO: 638 ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, false); 639 break; 640 case NVME_URING_CMD_IO_VEC: 641 ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, true); 642 break; 643 default: 644 ret = -ENOTTY; 645 } 646 647 return ret; 648 } 649 650 int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) 651 { 652 struct nvme_ns *ns = container_of(file_inode(ioucmd->file)->i_cdev, 653 struct nvme_ns, cdev); 654 655 return nvme_ns_uring_cmd(ns, ioucmd, issue_flags); 656 } 657 658 int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd, 659 struct io_comp_batch *iob, 660 unsigned int poll_flags) 661 { 662 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 663 struct request *req = pdu->req; 664 665 if (req && blk_rq_is_poll(req)) 666 return blk_rq_poll(req, iob, poll_flags); 667 return 0; 668 } 669 #ifdef CONFIG_NVME_MULTIPATH 670 static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, 671 void __user *argp, struct nvme_ns_head *head, int srcu_idx, 672 bool open_for_write) 673 __releases(&head->srcu) 674 { 675 struct nvme_ctrl *ctrl = ns->ctrl; 676 int ret; 677 678 nvme_get_ctrl(ns->ctrl); 679 srcu_read_unlock(&head->srcu, srcu_idx); 680 ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); 681 682 nvme_put_ctrl(ctrl); 683 return ret; 684 } 685 686 int nvme_ns_head_ioctl(struct block_device *bdev, blk_mode_t mode, 687 unsigned int cmd, unsigned long arg) 688 { 689 struct nvme_ns_head *head = bdev->bd_disk->private_data; 690 bool open_for_write = mode & BLK_OPEN_WRITE; 691 void __user *argp = (void __user *)arg; 692 struct nvme_ns *ns; 693 int srcu_idx, ret = -EWOULDBLOCK; 694 unsigned int flags = 0; 695 696 if (bdev_is_partition(bdev)) 697 flags |= NVME_IOCTL_PARTITION; 698 699 srcu_idx = srcu_read_lock(&head->srcu); 700 ns = nvme_find_path(head); 701 if (!ns) 702 goto out_unlock; 703 704 /* 705 * Handle ioctls that apply to the controller instead of the namespace 706 * seperately and drop the ns SRCU reference early. This avoids a 707 * deadlock when deleting namespaces using the passthrough interface. 708 */ 709 if (is_ctrl_ioctl(cmd)) 710 return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, 711 open_for_write); 712 713 ret = nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write); 714 out_unlock: 715 srcu_read_unlock(&head->srcu, srcu_idx); 716 return ret; 717 } 718 719 long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd, 720 unsigned long arg) 721 { 722 bool open_for_write = file->f_mode & FMODE_WRITE; 723 struct cdev *cdev = file_inode(file)->i_cdev; 724 struct nvme_ns_head *head = 725 container_of(cdev, struct nvme_ns_head, cdev); 726 void __user *argp = (void __user *)arg; 727 struct nvme_ns *ns; 728 int srcu_idx, ret = -EWOULDBLOCK; 729 730 srcu_idx = srcu_read_lock(&head->srcu); 731 ns = nvme_find_path(head); 732 if (!ns) 733 goto out_unlock; 734 735 if (is_ctrl_ioctl(cmd)) 736 return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, 737 open_for_write); 738 739 ret = nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write); 740 out_unlock: 741 srcu_read_unlock(&head->srcu, srcu_idx); 742 return ret; 743 } 744 745 int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd, 746 unsigned int issue_flags) 747 { 748 struct cdev *cdev = file_inode(ioucmd->file)->i_cdev; 749 struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev); 750 int srcu_idx = srcu_read_lock(&head->srcu); 751 struct nvme_ns *ns = nvme_find_path(head); 752 int ret = -EINVAL; 753 754 if (ns) 755 ret = nvme_ns_uring_cmd(ns, ioucmd, issue_flags); 756 srcu_read_unlock(&head->srcu, srcu_idx); 757 return ret; 758 } 759 #endif /* CONFIG_NVME_MULTIPATH */ 760 761 int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) 762 { 763 struct nvme_ctrl *ctrl = ioucmd->file->private_data; 764 int ret; 765 766 /* IOPOLL not supported yet */ 767 if (issue_flags & IO_URING_F_IOPOLL) 768 return -EOPNOTSUPP; 769 770 ret = nvme_uring_cmd_checks(issue_flags); 771 if (ret) 772 return ret; 773 774 switch (ioucmd->cmd_op) { 775 case NVME_URING_CMD_ADMIN: 776 ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, false); 777 break; 778 case NVME_URING_CMD_ADMIN_VEC: 779 ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, true); 780 break; 781 default: 782 ret = -ENOTTY; 783 } 784 785 return ret; 786 } 787 788 static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp, 789 bool open_for_write) 790 { 791 struct nvme_ns *ns; 792 int ret; 793 794 down_read(&ctrl->namespaces_rwsem); 795 if (list_empty(&ctrl->namespaces)) { 796 ret = -ENOTTY; 797 goto out_unlock; 798 } 799 800 ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list); 801 if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) { 802 dev_warn(ctrl->device, 803 "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n"); 804 ret = -EINVAL; 805 goto out_unlock; 806 } 807 808 dev_warn(ctrl->device, 809 "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n"); 810 kref_get(&ns->kref); 811 up_read(&ctrl->namespaces_rwsem); 812 813 ret = nvme_user_cmd(ctrl, ns, argp, 0, open_for_write); 814 nvme_put_ns(ns); 815 return ret; 816 817 out_unlock: 818 up_read(&ctrl->namespaces_rwsem); 819 return ret; 820 } 821 822 long nvme_dev_ioctl(struct file *file, unsigned int cmd, 823 unsigned long arg) 824 { 825 bool open_for_write = file->f_mode & FMODE_WRITE; 826 struct nvme_ctrl *ctrl = file->private_data; 827 void __user *argp = (void __user *)arg; 828 829 switch (cmd) { 830 case NVME_IOCTL_ADMIN_CMD: 831 return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write); 832 case NVME_IOCTL_ADMIN64_CMD: 833 return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write); 834 case NVME_IOCTL_IO_CMD: 835 return nvme_dev_user_cmd(ctrl, argp, open_for_write); 836 case NVME_IOCTL_RESET: 837 if (!capable(CAP_SYS_ADMIN)) 838 return -EACCES; 839 dev_warn(ctrl->device, "resetting controller\n"); 840 return nvme_reset_ctrl_sync(ctrl); 841 case NVME_IOCTL_SUBSYS_RESET: 842 if (!capable(CAP_SYS_ADMIN)) 843 return -EACCES; 844 return nvme_reset_subsystem(ctrl); 845 case NVME_IOCTL_RESCAN: 846 if (!capable(CAP_SYS_ADMIN)) 847 return -EACCES; 848 nvme_queue_scan(ctrl); 849 return 0; 850 default: 851 return -ENOTTY; 852 } 853 } 854