1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2011-2014, Intel Corporation. 4 * Copyright (c) 2017-2021 Christoph Hellwig. 5 */ 6 #include <linux/bio-integrity.h> 7 #include <linux/ptrace.h> /* for force_successful_syscall_return */ 8 #include <linux/nvme_ioctl.h> 9 #include <linux/io_uring/cmd.h> 10 #include "nvme.h" 11 12 enum { 13 NVME_IOCTL_VEC = (1 << 0), 14 NVME_IOCTL_PARTITION = (1 << 1), 15 }; 16 17 static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, 18 unsigned int flags, bool open_for_write) 19 { 20 u32 effects; 21 22 /* 23 * Do not allow unprivileged passthrough on partitions, as that allows an 24 * escape from the containment of the partition. 25 */ 26 if (flags & NVME_IOCTL_PARTITION) 27 goto admin; 28 29 /* 30 * Do not allow unprivileged processes to send vendor specific or fabrics 31 * commands as we can't be sure about their effects. 32 */ 33 if (c->common.opcode >= nvme_cmd_vendor_start || 34 c->common.opcode == nvme_fabrics_command) 35 goto admin; 36 37 /* 38 * Do not allow unprivileged passthrough of admin commands except 39 * for a subset of identify commands that contain information required 40 * to form proper I/O commands in userspace and do not expose any 41 * potentially sensitive information. 42 */ 43 if (!ns) { 44 if (c->common.opcode == nvme_admin_identify) { 45 switch (c->identify.cns) { 46 case NVME_ID_CNS_NS: 47 case NVME_ID_CNS_CS_NS: 48 case NVME_ID_CNS_NS_CS_INDEP: 49 case NVME_ID_CNS_CS_CTRL: 50 case NVME_ID_CNS_CTRL: 51 return true; 52 } 53 } 54 goto admin; 55 } 56 57 /* 58 * Check if the controller provides a Commands Supported and Effects log 59 * and marks this command as supported. If not reject unprivileged 60 * passthrough. 61 */ 62 effects = nvme_command_effects(ns->ctrl, ns, c->common.opcode); 63 if (!(effects & NVME_CMD_EFFECTS_CSUPP)) 64 goto admin; 65 66 /* 67 * Don't allow passthrough for command that have intrusive (or unknown) 68 * effects. 69 */ 70 if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC | 71 NVME_CMD_EFFECTS_UUID_SEL | 72 NVME_CMD_EFFECTS_SCOPE_MASK)) 73 goto admin; 74 75 /* 76 * Only allow I/O commands that transfer data to the controller or that 77 * change the logical block contents if the file descriptor is open for 78 * writing. 79 */ 80 if ((nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC)) && 81 !open_for_write) 82 goto admin; 83 84 return true; 85 admin: 86 return capable(CAP_SYS_ADMIN); 87 } 88 89 /* 90 * Convert integer values from ioctl structures to user pointers, silently 91 * ignoring the upper bits in the compat case to match behaviour of 32-bit 92 * kernels. 93 */ 94 static void __user *nvme_to_user_ptr(uintptr_t ptrval) 95 { 96 if (in_compat_syscall()) 97 ptrval = (compat_uptr_t)ptrval; 98 return (void __user *)ptrval; 99 } 100 101 static struct request *nvme_alloc_user_request(struct request_queue *q, 102 struct nvme_command *cmd, blk_opf_t rq_flags, 103 blk_mq_req_flags_t blk_flags) 104 { 105 struct request *req; 106 107 req = blk_mq_alloc_request(q, nvme_req_op(cmd) | rq_flags, blk_flags); 108 if (IS_ERR(req)) 109 return req; 110 nvme_init_request(req, cmd); 111 nvme_req(req)->flags |= NVME_REQ_USERCMD; 112 return req; 113 } 114 115 static int nvme_map_user_request(struct request *req, u64 ubuffer, 116 unsigned bufflen, void __user *meta_buffer, unsigned meta_len, 117 u32 meta_seed, struct io_uring_cmd *ioucmd, unsigned int flags) 118 { 119 struct request_queue *q = req->q; 120 struct nvme_ns *ns = q->queuedata; 121 struct block_device *bdev = ns ? ns->disk->part0 : NULL; 122 struct bio *bio = NULL; 123 int ret; 124 125 if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) { 126 struct iov_iter iter; 127 128 /* fixedbufs is only for non-vectored io */ 129 if (WARN_ON_ONCE(flags & NVME_IOCTL_VEC)) 130 return -EINVAL; 131 ret = io_uring_cmd_import_fixed(ubuffer, bufflen, 132 rq_data_dir(req), &iter, ioucmd); 133 if (ret < 0) 134 goto out; 135 ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL); 136 } else { 137 ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer), 138 bufflen, GFP_KERNEL, flags & NVME_IOCTL_VEC, 0, 139 0, rq_data_dir(req)); 140 } 141 142 if (ret) 143 goto out; 144 145 bio = req->bio; 146 if (bdev) { 147 bio_set_dev(bio, bdev); 148 if (meta_buffer && meta_len) { 149 ret = bio_integrity_map_user(bio, meta_buffer, meta_len, 150 meta_seed); 151 if (ret) 152 goto out_unmap; 153 req->cmd_flags |= REQ_INTEGRITY; 154 } 155 } 156 157 return ret; 158 159 out_unmap: 160 if (bio) 161 blk_rq_unmap_user(bio); 162 out: 163 blk_mq_free_request(req); 164 return ret; 165 } 166 167 static int nvme_submit_user_cmd(struct request_queue *q, 168 struct nvme_command *cmd, u64 ubuffer, unsigned bufflen, 169 void __user *meta_buffer, unsigned meta_len, u32 meta_seed, 170 u64 *result, unsigned timeout, unsigned int flags) 171 { 172 struct nvme_ns *ns = q->queuedata; 173 struct nvme_ctrl *ctrl; 174 struct request *req; 175 struct bio *bio; 176 u32 effects; 177 int ret; 178 179 req = nvme_alloc_user_request(q, cmd, 0, 0); 180 if (IS_ERR(req)) 181 return PTR_ERR(req); 182 183 req->timeout = timeout; 184 if (ubuffer && bufflen) { 185 ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer, 186 meta_len, meta_seed, NULL, flags); 187 if (ret) 188 return ret; 189 } 190 191 bio = req->bio; 192 ctrl = nvme_req(req)->ctrl; 193 194 effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode); 195 ret = nvme_execute_rq(req, false); 196 if (result) 197 *result = le64_to_cpu(nvme_req(req)->result.u64); 198 if (bio) 199 blk_rq_unmap_user(bio); 200 blk_mq_free_request(req); 201 202 if (effects) 203 nvme_passthru_end(ctrl, ns, effects, cmd, ret); 204 205 return ret; 206 } 207 208 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) 209 { 210 struct nvme_user_io io; 211 struct nvme_command c; 212 unsigned length, meta_len; 213 void __user *metadata; 214 215 if (copy_from_user(&io, uio, sizeof(io))) 216 return -EFAULT; 217 if (io.flags) 218 return -EINVAL; 219 220 switch (io.opcode) { 221 case nvme_cmd_write: 222 case nvme_cmd_read: 223 case nvme_cmd_compare: 224 break; 225 default: 226 return -EINVAL; 227 } 228 229 length = (io.nblocks + 1) << ns->head->lba_shift; 230 231 if ((io.control & NVME_RW_PRINFO_PRACT) && 232 (ns->head->ms == ns->head->pi_size)) { 233 /* 234 * Protection information is stripped/inserted by the 235 * controller. 236 */ 237 if (nvme_to_user_ptr(io.metadata)) 238 return -EINVAL; 239 meta_len = 0; 240 metadata = NULL; 241 } else { 242 meta_len = (io.nblocks + 1) * ns->head->ms; 243 metadata = nvme_to_user_ptr(io.metadata); 244 } 245 246 if (ns->head->features & NVME_NS_EXT_LBAS) { 247 length += meta_len; 248 meta_len = 0; 249 } else if (meta_len) { 250 if ((io.metadata & 3) || !io.metadata) 251 return -EINVAL; 252 } 253 254 memset(&c, 0, sizeof(c)); 255 c.rw.opcode = io.opcode; 256 c.rw.flags = io.flags; 257 c.rw.nsid = cpu_to_le32(ns->head->ns_id); 258 c.rw.slba = cpu_to_le64(io.slba); 259 c.rw.length = cpu_to_le16(io.nblocks); 260 c.rw.control = cpu_to_le16(io.control); 261 c.rw.dsmgmt = cpu_to_le32(io.dsmgmt); 262 c.rw.reftag = cpu_to_le32(io.reftag); 263 c.rw.apptag = cpu_to_le16(io.apptag); 264 c.rw.appmask = cpu_to_le16(io.appmask); 265 266 return nvme_submit_user_cmd(ns->queue, &c, io.addr, length, metadata, 267 meta_len, lower_32_bits(io.slba), NULL, 0, 0); 268 } 269 270 static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl, 271 struct nvme_ns *ns, __u32 nsid) 272 { 273 if (ns && nsid != ns->head->ns_id) { 274 dev_err(ctrl->device, 275 "%s: nsid (%u) in cmd does not match nsid (%u)" 276 "of namespace\n", 277 current->comm, nsid, ns->head->ns_id); 278 return false; 279 } 280 281 return true; 282 } 283 284 static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 285 struct nvme_passthru_cmd __user *ucmd, unsigned int flags, 286 bool open_for_write) 287 { 288 struct nvme_passthru_cmd cmd; 289 struct nvme_command c; 290 unsigned timeout = 0; 291 u64 result; 292 int status; 293 294 if (copy_from_user(&cmd, ucmd, sizeof(cmd))) 295 return -EFAULT; 296 if (cmd.flags) 297 return -EINVAL; 298 if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid)) 299 return -EINVAL; 300 301 memset(&c, 0, sizeof(c)); 302 c.common.opcode = cmd.opcode; 303 c.common.flags = cmd.flags; 304 c.common.nsid = cpu_to_le32(cmd.nsid); 305 c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); 306 c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); 307 c.common.cdw10 = cpu_to_le32(cmd.cdw10); 308 c.common.cdw11 = cpu_to_le32(cmd.cdw11); 309 c.common.cdw12 = cpu_to_le32(cmd.cdw12); 310 c.common.cdw13 = cpu_to_le32(cmd.cdw13); 311 c.common.cdw14 = cpu_to_le32(cmd.cdw14); 312 c.common.cdw15 = cpu_to_le32(cmd.cdw15); 313 314 if (!nvme_cmd_allowed(ns, &c, 0, open_for_write)) 315 return -EACCES; 316 317 if (cmd.timeout_ms) 318 timeout = msecs_to_jiffies(cmd.timeout_ms); 319 320 status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, 321 cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata), 322 cmd.metadata_len, 0, &result, timeout, 0); 323 324 if (status >= 0) { 325 if (put_user(result, &ucmd->result)) 326 return -EFAULT; 327 } 328 329 return status; 330 } 331 332 static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 333 struct nvme_passthru_cmd64 __user *ucmd, unsigned int flags, 334 bool open_for_write) 335 { 336 struct nvme_passthru_cmd64 cmd; 337 struct nvme_command c; 338 unsigned timeout = 0; 339 int status; 340 341 if (copy_from_user(&cmd, ucmd, sizeof(cmd))) 342 return -EFAULT; 343 if (cmd.flags) 344 return -EINVAL; 345 if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid)) 346 return -EINVAL; 347 348 memset(&c, 0, sizeof(c)); 349 c.common.opcode = cmd.opcode; 350 c.common.flags = cmd.flags; 351 c.common.nsid = cpu_to_le32(cmd.nsid); 352 c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); 353 c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); 354 c.common.cdw10 = cpu_to_le32(cmd.cdw10); 355 c.common.cdw11 = cpu_to_le32(cmd.cdw11); 356 c.common.cdw12 = cpu_to_le32(cmd.cdw12); 357 c.common.cdw13 = cpu_to_le32(cmd.cdw13); 358 c.common.cdw14 = cpu_to_le32(cmd.cdw14); 359 c.common.cdw15 = cpu_to_le32(cmd.cdw15); 360 361 if (!nvme_cmd_allowed(ns, &c, flags, open_for_write)) 362 return -EACCES; 363 364 if (cmd.timeout_ms) 365 timeout = msecs_to_jiffies(cmd.timeout_ms); 366 367 status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, 368 cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata), 369 cmd.metadata_len, 0, &cmd.result, timeout, flags); 370 371 if (status >= 0) { 372 if (put_user(cmd.result, &ucmd->result)) 373 return -EFAULT; 374 } 375 376 return status; 377 } 378 379 struct nvme_uring_data { 380 __u64 metadata; 381 __u64 addr; 382 __u32 data_len; 383 __u32 metadata_len; 384 __u32 timeout_ms; 385 }; 386 387 /* 388 * This overlays struct io_uring_cmd pdu. 389 * Expect build errors if this grows larger than that. 390 */ 391 struct nvme_uring_cmd_pdu { 392 struct request *req; 393 struct bio *bio; 394 u64 result; 395 int status; 396 }; 397 398 static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu( 399 struct io_uring_cmd *ioucmd) 400 { 401 return (struct nvme_uring_cmd_pdu *)&ioucmd->pdu; 402 } 403 404 static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd, 405 unsigned issue_flags) 406 { 407 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 408 409 if (pdu->bio) 410 blk_rq_unmap_user(pdu->bio); 411 io_uring_cmd_done(ioucmd, pdu->status, pdu->result, issue_flags); 412 } 413 414 static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req, 415 blk_status_t err) 416 { 417 struct io_uring_cmd *ioucmd = req->end_io_data; 418 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 419 420 if (nvme_req(req)->flags & NVME_REQ_CANCELLED) 421 pdu->status = -EINTR; 422 else 423 pdu->status = nvme_req(req)->status; 424 pdu->result = le64_to_cpu(nvme_req(req)->result.u64); 425 426 /* 427 * For iopoll, complete it directly. Note that using the uring_cmd 428 * helper for this is safe only because we check blk_rq_is_poll(). 429 * As that returns false if we're NOT on a polled queue, then it's 430 * safe to use the polled completion helper. 431 * 432 * Otherwise, move the completion to task work. 433 */ 434 if (blk_rq_is_poll(req)) { 435 if (pdu->bio) 436 blk_rq_unmap_user(pdu->bio); 437 io_uring_cmd_iopoll_done(ioucmd, pdu->result, pdu->status); 438 } else { 439 io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb); 440 } 441 442 return RQ_END_IO_FREE; 443 } 444 445 static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 446 struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec) 447 { 448 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 449 const struct nvme_uring_cmd *cmd = io_uring_sqe_cmd(ioucmd->sqe); 450 struct request_queue *q = ns ? ns->queue : ctrl->admin_q; 451 struct nvme_uring_data d; 452 struct nvme_command c; 453 struct request *req; 454 blk_opf_t rq_flags = REQ_ALLOC_CACHE; 455 blk_mq_req_flags_t blk_flags = 0; 456 int ret; 457 458 c.common.opcode = READ_ONCE(cmd->opcode); 459 c.common.flags = READ_ONCE(cmd->flags); 460 if (c.common.flags) 461 return -EINVAL; 462 463 c.common.command_id = 0; 464 c.common.nsid = cpu_to_le32(cmd->nsid); 465 if (!nvme_validate_passthru_nsid(ctrl, ns, le32_to_cpu(c.common.nsid))) 466 return -EINVAL; 467 468 c.common.cdw2[0] = cpu_to_le32(READ_ONCE(cmd->cdw2)); 469 c.common.cdw2[1] = cpu_to_le32(READ_ONCE(cmd->cdw3)); 470 c.common.metadata = 0; 471 c.common.dptr.prp1 = c.common.dptr.prp2 = 0; 472 c.common.cdw10 = cpu_to_le32(READ_ONCE(cmd->cdw10)); 473 c.common.cdw11 = cpu_to_le32(READ_ONCE(cmd->cdw11)); 474 c.common.cdw12 = cpu_to_le32(READ_ONCE(cmd->cdw12)); 475 c.common.cdw13 = cpu_to_le32(READ_ONCE(cmd->cdw13)); 476 c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14)); 477 c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15)); 478 479 if (!nvme_cmd_allowed(ns, &c, 0, ioucmd->file->f_mode & FMODE_WRITE)) 480 return -EACCES; 481 482 d.metadata = READ_ONCE(cmd->metadata); 483 d.addr = READ_ONCE(cmd->addr); 484 d.data_len = READ_ONCE(cmd->data_len); 485 d.metadata_len = READ_ONCE(cmd->metadata_len); 486 d.timeout_ms = READ_ONCE(cmd->timeout_ms); 487 488 if (issue_flags & IO_URING_F_NONBLOCK) { 489 rq_flags |= REQ_NOWAIT; 490 blk_flags = BLK_MQ_REQ_NOWAIT; 491 } 492 if (issue_flags & IO_URING_F_IOPOLL) 493 rq_flags |= REQ_POLLED; 494 495 req = nvme_alloc_user_request(q, &c, rq_flags, blk_flags); 496 if (IS_ERR(req)) 497 return PTR_ERR(req); 498 req->timeout = d.timeout_ms ? msecs_to_jiffies(d.timeout_ms) : 0; 499 500 if (d.addr && d.data_len) { 501 ret = nvme_map_user_request(req, d.addr, 502 d.data_len, nvme_to_user_ptr(d.metadata), 503 d.metadata_len, 0, ioucmd, vec); 504 if (ret) 505 return ret; 506 } 507 508 /* to free bio on completion, as req->bio will be null at that time */ 509 pdu->bio = req->bio; 510 pdu->req = req; 511 req->end_io_data = ioucmd; 512 req->end_io = nvme_uring_cmd_end_io; 513 blk_execute_rq_nowait(req, false); 514 return -EIOCBQUEUED; 515 } 516 517 static bool is_ctrl_ioctl(unsigned int cmd) 518 { 519 if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD) 520 return true; 521 if (is_sed_ioctl(cmd)) 522 return true; 523 return false; 524 } 525 526 static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd, 527 void __user *argp, bool open_for_write) 528 { 529 switch (cmd) { 530 case NVME_IOCTL_ADMIN_CMD: 531 return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write); 532 case NVME_IOCTL_ADMIN64_CMD: 533 return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write); 534 default: 535 return sed_ioctl(ctrl->opal_dev, cmd, argp); 536 } 537 } 538 539 #ifdef COMPAT_FOR_U64_ALIGNMENT 540 struct nvme_user_io32 { 541 __u8 opcode; 542 __u8 flags; 543 __u16 control; 544 __u16 nblocks; 545 __u16 rsvd; 546 __u64 metadata; 547 __u64 addr; 548 __u64 slba; 549 __u32 dsmgmt; 550 __u32 reftag; 551 __u16 apptag; 552 __u16 appmask; 553 } __attribute__((__packed__)); 554 #define NVME_IOCTL_SUBMIT_IO32 _IOW('N', 0x42, struct nvme_user_io32) 555 #endif /* COMPAT_FOR_U64_ALIGNMENT */ 556 557 static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, 558 void __user *argp, unsigned int flags, bool open_for_write) 559 { 560 switch (cmd) { 561 case NVME_IOCTL_ID: 562 force_successful_syscall_return(); 563 return ns->head->ns_id; 564 case NVME_IOCTL_IO_CMD: 565 return nvme_user_cmd(ns->ctrl, ns, argp, flags, open_for_write); 566 /* 567 * struct nvme_user_io can have different padding on some 32-bit ABIs. 568 * Just accept the compat version as all fields that are used are the 569 * same size and at the same offset. 570 */ 571 #ifdef COMPAT_FOR_U64_ALIGNMENT 572 case NVME_IOCTL_SUBMIT_IO32: 573 #endif 574 case NVME_IOCTL_SUBMIT_IO: 575 return nvme_submit_io(ns, argp); 576 case NVME_IOCTL_IO64_CMD_VEC: 577 flags |= NVME_IOCTL_VEC; 578 fallthrough; 579 case NVME_IOCTL_IO64_CMD: 580 return nvme_user_cmd64(ns->ctrl, ns, argp, flags, 581 open_for_write); 582 default: 583 return -ENOTTY; 584 } 585 } 586 587 int nvme_ioctl(struct block_device *bdev, blk_mode_t mode, 588 unsigned int cmd, unsigned long arg) 589 { 590 struct nvme_ns *ns = bdev->bd_disk->private_data; 591 bool open_for_write = mode & BLK_OPEN_WRITE; 592 void __user *argp = (void __user *)arg; 593 unsigned int flags = 0; 594 595 if (bdev_is_partition(bdev)) 596 flags |= NVME_IOCTL_PARTITION; 597 598 if (is_ctrl_ioctl(cmd)) 599 return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); 600 return nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write); 601 } 602 603 long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 604 { 605 struct nvme_ns *ns = 606 container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev); 607 bool open_for_write = file->f_mode & FMODE_WRITE; 608 void __user *argp = (void __user *)arg; 609 610 if (is_ctrl_ioctl(cmd)) 611 return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); 612 return nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write); 613 } 614 615 static int nvme_uring_cmd_checks(unsigned int issue_flags) 616 { 617 618 /* NVMe passthrough requires big SQE/CQE support */ 619 if ((issue_flags & (IO_URING_F_SQE128|IO_URING_F_CQE32)) != 620 (IO_URING_F_SQE128|IO_URING_F_CQE32)) 621 return -EOPNOTSUPP; 622 return 0; 623 } 624 625 static int nvme_ns_uring_cmd(struct nvme_ns *ns, struct io_uring_cmd *ioucmd, 626 unsigned int issue_flags) 627 { 628 struct nvme_ctrl *ctrl = ns->ctrl; 629 int ret; 630 631 BUILD_BUG_ON(sizeof(struct nvme_uring_cmd_pdu) > sizeof(ioucmd->pdu)); 632 633 ret = nvme_uring_cmd_checks(issue_flags); 634 if (ret) 635 return ret; 636 637 switch (ioucmd->cmd_op) { 638 case NVME_URING_CMD_IO: 639 ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, false); 640 break; 641 case NVME_URING_CMD_IO_VEC: 642 ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, true); 643 break; 644 default: 645 ret = -ENOTTY; 646 } 647 648 return ret; 649 } 650 651 int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) 652 { 653 struct nvme_ns *ns = container_of(file_inode(ioucmd->file)->i_cdev, 654 struct nvme_ns, cdev); 655 656 return nvme_ns_uring_cmd(ns, ioucmd, issue_flags); 657 } 658 659 int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd, 660 struct io_comp_batch *iob, 661 unsigned int poll_flags) 662 { 663 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 664 struct request *req = pdu->req; 665 666 if (req && blk_rq_is_poll(req)) 667 return blk_rq_poll(req, iob, poll_flags); 668 return 0; 669 } 670 #ifdef CONFIG_NVME_MULTIPATH 671 static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, 672 void __user *argp, struct nvme_ns_head *head, int srcu_idx, 673 bool open_for_write) 674 __releases(&head->srcu) 675 { 676 struct nvme_ctrl *ctrl = ns->ctrl; 677 int ret; 678 679 nvme_get_ctrl(ns->ctrl); 680 srcu_read_unlock(&head->srcu, srcu_idx); 681 ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); 682 683 nvme_put_ctrl(ctrl); 684 return ret; 685 } 686 687 int nvme_ns_head_ioctl(struct block_device *bdev, blk_mode_t mode, 688 unsigned int cmd, unsigned long arg) 689 { 690 struct nvme_ns_head *head = bdev->bd_disk->private_data; 691 bool open_for_write = mode & BLK_OPEN_WRITE; 692 void __user *argp = (void __user *)arg; 693 struct nvme_ns *ns; 694 int srcu_idx, ret = -EWOULDBLOCK; 695 unsigned int flags = 0; 696 697 if (bdev_is_partition(bdev)) 698 flags |= NVME_IOCTL_PARTITION; 699 700 srcu_idx = srcu_read_lock(&head->srcu); 701 ns = nvme_find_path(head); 702 if (!ns) 703 goto out_unlock; 704 705 /* 706 * Handle ioctls that apply to the controller instead of the namespace 707 * seperately and drop the ns SRCU reference early. This avoids a 708 * deadlock when deleting namespaces using the passthrough interface. 709 */ 710 if (is_ctrl_ioctl(cmd)) 711 return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, 712 open_for_write); 713 714 ret = nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write); 715 out_unlock: 716 srcu_read_unlock(&head->srcu, srcu_idx); 717 return ret; 718 } 719 720 long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd, 721 unsigned long arg) 722 { 723 bool open_for_write = file->f_mode & FMODE_WRITE; 724 struct cdev *cdev = file_inode(file)->i_cdev; 725 struct nvme_ns_head *head = 726 container_of(cdev, struct nvme_ns_head, cdev); 727 void __user *argp = (void __user *)arg; 728 struct nvme_ns *ns; 729 int srcu_idx, ret = -EWOULDBLOCK; 730 731 srcu_idx = srcu_read_lock(&head->srcu); 732 ns = nvme_find_path(head); 733 if (!ns) 734 goto out_unlock; 735 736 if (is_ctrl_ioctl(cmd)) 737 return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, 738 open_for_write); 739 740 ret = nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write); 741 out_unlock: 742 srcu_read_unlock(&head->srcu, srcu_idx); 743 return ret; 744 } 745 746 int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd, 747 unsigned int issue_flags) 748 { 749 struct cdev *cdev = file_inode(ioucmd->file)->i_cdev; 750 struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev); 751 int srcu_idx = srcu_read_lock(&head->srcu); 752 struct nvme_ns *ns = nvme_find_path(head); 753 int ret = -EINVAL; 754 755 if (ns) 756 ret = nvme_ns_uring_cmd(ns, ioucmd, issue_flags); 757 srcu_read_unlock(&head->srcu, srcu_idx); 758 return ret; 759 } 760 #endif /* CONFIG_NVME_MULTIPATH */ 761 762 int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) 763 { 764 struct nvme_ctrl *ctrl = ioucmd->file->private_data; 765 int ret; 766 767 /* IOPOLL not supported yet */ 768 if (issue_flags & IO_URING_F_IOPOLL) 769 return -EOPNOTSUPP; 770 771 ret = nvme_uring_cmd_checks(issue_flags); 772 if (ret) 773 return ret; 774 775 switch (ioucmd->cmd_op) { 776 case NVME_URING_CMD_ADMIN: 777 ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, false); 778 break; 779 case NVME_URING_CMD_ADMIN_VEC: 780 ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, true); 781 break; 782 default: 783 ret = -ENOTTY; 784 } 785 786 return ret; 787 } 788 789 static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp, 790 bool open_for_write) 791 { 792 struct nvme_ns *ns; 793 int ret, srcu_idx; 794 795 srcu_idx = srcu_read_lock(&ctrl->srcu); 796 if (list_empty(&ctrl->namespaces)) { 797 ret = -ENOTTY; 798 goto out_unlock; 799 } 800 801 ns = list_first_or_null_rcu(&ctrl->namespaces, struct nvme_ns, list); 802 if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) { 803 dev_warn(ctrl->device, 804 "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n"); 805 ret = -EINVAL; 806 goto out_unlock; 807 } 808 809 dev_warn(ctrl->device, 810 "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n"); 811 if (!nvme_get_ns(ns)) { 812 ret = -ENXIO; 813 goto out_unlock; 814 } 815 srcu_read_unlock(&ctrl->srcu, srcu_idx); 816 817 ret = nvme_user_cmd(ctrl, ns, argp, 0, open_for_write); 818 nvme_put_ns(ns); 819 return ret; 820 821 out_unlock: 822 srcu_read_unlock(&ctrl->srcu, srcu_idx); 823 return ret; 824 } 825 826 long nvme_dev_ioctl(struct file *file, unsigned int cmd, 827 unsigned long arg) 828 { 829 bool open_for_write = file->f_mode & FMODE_WRITE; 830 struct nvme_ctrl *ctrl = file->private_data; 831 void __user *argp = (void __user *)arg; 832 833 switch (cmd) { 834 case NVME_IOCTL_ADMIN_CMD: 835 return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write); 836 case NVME_IOCTL_ADMIN64_CMD: 837 return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write); 838 case NVME_IOCTL_IO_CMD: 839 return nvme_dev_user_cmd(ctrl, argp, open_for_write); 840 case NVME_IOCTL_RESET: 841 if (!capable(CAP_SYS_ADMIN)) 842 return -EACCES; 843 dev_warn(ctrl->device, "resetting controller\n"); 844 return nvme_reset_ctrl_sync(ctrl); 845 case NVME_IOCTL_SUBSYS_RESET: 846 if (!capable(CAP_SYS_ADMIN)) 847 return -EACCES; 848 return nvme_reset_subsystem(ctrl); 849 case NVME_IOCTL_RESCAN: 850 if (!capable(CAP_SYS_ADMIN)) 851 return -EACCES; 852 nvme_queue_scan(ctrl); 853 return 0; 854 default: 855 return -ENOTTY; 856 } 857 } 858