1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2011-2014, Intel Corporation. 4 * Copyright (c) 2017-2021 Christoph Hellwig. 5 */ 6 #include <linux/ptrace.h> /* for force_successful_syscall_return */ 7 #include <linux/nvme_ioctl.h> 8 #include <linux/io_uring/cmd.h> 9 #include "nvme.h" 10 11 enum { 12 NVME_IOCTL_VEC = (1 << 0), 13 NVME_IOCTL_PARTITION = (1 << 1), 14 }; 15 16 static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, 17 unsigned int flags, bool open_for_write) 18 { 19 u32 effects; 20 21 /* 22 * Do not allow unprivileged passthrough on partitions, as that allows an 23 * escape from the containment of the partition. 24 */ 25 if (flags & NVME_IOCTL_PARTITION) 26 goto admin; 27 28 /* 29 * Do not allow unprivileged processes to send vendor specific or fabrics 30 * commands as we can't be sure about their effects. 31 */ 32 if (c->common.opcode >= nvme_cmd_vendor_start || 33 c->common.opcode == nvme_fabrics_command) 34 goto admin; 35 36 /* 37 * Do not allow unprivileged passthrough of admin commands except 38 * for a subset of identify commands that contain information required 39 * to form proper I/O commands in userspace and do not expose any 40 * potentially sensitive information. 41 */ 42 if (!ns) { 43 if (c->common.opcode == nvme_admin_identify) { 44 switch (c->identify.cns) { 45 case NVME_ID_CNS_NS: 46 case NVME_ID_CNS_CS_NS: 47 case NVME_ID_CNS_NS_CS_INDEP: 48 case NVME_ID_CNS_CS_CTRL: 49 case NVME_ID_CNS_CTRL: 50 return true; 51 } 52 } 53 goto admin; 54 } 55 56 /* 57 * Check if the controller provides a Commands Supported and Effects log 58 * and marks this command as supported. If not reject unprivileged 59 * passthrough. 60 */ 61 effects = nvme_command_effects(ns->ctrl, ns, c->common.opcode); 62 if (!(effects & NVME_CMD_EFFECTS_CSUPP)) 63 goto admin; 64 65 /* 66 * Don't allow passthrough for command that have intrusive (or unknown) 67 * effects. 68 */ 69 if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC | 70 NVME_CMD_EFFECTS_UUID_SEL | 71 NVME_CMD_EFFECTS_SCOPE_MASK)) 72 goto admin; 73 74 /* 75 * Only allow I/O commands that transfer data to the controller or that 76 * change the logical block contents if the file descriptor is open for 77 * writing. 78 */ 79 if ((nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC)) && 80 !open_for_write) 81 goto admin; 82 83 return true; 84 admin: 85 return capable(CAP_SYS_ADMIN); 86 } 87 88 /* 89 * Convert integer values from ioctl structures to user pointers, silently 90 * ignoring the upper bits in the compat case to match behaviour of 32-bit 91 * kernels. 92 */ 93 static void __user *nvme_to_user_ptr(uintptr_t ptrval) 94 { 95 if (in_compat_syscall()) 96 ptrval = (compat_uptr_t)ptrval; 97 return (void __user *)ptrval; 98 } 99 100 static struct request *nvme_alloc_user_request(struct request_queue *q, 101 struct nvme_command *cmd, blk_opf_t rq_flags, 102 blk_mq_req_flags_t blk_flags) 103 { 104 struct request *req; 105 106 req = blk_mq_alloc_request(q, nvme_req_op(cmd) | rq_flags, blk_flags); 107 if (IS_ERR(req)) 108 return req; 109 nvme_init_request(req, cmd); 110 nvme_req(req)->flags |= NVME_REQ_USERCMD; 111 return req; 112 } 113 114 static void nvme_unmap_bio(struct bio *bio) 115 { 116 if (bio_integrity(bio)) 117 bio_integrity_unmap_free_user(bio); 118 blk_rq_unmap_user(bio); 119 } 120 121 static int nvme_map_user_request(struct request *req, u64 ubuffer, 122 unsigned bufflen, void __user *meta_buffer, unsigned meta_len, 123 u32 meta_seed, struct io_uring_cmd *ioucmd, unsigned int flags) 124 { 125 struct request_queue *q = req->q; 126 struct nvme_ns *ns = q->queuedata; 127 struct block_device *bdev = ns ? ns->disk->part0 : NULL; 128 struct bio *bio = NULL; 129 int ret; 130 131 if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) { 132 struct iov_iter iter; 133 134 /* fixedbufs is only for non-vectored io */ 135 if (WARN_ON_ONCE(flags & NVME_IOCTL_VEC)) 136 return -EINVAL; 137 ret = io_uring_cmd_import_fixed(ubuffer, bufflen, 138 rq_data_dir(req), &iter, ioucmd); 139 if (ret < 0) 140 goto out; 141 ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL); 142 } else { 143 ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer), 144 bufflen, GFP_KERNEL, flags & NVME_IOCTL_VEC, 0, 145 0, rq_data_dir(req)); 146 } 147 148 if (ret) 149 goto out; 150 151 bio = req->bio; 152 if (bdev) { 153 bio_set_dev(bio, bdev); 154 if (meta_buffer && meta_len) { 155 ret = bio_integrity_map_user(bio, meta_buffer, meta_len, 156 meta_seed); 157 if (ret) 158 goto out_unmap; 159 req->cmd_flags |= REQ_INTEGRITY; 160 } 161 } 162 163 return ret; 164 165 out_unmap: 166 if (bio) 167 nvme_unmap_bio(bio); 168 out: 169 blk_mq_free_request(req); 170 return ret; 171 } 172 173 static int nvme_submit_user_cmd(struct request_queue *q, 174 struct nvme_command *cmd, u64 ubuffer, unsigned bufflen, 175 void __user *meta_buffer, unsigned meta_len, u32 meta_seed, 176 u64 *result, unsigned timeout, unsigned int flags) 177 { 178 struct nvme_ns *ns = q->queuedata; 179 struct nvme_ctrl *ctrl; 180 struct request *req; 181 struct bio *bio; 182 u32 effects; 183 int ret; 184 185 req = nvme_alloc_user_request(q, cmd, 0, 0); 186 if (IS_ERR(req)) 187 return PTR_ERR(req); 188 189 req->timeout = timeout; 190 if (ubuffer && bufflen) { 191 ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer, 192 meta_len, meta_seed, NULL, flags); 193 if (ret) 194 return ret; 195 } 196 197 bio = req->bio; 198 ctrl = nvme_req(req)->ctrl; 199 200 effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode); 201 ret = nvme_execute_rq(req, false); 202 if (result) 203 *result = le64_to_cpu(nvme_req(req)->result.u64); 204 if (bio) 205 nvme_unmap_bio(bio); 206 blk_mq_free_request(req); 207 208 if (effects) 209 nvme_passthru_end(ctrl, ns, effects, cmd, ret); 210 211 return ret; 212 } 213 214 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) 215 { 216 struct nvme_user_io io; 217 struct nvme_command c; 218 unsigned length, meta_len; 219 void __user *metadata; 220 221 if (copy_from_user(&io, uio, sizeof(io))) 222 return -EFAULT; 223 if (io.flags) 224 return -EINVAL; 225 226 switch (io.opcode) { 227 case nvme_cmd_write: 228 case nvme_cmd_read: 229 case nvme_cmd_compare: 230 break; 231 default: 232 return -EINVAL; 233 } 234 235 length = (io.nblocks + 1) << ns->head->lba_shift; 236 237 if ((io.control & NVME_RW_PRINFO_PRACT) && 238 (ns->head->ms == ns->head->pi_size)) { 239 /* 240 * Protection information is stripped/inserted by the 241 * controller. 242 */ 243 if (nvme_to_user_ptr(io.metadata)) 244 return -EINVAL; 245 meta_len = 0; 246 metadata = NULL; 247 } else { 248 meta_len = (io.nblocks + 1) * ns->head->ms; 249 metadata = nvme_to_user_ptr(io.metadata); 250 } 251 252 if (ns->head->features & NVME_NS_EXT_LBAS) { 253 length += meta_len; 254 meta_len = 0; 255 } else if (meta_len) { 256 if ((io.metadata & 3) || !io.metadata) 257 return -EINVAL; 258 } 259 260 memset(&c, 0, sizeof(c)); 261 c.rw.opcode = io.opcode; 262 c.rw.flags = io.flags; 263 c.rw.nsid = cpu_to_le32(ns->head->ns_id); 264 c.rw.slba = cpu_to_le64(io.slba); 265 c.rw.length = cpu_to_le16(io.nblocks); 266 c.rw.control = cpu_to_le16(io.control); 267 c.rw.dsmgmt = cpu_to_le32(io.dsmgmt); 268 c.rw.reftag = cpu_to_le32(io.reftag); 269 c.rw.apptag = cpu_to_le16(io.apptag); 270 c.rw.appmask = cpu_to_le16(io.appmask); 271 272 return nvme_submit_user_cmd(ns->queue, &c, io.addr, length, metadata, 273 meta_len, lower_32_bits(io.slba), NULL, 0, 0); 274 } 275 276 static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl, 277 struct nvme_ns *ns, __u32 nsid) 278 { 279 if (ns && nsid != ns->head->ns_id) { 280 dev_err(ctrl->device, 281 "%s: nsid (%u) in cmd does not match nsid (%u)" 282 "of namespace\n", 283 current->comm, nsid, ns->head->ns_id); 284 return false; 285 } 286 287 return true; 288 } 289 290 static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 291 struct nvme_passthru_cmd __user *ucmd, unsigned int flags, 292 bool open_for_write) 293 { 294 struct nvme_passthru_cmd cmd; 295 struct nvme_command c; 296 unsigned timeout = 0; 297 u64 result; 298 int status; 299 300 if (copy_from_user(&cmd, ucmd, sizeof(cmd))) 301 return -EFAULT; 302 if (cmd.flags) 303 return -EINVAL; 304 if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid)) 305 return -EINVAL; 306 307 memset(&c, 0, sizeof(c)); 308 c.common.opcode = cmd.opcode; 309 c.common.flags = cmd.flags; 310 c.common.nsid = cpu_to_le32(cmd.nsid); 311 c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); 312 c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); 313 c.common.cdw10 = cpu_to_le32(cmd.cdw10); 314 c.common.cdw11 = cpu_to_le32(cmd.cdw11); 315 c.common.cdw12 = cpu_to_le32(cmd.cdw12); 316 c.common.cdw13 = cpu_to_le32(cmd.cdw13); 317 c.common.cdw14 = cpu_to_le32(cmd.cdw14); 318 c.common.cdw15 = cpu_to_le32(cmd.cdw15); 319 320 if (!nvme_cmd_allowed(ns, &c, 0, open_for_write)) 321 return -EACCES; 322 323 if (cmd.timeout_ms) 324 timeout = msecs_to_jiffies(cmd.timeout_ms); 325 326 status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, 327 cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata), 328 cmd.metadata_len, 0, &result, timeout, 0); 329 330 if (status >= 0) { 331 if (put_user(result, &ucmd->result)) 332 return -EFAULT; 333 } 334 335 return status; 336 } 337 338 static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 339 struct nvme_passthru_cmd64 __user *ucmd, unsigned int flags, 340 bool open_for_write) 341 { 342 struct nvme_passthru_cmd64 cmd; 343 struct nvme_command c; 344 unsigned timeout = 0; 345 int status; 346 347 if (copy_from_user(&cmd, ucmd, sizeof(cmd))) 348 return -EFAULT; 349 if (cmd.flags) 350 return -EINVAL; 351 if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid)) 352 return -EINVAL; 353 354 memset(&c, 0, sizeof(c)); 355 c.common.opcode = cmd.opcode; 356 c.common.flags = cmd.flags; 357 c.common.nsid = cpu_to_le32(cmd.nsid); 358 c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); 359 c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); 360 c.common.cdw10 = cpu_to_le32(cmd.cdw10); 361 c.common.cdw11 = cpu_to_le32(cmd.cdw11); 362 c.common.cdw12 = cpu_to_le32(cmd.cdw12); 363 c.common.cdw13 = cpu_to_le32(cmd.cdw13); 364 c.common.cdw14 = cpu_to_le32(cmd.cdw14); 365 c.common.cdw15 = cpu_to_le32(cmd.cdw15); 366 367 if (!nvme_cmd_allowed(ns, &c, flags, open_for_write)) 368 return -EACCES; 369 370 if (cmd.timeout_ms) 371 timeout = msecs_to_jiffies(cmd.timeout_ms); 372 373 status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, 374 cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata), 375 cmd.metadata_len, 0, &cmd.result, timeout, flags); 376 377 if (status >= 0) { 378 if (put_user(cmd.result, &ucmd->result)) 379 return -EFAULT; 380 } 381 382 return status; 383 } 384 385 struct nvme_uring_data { 386 __u64 metadata; 387 __u64 addr; 388 __u32 data_len; 389 __u32 metadata_len; 390 __u32 timeout_ms; 391 }; 392 393 /* 394 * This overlays struct io_uring_cmd pdu. 395 * Expect build errors if this grows larger than that. 396 */ 397 struct nvme_uring_cmd_pdu { 398 struct request *req; 399 struct bio *bio; 400 u64 result; 401 int status; 402 }; 403 404 static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu( 405 struct io_uring_cmd *ioucmd) 406 { 407 return (struct nvme_uring_cmd_pdu *)&ioucmd->pdu; 408 } 409 410 static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd, 411 unsigned issue_flags) 412 { 413 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 414 415 if (pdu->bio) 416 nvme_unmap_bio(pdu->bio); 417 io_uring_cmd_done(ioucmd, pdu->status, pdu->result, issue_flags); 418 } 419 420 static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req, 421 blk_status_t err) 422 { 423 struct io_uring_cmd *ioucmd = req->end_io_data; 424 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 425 426 if (nvme_req(req)->flags & NVME_REQ_CANCELLED) 427 pdu->status = -EINTR; 428 else 429 pdu->status = nvme_req(req)->status; 430 pdu->result = le64_to_cpu(nvme_req(req)->result.u64); 431 432 /* 433 * For iopoll, complete it directly. Note that using the uring_cmd 434 * helper for this is safe only because we check blk_rq_is_poll(). 435 * As that returns false if we're NOT on a polled queue, then it's 436 * safe to use the polled completion helper. 437 * 438 * Otherwise, move the completion to task work. 439 */ 440 if (blk_rq_is_poll(req)) { 441 if (pdu->bio) 442 nvme_unmap_bio(pdu->bio); 443 io_uring_cmd_iopoll_done(ioucmd, pdu->result, pdu->status); 444 } else { 445 io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb); 446 } 447 448 return RQ_END_IO_FREE; 449 } 450 451 static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 452 struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec) 453 { 454 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 455 const struct nvme_uring_cmd *cmd = io_uring_sqe_cmd(ioucmd->sqe); 456 struct request_queue *q = ns ? ns->queue : ctrl->admin_q; 457 struct nvme_uring_data d; 458 struct nvme_command c; 459 struct request *req; 460 blk_opf_t rq_flags = REQ_ALLOC_CACHE; 461 blk_mq_req_flags_t blk_flags = 0; 462 int ret; 463 464 c.common.opcode = READ_ONCE(cmd->opcode); 465 c.common.flags = READ_ONCE(cmd->flags); 466 if (c.common.flags) 467 return -EINVAL; 468 469 c.common.command_id = 0; 470 c.common.nsid = cpu_to_le32(cmd->nsid); 471 if (!nvme_validate_passthru_nsid(ctrl, ns, le32_to_cpu(c.common.nsid))) 472 return -EINVAL; 473 474 c.common.cdw2[0] = cpu_to_le32(READ_ONCE(cmd->cdw2)); 475 c.common.cdw2[1] = cpu_to_le32(READ_ONCE(cmd->cdw3)); 476 c.common.metadata = 0; 477 c.common.dptr.prp1 = c.common.dptr.prp2 = 0; 478 c.common.cdw10 = cpu_to_le32(READ_ONCE(cmd->cdw10)); 479 c.common.cdw11 = cpu_to_le32(READ_ONCE(cmd->cdw11)); 480 c.common.cdw12 = cpu_to_le32(READ_ONCE(cmd->cdw12)); 481 c.common.cdw13 = cpu_to_le32(READ_ONCE(cmd->cdw13)); 482 c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14)); 483 c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15)); 484 485 if (!nvme_cmd_allowed(ns, &c, 0, ioucmd->file->f_mode & FMODE_WRITE)) 486 return -EACCES; 487 488 d.metadata = READ_ONCE(cmd->metadata); 489 d.addr = READ_ONCE(cmd->addr); 490 d.data_len = READ_ONCE(cmd->data_len); 491 d.metadata_len = READ_ONCE(cmd->metadata_len); 492 d.timeout_ms = READ_ONCE(cmd->timeout_ms); 493 494 if (issue_flags & IO_URING_F_NONBLOCK) { 495 rq_flags |= REQ_NOWAIT; 496 blk_flags = BLK_MQ_REQ_NOWAIT; 497 } 498 if (issue_flags & IO_URING_F_IOPOLL) 499 rq_flags |= REQ_POLLED; 500 501 req = nvme_alloc_user_request(q, &c, rq_flags, blk_flags); 502 if (IS_ERR(req)) 503 return PTR_ERR(req); 504 req->timeout = d.timeout_ms ? msecs_to_jiffies(d.timeout_ms) : 0; 505 506 if (d.addr && d.data_len) { 507 ret = nvme_map_user_request(req, d.addr, 508 d.data_len, nvme_to_user_ptr(d.metadata), 509 d.metadata_len, 0, ioucmd, vec); 510 if (ret) 511 return ret; 512 } 513 514 /* to free bio on completion, as req->bio will be null at that time */ 515 pdu->bio = req->bio; 516 pdu->req = req; 517 req->end_io_data = ioucmd; 518 req->end_io = nvme_uring_cmd_end_io; 519 blk_execute_rq_nowait(req, false); 520 return -EIOCBQUEUED; 521 } 522 523 static bool is_ctrl_ioctl(unsigned int cmd) 524 { 525 if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD) 526 return true; 527 if (is_sed_ioctl(cmd)) 528 return true; 529 return false; 530 } 531 532 static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd, 533 void __user *argp, bool open_for_write) 534 { 535 switch (cmd) { 536 case NVME_IOCTL_ADMIN_CMD: 537 return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write); 538 case NVME_IOCTL_ADMIN64_CMD: 539 return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write); 540 default: 541 return sed_ioctl(ctrl->opal_dev, cmd, argp); 542 } 543 } 544 545 #ifdef COMPAT_FOR_U64_ALIGNMENT 546 struct nvme_user_io32 { 547 __u8 opcode; 548 __u8 flags; 549 __u16 control; 550 __u16 nblocks; 551 __u16 rsvd; 552 __u64 metadata; 553 __u64 addr; 554 __u64 slba; 555 __u32 dsmgmt; 556 __u32 reftag; 557 __u16 apptag; 558 __u16 appmask; 559 } __attribute__((__packed__)); 560 #define NVME_IOCTL_SUBMIT_IO32 _IOW('N', 0x42, struct nvme_user_io32) 561 #endif /* COMPAT_FOR_U64_ALIGNMENT */ 562 563 static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, 564 void __user *argp, unsigned int flags, bool open_for_write) 565 { 566 switch (cmd) { 567 case NVME_IOCTL_ID: 568 force_successful_syscall_return(); 569 return ns->head->ns_id; 570 case NVME_IOCTL_IO_CMD: 571 return nvme_user_cmd(ns->ctrl, ns, argp, flags, open_for_write); 572 /* 573 * struct nvme_user_io can have different padding on some 32-bit ABIs. 574 * Just accept the compat version as all fields that are used are the 575 * same size and at the same offset. 576 */ 577 #ifdef COMPAT_FOR_U64_ALIGNMENT 578 case NVME_IOCTL_SUBMIT_IO32: 579 #endif 580 case NVME_IOCTL_SUBMIT_IO: 581 return nvme_submit_io(ns, argp); 582 case NVME_IOCTL_IO64_CMD_VEC: 583 flags |= NVME_IOCTL_VEC; 584 fallthrough; 585 case NVME_IOCTL_IO64_CMD: 586 return nvme_user_cmd64(ns->ctrl, ns, argp, flags, 587 open_for_write); 588 default: 589 return -ENOTTY; 590 } 591 } 592 593 int nvme_ioctl(struct block_device *bdev, blk_mode_t mode, 594 unsigned int cmd, unsigned long arg) 595 { 596 struct nvme_ns *ns = bdev->bd_disk->private_data; 597 bool open_for_write = mode & BLK_OPEN_WRITE; 598 void __user *argp = (void __user *)arg; 599 unsigned int flags = 0; 600 601 if (bdev_is_partition(bdev)) 602 flags |= NVME_IOCTL_PARTITION; 603 604 if (is_ctrl_ioctl(cmd)) 605 return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); 606 return nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write); 607 } 608 609 long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 610 { 611 struct nvme_ns *ns = 612 container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev); 613 bool open_for_write = file->f_mode & FMODE_WRITE; 614 void __user *argp = (void __user *)arg; 615 616 if (is_ctrl_ioctl(cmd)) 617 return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); 618 return nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write); 619 } 620 621 static int nvme_uring_cmd_checks(unsigned int issue_flags) 622 { 623 624 /* NVMe passthrough requires big SQE/CQE support */ 625 if ((issue_flags & (IO_URING_F_SQE128|IO_URING_F_CQE32)) != 626 (IO_URING_F_SQE128|IO_URING_F_CQE32)) 627 return -EOPNOTSUPP; 628 return 0; 629 } 630 631 static int nvme_ns_uring_cmd(struct nvme_ns *ns, struct io_uring_cmd *ioucmd, 632 unsigned int issue_flags) 633 { 634 struct nvme_ctrl *ctrl = ns->ctrl; 635 int ret; 636 637 BUILD_BUG_ON(sizeof(struct nvme_uring_cmd_pdu) > sizeof(ioucmd->pdu)); 638 639 ret = nvme_uring_cmd_checks(issue_flags); 640 if (ret) 641 return ret; 642 643 switch (ioucmd->cmd_op) { 644 case NVME_URING_CMD_IO: 645 ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, false); 646 break; 647 case NVME_URING_CMD_IO_VEC: 648 ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, true); 649 break; 650 default: 651 ret = -ENOTTY; 652 } 653 654 return ret; 655 } 656 657 int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) 658 { 659 struct nvme_ns *ns = container_of(file_inode(ioucmd->file)->i_cdev, 660 struct nvme_ns, cdev); 661 662 return nvme_ns_uring_cmd(ns, ioucmd, issue_flags); 663 } 664 665 int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd, 666 struct io_comp_batch *iob, 667 unsigned int poll_flags) 668 { 669 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 670 struct request *req = pdu->req; 671 672 if (req && blk_rq_is_poll(req)) 673 return blk_rq_poll(req, iob, poll_flags); 674 return 0; 675 } 676 #ifdef CONFIG_NVME_MULTIPATH 677 static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, 678 void __user *argp, struct nvme_ns_head *head, int srcu_idx, 679 bool open_for_write) 680 __releases(&head->srcu) 681 { 682 struct nvme_ctrl *ctrl = ns->ctrl; 683 int ret; 684 685 nvme_get_ctrl(ns->ctrl); 686 srcu_read_unlock(&head->srcu, srcu_idx); 687 ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); 688 689 nvme_put_ctrl(ctrl); 690 return ret; 691 } 692 693 int nvme_ns_head_ioctl(struct block_device *bdev, blk_mode_t mode, 694 unsigned int cmd, unsigned long arg) 695 { 696 struct nvme_ns_head *head = bdev->bd_disk->private_data; 697 bool open_for_write = mode & BLK_OPEN_WRITE; 698 void __user *argp = (void __user *)arg; 699 struct nvme_ns *ns; 700 int srcu_idx, ret = -EWOULDBLOCK; 701 unsigned int flags = 0; 702 703 if (bdev_is_partition(bdev)) 704 flags |= NVME_IOCTL_PARTITION; 705 706 srcu_idx = srcu_read_lock(&head->srcu); 707 ns = nvme_find_path(head); 708 if (!ns) 709 goto out_unlock; 710 711 /* 712 * Handle ioctls that apply to the controller instead of the namespace 713 * seperately and drop the ns SRCU reference early. This avoids a 714 * deadlock when deleting namespaces using the passthrough interface. 715 */ 716 if (is_ctrl_ioctl(cmd)) 717 return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, 718 open_for_write); 719 720 ret = nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write); 721 out_unlock: 722 srcu_read_unlock(&head->srcu, srcu_idx); 723 return ret; 724 } 725 726 long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd, 727 unsigned long arg) 728 { 729 bool open_for_write = file->f_mode & FMODE_WRITE; 730 struct cdev *cdev = file_inode(file)->i_cdev; 731 struct nvme_ns_head *head = 732 container_of(cdev, struct nvme_ns_head, cdev); 733 void __user *argp = (void __user *)arg; 734 struct nvme_ns *ns; 735 int srcu_idx, ret = -EWOULDBLOCK; 736 737 srcu_idx = srcu_read_lock(&head->srcu); 738 ns = nvme_find_path(head); 739 if (!ns) 740 goto out_unlock; 741 742 if (is_ctrl_ioctl(cmd)) 743 return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, 744 open_for_write); 745 746 ret = nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write); 747 out_unlock: 748 srcu_read_unlock(&head->srcu, srcu_idx); 749 return ret; 750 } 751 752 int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd, 753 unsigned int issue_flags) 754 { 755 struct cdev *cdev = file_inode(ioucmd->file)->i_cdev; 756 struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev); 757 int srcu_idx = srcu_read_lock(&head->srcu); 758 struct nvme_ns *ns = nvme_find_path(head); 759 int ret = -EINVAL; 760 761 if (ns) 762 ret = nvme_ns_uring_cmd(ns, ioucmd, issue_flags); 763 srcu_read_unlock(&head->srcu, srcu_idx); 764 return ret; 765 } 766 #endif /* CONFIG_NVME_MULTIPATH */ 767 768 int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) 769 { 770 struct nvme_ctrl *ctrl = ioucmd->file->private_data; 771 int ret; 772 773 /* IOPOLL not supported yet */ 774 if (issue_flags & IO_URING_F_IOPOLL) 775 return -EOPNOTSUPP; 776 777 ret = nvme_uring_cmd_checks(issue_flags); 778 if (ret) 779 return ret; 780 781 switch (ioucmd->cmd_op) { 782 case NVME_URING_CMD_ADMIN: 783 ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, false); 784 break; 785 case NVME_URING_CMD_ADMIN_VEC: 786 ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, true); 787 break; 788 default: 789 ret = -ENOTTY; 790 } 791 792 return ret; 793 } 794 795 static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp, 796 bool open_for_write) 797 { 798 struct nvme_ns *ns; 799 int ret, srcu_idx; 800 801 srcu_idx = srcu_read_lock(&ctrl->srcu); 802 if (list_empty(&ctrl->namespaces)) { 803 ret = -ENOTTY; 804 goto out_unlock; 805 } 806 807 ns = list_first_or_null_rcu(&ctrl->namespaces, struct nvme_ns, list); 808 if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) { 809 dev_warn(ctrl->device, 810 "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n"); 811 ret = -EINVAL; 812 goto out_unlock; 813 } 814 815 dev_warn(ctrl->device, 816 "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n"); 817 if (!nvme_get_ns(ns)) { 818 ret = -ENXIO; 819 goto out_unlock; 820 } 821 srcu_read_unlock(&ctrl->srcu, srcu_idx); 822 823 ret = nvme_user_cmd(ctrl, ns, argp, 0, open_for_write); 824 nvme_put_ns(ns); 825 return ret; 826 827 out_unlock: 828 srcu_read_unlock(&ctrl->srcu, srcu_idx); 829 return ret; 830 } 831 832 long nvme_dev_ioctl(struct file *file, unsigned int cmd, 833 unsigned long arg) 834 { 835 bool open_for_write = file->f_mode & FMODE_WRITE; 836 struct nvme_ctrl *ctrl = file->private_data; 837 void __user *argp = (void __user *)arg; 838 839 switch (cmd) { 840 case NVME_IOCTL_ADMIN_CMD: 841 return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write); 842 case NVME_IOCTL_ADMIN64_CMD: 843 return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write); 844 case NVME_IOCTL_IO_CMD: 845 return nvme_dev_user_cmd(ctrl, argp, open_for_write); 846 case NVME_IOCTL_RESET: 847 if (!capable(CAP_SYS_ADMIN)) 848 return -EACCES; 849 dev_warn(ctrl->device, "resetting controller\n"); 850 return nvme_reset_ctrl_sync(ctrl); 851 case NVME_IOCTL_SUBSYS_RESET: 852 if (!capable(CAP_SYS_ADMIN)) 853 return -EACCES; 854 return nvme_reset_subsystem(ctrl); 855 case NVME_IOCTL_RESCAN: 856 if (!capable(CAP_SYS_ADMIN)) 857 return -EACCES; 858 nvme_queue_scan(ctrl); 859 return 0; 860 default: 861 return -ENOTTY; 862 } 863 } 864