1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2011-2014, Intel Corporation. 4 * Copyright (c) 2017-2021 Christoph Hellwig. 5 */ 6 #include <linux/blk-integrity.h> 7 #include <linux/ptrace.h> /* for force_successful_syscall_return */ 8 #include <linux/nvme_ioctl.h> 9 #include <linux/io_uring/cmd.h> 10 #include "nvme.h" 11 12 enum { 13 NVME_IOCTL_VEC = (1 << 0), 14 NVME_IOCTL_PARTITION = (1 << 1), 15 }; 16 17 static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, 18 unsigned int flags, bool open_for_write) 19 { 20 u32 effects; 21 22 /* 23 * Do not allow unprivileged passthrough on partitions, as that allows an 24 * escape from the containment of the partition. 25 */ 26 if (flags & NVME_IOCTL_PARTITION) 27 goto admin; 28 29 /* 30 * Do not allow unprivileged processes to send vendor specific or fabrics 31 * commands as we can't be sure about their effects. 32 */ 33 if (c->common.opcode >= nvme_cmd_vendor_start || 34 c->common.opcode == nvme_fabrics_command) 35 goto admin; 36 37 /* 38 * Do not allow unprivileged passthrough of admin commands except 39 * for a subset of identify commands that contain information required 40 * to form proper I/O commands in userspace and do not expose any 41 * potentially sensitive information. 42 */ 43 if (!ns) { 44 if (c->common.opcode == nvme_admin_identify) { 45 switch (c->identify.cns) { 46 case NVME_ID_CNS_NS: 47 case NVME_ID_CNS_CS_NS: 48 case NVME_ID_CNS_NS_CS_INDEP: 49 case NVME_ID_CNS_CS_CTRL: 50 case NVME_ID_CNS_CTRL: 51 return true; 52 } 53 } 54 goto admin; 55 } 56 57 /* 58 * Check if the controller provides a Commands Supported and Effects log 59 * and marks this command as supported. If not reject unprivileged 60 * passthrough. 61 */ 62 effects = nvme_command_effects(ns->ctrl, ns, c->common.opcode); 63 if (!(effects & NVME_CMD_EFFECTS_CSUPP)) 64 goto admin; 65 66 /* 67 * Don't allow passthrough for command that have intrusive (or unknown) 68 * effects. 69 */ 70 if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC | 71 NVME_CMD_EFFECTS_UUID_SEL | 72 NVME_CMD_EFFECTS_SCOPE_MASK)) 73 goto admin; 74 75 /* 76 * Only allow I/O commands that transfer data to the controller or that 77 * change the logical block contents if the file descriptor is open for 78 * writing. 79 */ 80 if ((nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC)) && 81 !open_for_write) 82 goto admin; 83 84 return true; 85 admin: 86 return capable(CAP_SYS_ADMIN); 87 } 88 89 /* 90 * Convert integer values from ioctl structures to user pointers, silently 91 * ignoring the upper bits in the compat case to match behaviour of 32-bit 92 * kernels. 93 */ 94 static void __user *nvme_to_user_ptr(uintptr_t ptrval) 95 { 96 if (in_compat_syscall()) 97 ptrval = (compat_uptr_t)ptrval; 98 return (void __user *)ptrval; 99 } 100 101 static struct request *nvme_alloc_user_request(struct request_queue *q, 102 struct nvme_command *cmd, blk_opf_t rq_flags, 103 blk_mq_req_flags_t blk_flags) 104 { 105 struct request *req; 106 107 req = blk_mq_alloc_request(q, nvme_req_op(cmd) | rq_flags, blk_flags); 108 if (IS_ERR(req)) 109 return req; 110 nvme_init_request(req, cmd); 111 nvme_req(req)->flags |= NVME_REQ_USERCMD; 112 return req; 113 } 114 115 static int nvme_map_user_request(struct request *req, u64 ubuffer, 116 unsigned bufflen, void __user *meta_buffer, unsigned meta_len, 117 struct io_uring_cmd *ioucmd, unsigned int flags) 118 { 119 struct request_queue *q = req->q; 120 struct nvme_ns *ns = q->queuedata; 121 struct block_device *bdev = ns ? ns->disk->part0 : NULL; 122 bool supports_metadata = bdev && blk_get_integrity(bdev->bd_disk); 123 bool has_metadata = meta_buffer && meta_len; 124 struct bio *bio = NULL; 125 int ret; 126 127 if (has_metadata && !supports_metadata) 128 return -EINVAL; 129 130 if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) { 131 struct iov_iter iter; 132 133 /* fixedbufs is only for non-vectored io */ 134 if (WARN_ON_ONCE(flags & NVME_IOCTL_VEC)) 135 return -EINVAL; 136 ret = io_uring_cmd_import_fixed(ubuffer, bufflen, 137 rq_data_dir(req), &iter, ioucmd); 138 if (ret < 0) 139 goto out; 140 ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL); 141 } else { 142 ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer), 143 bufflen, GFP_KERNEL, flags & NVME_IOCTL_VEC, 0, 144 0, rq_data_dir(req)); 145 } 146 147 if (ret) 148 goto out; 149 150 bio = req->bio; 151 if (bdev) 152 bio_set_dev(bio, bdev); 153 154 if (has_metadata) { 155 ret = blk_rq_integrity_map_user(req, meta_buffer, meta_len); 156 if (ret) 157 goto out_unmap; 158 } 159 160 return ret; 161 162 out_unmap: 163 if (bio) 164 blk_rq_unmap_user(bio); 165 out: 166 blk_mq_free_request(req); 167 return ret; 168 } 169 170 static int nvme_submit_user_cmd(struct request_queue *q, 171 struct nvme_command *cmd, u64 ubuffer, unsigned bufflen, 172 void __user *meta_buffer, unsigned meta_len, 173 u64 *result, unsigned timeout, unsigned int flags) 174 { 175 struct nvme_ns *ns = q->queuedata; 176 struct nvme_ctrl *ctrl; 177 struct request *req; 178 struct bio *bio; 179 u32 effects; 180 int ret; 181 182 req = nvme_alloc_user_request(q, cmd, 0, 0); 183 if (IS_ERR(req)) 184 return PTR_ERR(req); 185 186 req->timeout = timeout; 187 if (ubuffer && bufflen) { 188 ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer, 189 meta_len, NULL, flags); 190 if (ret) 191 return ret; 192 } 193 194 bio = req->bio; 195 ctrl = nvme_req(req)->ctrl; 196 197 effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode); 198 ret = nvme_execute_rq(req, false); 199 if (result) 200 *result = le64_to_cpu(nvme_req(req)->result.u64); 201 if (bio) 202 blk_rq_unmap_user(bio); 203 blk_mq_free_request(req); 204 205 if (effects) 206 nvme_passthru_end(ctrl, ns, effects, cmd, ret); 207 208 return ret; 209 } 210 211 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) 212 { 213 struct nvme_user_io io; 214 struct nvme_command c; 215 unsigned length, meta_len; 216 void __user *metadata; 217 218 if (copy_from_user(&io, uio, sizeof(io))) 219 return -EFAULT; 220 if (io.flags) 221 return -EINVAL; 222 223 switch (io.opcode) { 224 case nvme_cmd_write: 225 case nvme_cmd_read: 226 case nvme_cmd_compare: 227 break; 228 default: 229 return -EINVAL; 230 } 231 232 length = (io.nblocks + 1) << ns->head->lba_shift; 233 234 if ((io.control & NVME_RW_PRINFO_PRACT) && 235 (ns->head->ms == ns->head->pi_size)) { 236 /* 237 * Protection information is stripped/inserted by the 238 * controller. 239 */ 240 if (nvme_to_user_ptr(io.metadata)) 241 return -EINVAL; 242 meta_len = 0; 243 metadata = NULL; 244 } else { 245 meta_len = (io.nblocks + 1) * ns->head->ms; 246 metadata = nvme_to_user_ptr(io.metadata); 247 } 248 249 if (ns->head->features & NVME_NS_EXT_LBAS) { 250 length += meta_len; 251 meta_len = 0; 252 } else if (meta_len) { 253 if ((io.metadata & 3) || !io.metadata) 254 return -EINVAL; 255 } 256 257 memset(&c, 0, sizeof(c)); 258 c.rw.opcode = io.opcode; 259 c.rw.flags = io.flags; 260 c.rw.nsid = cpu_to_le32(ns->head->ns_id); 261 c.rw.slba = cpu_to_le64(io.slba); 262 c.rw.length = cpu_to_le16(io.nblocks); 263 c.rw.control = cpu_to_le16(io.control); 264 c.rw.dsmgmt = cpu_to_le32(io.dsmgmt); 265 c.rw.reftag = cpu_to_le32(io.reftag); 266 c.rw.lbat = cpu_to_le16(io.apptag); 267 c.rw.lbatm = cpu_to_le16(io.appmask); 268 269 return nvme_submit_user_cmd(ns->queue, &c, io.addr, length, metadata, 270 meta_len, NULL, 0, 0); 271 } 272 273 static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl, 274 struct nvme_ns *ns, __u32 nsid) 275 { 276 if (ns && nsid != ns->head->ns_id) { 277 dev_err(ctrl->device, 278 "%s: nsid (%u) in cmd does not match nsid (%u)" 279 "of namespace\n", 280 current->comm, nsid, ns->head->ns_id); 281 return false; 282 } 283 284 return true; 285 } 286 287 static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 288 struct nvme_passthru_cmd __user *ucmd, unsigned int flags, 289 bool open_for_write) 290 { 291 struct nvme_passthru_cmd cmd; 292 struct nvme_command c; 293 unsigned timeout = 0; 294 u64 result; 295 int status; 296 297 if (copy_from_user(&cmd, ucmd, sizeof(cmd))) 298 return -EFAULT; 299 if (cmd.flags) 300 return -EINVAL; 301 if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid)) 302 return -EINVAL; 303 304 memset(&c, 0, sizeof(c)); 305 c.common.opcode = cmd.opcode; 306 c.common.flags = cmd.flags; 307 c.common.nsid = cpu_to_le32(cmd.nsid); 308 c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); 309 c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); 310 c.common.cdw10 = cpu_to_le32(cmd.cdw10); 311 c.common.cdw11 = cpu_to_le32(cmd.cdw11); 312 c.common.cdw12 = cpu_to_le32(cmd.cdw12); 313 c.common.cdw13 = cpu_to_le32(cmd.cdw13); 314 c.common.cdw14 = cpu_to_le32(cmd.cdw14); 315 c.common.cdw15 = cpu_to_le32(cmd.cdw15); 316 317 if (!nvme_cmd_allowed(ns, &c, 0, open_for_write)) 318 return -EACCES; 319 320 if (cmd.timeout_ms) 321 timeout = msecs_to_jiffies(cmd.timeout_ms); 322 323 status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, 324 cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata), 325 cmd.metadata_len, &result, timeout, 0); 326 327 if (status >= 0) { 328 if (put_user(result, &ucmd->result)) 329 return -EFAULT; 330 } 331 332 return status; 333 } 334 335 static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 336 struct nvme_passthru_cmd64 __user *ucmd, unsigned int flags, 337 bool open_for_write) 338 { 339 struct nvme_passthru_cmd64 cmd; 340 struct nvme_command c; 341 unsigned timeout = 0; 342 int status; 343 344 if (copy_from_user(&cmd, ucmd, sizeof(cmd))) 345 return -EFAULT; 346 if (cmd.flags) 347 return -EINVAL; 348 if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid)) 349 return -EINVAL; 350 351 memset(&c, 0, sizeof(c)); 352 c.common.opcode = cmd.opcode; 353 c.common.flags = cmd.flags; 354 c.common.nsid = cpu_to_le32(cmd.nsid); 355 c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); 356 c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); 357 c.common.cdw10 = cpu_to_le32(cmd.cdw10); 358 c.common.cdw11 = cpu_to_le32(cmd.cdw11); 359 c.common.cdw12 = cpu_to_le32(cmd.cdw12); 360 c.common.cdw13 = cpu_to_le32(cmd.cdw13); 361 c.common.cdw14 = cpu_to_le32(cmd.cdw14); 362 c.common.cdw15 = cpu_to_le32(cmd.cdw15); 363 364 if (!nvme_cmd_allowed(ns, &c, flags, open_for_write)) 365 return -EACCES; 366 367 if (cmd.timeout_ms) 368 timeout = msecs_to_jiffies(cmd.timeout_ms); 369 370 status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, 371 cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata), 372 cmd.metadata_len, &cmd.result, timeout, flags); 373 374 if (status >= 0) { 375 if (put_user(cmd.result, &ucmd->result)) 376 return -EFAULT; 377 } 378 379 return status; 380 } 381 382 struct nvme_uring_data { 383 __u64 metadata; 384 __u64 addr; 385 __u32 data_len; 386 __u32 metadata_len; 387 __u32 timeout_ms; 388 }; 389 390 /* 391 * This overlays struct io_uring_cmd pdu. 392 * Expect build errors if this grows larger than that. 393 */ 394 struct nvme_uring_cmd_pdu { 395 struct request *req; 396 struct bio *bio; 397 u64 result; 398 int status; 399 }; 400 401 static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu( 402 struct io_uring_cmd *ioucmd) 403 { 404 return io_uring_cmd_to_pdu(ioucmd, struct nvme_uring_cmd_pdu); 405 } 406 407 static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd, 408 unsigned issue_flags) 409 { 410 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 411 412 if (pdu->bio) 413 blk_rq_unmap_user(pdu->bio); 414 io_uring_cmd_done(ioucmd, pdu->status, pdu->result, issue_flags); 415 } 416 417 static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req, 418 blk_status_t err) 419 { 420 struct io_uring_cmd *ioucmd = req->end_io_data; 421 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 422 423 if (nvme_req(req)->flags & NVME_REQ_CANCELLED) 424 pdu->status = -EINTR; 425 else 426 pdu->status = nvme_req(req)->status; 427 pdu->result = le64_to_cpu(nvme_req(req)->result.u64); 428 429 /* 430 * For iopoll, complete it directly. Note that using the uring_cmd 431 * helper for this is safe only because we check blk_rq_is_poll(). 432 * As that returns false if we're NOT on a polled queue, then it's 433 * safe to use the polled completion helper. 434 * 435 * Otherwise, move the completion to task work. 436 */ 437 if (blk_rq_is_poll(req)) { 438 if (pdu->bio) 439 blk_rq_unmap_user(pdu->bio); 440 io_uring_cmd_iopoll_done(ioucmd, pdu->result, pdu->status); 441 } else { 442 io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb); 443 } 444 445 return RQ_END_IO_FREE; 446 } 447 448 static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 449 struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec) 450 { 451 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 452 const struct nvme_uring_cmd *cmd = io_uring_sqe_cmd(ioucmd->sqe); 453 struct request_queue *q = ns ? ns->queue : ctrl->admin_q; 454 struct nvme_uring_data d; 455 struct nvme_command c; 456 struct request *req; 457 blk_opf_t rq_flags = REQ_ALLOC_CACHE; 458 blk_mq_req_flags_t blk_flags = 0; 459 int ret; 460 461 c.common.opcode = READ_ONCE(cmd->opcode); 462 c.common.flags = READ_ONCE(cmd->flags); 463 if (c.common.flags) 464 return -EINVAL; 465 466 c.common.command_id = 0; 467 c.common.nsid = cpu_to_le32(cmd->nsid); 468 if (!nvme_validate_passthru_nsid(ctrl, ns, le32_to_cpu(c.common.nsid))) 469 return -EINVAL; 470 471 c.common.cdw2[0] = cpu_to_le32(READ_ONCE(cmd->cdw2)); 472 c.common.cdw2[1] = cpu_to_le32(READ_ONCE(cmd->cdw3)); 473 c.common.metadata = 0; 474 c.common.dptr.prp1 = c.common.dptr.prp2 = 0; 475 c.common.cdw10 = cpu_to_le32(READ_ONCE(cmd->cdw10)); 476 c.common.cdw11 = cpu_to_le32(READ_ONCE(cmd->cdw11)); 477 c.common.cdw12 = cpu_to_le32(READ_ONCE(cmd->cdw12)); 478 c.common.cdw13 = cpu_to_le32(READ_ONCE(cmd->cdw13)); 479 c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14)); 480 c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15)); 481 482 if (!nvme_cmd_allowed(ns, &c, 0, ioucmd->file->f_mode & FMODE_WRITE)) 483 return -EACCES; 484 485 d.metadata = READ_ONCE(cmd->metadata); 486 d.addr = READ_ONCE(cmd->addr); 487 d.data_len = READ_ONCE(cmd->data_len); 488 d.metadata_len = READ_ONCE(cmd->metadata_len); 489 d.timeout_ms = READ_ONCE(cmd->timeout_ms); 490 491 if (issue_flags & IO_URING_F_NONBLOCK) { 492 rq_flags |= REQ_NOWAIT; 493 blk_flags = BLK_MQ_REQ_NOWAIT; 494 } 495 if (issue_flags & IO_URING_F_IOPOLL) 496 rq_flags |= REQ_POLLED; 497 498 req = nvme_alloc_user_request(q, &c, rq_flags, blk_flags); 499 if (IS_ERR(req)) 500 return PTR_ERR(req); 501 req->timeout = d.timeout_ms ? msecs_to_jiffies(d.timeout_ms) : 0; 502 503 if (d.addr && d.data_len) { 504 ret = nvme_map_user_request(req, d.addr, 505 d.data_len, nvme_to_user_ptr(d.metadata), 506 d.metadata_len, ioucmd, vec); 507 if (ret) 508 return ret; 509 } 510 511 /* to free bio on completion, as req->bio will be null at that time */ 512 pdu->bio = req->bio; 513 pdu->req = req; 514 req->end_io_data = ioucmd; 515 req->end_io = nvme_uring_cmd_end_io; 516 blk_execute_rq_nowait(req, false); 517 return -EIOCBQUEUED; 518 } 519 520 static bool is_ctrl_ioctl(unsigned int cmd) 521 { 522 if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD) 523 return true; 524 if (is_sed_ioctl(cmd)) 525 return true; 526 return false; 527 } 528 529 static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd, 530 void __user *argp, bool open_for_write) 531 { 532 switch (cmd) { 533 case NVME_IOCTL_ADMIN_CMD: 534 return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write); 535 case NVME_IOCTL_ADMIN64_CMD: 536 return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write); 537 default: 538 return sed_ioctl(ctrl->opal_dev, cmd, argp); 539 } 540 } 541 542 #ifdef COMPAT_FOR_U64_ALIGNMENT 543 struct nvme_user_io32 { 544 __u8 opcode; 545 __u8 flags; 546 __u16 control; 547 __u16 nblocks; 548 __u16 rsvd; 549 __u64 metadata; 550 __u64 addr; 551 __u64 slba; 552 __u32 dsmgmt; 553 __u32 reftag; 554 __u16 apptag; 555 __u16 appmask; 556 } __attribute__((__packed__)); 557 #define NVME_IOCTL_SUBMIT_IO32 _IOW('N', 0x42, struct nvme_user_io32) 558 #endif /* COMPAT_FOR_U64_ALIGNMENT */ 559 560 static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, 561 void __user *argp, unsigned int flags, bool open_for_write) 562 { 563 switch (cmd) { 564 case NVME_IOCTL_ID: 565 force_successful_syscall_return(); 566 return ns->head->ns_id; 567 case NVME_IOCTL_IO_CMD: 568 return nvme_user_cmd(ns->ctrl, ns, argp, flags, open_for_write); 569 /* 570 * struct nvme_user_io can have different padding on some 32-bit ABIs. 571 * Just accept the compat version as all fields that are used are the 572 * same size and at the same offset. 573 */ 574 #ifdef COMPAT_FOR_U64_ALIGNMENT 575 case NVME_IOCTL_SUBMIT_IO32: 576 #endif 577 case NVME_IOCTL_SUBMIT_IO: 578 return nvme_submit_io(ns, argp); 579 case NVME_IOCTL_IO64_CMD_VEC: 580 flags |= NVME_IOCTL_VEC; 581 fallthrough; 582 case NVME_IOCTL_IO64_CMD: 583 return nvme_user_cmd64(ns->ctrl, ns, argp, flags, 584 open_for_write); 585 default: 586 return -ENOTTY; 587 } 588 } 589 590 int nvme_ioctl(struct block_device *bdev, blk_mode_t mode, 591 unsigned int cmd, unsigned long arg) 592 { 593 struct nvme_ns *ns = bdev->bd_disk->private_data; 594 bool open_for_write = mode & BLK_OPEN_WRITE; 595 void __user *argp = (void __user *)arg; 596 unsigned int flags = 0; 597 598 if (bdev_is_partition(bdev)) 599 flags |= NVME_IOCTL_PARTITION; 600 601 if (is_ctrl_ioctl(cmd)) 602 return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); 603 return nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write); 604 } 605 606 long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 607 { 608 struct nvme_ns *ns = 609 container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev); 610 bool open_for_write = file->f_mode & FMODE_WRITE; 611 void __user *argp = (void __user *)arg; 612 613 if (is_ctrl_ioctl(cmd)) 614 return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); 615 return nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write); 616 } 617 618 static int nvme_uring_cmd_checks(unsigned int issue_flags) 619 { 620 621 /* NVMe passthrough requires big SQE/CQE support */ 622 if ((issue_flags & (IO_URING_F_SQE128|IO_URING_F_CQE32)) != 623 (IO_URING_F_SQE128|IO_URING_F_CQE32)) 624 return -EOPNOTSUPP; 625 return 0; 626 } 627 628 static int nvme_ns_uring_cmd(struct nvme_ns *ns, struct io_uring_cmd *ioucmd, 629 unsigned int issue_flags) 630 { 631 struct nvme_ctrl *ctrl = ns->ctrl; 632 int ret; 633 634 ret = nvme_uring_cmd_checks(issue_flags); 635 if (ret) 636 return ret; 637 638 switch (ioucmd->cmd_op) { 639 case NVME_URING_CMD_IO: 640 ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, false); 641 break; 642 case NVME_URING_CMD_IO_VEC: 643 ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, true); 644 break; 645 default: 646 ret = -ENOTTY; 647 } 648 649 return ret; 650 } 651 652 int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) 653 { 654 struct nvme_ns *ns = container_of(file_inode(ioucmd->file)->i_cdev, 655 struct nvme_ns, cdev); 656 657 return nvme_ns_uring_cmd(ns, ioucmd, issue_flags); 658 } 659 660 int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd, 661 struct io_comp_batch *iob, 662 unsigned int poll_flags) 663 { 664 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 665 struct request *req = pdu->req; 666 667 if (req && blk_rq_is_poll(req)) 668 return blk_rq_poll(req, iob, poll_flags); 669 return 0; 670 } 671 #ifdef CONFIG_NVME_MULTIPATH 672 static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, 673 void __user *argp, struct nvme_ns_head *head, int srcu_idx, 674 bool open_for_write) 675 __releases(&head->srcu) 676 { 677 struct nvme_ctrl *ctrl = ns->ctrl; 678 int ret; 679 680 nvme_get_ctrl(ns->ctrl); 681 srcu_read_unlock(&head->srcu, srcu_idx); 682 ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); 683 684 nvme_put_ctrl(ctrl); 685 return ret; 686 } 687 688 int nvme_ns_head_ioctl(struct block_device *bdev, blk_mode_t mode, 689 unsigned int cmd, unsigned long arg) 690 { 691 struct nvme_ns_head *head = bdev->bd_disk->private_data; 692 bool open_for_write = mode & BLK_OPEN_WRITE; 693 void __user *argp = (void __user *)arg; 694 struct nvme_ns *ns; 695 int srcu_idx, ret = -EWOULDBLOCK; 696 unsigned int flags = 0; 697 698 if (bdev_is_partition(bdev)) 699 flags |= NVME_IOCTL_PARTITION; 700 701 srcu_idx = srcu_read_lock(&head->srcu); 702 ns = nvme_find_path(head); 703 if (!ns) 704 goto out_unlock; 705 706 /* 707 * Handle ioctls that apply to the controller instead of the namespace 708 * seperately and drop the ns SRCU reference early. This avoids a 709 * deadlock when deleting namespaces using the passthrough interface. 710 */ 711 if (is_ctrl_ioctl(cmd)) 712 return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, 713 open_for_write); 714 715 ret = nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write); 716 out_unlock: 717 srcu_read_unlock(&head->srcu, srcu_idx); 718 return ret; 719 } 720 721 long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd, 722 unsigned long arg) 723 { 724 bool open_for_write = file->f_mode & FMODE_WRITE; 725 struct cdev *cdev = file_inode(file)->i_cdev; 726 struct nvme_ns_head *head = 727 container_of(cdev, struct nvme_ns_head, cdev); 728 void __user *argp = (void __user *)arg; 729 struct nvme_ns *ns; 730 int srcu_idx, ret = -EWOULDBLOCK; 731 732 srcu_idx = srcu_read_lock(&head->srcu); 733 ns = nvme_find_path(head); 734 if (!ns) 735 goto out_unlock; 736 737 if (is_ctrl_ioctl(cmd)) 738 return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, 739 open_for_write); 740 741 ret = nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write); 742 out_unlock: 743 srcu_read_unlock(&head->srcu, srcu_idx); 744 return ret; 745 } 746 747 int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd, 748 unsigned int issue_flags) 749 { 750 struct cdev *cdev = file_inode(ioucmd->file)->i_cdev; 751 struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev); 752 int srcu_idx = srcu_read_lock(&head->srcu); 753 struct nvme_ns *ns = nvme_find_path(head); 754 int ret = -EINVAL; 755 756 if (ns) 757 ret = nvme_ns_uring_cmd(ns, ioucmd, issue_flags); 758 srcu_read_unlock(&head->srcu, srcu_idx); 759 return ret; 760 } 761 #endif /* CONFIG_NVME_MULTIPATH */ 762 763 int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) 764 { 765 struct nvme_ctrl *ctrl = ioucmd->file->private_data; 766 int ret; 767 768 /* IOPOLL not supported yet */ 769 if (issue_flags & IO_URING_F_IOPOLL) 770 return -EOPNOTSUPP; 771 772 ret = nvme_uring_cmd_checks(issue_flags); 773 if (ret) 774 return ret; 775 776 switch (ioucmd->cmd_op) { 777 case NVME_URING_CMD_ADMIN: 778 ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, false); 779 break; 780 case NVME_URING_CMD_ADMIN_VEC: 781 ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, true); 782 break; 783 default: 784 ret = -ENOTTY; 785 } 786 787 return ret; 788 } 789 790 static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp, 791 bool open_for_write) 792 { 793 struct nvme_ns *ns; 794 int ret, srcu_idx; 795 796 srcu_idx = srcu_read_lock(&ctrl->srcu); 797 if (list_empty(&ctrl->namespaces)) { 798 ret = -ENOTTY; 799 goto out_unlock; 800 } 801 802 ns = list_first_or_null_rcu(&ctrl->namespaces, struct nvme_ns, list); 803 if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) { 804 dev_warn(ctrl->device, 805 "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n"); 806 ret = -EINVAL; 807 goto out_unlock; 808 } 809 810 dev_warn(ctrl->device, 811 "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n"); 812 if (!nvme_get_ns(ns)) { 813 ret = -ENXIO; 814 goto out_unlock; 815 } 816 srcu_read_unlock(&ctrl->srcu, srcu_idx); 817 818 ret = nvme_user_cmd(ctrl, ns, argp, 0, open_for_write); 819 nvme_put_ns(ns); 820 return ret; 821 822 out_unlock: 823 srcu_read_unlock(&ctrl->srcu, srcu_idx); 824 return ret; 825 } 826 827 long nvme_dev_ioctl(struct file *file, unsigned int cmd, 828 unsigned long arg) 829 { 830 bool open_for_write = file->f_mode & FMODE_WRITE; 831 struct nvme_ctrl *ctrl = file->private_data; 832 void __user *argp = (void __user *)arg; 833 834 switch (cmd) { 835 case NVME_IOCTL_ADMIN_CMD: 836 return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write); 837 case NVME_IOCTL_ADMIN64_CMD: 838 return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write); 839 case NVME_IOCTL_IO_CMD: 840 return nvme_dev_user_cmd(ctrl, argp, open_for_write); 841 case NVME_IOCTL_RESET: 842 if (!capable(CAP_SYS_ADMIN)) 843 return -EACCES; 844 dev_warn(ctrl->device, "resetting controller\n"); 845 return nvme_reset_ctrl_sync(ctrl); 846 case NVME_IOCTL_SUBSYS_RESET: 847 if (!capable(CAP_SYS_ADMIN)) 848 return -EACCES; 849 return nvme_reset_subsystem(ctrl); 850 case NVME_IOCTL_RESCAN: 851 if (!capable(CAP_SYS_ADMIN)) 852 return -EACCES; 853 nvme_queue_scan(ctrl); 854 return 0; 855 default: 856 return -ENOTTY; 857 } 858 } 859