1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2011-2014, Intel Corporation. 4 * Copyright (c) 2017-2021 Christoph Hellwig. 5 */ 6 #include <linux/blk-integrity.h> 7 #include <linux/ptrace.h> /* for force_successful_syscall_return */ 8 #include <linux/nvme_ioctl.h> 9 #include <linux/io_uring/cmd.h> 10 #include "nvme.h" 11 12 enum { 13 NVME_IOCTL_VEC = (1 << 0), 14 NVME_IOCTL_PARTITION = (1 << 1), 15 }; 16 17 static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, 18 unsigned int flags, bool open_for_write) 19 { 20 u32 effects; 21 22 /* 23 * Do not allow unprivileged passthrough on partitions, as that allows an 24 * escape from the containment of the partition. 25 */ 26 if (flags & NVME_IOCTL_PARTITION) 27 goto admin; 28 29 /* 30 * Do not allow unprivileged processes to send vendor specific or fabrics 31 * commands as we can't be sure about their effects. 32 */ 33 if (c->common.opcode >= nvme_cmd_vendor_start || 34 c->common.opcode == nvme_fabrics_command) 35 goto admin; 36 37 /* 38 * Do not allow unprivileged passthrough of admin commands except 39 * for a subset of identify commands that contain information required 40 * to form proper I/O commands in userspace and do not expose any 41 * potentially sensitive information. 42 */ 43 if (!ns) { 44 if (c->common.opcode == nvme_admin_identify) { 45 switch (c->identify.cns) { 46 case NVME_ID_CNS_NS: 47 case NVME_ID_CNS_CS_NS: 48 case NVME_ID_CNS_NS_CS_INDEP: 49 case NVME_ID_CNS_CS_CTRL: 50 case NVME_ID_CNS_CTRL: 51 return true; 52 } 53 } 54 goto admin; 55 } 56 57 /* 58 * Check if the controller provides a Commands Supported and Effects log 59 * and marks this command as supported. If not reject unprivileged 60 * passthrough. 61 */ 62 effects = nvme_command_effects(ns->ctrl, ns, c->common.opcode); 63 if (!(effects & NVME_CMD_EFFECTS_CSUPP)) 64 goto admin; 65 66 /* 67 * Don't allow passthrough for command that have intrusive (or unknown) 68 * effects. 69 */ 70 if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC | 71 NVME_CMD_EFFECTS_UUID_SEL | 72 NVME_CMD_EFFECTS_SCOPE_MASK)) 73 goto admin; 74 75 /* 76 * Only allow I/O commands that transfer data to the controller or that 77 * change the logical block contents if the file descriptor is open for 78 * writing. 79 */ 80 if ((nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC)) && 81 !open_for_write) 82 goto admin; 83 84 return true; 85 admin: 86 return capable(CAP_SYS_ADMIN); 87 } 88 89 /* 90 * Convert integer values from ioctl structures to user pointers, silently 91 * ignoring the upper bits in the compat case to match behaviour of 32-bit 92 * kernels. 93 */ 94 static void __user *nvme_to_user_ptr(uintptr_t ptrval) 95 { 96 if (in_compat_syscall()) 97 ptrval = (compat_uptr_t)ptrval; 98 return (void __user *)ptrval; 99 } 100 101 static struct request *nvme_alloc_user_request(struct request_queue *q, 102 struct nvme_command *cmd, blk_opf_t rq_flags, 103 blk_mq_req_flags_t blk_flags) 104 { 105 struct request *req; 106 107 req = blk_mq_alloc_request(q, nvme_req_op(cmd) | rq_flags, blk_flags); 108 if (IS_ERR(req)) 109 return req; 110 nvme_init_request(req, cmd); 111 nvme_req(req)->flags |= NVME_REQ_USERCMD; 112 return req; 113 } 114 115 static int nvme_map_user_request(struct request *req, u64 ubuffer, 116 unsigned bufflen, void __user *meta_buffer, unsigned meta_len, 117 u32 meta_seed, struct io_uring_cmd *ioucmd, unsigned int flags) 118 { 119 struct request_queue *q = req->q; 120 struct nvme_ns *ns = q->queuedata; 121 struct block_device *bdev = ns ? ns->disk->part0 : NULL; 122 bool supports_metadata = bdev && blk_get_integrity(bdev->bd_disk); 123 bool has_metadata = meta_buffer && meta_len; 124 struct bio *bio = NULL; 125 int ret; 126 127 if (has_metadata && !supports_metadata) 128 return -EINVAL; 129 130 if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) { 131 struct iov_iter iter; 132 133 /* fixedbufs is only for non-vectored io */ 134 if (WARN_ON_ONCE(flags & NVME_IOCTL_VEC)) 135 return -EINVAL; 136 ret = io_uring_cmd_import_fixed(ubuffer, bufflen, 137 rq_data_dir(req), &iter, ioucmd); 138 if (ret < 0) 139 goto out; 140 ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL); 141 } else { 142 ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer), 143 bufflen, GFP_KERNEL, flags & NVME_IOCTL_VEC, 0, 144 0, rq_data_dir(req)); 145 } 146 147 if (ret) 148 goto out; 149 150 bio = req->bio; 151 if (bdev) 152 bio_set_dev(bio, bdev); 153 154 if (has_metadata) { 155 ret = blk_rq_integrity_map_user(req, meta_buffer, meta_len, 156 meta_seed); 157 if (ret) 158 goto out_unmap; 159 } 160 161 return ret; 162 163 out_unmap: 164 if (bio) 165 blk_rq_unmap_user(bio); 166 out: 167 blk_mq_free_request(req); 168 return ret; 169 } 170 171 static int nvme_submit_user_cmd(struct request_queue *q, 172 struct nvme_command *cmd, u64 ubuffer, unsigned bufflen, 173 void __user *meta_buffer, unsigned meta_len, u32 meta_seed, 174 u64 *result, unsigned timeout, unsigned int flags) 175 { 176 struct nvme_ns *ns = q->queuedata; 177 struct nvme_ctrl *ctrl; 178 struct request *req; 179 struct bio *bio; 180 u32 effects; 181 int ret; 182 183 req = nvme_alloc_user_request(q, cmd, 0, 0); 184 if (IS_ERR(req)) 185 return PTR_ERR(req); 186 187 req->timeout = timeout; 188 if (ubuffer && bufflen) { 189 ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer, 190 meta_len, meta_seed, NULL, flags); 191 if (ret) 192 return ret; 193 } 194 195 bio = req->bio; 196 ctrl = nvme_req(req)->ctrl; 197 198 effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode); 199 ret = nvme_execute_rq(req, false); 200 if (result) 201 *result = le64_to_cpu(nvme_req(req)->result.u64); 202 if (bio) 203 blk_rq_unmap_user(bio); 204 blk_mq_free_request(req); 205 206 if (effects) 207 nvme_passthru_end(ctrl, ns, effects, cmd, ret); 208 209 return ret; 210 } 211 212 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) 213 { 214 struct nvme_user_io io; 215 struct nvme_command c; 216 unsigned length, meta_len; 217 void __user *metadata; 218 219 if (copy_from_user(&io, uio, sizeof(io))) 220 return -EFAULT; 221 if (io.flags) 222 return -EINVAL; 223 224 switch (io.opcode) { 225 case nvme_cmd_write: 226 case nvme_cmd_read: 227 case nvme_cmd_compare: 228 break; 229 default: 230 return -EINVAL; 231 } 232 233 length = (io.nblocks + 1) << ns->head->lba_shift; 234 235 if ((io.control & NVME_RW_PRINFO_PRACT) && 236 (ns->head->ms == ns->head->pi_size)) { 237 /* 238 * Protection information is stripped/inserted by the 239 * controller. 240 */ 241 if (nvme_to_user_ptr(io.metadata)) 242 return -EINVAL; 243 meta_len = 0; 244 metadata = NULL; 245 } else { 246 meta_len = (io.nblocks + 1) * ns->head->ms; 247 metadata = nvme_to_user_ptr(io.metadata); 248 } 249 250 if (ns->head->features & NVME_NS_EXT_LBAS) { 251 length += meta_len; 252 meta_len = 0; 253 } else if (meta_len) { 254 if ((io.metadata & 3) || !io.metadata) 255 return -EINVAL; 256 } 257 258 memset(&c, 0, sizeof(c)); 259 c.rw.opcode = io.opcode; 260 c.rw.flags = io.flags; 261 c.rw.nsid = cpu_to_le32(ns->head->ns_id); 262 c.rw.slba = cpu_to_le64(io.slba); 263 c.rw.length = cpu_to_le16(io.nblocks); 264 c.rw.control = cpu_to_le16(io.control); 265 c.rw.dsmgmt = cpu_to_le32(io.dsmgmt); 266 c.rw.reftag = cpu_to_le32(io.reftag); 267 c.rw.lbat = cpu_to_le16(io.apptag); 268 c.rw.lbatm = cpu_to_le16(io.appmask); 269 270 return nvme_submit_user_cmd(ns->queue, &c, io.addr, length, metadata, 271 meta_len, lower_32_bits(io.slba), NULL, 0, 0); 272 } 273 274 static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl, 275 struct nvme_ns *ns, __u32 nsid) 276 { 277 if (ns && nsid != ns->head->ns_id) { 278 dev_err(ctrl->device, 279 "%s: nsid (%u) in cmd does not match nsid (%u)" 280 "of namespace\n", 281 current->comm, nsid, ns->head->ns_id); 282 return false; 283 } 284 285 return true; 286 } 287 288 static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 289 struct nvme_passthru_cmd __user *ucmd, unsigned int flags, 290 bool open_for_write) 291 { 292 struct nvme_passthru_cmd cmd; 293 struct nvme_command c; 294 unsigned timeout = 0; 295 u64 result; 296 int status; 297 298 if (copy_from_user(&cmd, ucmd, sizeof(cmd))) 299 return -EFAULT; 300 if (cmd.flags) 301 return -EINVAL; 302 if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid)) 303 return -EINVAL; 304 305 memset(&c, 0, sizeof(c)); 306 c.common.opcode = cmd.opcode; 307 c.common.flags = cmd.flags; 308 c.common.nsid = cpu_to_le32(cmd.nsid); 309 c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); 310 c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); 311 c.common.cdw10 = cpu_to_le32(cmd.cdw10); 312 c.common.cdw11 = cpu_to_le32(cmd.cdw11); 313 c.common.cdw12 = cpu_to_le32(cmd.cdw12); 314 c.common.cdw13 = cpu_to_le32(cmd.cdw13); 315 c.common.cdw14 = cpu_to_le32(cmd.cdw14); 316 c.common.cdw15 = cpu_to_le32(cmd.cdw15); 317 318 if (!nvme_cmd_allowed(ns, &c, 0, open_for_write)) 319 return -EACCES; 320 321 if (cmd.timeout_ms) 322 timeout = msecs_to_jiffies(cmd.timeout_ms); 323 324 status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, 325 cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata), 326 cmd.metadata_len, 0, &result, timeout, 0); 327 328 if (status >= 0) { 329 if (put_user(result, &ucmd->result)) 330 return -EFAULT; 331 } 332 333 return status; 334 } 335 336 static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 337 struct nvme_passthru_cmd64 __user *ucmd, unsigned int flags, 338 bool open_for_write) 339 { 340 struct nvme_passthru_cmd64 cmd; 341 struct nvme_command c; 342 unsigned timeout = 0; 343 int status; 344 345 if (copy_from_user(&cmd, ucmd, sizeof(cmd))) 346 return -EFAULT; 347 if (cmd.flags) 348 return -EINVAL; 349 if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid)) 350 return -EINVAL; 351 352 memset(&c, 0, sizeof(c)); 353 c.common.opcode = cmd.opcode; 354 c.common.flags = cmd.flags; 355 c.common.nsid = cpu_to_le32(cmd.nsid); 356 c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); 357 c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); 358 c.common.cdw10 = cpu_to_le32(cmd.cdw10); 359 c.common.cdw11 = cpu_to_le32(cmd.cdw11); 360 c.common.cdw12 = cpu_to_le32(cmd.cdw12); 361 c.common.cdw13 = cpu_to_le32(cmd.cdw13); 362 c.common.cdw14 = cpu_to_le32(cmd.cdw14); 363 c.common.cdw15 = cpu_to_le32(cmd.cdw15); 364 365 if (!nvme_cmd_allowed(ns, &c, flags, open_for_write)) 366 return -EACCES; 367 368 if (cmd.timeout_ms) 369 timeout = msecs_to_jiffies(cmd.timeout_ms); 370 371 status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, 372 cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata), 373 cmd.metadata_len, 0, &cmd.result, timeout, flags); 374 375 if (status >= 0) { 376 if (put_user(cmd.result, &ucmd->result)) 377 return -EFAULT; 378 } 379 380 return status; 381 } 382 383 struct nvme_uring_data { 384 __u64 metadata; 385 __u64 addr; 386 __u32 data_len; 387 __u32 metadata_len; 388 __u32 timeout_ms; 389 }; 390 391 /* 392 * This overlays struct io_uring_cmd pdu. 393 * Expect build errors if this grows larger than that. 394 */ 395 struct nvme_uring_cmd_pdu { 396 struct request *req; 397 struct bio *bio; 398 u64 result; 399 int status; 400 }; 401 402 static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu( 403 struct io_uring_cmd *ioucmd) 404 { 405 return (struct nvme_uring_cmd_pdu *)&ioucmd->pdu; 406 } 407 408 static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd, 409 unsigned issue_flags) 410 { 411 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 412 413 if (pdu->bio) 414 blk_rq_unmap_user(pdu->bio); 415 io_uring_cmd_done(ioucmd, pdu->status, pdu->result, issue_flags); 416 } 417 418 static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req, 419 blk_status_t err) 420 { 421 struct io_uring_cmd *ioucmd = req->end_io_data; 422 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 423 424 if (nvme_req(req)->flags & NVME_REQ_CANCELLED) { 425 pdu->status = -EINTR; 426 } else { 427 pdu->status = nvme_req(req)->status; 428 if (!pdu->status) 429 pdu->status = blk_status_to_errno(err); 430 } 431 pdu->result = le64_to_cpu(nvme_req(req)->result.u64); 432 433 /* 434 * For iopoll, complete it directly. Note that using the uring_cmd 435 * helper for this is safe only because we check blk_rq_is_poll(). 436 * As that returns false if we're NOT on a polled queue, then it's 437 * safe to use the polled completion helper. 438 * 439 * Otherwise, move the completion to task work. 440 */ 441 if (blk_rq_is_poll(req)) { 442 if (pdu->bio) 443 blk_rq_unmap_user(pdu->bio); 444 io_uring_cmd_iopoll_done(ioucmd, pdu->result, pdu->status); 445 } else { 446 io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb); 447 } 448 449 return RQ_END_IO_FREE; 450 } 451 452 static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 453 struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec) 454 { 455 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 456 const struct nvme_uring_cmd *cmd = io_uring_sqe_cmd(ioucmd->sqe); 457 struct request_queue *q = ns ? ns->queue : ctrl->admin_q; 458 struct nvme_uring_data d; 459 struct nvme_command c; 460 struct request *req; 461 blk_opf_t rq_flags = REQ_ALLOC_CACHE; 462 blk_mq_req_flags_t blk_flags = 0; 463 int ret; 464 465 c.common.opcode = READ_ONCE(cmd->opcode); 466 c.common.flags = READ_ONCE(cmd->flags); 467 if (c.common.flags) 468 return -EINVAL; 469 470 c.common.command_id = 0; 471 c.common.nsid = cpu_to_le32(cmd->nsid); 472 if (!nvme_validate_passthru_nsid(ctrl, ns, le32_to_cpu(c.common.nsid))) 473 return -EINVAL; 474 475 c.common.cdw2[0] = cpu_to_le32(READ_ONCE(cmd->cdw2)); 476 c.common.cdw2[1] = cpu_to_le32(READ_ONCE(cmd->cdw3)); 477 c.common.metadata = 0; 478 c.common.dptr.prp1 = c.common.dptr.prp2 = 0; 479 c.common.cdw10 = cpu_to_le32(READ_ONCE(cmd->cdw10)); 480 c.common.cdw11 = cpu_to_le32(READ_ONCE(cmd->cdw11)); 481 c.common.cdw12 = cpu_to_le32(READ_ONCE(cmd->cdw12)); 482 c.common.cdw13 = cpu_to_le32(READ_ONCE(cmd->cdw13)); 483 c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14)); 484 c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15)); 485 486 if (!nvme_cmd_allowed(ns, &c, 0, ioucmd->file->f_mode & FMODE_WRITE)) 487 return -EACCES; 488 489 d.metadata = READ_ONCE(cmd->metadata); 490 d.addr = READ_ONCE(cmd->addr); 491 d.data_len = READ_ONCE(cmd->data_len); 492 d.metadata_len = READ_ONCE(cmd->metadata_len); 493 d.timeout_ms = READ_ONCE(cmd->timeout_ms); 494 495 if (issue_flags & IO_URING_F_NONBLOCK) { 496 rq_flags |= REQ_NOWAIT; 497 blk_flags = BLK_MQ_REQ_NOWAIT; 498 } 499 if (issue_flags & IO_URING_F_IOPOLL) 500 rq_flags |= REQ_POLLED; 501 502 req = nvme_alloc_user_request(q, &c, rq_flags, blk_flags); 503 if (IS_ERR(req)) 504 return PTR_ERR(req); 505 req->timeout = d.timeout_ms ? msecs_to_jiffies(d.timeout_ms) : 0; 506 507 if (d.addr && d.data_len) { 508 ret = nvme_map_user_request(req, d.addr, 509 d.data_len, nvme_to_user_ptr(d.metadata), 510 d.metadata_len, 0, ioucmd, vec); 511 if (ret) 512 return ret; 513 } 514 515 /* to free bio on completion, as req->bio will be null at that time */ 516 pdu->bio = req->bio; 517 pdu->req = req; 518 req->end_io_data = ioucmd; 519 req->end_io = nvme_uring_cmd_end_io; 520 blk_execute_rq_nowait(req, false); 521 return -EIOCBQUEUED; 522 } 523 524 static bool is_ctrl_ioctl(unsigned int cmd) 525 { 526 if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD) 527 return true; 528 if (is_sed_ioctl(cmd)) 529 return true; 530 return false; 531 } 532 533 static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd, 534 void __user *argp, bool open_for_write) 535 { 536 switch (cmd) { 537 case NVME_IOCTL_ADMIN_CMD: 538 return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write); 539 case NVME_IOCTL_ADMIN64_CMD: 540 return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write); 541 default: 542 return sed_ioctl(ctrl->opal_dev, cmd, argp); 543 } 544 } 545 546 #ifdef COMPAT_FOR_U64_ALIGNMENT 547 struct nvme_user_io32 { 548 __u8 opcode; 549 __u8 flags; 550 __u16 control; 551 __u16 nblocks; 552 __u16 rsvd; 553 __u64 metadata; 554 __u64 addr; 555 __u64 slba; 556 __u32 dsmgmt; 557 __u32 reftag; 558 __u16 apptag; 559 __u16 appmask; 560 } __attribute__((__packed__)); 561 #define NVME_IOCTL_SUBMIT_IO32 _IOW('N', 0x42, struct nvme_user_io32) 562 #endif /* COMPAT_FOR_U64_ALIGNMENT */ 563 564 static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, 565 void __user *argp, unsigned int flags, bool open_for_write) 566 { 567 switch (cmd) { 568 case NVME_IOCTL_ID: 569 force_successful_syscall_return(); 570 return ns->head->ns_id; 571 case NVME_IOCTL_IO_CMD: 572 return nvme_user_cmd(ns->ctrl, ns, argp, flags, open_for_write); 573 /* 574 * struct nvme_user_io can have different padding on some 32-bit ABIs. 575 * Just accept the compat version as all fields that are used are the 576 * same size and at the same offset. 577 */ 578 #ifdef COMPAT_FOR_U64_ALIGNMENT 579 case NVME_IOCTL_SUBMIT_IO32: 580 #endif 581 case NVME_IOCTL_SUBMIT_IO: 582 return nvme_submit_io(ns, argp); 583 case NVME_IOCTL_IO64_CMD_VEC: 584 flags |= NVME_IOCTL_VEC; 585 fallthrough; 586 case NVME_IOCTL_IO64_CMD: 587 return nvme_user_cmd64(ns->ctrl, ns, argp, flags, 588 open_for_write); 589 default: 590 return -ENOTTY; 591 } 592 } 593 594 int nvme_ioctl(struct block_device *bdev, blk_mode_t mode, 595 unsigned int cmd, unsigned long arg) 596 { 597 struct nvme_ns *ns = bdev->bd_disk->private_data; 598 bool open_for_write = mode & BLK_OPEN_WRITE; 599 void __user *argp = (void __user *)arg; 600 unsigned int flags = 0; 601 602 if (bdev_is_partition(bdev)) 603 flags |= NVME_IOCTL_PARTITION; 604 605 if (is_ctrl_ioctl(cmd)) 606 return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); 607 return nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write); 608 } 609 610 long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 611 { 612 struct nvme_ns *ns = 613 container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev); 614 bool open_for_write = file->f_mode & FMODE_WRITE; 615 void __user *argp = (void __user *)arg; 616 617 if (is_ctrl_ioctl(cmd)) 618 return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); 619 return nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write); 620 } 621 622 static int nvme_uring_cmd_checks(unsigned int issue_flags) 623 { 624 625 /* NVMe passthrough requires big SQE/CQE support */ 626 if ((issue_flags & (IO_URING_F_SQE128|IO_URING_F_CQE32)) != 627 (IO_URING_F_SQE128|IO_URING_F_CQE32)) 628 return -EOPNOTSUPP; 629 return 0; 630 } 631 632 static int nvme_ns_uring_cmd(struct nvme_ns *ns, struct io_uring_cmd *ioucmd, 633 unsigned int issue_flags) 634 { 635 struct nvme_ctrl *ctrl = ns->ctrl; 636 int ret; 637 638 BUILD_BUG_ON(sizeof(struct nvme_uring_cmd_pdu) > sizeof(ioucmd->pdu)); 639 640 ret = nvme_uring_cmd_checks(issue_flags); 641 if (ret) 642 return ret; 643 644 switch (ioucmd->cmd_op) { 645 case NVME_URING_CMD_IO: 646 ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, false); 647 break; 648 case NVME_URING_CMD_IO_VEC: 649 ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, true); 650 break; 651 default: 652 ret = -ENOTTY; 653 } 654 655 return ret; 656 } 657 658 int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) 659 { 660 struct nvme_ns *ns = container_of(file_inode(ioucmd->file)->i_cdev, 661 struct nvme_ns, cdev); 662 663 return nvme_ns_uring_cmd(ns, ioucmd, issue_flags); 664 } 665 666 int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd, 667 struct io_comp_batch *iob, 668 unsigned int poll_flags) 669 { 670 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 671 struct request *req = pdu->req; 672 673 if (req && blk_rq_is_poll(req)) 674 return blk_rq_poll(req, iob, poll_flags); 675 return 0; 676 } 677 #ifdef CONFIG_NVME_MULTIPATH 678 static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, 679 void __user *argp, struct nvme_ns_head *head, int srcu_idx, 680 bool open_for_write) 681 __releases(&head->srcu) 682 { 683 struct nvme_ctrl *ctrl = ns->ctrl; 684 int ret; 685 686 nvme_get_ctrl(ns->ctrl); 687 srcu_read_unlock(&head->srcu, srcu_idx); 688 ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); 689 690 nvme_put_ctrl(ctrl); 691 return ret; 692 } 693 694 int nvme_ns_head_ioctl(struct block_device *bdev, blk_mode_t mode, 695 unsigned int cmd, unsigned long arg) 696 { 697 struct nvme_ns_head *head = bdev->bd_disk->private_data; 698 bool open_for_write = mode & BLK_OPEN_WRITE; 699 void __user *argp = (void __user *)arg; 700 struct nvme_ns *ns; 701 int srcu_idx, ret = -EWOULDBLOCK; 702 unsigned int flags = 0; 703 704 if (bdev_is_partition(bdev)) 705 flags |= NVME_IOCTL_PARTITION; 706 707 srcu_idx = srcu_read_lock(&head->srcu); 708 ns = nvme_find_path(head); 709 if (!ns) 710 goto out_unlock; 711 712 /* 713 * Handle ioctls that apply to the controller instead of the namespace 714 * seperately and drop the ns SRCU reference early. This avoids a 715 * deadlock when deleting namespaces using the passthrough interface. 716 */ 717 if (is_ctrl_ioctl(cmd)) 718 return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, 719 open_for_write); 720 721 ret = nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write); 722 out_unlock: 723 srcu_read_unlock(&head->srcu, srcu_idx); 724 return ret; 725 } 726 727 long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd, 728 unsigned long arg) 729 { 730 bool open_for_write = file->f_mode & FMODE_WRITE; 731 struct cdev *cdev = file_inode(file)->i_cdev; 732 struct nvme_ns_head *head = 733 container_of(cdev, struct nvme_ns_head, cdev); 734 void __user *argp = (void __user *)arg; 735 struct nvme_ns *ns; 736 int srcu_idx, ret = -EWOULDBLOCK; 737 738 srcu_idx = srcu_read_lock(&head->srcu); 739 ns = nvme_find_path(head); 740 if (!ns) 741 goto out_unlock; 742 743 if (is_ctrl_ioctl(cmd)) 744 return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, 745 open_for_write); 746 747 ret = nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write); 748 out_unlock: 749 srcu_read_unlock(&head->srcu, srcu_idx); 750 return ret; 751 } 752 753 int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd, 754 unsigned int issue_flags) 755 { 756 struct cdev *cdev = file_inode(ioucmd->file)->i_cdev; 757 struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev); 758 int srcu_idx = srcu_read_lock(&head->srcu); 759 struct nvme_ns *ns = nvme_find_path(head); 760 int ret = -EINVAL; 761 762 if (ns) 763 ret = nvme_ns_uring_cmd(ns, ioucmd, issue_flags); 764 srcu_read_unlock(&head->srcu, srcu_idx); 765 return ret; 766 } 767 #endif /* CONFIG_NVME_MULTIPATH */ 768 769 int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) 770 { 771 struct nvme_ctrl *ctrl = ioucmd->file->private_data; 772 int ret; 773 774 /* IOPOLL not supported yet */ 775 if (issue_flags & IO_URING_F_IOPOLL) 776 return -EOPNOTSUPP; 777 778 ret = nvme_uring_cmd_checks(issue_flags); 779 if (ret) 780 return ret; 781 782 switch (ioucmd->cmd_op) { 783 case NVME_URING_CMD_ADMIN: 784 ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, false); 785 break; 786 case NVME_URING_CMD_ADMIN_VEC: 787 ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, true); 788 break; 789 default: 790 ret = -ENOTTY; 791 } 792 793 return ret; 794 } 795 796 static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp, 797 bool open_for_write) 798 { 799 struct nvme_ns *ns; 800 int ret, srcu_idx; 801 802 srcu_idx = srcu_read_lock(&ctrl->srcu); 803 if (list_empty(&ctrl->namespaces)) { 804 ret = -ENOTTY; 805 goto out_unlock; 806 } 807 808 ns = list_first_or_null_rcu(&ctrl->namespaces, struct nvme_ns, list); 809 if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) { 810 dev_warn(ctrl->device, 811 "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n"); 812 ret = -EINVAL; 813 goto out_unlock; 814 } 815 816 dev_warn(ctrl->device, 817 "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n"); 818 if (!nvme_get_ns(ns)) { 819 ret = -ENXIO; 820 goto out_unlock; 821 } 822 srcu_read_unlock(&ctrl->srcu, srcu_idx); 823 824 ret = nvme_user_cmd(ctrl, ns, argp, 0, open_for_write); 825 nvme_put_ns(ns); 826 return ret; 827 828 out_unlock: 829 srcu_read_unlock(&ctrl->srcu, srcu_idx); 830 return ret; 831 } 832 833 long nvme_dev_ioctl(struct file *file, unsigned int cmd, 834 unsigned long arg) 835 { 836 bool open_for_write = file->f_mode & FMODE_WRITE; 837 struct nvme_ctrl *ctrl = file->private_data; 838 void __user *argp = (void __user *)arg; 839 840 switch (cmd) { 841 case NVME_IOCTL_ADMIN_CMD: 842 return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write); 843 case NVME_IOCTL_ADMIN64_CMD: 844 return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write); 845 case NVME_IOCTL_IO_CMD: 846 return nvme_dev_user_cmd(ctrl, argp, open_for_write); 847 case NVME_IOCTL_RESET: 848 if (!capable(CAP_SYS_ADMIN)) 849 return -EACCES; 850 dev_warn(ctrl->device, "resetting controller\n"); 851 return nvme_reset_ctrl_sync(ctrl); 852 case NVME_IOCTL_SUBSYS_RESET: 853 if (!capable(CAP_SYS_ADMIN)) 854 return -EACCES; 855 return nvme_reset_subsystem(ctrl); 856 case NVME_IOCTL_RESCAN: 857 if (!capable(CAP_SYS_ADMIN)) 858 return -EACCES; 859 nvme_queue_scan(ctrl); 860 return 0; 861 default: 862 return -ENOTTY; 863 } 864 } 865