1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2011-2014, Intel Corporation. 4 * Copyright (c) 2017-2021 Christoph Hellwig. 5 */ 6 #include <linux/ptrace.h> /* for force_successful_syscall_return */ 7 #include <linux/nvme_ioctl.h> 8 #include <linux/io_uring.h> 9 #include "nvme.h" 10 11 enum { 12 NVME_IOCTL_VEC = (1 << 0), 13 NVME_IOCTL_PARTITION = (1 << 1), 14 }; 15 16 static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, 17 unsigned int flags, bool open_for_write) 18 { 19 u32 effects; 20 21 if (capable(CAP_SYS_ADMIN)) 22 return true; 23 24 /* 25 * Do not allow unprivileged passthrough on partitions, as that allows an 26 * escape from the containment of the partition. 27 */ 28 if (flags & NVME_IOCTL_PARTITION) 29 return false; 30 31 /* 32 * Do not allow unprivileged processes to send vendor specific or fabrics 33 * commands as we can't be sure about their effects. 34 */ 35 if (c->common.opcode >= nvme_cmd_vendor_start || 36 c->common.opcode == nvme_fabrics_command) 37 return false; 38 39 /* 40 * Do not allow unprivileged passthrough of admin commands except 41 * for a subset of identify commands that contain information required 42 * to form proper I/O commands in userspace and do not expose any 43 * potentially sensitive information. 44 */ 45 if (!ns) { 46 if (c->common.opcode == nvme_admin_identify) { 47 switch (c->identify.cns) { 48 case NVME_ID_CNS_NS: 49 case NVME_ID_CNS_CS_NS: 50 case NVME_ID_CNS_NS_CS_INDEP: 51 case NVME_ID_CNS_CS_CTRL: 52 case NVME_ID_CNS_CTRL: 53 return true; 54 } 55 } 56 return false; 57 } 58 59 /* 60 * Check if the controller provides a Commands Supported and Effects log 61 * and marks this command as supported. If not reject unprivileged 62 * passthrough. 63 */ 64 effects = nvme_command_effects(ns->ctrl, ns, c->common.opcode); 65 if (!(effects & NVME_CMD_EFFECTS_CSUPP)) 66 return false; 67 68 /* 69 * Don't allow passthrough for command that have intrusive (or unknown) 70 * effects. 71 */ 72 if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC | 73 NVME_CMD_EFFECTS_UUID_SEL | 74 NVME_CMD_EFFECTS_SCOPE_MASK)) 75 return false; 76 77 /* 78 * Only allow I/O commands that transfer data to the controller or that 79 * change the logical block contents if the file descriptor is open for 80 * writing. 81 */ 82 if (nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC)) 83 return open_for_write; 84 return true; 85 } 86 87 /* 88 * Convert integer values from ioctl structures to user pointers, silently 89 * ignoring the upper bits in the compat case to match behaviour of 32-bit 90 * kernels. 91 */ 92 static void __user *nvme_to_user_ptr(uintptr_t ptrval) 93 { 94 if (in_compat_syscall()) 95 ptrval = (compat_uptr_t)ptrval; 96 return (void __user *)ptrval; 97 } 98 99 static void *nvme_add_user_metadata(struct request *req, void __user *ubuf, 100 unsigned len, u32 seed) 101 { 102 struct bio_integrity_payload *bip; 103 int ret = -ENOMEM; 104 void *buf; 105 struct bio *bio = req->bio; 106 107 buf = kmalloc(len, GFP_KERNEL); 108 if (!buf) 109 goto out; 110 111 if (req_op(req) == REQ_OP_DRV_OUT) { 112 ret = -EFAULT; 113 if (copy_from_user(buf, ubuf, len)) 114 goto out_free_meta; 115 } else { 116 memset(buf, 0, len); 117 } 118 119 bip = bio_integrity_alloc(bio, GFP_KERNEL, 1); 120 if (IS_ERR(bip)) { 121 ret = PTR_ERR(bip); 122 goto out_free_meta; 123 } 124 125 bip->bip_iter.bi_sector = seed; 126 ret = bio_integrity_add_page(bio, virt_to_page(buf), len, 127 offset_in_page(buf)); 128 if (ret != len) { 129 ret = -ENOMEM; 130 goto out_free_meta; 131 } 132 133 req->cmd_flags |= REQ_INTEGRITY; 134 return buf; 135 out_free_meta: 136 kfree(buf); 137 out: 138 return ERR_PTR(ret); 139 } 140 141 static int nvme_finish_user_metadata(struct request *req, void __user *ubuf, 142 void *meta, unsigned len, int ret) 143 { 144 if (!ret && req_op(req) == REQ_OP_DRV_IN && 145 copy_to_user(ubuf, meta, len)) 146 ret = -EFAULT; 147 kfree(meta); 148 return ret; 149 } 150 151 static struct request *nvme_alloc_user_request(struct request_queue *q, 152 struct nvme_command *cmd, blk_opf_t rq_flags, 153 blk_mq_req_flags_t blk_flags) 154 { 155 struct request *req; 156 157 req = blk_mq_alloc_request(q, nvme_req_op(cmd) | rq_flags, blk_flags); 158 if (IS_ERR(req)) 159 return req; 160 nvme_init_request(req, cmd); 161 nvme_req(req)->flags |= NVME_REQ_USERCMD; 162 return req; 163 } 164 165 static int nvme_map_user_request(struct request *req, u64 ubuffer, 166 unsigned bufflen, void __user *meta_buffer, unsigned meta_len, 167 u32 meta_seed, void **metap, struct io_uring_cmd *ioucmd, 168 unsigned int flags) 169 { 170 struct request_queue *q = req->q; 171 struct nvme_ns *ns = q->queuedata; 172 struct block_device *bdev = ns ? ns->disk->part0 : NULL; 173 struct bio *bio = NULL; 174 void *meta = NULL; 175 int ret; 176 177 if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) { 178 struct iov_iter iter; 179 180 /* fixedbufs is only for non-vectored io */ 181 if (WARN_ON_ONCE(flags & NVME_IOCTL_VEC)) 182 return -EINVAL; 183 ret = io_uring_cmd_import_fixed(ubuffer, bufflen, 184 rq_data_dir(req), &iter, ioucmd); 185 if (ret < 0) 186 goto out; 187 ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL); 188 } else { 189 ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer), 190 bufflen, GFP_KERNEL, flags & NVME_IOCTL_VEC, 0, 191 0, rq_data_dir(req)); 192 } 193 194 if (ret) 195 goto out; 196 bio = req->bio; 197 if (bdev) 198 bio_set_dev(bio, bdev); 199 200 if (bdev && meta_buffer && meta_len) { 201 meta = nvme_add_user_metadata(req, meta_buffer, meta_len, 202 meta_seed); 203 if (IS_ERR(meta)) { 204 ret = PTR_ERR(meta); 205 goto out_unmap; 206 } 207 *metap = meta; 208 } 209 210 return ret; 211 212 out_unmap: 213 if (bio) 214 blk_rq_unmap_user(bio); 215 out: 216 blk_mq_free_request(req); 217 return ret; 218 } 219 220 static int nvme_submit_user_cmd(struct request_queue *q, 221 struct nvme_command *cmd, u64 ubuffer, unsigned bufflen, 222 void __user *meta_buffer, unsigned meta_len, u32 meta_seed, 223 u64 *result, unsigned timeout, unsigned int flags) 224 { 225 struct nvme_ns *ns = q->queuedata; 226 struct nvme_ctrl *ctrl; 227 struct request *req; 228 void *meta = NULL; 229 struct bio *bio; 230 u32 effects; 231 int ret; 232 233 req = nvme_alloc_user_request(q, cmd, 0, 0); 234 if (IS_ERR(req)) 235 return PTR_ERR(req); 236 237 req->timeout = timeout; 238 if (ubuffer && bufflen) { 239 ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer, 240 meta_len, meta_seed, &meta, NULL, flags); 241 if (ret) 242 return ret; 243 } 244 245 bio = req->bio; 246 ctrl = nvme_req(req)->ctrl; 247 248 effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode); 249 ret = nvme_execute_rq(req, false); 250 if (result) 251 *result = le64_to_cpu(nvme_req(req)->result.u64); 252 if (meta) 253 ret = nvme_finish_user_metadata(req, meta_buffer, meta, 254 meta_len, ret); 255 if (bio) 256 blk_rq_unmap_user(bio); 257 blk_mq_free_request(req); 258 259 if (effects) 260 nvme_passthru_end(ctrl, ns, effects, cmd, ret); 261 262 return ret; 263 } 264 265 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) 266 { 267 struct nvme_user_io io; 268 struct nvme_command c; 269 unsigned length, meta_len; 270 void __user *metadata; 271 272 if (copy_from_user(&io, uio, sizeof(io))) 273 return -EFAULT; 274 if (io.flags) 275 return -EINVAL; 276 277 switch (io.opcode) { 278 case nvme_cmd_write: 279 case nvme_cmd_read: 280 case nvme_cmd_compare: 281 break; 282 default: 283 return -EINVAL; 284 } 285 286 length = (io.nblocks + 1) << ns->lba_shift; 287 288 if ((io.control & NVME_RW_PRINFO_PRACT) && 289 ns->ms == sizeof(struct t10_pi_tuple)) { 290 /* 291 * Protection information is stripped/inserted by the 292 * controller. 293 */ 294 if (nvme_to_user_ptr(io.metadata)) 295 return -EINVAL; 296 meta_len = 0; 297 metadata = NULL; 298 } else { 299 meta_len = (io.nblocks + 1) * ns->ms; 300 metadata = nvme_to_user_ptr(io.metadata); 301 } 302 303 if (ns->features & NVME_NS_EXT_LBAS) { 304 length += meta_len; 305 meta_len = 0; 306 } else if (meta_len) { 307 if ((io.metadata & 3) || !io.metadata) 308 return -EINVAL; 309 } 310 311 memset(&c, 0, sizeof(c)); 312 c.rw.opcode = io.opcode; 313 c.rw.flags = io.flags; 314 c.rw.nsid = cpu_to_le32(ns->head->ns_id); 315 c.rw.slba = cpu_to_le64(io.slba); 316 c.rw.length = cpu_to_le16(io.nblocks); 317 c.rw.control = cpu_to_le16(io.control); 318 c.rw.dsmgmt = cpu_to_le32(io.dsmgmt); 319 c.rw.reftag = cpu_to_le32(io.reftag); 320 c.rw.apptag = cpu_to_le16(io.apptag); 321 c.rw.appmask = cpu_to_le16(io.appmask); 322 323 return nvme_submit_user_cmd(ns->queue, &c, io.addr, length, metadata, 324 meta_len, lower_32_bits(io.slba), NULL, 0, 0); 325 } 326 327 static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl, 328 struct nvme_ns *ns, __u32 nsid) 329 { 330 if (ns && nsid != ns->head->ns_id) { 331 dev_err(ctrl->device, 332 "%s: nsid (%u) in cmd does not match nsid (%u)" 333 "of namespace\n", 334 current->comm, nsid, ns->head->ns_id); 335 return false; 336 } 337 338 return true; 339 } 340 341 static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 342 struct nvme_passthru_cmd __user *ucmd, unsigned int flags, 343 bool open_for_write) 344 { 345 struct nvme_passthru_cmd cmd; 346 struct nvme_command c; 347 unsigned timeout = 0; 348 u64 result; 349 int status; 350 351 if (copy_from_user(&cmd, ucmd, sizeof(cmd))) 352 return -EFAULT; 353 if (cmd.flags) 354 return -EINVAL; 355 if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid)) 356 return -EINVAL; 357 358 memset(&c, 0, sizeof(c)); 359 c.common.opcode = cmd.opcode; 360 c.common.flags = cmd.flags; 361 c.common.nsid = cpu_to_le32(cmd.nsid); 362 c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); 363 c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); 364 c.common.cdw10 = cpu_to_le32(cmd.cdw10); 365 c.common.cdw11 = cpu_to_le32(cmd.cdw11); 366 c.common.cdw12 = cpu_to_le32(cmd.cdw12); 367 c.common.cdw13 = cpu_to_le32(cmd.cdw13); 368 c.common.cdw14 = cpu_to_le32(cmd.cdw14); 369 c.common.cdw15 = cpu_to_le32(cmd.cdw15); 370 371 if (!nvme_cmd_allowed(ns, &c, 0, open_for_write)) 372 return -EACCES; 373 374 if (cmd.timeout_ms) 375 timeout = msecs_to_jiffies(cmd.timeout_ms); 376 377 status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, 378 cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata), 379 cmd.metadata_len, 0, &result, timeout, 0); 380 381 if (status >= 0) { 382 if (put_user(result, &ucmd->result)) 383 return -EFAULT; 384 } 385 386 return status; 387 } 388 389 static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 390 struct nvme_passthru_cmd64 __user *ucmd, unsigned int flags, 391 bool open_for_write) 392 { 393 struct nvme_passthru_cmd64 cmd; 394 struct nvme_command c; 395 unsigned timeout = 0; 396 int status; 397 398 if (copy_from_user(&cmd, ucmd, sizeof(cmd))) 399 return -EFAULT; 400 if (cmd.flags) 401 return -EINVAL; 402 if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid)) 403 return -EINVAL; 404 405 memset(&c, 0, sizeof(c)); 406 c.common.opcode = cmd.opcode; 407 c.common.flags = cmd.flags; 408 c.common.nsid = cpu_to_le32(cmd.nsid); 409 c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); 410 c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); 411 c.common.cdw10 = cpu_to_le32(cmd.cdw10); 412 c.common.cdw11 = cpu_to_le32(cmd.cdw11); 413 c.common.cdw12 = cpu_to_le32(cmd.cdw12); 414 c.common.cdw13 = cpu_to_le32(cmd.cdw13); 415 c.common.cdw14 = cpu_to_le32(cmd.cdw14); 416 c.common.cdw15 = cpu_to_le32(cmd.cdw15); 417 418 if (!nvme_cmd_allowed(ns, &c, flags, open_for_write)) 419 return -EACCES; 420 421 if (cmd.timeout_ms) 422 timeout = msecs_to_jiffies(cmd.timeout_ms); 423 424 status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, 425 cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata), 426 cmd.metadata_len, 0, &cmd.result, timeout, flags); 427 428 if (status >= 0) { 429 if (put_user(cmd.result, &ucmd->result)) 430 return -EFAULT; 431 } 432 433 return status; 434 } 435 436 struct nvme_uring_data { 437 __u64 metadata; 438 __u64 addr; 439 __u32 data_len; 440 __u32 metadata_len; 441 __u32 timeout_ms; 442 }; 443 444 /* 445 * This overlays struct io_uring_cmd pdu. 446 * Expect build errors if this grows larger than that. 447 */ 448 struct nvme_uring_cmd_pdu { 449 union { 450 struct bio *bio; 451 struct request *req; 452 }; 453 u32 meta_len; 454 u32 nvme_status; 455 union { 456 struct { 457 void *meta; /* kernel-resident buffer */ 458 void __user *meta_buffer; 459 }; 460 u64 result; 461 } u; 462 }; 463 464 static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu( 465 struct io_uring_cmd *ioucmd) 466 { 467 return (struct nvme_uring_cmd_pdu *)&ioucmd->pdu; 468 } 469 470 static void nvme_uring_task_meta_cb(struct io_uring_cmd *ioucmd, 471 unsigned issue_flags) 472 { 473 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 474 struct request *req = pdu->req; 475 int status; 476 u64 result; 477 478 if (nvme_req(req)->flags & NVME_REQ_CANCELLED) 479 status = -EINTR; 480 else 481 status = nvme_req(req)->status; 482 483 result = le64_to_cpu(nvme_req(req)->result.u64); 484 485 if (pdu->meta_len) 486 status = nvme_finish_user_metadata(req, pdu->u.meta_buffer, 487 pdu->u.meta, pdu->meta_len, status); 488 if (req->bio) 489 blk_rq_unmap_user(req->bio); 490 blk_mq_free_request(req); 491 492 io_uring_cmd_done(ioucmd, status, result, issue_flags); 493 } 494 495 static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd, 496 unsigned issue_flags) 497 { 498 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 499 500 if (pdu->bio) 501 blk_rq_unmap_user(pdu->bio); 502 503 io_uring_cmd_done(ioucmd, pdu->nvme_status, pdu->u.result, issue_flags); 504 } 505 506 static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req, 507 blk_status_t err) 508 { 509 struct io_uring_cmd *ioucmd = req->end_io_data; 510 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 511 512 req->bio = pdu->bio; 513 if (nvme_req(req)->flags & NVME_REQ_CANCELLED) { 514 pdu->nvme_status = -EINTR; 515 } else { 516 pdu->nvme_status = nvme_req(req)->status; 517 if (!pdu->nvme_status) 518 pdu->nvme_status = blk_status_to_errno(err); 519 } 520 pdu->u.result = le64_to_cpu(nvme_req(req)->result.u64); 521 522 /* 523 * For iopoll, complete it directly. 524 * Otherwise, move the completion to task work. 525 */ 526 if (blk_rq_is_poll(req)) { 527 WRITE_ONCE(ioucmd->cookie, NULL); 528 nvme_uring_task_cb(ioucmd, IO_URING_F_UNLOCKED); 529 } else { 530 io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb); 531 } 532 533 return RQ_END_IO_FREE; 534 } 535 536 static enum rq_end_io_ret nvme_uring_cmd_end_io_meta(struct request *req, 537 blk_status_t err) 538 { 539 struct io_uring_cmd *ioucmd = req->end_io_data; 540 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 541 542 req->bio = pdu->bio; 543 pdu->req = req; 544 545 /* 546 * For iopoll, complete it directly. 547 * Otherwise, move the completion to task work. 548 */ 549 if (blk_rq_is_poll(req)) { 550 WRITE_ONCE(ioucmd->cookie, NULL); 551 nvme_uring_task_meta_cb(ioucmd, IO_URING_F_UNLOCKED); 552 } else { 553 io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_meta_cb); 554 } 555 556 return RQ_END_IO_NONE; 557 } 558 559 static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 560 struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec) 561 { 562 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 563 const struct nvme_uring_cmd *cmd = io_uring_sqe_cmd(ioucmd->sqe); 564 struct request_queue *q = ns ? ns->queue : ctrl->admin_q; 565 struct nvme_uring_data d; 566 struct nvme_command c; 567 struct request *req; 568 blk_opf_t rq_flags = REQ_ALLOC_CACHE; 569 blk_mq_req_flags_t blk_flags = 0; 570 void *meta = NULL; 571 int ret; 572 573 c.common.opcode = READ_ONCE(cmd->opcode); 574 c.common.flags = READ_ONCE(cmd->flags); 575 if (c.common.flags) 576 return -EINVAL; 577 578 c.common.command_id = 0; 579 c.common.nsid = cpu_to_le32(cmd->nsid); 580 if (!nvme_validate_passthru_nsid(ctrl, ns, le32_to_cpu(c.common.nsid))) 581 return -EINVAL; 582 583 c.common.cdw2[0] = cpu_to_le32(READ_ONCE(cmd->cdw2)); 584 c.common.cdw2[1] = cpu_to_le32(READ_ONCE(cmd->cdw3)); 585 c.common.metadata = 0; 586 c.common.dptr.prp1 = c.common.dptr.prp2 = 0; 587 c.common.cdw10 = cpu_to_le32(READ_ONCE(cmd->cdw10)); 588 c.common.cdw11 = cpu_to_le32(READ_ONCE(cmd->cdw11)); 589 c.common.cdw12 = cpu_to_le32(READ_ONCE(cmd->cdw12)); 590 c.common.cdw13 = cpu_to_le32(READ_ONCE(cmd->cdw13)); 591 c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14)); 592 c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15)); 593 594 if (!nvme_cmd_allowed(ns, &c, 0, ioucmd->file->f_mode & FMODE_WRITE)) 595 return -EACCES; 596 597 d.metadata = READ_ONCE(cmd->metadata); 598 d.addr = READ_ONCE(cmd->addr); 599 d.data_len = READ_ONCE(cmd->data_len); 600 d.metadata_len = READ_ONCE(cmd->metadata_len); 601 d.timeout_ms = READ_ONCE(cmd->timeout_ms); 602 603 if (issue_flags & IO_URING_F_NONBLOCK) { 604 rq_flags |= REQ_NOWAIT; 605 blk_flags = BLK_MQ_REQ_NOWAIT; 606 } 607 if (issue_flags & IO_URING_F_IOPOLL) 608 rq_flags |= REQ_POLLED; 609 610 req = nvme_alloc_user_request(q, &c, rq_flags, blk_flags); 611 if (IS_ERR(req)) 612 return PTR_ERR(req); 613 req->timeout = d.timeout_ms ? msecs_to_jiffies(d.timeout_ms) : 0; 614 615 if (d.addr && d.data_len) { 616 ret = nvme_map_user_request(req, d.addr, 617 d.data_len, nvme_to_user_ptr(d.metadata), 618 d.metadata_len, 0, &meta, ioucmd, vec); 619 if (ret) 620 return ret; 621 } 622 623 if (blk_rq_is_poll(req)) { 624 ioucmd->flags |= IORING_URING_CMD_POLLED; 625 WRITE_ONCE(ioucmd->cookie, req); 626 } 627 628 /* to free bio on completion, as req->bio will be null at that time */ 629 pdu->bio = req->bio; 630 pdu->meta_len = d.metadata_len; 631 req->end_io_data = ioucmd; 632 if (pdu->meta_len) { 633 pdu->u.meta = meta; 634 pdu->u.meta_buffer = nvme_to_user_ptr(d.metadata); 635 req->end_io = nvme_uring_cmd_end_io_meta; 636 } else { 637 req->end_io = nvme_uring_cmd_end_io; 638 } 639 blk_execute_rq_nowait(req, false); 640 return -EIOCBQUEUED; 641 } 642 643 static bool is_ctrl_ioctl(unsigned int cmd) 644 { 645 if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD) 646 return true; 647 if (is_sed_ioctl(cmd)) 648 return true; 649 return false; 650 } 651 652 static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd, 653 void __user *argp, bool open_for_write) 654 { 655 switch (cmd) { 656 case NVME_IOCTL_ADMIN_CMD: 657 return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write); 658 case NVME_IOCTL_ADMIN64_CMD: 659 return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write); 660 default: 661 return sed_ioctl(ctrl->opal_dev, cmd, argp); 662 } 663 } 664 665 #ifdef COMPAT_FOR_U64_ALIGNMENT 666 struct nvme_user_io32 { 667 __u8 opcode; 668 __u8 flags; 669 __u16 control; 670 __u16 nblocks; 671 __u16 rsvd; 672 __u64 metadata; 673 __u64 addr; 674 __u64 slba; 675 __u32 dsmgmt; 676 __u32 reftag; 677 __u16 apptag; 678 __u16 appmask; 679 } __attribute__((__packed__)); 680 #define NVME_IOCTL_SUBMIT_IO32 _IOW('N', 0x42, struct nvme_user_io32) 681 #endif /* COMPAT_FOR_U64_ALIGNMENT */ 682 683 static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, 684 void __user *argp, unsigned int flags, bool open_for_write) 685 { 686 switch (cmd) { 687 case NVME_IOCTL_ID: 688 force_successful_syscall_return(); 689 return ns->head->ns_id; 690 case NVME_IOCTL_IO_CMD: 691 return nvme_user_cmd(ns->ctrl, ns, argp, flags, open_for_write); 692 /* 693 * struct nvme_user_io can have different padding on some 32-bit ABIs. 694 * Just accept the compat version as all fields that are used are the 695 * same size and at the same offset. 696 */ 697 #ifdef COMPAT_FOR_U64_ALIGNMENT 698 case NVME_IOCTL_SUBMIT_IO32: 699 #endif 700 case NVME_IOCTL_SUBMIT_IO: 701 return nvme_submit_io(ns, argp); 702 case NVME_IOCTL_IO64_CMD_VEC: 703 flags |= NVME_IOCTL_VEC; 704 fallthrough; 705 case NVME_IOCTL_IO64_CMD: 706 return nvme_user_cmd64(ns->ctrl, ns, argp, flags, 707 open_for_write); 708 default: 709 return -ENOTTY; 710 } 711 } 712 713 int nvme_ioctl(struct block_device *bdev, blk_mode_t mode, 714 unsigned int cmd, unsigned long arg) 715 { 716 struct nvme_ns *ns = bdev->bd_disk->private_data; 717 bool open_for_write = mode & BLK_OPEN_WRITE; 718 void __user *argp = (void __user *)arg; 719 unsigned int flags = 0; 720 721 if (bdev_is_partition(bdev)) 722 flags |= NVME_IOCTL_PARTITION; 723 724 if (is_ctrl_ioctl(cmd)) 725 return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); 726 return nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write); 727 } 728 729 long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 730 { 731 struct nvme_ns *ns = 732 container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev); 733 bool open_for_write = file->f_mode & FMODE_WRITE; 734 void __user *argp = (void __user *)arg; 735 736 if (is_ctrl_ioctl(cmd)) 737 return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); 738 return nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write); 739 } 740 741 static int nvme_uring_cmd_checks(unsigned int issue_flags) 742 { 743 744 /* NVMe passthrough requires big SQE/CQE support */ 745 if ((issue_flags & (IO_URING_F_SQE128|IO_URING_F_CQE32)) != 746 (IO_URING_F_SQE128|IO_URING_F_CQE32)) 747 return -EOPNOTSUPP; 748 return 0; 749 } 750 751 static int nvme_ns_uring_cmd(struct nvme_ns *ns, struct io_uring_cmd *ioucmd, 752 unsigned int issue_flags) 753 { 754 struct nvme_ctrl *ctrl = ns->ctrl; 755 int ret; 756 757 BUILD_BUG_ON(sizeof(struct nvme_uring_cmd_pdu) > sizeof(ioucmd->pdu)); 758 759 ret = nvme_uring_cmd_checks(issue_flags); 760 if (ret) 761 return ret; 762 763 switch (ioucmd->cmd_op) { 764 case NVME_URING_CMD_IO: 765 ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, false); 766 break; 767 case NVME_URING_CMD_IO_VEC: 768 ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, true); 769 break; 770 default: 771 ret = -ENOTTY; 772 } 773 774 return ret; 775 } 776 777 int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) 778 { 779 struct nvme_ns *ns = container_of(file_inode(ioucmd->file)->i_cdev, 780 struct nvme_ns, cdev); 781 782 return nvme_ns_uring_cmd(ns, ioucmd, issue_flags); 783 } 784 785 int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd, 786 struct io_comp_batch *iob, 787 unsigned int poll_flags) 788 { 789 struct request *req; 790 int ret = 0; 791 792 if (!(ioucmd->flags & IORING_URING_CMD_POLLED)) 793 return 0; 794 795 req = READ_ONCE(ioucmd->cookie); 796 if (req && blk_rq_is_poll(req)) 797 ret = blk_rq_poll(req, iob, poll_flags); 798 return ret; 799 } 800 #ifdef CONFIG_NVME_MULTIPATH 801 static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, 802 void __user *argp, struct nvme_ns_head *head, int srcu_idx, 803 bool open_for_write) 804 __releases(&head->srcu) 805 { 806 struct nvme_ctrl *ctrl = ns->ctrl; 807 int ret; 808 809 nvme_get_ctrl(ns->ctrl); 810 srcu_read_unlock(&head->srcu, srcu_idx); 811 ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); 812 813 nvme_put_ctrl(ctrl); 814 return ret; 815 } 816 817 int nvme_ns_head_ioctl(struct block_device *bdev, blk_mode_t mode, 818 unsigned int cmd, unsigned long arg) 819 { 820 struct nvme_ns_head *head = bdev->bd_disk->private_data; 821 bool open_for_write = mode & BLK_OPEN_WRITE; 822 void __user *argp = (void __user *)arg; 823 struct nvme_ns *ns; 824 int srcu_idx, ret = -EWOULDBLOCK; 825 unsigned int flags = 0; 826 827 if (bdev_is_partition(bdev)) 828 flags |= NVME_IOCTL_PARTITION; 829 830 srcu_idx = srcu_read_lock(&head->srcu); 831 ns = nvme_find_path(head); 832 if (!ns) 833 goto out_unlock; 834 835 /* 836 * Handle ioctls that apply to the controller instead of the namespace 837 * seperately and drop the ns SRCU reference early. This avoids a 838 * deadlock when deleting namespaces using the passthrough interface. 839 */ 840 if (is_ctrl_ioctl(cmd)) 841 return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, 842 open_for_write); 843 844 ret = nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write); 845 out_unlock: 846 srcu_read_unlock(&head->srcu, srcu_idx); 847 return ret; 848 } 849 850 long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd, 851 unsigned long arg) 852 { 853 bool open_for_write = file->f_mode & FMODE_WRITE; 854 struct cdev *cdev = file_inode(file)->i_cdev; 855 struct nvme_ns_head *head = 856 container_of(cdev, struct nvme_ns_head, cdev); 857 void __user *argp = (void __user *)arg; 858 struct nvme_ns *ns; 859 int srcu_idx, ret = -EWOULDBLOCK; 860 861 srcu_idx = srcu_read_lock(&head->srcu); 862 ns = nvme_find_path(head); 863 if (!ns) 864 goto out_unlock; 865 866 if (is_ctrl_ioctl(cmd)) 867 return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, 868 open_for_write); 869 870 ret = nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write); 871 out_unlock: 872 srcu_read_unlock(&head->srcu, srcu_idx); 873 return ret; 874 } 875 876 int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd, 877 unsigned int issue_flags) 878 { 879 struct cdev *cdev = file_inode(ioucmd->file)->i_cdev; 880 struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev); 881 int srcu_idx = srcu_read_lock(&head->srcu); 882 struct nvme_ns *ns = nvme_find_path(head); 883 int ret = -EINVAL; 884 885 if (ns) 886 ret = nvme_ns_uring_cmd(ns, ioucmd, issue_flags); 887 srcu_read_unlock(&head->srcu, srcu_idx); 888 return ret; 889 } 890 #endif /* CONFIG_NVME_MULTIPATH */ 891 892 int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) 893 { 894 struct nvme_ctrl *ctrl = ioucmd->file->private_data; 895 int ret; 896 897 /* IOPOLL not supported yet */ 898 if (issue_flags & IO_URING_F_IOPOLL) 899 return -EOPNOTSUPP; 900 901 ret = nvme_uring_cmd_checks(issue_flags); 902 if (ret) 903 return ret; 904 905 switch (ioucmd->cmd_op) { 906 case NVME_URING_CMD_ADMIN: 907 ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, false); 908 break; 909 case NVME_URING_CMD_ADMIN_VEC: 910 ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, true); 911 break; 912 default: 913 ret = -ENOTTY; 914 } 915 916 return ret; 917 } 918 919 static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp, 920 bool open_for_write) 921 { 922 struct nvme_ns *ns; 923 int ret; 924 925 down_read(&ctrl->namespaces_rwsem); 926 if (list_empty(&ctrl->namespaces)) { 927 ret = -ENOTTY; 928 goto out_unlock; 929 } 930 931 ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list); 932 if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) { 933 dev_warn(ctrl->device, 934 "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n"); 935 ret = -EINVAL; 936 goto out_unlock; 937 } 938 939 dev_warn(ctrl->device, 940 "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n"); 941 kref_get(&ns->kref); 942 up_read(&ctrl->namespaces_rwsem); 943 944 ret = nvme_user_cmd(ctrl, ns, argp, 0, open_for_write); 945 nvme_put_ns(ns); 946 return ret; 947 948 out_unlock: 949 up_read(&ctrl->namespaces_rwsem); 950 return ret; 951 } 952 953 long nvme_dev_ioctl(struct file *file, unsigned int cmd, 954 unsigned long arg) 955 { 956 bool open_for_write = file->f_mode & FMODE_WRITE; 957 struct nvme_ctrl *ctrl = file->private_data; 958 void __user *argp = (void __user *)arg; 959 960 switch (cmd) { 961 case NVME_IOCTL_ADMIN_CMD: 962 return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write); 963 case NVME_IOCTL_ADMIN64_CMD: 964 return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write); 965 case NVME_IOCTL_IO_CMD: 966 return nvme_dev_user_cmd(ctrl, argp, open_for_write); 967 case NVME_IOCTL_RESET: 968 if (!capable(CAP_SYS_ADMIN)) 969 return -EACCES; 970 dev_warn(ctrl->device, "resetting controller\n"); 971 return nvme_reset_ctrl_sync(ctrl); 972 case NVME_IOCTL_SUBSYS_RESET: 973 if (!capable(CAP_SYS_ADMIN)) 974 return -EACCES; 975 return nvme_reset_subsystem(ctrl); 976 case NVME_IOCTL_RESCAN: 977 if (!capable(CAP_SYS_ADMIN)) 978 return -EACCES; 979 nvme_queue_scan(ctrl); 980 return 0; 981 default: 982 return -ENOTTY; 983 } 984 } 985