1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2011-2014, Intel Corporation. 4 * Copyright (c) 2017-2021 Christoph Hellwig. 5 */ 6 #include <linux/blk-integrity.h> 7 #include <linux/ptrace.h> /* for force_successful_syscall_return */ 8 #include <linux/nvme_ioctl.h> 9 #include <linux/io_uring/cmd.h> 10 #include "nvme.h" 11 12 enum { 13 NVME_IOCTL_VEC = (1 << 0), 14 NVME_IOCTL_PARTITION = (1 << 1), 15 }; 16 17 static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, 18 unsigned int flags, bool open_for_write) 19 { 20 u32 effects; 21 22 /* 23 * Do not allow unprivileged passthrough on partitions, as that allows an 24 * escape from the containment of the partition. 25 */ 26 if (flags & NVME_IOCTL_PARTITION) 27 goto admin; 28 29 /* 30 * Do not allow unprivileged processes to send vendor specific or fabrics 31 * commands as we can't be sure about their effects. 32 */ 33 if (c->common.opcode >= nvme_cmd_vendor_start || 34 c->common.opcode == nvme_fabrics_command) 35 goto admin; 36 37 /* 38 * Do not allow unprivileged passthrough of admin commands except 39 * for a subset of identify commands that contain information required 40 * to form proper I/O commands in userspace and do not expose any 41 * potentially sensitive information. 42 */ 43 if (!ns) { 44 if (c->common.opcode == nvme_admin_identify) { 45 switch (c->identify.cns) { 46 case NVME_ID_CNS_NS: 47 case NVME_ID_CNS_CS_NS: 48 case NVME_ID_CNS_NS_CS_INDEP: 49 case NVME_ID_CNS_CS_CTRL: 50 case NVME_ID_CNS_CTRL: 51 return true; 52 } 53 } 54 goto admin; 55 } 56 57 /* 58 * Check if the controller provides a Commands Supported and Effects log 59 * and marks this command as supported. If not reject unprivileged 60 * passthrough. 61 */ 62 effects = nvme_command_effects(ns->ctrl, ns, c->common.opcode); 63 if (!(effects & NVME_CMD_EFFECTS_CSUPP)) 64 goto admin; 65 66 /* 67 * Don't allow passthrough for command that have intrusive (or unknown) 68 * effects. 69 */ 70 if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC | 71 NVME_CMD_EFFECTS_UUID_SEL | 72 NVME_CMD_EFFECTS_SCOPE_MASK)) 73 goto admin; 74 75 /* 76 * Only allow I/O commands that transfer data to the controller or that 77 * change the logical block contents if the file descriptor is open for 78 * writing. 79 */ 80 if ((nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC)) && 81 !open_for_write) 82 goto admin; 83 84 return true; 85 admin: 86 return capable(CAP_SYS_ADMIN); 87 } 88 89 /* 90 * Convert integer values from ioctl structures to user pointers, silently 91 * ignoring the upper bits in the compat case to match behaviour of 32-bit 92 * kernels. 93 */ 94 static void __user *nvme_to_user_ptr(uintptr_t ptrval) 95 { 96 if (in_compat_syscall()) 97 ptrval = (compat_uptr_t)ptrval; 98 return (void __user *)ptrval; 99 } 100 101 static struct request *nvme_alloc_user_request(struct request_queue *q, 102 struct nvme_command *cmd, blk_opf_t rq_flags, 103 blk_mq_req_flags_t blk_flags) 104 { 105 struct request *req; 106 107 req = blk_mq_alloc_request(q, nvme_req_op(cmd) | rq_flags, blk_flags); 108 if (IS_ERR(req)) 109 return req; 110 nvme_init_request(req, cmd); 111 nvme_req(req)->flags |= NVME_REQ_USERCMD; 112 return req; 113 } 114 115 static int nvme_map_user_request(struct request *req, u64 ubuffer, 116 unsigned bufflen, void __user *meta_buffer, unsigned meta_len, 117 struct iov_iter *iter, unsigned int flags) 118 { 119 struct request_queue *q = req->q; 120 struct nvme_ns *ns = q->queuedata; 121 struct block_device *bdev = ns ? ns->disk->part0 : NULL; 122 bool supports_metadata = bdev && blk_get_integrity(bdev->bd_disk); 123 bool has_metadata = meta_buffer && meta_len; 124 int ret; 125 126 if (has_metadata && !supports_metadata) 127 return -EINVAL; 128 129 if (iter) 130 ret = blk_rq_map_user_iov(q, req, NULL, iter, GFP_KERNEL); 131 else 132 ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer), 133 bufflen, GFP_KERNEL, flags & NVME_IOCTL_VEC, 0, 134 0, rq_data_dir(req)); 135 if (ret) 136 return ret; 137 138 if (has_metadata) { 139 ret = blk_rq_integrity_map_user(req, meta_buffer, meta_len); 140 if (ret) 141 goto out_unmap; 142 } 143 144 return ret; 145 146 out_unmap: 147 if (req->bio) 148 blk_rq_unmap_user(req->bio); 149 return ret; 150 } 151 152 static int nvme_submit_user_cmd(struct request_queue *q, 153 struct nvme_command *cmd, u64 ubuffer, unsigned bufflen, 154 void __user *meta_buffer, unsigned meta_len, 155 u64 *result, unsigned timeout, unsigned int flags) 156 { 157 struct nvme_ns *ns = q->queuedata; 158 struct nvme_ctrl *ctrl; 159 struct request *req; 160 struct bio *bio; 161 u32 effects; 162 int ret; 163 164 req = nvme_alloc_user_request(q, cmd, 0, 0); 165 if (IS_ERR(req)) 166 return PTR_ERR(req); 167 168 req->timeout = timeout; 169 if (ubuffer && bufflen) { 170 ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer, 171 meta_len, NULL, flags); 172 if (ret) 173 goto out_free_req; 174 } 175 176 bio = req->bio; 177 ctrl = nvme_req(req)->ctrl; 178 179 effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode); 180 ret = nvme_execute_rq(req, false); 181 if (result) 182 *result = le64_to_cpu(nvme_req(req)->result.u64); 183 if (bio) 184 blk_rq_unmap_user(bio); 185 blk_mq_free_request(req); 186 187 if (effects) 188 nvme_passthru_end(ctrl, ns, effects, cmd, ret); 189 return ret; 190 191 out_free_req: 192 blk_mq_free_request(req); 193 return ret; 194 } 195 196 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) 197 { 198 struct nvme_user_io io; 199 struct nvme_command c; 200 unsigned length, meta_len; 201 void __user *metadata; 202 203 if (copy_from_user(&io, uio, sizeof(io))) 204 return -EFAULT; 205 if (io.flags) 206 return -EINVAL; 207 208 switch (io.opcode) { 209 case nvme_cmd_write: 210 case nvme_cmd_read: 211 case nvme_cmd_compare: 212 break; 213 default: 214 return -EINVAL; 215 } 216 217 length = (io.nblocks + 1) << ns->head->lba_shift; 218 219 if ((io.control & NVME_RW_PRINFO_PRACT) && 220 (ns->head->ms == ns->head->pi_size)) { 221 /* 222 * Protection information is stripped/inserted by the 223 * controller. 224 */ 225 if (nvme_to_user_ptr(io.metadata)) 226 return -EINVAL; 227 meta_len = 0; 228 metadata = NULL; 229 } else { 230 meta_len = (io.nblocks + 1) * ns->head->ms; 231 metadata = nvme_to_user_ptr(io.metadata); 232 } 233 234 if (ns->head->features & NVME_NS_EXT_LBAS) { 235 length += meta_len; 236 meta_len = 0; 237 } else if (meta_len) { 238 if ((io.metadata & 3) || !io.metadata) 239 return -EINVAL; 240 } 241 242 memset(&c, 0, sizeof(c)); 243 c.rw.opcode = io.opcode; 244 c.rw.flags = io.flags; 245 c.rw.nsid = cpu_to_le32(ns->head->ns_id); 246 c.rw.slba = cpu_to_le64(io.slba); 247 c.rw.length = cpu_to_le16(io.nblocks); 248 c.rw.control = cpu_to_le16(io.control); 249 c.rw.dsmgmt = cpu_to_le32(io.dsmgmt); 250 c.rw.reftag = cpu_to_le32(io.reftag); 251 c.rw.lbat = cpu_to_le16(io.apptag); 252 c.rw.lbatm = cpu_to_le16(io.appmask); 253 254 return nvme_submit_user_cmd(ns->queue, &c, io.addr, length, metadata, 255 meta_len, NULL, 0, 0); 256 } 257 258 static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl, 259 struct nvme_ns *ns, __u32 nsid) 260 { 261 if (ns && nsid != ns->head->ns_id) { 262 dev_err(ctrl->device, 263 "%s: nsid (%u) in cmd does not match nsid (%u) of namespace\n", 264 current->comm, nsid, ns->head->ns_id); 265 return false; 266 } 267 268 return true; 269 } 270 271 static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 272 struct nvme_passthru_cmd __user *ucmd, unsigned int flags, 273 bool open_for_write) 274 { 275 struct nvme_passthru_cmd cmd; 276 struct nvme_command c; 277 unsigned timeout = 0; 278 u64 result; 279 int status; 280 281 if (copy_from_user(&cmd, ucmd, sizeof(cmd))) 282 return -EFAULT; 283 if (cmd.flags) 284 return -EINVAL; 285 if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid)) 286 return -EINVAL; 287 288 memset(&c, 0, sizeof(c)); 289 c.common.opcode = cmd.opcode; 290 c.common.flags = cmd.flags; 291 c.common.nsid = cpu_to_le32(cmd.nsid); 292 c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); 293 c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); 294 c.common.cdw10 = cpu_to_le32(cmd.cdw10); 295 c.common.cdw11 = cpu_to_le32(cmd.cdw11); 296 c.common.cdw12 = cpu_to_le32(cmd.cdw12); 297 c.common.cdw13 = cpu_to_le32(cmd.cdw13); 298 c.common.cdw14 = cpu_to_le32(cmd.cdw14); 299 c.common.cdw15 = cpu_to_le32(cmd.cdw15); 300 301 if (!nvme_cmd_allowed(ns, &c, 0, open_for_write)) 302 return -EACCES; 303 304 if (cmd.timeout_ms) 305 timeout = msecs_to_jiffies(cmd.timeout_ms); 306 307 status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, 308 cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata), 309 cmd.metadata_len, &result, timeout, 0); 310 311 if (status >= 0) { 312 if (put_user(result, &ucmd->result)) 313 return -EFAULT; 314 } 315 316 return status; 317 } 318 319 static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 320 struct nvme_passthru_cmd64 __user *ucmd, unsigned int flags, 321 bool open_for_write) 322 { 323 struct nvme_passthru_cmd64 cmd; 324 struct nvme_command c; 325 unsigned timeout = 0; 326 int status; 327 328 if (copy_from_user(&cmd, ucmd, sizeof(cmd))) 329 return -EFAULT; 330 if (cmd.flags) 331 return -EINVAL; 332 if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid)) 333 return -EINVAL; 334 335 memset(&c, 0, sizeof(c)); 336 c.common.opcode = cmd.opcode; 337 c.common.flags = cmd.flags; 338 c.common.nsid = cpu_to_le32(cmd.nsid); 339 c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); 340 c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); 341 c.common.cdw10 = cpu_to_le32(cmd.cdw10); 342 c.common.cdw11 = cpu_to_le32(cmd.cdw11); 343 c.common.cdw12 = cpu_to_le32(cmd.cdw12); 344 c.common.cdw13 = cpu_to_le32(cmd.cdw13); 345 c.common.cdw14 = cpu_to_le32(cmd.cdw14); 346 c.common.cdw15 = cpu_to_le32(cmd.cdw15); 347 348 if (!nvme_cmd_allowed(ns, &c, flags, open_for_write)) 349 return -EACCES; 350 351 if (cmd.timeout_ms) 352 timeout = msecs_to_jiffies(cmd.timeout_ms); 353 354 status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, 355 cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata), 356 cmd.metadata_len, &cmd.result, timeout, flags); 357 358 if (status >= 0) { 359 if (put_user(cmd.result, &ucmd->result)) 360 return -EFAULT; 361 } 362 363 return status; 364 } 365 366 struct nvme_uring_data { 367 __u64 metadata; 368 __u64 addr; 369 __u32 data_len; 370 __u32 metadata_len; 371 __u32 timeout_ms; 372 }; 373 374 /* 375 * This overlays struct io_uring_cmd pdu. 376 * Expect build errors if this grows larger than that. 377 */ 378 struct nvme_uring_cmd_pdu { 379 struct request *req; 380 struct bio *bio; 381 u64 result; 382 int status; 383 }; 384 385 static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu( 386 struct io_uring_cmd *ioucmd) 387 { 388 return io_uring_cmd_to_pdu(ioucmd, struct nvme_uring_cmd_pdu); 389 } 390 391 static void nvme_uring_task_cb(struct io_tw_req tw_req, io_tw_token_t tw) 392 { 393 struct io_uring_cmd *ioucmd = io_uring_cmd_from_tw(tw_req); 394 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 395 396 if (pdu->bio) 397 blk_rq_unmap_user(pdu->bio); 398 io_uring_cmd_done32(ioucmd, pdu->status, pdu->result, 399 IO_URING_CMD_TASK_WORK_ISSUE_FLAGS); 400 } 401 402 static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req, 403 blk_status_t err, 404 const struct io_comp_batch *iob) 405 { 406 struct io_uring_cmd *ioucmd = req->end_io_data; 407 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 408 409 if (nvme_req(req)->flags & NVME_REQ_CANCELLED) { 410 pdu->status = -EINTR; 411 } else { 412 pdu->status = nvme_req(req)->status; 413 if (!pdu->status) 414 pdu->status = blk_status_to_errno(err); 415 } 416 pdu->result = le64_to_cpu(nvme_req(req)->result.u64); 417 418 /* 419 * For IOPOLL, check if this completion is happening in the context 420 * of the same io_ring that owns the request (local context). If so, 421 * we can complete inline without task_work overhead. Otherwise, we 422 * must punt to task_work to ensure completion happens in the correct 423 * ring's context. 424 */ 425 if (blk_rq_is_poll(req) && iob && 426 iob->poll_ctx == io_uring_cmd_ctx_handle(ioucmd)) { 427 if (pdu->bio) 428 blk_rq_unmap_user(pdu->bio); 429 io_uring_cmd_done32(ioucmd, pdu->status, pdu->result, 0); 430 } else { 431 io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb); 432 } 433 return RQ_END_IO_FREE; 434 } 435 436 static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 437 struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec) 438 { 439 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 440 const struct nvme_uring_cmd *cmd = io_uring_sqe128_cmd(ioucmd->sqe, 441 struct nvme_uring_cmd); 442 struct request_queue *q = ns ? ns->queue : ctrl->admin_q; 443 struct nvme_uring_data d; 444 struct nvme_command c; 445 struct iov_iter iter; 446 struct iov_iter *map_iter = NULL; 447 struct request *req; 448 blk_opf_t rq_flags = 0; 449 blk_mq_req_flags_t blk_flags = 0; 450 int ret; 451 452 c.common.opcode = READ_ONCE(cmd->opcode); 453 c.common.flags = READ_ONCE(cmd->flags); 454 if (c.common.flags) 455 return -EINVAL; 456 457 c.common.command_id = 0; 458 c.common.nsid = cpu_to_le32(cmd->nsid); 459 if (!nvme_validate_passthru_nsid(ctrl, ns, le32_to_cpu(c.common.nsid))) 460 return -EINVAL; 461 462 c.common.cdw2[0] = cpu_to_le32(READ_ONCE(cmd->cdw2)); 463 c.common.cdw2[1] = cpu_to_le32(READ_ONCE(cmd->cdw3)); 464 c.common.metadata = 0; 465 c.common.dptr.prp1 = c.common.dptr.prp2 = 0; 466 c.common.cdw10 = cpu_to_le32(READ_ONCE(cmd->cdw10)); 467 c.common.cdw11 = cpu_to_le32(READ_ONCE(cmd->cdw11)); 468 c.common.cdw12 = cpu_to_le32(READ_ONCE(cmd->cdw12)); 469 c.common.cdw13 = cpu_to_le32(READ_ONCE(cmd->cdw13)); 470 c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14)); 471 c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15)); 472 473 if (!nvme_cmd_allowed(ns, &c, 0, ioucmd->file->f_mode & FMODE_WRITE)) 474 return -EACCES; 475 476 d.metadata = READ_ONCE(cmd->metadata); 477 d.addr = READ_ONCE(cmd->addr); 478 d.data_len = READ_ONCE(cmd->data_len); 479 d.metadata_len = READ_ONCE(cmd->metadata_len); 480 d.timeout_ms = READ_ONCE(cmd->timeout_ms); 481 482 if (d.data_len && (ioucmd->flags & IORING_URING_CMD_FIXED)) { 483 int ddir = nvme_is_write(&c) ? WRITE : READ; 484 485 if (vec) 486 ret = io_uring_cmd_import_fixed_vec(ioucmd, 487 u64_to_user_ptr(d.addr), d.data_len, 488 ddir, &iter, issue_flags); 489 else 490 ret = io_uring_cmd_import_fixed(d.addr, d.data_len, 491 ddir, &iter, ioucmd, issue_flags); 492 if (ret < 0) 493 return ret; 494 495 map_iter = &iter; 496 } 497 498 if (issue_flags & IO_URING_F_NONBLOCK) { 499 rq_flags |= REQ_NOWAIT; 500 blk_flags = BLK_MQ_REQ_NOWAIT; 501 } 502 if (issue_flags & IO_URING_F_IOPOLL) 503 rq_flags |= REQ_POLLED; 504 505 req = nvme_alloc_user_request(q, &c, rq_flags, blk_flags); 506 if (IS_ERR(req)) 507 return PTR_ERR(req); 508 req->timeout = d.timeout_ms ? msecs_to_jiffies(d.timeout_ms) : 0; 509 510 if (d.data_len) { 511 ret = nvme_map_user_request(req, d.addr, d.data_len, 512 nvme_to_user_ptr(d.metadata), d.metadata_len, 513 map_iter, vec ? NVME_IOCTL_VEC : 0); 514 if (ret) 515 goto out_free_req; 516 } 517 518 /* to free bio on completion, as req->bio will be null at that time */ 519 pdu->bio = req->bio; 520 pdu->req = req; 521 req->end_io_data = ioucmd; 522 req->end_io = nvme_uring_cmd_end_io; 523 blk_execute_rq_nowait(req, false); 524 return -EIOCBQUEUED; 525 526 out_free_req: 527 blk_mq_free_request(req); 528 return ret; 529 } 530 531 static bool is_ctrl_ioctl(unsigned int cmd) 532 { 533 if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD) 534 return true; 535 if (is_sed_ioctl(cmd)) 536 return true; 537 return false; 538 } 539 540 static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd, 541 void __user *argp, bool open_for_write) 542 { 543 switch (cmd) { 544 case NVME_IOCTL_ADMIN_CMD: 545 return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write); 546 case NVME_IOCTL_ADMIN64_CMD: 547 return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write); 548 default: 549 return sed_ioctl(ctrl->opal_dev, cmd, argp); 550 } 551 } 552 553 #ifdef COMPAT_FOR_U64_ALIGNMENT 554 struct nvme_user_io32 { 555 __u8 opcode; 556 __u8 flags; 557 __u16 control; 558 __u16 nblocks; 559 __u16 rsvd; 560 __u64 metadata; 561 __u64 addr; 562 __u64 slba; 563 __u32 dsmgmt; 564 __u32 reftag; 565 __u16 apptag; 566 __u16 appmask; 567 } __attribute__((__packed__)); 568 #define NVME_IOCTL_SUBMIT_IO32 _IOW('N', 0x42, struct nvme_user_io32) 569 #endif /* COMPAT_FOR_U64_ALIGNMENT */ 570 571 static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, 572 void __user *argp, unsigned int flags, bool open_for_write) 573 { 574 switch (cmd) { 575 case NVME_IOCTL_ID: 576 force_successful_syscall_return(); 577 return ns->head->ns_id; 578 case NVME_IOCTL_IO_CMD: 579 return nvme_user_cmd(ns->ctrl, ns, argp, flags, open_for_write); 580 /* 581 * struct nvme_user_io can have different padding on some 32-bit ABIs. 582 * Just accept the compat version as all fields that are used are the 583 * same size and at the same offset. 584 */ 585 #ifdef COMPAT_FOR_U64_ALIGNMENT 586 case NVME_IOCTL_SUBMIT_IO32: 587 #endif 588 case NVME_IOCTL_SUBMIT_IO: 589 return nvme_submit_io(ns, argp); 590 case NVME_IOCTL_IO64_CMD_VEC: 591 flags |= NVME_IOCTL_VEC; 592 fallthrough; 593 case NVME_IOCTL_IO64_CMD: 594 return nvme_user_cmd64(ns->ctrl, ns, argp, flags, 595 open_for_write); 596 default: 597 return -ENOTTY; 598 } 599 } 600 601 int nvme_ioctl(struct block_device *bdev, blk_mode_t mode, 602 unsigned int cmd, unsigned long arg) 603 { 604 struct nvme_ns *ns = bdev->bd_disk->private_data; 605 bool open_for_write = mode & BLK_OPEN_WRITE; 606 void __user *argp = (void __user *)arg; 607 unsigned int flags = 0; 608 609 if (bdev_is_partition(bdev)) 610 flags |= NVME_IOCTL_PARTITION; 611 612 if (is_ctrl_ioctl(cmd)) 613 return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); 614 return nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write); 615 } 616 617 long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 618 { 619 struct nvme_ns *ns = 620 container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev); 621 bool open_for_write = file->f_mode & FMODE_WRITE; 622 void __user *argp = (void __user *)arg; 623 624 if (is_ctrl_ioctl(cmd)) 625 return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); 626 return nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write); 627 } 628 629 static int nvme_uring_cmd_checks(unsigned int issue_flags) 630 { 631 632 /* NVMe passthrough requires big SQE/CQE support */ 633 if ((issue_flags & (IO_URING_F_SQE128|IO_URING_F_CQE32)) != 634 (IO_URING_F_SQE128|IO_URING_F_CQE32)) 635 return -EOPNOTSUPP; 636 return 0; 637 } 638 639 static int nvme_ns_uring_cmd(struct nvme_ns *ns, struct io_uring_cmd *ioucmd, 640 unsigned int issue_flags) 641 { 642 struct nvme_ctrl *ctrl = ns->ctrl; 643 int ret; 644 645 ret = nvme_uring_cmd_checks(issue_flags); 646 if (ret) 647 return ret; 648 649 switch (ioucmd->cmd_op) { 650 case NVME_URING_CMD_IO: 651 ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, false); 652 break; 653 case NVME_URING_CMD_IO_VEC: 654 ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, true); 655 break; 656 default: 657 ret = -ENOTTY; 658 } 659 660 return ret; 661 } 662 663 int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) 664 { 665 struct nvme_ns *ns = container_of(file_inode(ioucmd->file)->i_cdev, 666 struct nvme_ns, cdev); 667 668 return nvme_ns_uring_cmd(ns, ioucmd, issue_flags); 669 } 670 671 int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd, 672 struct io_comp_batch *iob, 673 unsigned int poll_flags) 674 { 675 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 676 struct request *req = pdu->req; 677 678 if (req && blk_rq_is_poll(req)) 679 return blk_rq_poll(req, iob, poll_flags); 680 return 0; 681 } 682 #ifdef CONFIG_NVME_MULTIPATH 683 static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, 684 void __user *argp, struct nvme_ns_head *head, int srcu_idx, 685 bool open_for_write) 686 __releases(&head->srcu) 687 { 688 struct nvme_ctrl *ctrl = ns->ctrl; 689 int ret; 690 691 nvme_get_ctrl(ns->ctrl); 692 srcu_read_unlock(&head->srcu, srcu_idx); 693 ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); 694 695 nvme_put_ctrl(ctrl); 696 return ret; 697 } 698 699 int nvme_ns_head_ioctl(struct block_device *bdev, blk_mode_t mode, 700 unsigned int cmd, unsigned long arg) 701 { 702 struct nvme_ns_head *head = bdev->bd_disk->private_data; 703 bool open_for_write = mode & BLK_OPEN_WRITE; 704 void __user *argp = (void __user *)arg; 705 struct nvme_ns *ns; 706 int srcu_idx, ret = -EWOULDBLOCK; 707 unsigned int flags = 0; 708 709 if (bdev_is_partition(bdev)) 710 flags |= NVME_IOCTL_PARTITION; 711 712 srcu_idx = srcu_read_lock(&head->srcu); 713 ns = nvme_find_path(head); 714 if (!ns) 715 goto out_unlock; 716 717 /* 718 * Handle ioctls that apply to the controller instead of the namespace 719 * separately and drop the ns SRCU reference early. This avoids a 720 * deadlock when deleting namespaces using the passthrough interface. 721 */ 722 if (is_ctrl_ioctl(cmd)) 723 return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, 724 open_for_write); 725 726 ret = nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write); 727 out_unlock: 728 srcu_read_unlock(&head->srcu, srcu_idx); 729 return ret; 730 } 731 732 long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd, 733 unsigned long arg) 734 { 735 bool open_for_write = file->f_mode & FMODE_WRITE; 736 struct cdev *cdev = file_inode(file)->i_cdev; 737 struct nvme_ns_head *head = 738 container_of(cdev, struct nvme_ns_head, cdev); 739 void __user *argp = (void __user *)arg; 740 struct nvme_ns *ns; 741 int srcu_idx, ret = -EWOULDBLOCK; 742 743 srcu_idx = srcu_read_lock(&head->srcu); 744 ns = nvme_find_path(head); 745 if (!ns) 746 goto out_unlock; 747 748 if (is_ctrl_ioctl(cmd)) 749 return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, 750 open_for_write); 751 752 ret = nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write); 753 out_unlock: 754 srcu_read_unlock(&head->srcu, srcu_idx); 755 return ret; 756 } 757 758 int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd, 759 unsigned int issue_flags) 760 { 761 struct cdev *cdev = file_inode(ioucmd->file)->i_cdev; 762 struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev); 763 int srcu_idx = srcu_read_lock(&head->srcu); 764 struct nvme_ns *ns = nvme_find_path(head); 765 int ret = -EINVAL; 766 767 if (ns) 768 ret = nvme_ns_uring_cmd(ns, ioucmd, issue_flags); 769 srcu_read_unlock(&head->srcu, srcu_idx); 770 return ret; 771 } 772 #endif /* CONFIG_NVME_MULTIPATH */ 773 774 int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) 775 { 776 struct nvme_ctrl *ctrl = ioucmd->file->private_data; 777 int ret; 778 779 ret = nvme_uring_cmd_checks(issue_flags); 780 if (ret) 781 return ret; 782 783 switch (ioucmd->cmd_op) { 784 case NVME_URING_CMD_ADMIN: 785 ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, false); 786 break; 787 case NVME_URING_CMD_ADMIN_VEC: 788 ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, true); 789 break; 790 default: 791 ret = -ENOTTY; 792 } 793 794 return ret; 795 } 796 797 static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp, 798 bool open_for_write) 799 { 800 struct nvme_ns *ns; 801 int ret, srcu_idx; 802 803 srcu_idx = srcu_read_lock(&ctrl->srcu); 804 if (list_empty(&ctrl->namespaces)) { 805 ret = -ENOTTY; 806 goto out_unlock; 807 } 808 809 ns = list_first_or_null_rcu(&ctrl->namespaces, struct nvme_ns, list); 810 if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) { 811 dev_warn(ctrl->device, 812 "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n"); 813 ret = -EINVAL; 814 goto out_unlock; 815 } 816 817 dev_warn(ctrl->device, 818 "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n"); 819 if (!nvme_get_ns(ns)) { 820 ret = -ENXIO; 821 goto out_unlock; 822 } 823 srcu_read_unlock(&ctrl->srcu, srcu_idx); 824 825 ret = nvme_user_cmd(ctrl, ns, argp, 0, open_for_write); 826 nvme_put_ns(ns); 827 return ret; 828 829 out_unlock: 830 srcu_read_unlock(&ctrl->srcu, srcu_idx); 831 return ret; 832 } 833 834 long nvme_dev_ioctl(struct file *file, unsigned int cmd, 835 unsigned long arg) 836 { 837 bool open_for_write = file->f_mode & FMODE_WRITE; 838 struct nvme_ctrl *ctrl = file->private_data; 839 void __user *argp = (void __user *)arg; 840 841 switch (cmd) { 842 case NVME_IOCTL_ADMIN_CMD: 843 return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write); 844 case NVME_IOCTL_ADMIN64_CMD: 845 return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write); 846 case NVME_IOCTL_IO_CMD: 847 return nvme_dev_user_cmd(ctrl, argp, open_for_write); 848 case NVME_IOCTL_RESET: 849 if (!capable(CAP_SYS_ADMIN)) 850 return -EACCES; 851 dev_warn(ctrl->device, "resetting controller\n"); 852 return nvme_reset_ctrl_sync(ctrl); 853 case NVME_IOCTL_SUBSYS_RESET: 854 if (!capable(CAP_SYS_ADMIN)) 855 return -EACCES; 856 return nvme_reset_subsystem(ctrl); 857 case NVME_IOCTL_RESCAN: 858 if (!capable(CAP_SYS_ADMIN)) 859 return -EACCES; 860 nvme_queue_scan(ctrl); 861 return 0; 862 default: 863 return -ENOTTY; 864 } 865 } 866