1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2011-2014, Intel Corporation. 4 * Copyright (c) 2017-2021 Christoph Hellwig. 5 */ 6 #include <linux/blk-integrity.h> 7 #include <linux/ptrace.h> /* for force_successful_syscall_return */ 8 #include <linux/nvme_ioctl.h> 9 #include <linux/io_uring/cmd.h> 10 #include "nvme.h" 11 12 enum { 13 NVME_IOCTL_VEC = (1 << 0), 14 NVME_IOCTL_PARTITION = (1 << 1), 15 }; 16 17 static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, 18 unsigned int flags, bool open_for_write) 19 { 20 u32 effects; 21 22 /* 23 * Do not allow unprivileged passthrough on partitions, as that allows an 24 * escape from the containment of the partition. 25 */ 26 if (flags & NVME_IOCTL_PARTITION) 27 goto admin; 28 29 /* 30 * Do not allow unprivileged processes to send vendor specific or fabrics 31 * commands as we can't be sure about their effects. 32 */ 33 if (c->common.opcode >= nvme_cmd_vendor_start || 34 c->common.opcode == nvme_fabrics_command) 35 goto admin; 36 37 /* 38 * Do not allow unprivileged passthrough of admin commands except 39 * for a subset of identify commands that contain information required 40 * to form proper I/O commands in userspace and do not expose any 41 * potentially sensitive information. 42 */ 43 if (!ns) { 44 if (c->common.opcode == nvme_admin_identify) { 45 switch (c->identify.cns) { 46 case NVME_ID_CNS_NS: 47 case NVME_ID_CNS_CS_NS: 48 case NVME_ID_CNS_NS_CS_INDEP: 49 case NVME_ID_CNS_CS_CTRL: 50 case NVME_ID_CNS_CTRL: 51 return true; 52 } 53 } 54 goto admin; 55 } 56 57 /* 58 * Check if the controller provides a Commands Supported and Effects log 59 * and marks this command as supported. If not reject unprivileged 60 * passthrough. 61 */ 62 effects = nvme_command_effects(ns->ctrl, ns, c->common.opcode); 63 if (!(effects & NVME_CMD_EFFECTS_CSUPP)) 64 goto admin; 65 66 /* 67 * Don't allow passthrough for command that have intrusive (or unknown) 68 * effects. 69 */ 70 if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC | 71 NVME_CMD_EFFECTS_UUID_SEL | 72 NVME_CMD_EFFECTS_SCOPE_MASK)) 73 goto admin; 74 75 /* 76 * Only allow I/O commands that transfer data to the controller or that 77 * change the logical block contents if the file descriptor is open for 78 * writing. 79 */ 80 if ((nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC)) && 81 !open_for_write) 82 goto admin; 83 84 return true; 85 admin: 86 return capable(CAP_SYS_ADMIN); 87 } 88 89 /* 90 * Convert integer values from ioctl structures to user pointers, silently 91 * ignoring the upper bits in the compat case to match behaviour of 32-bit 92 * kernels. 93 */ 94 static void __user *nvme_to_user_ptr(uintptr_t ptrval) 95 { 96 if (in_compat_syscall()) 97 ptrval = (compat_uptr_t)ptrval; 98 return (void __user *)ptrval; 99 } 100 101 static struct request *nvme_alloc_user_request(struct request_queue *q, 102 struct nvme_command *cmd, blk_opf_t rq_flags, 103 blk_mq_req_flags_t blk_flags) 104 { 105 struct nvme_ns *ns = q->queuedata; 106 struct request *req; 107 108 /* 109 * The NVME_MPATH flag is set only for IO commands sent to a namespace 110 * with a multipath enabled head. The request is not eligible for 111 * failover as passthrough requests also append REQ_FAILFAST_DRIVER. 112 */ 113 if (ns && nvme_ns_head_multipath(ns->head)) 114 rq_flags |= REQ_NVME_MPATH; 115 116 req = blk_mq_alloc_request(q, nvme_req_op(cmd) | rq_flags, blk_flags); 117 if (IS_ERR(req)) 118 return req; 119 nvme_init_request(req, cmd); 120 nvme_req(req)->flags |= NVME_REQ_USERCMD; 121 return req; 122 } 123 124 static int nvme_map_user_request(struct request *req, u64 ubuffer, 125 unsigned bufflen, void __user *meta_buffer, unsigned meta_len, 126 struct iov_iter *iter, unsigned int flags) 127 { 128 struct request_queue *q = req->q; 129 struct nvme_ns *ns = q->queuedata; 130 struct block_device *bdev = ns ? ns->disk->part0 : NULL; 131 bool supports_metadata = bdev && blk_get_integrity(bdev->bd_disk); 132 bool has_metadata = meta_buffer && meta_len; 133 int ret; 134 135 if (has_metadata && !supports_metadata) 136 return -EINVAL; 137 138 if (iter) 139 ret = blk_rq_map_user_iov(q, req, NULL, iter, GFP_KERNEL); 140 else 141 ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer), 142 bufflen, GFP_KERNEL, flags & NVME_IOCTL_VEC, 0, 143 0, rq_data_dir(req)); 144 if (ret) 145 return ret; 146 147 if (has_metadata) { 148 ret = blk_rq_integrity_map_user(req, meta_buffer, meta_len); 149 if (ret) 150 goto out_unmap; 151 } 152 153 return ret; 154 155 out_unmap: 156 if (req->bio) 157 blk_rq_unmap_user(req->bio); 158 return ret; 159 } 160 161 static int nvme_submit_user_cmd(struct request_queue *q, 162 struct nvme_command *cmd, u64 ubuffer, unsigned bufflen, 163 void __user *meta_buffer, unsigned meta_len, 164 u64 *result, unsigned timeout, unsigned int flags) 165 { 166 struct nvme_ns *ns = q->queuedata; 167 struct nvme_ctrl *ctrl; 168 struct request *req; 169 struct bio *bio; 170 u32 effects; 171 int ret; 172 173 req = nvme_alloc_user_request(q, cmd, 0, 0); 174 if (IS_ERR(req)) 175 return PTR_ERR(req); 176 177 req->timeout = timeout; 178 if (ubuffer && bufflen) { 179 ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer, 180 meta_len, NULL, flags); 181 if (ret) 182 goto out_free_req; 183 } 184 185 bio = req->bio; 186 ctrl = nvme_req(req)->ctrl; 187 188 effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode); 189 ret = nvme_execute_rq(req, false); 190 if (result) 191 *result = le64_to_cpu(nvme_req(req)->result.u64); 192 if (bio) 193 blk_rq_unmap_user(bio); 194 blk_mq_free_request(req); 195 196 if (effects) 197 nvme_passthru_end(ctrl, ns, effects, cmd, ret); 198 return ret; 199 200 out_free_req: 201 blk_mq_free_request(req); 202 return ret; 203 } 204 205 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) 206 { 207 struct nvme_user_io io; 208 struct nvme_command c; 209 unsigned length, meta_len; 210 void __user *metadata; 211 212 if (copy_from_user(&io, uio, sizeof(io))) 213 return -EFAULT; 214 if (io.flags) 215 return -EINVAL; 216 217 switch (io.opcode) { 218 case nvme_cmd_write: 219 case nvme_cmd_read: 220 case nvme_cmd_compare: 221 break; 222 default: 223 return -EINVAL; 224 } 225 226 length = (io.nblocks + 1) << ns->head->lba_shift; 227 228 if ((io.control & NVME_RW_PRINFO_PRACT) && 229 (ns->head->ms == ns->head->pi_size)) { 230 /* 231 * Protection information is stripped/inserted by the 232 * controller. 233 */ 234 if (nvme_to_user_ptr(io.metadata)) 235 return -EINVAL; 236 meta_len = 0; 237 metadata = NULL; 238 } else { 239 meta_len = (io.nblocks + 1) * ns->head->ms; 240 metadata = nvme_to_user_ptr(io.metadata); 241 } 242 243 if (ns->head->features & NVME_NS_EXT_LBAS) { 244 length += meta_len; 245 meta_len = 0; 246 } else if (meta_len) { 247 if ((io.metadata & 3) || !io.metadata) 248 return -EINVAL; 249 } 250 251 memset(&c, 0, sizeof(c)); 252 c.rw.opcode = io.opcode; 253 c.rw.flags = io.flags; 254 c.rw.nsid = cpu_to_le32(ns->head->ns_id); 255 c.rw.slba = cpu_to_le64(io.slba); 256 c.rw.length = cpu_to_le16(io.nblocks); 257 c.rw.control = cpu_to_le16(io.control); 258 c.rw.dsmgmt = cpu_to_le32(io.dsmgmt); 259 c.rw.reftag = cpu_to_le32(io.reftag); 260 c.rw.lbat = cpu_to_le16(io.apptag); 261 c.rw.lbatm = cpu_to_le16(io.appmask); 262 263 return nvme_submit_user_cmd(ns->queue, &c, io.addr, length, metadata, 264 meta_len, NULL, 0, 0); 265 } 266 267 static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl, 268 struct nvme_ns *ns, __u32 nsid) 269 { 270 if (ns && nsid != ns->head->ns_id) { 271 dev_err(ctrl->device, 272 "%s: nsid (%u) in cmd does not match nsid (%u) of namespace\n", 273 current->comm, nsid, ns->head->ns_id); 274 return false; 275 } 276 277 return true; 278 } 279 280 static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 281 struct nvme_passthru_cmd __user *ucmd, unsigned int flags, 282 bool open_for_write) 283 { 284 struct nvme_passthru_cmd cmd; 285 struct nvme_command c; 286 unsigned timeout = 0; 287 u64 result; 288 int status; 289 290 if (copy_from_user(&cmd, ucmd, sizeof(cmd))) 291 return -EFAULT; 292 if (cmd.flags) 293 return -EINVAL; 294 if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid)) 295 return -EINVAL; 296 297 memset(&c, 0, sizeof(c)); 298 c.common.opcode = cmd.opcode; 299 c.common.flags = cmd.flags; 300 c.common.nsid = cpu_to_le32(cmd.nsid); 301 c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); 302 c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); 303 c.common.cdw10 = cpu_to_le32(cmd.cdw10); 304 c.common.cdw11 = cpu_to_le32(cmd.cdw11); 305 c.common.cdw12 = cpu_to_le32(cmd.cdw12); 306 c.common.cdw13 = cpu_to_le32(cmd.cdw13); 307 c.common.cdw14 = cpu_to_le32(cmd.cdw14); 308 c.common.cdw15 = cpu_to_le32(cmd.cdw15); 309 310 if (!nvme_cmd_allowed(ns, &c, 0, open_for_write)) 311 return -EACCES; 312 313 if (cmd.timeout_ms) 314 timeout = msecs_to_jiffies(cmd.timeout_ms); 315 316 status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, 317 cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata), 318 cmd.metadata_len, &result, timeout, 0); 319 320 if (status >= 0) { 321 if (put_user(result, &ucmd->result)) 322 return -EFAULT; 323 } 324 325 return status; 326 } 327 328 static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 329 struct nvme_passthru_cmd64 __user *ucmd, unsigned int flags, 330 bool open_for_write) 331 { 332 struct nvme_passthru_cmd64 cmd; 333 struct nvme_command c; 334 unsigned timeout = 0; 335 int status; 336 337 if (copy_from_user(&cmd, ucmd, sizeof(cmd))) 338 return -EFAULT; 339 if (cmd.flags) 340 return -EINVAL; 341 if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid)) 342 return -EINVAL; 343 344 memset(&c, 0, sizeof(c)); 345 c.common.opcode = cmd.opcode; 346 c.common.flags = cmd.flags; 347 c.common.nsid = cpu_to_le32(cmd.nsid); 348 c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); 349 c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); 350 c.common.cdw10 = cpu_to_le32(cmd.cdw10); 351 c.common.cdw11 = cpu_to_le32(cmd.cdw11); 352 c.common.cdw12 = cpu_to_le32(cmd.cdw12); 353 c.common.cdw13 = cpu_to_le32(cmd.cdw13); 354 c.common.cdw14 = cpu_to_le32(cmd.cdw14); 355 c.common.cdw15 = cpu_to_le32(cmd.cdw15); 356 357 if (!nvme_cmd_allowed(ns, &c, flags, open_for_write)) 358 return -EACCES; 359 360 if (cmd.timeout_ms) 361 timeout = msecs_to_jiffies(cmd.timeout_ms); 362 363 status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, 364 cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata), 365 cmd.metadata_len, &cmd.result, timeout, flags); 366 367 if (status >= 0) { 368 if (put_user(cmd.result, &ucmd->result)) 369 return -EFAULT; 370 } 371 372 return status; 373 } 374 375 struct nvme_uring_data { 376 __u64 metadata; 377 __u64 addr; 378 __u32 data_len; 379 __u32 metadata_len; 380 __u32 timeout_ms; 381 }; 382 383 /* 384 * This overlays struct io_uring_cmd pdu. 385 * Expect build errors if this grows larger than that. 386 */ 387 struct nvme_uring_cmd_pdu { 388 struct request *req; 389 struct bio *bio; 390 u64 result; 391 int status; 392 }; 393 394 static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu( 395 struct io_uring_cmd *ioucmd) 396 { 397 return io_uring_cmd_to_pdu(ioucmd, struct nvme_uring_cmd_pdu); 398 } 399 400 static void nvme_uring_task_cb(struct io_tw_req tw_req, io_tw_token_t tw) 401 { 402 struct io_uring_cmd *ioucmd = io_uring_cmd_from_tw(tw_req); 403 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 404 405 if (pdu->bio) 406 blk_rq_unmap_user(pdu->bio); 407 io_uring_cmd_done32(ioucmd, pdu->status, pdu->result, 408 IO_URING_CMD_TASK_WORK_ISSUE_FLAGS); 409 } 410 411 static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req, 412 blk_status_t err, 413 const struct io_comp_batch *iob) 414 { 415 struct io_uring_cmd *ioucmd = req->end_io_data; 416 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 417 418 if (nvme_req(req)->flags & NVME_REQ_CANCELLED) { 419 pdu->status = -EINTR; 420 } else { 421 pdu->status = nvme_req(req)->status; 422 if (!pdu->status) 423 pdu->status = blk_status_to_errno(err); 424 } 425 pdu->result = le64_to_cpu(nvme_req(req)->result.u64); 426 427 /* 428 * For IOPOLL, check if this completion is happening in the context 429 * of the same io_ring that owns the request (local context). If so, 430 * we can complete inline without task_work overhead. Otherwise, we 431 * must punt to task_work to ensure completion happens in the correct 432 * ring's context. 433 */ 434 if (blk_rq_is_poll(req) && iob && 435 iob->poll_ctx == io_uring_cmd_ctx_handle(ioucmd)) { 436 if (pdu->bio) 437 blk_rq_unmap_user(pdu->bio); 438 io_uring_cmd_done32(ioucmd, pdu->status, pdu->result, 0); 439 } else { 440 io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb); 441 } 442 return RQ_END_IO_FREE; 443 } 444 445 static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 446 struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec) 447 { 448 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 449 const struct nvme_uring_cmd *cmd = io_uring_sqe128_cmd(ioucmd->sqe, 450 struct nvme_uring_cmd); 451 struct request_queue *q = ns ? ns->queue : ctrl->admin_q; 452 struct nvme_uring_data d; 453 struct nvme_command c; 454 struct iov_iter iter; 455 struct iov_iter *map_iter = NULL; 456 struct request *req; 457 blk_opf_t rq_flags = 0; 458 blk_mq_req_flags_t blk_flags = 0; 459 int ret; 460 461 c.common.opcode = READ_ONCE(cmd->opcode); 462 c.common.flags = READ_ONCE(cmd->flags); 463 if (c.common.flags) 464 return -EINVAL; 465 466 c.common.command_id = 0; 467 c.common.nsid = cpu_to_le32(cmd->nsid); 468 if (!nvme_validate_passthru_nsid(ctrl, ns, le32_to_cpu(c.common.nsid))) 469 return -EINVAL; 470 471 c.common.cdw2[0] = cpu_to_le32(READ_ONCE(cmd->cdw2)); 472 c.common.cdw2[1] = cpu_to_le32(READ_ONCE(cmd->cdw3)); 473 c.common.metadata = 0; 474 c.common.dptr.prp1 = c.common.dptr.prp2 = 0; 475 c.common.cdw10 = cpu_to_le32(READ_ONCE(cmd->cdw10)); 476 c.common.cdw11 = cpu_to_le32(READ_ONCE(cmd->cdw11)); 477 c.common.cdw12 = cpu_to_le32(READ_ONCE(cmd->cdw12)); 478 c.common.cdw13 = cpu_to_le32(READ_ONCE(cmd->cdw13)); 479 c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14)); 480 c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15)); 481 482 if (!nvme_cmd_allowed(ns, &c, 0, ioucmd->file->f_mode & FMODE_WRITE)) 483 return -EACCES; 484 485 d.metadata = READ_ONCE(cmd->metadata); 486 d.addr = READ_ONCE(cmd->addr); 487 d.data_len = READ_ONCE(cmd->data_len); 488 d.metadata_len = READ_ONCE(cmd->metadata_len); 489 d.timeout_ms = READ_ONCE(cmd->timeout_ms); 490 491 if (d.data_len && (ioucmd->flags & IORING_URING_CMD_FIXED)) { 492 int ddir = nvme_is_write(&c) ? WRITE : READ; 493 494 if (vec) 495 ret = io_uring_cmd_import_fixed_vec(ioucmd, 496 u64_to_user_ptr(d.addr), d.data_len, 497 ddir, &iter, issue_flags); 498 else 499 ret = io_uring_cmd_import_fixed(d.addr, d.data_len, 500 ddir, &iter, ioucmd, issue_flags); 501 if (ret < 0) 502 return ret; 503 504 map_iter = &iter; 505 } 506 507 if (issue_flags & IO_URING_F_NONBLOCK) { 508 rq_flags |= REQ_NOWAIT; 509 blk_flags = BLK_MQ_REQ_NOWAIT; 510 } 511 if (issue_flags & IO_URING_F_IOPOLL) 512 rq_flags |= REQ_POLLED; 513 514 req = nvme_alloc_user_request(q, &c, rq_flags, blk_flags); 515 if (IS_ERR(req)) 516 return PTR_ERR(req); 517 req->timeout = d.timeout_ms ? msecs_to_jiffies(d.timeout_ms) : 0; 518 519 if (d.data_len) { 520 ret = nvme_map_user_request(req, d.addr, d.data_len, 521 nvme_to_user_ptr(d.metadata), d.metadata_len, 522 map_iter, vec ? NVME_IOCTL_VEC : 0); 523 if (ret) 524 goto out_free_req; 525 } 526 527 /* to free bio on completion, as req->bio will be null at that time */ 528 pdu->bio = req->bio; 529 pdu->req = req; 530 req->end_io_data = ioucmd; 531 req->end_io = nvme_uring_cmd_end_io; 532 blk_execute_rq_nowait(req, false); 533 return -EIOCBQUEUED; 534 535 out_free_req: 536 blk_mq_free_request(req); 537 return ret; 538 } 539 540 static bool is_ctrl_ioctl(unsigned int cmd) 541 { 542 if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD) 543 return true; 544 if (is_sed_ioctl(cmd)) 545 return true; 546 return false; 547 } 548 549 static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd, 550 void __user *argp, bool open_for_write) 551 { 552 switch (cmd) { 553 case NVME_IOCTL_ADMIN_CMD: 554 return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write); 555 case NVME_IOCTL_ADMIN64_CMD: 556 return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write); 557 default: 558 return sed_ioctl(ctrl->opal_dev, cmd, argp); 559 } 560 } 561 562 #ifdef COMPAT_FOR_U64_ALIGNMENT 563 struct nvme_user_io32 { 564 __u8 opcode; 565 __u8 flags; 566 __u16 control; 567 __u16 nblocks; 568 __u16 rsvd; 569 __u64 metadata; 570 __u64 addr; 571 __u64 slba; 572 __u32 dsmgmt; 573 __u32 reftag; 574 __u16 apptag; 575 __u16 appmask; 576 } __attribute__((__packed__)); 577 #define NVME_IOCTL_SUBMIT_IO32 _IOW('N', 0x42, struct nvme_user_io32) 578 #endif /* COMPAT_FOR_U64_ALIGNMENT */ 579 580 static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, 581 void __user *argp, unsigned int flags, bool open_for_write) 582 { 583 switch (cmd) { 584 case NVME_IOCTL_ID: 585 force_successful_syscall_return(); 586 return ns->head->ns_id; 587 case NVME_IOCTL_IO_CMD: 588 return nvme_user_cmd(ns->ctrl, ns, argp, flags, open_for_write); 589 /* 590 * struct nvme_user_io can have different padding on some 32-bit ABIs. 591 * Just accept the compat version as all fields that are used are the 592 * same size and at the same offset. 593 */ 594 #ifdef COMPAT_FOR_U64_ALIGNMENT 595 case NVME_IOCTL_SUBMIT_IO32: 596 #endif 597 case NVME_IOCTL_SUBMIT_IO: 598 return nvme_submit_io(ns, argp); 599 case NVME_IOCTL_IO64_CMD_VEC: 600 flags |= NVME_IOCTL_VEC; 601 fallthrough; 602 case NVME_IOCTL_IO64_CMD: 603 return nvme_user_cmd64(ns->ctrl, ns, argp, flags, 604 open_for_write); 605 default: 606 return -ENOTTY; 607 } 608 } 609 610 int nvme_ioctl(struct block_device *bdev, blk_mode_t mode, 611 unsigned int cmd, unsigned long arg) 612 { 613 struct nvme_ns *ns = bdev->bd_disk->private_data; 614 bool open_for_write = mode & BLK_OPEN_WRITE; 615 void __user *argp = (void __user *)arg; 616 unsigned int flags = 0; 617 618 if (bdev_is_partition(bdev)) 619 flags |= NVME_IOCTL_PARTITION; 620 621 if (is_ctrl_ioctl(cmd)) 622 return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); 623 return nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write); 624 } 625 626 long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 627 { 628 struct nvme_ns *ns = 629 container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev); 630 bool open_for_write = file->f_mode & FMODE_WRITE; 631 void __user *argp = (void __user *)arg; 632 633 if (is_ctrl_ioctl(cmd)) 634 return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); 635 return nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write); 636 } 637 638 static int nvme_uring_cmd_checks(unsigned int issue_flags) 639 { 640 641 /* NVMe passthrough requires big SQE/CQE support */ 642 if ((issue_flags & (IO_URING_F_SQE128|IO_URING_F_CQE32)) != 643 (IO_URING_F_SQE128|IO_URING_F_CQE32)) 644 return -EOPNOTSUPP; 645 return 0; 646 } 647 648 static int nvme_ns_uring_cmd(struct nvme_ns *ns, struct io_uring_cmd *ioucmd, 649 unsigned int issue_flags) 650 { 651 struct nvme_ctrl *ctrl = ns->ctrl; 652 int ret; 653 654 ret = nvme_uring_cmd_checks(issue_flags); 655 if (ret) 656 return ret; 657 658 switch (ioucmd->cmd_op) { 659 case NVME_URING_CMD_IO: 660 ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, false); 661 break; 662 case NVME_URING_CMD_IO_VEC: 663 ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, true); 664 break; 665 default: 666 ret = -ENOTTY; 667 } 668 669 return ret; 670 } 671 672 int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) 673 { 674 struct nvme_ns *ns = container_of(file_inode(ioucmd->file)->i_cdev, 675 struct nvme_ns, cdev); 676 677 return nvme_ns_uring_cmd(ns, ioucmd, issue_flags); 678 } 679 680 int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd, 681 struct io_comp_batch *iob, 682 unsigned int poll_flags) 683 { 684 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 685 struct request *req = pdu->req; 686 687 if (req && blk_rq_is_poll(req)) 688 return blk_rq_poll(req, iob, poll_flags); 689 return 0; 690 } 691 #ifdef CONFIG_NVME_MULTIPATH 692 static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, 693 void __user *argp, struct nvme_ns_head *head, int srcu_idx, 694 bool open_for_write) 695 __releases(&head->srcu) 696 { 697 struct nvme_ctrl *ctrl = ns->ctrl; 698 int ret; 699 700 nvme_get_ctrl(ns->ctrl); 701 srcu_read_unlock(&head->srcu, srcu_idx); 702 ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); 703 704 nvme_put_ctrl(ctrl); 705 return ret; 706 } 707 708 int nvme_ns_head_ioctl(struct block_device *bdev, blk_mode_t mode, 709 unsigned int cmd, unsigned long arg) 710 { 711 struct nvme_ns_head *head = bdev->bd_disk->private_data; 712 bool open_for_write = mode & BLK_OPEN_WRITE; 713 void __user *argp = (void __user *)arg; 714 struct nvme_ns *ns; 715 int srcu_idx, ret = -EWOULDBLOCK; 716 unsigned int flags = 0; 717 718 if (bdev_is_partition(bdev)) 719 flags |= NVME_IOCTL_PARTITION; 720 721 srcu_idx = srcu_read_lock(&head->srcu); 722 ns = nvme_find_path(head); 723 if (!ns) 724 goto out_unlock; 725 726 /* 727 * Handle ioctls that apply to the controller instead of the namespace 728 * separately and drop the ns SRCU reference early. This avoids a 729 * deadlock when deleting namespaces using the passthrough interface. 730 */ 731 if (is_ctrl_ioctl(cmd)) 732 return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, 733 open_for_write); 734 735 ret = nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write); 736 out_unlock: 737 srcu_read_unlock(&head->srcu, srcu_idx); 738 return ret; 739 } 740 741 long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd, 742 unsigned long arg) 743 { 744 bool open_for_write = file->f_mode & FMODE_WRITE; 745 struct cdev *cdev = file_inode(file)->i_cdev; 746 struct nvme_ns_head *head = 747 container_of(cdev, struct nvme_ns_head, cdev); 748 void __user *argp = (void __user *)arg; 749 struct nvme_ns *ns; 750 int srcu_idx, ret = -EWOULDBLOCK; 751 752 srcu_idx = srcu_read_lock(&head->srcu); 753 ns = nvme_find_path(head); 754 if (!ns) 755 goto out_unlock; 756 757 if (is_ctrl_ioctl(cmd)) 758 return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, 759 open_for_write); 760 761 ret = nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write); 762 out_unlock: 763 srcu_read_unlock(&head->srcu, srcu_idx); 764 return ret; 765 } 766 767 int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd, 768 unsigned int issue_flags) 769 { 770 struct cdev *cdev = file_inode(ioucmd->file)->i_cdev; 771 struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev); 772 int srcu_idx = srcu_read_lock(&head->srcu); 773 struct nvme_ns *ns = nvme_find_path(head); 774 int ret = -EINVAL; 775 776 if (ns) 777 ret = nvme_ns_uring_cmd(ns, ioucmd, issue_flags); 778 srcu_read_unlock(&head->srcu, srcu_idx); 779 return ret; 780 } 781 #endif /* CONFIG_NVME_MULTIPATH */ 782 783 int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) 784 { 785 struct nvme_ctrl *ctrl = ioucmd->file->private_data; 786 int ret; 787 788 ret = nvme_uring_cmd_checks(issue_flags); 789 if (ret) 790 return ret; 791 792 switch (ioucmd->cmd_op) { 793 case NVME_URING_CMD_ADMIN: 794 ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, false); 795 break; 796 case NVME_URING_CMD_ADMIN_VEC: 797 ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, true); 798 break; 799 default: 800 ret = -ENOTTY; 801 } 802 803 return ret; 804 } 805 806 static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp, 807 bool open_for_write) 808 { 809 struct nvme_ns *ns; 810 int ret, srcu_idx; 811 812 srcu_idx = srcu_read_lock(&ctrl->srcu); 813 if (list_empty(&ctrl->namespaces)) { 814 ret = -ENOTTY; 815 goto out_unlock; 816 } 817 818 ns = list_first_or_null_rcu(&ctrl->namespaces, struct nvme_ns, list); 819 if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) { 820 dev_warn(ctrl->device, 821 "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n"); 822 ret = -EINVAL; 823 goto out_unlock; 824 } 825 826 dev_warn(ctrl->device, 827 "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n"); 828 if (!nvme_get_ns(ns)) { 829 ret = -ENXIO; 830 goto out_unlock; 831 } 832 srcu_read_unlock(&ctrl->srcu, srcu_idx); 833 834 ret = nvme_user_cmd(ctrl, ns, argp, 0, open_for_write); 835 nvme_put_ns(ns); 836 return ret; 837 838 out_unlock: 839 srcu_read_unlock(&ctrl->srcu, srcu_idx); 840 return ret; 841 } 842 843 long nvme_dev_ioctl(struct file *file, unsigned int cmd, 844 unsigned long arg) 845 { 846 bool open_for_write = file->f_mode & FMODE_WRITE; 847 struct nvme_ctrl *ctrl = file->private_data; 848 void __user *argp = (void __user *)arg; 849 850 switch (cmd) { 851 case NVME_IOCTL_ADMIN_CMD: 852 return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write); 853 case NVME_IOCTL_ADMIN64_CMD: 854 return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write); 855 case NVME_IOCTL_IO_CMD: 856 return nvme_dev_user_cmd(ctrl, argp, open_for_write); 857 case NVME_IOCTL_RESET: 858 if (!capable(CAP_SYS_ADMIN)) 859 return -EACCES; 860 dev_warn(ctrl->device, "resetting controller\n"); 861 return nvme_reset_ctrl_sync(ctrl); 862 case NVME_IOCTL_SUBSYS_RESET: 863 if (!capable(CAP_SYS_ADMIN)) 864 return -EACCES; 865 return nvme_reset_subsystem(ctrl); 866 case NVME_IOCTL_RESCAN: 867 if (!capable(CAP_SYS_ADMIN)) 868 return -EACCES; 869 nvme_queue_scan(ctrl); 870 return 0; 871 default: 872 return -ENOTTY; 873 } 874 } 875