1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2011-2014, Intel Corporation. 4 * Copyright (c) 2017-2021 Christoph Hellwig. 5 */ 6 #include <linux/blk-integrity.h> 7 #include <linux/ptrace.h> /* for force_successful_syscall_return */ 8 #include <linux/nvme_ioctl.h> 9 #include <linux/io_uring/cmd.h> 10 #include "nvme.h" 11 12 enum { 13 NVME_IOCTL_VEC = (1 << 0), 14 NVME_IOCTL_PARTITION = (1 << 1), 15 }; 16 17 static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, 18 unsigned int flags, bool open_for_write) 19 { 20 u32 effects; 21 22 /* 23 * Do not allow unprivileged passthrough on partitions, as that allows an 24 * escape from the containment of the partition. 25 */ 26 if (flags & NVME_IOCTL_PARTITION) 27 goto admin; 28 29 /* 30 * Do not allow unprivileged processes to send vendor specific or fabrics 31 * commands as we can't be sure about their effects. 32 */ 33 if (c->common.opcode >= nvme_cmd_vendor_start || 34 c->common.opcode == nvme_fabrics_command) 35 goto admin; 36 37 /* 38 * Do not allow unprivileged passthrough of admin commands except 39 * for a subset of identify commands that contain information required 40 * to form proper I/O commands in userspace and do not expose any 41 * potentially sensitive information. 42 */ 43 if (!ns) { 44 if (c->common.opcode == nvme_admin_identify) { 45 switch (c->identify.cns) { 46 case NVME_ID_CNS_NS: 47 case NVME_ID_CNS_CS_NS: 48 case NVME_ID_CNS_NS_CS_INDEP: 49 case NVME_ID_CNS_CS_CTRL: 50 case NVME_ID_CNS_CTRL: 51 return true; 52 } 53 } 54 goto admin; 55 } 56 57 /* 58 * Check if the controller provides a Commands Supported and Effects log 59 * and marks this command as supported. If not reject unprivileged 60 * passthrough. 61 */ 62 effects = nvme_command_effects(ns->ctrl, ns, c->common.opcode); 63 if (!(effects & NVME_CMD_EFFECTS_CSUPP)) 64 goto admin; 65 66 /* 67 * Don't allow passthrough for command that have intrusive (or unknown) 68 * effects. 69 */ 70 if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC | 71 NVME_CMD_EFFECTS_UUID_SEL | 72 NVME_CMD_EFFECTS_SCOPE_MASK)) 73 goto admin; 74 75 /* 76 * Only allow I/O commands that transfer data to the controller or that 77 * change the logical block contents if the file descriptor is open for 78 * writing. 79 */ 80 if ((nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC)) && 81 !open_for_write) 82 goto admin; 83 84 return true; 85 admin: 86 return capable(CAP_SYS_ADMIN); 87 } 88 89 /* 90 * Convert integer values from ioctl structures to user pointers, silently 91 * ignoring the upper bits in the compat case to match behaviour of 32-bit 92 * kernels. 93 */ 94 static void __user *nvme_to_user_ptr(uintptr_t ptrval) 95 { 96 if (in_compat_syscall()) 97 ptrval = (compat_uptr_t)ptrval; 98 return (void __user *)ptrval; 99 } 100 101 static struct request *nvme_alloc_user_request(struct request_queue *q, 102 struct nvme_command *cmd, blk_opf_t rq_flags, 103 blk_mq_req_flags_t blk_flags) 104 { 105 struct request *req; 106 107 req = blk_mq_alloc_request(q, nvme_req_op(cmd) | rq_flags, blk_flags); 108 if (IS_ERR(req)) 109 return req; 110 nvme_init_request(req, cmd); 111 nvme_req(req)->flags |= NVME_REQ_USERCMD; 112 return req; 113 } 114 115 static int nvme_map_user_request(struct request *req, u64 ubuffer, 116 unsigned bufflen, void __user *meta_buffer, unsigned meta_len, 117 struct iov_iter *iter, unsigned int flags) 118 { 119 struct request_queue *q = req->q; 120 struct nvme_ns *ns = q->queuedata; 121 struct block_device *bdev = ns ? ns->disk->part0 : NULL; 122 bool supports_metadata = bdev && blk_get_integrity(bdev->bd_disk); 123 struct nvme_ctrl *ctrl = nvme_req(req)->ctrl; 124 bool has_metadata = meta_buffer && meta_len; 125 struct bio *bio = NULL; 126 int ret; 127 128 if (!nvme_ctrl_sgl_supported(ctrl)) 129 dev_warn_once(ctrl->device, "using unchecked data buffer\n"); 130 if (has_metadata) { 131 if (!supports_metadata) 132 return -EINVAL; 133 134 if (!nvme_ctrl_meta_sgl_supported(ctrl)) 135 dev_warn_once(ctrl->device, 136 "using unchecked metadata buffer\n"); 137 } 138 139 if (iter) 140 ret = blk_rq_map_user_iov(q, req, NULL, iter, GFP_KERNEL); 141 else 142 ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer), 143 bufflen, GFP_KERNEL, flags & NVME_IOCTL_VEC, 0, 144 0, rq_data_dir(req)); 145 if (ret) 146 return ret; 147 148 if (has_metadata) { 149 ret = blk_rq_integrity_map_user(req, meta_buffer, meta_len); 150 if (ret) 151 goto out_unmap; 152 } 153 154 return ret; 155 156 out_unmap: 157 if (bio) 158 blk_rq_unmap_user(bio); 159 return ret; 160 } 161 162 static int nvme_submit_user_cmd(struct request_queue *q, 163 struct nvme_command *cmd, u64 ubuffer, unsigned bufflen, 164 void __user *meta_buffer, unsigned meta_len, 165 u64 *result, unsigned timeout, unsigned int flags) 166 { 167 struct nvme_ns *ns = q->queuedata; 168 struct nvme_ctrl *ctrl; 169 struct request *req; 170 struct bio *bio; 171 u32 effects; 172 int ret; 173 174 req = nvme_alloc_user_request(q, cmd, 0, 0); 175 if (IS_ERR(req)) 176 return PTR_ERR(req); 177 178 req->timeout = timeout; 179 if (ubuffer && bufflen) { 180 ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer, 181 meta_len, NULL, flags); 182 if (ret) 183 goto out_free_req; 184 } 185 186 bio = req->bio; 187 ctrl = nvme_req(req)->ctrl; 188 189 effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode); 190 ret = nvme_execute_rq(req, false); 191 if (result) 192 *result = le64_to_cpu(nvme_req(req)->result.u64); 193 if (bio) 194 blk_rq_unmap_user(bio); 195 blk_mq_free_request(req); 196 197 if (effects) 198 nvme_passthru_end(ctrl, ns, effects, cmd, ret); 199 return ret; 200 201 out_free_req: 202 blk_mq_free_request(req); 203 return ret; 204 } 205 206 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) 207 { 208 struct nvme_user_io io; 209 struct nvme_command c; 210 unsigned length, meta_len; 211 void __user *metadata; 212 213 if (copy_from_user(&io, uio, sizeof(io))) 214 return -EFAULT; 215 if (io.flags) 216 return -EINVAL; 217 218 switch (io.opcode) { 219 case nvme_cmd_write: 220 case nvme_cmd_read: 221 case nvme_cmd_compare: 222 break; 223 default: 224 return -EINVAL; 225 } 226 227 length = (io.nblocks + 1) << ns->head->lba_shift; 228 229 if ((io.control & NVME_RW_PRINFO_PRACT) && 230 (ns->head->ms == ns->head->pi_size)) { 231 /* 232 * Protection information is stripped/inserted by the 233 * controller. 234 */ 235 if (nvme_to_user_ptr(io.metadata)) 236 return -EINVAL; 237 meta_len = 0; 238 metadata = NULL; 239 } else { 240 meta_len = (io.nblocks + 1) * ns->head->ms; 241 metadata = nvme_to_user_ptr(io.metadata); 242 } 243 244 if (ns->head->features & NVME_NS_EXT_LBAS) { 245 length += meta_len; 246 meta_len = 0; 247 } else if (meta_len) { 248 if ((io.metadata & 3) || !io.metadata) 249 return -EINVAL; 250 } 251 252 memset(&c, 0, sizeof(c)); 253 c.rw.opcode = io.opcode; 254 c.rw.flags = io.flags; 255 c.rw.nsid = cpu_to_le32(ns->head->ns_id); 256 c.rw.slba = cpu_to_le64(io.slba); 257 c.rw.length = cpu_to_le16(io.nblocks); 258 c.rw.control = cpu_to_le16(io.control); 259 c.rw.dsmgmt = cpu_to_le32(io.dsmgmt); 260 c.rw.reftag = cpu_to_le32(io.reftag); 261 c.rw.lbat = cpu_to_le16(io.apptag); 262 c.rw.lbatm = cpu_to_le16(io.appmask); 263 264 return nvme_submit_user_cmd(ns->queue, &c, io.addr, length, metadata, 265 meta_len, NULL, 0, 0); 266 } 267 268 static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl, 269 struct nvme_ns *ns, __u32 nsid) 270 { 271 if (ns && nsid != ns->head->ns_id) { 272 dev_err(ctrl->device, 273 "%s: nsid (%u) in cmd does not match nsid (%u) of namespace\n", 274 current->comm, nsid, ns->head->ns_id); 275 return false; 276 } 277 278 return true; 279 } 280 281 static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 282 struct nvme_passthru_cmd __user *ucmd, unsigned int flags, 283 bool open_for_write) 284 { 285 struct nvme_passthru_cmd cmd; 286 struct nvme_command c; 287 unsigned timeout = 0; 288 u64 result; 289 int status; 290 291 if (copy_from_user(&cmd, ucmd, sizeof(cmd))) 292 return -EFAULT; 293 if (cmd.flags) 294 return -EINVAL; 295 if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid)) 296 return -EINVAL; 297 298 memset(&c, 0, sizeof(c)); 299 c.common.opcode = cmd.opcode; 300 c.common.flags = cmd.flags; 301 c.common.nsid = cpu_to_le32(cmd.nsid); 302 c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); 303 c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); 304 c.common.cdw10 = cpu_to_le32(cmd.cdw10); 305 c.common.cdw11 = cpu_to_le32(cmd.cdw11); 306 c.common.cdw12 = cpu_to_le32(cmd.cdw12); 307 c.common.cdw13 = cpu_to_le32(cmd.cdw13); 308 c.common.cdw14 = cpu_to_le32(cmd.cdw14); 309 c.common.cdw15 = cpu_to_le32(cmd.cdw15); 310 311 if (!nvme_cmd_allowed(ns, &c, 0, open_for_write)) 312 return -EACCES; 313 314 if (cmd.timeout_ms) 315 timeout = msecs_to_jiffies(cmd.timeout_ms); 316 317 status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, 318 cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata), 319 cmd.metadata_len, &result, timeout, 0); 320 321 if (status >= 0) { 322 if (put_user(result, &ucmd->result)) 323 return -EFAULT; 324 } 325 326 return status; 327 } 328 329 static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 330 struct nvme_passthru_cmd64 __user *ucmd, unsigned int flags, 331 bool open_for_write) 332 { 333 struct nvme_passthru_cmd64 cmd; 334 struct nvme_command c; 335 unsigned timeout = 0; 336 int status; 337 338 if (copy_from_user(&cmd, ucmd, sizeof(cmd))) 339 return -EFAULT; 340 if (cmd.flags) 341 return -EINVAL; 342 if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid)) 343 return -EINVAL; 344 345 memset(&c, 0, sizeof(c)); 346 c.common.opcode = cmd.opcode; 347 c.common.flags = cmd.flags; 348 c.common.nsid = cpu_to_le32(cmd.nsid); 349 c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); 350 c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); 351 c.common.cdw10 = cpu_to_le32(cmd.cdw10); 352 c.common.cdw11 = cpu_to_le32(cmd.cdw11); 353 c.common.cdw12 = cpu_to_le32(cmd.cdw12); 354 c.common.cdw13 = cpu_to_le32(cmd.cdw13); 355 c.common.cdw14 = cpu_to_le32(cmd.cdw14); 356 c.common.cdw15 = cpu_to_le32(cmd.cdw15); 357 358 if (!nvme_cmd_allowed(ns, &c, flags, open_for_write)) 359 return -EACCES; 360 361 if (cmd.timeout_ms) 362 timeout = msecs_to_jiffies(cmd.timeout_ms); 363 364 status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, 365 cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata), 366 cmd.metadata_len, &cmd.result, timeout, flags); 367 368 if (status >= 0) { 369 if (put_user(cmd.result, &ucmd->result)) 370 return -EFAULT; 371 } 372 373 return status; 374 } 375 376 struct nvme_uring_data { 377 __u64 metadata; 378 __u64 addr; 379 __u32 data_len; 380 __u32 metadata_len; 381 __u32 timeout_ms; 382 }; 383 384 /* 385 * This overlays struct io_uring_cmd pdu. 386 * Expect build errors if this grows larger than that. 387 */ 388 struct nvme_uring_cmd_pdu { 389 struct request *req; 390 struct bio *bio; 391 u64 result; 392 int status; 393 }; 394 395 static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu( 396 struct io_uring_cmd *ioucmd) 397 { 398 return io_uring_cmd_to_pdu(ioucmd, struct nvme_uring_cmd_pdu); 399 } 400 401 static void nvme_uring_task_cb(struct io_tw_req tw_req, io_tw_token_t tw) 402 { 403 struct io_uring_cmd *ioucmd = io_uring_cmd_from_tw(tw_req); 404 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 405 406 if (pdu->bio) 407 blk_rq_unmap_user(pdu->bio); 408 io_uring_cmd_done32(ioucmd, pdu->status, pdu->result, 409 IO_URING_CMD_TASK_WORK_ISSUE_FLAGS); 410 } 411 412 static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req, 413 blk_status_t err, 414 const struct io_comp_batch *iob) 415 { 416 struct io_uring_cmd *ioucmd = req->end_io_data; 417 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 418 419 if (nvme_req(req)->flags & NVME_REQ_CANCELLED) { 420 pdu->status = -EINTR; 421 } else { 422 pdu->status = nvme_req(req)->status; 423 if (!pdu->status) 424 pdu->status = blk_status_to_errno(err); 425 } 426 pdu->result = le64_to_cpu(nvme_req(req)->result.u64); 427 428 /* 429 * For IOPOLL, check if this completion is happening in the context 430 * of the same io_ring that owns the request (local context). If so, 431 * we can complete inline without task_work overhead. Otherwise, we 432 * must punt to task_work to ensure completion happens in the correct 433 * ring's context. 434 */ 435 if (blk_rq_is_poll(req) && iob && 436 iob->poll_ctx == io_uring_cmd_ctx_handle(ioucmd)) { 437 if (pdu->bio) 438 blk_rq_unmap_user(pdu->bio); 439 io_uring_cmd_done32(ioucmd, pdu->status, pdu->result, 0); 440 } else { 441 io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb); 442 } 443 return RQ_END_IO_FREE; 444 } 445 446 static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 447 struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec) 448 { 449 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 450 const struct nvme_uring_cmd *cmd = io_uring_sqe128_cmd(ioucmd->sqe, 451 struct nvme_uring_cmd); 452 struct request_queue *q = ns ? ns->queue : ctrl->admin_q; 453 struct nvme_uring_data d; 454 struct nvme_command c; 455 struct iov_iter iter; 456 struct iov_iter *map_iter = NULL; 457 struct request *req; 458 blk_opf_t rq_flags = 0; 459 blk_mq_req_flags_t blk_flags = 0; 460 int ret; 461 462 c.common.opcode = READ_ONCE(cmd->opcode); 463 c.common.flags = READ_ONCE(cmd->flags); 464 if (c.common.flags) 465 return -EINVAL; 466 467 c.common.command_id = 0; 468 c.common.nsid = cpu_to_le32(cmd->nsid); 469 if (!nvme_validate_passthru_nsid(ctrl, ns, le32_to_cpu(c.common.nsid))) 470 return -EINVAL; 471 472 c.common.cdw2[0] = cpu_to_le32(READ_ONCE(cmd->cdw2)); 473 c.common.cdw2[1] = cpu_to_le32(READ_ONCE(cmd->cdw3)); 474 c.common.metadata = 0; 475 c.common.dptr.prp1 = c.common.dptr.prp2 = 0; 476 c.common.cdw10 = cpu_to_le32(READ_ONCE(cmd->cdw10)); 477 c.common.cdw11 = cpu_to_le32(READ_ONCE(cmd->cdw11)); 478 c.common.cdw12 = cpu_to_le32(READ_ONCE(cmd->cdw12)); 479 c.common.cdw13 = cpu_to_le32(READ_ONCE(cmd->cdw13)); 480 c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14)); 481 c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15)); 482 483 if (!nvme_cmd_allowed(ns, &c, 0, ioucmd->file->f_mode & FMODE_WRITE)) 484 return -EACCES; 485 486 d.metadata = READ_ONCE(cmd->metadata); 487 d.addr = READ_ONCE(cmd->addr); 488 d.data_len = READ_ONCE(cmd->data_len); 489 d.metadata_len = READ_ONCE(cmd->metadata_len); 490 d.timeout_ms = READ_ONCE(cmd->timeout_ms); 491 492 if (d.data_len && (ioucmd->flags & IORING_URING_CMD_FIXED)) { 493 int ddir = nvme_is_write(&c) ? WRITE : READ; 494 495 if (vec) 496 ret = io_uring_cmd_import_fixed_vec(ioucmd, 497 u64_to_user_ptr(d.addr), d.data_len, 498 ddir, &iter, issue_flags); 499 else 500 ret = io_uring_cmd_import_fixed(d.addr, d.data_len, 501 ddir, &iter, ioucmd, issue_flags); 502 if (ret < 0) 503 return ret; 504 505 map_iter = &iter; 506 } 507 508 if (issue_flags & IO_URING_F_NONBLOCK) { 509 rq_flags |= REQ_NOWAIT; 510 blk_flags = BLK_MQ_REQ_NOWAIT; 511 } 512 if (issue_flags & IO_URING_F_IOPOLL) 513 rq_flags |= REQ_POLLED; 514 515 req = nvme_alloc_user_request(q, &c, rq_flags, blk_flags); 516 if (IS_ERR(req)) 517 return PTR_ERR(req); 518 req->timeout = d.timeout_ms ? msecs_to_jiffies(d.timeout_ms) : 0; 519 520 if (d.data_len) { 521 ret = nvme_map_user_request(req, d.addr, d.data_len, 522 nvme_to_user_ptr(d.metadata), d.metadata_len, 523 map_iter, vec ? NVME_IOCTL_VEC : 0); 524 if (ret) 525 goto out_free_req; 526 } 527 528 /* to free bio on completion, as req->bio will be null at that time */ 529 pdu->bio = req->bio; 530 pdu->req = req; 531 req->end_io_data = ioucmd; 532 req->end_io = nvme_uring_cmd_end_io; 533 blk_execute_rq_nowait(req, false); 534 return -EIOCBQUEUED; 535 536 out_free_req: 537 blk_mq_free_request(req); 538 return ret; 539 } 540 541 static bool is_ctrl_ioctl(unsigned int cmd) 542 { 543 if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD) 544 return true; 545 if (is_sed_ioctl(cmd)) 546 return true; 547 return false; 548 } 549 550 static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd, 551 void __user *argp, bool open_for_write) 552 { 553 switch (cmd) { 554 case NVME_IOCTL_ADMIN_CMD: 555 return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write); 556 case NVME_IOCTL_ADMIN64_CMD: 557 return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write); 558 default: 559 return sed_ioctl(ctrl->opal_dev, cmd, argp); 560 } 561 } 562 563 #ifdef COMPAT_FOR_U64_ALIGNMENT 564 struct nvme_user_io32 { 565 __u8 opcode; 566 __u8 flags; 567 __u16 control; 568 __u16 nblocks; 569 __u16 rsvd; 570 __u64 metadata; 571 __u64 addr; 572 __u64 slba; 573 __u32 dsmgmt; 574 __u32 reftag; 575 __u16 apptag; 576 __u16 appmask; 577 } __attribute__((__packed__)); 578 #define NVME_IOCTL_SUBMIT_IO32 _IOW('N', 0x42, struct nvme_user_io32) 579 #endif /* COMPAT_FOR_U64_ALIGNMENT */ 580 581 static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, 582 void __user *argp, unsigned int flags, bool open_for_write) 583 { 584 switch (cmd) { 585 case NVME_IOCTL_ID: 586 force_successful_syscall_return(); 587 return ns->head->ns_id; 588 case NVME_IOCTL_IO_CMD: 589 return nvme_user_cmd(ns->ctrl, ns, argp, flags, open_for_write); 590 /* 591 * struct nvme_user_io can have different padding on some 32-bit ABIs. 592 * Just accept the compat version as all fields that are used are the 593 * same size and at the same offset. 594 */ 595 #ifdef COMPAT_FOR_U64_ALIGNMENT 596 case NVME_IOCTL_SUBMIT_IO32: 597 #endif 598 case NVME_IOCTL_SUBMIT_IO: 599 return nvme_submit_io(ns, argp); 600 case NVME_IOCTL_IO64_CMD_VEC: 601 flags |= NVME_IOCTL_VEC; 602 fallthrough; 603 case NVME_IOCTL_IO64_CMD: 604 return nvme_user_cmd64(ns->ctrl, ns, argp, flags, 605 open_for_write); 606 default: 607 return -ENOTTY; 608 } 609 } 610 611 int nvme_ioctl(struct block_device *bdev, blk_mode_t mode, 612 unsigned int cmd, unsigned long arg) 613 { 614 struct nvme_ns *ns = bdev->bd_disk->private_data; 615 bool open_for_write = mode & BLK_OPEN_WRITE; 616 void __user *argp = (void __user *)arg; 617 unsigned int flags = 0; 618 619 if (bdev_is_partition(bdev)) 620 flags |= NVME_IOCTL_PARTITION; 621 622 if (is_ctrl_ioctl(cmd)) 623 return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); 624 return nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write); 625 } 626 627 long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 628 { 629 struct nvme_ns *ns = 630 container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev); 631 bool open_for_write = file->f_mode & FMODE_WRITE; 632 void __user *argp = (void __user *)arg; 633 634 if (is_ctrl_ioctl(cmd)) 635 return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); 636 return nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write); 637 } 638 639 static int nvme_uring_cmd_checks(unsigned int issue_flags) 640 { 641 642 /* NVMe passthrough requires big SQE/CQE support */ 643 if ((issue_flags & (IO_URING_F_SQE128|IO_URING_F_CQE32)) != 644 (IO_URING_F_SQE128|IO_URING_F_CQE32)) 645 return -EOPNOTSUPP; 646 return 0; 647 } 648 649 static int nvme_ns_uring_cmd(struct nvme_ns *ns, struct io_uring_cmd *ioucmd, 650 unsigned int issue_flags) 651 { 652 struct nvme_ctrl *ctrl = ns->ctrl; 653 int ret; 654 655 ret = nvme_uring_cmd_checks(issue_flags); 656 if (ret) 657 return ret; 658 659 switch (ioucmd->cmd_op) { 660 case NVME_URING_CMD_IO: 661 ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, false); 662 break; 663 case NVME_URING_CMD_IO_VEC: 664 ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, true); 665 break; 666 default: 667 ret = -ENOTTY; 668 } 669 670 return ret; 671 } 672 673 int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) 674 { 675 struct nvme_ns *ns = container_of(file_inode(ioucmd->file)->i_cdev, 676 struct nvme_ns, cdev); 677 678 return nvme_ns_uring_cmd(ns, ioucmd, issue_flags); 679 } 680 681 int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd, 682 struct io_comp_batch *iob, 683 unsigned int poll_flags) 684 { 685 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 686 struct request *req = pdu->req; 687 688 if (req && blk_rq_is_poll(req)) 689 return blk_rq_poll(req, iob, poll_flags); 690 return 0; 691 } 692 #ifdef CONFIG_NVME_MULTIPATH 693 static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, 694 void __user *argp, struct nvme_ns_head *head, int srcu_idx, 695 bool open_for_write) 696 __releases(&head->srcu) 697 { 698 struct nvme_ctrl *ctrl = ns->ctrl; 699 int ret; 700 701 nvme_get_ctrl(ns->ctrl); 702 srcu_read_unlock(&head->srcu, srcu_idx); 703 ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); 704 705 nvme_put_ctrl(ctrl); 706 return ret; 707 } 708 709 int nvme_ns_head_ioctl(struct block_device *bdev, blk_mode_t mode, 710 unsigned int cmd, unsigned long arg) 711 { 712 struct nvme_ns_head *head = bdev->bd_disk->private_data; 713 bool open_for_write = mode & BLK_OPEN_WRITE; 714 void __user *argp = (void __user *)arg; 715 struct nvme_ns *ns; 716 int srcu_idx, ret = -EWOULDBLOCK; 717 unsigned int flags = 0; 718 719 if (bdev_is_partition(bdev)) 720 flags |= NVME_IOCTL_PARTITION; 721 722 srcu_idx = srcu_read_lock(&head->srcu); 723 ns = nvme_find_path(head); 724 if (!ns) 725 goto out_unlock; 726 727 /* 728 * Handle ioctls that apply to the controller instead of the namespace 729 * separately and drop the ns SRCU reference early. This avoids a 730 * deadlock when deleting namespaces using the passthrough interface. 731 */ 732 if (is_ctrl_ioctl(cmd)) 733 return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, 734 open_for_write); 735 736 ret = nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write); 737 out_unlock: 738 srcu_read_unlock(&head->srcu, srcu_idx); 739 return ret; 740 } 741 742 long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd, 743 unsigned long arg) 744 { 745 bool open_for_write = file->f_mode & FMODE_WRITE; 746 struct cdev *cdev = file_inode(file)->i_cdev; 747 struct nvme_ns_head *head = 748 container_of(cdev, struct nvme_ns_head, cdev); 749 void __user *argp = (void __user *)arg; 750 struct nvme_ns *ns; 751 int srcu_idx, ret = -EWOULDBLOCK; 752 753 srcu_idx = srcu_read_lock(&head->srcu); 754 ns = nvme_find_path(head); 755 if (!ns) 756 goto out_unlock; 757 758 if (is_ctrl_ioctl(cmd)) 759 return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, 760 open_for_write); 761 762 ret = nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write); 763 out_unlock: 764 srcu_read_unlock(&head->srcu, srcu_idx); 765 return ret; 766 } 767 768 int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd, 769 unsigned int issue_flags) 770 { 771 struct cdev *cdev = file_inode(ioucmd->file)->i_cdev; 772 struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev); 773 int srcu_idx = srcu_read_lock(&head->srcu); 774 struct nvme_ns *ns = nvme_find_path(head); 775 int ret = -EINVAL; 776 777 if (ns) 778 ret = nvme_ns_uring_cmd(ns, ioucmd, issue_flags); 779 srcu_read_unlock(&head->srcu, srcu_idx); 780 return ret; 781 } 782 #endif /* CONFIG_NVME_MULTIPATH */ 783 784 int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) 785 { 786 struct nvme_ctrl *ctrl = ioucmd->file->private_data; 787 int ret; 788 789 /* IOPOLL not supported yet */ 790 if (issue_flags & IO_URING_F_IOPOLL) 791 return -EOPNOTSUPP; 792 793 ret = nvme_uring_cmd_checks(issue_flags); 794 if (ret) 795 return ret; 796 797 switch (ioucmd->cmd_op) { 798 case NVME_URING_CMD_ADMIN: 799 ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, false); 800 break; 801 case NVME_URING_CMD_ADMIN_VEC: 802 ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, true); 803 break; 804 default: 805 ret = -ENOTTY; 806 } 807 808 return ret; 809 } 810 811 static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp, 812 bool open_for_write) 813 { 814 struct nvme_ns *ns; 815 int ret, srcu_idx; 816 817 srcu_idx = srcu_read_lock(&ctrl->srcu); 818 if (list_empty(&ctrl->namespaces)) { 819 ret = -ENOTTY; 820 goto out_unlock; 821 } 822 823 ns = list_first_or_null_rcu(&ctrl->namespaces, struct nvme_ns, list); 824 if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) { 825 dev_warn(ctrl->device, 826 "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n"); 827 ret = -EINVAL; 828 goto out_unlock; 829 } 830 831 dev_warn(ctrl->device, 832 "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n"); 833 if (!nvme_get_ns(ns)) { 834 ret = -ENXIO; 835 goto out_unlock; 836 } 837 srcu_read_unlock(&ctrl->srcu, srcu_idx); 838 839 ret = nvme_user_cmd(ctrl, ns, argp, 0, open_for_write); 840 nvme_put_ns(ns); 841 return ret; 842 843 out_unlock: 844 srcu_read_unlock(&ctrl->srcu, srcu_idx); 845 return ret; 846 } 847 848 long nvme_dev_ioctl(struct file *file, unsigned int cmd, 849 unsigned long arg) 850 { 851 bool open_for_write = file->f_mode & FMODE_WRITE; 852 struct nvme_ctrl *ctrl = file->private_data; 853 void __user *argp = (void __user *)arg; 854 855 switch (cmd) { 856 case NVME_IOCTL_ADMIN_CMD: 857 return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write); 858 case NVME_IOCTL_ADMIN64_CMD: 859 return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write); 860 case NVME_IOCTL_IO_CMD: 861 return nvme_dev_user_cmd(ctrl, argp, open_for_write); 862 case NVME_IOCTL_RESET: 863 if (!capable(CAP_SYS_ADMIN)) 864 return -EACCES; 865 dev_warn(ctrl->device, "resetting controller\n"); 866 return nvme_reset_ctrl_sync(ctrl); 867 case NVME_IOCTL_SUBSYS_RESET: 868 if (!capable(CAP_SYS_ADMIN)) 869 return -EACCES; 870 return nvme_reset_subsystem(ctrl); 871 case NVME_IOCTL_RESCAN: 872 if (!capable(CAP_SYS_ADMIN)) 873 return -EACCES; 874 nvme_queue_scan(ctrl); 875 return 0; 876 default: 877 return -ENOTTY; 878 } 879 } 880