// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/nospec.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "rsrc.h"
#include "filetable.h"
#include "msg_ring.h"

/* All valid flags for MSG_RING */
#define IORING_MSG_RING_MASK	(IORING_MSG_RING_CQE_SKIP | \
				 IORING_MSG_RING_FLAGS_PASS)

struct io_msg {
	struct file *file;
	struct file *src_file;
	struct callback_head tw;
	u64 user_data;
	u32 len;
	u32 cmd;
	u32 src_fd;
	union {
		u32 dst_fd;
		u32 cqe_flags;
	};
	u32 flags;
};

static void io_double_unlock_ctx(struct io_ring_ctx *octx)
{
	mutex_unlock(&octx->uring_lock);
}

static int io_double_lock_ctx(struct io_ring_ctx *octx,
			      unsigned int issue_flags)
{
	/*
	 * To ensure proper ordering between the two ctxs, we can only
	 * attempt a trylock on the target. If that fails and we already have
	 * the source ctx lock, punt to io-wq.
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		if (!mutex_trylock(&octx->uring_lock))
			return -EAGAIN;
		return 0;
	}
	mutex_lock(&octx->uring_lock);
	return 0;
}

void io_msg_ring_cleanup(struct io_kiocb *req)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);

	if (WARN_ON_ONCE(!msg->src_file))
		return;

	fput(msg->src_file);
	msg->src_file = NULL;
}

/*
 * Rings with ->task_complete set only allow completions to be posted by the
 * ring's submitter task. If the target is such a ring and we are not that
 * task, the message must be delivered via task_work instead.
 */
static inline bool io_msg_need_remote(struct io_ring_ctx *target_ctx)
{
	if (!target_ctx->task_complete)
		return false;
	return current != target_ctx->submitter_task;
}

/* Queue @func as task_work on the target ring's submitter task. */
static int io_msg_exec_remote(struct io_kiocb *req, task_work_func_t func)
{
	struct io_ring_ctx *ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct task_struct *task = READ_ONCE(ctx->submitter_task);

	if (unlikely(!task))
		return -EOWNERDEAD;

	init_task_work(&msg->tw, func);
	if (task_work_add(task, &msg->tw, TWA_SIGNAL))
		return -EOWNERDEAD;

	return IOU_ISSUE_SKIP_COMPLETE;
}

static void io_msg_tw_complete(struct callback_head *head)
{
	struct io_msg *msg = container_of(head, struct io_msg, tw);
	struct io_kiocb *req = cmd_to_io_kiocb(msg);
	struct io_ring_ctx *target_ctx = req->file->private_data;
	int ret = 0;

	if (current->flags & PF_EXITING) {
		ret = -EOWNERDEAD;
	} else {
		u32 flags = 0;

		if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
			flags = msg->cqe_flags;

		/*
		 * If the target ring is using IOPOLL mode, then we need to be
		 * holding the uring_lock for posting completions. Other ring
		 * types rely on the regular completion locking, which is
		 * handled while posting.
		 */
		if (target_ctx->flags & IORING_SETUP_IOPOLL)
			mutex_lock(&target_ctx->uring_lock);
		if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
			ret = -EOVERFLOW;
		if (target_ctx->flags & IORING_SETUP_IOPOLL)
			mutex_unlock(&target_ctx->uring_lock);
	}

	if (ret < 0)
		req_set_fail(req);
	io_req_queue_tw_complete(req, ret);
}

/*
 * IORING_MSG_DATA: post a CQE carrying user_data/len (and, with
 * IORING_MSG_RING_FLAGS_PASS, cqe_flags) on the target ring.
 */
static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	u32 flags = 0;
	int ret;

	if (msg->src_fd || msg->flags & ~IORING_MSG_RING_FLAGS_PASS)
		return -EINVAL;
	if (!(msg->flags & IORING_MSG_RING_FLAGS_PASS) && msg->dst_fd)
		return -EINVAL;
	if (target_ctx->flags & IORING_SETUP_R_DISABLED)
		return -EBADFD;

	if (io_msg_need_remote(target_ctx))
		return io_msg_exec_remote(req, io_msg_tw_complete);

	if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
		flags = msg->cqe_flags;

	ret = -EOVERFLOW;
	if (target_ctx->flags & IORING_SETUP_IOPOLL) {
		if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
			return -EAGAIN;
	}
	if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
		ret = 0;
	if (target_ctx->flags & IORING_SETUP_IOPOLL)
		io_double_unlock_ctx(target_ctx);
	return ret;
}

/*
 * Look up the source descriptor in the sender's fixed file table and take a
 * reference on it.
 */
static struct file *io_msg_grab_file(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct file *file = NULL;
	int idx = msg->src_fd;

	io_ring_submit_lock(ctx, issue_flags);
	if (likely(idx < ctx->nr_user_files)) {
		idx = array_index_nospec(idx, ctx->nr_user_files);
		file = io_file_from_index(&ctx->file_table, idx);
		if (file)
			get_file(file);
	}
	io_ring_submit_unlock(ctx, issue_flags);
	return file;
}

/*
 * Install the grabbed file into the target ring's fixed file table and,
 * unless IORING_MSG_RING_CQE_SKIP is set, notify the target with a CQE.
 */
static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct file *src_file = msg->src_file;
	int ret;

	if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
		return -EAGAIN;

	ret = __io_fixed_fd_install(target_ctx, src_file, msg->dst_fd);
	if (ret < 0)
		goto out_unlock;

	msg->src_file = NULL;
	req->flags &= ~REQ_F_NEED_CLEANUP;

	if (msg->flags & IORING_MSG_RING_CQE_SKIP)
		goto out_unlock;
	/*
	 * If this fails, the target still received the file descriptor but
	 * wasn't notified of the fact. This means that if this request
	 * completes with -EOVERFLOW, then the sender must ensure that a
	 * later IORING_OP_MSG_RING delivers the message.
	 */
	if (!io_post_aux_cqe(target_ctx, msg->user_data, ret, 0))
		ret = -EOVERFLOW;
out_unlock:
	io_double_unlock_ctx(target_ctx);
	return ret;
}

static void io_msg_tw_fd_complete(struct callback_head *head)
{
	struct io_msg *msg = container_of(head, struct io_msg, tw);
	struct io_kiocb *req = cmd_to_io_kiocb(msg);
	int ret = -EOWNERDEAD;

	if (!(current->flags & PF_EXITING))
		ret = io_msg_install_complete(req, IO_URING_F_UNLOCKED);
	if (ret < 0)
		req_set_fail(req);
	io_req_queue_tw_complete(req, ret);
}

static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct file *src_file = msg->src_file;

	if (msg->len)
		return -EINVAL;
	if (target_ctx == ctx)
		return -EINVAL;
	if (target_ctx->flags & IORING_SETUP_R_DISABLED)
		return -EBADFD;
	if (!src_file) {
		src_file = io_msg_grab_file(req, issue_flags);
		if (!src_file)
			return -EBADF;
		msg->src_file = src_file;
		req->flags |= REQ_F_NEED_CLEANUP;
	}

	if (io_msg_need_remote(target_ctx))
		return io_msg_exec_remote(req, io_msg_tw_fd_complete);
	return io_msg_install_complete(req, issue_flags);
}

int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);

	if (unlikely(sqe->buf_index || sqe->personality))
		return -EINVAL;

	msg->src_file = NULL;
	msg->user_data = READ_ONCE(sqe->off);
	msg->len = READ_ONCE(sqe->len);
	msg->cmd = READ_ONCE(sqe->addr);
	msg->src_fd = READ_ONCE(sqe->addr3);
	msg->dst_fd = READ_ONCE(sqe->file_index);
	msg->flags = READ_ONCE(sqe->msg_ring_flags);
	if (msg->flags & ~IORING_MSG_RING_MASK)
		return -EINVAL;

	return 0;
}

int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	int ret;

	ret = -EBADFD;
	if (!io_is_uring_fops(req->file))
		goto done;

	switch (msg->cmd) {
	case IORING_MSG_DATA:
		ret = io_msg_ring_data(req, issue_flags);
		break;
	case IORING_MSG_SEND_FD:
		ret = io_msg_send_fd(req, issue_flags);
		break;
	default:
		ret = -EINVAL;
		break;
	}

done:
	if (ret < 0) {
		if (ret == -EAGAIN || ret == IOU_ISSUE_SKIP_COMPLETE)
			return ret;
		req_set_fail(req);
	}
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}
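
/*
 * Usage sketch (userspace; not part of this file's build): a minimal example
 * of how the IORING_MSG_DATA path above is typically driven, assuming
 * liburing provides the io_uring_prep_msg_ring() helper (liburing >= 2.2).
 * The ring sizes, the 0x1234 length/res value and the 0xcafebabe user_data
 * are illustrative only, and error handling is omitted.
 *
 *	#include <liburing.h>
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		struct io_uring src, dst;
 *		struct io_uring_sqe *sqe;
 *		struct io_uring_cqe *cqe;
 *
 *		io_uring_queue_init(8, &src, 0);
 *		io_uring_queue_init(8, &dst, 0);
 *
 *		// Ask the kernel to post a CQE on dst with res = 0x1234 and
 *		// user_data = 0xcafebabe; this lands in io_msg_ring_data().
 *		sqe = io_uring_get_sqe(&src);
 *		io_uring_prep_msg_ring(sqe, dst.ring_fd, 0x1234, 0xcafebabe, 0);
 *		io_uring_submit(&src);
 *
 *		// The receiving ring sees the message as a normal completion.
 *		io_uring_wait_cqe(&dst, &cqe);
 *		printf("res=%d user_data=0x%llx\n", cqe->res,
 *		       (unsigned long long)cqe->user_data);
 *		io_uring_cqe_seen(&dst, cqe);
 *
 *		io_uring_queue_exit(&src);
 *		io_uring_queue_exit(&dst);
 *		return 0;
 *	}
 */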