// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/nospec.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "rsrc.h"
#include "filetable.h"
#include "msg_ring.h"


/* All valid masks for MSG_RING */
#define IORING_MSG_RING_MASK		(IORING_MSG_RING_CQE_SKIP | \
					 IORING_MSG_RING_FLAGS_PASS)

struct io_msg {
	struct file			*file;
	struct file			*src_file;
	struct callback_head		tw;
	u64 user_data;
	u32 len;
	u32 cmd;
	u32 src_fd;
	union {
		u32 dst_fd;
		u32 cqe_flags;
	};
	u32 flags;
};

static void io_double_unlock_ctx(struct io_ring_ctx *octx)
{
	mutex_unlock(&octx->uring_lock);
}

static int io_double_lock_ctx(struct io_ring_ctx *octx,
			      unsigned int issue_flags)
{
	/*
	 * To ensure proper ordering between the two ctxs, we can only
	 * attempt a trylock on the target. If that fails and we already have
	 * the source ctx lock, punt to io-wq.
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		if (!mutex_trylock(&octx->uring_lock))
			return -EAGAIN;
		return 0;
	}
	mutex_lock(&octx->uring_lock);
	return 0;
}

void io_msg_ring_cleanup(struct io_kiocb *req)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);

	if (WARN_ON_ONCE(!msg->src_file))
		return;

	fput(msg->src_file);
	msg->src_file = NULL;
}

static inline bool io_msg_need_remote(struct io_ring_ctx *target_ctx)
{
	if (!target_ctx->task_complete)
		return false;
	return current != target_ctx->submitter_task;
}

static int io_msg_exec_remote(struct io_kiocb *req, task_work_func_t func)
{
	struct io_ring_ctx *ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct task_struct *task = READ_ONCE(ctx->submitter_task);

	if (unlikely(!task))
		return -EOWNERDEAD;

	init_task_work(&msg->tw, func);
	if (task_work_add(ctx->submitter_task, &msg->tw, TWA_SIGNAL))
		return -EOWNERDEAD;

	return IOU_ISSUE_SKIP_COMPLETE;
}

static void io_msg_tw_complete(struct callback_head *head)
{
	struct io_msg *msg = container_of(head, struct io_msg, tw);
	struct io_kiocb *req = cmd_to_io_kiocb(msg);
	struct io_ring_ctx *target_ctx = req->file->private_data;
	int ret = 0;

	if (current->flags & PF_EXITING) {
		ret = -EOWNERDEAD;
	} else {
		u32 flags = 0;

		if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
			flags = msg->cqe_flags;

		/*
		 * If the target ring is using IOPOLL mode, then we need to be
		 * holding the uring_lock for posting completions. Other ring
		 * types rely on the regular completion locking, which is
		 * handled while posting.
		 */
		if (target_ctx->flags & IORING_SETUP_IOPOLL)
			mutex_lock(&target_ctx->uring_lock);
		if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
			ret = -EOVERFLOW;
		if (target_ctx->flags & IORING_SETUP_IOPOLL)
			mutex_unlock(&target_ctx->uring_lock);
	}

	if (ret < 0)
		req_set_fail(req);
	io_req_queue_tw_complete(req, ret);
}

static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	u32 flags = 0;
	int ret;

	if (msg->src_fd || msg->flags & ~IORING_MSG_RING_FLAGS_PASS)
		return -EINVAL;
	if (!(msg->flags & IORING_MSG_RING_FLAGS_PASS) && msg->dst_fd)
		return -EINVAL;
	if (target_ctx->flags & IORING_SETUP_R_DISABLED)
		return -EBADFD;

	if (io_msg_need_remote(target_ctx))
		return io_msg_exec_remote(req, io_msg_tw_complete);

	if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
		flags = msg->cqe_flags;

	ret = -EOVERFLOW;
	if (target_ctx->flags & IORING_SETUP_IOPOLL) {
		if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
			return -EAGAIN;
		if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
			ret = 0;
		io_double_unlock_ctx(target_ctx);
	} else {
		if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
			ret = 0;
	}
	return ret;
}

static struct file *io_msg_grab_file(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct file *file = NULL;
	int idx = msg->src_fd;

	io_ring_submit_lock(ctx, issue_flags);
	if (likely(idx < ctx->nr_user_files)) {
		idx = array_index_nospec(idx, ctx->nr_user_files);
		file = io_file_from_index(&ctx->file_table, idx);
		if (file)
			get_file(file);
	}
	io_ring_submit_unlock(ctx, issue_flags);
	return file;
}

static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct file *src_file = msg->src_file;
	int ret;

	if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
		return -EAGAIN;

	ret = __io_fixed_fd_install(target_ctx, src_file, msg->dst_fd);
	if (ret < 0)
		goto out_unlock;

	msg->src_file = NULL;
	req->flags &= ~REQ_F_NEED_CLEANUP;

	if (msg->flags & IORING_MSG_RING_CQE_SKIP)
		goto out_unlock;
	/*
	 * If this fails, the target still received the file descriptor but
	 * wasn't notified of the fact. This means that if this request
	 * completes with -EOVERFLOW, then the sender must ensure that a
	 * later IORING_OP_MSG_RING delivers the message.
	 */
	if (!io_post_aux_cqe(target_ctx, msg->user_data, ret, 0))
		ret = -EOVERFLOW;
out_unlock:
	io_double_unlock_ctx(target_ctx);
	return ret;
}

static void io_msg_tw_fd_complete(struct callback_head *head)
{
	struct io_msg *msg = container_of(head, struct io_msg, tw);
	struct io_kiocb *req = cmd_to_io_kiocb(msg);
	int ret = -EOWNERDEAD;

	if (!(current->flags & PF_EXITING))
		ret = io_msg_install_complete(req, IO_URING_F_UNLOCKED);
	if (ret < 0)
		req_set_fail(req);
	io_req_queue_tw_complete(req, ret);
}

static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct file *src_file = msg->src_file;

	if (msg->len)
		return -EINVAL;
	if (target_ctx == ctx)
		return -EINVAL;
	if (target_ctx->flags & IORING_SETUP_R_DISABLED)
		return -EBADFD;
	if (!src_file) {
		src_file = io_msg_grab_file(req, issue_flags);
		if (!src_file)
			return -EBADF;
		msg->src_file = src_file;
		req->flags |= REQ_F_NEED_CLEANUP;
	}

	if (io_msg_need_remote(target_ctx))
		return io_msg_exec_remote(req, io_msg_tw_fd_complete);
	return io_msg_install_complete(req, issue_flags);
}

int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);

	if (unlikely(sqe->buf_index || sqe->personality))
		return -EINVAL;

	msg->src_file = NULL;
	msg->user_data = READ_ONCE(sqe->off);
	msg->len = READ_ONCE(sqe->len);
	msg->cmd = READ_ONCE(sqe->addr);
	msg->src_fd = READ_ONCE(sqe->addr3);
	msg->dst_fd = READ_ONCE(sqe->file_index);
	msg->flags = READ_ONCE(sqe->msg_ring_flags);
	if (msg->flags & ~IORING_MSG_RING_MASK)
		return -EINVAL;

	return 0;
}

int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	int ret;

	ret = -EBADFD;
	if (!io_is_uring_fops(req->file))
		goto done;

	switch (msg->cmd) {
	case IORING_MSG_DATA:
		ret = io_msg_ring_data(req, issue_flags);
		break;
	case IORING_MSG_SEND_FD:
		ret = io_msg_send_fd(req, issue_flags);
		break;
	default:
		ret = -EINVAL;
		break;
	}

done:
	if (ret < 0) {
		if (ret == -EAGAIN || ret == IOU_ISSUE_SKIP_COMPLETE)
			return ret;
		req_set_fail(req);
	}
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}
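
/*
 * Example usage (userspace): a minimal sketch of driving IORING_OP_MSG_RING
 * from liburing, kept as a comment so this translation unit is unaffected.
 * It assumes an initialized sending ring "src" and the file descriptor of the
 * receiving ring in "target_ring_fd"; both names are illustrative only.
 * io_msg_ring_prep() above reads sqe->off as the user_data to deliver,
 * sqe->len as the CQE res value and sqe->addr as the command, and liburing's
 * io_uring_prep_msg_ring() leaves sqe->addr at zero, i.e. IORING_MSG_DATA.
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&src);
 *
 *	// Deliver a CQE with user_data == 0xc0ffee and res == 0 to the
 *	// target ring. flags == 0, so IORING_MSG_RING_FLAGS_PASS is not
 *	// set and io_msg_ring_data() requires dst_fd to be zero.
 *	io_uring_prep_msg_ring(sqe, target_ring_fd, 0, 0xc0ffee, 0);
 *	io_uring_submit(&src);
 *
 *	// The sending ring still receives its own CQE for this request
 *	// once io_msg_ring() completes it.
 */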