// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "../kernel/futex/futex.h"
#include "io_uring.h"
#include "rsrc.h"
#include "futex.h"

struct io_futex {
	struct file	*file;
	union {
		u32 __user			*uaddr;
		struct futex_waitv __user	*uwaitv;
	};
	unsigned long	futex_val;
	unsigned long	futex_mask;
	unsigned long	futexv_owned;
	u32		futex_flags;
	unsigned int	futex_nr;
	bool		futexv_unqueued;
};

struct io_futex_data {
	union {
		struct futex_q		q;
		struct io_cache_entry	cache;
	};
	struct io_kiocb	*req;
};

void io_futex_cache_init(struct io_ring_ctx *ctx)
{
	io_alloc_cache_init(&ctx->futex_cache, IO_NODE_ALLOC_CACHE_MAX,
			    sizeof(struct io_futex_data));
}

static void io_futex_cache_entry_free(struct io_cache_entry *entry)
{
	kfree(container_of(entry, struct io_futex_data, cache));
}

void io_futex_cache_free(struct io_ring_ctx *ctx)
{
	io_alloc_cache_free(&ctx->futex_cache, io_futex_cache_entry_free);
}

static void __io_futex_complete(struct io_kiocb *req, struct io_tw_state *ts)
{
	req->async_data = NULL;
	hlist_del_init(&req->hash_node);
	io_req_task_complete(req, ts);
}

static void io_futex_complete(struct io_kiocb *req, struct io_tw_state *ts)
{
	struct io_futex_data *ifd = req->async_data;
	struct io_ring_ctx *ctx = req->ctx;

	io_tw_lock(ctx, ts);
	if (!io_alloc_cache_put(&ctx->futex_cache, &ifd->cache))
		kfree(ifd);
	__io_futex_complete(req, ts);
}

static void io_futexv_complete(struct io_kiocb *req, struct io_tw_state *ts)
{
	struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
	struct futex_vector *futexv = req->async_data;

	io_tw_lock(req->ctx, ts);

	if (!iof->futexv_unqueued) {
		int res;

		res = futex_unqueue_multiple(futexv, iof->futex_nr);
		if (res != -1)
			io_req_set_res(req, res, 0);
	}

	kfree(req->async_data);
	req->flags &= ~REQ_F_ASYNC_DATA;
	__io_futex_complete(req, ts);
}

/*
 * Claim ownership of a futexv request for completion. Wake and cancel
 * can race; whichever side wins the claim gets to complete the request.
 */
static bool io_futexv_claim(struct io_futex *iof)
{
	if (test_bit(0, &iof->futexv_owned) ||
	    test_and_set_bit_lock(0, &iof->futexv_owned))
		return false;
	return true;
}

static bool __io_futex_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req)
{
	/* futex wake already done or in progress */
	if (req->opcode == IORING_OP_FUTEX_WAIT) {
		struct io_futex_data *ifd = req->async_data;

		if (!futex_unqueue(&ifd->q))
			return false;
		req->io_task_work.func = io_futex_complete;
	} else {
		struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);

		if (!io_futexv_claim(iof))
			return false;
		req->io_task_work.func = io_futexv_complete;
	}

	hlist_del_init(&req->hash_node);
	io_req_set_res(req, -ECANCELED, 0);
	io_req_task_work_add(req);
	return true;
}
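/*
 * Cancel a pending futex wait. Cancelation by fd isn't supported, so
 * those flags fail with -ENOENT up front. Matches on the request
 * user_data unless IORING_ASYNC_CANCEL_ANY is set, and stops after the
 * first match unless IORING_ASYNC_CANCEL_ALL is set. Returns the number
 * of requests canceled, or -ENOENT if none matched.
 */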
int io_futex_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
		    unsigned int issue_flags)
{
	struct hlist_node *tmp;
	struct io_kiocb *req;
	int nr = 0;

	if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_FD_FIXED))
		return -ENOENT;

	io_ring_submit_lock(ctx, issue_flags);
	hlist_for_each_entry_safe(req, tmp, &ctx->futex_list, hash_node) {
		if (req->cqe.user_data != cd->data &&
		    !(cd->flags & IORING_ASYNC_CANCEL_ANY))
			continue;
		if (__io_futex_cancel(ctx, req))
			nr++;
		if (!(cd->flags & IORING_ASYNC_CANCEL_ALL))
			break;
	}
	io_ring_submit_unlock(ctx, issue_flags);

	if (nr)
		return nr;

	return -ENOENT;
}

bool io_futex_remove_all(struct io_ring_ctx *ctx, struct task_struct *task,
			 bool cancel_all)
{
	struct hlist_node *tmp;
	struct io_kiocb *req;
	bool found = false;

	lockdep_assert_held(&ctx->uring_lock);

	hlist_for_each_entry_safe(req, tmp, &ctx->futex_list, hash_node) {
		if (!io_match_task_safe(req, task, cancel_all))
			continue;
		hlist_del_init(&req->hash_node);
		__io_futex_cancel(ctx, req);
		found = true;
	}

	return found;
}

int io_futex_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
	u32 flags;

	if (unlikely(sqe->len || sqe->futex_flags || sqe->buf_index ||
		     sqe->file_index))
		return -EINVAL;

	iof->uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	iof->futex_val = READ_ONCE(sqe->addr2);
	iof->futex_mask = READ_ONCE(sqe->addr3);
	flags = READ_ONCE(sqe->fd);

	if (flags & ~FUTEX2_VALID_MASK)
		return -EINVAL;

	iof->futex_flags = futex2_to_flags(flags);
	if (!futex_flags_valid(iof->futex_flags))
		return -EINVAL;

	if (!futex_validate_input(iof->futex_flags, iof->futex_val) ||
	    !futex_validate_input(iof->futex_flags, iof->futex_mask))
		return -EINVAL;

	return 0;
}

static void io_futex_wakev_fn(struct wake_q_head *wake_q, struct futex_q *q)
{
	struct io_kiocb *req = q->wake_data;
	struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);

	if (!io_futexv_claim(iof))
		return;
	if (unlikely(!__futex_wake_mark(q)))
		return;

	io_req_set_res(req, 0, 0);
	req->io_task_work.func = io_futexv_complete;
	io_req_task_work_add(req);
}

int io_futexv_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
	struct futex_vector *futexv;
	int ret;

	/* No flags or mask supported for waitv */
	if (unlikely(sqe->fd || sqe->buf_index || sqe->file_index ||
		     sqe->addr2 || sqe->futex_flags || sqe->addr3))
		return -EINVAL;

	iof->uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	iof->futex_nr = READ_ONCE(sqe->len);
	if (!iof->futex_nr || iof->futex_nr > FUTEX_WAITV_MAX)
		return -EINVAL;

	futexv = kcalloc(iof->futex_nr, sizeof(*futexv), GFP_KERNEL);
	if (!futexv)
		return -ENOMEM;

	ret = futex_parse_waitv(futexv, iof->uwaitv, iof->futex_nr,
				io_futex_wakev_fn, req);
	if (ret) {
		kfree(futexv);
		return ret;
	}

	iof->futexv_owned = 0;
	iof->futexv_unqueued = 0;
	req->flags |= REQ_F_ASYNC_DATA;
	req->async_data = futexv;
	return 0;
}

static void io_futex_wake_fn(struct wake_q_head *wake_q, struct futex_q *q)
{
	struct io_futex_data *ifd = container_of(q, struct io_futex_data, q);
	struct io_kiocb *req = ifd->req;

	if (unlikely(!__futex_wake_mark(q)))
		return;

	io_req_set_res(req, 0, 0);
	req->io_task_work.func = io_futex_complete;
	io_req_task_work_add(req);
}

static struct io_futex_data *io_alloc_ifd(struct io_ring_ctx *ctx)
{
	struct io_cache_entry *entry;

	entry = io_alloc_cache_get(&ctx->futex_cache);
	if (entry)
		return container_of(entry, struct io_futex_data, cache);

	return kmalloc(sizeof(struct io_futex_data), GFP_NOWAIT);
}
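/*
 * Issue a wait on multiple futexes. Unlike the synchronous futex_waitv()
 * syscall, the request is queued on ctx->futex_list and completed via
 * task_work when one of the futexes is woken, rather than blocking the
 * submitting task.
 */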
int io_futexv_wait(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
	struct futex_vector *futexv = req->async_data;
	struct io_ring_ctx *ctx = req->ctx;
	int ret, woken = -1;

	io_ring_submit_lock(ctx, issue_flags);

	ret = futex_wait_multiple_setup(futexv, iof->futex_nr, &woken);

	/*
	 * Error case, ret is < 0. Mark the request as failed.
	 */
	if (unlikely(ret < 0)) {
		io_ring_submit_unlock(ctx, issue_flags);
		req_set_fail(req);
		io_req_set_res(req, ret, 0);
		kfree(futexv);
		req->async_data = NULL;
		req->flags &= ~REQ_F_ASYNC_DATA;
		return IOU_OK;
	}

	/*
	 * 0 return means that we successfully set up the waiters, and that
	 * nobody triggered a wakeup while we were doing so. If the wakeup
	 * happened post setup, the task_work will be run post this issue and
	 * under the submission lock. 1 means we got woken while setting up,
	 * let that side do the completion. Note that
	 * futex_wait_multiple_setup() will have unqueued all the futexes in
	 * this case. Mark us as having done that already, since this is
	 * different from normal wakeup.
	 */
	if (!ret) {
		/*
		 * If futex_wait_multiple_setup() returns 0 for a
		 * successful setup, then the task state will not be
		 * runnable. This is fine for the sync syscall, as
		 * it'll be blocking unless we already got one of the
		 * futexes woken, but it obviously won't work for an
		 * async invocation. Mark us runnable again.
		 */
		__set_current_state(TASK_RUNNING);
		hlist_add_head(&req->hash_node, &ctx->futex_list);
	} else {
		iof->futexv_unqueued = 1;
		if (woken != -1)
			io_req_set_res(req, woken, 0);
	}

	io_ring_submit_unlock(ctx, issue_flags);
	return IOU_ISSUE_SKIP_COMPLETE;
}
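/*
 * Issue a wait on a single futex. On a successful queue, the request is
 * added to ctx->futex_list and completed via io_futex_wake_fn() from
 * task_work; any setup error completes the request inline instead.
 */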
int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_futex_data *ifd = NULL;
	struct futex_hash_bucket *hb;
	int ret;

	if (!iof->futex_mask) {
		ret = -EINVAL;
		goto done;
	}

	io_ring_submit_lock(ctx, issue_flags);
	ifd = io_alloc_ifd(ctx);
	if (!ifd) {
		ret = -ENOMEM;
		goto done_unlock;
	}

	req->async_data = ifd;
	ifd->q = futex_q_init;
	ifd->q.bitset = iof->futex_mask;
	ifd->q.wake = io_futex_wake_fn;
	ifd->req = req;

	ret = futex_wait_setup(iof->uaddr, iof->futex_val, iof->futex_flags,
			       &ifd->q, &hb);
	if (!ret) {
		hlist_add_head(&req->hash_node, &ctx->futex_list);
		io_ring_submit_unlock(ctx, issue_flags);

		futex_queue(&ifd->q, hb);
		return IOU_ISSUE_SKIP_COMPLETE;
	}

done_unlock:
	io_ring_submit_unlock(ctx, issue_flags);
done:
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	kfree(ifd);
	return IOU_OK;
}

int io_futex_wake(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
	int ret;

	/*
	 * Strict flags - ensure that waking 0 futexes yields a 0 result.
	 * See commit 43adf8449510 ("futex: FLAGS_STRICT") for details.
	 */
	ret = futex_wake(iof->uaddr, FLAGS_STRICT | iof->futex_flags,
			 iof->futex_val, iof->futex_mask);
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}
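/*
 * Userspace usage sketch (illustrative, not part of this file): assuming
 * a liburing recent enough to provide the futex helpers (2.5+), a wait on
 * a 32-bit futex word can be queued and later woken along these lines:
 *
 *	io_uring_prep_futex_wait(sqe, &futex_word, expected_val,
 *				 FUTEX_BITSET_MATCH_ANY, FUTEX2_SIZE_U32, 0);
 *	...
 *	io_uring_prep_futex_wake(sqe, &futex_word, 1,
 *				 FUTEX_BITSET_MATCH_ANY, FUTEX2_SIZE_U32, 0);
 *
 * The wait CQE carries res == 0 when woken; the wake CQE carries the
 * number of waiters woken, 0 if there were none (per FLAGS_STRICT above).
 */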