// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "../kernel/futex/futex.h"
#include "io_uring.h"
#include "rsrc.h"
#include "futex.h"

struct io_futex {
        struct file *file;
        union {
                u32 __user *uaddr;
                struct futex_waitv __user *uwaitv;
        };
        unsigned long futex_val;
        unsigned long futex_mask;
        unsigned long futexv_owned;
        u32 futex_flags;
        unsigned int futex_nr;
        bool futexv_unqueued;
};

struct io_futex_data {
        union {
                struct futex_q q;
                struct io_cache_entry cache;
        };
        struct io_kiocb *req;
};

void io_futex_cache_init(struct io_ring_ctx *ctx)
{
        io_alloc_cache_init(&ctx->futex_cache, IO_NODE_ALLOC_CACHE_MAX,
                            sizeof(struct io_futex_data));
}

static void io_futex_cache_entry_free(struct io_cache_entry *entry)
{
        kfree(container_of(entry, struct io_futex_data, cache));
}

void io_futex_cache_free(struct io_ring_ctx *ctx)
{
        io_alloc_cache_free(&ctx->futex_cache, io_futex_cache_entry_free);
}

static void __io_futex_complete(struct io_kiocb *req, struct io_tw_state *ts)
{
        req->async_data = NULL;
        hlist_del_init(&req->hash_node);
        io_req_task_complete(req, ts);
}

static void io_futex_complete(struct io_kiocb *req, struct io_tw_state *ts)
{
        struct io_futex_data *ifd = req->async_data;
        struct io_ring_ctx *ctx = req->ctx;

        io_tw_lock(ctx, ts);
        if (!io_alloc_cache_put(&ctx->futex_cache, &ifd->cache))
                kfree(ifd);
        __io_futex_complete(req, ts);
}

static void io_futexv_complete(struct io_kiocb *req, struct io_tw_state *ts)
{
        struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
        struct futex_vector *futexv = req->async_data;

        io_tw_lock(req->ctx, ts);

        if (!iof->futexv_unqueued) {
                int res;

                res = futex_unqueue_multiple(futexv, iof->futex_nr);
                if (res != -1)
                        io_req_set_res(req, res, 0);
        }

        kfree(req->async_data);
        req->flags &= ~REQ_F_ASYNC_DATA;
        __io_futex_complete(req, ts);
}

static bool io_futexv_claim(struct io_futex *iof)
{
        if (test_bit(0, &iof->futexv_owned) ||
            test_and_set_bit_lock(0, &iof->futexv_owned))
                return false;
        return true;
}

static bool __io_futex_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req)
{
        /* futex wake already done or in progress */
        if (req->opcode == IORING_OP_FUTEX_WAIT) {
                struct io_futex_data *ifd = req->async_data;

                if (!futex_unqueue(&ifd->q))
                        return false;
                req->io_task_work.func = io_futex_complete;
        } else {
                struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);

                if (!io_futexv_claim(iof))
                        return false;
                req->io_task_work.func = io_futexv_complete;
        }

        hlist_del_init(&req->hash_node);
        io_req_set_res(req, -ECANCELED, 0);
        io_req_task_work_add(req);
        return true;
}

int io_futex_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
                    unsigned int issue_flags)
{
        struct hlist_node *tmp;
        struct io_kiocb *req;
        int nr = 0;

        if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_FD_FIXED))
                return -ENOENT;

        io_ring_submit_lock(ctx, issue_flags);
        hlist_for_each_entry_safe(req, tmp, &ctx->futex_list, hash_node) {
                if (req->cqe.user_data != cd->data &&
                    !(cd->flags & IORING_ASYNC_CANCEL_ANY))
                        continue;
                if (__io_futex_cancel(ctx, req))
                        nr++;
                if (!(cd->flags & IORING_ASYNC_CANCEL_ALL))
                        break;
        }
        io_ring_submit_unlock(ctx, issue_flags);

        if (nr)
                return nr;

        return -ENOENT;
}
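
/*
 * Cancel pending futex requests on ->futex_list that match @task and
 * @cancel_all (see io_match_task_safe()), typically at task exit or
 * ring teardown. Returns true if any matching request was found.
 * Must be called with ->uring_lock held.
 */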
bool io_futex_remove_all(struct io_ring_ctx *ctx, struct task_struct *task,
                         bool cancel_all)
{
        struct hlist_node *tmp;
        struct io_kiocb *req;
        bool found = false;

        lockdep_assert_held(&ctx->uring_lock);

        hlist_for_each_entry_safe(req, tmp, &ctx->futex_list, hash_node) {
                if (!io_match_task_safe(req, task, cancel_all))
                        continue;
                __io_futex_cancel(ctx, req);
                found = true;
        }

        return found;
}

int io_futex_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
        struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
        u32 flags;

        if (unlikely(sqe->len || sqe->futex_flags || sqe->buf_index ||
                     sqe->file_index))
                return -EINVAL;

        iof->uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr));
        iof->futex_val = READ_ONCE(sqe->addr2);
        iof->futex_mask = READ_ONCE(sqe->addr3);
        flags = READ_ONCE(sqe->fd);

        if (flags & ~FUTEX2_VALID_MASK)
                return -EINVAL;

        iof->futex_flags = futex2_to_flags(flags);
        if (!futex_flags_valid(iof->futex_flags))
                return -EINVAL;

        if (!futex_validate_input(iof->futex_flags, iof->futex_val) ||
            !futex_validate_input(iof->futex_flags, iof->futex_mask))
                return -EINVAL;

        return 0;
}

static void io_futex_wakev_fn(struct wake_q_head *wake_q, struct futex_q *q)
{
        struct io_kiocb *req = q->wake_data;
        struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);

        if (!io_futexv_claim(iof))
                return;
        if (unlikely(!__futex_wake_mark(q)))
                return;

        io_req_set_res(req, 0, 0);
        req->io_task_work.func = io_futexv_complete;
        io_req_task_work_add(req);
}

int io_futexv_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
        struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
        struct futex_vector *futexv;
        int ret;

        /* No flags or mask supported for waitv */
        if (unlikely(sqe->fd || sqe->buf_index || sqe->file_index ||
                     sqe->addr2 || sqe->futex_flags || sqe->addr3))
                return -EINVAL;

        iof->uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr));
        iof->futex_nr = READ_ONCE(sqe->len);
        if (!iof->futex_nr || iof->futex_nr > FUTEX_WAITV_MAX)
                return -EINVAL;

        futexv = kcalloc(iof->futex_nr, sizeof(*futexv), GFP_KERNEL);
        if (!futexv)
                return -ENOMEM;

        ret = futex_parse_waitv(futexv, iof->uwaitv, iof->futex_nr,
                                io_futex_wakev_fn, req);
        if (ret) {
                kfree(futexv);
                return ret;
        }

        iof->futexv_owned = 0;
        iof->futexv_unqueued = 0;
        req->flags |= REQ_F_ASYNC_DATA;
        req->async_data = futexv;
        return 0;
}

static void io_futex_wake_fn(struct wake_q_head *wake_q, struct futex_q *q)
{
        struct io_futex_data *ifd = container_of(q, struct io_futex_data, q);
        struct io_kiocb *req = ifd->req;

        if (unlikely(!__futex_wake_mark(q)))
                return;

        io_req_set_res(req, 0, 0);
        req->io_task_work.func = io_futex_complete;
        io_req_task_work_add(req);
}

static struct io_futex_data *io_alloc_ifd(struct io_ring_ctx *ctx)
{
        struct io_cache_entry *entry;

        entry = io_alloc_cache_get(&ctx->futex_cache);
        if (entry)
                return container_of(entry, struct io_futex_data, cache);

        return kmalloc(sizeof(struct io_futex_data), GFP_NOWAIT);
}

int io_futexv_wait(struct io_kiocb *req, unsigned int issue_flags)
{
        struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
        struct futex_vector *futexv = req->async_data;
        struct io_ring_ctx *ctx = req->ctx;
        int ret, woken = -1;

        io_ring_submit_lock(ctx, issue_flags);

        ret = futex_wait_multiple_setup(futexv, iof->futex_nr, &woken);

        /*
         * Error case, ret is < 0. Mark the request as failed.
         */
        if (unlikely(ret < 0)) {
                io_ring_submit_unlock(ctx, issue_flags);
                req_set_fail(req);
                io_req_set_res(req, ret, 0);
                kfree(futexv);
                req->async_data = NULL;
                req->flags &= ~REQ_F_ASYNC_DATA;
                return IOU_OK;
        }

        /*
         * A 0 return means that we successfully set up the waiters, and that
         * nobody triggered a wakeup while we were doing so. If the wakeup
         * happened post setup, the task_work will be run post this issue and
         * under the submission lock. 1 means we got woken while setting up,
         * let that side do the completion. Note that
         * futex_wait_multiple_setup() will have unqueued all the futexes in
         * this case. Mark us as having done that already, since this is
         * different from normal wakeup.
         */
        if (!ret) {
                /*
                 * If futex_wait_multiple_setup() returns 0 for a
                 * successful setup, then the task state will not be
                 * runnable. This is fine for the sync syscall, as
                 * it'll be blocking unless we already got one of the
                 * futexes woken, but it obviously won't work for an
                 * async invocation. Mark us runnable again.
                 */
                __set_current_state(TASK_RUNNING);
                hlist_add_head(&req->hash_node, &ctx->futex_list);
        } else {
                iof->futexv_unqueued = 1;
                if (woken != -1)
                        io_req_set_res(req, woken, 0);
        }

        io_ring_submit_unlock(ctx, issue_flags);
        return IOU_ISSUE_SKIP_COMPLETE;
}
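
/*
 * Issue IORING_OP_FUTEX_WAIT: allocate an io_futex_data (from the ring's
 * cache when possible), arm a single futex_q with io_futex_wake_fn() as
 * the wake callback and queue it. On success the request is parked on
 * ->futex_list and completes via task_work once the futex is woken or
 * the request is cancelled; setup errors complete inline.
 */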
int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags)
{
        struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
        struct io_ring_ctx *ctx = req->ctx;
        struct io_futex_data *ifd = NULL;
        struct futex_hash_bucket *hb;
        int ret;

        if (!iof->futex_mask) {
                ret = -EINVAL;
                goto done;
        }

        io_ring_submit_lock(ctx, issue_flags);
        ifd = io_alloc_ifd(ctx);
        if (!ifd) {
                ret = -ENOMEM;
                goto done_unlock;
        }

        req->async_data = ifd;
        ifd->q = futex_q_init;
        ifd->q.bitset = iof->futex_mask;
        ifd->q.wake = io_futex_wake_fn;
        ifd->req = req;

        ret = futex_wait_setup(iof->uaddr, iof->futex_val, iof->futex_flags,
                               &ifd->q, &hb);
        if (!ret) {
                hlist_add_head(&req->hash_node, &ctx->futex_list);
                io_ring_submit_unlock(ctx, issue_flags);

                futex_queue(&ifd->q, hb);
                return IOU_ISSUE_SKIP_COMPLETE;
        }

done_unlock:
        io_ring_submit_unlock(ctx, issue_flags);
done:
        if (ret < 0)
                req_set_fail(req);
        io_req_set_res(req, ret, 0);
        kfree(ifd);
        return IOU_OK;
}

int io_futex_wake(struct io_kiocb *req, unsigned int issue_flags)
{
        struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
        int ret;

        /*
         * Strict flags - ensure that waking 0 futexes yields a 0 result.
         * See commit 43adf8449510 ("futex: FLAGS_STRICT") for details.
         */
        ret = futex_wake(iof->uaddr, FLAGS_STRICT | iof->futex_flags,
                         iof->futex_val, iof->futex_mask);
        if (ret < 0)
                req_set_fail(req);
        io_req_set_res(req, ret, 0);
        return IOU_OK;
}
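
/*
 * Userspace usage sketch (not part of the kernel sources): a minimal,
 * hypothetical example of arming an async futex wait, assuming liburing
 * is used for ring setup and submission. The SQE field layout mirrors
 * io_futex_prep() above: addr = futex address, addr2 = value,
 * addr3 = mask, fd = FUTEX2_* flags.
 *
 *	uint32_t futex_word = 0;
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	memset(sqe, 0, sizeof(*sqe));
 *	sqe->opcode = IORING_OP_FUTEX_WAIT;
 *	sqe->addr   = (unsigned long) &futex_word;		// futex address
 *	sqe->addr2  = 0;					// expected value
 *	sqe->addr3  = FUTEX_BITSET_MATCH_ANY;			// wait mask
 *	sqe->fd     = FUTEX2_SIZE_U32 | FUTEX2_PRIVATE;		// futex2 flags
 *	io_uring_submit(&ring);
 *
 * The CQE posts with res == 0 once the futex is woken, e.g. by a matching
 * IORING_OP_FUTEX_WAKE on the same address.
 */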