xref: /linux/io_uring/io_uring.h (revision 4f38da1f027ea2c9f01bb71daa7a299c191b6940)
1 #ifndef IOU_CORE_H
2 #define IOU_CORE_H
3 
4 #include <linux/errno.h>
5 #include <linux/lockdep.h>
6 #include <linux/resume_user_mode.h>
7 #include <linux/kasan.h>
8 #include <linux/poll.h>
9 #include <linux/io_uring_types.h>
10 #include <uapi/linux/eventpoll.h>
11 #include "alloc_cache.h"
12 #include "io-wq.h"
13 #include "slist.h"
14 #include "opdef.h"
15 
16 #ifndef CREATE_TRACE_POINTS
17 #include <trace/events/io_uring.h>
18 #endif
19 
/*
 * Bitwise OR of every IORING_FEAT_* bit named below.
 * NOTE(review): presumably the feature mask reported to userspace at ring
 * setup time - confirm against the users of this macro.
 */
#define IORING_FEAT_FLAGS (IORING_FEAT_SINGLE_MMAP |\
			IORING_FEAT_NODROP |\
			IORING_FEAT_SUBMIT_STABLE |\
			IORING_FEAT_RW_CUR_POS |\
			IORING_FEAT_CUR_PERSONALITY |\
			IORING_FEAT_FAST_POLL |\
			IORING_FEAT_POLL_32BITS |\
			IORING_FEAT_SQPOLL_NONFIXED |\
			IORING_FEAT_EXT_ARG |\
			IORING_FEAT_NATIVE_WORKERS |\
			IORING_FEAT_RSRC_TAGS |\
			IORING_FEAT_CQE_SKIP |\
			IORING_FEAT_LINKED_FILE |\
			IORING_FEAT_REG_REG_RING |\
			IORING_FEAT_RECVSEND_BUNDLE |\
			IORING_FEAT_MIN_TIMEOUT |\
			IORING_FEAT_RW_ATTR |\
			IORING_FEAT_NO_IOWAIT)
38 
/*
 * Bitwise OR of every IORING_SETUP_* bit named below.
 * NOTE(review): presumably the set of setup flags accepted from userspace -
 * confirm against the users of this macro.
 */
#define IORING_SETUP_FLAGS (IORING_SETUP_IOPOLL |\
			IORING_SETUP_SQPOLL |\
			IORING_SETUP_SQ_AFF |\
			IORING_SETUP_CQSIZE |\
			IORING_SETUP_CLAMP |\
			IORING_SETUP_ATTACH_WQ |\
			IORING_SETUP_R_DISABLED |\
			IORING_SETUP_SUBMIT_ALL |\
			IORING_SETUP_COOP_TASKRUN |\
			IORING_SETUP_TASKRUN_FLAG |\
			IORING_SETUP_SQE128 |\
			IORING_SETUP_CQE32 |\
			IORING_SETUP_SINGLE_ISSUER |\
			IORING_SETUP_DEFER_TASKRUN |\
			IORING_SETUP_NO_MMAP |\
			IORING_SETUP_REGISTERED_FD_ONLY |\
			IORING_SETUP_NO_SQARRAY |\
			IORING_SETUP_HYBRID_IOPOLL |\
			IORING_SETUP_CQE_MIXED)
58 
/*
 * Bitwise OR of every IORING_ENTER_* bit named below.
 * NOTE(review): presumably the set of flags accepted by the enter syscall -
 * confirm against the users of this macro.
 */
#define IORING_ENTER_FLAGS (IORING_ENTER_GETEVENTS |\
			IORING_ENTER_SQ_WAKEUP |\
			IORING_ENTER_SQ_WAIT |\
			IORING_ENTER_EXT_ARG |\
			IORING_ENTER_REGISTERED_RING |\
			IORING_ENTER_ABS_TIMER |\
			IORING_ENTER_EXT_ARG_REG |\
			IORING_ENTER_NO_IOWAIT)
67 
68 
/*
 * Bitwise OR of every IOSQE_* bit named below.
 * NOTE(review): presumably the per-SQE flags accepted at submission - confirm
 * against the users of this macro.
 */
#define SQE_VALID_FLAGS (IOSQE_FIXED_FILE |\
			IOSQE_IO_DRAIN |\
			IOSQE_IO_LINK |\
			IOSQE_IO_HARDLINK |\
			IOSQE_ASYNC |\
			IOSQE_BUFFER_SELECT |\
			IOSQE_CQE_SKIP_SUCCESS)
76 
/*
 * Internal IOU_* result codes. Apart from IOU_COMPLETE they alias or sit
 * next to negative errno values.
 */
enum {
	/* NOTE(review): presumably "request finished, post its result" - confirm */
	IOU_COMPLETE		= 0,

	/*
	 * Shares its value with -EIOCBQUEUED.
	 * NOTE(review): presumably tells the issue path that completion is
	 * handled elsewhere - confirm against the issue code.
	 */
	IOU_ISSUE_SKIP_COMPLETE	= -EIOCBQUEUED,

	/*
	 * The request has more work to do and should be retried. io_uring will
	 * attempt to wait on the file for eligible opcodes, but otherwise
	 * it'll be handed to iowq for blocking execution. It works for normal
	 * requests as well as for the multi shot mode.
	 */
	IOU_RETRY		= -EAGAIN,

	/*
	 * Requeue the task_work to restart operations on this request. The
	 * actual value isn't important, should just be not an otherwise
	 * valid error code, yet less than -MAX_ERRNO and valid internally.
	 */
	IOU_REQUEUE		= -3072,
};
97 
/*
 * Per-waiter state; io_should_wake() below consults it to decide whether the
 * waiter should be woken.
 */
struct io_wait_queue {
	struct wait_queue_entry wq;
	/* ring this waiter is attached to */
	struct io_ring_ctx *ctx;
	/* CQ tail target: io_should_wake() fires once rings->cq.tail reaches it */
	unsigned cq_tail;
	/* NOTE(review): presumably the tail target tied to min_timeout - confirm */
	unsigned cq_min_tail;
	/* compared against ctx->cq_timeouts; a mismatch wakes the waiter */
	unsigned nr_timeouts;
	int hit_timeout;
	ktime_t min_timeout;
	ktime_t timeout;
	struct hrtimer t;

#ifdef CONFIG_NET_RX_BUSY_POLL
	/* only present when NAPI busy polling is configured in */
	ktime_t napi_busy_poll_dt;
	bool napi_prefer_busy_poll;
#endif
};
114 
115 static inline bool io_should_wake(struct io_wait_queue *iowq)
116 {
117 	struct io_ring_ctx *ctx = iowq->ctx;
118 	int dist = READ_ONCE(ctx->rings->cq.tail) - (int) iowq->cq_tail;
119 
120 	/*
121 	 * Wake up if we have enough events, or if a timeout occurred since we
122 	 * started waiting. For timeouts, we always want to return to userspace,
123 	 * regardless of event count.
124 	 */
125 	return dist >= 0 || atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts;
126 }
127 
/*
 * Ring size limits: the CQ limit is defined as twice the base entry limit.
 * NOTE(review): presumably IORING_MAX_ENTRIES caps the SQ ring - confirm.
 */
#define IORING_MAX_ENTRIES	32768
#define IORING_MAX_CQ_ENTRIES	(2 * IORING_MAX_ENTRIES)
130 
/*
 * Declarations only - the definitions live outside this header. Several of
 * them back the inline fast paths below: io_cqe_cache_refill() is the slow
 * path of io_get_cqe_overflow(), __io_commit_cqring_flush() that of
 * io_commit_cqring_flush().
 */
unsigned long rings_size(unsigned int flags, unsigned int sq_entries,
			 unsigned int cq_entries, size_t *sq_offset);
int io_uring_fill_params(unsigned entries, struct io_uring_params *p);
bool io_cqe_cache_refill(struct io_ring_ctx *ctx, bool overflow, bool cqe32);
int io_run_task_work_sig(struct io_ring_ctx *ctx);
void io_req_defer_failed(struct io_kiocb *req, s32 res);
bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags);
void io_add_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags);
bool io_req_post_cqe(struct io_kiocb *req, s32 res, u32 cflags);
bool io_req_post_cqe32(struct io_kiocb *req, struct io_uring_cqe src_cqe[2]);
void __io_commit_cqring_flush(struct io_ring_ctx *ctx);

void io_req_track_inflight(struct io_kiocb *req);
struct file *io_file_get_normal(struct io_kiocb *req, int fd);
struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
			       unsigned issue_flags);

/*
 * Task-work plumbing. __io_req_task_work_add() is wrapped by
 * io_req_task_work_add() and tctx_task_work_run() is called from
 * io_run_task_work(), both further down in this header.
 */
void __io_req_task_work_add(struct io_kiocb *req, unsigned flags);
void io_req_task_work_add_remote(struct io_kiocb *req, unsigned flags);
void io_req_task_queue(struct io_kiocb *req);
void io_req_task_complete(struct io_kiocb *req, io_tw_token_t tw);
void io_req_task_queue_fail(struct io_kiocb *req, int ret);
void io_req_task_submit(struct io_kiocb *req, io_tw_token_t tw);
struct llist_node *io_handle_tw_list(struct llist_node *node, unsigned int *count, unsigned int max_entries);
struct llist_node *tctx_task_work_run(struct io_uring_task *tctx, unsigned int max_entries, unsigned int *count);
void tctx_task_work(struct callback_head *cb);
__cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd);

int io_ring_add_registered_file(struct io_uring_task *tctx, struct file *file,
				     int start, int end);
void io_req_queue_iowq(struct io_kiocb *req);

int io_poll_issue(struct io_kiocb *req, io_tw_token_t tw);
int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr);
int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin);
/* slow path behind io_submit_flush_completions() */
void __io_submit_flush_completions(struct io_ring_ctx *ctx);

struct io_wq_work *io_wq_free_work(struct io_wq_work *work);
void io_wq_submit_work(struct io_wq_work *work);

void io_free_req(struct io_kiocb *req);
void io_queue_next(struct io_kiocb *req);
/* slow path behind io_get_task_refs() */
void io_task_refs_refill(struct io_uring_task *tctx);
/* slow path behind io_alloc_req() */
bool __io_alloc_req_refill(struct io_ring_ctx *ctx);

bool io_match_task_safe(struct io_kiocb *head, struct io_uring_task *tctx,
			bool cancel_all);

void io_activate_pollwq(struct io_ring_ctx *ctx);
180 
/*
 * Lockdep-only sanity check that the caller is allowed to touch the CQ ring.
 * Compiles to nothing unless CONFIG_PROVE_LOCKING is set.
 *
 * The rule depends on how the ring was set up:
 *  - DEFER_TASKRUN rings must hold ->uring_lock;
 *  - IOPOLL rings must hold ->uring_lock;
 *  - otherwise, rings without ->task_complete must hold ->completion_lock;
 *  - otherwise, if a submitter task is recorded and the ring is not dying,
 *    the caller must be that task.
 */
static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx)
{
#if defined(CONFIG_PROVE_LOCKING)
	lockdep_assert(in_task());

	if (ctx->flags & IORING_SETUP_DEFER_TASKRUN)
		lockdep_assert_held(&ctx->uring_lock);

	if (ctx->flags & IORING_SETUP_IOPOLL) {
		lockdep_assert_held(&ctx->uring_lock);
		return;
	}

	if (!ctx->task_complete) {
		lockdep_assert_held(&ctx->completion_lock);
		return;
	}

	/*
	 * ->submitter_task may be NULL and we can still post a CQE, if the
	 * ring has been setup with IORING_SETUP_R_DISABLED. Not from an SQE,
	 * as those cannot be submitted, but via updating tagged resources.
	 */
	if (ctx->submitter_task && !percpu_ref_is_dying(&ctx->refs))
		lockdep_assert(current == ctx->submitter_task);
#endif
}
205 
206 static inline bool io_is_compat(struct io_ring_ctx *ctx)
207 {
208 	return IS_ENABLED(CONFIG_COMPAT) && unlikely(ctx->compat);
209 }
210 
/* Queue task_work for @req with no extra flags set. */
static inline void io_req_task_work_add(struct io_kiocb *req)
{
	const unsigned no_flags = 0;

	__io_req_task_work_add(req, no_flags);
}
215 
216 static inline void io_submit_flush_completions(struct io_ring_ctx *ctx)
217 {
218 	if (!wq_list_empty(&ctx->submit_state.compl_reqs) ||
219 	    ctx->submit_state.cq_flush)
220 		__io_submit_flush_completions(ctx);
221 }
222 
/*
 * Walk a chain via its ->link pointers, starting at @head and stopping at the
 * first NULL. @pos is the cursor and must be an assignable lvalue.
 */
#define io_for_each_link(pos, head) \
	for (pos = (head); pos; pos = pos->link)
225 
226 static inline bool io_get_cqe_overflow(struct io_ring_ctx *ctx,
227 					struct io_uring_cqe **ret,
228 					bool overflow, bool cqe32)
229 {
230 	io_lockdep_assert_cq_locked(ctx);
231 
232 	if (unlikely(ctx->cqe_sentinel - ctx->cqe_cached < (cqe32 + 1))) {
233 		if (unlikely(!io_cqe_cache_refill(ctx, overflow, cqe32)))
234 			return false;
235 	}
236 	*ret = ctx->cqe_cached;
237 	ctx->cached_cq_tail++;
238 	ctx->cqe_cached++;
239 	if (ctx->flags & IORING_SETUP_CQE32) {
240 		ctx->cqe_cached++;
241 	} else if (cqe32 && ctx->flags & IORING_SETUP_CQE_MIXED) {
242 		ctx->cqe_cached++;
243 		ctx->cached_cq_tail++;
244 	}
245 	WARN_ON_ONCE(ctx->cqe_cached > ctx->cqe_sentinel);
246 	return true;
247 }
248 
249 static inline bool io_get_cqe(struct io_ring_ctx *ctx, struct io_uring_cqe **ret,
250 				bool cqe32)
251 {
252 	return io_get_cqe_overflow(ctx, ret, false, cqe32);
253 }
254 
255 static inline bool io_defer_get_uncommited_cqe(struct io_ring_ctx *ctx,
256 					       struct io_uring_cqe **cqe_ret)
257 {
258 	io_lockdep_assert_cq_locked(ctx);
259 
260 	ctx->submit_state.cq_flush = true;
261 	return io_get_cqe(ctx, cqe_ret, ctx->flags & IORING_SETUP_CQE_MIXED);
262 }
263 
264 static __always_inline bool io_fill_cqe_req(struct io_ring_ctx *ctx,
265 					    struct io_kiocb *req)
266 {
267 	bool is_cqe32 = req->cqe.flags & IORING_CQE_F_32;
268 	struct io_uring_cqe *cqe;
269 
270 	/*
271 	 * If we can't get a cq entry, userspace overflowed the submission
272 	 * (by quite a lot).
273 	 */
274 	if (unlikely(!io_get_cqe(ctx, &cqe, is_cqe32)))
275 		return false;
276 
277 	memcpy(cqe, &req->cqe, sizeof(*cqe));
278 	if (ctx->flags & IORING_SETUP_CQE32 || is_cqe32) {
279 		memcpy(cqe->big_cqe, &req->big_cqe, sizeof(*cqe));
280 		memset(&req->big_cqe, 0, sizeof(req->big_cqe));
281 	}
282 
283 	if (trace_io_uring_complete_enabled())
284 		trace_io_uring_complete(req->ctx, req, cqe);
285 	return true;
286 }
287 
288 static inline void req_set_fail(struct io_kiocb *req)
289 {
290 	req->flags |= REQ_F_FAIL;
291 	if (req->flags & REQ_F_CQE_SKIP) {
292 		req->flags &= ~REQ_F_CQE_SKIP;
293 		req->flags |= REQ_F_SKIP_LINK_CQES;
294 	}
295 }
296 
297 static inline void io_req_set_res(struct io_kiocb *req, s32 res, u32 cflags)
298 {
299 	req->cqe.res = res;
300 	req->cqe.flags = cflags;
301 }
302 
303 static inline u32 ctx_cqe32_flags(struct io_ring_ctx *ctx)
304 {
305 	if (ctx->flags & IORING_SETUP_CQE_MIXED)
306 		return IORING_CQE_F_32;
307 	return 0;
308 }
309 
310 static inline void io_req_set_res32(struct io_kiocb *req, s32 res, u32 cflags,
311 				    __u64 extra1, __u64 extra2)
312 {
313 	req->cqe.res = res;
314 	req->cqe.flags = cflags | ctx_cqe32_flags(req->ctx);
315 	req->big_cqe.extra1 = extra1;
316 	req->big_cqe.extra2 = extra2;
317 }
318 
319 static inline void *io_uring_alloc_async_data(struct io_alloc_cache *cache,
320 					      struct io_kiocb *req)
321 {
322 	if (cache) {
323 		req->async_data = io_cache_alloc(cache, GFP_KERNEL);
324 	} else {
325 		const struct io_issue_def *def = &io_issue_defs[req->opcode];
326 
327 		WARN_ON_ONCE(!def->async_size);
328 		req->async_data = kmalloc(def->async_size, GFP_KERNEL);
329 	}
330 	if (req->async_data)
331 		req->flags |= REQ_F_ASYNC_DATA;
332 	return req->async_data;
333 }
334 
335 static inline bool req_has_async_data(struct io_kiocb *req)
336 {
337 	return req->flags & REQ_F_ASYNC_DATA;
338 }
339 
340 static inline void io_req_async_data_clear(struct io_kiocb *req,
341 					   io_req_flags_t extra_flags)
342 {
343 	req->flags &= ~(REQ_F_ASYNC_DATA|extra_flags);
344 	req->async_data = NULL;
345 }
346 
347 static inline void io_req_async_data_free(struct io_kiocb *req)
348 {
349 	kfree(req->async_data);
350 	io_req_async_data_clear(req, 0);
351 }
352 
353 static inline void io_put_file(struct io_kiocb *req)
354 {
355 	if (!(req->flags & REQ_F_FIXED_FILE) && req->file)
356 		fput(req->file);
357 }
358 
359 static inline void io_ring_submit_unlock(struct io_ring_ctx *ctx,
360 					 unsigned issue_flags)
361 {
362 	lockdep_assert_held(&ctx->uring_lock);
363 	if (unlikely(issue_flags & IO_URING_F_UNLOCKED))
364 		mutex_unlock(&ctx->uring_lock);
365 }
366 
367 static inline void io_ring_submit_lock(struct io_ring_ctx *ctx,
368 				       unsigned issue_flags)
369 {
370 	/*
371 	 * "Normal" inline submissions always hold the uring_lock, since we
372 	 * grab it from the system call. Same is true for the SQPOLL offload.
373 	 * The only exception is when we've detached the request and issue it
374 	 * from an async worker thread, grab the lock for that case.
375 	 */
376 	if (unlikely(issue_flags & IO_URING_F_UNLOCKED))
377 		mutex_lock(&ctx->uring_lock);
378 	lockdep_assert_held(&ctx->uring_lock);
379 }
380 
381 static inline void io_commit_cqring(struct io_ring_ctx *ctx)
382 {
383 	/* order cqe stores with ring update */
384 	smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail);
385 }
386 
387 static inline void __io_wq_wake(struct wait_queue_head *wq)
388 {
389 	/*
390 	 *
391 	 * Pass in EPOLLIN|EPOLL_URING_WAKE as the poll wakeup key. The latter
392 	 * set in the mask so that if we recurse back into our own poll
393 	 * waitqueue handlers, we know we have a dependency between eventfd or
394 	 * epoll and should terminate multishot poll at that point.
395 	 */
396 	if (wq_has_sleeper(wq))
397 		__wake_up(wq, TASK_NORMAL, 0, poll_to_key(EPOLL_URING_WAKE | EPOLLIN));
398 }
399 
400 static inline void io_poll_wq_wake(struct io_ring_ctx *ctx)
401 {
402 	__io_wq_wake(&ctx->poll_wq);
403 }
404 
405 static inline void io_cqring_wake(struct io_ring_ctx *ctx)
406 {
407 	/*
408 	 * Trigger waitqueue handler on all waiters on our waitqueue. This
409 	 * won't necessarily wake up all the tasks, io_should_wake() will make
410 	 * that decision.
411 	 */
412 
413 	__io_wq_wake(&ctx->cq_wait);
414 }
415 
416 static inline bool io_sqring_full(struct io_ring_ctx *ctx)
417 {
418 	struct io_rings *r = ctx->rings;
419 
420 	/*
421 	 * SQPOLL must use the actual sqring head, as using the cached_sq_head
422 	 * is race prone if the SQPOLL thread has grabbed entries but not yet
423 	 * committed them to the ring. For !SQPOLL, this doesn't matter, but
424 	 * since this helper is just used for SQPOLL sqring waits (or POLLOUT),
425 	 * just read the actual sqring head unconditionally.
426 	 */
427 	return READ_ONCE(r->sq.tail) - READ_ONCE(r->sq.head) == ctx->sq_entries;
428 }
429 
430 static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx)
431 {
432 	struct io_rings *rings = ctx->rings;
433 	unsigned int entries;
434 
435 	/* make sure SQ entry isn't read before tail */
436 	entries = smp_load_acquire(&rings->sq.tail) - ctx->cached_sq_head;
437 	return min(entries, ctx->sq_entries);
438 }
439 
440 static inline int io_run_task_work(void)
441 {
442 	bool ret = false;
443 
444 	/*
445 	 * Always check-and-clear the task_work notification signal. With how
446 	 * signaling works for task_work, we can find it set with nothing to
447 	 * run. We need to clear it for that case, like get_signal() does.
448 	 */
449 	if (test_thread_flag(TIF_NOTIFY_SIGNAL))
450 		clear_notify_signal();
451 	/*
452 	 * PF_IO_WORKER never returns to userspace, so check here if we have
453 	 * notify work that needs processing.
454 	 */
455 	if (current->flags & PF_IO_WORKER) {
456 		if (test_thread_flag(TIF_NOTIFY_RESUME)) {
457 			__set_current_state(TASK_RUNNING);
458 			resume_user_mode_work(NULL);
459 		}
460 		if (current->io_uring) {
461 			unsigned int count = 0;
462 
463 			__set_current_state(TASK_RUNNING);
464 			tctx_task_work_run(current->io_uring, UINT_MAX, &count);
465 			if (count)
466 				ret = true;
467 		}
468 	}
469 	if (task_work_pending(current)) {
470 		__set_current_state(TASK_RUNNING);
471 		task_work_run();
472 		ret = true;
473 	}
474 
475 	return ret;
476 }
477 
478 static inline bool io_local_work_pending(struct io_ring_ctx *ctx)
479 {
480 	return !llist_empty(&ctx->work_llist) || !llist_empty(&ctx->retry_llist);
481 }
482 
483 static inline bool io_task_work_pending(struct io_ring_ctx *ctx)
484 {
485 	return task_work_pending(current) || io_local_work_pending(ctx);
486 }
487 
/*
 * Despite the name, this takes no lock: it only asserts (under lockdep) that
 * ->uring_lock is already held. @tw is not used by the body.
 */
static inline void io_tw_lock(struct io_ring_ctx *ctx, io_tw_token_t tw)
{
	lockdep_assert_held(&ctx->uring_lock);
}
492 
493 /*
494  * Don't complete immediately but use deferred completion infrastructure.
495  * Protected by ->uring_lock and can only be used either with
496  * IO_URING_F_COMPLETE_DEFER or inside a tw handler holding the mutex.
497  */
498 static inline void io_req_complete_defer(struct io_kiocb *req)
499 	__must_hold(&req->ctx->uring_lock)
500 {
501 	struct io_submit_state *state = &req->ctx->submit_state;
502 
503 	lockdep_assert_held(&req->ctx->uring_lock);
504 
505 	wq_list_add_tail(&req->comp_list, &state->compl_reqs);
506 }
507 
508 static inline void io_commit_cqring_flush(struct io_ring_ctx *ctx)
509 {
510 	if (unlikely(ctx->off_timeout_used ||
511 		     ctx->has_evfd || ctx->poll_activated))
512 		__io_commit_cqring_flush(ctx);
513 }
514 
515 static inline void io_get_task_refs(int nr)
516 {
517 	struct io_uring_task *tctx = current->io_uring;
518 
519 	tctx->cached_refs -= nr;
520 	if (unlikely(tctx->cached_refs < 0))
521 		io_task_refs_refill(tctx);
522 }
523 
524 static inline bool io_req_cache_empty(struct io_ring_ctx *ctx)
525 {
526 	return !ctx->submit_state.free_list.next;
527 }
528 
/*
 * Defined outside this header.
 * NOTE(review): presumably the slab cache that backs struct io_kiocb - confirm.
 */
extern struct kmem_cache *req_cachep;
530 
531 static inline struct io_kiocb *io_extract_req(struct io_ring_ctx *ctx)
532 {
533 	struct io_kiocb *req;
534 
535 	req = container_of(ctx->submit_state.free_list.next, struct io_kiocb, comp_list);
536 	wq_stack_extract(&ctx->submit_state.free_list);
537 	return req;
538 }
539 
540 static inline bool io_alloc_req(struct io_ring_ctx *ctx, struct io_kiocb **req)
541 {
542 	if (unlikely(io_req_cache_empty(ctx))) {
543 		if (!__io_alloc_req_refill(ctx))
544 			return false;
545 	}
546 	*req = io_extract_req(ctx);
547 	return true;
548 }
549 
550 static inline bool io_allowed_defer_tw_run(struct io_ring_ctx *ctx)
551 {
552 	return likely(ctx->submitter_task == current);
553 }
554 
555 static inline bool io_allowed_run_tw(struct io_ring_ctx *ctx)
556 {
557 	return likely(!(ctx->flags & IORING_SETUP_DEFER_TASKRUN) ||
558 		      ctx->submitter_task == current);
559 }
560 
561 /*
562  * Terminate the request if either of these conditions are true:
563  *
564  * 1) It's being executed by the original task, but that task is marked
565  *    with PF_EXITING as it's exiting.
566  * 2) PF_KTHREAD is set, in which case the invoker of the task_work is
567  *    our fallback task_work.
568  */
569 static inline bool io_should_terminate_tw(struct io_ring_ctx *ctx)
570 {
571 	return (current->flags & (PF_KTHREAD | PF_EXITING)) || percpu_ref_is_dying(&ctx->refs);
572 }
573 
574 static inline void io_req_queue_tw_complete(struct io_kiocb *req, s32 res)
575 {
576 	io_req_set_res(req, res, 0);
577 	req->io_task_work.func = io_req_task_complete;
578 	io_req_task_work_add(req);
579 }
580 
581 /*
582  * IORING_SETUP_SQE128 contexts allocate twice the normal SQE size for each
583  * slot.
584  */
585 static inline size_t uring_sqe_size(struct io_ring_ctx *ctx)
586 {
587 	if (ctx->flags & IORING_SETUP_SQE128)
588 		return 2 * sizeof(struct io_uring_sqe);
589 	return sizeof(struct io_uring_sqe);
590 }
591 
592 static inline bool io_file_can_poll(struct io_kiocb *req)
593 {
594 	if (req->flags & REQ_F_CAN_POLL)
595 		return true;
596 	if (req->file && file_can_poll(req->file)) {
597 		req->flags |= REQ_F_CAN_POLL;
598 		return true;
599 	}
600 	return false;
601 }
602 
603 static inline ktime_t io_get_time(struct io_ring_ctx *ctx)
604 {
605 	if (ctx->clockid == CLOCK_MONOTONIC)
606 		return ktime_get();
607 
608 	return ktime_get_with_offset(ctx->clock_offset);
609 }
610 
/*
 * Bit numbers for ctx->check_cq; io_has_work() tests the overflow bit with
 * test_bit().
 */
enum {
	IO_CHECK_CQ_OVERFLOW_BIT,
	IO_CHECK_CQ_DROPPED_BIT,
};
615 
616 static inline bool io_has_work(struct io_ring_ctx *ctx)
617 {
618 	return test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq) ||
619 	       io_local_work_pending(ctx);
620 }
621 #endif
622