xref: /linux/include/trace/events/io_uring.h (revision 6331b8765cd0634a4e4cdcc1a6f1a74196616b94)
1 /* SPDX-License-Identifier: GPL-2.0 */
/* Name of the trace system used for the events declared in this header. */
2 #undef TRACE_SYSTEM
3 #define TRACE_SYSTEM io_uring
4 
/*
 * Not a plain include guard: the body below is processed again whenever
 * TRACE_HEADER_MULTI_READ is defined, even if _TRACE_IO_URING_H already is.
 */
5 #if !defined(_TRACE_IO_URING_H) || defined(TRACE_HEADER_MULTI_READ)
6 #define _TRACE_IO_URING_H
7 
8 #include <linux/tracepoint.h>
9 #include <uapi/linux/io_uring.h>
10 
/* Forward declaration; the events below only record a pointer to it. */
11 struct io_wq_work;
12 
13 /**
14  * io_uring_create - called after a new io_uring context was prepared
15  *
16  * @fd:		corresponding file descriptor
17  * @ctx:	pointer to a ring context structure
18  * @sq_entries:	actual SQ size
19  * @cq_entries:	actual CQ size
20  * @flags:	SQ ring flags, provided to io_uring_setup(2)
21  *
22  * Allows tracing io_uring creation and provides a pointer to the context,
23  * which can be used later to find correlated events.
24  */
25 TRACE_EVENT(io_uring_create,
26 
27 	TP_PROTO(int fd, void *ctx, u32 sq_entries, u32 cq_entries, u32 flags),
28 
29 	TP_ARGS(fd, ctx, sq_entries, cq_entries, flags),
30 
31 	TP_STRUCT__entry (
32 		__field(  int,		fd			)
33 		__field(  void *,	ctx			)
34 		__field(  u32,		sq_entries	)
35 		__field(  u32,		cq_entries	)
36 		__field(  u32,		flags		)
37 	),
38 
39 	TP_fast_assign(
40 		__entry->fd			= fd;
41 		__entry->ctx		= ctx;
42 		__entry->sq_entries	= sq_entries;
43 		__entry->cq_entries	= cq_entries;
44 		__entry->flags		= flags;
45 	),
46 
47 	TP_printk("ring %p, fd %d sq size %d, cq size %d, flags %d",
48 			  __entry->ctx, __entry->fd, __entry->sq_entries,
49 			  __entry->cq_entries, __entry->flags)
50 );
51 
52 /**
53  * io_uring_register - called after a buffer/file/eventfd was successfully
54  * 					   registered for a ring
55  *
56  * @ctx:		pointer to a ring context structure
57  * @opcode:		describes which operation to perform
58  * @nr_files:		number of registered files (printed as nr_user_files)
59  * @nr_bufs:		number of registered buffers (printed as nr_user_bufs)
60  * @eventfd:		whether an eventfd is registered or not
61  * @ret:		return code
62  *
63  * Allows tracing fixed files/buffers/eventfds, which can be registered to
64  * avoid the overhead of getting references to them for every operation. This
65  * event, together with io_uring_file_get, can provide a full picture of how
66  * much overhead one can reduce via fixing.
67  */
68 TRACE_EVENT(io_uring_register,
69 
70 	TP_PROTO(void *ctx, unsigned opcode, unsigned nr_files,
71 			 unsigned nr_bufs, bool eventfd, long ret),
72 
73 	TP_ARGS(ctx, opcode, nr_files, nr_bufs, eventfd, ret),
74 
75 	TP_STRUCT__entry (
76 		__field(  void *,	ctx			)
77 		__field(  unsigned,	opcode		)
78 		__field(  unsigned,	nr_files	)
79 		__field(  unsigned,	nr_bufs		)
80 		__field(  bool,		eventfd		)
81 		__field(  long,		ret			)
82 	),
83 
84 	TP_fast_assign(
85 		__entry->ctx		= ctx;
86 		__entry->opcode		= opcode;
87 		__entry->nr_files	= nr_files;
88 		__entry->nr_bufs	= nr_bufs;
89 		__entry->eventfd	= eventfd;
90 		__entry->ret		= ret;
91 	),
92 
93 	TP_printk("ring %p, opcode %d, nr_user_files %d, nr_user_bufs %d, "
94 			  "eventfd %d, ret %ld",
95 			  __entry->ctx, __entry->opcode, __entry->nr_files,
96 			  __entry->nr_bufs, __entry->eventfd, __entry->ret)
97 );
98 
99 /**
100  * io_uring_file_get - called before getting references to an SQE file
101  *
102  * @ctx:	pointer to a ring context structure
103  * @fd:		SQE file descriptor
104  *
105  * Allows tracing how often an SQE file reference is obtained, which can help
106  * to figure out whether it makes sense to use fixed files, or to check that
107  * fixed files are used correctly.
108  */
109 TRACE_EVENT(io_uring_file_get,
110 
111 	TP_PROTO(void *ctx, int fd),
112 
113 	TP_ARGS(ctx, fd),
114 
115 	TP_STRUCT__entry (
116 		__field(  void *,	ctx	)
117 		__field(  int,		fd	)
118 	),
119 
120 	TP_fast_assign(
121 		__entry->ctx	= ctx;
122 		__entry->fd		= fd;
123 	),
124 
125 	TP_printk("ring %p, fd %d", __entry->ctx, __entry->fd)
126 );
127 
128 /**
129  * io_uring_queue_async_work - called before submitting a new async work
130  *
131  * @ctx:	pointer to a ring context structure
132  * @rw:		non-zero for a hashed workqueue, zero for a normal one
133  * @req:	pointer to a submitted request
134  * @work:	pointer to a submitted io_wq_work
 * @flags:	flags value supplied by the caller; presumably the request
 *		flags -- confirm at the call site
135  *
136  * Allows tracing asynchronous work submission.
137  */
138 TRACE_EVENT(io_uring_queue_async_work,
139 
140 	TP_PROTO(void *ctx, int rw, void * req, struct io_wq_work *work,
141 			 unsigned int flags),
142 
143 	TP_ARGS(ctx, rw, req, work, flags),
144 
145 	TP_STRUCT__entry (
146 		__field(  void *,			ctx	)
147 		__field(  int,				rw	)
148 		__field(  void *,			req	)
149 		__field(  struct io_wq_work *,		work	)
150 		__field(  unsigned int,			flags	)
151 	),
152 
153 	TP_fast_assign(
154 		__entry->ctx	= ctx;
155 		__entry->rw	= rw;
156 		__entry->req	= req;
157 		__entry->work	= work;
158 		__entry->flags	= flags;
159 	),
160 
161 	TP_printk("ring %p, request %p, flags %d, %s queue, work %p",
162 			  __entry->ctx, __entry->req, __entry->flags,
163 			  __entry->rw ? "hashed" : "normal", __entry->work)
164 );
165 
166 /**
167  * io_uring_defer - called when an io_uring request is deferred
168  *
169  * @ctx:	pointer to a ring context structure
170  * @req:	pointer to a deferred request
171  * @user_data:	user data associated with the request (stored as "data")
172  *
173  * Allows tracking deferred requests, to get insight into which requests are
174  * not started immediately.
175  */
176 TRACE_EVENT(io_uring_defer,
177 
178 	TP_PROTO(void *ctx, void *req, unsigned long long user_data),
179 
180 	TP_ARGS(ctx, req, user_data),
181 
182 	TP_STRUCT__entry (
183 		__field(  void *,	ctx		)
184 		__field(  void *,	req		)
185 		__field(  unsigned long long, data	)
186 	),
187 
188 	TP_fast_assign(
189 		__entry->ctx	= ctx;
190 		__entry->req	= req;
191 		__entry->data	= user_data;
192 	),
193 
194 	TP_printk("ring %p, request %p user_data %llu", __entry->ctx,
195 			__entry->req, __entry->data)
196 );
197 
198 /**
199  * io_uring_link - called before the io_uring request is added into link_list
200  * 		   of another request
201  *
202  * @ctx:		pointer to a ring context structure
203  * @req:		pointer to a linked request
204  * @target_req:		pointer to a previous request, that would contain @req
205  *
206  * Allows tracking linked requests, to understand dependencies between
207  * requests and how they influence the execution flow.
208  */
209 TRACE_EVENT(io_uring_link,
210 
211 	TP_PROTO(void *ctx, void *req, void *target_req),
212 
213 	TP_ARGS(ctx, req, target_req),
214 
215 	TP_STRUCT__entry (
216 		__field(  void *,	ctx		)
217 		__field(  void *,	req		)
218 		__field(  void *,	target_req	)
219 	),
220 
221 	TP_fast_assign(
222 		__entry->ctx		= ctx;
223 		__entry->req		= req;
224 		__entry->target_req	= target_req;
225 	),
226 
227 	TP_printk("ring %p, request %p linked after %p",
228 			  __entry->ctx, __entry->req, __entry->target_req)
229 );
230 
231 /**
232  * io_uring_cqring_wait - called before starting to wait for an available CQE
233  *
234  * @ctx:		pointer to a ring context structure
235  * @min_events:	minimal number of events to wait for
236  *
237  * Allows tracking waits for CQEs, so that we can e.g. troubleshoot
238  * situations where an application waits for an event that never
239  * comes.
240  */
241 TRACE_EVENT(io_uring_cqring_wait,
242 
243 	TP_PROTO(void *ctx, int min_events),
244 
245 	TP_ARGS(ctx, min_events),
246 
247 	TP_STRUCT__entry (
248 		__field(  void *,	ctx		)
249 		__field(  int,		min_events	)
250 	),
251 
252 	TP_fast_assign(
253 		__entry->ctx	= ctx;
254 		__entry->min_events	= min_events;
255 	),
256 
257 	TP_printk("ring %p, min_events %d", __entry->ctx, __entry->min_events)
258 );
259 
260 /**
261  * io_uring_fail_link - called before failing a linked request
262  *
263  * @req:	request whose links were cancelled
264  * @link:	cancelled link
265  *
266  * Allows tracking cancellation of linked requests, to see not only that some
267  * work was cancelled, but also which request was the reason.
268  */
269 TRACE_EVENT(io_uring_fail_link,
270 
271 	TP_PROTO(void *req, void *link),
272 
273 	TP_ARGS(req, link),
274 
275 	TP_STRUCT__entry (
276 		__field(  void *,	req	)
277 		__field(  void *,	link	)
278 	),
279 
280 	TP_fast_assign(
281 		__entry->req	= req;
282 		__entry->link	= link;
283 	),
284 
285 	TP_printk("request %p, link %p", __entry->req, __entry->link)
286 );
287 
288 /**
289  * io_uring_complete - called when completing an SQE
290  *
291  * @ctx:		pointer to a ring context structure
292  * @user_data:		user data associated with the request
293  * @res:		result of the request
294  * @cflags:		completion flags
295  *
 * Allows tracing request completions together with their result and
 * completion flags; user_data and cflags are printed in hex.
296  */
297 TRACE_EVENT(io_uring_complete,
298 
299 	TP_PROTO(void *ctx, u64 user_data, int res, unsigned cflags),
300 
301 	TP_ARGS(ctx, user_data, res, cflags),
302 
303 	TP_STRUCT__entry (
304 		__field(  void *,	ctx		)
305 		__field(  u64,		user_data	)
306 		__field(  int,		res		)
307 		__field(  unsigned,	cflags		)
308 	),
309 
310 	TP_fast_assign(
311 		__entry->ctx		= ctx;
312 		__entry->user_data	= user_data;
313 		__entry->res		= res;
314 		__entry->cflags		= cflags;
315 	),
316 
317 	TP_printk("ring %p, user_data 0x%llx, result %d, cflags %x",
318 			  __entry->ctx, (unsigned long long)__entry->user_data,
319 			  __entry->res, __entry->cflags)
320 );
321 
322 /**
323  * io_uring_submit_sqe - called before submitting one SQE
324  *
325  * @ctx:		pointer to a ring context structure
326  * @req:		pointer to a submitted request
327  * @opcode:		opcode of request
328  * @user_data:		user data associated with the request
329  * @flags:		request flags
330  * @force_nonblock:	whether the submission is forced to be non-blocking
331  * @sq_thread:		true if sq_thread has submitted this SQE
332  *
333  * Allows tracking SQE submission, to understand what its source was: the SQ
334  * thread or an io_uring_enter call.
335  */
336 TRACE_EVENT(io_uring_submit_sqe,
337 
338 	TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data, u32 flags,
339 		 bool force_nonblock, bool sq_thread),
340 
341 	TP_ARGS(ctx, req, opcode, user_data, flags, force_nonblock, sq_thread),
342 
343 	TP_STRUCT__entry (
344 		__field(  void *,	ctx		)
345 		__field(  void *,	req		)
346 		__field(  u8,		opcode		)
347 		__field(  u64,		user_data	)
348 		__field(  u32,		flags		)
349 		__field(  bool,		force_nonblock	)
350 		__field(  bool,		sq_thread	)
351 	),
352 
353 	TP_fast_assign(
354 		__entry->ctx		= ctx;
355 		__entry->req		= req;
356 		__entry->opcode		= opcode;
357 		__entry->user_data	= user_data;
358 		__entry->flags		= flags;
359 		__entry->force_nonblock	= force_nonblock;
360 		__entry->sq_thread	= sq_thread;
361 	),
362 
363 	TP_printk("ring %p, req %p, op %d, data 0x%llx, flags %u, "
364 		  "non block %d, sq_thread %d", __entry->ctx, __entry->req,
365 		  __entry->opcode, (unsigned long long)__entry->user_data,
366 		  __entry->flags, __entry->force_nonblock, __entry->sq_thread)
367 );
368 
369 /*
370  * io_uring_poll_arm - called after arming a poll wait if successful
371  *
372  * @ctx:		pointer to a ring context structure
373  * @req:		pointer to the armed request
374  * @opcode:		opcode of request
375  * @user_data:		user data associated with the request
376  * @mask:		request poll events mask
377  * @events:		registered events of interest
378  *
379  * Allows tracking which requests are armed for polling and which events
380  * they are interested in.
381  */
382 TRACE_EVENT(io_uring_poll_arm,
383 
384 	TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data,
385 		 int mask, int events),
386 
387 	TP_ARGS(ctx, req, opcode, user_data, mask, events),
388 
389 	TP_STRUCT__entry (
390 		__field(  void *,	ctx		)
391 		__field(  void *,	req		)
392 		__field(  u8,		opcode		)
393 		__field(  u64,		user_data	)
394 		__field(  int,		mask		)
395 		__field(  int,		events		)
396 	),
397 
398 	TP_fast_assign(
399 		__entry->ctx		= ctx;
400 		__entry->req		= req;
401 		__entry->opcode		= opcode;
402 		__entry->user_data	= user_data;
403 		__entry->mask		= mask;
404 		__entry->events		= events;
405 	),
406 
407 	TP_printk("ring %p, req %p, op %d, data 0x%llx, mask 0x%x, events 0x%x",
408 		  __entry->ctx, __entry->req, __entry->opcode,
409 		  (unsigned long long) __entry->user_data,
410 		  __entry->mask, __entry->events)
411 );
412 
/*
 * io_uring_poll_wake - records a ring, opcode, user data and poll mask
 *
 * @ctx:		pointer to a ring context structure
 * @opcode:		opcode of request
 * @user_data:		user data associated with the request
 * @mask:		events mask, printed in hex
 *
 * NOTE(review): the call site is not visible from this header; presumably
 * this fires when a poll-armed request is woken up -- confirm at the caller.
 */
413 TRACE_EVENT(io_uring_poll_wake,
414 
415 	TP_PROTO(void *ctx, u8 opcode, u64 user_data, int mask),
416 
417 	TP_ARGS(ctx, opcode, user_data, mask),
418 
419 	TP_STRUCT__entry (
420 		__field(  void *,	ctx		)
421 		__field(  u8,		opcode		)
422 		__field(  u64,		user_data	)
423 		__field(  int,		mask		)
424 	),
425 
426 	TP_fast_assign(
427 		__entry->ctx		= ctx;
428 		__entry->opcode		= opcode;
429 		__entry->user_data	= user_data;
430 		__entry->mask		= mask;
431 	),
432 
433 	TP_printk("ring %p, op %d, data 0x%llx, mask 0x%x",
434 			  __entry->ctx, __entry->opcode,
435 			  (unsigned long long) __entry->user_data,
436 			  __entry->mask)
437 );
438 
/*
 * io_uring_task_add - records a ring, opcode, user data and events mask
 *
 * @ctx:		pointer to a ring context structure
 * @opcode:		opcode of request
 * @user_data:		user data associated with the request
 * @mask:		events mask; printed in hex without a "0x" prefix,
 *			unlike io_uring_poll_wake
 *
 * NOTE(review): the call site is not visible from this header; presumably
 * this fires when task work is queued for a request -- confirm at the caller.
 */
439 TRACE_EVENT(io_uring_task_add,
440 
441 	TP_PROTO(void *ctx, u8 opcode, u64 user_data, int mask),
442 
443 	TP_ARGS(ctx, opcode, user_data, mask),
444 
445 	TP_STRUCT__entry (
446 		__field(  void *,	ctx		)
447 		__field(  u8,		opcode		)
448 		__field(  u64,		user_data	)
449 		__field(  int,		mask		)
450 	),
451 
452 	TP_fast_assign(
453 		__entry->ctx		= ctx;
454 		__entry->opcode		= opcode;
455 		__entry->user_data	= user_data;
456 		__entry->mask		= mask;
457 	),
458 
459 	TP_printk("ring %p, op %d, data 0x%llx, mask %x",
460 			  __entry->ctx, __entry->opcode,
461 			  (unsigned long long) __entry->user_data,
462 			  __entry->mask)
463 );
464 
465 /*
466  * io_uring_task_run - called when task_work_run() executes the poll events
467  *                     notification callbacks
468  *
469  * @ctx:		pointer to a ring context structure
470  * @req:		pointer to the armed request
471  * @opcode:		opcode of request
472  * @user_data:		user data associated with the request
473  *
474  * Allows tracking when notified poll events are processed.
475  */
476 TRACE_EVENT(io_uring_task_run,
477 
478 	TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data),
479 
480 	TP_ARGS(ctx, req, opcode, user_data),
481 
482 	TP_STRUCT__entry (
483 		__field(  void *,	ctx		)
484 		__field(  void *,	req		)
485 		__field(  u8,		opcode		)
486 		__field(  u64,		user_data	)
487 	),
488 
489 	TP_fast_assign(
490 		__entry->ctx		= ctx;
491 		__entry->req		= req;
492 		__entry->opcode		= opcode;
493 		__entry->user_data	= user_data;
494 	),
495 
496 	TP_printk("ring %p, req %p, op %d, data 0x%llx",
497 		  __entry->ctx, __entry->req, __entry->opcode,
498 		  (unsigned long long) __entry->user_data)
499 );
500 
501 /*
502  * io_uring_req_failed - called when an sqe is errored during submission
503  *
504  * @sqe:		pointer to the io_uring_sqe that failed
505  * @error:		error it failed with
506  *
507  * Allows easier diagnosing of malformed requests in production systems.
 *
 * The fields of @sqe are copied into the event record one by one, so the
 * record stays valid independently of the SQE memory. sqe->rw_flags is stored
 * as "op_flags" and printed as "rw_flags"; the two sqe->__pad2[] words are
 * stored as "pad1"/"pad2" and printed as "pad=".
 *
 * "ioprio" is recorded as u16 to match the 16-bit ioprio member of
 * struct io_uring_sqe; a u8 field would drop its upper byte.
508  */
509 TRACE_EVENT(io_uring_req_failed,
510 
511 	TP_PROTO(const struct io_uring_sqe *sqe, int error),
512 
513 	TP_ARGS(sqe, error),
514 
515 	TP_STRUCT__entry (
516 		__field(  u8,	opcode )
517 		__field(  u8,	flags )
518 		__field( u16,	ioprio )
519 		__field( u64,	off )
520 		__field( u64,	addr )
521 		__field( u32,	len )
522 		__field( u32,	op_flags )
523 		__field( u64,	user_data )
524 		__field( u16,	buf_index )
525 		__field( u16,	personality )
526 		__field( u32,	file_index )
527 		__field( u64,	pad1 )
528 		__field( u64,	pad2 )
529 		__field( int,	error )
530 	),
531 
532 	TP_fast_assign(
533 		__entry->opcode		= sqe->opcode;
534 		__entry->flags		= sqe->flags;
535 		__entry->ioprio		= sqe->ioprio;
536 		__entry->off		= sqe->off;
537 		__entry->addr		= sqe->addr;
538 		__entry->len		= sqe->len;
539 		__entry->op_flags	= sqe->rw_flags;
540 		__entry->user_data	= sqe->user_data;
541 		__entry->buf_index	= sqe->buf_index;
542 		__entry->personality	= sqe->personality;
543 		__entry->file_index	= sqe->file_index;
544 		__entry->pad1		= sqe->__pad2[0];
545 		__entry->pad2		= sqe->__pad2[1];
546 		__entry->error		= error;
547 	),
548 
549 	TP_printk("op %d, flags=0x%x, prio=%d, off=%llu, addr=%llu, "
550 		  "len=%u, rw_flags=0x%x, user_data=0x%llx, buf_index=%d, "
551 		  "personality=%d, file_index=%d, pad=0x%llx/%llx, error=%d",
552 		  __entry->opcode, __entry->flags, __entry->ioprio,
553 		  (unsigned long long)__entry->off,
554 		  (unsigned long long) __entry->addr, __entry->len,
555 		  __entry->op_flags, (unsigned long long) __entry->user_data,
556 		  __entry->buf_index, __entry->personality, __entry->file_index,
557 		  (unsigned long long) __entry->pad1,
558 		  (unsigned long long) __entry->pad2, __entry->error)
559 );
560 
561 #endif /* _TRACE_IO_URING_H */
562 
563 /* This part must be outside protection */
564 #include <trace/define_trace.h>
565