1b2441318SGreg Kroah-Hartman /* SPDX-License-Identifier: GPL-2.0 */
2fd0928dfSJens Axboe #ifndef IOCONTEXT_H
3fd0928dfSJens Axboe #define IOCONTEXT_H
4fd0928dfSJens Axboe
54ac845a2SJens Axboe #include <linux/radix-tree.h>
634e6bbf2SFabio Checconi #include <linux/rcupdate.h>
7b2efa052STejun Heo #include <linux/workqueue.h>
84ac845a2SJens Axboe
/*
 * ICQ_* bit flags.
 *
 * NOTE(review): presumably io_cq state bits kept in io_cq->flags; the code
 * that sets and tests them is not in this header - confirm against the
 * block core.  Bits 0 and 1 are not defined here.
 */
enum {
	ICQ_EXITED		= 1 << 2,
	ICQ_DESTROYED		= 1 << 3,
};
13dc86900eSTejun Heo
/*
 * An io_cq (icq) is an association between an io_context (ioc) and a
 * request_queue (q).  This is used by elevators which need to track
 * information per ioc - q pair.
 *
 * An elevator can request use of icq by setting elevator_type->icq_size
 * and ->icq_align.  Both size and align must be larger than those of
 * struct io_cq and the elevator can use the tail area for private
 * information.  The recommended way to do this is defining a struct which
 * contains io_cq as the first member followed by private members and using
 * its size and align.  For example,
 *
 *	struct snail_io_cq {
 *		struct io_cq	icq;
 *		int		poke_snail;
 *		int		feed_snail;
 *	};
 *
 *	struct elevator_type snail_elv_type {
 *		.ops =		{ ... },
 *		.icq_size =	sizeof(struct snail_io_cq),
 *		.icq_align =	__alignof__(struct snail_io_cq),
 *		...
 *	};
 *
 * If icq_size is set, block core will manage icq's.  All requests will
 * have their ->elv.icq field set before
 * elevator_ops->elevator_set_req_fn() is called and will be holding a
 * reference to the associated io_context.
 *
 * Whenever a new icq is created, elevator_ops->elevator_init_icq_fn() is
 * called and, on destruction, ->elevator_exit_icq_fn().  Both functions
 * are called with both the associated io_context and queue locks held.
 *
 * An elevator is allowed to look up an icq using ioc_lookup_icq() while
 * holding the queue lock, but the returned icq is valid only until the
 * queue lock is released.  Elevators cannot and should not try to create
 * or destroy icq's.
 *
 * As icq's are linked from both ioc and q, the locking rules are a bit
 * complex.
 *
 * - ioc lock nests inside q lock.
 *
 * - ioc->icq_list and icq->ioc_node are protected by ioc lock.
 *   q->icq_list and icq->q_node by q lock.
 *
 * - ioc->icq_tree and ioc->icq_hint are protected by ioc lock, while icq
 *   itself is protected by q lock.  However, both the indexes and icq
 *   itself are also RCU managed and lookup can be performed holding only
 *   the q lock.
 *
 * - icq's are not reference counted.  They are destroyed when either the
 *   ioc or q goes away.  Each request with icq set holds an extra
 *   reference to ioc to ensure it stays until the request is completed.
 *
 * - Linking and unlinking icq's are performed while holding both ioc and q
 *   locks.  Due to the lock ordering, q exit is simple but ioc exit
 *   requires a reverse-order double lock dance.
 */
73c5869807STejun Heo struct io_cq {
74283287a5STejun Heo struct request_queue *q;
75fd0928dfSJens Axboe struct io_context *ioc;
76fd0928dfSJens Axboe
777e5a8794STejun Heo /*
787e5a8794STejun Heo * q_node and ioc_node link io_cq through icq_list of q and ioc
797e5a8794STejun Heo * respectively. Both fields are unused once ioc_exit_icq() is
807e5a8794STejun Heo * called and shared with __rcu_icq_cache and __rcu_head which are
817e5a8794STejun Heo * used for RCU free of io_cq.
827e5a8794STejun Heo */
837e5a8794STejun Heo union {
84c5869807STejun Heo struct list_head q_node;
857e5a8794STejun Heo struct kmem_cache *__rcu_icq_cache;
867e5a8794STejun Heo };
877e5a8794STejun Heo union {
88c5869807STejun Heo struct hlist_node ioc_node;
897e5a8794STejun Heo struct rcu_head __rcu_head;
907e5a8794STejun Heo };
91fd0928dfSJens Axboe
92d705ae6bSTejun Heo unsigned int flags;
93fd0928dfSJens Axboe };
94fd0928dfSJens Axboe
/*
 * I/O subsystem state of the associated processes.  It is refcounted
 * and kmalloc'ed.  It may be shared between processes.
 */
99fd0928dfSJens Axboe struct io_context {
100d9c7d394SNikanth Karthikesan atomic_long_t refcount;
101f6e8d01bSTejun Heo atomic_t active_ref;
102d38ecf93SJens Axboe
103*5ef16305SChristoph Hellwig unsigned short ioprio;
104*5ef16305SChristoph Hellwig
105*5ef16305SChristoph Hellwig #ifdef CONFIG_BLK_ICQ
106d38ecf93SJens Axboe /* all the fields below are protected by this lock */
107d38ecf93SJens Axboe spinlock_t lock;
108fd0928dfSJens Axboe
109c5869807STejun Heo struct radix_tree_root icq_tree;
110c5869807STejun Heo struct io_cq __rcu *icq_hint;
111c5869807STejun Heo struct hlist_head icq_list;
112b2efa052STejun Heo
113b2efa052STejun Heo struct work_struct release_work;
114*5ef16305SChristoph Hellwig #endif /* CONFIG_BLK_ICQ */
115fd0928dfSJens Axboe };
116fd0928dfSJens Axboe
struct task_struct;
#ifdef CONFIG_BLOCK
void put_io_context(struct io_context *ioc);
void exit_io_context(struct task_struct *task);
int __copy_io(unsigned long clone_flags, struct task_struct *tsk);

/*
 * copy_io - inline front end for __copy_io()
 * @clone_flags: passed through to __copy_io() unchanged
 * @tsk: passed through to __copy_io() unchanged
 *
 * Returns 0 without calling out of line when the current task has no
 * io_context; otherwise returns whatever __copy_io() returns.
 */
static inline int copy_io(unsigned long clone_flags, struct task_struct *tsk)
{
	if (!current->io_context)
		return 0;
	return __copy_io(clone_flags, tsk);
}
#else
struct io_context;

/* Without CONFIG_BLOCK these are no-ops and copy_io() always returns 0. */
static inline void put_io_context(struct io_context *ioc) { }
static inline void exit_io_context(struct task_struct *task) { }
static inline int copy_io(unsigned long clone_flags, struct task_struct *tsk)
{
	return 0;
}
#endif /* CONFIG_BLOCK */
137da9cbc87SJens Axboe
13888c9a2ceSChristoph Hellwig #endif /* IOCONTEXT_H */
139