#ifndef IOCONTEXT_H
#define IOCONTEXT_H

#include <linux/radix-tree.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>

enum {
	ICQ_IOPRIO_CHANGED	= 1 << 0,
	ICQ_CGROUP_CHANGED	= 1 << 1,

	ICQ_CHANGED_MASK	= ICQ_IOPRIO_CHANGED | ICQ_CGROUP_CHANGED,
};

/*
 * An io_cq (icq) is an association between an io_context (ioc) and a
 * request_queue (q).  This is used by elevators which need to track
 * information per ioc - q pair.
 *
 * An elevator can request use of icqs by setting elevator_type->icq_size
 * and ->icq_align.  Both size and align must be at least those of struct
 * io_cq, and the elevator can use the tail area for private information.
 * The recommended way to do this is defining a struct which contains
 * io_cq as the first member followed by private members and using its
 * size and align.  For example,
 *
 *	struct snail_io_cq {
 *		struct io_cq	icq;
 *		int		poke_snail;
 *		int		feed_snail;
 *	};
 *
 *	struct elevator_type snail_elv_type = {
 *		.ops		= { ... },
 *		.icq_size	= sizeof(struct snail_io_cq),
 *		.icq_align	= __alignof__(struct snail_io_cq),
 *		...
 *	};
 *
 * If icq_size is set, block core will manage icqs.  All requests will
 * have their ->elv.icq field set before elevator_ops->elevator_set_req_fn()
 * is called and will be holding a reference to the associated io_context.
 *
 * Whenever a new icq is created, elevator_ops->elevator_init_icq_fn() is
 * called and, on destruction, ->elevator_exit_icq_fn().  Both functions
 * are called with both the associated io_context and queue locks held.
 *
 * An elevator is allowed to look up an icq using ioc_lookup_icq() while
 * holding the queue lock, but the returned icq is valid only until the
 * queue lock is released.  Elevators cannot and should not try to create
 * or destroy icqs.
 *
 * As icqs are linked from both ioc and q, the locking rules are a bit
 * complex.
 *
 * - ioc lock nests inside q lock.
 *
 * - ioc->icq_list and icq->ioc_node are protected by ioc lock.
 *   q->icq_list and icq->q_node by q lock.
 *
 * - ioc->icq_tree and ioc->icq_hint are protected by ioc lock, while icq
 *   itself is protected by q lock.  However, both the indexes and icq
 *   itself are also RCU managed and lookup can be performed holding only
 *   the q lock.
 *
 * - icqs are not reference counted.  They are destroyed when either the
 *   ioc or q goes away.  Each request with icq set holds an extra
 *   reference to ioc to ensure it stays until the request is completed.
 *
 * - Linking and unlinking icqs are performed while holding both ioc and q
 *   locks.  Due to the lock ordering, q exit is simple but ioc exit
 *   requires a reverse-order double lock dance.
 */
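
/*
 * A minimal sketch of that ioc-side exit, assuming a helper such as
 * ioc_exit_icq() that unlinks and frees a single icq with both locks
 * held (illustrative only, not necessarily the in-tree implementation).
 * Because ioc lock nests inside q lock, the exit path may only trylock
 * the q lock while holding ioc lock, and must back off and retry on
 * failure:
 *
 *	spin_lock_irqsave(&ioc->lock, flags);
 *	while (!hlist_empty(&ioc->icq_list)) {
 *		struct io_cq *icq = hlist_entry(ioc->icq_list.first,
 *						struct io_cq, ioc_node);
 *
 *		if (spin_trylock(icq->q->queue_lock)) {
 *			ioc_exit_icq(icq);
 *			spin_unlock(icq->q->queue_lock);
 *		} else {
 *			// reverse-order dance: drop ioc lock, then retry
 *			spin_unlock_irqrestore(&ioc->lock, flags);
 *			cpu_relax();
 *			spin_lock_irqsave(&ioc->lock, flags);
 *		}
 *	}
 *	spin_unlock_irqrestore(&ioc->lock, flags);
 */
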
struct io_cq {
	struct request_queue	*q;
	struct io_context	*ioc;

	/*
	 * q_node and ioc_node link io_cq through icq_list of q and ioc
	 * respectively.  Both fields are unused once ioc_exit_icq() is
	 * called and shared with __rcu_icq_cache and __rcu_head which are
	 * used for RCU free of io_cq.
	 */
	union {
		struct list_head	q_node;
		struct kmem_cache	*__rcu_icq_cache;
	};
	union {
		struct hlist_node	ioc_node;
		struct rcu_head		__rcu_head;
	};

	unsigned int		flags;
};

/*
 * I/O subsystem state of the associated processes.  It is refcounted
 * and kmalloc'ed, and may be shared between processes.
 */
struct io_context {
	atomic_long_t refcount;
	atomic_t nr_tasks;

	/* all the fields below are protected by this lock */
	spinlock_t lock;

	unsigned short ioprio;

	/*
	 * For request batching
	 */
	int nr_batch_requests;     /* Number of requests left in the batch */
	unsigned long last_waited; /* Time last woken after wait for request */

	struct radix_tree_root	icq_tree;
	struct io_cq __rcu	*icq_hint;
	struct hlist_head	icq_list;

	struct work_struct release_work;
};

static inline struct io_context *ioc_task_link(struct io_context *ioc)
{
	/*
	 * If the refcount is already zero, don't allow sharing (the ioc is
	 * going away; taking a reference would race with its teardown).
	 */
	if (ioc && atomic_long_inc_not_zero(&ioc->refcount)) {
		atomic_inc(&ioc->nr_tasks);
		return ioc;
	}

	return NULL;
}
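
/*
 * A minimal usage sketch for ioc_task_link(), modelled on a fork-time
 * path (tsk and clone_flags stand in for the caller's locals): the child
 * may share the parent's io_context only if the link wins the race
 * against the final put, which atomic_long_inc_not_zero() detects:
 *
 *	struct io_context *ioc = current->io_context;
 *
 *	if (ioc && (clone_flags & CLONE_IO)) {
 *		tsk->io_context = ioc_task_link(ioc);
 *		if (unlikely(!tsk->io_context))
 *			return -ENOMEM;	// ioc was already being torn down
 *	}
 */
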
struct task_struct;
#ifdef CONFIG_BLOCK
void put_io_context(struct io_context *ioc);
void exit_io_context(struct task_struct *task);
struct io_context *get_task_io_context(struct task_struct *task,
				       gfp_t gfp_flags, int node);
void ioc_ioprio_changed(struct io_context *ioc, int ioprio);
void ioc_cgroup_changed(struct io_context *ioc);
unsigned int icq_get_changed(struct io_cq *icq);
#else
struct io_context;
static inline void put_io_context(struct io_context *ioc) { }
static inline void exit_io_context(struct task_struct *task) { }
#endif

#endif