// SPDX-License-Identifier: GPL-2.0
/*
 * Functions related to io context handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/security.h>
#include <linux/sched/task.h>

#include "blk.h"
#include "blk-mq-sched.h"

/*
 * For io context allocations
 */
static struct kmem_cache *iocontext_cachep;

#ifdef CONFIG_BLK_ICQ
/**
 * get_io_context - increment reference count to io_context
 * @ioc: io_context to get
 *
 * Increment reference count to @ioc.
 */
static void get_io_context(struct io_context *ioc)
{
	BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
	atomic_long_inc(&ioc->refcount);
}

/*
 * Exit an icq. Called with ioc locked for blk-mq, and with both ioc
 * and queue locked for legacy.
 */
static void ioc_exit_icq(struct io_cq *icq)
{
	struct elevator_type *et = icq->q->elevator->type;

	if (icq->flags & ICQ_EXITED)
		return;

	if (et->ops.exit_icq)
		et->ops.exit_icq(icq);

	icq->flags |= ICQ_EXITED;
}

static void ioc_exit_icqs(struct io_context *ioc)
{
	struct io_cq *icq;

	spin_lock_irq(&ioc->lock);
	hlist_for_each_entry(icq, &ioc->icq_list, ioc_node)
		ioc_exit_icq(icq);
	spin_unlock_irq(&ioc->lock);
}

/*
 * Release an icq. Called with ioc locked for blk-mq, and with both ioc
 * and queue locked for legacy.
 */
static void ioc_destroy_icq(struct io_cq *icq)
{
	struct io_context *ioc = icq->ioc;
	struct request_queue *q = icq->q;
	struct elevator_type *et = q->elevator->type;

	lockdep_assert_held(&ioc->lock);
	lockdep_assert_held(&q->queue_lock);

	if (icq->flags & ICQ_DESTROYED)
		return;

	radix_tree_delete(&ioc->icq_tree, icq->q->id);
	hlist_del_init(&icq->ioc_node);
	list_del_init(&icq->q_node);

	/*
	 * Both setting lookup hint to and clearing it from @icq are done
	 * under queue_lock. If it's not pointing to @icq now, it never
	 * will. Hint assignment itself can race safely.
	 */
	if (rcu_access_pointer(ioc->icq_hint) == icq)
		rcu_assign_pointer(ioc->icq_hint, NULL);

	ioc_exit_icq(icq);

	/*
	 * @icq->q might have gone away by the time RCU callback runs
	 * making it impossible to determine icq_cache. Record it in @icq.
	 */
	icq->__rcu_icq_cache = et->icq_cache;
	icq->flags |= ICQ_DESTROYED;
	kfree_rcu(icq, __rcu_head);
}

/*
 * Slow path for ioc release in put_io_context(). Performs double-lock
 * dancing to unlink all icq's and then frees ioc.
 */
static void ioc_release_fn(struct work_struct *work)
{
	struct io_context *ioc = container_of(work, struct io_context,
					      release_work);
	spin_lock_irq(&ioc->lock);

	while (!hlist_empty(&ioc->icq_list)) {
		struct io_cq *icq = hlist_entry(ioc->icq_list.first,
						struct io_cq, ioc_node);
		struct request_queue *q = icq->q;

		if (spin_trylock(&q->queue_lock)) {
			ioc_destroy_icq(icq);
			spin_unlock(&q->queue_lock);
		} else {
			/* Make sure q and icq cannot be freed. */
			rcu_read_lock();

			/* Re-acquire the locks in the correct order. */
			spin_unlock(&ioc->lock);
			spin_lock(&q->queue_lock);
			spin_lock(&ioc->lock);

			ioc_destroy_icq(icq);

			spin_unlock(&q->queue_lock);
			rcu_read_unlock();
		}
	}

	spin_unlock_irq(&ioc->lock);

	kmem_cache_free(iocontext_cachep, ioc);
}

/*
 * Releasing icqs requires reverse order double locking and we may already be
 * holding a queue_lock. Do it asynchronously from a workqueue.
 */
static bool ioc_delay_free(struct io_context *ioc)
{
	unsigned long flags;

	spin_lock_irqsave(&ioc->lock, flags);
	if (!hlist_empty(&ioc->icq_list)) {
		queue_work(system_power_efficient_wq, &ioc->release_work);
		spin_unlock_irqrestore(&ioc->lock, flags);
		return true;
	}
	spin_unlock_irqrestore(&ioc->lock, flags);
	return false;
}

/**
 * ioc_clear_queue - break any ioc association with the specified queue
 * @q: request_queue being cleared
 *
 * Walk @q->icq_list and exit all io_cq's.
 */
void ioc_clear_queue(struct request_queue *q)
{
	spin_lock_irq(&q->queue_lock);
	while (!list_empty(&q->icq_list)) {
		struct io_cq *icq =
			list_first_entry(&q->icq_list, struct io_cq, q_node);

		/*
		 * Other contexts won't hold the ioc lock while waiting for
		 * the queue_lock; see the details in ioc_release_fn().
		 */
		spin_lock(&icq->ioc->lock);
		ioc_destroy_icq(icq);
		spin_unlock(&icq->ioc->lock);
	}
	spin_unlock_irq(&q->queue_lock);
}
#else /* CONFIG_BLK_ICQ */
static inline void ioc_exit_icqs(struct io_context *ioc)
{
}
static inline bool ioc_delay_free(struct io_context *ioc)
{
	return false;
}
#endif /* CONFIG_BLK_ICQ */

/**
 * put_io_context - put a reference to an io_context
 * @ioc: io_context to put
 *
 * Decrement the reference count of @ioc and release it if the count reaches
 * zero.
 */
void put_io_context(struct io_context *ioc)
{
	BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
	if (atomic_long_dec_and_test(&ioc->refcount) && !ioc_delay_free(ioc))
		kmem_cache_free(iocontext_cachep, ioc);
}
EXPORT_SYMBOL_GPL(put_io_context);

/* Called by the exiting task */
void exit_io_context(struct task_struct *task)
{
	struct io_context *ioc;

	task_lock(task);
	ioc = task->io_context;
	task->io_context = NULL;
	task_unlock(task);

	if (atomic_dec_and_test(&ioc->active_ref)) {
		ioc_exit_icqs(ioc);
		put_io_context(ioc);
	}
}

/*
 * Allocate and initialize a new io_context, holding one reference and one
 * active reference for the caller.
 */
static struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
{
	struct io_context *ioc;

	ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags | __GFP_ZERO,
				    node);
	if (unlikely(!ioc))
		return NULL;

	atomic_long_set(&ioc->refcount, 1);
	atomic_set(&ioc->active_ref, 1);
#ifdef CONFIG_BLK_ICQ
	spin_lock_init(&ioc->lock);
	INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC);
	INIT_HLIST_HEAD(&ioc->icq_list);
	INIT_WORK(&ioc->release_work, ioc_release_fn);
#endif
	ioc->ioprio = IOPRIO_DEFAULT;

	return ioc;
}

int set_task_ioprio(struct task_struct *task, int ioprio)
{
	int err;
	const struct cred *cred = current_cred(), *tcred;

	rcu_read_lock();
	tcred = __task_cred(task);
	if (!uid_eq(tcred->uid, cred->euid) &&
	    !uid_eq(tcred->uid, cred->uid) && !capable(CAP_SYS_NICE)) {
		rcu_read_unlock();
		return -EPERM;
	}
	rcu_read_unlock();

	err = security_task_setioprio(task, ioprio);
	if (err)
		return err;

	task_lock(task);
	if (unlikely(!task->io_context)) {
		struct io_context *ioc;

		task_unlock(task);

		ioc = alloc_io_context(GFP_ATOMIC, NUMA_NO_NODE);
		if (!ioc)
			return -ENOMEM;

		task_lock(task);
		if (task->flags & PF_EXITING) {
			kmem_cache_free(iocontext_cachep, ioc);
			goto out;
		}
		if (task->io_context)
			kmem_cache_free(iocontext_cachep, ioc);
		else
			task->io_context = ioc;
	}
	task->io_context->ioprio = ioprio;
out:
	task_unlock(task);
	return 0;
}
EXPORT_SYMBOL_GPL(set_task_ioprio);

/* Called at fork time; the caller guarantees current->io_context is set. */
int __copy_io(unsigned long clone_flags, struct task_struct *tsk)
{
	struct io_context *ioc = current->io_context;

	/*
	 * Share io context with parent, if CLONE_IO is set
	 */
	if (clone_flags & CLONE_IO) {
		atomic_inc(&ioc->active_ref);
		tsk->io_context = ioc;
	} else if (ioprio_valid(ioc->ioprio)) {
		tsk->io_context = alloc_io_context(GFP_KERNEL, NUMA_NO_NODE);
		if (!tsk->io_context)
			return -ENOMEM;
		tsk->io_context->ioprio = ioc->ioprio;
	}

	return 0;
}

#ifdef CONFIG_BLK_ICQ
/**
 * ioc_lookup_icq - lookup io_cq from ioc
 * @q: the associated request_queue
 *
 * Look up the io_cq associated with the current task's io_context and @q.
 * Must be called with @q->queue_lock held.
 */
struct io_cq *ioc_lookup_icq(struct request_queue *q)
{
	struct io_context *ioc = current->io_context;
	struct io_cq *icq;

	lockdep_assert_held(&q->queue_lock);

	/*
	 * icq's are indexed from @ioc using radix tree and hint pointer,
	 * both of which are protected with RCU. All removals are done
	 * holding both q and ioc locks, and we're holding q lock - if we
	 * find an icq which points to us, it's guaranteed to be valid.
	 */
	rcu_read_lock();
	icq = rcu_dereference(ioc->icq_hint);
	if (icq && icq->q == q)
		goto out;

	icq = radix_tree_lookup(&ioc->icq_tree, q->id);
	if (icq && icq->q == q)
		rcu_assign_pointer(ioc->icq_hint, icq);	/* allowed to race */
	else
		icq = NULL;
out:
	rcu_read_unlock();
	return icq;
}
EXPORT_SYMBOL(ioc_lookup_icq);

/**
 * ioc_create_icq - create and link io_cq
 * @q: request_queue of interest
 *
 * Make sure an io_cq linking the current task's io_context and @q exists.
 * If it doesn't, a new one is allocated with GFP_ATOMIC and linked.
 *
 * The caller is responsible for ensuring the io_context won't go away and
 * @q is alive and will stay alive until this function returns.
 */
static struct io_cq *ioc_create_icq(struct request_queue *q)
{
	struct io_context *ioc = current->io_context;
	struct elevator_type *et = q->elevator->type;
	struct io_cq *icq;

	/* allocate stuff */
	icq = kmem_cache_alloc_node(et->icq_cache, GFP_ATOMIC | __GFP_ZERO,
				    q->node);
	if (!icq)
		return NULL;

	if (radix_tree_maybe_preload(GFP_ATOMIC) < 0) {
		kmem_cache_free(et->icq_cache, icq);
		return NULL;
	}

	icq->ioc = ioc;
	icq->q = q;
	INIT_LIST_HEAD(&icq->q_node);
	INIT_HLIST_NODE(&icq->ioc_node);

	/* lock both q and ioc and try to link @icq */
	spin_lock_irq(&q->queue_lock);
	spin_lock(&ioc->lock);

	if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) {
		hlist_add_head(&icq->ioc_node, &ioc->icq_list);
		list_add(&icq->q_node, &q->icq_list);
		if (et->ops.init_icq)
			et->ops.init_icq(icq);
	} else {
		kmem_cache_free(et->icq_cache, icq);
		icq = ioc_lookup_icq(q);
		if (!icq)
			printk(KERN_ERR "cfq: icq link failed!\n");
	}

	spin_unlock(&ioc->lock);
	spin_unlock_irq(&q->queue_lock);
	radix_tree_preload_end();
	return icq;
}

/*
 * Find or create the io_cq linking the current task's io_context to @q.
 * A reference on the io_context is taken and retained on success; it is
 * dropped again if icq creation fails.
 */
struct io_cq *ioc_find_get_icq(struct request_queue *q)
{
	struct io_context *ioc = current->io_context;
	struct io_cq *icq = NULL;

	if (unlikely(!ioc)) {
		ioc = alloc_io_context(GFP_ATOMIC, q->node);
		if (!ioc)
			return NULL;

		task_lock(current);
		if (current->io_context) {
			kmem_cache_free(iocontext_cachep, ioc);
			ioc = current->io_context;
		} else {
			current->io_context = ioc;
		}

		get_io_context(ioc);
		task_unlock(current);
	} else {
		get_io_context(ioc);

		spin_lock_irq(&q->queue_lock);
		icq = ioc_lookup_icq(q);
		spin_unlock_irq(&q->queue_lock);
	}

	if (!icq) {
		icq = ioc_create_icq(q);
		if (!icq) {
			put_io_context(ioc);
			return NULL;
		}
	}
	return icq;
}
EXPORT_SYMBOL_GPL(ioc_find_get_icq);
#endif /* CONFIG_BLK_ICQ */

static int __init blk_ioc_init(void)
{
	iocontext_cachep = kmem_cache_create("blkdev_ioc",
			sizeof(struct io_context), 0, SLAB_PANIC, NULL);
	return 0;
}
subsys_initcall(blk_ioc_init);