blk-core.c (before: 8324aa91d1e11a1fc25f209687a0b2e6c2ed47d0) | blk-core.c (after: 86db1e29772372155db08ff48a9ceb76e11a2ad1) |
---|---|
1/* 2 * Copyright (C) 1991, 1992 Linus Torvalds 3 * Copyright (C) 1994, Karl Keyte: Added support for disk statistics 4 * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE 5 * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de> 6 * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> - July2000 7 * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001 8 */ --- 6 unchanged lines hidden (view full) --- 15#include <linux/backing-dev.h> 16#include <linux/bio.h> 17#include <linux/blkdev.h> 18#include <linux/highmem.h> 19#include <linux/mm.h> 20#include <linux/kernel_stat.h> 21#include <linux/string.h> 22#include <linux/init.h> | 1/* 2 * Copyright (C) 1991, 1992 Linus Torvalds 3 * Copyright (C) 1994, Karl Keyte: Added support for disk statistics 4 * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE 5 * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de> 6 * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> - July2000 7 * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001 8 */ --- 6 unchanged lines hidden (view full) --- 15#include <linux/backing-dev.h> 16#include <linux/bio.h> 17#include <linux/blkdev.h> 18#include <linux/highmem.h> 19#include <linux/mm.h> 20#include <linux/kernel_stat.h> 21#include <linux/string.h> 22#include <linux/init.h> |
23#include <linux/bootmem.h> /* for max_pfn/max_low_pfn */ | |
24#include <linux/completion.h> 25#include <linux/slab.h> 26#include <linux/swap.h> 27#include <linux/writeback.h> 28#include <linux/task_io_accounting_ops.h> 29#include <linux/interrupt.h> 30#include <linux/cpu.h> 31#include <linux/blktrace_api.h> 32#include <linux/fault-inject.h> 33#include <linux/scatterlist.h> 34 35#include "blk.h" 36 | 23#include <linux/completion.h> 24#include <linux/slab.h> 25#include <linux/swap.h> 26#include <linux/writeback.h> 27#include <linux/task_io_accounting_ops.h> 28#include <linux/interrupt.h> 29#include <linux/cpu.h> 30#include <linux/blktrace_api.h> 31#include <linux/fault-inject.h> 32#include <linux/scatterlist.h> 33 34#include "blk.h" 35 |
37/* 38 * for max sense size 39 */ 40#include <scsi/scsi_cmnd.h> 41 42static void blk_unplug_work(struct work_struct *work); 43static void blk_unplug_timeout(unsigned long data); | |
44static void drive_stat_acct(struct request *rq, int new_io); | 36static void drive_stat_acct(struct request *rq, int new_io); |
45static void init_request_from_bio(struct request *req, struct bio *bio); | |
46static int __make_request(struct request_queue *q, struct bio *bio); | 37static int __make_request(struct request_queue *q, struct bio *bio); |
47static struct io_context *current_io_context(gfp_t gfp_flags, int node); | |
48static void blk_recalc_rq_segments(struct request *rq); | 38static void blk_recalc_rq_segments(struct request *rq); |
49static void blk_rq_bio_prep(struct request_queue *q, struct request *rq, 50 struct bio *bio); | |
51 52/* 53 * For the allocated request tables 54 */ 55struct kmem_cache *request_cachep; 56 57/* 58 * For queue allocation 59 */ 60struct kmem_cache *blk_requestq_cachep = NULL; 61 62/* | 39 40/* 41 * For the allocated request tables 42 */ 43struct kmem_cache *request_cachep; 44 45/* 46 * For queue allocation 47 */ 48struct kmem_cache *blk_requestq_cachep = NULL; 49 50/* |
63 * For io context allocations 64 */ 65static struct kmem_cache *iocontext_cachep; 66 67/* | |
68 * Controlling structure to kblockd 69 */ 70static struct workqueue_struct *kblockd_workqueue; 71 | 51 * Controlling structure to kblockd 52 */ 53static struct workqueue_struct *kblockd_workqueue; 54 |
72unsigned long blk_max_low_pfn, blk_max_pfn; 73 74EXPORT_SYMBOL(blk_max_low_pfn); 75EXPORT_SYMBOL(blk_max_pfn); 76 | |
77static DEFINE_PER_CPU(struct list_head, blk_cpu_done); 78 | 55static DEFINE_PER_CPU(struct list_head, blk_cpu_done); 56 |
79/* Amount of time in which a process may batch requests */ 80#define BLK_BATCH_TIME (HZ/50UL) 81 82/* Number of requests a "batching" process may submit */ 83#define BLK_BATCH_REQ 32 84 | |
85void blk_queue_congestion_threshold(struct request_queue *q) 86{ 87 int nr; 88 89 nr = q->nr_requests - (q->nr_requests / 8) + 1; 90 if (nr > q->nr_requests) 91 nr = q->nr_requests; 92 q->nr_congestion_on = nr; --- 19 unchanged lines hidden (view full) --- 112 struct request_queue *q = bdev_get_queue(bdev); 113 114 if (q) 115 ret = &q->backing_dev_info; 116 return ret; 117} 118EXPORT_SYMBOL(blk_get_backing_dev_info); 119 | 57void blk_queue_congestion_threshold(struct request_queue *q) 58{ 59 int nr; 60 61 nr = q->nr_requests - (q->nr_requests / 8) + 1; 62 if (nr > q->nr_requests) 63 nr = q->nr_requests; 64 q->nr_congestion_on = nr; --- 19 unchanged lines hidden (view full) --- 84 struct request_queue *q = bdev_get_queue(bdev); 85 86 if (q) 87 ret = &q->backing_dev_info; 88 return ret; 89} 90EXPORT_SYMBOL(blk_get_backing_dev_info); 91 |
120/** 121 * blk_queue_prep_rq - set a prepare_request function for queue 122 * @q: queue 123 * @pfn: prepare_request function 124 * 125 * It's possible for a queue to register a prepare_request callback which 126 * is invoked before the request is handed to the request_fn. The goal of 127 * the function is to prepare a request for I/O, it can be used to build a 128 * cdb from the request data for instance. 129 * 130 */ 131void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn) | 92void rq_init(struct request_queue *q, struct request *rq) |
132{ | 93{ |
133 q->prep_rq_fn = pfn; 134} 135 136EXPORT_SYMBOL(blk_queue_prep_rq); 137 138/** 139 * blk_queue_merge_bvec - set a merge_bvec function for queue 140 * @q: queue 141 * @mbfn: merge_bvec_fn 142 * 143 * Usually queues have static limitations on the max sectors or segments that 144 * we can put in a request. Stacking drivers may have some settings that 145 * are dynamic, and thus we have to query the queue whether it is ok to 146 * add a new bio_vec to a bio at a given offset or not. If the block device 147 * has such limitations, it needs to register a merge_bvec_fn to control 148 * the size of bio's sent to it. Note that a block device *must* allow a 149 * single page to be added to an empty bio. The block device driver may want 150 * to use the bio_split() function to deal with these bio's. By default 151 * no merge_bvec_fn is defined for a queue, and only the fixed limits are 152 * honored. 153 */ 154void blk_queue_merge_bvec(struct request_queue *q, merge_bvec_fn *mbfn) 155{ 156 q->merge_bvec_fn = mbfn; 157} 158 159EXPORT_SYMBOL(blk_queue_merge_bvec); 160 161void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn) 162{ 163 q->softirq_done_fn = fn; 164} 165 166EXPORT_SYMBOL(blk_queue_softirq_done); 167 168/** 169 * blk_queue_make_request - define an alternate make_request function for a device 170 * @q: the request queue for the device to be affected 171 * @mfn: the alternate make_request function 172 * 173 * Description: 174 * The normal way for &struct bios to be passed to a device 175 * driver is for them to be collected into requests on a request 176 * queue, and then to allow the device driver to select requests 177 * off that queue when it is ready. This works well for many block 178 * devices. However some block devices (typically virtual devices 179 * such as md or lvm) do not benefit from the processing on the 180 * request queue, and are served best by having the requests passed 181 * directly to them. This can be achieved by providing a function 182 * to blk_queue_make_request(). 183 * 184 * Caveat: 185 * The driver that does this *must* be able to deal appropriately 186 * with buffers in "highmemory". This can be accomplished by either calling 187 * __bio_kmap_atomic() to get a temporary kernel mapping, or by calling 188 * blk_queue_bounce() to create a buffer in normal memory. 
189 **/ 190void blk_queue_make_request(struct request_queue * q, make_request_fn * mfn) 191{ 192 /* 193 * set defaults 194 */ 195 q->nr_requests = BLKDEV_MAX_RQ; 196 blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS); 197 blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS); 198 q->make_request_fn = mfn; 199 q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; 200 q->backing_dev_info.state = 0; 201 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; 202 blk_queue_max_sectors(q, SAFE_MAX_SECTORS); 203 blk_queue_hardsect_size(q, 512); 204 blk_queue_dma_alignment(q, 511); 205 blk_queue_congestion_threshold(q); 206 q->nr_batching = BLK_BATCH_REQ; 207 208 q->unplug_thresh = 4; /* hmm */ 209 q->unplug_delay = (3 * HZ) / 1000; /* 3 milliseconds */ 210 if (q->unplug_delay == 0) 211 q->unplug_delay = 1; 212 213 INIT_WORK(&q->unplug_work, blk_unplug_work); 214 215 q->unplug_timer.function = blk_unplug_timeout; 216 q->unplug_timer.data = (unsigned long)q; 217 218 /* 219 * by default assume old behaviour and bounce for any highmem page 220 */ 221 blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); 222} 223 224EXPORT_SYMBOL(blk_queue_make_request); 225 226static void rq_init(struct request_queue *q, struct request *rq) 227{ | |
228 INIT_LIST_HEAD(&rq->queuelist); 229 INIT_LIST_HEAD(&rq->donelist); 230 231 rq->errors = 0; 232 rq->bio = rq->biotail = NULL; 233 INIT_HLIST_NODE(&rq->hash); 234 RB_CLEAR_NODE(&rq->rb_node); 235 rq->ioprio = 0; --- 6 unchanged lines hidden (view full) --- 242 rq->nr_phys_segments = 0; 243 rq->sense = NULL; 244 rq->end_io = NULL; 245 rq->end_io_data = NULL; 246 rq->completion_data = NULL; 247 rq->next_rq = NULL; 248} 249 | 94 INIT_LIST_HEAD(&rq->queuelist); 95 INIT_LIST_HEAD(&rq->donelist); 96 97 rq->errors = 0; 98 rq->bio = rq->biotail = NULL; 99 INIT_HLIST_NODE(&rq->hash); 100 RB_CLEAR_NODE(&rq->rb_node); 101 rq->ioprio = 0; --- 6 unchanged lines hidden (view full) --- 108 rq->nr_phys_segments = 0; 109 rq->sense = NULL; 110 rq->end_io = NULL; 111 rq->end_io_data = NULL; 112 rq->completion_data = NULL; 113 rq->next_rq = NULL; 114} 115 |
250/** 251 * blk_queue_ordered - does this queue support ordered writes 252 * @q: the request queue 253 * @ordered: one of QUEUE_ORDERED_* 254 * @prepare_flush_fn: rq setup helper for cache flush ordered writes 255 * 256 * Description: 257 * For journalled file systems, doing ordered writes on a commit 258 * block instead of explicitly doing wait_on_buffer (which is bad 259 * for performance) can be a big win. Block drivers supporting this 260 * feature should call this function and indicate so. 261 * 262 **/ 263int blk_queue_ordered(struct request_queue *q, unsigned ordered, 264 prepare_flush_fn *prepare_flush_fn) 265{ 266 if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) && 267 prepare_flush_fn == NULL) { 268 printk(KERN_ERR "blk_queue_ordered: prepare_flush_fn required\n"); 269 return -EINVAL; 270 } 271 272 if (ordered != QUEUE_ORDERED_NONE && 273 ordered != QUEUE_ORDERED_DRAIN && 274 ordered != QUEUE_ORDERED_DRAIN_FLUSH && 275 ordered != QUEUE_ORDERED_DRAIN_FUA && 276 ordered != QUEUE_ORDERED_TAG && 277 ordered != QUEUE_ORDERED_TAG_FLUSH && 278 ordered != QUEUE_ORDERED_TAG_FUA) { 279 printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered); 280 return -EINVAL; 281 } 282 283 q->ordered = ordered; 284 q->next_ordered = ordered; 285 q->prepare_flush_fn = prepare_flush_fn; 286 287 return 0; 288} 289 290EXPORT_SYMBOL(blk_queue_ordered); 291 292/* 293 * Cache flushing for ordered writes handling 294 */ 295inline unsigned blk_ordered_cur_seq(struct request_queue *q) 296{ 297 if (!q->ordseq) 298 return 0; 299 return 1 << ffz(q->ordseq); 300} 301 302unsigned blk_ordered_req_seq(struct request *rq) 303{ 304 struct request_queue *q = rq->q; 305 306 BUG_ON(q->ordseq == 0); 307 308 if (rq == &q->pre_flush_rq) 309 return QUEUE_ORDSEQ_PREFLUSH; 310 if (rq == &q->bar_rq) 311 return QUEUE_ORDSEQ_BAR; 312 if (rq == &q->post_flush_rq) 313 return QUEUE_ORDSEQ_POSTFLUSH; 314 315 /* 316 * !fs requests don't need to follow barrier ordering. Always 317 * put them at the front. This fixes the following deadlock. 318 * 319 * http://thread.gmane.org/gmane.linux.kernel/537473 320 */ 321 if (!blk_fs_request(rq)) 322 return QUEUE_ORDSEQ_DRAIN; 323 324 if ((rq->cmd_flags & REQ_ORDERED_COLOR) == 325 (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR)) 326 return QUEUE_ORDSEQ_DRAIN; 327 else 328 return QUEUE_ORDSEQ_DONE; 329} 330 331void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error) 332{ 333 struct request *rq; 334 335 if (error && !q->orderr) 336 q->orderr = error; 337 338 BUG_ON(q->ordseq & seq); 339 q->ordseq |= seq; 340 341 if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) 342 return; 343 344 /* 345 * Okay, sequence complete. 
346 */ 347 q->ordseq = 0; 348 rq = q->orig_bar_rq; 349 350 if (__blk_end_request(rq, q->orderr, blk_rq_bytes(rq))) 351 BUG(); 352} 353 354static void pre_flush_end_io(struct request *rq, int error) 355{ 356 elv_completed_request(rq->q, rq); 357 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error); 358} 359 360static void bar_end_io(struct request *rq, int error) 361{ 362 elv_completed_request(rq->q, rq); 363 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error); 364} 365 366static void post_flush_end_io(struct request *rq, int error) 367{ 368 elv_completed_request(rq->q, rq); 369 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error); 370} 371 372static void queue_flush(struct request_queue *q, unsigned which) 373{ 374 struct request *rq; 375 rq_end_io_fn *end_io; 376 377 if (which == QUEUE_ORDERED_PREFLUSH) { 378 rq = &q->pre_flush_rq; 379 end_io = pre_flush_end_io; 380 } else { 381 rq = &q->post_flush_rq; 382 end_io = post_flush_end_io; 383 } 384 385 rq->cmd_flags = REQ_HARDBARRIER; 386 rq_init(q, rq); 387 rq->elevator_private = NULL; 388 rq->elevator_private2 = NULL; 389 rq->rq_disk = q->bar_rq.rq_disk; 390 rq->end_io = end_io; 391 q->prepare_flush_fn(q, rq); 392 393 elv_insert(q, rq, ELEVATOR_INSERT_FRONT); 394} 395 396static inline struct request *start_ordered(struct request_queue *q, 397 struct request *rq) 398{ 399 q->orderr = 0; 400 q->ordered = q->next_ordered; 401 q->ordseq |= QUEUE_ORDSEQ_STARTED; 402 403 /* 404 * Prep proxy barrier request. 405 */ 406 blkdev_dequeue_request(rq); 407 q->orig_bar_rq = rq; 408 rq = &q->bar_rq; 409 rq->cmd_flags = 0; 410 rq_init(q, rq); 411 if (bio_data_dir(q->orig_bar_rq->bio) == WRITE) 412 rq->cmd_flags |= REQ_RW; 413 if (q->ordered & QUEUE_ORDERED_FUA) 414 rq->cmd_flags |= REQ_FUA; 415 rq->elevator_private = NULL; 416 rq->elevator_private2 = NULL; 417 init_request_from_bio(rq, q->orig_bar_rq->bio); 418 rq->end_io = bar_end_io; 419 420 /* 421 * Queue ordered sequence. As we stack them at the head, we 422 * need to queue in reverse order. Note that we rely on that 423 * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs 424 * request gets inbetween ordered sequence. If this request is 425 * an empty barrier, we don't need to do a postflush ever since 426 * there will be no data written between the pre and post flush. 427 * Hence a single flush will suffice. 428 */ 429 if ((q->ordered & QUEUE_ORDERED_POSTFLUSH) && !blk_empty_barrier(rq)) 430 queue_flush(q, QUEUE_ORDERED_POSTFLUSH); 431 else 432 q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH; 433 434 elv_insert(q, rq, ELEVATOR_INSERT_FRONT); 435 436 if (q->ordered & QUEUE_ORDERED_PREFLUSH) { 437 queue_flush(q, QUEUE_ORDERED_PREFLUSH); 438 rq = &q->pre_flush_rq; 439 } else 440 q->ordseq |= QUEUE_ORDSEQ_PREFLUSH; 441 442 if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0) 443 q->ordseq |= QUEUE_ORDSEQ_DRAIN; 444 else 445 rq = NULL; 446 447 return rq; 448} 449 450int blk_do_ordered(struct request_queue *q, struct request **rqp) 451{ 452 struct request *rq = *rqp; 453 const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq); 454 455 if (!q->ordseq) { 456 if (!is_barrier) 457 return 1; 458 459 if (q->next_ordered != QUEUE_ORDERED_NONE) { 460 *rqp = start_ordered(q, rq); 461 return 1; 462 } else { 463 /* 464 * This can happen when the queue switches to 465 * ORDERED_NONE while this request is on it. 
466 */ 467 blkdev_dequeue_request(rq); 468 if (__blk_end_request(rq, -EOPNOTSUPP, 469 blk_rq_bytes(rq))) 470 BUG(); 471 *rqp = NULL; 472 return 0; 473 } 474 } 475 476 /* 477 * Ordered sequence in progress 478 */ 479 480 /* Special requests are not subject to ordering rules. */ 481 if (!blk_fs_request(rq) && 482 rq != &q->pre_flush_rq && rq != &q->post_flush_rq) 483 return 1; 484 485 if (q->ordered & QUEUE_ORDERED_TAG) { 486 /* Ordered by tag. Blocking the next barrier is enough. */ 487 if (is_barrier && rq != &q->bar_rq) 488 *rqp = NULL; 489 } else { 490 /* Ordered by draining. Wait for turn. */ 491 WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q)); 492 if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q)) 493 *rqp = NULL; 494 } 495 496 return 1; 497} 498 | |
499static void req_bio_endio(struct request *rq, struct bio *bio, 500 unsigned int nbytes, int error) 501{ 502 struct request_queue *q = rq->q; 503 504 if (&q->bar_rq != rq) { 505 if (error) 506 clear_bit(BIO_UPTODATE, &bio->bi_flags); --- 16 unchanged lines hidden (view full) --- 523 * Okay, this is the barrier request in progress, just 524 * record the error; 525 */ 526 if (error && !q->orderr) 527 q->orderr = error; 528 } 529} 530 | 116static void req_bio_endio(struct request *rq, struct bio *bio, 117 unsigned int nbytes, int error) 118{ 119 struct request_queue *q = rq->q; 120 121 if (&q->bar_rq != rq) { 122 if (error) 123 clear_bit(BIO_UPTODATE, &bio->bi_flags); --- 16 unchanged lines hidden (view full) --- 140 * Okay, this is the barrier request in progress, just 141 * record the error; 142 */ 143 if (error && !q->orderr) 144 q->orderr = error; 145 } 146} 147 |
531/** 532 * blk_queue_bounce_limit - set bounce buffer limit for queue 533 * @q: the request queue for the device 534 * @dma_addr: bus address limit 535 * 536 * Description: 537 * Different hardware can have different requirements as to what pages 538 * it can do I/O directly to. A low level driver can call 539 * blk_queue_bounce_limit to have lower memory pages allocated as bounce 540 * buffers for doing I/O to pages residing above @page. 541 **/ 542void blk_queue_bounce_limit(struct request_queue *q, u64 dma_addr) 543{ 544 unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT; 545 int dma = 0; 546 547 q->bounce_gfp = GFP_NOIO; 548#if BITS_PER_LONG == 64 549 /* Assume anything <= 4GB can be handled by IOMMU. 550 Actually some IOMMUs can handle everything, but I don't 551 know of a way to test this here. */ 552 if (bounce_pfn < (min_t(u64,0xffffffff,BLK_BOUNCE_HIGH) >> PAGE_SHIFT)) 553 dma = 1; 554 q->bounce_pfn = max_low_pfn; 555#else 556 if (bounce_pfn < blk_max_low_pfn) 557 dma = 1; 558 q->bounce_pfn = bounce_pfn; 559#endif 560 if (dma) { 561 init_emergency_isa_pool(); 562 q->bounce_gfp = GFP_NOIO | GFP_DMA; 563 q->bounce_pfn = bounce_pfn; 564 } 565} 566 567EXPORT_SYMBOL(blk_queue_bounce_limit); 568 569/** 570 * blk_queue_max_sectors - set max sectors for a request for this queue 571 * @q: the request queue for the device 572 * @max_sectors: max sectors in the usual 512b unit 573 * 574 * Description: 575 * Enables a low level driver to set an upper limit on the size of 576 * received requests. 577 **/ 578void blk_queue_max_sectors(struct request_queue *q, unsigned int max_sectors) 579{ 580 if ((max_sectors << 9) < PAGE_CACHE_SIZE) { 581 max_sectors = 1 << (PAGE_CACHE_SHIFT - 9); 582 printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors); 583 } 584 585 if (BLK_DEF_MAX_SECTORS > max_sectors) 586 q->max_hw_sectors = q->max_sectors = max_sectors; 587 else { 588 q->max_sectors = BLK_DEF_MAX_SECTORS; 589 q->max_hw_sectors = max_sectors; 590 } 591} 592 593EXPORT_SYMBOL(blk_queue_max_sectors); 594 595/** 596 * blk_queue_max_phys_segments - set max phys segments for a request for this queue 597 * @q: the request queue for the device 598 * @max_segments: max number of segments 599 * 600 * Description: 601 * Enables a low level driver to set an upper limit on the number of 602 * physical data segments in a request. This would be the largest sized 603 * scatter list the driver could handle. 604 **/ 605void blk_queue_max_phys_segments(struct request_queue *q, 606 unsigned short max_segments) 607{ 608 if (!max_segments) { 609 max_segments = 1; 610 printk("%s: set to minimum %d\n", __FUNCTION__, max_segments); 611 } 612 613 q->max_phys_segments = max_segments; 614} 615 616EXPORT_SYMBOL(blk_queue_max_phys_segments); 617 618/** 619 * blk_queue_max_hw_segments - set max hw segments for a request for this queue 620 * @q: the request queue for the device 621 * @max_segments: max number of segments 622 * 623 * Description: 624 * Enables a low level driver to set an upper limit on the number of 625 * hw data segments in a request. This would be the largest number of 626 * address/length pairs the host adapter can actually give as once 627 * to the device. 
628 **/ 629void blk_queue_max_hw_segments(struct request_queue *q, 630 unsigned short max_segments) 631{ 632 if (!max_segments) { 633 max_segments = 1; 634 printk("%s: set to minimum %d\n", __FUNCTION__, max_segments); 635 } 636 637 q->max_hw_segments = max_segments; 638} 639 640EXPORT_SYMBOL(blk_queue_max_hw_segments); 641 642/** 643 * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg 644 * @q: the request queue for the device 645 * @max_size: max size of segment in bytes 646 * 647 * Description: 648 * Enables a low level driver to set an upper limit on the size of a 649 * coalesced segment 650 **/ 651void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size) 652{ 653 if (max_size < PAGE_CACHE_SIZE) { 654 max_size = PAGE_CACHE_SIZE; 655 printk("%s: set to minimum %d\n", __FUNCTION__, max_size); 656 } 657 658 q->max_segment_size = max_size; 659} 660 661EXPORT_SYMBOL(blk_queue_max_segment_size); 662 663/** 664 * blk_queue_hardsect_size - set hardware sector size for the queue 665 * @q: the request queue for the device 666 * @size: the hardware sector size, in bytes 667 * 668 * Description: 669 * This should typically be set to the lowest possible sector size 670 * that the hardware can operate on (possible without reverting to 671 * even internal read-modify-write operations). Usually the default 672 * of 512 covers most hardware. 673 **/ 674void blk_queue_hardsect_size(struct request_queue *q, unsigned short size) 675{ 676 q->hardsect_size = size; 677} 678 679EXPORT_SYMBOL(blk_queue_hardsect_size); 680 681/* 682 * Returns the minimum that is _not_ zero, unless both are zero. 683 */ 684#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r)) 685 686/** 687 * blk_queue_stack_limits - inherit underlying queue limits for stacked drivers 688 * @t: the stacking driver (top) 689 * @b: the underlying device (bottom) 690 **/ 691void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b) 692{ 693 /* zero is "infinity" */ 694 t->max_sectors = min_not_zero(t->max_sectors,b->max_sectors); 695 t->max_hw_sectors = min_not_zero(t->max_hw_sectors,b->max_hw_sectors); 696 697 t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments); 698 t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments); 699 t->max_segment_size = min(t->max_segment_size,b->max_segment_size); 700 t->hardsect_size = max(t->hardsect_size,b->hardsect_size); 701 if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) 702 clear_bit(QUEUE_FLAG_CLUSTER, &t->queue_flags); 703} 704 705EXPORT_SYMBOL(blk_queue_stack_limits); 706 707/** 708 * blk_queue_dma_drain - Set up a drain buffer for excess dma. 709 * 710 * @q: the request queue for the device 711 * @buf: physically contiguous buffer 712 * @size: size of the buffer in bytes 713 * 714 * Some devices have excess DMA problems and can't simply discard (or 715 * zero fill) the unwanted piece of the transfer. They have to have a 716 * real area of memory to transfer it into. The use case for this is 717 * ATAPI devices in DMA mode. If the packet command causes a transfer 718 * bigger than the transfer size some HBAs will lock up if there 719 * aren't DMA elements to contain the excess transfer. What this API 720 * does is adjust the queue so that the buf is always appended 721 * silently to the scatterlist. 722 * 723 * Note: This routine adjusts max_hw_segments to make room for 724 * appending the drain buffer. 
If you call 725 * blk_queue_max_hw_segments() or blk_queue_max_phys_segments() after 726 * calling this routine, you must set the limit to one fewer than your 727 * device can support otherwise there won't be room for the drain 728 * buffer. 729 */ 730int blk_queue_dma_drain(struct request_queue *q, void *buf, 731 unsigned int size) 732{ 733 if (q->max_hw_segments < 2 || q->max_phys_segments < 2) 734 return -EINVAL; 735 /* make room for appending the drain */ 736 --q->max_hw_segments; 737 --q->max_phys_segments; 738 q->dma_drain_buffer = buf; 739 q->dma_drain_size = size; 740 741 return 0; 742} 743 744EXPORT_SYMBOL_GPL(blk_queue_dma_drain); 745 746/** 747 * blk_queue_segment_boundary - set boundary rules for segment merging 748 * @q: the request queue for the device 749 * @mask: the memory boundary mask 750 **/ 751void blk_queue_segment_boundary(struct request_queue *q, unsigned long mask) 752{ 753 if (mask < PAGE_CACHE_SIZE - 1) { 754 mask = PAGE_CACHE_SIZE - 1; 755 printk("%s: set to minimum %lx\n", __FUNCTION__, mask); 756 } 757 758 q->seg_boundary_mask = mask; 759} 760 761EXPORT_SYMBOL(blk_queue_segment_boundary); 762 763/** 764 * blk_queue_dma_alignment - set dma length and memory alignment 765 * @q: the request queue for the device 766 * @mask: alignment mask 767 * 768 * description: 769 * set required memory and length aligment for direct dma transactions. 770 * this is used when buiding direct io requests for the queue. 771 * 772 **/ 773void blk_queue_dma_alignment(struct request_queue *q, int mask) 774{ 775 q->dma_alignment = mask; 776} 777 778EXPORT_SYMBOL(blk_queue_dma_alignment); 779 780/** 781 * blk_queue_update_dma_alignment - update dma length and memory alignment 782 * @q: the request queue for the device 783 * @mask: alignment mask 784 * 785 * description: 786 * update required memory and length aligment for direct dma transactions. 787 * If the requested alignment is larger than the current alignment, then 788 * the current queue alignment is updated to the new value, otherwise it 789 * is left alone. The design of this is to allow multiple objects 790 * (driver, device, transport etc) to set their respective 791 * alignments without having them interfere. 792 * 793 **/ 794void blk_queue_update_dma_alignment(struct request_queue *q, int mask) 795{ 796 BUG_ON(mask > PAGE_SIZE); 797 798 if (mask > q->dma_alignment) 799 q->dma_alignment = mask; 800} 801 802EXPORT_SYMBOL(blk_queue_update_dma_alignment); 803 | |
804void blk_dump_rq_flags(struct request *rq, char *msg) 805{ 806 int bit; 807 808 printk("%s: dev %s: type=%x, flags=%x\n", msg, 809 rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type, 810 rq->cmd_flags); 811 --- 257 unchanged lines hidden (view full) --- 1069 * This will form the start of a new hw segment. Bump both 1070 * counters. 1071 */ 1072 req->nr_hw_segments += nr_hw_segs; 1073 req->nr_phys_segments += nr_phys_segs; 1074 return 1; 1075} 1076 | 148void blk_dump_rq_flags(struct request *rq, char *msg) 149{ 150 int bit; 151 152 printk("%s: dev %s: type=%x, flags=%x\n", msg, 153 rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type, 154 rq->cmd_flags); 155 --- 257 unchanged lines hidden (view full) --- 413 * This will form the start of a new hw segment. Bump both 414 * counters. 415 */ 416 req->nr_hw_segments += nr_hw_segs; 417 req->nr_phys_segments += nr_phys_segs; 418 return 1; 419} 420 |
1077static int ll_back_merge_fn(struct request_queue *q, struct request *req, 1078 struct bio *bio) | 421int ll_back_merge_fn(struct request_queue *q, struct request *req, 422 struct bio *bio) |
1079{ 1080 unsigned short max_sectors; 1081 int len; 1082 1083 if (unlikely(blk_pc_request(req))) 1084 max_sectors = q->max_hw_sectors; 1085 else 1086 max_sectors = q->max_sectors; --- 193 unchanged lines hidden (view full) --- 1280static void blk_backing_dev_unplug(struct backing_dev_info *bdi, 1281 struct page *page) 1282{ 1283 struct request_queue *q = bdi->unplug_io_data; 1284 1285 blk_unplug(q); 1286} 1287 | 423{ 424 unsigned short max_sectors; 425 int len; 426 427 if (unlikely(blk_pc_request(req))) 428 max_sectors = q->max_hw_sectors; 429 else 430 max_sectors = q->max_sectors; --- 193 unchanged lines hidden (view full) --- 624static void blk_backing_dev_unplug(struct backing_dev_info *bdi, 625 struct page *page) 626{ 627 struct request_queue *q = bdi->unplug_io_data; 628 629 blk_unplug(q); 630} 631 |
1288static void blk_unplug_work(struct work_struct *work) | 632void blk_unplug_work(struct work_struct *work) |
1289{ 1290 struct request_queue *q = 1291 container_of(work, struct request_queue, unplug_work); 1292 1293 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, 1294 q->rq.count[READ] + q->rq.count[WRITE]); 1295 1296 q->unplug_fn(q); 1297} 1298 | 633{ 634 struct request_queue *q = 635 container_of(work, struct request_queue, unplug_work); 636 637 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, 638 q->rq.count[READ] + q->rq.count[WRITE]); 639 640 q->unplug_fn(q); 641} 642 |
1299static void blk_unplug_timeout(unsigned long data) | 643void blk_unplug_timeout(unsigned long data) |
1300{ 1301 struct request_queue *q = (struct request_queue *)data; 1302 1303 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL, 1304 q->rq.count[READ] + q->rq.count[WRITE]); 1305 1306 kblockd_schedule_work(&q->unplug_work); 1307} --- 648 unchanged lines hidden (view full) --- 1956 drive_stat_acct(rq, 1); 1957 __elv_add_request(q, rq, where, 0); 1958 blk_start_queueing(q); 1959 spin_unlock_irqrestore(q->queue_lock, flags); 1960} 1961 1962EXPORT_SYMBOL(blk_insert_request); 1963 | 644{ 645 struct request_queue *q = (struct request_queue *)data; 646 647 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL, 648 q->rq.count[READ] + q->rq.count[WRITE]); 649 650 kblockd_schedule_work(&q->unplug_work); 651} --- 648 unchanged lines hidden (view full) --- 1300 drive_stat_acct(rq, 1); 1301 __elv_add_request(q, rq, where, 0); 1302 blk_start_queueing(q); 1303 spin_unlock_irqrestore(q->queue_lock, flags); 1304} 1305 1306EXPORT_SYMBOL(blk_insert_request); 1307 |
1964static int __blk_rq_unmap_user(struct bio *bio) 1965{ 1966 int ret = 0; 1967 1968 if (bio) { 1969 if (bio_flagged(bio, BIO_USER_MAPPED)) 1970 bio_unmap_user(bio); 1971 else 1972 ret = bio_uncopy_user(bio); 1973 } 1974 1975 return ret; 1976} 1977 1978int blk_rq_append_bio(struct request_queue *q, struct request *rq, 1979 struct bio *bio) 1980{ 1981 if (!rq->bio) 1982 blk_rq_bio_prep(q, rq, bio); 1983 else if (!ll_back_merge_fn(q, rq, bio)) 1984 return -EINVAL; 1985 else { 1986 rq->biotail->bi_next = bio; 1987 rq->biotail = bio; 1988 1989 rq->data_len += bio->bi_size; 1990 } 1991 return 0; 1992} 1993EXPORT_SYMBOL(blk_rq_append_bio); 1994 1995static int __blk_rq_map_user(struct request_queue *q, struct request *rq, 1996 void __user *ubuf, unsigned int len) 1997{ 1998 unsigned long uaddr; 1999 struct bio *bio, *orig_bio; 2000 int reading, ret; 2001 2002 reading = rq_data_dir(rq) == READ; 2003 2004 /* 2005 * if alignment requirement is satisfied, map in user pages for 2006 * direct dma. else, set up kernel bounce buffers 2007 */ 2008 uaddr = (unsigned long) ubuf; 2009 if (!(uaddr & queue_dma_alignment(q)) && !(len & queue_dma_alignment(q))) 2010 bio = bio_map_user(q, NULL, uaddr, len, reading); 2011 else 2012 bio = bio_copy_user(q, uaddr, len, reading); 2013 2014 if (IS_ERR(bio)) 2015 return PTR_ERR(bio); 2016 2017 orig_bio = bio; 2018 blk_queue_bounce(q, &bio); 2019 2020 /* 2021 * We link the bounce buffer in and could have to traverse it 2022 * later so we have to get a ref to prevent it from being freed 2023 */ 2024 bio_get(bio); 2025 2026 ret = blk_rq_append_bio(q, rq, bio); 2027 if (!ret) 2028 return bio->bi_size; 2029 2030 /* if it was boucned we must call the end io function */ 2031 bio_endio(bio, 0); 2032 __blk_rq_unmap_user(orig_bio); 2033 bio_put(bio); 2034 return ret; 2035} 2036 2037/** 2038 * blk_rq_map_user - map user data to a request, for REQ_BLOCK_PC usage 2039 * @q: request queue where request should be inserted 2040 * @rq: request structure to fill 2041 * @ubuf: the user buffer 2042 * @len: length of user data 2043 * 2044 * Description: 2045 * Data will be mapped directly for zero copy io, if possible. Otherwise 2046 * a kernel bounce buffer is used. 2047 * 2048 * A matching blk_rq_unmap_user() must be issued at the end of io, while 2049 * still in process context. 2050 * 2051 * Note: The mapped bio may need to be bounced through blk_queue_bounce() 2052 * before being submitted to the device, as pages mapped may be out of 2053 * reach. It's the callers responsibility to make sure this happens. The 2054 * original bio must be passed back in to blk_rq_unmap_user() for proper 2055 * unmapping. 2056 */ 2057int blk_rq_map_user(struct request_queue *q, struct request *rq, 2058 void __user *ubuf, unsigned long len) 2059{ 2060 unsigned long bytes_read = 0; 2061 struct bio *bio = NULL; 2062 int ret; 2063 2064 if (len > (q->max_hw_sectors << 9)) 2065 return -EINVAL; 2066 if (!len || !ubuf) 2067 return -EINVAL; 2068 2069 while (bytes_read != len) { 2070 unsigned long map_len, end, start; 2071 2072 map_len = min_t(unsigned long, len - bytes_read, BIO_MAX_SIZE); 2073 end = ((unsigned long)ubuf + map_len + PAGE_SIZE - 1) 2074 >> PAGE_SHIFT; 2075 start = (unsigned long)ubuf >> PAGE_SHIFT; 2076 2077 /* 2078 * A bad offset could cause us to require BIO_MAX_PAGES + 1 2079 * pages. 
If this happens we just lower the requested 2080 * mapping len by a page so that we can fit 2081 */ 2082 if (end - start > BIO_MAX_PAGES) 2083 map_len -= PAGE_SIZE; 2084 2085 ret = __blk_rq_map_user(q, rq, ubuf, map_len); 2086 if (ret < 0) 2087 goto unmap_rq; 2088 if (!bio) 2089 bio = rq->bio; 2090 bytes_read += ret; 2091 ubuf += ret; 2092 } 2093 2094 rq->buffer = rq->data = NULL; 2095 return 0; 2096unmap_rq: 2097 blk_rq_unmap_user(bio); 2098 return ret; 2099} 2100 2101EXPORT_SYMBOL(blk_rq_map_user); 2102 2103/** 2104 * blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage 2105 * @q: request queue where request should be inserted 2106 * @rq: request to map data to 2107 * @iov: pointer to the iovec 2108 * @iov_count: number of elements in the iovec 2109 * @len: I/O byte count 2110 * 2111 * Description: 2112 * Data will be mapped directly for zero copy io, if possible. Otherwise 2113 * a kernel bounce buffer is used. 2114 * 2115 * A matching blk_rq_unmap_user() must be issued at the end of io, while 2116 * still in process context. 2117 * 2118 * Note: The mapped bio may need to be bounced through blk_queue_bounce() 2119 * before being submitted to the device, as pages mapped may be out of 2120 * reach. It's the callers responsibility to make sure this happens. The 2121 * original bio must be passed back in to blk_rq_unmap_user() for proper 2122 * unmapping. 2123 */ 2124int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, 2125 struct sg_iovec *iov, int iov_count, unsigned int len) 2126{ 2127 struct bio *bio; 2128 2129 if (!iov || iov_count <= 0) 2130 return -EINVAL; 2131 2132 /* we don't allow misaligned data like bio_map_user() does. If the 2133 * user is using sg, they're expected to know the alignment constraints 2134 * and respect them accordingly */ 2135 bio = bio_map_user_iov(q, NULL, iov, iov_count, rq_data_dir(rq)== READ); 2136 if (IS_ERR(bio)) 2137 return PTR_ERR(bio); 2138 2139 if (bio->bi_size != len) { 2140 bio_endio(bio, 0); 2141 bio_unmap_user(bio); 2142 return -EINVAL; 2143 } 2144 2145 bio_get(bio); 2146 blk_rq_bio_prep(q, rq, bio); 2147 rq->buffer = rq->data = NULL; 2148 return 0; 2149} 2150 2151EXPORT_SYMBOL(blk_rq_map_user_iov); 2152 2153/** 2154 * blk_rq_unmap_user - unmap a request with user data 2155 * @bio: start of bio list 2156 * 2157 * Description: 2158 * Unmap a rq previously mapped by blk_rq_map_user(). The caller must 2159 * supply the original rq->bio from the blk_rq_map_user() return, since 2160 * the io completion may have changed rq->bio. 
2161 */ 2162int blk_rq_unmap_user(struct bio *bio) 2163{ 2164 struct bio *mapped_bio; 2165 int ret = 0, ret2; 2166 2167 while (bio) { 2168 mapped_bio = bio; 2169 if (unlikely(bio_flagged(bio, BIO_BOUNCED))) 2170 mapped_bio = bio->bi_private; 2171 2172 ret2 = __blk_rq_unmap_user(mapped_bio); 2173 if (ret2 && !ret) 2174 ret = ret2; 2175 2176 mapped_bio = bio; 2177 bio = bio->bi_next; 2178 bio_put(mapped_bio); 2179 } 2180 2181 return ret; 2182} 2183 2184EXPORT_SYMBOL(blk_rq_unmap_user); 2185 2186/** 2187 * blk_rq_map_kern - map kernel data to a request, for REQ_BLOCK_PC usage 2188 * @q: request queue where request should be inserted 2189 * @rq: request to fill 2190 * @kbuf: the kernel buffer 2191 * @len: length of user data 2192 * @gfp_mask: memory allocation flags 2193 */ 2194int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, 2195 unsigned int len, gfp_t gfp_mask) 2196{ 2197 struct bio *bio; 2198 2199 if (len > (q->max_hw_sectors << 9)) 2200 return -EINVAL; 2201 if (!len || !kbuf) 2202 return -EINVAL; 2203 2204 bio = bio_map_kern(q, kbuf, len, gfp_mask); 2205 if (IS_ERR(bio)) 2206 return PTR_ERR(bio); 2207 2208 if (rq_data_dir(rq) == WRITE) 2209 bio->bi_rw |= (1 << BIO_RW); 2210 2211 blk_rq_bio_prep(q, rq, bio); 2212 blk_queue_bounce(q, &rq->bio); 2213 rq->buffer = rq->data = NULL; 2214 return 0; 2215} 2216 2217EXPORT_SYMBOL(blk_rq_map_kern); 2218 2219/** 2220 * blk_execute_rq_nowait - insert a request into queue for execution 2221 * @q: queue to insert the request in 2222 * @bd_disk: matching gendisk 2223 * @rq: request to insert 2224 * @at_head: insert request at head or tail of queue 2225 * @done: I/O completion handler 2226 * 2227 * Description: 2228 * Insert a fully prepared request at the back of the io scheduler queue 2229 * for execution. Don't wait for completion. 2230 */ 2231void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, 2232 struct request *rq, int at_head, 2233 rq_end_io_fn *done) 2234{ 2235 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; 2236 2237 rq->rq_disk = bd_disk; 2238 rq->cmd_flags |= REQ_NOMERGE; 2239 rq->end_io = done; 2240 WARN_ON(irqs_disabled()); 2241 spin_lock_irq(q->queue_lock); 2242 __elv_add_request(q, rq, where, 1); 2243 __generic_unplug_device(q); 2244 spin_unlock_irq(q->queue_lock); 2245} 2246EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); 2247 2248/** 2249 * blk_execute_rq - insert a request into queue for execution 2250 * @q: queue to insert the request in 2251 * @bd_disk: matching gendisk 2252 * @rq: request to insert 2253 * @at_head: insert request at head or tail of queue 2254 * 2255 * Description: 2256 * Insert a fully prepared request at the back of the io scheduler queue 2257 * for execution and wait for completion. 
2258 */ 2259int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk, 2260 struct request *rq, int at_head) 2261{ 2262 DECLARE_COMPLETION_ONSTACK(wait); 2263 char sense[SCSI_SENSE_BUFFERSIZE]; 2264 int err = 0; 2265 2266 /* 2267 * we need an extra reference to the request, so we can look at 2268 * it after io completion 2269 */ 2270 rq->ref_count++; 2271 2272 if (!rq->sense) { 2273 memset(sense, 0, sizeof(sense)); 2274 rq->sense = sense; 2275 rq->sense_len = 0; 2276 } 2277 2278 rq->end_io_data = &wait; 2279 blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq); 2280 wait_for_completion(&wait); 2281 2282 if (rq->errors) 2283 err = -EIO; 2284 2285 return err; 2286} 2287 2288EXPORT_SYMBOL(blk_execute_rq); 2289 2290static void bio_end_empty_barrier(struct bio *bio, int err) 2291{ 2292 if (err) 2293 clear_bit(BIO_UPTODATE, &bio->bi_flags); 2294 2295 complete(bio->bi_private); 2296} 2297 2298/** 2299 * blkdev_issue_flush - queue a flush 2300 * @bdev: blockdev to issue flush for 2301 * @error_sector: error sector 2302 * 2303 * Description: 2304 * Issue a flush for the block device in question. Caller can supply 2305 * room for storing the error offset in case of a flush error, if they 2306 * wish to. Caller must run wait_for_completion() on its own. 2307 */ 2308int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) 2309{ 2310 DECLARE_COMPLETION_ONSTACK(wait); 2311 struct request_queue *q; 2312 struct bio *bio; 2313 int ret; 2314 2315 if (bdev->bd_disk == NULL) 2316 return -ENXIO; 2317 2318 q = bdev_get_queue(bdev); 2319 if (!q) 2320 return -ENXIO; 2321 2322 bio = bio_alloc(GFP_KERNEL, 0); 2323 if (!bio) 2324 return -ENOMEM; 2325 2326 bio->bi_end_io = bio_end_empty_barrier; 2327 bio->bi_private = &wait; 2328 bio->bi_bdev = bdev; 2329 submit_bio(1 << BIO_RW_BARRIER, bio); 2330 2331 wait_for_completion(&wait); 2332 2333 /* 2334 * The driver must store the error location in ->bi_sector, if 2335 * it supports it. For non-stacked drivers, this should be copied 2336 * from rq->sector. 2337 */ 2338 if (error_sector) 2339 *error_sector = bio->bi_sector; 2340 2341 ret = 0; 2342 if (!bio_flagged(bio, BIO_UPTODATE)) 2343 ret = -EIO; 2344 2345 bio_put(bio); 2346 return ret; 2347} 2348 2349EXPORT_SYMBOL(blkdev_issue_flush); 2350 | |
2351static void drive_stat_acct(struct request *rq, int new_io) 2352{ 2353 int rw = rq_data_dir(rq); 2354 2355 if (!blk_fs_request(rq) || !rq->rq_disk) 2356 return; 2357 2358 if (!new_io) { --- 95 unchanged lines hidden (view full) --- 2454 spin_lock_irqsave(q->queue_lock, flags); 2455 __blk_put_request(q, req); 2456 spin_unlock_irqrestore(q->queue_lock, flags); 2457 } 2458} 2459 2460EXPORT_SYMBOL(blk_put_request); 2461 | 1308static void drive_stat_acct(struct request *rq, int new_io) 1309{ 1310 int rw = rq_data_dir(rq); 1311 1312 if (!blk_fs_request(rq) || !rq->rq_disk) 1313 return; 1314 1315 if (!new_io) { --- 95 unchanged lines hidden (view full) --- 1411 spin_lock_irqsave(q->queue_lock, flags); 1412 __blk_put_request(q, req); 1413 spin_unlock_irqrestore(q->queue_lock, flags); 1414 } 1415} 1416 1417EXPORT_SYMBOL(blk_put_request); 1418 |
2462/** 2463 * blk_end_sync_rq - executes a completion event on a request 2464 * @rq: request to complete 2465 * @error: end io status of the request 2466 */ 2467void blk_end_sync_rq(struct request *rq, int error) 2468{ 2469 struct completion *waiting = rq->end_io_data; 2470 2471 rq->end_io_data = NULL; 2472 __blk_put_request(rq->q, rq); 2473 2474 /* 2475 * complete last, if this is a stack request the process (and thus 2476 * the rq pointer) could be invalid right after this complete() 2477 */ 2478 complete(waiting); 2479} 2480EXPORT_SYMBOL(blk_end_sync_rq); 2481 | |
2482/* 2483 * Has to be called with the request spinlock acquired 2484 */ 2485static int attempt_merge(struct request_queue *q, struct request *req, 2486 struct request *next) 2487{ 2488 if (!rq_mergeable(req) || !rq_mergeable(next)) 2489 return 0; --- 62 unchanged lines hidden (view full) --- 2552 struct request *prev = elv_former_request(q, rq); 2553 2554 if (prev) 2555 return attempt_merge(q, prev, rq); 2556 2557 return 0; 2558} 2559 | 1419/* 1420 * Has to be called with the request spinlock acquired 1421 */ 1422static int attempt_merge(struct request_queue *q, struct request *req, 1423 struct request *next) 1424{ 1425 if (!rq_mergeable(req) || !rq_mergeable(next)) 1426 return 0; --- 62 unchanged lines hidden (view full) --- 1489 struct request *prev = elv_former_request(q, rq); 1490 1491 if (prev) 1492 return attempt_merge(q, prev, rq); 1493 1494 return 0; 1495} 1496 |
2560static void init_request_from_bio(struct request *req, struct bio *bio) | 1497void init_request_from_bio(struct request *req, struct bio *bio) |
2561{ 2562 req->cmd_type = REQ_TYPE_FS; 2563 2564 /* 2565 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST) 2566 */ 2567 if (bio_rw_ahead(bio) || bio_failfast(bio)) 2568 req->cmd_flags |= REQ_FAILFAST; --- 950 unchanged lines hidden (view full) --- 3519 **/ 3520int blk_end_request_callback(struct request *rq, int error, int nr_bytes, 3521 int (drv_callback)(struct request *)) 3522{ 3523 return blk_end_io(rq, error, nr_bytes, 0, drv_callback); 3524} 3525EXPORT_SYMBOL_GPL(blk_end_request_callback); 3526 | 1498{ 1499 req->cmd_type = REQ_TYPE_FS; 1500 1501 /* 1502 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST) 1503 */ 1504 if (bio_rw_ahead(bio) || bio_failfast(bio)) 1505 req->cmd_flags |= REQ_FAILFAST; --- 950 unchanged lines hidden (view full) --- 2456 **/ 2457int blk_end_request_callback(struct request *rq, int error, int nr_bytes, 2458 int (drv_callback)(struct request *)) 2459{ 2460 return blk_end_io(rq, error, nr_bytes, 0, drv_callback); 2461} 2462EXPORT_SYMBOL_GPL(blk_end_request_callback); 2463 |
3527static void blk_rq_bio_prep(struct request_queue *q, struct request *rq, 3528 struct bio *bio) | 2464void blk_rq_bio_prep(struct request_queue *q, struct request *rq, 2465 struct bio *bio) |
3529{ 3530 /* first two bits are identical in rq->cmd_flags and bio->bi_rw */ 3531 rq->cmd_flags |= (bio->bi_rw & 3); 3532 3533 rq->nr_phys_segments = bio_phys_segments(q, bio); 3534 rq->nr_hw_segments = bio_hw_segments(q, bio); 3535 rq->current_nr_sectors = bio_cur_sectors(bio); 3536 rq->hard_cur_sectors = rq->current_nr_sectors; --- 29 unchanged lines hidden (view full) --- 3566 panic("Failed to create kblockd\n"); 3567 3568 request_cachep = kmem_cache_create("blkdev_requests", 3569 sizeof(struct request), 0, SLAB_PANIC, NULL); 3570 3571 blk_requestq_cachep = kmem_cache_create("blkdev_queue", 3572 sizeof(struct request_queue), 0, SLAB_PANIC, NULL); 3573 | 2466{ 2467 /* first two bits are identical in rq->cmd_flags and bio->bi_rw */ 2468 rq->cmd_flags |= (bio->bi_rw & 3); 2469 2470 rq->nr_phys_segments = bio_phys_segments(q, bio); 2471 rq->nr_hw_segments = bio_hw_segments(q, bio); 2472 rq->current_nr_sectors = bio_cur_sectors(bio); 2473 rq->hard_cur_sectors = rq->current_nr_sectors; --- 29 unchanged lines hidden (view full) --- 2503 panic("Failed to create kblockd\n"); 2504 2505 request_cachep = kmem_cache_create("blkdev_requests", 2506 sizeof(struct request), 0, SLAB_PANIC, NULL); 2507 2508 blk_requestq_cachep = kmem_cache_create("blkdev_queue", 2509 sizeof(struct request_queue), 0, SLAB_PANIC, NULL); 2510 |
3574 iocontext_cachep = kmem_cache_create("blkdev_ioc", 3575 sizeof(struct io_context), 0, SLAB_PANIC, NULL); 3576 | |
3577 for_each_possible_cpu(i) 3578 INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i)); 3579 3580 open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL); 3581 register_hotcpu_notifier(&blk_cpu_notifier); 3582 | 2511 for_each_possible_cpu(i) 2512 INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i)); 2513 2514 open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL); 2515 register_hotcpu_notifier(&blk_cpu_notifier); 2516 |
3583 blk_max_low_pfn = max_low_pfn - 1; 3584 blk_max_pfn = max_pfn - 1; 3585 | |
3586 return 0; 3587} 3588 | 2517 return 0; 2518} 2519 |
3589static void cfq_dtor(struct io_context *ioc) 3590{ 3591 struct cfq_io_context *cic[1]; 3592 int r; 3593 3594 /* 3595 * We don't have a specific key to lookup with, so use the gang 3596 * lookup to just retrieve the first item stored. The cfq exit 3597 * function will iterate the full tree, so any member will do. 3598 */ 3599 r = radix_tree_gang_lookup(&ioc->radix_root, (void **) cic, 0, 1); 3600 if (r > 0) 3601 cic[0]->dtor(ioc); 3602} 3603 3604/* 3605 * IO Context helper functions. put_io_context() returns 1 if there are no 3606 * more users of this io context, 0 otherwise. 3607 */ 3608int put_io_context(struct io_context *ioc) 3609{ 3610 if (ioc == NULL) 3611 return 1; 3612 3613 BUG_ON(atomic_read(&ioc->refcount) == 0); 3614 3615 if (atomic_dec_and_test(&ioc->refcount)) { 3616 rcu_read_lock(); 3617 if (ioc->aic && ioc->aic->dtor) 3618 ioc->aic->dtor(ioc->aic); 3619 rcu_read_unlock(); 3620 cfq_dtor(ioc); 3621 3622 kmem_cache_free(iocontext_cachep, ioc); 3623 return 1; 3624 } 3625 return 0; 3626} 3627EXPORT_SYMBOL(put_io_context); 3628 3629static void cfq_exit(struct io_context *ioc) 3630{ 3631 struct cfq_io_context *cic[1]; 3632 int r; 3633 3634 rcu_read_lock(); 3635 /* 3636 * See comment for cfq_dtor() 3637 */ 3638 r = radix_tree_gang_lookup(&ioc->radix_root, (void **) cic, 0, 1); 3639 rcu_read_unlock(); 3640 3641 if (r > 0) 3642 cic[0]->exit(ioc); 3643} 3644 3645/* Called by the exitting task */ 3646void exit_io_context(void) 3647{ 3648 struct io_context *ioc; 3649 3650 task_lock(current); 3651 ioc = current->io_context; 3652 current->io_context = NULL; 3653 task_unlock(current); 3654 3655 if (atomic_dec_and_test(&ioc->nr_tasks)) { 3656 if (ioc->aic && ioc->aic->exit) 3657 ioc->aic->exit(ioc->aic); 3658 cfq_exit(ioc); 3659 3660 put_io_context(ioc); 3661 } 3662} 3663 3664struct io_context *alloc_io_context(gfp_t gfp_flags, int node) 3665{ 3666 struct io_context *ret; 3667 3668 ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node); 3669 if (ret) { 3670 atomic_set(&ret->refcount, 1); 3671 atomic_set(&ret->nr_tasks, 1); 3672 spin_lock_init(&ret->lock); 3673 ret->ioprio_changed = 0; 3674 ret->ioprio = 0; 3675 ret->last_waited = jiffies; /* doesn't matter... */ 3676 ret->nr_batch_requests = 0; /* because this is 0 */ 3677 ret->aic = NULL; 3678 INIT_RADIX_TREE(&ret->radix_root, GFP_ATOMIC | __GFP_HIGH); 3679 ret->ioc_data = NULL; 3680 } 3681 3682 return ret; 3683} 3684 3685/* 3686 * If the current task has no IO context then create one and initialise it. 3687 * Otherwise, return its existing IO context. 3688 * 3689 * This returned IO context doesn't have a specifically elevated refcount, 3690 * but since the current task itself holds a reference, the context can be 3691 * used in general code, so long as it stays within `current` context. 3692 */ 3693static struct io_context *current_io_context(gfp_t gfp_flags, int node) 3694{ 3695 struct task_struct *tsk = current; 3696 struct io_context *ret; 3697 3698 ret = tsk->io_context; 3699 if (likely(ret)) 3700 return ret; 3701 3702 ret = alloc_io_context(gfp_flags, node); 3703 if (ret) { 3704 /* make sure set_task_ioprio() sees the settings above */ 3705 smp_wmb(); 3706 tsk->io_context = ret; 3707 } 3708 3709 return ret; 3710} 3711 3712/* 3713 * If the current task has no IO context then create one and initialise it. 3714 * If it does have a context, take a ref on it. 3715 * 3716 * This is always called in the context of the task which submitted the I/O. 
3717 */ 3718struct io_context *get_io_context(gfp_t gfp_flags, int node) 3719{ 3720 struct io_context *ret = NULL; 3721 3722 /* 3723 * Check for unlikely race with exiting task. ioc ref count is 3724 * zero when ioc is being detached. 3725 */ 3726 do { 3727 ret = current_io_context(gfp_flags, node); 3728 if (unlikely(!ret)) 3729 break; 3730 } while (!atomic_inc_not_zero(&ret->refcount)); 3731 3732 return ret; 3733} 3734EXPORT_SYMBOL(get_io_context); 3735 3736void copy_io_context(struct io_context **pdst, struct io_context **psrc) 3737{ 3738 struct io_context *src = *psrc; 3739 struct io_context *dst = *pdst; 3740 3741 if (src) { 3742 BUG_ON(atomic_read(&src->refcount) == 0); 3743 atomic_inc(&src->refcount); 3744 put_io_context(dst); 3745 *pdst = src; 3746 } 3747} 3748EXPORT_SYMBOL(copy_io_context); 3749 3750void swap_io_context(struct io_context **ioc1, struct io_context **ioc2) 3751{ 3752 struct io_context *temp; 3753 temp = *ioc1; 3754 *ioc1 = *ioc2; 3755 *ioc2 = temp; 3756} 3757EXPORT_SYMBOL(swap_io_context); 3758 | |
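
The left-hand hunks above drop the queue-settings helpers (blk_queue_make_request(), blk_queue_max_sectors(), blk_queue_bounce_limit() and friends) from blk-core.c; in the new tree they live in blk-settings.c. As a reminder of how these exported setters are consumed, here is a minimal sketch of a hypothetical request_fn-based driver applying them at queue-allocation time. The mydrv_* names, the spinlock, and the particular limit values are invented for illustration and are not part of the patch.

```c
#include <linux/blkdev.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(mydrv_lock);

static void mydrv_request(struct request_queue *q)
{
	/* pull requests off with elv_next_request() and hand them to hardware */
}

static struct request_queue *mydrv_alloc_queue(void)
{
	struct request_queue *q;

	q = blk_init_queue(mydrv_request, &mydrv_lock);
	if (!q)
		return NULL;

	/* illustrative limits; a real driver derives these from its hardware */
	blk_queue_max_sectors(q, 256);			/* 128 KiB per request */
	blk_queue_max_phys_segments(q, 32);
	blk_queue_max_hw_segments(q, 32);
	blk_queue_max_segment_size(q, 65536);
	blk_queue_hardsect_size(q, 512);
	blk_queue_dma_alignment(q, 511);
	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);	/* controller can DMA to any page */

	return q;
}
```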
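
blk_queue_make_request() (also moved to blk-settings.c) is the hook described in its kerneldoc above for virtual and stacking drivers that bypass the request queue. A minimal sketch, assuming a hypothetical bio-based driver that simply completes every bio without touching its data; note bio_endio() is the two-argument form used elsewhere in this file.

```c
#include <linux/blkdev.h>
#include <linux/bio.h>

/* complete every bio immediately, as a trivial "null" device would */
static int mydrv_make_request(struct request_queue *q, struct bio *bio)
{
	bio_endio(bio, 0);
	return 0;		/* bio fully handled, do not resubmit */
}

static struct request_queue *mydrv_alloc_bio_queue(void)
{
	struct request_queue *q = blk_alloc_queue(GFP_KERNEL);

	if (q)
		blk_queue_make_request(q, mydrv_make_request);
	return q;
}
```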
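
The ordered-write (barrier) machinery removed above moves to blk-barrier.c. For context, this is roughly how a driver for a device with a write-back cache advertises barrier support, in the style of sd's prepare_flush hook: a sketch only, with the SYNCHRONIZE CACHE CDB, the timeout, and the QUEUE_ORDERED_DRAIN_FLUSH choice standing in for whatever the real hardware needs.

```c
#include <linux/blkdev.h>
#include <linux/string.h>
#include <scsi/scsi.h>

/* build the device's cache-flush command into the proxy flush request */
static void mydrv_prepare_flush(struct request_queue *q, struct request *rq)
{
	memset(rq->cmd, 0, sizeof(rq->cmd));
	rq->cmd_type = REQ_TYPE_BLOCK_PC;
	rq->cmd[0] = SYNCHRONIZE_CACHE;
	rq->cmd_len = 10;
	rq->timeout = 60 * HZ;
}

static void mydrv_enable_barriers(struct request_queue *q)
{
	/* drain the queue and issue a cache flush around each barrier */
	blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH, mydrv_prepare_flush);
}
```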
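
blkdev_issue_flush(), also on its way to blk-barrier.c, is the filesystem-facing wrapper around an empty barrier bio. Usage is a one-liner; the wrapper name below is hypothetical.

```c
#include <linux/blkdev.h>

/* flush a device's volatile write cache; 0 on success, -EIO/-ENXIO/-ENOMEM on failure */
static int mydrv_flush_cache(struct block_device *bdev)
{
	sector_t error_sector;

	return blkdev_issue_flush(bdev, &error_sector);
}
```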
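
The request-mapping helpers (blk_rq_map_user(), blk_rq_map_kern(), blk_rq_unmap_user()) and the synchronous execution helpers (blk_execute_rq() and friends) removed above end up in blk-map.c and blk-exec.c respectively. Below is a hedged sketch of the REQ_BLOCK_PC pattern they support, issuing a SCSI INQUIRY with a kernel buffer; the function name, the 36-byte allocation length and the timeout are illustrative, and blk_rq_map_user() would be the analogous call for a __user buffer.

```c
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <scsi/scsi.h>

static int mydrv_inquiry(struct request_queue *q, struct gendisk *disk)
{
	struct request *rq;
	void *buf;
	int err;

	buf = kzalloc(36, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	rq = blk_get_request(q, READ, GFP_KERNEL);
	if (!rq) {
		kfree(buf);
		return -ENOMEM;
	}

	rq->cmd_type = REQ_TYPE_BLOCK_PC;
	memset(rq->cmd, 0, sizeof(rq->cmd));
	rq->cmd[0] = INQUIRY;
	rq->cmd[4] = 36;			/* allocation length */
	rq->cmd_len = 6;
	rq->timeout = 30 * HZ;

	err = blk_rq_map_kern(q, rq, buf, 36, GFP_KERNEL);
	if (!err)
		err = blk_execute_rq(q, disk, rq, 0);	/* waits for completion */

	blk_put_request(rq);
	/* on success the INQUIRY data is in buf */
	kfree(buf);
	return err;
}
```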
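
Finally, the io_context allocation and reference-counting code (get_io_context(), put_io_context(), the iocontext_cachep slab) is what becomes blk-ioc.c. Its consumers are the I/O schedulers; the sketch below shows only the reference discipline, with invented mydrv_* wrappers, and assumes the queue's NUMA node is the right allocation hint as in CFQ. (The header holding struct io_context was in flux during this series, so the include is an assumption.)

```c
#include <linux/blkdev.h>

/* take a counted reference on the current task's io_context (may sleep) */
static struct io_context *mydrv_get_ioc(struct request_queue *q)
{
	return get_io_context(GFP_NOIO, q->node);
}

/* drop the reference once the scheduler no longer caches the pointer */
static void mydrv_put_ioc(struct io_context *ioc)
{
	put_io_context(ioc);
}
```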