/*
 * mm/readahead.c - address_space-level file readahead.
 *
 * Copyright (C) 2002, Linus Torvalds
 *
 * 09Apr2002	akpm@zip.com.au
 *		Initial version.
 */

#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/pagevec.h>
#include <linux/pagemap.h>

void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
{
}
EXPORT_SYMBOL(default_unplug_io_fn);

struct backing_dev_info default_backing_dev_info = {
	.ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
	.state		= 0,
	.capabilities	= BDI_CAP_MAP_COPY,
	.unplug_io_fn	= default_unplug_io_fn,
};
EXPORT_SYMBOL_GPL(default_backing_dev_info);

/*
 * Initialise a struct file's readahead state.  Assumes that the caller has
 * memset *ra to zero.
 */
void
file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
{
	ra->ra_pages = mapping->backing_dev_info->ra_pages;
	ra->prev_pos = -1;
}
EXPORT_SYMBOL_GPL(file_ra_state_init);

#define list_to_page(head) (list_entry((head)->prev, struct page, lru))
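/*
 * Illustrative sketch (not part of this file; a hypothetical caller): how
 * an open path might initialise a file's readahead state before any reads
 * are issued.  Note the memset, which file_ra_state_init() above assumes
 * has already been done by the caller.
 *
 *	static int example_open(struct inode *inode, struct file *filp)
 *	{
 *		memset(&filp->f_ra, 0, sizeof(filp->f_ra));
 *		file_ra_state_init(&filp->f_ra, inode->i_mapping);
 *		return 0;
 *	}
 */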
/**
 * read_cache_pages - populate an address space with some pages & start reads against them
 * @mapping: the address_space
 * @pages: The address of a list_head which contains the target pages.  These
 *   pages have their ->index populated and are otherwise uninitialised.
 * @filler: callback routine for filling a single page.
 * @data: private data for the callback routine.
 *
 * Hides the details of the LRU cache etc from the filesystems.
 */
int read_cache_pages(struct address_space *mapping, struct list_head *pages,
			int (*filler)(void *, struct page *), void *data)
{
	struct page *page;
	int ret = 0;

	while (!list_empty(pages)) {
		page = list_to_page(pages);
		list_del(&page->lru);
		if (add_to_page_cache_lru(page, mapping,
					page->index, GFP_KERNEL)) {
			page_cache_release(page);
			continue;
		}
		page_cache_release(page);

		ret = filler(data, page);
		if (unlikely(ret)) {
			put_pages_list(pages);
			break;
		}
		task_io_account_read(PAGE_CACHE_SIZE);
	}
	return ret;
}

EXPORT_SYMBOL(read_cache_pages);

static int read_pages(struct address_space *mapping, struct file *filp,
		struct list_head *pages, unsigned nr_pages)
{
	unsigned page_idx;
	int ret;

	if (mapping->a_ops->readpages) {
		ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
		/* Clean up the remaining pages */
		put_pages_list(pages);
		goto out;
	}

	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
		struct page *page = list_to_page(pages);
		list_del(&page->lru);
		if (!add_to_page_cache_lru(page, mapping,
					page->index, GFP_KERNEL)) {
			mapping->a_ops->readpage(filp, page);
		}
		page_cache_release(page);
	}
	ret = 0;
out:
	return ret;
}
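/*
 * Illustrative sketch (hypothetical names): the shape of a @filler callback
 * as read_cache_pages() expects one.  A real filesystem would start its own
 * read I/O here; example_readpage() and page_list are assumptions for the
 * example, not real APIs.
 *
 *	static int example_filler(void *data, struct page *page)
 *	{
 *		struct file *filp = data;
 *
 *		return example_readpage(filp, page);
 *	}
 *
 *	err = read_cache_pages(mapping, &page_list, example_filler, filp);
 */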
/*
 * do_page_cache_readahead actually reads a chunk of disk.  It allocates all
 * the pages first, then submits them all for I/O.  This avoids the very bad
 * behaviour which would occur if page allocations are causing VM writeback.
 * We really don't want to intermingle reads and writes like that.
 *
 * Returns the number of pages requested, or the maximum amount of I/O allowed.
 *
 * do_page_cache_readahead() returns -1 if it encountered request queue
 * congestion.
 */
static int
__do_page_cache_readahead(struct address_space *mapping, struct file *filp,
			pgoff_t offset, unsigned long nr_to_read,
			unsigned long lookahead_size)
{
	struct inode *inode = mapping->host;
	struct page *page;
	unsigned long end_index;	/* The last page we want to read */
	LIST_HEAD(page_pool);
	int page_idx;
	int ret = 0;
	loff_t isize = i_size_read(inode);

	if (isize == 0)
		goto out;

	end_index = ((isize - 1) >> PAGE_CACHE_SHIFT);

	/*
	 * Preallocate as many pages as we will need.
	 */
	for (page_idx = 0; page_idx < nr_to_read; page_idx++) {
		pgoff_t page_offset = offset + page_idx;

		if (page_offset > end_index)
			break;

		rcu_read_lock();
		page = radix_tree_lookup(&mapping->page_tree, page_offset);
		rcu_read_unlock();
		if (page)
			continue;

		page = page_cache_alloc_cold(mapping);
		if (!page)
			break;
		page->index = page_offset;
		list_add(&page->lru, &page_pool);
		if (page_idx == nr_to_read - lookahead_size)
			SetPageReadahead(page);
		ret++;
	}

	/*
	 * Now start the IO.  We ignore I/O errors - if the page is not
	 * uptodate then the caller will launch readpage again, and
	 * will then handle the error.
	 */
	if (ret)
		read_pages(mapping, filp, &page_pool, ret);
	BUG_ON(!list_empty(&page_pool));
out:
	return ret;
}
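/*
 * Worked example (assumed numbers): with offset == 100, nr_to_read == 16 and
 * lookahead_size == 8, pages 100..115 are allocated and the page at index
 * 108 (page_idx == 16 - 8) gets PG_readahead, so the next asynchronous
 * readahead is triggered while the reader is still 8 pages short of the end
 * of this window.
 */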
/*
 * Chunk the readahead into 2 megabyte units, so that we don't pin too much
 * memory at once.
 */
int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
		pgoff_t offset, unsigned long nr_to_read)
{
	int ret = 0;

	if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages))
		return -EINVAL;

	while (nr_to_read) {
		int err;

		unsigned long this_chunk = (2 * 1024 * 1024) / PAGE_CACHE_SIZE;

		if (this_chunk > nr_to_read)
			this_chunk = nr_to_read;
		err = __do_page_cache_readahead(mapping, filp,
						offset, this_chunk, 0);
		if (err < 0) {
			ret = err;
			break;
		}
		ret += err;
		offset += this_chunk;
		nr_to_read -= this_chunk;
	}
	return ret;
}

/*
 * This version skips the IO if the queue is read-congested, and will tell the
 * block layer to abandon the readahead if request allocation would block.
 *
 * force_page_cache_readahead() will ignore queue congestion and will block on
 * request queues.
 */
int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
			pgoff_t offset, unsigned long nr_to_read)
{
	if (bdi_read_congested(mapping->backing_dev_info))
		return -1;

	return __do_page_cache_readahead(mapping, filp, offset, nr_to_read, 0);
}

/*
 * Given a desired number of PAGE_CACHE_SIZE readahead pages, return a
 * sensible upper limit.
 */
unsigned long max_sane_readahead(unsigned long nr)
{
	return min(nr, (node_page_state(numa_node_id(), NR_INACTIVE)
		+ node_page_state(numa_node_id(), NR_FREE_PAGES)) / 2);
}

static int __init readahead_init(void)
{
	int err;

	err = bdi_init(&default_backing_dev_info);
	if (!err)
		bdi_register(&default_backing_dev_info, NULL, "default");

	return err;
}
subsys_initcall(readahead_init);
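/*
 * Worked example (assuming PAGE_CACHE_SIZE == 4096): a forced readahead of
 * nr_to_read == 1536 pages (6MB) is split by the chunking loop in
 * force_page_cache_readahead() above into three 512-page (2MB) calls to
 * __do_page_cache_readahead(), so no more than 2MB worth of pages is pinned
 * at any one time.
 */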
/*
 * Submit IO for the read-ahead request in file_ra_state.
 */
static unsigned long ra_submit(struct file_ra_state *ra,
		       struct address_space *mapping, struct file *filp)
{
	int actual;

	actual = __do_page_cache_readahead(mapping, filp,
					ra->start, ra->size, ra->async_size);

	return actual;
}

/*
 * Set the initial window size: round the request size up to the next power
 * of two, then scale it up - x4 if it is tiny relative to max, x2 if it is
 * small - and clamp the result to max.
 */
static unsigned long get_init_ra_size(unsigned long size, unsigned long max)
{
	unsigned long newsize = roundup_pow_of_two(size);

	if (newsize <= max / 32)
		newsize = newsize * 4;
	else if (newsize <= max / 4)
		newsize = newsize * 2;
	else
		newsize = max;

	return newsize;
}

/*
 * Get the previous window size, ramp it up, and
 * return it as the new window size.
 */
static unsigned long get_next_ra_size(struct file_ra_state *ra,
						unsigned long max)
{
	unsigned long cur = ra->size;
	unsigned long newsize;

	if (cur < max / 16)
		newsize = 4 * cur;
	else
		newsize = 2 * cur;

	return min(newsize, max);
}
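/*
 * Worked example (assumed numbers, max == 32 pages): a 1-page initial read
 * gets get_init_ra_size(1, 32) == 4; each subsequent sequential hit then
 * ramps the window via get_next_ra_size(), 4 -> 8 -> 16 -> 32, after which
 * it stays clamped at max.
 */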
/*
 * On-demand readahead design.
 *
 * The fields in struct file_ra_state represent the most-recently-executed
 * readahead attempt:
 *
 *                        |<----- async_size ---------|
 *     |------------------- size -------------------->|
 *     |==================#===========================|
 *     ^start             ^page marked with PG_readahead
 *
 * To overlap application thinking time and disk I/O time, we do
 * `readahead pipelining': Do not wait until the application consumed all
 * readahead pages and stalled on the missing page at readahead_index;
 * Instead, submit an asynchronous readahead I/O as soon as there are
 * only async_size pages left in the readahead window.  Normally async_size
 * will be equal to size, for maximum pipelining.
 *
 * In interleaved sequential reads, concurrent streams on the same fd can
 * invalidate each other's readahead state.  So we flag the new readahead
 * page at (start+size-async_size) with PG_readahead, and use it as a
 * readahead indicator.  The flag won't be set on already cached pages, to
 * avoid the readahead-for-nothing fuss, saving pointless page cache lookups.
 *
 * prev_pos tracks the last visited byte in the _previous_ read request.
 * It should be maintained by the caller, and will be used for detecting
 * small random reads.  Note that the readahead algorithm checks loosely
 * for sequential patterns.  Hence interleaved reads might be served as
 * sequential ones.
 *
 * There is a special-case: if the first page which the application tries to
 * read happens to be the first page of the file, it is assumed that a linear
 * read is about to happen and the window is immediately set to the initial
 * size based on the I/O request size and max_readahead.
 *
 * The code ramps up the readahead size aggressively at first, but slows
 * down as it approaches max_readahead.
 */
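/*
 * Worked example of the window advance (assumed numbers, max == 32): with
 * ra->start == 16, ra->size == 16 and ra->async_size == 8, the PG_readahead
 * marker sits at index 16 + 16 - 8 == 24.  When the reader reaches index 24,
 * ondemand_readahead() below advances the window to ra->start == 32,
 * ra->size == ra->async_size == 32, and submits I/O for pages 32..63 while
 * pages 24..31 are still being consumed.
 */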
/*
 * A minimal readahead algorithm for trivial sequential/random reads.
 */
static unsigned long
ondemand_readahead(struct address_space *mapping,
		   struct file_ra_state *ra, struct file *filp,
		   bool hit_readahead_marker, pgoff_t offset,
		   unsigned long req_size)
{
	int	max = ra->ra_pages;	/* max readahead pages */
	pgoff_t prev_offset;
	int	sequential;

	/*
	 * It's the expected callback offset, assume sequential access.
	 * Ramp up sizes, and push forward the readahead window.
	 */
	if (offset && (offset == (ra->start + ra->size - ra->async_size) ||
			offset == (ra->start + ra->size))) {
		ra->start += ra->size;
		ra->size = get_next_ra_size(ra, max);
		ra->async_size = ra->size;
		goto readit;
	}

	prev_offset = ra->prev_pos >> PAGE_CACHE_SHIFT;
	sequential = offset - prev_offset <= 1UL || req_size > max;

	/*
	 * Standalone, small read.
	 * Read as is, and do not pollute the readahead state.
	 */
	if (!hit_readahead_marker && !sequential) {
		return __do_page_cache_readahead(mapping, filp,
						offset, req_size, 0);
	}

	/*
	 * Hit a marked page without valid readahead state.
	 * E.g. interleaved reads.
	 * Query the pagecache for async_size, which normally equals the
	 * readahead size.  Ramp it up and use it as the new readahead size.
	 */
	if (hit_readahead_marker) {
		pgoff_t start;

		rcu_read_lock();
		start = radix_tree_next_hole(&mapping->page_tree, offset,
						max + 1);
		rcu_read_unlock();

		if (!start || start - offset > max)
			return 0;

		ra->start = start;
		ra->size = start - offset;	/* old async_size */
		ra->size = get_next_ra_size(ra, max);
		ra->async_size = ra->size;
		goto readit;
	}

	/*
	 * It may be one of
	 *	- first read on start of file
	 *	- sequential cache miss
	 *	- oversize random read
	 * Start readahead for it.
	 */
	ra->start = offset;
	ra->size = get_init_ra_size(req_size, max);
	ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;

readit:
	return ra_submit(ra, mapping, filp);
}
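/*
 * Worked example for the hit_readahead_marker path (assumed numbers,
 * max == 32): a stream with no valid per-file state hits a PG_readahead
 * page at offset 100 while pages 100..107 are cached.
 * radix_tree_next_hole() returns 108, the inferred old async_size is
 * 108 - 100 == 8, and get_next_ra_size() ramps it to 16, so readahead is
 * submitted for pages 108..123.
 */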
/**
 * page_cache_sync_readahead - generic file readahead
 * @mapping: address_space which holds the pagecache and I/O vectors
 * @ra: file_ra_state which holds the readahead state
 * @filp: passed on to ->readpage() and ->readpages()
 * @offset: start offset into @mapping, in pagecache page-sized units
 * @req_size: hint: total size of the read which the caller is performing in
 *            pagecache pages
 *
 * page_cache_sync_readahead() should be called when a cache miss happened:
 * it will submit the read.  The readahead logic may decide to piggyback more
 * pages onto the read request if access patterns suggest it will improve
 * performance.
 */
void page_cache_sync_readahead(struct address_space *mapping,
			       struct file_ra_state *ra, struct file *filp,
			       pgoff_t offset, unsigned long req_size)
{
	/* no read-ahead */
	if (!ra->ra_pages)
		return;

	/* do read-ahead */
	ondemand_readahead(mapping, ra, filp, false, offset, req_size);
}
EXPORT_SYMBOL_GPL(page_cache_sync_readahead);

/**
 * page_cache_async_readahead - file readahead for marked pages
 * @mapping: address_space which holds the pagecache and I/O vectors
 * @ra: file_ra_state which holds the readahead state
 * @filp: passed on to ->readpage() and ->readpages()
 * @page: the page at @offset which has the PG_readahead flag set
 * @offset: start offset into @mapping, in pagecache page-sized units
 * @req_size: hint: total size of the read which the caller is performing in
 *            pagecache pages
 *
 * page_cache_async_readahead() should be called when a page is used which
 * has the PG_readahead flag; this is a marker to suggest that the application
 * has used up enough of the readahead window that we should start pulling in
 * more pages.
 */
void
page_cache_async_readahead(struct address_space *mapping,
			   struct file_ra_state *ra, struct file *filp,
			   struct page *page, pgoff_t offset,
			   unsigned long req_size)
{
	/* no read-ahead */
	if (!ra->ra_pages)
		return;

	/*
	 * Same bit is used for PG_readahead and PG_reclaim.
	 */
	if (PageWriteback(page))
		return;

	ClearPageReadahead(page);

	/*
	 * Defer asynchronous read-ahead on IO congestion.
	 */
	if (bdi_read_congested(mapping->backing_dev_info))
		return;

	/* do read-ahead */
	ondemand_readahead(mapping, ra, filp, true, offset, req_size);
}
EXPORT_SYMBOL_GPL(page_cache_async_readahead);
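/*
 * Illustrative sketch (simplified, assumed control flow; index, last_index
 * and filp are the would-be caller's variables): how a generic buffered
 * read loop is expected to drive the two entry points above.  A cache miss
 * takes the synchronous path; touching a PG_readahead marker page takes the
 * asynchronous one.
 *
 *	page = find_get_page(mapping, index);
 *	if (!page) {
 *		page_cache_sync_readahead(mapping, &filp->f_ra, filp,
 *					  index, last_index - index);
 *		page = find_get_page(mapping, index);
 *	} else if (PageReadahead(page)) {
 *		page_cache_async_readahead(mapping, &filp->f_ra, filp,
 *					   page, index, last_index - index);
 *	}
 */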