/*
 * mm/readahead.c - address_space-level file readahead.
 *
 * Copyright (C) 2002, Linus Torvalds
 *
 * 09Apr2002	akpm@zip.com.au
 *		Initial version.
 */

#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/pagevec.h>
#include <linux/pagemap.h>

void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
{
}
EXPORT_SYMBOL(default_unplug_io_fn);

/*
 * Convenient macros for min/max read-ahead pages.
 * Note that MAX_RA_PAGES is rounded down, while MIN_RA_PAGES is rounded up.
 * The latter is necessary for systems with large page sizes (i.e. 64k).
 */
#define MAX_RA_PAGES	(VM_MAX_READAHEAD*1024 / PAGE_CACHE_SIZE)
#define MIN_RA_PAGES	DIV_ROUND_UP(VM_MIN_READAHEAD*1024, PAGE_CACHE_SIZE)
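/*
 * Example (assuming the usual VM_MAX_READAHEAD = 128 and VM_MIN_READAHEAD = 16,
 * both in kbytes): with 4k pages these evaluate to MAX_RA_PAGES = 32 and
 * MIN_RA_PAGES = 4; with 64k pages, MAX_RA_PAGES = 2 and MIN_RA_PAGES = 1.
 * Plain division would yield 0 in the last case, hence the DIV_ROUND_UP.
 */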
struct backing_dev_info default_backing_dev_info = {
	.ra_pages	= MAX_RA_PAGES,
	.state		= 0,
	.capabilities	= BDI_CAP_MAP_COPY,
	.unplug_io_fn	= default_unplug_io_fn,
};
EXPORT_SYMBOL_GPL(default_backing_dev_info);

/*
 * Initialise a struct file's readahead state.  Assumes that the caller has
 * memset *ra to zero.
 */
void
file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
{
	ra->ra_pages = mapping->backing_dev_info->ra_pages;
	ra->prev_pos = -1;
}
EXPORT_SYMBOL_GPL(file_ra_state_init);

#define list_to_page(head) (list_entry((head)->prev, struct page, lru))

/**
 * read_cache_pages - populate an address space with some pages & start reads against them
 * @mapping: the address_space
 * @pages: The address of a list_head which contains the target pages.  These
 *   pages have their ->index populated and are otherwise uninitialised.
 * @filler: callback routine for filling a single page.
 * @data: private data for the callback routine.
 *
 * Hides the details of the LRU cache etc from the filesystems.
 */
int read_cache_pages(struct address_space *mapping, struct list_head *pages,
			int (*filler)(void *, struct page *), void *data)
{
	struct page *page;
	struct pagevec lru_pvec;
	int ret = 0;

	pagevec_init(&lru_pvec, 0);

	while (!list_empty(pages)) {
		page = list_to_page(pages);
		list_del(&page->lru);
		if (add_to_page_cache(page, mapping, page->index, GFP_KERNEL)) {
			page_cache_release(page);
			continue;
		}
		ret = filler(data, page);
		if (!pagevec_add(&lru_pvec, page))
			__pagevec_lru_add(&lru_pvec);
		if (ret) {
			put_pages_list(pages);
			break;
		}
		task_io_account_read(PAGE_CACHE_SIZE);
	}
	pagevec_lru_add(&lru_pvec);
	return ret;
}

EXPORT_SYMBOL(read_cache_pages);
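/*
 * Illustrative sketch only: a typical caller passes a filler that starts
 * I/O on one page.  "my_fill_page" and "my_start_read" below are
 * hypothetical names, not part of this file:
 *
 *	static int my_fill_page(void *data, struct page *page)
 *	{
 *		struct my_read_state *state = data;	(hypothetical type)
 *
 *		return my_start_read(state, page);	(queue the read)
 *	}
 *
 *	err = read_cache_pages(mapping, pages, my_fill_page, state);
 *
 * The filler only runs for pages successfully added to the page cache;
 * a non-zero return stops the loop and releases the unprocessed pages.
 */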
static int read_pages(struct address_space *mapping, struct file *filp,
		struct list_head *pages, unsigned nr_pages)
{
	unsigned page_idx;
	struct pagevec lru_pvec;
	int ret;

	if (mapping->a_ops->readpages) {
		ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
		/* Clean up the remaining pages */
		put_pages_list(pages);
		goto out;
	}

	pagevec_init(&lru_pvec, 0);
	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
		struct page *page = list_to_page(pages);
		list_del(&page->lru);
		if (!add_to_page_cache(page, mapping,
					page->index, GFP_KERNEL)) {
			mapping->a_ops->readpage(filp, page);
			if (!pagevec_add(&lru_pvec, page))
				__pagevec_lru_add(&lru_pvec);
		} else
			page_cache_release(page);
	}
	pagevec_lru_add(&lru_pvec);
	ret = 0;
out:
	return ret;
}

/*
 * __do_page_cache_readahead() actually reads a chunk of disk.  It allocates
 * all the pages first, then submits them all for I/O.  This avoids the very
 * bad behaviour which would occur if page allocations are causing VM
 * writeback.  We really don't want to intermingle reads and writes like that.
 *
 * Returns the number of pages requested, or the maximum amount of I/O allowed.
 *
 * do_page_cache_readahead() returns -1 if it encountered request queue
 * congestion.
 */
static int
__do_page_cache_readahead(struct address_space *mapping, struct file *filp,
			pgoff_t offset, unsigned long nr_to_read,
			unsigned long lookahead_size)
{
	struct inode *inode = mapping->host;
	struct page *page;
	unsigned long end_index;	/* The last page we want to read */
	LIST_HEAD(page_pool);
	int page_idx;
	int ret = 0;
	loff_t isize = i_size_read(inode);

	if (isize == 0)
		goto out;

	end_index = ((isize - 1) >> PAGE_CACHE_SHIFT);

	/*
	 * Preallocate as many pages as we will need.
	 */
	read_lock_irq(&mapping->tree_lock);
	for (page_idx = 0; page_idx < nr_to_read; page_idx++) {
		pgoff_t page_offset = offset + page_idx;

		if (page_offset > end_index)
			break;

		page = radix_tree_lookup(&mapping->page_tree, page_offset);
		if (page)
			continue;

		read_unlock_irq(&mapping->tree_lock);
		page = page_cache_alloc_cold(mapping);
		read_lock_irq(&mapping->tree_lock);
		if (!page)
			break;
		page->index = page_offset;
		list_add(&page->lru, &page_pool);
		if (page_idx == nr_to_read - lookahead_size)
			SetPageReadahead(page);
		ret++;
	}
	read_unlock_irq(&mapping->tree_lock);

	/*
	 * Now start the IO.  We ignore I/O errors - if the page is not
	 * uptodate then the caller will launch readpage again, and
	 * will then handle the error.
	 */
	if (ret)
		read_pages(mapping, filp, &page_pool, ret);
	BUG_ON(!list_empty(&page_pool));
out:
	return ret;
}
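/*
 * Worked example of the lookahead marker above: with nr_to_read = 16 and
 * lookahead_size = 4, PG_readahead is set on the page at offset + 12, so a
 * sequential reader triggers the next asynchronous readahead while 4 pages
 * of this batch are still unread.
 */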
/*
 * Chunk the readahead into 2 megabyte units, so that we don't pin too much
 * memory at once.
 */
int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
		pgoff_t offset, unsigned long nr_to_read)
{
	int ret = 0;

	if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages))
		return -EINVAL;

	while (nr_to_read) {
		int err;

		unsigned long this_chunk = (2 * 1024 * 1024) / PAGE_CACHE_SIZE;

		if (this_chunk > nr_to_read)
			this_chunk = nr_to_read;
		err = __do_page_cache_readahead(mapping, filp,
						offset, this_chunk, 0);
		if (err < 0) {
			ret = err;
			break;
		}
		ret += err;
		offset += this_chunk;
		nr_to_read -= this_chunk;
	}
	return ret;
}
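/*
 * Example: with 4k pages this_chunk is 512 pages, so a forced readahead of
 * 1280 pages goes to __do_page_cache_readahead() as three calls of 512, 512
 * and 256 pages.
 */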
/*
 * This version skips the IO if the queue is read-congested, and will tell the
 * block layer to abandon the readahead if request allocation would block.
 *
 * force_page_cache_readahead() will ignore queue congestion and will block on
 * request queues.
 */
int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
			pgoff_t offset, unsigned long nr_to_read)
{
	if (bdi_read_congested(mapping->backing_dev_info))
		return -1;

	return __do_page_cache_readahead(mapping, filp, offset, nr_to_read, 0);
}

/*
 * Given a desired number of PAGE_CACHE_SIZE readahead pages, return a
 * sensible upper limit.
 */
unsigned long max_sane_readahead(unsigned long nr)
{
	return min(nr, (node_page_state(numa_node_id(), NR_INACTIVE)
		+ node_page_state(numa_node_id(), NR_FREE_PAGES)) / 2);
}

/*
 * Submit IO for the read-ahead request in file_ra_state.
 */
static unsigned long ra_submit(struct file_ra_state *ra,
		       struct address_space *mapping, struct file *filp)
{
	int actual;

	actual = __do_page_cache_readahead(mapping, filp,
					ra->start, ra->size, ra->async_size);

	return actual;
}

/*
 * Set the initial window size: round the request up to the next power of 2,
 * then scale it x 4 for small sizes (<= max / 32), x 2 for medium sizes
 * (<= max / 4), and cap it at max for large sizes.
 */
static unsigned long get_init_ra_size(unsigned long size, unsigned long max)
{
	unsigned long newsize = roundup_pow_of_two(size);

	if (newsize <= max / 32)
		newsize = newsize * 4;
	else if (newsize <= max / 4)
		newsize = newsize * 2;
	else
		newsize = max;

	return newsize;
}

/*
 * Get the previous window size, ramp it up, and
 * return it as the new window size.
 */
static unsigned long get_next_ra_size(struct file_ra_state *ra,
						unsigned long max)
{
	unsigned long cur = ra->size;
	unsigned long newsize;

	if (cur < max / 16)
		newsize = 4 * cur;
	else
		newsize = 2 * cur;

	return min(newsize, max);
}
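/*
 * Worked example (max = 32 pages, i.e. 128k readahead with 4k pages):
 * get_init_ra_size() turns a 1-2 page request into a 4 page window, 3-4
 * pages into 8, 5-8 pages into 16, and anything larger into the full 32.
 * get_next_ra_size() then doubles the window on each sequential hit
 * (quadruples it while it is still below max / 16), so a window that
 * starts at 8 grows 8 -> 16 -> 32 and stays capped at 32.
 */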
/*
 * On-demand readahead design.
 *
 * The fields in struct file_ra_state represent the most-recently-executed
 * readahead attempt:
 *
 *                        |<----- async_size ---------|
 *     |------------------- size -------------------->|
 *     |==================#===========================|
 *     ^start             ^page marked with PG_readahead
 *
 * To overlap application thinking time and disk I/O time, we do
 * `readahead pipelining': Do not wait until the application consumed all
 * readahead pages and stalled on the missing page at readahead_index;
 * instead, submit an asynchronous readahead I/O as soon as there are
 * only async_size pages left in the readahead window.  Normally async_size
 * will be equal to size, for maximum pipelining.
 *
 * In interleaved sequential reads, concurrent streams on the same fd can
 * be invalidating each other's readahead state.  So we flag the new readahead
 * page at (start+size-async_size) with PG_readahead, and use it as a
 * readahead indicator.  The flag won't be set on already cached pages, to
 * avoid the readahead-for-nothing fuss, saving pointless page cache lookups.
 *
 * prev_pos tracks the last visited byte in the _previous_ read request.
 * It should be maintained by the caller, and will be used for detecting
 * small random reads.  Note that the readahead algorithm checks loosely
 * for sequential patterns.  Hence interleaved reads might be served as
 * sequential ones.
 *
 * There is a special-case: if the first page which the application tries to
 * read happens to be the first page of the file, it is assumed that a linear
 * read is about to happen and the window is immediately set to the initial
 * size based on the I/O request size and the max_readahead.
 *
 * The code ramps up the readahead size aggressively at first, but slows down
 * as it approaches max_readahead.
 */
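/*
 * Worked trace of the pipelining above (4k pages, max = 32): a sequential
 * reader issues a 4 page read at offset 0.  ondemand_readahead() sets
 * start = 0, size = 8, async_size = 4, so page 4 carries PG_readahead.
 * When the application reaches page 4, page_cache_async_readahead() sees
 * the expected offset and pushes the window forward: start = 8, size = 16,
 * async_size = 16, submitting I/O for pages 8-23 while pages 4-7 are still
 * being consumed.
 */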
/*
 * A minimal readahead algorithm for trivial sequential/random reads.
 */
static unsigned long
ondemand_readahead(struct address_space *mapping,
		   struct file_ra_state *ra, struct file *filp,
		   bool hit_readahead_marker, pgoff_t offset,
		   unsigned long req_size)
{
	int max = ra->ra_pages;	/* max readahead pages */
	pgoff_t prev_offset;
	int sequential;

	/*
	 * It's the expected callback offset, assume sequential access.
	 * Ramp up sizes, and push forward the readahead window.
	 */
	if (offset && (offset == (ra->start + ra->size - ra->async_size) ||
			offset == (ra->start + ra->size))) {
		ra->start += ra->size;
		ra->size = get_next_ra_size(ra, max);
		ra->async_size = ra->size;
		goto readit;
	}

	prev_offset = ra->prev_pos >> PAGE_CACHE_SHIFT;
	sequential = offset - prev_offset <= 1UL || req_size > max;

	/*
	 * Standalone, small read.
	 * Read as is, and do not pollute the readahead state.
	 */
	if (!hit_readahead_marker && !sequential) {
		return __do_page_cache_readahead(mapping, filp,
						offset, req_size, 0);
	}

	/*
	 * It may be one of
	 *	- first read on start of file
	 *	- sequential cache miss
	 *	- oversize random read
	 * Start readahead for it.
	 */
	ra->start = offset;
	ra->size = get_init_ra_size(req_size, max);
	ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;

	/*
	 * Hit on a marked page without valid readahead state.
	 * E.g. interleaved reads.
	 * Not knowing its readahead pos/size, bet on the minimal possible one.
	 */
	if (hit_readahead_marker) {
		ra->start++;
		ra->size = get_next_ra_size(ra, max);
	}

readit:
	return ra_submit(ra, mapping, filp);
}
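/*
 * Example of the marker-hit fallback above: two streams interleaving reads
 * on the same fd keep overwriting each other's window state.  When stream B
 * touches a PG_readahead page at, say, offset 100 while ra still describes
 * stream A's window, the expected-offset test fails, but hit_readahead_marker
 * lets readahead restart at page 101 with a conservatively ramped window
 * instead of being treated as a random read.
 */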
/**
 * page_cache_sync_readahead - generic file readahead
 * @mapping: address_space which holds the pagecache and I/O vectors
 * @ra: file_ra_state which holds the readahead state
 * @filp: passed on to ->readpage() and ->readpages()
 * @offset: start offset into @mapping, in pagecache page-sized units
 * @req_size: hint: total size of the read which the caller is performing in
 *   pagecache pages
 *
 * page_cache_sync_readahead() should be called when a cache miss happened:
 * it will submit the read.  The readahead logic may decide to piggyback more
 * pages onto the read request if access patterns suggest it will improve
 * performance.
 */
void page_cache_sync_readahead(struct address_space *mapping,
			       struct file_ra_state *ra, struct file *filp,
			       pgoff_t offset, unsigned long req_size)
{
	/* no read-ahead */
	if (!ra->ra_pages)
		return;

	/* do read-ahead */
	ondemand_readahead(mapping, ra, filp, false, offset, req_size);
}
EXPORT_SYMBOL_GPL(page_cache_sync_readahead);

/**
 * page_cache_async_readahead - file readahead for marked pages
 * @mapping: address_space which holds the pagecache and I/O vectors
 * @ra: file_ra_state which holds the readahead state
 * @filp: passed on to ->readpage() and ->readpages()
 * @page: the page at @offset which has the PG_readahead flag set
 * @offset: start offset into @mapping, in pagecache page-sized units
 * @req_size: hint: total size of the read which the caller is performing in
 *   pagecache pages
 *
 * page_cache_async_readahead() should be called when a page is used which
 * has the PG_readahead flag; this is a marker to suggest that the application
 * has used up enough of the readahead window that we should start pulling in
 * more pages.
 */
void
page_cache_async_readahead(struct address_space *mapping,
			   struct file_ra_state *ra, struct file *filp,
			   struct page *page, pgoff_t offset,
			   unsigned long req_size)
{
	/* no read-ahead */
	if (!ra->ra_pages)
		return;

	/*
	 * Same bit is used for PG_readahead and PG_reclaim.
	 */
	if (PageWriteback(page))
		return;

	ClearPageReadahead(page);

	/*
	 * Defer asynchronous read-ahead on IO congestion.
	 */
	if (bdi_read_congested(mapping->backing_dev_info))
		return;

	/* do read-ahead */
	ondemand_readahead(mapping, ra, filp, true, offset, req_size);
}
EXPORT_SYMBOL_GPL(page_cache_async_readahead);
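/*
 * A sketch of the intended calling pattern (cf. do_generic_mapping_read):
 * synchronous readahead on a cache miss, asynchronous readahead when the
 * PG_readahead marker page is reached.  Illustrative pseudo-code only, not
 * a verbatim copy of any caller:
 *
 *	page = find_get_page(mapping, index);
 *	if (!page) {
 *		page_cache_sync_readahead(mapping, ra, filp,
 *						index, last_index - index);
 *		page = find_get_page(mapping, index);
 *	} else if (PageReadahead(page)) {
 *		page_cache_async_readahead(mapping, ra, filp, page,
 *						index, last_index - index);
 *	}
 */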