Lines Matching +full:page +full:- +full:size

1 // SPDX-License-Identifier: GPL-2.0-only
3 * mm/readahead.c - address_space-level file readahead.
14 * Readahead is used to read content into the page cache before it is
16 * attempts to read folios that are not yet in the page cache. If a
17 * folio is present but not up-to-date, readahead will not try to read
18 * it. In that case a simple ->read_folio() will be requested.
21 * system call or a page fault) finds that the requested folio is not in
22 * the page cache, or that it is in the page cache and has the
29 * contains ->size being the total number of pages, and ->async_size
37 * to be determined: the start of the region to read, the size of the
38 * region, and the size of the async tail.
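
The ->size and ->async_size fields referred to above live in struct file_ra_state, kept per open file. For reference, a minimal sketch of that struct as defined in include/linux/fs.h in recent kernels (exact fields vary between versions):

    struct file_ra_state {
            pgoff_t start;                  /* where the last readahead started */
            unsigned int size;              /* number of readahead pages */
            unsigned int async_size;        /* async tail: start the next readahead
                                               when only this many pages remain */
            unsigned int ra_pages;          /* maximum readahead window */
            unsigned int mmap_miss;         /* cache-miss counter for mmap reads */
            loff_t prev_pos;                /* byte position of the previous read */
    };
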
40 * The start of the region is simply the first page address at or after
41 * the accessed address, which is not currently populated in the page
42 * cache. This is found with a simple search in the page cache.
44 * The size of the async tail is determined by subtracting the size that
45 * was explicitly requested from the determined request size, unless
46 * this would be less than zero - then zero is used. NOTE THIS
47 * CALCULATION IS WRONG WHEN THE START OF THE REGION IS NOT THE ACCESSED
48 * PAGE. ALSO THIS CALCULATION IS NOT USED CONSISTENTLY.
50 * The size of the region is normally determined from the size of the
53 * or from examining the state of the page cache when multiple
55 * was triggered by the readahead flag, the size of the previous
57 * page to the start of the new readahead. In these cases, the size of
61 * If the size of the previous read cannot be determined, the number of
62 * preceding pages in the page cache is used to estimate the size of
70 * adjustments to the readahead size in various special cases and these
73 * The above calculation, based on the previous readahead size,
74 * determines the size of the readahead, to which any requested read
75 * size may be added.
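
A purely illustrative run of that arithmetic: if the calculation based on the previous readahead yields a 32-page region and the application explicitly requested 8 pages, the async tail is 32 - 8 = 24 pages; had the application requested 40 pages, the subtraction would go negative and the async tail is clamped to 0.
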
77 * Readahead requests are sent to the filesystem using the ->readahead()
79 * implementation. ->readahead() should normally initiate reads on all
81 * error. The page cache reading code will issue a ->read_folio() request
82 * for any folio which ->readahead() did not read, and only an error
85 * ->readahead() will generally call readahead_folio() repeatedly to get
103 * considered to be important and ->readahead() should not fail them due
109 * folios from the page cache as is automatically done for folios that
112 * are left in the page cache, then they will be read individually using
113 * ->read_folio() which may be less efficient.
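
To make the ->readahead() contract above concrete, here is a minimal sketch of a filesystem implementation that drains the request with readahead_folio(). The helper myfs_read_folio_sync() is hypothetical and stands in for the filesystem's real I/O path; real implementations normally submit asynchronous I/O and unlock each folio from the completion handler instead.

    static void myfs_readahead(struct readahead_control *rac)
    {
            struct folio *folio;

            /* readahead_folio() hands back each folio locked; it also drops
             * the reference, so no folio_put() is needed here. */
            while ((folio = readahead_folio(rac)) != NULL) {
                    if (myfs_read_folio_sync(rac->file, folio) == 0)
                            folio_mark_uptodate(folio);
                    /* Unlock whether or not the read worked; readahead
                     * errors are deliberately not reported. */
                    folio_unlock(folio);
            }
    }
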
121 #include <linux/backing-dev.h>
128 #include <linux/blk-cgroup.h>
141 ra->ra_pages = inode_to_bdi(mapping->host)->ra_pages;
142 ra->prev_pos = -1;
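
ra_pages is the default window in pages, inherited from the backing device. With the common default of read_ahead_kb = 128 and 4 KiB pages that works out to 128 KiB / 4 KiB = 32 pages; the value is tunable per device through /sys/class/bdi/<bdi>/read_ahead_kb (or the block queue's read_ahead_kb attribute).
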
148 const struct address_space_operations *aops = rac->mapping->a_ops;
155 if (unlikely(rac->_workingset))
156 psi_memstall_enter(&rac->_pflags);
159 if (aops->readahead) {
160 aops->readahead(rac);
170 aops->read_folio(rac->file, folio);
174 if (unlikely(rac->_workingset))
175 psi_memstall_leave(&rac->_pflags);
176 rac->_workingset = false;
187 if (folio && ractl->dropbehind)
194 * page_cache_ra_unbounded - Start unchecked readahead.
210 struct address_space *mapping = ractl->mapping;
218 * locked pages to the page cache, but will not yet have submitted
219 * them for I/O. Adding another page may need to allocate memory,
222 * touch file-backed pages, preventing a deadlock. Most (all?)
241 nr_to_read - lookahead_size,
243 mark = ra_folio_index - index;
245 nr_to_read += readahead_index(ractl) - index;
246 ractl->_index = index;
252 struct folio *folio = xa_load(&mapping->i_pages, index + i);
257 * Page already present? Kick off the current batch
259 * next batch. This page may be the one we would
261 * have a stable reference to this page, and it's
265 ractl->_index += min_nrpages;
266 i = ractl->_index + ractl->_nr_pages - index;
278 if (ret == -ENOMEM)
281 ractl->_index += min_nrpages;
282 i = ractl->_index + ractl->_nr_pages - index;
287 ractl->_workingset |= folio_test_workingset(folio);
288 ractl->_nr_pages += min_nrpages;
293 * Now start the IO. We ignore I/O errors - if the folio is not
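
page_cache_ra_unbounded() is exported for filesystems that need to populate the cache past the usual i_size check; its kerneldoc (largely elided by the match filter) stresses that page_cache_sync_readahead()/page_cache_async_readahead() are the normal entry points. A hypothetical caller might look like the sketch below:

    static void myfs_prefetch(struct address_space *mapping, struct file *file,
                              pgoff_t index, unsigned long nr)
    {
            DEFINE_READAHEAD(ractl, file, &file->f_ra, mapping, index);

            /* lookahead_size of 0: no PG_readahead marker, no async follow-up */
            page_cache_ra_unbounded(&ractl, nr, 0);
    }
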
306 * behaviour which would occur if page allocations are causing VM writeback.
312 struct inode *inode = ractl->mapping->host;
315 pgoff_t end_index; /* The last page we want to read */
320 end_index = (isize - 1) >> PAGE_SHIFT;
323 /* Don't read past the page containing the last byte of the file */
324 if (nr_to_read > end_index - index)
325 nr_to_read = end_index - index + 1;
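
Illustrative numbers for the clamp above: with 4 KiB pages and isize = 10,000 bytes, end_index = (10000 - 1) >> 12 = 2; a request for 32 pages starting at index 1 exceeds end_index - index = 1 and is clamped to end_index - index + 1 = 2 pages, so reading stops at the page containing the last byte of the file.
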
337 struct address_space *mapping = ractl->mapping;
338 struct file_ra_state *ra = ractl->ra;
339 struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
342 if (unlikely(!mapping->a_ops->read_folio && !mapping->a_ops->readahead))
347 * be up to the optimal hardware IO size
349 max_pages = max_t(unsigned long, bdi->io_pages, ra->ra_pages);
358 nr_to_read -= this_chunk;
363 * Set the initial window size, round to next power of 2 and square
364 * for small size, x 4 for medium, and x 2 for large
365 * for 128k (32 page) max ra
366 * 1-2 page = 16k, 3-4 page = 32k, 5-8 page = 64k, > 8 page = 128k initial
368 static unsigned long get_init_ra_size(unsigned long size, unsigned long max)
370 unsigned long newsize = roundup_pow_of_two(size);
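
The body that the match filter skips scales newsize against max; in current mainline it reads roughly as below, and with a 32-page (128k) maximum it reproduces the table in the comment above: 1-2 pages start at 4 pages (16k), 3-4 at 8 (32k), 5-8 at 16 (64k), anything larger at the full 32 (128k).

    if (newsize <= max / 32)
            newsize = newsize * 4;
    else if (newsize <= max / 4)
            newsize = newsize * 2;
    else
            newsize = max;

    return newsize;
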
383 * Get the previous window size, ramp it up, and
384 * return it as the new window size.
389 unsigned long cur = ra->size;
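
The ramp-up itself is elided by the filter; in current mainline get_next_ra_size() quadruples small windows, doubles mid-sized ones and caps at max, roughly:

    if (cur < max / 16)
            return 4 * cur;
    if (cur <= max / 2)
            return 2 * cur;
    return max;

With max = 32 this grows a window 4 -> 8 -> 16 -> 32 and then holds it at the maximum.
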
399 * On-demand readahead design.
401 * The fields in struct file_ra_state represent the most-recently-executed
404 *                        |<----- async_size ---------|
405 *     |------------------- size -------------------->|
406 *     |==================#===========================|
407 *     ^start             ^page marked with PG_readahead
411 * readahead pages and stalled on the missing page at readahead_index;
414 * will be equal to size, for maximum pipelining.
418 * page at (start+size-async_size) with PG_readahead, and use it as readahead
420 * readahead-for-nothing fuss, saving pointless page cache lookups.
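
The consumer of that PG_readahead marker is the generic read path in mm/filemap.c: when it finds the wanted folio present but carrying the readahead flag, it starts the next window asynchronously. A simplified sketch, with file, mapping, folio and req_count assumed in scope and assuming the page_cache_async_ra() signature of recent kernels:

    DEFINE_READAHEAD(ractl, file, &file->f_ra, mapping, folio->index);

    if (folio_test_readahead(folio))
            page_cache_async_ra(&ractl, folio, req_count);
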
428 * There is a special-case: if the first page which the application tries to
429 * read happens to be the first page of the file, it is assumed that a linear
430 * read is about to happen and the window is immediately set to the initial size
431 * based on I/O request size and the max_readahead.
433 * The code ramps up the readahead size aggressively at first, but slows down as
444 return -ENOMEM;
448 err = filemap_add_folio(ractl->mapping, folio, index, gfp);
454 ractl->_nr_pages += 1UL << order;
455 ractl->_workingset |= folio_test_workingset(folio);
462 struct address_space *mapping = ractl->mapping;
466 pgoff_t limit = (i_size_read(mapping->host) - 1) >> PAGE_SHIFT;
467 pgoff_t mark = index + ra->size - ra->async_size;
474 * Fallback when size < min_nrpages as each folio should be
477 if (!mapping_large_folio_support(mapping) || ra->size < min_ra_size)
480 limit = min(limit, index + ra->size - 1);
486 new_order = min_t(unsigned int, new_order, ilog2(ra->size));
497 ractl->_index = mapping_align_index(mapping, index);
504 if (index & ((1UL << order) - 1))
507 while (order > min_order && index + (1UL << order) - 1 > limit)
508 order--;
520 * If there were already pages in the page cache, then we may have
528 * ->readahead() may have updated readahead window size so we have to
531 if (ra->size > index - start)
532 do_page_cache_ra(ractl, ra->size - (index - start),
533 ra->async_size);
539 struct backing_dev_info *bdi = inode_to_bdi(ractl->mapping->host);
540 unsigned long max_pages = ractl->ra->ra_pages;
544 * be up to the optimal hardware IO size
546 if (req_size > max_pages && bdi->io_pages > max_pages)
547 max_pages = min(req_size, bdi->io_pages);
555 bool do_forced_ra = ractl->file && (ractl->file->f_mode & FMODE_RANDOM);
556 struct file_ra_state *ra = ractl->ra;
564 * requested range, which we'll set to 1 page for this case.
566 if (!ra->ra_pages || blk_cgroup_congested()) {
567 if (!ractl->file)
580 prev_index = (unsigned long long)ra->prev_pos >> PAGE_SHIFT;
583 * trivial case: (index - prev_index) == 1
584 * unaligned reads: (index - prev_index) == 0
586 if (!index || req_count > max_pages || index - prev_index <= 1UL) {
587 ra->start = index;
588 ra->size = get_init_ra_size(req_count, max_pages);
589 ra->async_size = ra->size > req_count ? ra->size - req_count :
590 ra->size >> 1;
595 * Query the page cache and look for the traces (cached history pages)
599 miss = page_cache_prev_miss(ractl->mapping, index - 1, max_pages);
601 contig_count = index - miss - 1;
612 * it is a strong indication of long-run stream (or whole-file-read)
616 ra->start = index;
617 ra->size = min(contig_count + req_count, max_pages);
618 ra->async_size = 1;
620 ractl->_index = ra->start;
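
Callers normally reach page_cache_sync_ra() through the page_cache_sync_readahead() wrapper in include/linux/pagemap.h, which simply packages its arguments into a readahead_control; paraphrased (exact form varies by kernel version):

    static inline void page_cache_sync_readahead(struct address_space *mapping,
                    struct file_ra_state *ra, struct file *file,
                    pgoff_t index, unsigned long req_count)
    {
            DEFINE_READAHEAD(ractl, file, ra, mapping, index);
            page_cache_sync_ra(&ractl, req_count);
    }
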
629 struct file_ra_state *ra = ractl->ra;
635 if (!ra->ra_pages)
654 expected = round_down(ra->start + ra->size - ra->async_size,
657 ra->start += ra->size;
659 * In the case of MADV_HUGEPAGE, the actual size might exceed
662 ra->size = max(ra->size, get_next_ra_size(ra, max_pages));
663 ra->async_size = ra->size;
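
An illustrative trace of the marker-hit path above, with max_pages = 32: a window of {start = 0, size = 8, async_size = 8} puts the marker at page 0; hitting it advances the window to start = 8 with size = async_size = 16, the next hit gives start = 24 and size = 32, and from then on the window simply slides forward 32 pages at a time.
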
671 * readahead size. Ramp it up and use it as the new readahead size.
674 start = page_cache_next_miss(ractl->mapping, index + 1, max_pages);
677 if (!start || start - index > max_pages)
680 ra->start = start;
681 ra->size = start - index; /* old async_size */
682 ra->size += req_count;
683 ra->size = get_next_ra_size(ra, max_pages);
684 ra->async_size = ra->size;
686 ractl->_index = ra->start;
698 return -EBADF;
701 if (!(file->f_mode & FMODE_READ))
702 return -EBADF;
707 * on this file, then we must return -EINVAL.
709 if (!file->f_mapping)
710 return -EINVAL;
711 if (!file->f_mapping->a_ops)
712 return -EINVAL;
715 if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
716 return -EINVAL;
718 return -EINVAL;
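
These are the sanity checks behind the readahead(2) system call. From userspace the interface is just the following (hypothetical file name, error handling trimmed):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <unistd.h>

    static void hint_prefetch(void)
    {
            int fd = open("data.bin", O_RDONLY);

            if (fd >= 0) {
                    /* ask the kernel to populate the first 1 MiB */
                    readahead(fd, 0, 1 << 20);
                    close(fd);
            }
    }
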
736 * readahead_expand - Expand a readahead request
739 * @new_len: The revised size of the request
741 * Attempt to expand a readahead request outwards from the current size to the
742 * specified size by inserting locked pages before and after the current window
743 * to increase the size to the new window. This may involve the insertion of
747 * The algorithm will stop if it encounters a conflicting page already in the
756 struct address_space *mapping = ractl->mapping;
757 struct file_ra_state *ra = ractl->ra;
765 * Readahead code should have aligned the ractl->_index to
768 VM_BUG_ON(!IS_ALIGNED(ractl->_index, min_nrpages));
771 while (ractl->_index > new_index) {
772 unsigned long index = ractl->_index - 1;
773 struct folio *folio = xa_load(&mapping->i_pages, index);
788 !ractl->_workingset) {
789 ractl->_workingset = true;
790 psi_memstall_enter(&ractl->_pflags);
792 ractl->_nr_pages += min_nrpages;
793 ractl->_index = folio->index;
796 new_len += new_start - readahead_pos(ractl);
800 while (ractl->_nr_pages < new_nr_pages) {
801 unsigned long index = ractl->_index + ractl->_nr_pages;
802 struct folio *folio = xa_load(&mapping->i_pages, index);
817 !ractl->_workingset) {
818 ractl->_workingset = true;
819 psi_memstall_enter(&ractl->_pflags);
821 ractl->_nr_pages += min_nrpages;
823 ra->size += min_nrpages;
824 ra->async_size += min_nrpages;
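
readahead_expand() mainly serves filesystems whose cache or I/O granule is larger than the window they were handed (the netfs/fscache style of user). A hedged usage sketch, assuming a hypothetical 256 KiB granule and a ractl already in hand:

    loff_t start = round_down(readahead_pos(ractl), SZ_256K);
    size_t len = round_up(readahead_pos(ractl) + readahead_length(ractl),
                          SZ_256K) - start;

    readahead_expand(ractl, start, len);

Because expansion stops at the first conflicting folio, the caller should recheck readahead_pos()/readahead_length() afterwards rather than assume the full granule was covered.
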