Lines Matching +full:page +full:- +full:size

1 // SPDX-License-Identifier: GPL-2.0-only
3 * mm/readahead.c - address_space-level file readahead.
14 * Readahead is used to read content into the page cache before it is
16 * attempts to read folios that are not yet in the page cache. If a
17 * folio is present but not up-to-date, readahead will not try to read
18 * it. In that case a simple ->read_folio() will be requested.
21 * system call or a page fault) finds that the requested folio is not in
22 * the page cache, or that it is in the page cache and has the
29 * contains ->size being the total number of pages, and ->async_size
37 * to be determined: the start of the region to read, the size of the
38 * region, and the size of the async tail.
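
The ->size and ->async_size fields referred to above live in struct file_ra_state, kept per open file. For reference, a minimal sketch of that struct as defined in include/linux/fs.h in recent kernels (exact fields vary between versions):

    struct file_ra_state {
            pgoff_t start;                  /* where the last readahead started */
            unsigned int size;              /* number of readahead pages */
            unsigned int async_size;        /* async tail: start the next readahead
                                               when only this many pages remain */
            unsigned int ra_pages;          /* maximum readahead window */
            unsigned int mmap_miss;         /* cache-miss counter for mmap reads */
            loff_t prev_pos;                /* byte position of the previous read */
    };
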
40 * The start of the region is simply the first page address at or after
41 * the accessed address, which is not currently populated in the page
42 * cache. This is found with a simple search in the page cache.
44 * The size of the async tail is determined by subtracting the size that
45 * was explicitly requested from the determined request size, unless
46 * this would be less than zero - then zero is used. NOTE THIS
47 * CALCULATION IS WRONG WHEN THE START OF THE REGION IS NOT THE ACCESSED
48 * PAGE. ALSO THIS CALCULATION IS NOT USED CONSISTENTLY.
50 * The size of the region is normally determined from the size of the
53 * or from examining the state of the page cache when multiple
55 * was triggered by the readahead flag, the size of the previous
57 * page to the start of the new readahead. In these cases, the size of
61 * If the size of the previous read cannot be determined, the number of
62 * preceding pages in the page cache is used to estimate the size of
70 * adjustments to the readahead size in various special cases and these
73 * The above calculation, based on the previous readahead size,
74 * determines the size of the readahead, to which any requested read
75 * size may be added.
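
A purely illustrative run of that arithmetic: if the calculation based on the previous readahead yields a 32-page region and the application explicitly requested 8 pages, the async tail is 32 - 8 = 24 pages; had the application requested 40 pages, the subtraction would go negative and the async tail is clamped to 0.
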
77 * Readahead requests are sent to the filesystem using the ->readahead()
79 * implementation. ->readahead() should normally initiate reads on all
81 * error. The page cache reading code will issue a ->read_folio() request
82 * for any folio which ->readahead() did not read, and only an error
85 * ->readahead() will generally call readahead_folio() repeatedly to get
103 * considered to be important and ->readahead() should not fail them due
109 * folios from the page cache as is automatically done for folios that
112 * are left in the page cache, then they will be read individually using
113 * ->read_folio() which may be less efficient.
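
To make the ->readahead() contract above concrete, here is a minimal sketch of a filesystem implementation that drains the request with readahead_folio(). The helper myfs_read_folio_sync() is hypothetical and stands in for the filesystem's real I/O path; real implementations normally submit asynchronous I/O and unlock each folio from the completion handler instead.

    static void myfs_readahead(struct readahead_control *rac)
    {
            struct folio *folio;

            /* readahead_folio() hands back each folio locked; it also drops
             * the reference, so no folio_put() is needed here. */
            while ((folio = readahead_folio(rac)) != NULL) {
                    if (myfs_read_folio_sync(rac->file, folio) == 0)
                            folio_mark_uptodate(folio);
                    /* Unlock whether or not the read worked; readahead
                     * errors are deliberately not reported. */
                    folio_unlock(folio);
            }
    }
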
121 #include <linux/backing-dev.h>
128 #include <linux/blk-cgroup.h>
141 ra->ra_pages = inode_to_bdi(mapping->host)->ra_pages;
142 ra->prev_pos = -1;
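
ra_pages is the default window in pages, inherited from the backing device. With the common default of read_ahead_kb = 128 and 4 KiB pages that works out to 128 KiB / 4 KiB = 32 pages; the value is tunable per device through /sys/class/bdi/<bdi>/read_ahead_kb (or the block queue's read_ahead_kb attribute).
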
148 const struct address_space_operations *aops = rac->mapping->a_ops;
155 if (unlikely(rac->_workingset))
156 psi_memstall_enter(&rac->_pflags);
159 if (aops->readahead) {
160 aops->readahead(rac);
170 aops->read_folio(rac->file, folio);
174 if (unlikely(rac->_workingset))
175 psi_memstall_leave(&rac->_pflags);
176 rac->_workingset = false;
187 if (folio && ractl->dropbehind)
194 * page_cache_ra_unbounded - Start unchecked readahead.
210 struct address_space *mapping = ractl->mapping;
218 * locked pages to the page cache, but will not yet have submitted
219 * them for I/O. Adding another page may need to allocate memory,
222 * touch file-backed pages, preventing a deadlock. Most (all?)
241 nr_to_read - lookahead_size,
243 mark = ra_folio_index - index;
245 nr_to_read += readahead_index(ractl) - index;
246 ractl->_index = index;
252 struct folio *folio = xa_load(&mapping->i_pages, index + i);
257 * Page already present? Kick off the current batch
259 * next batch. This page may be the one we would
261 * have a stable reference to this page, and it's
265 ractl->_index += min_nrpages;
266 i = ractl->_index + ractl->_nr_pages - index;
278 if (ret == -ENOMEM)
281 ractl->_index += min_nrpages;
282 i = ractl->_index + ractl->_nr_pages - index;
287 ractl->_workingset |= folio_test_workingset(folio);
288 ractl->_nr_pages += min_nrpages;
293 * Now start the IO. We ignore I/O errors - if the folio is not
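
page_cache_ra_unbounded() is exported for filesystems that need to populate the cache past the usual i_size check; its kerneldoc (largely elided by the match filter) stresses that page_cache_sync_readahead()/page_cache_async_readahead() are the normal entry points. A hypothetical caller might look like the sketch below:

    static void myfs_prefetch(struct address_space *mapping, struct file *file,
                              pgoff_t index, unsigned long nr)
    {
            DEFINE_READAHEAD(ractl, file, &file->f_ra, mapping, index);

            /* lookahead_size of 0: no PG_readahead marker, no async follow-up */
            page_cache_ra_unbounded(&ractl, nr, 0);
    }
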
306 * behaviour which would occur if page allocations are causing VM writeback.
312 struct inode *inode = ractl->mapping->host;
315 pgoff_t end_index; /* The last page we want to read */
320 end_index = (isize - 1) >> PAGE_SHIFT;
323 /* Don't read past the page containing the last byte of the file */
324 if (nr_to_read > end_index - index)
325 nr_to_read = end_index - index + 1;
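
Illustrative numbers for the clamp above: with 4 KiB pages and isize = 10,000 bytes, end_index = (10000 - 1) >> 12 = 2; a request for 32 pages starting at index 1 exceeds end_index - index = 1 and is clamped to end_index - index + 1 = 2 pages, so reading stops at the page containing the last byte of the file.
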
337 struct address_space *mapping = ractl->mapping;
338 struct file_ra_state *ra = ractl->ra;
339 struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
342 if (unlikely(!mapping->a_ops->read_folio && !mapping->a_ops->readahead))
347 * be up to the optimal hardware IO size
349 max_pages = max_t(unsigned long, bdi->io_pages, ra->ra_pages);
358 nr_to_read -= this_chunk;
363 * Set the initial window size, round to next power of 2 and square
364 * for small size, x 4 for medium, and x 2 for large
365 * for 128k (32 page) max ra
366 * 1-2 page = 16k, 3-4 page = 32k, 5-8 page = 64k, > 8 page = 128k initial
368 static unsigned long get_init_ra_size(unsigned long size, unsigned long max)
370 unsigned long newsize = roundup_pow_of_two(size);
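
The body that the match filter skips scales newsize against max; in current mainline it reads roughly as below, and with a 32-page (128k) maximum it reproduces the table in the comment above: 1-2 pages start at 4 pages (16k), 3-4 at 8 (32k), 5-8 at 16 (64k), anything larger at the full 32 (128k).

    if (newsize <= max / 32)
            newsize = newsize * 4;
    else if (newsize <= max / 4)
            newsize = newsize * 2;
    else
            newsize = max;

    return newsize;
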
383 * Get the previous window size, ramp it up, and
384 * return it as the new window size.
389 unsigned long cur = ra->size;
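
The ramp-up itself is elided by the filter; in current mainline get_next_ra_size() quadruples small windows, doubles mid-sized ones and caps at max, roughly:

    if (cur < max / 16)
            return 4 * cur;
    if (cur <= max / 2)
            return 2 * cur;
    return max;

With max = 32 this grows a window 4 -> 8 -> 16 -> 32 and then holds it at the maximum.
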
399 * On-demand readahead design.
401 * The fields in struct file_ra_state represent the most-recently-executed
404 *                        |<----- async_size ---------|
405 *     |------------------- size -------------------->|
406 *     |==================#===========================|
407 *     ^start             ^page marked with PG_readahead
411 * readahead pages and stalled on the missing page at readahead_index;
414 * will be equal to size, for maximum pipelining.
418 * page at (start+size-async_size) with PG_readahead, and use it as readahead
420 * readahead-for-nothing fuss, saving pointless page cache lookups.
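
The consumer of that PG_readahead marker is the generic read path in mm/filemap.c: when it finds the wanted folio present but carrying the readahead flag, it starts the next window asynchronously. A simplified sketch, with file, mapping, folio and req_count assumed in scope and assuming the page_cache_async_ra() signature of recent kernels:

    DEFINE_READAHEAD(ractl, file, &file->f_ra, mapping, folio->index);

    if (folio_test_readahead(folio))
            page_cache_async_ra(&ractl, folio, req_count);
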
428 * There is a special-case: if the first page which the application tries to
429 * read happens to be the first page of the file, it is assumed that a linear
430 * read is about to happen and the window is immediately set to the initial size
431 * based on I/O request size and the max_readahead.
433 * The code ramps up the readahead size aggressively at first, but slows down as
444 return -ENOMEM;
448 err = filemap_add_folio(ractl->mapping, folio, index, gfp);
454 ractl->_nr_pages += 1UL << order;
455 ractl->_workingset |= folio_test_workingset(folio);
462 struct address_space *mapping = ractl->mapping;
466 pgoff_t limit = (i_size_read(mapping->host) - 1) >> PAGE_SHIFT;
467 pgoff_t mark = index + ra->size - ra->async_size;
474 * Fallback when size < min_nrpages as each folio should be
477 if (!mapping_large_folio_support(mapping) || ra->size < min_ra_size)
480 limit = min(limit, index + ra->size - 1);
486 new_order = min_t(unsigned int, new_order, ilog2(ra->size));
497 ractl->_index = mapping_align_index(mapping, index);
504 if (index & ((1UL << order) - 1))
507 while (order > min_order && index + (1UL << order) - 1 > limit)
508 order--;
520 * If there were already pages in the page cache, then we may have
528 * ->readahead() may have updated readahead window size so we have to
531 if (ra->size > index - start)
532 do_page_cache_ra(ractl, ra->size - (index - start),
533 ra->async_size);
539 struct backing_dev_info *bdi = inode_to_bdi(ractl->mapping->host);
540 unsigned long max_pages = ractl->ra->ra_pages;
544 * be up to the optimal hardware IO size
546 if (req_size > max_pages && bdi->io_pages > max_pages)
547 max_pages = min(req_size, bdi->io_pages);
555 bool do_forced_ra = ractl->file && (ractl->file->f_mode & FMODE_RANDOM);
556 struct file_ra_state *ra = ractl->ra;
564 * requested range, which we'll set to 1 page for this case.
566 if (!ra->ra_pages || blk_cgroup_congested()) {
567 if (!ractl->file)
580 prev_index = (unsigned long long)ra->prev_pos >> PAGE_SHIFT;
583 * trivial case: (index - prev_index) == 1
584 * unaligned reads: (index - prev_index) == 0
586 if (!index || req_count > max_pages || index - prev_index <= 1UL) {
587 ra->start = index;
588 ra->size = get_init_ra_size(req_count, max_pages);
589 ra->async_size = ra->size > req_count ? ra->size - req_count :
590 ra->size >> 1;
595 * Query the page cache and look for the traces (cached history pages)
599 miss = page_cache_prev_miss(ractl->mapping, index - 1, max_pages);
601 contig_count = index - miss - 1;
612 * it is a strong indication of long-run stream (or whole-file-read)
616 ra->start = index;
617 ra->size = min(contig_count + req_count, max_pages);
618 ra->async_size = 1;
620 ractl->_index = ra->start;
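
Callers normally reach page_cache_sync_ra() through the page_cache_sync_readahead() wrapper in include/linux/pagemap.h, which simply packages its arguments into a readahead_control; paraphrased (exact form varies by kernel version):

    static inline void page_cache_sync_readahead(struct address_space *mapping,
                    struct file_ra_state *ra, struct file *file,
                    pgoff_t index, unsigned long req_count)
    {
            DEFINE_READAHEAD(ractl, file, ra, mapping, index);
            page_cache_sync_ra(&ractl, req_count);
    }
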
629 struct file_ra_state *ra = ractl->ra;
635 if (!ra->ra_pages)
654 expected = round_down(ra->start + ra->size - ra->async_size,
657 ra->start += ra->size;
659 * In the case of MADV_HUGEPAGE, the actual size might exceed
662 ra->size = max(ra->size, get_next_ra_size(ra, max_pages));
663 ra->async_size = ra->size;
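
An illustrative trace of the marker-hit path above, with max_pages = 32: a window of {start = 0, size = 8, async_size = 8} puts the marker at page 0; hitting it advances the window to start = 8 with size = async_size = 16, the next hit gives start = 24 and size = 32, and from then on the window simply slides forward 32 pages at a time.
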
671 * readahead size. Ramp it up and use it as the new readahead size.
674 start = page_cache_next_miss(ractl->mapping, index + 1, max_pages);
677 if (!start || start - index > max_pages)
680 ra->start = start;
681 ra->size = start - index; /* old async_size */
682 ra->size += req_count;
683 ra->size = get_next_ra_size(ra, max_pages);
684 ra->async_size = ra->size;
686 ractl->_index = ra->start;
698 return -EBADF;
701 if (!(file->f_mode & FMODE_READ))
702 return -EBADF;
707 * on this file, then we must return -EINVAL.
709 if (!file->f_mapping)
710 return -EINVAL;
711 if (!file->f_mapping->a_ops)
712 return -EINVAL;
715 if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
716 return -EINVAL;
718 return -EINVAL;
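
These are the sanity checks behind the readahead(2) system call. From userspace the interface is just the following (hypothetical file name, error handling trimmed):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <unistd.h>

    static void hint_prefetch(void)
    {
            int fd = open("data.bin", O_RDONLY);

            if (fd >= 0) {
                    /* ask the kernel to populate the first 1 MiB */
                    readahead(fd, 0, 1 << 20);
                    close(fd);
            }
    }
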
736 * readahead_expand - Expand a readahead request
739 * @new_len: The revised size of the request
741 * Attempt to expand a readahead request outwards from the current size to the
742 * specified size by inserting locked pages before and after the current window
743 * to increase the size to the new window. This may involve the insertion of
747 * The algorithm will stop if it encounters a conflicting page already in the
756 struct address_space *mapping = ractl->mapping;
757 struct file_ra_state *ra = ractl->ra;
765 * Readahead code should have aligned the ractl->_index to
768 VM_BUG_ON(!IS_ALIGNED(ractl->_index, min_nrpages));
771 while (ractl->_index > new_index) {
772 unsigned long index = ractl->_index - 1;
773 struct folio *folio = xa_load(&mapping->i_pages, index);
788 !ractl->_workingset) {
789 ractl->_workingset = true;
790 psi_memstall_enter(&ractl->_pflags);
792 ractl->_nr_pages += min_nrpages;
793 ractl->_index = folio->index;
796 new_len += new_start - readahead_pos(ractl);
800 while (ractl->_nr_pages < new_nr_pages) {
801 unsigned long index = ractl->_index + ractl->_nr_pages;
802 struct folio *folio = xa_load(&mapping->i_pages, index);
817 !ractl->_workingset) {
818 ractl->_workingset = true;
819 psi_memstall_enter(&ractl->_pflags);
821 ractl->_nr_pages += min_nrpages;
823 ra->size += min_nrpages;
824 ra->async_size += min_nrpages;
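
readahead_expand() mainly serves filesystems whose cache or I/O granule is larger than the window they were handed (the netfs/fscache style of user). A hedged usage sketch, assuming a hypothetical 256 KiB granule and a ractl already in hand:

    loff_t start = round_down(readahead_pos(ractl), SZ_256K);
    size_t len = round_up(readahead_pos(ractl) + readahead_length(ractl),
                          SZ_256K) - start;

    readahead_expand(ractl, start, len);

Because expansion stops at the first conflicting folio, the caller should recheck readahead_pos()/readahead_length() afterwards rather than assume the full granule was covered.
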