filemap.c - OpenGrok cross reference for /linux/mm/filemap.c

Lines Matching +full:wait +full:- +full:on +full:- +full:write
1 // SPDX-License-Identifier: GPL-2.0-only
5  * Copyright (C) 1994-1999  Linus Torvalds
30 #include <linux/error-injection.h>
33 #include <linux/backing-dev.h>
73  * finished 'unifying' the page and buffer cache and SMP-threaded the
74  * page-cache, 21.05.1999, Ingo Molnar <mingo@redhat.com>
76  * SMP-threaded pagemap-LRU 1999, Andrea Arcangeli <andrea@suse.de>
82  *  ->i_mmap_rwsem		(truncate_pagecache)
83  *    ->private_lock		(__free_pte->block_dirty_folio)
84  *      ->swap_lock		(exclusive_swap_page, others)
85  *        ->i_pages lock
87  *  ->i_rwsem
88  *    ->invalidate_lock		(acquired by fs in truncate path)
89  *      ->i_mmap_rwsem		(truncate->unmap_mapping_range)
91  *  ->mmap_lock
92  *    ->i_mmap_rwsem
93  *      ->page_table_lock or pte_lock	(various, mainly in memory.c)
94  *        ->i_pages lock	(arch-dependent flush_dcache_mmap_lock)
96  *  ->mmap_lock
97  *    ->invalidate_lock		(filemap_fault)
98  *      ->lock_page		(filemap_fault, access_process_vm)
100  *  ->i_rwsem			(generic_perform_write)
101  *    ->mmap_lock		(fault_in_readable->do_page_fault)
103  *  bdi->wb.list_lock
104  *    sb_lock			(fs/fs-writeback.c)
105  *    ->i_pages lock		(__sync_single_inode)
107  *  ->i_mmap_rwsem
108  *    ->anon_vma.lock		(vma_merge)
110  *  ->anon_vma.lock
111  *    ->page_table_lock or pte_lock	(anon_vma_prepare and various)
113  *  ->page_table_lock or pte_lock
114  *    ->swap_lock		(try_to_unmap_one)
115  *    ->private_lock		(try_to_unmap_one)
116  *    ->i_pages lock		(try_to_unmap_one)
117  *    ->lruvec->lru_lock	(follow_page_mask->mark_page_accessed)
118  *    ->lruvec->lru_lock	(check_pte_range->folio_isolate_lru)
119  *    ->private_lock		(folio_remove_rmap_pte->set_page_dirty)
120  *    ->i_pages lock		(folio_remove_rmap_pte->set_page_dirty)
121  *    bdi.wb->list_lock		(folio_remove_rmap_pte->set_page_dirty)
122  *    ->inode->i_lock		(folio_remove_rmap_pte->set_page_dirty)
123  *    bdi.wb->list_lock		(zap_pte_range->set_page_dirty)
124  *    ->inode->i_lock		(zap_pte_range->set_page_dirty)
125  *    ->private_lock		(zap_pte_range->block_dirty_folio)
131 	XA_STATE(xas, &mapping->i_pages, folio->index);  in page_cache_delete()
136 	xas_set_order(&xas, folio->index, folio_order(folio));  in page_cache_delete()
144 	folio->mapping = NULL;  in page_cache_delete()
145 	/* Leave folio->index set: truncation lookup relies upon it */  in page_cache_delete()
146 	mapping->nrpages -= nr;  in page_cache_delete()
157 			 current->comm, folio_pfn(folio));  in filemap_unaccount_folio()
158 		dump_page(&folio->page, "still mapped when deleted");  in filemap_unaccount_folio()
172 				atomic_set(&folio->_mapcount, -1);  in filemap_unaccount_folio()
184 	__lruvec_stat_mod_folio(folio, NR_FILE_PAGES, -nr);  in filemap_unaccount_folio()
186 		__lruvec_stat_mod_folio(folio, NR_SHMEM, -nr);  in filemap_unaccount_folio()
188 			__lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, -nr);  in filemap_unaccount_folio()
190 		__lruvec_stat_mod_folio(folio, NR_FILE_THPS, -nr);  in filemap_unaccount_folio()
193 	if (test_bit(AS_KERNEL_FILE, &folio->mapping->flags))  in filemap_unaccount_folio()
195 				    NR_KERNEL_FILE_PAGES, -nr);  in filemap_unaccount_folio()
200 	 * unwritten data - on ordinary filesystems.  in filemap_unaccount_folio()
202 	 * But it's harmless on in-memory filesystems like tmpfs; and can  in filemap_unaccount_folio()
213 		folio_account_cleaned(folio, inode_to_wb(mapping->host));  in filemap_unaccount_folio()
218  * sure the page is locked and that nobody else uses it - or that usage
223 	struct address_space *mapping = folio->mapping;  in __filemap_remove_folio()
234 	free_folio = mapping->a_ops->free_folio;  in filemap_free_folio()
242  * filemap_remove_folio - Remove folio from page cache.
245  * This must be called only on folios that are locked and have been
247  * the free list because the caller has a reference on the page.
251 	struct address_space *mapping = folio->mapping;  in filemap_remove_folio()
254 	spin_lock(&mapping->host->i_lock);  in filemap_remove_folio()
255 	xa_lock_irq(&mapping->i_pages);  in filemap_remove_folio()
257 	xa_unlock_irq(&mapping->i_pages);  in filemap_remove_folio()
259 		inode_add_lru(mapping->host);  in filemap_remove_folio()
260 	spin_unlock(&mapping->host->i_lock);  in filemap_remove_folio()
266  * page_cache_delete_batch - delete several folios from page cache
270  * The function walks over mapping->i_pages and removes folios passed in
281 	XA_STATE(xas, &mapping->i_pages, fbatch->folios[0]->index);  in page_cache_delete_batch()
301 		if (folio != fbatch->folios[i]) {  in page_cache_delete_batch()
302 			VM_BUG_ON_FOLIO(folio->index >  in page_cache_delete_batch()
303 					fbatch->folios[i]->index, folio);  in page_cache_delete_batch()
309 		folio->mapping = NULL;  in page_cache_delete_batch()
310 		/* Leave folio->index set: truncation lookup relies on it */  in page_cache_delete_batch()
316 	mapping->nrpages -= total_pages;  in page_cache_delete_batch()
327 	spin_lock(&mapping->host->i_lock);  in delete_from_page_cache_batch()
328 	xa_lock_irq(&mapping->i_pages);  in delete_from_page_cache_batch()
330 		struct folio *folio = fbatch->folios[i];  in delete_from_page_cache_batch()
336 	xa_unlock_irq(&mapping->i_pages);  in delete_from_page_cache_batch()
338 		inode_add_lru(mapping->host);  in delete_from_page_cache_batch()
339 	spin_unlock(&mapping->host->i_lock);  in delete_from_page_cache_batch()
342 		filemap_free_folio(mapping, fbatch->folios[i]);  in delete_from_page_cache_batch()
348 	/* Check for outstanding write errors */  in filemap_check_errors()
349 	if (test_bit(AS_ENOSPC, &mapping->flags) &&  in filemap_check_errors()
350 	    test_and_clear_bit(AS_ENOSPC, &mapping->flags))  in filemap_check_errors()
351 		ret = -ENOSPC;  in filemap_check_errors()
352 	if (test_bit(AS_EIO, &mapping->flags) &&  in filemap_check_errors()
353 	    test_and_clear_bit(AS_EIO, &mapping->flags))  in filemap_check_errors()
354 		ret = -EIO;  in filemap_check_errors()
361 	/* Check for outstanding write errors */  in filemap_check_and_keep_errors()
362 	if (test_bit(AS_EIO, &mapping->flags))  in filemap_check_and_keep_errors()
363 		return -EIO;  in filemap_check_and_keep_errors()
364 	if (test_bit(AS_ENOSPC, &mapping->flags))  in filemap_check_and_keep_errors()
365 		return -ENOSPC;  in filemap_check_and_keep_errors()
370  * filemap_fdatawrite_wbc - start writeback on mapping dirty pages in range
371  * @mapping:	address space structure to write
374  * Call writepages on the mapping using the provided wbc to control the
377  * Return: %0 on success, negative error code otherwise.
388 	wbc_attach_fdatawrite_inode(wbc, mapping->host);  in filemap_fdatawrite_wbc()
396  * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range
397  * @mapping:	address space structure to write
410  * Return: %0 on success, negative error code otherwise.
445  * filemap_fdatawrite_range_kick - start writeback on a range
447  * @start:	index to start writeback on
450  * This is a non-integrity writeback helper, to start writing back folios
453  * Return: %0 on success, negative error code otherwise.
463  * filemap_flush - mostly a non-blocking flush
466  * This is a mostly non-blocking flush.  Not suitable for data-integrity
467  * purposes - I/O may not be started against all dirty pages.
469  * Return: %0 on success, negative error code otherwise.
478  * filemap_range_has_page - check if a page exists in range.
493 	XA_STATE(xas, &mapping->i_pages, start_byte >> PAGE_SHIFT);  in filemap_range_has_page()
550  * filemap_fdatawait_range - wait for writeback to complete
551  * @mapping:		address space structure to wait for
555  * Walk the list of under-writeback pages of the given address space
556  * in the given range and wait for all of them.  Check error status of
574  * filemap_fdatawait_range_keep_errors - wait for writeback to complete
575  * @mapping:		address space structure to wait for
579  * Walk the list of under-writeback pages of the given address space in the
580  * given range and wait for all of them.  Unlike filemap_fdatawait_range(),
584  * call sites are system-wide / filesystem-wide data flushers: e.g. sync(2),
596  * file_fdatawait_range - wait for writeback to complete
597  * @file:		file pointing to address space structure to wait for
601  * Walk the list of under-writeback pages of the address space that file
602  * refers to, in the given range and wait for all of them.  Check error
603  * status of the address space vs. the file->f_wb_err cursor and return it.
609  * Return: error status of the address space vs. the file->f_wb_err cursor.
613 	struct address_space *mapping = file->f_mapping;  in file_fdatawait_range()
621  * filemap_fdatawait_keep_errors - wait for writeback without clearing errors
622  * @mapping: address space structure to wait for
624  * Walk the list of under-writeback pages of the given address space
625  * and wait for all of them.  Unlike filemap_fdatawait(), this function
629  * call sites are system-wide / filesystem-wide data flushers: e.g. sync(2),
644 	return mapping->nrpages;  in mapping_needs_writeback()
650 	XA_STATE(xas, &mapping->i_pages, start_byte >> PAGE_SHIFT);  in filemap_range_has_writeback()
673  * filemap_write_and_wait_range - write out & wait on a file range
678  * Write out and wait upon file offsets lstart->lend, inclusive.
681  * that this function can be used to write to the very end-of-file (end = -1).
698 		 * written partially (e.g. -ENOSPC), so we wait for it.  in filemap_write_and_wait_range()
699 		 * But the -EIO is a special case, it may indicate the worst  in filemap_write_and_wait_range()
702 		if (err != -EIO)  in filemap_write_and_wait_range()
714 	errseq_t eseq = errseq_set(&mapping->wb_err, err);  in __filemap_set_wb_err()
721  * file_check_and_advance_wb_err - report wb error (if any) that was previously
723  * @file: struct file on which the error is being reported
738  * While we handle mapping->wb_err with atomic operations, the f_wb_err
742  * Return: %0 on success, negative error code otherwise.
747 	errseq_t old = READ_ONCE(file->f_wb_err);  in file_check_and_advance_wb_err()
748 	struct address_space *mapping = file->f_mapping;  in file_check_and_advance_wb_err()
751 	if (errseq_check(&mapping->wb_err, old)) {  in file_check_and_advance_wb_err()
753 		spin_lock(&file->f_lock);  in file_check_and_advance_wb_err()
754 		old = file->f_wb_err;  in file_check_and_advance_wb_err()
755 		err = errseq_check_and_advance(&mapping->wb_err,  in file_check_and_advance_wb_err()
756 						&file->f_wb_err);  in file_check_and_advance_wb_err()
758 		spin_unlock(&file->f_lock);  in file_check_and_advance_wb_err()
764 	 * that the legacy code would have had on these flags.  in file_check_and_advance_wb_err()
766 	clear_bit(AS_EIO, &mapping->flags);  in file_check_and_advance_wb_err()
767 	clear_bit(AS_ENOSPC, &mapping->flags);  in file_check_and_advance_wb_err()
773  * file_write_and_wait_range - write out & wait on a file range
778  * Write out and wait upon file offsets lstart->lend, inclusive.
781  * that this function can be used to write to the very end-of-file (end = -1).
783  * After writing out and waiting on the data, we check and advance the
786  * Return: %0 on success, negative error code otherwise.
791 	struct address_space *mapping = file->f_mapping;  in file_write_and_wait_range()
800 		if (err != -EIO)  in file_write_and_wait_range()
811  * replace_page_cache_folio - replace a pagecache folio with a new one
815  * This function replaces a folio in the pagecache with a new one.  On
825 	struct address_space *mapping = old->mapping;  in replace_page_cache_folio()
826 	void (*free_folio)(struct folio *) = mapping->a_ops->free_folio;  in replace_page_cache_folio()
827 	pgoff_t offset = old->index;  in replace_page_cache_folio()
828 	XA_STATE(xas, &mapping->i_pages, offset);  in replace_page_cache_folio()
832 	VM_BUG_ON_FOLIO(new->mapping, new);  in replace_page_cache_folio()
835 	new->mapping = mapping;  in replace_page_cache_folio()
836 	new->index = offset;  in replace_page_cache_folio()
843 	old->mapping = NULL;  in replace_page_cache_folio()
863 	XA_STATE_ORDER(xas, &mapping->i_pages, index, folio_order(folio));  in __filemap_add_folio()
874 	VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);  in __filemap_add_folio()
880 	folio->mapping = mapping;  in __filemap_add_folio()
881 	folio->index = xas.xa_index;  in __filemap_add_folio()
884 		int order = -1;  in __filemap_add_folio()
891 				xas_set_err(&xas, -EEXIST);  in __filemap_add_folio()
898 			if (order == -1)  in __filemap_add_folio()
931 		mapping->nrpages += nr;  in __filemap_add_folio()
954 	folio->mapping = NULL;  in __filemap_add_folio()
955 	/* Leave folio->index set: truncation relies upon it */  in __filemap_add_folio()
967 	bool kernel_file = test_bit(AS_KERNEL_FILE, &mapping->flags);  in filemap_add_folio()
1026  * filemap_invalidate_lock_two - lock invalidate_lock for two mappings
1039 		down_write(&mapping1->invalidate_lock);  in filemap_invalidate_lock_two()
1041 		down_write_nested(&mapping2->invalidate_lock, 1);  in filemap_invalidate_lock_two()
1046  * filemap_invalidate_unlock_two - unlock invalidate_lock for two mappings
1057 		up_write(&mapping1->invalidate_lock);  in filemap_invalidate_unlock_two()
1059 		up_write(&mapping2->invalidate_lock);  in filemap_invalidate_unlock_two()
1064  * In order to wait for pages to become available there must be
1067  * waiters on the same queue and wake all when any of the pages
1107  * The page wait code treats the "wait->flags" somewhat unusually, because
1117  *	and remove it from the wait queue.
1125  *	WQ_FLAG_WOKEN bit, wake it up, and remove it from the wait queue.
1127  *	This is the traditional exclusive wait.
1133  *	cannot be taken, we stop walking the wait queue without waking
1140 static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *arg)  in wake_page_function()  argument
1145 		= container_of(wait, struct wait_page_queue, wait);  in wake_page_function()
1151 	 * If it's a lock handoff wait, we get the bit for it, and  in wake_page_function()
1154 	flags = wait->flags;  in wake_page_function()
1156 		if (test_bit(key->bit_nr, &key->folio->flags.f))  in wake_page_function()
1157 			return -1;  in wake_page_function()
1159 			if (test_and_set_bit(key->bit_nr, &key->folio->flags.f))  in wake_page_function()
1160 				return -1;  in wake_page_function()
1166 	 * We are holding the wait-queue lock, but the waiter that  in wake_page_function()
1171 	 * afterwards to avoid any races. This store-release pairs  in wake_page_function()
1172 	 * with the load-acquire in folio_wait_bit_common().  in wake_page_function()
1174 	smp_store_release(&wait->flags, flags | WQ_FLAG_WOKEN);  in wake_page_function()
1175 	wake_up_state(wait->private, mode);  in wake_page_function()
1179 	 * and we can unconditionally remove the wait entry.  in wake_page_function()
1183 	 * After this list_del_init(&wait->entry) the wait entry  in wake_page_function()
1184 	 * might be de-allocated and the process might even have  in wake_page_function()
1187 	list_del_init_careful(&wait->entry);  in wake_page_function()
1201 	spin_lock_irqsave(&q->lock, flags);  in folio_wake_bit()
1206 	 * waiters, but the hashed waitqueue has waiters for other pages on it.  in folio_wake_bit()
1209 	 * Note that, depending on the page pool (buddy, hugetlb, ZONE_DEVICE,  in folio_wake_bit()
1216 	spin_unlock_irqrestore(&q->lock, flags);  in folio_wake_bit()
1224 			 * __folio_lock() waiting on then setting PG_locked.
1227 			 * folio_wait_writeback() waiting on PG_writeback.
1229 	DROP,		/* Drop ref to page before wait, no check when woken,
1230 			 * like folio_put_wait_locked() on PG_locked.
1239 					struct wait_queue_entry *wait)  in folio_trylock_flag()  argument
1241 	if (wait->flags & WQ_FLAG_EXCLUSIVE) {  in folio_trylock_flag()
1242 		if (test_and_set_bit(bit_nr, &folio->flags.f))  in folio_trylock_flag()
1244 	} else if (test_bit(bit_nr, &folio->flags.f))  in folio_trylock_flag()
1247 	wait->flags |= WQ_FLAG_WOKEN | WQ_FLAG_DONE;  in folio_trylock_flag()
1257 	wait_queue_entry_t *wait = &wait_page.wait;  in folio_wait_bit_common()  local
1269 	init_wait(wait);  in folio_wait_bit_common()
1270 	wait->func = wake_page_function;  in folio_wait_bit_common()
1275 	wait->flags = 0;  in folio_wait_bit_common()
1277 		wait->flags = WQ_FLAG_EXCLUSIVE;  in folio_wait_bit_common()
1278 		if (--unfairness < 0)  in folio_wait_bit_common()
1279 			wait->flags |= WQ_FLAG_CUSTOM;  in folio_wait_bit_common()
1290 	 * page queue), and add ourselves to the wait  in folio_wait_bit_common()
1296 	spin_lock_irq(&q->lock);  in folio_wait_bit_common()
1298 	if (!folio_trylock_flag(folio, bit_nr, wait))  in folio_wait_bit_common()
1299 		__add_wait_queue_entry_tail(q, wait);  in folio_wait_bit_common()
1300 	spin_unlock_irq(&q->lock);  in folio_wait_bit_common()
1303 	 * From now on, all the logic will be based on  in folio_wait_bit_common()
1316 	 * be very careful with the 'wait->flags', because  in folio_wait_bit_common()
1325 		flags = smp_load_acquire(&wait->flags);  in folio_wait_bit_common()
1334 		/* If we were non-exclusive, we're done */  in folio_wait_bit_common()
1351 		wait->flags |= WQ_FLAG_DONE;  in folio_wait_bit_common()
1357 	 * waiter from the wait-queues, but the folio waiters bit will remain  in folio_wait_bit_common()
1361 	finish_wait(q, wait);  in folio_wait_bit_common()
1369 	 * NOTE! The wait->flags weren't stable until we've done the  in folio_wait_bit_common()
1376 	 * return value based on that state without races.  in folio_wait_bit_common()
1378 	 * Also note that WQ_FLAG_WOKEN is sufficient for a non-exclusive  in folio_wait_bit_common()
1382 		return wait->flags & WQ_FLAG_DONE ? 0 : -EINTR;  in folio_wait_bit_common()
1384 	return wait->flags & WQ_FLAG_WOKEN ? 0 : -EINTR;  in folio_wait_bit_common()
1389  * migration_entry_wait_on_locked - Wait for a migration entry to be removed
1393  * Wait for a migration entry referencing the given page to be removed. This is
1395  * this can be called without taking a reference on the page. Instead this
1408 	wait_queue_entry_t *wait = &wait_page.wait;  in migration_entry_wait_on_locked()  local
1422 	init_wait(wait);  in migration_entry_wait_on_locked()
1423 	wait->func = wake_page_function;  in migration_entry_wait_on_locked()
1426 	wait->flags = 0;  in migration_entry_wait_on_locked()
1428 	spin_lock_irq(&q->lock);  in migration_entry_wait_on_locked()
1430 	if (!folio_trylock_flag(folio, PG_locked, wait))  in migration_entry_wait_on_locked()
1431 		__add_wait_queue_entry_tail(q, wait);  in migration_entry_wait_on_locked()
1432 	spin_unlock_irq(&q->lock);  in migration_entry_wait_on_locked()
1447 		flags = smp_load_acquire(&wait->flags);  in migration_entry_wait_on_locked()
1458 	finish_wait(q, wait);  in migration_entry_wait_on_locked()
1480  * folio_put_wait_locked - Drop a reference and wait for it to be unlocked
1481  * @folio: The folio to wait for.
1484  * The caller should hold a reference on @folio.  They expect the page to
1490  * Return: 0 if the folio was unlocked or -EINTR if interrupted by a signal.
1498  * folio_unlock - Unlock a locked folio.
1501  * Unlocks the folio and wakes up any thread sleeping on the page lock.
1518  * folio_end_read - End read on a folio.
1525  * sleeping on the lock.  The folio will also be marked uptodate if all
1548  * folio_end_private_2 - Clear PG_private_2 and wake any waiters.
1551  * Clear the PG_private_2 bit on a folio and wake up any sleepers waiting for
1568  * folio_wait_private_2 - Wait for PG_private_2 to be cleared on a folio.
1569  * @folio: The folio to wait on.
1571  * Wait for PG_private_2 to be cleared on a folio.
1581  * folio_wait_private_2_killable - Wait for PG_private_2 to be cleared on a folio.
1582  * @folio: The folio to wait on.
1584  * Wait for PG_private_2 to be cleared on a folio or until a fatal signal is
1588  * - 0 if successful.
1589  * - -EINTR if a fatal signal was encountered.
1607 	struct address_space *mapping = folio->mapping;  in filemap_end_dropbehind()
1621  * completes. Do that now. If we fail, it's likely because of a big folio -
1633 	 * would otherwise not need non-IRQ handling. Just skip the  in folio_end_dropbehind()
1644  * folio_end_writeback_no_dropbehind - End writeback against a folio.
1676  * folio_end_writeback - End writeback against a folio.
1689 	 * on truncation to wait for the clearing of PG_writeback.  in folio_end_writeback()
1701  * __folio_lock - Get a lock on the folio, assuming we need to sleep to get it.
1718 static int __folio_lock_async(struct folio *folio, struct wait_page_queue *wait)  in __folio_lock_async()  argument
1723 	wait->folio = folio;  in __folio_lock_async()
1724 	wait->bit_nr = PG_locked;  in __folio_lock_async()
1726 	spin_lock_irq(&q->lock);  in __folio_lock_async()
1727 	__add_wait_queue_entry_tail(q, &wait->wait);  in __folio_lock_async()
1731 	 * If we were successful now, we know we're still on the  in __folio_lock_async()
1737 		__remove_wait_queue(q, &wait->wait);  in __folio_lock_async()
1739 		ret = -EIOCBQUEUED;  in __folio_lock_async()
1740 	spin_unlock_irq(&q->lock);  in __folio_lock_async()
1746  * 0 - folio is locked.
1747  * non-zero - folio is not locked.
1748  *     mmap_lock or per-VMA lock has been released (mmap_read_unlock() or
1753  * with the folio locked and the mmap_lock/per-VMA lock is left unperturbed.
1757 	unsigned int flags = vmf->flags;  in __folio_lock_or_retry()
1761 		 * CAUTION! In this case, mmap_lock/per-VMA lock is not  in __folio_lock_or_retry()
1790  * page_cache_next_miss() - Find the next gap in the page cache.
1795  * Search the range [index, min(index + max_scan - 1, ULONG_MAX)] for the
1805  * range specified (in which case 'return - index >= max_scan' will be true).
1806  * In the rare case of index wrap-around, 0 will be returned.
1811 	XA_STATE(xas, &mapping->i_pages, index);  in page_cache_next_miss()
1814 	while (nr--) {  in page_cache_next_miss()
1827  * page_cache_prev_miss() - Find the previous gap in the page cache.
1832  * Search the range [max(index - max_scan + 1, 0), index] for the
1842  * range specified (in which case 'index - return >= max_scan' will be true).
1843  * In the rare case of wrap-around, ULONG_MAX will be returned.
1848 	XA_STATE(xas, &mapping->i_pages, index);  in page_cache_prev_miss()
1850 	while (max_scan--) {  in page_cache_prev_miss()
1864  * On the lookup side:
1869  * On the removal side:
1875  * increased by a speculative page cache (or GUP-fast) lookup as it can
1878  * last refcount on the page, any page allocation must be freeable by
1883  * filemap_get_entry - Get a page cache entry.
1896 	XA_STATE(xas, &mapping->i_pages, index);  in filemap_get_entry()
1926  * __filemap_get_folio - Find and get a reference to a folio.
1957 				return ERR_PTR(-EAGAIN);  in __filemap_get_folio()
1964 		if (unlikely(folio->mapping != mapping)) {  in __filemap_get_folio()
1975 		/* Clear idle flag for buffer write */  in __filemap_get_folio()
2003 		if (index & ((1UL << order) - 1))  in __filemap_get_folio()
2009 			err = -ENOMEM;  in __filemap_get_folio()
2027 		} while (order-- > min_order);  in __filemap_get_folio()
2029 		if (err == -EEXIST)  in __filemap_get_folio()
2036 			 * Return -EAGAIN so that the caller retries in a  in __filemap_get_folio()
2037 			 * blocking fashion instead of propagating -ENOMEM  in __filemap_get_folio()
2040 			if ((fgp_flags & FGP_NOWAIT) && err == -ENOMEM)  in __filemap_get_folio()
2041 				err = -EAGAIN;  in __filemap_get_folio()
2053 		return ERR_PTR(-ENOENT);  in __filemap_get_folio()
2097  * find_get_entries - gang pagecache lookup
2106  * takes a reference on any actual folios it returns.
2109  * due to not-present entries or large folios.
2119 	XA_STATE(xas, &mapping->i_pages, *start);  in find_get_entries()
2124 		indices[fbatch->nr] = xas.xa_index;  in find_get_entries()
2131 		int idx = folio_batch_count(fbatch) - 1;  in find_get_entries()
2133 		folio = fbatch->folios[idx];  in find_get_entries()
2137 			nr = 1 << xa_get_order(&mapping->i_pages, indices[idx]);  in find_get_entries()
2146  * find_lock_entries - Find a batch of pagecache entries.
2160  * due to not-present entries, large folios, folios which could not be
2168 	XA_STATE(xas, &mapping->i_pages, *start);  in find_lock_entries()
2178 			base = folio->index;  in find_lock_entries()
2183 			if (base + nr - 1 > end)  in find_lock_entries()
2187 			if (folio->mapping != mapping ||  in find_lock_entries()
2194 			base = xas.xa_index & ~(nr - 1);  in find_lock_entries()
2199 			if (base + nr - 1 > end)  in find_lock_entries()
2203 		/* Update start now so that last update is correct on return */  in find_lock_entries()
2205 		indices[fbatch->nr] = xas.xa_index;  in find_lock_entries()
2220  * filemap_get_folios - Get a batch of folios
2241  * filemap_get_folios_contig - Get a batch of contiguous folios
2258 	XA_STATE(xas, &mapping->i_pages, *start);  in filemap_get_folios_contig()
2287 			*start = folio->index + nr;  in filemap_get_folios_contig()
2290 		xas_advance(&xas, folio_next_index(folio) - 1);  in filemap_get_folios_contig()
2303 		folio = fbatch->folios[nr - 1];  in filemap_get_folios_contig()
2313  * filemap_get_folios_tag - Get a batch of folios matching @tag
2334 	XA_STATE(xas, &mapping->i_pages, *start);  in filemap_get_folios_tag()
2348 			*start = folio->index + nr;  in filemap_get_folios_tag()
2355 	 * breaks the iteration when there is a page at index -1 but that is  in filemap_get_folios_tag()
2358 	if (end == (pgoff_t)-1)  in filemap_get_folios_tag()
2359 		*start = (pgoff_t)-1;  in filemap_get_folios_tag()
2373  *      ---R__________________________________________B__________
2386 	ra->ra_pages /= 4;  in shrink_readahead_size_eio()
2390  * filemap_get_read_batch - Get a batch of folios for read
2401 	XA_STATE(xas, &mapping->i_pages, index);  in filemap_get_read_batch()
2424 		xas_advance(&xas, folio_next_index(folio) - 1);  in filemap_get_read_batch()
2456 		shrink_readahead_size_eio(&file->f_ra);  in filemap_read_folio()
2457 	return -EIO;  in filemap_read_folio()
2469 	if (!mapping->a_ops->is_partially_uptodate)  in filemap_range_uptodate()
2471 	if (mapping->host->i_blkbits >= folio_shift(folio))  in filemap_range_uptodate()
2475 		count -= folio_pos(folio) - pos;  in filemap_range_uptodate()
2478 		pos -= folio_pos(folio);  in filemap_range_uptodate()
2484 	return mapping->a_ops->is_partially_uptodate(folio, pos, count);  in filemap_range_uptodate()
2493 	if (iocb->ki_flags & IOCB_NOWAIT) {  in filemap_update_page()
2495 			return -EAGAIN;  in filemap_update_page()
2501 		error = -EAGAIN;  in filemap_update_page()
2502 		if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_NOIO))  in filemap_update_page()
2504 		if (!(iocb->ki_flags & IOCB_WAITQ)) {  in filemap_update_page()
2513 		error = __folio_lock_async(folio, iocb->ki_waitq);  in filemap_update_page()
2519 	if (!folio->mapping)  in filemap_update_page()
2523 	if (filemap_range_uptodate(mapping, iocb->ki_pos, count, folio,  in filemap_update_page()
2527 	error = -EAGAIN;  in filemap_update_page()
2528 	if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT | IOCB_WAITQ))  in filemap_update_page()
2531 	error = filemap_read_folio(iocb->ki_filp, mapping->a_ops->read_folio,  in filemap_update_page()
2545 	struct address_space *mapping = iocb->ki_filp->f_mapping;  in filemap_create_folio()
2551 	if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_WAITQ))  in filemap_create_folio()
2552 		return -EAGAIN;  in filemap_create_folio()
2556 		return -ENOMEM;  in filemap_create_folio()
2557 	if (iocb->ki_flags & IOCB_DONTCACHE)  in filemap_create_folio()
2569 	 * pages or ->readahead() that need to hold invalidate_lock  in filemap_create_folio()
2574 	index = (iocb->ki_pos >> (PAGE_SHIFT + min_order)) << min_order;  in filemap_create_folio()
2577 	if (error == -EEXIST)  in filemap_create_folio()
2582 	error = filemap_read_folio(iocb->ki_filp, mapping->a_ops->read_folio,  in filemap_create_folio()
2600 	DEFINE_READAHEAD(ractl, file, &file->f_ra, mapping, folio->index);  in filemap_readahead()
2602 	if (iocb->ki_flags & IOCB_NOIO)  in filemap_readahead()
2603 		return -EAGAIN;  in filemap_readahead()
2604 	if (iocb->ki_flags & IOCB_DONTCACHE)  in filemap_readahead()
2606 	page_cache_async_ra(&ractl, folio, last_index - folio->index);  in filemap_readahead()
2613 	struct file *filp = iocb->ki_filp;  in filemap_get_pages()
2614 	struct address_space *mapping = filp->f_mapping;  in filemap_get_pages()
2615 	pgoff_t index = iocb->ki_pos >> PAGE_SHIFT;  in filemap_get_pages()
2622 	last_index = round_up(iocb->ki_pos + count,  in filemap_get_pages()
2626 		return -EINTR;  in filemap_get_pages()
2628 	filemap_get_read_batch(mapping, index, last_index - 1, fbatch);  in filemap_get_pages()
2630 		DEFINE_READAHEAD(ractl, filp, &filp->f_ra, mapping, index);  in filemap_get_pages()
2632 		if (iocb->ki_flags & IOCB_NOIO)  in filemap_get_pages()
2633 			return -EAGAIN;  in filemap_get_pages()
2634 		if (iocb->ki_flags & IOCB_NOWAIT)  in filemap_get_pages()
2636 		if (iocb->ki_flags & IOCB_DONTCACHE)  in filemap_get_pages()
2638 		page_cache_sync_ra(&ractl, last_index - index);  in filemap_get_pages()
2639 		if (iocb->ki_flags & IOCB_NOWAIT)  in filemap_get_pages()
2641 		filemap_get_read_batch(mapping, index, last_index - 1, fbatch);  in filemap_get_pages()
2650 	folio = fbatch->folios[folio_batch_count(fbatch) - 1];  in filemap_get_pages()
2658 			err = -EAGAIN;  in filemap_get_pages()
2667 	trace_mm_filemap_get_pages(mapping, index, last_index - 1);  in filemap_get_pages()
2672 	if (likely(--fbatch->nr))  in filemap_get_pages()
2699  * filemap_read - Read data from the page cache.
2714 	struct file *filp = iocb->ki_filp;  in filemap_read()
2715 	struct file_ra_state *ra = &filp->f_ra;  in filemap_read()
2716 	struct address_space *mapping = filp->f_mapping;  in filemap_read()
2717 	struct inode *inode = mapping->host;  in filemap_read()
2722 	loff_t last_pos = ra->prev_pos;  in filemap_read()
2724 	if (unlikely(iocb->ki_pos < 0))  in filemap_read()
2725 		return -EINVAL;  in filemap_read()
2726 	if (unlikely(iocb->ki_pos >= inode->i_sb->s_maxbytes))  in filemap_read()
2731 	iov_iter_truncate(iter, inode->i_sb->s_maxbytes - iocb->ki_pos);  in filemap_read()
2739 		 * can no longer safely return -EIOCBQUEUED. Hence mark  in filemap_read()
2742 		if ((iocb->ki_flags & IOCB_WAITQ) && already_read)  in filemap_read()
2743 			iocb->ki_flags |= IOCB_NOWAIT;  in filemap_read()
2745 		if (unlikely(iocb->ki_pos >= i_size_read(inode)))  in filemap_read()
2748 		error = filemap_get_pages(iocb, iter->count, &fbatch, false);  in filemap_read()
2756 		 * the correct value for "nr", which means the zero-filled  in filemap_read()
2758 		 * another truncate extends the file - this is desired though).  in filemap_read()
2761 		if (unlikely(iocb->ki_pos >= isize))  in filemap_read()
2763 		end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count);  in filemap_read()
2775 		if (!pos_same_folio(iocb->ki_pos, last_pos - 1,  in filemap_read()
2782 			size_t offset = iocb->ki_pos & (fsize - 1);  in filemap_read()
2783 			size_t bytes = min_t(loff_t, end_offset - iocb->ki_pos,  in filemap_read()
2784 					     fsize - offset);  in filemap_read()
2794 			 * before reading the folio on the kernel side.  in filemap_read()
2802 			iocb->ki_pos += copied;  in filemap_read()
2803 			last_pos = iocb->ki_pos;  in filemap_read()
2806 				error = -EFAULT;  in filemap_read()
2818 	} while (iov_iter_count(iter) && iocb->ki_pos < isize && !error);  in filemap_read()
2821 	ra->prev_pos = last_pos;  in filemap_read()
2828 	struct address_space *mapping = iocb->ki_filp->f_mapping;  in kiocb_write_and_wait()
2829 	loff_t pos = iocb->ki_pos;  in kiocb_write_and_wait()
2830 	loff_t end = pos + count - 1;  in kiocb_write_and_wait()
2832 	if (iocb->ki_flags & IOCB_NOWAIT) {  in kiocb_write_and_wait()
2834 			return -EAGAIN;  in kiocb_write_and_wait()
2850 			return -EAGAIN;  in filemap_invalidate_pages()
2858 	 * After a write we want buffered reads to be sure to go to disk to get  in filemap_invalidate_pages()
2860 	 * about to write.  We do this *before* the write so that we can return  in filemap_invalidate_pages()
2861 	 * without clobbering -EIOCBQUEUED from ->direct_IO().  in filemap_invalidate_pages()
2869 	struct address_space *mapping = iocb->ki_filp->f_mapping;  in kiocb_invalidate_pages()
2871 	return filemap_invalidate_pages(mapping, iocb->ki_pos,  in kiocb_invalidate_pages()
2872 					iocb->ki_pos + count - 1,  in kiocb_invalidate_pages()
2873 					iocb->ki_flags & IOCB_NOWAIT);  in kiocb_invalidate_pages()
2878  * generic_file_read_iter - generic filesystem read routine
2885  * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall
2889  * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O
2891  * can be read, -EAGAIN shall be returned.  When readahead would be
2907 	if (iocb->ki_flags & IOCB_DIRECT) {  in generic_file_read_iter()
2908 		struct file *file = iocb->ki_filp;  in generic_file_read_iter()
2909 		struct address_space *mapping = file->f_mapping;  in generic_file_read_iter()
2910 		struct inode *inode = mapping->host;  in generic_file_read_iter()
2917 		retval = mapping->a_ops->direct_IO(iocb, iter);  in generic_file_read_iter()
2919 			iocb->ki_pos += retval;  in generic_file_read_iter()
2920 			count -= retval;  in generic_file_read_iter()
2922 		if (retval != -EIOCBQUEUED)  in generic_file_read_iter()
2923 			iov_iter_revert(iter, count - iov_iter_count(iter));  in generic_file_read_iter()
2936 		if (iocb->ki_pos >= i_size_read(inode))  in generic_file_read_iter()
2954 	size = min(size, folio_size(folio) - offset);  in splice_folio_into_pipe()
2959 		size_t part = min_t(size_t, PAGE_SIZE - offset, size - spliced);  in splice_folio_into_pipe()
2968 		pipe->head++;  in splice_folio_into_pipe()
2978  * filemap_splice_read -  Splice data from a file's pagecache into a pipe
2989  * Return: On success, the number of bytes read will be returned and *@ppos
2991  * to be read; -EAGAIN will be returned if the pipe had no space, and some
2992  * other negative error code will be returned on error.  A short read may occur
3007 	if (unlikely(*ppos >= in->f_mapping->host->i_sb->s_maxbytes))  in filemap_splice_read()
3015 	npages = max_t(ssize_t, pipe->max_usage - used, 0);  in filemap_splice_read()
3023 		if (*ppos >= i_size_read(in->f_mapping->host))  in filemap_splice_read()
3035 		 * the correct value for "nr", which means the zero-filled  in filemap_splice_read()
3037 		 * another truncate extends the file - this is desired though).  in filemap_splice_read()
3039 		isize = i_size_read(in->f_mapping->host);  in filemap_splice_read()
3048 		writably_mapped = mapping_writably_mapped(in->f_mapping);  in filemap_splice_read()
3061 			 * before reading the folio on the kernel side.  in filemap_splice_read()
3066 			n = min_t(loff_t, len, isize - *ppos);  in filemap_splice_read()
3070 			len -= n;  in filemap_splice_read()
3073 			in->f_ra.prev_pos = *ppos;  in filemap_splice_read()
3093 	const struct address_space_operations *ops = mapping->a_ops;  in folio_seek_hole_data()
3094 	size_t offset, bsz = i_blocksize(mapping->host);  in folio_seek_hole_data()
3098 	if (!ops->is_partially_uptodate)  in folio_seek_hole_data()
3104 	if (unlikely(folio->mapping != mapping))  in folio_seek_hole_data()
3107 	offset = offset_in_folio(folio, start) & ~(bsz - 1);  in folio_seek_hole_data()
3110 		if (ops->is_partially_uptodate(folio, offset, bsz) ==  in folio_seek_hole_data()
3113 		start = (start + bsz) & ~((u64)bsz - 1);  in folio_seek_hole_data()
3130  * mapping_seek_hole_data - Seek for SEEK_DATA / SEEK_HOLE in the page cache.
3139  * entirely memory-based such as tmpfs, and filesystems which support
3142  * Return: The requested offset on success, or -ENXIO if @whence specifies
3144  * after @end - 1, so SEEK_HOLE returns @end if all the bytes between @start
3150 	XA_STATE(xas, &mapping->i_pages, start >> PAGE_SHIFT);  in mapping_seek_hole_data()
3151 	pgoff_t max = (end - 1) >> PAGE_SHIFT;  in mapping_seek_hole_data()
3156 		return -ENXIO;  in mapping_seek_hole_data()
3183 		start = -ENXIO;  in mapping_seek_hole_data()
3196  * lock_folio_maybe_drop_mmap - lock the page, possibly dropping the mmap_lock
3197  * @vmf - the vm_fault for this fault.
3198  * @folio - the folio to lock.
3199  * @fpin - the pointer to the file we may pin (or is already pinned).
3218 	if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)  in lock_folio_maybe_drop_mmap()
3222 	if (vmf->flags & FAULT_FLAG_KILLABLE) {  in lock_folio_maybe_drop_mmap()
3250 	struct file *file = vmf->vma->vm_file;  in do_sync_mmap_readahead()
3251 	struct file_ra_state *ra = &file->f_ra;  in do_sync_mmap_readahead()
3252 	struct address_space *mapping = file->f_mapping;  in do_sync_mmap_readahead()
3253 	DEFINE_READAHEAD(ractl, file, ra, mapping, vmf->pgoff);  in do_sync_mmap_readahead()
3255 	vm_flags_t vm_flags = vmf->vma->vm_flags;  in do_sync_mmap_readahead()
3262 		ractl._index &= ~((unsigned long)HPAGE_PMD_NR - 1);  in do_sync_mmap_readahead()
3263 		ra->size = HPAGE_PMD_NR;  in do_sync_mmap_readahead()
3269 			ra->size *= 2;  in do_sync_mmap_readahead()
3270 		ra->async_size = HPAGE_PMD_NR;  in do_sync_mmap_readahead()
3271 		ra->order = HPAGE_PMD_ORDER;  in do_sync_mmap_readahead()
3278 	 * If we don't want any read-ahead, don't bother. VM_EXEC case below is  in do_sync_mmap_readahead()
3283 	if (!ra->ra_pages)  in do_sync_mmap_readahead()
3288 		page_cache_sync_ra(&ractl, ra->ra_pages);  in do_sync_mmap_readahead()
3293 	mmap_miss = READ_ONCE(ra->mmap_miss);  in do_sync_mmap_readahead()
3295 		WRITE_ONCE(ra->mmap_miss, ++mmap_miss);  in do_sync_mmap_readahead()
3299 	 * stop bothering with read-ahead. It will only hurt.  in do_sync_mmap_readahead()
3308 		 * performance if (e.g.) arm64 can contpte-map the folio.  in do_sync_mmap_readahead()
3316 		struct vm_area_struct *vma = vmf->vma;  in do_sync_mmap_readahead()
3317 		unsigned long start = vma->vm_pgoff;  in do_sync_mmap_readahead()
3321 		ra->order = exec_folio_order();  in do_sync_mmap_readahead()
3322 		ra->start = round_down(vmf->pgoff, 1UL << ra->order);  in do_sync_mmap_readahead()
3323 		ra->start = max(ra->start, start);  in do_sync_mmap_readahead()
3324 		ra_end = round_up(ra->start + ra->ra_pages, 1UL << ra->order);  in do_sync_mmap_readahead()
3326 		ra->size = ra_end - ra->start;  in do_sync_mmap_readahead()
3327 		ra->async_size = 0;  in do_sync_mmap_readahead()
3330 		 * mmap read-around  in do_sync_mmap_readahead()
3332 		ra->start = max_t(long, 0, vmf->pgoff - ra->ra_pages / 2);  in do_sync_mmap_readahead()
3333 		ra->size = ra->ra_pages;  in do_sync_mmap_readahead()
3334 		ra->async_size = ra->ra_pages / 4;  in do_sync_mmap_readahead()
3335 		ra->order = 0;  in do_sync_mmap_readahead()
3339 	ractl._index = ra->start;  in do_sync_mmap_readahead()
3352 	struct file *file = vmf->vma->vm_file;  in do_async_mmap_readahead()
3353 	struct file_ra_state *ra = &file->f_ra;  in do_async_mmap_readahead()
3354 	DEFINE_READAHEAD(ractl, file, ra, file->f_mapping, vmf->pgoff);  in do_async_mmap_readahead()
3358 	/* If we don't want any read-ahead, don't bother */  in do_async_mmap_readahead()
3359 	if (vmf->vma->vm_flags & VM_RAND_READ || !ra->ra_pages)  in do_async_mmap_readahead()
3369 		mmap_miss = READ_ONCE(ra->mmap_miss);  in do_async_mmap_readahead()
3371 			WRITE_ONCE(ra->mmap_miss, --mmap_miss);  in do_async_mmap_readahead()
3376 		page_cache_async_ra(&ractl, folio, ra->ra_pages);  in do_async_mmap_readahead()
3383 	struct vm_area_struct *vma = vmf->vma;  in filemap_fault_recheck_pte_none()
3390 	 * might have been evicted. During a read+clear/modify/write update of  in filemap_fault_recheck_pte_none()
3397 	 * scenario while holding the PT lock, to not degrade non-mlocked  in filemap_fault_recheck_pte_none()
3401 	if (!(vma->vm_flags & VM_LOCKED))  in filemap_fault_recheck_pte_none()
3404 	if (!(vmf->flags & FAULT_FLAG_ORIG_PTE_VALID))  in filemap_fault_recheck_pte_none()
3407 	ptep = pte_offset_map_ro_nolock(vma->vm_mm, vmf->pmd, vmf->address,  in filemap_fault_recheck_pte_none()
3408 					&vmf->ptl);  in filemap_fault_recheck_pte_none()
3415 		spin_lock(vmf->ptl);  in filemap_fault_recheck_pte_none()
3418 		spin_unlock(vmf->ptl);  in filemap_fault_recheck_pte_none()
3425  * filemap_fault - read in file data for page fault handling
3435  * vma->vm_mm->mmap_lock must be held on entry.
3445  * Return: bitwise-OR of %VM_FAULT_ codes.
3450 	struct file *file = vmf->vma->vm_file;  in filemap_fault()
3452 	struct address_space *mapping = file->f_mapping;  in filemap_fault()
3453 	struct inode *inode = mapping->host;  in filemap_fault()
3454 	pgoff_t max_idx, index = vmf->pgoff;  in filemap_fault()
3474 		if (!(vmf->flags & FAULT_FLAG_TRIED))  in filemap_fault()
3487 		count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);  in filemap_fault()
3501 					  vmf->gfp_mask);  in filemap_fault()
3514 	if (unlikely(folio->mapping != mapping)) {  in filemap_fault()
3523 	 * that it's up-to-date. If not, it is going to be due to an error,  in filemap_fault()
3549 	 * time to return to the upper layer and have it re-find the vma and  in filemap_fault()
3560 	 * Found the page and have a reference on it.  in filemap_fault()
3570 	vmf->page = folio_file_page(folio, index);  in filemap_fault()
3575 	 * Umm, take care of errors if the page isn't up-to-date.  in filemap_fault()
3576 	 * Try to re-read it _once_. We do this synchronously,  in filemap_fault()
3581 	error = filemap_read_folio(file, mapping->a_ops->read_folio, folio);  in filemap_fault()
3595 	 * re-find the vma and come back and find our hopefully still populated  in filemap_fault()
3611 	struct mm_struct *mm = vmf->vma->vm_mm;  in filemap_map_pmd()
3614 	if (pmd_trans_huge(*vmf->pmd)) {  in filemap_map_pmd()
3620 	if (pmd_none(*vmf->pmd) && folio_test_pmd_mappable(folio)) {  in filemap_map_pmd()
3630 	if (pmd_none(*vmf->pmd) && vmf->prealloc_pte)  in filemap_map_pmd()
3631 		pmd_install(mm, vmf->pmd, &vmf->prealloc_pte);  in filemap_map_pmd()
3660 		if (folio->mapping != mapping)  in next_uptodate_folio()
3664 		max_idx = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);  in next_uptodate_folio()
3665 		if (xas->xa_index >= max_idx)  in next_uptodate_folio()
3690 	pte_t *old_ptep = vmf->pte;  in filemap_map_folio_range()
3698 	addr0 = addr - start * PAGE_SIZE;  in filemap_map_folio_range()
3699 	if (folio_within_vma(folio, vmf->vma) &&  in filemap_map_folio_range()
3700 	    (addr0 & PMD_MASK) == ((addr0 + folio_size(folio) - 1) & PMD_MASK)) {  in filemap_map_folio_range()
3701 		vmf->pte -= start;  in filemap_map_folio_range()
3702 		page -= start;  in filemap_map_folio_range()
3714 		 * In such situation, read-ahead is only a waste of IO.  in filemap_map_folio_range()
3716 		 * we can stop read-ahead.  in filemap_map_folio_range()
3724 		 * fault-around logic.  in filemap_map_folio_range()
3726 		if (!pte_none(ptep_get(&vmf->pte[count])))  in filemap_map_folio_range()
3735 			folio_ref_add(folio, count - ref_from_caller);  in filemap_map_folio_range()
3737 			if (in_range(vmf->address, addr, count * PAGE_SIZE))  in filemap_map_folio_range()
3743 		vmf->pte += count;  in filemap_map_folio_range()
3746 	} while (--nr_pages > 0);  in filemap_map_folio_range()
3751 		folio_ref_add(folio, count - ref_from_caller);  in filemap_map_folio_range()
3753 		if (in_range(vmf->address, addr, count * PAGE_SIZE))  in filemap_map_folio_range()
3757 	vmf->pte = old_ptep;  in filemap_map_folio_range()
3770 	struct page *page = &folio->page;  in filemap_map_order0_folio()
3782 	 * the fault-around logic.  in filemap_map_order0_folio()
3784 	if (!pte_none(ptep_get(vmf->pte)))  in filemap_map_order0_folio()
3787 	if (vmf->address == addr)  in filemap_map_order0_folio()
3803 	struct vm_area_struct *vma = vmf->vma;  in filemap_map_pages()
3804 	struct file *file = vma->vm_file;  in filemap_map_pages()
3805 	struct address_space *mapping = file->f_mapping;  in filemap_map_pages()
3808 	XA_STATE(xas, &mapping->i_pages, start_pgoff);  in filemap_map_pages()
3825 	addr = vma->vm_start + ((start_pgoff - vma->vm_pgoff) << PAGE_SHIFT);  in filemap_map_pages()
3826 	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, addr, &vmf->ptl);  in filemap_map_pages()
3827 	if (!vmf->pte) {  in filemap_map_pages()
3833 	file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1;  in filemap_map_pages()
3841 		addr += (xas.xa_index - last_pgoff) << PAGE_SHIFT;  in filemap_map_pages()
3842 		vmf->pte += xas.xa_index - last_pgoff;  in filemap_map_pages()
3844 		end = folio_next_index(folio) - 1;  in filemap_map_pages()
3845 		nr_pages = min(end, end_pgoff) - xas.xa_index + 1;  in filemap_map_pages()
3852 					xas.xa_index - folio->index, addr,  in filemap_map_pages()
3857 	add_mm_counter(vma->vm_mm, folio_type, rss);  in filemap_map_pages()
3858 	pte_unmap_unlock(vmf->pte, vmf->ptl);  in filemap_map_pages()
3863 	mmap_miss_saved = READ_ONCE(file->f_ra.mmap_miss);  in filemap_map_pages()
3865 		WRITE_ONCE(file->f_ra.mmap_miss, 0);  in filemap_map_pages()
3867 		WRITE_ONCE(file->f_ra.mmap_miss, mmap_miss_saved - mmap_miss);  in filemap_map_pages()
3875 	struct address_space *mapping = vmf->vma->vm_file->f_mapping;  in filemap_page_mkwrite()
3876 	struct folio *folio = page_folio(vmf->page);  in filemap_page_mkwrite()
3879 	sb_start_pagefault(mapping->host->i_sb);  in filemap_page_mkwrite()
3880 	file_update_time(vmf->vma->vm_file);  in filemap_page_mkwrite()
3882 	if (folio->mapping != mapping) {  in filemap_page_mkwrite()
3895 	sb_end_pagefault(mapping->host->i_sb);  in filemap_page_mkwrite()
3909 	struct address_space *mapping = file->f_mapping;  in generic_file_mmap()
3911 	if (!mapping->a_ops->read_folio)  in generic_file_mmap()
3912 		return -ENOEXEC;  in generic_file_mmap()
3914 	vma->vm_ops = &generic_file_vm_ops;  in generic_file_mmap()
3920 	struct file *file = desc->file;  in generic_file_mmap_prepare()
3921 	struct address_space *mapping = file->f_mapping;  in generic_file_mmap_prepare()
3923 	if (!mapping->a_ops->read_folio)  in generic_file_mmap_prepare()
3924 		return -ENOEXEC;  in generic_file_mmap_prepare()
3926 	desc->vm_ops = &generic_file_vm_ops;  in generic_file_mmap_prepare()
3931  * This is for filesystems which do not implement ->writepage.
3936 		return -EINVAL;  in generic_file_readonly_mmap()
3942 	if (is_shared_maywrite(desc->vm_flags))  in generic_file_readonly_mmap_prepare()
3943 		return -EINVAL;  in generic_file_readonly_mmap_prepare()
3953 	return -ENOSYS;  in generic_file_mmap()
3957 	return -ENOSYS;  in generic_file_mmap_prepare()
3961 	return -ENOSYS;  in generic_file_readonly_mmap()
3965 	return -ENOSYS;  in generic_file_readonly_mmap_prepare()
3982 		filler = mapping->a_ops->read_folio;  in do_read_cache_folio()
3989 			return ERR_PTR(-ENOMEM);  in do_read_cache_folio()
3994 			if (err == -EEXIST)  in do_read_cache_folio()
4011 	if (!folio->mapping) {  in do_read_cache_folio()
4038  * read_cache_folio - Read into page cache, fill it if needed.
4041  * @filler: Function to perform the read, or NULL to use aops->read_folio().
4050  * Context: May sleep.  Expects mapping->invalidate_lock to be held.
4051  * Return: An uptodate folio on success, ERR_PTR() on failure.
4062  * mapping_read_folio_gfp - Read into page cache, using specified allocation flags.
4071  * possible and so is EINTR.  If ->read_folio returns another error,
4074  * The function expects mapping->invalidate_lock to be already held.
4076  * Return: Uptodate folio on success, ERR_PTR() on failure.
4092 		return &folio->page;  in do_read_cache_page()
4105  * read_cache_page_gfp - read into page cache, using specified page allocation flags.
4113  * If the page does not get brought uptodate, return -EIO.
4115  * The function expects mapping->invalidate_lock to be already held.
4117  * Return: up to date page on success, ERR_PTR() on failure.
4128  * Warn about a page cache invalidation failure during a direct I/O write.
4136 	errseq_set(&filp->f_mapping->wb_err, -EIO);  in dio_warn_stale_pagecache()
4141 …pr_crit("Page cache invalidation failure on direct I/O.  Possible data corruption due to collision…  in dio_warn_stale_pagecache()
4142 		pr_crit("File: %s PID: %d Comm: %.20s\n", path, current->pid,  in dio_warn_stale_pagecache()
4143 			current->comm);  in dio_warn_stale_pagecache()
4149 	struct address_space *mapping = iocb->ki_filp->f_mapping;  in kiocb_invalidate_post_direct_write()
4151 	if (mapping->nrpages &&  in kiocb_invalidate_post_direct_write()
4153 			iocb->ki_pos >> PAGE_SHIFT,  in kiocb_invalidate_post_direct_write()
4154 			(iocb->ki_pos + count - 1) >> PAGE_SHIFT))  in kiocb_invalidate_post_direct_write()
4155 		dio_warn_stale_pagecache(iocb->ki_filp);  in kiocb_invalidate_post_direct_write()
4161 	struct address_space *mapping = iocb->ki_filp->f_mapping;  in generic_file_direct_write()
4167 	 * to buffered write.  in generic_file_direct_write()
4171 		if (written == -EBUSY)  in generic_file_direct_write()
4176 	written = mapping->a_ops->direct_IO(iocb, from);  in generic_file_direct_write()
4180 	 * cached by non-direct readahead, or faulted in by get_user_pages()  in generic_file_direct_write()
4181 	 * if the source of the write was an mmap'ed region of the file  in generic_file_direct_write()
4184 	 * fails, tough, the write still worked...  in generic_file_direct_write()
4196 		struct inode *inode = mapping->host;  in generic_file_direct_write()
4197 		loff_t pos = iocb->ki_pos;  in generic_file_direct_write()
4201 		write_len -= written;  in generic_file_direct_write()
4202 		if (pos > i_size_read(inode) && !S_ISBLK(inode->i_mode)) {  in generic_file_direct_write()
4206 		iocb->ki_pos = pos;  in generic_file_direct_write()
4208 	if (written != -EIOCBQUEUED)  in generic_file_direct_write()
4209 		iov_iter_revert(from, write_len - iov_iter_count(from));  in generic_file_direct_write()
4216 	struct file *file = iocb->ki_filp;  in generic_perform_write()
4217 	loff_t pos = iocb->ki_pos;  in generic_perform_write()
4218 	struct address_space *mapping = file->f_mapping;  in generic_perform_write()
4219 	const struct address_space_operations *a_ops = mapping->a_ops;  in generic_perform_write()
4227 		size_t bytes;		/* Bytes to write to folio */  in generic_perform_write()
4233 		offset = pos & (chunk - 1);  in generic_perform_write()
4234 		bytes = min(chunk - offset, bytes);  in generic_perform_write()
4238 			status = -EINTR;  in generic_perform_write()
4242 		status = a_ops->write_begin(iocb, mapping, pos, bytes,  in generic_perform_write()
4248 		if (bytes > folio_size(folio) - offset)  in generic_perform_write()
4249 			bytes = folio_size(folio) - offset;  in generic_perform_write()
4255 		 * Faults here on mmap()s can recurse into arbitrary  in generic_perform_write()
4263 		status = a_ops->write_end(iocb, mapping, pos, bytes, copied,  in generic_perform_write()
4266 			iov_iter_revert(i, copied - max(status, 0L));  in generic_perform_write()
4274 			 * A short copy made ->write_end() reject the  in generic_perform_write()
4287 			 * 'folio' is now unlocked and faults on it can be  in generic_perform_write()
4292 				status = -EFAULT;  in generic_perform_write()
4303 	iocb->ki_pos += written;  in generic_perform_write()
4309  * __generic_file_write_iter - write data to a file
4311  * @from:	iov_iter with data to write
4315  * modification times and calls proper subroutines depending on whether we
4316  * do direct IO or a standard buffered write.
4318  * It expects i_rwsem to be grabbed unless we work on a block device or similar
4321  * This function does *not* take care of syncing data in case of O_SYNC write.
4331 	struct file *file = iocb->ki_filp;  in __generic_file_write_iter()
4332 	struct address_space *mapping = file->f_mapping;  in __generic_file_write_iter()
4333 	struct inode *inode = mapping->host;  in __generic_file_write_iter()
4344 	if (iocb->ki_flags & IOCB_DIRECT) {  in __generic_file_write_iter()
4347 		 * If the write stopped short of completing, fall back to  in __generic_file_write_iter()
4349 		 * holes, for example.  For DAX files, a buffered write will  in __generic_file_write_iter()
4351 		 * page-cache pages correctly).  in __generic_file_write_iter()
4364  * generic_file_write_iter - write data to a file
4366  * @from:	iov_iter with data to write
4373  *   vfs_fsync_range() failed for a synchronous write
4378 	struct file *file = iocb->ki_filp;  in generic_file_write_iter()
4379 	struct inode *inode = file->f_mapping->host;  in generic_file_write_iter()
4395  * filemap_release_folio() - Release fs-specific metadata on a folio.
4400  * (presumably at folio->private).
4402  * This will also be called if the private_2 flag is set on a page,
4413 	struct address_space * const mapping = folio->mapping;  in filemap_release_folio()
4421 	if (mapping && mapping->a_ops->release_folio)  in filemap_release_folio()
4422 		return mapping->a_ops->release_folio(folio, gfp);  in filemap_release_folio()
4428  * filemap_invalidate_inode - Invalidate/forcibly write back a range of an inode's pagecache
4430  * @flush: Set to write back rather than simply invalidate.
4435  * Invalidate all the folios on an inode that contribute to the specified
4443 	struct address_space *mapping = inode->i_mapping;  in filemap_invalidate_inode()
4446 	pgoff_t nr = end == LLONG_MAX ? ULONG_MAX : last - first + 1;  in filemap_invalidate_inode()
4448 	if (!mapping || !mapping->nrpages || end < start)  in filemap_invalidate_inode()
4454 	if (!mapping->nrpages)  in filemap_invalidate_inode()
4459 	/* Write back the data if we're asked to. */  in filemap_invalidate_inode()
4471 	/* Wait for writeback to complete on all folios and discard. */  in filemap_invalidate_inode()
4483  * filemap_cachestat() - compute the page cache statistics of a mapping
4487  * @cs:	the cachestat struct to write the result to.
4497 	XA_STATE(xas, &mapping->i_pages, first_index);  in filemap_cachestat()
4517 		 * the rcu-protected xarray.  in filemap_cachestat()
4526 		folio_last_index = folio_first_index + nr_pages - 1;  in filemap_cachestat()
4530 			nr_pages -= first_index - folio_first_index;  in filemap_cachestat()
4533 			nr_pages -= folio_last_index - last_index;  in filemap_cachestat()
4540 			cs->nr_evicted += nr_pages;  in filemap_cachestat()
4544 				/* shmem file - in swap cache */  in filemap_cachestat()
4567 				cs->nr_recently_evicted += nr_pages;  in filemap_cachestat()
4573 		cs->nr_cache += nr_pages;  in filemap_cachestat()
4576 			cs->nr_dirty += nr_pages;  in filemap_cachestat()
4579 			cs->nr_writeback += nr_pages;  in filemap_cachestat()
4592  * that the calling process has write access to, or could (if
4597 	if (f->f_mode & FMODE_WRITE)  in can_do_cachestat()
4616  * there is memory pressure on the system.
4618  * `off` and `len` must be non-negative integers. If `len` > 0,
4632  *  zero        - success
4633  *  -EFAULT     - cstat or cstat_range points to an illegal address
4634  *  -EINVAL     - invalid flags
4635  *  -EBADF      - invalid file descriptor
4636  *  -EOPNOTSUPP - file descriptor is of a hugetlbfs file
4649 		return -EBADF;  in SYSCALL_DEFINE4()
4653 		return -EFAULT;  in SYSCALL_DEFINE4()
4657 		return -EOPNOTSUPP;  in SYSCALL_DEFINE4()
4660 		return -EPERM;  in SYSCALL_DEFINE4()
4663 		return -EINVAL;  in SYSCALL_DEFINE4()
4667 		csr.len == 0 ? ULONG_MAX : (csr.off + csr.len - 1) >> PAGE_SHIFT;  in SYSCALL_DEFINE4()
4669 	mapping = fd_file(f)->f_mapping;  in SYSCALL_DEFINE4()
4673 		return -EFAULT;  in SYSCALL_DEFINE4()