// SPDX-License-Identifier: GPL-2.0
/*
 * mm/fadvise.c
 *
 * Copyright (C) 2002, Linus Torvalds
 *
 * 11Jan2003	Andrew Morton
 *		Initial version.
 */

#include <linux/kernel.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/backing-dev.h>
#include <linux/fadvise.h>
#include <linux/writeback.h>
#include <linux/syscalls.h>
#include <linux/swap.h>

#include <asm/unistd.h>

#include "internal.h"

/*
 * POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could
 * deactivate the pages and clear PG_Referenced.
 */

int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
{
	struct inode *inode;
	struct address_space *mapping;
	struct backing_dev_info *bdi;
	loff_t endbyte;			/* inclusive */
	pgoff_t start_index;
	pgoff_t end_index;
	unsigned long nrpages;

	inode = file_inode(file);
	if (S_ISFIFO(inode->i_mode))
		return -ESPIPE;

	mapping = file->f_mapping;
	if (!mapping || len < 0)
		return -EINVAL;

	bdi = inode_to_bdi(mapping->host);

	if (IS_DAX(inode) || (bdi == &noop_backing_dev_info)) {
		switch (advice) {
		case POSIX_FADV_NORMAL:
		case POSIX_FADV_RANDOM:
		case POSIX_FADV_SEQUENTIAL:
		case POSIX_FADV_WILLNEED:
		case POSIX_FADV_NOREUSE:
		case POSIX_FADV_DONTNEED:
			/* no bad return value, but ignore advice */
			break;
		default:
			return -EINVAL;
		}
		return 0;
	}

	/*
	 * Careful about overflows. Len == 0 means "as much as possible".  Use
	 * unsigned math because signed overflows are undefined and UBSan
	 * complains.
	 */
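	/*
	 * For example (illustrative values, no overflow): offset == 4096 and
	 * len == 8192 give endbyte == 12287, while len == 0 widens endbyte to
	 * LLONG_MAX so the advice covers the rest of the file.
	 */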
	endbyte = (u64)offset + (u64)len;
	if (!len || endbyte < len)
		endbyte = LLONG_MAX;
	else
		endbyte--;		/* inclusive */

	switch (advice) {
	case POSIX_FADV_NORMAL:
		file->f_ra.ra_pages = bdi->ra_pages;
		spin_lock(&file->f_lock);
		file->f_mode &= ~(FMODE_RANDOM | FMODE_NOREUSE);
		spin_unlock(&file->f_lock);
		break;
	case POSIX_FADV_RANDOM:
		spin_lock(&file->f_lock);
		file->f_mode |= FMODE_RANDOM;
		spin_unlock(&file->f_lock);
		break;
	case POSIX_FADV_SEQUENTIAL:
		file->f_ra.ra_pages = bdi->ra_pages * 2;
		spin_lock(&file->f_lock);
		file->f_mode &= ~FMODE_RANDOM;
		spin_unlock(&file->f_lock);
		break;
	case POSIX_FADV_WILLNEED:
		/* First and last PARTIAL page! */
		start_index = offset >> PAGE_SHIFT;
		end_index = endbyte >> PAGE_SHIFT;

		/* Careful about overflow on the "+1" */
		nrpages = end_index - start_index + 1;
		if (!nrpages)
			nrpages = ~0UL;

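		/*
		 * Illustrative example, assuming 4KiB pages: offset == 0 and
		 * len == 16384 give start_index == 0, end_index == 3 and
		 * nrpages == 4.  If the "+1" wraps nrpages around to 0,
		 * ~0UL asks readahead for as much of the file as possible.
		 */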
		force_page_cache_readahead(mapping, file, start_index, nrpages);
		break;
	case POSIX_FADV_NOREUSE:
		spin_lock(&file->f_lock);
		file->f_mode |= FMODE_NOREUSE;
		spin_unlock(&file->f_lock);
		break;
	case POSIX_FADV_DONTNEED:
		__filemap_fdatawrite_range(mapping, offset, endbyte,
					   WB_SYNC_NONE);

		/*
		 * First and last FULL page! Partial pages are deliberately
		 * preserved on the expectation that it is better to preserve
		 * needed memory than to discard unneeded memory.
		 */
		start_index = (offset+(PAGE_SIZE-1)) >> PAGE_SHIFT;
		end_index = (endbyte >> PAGE_SHIFT);
		/*
		 * The page at end_index will be discarded (inclusively) by
		 * invalidate_mapping_pages(), so subtracting 1 from end_index
		 * means we skip the last page.  But if endbyte lands on the
		 * last byte of a page or at the end of the file, we should not
		 * skip that page - discarding the last page is safe enough.
		 */
		if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK &&
				endbyte != inode->i_size - 1) {
			/*
			 * end_index == 0 is tricky: 0 - 1 wraps around because
			 * pgoff_t is unsigned, so the end_index >= start_index
			 * check below would be true and we would discard the
			 * whole file's cache, which is not what was asked for.
			 */
			if (end_index == 0)
				break;

			end_index--;
		}
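		/*
		 * Illustrative example, assuming 4KiB pages and a range well
		 * inside the file: offset == 1000 and endbyte == 9999 give
		 * start_index == 1 and end_index == 2; byte 9999 is not the
		 * last byte of a page, so end_index drops to 1 and only the
		 * fully covered page 1 is invalidated, preserving the
		 * partially covered pages 0 and 2.
		 */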

		if (end_index >= start_index) {
			unsigned long nr_failed = 0;

			/*
			 * It's common to FADV_DONTNEED right after
			 * the read or write that instantiates the
			 * pages, in which case there will be some
			 * sitting on the local LRU cache. Try to
			 * avoid the expensive remote drain and the
			 * second cache tree walk below by flushing
			 * them out right away.
			 */
			lru_add_drain();

			mapping_try_invalidate(mapping, start_index, end_index,
					&nr_failed);

			/*
			 * The failures may be due to the folio being
			 * in the LRU cache of a remote CPU. Drain all
			 * caches and try again.
			 */
			if (nr_failed) {
				lru_add_drain_all();
				invalidate_mapping_pages(mapping, start_index,
						end_index);
			}
		}
		break;
	default:
		return -EINVAL;
	}
	return 0;
}
EXPORT_SYMBOL(generic_fadvise);

int vfs_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
{
	if (file->f_op->fadvise)
		return file->f_op->fadvise(file, offset, len, advice);

	return generic_fadvise(file, offset, len, advice);
}
EXPORT_SYMBOL(vfs_fadvise);
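
/*
 * Minimal sketch (hypothetical, not from this file): a filesystem that only
 * needs a little extra work around the advice can supply its own ->fadvise
 * method and fall back to generic_fadvise().  foofs_file_fadvise() and
 * foofs_prepare_advice() below are made-up names:
 *
 *	static int foofs_file_fadvise(struct file *file, loff_t offset,
 *				      loff_t len, int advice)
 *	{
 *		foofs_prepare_advice(file, advice);
 *		return generic_fadvise(file, offset, len, advice);
 *	}
 *
 * vfs_fadvise() above would then dispatch to that method instead of calling
 * generic_fadvise() directly.
 */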
188  
189  #ifdef CONFIG_ADVISE_SYSCALLS
190  
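/*
 * Illustrative user-space counterpart (not part of this file): the C library
 * maps posix_fadvise() onto the fadvise64/fadvise64_64 system calls defined
 * below, for example:
 *
 *	int err = posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
 *
 * posix_fadvise() returns the error number directly (0 on success) rather
 * than setting errno, and the call reaches ksys_fadvise64_64() via
 * vfs_fadvise().
 */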
int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
{
	CLASS(fd, f)(fd);

	if (fd_empty(f))
		return -EBADF;

	return vfs_fadvise(fd_file(f), offset, len, advice);
}

SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
{
	return ksys_fadvise64_64(fd, offset, len, advice);
}

#ifdef __ARCH_WANT_SYS_FADVISE64

SYSCALL_DEFINE4(fadvise64, int, fd, loff_t, offset, size_t, len, int, advice)
{
	return ksys_fadvise64_64(fd, offset, len, advice);
}

#endif

#if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_FADVISE64_64)

COMPAT_SYSCALL_DEFINE6(fadvise64_64, int, fd, compat_arg_u64_dual(offset),
		       compat_arg_u64_dual(len), int, advice)
{
	return ksys_fadvise64_64(fd, compat_arg_u64_glue(offset),
				 compat_arg_u64_glue(len), advice);
}

#endif
#endif
226