1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * mm/fadvise.c 4 * 5 * Copyright (C) 2002, Linus Torvalds 6 * 7 * 11Jan2003 Andrew Morton 8 * Initial version. 9 */ 10 11 #include <linux/kernel.h> 12 #include <linux/file.h> 13 #include <linux/fs.h> 14 #include <linux/mm.h> 15 #include <linux/pagemap.h> 16 #include <linux/backing-dev.h> 17 #include <linux/fadvise.h> 18 #include <linux/writeback.h> 19 #include <linux/syscalls.h> 20 #include <linux/swap.h> 21 22 #include <asm/unistd.h> 23 24 #include "internal.h" 25 26 /* 27 * POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could 28 * deactivate the pages and clear PG_Referenced. 29 */ 30 31 int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice) 32 { 33 struct inode *inode; 34 struct address_space *mapping; 35 struct backing_dev_info *bdi; 36 loff_t endbyte; /* inclusive */ 37 pgoff_t start_index; 38 pgoff_t end_index; 39 unsigned long nrpages; 40 41 inode = file_inode(file); 42 if (S_ISFIFO(inode->i_mode)) 43 return -ESPIPE; 44 45 mapping = file->f_mapping; 46 if (!mapping || len < 0) 47 return -EINVAL; 48 49 bdi = inode_to_bdi(mapping->host); 50 51 if (IS_DAX(inode) || (bdi == &noop_backing_dev_info)) { 52 switch (advice) { 53 case POSIX_FADV_NORMAL: 54 case POSIX_FADV_RANDOM: 55 case POSIX_FADV_SEQUENTIAL: 56 case POSIX_FADV_WILLNEED: 57 case POSIX_FADV_NOREUSE: 58 case POSIX_FADV_DONTNEED: 59 /* no bad return value, but ignore advice */ 60 break; 61 default: 62 return -EINVAL; 63 } 64 return 0; 65 } 66 67 /* 68 * Careful about overflows. Len == 0 means "as much as possible". Use 69 * unsigned math because signed overflows are undefined and UBSan 70 * complains. 71 */ 72 endbyte = (u64)offset + (u64)len; 73 if (!len || endbyte < len) 74 endbyte = LLONG_MAX; 75 else 76 endbyte--; /* inclusive */ 77 78 switch (advice) { 79 case POSIX_FADV_NORMAL: 80 file->f_ra.ra_pages = bdi->ra_pages; 81 spin_lock(&file->f_lock); 82 file->f_mode &= ~(FMODE_RANDOM | FMODE_NOREUSE); 83 spin_unlock(&file->f_lock); 84 break; 85 case POSIX_FADV_RANDOM: 86 spin_lock(&file->f_lock); 87 file->f_mode |= FMODE_RANDOM; 88 spin_unlock(&file->f_lock); 89 break; 90 case POSIX_FADV_SEQUENTIAL: 91 file->f_ra.ra_pages = bdi->ra_pages * 2; 92 spin_lock(&file->f_lock); 93 file->f_mode &= ~FMODE_RANDOM; 94 spin_unlock(&file->f_lock); 95 break; 96 case POSIX_FADV_WILLNEED: 97 /* First and last PARTIAL page! */ 98 start_index = offset >> PAGE_SHIFT; 99 end_index = endbyte >> PAGE_SHIFT; 100 101 /* Careful about overflow on the "+1" */ 102 nrpages = end_index - start_index + 1; 103 if (!nrpages) 104 nrpages = ~0UL; 105 106 force_page_cache_readahead(mapping, file, start_index, nrpages); 107 break; 108 case POSIX_FADV_NOREUSE: 109 spin_lock(&file->f_lock); 110 file->f_mode |= FMODE_NOREUSE; 111 spin_unlock(&file->f_lock); 112 break; 113 case POSIX_FADV_DONTNEED: 114 filemap_flush_range(mapping, offset, endbyte); 115 116 /* 117 * First and last FULL page! Partial pages are deliberately 118 * preserved on the expectation that it is better to preserve 119 * needed memory than to discard unneeded memory. 120 */ 121 start_index = (offset+(PAGE_SIZE-1)) >> PAGE_SHIFT; 122 end_index = (endbyte >> PAGE_SHIFT); 123 /* 124 * The page at end_index will be inclusively discarded according 125 * by invalidate_mapping_pages(), so subtracting 1 from 126 * end_index means we will skip the last page. But if endbyte 127 * is page aligned or is at the end of file, we should not skip 128 * that page - discarding the last page is safe enough. 129 */ 130 if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK && 131 endbyte != inode->i_size - 1) { 132 /* First page is tricky as 0 - 1 = -1, but pgoff_t 133 * is unsigned, so the end_index >= start_index 134 * check below would be true and we'll discard the whole 135 * file cache which is not what was asked. 136 */ 137 if (end_index == 0) 138 break; 139 140 end_index--; 141 } 142 143 if (end_index >= start_index) { 144 unsigned long nr_failed = 0; 145 146 /* 147 * It's common to FADV_DONTNEED right after 148 * the read or write that instantiates the 149 * pages, in which case there will be some 150 * sitting on the local LRU cache. Try to 151 * avoid the expensive remote drain and the 152 * second cache tree walk below by flushing 153 * them out right away. 154 */ 155 lru_add_drain(); 156 157 mapping_try_invalidate(mapping, start_index, end_index, 158 &nr_failed); 159 160 /* 161 * The failures may be due to the folio being 162 * in the LRU cache of a remote CPU. Drain all 163 * caches and try again. 164 */ 165 if (nr_failed) { 166 lru_add_drain_all(); 167 invalidate_mapping_pages(mapping, start_index, 168 end_index); 169 } 170 } 171 break; 172 default: 173 return -EINVAL; 174 } 175 return 0; 176 } 177 EXPORT_SYMBOL(generic_fadvise); 178 179 int vfs_fadvise(struct file *file, loff_t offset, loff_t len, int advice) 180 { 181 if (file->f_op->fadvise) 182 return file->f_op->fadvise(file, offset, len, advice); 183 184 return generic_fadvise(file, offset, len, advice); 185 } 186 EXPORT_SYMBOL(vfs_fadvise); 187 188 #ifdef CONFIG_ADVISE_SYSCALLS 189 190 int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice) 191 { 192 CLASS(fd, f)(fd); 193 194 if (fd_empty(f)) 195 return -EBADF; 196 197 return vfs_fadvise(fd_file(f), offset, len, advice); 198 } 199 200 SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice) 201 { 202 return ksys_fadvise64_64(fd, offset, len, advice); 203 } 204 205 #ifdef __ARCH_WANT_SYS_FADVISE64 206 207 SYSCALL_DEFINE4(fadvise64, int, fd, loff_t, offset, size_t, len, int, advice) 208 { 209 return ksys_fadvise64_64(fd, offset, len, advice); 210 } 211 212 #endif 213 214 #if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_FADVISE64_64) 215 216 COMPAT_SYSCALL_DEFINE6(fadvise64_64, int, fd, compat_arg_u64_dual(offset), 217 compat_arg_u64_dual(len), int, advice) 218 { 219 return ksys_fadvise64_64(fd, compat_arg_u64_glue(offset), 220 compat_arg_u64_glue(len), advice); 221 } 222 223 #endif 224 #endif 225