1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * mm/fadvise.c 4 * 5 * Copyright (C) 2002, Linus Torvalds 6 * 7 * 11Jan2003 Andrew Morton 8 * Initial version. 9 */ 10 11 #include <linux/kernel.h> 12 #include <linux/file.h> 13 #include <linux/fs.h> 14 #include <linux/mm.h> 15 #include <linux/pagemap.h> 16 #include <linux/backing-dev.h> 17 #include <linux/pagevec.h> 18 #include <linux/fadvise.h> 19 #include <linux/writeback.h> 20 #include <linux/syscalls.h> 21 #include <linux/swap.h> 22 23 #include <asm/unistd.h> 24 25 #include "internal.h" 26 27 /* 28 * POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could 29 * deactivate the pages and clear PG_Referenced. 30 */ 31 32 int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice) 33 { 34 struct inode *inode; 35 struct address_space *mapping; 36 struct backing_dev_info *bdi; 37 loff_t endbyte; /* inclusive */ 38 pgoff_t start_index; 39 pgoff_t end_index; 40 unsigned long nrpages; 41 42 inode = file_inode(file); 43 if (S_ISFIFO(inode->i_mode)) 44 return -ESPIPE; 45 46 mapping = file->f_mapping; 47 if (!mapping || len < 0) 48 return -EINVAL; 49 50 bdi = inode_to_bdi(mapping->host); 51 52 if (IS_DAX(inode) || (bdi == &noop_backing_dev_info)) { 53 switch (advice) { 54 case POSIX_FADV_NORMAL: 55 case POSIX_FADV_RANDOM: 56 case POSIX_FADV_SEQUENTIAL: 57 case POSIX_FADV_WILLNEED: 58 case POSIX_FADV_NOREUSE: 59 case POSIX_FADV_DONTNEED: 60 /* no bad return value, but ignore advice */ 61 break; 62 default: 63 return -EINVAL; 64 } 65 return 0; 66 } 67 68 /* 69 * Careful about overflows. Len == 0 means "as much as possible". Use 70 * unsigned math because signed overflows are undefined and UBSan 71 * complains. 72 */ 73 endbyte = (u64)offset + (u64)len; 74 if (!len || endbyte < len) 75 endbyte = LLONG_MAX; 76 else 77 endbyte--; /* inclusive */ 78 79 switch (advice) { 80 case POSIX_FADV_NORMAL: 81 file->f_ra.ra_pages = bdi->ra_pages; 82 spin_lock(&file->f_lock); 83 file->f_mode &= ~FMODE_RANDOM; 84 spin_unlock(&file->f_lock); 85 break; 86 case POSIX_FADV_RANDOM: 87 spin_lock(&file->f_lock); 88 file->f_mode |= FMODE_RANDOM; 89 spin_unlock(&file->f_lock); 90 break; 91 case POSIX_FADV_SEQUENTIAL: 92 file->f_ra.ra_pages = bdi->ra_pages * 2; 93 spin_lock(&file->f_lock); 94 file->f_mode &= ~FMODE_RANDOM; 95 spin_unlock(&file->f_lock); 96 break; 97 case POSIX_FADV_WILLNEED: 98 /* First and last PARTIAL page! */ 99 start_index = offset >> PAGE_SHIFT; 100 end_index = endbyte >> PAGE_SHIFT; 101 102 /* Careful about overflow on the "+1" */ 103 nrpages = end_index - start_index + 1; 104 if (!nrpages) 105 nrpages = ~0UL; 106 107 force_page_cache_readahead(mapping, file, start_index, nrpages); 108 break; 109 case POSIX_FADV_NOREUSE: 110 break; 111 case POSIX_FADV_DONTNEED: 112 __filemap_fdatawrite_range(mapping, offset, endbyte, 113 WB_SYNC_NONE); 114 115 /* 116 * First and last FULL page! Partial pages are deliberately 117 * preserved on the expectation that it is better to preserve 118 * needed memory than to discard unneeded memory. 119 */ 120 start_index = (offset+(PAGE_SIZE-1)) >> PAGE_SHIFT; 121 end_index = (endbyte >> PAGE_SHIFT); 122 /* 123 * The page at end_index will be inclusively discarded according 124 * by invalidate_mapping_pages(), so subtracting 1 from 125 * end_index means we will skip the last page. But if endbyte 126 * is page aligned or is at the end of file, we should not skip 127 * that page - discarding the last page is safe enough. 128 */ 129 if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK && 130 endbyte != inode->i_size - 1) { 131 /* First page is tricky as 0 - 1 = -1, but pgoff_t 132 * is unsigned, so the end_index >= start_index 133 * check below would be true and we'll discard the whole 134 * file cache which is not what was asked. 135 */ 136 if (end_index == 0) 137 break; 138 139 end_index--; 140 } 141 142 if (end_index >= start_index) { 143 unsigned long nr_pagevec = 0; 144 145 /* 146 * It's common to FADV_DONTNEED right after 147 * the read or write that instantiates the 148 * pages, in which case there will be some 149 * sitting on the local LRU cache. Try to 150 * avoid the expensive remote drain and the 151 * second cache tree walk below by flushing 152 * them out right away. 153 */ 154 lru_add_drain(); 155 156 invalidate_mapping_pagevec(mapping, 157 start_index, end_index, 158 &nr_pagevec); 159 160 /* 161 * If fewer pages were invalidated than expected then 162 * it is possible that some of the pages were on 163 * a per-cpu pagevec for a remote CPU. Drain all 164 * pagevecs and try again. 165 */ 166 if (nr_pagevec) { 167 lru_add_drain_all(); 168 invalidate_mapping_pages(mapping, start_index, 169 end_index); 170 } 171 } 172 break; 173 default: 174 return -EINVAL; 175 } 176 return 0; 177 } 178 EXPORT_SYMBOL(generic_fadvise); 179 180 int vfs_fadvise(struct file *file, loff_t offset, loff_t len, int advice) 181 { 182 if (file->f_op->fadvise) 183 return file->f_op->fadvise(file, offset, len, advice); 184 185 return generic_fadvise(file, offset, len, advice); 186 } 187 EXPORT_SYMBOL(vfs_fadvise); 188 189 #ifdef CONFIG_ADVISE_SYSCALLS 190 191 int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice) 192 { 193 struct fd f = fdget(fd); 194 int ret; 195 196 if (!f.file) 197 return -EBADF; 198 199 ret = vfs_fadvise(f.file, offset, len, advice); 200 201 fdput(f); 202 return ret; 203 } 204 205 SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice) 206 { 207 return ksys_fadvise64_64(fd, offset, len, advice); 208 } 209 210 #ifdef __ARCH_WANT_SYS_FADVISE64 211 212 SYSCALL_DEFINE4(fadvise64, int, fd, loff_t, offset, size_t, len, int, advice) 213 { 214 return ksys_fadvise64_64(fd, offset, len, advice); 215 } 216 217 #endif 218 219 #if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_FADVISE64_64) 220 221 COMPAT_SYSCALL_DEFINE6(fadvise64_64, int, fd, compat_arg_u64_dual(offset), 222 compat_arg_u64_dual(len), int, advice) 223 { 224 return ksys_fadvise64_64(fd, compat_arg_u64_glue(offset), 225 compat_arg_u64_glue(len), advice); 226 } 227 228 #endif 229 #endif 230