1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * mm/fadvise.c 4 * 5 * Copyright (C) 2002, Linus Torvalds 6 * 7 * 11Jan2003 Andrew Morton 8 * Initial version. 9 */ 10 11 #include <linux/kernel.h> 12 #include <linux/file.h> 13 #include <linux/fs.h> 14 #include <linux/mm.h> 15 #include <linux/pagemap.h> 16 #include <linux/backing-dev.h> 17 #include <linux/pagevec.h> 18 #include <linux/fadvise.h> 19 #include <linux/writeback.h> 20 #include <linux/syscalls.h> 21 #include <linux/swap.h> 22 23 #include <asm/unistd.h> 24 25 /* 26 * POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could 27 * deactivate the pages and clear PG_Referenced. 28 */ 29 30 static int generic_fadvise(struct file *file, loff_t offset, loff_t len, 31 int advice) 32 { 33 struct inode *inode; 34 struct address_space *mapping; 35 struct backing_dev_info *bdi; 36 loff_t endbyte; /* inclusive */ 37 pgoff_t start_index; 38 pgoff_t end_index; 39 unsigned long nrpages; 40 41 inode = file_inode(file); 42 if (S_ISFIFO(inode->i_mode)) 43 return -ESPIPE; 44 45 mapping = file->f_mapping; 46 if (!mapping || len < 0) 47 return -EINVAL; 48 49 bdi = inode_to_bdi(mapping->host); 50 51 if (IS_DAX(inode) || (bdi == &noop_backing_dev_info)) { 52 switch (advice) { 53 case POSIX_FADV_NORMAL: 54 case POSIX_FADV_RANDOM: 55 case POSIX_FADV_SEQUENTIAL: 56 case POSIX_FADV_WILLNEED: 57 case POSIX_FADV_NOREUSE: 58 case POSIX_FADV_DONTNEED: 59 /* no bad return value, but ignore advice */ 60 break; 61 default: 62 return -EINVAL; 63 } 64 return 0; 65 } 66 67 /* 68 * Careful about overflows. Len == 0 means "as much as possible". Use 69 * unsigned math because signed overflows are undefined and UBSan 70 * complains. 71 */ 72 endbyte = (u64)offset + (u64)len; 73 if (!len || endbyte < len) 74 endbyte = -1; 75 else 76 endbyte--; /* inclusive */ 77 78 switch (advice) { 79 case POSIX_FADV_NORMAL: 80 file->f_ra.ra_pages = bdi->ra_pages; 81 spin_lock(&file->f_lock); 82 file->f_mode &= ~FMODE_RANDOM; 83 spin_unlock(&file->f_lock); 84 break; 85 case POSIX_FADV_RANDOM: 86 spin_lock(&file->f_lock); 87 file->f_mode |= FMODE_RANDOM; 88 spin_unlock(&file->f_lock); 89 break; 90 case POSIX_FADV_SEQUENTIAL: 91 file->f_ra.ra_pages = bdi->ra_pages * 2; 92 spin_lock(&file->f_lock); 93 file->f_mode &= ~FMODE_RANDOM; 94 spin_unlock(&file->f_lock); 95 break; 96 case POSIX_FADV_WILLNEED: 97 /* First and last PARTIAL page! */ 98 start_index = offset >> PAGE_SHIFT; 99 end_index = endbyte >> PAGE_SHIFT; 100 101 /* Careful about overflow on the "+1" */ 102 nrpages = end_index - start_index + 1; 103 if (!nrpages) 104 nrpages = ~0UL; 105 106 /* 107 * Ignore return value because fadvise() shall return 108 * success even if filesystem can't retrieve a hint, 109 */ 110 force_page_cache_readahead(mapping, file, start_index, nrpages); 111 break; 112 case POSIX_FADV_NOREUSE: 113 break; 114 case POSIX_FADV_DONTNEED: 115 if (!inode_write_congested(mapping->host)) 116 __filemap_fdatawrite_range(mapping, offset, endbyte, 117 WB_SYNC_NONE); 118 119 /* 120 * First and last FULL page! Partial pages are deliberately 121 * preserved on the expectation that it is better to preserve 122 * needed memory than to discard unneeded memory. 123 */ 124 start_index = (offset+(PAGE_SIZE-1)) >> PAGE_SHIFT; 125 end_index = (endbyte >> PAGE_SHIFT); 126 /* 127 * The page at end_index will be inclusively discarded according 128 * by invalidate_mapping_pages(), so subtracting 1 from 129 * end_index means we will skip the last page. But if endbyte 130 * is page aligned or is at the end of file, we should not skip 131 * that page - discarding the last page is safe enough. 132 */ 133 if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK && 134 endbyte != inode->i_size - 1) { 135 /* First page is tricky as 0 - 1 = -1, but pgoff_t 136 * is unsigned, so the end_index >= start_index 137 * check below would be true and we'll discard the whole 138 * file cache which is not what was asked. 139 */ 140 if (end_index == 0) 141 break; 142 143 end_index--; 144 } 145 146 if (end_index >= start_index) { 147 unsigned long count; 148 149 /* 150 * It's common to FADV_DONTNEED right after 151 * the read or write that instantiates the 152 * pages, in which case there will be some 153 * sitting on the local LRU cache. Try to 154 * avoid the expensive remote drain and the 155 * second cache tree walk below by flushing 156 * them out right away. 157 */ 158 lru_add_drain(); 159 160 count = invalidate_mapping_pages(mapping, 161 start_index, end_index); 162 163 /* 164 * If fewer pages were invalidated than expected then 165 * it is possible that some of the pages were on 166 * a per-cpu pagevec for a remote CPU. Drain all 167 * pagevecs and try again. 168 */ 169 if (count < (end_index - start_index + 1)) { 170 lru_add_drain_all(); 171 invalidate_mapping_pages(mapping, start_index, 172 end_index); 173 } 174 } 175 break; 176 default: 177 return -EINVAL; 178 } 179 return 0; 180 } 181 182 int vfs_fadvise(struct file *file, loff_t offset, loff_t len, int advice) 183 { 184 if (file->f_op->fadvise) 185 return file->f_op->fadvise(file, offset, len, advice); 186 187 return generic_fadvise(file, offset, len, advice); 188 } 189 EXPORT_SYMBOL(vfs_fadvise); 190 191 #ifdef CONFIG_ADVISE_SYSCALLS 192 193 int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice) 194 { 195 struct fd f = fdget(fd); 196 int ret; 197 198 if (!f.file) 199 return -EBADF; 200 201 ret = vfs_fadvise(f.file, offset, len, advice); 202 203 fdput(f); 204 return ret; 205 } 206 207 SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice) 208 { 209 return ksys_fadvise64_64(fd, offset, len, advice); 210 } 211 212 #ifdef __ARCH_WANT_SYS_FADVISE64 213 214 SYSCALL_DEFINE4(fadvise64, int, fd, loff_t, offset, size_t, len, int, advice) 215 { 216 return ksys_fadvise64_64(fd, offset, len, advice); 217 } 218 219 #endif 220 #endif 221