// SPDX-License-Identifier: GPL-2.0
/*
 * mm/fadvise.c
 *
 * Copyright (C) 2002, Linus Torvalds
 *
 * 11Jan2003	Andrew Morton
 *		Initial version.
 */

#include <linux/kernel.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/fadvise.h>
#include <linux/writeback.h>
#include <linux/syscalls.h>
#include <linux/swap.h>

#include <asm/unistd.h>

/*
 * POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could
 * deactivate the pages and clear PG_Referenced.
 */

int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
{
	struct inode *inode;
	struct address_space *mapping;
	struct backing_dev_info *bdi;
	loff_t endbyte;			/* inclusive */
	pgoff_t start_index;
	pgoff_t end_index;
	unsigned long nrpages;

	inode = file_inode(file);
	if (S_ISFIFO(inode->i_mode))
		return -ESPIPE;

	mapping = file->f_mapping;
	if (!mapping || len < 0)
		return -EINVAL;

	bdi = inode_to_bdi(mapping->host);

	if (IS_DAX(inode) || (bdi == &noop_backing_dev_info)) {
		switch (advice) {
		case POSIX_FADV_NORMAL:
		case POSIX_FADV_RANDOM:
		case POSIX_FADV_SEQUENTIAL:
		case POSIX_FADV_WILLNEED:
		case POSIX_FADV_NOREUSE:
		case POSIX_FADV_DONTNEED:
			/* no bad return value, but ignore advice */
			break;
		default:
			return -EINVAL;
		}
		return 0;
	}

	/*
	 * Careful about overflows. Len == 0 means "as much as possible". Use
	 * unsigned math because signed overflows are undefined and UBSan
	 * complains.
	 */
	endbyte = (u64)offset + (u64)len;
	if (!len || endbyte < len)
		endbyte = -1;
	else
		endbyte--;		/* inclusive */

	switch (advice) {
	case POSIX_FADV_NORMAL:
		file->f_ra.ra_pages = bdi->ra_pages;
		spin_lock(&file->f_lock);
		file->f_mode &= ~FMODE_RANDOM;
		spin_unlock(&file->f_lock);
		break;
	case POSIX_FADV_RANDOM:
		spin_lock(&file->f_lock);
		file->f_mode |= FMODE_RANDOM;
		spin_unlock(&file->f_lock);
		break;
	case POSIX_FADV_SEQUENTIAL:
		file->f_ra.ra_pages = bdi->ra_pages * 2;
		spin_lock(&file->f_lock);
		file->f_mode &= ~FMODE_RANDOM;
		spin_unlock(&file->f_lock);
		break;
	case POSIX_FADV_WILLNEED:
		/* First and last PARTIAL page! */
		start_index = offset >> PAGE_SHIFT;
		end_index = endbyte >> PAGE_SHIFT;

		/* Careful about overflow on the "+1" */
		nrpages = end_index - start_index + 1;
		if (!nrpages)
			nrpages = ~0UL;

		/*
		 * Ignore the return value because fadvise() shall return
		 * success even if the filesystem can't retrieve a hint.
		 */
		force_page_cache_readahead(mapping, file, start_index, nrpages);
		break;
	case POSIX_FADV_NOREUSE:
		break;
	case POSIX_FADV_DONTNEED:
		if (!inode_write_congested(mapping->host))
			__filemap_fdatawrite_range(mapping, offset, endbyte,
						   WB_SYNC_NONE);

		/*
		 * First and last FULL page! Partial pages are deliberately
		 * preserved on the expectation that it is better to preserve
		 * needed memory than to discard unneeded memory.
		 */
		start_index = (offset+(PAGE_SIZE-1)) >> PAGE_SHIFT;
		end_index = (endbyte >> PAGE_SHIFT);
		/*
		 * The page at end_index will be inclusively discarded by
		 * invalidate_mapping_pages(), so subtracting 1 from
		 * end_index means we will skip the last page.  But if endbyte
		 * is page aligned or is at the end of file, we should not skip
		 * that page - discarding the last page is safe enough.
		 */
		if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK &&
				endbyte != inode->i_size - 1) {
			/* First page is tricky as 0 - 1 = -1, but pgoff_t
			 * is unsigned, so the end_index >= start_index
			 * check below would be true and we'll discard the whole
			 * file cache, which is not what was asked for.
			 */
			if (end_index == 0)
				break;

			end_index--;
		}

		if (end_index >= start_index) {
			unsigned long count;

			/*
			 * It's common to FADV_DONTNEED right after
			 * the read or write that instantiates the
			 * pages, in which case there will be some
			 * sitting on the local LRU cache. Try to
			 * avoid the expensive remote drain and the
			 * second cache tree walk below by flushing
			 * them out right away.
			 */
			lru_add_drain();

			count = invalidate_mapping_pages(mapping,
						start_index, end_index);

			/*
			 * If fewer pages were invalidated than expected then
			 * it is possible that some of the pages were on
			 * a per-cpu pagevec for a remote CPU. Drain all
			 * pagevecs and try again.
			 */
			if (count < (end_index - start_index + 1)) {
				lru_add_drain_all();
				invalidate_mapping_pages(mapping, start_index,
						end_index);
			}
		}
		break;
	default:
		return -EINVAL;
	}
	return 0;
}
EXPORT_SYMBOL(generic_fadvise);

int vfs_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
{
	if (file->f_op->fadvise)
		return file->f_op->fadvise(file, offset, len, advice);

	return generic_fadvise(file, offset, len, advice);
}
EXPORT_SYMBOL(vfs_fadvise);

#ifdef CONFIG_ADVISE_SYSCALLS

int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
{
	struct fd f = fdget(fd);
	int ret;

	if (!f.file)
		return -EBADF;

	ret = vfs_fadvise(f.file, offset, len, advice);

	fdput(f);
	return ret;
}

SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
{
	return ksys_fadvise64_64(fd, offset, len, advice);
}

#ifdef __ARCH_WANT_SYS_FADVISE64

SYSCALL_DEFINE4(fadvise64, int, fd, loff_t, offset, size_t, len, int, advice)
{
	return ksys_fadvise64_64(fd, offset, len, advice);
}

#endif
#endif
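
/*
 * Userspace usage, for illustration: a minimal sketch of how the
 * FADV_DONTNEED path above is typically exercised -- write some data,
 * make the pages clean with fsync(), then advise the kernel that the
 * cached pages are no longer needed.  The write_and_drop() helper name
 * and the 4 KiB buffer are arbitrary choices for this sketch, not part
 * of any kernel or libc interface.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int write_and_drop(const char *path)
 *	{
 *		char buf[4096];
 *		int fd = open(path, O_WRONLY | O_CREAT, 0644);
 *
 *		if (fd < 0)
 *			return -1;
 *		memset(buf, 0, sizeof(buf));
 *		if (write(fd, buf, sizeof(buf)) != sizeof(buf)) {
 *			close(fd);
 *			return -1;
 *		}
 *		// Only clean pages are invalidated, so flush dirty data first.
 *		fsync(fd);
 *		// len == 0 means "through end of file", matching the
 *		// "as much as possible" handling in generic_fadvise().
 *		posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
 *		return close(fd);
 *	}
 */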