xref: /linux/mm/fadvise.c (revision 4949009eb8d40a441dcddcd96e101e77d31cf1b2)
1 /*
2  * mm/fadvise.c
3  *
4  * Copyright (C) 2002, Linus Torvalds
5  *
6  * 11Jan2003	Andrew Morton
7  *		Initial version.
8  */
9 
10 #include <linux/kernel.h>
11 #include <linux/file.h>
12 #include <linux/fs.h>
13 #include <linux/mm.h>
14 #include <linux/pagemap.h>
15 #include <linux/backing-dev.h>
16 #include <linux/pagevec.h>
17 #include <linux/fadvise.h>
18 #include <linux/writeback.h>
19 #include <linux/syscalls.h>
20 #include <linux/swap.h>
21 
22 #include <asm/unistd.h>
23 
24 /*
25  * POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could
26  * deactivate the pages and clear PG_Referenced.
27  */
28 SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
29 {
30 	struct fd f = fdget(fd);
31 	struct address_space *mapping;
32 	struct backing_dev_info *bdi;
33 	loff_t endbyte;			/* inclusive */
34 	pgoff_t start_index;
35 	pgoff_t end_index;
36 	unsigned long nrpages;
37 	int ret = 0;
38 
39 	if (!f.file)
40 		return -EBADF;
41 
42 	if (S_ISFIFO(file_inode(f.file)->i_mode)) {
43 		ret = -ESPIPE;
44 		goto out;
45 	}
46 
47 	mapping = f.file->f_mapping;
48 	if (!mapping || len < 0) {
49 		ret = -EINVAL;
50 		goto out;
51 	}
52 
53 	if (mapping->a_ops->get_xip_mem) {
54 		switch (advice) {
55 		case POSIX_FADV_NORMAL:
56 		case POSIX_FADV_RANDOM:
57 		case POSIX_FADV_SEQUENTIAL:
58 		case POSIX_FADV_WILLNEED:
59 		case POSIX_FADV_NOREUSE:
60 		case POSIX_FADV_DONTNEED:
61 			/* no bad return value, but ignore advice */
62 			break;
63 		default:
64 			ret = -EINVAL;
65 		}
66 		goto out;
67 	}
68 
69 	/* Careful about overflows. Len == 0 means "as much as possible" */
70 	endbyte = offset + len;
71 	if (!len || endbyte < len)
72 		endbyte = -1;
73 	else
74 		endbyte--;		/* inclusive */
75 
76 	bdi = mapping->backing_dev_info;
77 
78 	switch (advice) {
79 	case POSIX_FADV_NORMAL:
80 		f.file->f_ra.ra_pages = bdi->ra_pages;
81 		spin_lock(&f.file->f_lock);
82 		f.file->f_mode &= ~FMODE_RANDOM;
83 		spin_unlock(&f.file->f_lock);
84 		break;
85 	case POSIX_FADV_RANDOM:
86 		spin_lock(&f.file->f_lock);
87 		f.file->f_mode |= FMODE_RANDOM;
88 		spin_unlock(&f.file->f_lock);
89 		break;
90 	case POSIX_FADV_SEQUENTIAL:
91 		f.file->f_ra.ra_pages = bdi->ra_pages * 2;
92 		spin_lock(&f.file->f_lock);
93 		f.file->f_mode &= ~FMODE_RANDOM;
94 		spin_unlock(&f.file->f_lock);
95 		break;
96 	case POSIX_FADV_WILLNEED:
97 		/* First and last PARTIAL page! */
98 		start_index = offset >> PAGE_CACHE_SHIFT;
99 		end_index = endbyte >> PAGE_CACHE_SHIFT;
100 
101 		/* Careful about overflow on the "+1" */
102 		nrpages = end_index - start_index + 1;
103 		if (!nrpages)
104 			nrpages = ~0UL;
105 
106 		/*
107 		 * Ignore return value because fadvise() shall return
108 		 * success even if filesystem can't retrieve a hint,
109 		 */
110 		force_page_cache_readahead(mapping, f.file, start_index,
111 					   nrpages);
112 		break;
113 	case POSIX_FADV_NOREUSE:
114 		break;
115 	case POSIX_FADV_DONTNEED:
116 		if (!bdi_write_congested(mapping->backing_dev_info))
117 			__filemap_fdatawrite_range(mapping, offset, endbyte,
118 						   WB_SYNC_NONE);
119 
120 		/*
121 		 * First and last FULL page! Partial pages are deliberately
122 		 * preserved on the expectation that it is better to preserve
123 		 * needed memory than to discard unneeded memory.
124 		 */
125 		start_index = (offset+(PAGE_CACHE_SIZE-1)) >> PAGE_CACHE_SHIFT;
126 		end_index = (endbyte >> PAGE_CACHE_SHIFT);
127 
128 		if (end_index >= start_index) {
129 			unsigned long count = invalidate_mapping_pages(mapping,
130 						start_index, end_index);
131 
132 			/*
133 			 * If fewer pages were invalidated than expected then
134 			 * it is possible that some of the pages were on
135 			 * a per-cpu pagevec for a remote CPU. Drain all
136 			 * pagevecs and try again.
137 			 */
138 			if (count < (end_index - start_index + 1)) {
139 				lru_add_drain_all();
140 				invalidate_mapping_pages(mapping, start_index,
141 						end_index);
142 			}
143 		}
144 		break;
145 	default:
146 		ret = -EINVAL;
147 	}
148 out:
149 	fdput(f);
150 	return ret;
151 }
152 
153 #ifdef __ARCH_WANT_SYS_FADVISE64
154 
155 SYSCALL_DEFINE4(fadvise64, int, fd, loff_t, offset, size_t, len, int, advice)
156 {
157 	return sys_fadvise64_64(fd, offset, len, advice);
158 }
159 
160 #endif
161