xref: /linux/fs/backing-file.c (revision ae22a94997b8a03dcb3c922857c203246711f9d4)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Common helpers for stackable filesystems and backing files.
4  *
5  * Forked from fs/overlayfs/file.c.
6  *
7  * Copyright (C) 2017 Red Hat, Inc.
8  * Copyright (C) 2023 CTERA Networks.
9  */
10 
11 #include <linux/fs.h>
12 #include <linux/backing-file.h>
13 #include <linux/splice.h>
14 #include <linux/mm.h>
15 
16 #include "internal.h"
17 
18 /**
19  * backing_file_open - open a backing file for kernel internal use
20  * @user_path:	path that the user reuqested to open
21  * @flags:	open flags
22  * @real_path:	path of the backing file
23  * @cred:	credentials for open
24  *
25  * Open a backing file for a stackable filesystem (e.g., overlayfs).
26  * @user_path may be on the stackable filesystem and @real_path on the
27  * underlying filesystem.  In this case, we want to be able to return the
28  * @user_path of the stackable filesystem. This is done by embedding the
29  * returned file into a container structure that also stores the stacked
30  * file's path, which can be retrieved using backing_file_user_path().
31  */
32 struct file *backing_file_open(const struct path *user_path, int flags,
33 			       const struct path *real_path,
34 			       const struct cred *cred)
35 {
36 	struct file *f;
37 	int error;
38 
39 	f = alloc_empty_backing_file(flags, cred);
40 	if (IS_ERR(f))
41 		return f;
42 
43 	path_get(user_path);
44 	*backing_file_user_path(f) = *user_path;
45 	error = vfs_open(real_path, f);
46 	if (error) {
47 		fput(f);
48 		f = ERR_PTR(error);
49 	}
50 
51 	return f;
52 }
53 EXPORT_SYMBOL_GPL(backing_file_open);
54 
55 struct backing_aio {
56 	struct kiocb iocb;
57 	refcount_t ref;
58 	struct kiocb *orig_iocb;
59 	/* used for aio completion */
60 	void (*end_write)(struct file *);
61 	struct work_struct work;
62 	long res;
63 };
64 
65 static struct kmem_cache *backing_aio_cachep;
66 
67 #define BACKING_IOCB_MASK \
68 	(IOCB_NOWAIT | IOCB_HIPRI | IOCB_DSYNC | IOCB_SYNC | IOCB_APPEND)
69 
70 static rwf_t iocb_to_rw_flags(int flags)
71 {
72 	return (__force rwf_t)(flags & BACKING_IOCB_MASK);
73 }
74 
75 static void backing_aio_put(struct backing_aio *aio)
76 {
77 	if (refcount_dec_and_test(&aio->ref)) {
78 		fput(aio->iocb.ki_filp);
79 		kmem_cache_free(backing_aio_cachep, aio);
80 	}
81 }
82 
83 static void backing_aio_cleanup(struct backing_aio *aio, long res)
84 {
85 	struct kiocb *iocb = &aio->iocb;
86 	struct kiocb *orig_iocb = aio->orig_iocb;
87 
88 	if (aio->end_write)
89 		aio->end_write(orig_iocb->ki_filp);
90 
91 	orig_iocb->ki_pos = iocb->ki_pos;
92 	backing_aio_put(aio);
93 }
94 
95 static void backing_aio_rw_complete(struct kiocb *iocb, long res)
96 {
97 	struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb);
98 	struct kiocb *orig_iocb = aio->orig_iocb;
99 
100 	if (iocb->ki_flags & IOCB_WRITE)
101 		kiocb_end_write(iocb);
102 
103 	backing_aio_cleanup(aio, res);
104 	orig_iocb->ki_complete(orig_iocb, res);
105 }
106 
107 static void backing_aio_complete_work(struct work_struct *work)
108 {
109 	struct backing_aio *aio = container_of(work, struct backing_aio, work);
110 
111 	backing_aio_rw_complete(&aio->iocb, aio->res);
112 }
113 
114 static void backing_aio_queue_completion(struct kiocb *iocb, long res)
115 {
116 	struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb);
117 
118 	/*
119 	 * Punt to a work queue to serialize updates of mtime/size.
120 	 */
121 	aio->res = res;
122 	INIT_WORK(&aio->work, backing_aio_complete_work);
123 	queue_work(file_inode(aio->orig_iocb->ki_filp)->i_sb->s_dio_done_wq,
124 		   &aio->work);
125 }
126 
127 static int backing_aio_init_wq(struct kiocb *iocb)
128 {
129 	struct super_block *sb = file_inode(iocb->ki_filp)->i_sb;
130 
131 	if (sb->s_dio_done_wq)
132 		return 0;
133 
134 	return sb_init_dio_done_wq(sb);
135 }
136 
137 
138 ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter,
139 			       struct kiocb *iocb, int flags,
140 			       struct backing_file_ctx *ctx)
141 {
142 	struct backing_aio *aio = NULL;
143 	const struct cred *old_cred;
144 	ssize_t ret;
145 
146 	if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)))
147 		return -EIO;
148 
149 	if (!iov_iter_count(iter))
150 		return 0;
151 
152 	if (iocb->ki_flags & IOCB_DIRECT &&
153 	    !(file->f_mode & FMODE_CAN_ODIRECT))
154 		return -EINVAL;
155 
156 	old_cred = override_creds(ctx->cred);
157 	if (is_sync_kiocb(iocb)) {
158 		rwf_t rwf = iocb_to_rw_flags(flags);
159 
160 		ret = vfs_iter_read(file, iter, &iocb->ki_pos, rwf);
161 	} else {
162 		ret = -ENOMEM;
163 		aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL);
164 		if (!aio)
165 			goto out;
166 
167 		aio->orig_iocb = iocb;
168 		kiocb_clone(&aio->iocb, iocb, get_file(file));
169 		aio->iocb.ki_complete = backing_aio_rw_complete;
170 		refcount_set(&aio->ref, 2);
171 		ret = vfs_iocb_iter_read(file, &aio->iocb, iter);
172 		backing_aio_put(aio);
173 		if (ret != -EIOCBQUEUED)
174 			backing_aio_cleanup(aio, ret);
175 	}
176 out:
177 	revert_creds(old_cred);
178 
179 	if (ctx->accessed)
180 		ctx->accessed(ctx->user_file);
181 
182 	return ret;
183 }
184 EXPORT_SYMBOL_GPL(backing_file_read_iter);
185 
186 ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter,
187 				struct kiocb *iocb, int flags,
188 				struct backing_file_ctx *ctx)
189 {
190 	const struct cred *old_cred;
191 	ssize_t ret;
192 
193 	if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)))
194 		return -EIO;
195 
196 	if (!iov_iter_count(iter))
197 		return 0;
198 
199 	ret = file_remove_privs(ctx->user_file);
200 	if (ret)
201 		return ret;
202 
203 	if (iocb->ki_flags & IOCB_DIRECT &&
204 	    !(file->f_mode & FMODE_CAN_ODIRECT))
205 		return -EINVAL;
206 
207 	/*
208 	 * Stacked filesystems don't support deferred completions, don't copy
209 	 * this property in case it is set by the issuer.
210 	 */
211 	flags &= ~IOCB_DIO_CALLER_COMP;
212 
213 	old_cred = override_creds(ctx->cred);
214 	if (is_sync_kiocb(iocb)) {
215 		rwf_t rwf = iocb_to_rw_flags(flags);
216 
217 		ret = vfs_iter_write(file, iter, &iocb->ki_pos, rwf);
218 		if (ctx->end_write)
219 			ctx->end_write(ctx->user_file);
220 	} else {
221 		struct backing_aio *aio;
222 
223 		ret = backing_aio_init_wq(iocb);
224 		if (ret)
225 			goto out;
226 
227 		ret = -ENOMEM;
228 		aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL);
229 		if (!aio)
230 			goto out;
231 
232 		aio->orig_iocb = iocb;
233 		aio->end_write = ctx->end_write;
234 		kiocb_clone(&aio->iocb, iocb, get_file(file));
235 		aio->iocb.ki_flags = flags;
236 		aio->iocb.ki_complete = backing_aio_queue_completion;
237 		refcount_set(&aio->ref, 2);
238 		ret = vfs_iocb_iter_write(file, &aio->iocb, iter);
239 		backing_aio_put(aio);
240 		if (ret != -EIOCBQUEUED)
241 			backing_aio_cleanup(aio, ret);
242 	}
243 out:
244 	revert_creds(old_cred);
245 
246 	return ret;
247 }
248 EXPORT_SYMBOL_GPL(backing_file_write_iter);
249 
250 ssize_t backing_file_splice_read(struct file *in, loff_t *ppos,
251 				 struct pipe_inode_info *pipe, size_t len,
252 				 unsigned int flags,
253 				 struct backing_file_ctx *ctx)
254 {
255 	const struct cred *old_cred;
256 	ssize_t ret;
257 
258 	if (WARN_ON_ONCE(!(in->f_mode & FMODE_BACKING)))
259 		return -EIO;
260 
261 	old_cred = override_creds(ctx->cred);
262 	ret = vfs_splice_read(in, ppos, pipe, len, flags);
263 	revert_creds(old_cred);
264 
265 	if (ctx->accessed)
266 		ctx->accessed(ctx->user_file);
267 
268 	return ret;
269 }
270 EXPORT_SYMBOL_GPL(backing_file_splice_read);
271 
272 ssize_t backing_file_splice_write(struct pipe_inode_info *pipe,
273 				  struct file *out, loff_t *ppos, size_t len,
274 				  unsigned int flags,
275 				  struct backing_file_ctx *ctx)
276 {
277 	const struct cred *old_cred;
278 	ssize_t ret;
279 
280 	if (WARN_ON_ONCE(!(out->f_mode & FMODE_BACKING)))
281 		return -EIO;
282 
283 	ret = file_remove_privs(ctx->user_file);
284 	if (ret)
285 		return ret;
286 
287 	old_cred = override_creds(ctx->cred);
288 	file_start_write(out);
289 	ret = iter_file_splice_write(pipe, out, ppos, len, flags);
290 	file_end_write(out);
291 	revert_creds(old_cred);
292 
293 	if (ctx->end_write)
294 		ctx->end_write(ctx->user_file);
295 
296 	return ret;
297 }
298 EXPORT_SYMBOL_GPL(backing_file_splice_write);
299 
300 int backing_file_mmap(struct file *file, struct vm_area_struct *vma,
301 		      struct backing_file_ctx *ctx)
302 {
303 	const struct cred *old_cred;
304 	int ret;
305 
306 	if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)) ||
307 	    WARN_ON_ONCE(ctx->user_file != vma->vm_file))
308 		return -EIO;
309 
310 	if (!file->f_op->mmap)
311 		return -ENODEV;
312 
313 	vma_set_file(vma, file);
314 
315 	old_cred = override_creds(ctx->cred);
316 	ret = call_mmap(vma->vm_file, vma);
317 	revert_creds(old_cred);
318 
319 	if (ctx->accessed)
320 		ctx->accessed(ctx->user_file);
321 
322 	return ret;
323 }
324 EXPORT_SYMBOL_GPL(backing_file_mmap);
325 
326 static int __init backing_aio_init(void)
327 {
328 	backing_aio_cachep = KMEM_CACHE(backing_aio, SLAB_HWCACHE_ALIGN);
329 	if (!backing_aio_cachep)
330 		return -ENOMEM;
331 
332 	return 0;
333 }
334 fs_initcall(backing_aio_init);
335