xref: /linux/fs/backing-file.c (revision fa8a4d3659d0c1ad73d5f59b2e0a6d408de5b317)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Common helpers for stackable filesystems and backing files.
4  *
5  * Forked from fs/overlayfs/file.c.
6  *
7  * Copyright (C) 2017 Red Hat, Inc.
8  * Copyright (C) 2023 CTERA Networks.
9  */
10 
11 #include <linux/fs.h>
12 #include <linux/backing-file.h>
13 #include <linux/splice.h>
14 #include <linux/mm.h>
15 
16 #include "internal.h"
17 
18 /**
19  * backing_file_open - open a backing file for kernel internal use
20  * @user_path:	path that the user reuqested to open
21  * @flags:	open flags
22  * @real_path:	path of the backing file
23  * @cred:	credentials for open
24  *
25  * Open a backing file for a stackable filesystem (e.g., overlayfs).
26  * @user_path may be on the stackable filesystem and @real_path on the
27  * underlying filesystem.  In this case, we want to be able to return the
28  * @user_path of the stackable filesystem. This is done by embedding the
29  * returned file into a container structure that also stores the stacked
30  * file's path, which can be retrieved using backing_file_user_path().
31  */
32 struct file *backing_file_open(const struct path *user_path, int flags,
33 			       const struct path *real_path,
34 			       const struct cred *cred)
35 {
36 	struct file *f;
37 	int error;
38 
39 	f = alloc_empty_backing_file(flags, cred);
40 	if (IS_ERR(f))
41 		return f;
42 
43 	path_get(user_path);
44 	*backing_file_user_path(f) = *user_path;
45 	error = vfs_open(real_path, f);
46 	if (error) {
47 		fput(f);
48 		f = ERR_PTR(error);
49 	}
50 
51 	return f;
52 }
53 EXPORT_SYMBOL_GPL(backing_file_open);
54 
55 struct file *backing_tmpfile_open(const struct path *user_path, int flags,
56 				  const struct path *real_parentpath,
57 				  umode_t mode, const struct cred *cred)
58 {
59 	struct mnt_idmap *real_idmap = mnt_idmap(real_parentpath->mnt);
60 	struct file *f;
61 	int error;
62 
63 	f = alloc_empty_backing_file(flags, cred);
64 	if (IS_ERR(f))
65 		return f;
66 
67 	path_get(user_path);
68 	*backing_file_user_path(f) = *user_path;
69 	error = vfs_tmpfile(real_idmap, real_parentpath, f, mode);
70 	if (error) {
71 		fput(f);
72 		f = ERR_PTR(error);
73 	}
74 	return f;
75 }
76 EXPORT_SYMBOL(backing_tmpfile_open);
77 
78 struct backing_aio {
79 	struct kiocb iocb;
80 	refcount_t ref;
81 	struct kiocb *orig_iocb;
82 	/* used for aio completion */
83 	void (*end_write)(struct file *);
84 	struct work_struct work;
85 	long res;
86 };
87 
/* Slab cache for struct backing_aio, created by backing_aio_init(). */
static struct kmem_cache *backing_aio_cachep;

/* The only kiocb flag bits that iocb_to_rw_flags() lets through. */
#define BACKING_IOCB_MASK \
	(IOCB_NOWAIT | IOCB_HIPRI | IOCB_DSYNC | IOCB_SYNC | IOCB_APPEND)
92 
93 static rwf_t iocb_to_rw_flags(int flags)
94 {
95 	return (__force rwf_t)(flags & BACKING_IOCB_MASK);
96 }
97 
98 static void backing_aio_put(struct backing_aio *aio)
99 {
100 	if (refcount_dec_and_test(&aio->ref)) {
101 		fput(aio->iocb.ki_filp);
102 		kmem_cache_free(backing_aio_cachep, aio);
103 	}
104 }
105 
106 static void backing_aio_cleanup(struct backing_aio *aio, long res)
107 {
108 	struct kiocb *iocb = &aio->iocb;
109 	struct kiocb *orig_iocb = aio->orig_iocb;
110 
111 	if (aio->end_write)
112 		aio->end_write(orig_iocb->ki_filp);
113 
114 	orig_iocb->ki_pos = iocb->ki_pos;
115 	backing_aio_put(aio);
116 }
117 
118 static void backing_aio_rw_complete(struct kiocb *iocb, long res)
119 {
120 	struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb);
121 	struct kiocb *orig_iocb = aio->orig_iocb;
122 
123 	if (iocb->ki_flags & IOCB_WRITE)
124 		kiocb_end_write(iocb);
125 
126 	backing_aio_cleanup(aio, res);
127 	orig_iocb->ki_complete(orig_iocb, res);
128 }
129 
130 static void backing_aio_complete_work(struct work_struct *work)
131 {
132 	struct backing_aio *aio = container_of(work, struct backing_aio, work);
133 
134 	backing_aio_rw_complete(&aio->iocb, aio->res);
135 }
136 
137 static void backing_aio_queue_completion(struct kiocb *iocb, long res)
138 {
139 	struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb);
140 
141 	/*
142 	 * Punt to a work queue to serialize updates of mtime/size.
143 	 */
144 	aio->res = res;
145 	INIT_WORK(&aio->work, backing_aio_complete_work);
146 	queue_work(file_inode(aio->orig_iocb->ki_filp)->i_sb->s_dio_done_wq,
147 		   &aio->work);
148 }
149 
150 static int backing_aio_init_wq(struct kiocb *iocb)
151 {
152 	struct super_block *sb = file_inode(iocb->ki_filp)->i_sb;
153 
154 	if (sb->s_dio_done_wq)
155 		return 0;
156 
157 	return sb_init_dio_done_wq(sb);
158 }
159 
160 
161 ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter,
162 			       struct kiocb *iocb, int flags,
163 			       struct backing_file_ctx *ctx)
164 {
165 	struct backing_aio *aio = NULL;
166 	const struct cred *old_cred;
167 	ssize_t ret;
168 
169 	if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)))
170 		return -EIO;
171 
172 	if (!iov_iter_count(iter))
173 		return 0;
174 
175 	if (iocb->ki_flags & IOCB_DIRECT &&
176 	    !(file->f_mode & FMODE_CAN_ODIRECT))
177 		return -EINVAL;
178 
179 	old_cred = override_creds(ctx->cred);
180 	if (is_sync_kiocb(iocb)) {
181 		rwf_t rwf = iocb_to_rw_flags(flags);
182 
183 		ret = vfs_iter_read(file, iter, &iocb->ki_pos, rwf);
184 	} else {
185 		ret = -ENOMEM;
186 		aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL);
187 		if (!aio)
188 			goto out;
189 
190 		aio->orig_iocb = iocb;
191 		kiocb_clone(&aio->iocb, iocb, get_file(file));
192 		aio->iocb.ki_complete = backing_aio_rw_complete;
193 		refcount_set(&aio->ref, 2);
194 		ret = vfs_iocb_iter_read(file, &aio->iocb, iter);
195 		backing_aio_put(aio);
196 		if (ret != -EIOCBQUEUED)
197 			backing_aio_cleanup(aio, ret);
198 	}
199 out:
200 	revert_creds(old_cred);
201 
202 	if (ctx->accessed)
203 		ctx->accessed(ctx->user_file);
204 
205 	return ret;
206 }
207 EXPORT_SYMBOL_GPL(backing_file_read_iter);
208 
209 ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter,
210 				struct kiocb *iocb, int flags,
211 				struct backing_file_ctx *ctx)
212 {
213 	const struct cred *old_cred;
214 	ssize_t ret;
215 
216 	if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)))
217 		return -EIO;
218 
219 	if (!iov_iter_count(iter))
220 		return 0;
221 
222 	ret = file_remove_privs(ctx->user_file);
223 	if (ret)
224 		return ret;
225 
226 	if (iocb->ki_flags & IOCB_DIRECT &&
227 	    !(file->f_mode & FMODE_CAN_ODIRECT))
228 		return -EINVAL;
229 
230 	/*
231 	 * Stacked filesystems don't support deferred completions, don't copy
232 	 * this property in case it is set by the issuer.
233 	 */
234 	flags &= ~IOCB_DIO_CALLER_COMP;
235 
236 	old_cred = override_creds(ctx->cred);
237 	if (is_sync_kiocb(iocb)) {
238 		rwf_t rwf = iocb_to_rw_flags(flags);
239 
240 		ret = vfs_iter_write(file, iter, &iocb->ki_pos, rwf);
241 		if (ctx->end_write)
242 			ctx->end_write(ctx->user_file);
243 	} else {
244 		struct backing_aio *aio;
245 
246 		ret = backing_aio_init_wq(iocb);
247 		if (ret)
248 			goto out;
249 
250 		ret = -ENOMEM;
251 		aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL);
252 		if (!aio)
253 			goto out;
254 
255 		aio->orig_iocb = iocb;
256 		aio->end_write = ctx->end_write;
257 		kiocb_clone(&aio->iocb, iocb, get_file(file));
258 		aio->iocb.ki_flags = flags;
259 		aio->iocb.ki_complete = backing_aio_queue_completion;
260 		refcount_set(&aio->ref, 2);
261 		ret = vfs_iocb_iter_write(file, &aio->iocb, iter);
262 		backing_aio_put(aio);
263 		if (ret != -EIOCBQUEUED)
264 			backing_aio_cleanup(aio, ret);
265 	}
266 out:
267 	revert_creds(old_cred);
268 
269 	return ret;
270 }
271 EXPORT_SYMBOL_GPL(backing_file_write_iter);
272 
273 ssize_t backing_file_splice_read(struct file *in, loff_t *ppos,
274 				 struct pipe_inode_info *pipe, size_t len,
275 				 unsigned int flags,
276 				 struct backing_file_ctx *ctx)
277 {
278 	const struct cred *old_cred;
279 	ssize_t ret;
280 
281 	if (WARN_ON_ONCE(!(in->f_mode & FMODE_BACKING)))
282 		return -EIO;
283 
284 	old_cred = override_creds(ctx->cred);
285 	ret = vfs_splice_read(in, ppos, pipe, len, flags);
286 	revert_creds(old_cred);
287 
288 	if (ctx->accessed)
289 		ctx->accessed(ctx->user_file);
290 
291 	return ret;
292 }
293 EXPORT_SYMBOL_GPL(backing_file_splice_read);
294 
295 ssize_t backing_file_splice_write(struct pipe_inode_info *pipe,
296 				  struct file *out, loff_t *ppos, size_t len,
297 				  unsigned int flags,
298 				  struct backing_file_ctx *ctx)
299 {
300 	const struct cred *old_cred;
301 	ssize_t ret;
302 
303 	if (WARN_ON_ONCE(!(out->f_mode & FMODE_BACKING)))
304 		return -EIO;
305 
306 	if (!out->f_op->splice_write)
307 		return -EINVAL;
308 
309 	ret = file_remove_privs(ctx->user_file);
310 	if (ret)
311 		return ret;
312 
313 	old_cred = override_creds(ctx->cred);
314 	file_start_write(out);
315 	ret = out->f_op->splice_write(pipe, out, ppos, len, flags);
316 	file_end_write(out);
317 	revert_creds(old_cred);
318 
319 	if (ctx->end_write)
320 		ctx->end_write(ctx->user_file);
321 
322 	return ret;
323 }
324 EXPORT_SYMBOL_GPL(backing_file_splice_write);
325 
326 int backing_file_mmap(struct file *file, struct vm_area_struct *vma,
327 		      struct backing_file_ctx *ctx)
328 {
329 	const struct cred *old_cred;
330 	int ret;
331 
332 	if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)) ||
333 	    WARN_ON_ONCE(ctx->user_file != vma->vm_file))
334 		return -EIO;
335 
336 	if (!file->f_op->mmap)
337 		return -ENODEV;
338 
339 	vma_set_file(vma, file);
340 
341 	old_cred = override_creds(ctx->cred);
342 	ret = call_mmap(vma->vm_file, vma);
343 	revert_creds(old_cred);
344 
345 	if (ctx->accessed)
346 		ctx->accessed(ctx->user_file);
347 
348 	return ret;
349 }
350 EXPORT_SYMBOL_GPL(backing_file_mmap);
351 
352 static int __init backing_aio_init(void)
353 {
354 	backing_aio_cachep = KMEM_CACHE(backing_aio, SLAB_HWCACHE_ALIGN);
355 	if (!backing_aio_cachep)
356 		return -ENOMEM;
357 
358 	return 0;
359 }
360 fs_initcall(backing_aio_init);
361