xref: /linux/fs/overlayfs/file.c (revision 7f4f3b14e8079ecde096bd734af10e30d40c27b7)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2017 Red Hat, Inc.
4  */
5 
6 #include <linux/cred.h>
7 #include <linux/file.h>
8 #include <linux/mount.h>
9 #include <linux/xattr.h>
10 #include <linux/uio.h>
11 #include <linux/uaccess.h>
12 #include <linux/security.h>
13 #include <linux/fs.h>
14 #include <linux/backing-file.h>
15 #include "overlayfs.h"
16 
/*
 * Single-character tag describing @realinode relative to the overlay @inode,
 * used in debug output:
 *   'l' - @realinode is not the overlay inode's upper inode
 *   'u' - it is the upper inode and ovl_has_upperdata() reports true
 *   'm' - it is the upper inode and ovl_has_upperdata() reports false
 */
static char ovl_whatisit(struct inode *inode, struct inode *realinode)
{
	if (realinode != ovl_inode_upper(inode))
		return 'l';

	return ovl_has_upperdata(inode) ? 'u' : 'm';
}
26 
27 static struct file *ovl_open_realfile(const struct file *file,
28 				      const struct path *realpath)
29 {
30 	struct inode *realinode = d_inode(realpath->dentry);
31 	struct inode *inode = file_inode(file);
32 	struct mnt_idmap *real_idmap;
33 	struct file *realfile;
34 	const struct cred *old_cred;
35 	int flags = file->f_flags | OVL_OPEN_FLAGS;
36 	int acc_mode = ACC_MODE(flags);
37 	int err;
38 
39 	if (flags & O_APPEND)
40 		acc_mode |= MAY_APPEND;
41 
42 	old_cred = ovl_override_creds(inode->i_sb);
43 	real_idmap = mnt_idmap(realpath->mnt);
44 	err = inode_permission(real_idmap, realinode, MAY_OPEN | acc_mode);
45 	if (err) {
46 		realfile = ERR_PTR(err);
47 	} else {
48 		if (!inode_owner_or_capable(real_idmap, realinode))
49 			flags &= ~O_NOATIME;
50 
51 		realfile = backing_file_open(&file->f_path, flags, realpath,
52 					     current_cred());
53 	}
54 	ovl_revert_creds(old_cred);
55 
56 	pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
57 		 file, file, ovl_whatisit(inode, realinode), file->f_flags,
58 		 realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
59 
60 	return realfile;
61 }
62 
63 #define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
64 
65 static int ovl_change_flags(struct file *file, unsigned int flags)
66 {
67 	struct inode *inode = file_inode(file);
68 	int err;
69 
70 	flags &= OVL_SETFL_MASK;
71 
72 	if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
73 		return -EPERM;
74 
75 	if ((flags & O_DIRECT) && !(file->f_mode & FMODE_CAN_ODIRECT))
76 		return -EINVAL;
77 
78 	if (file->f_op->check_flags) {
79 		err = file->f_op->check_flags(flags);
80 		if (err)
81 			return err;
82 	}
83 
84 	spin_lock(&file->f_lock);
85 	file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags;
86 	file->f_iocb_flags = iocb_flags(file);
87 	spin_unlock(&file->f_lock);
88 
89 	return 0;
90 }
91 
/*
 * Per-open state kept in the overlay file's ->private_data.
 * Both references are dropped with fput() in ovl_file_free().
 */
struct ovl_file {
	/* Real file handed to ovl_file_alloc() */
	struct file *realfile;
	/* Opened lazily by ovl_real_file_path(); NULL until then */
	struct file *upperfile;
};
96 
97 struct ovl_file *ovl_file_alloc(struct file *realfile)
98 {
99 	struct ovl_file *of = kzalloc(sizeof(struct ovl_file), GFP_KERNEL);
100 
101 	if (unlikely(!of))
102 		return NULL;
103 
104 	of->realfile = realfile;
105 	return of;
106 }
107 
108 void ovl_file_free(struct ovl_file *of)
109 {
110 	fput(of->realfile);
111 	if (of->upperfile)
112 		fput(of->upperfile);
113 	kfree(of);
114 }
115 
116 static bool ovl_is_real_file(const struct file *realfile,
117 			     const struct path *realpath)
118 {
119 	return file_inode(realfile) == d_inode(realpath->dentry);
120 }
121 
122 static struct file *ovl_real_file_path(const struct file *file,
123 				       struct path *realpath)
124 {
125 	struct ovl_file *of = file->private_data;
126 	struct file *realfile = of->realfile;
127 
128 	if (WARN_ON_ONCE(!realpath->dentry))
129 		return ERR_PTR(-EIO);
130 
131 	/*
132 	 * If the realfile that we want is not where the data used to be at
133 	 * open time, either we'd been copied up, or it's an fsync of a
134 	 * metacopied file.  We need the upperfile either way, so see if it
135 	 * is already opened and if it is not then open and store it.
136 	 */
137 	if (unlikely(!ovl_is_real_file(realfile, realpath))) {
138 		struct file *upperfile = READ_ONCE(of->upperfile);
139 		struct file *old;
140 
141 		if (!upperfile) { /* Nobody opened upperfile yet */
142 			upperfile = ovl_open_realfile(file, realpath);
143 			if (IS_ERR(upperfile))
144 				return upperfile;
145 
146 			/* Store the upperfile for later */
147 			old = cmpxchg_release(&of->upperfile, NULL, upperfile);
148 			if (old) { /* Someone opened upperfile before us */
149 				fput(upperfile);
150 				upperfile = old;
151 			}
152 		}
153 		/*
154 		 * Stored file must be from the right inode, unless someone's
155 		 * been corrupting the upper layer.
156 		 */
157 		if (WARN_ON_ONCE(!ovl_is_real_file(upperfile, realpath)))
158 			return ERR_PTR(-EIO);
159 
160 		realfile = upperfile;
161 	}
162 
163 	/* Did the flags change since open? */
164 	if (unlikely((file->f_flags ^ realfile->f_flags) & ~OVL_OPEN_FLAGS)) {
165 		int err = ovl_change_flags(realfile, file->f_flags);
166 
167 		if (err)
168 			return ERR_PTR(err);
169 	}
170 
171 	return realfile;
172 }
173 
174 static struct file *ovl_real_file(const struct file *file)
175 {
176 	struct dentry *dentry = file_dentry(file);
177 	struct path realpath;
178 	int err;
179 
180 	if (d_is_dir(dentry)) {
181 		struct file *f = ovl_dir_real_file(file, false);
182 
183 		if (WARN_ON_ONCE(!f))
184 			return ERR_PTR(-EIO);
185 		return f;
186 	}
187 
188 	/* lazy lookup and verify of lowerdata */
189 	err = ovl_verify_lowerdata(dentry);
190 	if (err)
191 		return ERR_PTR(err);
192 
193 	ovl_path_realdata(dentry, &realpath);
194 
195 	return ovl_real_file_path(file, &realpath);
196 }
197 
198 static int ovl_open(struct inode *inode, struct file *file)
199 {
200 	struct dentry *dentry = file_dentry(file);
201 	struct file *realfile;
202 	struct path realpath;
203 	struct ovl_file *of;
204 	int err;
205 
206 	/* lazy lookup and verify lowerdata */
207 	err = ovl_verify_lowerdata(dentry);
208 	if (err)
209 		return err;
210 
211 	err = ovl_maybe_copy_up(dentry, file->f_flags);
212 	if (err)
213 		return err;
214 
215 	/* No longer need these flags, so don't pass them on to underlying fs */
216 	file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
217 
218 	ovl_path_realdata(dentry, &realpath);
219 	if (!realpath.dentry)
220 		return -EIO;
221 
222 	realfile = ovl_open_realfile(file, &realpath);
223 	if (IS_ERR(realfile))
224 		return PTR_ERR(realfile);
225 
226 	of = ovl_file_alloc(realfile);
227 	if (!of) {
228 		fput(realfile);
229 		return -ENOMEM;
230 	}
231 
232 	file->private_data = of;
233 
234 	return 0;
235 }
236 
237 static int ovl_release(struct inode *inode, struct file *file)
238 {
239 	ovl_file_free(file->private_data);
240 	return 0;
241 }
242 
243 static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
244 {
245 	struct inode *inode = file_inode(file);
246 	struct file *realfile;
247 	const struct cred *old_cred;
248 	loff_t ret;
249 
250 	/*
251 	 * The two special cases below do not need to involve real fs,
252 	 * so we can optimizing concurrent callers.
253 	 */
254 	if (offset == 0) {
255 		if (whence == SEEK_CUR)
256 			return file->f_pos;
257 
258 		if (whence == SEEK_SET)
259 			return vfs_setpos(file, 0, 0);
260 	}
261 
262 	realfile = ovl_real_file(file);
263 	if (IS_ERR(realfile))
264 		return PTR_ERR(realfile);
265 
266 	/*
267 	 * Overlay file f_pos is the master copy that is preserved
268 	 * through copy up and modified on read/write, but only real
269 	 * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose
270 	 * limitations that are more strict than ->s_maxbytes for specific
271 	 * files, so we use the real file to perform seeks.
272 	 */
273 	ovl_inode_lock(inode);
274 	realfile->f_pos = file->f_pos;
275 
276 	old_cred = ovl_override_creds(inode->i_sb);
277 	ret = vfs_llseek(realfile, offset, whence);
278 	ovl_revert_creds(old_cred);
279 
280 	file->f_pos = realfile->f_pos;
281 	ovl_inode_unlock(inode);
282 
283 	return ret;
284 }
285 
/* Refresh the overlay inode of @file via ovl_copyattr() (size/mtime) */
static void ovl_file_modified(struct file *file)
{
	struct inode *inode = file_inode(file);

	ovl_copyattr(inode);
}
291 
292 static void ovl_file_end_write(struct kiocb *iocb, ssize_t ret)
293 {
294 	ovl_file_modified(iocb->ki_filp);
295 }
296 
297 static void ovl_file_accessed(struct file *file)
298 {
299 	struct inode *inode, *upperinode;
300 	struct timespec64 ctime, uctime;
301 	struct timespec64 mtime, umtime;
302 
303 	if (file->f_flags & O_NOATIME)
304 		return;
305 
306 	inode = file_inode(file);
307 	upperinode = ovl_inode_upper(inode);
308 
309 	if (!upperinode)
310 		return;
311 
312 	ctime = inode_get_ctime(inode);
313 	uctime = inode_get_ctime(upperinode);
314 	mtime = inode_get_mtime(inode);
315 	umtime = inode_get_mtime(upperinode);
316 	if ((!timespec64_equal(&mtime, &umtime)) ||
317 	     !timespec64_equal(&ctime, &uctime)) {
318 		inode_set_mtime_to_ts(inode, inode_get_mtime(upperinode));
319 		inode_set_ctime_to_ts(inode, uctime);
320 	}
321 
322 	touch_atime(&file->f_path);
323 }
324 
325 static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
326 {
327 	struct file *file = iocb->ki_filp;
328 	struct file *realfile;
329 	struct backing_file_ctx ctx = {
330 		.cred = ovl_creds(file_inode(file)->i_sb),
331 		.accessed = ovl_file_accessed,
332 	};
333 
334 	if (!iov_iter_count(iter))
335 		return 0;
336 
337 	realfile = ovl_real_file(file);
338 	if (IS_ERR(realfile))
339 		return PTR_ERR(realfile);
340 
341 	return backing_file_read_iter(realfile, iter, iocb, iocb->ki_flags,
342 				      &ctx);
343 }
344 
345 static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
346 {
347 	struct file *file = iocb->ki_filp;
348 	struct inode *inode = file_inode(file);
349 	struct file *realfile;
350 	ssize_t ret;
351 	int ifl = iocb->ki_flags;
352 	struct backing_file_ctx ctx = {
353 		.cred = ovl_creds(inode->i_sb),
354 		.end_write = ovl_file_end_write,
355 	};
356 
357 	if (!iov_iter_count(iter))
358 		return 0;
359 
360 	inode_lock(inode);
361 	/* Update mode */
362 	ovl_copyattr(inode);
363 
364 	realfile = ovl_real_file(file);
365 	ret = PTR_ERR(realfile);
366 	if (IS_ERR(realfile))
367 		goto out_unlock;
368 
369 	if (!ovl_should_sync(OVL_FS(inode->i_sb)))
370 		ifl &= ~(IOCB_DSYNC | IOCB_SYNC);
371 
372 	/*
373 	 * Overlayfs doesn't support deferred completions, don't copy
374 	 * this property in case it is set by the issuer.
375 	 */
376 	ifl &= ~IOCB_DIO_CALLER_COMP;
377 	ret = backing_file_write_iter(realfile, iter, iocb, ifl, &ctx);
378 
379 out_unlock:
380 	inode_unlock(inode);
381 
382 	return ret;
383 }
384 
385 static ssize_t ovl_splice_read(struct file *in, loff_t *ppos,
386 			       struct pipe_inode_info *pipe, size_t len,
387 			       unsigned int flags)
388 {
389 	struct file *realfile;
390 	ssize_t ret;
391 	struct backing_file_ctx ctx = {
392 		.cred = ovl_creds(file_inode(in)->i_sb),
393 		.accessed = ovl_file_accessed,
394 	};
395 	struct kiocb iocb;
396 
397 	realfile = ovl_real_file(in);
398 	if (IS_ERR(realfile))
399 		return PTR_ERR(realfile);
400 
401 	init_sync_kiocb(&iocb, in);
402 	iocb.ki_pos = *ppos;
403 	ret = backing_file_splice_read(realfile, &iocb, pipe, len, flags, &ctx);
404 	*ppos = iocb.ki_pos;
405 
406 	return ret;
407 }
408 
409 /*
410  * Calling iter_file_splice_write() directly from overlay's f_op may deadlock
411  * due to lock order inversion between pipe->mutex in iter_file_splice_write()
412  * and file_start_write(realfile) in ovl_write_iter().
413  *
414  * So do everything ovl_write_iter() does and call iter_file_splice_write() on
415  * the real file.
416  */
417 static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
418 				loff_t *ppos, size_t len, unsigned int flags)
419 {
420 	struct file *realfile;
421 	struct inode *inode = file_inode(out);
422 	ssize_t ret;
423 	struct backing_file_ctx ctx = {
424 		.cred = ovl_creds(inode->i_sb),
425 		.end_write = ovl_file_end_write,
426 	};
427 	struct kiocb iocb;
428 
429 	inode_lock(inode);
430 	/* Update mode */
431 	ovl_copyattr(inode);
432 
433 	realfile = ovl_real_file(out);
434 	ret = PTR_ERR(realfile);
435 	if (IS_ERR(realfile))
436 		goto out_unlock;
437 
438 	init_sync_kiocb(&iocb, out);
439 	iocb.ki_pos = *ppos;
440 	ret = backing_file_splice_write(pipe, realfile, &iocb, len, flags, &ctx);
441 	*ppos = iocb.ki_pos;
442 
443 out_unlock:
444 	inode_unlock(inode);
445 
446 	return ret;
447 }
448 
449 static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
450 {
451 	struct dentry *dentry = file_dentry(file);
452 	enum ovl_path_type type;
453 	struct path upperpath;
454 	struct file *upperfile;
455 	const struct cred *old_cred;
456 	int ret;
457 
458 	ret = ovl_sync_status(OVL_FS(file_inode(file)->i_sb));
459 	if (ret <= 0)
460 		return ret;
461 
462 	/* Don't sync lower file for fear of receiving EROFS error */
463 	type = ovl_path_type(dentry);
464 	if (!OVL_TYPE_UPPER(type) || (datasync && OVL_TYPE_MERGE(type)))
465 		return 0;
466 
467 	ovl_path_upper(dentry, &upperpath);
468 	upperfile = ovl_real_file_path(file, &upperpath);
469 	if (IS_ERR(upperfile))
470 		return PTR_ERR(upperfile);
471 
472 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
473 	ret = vfs_fsync_range(upperfile, start, end, datasync);
474 	ovl_revert_creds(old_cred);
475 
476 	return ret;
477 }
478 
479 static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
480 {
481 	struct ovl_file *of = file->private_data;
482 	struct backing_file_ctx ctx = {
483 		.cred = ovl_creds(file_inode(file)->i_sb),
484 		.accessed = ovl_file_accessed,
485 	};
486 
487 	return backing_file_mmap(of->realfile, vma, &ctx);
488 }
489 
490 static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
491 {
492 	struct inode *inode = file_inode(file);
493 	struct file *realfile;
494 	const struct cred *old_cred;
495 	int ret;
496 
497 	inode_lock(inode);
498 	/* Update mode */
499 	ovl_copyattr(inode);
500 	ret = file_remove_privs(file);
501 	if (ret)
502 		goto out_unlock;
503 
504 	realfile = ovl_real_file(file);
505 	ret = PTR_ERR(realfile);
506 	if (IS_ERR(realfile))
507 		goto out_unlock;
508 
509 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
510 	ret = vfs_fallocate(realfile, mode, offset, len);
511 	ovl_revert_creds(old_cred);
512 
513 	/* Update size */
514 	ovl_file_modified(file);
515 
516 out_unlock:
517 	inode_unlock(inode);
518 
519 	return ret;
520 }
521 
522 static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
523 {
524 	struct file *realfile;
525 	const struct cred *old_cred;
526 	int ret;
527 
528 	realfile = ovl_real_file(file);
529 	if (IS_ERR(realfile))
530 		return PTR_ERR(realfile);
531 
532 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
533 	ret = vfs_fadvise(realfile, offset, len, advice);
534 	ovl_revert_creds(old_cred);
535 
536 	return ret;
537 }
538 
/* Selects which VFS range operation ovl_copyfile() performs */
enum ovl_copyop {
	OVL_COPY,	/* vfs_copy_file_range() */
	OVL_CLONE,	/* vfs_clone_file_range() */
	OVL_DEDUPE,	/* vfs_dedupe_file_range_one() */
};
544 
545 static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
546 			    struct file *file_out, loff_t pos_out,
547 			    loff_t len, unsigned int flags, enum ovl_copyop op)
548 {
549 	struct inode *inode_out = file_inode(file_out);
550 	struct file *realfile_in, *realfile_out;
551 	const struct cred *old_cred;
552 	loff_t ret;
553 
554 	inode_lock(inode_out);
555 	if (op != OVL_DEDUPE) {
556 		/* Update mode */
557 		ovl_copyattr(inode_out);
558 		ret = file_remove_privs(file_out);
559 		if (ret)
560 			goto out_unlock;
561 	}
562 
563 	realfile_out = ovl_real_file(file_out);
564 	ret = PTR_ERR(realfile_out);
565 	if (IS_ERR(realfile_out))
566 		goto out_unlock;
567 
568 	realfile_in = ovl_real_file(file_in);
569 	ret = PTR_ERR(realfile_in);
570 	if (IS_ERR(realfile_in))
571 		goto out_unlock;
572 
573 	old_cred = ovl_override_creds(file_inode(file_out)->i_sb);
574 	switch (op) {
575 	case OVL_COPY:
576 		ret = vfs_copy_file_range(realfile_in, pos_in,
577 					  realfile_out, pos_out, len, flags);
578 		break;
579 
580 	case OVL_CLONE:
581 		ret = vfs_clone_file_range(realfile_in, pos_in,
582 					   realfile_out, pos_out, len, flags);
583 		break;
584 
585 	case OVL_DEDUPE:
586 		ret = vfs_dedupe_file_range_one(realfile_in, pos_in,
587 						realfile_out, pos_out, len,
588 						flags);
589 		break;
590 	}
591 	ovl_revert_creds(old_cred);
592 
593 	/* Update size */
594 	ovl_file_modified(file_out);
595 
596 out_unlock:
597 	inode_unlock(inode_out);
598 
599 	return ret;
600 }
601 
602 static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
603 				   struct file *file_out, loff_t pos_out,
604 				   size_t len, unsigned int flags)
605 {
606 	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
607 			    OVL_COPY);
608 }
609 
610 static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in,
611 				   struct file *file_out, loff_t pos_out,
612 				   loff_t len, unsigned int remap_flags)
613 {
614 	enum ovl_copyop op;
615 
616 	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
617 		return -EINVAL;
618 
619 	if (remap_flags & REMAP_FILE_DEDUP)
620 		op = OVL_DEDUPE;
621 	else
622 		op = OVL_CLONE;
623 
624 	/*
625 	 * Don't copy up because of a dedupe request, this wouldn't make sense
626 	 * most of the time (data would be duplicated instead of deduplicated).
627 	 */
628 	if (op == OVL_DEDUPE &&
629 	    (!ovl_inode_upper(file_inode(file_in)) ||
630 	     !ovl_inode_upper(file_inode(file_out))))
631 		return -EPERM;
632 
633 	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len,
634 			    remap_flags, op);
635 }
636 
637 static int ovl_flush(struct file *file, fl_owner_t id)
638 {
639 	struct file *realfile;
640 	const struct cred *old_cred;
641 	int err = 0;
642 
643 	realfile = ovl_real_file(file);
644 	if (IS_ERR(realfile))
645 		return PTR_ERR(realfile);
646 
647 	if (realfile->f_op->flush) {
648 		old_cred = ovl_override_creds(file_inode(file)->i_sb);
649 		err = realfile->f_op->flush(realfile, id);
650 		ovl_revert_creds(old_cred);
651 	}
652 
653 	return err;
654 }
655 
656 const struct file_operations ovl_file_operations = {
657 	.open		= ovl_open,
658 	.release	= ovl_release,
659 	.llseek		= ovl_llseek,
660 	.read_iter	= ovl_read_iter,
661 	.write_iter	= ovl_write_iter,
662 	.fsync		= ovl_fsync,
663 	.mmap		= ovl_mmap,
664 	.fallocate	= ovl_fallocate,
665 	.fadvise	= ovl_fadvise,
666 	.flush		= ovl_flush,
667 	.splice_read    = ovl_splice_read,
668 	.splice_write   = ovl_splice_write,
669 
670 	.copy_file_range	= ovl_copy_file_range,
671 	.remap_file_range	= ovl_remap_file_range,
672 };
673