xref: /linux/fs/overlayfs/file.c (revision 94a3f60af5dca72fffc041a64d4c3de5a066f98e)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2017 Red Hat, Inc.
4  */
5 
6 #include <linux/cred.h>
7 #include <linux/file.h>
8 #include <linux/filelock.h>
9 #include <linux/mount.h>
10 #include <linux/xattr.h>
11 #include <linux/uio.h>
12 #include <linux/uaccess.h>
13 #include <linux/security.h>
14 #include <linux/fs.h>
15 #include <linux/backing-file.h>
16 #include "overlayfs.h"
17 
/*
 * Classify @realinode relative to the overlay @inode with a one-letter tag
 * (used in the pr_debug() output of ovl_open_realfile()):
 *   'l' - @realinode is not the overlay inode's upper inode
 *   'u' - it is the upper inode and ovl_has_upperdata() is true
 *   'm' - it is the upper inode and ovl_has_upperdata() is false
 */
static char ovl_whatisit(struct inode *inode, struct inode *realinode)
{
	if (realinode != ovl_inode_upper(inode))
		return 'l';

	return ovl_has_upperdata(inode) ? 'u' : 'm';
}
27 
28 static struct file *ovl_open_realfile(const struct file *file,
29 				      const struct path *realpath)
30 {
31 	struct inode *realinode = d_inode(realpath->dentry);
32 	struct inode *inode = file_inode(file);
33 	struct mnt_idmap *real_idmap;
34 	struct file *realfile;
35 	int flags = file->f_flags | OVL_OPEN_FLAGS;
36 	int acc_mode = ACC_MODE(flags);
37 	int err;
38 
39 	if (flags & O_APPEND)
40 		acc_mode |= MAY_APPEND;
41 
42 	with_ovl_creds(inode->i_sb) {
43 		real_idmap = mnt_idmap(realpath->mnt);
44 		err = inode_permission(real_idmap, realinode, MAY_OPEN | acc_mode);
45 		if (err) {
46 			realfile = ERR_PTR(err);
47 		} else {
48 			if (!inode_owner_or_capable(real_idmap, realinode))
49 				flags &= ~O_NOATIME;
50 
51 			realfile = backing_file_open(file_user_path(file),
52 						     flags, realpath, current_cred());
53 		}
54 	}
55 
56 	pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
57 		 file, file, ovl_whatisit(inode, realinode), file->f_flags,
58 		 realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
59 
60 	return realfile;
61 }
62 
63 #define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
64 
65 static int ovl_change_flags(struct file *file, unsigned int flags)
66 {
67 	struct inode *inode = file_inode(file);
68 	int err;
69 
70 	flags &= OVL_SETFL_MASK;
71 
72 	if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
73 		return -EPERM;
74 
75 	if ((flags & O_DIRECT) && !(file->f_mode & FMODE_CAN_ODIRECT))
76 		return -EINVAL;
77 
78 	if (file->f_op->check_flags) {
79 		err = file->f_op->check_flags(flags);
80 		if (err)
81 			return err;
82 	}
83 
84 	spin_lock(&file->f_lock);
85 	file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags;
86 	file->f_iocb_flags = iocb_flags(file);
87 	spin_unlock(&file->f_lock);
88 
89 	return 0;
90 }
91 
/*
 * Private data attached to an open overlay file (ovl_open() stores it in
 * file->private_data).  Both file references are dropped by ovl_file_free().
 */
struct ovl_file {
	struct file *realfile;	/* real file supplied to ovl_file_alloc() */
	struct file *upperfile;	/* opened lazily by ovl_real_file_path(); NULL until then */
};
96 
97 struct ovl_file *ovl_file_alloc(struct file *realfile)
98 {
99 	struct ovl_file *of = kzalloc(sizeof(struct ovl_file), GFP_KERNEL);
100 
101 	if (unlikely(!of))
102 		return NULL;
103 
104 	of->realfile = realfile;
105 	return of;
106 }
107 
108 void ovl_file_free(struct ovl_file *of)
109 {
110 	fput(of->realfile);
111 	if (of->upperfile)
112 		fput(of->upperfile);
113 	kfree(of);
114 }
115 
116 static bool ovl_is_real_file(const struct file *realfile,
117 			     const struct path *realpath)
118 {
119 	return file_inode(realfile) == d_inode(realpath->dentry);
120 }
121 
122 static struct file *ovl_real_file_path(const struct file *file,
123 				       const struct path *realpath)
124 {
125 	struct ovl_file *of = file->private_data;
126 	struct file *realfile = of->realfile;
127 
128 	if (WARN_ON_ONCE(!realpath->dentry))
129 		return ERR_PTR(-EIO);
130 
131 	/*
132 	 * If the realfile that we want is not where the data used to be at
133 	 * open time, either we'd been copied up, or it's an fsync of a
134 	 * metacopied file.  We need the upperfile either way, so see if it
135 	 * is already opened and if it is not then open and store it.
136 	 */
137 	if (unlikely(!ovl_is_real_file(realfile, realpath))) {
138 		struct file *upperfile = READ_ONCE(of->upperfile);
139 		struct file *old;
140 
141 		if (!upperfile) { /* Nobody opened upperfile yet */
142 			upperfile = ovl_open_realfile(file, realpath);
143 			if (IS_ERR(upperfile))
144 				return upperfile;
145 
146 			/* Store the upperfile for later */
147 			old = cmpxchg_release(&of->upperfile, NULL, upperfile);
148 			if (old) { /* Someone opened upperfile before us */
149 				fput(upperfile);
150 				upperfile = old;
151 			}
152 		}
153 		/*
154 		 * Stored file must be from the right inode, unless someone's
155 		 * been corrupting the upper layer.
156 		 */
157 		if (WARN_ON_ONCE(!ovl_is_real_file(upperfile, realpath)))
158 			return ERR_PTR(-EIO);
159 
160 		realfile = upperfile;
161 	}
162 
163 	/* Did the flags change since open? */
164 	if (unlikely((file->f_flags ^ realfile->f_flags) & ~OVL_OPEN_FLAGS)) {
165 		int err = ovl_change_flags(realfile, file->f_flags);
166 
167 		if (err)
168 			return ERR_PTR(err);
169 	}
170 
171 	return realfile;
172 }
173 
174 static struct file *ovl_real_file(const struct file *file)
175 {
176 	struct dentry *dentry = file_dentry(file);
177 	struct path realpath;
178 	int err;
179 
180 	if (d_is_dir(dentry)) {
181 		struct file *f = ovl_dir_real_file(file, false);
182 
183 		if (WARN_ON_ONCE(!f))
184 			return ERR_PTR(-EIO);
185 		return f;
186 	}
187 
188 	/* lazy lookup and verify of lowerdata */
189 	err = ovl_verify_lowerdata(dentry);
190 	if (err)
191 		return ERR_PTR(err);
192 
193 	ovl_path_realdata(dentry, &realpath);
194 
195 	return ovl_real_file_path(file, &realpath);
196 }
197 
198 static int ovl_open(struct inode *inode, struct file *file)
199 {
200 	struct dentry *dentry = file_dentry(file);
201 	struct file *realfile;
202 	struct path realpath;
203 	struct ovl_file *of;
204 	int err;
205 
206 	/* lazy lookup and verify lowerdata */
207 	err = ovl_verify_lowerdata(dentry);
208 	if (err)
209 		return err;
210 
211 	err = ovl_maybe_copy_up(dentry, file->f_flags);
212 	if (err)
213 		return err;
214 
215 	/* No longer need these flags, so don't pass them on to underlying fs */
216 	file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
217 
218 	ovl_path_realdata(dentry, &realpath);
219 	if (!realpath.dentry)
220 		return -EIO;
221 
222 	realfile = ovl_open_realfile(file, &realpath);
223 	if (IS_ERR(realfile))
224 		return PTR_ERR(realfile);
225 
226 	of = ovl_file_alloc(realfile);
227 	if (!of) {
228 		fput(realfile);
229 		return -ENOMEM;
230 	}
231 
232 	file->private_data = of;
233 
234 	return 0;
235 }
236 
237 static int ovl_release(struct inode *inode, struct file *file)
238 {
239 	ovl_file_free(file->private_data);
240 	return 0;
241 }
242 
243 static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
244 {
245 	struct inode *inode = file_inode(file);
246 	struct file *realfile;
247 	loff_t ret;
248 
249 	/*
250 	 * The two special cases below do not need to involve real fs,
251 	 * so we can optimizing concurrent callers.
252 	 */
253 	if (offset == 0) {
254 		if (whence == SEEK_CUR)
255 			return file->f_pos;
256 
257 		if (whence == SEEK_SET)
258 			return vfs_setpos(file, 0, 0);
259 	}
260 
261 	realfile = ovl_real_file(file);
262 	if (IS_ERR(realfile))
263 		return PTR_ERR(realfile);
264 
265 	/*
266 	 * Overlay file f_pos is the master copy that is preserved
267 	 * through copy up and modified on read/write, but only real
268 	 * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose
269 	 * limitations that are more strict than ->s_maxbytes for specific
270 	 * files, so we use the real file to perform seeks.
271 	 */
272 	ovl_inode_lock(inode);
273 	realfile->f_pos = file->f_pos;
274 
275 	with_ovl_creds(inode->i_sb)
276 		ret = vfs_llseek(realfile, offset, whence);
277 
278 	file->f_pos = realfile->f_pos;
279 	ovl_inode_unlock(inode);
280 
281 	return ret;
282 }
283 
/* Refresh the overlay inode's attributes (size/mtime) after a modification */
static void ovl_file_modified(struct file *file)
{
	struct inode *inode = file_inode(file);

	ovl_copyattr(inode);
}
289 
290 static void ovl_file_end_write(struct kiocb *iocb, ssize_t ret)
291 {
292 	ovl_file_modified(iocb->ki_filp);
293 }
294 
295 static void ovl_file_accessed(struct file *file)
296 {
297 	struct inode *inode, *upperinode;
298 	struct timespec64 ctime, uctime;
299 	struct timespec64 mtime, umtime;
300 
301 	if (file->f_flags & O_NOATIME)
302 		return;
303 
304 	inode = file_inode(file);
305 	upperinode = ovl_inode_upper(inode);
306 
307 	if (!upperinode)
308 		return;
309 
310 	ctime = inode_get_ctime(inode);
311 	uctime = inode_get_ctime(upperinode);
312 	mtime = inode_get_mtime(inode);
313 	umtime = inode_get_mtime(upperinode);
314 	if ((!timespec64_equal(&mtime, &umtime)) ||
315 	     !timespec64_equal(&ctime, &uctime)) {
316 		inode_set_mtime_to_ts(inode, inode_get_mtime(upperinode));
317 		inode_set_ctime_to_ts(inode, uctime);
318 	}
319 
320 	touch_atime(&file->f_path);
321 }
322 
323 static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
324 {
325 	struct file *file = iocb->ki_filp;
326 	struct file *realfile;
327 	struct backing_file_ctx ctx = {
328 		.cred = ovl_creds(file_inode(file)->i_sb),
329 		.accessed = ovl_file_accessed,
330 	};
331 
332 	if (!iov_iter_count(iter))
333 		return 0;
334 
335 	realfile = ovl_real_file(file);
336 	if (IS_ERR(realfile))
337 		return PTR_ERR(realfile);
338 
339 	return backing_file_read_iter(realfile, iter, iocb, iocb->ki_flags,
340 				      &ctx);
341 }
342 
343 static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
344 {
345 	struct file *file = iocb->ki_filp;
346 	struct inode *inode = file_inode(file);
347 	struct file *realfile;
348 	ssize_t ret;
349 	int ifl = iocb->ki_flags;
350 	struct backing_file_ctx ctx = {
351 		.cred = ovl_creds(inode->i_sb),
352 		.end_write = ovl_file_end_write,
353 	};
354 
355 	if (!iov_iter_count(iter))
356 		return 0;
357 
358 	inode_lock(inode);
359 	/* Update mode */
360 	ovl_copyattr(inode);
361 
362 	realfile = ovl_real_file(file);
363 	ret = PTR_ERR(realfile);
364 	if (IS_ERR(realfile))
365 		goto out_unlock;
366 
367 	if (!ovl_should_sync(OVL_FS(inode->i_sb)))
368 		ifl &= ~(IOCB_DSYNC | IOCB_SYNC);
369 
370 	ret = backing_file_write_iter(realfile, iter, iocb, ifl, &ctx);
371 
372 out_unlock:
373 	inode_unlock(inode);
374 
375 	return ret;
376 }
377 
378 static ssize_t ovl_splice_read(struct file *in, loff_t *ppos,
379 			       struct pipe_inode_info *pipe, size_t len,
380 			       unsigned int flags)
381 {
382 	struct file *realfile;
383 	ssize_t ret;
384 	struct backing_file_ctx ctx = {
385 		.cred = ovl_creds(file_inode(in)->i_sb),
386 		.accessed = ovl_file_accessed,
387 	};
388 	struct kiocb iocb;
389 
390 	realfile = ovl_real_file(in);
391 	if (IS_ERR(realfile))
392 		return PTR_ERR(realfile);
393 
394 	init_sync_kiocb(&iocb, in);
395 	iocb.ki_pos = *ppos;
396 	ret = backing_file_splice_read(realfile, &iocb, pipe, len, flags, &ctx);
397 	*ppos = iocb.ki_pos;
398 
399 	return ret;
400 }
401 
402 /*
403  * Calling iter_file_splice_write() directly from overlay's f_op may deadlock
404  * due to lock order inversion between pipe->mutex in iter_file_splice_write()
405  * and file_start_write(realfile) in ovl_write_iter().
406  *
407  * So do everything ovl_write_iter() does and call iter_file_splice_write() on
408  * the real file.
409  */
410 static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
411 				loff_t *ppos, size_t len, unsigned int flags)
412 {
413 	struct file *realfile;
414 	struct inode *inode = file_inode(out);
415 	ssize_t ret;
416 	struct backing_file_ctx ctx = {
417 		.cred = ovl_creds(inode->i_sb),
418 		.end_write = ovl_file_end_write,
419 	};
420 	struct kiocb iocb;
421 
422 	inode_lock(inode);
423 	/* Update mode */
424 	ovl_copyattr(inode);
425 
426 	realfile = ovl_real_file(out);
427 	ret = PTR_ERR(realfile);
428 	if (IS_ERR(realfile))
429 		goto out_unlock;
430 
431 	init_sync_kiocb(&iocb, out);
432 	iocb.ki_pos = *ppos;
433 	ret = backing_file_splice_write(pipe, realfile, &iocb, len, flags, &ctx);
434 	*ppos = iocb.ki_pos;
435 
436 out_unlock:
437 	inode_unlock(inode);
438 
439 	return ret;
440 }
441 
442 static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
443 {
444 	struct dentry *dentry = file_dentry(file);
445 	enum ovl_path_type type;
446 	struct path upperpath;
447 	struct file *upperfile;
448 	int ret;
449 
450 	ret = ovl_sync_status(OVL_FS(file_inode(file)->i_sb));
451 	if (ret <= 0)
452 		return ret;
453 
454 	/* Don't sync lower file for fear of receiving EROFS error */
455 	type = ovl_path_type(dentry);
456 	if (!OVL_TYPE_UPPER(type) || (datasync && OVL_TYPE_MERGE(type)))
457 		return 0;
458 
459 	ovl_path_upper(dentry, &upperpath);
460 	upperfile = ovl_real_file_path(file, &upperpath);
461 	if (IS_ERR(upperfile))
462 		return PTR_ERR(upperfile);
463 
464 	with_ovl_creds(file_inode(file)->i_sb)
465 		return vfs_fsync_range(upperfile, start, end, datasync);
466 }
467 
468 static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
469 {
470 	struct ovl_file *of = file->private_data;
471 	struct backing_file_ctx ctx = {
472 		.cred = ovl_creds(file_inode(file)->i_sb),
473 		.accessed = ovl_file_accessed,
474 	};
475 
476 	return backing_file_mmap(of->realfile, vma, &ctx);
477 }
478 
479 static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
480 {
481 	struct inode *inode = file_inode(file);
482 	struct file *realfile;
483 	int ret;
484 
485 	inode_lock(inode);
486 	/* Update mode */
487 	ovl_copyattr(inode);
488 	ret = file_remove_privs(file);
489 	if (ret)
490 		goto out_unlock;
491 
492 	realfile = ovl_real_file(file);
493 	ret = PTR_ERR(realfile);
494 	if (IS_ERR(realfile))
495 		goto out_unlock;
496 
497 	with_ovl_creds(inode->i_sb)
498 		ret = vfs_fallocate(realfile, mode, offset, len);
499 
500 	/* Update size */
501 	ovl_file_modified(file);
502 
503 out_unlock:
504 	inode_unlock(inode);
505 
506 	return ret;
507 }
508 
509 static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
510 {
511 	struct file *realfile;
512 
513 	realfile = ovl_real_file(file);
514 	if (IS_ERR(realfile))
515 		return PTR_ERR(realfile);
516 
517 	with_ovl_creds(file_inode(file)->i_sb)
518 		return vfs_fadvise(realfile, offset, len, advice);
519 }
520 
/* Selects which VFS helper ovl_copyfile() runs on the real files */
enum ovl_copyop {
	OVL_COPY,	/* vfs_copy_file_range() */
	OVL_CLONE,	/* vfs_clone_file_range() */
	OVL_DEDUPE,	/* vfs_dedupe_file_range_one() */
};
526 
527 static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
528 			    struct file *file_out, loff_t pos_out,
529 			    loff_t len, unsigned int flags, enum ovl_copyop op)
530 {
531 	struct inode *inode_out = file_inode(file_out);
532 	struct file *realfile_in, *realfile_out;
533 	loff_t ret;
534 
535 	inode_lock(inode_out);
536 	if (op != OVL_DEDUPE) {
537 		/* Update mode */
538 		ovl_copyattr(inode_out);
539 		ret = file_remove_privs(file_out);
540 		if (ret)
541 			goto out_unlock;
542 	}
543 
544 	realfile_out = ovl_real_file(file_out);
545 	ret = PTR_ERR(realfile_out);
546 	if (IS_ERR(realfile_out))
547 		goto out_unlock;
548 
549 	realfile_in = ovl_real_file(file_in);
550 	ret = PTR_ERR(realfile_in);
551 	if (IS_ERR(realfile_in))
552 		goto out_unlock;
553 
554 	with_ovl_creds(file_inode(file_out)->i_sb) {
555 		switch (op) {
556 		case OVL_COPY:
557 			ret = vfs_copy_file_range(realfile_in, pos_in,
558 						  realfile_out, pos_out, len, flags);
559 			break;
560 
561 		case OVL_CLONE:
562 			ret = vfs_clone_file_range(realfile_in, pos_in,
563 						   realfile_out, pos_out, len, flags);
564 			break;
565 
566 		case OVL_DEDUPE:
567 			ret = vfs_dedupe_file_range_one(realfile_in, pos_in,
568 							realfile_out, pos_out, len,
569 							flags);
570 			break;
571 		}
572 	}
573 
574 	/* Update size */
575 	ovl_file_modified(file_out);
576 
577 out_unlock:
578 	inode_unlock(inode_out);
579 
580 	return ret;
581 }
582 
583 static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
584 				   struct file *file_out, loff_t pos_out,
585 				   size_t len, unsigned int flags)
586 {
587 	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
588 			    OVL_COPY);
589 }
590 
591 static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in,
592 				   struct file *file_out, loff_t pos_out,
593 				   loff_t len, unsigned int remap_flags)
594 {
595 	enum ovl_copyop op;
596 
597 	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
598 		return -EINVAL;
599 
600 	if (remap_flags & REMAP_FILE_DEDUP)
601 		op = OVL_DEDUPE;
602 	else
603 		op = OVL_CLONE;
604 
605 	/*
606 	 * Don't copy up because of a dedupe request, this wouldn't make sense
607 	 * most of the time (data would be duplicated instead of deduplicated).
608 	 */
609 	if (op == OVL_DEDUPE &&
610 	    (!ovl_inode_upper(file_inode(file_in)) ||
611 	     !ovl_inode_upper(file_inode(file_out))))
612 		return -EPERM;
613 
614 	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len,
615 			    remap_flags, op);
616 }
617 
618 static int ovl_flush(struct file *file, fl_owner_t id)
619 {
620 	struct file *realfile;
621 	int err = 0;
622 
623 	realfile = ovl_real_file(file);
624 	if (IS_ERR(realfile))
625 		return PTR_ERR(realfile);
626 
627 	if (realfile->f_op->flush) {
628 		with_ovl_creds(file_inode(file)->i_sb)
629 			err = realfile->f_op->flush(realfile, id);
630 	}
631 
632 	return err;
633 }
634 
635 const struct file_operations ovl_file_operations = {
636 	.open		= ovl_open,
637 	.release	= ovl_release,
638 	.llseek		= ovl_llseek,
639 	.read_iter	= ovl_read_iter,
640 	.write_iter	= ovl_write_iter,
641 	.fsync		= ovl_fsync,
642 	.mmap		= ovl_mmap,
643 	.fallocate	= ovl_fallocate,
644 	.fadvise	= ovl_fadvise,
645 	.flush		= ovl_flush,
646 	.splice_read    = ovl_splice_read,
647 	.splice_write   = ovl_splice_write,
648 
649 	.copy_file_range	= ovl_copy_file_range,
650 	.remap_file_range	= ovl_remap_file_range,
651 	.setlease		= generic_setlease,
652 };
653