xref: /linux/fs/overlayfs/file.c (revision 7fc2cd2e4b398c57c9cf961cfea05eadbf34c05c)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2017 Red Hat, Inc.
4  */
5 
6 #include <linux/cred.h>
7 #include <linux/file.h>
8 #include <linux/mount.h>
9 #include <linux/xattr.h>
10 #include <linux/uio.h>
11 #include <linux/uaccess.h>
12 #include <linux/security.h>
13 #include <linux/fs.h>
14 #include <linux/backing-file.h>
15 #include "overlayfs.h"
16 
/*
 * Single-character tag describing @realinode relative to overlay @inode,
 * printed by the open debug message:
 *   'l' - @realinode is not the overlay inode's upper inode
 *   'u' - it is the upper inode and ovl_has_upperdata() is true
 *   'm' - it is the upper inode and ovl_has_upperdata() is false
 */
static char ovl_whatisit(struct inode *inode, struct inode *realinode)
{
	if (realinode == ovl_inode_upper(inode))
		return ovl_has_upperdata(inode) ? 'u' : 'm';

	return 'l';
}
26 
27 static struct file *ovl_open_realfile(const struct file *file,
28 				      const struct path *realpath)
29 {
30 	struct inode *realinode = d_inode(realpath->dentry);
31 	struct inode *inode = file_inode(file);
32 	struct mnt_idmap *real_idmap;
33 	struct file *realfile;
34 	int flags = file->f_flags | OVL_OPEN_FLAGS;
35 	int acc_mode = ACC_MODE(flags);
36 	int err;
37 
38 	if (flags & O_APPEND)
39 		acc_mode |= MAY_APPEND;
40 
41 	with_ovl_creds(inode->i_sb) {
42 		real_idmap = mnt_idmap(realpath->mnt);
43 		err = inode_permission(real_idmap, realinode, MAY_OPEN | acc_mode);
44 		if (err) {
45 			realfile = ERR_PTR(err);
46 		} else {
47 			if (!inode_owner_or_capable(real_idmap, realinode))
48 				flags &= ~O_NOATIME;
49 
50 			realfile = backing_file_open(file_user_path(file),
51 						     flags, realpath, current_cred());
52 		}
53 	}
54 
55 	pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
56 		 file, file, ovl_whatisit(inode, realinode), file->f_flags,
57 		 realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
58 
59 	return realfile;
60 }
61 
62 #define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
63 
64 static int ovl_change_flags(struct file *file, unsigned int flags)
65 {
66 	struct inode *inode = file_inode(file);
67 	int err;
68 
69 	flags &= OVL_SETFL_MASK;
70 
71 	if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
72 		return -EPERM;
73 
74 	if ((flags & O_DIRECT) && !(file->f_mode & FMODE_CAN_ODIRECT))
75 		return -EINVAL;
76 
77 	if (file->f_op->check_flags) {
78 		err = file->f_op->check_flags(flags);
79 		if (err)
80 			return err;
81 	}
82 
83 	spin_lock(&file->f_lock);
84 	file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags;
85 	file->f_iocb_flags = iocb_flags(file);
86 	spin_unlock(&file->f_lock);
87 
88 	return 0;
89 }
90 
91 struct ovl_file {
92 	struct file *realfile;
93 	struct file *upperfile;
94 };
95 
96 struct ovl_file *ovl_file_alloc(struct file *realfile)
97 {
98 	struct ovl_file *of = kzalloc(sizeof(struct ovl_file), GFP_KERNEL);
99 
100 	if (unlikely(!of))
101 		return NULL;
102 
103 	of->realfile = realfile;
104 	return of;
105 }
106 
107 void ovl_file_free(struct ovl_file *of)
108 {
109 	fput(of->realfile);
110 	if (of->upperfile)
111 		fput(of->upperfile);
112 	kfree(of);
113 }
114 
115 static bool ovl_is_real_file(const struct file *realfile,
116 			     const struct path *realpath)
117 {
118 	return file_inode(realfile) == d_inode(realpath->dentry);
119 }
120 
121 static struct file *ovl_real_file_path(const struct file *file,
122 				       const struct path *realpath)
123 {
124 	struct ovl_file *of = file->private_data;
125 	struct file *realfile = of->realfile;
126 
127 	if (WARN_ON_ONCE(!realpath->dentry))
128 		return ERR_PTR(-EIO);
129 
130 	/*
131 	 * If the realfile that we want is not where the data used to be at
132 	 * open time, either we'd been copied up, or it's an fsync of a
133 	 * metacopied file.  We need the upperfile either way, so see if it
134 	 * is already opened and if it is not then open and store it.
135 	 */
136 	if (unlikely(!ovl_is_real_file(realfile, realpath))) {
137 		struct file *upperfile = READ_ONCE(of->upperfile);
138 		struct file *old;
139 
140 		if (!upperfile) { /* Nobody opened upperfile yet */
141 			upperfile = ovl_open_realfile(file, realpath);
142 			if (IS_ERR(upperfile))
143 				return upperfile;
144 
145 			/* Store the upperfile for later */
146 			old = cmpxchg_release(&of->upperfile, NULL, upperfile);
147 			if (old) { /* Someone opened upperfile before us */
148 				fput(upperfile);
149 				upperfile = old;
150 			}
151 		}
152 		/*
153 		 * Stored file must be from the right inode, unless someone's
154 		 * been corrupting the upper layer.
155 		 */
156 		if (WARN_ON_ONCE(!ovl_is_real_file(upperfile, realpath)))
157 			return ERR_PTR(-EIO);
158 
159 		realfile = upperfile;
160 	}
161 
162 	/* Did the flags change since open? */
163 	if (unlikely((file->f_flags ^ realfile->f_flags) & ~OVL_OPEN_FLAGS)) {
164 		int err = ovl_change_flags(realfile, file->f_flags);
165 
166 		if (err)
167 			return ERR_PTR(err);
168 	}
169 
170 	return realfile;
171 }
172 
173 static struct file *ovl_real_file(const struct file *file)
174 {
175 	struct dentry *dentry = file_dentry(file);
176 	struct path realpath;
177 	int err;
178 
179 	if (d_is_dir(dentry)) {
180 		struct file *f = ovl_dir_real_file(file, false);
181 
182 		if (WARN_ON_ONCE(!f))
183 			return ERR_PTR(-EIO);
184 		return f;
185 	}
186 
187 	/* lazy lookup and verify of lowerdata */
188 	err = ovl_verify_lowerdata(dentry);
189 	if (err)
190 		return ERR_PTR(err);
191 
192 	ovl_path_realdata(dentry, &realpath);
193 
194 	return ovl_real_file_path(file, &realpath);
195 }
196 
197 static int ovl_open(struct inode *inode, struct file *file)
198 {
199 	struct dentry *dentry = file_dentry(file);
200 	struct file *realfile;
201 	struct path realpath;
202 	struct ovl_file *of;
203 	int err;
204 
205 	/* lazy lookup and verify lowerdata */
206 	err = ovl_verify_lowerdata(dentry);
207 	if (err)
208 		return err;
209 
210 	err = ovl_maybe_copy_up(dentry, file->f_flags);
211 	if (err)
212 		return err;
213 
214 	/* No longer need these flags, so don't pass them on to underlying fs */
215 	file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
216 
217 	ovl_path_realdata(dentry, &realpath);
218 	if (!realpath.dentry)
219 		return -EIO;
220 
221 	realfile = ovl_open_realfile(file, &realpath);
222 	if (IS_ERR(realfile))
223 		return PTR_ERR(realfile);
224 
225 	of = ovl_file_alloc(realfile);
226 	if (!of) {
227 		fput(realfile);
228 		return -ENOMEM;
229 	}
230 
231 	file->private_data = of;
232 
233 	return 0;
234 }
235 
236 static int ovl_release(struct inode *inode, struct file *file)
237 {
238 	ovl_file_free(file->private_data);
239 	return 0;
240 }
241 
242 static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
243 {
244 	struct inode *inode = file_inode(file);
245 	struct file *realfile;
246 	loff_t ret;
247 
248 	/*
249 	 * The two special cases below do not need to involve real fs,
250 	 * so we can optimizing concurrent callers.
251 	 */
252 	if (offset == 0) {
253 		if (whence == SEEK_CUR)
254 			return file->f_pos;
255 
256 		if (whence == SEEK_SET)
257 			return vfs_setpos(file, 0, 0);
258 	}
259 
260 	realfile = ovl_real_file(file);
261 	if (IS_ERR(realfile))
262 		return PTR_ERR(realfile);
263 
264 	/*
265 	 * Overlay file f_pos is the master copy that is preserved
266 	 * through copy up and modified on read/write, but only real
267 	 * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose
268 	 * limitations that are more strict than ->s_maxbytes for specific
269 	 * files, so we use the real file to perform seeks.
270 	 */
271 	ovl_inode_lock(inode);
272 	realfile->f_pos = file->f_pos;
273 
274 	with_ovl_creds(inode->i_sb)
275 		ret = vfs_llseek(realfile, offset, whence);
276 
277 	file->f_pos = realfile->f_pos;
278 	ovl_inode_unlock(inode);
279 
280 	return ret;
281 }
282 
/* Refresh the overlay inode's attributes (size/mtime) after a change to @file. */
static void ovl_file_modified(struct file *file)
{
	struct inode *inode = file_inode(file);

	ovl_copyattr(inode);
}
288 
289 static void ovl_file_end_write(struct kiocb *iocb, ssize_t ret)
290 {
291 	ovl_file_modified(iocb->ki_filp);
292 }
293 
294 static void ovl_file_accessed(struct file *file)
295 {
296 	struct inode *inode, *upperinode;
297 	struct timespec64 ctime, uctime;
298 	struct timespec64 mtime, umtime;
299 
300 	if (file->f_flags & O_NOATIME)
301 		return;
302 
303 	inode = file_inode(file);
304 	upperinode = ovl_inode_upper(inode);
305 
306 	if (!upperinode)
307 		return;
308 
309 	ctime = inode_get_ctime(inode);
310 	uctime = inode_get_ctime(upperinode);
311 	mtime = inode_get_mtime(inode);
312 	umtime = inode_get_mtime(upperinode);
313 	if ((!timespec64_equal(&mtime, &umtime)) ||
314 	     !timespec64_equal(&ctime, &uctime)) {
315 		inode_set_mtime_to_ts(inode, inode_get_mtime(upperinode));
316 		inode_set_ctime_to_ts(inode, uctime);
317 	}
318 
319 	touch_atime(&file->f_path);
320 }
321 
322 static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
323 {
324 	struct file *file = iocb->ki_filp;
325 	struct file *realfile;
326 	struct backing_file_ctx ctx = {
327 		.cred = ovl_creds(file_inode(file)->i_sb),
328 		.accessed = ovl_file_accessed,
329 	};
330 
331 	if (!iov_iter_count(iter))
332 		return 0;
333 
334 	realfile = ovl_real_file(file);
335 	if (IS_ERR(realfile))
336 		return PTR_ERR(realfile);
337 
338 	return backing_file_read_iter(realfile, iter, iocb, iocb->ki_flags,
339 				      &ctx);
340 }
341 
342 static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
343 {
344 	struct file *file = iocb->ki_filp;
345 	struct inode *inode = file_inode(file);
346 	struct file *realfile;
347 	ssize_t ret;
348 	int ifl = iocb->ki_flags;
349 	struct backing_file_ctx ctx = {
350 		.cred = ovl_creds(inode->i_sb),
351 		.end_write = ovl_file_end_write,
352 	};
353 
354 	if (!iov_iter_count(iter))
355 		return 0;
356 
357 	inode_lock(inode);
358 	/* Update mode */
359 	ovl_copyattr(inode);
360 
361 	realfile = ovl_real_file(file);
362 	ret = PTR_ERR(realfile);
363 	if (IS_ERR(realfile))
364 		goto out_unlock;
365 
366 	if (!ovl_should_sync(OVL_FS(inode->i_sb)))
367 		ifl &= ~(IOCB_DSYNC | IOCB_SYNC);
368 
369 	ret = backing_file_write_iter(realfile, iter, iocb, ifl, &ctx);
370 
371 out_unlock:
372 	inode_unlock(inode);
373 
374 	return ret;
375 }
376 
377 static ssize_t ovl_splice_read(struct file *in, loff_t *ppos,
378 			       struct pipe_inode_info *pipe, size_t len,
379 			       unsigned int flags)
380 {
381 	struct file *realfile;
382 	ssize_t ret;
383 	struct backing_file_ctx ctx = {
384 		.cred = ovl_creds(file_inode(in)->i_sb),
385 		.accessed = ovl_file_accessed,
386 	};
387 	struct kiocb iocb;
388 
389 	realfile = ovl_real_file(in);
390 	if (IS_ERR(realfile))
391 		return PTR_ERR(realfile);
392 
393 	init_sync_kiocb(&iocb, in);
394 	iocb.ki_pos = *ppos;
395 	ret = backing_file_splice_read(realfile, &iocb, pipe, len, flags, &ctx);
396 	*ppos = iocb.ki_pos;
397 
398 	return ret;
399 }
400 
401 /*
402  * Calling iter_file_splice_write() directly from overlay's f_op may deadlock
403  * due to lock order inversion between pipe->mutex in iter_file_splice_write()
404  * and file_start_write(realfile) in ovl_write_iter().
405  *
406  * So do everything ovl_write_iter() does and call iter_file_splice_write() on
407  * the real file.
408  */
409 static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
410 				loff_t *ppos, size_t len, unsigned int flags)
411 {
412 	struct file *realfile;
413 	struct inode *inode = file_inode(out);
414 	ssize_t ret;
415 	struct backing_file_ctx ctx = {
416 		.cred = ovl_creds(inode->i_sb),
417 		.end_write = ovl_file_end_write,
418 	};
419 	struct kiocb iocb;
420 
421 	inode_lock(inode);
422 	/* Update mode */
423 	ovl_copyattr(inode);
424 
425 	realfile = ovl_real_file(out);
426 	ret = PTR_ERR(realfile);
427 	if (IS_ERR(realfile))
428 		goto out_unlock;
429 
430 	init_sync_kiocb(&iocb, out);
431 	iocb.ki_pos = *ppos;
432 	ret = backing_file_splice_write(pipe, realfile, &iocb, len, flags, &ctx);
433 	*ppos = iocb.ki_pos;
434 
435 out_unlock:
436 	inode_unlock(inode);
437 
438 	return ret;
439 }
440 
441 static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
442 {
443 	struct dentry *dentry = file_dentry(file);
444 	enum ovl_path_type type;
445 	struct path upperpath;
446 	struct file *upperfile;
447 	int ret;
448 
449 	ret = ovl_sync_status(OVL_FS(file_inode(file)->i_sb));
450 	if (ret <= 0)
451 		return ret;
452 
453 	/* Don't sync lower file for fear of receiving EROFS error */
454 	type = ovl_path_type(dentry);
455 	if (!OVL_TYPE_UPPER(type) || (datasync && OVL_TYPE_MERGE(type)))
456 		return 0;
457 
458 	ovl_path_upper(dentry, &upperpath);
459 	upperfile = ovl_real_file_path(file, &upperpath);
460 	if (IS_ERR(upperfile))
461 		return PTR_ERR(upperfile);
462 
463 	with_ovl_creds(file_inode(file)->i_sb)
464 		return vfs_fsync_range(upperfile, start, end, datasync);
465 }
466 
467 static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
468 {
469 	struct ovl_file *of = file->private_data;
470 	struct backing_file_ctx ctx = {
471 		.cred = ovl_creds(file_inode(file)->i_sb),
472 		.accessed = ovl_file_accessed,
473 	};
474 
475 	return backing_file_mmap(of->realfile, vma, &ctx);
476 }
477 
478 static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
479 {
480 	struct inode *inode = file_inode(file);
481 	struct file *realfile;
482 	int ret;
483 
484 	inode_lock(inode);
485 	/* Update mode */
486 	ovl_copyattr(inode);
487 	ret = file_remove_privs(file);
488 	if (ret)
489 		goto out_unlock;
490 
491 	realfile = ovl_real_file(file);
492 	ret = PTR_ERR(realfile);
493 	if (IS_ERR(realfile))
494 		goto out_unlock;
495 
496 	with_ovl_creds(inode->i_sb)
497 		ret = vfs_fallocate(realfile, mode, offset, len);
498 
499 	/* Update size */
500 	ovl_file_modified(file);
501 
502 out_unlock:
503 	inode_unlock(inode);
504 
505 	return ret;
506 }
507 
508 static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
509 {
510 	struct file *realfile;
511 
512 	realfile = ovl_real_file(file);
513 	if (IS_ERR(realfile))
514 		return PTR_ERR(realfile);
515 
516 	with_ovl_creds(file_inode(file)->i_sb)
517 		return vfs_fadvise(realfile, offset, len, advice);
518 }
519 
520 enum ovl_copyop {
521 	OVL_COPY,
522 	OVL_CLONE,
523 	OVL_DEDUPE,
524 };
525 
526 static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
527 			    struct file *file_out, loff_t pos_out,
528 			    loff_t len, unsigned int flags, enum ovl_copyop op)
529 {
530 	struct inode *inode_out = file_inode(file_out);
531 	struct file *realfile_in, *realfile_out;
532 	loff_t ret;
533 
534 	inode_lock(inode_out);
535 	if (op != OVL_DEDUPE) {
536 		/* Update mode */
537 		ovl_copyattr(inode_out);
538 		ret = file_remove_privs(file_out);
539 		if (ret)
540 			goto out_unlock;
541 	}
542 
543 	realfile_out = ovl_real_file(file_out);
544 	ret = PTR_ERR(realfile_out);
545 	if (IS_ERR(realfile_out))
546 		goto out_unlock;
547 
548 	realfile_in = ovl_real_file(file_in);
549 	ret = PTR_ERR(realfile_in);
550 	if (IS_ERR(realfile_in))
551 		goto out_unlock;
552 
553 	with_ovl_creds(file_inode(file_out)->i_sb) {
554 		switch (op) {
555 		case OVL_COPY:
556 			ret = vfs_copy_file_range(realfile_in, pos_in,
557 						  realfile_out, pos_out, len, flags);
558 			break;
559 
560 		case OVL_CLONE:
561 			ret = vfs_clone_file_range(realfile_in, pos_in,
562 						   realfile_out, pos_out, len, flags);
563 			break;
564 
565 		case OVL_DEDUPE:
566 			ret = vfs_dedupe_file_range_one(realfile_in, pos_in,
567 							realfile_out, pos_out, len,
568 							flags);
569 			break;
570 		}
571 	}
572 
573 	/* Update size */
574 	ovl_file_modified(file_out);
575 
576 out_unlock:
577 	inode_unlock(inode_out);
578 
579 	return ret;
580 }
581 
582 static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
583 				   struct file *file_out, loff_t pos_out,
584 				   size_t len, unsigned int flags)
585 {
586 	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
587 			    OVL_COPY);
588 }
589 
590 static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in,
591 				   struct file *file_out, loff_t pos_out,
592 				   loff_t len, unsigned int remap_flags)
593 {
594 	enum ovl_copyop op;
595 
596 	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
597 		return -EINVAL;
598 
599 	if (remap_flags & REMAP_FILE_DEDUP)
600 		op = OVL_DEDUPE;
601 	else
602 		op = OVL_CLONE;
603 
604 	/*
605 	 * Don't copy up because of a dedupe request, this wouldn't make sense
606 	 * most of the time (data would be duplicated instead of deduplicated).
607 	 */
608 	if (op == OVL_DEDUPE &&
609 	    (!ovl_inode_upper(file_inode(file_in)) ||
610 	     !ovl_inode_upper(file_inode(file_out))))
611 		return -EPERM;
612 
613 	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len,
614 			    remap_flags, op);
615 }
616 
617 static int ovl_flush(struct file *file, fl_owner_t id)
618 {
619 	struct file *realfile;
620 	int err = 0;
621 
622 	realfile = ovl_real_file(file);
623 	if (IS_ERR(realfile))
624 		return PTR_ERR(realfile);
625 
626 	if (realfile->f_op->flush) {
627 		with_ovl_creds(file_inode(file)->i_sb)
628 			err = realfile->f_op->flush(realfile, id);
629 	}
630 
631 	return err;
632 }
633 
634 const struct file_operations ovl_file_operations = {
635 	.open		= ovl_open,
636 	.release	= ovl_release,
637 	.llseek		= ovl_llseek,
638 	.read_iter	= ovl_read_iter,
639 	.write_iter	= ovl_write_iter,
640 	.fsync		= ovl_fsync,
641 	.mmap		= ovl_mmap,
642 	.fallocate	= ovl_fallocate,
643 	.fadvise	= ovl_fadvise,
644 	.flush		= ovl_flush,
645 	.splice_read    = ovl_splice_read,
646 	.splice_write   = ovl_splice_write,
647 
648 	.copy_file_range	= ovl_copy_file_range,
649 	.remap_file_range	= ovl_remap_file_range,
650 };
651