xref: /linux/fs/overlayfs/file.c (revision 6e7fd890f1d6ac83805409e9c346240de2705584)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2017 Red Hat, Inc.
4  */
5 
6 #include <linux/cred.h>
7 #include <linux/file.h>
8 #include <linux/mount.h>
9 #include <linux/xattr.h>
10 #include <linux/uio.h>
11 #include <linux/uaccess.h>
12 #include <linux/security.h>
13 #include <linux/fs.h>
14 #include <linux/backing-file.h>
15 #include "overlayfs.h"
16 
/*
 * Produce a one-character tag describing @realinode relative to the overlay
 * @inode, used by the debug message in ovl_open_realfile():
 *   'l' - @realinode is not the overlay inode's upper inode
 *   'u' - @realinode is the upper inode and ovl_has_upperdata() is true
 *   'm' - @realinode is the upper inode and ovl_has_upperdata() is false
 */
static char ovl_whatisit(struct inode *inode, struct inode *realinode)
{
	if (realinode == ovl_inode_upper(inode))
		return ovl_has_upperdata(inode) ? 'u' : 'm';

	return 'l';
}
26 
27 static struct file *ovl_open_realfile(const struct file *file,
28 				      const struct path *realpath)
29 {
30 	struct inode *realinode = d_inode(realpath->dentry);
31 	struct inode *inode = file_inode(file);
32 	struct mnt_idmap *real_idmap;
33 	struct file *realfile;
34 	const struct cred *old_cred;
35 	int flags = file->f_flags | OVL_OPEN_FLAGS;
36 	int acc_mode = ACC_MODE(flags);
37 	int err;
38 
39 	if (flags & O_APPEND)
40 		acc_mode |= MAY_APPEND;
41 
42 	old_cred = ovl_override_creds(inode->i_sb);
43 	real_idmap = mnt_idmap(realpath->mnt);
44 	err = inode_permission(real_idmap, realinode, MAY_OPEN | acc_mode);
45 	if (err) {
46 		realfile = ERR_PTR(err);
47 	} else {
48 		if (!inode_owner_or_capable(real_idmap, realinode))
49 			flags &= ~O_NOATIME;
50 
51 		realfile = backing_file_open(&file->f_path, flags, realpath,
52 					     current_cred());
53 	}
54 	revert_creds(old_cred);
55 
56 	pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
57 		 file, file, ovl_whatisit(inode, realinode), file->f_flags,
58 		 realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
59 
60 	return realfile;
61 }
62 
63 #define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
64 
65 static int ovl_change_flags(struct file *file, unsigned int flags)
66 {
67 	struct inode *inode = file_inode(file);
68 	int err;
69 
70 	flags &= OVL_SETFL_MASK;
71 
72 	if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
73 		return -EPERM;
74 
75 	if ((flags & O_DIRECT) && !(file->f_mode & FMODE_CAN_ODIRECT))
76 		return -EINVAL;
77 
78 	if (file->f_op->check_flags) {
79 		err = file->f_op->check_flags(flags);
80 		if (err)
81 			return err;
82 	}
83 
84 	spin_lock(&file->f_lock);
85 	file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags;
86 	file->f_iocb_flags = iocb_flags(file);
87 	spin_unlock(&file->f_lock);
88 
89 	return 0;
90 }
91 
92 static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
93 			       bool allow_meta)
94 {
95 	struct dentry *dentry = file_dentry(file);
96 	struct path realpath;
97 	int err;
98 
99 	real->flags = 0;
100 	real->file = file->private_data;
101 
102 	if (allow_meta) {
103 		ovl_path_real(dentry, &realpath);
104 	} else {
105 		/* lazy lookup and verify of lowerdata */
106 		err = ovl_verify_lowerdata(dentry);
107 		if (err)
108 			return err;
109 
110 		ovl_path_realdata(dentry, &realpath);
111 	}
112 	if (!realpath.dentry)
113 		return -EIO;
114 
115 	/* Has it been copied up since we'd opened it? */
116 	if (unlikely(file_inode(real->file) != d_inode(realpath.dentry))) {
117 		real->flags = FDPUT_FPUT;
118 		real->file = ovl_open_realfile(file, &realpath);
119 
120 		return PTR_ERR_OR_ZERO(real->file);
121 	}
122 
123 	/* Did the flags change since open? */
124 	if (unlikely((file->f_flags ^ real->file->f_flags) & ~OVL_OPEN_FLAGS))
125 		return ovl_change_flags(real->file, file->f_flags);
126 
127 	return 0;
128 }
129 
130 static int ovl_real_fdget(const struct file *file, struct fd *real)
131 {
132 	if (d_is_dir(file_dentry(file))) {
133 		real->flags = 0;
134 		real->file = ovl_dir_real_file(file, false);
135 
136 		return PTR_ERR_OR_ZERO(real->file);
137 	}
138 
139 	return ovl_real_fdget_meta(file, real, false);
140 }
141 
142 static int ovl_open(struct inode *inode, struct file *file)
143 {
144 	struct dentry *dentry = file_dentry(file);
145 	struct file *realfile;
146 	struct path realpath;
147 	int err;
148 
149 	/* lazy lookup and verify lowerdata */
150 	err = ovl_verify_lowerdata(dentry);
151 	if (err)
152 		return err;
153 
154 	err = ovl_maybe_copy_up(dentry, file->f_flags);
155 	if (err)
156 		return err;
157 
158 	/* No longer need these flags, so don't pass them on to underlying fs */
159 	file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
160 
161 	ovl_path_realdata(dentry, &realpath);
162 	if (!realpath.dentry)
163 		return -EIO;
164 
165 	realfile = ovl_open_realfile(file, &realpath);
166 	if (IS_ERR(realfile))
167 		return PTR_ERR(realfile);
168 
169 	file->private_data = realfile;
170 
171 	return 0;
172 }
173 
174 static int ovl_release(struct inode *inode, struct file *file)
175 {
176 	fput(file->private_data);
177 
178 	return 0;
179 }
180 
181 static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
182 {
183 	struct inode *inode = file_inode(file);
184 	struct fd real;
185 	const struct cred *old_cred;
186 	loff_t ret;
187 
188 	/*
189 	 * The two special cases below do not need to involve real fs,
190 	 * so we can optimizing concurrent callers.
191 	 */
192 	if (offset == 0) {
193 		if (whence == SEEK_CUR)
194 			return file->f_pos;
195 
196 		if (whence == SEEK_SET)
197 			return vfs_setpos(file, 0, 0);
198 	}
199 
200 	ret = ovl_real_fdget(file, &real);
201 	if (ret)
202 		return ret;
203 
204 	/*
205 	 * Overlay file f_pos is the master copy that is preserved
206 	 * through copy up and modified on read/write, but only real
207 	 * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose
208 	 * limitations that are more strict than ->s_maxbytes for specific
209 	 * files, so we use the real file to perform seeks.
210 	 */
211 	ovl_inode_lock(inode);
212 	real.file->f_pos = file->f_pos;
213 
214 	old_cred = ovl_override_creds(inode->i_sb);
215 	ret = vfs_llseek(real.file, offset, whence);
216 	revert_creds(old_cred);
217 
218 	file->f_pos = real.file->f_pos;
219 	ovl_inode_unlock(inode);
220 
221 	fdput(real);
222 
223 	return ret;
224 }
225 
/*
 * Called after the real file has been modified: refresh the overlay inode's
 * attributes (size/mtime) via ovl_copyattr().
 */
static void ovl_file_modified(struct file *file)
{
	struct inode *inode = file_inode(file);

	ovl_copyattr(inode);
}
231 
232 static void ovl_file_accessed(struct file *file)
233 {
234 	struct inode *inode, *upperinode;
235 	struct timespec64 ctime, uctime;
236 	struct timespec64 mtime, umtime;
237 
238 	if (file->f_flags & O_NOATIME)
239 		return;
240 
241 	inode = file_inode(file);
242 	upperinode = ovl_inode_upper(inode);
243 
244 	if (!upperinode)
245 		return;
246 
247 	ctime = inode_get_ctime(inode);
248 	uctime = inode_get_ctime(upperinode);
249 	mtime = inode_get_mtime(inode);
250 	umtime = inode_get_mtime(upperinode);
251 	if ((!timespec64_equal(&mtime, &umtime)) ||
252 	     !timespec64_equal(&ctime, &uctime)) {
253 		inode_set_mtime_to_ts(inode, inode_get_mtime(upperinode));
254 		inode_set_ctime_to_ts(inode, uctime);
255 	}
256 
257 	touch_atime(&file->f_path);
258 }
259 
260 static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
261 {
262 	struct file *file = iocb->ki_filp;
263 	struct fd real;
264 	ssize_t ret;
265 	struct backing_file_ctx ctx = {
266 		.cred = ovl_creds(file_inode(file)->i_sb),
267 		.user_file = file,
268 		.accessed = ovl_file_accessed,
269 	};
270 
271 	if (!iov_iter_count(iter))
272 		return 0;
273 
274 	ret = ovl_real_fdget(file, &real);
275 	if (ret)
276 		return ret;
277 
278 	ret = backing_file_read_iter(real.file, iter, iocb, iocb->ki_flags,
279 				     &ctx);
280 	fdput(real);
281 
282 	return ret;
283 }
284 
285 static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
286 {
287 	struct file *file = iocb->ki_filp;
288 	struct inode *inode = file_inode(file);
289 	struct fd real;
290 	ssize_t ret;
291 	int ifl = iocb->ki_flags;
292 	struct backing_file_ctx ctx = {
293 		.cred = ovl_creds(inode->i_sb),
294 		.user_file = file,
295 		.end_write = ovl_file_modified,
296 	};
297 
298 	if (!iov_iter_count(iter))
299 		return 0;
300 
301 	inode_lock(inode);
302 	/* Update mode */
303 	ovl_copyattr(inode);
304 
305 	ret = ovl_real_fdget(file, &real);
306 	if (ret)
307 		goto out_unlock;
308 
309 	if (!ovl_should_sync(OVL_FS(inode->i_sb)))
310 		ifl &= ~(IOCB_DSYNC | IOCB_SYNC);
311 
312 	/*
313 	 * Overlayfs doesn't support deferred completions, don't copy
314 	 * this property in case it is set by the issuer.
315 	 */
316 	ifl &= ~IOCB_DIO_CALLER_COMP;
317 	ret = backing_file_write_iter(real.file, iter, iocb, ifl, &ctx);
318 	fdput(real);
319 
320 out_unlock:
321 	inode_unlock(inode);
322 
323 	return ret;
324 }
325 
326 static ssize_t ovl_splice_read(struct file *in, loff_t *ppos,
327 			       struct pipe_inode_info *pipe, size_t len,
328 			       unsigned int flags)
329 {
330 	struct fd real;
331 	ssize_t ret;
332 	struct backing_file_ctx ctx = {
333 		.cred = ovl_creds(file_inode(in)->i_sb),
334 		.user_file = in,
335 		.accessed = ovl_file_accessed,
336 	};
337 
338 	ret = ovl_real_fdget(in, &real);
339 	if (ret)
340 		return ret;
341 
342 	ret = backing_file_splice_read(real.file, ppos, pipe, len, flags, &ctx);
343 	fdput(real);
344 
345 	return ret;
346 }
347 
348 /*
349  * Calling iter_file_splice_write() directly from overlay's f_op may deadlock
350  * due to lock order inversion between pipe->mutex in iter_file_splice_write()
351  * and file_start_write(real.file) in ovl_write_iter().
352  *
353  * So do everything ovl_write_iter() does and call iter_file_splice_write() on
354  * the real file.
355  */
356 static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
357 				loff_t *ppos, size_t len, unsigned int flags)
358 {
359 	struct fd real;
360 	struct inode *inode = file_inode(out);
361 	ssize_t ret;
362 	struct backing_file_ctx ctx = {
363 		.cred = ovl_creds(inode->i_sb),
364 		.user_file = out,
365 		.end_write = ovl_file_modified,
366 	};
367 
368 	inode_lock(inode);
369 	/* Update mode */
370 	ovl_copyattr(inode);
371 
372 	ret = ovl_real_fdget(out, &real);
373 	if (ret)
374 		goto out_unlock;
375 
376 	ret = backing_file_splice_write(pipe, real.file, ppos, len, flags, &ctx);
377 	fdput(real);
378 
379 out_unlock:
380 	inode_unlock(inode);
381 
382 	return ret;
383 }
384 
385 static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
386 {
387 	struct fd real;
388 	const struct cred *old_cred;
389 	int ret;
390 
391 	ret = ovl_sync_status(OVL_FS(file_inode(file)->i_sb));
392 	if (ret <= 0)
393 		return ret;
394 
395 	ret = ovl_real_fdget_meta(file, &real, !datasync);
396 	if (ret)
397 		return ret;
398 
399 	/* Don't sync lower file for fear of receiving EROFS error */
400 	if (file_inode(real.file) == ovl_inode_upper(file_inode(file))) {
401 		old_cred = ovl_override_creds(file_inode(file)->i_sb);
402 		ret = vfs_fsync_range(real.file, start, end, datasync);
403 		revert_creds(old_cred);
404 	}
405 
406 	fdput(real);
407 
408 	return ret;
409 }
410 
411 static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
412 {
413 	struct file *realfile = file->private_data;
414 	struct backing_file_ctx ctx = {
415 		.cred = ovl_creds(file_inode(file)->i_sb),
416 		.user_file = file,
417 		.accessed = ovl_file_accessed,
418 	};
419 
420 	return backing_file_mmap(realfile, vma, &ctx);
421 }
422 
423 static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
424 {
425 	struct inode *inode = file_inode(file);
426 	struct fd real;
427 	const struct cred *old_cred;
428 	int ret;
429 
430 	inode_lock(inode);
431 	/* Update mode */
432 	ovl_copyattr(inode);
433 	ret = file_remove_privs(file);
434 	if (ret)
435 		goto out_unlock;
436 
437 	ret = ovl_real_fdget(file, &real);
438 	if (ret)
439 		goto out_unlock;
440 
441 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
442 	ret = vfs_fallocate(real.file, mode, offset, len);
443 	revert_creds(old_cred);
444 
445 	/* Update size */
446 	ovl_file_modified(file);
447 
448 	fdput(real);
449 
450 out_unlock:
451 	inode_unlock(inode);
452 
453 	return ret;
454 }
455 
456 static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
457 {
458 	struct fd real;
459 	const struct cred *old_cred;
460 	int ret;
461 
462 	ret = ovl_real_fdget(file, &real);
463 	if (ret)
464 		return ret;
465 
466 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
467 	ret = vfs_fadvise(real.file, offset, len, advice);
468 	revert_creds(old_cred);
469 
470 	fdput(real);
471 
472 	return ret;
473 }
474 
/* Selects which VFS helper ovl_copyfile() runs on the real files */
enum ovl_copyop {
	OVL_COPY = 0,	/* vfs_copy_file_range() */
	OVL_CLONE,	/* vfs_clone_file_range() */
	OVL_DEDUPE,	/* vfs_dedupe_file_range_one() */
};
480 
481 static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
482 			    struct file *file_out, loff_t pos_out,
483 			    loff_t len, unsigned int flags, enum ovl_copyop op)
484 {
485 	struct inode *inode_out = file_inode(file_out);
486 	struct fd real_in, real_out;
487 	const struct cred *old_cred;
488 	loff_t ret;
489 
490 	inode_lock(inode_out);
491 	if (op != OVL_DEDUPE) {
492 		/* Update mode */
493 		ovl_copyattr(inode_out);
494 		ret = file_remove_privs(file_out);
495 		if (ret)
496 			goto out_unlock;
497 	}
498 
499 	ret = ovl_real_fdget(file_out, &real_out);
500 	if (ret)
501 		goto out_unlock;
502 
503 	ret = ovl_real_fdget(file_in, &real_in);
504 	if (ret) {
505 		fdput(real_out);
506 		goto out_unlock;
507 	}
508 
509 	old_cred = ovl_override_creds(file_inode(file_out)->i_sb);
510 	switch (op) {
511 	case OVL_COPY:
512 		ret = vfs_copy_file_range(real_in.file, pos_in,
513 					  real_out.file, pos_out, len, flags);
514 		break;
515 
516 	case OVL_CLONE:
517 		ret = vfs_clone_file_range(real_in.file, pos_in,
518 					   real_out.file, pos_out, len, flags);
519 		break;
520 
521 	case OVL_DEDUPE:
522 		ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
523 						real_out.file, pos_out, len,
524 						flags);
525 		break;
526 	}
527 	revert_creds(old_cred);
528 
529 	/* Update size */
530 	ovl_file_modified(file_out);
531 
532 	fdput(real_in);
533 	fdput(real_out);
534 
535 out_unlock:
536 	inode_unlock(inode_out);
537 
538 	return ret;
539 }
540 
541 static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
542 				   struct file *file_out, loff_t pos_out,
543 				   size_t len, unsigned int flags)
544 {
545 	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
546 			    OVL_COPY);
547 }
548 
549 static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in,
550 				   struct file *file_out, loff_t pos_out,
551 				   loff_t len, unsigned int remap_flags)
552 {
553 	enum ovl_copyop op;
554 
555 	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
556 		return -EINVAL;
557 
558 	if (remap_flags & REMAP_FILE_DEDUP)
559 		op = OVL_DEDUPE;
560 	else
561 		op = OVL_CLONE;
562 
563 	/*
564 	 * Don't copy up because of a dedupe request, this wouldn't make sense
565 	 * most of the time (data would be duplicated instead of deduplicated).
566 	 */
567 	if (op == OVL_DEDUPE &&
568 	    (!ovl_inode_upper(file_inode(file_in)) ||
569 	     !ovl_inode_upper(file_inode(file_out))))
570 		return -EPERM;
571 
572 	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len,
573 			    remap_flags, op);
574 }
575 
576 static int ovl_flush(struct file *file, fl_owner_t id)
577 {
578 	struct fd real;
579 	const struct cred *old_cred;
580 	int err;
581 
582 	err = ovl_real_fdget(file, &real);
583 	if (err)
584 		return err;
585 
586 	if (real.file->f_op->flush) {
587 		old_cred = ovl_override_creds(file_inode(file)->i_sb);
588 		err = real.file->f_op->flush(real.file, id);
589 		revert_creds(old_cred);
590 	}
591 	fdput(real);
592 
593 	return err;
594 }
595 
596 const struct file_operations ovl_file_operations = {
597 	.open		= ovl_open,
598 	.release	= ovl_release,
599 	.llseek		= ovl_llseek,
600 	.read_iter	= ovl_read_iter,
601 	.write_iter	= ovl_write_iter,
602 	.fsync		= ovl_fsync,
603 	.mmap		= ovl_mmap,
604 	.fallocate	= ovl_fallocate,
605 	.fadvise	= ovl_fadvise,
606 	.flush		= ovl_flush,
607 	.splice_read    = ovl_splice_read,
608 	.splice_write   = ovl_splice_write,
609 
610 	.copy_file_range	= ovl_copy_file_range,
611 	.remap_file_range	= ovl_remap_file_range,
612 };
613