xref: /linux/fs/overlayfs/file.c (revision 5c2e7736e20d9b348a44cafbfa639fe2653fbc34)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2017 Red Hat, Inc.
4  */
5 
6 #include <linux/cred.h>
7 #include <linux/file.h>
8 #include <linux/mount.h>
9 #include <linux/xattr.h>
10 #include <linux/uio.h>
11 #include <linux/uaccess.h>
12 #include <linux/security.h>
13 #include <linux/fs.h>
14 #include <linux/backing-file.h>
15 #include "overlayfs.h"
16 
/*
 * Classify @realinode relative to the overlay @inode; used for debug output.
 *
 * Returns:
 *   'l' - @realinode is not the upper inode of @inode
 *   'u' - @realinode is the upper inode and ovl_has_upperdata() is true
 *   'm' - @realinode is the upper inode and ovl_has_upperdata() is false
 */
static char ovl_whatisit(struct inode *inode, struct inode *realinode)
{
	if (realinode != ovl_inode_upper(inode))
		return 'l';

	return ovl_has_upperdata(inode) ? 'u' : 'm';
}
26 
27 static struct file *ovl_open_realfile(const struct file *file,
28 				      const struct path *realpath)
29 {
30 	struct inode *realinode = d_inode(realpath->dentry);
31 	struct inode *inode = file_inode(file);
32 	struct mnt_idmap *real_idmap;
33 	struct file *realfile;
34 	const struct cred *old_cred;
35 	int flags = file->f_flags | OVL_OPEN_FLAGS;
36 	int acc_mode = ACC_MODE(flags);
37 	int err;
38 
39 	if (flags & O_APPEND)
40 		acc_mode |= MAY_APPEND;
41 
42 	old_cred = ovl_override_creds(inode->i_sb);
43 	real_idmap = mnt_idmap(realpath->mnt);
44 	err = inode_permission(real_idmap, realinode, MAY_OPEN | acc_mode);
45 	if (err) {
46 		realfile = ERR_PTR(err);
47 	} else {
48 		if (!inode_owner_or_capable(real_idmap, realinode))
49 			flags &= ~O_NOATIME;
50 
51 		realfile = backing_file_open(&file->f_path, flags, realpath,
52 					     current_cred());
53 	}
54 	revert_creds(old_cred);
55 
56 	pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
57 		 file, file, ovl_whatisit(inode, realinode), file->f_flags,
58 		 realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
59 
60 	return realfile;
61 }
62 
63 #define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
64 
65 static int ovl_change_flags(struct file *file, unsigned int flags)
66 {
67 	struct inode *inode = file_inode(file);
68 	int err;
69 
70 	flags &= OVL_SETFL_MASK;
71 
72 	if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
73 		return -EPERM;
74 
75 	if ((flags & O_DIRECT) && !(file->f_mode & FMODE_CAN_ODIRECT))
76 		return -EINVAL;
77 
78 	if (file->f_op->check_flags) {
79 		err = file->f_op->check_flags(flags);
80 		if (err)
81 			return err;
82 	}
83 
84 	spin_lock(&file->f_lock);
85 	file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags;
86 	file->f_iocb_flags = iocb_flags(file);
87 	spin_unlock(&file->f_lock);
88 
89 	return 0;
90 }
91 
92 static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
93 			       bool allow_meta)
94 {
95 	struct dentry *dentry = file_dentry(file);
96 	struct file *realfile = file->private_data;
97 	struct path realpath;
98 	int err;
99 
100 	real->word = (unsigned long)realfile;
101 
102 	if (allow_meta) {
103 		ovl_path_real(dentry, &realpath);
104 	} else {
105 		/* lazy lookup and verify of lowerdata */
106 		err = ovl_verify_lowerdata(dentry);
107 		if (err)
108 			return err;
109 
110 		ovl_path_realdata(dentry, &realpath);
111 	}
112 	if (!realpath.dentry)
113 		return -EIO;
114 
115 	/* Has it been copied up since we'd opened it? */
116 	if (unlikely(file_inode(realfile) != d_inode(realpath.dentry))) {
117 		struct file *f = ovl_open_realfile(file, &realpath);
118 		if (IS_ERR(f))
119 			return PTR_ERR(f);
120 		real->word = (unsigned long)f | FDPUT_FPUT;
121 		return 0;
122 	}
123 
124 	/* Did the flags change since open? */
125 	if (unlikely((file->f_flags ^ realfile->f_flags) & ~OVL_OPEN_FLAGS))
126 		return ovl_change_flags(realfile, file->f_flags);
127 
128 	return 0;
129 }
130 
131 static int ovl_real_fdget(const struct file *file, struct fd *real)
132 {
133 	if (d_is_dir(file_dentry(file))) {
134 		struct file *f = ovl_dir_real_file(file, false);
135 		if (IS_ERR(f))
136 			return PTR_ERR(f);
137 		real->word = (unsigned long)f;
138 		return 0;
139 	}
140 
141 	return ovl_real_fdget_meta(file, real, false);
142 }
143 
144 static int ovl_open(struct inode *inode, struct file *file)
145 {
146 	struct dentry *dentry = file_dentry(file);
147 	struct file *realfile;
148 	struct path realpath;
149 	int err;
150 
151 	/* lazy lookup and verify lowerdata */
152 	err = ovl_verify_lowerdata(dentry);
153 	if (err)
154 		return err;
155 
156 	err = ovl_maybe_copy_up(dentry, file->f_flags);
157 	if (err)
158 		return err;
159 
160 	/* No longer need these flags, so don't pass them on to underlying fs */
161 	file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
162 
163 	ovl_path_realdata(dentry, &realpath);
164 	if (!realpath.dentry)
165 		return -EIO;
166 
167 	realfile = ovl_open_realfile(file, &realpath);
168 	if (IS_ERR(realfile))
169 		return PTR_ERR(realfile);
170 
171 	file->private_data = realfile;
172 
173 	return 0;
174 }
175 
176 static int ovl_release(struct inode *inode, struct file *file)
177 {
178 	fput(file->private_data);
179 
180 	return 0;
181 }
182 
183 static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
184 {
185 	struct inode *inode = file_inode(file);
186 	struct fd real;
187 	const struct cred *old_cred;
188 	loff_t ret;
189 
190 	/*
191 	 * The two special cases below do not need to involve real fs,
192 	 * so we can optimizing concurrent callers.
193 	 */
194 	if (offset == 0) {
195 		if (whence == SEEK_CUR)
196 			return file->f_pos;
197 
198 		if (whence == SEEK_SET)
199 			return vfs_setpos(file, 0, 0);
200 	}
201 
202 	ret = ovl_real_fdget(file, &real);
203 	if (ret)
204 		return ret;
205 
206 	/*
207 	 * Overlay file f_pos is the master copy that is preserved
208 	 * through copy up and modified on read/write, but only real
209 	 * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose
210 	 * limitations that are more strict than ->s_maxbytes for specific
211 	 * files, so we use the real file to perform seeks.
212 	 */
213 	ovl_inode_lock(inode);
214 	fd_file(real)->f_pos = file->f_pos;
215 
216 	old_cred = ovl_override_creds(inode->i_sb);
217 	ret = vfs_llseek(fd_file(real), offset, whence);
218 	revert_creds(old_cred);
219 
220 	file->f_pos = fd_file(real)->f_pos;
221 	ovl_inode_unlock(inode);
222 
223 	fdput(real);
224 
225 	return ret;
226 }
227 
/*
 * Called after the real file behind @file was modified: refresh the overlay
 * inode's attributes (size/mtime) through ovl_copyattr().
 */
static void ovl_file_modified(struct file *file)
{
	struct inode *inode = file_inode(file);

	ovl_copyattr(inode);
}
233 
234 static void ovl_file_accessed(struct file *file)
235 {
236 	struct inode *inode, *upperinode;
237 	struct timespec64 ctime, uctime;
238 	struct timespec64 mtime, umtime;
239 
240 	if (file->f_flags & O_NOATIME)
241 		return;
242 
243 	inode = file_inode(file);
244 	upperinode = ovl_inode_upper(inode);
245 
246 	if (!upperinode)
247 		return;
248 
249 	ctime = inode_get_ctime(inode);
250 	uctime = inode_get_ctime(upperinode);
251 	mtime = inode_get_mtime(inode);
252 	umtime = inode_get_mtime(upperinode);
253 	if ((!timespec64_equal(&mtime, &umtime)) ||
254 	     !timespec64_equal(&ctime, &uctime)) {
255 		inode_set_mtime_to_ts(inode, inode_get_mtime(upperinode));
256 		inode_set_ctime_to_ts(inode, uctime);
257 	}
258 
259 	touch_atime(&file->f_path);
260 }
261 
262 static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
263 {
264 	struct file *file = iocb->ki_filp;
265 	struct fd real;
266 	ssize_t ret;
267 	struct backing_file_ctx ctx = {
268 		.cred = ovl_creds(file_inode(file)->i_sb),
269 		.user_file = file,
270 		.accessed = ovl_file_accessed,
271 	};
272 
273 	if (!iov_iter_count(iter))
274 		return 0;
275 
276 	ret = ovl_real_fdget(file, &real);
277 	if (ret)
278 		return ret;
279 
280 	ret = backing_file_read_iter(fd_file(real), iter, iocb, iocb->ki_flags,
281 				     &ctx);
282 	fdput(real);
283 
284 	return ret;
285 }
286 
287 static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
288 {
289 	struct file *file = iocb->ki_filp;
290 	struct inode *inode = file_inode(file);
291 	struct fd real;
292 	ssize_t ret;
293 	int ifl = iocb->ki_flags;
294 	struct backing_file_ctx ctx = {
295 		.cred = ovl_creds(inode->i_sb),
296 		.user_file = file,
297 		.end_write = ovl_file_modified,
298 	};
299 
300 	if (!iov_iter_count(iter))
301 		return 0;
302 
303 	inode_lock(inode);
304 	/* Update mode */
305 	ovl_copyattr(inode);
306 
307 	ret = ovl_real_fdget(file, &real);
308 	if (ret)
309 		goto out_unlock;
310 
311 	if (!ovl_should_sync(OVL_FS(inode->i_sb)))
312 		ifl &= ~(IOCB_DSYNC | IOCB_SYNC);
313 
314 	/*
315 	 * Overlayfs doesn't support deferred completions, don't copy
316 	 * this property in case it is set by the issuer.
317 	 */
318 	ifl &= ~IOCB_DIO_CALLER_COMP;
319 	ret = backing_file_write_iter(fd_file(real), iter, iocb, ifl, &ctx);
320 	fdput(real);
321 
322 out_unlock:
323 	inode_unlock(inode);
324 
325 	return ret;
326 }
327 
328 static ssize_t ovl_splice_read(struct file *in, loff_t *ppos,
329 			       struct pipe_inode_info *pipe, size_t len,
330 			       unsigned int flags)
331 {
332 	struct fd real;
333 	ssize_t ret;
334 	struct backing_file_ctx ctx = {
335 		.cred = ovl_creds(file_inode(in)->i_sb),
336 		.user_file = in,
337 		.accessed = ovl_file_accessed,
338 	};
339 
340 	ret = ovl_real_fdget(in, &real);
341 	if (ret)
342 		return ret;
343 
344 	ret = backing_file_splice_read(fd_file(real), ppos, pipe, len, flags, &ctx);
345 	fdput(real);
346 
347 	return ret;
348 }
349 
350 /*
351  * Calling iter_file_splice_write() directly from overlay's f_op may deadlock
352  * due to lock order inversion between pipe->mutex in iter_file_splice_write()
353  * and file_start_write(fd_file(real)) in ovl_write_iter().
354  *
355  * So do everything ovl_write_iter() does and call iter_file_splice_write() on
356  * the real file.
357  */
358 static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
359 				loff_t *ppos, size_t len, unsigned int flags)
360 {
361 	struct fd real;
362 	struct inode *inode = file_inode(out);
363 	ssize_t ret;
364 	struct backing_file_ctx ctx = {
365 		.cred = ovl_creds(inode->i_sb),
366 		.user_file = out,
367 		.end_write = ovl_file_modified,
368 	};
369 
370 	inode_lock(inode);
371 	/* Update mode */
372 	ovl_copyattr(inode);
373 
374 	ret = ovl_real_fdget(out, &real);
375 	if (ret)
376 		goto out_unlock;
377 
378 	ret = backing_file_splice_write(pipe, fd_file(real), ppos, len, flags, &ctx);
379 	fdput(real);
380 
381 out_unlock:
382 	inode_unlock(inode);
383 
384 	return ret;
385 }
386 
387 static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
388 {
389 	struct fd real;
390 	const struct cred *old_cred;
391 	int ret;
392 
393 	ret = ovl_sync_status(OVL_FS(file_inode(file)->i_sb));
394 	if (ret <= 0)
395 		return ret;
396 
397 	ret = ovl_real_fdget_meta(file, &real, !datasync);
398 	if (ret)
399 		return ret;
400 
401 	/* Don't sync lower file for fear of receiving EROFS error */
402 	if (file_inode(fd_file(real)) == ovl_inode_upper(file_inode(file))) {
403 		old_cred = ovl_override_creds(file_inode(file)->i_sb);
404 		ret = vfs_fsync_range(fd_file(real), start, end, datasync);
405 		revert_creds(old_cred);
406 	}
407 
408 	fdput(real);
409 
410 	return ret;
411 }
412 
413 static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
414 {
415 	struct file *realfile = file->private_data;
416 	struct backing_file_ctx ctx = {
417 		.cred = ovl_creds(file_inode(file)->i_sb),
418 		.user_file = file,
419 		.accessed = ovl_file_accessed,
420 	};
421 
422 	return backing_file_mmap(realfile, vma, &ctx);
423 }
424 
425 static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
426 {
427 	struct inode *inode = file_inode(file);
428 	struct fd real;
429 	const struct cred *old_cred;
430 	int ret;
431 
432 	inode_lock(inode);
433 	/* Update mode */
434 	ovl_copyattr(inode);
435 	ret = file_remove_privs(file);
436 	if (ret)
437 		goto out_unlock;
438 
439 	ret = ovl_real_fdget(file, &real);
440 	if (ret)
441 		goto out_unlock;
442 
443 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
444 	ret = vfs_fallocate(fd_file(real), mode, offset, len);
445 	revert_creds(old_cred);
446 
447 	/* Update size */
448 	ovl_file_modified(file);
449 
450 	fdput(real);
451 
452 out_unlock:
453 	inode_unlock(inode);
454 
455 	return ret;
456 }
457 
458 static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
459 {
460 	struct fd real;
461 	const struct cred *old_cred;
462 	int ret;
463 
464 	ret = ovl_real_fdget(file, &real);
465 	if (ret)
466 		return ret;
467 
468 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
469 	ret = vfs_fadvise(fd_file(real), offset, len, advice);
470 	revert_creds(old_cred);
471 
472 	fdput(real);
473 
474 	return ret;
475 }
476 
/* Selects which VFS primitive ovl_copyfile() runs on the real files. */
enum ovl_copyop {
	OVL_COPY,	/* vfs_copy_file_range() */
	OVL_CLONE,	/* vfs_clone_file_range() */
	OVL_DEDUPE,	/* vfs_dedupe_file_range_one() */
};
482 
483 static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
484 			    struct file *file_out, loff_t pos_out,
485 			    loff_t len, unsigned int flags, enum ovl_copyop op)
486 {
487 	struct inode *inode_out = file_inode(file_out);
488 	struct fd real_in, real_out;
489 	const struct cred *old_cred;
490 	loff_t ret;
491 
492 	inode_lock(inode_out);
493 	if (op != OVL_DEDUPE) {
494 		/* Update mode */
495 		ovl_copyattr(inode_out);
496 		ret = file_remove_privs(file_out);
497 		if (ret)
498 			goto out_unlock;
499 	}
500 
501 	ret = ovl_real_fdget(file_out, &real_out);
502 	if (ret)
503 		goto out_unlock;
504 
505 	ret = ovl_real_fdget(file_in, &real_in);
506 	if (ret) {
507 		fdput(real_out);
508 		goto out_unlock;
509 	}
510 
511 	old_cred = ovl_override_creds(file_inode(file_out)->i_sb);
512 	switch (op) {
513 	case OVL_COPY:
514 		ret = vfs_copy_file_range(fd_file(real_in), pos_in,
515 					  fd_file(real_out), pos_out, len, flags);
516 		break;
517 
518 	case OVL_CLONE:
519 		ret = vfs_clone_file_range(fd_file(real_in), pos_in,
520 					   fd_file(real_out), pos_out, len, flags);
521 		break;
522 
523 	case OVL_DEDUPE:
524 		ret = vfs_dedupe_file_range_one(fd_file(real_in), pos_in,
525 						fd_file(real_out), pos_out, len,
526 						flags);
527 		break;
528 	}
529 	revert_creds(old_cred);
530 
531 	/* Update size */
532 	ovl_file_modified(file_out);
533 
534 	fdput(real_in);
535 	fdput(real_out);
536 
537 out_unlock:
538 	inode_unlock(inode_out);
539 
540 	return ret;
541 }
542 
543 static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
544 				   struct file *file_out, loff_t pos_out,
545 				   size_t len, unsigned int flags)
546 {
547 	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
548 			    OVL_COPY);
549 }
550 
551 static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in,
552 				   struct file *file_out, loff_t pos_out,
553 				   loff_t len, unsigned int remap_flags)
554 {
555 	enum ovl_copyop op;
556 
557 	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
558 		return -EINVAL;
559 
560 	if (remap_flags & REMAP_FILE_DEDUP)
561 		op = OVL_DEDUPE;
562 	else
563 		op = OVL_CLONE;
564 
565 	/*
566 	 * Don't copy up because of a dedupe request, this wouldn't make sense
567 	 * most of the time (data would be duplicated instead of deduplicated).
568 	 */
569 	if (op == OVL_DEDUPE &&
570 	    (!ovl_inode_upper(file_inode(file_in)) ||
571 	     !ovl_inode_upper(file_inode(file_out))))
572 		return -EPERM;
573 
574 	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len,
575 			    remap_flags, op);
576 }
577 
578 static int ovl_flush(struct file *file, fl_owner_t id)
579 {
580 	struct fd real;
581 	const struct cred *old_cred;
582 	int err;
583 
584 	err = ovl_real_fdget(file, &real);
585 	if (err)
586 		return err;
587 
588 	if (fd_file(real)->f_op->flush) {
589 		old_cred = ovl_override_creds(file_inode(file)->i_sb);
590 		err = fd_file(real)->f_op->flush(fd_file(real), id);
591 		revert_creds(old_cred);
592 	}
593 	fdput(real);
594 
595 	return err;
596 }
597 
598 const struct file_operations ovl_file_operations = {
599 	.open		= ovl_open,
600 	.release	= ovl_release,
601 	.llseek		= ovl_llseek,
602 	.read_iter	= ovl_read_iter,
603 	.write_iter	= ovl_write_iter,
604 	.fsync		= ovl_fsync,
605 	.mmap		= ovl_mmap,
606 	.fallocate	= ovl_fallocate,
607 	.fadvise	= ovl_fadvise,
608 	.flush		= ovl_flush,
609 	.splice_read    = ovl_splice_read,
610 	.splice_write   = ovl_splice_write,
611 
612 	.copy_file_range	= ovl_copy_file_range,
613 	.remap_file_range	= ovl_remap_file_range,
614 };
615