xref: /linux/fs/overlayfs/file.c (revision 3027ce13e04eee76539ca65c2cb1028a01c8c508)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2017 Red Hat, Inc.
4  */
5 
6 #include <linux/cred.h>
7 #include <linux/file.h>
8 #include <linux/mount.h>
9 #include <linux/xattr.h>
10 #include <linux/uio.h>
11 #include <linux/uaccess.h>
12 #include <linux/security.h>
13 #include <linux/fs.h>
14 #include <linux/backing-file.h>
15 #include "overlayfs.h"
16 
/*
 * Classify @realinode relative to the overlay @inode, as a single character
 * used in the debug message printed when a real file is opened:
 *
 *   'l' - @realinode is not the overlay inode's upper inode
 *   'u' - @realinode is the upper inode and ovl_has_upperdata() is true
 *   'm' - @realinode is the upper inode but ovl_has_upperdata() is false
 */
static char ovl_whatisit(struct inode *inode, struct inode *realinode)
{
	if (realinode == ovl_inode_upper(inode))
		return ovl_has_upperdata(inode) ? 'u' : 'm';

	return 'l';
}
26 
27 /* No atime modification on underlying */
28 #define OVL_OPEN_FLAGS (O_NOATIME)
29 
30 static struct file *ovl_open_realfile(const struct file *file,
31 				      const struct path *realpath)
32 {
33 	struct inode *realinode = d_inode(realpath->dentry);
34 	struct inode *inode = file_inode(file);
35 	struct mnt_idmap *real_idmap;
36 	struct file *realfile;
37 	const struct cred *old_cred;
38 	int flags = file->f_flags | OVL_OPEN_FLAGS;
39 	int acc_mode = ACC_MODE(flags);
40 	int err;
41 
42 	if (flags & O_APPEND)
43 		acc_mode |= MAY_APPEND;
44 
45 	old_cred = ovl_override_creds(inode->i_sb);
46 	real_idmap = mnt_idmap(realpath->mnt);
47 	err = inode_permission(real_idmap, realinode, MAY_OPEN | acc_mode);
48 	if (err) {
49 		realfile = ERR_PTR(err);
50 	} else {
51 		if (!inode_owner_or_capable(real_idmap, realinode))
52 			flags &= ~O_NOATIME;
53 
54 		realfile = backing_file_open(&file->f_path, flags, realpath,
55 					     current_cred());
56 	}
57 	revert_creds(old_cred);
58 
59 	pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
60 		 file, file, ovl_whatisit(inode, realinode), file->f_flags,
61 		 realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
62 
63 	return realfile;
64 }
65 
66 #define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
67 
68 static int ovl_change_flags(struct file *file, unsigned int flags)
69 {
70 	struct inode *inode = file_inode(file);
71 	int err;
72 
73 	flags &= OVL_SETFL_MASK;
74 
75 	if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
76 		return -EPERM;
77 
78 	if ((flags & O_DIRECT) && !(file->f_mode & FMODE_CAN_ODIRECT))
79 		return -EINVAL;
80 
81 	if (file->f_op->check_flags) {
82 		err = file->f_op->check_flags(flags);
83 		if (err)
84 			return err;
85 	}
86 
87 	spin_lock(&file->f_lock);
88 	file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags;
89 	file->f_iocb_flags = iocb_flags(file);
90 	spin_unlock(&file->f_lock);
91 
92 	return 0;
93 }
94 
95 static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
96 			       bool allow_meta)
97 {
98 	struct dentry *dentry = file_dentry(file);
99 	struct path realpath;
100 	int err;
101 
102 	real->flags = 0;
103 	real->file = file->private_data;
104 
105 	if (allow_meta) {
106 		ovl_path_real(dentry, &realpath);
107 	} else {
108 		/* lazy lookup and verify of lowerdata */
109 		err = ovl_verify_lowerdata(dentry);
110 		if (err)
111 			return err;
112 
113 		ovl_path_realdata(dentry, &realpath);
114 	}
115 	if (!realpath.dentry)
116 		return -EIO;
117 
118 	/* Has it been copied up since we'd opened it? */
119 	if (unlikely(file_inode(real->file) != d_inode(realpath.dentry))) {
120 		real->flags = FDPUT_FPUT;
121 		real->file = ovl_open_realfile(file, &realpath);
122 
123 		return PTR_ERR_OR_ZERO(real->file);
124 	}
125 
126 	/* Did the flags change since open? */
127 	if (unlikely((file->f_flags ^ real->file->f_flags) & ~OVL_OPEN_FLAGS))
128 		return ovl_change_flags(real->file, file->f_flags);
129 
130 	return 0;
131 }
132 
133 static int ovl_real_fdget(const struct file *file, struct fd *real)
134 {
135 	if (d_is_dir(file_dentry(file))) {
136 		real->flags = 0;
137 		real->file = ovl_dir_real_file(file, false);
138 
139 		return PTR_ERR_OR_ZERO(real->file);
140 	}
141 
142 	return ovl_real_fdget_meta(file, real, false);
143 }
144 
145 static int ovl_open(struct inode *inode, struct file *file)
146 {
147 	struct dentry *dentry = file_dentry(file);
148 	struct file *realfile;
149 	struct path realpath;
150 	int err;
151 
152 	/* lazy lookup and verify lowerdata */
153 	err = ovl_verify_lowerdata(dentry);
154 	if (err)
155 		return err;
156 
157 	err = ovl_maybe_copy_up(dentry, file->f_flags);
158 	if (err)
159 		return err;
160 
161 	/* No longer need these flags, so don't pass them on to underlying fs */
162 	file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
163 
164 	ovl_path_realdata(dentry, &realpath);
165 	if (!realpath.dentry)
166 		return -EIO;
167 
168 	realfile = ovl_open_realfile(file, &realpath);
169 	if (IS_ERR(realfile))
170 		return PTR_ERR(realfile);
171 
172 	file->private_data = realfile;
173 
174 	return 0;
175 }
176 
177 static int ovl_release(struct inode *inode, struct file *file)
178 {
179 	fput(file->private_data);
180 
181 	return 0;
182 }
183 
184 static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
185 {
186 	struct inode *inode = file_inode(file);
187 	struct fd real;
188 	const struct cred *old_cred;
189 	loff_t ret;
190 
191 	/*
192 	 * The two special cases below do not need to involve real fs,
193 	 * so we can optimizing concurrent callers.
194 	 */
195 	if (offset == 0) {
196 		if (whence == SEEK_CUR)
197 			return file->f_pos;
198 
199 		if (whence == SEEK_SET)
200 			return vfs_setpos(file, 0, 0);
201 	}
202 
203 	ret = ovl_real_fdget(file, &real);
204 	if (ret)
205 		return ret;
206 
207 	/*
208 	 * Overlay file f_pos is the master copy that is preserved
209 	 * through copy up and modified on read/write, but only real
210 	 * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose
211 	 * limitations that are more strict than ->s_maxbytes for specific
212 	 * files, so we use the real file to perform seeks.
213 	 */
214 	ovl_inode_lock(inode);
215 	real.file->f_pos = file->f_pos;
216 
217 	old_cred = ovl_override_creds(inode->i_sb);
218 	ret = vfs_llseek(real.file, offset, whence);
219 	revert_creds(old_cred);
220 
221 	file->f_pos = real.file->f_pos;
222 	ovl_inode_unlock(inode);
223 
224 	fdput(real);
225 
226 	return ret;
227 }
228 
/*
 * Called after the real file has been modified: refresh the overlay inode's
 * attributes (size/mtime) via ovl_copyattr().
 */
static void ovl_file_modified(struct file *file)
{
	struct inode *inode = file_inode(file);

	ovl_copyattr(inode);
}
234 
235 static void ovl_file_accessed(struct file *file)
236 {
237 	struct inode *inode, *upperinode;
238 	struct timespec64 ctime, uctime;
239 	struct timespec64 mtime, umtime;
240 
241 	if (file->f_flags & O_NOATIME)
242 		return;
243 
244 	inode = file_inode(file);
245 	upperinode = ovl_inode_upper(inode);
246 
247 	if (!upperinode)
248 		return;
249 
250 	ctime = inode_get_ctime(inode);
251 	uctime = inode_get_ctime(upperinode);
252 	mtime = inode_get_mtime(inode);
253 	umtime = inode_get_mtime(upperinode);
254 	if ((!timespec64_equal(&mtime, &umtime)) ||
255 	     !timespec64_equal(&ctime, &uctime)) {
256 		inode_set_mtime_to_ts(inode, inode_get_mtime(upperinode));
257 		inode_set_ctime_to_ts(inode, uctime);
258 	}
259 
260 	touch_atime(&file->f_path);
261 }
262 
263 static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
264 {
265 	struct file *file = iocb->ki_filp;
266 	struct fd real;
267 	ssize_t ret;
268 	struct backing_file_ctx ctx = {
269 		.cred = ovl_creds(file_inode(file)->i_sb),
270 		.user_file = file,
271 		.accessed = ovl_file_accessed,
272 	};
273 
274 	if (!iov_iter_count(iter))
275 		return 0;
276 
277 	ret = ovl_real_fdget(file, &real);
278 	if (ret)
279 		return ret;
280 
281 	ret = backing_file_read_iter(real.file, iter, iocb, iocb->ki_flags,
282 				     &ctx);
283 	fdput(real);
284 
285 	return ret;
286 }
287 
288 static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
289 {
290 	struct file *file = iocb->ki_filp;
291 	struct inode *inode = file_inode(file);
292 	struct fd real;
293 	ssize_t ret;
294 	int ifl = iocb->ki_flags;
295 	struct backing_file_ctx ctx = {
296 		.cred = ovl_creds(inode->i_sb),
297 		.user_file = file,
298 		.end_write = ovl_file_modified,
299 	};
300 
301 	if (!iov_iter_count(iter))
302 		return 0;
303 
304 	inode_lock(inode);
305 	/* Update mode */
306 	ovl_copyattr(inode);
307 
308 	ret = ovl_real_fdget(file, &real);
309 	if (ret)
310 		goto out_unlock;
311 
312 	if (!ovl_should_sync(OVL_FS(inode->i_sb)))
313 		ifl &= ~(IOCB_DSYNC | IOCB_SYNC);
314 
315 	/*
316 	 * Overlayfs doesn't support deferred completions, don't copy
317 	 * this property in case it is set by the issuer.
318 	 */
319 	ifl &= ~IOCB_DIO_CALLER_COMP;
320 	ret = backing_file_write_iter(real.file, iter, iocb, ifl, &ctx);
321 	fdput(real);
322 
323 out_unlock:
324 	inode_unlock(inode);
325 
326 	return ret;
327 }
328 
329 static ssize_t ovl_splice_read(struct file *in, loff_t *ppos,
330 			       struct pipe_inode_info *pipe, size_t len,
331 			       unsigned int flags)
332 {
333 	struct fd real;
334 	ssize_t ret;
335 	struct backing_file_ctx ctx = {
336 		.cred = ovl_creds(file_inode(in)->i_sb),
337 		.user_file = in,
338 		.accessed = ovl_file_accessed,
339 	};
340 
341 	ret = ovl_real_fdget(in, &real);
342 	if (ret)
343 		return ret;
344 
345 	ret = backing_file_splice_read(real.file, ppos, pipe, len, flags, &ctx);
346 	fdput(real);
347 
348 	return ret;
349 }
350 
351 /*
352  * Calling iter_file_splice_write() directly from overlay's f_op may deadlock
353  * due to lock order inversion between pipe->mutex in iter_file_splice_write()
354  * and file_start_write(real.file) in ovl_write_iter().
355  *
356  * So do everything ovl_write_iter() does and call iter_file_splice_write() on
357  * the real file.
358  */
359 static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
360 				loff_t *ppos, size_t len, unsigned int flags)
361 {
362 	struct fd real;
363 	struct inode *inode = file_inode(out);
364 	ssize_t ret;
365 	struct backing_file_ctx ctx = {
366 		.cred = ovl_creds(inode->i_sb),
367 		.user_file = out,
368 		.end_write = ovl_file_modified,
369 	};
370 
371 	inode_lock(inode);
372 	/* Update mode */
373 	ovl_copyattr(inode);
374 
375 	ret = ovl_real_fdget(out, &real);
376 	if (ret)
377 		goto out_unlock;
378 
379 	ret = backing_file_splice_write(pipe, real.file, ppos, len, flags, &ctx);
380 	fdput(real);
381 
382 out_unlock:
383 	inode_unlock(inode);
384 
385 	return ret;
386 }
387 
388 static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
389 {
390 	struct fd real;
391 	const struct cred *old_cred;
392 	int ret;
393 
394 	ret = ovl_sync_status(OVL_FS(file_inode(file)->i_sb));
395 	if (ret <= 0)
396 		return ret;
397 
398 	ret = ovl_real_fdget_meta(file, &real, !datasync);
399 	if (ret)
400 		return ret;
401 
402 	/* Don't sync lower file for fear of receiving EROFS error */
403 	if (file_inode(real.file) == ovl_inode_upper(file_inode(file))) {
404 		old_cred = ovl_override_creds(file_inode(file)->i_sb);
405 		ret = vfs_fsync_range(real.file, start, end, datasync);
406 		revert_creds(old_cred);
407 	}
408 
409 	fdput(real);
410 
411 	return ret;
412 }
413 
414 static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
415 {
416 	struct file *realfile = file->private_data;
417 	struct backing_file_ctx ctx = {
418 		.cred = ovl_creds(file_inode(file)->i_sb),
419 		.user_file = file,
420 		.accessed = ovl_file_accessed,
421 	};
422 
423 	return backing_file_mmap(realfile, vma, &ctx);
424 }
425 
426 static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
427 {
428 	struct inode *inode = file_inode(file);
429 	struct fd real;
430 	const struct cred *old_cred;
431 	int ret;
432 
433 	inode_lock(inode);
434 	/* Update mode */
435 	ovl_copyattr(inode);
436 	ret = file_remove_privs(file);
437 	if (ret)
438 		goto out_unlock;
439 
440 	ret = ovl_real_fdget(file, &real);
441 	if (ret)
442 		goto out_unlock;
443 
444 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
445 	ret = vfs_fallocate(real.file, mode, offset, len);
446 	revert_creds(old_cred);
447 
448 	/* Update size */
449 	ovl_file_modified(file);
450 
451 	fdput(real);
452 
453 out_unlock:
454 	inode_unlock(inode);
455 
456 	return ret;
457 }
458 
459 static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
460 {
461 	struct fd real;
462 	const struct cred *old_cred;
463 	int ret;
464 
465 	ret = ovl_real_fdget(file, &real);
466 	if (ret)
467 		return ret;
468 
469 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
470 	ret = vfs_fadvise(real.file, offset, len, advice);
471 	revert_creds(old_cred);
472 
473 	fdput(real);
474 
475 	return ret;
476 }
477 
/* Selects which VFS primitive ovl_copyfile() runs on the real files */
enum ovl_copyop {
	OVL_COPY,	/* vfs_copy_file_range() */
	OVL_CLONE,	/* vfs_clone_file_range() */
	OVL_DEDUPE,	/* vfs_dedupe_file_range_one() */
};
483 
484 static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
485 			    struct file *file_out, loff_t pos_out,
486 			    loff_t len, unsigned int flags, enum ovl_copyop op)
487 {
488 	struct inode *inode_out = file_inode(file_out);
489 	struct fd real_in, real_out;
490 	const struct cred *old_cred;
491 	loff_t ret;
492 
493 	inode_lock(inode_out);
494 	if (op != OVL_DEDUPE) {
495 		/* Update mode */
496 		ovl_copyattr(inode_out);
497 		ret = file_remove_privs(file_out);
498 		if (ret)
499 			goto out_unlock;
500 	}
501 
502 	ret = ovl_real_fdget(file_out, &real_out);
503 	if (ret)
504 		goto out_unlock;
505 
506 	ret = ovl_real_fdget(file_in, &real_in);
507 	if (ret) {
508 		fdput(real_out);
509 		goto out_unlock;
510 	}
511 
512 	old_cred = ovl_override_creds(file_inode(file_out)->i_sb);
513 	switch (op) {
514 	case OVL_COPY:
515 		ret = vfs_copy_file_range(real_in.file, pos_in,
516 					  real_out.file, pos_out, len, flags);
517 		break;
518 
519 	case OVL_CLONE:
520 		ret = vfs_clone_file_range(real_in.file, pos_in,
521 					   real_out.file, pos_out, len, flags);
522 		break;
523 
524 	case OVL_DEDUPE:
525 		ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
526 						real_out.file, pos_out, len,
527 						flags);
528 		break;
529 	}
530 	revert_creds(old_cred);
531 
532 	/* Update size */
533 	ovl_file_modified(file_out);
534 
535 	fdput(real_in);
536 	fdput(real_out);
537 
538 out_unlock:
539 	inode_unlock(inode_out);
540 
541 	return ret;
542 }
543 
544 static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
545 				   struct file *file_out, loff_t pos_out,
546 				   size_t len, unsigned int flags)
547 {
548 	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
549 			    OVL_COPY);
550 }
551 
552 static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in,
553 				   struct file *file_out, loff_t pos_out,
554 				   loff_t len, unsigned int remap_flags)
555 {
556 	enum ovl_copyop op;
557 
558 	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
559 		return -EINVAL;
560 
561 	if (remap_flags & REMAP_FILE_DEDUP)
562 		op = OVL_DEDUPE;
563 	else
564 		op = OVL_CLONE;
565 
566 	/*
567 	 * Don't copy up because of a dedupe request, this wouldn't make sense
568 	 * most of the time (data would be duplicated instead of deduplicated).
569 	 */
570 	if (op == OVL_DEDUPE &&
571 	    (!ovl_inode_upper(file_inode(file_in)) ||
572 	     !ovl_inode_upper(file_inode(file_out))))
573 		return -EPERM;
574 
575 	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len,
576 			    remap_flags, op);
577 }
578 
579 static int ovl_flush(struct file *file, fl_owner_t id)
580 {
581 	struct fd real;
582 	const struct cred *old_cred;
583 	int err;
584 
585 	err = ovl_real_fdget(file, &real);
586 	if (err)
587 		return err;
588 
589 	if (real.file->f_op->flush) {
590 		old_cred = ovl_override_creds(file_inode(file)->i_sb);
591 		err = real.file->f_op->flush(real.file, id);
592 		revert_creds(old_cred);
593 	}
594 	fdput(real);
595 
596 	return err;
597 }
598 
599 const struct file_operations ovl_file_operations = {
600 	.open		= ovl_open,
601 	.release	= ovl_release,
602 	.llseek		= ovl_llseek,
603 	.read_iter	= ovl_read_iter,
604 	.write_iter	= ovl_write_iter,
605 	.fsync		= ovl_fsync,
606 	.mmap		= ovl_mmap,
607 	.fallocate	= ovl_fallocate,
608 	.fadvise	= ovl_fadvise,
609 	.flush		= ovl_flush,
610 	.splice_read    = ovl_splice_read,
611 	.splice_write   = ovl_splice_write,
612 
613 	.copy_file_range	= ovl_copy_file_range,
614 	.remap_file_range	= ovl_remap_file_range,
615 };
616