1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright (C) 2017 Red Hat, Inc.
4 */
5
6 #include <linux/cred.h>
7 #include <linux/file.h>
8 #include <linux/filelock.h>
9 #include <linux/mount.h>
10 #include <linux/xattr.h>
11 #include <linux/uio.h>
12 #include <linux/uaccess.h>
13 #include <linux/security.h>
14 #include <linux/fs.h>
15 #include <linux/backing-file.h>
16 #include "overlayfs.h"
17
/*
 * Classify @realinode relative to the overlay @inode (used for debug output):
 *   'l' - @realinode is not the overlay inode's upper inode
 *   'u' - @realinode is the upper inode and the overlay inode has upper data
 *   'm' - @realinode is the upper inode but ovl_has_upperdata() is false
 */
static char ovl_whatisit(struct inode *inode, struct inode *realinode)
{
	if (realinode != ovl_inode_upper(inode))
		return 'l';

	return ovl_has_upperdata(inode) ? 'u' : 'm';
}
27
ovl_open_realfile(const struct file * file,const struct path * realpath)28 static struct file *ovl_open_realfile(const struct file *file,
29 const struct path *realpath)
30 {
31 struct inode *realinode = d_inode(realpath->dentry);
32 struct inode *inode = file_inode(file);
33 struct mnt_idmap *real_idmap;
34 struct file *realfile;
35 int flags = file->f_flags | OVL_OPEN_FLAGS;
36 int acc_mode = ACC_MODE(flags);
37 int err;
38
39 if (flags & O_APPEND)
40 acc_mode |= MAY_APPEND;
41
42 with_ovl_creds(inode->i_sb) {
43 real_idmap = mnt_idmap(realpath->mnt);
44 err = inode_permission(real_idmap, realinode, MAY_OPEN | acc_mode);
45 if (err) {
46 realfile = ERR_PTR(err);
47 } else {
48 if (!inode_owner_or_capable(real_idmap, realinode))
49 flags &= ~O_NOATIME;
50
51 realfile = backing_file_open(file_user_path(file),
52 flags, realpath, current_cred());
53 }
54 }
55
56 pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
57 file, file, ovl_whatisit(inode, realinode), file->f_flags,
58 realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
59
60 return realfile;
61 }
62
/* The only flag bits ovl_change_flags() will update on a file. */
#define OVL_SETFL_MASK \
	(O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
64
ovl_change_flags(struct file * file,unsigned int flags)65 static int ovl_change_flags(struct file *file, unsigned int flags)
66 {
67 struct inode *inode = file_inode(file);
68 int err;
69
70 flags &= OVL_SETFL_MASK;
71
72 if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
73 return -EPERM;
74
75 if ((flags & O_DIRECT) && !(file->f_mode & FMODE_CAN_ODIRECT))
76 return -EINVAL;
77
78 if (file->f_op->check_flags) {
79 err = file->f_op->check_flags(flags);
80 if (err)
81 return err;
82 }
83
84 spin_lock(&file->f_lock);
85 file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags;
86 file->f_iocb_flags = iocb_flags(file);
87 spin_unlock(&file->f_lock);
88
89 return 0;
90 }
91
/* Per-open state stored in the overlay file's ->private_data. */
struct ovl_file {
	/* Real file supplied to ovl_file_alloc() */
	struct file *realfile;
	/* NULL until ovl_real_file_path() opens and stores a second real file */
	struct file *upperfile;
};
96
ovl_file_alloc(struct file * realfile)97 struct ovl_file *ovl_file_alloc(struct file *realfile)
98 {
99 struct ovl_file *of = kzalloc_obj(struct ovl_file);
100
101 if (unlikely(!of))
102 return NULL;
103
104 of->realfile = realfile;
105 return of;
106 }
107
ovl_file_free(struct ovl_file * of)108 void ovl_file_free(struct ovl_file *of)
109 {
110 fput(of->realfile);
111 if (of->upperfile)
112 fput(of->upperfile);
113 kfree(of);
114 }
115
ovl_is_real_file(const struct file * realfile,const struct path * realpath)116 static bool ovl_is_real_file(const struct file *realfile,
117 const struct path *realpath)
118 {
119 return file_inode(realfile) == d_inode(realpath->dentry);
120 }
121
ovl_real_file_path(const struct file * file,const struct path * realpath)122 static struct file *ovl_real_file_path(const struct file *file,
123 const struct path *realpath)
124 {
125 struct ovl_file *of = file->private_data;
126 struct file *realfile = of->realfile;
127
128 if (WARN_ON_ONCE(!realpath->dentry))
129 return ERR_PTR(-EIO);
130
131 /*
132 * If the realfile that we want is not where the data used to be at
133 * open time, either we'd been copied up, or it's an fsync of a
134 * metacopied file. We need the upperfile either way, so see if it
135 * is already opened and if it is not then open and store it.
136 */
137 if (unlikely(!ovl_is_real_file(realfile, realpath))) {
138 struct file *upperfile = READ_ONCE(of->upperfile);
139 struct file *old;
140
141 if (!upperfile) { /* Nobody opened upperfile yet */
142 upperfile = ovl_open_realfile(file, realpath);
143 if (IS_ERR(upperfile))
144 return upperfile;
145
146 /* Store the upperfile for later */
147 old = cmpxchg_release(&of->upperfile, NULL, upperfile);
148 if (old) { /* Someone opened upperfile before us */
149 fput(upperfile);
150 upperfile = old;
151 }
152 }
153 /*
154 * Stored file must be from the right inode, unless someone's
155 * been corrupting the upper layer.
156 */
157 if (WARN_ON_ONCE(!ovl_is_real_file(upperfile, realpath)))
158 return ERR_PTR(-EIO);
159
160 realfile = upperfile;
161 }
162
163 /* Did the flags change since open? */
164 if (unlikely((file->f_flags ^ realfile->f_flags) & ~OVL_OPEN_FLAGS)) {
165 int err = ovl_change_flags(realfile, file->f_flags);
166
167 if (err)
168 return ERR_PTR(err);
169 }
170
171 return realfile;
172 }
173
ovl_real_file(const struct file * file)174 static struct file *ovl_real_file(const struct file *file)
175 {
176 struct dentry *dentry = file_dentry(file);
177 struct path realpath;
178 int err;
179
180 if (d_is_dir(dentry)) {
181 struct file *f = ovl_dir_real_file(file, false);
182
183 if (WARN_ON_ONCE(!f))
184 return ERR_PTR(-EIO);
185 return f;
186 }
187
188 /* lazy lookup and verify of lowerdata */
189 err = ovl_verify_lowerdata(dentry);
190 if (err)
191 return ERR_PTR(err);
192
193 ovl_path_realdata(dentry, &realpath);
194
195 return ovl_real_file_path(file, &realpath);
196 }
197
ovl_open(struct inode * inode,struct file * file)198 static int ovl_open(struct inode *inode, struct file *file)
199 {
200 struct dentry *dentry = file_dentry(file);
201 struct file *realfile;
202 struct path realpath;
203 struct ovl_file *of;
204 int err;
205
206 /* lazy lookup and verify lowerdata */
207 err = ovl_verify_lowerdata(dentry);
208 if (err)
209 return err;
210
211 err = ovl_maybe_copy_up(dentry, file->f_flags);
212 if (err)
213 return err;
214
215 /* No longer need these flags, so don't pass them on to underlying fs */
216 file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
217
218 ovl_path_realdata(dentry, &realpath);
219 if (!realpath.dentry)
220 return -EIO;
221
222 realfile = ovl_open_realfile(file, &realpath);
223 if (IS_ERR(realfile))
224 return PTR_ERR(realfile);
225
226 of = ovl_file_alloc(realfile);
227 if (!of) {
228 fput(realfile);
229 return -ENOMEM;
230 }
231
232 file->private_data = of;
233
234 return 0;
235 }
236
ovl_release(struct inode * inode,struct file * file)237 static int ovl_release(struct inode *inode, struct file *file)
238 {
239 ovl_file_free(file->private_data);
240 return 0;
241 }
242
/*
 * ->llseek(): seek on the real file, keeping the overlay file's f_pos as the
 * authoritative position.
 *
 * Returns the resulting offset or a negative errno.
 */
static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file_inode(file);
	struct file *realfile;
	loff_t pos;

	/*
	 * Zero-offset SEEK_CUR and SEEK_SET do not need to involve the real
	 * fs, so handle them here to optimize for concurrent callers.
	 */
	if (!offset) {
		switch (whence) {
		case SEEK_CUR:
			return file->f_pos;
		case SEEK_SET:
			return vfs_setpos(file, 0, 0);
		default:
			break;
		}
	}

	realfile = ovl_real_file(file);
	if (IS_ERR(realfile))
		return PTR_ERR(realfile);

	/*
	 * Overlay file f_pos is the master copy that is preserved
	 * through copy up and modified on read/write, but only real
	 * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose
	 * limitations that are more strict than ->s_maxbytes for specific
	 * files, so we use the real file to perform seeks.
	 */
	ovl_inode_lock(inode);
	realfile->f_pos = file->f_pos;

	with_ovl_creds(inode->i_sb) {
		pos = vfs_llseek(realfile, offset, whence);
	}

	file->f_pos = realfile->f_pos;
	ovl_inode_unlock(inode);

	return pos;
}
283
/* Refresh the overlay inode's attributes after @file has been modified. */
static void ovl_file_modified(struct file *file)
{
	struct inode *inode = file_inode(file);

	/* Update size/mtime */
	ovl_copyattr(inode);
}
289
/*
 * backing_file_ctx ->end_write hook: refresh the overlay inode's attributes.
 * @ret is not used.
 */
static void ovl_file_end_write(struct kiocb *iocb, ssize_t ret)
{
	struct file *file = iocb->ki_filp;

	ovl_file_modified(file);
}
294
ovl_file_accessed(struct file * file)295 static void ovl_file_accessed(struct file *file)
296 {
297 struct inode *inode, *upperinode;
298 struct timespec64 ctime, uctime;
299 struct timespec64 mtime, umtime;
300
301 if (file->f_flags & O_NOATIME)
302 return;
303
304 inode = file_inode(file);
305 upperinode = ovl_inode_upper(inode);
306
307 if (!upperinode)
308 return;
309
310 ctime = inode_get_ctime(inode);
311 uctime = inode_get_ctime(upperinode);
312 mtime = inode_get_mtime(inode);
313 umtime = inode_get_mtime(upperinode);
314 if ((!timespec64_equal(&mtime, &umtime)) ||
315 !timespec64_equal(&ctime, &uctime)) {
316 inode_set_mtime_to_ts(inode, inode_get_mtime(upperinode));
317 inode_set_ctime_to_ts(inode, uctime);
318 }
319
320 touch_atime(&file->f_path);
321 }
322
ovl_read_iter(struct kiocb * iocb,struct iov_iter * iter)323 static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
324 {
325 struct file *file = iocb->ki_filp;
326 struct file *realfile;
327 struct backing_file_ctx ctx = {
328 .cred = ovl_creds(file_inode(file)->i_sb),
329 .accessed = ovl_file_accessed,
330 };
331
332 if (!iov_iter_count(iter))
333 return 0;
334
335 realfile = ovl_real_file(file);
336 if (IS_ERR(realfile))
337 return PTR_ERR(realfile);
338
339 return backing_file_read_iter(realfile, iter, iocb, iocb->ki_flags,
340 &ctx);
341 }
342
ovl_write_iter(struct kiocb * iocb,struct iov_iter * iter)343 static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
344 {
345 struct file *file = iocb->ki_filp;
346 struct inode *inode = file_inode(file);
347 struct file *realfile;
348 ssize_t ret;
349 int ifl = iocb->ki_flags;
350 struct backing_file_ctx ctx = {
351 .cred = ovl_creds(inode->i_sb),
352 .end_write = ovl_file_end_write,
353 };
354
355 if (!iov_iter_count(iter))
356 return 0;
357
358 inode_lock(inode);
359 /* Update mode */
360 ovl_copyattr(inode);
361
362 realfile = ovl_real_file(file);
363 ret = PTR_ERR(realfile);
364 if (IS_ERR(realfile))
365 goto out_unlock;
366
367 if (!ovl_should_sync(OVL_FS(inode->i_sb)))
368 ifl &= ~(IOCB_DSYNC | IOCB_SYNC);
369
370 ret = backing_file_write_iter(realfile, iter, iocb, ifl, &ctx);
371
372 out_unlock:
373 inode_unlock(inode);
374
375 return ret;
376 }
377
ovl_splice_read(struct file * in,loff_t * ppos,struct pipe_inode_info * pipe,size_t len,unsigned int flags)378 static ssize_t ovl_splice_read(struct file *in, loff_t *ppos,
379 struct pipe_inode_info *pipe, size_t len,
380 unsigned int flags)
381 {
382 struct file *realfile;
383 ssize_t ret;
384 struct backing_file_ctx ctx = {
385 .cred = ovl_creds(file_inode(in)->i_sb),
386 .accessed = ovl_file_accessed,
387 };
388 struct kiocb iocb;
389
390 realfile = ovl_real_file(in);
391 if (IS_ERR(realfile))
392 return PTR_ERR(realfile);
393
394 init_sync_kiocb(&iocb, in);
395 iocb.ki_pos = *ppos;
396 ret = backing_file_splice_read(realfile, &iocb, pipe, len, flags, &ctx);
397 *ppos = iocb.ki_pos;
398
399 return ret;
400 }
401
402 /*
403 * Calling iter_file_splice_write() directly from overlay's f_op may deadlock
404 * due to lock order inversion between pipe->mutex in iter_file_splice_write()
405 * and file_start_write(realfile) in ovl_write_iter().
406 *
407 * So do everything ovl_write_iter() does and call iter_file_splice_write() on
408 * the real file.
409 */
ovl_splice_write(struct pipe_inode_info * pipe,struct file * out,loff_t * ppos,size_t len,unsigned int flags)410 static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
411 loff_t *ppos, size_t len, unsigned int flags)
412 {
413 struct file *realfile;
414 struct inode *inode = file_inode(out);
415 ssize_t ret;
416 struct backing_file_ctx ctx = {
417 .cred = ovl_creds(inode->i_sb),
418 .end_write = ovl_file_end_write,
419 };
420 struct kiocb iocb;
421
422 inode_lock(inode);
423 /* Update mode */
424 ovl_copyattr(inode);
425
426 realfile = ovl_real_file(out);
427 ret = PTR_ERR(realfile);
428 if (IS_ERR(realfile))
429 goto out_unlock;
430
431 init_sync_kiocb(&iocb, out);
432 iocb.ki_pos = *ppos;
433 ret = backing_file_splice_write(pipe, realfile, &iocb, len, flags, &ctx);
434 *ppos = iocb.ki_pos;
435
436 out_unlock:
437 inode_unlock(inode);
438
439 return ret;
440 }
441
/*
 * ->fsync(): sync the upper real file, if there is one worth syncing.
 *
 * Returns early with ovl_sync_status()'s result when that is <= 0, and with
 * 0 when the dentry has no upper or when this is a datasync of a merge-type
 * dentry.  Otherwise returns the result of vfs_fsync_range() on the upper
 * file, or a negative errno if the upper file could not be obtained.
 */
static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = file_inode(file);
	enum ovl_path_type type;
	struct path upperpath;
	struct file *upperfile;
	int ret;

	ret = ovl_sync_status(OVL_FS(inode->i_sb));
	if (ret <= 0)
		return ret;

	/* Don't sync lower file for fear of receiving EROFS error */
	type = ovl_path_type(dentry);
	if (!OVL_TYPE_UPPER(type))
		return 0;
	if (datasync && OVL_TYPE_MERGE(type))
		return 0;

	ovl_path_upper(dentry, &upperpath);
	upperfile = ovl_real_file_path(file, &upperpath);
	if (IS_ERR(upperfile))
		return PTR_ERR(upperfile);

	with_ovl_creds(inode->i_sb) {
		ret = vfs_fsync_range(upperfile, start, end, datasync);
	}

	return ret;
}
467
ovl_mmap(struct file * file,struct vm_area_struct * vma)468 static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
469 {
470 struct ovl_file *of = file->private_data;
471 struct backing_file_ctx ctx = {
472 .cred = ovl_creds(file_inode(file)->i_sb),
473 .accessed = ovl_file_accessed,
474 };
475
476 return backing_file_mmap(of->realfile, vma, &ctx);
477 }
478
/*
 * ->fallocate(): under the overlay inode lock, strip privileges from @file,
 * run vfs_fallocate() on the real file and refresh the overlay inode's
 * attributes afterwards.
 *
 * Returns 0 on success or a negative errno.
 */
static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{
	struct inode *inode = file_inode(file);
	struct file *realfile;
	int ret;

	inode_lock(inode);

	/* Update mode */
	ovl_copyattr(inode);
	ret = file_remove_privs(file);
	if (ret)
		goto unlock;

	realfile = ovl_real_file(file);
	if (IS_ERR(realfile)) {
		ret = PTR_ERR(realfile);
		goto unlock;
	}

	with_ovl_creds(inode->i_sb) {
		ret = vfs_fallocate(realfile, mode, offset, len);
	}

	/* Update size */
	ovl_file_modified(file);

unlock:
	inode_unlock(inode);

	return ret;
}
508
/* ->fadvise(): pass the advice on to the real file via vfs_fadvise(). */
static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
{
	struct file *realfile = ovl_real_file(file);
	int ret;

	if (IS_ERR(realfile))
		return PTR_ERR(realfile);

	with_ovl_creds(file_inode(file)->i_sb) {
		ret = vfs_fadvise(realfile, offset, len, advice);
	}

	return ret;
}
520
/* Selects which VFS range operation ovl_copyfile() performs. */
enum ovl_copyop {
	OVL_COPY,	/* vfs_copy_file_range() */
	OVL_CLONE,	/* vfs_clone_file_range() */
	OVL_DEDUPE,	/* vfs_dedupe_file_range_one() */
};
526
/*
 * Common implementation of copy_file_range / clone / dedupe between two
 * overlay files.
 *
 * Runs under the output inode's lock.  For anything but dedupe, privileges
 * are stripped from @file_out first.  The selected VFS operation is then run
 * on the two real files and the output inode's attributes are refreshed.
 *
 * Returns the VFS operation's result, or a negative errno if setup failed.
 */
static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
			   struct file *file_out, loff_t pos_out,
			   loff_t len, unsigned int flags, enum ovl_copyop op)
{
	struct inode *inode_out = file_inode(file_out);
	struct file *real_in, *real_out;
	loff_t ret;

	inode_lock(inode_out);

	if (op != OVL_DEDUPE) {
		/* Update mode */
		ovl_copyattr(inode_out);
		ret = file_remove_privs(file_out);
		if (ret)
			goto unlock;
	}

	real_out = ovl_real_file(file_out);
	if (IS_ERR(real_out)) {
		ret = PTR_ERR(real_out);
		goto unlock;
	}

	real_in = ovl_real_file(file_in);
	if (IS_ERR(real_in)) {
		ret = PTR_ERR(real_in);
		goto unlock;
	}

	with_ovl_creds(file_inode(file_out)->i_sb) {
		if (op == OVL_COPY)
			ret = vfs_copy_file_range(real_in, pos_in,
						  real_out, pos_out,
						  len, flags);
		else if (op == OVL_CLONE)
			ret = vfs_clone_file_range(real_in, pos_in,
						   real_out, pos_out,
						   len, flags);
		else if (op == OVL_DEDUPE)
			ret = vfs_dedupe_file_range_one(real_in, pos_in,
							real_out, pos_out,
							len, flags);
	}

	/* Update size */
	ovl_file_modified(file_out);

unlock:
	inode_unlock(inode_out);

	return ret;
}
582
ovl_copy_file_range(struct file * file_in,loff_t pos_in,struct file * file_out,loff_t pos_out,size_t len,unsigned int flags)583 static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
584 struct file *file_out, loff_t pos_out,
585 size_t len, unsigned int flags)
586 {
587 return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
588 OVL_COPY);
589 }
590
/*
 * ->remap_file_range(): clone or dedupe through ovl_copyfile().
 *
 * Returns -EINVAL for remap flags other than REMAP_FILE_DEDUP and
 * REMAP_FILE_ADVISORY, and -EPERM for a dedupe request where either file has
 * no upper inode.
 */
static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in,
				   struct file *file_out, loff_t pos_out,
				   loff_t len, unsigned int remap_flags)
{
	const unsigned int supported = REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY;
	bool dedupe;

	if (remap_flags & ~supported)
		return -EINVAL;

	dedupe = remap_flags & REMAP_FILE_DEDUP;

	/*
	 * Don't copy up because of a dedupe request, this wouldn't make sense
	 * most of the time (data would be duplicated instead of deduplicated).
	 */
	if (dedupe && !(ovl_inode_upper(file_inode(file_in)) &&
			ovl_inode_upper(file_inode(file_out))))
		return -EPERM;

	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len,
			    remap_flags, dedupe ? OVL_DEDUPE : OVL_CLONE);
}
617
/*
 * ->flush(): call the real file's ->flush() if it has one.
 *
 * Returns 0 when the real file has no ->flush(), otherwise its result, or a
 * negative errno if the real file could not be resolved.
 */
static int ovl_flush(struct file *file, fl_owner_t id)
{
	struct file *realfile = ovl_real_file(file);
	int err = 0;

	if (IS_ERR(realfile))
		return PTR_ERR(realfile);

	if (!realfile->f_op->flush)
		return 0;

	with_ovl_creds(file_inode(file)->i_sb) {
		err = realfile->f_op->flush(realfile, id);
	}

	return err;
}
634
635 const struct file_operations ovl_file_operations = {
636 .open = ovl_open,
637 .release = ovl_release,
638 .llseek = ovl_llseek,
639 .read_iter = ovl_read_iter,
640 .write_iter = ovl_write_iter,
641 .fsync = ovl_fsync,
642 .mmap = ovl_mmap,
643 .fallocate = ovl_fallocate,
644 .fadvise = ovl_fadvise,
645 .flush = ovl_flush,
646 .splice_read = ovl_splice_read,
647 .splice_write = ovl_splice_write,
648
649 .copy_file_range = ovl_copy_file_range,
650 .remap_file_range = ovl_remap_file_range,
651 .setlease = generic_setlease,
652 };
653