1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2017 Red Hat, Inc. 4 */ 5 6 #include <linux/cred.h> 7 #include <linux/file.h> 8 #include <linux/mount.h> 9 #include <linux/xattr.h> 10 #include <linux/uio.h> 11 #include <linux/uaccess.h> 12 #include <linux/security.h> 13 #include <linux/fs.h> 14 #include <linux/backing-file.h> 15 #include "overlayfs.h" 16 17 static char ovl_whatisit(struct inode *inode, struct inode *realinode) 18 { 19 if (realinode != ovl_inode_upper(inode)) 20 return 'l'; 21 if (ovl_has_upperdata(inode)) 22 return 'u'; 23 else 24 return 'm'; 25 } 26 27 static struct file *ovl_open_realfile(const struct file *file, 28 const struct path *realpath) 29 { 30 struct inode *realinode = d_inode(realpath->dentry); 31 struct inode *inode = file_inode(file); 32 struct mnt_idmap *real_idmap; 33 struct file *realfile; 34 const struct cred *old_cred; 35 int flags = file->f_flags | OVL_OPEN_FLAGS; 36 int acc_mode = ACC_MODE(flags); 37 int err; 38 39 if (flags & O_APPEND) 40 acc_mode |= MAY_APPEND; 41 42 old_cred = ovl_override_creds(inode->i_sb); 43 real_idmap = mnt_idmap(realpath->mnt); 44 err = inode_permission(real_idmap, realinode, MAY_OPEN | acc_mode); 45 if (err) { 46 realfile = ERR_PTR(err); 47 } else { 48 if (!inode_owner_or_capable(real_idmap, realinode)) 49 flags &= ~O_NOATIME; 50 51 realfile = backing_file_open(file_user_path(file), 52 flags, realpath, current_cred()); 53 } 54 ovl_revert_creds(old_cred); 55 56 pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n", 57 file, file, ovl_whatisit(inode, realinode), file->f_flags, 58 realfile, IS_ERR(realfile) ? 0 : realfile->f_flags); 59 60 return realfile; 61 } 62 63 #define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT) 64 65 static int ovl_change_flags(struct file *file, unsigned int flags) 66 { 67 struct inode *inode = file_inode(file); 68 int err; 69 70 flags &= OVL_SETFL_MASK; 71 72 if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode)) 73 return -EPERM; 74 75 if ((flags & O_DIRECT) && !(file->f_mode & FMODE_CAN_ODIRECT)) 76 return -EINVAL; 77 78 if (file->f_op->check_flags) { 79 err = file->f_op->check_flags(flags); 80 if (err) 81 return err; 82 } 83 84 spin_lock(&file->f_lock); 85 file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags; 86 file->f_iocb_flags = iocb_flags(file); 87 spin_unlock(&file->f_lock); 88 89 return 0; 90 } 91 92 struct ovl_file { 93 struct file *realfile; 94 struct file *upperfile; 95 }; 96 97 struct ovl_file *ovl_file_alloc(struct file *realfile) 98 { 99 struct ovl_file *of = kzalloc(sizeof(struct ovl_file), GFP_KERNEL); 100 101 if (unlikely(!of)) 102 return NULL; 103 104 of->realfile = realfile; 105 return of; 106 } 107 108 void ovl_file_free(struct ovl_file *of) 109 { 110 fput(of->realfile); 111 if (of->upperfile) 112 fput(of->upperfile); 113 kfree(of); 114 } 115 116 static bool ovl_is_real_file(const struct file *realfile, 117 const struct path *realpath) 118 { 119 return file_inode(realfile) == d_inode(realpath->dentry); 120 } 121 122 static struct file *ovl_real_file_path(const struct file *file, 123 const struct path *realpath) 124 { 125 struct ovl_file *of = file->private_data; 126 struct file *realfile = of->realfile; 127 128 if (WARN_ON_ONCE(!realpath->dentry)) 129 return ERR_PTR(-EIO); 130 131 /* 132 * If the realfile that we want is not where the data used to be at 133 * open time, either we'd been copied up, or it's an fsync of a 134 * metacopied file. We need the upperfile either way, so see if it 135 * is already opened and if it is not then open and store it. 136 */ 137 if (unlikely(!ovl_is_real_file(realfile, realpath))) { 138 struct file *upperfile = READ_ONCE(of->upperfile); 139 struct file *old; 140 141 if (!upperfile) { /* Nobody opened upperfile yet */ 142 upperfile = ovl_open_realfile(file, realpath); 143 if (IS_ERR(upperfile)) 144 return upperfile; 145 146 /* Store the upperfile for later */ 147 old = cmpxchg_release(&of->upperfile, NULL, upperfile); 148 if (old) { /* Someone opened upperfile before us */ 149 fput(upperfile); 150 upperfile = old; 151 } 152 } 153 /* 154 * Stored file must be from the right inode, unless someone's 155 * been corrupting the upper layer. 156 */ 157 if (WARN_ON_ONCE(!ovl_is_real_file(upperfile, realpath))) 158 return ERR_PTR(-EIO); 159 160 realfile = upperfile; 161 } 162 163 /* Did the flags change since open? */ 164 if (unlikely((file->f_flags ^ realfile->f_flags) & ~OVL_OPEN_FLAGS)) { 165 int err = ovl_change_flags(realfile, file->f_flags); 166 167 if (err) 168 return ERR_PTR(err); 169 } 170 171 return realfile; 172 } 173 174 static struct file *ovl_real_file(const struct file *file) 175 { 176 struct dentry *dentry = file_dentry(file); 177 struct path realpath; 178 int err; 179 180 if (d_is_dir(dentry)) { 181 struct file *f = ovl_dir_real_file(file, false); 182 183 if (WARN_ON_ONCE(!f)) 184 return ERR_PTR(-EIO); 185 return f; 186 } 187 188 /* lazy lookup and verify of lowerdata */ 189 err = ovl_verify_lowerdata(dentry); 190 if (err) 191 return ERR_PTR(err); 192 193 ovl_path_realdata(dentry, &realpath); 194 195 return ovl_real_file_path(file, &realpath); 196 } 197 198 static int ovl_open(struct inode *inode, struct file *file) 199 { 200 struct dentry *dentry = file_dentry(file); 201 struct file *realfile; 202 struct path realpath; 203 struct ovl_file *of; 204 int err; 205 206 /* lazy lookup and verify lowerdata */ 207 err = ovl_verify_lowerdata(dentry); 208 if (err) 209 return err; 210 211 err = ovl_maybe_copy_up(dentry, file->f_flags); 212 if (err) 213 return err; 214 215 /* No longer need these flags, so don't pass them on to underlying fs */ 216 file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); 217 218 ovl_path_realdata(dentry, &realpath); 219 if (!realpath.dentry) 220 return -EIO; 221 222 realfile = ovl_open_realfile(file, &realpath); 223 if (IS_ERR(realfile)) 224 return PTR_ERR(realfile); 225 226 of = ovl_file_alloc(realfile); 227 if (!of) { 228 fput(realfile); 229 return -ENOMEM; 230 } 231 232 file->private_data = of; 233 234 return 0; 235 } 236 237 static int ovl_release(struct inode *inode, struct file *file) 238 { 239 ovl_file_free(file->private_data); 240 return 0; 241 } 242 243 static loff_t ovl_llseek(struct file *file, loff_t offset, int whence) 244 { 245 struct inode *inode = file_inode(file); 246 struct file *realfile; 247 const struct cred *old_cred; 248 loff_t ret; 249 250 /* 251 * The two special cases below do not need to involve real fs, 252 * so we can optimizing concurrent callers. 253 */ 254 if (offset == 0) { 255 if (whence == SEEK_CUR) 256 return file->f_pos; 257 258 if (whence == SEEK_SET) 259 return vfs_setpos(file, 0, 0); 260 } 261 262 realfile = ovl_real_file(file); 263 if (IS_ERR(realfile)) 264 return PTR_ERR(realfile); 265 266 /* 267 * Overlay file f_pos is the master copy that is preserved 268 * through copy up and modified on read/write, but only real 269 * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose 270 * limitations that are more strict than ->s_maxbytes for specific 271 * files, so we use the real file to perform seeks. 272 */ 273 ovl_inode_lock(inode); 274 realfile->f_pos = file->f_pos; 275 276 old_cred = ovl_override_creds(inode->i_sb); 277 ret = vfs_llseek(realfile, offset, whence); 278 ovl_revert_creds(old_cred); 279 280 file->f_pos = realfile->f_pos; 281 ovl_inode_unlock(inode); 282 283 return ret; 284 } 285 286 static void ovl_file_modified(struct file *file) 287 { 288 /* Update size/mtime */ 289 ovl_copyattr(file_inode(file)); 290 } 291 292 static void ovl_file_end_write(struct kiocb *iocb, ssize_t ret) 293 { 294 ovl_file_modified(iocb->ki_filp); 295 } 296 297 static void ovl_file_accessed(struct file *file) 298 { 299 struct inode *inode, *upperinode; 300 struct timespec64 ctime, uctime; 301 struct timespec64 mtime, umtime; 302 303 if (file->f_flags & O_NOATIME) 304 return; 305 306 inode = file_inode(file); 307 upperinode = ovl_inode_upper(inode); 308 309 if (!upperinode) 310 return; 311 312 ctime = inode_get_ctime(inode); 313 uctime = inode_get_ctime(upperinode); 314 mtime = inode_get_mtime(inode); 315 umtime = inode_get_mtime(upperinode); 316 if ((!timespec64_equal(&mtime, &umtime)) || 317 !timespec64_equal(&ctime, &uctime)) { 318 inode_set_mtime_to_ts(inode, inode_get_mtime(upperinode)); 319 inode_set_ctime_to_ts(inode, uctime); 320 } 321 322 touch_atime(&file->f_path); 323 } 324 325 static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) 326 { 327 struct file *file = iocb->ki_filp; 328 struct file *realfile; 329 struct backing_file_ctx ctx = { 330 .cred = ovl_creds(file_inode(file)->i_sb), 331 .accessed = ovl_file_accessed, 332 }; 333 334 if (!iov_iter_count(iter)) 335 return 0; 336 337 realfile = ovl_real_file(file); 338 if (IS_ERR(realfile)) 339 return PTR_ERR(realfile); 340 341 return backing_file_read_iter(realfile, iter, iocb, iocb->ki_flags, 342 &ctx); 343 } 344 345 static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) 346 { 347 struct file *file = iocb->ki_filp; 348 struct inode *inode = file_inode(file); 349 struct file *realfile; 350 ssize_t ret; 351 int ifl = iocb->ki_flags; 352 struct backing_file_ctx ctx = { 353 .cred = ovl_creds(inode->i_sb), 354 .end_write = ovl_file_end_write, 355 }; 356 357 if (!iov_iter_count(iter)) 358 return 0; 359 360 inode_lock(inode); 361 /* Update mode */ 362 ovl_copyattr(inode); 363 364 realfile = ovl_real_file(file); 365 ret = PTR_ERR(realfile); 366 if (IS_ERR(realfile)) 367 goto out_unlock; 368 369 if (!ovl_should_sync(OVL_FS(inode->i_sb))) 370 ifl &= ~(IOCB_DSYNC | IOCB_SYNC); 371 372 ret = backing_file_write_iter(realfile, iter, iocb, ifl, &ctx); 373 374 out_unlock: 375 inode_unlock(inode); 376 377 return ret; 378 } 379 380 static ssize_t ovl_splice_read(struct file *in, loff_t *ppos, 381 struct pipe_inode_info *pipe, size_t len, 382 unsigned int flags) 383 { 384 struct file *realfile; 385 ssize_t ret; 386 struct backing_file_ctx ctx = { 387 .cred = ovl_creds(file_inode(in)->i_sb), 388 .accessed = ovl_file_accessed, 389 }; 390 struct kiocb iocb; 391 392 realfile = ovl_real_file(in); 393 if (IS_ERR(realfile)) 394 return PTR_ERR(realfile); 395 396 init_sync_kiocb(&iocb, in); 397 iocb.ki_pos = *ppos; 398 ret = backing_file_splice_read(realfile, &iocb, pipe, len, flags, &ctx); 399 *ppos = iocb.ki_pos; 400 401 return ret; 402 } 403 404 /* 405 * Calling iter_file_splice_write() directly from overlay's f_op may deadlock 406 * due to lock order inversion between pipe->mutex in iter_file_splice_write() 407 * and file_start_write(realfile) in ovl_write_iter(). 408 * 409 * So do everything ovl_write_iter() does and call iter_file_splice_write() on 410 * the real file. 411 */ 412 static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out, 413 loff_t *ppos, size_t len, unsigned int flags) 414 { 415 struct file *realfile; 416 struct inode *inode = file_inode(out); 417 ssize_t ret; 418 struct backing_file_ctx ctx = { 419 .cred = ovl_creds(inode->i_sb), 420 .end_write = ovl_file_end_write, 421 }; 422 struct kiocb iocb; 423 424 inode_lock(inode); 425 /* Update mode */ 426 ovl_copyattr(inode); 427 428 realfile = ovl_real_file(out); 429 ret = PTR_ERR(realfile); 430 if (IS_ERR(realfile)) 431 goto out_unlock; 432 433 init_sync_kiocb(&iocb, out); 434 iocb.ki_pos = *ppos; 435 ret = backing_file_splice_write(pipe, realfile, &iocb, len, flags, &ctx); 436 *ppos = iocb.ki_pos; 437 438 out_unlock: 439 inode_unlock(inode); 440 441 return ret; 442 } 443 444 static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync) 445 { 446 struct dentry *dentry = file_dentry(file); 447 enum ovl_path_type type; 448 struct path upperpath; 449 struct file *upperfile; 450 const struct cred *old_cred; 451 int ret; 452 453 ret = ovl_sync_status(OVL_FS(file_inode(file)->i_sb)); 454 if (ret <= 0) 455 return ret; 456 457 /* Don't sync lower file for fear of receiving EROFS error */ 458 type = ovl_path_type(dentry); 459 if (!OVL_TYPE_UPPER(type) || (datasync && OVL_TYPE_MERGE(type))) 460 return 0; 461 462 ovl_path_upper(dentry, &upperpath); 463 upperfile = ovl_real_file_path(file, &upperpath); 464 if (IS_ERR(upperfile)) 465 return PTR_ERR(upperfile); 466 467 old_cred = ovl_override_creds(file_inode(file)->i_sb); 468 ret = vfs_fsync_range(upperfile, start, end, datasync); 469 ovl_revert_creds(old_cred); 470 471 return ret; 472 } 473 474 static int ovl_mmap(struct file *file, struct vm_area_struct *vma) 475 { 476 struct ovl_file *of = file->private_data; 477 struct backing_file_ctx ctx = { 478 .cred = ovl_creds(file_inode(file)->i_sb), 479 .accessed = ovl_file_accessed, 480 }; 481 482 return backing_file_mmap(of->realfile, vma, &ctx); 483 } 484 485 static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len) 486 { 487 struct inode *inode = file_inode(file); 488 struct file *realfile; 489 const struct cred *old_cred; 490 int ret; 491 492 inode_lock(inode); 493 /* Update mode */ 494 ovl_copyattr(inode); 495 ret = file_remove_privs(file); 496 if (ret) 497 goto out_unlock; 498 499 realfile = ovl_real_file(file); 500 ret = PTR_ERR(realfile); 501 if (IS_ERR(realfile)) 502 goto out_unlock; 503 504 old_cred = ovl_override_creds(file_inode(file)->i_sb); 505 ret = vfs_fallocate(realfile, mode, offset, len); 506 ovl_revert_creds(old_cred); 507 508 /* Update size */ 509 ovl_file_modified(file); 510 511 out_unlock: 512 inode_unlock(inode); 513 514 return ret; 515 } 516 517 static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice) 518 { 519 struct file *realfile; 520 const struct cred *old_cred; 521 int ret; 522 523 realfile = ovl_real_file(file); 524 if (IS_ERR(realfile)) 525 return PTR_ERR(realfile); 526 527 old_cred = ovl_override_creds(file_inode(file)->i_sb); 528 ret = vfs_fadvise(realfile, offset, len, advice); 529 ovl_revert_creds(old_cred); 530 531 return ret; 532 } 533 534 enum ovl_copyop { 535 OVL_COPY, 536 OVL_CLONE, 537 OVL_DEDUPE, 538 }; 539 540 static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in, 541 struct file *file_out, loff_t pos_out, 542 loff_t len, unsigned int flags, enum ovl_copyop op) 543 { 544 struct inode *inode_out = file_inode(file_out); 545 struct file *realfile_in, *realfile_out; 546 const struct cred *old_cred; 547 loff_t ret; 548 549 inode_lock(inode_out); 550 if (op != OVL_DEDUPE) { 551 /* Update mode */ 552 ovl_copyattr(inode_out); 553 ret = file_remove_privs(file_out); 554 if (ret) 555 goto out_unlock; 556 } 557 558 realfile_out = ovl_real_file(file_out); 559 ret = PTR_ERR(realfile_out); 560 if (IS_ERR(realfile_out)) 561 goto out_unlock; 562 563 realfile_in = ovl_real_file(file_in); 564 ret = PTR_ERR(realfile_in); 565 if (IS_ERR(realfile_in)) 566 goto out_unlock; 567 568 old_cred = ovl_override_creds(file_inode(file_out)->i_sb); 569 switch (op) { 570 case OVL_COPY: 571 ret = vfs_copy_file_range(realfile_in, pos_in, 572 realfile_out, pos_out, len, flags); 573 break; 574 575 case OVL_CLONE: 576 ret = vfs_clone_file_range(realfile_in, pos_in, 577 realfile_out, pos_out, len, flags); 578 break; 579 580 case OVL_DEDUPE: 581 ret = vfs_dedupe_file_range_one(realfile_in, pos_in, 582 realfile_out, pos_out, len, 583 flags); 584 break; 585 } 586 ovl_revert_creds(old_cred); 587 588 /* Update size */ 589 ovl_file_modified(file_out); 590 591 out_unlock: 592 inode_unlock(inode_out); 593 594 return ret; 595 } 596 597 static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in, 598 struct file *file_out, loff_t pos_out, 599 size_t len, unsigned int flags) 600 { 601 return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags, 602 OVL_COPY); 603 } 604 605 static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in, 606 struct file *file_out, loff_t pos_out, 607 loff_t len, unsigned int remap_flags) 608 { 609 enum ovl_copyop op; 610 611 if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) 612 return -EINVAL; 613 614 if (remap_flags & REMAP_FILE_DEDUP) 615 op = OVL_DEDUPE; 616 else 617 op = OVL_CLONE; 618 619 /* 620 * Don't copy up because of a dedupe request, this wouldn't make sense 621 * most of the time (data would be duplicated instead of deduplicated). 622 */ 623 if (op == OVL_DEDUPE && 624 (!ovl_inode_upper(file_inode(file_in)) || 625 !ovl_inode_upper(file_inode(file_out)))) 626 return -EPERM; 627 628 return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 629 remap_flags, op); 630 } 631 632 static int ovl_flush(struct file *file, fl_owner_t id) 633 { 634 struct file *realfile; 635 const struct cred *old_cred; 636 int err = 0; 637 638 realfile = ovl_real_file(file); 639 if (IS_ERR(realfile)) 640 return PTR_ERR(realfile); 641 642 if (realfile->f_op->flush) { 643 old_cred = ovl_override_creds(file_inode(file)->i_sb); 644 err = realfile->f_op->flush(realfile, id); 645 ovl_revert_creds(old_cred); 646 } 647 648 return err; 649 } 650 651 const struct file_operations ovl_file_operations = { 652 .open = ovl_open, 653 .release = ovl_release, 654 .llseek = ovl_llseek, 655 .read_iter = ovl_read_iter, 656 .write_iter = ovl_write_iter, 657 .fsync = ovl_fsync, 658 .mmap = ovl_mmap, 659 .fallocate = ovl_fallocate, 660 .fadvise = ovl_fadvise, 661 .flush = ovl_flush, 662 .splice_read = ovl_splice_read, 663 .splice_write = ovl_splice_write, 664 665 .copy_file_range = ovl_copy_file_range, 666 .remap_file_range = ovl_remap_file_range, 667 }; 668