1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2017 Red Hat, Inc. 4 */ 5 6 #include <linux/cred.h> 7 #include <linux/file.h> 8 #include <linux/mount.h> 9 #include <linux/xattr.h> 10 #include <linux/uio.h> 11 #include <linux/uaccess.h> 12 #include <linux/security.h> 13 #include <linux/fs.h> 14 #include <linux/backing-file.h> 15 #include "overlayfs.h" 16 17 static char ovl_whatisit(struct inode *inode, struct inode *realinode) 18 { 19 if (realinode != ovl_inode_upper(inode)) 20 return 'l'; 21 if (ovl_has_upperdata(inode)) 22 return 'u'; 23 else 24 return 'm'; 25 } 26 27 static struct file *ovl_open_realfile(const struct file *file, 28 const struct path *realpath) 29 { 30 struct inode *realinode = d_inode(realpath->dentry); 31 struct inode *inode = file_inode(file); 32 struct mnt_idmap *real_idmap; 33 struct file *realfile; 34 const struct cred *old_cred; 35 int flags = file->f_flags | OVL_OPEN_FLAGS; 36 int acc_mode = ACC_MODE(flags); 37 int err; 38 39 if (flags & O_APPEND) 40 acc_mode |= MAY_APPEND; 41 42 old_cred = ovl_override_creds(inode->i_sb); 43 real_idmap = mnt_idmap(realpath->mnt); 44 err = inode_permission(real_idmap, realinode, MAY_OPEN | acc_mode); 45 if (err) { 46 realfile = ERR_PTR(err); 47 } else { 48 if (!inode_owner_or_capable(real_idmap, realinode)) 49 flags &= ~O_NOATIME; 50 51 realfile = backing_file_open(&file->f_path, flags, realpath, 52 current_cred()); 53 } 54 ovl_revert_creds(old_cred); 55 56 pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n", 57 file, file, ovl_whatisit(inode, realinode), file->f_flags, 58 realfile, IS_ERR(realfile) ? 0 : realfile->f_flags); 59 60 return realfile; 61 } 62 63 #define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT) 64 65 static int ovl_change_flags(struct file *file, unsigned int flags) 66 { 67 struct inode *inode = file_inode(file); 68 int err; 69 70 flags &= OVL_SETFL_MASK; 71 72 if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode)) 73 return -EPERM; 74 75 if ((flags & O_DIRECT) && !(file->f_mode & FMODE_CAN_ODIRECT)) 76 return -EINVAL; 77 78 if (file->f_op->check_flags) { 79 err = file->f_op->check_flags(flags); 80 if (err) 81 return err; 82 } 83 84 spin_lock(&file->f_lock); 85 file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags; 86 file->f_iocb_flags = iocb_flags(file); 87 spin_unlock(&file->f_lock); 88 89 return 0; 90 } 91 92 struct ovl_file { 93 struct file *realfile; 94 struct file *upperfile; 95 }; 96 97 struct ovl_file *ovl_file_alloc(struct file *realfile) 98 { 99 struct ovl_file *of = kzalloc(sizeof(struct ovl_file), GFP_KERNEL); 100 101 if (unlikely(!of)) 102 return NULL; 103 104 of->realfile = realfile; 105 return of; 106 } 107 108 void ovl_file_free(struct ovl_file *of) 109 { 110 fput(of->realfile); 111 if (of->upperfile) 112 fput(of->upperfile); 113 kfree(of); 114 } 115 116 static bool ovl_is_real_file(const struct file *realfile, 117 const struct path *realpath) 118 { 119 return file_inode(realfile) == d_inode(realpath->dentry); 120 } 121 122 static struct file *ovl_real_file_path(const struct file *file, 123 struct path *realpath) 124 { 125 struct ovl_file *of = file->private_data; 126 struct file *realfile = of->realfile; 127 128 if (WARN_ON_ONCE(!realpath->dentry)) 129 return ERR_PTR(-EIO); 130 131 /* 132 * If the realfile that we want is not where the data used to be at 133 * open time, either we'd been copied up, or it's an fsync of a 134 * metacopied file. We need the upperfile either way, so see if it 135 * is already opened and if it is not then open and store it. 136 */ 137 if (unlikely(!ovl_is_real_file(realfile, realpath))) { 138 struct file *upperfile = READ_ONCE(of->upperfile); 139 struct file *old; 140 141 if (!upperfile) { /* Nobody opened upperfile yet */ 142 upperfile = ovl_open_realfile(file, realpath); 143 if (IS_ERR(upperfile)) 144 return upperfile; 145 146 /* Store the upperfile for later */ 147 old = cmpxchg_release(&of->upperfile, NULL, upperfile); 148 if (old) { /* Someone opened upperfile before us */ 149 fput(upperfile); 150 upperfile = old; 151 } 152 } 153 /* 154 * Stored file must be from the right inode, unless someone's 155 * been corrupting the upper layer. 156 */ 157 if (WARN_ON_ONCE(!ovl_is_real_file(upperfile, realpath))) 158 return ERR_PTR(-EIO); 159 160 realfile = upperfile; 161 } 162 163 /* Did the flags change since open? */ 164 if (unlikely((file->f_flags ^ realfile->f_flags) & ~OVL_OPEN_FLAGS)) { 165 int err = ovl_change_flags(realfile, file->f_flags); 166 167 if (err) 168 return ERR_PTR(err); 169 } 170 171 return realfile; 172 } 173 174 static struct file *ovl_real_file(const struct file *file) 175 { 176 struct dentry *dentry = file_dentry(file); 177 struct path realpath; 178 int err; 179 180 if (d_is_dir(dentry)) { 181 struct file *f = ovl_dir_real_file(file, false); 182 183 if (WARN_ON_ONCE(!f)) 184 return ERR_PTR(-EIO); 185 return f; 186 } 187 188 /* lazy lookup and verify of lowerdata */ 189 err = ovl_verify_lowerdata(dentry); 190 if (err) 191 return ERR_PTR(err); 192 193 ovl_path_realdata(dentry, &realpath); 194 195 return ovl_real_file_path(file, &realpath); 196 } 197 198 static int ovl_open(struct inode *inode, struct file *file) 199 { 200 struct dentry *dentry = file_dentry(file); 201 struct file *realfile; 202 struct path realpath; 203 struct ovl_file *of; 204 int err; 205 206 /* lazy lookup and verify lowerdata */ 207 err = ovl_verify_lowerdata(dentry); 208 if (err) 209 return err; 210 211 err = ovl_maybe_copy_up(dentry, file->f_flags); 212 if (err) 213 return err; 214 215 /* No longer need these flags, so don't pass them on to underlying fs */ 216 file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); 217 218 ovl_path_realdata(dentry, &realpath); 219 if (!realpath.dentry) 220 return -EIO; 221 222 realfile = ovl_open_realfile(file, &realpath); 223 if (IS_ERR(realfile)) 224 return PTR_ERR(realfile); 225 226 of = ovl_file_alloc(realfile); 227 if (!of) { 228 fput(realfile); 229 return -ENOMEM; 230 } 231 232 file->private_data = of; 233 234 return 0; 235 } 236 237 static int ovl_release(struct inode *inode, struct file *file) 238 { 239 ovl_file_free(file->private_data); 240 return 0; 241 } 242 243 static loff_t ovl_llseek(struct file *file, loff_t offset, int whence) 244 { 245 struct inode *inode = file_inode(file); 246 struct file *realfile; 247 const struct cred *old_cred; 248 loff_t ret; 249 250 /* 251 * The two special cases below do not need to involve real fs, 252 * so we can optimizing concurrent callers. 253 */ 254 if (offset == 0) { 255 if (whence == SEEK_CUR) 256 return file->f_pos; 257 258 if (whence == SEEK_SET) 259 return vfs_setpos(file, 0, 0); 260 } 261 262 realfile = ovl_real_file(file); 263 if (IS_ERR(realfile)) 264 return PTR_ERR(realfile); 265 266 /* 267 * Overlay file f_pos is the master copy that is preserved 268 * through copy up and modified on read/write, but only real 269 * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose 270 * limitations that are more strict than ->s_maxbytes for specific 271 * files, so we use the real file to perform seeks. 272 */ 273 ovl_inode_lock(inode); 274 realfile->f_pos = file->f_pos; 275 276 old_cred = ovl_override_creds(inode->i_sb); 277 ret = vfs_llseek(realfile, offset, whence); 278 ovl_revert_creds(old_cred); 279 280 file->f_pos = realfile->f_pos; 281 ovl_inode_unlock(inode); 282 283 return ret; 284 } 285 286 static void ovl_file_modified(struct file *file) 287 { 288 /* Update size/mtime */ 289 ovl_copyattr(file_inode(file)); 290 } 291 292 static void ovl_file_end_write(struct kiocb *iocb, ssize_t ret) 293 { 294 ovl_file_modified(iocb->ki_filp); 295 } 296 297 static void ovl_file_accessed(struct file *file) 298 { 299 struct inode *inode, *upperinode; 300 struct timespec64 ctime, uctime; 301 struct timespec64 mtime, umtime; 302 303 if (file->f_flags & O_NOATIME) 304 return; 305 306 inode = file_inode(file); 307 upperinode = ovl_inode_upper(inode); 308 309 if (!upperinode) 310 return; 311 312 ctime = inode_get_ctime(inode); 313 uctime = inode_get_ctime(upperinode); 314 mtime = inode_get_mtime(inode); 315 umtime = inode_get_mtime(upperinode); 316 if ((!timespec64_equal(&mtime, &umtime)) || 317 !timespec64_equal(&ctime, &uctime)) { 318 inode_set_mtime_to_ts(inode, inode_get_mtime(upperinode)); 319 inode_set_ctime_to_ts(inode, uctime); 320 } 321 322 touch_atime(&file->f_path); 323 } 324 325 static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) 326 { 327 struct file *file = iocb->ki_filp; 328 struct file *realfile; 329 struct backing_file_ctx ctx = { 330 .cred = ovl_creds(file_inode(file)->i_sb), 331 .accessed = ovl_file_accessed, 332 }; 333 334 if (!iov_iter_count(iter)) 335 return 0; 336 337 realfile = ovl_real_file(file); 338 if (IS_ERR(realfile)) 339 return PTR_ERR(realfile); 340 341 return backing_file_read_iter(realfile, iter, iocb, iocb->ki_flags, 342 &ctx); 343 } 344 345 static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) 346 { 347 struct file *file = iocb->ki_filp; 348 struct inode *inode = file_inode(file); 349 struct file *realfile; 350 ssize_t ret; 351 int ifl = iocb->ki_flags; 352 struct backing_file_ctx ctx = { 353 .cred = ovl_creds(inode->i_sb), 354 .end_write = ovl_file_end_write, 355 }; 356 357 if (!iov_iter_count(iter)) 358 return 0; 359 360 inode_lock(inode); 361 /* Update mode */ 362 ovl_copyattr(inode); 363 364 realfile = ovl_real_file(file); 365 ret = PTR_ERR(realfile); 366 if (IS_ERR(realfile)) 367 goto out_unlock; 368 369 if (!ovl_should_sync(OVL_FS(inode->i_sb))) 370 ifl &= ~(IOCB_DSYNC | IOCB_SYNC); 371 372 /* 373 * Overlayfs doesn't support deferred completions, don't copy 374 * this property in case it is set by the issuer. 375 */ 376 ifl &= ~IOCB_DIO_CALLER_COMP; 377 ret = backing_file_write_iter(realfile, iter, iocb, ifl, &ctx); 378 379 out_unlock: 380 inode_unlock(inode); 381 382 return ret; 383 } 384 385 static ssize_t ovl_splice_read(struct file *in, loff_t *ppos, 386 struct pipe_inode_info *pipe, size_t len, 387 unsigned int flags) 388 { 389 struct file *realfile; 390 ssize_t ret; 391 struct backing_file_ctx ctx = { 392 .cred = ovl_creds(file_inode(in)->i_sb), 393 .accessed = ovl_file_accessed, 394 }; 395 struct kiocb iocb; 396 397 realfile = ovl_real_file(in); 398 if (IS_ERR(realfile)) 399 return PTR_ERR(realfile); 400 401 init_sync_kiocb(&iocb, in); 402 iocb.ki_pos = *ppos; 403 ret = backing_file_splice_read(realfile, &iocb, pipe, len, flags, &ctx); 404 *ppos = iocb.ki_pos; 405 406 return ret; 407 } 408 409 /* 410 * Calling iter_file_splice_write() directly from overlay's f_op may deadlock 411 * due to lock order inversion between pipe->mutex in iter_file_splice_write() 412 * and file_start_write(realfile) in ovl_write_iter(). 413 * 414 * So do everything ovl_write_iter() does and call iter_file_splice_write() on 415 * the real file. 416 */ 417 static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out, 418 loff_t *ppos, size_t len, unsigned int flags) 419 { 420 struct file *realfile; 421 struct inode *inode = file_inode(out); 422 ssize_t ret; 423 struct backing_file_ctx ctx = { 424 .cred = ovl_creds(inode->i_sb), 425 .end_write = ovl_file_end_write, 426 }; 427 struct kiocb iocb; 428 429 inode_lock(inode); 430 /* Update mode */ 431 ovl_copyattr(inode); 432 433 realfile = ovl_real_file(out); 434 ret = PTR_ERR(realfile); 435 if (IS_ERR(realfile)) 436 goto out_unlock; 437 438 init_sync_kiocb(&iocb, out); 439 iocb.ki_pos = *ppos; 440 ret = backing_file_splice_write(pipe, realfile, &iocb, len, flags, &ctx); 441 *ppos = iocb.ki_pos; 442 443 out_unlock: 444 inode_unlock(inode); 445 446 return ret; 447 } 448 449 static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync) 450 { 451 struct dentry *dentry = file_dentry(file); 452 enum ovl_path_type type; 453 struct path upperpath; 454 struct file *upperfile; 455 const struct cred *old_cred; 456 int ret; 457 458 ret = ovl_sync_status(OVL_FS(file_inode(file)->i_sb)); 459 if (ret <= 0) 460 return ret; 461 462 /* Don't sync lower file for fear of receiving EROFS error */ 463 type = ovl_path_type(dentry); 464 if (!OVL_TYPE_UPPER(type) || (datasync && OVL_TYPE_MERGE(type))) 465 return 0; 466 467 ovl_path_upper(dentry, &upperpath); 468 upperfile = ovl_real_file_path(file, &upperpath); 469 if (IS_ERR(upperfile)) 470 return PTR_ERR(upperfile); 471 472 old_cred = ovl_override_creds(file_inode(file)->i_sb); 473 ret = vfs_fsync_range(upperfile, start, end, datasync); 474 ovl_revert_creds(old_cred); 475 476 return ret; 477 } 478 479 static int ovl_mmap(struct file *file, struct vm_area_struct *vma) 480 { 481 struct ovl_file *of = file->private_data; 482 struct backing_file_ctx ctx = { 483 .cred = ovl_creds(file_inode(file)->i_sb), 484 .accessed = ovl_file_accessed, 485 }; 486 487 return backing_file_mmap(of->realfile, vma, &ctx); 488 } 489 490 static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len) 491 { 492 struct inode *inode = file_inode(file); 493 struct file *realfile; 494 const struct cred *old_cred; 495 int ret; 496 497 inode_lock(inode); 498 /* Update mode */ 499 ovl_copyattr(inode); 500 ret = file_remove_privs(file); 501 if (ret) 502 goto out_unlock; 503 504 realfile = ovl_real_file(file); 505 ret = PTR_ERR(realfile); 506 if (IS_ERR(realfile)) 507 goto out_unlock; 508 509 old_cred = ovl_override_creds(file_inode(file)->i_sb); 510 ret = vfs_fallocate(realfile, mode, offset, len); 511 ovl_revert_creds(old_cred); 512 513 /* Update size */ 514 ovl_file_modified(file); 515 516 out_unlock: 517 inode_unlock(inode); 518 519 return ret; 520 } 521 522 static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice) 523 { 524 struct file *realfile; 525 const struct cred *old_cred; 526 int ret; 527 528 realfile = ovl_real_file(file); 529 if (IS_ERR(realfile)) 530 return PTR_ERR(realfile); 531 532 old_cred = ovl_override_creds(file_inode(file)->i_sb); 533 ret = vfs_fadvise(realfile, offset, len, advice); 534 ovl_revert_creds(old_cred); 535 536 return ret; 537 } 538 539 enum ovl_copyop { 540 OVL_COPY, 541 OVL_CLONE, 542 OVL_DEDUPE, 543 }; 544 545 static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in, 546 struct file *file_out, loff_t pos_out, 547 loff_t len, unsigned int flags, enum ovl_copyop op) 548 { 549 struct inode *inode_out = file_inode(file_out); 550 struct file *realfile_in, *realfile_out; 551 const struct cred *old_cred; 552 loff_t ret; 553 554 inode_lock(inode_out); 555 if (op != OVL_DEDUPE) { 556 /* Update mode */ 557 ovl_copyattr(inode_out); 558 ret = file_remove_privs(file_out); 559 if (ret) 560 goto out_unlock; 561 } 562 563 realfile_out = ovl_real_file(file_out); 564 ret = PTR_ERR(realfile_out); 565 if (IS_ERR(realfile_out)) 566 goto out_unlock; 567 568 realfile_in = ovl_real_file(file_in); 569 ret = PTR_ERR(realfile_in); 570 if (IS_ERR(realfile_in)) 571 goto out_unlock; 572 573 old_cred = ovl_override_creds(file_inode(file_out)->i_sb); 574 switch (op) { 575 case OVL_COPY: 576 ret = vfs_copy_file_range(realfile_in, pos_in, 577 realfile_out, pos_out, len, flags); 578 break; 579 580 case OVL_CLONE: 581 ret = vfs_clone_file_range(realfile_in, pos_in, 582 realfile_out, pos_out, len, flags); 583 break; 584 585 case OVL_DEDUPE: 586 ret = vfs_dedupe_file_range_one(realfile_in, pos_in, 587 realfile_out, pos_out, len, 588 flags); 589 break; 590 } 591 ovl_revert_creds(old_cred); 592 593 /* Update size */ 594 ovl_file_modified(file_out); 595 596 out_unlock: 597 inode_unlock(inode_out); 598 599 return ret; 600 } 601 602 static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in, 603 struct file *file_out, loff_t pos_out, 604 size_t len, unsigned int flags) 605 { 606 return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags, 607 OVL_COPY); 608 } 609 610 static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in, 611 struct file *file_out, loff_t pos_out, 612 loff_t len, unsigned int remap_flags) 613 { 614 enum ovl_copyop op; 615 616 if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) 617 return -EINVAL; 618 619 if (remap_flags & REMAP_FILE_DEDUP) 620 op = OVL_DEDUPE; 621 else 622 op = OVL_CLONE; 623 624 /* 625 * Don't copy up because of a dedupe request, this wouldn't make sense 626 * most of the time (data would be duplicated instead of deduplicated). 627 */ 628 if (op == OVL_DEDUPE && 629 (!ovl_inode_upper(file_inode(file_in)) || 630 !ovl_inode_upper(file_inode(file_out)))) 631 return -EPERM; 632 633 return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 634 remap_flags, op); 635 } 636 637 static int ovl_flush(struct file *file, fl_owner_t id) 638 { 639 struct file *realfile; 640 const struct cred *old_cred; 641 int err = 0; 642 643 realfile = ovl_real_file(file); 644 if (IS_ERR(realfile)) 645 return PTR_ERR(realfile); 646 647 if (realfile->f_op->flush) { 648 old_cred = ovl_override_creds(file_inode(file)->i_sb); 649 err = realfile->f_op->flush(realfile, id); 650 ovl_revert_creds(old_cred); 651 } 652 653 return err; 654 } 655 656 const struct file_operations ovl_file_operations = { 657 .open = ovl_open, 658 .release = ovl_release, 659 .llseek = ovl_llseek, 660 .read_iter = ovl_read_iter, 661 .write_iter = ovl_write_iter, 662 .fsync = ovl_fsync, 663 .mmap = ovl_mmap, 664 .fallocate = ovl_fallocate, 665 .fadvise = ovl_fadvise, 666 .flush = ovl_flush, 667 .splice_read = ovl_splice_read, 668 .splice_write = ovl_splice_write, 669 670 .copy_file_range = ovl_copy_file_range, 671 .remap_file_range = ovl_remap_file_range, 672 }; 673