// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017 Red Hat, Inc.
 */

#include <linux/cred.h>
#include <linux/file.h>
#include <linux/filelock.h>
#include <linux/mount.h>
#include <linux/xattr.h>
#include <linux/uio.h>
#include <linux/uaccess.h>
#include <linux/security.h>
#include <linux/fs.h>
#include <linux/backing-file.h>
#include "overlayfs.h"

/*
 * Classify @realinode relative to the overlay @inode as a single character.
 * The result is only used in the pr_debug() output of ovl_open_realfile().
 *
 * Returns 'l' when @realinode is not the overlay inode's upper inode, 'u' when
 * ovl_has_upperdata() is true for @inode, and 'm' otherwise.
 * NOTE(review): the letters presumably stand for lower / upper / metacopy -
 * confirm.
 */
static char ovl_whatisit(struct inode *inode, struct inode *realinode)
{
	if (realinode != ovl_inode_upper(inode))
		return 'l';
	if (ovl_has_upperdata(inode))
		return 'u';
	else
		return 'm';
}

/*
 * Open the backing ("real") file at @realpath on behalf of overlay @file.
 *
 * The open flags are the overlay file's f_flags with OVL_OPEN_FLAGS added.
 * Permission on the real inode is checked explicitly with inode_permission()
 * before the file is opened with backing_file_open().
 *
 * NOTE(review): with_ovl_creds() is defined outside this file; presumably it
 * runs the statement/block that follows with the overlay's credentials for
 * the given super block - confirm.
 *
 * Returns the opened real file, or an ERR_PTR() on failure.
 */
static struct file *ovl_open_realfile(const struct file *file,
				      const struct path *realpath)
{
	struct inode *realinode = d_inode(realpath->dentry);
	struct inode *inode = file_inode(file);
	struct mnt_idmap *real_idmap;
	struct file *realfile;
	int flags = file->f_flags | OVL_OPEN_FLAGS;
	int acc_mode = ACC_MODE(flags);
	int err;

	if (flags & O_APPEND)
		acc_mode |= MAY_APPEND;

	with_ovl_creds(inode->i_sb) {
		real_idmap = mnt_idmap(realpath->mnt);
		err = inode_permission(real_idmap, realinode, MAY_OPEN | acc_mode);
		if (err) {
			realfile = ERR_PTR(err);
		} else {
			/* Drop O_NOATIME unless owner-or-capable on the real inode */
			if (!inode_owner_or_capable(real_idmap, realinode))
				flags &= ~O_NOATIME;

			realfile = backing_file_open(file_user_path(file),
						     flags, realpath, current_cred());
		}
	}

	pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
		 file, file, ovl_whatisit(inode, realinode), file->f_flags,
		 realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);

	return realfile;
}

/* The only flags that ovl_change_flags() carries over to a real file */
#define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)

/*
 * Bring the OVL_SETFL_MASK bits of real @file in line with @flags.
 *
 * Returns 0 on success, -EPERM if O_APPEND would be toggled on an IS_APPEND()
 * inode, -EINVAL if O_DIRECT is requested but the file lacks
 * FMODE_CAN_ODIRECT, or the error returned by the file's ->check_flags().
 */
static int ovl_change_flags(struct file *file, unsigned int flags)
{
	struct inode *inode = file_inode(file);
	int err;

	flags &= OVL_SETFL_MASK;

	if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
		return -EPERM;

	if ((flags & O_DIRECT) && !(file->f_mode & FMODE_CAN_ODIRECT))
		return -EINVAL;

	if (file->f_op->check_flags) {
		err = file->f_op->check_flags(flags);
		if (err)
			return err;
	}

	/* f_flags and the derived f_iocb_flags are updated together under f_lock */
	spin_lock(&file->f_lock);
	file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags;
	file->f_iocb_flags = iocb_flags(file);
	spin_unlock(&file->f_lock);

	return 0;
}

/*
 * Per-open state, stored in file->private_data by ovl_open().
 *
 * @realfile:  real file handed to ovl_file_alloc()
 * @upperfile: NULL until ovl_real_file_path() opens and publishes a second
 *             real file for a path whose inode differs from @realfile's
 */
struct ovl_file {
	struct file *realfile;
	struct file *upperfile;
};

/*
 * Allocate a zeroed ovl_file that holds @realfile.
 *
 * Returns NULL on allocation failure; @realfile is left untouched in that
 * case, so the caller still has to fput() it.  On success the reference is
 * dropped later by ovl_file_free().
 */
struct ovl_file *ovl_file_alloc(struct file *realfile)
{
	struct ovl_file *of = kzalloc(sizeof(struct ovl_file), GFP_KERNEL);

	if (unlikely(!of))
		return NULL;

	of->realfile = realfile;
	return of;
}

/*
 * Drop the real file reference(s) held by @of and free it.
 * @of is dereferenced unconditionally, so it must not be NULL.
 */
void ovl_file_free(struct ovl_file *of)
{
	fput(of->realfile);
	if (of->upperfile)
		fput(of->upperfile);
	kfree(of);
}

/* True if @realfile was opened on the inode that @realpath refers to */
static bool ovl_is_real_file(const struct file *realfile,
			     const struct path *realpath)
{
	return file_inode(realfile) == d_inode(realpath->dentry);
}

/*
 * Return the real file to use for @realpath on behalf of overlay @file.
 *
 * The returned pointer is either of->realfile or of->upperfile; no extra
 * reference is taken here, both are released by ovl_file_free().
 *
 * Returns ERR_PTR(-EIO) if @realpath has no dentry or if the stored upperfile
 * does not match @realpath, or the error from ovl_open_realfile() or
 * ovl_change_flags().
 */
static struct file *ovl_real_file_path(const struct file *file,
				       const struct path *realpath)
{
	struct ovl_file *of = file->private_data;
	struct file *realfile = of->realfile;

	if (WARN_ON_ONCE(!realpath->dentry))
		return ERR_PTR(-EIO);

	/*
	 * If the realfile that we want is not where the data used to be at
	 * open time, either we'd been copied up, or it's an fsync of a
	 * metacopied file.  We need the upperfile either way, so see if it
	 * is already opened and if it is not then open and store it.
	 */
	if (unlikely(!ovl_is_real_file(realfile, realpath))) {
		struct file *upperfile = READ_ONCE(of->upperfile);
		struct file *old;

		if (!upperfile) { /* Nobody opened upperfile yet */
			upperfile = ovl_open_realfile(file, realpath);
			if (IS_ERR(upperfile))
				return upperfile;

			/* Store the upperfile for later */
			old = cmpxchg_release(&of->upperfile, NULL, upperfile);
			if (old) { /* Someone opened upperfile before us */
				fput(upperfile);
				upperfile = old;
			}
		}
		/*
		 * Stored file must be from the right inode, unless someone's
		 * been corrupting the upper layer.
		 */
		if (WARN_ON_ONCE(!ovl_is_real_file(upperfile, realpath)))
			return ERR_PTR(-EIO);

		realfile = upperfile;
	}

	/* Did the flags change since open? */
	if (unlikely((file->f_flags ^ realfile->f_flags) & ~OVL_OPEN_FLAGS)) {
		int err = ovl_change_flags(realfile, file->f_flags);

		if (err)
			return ERR_PTR(err);
	}

	return realfile;
}

/*
 * Return the real file backing overlay @file, or an ERR_PTR().
 *
 * Directories are resolved through ovl_dir_real_file(); a NULL result there
 * is reported as -EIO.  For anything else the real data path is looked up
 * with ovl_path_realdata() and resolved by ovl_real_file_path().
 */
static struct file *ovl_real_file(const struct file *file)
{
	struct dentry *dentry = file_dentry(file);
	struct path realpath;
	int err;

	if (d_is_dir(dentry)) {
		struct file *f = ovl_dir_real_file(file, false);

		if (WARN_ON_ONCE(!f))
			return ERR_PTR(-EIO);
		return f;
	}

	/* lazy lookup and verify of lowerdata */
	err = ovl_verify_lowerdata(dentry);
	if (err)
		return ERR_PTR(err);

	ovl_path_realdata(dentry, &realpath);

	return ovl_real_file_path(file, &realpath);
}

/*
 * ->open(): open the real data file and attach it to @file as a
 * struct ovl_file in file->private_data.
 *
 * Returns 0 on success or a negative errno; -EIO if no real data dentry is
 * found, -ENOMEM if the ovl_file cannot be allocated.
 */
static int ovl_open(struct inode *inode, struct file *file)
{
	struct dentry *dentry = file_dentry(file);
	struct file *realfile;
	struct path realpath;
	struct ovl_file *of;
	int err;

	/* lazy lookup and verify lowerdata */
	err = ovl_verify_lowerdata(dentry);
	if (err)
		return err;

	err = ovl_maybe_copy_up(dentry, file->f_flags);
	if (err)
		return err;

	/* No longer need these flags, so don't pass them on to underlying fs */
	file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);

	ovl_path_realdata(dentry, &realpath);
	if (!realpath.dentry)
		return -EIO;

	realfile = ovl_open_realfile(file, &realpath);
	if (IS_ERR(realfile))
		return PTR_ERR(realfile);

	of = ovl_file_alloc(realfile);
	if (!of) {
		/* ovl_file_alloc() did not take over realfile, drop it here */
		fput(realfile);
		return -ENOMEM;
	}

	file->private_data = of;

	return 0;
}

/* ->release(): free the ovl_file attached by ovl_open(); always returns 0 */
static int ovl_release(struct inode *inode, struct file *file)
{
	ovl_file_free(file->private_data);
	return 0;
}

/*
 * ->llseek(): seek on the real file, with the overlay file's f_pos as the
 * position that is carried in and out.
 *
 * Returns the result of vfs_llseek() on the real file, or a negative errno if
 * the real file cannot be obtained.
 */
static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file_inode(file);
	struct file *realfile;
	loff_t ret;

	/*
	 * The two special cases below do not need to involve real fs,
	 * so we can optimize concurrent callers.
	 */
	if (offset == 0) {
		if (whence == SEEK_CUR)
			return file->f_pos;

		if (whence == SEEK_SET)
			return vfs_setpos(file, 0, 0);
	}

	realfile = ovl_real_file(file);
	if (IS_ERR(realfile))
		return PTR_ERR(realfile);

	/*
	 * Overlay file f_pos is the master copy that is preserved
	 * through copy up and modified on read/write, but only real
	 * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose
	 * limitations that are more strict than ->s_maxbytes for specific
	 * files, so we use the real file to perform seeks.
	 */
	ovl_inode_lock(inode);
	realfile->f_pos = file->f_pos;

	with_ovl_creds(inode->i_sb)
		ret = vfs_llseek(realfile, offset, whence);

	/* The real file's position is copied back whatever vfs_llseek() returned */
	file->f_pos = realfile->f_pos;
	ovl_inode_unlock(inode);

	return ret;
}

/* Refresh the overlay inode's attributes after the real file was modified */
static void ovl_file_modified(struct file *file)
{
	/* Update size/mtime */
	ovl_copyattr(file_inode(file));
}

/* backing_file_ctx ->end_write callback; @ret is not used */
static void ovl_file_end_write(struct kiocb *iocb, ssize_t ret)
{
	ovl_file_modified(iocb->ki_filp);
}

/*
 * backing_file_ctx ->accessed callback.
 *
 * Does nothing for O_NOATIME files or when the overlay inode has no upper
 * inode.  Otherwise, if the overlay inode's mtime or ctime differs from the
 * upper inode's, both are copied from the upper inode, and then atime is
 * touched on the overlay file's path.
 */
static void ovl_file_accessed(struct file *file)
{
	struct inode *inode, *upperinode;
	struct timespec64 ctime, uctime;
	struct timespec64 mtime, umtime;

	if (file->f_flags & O_NOATIME)
		return;

	inode = file_inode(file);
	upperinode = ovl_inode_upper(inode);

	if (!upperinode)
		return;

	ctime = inode_get_ctime(inode);
	uctime = inode_get_ctime(upperinode);
	mtime = inode_get_mtime(inode);
	umtime = inode_get_mtime(upperinode);
	if ((!timespec64_equal(&mtime, &umtime)) ||
	     !timespec64_equal(&ctime, &uctime)) {
		inode_set_mtime_to_ts(inode, inode_get_mtime(upperinode));
		inode_set_ctime_to_ts(inode, uctime);
	}

	touch_atime(&file->f_path);
}

/*
 * ->read_iter(): read from the real file through backing_file_read_iter().
 * A zero-length request returns 0 without looking up the real file.
 */
static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct file *file = iocb->ki_filp;
	struct file *realfile;
	struct backing_file_ctx ctx = {
		.cred = ovl_creds(file_inode(file)->i_sb),
		.accessed = ovl_file_accessed,
	};

	if (!iov_iter_count(iter))
		return 0;

	realfile = ovl_real_file(file);
	if (IS_ERR(realfile))
		return PTR_ERR(realfile);

	return backing_file_read_iter(realfile, iter, iocb, iocb->ki_flags,
				      &ctx);
}

/*
 * ->write_iter(): write to the real file through backing_file_write_iter()
 * while holding the overlay inode lock.
 * A zero-length request returns 0 without taking the lock.
 */
static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	struct file *realfile;
	ssize_t ret;
	int ifl = iocb->ki_flags;
	struct backing_file_ctx ctx = {
		.cred = ovl_creds(inode->i_sb),
		.end_write = ovl_file_end_write,
	};

	if (!iov_iter_count(iter))
		return 0;

	inode_lock(inode);
	/* Update mode */
	ovl_copyattr(inode);

	realfile = ovl_real_file(file);
	ret = PTR_ERR(realfile);
	if (IS_ERR(realfile))
		goto out_unlock;

	/* Strip the sync bits when ovl_should_sync() says not to sync */
	if (!ovl_should_sync(OVL_FS(inode->i_sb)))
		ifl &= ~(IOCB_DSYNC | IOCB_SYNC);

	ret = backing_file_write_iter(realfile, iter, iocb, ifl, &ctx);

out_unlock:
	inode_unlock(inode);

	return ret;
}

/*
 * ->splice_read(): splice from the real file into @pipe.
 *
 * A sync kiocb is set up on the overlay file @in, starting at *@ppos; the
 * resulting position is written back to *@ppos.
 */
static ssize_t ovl_splice_read(struct file *in, loff_t *ppos,
			       struct pipe_inode_info *pipe, size_t len,
			       unsigned int flags)
{
	struct file *realfile;
	ssize_t ret;
	struct backing_file_ctx ctx = {
		.cred = ovl_creds(file_inode(in)->i_sb),
		.accessed = ovl_file_accessed,
	};
	struct kiocb iocb;

	realfile = ovl_real_file(in);
	if (IS_ERR(realfile))
		return PTR_ERR(realfile);

	init_sync_kiocb(&iocb, in);
	iocb.ki_pos = *ppos;
	ret = backing_file_splice_read(realfile, &iocb, pipe, len, flags, &ctx);
	*ppos = iocb.ki_pos;

	return ret;
}

/*
 * Calling iter_file_splice_write() directly from overlay's f_op may deadlock
 * due to lock order inversion between pipe->mutex in iter_file_splice_write()
 * and file_start_write(realfile) in ovl_write_iter().
 *
 * So do everything ovl_write_iter() does and call iter_file_splice_write() on
 * the real file.
 */
static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
				loff_t *ppos, size_t len, unsigned int flags)
{
	struct file *realfile;
	struct inode *inode = file_inode(out);
	ssize_t ret;
	struct backing_file_ctx ctx = {
		.cred = ovl_creds(inode->i_sb),
		.end_write = ovl_file_end_write,
	};
	struct kiocb iocb;

	inode_lock(inode);
	/* Update mode */
	ovl_copyattr(inode);

	realfile = ovl_real_file(out);
	ret = PTR_ERR(realfile);
	if (IS_ERR(realfile))
		goto out_unlock;

	init_sync_kiocb(&iocb, out);
	iocb.ki_pos = *ppos;
	ret = backing_file_splice_write(pipe, realfile, &iocb, len, flags, &ctx);
	*ppos = iocb.ki_pos;

out_unlock:
	inode_unlock(inode);

	return ret;
}

/*
 * ->fsync(): sync the upper file, if there is one worth syncing.
 *
 * Returns the value of ovl_sync_status() when it is <= 0, 0 when the dentry
 * has no upper or when this is a datasync of a merge-type dentry, and
 * otherwise the result of vfs_fsync_range() on the upper file (or the error
 * from obtaining it).
 */
static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct dentry *dentry = file_dentry(file);
	enum ovl_path_type type;
	struct path upperpath;
	struct file *upperfile;
	int ret;

	ret = ovl_sync_status(OVL_FS(file_inode(file)->i_sb));
	if (ret <= 0)
		return ret;

	/* Don't sync lower file for fear of receiving EROFS error */
	type = ovl_path_type(dentry);
	if (!OVL_TYPE_UPPER(type) || (datasync && OVL_TYPE_MERGE(type)))
		return 0;

	/* Ask for the upper path explicitly rather than the real data path */
	ovl_path_upper(dentry, &upperpath);
	upperfile = ovl_real_file_path(file, &upperpath);
	if (IS_ERR(upperfile))
		return PTR_ERR(upperfile);

	with_ovl_creds(file_inode(file)->i_sb)
		return vfs_fsync_range(upperfile, start, end, datasync);
}

/*
 * ->mmap(): map the real file through backing_file_mmap().
 *
 * Unlike the other operations this uses of->realfile directly and does not go
 * through ovl_real_file().
 */
static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct ovl_file *of = file->private_data;
	struct backing_file_ctx ctx = {
		.cred = ovl_creds(file_inode(file)->i_sb),
		.accessed = ovl_file_accessed,
	};

	return backing_file_mmap(of->realfile, vma, &ctx);
}

/*
 * ->fallocate(): forward to vfs_fallocate() on the real file while holding
 * the overlay inode lock.
 *
 * Returns the result of vfs_fallocate(), or the error from
 * file_remove_privs() or from obtaining the real file.
 */
static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{
	struct inode *inode = file_inode(file);
	struct file *realfile;
	int ret;

	inode_lock(inode);
	/* Update mode */
	ovl_copyattr(inode);
	ret = file_remove_privs(file);
	if (ret)
		goto out_unlock;

	realfile = ovl_real_file(file);
	ret = PTR_ERR(realfile);
	if (IS_ERR(realfile))
		goto out_unlock;

	with_ovl_creds(inode->i_sb)
		ret = vfs_fallocate(realfile, mode, offset, len);

	/* Update size */
	ovl_file_modified(file);

out_unlock:
	inode_unlock(inode);

	return ret;
}

/* ->fadvise(): forward to vfs_fadvise() on the real file */
static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
{
	struct file *realfile;

	realfile = ovl_real_file(file);
	if (IS_ERR(realfile))
		return PTR_ERR(realfile);

	with_ovl_creds(file_inode(file)->i_sb)
		return vfs_fadvise(realfile, offset, len, advice);
}

/* Selects which VFS helper ovl_copyfile() calls on the real files */
enum ovl_copyop {
	OVL_COPY,
	OVL_CLONE,
	OVL_DEDUPE,
};

/*
 * Common implementation of copy_file_range, clone and dedupe: run the VFS
 * helper selected by @op on the real files behind @file_in and @file_out,
 * holding the inode lock of @file_out.
 *
 * Returns the result of the VFS helper, or the error from
 * file_remove_privs() or from obtaining either real file.
 */
static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
			    struct file *file_out, loff_t pos_out,
			    loff_t len, unsigned int flags, enum ovl_copyop op)
{
	struct inode *inode_out = file_inode(file_out);
	struct file *realfile_in, *realfile_out;
	loff_t ret;

	inode_lock(inode_out);
	if (op != OVL_DEDUPE) {
		/* Update mode */
		ovl_copyattr(inode_out);
		ret = file_remove_privs(file_out);
		if (ret)
			goto out_unlock;
	}

	realfile_out = ovl_real_file(file_out);
	ret = PTR_ERR(realfile_out);
	if (IS_ERR(realfile_out))
		goto out_unlock;

	realfile_in = ovl_real_file(file_in);
	ret = PTR_ERR(realfile_in);
	if (IS_ERR(realfile_in))
		goto out_unlock;

	with_ovl_creds(file_inode(file_out)->i_sb) {
		switch (op) {
		case OVL_COPY:
			ret = vfs_copy_file_range(realfile_in, pos_in,
						  realfile_out, pos_out, len, flags);
			break;

		case OVL_CLONE:
			ret = vfs_clone_file_range(realfile_in, pos_in,
						   realfile_out, pos_out, len, flags);
			break;

		case OVL_DEDUPE:
			ret = vfs_dedupe_file_range_one(realfile_in, pos_in,
							realfile_out, pos_out, len,
							flags);
			break;
		}
	}

	/* Update size */
	ovl_file_modified(file_out);

out_unlock:
	inode_unlock(inode_out);

	return ret;
}

/* ->copy_file_range(): ovl_copyfile() with OVL_COPY */
static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
				   struct file *file_out, loff_t pos_out,
				   size_t len, unsigned int flags)
{
	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
			    OVL_COPY);
}

/*
 * ->remap_file_range(): clone, or dedupe when REMAP_FILE_DEDUP is set.
 *
 * Returns -EINVAL for any flag other than REMAP_FILE_DEDUP and
 * REMAP_FILE_ADVISORY, -EPERM for a dedupe where either file has no upper
 * inode, and otherwise the result of ovl_copyfile().
 */
static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in,
				   struct file *file_out, loff_t pos_out,
				   loff_t len, unsigned int remap_flags)
{
	enum ovl_copyop op;

	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
		return -EINVAL;

	if (remap_flags & REMAP_FILE_DEDUP)
		op = OVL_DEDUPE;
	else
		op = OVL_CLONE;

	/*
	 * Don't copy up because of a dedupe request, this wouldn't make sense
	 * most of the time (data would be duplicated instead of deduplicated).
	 */
	if (op == OVL_DEDUPE &&
	    (!ovl_inode_upper(file_inode(file_in)) ||
	     !ovl_inode_upper(file_inode(file_out))))
		return -EPERM;

	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len,
			    remap_flags, op);
}

/*
 * ->flush(): call the real file's ->flush() if it has one.
 * Returns 0 when the real file has no ->flush().
 */
static int ovl_flush(struct file *file, fl_owner_t id)
{
	struct file *realfile;
	int err = 0;

	realfile = ovl_real_file(file);
	if (IS_ERR(realfile))
		return PTR_ERR(realfile);

	if (realfile->f_op->flush) {
		with_ovl_creds(file_inode(file)->i_sb)
			err = realfile->f_op->flush(realfile, id);
	}

	return err;
}

/* File operations table wiring up the handlers defined in this file */
const struct file_operations ovl_file_operations = {
	.open		= ovl_open,
	.release	= ovl_release,
	.llseek		= ovl_llseek,
	.read_iter	= ovl_read_iter,
	.write_iter	= ovl_write_iter,
	.fsync		= ovl_fsync,
	.mmap		= ovl_mmap,
	.fallocate	= ovl_fallocate,
	.fadvise	= ovl_fadvise,
	.flush		= ovl_flush,
	.splice_read    = ovl_splice_read,
	.splice_write   = ovl_splice_write,

	.copy_file_range	= ovl_copy_file_range,
	.remap_file_range	= ovl_remap_file_range,
	.setlease		= generic_setlease,
};