1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2017 Red Hat, Inc. 4 */ 5 6 #include <linux/cred.h> 7 #include <linux/file.h> 8 #include <linux/mount.h> 9 #include <linux/xattr.h> 10 #include <linux/uio.h> 11 #include <linux/uaccess.h> 12 #include <linux/security.h> 13 #include <linux/fs.h> 14 #include <linux/backing-file.h> 15 #include "overlayfs.h" 16 17 static char ovl_whatisit(struct inode *inode, struct inode *realinode) 18 { 19 if (realinode != ovl_inode_upper(inode)) 20 return 'l'; 21 if (ovl_has_upperdata(inode)) 22 return 'u'; 23 else 24 return 'm'; 25 } 26 27 static struct file *ovl_open_realfile(const struct file *file, 28 const struct path *realpath) 29 { 30 struct inode *realinode = d_inode(realpath->dentry); 31 struct inode *inode = file_inode(file); 32 struct mnt_idmap *real_idmap; 33 struct file *realfile; 34 int flags = file->f_flags | OVL_OPEN_FLAGS; 35 int acc_mode = ACC_MODE(flags); 36 int err; 37 38 if (flags & O_APPEND) 39 acc_mode |= MAY_APPEND; 40 41 with_ovl_creds(inode->i_sb) { 42 real_idmap = mnt_idmap(realpath->mnt); 43 err = inode_permission(real_idmap, realinode, MAY_OPEN | acc_mode); 44 if (err) { 45 realfile = ERR_PTR(err); 46 } else { 47 if (!inode_owner_or_capable(real_idmap, realinode)) 48 flags &= ~O_NOATIME; 49 50 realfile = backing_file_open(file_user_path(file), 51 flags, realpath, current_cred()); 52 } 53 } 54 55 pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n", 56 file, file, ovl_whatisit(inode, realinode), file->f_flags, 57 realfile, IS_ERR(realfile) ? 0 : realfile->f_flags); 58 59 return realfile; 60 } 61 62 #define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT) 63 64 static int ovl_change_flags(struct file *file, unsigned int flags) 65 { 66 struct inode *inode = file_inode(file); 67 int err; 68 69 flags &= OVL_SETFL_MASK; 70 71 if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode)) 72 return -EPERM; 73 74 if ((flags & O_DIRECT) && !(file->f_mode & FMODE_CAN_ODIRECT)) 75 return -EINVAL; 76 77 if (file->f_op->check_flags) { 78 err = file->f_op->check_flags(flags); 79 if (err) 80 return err; 81 } 82 83 spin_lock(&file->f_lock); 84 file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags; 85 file->f_iocb_flags = iocb_flags(file); 86 spin_unlock(&file->f_lock); 87 88 return 0; 89 } 90 91 struct ovl_file { 92 struct file *realfile; 93 struct file *upperfile; 94 }; 95 96 struct ovl_file *ovl_file_alloc(struct file *realfile) 97 { 98 struct ovl_file *of = kzalloc(sizeof(struct ovl_file), GFP_KERNEL); 99 100 if (unlikely(!of)) 101 return NULL; 102 103 of->realfile = realfile; 104 return of; 105 } 106 107 void ovl_file_free(struct ovl_file *of) 108 { 109 fput(of->realfile); 110 if (of->upperfile) 111 fput(of->upperfile); 112 kfree(of); 113 } 114 115 static bool ovl_is_real_file(const struct file *realfile, 116 const struct path *realpath) 117 { 118 return file_inode(realfile) == d_inode(realpath->dentry); 119 } 120 121 static struct file *ovl_real_file_path(const struct file *file, 122 const struct path *realpath) 123 { 124 struct ovl_file *of = file->private_data; 125 struct file *realfile = of->realfile; 126 127 if (WARN_ON_ONCE(!realpath->dentry)) 128 return ERR_PTR(-EIO); 129 130 /* 131 * If the realfile that we want is not where the data used to be at 132 * open time, either we'd been copied up, or it's an fsync of a 133 * metacopied file. We need the upperfile either way, so see if it 134 * is already opened and if it is not then open and store it. 135 */ 136 if (unlikely(!ovl_is_real_file(realfile, realpath))) { 137 struct file *upperfile = READ_ONCE(of->upperfile); 138 struct file *old; 139 140 if (!upperfile) { /* Nobody opened upperfile yet */ 141 upperfile = ovl_open_realfile(file, realpath); 142 if (IS_ERR(upperfile)) 143 return upperfile; 144 145 /* Store the upperfile for later */ 146 old = cmpxchg_release(&of->upperfile, NULL, upperfile); 147 if (old) { /* Someone opened upperfile before us */ 148 fput(upperfile); 149 upperfile = old; 150 } 151 } 152 /* 153 * Stored file must be from the right inode, unless someone's 154 * been corrupting the upper layer. 155 */ 156 if (WARN_ON_ONCE(!ovl_is_real_file(upperfile, realpath))) 157 return ERR_PTR(-EIO); 158 159 realfile = upperfile; 160 } 161 162 /* Did the flags change since open? */ 163 if (unlikely((file->f_flags ^ realfile->f_flags) & ~OVL_OPEN_FLAGS)) { 164 int err = ovl_change_flags(realfile, file->f_flags); 165 166 if (err) 167 return ERR_PTR(err); 168 } 169 170 return realfile; 171 } 172 173 static struct file *ovl_real_file(const struct file *file) 174 { 175 struct dentry *dentry = file_dentry(file); 176 struct path realpath; 177 int err; 178 179 if (d_is_dir(dentry)) { 180 struct file *f = ovl_dir_real_file(file, false); 181 182 if (WARN_ON_ONCE(!f)) 183 return ERR_PTR(-EIO); 184 return f; 185 } 186 187 /* lazy lookup and verify of lowerdata */ 188 err = ovl_verify_lowerdata(dentry); 189 if (err) 190 return ERR_PTR(err); 191 192 ovl_path_realdata(dentry, &realpath); 193 194 return ovl_real_file_path(file, &realpath); 195 } 196 197 static int ovl_open(struct inode *inode, struct file *file) 198 { 199 struct dentry *dentry = file_dentry(file); 200 struct file *realfile; 201 struct path realpath; 202 struct ovl_file *of; 203 int err; 204 205 /* lazy lookup and verify lowerdata */ 206 err = ovl_verify_lowerdata(dentry); 207 if (err) 208 return err; 209 210 err = ovl_maybe_copy_up(dentry, file->f_flags); 211 if (err) 212 return err; 213 214 /* No longer need these flags, so don't pass them on to underlying fs */ 215 file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); 216 217 ovl_path_realdata(dentry, &realpath); 218 if (!realpath.dentry) 219 return -EIO; 220 221 realfile = ovl_open_realfile(file, &realpath); 222 if (IS_ERR(realfile)) 223 return PTR_ERR(realfile); 224 225 of = ovl_file_alloc(realfile); 226 if (!of) { 227 fput(realfile); 228 return -ENOMEM; 229 } 230 231 file->private_data = of; 232 233 return 0; 234 } 235 236 static int ovl_release(struct inode *inode, struct file *file) 237 { 238 ovl_file_free(file->private_data); 239 return 0; 240 } 241 242 static loff_t ovl_llseek(struct file *file, loff_t offset, int whence) 243 { 244 struct inode *inode = file_inode(file); 245 struct file *realfile; 246 loff_t ret; 247 248 /* 249 * The two special cases below do not need to involve real fs, 250 * so we can optimizing concurrent callers. 251 */ 252 if (offset == 0) { 253 if (whence == SEEK_CUR) 254 return file->f_pos; 255 256 if (whence == SEEK_SET) 257 return vfs_setpos(file, 0, 0); 258 } 259 260 realfile = ovl_real_file(file); 261 if (IS_ERR(realfile)) 262 return PTR_ERR(realfile); 263 264 /* 265 * Overlay file f_pos is the master copy that is preserved 266 * through copy up and modified on read/write, but only real 267 * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose 268 * limitations that are more strict than ->s_maxbytes for specific 269 * files, so we use the real file to perform seeks. 270 */ 271 ovl_inode_lock(inode); 272 realfile->f_pos = file->f_pos; 273 274 with_ovl_creds(inode->i_sb) 275 ret = vfs_llseek(realfile, offset, whence); 276 277 file->f_pos = realfile->f_pos; 278 ovl_inode_unlock(inode); 279 280 return ret; 281 } 282 283 static void ovl_file_modified(struct file *file) 284 { 285 /* Update size/mtime */ 286 ovl_copyattr(file_inode(file)); 287 } 288 289 static void ovl_file_end_write(struct kiocb *iocb, ssize_t ret) 290 { 291 ovl_file_modified(iocb->ki_filp); 292 } 293 294 static void ovl_file_accessed(struct file *file) 295 { 296 struct inode *inode, *upperinode; 297 struct timespec64 ctime, uctime; 298 struct timespec64 mtime, umtime; 299 300 if (file->f_flags & O_NOATIME) 301 return; 302 303 inode = file_inode(file); 304 upperinode = ovl_inode_upper(inode); 305 306 if (!upperinode) 307 return; 308 309 ctime = inode_get_ctime(inode); 310 uctime = inode_get_ctime(upperinode); 311 mtime = inode_get_mtime(inode); 312 umtime = inode_get_mtime(upperinode); 313 if ((!timespec64_equal(&mtime, &umtime)) || 314 !timespec64_equal(&ctime, &uctime)) { 315 inode_set_mtime_to_ts(inode, inode_get_mtime(upperinode)); 316 inode_set_ctime_to_ts(inode, uctime); 317 } 318 319 touch_atime(&file->f_path); 320 } 321 322 static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) 323 { 324 struct file *file = iocb->ki_filp; 325 struct file *realfile; 326 struct backing_file_ctx ctx = { 327 .cred = ovl_creds(file_inode(file)->i_sb), 328 .accessed = ovl_file_accessed, 329 }; 330 331 if (!iov_iter_count(iter)) 332 return 0; 333 334 realfile = ovl_real_file(file); 335 if (IS_ERR(realfile)) 336 return PTR_ERR(realfile); 337 338 return backing_file_read_iter(realfile, iter, iocb, iocb->ki_flags, 339 &ctx); 340 } 341 342 static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) 343 { 344 struct file *file = iocb->ki_filp; 345 struct inode *inode = file_inode(file); 346 struct file *realfile; 347 ssize_t ret; 348 int ifl = iocb->ki_flags; 349 struct backing_file_ctx ctx = { 350 .cred = ovl_creds(inode->i_sb), 351 .end_write = ovl_file_end_write, 352 }; 353 354 if (!iov_iter_count(iter)) 355 return 0; 356 357 inode_lock(inode); 358 /* Update mode */ 359 ovl_copyattr(inode); 360 361 realfile = ovl_real_file(file); 362 ret = PTR_ERR(realfile); 363 if (IS_ERR(realfile)) 364 goto out_unlock; 365 366 if (!ovl_should_sync(OVL_FS(inode->i_sb))) 367 ifl &= ~(IOCB_DSYNC | IOCB_SYNC); 368 369 ret = backing_file_write_iter(realfile, iter, iocb, ifl, &ctx); 370 371 out_unlock: 372 inode_unlock(inode); 373 374 return ret; 375 } 376 377 static ssize_t ovl_splice_read(struct file *in, loff_t *ppos, 378 struct pipe_inode_info *pipe, size_t len, 379 unsigned int flags) 380 { 381 struct file *realfile; 382 ssize_t ret; 383 struct backing_file_ctx ctx = { 384 .cred = ovl_creds(file_inode(in)->i_sb), 385 .accessed = ovl_file_accessed, 386 }; 387 struct kiocb iocb; 388 389 realfile = ovl_real_file(in); 390 if (IS_ERR(realfile)) 391 return PTR_ERR(realfile); 392 393 init_sync_kiocb(&iocb, in); 394 iocb.ki_pos = *ppos; 395 ret = backing_file_splice_read(realfile, &iocb, pipe, len, flags, &ctx); 396 *ppos = iocb.ki_pos; 397 398 return ret; 399 } 400 401 /* 402 * Calling iter_file_splice_write() directly from overlay's f_op may deadlock 403 * due to lock order inversion between pipe->mutex in iter_file_splice_write() 404 * and file_start_write(realfile) in ovl_write_iter(). 405 * 406 * So do everything ovl_write_iter() does and call iter_file_splice_write() on 407 * the real file. 408 */ 409 static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out, 410 loff_t *ppos, size_t len, unsigned int flags) 411 { 412 struct file *realfile; 413 struct inode *inode = file_inode(out); 414 ssize_t ret; 415 struct backing_file_ctx ctx = { 416 .cred = ovl_creds(inode->i_sb), 417 .end_write = ovl_file_end_write, 418 }; 419 struct kiocb iocb; 420 421 inode_lock(inode); 422 /* Update mode */ 423 ovl_copyattr(inode); 424 425 realfile = ovl_real_file(out); 426 ret = PTR_ERR(realfile); 427 if (IS_ERR(realfile)) 428 goto out_unlock; 429 430 init_sync_kiocb(&iocb, out); 431 iocb.ki_pos = *ppos; 432 ret = backing_file_splice_write(pipe, realfile, &iocb, len, flags, &ctx); 433 *ppos = iocb.ki_pos; 434 435 out_unlock: 436 inode_unlock(inode); 437 438 return ret; 439 } 440 441 static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync) 442 { 443 struct dentry *dentry = file_dentry(file); 444 enum ovl_path_type type; 445 struct path upperpath; 446 struct file *upperfile; 447 int ret; 448 449 ret = ovl_sync_status(OVL_FS(file_inode(file)->i_sb)); 450 if (ret <= 0) 451 return ret; 452 453 /* Don't sync lower file for fear of receiving EROFS error */ 454 type = ovl_path_type(dentry); 455 if (!OVL_TYPE_UPPER(type) || (datasync && OVL_TYPE_MERGE(type))) 456 return 0; 457 458 ovl_path_upper(dentry, &upperpath); 459 upperfile = ovl_real_file_path(file, &upperpath); 460 if (IS_ERR(upperfile)) 461 return PTR_ERR(upperfile); 462 463 with_ovl_creds(file_inode(file)->i_sb) 464 return vfs_fsync_range(upperfile, start, end, datasync); 465 } 466 467 static int ovl_mmap(struct file *file, struct vm_area_struct *vma) 468 { 469 struct ovl_file *of = file->private_data; 470 struct backing_file_ctx ctx = { 471 .cred = ovl_creds(file_inode(file)->i_sb), 472 .accessed = ovl_file_accessed, 473 }; 474 475 return backing_file_mmap(of->realfile, vma, &ctx); 476 } 477 478 static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len) 479 { 480 struct inode *inode = file_inode(file); 481 struct file *realfile; 482 int ret; 483 484 inode_lock(inode); 485 /* Update mode */ 486 ovl_copyattr(inode); 487 ret = file_remove_privs(file); 488 if (ret) 489 goto out_unlock; 490 491 realfile = ovl_real_file(file); 492 ret = PTR_ERR(realfile); 493 if (IS_ERR(realfile)) 494 goto out_unlock; 495 496 with_ovl_creds(inode->i_sb) 497 ret = vfs_fallocate(realfile, mode, offset, len); 498 499 /* Update size */ 500 ovl_file_modified(file); 501 502 out_unlock: 503 inode_unlock(inode); 504 505 return ret; 506 } 507 508 static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice) 509 { 510 struct file *realfile; 511 512 realfile = ovl_real_file(file); 513 if (IS_ERR(realfile)) 514 return PTR_ERR(realfile); 515 516 with_ovl_creds(file_inode(file)->i_sb) 517 return vfs_fadvise(realfile, offset, len, advice); 518 } 519 520 enum ovl_copyop { 521 OVL_COPY, 522 OVL_CLONE, 523 OVL_DEDUPE, 524 }; 525 526 static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in, 527 struct file *file_out, loff_t pos_out, 528 loff_t len, unsigned int flags, enum ovl_copyop op) 529 { 530 struct inode *inode_out = file_inode(file_out); 531 struct file *realfile_in, *realfile_out; 532 loff_t ret; 533 534 inode_lock(inode_out); 535 if (op != OVL_DEDUPE) { 536 /* Update mode */ 537 ovl_copyattr(inode_out); 538 ret = file_remove_privs(file_out); 539 if (ret) 540 goto out_unlock; 541 } 542 543 realfile_out = ovl_real_file(file_out); 544 ret = PTR_ERR(realfile_out); 545 if (IS_ERR(realfile_out)) 546 goto out_unlock; 547 548 realfile_in = ovl_real_file(file_in); 549 ret = PTR_ERR(realfile_in); 550 if (IS_ERR(realfile_in)) 551 goto out_unlock; 552 553 with_ovl_creds(file_inode(file_out)->i_sb) { 554 switch (op) { 555 case OVL_COPY: 556 ret = vfs_copy_file_range(realfile_in, pos_in, 557 realfile_out, pos_out, len, flags); 558 break; 559 560 case OVL_CLONE: 561 ret = vfs_clone_file_range(realfile_in, pos_in, 562 realfile_out, pos_out, len, flags); 563 break; 564 565 case OVL_DEDUPE: 566 ret = vfs_dedupe_file_range_one(realfile_in, pos_in, 567 realfile_out, pos_out, len, 568 flags); 569 break; 570 } 571 } 572 573 /* Update size */ 574 ovl_file_modified(file_out); 575 576 out_unlock: 577 inode_unlock(inode_out); 578 579 return ret; 580 } 581 582 static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in, 583 struct file *file_out, loff_t pos_out, 584 size_t len, unsigned int flags) 585 { 586 return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags, 587 OVL_COPY); 588 } 589 590 static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in, 591 struct file *file_out, loff_t pos_out, 592 loff_t len, unsigned int remap_flags) 593 { 594 enum ovl_copyop op; 595 596 if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) 597 return -EINVAL; 598 599 if (remap_flags & REMAP_FILE_DEDUP) 600 op = OVL_DEDUPE; 601 else 602 op = OVL_CLONE; 603 604 /* 605 * Don't copy up because of a dedupe request, this wouldn't make sense 606 * most of the time (data would be duplicated instead of deduplicated). 607 */ 608 if (op == OVL_DEDUPE && 609 (!ovl_inode_upper(file_inode(file_in)) || 610 !ovl_inode_upper(file_inode(file_out)))) 611 return -EPERM; 612 613 return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 614 remap_flags, op); 615 } 616 617 static int ovl_flush(struct file *file, fl_owner_t id) 618 { 619 struct file *realfile; 620 int err = 0; 621 622 realfile = ovl_real_file(file); 623 if (IS_ERR(realfile)) 624 return PTR_ERR(realfile); 625 626 if (realfile->f_op->flush) { 627 with_ovl_creds(file_inode(file)->i_sb) 628 err = realfile->f_op->flush(realfile, id); 629 } 630 631 return err; 632 } 633 634 const struct file_operations ovl_file_operations = { 635 .open = ovl_open, 636 .release = ovl_release, 637 .llseek = ovl_llseek, 638 .read_iter = ovl_read_iter, 639 .write_iter = ovl_write_iter, 640 .fsync = ovl_fsync, 641 .mmap = ovl_mmap, 642 .fallocate = ovl_fallocate, 643 .fadvise = ovl_fadvise, 644 .flush = ovl_flush, 645 .splice_read = ovl_splice_read, 646 .splice_write = ovl_splice_write, 647 648 .copy_file_range = ovl_copy_file_range, 649 .remap_file_range = ovl_remap_file_range, 650 }; 651