1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * 4 * Copyright (C) 2011 Novell Inc. 5 */ 6 7 #include <uapi/linux/magic.h> 8 #include <linux/fs.h> 9 #include <linux/namei.h> 10 #include <linux/xattr.h> 11 #include <linux/mount.h> 12 #include <linux/parser.h> 13 #include <linux/module.h> 14 #include <linux/statfs.h> 15 #include <linux/seq_file.h> 16 #include <linux/posix_acl_xattr.h> 17 #include <linux/exportfs.h> 18 #include "overlayfs.h" 19 20 MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); 21 MODULE_DESCRIPTION("Overlay filesystem"); 22 MODULE_LICENSE("GPL"); 23 24 25 struct ovl_dir_cache; 26 27 #define OVL_MAX_STACK 500 28 29 static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR); 30 module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644); 31 MODULE_PARM_DESC(redirect_dir, 32 "Default to on or off for the redirect_dir feature"); 33 34 static bool ovl_redirect_always_follow = 35 IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW); 36 module_param_named(redirect_always_follow, ovl_redirect_always_follow, 37 bool, 0644); 38 MODULE_PARM_DESC(redirect_always_follow, 39 "Follow redirects even if redirect_dir feature is turned off"); 40 41 static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX); 42 module_param_named(index, ovl_index_def, bool, 0644); 43 MODULE_PARM_DESC(index, 44 "Default to on or off for the inodes index feature"); 45 46 static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT); 47 module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644); 48 MODULE_PARM_DESC(nfs_export, 49 "Default to on or off for the NFS export feature"); 50 51 static bool ovl_xino_auto_def = IS_ENABLED(CONFIG_OVERLAY_FS_XINO_AUTO); 52 module_param_named(xino_auto, ovl_xino_auto_def, bool, 0644); 53 MODULE_PARM_DESC(xino_auto, 54 "Auto enable xino feature"); 55 56 static void ovl_entry_stack_free(struct ovl_entry *oe) 57 { 58 unsigned int i; 59 60 for (i = 0; i < oe->numlower; i++) 61 dput(oe->lowerstack[i].dentry); 62 } 63 64 static bool ovl_metacopy_def = IS_ENABLED(CONFIG_OVERLAY_FS_METACOPY); 65 module_param_named(metacopy, ovl_metacopy_def, bool, 0644); 66 MODULE_PARM_DESC(metacopy, 67 "Default to on or off for the metadata only copy up feature"); 68 69 static void ovl_dentry_release(struct dentry *dentry) 70 { 71 struct ovl_entry *oe = dentry->d_fsdata; 72 73 if (oe) { 74 ovl_entry_stack_free(oe); 75 kfree_rcu(oe, rcu); 76 } 77 } 78 79 static struct dentry *ovl_d_real(struct dentry *dentry, 80 const struct inode *inode) 81 { 82 struct dentry *real; 83 84 /* It's an overlay file */ 85 if (inode && d_inode(dentry) == inode) 86 return dentry; 87 88 if (!d_is_reg(dentry)) { 89 if (!inode || inode == d_inode(dentry)) 90 return dentry; 91 goto bug; 92 } 93 94 real = ovl_dentry_upper(dentry); 95 if (real && (inode == d_inode(real))) 96 return real; 97 98 if (real && !inode && ovl_has_upperdata(d_inode(dentry))) 99 return real; 100 101 real = ovl_dentry_lowerdata(dentry); 102 if (!real) 103 goto bug; 104 105 /* Handle recursion */ 106 real = d_real(real, inode); 107 108 if (!inode || inode == d_inode(real)) 109 return real; 110 bug: 111 WARN(1, "ovl_d_real(%pd4, %s:%lu): real dentry not found\n", dentry, 112 inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0); 113 return dentry; 114 } 115 116 static int ovl_revalidate_real(struct dentry *d, unsigned int flags, bool weak) 117 { 118 int ret = 1; 119 120 if (weak) { 121 if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) 122 ret = d->d_op->d_weak_revalidate(d, flags); 123 } else if (d->d_flags & DCACHE_OP_REVALIDATE) { 124 ret = d->d_op->d_revalidate(d, flags); 125 if (!ret) { 126 if (!(flags & LOOKUP_RCU)) 127 d_invalidate(d); 128 ret = -ESTALE; 129 } 130 } 131 return ret; 132 } 133 134 static int ovl_dentry_revalidate_common(struct dentry *dentry, 135 unsigned int flags, bool weak) 136 { 137 struct ovl_entry *oe = dentry->d_fsdata; 138 struct dentry *upper; 139 unsigned int i; 140 int ret = 1; 141 142 upper = ovl_dentry_upper(dentry); 143 if (upper) 144 ret = ovl_revalidate_real(upper, flags, weak); 145 146 for (i = 0; ret > 0 && i < oe->numlower; i++) { 147 ret = ovl_revalidate_real(oe->lowerstack[i].dentry, flags, 148 weak); 149 } 150 return ret; 151 } 152 153 static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags) 154 { 155 return ovl_dentry_revalidate_common(dentry, flags, false); 156 } 157 158 static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags) 159 { 160 return ovl_dentry_revalidate_common(dentry, flags, true); 161 } 162 163 static const struct dentry_operations ovl_dentry_operations = { 164 .d_release = ovl_dentry_release, 165 .d_real = ovl_d_real, 166 .d_revalidate = ovl_dentry_revalidate, 167 .d_weak_revalidate = ovl_dentry_weak_revalidate, 168 }; 169 170 static struct kmem_cache *ovl_inode_cachep; 171 172 static struct inode *ovl_alloc_inode(struct super_block *sb) 173 { 174 struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL); 175 176 if (!oi) 177 return NULL; 178 179 oi->cache = NULL; 180 oi->redirect = NULL; 181 oi->version = 0; 182 oi->flags = 0; 183 oi->__upperdentry = NULL; 184 oi->lower = NULL; 185 oi->lowerdata = NULL; 186 mutex_init(&oi->lock); 187 188 return &oi->vfs_inode; 189 } 190 191 static void ovl_free_inode(struct inode *inode) 192 { 193 struct ovl_inode *oi = OVL_I(inode); 194 195 kfree(oi->redirect); 196 mutex_destroy(&oi->lock); 197 kmem_cache_free(ovl_inode_cachep, oi); 198 } 199 200 static void ovl_destroy_inode(struct inode *inode) 201 { 202 struct ovl_inode *oi = OVL_I(inode); 203 204 dput(oi->__upperdentry); 205 iput(oi->lower); 206 if (S_ISDIR(inode->i_mode)) 207 ovl_dir_cache_free(inode); 208 else 209 iput(oi->lowerdata); 210 } 211 212 static void ovl_free_fs(struct ovl_fs *ofs) 213 { 214 struct vfsmount **mounts; 215 unsigned i; 216 217 iput(ofs->workbasedir_trap); 218 iput(ofs->indexdir_trap); 219 iput(ofs->workdir_trap); 220 dput(ofs->whiteout); 221 dput(ofs->indexdir); 222 dput(ofs->workdir); 223 if (ofs->workdir_locked) 224 ovl_inuse_unlock(ofs->workbasedir); 225 dput(ofs->workbasedir); 226 if (ofs->upperdir_locked) 227 ovl_inuse_unlock(ovl_upper_mnt(ofs)->mnt_root); 228 229 /* Hack! Reuse ofs->layers as a vfsmount array before freeing it */ 230 mounts = (struct vfsmount **) ofs->layers; 231 for (i = 0; i < ofs->numlayer; i++) { 232 iput(ofs->layers[i].trap); 233 mounts[i] = ofs->layers[i].mnt; 234 } 235 kern_unmount_array(mounts, ofs->numlayer); 236 kfree(ofs->layers); 237 for (i = 0; i < ofs->numfs; i++) 238 free_anon_bdev(ofs->fs[i].pseudo_dev); 239 kfree(ofs->fs); 240 241 kfree(ofs->config.lowerdir); 242 kfree(ofs->config.upperdir); 243 kfree(ofs->config.workdir); 244 kfree(ofs->config.redirect_mode); 245 if (ofs->creator_cred) 246 put_cred(ofs->creator_cred); 247 kfree(ofs); 248 } 249 250 static void ovl_put_super(struct super_block *sb) 251 { 252 struct ovl_fs *ofs = sb->s_fs_info; 253 254 ovl_free_fs(ofs); 255 } 256 257 /* Sync real dirty inodes in upper filesystem (if it exists) */ 258 static int ovl_sync_fs(struct super_block *sb, int wait) 259 { 260 struct ovl_fs *ofs = sb->s_fs_info; 261 struct super_block *upper_sb; 262 int ret; 263 264 if (!ovl_upper_mnt(ofs)) 265 return 0; 266 267 /* 268 * Not called for sync(2) call or an emergency sync (SB_I_SKIP_SYNC). 269 * All the super blocks will be iterated, including upper_sb. 270 * 271 * If this is a syncfs(2) call, then we do need to call 272 * sync_filesystem() on upper_sb, but enough if we do it when being 273 * called with wait == 1. 274 */ 275 if (!wait) 276 return 0; 277 278 upper_sb = ovl_upper_mnt(ofs)->mnt_sb; 279 280 down_read(&upper_sb->s_umount); 281 ret = sync_filesystem(upper_sb); 282 up_read(&upper_sb->s_umount); 283 284 return ret; 285 } 286 287 /** 288 * ovl_statfs 289 * @sb: The overlayfs super block 290 * @buf: The struct kstatfs to fill in with stats 291 * 292 * Get the filesystem statistics. As writes always target the upper layer 293 * filesystem pass the statfs to the upper filesystem (if it exists) 294 */ 295 static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf) 296 { 297 struct ovl_fs *ofs = dentry->d_sb->s_fs_info; 298 struct dentry *root_dentry = dentry->d_sb->s_root; 299 struct path path; 300 int err; 301 302 ovl_path_real(root_dentry, &path); 303 304 err = vfs_statfs(&path, buf); 305 if (!err) { 306 buf->f_namelen = ofs->namelen; 307 buf->f_type = OVERLAYFS_SUPER_MAGIC; 308 } 309 310 return err; 311 } 312 313 /* Will this overlay be forced to mount/remount ro? */ 314 static bool ovl_force_readonly(struct ovl_fs *ofs) 315 { 316 return (!ovl_upper_mnt(ofs) || !ofs->workdir); 317 } 318 319 static const char *ovl_redirect_mode_def(void) 320 { 321 return ovl_redirect_dir_def ? "on" : "off"; 322 } 323 324 static const char * const ovl_xino_str[] = { 325 "off", 326 "auto", 327 "on", 328 }; 329 330 static inline int ovl_xino_def(void) 331 { 332 return ovl_xino_auto_def ? OVL_XINO_AUTO : OVL_XINO_OFF; 333 } 334 335 /** 336 * ovl_show_options 337 * 338 * Prints the mount options for a given superblock. 339 * Returns zero; does not fail. 340 */ 341 static int ovl_show_options(struct seq_file *m, struct dentry *dentry) 342 { 343 struct super_block *sb = dentry->d_sb; 344 struct ovl_fs *ofs = sb->s_fs_info; 345 346 seq_show_option(m, "lowerdir", ofs->config.lowerdir); 347 if (ofs->config.upperdir) { 348 seq_show_option(m, "upperdir", ofs->config.upperdir); 349 seq_show_option(m, "workdir", ofs->config.workdir); 350 } 351 if (ofs->config.default_permissions) 352 seq_puts(m, ",default_permissions"); 353 if (strcmp(ofs->config.redirect_mode, ovl_redirect_mode_def()) != 0) 354 seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode); 355 if (ofs->config.index != ovl_index_def) 356 seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off"); 357 if (ofs->config.nfs_export != ovl_nfs_export_def) 358 seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ? 359 "on" : "off"); 360 if (ofs->config.xino != ovl_xino_def() && !ovl_same_fs(sb)) 361 seq_printf(m, ",xino=%s", ovl_xino_str[ofs->config.xino]); 362 if (ofs->config.metacopy != ovl_metacopy_def) 363 seq_printf(m, ",metacopy=%s", 364 ofs->config.metacopy ? "on" : "off"); 365 return 0; 366 } 367 368 static int ovl_remount(struct super_block *sb, int *flags, char *data) 369 { 370 struct ovl_fs *ofs = sb->s_fs_info; 371 struct super_block *upper_sb; 372 int ret = 0; 373 374 if (!(*flags & SB_RDONLY) && ovl_force_readonly(ofs)) 375 return -EROFS; 376 377 if (*flags & SB_RDONLY && !sb_rdonly(sb)) { 378 upper_sb = ovl_upper_mnt(ofs)->mnt_sb; 379 down_read(&upper_sb->s_umount); 380 ret = sync_filesystem(upper_sb); 381 up_read(&upper_sb->s_umount); 382 } 383 384 return ret; 385 } 386 387 static const struct super_operations ovl_super_operations = { 388 .alloc_inode = ovl_alloc_inode, 389 .free_inode = ovl_free_inode, 390 .destroy_inode = ovl_destroy_inode, 391 .drop_inode = generic_delete_inode, 392 .put_super = ovl_put_super, 393 .sync_fs = ovl_sync_fs, 394 .statfs = ovl_statfs, 395 .show_options = ovl_show_options, 396 .remount_fs = ovl_remount, 397 }; 398 399 enum { 400 OPT_LOWERDIR, 401 OPT_UPPERDIR, 402 OPT_WORKDIR, 403 OPT_DEFAULT_PERMISSIONS, 404 OPT_REDIRECT_DIR, 405 OPT_INDEX_ON, 406 OPT_INDEX_OFF, 407 OPT_NFS_EXPORT_ON, 408 OPT_NFS_EXPORT_OFF, 409 OPT_XINO_ON, 410 OPT_XINO_OFF, 411 OPT_XINO_AUTO, 412 OPT_METACOPY_ON, 413 OPT_METACOPY_OFF, 414 OPT_ERR, 415 }; 416 417 static const match_table_t ovl_tokens = { 418 {OPT_LOWERDIR, "lowerdir=%s"}, 419 {OPT_UPPERDIR, "upperdir=%s"}, 420 {OPT_WORKDIR, "workdir=%s"}, 421 {OPT_DEFAULT_PERMISSIONS, "default_permissions"}, 422 {OPT_REDIRECT_DIR, "redirect_dir=%s"}, 423 {OPT_INDEX_ON, "index=on"}, 424 {OPT_INDEX_OFF, "index=off"}, 425 {OPT_NFS_EXPORT_ON, "nfs_export=on"}, 426 {OPT_NFS_EXPORT_OFF, "nfs_export=off"}, 427 {OPT_XINO_ON, "xino=on"}, 428 {OPT_XINO_OFF, "xino=off"}, 429 {OPT_XINO_AUTO, "xino=auto"}, 430 {OPT_METACOPY_ON, "metacopy=on"}, 431 {OPT_METACOPY_OFF, "metacopy=off"}, 432 {OPT_ERR, NULL} 433 }; 434 435 static char *ovl_next_opt(char **s) 436 { 437 char *sbegin = *s; 438 char *p; 439 440 if (sbegin == NULL) 441 return NULL; 442 443 for (p = sbegin; *p; p++) { 444 if (*p == '\\') { 445 p++; 446 if (!*p) 447 break; 448 } else if (*p == ',') { 449 *p = '\0'; 450 *s = p + 1; 451 return sbegin; 452 } 453 } 454 *s = NULL; 455 return sbegin; 456 } 457 458 static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode) 459 { 460 if (strcmp(mode, "on") == 0) { 461 config->redirect_dir = true; 462 /* 463 * Does not make sense to have redirect creation without 464 * redirect following. 465 */ 466 config->redirect_follow = true; 467 } else if (strcmp(mode, "follow") == 0) { 468 config->redirect_follow = true; 469 } else if (strcmp(mode, "off") == 0) { 470 if (ovl_redirect_always_follow) 471 config->redirect_follow = true; 472 } else if (strcmp(mode, "nofollow") != 0) { 473 pr_err("bad mount option \"redirect_dir=%s\"\n", 474 mode); 475 return -EINVAL; 476 } 477 478 return 0; 479 } 480 481 static int ovl_parse_opt(char *opt, struct ovl_config *config) 482 { 483 char *p; 484 int err; 485 bool metacopy_opt = false, redirect_opt = false; 486 bool nfs_export_opt = false, index_opt = false; 487 488 config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL); 489 if (!config->redirect_mode) 490 return -ENOMEM; 491 492 while ((p = ovl_next_opt(&opt)) != NULL) { 493 int token; 494 substring_t args[MAX_OPT_ARGS]; 495 496 if (!*p) 497 continue; 498 499 token = match_token(p, ovl_tokens, args); 500 switch (token) { 501 case OPT_UPPERDIR: 502 kfree(config->upperdir); 503 config->upperdir = match_strdup(&args[0]); 504 if (!config->upperdir) 505 return -ENOMEM; 506 break; 507 508 case OPT_LOWERDIR: 509 kfree(config->lowerdir); 510 config->lowerdir = match_strdup(&args[0]); 511 if (!config->lowerdir) 512 return -ENOMEM; 513 break; 514 515 case OPT_WORKDIR: 516 kfree(config->workdir); 517 config->workdir = match_strdup(&args[0]); 518 if (!config->workdir) 519 return -ENOMEM; 520 break; 521 522 case OPT_DEFAULT_PERMISSIONS: 523 config->default_permissions = true; 524 break; 525 526 case OPT_REDIRECT_DIR: 527 kfree(config->redirect_mode); 528 config->redirect_mode = match_strdup(&args[0]); 529 if (!config->redirect_mode) 530 return -ENOMEM; 531 redirect_opt = true; 532 break; 533 534 case OPT_INDEX_ON: 535 config->index = true; 536 index_opt = true; 537 break; 538 539 case OPT_INDEX_OFF: 540 config->index = false; 541 index_opt = true; 542 break; 543 544 case OPT_NFS_EXPORT_ON: 545 config->nfs_export = true; 546 nfs_export_opt = true; 547 break; 548 549 case OPT_NFS_EXPORT_OFF: 550 config->nfs_export = false; 551 nfs_export_opt = true; 552 break; 553 554 case OPT_XINO_ON: 555 config->xino = OVL_XINO_ON; 556 break; 557 558 case OPT_XINO_OFF: 559 config->xino = OVL_XINO_OFF; 560 break; 561 562 case OPT_XINO_AUTO: 563 config->xino = OVL_XINO_AUTO; 564 break; 565 566 case OPT_METACOPY_ON: 567 config->metacopy = true; 568 metacopy_opt = true; 569 break; 570 571 case OPT_METACOPY_OFF: 572 config->metacopy = false; 573 metacopy_opt = true; 574 break; 575 576 default: 577 pr_err("unrecognized mount option \"%s\" or missing value\n", 578 p); 579 return -EINVAL; 580 } 581 } 582 583 /* Workdir is useless in non-upper mount */ 584 if (!config->upperdir && config->workdir) { 585 pr_info("option \"workdir=%s\" is useless in a non-upper mount, ignore\n", 586 config->workdir); 587 kfree(config->workdir); 588 config->workdir = NULL; 589 } 590 591 err = ovl_parse_redirect_mode(config, config->redirect_mode); 592 if (err) 593 return err; 594 595 /* 596 * This is to make the logic below simpler. It doesn't make any other 597 * difference, since config->redirect_dir is only used for upper. 598 */ 599 if (!config->upperdir && config->redirect_follow) 600 config->redirect_dir = true; 601 602 /* Resolve metacopy -> redirect_dir dependency */ 603 if (config->metacopy && !config->redirect_dir) { 604 if (metacopy_opt && redirect_opt) { 605 pr_err("conflicting options: metacopy=on,redirect_dir=%s\n", 606 config->redirect_mode); 607 return -EINVAL; 608 } 609 if (redirect_opt) { 610 /* 611 * There was an explicit redirect_dir=... that resulted 612 * in this conflict. 613 */ 614 pr_info("disabling metacopy due to redirect_dir=%s\n", 615 config->redirect_mode); 616 config->metacopy = false; 617 } else { 618 /* Automatically enable redirect otherwise. */ 619 config->redirect_follow = config->redirect_dir = true; 620 } 621 } 622 623 /* Resolve nfs_export -> index dependency */ 624 if (config->nfs_export && !config->index) { 625 if (nfs_export_opt && index_opt) { 626 pr_err("conflicting options: nfs_export=on,index=off\n"); 627 return -EINVAL; 628 } 629 if (index_opt) { 630 /* 631 * There was an explicit index=off that resulted 632 * in this conflict. 633 */ 634 pr_info("disabling nfs_export due to index=off\n"); 635 config->nfs_export = false; 636 } else { 637 /* Automatically enable index otherwise. */ 638 config->index = true; 639 } 640 } 641 642 /* Resolve nfs_export -> !metacopy dependency */ 643 if (config->nfs_export && config->metacopy) { 644 if (nfs_export_opt && metacopy_opt) { 645 pr_err("conflicting options: nfs_export=on,metacopy=on\n"); 646 return -EINVAL; 647 } 648 if (metacopy_opt) { 649 /* 650 * There was an explicit metacopy=on that resulted 651 * in this conflict. 652 */ 653 pr_info("disabling nfs_export due to metacopy=on\n"); 654 config->nfs_export = false; 655 } else { 656 /* 657 * There was an explicit nfs_export=on that resulted 658 * in this conflict. 659 */ 660 pr_info("disabling metacopy due to nfs_export=on\n"); 661 config->metacopy = false; 662 } 663 } 664 665 return 0; 666 } 667 668 #define OVL_WORKDIR_NAME "work" 669 #define OVL_INDEXDIR_NAME "index" 670 671 static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, 672 const char *name, bool persist) 673 { 674 struct inode *dir = ofs->workbasedir->d_inode; 675 struct vfsmount *mnt = ovl_upper_mnt(ofs); 676 struct dentry *work; 677 int err; 678 bool retried = false; 679 680 inode_lock_nested(dir, I_MUTEX_PARENT); 681 retry: 682 work = lookup_one_len(name, ofs->workbasedir, strlen(name)); 683 684 if (!IS_ERR(work)) { 685 struct iattr attr = { 686 .ia_valid = ATTR_MODE, 687 .ia_mode = S_IFDIR | 0, 688 }; 689 690 if (work->d_inode) { 691 err = -EEXIST; 692 if (retried) 693 goto out_dput; 694 695 if (persist) 696 goto out_unlock; 697 698 retried = true; 699 ovl_workdir_cleanup(dir, mnt, work, 0); 700 dput(work); 701 goto retry; 702 } 703 704 work = ovl_create_real(dir, work, OVL_CATTR(attr.ia_mode)); 705 err = PTR_ERR(work); 706 if (IS_ERR(work)) 707 goto out_err; 708 709 /* 710 * Try to remove POSIX ACL xattrs from workdir. We are good if: 711 * 712 * a) success (there was a POSIX ACL xattr and was removed) 713 * b) -ENODATA (there was no POSIX ACL xattr) 714 * c) -EOPNOTSUPP (POSIX ACL xattrs are not supported) 715 * 716 * There are various other error values that could effectively 717 * mean that the xattr doesn't exist (e.g. -ERANGE is returned 718 * if the xattr name is too long), but the set of filesystems 719 * allowed as upper are limited to "normal" ones, where checking 720 * for the above two errors is sufficient. 721 */ 722 err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT); 723 if (err && err != -ENODATA && err != -EOPNOTSUPP) 724 goto out_dput; 725 726 err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS); 727 if (err && err != -ENODATA && err != -EOPNOTSUPP) 728 goto out_dput; 729 730 /* Clear any inherited mode bits */ 731 inode_lock(work->d_inode); 732 err = notify_change(work, &attr, NULL); 733 inode_unlock(work->d_inode); 734 if (err) 735 goto out_dput; 736 } else { 737 err = PTR_ERR(work); 738 goto out_err; 739 } 740 out_unlock: 741 inode_unlock(dir); 742 return work; 743 744 out_dput: 745 dput(work); 746 out_err: 747 pr_warn("failed to create directory %s/%s (errno: %i); mounting read-only\n", 748 ofs->config.workdir, name, -err); 749 work = NULL; 750 goto out_unlock; 751 } 752 753 static void ovl_unescape(char *s) 754 { 755 char *d = s; 756 757 for (;; s++, d++) { 758 if (*s == '\\') 759 s++; 760 *d = *s; 761 if (!*s) 762 break; 763 } 764 } 765 766 static int ovl_mount_dir_noesc(const char *name, struct path *path) 767 { 768 int err = -EINVAL; 769 770 if (!*name) { 771 pr_err("empty lowerdir\n"); 772 goto out; 773 } 774 err = kern_path(name, LOOKUP_FOLLOW, path); 775 if (err) { 776 pr_err("failed to resolve '%s': %i\n", name, err); 777 goto out; 778 } 779 err = -EINVAL; 780 if (ovl_dentry_weird(path->dentry)) { 781 pr_err("filesystem on '%s' not supported\n", name); 782 goto out_put; 783 } 784 if (!d_is_dir(path->dentry)) { 785 pr_err("'%s' not a directory\n", name); 786 goto out_put; 787 } 788 return 0; 789 790 out_put: 791 path_put_init(path); 792 out: 793 return err; 794 } 795 796 static int ovl_mount_dir(const char *name, struct path *path) 797 { 798 int err = -ENOMEM; 799 char *tmp = kstrdup(name, GFP_KERNEL); 800 801 if (tmp) { 802 ovl_unescape(tmp); 803 err = ovl_mount_dir_noesc(tmp, path); 804 805 if (!err && path->dentry->d_flags & DCACHE_OP_REAL) { 806 pr_err("filesystem on '%s' not supported as upperdir\n", 807 tmp); 808 path_put_init(path); 809 err = -EINVAL; 810 } 811 kfree(tmp); 812 } 813 return err; 814 } 815 816 static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs, 817 const char *name) 818 { 819 struct kstatfs statfs; 820 int err = vfs_statfs(path, &statfs); 821 822 if (err) 823 pr_err("statfs failed on '%s'\n", name); 824 else 825 ofs->namelen = max(ofs->namelen, statfs.f_namelen); 826 827 return err; 828 } 829 830 static int ovl_lower_dir(const char *name, struct path *path, 831 struct ovl_fs *ofs, int *stack_depth) 832 { 833 int fh_type; 834 int err; 835 836 err = ovl_mount_dir_noesc(name, path); 837 if (err) 838 return err; 839 840 err = ovl_check_namelen(path, ofs, name); 841 if (err) 842 return err; 843 844 *stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth); 845 846 /* 847 * The inodes index feature and NFS export need to encode and decode 848 * file handles, so they require that all layers support them. 849 */ 850 fh_type = ovl_can_decode_fh(path->dentry->d_sb); 851 if ((ofs->config.nfs_export || 852 (ofs->config.index && ofs->config.upperdir)) && !fh_type) { 853 ofs->config.index = false; 854 ofs->config.nfs_export = false; 855 pr_warn("fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n", 856 name); 857 } 858 859 /* Check if lower fs has 32bit inode numbers */ 860 if (fh_type != FILEID_INO32_GEN) 861 ofs->xino_mode = -1; 862 863 return 0; 864 } 865 866 /* Workdir should not be subdir of upperdir and vice versa */ 867 static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir) 868 { 869 bool ok = false; 870 871 if (workdir != upperdir) { 872 ok = (lock_rename(workdir, upperdir) == NULL); 873 unlock_rename(workdir, upperdir); 874 } 875 return ok; 876 } 877 878 static unsigned int ovl_split_lowerdirs(char *str) 879 { 880 unsigned int ctr = 1; 881 char *s, *d; 882 883 for (s = d = str;; s++, d++) { 884 if (*s == '\\') { 885 s++; 886 } else if (*s == ':') { 887 *d = '\0'; 888 ctr++; 889 continue; 890 } 891 *d = *s; 892 if (!*s) 893 break; 894 } 895 return ctr; 896 } 897 898 static int __maybe_unused 899 ovl_posix_acl_xattr_get(const struct xattr_handler *handler, 900 struct dentry *dentry, struct inode *inode, 901 const char *name, void *buffer, size_t size) 902 { 903 return ovl_xattr_get(dentry, inode, handler->name, buffer, size); 904 } 905 906 static int __maybe_unused 907 ovl_posix_acl_xattr_set(const struct xattr_handler *handler, 908 struct dentry *dentry, struct inode *inode, 909 const char *name, const void *value, 910 size_t size, int flags) 911 { 912 struct dentry *workdir = ovl_workdir(dentry); 913 struct inode *realinode = ovl_inode_real(inode); 914 struct posix_acl *acl = NULL; 915 int err; 916 917 /* Check that everything is OK before copy-up */ 918 if (value) { 919 acl = posix_acl_from_xattr(&init_user_ns, value, size); 920 if (IS_ERR(acl)) 921 return PTR_ERR(acl); 922 } 923 err = -EOPNOTSUPP; 924 if (!IS_POSIXACL(d_inode(workdir))) 925 goto out_acl_release; 926 if (!realinode->i_op->set_acl) 927 goto out_acl_release; 928 if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) { 929 err = acl ? -EACCES : 0; 930 goto out_acl_release; 931 } 932 err = -EPERM; 933 if (!inode_owner_or_capable(inode)) 934 goto out_acl_release; 935 936 posix_acl_release(acl); 937 938 /* 939 * Check if sgid bit needs to be cleared (actual setacl operation will 940 * be done with mounter's capabilities and so that won't do it for us). 941 */ 942 if (unlikely(inode->i_mode & S_ISGID) && 943 handler->flags == ACL_TYPE_ACCESS && 944 !in_group_p(inode->i_gid) && 945 !capable_wrt_inode_uidgid(inode, CAP_FSETID)) { 946 struct iattr iattr = { .ia_valid = ATTR_KILL_SGID }; 947 948 err = ovl_setattr(dentry, &iattr); 949 if (err) 950 return err; 951 } 952 953 err = ovl_xattr_set(dentry, inode, handler->name, value, size, flags); 954 if (!err) 955 ovl_copyattr(ovl_inode_real(inode), inode); 956 957 return err; 958 959 out_acl_release: 960 posix_acl_release(acl); 961 return err; 962 } 963 964 static int ovl_own_xattr_get(const struct xattr_handler *handler, 965 struct dentry *dentry, struct inode *inode, 966 const char *name, void *buffer, size_t size) 967 { 968 return -EOPNOTSUPP; 969 } 970 971 static int ovl_own_xattr_set(const struct xattr_handler *handler, 972 struct dentry *dentry, struct inode *inode, 973 const char *name, const void *value, 974 size_t size, int flags) 975 { 976 return -EOPNOTSUPP; 977 } 978 979 static int ovl_other_xattr_get(const struct xattr_handler *handler, 980 struct dentry *dentry, struct inode *inode, 981 const char *name, void *buffer, size_t size) 982 { 983 return ovl_xattr_get(dentry, inode, name, buffer, size); 984 } 985 986 static int ovl_other_xattr_set(const struct xattr_handler *handler, 987 struct dentry *dentry, struct inode *inode, 988 const char *name, const void *value, 989 size_t size, int flags) 990 { 991 return ovl_xattr_set(dentry, inode, name, value, size, flags); 992 } 993 994 static const struct xattr_handler __maybe_unused 995 ovl_posix_acl_access_xattr_handler = { 996 .name = XATTR_NAME_POSIX_ACL_ACCESS, 997 .flags = ACL_TYPE_ACCESS, 998 .get = ovl_posix_acl_xattr_get, 999 .set = ovl_posix_acl_xattr_set, 1000 }; 1001 1002 static const struct xattr_handler __maybe_unused 1003 ovl_posix_acl_default_xattr_handler = { 1004 .name = XATTR_NAME_POSIX_ACL_DEFAULT, 1005 .flags = ACL_TYPE_DEFAULT, 1006 .get = ovl_posix_acl_xattr_get, 1007 .set = ovl_posix_acl_xattr_set, 1008 }; 1009 1010 static const struct xattr_handler ovl_own_xattr_handler = { 1011 .prefix = OVL_XATTR_PREFIX, 1012 .get = ovl_own_xattr_get, 1013 .set = ovl_own_xattr_set, 1014 }; 1015 1016 static const struct xattr_handler ovl_other_xattr_handler = { 1017 .prefix = "", /* catch all */ 1018 .get = ovl_other_xattr_get, 1019 .set = ovl_other_xattr_set, 1020 }; 1021 1022 static const struct xattr_handler *ovl_xattr_handlers[] = { 1023 #ifdef CONFIG_FS_POSIX_ACL 1024 &ovl_posix_acl_access_xattr_handler, 1025 &ovl_posix_acl_default_xattr_handler, 1026 #endif 1027 &ovl_own_xattr_handler, 1028 &ovl_other_xattr_handler, 1029 NULL 1030 }; 1031 1032 static int ovl_setup_trap(struct super_block *sb, struct dentry *dir, 1033 struct inode **ptrap, const char *name) 1034 { 1035 struct inode *trap; 1036 int err; 1037 1038 trap = ovl_get_trap_inode(sb, dir); 1039 err = PTR_ERR_OR_ZERO(trap); 1040 if (err) { 1041 if (err == -ELOOP) 1042 pr_err("conflicting %s path\n", name); 1043 return err; 1044 } 1045 1046 *ptrap = trap; 1047 return 0; 1048 } 1049 1050 /* 1051 * Determine how we treat concurrent use of upperdir/workdir based on the 1052 * index feature. This is papering over mount leaks of container runtimes, 1053 * for example, an old overlay mount is leaked and now its upperdir is 1054 * attempted to be used as a lower layer in a new overlay mount. 1055 */ 1056 static int ovl_report_in_use(struct ovl_fs *ofs, const char *name) 1057 { 1058 if (ofs->config.index) { 1059 pr_err("%s is in-use as upperdir/workdir of another mount, mount with '-o index=off' to override exclusive upperdir protection.\n", 1060 name); 1061 return -EBUSY; 1062 } else { 1063 pr_warn("%s is in-use as upperdir/workdir of another mount, accessing files from both mounts will result in undefined behavior.\n", 1064 name); 1065 return 0; 1066 } 1067 } 1068 1069 static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs, 1070 struct ovl_layer *upper_layer, struct path *upperpath) 1071 { 1072 struct vfsmount *upper_mnt; 1073 int err; 1074 1075 err = ovl_mount_dir(ofs->config.upperdir, upperpath); 1076 if (err) 1077 goto out; 1078 1079 /* Upper fs should not be r/o */ 1080 if (sb_rdonly(upperpath->mnt->mnt_sb)) { 1081 pr_err("upper fs is r/o, try multi-lower layers mount\n"); 1082 err = -EINVAL; 1083 goto out; 1084 } 1085 1086 err = ovl_check_namelen(upperpath, ofs, ofs->config.upperdir); 1087 if (err) 1088 goto out; 1089 1090 err = ovl_setup_trap(sb, upperpath->dentry, &upper_layer->trap, 1091 "upperdir"); 1092 if (err) 1093 goto out; 1094 1095 upper_mnt = clone_private_mount(upperpath); 1096 err = PTR_ERR(upper_mnt); 1097 if (IS_ERR(upper_mnt)) { 1098 pr_err("failed to clone upperpath\n"); 1099 goto out; 1100 } 1101 1102 /* Don't inherit atime flags */ 1103 upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME); 1104 upper_layer->mnt = upper_mnt; 1105 upper_layer->idx = 0; 1106 upper_layer->fsid = 0; 1107 1108 /* 1109 * Inherit SB_NOSEC flag from upperdir. 1110 * 1111 * This optimization changes behavior when a security related attribute 1112 * (suid/sgid/security.*) is changed on an underlying layer. This is 1113 * okay because we don't yet have guarantees in that case, but it will 1114 * need careful treatment once we want to honour changes to underlying 1115 * filesystems. 1116 */ 1117 if (upper_mnt->mnt_sb->s_flags & SB_NOSEC) 1118 sb->s_flags |= SB_NOSEC; 1119 1120 if (ovl_inuse_trylock(ovl_upper_mnt(ofs)->mnt_root)) { 1121 ofs->upperdir_locked = true; 1122 } else { 1123 err = ovl_report_in_use(ofs, "upperdir"); 1124 if (err) 1125 goto out; 1126 } 1127 1128 err = 0; 1129 out: 1130 return err; 1131 } 1132 1133 /* 1134 * Returns 1 if RENAME_WHITEOUT is supported, 0 if not supported and 1135 * negative values if error is encountered. 1136 */ 1137 static int ovl_check_rename_whiteout(struct dentry *workdir) 1138 { 1139 struct inode *dir = d_inode(workdir); 1140 struct dentry *temp; 1141 struct dentry *dest; 1142 struct dentry *whiteout; 1143 struct name_snapshot name; 1144 int err; 1145 1146 inode_lock_nested(dir, I_MUTEX_PARENT); 1147 1148 temp = ovl_create_temp(workdir, OVL_CATTR(S_IFREG | 0)); 1149 err = PTR_ERR(temp); 1150 if (IS_ERR(temp)) 1151 goto out_unlock; 1152 1153 dest = ovl_lookup_temp(workdir); 1154 err = PTR_ERR(dest); 1155 if (IS_ERR(dest)) { 1156 dput(temp); 1157 goto out_unlock; 1158 } 1159 1160 /* Name is inline and stable - using snapshot as a copy helper */ 1161 take_dentry_name_snapshot(&name, temp); 1162 err = ovl_do_rename(dir, temp, dir, dest, RENAME_WHITEOUT); 1163 if (err) { 1164 if (err == -EINVAL) 1165 err = 0; 1166 goto cleanup_temp; 1167 } 1168 1169 whiteout = lookup_one_len(name.name.name, workdir, name.name.len); 1170 err = PTR_ERR(whiteout); 1171 if (IS_ERR(whiteout)) 1172 goto cleanup_temp; 1173 1174 err = ovl_is_whiteout(whiteout); 1175 1176 /* Best effort cleanup of whiteout and temp file */ 1177 if (err) 1178 ovl_cleanup(dir, whiteout); 1179 dput(whiteout); 1180 1181 cleanup_temp: 1182 ovl_cleanup(dir, temp); 1183 release_dentry_name_snapshot(&name); 1184 dput(temp); 1185 dput(dest); 1186 1187 out_unlock: 1188 inode_unlock(dir); 1189 1190 return err; 1191 } 1192 1193 static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, 1194 struct path *workpath) 1195 { 1196 struct vfsmount *mnt = ovl_upper_mnt(ofs); 1197 struct dentry *temp; 1198 bool rename_whiteout; 1199 bool d_type; 1200 int fh_type; 1201 int err; 1202 1203 err = mnt_want_write(mnt); 1204 if (err) 1205 return err; 1206 1207 ofs->workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false); 1208 if (!ofs->workdir) 1209 goto out; 1210 1211 err = ovl_setup_trap(sb, ofs->workdir, &ofs->workdir_trap, "workdir"); 1212 if (err) 1213 goto out; 1214 1215 /* 1216 * Upper should support d_type, else whiteouts are visible. Given 1217 * workdir and upper are on same fs, we can do iterate_dir() on 1218 * workdir. This check requires successful creation of workdir in 1219 * previous step. 1220 */ 1221 err = ovl_check_d_type_supported(workpath); 1222 if (err < 0) 1223 goto out; 1224 1225 d_type = err; 1226 if (!d_type) 1227 pr_warn("upper fs needs to support d_type.\n"); 1228 1229 /* Check if upper/work fs supports O_TMPFILE */ 1230 temp = ovl_do_tmpfile(ofs->workdir, S_IFREG | 0); 1231 ofs->tmpfile = !IS_ERR(temp); 1232 if (ofs->tmpfile) 1233 dput(temp); 1234 else 1235 pr_warn("upper fs does not support tmpfile.\n"); 1236 1237 1238 /* Check if upper/work fs supports RENAME_WHITEOUT */ 1239 err = ovl_check_rename_whiteout(ofs->workdir); 1240 if (err < 0) 1241 goto out; 1242 1243 rename_whiteout = err; 1244 if (!rename_whiteout) 1245 pr_warn("upper fs does not support RENAME_WHITEOUT.\n"); 1246 1247 /* 1248 * Check if upper/work fs supports trusted.overlay.* xattr 1249 */ 1250 err = ovl_do_setxattr(ofs->workdir, OVL_XATTR_OPAQUE, "0", 1, 0); 1251 if (err) { 1252 ofs->noxattr = true; 1253 ofs->config.index = false; 1254 ofs->config.metacopy = false; 1255 pr_warn("upper fs does not support xattr, falling back to index=off and metacopy=off.\n"); 1256 err = 0; 1257 } else { 1258 vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE); 1259 } 1260 1261 /* 1262 * We allowed sub-optimal upper fs configuration and don't want to break 1263 * users over kernel upgrade, but we never allowed remote upper fs, so 1264 * we can enforce strict requirements for remote upper fs. 1265 */ 1266 if (ovl_dentry_remote(ofs->workdir) && 1267 (!d_type || !rename_whiteout || ofs->noxattr)) { 1268 pr_err("upper fs missing required features.\n"); 1269 err = -EINVAL; 1270 goto out; 1271 } 1272 1273 /* Check if upper/work fs supports file handles */ 1274 fh_type = ovl_can_decode_fh(ofs->workdir->d_sb); 1275 if (ofs->config.index && !fh_type) { 1276 ofs->config.index = false; 1277 pr_warn("upper fs does not support file handles, falling back to index=off.\n"); 1278 } 1279 1280 /* Check if upper fs has 32bit inode numbers */ 1281 if (fh_type != FILEID_INO32_GEN) 1282 ofs->xino_mode = -1; 1283 1284 /* NFS export of r/w mount depends on index */ 1285 if (ofs->config.nfs_export && !ofs->config.index) { 1286 pr_warn("NFS export requires \"index=on\", falling back to nfs_export=off.\n"); 1287 ofs->config.nfs_export = false; 1288 } 1289 out: 1290 mnt_drop_write(mnt); 1291 return err; 1292 } 1293 1294 static int ovl_get_workdir(struct super_block *sb, struct ovl_fs *ofs, 1295 struct path *upperpath) 1296 { 1297 int err; 1298 struct path workpath = { }; 1299 1300 err = ovl_mount_dir(ofs->config.workdir, &workpath); 1301 if (err) 1302 goto out; 1303 1304 err = -EINVAL; 1305 if (upperpath->mnt != workpath.mnt) { 1306 pr_err("workdir and upperdir must reside under the same mount\n"); 1307 goto out; 1308 } 1309 if (!ovl_workdir_ok(workpath.dentry, upperpath->dentry)) { 1310 pr_err("workdir and upperdir must be separate subtrees\n"); 1311 goto out; 1312 } 1313 1314 ofs->workbasedir = dget(workpath.dentry); 1315 1316 if (ovl_inuse_trylock(ofs->workbasedir)) { 1317 ofs->workdir_locked = true; 1318 } else { 1319 err = ovl_report_in_use(ofs, "workdir"); 1320 if (err) 1321 goto out; 1322 } 1323 1324 err = ovl_setup_trap(sb, ofs->workbasedir, &ofs->workbasedir_trap, 1325 "workdir"); 1326 if (err) 1327 goto out; 1328 1329 err = ovl_make_workdir(sb, ofs, &workpath); 1330 1331 out: 1332 path_put(&workpath); 1333 1334 return err; 1335 } 1336 1337 static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs, 1338 struct ovl_entry *oe, struct path *upperpath) 1339 { 1340 struct vfsmount *mnt = ovl_upper_mnt(ofs); 1341 int err; 1342 1343 err = mnt_want_write(mnt); 1344 if (err) 1345 return err; 1346 1347 /* Verify lower root is upper root origin */ 1348 err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry, 1349 true); 1350 if (err) { 1351 pr_err("failed to verify upper root origin\n"); 1352 goto out; 1353 } 1354 1355 ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true); 1356 if (ofs->indexdir) { 1357 err = ovl_setup_trap(sb, ofs->indexdir, &ofs->indexdir_trap, 1358 "indexdir"); 1359 if (err) 1360 goto out; 1361 1362 /* 1363 * Verify upper root is exclusively associated with index dir. 1364 * Older kernels stored upper fh in "trusted.overlay.origin" 1365 * xattr. If that xattr exists, verify that it is a match to 1366 * upper dir file handle. In any case, verify or set xattr 1367 * "trusted.overlay.upper" to indicate that index may have 1368 * directory entries. 1369 */ 1370 if (ovl_check_origin_xattr(ofs->indexdir)) { 1371 err = ovl_verify_set_fh(ofs->indexdir, OVL_XATTR_ORIGIN, 1372 upperpath->dentry, true, false); 1373 if (err) 1374 pr_err("failed to verify index dir 'origin' xattr\n"); 1375 } 1376 err = ovl_verify_upper(ofs->indexdir, upperpath->dentry, true); 1377 if (err) 1378 pr_err("failed to verify index dir 'upper' xattr\n"); 1379 1380 /* Cleanup bad/stale/orphan index entries */ 1381 if (!err) 1382 err = ovl_indexdir_cleanup(ofs); 1383 } 1384 if (err || !ofs->indexdir) 1385 pr_warn("try deleting index dir or mounting with '-o index=off' to disable inodes index.\n"); 1386 1387 out: 1388 mnt_drop_write(mnt); 1389 return err; 1390 } 1391 1392 static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid) 1393 { 1394 unsigned int i; 1395 1396 if (!ofs->config.nfs_export && !ovl_upper_mnt(ofs)) 1397 return true; 1398 1399 for (i = 0; i < ofs->numfs; i++) { 1400 /* 1401 * We use uuid to associate an overlay lower file handle with a 1402 * lower layer, so we can accept lower fs with null uuid as long 1403 * as all lower layers with null uuid are on the same fs. 1404 * if we detect multiple lower fs with the same uuid, we 1405 * disable lower file handle decoding on all of them. 1406 */ 1407 if (ofs->fs[i].is_lower && 1408 uuid_equal(&ofs->fs[i].sb->s_uuid, uuid)) { 1409 ofs->fs[i].bad_uuid = true; 1410 return false; 1411 } 1412 } 1413 return true; 1414 } 1415 1416 /* Get a unique fsid for the layer */ 1417 static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path) 1418 { 1419 struct super_block *sb = path->mnt->mnt_sb; 1420 unsigned int i; 1421 dev_t dev; 1422 int err; 1423 bool bad_uuid = false; 1424 1425 for (i = 0; i < ofs->numfs; i++) { 1426 if (ofs->fs[i].sb == sb) 1427 return i; 1428 } 1429 1430 if (!ovl_lower_uuid_ok(ofs, &sb->s_uuid)) { 1431 bad_uuid = true; 1432 if (ofs->config.index || ofs->config.nfs_export) { 1433 ofs->config.index = false; 1434 ofs->config.nfs_export = false; 1435 pr_warn("%s uuid detected in lower fs '%pd2', falling back to index=off,nfs_export=off.\n", 1436 uuid_is_null(&sb->s_uuid) ? "null" : 1437 "conflicting", 1438 path->dentry); 1439 } 1440 } 1441 1442 err = get_anon_bdev(&dev); 1443 if (err) { 1444 pr_err("failed to get anonymous bdev for lowerpath\n"); 1445 return err; 1446 } 1447 1448 ofs->fs[ofs->numfs].sb = sb; 1449 ofs->fs[ofs->numfs].pseudo_dev = dev; 1450 ofs->fs[ofs->numfs].bad_uuid = bad_uuid; 1451 1452 return ofs->numfs++; 1453 } 1454 1455 static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs, 1456 struct path *stack, unsigned int numlower, 1457 struct ovl_layer *layers) 1458 { 1459 int err; 1460 unsigned int i; 1461 1462 err = -ENOMEM; 1463 ofs->fs = kcalloc(numlower + 1, sizeof(struct ovl_sb), GFP_KERNEL); 1464 if (ofs->fs == NULL) 1465 goto out; 1466 1467 /* idx/fsid 0 are reserved for upper fs even with lower only overlay */ 1468 ofs->numfs++; 1469 1470 /* 1471 * All lower layers that share the same fs as upper layer, use the same 1472 * pseudo_dev as upper layer. Allocate fs[0].pseudo_dev even for lower 1473 * only overlay to simplify ovl_fs_free(). 1474 * is_lower will be set if upper fs is shared with a lower layer. 1475 */ 1476 err = get_anon_bdev(&ofs->fs[0].pseudo_dev); 1477 if (err) { 1478 pr_err("failed to get anonymous bdev for upper fs\n"); 1479 goto out; 1480 } 1481 1482 if (ovl_upper_mnt(ofs)) { 1483 ofs->fs[0].sb = ovl_upper_mnt(ofs)->mnt_sb; 1484 ofs->fs[0].is_lower = false; 1485 } 1486 1487 for (i = 0; i < numlower; i++) { 1488 struct vfsmount *mnt; 1489 struct inode *trap; 1490 int fsid; 1491 1492 err = fsid = ovl_get_fsid(ofs, &stack[i]); 1493 if (err < 0) 1494 goto out; 1495 1496 err = ovl_setup_trap(sb, stack[i].dentry, &trap, "lowerdir"); 1497 if (err) 1498 goto out; 1499 1500 if (ovl_is_inuse(stack[i].dentry)) { 1501 err = ovl_report_in_use(ofs, "lowerdir"); 1502 if (err) 1503 goto out; 1504 } 1505 1506 mnt = clone_private_mount(&stack[i]); 1507 err = PTR_ERR(mnt); 1508 if (IS_ERR(mnt)) { 1509 pr_err("failed to clone lowerpath\n"); 1510 iput(trap); 1511 goto out; 1512 } 1513 1514 /* 1515 * Make lower layers R/O. That way fchmod/fchown on lower file 1516 * will fail instead of modifying lower fs. 1517 */ 1518 mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME; 1519 1520 layers[ofs->numlayer].trap = trap; 1521 layers[ofs->numlayer].mnt = mnt; 1522 layers[ofs->numlayer].idx = ofs->numlayer; 1523 layers[ofs->numlayer].fsid = fsid; 1524 layers[ofs->numlayer].fs = &ofs->fs[fsid]; 1525 ofs->numlayer++; 1526 ofs->fs[fsid].is_lower = true; 1527 } 1528 1529 /* 1530 * When all layers on same fs, overlay can use real inode numbers. 1531 * With mount option "xino=<on|auto>", mounter declares that there are 1532 * enough free high bits in underlying fs to hold the unique fsid. 1533 * If overlayfs does encounter underlying inodes using the high xino 1534 * bits reserved for fsid, it emits a warning and uses the original 1535 * inode number or a non persistent inode number allocated from a 1536 * dedicated range. 1537 */ 1538 if (ofs->numfs - !ovl_upper_mnt(ofs) == 1) { 1539 if (ofs->config.xino == OVL_XINO_ON) 1540 pr_info("\"xino=on\" is useless with all layers on same fs, ignore.\n"); 1541 ofs->xino_mode = 0; 1542 } else if (ofs->config.xino == OVL_XINO_OFF) { 1543 ofs->xino_mode = -1; 1544 } else if (ofs->xino_mode < 0) { 1545 /* 1546 * This is a roundup of number of bits needed for encoding 1547 * fsid, where fsid 0 is reserved for upper fs (even with 1548 * lower only overlay) +1 extra bit is reserved for the non 1549 * persistent inode number range that is used for resolving 1550 * xino lower bits overflow. 1551 */ 1552 BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 30); 1553 ofs->xino_mode = ilog2(ofs->numfs - 1) + 2; 1554 } 1555 1556 if (ofs->xino_mode > 0) { 1557 pr_info("\"xino\" feature enabled using %d upper inode bits.\n", 1558 ofs->xino_mode); 1559 } 1560 1561 err = 0; 1562 out: 1563 return err; 1564 } 1565 1566 static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb, 1567 const char *lower, unsigned int numlower, 1568 struct ovl_fs *ofs, struct ovl_layer *layers) 1569 { 1570 int err; 1571 struct path *stack = NULL; 1572 unsigned int i; 1573 struct ovl_entry *oe; 1574 1575 if (!ofs->config.upperdir && numlower == 1) { 1576 pr_err("at least 2 lowerdir are needed while upperdir nonexistent\n"); 1577 return ERR_PTR(-EINVAL); 1578 } else if (!ofs->config.upperdir && ofs->config.nfs_export && 1579 ofs->config.redirect_follow) { 1580 pr_warn("NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n"); 1581 ofs->config.nfs_export = false; 1582 } 1583 1584 stack = kcalloc(numlower, sizeof(struct path), GFP_KERNEL); 1585 if (!stack) 1586 return ERR_PTR(-ENOMEM); 1587 1588 err = -EINVAL; 1589 for (i = 0; i < numlower; i++) { 1590 err = ovl_lower_dir(lower, &stack[i], ofs, &sb->s_stack_depth); 1591 if (err) 1592 goto out_err; 1593 1594 lower = strchr(lower, '\0') + 1; 1595 } 1596 1597 err = -EINVAL; 1598 sb->s_stack_depth++; 1599 if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { 1600 pr_err("maximum fs stacking depth exceeded\n"); 1601 goto out_err; 1602 } 1603 1604 err = ovl_get_layers(sb, ofs, stack, numlower, layers); 1605 if (err) 1606 goto out_err; 1607 1608 err = -ENOMEM; 1609 oe = ovl_alloc_entry(numlower); 1610 if (!oe) 1611 goto out_err; 1612 1613 for (i = 0; i < numlower; i++) { 1614 oe->lowerstack[i].dentry = dget(stack[i].dentry); 1615 oe->lowerstack[i].layer = &ofs->layers[i+1]; 1616 } 1617 1618 out: 1619 for (i = 0; i < numlower; i++) 1620 path_put(&stack[i]); 1621 kfree(stack); 1622 1623 return oe; 1624 1625 out_err: 1626 oe = ERR_PTR(err); 1627 goto out; 1628 } 1629 1630 /* 1631 * Check if this layer root is a descendant of: 1632 * - another layer of this overlayfs instance 1633 * - upper/work dir of any overlayfs instance 1634 */ 1635 static int ovl_check_layer(struct super_block *sb, struct ovl_fs *ofs, 1636 struct dentry *dentry, const char *name) 1637 { 1638 struct dentry *next = dentry, *parent; 1639 int err = 0; 1640 1641 if (!dentry) 1642 return 0; 1643 1644 parent = dget_parent(next); 1645 1646 /* Walk back ancestors to root (inclusive) looking for traps */ 1647 while (!err && parent != next) { 1648 if (ovl_lookup_trap_inode(sb, parent)) { 1649 err = -ELOOP; 1650 pr_err("overlapping %s path\n", name); 1651 } else if (ovl_is_inuse(parent)) { 1652 err = ovl_report_in_use(ofs, name); 1653 } 1654 next = parent; 1655 parent = dget_parent(next); 1656 dput(next); 1657 } 1658 1659 dput(parent); 1660 1661 return err; 1662 } 1663 1664 /* 1665 * Check if any of the layers or work dirs overlap. 1666 */ 1667 static int ovl_check_overlapping_layers(struct super_block *sb, 1668 struct ovl_fs *ofs) 1669 { 1670 int i, err; 1671 1672 if (ovl_upper_mnt(ofs)) { 1673 err = ovl_check_layer(sb, ofs, ovl_upper_mnt(ofs)->mnt_root, 1674 "upperdir"); 1675 if (err) 1676 return err; 1677 1678 /* 1679 * Checking workbasedir avoids hitting ovl_is_inuse(parent) of 1680 * this instance and covers overlapping work and index dirs, 1681 * unless work or index dir have been moved since created inside 1682 * workbasedir. In that case, we already have their traps in 1683 * inode cache and we will catch that case on lookup. 1684 */ 1685 err = ovl_check_layer(sb, ofs, ofs->workbasedir, "workdir"); 1686 if (err) 1687 return err; 1688 } 1689 1690 for (i = 1; i < ofs->numlayer; i++) { 1691 err = ovl_check_layer(sb, ofs, 1692 ofs->layers[i].mnt->mnt_root, 1693 "lowerdir"); 1694 if (err) 1695 return err; 1696 } 1697 1698 return 0; 1699 } 1700 1701 static struct dentry *ovl_get_root(struct super_block *sb, 1702 struct dentry *upperdentry, 1703 struct ovl_entry *oe) 1704 { 1705 struct dentry *root; 1706 struct ovl_path *lowerpath = &oe->lowerstack[0]; 1707 unsigned long ino = d_inode(lowerpath->dentry)->i_ino; 1708 int fsid = lowerpath->layer->fsid; 1709 struct ovl_inode_params oip = { 1710 .upperdentry = upperdentry, 1711 .lowerpath = lowerpath, 1712 }; 1713 1714 root = d_make_root(ovl_new_inode(sb, S_IFDIR, 0)); 1715 if (!root) 1716 return NULL; 1717 1718 root->d_fsdata = oe; 1719 1720 if (upperdentry) { 1721 /* Root inode uses upper st_ino/i_ino */ 1722 ino = d_inode(upperdentry)->i_ino; 1723 fsid = 0; 1724 ovl_dentry_set_upper_alias(root); 1725 if (ovl_is_impuredir(upperdentry)) 1726 ovl_set_flag(OVL_IMPURE, d_inode(root)); 1727 } 1728 1729 /* Root is always merge -> can have whiteouts */ 1730 ovl_set_flag(OVL_WHITEOUTS, d_inode(root)); 1731 ovl_dentry_set_flag(OVL_E_CONNECTED, root); 1732 ovl_set_upperdata(d_inode(root)); 1733 ovl_inode_init(d_inode(root), &oip, ino, fsid); 1734 ovl_dentry_update_reval(root, upperdentry, DCACHE_OP_WEAK_REVALIDATE); 1735 1736 return root; 1737 } 1738 1739 static int ovl_fill_super(struct super_block *sb, void *data, int silent) 1740 { 1741 struct path upperpath = { }; 1742 struct dentry *root_dentry; 1743 struct ovl_entry *oe; 1744 struct ovl_fs *ofs; 1745 struct ovl_layer *layers; 1746 struct cred *cred; 1747 char *splitlower = NULL; 1748 unsigned int numlower; 1749 int err; 1750 1751 sb->s_d_op = &ovl_dentry_operations; 1752 1753 err = -ENOMEM; 1754 ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL); 1755 if (!ofs) 1756 goto out; 1757 1758 ofs->creator_cred = cred = prepare_creds(); 1759 if (!cred) 1760 goto out_err; 1761 1762 /* Is there a reason anyone would want not to share whiteouts? */ 1763 ofs->share_whiteout = true; 1764 1765 ofs->config.index = ovl_index_def; 1766 ofs->config.nfs_export = ovl_nfs_export_def; 1767 ofs->config.xino = ovl_xino_def(); 1768 ofs->config.metacopy = ovl_metacopy_def; 1769 err = ovl_parse_opt((char *) data, &ofs->config); 1770 if (err) 1771 goto out_err; 1772 1773 err = -EINVAL; 1774 if (!ofs->config.lowerdir) { 1775 if (!silent) 1776 pr_err("missing 'lowerdir'\n"); 1777 goto out_err; 1778 } 1779 1780 err = -ENOMEM; 1781 splitlower = kstrdup(ofs->config.lowerdir, GFP_KERNEL); 1782 if (!splitlower) 1783 goto out_err; 1784 1785 numlower = ovl_split_lowerdirs(splitlower); 1786 if (numlower > OVL_MAX_STACK) { 1787 pr_err("too many lower directories, limit is %d\n", 1788 OVL_MAX_STACK); 1789 goto out_err; 1790 } 1791 1792 layers = kcalloc(numlower + 1, sizeof(struct ovl_layer), GFP_KERNEL); 1793 if (!layers) 1794 goto out_err; 1795 1796 ofs->layers = layers; 1797 /* Layer 0 is reserved for upper even if there's no upper */ 1798 ofs->numlayer = 1; 1799 1800 sb->s_stack_depth = 0; 1801 sb->s_maxbytes = MAX_LFS_FILESIZE; 1802 atomic_long_set(&ofs->last_ino, 1); 1803 /* Assume underlaying fs uses 32bit inodes unless proven otherwise */ 1804 if (ofs->config.xino != OVL_XINO_OFF) { 1805 ofs->xino_mode = BITS_PER_LONG - 32; 1806 if (!ofs->xino_mode) { 1807 pr_warn("xino not supported on 32bit kernel, falling back to xino=off.\n"); 1808 ofs->config.xino = OVL_XINO_OFF; 1809 } 1810 } 1811 1812 /* alloc/destroy_inode needed for setting up traps in inode cache */ 1813 sb->s_op = &ovl_super_operations; 1814 1815 if (ofs->config.upperdir) { 1816 if (!ofs->config.workdir) { 1817 pr_err("missing 'workdir'\n"); 1818 goto out_err; 1819 } 1820 1821 err = ovl_get_upper(sb, ofs, &layers[0], &upperpath); 1822 if (err) 1823 goto out_err; 1824 1825 err = ovl_get_workdir(sb, ofs, &upperpath); 1826 if (err) 1827 goto out_err; 1828 1829 if (!ofs->workdir) 1830 sb->s_flags |= SB_RDONLY; 1831 1832 sb->s_stack_depth = ovl_upper_mnt(ofs)->mnt_sb->s_stack_depth; 1833 sb->s_time_gran = ovl_upper_mnt(ofs)->mnt_sb->s_time_gran; 1834 1835 } 1836 oe = ovl_get_lowerstack(sb, splitlower, numlower, ofs, layers); 1837 err = PTR_ERR(oe); 1838 if (IS_ERR(oe)) 1839 goto out_err; 1840 1841 /* If the upper fs is nonexistent, we mark overlayfs r/o too */ 1842 if (!ovl_upper_mnt(ofs)) 1843 sb->s_flags |= SB_RDONLY; 1844 1845 if (!(ovl_force_readonly(ofs)) && ofs->config.index) { 1846 /* index dir will act also as workdir */ 1847 dput(ofs->workdir); 1848 ofs->workdir = NULL; 1849 iput(ofs->workdir_trap); 1850 ofs->workdir_trap = NULL; 1851 1852 err = ovl_get_indexdir(sb, ofs, oe, &upperpath); 1853 if (err) 1854 goto out_free_oe; 1855 1856 /* Force r/o mount with no index dir */ 1857 if (ofs->indexdir) 1858 ofs->workdir = dget(ofs->indexdir); 1859 else 1860 sb->s_flags |= SB_RDONLY; 1861 } 1862 1863 err = ovl_check_overlapping_layers(sb, ofs); 1864 if (err) 1865 goto out_free_oe; 1866 1867 /* Show index=off in /proc/mounts for forced r/o mount */ 1868 if (!ofs->indexdir) { 1869 ofs->config.index = false; 1870 if (ovl_upper_mnt(ofs) && ofs->config.nfs_export) { 1871 pr_warn("NFS export requires an index dir, falling back to nfs_export=off.\n"); 1872 ofs->config.nfs_export = false; 1873 } 1874 } 1875 1876 if (ofs->config.metacopy && ofs->config.nfs_export) { 1877 pr_warn("NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n"); 1878 ofs->config.nfs_export = false; 1879 } 1880 1881 if (ofs->config.nfs_export) 1882 sb->s_export_op = &ovl_export_operations; 1883 1884 /* Never override disk quota limits or use reserved space */ 1885 cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); 1886 1887 sb->s_magic = OVERLAYFS_SUPER_MAGIC; 1888 sb->s_xattr = ovl_xattr_handlers; 1889 sb->s_fs_info = ofs; 1890 sb->s_flags |= SB_POSIXACL; 1891 sb->s_iflags |= SB_I_SKIP_SYNC; 1892 1893 err = -ENOMEM; 1894 root_dentry = ovl_get_root(sb, upperpath.dentry, oe); 1895 if (!root_dentry) 1896 goto out_free_oe; 1897 1898 mntput(upperpath.mnt); 1899 kfree(splitlower); 1900 1901 sb->s_root = root_dentry; 1902 1903 return 0; 1904 1905 out_free_oe: 1906 ovl_entry_stack_free(oe); 1907 kfree(oe); 1908 out_err: 1909 kfree(splitlower); 1910 path_put(&upperpath); 1911 ovl_free_fs(ofs); 1912 out: 1913 return err; 1914 } 1915 1916 static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags, 1917 const char *dev_name, void *raw_data) 1918 { 1919 return mount_nodev(fs_type, flags, raw_data, ovl_fill_super); 1920 } 1921 1922 static struct file_system_type ovl_fs_type = { 1923 .owner = THIS_MODULE, 1924 .name = "overlay", 1925 .mount = ovl_mount, 1926 .kill_sb = kill_anon_super, 1927 }; 1928 MODULE_ALIAS_FS("overlay"); 1929 1930 static void ovl_inode_init_once(void *foo) 1931 { 1932 struct ovl_inode *oi = foo; 1933 1934 inode_init_once(&oi->vfs_inode); 1935 } 1936 1937 static int __init ovl_init(void) 1938 { 1939 int err; 1940 1941 ovl_inode_cachep = kmem_cache_create("ovl_inode", 1942 sizeof(struct ovl_inode), 0, 1943 (SLAB_RECLAIM_ACCOUNT| 1944 SLAB_MEM_SPREAD|SLAB_ACCOUNT), 1945 ovl_inode_init_once); 1946 if (ovl_inode_cachep == NULL) 1947 return -ENOMEM; 1948 1949 err = ovl_aio_request_cache_init(); 1950 if (!err) { 1951 err = register_filesystem(&ovl_fs_type); 1952 if (!err) 1953 return 0; 1954 1955 ovl_aio_request_cache_destroy(); 1956 } 1957 kmem_cache_destroy(ovl_inode_cachep); 1958 1959 return err; 1960 } 1961 1962 static void __exit ovl_exit(void) 1963 { 1964 unregister_filesystem(&ovl_fs_type); 1965 1966 /* 1967 * Make sure all delayed rcu free inodes are flushed before we 1968 * destroy cache. 1969 */ 1970 rcu_barrier(); 1971 kmem_cache_destroy(ovl_inode_cachep); 1972 ovl_aio_request_cache_destroy(); 1973 } 1974 1975 module_init(ovl_init); 1976 module_exit(ovl_exit); 1977