1 /* 2 * 3 * Copyright (C) 2011 Novell Inc. 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 as published by 7 * the Free Software Foundation. 8 */ 9 10 #include <linux/fs.h> 11 #include <linux/slab.h> 12 #include <linux/cred.h> 13 #include <linux/xattr.h> 14 #include <linux/posix_acl.h> 15 #include <linux/ratelimit.h> 16 #include "overlayfs.h" 17 18 19 static dev_t ovl_get_pseudo_dev(struct dentry *dentry) 20 { 21 struct ovl_entry *oe = dentry->d_fsdata; 22 23 return oe->lowerstack[0].layer->pseudo_dev; 24 } 25 26 int ovl_setattr(struct dentry *dentry, struct iattr *attr) 27 { 28 int err; 29 struct dentry *upperdentry; 30 const struct cred *old_cred; 31 32 /* 33 * Check for permissions before trying to copy-up. This is redundant 34 * since it will be rechecked later by ->setattr() on upper dentry. But 35 * without this, copy-up can be triggered by just about anybody. 36 * 37 * We don't initialize inode->size, which just means that 38 * inode_newsize_ok() will always check against MAX_LFS_FILESIZE and not 39 * check for a swapfile (which this won't be anyway). 40 */ 41 err = setattr_prepare(dentry, attr); 42 if (err) 43 return err; 44 45 err = ovl_want_write(dentry); 46 if (err) 47 goto out; 48 49 err = ovl_copy_up(dentry); 50 if (!err) { 51 upperdentry = ovl_dentry_upper(dentry); 52 53 if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) 54 attr->ia_valid &= ~ATTR_MODE; 55 56 inode_lock(upperdentry->d_inode); 57 old_cred = ovl_override_creds(dentry->d_sb); 58 err = notify_change(upperdentry, attr, NULL); 59 revert_creds(old_cred); 60 if (!err) 61 ovl_copyattr(upperdentry->d_inode, dentry->d_inode); 62 inode_unlock(upperdentry->d_inode); 63 } 64 ovl_drop_write(dentry); 65 out: 66 return err; 67 } 68 69 int ovl_getattr(const struct path *path, struct kstat *stat, 70 u32 request_mask, unsigned int flags) 71 { 72 struct dentry *dentry = path->dentry; 73 enum ovl_path_type type; 74 struct path realpath; 75 const struct cred *old_cred; 76 bool is_dir = S_ISDIR(dentry->d_inode->i_mode); 77 bool samefs = ovl_same_sb(dentry->d_sb); 78 int err; 79 80 type = ovl_path_real(dentry, &realpath); 81 old_cred = ovl_override_creds(dentry->d_sb); 82 err = vfs_getattr(&realpath, stat, request_mask, flags); 83 if (err) 84 goto out; 85 86 /* 87 * For non-dir or same fs, we use st_ino of the copy up origin, if we 88 * know it. This guaranties constant st_dev/st_ino across copy up. 89 * 90 * If filesystem supports NFS export ops, this also guaranties 91 * persistent st_ino across mount cycle. 92 */ 93 if (!is_dir || samefs) { 94 if (OVL_TYPE_ORIGIN(type)) { 95 struct kstat lowerstat; 96 u32 lowermask = STATX_INO | (!is_dir ? STATX_NLINK : 0); 97 98 ovl_path_lower(dentry, &realpath); 99 err = vfs_getattr(&realpath, &lowerstat, 100 lowermask, flags); 101 if (err) 102 goto out; 103 104 /* 105 * Lower hardlinks may be broken on copy up to different 106 * upper files, so we cannot use the lower origin st_ino 107 * for those different files, even for the same fs case. 108 * With inodes index enabled, it is safe to use st_ino 109 * of an indexed hardlinked origin. The index validates 110 * that the upper hardlink is not broken. 111 */ 112 if (is_dir || lowerstat.nlink == 1 || 113 ovl_test_flag(OVL_INDEX, d_inode(dentry))) 114 stat->ino = lowerstat.ino; 115 116 if (samefs) 117 WARN_ON_ONCE(stat->dev != lowerstat.dev); 118 else 119 stat->dev = ovl_get_pseudo_dev(dentry); 120 } 121 if (samefs) { 122 /* 123 * When all layers are on the same fs, all real inode 124 * number are unique, so we use the overlay st_dev, 125 * which is friendly to du -x. 126 */ 127 stat->dev = dentry->d_sb->s_dev; 128 } else if (!OVL_TYPE_UPPER(type)) { 129 /* 130 * For non-samefs setup, to make sure that st_dev/st_ino 131 * pair is unique across the system, we use a unique 132 * anonymous st_dev for lower layer inode. 133 */ 134 stat->dev = ovl_get_pseudo_dev(dentry); 135 } 136 } else { 137 /* 138 * Always use the overlay st_dev for directories, so 'find 139 * -xdev' will scan the entire overlay mount and won't cross the 140 * overlay mount boundaries. 141 * 142 * If not all layers are on the same fs the pair {real st_ino; 143 * overlay st_dev} is not unique, so use the non persistent 144 * overlay st_ino for directories. 145 */ 146 stat->dev = dentry->d_sb->s_dev; 147 stat->ino = dentry->d_inode->i_ino; 148 } 149 150 /* 151 * It's probably not worth it to count subdirs to get the 152 * correct link count. nlink=1 seems to pacify 'find' and 153 * other utilities. 154 */ 155 if (is_dir && OVL_TYPE_MERGE(type)) 156 stat->nlink = 1; 157 158 /* 159 * Return the overlay inode nlinks for indexed upper inodes. 160 * Overlay inode nlink counts the union of the upper hardlinks 161 * and non-covered lower hardlinks. It does not include the upper 162 * index hardlink. 163 */ 164 if (!is_dir && ovl_test_flag(OVL_INDEX, d_inode(dentry))) 165 stat->nlink = dentry->d_inode->i_nlink; 166 167 out: 168 revert_creds(old_cred); 169 170 return err; 171 } 172 173 int ovl_permission(struct inode *inode, int mask) 174 { 175 struct inode *upperinode = ovl_inode_upper(inode); 176 struct inode *realinode = upperinode ?: ovl_inode_lower(inode); 177 const struct cred *old_cred; 178 int err; 179 180 /* Careful in RCU walk mode */ 181 if (!realinode) { 182 WARN_ON(!(mask & MAY_NOT_BLOCK)); 183 return -ECHILD; 184 } 185 186 /* 187 * Check overlay inode with the creds of task and underlying inode 188 * with creds of mounter 189 */ 190 err = generic_permission(inode, mask); 191 if (err) 192 return err; 193 194 old_cred = ovl_override_creds(inode->i_sb); 195 if (!upperinode && 196 !special_file(realinode->i_mode) && mask & MAY_WRITE) { 197 mask &= ~(MAY_WRITE | MAY_APPEND); 198 /* Make sure mounter can read file for copy up later */ 199 mask |= MAY_READ; 200 } 201 err = inode_permission(realinode, mask); 202 revert_creds(old_cred); 203 204 return err; 205 } 206 207 static const char *ovl_get_link(struct dentry *dentry, 208 struct inode *inode, 209 struct delayed_call *done) 210 { 211 const struct cred *old_cred; 212 const char *p; 213 214 if (!dentry) 215 return ERR_PTR(-ECHILD); 216 217 old_cred = ovl_override_creds(dentry->d_sb); 218 p = vfs_get_link(ovl_dentry_real(dentry), done); 219 revert_creds(old_cred); 220 return p; 221 } 222 223 bool ovl_is_private_xattr(const char *name) 224 { 225 return strncmp(name, OVL_XATTR_PREFIX, 226 sizeof(OVL_XATTR_PREFIX) - 1) == 0; 227 } 228 229 int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, 230 const void *value, size_t size, int flags) 231 { 232 int err; 233 struct dentry *upperdentry = ovl_i_dentry_upper(inode); 234 struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry); 235 const struct cred *old_cred; 236 237 err = ovl_want_write(dentry); 238 if (err) 239 goto out; 240 241 if (!value && !upperdentry) { 242 err = vfs_getxattr(realdentry, name, NULL, 0); 243 if (err < 0) 244 goto out_drop_write; 245 } 246 247 if (!upperdentry) { 248 err = ovl_copy_up(dentry); 249 if (err) 250 goto out_drop_write; 251 252 realdentry = ovl_dentry_upper(dentry); 253 } 254 255 old_cred = ovl_override_creds(dentry->d_sb); 256 if (value) 257 err = vfs_setxattr(realdentry, name, value, size, flags); 258 else { 259 WARN_ON(flags != XATTR_REPLACE); 260 err = vfs_removexattr(realdentry, name); 261 } 262 revert_creds(old_cred); 263 264 out_drop_write: 265 ovl_drop_write(dentry); 266 out: 267 return err; 268 } 269 270 int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, 271 void *value, size_t size) 272 { 273 ssize_t res; 274 const struct cred *old_cred; 275 struct dentry *realdentry = 276 ovl_i_dentry_upper(inode) ?: ovl_dentry_lower(dentry); 277 278 old_cred = ovl_override_creds(dentry->d_sb); 279 res = vfs_getxattr(realdentry, name, value, size); 280 revert_creds(old_cred); 281 return res; 282 } 283 284 static bool ovl_can_list(const char *s) 285 { 286 /* List all non-trusted xatts */ 287 if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0) 288 return true; 289 290 /* Never list trusted.overlay, list other trusted for superuser only */ 291 return !ovl_is_private_xattr(s) && capable(CAP_SYS_ADMIN); 292 } 293 294 ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) 295 { 296 struct dentry *realdentry = ovl_dentry_real(dentry); 297 ssize_t res; 298 size_t len; 299 char *s; 300 const struct cred *old_cred; 301 302 old_cred = ovl_override_creds(dentry->d_sb); 303 res = vfs_listxattr(realdentry, list, size); 304 revert_creds(old_cred); 305 if (res <= 0 || size == 0) 306 return res; 307 308 /* filter out private xattrs */ 309 for (s = list, len = res; len;) { 310 size_t slen = strnlen(s, len) + 1; 311 312 /* underlying fs providing us with an broken xattr list? */ 313 if (WARN_ON(slen > len)) 314 return -EIO; 315 316 len -= slen; 317 if (!ovl_can_list(s)) { 318 res -= slen; 319 memmove(s, s + slen, len); 320 } else { 321 s += slen; 322 } 323 } 324 325 return res; 326 } 327 328 struct posix_acl *ovl_get_acl(struct inode *inode, int type) 329 { 330 struct inode *realinode = ovl_inode_real(inode); 331 const struct cred *old_cred; 332 struct posix_acl *acl; 333 334 if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !IS_POSIXACL(realinode)) 335 return NULL; 336 337 old_cred = ovl_override_creds(inode->i_sb); 338 acl = get_acl(realinode, type); 339 revert_creds(old_cred); 340 341 return acl; 342 } 343 344 static bool ovl_open_need_copy_up(struct dentry *dentry, int flags) 345 { 346 if (ovl_dentry_upper(dentry) && 347 ovl_dentry_has_upper_alias(dentry)) 348 return false; 349 350 if (special_file(d_inode(dentry)->i_mode)) 351 return false; 352 353 if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC)) 354 return false; 355 356 return true; 357 } 358 359 int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags) 360 { 361 int err = 0; 362 363 if (ovl_open_need_copy_up(dentry, file_flags)) { 364 err = ovl_want_write(dentry); 365 if (!err) { 366 err = ovl_copy_up_flags(dentry, file_flags); 367 ovl_drop_write(dentry); 368 } 369 } 370 371 return err; 372 } 373 374 int ovl_update_time(struct inode *inode, struct timespec *ts, int flags) 375 { 376 struct dentry *alias; 377 struct path upperpath; 378 379 if (!(flags & S_ATIME)) 380 return 0; 381 382 alias = d_find_any_alias(inode); 383 if (!alias) 384 return 0; 385 386 ovl_path_upper(alias, &upperpath); 387 if (upperpath.dentry) { 388 touch_atime(&upperpath); 389 inode->i_atime = d_inode(upperpath.dentry)->i_atime; 390 } 391 392 dput(alias); 393 394 return 0; 395 } 396 397 static const struct inode_operations ovl_file_inode_operations = { 398 .setattr = ovl_setattr, 399 .permission = ovl_permission, 400 .getattr = ovl_getattr, 401 .listxattr = ovl_listxattr, 402 .get_acl = ovl_get_acl, 403 .update_time = ovl_update_time, 404 }; 405 406 static const struct inode_operations ovl_symlink_inode_operations = { 407 .setattr = ovl_setattr, 408 .get_link = ovl_get_link, 409 .getattr = ovl_getattr, 410 .listxattr = ovl_listxattr, 411 .update_time = ovl_update_time, 412 }; 413 414 /* 415 * It is possible to stack overlayfs instance on top of another 416 * overlayfs instance as lower layer. We need to annonate the 417 * stackable i_mutex locks according to stack level of the super 418 * block instance. An overlayfs instance can never be in stack 419 * depth 0 (there is always a real fs below it). An overlayfs 420 * inode lock will use the lockdep annotaion ovl_i_mutex_key[depth]. 421 * 422 * For example, here is a snip from /proc/lockdep_chains after 423 * dir_iterate of nested overlayfs: 424 * 425 * [...] &ovl_i_mutex_dir_key[depth] (stack_depth=2) 426 * [...] &ovl_i_mutex_dir_key[depth]#2 (stack_depth=1) 427 * [...] &type->i_mutex_dir_key (stack_depth=0) 428 */ 429 #define OVL_MAX_NESTING FILESYSTEM_MAX_STACK_DEPTH 430 431 static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode) 432 { 433 #ifdef CONFIG_LOCKDEP 434 static struct lock_class_key ovl_i_mutex_key[OVL_MAX_NESTING]; 435 static struct lock_class_key ovl_i_mutex_dir_key[OVL_MAX_NESTING]; 436 static struct lock_class_key ovl_i_lock_key[OVL_MAX_NESTING]; 437 438 int depth = inode->i_sb->s_stack_depth - 1; 439 440 if (WARN_ON_ONCE(depth < 0 || depth >= OVL_MAX_NESTING)) 441 depth = 0; 442 443 if (S_ISDIR(inode->i_mode)) 444 lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_dir_key[depth]); 445 else 446 lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_key[depth]); 447 448 lockdep_set_class(&OVL_I(inode)->lock, &ovl_i_lock_key[depth]); 449 #endif 450 } 451 452 static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev) 453 { 454 inode->i_ino = get_next_ino(); 455 inode->i_mode = mode; 456 inode->i_flags |= S_NOCMTIME; 457 #ifdef CONFIG_FS_POSIX_ACL 458 inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE; 459 #endif 460 461 ovl_lockdep_annotate_inode_mutex_key(inode); 462 463 switch (mode & S_IFMT) { 464 case S_IFREG: 465 inode->i_op = &ovl_file_inode_operations; 466 break; 467 468 case S_IFDIR: 469 inode->i_op = &ovl_dir_inode_operations; 470 inode->i_fop = &ovl_dir_operations; 471 break; 472 473 case S_IFLNK: 474 inode->i_op = &ovl_symlink_inode_operations; 475 break; 476 477 default: 478 inode->i_op = &ovl_file_inode_operations; 479 init_special_inode(inode, mode, rdev); 480 break; 481 } 482 } 483 484 /* 485 * With inodes index enabled, an overlay inode nlink counts the union of upper 486 * hardlinks and non-covered lower hardlinks. During the lifetime of a non-pure 487 * upper inode, the following nlink modifying operations can happen: 488 * 489 * 1. Lower hardlink copy up 490 * 2. Upper hardlink created, unlinked or renamed over 491 * 3. Lower hardlink whiteout or renamed over 492 * 493 * For the first, copy up case, the union nlink does not change, whether the 494 * operation succeeds or fails, but the upper inode nlink may change. 495 * Therefore, before copy up, we store the union nlink value relative to the 496 * lower inode nlink in the index inode xattr trusted.overlay.nlink. 497 * 498 * For the second, upper hardlink case, the union nlink should be incremented 499 * or decremented IFF the operation succeeds, aligned with nlink change of the 500 * upper inode. Therefore, before link/unlink/rename, we store the union nlink 501 * value relative to the upper inode nlink in the index inode. 502 * 503 * For the last, lower cover up case, we simplify things by preceding the 504 * whiteout or cover up with copy up. This makes sure that there is an index 505 * upper inode where the nlink xattr can be stored before the copied up upper 506 * entry is unlink. 507 */ 508 #define OVL_NLINK_ADD_UPPER (1 << 0) 509 510 /* 511 * On-disk format for indexed nlink: 512 * 513 * nlink relative to the upper inode - "U[+-]NUM" 514 * nlink relative to the lower inode - "L[+-]NUM" 515 */ 516 517 static int ovl_set_nlink_common(struct dentry *dentry, 518 struct dentry *realdentry, const char *format) 519 { 520 struct inode *inode = d_inode(dentry); 521 struct inode *realinode = d_inode(realdentry); 522 char buf[13]; 523 int len; 524 525 len = snprintf(buf, sizeof(buf), format, 526 (int) (inode->i_nlink - realinode->i_nlink)); 527 528 if (WARN_ON(len >= sizeof(buf))) 529 return -EIO; 530 531 return ovl_do_setxattr(ovl_dentry_upper(dentry), 532 OVL_XATTR_NLINK, buf, len, 0); 533 } 534 535 int ovl_set_nlink_upper(struct dentry *dentry) 536 { 537 return ovl_set_nlink_common(dentry, ovl_dentry_upper(dentry), "U%+i"); 538 } 539 540 int ovl_set_nlink_lower(struct dentry *dentry) 541 { 542 return ovl_set_nlink_common(dentry, ovl_dentry_lower(dentry), "L%+i"); 543 } 544 545 unsigned int ovl_get_nlink(struct dentry *lowerdentry, 546 struct dentry *upperdentry, 547 unsigned int fallback) 548 { 549 int nlink_diff; 550 int nlink; 551 char buf[13]; 552 int err; 553 554 if (!lowerdentry || !upperdentry || d_inode(lowerdentry)->i_nlink == 1) 555 return fallback; 556 557 err = vfs_getxattr(upperdentry, OVL_XATTR_NLINK, &buf, sizeof(buf) - 1); 558 if (err < 0) 559 goto fail; 560 561 buf[err] = '\0'; 562 if ((buf[0] != 'L' && buf[0] != 'U') || 563 (buf[1] != '+' && buf[1] != '-')) 564 goto fail; 565 566 err = kstrtoint(buf + 1, 10, &nlink_diff); 567 if (err < 0) 568 goto fail; 569 570 nlink = d_inode(buf[0] == 'L' ? lowerdentry : upperdentry)->i_nlink; 571 nlink += nlink_diff; 572 573 if (nlink <= 0) 574 goto fail; 575 576 return nlink; 577 578 fail: 579 pr_warn_ratelimited("overlayfs: failed to get index nlink (%pd2, err=%i)\n", 580 upperdentry, err); 581 return fallback; 582 } 583 584 struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev) 585 { 586 struct inode *inode; 587 588 inode = new_inode(sb); 589 if (inode) 590 ovl_fill_inode(inode, mode, rdev); 591 592 return inode; 593 } 594 595 static int ovl_inode_test(struct inode *inode, void *data) 596 { 597 return inode->i_private == data; 598 } 599 600 static int ovl_inode_set(struct inode *inode, void *data) 601 { 602 inode->i_private = data; 603 return 0; 604 } 605 606 static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry, 607 struct dentry *upperdentry) 608 { 609 /* 610 * Allow non-NULL lower inode in ovl_inode even if lowerdentry is NULL. 611 * This happens when finding a copied up overlay inode for a renamed 612 * or hardlinked overlay dentry and lower dentry cannot be followed 613 * by origin because lower fs does not support file handles. 614 */ 615 if (lowerdentry && ovl_inode_lower(inode) != d_inode(lowerdentry)) 616 return false; 617 618 /* 619 * Allow non-NULL __upperdentry in inode even if upperdentry is NULL. 620 * This happens when finding a lower alias for a copied up hard link. 621 */ 622 if (upperdentry && ovl_inode_upper(inode) != d_inode(upperdentry)) 623 return false; 624 625 return true; 626 } 627 628 struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, 629 struct dentry *index) 630 { 631 struct dentry *lowerdentry = ovl_dentry_lower(dentry); 632 struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL; 633 struct inode *inode; 634 /* Already indexed or could be indexed on copy up? */ 635 bool indexed = (index || (ovl_indexdir(dentry->d_sb) && !upperdentry)); 636 637 if (WARN_ON(upperdentry && indexed && !lowerdentry)) 638 return ERR_PTR(-EIO); 639 640 if (!realinode) 641 realinode = d_inode(lowerdentry); 642 643 /* 644 * Copy up origin (lower) may exist for non-indexed upper, but we must 645 * not use lower as hash key in that case. 646 * Hash inodes that are or could be indexed by origin inode and 647 * non-indexed upper inodes that could be hard linked by upper inode. 648 */ 649 if (!S_ISDIR(realinode->i_mode) && (upperdentry || indexed)) { 650 struct inode *key = d_inode(indexed ? lowerdentry : 651 upperdentry); 652 unsigned int nlink; 653 654 inode = iget5_locked(dentry->d_sb, (unsigned long) key, 655 ovl_inode_test, ovl_inode_set, key); 656 if (!inode) 657 goto out_nomem; 658 if (!(inode->i_state & I_NEW)) { 659 /* 660 * Verify that the underlying files stored in the inode 661 * match those in the dentry. 662 */ 663 if (!ovl_verify_inode(inode, lowerdentry, upperdentry)) { 664 iput(inode); 665 inode = ERR_PTR(-ESTALE); 666 goto out; 667 } 668 669 dput(upperdentry); 670 goto out; 671 } 672 673 nlink = ovl_get_nlink(lowerdentry, upperdentry, 674 realinode->i_nlink); 675 set_nlink(inode, nlink); 676 } else { 677 inode = new_inode(dentry->d_sb); 678 if (!inode) 679 goto out_nomem; 680 } 681 ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev); 682 ovl_inode_init(inode, upperdentry, lowerdentry); 683 684 if (upperdentry && ovl_is_impuredir(upperdentry)) 685 ovl_set_flag(OVL_IMPURE, inode); 686 687 /* Check for non-merge dir that may have whiteouts */ 688 if (S_ISDIR(realinode->i_mode)) { 689 struct ovl_entry *oe = dentry->d_fsdata; 690 691 if (((upperdentry && lowerdentry) || oe->numlower > 1) || 692 ovl_check_origin_xattr(upperdentry ?: lowerdentry)) { 693 ovl_set_flag(OVL_WHITEOUTS, inode); 694 } 695 } 696 697 if (inode->i_state & I_NEW) 698 unlock_new_inode(inode); 699 out: 700 return inode; 701 702 out_nomem: 703 inode = ERR_PTR(-ENOMEM); 704 goto out; 705 } 706