/*
 *
 * Copyright (C) 2011 Novell Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 */

#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/cred.h>
#include <linux/xattr.h>
#include <linux/posix_acl.h>
#include <linux/ratelimit.h>
#include "overlayfs.h"


int ovl_setattr(struct dentry *dentry, struct iattr *attr)
{
	int err;
	bool full_copy_up = false;
	struct dentry *upperdentry;
	const struct cred *old_cred;

	err = setattr_prepare(dentry, attr);
	if (err)
		return err;

	err = ovl_want_write(dentry);
	if (err)
		goto out;

	if (attr->ia_valid & ATTR_SIZE) {
		struct inode *realinode = d_inode(ovl_dentry_real(dentry));

		err = -ETXTBSY;
		if (atomic_read(&realinode->i_writecount) < 0)
			goto out_drop_write;

		/* Truncate should trigger data copy up as well */
		full_copy_up = true;
	}

	if (!full_copy_up)
		err = ovl_copy_up(dentry);
	else
		err = ovl_copy_up_with_data(dentry);
	if (!err) {
		struct inode *winode = NULL;

		upperdentry = ovl_dentry_upper(dentry);

		if (attr->ia_valid & ATTR_SIZE) {
			winode = d_inode(upperdentry);
			err = get_write_access(winode);
			if (err)
				goto out_drop_write;
		}

		if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
			attr->ia_valid &= ~ATTR_MODE;

		inode_lock(upperdentry->d_inode);
		old_cred = ovl_override_creds(dentry->d_sb);
		err = notify_change(upperdentry, attr, NULL);
		revert_creds(old_cred);
		if (!err)
			ovl_copyattr(upperdentry->d_inode, dentry->d_inode);
		inode_unlock(upperdentry->d_inode);

		if (winode)
			put_write_access(winode);
	}
out_drop_write:
	ovl_drop_write(dentry);
out:
	return err;
}

static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat,
			   struct ovl_layer *lower_layer)
{
	bool samefs = ovl_same_sb(dentry->d_sb);
	unsigned int xinobits = ovl_xino_bits(dentry->d_sb);

	if (samefs) {
		/*
		 * When all layers are on the same fs, all real inode
		 * numbers are unique, so we use the overlay st_dev,
		 * which is friendly to du -x.
		 */
		stat->dev = dentry->d_sb->s_dev;
		return 0;
	} else if (xinobits) {
		unsigned int shift = 64 - xinobits;
		/*
		 * All inode numbers of underlying fs should not be using the
		 * high xinobits, so we use high xinobits to partition the
		 * overlay st_ino address space. The high bits hold the fsid
		 * (upper fsid is 0). This way overlay inode numbers are unique
		 * and all inodes use overlay st_dev. Inode numbers are also
		 * persistent for a given layer configuration.
		 */
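		/*
		 * For example, with xinobits=8 the shift is 56, so a lower
		 * layer with fsid 2 reports real inode number 100 as overlay
		 * st_ino (100 | (2ULL << 56)), while upper layer inodes
		 * (fsid 0) keep their real inode number unchanged.
		 */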
		if (stat->ino >> shift) {
			pr_warn_ratelimited("overlayfs: inode number too big (%pd2, ino=%llu, xinobits=%d)\n",
					    dentry, stat->ino, xinobits);
		} else {
			if (lower_layer)
				stat->ino |= ((u64)lower_layer->fsid) << shift;

			stat->dev = dentry->d_sb->s_dev;
			return 0;
		}
	}

	/* The inode could not be mapped to a unified st_ino address space */
	if (S_ISDIR(dentry->d_inode->i_mode)) {
		/*
		 * Always use the overlay st_dev for directories, so 'find
		 * -xdev' will scan the entire overlay mount and won't cross
		 * the overlay mount boundaries.
		 *
		 * If not all layers are on the same fs the pair {real st_ino;
		 * overlay st_dev} is not unique, so use the non persistent
		 * overlay st_ino for directories.
		 */
		stat->dev = dentry->d_sb->s_dev;
		stat->ino = dentry->d_inode->i_ino;
	} else if (lower_layer && lower_layer->fsid) {
		/*
		 * For non-samefs setup, if we cannot map all layers st_ino
		 * to a unified address space, we need to make sure that st_dev
		 * is unique per lower fs. Upper layer uses real st_dev and
		 * lower layers use the unique anonymous bdev assigned to the
		 * lower fs.
		 */
		stat->dev = lower_layer->fs->pseudo_dev;
	}

	return 0;
}

int ovl_getattr(const struct path *path, struct kstat *stat,
		u32 request_mask, unsigned int flags)
{
	struct dentry *dentry = path->dentry;
	enum ovl_path_type type;
	struct path realpath;
	const struct cred *old_cred;
	bool is_dir = S_ISDIR(dentry->d_inode->i_mode);
	bool samefs = ovl_same_sb(dentry->d_sb);
	struct ovl_layer *lower_layer = NULL;
	int err;
	bool metacopy_blocks = false;

	metacopy_blocks = ovl_is_metacopy_dentry(dentry);

	type = ovl_path_real(dentry, &realpath);
	old_cred = ovl_override_creds(dentry->d_sb);
	err = vfs_getattr(&realpath, stat, request_mask, flags);
	if (err)
		goto out;

	/*
	 * For non-dir or same fs, we use st_ino of the copy up origin.
	 * This guarantees constant st_dev/st_ino across copy up.
	 * With xino feature and non-samefs, we use st_ino of the copy up
	 * origin masked with high bits that represent the layer id.
	 *
	 * If lower filesystem supports NFS file handles, this also guarantees
	 * persistent st_ino across mount cycle.
	 */
	if (!is_dir || samefs || ovl_xino_bits(dentry->d_sb)) {
		if (!OVL_TYPE_UPPER(type)) {
			lower_layer = ovl_layer_lower(dentry);
		} else if (OVL_TYPE_ORIGIN(type)) {
			struct kstat lowerstat;
			u32 lowermask = STATX_INO | STATX_BLOCKS |
					(!is_dir ? STATX_NLINK : 0);

			ovl_path_lower(dentry, &realpath);
			err = vfs_getattr(&realpath, &lowerstat,
					  lowermask, flags);
			if (err)
				goto out;

			/*
			 * Lower hardlinks may be broken on copy up to different
			 * upper files, so we cannot use the lower origin st_ino
			 * for those different files, even for the same fs case.
			 *
			 * Similarly, several redirected dirs can point to the
			 * same dir on a lower layer. With the "verify_lower"
			 * feature, we do not use the lower origin st_ino, if
			 * we haven't verified that this redirect is unique.
			 *
			 * With inodes index enabled, it is safe to use st_ino
			 * of an indexed origin. The index validates that the
			 * upper hardlink is not broken and that a redirected
			 * dir is the only redirect to that origin.
			 */
			if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) ||
			    (!ovl_verify_lower(dentry->d_sb) &&
			     (is_dir || lowerstat.nlink == 1))) {
				stat->ino = lowerstat.ino;
				lower_layer = ovl_layer_lower(dentry);
			}

			/*
			 * If we are querying a metacopy dentry and lower
			 * dentry is data dentry, then use the blocks we
			 * queried just now. We don't have to do additional
			 * vfs_getattr(). If lower itself is metacopy, then
			 * additional vfs_getattr() is unavoidable.
			 */
			if (metacopy_blocks &&
			    realpath.dentry == ovl_dentry_lowerdata(dentry)) {
				stat->blocks = lowerstat.blocks;
				metacopy_blocks = false;
			}
		}

		if (metacopy_blocks) {
			/*
			 * If lower is not the same as lowerdata or if there
			 * was no origin on upper, we can end up here.
			 */
			struct kstat lowerdatastat;
			u32 lowermask = STATX_BLOCKS;

			ovl_path_lowerdata(dentry, &realpath);
			err = vfs_getattr(&realpath, &lowerdatastat,
					  lowermask, flags);
			if (err)
				goto out;
			stat->blocks = lowerdatastat.blocks;
		}
	}

	err = ovl_map_dev_ino(dentry, stat, lower_layer);
	if (err)
		goto out;

	/*
	 * It's probably not worth it to count subdirs to get the
	 * correct link count.  nlink=1 seems to pacify 'find' and
	 * other utilities.
	 */
	if (is_dir && OVL_TYPE_MERGE(type))
		stat->nlink = 1;

	/*
	 * Return the overlay inode nlinks for indexed upper inodes.
	 * Overlay inode nlink counts the union of the upper hardlinks
	 * and non-covered lower hardlinks. It does not include the upper
	 * index hardlink.
	 */
	if (!is_dir && ovl_test_flag(OVL_INDEX, d_inode(dentry)))
		stat->nlink = dentry->d_inode->i_nlink;

out:
	revert_creds(old_cred);

	return err;
}

int ovl_permission(struct inode *inode, int mask)
{
	struct inode *upperinode = ovl_inode_upper(inode);
	struct inode *realinode = upperinode ?: ovl_inode_lower(inode);
	const struct cred *old_cred;
	int err;

	/* Careful in RCU walk mode */
	if (!realinode) {
		WARN_ON(!(mask & MAY_NOT_BLOCK));
		return -ECHILD;
	}

	/*
	 * Check overlay inode with the creds of task and underlying inode
	 * with creds of mounter
	 */
	err = generic_permission(inode, mask);
	if (err)
		return err;

	old_cred = ovl_override_creds(inode->i_sb);
	if (!upperinode &&
	    !special_file(realinode->i_mode) && mask & MAY_WRITE) {
		mask &= ~(MAY_WRITE | MAY_APPEND);
		/* Make sure mounter can read file for copy up later */
		mask |= MAY_READ;
	}
	err = inode_permission(realinode, mask);
	revert_creds(old_cred);

	return err;
}

static const char *ovl_get_link(struct dentry *dentry,
				struct inode *inode,
				struct delayed_call *done)
{
	const struct cred *old_cred;
	const char *p;

	if (!dentry)
		return ERR_PTR(-ECHILD);

	old_cred = ovl_override_creds(dentry->d_sb);
	p = vfs_get_link(ovl_dentry_real(dentry), done);
	revert_creds(old_cred);
	return p;
}

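/*
 * Note: OVL_XATTR_PREFIX is expected to be "trusted.overlay." (defined in
 * overlayfs.h), so names such as "trusted.overlay.opaque" and
 * "trusted.overlay.origin" are private to overlayfs and get filtered out of
 * xattr listings by ovl_can_list() below.
 */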
bool ovl_is_private_xattr(const char *name)
{
	return strncmp(name, OVL_XATTR_PREFIX,
		       sizeof(OVL_XATTR_PREFIX) - 1) == 0;
}

int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
		  const void *value, size_t size, int flags)
{
	int err;
	struct dentry *upperdentry = ovl_i_dentry_upper(inode);
	struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry);
	const struct cred *old_cred;

	err = ovl_want_write(dentry);
	if (err)
		goto out;

	if (!value && !upperdentry) {
		err = vfs_getxattr(realdentry, name, NULL, 0);
		if (err < 0)
			goto out_drop_write;
	}

	if (!upperdentry) {
		err = ovl_copy_up(dentry);
		if (err)
			goto out_drop_write;

		realdentry = ovl_dentry_upper(dentry);
	}

	old_cred = ovl_override_creds(dentry->d_sb);
	if (value)
		err = vfs_setxattr(realdentry, name, value, size, flags);
	else {
		WARN_ON(flags != XATTR_REPLACE);
		err = vfs_removexattr(realdentry, name);
	}
	revert_creds(old_cred);

	/* copy c/mtime */
	ovl_copyattr(d_inode(realdentry), inode);

out_drop_write:
	ovl_drop_write(dentry);
out:
	return err;
}

int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
		  void *value, size_t size)
{
	ssize_t res;
	const struct cred *old_cred;
	struct dentry *realdentry =
		ovl_i_dentry_upper(inode) ?: ovl_dentry_lower(dentry);

	old_cred = ovl_override_creds(dentry->d_sb);
	res = vfs_getxattr(realdentry, name, value, size);
	revert_creds(old_cred);
	return res;
}

static bool ovl_can_list(const char *s)
{
	/* List all non-trusted xattrs */
	if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0)
		return true;

	/* Never list trusted.overlay, list other trusted for superuser only */
	return !ovl_is_private_xattr(s) && capable(CAP_SYS_ADMIN);
}

ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
{
	struct dentry *realdentry = ovl_dentry_real(dentry);
	ssize_t res;
	size_t len;
	char *s;
	const struct cred *old_cred;

	old_cred = ovl_override_creds(dentry->d_sb);
	res = vfs_listxattr(realdentry, list, size);
	revert_creds(old_cred);
	if (res <= 0 || size == 0)
		return res;

	/* filter out private xattrs */
	for (s = list, len = res; len;) {
		size_t slen = strnlen(s, len) + 1;

		/* underlying fs providing us with a broken xattr list? */
		if (WARN_ON(slen > len))
			return -EIO;

		len -= slen;
		if (!ovl_can_list(s)) {
			res -= slen;
			memmove(s, s + slen, len);
		} else {
			s += slen;
		}
	}

	return res;
}

struct posix_acl *ovl_get_acl(struct inode *inode, int type)
{
	struct inode *realinode = ovl_inode_real(inode);
	const struct cred *old_cred;
	struct posix_acl *acl;

	if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !IS_POSIXACL(realinode))
		return NULL;

	old_cred = ovl_override_creds(inode->i_sb);
	acl = get_acl(realinode, type);
	revert_creds(old_cred);

	return acl;
}

int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags)
{
	if (flags & S_ATIME) {
		struct ovl_fs *ofs = inode->i_sb->s_fs_info;
		struct path upperpath = {
			.mnt = ofs->upper_mnt,
			.dentry = ovl_upperdentry_dereference(OVL_I(inode)),
		};

		if (upperpath.dentry) {
			touch_atime(&upperpath);
			inode->i_atime = d_inode(upperpath.dentry)->i_atime;
		}
	}
	return 0;
}

static int ovl_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		      u64 start, u64 len)
{
	int err;
	struct inode *realinode = ovl_inode_real(inode);
	const struct cred *old_cred;

	if (!realinode->i_op->fiemap)
		return -EOPNOTSUPP;

	old_cred = ovl_override_creds(inode->i_sb);

	if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC)
		filemap_write_and_wait(realinode->i_mapping);

	err = realinode->i_op->fiemap(realinode, fieinfo, start, len);
	revert_creds(old_cred);

	return err;
}

static const struct inode_operations ovl_file_inode_operations = {
	.setattr	= ovl_setattr,
	.permission	= ovl_permission,
	.getattr	= ovl_getattr,
	.listxattr	= ovl_listxattr,
	.get_acl	= ovl_get_acl,
	.update_time	= ovl_update_time,
	.fiemap		= ovl_fiemap,
};

static const struct inode_operations ovl_symlink_inode_operations = {
	.setattr	= ovl_setattr,
	.get_link	= ovl_get_link,
	.getattr	= ovl_getattr,
	.listxattr	= ovl_listxattr,
	.update_time	= ovl_update_time,
};

static const struct inode_operations ovl_special_inode_operations = {
	.setattr	= ovl_setattr,
	.permission	= ovl_permission,
	.getattr	= ovl_getattr,
	.listxattr	= ovl_listxattr,
	.get_acl	= ovl_get_acl,
	.update_time	= ovl_update_time,
};

static const struct address_space_operations ovl_aops = {
	/* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */
	.direct_IO	= noop_direct_IO,
};

/*
 * It is possible to stack overlayfs instance on top of another
 * overlayfs instance as lower layer. We need to annotate the
 * stackable i_mutex locks according to stack level of the super
 * block instance. An overlayfs instance can never be in stack
 * depth 0 (there is always a real fs below it). An overlayfs
 * inode lock will use the lockdep annotation ovl_i_mutex_key[depth].
 *
 * For example, here is a snip from /proc/lockdep_chains after
 * dir_iterate of nested overlayfs:
 *
 * [...] &ovl_i_mutex_dir_key[depth]   (stack_depth=2)
 * [...] &ovl_i_mutex_dir_key[depth]#2 (stack_depth=1)
 * [...] &type->i_mutex_dir_key        (stack_depth=0)
 */
#define OVL_MAX_NESTING FILESYSTEM_MAX_STACK_DEPTH

static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode)
{
#ifdef CONFIG_LOCKDEP
	static struct lock_class_key ovl_i_mutex_key[OVL_MAX_NESTING];
	static struct lock_class_key ovl_i_mutex_dir_key[OVL_MAX_NESTING];
	static struct lock_class_key ovl_i_lock_key[OVL_MAX_NESTING];

	int depth = inode->i_sb->s_stack_depth - 1;

	if (WARN_ON_ONCE(depth < 0 || depth >= OVL_MAX_NESTING))
		depth = 0;

	if (S_ISDIR(inode->i_mode))
		lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_dir_key[depth]);
	else
		lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_key[depth]);

	lockdep_set_class(&OVL_I(inode)->lock, &ovl_i_lock_key[depth]);
#endif
}

static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev,
			   unsigned long ino, int fsid)
{
	int xinobits = ovl_xino_bits(inode->i_sb);

	/*
	 * When NFS export is enabled and d_ino is consistent with st_ino
	 * (samefs or i_ino has enough bits to encode layer), set the same
	 * value used for d_ino to i_ino, because nfsd readdirplus compares
	 * d_ino values to i_ino values of child entries. When called from
	 * ovl_new_inode(), ino arg is 0, so i_ino will be updated to real
	 * upper inode i_ino on ovl_inode_init() or ovl_inode_update().
	 */
	if (inode->i_sb->s_export_op &&
	    (ovl_same_sb(inode->i_sb) || xinobits)) {
		inode->i_ino = ino;
		if (xinobits && fsid && !(ino >> (64 - xinobits)))
			inode->i_ino |= (unsigned long)fsid << (64 - xinobits);
	} else {
		inode->i_ino = get_next_ino();
	}
	inode->i_mode = mode;
	inode->i_flags |= S_NOCMTIME;
#ifdef CONFIG_FS_POSIX_ACL
	inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE;
#endif

	ovl_lockdep_annotate_inode_mutex_key(inode);

	switch (mode & S_IFMT) {
	case S_IFREG:
		inode->i_op = &ovl_file_inode_operations;
		inode->i_fop = &ovl_file_operations;
		inode->i_mapping->a_ops = &ovl_aops;
		break;

	case S_IFDIR:
		inode->i_op = &ovl_dir_inode_operations;
		inode->i_fop = &ovl_dir_operations;
		break;

	case S_IFLNK:
		inode->i_op = &ovl_symlink_inode_operations;
		break;

	default:
		inode->i_op = &ovl_special_inode_operations;
		init_special_inode(inode, mode, rdev);
		break;
	}
}

/*
 * With inodes index enabled, an overlay inode nlink counts the union of upper
 * hardlinks and non-covered lower hardlinks. During the lifetime of a non-pure
 * upper inode, the following nlink modifying operations can happen:
 *
 * 1. Lower hardlink copy up
 * 2. Upper hardlink created, unlinked or renamed over
 * 3. Lower hardlink whiteout or renamed over
 *
 * For the first, copy up case, the union nlink does not change, whether the
 * operation succeeds or fails, but the upper inode nlink may change.
 * Therefore, before copy up, we store the union nlink value relative to the
 * lower inode nlink in the index inode xattr trusted.overlay.nlink.
 *
 * For the second, upper hardlink case, the union nlink should be incremented
 * or decremented IFF the operation succeeds, aligned with nlink change of the
 * upper inode. Therefore, before link/unlink/rename, we store the union nlink
 * value relative to the upper inode nlink in the index inode.
 *
 * For the last, lower cover up case, we simplify things by preceding the
 * whiteout or cover up with copy up. This makes sure that there is an index
 * upper inode where the nlink xattr can be stored before the copied up upper
 * entry is unlinked.
 */
#define OVL_NLINK_ADD_UPPER	(1 << 0)

/*
 * On-disk format for indexed nlink:
 *
 * nlink relative to the upper inode - "U[+-]NUM"
 * nlink relative to the lower inode - "L[+-]NUM"
 */

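/*
 * For example, if the overlay union nlink is 3 while the upper inode i_nlink
 * is 2, ovl_set_nlink_upper() stores "U+1"; after a mount cycle,
 * ovl_get_nlink() reads the current upper i_nlink and applies the +1 diff to
 * recover the union nlink.
 */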
static int ovl_set_nlink_common(struct dentry *dentry,
				struct dentry *realdentry, const char *format)
{
	struct inode *inode = d_inode(dentry);
	struct inode *realinode = d_inode(realdentry);
	char buf[13];
	int len;

	len = snprintf(buf, sizeof(buf), format,
		       (int) (inode->i_nlink - realinode->i_nlink));

	if (WARN_ON(len >= sizeof(buf)))
		return -EIO;

	return ovl_do_setxattr(ovl_dentry_upper(dentry),
			       OVL_XATTR_NLINK, buf, len, 0);
}

int ovl_set_nlink_upper(struct dentry *dentry)
{
	return ovl_set_nlink_common(dentry, ovl_dentry_upper(dentry), "U%+i");
}

int ovl_set_nlink_lower(struct dentry *dentry)
{
	return ovl_set_nlink_common(dentry, ovl_dentry_lower(dentry), "L%+i");
}

unsigned int ovl_get_nlink(struct dentry *lowerdentry,
			   struct dentry *upperdentry,
			   unsigned int fallback)
{
	int nlink_diff;
	int nlink;
	char buf[13];
	int err;

	if (!lowerdentry || !upperdentry || d_inode(lowerdentry)->i_nlink == 1)
		return fallback;

	err = vfs_getxattr(upperdentry, OVL_XATTR_NLINK, &buf, sizeof(buf) - 1);
	if (err < 0)
		goto fail;

	buf[err] = '\0';
	if ((buf[0] != 'L' && buf[0] != 'U') ||
	    (buf[1] != '+' && buf[1] != '-'))
		goto fail;

	err = kstrtoint(buf + 1, 10, &nlink_diff);
	if (err < 0)
		goto fail;

	nlink = d_inode(buf[0] == 'L' ? lowerdentry : upperdentry)->i_nlink;
	nlink += nlink_diff;

	if (nlink <= 0)
		goto fail;

	return nlink;

fail:
	pr_warn_ratelimited("overlayfs: failed to get index nlink (%pd2, err=%i)\n",
			    upperdentry, err);
	return fallback;
}

struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev)
{
	struct inode *inode;

	inode = new_inode(sb);
	if (inode)
		ovl_fill_inode(inode, mode, rdev, 0, 0);

	return inode;
}

static int ovl_inode_test(struct inode *inode, void *data)
{
	return inode->i_private == data;
}

static int ovl_inode_set(struct inode *inode, void *data)
{
	inode->i_private = data;
	return 0;
}

static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry,
			     struct dentry *upperdentry, bool strict)
{
	/*
	 * For directories, @strict verify from lookup path performs consistency
	 * checks, so NULL lower/upper in dentry must match NULL lower/upper in
	 * inode. Non @strict verify from NFS handle decode path passes NULL for
	 * 'unknown' lower/upper.
	 */
	if (S_ISDIR(inode->i_mode) && strict) {
		/* Real lower dir moved to upper layer under us? */
		if (!lowerdentry && ovl_inode_lower(inode))
			return false;

		/* Lookup of an uncovered redirect origin? */
		if (!upperdentry && ovl_inode_upper(inode))
			return false;
	}

	/*
	 * Allow non-NULL lower inode in ovl_inode even if lowerdentry is NULL.
	 * This happens when finding a copied up overlay inode for a renamed
	 * or hardlinked overlay dentry and lower dentry cannot be followed
	 * by origin because lower fs does not support file handles.
	 */
	if (lowerdentry && ovl_inode_lower(inode) != d_inode(lowerdentry))
		return false;

	/*
	 * Allow non-NULL __upperdentry in inode even if upperdentry is NULL.
	 * This happens when finding a lower alias for a copied up hard link.
	 */
	if (upperdentry && ovl_inode_upper(inode) != d_inode(upperdentry))
		return false;

	return true;
}

struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real,
			       bool is_upper)
{
	struct inode *inode, *key = d_inode(real);

	inode = ilookup5(sb, (unsigned long) key, ovl_inode_test, key);
	if (!inode)
		return NULL;

	if (!ovl_verify_inode(inode, is_upper ? NULL : real,
			      is_upper ? real : NULL, false)) {
		iput(inode);
		return ERR_PTR(-ESTALE);
	}

	return inode;
}

bool ovl_lookup_trap_inode(struct super_block *sb, struct dentry *dir)
{
	struct inode *key = d_inode(dir);
	struct inode *trap;
	bool res;

	trap = ilookup5(sb, (unsigned long) key, ovl_inode_test, key);
	if (!trap)
		return false;

	res = IS_DEADDIR(trap) && !ovl_inode_upper(trap) &&
	      !ovl_inode_lower(trap);

	iput(trap);
	return res;
}

/*
 * Create an inode cache entry for layer root dir that will intentionally
 * fail ovl_verify_inode(), so any lookup that will find some layer root
 * will fail.
 */
struct inode *ovl_get_trap_inode(struct super_block *sb, struct dentry *dir)
{
	struct inode *key = d_inode(dir);
	struct inode *trap;

	if (!d_is_dir(dir))
		return ERR_PTR(-ENOTDIR);

	trap = iget5_locked(sb, (unsigned long) key, ovl_inode_test,
			    ovl_inode_set, key);
	if (!trap)
		return ERR_PTR(-ENOMEM);

	if (!(trap->i_state & I_NEW)) {
		/* Conflicting layer roots? */
		iput(trap);
		return ERR_PTR(-ELOOP);
	}

	trap->i_mode = S_IFDIR;
	trap->i_flags = S_DEAD;
	unlock_new_inode(trap);

	return trap;
}

/*
 * Does overlay inode need to be hashed by lower inode?
 */
static bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper,
			     struct dentry *lower, struct dentry *index)
{
	struct ovl_fs *ofs = sb->s_fs_info;

	/* No, if pure upper */
	if (!lower)
		return false;

	/* Yes, if already indexed */
	if (index)
		return true;

	/* Yes, if won't be copied up */
	if (!ofs->upper_mnt)
		return true;

	/* No, if lower hardlink is or will be broken on copy up */
	if ((upper || !ovl_indexdir(sb)) &&
	    !d_is_dir(lower) && d_inode(lower)->i_nlink > 1)
		return false;

	/* No, if non-indexed upper with NFS export */
	if (sb->s_export_op && upper)
		return false;

	/* Otherwise, hash by lower inode for fsnotify */
	return true;
}

static struct inode *ovl_iget5(struct super_block *sb, struct inode *newinode,
			       struct inode *key)
{
	return newinode ? inode_insert5(newinode, (unsigned long) key,
					ovl_inode_test, ovl_inode_set, key) :
			  iget5_locked(sb, (unsigned long) key,
				       ovl_inode_test, ovl_inode_set, key);
}

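/*
 * ovl_get_inode() below hashes the overlay inode by the real upper inode, or
 * by the real lower inode when ovl_hash_bylower() says so, so repeated
 * lookups of the same underlying object share one overlay inode. A lower
 * hardlink that will be broken on copy up is the exception and gets a new,
 * unhashed inode.
 */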
struct inode *ovl_get_inode(struct super_block *sb,
			    struct ovl_inode_params *oip)
{
	struct dentry *upperdentry = oip->upperdentry;
	struct ovl_path *lowerpath = oip->lowerpath;
	struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL;
	struct inode *inode;
	struct dentry *lowerdentry = lowerpath ? lowerpath->dentry : NULL;
	bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry,
					oip->index);
	int fsid = bylower ? oip->lowerpath->layer->fsid : 0;
	bool is_dir, metacopy = false;
	unsigned long ino = 0;
	int err = oip->newinode ? -EEXIST : -ENOMEM;

	if (!realinode)
		realinode = d_inode(lowerdentry);

	/*
	 * Copy up origin (lower) may exist for non-indexed upper, but we must
	 * not use lower as hash key if this is a broken hardlink.
	 */
	is_dir = S_ISDIR(realinode->i_mode);
	if (upperdentry || bylower) {
		struct inode *key = d_inode(bylower ? lowerdentry :
						      upperdentry);
		unsigned int nlink = is_dir ? 1 : realinode->i_nlink;

		inode = ovl_iget5(sb, oip->newinode, key);
		if (!inode)
			goto out_err;
		if (!(inode->i_state & I_NEW)) {
			/*
			 * Verify that the underlying files stored in the inode
			 * match those in the dentry.
			 */
			if (!ovl_verify_inode(inode, lowerdentry, upperdentry,
					      true)) {
				iput(inode);
				err = -ESTALE;
				goto out_err;
			}

			dput(upperdentry);
			kfree(oip->redirect);
			goto out;
		}

		/* Recalculate nlink for non-dir due to indexing */
		if (!is_dir)
			nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink);
		set_nlink(inode, nlink);
		ino = key->i_ino;
	} else {
		/* Lower hardlink that will be broken on copy up */
		inode = new_inode(sb);
		if (!inode) {
			err = -ENOMEM;
			goto out_err;
		}
	}
	ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev, ino, fsid);
	ovl_inode_init(inode, upperdentry, lowerdentry, oip->lowerdata);

	if (upperdentry && ovl_is_impuredir(upperdentry))
		ovl_set_flag(OVL_IMPURE, inode);

	if (oip->index)
		ovl_set_flag(OVL_INDEX, inode);

	if (upperdentry) {
		err = ovl_check_metacopy_xattr(upperdentry);
		if (err < 0)
			goto out_err;
		metacopy = err;
		if (!metacopy)
			ovl_set_flag(OVL_UPPERDATA, inode);
	}

	OVL_I(inode)->redirect = oip->redirect;

	if (bylower)
		ovl_set_flag(OVL_CONST_INO, inode);

	/* Check for non-merge dir that may have whiteouts */
	if (is_dir) {
		if (((upperdentry && lowerdentry) || oip->numlower > 1) ||
		    ovl_check_origin_xattr(upperdentry ?: lowerdentry)) {
			ovl_set_flag(OVL_WHITEOUTS, inode);
		}
	}

	if (inode->i_state & I_NEW)
		unlock_new_inode(inode);
out:
	return inode;

out_err:
	pr_warn_ratelimited("overlayfs: failed to get inode (%i)\n", err);
	inode = ERR_PTR(err);
	goto out;
}