1 /* 2 * Copyright (C) 2011 Novell Inc. 3 * Copyright (C) 2016 Red Hat, Inc. 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 as published by 7 * the Free Software Foundation. 8 */ 9 10 #include <linux/fs.h> 11 #include <linux/cred.h> 12 #include <linux/namei.h> 13 #include <linux/xattr.h> 14 #include <linux/ratelimit.h> 15 #include <linux/mount.h> 16 #include <linux/exportfs.h> 17 #include "overlayfs.h" 18 #include "ovl_entry.h" 19 20 struct ovl_lookup_data { 21 struct qstr name; 22 bool is_dir; 23 bool opaque; 24 bool stop; 25 bool last; 26 char *redirect; 27 }; 28 29 static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d, 30 size_t prelen, const char *post) 31 { 32 int res; 33 char *s, *next, *buf = NULL; 34 35 res = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, NULL, 0); 36 if (res < 0) { 37 if (res == -ENODATA || res == -EOPNOTSUPP) 38 return 0; 39 goto fail; 40 } 41 buf = kzalloc(prelen + res + strlen(post) + 1, GFP_TEMPORARY); 42 if (!buf) 43 return -ENOMEM; 44 45 if (res == 0) 46 goto invalid; 47 48 res = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, buf, res); 49 if (res < 0) 50 goto fail; 51 if (res == 0) 52 goto invalid; 53 if (buf[0] == '/') { 54 for (s = buf; *s++ == '/'; s = next) { 55 next = strchrnul(s, '/'); 56 if (s == next) 57 goto invalid; 58 } 59 } else { 60 if (strchr(buf, '/') != NULL) 61 goto invalid; 62 63 memmove(buf + prelen, buf, res); 64 memcpy(buf, d->name.name, prelen); 65 } 66 67 strcat(buf, post); 68 kfree(d->redirect); 69 d->redirect = buf; 70 d->name.name = d->redirect; 71 d->name.len = strlen(d->redirect); 72 73 return 0; 74 75 err_free: 76 kfree(buf); 77 return 0; 78 fail: 79 pr_warn_ratelimited("overlayfs: failed to get redirect (%i)\n", res); 80 goto err_free; 81 invalid: 82 pr_warn_ratelimited("overlayfs: invalid redirect (%s)\n", buf); 83 goto err_free; 84 } 85 86 static int ovl_acceptable(void *ctx, struct dentry *dentry) 87 { 88 return 1; 89 } 90 91 static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry) 92 { 93 int res; 94 struct ovl_fh *fh = NULL; 95 96 res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0); 97 if (res < 0) { 98 if (res == -ENODATA || res == -EOPNOTSUPP) 99 return NULL; 100 goto fail; 101 } 102 /* Zero size value means "copied up but origin unknown" */ 103 if (res == 0) 104 return NULL; 105 106 fh = kzalloc(res, GFP_TEMPORARY); 107 if (!fh) 108 return ERR_PTR(-ENOMEM); 109 110 res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, fh, res); 111 if (res < 0) 112 goto fail; 113 114 if (res < sizeof(struct ovl_fh) || res < fh->len) 115 goto invalid; 116 117 if (fh->magic != OVL_FH_MAGIC) 118 goto invalid; 119 120 /* Treat larger version and unknown flags as "origin unknown" */ 121 if (fh->version > OVL_FH_VERSION || fh->flags & ~OVL_FH_FLAG_ALL) 122 goto out; 123 124 /* Treat endianness mismatch as "origin unknown" */ 125 if (!(fh->flags & OVL_FH_FLAG_ANY_ENDIAN) && 126 (fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN) 127 goto out; 128 129 return fh; 130 131 out: 132 kfree(fh); 133 return NULL; 134 135 fail: 136 pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res); 137 goto out; 138 invalid: 139 pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res, fh); 140 goto out; 141 } 142 143 static struct dentry *ovl_get_origin(struct dentry *dentry, 144 struct vfsmount *mnt) 145 { 146 struct dentry *origin = NULL; 147 struct ovl_fh *fh = ovl_get_origin_fh(dentry); 148 int bytes; 149 150 if (IS_ERR_OR_NULL(fh)) 151 return (struct dentry *)fh; 152 153 /* 154 * Make sure that the stored uuid matches the uuid of the lower 155 * layer where file handle will be decoded. 156 */ 157 if (!uuid_equal(&fh->uuid, &mnt->mnt_sb->s_uuid)) 158 goto out; 159 160 bytes = (fh->len - offsetof(struct ovl_fh, fid)); 161 origin = exportfs_decode_fh(mnt, (struct fid *)fh->fid, 162 bytes >> 2, (int)fh->type, 163 ovl_acceptable, NULL); 164 if (IS_ERR(origin)) { 165 /* Treat stale file handle as "origin unknown" */ 166 if (origin == ERR_PTR(-ESTALE)) 167 origin = NULL; 168 goto out; 169 } 170 171 if (ovl_dentry_weird(origin) || 172 ((d_inode(origin)->i_mode ^ d_inode(dentry)->i_mode) & S_IFMT)) 173 goto invalid; 174 175 out: 176 kfree(fh); 177 return origin; 178 179 invalid: 180 pr_warn_ratelimited("overlayfs: invalid origin (%pd2)\n", origin); 181 dput(origin); 182 origin = NULL; 183 goto out; 184 } 185 186 static bool ovl_is_opaquedir(struct dentry *dentry) 187 { 188 return ovl_check_dir_xattr(dentry, OVL_XATTR_OPAQUE); 189 } 190 191 static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d, 192 const char *name, unsigned int namelen, 193 size_t prelen, const char *post, 194 struct dentry **ret) 195 { 196 struct dentry *this; 197 int err; 198 199 this = lookup_one_len_unlocked(name, base, namelen); 200 if (IS_ERR(this)) { 201 err = PTR_ERR(this); 202 this = NULL; 203 if (err == -ENOENT || err == -ENAMETOOLONG) 204 goto out; 205 goto out_err; 206 } 207 if (!this->d_inode) 208 goto put_and_out; 209 210 if (ovl_dentry_weird(this)) { 211 /* Don't support traversing automounts and other weirdness */ 212 err = -EREMOTE; 213 goto out_err; 214 } 215 if (ovl_is_whiteout(this)) { 216 d->stop = d->opaque = true; 217 goto put_and_out; 218 } 219 if (!d_can_lookup(this)) { 220 d->stop = true; 221 if (d->is_dir) 222 goto put_and_out; 223 goto out; 224 } 225 d->is_dir = true; 226 if (!d->last && ovl_is_opaquedir(this)) { 227 d->stop = d->opaque = true; 228 goto out; 229 } 230 err = ovl_check_redirect(this, d, prelen, post); 231 if (err) 232 goto out_err; 233 out: 234 *ret = this; 235 return 0; 236 237 put_and_out: 238 dput(this); 239 this = NULL; 240 goto out; 241 242 out_err: 243 dput(this); 244 return err; 245 } 246 247 static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d, 248 struct dentry **ret) 249 { 250 /* Counting down from the end, since the prefix can change */ 251 size_t rem = d->name.len - 1; 252 struct dentry *dentry = NULL; 253 int err; 254 255 if (d->name.name[0] != '/') 256 return ovl_lookup_single(base, d, d->name.name, d->name.len, 257 0, "", ret); 258 259 while (!IS_ERR_OR_NULL(base) && d_can_lookup(base)) { 260 const char *s = d->name.name + d->name.len - rem; 261 const char *next = strchrnul(s, '/'); 262 size_t thislen = next - s; 263 bool end = !next[0]; 264 265 /* Verify we did not go off the rails */ 266 if (WARN_ON(s[-1] != '/')) 267 return -EIO; 268 269 err = ovl_lookup_single(base, d, s, thislen, 270 d->name.len - rem, next, &base); 271 dput(dentry); 272 if (err) 273 return err; 274 dentry = base; 275 if (end) 276 break; 277 278 rem -= thislen + 1; 279 280 if (WARN_ON(rem >= d->name.len)) 281 return -EIO; 282 } 283 *ret = dentry; 284 return 0; 285 } 286 287 288 static int ovl_check_origin(struct dentry *upperdentry, 289 struct path *lowerstack, unsigned int numlower, 290 struct path **stackp, unsigned int *ctrp) 291 { 292 struct vfsmount *mnt; 293 struct dentry *origin = NULL; 294 int i; 295 296 297 for (i = 0; i < numlower; i++) { 298 mnt = lowerstack[i].mnt; 299 origin = ovl_get_origin(upperdentry, mnt); 300 if (IS_ERR(origin)) 301 return PTR_ERR(origin); 302 303 if (origin) 304 break; 305 } 306 307 if (!origin) 308 return 0; 309 310 BUG_ON(*ctrp); 311 if (!*stackp) 312 *stackp = kmalloc(sizeof(struct path), GFP_TEMPORARY); 313 if (!*stackp) { 314 dput(origin); 315 return -ENOMEM; 316 } 317 **stackp = (struct path) { .dentry = origin, .mnt = mnt }; 318 *ctrp = 1; 319 320 return 0; 321 } 322 323 /* 324 * Verify that @fh matches the origin file handle stored in OVL_XATTR_ORIGIN. 325 * Return 0 on match, -ESTALE on mismatch, < 0 on error. 326 */ 327 static int ovl_verify_origin_fh(struct dentry *dentry, const struct ovl_fh *fh) 328 { 329 struct ovl_fh *ofh = ovl_get_origin_fh(dentry); 330 int err = 0; 331 332 if (!ofh) 333 return -ENODATA; 334 335 if (IS_ERR(ofh)) 336 return PTR_ERR(ofh); 337 338 if (fh->len != ofh->len || memcmp(fh, ofh, fh->len)) 339 err = -ESTALE; 340 341 kfree(ofh); 342 return err; 343 } 344 345 /* 346 * Verify that an inode matches the origin file handle stored in upper inode. 347 * 348 * If @set is true and there is no stored file handle, encode and store origin 349 * file handle in OVL_XATTR_ORIGIN. 350 * 351 * Return 0 on match, -ESTALE on mismatch, < 0 on error. 352 */ 353 int ovl_verify_origin(struct dentry *dentry, struct vfsmount *mnt, 354 struct dentry *origin, bool is_upper, bool set) 355 { 356 struct inode *inode; 357 struct ovl_fh *fh; 358 int err; 359 360 fh = ovl_encode_fh(origin, is_upper); 361 err = PTR_ERR(fh); 362 if (IS_ERR(fh)) 363 goto fail; 364 365 err = ovl_verify_origin_fh(dentry, fh); 366 if (set && err == -ENODATA) 367 err = ovl_do_setxattr(dentry, OVL_XATTR_ORIGIN, fh, fh->len, 0); 368 if (err) 369 goto fail; 370 371 out: 372 kfree(fh); 373 return err; 374 375 fail: 376 inode = d_inode(origin); 377 pr_warn_ratelimited("overlayfs: failed to verify origin (%pd2, ino=%lu, err=%i)\n", 378 origin, inode ? inode->i_ino : 0, err); 379 goto out; 380 } 381 382 /* 383 * Verify that an index entry name matches the origin file handle stored in 384 * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path. 385 * Return 0 on match, -ESTALE on mismatch or stale origin, < 0 on error. 386 */ 387 int ovl_verify_index(struct dentry *index, struct path *lowerstack, 388 unsigned int numlower) 389 { 390 struct ovl_fh *fh = NULL; 391 size_t len; 392 struct path origin = { }; 393 struct path *stack = &origin; 394 unsigned int ctr = 0; 395 int err; 396 397 if (!d_inode(index)) 398 return 0; 399 400 err = -EISDIR; 401 if (d_is_dir(index)) 402 goto fail; 403 404 err = -EINVAL; 405 if (index->d_name.len < sizeof(struct ovl_fh)*2) 406 goto fail; 407 408 err = -ENOMEM; 409 len = index->d_name.len / 2; 410 fh = kzalloc(len, GFP_TEMPORARY); 411 if (!fh) 412 goto fail; 413 414 err = -EINVAL; 415 if (hex2bin((u8 *)fh, index->d_name.name, len) || len != fh->len) 416 goto fail; 417 418 err = ovl_verify_origin_fh(index, fh); 419 if (err) 420 goto fail; 421 422 err = ovl_check_origin(index, lowerstack, numlower, &stack, &ctr); 423 if (!err && !ctr) 424 err = -ESTALE; 425 if (err) 426 goto fail; 427 428 /* Check if index is orphan and don't warn before cleaning it */ 429 if (d_inode(index)->i_nlink == 1 && 430 ovl_get_nlink(index, origin.dentry, 0) == 0) 431 err = -ENOENT; 432 433 dput(origin.dentry); 434 out: 435 kfree(fh); 436 return err; 437 438 fail: 439 pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, err=%i)\n", 440 index, err); 441 goto out; 442 } 443 444 /* 445 * Lookup in indexdir for the index entry of a lower real inode or a copy up 446 * origin inode. The index entry name is the hex representation of the lower 447 * inode file handle. 448 * 449 * If the index dentry in negative, then either no lower aliases have been 450 * copied up yet, or aliases have been copied up in older kernels and are 451 * not indexed. 452 * 453 * If the index dentry for a copy up origin inode is positive, but points 454 * to an inode different than the upper inode, then either the upper inode 455 * has been copied up and not indexed or it was indexed, but since then 456 * index dir was cleared. Either way, that index cannot be used to indentify 457 * the overlay inode. 458 */ 459 int ovl_get_index_name(struct dentry *origin, struct qstr *name) 460 { 461 int err; 462 struct ovl_fh *fh; 463 char *n, *s; 464 465 fh = ovl_encode_fh(origin, false); 466 if (IS_ERR(fh)) 467 return PTR_ERR(fh); 468 469 err = -ENOMEM; 470 n = kzalloc(fh->len * 2, GFP_TEMPORARY); 471 if (n) { 472 s = bin2hex(n, fh, fh->len); 473 *name = (struct qstr) QSTR_INIT(n, s - n); 474 err = 0; 475 } 476 kfree(fh); 477 478 return err; 479 480 } 481 482 static struct dentry *ovl_lookup_index(struct dentry *dentry, 483 struct dentry *upper, 484 struct dentry *origin) 485 { 486 struct ovl_fs *ofs = dentry->d_sb->s_fs_info; 487 struct dentry *index; 488 struct inode *inode; 489 struct qstr name; 490 int err; 491 492 err = ovl_get_index_name(origin, &name); 493 if (err) 494 return ERR_PTR(err); 495 496 index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len); 497 if (IS_ERR(index)) { 498 pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%*s, err=%i);\n" 499 "overlayfs: mount with '-o index=off' to disable inodes index.\n", 500 d_inode(origin)->i_ino, name.len, name.name, 501 err); 502 goto out; 503 } 504 505 if (d_is_negative(index)) { 506 if (upper && d_inode(origin)->i_nlink > 1) { 507 pr_warn_ratelimited("overlayfs: hard link with origin but no index (ino=%lu).\n", 508 d_inode(origin)->i_ino); 509 goto fail; 510 } 511 512 dput(index); 513 index = NULL; 514 } else if (upper && d_inode(index) != d_inode(upper)) { 515 inode = d_inode(index); 516 pr_warn_ratelimited("overlayfs: wrong index found (index ino: %lu, upper ino: %lu).\n", 517 d_inode(index)->i_ino, 518 d_inode(upper)->i_ino); 519 goto fail; 520 } 521 522 out: 523 kfree(name.name); 524 return index; 525 526 fail: 527 dput(index); 528 index = ERR_PTR(-EIO); 529 goto out; 530 } 531 532 /* 533 * Returns next layer in stack starting from top. 534 * Returns -1 if this is the last layer. 535 */ 536 int ovl_path_next(int idx, struct dentry *dentry, struct path *path) 537 { 538 struct ovl_entry *oe = dentry->d_fsdata; 539 540 BUG_ON(idx < 0); 541 if (idx == 0) { 542 ovl_path_upper(dentry, path); 543 if (path->dentry) 544 return oe->numlower ? 1 : -1; 545 idx++; 546 } 547 BUG_ON(idx > oe->numlower); 548 *path = oe->lowerstack[idx - 1]; 549 550 return (idx < oe->numlower) ? idx + 1 : -1; 551 } 552 553 struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, 554 unsigned int flags) 555 { 556 struct ovl_entry *oe; 557 const struct cred *old_cred; 558 struct ovl_fs *ofs = dentry->d_sb->s_fs_info; 559 struct ovl_entry *poe = dentry->d_parent->d_fsdata; 560 struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata; 561 struct path *stack = NULL; 562 struct dentry *upperdir, *upperdentry = NULL; 563 struct dentry *index = NULL; 564 unsigned int ctr = 0; 565 struct inode *inode = NULL; 566 bool upperopaque = false; 567 char *upperredirect = NULL; 568 struct dentry *this; 569 unsigned int i; 570 int err; 571 struct ovl_lookup_data d = { 572 .name = dentry->d_name, 573 .is_dir = false, 574 .opaque = false, 575 .stop = false, 576 .last = !poe->numlower, 577 .redirect = NULL, 578 }; 579 580 if (dentry->d_name.len > ofs->namelen) 581 return ERR_PTR(-ENAMETOOLONG); 582 583 old_cred = ovl_override_creds(dentry->d_sb); 584 upperdir = ovl_dentry_upper(dentry->d_parent); 585 if (upperdir) { 586 err = ovl_lookup_layer(upperdir, &d, &upperdentry); 587 if (err) 588 goto out; 589 590 if (upperdentry && unlikely(ovl_dentry_remote(upperdentry))) { 591 dput(upperdentry); 592 err = -EREMOTE; 593 goto out; 594 } 595 if (upperdentry && !d.is_dir) { 596 BUG_ON(!d.stop || d.redirect); 597 /* 598 * Lookup copy up origin by decoding origin file handle. 599 * We may get a disconnected dentry, which is fine, 600 * because we only need to hold the origin inode in 601 * cache and use its inode number. We may even get a 602 * connected dentry, that is not under any of the lower 603 * layers root. That is also fine for using it's inode 604 * number - it's the same as if we held a reference 605 * to a dentry in lower layer that was moved under us. 606 */ 607 err = ovl_check_origin(upperdentry, roe->lowerstack, 608 roe->numlower, &stack, &ctr); 609 if (err) 610 goto out; 611 } 612 613 if (d.redirect) { 614 upperredirect = kstrdup(d.redirect, GFP_KERNEL); 615 if (!upperredirect) 616 goto out_put_upper; 617 if (d.redirect[0] == '/') 618 poe = roe; 619 } 620 upperopaque = d.opaque; 621 } 622 623 if (!d.stop && poe->numlower) { 624 err = -ENOMEM; 625 stack = kcalloc(ofs->numlower, sizeof(struct path), 626 GFP_TEMPORARY); 627 if (!stack) 628 goto out_put_upper; 629 } 630 631 for (i = 0; !d.stop && i < poe->numlower; i++) { 632 struct path lowerpath = poe->lowerstack[i]; 633 634 d.last = i == poe->numlower - 1; 635 err = ovl_lookup_layer(lowerpath.dentry, &d, &this); 636 if (err) 637 goto out_put; 638 639 if (!this) 640 continue; 641 642 stack[ctr].dentry = this; 643 stack[ctr].mnt = lowerpath.mnt; 644 ctr++; 645 646 if (d.stop) 647 break; 648 649 if (d.redirect && d.redirect[0] == '/' && poe != roe) { 650 poe = roe; 651 652 /* Find the current layer on the root dentry */ 653 for (i = 0; i < poe->numlower; i++) 654 if (poe->lowerstack[i].mnt == lowerpath.mnt) 655 break; 656 if (WARN_ON(i == poe->numlower)) 657 break; 658 } 659 } 660 661 /* Lookup index by lower inode and verify it matches upper inode */ 662 if (ctr && !d.is_dir && ovl_indexdir(dentry->d_sb)) { 663 struct dentry *origin = stack[0].dentry; 664 665 index = ovl_lookup_index(dentry, upperdentry, origin); 666 if (IS_ERR(index)) { 667 err = PTR_ERR(index); 668 index = NULL; 669 goto out_put; 670 } 671 } 672 673 oe = ovl_alloc_entry(ctr); 674 err = -ENOMEM; 675 if (!oe) 676 goto out_put; 677 678 oe->opaque = upperopaque; 679 memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr); 680 dentry->d_fsdata = oe; 681 682 if (upperdentry) 683 ovl_dentry_set_upper_alias(dentry); 684 else if (index) 685 upperdentry = dget(index); 686 687 if (upperdentry || ctr) { 688 inode = ovl_get_inode(dentry, upperdentry); 689 err = PTR_ERR(inode); 690 if (IS_ERR(inode)) 691 goto out_free_oe; 692 693 OVL_I(inode)->redirect = upperredirect; 694 if (index) 695 ovl_set_flag(OVL_INDEX, inode); 696 } 697 698 revert_creds(old_cred); 699 dput(index); 700 kfree(stack); 701 kfree(d.redirect); 702 d_add(dentry, inode); 703 704 return NULL; 705 706 out_free_oe: 707 dentry->d_fsdata = NULL; 708 kfree(oe); 709 out_put: 710 dput(index); 711 for (i = 0; i < ctr; i++) 712 dput(stack[i].dentry); 713 kfree(stack); 714 out_put_upper: 715 dput(upperdentry); 716 kfree(upperredirect); 717 out: 718 kfree(d.redirect); 719 revert_creds(old_cred); 720 return ERR_PTR(err); 721 } 722 723 bool ovl_lower_positive(struct dentry *dentry) 724 { 725 struct ovl_entry *oe = dentry->d_fsdata; 726 struct ovl_entry *poe = dentry->d_parent->d_fsdata; 727 const struct qstr *name = &dentry->d_name; 728 unsigned int i; 729 bool positive = false; 730 bool done = false; 731 732 /* 733 * If dentry is negative, then lower is positive iff this is a 734 * whiteout. 735 */ 736 if (!dentry->d_inode) 737 return oe->opaque; 738 739 /* Negative upper -> positive lower */ 740 if (!ovl_dentry_upper(dentry)) 741 return true; 742 743 /* Positive upper -> have to look up lower to see whether it exists */ 744 for (i = 0; !done && !positive && i < poe->numlower; i++) { 745 struct dentry *this; 746 struct dentry *lowerdir = poe->lowerstack[i].dentry; 747 748 this = lookup_one_len_unlocked(name->name, lowerdir, 749 name->len); 750 if (IS_ERR(this)) { 751 switch (PTR_ERR(this)) { 752 case -ENOENT: 753 case -ENAMETOOLONG: 754 break; 755 756 default: 757 /* 758 * Assume something is there, we just couldn't 759 * access it. 760 */ 761 positive = true; 762 break; 763 } 764 } else { 765 if (this->d_inode) { 766 positive = !ovl_is_whiteout(this); 767 done = true; 768 } 769 dput(this); 770 } 771 } 772 773 return positive; 774 } 775