// SPDX-License-Identifier: GPL-2.0-only
/*
 *
 * Copyright (C) 2011 Novell Inc.
 */

#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/file.h>
#include <linux/xattr.h>
#include <linux/rbtree.h>
#include <linux/security.h>
#include <linux/cred.h>
#include <linux/ratelimit.h>
#include "overlayfs.h"

struct ovl_cache_entry {
	unsigned int len;
	unsigned int type;
	u64 real_ino;
	u64 ino;
	struct list_head l_node;
	struct rb_node node;
	struct ovl_cache_entry *next_maybe_whiteout;
	bool is_upper;
	bool is_whiteout;
	bool check_xwhiteout;
	char name[];
};

struct ovl_dir_cache {
	long refcount;
	u64 version;
	struct list_head entries;
	struct rb_root root;
};

struct ovl_readdir_data {
	struct dir_context ctx;
	struct dentry *dentry;
	bool is_lowest;
	struct rb_root *root;
	struct list_head *list;
	struct list_head middle;
	struct ovl_cache_entry *first_maybe_whiteout;
	int count;
	int err;
	bool is_upper;
	bool d_type_supported;
	bool in_xwhiteouts_dir;
};

struct ovl_dir_file {
	bool is_real;
	bool is_upper;
	struct ovl_dir_cache *cache;
	struct list_head *cursor;
	struct file *realfile;
	struct file *upperfile;
};

static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
{
	return rb_entry(n, struct ovl_cache_entry, node);
}

static bool ovl_cache_entry_find_link(const char *name, int len,
				      struct rb_node ***link,
				      struct rb_node **parent)
{
	bool found = false;
	struct rb_node **newp = *link;

	while (!found && *newp) {
		int cmp;
		struct ovl_cache_entry *tmp;

		*parent = *newp;
		tmp = ovl_cache_entry_from_node(*newp);
		cmp = strncmp(name, tmp->name, len);
		if (cmp > 0)
			newp = &tmp->node.rb_right;
		else if (cmp < 0 || len < tmp->len)
			newp = &tmp->node.rb_left;
		else
			found = true;
	}
	*link = newp;

	return found;
}

static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
						    const char *name, int len)
{
	struct rb_node *node = root->rb_node;
	int cmp;

	while (node) {
		struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);

		cmp = strncmp(name, p->name, len);
		if (cmp > 0)
			node = p->node.rb_right;
		else if (cmp < 0 || len < p->len)
			node = p->node.rb_left;
		else
			return p;
	}

	return NULL;
}

static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd,
			   struct ovl_cache_entry *p)
{
	/* Don't care if not doing ovl_iter() */
	if (!rdd->dentry)
		return false;

	/* Always recalc d_ino when remapping lower inode numbers */
	if (ovl_xino_bits(OVL_FS(rdd->dentry->d_sb)))
		return true;

	/* Always recalc d_ino for parent */
	if (strcmp(p->name, "..") == 0)
		return true;

	/* If this is lower, then native d_ino will do */
	if (!rdd->is_upper)
		return false;

	/*
	 * Recalc d_ino for '.' and for all entries if dir is impure (contains
	 * copied up entries)
	 */
	if ((p->name[0] == '.' && p->len == 1) ||
	    ovl_test_flag(OVL_IMPURE, d_inode(rdd->dentry)))
		return true;

	return false;
}
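
/*
 * Note on the cache entry layout below: each entry is a single allocation
 * with the name stored inline in the name[] flexible array, e.g. for a
 * name of len == 3 the allocation size is
 *
 *	offsetof(struct ovl_cache_entry, name[4])
 *
 * which covers the header plus the name and its NUL terminator. Potential
 * whiteouts (DT_CHR entries, since overlayfs whiteouts are 0/0 character
 * devices) are additionally chained on rdd->first_maybe_whiteout so that
 * the expensive lookups in ovl_check_whiteouts() can be done in one batch.
 */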

static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
						   const char *name, int len,
						   u64 ino, unsigned int d_type)
{
	struct ovl_cache_entry *p;
	size_t size = offsetof(struct ovl_cache_entry, name[len + 1]);

	p = kmalloc(size, GFP_KERNEL);
	if (!p)
		return NULL;

	memcpy(p->name, name, len);
	p->name[len] = '\0';
	p->len = len;
	p->type = d_type;
	p->real_ino = ino;
	p->ino = ino;
	/* Defer setting d_ino for upper entry to ovl_iterate() */
	if (ovl_calc_d_ino(rdd, p))
		p->ino = 0;
	p->is_upper = rdd->is_upper;
	p->is_whiteout = false;
	/* Defer check for overlay.whiteout to ovl_iterate() */
	p->check_xwhiteout = rdd->in_xwhiteouts_dir && d_type == DT_REG;

	if (d_type == DT_CHR) {
		p->next_maybe_whiteout = rdd->first_maybe_whiteout;
		rdd->first_maybe_whiteout = p;
	}
	return p;
}

static bool ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
				   const char *name, int len, u64 ino,
				   unsigned int d_type)
{
	struct rb_node **newp = &rdd->root->rb_node;
	struct rb_node *parent = NULL;
	struct ovl_cache_entry *p;

	if (ovl_cache_entry_find_link(name, len, &newp, &parent))
		return true;

	p = ovl_cache_entry_new(rdd, name, len, ino, d_type);
	if (p == NULL) {
		rdd->err = -ENOMEM;
		return false;
	}

	list_add_tail(&p->l_node, rdd->list);
	rb_link_node(&p->node, parent, newp);
	rb_insert_color(&p->node, rdd->root);

	return true;
}

static bool ovl_fill_lowest(struct ovl_readdir_data *rdd,
			    const char *name, int namelen,
			    loff_t offset, u64 ino, unsigned int d_type)
{
	struct ovl_cache_entry *p;

	p = ovl_cache_entry_find(rdd->root, name, namelen);
	if (p) {
		list_move_tail(&p->l_node, &rdd->middle);
	} else {
		p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
		if (p == NULL)
			rdd->err = -ENOMEM;
		else
			list_add_tail(&p->l_node, &rdd->middle);
	}

	return rdd->err == 0;
}

void ovl_cache_free(struct list_head *list)
{
	struct ovl_cache_entry *p;
	struct ovl_cache_entry *n;

	list_for_each_entry_safe(p, n, list, l_node)
		kfree(p);

	INIT_LIST_HEAD(list);
}

void ovl_dir_cache_free(struct inode *inode)
{
	struct ovl_dir_cache *cache = ovl_dir_cache(inode);

	if (cache) {
		ovl_cache_free(&cache->entries);
		kfree(cache);
	}
}

static void ovl_cache_put(struct ovl_dir_file *od, struct inode *inode)
{
	struct ovl_dir_cache *cache = od->cache;

	WARN_ON(cache->refcount <= 0);
	cache->refcount--;
	if (!cache->refcount) {
		if (ovl_dir_cache(inode) == cache)
			ovl_set_dir_cache(inode, NULL);

		ovl_cache_free(&cache->entries);
		kfree(cache);
	}
}

static bool ovl_fill_merge(struct dir_context *ctx, const char *name,
			   int namelen, loff_t offset, u64 ino,
			   unsigned int d_type)
{
	struct ovl_readdir_data *rdd =
		container_of(ctx, struct ovl_readdir_data, ctx);

	rdd->count++;
	if (!rdd->is_lowest)
		return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
	else
		return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type);
}
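
/*
 * Resolve the whiteout candidates collected by the fill callbacks in one
 * pass: take the directory lock once, look up each chained entry and let
 * ovl_is_whiteout() decide. A failed lookup is deliberately ignored and
 * leaves p->is_whiteout == false.
 */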

static int ovl_check_whiteouts(const struct path *path, struct ovl_readdir_data *rdd)
{
	int err;
	struct ovl_cache_entry *p;
	struct dentry *dentry, *dir = path->dentry;
	const struct cred *old_cred;

	old_cred = ovl_override_creds(rdd->dentry->d_sb);

	err = down_write_killable(&dir->d_inode->i_rwsem);
	if (!err) {
		while (rdd->first_maybe_whiteout) {
			p = rdd->first_maybe_whiteout;
			rdd->first_maybe_whiteout = p->next_maybe_whiteout;
			dentry = lookup_one(mnt_idmap(path->mnt), p->name, dir, p->len);
			if (!IS_ERR(dentry)) {
				p->is_whiteout = ovl_is_whiteout(dentry);
				dput(dentry);
			}
		}
		inode_unlock(dir->d_inode);
	}
	revert_creds(old_cred);

	return err;
}

static inline int ovl_dir_read(const struct path *realpath,
			       struct ovl_readdir_data *rdd)
{
	struct file *realfile;
	int err;

	realfile = ovl_path_open(realpath, O_RDONLY | O_LARGEFILE);
	if (IS_ERR(realfile))
		return PTR_ERR(realfile);

	rdd->in_xwhiteouts_dir = rdd->dentry &&
		ovl_path_check_xwhiteouts_xattr(OVL_FS(rdd->dentry->d_sb), realpath);
	rdd->first_maybe_whiteout = NULL;
	rdd->ctx.pos = 0;
	do {
		rdd->count = 0;
		rdd->err = 0;
		err = iterate_dir(realfile, &rdd->ctx);
		if (err >= 0)
			err = rdd->err;
	} while (!err && rdd->count);

	if (!err && rdd->first_maybe_whiteout && rdd->dentry)
		err = ovl_check_whiteouts(realpath, rdd);

	fput(realfile);

	return err;
}

static void ovl_dir_reset(struct file *file)
{
	struct ovl_dir_file *od = file->private_data;
	struct ovl_dir_cache *cache = od->cache;
	struct inode *inode = file_inode(file);
	bool is_real;

	if (cache && ovl_inode_version_get(inode) != cache->version) {
		ovl_cache_put(od, inode);
		od->cache = NULL;
		od->cursor = NULL;
	}
	is_real = ovl_dir_is_real(inode);
	if (od->is_real != is_real) {
		/* is_real can only become false when dir is copied up */
		if (WARN_ON(is_real))
			return;
		od->is_real = false;
	}
}

static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list,
			       struct rb_root *root)
{
	int err;
	struct path realpath;
	struct ovl_readdir_data rdd = {
		.ctx.actor = ovl_fill_merge,
		.dentry = dentry,
		.list = list,
		.root = root,
		.is_lowest = false,
	};
	int idx, next;

	for (idx = 0; idx != -1; idx = next) {
		next = ovl_path_next(idx, dentry, &realpath);
		rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry;

		if (next != -1) {
			err = ovl_dir_read(&realpath, &rdd);
			if (err)
				break;
		} else {
			/*
			 * Insert lowest layer entries before upper ones, this
			 * allows offsets to be reasonably constant
			 */
			list_add(&rdd.middle, rdd.list);
			rdd.is_lowest = true;
			err = ovl_dir_read(&realpath, &rdd);
			list_del(&rdd.middle);
		}
	}
	return err;
}

static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
{
	struct list_head *p;
	loff_t off = 0;

	list_for_each(p, &od->cache->entries) {
		if (off >= pos)
			break;
		off++;
	}
	/* Cursor is safe since the cache is stable */
	od->cursor = p;
}
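
/*
 * Cache validity is tracked by an inode version number: ovl_cache_get()
 * below reuses the cached entry list only while cache->version still
 * matches ovl_inode_version_get(inode); directory modifications bump the
 * version and force a rebuild on the next readdir.
 */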

static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
{
	int res;
	struct ovl_dir_cache *cache;
	struct inode *inode = d_inode(dentry);

	cache = ovl_dir_cache(inode);
	if (cache && ovl_inode_version_get(inode) == cache->version) {
		WARN_ON(!cache->refcount);
		cache->refcount++;
		return cache;
	}
	ovl_set_dir_cache(d_inode(dentry), NULL);

	cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
	if (!cache)
		return ERR_PTR(-ENOMEM);

	cache->refcount = 1;
	INIT_LIST_HEAD(&cache->entries);
	cache->root = RB_ROOT;

	res = ovl_dir_read_merged(dentry, &cache->entries, &cache->root);
	if (res) {
		ovl_cache_free(&cache->entries);
		kfree(cache);
		return ERR_PTR(res);
	}

	cache->version = ovl_inode_version_get(inode);
	ovl_set_dir_cache(inode, cache);

	return cache;
}

/* Map inode number to lower fs unique range */
static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid,
			       const char *name, int namelen, bool warn)
{
	unsigned int xinoshift = 64 - xinobits;

	if (unlikely(ino >> xinoshift)) {
		if (warn) {
			pr_warn_ratelimited("d_ino too big (%.*s, ino=%llu, xinobits=%d)\n",
					    namelen, name, ino, xinobits);
		}
		return ino;
	}

	/*
	 * The lowest xinobit is reserved for mapping the non-persistent inode
	 * numbers range, but this range is only exposed via st_ino, not here.
	 */
	return ino | ((u64)fsid) << (xinoshift + 1);
}
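
/*
 * Illustrative xino layout (the numbers are an example, not a fixed ABI):
 * with xinobits == 8 we get xinoshift == 56, so the low 56 bits carry the
 * real inode number, bit 56 is the reserved non-persistent range bit and
 * the fsid lands in bits 57..63:
 *
 *	d_ino = real_ino | ((u64)fsid << 57);
 *
 * Real inode numbers that do not fit into xinoshift bits are passed
 * through unchanged, with an optional ratelimited warning.
 */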

/*
 * Set d_ino for upper entries if needed. Non-upper entries should always
 * report the uppermost real inode ino and should not call this function.
 *
 * When not all layers are on the same fs, report the real ino also for upper.
 *
 * When all layers are on the same fs, and upper has a reference to a copy up
 * origin, call vfs_getattr() on the overlay entry to make sure that d_ino
 * will be consistent with st_ino from stat(2).
 *
 * Also check the overlay.whiteout xattr by doing a full lookup, which
 * returns negative in this case.
 */
static int ovl_cache_update(const struct path *path, struct ovl_cache_entry *p,
			    bool update_ino)
{
	struct dentry *dir = path->dentry;
	struct ovl_fs *ofs = OVL_FS(dir->d_sb);
	struct dentry *this = NULL;
	enum ovl_path_type type;
	u64 ino = p->real_ino;
	int xinobits = ovl_xino_bits(ofs);
	int err = 0;

	if (!ovl_same_dev(ofs) && !p->check_xwhiteout)
		goto out;

	if (p->name[0] == '.') {
		if (p->len == 1) {
			this = dget(dir);
			goto get;
		}
		if (p->len == 2 && p->name[1] == '.') {
			/* we shall not be moved */
			this = dget(dir->d_parent);
			goto get;
		}
	}
	/* This checks also for xwhiteouts */
	this = lookup_one(mnt_idmap(path->mnt), p->name, dir, p->len);
	if (IS_ERR_OR_NULL(this) || !this->d_inode) {
		/* Mark a stale entry */
		p->is_whiteout = true;
		if (IS_ERR(this)) {
			err = PTR_ERR(this);
			this = NULL;
			goto fail;
		}
		goto out;
	}

get:
	if (!ovl_same_dev(ofs) || !update_ino)
		goto out;

	type = ovl_path_type(this);
	if (OVL_TYPE_ORIGIN(type)) {
		struct kstat stat;
		struct path statpath = *path;

		statpath.dentry = this;
		err = vfs_getattr(&statpath, &stat, STATX_INO, 0);
		if (err)
			goto fail;

		/*
		 * Directory inode is always on overlay st_dev.
		 * Non-dir with ovl_same_dev() could be on pseudo st_dev in case
		 * of xino bits overflow.
		 */
		WARN_ON_ONCE(S_ISDIR(stat.mode) &&
			     dir->d_sb->s_dev != stat.dev);
		ino = stat.ino;
	} else if (xinobits && !OVL_TYPE_UPPER(type)) {
		ino = ovl_remap_lower_ino(ino, xinobits,
					  ovl_layer_lower(this)->fsid,
					  p->name, p->len,
					  ovl_xino_warn(ofs));
	}

out:
	p->ino = ino;
	dput(this);
	return err;

fail:
	pr_warn_ratelimited("failed to look up (%s) for ino (%i)\n",
			    p->name, err);
	goto out;
}

static bool ovl_fill_plain(struct dir_context *ctx, const char *name,
			   int namelen, loff_t offset, u64 ino,
			   unsigned int d_type)
{
	struct ovl_cache_entry *p;
	struct ovl_readdir_data *rdd =
		container_of(ctx, struct ovl_readdir_data, ctx);

	rdd->count++;
	p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
	if (p == NULL) {
		rdd->err = -ENOMEM;
		return false;
	}
	list_add_tail(&p->l_node, rdd->list);

	return true;
}

static int ovl_dir_read_impure(const struct path *path, struct list_head *list,
			       struct rb_root *root)
{
	int err;
	struct path realpath;
	struct ovl_cache_entry *p, *n;
	struct ovl_readdir_data rdd = {
		.ctx.actor = ovl_fill_plain,
		.list = list,
		.root = root,
	};

	INIT_LIST_HEAD(list);
	*root = RB_ROOT;
	ovl_path_upper(path->dentry, &realpath);

	err = ovl_dir_read(&realpath, &rdd);
	if (err)
		return err;

	list_for_each_entry_safe(p, n, list, l_node) {
		if (strcmp(p->name, ".") != 0 &&
		    strcmp(p->name, "..") != 0) {
			err = ovl_cache_update(path, p, true);
			if (err)
				return err;
		}
		if (p->ino == p->real_ino) {
			list_del(&p->l_node);
			kfree(p);
		} else {
			struct rb_node **newp = &root->rb_node;
			struct rb_node *parent = NULL;

			if (WARN_ON(ovl_cache_entry_find_link(p->name, p->len,
							      &newp, &parent)))
				return -EIO;

			rb_link_node(&p->node, parent, newp);
			rb_insert_color(&p->node, root);
		}
	}
	return 0;
}
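
/*
 * The "impure" cache built by ovl_dir_read_impure() is sparser than the
 * merged-dir cache: it only keeps entries whose overlay d_ino differs from
 * the real d_ino (i.e. copied up entries), so ovl_fill_real() can fall
 * back to the untranslated inode number for everything else.
 */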

static struct ovl_dir_cache *ovl_cache_get_impure(const struct path *path)
{
	int res;
	struct dentry *dentry = path->dentry;
	struct inode *inode = d_inode(dentry);
	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
	struct ovl_dir_cache *cache;

	cache = ovl_dir_cache(inode);
	if (cache && ovl_inode_version_get(inode) == cache->version)
		return cache;

	/* Impure cache is not refcounted, free it here */
	ovl_dir_cache_free(inode);
	ovl_set_dir_cache(inode, NULL);

	cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
	if (!cache)
		return ERR_PTR(-ENOMEM);

	res = ovl_dir_read_impure(path, &cache->entries, &cache->root);
	if (res) {
		ovl_cache_free(&cache->entries);
		kfree(cache);
		return ERR_PTR(res);
	}
	if (list_empty(&cache->entries)) {
		/*
		 * A good opportunity to get rid of an unneeded "impure" flag.
		 * Removing the "impure" xattr is best effort.
		 */
		if (!ovl_want_write(dentry)) {
			ovl_removexattr(ofs, ovl_dentry_upper(dentry),
					OVL_XATTR_IMPURE);
			ovl_drop_write(dentry);
		}
		ovl_clear_flag(OVL_IMPURE, inode);
		kfree(cache);
		return NULL;
	}

	cache->version = ovl_inode_version_get(inode);
	ovl_set_dir_cache(inode, cache);

	return cache;
}

struct ovl_readdir_translate {
	struct dir_context *orig_ctx;
	struct ovl_dir_cache *cache;
	struct dir_context ctx;
	u64 parent_ino;
	int fsid;
	int xinobits;
	bool xinowarn;
};

static bool ovl_fill_real(struct dir_context *ctx, const char *name,
			  int namelen, loff_t offset, u64 ino,
			  unsigned int d_type)
{
	struct ovl_readdir_translate *rdt =
		container_of(ctx, struct ovl_readdir_translate, ctx);
	struct dir_context *orig_ctx = rdt->orig_ctx;

	if (rdt->parent_ino && strcmp(name, "..") == 0) {
		ino = rdt->parent_ino;
	} else if (rdt->cache) {
		struct ovl_cache_entry *p;

		p = ovl_cache_entry_find(&rdt->cache->root, name, namelen);
		if (p)
			ino = p->ino;
	} else if (rdt->xinobits) {
		ino = ovl_remap_lower_ino(ino, rdt->xinobits, rdt->fsid,
					  name, namelen, rdt->xinowarn);
	}

	return orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type);
}

static bool ovl_is_impure_dir(struct file *file)
{
	struct ovl_dir_file *od = file->private_data;
	struct inode *dir = file_inode(file);

	/*
	 * Only upper dir can be impure, but if we are in the middle of
	 * iterating a lower real dir, dir could be copied up and marked
	 * impure. We only want the impure cache if we started iterating
	 * a real upper dir to begin with.
	 */
	return od->is_upper && ovl_test_flag(OVL_IMPURE, dir);
}

static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
{
	int err;
	struct ovl_dir_file *od = file->private_data;
	struct dentry *dir = file->f_path.dentry;
	struct ovl_fs *ofs = OVL_FS(dir->d_sb);
	const struct ovl_layer *lower_layer = ovl_layer_lower(dir);
	struct ovl_readdir_translate rdt = {
		.ctx.actor = ovl_fill_real,
		.orig_ctx = ctx,
		.xinobits = ovl_xino_bits(ofs),
		.xinowarn = ovl_xino_warn(ofs),
	};

	if (rdt.xinobits && lower_layer)
		rdt.fsid = lower_layer->fsid;

	if (OVL_TYPE_MERGE(ovl_path_type(dir->d_parent))) {
		struct kstat stat;
		struct path statpath = file->f_path;

		statpath.dentry = dir->d_parent;
		err = vfs_getattr(&statpath, &stat, STATX_INO, 0);
		if (err)
			return err;

		WARN_ON_ONCE(dir->d_sb->s_dev != stat.dev);
		rdt.parent_ino = stat.ino;
	}

	if (ovl_is_impure_dir(file)) {
		rdt.cache = ovl_cache_get_impure(&file->f_path);
		if (IS_ERR(rdt.cache))
			return PTR_ERR(rdt.cache);
	}

	err = iterate_dir(od->realfile, &rdt.ctx);
	ctx->pos = rdt.ctx.pos;

	return err;
}
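
/*
 * Two iteration strategies follow: a "real" dir (a plain upper or lower
 * dir that needs no merging) is iterated directly, going through the
 * ovl_fill_real() translator only when d_ino needs fixing up; a merged
 * dir is iterated from the ovl_dir_cache entry list instead.
 */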

static int ovl_iterate(struct file *file, struct dir_context *ctx)
{
	struct ovl_dir_file *od = file->private_data;
	struct dentry *dentry = file->f_path.dentry;
	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
	struct ovl_cache_entry *p;
	const struct cred *old_cred;
	int err;

	old_cred = ovl_override_creds(dentry->d_sb);
	if (!ctx->pos)
		ovl_dir_reset(file);

	if (od->is_real) {
		/*
		 * If parent is merge, then need to adjust d_ino for '..', if
		 * dir is impure then need to adjust d_ino for copied up
		 * entries.
		 */
		if (ovl_xino_bits(ofs) ||
		    (ovl_same_fs(ofs) &&
		     (ovl_is_impure_dir(file) ||
		      OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent))))) {
			err = ovl_iterate_real(file, ctx);
		} else {
			err = iterate_dir(od->realfile, ctx);
		}
		goto out;
	}

	if (!od->cache) {
		struct ovl_dir_cache *cache;

		cache = ovl_cache_get(dentry);
		err = PTR_ERR(cache);
		if (IS_ERR(cache))
			goto out;

		od->cache = cache;
		ovl_seek_cursor(od, ctx->pos);
	}

	while (od->cursor != &od->cache->entries) {
		p = list_entry(od->cursor, struct ovl_cache_entry, l_node);
		if (!p->is_whiteout) {
			if (!p->ino || p->check_xwhiteout) {
				err = ovl_cache_update(&file->f_path, p, !p->ino);
				if (err)
					goto out;
			}
		}
		/* ovl_cache_update() sets is_whiteout on stale entry */
		if (!p->is_whiteout) {
			if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
				break;
		}
		od->cursor = p->l_node.next;
		ctx->pos++;
	}
	err = 0;
out:
	revert_creds(old_cred);
	return err;
}

static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
{
	loff_t res;
	struct ovl_dir_file *od = file->private_data;

	inode_lock(file_inode(file));
	if (!file->f_pos)
		ovl_dir_reset(file);

	if (od->is_real) {
		res = vfs_llseek(od->realfile, offset, origin);
		file->f_pos = od->realfile->f_pos;
	} else {
		res = -EINVAL;

		switch (origin) {
		case SEEK_CUR:
			offset += file->f_pos;
			break;
		case SEEK_SET:
			break;
		default:
			goto out_unlock;
		}
		if (offset < 0)
			goto out_unlock;

		if (offset != file->f_pos) {
			file->f_pos = offset;
			if (od->cache)
				ovl_seek_cursor(od, offset);
		}
		res = offset;
	}
out_unlock:
	inode_unlock(file_inode(file));

	return res;
}

static struct file *ovl_dir_open_realfile(const struct file *file,
					  const struct path *realpath)
{
	struct file *res;
	const struct cred *old_cred;

	old_cred = ovl_override_creds(file_inode(file)->i_sb);
	res = ovl_path_open(realpath, O_RDONLY | (file->f_flags & O_LARGEFILE));
	revert_creds(old_cred);

	return res;
}
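
/*
 * od->upperfile below is published locklessly: the first opener installs
 * its file with cmpxchg_release(), a racing loser fput()s its own copy and
 * uses the winner's, and later calls read it back with READ_ONCE().
 */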

/*
 * Like ovl_real_fdget(), returns upperfile if dir was copied up since open.
 * Unlike ovl_real_fdget(), this caches upperfile in file->private_data.
 *
 * TODO: use same abstract type for file->private_data of dir and file so
 * upperfile could also be cached for files as well.
 */
struct file *ovl_dir_real_file(const struct file *file, bool want_upper)
{
	struct ovl_dir_file *od = file->private_data;
	struct dentry *dentry = file->f_path.dentry;
	struct file *old, *realfile = od->realfile;

	if (!OVL_TYPE_UPPER(ovl_path_type(dentry)))
		return want_upper ? NULL : realfile;

	/*
	 * Need to check if we started out being a lower dir, but got copied up
	 */
	if (!od->is_upper) {
		realfile = READ_ONCE(od->upperfile);
		if (!realfile) {
			struct path upperpath;

			ovl_path_upper(dentry, &upperpath);
			realfile = ovl_dir_open_realfile(file, &upperpath);
			if (IS_ERR(realfile))
				return realfile;

			old = cmpxchg_release(&od->upperfile, NULL, realfile);
			if (old) {
				fput(realfile);
				realfile = old;
			}
		}
	}

	return realfile;
}

static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
			 int datasync)
{
	struct file *realfile;
	int err;

	err = ovl_sync_status(OVL_FS(file_inode(file)->i_sb));
	if (err <= 0)
		return err;

	realfile = ovl_dir_real_file(file, true);
	err = PTR_ERR_OR_ZERO(realfile);

	/* Nothing to sync for lower */
	if (!realfile || err)
		return err;

	return vfs_fsync_range(realfile, start, end, datasync);
}

static int ovl_dir_release(struct inode *inode, struct file *file)
{
	struct ovl_dir_file *od = file->private_data;

	if (od->cache) {
		inode_lock(inode);
		ovl_cache_put(od, inode);
		inode_unlock(inode);
	}
	fput(od->realfile);
	if (od->upperfile)
		fput(od->upperfile);
	kfree(od);

	return 0;
}

static int ovl_dir_open(struct inode *inode, struct file *file)
{
	struct path realpath;
	struct file *realfile;
	struct ovl_dir_file *od;
	enum ovl_path_type type;

	od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);
	if (!od)
		return -ENOMEM;

	type = ovl_path_real(file->f_path.dentry, &realpath);
	realfile = ovl_dir_open_realfile(file, &realpath);
	if (IS_ERR(realfile)) {
		kfree(od);
		return PTR_ERR(realfile);
	}
	od->realfile = realfile;
	od->is_real = ovl_dir_is_real(inode);
	od->is_upper = OVL_TYPE_UPPER(type);
	file->private_data = od;

	return 0;
}

WRAP_DIR_ITER(ovl_iterate) // FIXME!
const struct file_operations ovl_dir_operations = {
	.read		= generic_read_dir,
	.open		= ovl_dir_open,
	.iterate_shared	= shared_ovl_iterate,
	.llseek		= ovl_dir_llseek,
	.fsync		= ovl_dir_fsync,
	.release	= ovl_dir_release,
};
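
/*
 * Emptiness check used when deleting or renaming over a merged dir: the
 * dir counts as empty if it contains nothing but "." / ".." and whiteouts.
 * Whiteouts found in upperdir are kept on @list so the caller can clear
 * them before removing the directory (see ovl_cleanup_whiteouts() below).
 */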

int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
{
	int err;
	struct ovl_cache_entry *p, *n;
	struct rb_root root = RB_ROOT;
	const struct cred *old_cred;

	old_cred = ovl_override_creds(dentry->d_sb);
	err = ovl_dir_read_merged(dentry, list, &root);
	revert_creds(old_cred);
	if (err)
		return err;

	err = 0;

	list_for_each_entry_safe(p, n, list, l_node) {
		/*
		 * Select whiteouts in upperdir, they should
		 * be cleared when deleting this directory.
		 */
		if (p->is_whiteout) {
			if (p->is_upper)
				continue;
			goto del_entry;
		}

		if (p->name[0] == '.') {
			if (p->len == 1)
				goto del_entry;
			if (p->len == 2 && p->name[1] == '.')
				goto del_entry;
		}
		err = -ENOTEMPTY;
		break;

del_entry:
		list_del(&p->l_node);
		kfree(p);
	}

	return err;
}

void ovl_cleanup_whiteouts(struct ovl_fs *ofs, struct dentry *upper,
			   struct list_head *list)
{
	struct ovl_cache_entry *p;

	inode_lock_nested(upper->d_inode, I_MUTEX_CHILD);
	list_for_each_entry(p, list, l_node) {
		struct dentry *dentry;

		if (WARN_ON(!p->is_whiteout || !p->is_upper))
			continue;

		dentry = ovl_lookup_upper(ofs, p->name, upper, p->len);
		if (IS_ERR(dentry)) {
			pr_err("lookup '%s/%.*s' failed (%i)\n",
			       upper->d_name.name, p->len, p->name,
			       (int) PTR_ERR(dentry));
			continue;
		}
		if (dentry->d_inode)
			ovl_cleanup(ofs, upper->d_inode, dentry);
		dput(dentry);
	}
	inode_unlock(upper->d_inode);
}

static bool ovl_check_d_type(struct dir_context *ctx, const char *name,
			     int namelen, loff_t offset, u64 ino,
			     unsigned int d_type)
{
	struct ovl_readdir_data *rdd =
		container_of(ctx, struct ovl_readdir_data, ctx);

	/* Even if d_type is not supported, DT_DIR is returned for . and .. */
	if (!strncmp(name, ".", namelen) || !strncmp(name, "..", namelen))
		return true;

	if (d_type != DT_UNKNOWN)
		rdd->d_type_supported = true;

	return true;
}

/*
 * Returns 1 if d_type is supported, 0 if not supported/unknown, and a
 * negative value if an error is encountered.
 */
int ovl_check_d_type_supported(const struct path *realpath)
{
	int err;
	struct ovl_readdir_data rdd = {
		.ctx.actor = ovl_check_d_type,
		.d_type_supported = false,
	};

	err = ovl_dir_read(realpath, &rdd);
	if (err)
		return err;

	return rdd.d_type_supported;
}
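
/*
 * Mount-time cleanup of workdir leftovers: ovl_workdir_cleanup() first
 * tries a plain rmdir and only falls back to recursing into the directory
 * (depth-limited via @level) when the rmdir fails because it is not empty.
 */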

#define OVL_INCOMPATDIR_NAME "incompat"

static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, const struct path *path,
				       int level)
{
	int err;
	struct inode *dir = path->dentry->d_inode;
	LIST_HEAD(list);
	struct ovl_cache_entry *p;
	struct ovl_readdir_data rdd = {
		.ctx.actor = ovl_fill_plain,
		.list = &list,
	};
	bool incompat = false;

	/*
	 * The "work/incompat" directory is treated specially - if it is not
	 * empty, instead of printing a generic error and mounting read-only,
	 * we will error about incompat features and fail the mount.
	 *
	 * When called from ovl_indexdir_cleanup(), path->dentry->d_name.name
	 * starts with '#'.
	 */
	if (level == 2 &&
	    !strcmp(path->dentry->d_name.name, OVL_INCOMPATDIR_NAME))
		incompat = true;

	err = ovl_dir_read(path, &rdd);
	if (err)
		goto out;

	inode_lock_nested(dir, I_MUTEX_PARENT);
	list_for_each_entry(p, &list, l_node) {
		struct dentry *dentry;

		if (p->name[0] == '.') {
			if (p->len == 1)
				continue;
			if (p->len == 2 && p->name[1] == '.')
				continue;
		} else if (incompat) {
			pr_err("overlay with incompat feature '%s' cannot be mounted\n",
			       p->name);
			err = -EINVAL;
			break;
		}
		dentry = ovl_lookup_upper(ofs, p->name, path->dentry, p->len);
		if (IS_ERR(dentry))
			continue;
		if (dentry->d_inode)
			err = ovl_workdir_cleanup(ofs, dir, path->mnt, dentry, level);
		dput(dentry);
		if (err)
			break;
	}
	inode_unlock(dir);
out:
	ovl_cache_free(&list);
	return err;
}

int ovl_workdir_cleanup(struct ovl_fs *ofs, struct inode *dir,
			struct vfsmount *mnt, struct dentry *dentry, int level)
{
	int err;

	if (!d_is_dir(dentry) || level > 1)
		return ovl_cleanup(ofs, dir, dentry);

	err = ovl_do_rmdir(ofs, dir, dentry);
	if (err) {
		struct path path = { .mnt = mnt, .dentry = dentry };

		inode_unlock(dir);
		err = ovl_workdir_cleanup_recurse(ofs, &path, level + 1);
		inode_lock_nested(dir, I_MUTEX_PARENT);
		if (!err)
			err = ovl_cleanup(ofs, dir, dentry);
	}

	return err;
}
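
/*
 * Index entries are verified against their origin below: -ESTALE from
 * ovl_verify_index() marks a stale entry (removed); -ENOENT an orphan
 * (removed, or replaced with a whiteout when nfs_export is enabled so
 * that open-by-handle stays blocked); any other error aborts the mount
 * rather than risk corrupting the index.
 */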

int ovl_indexdir_cleanup(struct ovl_fs *ofs)
{
	int err;
	struct dentry *indexdir = ofs->indexdir;
	struct dentry *index = NULL;
	struct inode *dir = indexdir->d_inode;
	struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = indexdir };
	LIST_HEAD(list);
	struct ovl_cache_entry *p;
	struct ovl_readdir_data rdd = {
		.ctx.actor = ovl_fill_plain,
		.list = &list,
	};

	err = ovl_dir_read(&path, &rdd);
	if (err)
		goto out;

	inode_lock_nested(dir, I_MUTEX_PARENT);
	list_for_each_entry(p, &list, l_node) {
		if (p->name[0] == '.') {
			if (p->len == 1)
				continue;
			if (p->len == 2 && p->name[1] == '.')
				continue;
		}
		index = ovl_lookup_upper(ofs, p->name, indexdir, p->len);
		if (IS_ERR(index)) {
			err = PTR_ERR(index);
			index = NULL;
			break;
		}
		/* Cleanup leftover from index create/cleanup attempt */
		if (index->d_name.name[0] == '#') {
			err = ovl_workdir_cleanup(ofs, dir, path.mnt, index, 1);
			if (err)
				break;
			goto next;
		}
		err = ovl_verify_index(ofs, index);
		if (!err) {
			goto next;
		} else if (err == -ESTALE) {
			/* Cleanup stale index entries */
			err = ovl_cleanup(ofs, dir, index);
		} else if (err != -ENOENT) {
			/*
			 * Abort mount to avoid corrupting the index if
			 * an incompatible index entry was found or on out
			 * of memory.
			 */
			break;
		} else if (ofs->config.nfs_export) {
			/*
			 * Whiteout orphan index to block future open by
			 * handle after overlay nlink dropped to zero.
			 */
			err = ovl_cleanup_and_whiteout(ofs, dir, index);
		} else {
			/* Cleanup orphan index entries */
			err = ovl_cleanup(ofs, dir, index);
		}

		if (err)
			break;

next:
		dput(index);
		index = NULL;
	}
	dput(index);
	inode_unlock(dir);
out:
	ovl_cache_free(&list);
	if (err)
		pr_err("failed index dir cleanup (%i)\n", err);
	return err;
}