// SPDX-License-Identifier: GPL-2.0-only
/*
 *
 * Copyright (C) 2011 Novell Inc.
 */

#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/file.h>
#include <linux/filelock.h>
#include <linux/xattr.h>
#include <linux/rbtree.h>
#include <linux/security.h>
#include <linux/cred.h>
#include <linux/ratelimit.h>
#include <linux/overflow.h>
#include "overlayfs.h"

/*
 * One entry of a merged directory listing.  Entries live on a list (in
 * iteration order) and, while merging layers, also in an rb-tree keyed by
 * the (possibly casefolded) name so duplicates across layers are detected.
 */
struct ovl_cache_entry {
	unsigned int len;
	unsigned int type;
	u64 real_ino;		/* d_ino as reported by the real fs */
	u64 ino;		/* d_ino to emit; 0 means "recalculate later" */
	struct list_head l_node;
	struct rb_node node;
	/* Singly linked chain of DT_CHR entries that may be whiteouts */
	struct ovl_cache_entry *next_maybe_whiteout;
	bool is_upper;
	bool is_whiteout;
	/* Defer overlay.whiteout xattr check to ovl_iterate() */
	bool check_xwhiteout;
	/* Casefolded key for the rb-tree; points at name[] when not casefolded */
	const char *c_name;
	int c_len;
	char name[];
};

/* Refcounted, version-stamped cache of a merged directory's entries */
struct ovl_dir_cache {
	long refcount;
	u64 version;		/* ovl_inode_version_get() at fill time */
	struct list_head entries;
	struct rb_root root;
};

/* State shared between ovl_dir_read() and the dir_context actors below */
struct ovl_readdir_data {
	struct dir_context ctx;
	struct dentry *dentry;	/* NULL when not filling for ovl_iterate() */
	bool is_lowest;		/* filling the lowest layer (merge pass 2) */
	struct rb_root *root;
	struct list_head *list;
	struct list_head middle;	/* insertion point for lowest entries */
	struct ovl_cache_entry *first_maybe_whiteout;
	struct unicode_map *map;	/* non-NULL enables casefolding */
	int count;
	int err;
	bool is_upper;
	bool d_type_supported;
	bool in_xwhiteouts_dir;
};

/* Per-open-file state for an overlay directory */
struct ovl_dir_file {
	bool is_real;
	bool is_upper;
	struct ovl_dir_cache *cache;
	struct list_head *cursor;	/* position within cache->entries */
	struct file *realfile;
	struct file *upperfile;		/* lazily opened after copy up */
};

static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
{
	return rb_entry(n, struct ovl_cache_entry, node);
}

/*
 * Casefold @str into a freshly allocated buffer stored in *@dst.
 *
 * Returns the casefolded length (> 0) on success, 0 when casefolding does
 * not apply (no unicode map, or "."/".."), and a negative error (also
 * recorded in rdd->err) on allocation failure.
 */
static int ovl_casefold(struct ovl_readdir_data *rdd, const char *str, int len,
			char **dst)
{
	const struct qstr qstr = { .name = str, .len = len };
	char *cf_name;
	int cf_len;

	if (!IS_ENABLED(CONFIG_UNICODE) || !rdd->map ||
	    name_is_dot_dotdot(str, len))
		return 0;

	cf_name = kmalloc(NAME_MAX, GFP_KERNEL);
	if (!cf_name) {
		rdd->err = -ENOMEM;
		return -ENOMEM;
	}

	cf_len = utf8_casefold(rdd->map, &qstr, cf_name, NAME_MAX);
	if (cf_len > 0)
		*dst = cf_name;
	else
		kfree(cf_name);

	return cf_len;
}

/*
 * Find the rb-tree link where an entry named (@name, @len) belongs.
 * Returns true if an entry with that key already exists; otherwise *link
 * and *parent identify the insertion point for rb_link_node().
 */
static bool ovl_cache_entry_find_link(const char *name, int len,
				      struct rb_node ***link,
				      struct rb_node **parent)
{
	bool found = false;
	struct rb_node **newp = *link;

	while (!found && *newp) {
		int cmp;
		struct ovl_cache_entry *tmp;

		*parent = *newp;
		tmp = ovl_cache_entry_from_node(*newp);
		cmp = strncmp(name, tmp->c_name, len);
		if (cmp > 0)
			newp = &tmp->node.rb_right;
		else if (cmp < 0 || len < tmp->c_len)
			newp = &tmp->node.rb_left;
		else
			found = true;
	}
	*link = newp;

	return found;
}

/* Look up an entry by (casefolded) name in the cache rb-tree */
static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
						    const char *name, int len)
{
	struct rb_node *node = root->rb_node;
	int cmp;

	while (node) {
		struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);

		cmp = strncmp(name, p->c_name, len);
		if (cmp > 0)
			node = p->node.rb_right;
		else if (cmp < 0 || len < p->c_len)
			node = p->node.rb_left;
		else
			return p;
	}

	return NULL;
}

/*
 * Decide whether d_ino for this entry must be recalculated later in
 * ovl_iterate() (signalled by setting p->ino = 0) instead of using the
 * real fs inode number directly.
 */
static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd,
			   struct ovl_cache_entry *p)
{
	/* Don't care if not doing ovl_iter() */
	if (!rdd->dentry)
		return false;

	/* Always recalc d_ino when remapping lower inode numbers */
	if (ovl_xino_bits(OVL_FS(rdd->dentry->d_sb)))
		return true;

	/* Always recalc d_ino for parent */
	if (name_is_dotdot(p->name, p->len))
		return true;

	/* If this is lower, then native d_ino will do */
	if (!rdd->is_upper)
		return false;

	/*
	 * Recalc d_ino for '.' and for all entries if dir is impure (contains
	 * copied up entries)
	 */
	if (name_is_dot(p->name, p->len) ||
	    ovl_test_flag(OVL_IMPURE, d_inode(rdd->dentry)))
		return true;

	return false;
}

/*
 * Allocate a new cache entry.  @c_name/@c_len carry the casefolded key;
 * when NULL (or the same pointer as @name) the entry's own name is used as
 * key.  DT_CHR entries are chained for a deferred whiteout check, since
 * whiteouts are char devices.
 */
static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
						   const char *name, int len,
						   const char *c_name, int c_len,
						   u64 ino, unsigned int d_type)
{
	struct ovl_cache_entry *p;

	p = kmalloc_flex(*p, name, len + 1, GFP_KERNEL);
	if (!p)
		return NULL;

	memcpy(p->name, name, len);
	p->name[len] = '\0';
	p->len = len;
	p->type = d_type;
	p->real_ino = ino;
	p->ino = ino;
	/* Defer setting d_ino for upper entry to ovl_iterate() */
	if (ovl_calc_d_ino(rdd, p))
		p->ino = 0;
	p->is_upper = rdd->is_upper;
	p->is_whiteout = false;
	/* Defer check for overlay.whiteout to ovl_iterate() */
	p->check_xwhiteout = rdd->in_xwhiteouts_dir && d_type == DT_REG;

	if (c_name && c_name != name) {
		p->c_name = c_name;
		p->c_len = c_len;
	} else {
		p->c_name = p->name;
		p->c_len = len;
	}

	if (d_type == DT_CHR) {
		p->next_maybe_whiteout = rdd->first_maybe_whiteout;
		rdd->first_maybe_whiteout = p;
	}
	return p;
}

/* Return 0 for found, 1 for added, <0 for error */
static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
				  const char *name, int len,
				  const char *c_name, int c_len,
				  u64 ino,
				  unsigned int d_type)
{
	struct rb_node **newp = &rdd->root->rb_node;
	struct rb_node *parent = NULL;
	struct ovl_cache_entry *p;

	if (ovl_cache_entry_find_link(c_name, c_len, &newp, &parent))
		return 0;

	p = ovl_cache_entry_new(rdd, name, len, c_name, c_len, ino, d_type);
	if (p == NULL) {
		rdd->err = -ENOMEM;
		return -ENOMEM;
	}

	list_add_tail(&p->l_node, rdd->list);
	rb_link_node(&p->node, parent, newp);
	rb_insert_color(&p->node, rdd->root);

	return 1;
}

/* Return 0 for found, 1 for added, <0 for error */
static int ovl_fill_lowest(struct ovl_readdir_data *rdd,
			   const char *name, int namelen,
			   const char *c_name, int c_len,
			   loff_t offset, u64 ino, unsigned int d_type)
{
	struct ovl_cache_entry *p;

	p = ovl_cache_entry_find(rdd->root, c_name, c_len);
	if (p) {
		/* Already seen in an upper layer; keep the relative order */
		list_move_tail(&p->l_node, &rdd->middle);
		return 0;
	} else {
		p = ovl_cache_entry_new(rdd, name, namelen, c_name, c_len,
					ino, d_type);
		if (p == NULL)
			rdd->err = -ENOMEM;
		else
			list_add_tail(&p->l_node, &rdd->middle);
	}

	return rdd->err ?: 1;
}

static void ovl_cache_entry_free(struct ovl_cache_entry *p)
{
	/* c_name is a separate allocation only when casefolded */
	if (p->c_name != p->name)
		kfree(p->c_name);
	kfree(p);
}

void ovl_cache_free(struct list_head *list)
{
	struct ovl_cache_entry *p;
	struct ovl_cache_entry *n;

	list_for_each_entry_safe(p, n, list, l_node)
		ovl_cache_entry_free(p);

	INIT_LIST_HEAD(list);
}

void ovl_dir_cache_free(struct inode *inode)
{
	struct ovl_dir_cache *cache = ovl_dir_cache(inode);

	if (cache) {
		ovl_cache_free(&cache->entries);
		kfree(cache);
	}
}

/* Drop one reference; free the cache and detach it from the inode on zero */
static void ovl_cache_put(struct ovl_dir_file *od, struct inode *inode)
{
	struct ovl_dir_cache *cache = od->cache;

	WARN_ON(cache->refcount <= 0);
	cache->refcount--;
	if (!cache->refcount) {
		if (ovl_dir_cache(inode) == cache)
			ovl_set_dir_cache(inode, NULL);

		ovl_cache_free(&cache->entries);
		kfree(cache);
	}
}

/*
 * dir_context actor for merged readdir: add each real entry to the cache,
 * keyed by its casefolded name when the fs is casefolded.
 */
static bool ovl_fill_merge(struct dir_context *ctx, const char *name,
			   int namelen, loff_t offset, u64 ino,
			   unsigned int d_type)
{
	struct ovl_readdir_data *rdd =
		container_of(ctx, struct ovl_readdir_data, ctx);
	struct ovl_fs *ofs = OVL_FS(rdd->dentry->d_sb);
	const char *c_name = NULL;
	char *cf_name = NULL;
	int c_len = 0, ret;

	if (ofs->casefold)
		c_len = ovl_casefold(rdd, name, namelen, &cf_name);

	if (rdd->err)
		return false;

	if (c_len <= 0) {
		/* Not casefolded; key on the name itself */
		c_name = name;
		c_len = namelen;
	} else {
		c_name = cf_name;
	}

	rdd->count++;
	if (!rdd->is_lowest)
		ret = ovl_cache_entry_add_rb(rdd, name, namelen, c_name, c_len, ino, d_type);
	else
		ret = ovl_fill_lowest(rdd, name, namelen, c_name, c_len, offset, ino, d_type);

	/*
	 * If ret == 1, that means that c_name is being used as part of struct
	 * ovl_cache_entry and will be freed at ovl_cache_free(). Otherwise,
	 * c_name was found in the rb-tree so we can free it here.
	 */
	if (ret != 1 && c_name != name)
		kfree(c_name);

	return ret >= 0;
}

/*
 * Resolve the deferred whiteout checks: look up each chained DT_CHR entry
 * and mark it if it really is a whiteout.  Lookup errors other than -EINTR
 * leave the entry unmarked.
 */
static int ovl_check_whiteouts(const struct path *path, struct ovl_readdir_data *rdd)
{
	struct dentry *dentry, *dir = path->dentry;

	while (rdd->first_maybe_whiteout) {
		struct ovl_cache_entry *p =
			rdd->first_maybe_whiteout;
		rdd->first_maybe_whiteout = p->next_maybe_whiteout;
		dentry = lookup_one_positive_killable(mnt_idmap(path->mnt),
						      &QSTR_LEN(p->name, p->len),
						      dir);
		if (!IS_ERR(dentry)) {
			p->is_whiteout = ovl_is_whiteout(dentry);
			dput(dentry);
		} else if (PTR_ERR(dentry) == -EINTR) {
			return -EINTR;
		}
	}

	return 0;
}

/* Open @realpath and feed all of its entries through rdd->ctx.actor */
static inline int ovl_dir_read(const struct path *realpath,
			       struct ovl_readdir_data *rdd)
{
	struct file *realfile;
	int err;

	realfile = ovl_path_open(realpath, O_RDONLY | O_LARGEFILE);
	if (IS_ERR(realfile))
		return PTR_ERR(realfile);

	rdd->first_maybe_whiteout = NULL;
	rdd->ctx.pos = 0;
	do {
		rdd->count = 0;
		rdd->err = 0;
		err = iterate_dir(realfile, &rdd->ctx);
		if (err >= 0)
			err = rdd->err;
	} while (!err && rdd->count);

	if (!err && rdd->first_maybe_whiteout && rdd->dentry)
		err = ovl_check_whiteouts(realpath, rdd);

	fput(realfile);

	return err;
}

/*
 * Invalidate cached state if the directory changed since the cache was
 * filled, and track the real/merged transition after copy up.
 */
static void ovl_dir_reset(struct file *file)
{
	struct ovl_dir_file *od = file->private_data;
	struct ovl_dir_cache *cache = od->cache;
	struct inode *inode = file_inode(file);
	bool is_real;

	if (cache && ovl_inode_version_get(inode) != cache->version) {
		ovl_cache_put(od, inode);
		od->cache = NULL;
		od->cursor = NULL;
	}
	is_real = ovl_dir_is_real(inode);
	if (od->is_real != is_real) {
		/* is_real can only become false when dir is copied up */
		if (WARN_ON(is_real))
			return;
		od->is_real = false;
	}
}

/*
 * Read all layers of @dentry and merge them into @list/@root, upper layers
 * first so that upper entries win over lower ones with the same name.
 */
static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list,
			       struct rb_root *root)
{
	int err;
	struct path realpath;
	struct ovl_readdir_data rdd = {
		.ctx.actor = ovl_fill_merge,
		.ctx.count = INT_MAX,
		.dentry = dentry,
		.list = list,
		.root = root,
		.is_lowest = false,
		.map = NULL,
	};
	int idx, next;
	const struct ovl_layer *layer;
	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);

	for (idx = 0; idx != -1; idx = next) {
		next = ovl_path_next(idx, dentry, &realpath, &layer);

		if (ofs->casefold)
			rdd.map = sb_encoding(realpath.dentry->d_sb);

		rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry;
		rdd.in_xwhiteouts_dir = layer->has_xwhiteouts &&
					ovl_dentry_has_xwhiteouts(dentry);

		if (next != -1) {
			err = ovl_dir_read(&realpath, &rdd);
			if (err)
				break;
		} else {
			/*
			 * Insert lowest layer entries before upper ones, this
			 * allows offsets to be reasonably constant
			 */
			list_add(&rdd.middle, rdd.list);
			rdd.is_lowest = true;
			err = ovl_dir_read(&realpath, &rdd);
			list_del(&rdd.middle);
		}
	}
	return err;
}

/* Position the cursor at list offset @pos within the cached entries */
static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
{
	struct list_head *p;
	loff_t off = 0;

	list_for_each(p, &od->cache->entries) {
		if (off >= pos)
			break;
		off++;
	}
	/* Cursor is safe since the cache is stable */
	od->cursor = p;
}

/*
 * Get a reference to the (possibly cached) merged dir listing of @dentry,
 * filling it if the cached version is missing or stale.
 */
static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
{
	int res;
	struct ovl_dir_cache *cache;
	struct inode *inode = d_inode(dentry);

	cache = ovl_dir_cache(inode);
	if (cache && ovl_inode_version_get(inode) == cache->version) {
		WARN_ON(!cache->refcount);
		cache->refcount++;
		return cache;
	}
	ovl_set_dir_cache(d_inode(dentry), NULL);

	cache = kzalloc_obj(struct ovl_dir_cache, GFP_KERNEL);
	if (!cache)
		return ERR_PTR(-ENOMEM);

	cache->refcount = 1;
	INIT_LIST_HEAD(&cache->entries);
	cache->root = RB_ROOT;

	res = ovl_dir_read_merged(dentry, &cache->entries, &cache->root);
	if (res) {
		ovl_cache_free(&cache->entries);
		kfree(cache);
		return ERR_PTR(res);
	}

	cache->version = ovl_inode_version_get(inode);
	ovl_set_dir_cache(inode, cache);

	return cache;
}

/* Map inode number to lower fs unique range */
static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid,
			       const char *name, int namelen, bool warn)
{
	unsigned int xinoshift = 64 - xinobits;

	if (unlikely(ino >> xinoshift)) {
		if (warn) {
			pr_warn_ratelimited("d_ino too big (%.*s, ino=%llu, xinobits=%d)\n",
					    namelen, name, ino, xinobits);
		}
		return ino;
	}

	/*
	 * The lowest xinobit is reserved for mapping the non-peresistent inode
	 * numbers range, but this range is only exposed via st_ino, not here.
	 */
	return ino | ((u64)fsid) << (xinoshift + 1);
}

/*
 * Set d_ino for upper entries if needed. Non-upper entries should always report
 * the uppermost real inode ino and should not call this function.
 *
 * When not all layer are on same fs, report real ino also for upper.
 *
 * When all layers are on the same fs, and upper has a reference to
 * copy up origin, call vfs_getattr() on the overlay entry to make
 * sure that d_ino will be consistent with st_ino from stat(2).
 *
 * Also checks the overlay.whiteout xattr by doing a full lookup which will return
 * negative in this case.
 */
static int ovl_cache_update(const struct path *path, struct ovl_cache_entry *p, bool update_ino)
{
	struct dentry *dir = path->dentry;
	struct ovl_fs *ofs = OVL_FS(dir->d_sb);
	struct dentry *this = NULL;
	enum ovl_path_type type;
	u64 ino = p->real_ino;
	int xinobits = ovl_xino_bits(ofs);
	int err = 0;

	if (!ovl_same_dev(ofs) && !p->check_xwhiteout)
		goto out;

	if (name_is_dot_dotdot(p->name, p->len)) {
		if (p->len == 1) {
			this = dget(dir);
			goto get;
		}
		if (p->len == 2) {
			/* we shall not be moved */
			this = dget(dir->d_parent);
			goto get;
		}
	}
	/* This checks also for xwhiteouts */
	this = lookup_one(mnt_idmap(path->mnt), &QSTR_LEN(p->name, p->len), dir);
	if (IS_ERR_OR_NULL(this) || !this->d_inode) {
		/* Mark a stale entry */
		p->is_whiteout = true;
		if (IS_ERR(this)) {
			err = PTR_ERR(this);
			this = NULL;
			goto fail;
		}
		goto out;
	}

get:
	if (!ovl_same_dev(ofs) || !update_ino)
		goto out;

	type = ovl_path_type(this);
	if (OVL_TYPE_ORIGIN(type)) {
		struct kstat stat;
		struct path statpath = *path;

		statpath.dentry = this;
		err = vfs_getattr(&statpath, &stat, STATX_INO, 0);
		if (err)
			goto fail;

		/*
		 * Directory inode is always on overlay st_dev.
		 * Non-dir with ovl_same_dev() could be on pseudo st_dev in case
		 * of xino bits overflow.
		 */
		WARN_ON_ONCE(S_ISDIR(stat.mode) &&
			     dir->d_sb->s_dev != stat.dev);
		ino = stat.ino;
	} else if (xinobits && !OVL_TYPE_UPPER(type)) {
		ino = ovl_remap_lower_ino(ino, xinobits,
					  ovl_layer_lower(this)->fsid,
					  p->name, p->len,
					  ovl_xino_warn(ofs));
	}

out:
	p->ino = ino;
	dput(this);
	return err;

fail:
	pr_warn_ratelimited("failed to look up (%s) for ino (%i)\n",
			    p->name, err);
	goto out;
}

/* dir_context actor that just records every entry on rdd->list */
static bool ovl_fill_plain(struct dir_context *ctx, const char *name,
			   int namelen, loff_t offset, u64 ino,
			   unsigned int d_type)
{
	struct ovl_cache_entry *p;
	struct ovl_readdir_data *rdd =
		container_of(ctx, struct ovl_readdir_data, ctx);

	rdd->count++;
	p = ovl_cache_entry_new(rdd, name, namelen, NULL, 0, ino, d_type);
	if (p == NULL) {
		rdd->err = -ENOMEM;
		return false;
	}
	list_add_tail(&p->l_node, rdd->list);

	return true;
}

/*
 * Read the upper dir of an impure directory and keep only the entries whose
 * d_ino differs from the real inode number (i.e. copied up entries), indexed
 * by name for lookup from ovl_fill_real().
 */
static int ovl_dir_read_impure(const struct path *path, struct list_head *list,
			       struct rb_root *root)
{
	int err;
	struct path realpath;
	struct ovl_cache_entry *p, *n;
	struct ovl_readdir_data rdd = {
		.ctx.actor = ovl_fill_plain,
		.ctx.count = INT_MAX,
		.list = list,
		.root = root,
	};

	INIT_LIST_HEAD(list);
	*root = RB_ROOT;
	ovl_path_upper(path->dentry, &realpath);

	err = ovl_dir_read(&realpath, &rdd);
	if (err)
		return err;

	list_for_each_entry_safe(p, n, list, l_node) {
		if (!name_is_dot_dotdot(p->name, p->len)) {
			err = ovl_cache_update(path, p, true);
			if (err)
				return err;
		}
		if (p->ino == p->real_ino) {
			/* Not copied up; no d_ino fixup needed */
			list_del(&p->l_node);
			ovl_cache_entry_free(p);
		} else {
			struct rb_node **newp = &root->rb_node;
			struct rb_node *parent = NULL;

			if (WARN_ON(ovl_cache_entry_find_link(p->name, p->len,
							      &newp, &parent)))
				return -EIO;

			rb_link_node(&p->node, parent, newp);
			rb_insert_color(&p->node, root);
		}
	}
	return 0;
}

/*
 * Get the (not refcounted) impure-dir cache for @path, refilling it when
 * stale.  Returns NULL (and clears the "impure" state) when no entry needs
 * d_ino fixup.
 */
static struct ovl_dir_cache *ovl_cache_get_impure(const struct path *path)
{
	int res;
	struct dentry *dentry = path->dentry;
	struct inode *inode = d_inode(dentry);
	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
	struct ovl_dir_cache *cache;

	cache = ovl_dir_cache(inode);
	if (cache && ovl_inode_version_get(inode) == cache->version)
		return cache;

	/* Impure cache is not refcounted, free it here */
	ovl_dir_cache_free(inode);
	ovl_set_dir_cache(inode, NULL);

	cache = kzalloc_obj(struct ovl_dir_cache, GFP_KERNEL);
	if (!cache)
		return ERR_PTR(-ENOMEM);

	res = ovl_dir_read_impure(path, &cache->entries, &cache->root);
	if (res) {
		ovl_cache_free(&cache->entries);
		kfree(cache);
		return ERR_PTR(res);
	}
	if (list_empty(&cache->entries)) {
		/*
		 * A good opportunity to get rid of an unneeded "impure" flag.
		 * Removing the "impure" xattr is best effort.
		 */
		if (!ovl_want_write(dentry)) {
			ovl_removexattr(ofs, ovl_dentry_upper(dentry),
					OVL_XATTR_IMPURE);
			ovl_drop_write(dentry);
		}
		ovl_clear_flag(OVL_IMPURE, inode);
		kfree(cache);
		return NULL;
	}

	cache->version = ovl_inode_version_get(inode);
	ovl_set_dir_cache(inode, cache);

	return cache;
}

/* State for translating d_ino while passing real dir entries through */
struct ovl_readdir_translate {
	struct dir_context *orig_ctx;
	struct ovl_dir_cache *cache;	/* impure-dir cache, may be NULL */
	struct dir_context ctx;
	u64 parent_ino;			/* overlay ino to report for '..' */
	int fsid;
	int xinobits;
	bool xinowarn;
};

/*
 * dir_context actor wrapping the caller's actor: fix up d_ino for '..',
 * for copied up entries (via the impure cache) or by xino remapping, then
 * forward the entry to the original context.
 */
static bool ovl_fill_real(struct dir_context *ctx, const char *name,
			  int namelen, loff_t offset, u64 ino,
			  unsigned int d_type)
{
	struct ovl_readdir_translate *rdt =
		container_of(ctx, struct ovl_readdir_translate, ctx);
	struct dir_context *orig_ctx = rdt->orig_ctx;
	bool res;

	if (rdt->parent_ino && name_is_dotdot(name, namelen)) {
		ino = rdt->parent_ino;
	} else if (rdt->cache) {
		struct ovl_cache_entry *p;

		p = ovl_cache_entry_find(&rdt->cache->root, name, namelen);
		if (p)
			ino = p->ino;
	} else if (rdt->xinobits) {
		ino = ovl_remap_lower_ino(ino, rdt->xinobits, rdt->fsid,
					  name, namelen, rdt->xinowarn);
	}

	res = orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type);
	ctx->count = orig_ctx->count;

	return res;
}

static bool ovl_is_impure_dir(struct file *file)
{
	struct ovl_dir_file *od = file->private_data;
	struct inode *dir = file_inode(file);

	/*
	 * Only upper dir can be impure, but if we are in the middle of
	 * iterating a lower real dir, dir could be copied up and marked
	 * impure. We only want the impure cache if we started iterating
	 * a real upper dir to begin with.
	 */
	return od->is_upper && ovl_test_flag(OVL_IMPURE, dir);

}

/* Iterate a real dir, translating d_ino through ovl_fill_real() */
static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
{
	int err;
	struct ovl_dir_file *od = file->private_data;
	struct dentry *dir = file->f_path.dentry;
	struct ovl_fs *ofs = OVL_FS(dir->d_sb);
	const struct ovl_layer *lower_layer = ovl_layer_lower(dir);
	struct ovl_readdir_translate rdt = {
		.ctx.actor = ovl_fill_real,
		.ctx.count = ctx->count,
		.orig_ctx = ctx,
		.xinobits = ovl_xino_bits(ofs),
		.xinowarn = ovl_xino_warn(ofs),
	};

	if (rdt.xinobits && lower_layer)
		rdt.fsid = lower_layer->fsid;

	if (OVL_TYPE_MERGE(ovl_path_type(dir->d_parent))) {
		struct kstat stat;
		struct path statpath = file->f_path;

		statpath.dentry = dir->d_parent;
		err = vfs_getattr(&statpath, &stat, STATX_INO, 0);
		if (err)
			return err;

		WARN_ON_ONCE(dir->d_sb->s_dev != stat.dev);
		rdt.parent_ino = stat.ino;
	}

	if (ovl_is_impure_dir(file)) {
		rdt.cache = ovl_cache_get_impure(&file->f_path);
		if (IS_ERR(rdt.cache))
			return PTR_ERR(rdt.cache);
	}

	err = iterate_dir(od->realfile, &rdt.ctx);
	ctx->pos = rdt.ctx.pos;

	return err;
}

/* Emit entries of a merged dir from the cache, skipping whiteouts */
static int ovl_iterate_merged(struct file *file, struct dir_context *ctx)
{
	struct ovl_dir_file *od = file->private_data;
	struct dentry *dentry = file->f_path.dentry;
	struct ovl_cache_entry *p;
	int err = 0;

	if (!od->cache) {
		struct ovl_dir_cache *cache;

		cache = ovl_cache_get(dentry);
		err = PTR_ERR(cache);
		if (IS_ERR(cache))
			return err;

		od->cache = cache;
		ovl_seek_cursor(od, ctx->pos);
	}

	while (od->cursor != &od->cache->entries) {
		p = list_entry(od->cursor, struct ovl_cache_entry, l_node);
		if (!p->is_whiteout) {
			/* Resolve deferred d_ino and/or xwhiteout checks */
			if (!p->ino || p->check_xwhiteout) {
				err = ovl_cache_update(&file->f_path, p, !p->ino);
				if (err)
					return err;
			}
		}
		/* ovl_cache_update() sets is_whiteout on stale entry */
		if (!p->is_whiteout) {
			if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
				break;
		}
		od->cursor = p->l_node.next;
		ctx->pos++;
	}
	return err;
}

static bool ovl_need_adjust_d_ino(struct file *file)
{
	struct dentry *dentry = file->f_path.dentry;
	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);

	/* If parent is merge, then need to adjust d_ino for '..' */
	if (ovl_xino_bits(ofs))
		return true;

	/* Can't do consistent inode numbering */
	if (!ovl_same_fs(ofs))
		return false;

	/* If dir is impure then need to adjust d_ino for copied up entries */
	if (ovl_is_impure_dir(file) ||
	    OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent)))
		return true;

	/* Pure: no need to adjust d_ino */
	return false;
}


static int ovl_iterate(struct file *file, struct dir_context *ctx)
{
	struct ovl_dir_file *od = file->private_data;

	if (!ctx->pos)
		ovl_dir_reset(file);

	with_ovl_creds(file_dentry(file)->d_sb) {
		if (!od->is_real)
			return ovl_iterate_merged(file, ctx);

		if (ovl_need_adjust_d_ino(file))
			return ovl_iterate_real(file, ctx);

		return iterate_dir(od->realfile, ctx);
	}
}

static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
{
	loff_t res;
	struct ovl_dir_file *od = file->private_data;

	inode_lock(file_inode(file));
	if (!file->f_pos)
		ovl_dir_reset(file);

	if (od->is_real) {
		/* Delegate seeking to the real dir */
		res = vfs_llseek(od->realfile, offset, origin);
		file->f_pos = od->realfile->f_pos;
	} else {
		res = -EINVAL;

		switch (origin) {
		case SEEK_CUR:
			offset += file->f_pos;
			break;
		case SEEK_SET:
			break;
		default:
			goto out_unlock;
		}
		if (offset < 0)
			goto out_unlock;

		if (offset != file->f_pos) {
			file->f_pos = offset;
			if (od->cache)
				ovl_seek_cursor(od, offset);
		}
		res = offset;
	}
out_unlock:
	inode_unlock(file_inode(file));

	return res;
}

static struct file *ovl_dir_open_realfile(const struct file *file,
					  const struct path *realpath)
{
	with_ovl_creds(file_inode(file)->i_sb)
		return ovl_path_open(realpath, O_RDONLY | (file->f_flags & O_LARGEFILE));
}

/*
 * Like ovl_real_fdget(), returns upperfile if dir was copied up since open.
 * Unlike ovl_real_fdget(), this caches upperfile in file->private_data.
 *
 * TODO: use same abstract type for file->private_data of dir and file so
 * upperfile could also be cached for files as well.
 */
struct file *ovl_dir_real_file(const struct file *file, bool want_upper)
{

	struct ovl_dir_file *od = file->private_data;
	struct dentry *dentry = file->f_path.dentry;
	struct file *old, *realfile = od->realfile;

	if (!OVL_TYPE_UPPER(ovl_path_type(dentry)))
		return want_upper ? NULL : realfile;

	/*
	 * Need to check if we started out being a lower dir, but got copied up
	 */
	if (!od->is_upper) {
		realfile = READ_ONCE(od->upperfile);
		if (!realfile) {
			struct path upperpath;

			ovl_path_upper(dentry, &upperpath);
			realfile = ovl_dir_open_realfile(file, &upperpath);
			if (IS_ERR(realfile))
				return realfile;

			/* Cache it; lose the race gracefully if another opener won */
			old = cmpxchg_release(&od->upperfile, NULL, realfile);
			if (old) {
				fput(realfile);
				realfile = old;
			}
		}
	}

	return realfile;
}

static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
			 int datasync)
{
	struct file *realfile;
	int err;

	err = ovl_sync_status(OVL_FS(file_inode(file)->i_sb));
	if (err <= 0)
		return err;

	realfile = ovl_dir_real_file(file, true);
	err = PTR_ERR_OR_ZERO(realfile);

	/* Nothing to sync for lower */
	if (!realfile || err)
		return err;

	return vfs_fsync_range(realfile, start, end, datasync);
}

static int ovl_dir_release(struct inode *inode, struct file *file)
{
	struct ovl_dir_file *od = file->private_data;

	if (od->cache) {
		inode_lock(inode);
		ovl_cache_put(od, inode);
		inode_unlock(inode);
	}
	fput(od->realfile);
	if (od->upperfile)
		fput(od->upperfile);
	kfree(od);

	return 0;
}

static int ovl_dir_open(struct inode *inode, struct file *file)
{
	struct path realpath;
	struct file *realfile;
	struct ovl_dir_file *od;
	enum ovl_path_type type;

	od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);
	if (!od)
		return -ENOMEM;

	type = ovl_path_real(file->f_path.dentry, &realpath);
	realfile = ovl_dir_open_realfile(file, &realpath);
	if (IS_ERR(realfile)) {
		kfree(od);
		return PTR_ERR(realfile);
	}
	od->realfile = realfile;
	od->is_real = ovl_dir_is_real(inode);
	od->is_upper = OVL_TYPE_UPPER(type);
	file->private_data = od;

	return 0;
}

WRAP_DIR_ITER(ovl_iterate) // FIXME!
const struct file_operations ovl_dir_operations = {
	.read		= generic_read_dir,
	.open		= ovl_dir_open,
	.iterate_shared	= shared_ovl_iterate,
	.llseek		= ovl_dir_llseek,
	.fsync		= ovl_dir_fsync,
	.release	= ovl_dir_release,
	.setlease	= generic_setlease,
};

/*
 * Check whether the merged dir is empty; leave any upper-layer whiteouts
 * found on @list so the caller can clean them up.  Returns -ENOTEMPTY when
 * a real entry is found.
 */
int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
{
	int err;
	struct ovl_cache_entry *p, *n;
	struct rb_root root = RB_ROOT;

	with_ovl_creds(dentry->d_sb)
		err = ovl_dir_read_merged(dentry, list, &root);
	if (err)
		return err;

	err = 0;

	list_for_each_entry_safe(p, n, list, l_node) {
		/*
		 * Select whiteouts in upperdir, they should
		 * be cleared when deleting this directory.
		 */
		if (p->is_whiteout) {
			if (p->is_upper)
				continue;
			goto del_entry;
		}

		if (name_is_dot_dotdot(p->name, p->len))
			goto del_entry;

		err = -ENOTEMPTY;
		break;

del_entry:
		list_del(&p->l_node);
		ovl_cache_entry_free(p);
	}

	return err;
}

/* Remove the upper-layer whiteout entries collected by ovl_check_empty_dir() */
void ovl_cleanup_whiteouts(struct ovl_fs *ofs, struct dentry *upper,
			   struct list_head *list)
{
	struct ovl_cache_entry *p;

	list_for_each_entry(p, list, l_node) {
		struct dentry *dentry;

		if (WARN_ON(!p->is_whiteout || !p->is_upper))
			continue;

		dentry = ovl_lookup_upper_unlocked(ofs, p->name, upper, p->len);
		if (IS_ERR(dentry)) {
			pr_err("lookup '%s/%.*s' failed (%i)\n",
			       upper->d_name.name, p->len, p->name,
			       (int) PTR_ERR(dentry));
			continue;
		}
		if (dentry->d_inode)
			ovl_cleanup(ofs, upper, dentry);
		dput(dentry);
	}
}

/* dir_context actor that only probes whether the fs fills in d_type */
static bool ovl_check_d_type(struct dir_context *ctx, const char *name,
			     int namelen, loff_t offset, u64 ino,
			     unsigned int d_type)
{
	struct ovl_readdir_data *rdd =
		container_of(ctx, struct ovl_readdir_data, ctx);

	/* Even if d_type is not supported, DT_DIR is returned for . and .. */
	if (name_is_dot_dotdot(name, namelen))
		return true;

	if (d_type != DT_UNKNOWN)
		rdd->d_type_supported = true;

	return true;
}

/*
 * Returns 1 if d_type is supported, 0 not supported/unknown. Negative values
 * if error is encountered.
 */
int ovl_check_d_type_supported(const struct path *realpath)
{
	int err;
	struct ovl_readdir_data rdd = {
		.ctx.actor = ovl_check_d_type,
		.ctx.count = INT_MAX,
		.d_type_supported = false,
	};

	err = ovl_dir_read(realpath, &rdd);
	if (err)
		return err;

	return rdd.d_type_supported;
}

#define OVL_INCOMPATDIR_NAME "incompat"

static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, const struct path *path,
				       int level)
{
	int err;
	LIST_HEAD(list);
	struct ovl_cache_entry *p;
	struct ovl_readdir_data rdd = {
		.ctx.actor = ovl_fill_plain,
		.ctx.count = INT_MAX,
		.list = &list,
	};
	bool incompat = false;

	/*
	 * The "work/incompat" directory is treated specially - if it is not
	 * empty, instead of printing a generic error and mounting read-only,
	 * we will error about incompat features and fail the mount.
	 *
	 * When called from ovl_indexdir_cleanup(), path->dentry->d_name.name
	 * starts with '#'.
	 */
	if (level == 2 &&
	    !strcmp(path->dentry->d_name.name, OVL_INCOMPATDIR_NAME))
		incompat = true;

	err = ovl_dir_read(path, &rdd);
	if (err)
		goto out;

	list_for_each_entry(p, &list, l_node) {
		struct dentry *dentry;

		if (name_is_dot_dotdot(p->name, p->len)) {
			continue;
		} else if (incompat) {
			pr_err("overlay with incompat feature '%s' cannot be mounted\n",
				p->name);
			err = -EINVAL;
			break;
		}
		dentry = ovl_lookup_upper_unlocked(ofs, p->name, path->dentry, p->len);
		if (IS_ERR(dentry))
			continue;
		if (dentry->d_inode)
			err = ovl_workdir_cleanup(ofs, path->dentry, path->mnt,
						  dentry, level);
		dput(dentry);
		if (err)
			break;
	}
out:
	ovl_cache_free(&list);
	return err;
}

/*
 * Remove a leftover workdir entry; descend up to one directory level and
 * retry the rmdir after emptying a non-empty directory.
 */
int ovl_workdir_cleanup(struct ovl_fs *ofs, struct dentry *parent,
			struct vfsmount *mnt, struct dentry *dentry, int level)
{
	int err;

	if (!d_is_dir(dentry) || level > 1)
		return ovl_cleanup(ofs, parent, dentry);

	dentry = start_removing_dentry(parent, dentry);
	if (IS_ERR(dentry))
		return PTR_ERR(dentry);
	err = ovl_do_rmdir(ofs, parent->d_inode, dentry);
	end_removing(dentry);
	if (err) {
		struct path path = { .mnt = mnt, .dentry = dentry };

		/* Not empty - clean the contents, then remove the dir itself */
		err = ovl_workdir_cleanup_recurse(ofs, &path, level + 1);
		if (!err)
			err = ovl_cleanup(ofs, parent, dentry);
	}

	return err;
}

/* Verify all index entries at mount time, cleaning up stale/orphan ones */
int ovl_indexdir_cleanup(struct ovl_fs *ofs)
{
	int err;
	struct dentry *indexdir = ofs->workdir;
	struct dentry *index = NULL;
	struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = indexdir };
	LIST_HEAD(list);
	struct ovl_cache_entry *p;
	struct ovl_readdir_data rdd = {
		.ctx.actor = ovl_fill_plain,
		.ctx.count = INT_MAX,
		.list = &list,
	};

	err = ovl_dir_read(&path, &rdd);
	if (err)
		goto out;

	list_for_each_entry(p, &list, l_node) {
		if (name_is_dot_dotdot(p->name, p->len))
			continue;
		index = ovl_lookup_upper_unlocked(ofs, p->name, indexdir, p->len);
		if (IS_ERR(index)) {
			err = PTR_ERR(index);
			index = NULL;
			break;
		}
		/* Cleanup leftover from index create/cleanup attempt */
		if (index->d_name.name[0] == '#') {
			err = ovl_workdir_cleanup(ofs, indexdir, path.mnt, index, 1);
			if (err)
				break;
			goto next;
		}
		err = ovl_verify_index(ofs, index);
		if (!err) {
			goto next;
		} else if (err == -ESTALE) {
			/* Cleanup stale index entries */
			err = ovl_cleanup(ofs, indexdir, index);
		} else if (err != -ENOENT) {
			/*
			 * Abort mount to avoid corrupting the index if
			 * an incompatible index entry was found or on out
			 * of memory.
			 */
			break;
		} else if (ofs->config.nfs_export) {
			/*
			 * Whiteout orphan index to block future open by
			 * handle after overlay nlink dropped to zero.
			 */
			err = ovl_cleanup_and_whiteout(ofs, indexdir, index);
		} else {
			/* Cleanup orphan index entries */
			err = ovl_cleanup(ofs, indexdir, index);
		}

		if (err)
			break;

next:
		dput(index);
		index = NULL;
	}
	dput(index);
out:
	ovl_cache_free(&list);
	if (err)
		pr_err("failed index dir cleanup (%i)\n", err);
	return err;
}