1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * 4 * Copyright (C) 2011 Novell Inc. 5 */ 6 7 #include <linux/fs.h> 8 #include <linux/slab.h> 9 #include <linux/namei.h> 10 #include <linux/file.h> 11 #include <linux/xattr.h> 12 #include <linux/rbtree.h> 13 #include <linux/security.h> 14 #include <linux/cred.h> 15 #include <linux/ratelimit.h> 16 #include <linux/overflow.h> 17 #include "overlayfs.h" 18 19 struct ovl_cache_entry { 20 unsigned int len; 21 unsigned int type; 22 u64 real_ino; 23 u64 ino; 24 struct list_head l_node; 25 struct rb_node node; 26 struct ovl_cache_entry *next_maybe_whiteout; 27 bool is_upper; 28 bool is_whiteout; 29 bool check_xwhiteout; 30 const char *c_name; 31 int c_len; 32 char name[]; 33 }; 34 35 struct ovl_dir_cache { 36 long refcount; 37 u64 version; 38 struct list_head entries; 39 struct rb_root root; 40 }; 41 42 struct ovl_readdir_data { 43 struct dir_context ctx; 44 struct dentry *dentry; 45 bool is_lowest; 46 struct rb_root *root; 47 struct list_head *list; 48 struct list_head middle; 49 struct ovl_cache_entry *first_maybe_whiteout; 50 struct unicode_map *map; 51 int count; 52 int err; 53 bool is_upper; 54 bool d_type_supported; 55 bool in_xwhiteouts_dir; 56 }; 57 58 struct ovl_dir_file { 59 bool is_real; 60 bool is_upper; 61 struct ovl_dir_cache *cache; 62 struct list_head *cursor; 63 struct file *realfile; 64 struct file *upperfile; 65 }; 66 67 static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n) 68 { 69 return rb_entry(n, struct ovl_cache_entry, node); 70 } 71 72 static int ovl_casefold(struct ovl_readdir_data *rdd, const char *str, int len, 73 char **dst) 74 { 75 const struct qstr qstr = { .name = str, .len = len }; 76 char *cf_name; 77 int cf_len; 78 79 if (!IS_ENABLED(CONFIG_UNICODE) || !rdd->map || is_dot_dotdot(str, len)) 80 return 0; 81 82 cf_name = kmalloc(NAME_MAX, GFP_KERNEL); 83 if (!cf_name) { 84 rdd->err = -ENOMEM; 85 return -ENOMEM; 86 } 87 88 cf_len = utf8_casefold(rdd->map, &qstr, cf_name, NAME_MAX); 89 if (cf_len > 0) 90 *dst = cf_name; 91 else 92 kfree(cf_name); 93 94 return cf_len; 95 } 96 97 static bool ovl_cache_entry_find_link(const char *name, int len, 98 struct rb_node ***link, 99 struct rb_node **parent) 100 { 101 bool found = false; 102 struct rb_node **newp = *link; 103 104 while (!found && *newp) { 105 int cmp; 106 struct ovl_cache_entry *tmp; 107 108 *parent = *newp; 109 tmp = ovl_cache_entry_from_node(*newp); 110 cmp = strncmp(name, tmp->c_name, len); 111 if (cmp > 0) 112 newp = &tmp->node.rb_right; 113 else if (cmp < 0 || len < tmp->c_len) 114 newp = &tmp->node.rb_left; 115 else 116 found = true; 117 } 118 *link = newp; 119 120 return found; 121 } 122 123 static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root, 124 const char *name, int len) 125 { 126 struct rb_node *node = root->rb_node; 127 int cmp; 128 129 while (node) { 130 struct ovl_cache_entry *p = ovl_cache_entry_from_node(node); 131 132 cmp = strncmp(name, p->c_name, len); 133 if (cmp > 0) 134 node = p->node.rb_right; 135 else if (cmp < 0 || len < p->c_len) 136 node = p->node.rb_left; 137 else 138 return p; 139 } 140 141 return NULL; 142 } 143 144 static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd, 145 struct ovl_cache_entry *p) 146 { 147 /* Don't care if not doing ovl_iter() */ 148 if (!rdd->dentry) 149 return false; 150 151 /* Always recalc d_ino when remapping lower inode numbers */ 152 if (ovl_xino_bits(OVL_FS(rdd->dentry->d_sb))) 153 return true; 154 155 /* Always recalc d_ino for parent */ 156 if (strcmp(p->name, "..") == 0) 157 return true; 158 159 /* If this is lower, then native d_ino will do */ 160 if (!rdd->is_upper) 161 return false; 162 163 /* 164 * Recalc d_ino for '.' and for all entries if dir is impure (contains 165 * copied up entries) 166 */ 167 if ((p->name[0] == '.' && p->len == 1) || 168 ovl_test_flag(OVL_IMPURE, d_inode(rdd->dentry))) 169 return true; 170 171 return false; 172 } 173 174 static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd, 175 const char *name, int len, 176 const char *c_name, int c_len, 177 u64 ino, unsigned int d_type) 178 { 179 struct ovl_cache_entry *p; 180 181 p = kmalloc(struct_size(p, name, len + 1), GFP_KERNEL); 182 if (!p) 183 return NULL; 184 185 memcpy(p->name, name, len); 186 p->name[len] = '\0'; 187 p->len = len; 188 p->type = d_type; 189 p->real_ino = ino; 190 p->ino = ino; 191 /* Defer setting d_ino for upper entry to ovl_iterate() */ 192 if (ovl_calc_d_ino(rdd, p)) 193 p->ino = 0; 194 p->is_upper = rdd->is_upper; 195 p->is_whiteout = false; 196 /* Defer check for overlay.whiteout to ovl_iterate() */ 197 p->check_xwhiteout = rdd->in_xwhiteouts_dir && d_type == DT_REG; 198 199 if (c_name && c_name != name) { 200 p->c_name = c_name; 201 p->c_len = c_len; 202 } else { 203 p->c_name = p->name; 204 p->c_len = len; 205 } 206 207 if (d_type == DT_CHR) { 208 p->next_maybe_whiteout = rdd->first_maybe_whiteout; 209 rdd->first_maybe_whiteout = p; 210 } 211 return p; 212 } 213 214 /* Return 0 for found, 1 for added, <0 for error */ 215 static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd, 216 const char *name, int len, 217 const char *c_name, int c_len, 218 u64 ino, 219 unsigned int d_type) 220 { 221 struct rb_node **newp = &rdd->root->rb_node; 222 struct rb_node *parent = NULL; 223 struct ovl_cache_entry *p; 224 225 if (ovl_cache_entry_find_link(c_name, c_len, &newp, &parent)) 226 return 0; 227 228 p = ovl_cache_entry_new(rdd, name, len, c_name, c_len, ino, d_type); 229 if (p == NULL) { 230 rdd->err = -ENOMEM; 231 return -ENOMEM; 232 } 233 234 list_add_tail(&p->l_node, rdd->list); 235 rb_link_node(&p->node, parent, newp); 236 rb_insert_color(&p->node, rdd->root); 237 238 return 1; 239 } 240 241 /* Return 0 for found, 1 for added, <0 for error */ 242 static int ovl_fill_lowest(struct ovl_readdir_data *rdd, 243 const char *name, int namelen, 244 const char *c_name, int c_len, 245 loff_t offset, u64 ino, unsigned int d_type) 246 { 247 struct ovl_cache_entry *p; 248 249 p = ovl_cache_entry_find(rdd->root, c_name, c_len); 250 if (p) { 251 list_move_tail(&p->l_node, &rdd->middle); 252 return 0; 253 } else { 254 p = ovl_cache_entry_new(rdd, name, namelen, c_name, c_len, 255 ino, d_type); 256 if (p == NULL) 257 rdd->err = -ENOMEM; 258 else 259 list_add_tail(&p->l_node, &rdd->middle); 260 } 261 262 return rdd->err ?: 1; 263 } 264 265 static void ovl_cache_entry_free(struct ovl_cache_entry *p) 266 { 267 if (p->c_name != p->name) 268 kfree(p->c_name); 269 kfree(p); 270 } 271 272 void ovl_cache_free(struct list_head *list) 273 { 274 struct ovl_cache_entry *p; 275 struct ovl_cache_entry *n; 276 277 list_for_each_entry_safe(p, n, list, l_node) 278 ovl_cache_entry_free(p); 279 280 INIT_LIST_HEAD(list); 281 } 282 283 void ovl_dir_cache_free(struct inode *inode) 284 { 285 struct ovl_dir_cache *cache = ovl_dir_cache(inode); 286 287 if (cache) { 288 ovl_cache_free(&cache->entries); 289 kfree(cache); 290 } 291 } 292 293 static void ovl_cache_put(struct ovl_dir_file *od, struct inode *inode) 294 { 295 struct ovl_dir_cache *cache = od->cache; 296 297 WARN_ON(cache->refcount <= 0); 298 cache->refcount--; 299 if (!cache->refcount) { 300 if (ovl_dir_cache(inode) == cache) 301 ovl_set_dir_cache(inode, NULL); 302 303 ovl_cache_free(&cache->entries); 304 kfree(cache); 305 } 306 } 307 308 static bool ovl_fill_merge(struct dir_context *ctx, const char *name, 309 int namelen, loff_t offset, u64 ino, 310 unsigned int d_type) 311 { 312 struct ovl_readdir_data *rdd = 313 container_of(ctx, struct ovl_readdir_data, ctx); 314 struct ovl_fs *ofs = OVL_FS(rdd->dentry->d_sb); 315 const char *c_name = NULL; 316 char *cf_name = NULL; 317 int c_len = 0, ret; 318 319 if (ofs->casefold) 320 c_len = ovl_casefold(rdd, name, namelen, &cf_name); 321 322 if (rdd->err) 323 return false; 324 325 if (c_len <= 0) { 326 c_name = name; 327 c_len = namelen; 328 } else { 329 c_name = cf_name; 330 } 331 332 rdd->count++; 333 if (!rdd->is_lowest) 334 ret = ovl_cache_entry_add_rb(rdd, name, namelen, c_name, c_len, ino, d_type); 335 else 336 ret = ovl_fill_lowest(rdd, name, namelen, c_name, c_len, offset, ino, d_type); 337 338 /* 339 * If ret == 1, that means that c_name is being used as part of struct 340 * ovl_cache_entry and will be freed at ovl_cache_free(). Otherwise, 341 * c_name was found in the rb-tree so we can free it here. 342 */ 343 if (ret != 1 && c_name != name) 344 kfree(c_name); 345 346 return ret >= 0; 347 } 348 349 static int ovl_check_whiteouts(const struct path *path, struct ovl_readdir_data *rdd) 350 { 351 struct dentry *dentry, *dir = path->dentry; 352 353 while (rdd->first_maybe_whiteout) { 354 struct ovl_cache_entry *p = 355 rdd->first_maybe_whiteout; 356 rdd->first_maybe_whiteout = p->next_maybe_whiteout; 357 dentry = lookup_one_positive_killable(mnt_idmap(path->mnt), 358 &QSTR_LEN(p->name, p->len), 359 dir); 360 if (!IS_ERR(dentry)) { 361 p->is_whiteout = ovl_is_whiteout(dentry); 362 dput(dentry); 363 } else if (PTR_ERR(dentry) == -EINTR) { 364 return -EINTR; 365 } 366 } 367 368 return 0; 369 } 370 371 static inline int ovl_dir_read(const struct path *realpath, 372 struct ovl_readdir_data *rdd) 373 { 374 struct file *realfile; 375 int err; 376 377 realfile = ovl_path_open(realpath, O_RDONLY | O_LARGEFILE); 378 if (IS_ERR(realfile)) 379 return PTR_ERR(realfile); 380 381 rdd->first_maybe_whiteout = NULL; 382 rdd->ctx.pos = 0; 383 do { 384 rdd->count = 0; 385 rdd->err = 0; 386 err = iterate_dir(realfile, &rdd->ctx); 387 if (err >= 0) 388 err = rdd->err; 389 } while (!err && rdd->count); 390 391 if (!err && rdd->first_maybe_whiteout && rdd->dentry) 392 err = ovl_check_whiteouts(realpath, rdd); 393 394 fput(realfile); 395 396 return err; 397 } 398 399 static void ovl_dir_reset(struct file *file) 400 { 401 struct ovl_dir_file *od = file->private_data; 402 struct ovl_dir_cache *cache = od->cache; 403 struct inode *inode = file_inode(file); 404 bool is_real; 405 406 if (cache && ovl_inode_version_get(inode) != cache->version) { 407 ovl_cache_put(od, inode); 408 od->cache = NULL; 409 od->cursor = NULL; 410 } 411 is_real = ovl_dir_is_real(inode); 412 if (od->is_real != is_real) { 413 /* is_real can only become false when dir is copied up */ 414 if (WARN_ON(is_real)) 415 return; 416 od->is_real = false; 417 } 418 } 419 420 static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list, 421 struct rb_root *root) 422 { 423 int err; 424 struct path realpath; 425 struct ovl_readdir_data rdd = { 426 .ctx.actor = ovl_fill_merge, 427 .ctx.count = INT_MAX, 428 .dentry = dentry, 429 .list = list, 430 .root = root, 431 .is_lowest = false, 432 .map = NULL, 433 }; 434 int idx, next; 435 const struct ovl_layer *layer; 436 struct ovl_fs *ofs = OVL_FS(dentry->d_sb); 437 438 for (idx = 0; idx != -1; idx = next) { 439 next = ovl_path_next(idx, dentry, &realpath, &layer); 440 441 if (ofs->casefold) 442 rdd.map = sb_encoding(realpath.dentry->d_sb); 443 444 rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry; 445 rdd.in_xwhiteouts_dir = layer->has_xwhiteouts && 446 ovl_dentry_has_xwhiteouts(dentry); 447 448 if (next != -1) { 449 err = ovl_dir_read(&realpath, &rdd); 450 if (err) 451 break; 452 } else { 453 /* 454 * Insert lowest layer entries before upper ones, this 455 * allows offsets to be reasonably constant 456 */ 457 list_add(&rdd.middle, rdd.list); 458 rdd.is_lowest = true; 459 err = ovl_dir_read(&realpath, &rdd); 460 list_del(&rdd.middle); 461 } 462 } 463 return err; 464 } 465 466 static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos) 467 { 468 struct list_head *p; 469 loff_t off = 0; 470 471 list_for_each(p, &od->cache->entries) { 472 if (off >= pos) 473 break; 474 off++; 475 } 476 /* Cursor is safe since the cache is stable */ 477 od->cursor = p; 478 } 479 480 static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry) 481 { 482 int res; 483 struct ovl_dir_cache *cache; 484 struct inode *inode = d_inode(dentry); 485 486 cache = ovl_dir_cache(inode); 487 if (cache && ovl_inode_version_get(inode) == cache->version) { 488 WARN_ON(!cache->refcount); 489 cache->refcount++; 490 return cache; 491 } 492 ovl_set_dir_cache(d_inode(dentry), NULL); 493 494 cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL); 495 if (!cache) 496 return ERR_PTR(-ENOMEM); 497 498 cache->refcount = 1; 499 INIT_LIST_HEAD(&cache->entries); 500 cache->root = RB_ROOT; 501 502 res = ovl_dir_read_merged(dentry, &cache->entries, &cache->root); 503 if (res) { 504 ovl_cache_free(&cache->entries); 505 kfree(cache); 506 return ERR_PTR(res); 507 } 508 509 cache->version = ovl_inode_version_get(inode); 510 ovl_set_dir_cache(inode, cache); 511 512 return cache; 513 } 514 515 /* Map inode number to lower fs unique range */ 516 static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid, 517 const char *name, int namelen, bool warn) 518 { 519 unsigned int xinoshift = 64 - xinobits; 520 521 if (unlikely(ino >> xinoshift)) { 522 if (warn) { 523 pr_warn_ratelimited("d_ino too big (%.*s, ino=%llu, xinobits=%d)\n", 524 namelen, name, ino, xinobits); 525 } 526 return ino; 527 } 528 529 /* 530 * The lowest xinobit is reserved for mapping the non-peresistent inode 531 * numbers range, but this range is only exposed via st_ino, not here. 532 */ 533 return ino | ((u64)fsid) << (xinoshift + 1); 534 } 535 536 /* 537 * Set d_ino for upper entries if needed. Non-upper entries should always report 538 * the uppermost real inode ino and should not call this function. 539 * 540 * When not all layer are on same fs, report real ino also for upper. 541 * 542 * When all layers are on the same fs, and upper has a reference to 543 * copy up origin, call vfs_getattr() on the overlay entry to make 544 * sure that d_ino will be consistent with st_ino from stat(2). 545 * 546 * Also checks the overlay.whiteout xattr by doing a full lookup which will return 547 * negative in this case. 548 */ 549 static int ovl_cache_update(const struct path *path, struct ovl_cache_entry *p, bool update_ino) 550 551 { 552 struct dentry *dir = path->dentry; 553 struct ovl_fs *ofs = OVL_FS(dir->d_sb); 554 struct dentry *this = NULL; 555 enum ovl_path_type type; 556 u64 ino = p->real_ino; 557 int xinobits = ovl_xino_bits(ofs); 558 int err = 0; 559 560 if (!ovl_same_dev(ofs) && !p->check_xwhiteout) 561 goto out; 562 563 if (p->name[0] == '.') { 564 if (p->len == 1) { 565 this = dget(dir); 566 goto get; 567 } 568 if (p->len == 2 && p->name[1] == '.') { 569 /* we shall not be moved */ 570 this = dget(dir->d_parent); 571 goto get; 572 } 573 } 574 /* This checks also for xwhiteouts */ 575 this = lookup_one(mnt_idmap(path->mnt), &QSTR_LEN(p->name, p->len), dir); 576 if (IS_ERR_OR_NULL(this) || !this->d_inode) { 577 /* Mark a stale entry */ 578 p->is_whiteout = true; 579 if (IS_ERR(this)) { 580 err = PTR_ERR(this); 581 this = NULL; 582 goto fail; 583 } 584 goto out; 585 } 586 587 get: 588 if (!ovl_same_dev(ofs) || !update_ino) 589 goto out; 590 591 type = ovl_path_type(this); 592 if (OVL_TYPE_ORIGIN(type)) { 593 struct kstat stat; 594 struct path statpath = *path; 595 596 statpath.dentry = this; 597 err = vfs_getattr(&statpath, &stat, STATX_INO, 0); 598 if (err) 599 goto fail; 600 601 /* 602 * Directory inode is always on overlay st_dev. 603 * Non-dir with ovl_same_dev() could be on pseudo st_dev in case 604 * of xino bits overflow. 605 */ 606 WARN_ON_ONCE(S_ISDIR(stat.mode) && 607 dir->d_sb->s_dev != stat.dev); 608 ino = stat.ino; 609 } else if (xinobits && !OVL_TYPE_UPPER(type)) { 610 ino = ovl_remap_lower_ino(ino, xinobits, 611 ovl_layer_lower(this)->fsid, 612 p->name, p->len, 613 ovl_xino_warn(ofs)); 614 } 615 616 out: 617 p->ino = ino; 618 dput(this); 619 return err; 620 621 fail: 622 pr_warn_ratelimited("failed to look up (%s) for ino (%i)\n", 623 p->name, err); 624 goto out; 625 } 626 627 static bool ovl_fill_plain(struct dir_context *ctx, const char *name, 628 int namelen, loff_t offset, u64 ino, 629 unsigned int d_type) 630 { 631 struct ovl_cache_entry *p; 632 struct ovl_readdir_data *rdd = 633 container_of(ctx, struct ovl_readdir_data, ctx); 634 635 rdd->count++; 636 p = ovl_cache_entry_new(rdd, name, namelen, NULL, 0, ino, d_type); 637 if (p == NULL) { 638 rdd->err = -ENOMEM; 639 return false; 640 } 641 list_add_tail(&p->l_node, rdd->list); 642 643 return true; 644 } 645 646 static int ovl_dir_read_impure(const struct path *path, struct list_head *list, 647 struct rb_root *root) 648 { 649 int err; 650 struct path realpath; 651 struct ovl_cache_entry *p, *n; 652 struct ovl_readdir_data rdd = { 653 .ctx.actor = ovl_fill_plain, 654 .ctx.count = INT_MAX, 655 .list = list, 656 .root = root, 657 }; 658 659 INIT_LIST_HEAD(list); 660 *root = RB_ROOT; 661 ovl_path_upper(path->dentry, &realpath); 662 663 err = ovl_dir_read(&realpath, &rdd); 664 if (err) 665 return err; 666 667 list_for_each_entry_safe(p, n, list, l_node) { 668 if (strcmp(p->name, ".") != 0 && 669 strcmp(p->name, "..") != 0) { 670 err = ovl_cache_update(path, p, true); 671 if (err) 672 return err; 673 } 674 if (p->ino == p->real_ino) { 675 list_del(&p->l_node); 676 ovl_cache_entry_free(p); 677 } else { 678 struct rb_node **newp = &root->rb_node; 679 struct rb_node *parent = NULL; 680 681 if (WARN_ON(ovl_cache_entry_find_link(p->name, p->len, 682 &newp, &parent))) 683 return -EIO; 684 685 rb_link_node(&p->node, parent, newp); 686 rb_insert_color(&p->node, root); 687 } 688 } 689 return 0; 690 } 691 692 static struct ovl_dir_cache *ovl_cache_get_impure(const struct path *path) 693 { 694 int res; 695 struct dentry *dentry = path->dentry; 696 struct inode *inode = d_inode(dentry); 697 struct ovl_fs *ofs = OVL_FS(dentry->d_sb); 698 struct ovl_dir_cache *cache; 699 700 cache = ovl_dir_cache(inode); 701 if (cache && ovl_inode_version_get(inode) == cache->version) 702 return cache; 703 704 /* Impure cache is not refcounted, free it here */ 705 ovl_dir_cache_free(inode); 706 ovl_set_dir_cache(inode, NULL); 707 708 cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL); 709 if (!cache) 710 return ERR_PTR(-ENOMEM); 711 712 res = ovl_dir_read_impure(path, &cache->entries, &cache->root); 713 if (res) { 714 ovl_cache_free(&cache->entries); 715 kfree(cache); 716 return ERR_PTR(res); 717 } 718 if (list_empty(&cache->entries)) { 719 /* 720 * A good opportunity to get rid of an unneeded "impure" flag. 721 * Removing the "impure" xattr is best effort. 722 */ 723 if (!ovl_want_write(dentry)) { 724 ovl_removexattr(ofs, ovl_dentry_upper(dentry), 725 OVL_XATTR_IMPURE); 726 ovl_drop_write(dentry); 727 } 728 ovl_clear_flag(OVL_IMPURE, inode); 729 kfree(cache); 730 return NULL; 731 } 732 733 cache->version = ovl_inode_version_get(inode); 734 ovl_set_dir_cache(inode, cache); 735 736 return cache; 737 } 738 739 struct ovl_readdir_translate { 740 struct dir_context *orig_ctx; 741 struct ovl_dir_cache *cache; 742 struct dir_context ctx; 743 u64 parent_ino; 744 int fsid; 745 int xinobits; 746 bool xinowarn; 747 }; 748 749 static bool ovl_fill_real(struct dir_context *ctx, const char *name, 750 int namelen, loff_t offset, u64 ino, 751 unsigned int d_type) 752 { 753 struct ovl_readdir_translate *rdt = 754 container_of(ctx, struct ovl_readdir_translate, ctx); 755 struct dir_context *orig_ctx = rdt->orig_ctx; 756 bool res; 757 758 if (rdt->parent_ino && strcmp(name, "..") == 0) { 759 ino = rdt->parent_ino; 760 } else if (rdt->cache) { 761 struct ovl_cache_entry *p; 762 763 p = ovl_cache_entry_find(&rdt->cache->root, name, namelen); 764 if (p) 765 ino = p->ino; 766 } else if (rdt->xinobits) { 767 ino = ovl_remap_lower_ino(ino, rdt->xinobits, rdt->fsid, 768 name, namelen, rdt->xinowarn); 769 } 770 771 res = orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type); 772 ctx->count = orig_ctx->count; 773 774 return res; 775 } 776 777 static bool ovl_is_impure_dir(struct file *file) 778 { 779 struct ovl_dir_file *od = file->private_data; 780 struct inode *dir = file_inode(file); 781 782 /* 783 * Only upper dir can be impure, but if we are in the middle of 784 * iterating a lower real dir, dir could be copied up and marked 785 * impure. We only want the impure cache if we started iterating 786 * a real upper dir to begin with. 787 */ 788 return od->is_upper && ovl_test_flag(OVL_IMPURE, dir); 789 790 } 791 792 static int ovl_iterate_real(struct file *file, struct dir_context *ctx) 793 { 794 int err; 795 struct ovl_dir_file *od = file->private_data; 796 struct dentry *dir = file->f_path.dentry; 797 struct ovl_fs *ofs = OVL_FS(dir->d_sb); 798 const struct ovl_layer *lower_layer = ovl_layer_lower(dir); 799 struct ovl_readdir_translate rdt = { 800 .ctx.actor = ovl_fill_real, 801 .ctx.count = ctx->count, 802 .orig_ctx = ctx, 803 .xinobits = ovl_xino_bits(ofs), 804 .xinowarn = ovl_xino_warn(ofs), 805 }; 806 807 if (rdt.xinobits && lower_layer) 808 rdt.fsid = lower_layer->fsid; 809 810 if (OVL_TYPE_MERGE(ovl_path_type(dir->d_parent))) { 811 struct kstat stat; 812 struct path statpath = file->f_path; 813 814 statpath.dentry = dir->d_parent; 815 err = vfs_getattr(&statpath, &stat, STATX_INO, 0); 816 if (err) 817 return err; 818 819 WARN_ON_ONCE(dir->d_sb->s_dev != stat.dev); 820 rdt.parent_ino = stat.ino; 821 } 822 823 if (ovl_is_impure_dir(file)) { 824 rdt.cache = ovl_cache_get_impure(&file->f_path); 825 if (IS_ERR(rdt.cache)) 826 return PTR_ERR(rdt.cache); 827 } 828 829 err = iterate_dir(od->realfile, &rdt.ctx); 830 ctx->pos = rdt.ctx.pos; 831 832 return err; 833 } 834 835 static int ovl_iterate_merged(struct file *file, struct dir_context *ctx) 836 { 837 struct ovl_dir_file *od = file->private_data; 838 struct dentry *dentry = file->f_path.dentry; 839 struct ovl_cache_entry *p; 840 int err = 0; 841 842 if (!od->cache) { 843 struct ovl_dir_cache *cache; 844 845 cache = ovl_cache_get(dentry); 846 err = PTR_ERR(cache); 847 if (IS_ERR(cache)) 848 return err; 849 850 od->cache = cache; 851 ovl_seek_cursor(od, ctx->pos); 852 } 853 854 while (od->cursor != &od->cache->entries) { 855 p = list_entry(od->cursor, struct ovl_cache_entry, l_node); 856 if (!p->is_whiteout) { 857 if (!p->ino || p->check_xwhiteout) { 858 err = ovl_cache_update(&file->f_path, p, !p->ino); 859 if (err) 860 return err; 861 } 862 } 863 /* ovl_cache_update() sets is_whiteout on stale entry */ 864 if (!p->is_whiteout) { 865 if (!dir_emit(ctx, p->name, p->len, p->ino, p->type)) 866 break; 867 } 868 od->cursor = p->l_node.next; 869 ctx->pos++; 870 } 871 return err; 872 } 873 874 static bool ovl_need_adjust_d_ino(struct file *file) 875 { 876 struct dentry *dentry = file->f_path.dentry; 877 struct ovl_fs *ofs = OVL_FS(dentry->d_sb); 878 879 /* If parent is merge, then need to adjust d_ino for '..' */ 880 if (ovl_xino_bits(ofs)) 881 return true; 882 883 /* Can't do consistent inode numbering */ 884 if (!ovl_same_fs(ofs)) 885 return false; 886 887 /* If dir is impure then need to adjust d_ino for copied up entries */ 888 if (ovl_is_impure_dir(file) || 889 OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent))) 890 return true; 891 892 /* Pure: no need to adjust d_ino */ 893 return false; 894 } 895 896 897 static int ovl_iterate(struct file *file, struct dir_context *ctx) 898 { 899 struct ovl_dir_file *od = file->private_data; 900 901 if (!ctx->pos) 902 ovl_dir_reset(file); 903 904 with_ovl_creds(file_dentry(file)->d_sb) { 905 if (!od->is_real) 906 return ovl_iterate_merged(file, ctx); 907 908 if (ovl_need_adjust_d_ino(file)) 909 return ovl_iterate_real(file, ctx); 910 911 return iterate_dir(od->realfile, ctx); 912 } 913 } 914 915 static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin) 916 { 917 loff_t res; 918 struct ovl_dir_file *od = file->private_data; 919 920 inode_lock(file_inode(file)); 921 if (!file->f_pos) 922 ovl_dir_reset(file); 923 924 if (od->is_real) { 925 res = vfs_llseek(od->realfile, offset, origin); 926 file->f_pos = od->realfile->f_pos; 927 } else { 928 res = -EINVAL; 929 930 switch (origin) { 931 case SEEK_CUR: 932 offset += file->f_pos; 933 break; 934 case SEEK_SET: 935 break; 936 default: 937 goto out_unlock; 938 } 939 if (offset < 0) 940 goto out_unlock; 941 942 if (offset != file->f_pos) { 943 file->f_pos = offset; 944 if (od->cache) 945 ovl_seek_cursor(od, offset); 946 } 947 res = offset; 948 } 949 out_unlock: 950 inode_unlock(file_inode(file)); 951 952 return res; 953 } 954 955 static struct file *ovl_dir_open_realfile(const struct file *file, 956 const struct path *realpath) 957 { 958 with_ovl_creds(file_inode(file)->i_sb) 959 return ovl_path_open(realpath, O_RDONLY | (file->f_flags & O_LARGEFILE)); 960 } 961 962 /* 963 * Like ovl_real_fdget(), returns upperfile if dir was copied up since open. 964 * Unlike ovl_real_fdget(), this caches upperfile in file->private_data. 965 * 966 * TODO: use same abstract type for file->private_data of dir and file so 967 * upperfile could also be cached for files as well. 968 */ 969 struct file *ovl_dir_real_file(const struct file *file, bool want_upper) 970 { 971 972 struct ovl_dir_file *od = file->private_data; 973 struct dentry *dentry = file->f_path.dentry; 974 struct file *old, *realfile = od->realfile; 975 976 if (!OVL_TYPE_UPPER(ovl_path_type(dentry))) 977 return want_upper ? NULL : realfile; 978 979 /* 980 * Need to check if we started out being a lower dir, but got copied up 981 */ 982 if (!od->is_upper) { 983 realfile = READ_ONCE(od->upperfile); 984 if (!realfile) { 985 struct path upperpath; 986 987 ovl_path_upper(dentry, &upperpath); 988 realfile = ovl_dir_open_realfile(file, &upperpath); 989 if (IS_ERR(realfile)) 990 return realfile; 991 992 old = cmpxchg_release(&od->upperfile, NULL, realfile); 993 if (old) { 994 fput(realfile); 995 realfile = old; 996 } 997 } 998 } 999 1000 return realfile; 1001 } 1002 1003 static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, 1004 int datasync) 1005 { 1006 struct file *realfile; 1007 int err; 1008 1009 err = ovl_sync_status(OVL_FS(file_inode(file)->i_sb)); 1010 if (err <= 0) 1011 return err; 1012 1013 realfile = ovl_dir_real_file(file, true); 1014 err = PTR_ERR_OR_ZERO(realfile); 1015 1016 /* Nothing to sync for lower */ 1017 if (!realfile || err) 1018 return err; 1019 1020 return vfs_fsync_range(realfile, start, end, datasync); 1021 } 1022 1023 static int ovl_dir_release(struct inode *inode, struct file *file) 1024 { 1025 struct ovl_dir_file *od = file->private_data; 1026 1027 if (od->cache) { 1028 inode_lock(inode); 1029 ovl_cache_put(od, inode); 1030 inode_unlock(inode); 1031 } 1032 fput(od->realfile); 1033 if (od->upperfile) 1034 fput(od->upperfile); 1035 kfree(od); 1036 1037 return 0; 1038 } 1039 1040 static int ovl_dir_open(struct inode *inode, struct file *file) 1041 { 1042 struct path realpath; 1043 struct file *realfile; 1044 struct ovl_dir_file *od; 1045 enum ovl_path_type type; 1046 1047 od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL); 1048 if (!od) 1049 return -ENOMEM; 1050 1051 type = ovl_path_real(file->f_path.dentry, &realpath); 1052 realfile = ovl_dir_open_realfile(file, &realpath); 1053 if (IS_ERR(realfile)) { 1054 kfree(od); 1055 return PTR_ERR(realfile); 1056 } 1057 od->realfile = realfile; 1058 od->is_real = ovl_dir_is_real(inode); 1059 od->is_upper = OVL_TYPE_UPPER(type); 1060 file->private_data = od; 1061 1062 return 0; 1063 } 1064 1065 WRAP_DIR_ITER(ovl_iterate) // FIXME! 1066 const struct file_operations ovl_dir_operations = { 1067 .read = generic_read_dir, 1068 .open = ovl_dir_open, 1069 .iterate_shared = shared_ovl_iterate, 1070 .llseek = ovl_dir_llseek, 1071 .fsync = ovl_dir_fsync, 1072 .release = ovl_dir_release, 1073 }; 1074 1075 int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list) 1076 { 1077 int err; 1078 struct ovl_cache_entry *p, *n; 1079 struct rb_root root = RB_ROOT; 1080 1081 with_ovl_creds(dentry->d_sb) 1082 err = ovl_dir_read_merged(dentry, list, &root); 1083 if (err) 1084 return err; 1085 1086 err = 0; 1087 1088 list_for_each_entry_safe(p, n, list, l_node) { 1089 /* 1090 * Select whiteouts in upperdir, they should 1091 * be cleared when deleting this directory. 1092 */ 1093 if (p->is_whiteout) { 1094 if (p->is_upper) 1095 continue; 1096 goto del_entry; 1097 } 1098 1099 if (p->name[0] == '.') { 1100 if (p->len == 1) 1101 goto del_entry; 1102 if (p->len == 2 && p->name[1] == '.') 1103 goto del_entry; 1104 } 1105 err = -ENOTEMPTY; 1106 break; 1107 1108 del_entry: 1109 list_del(&p->l_node); 1110 ovl_cache_entry_free(p); 1111 } 1112 1113 return err; 1114 } 1115 1116 void ovl_cleanup_whiteouts(struct ovl_fs *ofs, struct dentry *upper, 1117 struct list_head *list) 1118 { 1119 struct ovl_cache_entry *p; 1120 1121 list_for_each_entry(p, list, l_node) { 1122 struct dentry *dentry; 1123 1124 if (WARN_ON(!p->is_whiteout || !p->is_upper)) 1125 continue; 1126 1127 dentry = ovl_lookup_upper_unlocked(ofs, p->name, upper, p->len); 1128 if (IS_ERR(dentry)) { 1129 pr_err("lookup '%s/%.*s' failed (%i)\n", 1130 upper->d_name.name, p->len, p->name, 1131 (int) PTR_ERR(dentry)); 1132 continue; 1133 } 1134 if (dentry->d_inode) 1135 ovl_cleanup(ofs, upper, dentry); 1136 dput(dentry); 1137 } 1138 } 1139 1140 static bool ovl_check_d_type(struct dir_context *ctx, const char *name, 1141 int namelen, loff_t offset, u64 ino, 1142 unsigned int d_type) 1143 { 1144 struct ovl_readdir_data *rdd = 1145 container_of(ctx, struct ovl_readdir_data, ctx); 1146 1147 /* Even if d_type is not supported, DT_DIR is returned for . and .. */ 1148 if (!strncmp(name, ".", namelen) || !strncmp(name, "..", namelen)) 1149 return true; 1150 1151 if (d_type != DT_UNKNOWN) 1152 rdd->d_type_supported = true; 1153 1154 return true; 1155 } 1156 1157 /* 1158 * Returns 1 if d_type is supported, 0 not supported/unknown. Negative values 1159 * if error is encountered. 1160 */ 1161 int ovl_check_d_type_supported(const struct path *realpath) 1162 { 1163 int err; 1164 struct ovl_readdir_data rdd = { 1165 .ctx.actor = ovl_check_d_type, 1166 .ctx.count = INT_MAX, 1167 .d_type_supported = false, 1168 }; 1169 1170 err = ovl_dir_read(realpath, &rdd); 1171 if (err) 1172 return err; 1173 1174 return rdd.d_type_supported; 1175 } 1176 1177 #define OVL_INCOMPATDIR_NAME "incompat" 1178 1179 static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, const struct path *path, 1180 int level) 1181 { 1182 int err; 1183 LIST_HEAD(list); 1184 struct ovl_cache_entry *p; 1185 struct ovl_readdir_data rdd = { 1186 .ctx.actor = ovl_fill_plain, 1187 .ctx.count = INT_MAX, 1188 .list = &list, 1189 }; 1190 bool incompat = false; 1191 1192 /* 1193 * The "work/incompat" directory is treated specially - if it is not 1194 * empty, instead of printing a generic error and mounting read-only, 1195 * we will error about incompat features and fail the mount. 1196 * 1197 * When called from ovl_indexdir_cleanup(), path->dentry->d_name.name 1198 * starts with '#'. 1199 */ 1200 if (level == 2 && 1201 !strcmp(path->dentry->d_name.name, OVL_INCOMPATDIR_NAME)) 1202 incompat = true; 1203 1204 err = ovl_dir_read(path, &rdd); 1205 if (err) 1206 goto out; 1207 1208 list_for_each_entry(p, &list, l_node) { 1209 struct dentry *dentry; 1210 1211 if (p->name[0] == '.') { 1212 if (p->len == 1) 1213 continue; 1214 if (p->len == 2 && p->name[1] == '.') 1215 continue; 1216 } else if (incompat) { 1217 pr_err("overlay with incompat feature '%s' cannot be mounted\n", 1218 p->name); 1219 err = -EINVAL; 1220 break; 1221 } 1222 dentry = ovl_lookup_upper_unlocked(ofs, p->name, path->dentry, p->len); 1223 if (IS_ERR(dentry)) 1224 continue; 1225 if (dentry->d_inode) 1226 err = ovl_workdir_cleanup(ofs, path->dentry, path->mnt, 1227 dentry, level); 1228 dput(dentry); 1229 if (err) 1230 break; 1231 } 1232 out: 1233 ovl_cache_free(&list); 1234 return err; 1235 } 1236 1237 int ovl_workdir_cleanup(struct ovl_fs *ofs, struct dentry *parent, 1238 struct vfsmount *mnt, struct dentry *dentry, int level) 1239 { 1240 int err; 1241 1242 if (!d_is_dir(dentry) || level > 1) 1243 return ovl_cleanup(ofs, parent, dentry); 1244 1245 dentry = start_removing_dentry(parent, dentry); 1246 if (IS_ERR(dentry)) 1247 return PTR_ERR(dentry); 1248 err = ovl_do_rmdir(ofs, parent->d_inode, dentry); 1249 end_removing(dentry); 1250 if (err) { 1251 struct path path = { .mnt = mnt, .dentry = dentry }; 1252 1253 err = ovl_workdir_cleanup_recurse(ofs, &path, level + 1); 1254 if (!err) 1255 err = ovl_cleanup(ofs, parent, dentry); 1256 } 1257 1258 return err; 1259 } 1260 1261 int ovl_indexdir_cleanup(struct ovl_fs *ofs) 1262 { 1263 int err; 1264 struct dentry *indexdir = ofs->workdir; 1265 struct dentry *index = NULL; 1266 struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = indexdir }; 1267 LIST_HEAD(list); 1268 struct ovl_cache_entry *p; 1269 struct ovl_readdir_data rdd = { 1270 .ctx.actor = ovl_fill_plain, 1271 .ctx.count = INT_MAX, 1272 .list = &list, 1273 }; 1274 1275 err = ovl_dir_read(&path, &rdd); 1276 if (err) 1277 goto out; 1278 1279 list_for_each_entry(p, &list, l_node) { 1280 if (p->name[0] == '.') { 1281 if (p->len == 1) 1282 continue; 1283 if (p->len == 2 && p->name[1] == '.') 1284 continue; 1285 } 1286 index = ovl_lookup_upper_unlocked(ofs, p->name, indexdir, p->len); 1287 if (IS_ERR(index)) { 1288 err = PTR_ERR(index); 1289 index = NULL; 1290 break; 1291 } 1292 /* Cleanup leftover from index create/cleanup attempt */ 1293 if (index->d_name.name[0] == '#') { 1294 err = ovl_workdir_cleanup(ofs, indexdir, path.mnt, index, 1); 1295 if (err) 1296 break; 1297 goto next; 1298 } 1299 err = ovl_verify_index(ofs, index); 1300 if (!err) { 1301 goto next; 1302 } else if (err == -ESTALE) { 1303 /* Cleanup stale index entries */ 1304 err = ovl_cleanup(ofs, indexdir, index); 1305 } else if (err != -ENOENT) { 1306 /* 1307 * Abort mount to avoid corrupting the index if 1308 * an incompatible index entry was found or on out 1309 * of memory. 1310 */ 1311 break; 1312 } else if (ofs->config.nfs_export) { 1313 /* 1314 * Whiteout orphan index to block future open by 1315 * handle after overlay nlink dropped to zero. 1316 */ 1317 err = ovl_cleanup_and_whiteout(ofs, indexdir, index); 1318 } else { 1319 /* Cleanup orphan index entries */ 1320 err = ovl_cleanup(ofs, indexdir, index); 1321 } 1322 1323 if (err) 1324 break; 1325 1326 next: 1327 dput(index); 1328 index = NULL; 1329 } 1330 dput(index); 1331 out: 1332 ovl_cache_free(&list); 1333 if (err) 1334 pr_err("failed index dir cleanup (%i)\n", err); 1335 return err; 1336 } 1337