// SPDX-License-Identifier: GPL-2.0-only
/*
 *
 * Copyright (C) 2011 Novell Inc.
 */

#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/file.h>
#include <linux/filelock.h>
#include <linux/xattr.h>
#include <linux/rbtree.h>
#include <linux/security.h>
#include <linux/cred.h>
#include <linux/ratelimit.h>
#include <linux/overflow.h>
#include "overlayfs.h"

/*
 * One merged directory entry.  Entries live both on a list (iteration
 * order) and in an rb-tree (duplicate detection across layers).
 */
struct ovl_cache_entry {
	unsigned int len;
	unsigned int type;
	u64 real_ino;		/* ino as reported by the real layer */
	u64 ino;		/* d_ino to emit; 0 = recalc deferred to iterate */
	struct list_head l_node;
	struct rb_node node;
	struct ovl_cache_entry *next_maybe_whiteout;
	bool is_upper;
	bool is_whiteout;
	bool check_xwhiteout;	/* defer overlay.whiteout xattr check */
	const char *c_name;	/* casefolded key; == name if not casefolded */
	int c_len;
	char name[];		/* original name, NUL terminated */
};

/* Per-directory cache of merged entries, shared by open dir files. */
struct ovl_dir_cache {
	long refcount;
	u64 version;		/* inode version the cache was built against */
	struct list_head entries;
	struct rb_root root;
};

/* State passed to the dir_context actors while filling the cache. */
struct ovl_readdir_data {
	struct dir_context ctx;
	struct dentry *dentry;
	bool is_lowest;		/* currently reading the lowest layer */
	struct rb_root *root;
	struct list_head *list;
	struct list_head middle; /* insertion point for lowest-layer entries */
	struct ovl_cache_entry *first_maybe_whiteout;
	struct unicode_map *map; /* non-NULL when casefolding is enabled */
	int count;
	int err;
	bool is_upper;
	bool d_type_supported;
	bool in_xwhiteouts_dir;
};

/* Per-open-file state for an overlay directory. */
struct ovl_dir_file {
	bool is_real;
	bool is_upper;
	struct ovl_dir_cache *cache;
	struct list_head *cursor;	/* position within cache->entries */
	struct file *realfile;
	struct file *upperfile;		/* lazily opened after copy up */
};

static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
{
	return rb_entry(n, struct ovl_cache_entry, node);
}

/*
 * Casefold @str into a freshly allocated buffer stored in *@dst.
 * Returns the folded length, 0 when casefolding does not apply
 * ("." / ".." or no map), or a negative error (also set in rdd->err).
 */
static int ovl_casefold(struct ovl_readdir_data *rdd, const char *str, int len,
			char **dst)
{
	const struct qstr qstr = { .name = str, .len = len };
	char *cf_name;
	int cf_len;

	if (!IS_ENABLED(CONFIG_UNICODE) || !rdd->map || is_dot_dotdot(str, len))
		return 0;

	cf_name = kmalloc(NAME_MAX, GFP_KERNEL);
	if (!cf_name) {
		rdd->err = -ENOMEM;
		return -ENOMEM;
	}

	cf_len = utf8_casefold(rdd->map, &qstr, cf_name, NAME_MAX);
	if (cf_len > 0)
		*dst = cf_name;
	else
		kfree(cf_name);

	return cf_len;
}

/*
 * Find the rb-tree link position for @name.  Returns true if an entry
 * with that (casefolded) name already exists; otherwise *link/*parent
 * are set up for rb_link_node().
 */
static bool ovl_cache_entry_find_link(const char *name, int len,
				      struct rb_node ***link,
				      struct rb_node **parent)
{
	bool found = false;
	struct rb_node **newp = *link;

	while (!found && *newp) {
		int cmp;
		struct ovl_cache_entry *tmp;

		*parent = *newp;
		tmp = ovl_cache_entry_from_node(*newp);
		cmp = strncmp(name, tmp->c_name, len);
		if (cmp > 0)
			newp = &tmp->node.rb_right;
		else if (cmp < 0 || len < tmp->c_len)
			newp = &tmp->node.rb_left;
		else
			found = true;
	}
	*link = newp;

	return found;
}

/* Look up an entry by (casefolded) name in the cache rb-tree. */
static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
						    const char *name, int len)
{
	struct rb_node *node = root->rb_node;
	int cmp;

	while (node) {
		struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);

		cmp = strncmp(name, p->c_name, len);
		if (cmp > 0)
			node = p->node.rb_right;
		else if (cmp < 0 || len < p->c_len)
			node = p->node.rb_left;
		else
			return p;
	}

	return NULL;
}

/*
 * Decide whether d_ino must be recalculated at ovl_iterate() time
 * instead of trusting the real layer's inode number.
 */
static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd,
			   struct ovl_cache_entry *p)
{
	/* Don't care if not doing ovl_iter() */
	if (!rdd->dentry)
		return false;

	/* Always recalc d_ino when remapping lower inode numbers */
	if (ovl_xino_bits(OVL_FS(rdd->dentry->d_sb)))
		return true;

	/* Always recalc d_ino for parent */
	if (strcmp(p->name, "..") == 0)
		return true;

	/* If this is lower, then native d_ino will do */
	if (!rdd->is_upper)
		return false;

	/*
	 * Recalc d_ino for '.' and for all entries if dir is impure (contains
	 * copied up entries)
	 */
	if ((p->name[0] == '.' && p->len == 1) ||
	    ovl_test_flag(OVL_IMPURE, d_inode(rdd->dentry)))
		return true;

	return false;
}

/*
 * Allocate and initialize a cache entry.  If @c_name differs from
 * @name, ownership of the casefolded buffer is taken by the entry.
 * DT_CHR entries are chained for a deferred whiteout check.
 */
static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
						   const char *name, int len,
						   const char *c_name, int c_len,
						   u64 ino, unsigned int d_type)
{
	struct ovl_cache_entry *p;

	p = kmalloc(struct_size(p, name, len + 1), GFP_KERNEL);
	if (!p)
		return NULL;

	memcpy(p->name, name, len);
	p->name[len] = '\0';
	p->len = len;
	p->type = d_type;
	p->real_ino = ino;
	p->ino = ino;
	/* Defer setting d_ino for upper entry to ovl_iterate() */
	if (ovl_calc_d_ino(rdd, p))
		p->ino = 0;
	p->is_upper = rdd->is_upper;
	p->is_whiteout = false;
	/* Defer check for overlay.whiteout to ovl_iterate() */
	p->check_xwhiteout = rdd->in_xwhiteouts_dir && d_type == DT_REG;

	if (c_name && c_name != name) {
		p->c_name = c_name;
		p->c_len = c_len;
	} else {
		p->c_name = p->name;
		p->c_len = len;
	}

	if (d_type == DT_CHR) {
		p->next_maybe_whiteout = rdd->first_maybe_whiteout;
		rdd->first_maybe_whiteout = p;
	}
	return p;
}

/* Return 0 for found, 1 for added, <0 for error */
static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
				  const char *name, int len,
				  const char *c_name, int c_len,
				  u64 ino,
				  unsigned int d_type)
{
	struct rb_node **newp = &rdd->root->rb_node;
	struct rb_node *parent = NULL;
	struct ovl_cache_entry *p;

	if (ovl_cache_entry_find_link(c_name, c_len, &newp, &parent))
		return 0;

	p = ovl_cache_entry_new(rdd, name, len, c_name, c_len, ino, d_type);
	if (p == NULL) {
		rdd->err = -ENOMEM;
		return -ENOMEM;
	}

	list_add_tail(&p->l_node, rdd->list);
	rb_link_node(&p->node, parent, newp);
	rb_insert_color(&p->node, rdd->root);

	return 1;
}

/* Return 0 for found, 1 for added, <0 for error */
static int ovl_fill_lowest(struct ovl_readdir_data *rdd,
			   const char *name, int namelen,
			   const char *c_name, int c_len,
			   loff_t offset, u64 ino, unsigned int d_type)
{
	struct ovl_cache_entry *p;

	p = ovl_cache_entry_find(rdd->root, c_name, c_len);
	if (p) {
		/* Already seen in an upper layer; keep it in the middle run */
		list_move_tail(&p->l_node, &rdd->middle);
		return 0;
	} else {
		p = ovl_cache_entry_new(rdd, name, namelen, c_name, c_len,
					ino, d_type);
		if (p == NULL)
			rdd->err = -ENOMEM;
		else
			list_add_tail(&p->l_node, &rdd->middle);
	}

	return rdd->err ?: 1;
}

/* Free one cache entry, including an owned casefolded name buffer. */
static void ovl_cache_entry_free(struct ovl_cache_entry *p)
{
	if (p->c_name != p->name)
		kfree(p->c_name);
	kfree(p);
}

/* Free all entries on @list and reinitialize it. */
void ovl_cache_free(struct list_head *list)
{
	struct ovl_cache_entry *p;
	struct ovl_cache_entry *n;

	list_for_each_entry_safe(p, n, list, l_node)
		ovl_cache_entry_free(p);

	INIT_LIST_HEAD(list);
}

/* Free the (non-refcounted) dir cache attached to @inode, if any. */
void ovl_dir_cache_free(struct inode *inode)
{
	struct ovl_dir_cache *cache = ovl_dir_cache(inode);

	if (cache) {
		ovl_cache_free(&cache->entries);
		kfree(cache);
	}
}

/* Drop one reference on the open file's cache; free on last put. */
static void ovl_cache_put(struct ovl_dir_file *od, struct inode *inode)
{
	struct ovl_dir_cache *cache = od->cache;

	WARN_ON(cache->refcount <= 0);
	cache->refcount--;
	if (!cache->refcount) {
		if (ovl_dir_cache(inode) == cache)
			ovl_set_dir_cache(inode, NULL);

		ovl_cache_free(&cache->entries);
		kfree(cache);
	}
}

/* dir_context actor used while building the merged dir cache. */
static bool ovl_fill_merge(struct dir_context *ctx, const char *name,
			   int namelen, loff_t offset, u64 ino,
			   unsigned int d_type)
{
	struct ovl_readdir_data *rdd =
		container_of(ctx, struct ovl_readdir_data, ctx);
	struct ovl_fs *ofs = OVL_FS(rdd->dentry->d_sb);
	const char *c_name = NULL;
	char *cf_name = NULL;
	int c_len = 0, ret;

	if (ofs->casefold)
		c_len = ovl_casefold(rdd, name, namelen, &cf_name);

	if (rdd->err)
		return false;

	if (c_len <= 0) {
		c_name = name;
		c_len = namelen;
	} else {
		c_name = cf_name;
	}

	rdd->count++;
	if (!rdd->is_lowest)
		ret = ovl_cache_entry_add_rb(rdd, name, namelen, c_name, c_len, ino, d_type);
	else
		ret = ovl_fill_lowest(rdd, name, namelen, c_name, c_len, offset, ino, d_type);

	/*
	 * If ret == 1, that means that c_name is being used as part of struct
	 * ovl_cache_entry and will be freed at ovl_cache_free(). Otherwise,
	 * c_name was found in the rb-tree so we can free it here.
	 */
	if (ret != 1 && c_name != name)
		kfree(c_name);

	return ret >= 0;
}

/*
 * Resolve the deferred whiteout check for all DT_CHR entries collected
 * during ovl_dir_read().  A killable lookup lets the caller bail out
 * with -EINTR.
 */
static int ovl_check_whiteouts(const struct path *path, struct ovl_readdir_data *rdd)
{
	struct dentry *dentry, *dir = path->dentry;

	while (rdd->first_maybe_whiteout) {
		struct ovl_cache_entry *p =
			rdd->first_maybe_whiteout;
		rdd->first_maybe_whiteout = p->next_maybe_whiteout;
		dentry = lookup_one_positive_killable(mnt_idmap(path->mnt),
						      &QSTR_LEN(p->name, p->len),
						      dir);
		if (!IS_ERR(dentry)) {
			p->is_whiteout = ovl_is_whiteout(dentry);
			dput(dentry);
		} else if (PTR_ERR(dentry) == -EINTR) {
			return -EINTR;
		}
	}

	return 0;
}

/* Read one real directory in full, feeding entries to rdd->ctx.actor. */
static inline int ovl_dir_read(const struct path *realpath,
			       struct ovl_readdir_data *rdd)
{
	struct file *realfile;
	int err;

	realfile = ovl_path_open(realpath, O_RDONLY | O_LARGEFILE);
	if (IS_ERR(realfile))
		return PTR_ERR(realfile);

	rdd->first_maybe_whiteout = NULL;
	rdd->ctx.pos = 0;
	do {
		rdd->count = 0;
		rdd->err = 0;
		err = iterate_dir(realfile, &rdd->ctx);
		if (err >= 0)
			err = rdd->err;
	} while (!err && rdd->count);

	if (!err && rdd->first_maybe_whiteout && rdd->dentry)
		err = ovl_check_whiteouts(realpath, rdd);

	fput(realfile);

	return err;
}

/*
 * Drop a stale cache (dir changed since the cache was built) and track
 * a lower dir becoming merged via copy up.
 */
static void ovl_dir_reset(struct file *file)
{
	struct ovl_dir_file *od = file->private_data;
	struct ovl_dir_cache *cache = od->cache;
	struct inode *inode = file_inode(file);
	bool is_real;

	if (cache && ovl_inode_version_get(inode) != cache->version) {
		ovl_cache_put(od, inode);
		od->cache = NULL;
		od->cursor = NULL;
	}
	is_real = ovl_dir_is_real(inode);
	if (od->is_real != is_real) {
		/* is_real can only become false when dir is copied up */
		if (WARN_ON(is_real))
			return;
		od->is_real = false;
	}
}

/*
 * Read all layers of a merged directory from top to bottom, merging
 * entries into @list / @root.
 */
static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list,
			       struct rb_root *root)
{
	int err;
	struct path realpath;
	struct ovl_readdir_data rdd = {
		.ctx.actor = ovl_fill_merge,
		.ctx.count = INT_MAX,
		.dentry = dentry,
		.list = list,
		.root = root,
		.is_lowest = false,
		.map = NULL,
	};
	int idx, next;
	const struct ovl_layer *layer;
	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);

	for (idx = 0; idx != -1; idx = next) {
		next = ovl_path_next(idx, dentry, &realpath, &layer);

		if (ofs->casefold)
			rdd.map = sb_encoding(realpath.dentry->d_sb);

		rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry;
		rdd.in_xwhiteouts_dir = layer->has_xwhiteouts &&
					ovl_dentry_has_xwhiteouts(dentry);

		if (next != -1) {
			err = ovl_dir_read(&realpath, &rdd);
			if (err)
				break;
		} else {
			/*
			 * Insert lowest layer entries before upper ones, this
			 * allows offsets to be reasonably constant
			 */
			list_add(&rdd.middle, rdd.list);
			rdd.is_lowest = true;
			err = ovl_dir_read(&realpath, &rdd);
			list_del(&rdd.middle);
		}
	}
	return err;
}

/* Position the open file's cursor at list offset @pos (linear walk). */
static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
{
	struct list_head *p;
	loff_t off = 0;

	list_for_each(p, &od->cache->entries) {
		if (off >= pos)
			break;
		off++;
	}
	/* Cursor is safe since the cache is stable */
	od->cursor = p;
}
/*
 * Get (or build) the refcounted merged-dir cache for @dentry.
 * An existing cache is reused only if its version still matches.
 */
static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
{
	int res;
	struct ovl_dir_cache *cache;
	struct inode *inode = d_inode(dentry);

	cache = ovl_dir_cache(inode);
	if (cache && ovl_inode_version_get(inode) == cache->version) {
		WARN_ON(!cache->refcount);
		cache->refcount++;
		return cache;
	}
	ovl_set_dir_cache(d_inode(dentry), NULL);

	cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
	if (!cache)
		return ERR_PTR(-ENOMEM);

	cache->refcount = 1;
	INIT_LIST_HEAD(&cache->entries);
	cache->root = RB_ROOT;

	res = ovl_dir_read_merged(dentry, &cache->entries, &cache->root);
	if (res) {
		ovl_cache_free(&cache->entries);
		kfree(cache);
		return ERR_PTR(res);
	}

	cache->version = ovl_inode_version_get(inode);
	ovl_set_dir_cache(inode, cache);

	return cache;
}

/* Map inode number to lower fs unique range */
static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid,
			       const char *name, int namelen, bool warn)
{
	unsigned int xinoshift = 64 - xinobits;

	if (unlikely(ino >> xinoshift)) {
		if (warn) {
			pr_warn_ratelimited("d_ino too big (%.*s, ino=%llu, xinobits=%d)\n",
					    namelen, name, ino, xinobits);
		}
		return ino;
	}

	/*
	 * The lowest xinobit is reserved for mapping the non-persistent inode
	 * numbers range, but this range is only exposed via st_ino, not here.
	 */
	return ino | ((u64)fsid) << (xinoshift + 1);
}

/*
 * Set d_ino for upper entries if needed. Non-upper entries should always report
 * the uppermost real inode ino and should not call this function.
 *
 * When not all layers are on the same fs, report real ino also for upper.
 *
 * When all layers are on the same fs, and upper has a reference to
 * copy up origin, call vfs_getattr() on the overlay entry to make
 * sure that d_ino will be consistent with st_ino from stat(2).
 *
 * Also checks the overlay.whiteout xattr by doing a full lookup which will return
 * negative in this case.
 */
static int ovl_cache_update(const struct path *path, struct ovl_cache_entry *p, bool update_ino)
{
	struct dentry *dir = path->dentry;
	struct ovl_fs *ofs = OVL_FS(dir->d_sb);
	struct dentry *this = NULL;
	enum ovl_path_type type;
	u64 ino = p->real_ino;
	int xinobits = ovl_xino_bits(ofs);
	int err = 0;

	if (!ovl_same_dev(ofs) && !p->check_xwhiteout)
		goto out;

	if (p->name[0] == '.') {
		if (p->len == 1) {
			this = dget(dir);
			goto get;
		}
		if (p->len == 2 && p->name[1] == '.') {
			/* we shall not be moved */
			this = dget(dir->d_parent);
			goto get;
		}
	}
	/* This checks also for xwhiteouts */
	this = lookup_one(mnt_idmap(path->mnt), &QSTR_LEN(p->name, p->len), dir);
	if (IS_ERR_OR_NULL(this) || !this->d_inode) {
		/* Mark a stale entry */
		p->is_whiteout = true;
		if (IS_ERR(this)) {
			err = PTR_ERR(this);
			this = NULL;
			goto fail;
		}
		goto out;
	}

get:
	if (!ovl_same_dev(ofs) || !update_ino)
		goto out;

	type = ovl_path_type(this);
	if (OVL_TYPE_ORIGIN(type)) {
		struct kstat stat;
		struct path statpath = *path;

		statpath.dentry = this;
		err = vfs_getattr(&statpath, &stat, STATX_INO, 0);
		if (err)
			goto fail;

		/*
		 * Directory inode is always on overlay st_dev.
		 * Non-dir with ovl_same_dev() could be on pseudo st_dev in case
		 * of xino bits overflow.
		 */
		WARN_ON_ONCE(S_ISDIR(stat.mode) &&
			     dir->d_sb->s_dev != stat.dev);
		ino = stat.ino;
	} else if (xinobits && !OVL_TYPE_UPPER(type)) {
		ino = ovl_remap_lower_ino(ino, xinobits,
					  ovl_layer_lower(this)->fsid,
					  p->name, p->len,
					  ovl_xino_warn(ofs));
	}

out:
	p->ino = ino;
	dput(this);
	return err;

fail:
	pr_warn_ratelimited("failed to look up (%s) for ino (%i)\n",
			    p->name, err);
	goto out;
}

/* dir_context actor that records every entry without merging. */
static bool ovl_fill_plain(struct dir_context *ctx, const char *name,
			   int namelen, loff_t offset, u64 ino,
			   unsigned int d_type)
{
	struct ovl_cache_entry *p;
	struct ovl_readdir_data *rdd =
		container_of(ctx, struct ovl_readdir_data, ctx);

	rdd->count++;
	p = ovl_cache_entry_new(rdd, name, namelen, NULL, 0, ino, d_type);
	if (p == NULL) {
		rdd->err = -ENOMEM;
		return false;
	}
	list_add_tail(&p->l_node, rdd->list);

	return true;
}

/*
 * Build the impure-dir cache: keep only entries whose d_ino differs
 * from the real ino (i.e. copied up entries needing translation).
 */
static int ovl_dir_read_impure(const struct path *path, struct list_head *list,
			       struct rb_root *root)
{
	int err;
	struct path realpath;
	struct ovl_cache_entry *p, *n;
	struct ovl_readdir_data rdd = {
		.ctx.actor = ovl_fill_plain,
		.ctx.count = INT_MAX,
		.list = list,
		.root = root,
	};

	INIT_LIST_HEAD(list);
	*root = RB_ROOT;
	ovl_path_upper(path->dentry, &realpath);

	err = ovl_dir_read(&realpath, &rdd);
	if (err)
		return err;

	list_for_each_entry_safe(p, n, list, l_node) {
		if (strcmp(p->name, ".") != 0 &&
		    strcmp(p->name, "..") != 0) {
			err = ovl_cache_update(path, p, true);
			if (err)
				return err;
		}
		if (p->ino == p->real_ino) {
			/* No translation needed; drop from the cache */
			list_del(&p->l_node);
			ovl_cache_entry_free(p);
		} else {
			struct rb_node **newp = &root->rb_node;
			struct rb_node *parent = NULL;

			if (WARN_ON(ovl_cache_entry_find_link(p->name, p->len,
							      &newp, &parent)))
				return -EIO;

			rb_link_node(&p->node, parent, newp);
			rb_insert_color(&p->node, root);
		}
	}
	return 0;
}

/*
 * Get (or build) the non-refcounted impure-dir cache.  Returns NULL
 * (not an error) when the dir turns out not to be impure after all.
 */
static struct ovl_dir_cache *ovl_cache_get_impure(const struct path *path)
{
	int res;
	struct dentry *dentry = path->dentry;
	struct inode *inode = d_inode(dentry);
	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
	struct ovl_dir_cache *cache;

	cache = ovl_dir_cache(inode);
	if (cache && ovl_inode_version_get(inode) == cache->version)
		return cache;

	/* Impure cache is not refcounted, free it here */
	ovl_dir_cache_free(inode);
	ovl_set_dir_cache(inode, NULL);

	cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
	if (!cache)
		return ERR_PTR(-ENOMEM);

	res = ovl_dir_read_impure(path, &cache->entries, &cache->root);
	if (res) {
		ovl_cache_free(&cache->entries);
		kfree(cache);
		return ERR_PTR(res);
	}
	if (list_empty(&cache->entries)) {
		/*
		 * A good opportunity to get rid of an unneeded "impure" flag.
		 * Removing the "impure" xattr is best effort.
		 */
		if (!ovl_want_write(dentry)) {
			ovl_removexattr(ofs, ovl_dentry_upper(dentry),
					OVL_XATTR_IMPURE);
			ovl_drop_write(dentry);
		}
		ovl_clear_flag(OVL_IMPURE, inode);
		kfree(cache);
		return NULL;
	}

	cache->version = ovl_inode_version_get(inode);
	ovl_set_dir_cache(inode, cache);

	return cache;
}

/* State for translating d_ino while iterating a real directory. */
struct ovl_readdir_translate {
	struct dir_context *orig_ctx;
	struct ovl_dir_cache *cache;	/* impure-dir cache, may be NULL */
	struct dir_context ctx;
	u64 parent_ino;			/* overlay ino to report for ".." */
	int fsid;
	int xinobits;
	bool xinowarn;
};

/* dir_context actor that rewrites d_ino and forwards to the original ctx. */
static bool ovl_fill_real(struct dir_context *ctx, const char *name,
			  int namelen, loff_t offset, u64 ino,
			  unsigned int d_type)
{
	struct ovl_readdir_translate *rdt =
		container_of(ctx, struct ovl_readdir_translate, ctx);
	struct dir_context *orig_ctx = rdt->orig_ctx;
	bool res;

	if (rdt->parent_ino && strcmp(name, "..") == 0) {
		ino = rdt->parent_ino;
	} else if (rdt->cache) {
		struct ovl_cache_entry *p;

		p = ovl_cache_entry_find(&rdt->cache->root, name, namelen);
		if (p)
			ino = p->ino;
	} else if (rdt->xinobits) {
		ino = ovl_remap_lower_ino(ino, rdt->xinobits, rdt->fsid,
					  name, namelen, rdt->xinowarn);
	}

	res = orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type);
	ctx->count = orig_ctx->count;

	return res;
}

static bool ovl_is_impure_dir(struct file *file)
{
	struct ovl_dir_file *od = file->private_data;
	struct inode *dir = file_inode(file);

	/*
	 * Only upper dir can be impure, but if we are in the middle of
	 * iterating a lower real dir, dir could be copied up and marked
	 * impure. We only want the impure cache if we started iterating
	 * a real upper dir to begin with.
	 */
	return od->is_upper && ovl_test_flag(OVL_IMPURE, dir);
}

/* Iterate a real directory while translating d_ino values on the fly. */
static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
{
	int err;
	struct ovl_dir_file *od = file->private_data;
	struct dentry *dir = file->f_path.dentry;
	struct ovl_fs *ofs = OVL_FS(dir->d_sb);
	const struct ovl_layer *lower_layer = ovl_layer_lower(dir);
	struct ovl_readdir_translate rdt = {
		.ctx.actor = ovl_fill_real,
		.ctx.count = ctx->count,
		.orig_ctx = ctx,
		.xinobits = ovl_xino_bits(ofs),
		.xinowarn = ovl_xino_warn(ofs),
	};

	if (rdt.xinobits && lower_layer)
		rdt.fsid = lower_layer->fsid;

	if (OVL_TYPE_MERGE(ovl_path_type(dir->d_parent))) {
		struct kstat stat;
		struct path statpath = file->f_path;

		statpath.dentry = dir->d_parent;
		err = vfs_getattr(&statpath, &stat, STATX_INO, 0);
		if (err)
			return err;

		WARN_ON_ONCE(dir->d_sb->s_dev != stat.dev);
		rdt.parent_ino = stat.ino;
	}

	if (ovl_is_impure_dir(file)) {
		rdt.cache = ovl_cache_get_impure(&file->f_path);
		if (IS_ERR(rdt.cache))
			return PTR_ERR(rdt.cache);
	}

	err = iterate_dir(od->realfile, &rdt.ctx);
	ctx->pos = rdt.ctx.pos;

	return err;
}

/* Iterate a merged directory from its entry cache. */
static int ovl_iterate_merged(struct file *file, struct dir_context *ctx)
{
	struct ovl_dir_file *od = file->private_data;
	struct dentry *dentry = file->f_path.dentry;
	struct ovl_cache_entry *p;
	int err = 0;

	if (!od->cache) {
		struct ovl_dir_cache *cache;

		cache = ovl_cache_get(dentry);
		err = PTR_ERR(cache);
		if (IS_ERR(cache))
			return err;

		od->cache = cache;
		ovl_seek_cursor(od, ctx->pos);
	}

	while (od->cursor != &od->cache->entries) {
		p = list_entry(od->cursor, struct ovl_cache_entry, l_node);
		if (!p->is_whiteout) {
			/* Resolve deferred d_ino and/or xwhiteout check */
			if (!p->ino || p->check_xwhiteout) {
				err = ovl_cache_update(&file->f_path, p, !p->ino);
				if (err)
					return err;
			}
		}
		/* ovl_cache_update() sets is_whiteout on stale entry */
		if (!p->is_whiteout) {
			if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
				break;
		}
		od->cursor = p->l_node.next;
		ctx->pos++;
	}
	return err;
}

/* Does iterating this real dir require d_ino translation? */
static bool ovl_need_adjust_d_ino(struct file *file)
{
	struct dentry *dentry = file->f_path.dentry;
	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);

	/* If parent is merge, then need to adjust d_ino for '..' */
	if (ovl_xino_bits(ofs))
		return true;

	/* Can't do consistent inode numbering */
	if (!ovl_same_fs(ofs))
		return false;

	/* If dir is impure then need to adjust d_ino for copied up entries */
	if (ovl_is_impure_dir(file) ||
	    OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent)))
		return true;

	/* Pure: no need to adjust d_ino */
	return false;
}

static int ovl_iterate(struct file *file, struct dir_context *ctx)
{
	struct ovl_dir_file *od = file->private_data;

	if (!ctx->pos)
		ovl_dir_reset(file);

	with_ovl_creds(file_dentry(file)->d_sb) {
		if (!od->is_real)
			return ovl_iterate_merged(file, ctx);

		if (ovl_need_adjust_d_ino(file))
			return ovl_iterate_real(file, ctx);

		return iterate_dir(od->realfile, ctx);
	}
}

static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
{
	loff_t res;
	struct ovl_dir_file *od = file->private_data;

	inode_lock(file_inode(file));
	if (!file->f_pos)
		ovl_dir_reset(file);

	if (od->is_real) {
		res = vfs_llseek(od->realfile, offset, origin);
		file->f_pos = od->realfile->f_pos;
	} else {
		res = -EINVAL;

		switch (origin) {
		case SEEK_CUR:
			offset += file->f_pos;
			break;
		case SEEK_SET:
			break;
		default:
			goto out_unlock;
		}
		if (offset < 0)
			goto out_unlock;

		if (offset != file->f_pos) {
			file->f_pos = offset;
			if (od->cache)
				ovl_seek_cursor(od, offset);
		}
		res = offset;
	}
out_unlock:
	inode_unlock(file_inode(file));

	return res;
}

/* Open the real (layer) directory with overlay mounter credentials. */
static struct file *ovl_dir_open_realfile(const struct file *file,
					  const struct path *realpath)
{
	with_ovl_creds(file_inode(file)->i_sb)
		return ovl_path_open(realpath, O_RDONLY | (file->f_flags & O_LARGEFILE));
}

/*
 * Like ovl_real_fdget(), returns upperfile if dir was copied up since open.
 * Unlike ovl_real_fdget(), this caches upperfile in file->private_data.
 *
 * TODO: use same abstract type for file->private_data of dir and file so
 * upperfile could also be cached for files as well.
 */
struct file *ovl_dir_real_file(const struct file *file, bool want_upper)
{
	struct ovl_dir_file *od = file->private_data;
	struct dentry *dentry = file->f_path.dentry;
	struct file *old, *realfile = od->realfile;

	if (!OVL_TYPE_UPPER(ovl_path_type(dentry)))
		return want_upper ? NULL : realfile;

	/*
	 * Need to check if we started out being a lower dir, but got copied up
	 */
	if (!od->is_upper) {
		realfile = READ_ONCE(od->upperfile);
		if (!realfile) {
			struct path upperpath;

			ovl_path_upper(dentry, &upperpath);
			realfile = ovl_dir_open_realfile(file, &upperpath);
			if (IS_ERR(realfile))
				return realfile;

			/* Publish; lose the race gracefully */
			old = cmpxchg_release(&od->upperfile, NULL, realfile);
			if (old) {
				fput(realfile);
				realfile = old;
			}
		}
	}

	return realfile;
}

static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
			 int datasync)
{
	struct file *realfile;
	int err;

	err = ovl_sync_status(OVL_FS(file_inode(file)->i_sb));
	if (err <= 0)
		return err;

	realfile = ovl_dir_real_file(file, true);
	err = PTR_ERR_OR_ZERO(realfile);

	/* Nothing to sync for lower */
	if (!realfile || err)
		return err;

	return vfs_fsync_range(realfile, start, end, datasync);
}

static int ovl_dir_release(struct inode *inode, struct file *file)
{
	struct ovl_dir_file *od = file->private_data;

	if (od->cache) {
		inode_lock(inode);
		ovl_cache_put(od, inode);
		inode_unlock(inode);
	}
	fput(od->realfile);
	if (od->upperfile)
		fput(od->upperfile);
	kfree(od);

	return 0;
}

static int ovl_dir_open(struct inode *inode, struct file *file)
{
	struct path realpath;
	struct file *realfile;
	struct ovl_dir_file *od;
	enum ovl_path_type type;

	od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);
	if (!od)
		return -ENOMEM;

	type = ovl_path_real(file->f_path.dentry, &realpath);
	realfile = ovl_dir_open_realfile(file, &realpath);
	if (IS_ERR(realfile)) {
		kfree(od);
		return PTR_ERR(realfile);
	}
	od->realfile = realfile;
	od->is_real = ovl_dir_is_real(inode);
	od->is_upper = OVL_TYPE_UPPER(type);
	file->private_data = od;

	return 0;
}

WRAP_DIR_ITER(ovl_iterate) // FIXME!
const struct file_operations ovl_dir_operations = {
	.read		= generic_read_dir,
	.open		= ovl_dir_open,
	.iterate_shared	= shared_ovl_iterate,
	.llseek		= ovl_dir_llseek,
	.fsync		= ovl_dir_fsync,
	.release	= ovl_dir_release,
	.setlease	= generic_setlease,
};

/*
 * Check whether a merged dir is empty; on success @list is left holding
 * the upper-layer whiteouts that must be cleaned up on delete.
 * Returns -ENOTEMPTY if any real entry is found.
 */
int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
{
	int err;
	struct ovl_cache_entry *p, *n;
	struct rb_root root = RB_ROOT;

	with_ovl_creds(dentry->d_sb)
		err = ovl_dir_read_merged(dentry, list, &root);
	if (err)
		return err;

	err = 0;

	list_for_each_entry_safe(p, n, list, l_node) {
		/*
		 * Select whiteouts in upperdir, they should
		 * be cleared when deleting this directory.
		 */
		if (p->is_whiteout) {
			if (p->is_upper)
				continue;
			goto del_entry;
		}

		if (p->name[0] == '.') {
			if (p->len == 1)
				goto del_entry;
			if (p->len == 2 && p->name[1] == '.')
				goto del_entry;
		}
		err = -ENOTEMPTY;
		break;

del_entry:
		list_del(&p->l_node);
		ovl_cache_entry_free(p);
	}

	return err;
}

/* Remove the whiteout entries collected by ovl_check_empty_dir(). */
void ovl_cleanup_whiteouts(struct ovl_fs *ofs, struct dentry *upper,
			   struct list_head *list)
{
	struct ovl_cache_entry *p;

	list_for_each_entry(p, list, l_node) {
		struct dentry *dentry;

		if (WARN_ON(!p->is_whiteout || !p->is_upper))
			continue;

		dentry = ovl_lookup_upper_unlocked(ofs, p->name, upper, p->len);
		if (IS_ERR(dentry)) {
			pr_err("lookup '%s/%.*s' failed (%i)\n",
			       upper->d_name.name, p->len, p->name,
			       (int) PTR_ERR(dentry));
			continue;
		}
		if (dentry->d_inode)
			ovl_cleanup(ofs, upper, dentry);
		dput(dentry);
	}
}

/* dir_context actor probing whether the fs reports d_type. */
static bool ovl_check_d_type(struct dir_context *ctx, const char *name,
			     int namelen, loff_t offset, u64 ino,
			     unsigned int d_type)
{
	struct ovl_readdir_data *rdd =
		container_of(ctx, struct ovl_readdir_data, ctx);

	/* Even if d_type is not supported, DT_DIR is returned for . and .. */
	if (!strncmp(name, ".", namelen) || !strncmp(name, "..", namelen))
		return true;

	if (d_type != DT_UNKNOWN)
		rdd->d_type_supported = true;

	return true;
}

/*
 * Returns 1 if d_type is supported, 0 not supported/unknown. Negative values
 * if error is encountered.
 */
int ovl_check_d_type_supported(const struct path *realpath)
{
	int err;
	struct ovl_readdir_data rdd = {
		.ctx.actor = ovl_check_d_type,
		.ctx.count = INT_MAX,
		.d_type_supported = false,
	};

	err = ovl_dir_read(realpath, &rdd);
	if (err)
		return err;

	return rdd.d_type_supported;
}

#define OVL_INCOMPATDIR_NAME "incompat"

/* Recursively empty one workdir subtree at recursion depth @level. */
static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, const struct path *path,
				       int level)
{
	int err;
	LIST_HEAD(list);
	struct ovl_cache_entry *p;
	struct ovl_readdir_data rdd = {
		.ctx.actor = ovl_fill_plain,
		.ctx.count = INT_MAX,
		.list = &list,
	};
	bool incompat = false;

	/*
	 * The "work/incompat" directory is treated specially - if it is not
	 * empty, instead of printing a generic error and mounting read-only,
	 * we will error about incompat features and fail the mount.
	 *
	 * When called from ovl_indexdir_cleanup(), path->dentry->d_name.name
	 * starts with '#'.
	 */
	if (level == 2 &&
	    !strcmp(path->dentry->d_name.name, OVL_INCOMPATDIR_NAME))
		incompat = true;

	err = ovl_dir_read(path, &rdd);
	if (err)
		goto out;

	list_for_each_entry(p, &list, l_node) {
		struct dentry *dentry;

		if (p->name[0] == '.') {
			if (p->len == 1)
				continue;
			if (p->len == 2 && p->name[1] == '.')
				continue;
		} else if (incompat) {
			pr_err("overlay with incompat feature '%s' cannot be mounted\n",
				p->name);
			err = -EINVAL;
			break;
		}
		dentry = ovl_lookup_upper_unlocked(ofs, p->name, path->dentry, p->len);
		if (IS_ERR(dentry))
			continue;
		if (dentry->d_inode)
			err = ovl_workdir_cleanup(ofs, path->dentry, path->mnt,
						  dentry, level);
		dput(dentry);
		if (err)
			break;
	}
out:
	ovl_cache_free(&list);
	return err;
}

/*
 * Remove one leftover workdir object; non-empty dirs up to level 1 are
 * emptied recursively before retrying the removal.
 */
int ovl_workdir_cleanup(struct ovl_fs *ofs, struct dentry *parent,
			struct vfsmount *mnt, struct dentry *dentry, int level)
{
	int err;

	if (!d_is_dir(dentry) || level > 1)
		return ovl_cleanup(ofs, parent, dentry);

	dentry = start_removing_dentry(parent, dentry);
	if (IS_ERR(dentry))
		return PTR_ERR(dentry);
	err = ovl_do_rmdir(ofs, parent->d_inode, dentry);
	end_removing(dentry);
	if (err) {
		struct path path = { .mnt = mnt, .dentry = dentry };

		/* rmdir failed (likely non-empty): empty it and retry */
		err = ovl_workdir_cleanup_recurse(ofs, &path, level + 1);
		if (!err)
			err = ovl_cleanup(ofs, parent, dentry);
	}

	return err;
}

/*
 * Walk the index dir at mount time: verify each index entry and clean
 * up stale, orphan and leftover entries.
 */
int ovl_indexdir_cleanup(struct ovl_fs *ofs)
{
	int err;
	struct dentry *indexdir = ofs->workdir;
	struct dentry *index = NULL;
	struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = indexdir };
	LIST_HEAD(list);
	struct ovl_cache_entry *p;
	struct ovl_readdir_data rdd = {
		.ctx.actor = ovl_fill_plain,
		.ctx.count = INT_MAX,
		.list = &list,
	};

	err = ovl_dir_read(&path, &rdd);
	if (err)
		goto out;

	list_for_each_entry(p, &list, l_node) {
		if (p->name[0] == '.') {
			if (p->len == 1)
				continue;
			if (p->len == 2 && p->name[1] == '.')
				continue;
		}
		index = ovl_lookup_upper_unlocked(ofs, p->name, indexdir, p->len);
		if (IS_ERR(index)) {
			err = PTR_ERR(index);
			index = NULL;
			break;
		}
		/* Cleanup leftover from index create/cleanup attempt */
		if (index->d_name.name[0] == '#') {
			err = ovl_workdir_cleanup(ofs, indexdir, path.mnt, index, 1);
			if (err)
				break;
			goto next;
		}
		err = ovl_verify_index(ofs, index);
		if (!err) {
			goto next;
		} else if (err == -ESTALE) {
			/* Cleanup stale index entries */
			err = ovl_cleanup(ofs, indexdir, index);
		} else if (err != -ENOENT) {
			/*
			 * Abort mount to avoid corrupting the index if
			 * an incompatible index entry was found or on out
			 * of memory.
			 */
			break;
		} else if (ofs->config.nfs_export) {
			/*
			 * Whiteout orphan index to block future open by
			 * handle after overlay nlink dropped to zero.
			 */
			err = ovl_cleanup_and_whiteout(ofs, indexdir, index);
		} else {
			/* Cleanup orphan index entries */
			err = ovl_cleanup(ofs, indexdir, index);
		}

		if (err)
			break;

next:
		dput(index);
		index = NULL;
	}
	dput(index);
out:
	ovl_cache_free(&list);
	if (err)
		pr_err("failed index dir cleanup (%i)\n", err);
	return err;
}