1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * 4 * Copyright (C) 2011 Novell Inc. 5 */ 6 7 #include <linux/fs.h> 8 #include <linux/slab.h> 9 #include <linux/namei.h> 10 #include <linux/file.h> 11 #include <linux/xattr.h> 12 #include <linux/rbtree.h> 13 #include <linux/security.h> 14 #include <linux/cred.h> 15 #include <linux/ratelimit.h> 16 #include <linux/overflow.h> 17 #include "overlayfs.h" 18 19 struct ovl_cache_entry { 20 unsigned int len; 21 unsigned int type; 22 u64 real_ino; 23 u64 ino; 24 struct list_head l_node; 25 struct rb_node node; 26 struct ovl_cache_entry *next_maybe_whiteout; 27 bool is_upper; 28 bool is_whiteout; 29 bool check_xwhiteout; 30 const char *c_name; 31 int c_len; 32 char name[]; 33 }; 34 35 struct ovl_dir_cache { 36 long refcount; 37 u64 version; 38 struct list_head entries; 39 struct rb_root root; 40 }; 41 42 struct ovl_readdir_data { 43 struct dir_context ctx; 44 struct dentry *dentry; 45 bool is_lowest; 46 struct rb_root *root; 47 struct list_head *list; 48 struct list_head middle; 49 struct ovl_cache_entry *first_maybe_whiteout; 50 struct unicode_map *map; 51 int count; 52 int err; 53 bool is_upper; 54 bool d_type_supported; 55 bool in_xwhiteouts_dir; 56 }; 57 58 struct ovl_dir_file { 59 bool is_real; 60 bool is_upper; 61 struct ovl_dir_cache *cache; 62 struct list_head *cursor; 63 struct file *realfile; 64 struct file *upperfile; 65 }; 66 67 static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n) 68 { 69 return rb_entry(n, struct ovl_cache_entry, node); 70 } 71 72 static int ovl_casefold(struct ovl_readdir_data *rdd, const char *str, int len, 73 char **dst) 74 { 75 const struct qstr qstr = { .name = str, .len = len }; 76 char *cf_name; 77 int cf_len; 78 79 if (!IS_ENABLED(CONFIG_UNICODE) || !rdd->map || is_dot_dotdot(str, len)) 80 return 0; 81 82 cf_name = kmalloc(NAME_MAX, GFP_KERNEL); 83 if (!cf_name) { 84 rdd->err = -ENOMEM; 85 return -ENOMEM; 86 } 87 88 cf_len = utf8_casefold(rdd->map, &qstr, cf_name, NAME_MAX); 89 if (cf_len > 0) 90 *dst = cf_name; 91 else 92 kfree(cf_name); 93 94 return cf_len; 95 } 96 97 static bool ovl_cache_entry_find_link(const char *name, int len, 98 struct rb_node ***link, 99 struct rb_node **parent) 100 { 101 bool found = false; 102 struct rb_node **newp = *link; 103 104 while (!found && *newp) { 105 int cmp; 106 struct ovl_cache_entry *tmp; 107 108 *parent = *newp; 109 tmp = ovl_cache_entry_from_node(*newp); 110 cmp = strncmp(name, tmp->c_name, len); 111 if (cmp > 0) 112 newp = &tmp->node.rb_right; 113 else if (cmp < 0 || len < tmp->c_len) 114 newp = &tmp->node.rb_left; 115 else 116 found = true; 117 } 118 *link = newp; 119 120 return found; 121 } 122 123 static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root, 124 const char *name, int len) 125 { 126 struct rb_node *node = root->rb_node; 127 int cmp; 128 129 while (node) { 130 struct ovl_cache_entry *p = ovl_cache_entry_from_node(node); 131 132 cmp = strncmp(name, p->c_name, len); 133 if (cmp > 0) 134 node = p->node.rb_right; 135 else if (cmp < 0 || len < p->c_len) 136 node = p->node.rb_left; 137 else 138 return p; 139 } 140 141 return NULL; 142 } 143 144 static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd, 145 struct ovl_cache_entry *p) 146 { 147 /* Don't care if not doing ovl_iter() */ 148 if (!rdd->dentry) 149 return false; 150 151 /* Always recalc d_ino when remapping lower inode numbers */ 152 if (ovl_xino_bits(OVL_FS(rdd->dentry->d_sb))) 153 return true; 154 155 /* Always recalc d_ino for parent */ 156 if (strcmp(p->name, "..") == 0) 157 return true; 158 159 /* If this is lower, then native d_ino will do */ 160 if (!rdd->is_upper) 161 return false; 162 163 /* 164 * Recalc d_ino for '.' and for all entries if dir is impure (contains 165 * copied up entries) 166 */ 167 if ((p->name[0] == '.' && p->len == 1) || 168 ovl_test_flag(OVL_IMPURE, d_inode(rdd->dentry))) 169 return true; 170 171 return false; 172 } 173 174 static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd, 175 const char *name, int len, 176 const char *c_name, int c_len, 177 u64 ino, unsigned int d_type) 178 { 179 struct ovl_cache_entry *p; 180 181 p = kmalloc(struct_size(p, name, len + 1), GFP_KERNEL); 182 if (!p) 183 return NULL; 184 185 memcpy(p->name, name, len); 186 p->name[len] = '\0'; 187 p->len = len; 188 p->type = d_type; 189 p->real_ino = ino; 190 p->ino = ino; 191 /* Defer setting d_ino for upper entry to ovl_iterate() */ 192 if (ovl_calc_d_ino(rdd, p)) 193 p->ino = 0; 194 p->is_upper = rdd->is_upper; 195 p->is_whiteout = false; 196 /* Defer check for overlay.whiteout to ovl_iterate() */ 197 p->check_xwhiteout = rdd->in_xwhiteouts_dir && d_type == DT_REG; 198 199 if (c_name && c_name != name) { 200 p->c_name = c_name; 201 p->c_len = c_len; 202 } else { 203 p->c_name = p->name; 204 p->c_len = len; 205 } 206 207 if (d_type == DT_CHR) { 208 p->next_maybe_whiteout = rdd->first_maybe_whiteout; 209 rdd->first_maybe_whiteout = p; 210 } 211 return p; 212 } 213 214 /* Return 0 for found, 1 for added, <0 for error */ 215 static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd, 216 const char *name, int len, 217 const char *c_name, int c_len, 218 u64 ino, 219 unsigned int d_type) 220 { 221 struct rb_node **newp = &rdd->root->rb_node; 222 struct rb_node *parent = NULL; 223 struct ovl_cache_entry *p; 224 225 if (ovl_cache_entry_find_link(c_name, c_len, &newp, &parent)) 226 return 0; 227 228 p = ovl_cache_entry_new(rdd, name, len, c_name, c_len, ino, d_type); 229 if (p == NULL) { 230 rdd->err = -ENOMEM; 231 return -ENOMEM; 232 } 233 234 list_add_tail(&p->l_node, rdd->list); 235 rb_link_node(&p->node, parent, newp); 236 rb_insert_color(&p->node, rdd->root); 237 238 return 1; 239 } 240 241 /* Return 0 for found, 1 for added, <0 for error */ 242 static int ovl_fill_lowest(struct ovl_readdir_data *rdd, 243 const char *name, int namelen, 244 const char *c_name, int c_len, 245 loff_t offset, u64 ino, unsigned int d_type) 246 { 247 struct ovl_cache_entry *p; 248 249 p = ovl_cache_entry_find(rdd->root, c_name, c_len); 250 if (p) { 251 list_move_tail(&p->l_node, &rdd->middle); 252 return 0; 253 } else { 254 p = ovl_cache_entry_new(rdd, name, namelen, c_name, c_len, 255 ino, d_type); 256 if (p == NULL) 257 rdd->err = -ENOMEM; 258 else 259 list_add_tail(&p->l_node, &rdd->middle); 260 } 261 262 return rdd->err ?: 1; 263 } 264 265 static void ovl_cache_entry_free(struct ovl_cache_entry *p) 266 { 267 if (p->c_name != p->name) 268 kfree(p->c_name); 269 kfree(p); 270 } 271 272 void ovl_cache_free(struct list_head *list) 273 { 274 struct ovl_cache_entry *p; 275 struct ovl_cache_entry *n; 276 277 list_for_each_entry_safe(p, n, list, l_node) 278 ovl_cache_entry_free(p); 279 280 INIT_LIST_HEAD(list); 281 } 282 283 void ovl_dir_cache_free(struct inode *inode) 284 { 285 struct ovl_dir_cache *cache = ovl_dir_cache(inode); 286 287 if (cache) { 288 ovl_cache_free(&cache->entries); 289 kfree(cache); 290 } 291 } 292 293 static void ovl_cache_put(struct ovl_dir_file *od, struct inode *inode) 294 { 295 struct ovl_dir_cache *cache = od->cache; 296 297 WARN_ON(cache->refcount <= 0); 298 cache->refcount--; 299 if (!cache->refcount) { 300 if (ovl_dir_cache(inode) == cache) 301 ovl_set_dir_cache(inode, NULL); 302 303 ovl_cache_free(&cache->entries); 304 kfree(cache); 305 } 306 } 307 308 static bool ovl_fill_merge(struct dir_context *ctx, const char *name, 309 int namelen, loff_t offset, u64 ino, 310 unsigned int d_type) 311 { 312 struct ovl_readdir_data *rdd = 313 container_of(ctx, struct ovl_readdir_data, ctx); 314 struct ovl_fs *ofs = OVL_FS(rdd->dentry->d_sb); 315 const char *c_name = NULL; 316 char *cf_name = NULL; 317 int c_len = 0, ret; 318 319 if (ofs->casefold) 320 c_len = ovl_casefold(rdd, name, namelen, &cf_name); 321 322 if (rdd->err) 323 return false; 324 325 if (c_len <= 0) { 326 c_name = name; 327 c_len = namelen; 328 } else { 329 c_name = cf_name; 330 } 331 332 rdd->count++; 333 if (!rdd->is_lowest) 334 ret = ovl_cache_entry_add_rb(rdd, name, namelen, c_name, c_len, ino, d_type); 335 else 336 ret = ovl_fill_lowest(rdd, name, namelen, c_name, c_len, offset, ino, d_type); 337 338 /* 339 * If ret == 1, that means that c_name is being used as part of struct 340 * ovl_cache_entry and will be freed at ovl_cache_free(). Otherwise, 341 * c_name was found in the rb-tree so we can free it here. 342 */ 343 if (ret != 1 && c_name != name) 344 kfree(c_name); 345 346 return ret >= 0; 347 } 348 349 static int ovl_check_whiteouts(const struct path *path, struct ovl_readdir_data *rdd) 350 { 351 int err = 0; 352 struct dentry *dentry, *dir = path->dentry; 353 const struct cred *old_cred; 354 355 old_cred = ovl_override_creds(rdd->dentry->d_sb); 356 357 while (rdd->first_maybe_whiteout) { 358 struct ovl_cache_entry *p = 359 rdd->first_maybe_whiteout; 360 rdd->first_maybe_whiteout = p->next_maybe_whiteout; 361 dentry = lookup_one_positive_killable(mnt_idmap(path->mnt), 362 &QSTR_LEN(p->name, p->len), 363 dir); 364 if (!IS_ERR(dentry)) { 365 p->is_whiteout = ovl_is_whiteout(dentry); 366 dput(dentry); 367 } else if (PTR_ERR(dentry) == -EINTR) { 368 err = -EINTR; 369 break; 370 } 371 } 372 ovl_revert_creds(old_cred); 373 374 return err; 375 } 376 377 static inline int ovl_dir_read(const struct path *realpath, 378 struct ovl_readdir_data *rdd) 379 { 380 struct file *realfile; 381 int err; 382 383 realfile = ovl_path_open(realpath, O_RDONLY | O_LARGEFILE); 384 if (IS_ERR(realfile)) 385 return PTR_ERR(realfile); 386 387 rdd->first_maybe_whiteout = NULL; 388 rdd->ctx.pos = 0; 389 do { 390 rdd->count = 0; 391 rdd->err = 0; 392 err = iterate_dir(realfile, &rdd->ctx); 393 if (err >= 0) 394 err = rdd->err; 395 } while (!err && rdd->count); 396 397 if (!err && rdd->first_maybe_whiteout && rdd->dentry) 398 err = ovl_check_whiteouts(realpath, rdd); 399 400 fput(realfile); 401 402 return err; 403 } 404 405 static void ovl_dir_reset(struct file *file) 406 { 407 struct ovl_dir_file *od = file->private_data; 408 struct ovl_dir_cache *cache = od->cache; 409 struct inode *inode = file_inode(file); 410 bool is_real; 411 412 if (cache && ovl_inode_version_get(inode) != cache->version) { 413 ovl_cache_put(od, inode); 414 od->cache = NULL; 415 od->cursor = NULL; 416 } 417 is_real = ovl_dir_is_real(inode); 418 if (od->is_real != is_real) { 419 /* is_real can only become false when dir is copied up */ 420 if (WARN_ON(is_real)) 421 return; 422 od->is_real = false; 423 } 424 } 425 426 static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list, 427 struct rb_root *root) 428 { 429 int err; 430 struct path realpath; 431 struct ovl_readdir_data rdd = { 432 .ctx.actor = ovl_fill_merge, 433 .ctx.count = INT_MAX, 434 .dentry = dentry, 435 .list = list, 436 .root = root, 437 .is_lowest = false, 438 .map = NULL, 439 }; 440 int idx, next; 441 const struct ovl_layer *layer; 442 struct ovl_fs *ofs = OVL_FS(dentry->d_sb); 443 444 for (idx = 0; idx != -1; idx = next) { 445 next = ovl_path_next(idx, dentry, &realpath, &layer); 446 447 if (ofs->casefold) 448 rdd.map = sb_encoding(realpath.dentry->d_sb); 449 450 rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry; 451 rdd.in_xwhiteouts_dir = layer->has_xwhiteouts && 452 ovl_dentry_has_xwhiteouts(dentry); 453 454 if (next != -1) { 455 err = ovl_dir_read(&realpath, &rdd); 456 if (err) 457 break; 458 } else { 459 /* 460 * Insert lowest layer entries before upper ones, this 461 * allows offsets to be reasonably constant 462 */ 463 list_add(&rdd.middle, rdd.list); 464 rdd.is_lowest = true; 465 err = ovl_dir_read(&realpath, &rdd); 466 list_del(&rdd.middle); 467 } 468 } 469 return err; 470 } 471 472 static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos) 473 { 474 struct list_head *p; 475 loff_t off = 0; 476 477 list_for_each(p, &od->cache->entries) { 478 if (off >= pos) 479 break; 480 off++; 481 } 482 /* Cursor is safe since the cache is stable */ 483 od->cursor = p; 484 } 485 486 static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry) 487 { 488 int res; 489 struct ovl_dir_cache *cache; 490 struct inode *inode = d_inode(dentry); 491 492 cache = ovl_dir_cache(inode); 493 if (cache && ovl_inode_version_get(inode) == cache->version) { 494 WARN_ON(!cache->refcount); 495 cache->refcount++; 496 return cache; 497 } 498 ovl_set_dir_cache(d_inode(dentry), NULL); 499 500 cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL); 501 if (!cache) 502 return ERR_PTR(-ENOMEM); 503 504 cache->refcount = 1; 505 INIT_LIST_HEAD(&cache->entries); 506 cache->root = RB_ROOT; 507 508 res = ovl_dir_read_merged(dentry, &cache->entries, &cache->root); 509 if (res) { 510 ovl_cache_free(&cache->entries); 511 kfree(cache); 512 return ERR_PTR(res); 513 } 514 515 cache->version = ovl_inode_version_get(inode); 516 ovl_set_dir_cache(inode, cache); 517 518 return cache; 519 } 520 521 /* Map inode number to lower fs unique range */ 522 static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid, 523 const char *name, int namelen, bool warn) 524 { 525 unsigned int xinoshift = 64 - xinobits; 526 527 if (unlikely(ino >> xinoshift)) { 528 if (warn) { 529 pr_warn_ratelimited("d_ino too big (%.*s, ino=%llu, xinobits=%d)\n", 530 namelen, name, ino, xinobits); 531 } 532 return ino; 533 } 534 535 /* 536 * The lowest xinobit is reserved for mapping the non-peresistent inode 537 * numbers range, but this range is only exposed via st_ino, not here. 538 */ 539 return ino | ((u64)fsid) << (xinoshift + 1); 540 } 541 542 /* 543 * Set d_ino for upper entries if needed. Non-upper entries should always report 544 * the uppermost real inode ino and should not call this function. 545 * 546 * When not all layer are on same fs, report real ino also for upper. 547 * 548 * When all layers are on the same fs, and upper has a reference to 549 * copy up origin, call vfs_getattr() on the overlay entry to make 550 * sure that d_ino will be consistent with st_ino from stat(2). 551 * 552 * Also checks the overlay.whiteout xattr by doing a full lookup which will return 553 * negative in this case. 554 */ 555 static int ovl_cache_update(const struct path *path, struct ovl_cache_entry *p, bool update_ino) 556 557 { 558 struct dentry *dir = path->dentry; 559 struct ovl_fs *ofs = OVL_FS(dir->d_sb); 560 struct dentry *this = NULL; 561 enum ovl_path_type type; 562 u64 ino = p->real_ino; 563 int xinobits = ovl_xino_bits(ofs); 564 int err = 0; 565 566 if (!ovl_same_dev(ofs) && !p->check_xwhiteout) 567 goto out; 568 569 if (p->name[0] == '.') { 570 if (p->len == 1) { 571 this = dget(dir); 572 goto get; 573 } 574 if (p->len == 2 && p->name[1] == '.') { 575 /* we shall not be moved */ 576 this = dget(dir->d_parent); 577 goto get; 578 } 579 } 580 /* This checks also for xwhiteouts */ 581 this = lookup_one(mnt_idmap(path->mnt), &QSTR_LEN(p->name, p->len), dir); 582 if (IS_ERR_OR_NULL(this) || !this->d_inode) { 583 /* Mark a stale entry */ 584 p->is_whiteout = true; 585 if (IS_ERR(this)) { 586 err = PTR_ERR(this); 587 this = NULL; 588 goto fail; 589 } 590 goto out; 591 } 592 593 get: 594 if (!ovl_same_dev(ofs) || !update_ino) 595 goto out; 596 597 type = ovl_path_type(this); 598 if (OVL_TYPE_ORIGIN(type)) { 599 struct kstat stat; 600 struct path statpath = *path; 601 602 statpath.dentry = this; 603 err = vfs_getattr(&statpath, &stat, STATX_INO, 0); 604 if (err) 605 goto fail; 606 607 /* 608 * Directory inode is always on overlay st_dev. 609 * Non-dir with ovl_same_dev() could be on pseudo st_dev in case 610 * of xino bits overflow. 611 */ 612 WARN_ON_ONCE(S_ISDIR(stat.mode) && 613 dir->d_sb->s_dev != stat.dev); 614 ino = stat.ino; 615 } else if (xinobits && !OVL_TYPE_UPPER(type)) { 616 ino = ovl_remap_lower_ino(ino, xinobits, 617 ovl_layer_lower(this)->fsid, 618 p->name, p->len, 619 ovl_xino_warn(ofs)); 620 } 621 622 out: 623 p->ino = ino; 624 dput(this); 625 return err; 626 627 fail: 628 pr_warn_ratelimited("failed to look up (%s) for ino (%i)\n", 629 p->name, err); 630 goto out; 631 } 632 633 static bool ovl_fill_plain(struct dir_context *ctx, const char *name, 634 int namelen, loff_t offset, u64 ino, 635 unsigned int d_type) 636 { 637 struct ovl_cache_entry *p; 638 struct ovl_readdir_data *rdd = 639 container_of(ctx, struct ovl_readdir_data, ctx); 640 641 rdd->count++; 642 p = ovl_cache_entry_new(rdd, name, namelen, NULL, 0, ino, d_type); 643 if (p == NULL) { 644 rdd->err = -ENOMEM; 645 return false; 646 } 647 list_add_tail(&p->l_node, rdd->list); 648 649 return true; 650 } 651 652 static int ovl_dir_read_impure(const struct path *path, struct list_head *list, 653 struct rb_root *root) 654 { 655 int err; 656 struct path realpath; 657 struct ovl_cache_entry *p, *n; 658 struct ovl_readdir_data rdd = { 659 .ctx.actor = ovl_fill_plain, 660 .ctx.count = INT_MAX, 661 .list = list, 662 .root = root, 663 }; 664 665 INIT_LIST_HEAD(list); 666 *root = RB_ROOT; 667 ovl_path_upper(path->dentry, &realpath); 668 669 err = ovl_dir_read(&realpath, &rdd); 670 if (err) 671 return err; 672 673 list_for_each_entry_safe(p, n, list, l_node) { 674 if (strcmp(p->name, ".") != 0 && 675 strcmp(p->name, "..") != 0) { 676 err = ovl_cache_update(path, p, true); 677 if (err) 678 return err; 679 } 680 if (p->ino == p->real_ino) { 681 list_del(&p->l_node); 682 ovl_cache_entry_free(p); 683 } else { 684 struct rb_node **newp = &root->rb_node; 685 struct rb_node *parent = NULL; 686 687 if (WARN_ON(ovl_cache_entry_find_link(p->name, p->len, 688 &newp, &parent))) 689 return -EIO; 690 691 rb_link_node(&p->node, parent, newp); 692 rb_insert_color(&p->node, root); 693 } 694 } 695 return 0; 696 } 697 698 static struct ovl_dir_cache *ovl_cache_get_impure(const struct path *path) 699 { 700 int res; 701 struct dentry *dentry = path->dentry; 702 struct inode *inode = d_inode(dentry); 703 struct ovl_fs *ofs = OVL_FS(dentry->d_sb); 704 struct ovl_dir_cache *cache; 705 706 cache = ovl_dir_cache(inode); 707 if (cache && ovl_inode_version_get(inode) == cache->version) 708 return cache; 709 710 /* Impure cache is not refcounted, free it here */ 711 ovl_dir_cache_free(inode); 712 ovl_set_dir_cache(inode, NULL); 713 714 cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL); 715 if (!cache) 716 return ERR_PTR(-ENOMEM); 717 718 res = ovl_dir_read_impure(path, &cache->entries, &cache->root); 719 if (res) { 720 ovl_cache_free(&cache->entries); 721 kfree(cache); 722 return ERR_PTR(res); 723 } 724 if (list_empty(&cache->entries)) { 725 /* 726 * A good opportunity to get rid of an unneeded "impure" flag. 727 * Removing the "impure" xattr is best effort. 728 */ 729 if (!ovl_want_write(dentry)) { 730 ovl_removexattr(ofs, ovl_dentry_upper(dentry), 731 OVL_XATTR_IMPURE); 732 ovl_drop_write(dentry); 733 } 734 ovl_clear_flag(OVL_IMPURE, inode); 735 kfree(cache); 736 return NULL; 737 } 738 739 cache->version = ovl_inode_version_get(inode); 740 ovl_set_dir_cache(inode, cache); 741 742 return cache; 743 } 744 745 struct ovl_readdir_translate { 746 struct dir_context *orig_ctx; 747 struct ovl_dir_cache *cache; 748 struct dir_context ctx; 749 u64 parent_ino; 750 int fsid; 751 int xinobits; 752 bool xinowarn; 753 }; 754 755 static bool ovl_fill_real(struct dir_context *ctx, const char *name, 756 int namelen, loff_t offset, u64 ino, 757 unsigned int d_type) 758 { 759 struct ovl_readdir_translate *rdt = 760 container_of(ctx, struct ovl_readdir_translate, ctx); 761 struct dir_context *orig_ctx = rdt->orig_ctx; 762 bool res; 763 764 if (rdt->parent_ino && strcmp(name, "..") == 0) { 765 ino = rdt->parent_ino; 766 } else if (rdt->cache) { 767 struct ovl_cache_entry *p; 768 769 p = ovl_cache_entry_find(&rdt->cache->root, name, namelen); 770 if (p) 771 ino = p->ino; 772 } else if (rdt->xinobits) { 773 ino = ovl_remap_lower_ino(ino, rdt->xinobits, rdt->fsid, 774 name, namelen, rdt->xinowarn); 775 } 776 777 res = orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type); 778 ctx->count = orig_ctx->count; 779 780 return res; 781 } 782 783 static bool ovl_is_impure_dir(struct file *file) 784 { 785 struct ovl_dir_file *od = file->private_data; 786 struct inode *dir = file_inode(file); 787 788 /* 789 * Only upper dir can be impure, but if we are in the middle of 790 * iterating a lower real dir, dir could be copied up and marked 791 * impure. We only want the impure cache if we started iterating 792 * a real upper dir to begin with. 793 */ 794 return od->is_upper && ovl_test_flag(OVL_IMPURE, dir); 795 796 } 797 798 static int ovl_iterate_real(struct file *file, struct dir_context *ctx) 799 { 800 int err; 801 struct ovl_dir_file *od = file->private_data; 802 struct dentry *dir = file->f_path.dentry; 803 struct ovl_fs *ofs = OVL_FS(dir->d_sb); 804 const struct ovl_layer *lower_layer = ovl_layer_lower(dir); 805 struct ovl_readdir_translate rdt = { 806 .ctx.actor = ovl_fill_real, 807 .ctx.count = ctx->count, 808 .orig_ctx = ctx, 809 .xinobits = ovl_xino_bits(ofs), 810 .xinowarn = ovl_xino_warn(ofs), 811 }; 812 813 if (rdt.xinobits && lower_layer) 814 rdt.fsid = lower_layer->fsid; 815 816 if (OVL_TYPE_MERGE(ovl_path_type(dir->d_parent))) { 817 struct kstat stat; 818 struct path statpath = file->f_path; 819 820 statpath.dentry = dir->d_parent; 821 err = vfs_getattr(&statpath, &stat, STATX_INO, 0); 822 if (err) 823 return err; 824 825 WARN_ON_ONCE(dir->d_sb->s_dev != stat.dev); 826 rdt.parent_ino = stat.ino; 827 } 828 829 if (ovl_is_impure_dir(file)) { 830 rdt.cache = ovl_cache_get_impure(&file->f_path); 831 if (IS_ERR(rdt.cache)) 832 return PTR_ERR(rdt.cache); 833 } 834 835 err = iterate_dir(od->realfile, &rdt.ctx); 836 ctx->pos = rdt.ctx.pos; 837 838 return err; 839 } 840 841 842 static int ovl_iterate(struct file *file, struct dir_context *ctx) 843 { 844 struct ovl_dir_file *od = file->private_data; 845 struct dentry *dentry = file->f_path.dentry; 846 struct ovl_fs *ofs = OVL_FS(dentry->d_sb); 847 struct ovl_cache_entry *p; 848 const struct cred *old_cred; 849 int err; 850 851 old_cred = ovl_override_creds(dentry->d_sb); 852 if (!ctx->pos) 853 ovl_dir_reset(file); 854 855 if (od->is_real) { 856 /* 857 * If parent is merge, then need to adjust d_ino for '..', if 858 * dir is impure then need to adjust d_ino for copied up 859 * entries. 860 */ 861 if (ovl_xino_bits(ofs) || 862 (ovl_same_fs(ofs) && 863 (ovl_is_impure_dir(file) || 864 OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent))))) { 865 err = ovl_iterate_real(file, ctx); 866 } else { 867 err = iterate_dir(od->realfile, ctx); 868 } 869 goto out; 870 } 871 872 if (!od->cache) { 873 struct ovl_dir_cache *cache; 874 875 cache = ovl_cache_get(dentry); 876 err = PTR_ERR(cache); 877 if (IS_ERR(cache)) 878 goto out; 879 880 od->cache = cache; 881 ovl_seek_cursor(od, ctx->pos); 882 } 883 884 while (od->cursor != &od->cache->entries) { 885 p = list_entry(od->cursor, struct ovl_cache_entry, l_node); 886 if (!p->is_whiteout) { 887 if (!p->ino || p->check_xwhiteout) { 888 err = ovl_cache_update(&file->f_path, p, !p->ino); 889 if (err) 890 goto out; 891 } 892 } 893 /* ovl_cache_update() sets is_whiteout on stale entry */ 894 if (!p->is_whiteout) { 895 if (!dir_emit(ctx, p->name, p->len, p->ino, p->type)) 896 break; 897 } 898 od->cursor = p->l_node.next; 899 ctx->pos++; 900 } 901 err = 0; 902 out: 903 ovl_revert_creds(old_cred); 904 return err; 905 } 906 907 static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin) 908 { 909 loff_t res; 910 struct ovl_dir_file *od = file->private_data; 911 912 inode_lock(file_inode(file)); 913 if (!file->f_pos) 914 ovl_dir_reset(file); 915 916 if (od->is_real) { 917 res = vfs_llseek(od->realfile, offset, origin); 918 file->f_pos = od->realfile->f_pos; 919 } else { 920 res = -EINVAL; 921 922 switch (origin) { 923 case SEEK_CUR: 924 offset += file->f_pos; 925 break; 926 case SEEK_SET: 927 break; 928 default: 929 goto out_unlock; 930 } 931 if (offset < 0) 932 goto out_unlock; 933 934 if (offset != file->f_pos) { 935 file->f_pos = offset; 936 if (od->cache) 937 ovl_seek_cursor(od, offset); 938 } 939 res = offset; 940 } 941 out_unlock: 942 inode_unlock(file_inode(file)); 943 944 return res; 945 } 946 947 static struct file *ovl_dir_open_realfile(const struct file *file, 948 const struct path *realpath) 949 { 950 struct file *res; 951 const struct cred *old_cred; 952 953 old_cred = ovl_override_creds(file_inode(file)->i_sb); 954 res = ovl_path_open(realpath, O_RDONLY | (file->f_flags & O_LARGEFILE)); 955 ovl_revert_creds(old_cred); 956 957 return res; 958 } 959 960 /* 961 * Like ovl_real_fdget(), returns upperfile if dir was copied up since open. 962 * Unlike ovl_real_fdget(), this caches upperfile in file->private_data. 963 * 964 * TODO: use same abstract type for file->private_data of dir and file so 965 * upperfile could also be cached for files as well. 966 */ 967 struct file *ovl_dir_real_file(const struct file *file, bool want_upper) 968 { 969 970 struct ovl_dir_file *od = file->private_data; 971 struct dentry *dentry = file->f_path.dentry; 972 struct file *old, *realfile = od->realfile; 973 974 if (!OVL_TYPE_UPPER(ovl_path_type(dentry))) 975 return want_upper ? NULL : realfile; 976 977 /* 978 * Need to check if we started out being a lower dir, but got copied up 979 */ 980 if (!od->is_upper) { 981 realfile = READ_ONCE(od->upperfile); 982 if (!realfile) { 983 struct path upperpath; 984 985 ovl_path_upper(dentry, &upperpath); 986 realfile = ovl_dir_open_realfile(file, &upperpath); 987 if (IS_ERR(realfile)) 988 return realfile; 989 990 old = cmpxchg_release(&od->upperfile, NULL, realfile); 991 if (old) { 992 fput(realfile); 993 realfile = old; 994 } 995 } 996 } 997 998 return realfile; 999 } 1000 1001 static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, 1002 int datasync) 1003 { 1004 struct file *realfile; 1005 int err; 1006 1007 err = ovl_sync_status(OVL_FS(file_inode(file)->i_sb)); 1008 if (err <= 0) 1009 return err; 1010 1011 realfile = ovl_dir_real_file(file, true); 1012 err = PTR_ERR_OR_ZERO(realfile); 1013 1014 /* Nothing to sync for lower */ 1015 if (!realfile || err) 1016 return err; 1017 1018 return vfs_fsync_range(realfile, start, end, datasync); 1019 } 1020 1021 static int ovl_dir_release(struct inode *inode, struct file *file) 1022 { 1023 struct ovl_dir_file *od = file->private_data; 1024 1025 if (od->cache) { 1026 inode_lock(inode); 1027 ovl_cache_put(od, inode); 1028 inode_unlock(inode); 1029 } 1030 fput(od->realfile); 1031 if (od->upperfile) 1032 fput(od->upperfile); 1033 kfree(od); 1034 1035 return 0; 1036 } 1037 1038 static int ovl_dir_open(struct inode *inode, struct file *file) 1039 { 1040 struct path realpath; 1041 struct file *realfile; 1042 struct ovl_dir_file *od; 1043 enum ovl_path_type type; 1044 1045 od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL); 1046 if (!od) 1047 return -ENOMEM; 1048 1049 type = ovl_path_real(file->f_path.dentry, &realpath); 1050 realfile = ovl_dir_open_realfile(file, &realpath); 1051 if (IS_ERR(realfile)) { 1052 kfree(od); 1053 return PTR_ERR(realfile); 1054 } 1055 od->realfile = realfile; 1056 od->is_real = ovl_dir_is_real(inode); 1057 od->is_upper = OVL_TYPE_UPPER(type); 1058 file->private_data = od; 1059 1060 return 0; 1061 } 1062 1063 WRAP_DIR_ITER(ovl_iterate) // FIXME! 1064 const struct file_operations ovl_dir_operations = { 1065 .read = generic_read_dir, 1066 .open = ovl_dir_open, 1067 .iterate_shared = shared_ovl_iterate, 1068 .llseek = ovl_dir_llseek, 1069 .fsync = ovl_dir_fsync, 1070 .release = ovl_dir_release, 1071 }; 1072 1073 int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list) 1074 { 1075 int err; 1076 struct ovl_cache_entry *p, *n; 1077 struct rb_root root = RB_ROOT; 1078 const struct cred *old_cred; 1079 1080 old_cred = ovl_override_creds(dentry->d_sb); 1081 err = ovl_dir_read_merged(dentry, list, &root); 1082 ovl_revert_creds(old_cred); 1083 if (err) 1084 return err; 1085 1086 err = 0; 1087 1088 list_for_each_entry_safe(p, n, list, l_node) { 1089 /* 1090 * Select whiteouts in upperdir, they should 1091 * be cleared when deleting this directory. 1092 */ 1093 if (p->is_whiteout) { 1094 if (p->is_upper) 1095 continue; 1096 goto del_entry; 1097 } 1098 1099 if (p->name[0] == '.') { 1100 if (p->len == 1) 1101 goto del_entry; 1102 if (p->len == 2 && p->name[1] == '.') 1103 goto del_entry; 1104 } 1105 err = -ENOTEMPTY; 1106 break; 1107 1108 del_entry: 1109 list_del(&p->l_node); 1110 ovl_cache_entry_free(p); 1111 } 1112 1113 return err; 1114 } 1115 1116 void ovl_cleanup_whiteouts(struct ovl_fs *ofs, struct dentry *upper, 1117 struct list_head *list) 1118 { 1119 struct ovl_cache_entry *p; 1120 1121 list_for_each_entry(p, list, l_node) { 1122 struct dentry *dentry; 1123 1124 if (WARN_ON(!p->is_whiteout || !p->is_upper)) 1125 continue; 1126 1127 dentry = ovl_lookup_upper_unlocked(ofs, p->name, upper, p->len); 1128 if (IS_ERR(dentry)) { 1129 pr_err("lookup '%s/%.*s' failed (%i)\n", 1130 upper->d_name.name, p->len, p->name, 1131 (int) PTR_ERR(dentry)); 1132 continue; 1133 } 1134 if (dentry->d_inode) 1135 ovl_cleanup(ofs, upper, dentry); 1136 dput(dentry); 1137 } 1138 } 1139 1140 static bool ovl_check_d_type(struct dir_context *ctx, const char *name, 1141 int namelen, loff_t offset, u64 ino, 1142 unsigned int d_type) 1143 { 1144 struct ovl_readdir_data *rdd = 1145 container_of(ctx, struct ovl_readdir_data, ctx); 1146 1147 /* Even if d_type is not supported, DT_DIR is returned for . and .. */ 1148 if (!strncmp(name, ".", namelen) || !strncmp(name, "..", namelen)) 1149 return true; 1150 1151 if (d_type != DT_UNKNOWN) 1152 rdd->d_type_supported = true; 1153 1154 return true; 1155 } 1156 1157 /* 1158 * Returns 1 if d_type is supported, 0 not supported/unknown. Negative values 1159 * if error is encountered. 1160 */ 1161 int ovl_check_d_type_supported(const struct path *realpath) 1162 { 1163 int err; 1164 struct ovl_readdir_data rdd = { 1165 .ctx.actor = ovl_check_d_type, 1166 .ctx.count = INT_MAX, 1167 .d_type_supported = false, 1168 }; 1169 1170 err = ovl_dir_read(realpath, &rdd); 1171 if (err) 1172 return err; 1173 1174 return rdd.d_type_supported; 1175 } 1176 1177 #define OVL_INCOMPATDIR_NAME "incompat" 1178 1179 static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, const struct path *path, 1180 int level) 1181 { 1182 int err; 1183 LIST_HEAD(list); 1184 struct ovl_cache_entry *p; 1185 struct ovl_readdir_data rdd = { 1186 .ctx.actor = ovl_fill_plain, 1187 .ctx.count = INT_MAX, 1188 .list = &list, 1189 }; 1190 bool incompat = false; 1191 1192 /* 1193 * The "work/incompat" directory is treated specially - if it is not 1194 * empty, instead of printing a generic error and mounting read-only, 1195 * we will error about incompat features and fail the mount. 1196 * 1197 * When called from ovl_indexdir_cleanup(), path->dentry->d_name.name 1198 * starts with '#'. 1199 */ 1200 if (level == 2 && 1201 !strcmp(path->dentry->d_name.name, OVL_INCOMPATDIR_NAME)) 1202 incompat = true; 1203 1204 err = ovl_dir_read(path, &rdd); 1205 if (err) 1206 goto out; 1207 1208 list_for_each_entry(p, &list, l_node) { 1209 struct dentry *dentry; 1210 1211 if (p->name[0] == '.') { 1212 if (p->len == 1) 1213 continue; 1214 if (p->len == 2 && p->name[1] == '.') 1215 continue; 1216 } else if (incompat) { 1217 pr_err("overlay with incompat feature '%s' cannot be mounted\n", 1218 p->name); 1219 err = -EINVAL; 1220 break; 1221 } 1222 dentry = ovl_lookup_upper_unlocked(ofs, p->name, path->dentry, p->len); 1223 if (IS_ERR(dentry)) 1224 continue; 1225 if (dentry->d_inode) 1226 err = ovl_workdir_cleanup(ofs, path->dentry, path->mnt, 1227 dentry, level); 1228 dput(dentry); 1229 if (err) 1230 break; 1231 } 1232 out: 1233 ovl_cache_free(&list); 1234 return err; 1235 } 1236 1237 int ovl_workdir_cleanup(struct ovl_fs *ofs, struct dentry *parent, 1238 struct vfsmount *mnt, struct dentry *dentry, int level) 1239 { 1240 int err; 1241 1242 if (!d_is_dir(dentry) || level > 1) 1243 return ovl_cleanup(ofs, parent, dentry); 1244 1245 err = ovl_parent_lock(parent, dentry); 1246 if (err) 1247 return err; 1248 err = ovl_do_rmdir(ofs, parent->d_inode, dentry); 1249 ovl_parent_unlock(parent); 1250 if (err) { 1251 struct path path = { .mnt = mnt, .dentry = dentry }; 1252 1253 err = ovl_workdir_cleanup_recurse(ofs, &path, level + 1); 1254 if (!err) 1255 err = ovl_cleanup(ofs, parent, dentry); 1256 } 1257 1258 return err; 1259 } 1260 1261 int ovl_indexdir_cleanup(struct ovl_fs *ofs) 1262 { 1263 int err; 1264 struct dentry *indexdir = ofs->workdir; 1265 struct dentry *index = NULL; 1266 struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = indexdir }; 1267 LIST_HEAD(list); 1268 struct ovl_cache_entry *p; 1269 struct ovl_readdir_data rdd = { 1270 .ctx.actor = ovl_fill_plain, 1271 .ctx.count = INT_MAX, 1272 .list = &list, 1273 }; 1274 1275 err = ovl_dir_read(&path, &rdd); 1276 if (err) 1277 goto out; 1278 1279 list_for_each_entry(p, &list, l_node) { 1280 if (p->name[0] == '.') { 1281 if (p->len == 1) 1282 continue; 1283 if (p->len == 2 && p->name[1] == '.') 1284 continue; 1285 } 1286 index = ovl_lookup_upper_unlocked(ofs, p->name, indexdir, p->len); 1287 if (IS_ERR(index)) { 1288 err = PTR_ERR(index); 1289 index = NULL; 1290 break; 1291 } 1292 /* Cleanup leftover from index create/cleanup attempt */ 1293 if (index->d_name.name[0] == '#') { 1294 err = ovl_workdir_cleanup(ofs, indexdir, path.mnt, index, 1); 1295 if (err) 1296 break; 1297 goto next; 1298 } 1299 err = ovl_verify_index(ofs, index); 1300 if (!err) { 1301 goto next; 1302 } else if (err == -ESTALE) { 1303 /* Cleanup stale index entries */ 1304 err = ovl_cleanup(ofs, indexdir, index); 1305 } else if (err != -ENOENT) { 1306 /* 1307 * Abort mount to avoid corrupting the index if 1308 * an incompatible index entry was found or on out 1309 * of memory. 1310 */ 1311 break; 1312 } else if (ofs->config.nfs_export) { 1313 /* 1314 * Whiteout orphan index to block future open by 1315 * handle after overlay nlink dropped to zero. 1316 */ 1317 err = ovl_cleanup_and_whiteout(ofs, indexdir, index); 1318 } else { 1319 /* Cleanup orphan index entries */ 1320 err = ovl_cleanup(ofs, indexdir, index); 1321 } 1322 1323 if (err) 1324 break; 1325 1326 next: 1327 dput(index); 1328 index = NULL; 1329 } 1330 dput(index); 1331 out: 1332 ovl_cache_free(&list); 1333 if (err) 1334 pr_err("failed index dir cleanup (%i)\n", err); 1335 return err; 1336 } 1337