1 /* 2 * 3 * Copyright (C) 2011 Novell Inc. 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 as published by 7 * the Free Software Foundation. 8 */ 9 10 #include <linux/fs.h> 11 #include <linux/slab.h> 12 #include <linux/namei.h> 13 #include <linux/file.h> 14 #include <linux/xattr.h> 15 #include <linux/rbtree.h> 16 #include <linux/security.h> 17 #include <linux/cred.h> 18 #include "overlayfs.h" 19 20 struct ovl_cache_entry { 21 unsigned int len; 22 unsigned int type; 23 u64 ino; 24 struct list_head l_node; 25 struct rb_node node; 26 struct ovl_cache_entry *next_maybe_whiteout; 27 bool is_whiteout; 28 char name[]; 29 }; 30 31 struct ovl_dir_cache { 32 long refcount; 33 u64 version; 34 struct list_head entries; 35 }; 36 37 struct ovl_readdir_data { 38 struct dir_context ctx; 39 bool is_lowest; 40 struct rb_root root; 41 struct list_head *list; 42 struct list_head middle; 43 struct ovl_cache_entry *first_maybe_whiteout; 44 int count; 45 int err; 46 bool d_type_supported; 47 }; 48 49 struct ovl_dir_file { 50 bool is_real; 51 bool is_upper; 52 struct ovl_dir_cache *cache; 53 struct list_head *cursor; 54 struct file *realfile; 55 struct file *upperfile; 56 }; 57 58 static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n) 59 { 60 return container_of(n, struct ovl_cache_entry, node); 61 } 62 63 static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root, 64 const char *name, int len) 65 { 66 struct rb_node *node = root->rb_node; 67 int cmp; 68 69 while (node) { 70 struct ovl_cache_entry *p = ovl_cache_entry_from_node(node); 71 72 cmp = strncmp(name, p->name, len); 73 if (cmp > 0) 74 node = p->node.rb_right; 75 else if (cmp < 0 || len < p->len) 76 node = p->node.rb_left; 77 else 78 return p; 79 } 80 81 return NULL; 82 } 83 84 static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd, 85 const char *name, int len, 86 u64 ino, unsigned int d_type) 87 { 88 struct ovl_cache_entry *p; 89 size_t size = offsetof(struct ovl_cache_entry, name[len + 1]); 90 91 p = kmalloc(size, GFP_KERNEL); 92 if (!p) 93 return NULL; 94 95 memcpy(p->name, name, len); 96 p->name[len] = '\0'; 97 p->len = len; 98 p->type = d_type; 99 p->ino = ino; 100 p->is_whiteout = false; 101 102 if (d_type == DT_CHR) { 103 p->next_maybe_whiteout = rdd->first_maybe_whiteout; 104 rdd->first_maybe_whiteout = p; 105 } 106 return p; 107 } 108 109 static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd, 110 const char *name, int len, u64 ino, 111 unsigned int d_type) 112 { 113 struct rb_node **newp = &rdd->root.rb_node; 114 struct rb_node *parent = NULL; 115 struct ovl_cache_entry *p; 116 117 while (*newp) { 118 int cmp; 119 struct ovl_cache_entry *tmp; 120 121 parent = *newp; 122 tmp = ovl_cache_entry_from_node(*newp); 123 cmp = strncmp(name, tmp->name, len); 124 if (cmp > 0) 125 newp = &tmp->node.rb_right; 126 else if (cmp < 0 || len < tmp->len) 127 newp = &tmp->node.rb_left; 128 else 129 return 0; 130 } 131 132 p = ovl_cache_entry_new(rdd, name, len, ino, d_type); 133 if (p == NULL) 134 return -ENOMEM; 135 136 list_add_tail(&p->l_node, rdd->list); 137 rb_link_node(&p->node, parent, newp); 138 rb_insert_color(&p->node, &rdd->root); 139 140 return 0; 141 } 142 143 static int ovl_fill_lowest(struct ovl_readdir_data *rdd, 144 const char *name, int namelen, 145 loff_t offset, u64 ino, unsigned int d_type) 146 { 147 struct ovl_cache_entry *p; 148 149 p = ovl_cache_entry_find(&rdd->root, name, namelen); 150 if (p) { 151 list_move_tail(&p->l_node, &rdd->middle); 152 } else { 153 p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type); 154 if (p == NULL) 155 rdd->err = -ENOMEM; 156 else 157 list_add_tail(&p->l_node, &rdd->middle); 158 } 159 160 return rdd->err; 161 } 162 163 void ovl_cache_free(struct list_head *list) 164 { 165 struct ovl_cache_entry *p; 166 struct ovl_cache_entry *n; 167 168 list_for_each_entry_safe(p, n, list, l_node) 169 kfree(p); 170 171 INIT_LIST_HEAD(list); 172 } 173 174 static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry) 175 { 176 struct ovl_dir_cache *cache = od->cache; 177 178 WARN_ON(cache->refcount <= 0); 179 cache->refcount--; 180 if (!cache->refcount) { 181 if (ovl_dir_cache(dentry) == cache) 182 ovl_set_dir_cache(dentry, NULL); 183 184 ovl_cache_free(&cache->entries); 185 kfree(cache); 186 } 187 } 188 189 static int ovl_fill_merge(struct dir_context *ctx, const char *name, 190 int namelen, loff_t offset, u64 ino, 191 unsigned int d_type) 192 { 193 struct ovl_readdir_data *rdd = 194 container_of(ctx, struct ovl_readdir_data, ctx); 195 196 rdd->count++; 197 if (!rdd->is_lowest) 198 return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type); 199 else 200 return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type); 201 } 202 203 static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd) 204 { 205 int err; 206 struct ovl_cache_entry *p; 207 struct dentry *dentry; 208 const struct cred *old_cred; 209 struct cred *override_cred; 210 211 override_cred = prepare_creds(); 212 if (!override_cred) 213 return -ENOMEM; 214 215 /* 216 * CAP_DAC_OVERRIDE for lookup 217 */ 218 cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); 219 old_cred = override_creds(override_cred); 220 221 inode_lock(dir->d_inode); 222 err = 0; 223 // XXX: err = mutex_lock_killable(&dir->d_inode->i_mutex); 224 if (!err) { 225 while (rdd->first_maybe_whiteout) { 226 p = rdd->first_maybe_whiteout; 227 rdd->first_maybe_whiteout = p->next_maybe_whiteout; 228 dentry = lookup_one_len(p->name, dir, p->len); 229 if (!IS_ERR(dentry)) { 230 p->is_whiteout = ovl_is_whiteout(dentry); 231 dput(dentry); 232 } 233 } 234 inode_unlock(dir->d_inode); 235 } 236 revert_creds(old_cred); 237 put_cred(override_cred); 238 239 return err; 240 } 241 242 static inline int ovl_dir_read(struct path *realpath, 243 struct ovl_readdir_data *rdd) 244 { 245 struct file *realfile; 246 int err; 247 248 realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY); 249 if (IS_ERR(realfile)) 250 return PTR_ERR(realfile); 251 252 rdd->first_maybe_whiteout = NULL; 253 rdd->ctx.pos = 0; 254 do { 255 rdd->count = 0; 256 rdd->err = 0; 257 err = iterate_dir(realfile, &rdd->ctx); 258 if (err >= 0) 259 err = rdd->err; 260 } while (!err && rdd->count); 261 262 if (!err && rdd->first_maybe_whiteout) 263 err = ovl_check_whiteouts(realpath->dentry, rdd); 264 265 fput(realfile); 266 267 return err; 268 } 269 270 static void ovl_dir_reset(struct file *file) 271 { 272 struct ovl_dir_file *od = file->private_data; 273 struct ovl_dir_cache *cache = od->cache; 274 struct dentry *dentry = file->f_path.dentry; 275 enum ovl_path_type type = ovl_path_type(dentry); 276 277 if (cache && ovl_dentry_version_get(dentry) != cache->version) { 278 ovl_cache_put(od, dentry); 279 od->cache = NULL; 280 od->cursor = NULL; 281 } 282 WARN_ON(!od->is_real && !OVL_TYPE_MERGE(type)); 283 if (od->is_real && OVL_TYPE_MERGE(type)) 284 od->is_real = false; 285 } 286 287 static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list) 288 { 289 int err; 290 struct path realpath; 291 struct ovl_readdir_data rdd = { 292 .ctx.actor = ovl_fill_merge, 293 .list = list, 294 .root = RB_ROOT, 295 .is_lowest = false, 296 }; 297 int idx, next; 298 299 for (idx = 0; idx != -1; idx = next) { 300 next = ovl_path_next(idx, dentry, &realpath); 301 302 if (next != -1) { 303 err = ovl_dir_read(&realpath, &rdd); 304 if (err) 305 break; 306 } else { 307 /* 308 * Insert lowest layer entries before upper ones, this 309 * allows offsets to be reasonably constant 310 */ 311 list_add(&rdd.middle, rdd.list); 312 rdd.is_lowest = true; 313 err = ovl_dir_read(&realpath, &rdd); 314 list_del(&rdd.middle); 315 } 316 } 317 return err; 318 } 319 320 static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos) 321 { 322 struct list_head *p; 323 loff_t off = 0; 324 325 list_for_each(p, &od->cache->entries) { 326 if (off >= pos) 327 break; 328 off++; 329 } 330 /* Cursor is safe since the cache is stable */ 331 od->cursor = p; 332 } 333 334 static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry) 335 { 336 int res; 337 struct ovl_dir_cache *cache; 338 339 cache = ovl_dir_cache(dentry); 340 if (cache && ovl_dentry_version_get(dentry) == cache->version) { 341 cache->refcount++; 342 return cache; 343 } 344 ovl_set_dir_cache(dentry, NULL); 345 346 cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL); 347 if (!cache) 348 return ERR_PTR(-ENOMEM); 349 350 cache->refcount = 1; 351 INIT_LIST_HEAD(&cache->entries); 352 353 res = ovl_dir_read_merged(dentry, &cache->entries); 354 if (res) { 355 ovl_cache_free(&cache->entries); 356 kfree(cache); 357 return ERR_PTR(res); 358 } 359 360 cache->version = ovl_dentry_version_get(dentry); 361 ovl_set_dir_cache(dentry, cache); 362 363 return cache; 364 } 365 366 static int ovl_iterate(struct file *file, struct dir_context *ctx) 367 { 368 struct ovl_dir_file *od = file->private_data; 369 struct dentry *dentry = file->f_path.dentry; 370 struct ovl_cache_entry *p; 371 372 if (!ctx->pos) 373 ovl_dir_reset(file); 374 375 if (od->is_real) 376 return iterate_dir(od->realfile, ctx); 377 378 if (!od->cache) { 379 struct ovl_dir_cache *cache; 380 381 cache = ovl_cache_get(dentry); 382 if (IS_ERR(cache)) 383 return PTR_ERR(cache); 384 385 od->cache = cache; 386 ovl_seek_cursor(od, ctx->pos); 387 } 388 389 while (od->cursor != &od->cache->entries) { 390 p = list_entry(od->cursor, struct ovl_cache_entry, l_node); 391 if (!p->is_whiteout) 392 if (!dir_emit(ctx, p->name, p->len, p->ino, p->type)) 393 break; 394 od->cursor = p->l_node.next; 395 ctx->pos++; 396 } 397 return 0; 398 } 399 400 static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin) 401 { 402 loff_t res; 403 struct ovl_dir_file *od = file->private_data; 404 405 inode_lock(file_inode(file)); 406 if (!file->f_pos) 407 ovl_dir_reset(file); 408 409 if (od->is_real) { 410 res = vfs_llseek(od->realfile, offset, origin); 411 file->f_pos = od->realfile->f_pos; 412 } else { 413 res = -EINVAL; 414 415 switch (origin) { 416 case SEEK_CUR: 417 offset += file->f_pos; 418 break; 419 case SEEK_SET: 420 break; 421 default: 422 goto out_unlock; 423 } 424 if (offset < 0) 425 goto out_unlock; 426 427 if (offset != file->f_pos) { 428 file->f_pos = offset; 429 if (od->cache) 430 ovl_seek_cursor(od, offset); 431 } 432 res = offset; 433 } 434 out_unlock: 435 inode_unlock(file_inode(file)); 436 437 return res; 438 } 439 440 static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, 441 int datasync) 442 { 443 struct ovl_dir_file *od = file->private_data; 444 struct dentry *dentry = file->f_path.dentry; 445 struct file *realfile = od->realfile; 446 447 /* 448 * Need to check if we started out being a lower dir, but got copied up 449 */ 450 if (!od->is_upper && OVL_TYPE_UPPER(ovl_path_type(dentry))) { 451 struct inode *inode = file_inode(file); 452 453 realfile = lockless_dereference(od->upperfile); 454 if (!realfile) { 455 struct path upperpath; 456 457 ovl_path_upper(dentry, &upperpath); 458 realfile = ovl_path_open(&upperpath, O_RDONLY); 459 smp_mb__before_spinlock(); 460 inode_lock(inode); 461 if (!od->upperfile) { 462 if (IS_ERR(realfile)) { 463 inode_unlock(inode); 464 return PTR_ERR(realfile); 465 } 466 od->upperfile = realfile; 467 } else { 468 /* somebody has beaten us to it */ 469 if (!IS_ERR(realfile)) 470 fput(realfile); 471 realfile = od->upperfile; 472 } 473 inode_unlock(inode); 474 } 475 } 476 477 return vfs_fsync_range(realfile, start, end, datasync); 478 } 479 480 static int ovl_dir_release(struct inode *inode, struct file *file) 481 { 482 struct ovl_dir_file *od = file->private_data; 483 484 if (od->cache) { 485 inode_lock(inode); 486 ovl_cache_put(od, file->f_path.dentry); 487 inode_unlock(inode); 488 } 489 fput(od->realfile); 490 if (od->upperfile) 491 fput(od->upperfile); 492 kfree(od); 493 494 return 0; 495 } 496 497 static int ovl_dir_open(struct inode *inode, struct file *file) 498 { 499 struct path realpath; 500 struct file *realfile; 501 struct ovl_dir_file *od; 502 enum ovl_path_type type; 503 504 od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL); 505 if (!od) 506 return -ENOMEM; 507 508 type = ovl_path_real(file->f_path.dentry, &realpath); 509 realfile = ovl_path_open(&realpath, file->f_flags); 510 if (IS_ERR(realfile)) { 511 kfree(od); 512 return PTR_ERR(realfile); 513 } 514 od->realfile = realfile; 515 od->is_real = !OVL_TYPE_MERGE(type); 516 od->is_upper = OVL_TYPE_UPPER(type); 517 file->private_data = od; 518 519 return 0; 520 } 521 522 const struct file_operations ovl_dir_operations = { 523 .read = generic_read_dir, 524 .open = ovl_dir_open, 525 .iterate = ovl_iterate, 526 .llseek = ovl_dir_llseek, 527 .fsync = ovl_dir_fsync, 528 .release = ovl_dir_release, 529 }; 530 531 int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list) 532 { 533 int err; 534 struct ovl_cache_entry *p; 535 536 err = ovl_dir_read_merged(dentry, list); 537 if (err) 538 return err; 539 540 err = 0; 541 542 list_for_each_entry(p, list, l_node) { 543 if (p->is_whiteout) 544 continue; 545 546 if (p->name[0] == '.') { 547 if (p->len == 1) 548 continue; 549 if (p->len == 2 && p->name[1] == '.') 550 continue; 551 } 552 err = -ENOTEMPTY; 553 break; 554 } 555 556 return err; 557 } 558 559 void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list) 560 { 561 struct ovl_cache_entry *p; 562 563 inode_lock_nested(upper->d_inode, I_MUTEX_CHILD); 564 list_for_each_entry(p, list, l_node) { 565 struct dentry *dentry; 566 567 if (!p->is_whiteout) 568 continue; 569 570 dentry = lookup_one_len(p->name, upper, p->len); 571 if (IS_ERR(dentry)) { 572 pr_err("overlayfs: lookup '%s/%.*s' failed (%i)\n", 573 upper->d_name.name, p->len, p->name, 574 (int) PTR_ERR(dentry)); 575 continue; 576 } 577 if (dentry->d_inode) 578 ovl_cleanup(upper->d_inode, dentry); 579 dput(dentry); 580 } 581 inode_unlock(upper->d_inode); 582 } 583 584 static int ovl_check_d_type(struct dir_context *ctx, const char *name, 585 int namelen, loff_t offset, u64 ino, 586 unsigned int d_type) 587 { 588 struct ovl_readdir_data *rdd = 589 container_of(ctx, struct ovl_readdir_data, ctx); 590 591 /* Even if d_type is not supported, DT_DIR is returned for . and .. */ 592 if (!strncmp(name, ".", namelen) || !strncmp(name, "..", namelen)) 593 return 0; 594 595 if (d_type != DT_UNKNOWN) 596 rdd->d_type_supported = true; 597 598 return 0; 599 } 600 601 /* 602 * Returns 1 if d_type is supported, 0 not supported/unknown. Negative values 603 * if error is encountered. 604 */ 605 int ovl_check_d_type_supported(struct path *realpath) 606 { 607 int err; 608 struct ovl_readdir_data rdd = { 609 .ctx.actor = ovl_check_d_type, 610 .d_type_supported = false, 611 }; 612 613 err = ovl_dir_read(realpath, &rdd); 614 if (err) 615 return err; 616 617 return rdd.d_type_supported; 618 } 619