1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 *
4 * Copyright (C) 2011 Novell Inc.
5 */
6
7 #include <linux/fs.h>
8 #include <linux/slab.h>
9 #include <linux/namei.h>
10 #include <linux/file.h>
11 #include <linux/filelock.h>
12 #include <linux/xattr.h>
13 #include <linux/rbtree.h>
14 #include <linux/security.h>
15 #include <linux/cred.h>
16 #include <linux/ratelimit.h>
17 #include <linux/overflow.h>
18 #include "overlayfs.h"
19
/*
 * One merged directory entry, cached for the lifetime of the dir cache.
 * Entries live both on a list (for ordered iteration) and in an rb-tree
 * (for whiteout/duplicate lookup by name).
 */
struct ovl_cache_entry {
	unsigned int len;		/* length of name[], excluding NUL */
	unsigned int type;		/* DT_* d_type value */
	u64 real_ino;			/* ino as reported by the real fs */
	u64 ino;			/* d_ino to emit; 0 = defer to ovl_iterate() */
	struct list_head l_node;	/* position in cache->entries */
	struct rb_node node;		/* position in cache->root */
	struct ovl_cache_entry *next_maybe_whiteout; /* DT_CHR candidates chain */
	bool is_upper;			/* entry came from the upper layer */
	bool is_whiteout;		/* whiteout or stale: do not emit */
	bool check_xwhiteout;		/* defer overlay.whiteout check to iterate */
	const char *c_name;		/* casefolded name (== name if not casefolded) */
	int c_len;			/* length of c_name */
	char name[];			/* entry name, NUL-terminated */
};
35
/* Per-directory cache of merged entries, shared between open dir files */
struct ovl_dir_cache {
	long refcount;			/* open files referencing this cache */
	u64 version;			/* inode version at fill time; staleness check */
	struct list_head entries;	/* ovl_cache_entry list in emit order */
	struct rb_root root;		/* entries indexed by (casefolded) name */
};
42
/* State passed to the dir_context actors while filling a cache */
struct ovl_readdir_data {
	struct dir_context ctx;
	struct dentry *dentry;		/* overlay dir; NULL when not for ovl_iterate() */
	bool is_lowest;			/* currently reading the lowest layer */
	struct rb_root *root;		/* dedup index being built */
	struct list_head *list;		/* output list of entries */
	struct list_head middle;	/* temp anchor for lowest-layer entries */
	struct ovl_cache_entry *first_maybe_whiteout; /* DT_CHR chain head */
	struct unicode_map *map;	/* casefold map, NULL if not casefolding */
	int count;			/* entries seen in current iterate_dir() pass */
	int err;			/* first error recorded by an actor */
	bool is_upper;			/* current layer is the upper layer */
	bool d_type_supported;		/* set by ovl_check_d_type() probe */
	bool in_xwhiteouts_dir;		/* dir may contain overlay.whiteout entries */
};
58
/* Per-open-file state for an overlay directory (file->private_data) */
struct ovl_dir_file {
	bool is_real;			/* can iterate the real dir directly */
	bool is_upper;			/* opened on the upper layer */
	struct ovl_dir_cache *cache;	/* merged entry cache, refcounted */
	struct list_head *cursor;	/* current position in cache->entries */
	struct file *realfile;		/* real underlying dir file */
	struct file *upperfile;		/* cached upper file if copied up after open */
};
67
/* Map an rb-tree node back to its containing ovl_cache_entry */
static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
{
	return rb_entry(n, struct ovl_cache_entry, node);
}
72
/*
 * Casefold @str into a freshly allocated buffer returned via *@dst.
 *
 * Returns the casefolded length (> 0) on success, in which case the
 * caller owns *@dst and must kfree() it (or hand it to a cache entry).
 * Returns 0 when casefolding does not apply (no CONFIG_UNICODE, no
 * encoding map, or the name is "." / ".."), and < 0 on error.  -ENOMEM
 * is also recorded in rdd->err to abort the directory iteration.
 */
static int ovl_casefold(struct ovl_readdir_data *rdd, const char *str, int len,
			char **dst)
{
	const struct qstr qstr = { .name = str, .len = len };
	char *cf_name;
	int cf_len;

	if (!IS_ENABLED(CONFIG_UNICODE) || !rdd->map ||
	    name_is_dot_dotdot(str, len))
		return 0;

	cf_name = kmalloc(NAME_MAX, GFP_KERNEL);
	if (!cf_name) {
		rdd->err = -ENOMEM;
		return -ENOMEM;
	}

	/* utf8_casefold() returns the new length, or < 0 on invalid utf8 */
	cf_len = utf8_casefold(rdd->map, &qstr, cf_name, NAME_MAX);
	if (cf_len > 0)
		*dst = cf_name;
	else
		kfree(cf_name);

	return cf_len;
}
98
ovl_cache_entry_find_link(const char * name,int len,struct rb_node *** link,struct rb_node ** parent)99 static bool ovl_cache_entry_find_link(const char *name, int len,
100 struct rb_node ***link,
101 struct rb_node **parent)
102 {
103 bool found = false;
104 struct rb_node **newp = *link;
105
106 while (!found && *newp) {
107 int cmp;
108 struct ovl_cache_entry *tmp;
109
110 *parent = *newp;
111 tmp = ovl_cache_entry_from_node(*newp);
112 cmp = strncmp(name, tmp->c_name, len);
113 if (cmp > 0)
114 newp = &tmp->node.rb_right;
115 else if (cmp < 0 || len < tmp->c_len)
116 newp = &tmp->node.rb_left;
117 else
118 found = true;
119 }
120 *link = newp;
121
122 return found;
123 }
124
ovl_cache_entry_find(struct rb_root * root,const char * name,int len)125 static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
126 const char *name, int len)
127 {
128 struct rb_node *node = root->rb_node;
129 int cmp;
130
131 while (node) {
132 struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
133
134 cmp = strncmp(name, p->c_name, len);
135 if (cmp > 0)
136 node = p->node.rb_right;
137 else if (cmp < 0 || len < p->c_len)
138 node = p->node.rb_left;
139 else
140 return p;
141 }
142
143 return NULL;
144 }
145
/*
 * Decide whether d_ino for this entry must be recalculated later in
 * ovl_iterate() (returns true), or whether the real fs inode number can
 * be emitted as-is (returns false).
 */
static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd,
			   struct ovl_cache_entry *p)
{
	/* Don't care if not doing ovl_iter() */
	if (!rdd->dentry)
		return false;

	/* Always recalc d_ino when remapping lower inode numbers */
	if (ovl_xino_bits(OVL_FS(rdd->dentry->d_sb)))
		return true;

	/* Always recalc d_ino for parent */
	if (name_is_dotdot(p->name, p->len))
		return true;

	/* If this is lower, then native d_ino will do */
	if (!rdd->is_upper)
		return false;

	/*
	 * Recalc d_ino for '.' and for all entries if dir is impure (contains
	 * copied up entries)
	 */
	if (name_is_dot(p->name, p->len) ||
	    ovl_test_flag(OVL_IMPURE, d_inode(rdd->dentry)))
		return true;

	return false;
}
175
/*
 * Allocate and initialize a cache entry for @name.
 *
 * If @c_name is a separately allocated casefolded name, ownership of it
 * transfers to the entry (freed in ovl_cache_entry_free()); otherwise
 * c_name aliases the embedded copy of @name.  DT_CHR entries are chained
 * on rdd->first_maybe_whiteout since whiteouts are char devices.
 * Returns NULL on allocation failure.
 */
static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
						   const char *name, int len,
						   const char *c_name, int c_len,
						   u64 ino, unsigned int d_type)
{
	struct ovl_cache_entry *p;

	/* flexible array member: struct plus name plus NUL */
	p = kmalloc_flex(*p, name, len + 1);
	if (!p)
		return NULL;

	memcpy(p->name, name, len);
	p->name[len] = '\0';
	p->len = len;
	p->type = d_type;
	p->real_ino = ino;
	p->ino = ino;
	/* Defer setting d_ino for upper entry to ovl_iterate() */
	if (ovl_calc_d_ino(rdd, p))
		p->ino = 0;
	p->is_upper = rdd->is_upper;
	p->is_whiteout = false;
	/* Defer check for overlay.whiteout to ovl_iterate() */
	p->check_xwhiteout = rdd->in_xwhiteouts_dir && d_type == DT_REG;

	if (c_name && c_name != name) {
		/* take ownership of the casefolded copy */
		p->c_name = c_name;
		p->c_len = c_len;
	} else {
		p->c_name = p->name;
		p->c_len = len;
	}

	if (d_type == DT_CHR) {
		/* might be a whiteout: queue for ovl_check_whiteouts() */
		p->next_maybe_whiteout = rdd->first_maybe_whiteout;
		rdd->first_maybe_whiteout = p;
	}
	return p;
}
215
/*
 * Insert a new entry into the list and rb-tree unless an entry with the
 * same (casefolded) name is already present — upper layers shadow lower.
 * Return 0 for found, 1 for added, <0 for error.
 */
static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
				  const char *name, int len,
				  const char *c_name, int c_len,
				  u64 ino,
				  unsigned int d_type)
{
	struct rb_node **newp = &rdd->root->rb_node;
	struct rb_node *parent = NULL;
	struct ovl_cache_entry *p;

	/* already seen in a higher layer: keep the existing entry */
	if (ovl_cache_entry_find_link(c_name, c_len, &newp, &parent))
		return 0;

	p = ovl_cache_entry_new(rdd, name, len, c_name, c_len, ino, d_type);
	if (p == NULL) {
		rdd->err = -ENOMEM;
		return -ENOMEM;
	}

	list_add_tail(&p->l_node, rdd->list);
	rb_link_node(&p->node, parent, newp);
	rb_insert_color(&p->node, rdd->root);

	return 1;
}
242
/*
 * Handle an entry while reading the lowest layer: entries also present
 * in upper layers are moved to the temporary "middle" list to keep
 * offsets stable; new entries are appended there.  Note: lowest-layer
 * entries are deliberately not added to the rb-tree.
 * Return 0 for found, 1 for added, <0 for error.
 */
static int ovl_fill_lowest(struct ovl_readdir_data *rdd,
			   const char *name, int namelen,
			   const char *c_name, int c_len,
			   loff_t offset, u64 ino, unsigned int d_type)
{
	struct ovl_cache_entry *p;

	p = ovl_cache_entry_find(rdd->root, c_name, c_len);
	if (p) {
		list_move_tail(&p->l_node, &rdd->middle);
		return 0;
	} else {
		p = ovl_cache_entry_new(rdd, name, namelen, c_name, c_len,
					ino, d_type);
		if (p == NULL)
			rdd->err = -ENOMEM;
		else
			list_add_tail(&p->l_node, &rdd->middle);
	}

	return rdd->err ?: 1;
}
266
/* Free an entry; c_name is owned by the entry only when casefolded */
static void ovl_cache_entry_free(struct ovl_cache_entry *p)
{
	if (p->c_name != p->name)
		kfree(p->c_name);
	kfree(p);
}
273
ovl_cache_free(struct list_head * list)274 void ovl_cache_free(struct list_head *list)
275 {
276 struct ovl_cache_entry *p;
277 struct ovl_cache_entry *n;
278
279 list_for_each_entry_safe(p, n, list, l_node)
280 ovl_cache_entry_free(p);
281
282 INIT_LIST_HEAD(list);
283 }
284
ovl_dir_cache_free(struct inode * inode)285 void ovl_dir_cache_free(struct inode *inode)
286 {
287 struct ovl_dir_cache *cache = ovl_dir_cache(inode);
288
289 if (cache) {
290 ovl_cache_free(&cache->entries);
291 kfree(cache);
292 }
293 }
294
/*
 * Drop one reference on od->cache; on the last put, detach it from the
 * inode (if still attached) and free it.  Caller holds the inode lock.
 */
static void ovl_cache_put(struct ovl_dir_file *od, struct inode *inode)
{
	struct ovl_dir_cache *cache = od->cache;

	WARN_ON(cache->refcount <= 0);
	cache->refcount--;
	if (!cache->refcount) {
		/* only detach if the inode still points at this cache */
		if (ovl_dir_cache(inode) == cache)
			ovl_set_dir_cache(inode, NULL);

		ovl_cache_free(&cache->entries);
		kfree(cache);
	}
}
309
/*
 * dir_context actor for building the merged cache.  Casefolds the name
 * when enabled, then dispatches to the rb-tree insert (upper/middle
 * layers) or the lowest-layer merge path.  Returns false to stop the
 * iteration on error.
 */
static bool ovl_fill_merge(struct dir_context *ctx, const char *name,
			   int namelen, loff_t offset, u64 ino,
			   unsigned int d_type)
{
	struct ovl_readdir_data *rdd =
		container_of(ctx, struct ovl_readdir_data, ctx);
	struct ovl_fs *ofs = OVL_FS(rdd->dentry->d_sb);
	const char *c_name = NULL;
	char *cf_name = NULL;
	int c_len = 0, ret;

	if (ofs->casefold)
		c_len = ovl_casefold(rdd, name, namelen, &cf_name);

	/* ovl_casefold() sets rdd->err on allocation failure */
	if (rdd->err)
		return false;

	/* fall back to the raw name if casefolding didn't apply */
	if (c_len <= 0) {
		c_name = name;
		c_len = namelen;
	} else {
		c_name = cf_name;
	}

	rdd->count++;
	if (!rdd->is_lowest)
		ret = ovl_cache_entry_add_rb(rdd, name, namelen, c_name, c_len, ino, d_type);
	else
		ret = ovl_fill_lowest(rdd, name, namelen, c_name, c_len, offset, ino, d_type);

	/*
	 * If ret == 1, that means that c_name is being used as part of struct
	 * ovl_cache_entry and will be freed at ovl_cache_free(). Otherwise,
	 * c_name was found in the rb-tree so we can free it here.
	 */
	if (ret != 1 && c_name != name)
		kfree(c_name);

	return ret >= 0;
}
350
/*
 * Resolve the deferred DT_CHR whiteout checks: look up each candidate
 * and mark it is_whiteout if it really is a whiteout device.  Lookup
 * failures other than -EINTR are ignored (entry simply stays visible);
 * -EINTR aborts so a killable iterator can bail out.
 */
static int ovl_check_whiteouts(const struct path *path, struct ovl_readdir_data *rdd)
{
	struct dentry *dentry, *dir = path->dentry;

	while (rdd->first_maybe_whiteout) {
		struct ovl_cache_entry *p =
			rdd->first_maybe_whiteout;
		rdd->first_maybe_whiteout = p->next_maybe_whiteout;
		dentry = lookup_one_positive_killable(mnt_idmap(path->mnt),
						      &QSTR_LEN(p->name, p->len),
						      dir);
		if (!IS_ERR(dentry)) {
			p->is_whiteout = ovl_is_whiteout(dentry);
			dput(dentry);
		} else if (PTR_ERR(dentry) == -EINTR) {
			return -EINTR;
		}
	}

	return 0;
}
372
/*
 * Open the real directory and feed its entries through rdd->ctx.actor.
 * iterate_dir() is repeated while it makes progress (rdd->count != 0),
 * since an actor may stop early.  Afterwards resolve any queued
 * maybe-whiteout entries.  Returns 0 or a negative error.
 */
static inline int ovl_dir_read(const struct path *realpath,
			       struct ovl_readdir_data *rdd)
{
	struct file *realfile;
	int err;

	realfile = ovl_path_open(realpath, O_RDONLY | O_LARGEFILE);
	if (IS_ERR(realfile))
		return PTR_ERR(realfile);

	rdd->first_maybe_whiteout = NULL;
	rdd->ctx.pos = 0;
	do {
		rdd->count = 0;
		rdd->err = 0;
		err = iterate_dir(realfile, &rdd->ctx);
		if (err >= 0)
			err = rdd->err;
	} while (!err && rdd->count);

	/* whiteout checks only matter when filling a merge cache (dentry set) */
	if (!err && rdd->first_maybe_whiteout && rdd->dentry)
		err = ovl_check_whiteouts(realpath, rdd);

	fput(realfile);

	return err;
}
400
/*
 * Called when seeking to offset 0: drop a stale cache (directory was
 * modified since it was filled) and re-evaluate whether the dir can be
 * iterated directly on the real fs.
 */
static void ovl_dir_reset(struct file *file)
{
	struct ovl_dir_file *od = file->private_data;
	struct ovl_dir_cache *cache = od->cache;
	struct inode *inode = file_inode(file);
	bool is_real;

	if (cache && ovl_inode_version_get(inode) != cache->version) {
		ovl_cache_put(od, inode);
		od->cache = NULL;
		od->cursor = NULL;
	}
	is_real = ovl_dir_is_real(inode);
	if (od->is_real != is_real) {
		/* is_real can only become false when dir is copied up */
		if (WARN_ON(is_real))
			return;
		od->is_real = false;
	}
}
421
/*
 * Read all layers of @dentry and merge them into @list/@root, from the
 * topmost layer down.  The final (lowest) layer is read in "is_lowest"
 * mode so its entries are spliced in before the upper ones via the
 * temporary middle list.  Returns 0 or a negative error.
 */
static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list,
			       struct rb_root *root)
{
	int err;
	struct path realpath;
	struct ovl_readdir_data rdd = {
		.ctx.actor = ovl_fill_merge,
		.ctx.count = INT_MAX,
		.dentry = dentry,
		.list = list,
		.root = root,
		.is_lowest = false,
		.map = NULL,
	};
	int idx, next;
	const struct ovl_layer *layer;
	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);

	for (idx = 0; idx != -1; idx = next) {
		next = ovl_path_next(idx, dentry, &realpath, &layer);

		/* casefold with the encoding of the layer being read */
		if (ofs->casefold)
			rdd.map = sb_encoding(realpath.dentry->d_sb);

		rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry;
		rdd.in_xwhiteouts_dir = layer->has_xwhiteouts &&
					ovl_dentry_has_xwhiteouts(dentry);

		if (next != -1) {
			err = ovl_dir_read(&realpath, &rdd);
			if (err)
				break;
		} else {
			/*
			 * Insert lowest layer entries before upper ones, this
			 * allows offsets to be reasonably constant
			 */
			list_add(&rdd.middle, rdd.list);
			rdd.is_lowest = true;
			err = ovl_dir_read(&realpath, &rdd);
			list_del(&rdd.middle);
		}
	}
	return err;
}
467
/* Position od->cursor at list index @pos (or at the list head if past end) */
static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
{
	struct list_head *node;
	loff_t index = 0;

	list_for_each(node, &od->cache->entries) {
		if (index >= pos)
			break;
		index++;
	}
	/* Cursor is safe since the cache is stable */
	od->cursor = node;
}
481
/*
 * Get the merged dir cache for @dentry, reusing the inode's cached copy
 * when its version still matches; otherwise build a fresh one and attach
 * it to the inode.  Returns the cache (refcount taken) or ERR_PTR.
 */
static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
{
	int res;
	struct ovl_dir_cache *cache;
	struct inode *inode = d_inode(dentry);

	cache = ovl_dir_cache(inode);
	if (cache && ovl_inode_version_get(inode) == cache->version) {
		WARN_ON(!cache->refcount);
		cache->refcount++;
		return cache;
	}
	/* stale cache stays alive via other holders' refs; just detach it */
	ovl_set_dir_cache(d_inode(dentry), NULL);

	cache = kzalloc_obj(struct ovl_dir_cache);
	if (!cache)
		return ERR_PTR(-ENOMEM);

	cache->refcount = 1;
	INIT_LIST_HEAD(&cache->entries);
	cache->root = RB_ROOT;

	res = ovl_dir_read_merged(dentry, &cache->entries, &cache->root);
	if (res) {
		ovl_cache_free(&cache->entries);
		kfree(cache);
		return ERR_PTR(res);
	}

	/* record version *after* the read so modifications invalidate us */
	cache->version = ovl_inode_version_get(inode);
	ovl_set_dir_cache(inode, cache);

	return cache;
}
516
/* Map inode number to lower fs unique range */
static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid,
			       const char *name, int namelen, bool warn)
{
	unsigned int xinoshift = 64 - xinobits;

	/* ino doesn't fit below the xino bits: can't remap, warn once in a while */
	if (unlikely(ino >> xinoshift)) {
		if (warn) {
			pr_warn_ratelimited("d_ino too big (%.*s, ino=%llu, xinobits=%d)\n",
					    namelen, name, ino, xinobits);
		}
		return ino;
	}

	/*
	 * The lowest xinobit is reserved for mapping the non-persistent inode
	 * numbers range, but this range is only exposed via st_ino, not here.
	 */
	return ino | ((u64)fsid) << (xinoshift + 1);
}
537
538 /*
539 * Set d_ino for upper entries if needed. Non-upper entries should always report
540 * the uppermost real inode ino and should not call this function.
541 *
 * When not all layers are on the same fs, report real ino also for upper.
543 *
544 * When all layers are on the same fs, and upper has a reference to
545 * copy up origin, call vfs_getattr() on the overlay entry to make
546 * sure that d_ino will be consistent with st_ino from stat(2).
547 *
548 * Also checks the overlay.whiteout xattr by doing a full lookup which will return
549 * negative in this case.
550 */
static int ovl_cache_update(const struct path *path, struct ovl_cache_entry *p, bool update_ino)
{
	struct dentry *dir = path->dentry;
	struct ovl_fs *ofs = OVL_FS(dir->d_sb);
	struct dentry *this = NULL;
	enum ovl_path_type type;
	u64 ino = p->real_ino;
	int xinobits = ovl_xino_bits(ofs);
	int err = 0;

	/* nothing to do: no consistent ino possible and no xwhiteout check */
	if (!ovl_same_dev(ofs) && !p->check_xwhiteout)
		goto out;

	/* "." and ".." resolve without a lookup */
	if (name_is_dot_dotdot(p->name, p->len)) {
		if (p->len == 1) {
			this = dget(dir);
			goto get;
		}
		if (p->len == 2) {
			/* we shall not be moved */
			this = dget(dir->d_parent);
			goto get;
		}
	}
	/* This checks also for xwhiteouts */
	this = lookup_one(mnt_idmap(path->mnt), &QSTR_LEN(p->name, p->len), dir);
	if (IS_ERR_OR_NULL(this) || !this->d_inode) {
		/* Mark a stale entry */
		p->is_whiteout = true;
		if (IS_ERR(this)) {
			err = PTR_ERR(this);
			this = NULL;
			goto fail;
		}
		goto out;
	}

get:
	/* ino update only makes sense when all layers share a device */
	if (!ovl_same_dev(ofs) || !update_ino)
		goto out;

	type = ovl_path_type(this);
	if (OVL_TYPE_ORIGIN(type)) {
		/* copied up: ask the overlay inode for the consistent st_ino */
		struct kstat stat;
		struct path statpath = *path;

		statpath.dentry = this;
		err = vfs_getattr(&statpath, &stat, STATX_INO, 0);
		if (err)
			goto fail;

		/*
		 * Directory inode is always on overlay st_dev.
		 * Non-dir with ovl_same_dev() could be on pseudo st_dev in case
		 * of xino bits overflow.
		 */
		WARN_ON_ONCE(S_ISDIR(stat.mode) &&
			     dir->d_sb->s_dev != stat.dev);
		ino = stat.ino;
	} else if (xinobits && !OVL_TYPE_UPPER(type)) {
		ino = ovl_remap_lower_ino(ino, xinobits,
					  ovl_layer_lower(this)->fsid,
					  p->name, p->len,
					  ovl_xino_warn(ofs));
	}

out:
	p->ino = ino;
	dput(this);
	return err;

fail:
	pr_warn_ratelimited("failed to look up (%s) for ino (%i)\n",
			    p->name, err);
	goto out;
}
628
ovl_fill_plain(struct dir_context * ctx,const char * name,int namelen,loff_t offset,u64 ino,unsigned int d_type)629 static bool ovl_fill_plain(struct dir_context *ctx, const char *name,
630 int namelen, loff_t offset, u64 ino,
631 unsigned int d_type)
632 {
633 struct ovl_cache_entry *p;
634 struct ovl_readdir_data *rdd =
635 container_of(ctx, struct ovl_readdir_data, ctx);
636
637 rdd->count++;
638 p = ovl_cache_entry_new(rdd, name, namelen, NULL, 0, ino, d_type);
639 if (p == NULL) {
640 rdd->err = -ENOMEM;
641 return false;
642 }
643 list_add_tail(&p->l_node, rdd->list);
644
645 return true;
646 }
647
/*
 * Build the "impure" cache for an upper dir: read all upper entries,
 * recalculate d_ino for each, and keep only those whose d_ino differs
 * from the real inode number (entries with copy-up origin).  Kept
 * entries are indexed in @root for lookup from ovl_fill_real().
 */
static int ovl_dir_read_impure(const struct path *path, struct list_head *list,
			       struct rb_root *root)
{
	int err;
	struct path realpath;
	struct ovl_cache_entry *p, *n;
	struct ovl_readdir_data rdd = {
		.ctx.actor = ovl_fill_plain,
		.ctx.count = INT_MAX,
		.list = list,
		.root = root,
	};

	INIT_LIST_HEAD(list);
	*root = RB_ROOT;
	ovl_path_upper(path->dentry, &realpath);

	err = ovl_dir_read(&realpath, &rdd);
	if (err)
		return err;

	list_for_each_entry_safe(p, n, list, l_node) {
		if (!name_is_dot_dotdot(p->name, p->len)) {
			err = ovl_cache_update(path, p, true);
			if (err)
				return err;
		}
		if (p->ino == p->real_ino) {
			/* pure entry: real d_ino is fine, drop it */
			list_del(&p->l_node);
			ovl_cache_entry_free(p);
		} else {
			struct rb_node **newp = &root->rb_node;
			struct rb_node *parent = NULL;

			/* upper dir entries are unique, a dup means corruption */
			if (WARN_ON(ovl_cache_entry_find_link(p->name, p->len,
							      &newp, &parent)))
				return -EIO;

			rb_link_node(&p->node, parent, newp);
			rb_insert_color(&p->node, root);
		}
	}
	return 0;
}
692
/*
 * Get (or build) the impure-dir cache for @path.  Unlike the merge
 * cache this one is not refcounted: it is owned by the inode and freed
 * in ovl_dir_cache_free() or here when stale.  Returns NULL when the
 * dir turned out to have no impure entries (also clears the flag),
 * or ERR_PTR on failure.
 */
static struct ovl_dir_cache *ovl_cache_get_impure(const struct path *path)
{
	int res;
	struct dentry *dentry = path->dentry;
	struct inode *inode = d_inode(dentry);
	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
	struct ovl_dir_cache *cache;

	cache = ovl_dir_cache(inode);
	if (cache && ovl_inode_version_get(inode) == cache->version)
		return cache;

	/* Impure cache is not refcounted, free it here */
	ovl_dir_cache_free(inode);
	ovl_set_dir_cache(inode, NULL);

	cache = kzalloc_obj(struct ovl_dir_cache);
	if (!cache)
		return ERR_PTR(-ENOMEM);

	res = ovl_dir_read_impure(path, &cache->entries, &cache->root);
	if (res) {
		ovl_cache_free(&cache->entries);
		kfree(cache);
		return ERR_PTR(res);
	}
	if (list_empty(&cache->entries)) {
		/*
		 * A good opportunity to get rid of an unneeded "impure" flag.
		 * Removing the "impure" xattr is best effort.
		 */
		if (!ovl_want_write(dentry)) {
			ovl_removexattr(ofs, ovl_dentry_upper(dentry),
					OVL_XATTR_IMPURE);
			ovl_drop_write(dentry);
		}
		ovl_clear_flag(OVL_IMPURE, inode);
		kfree(cache);
		return NULL;
	}

	cache->version = ovl_inode_version_get(inode);
	ovl_set_dir_cache(inode, cache);

	return cache;
}
739
/* State for translating d_ino while passing real dir entries through */
struct ovl_readdir_translate {
	struct dir_context *orig_ctx;	/* caller's context to forward entries to */
	struct ovl_dir_cache *cache;	/* impure cache with precomputed inos, or NULL */
	struct dir_context ctx;
	u64 parent_ino;			/* ino to report for "..", 0 if unset */
	int fsid;			/* lower layer fsid for xino remapping */
	int xinobits;			/* nonzero enables xino remapping */
	bool xinowarn;			/* warn on xino overflow */
};
749
/*
 * dir_context actor wrapping the caller's actor: fix up d_ino for "..",
 * for impure entries (from the cache), or by xino remapping, then
 * forward the entry to the original context.
 */
static bool ovl_fill_real(struct dir_context *ctx, const char *name,
			  int namelen, loff_t offset, u64 ino,
			  unsigned int d_type)
{
	struct ovl_readdir_translate *rdt =
		container_of(ctx, struct ovl_readdir_translate, ctx);
	struct dir_context *orig_ctx = rdt->orig_ctx;
	bool res;

	if (rdt->parent_ino && name_is_dotdot(name, namelen)) {
		ino = rdt->parent_ino;
	} else if (rdt->cache) {
		struct ovl_cache_entry *p;

		/* impure dir: cache holds the corrected ino if this entry needs one */
		p = ovl_cache_entry_find(&rdt->cache->root, name, namelen);
		if (p)
			ino = p->ino;
	} else if (rdt->xinobits) {
		ino = ovl_remap_lower_ino(ino, rdt->xinobits, rdt->fsid,
					  name, namelen, rdt->xinowarn);
	}

	res = orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type);
	/* propagate remaining buffer space back to our context */
	ctx->count = orig_ctx->count;

	return res;
}
777
/* True when this open file should consult the impure-dir cache */
static bool ovl_is_impure_dir(struct file *file)
{
	struct ovl_dir_file *od = file->private_data;
	struct inode *dir = file_inode(file);

	/*
	 * Only upper dir can be impure, but if we are in the middle of
	 * iterating a lower real dir, dir could be copied up and marked
	 * impure. We only want the impure cache if we started iterating
	 * a real upper dir to begin with.
	 */
	return od->is_upper && ovl_test_flag(OVL_IMPURE, dir);

}
792
/*
 * Iterate a real (non-merge) dir through the translating actor so that
 * d_ino values stay consistent: remap lower inos via xino, fix ".."
 * when the parent is a merge dir, and use the impure cache for copied
 * up entries.
 */
static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
{
	int err;
	struct ovl_dir_file *od = file->private_data;
	struct dentry *dir = file->f_path.dentry;
	struct ovl_fs *ofs = OVL_FS(dir->d_sb);
	const struct ovl_layer *lower_layer = ovl_layer_lower(dir);
	struct ovl_readdir_translate rdt = {
		.ctx.actor = ovl_fill_real,
		.ctx.count = ctx->count,
		.orig_ctx = ctx,
		.xinobits = ovl_xino_bits(ofs),
		.xinowarn = ovl_xino_warn(ofs),
	};

	if (rdt.xinobits && lower_layer)
		rdt.fsid = lower_layer->fsid;

	if (OVL_TYPE_MERGE(ovl_path_type(dir->d_parent))) {
		/* parent is merge: ".." must report the overlay parent's ino */
		struct kstat stat;
		struct path statpath = file->f_path;

		statpath.dentry = dir->d_parent;
		err = vfs_getattr(&statpath, &stat, STATX_INO, 0);
		if (err)
			return err;

		WARN_ON_ONCE(dir->d_sb->s_dev != stat.dev);
		rdt.parent_ino = stat.ino;
	}

	if (ovl_is_impure_dir(file)) {
		rdt.cache = ovl_cache_get_impure(&file->f_path);
		if (IS_ERR(rdt.cache))
			return PTR_ERR(rdt.cache);
	}

	err = iterate_dir(od->realfile, &rdt.ctx);
	ctx->pos = rdt.ctx.pos;

	return err;
}
835
/*
 * Iterate a merge dir from the cached entry list, resolving deferred
 * d_ino and xwhiteout checks lazily per entry, and skipping whiteouts.
 * ctx->pos is the index into the cache list.
 */
static int ovl_iterate_merged(struct file *file, struct dir_context *ctx)
{
	struct ovl_dir_file *od = file->private_data;
	struct dentry *dentry = file->f_path.dentry;
	struct ovl_cache_entry *p;
	int err = 0;

	if (!od->cache) {
		struct ovl_dir_cache *cache;

		cache = ovl_cache_get(dentry);
		err = PTR_ERR(cache);
		if (IS_ERR(cache))
			return err;

		od->cache = cache;
		ovl_seek_cursor(od, ctx->pos);
	}

	while (od->cursor != &od->cache->entries) {
		p = list_entry(od->cursor, struct ovl_cache_entry, l_node);
		if (!p->is_whiteout) {
			/* resolve deferred d_ino / overlay.whiteout check */
			if (!p->ino || p->check_xwhiteout) {
				err = ovl_cache_update(&file->f_path, p, !p->ino);
				if (err)
					return err;
			}
		}
		/* ovl_cache_update() sets is_whiteout on stale entry */
		if (!p->is_whiteout) {
			if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
				break;
		}
		od->cursor = p->l_node.next;
		ctx->pos++;
	}
	return err;
}
874
/*
 * Decide whether a real dir iteration must go through the d_ino
 * translating wrapper (ovl_iterate_real) instead of plain iterate_dir().
 */
static bool ovl_need_adjust_d_ino(struct file *file)
{
	struct dentry *dentry = file->f_path.dentry;
	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);

	/* Always adjust d_ino when remapping lower inode numbers with xino */
	if (ovl_xino_bits(ofs))
		return true;

	/* Can't do consistent inode numbering */
	if (!ovl_same_fs(ofs))
		return false;

	/*
	 * If dir is impure then need to adjust d_ino for copied up entries;
	 * if parent is merge, then need to adjust d_ino for '..'
	 */
	if (ovl_is_impure_dir(file) ||
	    OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent)))
		return true;

	/* Pure: no need to adjust d_ino */
	return false;
}
896
897
/*
 * ->iterate_shared() entry point: dispatch to the merged-cache path,
 * the d_ino translating path, or plain real-dir iteration, under
 * overlay credentials.
 */
static int ovl_iterate(struct file *file, struct dir_context *ctx)
{
	struct ovl_dir_file *od = file->private_data;

	/* pos 0 means rewind: revalidate cache and is_real state */
	if (!ctx->pos)
		ovl_dir_reset(file);

	with_ovl_creds(file_dentry(file)->d_sb) {
		if (!od->is_real)
			return ovl_iterate_merged(file, ctx);

		if (ovl_need_adjust_d_ino(file))
			return ovl_iterate_real(file, ctx);

		return iterate_dir(od->realfile, ctx);
	}
}
915
/*
 * llseek for overlay dirs.  Real dirs delegate to the underlying file;
 * merge dirs treat the offset as an index into the cached entry list.
 * Serialized against iteration by the inode lock.
 */
static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
{
	loff_t res;
	struct ovl_dir_file *od = file->private_data;

	inode_lock(file_inode(file));
	if (!file->f_pos)
		ovl_dir_reset(file);

	if (od->is_real) {
		res = vfs_llseek(od->realfile, offset, origin);
		file->f_pos = od->realfile->f_pos;
	} else {
		res = -EINVAL;

		switch (origin) {
		case SEEK_CUR:
			offset += file->f_pos;
			break;
		case SEEK_SET:
			break;
		default:
			goto out_unlock;
		}
		if (offset < 0)
			goto out_unlock;

		if (offset != file->f_pos) {
			file->f_pos = offset;
			/* cursor only exists once a cache has been built */
			if (od->cache)
				ovl_seek_cursor(od, offset);
		}
		res = offset;
	}
out_unlock:
	inode_unlock(file_inode(file));

	return res;
}
955
/* Open the real dir with overlay creds, preserving the caller's O_LARGEFILE */
static struct file *ovl_dir_open_realfile(const struct file *file,
					  const struct path *realpath)
{
	with_ovl_creds(file_inode(file)->i_sb)
		return ovl_path_open(realpath, O_RDONLY | (file->f_flags & O_LARGEFILE));
}
962
963 /*
964 * Like ovl_real_fdget(), returns upperfile if dir was copied up since open.
965 * Unlike ovl_real_fdget(), this caches upperfile in file->private_data.
966 *
967 * TODO: use same abstract type for file->private_data of dir and file so
968 * upperfile could also be cached for files as well.
969 */
struct file *ovl_dir_real_file(const struct file *file, bool want_upper)
{

	struct ovl_dir_file *od = file->private_data;
	struct dentry *dentry = file->f_path.dentry;
	struct file *old, *realfile = od->realfile;

	/* still lower-only: no upper file exists */
	if (!OVL_TYPE_UPPER(ovl_path_type(dentry)))
		return want_upper ? NULL : realfile;

	/*
	 * Need to check if we started out being a lower dir, but got copied up
	 */
	if (!od->is_upper) {
		realfile = READ_ONCE(od->upperfile);
		if (!realfile) {
			struct path upperpath;

			ovl_path_upper(dentry, &upperpath);
			realfile = ovl_dir_open_realfile(file, &upperpath);
			if (IS_ERR(realfile))
				return realfile;

			/* publish; if we raced, keep the winner and drop ours */
			old = cmpxchg_release(&od->upperfile, NULL, realfile);
			if (old) {
				fput(realfile);
				realfile = old;
			}
		}
	}

	return realfile;
}
1003
/*
 * fsync for overlay dirs: sync the upper dir file if one exists.
 * Lower-only dirs are read-only so there is nothing to sync.
 */
static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
			 int datasync)
{
	struct file *realfile;
	int err;

	/* <= 0: error, or syncing not needed/possible on this fs */
	err = ovl_sync_status(OVL_FS(file_inode(file)->i_sb));
	if (err <= 0)
		return err;

	realfile = ovl_dir_real_file(file, true);
	err = PTR_ERR_OR_ZERO(realfile);

	/* Nothing to sync for lower */
	if (!realfile || err)
		return err;

	return vfs_fsync_range(realfile, start, end, datasync);
}
1023
/* Release an open overlay dir: drop cache ref and underlying files */
static int ovl_dir_release(struct inode *inode, struct file *file)
{
	struct ovl_dir_file *od = file->private_data;

	if (od->cache) {
		/* cache refcount is protected by the inode lock */
		inode_lock(inode);
		ovl_cache_put(od, inode);
		inode_unlock(inode);
	}
	fput(od->realfile);
	if (od->upperfile)
		fput(od->upperfile);
	kfree(od);

	return 0;
}
1040
ovl_dir_open(struct inode * inode,struct file * file)1041 static int ovl_dir_open(struct inode *inode, struct file *file)
1042 {
1043 struct path realpath;
1044 struct file *realfile;
1045 struct ovl_dir_file *od;
1046 enum ovl_path_type type;
1047
1048 od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);
1049 if (!od)
1050 return -ENOMEM;
1051
1052 type = ovl_path_real(file->f_path.dentry, &realpath);
1053 realfile = ovl_dir_open_realfile(file, &realpath);
1054 if (IS_ERR(realfile)) {
1055 kfree(od);
1056 return PTR_ERR(realfile);
1057 }
1058 od->realfile = realfile;
1059 od->is_real = ovl_dir_is_real(inode);
1060 od->is_upper = OVL_TYPE_UPPER(type);
1061 file->private_data = od;
1062
1063 return 0;
1064 }
1065
WRAP_DIR_ITER(ovl_iterate) // FIXME!
/* File operations for overlay directories */
const struct file_operations ovl_dir_operations = {
	.read		= generic_read_dir,
	.open		= ovl_dir_open,
	.iterate_shared	= shared_ovl_iterate,
	.llseek		= ovl_dir_llseek,
	.fsync		= ovl_dir_fsync,
	.release	= ovl_dir_release,
	.setlease	= generic_setlease,
};
1076
/*
 * Check that a merge dir is empty (only "."/".." and whiteouts).
 * On success @list is left holding the upper-layer whiteouts so the
 * caller can clear them via ovl_cleanup_whiteouts().
 * Returns 0 if empty, -ENOTEMPTY otherwise, or another negative error.
 */
int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
{
	int err;
	struct ovl_cache_entry *p, *n;
	struct rb_root root = RB_ROOT;

	with_ovl_creds(dentry->d_sb)
		err = ovl_dir_read_merged(dentry, list, &root);
	if (err)
		return err;

	err = 0;

	list_for_each_entry_safe(p, n, list, l_node) {
		/*
		 * Select whiteouts in upperdir, they should
		 * be cleared when deleting this directory.
		 */
		if (p->is_whiteout) {
			if (p->is_upper)
				continue;
			goto del_entry;
		}

		if (name_is_dot_dotdot(p->name, p->len))
			goto del_entry;
		/* real entry found: dir is not empty */
		err = -ENOTEMPTY;
		break;

del_entry:
		list_del(&p->l_node);
		ovl_cache_entry_free(p);
	}

	return err;
}
1113
/*
 * Remove the upper-layer whiteout entries collected by
 * ovl_check_empty_dir().  Best effort: lookup failures are logged and
 * skipped.  Caller is responsible for freeing the list entries.
 */
void ovl_cleanup_whiteouts(struct ovl_fs *ofs, struct dentry *upper,
			   struct list_head *list)
{
	struct ovl_cache_entry *p;

	list_for_each_entry(p, list, l_node) {
		struct dentry *dentry;

		/* list must only hold upper whiteouts at this point */
		if (WARN_ON(!p->is_whiteout || !p->is_upper))
			continue;

		dentry = ovl_lookup_upper_unlocked(ofs, p->name, upper, p->len);
		if (IS_ERR(dentry)) {
			pr_err("lookup '%s/%.*s' failed (%i)\n",
			       upper->d_name.name, p->len, p->name,
			       (int) PTR_ERR(dentry));
			continue;
		}
		if (dentry->d_inode)
			ovl_cleanup(ofs, upper, dentry);
		dput(dentry);
	}
}
1137
ovl_check_d_type(struct dir_context * ctx,const char * name,int namelen,loff_t offset,u64 ino,unsigned int d_type)1138 static bool ovl_check_d_type(struct dir_context *ctx, const char *name,
1139 int namelen, loff_t offset, u64 ino,
1140 unsigned int d_type)
1141 {
1142 struct ovl_readdir_data *rdd =
1143 container_of(ctx, struct ovl_readdir_data, ctx);
1144
1145 /* Even if d_type is not supported, DT_DIR is returned for . and .. */
1146 if (name_is_dot_dotdot(name, namelen))
1147 return true;
1148
1149 if (d_type != DT_UNKNOWN)
1150 rdd->d_type_supported = true;
1151
1152 return true;
1153 }
1154
1155 /*
1156 * Returns 1 if d_type is supported, 0 not supported/unknown. Negative values
1157 * if error is encountered.
1158 */
ovl_check_d_type_supported(const struct path * realpath)1159 int ovl_check_d_type_supported(const struct path *realpath)
1160 {
1161 int err;
1162 struct ovl_readdir_data rdd = {
1163 .ctx.actor = ovl_check_d_type,
1164 .ctx.count = INT_MAX,
1165 .d_type_supported = false,
1166 };
1167
1168 err = ovl_dir_read(realpath, &rdd);
1169 if (err)
1170 return err;
1171
1172 return rdd.d_type_supported;
1173 }
1174
1175 #define OVL_INCOMPATDIR_NAME "incompat"
1176
/*
 * Delete the contents of one workdir subdirectory.
 *
 * Reads the directory at @path and removes each entry via
 * ovl_workdir_cleanup(), which recurses one level deeper for
 * subdirectories.  @level is the depth below the workdir root and is used
 * to recognize the special "work/incompat" directory.
 *
 * Returns 0 on success or a negative errno (the first failure aborts the
 * walk; the cache list is freed either way).
 */
static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, const struct path *path,
				       int level)
{
	int err;
	LIST_HEAD(list);
	struct ovl_cache_entry *p;
	struct ovl_readdir_data rdd = {
		.ctx.actor = ovl_fill_plain,
		.ctx.count = INT_MAX,
		.list = &list,
	};
	bool incompat = false;

	/*
	 * The "work/incompat" directory is treated specially - if it is not
	 * empty, instead of printing a generic error and mounting read-only,
	 * we will error about incompat features and fail the mount.
	 *
	 * When called from ovl_indexdir_cleanup(), path->dentry->d_name.name
	 * starts with '#'.
	 */
	if (level == 2 &&
	    !strcmp(path->dentry->d_name.name, OVL_INCOMPATDIR_NAME))
		incompat = true;

	err = ovl_dir_read(path, &rdd);
	if (err)
		goto out;

	list_for_each_entry(p, &list, l_node) {
		struct dentry *dentry;

		if (name_is_dot_dotdot(p->name, p->len)) {
			continue;
		} else if (incompat) {
			/* Any real entry in "incompat" aborts the mount */
			pr_err("overlay with incompat feature '%s' cannot be mounted\n",
			       p->name);
			err = -EINVAL;
			break;
		}
		/* Lookup failures are skipped - cleanup is best effort */
		dentry = ovl_lookup_upper_unlocked(ofs, p->name, path->dentry, p->len);
		if (IS_ERR(dentry))
			continue;
		if (dentry->d_inode)
			err = ovl_workdir_cleanup(ofs, path->dentry, path->mnt,
						  dentry, level);
		dput(dentry);
		if (err)
			break;
	}
out:
	ovl_cache_free(&list);
	return err;
}
1231
/*
 * Remove one workdir entry, recursing into directories.
 *
 * Non-directories, and directories deeper than one level below the
 * workdir root (@level > 1), are removed directly with ovl_cleanup().
 * Otherwise rmdir is attempted first; if that fails (presumably because
 * the directory is not empty), its contents are deleted recursively and
 * the directory is then removed.
 *
 * Returns 0 on success or a negative errno.
 */
int ovl_workdir_cleanup(struct ovl_fs *ofs, struct dentry *parent,
			struct vfsmount *mnt, struct dentry *dentry, int level)
{
	int err;

	if (!d_is_dir(dentry) || level > 1)
		return ovl_cleanup(ofs, parent, dentry);

	/*
	 * NOTE(review): start_removing_dentry() reassigns @dentry - it looks
	 * like it may return an error or a replacement dentry; confirm
	 * against its definition.
	 */
	dentry = start_removing_dentry(parent, dentry);
	if (IS_ERR(dentry))
		return PTR_ERR(dentry);
	err = ovl_do_rmdir(ofs, parent->d_inode, dentry);
	end_removing(dentry);
	if (err) {
		struct path path = { .mnt = mnt, .dentry = dentry };

		/* rmdir failed: empty the directory, then remove it */
		err = ovl_workdir_cleanup_recurse(ofs, &path, level + 1);
		if (!err)
			err = ovl_cleanup(ofs, parent, dentry);
	}

	return err;
}
1255
/*
 * Verify and clean up the index directory (ofs->workdir) at mount time.
 *
 * Iterates all index entries: leftovers from an index create/cleanup
 * attempt (names starting with '#') are removed via ovl_workdir_cleanup();
 * entries failing ovl_verify_index() with -ESTALE are cleaned up; orphan
 * entries (-ENOENT) are either whiteouted (if nfs_export is enabled) or
 * removed; any other verification error aborts the walk to avoid
 * corrupting the index further.
 *
 * Returns 0 on success or a negative errno (also logged).
 */
int ovl_indexdir_cleanup(struct ovl_fs *ofs)
{
	int err;
	struct dentry *indexdir = ofs->workdir;
	struct dentry *index = NULL;
	struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = indexdir };
	LIST_HEAD(list);
	struct ovl_cache_entry *p;
	struct ovl_readdir_data rdd = {
		.ctx.actor = ovl_fill_plain,
		.ctx.count = INT_MAX,
		.list = &list,
	};

	err = ovl_dir_read(&path, &rdd);
	if (err)
		goto out;

	list_for_each_entry(p, &list, l_node) {
		if (name_is_dot_dotdot(p->name, p->len))
			continue;
		index = ovl_lookup_upper_unlocked(ofs, p->name, indexdir, p->len);
		if (IS_ERR(index)) {
			err = PTR_ERR(index);
			/* Clear so the final dput() below is a no-op */
			index = NULL;
			break;
		}
		/* Cleanup leftover from index create/cleanup attempt */
		if (index->d_name.name[0] == '#') {
			err = ovl_workdir_cleanup(ofs, indexdir, path.mnt, index, 1);
			if (err)
				break;
			goto next;
		}
		err = ovl_verify_index(ofs, index);
		if (!err) {
			goto next;
		} else if (err == -ESTALE) {
			/* Cleanup stale index entries */
			err = ovl_cleanup(ofs, indexdir, index);
		} else if (err != -ENOENT) {
			/*
			 * Abort mount to avoid corrupting the index if
			 * an incompatible index entry was found or on out
			 * of memory.
			 */
			break;
		} else if (ofs->config.nfs_export) {
			/*
			 * Whiteout orphan index to block future open by
			 * handle after overlay nlink dropped to zero.
			 */
			err = ovl_cleanup_and_whiteout(ofs, indexdir, index);
		} else {
			/* Cleanup orphan index entries */
			err = ovl_cleanup(ofs, indexdir, index);
		}

		if (err)
			break;

next:
		dput(index);
		index = NULL;
	}
	/* Drop the reference still held when the loop broke early */
	dput(index);
out:
	ovl_cache_free(&list);
	if (err)
		pr_err("failed index dir cleanup (%i)\n", err);
	return err;
}
1328