1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 *
4 * Copyright (C) 2011 Novell Inc.
5 */
6
7 #include <linux/fs.h>
8 #include <linux/slab.h>
9 #include <linux/namei.h>
10 #include <linux/file.h>
11 #include <linux/filelock.h>
12 #include <linux/xattr.h>
13 #include <linux/rbtree.h>
14 #include <linux/security.h>
15 #include <linux/cred.h>
16 #include <linux/ratelimit.h>
17 #include <linux/overflow.h>
18 #include "overlayfs.h"
19
20 struct ovl_cache_entry {
21 unsigned int len;
22 unsigned int type;
23 u64 real_ino;
24 u64 ino;
25 struct list_head l_node;
26 struct rb_node node;
27 struct ovl_cache_entry *next_maybe_whiteout;
28 bool is_upper;
29 bool is_whiteout;
30 bool check_xwhiteout;
31 const char *c_name;
32 int c_len;
33 char name[];
34 };
35
36 struct ovl_dir_cache {
37 long refcount;
38 u64 version;
39 struct list_head entries;
40 struct rb_root root;
41 };
42
43 struct ovl_readdir_data {
44 struct dir_context ctx;
45 struct dentry *dentry;
46 bool is_lowest;
47 struct rb_root *root;
48 struct list_head *list;
49 struct list_head middle;
50 struct ovl_cache_entry *first_maybe_whiteout;
51 struct unicode_map *map;
52 int count;
53 int err;
54 bool is_upper;
55 bool d_type_supported;
56 bool in_xwhiteouts_dir;
57 };
58
59 struct ovl_dir_file {
60 bool is_real;
61 bool is_upper;
62 struct ovl_dir_cache *cache;
63 struct list_head *cursor;
64 struct file *realfile;
65 struct file *upperfile;
66 };
67
ovl_cache_entry_from_node(struct rb_node * n)68 static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
69 {
70 return rb_entry(n, struct ovl_cache_entry, node);
71 }
72
ovl_casefold(struct ovl_readdir_data * rdd,const char * str,int len,char ** dst)73 static int ovl_casefold(struct ovl_readdir_data *rdd, const char *str, int len,
74 char **dst)
75 {
76 const struct qstr qstr = { .name = str, .len = len };
77 char *cf_name;
78 int cf_len;
79
80 if (!IS_ENABLED(CONFIG_UNICODE) || !rdd->map ||
81 name_is_dot_dotdot(str, len))
82 return 0;
83
84 cf_name = kmalloc(NAME_MAX, GFP_KERNEL);
85 if (!cf_name) {
86 rdd->err = -ENOMEM;
87 return -ENOMEM;
88 }
89
90 cf_len = utf8_casefold(rdd->map, &qstr, cf_name, NAME_MAX);
91 if (cf_len > 0)
92 *dst = cf_name;
93 else
94 kfree(cf_name);
95
96 return cf_len;
97 }
98
ovl_cache_entry_find_link(const char * name,int len,struct rb_node *** link,struct rb_node ** parent)99 static bool ovl_cache_entry_find_link(const char *name, int len,
100 struct rb_node ***link,
101 struct rb_node **parent)
102 {
103 bool found = false;
104 struct rb_node **newp = *link;
105
106 while (!found && *newp) {
107 int cmp;
108 struct ovl_cache_entry *tmp;
109
110 *parent = *newp;
111 tmp = ovl_cache_entry_from_node(*newp);
112 cmp = strncmp(name, tmp->c_name, len);
113 if (cmp > 0)
114 newp = &tmp->node.rb_right;
115 else if (cmp < 0 || len < tmp->c_len)
116 newp = &tmp->node.rb_left;
117 else
118 found = true;
119 }
120 *link = newp;
121
122 return found;
123 }
124
ovl_cache_entry_find(struct rb_root * root,const char * name,int len)125 static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
126 const char *name, int len)
127 {
128 struct rb_node *node = root->rb_node;
129 int cmp;
130
131 while (node) {
132 struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
133
134 cmp = strncmp(name, p->c_name, len);
135 if (cmp > 0)
136 node = p->node.rb_right;
137 else if (cmp < 0 || len < p->c_len)
138 node = p->node.rb_left;
139 else
140 return p;
141 }
142
143 return NULL;
144 }
145
ovl_calc_d_ino(struct ovl_readdir_data * rdd,struct ovl_cache_entry * p)146 static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd,
147 struct ovl_cache_entry *p)
148 {
149 /* Don't care if not doing ovl_iter() */
150 if (!rdd->dentry)
151 return false;
152
153 /* Always recalc d_ino when remapping lower inode numbers */
154 if (ovl_xino_bits(OVL_FS(rdd->dentry->d_sb)))
155 return true;
156
157 /* Always recalc d_ino for parent */
158 if (name_is_dotdot(p->name, p->len))
159 return true;
160
161 /* If this is lower, then native d_ino will do */
162 if (!rdd->is_upper)
163 return false;
164
165 /*
166 * Recalc d_ino for '.' and for all entries if dir is impure (contains
167 * copied up entries)
168 */
169 if (name_is_dot(p->name, p->len) ||
170 ovl_test_flag(OVL_IMPURE, d_inode(rdd->dentry)))
171 return true;
172
173 return false;
174 }
175
/*
 * Allocate and initialise a cache entry for @name.
 *
 * When @c_name is non-NULL and differs from @name it is stored as the lookup
 * key (and later released by ovl_cache_entry_free()); otherwise the entry's
 * own name doubles as the key.  DT_CHR entries are additionally pushed on the
 * rdd->first_maybe_whiteout chain.
 *
 * Returns the new entry, or NULL if the allocation failed.  The entry is not
 * linked into any list or tree here.
 */
static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
						   const char *name, int len,
						   const char *c_name, int c_len,
						   u64 ino, unsigned int d_type)
{
	bool separate_key = c_name && c_name != name;
	struct ovl_cache_entry *entry;

	entry = kmalloc_flex(*entry, name, len + 1);
	if (!entry)
		return NULL;

	memcpy(entry->name, name, len);
	entry->name[len] = '\0';
	entry->len = len;
	entry->type = d_type;
	entry->real_ino = ino;
	/* A zero ino defers setting d_ino to ovl_iterate() */
	entry->ino = ovl_calc_d_ino(rdd, entry) ? 0 : ino;
	entry->is_upper = rdd->is_upper;
	entry->is_whiteout = false;
	/* The overlay.whiteout check is deferred to ovl_iterate() as well */
	entry->check_xwhiteout = rdd->in_xwhiteouts_dir && d_type == DT_REG;

	entry->c_name = separate_key ? c_name : entry->name;
	entry->c_len = separate_key ? c_len : len;

	if (d_type == DT_CHR) {
		entry->next_maybe_whiteout = rdd->first_maybe_whiteout;
		rdd->first_maybe_whiteout = entry;
	}

	return entry;
}
215
216 /* Return 0 for found, 1 for added, <0 for error */
ovl_cache_entry_add_rb(struct ovl_readdir_data * rdd,const char * name,int len,const char * c_name,int c_len,u64 ino,unsigned int d_type)217 static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
218 const char *name, int len,
219 const char *c_name, int c_len,
220 u64 ino,
221 unsigned int d_type)
222 {
223 struct rb_node **newp = &rdd->root->rb_node;
224 struct rb_node *parent = NULL;
225 struct ovl_cache_entry *p;
226
227 if (ovl_cache_entry_find_link(c_name, c_len, &newp, &parent))
228 return 0;
229
230 p = ovl_cache_entry_new(rdd, name, len, c_name, c_len, ino, d_type);
231 if (p == NULL) {
232 rdd->err = -ENOMEM;
233 return -ENOMEM;
234 }
235
236 list_add_tail(&p->l_node, rdd->list);
237 rb_link_node(&p->node, parent, newp);
238 rb_insert_color(&p->node, rdd->root);
239
240 return 1;
241 }
242
243 /* Return 0 for found, 1 for added, <0 for error */
ovl_fill_lowest(struct ovl_readdir_data * rdd,const char * name,int namelen,const char * c_name,int c_len,loff_t offset,u64 ino,unsigned int d_type)244 static int ovl_fill_lowest(struct ovl_readdir_data *rdd,
245 const char *name, int namelen,
246 const char *c_name, int c_len,
247 loff_t offset, u64 ino, unsigned int d_type)
248 {
249 struct ovl_cache_entry *p;
250
251 p = ovl_cache_entry_find(rdd->root, c_name, c_len);
252 if (p) {
253 list_move_tail(&p->l_node, &rdd->middle);
254 return 0;
255 } else {
256 p = ovl_cache_entry_new(rdd, name, namelen, c_name, c_len,
257 ino, d_type);
258 if (p == NULL)
259 rdd->err = -ENOMEM;
260 else
261 list_add_tail(&p->l_node, &rdd->middle);
262 }
263
264 return rdd->err ?: 1;
265 }
266
ovl_cache_entry_free(struct ovl_cache_entry * p)267 static void ovl_cache_entry_free(struct ovl_cache_entry *p)
268 {
269 if (p->c_name != p->name)
270 kfree(p->c_name);
271 kfree(p);
272 }
273
ovl_cache_free(struct list_head * list)274 void ovl_cache_free(struct list_head *list)
275 {
276 struct ovl_cache_entry *p;
277 struct ovl_cache_entry *n;
278
279 list_for_each_entry_safe(p, n, list, l_node)
280 ovl_cache_entry_free(p);
281
282 INIT_LIST_HEAD(list);
283 }
284
ovl_dir_cache_free(struct inode * inode)285 void ovl_dir_cache_free(struct inode *inode)
286 {
287 struct ovl_dir_cache *cache = ovl_dir_cache(inode);
288
289 if (cache) {
290 ovl_cache_free(&cache->entries);
291 kfree(cache);
292 }
293 }
294
ovl_cache_put(struct ovl_dir_file * od,struct inode * inode)295 static void ovl_cache_put(struct ovl_dir_file *od, struct inode *inode)
296 {
297 struct ovl_dir_cache *cache = od->cache;
298
299 WARN_ON(cache->refcount <= 0);
300 cache->refcount--;
301 if (!cache->refcount) {
302 if (ovl_dir_cache(inode) == cache)
303 ovl_set_dir_cache(inode, NULL);
304
305 ovl_cache_free(&cache->entries);
306 kfree(cache);
307 }
308 }
309
ovl_fill_merge(struct dir_context * ctx,const char * name,int namelen,loff_t offset,u64 ino,unsigned int d_type)310 static bool ovl_fill_merge(struct dir_context *ctx, const char *name,
311 int namelen, loff_t offset, u64 ino,
312 unsigned int d_type)
313 {
314 struct ovl_readdir_data *rdd =
315 container_of(ctx, struct ovl_readdir_data, ctx);
316 struct ovl_fs *ofs = OVL_FS(rdd->dentry->d_sb);
317 const char *c_name = NULL;
318 char *cf_name = NULL;
319 int c_len = 0, ret;
320
321 if (ofs->casefold)
322 c_len = ovl_casefold(rdd, name, namelen, &cf_name);
323
324 if (rdd->err)
325 return false;
326
327 if (c_len <= 0) {
328 c_name = name;
329 c_len = namelen;
330 } else {
331 c_name = cf_name;
332 }
333
334 rdd->count++;
335 if (!rdd->is_lowest)
336 ret = ovl_cache_entry_add_rb(rdd, name, namelen, c_name, c_len, ino, d_type);
337 else
338 ret = ovl_fill_lowest(rdd, name, namelen, c_name, c_len, offset, ino, d_type);
339
340 /*
341 * If ret == 1, that means that c_name is being used as part of struct
342 * ovl_cache_entry and will be freed at ovl_cache_free(). Otherwise,
343 * c_name was found in the rb-tree so we can free it here.
344 */
345 if (ret != 1 && c_name != name)
346 kfree(c_name);
347
348 return ret >= 0;
349 }
350
ovl_check_whiteouts(const struct path * path,struct ovl_readdir_data * rdd)351 static int ovl_check_whiteouts(const struct path *path, struct ovl_readdir_data *rdd)
352 {
353 struct dentry *dentry, *dir = path->dentry;
354
355 while (rdd->first_maybe_whiteout) {
356 struct ovl_cache_entry *p =
357 rdd->first_maybe_whiteout;
358 rdd->first_maybe_whiteout = p->next_maybe_whiteout;
359 dentry = lookup_one_positive_killable(mnt_idmap(path->mnt),
360 &QSTR_LEN(p->name, p->len),
361 dir);
362 if (!IS_ERR(dentry)) {
363 p->is_whiteout = ovl_is_whiteout(dentry);
364 dput(dentry);
365 } else if (PTR_ERR(dentry) == -EINTR) {
366 return -EINTR;
367 }
368 }
369
370 return 0;
371 }
372
ovl_dir_read(const struct path * realpath,struct ovl_readdir_data * rdd)373 static inline int ovl_dir_read(const struct path *realpath,
374 struct ovl_readdir_data *rdd)
375 {
376 struct file *realfile;
377 int err;
378
379 realfile = ovl_path_open(realpath, O_RDONLY | O_LARGEFILE);
380 if (IS_ERR(realfile))
381 return PTR_ERR(realfile);
382
383 rdd->first_maybe_whiteout = NULL;
384 rdd->ctx.pos = 0;
385 do {
386 rdd->count = 0;
387 rdd->err = 0;
388 err = iterate_dir(realfile, &rdd->ctx);
389 if (err >= 0)
390 err = rdd->err;
391 } while (!err && rdd->count);
392
393 if (!err && rdd->first_maybe_whiteout && rdd->dentry)
394 err = ovl_check_whiteouts(realpath, rdd);
395
396 fput(realfile);
397
398 return err;
399 }
400
ovl_dir_reset(struct file * file)401 static void ovl_dir_reset(struct file *file)
402 {
403 struct ovl_dir_file *od = file->private_data;
404 struct ovl_dir_cache *cache = od->cache;
405 struct inode *inode = file_inode(file);
406 bool is_real;
407
408 if (cache && ovl_inode_version_get(inode) != cache->version) {
409 ovl_cache_put(od, inode);
410 od->cache = NULL;
411 od->cursor = NULL;
412 }
413 is_real = ovl_dir_is_real(inode);
414 if (od->is_real != is_real) {
415 /* is_real can only become false when dir is copied up */
416 if (WARN_ON(is_real))
417 return;
418 od->is_real = false;
419 }
420 }
421
ovl_dir_read_merged(struct dentry * dentry,struct list_head * list,struct rb_root * root)422 static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list,
423 struct rb_root *root)
424 {
425 int err;
426 struct path realpath;
427 struct ovl_readdir_data rdd = {
428 .ctx.actor = ovl_fill_merge,
429 .ctx.count = INT_MAX,
430 .dentry = dentry,
431 .list = list,
432 .root = root,
433 .is_lowest = false,
434 .map = NULL,
435 };
436 int idx, next;
437 const struct ovl_layer *layer;
438 struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
439
440 for (idx = 0; idx != -1; idx = next) {
441 next = ovl_path_next(idx, dentry, &realpath, &layer);
442
443 if (ofs->casefold)
444 rdd.map = sb_encoding(realpath.dentry->d_sb);
445
446 rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry;
447 rdd.in_xwhiteouts_dir = layer->has_xwhiteouts &&
448 ovl_dentry_has_xwhiteouts(dentry);
449
450 if (next != -1) {
451 err = ovl_dir_read(&realpath, &rdd);
452 if (err)
453 break;
454 } else {
455 /*
456 * Insert lowest layer entries before upper ones, this
457 * allows offsets to be reasonably constant
458 */
459 list_add(&rdd.middle, rdd.list);
460 rdd.is_lowest = true;
461 err = ovl_dir_read(&realpath, &rdd);
462 list_del(&rdd.middle);
463 }
464 }
465 return err;
466 }
467
/*
 * Point od->cursor at the @pos'th node of the cache list, or at the list
 * head itself when the list has fewer than @pos entries.
 */
static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
{
	struct list_head *head = &od->cache->entries;
	struct list_head *node = head->next;
	loff_t idx;

	for (idx = 0; node != head && idx < pos; idx++)
		node = node->next;

	/* Cursor is safe since the cache is stable */
	od->cursor = node;
}
481
ovl_cache_get(struct dentry * dentry)482 static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
483 {
484 int res;
485 struct ovl_dir_cache *cache;
486 struct inode *inode = d_inode(dentry);
487
488 cache = ovl_dir_cache(inode);
489 if (cache && ovl_inode_version_get(inode) == cache->version) {
490 WARN_ON(!cache->refcount);
491 cache->refcount++;
492 return cache;
493 }
494 ovl_set_dir_cache(d_inode(dentry), NULL);
495
496 cache = kzalloc_obj(struct ovl_dir_cache);
497 if (!cache)
498 return ERR_PTR(-ENOMEM);
499
500 cache->refcount = 1;
501 INIT_LIST_HEAD(&cache->entries);
502 cache->root = RB_ROOT;
503
504 res = ovl_dir_read_merged(dentry, &cache->entries, &cache->root);
505 if (res) {
506 ovl_cache_free(&cache->entries);
507 kfree(cache);
508 return ERR_PTR(res);
509 }
510
511 cache->version = ovl_inode_version_get(inode);
512 ovl_set_dir_cache(inode, cache);
513
514 return cache;
515 }
516
517 /* Map inode number to lower fs unique range */
ovl_remap_lower_ino(u64 ino,int xinobits,int fsid,const char * name,int namelen,bool warn)518 static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid,
519 const char *name, int namelen, bool warn)
520 {
521 unsigned int xinoshift = 64 - xinobits;
522
523 if (unlikely(ino >> xinoshift)) {
524 if (warn) {
525 pr_warn_ratelimited("d_ino too big (%.*s, ino=%llu, xinobits=%d)\n",
526 namelen, name, ino, xinobits);
527 }
528 return ino;
529 }
530
531 /*
532 * The lowest xinobit is reserved for mapping the non-peresistent inode
533 * numbers range, but this range is only exposed via st_ino, not here.
534 */
535 return ino | ((u64)fsid) << (xinoshift + 1);
536 }
537
538 /*
539 * Set d_ino for upper entries if needed. Non-upper entries should always report
540 * the uppermost real inode ino and should not call this function.
541 *
542 * When not all layer are on same fs, report real ino also for upper.
543 *
544 * When all layers are on the same fs, and upper has a reference to
545 * copy up origin, call vfs_getattr() on the overlay entry to make
546 * sure that d_ino will be consistent with st_ino from stat(2).
547 *
548 * Also checks the overlay.whiteout xattr by doing a full lookup which will return
549 * negative in this case.
550 */
ovl_cache_update(const struct path * path,struct ovl_cache_entry * p,bool update_ino)551 static int ovl_cache_update(const struct path *path, struct ovl_cache_entry *p, bool update_ino)
552
553 {
554 struct dentry *dir = path->dentry;
555 struct ovl_fs *ofs = OVL_FS(dir->d_sb);
556 struct dentry *this = NULL;
557 enum ovl_path_type type;
558 u64 ino = p->real_ino;
559 int xinobits = ovl_xino_bits(ofs);
560 int err = 0;
561
562 if (!ovl_same_dev(ofs) && !p->check_xwhiteout)
563 goto out;
564
565 if (name_is_dot_dotdot(p->name, p->len)) {
566 if (p->len == 1) {
567 this = dget(dir);
568 goto get;
569 }
570 if (p->len == 2) {
571 /* we shall not be moved */
572 this = dget(dir->d_parent);
573 goto get;
574 }
575 }
576 /* This checks also for xwhiteouts */
577 this = lookup_one(mnt_idmap(path->mnt), &QSTR_LEN(p->name, p->len), dir);
578 if (IS_ERR_OR_NULL(this) || !this->d_inode) {
579 /* Mark a stale entry */
580 p->is_whiteout = true;
581 if (IS_ERR(this)) {
582 err = PTR_ERR(this);
583 this = NULL;
584 goto fail;
585 }
586 goto out;
587 }
588
589 get:
590 if (!ovl_same_dev(ofs) || !update_ino)
591 goto out;
592
593 type = ovl_path_type(this);
594 if (OVL_TYPE_ORIGIN(type)) {
595 struct kstat stat;
596 struct path statpath = *path;
597
598 statpath.dentry = this;
599 err = vfs_getattr(&statpath, &stat, STATX_INO, 0);
600 if (err)
601 goto fail;
602
603 /*
604 * Directory inode is always on overlay st_dev.
605 * Non-dir with ovl_same_dev() could be on pseudo st_dev in case
606 * of xino bits overflow.
607 */
608 WARN_ON_ONCE(S_ISDIR(stat.mode) &&
609 dir->d_sb->s_dev != stat.dev);
610 ino = stat.ino;
611 } else if (xinobits && !OVL_TYPE_UPPER(type)) {
612 ino = ovl_remap_lower_ino(ino, xinobits,
613 ovl_layer_lower(this)->fsid,
614 p->name, p->len,
615 ovl_xino_warn(ofs));
616 }
617
618 out:
619 p->ino = ino;
620 dput(this);
621 return err;
622
623 fail:
624 pr_warn_ratelimited("failed to look up (%s) for ino (%i)\n",
625 p->name, err);
626 goto out;
627 }
628
ovl_fill_plain(struct dir_context * ctx,const char * name,int namelen,loff_t offset,u64 ino,unsigned int d_type)629 static bool ovl_fill_plain(struct dir_context *ctx, const char *name,
630 int namelen, loff_t offset, u64 ino,
631 unsigned int d_type)
632 {
633 struct ovl_cache_entry *p;
634 struct ovl_readdir_data *rdd =
635 container_of(ctx, struct ovl_readdir_data, ctx);
636
637 rdd->count++;
638 p = ovl_cache_entry_new(rdd, name, namelen, NULL, 0, ino, d_type);
639 if (p == NULL) {
640 rdd->err = -ENOMEM;
641 return false;
642 }
643 list_add_tail(&p->l_node, rdd->list);
644
645 return true;
646 }
647
ovl_dir_read_impure(const struct path * path,struct list_head * list,struct rb_root * root)648 static int ovl_dir_read_impure(const struct path *path, struct list_head *list,
649 struct rb_root *root)
650 {
651 int err;
652 struct path realpath;
653 struct ovl_cache_entry *p, *n;
654 struct ovl_readdir_data rdd = {
655 .ctx.actor = ovl_fill_plain,
656 .ctx.count = INT_MAX,
657 .list = list,
658 .root = root,
659 };
660
661 INIT_LIST_HEAD(list);
662 *root = RB_ROOT;
663 ovl_path_upper(path->dentry, &realpath);
664
665 err = ovl_dir_read(&realpath, &rdd);
666 if (err)
667 return err;
668
669 list_for_each_entry_safe(p, n, list, l_node) {
670 if (!name_is_dot_dotdot(p->name, p->len)) {
671 err = ovl_cache_update(path, p, true);
672 if (err)
673 return err;
674 }
675 if (p->ino == p->real_ino) {
676 list_del(&p->l_node);
677 ovl_cache_entry_free(p);
678 } else {
679 struct rb_node **newp = &root->rb_node;
680 struct rb_node *parent = NULL;
681
682 if (WARN_ON(ovl_cache_entry_find_link(p->name, p->len,
683 &newp, &parent)))
684 return -EIO;
685
686 rb_link_node(&p->node, parent, newp);
687 rb_insert_color(&p->node, root);
688 }
689 }
690 return 0;
691 }
692
ovl_cache_get_impure(const struct path * path)693 static struct ovl_dir_cache *ovl_cache_get_impure(const struct path *path)
694 {
695 int res;
696 struct dentry *dentry = path->dentry;
697 struct inode *inode = d_inode(dentry);
698 struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
699 struct ovl_dir_cache *cache;
700
701 cache = ovl_dir_cache(inode);
702 if (cache && ovl_inode_version_get(inode) == cache->version)
703 return cache;
704
705 /* Impure cache is not refcounted, free it here */
706 ovl_dir_cache_free(inode);
707 ovl_set_dir_cache(inode, NULL);
708
709 cache = kzalloc_obj(struct ovl_dir_cache);
710 if (!cache)
711 return ERR_PTR(-ENOMEM);
712
713 res = ovl_dir_read_impure(path, &cache->entries, &cache->root);
714 if (res) {
715 ovl_cache_free(&cache->entries);
716 kfree(cache);
717 return ERR_PTR(res);
718 }
719 if (list_empty(&cache->entries)) {
720 /*
721 * A good opportunity to get rid of an unneeded "impure" flag.
722 * Removing the "impure" xattr is best effort.
723 */
724 if (!ovl_want_write(dentry)) {
725 ovl_removexattr(ofs, ovl_dentry_upper(dentry),
726 OVL_XATTR_IMPURE);
727 ovl_drop_write(dentry);
728 }
729 ovl_clear_flag(OVL_IMPURE, inode);
730 kfree(cache);
731 return NULL;
732 }
733
734 cache->version = ovl_inode_version_get(inode);
735 ovl_set_dir_cache(inode, cache);
736
737 return cache;
738 }
739
740 struct ovl_readdir_translate {
741 struct dir_context *orig_ctx;
742 struct ovl_dir_cache *cache;
743 struct dir_context ctx;
744 u64 parent_ino;
745 int fsid;
746 int xinobits;
747 bool xinowarn;
748 };
749
ovl_fill_real(struct dir_context * ctx,const char * name,int namelen,loff_t offset,u64 ino,unsigned int d_type)750 static bool ovl_fill_real(struct dir_context *ctx, const char *name,
751 int namelen, loff_t offset, u64 ino,
752 unsigned int d_type)
753 {
754 struct ovl_readdir_translate *rdt =
755 container_of(ctx, struct ovl_readdir_translate, ctx);
756 struct dir_context *orig_ctx = rdt->orig_ctx;
757 bool res;
758
759 if (rdt->parent_ino && name_is_dotdot(name, namelen)) {
760 ino = rdt->parent_ino;
761 } else if (rdt->cache) {
762 struct ovl_cache_entry *p;
763
764 p = ovl_cache_entry_find(&rdt->cache->root, name, namelen);
765 if (p)
766 ino = p->ino;
767 } else if (rdt->xinobits) {
768 ino = ovl_remap_lower_ino(ino, rdt->xinobits, rdt->fsid,
769 name, namelen, rdt->xinowarn);
770 }
771
772 res = orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type);
773 ctx->count = orig_ctx->count;
774
775 return res;
776 }
777
ovl_is_impure_dir(struct file * file)778 static bool ovl_is_impure_dir(struct file *file)
779 {
780 struct ovl_dir_file *od = file->private_data;
781 struct inode *dir = file_inode(file);
782
783 /*
784 * Only upper dir can be impure, but if we are in the middle of
785 * iterating a lower real dir, dir could be copied up and marked
786 * impure. We only want the impure cache if we started iterating
787 * a real upper dir to begin with.
788 */
789 return od->is_upper && ovl_test_flag(OVL_IMPURE, dir);
790
791 }
792
ovl_iterate_real(struct file * file,struct dir_context * ctx)793 static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
794 {
795 int err;
796 struct ovl_dir_file *od = file->private_data;
797 struct dentry *dir = file->f_path.dentry;
798 struct ovl_fs *ofs = OVL_FS(dir->d_sb);
799 const struct ovl_layer *lower_layer = ovl_layer_lower(dir);
800 struct ovl_readdir_translate rdt = {
801 .ctx.actor = ovl_fill_real,
802 .ctx.count = ctx->count,
803 .orig_ctx = ctx,
804 .xinobits = ovl_xino_bits(ofs),
805 .xinowarn = ovl_xino_warn(ofs),
806 };
807
808 if (rdt.xinobits && lower_layer)
809 rdt.fsid = lower_layer->fsid;
810
811 if (OVL_TYPE_MERGE(ovl_path_type(dir->d_parent))) {
812 struct kstat stat;
813 struct path statpath = file->f_path;
814
815 statpath.dentry = dir->d_parent;
816 err = vfs_getattr(&statpath, &stat, STATX_INO, 0);
817 if (err)
818 return err;
819
820 WARN_ON_ONCE(dir->d_sb->s_dev != stat.dev);
821 rdt.parent_ino = stat.ino;
822 }
823
824 if (ovl_is_impure_dir(file)) {
825 rdt.cache = ovl_cache_get_impure(&file->f_path);
826 if (IS_ERR(rdt.cache))
827 return PTR_ERR(rdt.cache);
828 }
829
830 err = iterate_dir(od->realfile, &rdt.ctx);
831 ctx->pos = rdt.ctx.pos;
832
833 return err;
834 }
835
ovl_iterate_merged(struct file * file,struct dir_context * ctx)836 static int ovl_iterate_merged(struct file *file, struct dir_context *ctx)
837 {
838 struct ovl_dir_file *od = file->private_data;
839 struct dentry *dentry = file->f_path.dentry;
840 struct ovl_cache_entry *p;
841 int err;
842
843 if (!od->cache) {
844 struct ovl_dir_cache *cache;
845
846 cache = ovl_cache_get(dentry);
847 if (IS_ERR(cache))
848 return PTR_ERR(cache);
849
850 od->cache = cache;
851 ovl_seek_cursor(od, ctx->pos);
852 }
853
854 while (od->cursor != &od->cache->entries) {
855 p = list_entry(od->cursor, struct ovl_cache_entry, l_node);
856 if (!p->is_whiteout) {
857 if (!p->ino || p->check_xwhiteout) {
858 err = ovl_cache_update(&file->f_path, p, !p->ino);
859 if (err)
860 return err;
861 }
862 }
863 /* ovl_cache_update() sets is_whiteout on stale entry */
864 if (!p->is_whiteout) {
865 if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
866 break;
867 }
868 od->cursor = p->l_node.next;
869 ctx->pos++;
870 }
871 return 0;
872 }
873
ovl_need_adjust_d_ino(struct file * file)874 static bool ovl_need_adjust_d_ino(struct file *file)
875 {
876 struct dentry *dentry = file->f_path.dentry;
877 struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
878
879 /* If parent is merge, then need to adjust d_ino for '..' */
880 if (ovl_xino_bits(ofs))
881 return true;
882
883 /* Can't do consistent inode numbering */
884 if (!ovl_same_fs(ofs))
885 return false;
886
887 /* If dir is impure then need to adjust d_ino for copied up entries */
888 if (ovl_is_impure_dir(file) ||
889 OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent)))
890 return true;
891
892 /* Pure: no need to adjust d_ino */
893 return false;
894 }
895
896
ovl_iterate(struct file * file,struct dir_context * ctx)897 static int ovl_iterate(struct file *file, struct dir_context *ctx)
898 {
899 struct ovl_dir_file *od = file->private_data;
900
901 if (!ctx->pos)
902 ovl_dir_reset(file);
903
904 with_ovl_creds(file_dentry(file)->d_sb) {
905 if (!od->is_real)
906 return ovl_iterate_merged(file, ctx);
907
908 if (ovl_need_adjust_d_ino(file))
909 return ovl_iterate_real(file, ctx);
910
911 return iterate_dir(od->realfile, ctx);
912 }
913 }
914
/*
 * llseek for overlay directories, done under the inode lock.
 *
 * A real directory forwards the seek to the real file and copies its f_pos
 * back.  A merged directory only accepts SEEK_SET and SEEK_CUR with a
 * non-negative result; the cache cursor, if a cache exists, follows the new
 * position.  Anything else fails with -EINVAL.
 */
static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
{
	struct ovl_dir_file *od = file->private_data;
	struct inode *inode = file_inode(file);
	loff_t res;

	inode_lock(inode);
	if (!file->f_pos)
		ovl_dir_reset(file);

	if (od->is_real) {
		res = vfs_llseek(od->realfile, offset, origin);
		file->f_pos = od->realfile->f_pos;
		goto out_unlock;
	}

	res = -EINVAL;
	if (origin == SEEK_CUR)
		offset += file->f_pos;
	else if (origin != SEEK_SET)
		goto out_unlock;

	if (offset < 0)
		goto out_unlock;

	if (offset != file->f_pos) {
		file->f_pos = offset;
		if (od->cache)
			ovl_seek_cursor(od, offset);
	}
	res = offset;

out_unlock:
	inode_unlock(inode);

	return res;
}
954
ovl_dir_open_realfile(const struct file * file,const struct path * realpath)955 static struct file *ovl_dir_open_realfile(const struct file *file,
956 const struct path *realpath)
957 {
958 with_ovl_creds(file_inode(file)->i_sb)
959 return ovl_path_open(realpath, O_RDONLY | (file->f_flags & O_LARGEFILE));
960 }
961
962 /*
963 * Like ovl_real_fdget(), returns upperfile if dir was copied up since open.
964 * Unlike ovl_real_fdget(), this caches upperfile in file->private_data.
965 *
966 * TODO: use same abstract type for file->private_data of dir and file so
967 * upperfile could also be cached for files as well.
968 */
ovl_dir_real_file(const struct file * file,bool want_upper)969 struct file *ovl_dir_real_file(const struct file *file, bool want_upper)
970 {
971
972 struct ovl_dir_file *od = file->private_data;
973 struct dentry *dentry = file->f_path.dentry;
974 struct file *old, *realfile = od->realfile;
975
976 if (!OVL_TYPE_UPPER(ovl_path_type(dentry)))
977 return want_upper ? NULL : realfile;
978
979 /*
980 * Need to check if we started out being a lower dir, but got copied up
981 */
982 if (!od->is_upper) {
983 realfile = READ_ONCE(od->upperfile);
984 if (!realfile) {
985 struct path upperpath;
986
987 ovl_path_upper(dentry, &upperpath);
988 realfile = ovl_dir_open_realfile(file, &upperpath);
989 if (IS_ERR(realfile))
990 return realfile;
991
992 old = cmpxchg_release(&od->upperfile, NULL, realfile);
993 if (old) {
994 fput(realfile);
995 realfile = old;
996 }
997 }
998 }
999
1000 return realfile;
1001 }
1002
/*
 * fsync for overlay directories: sync the upper real directory, if any.
 * A result <= 0 from ovl_sync_status() is returned as is without syncing.
 */
static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
			 int datasync)
{
	struct file *realfile;
	int status;

	status = ovl_sync_status(OVL_FS(file_inode(file)->i_sb));
	if (status <= 0)
		return status;

	realfile = ovl_dir_real_file(file, true);
	if (IS_ERR(realfile))
		return PTR_ERR(realfile);

	/* Nothing to sync for lower */
	if (!realfile)
		return 0;

	return vfs_fsync_range(realfile, start, end, datasync);
}
1022
ovl_dir_release(struct inode * inode,struct file * file)1023 static int ovl_dir_release(struct inode *inode, struct file *file)
1024 {
1025 struct ovl_dir_file *od = file->private_data;
1026
1027 if (od->cache) {
1028 inode_lock(inode);
1029 ovl_cache_put(od, inode);
1030 inode_unlock(inode);
1031 }
1032 fput(od->realfile);
1033 if (od->upperfile)
1034 fput(od->upperfile);
1035 kfree(od);
1036
1037 return 0;
1038 }
1039
ovl_dir_open(struct inode * inode,struct file * file)1040 static int ovl_dir_open(struct inode *inode, struct file *file)
1041 {
1042 struct path realpath;
1043 struct file *realfile;
1044 struct ovl_dir_file *od;
1045 enum ovl_path_type type;
1046
1047 od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);
1048 if (!od)
1049 return -ENOMEM;
1050
1051 type = ovl_path_real(file->f_path.dentry, &realpath);
1052 realfile = ovl_dir_open_realfile(file, &realpath);
1053 if (IS_ERR(realfile)) {
1054 kfree(od);
1055 return PTR_ERR(realfile);
1056 }
1057 od->realfile = realfile;
1058 od->is_real = ovl_dir_is_real(inode);
1059 od->is_upper = OVL_TYPE_UPPER(type);
1060 file->private_data = od;
1061
1062 return 0;
1063 }
1064
1065 WRAP_DIR_ITER(ovl_iterate) // FIXME!
1066 const struct file_operations ovl_dir_operations = {
1067 .read = generic_read_dir,
1068 .open = ovl_dir_open,
1069 .iterate_shared = shared_ovl_iterate,
1070 .llseek = ovl_dir_llseek,
1071 .fsync = ovl_dir_fsync,
1072 .release = ovl_dir_release,
1073 .setlease = generic_setlease,
1074 };
1075
ovl_check_empty_dir(struct dentry * dentry,struct list_head * list)1076 int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
1077 {
1078 int err;
1079 struct ovl_cache_entry *p, *n;
1080 struct rb_root root = RB_ROOT;
1081
1082 with_ovl_creds(dentry->d_sb)
1083 err = ovl_dir_read_merged(dentry, list, &root);
1084 if (err)
1085 return err;
1086
1087 err = 0;
1088
1089 list_for_each_entry_safe(p, n, list, l_node) {
1090 /*
1091 * Select whiteouts in upperdir, they should
1092 * be cleared when deleting this directory.
1093 */
1094 if (p->is_whiteout) {
1095 if (p->is_upper)
1096 continue;
1097 goto del_entry;
1098 }
1099
1100 if (name_is_dot_dotdot(p->name, p->len))
1101 goto del_entry;
1102 err = -ENOTEMPTY;
1103 break;
1104
1105 del_entry:
1106 list_del(&p->l_node);
1107 ovl_cache_entry_free(p);
1108 }
1109
1110 return err;
1111 }
1112
ovl_cleanup_whiteouts(struct ovl_fs * ofs,struct dentry * upper,struct list_head * list)1113 void ovl_cleanup_whiteouts(struct ovl_fs *ofs, struct dentry *upper,
1114 struct list_head *list)
1115 {
1116 struct ovl_cache_entry *p;
1117
1118 list_for_each_entry(p, list, l_node) {
1119 struct dentry *dentry;
1120
1121 if (WARN_ON(!p->is_whiteout || !p->is_upper))
1122 continue;
1123
1124 dentry = ovl_lookup_upper_unlocked(ofs, p->name, upper, p->len);
1125 if (IS_ERR(dentry)) {
1126 pr_err("lookup '%s/%.*s' failed (%i)\n",
1127 upper->d_name.name, p->len, p->name,
1128 (int) PTR_ERR(dentry));
1129 continue;
1130 }
1131 if (dentry->d_inode)
1132 ovl_cleanup(ofs, upper, dentry);
1133 dput(dentry);
1134 }
1135 }
1136
/*
 * Directory read actor used by ovl_check_d_type_supported().
 *
 * Sets d_type_supported in the enclosing ovl_readdir_data once an entry
 * other than "." or ".." is reported with a d_type different from
 * DT_UNKNOWN.  @offset and @ino are unused.  Always returns true.
 */
static bool ovl_check_d_type(struct dir_context *ctx, const char *name,
			     int namelen, loff_t offset, u64 ino,
			     unsigned int d_type)
{
	struct ovl_readdir_data *rdd;

	/* Even if d_type is not supported, DT_DIR is returned for . and .. */
	if (!name_is_dot_dotdot(name, namelen) && d_type != DT_UNKNOWN) {
		rdd = container_of(ctx, struct ovl_readdir_data, ctx);
		rdd->d_type_supported = true;
	}

	return true;
}
1153
1154 /*
1155 * Returns 1 if d_type is supported, 0 not supported/unknown. Negative values
1156 * if error is encountered.
1157 */
ovl_check_d_type_supported(const struct path * realpath)1158 int ovl_check_d_type_supported(const struct path *realpath)
1159 {
1160 int err;
1161 struct ovl_readdir_data rdd = {
1162 .ctx.actor = ovl_check_d_type,
1163 .ctx.count = INT_MAX,
1164 .d_type_supported = false,
1165 };
1166
1167 err = ovl_dir_read(realpath, &rdd);
1168 if (err)
1169 return err;
1170
1171 return rdd.d_type_supported;
1172 }
1173
1174 #define OVL_INCOMPATDIR_NAME "incompat"
1175
ovl_workdir_cleanup_recurse(struct ovl_fs * ofs,const struct path * path,int level)1176 static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, const struct path *path,
1177 int level)
1178 {
1179 int err;
1180 LIST_HEAD(list);
1181 struct ovl_cache_entry *p;
1182 struct ovl_readdir_data rdd = {
1183 .ctx.actor = ovl_fill_plain,
1184 .ctx.count = INT_MAX,
1185 .list = &list,
1186 };
1187 bool incompat = false;
1188
1189 /*
1190 * The "work/incompat" directory is treated specially - if it is not
1191 * empty, instead of printing a generic error and mounting read-only,
1192 * we will error about incompat features and fail the mount.
1193 *
1194 * When called from ovl_indexdir_cleanup(), path->dentry->d_name.name
1195 * starts with '#'.
1196 */
1197 if (level == 2 &&
1198 !strcmp(path->dentry->d_name.name, OVL_INCOMPATDIR_NAME))
1199 incompat = true;
1200
1201 err = ovl_dir_read(path, &rdd);
1202 if (err)
1203 goto out;
1204
1205 list_for_each_entry(p, &list, l_node) {
1206 struct dentry *dentry;
1207
1208 if (name_is_dot_dotdot(p->name, p->len)) {
1209 continue;
1210 } else if (incompat) {
1211 pr_err("overlay with incompat feature '%s' cannot be mounted\n",
1212 p->name);
1213 err = -EINVAL;
1214 break;
1215 }
1216 dentry = ovl_lookup_upper_unlocked(ofs, p->name, path->dentry, p->len);
1217 if (IS_ERR(dentry))
1218 continue;
1219 if (dentry->d_inode)
1220 err = ovl_workdir_cleanup(ofs, path->dentry, path->mnt,
1221 dentry, level);
1222 dput(dentry);
1223 if (err)
1224 break;
1225 }
1226 out:
1227 ovl_cache_free(&list);
1228 return err;
1229 }
1230
ovl_workdir_cleanup(struct ovl_fs * ofs,struct dentry * parent,struct vfsmount * mnt,struct dentry * dentry,int level)1231 int ovl_workdir_cleanup(struct ovl_fs *ofs, struct dentry *parent,
1232 struct vfsmount *mnt, struct dentry *dentry, int level)
1233 {
1234 int err;
1235
1236 if (!d_is_dir(dentry) || level > 1)
1237 return ovl_cleanup(ofs, parent, dentry);
1238
1239 dentry = start_removing_dentry(parent, dentry);
1240 if (IS_ERR(dentry))
1241 return PTR_ERR(dentry);
1242 err = ovl_do_rmdir(ofs, parent->d_inode, dentry);
1243 end_removing(dentry);
1244 if (err) {
1245 struct path path = { .mnt = mnt, .dentry = dentry };
1246
1247 err = ovl_workdir_cleanup_recurse(ofs, &path, level + 1);
1248 if (!err)
1249 err = ovl_cleanup(ofs, parent, dentry);
1250 }
1251
1252 return err;
1253 }
1254
ovl_indexdir_cleanup(struct ovl_fs * ofs)1255 int ovl_indexdir_cleanup(struct ovl_fs *ofs)
1256 {
1257 int err;
1258 struct dentry *indexdir = ofs->workdir;
1259 struct dentry *index = NULL;
1260 struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = indexdir };
1261 LIST_HEAD(list);
1262 struct ovl_cache_entry *p;
1263 struct ovl_readdir_data rdd = {
1264 .ctx.actor = ovl_fill_plain,
1265 .ctx.count = INT_MAX,
1266 .list = &list,
1267 };
1268
1269 err = ovl_dir_read(&path, &rdd);
1270 if (err)
1271 goto out;
1272
1273 list_for_each_entry(p, &list, l_node) {
1274 if (name_is_dot_dotdot(p->name, p->len))
1275 continue;
1276 index = ovl_lookup_upper_unlocked(ofs, p->name, indexdir, p->len);
1277 if (IS_ERR(index)) {
1278 err = PTR_ERR(index);
1279 index = NULL;
1280 break;
1281 }
1282 /* Cleanup leftover from index create/cleanup attempt */
1283 if (index->d_name.name[0] == '#') {
1284 err = ovl_workdir_cleanup(ofs, indexdir, path.mnt, index, 1);
1285 if (err)
1286 break;
1287 goto next;
1288 }
1289 err = ovl_verify_index(ofs, index);
1290 if (!err) {
1291 goto next;
1292 } else if (err == -ESTALE) {
1293 /* Cleanup stale index entries */
1294 err = ovl_cleanup(ofs, indexdir, index);
1295 } else if (err != -ENOENT) {
1296 /*
1297 * Abort mount to avoid corrupting the index if
1298 * an incompatible index entry was found or on out
1299 * of memory.
1300 */
1301 break;
1302 } else if (ofs->config.nfs_export) {
1303 /*
1304 * Whiteout orphan index to block future open by
1305 * handle after overlay nlink dropped to zero.
1306 */
1307 err = ovl_cleanup_and_whiteout(ofs, indexdir, index);
1308 } else {
1309 /* Cleanup orphan index entries */
1310 err = ovl_cleanup(ofs, indexdir, index);
1311 }
1312
1313 if (err)
1314 break;
1315
1316 next:
1317 dput(index);
1318 index = NULL;
1319 }
1320 dput(index);
1321 out:
1322 ovl_cache_free(&list);
1323 if (err)
1324 pr_err("failed index dir cleanup (%i)\n", err);
1325 return err;
1326 }
1327