xref: /linux/fs/overlayfs/readdir.c (revision 28fb80f0891c01dc706a5f6cada94c9cf0f2b1c2)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  *
4  * Copyright (C) 2011 Novell Inc.
5  */
6 
7 #include <linux/fs.h>
8 #include <linux/slab.h>
9 #include <linux/namei.h>
10 #include <linux/file.h>
11 #include <linux/xattr.h>
12 #include <linux/rbtree.h>
13 #include <linux/security.h>
14 #include <linux/cred.h>
15 #include <linux/ratelimit.h>
16 #include <linux/overflow.h>
17 #include "overlayfs.h"
18 
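/*
 * One entry in the merged readdir cache.  Each entry is linked both on a
 * list (l_node, the order in which entries are emitted by ovl_iterate())
 * and in an rbtree (node, keyed by name, used to detect lower entries
 * shadowed by an upper layer).  DT_CHR entries that might turn out to be
 * whiteouts are additionally chained via next_maybe_whiteout for a
 * deferred check.
 */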
19 struct ovl_cache_entry {
20 	unsigned int len;
21 	unsigned int type;
22 	u64 real_ino;
23 	u64 ino;
24 	struct list_head l_node;
25 	struct rb_node node;
26 	struct ovl_cache_entry *next_maybe_whiteout;
27 	bool is_upper;
28 	bool is_whiteout;
29 	bool check_xwhiteout;
30 	char name[];
31 };
32 
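/*
 * Cache of a whole merged directory, shared by all its open files via
 * refcount and invalidated when the overlay inode version changes (see
 * ovl_cache_get() and ovl_dir_reset()).
 */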
33 struct ovl_dir_cache {
34 	long refcount;
35 	u64 version;
36 	struct list_head entries;
37 	struct rb_root root;
38 };
39 
40 struct ovl_readdir_data {
41 	struct dir_context ctx;
42 	struct dentry *dentry;
43 	bool is_lowest;
44 	struct rb_root *root;
45 	struct list_head *list;
46 	struct list_head middle;
47 	struct ovl_cache_entry *first_maybe_whiteout;
48 	int count;
49 	int err;
50 	bool is_upper;
51 	bool d_type_supported;
52 	bool in_xwhiteouts_dir;
53 };
54 
55 struct ovl_dir_file {
56 	bool is_real;
57 	bool is_upper;
58 	struct ovl_dir_cache *cache;
59 	struct list_head *cursor;
60 	struct file *realfile;
61 	struct file *upperfile;
62 };
63 
64 static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
65 {
66 	return rb_entry(n, struct ovl_cache_entry, node);
67 }
68 
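/*
 * Entries are ordered by comparing the first 'len' bytes of the names
 * and, on an equal prefix, by name length.  Since cached names are NUL
 * terminated, strncmp() against a shorter stored name hits the NUL and
 * returns nonzero, so only the "name is a proper prefix of tmp->name"
 * case needs the explicit 'len < tmp->len' test.
 */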
69 static bool ovl_cache_entry_find_link(const char *name, int len,
70 				      struct rb_node ***link,
71 				      struct rb_node **parent)
72 {
73 	bool found = false;
74 	struct rb_node **newp = *link;
75 
76 	while (!found && *newp) {
77 		int cmp;
78 		struct ovl_cache_entry *tmp;
79 
80 		*parent = *newp;
81 		tmp = ovl_cache_entry_from_node(*newp);
82 		cmp = strncmp(name, tmp->name, len);
83 		if (cmp > 0)
84 			newp = &tmp->node.rb_right;
85 		else if (cmp < 0 || len < tmp->len)
86 			newp = &tmp->node.rb_left;
87 		else
88 			found = true;
89 	}
90 	*link = newp;
91 
92 	return found;
93 }
94 
95 static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
96 						    const char *name, int len)
97 {
98 	struct rb_node *node = root->rb_node;
99 	int cmp;
100 
101 	while (node) {
102 		struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
103 
104 		cmp = strncmp(name, p->name, len);
105 		if (cmp > 0)
106 			node = p->node.rb_right;
107 		else if (cmp < 0 || len < p->len)
108 			node = p->node.rb_left;
109 		else
110 			return p;
111 	}
112 
113 	return NULL;
114 }
115 
116 static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd,
117 			   struct ovl_cache_entry *p)
118 {
119 	/* Don't care if not doing ovl_iterate() */
120 	if (!rdd->dentry)
121 		return false;
122 
123 	/* Always recalc d_ino when remapping lower inode numbers */
124 	if (ovl_xino_bits(OVL_FS(rdd->dentry->d_sb)))
125 		return true;
126 
127 	/* Always recalc d_ino for parent */
128 	if (strcmp(p->name, "..") == 0)
129 		return true;
130 
131 	/* If this is lower, then native d_ino will do */
132 	if (!rdd->is_upper)
133 		return false;
134 
135 	/*
136 	 * Recalc d_ino for '.' and for all entries if dir is impure (contains
137 	 * copied up entries)
138 	 */
139 	if ((p->name[0] == '.' && p->len == 1) ||
140 	    ovl_test_flag(OVL_IMPURE, d_inode(rdd->dentry)))
141 		return true;
142 
143 	return false;
144 }
145 
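/*
 * Note on the DT_CHR case below: overlayfs represents whiteouts as
 * character device nodes, so any DT_CHR entry may be a whiteout.  The
 * lookup needed to verify that is deferred by chaining such entries on
 * rdd->first_maybe_whiteout, to be resolved in ovl_check_whiteouts().
 */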
146 static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
147 						   const char *name, int len,
148 						   u64 ino, unsigned int d_type)
149 {
150 	struct ovl_cache_entry *p;
151 
152 	p = kmalloc(struct_size(p, name, len + 1), GFP_KERNEL);
153 	if (!p)
154 		return NULL;
155 
156 	memcpy(p->name, name, len);
157 	p->name[len] = '\0';
158 	p->len = len;
159 	p->type = d_type;
160 	p->real_ino = ino;
161 	p->ino = ino;
162 	/* Defer setting d_ino for upper entry to ovl_iterate() */
163 	if (ovl_calc_d_ino(rdd, p))
164 		p->ino = 0;
165 	p->is_upper = rdd->is_upper;
166 	p->is_whiteout = false;
167 	/* Defer check for overlay.whiteout to ovl_iterate() */
168 	p->check_xwhiteout = rdd->in_xwhiteouts_dir && d_type == DT_REG;
169 
170 	if (d_type == DT_CHR) {
171 		p->next_maybe_whiteout = rdd->first_maybe_whiteout;
172 		rdd->first_maybe_whiteout = p;
173 	}
174 	return p;
175 }
176 
177 static bool ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
178 				  const char *name, int len, u64 ino,
179 				  unsigned int d_type)
180 {
181 	struct rb_node **newp = &rdd->root->rb_node;
182 	struct rb_node *parent = NULL;
183 	struct ovl_cache_entry *p;
184 
185 	if (ovl_cache_entry_find_link(name, len, &newp, &parent))
186 		return true;
187 
188 	p = ovl_cache_entry_new(rdd, name, len, ino, d_type);
189 	if (p == NULL) {
190 		rdd->err = -ENOMEM;
191 		return false;
192 	}
193 
194 	list_add_tail(&p->l_node, rdd->list);
195 	rb_link_node(&p->node, parent, newp);
196 	rb_insert_color(&p->node, rdd->root);
197 
198 	return true;
199 }
200 
201 static bool ovl_fill_lowest(struct ovl_readdir_data *rdd,
202 			   const char *name, int namelen,
203 			   loff_t offset, u64 ino, unsigned int d_type)
204 {
205 	struct ovl_cache_entry *p;
206 
207 	p = ovl_cache_entry_find(rdd->root, name, namelen);
208 	if (p) {
209 		list_move_tail(&p->l_node, &rdd->middle);
210 	} else {
211 		p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
212 		if (p == NULL)
213 			rdd->err = -ENOMEM;
214 		else
215 			list_add_tail(&p->l_node, &rdd->middle);
216 	}
217 
218 	return rdd->err == 0;
219 }
220 
221 void ovl_cache_free(struct list_head *list)
222 {
223 	struct ovl_cache_entry *p;
224 	struct ovl_cache_entry *n;
225 
226 	list_for_each_entry_safe(p, n, list, l_node)
227 		kfree(p);
228 
229 	INIT_LIST_HEAD(list);
230 }
231 
232 void ovl_dir_cache_free(struct inode *inode)
233 {
234 	struct ovl_dir_cache *cache = ovl_dir_cache(inode);
235 
236 	if (cache) {
237 		ovl_cache_free(&cache->entries);
238 		kfree(cache);
239 	}
240 }
241 
242 static void ovl_cache_put(struct ovl_dir_file *od, struct inode *inode)
243 {
244 	struct ovl_dir_cache *cache = od->cache;
245 
246 	WARN_ON(cache->refcount <= 0);
247 	cache->refcount--;
248 	if (!cache->refcount) {
249 		if (ovl_dir_cache(inode) == cache)
250 			ovl_set_dir_cache(inode, NULL);
251 
252 		ovl_cache_free(&cache->entries);
253 		kfree(cache);
254 	}
255 }
256 
257 static bool ovl_fill_merge(struct dir_context *ctx, const char *name,
258 			  int namelen, loff_t offset, u64 ino,
259 			  unsigned int d_type)
260 {
261 	struct ovl_readdir_data *rdd =
262 		container_of(ctx, struct ovl_readdir_data, ctx);
263 
264 	rdd->count++;
265 	if (!rdd->is_lowest)
266 		return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
267 	else
268 		return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type);
269 }
270 
271 static int ovl_check_whiteouts(const struct path *path, struct ovl_readdir_data *rdd)
272 {
273 	int err;
274 	struct dentry *dentry, *dir = path->dentry;
275 	const struct cred *old_cred;
276 
277 	old_cred = ovl_override_creds(rdd->dentry->d_sb);
278 
279 	err = down_write_killable(&dir->d_inode->i_rwsem);
280 	if (!err) {
281 		while (rdd->first_maybe_whiteout) {
282 			struct ovl_cache_entry *p =
283 				rdd->first_maybe_whiteout;
284 			rdd->first_maybe_whiteout = p->next_maybe_whiteout;
285 			dentry = lookup_one(mnt_idmap(path->mnt),
286 					    &QSTR_LEN(p->name, p->len), dir);
287 			if (!IS_ERR(dentry)) {
288 				p->is_whiteout = ovl_is_whiteout(dentry);
289 				dput(dentry);
290 			}
291 		}
292 		inode_unlock(dir->d_inode);
293 	}
294 	ovl_revert_creds(old_cred);
295 
296 	return err;
297 }
298 
299 static inline int ovl_dir_read(const struct path *realpath,
300 			       struct ovl_readdir_data *rdd)
301 {
302 	struct file *realfile;
303 	int err;
304 
305 	realfile = ovl_path_open(realpath, O_RDONLY | O_LARGEFILE);
306 	if (IS_ERR(realfile))
307 		return PTR_ERR(realfile);
308 
309 	rdd->first_maybe_whiteout = NULL;
310 	rdd->ctx.pos = 0;
311 	do {
312 		rdd->count = 0;
313 		rdd->err = 0;
314 		err = iterate_dir(realfile, &rdd->ctx);
315 		if (err >= 0)
316 			err = rdd->err;
317 	} while (!err && rdd->count);
318 
319 	if (!err && rdd->first_maybe_whiteout && rdd->dentry)
320 		err = ovl_check_whiteouts(realpath, rdd);
321 
322 	fput(realfile);
323 
324 	return err;
325 }
326 
327 static void ovl_dir_reset(struct file *file)
328 {
329 	struct ovl_dir_file *od = file->private_data;
330 	struct ovl_dir_cache *cache = od->cache;
331 	struct inode *inode = file_inode(file);
332 	bool is_real;
333 
334 	if (cache && ovl_inode_version_get(inode) != cache->version) {
335 		ovl_cache_put(od, inode);
336 		od->cache = NULL;
337 		od->cursor = NULL;
338 	}
339 	is_real = ovl_dir_is_real(inode);
340 	if (od->is_real != is_real) {
341 		/* is_real can only become false when dir is copied up */
342 		if (WARN_ON(is_real))
343 			return;
344 		od->is_real = false;
345 	}
346 }
347 
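/*
 * Read the layers of a merge directory from the topmost down (see
 * ovl_path_next()), so upper entries enter the rbtree first and shadow
 * same-named lower entries.  The lowest layer is read with is_lowest set
 * and its entries collected on the temporary 'middle' list, which is
 * linked at the front of the result list so that these entries come out
 * before purely-upper ones.
 */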
348 static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list,
349 	struct rb_root *root)
350 {
351 	int err;
352 	struct path realpath;
353 	struct ovl_readdir_data rdd = {
354 		.ctx.actor = ovl_fill_merge,
355 		.ctx.count = INT_MAX,
356 		.dentry = dentry,
357 		.list = list,
358 		.root = root,
359 		.is_lowest = false,
360 	};
361 	int idx, next;
362 	const struct ovl_layer *layer;
363 
364 	for (idx = 0; idx != -1; idx = next) {
365 		next = ovl_path_next(idx, dentry, &realpath, &layer);
366 		rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry;
367 		rdd.in_xwhiteouts_dir = layer->has_xwhiteouts &&
368 					ovl_dentry_has_xwhiteouts(dentry);
369 
370 		if (next != -1) {
371 			err = ovl_dir_read(&realpath, &rdd);
372 			if (err)
373 				break;
374 		} else {
375 			/*
376 			 * Insert lowest layer entries before upper ones; this
377 			 * allows offsets to be reasonably constant
378 			 */
379 			list_add(&rdd.middle, rdd.list);
380 			rdd.is_lowest = true;
381 			err = ovl_dir_read(&realpath, &rdd);
382 			list_del(&rdd.middle);
383 		}
384 	}
385 	return err;
386 }
387 
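/*
 * For a merge directory f_pos is simply an index into the cached entry
 * list, so seeking is linear in the directory size.
 */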
388 static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
389 {
390 	struct list_head *p;
391 	loff_t off = 0;
392 
393 	list_for_each(p, &od->cache->entries) {
394 		if (off >= pos)
395 			break;
396 		off++;
397 	}
398 	/* Cursor is safe since the cache is stable */
399 	od->cursor = p;
400 }
401 
402 static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
403 {
404 	int res;
405 	struct ovl_dir_cache *cache;
406 	struct inode *inode = d_inode(dentry);
407 
408 	cache = ovl_dir_cache(inode);
409 	if (cache && ovl_inode_version_get(inode) == cache->version) {
410 		WARN_ON(!cache->refcount);
411 		cache->refcount++;
412 		return cache;
413 	}
414 	ovl_set_dir_cache(d_inode(dentry), NULL);
415 
416 	cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
417 	if (!cache)
418 		return ERR_PTR(-ENOMEM);
419 
420 	cache->refcount = 1;
421 	INIT_LIST_HEAD(&cache->entries);
422 	cache->root = RB_ROOT;
423 
424 	res = ovl_dir_read_merged(dentry, &cache->entries, &cache->root);
425 	if (res) {
426 		ovl_cache_free(&cache->entries);
427 		kfree(cache);
428 		return ERR_PTR(res);
429 	}
430 
431 	cache->version = ovl_inode_version_get(inode);
432 	ovl_set_dir_cache(inode, cache);
433 
434 	return cache;
435 }
436 
437 /* Map inode number to lower fs unique range */
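/*
 * Illustrative example (numbers not from the source): with xinobits = 8,
 * xinoshift is 56, so a lower entry with real ino 1000 on fsid 2 is
 * reported as 1000 | (2 << 57).  An ino that already has any of its top
 * xinobits bits set does not fit and is returned unchanged, with an
 * optional ratelimited warning.
 */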
438 static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid,
439 			       const char *name, int namelen, bool warn)
440 {
441 	unsigned int xinoshift = 64 - xinobits;
442 
443 	if (unlikely(ino >> xinoshift)) {
444 		if (warn) {
445 			pr_warn_ratelimited("d_ino too big (%.*s, ino=%llu, xinobits=%d)\n",
446 					    namelen, name, ino, xinobits);
447 		}
448 		return ino;
449 	}
450 
451 	/*
452 	 * The lowest xinobit is reserved for mapping the non-persistent inode
453 	 * numbers range, but this range is only exposed via st_ino, not here.
454 	 */
455 	return ino | ((u64)fsid) << (xinoshift + 1);
456 }
457 
458 /*
459  * Set d_ino for upper entries if needed. Non-upper entries should always report
460  * the uppermost real inode ino and should not call this function.
461  *
462  * When not all layers are on the same fs, report the real ino also for upper.
463  *
464  * When all layers are on the same fs, and upper has a reference to
465  * copy up origin, call vfs_getattr() on the overlay entry to make
466  * sure that d_ino will be consistent with st_ino from stat(2).
467  *
468  * Also checks the overlay.whiteout xattr by doing a full lookup, which will
469  * return a negative dentry in that case.
470  */
471 static int ovl_cache_update(const struct path *path, struct ovl_cache_entry *p, bool update_ino)
472 
473 {
474 	struct dentry *dir = path->dentry;
475 	struct ovl_fs *ofs = OVL_FS(dir->d_sb);
476 	struct dentry *this = NULL;
477 	enum ovl_path_type type;
478 	u64 ino = p->real_ino;
479 	int xinobits = ovl_xino_bits(ofs);
480 	int err = 0;
481 
482 	if (!ovl_same_dev(ofs) && !p->check_xwhiteout)
483 		goto out;
484 
485 	if (p->name[0] == '.') {
486 		if (p->len == 1) {
487 			this = dget(dir);
488 			goto get;
489 		}
490 		if (p->len == 2 && p->name[1] == '.') {
491 			/* we shall not be moved */
492 			this = dget(dir->d_parent);
493 			goto get;
494 		}
495 	}
496 	/* This also checks for xwhiteouts */
497 	this = lookup_one(mnt_idmap(path->mnt), &QSTR_LEN(p->name, p->len), dir);
498 	if (IS_ERR_OR_NULL(this) || !this->d_inode) {
499 		/* Mark a stale entry */
500 		p->is_whiteout = true;
501 		if (IS_ERR(this)) {
502 			err = PTR_ERR(this);
503 			this = NULL;
504 			goto fail;
505 		}
506 		goto out;
507 	}
508 
509 get:
510 	if (!ovl_same_dev(ofs) || !update_ino)
511 		goto out;
512 
513 	type = ovl_path_type(this);
514 	if (OVL_TYPE_ORIGIN(type)) {
515 		struct kstat stat;
516 		struct path statpath = *path;
517 
518 		statpath.dentry = this;
519 		err = vfs_getattr(&statpath, &stat, STATX_INO, 0);
520 		if (err)
521 			goto fail;
522 
523 		/*
524 		 * Directory inode is always on overlay st_dev.
525 		 * Non-dir with ovl_same_dev() could be on pseudo st_dev in case
526 		 * of xino bits overflow.
527 		 */
528 		WARN_ON_ONCE(S_ISDIR(stat.mode) &&
529 			     dir->d_sb->s_dev != stat.dev);
530 		ino = stat.ino;
531 	} else if (xinobits && !OVL_TYPE_UPPER(type)) {
532 		ino = ovl_remap_lower_ino(ino, xinobits,
533 					  ovl_layer_lower(this)->fsid,
534 					  p->name, p->len,
535 					  ovl_xino_warn(ofs));
536 	}
537 
538 out:
539 	p->ino = ino;
540 	dput(this);
541 	return err;
542 
543 fail:
544 	pr_warn_ratelimited("failed to look up (%s) for ino (%i)\n",
545 			    p->name, err);
546 	goto out;
547 }
548 
549 static bool ovl_fill_plain(struct dir_context *ctx, const char *name,
550 			  int namelen, loff_t offset, u64 ino,
551 			  unsigned int d_type)
552 {
553 	struct ovl_cache_entry *p;
554 	struct ovl_readdir_data *rdd =
555 		container_of(ctx, struct ovl_readdir_data, ctx);
556 
557 	rdd->count++;
558 	p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
559 	if (p == NULL) {
560 		rdd->err = -ENOMEM;
561 		return false;
562 	}
563 	list_add_tail(&p->l_node, rdd->list);
564 
565 	return true;
566 }
567 
568 static int ovl_dir_read_impure(const struct path *path, struct list_head *list,
569 			       struct rb_root *root)
570 {
571 	int err;
572 	struct path realpath;
573 	struct ovl_cache_entry *p, *n;
574 	struct ovl_readdir_data rdd = {
575 		.ctx.actor = ovl_fill_plain,
576 		.ctx.count = INT_MAX,
577 		.list = list,
578 		.root = root,
579 	};
580 
581 	INIT_LIST_HEAD(list);
582 	*root = RB_ROOT;
583 	ovl_path_upper(path->dentry, &realpath);
584 
585 	err = ovl_dir_read(&realpath, &rdd);
586 	if (err)
587 		return err;
588 
589 	list_for_each_entry_safe(p, n, list, l_node) {
590 		if (strcmp(p->name, ".") != 0 &&
591 		    strcmp(p->name, "..") != 0) {
592 			err = ovl_cache_update(path, p, true);
593 			if (err)
594 				return err;
595 		}
596 		if (p->ino == p->real_ino) {
597 			list_del(&p->l_node);
598 			kfree(p);
599 		} else {
600 			struct rb_node **newp = &root->rb_node;
601 			struct rb_node *parent = NULL;
602 
603 			if (WARN_ON(ovl_cache_entry_find_link(p->name, p->len,
604 							      &newp, &parent)))
605 				return -EIO;
606 
607 			rb_link_node(&p->node, parent, newp);
608 			rb_insert_color(&p->node, root);
609 		}
610 	}
611 	return 0;
612 }
613 
614 static struct ovl_dir_cache *ovl_cache_get_impure(const struct path *path)
615 {
616 	int res;
617 	struct dentry *dentry = path->dentry;
618 	struct inode *inode = d_inode(dentry);
619 	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
620 	struct ovl_dir_cache *cache;
621 
622 	cache = ovl_dir_cache(inode);
623 	if (cache && ovl_inode_version_get(inode) == cache->version)
624 		return cache;
625 
626 	/* Impure cache is not refcounted, free it here */
627 	ovl_dir_cache_free(inode);
628 	ovl_set_dir_cache(inode, NULL);
629 
630 	cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
631 	if (!cache)
632 		return ERR_PTR(-ENOMEM);
633 
634 	res = ovl_dir_read_impure(path, &cache->entries, &cache->root);
635 	if (res) {
636 		ovl_cache_free(&cache->entries);
637 		kfree(cache);
638 		return ERR_PTR(res);
639 	}
640 	if (list_empty(&cache->entries)) {
641 		/*
642 		 * A good opportunity to get rid of an unneeded "impure" flag.
643 		 * Removing the "impure" xattr is best effort.
644 		 */
645 		if (!ovl_want_write(dentry)) {
646 			ovl_removexattr(ofs, ovl_dentry_upper(dentry),
647 					OVL_XATTR_IMPURE);
648 			ovl_drop_write(dentry);
649 		}
650 		ovl_clear_flag(OVL_IMPURE, inode);
651 		kfree(cache);
652 		return NULL;
653 	}
654 
655 	cache->version = ovl_inode_version_get(inode);
656 	ovl_set_dir_cache(inode, cache);
657 
658 	return cache;
659 }
660 
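/*
 * Context for ovl_iterate_real(): wraps the caller's dir_context so that
 * ovl_fill_real() can rewrite d_ino (for "..", for entries found in the
 * impure dir cache, or by xino remapping) before forwarding each entry
 * to the original actor.
 */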
661 struct ovl_readdir_translate {
662 	struct dir_context *orig_ctx;
663 	struct ovl_dir_cache *cache;
664 	struct dir_context ctx;
665 	u64 parent_ino;
666 	int fsid;
667 	int xinobits;
668 	bool xinowarn;
669 };
670 
671 static bool ovl_fill_real(struct dir_context *ctx, const char *name,
672 			   int namelen, loff_t offset, u64 ino,
673 			   unsigned int d_type)
674 {
675 	struct ovl_readdir_translate *rdt =
676 		container_of(ctx, struct ovl_readdir_translate, ctx);
677 	struct dir_context *orig_ctx = rdt->orig_ctx;
678 	bool res;
679 
680 	if (rdt->parent_ino && strcmp(name, "..") == 0) {
681 		ino = rdt->parent_ino;
682 	} else if (rdt->cache) {
683 		struct ovl_cache_entry *p;
684 
685 		p = ovl_cache_entry_find(&rdt->cache->root, name, namelen);
686 		if (p)
687 			ino = p->ino;
688 	} else if (rdt->xinobits) {
689 		ino = ovl_remap_lower_ino(ino, rdt->xinobits, rdt->fsid,
690 					  name, namelen, rdt->xinowarn);
691 	}
692 
693 	res = orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type);
694 	ctx->count = orig_ctx->count;
695 
696 	return res;
697 }
698 
699 static bool ovl_is_impure_dir(struct file *file)
700 {
701 	struct ovl_dir_file *od = file->private_data;
702 	struct inode *dir = file_inode(file);
703 
704 	/*
705 	 * Only upper dir can be impure, but if we are in the middle of
706 	 * iterating a lower real dir, dir could be copied up and marked
707 	 * impure. We only want the impure cache if we started iterating
708 	 * a real upper dir to begin with.
709 	 */
710 	return od->is_upper && ovl_test_flag(OVL_IMPURE, dir);
711 
712 }
713 
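/*
 * Iterate a real directory, translating inode numbers on the fly: ".."
 * gets the parent overlay ino if the parent is a merge dir, copied up
 * entries get their ino from the impure dir cache, and plain lower
 * entries are remapped into the xino range.
 */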
714 static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
715 {
716 	int err;
717 	struct ovl_dir_file *od = file->private_data;
718 	struct dentry *dir = file->f_path.dentry;
719 	struct ovl_fs *ofs = OVL_FS(dir->d_sb);
720 	const struct ovl_layer *lower_layer = ovl_layer_lower(dir);
721 	struct ovl_readdir_translate rdt = {
722 		.ctx.actor = ovl_fill_real,
723 		.ctx.count = ctx->count,
724 		.orig_ctx = ctx,
725 		.xinobits = ovl_xino_bits(ofs),
726 		.xinowarn = ovl_xino_warn(ofs),
727 	};
728 
729 	if (rdt.xinobits && lower_layer)
730 		rdt.fsid = lower_layer->fsid;
731 
732 	if (OVL_TYPE_MERGE(ovl_path_type(dir->d_parent))) {
733 		struct kstat stat;
734 		struct path statpath = file->f_path;
735 
736 		statpath.dentry = dir->d_parent;
737 		err = vfs_getattr(&statpath, &stat, STATX_INO, 0);
738 		if (err)
739 			return err;
740 
741 		WARN_ON_ONCE(dir->d_sb->s_dev != stat.dev);
742 		rdt.parent_ino = stat.ino;
743 	}
744 
745 	if (ovl_is_impure_dir(file)) {
746 		rdt.cache = ovl_cache_get_impure(&file->f_path);
747 		if (IS_ERR(rdt.cache))
748 			return PTR_ERR(rdt.cache);
749 	}
750 
751 	err = iterate_dir(od->realfile, &rdt.ctx);
752 	ctx->pos = rdt.ctx.pos;
753 
754 	return err;
755 }
756 
757 
758 static int ovl_iterate(struct file *file, struct dir_context *ctx)
759 {
760 	struct ovl_dir_file *od = file->private_data;
761 	struct dentry *dentry = file->f_path.dentry;
762 	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
763 	struct ovl_cache_entry *p;
764 	const struct cred *old_cred;
765 	int err;
766 
767 	old_cred = ovl_override_creds(dentry->d_sb);
768 	if (!ctx->pos)
769 		ovl_dir_reset(file);
770 
771 	if (od->is_real) {
772 		/*
773 		 * If parent is merge, then need to adjust d_ino for '..', if
774 		 * dir is impure then need to adjust d_ino for copied up
775 		 * entries.
776 		 */
777 		if (ovl_xino_bits(ofs) ||
778 		    (ovl_same_fs(ofs) &&
779 		     (ovl_is_impure_dir(file) ||
780 		      OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent))))) {
781 			err = ovl_iterate_real(file, ctx);
782 		} else {
783 			err = iterate_dir(od->realfile, ctx);
784 		}
785 		goto out;
786 	}
787 
788 	if (!od->cache) {
789 		struct ovl_dir_cache *cache;
790 
791 		cache = ovl_cache_get(dentry);
792 		err = PTR_ERR(cache);
793 		if (IS_ERR(cache))
794 			goto out;
795 
796 		od->cache = cache;
797 		ovl_seek_cursor(od, ctx->pos);
798 	}
799 
800 	while (od->cursor != &od->cache->entries) {
801 		p = list_entry(od->cursor, struct ovl_cache_entry, l_node);
802 		if (!p->is_whiteout) {
803 			if (!p->ino || p->check_xwhiteout) {
804 				err = ovl_cache_update(&file->f_path, p, !p->ino);
805 				if (err)
806 					goto out;
807 			}
808 		}
809 		/* ovl_cache_update() sets is_whiteout on stale entry */
810 		if (!p->is_whiteout) {
811 			if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
812 				break;
813 		}
814 		od->cursor = p->l_node.next;
815 		ctx->pos++;
816 	}
817 	err = 0;
818 out:
819 	ovl_revert_creds(old_cred);
820 	return err;
821 }
822 
823 static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
824 {
825 	loff_t res;
826 	struct ovl_dir_file *od = file->private_data;
827 
828 	inode_lock(file_inode(file));
829 	if (!file->f_pos)
830 		ovl_dir_reset(file);
831 
832 	if (od->is_real) {
833 		res = vfs_llseek(od->realfile, offset, origin);
834 		file->f_pos = od->realfile->f_pos;
835 	} else {
836 		res = -EINVAL;
837 
838 		switch (origin) {
839 		case SEEK_CUR:
840 			offset += file->f_pos;
841 			break;
842 		case SEEK_SET:
843 			break;
844 		default:
845 			goto out_unlock;
846 		}
847 		if (offset < 0)
848 			goto out_unlock;
849 
850 		if (offset != file->f_pos) {
851 			file->f_pos = offset;
852 			if (od->cache)
853 				ovl_seek_cursor(od, offset);
854 		}
855 		res = offset;
856 	}
857 out_unlock:
858 	inode_unlock(file_inode(file));
859 
860 	return res;
861 }
862 
863 static struct file *ovl_dir_open_realfile(const struct file *file,
864 					  const struct path *realpath)
865 {
866 	struct file *res;
867 	const struct cred *old_cred;
868 
869 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
870 	res = ovl_path_open(realpath, O_RDONLY | (file->f_flags & O_LARGEFILE));
871 	ovl_revert_creds(old_cred);
872 
873 	return res;
874 }
875 
876 /*
877  * Like ovl_real_fdget(), returns upperfile if dir was copied up since open.
878  * Unlike ovl_real_fdget(), this caches upperfile in file->private_data.
879  *
880  * TODO: use same abstract type for file->private_data of dir and file so
881  * upperfile could also be cached for files as well.
882  */
883 struct file *ovl_dir_real_file(const struct file *file, bool want_upper)
884 {
885 
886 	struct ovl_dir_file *od = file->private_data;
887 	struct dentry *dentry = file->f_path.dentry;
888 	struct file *old, *realfile = od->realfile;
889 
890 	if (!OVL_TYPE_UPPER(ovl_path_type(dentry)))
891 		return want_upper ? NULL : realfile;
892 
893 	/*
894 	 * Need to check if we started out being a lower dir, but got copied up
895 	 */
896 	if (!od->is_upper) {
897 		realfile = READ_ONCE(od->upperfile);
898 		if (!realfile) {
899 			struct path upperpath;
900 
901 			ovl_path_upper(dentry, &upperpath);
902 			realfile = ovl_dir_open_realfile(file, &upperpath);
903 			if (IS_ERR(realfile))
904 				return realfile;
905 
906 			old = cmpxchg_release(&od->upperfile, NULL, realfile);
907 			if (old) {
908 				fput(realfile);
909 				realfile = old;
910 			}
911 		}
912 	}
913 
914 	return realfile;
915 }
916 
917 static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
918 			 int datasync)
919 {
920 	struct file *realfile;
921 	int err;
922 
923 	err = ovl_sync_status(OVL_FS(file_inode(file)->i_sb));
924 	if (err <= 0)
925 		return err;
926 
927 	realfile = ovl_dir_real_file(file, true);
928 	err = PTR_ERR_OR_ZERO(realfile);
929 
930 	/* Nothing to sync for lower */
931 	if (!realfile || err)
932 		return err;
933 
934 	return vfs_fsync_range(realfile, start, end, datasync);
935 }
936 
937 static int ovl_dir_release(struct inode *inode, struct file *file)
938 {
939 	struct ovl_dir_file *od = file->private_data;
940 
941 	if (od->cache) {
942 		inode_lock(inode);
943 		ovl_cache_put(od, inode);
944 		inode_unlock(inode);
945 	}
946 	fput(od->realfile);
947 	if (od->upperfile)
948 		fput(od->upperfile);
949 	kfree(od);
950 
951 	return 0;
952 }
953 
954 static int ovl_dir_open(struct inode *inode, struct file *file)
955 {
956 	struct path realpath;
957 	struct file *realfile;
958 	struct ovl_dir_file *od;
959 	enum ovl_path_type type;
960 
961 	od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);
962 	if (!od)
963 		return -ENOMEM;
964 
965 	type = ovl_path_real(file->f_path.dentry, &realpath);
966 	realfile = ovl_dir_open_realfile(file, &realpath);
967 	if (IS_ERR(realfile)) {
968 		kfree(od);
969 		return PTR_ERR(realfile);
970 	}
971 	od->realfile = realfile;
972 	od->is_real = ovl_dir_is_real(inode);
973 	od->is_upper = OVL_TYPE_UPPER(type);
974 	file->private_data = od;
975 
976 	return 0;
977 }
978 
979 WRAP_DIR_ITER(ovl_iterate) // FIXME!
980 const struct file_operations ovl_dir_operations = {
981 	.read		= generic_read_dir,
982 	.open		= ovl_dir_open,
983 	.iterate_shared	= shared_ovl_iterate,
984 	.llseek		= ovl_dir_llseek,
985 	.fsync		= ovl_dir_fsync,
986 	.release	= ovl_dir_release,
987 };
988 
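/*
 * Check a merged directory for emptiness: anything other than ".", ".."
 * and whiteouts makes it non-empty.  Whiteouts found in upperdir are kept
 * on the returned list so that the caller can clear them with
 * ovl_cleanup_whiteouts() when deleting the directory.
 */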
989 int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
990 {
991 	int err;
992 	struct ovl_cache_entry *p, *n;
993 	struct rb_root root = RB_ROOT;
994 	const struct cred *old_cred;
995 
996 	old_cred = ovl_override_creds(dentry->d_sb);
997 	err = ovl_dir_read_merged(dentry, list, &root);
998 	ovl_revert_creds(old_cred);
999 	if (err)
1000 		return err;
1001 
1002 	err = 0;
1003 
1004 	list_for_each_entry_safe(p, n, list, l_node) {
1005 		/*
1006 		 * Select whiteouts in upperdir; they should
1007 		 * be cleared when deleting this directory.
1008 		 */
1009 		if (p->is_whiteout) {
1010 			if (p->is_upper)
1011 				continue;
1012 			goto del_entry;
1013 		}
1014 
1015 		if (p->name[0] == '.') {
1016 			if (p->len == 1)
1017 				goto del_entry;
1018 			if (p->len == 2 && p->name[1] == '.')
1019 				goto del_entry;
1020 		}
1021 		err = -ENOTEMPTY;
1022 		break;
1023 
1024 del_entry:
1025 		list_del(&p->l_node);
1026 		kfree(p);
1027 	}
1028 
1029 	return err;
1030 }
1031 
1032 void ovl_cleanup_whiteouts(struct ovl_fs *ofs, struct dentry *upper,
1033 			   struct list_head *list)
1034 {
1035 	struct ovl_cache_entry *p;
1036 
1037 	inode_lock_nested(upper->d_inode, I_MUTEX_CHILD);
1038 	list_for_each_entry(p, list, l_node) {
1039 		struct dentry *dentry;
1040 
1041 		if (WARN_ON(!p->is_whiteout || !p->is_upper))
1042 			continue;
1043 
1044 		dentry = ovl_lookup_upper(ofs, p->name, upper, p->len);
1045 		if (IS_ERR(dentry)) {
1046 			pr_err("lookup '%s/%.*s' failed (%i)\n",
1047 			       upper->d_name.name, p->len, p->name,
1048 			       (int) PTR_ERR(dentry));
1049 			continue;
1050 		}
1051 		if (dentry->d_inode)
1052 			ovl_cleanup(ofs, upper->d_inode, dentry);
1053 		dput(dentry);
1054 	}
1055 	inode_unlock(upper->d_inode);
1056 }
1057 
1058 static bool ovl_check_d_type(struct dir_context *ctx, const char *name,
1059 			  int namelen, loff_t offset, u64 ino,
1060 			  unsigned int d_type)
1061 {
1062 	struct ovl_readdir_data *rdd =
1063 		container_of(ctx, struct ovl_readdir_data, ctx);
1064 
1065 	/* Even if d_type is not supported, DT_DIR is returned for . and .. */
1066 	if (!strncmp(name, ".", namelen) || !strncmp(name, "..", namelen))
1067 		return true;
1068 
1069 	if (d_type != DT_UNKNOWN)
1070 		rdd->d_type_supported = true;
1071 
1072 	return true;
1073 }
1074 
1075 /*
1076  * Returns 1 if d_type is supported, 0 not supported/unknown. Negative values
1077  * if error is encountered.
1078  */
1079 int ovl_check_d_type_supported(const struct path *realpath)
1080 {
1081 	int err;
1082 	struct ovl_readdir_data rdd = {
1083 		.ctx.actor = ovl_check_d_type,
1084 		.ctx.count = INT_MAX,
1085 		.d_type_supported = false,
1086 	};
1087 
1088 	err = ovl_dir_read(realpath, &rdd);
1089 	if (err)
1090 		return err;
1091 
1092 	return rdd.d_type_supported;
1093 }
1094 
1095 #define OVL_INCOMPATDIR_NAME "incompat"
1096 
1097 static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, const struct path *path,
1098 				       int level)
1099 {
1100 	int err;
1101 	struct inode *dir = path->dentry->d_inode;
1102 	LIST_HEAD(list);
1103 	struct ovl_cache_entry *p;
1104 	struct ovl_readdir_data rdd = {
1105 		.ctx.actor = ovl_fill_plain,
1106 		.ctx.count = INT_MAX,
1107 		.list = &list,
1108 	};
1109 	bool incompat = false;
1110 
1111 	/*
1112 	 * The "work/incompat" directory is treated specially - if it is not
1113 	 * empty, instead of printing a generic error and mounting read-only,
1114 	 * we will error about incompat features and fail the mount.
1115 	 *
1116 	 * When called from ovl_indexdir_cleanup(), path->dentry->d_name.name
1117 	 * starts with '#'.
1118 	 */
1119 	if (level == 2 &&
1120 	    !strcmp(path->dentry->d_name.name, OVL_INCOMPATDIR_NAME))
1121 		incompat = true;
1122 
1123 	err = ovl_dir_read(path, &rdd);
1124 	if (err)
1125 		goto out;
1126 
1127 	inode_lock_nested(dir, I_MUTEX_PARENT);
1128 	list_for_each_entry(p, &list, l_node) {
1129 		struct dentry *dentry;
1130 
1131 		if (p->name[0] == '.') {
1132 			if (p->len == 1)
1133 				continue;
1134 			if (p->len == 2 && p->name[1] == '.')
1135 				continue;
1136 		} else if (incompat) {
1137 			pr_err("overlay with incompat feature '%s' cannot be mounted\n",
1138 				p->name);
1139 			err = -EINVAL;
1140 			break;
1141 		}
1142 		dentry = ovl_lookup_upper(ofs, p->name, path->dentry, p->len);
1143 		if (IS_ERR(dentry))
1144 			continue;
1145 		if (dentry->d_inode)
1146 			err = ovl_workdir_cleanup(ofs, dir, path->mnt, dentry, level);
1147 		dput(dentry);
1148 		if (err)
1149 			break;
1150 	}
1151 	inode_unlock(dir);
1152 out:
1153 	ovl_cache_free(&list);
1154 	return err;
1155 }
1156 
1157 int ovl_workdir_cleanup(struct ovl_fs *ofs, struct inode *dir,
1158 			struct vfsmount *mnt, struct dentry *dentry, int level)
1159 {
1160 	int err;
1161 
1162 	if (!d_is_dir(dentry) || level > 1) {
1163 		return ovl_cleanup(ofs, dir, dentry);
1164 	}
1165 
1166 	err = ovl_do_rmdir(ofs, dir, dentry);
1167 	if (err) {
1168 		struct path path = { .mnt = mnt, .dentry = dentry };
1169 
1170 		inode_unlock(dir);
1171 		err = ovl_workdir_cleanup_recurse(ofs, &path, level + 1);
1172 		inode_lock_nested(dir, I_MUTEX_PARENT);
1173 		if (!err)
1174 			err = ovl_cleanup(ofs, dir, dentry);
1175 	}
1176 
1177 	return err;
1178 }
1179 
1180 int ovl_indexdir_cleanup(struct ovl_fs *ofs)
1181 {
1182 	int err;
1183 	struct dentry *indexdir = ofs->workdir;
1184 	struct dentry *index = NULL;
1185 	struct inode *dir = indexdir->d_inode;
1186 	struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = indexdir };
1187 	LIST_HEAD(list);
1188 	struct ovl_cache_entry *p;
1189 	struct ovl_readdir_data rdd = {
1190 		.ctx.actor = ovl_fill_plain,
1191 		.ctx.count = INT_MAX,
1192 		.list = &list,
1193 	};
1194 
1195 	err = ovl_dir_read(&path, &rdd);
1196 	if (err)
1197 		goto out;
1198 
1199 	inode_lock_nested(dir, I_MUTEX_PARENT);
1200 	list_for_each_entry(p, &list, l_node) {
1201 		if (p->name[0] == '.') {
1202 			if (p->len == 1)
1203 				continue;
1204 			if (p->len == 2 && p->name[1] == '.')
1205 				continue;
1206 		}
1207 		index = ovl_lookup_upper(ofs, p->name, indexdir, p->len);
1208 		if (IS_ERR(index)) {
1209 			err = PTR_ERR(index);
1210 			index = NULL;
1211 			break;
1212 		}
1213 		/* Cleanup leftover from index create/cleanup attempt */
1214 		if (index->d_name.name[0] == '#') {
1215 			err = ovl_workdir_cleanup(ofs, dir, path.mnt, index, 1);
1216 			if (err)
1217 				break;
1218 			goto next;
1219 		}
1220 		err = ovl_verify_index(ofs, index);
1221 		if (!err) {
1222 			goto next;
1223 		} else if (err == -ESTALE) {
1224 			/* Cleanup stale index entries */
1225 			err = ovl_cleanup(ofs, dir, index);
1226 		} else if (err != -ENOENT) {
1227 			/*
1228 			 * Abort mount to avoid corrupting the index if
1229 			 * an incompatible index entry was found or on out
1230 			 * of memory.
1231 			 */
1232 			break;
1233 		} else if (ofs->config.nfs_export) {
1234 			/*
1235 			 * Whiteout orphan index to block future open by
1236 			 * handle after overlay nlink dropped to zero.
1237 			 */
1238 			err = ovl_cleanup_and_whiteout(ofs, dir, index);
1239 		} else {
1240 			/* Cleanup orphan index entries */
1241 			err = ovl_cleanup(ofs, dir, index);
1242 		}
1243 
1244 		if (err)
1245 			break;
1246 
1247 next:
1248 		dput(index);
1249 		index = NULL;
1250 	}
1251 	dput(index);
1252 	inode_unlock(dir);
1253 out:
1254 	ovl_cache_free(&list);
1255 	if (err)
1256 		pr_err("failed index dir cleanup (%i)\n", err);
1257 	return err;
1258 }
1259