xref: /linux/fs/overlayfs/readdir.c (revision 0d08df6c493898e679d9c517e77ea95c063d40ec)
1 /*
2  *
3  * Copyright (C) 2011 Novell Inc.
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 as published by
7  * the Free Software Foundation.
8  */
9 
10 #include <linux/fs.h>
11 #include <linux/slab.h>
12 #include <linux/namei.h>
13 #include <linux/file.h>
14 #include <linux/xattr.h>
15 #include <linux/rbtree.h>
16 #include <linux/security.h>
17 #include <linux/cred.h>
18 #include "overlayfs.h"
19 
20 struct ovl_cache_entry {
21 	unsigned int len;
22 	unsigned int type;
23 	u64 ino;
24 	struct list_head l_node;
25 	struct rb_node node;
26 	struct ovl_cache_entry *next_maybe_whiteout;
27 	bool is_whiteout;
28 	char name[];
29 };
30 
31 struct ovl_dir_cache {
32 	long refcount;
33 	u64 version;
34 	struct list_head entries;
35 };
36 
37 struct ovl_readdir_data {
38 	struct dir_context ctx;
39 	bool is_lowest;
40 	struct rb_root root;
41 	struct list_head *list;
42 	struct list_head middle;
43 	struct ovl_cache_entry *first_maybe_whiteout;
44 	int count;
45 	int err;
46 	bool d_type_supported;
47 };
48 
49 struct ovl_dir_file {
50 	bool is_real;
51 	bool is_upper;
52 	struct ovl_dir_cache *cache;
53 	struct list_head *cursor;
54 	struct file *realfile;
55 	struct file *upperfile;
56 };
57 
58 static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
59 {
60 	return container_of(n, struct ovl_cache_entry, node);
61 }
62 
63 static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
64 						    const char *name, int len)
65 {
66 	struct rb_node *node = root->rb_node;
67 	int cmp;
68 
69 	while (node) {
70 		struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
71 
72 		cmp = strncmp(name, p->name, len);
73 		if (cmp > 0)
74 			node = p->node.rb_right;
75 		else if (cmp < 0 || len < p->len)
76 			node = p->node.rb_left;
77 		else
78 			return p;
79 	}
80 
81 	return NULL;
82 }
83 
84 static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
85 						   const char *name, int len,
86 						   u64 ino, unsigned int d_type)
87 {
88 	struct ovl_cache_entry *p;
89 	size_t size = offsetof(struct ovl_cache_entry, name[len + 1]);
90 
91 	p = kmalloc(size, GFP_KERNEL);
92 	if (!p)
93 		return NULL;
94 
95 	memcpy(p->name, name, len);
96 	p->name[len] = '\0';
97 	p->len = len;
98 	p->type = d_type;
99 	p->ino = ino;
100 	p->is_whiteout = false;
101 
102 	if (d_type == DT_CHR) {
103 		p->next_maybe_whiteout = rdd->first_maybe_whiteout;
104 		rdd->first_maybe_whiteout = p;
105 	}
106 	return p;
107 }
108 
109 static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
110 				  const char *name, int len, u64 ino,
111 				  unsigned int d_type)
112 {
113 	struct rb_node **newp = &rdd->root.rb_node;
114 	struct rb_node *parent = NULL;
115 	struct ovl_cache_entry *p;
116 
117 	while (*newp) {
118 		int cmp;
119 		struct ovl_cache_entry *tmp;
120 
121 		parent = *newp;
122 		tmp = ovl_cache_entry_from_node(*newp);
123 		cmp = strncmp(name, tmp->name, len);
124 		if (cmp > 0)
125 			newp = &tmp->node.rb_right;
126 		else if (cmp < 0 || len < tmp->len)
127 			newp = &tmp->node.rb_left;
128 		else
129 			return 0;
130 	}
131 
132 	p = ovl_cache_entry_new(rdd, name, len, ino, d_type);
133 	if (p == NULL)
134 		return -ENOMEM;
135 
136 	list_add_tail(&p->l_node, rdd->list);
137 	rb_link_node(&p->node, parent, newp);
138 	rb_insert_color(&p->node, &rdd->root);
139 
140 	return 0;
141 }
142 
143 static int ovl_fill_lowest(struct ovl_readdir_data *rdd,
144 			   const char *name, int namelen,
145 			   loff_t offset, u64 ino, unsigned int d_type)
146 {
147 	struct ovl_cache_entry *p;
148 
149 	p = ovl_cache_entry_find(&rdd->root, name, namelen);
150 	if (p) {
151 		list_move_tail(&p->l_node, &rdd->middle);
152 	} else {
153 		p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
154 		if (p == NULL)
155 			rdd->err = -ENOMEM;
156 		else
157 			list_add_tail(&p->l_node, &rdd->middle);
158 	}
159 
160 	return rdd->err;
161 }
162 
163 void ovl_cache_free(struct list_head *list)
164 {
165 	struct ovl_cache_entry *p;
166 	struct ovl_cache_entry *n;
167 
168 	list_for_each_entry_safe(p, n, list, l_node)
169 		kfree(p);
170 
171 	INIT_LIST_HEAD(list);
172 }
173 
174 static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry)
175 {
176 	struct ovl_dir_cache *cache = od->cache;
177 
178 	WARN_ON(cache->refcount <= 0);
179 	cache->refcount--;
180 	if (!cache->refcount) {
181 		if (ovl_dir_cache(dentry) == cache)
182 			ovl_set_dir_cache(dentry, NULL);
183 
184 		ovl_cache_free(&cache->entries);
185 		kfree(cache);
186 	}
187 }
188 
189 static int ovl_fill_merge(struct dir_context *ctx, const char *name,
190 			  int namelen, loff_t offset, u64 ino,
191 			  unsigned int d_type)
192 {
193 	struct ovl_readdir_data *rdd =
194 		container_of(ctx, struct ovl_readdir_data, ctx);
195 
196 	rdd->count++;
197 	if (!rdd->is_lowest)
198 		return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
199 	else
200 		return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type);
201 }
202 
203 static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd)
204 {
205 	int err;
206 	struct ovl_cache_entry *p;
207 	struct dentry *dentry;
208 	const struct cred *old_cred;
209 	struct cred *override_cred;
210 
211 	override_cred = prepare_creds();
212 	if (!override_cred)
213 		return -ENOMEM;
214 
215 	/*
216 	 * CAP_DAC_OVERRIDE for lookup
217 	 */
218 	cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
219 	old_cred = override_creds(override_cred);
220 
221 	inode_lock(dir->d_inode);
222 	err = 0;
223 	// XXX: err = mutex_lock_killable(&dir->d_inode->i_mutex);
224 	if (!err) {
225 		while (rdd->first_maybe_whiteout) {
226 			p = rdd->first_maybe_whiteout;
227 			rdd->first_maybe_whiteout = p->next_maybe_whiteout;
228 			dentry = lookup_one_len(p->name, dir, p->len);
229 			if (!IS_ERR(dentry)) {
230 				p->is_whiteout = ovl_is_whiteout(dentry);
231 				dput(dentry);
232 			}
233 		}
234 		inode_unlock(dir->d_inode);
235 	}
236 	revert_creds(old_cred);
237 	put_cred(override_cred);
238 
239 	return err;
240 }
241 
242 static inline int ovl_dir_read(struct path *realpath,
243 			       struct ovl_readdir_data *rdd)
244 {
245 	struct file *realfile;
246 	int err;
247 
248 	realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY);
249 	if (IS_ERR(realfile))
250 		return PTR_ERR(realfile);
251 
252 	rdd->first_maybe_whiteout = NULL;
253 	rdd->ctx.pos = 0;
254 	do {
255 		rdd->count = 0;
256 		rdd->err = 0;
257 		err = iterate_dir(realfile, &rdd->ctx);
258 		if (err >= 0)
259 			err = rdd->err;
260 	} while (!err && rdd->count);
261 
262 	if (!err && rdd->first_maybe_whiteout)
263 		err = ovl_check_whiteouts(realpath->dentry, rdd);
264 
265 	fput(realfile);
266 
267 	return err;
268 }
269 
270 static void ovl_dir_reset(struct file *file)
271 {
272 	struct ovl_dir_file *od = file->private_data;
273 	struct ovl_dir_cache *cache = od->cache;
274 	struct dentry *dentry = file->f_path.dentry;
275 	enum ovl_path_type type = ovl_path_type(dentry);
276 
277 	if (cache && ovl_dentry_version_get(dentry) != cache->version) {
278 		ovl_cache_put(od, dentry);
279 		od->cache = NULL;
280 		od->cursor = NULL;
281 	}
282 	WARN_ON(!od->is_real && !OVL_TYPE_MERGE(type));
283 	if (od->is_real && OVL_TYPE_MERGE(type))
284 		od->is_real = false;
285 }
286 
287 static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list)
288 {
289 	int err;
290 	struct path realpath;
291 	struct ovl_readdir_data rdd = {
292 		.ctx.actor = ovl_fill_merge,
293 		.list = list,
294 		.root = RB_ROOT,
295 		.is_lowest = false,
296 	};
297 	int idx, next;
298 
299 	for (idx = 0; idx != -1; idx = next) {
300 		next = ovl_path_next(idx, dentry, &realpath);
301 
302 		if (next != -1) {
303 			err = ovl_dir_read(&realpath, &rdd);
304 			if (err)
305 				break;
306 		} else {
307 			/*
308 			 * Insert lowest layer entries before upper ones, this
309 			 * allows offsets to be reasonably constant
310 			 */
311 			list_add(&rdd.middle, rdd.list);
312 			rdd.is_lowest = true;
313 			err = ovl_dir_read(&realpath, &rdd);
314 			list_del(&rdd.middle);
315 		}
316 	}
317 	return err;
318 }
319 
320 static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
321 {
322 	struct list_head *p;
323 	loff_t off = 0;
324 
325 	list_for_each(p, &od->cache->entries) {
326 		if (off >= pos)
327 			break;
328 		off++;
329 	}
330 	/* Cursor is safe since the cache is stable */
331 	od->cursor = p;
332 }
333 
334 static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
335 {
336 	int res;
337 	struct ovl_dir_cache *cache;
338 
339 	cache = ovl_dir_cache(dentry);
340 	if (cache && ovl_dentry_version_get(dentry) == cache->version) {
341 		cache->refcount++;
342 		return cache;
343 	}
344 	ovl_set_dir_cache(dentry, NULL);
345 
346 	cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
347 	if (!cache)
348 		return ERR_PTR(-ENOMEM);
349 
350 	cache->refcount = 1;
351 	INIT_LIST_HEAD(&cache->entries);
352 
353 	res = ovl_dir_read_merged(dentry, &cache->entries);
354 	if (res) {
355 		ovl_cache_free(&cache->entries);
356 		kfree(cache);
357 		return ERR_PTR(res);
358 	}
359 
360 	cache->version = ovl_dentry_version_get(dentry);
361 	ovl_set_dir_cache(dentry, cache);
362 
363 	return cache;
364 }
365 
366 static int ovl_iterate(struct file *file, struct dir_context *ctx)
367 {
368 	struct ovl_dir_file *od = file->private_data;
369 	struct dentry *dentry = file->f_path.dentry;
370 	struct ovl_cache_entry *p;
371 
372 	if (!ctx->pos)
373 		ovl_dir_reset(file);
374 
375 	if (od->is_real)
376 		return iterate_dir(od->realfile, ctx);
377 
378 	if (!od->cache) {
379 		struct ovl_dir_cache *cache;
380 
381 		cache = ovl_cache_get(dentry);
382 		if (IS_ERR(cache))
383 			return PTR_ERR(cache);
384 
385 		od->cache = cache;
386 		ovl_seek_cursor(od, ctx->pos);
387 	}
388 
389 	while (od->cursor != &od->cache->entries) {
390 		p = list_entry(od->cursor, struct ovl_cache_entry, l_node);
391 		if (!p->is_whiteout)
392 			if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
393 				break;
394 		od->cursor = p->l_node.next;
395 		ctx->pos++;
396 	}
397 	return 0;
398 }
399 
400 static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
401 {
402 	loff_t res;
403 	struct ovl_dir_file *od = file->private_data;
404 
405 	inode_lock(file_inode(file));
406 	if (!file->f_pos)
407 		ovl_dir_reset(file);
408 
409 	if (od->is_real) {
410 		res = vfs_llseek(od->realfile, offset, origin);
411 		file->f_pos = od->realfile->f_pos;
412 	} else {
413 		res = -EINVAL;
414 
415 		switch (origin) {
416 		case SEEK_CUR:
417 			offset += file->f_pos;
418 			break;
419 		case SEEK_SET:
420 			break;
421 		default:
422 			goto out_unlock;
423 		}
424 		if (offset < 0)
425 			goto out_unlock;
426 
427 		if (offset != file->f_pos) {
428 			file->f_pos = offset;
429 			if (od->cache)
430 				ovl_seek_cursor(od, offset);
431 		}
432 		res = offset;
433 	}
434 out_unlock:
435 	inode_unlock(file_inode(file));
436 
437 	return res;
438 }
439 
440 static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
441 			 int datasync)
442 {
443 	struct ovl_dir_file *od = file->private_data;
444 	struct dentry *dentry = file->f_path.dentry;
445 	struct file *realfile = od->realfile;
446 
447 	/*
448 	 * Need to check if we started out being a lower dir, but got copied up
449 	 */
450 	if (!od->is_upper && OVL_TYPE_UPPER(ovl_path_type(dentry))) {
451 		struct inode *inode = file_inode(file);
452 
453 		realfile = lockless_dereference(od->upperfile);
454 		if (!realfile) {
455 			struct path upperpath;
456 
457 			ovl_path_upper(dentry, &upperpath);
458 			realfile = ovl_path_open(&upperpath, O_RDONLY);
459 			smp_mb__before_spinlock();
460 			inode_lock(inode);
461 			if (!od->upperfile) {
462 				if (IS_ERR(realfile)) {
463 					inode_unlock(inode);
464 					return PTR_ERR(realfile);
465 				}
466 				od->upperfile = realfile;
467 			} else {
468 				/* somebody has beaten us to it */
469 				if (!IS_ERR(realfile))
470 					fput(realfile);
471 				realfile = od->upperfile;
472 			}
473 			inode_unlock(inode);
474 		}
475 	}
476 
477 	return vfs_fsync_range(realfile, start, end, datasync);
478 }
479 
480 static int ovl_dir_release(struct inode *inode, struct file *file)
481 {
482 	struct ovl_dir_file *od = file->private_data;
483 
484 	if (od->cache) {
485 		inode_lock(inode);
486 		ovl_cache_put(od, file->f_path.dentry);
487 		inode_unlock(inode);
488 	}
489 	fput(od->realfile);
490 	if (od->upperfile)
491 		fput(od->upperfile);
492 	kfree(od);
493 
494 	return 0;
495 }
496 
497 static int ovl_dir_open(struct inode *inode, struct file *file)
498 {
499 	struct path realpath;
500 	struct file *realfile;
501 	struct ovl_dir_file *od;
502 	enum ovl_path_type type;
503 
504 	od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);
505 	if (!od)
506 		return -ENOMEM;
507 
508 	type = ovl_path_real(file->f_path.dentry, &realpath);
509 	realfile = ovl_path_open(&realpath, file->f_flags);
510 	if (IS_ERR(realfile)) {
511 		kfree(od);
512 		return PTR_ERR(realfile);
513 	}
514 	od->realfile = realfile;
515 	od->is_real = !OVL_TYPE_MERGE(type);
516 	od->is_upper = OVL_TYPE_UPPER(type);
517 	file->private_data = od;
518 
519 	return 0;
520 }
521 
522 const struct file_operations ovl_dir_operations = {
523 	.read		= generic_read_dir,
524 	.open		= ovl_dir_open,
525 	.iterate	= ovl_iterate,
526 	.llseek		= ovl_dir_llseek,
527 	.fsync		= ovl_dir_fsync,
528 	.release	= ovl_dir_release,
529 };
530 
531 int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
532 {
533 	int err;
534 	struct ovl_cache_entry *p;
535 
536 	err = ovl_dir_read_merged(dentry, list);
537 	if (err)
538 		return err;
539 
540 	err = 0;
541 
542 	list_for_each_entry(p, list, l_node) {
543 		if (p->is_whiteout)
544 			continue;
545 
546 		if (p->name[0] == '.') {
547 			if (p->len == 1)
548 				continue;
549 			if (p->len == 2 && p->name[1] == '.')
550 				continue;
551 		}
552 		err = -ENOTEMPTY;
553 		break;
554 	}
555 
556 	return err;
557 }
558 
559 void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
560 {
561 	struct ovl_cache_entry *p;
562 
563 	inode_lock_nested(upper->d_inode, I_MUTEX_CHILD);
564 	list_for_each_entry(p, list, l_node) {
565 		struct dentry *dentry;
566 
567 		if (!p->is_whiteout)
568 			continue;
569 
570 		dentry = lookup_one_len(p->name, upper, p->len);
571 		if (IS_ERR(dentry)) {
572 			pr_err("overlayfs: lookup '%s/%.*s' failed (%i)\n",
573 			       upper->d_name.name, p->len, p->name,
574 			       (int) PTR_ERR(dentry));
575 			continue;
576 		}
577 		if (dentry->d_inode)
578 			ovl_cleanup(upper->d_inode, dentry);
579 		dput(dentry);
580 	}
581 	inode_unlock(upper->d_inode);
582 }
583 
584 static int ovl_check_d_type(struct dir_context *ctx, const char *name,
585 			  int namelen, loff_t offset, u64 ino,
586 			  unsigned int d_type)
587 {
588 	struct ovl_readdir_data *rdd =
589 		container_of(ctx, struct ovl_readdir_data, ctx);
590 
591 	/* Even if d_type is not supported, DT_DIR is returned for . and .. */
592 	if (!strncmp(name, ".", namelen) || !strncmp(name, "..", namelen))
593 		return 0;
594 
595 	if (d_type != DT_UNKNOWN)
596 		rdd->d_type_supported = true;
597 
598 	return 0;
599 }
600 
601 /*
602  * Returns 1 if d_type is supported, 0 not supported/unknown. Negative values
603  * if error is encountered.
604  */
605 int ovl_check_d_type_supported(struct path *realpath)
606 {
607 	int err;
608 	struct ovl_readdir_data rdd = {
609 		.ctx.actor = ovl_check_d_type,
610 		.d_type_supported = false,
611 	};
612 
613 	err = ovl_dir_read(realpath, &rdd);
614 	if (err)
615 		return err;
616 
617 	return rdd.d_type_supported;
618 }
619