xref: /linux/fs/ext4/orphan.c (revision 65989db7f88456273d0913d8d21f6097fa6aad19)
1 /*
2  * Ext4 orphan inode handling
3  */
4 #include <linux/fs.h>
5 #include <linux/quotaops.h>
6 #include <linux/buffer_head.h>
7 
8 #include "ext4.h"
9 #include "ext4_jbd2.h"
10 
/*
 * Add @inode into the orphan file. Returns 0 on success, -ENOSPC when no
 * free slot could be claimed (the caller then falls back to the classic
 * on-disk orphan list), or an error from the journalling layer.
 *
 * Slots are claimed locklessly: we first reserve space in a block by
 * decrementing its free-entry counter (atomic_dec_if_positive), then claim
 * a concrete empty slot inside that block with cmpxchg().
 */
static int ext4_orphan_file_add(handle_t *handle, struct inode *inode)
{
	int i, j, start;
	struct ext4_orphan_info *oi = &EXT4_SB(inode->i_sb)->s_orphan_info;
	int ret = 0;
	bool found = false;
	__le32 *bdata;
	int inodes_per_ob = ext4_inodes_per_orphan_block(inode->i_sb);
	int looped = 0;

	/*
	 * Find block with free orphan entry. Use CPU number for a naive hash
	 * for a search start in the orphan file
	 */
	start = raw_smp_processor_id()*13 % oi->of_blocks;
	i = start;
	do {
		/* Reserve one entry in block i if any are free. */
		if (atomic_dec_if_positive(&oi->of_binfo[i].ob_free_entries)
		    >= 0) {
			found = true;
			break;
		}
		if (++i >= oi->of_blocks)
			i = 0;
	} while (i != start);

	if (!found) {
		/*
		 * For now we don't grow or shrink orphan file. We just use
		 * whatever was allocated at mke2fs time. The additional
		 * credits we would have to reserve for each orphan inode
		 * operation just don't seem worth it.
		 */
		return -ENOSPC;
	}

	ret = ext4_journal_get_write_access(handle, inode->i_sb,
				oi->of_binfo[i].ob_bh, EXT4_JTR_ORPHAN_FILE);
	if (ret) {
		/* Give back the entry we reserved above. */
		atomic_inc(&oi->of_binfo[i].ob_free_entries);
		return ret;
	}

	bdata = (__le32 *)(oi->of_binfo[i].ob_bh->b_data);
	/* Find empty slot in a block */
	j = 0;
	do {
		if (looped) {
			/*
			 * Did we walk through the block several times without
			 * finding free entry? It is theoretically possible
			 * if entries get constantly allocated and freed or
			 * if the block is corrupted. Avoid indefinite looping
			 * and bail. We'll use orphan list instead.
			 */
			if (looped > 3) {
				atomic_inc(&oi->of_binfo[i].ob_free_entries);
				return -ENOSPC;
			}
			cond_resched();
		}
		while (bdata[j]) {
			if (++j >= inodes_per_ob) {
				j = 0;
				looped++;
			}
		}
	/* Claim the slot; retry if another CPU raced us into it. */
	} while (cmpxchg(&bdata[j], (__le32)0, cpu_to_le32(inode->i_ino)) !=
		 (__le32)0);

	/* Remember where we are so ext4_orphan_file_del() can find us. */
	EXT4_I(inode)->i_orphan_idx = i * inodes_per_ob + j;
	ext4_set_inode_state(inode, EXT4_STATE_ORPHAN_FILE);

	return ext4_handle_dirty_metadata(handle, NULL, oi->of_binfo[i].ob_bh);
}
86 
87 /*
88  * ext4_orphan_add() links an unlinked or truncated inode into a list of
89  * such inodes, starting at the superblock, in case we crash before the
90  * file is closed/deleted, or in case the inode truncate spans multiple
91  * transactions and the last transaction is not recovered after a crash.
92  *
93  * At filesystem recovery time, we walk this list deleting unlinked
94  * inodes and truncating linked inodes in ext4_orphan_cleanup().
95  *
96  * Orphan list manipulation functions must be called under i_rwsem unless
97  * we are just creating the inode or deleting it.
98  */
int ext4_orphan_add(handle_t *handle, struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_iloc iloc;
	int err = 0, rc;
	bool dirty = false;

	/* Orphan tracking only makes sense with a journal. */
	if (!sbi->s_journal || is_bad_inode(inode))
		return 0;

	WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
		     !inode_is_locked(inode));
	/* Already on the orphan list or in the orphan file? Nothing to do. */
	if (ext4_inode_orphan_tracked(inode))
		return 0;

	/*
	 * Orphan handling is only valid for files with data blocks
	 * being truncated, or files being unlinked. Note that we either
	 * hold i_rwsem, or the inode can not be referenced from outside,
	 * so i_nlink should not be bumped due to race
	 */
	ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
		  S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);

	if (sbi->s_orphan_info.of_blocks) {
		err = ext4_orphan_file_add(handle, inode);
		/*
		 * Fallback to the normal orphan list if the orphan file is
		 * out of space
		 */
		if (err != -ENOSPC)
			return err;
	}

	BUFFER_TRACE(sbi->s_sbh, "get_write_access");
	err = ext4_journal_get_write_access(handle, sb, sbi->s_sbh,
					    EXT4_JTR_NONE);
	if (err)
		goto out;

	err = ext4_reserve_inode_write(handle, inode, &iloc);
	if (err)
		goto out;

	mutex_lock(&sbi->s_orphan_lock);
	/*
	 * Due to previous errors inode may be already a part of on-disk
	 * orphan list. If so skip on-disk list modification.
	 */
	if (!NEXT_ORPHAN(inode) || NEXT_ORPHAN(inode) >
	    (le32_to_cpu(sbi->s_es->s_inodes_count))) {
		/* Insert this inode at the head of the on-disk orphan list */
		NEXT_ORPHAN(inode) = le32_to_cpu(sbi->s_es->s_last_orphan);
		lock_buffer(sbi->s_sbh);
		sbi->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
		ext4_superblock_csum_set(sb);
		unlock_buffer(sbi->s_sbh);
		dirty = true;
	}
	list_add(&EXT4_I(inode)->i_orphan, &sbi->s_orphan);
	mutex_unlock(&sbi->s_orphan_lock);

	if (dirty) {
		err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
		rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
		if (!err)
			err = rc;
		if (err) {
			/*
			 * We have to remove inode from in-memory list if
			 * addition to on disk orphan list failed. Stray orphan
			 * list entries can cause panics at unmount time.
			 */
			mutex_lock(&sbi->s_orphan_lock);
			list_del_init(&EXT4_I(inode)->i_orphan);
			mutex_unlock(&sbi->s_orphan_lock);
		}
	} else
		/* On-disk list untouched; release the reserved inode buffer. */
		brelse(iloc.bh);

	ext4_debug("superblock will point to %lu\n", inode->i_ino);
	ext4_debug("orphan inode %lu will point to %d\n",
			inode->i_ino, NEXT_ORPHAN(inode));
out:
	ext4_std_error(sb, err);
	return err;
}
187 
/*
 * Remove @inode from the orphan file by clearing the slot recorded in
 * i_orphan_idx. With a NULL @handle (error/cleanup path) only the in-core
 * state is cleared. Returns 0 or a journalling error.
 */
static int ext4_orphan_file_del(handle_t *handle, struct inode *inode)
{
	struct ext4_orphan_info *oi = &EXT4_SB(inode->i_sb)->s_orphan_info;
	__le32 *bdata;
	int blk, off;
	int inodes_per_ob = ext4_inodes_per_orphan_block(inode->i_sb);
	int ret = 0;

	if (!handle)
		goto out;
	/* Translate the flat index back into (block, offset). */
	blk = EXT4_I(inode)->i_orphan_idx / inodes_per_ob;
	off = EXT4_I(inode)->i_orphan_idx % inodes_per_ob;
	if (WARN_ON_ONCE(blk >= oi->of_blocks))
		goto out;

	ret = ext4_journal_get_write_access(handle, inode->i_sb,
				oi->of_binfo[blk].ob_bh, EXT4_JTR_ORPHAN_FILE);
	if (ret)
		goto out;

	bdata = (__le32 *)(oi->of_binfo[blk].ob_bh->b_data);
	bdata[off] = 0;
	atomic_inc(&oi->of_binfo[blk].ob_free_entries);
	ret = ext4_handle_dirty_metadata(handle, NULL, oi->of_binfo[blk].ob_bh);
out:
	ext4_clear_inode_state(inode, EXT4_STATE_ORPHAN_FILE);
	INIT_LIST_HEAD(&EXT4_I(inode)->i_orphan);

	return ret;
}
218 
219 /*
220  * ext4_orphan_del() removes an unlinked or truncated inode from the list
221  * of such inodes stored on disk, because it is finally being cleaned up.
222  */
int ext4_orphan_del(handle_t *handle, struct inode *inode)
{
	struct list_head *prev;
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	__u32 ino_next;
	struct ext4_iloc iloc;
	int err = 0;

	if (!sbi->s_journal && !(sbi->s_mount_state & EXT4_ORPHAN_FS))
		return 0;

	WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
		     !inode_is_locked(inode));
	/* Inodes tracked in the orphan file take a different removal path. */
	if (ext4_test_inode_state(inode, EXT4_STATE_ORPHAN_FILE))
		return ext4_orphan_file_del(handle, inode);

	/* Do this quick check before taking global s_orphan_lock. */
	if (list_empty(&ei->i_orphan))
		return 0;

	if (handle) {
		/* Grab inode buffer early before taking global s_orphan_lock */
		err = ext4_reserve_inode_write(handle, inode, &iloc);
	}

	mutex_lock(&sbi->s_orphan_lock);
	ext4_debug("remove inode %lu from orphan list\n", inode->i_ino);

	/* Remember our predecessor - it must take over our on-disk link. */
	prev = ei->i_orphan.prev;
	list_del_init(&ei->i_orphan);

	/* If we're on an error path, we may not have a valid
	 * transaction handle with which to update the orphan list on
	 * disk, but we still need to remove the inode from the linked
	 * list in memory. */
	if (!handle || err) {
		mutex_unlock(&sbi->s_orphan_lock);
		goto out_err;
	}

	ino_next = NEXT_ORPHAN(inode);
	if (prev == &sbi->s_orphan) {
		/* We were the list head: repoint the superblock. */
		ext4_debug("superblock will point to %u\n", ino_next);
		BUFFER_TRACE(sbi->s_sbh, "get_write_access");
		err = ext4_journal_get_write_access(handle, inode->i_sb,
						    sbi->s_sbh, EXT4_JTR_NONE);
		if (err) {
			mutex_unlock(&sbi->s_orphan_lock);
			goto out_brelse;
		}
		lock_buffer(sbi->s_sbh);
		sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
		ext4_superblock_csum_set(inode->i_sb);
		unlock_buffer(sbi->s_sbh);
		mutex_unlock(&sbi->s_orphan_lock);
		err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
	} else {
		/* Mid-list: make our predecessor point past us. */
		struct ext4_iloc iloc2;
		struct inode *i_prev =
			&list_entry(prev, struct ext4_inode_info, i_orphan)->vfs_inode;

		ext4_debug("orphan inode %lu will point to %u\n",
			  i_prev->i_ino, ino_next);
		err = ext4_reserve_inode_write(handle, i_prev, &iloc2);
		if (err) {
			mutex_unlock(&sbi->s_orphan_lock);
			goto out_brelse;
		}
		NEXT_ORPHAN(i_prev) = ino_next;
		err = ext4_mark_iloc_dirty(handle, i_prev, &iloc2);
		mutex_unlock(&sbi->s_orphan_lock);
	}
	if (err)
		goto out_brelse;
	NEXT_ORPHAN(inode) = 0;
	err = ext4_mark_iloc_dirty(handle, inode, &iloc);
out_err:
	ext4_std_error(inode->i_sb, err);
	return err;

out_brelse:
	brelse(iloc.bh);
	goto out_err;
}
308 
309 #ifdef CONFIG_QUOTA
/*
 * Enable journaled quota for @type during orphan cleanup. The quota file
 * name is protected by s_umount, which the rcu_dereference_protected()
 * lockdep annotation documents.
 */
static int ext4_quota_on_mount(struct super_block *sb, int type)
{
	return dquot_quota_on_mount(sb,
		rcu_dereference_protected(EXT4_SB(sb)->s_qf_names[type],
					  lockdep_is_held(&sb->s_umount)),
		EXT4_SB(sb)->s_jquota_fmt, type);
}
317 #endif
318 
/*
 * Dispose of one orphan inode during recovery: truncate it if it is still
 * linked (crash happened mid-truncate), otherwise let the final iput()
 * delete it. Bumps the matching statistics counter and consumes the inode
 * reference.
 */
static void ext4_process_orphan(struct inode *inode,
				int *nr_truncates, int *nr_orphans)
{
	struct super_block *sb = inode->i_sb;
	int ret;

	dquot_initialize(inode);
	if (inode->i_nlink) {
		if (test_opt(sb, DEBUG))
			ext4_msg(sb, KERN_DEBUG,
				"%s: truncating inode %lu to %lld bytes",
				__func__, inode->i_ino, inode->i_size);
		ext4_debug("truncating inode %lu to %lld bytes\n",
			   inode->i_ino, inode->i_size);
		inode_lock(inode);
		truncate_inode_pages(inode->i_mapping, inode->i_size);
		ret = ext4_truncate(inode);
		if (ret) {
			/*
			 * We need to clean up the in-core orphan list
			 * manually if ext4_truncate() failed to get a
			 * transaction handle.
			 */
			ext4_orphan_del(NULL, inode);
			ext4_std_error(inode->i_sb, ret);
		}
		inode_unlock(inode);
		(*nr_truncates)++;
	} else {
		if (test_opt(sb, DEBUG))
			ext4_msg(sb, KERN_DEBUG,
				"%s: deleting unreferenced inode %lu",
				__func__, inode->i_ino);
		ext4_debug("deleting unreferenced inode %lu\n",
			   inode->i_ino);
		(*nr_orphans)++;
	}
	iput(inode);  /* The delete magic happens here! */
}
358 
359 /* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
360  * the superblock) which were deleted from all directories, but held open by
361  * a process at the time of a crash.  We walk the list and try to delete these
362  * inodes at recovery time (only with a read-write filesystem).
363  *
364  * In order to keep the orphan inode chain consistent during traversal (in
365  * case of crash during recovery), we link each inode into the superblock
366  * orphan list_head and handle it the same way as an inode deletion during
367  * normal operation (which journals the operations for us).
368  *
369  * We only do an iget() and an iput() on each inode, which is very safe if we
370  * accidentally point at an in-use or already deleted inode.  The worst that
371  * can happen in this case is that we get a "bit already cleared" message from
372  * ext4_free_inode().  The only reason we would point at a wrong inode is if
373  * e2fsck was run on this filesystem, and it must have already done the orphan
374  * inode cleanup for us, so we can safely abort without any further action.
375  */
void ext4_orphan_cleanup(struct super_block *sb, struct ext4_super_block *es)
{
	unsigned int s_flags = sb->s_flags;
	int nr_orphans = 0, nr_truncates = 0;
	struct inode *inode;
	int i, j;
#ifdef CONFIG_QUOTA
	int quota_update = 0;
#endif
	__le32 *bdata;
	struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info;
	int inodes_per_ob = ext4_inodes_per_orphan_block(sb);

	/* Nothing on the legacy list and no orphan file entries loaded. */
	if (!es->s_last_orphan && !oi->of_blocks) {
		ext4_debug("no orphan inodes to clean up\n");
		return;
	}

	if (bdev_read_only(sb->s_bdev)) {
		ext4_msg(sb, KERN_ERR, "write access "
			"unavailable, skipping orphan cleanup");
		return;
	}

	/* Check if feature set would not allow a r/w mount */
	if (!ext4_feature_set_ok(sb, 0)) {
		ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
			 "unknown ROCOMPAT features");
		return;
	}

	if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
		/* don't clear list on RO mount w/ errors */
		if (es->s_last_orphan && !(s_flags & SB_RDONLY)) {
			ext4_msg(sb, KERN_INFO, "Errors on filesystem, "
				  "clearing orphan list.");
			es->s_last_orphan = 0;
		}
		ext4_debug("Skipping orphan recovery on fs with errors.\n");
		return;
	}

	if (s_flags & SB_RDONLY) {
		/* Temporarily lift SB_RDONLY; restored at the end. */
		ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
		sb->s_flags &= ~SB_RDONLY;
	}
#ifdef CONFIG_QUOTA
	/*
	 * Turn on quotas which were not enabled for read-only mounts if
	 * filesystem has quota feature, so that they are updated correctly.
	 */
	if (ext4_has_feature_quota(sb) && (s_flags & SB_RDONLY)) {
		int ret = ext4_enable_quotas(sb);

		if (!ret)
			quota_update = 1;
		else
			ext4_msg(sb, KERN_ERR,
				"Cannot turn on quotas: error %d", ret);
	}

	/* Turn on journaled quotas used for old style */
	for (i = 0; i < EXT4_MAXQUOTAS; i++) {
		if (EXT4_SB(sb)->s_qf_names[i]) {
			int ret = ext4_quota_on_mount(sb, i);

			if (!ret)
				quota_update = 1;
			else
				ext4_msg(sb, KERN_ERR,
					"Cannot turn on journaled "
					"quota: type %d: error %d", i, ret);
		}
	}
#endif

	/* First pass: drain the legacy singly-linked orphan list. */
	while (es->s_last_orphan) {
		/*
		 * We may have encountered an error during cleanup; if
		 * so, skip the rest.
		 */
		if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
			ext4_debug("Skipping orphan recovery on fs with errors.\n");
			es->s_last_orphan = 0;
			break;
		}

		inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
		if (IS_ERR(inode)) {
			es->s_last_orphan = 0;
			break;
		}

		list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
		ext4_process_orphan(inode, &nr_truncates, &nr_orphans);
	}

	/* Second pass: process every non-zero slot in the orphan file. */
	for (i = 0; i < oi->of_blocks; i++) {
		bdata = (__le32 *)(oi->of_binfo[i].ob_bh->b_data);
		for (j = 0; j < inodes_per_ob; j++) {
			if (!bdata[j])
				continue;
			inode = ext4_orphan_get(sb, le32_to_cpu(bdata[j]));
			if (IS_ERR(inode))
				continue;
			ext4_set_inode_state(inode, EXT4_STATE_ORPHAN_FILE);
			EXT4_I(inode)->i_orphan_idx = i * inodes_per_ob + j;
			ext4_process_orphan(inode, &nr_truncates, &nr_orphans);
		}
	}

#define PLURAL(x) (x), ((x) == 1) ? "" : "s"

	if (nr_orphans)
		ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
		       PLURAL(nr_orphans));
	if (nr_truncates)
		ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
		       PLURAL(nr_truncates));
#ifdef CONFIG_QUOTA
	/* Turn off quotas if they were enabled for orphan cleanup */
	if (quota_update) {
		for (i = 0; i < EXT4_MAXQUOTAS; i++) {
			if (sb_dqopt(sb)->files[i])
				dquot_quota_off(sb, i);
		}
	}
#endif
	sb->s_flags = s_flags; /* Restore SB_RDONLY status */
}
506 
ext4_release_orphan_info(struct super_block * sb)507 void ext4_release_orphan_info(struct super_block *sb)
508 {
509 	int i;
510 	struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info;
511 
512 	if (!oi->of_blocks)
513 		return;
514 	for (i = 0; i < oi->of_blocks; i++)
515 		brelse(oi->of_binfo[i].ob_bh);
516 	kfree(oi->of_binfo);
517 }
518 
ext4_orphan_block_tail(struct super_block * sb,struct buffer_head * bh)519 static struct ext4_orphan_block_tail *ext4_orphan_block_tail(
520 						struct super_block *sb,
521 						struct buffer_head *bh)
522 {
523 	return (struct ext4_orphan_block_tail *)(bh->b_data + sb->s_blocksize -
524 				sizeof(struct ext4_orphan_block_tail));
525 }
526 
ext4_orphan_file_block_csum_verify(struct super_block * sb,struct buffer_head * bh)527 static int ext4_orphan_file_block_csum_verify(struct super_block *sb,
528 					      struct buffer_head *bh)
529 {
530 	__u32 calculated;
531 	int inodes_per_ob = ext4_inodes_per_orphan_block(sb);
532 	struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info;
533 	struct ext4_orphan_block_tail *ot;
534 	__le64 dsk_block_nr = cpu_to_le64(bh->b_blocknr);
535 
536 	if (!ext4_has_feature_metadata_csum(sb))
537 		return 1;
538 
539 	ot = ext4_orphan_block_tail(sb, bh);
540 	calculated = ext4_chksum(oi->of_csum_seed, (__u8 *)&dsk_block_nr,
541 				 sizeof(dsk_block_nr));
542 	calculated = ext4_chksum(calculated, (__u8 *)bh->b_data,
543 				 inodes_per_ob * sizeof(__u32));
544 	return le32_to_cpu(ot->ob_checksum) == calculated;
545 }
546 
547 /* This gets called only when checksumming is enabled */
/*
 * jbd2 commit trigger: recompute and store the checksum of an orphan file
 * block just before it is written to the journal.
 */
void ext4_orphan_file_block_trigger(struct jbd2_buffer_trigger_type *triggers,
				    struct buffer_head *bh,
				    void *data, size_t size)
{
	struct super_block *sb = EXT4_TRIGGER(triggers)->sb;
	struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info;
	struct ext4_orphan_block_tail *tail = ext4_orphan_block_tail(sb, bh);
	__le64 blocknr = cpu_to_le64(bh->b_blocknr);
	int entries = ext4_inodes_per_orphan_block(sb);
	__u32 csum;

	/* Same layout as the verifier: block number, then the entry array. */
	csum = ext4_chksum(oi->of_csum_seed, (__u8 *)&blocknr,
			   sizeof(blocknr));
	csum = ext4_chksum(csum, (__u8 *)data, entries * sizeof(__u32));
	tail->ob_checksum = cpu_to_le32(csum);
}
565 
ext4_init_orphan_info(struct super_block * sb)566 int ext4_init_orphan_info(struct super_block *sb)
567 {
568 	struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info;
569 	struct inode *inode;
570 	int i, j;
571 	int ret;
572 	int free;
573 	__le32 *bdata;
574 	int inodes_per_ob = ext4_inodes_per_orphan_block(sb);
575 	struct ext4_orphan_block_tail *ot;
576 	ino_t orphan_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_orphan_file_inum);
577 
578 	if (!ext4_has_feature_orphan_file(sb))
579 		return 0;
580 
581 	inode = ext4_iget(sb, orphan_ino, EXT4_IGET_SPECIAL);
582 	if (IS_ERR(inode)) {
583 		ext4_msg(sb, KERN_ERR, "get orphan inode failed");
584 		return PTR_ERR(inode);
585 	}
586 	/*
587 	 * This is just an artificial limit to prevent corrupted fs from
588 	 * consuming absurd amounts of memory when pinning blocks of orphan
589 	 * file in memory.
590 	 */
591 	if (inode->i_size > 8 << 20) {
592 		ext4_msg(sb, KERN_ERR, "orphan file too big: %llu",
593 			 (unsigned long long)inode->i_size);
594 		ret = -EFSCORRUPTED;
595 		goto out_put;
596 	}
597 	oi->of_blocks = inode->i_size >> sb->s_blocksize_bits;
598 	oi->of_csum_seed = EXT4_I(inode)->i_csum_seed;
599 	oi->of_binfo = kvmalloc_array(oi->of_blocks,
600 				     sizeof(struct ext4_orphan_block),
601 				     GFP_KERNEL);
602 	if (!oi->of_binfo) {
603 		ret = -ENOMEM;
604 		goto out_put;
605 	}
606 	for (i = 0; i < oi->of_blocks; i++) {
607 		oi->of_binfo[i].ob_bh = ext4_bread(NULL, inode, i, 0);
608 		if (IS_ERR(oi->of_binfo[i].ob_bh)) {
609 			ret = PTR_ERR(oi->of_binfo[i].ob_bh);
610 			goto out_free;
611 		}
612 		if (!oi->of_binfo[i].ob_bh) {
613 			ret = -EIO;
614 			goto out_free;
615 		}
616 		ot = ext4_orphan_block_tail(sb, oi->of_binfo[i].ob_bh);
617 		if (le32_to_cpu(ot->ob_magic) != EXT4_ORPHAN_BLOCK_MAGIC) {
618 			ext4_error(sb, "orphan file block %d: bad magic", i);
619 			ret = -EIO;
620 			goto out_free;
621 		}
622 		if (!ext4_orphan_file_block_csum_verify(sb,
623 						oi->of_binfo[i].ob_bh)) {
624 			ext4_error(sb, "orphan file block %d: bad checksum", i);
625 			ret = -EIO;
626 			goto out_free;
627 		}
628 		bdata = (__le32 *)(oi->of_binfo[i].ob_bh->b_data);
629 		free = 0;
630 		for (j = 0; j < inodes_per_ob; j++)
631 			if (bdata[j] == 0)
632 				free++;
633 		atomic_set(&oi->of_binfo[i].ob_free_entries, free);
634 	}
635 	iput(inode);
636 	return 0;
637 out_free:
638 	for (i--; i >= 0; i--)
639 		brelse(oi->of_binfo[i].ob_bh);
640 	kfree(oi->of_binfo);
641 out_put:
642 	iput(inode);
643 	return ret;
644 }
645 
ext4_orphan_file_empty(struct super_block * sb)646 int ext4_orphan_file_empty(struct super_block *sb)
647 {
648 	struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info;
649 	int i;
650 	int inodes_per_ob = ext4_inodes_per_orphan_block(sb);
651 
652 	if (!ext4_has_feature_orphan_file(sb))
653 		return 1;
654 	for (i = 0; i < oi->of_blocks; i++)
655 		if (atomic_read(&oi->of_binfo[i].ob_free_entries) !=
656 		    inodes_per_ob)
657 			return 0;
658 	return 1;
659 }
660