xref: /linux/fs/ext4/orphan.c (revision 82f78acd5a9270370ef4aa3f032ede25f3dc91ee)
1 /*
2  * Ext4 orphan inode handling
3  */
4 #include <linux/fs.h>
5 #include <linux/quotaops.h>
6 #include <linux/buffer_head.h>
7 
8 #include "ext4.h"
9 #include "ext4_jbd2.h"
10 
11 #define EXT4_MAX_ORPHAN_FILE_BLOCKS 512
12 
/*
 * Record @inode's number in a free slot of the on-disk orphan file.
 *
 * Returns 0 on success, -ENOSPC when no free slot could be claimed (the
 * caller then falls back to the legacy orphan linked list), or a journal
 * error from getting write access / dirtying the block. On success the
 * global slot index is remembered in i_orphan_idx and the inode is marked
 * with EXT4_STATE_ORPHAN_FILE.
 */
static int ext4_orphan_file_add(handle_t *handle, struct inode *inode)
{
	int i, j, start;
	struct ext4_orphan_info *oi = &EXT4_SB(inode->i_sb)->s_orphan_info;
	int ret = 0;
	bool found = false;
	__le32 *bdata;
	int inodes_per_ob = ext4_inodes_per_orphan_block(inode->i_sb);
	int looped = 0;

	/*
	 * Find block with free orphan entry. Use CPU number for a naive hash
	 * for a search start in the orphan file
	 */
	start = raw_smp_processor_id()*13 % oi->of_blocks;
	i = start;
	do {
		/* Reserve one entry in this block if any are left. */
		if (atomic_dec_if_positive(&oi->of_binfo[i].ob_free_entries)
		    >= 0) {
			found = true;
			break;
		}
		if (++i >= oi->of_blocks)
			i = 0;
	} while (i != start);

	if (!found) {
		/*
		 * For now we don't grow or shrink orphan file. We just use
		 * whatever was allocated at mke2fs time. The additional
		 * credits we would have to reserve for each orphan inode
		 * operation just don't seem worth it.
		 */
		return -ENOSPC;
	}

	ret = ext4_journal_get_write_access(handle, inode->i_sb,
				oi->of_binfo[i].ob_bh, EXT4_JTR_ORPHAN_FILE);
	if (ret) {
		/* Give the reserved entry back before bailing out. */
		atomic_inc(&oi->of_binfo[i].ob_free_entries);
		return ret;
	}

	bdata = (__le32 *)(oi->of_binfo[i].ob_bh->b_data);
	/* Find empty slot in a block */
	j = 0;
	do {
		if (looped) {
			/*
			 * Did we walk through the block several times without
			 * finding free entry? It is theoretically possible
			 * if entries get constantly allocated and freed or
			 * if the block is corrupted. Avoid indefinite looping
			 * and bail. We'll use orphan list instead.
			 */
			if (looped > 3) {
				atomic_inc(&oi->of_binfo[i].ob_free_entries);
				return -ENOSPC;
			}
			cond_resched();
		}
		while (bdata[j]) {
			if (++j >= inodes_per_ob) {
				j = 0;
				looped++;
			}
		}
	/*
	 * Claim the slot locklessly; if another CPU grabbed it first the
	 * cmpxchg fails and we go hunting for the next empty slot.
	 */
	} while (cmpxchg(&bdata[j], (__le32)0, cpu_to_le32(inode->i_ino)) !=
		 (__le32)0);

	EXT4_I(inode)->i_orphan_idx = i * inodes_per_ob + j;
	ext4_set_inode_state(inode, EXT4_STATE_ORPHAN_FILE);

	return ext4_handle_dirty_metadata(handle, NULL, oi->of_binfo[i].ob_bh);
}
88 
/*
 * ext4_orphan_add() links an unlinked or truncated inode into a list of
 * such inodes, starting at the superblock, in case we crash before the
 * file is closed/deleted, or in case the inode truncate spans multiple
 * transactions and the last transaction is not recovered after a crash.
 *
 * At filesystem recovery time, we walk this list deleting unlinked
 * inodes and truncating linked inodes in ext4_orphan_cleanup().
 *
 * Orphan list manipulation functions must be called under i_rwsem unless
 * we are just creating the inode or deleting it.
 */
int ext4_orphan_add(handle_t *handle, struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_iloc iloc;
	int err = 0, rc;
	bool dirty = false;

	/* No journal or a bad inode - orphan tracking is pointless. */
	if (!sbi->s_journal || is_bad_inode(inode))
		return 0;

	WARN_ON_ONCE(!(inode_state_read_once(inode) & (I_NEW | I_FREEING)) &&
		     !inode_is_locked(inode));
	/* Inode already tracked (orphan file or orphan list)? Done. */
	if (ext4_inode_orphan_tracked(inode))
		return 0;

	/*
	 * Orphan handling is only valid for files with data blocks
	 * being truncated, or files being unlinked. Note that we either
	 * hold i_rwsem, or the inode can not be referenced from outside,
	 * so i_nlink should not be bumped due to race
	 */
	ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
		  S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);

	if (sbi->s_orphan_info.of_blocks) {
		err = ext4_orphan_file_add(handle, inode);
		/*
		 * Fall back to the normal orphan list if the orphan file
		 * is out of space
		 */
		if (err != -ENOSPC)
			return err;
	}

	BUFFER_TRACE(sbi->s_sbh, "get_write_access");
	err = ext4_journal_get_write_access(handle, sb, sbi->s_sbh,
					    EXT4_JTR_NONE);
	if (err)
		goto out;

	err = ext4_reserve_inode_write(handle, inode, &iloc);
	if (err)
		goto out;

	mutex_lock(&sbi->s_orphan_lock);
	/*
	 * Due to previous errors inode may be already a part of on-disk
	 * orphan list. If so skip on-disk list modification.
	 */
	if (!NEXT_ORPHAN(inode) || NEXT_ORPHAN(inode) >
	    (le32_to_cpu(sbi->s_es->s_inodes_count))) {
		/* Insert this inode at the head of the on-disk orphan list */
		NEXT_ORPHAN(inode) = le32_to_cpu(sbi->s_es->s_last_orphan);
		lock_buffer(sbi->s_sbh);
		sbi->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
		ext4_superblock_csum_set(sb);
		unlock_buffer(sbi->s_sbh);
		dirty = true;
	}
	list_add(&EXT4_I(inode)->i_orphan, &sbi->s_orphan);
	mutex_unlock(&sbi->s_orphan_lock);

	if (dirty) {
		err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
		rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
		if (!err)
			err = rc;
		if (err) {
			/*
			 * We have to remove inode from in-memory list if
			 * addition to on disk orphan list failed. Stray orphan
			 * list entries can cause panics at unmount time.
			 */
			mutex_lock(&sbi->s_orphan_lock);
			list_del_init(&EXT4_I(inode)->i_orphan);
			mutex_unlock(&sbi->s_orphan_lock);
		}
	} else
		brelse(iloc.bh);

	ext4_debug("superblock will point to %lu\n", inode->i_ino);
	ext4_debug("orphan inode %lu will point to %d\n",
			inode->i_ino, NEXT_ORPHAN(inode));
out:
	ext4_std_error(sb, err);
	return err;
}
189 
/*
 * Remove @inode's entry from the on-disk orphan file and clear the
 * in-memory orphan state. Without a journal handle only the in-memory
 * state is cleared.
 */
static int ext4_orphan_file_del(handle_t *handle, struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info;
	int entries_per_block = ext4_inodes_per_orphan_block(sb);
	struct ext4_orphan_block *ob;
	__le32 *entries;
	int block, slot;
	int err = 0;

	if (!handle)
		goto out;
	/* Translate the global slot index into (block, offset). */
	block = EXT4_I(inode)->i_orphan_idx / entries_per_block;
	slot = EXT4_I(inode)->i_orphan_idx % entries_per_block;
	if (WARN_ON_ONCE(block >= oi->of_blocks))
		goto out;
	ob = &oi->of_binfo[block];

	err = ext4_journal_get_write_access(handle, sb, ob->ob_bh,
					    EXT4_JTR_ORPHAN_FILE);
	if (err)
		goto out;

	entries = (__le32 *)(ob->ob_bh->b_data);
	entries[slot] = 0;
	atomic_inc(&ob->ob_free_entries);
	err = ext4_handle_dirty_metadata(handle, NULL, ob->ob_bh);
out:
	ext4_clear_inode_state(inode, EXT4_STATE_ORPHAN_FILE);
	INIT_LIST_HEAD(&EXT4_I(inode)->i_orphan);

	return err;
}
220 
/*
 * ext4_orphan_del() removes an unlinked or truncated inode from the list
 * of such inodes stored on disk, because it is finally being cleaned up.
 *
 * May be called without a handle (e.g. on error paths), in which case
 * only the in-memory list entry is removed. Returns 0 or a journal /
 * inode-write error.
 */
int ext4_orphan_del(handle_t *handle, struct inode *inode)
{
	struct list_head *prev;
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	__u32 ino_next;
	struct ext4_iloc iloc;
	int err = 0;

	if (!sbi->s_journal && !(sbi->s_mount_state & EXT4_ORPHAN_FS))
		return 0;

	WARN_ON_ONCE(!(inode_state_read_once(inode) & (I_NEW | I_FREEING)) &&
		     !inode_is_locked(inode));
	/* Inodes tracked in the orphan file take the simpler path. */
	if (ext4_test_inode_state(inode, EXT4_STATE_ORPHAN_FILE))
		return ext4_orphan_file_del(handle, inode);

	/* Do this quick check before taking global s_orphan_lock. */
	if (list_empty(&ei->i_orphan))
		return 0;

	if (handle) {
		/* Grab inode buffer early before taking global s_orphan_lock */
		err = ext4_reserve_inode_write(handle, inode, &iloc);
	}

	mutex_lock(&sbi->s_orphan_lock);
	ext4_debug("remove inode %lu from orphan list\n", inode->i_ino);

	/* Remember our predecessor so we can re-link the on-disk chain. */
	prev = ei->i_orphan.prev;
	list_del_init(&ei->i_orphan);

	/* If we're on an error path, we may not have a valid
	 * transaction handle with which to update the orphan list on
	 * disk, but we still need to remove the inode from the linked
	 * list in memory. */
	if (!handle || err) {
		mutex_unlock(&sbi->s_orphan_lock);
		goto out_err;
	}

	ino_next = NEXT_ORPHAN(inode);
	if (prev == &sbi->s_orphan) {
		/* We were the list head: the superblock must point past us. */
		ext4_debug("superblock will point to %u\n", ino_next);
		BUFFER_TRACE(sbi->s_sbh, "get_write_access");
		err = ext4_journal_get_write_access(handle, inode->i_sb,
						    sbi->s_sbh, EXT4_JTR_NONE);
		if (err) {
			mutex_unlock(&sbi->s_orphan_lock);
			goto out_brelse;
		}
		lock_buffer(sbi->s_sbh);
		sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
		ext4_superblock_csum_set(inode->i_sb);
		unlock_buffer(sbi->s_sbh);
		mutex_unlock(&sbi->s_orphan_lock);
		err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
	} else {
		/* Mid-list: make the previous orphan inode skip over us. */
		struct ext4_iloc iloc2;
		struct inode *i_prev =
			&list_entry(prev, struct ext4_inode_info, i_orphan)->vfs_inode;

		ext4_debug("orphan inode %lu will point to %u\n",
			  i_prev->i_ino, ino_next);
		err = ext4_reserve_inode_write(handle, i_prev, &iloc2);
		if (err) {
			mutex_unlock(&sbi->s_orphan_lock);
			goto out_brelse;
		}
		NEXT_ORPHAN(i_prev) = ino_next;
		err = ext4_mark_iloc_dirty(handle, i_prev, &iloc2);
		mutex_unlock(&sbi->s_orphan_lock);
	}
	if (err)
		goto out_brelse;
	NEXT_ORPHAN(inode) = 0;
	err = ext4_mark_iloc_dirty(handle, inode, &iloc);
out_err:
	ext4_std_error(inode->i_sb, err);
	return err;

out_brelse:
	brelse(iloc.bh);
	goto out_err;
}
310 
311 #ifdef CONFIG_QUOTA
ext4_quota_on_mount(struct super_block * sb,int type)312 static int ext4_quota_on_mount(struct super_block *sb, int type)
313 {
314 	return dquot_quota_on_mount(sb,
315 		rcu_dereference_protected(EXT4_SB(sb)->s_qf_names[type],
316 					  lockdep_is_held(&sb->s_umount)),
317 		EXT4_SB(sb)->s_jquota_fmt, type);
318 }
319 #endif
320 
ext4_process_orphan(struct inode * inode,int * nr_truncates,int * nr_orphans)321 static void ext4_process_orphan(struct inode *inode,
322 				int *nr_truncates, int *nr_orphans)
323 {
324 	struct super_block *sb = inode->i_sb;
325 	int ret;
326 
327 	dquot_initialize(inode);
328 	if (inode->i_nlink) {
329 		if (test_opt(sb, DEBUG))
330 			ext4_msg(sb, KERN_DEBUG,
331 				"%s: truncating inode %lu to %lld bytes",
332 				__func__, inode->i_ino, inode->i_size);
333 		ext4_debug("truncating inode %lu to %lld bytes\n",
334 			   inode->i_ino, inode->i_size);
335 		inode_lock(inode);
336 		truncate_inode_pages(inode->i_mapping, inode->i_size);
337 		ret = ext4_truncate(inode);
338 		if (ret) {
339 			/*
340 			 * We need to clean up the in-core orphan list
341 			 * manually if ext4_truncate() failed to get a
342 			 * transaction handle.
343 			 */
344 			ext4_orphan_del(NULL, inode);
345 			ext4_std_error(inode->i_sb, ret);
346 		}
347 		inode_unlock(inode);
348 		(*nr_truncates)++;
349 	} else {
350 		if (test_opt(sb, DEBUG))
351 			ext4_msg(sb, KERN_DEBUG,
352 				"%s: deleting unreferenced inode %lu",
353 				__func__, inode->i_ino);
354 		ext4_debug("deleting unreferenced inode %lu\n",
355 			   inode->i_ino);
356 		(*nr_orphans)++;
357 	}
358 	iput(inode);  /* The delete magic happens here! */
359 }
360 
/* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
 * the superblock) which were deleted from all directories, but held open by
 * a process at the time of a crash.  We walk the list and try to delete these
 * inodes at recovery time (only with a read-write filesystem).
 *
 * In order to keep the orphan inode chain consistent during traversal (in
 * case of crash during recovery), we link each inode into the superblock
 * orphan list_head and handle it the same way as an inode deletion during
 * normal operation (which journals the operations for us).
 *
 * We only do an iget() and an iput() on each inode, which is very safe if we
 * accidentally point at an in-use or already deleted inode.  The worst that
 * can happen in this case is that we get a "bit already cleared" message from
 * ext4_free_inode().  The only reason we would point at a wrong inode is if
 * e2fsck was run on this filesystem, and it must have already done the orphan
 * inode cleanup for us, so we can safely abort without any further action.
 */
void ext4_orphan_cleanup(struct super_block *sb, struct ext4_super_block *es)
{
	unsigned int s_flags = sb->s_flags;
	int nr_orphans = 0, nr_truncates = 0;
	struct inode *inode;
	int i, j;
#ifdef CONFIG_QUOTA
	int quota_update = 0;
#endif
	__le32 *bdata;
	struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info;
	int inodes_per_ob = ext4_inodes_per_orphan_block(sb);

	if (!es->s_last_orphan && !oi->of_blocks) {
		ext4_debug("no orphan inodes to clean up\n");
		return;
	}

	if (bdev_read_only(sb->s_bdev)) {
		ext4_msg(sb, KERN_ERR, "write access "
			"unavailable, skipping orphan cleanup");
		return;
	}

	/* Check if feature set would not allow a r/w mount */
	if (!ext4_feature_set_ok(sb, 0)) {
		ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
			 "unknown ROCOMPAT features");
		return;
	}

	if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
		/* don't clear list on RO mount w/ errors */
		if (es->s_last_orphan && !(s_flags & SB_RDONLY)) {
			ext4_msg(sb, KERN_INFO, "Errors on filesystem, "
				  "clearing orphan list.");
			es->s_last_orphan = 0;
		}
		ext4_debug("Skipping orphan recovery on fs with errors.\n");
		return;
	}

	/* Temporarily go read-write; s_flags is restored at the end. */
	if (s_flags & SB_RDONLY) {
		ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
		sb->s_flags &= ~SB_RDONLY;
	}
#ifdef CONFIG_QUOTA
	/*
	 * Turn on quotas which were not enabled for read-only mounts if
	 * filesystem has quota feature, so that they are updated correctly.
	 */
	if (ext4_has_feature_quota(sb) && (s_flags & SB_RDONLY)) {
		int ret = ext4_enable_quotas(sb);

		if (!ret)
			quota_update = 1;
		else
			ext4_msg(sb, KERN_ERR,
				"Cannot turn on quotas: error %d", ret);
	}

	/* Turn on journaled quotas used for old style */
	for (i = 0; i < EXT4_MAXQUOTAS; i++) {
		if (EXT4_SB(sb)->s_qf_names[i]) {
			int ret = ext4_quota_on_mount(sb, i);

			if (!ret)
				quota_update = 1;
			else
				ext4_msg(sb, KERN_ERR,
					"Cannot turn on journaled "
					"quota: type %d: error %d", i, ret);
		}
	}
#endif

	/* First pass: drain the legacy superblock-rooted orphan chain. */
	while (es->s_last_orphan) {
		/*
		 * We may have encountered an error during cleanup; if
		 * so, skip the rest.
		 */
		if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
			ext4_debug("Skipping orphan recovery on fs with errors.\n");
			es->s_last_orphan = 0;
			break;
		}

		inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
		if (IS_ERR(inode)) {
			es->s_last_orphan = 0;
			break;
		}

		list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
		ext4_process_orphan(inode, &nr_truncates, &nr_orphans);
	}

	/* Second pass: process every non-zero slot of the orphan file. */
	for (i = 0; i < oi->of_blocks; i++) {
		bdata = (__le32 *)(oi->of_binfo[i].ob_bh->b_data);
		for (j = 0; j < inodes_per_ob; j++) {
			if (!bdata[j])
				continue;
			inode = ext4_orphan_get(sb, le32_to_cpu(bdata[j]));
			if (IS_ERR(inode))
				continue;
			ext4_set_inode_state(inode, EXT4_STATE_ORPHAN_FILE);
			EXT4_I(inode)->i_orphan_idx = i * inodes_per_ob + j;
			ext4_process_orphan(inode, &nr_truncates, &nr_orphans);
		}
	}

#define PLURAL(x) (x), ((x) == 1) ? "" : "s"

	if (nr_orphans)
		ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
		       PLURAL(nr_orphans));
	if (nr_truncates)
		ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
		       PLURAL(nr_truncates));
#ifdef CONFIG_QUOTA
	/* Turn off quotas if they were enabled for orphan cleanup */
	if (quota_update) {
		for (i = 0; i < EXT4_MAXQUOTAS; i++) {
			if (sb_dqopt(sb)->files[i])
				dquot_quota_off(sb, i);
		}
	}
#endif
	sb->s_flags = s_flags; /* Restore SB_RDONLY status */
}
508 
ext4_release_orphan_info(struct super_block * sb)509 void ext4_release_orphan_info(struct super_block *sb)
510 {
511 	int i;
512 	struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info;
513 
514 	if (!oi->of_blocks)
515 		return;
516 	for (i = 0; i < oi->of_blocks; i++)
517 		brelse(oi->of_binfo[i].ob_bh);
518 	kvfree(oi->of_binfo);
519 }
520 
ext4_orphan_block_tail(struct super_block * sb,struct buffer_head * bh)521 static struct ext4_orphan_block_tail *ext4_orphan_block_tail(
522 						struct super_block *sb,
523 						struct buffer_head *bh)
524 {
525 	return (struct ext4_orphan_block_tail *)(bh->b_data + sb->s_blocksize -
526 				sizeof(struct ext4_orphan_block_tail));
527 }
528 
ext4_orphan_file_block_csum_verify(struct super_block * sb,struct buffer_head * bh)529 static int ext4_orphan_file_block_csum_verify(struct super_block *sb,
530 					      struct buffer_head *bh)
531 {
532 	__u32 calculated;
533 	int inodes_per_ob = ext4_inodes_per_orphan_block(sb);
534 	struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info;
535 	struct ext4_orphan_block_tail *ot;
536 	__le64 dsk_block_nr = cpu_to_le64(bh->b_blocknr);
537 
538 	if (!ext4_has_feature_metadata_csum(sb))
539 		return 1;
540 
541 	ot = ext4_orphan_block_tail(sb, bh);
542 	calculated = ext4_chksum(oi->of_csum_seed, (__u8 *)&dsk_block_nr,
543 				 sizeof(dsk_block_nr));
544 	calculated = ext4_chksum(calculated, (__u8 *)bh->b_data,
545 				 inodes_per_ob * sizeof(__u32));
546 	return le32_to_cpu(ot->ob_checksum) == calculated;
547 }
548 
549 /* This gets called only when checksumming is enabled */
ext4_orphan_file_block_trigger(struct jbd2_buffer_trigger_type * triggers,struct buffer_head * bh,void * data,size_t size)550 void ext4_orphan_file_block_trigger(struct jbd2_buffer_trigger_type *triggers,
551 				    struct buffer_head *bh,
552 				    void *data, size_t size)
553 {
554 	struct super_block *sb = EXT4_TRIGGER(triggers)->sb;
555 	__u32 csum;
556 	int inodes_per_ob = ext4_inodes_per_orphan_block(sb);
557 	struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info;
558 	struct ext4_orphan_block_tail *ot;
559 	__le64 dsk_block_nr = cpu_to_le64(bh->b_blocknr);
560 
561 	csum = ext4_chksum(oi->of_csum_seed, (__u8 *)&dsk_block_nr,
562 			   sizeof(dsk_block_nr));
563 	csum = ext4_chksum(csum, (__u8 *)data, inodes_per_ob * sizeof(__u32));
564 	ot = ext4_orphan_block_tail(sb, bh);
565 	ot->ob_checksum = cpu_to_le32(csum);
566 }
567 
/*
 * Load and validate the orphan file at mount time: read every block,
 * check its magic and checksum, pin the buffers, and count the free
 * entries per block. Returns 0 on success (or when the feature is not
 * enabled) and a negative errno on failure.
 */
int ext4_init_orphan_info(struct super_block *sb)
{
	struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info;
	struct inode *inode;
	int i, j;
	int ret;
	int free;
	__le32 *bdata;
	int inodes_per_ob = ext4_inodes_per_orphan_block(sb);
	struct ext4_orphan_block_tail *ot;
	ino_t orphan_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_orphan_file_inum);

	if (!ext4_has_feature_orphan_file(sb))
		return 0;

	inode = ext4_iget(sb, orphan_ino, EXT4_IGET_SPECIAL);
	if (IS_ERR(inode)) {
		ext4_msg(sb, KERN_ERR, "get orphan inode failed");
		return PTR_ERR(inode);
	}
	/*
	 * This is just an artificial limit to prevent corrupted fs from
	 * consuming absurd amounts of memory when pinning blocks of orphan
	 * file in memory.
	 */
	if (inode->i_size > (EXT4_MAX_ORPHAN_FILE_BLOCKS << inode->i_blkbits)) {
		ext4_msg(sb, KERN_ERR, "orphan file too big: %llu",
			 (unsigned long long)inode->i_size);
		ret = -EFSCORRUPTED;
		goto out_put;
	}
	oi->of_blocks = inode->i_size >> sb->s_blocksize_bits;
	oi->of_csum_seed = EXT4_I(inode)->i_csum_seed;
	oi->of_binfo = kvmalloc_array(oi->of_blocks,
				     sizeof(struct ext4_orphan_block),
				     GFP_KERNEL);
	if (!oi->of_binfo) {
		ret = -ENOMEM;
		goto out_put;
	}
	for (i = 0; i < oi->of_blocks; i++) {
		oi->of_binfo[i].ob_bh = ext4_bread(NULL, inode, i, 0);
		if (IS_ERR(oi->of_binfo[i].ob_bh)) {
			ret = PTR_ERR(oi->of_binfo[i].ob_bh);
			goto out_free;
		}
		/* A hole in the orphan file is invalid. */
		if (!oi->of_binfo[i].ob_bh) {
			ret = -EIO;
			goto out_free;
		}
		ot = ext4_orphan_block_tail(sb, oi->of_binfo[i].ob_bh);
		if (le32_to_cpu(ot->ob_magic) != EXT4_ORPHAN_BLOCK_MAGIC) {
			ext4_error(sb, "orphan file block %d: bad magic", i);
			ret = -EIO;
			goto out_free;
		}
		if (!ext4_orphan_file_block_csum_verify(sb,
						oi->of_binfo[i].ob_bh)) {
			ext4_error(sb, "orphan file block %d: bad checksum", i);
			ret = -EIO;
			goto out_free;
		}
		/* Count free slots so allocation can balance across blocks. */
		bdata = (__le32 *)(oi->of_binfo[i].ob_bh->b_data);
		free = 0;
		for (j = 0; j < inodes_per_ob; j++)
			if (bdata[j] == 0)
				free++;
		atomic_set(&oi->of_binfo[i].ob_free_entries, free);
	}
	iput(inode);
	return 0;
out_free:
	/* Release only the buffers pinned so far (block i failed). */
	for (i--; i >= 0; i--)
		brelse(oi->of_binfo[i].ob_bh);
	kvfree(oi->of_binfo);
out_put:
	iput(inode);
	return ret;
}
647 
ext4_orphan_file_empty(struct super_block * sb)648 int ext4_orphan_file_empty(struct super_block *sb)
649 {
650 	struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info;
651 	int i;
652 	int inodes_per_ob = ext4_inodes_per_orphan_block(sb);
653 
654 	if (!ext4_has_feature_orphan_file(sb))
655 		return 1;
656 	for (i = 0; i < oi->of_blocks; i++)
657 		if (atomic_read(&oi->of_binfo[i].ob_free_entries) !=
658 		    inodes_per_ob)
659 			return 0;
660 	return 1;
661 }
662