xref: /titanic_50/usr/src/cmd/fs.d/ufs/fsck/dup_avl.c (revision b65731f1f612238279eb4d997f43589b535c5646)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * Keep track of duplicate fragment references (elsewhere called
31  * blocks for ancient historical reasons).
32  *
33  * The duplicates are kept in a binary tree to attempt to minimize
34  * search times when checking the block lists of all active inodes
35  * for multiple uses.  This is opposed to using a simple linear list
36  * that is traversed for every block, as is used in the traditional
37  * fsck.  It can be very time-expensive if there's more than just a
38  * very few duplicates, and typically there are either none or lots.
39  *
40  * For each multiply-claimed fragment, we note all of the claiming
41  * inodes and their corresponding logical block numbers.  This allows
42  * reporting exactly which parts of which files were damaged, which
43  * provides at least a chance of recovering the bulk of the data on
44  * a seriously-corrupted filesystem.
45  */
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <unistd.h>
49 #include <sys/avl.h>
50 #define	_KERNEL
51 #include <sys/fs/ufs_fsdir.h>	/* for struct direct */
52 #undef _KERNEL
53 #include <sys/debug.h>
54 #include "fsck.h"
55 
56 #define	OFFSETOF(type, elt) ((size_t)(&((type *)NULL)->elt))
57 
58 /*
59  * For each physical fragment with multiple claimants, the specifics
60  * of each claim are recorded. This means there are N+1 AVL trees in
61  * use: one for each fragment's claimant table, plus one that orders
62  * the fragments themselves.
63  *
64  * The table of fragments simply has the physical fragment number
65  * (pfn) and has the root of the tree of the associated claimants.  It
66  * is keyed by the pfn and called dup_frags.
67  *
68  * The subsidiary trees list inodes and logical fragment number (lfn)
69  * for each claimant.  They are keyed first by inode number and then
70  * by lfn.  Both are needed, as it is possible for one inode to have
71  * multiple claims on the same fragment.
72  */
73 
74 typedef struct claimant {
75 	fsck_ino_t cl_inode;
76 	daddr32_t cl_lfn;
77 	avl_node_t cl_avl;
78 } claimant_t;
79 
80 typedef struct fragment {
81 	daddr32_t fr_pfn;
82 	avl_tree_t fr_claimants;
83 	avl_node_t fr_avl;
84 } fragment_t;
85 
86 typedef struct reference {
87 	daddr32_t ref_lfn;
88 	daddr32_t ref_pfn;
89 	avl_node_t ref_avl;
90 } reference_t;
91 
92 typedef struct inode_dup {
93 	fsck_ino_t id_ino;
94 	avl_tree_t id_fragments;
95 	avl_node_t id_avl;
96 } inode_dup_t;
97 
98 static avl_tree_t dup_frags;
99 
100 static void free_invert_frags(avl_tree_t *);
101 static void report_dup_lfn_pfn(daddr32_t, daddr32_t, daddr32_t, daddr32_t);
102 static inode_dup_t *new_inode_dup(fsck_ino_t);
103 static void invert_frags(avl_tree_t *, avl_tree_t *);
104 static void report_inode_dups(inode_dup_t *);
105 static int by_ino_cmp(const void *, const void *);
106 static int by_lfn_cmp(const void *, const void *);
107 static claimant_t *alloc_claimant(fsck_ino_t, daddr32_t);
108 static fragment_t *alloc_dup(daddr32_t);
109 static int claimant_cmp(const void *, const void *);
110 static int fragment_cmp(const void *, const void *);
111 static int decrement_claimant(fragment_t *, fsck_ino_t, daddr32_t);
112 static int increment_claimant(fragment_t *, fsck_ino_t, daddr32_t);
113 
114 /*
115  * Simple accessor function for the outside world so only we need to
116  * see and interpret our data structures.
117  */
118 int
119 have_dups(void)
120 {
121 	return (avl_numnodes(&dup_frags) > 0);
122 }
123 
124 /*
125  * Locates, creates, and deletes a record of a duplicate reference.
126  *
127  * For DB_INCR, returns true if the dup was added to the tree.
128  * For DB_DECR, returns true if the dup was in the tree.
129  */
130 int
131 find_dup_ref(daddr32_t fragno, fsck_ino_t ino, daddr32_t lfn, int flags)
132 {
133 	fragment_t key;
134 	fragment_t *dup;
135 	avl_index_t where;
136 	int added = 0;
137 	int removed = 0;
138 
139 	if (avl_first(&dup_frags) == NULL) {
140 		if (flags & DB_CREATE)
141 			avl_create(&dup_frags, fragment_cmp,
142 			    sizeof (fragment_t),
143 			    OFFSETOF(fragment_t, fr_avl));
144 		else
145 			return (0);
146 	}
147 
148 	key.fr_pfn = fragno;
149 	dup = avl_find(&dup_frags, (void *)&key, &where);
150 	if ((dup == NULL) & (flags & DB_CREATE)) {
151 		dup = alloc_dup(fragno);
152 		avl_insert(&dup_frags, (void *)dup, where);
153 	}
154 
155 	if (dup != NULL) {
156 		if (flags & DB_INCR) {
157 			if (debug)
158 				(void) printf(
159 				    "adding claim by ino %d as lfn %d\n",
160 				    ino, lfn);
161 			added = increment_claimant(dup, ino, lfn);
162 		} else if (flags & DB_DECR) {
163 			/*
164 			 * Note that dup may be invalidated by this call.
165 			 */
166 			removed = decrement_claimant(dup, ino, lfn);
167 			if (debug)
168 				(void) printf(
169 		    "check for claimant ino %d lfn %d returned %d\n",
170 				    ino, lfn, removed);
171 		}
172 	}
173 
174 	return (added || removed || (dup != NULL));
175 }
176 
177 /*
178  * Dump the duplicates table in a relatively user-friendly form.
179  * The idea is that the output can be useful when trying to manually
180  * work out which block belongs to which of the claiming inodes.
181  *
182  * What we have is a tree of duplicates indexed by physical
183  * fragment number.  What we want to report is:
184  *
185  *    Inode %d:
186  *        Logical Offset 0x%08llx,             Physical Fragment  %d
187  *        Logical Offsets 0x%08llx - 0x%08llx, Physical Fragments %d - %d
188  *        ...
189  *    Inode %d:
190  *        Logical Offsets 0x%08llx - 0x%08llx, Physical Fragments %d - %d
191  *    ...
192  */
193 int
194 report_dups(int quiet)
195 {
196 	int overlaps;
197 	inode_dup_t *inode;
198 	fragment_t *dup;
199 	avl_tree_t inode_frags;
200 
201 	overlaps = 0;
202 	ASSERT(have_dups());
203 	/*
204 	 * Figure out how many actual dups are still around.
205 	 * This tells us whether or not we can mark the
206 	 * filesystem clean.
207 	 */
208 	dup = avl_first(&dup_frags);
209 	while (dup != NULL) {
210 		if (avl_numnodes(&dup->fr_claimants) > 1) {
211 			overlaps++;
212 			break;
213 		}
214 		dup = AVL_NEXT(&dup_frags, dup);
215 	}
216 
217 	/*
218 	 * Now report on every object that still exists that
219 	 * had *any* dups associated with it.
220 	 */
221 	if (!quiet) {
222 		(void) puts("\nSome blocks that were found to be in "
223 			    "multiple files are still\nassigned to "
224 			    "file(s).\nFragments sorted by inode and "
225 			    "logical offsets:");
226 
227 		invert_frags(&dup_frags, &inode_frags);
228 		inode = avl_first(&inode_frags);
229 		while (inode != NULL) {
230 			report_inode_dups(inode);
231 			inode = AVL_NEXT(&inode_frags, inode);
232 		}
233 		(void) printf("\n");
234 
235 		free_invert_frags(&inode_frags);
236 	}
237 
238 	return (overlaps);
239 }
240 
241 static void
242 report_inode_dups(inode_dup_t *inode)
243 {
244 	reference_t *dup;
245 	daddr32_t first_lfn, last_lfn, first_pfn, last_pfn;
246 
247 	(void) printf("Inode %d:\n", inode->id_ino);
248 	dup = avl_first(&inode->id_fragments);
249 	first_lfn = last_lfn = dup->ref_lfn;
250 	first_pfn = last_pfn = dup->ref_pfn;
251 	while ((dup = AVL_NEXT(&inode->id_fragments, dup)) != NULL) {
252 		if (((last_lfn + 1) != dup->ref_lfn) ||
253 		    ((last_pfn + 1) != dup->ref_pfn)) {
254 			report_dup_lfn_pfn(first_lfn, last_lfn,
255 			    first_pfn, last_pfn);
256 			first_lfn = last_lfn = dup->ref_lfn;
257 			first_pfn = last_pfn = dup->ref_pfn;
258 		}
259 	}
260 	report_dup_lfn_pfn(first_lfn, last_lfn, first_pfn, last_pfn);
261 }
262 
263 static void
264 report_dup_lfn_pfn(daddr32_t first_lfn, daddr32_t last_lfn,
265 	daddr32_t first_pfn, daddr32_t last_pfn)
266 {
267 	if ((first_lfn == last_lfn) && (first_pfn == last_pfn)) {
268 		(void) printf(
269 	    "  Logical Offset  0x%08llx               Physical Fragment  %d\n",
270 		    (longlong_t)first_lfn * sblock.fs_fsize, first_pfn);
271 	} else {
272 		(void) printf(
273 		    "  Logical Offsets 0x%08llx - 0x%08llx, "
274 		    "Physical Fragments %d - %d\n",
275 		    (longlong_t)first_lfn * sblock.fs_fsize,
276 		    (longlong_t)last_lfn * sblock.fs_fsize,
277 		    first_pfn, last_pfn);
278 	}
279 }
280 
281 /*
282  * Given a tree of fragment_ts, each element of which has an integral
283  * sub-tree of claimant_ts, produce a tree of inode_dup_ts, each element
284  * of which has an integral sub-tree of reference_ts.
285  */
286 static void
287 invert_frags(avl_tree_t *source, avl_tree_t *target)
288 {
289 	fragment_t *src_frag;
290 	claimant_t *src_claim;
291 	inode_dup_t *tgt_inode;
292 	inode_dup_t tgt_inode_key;
293 	reference_t *tgt_ref;
294 	reference_t tgt_ref_key;
295 	avl_index_t where;
296 
297 	avl_create(target, by_ino_cmp, sizeof (inode_dup_t),
298 	    OFFSETOF(inode_dup_t, id_avl));
299 
300 	src_frag = avl_first(source);
301 	while (src_frag != NULL) {
302 		src_claim = avl_first(&src_frag->fr_claimants);
303 		while (src_claim != NULL) {
304 			/*
305 			 * Have we seen this inode before?
306 			 */
307 			tgt_inode_key.id_ino = src_claim->cl_inode;
308 			tgt_inode = avl_find(target, (void *)&tgt_inode_key,
309 			    &where);
310 			if (tgt_inode == NULL) {
311 				/*
312 				 * No, so set up a record for it.
313 				 */
314 				tgt_inode = new_inode_dup(src_claim->cl_inode);
315 				avl_insert(target, (void *)tgt_inode, where);
316 			}
317 			/*
318 			 * Now, how about this logical fragment?  In
319 			 * theory, we should never see a duplicate, since
320 			 * a given lfn only exists once for a given inode.
321 			 * As such, we ignore duplicate hits.
322 			 */
323 			tgt_ref_key.ref_lfn = src_claim->cl_lfn;
324 			tgt_ref = avl_find(&tgt_inode->id_fragments,
325 			    (void *)&tgt_ref_key, &where);
326 			if (tgt_ref == NULL) {
327 				/*
328 				 * Haven't seen it, add it.
329 				 */
330 				tgt_ref = (reference_t *)malloc(
331 				    sizeof (reference_t));
332 				if (tgt_ref == NULL)
333 					errexit("Out of memory in "
334 					    "invert_frags\n");
335 				tgt_ref->ref_lfn = src_claim->cl_lfn;
336 				tgt_ref->ref_pfn = src_frag->fr_pfn;
337 				avl_insert(&tgt_inode->id_fragments,
338 				    (void *)tgt_ref, where);
339 			}
340 			src_claim = AVL_NEXT(&src_frag->fr_claimants,
341 			    src_claim);
342 		}
343 		src_frag = AVL_NEXT(source, src_frag);
344 	}
345 }
346 
347 /*
348  * Discard memory associated with the inverted fragments tree created
349  * by report_dups() via invert_frags().
350  */
351 static void
352 free_invert_frags(avl_tree_t *tree)
353 {
354 	void *outer = NULL;	/* traversal cookie */
355 	void *inner;		/* traversal cookie */
356 	inode_dup_t *inode_dup;
357 	reference_t *ref_dup;
358 
359 	while ((inode_dup = avl_destroy_nodes(tree, &outer)) != NULL) {
360 		inner = NULL;
361 		while ((ref_dup = avl_destroy_nodes(&inode_dup->id_fragments,
362 		    &inner)) != NULL) {
363 			free((void *)ref_dup);
364 		}
365 		avl_destroy(&inode_dup->id_fragments);
366 		free((void *)inode_dup);
367 	}
368 	avl_destroy(tree);
369 }
370 
371 /*
372  * Discard all memory allocations associated with the current duplicates
373  * table.
374  */
375 void
376 free_dup_state(void)
377 {
378 	void *dup_cookie = NULL;
379 	void *claim_cookie;
380 	fragment_t *fragv;
381 	claimant_t *claimv;
382 
383 	while ((fragv = avl_destroy_nodes(&dup_frags, &dup_cookie)) != NULL) {
384 		claim_cookie = NULL;
385 		while ((claimv = avl_destroy_nodes(&fragv->fr_claimants,
386 		    &claim_cookie)) != NULL) {
387 			free((void *)claimv);
388 		}
389 		avl_destroy(&fragv->fr_claimants);
390 		free((void *)fragv);
391 	}
392 	avl_destroy(&dup_frags);
393 }
394 
395 /*
396  * If the given claimant has not been seen before, add it to DUP's
397  * list of them.  It's not fatal for the same PFN/INODE/LFN to get
398  * added twice, because pass1b() will add the same dups that pass1()
399  * did, plus one.
400  */
401 static int
402 increment_claimant(fragment_t *dup, fsck_ino_t ino, daddr32_t lfn)
403 {
404 	avl_index_t where;
405 	claimant_t *claimant;
406 	claimant_t key;
407 	int added = 0;
408 
409 	key.cl_inode = ino;
410 	key.cl_lfn = lfn;
411 	claimant = avl_find(&dup->fr_claimants, &key, &where);
412 	if (claimant == NULL) {
413 		if (debug)
414 			(void) printf("inserting claimant\n");
415 		claimant = alloc_claimant(ino, lfn);
416 		avl_insert(&dup->fr_claimants, (void *)claimant, where);
417 		statemap[ino] |= INCLEAR;
418 		added = 1;
419 	}
420 
421 	return (added);
422 }
423 
424 /*
425  * If the given claimant is on DUP's list, remove it.  It is not
426  * an error for the claimant to not be on the list.
427  */
428 static int
429 decrement_claimant(fragment_t *dup, fsck_ino_t ino, daddr32_t lfn)
430 {
431 	avl_index_t where;
432 	claimant_t *claimant;
433 	claimant_t key;
434 	int busy = 0;
435 
436 	key.cl_inode = ino;
437 	key.cl_lfn = lfn;
438 	claimant = avl_find(&dup->fr_claimants, &key, &where);
439 	if (claimant != NULL) {
440 		avl_remove(&dup->fr_claimants, claimant);
441 		if (avl_numnodes(&dup->fr_claimants) == 0) {
442 			avl_destroy(&dup->fr_claimants);
443 			avl_remove(&dup_frags, (void *)dup);
444 			free((void *)dup);
445 		} else {
446 			busy = 1;
447 		}
448 	}
449 
450 	return (busy);
451 }
452 
453 static claimant_t *
454 alloc_claimant(fsck_ino_t inode, daddr32_t lfn)
455 {
456 	claimant_t *new = (claimant_t *)malloc(sizeof (claimant_t));
457 
458 	if (new == NULL)
459 		errexit("Out of memory in alloc_claimant()\n");
460 
461 	new->cl_inode = inode;
462 	new->cl_lfn = lfn;
463 
464 	return (new);
465 }
466 
467 static fragment_t *
468 alloc_dup(daddr32_t pfn)
469 {
470 	fragment_t *new = (fragment_t *)malloc(sizeof (fragment_t));
471 
472 	if (new == NULL)
473 		errexit("Out of memory in alloc_dup()\n");
474 
475 	new->fr_pfn = pfn;
476 	avl_create(&new->fr_claimants, claimant_cmp, sizeof (fragment_t),
477 	    OFFSETOF(claimant_t, cl_avl));
478 
479 	return (new);
480 }
481 
482 /*
483  * Compare two fragment_t instances for avl_find().  It requires a
484  * return value of -1/0/1, so we can't just hand back left - right.
485  */
486 static int
487 fragment_cmp(const void *vlp, const void *vrp)
488 {
489 	const fragment_t *lp = (const fragment_t *)vlp;
490 	const fragment_t *rp = (const fragment_t *)vrp;
491 	int cmp = lp->fr_pfn - rp->fr_pfn;
492 
493 	if (cmp < 0)
494 		cmp = -1;
495 	else if (cmp > 0)
496 		cmp = 1;
497 
498 	return (cmp);
499 }
500 
501 /*
502  * Compare two claimant_t instances for avl_find().  It requires a
503  * return value of -1/0/1, so we can't just hand back left - right.
504  */
505 static int
506 claimant_cmp(const void *vlp, const void *vrp)
507 {
508 	const claimant_t *lp = (const claimant_t *)vlp;
509 	const claimant_t *rp = (const claimant_t *)vrp;
510 	int cmp;
511 
512 	cmp = lp->cl_inode - rp->cl_inode;
513 	if (cmp == 0) {
514 		/*
515 		 * lfn < 0 is a wildcard lfn match.
516 		 */
517 		if ((lp->cl_lfn >= 0) && (rp->cl_lfn >= 0))
518 			cmp = lp->cl_lfn - rp->cl_lfn;
519 	}
520 
521 	if (cmp < 0)
522 		cmp = -1;
523 	else if (cmp > 0)
524 		cmp = 1;
525 
526 	return (cmp);
527 }
528 
529 static int
530 by_ino_cmp(const void *vlp, const void *vrp)
531 {
532 	const inode_dup_t *lp = (const inode_dup_t *)vlp;
533 	const inode_dup_t *rp = (const inode_dup_t *)vrp;
534 	int cmp;
535 
536 	cmp = lp->id_ino - rp->id_ino;
537 
538 	if (cmp < 0)
539 		cmp = -1;
540 	else if (cmp > 0)
541 		cmp = 1;
542 
543 	return (cmp);
544 }
545 
546 static int
547 by_lfn_cmp(const void *vlp, const void *vrp)
548 {
549 	const reference_t *lp = (const reference_t *)vlp;
550 	const reference_t *rp = (const reference_t *)vrp;
551 	int cmp;
552 
553 	cmp = lp->ref_lfn - rp->ref_lfn;
554 
555 	if (cmp < 0)
556 		cmp = -1;
557 	else if (cmp > 0)
558 		cmp = 1;
559 
560 	return (cmp);
561 }
562 
563 static inode_dup_t *
564 new_inode_dup(fsck_ino_t inode)
565 {
566 	inode_dup_t *new;
567 
568 	new = (inode_dup_t *)malloc(sizeof (inode_dup_t));
569 	if (new == NULL)
570 		errexit("Out of memory in new_inode_dup\n");
571 	new->id_ino = inode;
572 	avl_create(&new->id_fragments, by_lfn_cmp, sizeof (reference_t),
573 	    OFFSETOF(reference_t, ref_avl));
574 
575 	return (new);
576 }
577