/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Keep track of duplicate fragment references (elsewhere called
 * blocks for ancient historical reasons).
 *
 * The duplicates are kept in a binary tree to attempt to minimize
 * search times when checking the block lists of all active inodes
 * for multiple uses. This is opposed to using a simple linear list
 * that is traversed for every block, as is used in the traditional
 * fsck. It can be very time-expensive if there's more than just a
 * very few duplicates, and typically there are either none or lots.
 *
 * For each multiply-claimed fragment, we note all of the claiming
 * inodes and their corresponding logical block numbers. This allows
 * reporting exactly which parts of which files were damaged, which
 * provides at least a chance of recovering the bulk of the data on
 * a seriously-corrupted filesystem.
 */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/avl.h>
#define	_KERNEL
#include <sys/fs/ufs_fsdir.h>	/* for struct direct */
#undef	_KERNEL
#include <sys/debug.h>
#include "fsck.h"

#define	OFFSETOF(type, elt)	((size_t)(&((type *)NULL)->elt))

/*
 * For each physical fragment with multiple claimants, the specifics
 * of each claim are recorded. This means there are N+1 AVL trees in
 * use: one for each fragment's claimant table, plus one that orders
 * the fragments themselves.
 *
 * The table of fragments simply has the physical fragment number
 * (pfn) and has the root of the tree of the associated claimants. It
 * is keyed by the pfn and called dup_frags.
 *
 * The subsidiary trees list inodes and logical fragment number (lfn)
 * for each claimant. They are keyed first by inode number and then
 * by lfn. Both are needed, as it is possible for one inode to have
 * multiple claims on the same fragment.
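 *
 * For illustration only (hypothetical fragment and inode numbers, not
 * taken from any particular run): after
 *
 *	(void) find_dup_ref(100, 5, 2, DB_CREATE | DB_INCR);
 *	(void) find_dup_ref(100, 9, 0, DB_CREATE | DB_INCR);
 *
 * dup_frags would hold a single fragment_t with fr_pfn == 100, and that
 * fragment's fr_claimants tree would hold two claimant_t entries,
 * (ino 5, lfn 2) and (ino 9, lfn 0).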
 */

typedef struct claimant {
	fsck_ino_t cl_inode;
	daddr32_t cl_lfn;
	avl_node_t cl_avl;
} claimant_t;

typedef struct fragment {
	daddr32_t fr_pfn;
	avl_tree_t fr_claimants;
	avl_node_t fr_avl;
} fragment_t;

typedef struct reference {
	daddr32_t ref_lfn;
	daddr32_t ref_pfn;
	avl_node_t ref_avl;
} reference_t;

typedef struct inode_dup {
	fsck_ino_t id_ino;
	avl_tree_t id_fragments;
	avl_node_t id_avl;
} inode_dup_t;

static avl_tree_t dup_frags;

static void free_invert_frags(avl_tree_t *);
static void report_dup_lfn_pfn(daddr32_t, daddr32_t, daddr32_t, daddr32_t);
static inode_dup_t *new_inode_dup(fsck_ino_t);
static void invert_frags(avl_tree_t *, avl_tree_t *);
static void report_inode_dups(inode_dup_t *);
static int by_ino_cmp(const void *, const void *);
static int by_lfn_cmp(const void *, const void *);
static claimant_t *alloc_claimant(fsck_ino_t, daddr32_t);
static fragment_t *alloc_dup(daddr32_t);
static int claimant_cmp(const void *, const void *);
static int fragment_cmp(const void *, const void *);
static int decrement_claimant(fragment_t *, fsck_ino_t, daddr32_t);
static int increment_claimant(fragment_t *, fsck_ino_t, daddr32_t);

/*
 * Simple accessor function for the outside world so only we need to
 * see and interpret our data structures.
 */
int
have_dups(void)
{
	return (avl_numnodes(&dup_frags) > 0);
}

/*
 * Locates, creates, or deletes a record of a duplicate reference.
 *
 * For DB_INCR, returns true if the dup was added to the tree.
 * For DB_DECR, returns true if the dup was in the tree.
 */
int
find_dup_ref(daddr32_t fragno, fsck_ino_t ino, daddr32_t lfn, int flags)
{
	fragment_t key;
	fragment_t *dup;
	avl_index_t where;
	int added = 0;
	int removed = 0;

	if (avl_first(&dup_frags) == NULL) {
		if (flags & DB_CREATE)
			avl_create(&dup_frags, fragment_cmp,
			    sizeof (fragment_t),
			    OFFSETOF(fragment_t, fr_avl));
		else
			return (0);
	}

	key.fr_pfn = fragno;
	dup = avl_find(&dup_frags, (void *)&key, &where);
	if ((dup == NULL) && (flags & DB_CREATE)) {
		dup = alloc_dup(fragno);
		avl_insert(&dup_frags, (void *)dup, where);
	}

	if (dup != NULL) {
		if (flags & DB_INCR) {
			if (debug)
				(void) printf(
				    "adding claim by ino %d as lfn %d\n",
				    ino, lfn);
			added = increment_claimant(dup, ino, lfn);
		} else if (flags & DB_DECR) {
			/*
			 * Note that dup may be invalidated by this call.
			 */
			removed = decrement_claimant(dup, ino, lfn);
			if (debug)
				(void) printf(
				    "check for claimant ino %d lfn %d "
				    "returned %d\n",
				    ino, lfn, removed);
		}
	}

	return (added || removed || (dup != NULL));
}

/*
 * Dump the duplicates table in a relatively user-friendly form.
 * The idea is that the output can be useful when trying to manually
 * work out which block belongs to which of the claiming inodes.
 *
 * What we have is a tree of duplicates indexed by physical
 * fragment number. What we want to report is:
 *
 * Inode %d:
 *	Logical Offset 0x%08llx, Physical Fragment %d
 *	Logical Offsets 0x%08llx - 0x%08llx, Physical Fragments %d - %d
 *	...
 * Inode %d:
 *	Logical Offsets 0x%08llx - 0x%08llx, Physical Fragments %d - %d
 *	...
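 *
 * The logical offsets shown are byte offsets (the logical fragment
 * number scaled by the fragment size, sblock.fs_fsize); the physical
 * values are raw fragment numbers. See report_dup_lfn_pfn() below.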
 */
int
report_dups(int quiet)
{
	int overlaps;
	inode_dup_t *inode;
	fragment_t *dup;
	avl_tree_t inode_frags;

	overlaps = 0;
	ASSERT(have_dups());
	/*
	 * Figure out whether any actual dups are still around.
	 * This tells us whether or not we can mark the
	 * filesystem clean.
	 */
	dup = avl_first(&dup_frags);
	while (dup != NULL) {
		if (avl_numnodes(&dup->fr_claimants) > 1) {
			overlaps++;
			break;
		}
		dup = AVL_NEXT(&dup_frags, dup);
	}

	/*
	 * Now report on every object that still exists that
	 * had *any* dups associated with it.
	 */
	if (!quiet) {
		(void) puts("\nSome blocks that were found to be in "
		    "multiple files are still\nassigned to "
		    "file(s).\nFragments sorted by inode and "
		    "logical offsets:");

		invert_frags(&dup_frags, &inode_frags);
		inode = avl_first(&inode_frags);
		while (inode != NULL) {
			report_inode_dups(inode);
			inode = AVL_NEXT(&inode_frags, inode);
		}
		(void) printf("\n");

		free_invert_frags(&inode_frags);
	}

	return (overlaps);
}

static void
report_inode_dups(inode_dup_t *inode)
{
	reference_t *dup;
	daddr32_t first_lfn, last_lfn, first_pfn, last_pfn;

	(void) printf("Inode %d:\n", inode->id_ino);
	dup = avl_first(&inode->id_fragments);
	first_lfn = last_lfn = dup->ref_lfn;
	first_pfn = last_pfn = dup->ref_pfn;
	while ((dup = AVL_NEXT(&inode->id_fragments, dup)) != NULL) {
		/*
		 * If this reference does not extend the current run,
		 * report the run and start a new one here.
		 */
		if (((last_lfn + 1) != dup->ref_lfn) ||
		    ((last_pfn + 1) != dup->ref_pfn)) {
			report_dup_lfn_pfn(first_lfn, last_lfn,
			    first_pfn, last_pfn);
			first_lfn = dup->ref_lfn;
			first_pfn = dup->ref_pfn;
		}
		last_lfn = dup->ref_lfn;
		last_pfn = dup->ref_pfn;
	}
	report_dup_lfn_pfn(first_lfn, last_lfn, first_pfn, last_pfn);
}

static void
report_dup_lfn_pfn(daddr32_t first_lfn, daddr32_t last_lfn,
    daddr32_t first_pfn, daddr32_t last_pfn)
{
	if ((first_lfn == last_lfn) && (first_pfn == last_pfn)) {
		(void) printf(
		    "  Logical Offset 0x%08llx Physical Fragment %d\n",
		    (longlong_t)first_lfn * sblock.fs_fsize, first_pfn);
	} else {
		(void) printf(
		    "  Logical Offsets 0x%08llx - 0x%08llx, "
		    "Physical Fragments %d - %d\n",
		    (longlong_t)first_lfn * sblock.fs_fsize,
		    (longlong_t)last_lfn * sblock.fs_fsize,
		    first_pfn, last_pfn);
	}
}

/*
 * Given a tree of fragment_ts, each element of which has an integral
 * sub-tree of claimant_ts, produce a tree of inode_dup_ts, each element
 * of which has an integral sub-tree of reference_ts.
 */
static void
invert_frags(avl_tree_t *source, avl_tree_t *target)
{
	fragment_t *src_frag;
	claimant_t *src_claim;
	inode_dup_t *tgt_inode;
	inode_dup_t tgt_inode_key;
	reference_t *tgt_ref;
	reference_t tgt_ref_key;
	avl_index_t where;

	avl_create(target, by_ino_cmp, sizeof (inode_dup_t),
	    OFFSETOF(inode_dup_t, id_avl));

	src_frag = avl_first(source);
	while (src_frag != NULL) {
		src_claim = avl_first(&src_frag->fr_claimants);
		while (src_claim != NULL) {
			/*
			 * Have we seen this inode before?
			 */
			tgt_inode_key.id_ino = src_claim->cl_inode;
			tgt_inode = avl_find(target, (void *)&tgt_inode_key,
			    &where);
			if (tgt_inode == NULL) {
				/*
				 * No, so set up a record for it.
				 */
				tgt_inode = new_inode_dup(src_claim->cl_inode);
				avl_insert(target, (void *)tgt_inode, where);
			}
			/*
			 * Now, how about this logical fragment?
			 * In theory, we should never see a duplicate,
			 * since a given lfn only exists once for a given
			 * inode. As such, we ignore duplicate hits.
			 */
			tgt_ref_key.ref_lfn = src_claim->cl_lfn;
			tgt_ref = avl_find(&tgt_inode->id_fragments,
			    (void *)&tgt_ref_key, &where);
			if (tgt_ref == NULL) {
				/*
				 * Haven't seen it, add it.
				 */
				tgt_ref = (reference_t *)malloc(
				    sizeof (reference_t));
				if (tgt_ref == NULL)
					errexit("Out of memory in "
					    "invert_frags\n");
				tgt_ref->ref_lfn = src_claim->cl_lfn;
				tgt_ref->ref_pfn = src_frag->fr_pfn;
				avl_insert(&tgt_inode->id_fragments,
				    (void *)tgt_ref, where);
			}
			src_claim = AVL_NEXT(&src_frag->fr_claimants,
			    src_claim);
		}
		src_frag = AVL_NEXT(source, src_frag);
	}
}

/*
 * Discard memory associated with the inverted fragments tree created
 * by report_dups() via invert_frags().
 */
static void
free_invert_frags(avl_tree_t *tree)
{
	void *outer = NULL;	/* traversal cookie */
	void *inner;		/* traversal cookie */
	inode_dup_t *inode_dup;
	reference_t *ref_dup;

	while ((inode_dup = avl_destroy_nodes(tree, &outer)) != NULL) {
		inner = NULL;
		while ((ref_dup = avl_destroy_nodes(&inode_dup->id_fragments,
		    &inner)) != NULL) {
			free((void *)ref_dup);
		}
		avl_destroy(&inode_dup->id_fragments);
		free((void *)inode_dup);
	}
	avl_destroy(tree);
}

/*
 * Discard all memory allocations associated with the current duplicates
 * table.
 */
void
free_dup_state(void)
{
	void *dup_cookie = NULL;
	void *claim_cookie;
	fragment_t *fragv;
	claimant_t *claimv;

	while ((fragv = avl_destroy_nodes(&dup_frags, &dup_cookie)) != NULL) {
		claim_cookie = NULL;
		while ((claimv = avl_destroy_nodes(&fragv->fr_claimants,
		    &claim_cookie)) != NULL) {
			free((void *)claimv);
		}
		avl_destroy(&fragv->fr_claimants);
		free((void *)fragv);
	}
	avl_destroy(&dup_frags);
}

/*
 * If the given claimant has not been seen before, add it to DUP's
 * list of them. It's not fatal for the same PFN/INODE/LFN to get
 * added twice, because pass1b() will add the same dups that pass1()
 * did, plus one.
 */
static int
increment_claimant(fragment_t *dup, fsck_ino_t ino, daddr32_t lfn)
{
	avl_index_t where;
	claimant_t *claimant;
	claimant_t key;
	int added = 0;

	key.cl_inode = ino;
	key.cl_lfn = lfn;
	claimant = avl_find(&dup->fr_claimants, &key, &where);
	if (claimant == NULL) {
		if (debug)
			(void) printf("inserting claimant\n");
		claimant = alloc_claimant(ino, lfn);
		avl_insert(&dup->fr_claimants, (void *)claimant, where);
		statemap[ino] |= INCLEAR;
		added = 1;
	}

	return (added);
}

/*
 * If the given claimant is on DUP's list, remove it. It is not
 * an error for the claimant to not be on the list.
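 *
 * When the claimant is found and removed and other claimants remain,
 * this returns non-zero. When the last claimant is removed, the
 * fragment record itself is taken out of dup_frags and freed, so the
 * caller must not reference DUP afterwards.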
 */
static int
decrement_claimant(fragment_t *dup, fsck_ino_t ino, daddr32_t lfn)
{
	avl_index_t where;
	claimant_t *claimant;
	claimant_t key;
	int busy = 0;

	key.cl_inode = ino;
	key.cl_lfn = lfn;
	claimant = avl_find(&dup->fr_claimants, &key, &where);
	if (claimant != NULL) {
		avl_remove(&dup->fr_claimants, claimant);
		/*
		 * The claimant is no longer referenced by the tree,
		 * so discard it.
		 */
		free((void *)claimant);
		if (avl_numnodes(&dup->fr_claimants) == 0) {
			avl_destroy(&dup->fr_claimants);
			avl_remove(&dup_frags, (void *)dup);
			free((void *)dup);
		} else {
			busy = 1;
		}
	}

	return (busy);
}

static claimant_t *
alloc_claimant(fsck_ino_t inode, daddr32_t lfn)
{
	claimant_t *new = (claimant_t *)malloc(sizeof (claimant_t));

	if (new == NULL)
		errexit("Out of memory in alloc_claimant()\n");

	new->cl_inode = inode;
	new->cl_lfn = lfn;

	return (new);
}

static fragment_t *
alloc_dup(daddr32_t pfn)
{
	fragment_t *new = (fragment_t *)malloc(sizeof (fragment_t));

	if (new == NULL)
		errexit("Out of memory in alloc_dup()\n");

	new->fr_pfn = pfn;
	avl_create(&new->fr_claimants, claimant_cmp, sizeof (claimant_t),
	    OFFSETOF(claimant_t, cl_avl));

	return (new);
}

/*
 * Compare two fragment_t instances for avl_find(). It requires a
 * return value of -1/0/1, so we can't just hand back left - right.
 */
static int
fragment_cmp(const void *vlp, const void *vrp)
{
	const fragment_t *lp = (const fragment_t *)vlp;
	const fragment_t *rp = (const fragment_t *)vrp;
	int cmp = lp->fr_pfn - rp->fr_pfn;

	if (cmp < 0)
		cmp = -1;
	else if (cmp > 0)
		cmp = 1;

	return (cmp);
}

/*
 * Compare two claimant_t instances for avl_find(). It requires a
 * return value of -1/0/1, so we can't just hand back left - right.
 */
static int
claimant_cmp(const void *vlp, const void *vrp)
{
	const claimant_t *lp = (const claimant_t *)vlp;
	const claimant_t *rp = (const claimant_t *)vrp;
	int cmp;

	cmp = lp->cl_inode - rp->cl_inode;
	if (cmp == 0) {
		/*
		 * lfn < 0 is a wildcard lfn match.
		 */
		if ((lp->cl_lfn >= 0) && (rp->cl_lfn >= 0))
			cmp = lp->cl_lfn - rp->cl_lfn;
	}

	if (cmp < 0)
		cmp = -1;
	else if (cmp > 0)
		cmp = 1;

	return (cmp);
}

/*
 * Order inode_dup_t entries in the inverted tree by inode number.
 */
static int
by_ino_cmp(const void *vlp, const void *vrp)
{
	const inode_dup_t *lp = (const inode_dup_t *)vlp;
	const inode_dup_t *rp = (const inode_dup_t *)vrp;
	int cmp;

	cmp = lp->id_ino - rp->id_ino;

	if (cmp < 0)
		cmp = -1;
	else if (cmp > 0)
		cmp = 1;

	return (cmp);
}

/*
 * Order reference_t entries within an inode_dup_t by logical fragment.
 */
static int
by_lfn_cmp(const void *vlp, const void *vrp)
{
	const reference_t *lp = (const reference_t *)vlp;
	const reference_t *rp = (const reference_t *)vrp;
	int cmp;

	cmp = lp->ref_lfn - rp->ref_lfn;

	if (cmp < 0)
		cmp = -1;
	else if (cmp > 0)
		cmp = 1;

	return (cmp);
}

static inode_dup_t *
new_inode_dup(fsck_ino_t inode)
{
	inode_dup_t *new;

	new = (inode_dup_t *)malloc(sizeof (inode_dup_t));
	if (new == NULL)
		errexit("Out of memory in new_inode_dup\n");
	new->id_ino = inode;
	avl_create(&new->id_fragments, by_lfn_cmp, sizeof (reference_t),
	    OFFSETOF(reference_t, ref_avl));

	return (new);
}