1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * Keep track of duplicate fragment references (elsewhere called 30 * blocks for ancient historical reasons). 31 * 32 * The duplicates are kept in a binary tree to attempt to minimize 33 * search times when checking the block lists of all active inodes 34 * for multiple uses. This is opposed to using a simple linear list 35 * that is traversed for every block, as is used in the traditional 36 * fsck. It can be very time-expensive if there's more than just a 37 * very few duplicates, and typically there are either none or lots. 38 * 39 * For each multiply-claimed fragment, we note all of the claiming 40 * inodes and their corresponding logical block numbers. This allows 41 * reporting exactly which parts of which files were damaged, which 42 * provides at least a chance of recovering the bulk of the data on 43 * a seriously-corrupted filesystem. 44 */ 45 #include <stdio.h> 46 #include <stdlib.h> 47 #include <unistd.h> 48 #include <sys/avl.h> 49 #define _KERNEL 50 #include <sys/fs/ufs_fsdir.h> /* for struct direct */ 51 #undef _KERNEL 52 #include <sys/debug.h> 53 #include "fsck.h" 54 55 #define OFFSETOF(type, elt) ((size_t)(&((type *)NULL)->elt)) 56 57 /* 58 * For each physical fragment with multiple claimants, the specifics 59 * of each claim are recorded. This means there are N+1 AVL trees in 60 * use: one for each fragment's claimant table, plus one that orders 61 * the fragments themselves. 62 * 63 * The table of fragments simply has the physical fragment number 64 * (pfn) and has the root of the tree of the associated claimants. It 65 * is keyed by the pfn and called dup_frags. 66 * 67 * The subsidiary trees list inodes and logical fragment number (lfn) 68 * for each claimant. They are keyed first by inode number and then 69 * by lfn. Both are needed, as it is possible for one inode to have 70 * multiple claims on the same fragment. 71 */ 72 73 typedef struct claimant { 74 fsck_ino_t cl_inode; 75 daddr32_t cl_lfn; 76 avl_node_t cl_avl; 77 } claimant_t; 78 79 typedef struct fragment { 80 daddr32_t fr_pfn; 81 avl_tree_t fr_claimants; 82 avl_node_t fr_avl; 83 } fragment_t; 84 85 typedef struct reference { 86 daddr32_t ref_lfn; 87 daddr32_t ref_pfn; 88 avl_node_t ref_avl; 89 } reference_t; 90 91 typedef struct inode_dup { 92 fsck_ino_t id_ino; 93 avl_tree_t id_fragments; 94 avl_node_t id_avl; 95 } inode_dup_t; 96 97 static avl_tree_t dup_frags; 98 99 static void free_invert_frags(avl_tree_t *); 100 static void report_dup_lfn_pfn(daddr32_t, daddr32_t, daddr32_t, daddr32_t); 101 static inode_dup_t *new_inode_dup(fsck_ino_t); 102 static void invert_frags(avl_tree_t *, avl_tree_t *); 103 static void report_inode_dups(inode_dup_t *); 104 static int by_ino_cmp(const void *, const void *); 105 static int by_lfn_cmp(const void *, const void *); 106 static claimant_t *alloc_claimant(fsck_ino_t, daddr32_t); 107 static fragment_t *alloc_dup(daddr32_t); 108 static int claimant_cmp(const void *, const void *); 109 static int fragment_cmp(const void *, const void *); 110 static int decrement_claimant(fragment_t *, fsck_ino_t, daddr32_t); 111 static int increment_claimant(fragment_t *, fsck_ino_t, daddr32_t); 112 113 /* 114 * Simple accessor function for the outside world so only we need to 115 * see and interpret our data structures. 116 */ 117 int 118 have_dups(void) 119 { 120 return (avl_numnodes(&dup_frags) > 0); 121 } 122 123 /* 124 * Locates, creates, and deletes a record of a duplicate reference. 125 * 126 * For DB_INCR, returns true if the dup was added to the tree. 127 * For DB_DECR, returns true if the dup was in the tree. 128 */ 129 int 130 find_dup_ref(daddr32_t fragno, fsck_ino_t ino, daddr32_t lfn, int flags) 131 { 132 fragment_t key; 133 fragment_t *dup; 134 avl_index_t where; 135 int added = 0; 136 int removed = 0; 137 138 if (avl_first(&dup_frags) == NULL) { 139 if (flags & DB_CREATE) 140 avl_create(&dup_frags, fragment_cmp, 141 sizeof (fragment_t), 142 OFFSETOF(fragment_t, fr_avl)); 143 else 144 return (0); 145 } 146 147 key.fr_pfn = fragno; 148 dup = avl_find(&dup_frags, (void *)&key, &where); 149 if ((dup == NULL) & (flags & DB_CREATE)) { 150 dup = alloc_dup(fragno); 151 avl_insert(&dup_frags, (void *)dup, where); 152 } 153 154 if (dup != NULL) { 155 if (flags & DB_INCR) { 156 if (debug) 157 (void) printf( 158 "adding claim by ino %d as lfn %d\n", 159 ino, lfn); 160 added = increment_claimant(dup, ino, lfn); 161 } else if (flags & DB_DECR) { 162 /* 163 * Note that dup may be invalidated by this call. 164 */ 165 removed = decrement_claimant(dup, ino, lfn); 166 if (debug) 167 (void) printf( 168 "check for claimant ino %d lfn %d returned %d\n", 169 ino, lfn, removed); 170 } 171 } 172 173 return (added || removed || (dup != NULL)); 174 } 175 176 /* 177 * Dump the duplicates table in a relatively user-friendly form. 178 * The idea is that the output can be useful when trying to manually 179 * work out which block belongs to which of the claiming inodes. 180 * 181 * What we have is a tree of duplicates indexed by physical 182 * fragment number. What we want to report is: 183 * 184 * Inode %d: 185 * Logical Offset 0x%08llx, Physical Fragment %d 186 * Logical Offsets 0x%08llx - 0x%08llx, Physical Fragments %d - %d 187 * ... 188 * Inode %d: 189 * Logical Offsets 0x%08llx - 0x%08llx, Physical Fragments %d - %d 190 * ... 191 */ 192 int 193 report_dups(int quiet) 194 { 195 int overlaps; 196 inode_dup_t *inode; 197 fragment_t *dup; 198 avl_tree_t inode_frags; 199 200 overlaps = 0; 201 ASSERT(have_dups()); 202 /* 203 * Figure out how many actual dups are still around. 204 * This tells us whether or not we can mark the 205 * filesystem clean. 206 */ 207 dup = avl_first(&dup_frags); 208 while (dup != NULL) { 209 if (avl_numnodes(&dup->fr_claimants) > 1) { 210 overlaps++; 211 break; 212 } 213 dup = AVL_NEXT(&dup_frags, dup); 214 } 215 216 /* 217 * Now report on every object that still exists that 218 * had *any* dups associated with it. 219 */ 220 if (!quiet) { 221 (void) puts("\nSome blocks that were found to be in " 222 "multiple files are still\nassigned to " 223 "file(s).\nFragments sorted by inode and " 224 "logical offsets:"); 225 226 invert_frags(&dup_frags, &inode_frags); 227 inode = avl_first(&inode_frags); 228 while (inode != NULL) { 229 report_inode_dups(inode); 230 inode = AVL_NEXT(&inode_frags, inode); 231 } 232 (void) printf("\n"); 233 234 free_invert_frags(&inode_frags); 235 } 236 237 return (overlaps); 238 } 239 240 static void 241 report_inode_dups(inode_dup_t *inode) 242 { 243 reference_t *dup; 244 daddr32_t first_lfn, last_lfn, first_pfn, last_pfn; 245 246 (void) printf("Inode %d:\n", inode->id_ino); 247 dup = avl_first(&inode->id_fragments); 248 first_lfn = last_lfn = dup->ref_lfn; 249 first_pfn = last_pfn = dup->ref_pfn; 250 while ((dup = AVL_NEXT(&inode->id_fragments, dup)) != NULL) { 251 if (((last_lfn + 1) != dup->ref_lfn) || 252 ((last_pfn + 1) != dup->ref_pfn)) { 253 report_dup_lfn_pfn(first_lfn, last_lfn, 254 first_pfn, last_pfn); 255 first_lfn = last_lfn = dup->ref_lfn; 256 first_pfn = last_pfn = dup->ref_pfn; 257 } 258 } 259 report_dup_lfn_pfn(first_lfn, last_lfn, first_pfn, last_pfn); 260 } 261 262 static void 263 report_dup_lfn_pfn(daddr32_t first_lfn, daddr32_t last_lfn, 264 daddr32_t first_pfn, daddr32_t last_pfn) 265 { 266 if ((first_lfn == last_lfn) && (first_pfn == last_pfn)) { 267 (void) printf( 268 " Logical Offset 0x%08llx Physical Fragment %d\n", 269 (longlong_t)first_lfn * sblock.fs_fsize, first_pfn); 270 } else { 271 (void) printf( 272 " Logical Offsets 0x%08llx - 0x%08llx, " 273 "Physical Fragments %d - %d\n", 274 (longlong_t)first_lfn * sblock.fs_fsize, 275 (longlong_t)last_lfn * sblock.fs_fsize, 276 first_pfn, last_pfn); 277 } 278 } 279 280 /* 281 * Given a tree of fragment_ts, each element of which has an integral 282 * sub-tree of claimant_ts, produce a tree of inode_dup_ts, each element 283 * of which has an integral sub-tree of reference_ts. 284 */ 285 static void 286 invert_frags(avl_tree_t *source, avl_tree_t *target) 287 { 288 fragment_t *src_frag; 289 claimant_t *src_claim; 290 inode_dup_t *tgt_inode; 291 inode_dup_t tgt_inode_key; 292 reference_t *tgt_ref; 293 reference_t tgt_ref_key; 294 avl_index_t where; 295 296 avl_create(target, by_ino_cmp, sizeof (inode_dup_t), 297 OFFSETOF(inode_dup_t, id_avl)); 298 299 src_frag = avl_first(source); 300 while (src_frag != NULL) { 301 src_claim = avl_first(&src_frag->fr_claimants); 302 while (src_claim != NULL) { 303 /* 304 * Have we seen this inode before? 305 */ 306 tgt_inode_key.id_ino = src_claim->cl_inode; 307 tgt_inode = avl_find(target, (void *)&tgt_inode_key, 308 &where); 309 if (tgt_inode == NULL) { 310 /* 311 * No, so set up a record for it. 312 */ 313 tgt_inode = new_inode_dup(src_claim->cl_inode); 314 avl_insert(target, (void *)tgt_inode, where); 315 } 316 /* 317 * Now, how about this logical fragment? In 318 * theory, we should never see a duplicate, since 319 * a given lfn only exists once for a given inode. 320 * As such, we ignore duplicate hits. 321 */ 322 tgt_ref_key.ref_lfn = src_claim->cl_lfn; 323 tgt_ref = avl_find(&tgt_inode->id_fragments, 324 (void *)&tgt_ref_key, &where); 325 if (tgt_ref == NULL) { 326 /* 327 * Haven't seen it, add it. 328 */ 329 tgt_ref = (reference_t *)malloc( 330 sizeof (reference_t)); 331 if (tgt_ref == NULL) 332 errexit("Out of memory in " 333 "invert_frags\n"); 334 tgt_ref->ref_lfn = src_claim->cl_lfn; 335 tgt_ref->ref_pfn = src_frag->fr_pfn; 336 avl_insert(&tgt_inode->id_fragments, 337 (void *)tgt_ref, where); 338 } 339 src_claim = AVL_NEXT(&src_frag->fr_claimants, 340 src_claim); 341 } 342 src_frag = AVL_NEXT(source, src_frag); 343 } 344 } 345 346 /* 347 * Discard memory associated with the inverted fragments tree created 348 * by report_dups() via invert_frags(). 349 */ 350 static void 351 free_invert_frags(avl_tree_t *tree) 352 { 353 void *outer = NULL; /* traversal cookie */ 354 void *inner; /* traversal cookie */ 355 inode_dup_t *inode_dup; 356 reference_t *ref_dup; 357 358 while ((inode_dup = avl_destroy_nodes(tree, &outer)) != NULL) { 359 inner = NULL; 360 while ((ref_dup = avl_destroy_nodes(&inode_dup->id_fragments, 361 &inner)) != NULL) { 362 free((void *)ref_dup); 363 } 364 avl_destroy(&inode_dup->id_fragments); 365 free((void *)inode_dup); 366 } 367 avl_destroy(tree); 368 } 369 370 /* 371 * Discard all memory allocations associated with the current duplicates 372 * table. 373 */ 374 void 375 free_dup_state(void) 376 { 377 void *dup_cookie = NULL; 378 void *claim_cookie; 379 fragment_t *fragv; 380 claimant_t *claimv; 381 382 while ((fragv = avl_destroy_nodes(&dup_frags, &dup_cookie)) != NULL) { 383 claim_cookie = NULL; 384 while ((claimv = avl_destroy_nodes(&fragv->fr_claimants, 385 &claim_cookie)) != NULL) { 386 free((void *)claimv); 387 } 388 avl_destroy(&fragv->fr_claimants); 389 free((void *)fragv); 390 } 391 avl_destroy(&dup_frags); 392 } 393 394 /* 395 * If the given claimant has not been seen before, add it to DUP's 396 * list of them. It's not fatal for the same PFN/INODE/LFN to get 397 * added twice, because pass1b() will add the same dups that pass1() 398 * did, plus one. 399 */ 400 static int 401 increment_claimant(fragment_t *dup, fsck_ino_t ino, daddr32_t lfn) 402 { 403 avl_index_t where; 404 claimant_t *claimant; 405 claimant_t key; 406 int added = 0; 407 408 key.cl_inode = ino; 409 key.cl_lfn = lfn; 410 claimant = avl_find(&dup->fr_claimants, &key, &where); 411 if (claimant == NULL) { 412 if (debug) 413 (void) printf("inserting claimant\n"); 414 claimant = alloc_claimant(ino, lfn); 415 avl_insert(&dup->fr_claimants, (void *)claimant, where); 416 statemap[ino] |= INCLEAR; 417 /* 418 * If the inode is to be cleared and has zero links then remove 419 * the zero link bit as it will be cleared anyway. If INZLINK 420 * is being removed and it's a directory inode then add the 421 * inode to the orphan directory list. 422 */ 423 if (statemap[ino] & INZLINK) { 424 statemap[ino] &= ~INZLINK; 425 if (statemap[ino] & DSTATE) { 426 add_orphan_dir(ino); 427 } 428 } 429 added = 1; 430 } 431 432 return (added); 433 } 434 435 /* 436 * If the given claimant is on DUP's list, remove it. It is not 437 * an error for the claimant to not be on the list. 438 */ 439 static int 440 decrement_claimant(fragment_t *dup, fsck_ino_t ino, daddr32_t lfn) 441 { 442 avl_index_t where; 443 claimant_t *claimant; 444 claimant_t key; 445 int busy = 0; 446 447 key.cl_inode = ino; 448 key.cl_lfn = lfn; 449 claimant = avl_find(&dup->fr_claimants, &key, &where); 450 if (claimant != NULL) { 451 avl_remove(&dup->fr_claimants, claimant); 452 if (avl_numnodes(&dup->fr_claimants) == 0) { 453 avl_destroy(&dup->fr_claimants); 454 avl_remove(&dup_frags, (void *)dup); 455 free((void *)dup); 456 } else { 457 busy = 1; 458 } 459 } 460 461 return (busy); 462 } 463 464 static claimant_t * 465 alloc_claimant(fsck_ino_t inode, daddr32_t lfn) 466 { 467 claimant_t *new = (claimant_t *)malloc(sizeof (claimant_t)); 468 469 if (new == NULL) 470 errexit("Out of memory in alloc_claimant()\n"); 471 472 new->cl_inode = inode; 473 new->cl_lfn = lfn; 474 475 return (new); 476 } 477 478 static fragment_t * 479 alloc_dup(daddr32_t pfn) 480 { 481 fragment_t *new = (fragment_t *)malloc(sizeof (fragment_t)); 482 483 if (new == NULL) 484 errexit("Out of memory in alloc_dup()\n"); 485 486 new->fr_pfn = pfn; 487 avl_create(&new->fr_claimants, claimant_cmp, sizeof (fragment_t), 488 OFFSETOF(claimant_t, cl_avl)); 489 490 return (new); 491 } 492 493 /* 494 * Compare two fragment_t instances for avl_find(). It requires a 495 * return value of -1/0/1, so we can't just hand back left - right. 496 */ 497 static int 498 fragment_cmp(const void *vlp, const void *vrp) 499 { 500 const fragment_t *lp = (const fragment_t *)vlp; 501 const fragment_t *rp = (const fragment_t *)vrp; 502 int cmp = lp->fr_pfn - rp->fr_pfn; 503 504 if (cmp < 0) 505 cmp = -1; 506 else if (cmp > 0) 507 cmp = 1; 508 509 return (cmp); 510 } 511 512 /* 513 * Compare two claimant_t instances for avl_find(). It requires a 514 * return value of -1/0/1, so we can't just hand back left - right. 515 */ 516 static int 517 claimant_cmp(const void *vlp, const void *vrp) 518 { 519 const claimant_t *lp = (const claimant_t *)vlp; 520 const claimant_t *rp = (const claimant_t *)vrp; 521 int cmp; 522 523 cmp = lp->cl_inode - rp->cl_inode; 524 if (cmp == 0) { 525 /* 526 * lfn < 0 is a wildcard lfn match. 527 */ 528 if ((lp->cl_lfn >= 0) && (rp->cl_lfn >= 0)) 529 cmp = lp->cl_lfn - rp->cl_lfn; 530 } 531 532 if (cmp < 0) 533 cmp = -1; 534 else if (cmp > 0) 535 cmp = 1; 536 537 return (cmp); 538 } 539 540 static int 541 by_ino_cmp(const void *vlp, const void *vrp) 542 { 543 const inode_dup_t *lp = (const inode_dup_t *)vlp; 544 const inode_dup_t *rp = (const inode_dup_t *)vrp; 545 int cmp; 546 547 cmp = lp->id_ino - rp->id_ino; 548 549 if (cmp < 0) 550 cmp = -1; 551 else if (cmp > 0) 552 cmp = 1; 553 554 return (cmp); 555 } 556 557 static int 558 by_lfn_cmp(const void *vlp, const void *vrp) 559 { 560 const reference_t *lp = (const reference_t *)vlp; 561 const reference_t *rp = (const reference_t *)vrp; 562 int cmp; 563 564 cmp = lp->ref_lfn - rp->ref_lfn; 565 566 if (cmp < 0) 567 cmp = -1; 568 else if (cmp > 0) 569 cmp = 1; 570 571 return (cmp); 572 } 573 574 static inode_dup_t * 575 new_inode_dup(fsck_ino_t inode) 576 { 577 inode_dup_t *new; 578 579 new = (inode_dup_t *)malloc(sizeof (inode_dup_t)); 580 if (new == NULL) 581 errexit("Out of memory in new_inode_dup\n"); 582 new->id_ino = inode; 583 avl_create(&new->id_fragments, by_lfn_cmp, sizeof (reference_t), 584 OFFSETOF(reference_t, ref_avl)); 585 586 return (new); 587 } 588