1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Keep track of duplicate fragment references (elsewhere called 28 * blocks for ancient historical reasons). 29 * 30 * The duplicates are kept in a binary tree to attempt to minimize 31 * search times when checking the block lists of all active inodes 32 * for multiple uses. This is opposed to using a simple linear list 33 * that is traversed for every block, as is used in the traditional 34 * fsck. It can be very time-expensive if there's more than just a 35 * very few duplicates, and typically there are either none or lots. 36 * 37 * For each multiply-claimed fragment, we note all of the claiming 38 * inodes and their corresponding logical block numbers. This allows 39 * reporting exactly which parts of which files were damaged, which 40 * provides at least a chance of recovering the bulk of the data on 41 * a seriously-corrupted filesystem. 42 */ 43 #include <stdio.h> 44 #include <stdlib.h> 45 #include <unistd.h> 46 #include <sys/avl.h> 47 #define _KERNEL 48 #include <sys/fs/ufs_fsdir.h> /* for struct direct */ 49 #undef _KERNEL 50 #include <sys/debug.h> 51 #include "fsck.h" 52 53 #define OFFSETOF(type, elt) ((size_t)(&((type *)NULL)->elt)) 54 55 /* 56 * For each physical fragment with multiple claimants, the specifics 57 * of each claim are recorded. This means there are N+1 AVL trees in 58 * use: one for each fragment's claimant table, plus one that orders 59 * the fragments themselves. 60 * 61 * The table of fragments simply has the physical fragment number 62 * (pfn) and has the root of the tree of the associated claimants. It 63 * is keyed by the pfn and called dup_frags. 64 * 65 * The subsidiary trees list inodes and logical fragment number (lfn) 66 * for each claimant. They are keyed first by inode number and then 67 * by lfn. Both are needed, as it is possible for one inode to have 68 * multiple claims on the same fragment. 69 */ 70 71 typedef struct claimant { 72 fsck_ino_t cl_inode; 73 daddr32_t cl_lfn; 74 avl_node_t cl_avl; 75 } claimant_t; 76 77 typedef struct fragment { 78 daddr32_t fr_pfn; 79 avl_tree_t fr_claimants; 80 avl_node_t fr_avl; 81 } fragment_t; 82 83 typedef struct reference { 84 daddr32_t ref_lfn; 85 daddr32_t ref_pfn; 86 avl_node_t ref_avl; 87 } reference_t; 88 89 typedef struct inode_dup { 90 fsck_ino_t id_ino; 91 avl_tree_t id_fragments; 92 avl_node_t id_avl; 93 } inode_dup_t; 94 95 static avl_tree_t dup_frags; 96 97 static void free_invert_frags(avl_tree_t *); 98 static void report_dup_lfn_pfn(daddr32_t, daddr32_t, daddr32_t, daddr32_t); 99 static inode_dup_t *new_inode_dup(fsck_ino_t); 100 static void invert_frags(avl_tree_t *, avl_tree_t *); 101 static void report_inode_dups(inode_dup_t *); 102 static int by_ino_cmp(const void *, const void *); 103 static int by_lfn_cmp(const void *, const void *); 104 static claimant_t *alloc_claimant(fsck_ino_t, daddr32_t); 105 static fragment_t *alloc_dup(daddr32_t); 106 static int claimant_cmp(const void *, const void *); 107 static int fragment_cmp(const void *, const void *); 108 static int decrement_claimant(fragment_t *, fsck_ino_t, daddr32_t); 109 static int increment_claimant(fragment_t *, fsck_ino_t, daddr32_t); 110 111 /* 112 * Simple accessor function for the outside world so only we need to 113 * see and interpret our data structures. 114 */ 115 int 116 have_dups(void) 117 { 118 return (avl_numnodes(&dup_frags) > 0); 119 } 120 121 /* 122 * Locates, creates, and deletes a record of a duplicate reference. 123 * 124 * For DB_INCR, returns true if the dup was added to the tree. 125 * For DB_DECR, returns true if the dup was in the tree. 126 */ 127 int 128 find_dup_ref(daddr32_t fragno, fsck_ino_t ino, daddr32_t lfn, int flags) 129 { 130 fragment_t key; 131 fragment_t *dup; 132 avl_index_t where; 133 int added = 0; 134 int removed = 0; 135 136 if (avl_first(&dup_frags) == NULL) { 137 if (flags & DB_CREATE) 138 avl_create(&dup_frags, fragment_cmp, 139 sizeof (fragment_t), 140 OFFSETOF(fragment_t, fr_avl)); 141 else 142 return (0); 143 } 144 145 key.fr_pfn = fragno; 146 dup = avl_find(&dup_frags, (void *)&key, &where); 147 if ((dup == NULL) & (flags & DB_CREATE)) { 148 dup = alloc_dup(fragno); 149 avl_insert(&dup_frags, (void *)dup, where); 150 } 151 152 if (dup != NULL) { 153 if (flags & DB_INCR) { 154 if (debug) 155 (void) printf( 156 "adding claim by ino %d as lfn %d\n", 157 ino, lfn); 158 added = increment_claimant(dup, ino, lfn); 159 } else if (flags & DB_DECR) { 160 /* 161 * Note that dup may be invalidated by this call. 162 */ 163 removed = decrement_claimant(dup, ino, lfn); 164 if (debug) 165 (void) printf( 166 "check for claimant ino %d lfn %d returned %d\n", 167 ino, lfn, removed); 168 } 169 } 170 171 return (added || removed || (dup != NULL)); 172 } 173 174 /* 175 * Dump the duplicates table in a relatively user-friendly form. 176 * The idea is that the output can be useful when trying to manually 177 * work out which block belongs to which of the claiming inodes. 178 * 179 * What we have is a tree of duplicates indexed by physical 180 * fragment number. What we want to report is: 181 * 182 * Inode %d: 183 * Logical Offset 0x%08llx, Physical Fragment %d 184 * Logical Offsets 0x%08llx - 0x%08llx, Physical Fragments %d - %d 185 * ... 186 * Inode %d: 187 * Logical Offsets 0x%08llx - 0x%08llx, Physical Fragments %d - %d 188 * ... 189 */ 190 int 191 report_dups(int quiet) 192 { 193 int overlaps; 194 inode_dup_t *inode; 195 fragment_t *dup; 196 avl_tree_t inode_frags; 197 198 overlaps = 0; 199 ASSERT(have_dups()); 200 /* 201 * Figure out how many actual dups are still around. 202 * This tells us whether or not we can mark the 203 * filesystem clean. 204 */ 205 dup = avl_first(&dup_frags); 206 while (dup != NULL) { 207 if (avl_numnodes(&dup->fr_claimants) > 1) { 208 overlaps++; 209 break; 210 } 211 dup = AVL_NEXT(&dup_frags, dup); 212 } 213 214 /* 215 * Now report on every object that still exists that 216 * had *any* dups associated with it. 217 */ 218 if (!quiet) { 219 (void) puts("\nSome blocks that were found to be in " 220 "multiple files are still\nassigned to " 221 "file(s).\nFragments sorted by inode and " 222 "logical offsets:"); 223 224 invert_frags(&dup_frags, &inode_frags); 225 inode = avl_first(&inode_frags); 226 while (inode != NULL) { 227 report_inode_dups(inode); 228 inode = AVL_NEXT(&inode_frags, inode); 229 } 230 (void) printf("\n"); 231 232 free_invert_frags(&inode_frags); 233 } 234 235 return (overlaps); 236 } 237 238 static void 239 report_inode_dups(inode_dup_t *inode) 240 { 241 reference_t *dup; 242 daddr32_t first_lfn, last_lfn, first_pfn, last_pfn; 243 244 (void) printf("Inode %d:\n", inode->id_ino); 245 dup = avl_first(&inode->id_fragments); 246 first_lfn = last_lfn = dup->ref_lfn; 247 first_pfn = last_pfn = dup->ref_pfn; 248 while ((dup = AVL_NEXT(&inode->id_fragments, dup)) != NULL) { 249 if (((last_lfn + 1) != dup->ref_lfn) || 250 ((last_pfn + 1) != dup->ref_pfn)) { 251 report_dup_lfn_pfn(first_lfn, last_lfn, 252 first_pfn, last_pfn); 253 first_lfn = last_lfn = dup->ref_lfn; 254 first_pfn = last_pfn = dup->ref_pfn; 255 } 256 } 257 report_dup_lfn_pfn(first_lfn, last_lfn, first_pfn, last_pfn); 258 } 259 260 static void 261 report_dup_lfn_pfn(daddr32_t first_lfn, daddr32_t last_lfn, 262 daddr32_t first_pfn, daddr32_t last_pfn) 263 { 264 if ((first_lfn == last_lfn) && (first_pfn == last_pfn)) { 265 (void) printf( 266 " Logical Offset 0x%08llx Physical Fragment %d\n", 267 (longlong_t)first_lfn * sblock.fs_fsize, first_pfn); 268 } else { 269 (void) printf( 270 " Logical Offsets 0x%08llx - 0x%08llx, " 271 "Physical Fragments %d - %d\n", 272 (longlong_t)first_lfn * sblock.fs_fsize, 273 (longlong_t)last_lfn * sblock.fs_fsize, 274 first_pfn, last_pfn); 275 } 276 } 277 278 /* 279 * Given a tree of fragment_ts, each element of which has an integral 280 * sub-tree of claimant_ts, produce a tree of inode_dup_ts, each element 281 * of which has an integral sub-tree of reference_ts. 282 */ 283 static void 284 invert_frags(avl_tree_t *source, avl_tree_t *target) 285 { 286 fragment_t *src_frag; 287 claimant_t *src_claim; 288 inode_dup_t *tgt_inode; 289 inode_dup_t tgt_inode_key; 290 reference_t *tgt_ref; 291 reference_t tgt_ref_key; 292 avl_index_t where; 293 294 avl_create(target, by_ino_cmp, sizeof (inode_dup_t), 295 OFFSETOF(inode_dup_t, id_avl)); 296 297 src_frag = avl_first(source); 298 while (src_frag != NULL) { 299 src_claim = avl_first(&src_frag->fr_claimants); 300 while (src_claim != NULL) { 301 /* 302 * Have we seen this inode before? 303 */ 304 tgt_inode_key.id_ino = src_claim->cl_inode; 305 tgt_inode = avl_find(target, (void *)&tgt_inode_key, 306 &where); 307 if (tgt_inode == NULL) { 308 /* 309 * No, so set up a record for it. 310 */ 311 tgt_inode = new_inode_dup(src_claim->cl_inode); 312 avl_insert(target, (void *)tgt_inode, where); 313 } 314 /* 315 * Now, how about this logical fragment? In 316 * theory, we should never see a duplicate, since 317 * a given lfn only exists once for a given inode. 318 * As such, we ignore duplicate hits. 319 */ 320 tgt_ref_key.ref_lfn = src_claim->cl_lfn; 321 tgt_ref = avl_find(&tgt_inode->id_fragments, 322 (void *)&tgt_ref_key, &where); 323 if (tgt_ref == NULL) { 324 /* 325 * Haven't seen it, add it. 326 */ 327 tgt_ref = (reference_t *)malloc( 328 sizeof (reference_t)); 329 if (tgt_ref == NULL) 330 errexit("Out of memory in " 331 "invert_frags\n"); 332 tgt_ref->ref_lfn = src_claim->cl_lfn; 333 tgt_ref->ref_pfn = src_frag->fr_pfn; 334 avl_insert(&tgt_inode->id_fragments, 335 (void *)tgt_ref, where); 336 } 337 src_claim = AVL_NEXT(&src_frag->fr_claimants, 338 src_claim); 339 } 340 src_frag = AVL_NEXT(source, src_frag); 341 } 342 } 343 344 /* 345 * Discard memory associated with the inverted fragments tree created 346 * by report_dups() via invert_frags(). 347 */ 348 static void 349 free_invert_frags(avl_tree_t *tree) 350 { 351 void *outer = NULL; /* traversal cookie */ 352 void *inner; /* traversal cookie */ 353 inode_dup_t *inode_dup; 354 reference_t *ref_dup; 355 356 while ((inode_dup = avl_destroy_nodes(tree, &outer)) != NULL) { 357 inner = NULL; 358 while ((ref_dup = avl_destroy_nodes(&inode_dup->id_fragments, 359 &inner)) != NULL) { 360 free((void *)ref_dup); 361 } 362 avl_destroy(&inode_dup->id_fragments); 363 free((void *)inode_dup); 364 } 365 avl_destroy(tree); 366 } 367 368 /* 369 * Discard all memory allocations associated with the current duplicates 370 * table. 371 */ 372 void 373 free_dup_state(void) 374 { 375 void *dup_cookie = NULL; 376 void *claim_cookie; 377 fragment_t *fragv; 378 claimant_t *claimv; 379 380 while ((fragv = avl_destroy_nodes(&dup_frags, &dup_cookie)) != NULL) { 381 claim_cookie = NULL; 382 while ((claimv = avl_destroy_nodes(&fragv->fr_claimants, 383 &claim_cookie)) != NULL) { 384 free((void *)claimv); 385 } 386 avl_destroy(&fragv->fr_claimants); 387 free((void *)fragv); 388 } 389 avl_destroy(&dup_frags); 390 } 391 392 /* 393 * If the given claimant has not been seen before, add it to DUP's 394 * list of them. It's not fatal for the same PFN/INODE/LFN to get 395 * added twice, because pass1b() will add the same dups that pass1() 396 * did, plus one. 397 */ 398 static int 399 increment_claimant(fragment_t *dup, fsck_ino_t ino, daddr32_t lfn) 400 { 401 avl_index_t where; 402 claimant_t *claimant; 403 claimant_t key; 404 int added = 0; 405 406 key.cl_inode = ino; 407 key.cl_lfn = lfn; 408 claimant = avl_find(&dup->fr_claimants, &key, &where); 409 if (claimant == NULL) { 410 if (debug) 411 (void) printf("inserting claimant\n"); 412 claimant = alloc_claimant(ino, lfn); 413 avl_insert(&dup->fr_claimants, (void *)claimant, where); 414 statemap[ino] |= INCLEAR; 415 /* 416 * If the inode is to be cleared and has zero links then remove 417 * the zero link bit as it will be cleared anyway. If INZLINK 418 * is being removed and it's a directory inode then add the 419 * inode to the orphan directory list. 420 */ 421 if (statemap[ino] & INZLINK) { 422 statemap[ino] &= ~INZLINK; 423 if (statemap[ino] & DSTATE) { 424 add_orphan_dir(ino); 425 } 426 } 427 added = 1; 428 } 429 430 return (added); 431 } 432 433 /* 434 * If the given claimant is on DUP's list, remove it. It is not 435 * an error for the claimant to not be on the list. 436 */ 437 static int 438 decrement_claimant(fragment_t *dup, fsck_ino_t ino, daddr32_t lfn) 439 { 440 avl_index_t where; 441 claimant_t *claimant; 442 claimant_t key; 443 int busy = 0; 444 445 key.cl_inode = ino; 446 key.cl_lfn = lfn; 447 claimant = avl_find(&dup->fr_claimants, &key, &where); 448 if (claimant != NULL) { 449 avl_remove(&dup->fr_claimants, claimant); 450 if (avl_numnodes(&dup->fr_claimants) == 0) { 451 avl_destroy(&dup->fr_claimants); 452 avl_remove(&dup_frags, (void *)dup); 453 free((void *)dup); 454 } else { 455 busy = 1; 456 } 457 } 458 459 return (busy); 460 } 461 462 static claimant_t * 463 alloc_claimant(fsck_ino_t inode, daddr32_t lfn) 464 { 465 claimant_t *new = (claimant_t *)malloc(sizeof (claimant_t)); 466 467 if (new == NULL) 468 errexit("Out of memory in alloc_claimant()\n"); 469 470 new->cl_inode = inode; 471 new->cl_lfn = lfn; 472 473 return (new); 474 } 475 476 static fragment_t * 477 alloc_dup(daddr32_t pfn) 478 { 479 fragment_t *new = (fragment_t *)malloc(sizeof (fragment_t)); 480 481 if (new == NULL) 482 errexit("Out of memory in alloc_dup()\n"); 483 484 new->fr_pfn = pfn; 485 avl_create(&new->fr_claimants, claimant_cmp, sizeof (fragment_t), 486 OFFSETOF(claimant_t, cl_avl)); 487 488 return (new); 489 } 490 491 /* 492 * Compare two fragment_t instances for avl_find(). It requires a 493 * return value of -1/0/1, so we can't just hand back left - right. 494 */ 495 static int 496 fragment_cmp(const void *vlp, const void *vrp) 497 { 498 const fragment_t *lp = (const fragment_t *)vlp; 499 const fragment_t *rp = (const fragment_t *)vrp; 500 int cmp = lp->fr_pfn - rp->fr_pfn; 501 502 if (cmp < 0) 503 cmp = -1; 504 else if (cmp > 0) 505 cmp = 1; 506 507 return (cmp); 508 } 509 510 /* 511 * Compare two claimant_t instances for avl_find(). It requires a 512 * return value of -1/0/1, so we can't just hand back left - right. 513 */ 514 static int 515 claimant_cmp(const void *vlp, const void *vrp) 516 { 517 const claimant_t *lp = (const claimant_t *)vlp; 518 const claimant_t *rp = (const claimant_t *)vrp; 519 int cmp; 520 521 cmp = lp->cl_inode - rp->cl_inode; 522 if (cmp == 0) { 523 /* 524 * lfn < 0 is a wildcard lfn match. 525 */ 526 if ((lp->cl_lfn >= 0) && (rp->cl_lfn >= 0)) 527 cmp = lp->cl_lfn - rp->cl_lfn; 528 } 529 530 if (cmp < 0) 531 cmp = -1; 532 else if (cmp > 0) 533 cmp = 1; 534 535 return (cmp); 536 } 537 538 static int 539 by_ino_cmp(const void *vlp, const void *vrp) 540 { 541 const inode_dup_t *lp = (const inode_dup_t *)vlp; 542 const inode_dup_t *rp = (const inode_dup_t *)vrp; 543 int cmp; 544 545 cmp = lp->id_ino - rp->id_ino; 546 547 if (cmp < 0) 548 cmp = -1; 549 else if (cmp > 0) 550 cmp = 1; 551 552 return (cmp); 553 } 554 555 static int 556 by_lfn_cmp(const void *vlp, const void *vrp) 557 { 558 const reference_t *lp = (const reference_t *)vlp; 559 const reference_t *rp = (const reference_t *)vrp; 560 int cmp; 561 562 cmp = lp->ref_lfn - rp->ref_lfn; 563 564 if (cmp < 0) 565 cmp = -1; 566 else if (cmp > 0) 567 cmp = 1; 568 569 return (cmp); 570 } 571 572 static inode_dup_t * 573 new_inode_dup(fsck_ino_t inode) 574 { 575 inode_dup_t *new; 576 577 new = (inode_dup_t *)malloc(sizeof (inode_dup_t)); 578 if (new == NULL) 579 errexit("Out of memory in new_inode_dup\n"); 580 new->id_ino = inode; 581 avl_create(&new->id_fragments, by_lfn_cmp, sizeof (reference_t), 582 OFFSETOF(reference_t, ref_avl)); 583 584 return (new); 585 } 586