1 /* 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 4 * 5 * This copyrighted material is made available to anyone wishing to use, 6 * modify, copy, or redistribute it subject to the terms and conditions 7 * of the GNU General Public License version 2. 8 */ 9 10 #include <linux/slab.h> 11 #include <linux/spinlock.h> 12 #include <linux/completion.h> 13 #include <linux/buffer_head.h> 14 #include <linux/gfs2_ondisk.h> 15 #include <linux/crc32.h> 16 #include <linux/slow-work.h> 17 18 #include "gfs2.h" 19 #include "incore.h" 20 #include "bmap.h" 21 #include "glock.h" 22 #include "glops.h" 23 #include "lops.h" 24 #include "meta_io.h" 25 #include "recovery.h" 26 #include "super.h" 27 #include "util.h" 28 #include "dir.h" 29 30 int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk, 31 struct buffer_head **bh) 32 { 33 struct gfs2_inode *ip = GFS2_I(jd->jd_inode); 34 struct gfs2_glock *gl = ip->i_gl; 35 int new = 0; 36 u64 dblock; 37 u32 extlen; 38 int error; 39 40 error = gfs2_extent_map(&ip->i_inode, blk, &new, &dblock, &extlen); 41 if (error) 42 return error; 43 if (!dblock) { 44 gfs2_consist_inode(ip); 45 return -EIO; 46 } 47 48 *bh = gfs2_meta_ra(gl, dblock, extlen); 49 50 return error; 51 } 52 53 int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where) 54 { 55 struct list_head *head = &sdp->sd_revoke_list; 56 struct gfs2_revoke_replay *rr; 57 int found = 0; 58 59 list_for_each_entry(rr, head, rr_list) { 60 if (rr->rr_blkno == blkno) { 61 found = 1; 62 break; 63 } 64 } 65 66 if (found) { 67 rr->rr_where = where; 68 return 0; 69 } 70 71 rr = kmalloc(sizeof(struct gfs2_revoke_replay), GFP_NOFS); 72 if (!rr) 73 return -ENOMEM; 74 75 rr->rr_blkno = blkno; 76 rr->rr_where = where; 77 list_add(&rr->rr_list, head); 78 79 return 1; 80 } 81 82 int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where) 83 { 84 struct gfs2_revoke_replay *rr; 85 int wrap, a, b, revoke; 86 int found = 0; 87 88 list_for_each_entry(rr, &sdp->sd_revoke_list, rr_list) { 89 if (rr->rr_blkno == blkno) { 90 found = 1; 91 break; 92 } 93 } 94 95 if (!found) 96 return 0; 97 98 wrap = (rr->rr_where < sdp->sd_replay_tail); 99 a = (sdp->sd_replay_tail < where); 100 b = (where < rr->rr_where); 101 revoke = (wrap) ? (a || b) : (a && b); 102 103 return revoke; 104 } 105 106 void gfs2_revoke_clean(struct gfs2_sbd *sdp) 107 { 108 struct list_head *head = &sdp->sd_revoke_list; 109 struct gfs2_revoke_replay *rr; 110 111 while (!list_empty(head)) { 112 rr = list_entry(head->next, struct gfs2_revoke_replay, rr_list); 113 list_del(&rr->rr_list); 114 kfree(rr); 115 } 116 } 117 118 static int gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf) 119 { 120 const struct gfs2_log_header *str = buf; 121 122 if (str->lh_header.mh_magic != cpu_to_be32(GFS2_MAGIC) || 123 str->lh_header.mh_type != cpu_to_be32(GFS2_METATYPE_LH)) 124 return 1; 125 126 lh->lh_sequence = be64_to_cpu(str->lh_sequence); 127 lh->lh_flags = be32_to_cpu(str->lh_flags); 128 lh->lh_tail = be32_to_cpu(str->lh_tail); 129 lh->lh_blkno = be32_to_cpu(str->lh_blkno); 130 lh->lh_hash = be32_to_cpu(str->lh_hash); 131 return 0; 132 } 133 134 /** 135 * get_log_header - read the log header for a given segment 136 * @jd: the journal 137 * @blk: the block to look at 138 * @lh: the log header to return 139 * 140 * Read the log header for a given segement in a given journal. Do a few 141 * sanity checks on it. 142 * 143 * Returns: 0 on success, 144 * 1 if the header was invalid or incomplete, 145 * errno on error 146 */ 147 148 static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk, 149 struct gfs2_log_header_host *head) 150 { 151 struct buffer_head *bh; 152 struct gfs2_log_header_host uninitialized_var(lh); 153 const u32 nothing = 0; 154 u32 hash; 155 int error; 156 157 error = gfs2_replay_read_block(jd, blk, &bh); 158 if (error) 159 return error; 160 161 hash = crc32_le((u32)~0, bh->b_data, sizeof(struct gfs2_log_header) - 162 sizeof(u32)); 163 hash = crc32_le(hash, (unsigned char const *)¬hing, sizeof(nothing)); 164 hash ^= (u32)~0; 165 error = gfs2_log_header_in(&lh, bh->b_data); 166 brelse(bh); 167 168 if (error || lh.lh_blkno != blk || lh.lh_hash != hash) 169 return 1; 170 171 *head = lh; 172 173 return 0; 174 } 175 176 /** 177 * find_good_lh - find a good log header 178 * @jd: the journal 179 * @blk: the segment to start searching from 180 * @lh: the log header to fill in 181 * @forward: if true search forward in the log, else search backward 182 * 183 * Call get_log_header() to get a log header for a segment, but if the 184 * segment is bad, either scan forward or backward until we find a good one. 185 * 186 * Returns: errno 187 */ 188 189 static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk, 190 struct gfs2_log_header_host *head) 191 { 192 unsigned int orig_blk = *blk; 193 int error; 194 195 for (;;) { 196 error = get_log_header(jd, *blk, head); 197 if (error <= 0) 198 return error; 199 200 if (++*blk == jd->jd_blocks) 201 *blk = 0; 202 203 if (*blk == orig_blk) { 204 gfs2_consist_inode(GFS2_I(jd->jd_inode)); 205 return -EIO; 206 } 207 } 208 } 209 210 /** 211 * jhead_scan - make sure we've found the head of the log 212 * @jd: the journal 213 * @head: this is filled in with the log descriptor of the head 214 * 215 * At this point, seg and lh should be either the head of the log or just 216 * before. Scan forward until we find the head. 217 * 218 * Returns: errno 219 */ 220 221 static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head) 222 { 223 unsigned int blk = head->lh_blkno; 224 struct gfs2_log_header_host lh; 225 int error; 226 227 for (;;) { 228 if (++blk == jd->jd_blocks) 229 blk = 0; 230 231 error = get_log_header(jd, blk, &lh); 232 if (error < 0) 233 return error; 234 if (error == 1) 235 continue; 236 237 if (lh.lh_sequence == head->lh_sequence) { 238 gfs2_consist_inode(GFS2_I(jd->jd_inode)); 239 return -EIO; 240 } 241 if (lh.lh_sequence < head->lh_sequence) 242 break; 243 244 *head = lh; 245 } 246 247 return 0; 248 } 249 250 /** 251 * gfs2_find_jhead - find the head of a log 252 * @jd: the journal 253 * @head: the log descriptor for the head of the log is returned here 254 * 255 * Do a binary search of a journal and find the valid log entry with the 256 * highest sequence number. (i.e. the log head) 257 * 258 * Returns: errno 259 */ 260 261 int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head) 262 { 263 struct gfs2_log_header_host lh_1, lh_m; 264 u32 blk_1, blk_2, blk_m; 265 int error; 266 267 blk_1 = 0; 268 blk_2 = jd->jd_blocks - 1; 269 270 for (;;) { 271 blk_m = (blk_1 + blk_2) / 2; 272 273 error = find_good_lh(jd, &blk_1, &lh_1); 274 if (error) 275 return error; 276 277 error = find_good_lh(jd, &blk_m, &lh_m); 278 if (error) 279 return error; 280 281 if (blk_1 == blk_m || blk_m == blk_2) 282 break; 283 284 if (lh_1.lh_sequence <= lh_m.lh_sequence) 285 blk_1 = blk_m; 286 else 287 blk_2 = blk_m; 288 } 289 290 error = jhead_scan(jd, &lh_1); 291 if (error) 292 return error; 293 294 *head = lh_1; 295 296 return error; 297 } 298 299 /** 300 * foreach_descriptor - go through the active part of the log 301 * @jd: the journal 302 * @start: the first log header in the active region 303 * @end: the last log header (don't process the contents of this entry)) 304 * 305 * Call a given function once for every log descriptor in the active 306 * portion of the log. 307 * 308 * Returns: errno 309 */ 310 311 static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start, 312 unsigned int end, int pass) 313 { 314 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); 315 struct buffer_head *bh; 316 struct gfs2_log_descriptor *ld; 317 int error = 0; 318 u32 length; 319 __be64 *ptr; 320 unsigned int offset = sizeof(struct gfs2_log_descriptor); 321 offset += sizeof(__be64) - 1; 322 offset &= ~(sizeof(__be64) - 1); 323 324 while (start != end) { 325 error = gfs2_replay_read_block(jd, start, &bh); 326 if (error) 327 return error; 328 if (gfs2_meta_check(sdp, bh)) { 329 brelse(bh); 330 return -EIO; 331 } 332 ld = (struct gfs2_log_descriptor *)bh->b_data; 333 length = be32_to_cpu(ld->ld_length); 334 335 if (be32_to_cpu(ld->ld_header.mh_type) == GFS2_METATYPE_LH) { 336 struct gfs2_log_header_host lh; 337 error = get_log_header(jd, start, &lh); 338 if (!error) { 339 gfs2_replay_incr_blk(sdp, &start); 340 brelse(bh); 341 continue; 342 } 343 if (error == 1) { 344 gfs2_consist_inode(GFS2_I(jd->jd_inode)); 345 error = -EIO; 346 } 347 brelse(bh); 348 return error; 349 } else if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LD)) { 350 brelse(bh); 351 return -EIO; 352 } 353 ptr = (__be64 *)(bh->b_data + offset); 354 error = lops_scan_elements(jd, start, ld, ptr, pass); 355 if (error) { 356 brelse(bh); 357 return error; 358 } 359 360 while (length--) 361 gfs2_replay_incr_blk(sdp, &start); 362 363 brelse(bh); 364 } 365 366 return 0; 367 } 368 369 /** 370 * clean_journal - mark a dirty journal as being clean 371 * @sdp: the filesystem 372 * @jd: the journal 373 * @gl: the journal's glock 374 * @head: the head journal to start from 375 * 376 * Returns: errno 377 */ 378 379 static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head) 380 { 381 struct gfs2_inode *ip = GFS2_I(jd->jd_inode); 382 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); 383 unsigned int lblock; 384 struct gfs2_log_header *lh; 385 u32 hash; 386 struct buffer_head *bh; 387 int error; 388 struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 }; 389 390 lblock = head->lh_blkno; 391 gfs2_replay_incr_blk(sdp, &lblock); 392 bh_map.b_size = 1 << ip->i_inode.i_blkbits; 393 error = gfs2_block_map(&ip->i_inode, lblock, &bh_map, 0); 394 if (error) 395 return error; 396 if (!bh_map.b_blocknr) { 397 gfs2_consist_inode(ip); 398 return -EIO; 399 } 400 401 bh = sb_getblk(sdp->sd_vfs, bh_map.b_blocknr); 402 lock_buffer(bh); 403 memset(bh->b_data, 0, bh->b_size); 404 set_buffer_uptodate(bh); 405 clear_buffer_dirty(bh); 406 unlock_buffer(bh); 407 408 lh = (struct gfs2_log_header *)bh->b_data; 409 memset(lh, 0, sizeof(struct gfs2_log_header)); 410 lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC); 411 lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH); 412 lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH); 413 lh->lh_sequence = cpu_to_be64(head->lh_sequence + 1); 414 lh->lh_flags = cpu_to_be32(GFS2_LOG_HEAD_UNMOUNT); 415 lh->lh_blkno = cpu_to_be32(lblock); 416 hash = gfs2_disk_hash((const char *)lh, sizeof(struct gfs2_log_header)); 417 lh->lh_hash = cpu_to_be32(hash); 418 419 set_buffer_dirty(bh); 420 if (sync_dirty_buffer(bh)) 421 gfs2_io_error_bh(sdp, bh); 422 brelse(bh); 423 424 return error; 425 } 426 427 428 static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid, 429 unsigned int message) 430 { 431 char env_jid[20]; 432 char env_status[20]; 433 char *envp[] = { env_jid, env_status, NULL }; 434 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 435 ls->ls_recover_jid_done = jid; 436 ls->ls_recover_jid_status = message; 437 sprintf(env_jid, "JID=%d", jid); 438 sprintf(env_status, "RECOVERY=%s", 439 message == LM_RD_SUCCESS ? "Done" : "Failed"); 440 kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp); 441 } 442 443 static int gfs2_recover_get_ref(struct slow_work *work) 444 { 445 struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work); 446 if (test_and_set_bit(JDF_RECOVERY, &jd->jd_flags)) 447 return -EBUSY; 448 return 0; 449 } 450 451 static void gfs2_recover_put_ref(struct slow_work *work) 452 { 453 struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work); 454 clear_bit(JDF_RECOVERY, &jd->jd_flags); 455 smp_mb__after_clear_bit(); 456 wake_up_bit(&jd->jd_flags, JDF_RECOVERY); 457 } 458 459 static void gfs2_recover_work(struct slow_work *work) 460 { 461 struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work); 462 struct gfs2_inode *ip = GFS2_I(jd->jd_inode); 463 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); 464 struct gfs2_log_header_host head; 465 struct gfs2_holder j_gh, ji_gh, t_gh; 466 unsigned long t; 467 int ro = 0; 468 unsigned int pass; 469 int error; 470 471 if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) { 472 fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n", 473 jd->jd_jid); 474 475 /* Acquire the journal lock so we can do recovery */ 476 477 error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops, 478 LM_ST_EXCLUSIVE, 479 LM_FLAG_NOEXP | LM_FLAG_TRY | GL_NOCACHE, 480 &j_gh); 481 switch (error) { 482 case 0: 483 break; 484 485 case GLR_TRYFAILED: 486 fs_info(sdp, "jid=%u: Busy\n", jd->jd_jid); 487 error = 0; 488 489 default: 490 goto fail; 491 }; 492 493 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 494 LM_FLAG_NOEXP | GL_NOCACHE, &ji_gh); 495 if (error) 496 goto fail_gunlock_j; 497 } else { 498 fs_info(sdp, "jid=%u, already locked for use\n", jd->jd_jid); 499 } 500 501 fs_info(sdp, "jid=%u: Looking at journal...\n", jd->jd_jid); 502 503 error = gfs2_jdesc_check(jd); 504 if (error) 505 goto fail_gunlock_ji; 506 507 error = gfs2_find_jhead(jd, &head); 508 if (error) 509 goto fail_gunlock_ji; 510 511 if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) { 512 fs_info(sdp, "jid=%u: Acquiring the transaction lock...\n", 513 jd->jd_jid); 514 515 t = jiffies; 516 517 /* Acquire a shared hold on the transaction lock */ 518 519 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, 520 LM_FLAG_NOEXP | LM_FLAG_PRIORITY | 521 GL_NOCACHE, &t_gh); 522 if (error) 523 goto fail_gunlock_ji; 524 525 if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) { 526 if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) 527 ro = 1; 528 } else { 529 if (sdp->sd_vfs->s_flags & MS_RDONLY) { 530 /* check if device itself is read-only */ 531 ro = bdev_read_only(sdp->sd_vfs->s_bdev); 532 if (!ro) { 533 fs_info(sdp, "recovery required on " 534 "read-only filesystem.\n"); 535 fs_info(sdp, "write access will be " 536 "enabled during recovery.\n"); 537 } 538 } 539 } 540 541 if (ro) { 542 fs_warn(sdp, "jid=%u: Can't replay: read-only block " 543 "device\n", jd->jd_jid); 544 error = -EROFS; 545 goto fail_gunlock_tr; 546 } 547 548 fs_info(sdp, "jid=%u: Replaying journal...\n", jd->jd_jid); 549 550 for (pass = 0; pass < 2; pass++) { 551 lops_before_scan(jd, &head, pass); 552 error = foreach_descriptor(jd, head.lh_tail, 553 head.lh_blkno, pass); 554 lops_after_scan(jd, error, pass); 555 if (error) 556 goto fail_gunlock_tr; 557 } 558 559 error = clean_journal(jd, &head); 560 if (error) 561 goto fail_gunlock_tr; 562 563 gfs2_glock_dq_uninit(&t_gh); 564 t = DIV_ROUND_UP(jiffies - t, HZ); 565 fs_info(sdp, "jid=%u: Journal replayed in %lus\n", 566 jd->jd_jid, t); 567 } 568 569 if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) 570 gfs2_glock_dq_uninit(&ji_gh); 571 572 gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS); 573 574 if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) 575 gfs2_glock_dq_uninit(&j_gh); 576 577 fs_info(sdp, "jid=%u: Done\n", jd->jd_jid); 578 return; 579 580 fail_gunlock_tr: 581 gfs2_glock_dq_uninit(&t_gh); 582 fail_gunlock_ji: 583 if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) { 584 gfs2_glock_dq_uninit(&ji_gh); 585 fail_gunlock_j: 586 gfs2_glock_dq_uninit(&j_gh); 587 } 588 589 fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done"); 590 591 fail: 592 gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP); 593 } 594 595 struct slow_work_ops gfs2_recover_ops = { 596 .get_ref = gfs2_recover_get_ref, 597 .put_ref = gfs2_recover_put_ref, 598 .execute = gfs2_recover_work, 599 }; 600 601 602 static int gfs2_recovery_wait(void *word) 603 { 604 schedule(); 605 return 0; 606 } 607 608 int gfs2_recover_journal(struct gfs2_jdesc *jd) 609 { 610 int rv; 611 rv = slow_work_enqueue(&jd->jd_work); 612 if (rv) 613 return rv; 614 wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait, TASK_UNINTERRUPTIBLE); 615 return 0; 616 } 617 618