1 /* 2 * This file is part of UBIFS. 3 * 4 * Copyright (C) 2006-2008 Nokia Corporation. 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 as published by 8 * the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 * more details. 14 * 15 * You should have received a copy of the GNU General Public License along with 16 * this program; if not, write to the Free Software Foundation, Inc., 51 17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 18 * 19 * Authors: Adrian Hunter 20 * Artem Bityutskiy (Битюцкий Артём) 21 */ 22 23 /* 24 * This file contains journal replay code. It runs when the file-system is being 25 * mounted and requires no locking. 26 * 27 * The larger is the journal, the longer it takes to scan it, so the longer it 28 * takes to mount UBIFS. This is why the journal has limited size which may be 29 * changed depending on the system requirements. But a larger journal gives 30 * faster I/O speed because it writes the index less frequently. So this is a 31 * trade-off. Also, the journal is indexed by the in-memory index (TNC), so the 32 * larger is the journal, the more memory its index may consume. 33 */ 34 35 #include "ubifs.h" 36 37 /* 38 * Replay flags. 39 * 40 * REPLAY_DELETION: node was deleted 41 * REPLAY_REF: node is a reference node 42 */ 43 enum { 44 REPLAY_DELETION = 1, 45 REPLAY_REF = 2, 46 }; 47 48 /** 49 * struct replay_entry - replay tree entry. 50 * @lnum: logical eraseblock number of the node 51 * @offs: node offset 52 * @len: node length 53 * @sqnum: node sequence number 54 * @flags: replay flags 55 * @rb: links the replay tree 56 * @key: node key 57 * @nm: directory entry name 58 * @old_size: truncation old size 59 * @new_size: truncation new size 60 * @free: amount of free space in a bud 61 * @dirty: amount of dirty space in a bud from padding and deletion nodes 62 * 63 * UBIFS journal replay must compare node sequence numbers, which means it must 64 * build a tree of node information to insert into the TNC. 65 */ 66 struct replay_entry { 67 int lnum; 68 int offs; 69 int len; 70 unsigned long long sqnum; 71 int flags; 72 struct rb_node rb; 73 union ubifs_key key; 74 union { 75 struct qstr nm; 76 struct { 77 loff_t old_size; 78 loff_t new_size; 79 }; 80 struct { 81 int free; 82 int dirty; 83 }; 84 }; 85 }; 86 87 /** 88 * struct bud_entry - entry in the list of buds to replay. 89 * @list: next bud in the list 90 * @bud: bud description object 91 * @free: free bytes in the bud 92 * @sqnum: reference node sequence number 93 */ 94 struct bud_entry { 95 struct list_head list; 96 struct ubifs_bud *bud; 97 int free; 98 unsigned long long sqnum; 99 }; 100 101 /** 102 * set_bud_lprops - set free and dirty space used by a bud. 103 * @c: UBIFS file-system description object 104 * @r: replay entry of bud 105 */ 106 static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) 107 { 108 const struct ubifs_lprops *lp; 109 int err = 0, dirty; 110 111 ubifs_get_lprops(c); 112 113 lp = ubifs_lpt_lookup_dirty(c, r->lnum); 114 if (IS_ERR(lp)) { 115 err = PTR_ERR(lp); 116 goto out; 117 } 118 119 dirty = lp->dirty; 120 if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { 121 /* 122 * The LEB was added to the journal with a starting offset of 123 * zero which means the LEB must have been empty. The LEB 124 * property values should be lp->free == c->leb_size and 125 * lp->dirty == 0, but that is not the case. The reason is that 126 * the LEB was garbage collected. The garbage collector resets 127 * the free and dirty space without recording it anywhere except 128 * lprops, so if there is not a commit then lprops does not have 129 * that information next time the file system is mounted. 130 * 131 * We do not need to adjust free space because the scan has told 132 * us the exact value which is recorded in the replay entry as 133 * r->free. 134 * 135 * However we do need to subtract from the dirty space the 136 * amount of space that the garbage collector reclaimed, which 137 * is the whole LEB minus the amount of space that was free. 138 */ 139 dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, 140 lp->free, lp->dirty); 141 dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, 142 lp->free, lp->dirty); 143 dirty -= c->leb_size - lp->free; 144 /* 145 * If the replay order was perfect the dirty space would now be 146 * zero. The order is not perfect because the the journal heads 147 * race with eachother. This is not a problem but is does mean 148 * that the dirty space may temporarily exceed c->leb_size 149 * during the replay. 150 */ 151 if (dirty != 0) 152 dbg_msg("LEB %d lp: %d free %d dirty " 153 "replay: %d free %d dirty", r->lnum, lp->free, 154 lp->dirty, r->free, r->dirty); 155 } 156 lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty, 157 lp->flags | LPROPS_TAKEN, 0); 158 if (IS_ERR(lp)) { 159 err = PTR_ERR(lp); 160 goto out; 161 } 162 out: 163 ubifs_release_lprops(c); 164 return err; 165 } 166 167 /** 168 * trun_remove_range - apply a replay entry for a truncation to the TNC. 169 * @c: UBIFS file-system description object 170 * @r: replay entry of truncation 171 */ 172 static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r) 173 { 174 unsigned min_blk, max_blk; 175 union ubifs_key min_key, max_key; 176 ino_t ino; 177 178 min_blk = r->new_size / UBIFS_BLOCK_SIZE; 179 if (r->new_size & (UBIFS_BLOCK_SIZE - 1)) 180 min_blk += 1; 181 182 max_blk = r->old_size / UBIFS_BLOCK_SIZE; 183 if ((r->old_size & (UBIFS_BLOCK_SIZE - 1)) == 0) 184 max_blk -= 1; 185 186 ino = key_inum(c, &r->key); 187 188 data_key_init(c, &min_key, ino, min_blk); 189 data_key_init(c, &max_key, ino, max_blk); 190 191 return ubifs_tnc_remove_range(c, &min_key, &max_key); 192 } 193 194 /** 195 * apply_replay_entry - apply a replay entry to the TNC. 196 * @c: UBIFS file-system description object 197 * @r: replay entry to apply 198 * 199 * Apply a replay entry to the TNC. 200 */ 201 static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) 202 { 203 int err, deletion = ((r->flags & REPLAY_DELETION) != 0); 204 205 dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum, 206 r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key)); 207 208 /* Set c->replay_sqnum to help deal with dangling branches. */ 209 c->replay_sqnum = r->sqnum; 210 211 if (r->flags & REPLAY_REF) 212 err = set_bud_lprops(c, r); 213 else if (is_hash_key(c, &r->key)) { 214 if (deletion) 215 err = ubifs_tnc_remove_nm(c, &r->key, &r->nm); 216 else 217 err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs, 218 r->len, &r->nm); 219 } else { 220 if (deletion) 221 switch (key_type(c, &r->key)) { 222 case UBIFS_INO_KEY: 223 { 224 ino_t inum = key_inum(c, &r->key); 225 226 err = ubifs_tnc_remove_ino(c, inum); 227 break; 228 } 229 case UBIFS_TRUN_KEY: 230 err = trun_remove_range(c, r); 231 break; 232 default: 233 err = ubifs_tnc_remove(c, &r->key); 234 break; 235 } 236 else 237 err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs, 238 r->len); 239 if (err) 240 return err; 241 242 if (c->need_recovery) 243 err = ubifs_recover_size_accum(c, &r->key, deletion, 244 r->new_size); 245 } 246 247 return err; 248 } 249 250 /** 251 * destroy_replay_tree - destroy the replay. 252 * @c: UBIFS file-system description object 253 * 254 * Destroy the replay tree. 255 */ 256 static void destroy_replay_tree(struct ubifs_info *c) 257 { 258 struct rb_node *this = c->replay_tree.rb_node; 259 struct replay_entry *r; 260 261 while (this) { 262 if (this->rb_left) { 263 this = this->rb_left; 264 continue; 265 } else if (this->rb_right) { 266 this = this->rb_right; 267 continue; 268 } 269 r = rb_entry(this, struct replay_entry, rb); 270 this = rb_parent(this); 271 if (this) { 272 if (this->rb_left == &r->rb) 273 this->rb_left = NULL; 274 else 275 this->rb_right = NULL; 276 } 277 if (is_hash_key(c, &r->key)) 278 kfree(r->nm.name); 279 kfree(r); 280 } 281 c->replay_tree = RB_ROOT; 282 } 283 284 /** 285 * apply_replay_tree - apply the replay tree to the TNC. 286 * @c: UBIFS file-system description object 287 * 288 * Apply the replay tree. 289 * Returns zero in case of success and a negative error code in case of 290 * failure. 291 */ 292 static int apply_replay_tree(struct ubifs_info *c) 293 { 294 struct rb_node *this = rb_first(&c->replay_tree); 295 296 while (this) { 297 struct replay_entry *r; 298 int err; 299 300 cond_resched(); 301 302 r = rb_entry(this, struct replay_entry, rb); 303 err = apply_replay_entry(c, r); 304 if (err) 305 return err; 306 this = rb_next(this); 307 } 308 return 0; 309 } 310 311 /** 312 * insert_node - insert a node to the replay tree. 313 * @c: UBIFS file-system description object 314 * @lnum: node logical eraseblock number 315 * @offs: node offset 316 * @len: node length 317 * @key: node key 318 * @sqnum: sequence number 319 * @deletion: non-zero if this is a deletion 320 * @used: number of bytes in use in a LEB 321 * @old_size: truncation old size 322 * @new_size: truncation new size 323 * 324 * This function inserts a scanned non-direntry node to the replay tree. The 325 * replay tree is an RB-tree containing @struct replay_entry elements which are 326 * indexed by the sequence number. The replay tree is applied at the very end 327 * of the replay process. Since the tree is sorted in sequence number order, 328 * the older modifications are applied first. This function returns zero in 329 * case of success and a negative error code in case of failure. 330 */ 331 static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, 332 union ubifs_key *key, unsigned long long sqnum, 333 int deletion, int *used, loff_t old_size, 334 loff_t new_size) 335 { 336 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; 337 struct replay_entry *r; 338 339 if (key_inum(c, key) >= c->highest_inum) 340 c->highest_inum = key_inum(c, key); 341 342 dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); 343 while (*p) { 344 parent = *p; 345 r = rb_entry(parent, struct replay_entry, rb); 346 if (sqnum < r->sqnum) { 347 p = &(*p)->rb_left; 348 continue; 349 } else if (sqnum > r->sqnum) { 350 p = &(*p)->rb_right; 351 continue; 352 } 353 ubifs_err("duplicate sqnum in replay"); 354 return -EINVAL; 355 } 356 357 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 358 if (!r) 359 return -ENOMEM; 360 361 if (!deletion) 362 *used += ALIGN(len, 8); 363 r->lnum = lnum; 364 r->offs = offs; 365 r->len = len; 366 r->sqnum = sqnum; 367 r->flags = (deletion ? REPLAY_DELETION : 0); 368 r->old_size = old_size; 369 r->new_size = new_size; 370 key_copy(c, key, &r->key); 371 372 rb_link_node(&r->rb, parent, p); 373 rb_insert_color(&r->rb, &c->replay_tree); 374 return 0; 375 } 376 377 /** 378 * insert_dent - insert a directory entry node into the replay tree. 379 * @c: UBIFS file-system description object 380 * @lnum: node logical eraseblock number 381 * @offs: node offset 382 * @len: node length 383 * @key: node key 384 * @name: directory entry name 385 * @nlen: directory entry name length 386 * @sqnum: sequence number 387 * @deletion: non-zero if this is a deletion 388 * @used: number of bytes in use in a LEB 389 * 390 * This function inserts a scanned directory entry node to the replay tree. 391 * Returns zero in case of success and a negative error code in case of 392 * failure. 393 * 394 * This function is also used for extended attribute entries because they are 395 * implemented as directory entry nodes. 396 */ 397 static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, 398 union ubifs_key *key, const char *name, int nlen, 399 unsigned long long sqnum, int deletion, int *used) 400 { 401 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; 402 struct replay_entry *r; 403 char *nbuf; 404 405 if (key_inum(c, key) >= c->highest_inum) 406 c->highest_inum = key_inum(c, key); 407 408 dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); 409 while (*p) { 410 parent = *p; 411 r = rb_entry(parent, struct replay_entry, rb); 412 if (sqnum < r->sqnum) { 413 p = &(*p)->rb_left; 414 continue; 415 } 416 if (sqnum > r->sqnum) { 417 p = &(*p)->rb_right; 418 continue; 419 } 420 ubifs_err("duplicate sqnum in replay"); 421 return -EINVAL; 422 } 423 424 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 425 if (!r) 426 return -ENOMEM; 427 nbuf = kmalloc(nlen + 1, GFP_KERNEL); 428 if (!nbuf) { 429 kfree(r); 430 return -ENOMEM; 431 } 432 433 if (!deletion) 434 *used += ALIGN(len, 8); 435 r->lnum = lnum; 436 r->offs = offs; 437 r->len = len; 438 r->sqnum = sqnum; 439 r->nm.len = nlen; 440 memcpy(nbuf, name, nlen); 441 nbuf[nlen] = '\0'; 442 r->nm.name = nbuf; 443 r->flags = (deletion ? REPLAY_DELETION : 0); 444 key_copy(c, key, &r->key); 445 446 ubifs_assert(!*p); 447 rb_link_node(&r->rb, parent, p); 448 rb_insert_color(&r->rb, &c->replay_tree); 449 return 0; 450 } 451 452 /** 453 * ubifs_validate_entry - validate directory or extended attribute entry node. 454 * @c: UBIFS file-system description object 455 * @dent: the node to validate 456 * 457 * This function validates directory or extended attribute entry node @dent. 458 * Returns zero if the node is all right and a %-EINVAL if not. 459 */ 460 int ubifs_validate_entry(struct ubifs_info *c, 461 const struct ubifs_dent_node *dent) 462 { 463 int key_type = key_type_flash(c, dent->key); 464 int nlen = le16_to_cpu(dent->nlen); 465 466 if (le32_to_cpu(dent->ch.len) != nlen + UBIFS_DENT_NODE_SZ + 1 || 467 dent->type >= UBIFS_ITYPES_CNT || 468 nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 || 469 strnlen(dent->name, nlen) != nlen || 470 le64_to_cpu(dent->inum) > MAX_INUM) { 471 ubifs_err("bad %s node", key_type == UBIFS_DENT_KEY ? 472 "directory entry" : "extended attribute entry"); 473 return -EINVAL; 474 } 475 476 if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) { 477 ubifs_err("bad key type %d", key_type); 478 return -EINVAL; 479 } 480 481 return 0; 482 } 483 484 /** 485 * replay_bud - replay a bud logical eraseblock. 486 * @c: UBIFS file-system description object 487 * @lnum: bud logical eraseblock number to replay 488 * @offs: bud start offset 489 * @jhead: journal head to which this bud belongs 490 * @free: amount of free space in the bud is returned here 491 * @dirty: amount of dirty space from padding and deletion nodes is returned 492 * here 493 * 494 * This function returns zero in case of success and a negative error code in 495 * case of failure. 496 */ 497 static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, 498 int *free, int *dirty) 499 { 500 int err = 0, used = 0; 501 struct ubifs_scan_leb *sleb; 502 struct ubifs_scan_node *snod; 503 struct ubifs_bud *bud; 504 505 dbg_mnt("replay bud LEB %d, head %d", lnum, jhead); 506 if (c->need_recovery) 507 sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD); 508 else 509 sleb = ubifs_scan(c, lnum, offs, c->sbuf); 510 if (IS_ERR(sleb)) 511 return PTR_ERR(sleb); 512 513 /* 514 * The bud does not have to start from offset zero - the beginning of 515 * the 'lnum' LEB may contain previously committed data. One of the 516 * things we have to do in replay is to correctly update lprops with 517 * newer information about this LEB. 518 * 519 * At this point lprops thinks that this LEB has 'c->leb_size - offs' 520 * bytes of free space because it only contain information about 521 * committed data. 522 * 523 * But we know that real amount of free space is 'c->leb_size - 524 * sleb->endpt', and the space in the 'lnum' LEB between 'offs' and 525 * 'sleb->endpt' is used by bud data. We have to correctly calculate 526 * how much of these data are dirty and update lprops with this 527 * information. 528 * 529 * The dirt in that LEB region is comprised of padding nodes, deletion 530 * nodes, truncation nodes and nodes which are obsoleted by subsequent 531 * nodes in this LEB. So instead of calculating clean space, we 532 * calculate used space ('used' variable). 533 */ 534 535 list_for_each_entry(snod, &sleb->nodes, list) { 536 int deletion = 0; 537 538 cond_resched(); 539 540 if (snod->sqnum >= SQNUM_WATERMARK) { 541 ubifs_err("file system's life ended"); 542 goto out_dump; 543 } 544 545 if (snod->sqnum > c->max_sqnum) 546 c->max_sqnum = snod->sqnum; 547 548 switch (snod->type) { 549 case UBIFS_INO_NODE: 550 { 551 struct ubifs_ino_node *ino = snod->node; 552 loff_t new_size = le64_to_cpu(ino->size); 553 554 if (le32_to_cpu(ino->nlink) == 0) 555 deletion = 1; 556 err = insert_node(c, lnum, snod->offs, snod->len, 557 &snod->key, snod->sqnum, deletion, 558 &used, 0, new_size); 559 break; 560 } 561 case UBIFS_DATA_NODE: 562 { 563 struct ubifs_data_node *dn = snod->node; 564 loff_t new_size = le32_to_cpu(dn->size) + 565 key_block(c, &snod->key) * 566 UBIFS_BLOCK_SIZE; 567 568 err = insert_node(c, lnum, snod->offs, snod->len, 569 &snod->key, snod->sqnum, deletion, 570 &used, 0, new_size); 571 break; 572 } 573 case UBIFS_DENT_NODE: 574 case UBIFS_XENT_NODE: 575 { 576 struct ubifs_dent_node *dent = snod->node; 577 578 err = ubifs_validate_entry(c, dent); 579 if (err) 580 goto out_dump; 581 582 err = insert_dent(c, lnum, snod->offs, snod->len, 583 &snod->key, dent->name, 584 le16_to_cpu(dent->nlen), snod->sqnum, 585 !le64_to_cpu(dent->inum), &used); 586 break; 587 } 588 case UBIFS_TRUN_NODE: 589 { 590 struct ubifs_trun_node *trun = snod->node; 591 loff_t old_size = le64_to_cpu(trun->old_size); 592 loff_t new_size = le64_to_cpu(trun->new_size); 593 union ubifs_key key; 594 595 /* Validate truncation node */ 596 if (old_size < 0 || old_size > c->max_inode_sz || 597 new_size < 0 || new_size > c->max_inode_sz || 598 old_size <= new_size) { 599 ubifs_err("bad truncation node"); 600 goto out_dump; 601 } 602 603 /* 604 * Create a fake truncation key just to use the same 605 * functions which expect nodes to have keys. 606 */ 607 trun_key_init(c, &key, le32_to_cpu(trun->inum)); 608 err = insert_node(c, lnum, snod->offs, snod->len, 609 &key, snod->sqnum, 1, &used, 610 old_size, new_size); 611 break; 612 } 613 default: 614 ubifs_err("unexpected node type %d in bud LEB %d:%d", 615 snod->type, lnum, snod->offs); 616 err = -EINVAL; 617 goto out_dump; 618 } 619 if (err) 620 goto out; 621 } 622 623 bud = ubifs_search_bud(c, lnum); 624 if (!bud) 625 BUG(); 626 627 ubifs_assert(sleb->endpt - offs >= used); 628 ubifs_assert(sleb->endpt % c->min_io_size == 0); 629 630 if (sleb->endpt + c->min_io_size <= c->leb_size && 631 !(c->vfs_sb->s_flags & MS_RDONLY)) 632 err = ubifs_wbuf_seek_nolock(&c->jheads[jhead].wbuf, lnum, 633 sleb->endpt, UBI_SHORTTERM); 634 635 *dirty = sleb->endpt - offs - used; 636 *free = c->leb_size - sleb->endpt; 637 638 out: 639 ubifs_scan_destroy(sleb); 640 return err; 641 642 out_dump: 643 ubifs_err("bad node is at LEB %d:%d", lnum, snod->offs); 644 dbg_dump_node(c, snod->node); 645 ubifs_scan_destroy(sleb); 646 return -EINVAL; 647 } 648 649 /** 650 * insert_ref_node - insert a reference node to the replay tree. 651 * @c: UBIFS file-system description object 652 * @lnum: node logical eraseblock number 653 * @offs: node offset 654 * @sqnum: sequence number 655 * @free: amount of free space in bud 656 * @dirty: amount of dirty space from padding and deletion nodes 657 * 658 * This function inserts a reference node to the replay tree and returns zero 659 * in case of success ort a negative error code in case of failure. 660 */ 661 static int insert_ref_node(struct ubifs_info *c, int lnum, int offs, 662 unsigned long long sqnum, int free, int dirty) 663 { 664 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; 665 struct replay_entry *r; 666 667 dbg_mnt("add ref LEB %d:%d", lnum, offs); 668 while (*p) { 669 parent = *p; 670 r = rb_entry(parent, struct replay_entry, rb); 671 if (sqnum < r->sqnum) { 672 p = &(*p)->rb_left; 673 continue; 674 } else if (sqnum > r->sqnum) { 675 p = &(*p)->rb_right; 676 continue; 677 } 678 ubifs_err("duplicate sqnum in replay tree"); 679 return -EINVAL; 680 } 681 682 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 683 if (!r) 684 return -ENOMEM; 685 686 r->lnum = lnum; 687 r->offs = offs; 688 r->sqnum = sqnum; 689 r->flags = REPLAY_REF; 690 r->free = free; 691 r->dirty = dirty; 692 693 rb_link_node(&r->rb, parent, p); 694 rb_insert_color(&r->rb, &c->replay_tree); 695 return 0; 696 } 697 698 /** 699 * replay_buds - replay all buds. 700 * @c: UBIFS file-system description object 701 * 702 * This function returns zero in case of success and a negative error code in 703 * case of failure. 704 */ 705 static int replay_buds(struct ubifs_info *c) 706 { 707 struct bud_entry *b; 708 int err, uninitialized_var(free), uninitialized_var(dirty); 709 710 list_for_each_entry(b, &c->replay_buds, list) { 711 err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead, 712 &free, &dirty); 713 if (err) 714 return err; 715 err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum, 716 free, dirty); 717 if (err) 718 return err; 719 } 720 721 return 0; 722 } 723 724 /** 725 * destroy_bud_list - destroy the list of buds to replay. 726 * @c: UBIFS file-system description object 727 */ 728 static void destroy_bud_list(struct ubifs_info *c) 729 { 730 struct bud_entry *b; 731 732 while (!list_empty(&c->replay_buds)) { 733 b = list_entry(c->replay_buds.next, struct bud_entry, list); 734 list_del(&b->list); 735 kfree(b); 736 } 737 } 738 739 /** 740 * add_replay_bud - add a bud to the list of buds to replay. 741 * @c: UBIFS file-system description object 742 * @lnum: bud logical eraseblock number to replay 743 * @offs: bud start offset 744 * @jhead: journal head to which this bud belongs 745 * @sqnum: reference node sequence number 746 * 747 * This function returns zero in case of success and a negative error code in 748 * case of failure. 749 */ 750 static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, 751 unsigned long long sqnum) 752 { 753 struct ubifs_bud *bud; 754 struct bud_entry *b; 755 756 dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead); 757 758 bud = kmalloc(sizeof(struct ubifs_bud), GFP_KERNEL); 759 if (!bud) 760 return -ENOMEM; 761 762 b = kmalloc(sizeof(struct bud_entry), GFP_KERNEL); 763 if (!b) { 764 kfree(bud); 765 return -ENOMEM; 766 } 767 768 bud->lnum = lnum; 769 bud->start = offs; 770 bud->jhead = jhead; 771 ubifs_add_bud(c, bud); 772 773 b->bud = bud; 774 b->sqnum = sqnum; 775 list_add_tail(&b->list, &c->replay_buds); 776 777 return 0; 778 } 779 780 /** 781 * validate_ref - validate a reference node. 782 * @c: UBIFS file-system description object 783 * @ref: the reference node to validate 784 * @ref_lnum: LEB number of the reference node 785 * @ref_offs: reference node offset 786 * 787 * This function returns %1 if a bud reference already exists for the LEB. %0 is 788 * returned if the reference node is new, otherwise %-EINVAL is returned if 789 * validation failed. 790 */ 791 static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref) 792 { 793 struct ubifs_bud *bud; 794 int lnum = le32_to_cpu(ref->lnum); 795 unsigned int offs = le32_to_cpu(ref->offs); 796 unsigned int jhead = le32_to_cpu(ref->jhead); 797 798 /* 799 * ref->offs may point to the end of LEB when the journal head points 800 * to the end of LEB and we write reference node for it during commit. 801 * So this is why we require 'offs > c->leb_size'. 802 */ 803 if (jhead >= c->jhead_cnt || lnum >= c->leb_cnt || 804 lnum < c->main_first || offs > c->leb_size || 805 offs & (c->min_io_size - 1)) 806 return -EINVAL; 807 808 /* Make sure we have not already looked at this bud */ 809 bud = ubifs_search_bud(c, lnum); 810 if (bud) { 811 if (bud->jhead == jhead && bud->start <= offs) 812 return 1; 813 ubifs_err("bud at LEB %d:%d was already referred", lnum, offs); 814 return -EINVAL; 815 } 816 817 return 0; 818 } 819 820 /** 821 * replay_log_leb - replay a log logical eraseblock. 822 * @c: UBIFS file-system description object 823 * @lnum: log logical eraseblock to replay 824 * @offs: offset to start replaying from 825 * @sbuf: scan buffer 826 * 827 * This function replays a log LEB and returns zero in case of success, %1 if 828 * this is the last LEB in the log, and a negative error code in case of 829 * failure. 830 */ 831 static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) 832 { 833 int err; 834 struct ubifs_scan_leb *sleb; 835 struct ubifs_scan_node *snod; 836 const struct ubifs_cs_node *node; 837 838 dbg_mnt("replay log LEB %d:%d", lnum, offs); 839 sleb = ubifs_scan(c, lnum, offs, sbuf); 840 if (IS_ERR(sleb)) { 841 if (c->need_recovery) 842 sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); 843 if (IS_ERR(sleb)) 844 return PTR_ERR(sleb); 845 } 846 847 if (sleb->nodes_cnt == 0) { 848 err = 1; 849 goto out; 850 } 851 852 node = sleb->buf; 853 854 snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list); 855 if (c->cs_sqnum == 0) { 856 /* 857 * This is the first log LEB we are looking at, make sure that 858 * the first node is a commit start node. Also record its 859 * sequence number so that UBIFS can determine where the log 860 * ends, because all nodes which were have higher sequence 861 * numbers. 862 */ 863 if (snod->type != UBIFS_CS_NODE) { 864 dbg_err("first log node at LEB %d:%d is not CS node", 865 lnum, offs); 866 goto out_dump; 867 } 868 if (le64_to_cpu(node->cmt_no) != c->cmt_no) { 869 dbg_err("first CS node at LEB %d:%d has wrong " 870 "commit number %llu expected %llu", 871 lnum, offs, 872 (unsigned long long)le64_to_cpu(node->cmt_no), 873 c->cmt_no); 874 goto out_dump; 875 } 876 877 c->cs_sqnum = le64_to_cpu(node->ch.sqnum); 878 dbg_mnt("commit start sqnum %llu", c->cs_sqnum); 879 } 880 881 if (snod->sqnum < c->cs_sqnum) { 882 /* 883 * This means that we reached end of log and now 884 * look to the older log data, which was already 885 * committed but the eraseblock was not erased (UBIFS 886 * only unmaps it). So this basically means we have to 887 * exit with "end of log" code. 888 */ 889 err = 1; 890 goto out; 891 } 892 893 /* Make sure the first node sits at offset zero of the LEB */ 894 if (snod->offs != 0) { 895 dbg_err("first node is not at zero offset"); 896 goto out_dump; 897 } 898 899 list_for_each_entry(snod, &sleb->nodes, list) { 900 901 cond_resched(); 902 903 if (snod->sqnum >= SQNUM_WATERMARK) { 904 ubifs_err("file system's life ended"); 905 goto out_dump; 906 } 907 908 if (snod->sqnum < c->cs_sqnum) { 909 dbg_err("bad sqnum %llu, commit sqnum %llu", 910 snod->sqnum, c->cs_sqnum); 911 goto out_dump; 912 } 913 914 if (snod->sqnum > c->max_sqnum) 915 c->max_sqnum = snod->sqnum; 916 917 switch (snod->type) { 918 case UBIFS_REF_NODE: { 919 const struct ubifs_ref_node *ref = snod->node; 920 921 err = validate_ref(c, ref); 922 if (err == 1) 923 break; /* Already have this bud */ 924 if (err) 925 goto out_dump; 926 927 err = add_replay_bud(c, le32_to_cpu(ref->lnum), 928 le32_to_cpu(ref->offs), 929 le32_to_cpu(ref->jhead), 930 snod->sqnum); 931 if (err) 932 goto out; 933 934 break; 935 } 936 case UBIFS_CS_NODE: 937 /* Make sure it sits at the beginning of LEB */ 938 if (snod->offs != 0) { 939 ubifs_err("unexpected node in log"); 940 goto out_dump; 941 } 942 break; 943 default: 944 ubifs_err("unexpected node in log"); 945 goto out_dump; 946 } 947 } 948 949 if (sleb->endpt || c->lhead_offs >= c->leb_size) { 950 c->lhead_lnum = lnum; 951 c->lhead_offs = sleb->endpt; 952 } 953 954 err = !sleb->endpt; 955 out: 956 ubifs_scan_destroy(sleb); 957 return err; 958 959 out_dump: 960 ubifs_err("log error detected while replying the log at LEB %d:%d", 961 lnum, offs + snod->offs); 962 dbg_dump_node(c, snod->node); 963 ubifs_scan_destroy(sleb); 964 return -EINVAL; 965 } 966 967 /** 968 * take_ihead - update the status of the index head in lprops to 'taken'. 969 * @c: UBIFS file-system description object 970 * 971 * This function returns the amount of free space in the index head LEB or a 972 * negative error code. 973 */ 974 static int take_ihead(struct ubifs_info *c) 975 { 976 const struct ubifs_lprops *lp; 977 int err, free; 978 979 ubifs_get_lprops(c); 980 981 lp = ubifs_lpt_lookup_dirty(c, c->ihead_lnum); 982 if (IS_ERR(lp)) { 983 err = PTR_ERR(lp); 984 goto out; 985 } 986 987 free = lp->free; 988 989 lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, 990 lp->flags | LPROPS_TAKEN, 0); 991 if (IS_ERR(lp)) { 992 err = PTR_ERR(lp); 993 goto out; 994 } 995 996 err = free; 997 out: 998 ubifs_release_lprops(c); 999 return err; 1000 } 1001 1002 /** 1003 * ubifs_replay_journal - replay journal. 1004 * @c: UBIFS file-system description object 1005 * 1006 * This function scans the journal, replays and cleans it up. It makes sure all 1007 * memory data structures related to uncommitted journal are built (dirty TNC 1008 * tree, tree of buds, modified lprops, etc). 1009 */ 1010 int ubifs_replay_journal(struct ubifs_info *c) 1011 { 1012 int err, i, lnum, offs, free; 1013 void *sbuf = NULL; 1014 1015 BUILD_BUG_ON(UBIFS_TRUN_KEY > 5); 1016 1017 /* Update the status of the index head in lprops to 'taken' */ 1018 free = take_ihead(c); 1019 if (free < 0) 1020 return free; /* Error code */ 1021 1022 if (c->ihead_offs != c->leb_size - free) { 1023 ubifs_err("bad index head LEB %d:%d", c->ihead_lnum, 1024 c->ihead_offs); 1025 return -EINVAL; 1026 } 1027 1028 sbuf = vmalloc(c->leb_size); 1029 if (!sbuf) 1030 return -ENOMEM; 1031 1032 dbg_mnt("start replaying the journal"); 1033 1034 c->replaying = 1; 1035 1036 lnum = c->ltail_lnum = c->lhead_lnum; 1037 offs = c->lhead_offs; 1038 1039 for (i = 0; i < c->log_lebs; i++, lnum++) { 1040 if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) { 1041 /* 1042 * The log is logically circular, we reached the last 1043 * LEB, switch to the first one. 1044 */ 1045 lnum = UBIFS_LOG_LNUM; 1046 offs = 0; 1047 } 1048 err = replay_log_leb(c, lnum, offs, sbuf); 1049 if (err == 1) 1050 /* We hit the end of the log */ 1051 break; 1052 if (err) 1053 goto out; 1054 offs = 0; 1055 } 1056 1057 err = replay_buds(c); 1058 if (err) 1059 goto out; 1060 1061 err = apply_replay_tree(c); 1062 if (err) 1063 goto out; 1064 1065 ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery); 1066 dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, " 1067 "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum, 1068 c->highest_inum); 1069 out: 1070 destroy_replay_tree(c); 1071 destroy_bud_list(c); 1072 vfree(sbuf); 1073 c->replaying = 0; 1074 return err; 1075 } 1076