1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * This file is part of UBIFS. 4 * 5 * Copyright (C) 2006-2008 Nokia Corporation. 6 * 7 * Authors: Adrian Hunter 8 * Artem Bityutskiy (Битюцкий Артём) 9 */ 10 11 /* 12 * This file contains journal replay code. It runs when the file-system is being 13 * mounted and requires no locking. 14 * 15 * The larger is the journal, the longer it takes to scan it, so the longer it 16 * takes to mount UBIFS. This is why the journal has limited size which may be 17 * changed depending on the system requirements. But a larger journal gives 18 * faster I/O speed because it writes the index less frequently. So this is a 19 * trade-off. Also, the journal is indexed by the in-memory index (TNC), so the 20 * larger is the journal, the more memory its index may consume. 21 */ 22 23 #include "ubifs.h" 24 #include <linux/list_sort.h> 25 #include <crypto/hash.h> 26 27 /** 28 * struct replay_entry - replay list entry. 29 * @lnum: logical eraseblock number of the node 30 * @offs: node offset 31 * @len: node length 32 * @deletion: non-zero if this entry corresponds to a node deletion 33 * @sqnum: node sequence number 34 * @list: links the replay list 35 * @key: node key 36 * @nm: directory entry name 37 * @old_size: truncation old size 38 * @new_size: truncation new size 39 * 40 * The replay process first scans all buds and builds the replay list, then 41 * sorts the replay list in nodes sequence number order, and then inserts all 42 * the replay entries to the TNC. 43 */ 44 struct replay_entry { 45 int lnum; 46 int offs; 47 int len; 48 u8 hash[UBIFS_HASH_ARR_SZ]; 49 unsigned int deletion:1; 50 unsigned long long sqnum; 51 struct list_head list; 52 union ubifs_key key; 53 union { 54 struct fscrypt_name nm; 55 struct { 56 loff_t old_size; 57 loff_t new_size; 58 }; 59 }; 60 }; 61 62 /** 63 * struct bud_entry - entry in the list of buds to replay. 64 * @list: next bud in the list 65 * @bud: bud description object 66 * @sqnum: reference node sequence number 67 * @free: free bytes in the bud 68 * @dirty: dirty bytes in the bud 69 */ 70 struct bud_entry { 71 struct list_head list; 72 struct ubifs_bud *bud; 73 unsigned long long sqnum; 74 int free; 75 int dirty; 76 }; 77 78 /** 79 * set_bud_lprops - set free and dirty space used by a bud. 80 * @c: UBIFS file-system description object 81 * @b: bud entry which describes the bud 82 * 83 * This function makes sure the LEB properties of bud @b are set correctly 84 * after the replay. Returns zero in case of success and a negative error code 85 * in case of failure. 86 */ 87 static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b) 88 { 89 const struct ubifs_lprops *lp; 90 int err = 0, dirty; 91 92 ubifs_get_lprops(c); 93 94 lp = ubifs_lpt_lookup_dirty(c, b->bud->lnum); 95 if (IS_ERR(lp)) { 96 err = PTR_ERR(lp); 97 goto out; 98 } 99 100 dirty = lp->dirty; 101 if (b->bud->start == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { 102 /* 103 * The LEB was added to the journal with a starting offset of 104 * zero which means the LEB must have been empty. The LEB 105 * property values should be @lp->free == @c->leb_size and 106 * @lp->dirty == 0, but that is not the case. The reason is that 107 * the LEB had been garbage collected before it became the bud, 108 * and there was no commit in between. The garbage collector 109 * resets the free and dirty space without recording it 110 * anywhere except lprops, so if there was no commit then 111 * lprops does not have that information. 112 * 113 * We do not need to adjust free space because the scan has told 114 * us the exact value which is recorded in the replay entry as 115 * @b->free. 116 * 117 * However we do need to subtract from the dirty space the 118 * amount of space that the garbage collector reclaimed, which 119 * is the whole LEB minus the amount of space that was free. 120 */ 121 dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum, 122 lp->free, lp->dirty); 123 dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum, 124 lp->free, lp->dirty); 125 dirty -= c->leb_size - lp->free; 126 /* 127 * If the replay order was perfect the dirty space would now be 128 * zero. The order is not perfect because the journal heads 129 * race with each other. This is not a problem but is does mean 130 * that the dirty space may temporarily exceed c->leb_size 131 * during the replay. 132 */ 133 if (dirty != 0) 134 dbg_mnt("LEB %d lp: %d free %d dirty replay: %d free %d dirty", 135 b->bud->lnum, lp->free, lp->dirty, b->free, 136 b->dirty); 137 } 138 lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty, 139 lp->flags | LPROPS_TAKEN, 0); 140 if (IS_ERR(lp)) { 141 err = PTR_ERR(lp); 142 goto out; 143 } 144 145 /* Make sure the journal head points to the latest bud */ 146 err = ubifs_wbuf_seek_nolock(&c->jheads[b->bud->jhead].wbuf, 147 b->bud->lnum, c->leb_size - b->free); 148 149 out: 150 ubifs_release_lprops(c); 151 return err; 152 } 153 154 /** 155 * set_buds_lprops - set free and dirty space for all replayed buds. 156 * @c: UBIFS file-system description object 157 * 158 * This function sets LEB properties for all replayed buds. Returns zero in 159 * case of success and a negative error code in case of failure. 160 */ 161 static int set_buds_lprops(struct ubifs_info *c) 162 { 163 struct bud_entry *b; 164 int err; 165 166 list_for_each_entry(b, &c->replay_buds, list) { 167 err = set_bud_lprops(c, b); 168 if (err) 169 return err; 170 } 171 172 return 0; 173 } 174 175 /** 176 * trun_remove_range - apply a replay entry for a truncation to the TNC. 177 * @c: UBIFS file-system description object 178 * @r: replay entry of truncation 179 */ 180 static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r) 181 { 182 unsigned min_blk, max_blk; 183 union ubifs_key min_key, max_key; 184 ino_t ino; 185 186 min_blk = r->new_size / UBIFS_BLOCK_SIZE; 187 if (r->new_size & (UBIFS_BLOCK_SIZE - 1)) 188 min_blk += 1; 189 190 max_blk = r->old_size / UBIFS_BLOCK_SIZE; 191 if ((r->old_size & (UBIFS_BLOCK_SIZE - 1)) == 0) 192 max_blk -= 1; 193 194 ino = key_inum(c, &r->key); 195 196 data_key_init(c, &min_key, ino, min_blk); 197 data_key_init(c, &max_key, ino, max_blk); 198 199 return ubifs_tnc_remove_range(c, &min_key, &max_key); 200 } 201 202 /** 203 * inode_still_linked - check whether inode in question will be re-linked. 204 * @c: UBIFS file-system description object 205 * @rino: replay entry to test 206 * 207 * O_TMPFILE files can be re-linked, this means link count goes from 0 to 1. 208 * This case needs special care, otherwise all references to the inode will 209 * be removed upon the first replay entry of an inode with link count 0 210 * is found. 211 */ 212 static bool inode_still_linked(struct ubifs_info *c, struct replay_entry *rino) 213 { 214 struct replay_entry *r; 215 216 ubifs_assert(c, rino->deletion); 217 ubifs_assert(c, key_type(c, &rino->key) == UBIFS_INO_KEY); 218 219 /* 220 * Find the most recent entry for the inode behind @rino and check 221 * whether it is a deletion. 222 */ 223 list_for_each_entry_reverse(r, &c->replay_list, list) { 224 ubifs_assert(c, r->sqnum >= rino->sqnum); 225 if (key_inum(c, &r->key) == key_inum(c, &rino->key) && 226 key_type(c, &r->key) == UBIFS_INO_KEY) 227 return r->deletion == 0; 228 229 } 230 231 ubifs_assert(c, 0); 232 return false; 233 } 234 235 /** 236 * apply_replay_entry - apply a replay entry to the TNC. 237 * @c: UBIFS file-system description object 238 * @r: replay entry to apply 239 * 240 * Apply a replay entry to the TNC. 241 */ 242 static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) 243 { 244 int err; 245 246 dbg_mntk(&r->key, "LEB %d:%d len %d deletion %d sqnum %llu key ", 247 r->lnum, r->offs, r->len, r->deletion, r->sqnum); 248 249 if (is_hash_key(c, &r->key)) { 250 if (r->deletion) 251 err = ubifs_tnc_remove_nm(c, &r->key, &r->nm); 252 else 253 err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs, 254 r->len, r->hash, &r->nm); 255 } else { 256 if (r->deletion) 257 switch (key_type(c, &r->key)) { 258 case UBIFS_INO_KEY: 259 { 260 ino_t inum = key_inum(c, &r->key); 261 262 if (inode_still_linked(c, r)) { 263 err = 0; 264 break; 265 } 266 267 err = ubifs_tnc_remove_ino(c, inum); 268 break; 269 } 270 case UBIFS_TRUN_KEY: 271 err = trun_remove_range(c, r); 272 break; 273 default: 274 err = ubifs_tnc_remove(c, &r->key); 275 break; 276 } 277 else 278 err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs, 279 r->len, r->hash); 280 if (err) 281 return err; 282 283 if (c->need_recovery) 284 err = ubifs_recover_size_accum(c, &r->key, r->deletion, 285 r->new_size); 286 } 287 288 return err; 289 } 290 291 /** 292 * replay_entries_cmp - compare 2 replay entries. 293 * @priv: UBIFS file-system description object 294 * @a: first replay entry 295 * @b: second replay entry 296 * 297 * This is a comparios function for 'list_sort()' which compares 2 replay 298 * entries @a and @b by comparing their sequence number. Returns %1 if @a has 299 * greater sequence number and %-1 otherwise. 300 */ 301 static int replay_entries_cmp(void *priv, const struct list_head *a, 302 const struct list_head *b) 303 { 304 struct ubifs_info *c = priv; 305 struct replay_entry *ra, *rb; 306 307 cond_resched(); 308 if (a == b) 309 return 0; 310 311 ra = list_entry(a, struct replay_entry, list); 312 rb = list_entry(b, struct replay_entry, list); 313 ubifs_assert(c, ra->sqnum != rb->sqnum); 314 if (ra->sqnum > rb->sqnum) 315 return 1; 316 return -1; 317 } 318 319 /** 320 * apply_replay_list - apply the replay list to the TNC. 321 * @c: UBIFS file-system description object 322 * 323 * Apply all entries in the replay list to the TNC. Returns zero in case of 324 * success and a negative error code in case of failure. 325 */ 326 static int apply_replay_list(struct ubifs_info *c) 327 { 328 struct replay_entry *r; 329 int err; 330 331 list_sort(c, &c->replay_list, &replay_entries_cmp); 332 333 list_for_each_entry(r, &c->replay_list, list) { 334 cond_resched(); 335 336 err = apply_replay_entry(c, r); 337 if (err) 338 return err; 339 } 340 341 return 0; 342 } 343 344 /** 345 * destroy_replay_list - destroy the replay. 346 * @c: UBIFS file-system description object 347 * 348 * Destroy the replay list. 349 */ 350 static void destroy_replay_list(struct ubifs_info *c) 351 { 352 struct replay_entry *r, *tmp; 353 354 list_for_each_entry_safe(r, tmp, &c->replay_list, list) { 355 if (is_hash_key(c, &r->key)) 356 kfree(fname_name(&r->nm)); 357 list_del(&r->list); 358 kfree(r); 359 } 360 } 361 362 /** 363 * insert_node - insert a node to the replay list 364 * @c: UBIFS file-system description object 365 * @lnum: node logical eraseblock number 366 * @offs: node offset 367 * @len: node length 368 * @hash: node hash 369 * @key: node key 370 * @sqnum: sequence number 371 * @deletion: non-zero if this is a deletion 372 * @used: number of bytes in use in a LEB 373 * @old_size: truncation old size 374 * @new_size: truncation new size 375 * 376 * This function inserts a scanned non-direntry node to the replay list. The 377 * replay list contains @struct replay_entry elements, and we sort this list in 378 * sequence number order before applying it. The replay list is applied at the 379 * very end of the replay process. Since the list is sorted in sequence number 380 * order, the older modifications are applied first. This function returns zero 381 * in case of success and a negative error code in case of failure. 382 */ 383 static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, 384 const u8 *hash, union ubifs_key *key, 385 unsigned long long sqnum, int deletion, int *used, 386 loff_t old_size, loff_t new_size) 387 { 388 struct replay_entry *r; 389 390 dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs); 391 392 if (key_inum(c, key) >= c->highest_inum) 393 c->highest_inum = key_inum(c, key); 394 395 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 396 if (!r) 397 return -ENOMEM; 398 399 if (!deletion) 400 *used += ALIGN(len, 8); 401 r->lnum = lnum; 402 r->offs = offs; 403 r->len = len; 404 ubifs_copy_hash(c, hash, r->hash); 405 r->deletion = !!deletion; 406 r->sqnum = sqnum; 407 key_copy(c, key, &r->key); 408 r->old_size = old_size; 409 r->new_size = new_size; 410 411 list_add_tail(&r->list, &c->replay_list); 412 return 0; 413 } 414 415 /** 416 * insert_dent - insert a directory entry node into the replay list. 417 * @c: UBIFS file-system description object 418 * @lnum: node logical eraseblock number 419 * @offs: node offset 420 * @len: node length 421 * @hash: node hash 422 * @key: node key 423 * @name: directory entry name 424 * @nlen: directory entry name length 425 * @sqnum: sequence number 426 * @deletion: non-zero if this is a deletion 427 * @used: number of bytes in use in a LEB 428 * 429 * This function inserts a scanned directory entry node or an extended 430 * attribute entry to the replay list. Returns zero in case of success and a 431 * negative error code in case of failure. 432 */ 433 static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, 434 const u8 *hash, union ubifs_key *key, 435 const char *name, int nlen, unsigned long long sqnum, 436 int deletion, int *used) 437 { 438 struct replay_entry *r; 439 char *nbuf; 440 441 dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs); 442 if (key_inum(c, key) >= c->highest_inum) 443 c->highest_inum = key_inum(c, key); 444 445 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 446 if (!r) 447 return -ENOMEM; 448 449 nbuf = kmalloc(nlen + 1, GFP_KERNEL); 450 if (!nbuf) { 451 kfree(r); 452 return -ENOMEM; 453 } 454 455 if (!deletion) 456 *used += ALIGN(len, 8); 457 r->lnum = lnum; 458 r->offs = offs; 459 r->len = len; 460 ubifs_copy_hash(c, hash, r->hash); 461 r->deletion = !!deletion; 462 r->sqnum = sqnum; 463 key_copy(c, key, &r->key); 464 fname_len(&r->nm) = nlen; 465 memcpy(nbuf, name, nlen); 466 nbuf[nlen] = '\0'; 467 fname_name(&r->nm) = nbuf; 468 469 list_add_tail(&r->list, &c->replay_list); 470 return 0; 471 } 472 473 /** 474 * ubifs_validate_entry - validate directory or extended attribute entry node. 475 * @c: UBIFS file-system description object 476 * @dent: the node to validate 477 * 478 * This function validates directory or extended attribute entry node @dent. 479 * Returns zero if the node is all right and a %-EINVAL if not. 480 */ 481 int ubifs_validate_entry(struct ubifs_info *c, 482 const struct ubifs_dent_node *dent) 483 { 484 int key_type = key_type_flash(c, dent->key); 485 int nlen = le16_to_cpu(dent->nlen); 486 487 if (le32_to_cpu(dent->ch.len) != nlen + UBIFS_DENT_NODE_SZ + 1 || 488 dent->type >= UBIFS_ITYPES_CNT || 489 nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 || 490 (key_type == UBIFS_XENT_KEY && strnlen(dent->name, nlen) != nlen) || 491 le64_to_cpu(dent->inum) > MAX_INUM) { 492 ubifs_err(c, "bad %s node", key_type == UBIFS_DENT_KEY ? 493 "directory entry" : "extended attribute entry"); 494 return -EINVAL; 495 } 496 497 if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) { 498 ubifs_err(c, "bad key type %d", key_type); 499 return -EINVAL; 500 } 501 502 return 0; 503 } 504 505 /** 506 * is_last_bud - check if the bud is the last in the journal head. 507 * @c: UBIFS file-system description object 508 * @bud: bud description object 509 * 510 * This function checks if bud @bud is the last bud in its journal head. This 511 * information is then used by 'replay_bud()' to decide whether the bud can 512 * have corruptions or not. Indeed, only last buds can be corrupted by power 513 * cuts. Returns %1 if this is the last bud, and %0 if not. 514 */ 515 static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud) 516 { 517 struct ubifs_jhead *jh = &c->jheads[bud->jhead]; 518 struct ubifs_bud *next; 519 uint32_t data; 520 int err; 521 522 if (list_is_last(&bud->list, &jh->buds_list)) 523 return 1; 524 525 /* 526 * The following is a quirk to make sure we work correctly with UBIFS 527 * images used with older UBIFS. 528 * 529 * Normally, the last bud will be the last in the journal head's list 530 * of bud. However, there is one exception if the UBIFS image belongs 531 * to older UBIFS. This is fairly unlikely: one would need to use old 532 * UBIFS, then have a power cut exactly at the right point, and then 533 * try to mount this image with new UBIFS. 534 * 535 * The exception is: it is possible to have 2 buds A and B, A goes 536 * before B, and B is the last, bud B is contains no data, and bud A is 537 * corrupted at the end. The reason is that in older versions when the 538 * journal code switched the next bud (from A to B), it first added a 539 * log reference node for the new bud (B), and only after this it 540 * synchronized the write-buffer of current bud (A). But later this was 541 * changed and UBIFS started to always synchronize the write-buffer of 542 * the bud (A) before writing the log reference for the new bud (B). 543 * 544 * But because older UBIFS always synchronized A's write-buffer before 545 * writing to B, we can recognize this exceptional situation but 546 * checking the contents of bud B - if it is empty, then A can be 547 * treated as the last and we can recover it. 548 * 549 * TODO: remove this piece of code in a couple of years (today it is 550 * 16.05.2011). 551 */ 552 next = list_entry(bud->list.next, struct ubifs_bud, list); 553 if (!list_is_last(&next->list, &jh->buds_list)) 554 return 0; 555 556 err = ubifs_leb_read(c, next->lnum, (char *)&data, next->start, 4, 1); 557 if (err) 558 return 0; 559 560 return data == 0xFFFFFFFF; 561 } 562 563 /* authenticate_sleb_hash is split out for stack usage */ 564 static int noinline_for_stack 565 authenticate_sleb_hash(struct ubifs_info *c, 566 struct shash_desc *log_hash, u8 *hash) 567 { 568 SHASH_DESC_ON_STACK(hash_desc, c->hash_tfm); 569 570 hash_desc->tfm = c->hash_tfm; 571 572 ubifs_shash_copy_state(c, log_hash, hash_desc); 573 return crypto_shash_final(hash_desc, hash); 574 } 575 576 /** 577 * authenticate_sleb - authenticate one scan LEB 578 * @c: UBIFS file-system description object 579 * @sleb: the scan LEB to authenticate 580 * @log_hash: 581 * @is_last: if true, this is the last LEB 582 * 583 * This function iterates over the buds of a single LEB authenticating all buds 584 * with the authentication nodes on this LEB. Authentication nodes are written 585 * after some buds and contain a HMAC covering the authentication node itself 586 * and the buds between the last authentication node and the current 587 * authentication node. It can happen that the last buds cannot be authenticated 588 * because a powercut happened when some nodes were written but not the 589 * corresponding authentication node. This function returns the number of nodes 590 * that could be authenticated or a negative error code. 591 */ 592 static int authenticate_sleb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, 593 struct shash_desc *log_hash, int is_last) 594 { 595 int n_not_auth = 0; 596 struct ubifs_scan_node *snod; 597 int n_nodes = 0; 598 int err; 599 u8 hash[UBIFS_HASH_ARR_SZ]; 600 u8 hmac[UBIFS_HMAC_ARR_SZ]; 601 602 if (!ubifs_authenticated(c)) 603 return sleb->nodes_cnt; 604 605 list_for_each_entry(snod, &sleb->nodes, list) { 606 607 n_nodes++; 608 609 if (snod->type == UBIFS_AUTH_NODE) { 610 struct ubifs_auth_node *auth = snod->node; 611 612 err = authenticate_sleb_hash(c, log_hash, hash); 613 if (err) 614 goto out; 615 616 err = crypto_shash_tfm_digest(c->hmac_tfm, hash, 617 c->hash_len, hmac); 618 if (err) 619 goto out; 620 621 err = ubifs_check_hmac(c, auth->hmac, hmac); 622 if (err) { 623 err = -EPERM; 624 goto out; 625 } 626 n_not_auth = 0; 627 } else { 628 err = crypto_shash_update(log_hash, snod->node, 629 snod->len); 630 if (err) 631 goto out; 632 n_not_auth++; 633 } 634 } 635 636 /* 637 * A powercut can happen when some nodes were written, but not yet 638 * the corresponding authentication node. This may only happen on 639 * the last bud though. 640 */ 641 if (n_not_auth) { 642 if (is_last) { 643 dbg_mnt("%d unauthenticated nodes found on LEB %d, Ignoring them", 644 n_not_auth, sleb->lnum); 645 err = 0; 646 } else { 647 dbg_mnt("%d unauthenticated nodes found on non-last LEB %d", 648 n_not_auth, sleb->lnum); 649 err = -EPERM; 650 } 651 } else { 652 err = 0; 653 } 654 out: 655 return err ? err : n_nodes - n_not_auth; 656 } 657 658 /** 659 * replay_bud - replay a bud logical eraseblock. 660 * @c: UBIFS file-system description object 661 * @b: bud entry which describes the bud 662 * 663 * This function replays bud @bud, recovers it if needed, and adds all nodes 664 * from this bud to the replay list. Returns zero in case of success and a 665 * negative error code in case of failure. 666 */ 667 static int replay_bud(struct ubifs_info *c, struct bud_entry *b) 668 { 669 int is_last = is_last_bud(c, b->bud); 670 int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start; 671 int n_nodes, n = 0; 672 struct ubifs_scan_leb *sleb; 673 struct ubifs_scan_node *snod; 674 675 dbg_mnt("replay bud LEB %d, head %d, offs %d, is_last %d", 676 lnum, b->bud->jhead, offs, is_last); 677 678 if (c->need_recovery && is_last) 679 /* 680 * Recover only last LEBs in the journal heads, because power 681 * cuts may cause corruptions only in these LEBs, because only 682 * these LEBs could possibly be written to at the power cut 683 * time. 684 */ 685 sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, b->bud->jhead); 686 else 687 sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); 688 if (IS_ERR(sleb)) 689 return PTR_ERR(sleb); 690 691 n_nodes = authenticate_sleb(c, sleb, b->bud->log_hash, is_last); 692 if (n_nodes < 0) { 693 err = n_nodes; 694 goto out; 695 } 696 697 ubifs_shash_copy_state(c, b->bud->log_hash, 698 c->jheads[b->bud->jhead].log_hash); 699 700 /* 701 * The bud does not have to start from offset zero - the beginning of 702 * the 'lnum' LEB may contain previously committed data. One of the 703 * things we have to do in replay is to correctly update lprops with 704 * newer information about this LEB. 705 * 706 * At this point lprops thinks that this LEB has 'c->leb_size - offs' 707 * bytes of free space because it only contain information about 708 * committed data. 709 * 710 * But we know that real amount of free space is 'c->leb_size - 711 * sleb->endpt', and the space in the 'lnum' LEB between 'offs' and 712 * 'sleb->endpt' is used by bud data. We have to correctly calculate 713 * how much of these data are dirty and update lprops with this 714 * information. 715 * 716 * The dirt in that LEB region is comprised of padding nodes, deletion 717 * nodes, truncation nodes and nodes which are obsoleted by subsequent 718 * nodes in this LEB. So instead of calculating clean space, we 719 * calculate used space ('used' variable). 720 */ 721 722 list_for_each_entry(snod, &sleb->nodes, list) { 723 u8 hash[UBIFS_HASH_ARR_SZ]; 724 int deletion = 0; 725 726 cond_resched(); 727 728 if (snod->sqnum >= SQNUM_WATERMARK) { 729 ubifs_err(c, "file system's life ended"); 730 goto out_dump; 731 } 732 733 ubifs_node_calc_hash(c, snod->node, hash); 734 735 if (snod->sqnum > c->max_sqnum) 736 c->max_sqnum = snod->sqnum; 737 738 switch (snod->type) { 739 case UBIFS_INO_NODE: 740 { 741 struct ubifs_ino_node *ino = snod->node; 742 loff_t new_size = le64_to_cpu(ino->size); 743 744 if (le32_to_cpu(ino->nlink) == 0) 745 deletion = 1; 746 err = insert_node(c, lnum, snod->offs, snod->len, hash, 747 &snod->key, snod->sqnum, deletion, 748 &used, 0, new_size); 749 break; 750 } 751 case UBIFS_DATA_NODE: 752 { 753 struct ubifs_data_node *dn = snod->node; 754 loff_t new_size = le32_to_cpu(dn->size) + 755 key_block(c, &snod->key) * 756 UBIFS_BLOCK_SIZE; 757 758 err = insert_node(c, lnum, snod->offs, snod->len, hash, 759 &snod->key, snod->sqnum, deletion, 760 &used, 0, new_size); 761 break; 762 } 763 case UBIFS_DENT_NODE: 764 case UBIFS_XENT_NODE: 765 { 766 struct ubifs_dent_node *dent = snod->node; 767 768 err = ubifs_validate_entry(c, dent); 769 if (err) 770 goto out_dump; 771 772 err = insert_dent(c, lnum, snod->offs, snod->len, hash, 773 &snod->key, dent->name, 774 le16_to_cpu(dent->nlen), snod->sqnum, 775 !le64_to_cpu(dent->inum), &used); 776 break; 777 } 778 case UBIFS_TRUN_NODE: 779 { 780 struct ubifs_trun_node *trun = snod->node; 781 loff_t old_size = le64_to_cpu(trun->old_size); 782 loff_t new_size = le64_to_cpu(trun->new_size); 783 union ubifs_key key; 784 785 /* Validate truncation node */ 786 if (old_size < 0 || old_size > c->max_inode_sz || 787 new_size < 0 || new_size > c->max_inode_sz || 788 old_size <= new_size) { 789 ubifs_err(c, "bad truncation node"); 790 goto out_dump; 791 } 792 793 /* 794 * Create a fake truncation key just to use the same 795 * functions which expect nodes to have keys. 796 */ 797 trun_key_init(c, &key, le32_to_cpu(trun->inum)); 798 err = insert_node(c, lnum, snod->offs, snod->len, hash, 799 &key, snod->sqnum, 1, &used, 800 old_size, new_size); 801 break; 802 } 803 case UBIFS_AUTH_NODE: 804 break; 805 default: 806 ubifs_err(c, "unexpected node type %d in bud LEB %d:%d", 807 snod->type, lnum, snod->offs); 808 err = -EINVAL; 809 goto out_dump; 810 } 811 if (err) 812 goto out; 813 814 n++; 815 if (n == n_nodes) 816 break; 817 } 818 819 ubifs_assert(c, ubifs_search_bud(c, lnum)); 820 ubifs_assert(c, sleb->endpt - offs >= used); 821 ubifs_assert(c, sleb->endpt % c->min_io_size == 0); 822 823 b->dirty = sleb->endpt - offs - used; 824 b->free = c->leb_size - sleb->endpt; 825 dbg_mnt("bud LEB %d replied: dirty %d, free %d", 826 lnum, b->dirty, b->free); 827 828 out: 829 ubifs_scan_destroy(sleb); 830 return err; 831 832 out_dump: 833 ubifs_err(c, "bad node is at LEB %d:%d", lnum, snod->offs); 834 ubifs_dump_node(c, snod->node, c->leb_size - snod->offs); 835 ubifs_scan_destroy(sleb); 836 return -EINVAL; 837 } 838 839 /** 840 * replay_buds - replay all buds. 841 * @c: UBIFS file-system description object 842 * 843 * This function returns zero in case of success and a negative error code in 844 * case of failure. 845 */ 846 static int replay_buds(struct ubifs_info *c) 847 { 848 struct bud_entry *b; 849 int err; 850 unsigned long long prev_sqnum = 0; 851 852 list_for_each_entry(b, &c->replay_buds, list) { 853 err = replay_bud(c, b); 854 if (err) 855 return err; 856 857 ubifs_assert(c, b->sqnum > prev_sqnum); 858 prev_sqnum = b->sqnum; 859 } 860 861 return 0; 862 } 863 864 /** 865 * destroy_bud_list - destroy the list of buds to replay. 866 * @c: UBIFS file-system description object 867 */ 868 static void destroy_bud_list(struct ubifs_info *c) 869 { 870 struct bud_entry *b; 871 872 while (!list_empty(&c->replay_buds)) { 873 b = list_entry(c->replay_buds.next, struct bud_entry, list); 874 list_del(&b->list); 875 kfree(b); 876 } 877 } 878 879 /** 880 * add_replay_bud - add a bud to the list of buds to replay. 881 * @c: UBIFS file-system description object 882 * @lnum: bud logical eraseblock number to replay 883 * @offs: bud start offset 884 * @jhead: journal head to which this bud belongs 885 * @sqnum: reference node sequence number 886 * 887 * This function returns zero in case of success and a negative error code in 888 * case of failure. 889 */ 890 static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, 891 unsigned long long sqnum) 892 { 893 struct ubifs_bud *bud; 894 struct bud_entry *b; 895 int err; 896 897 dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead); 898 899 bud = kmalloc(sizeof(struct ubifs_bud), GFP_KERNEL); 900 if (!bud) 901 return -ENOMEM; 902 903 b = kmalloc(sizeof(struct bud_entry), GFP_KERNEL); 904 if (!b) { 905 err = -ENOMEM; 906 goto out; 907 } 908 909 bud->lnum = lnum; 910 bud->start = offs; 911 bud->jhead = jhead; 912 bud->log_hash = ubifs_hash_get_desc(c); 913 if (IS_ERR(bud->log_hash)) { 914 err = PTR_ERR(bud->log_hash); 915 goto out; 916 } 917 918 ubifs_shash_copy_state(c, c->log_hash, bud->log_hash); 919 920 ubifs_add_bud(c, bud); 921 922 b->bud = bud; 923 b->sqnum = sqnum; 924 list_add_tail(&b->list, &c->replay_buds); 925 926 return 0; 927 out: 928 kfree(bud); 929 kfree(b); 930 931 return err; 932 } 933 934 /** 935 * validate_ref - validate a reference node. 936 * @c: UBIFS file-system description object 937 * @ref: the reference node to validate 938 * 939 * This function returns %1 if a bud reference already exists for the LEB. %0 is 940 * returned if the reference node is new, otherwise %-EINVAL is returned if 941 * validation failed. 942 */ 943 static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref) 944 { 945 struct ubifs_bud *bud; 946 int lnum = le32_to_cpu(ref->lnum); 947 unsigned int offs = le32_to_cpu(ref->offs); 948 unsigned int jhead = le32_to_cpu(ref->jhead); 949 950 /* 951 * ref->offs may point to the end of LEB when the journal head points 952 * to the end of LEB and we write reference node for it during commit. 953 * So this is why we require 'offs > c->leb_size'. 954 */ 955 if (jhead >= c->jhead_cnt || lnum >= c->leb_cnt || 956 lnum < c->main_first || offs > c->leb_size || 957 offs & (c->min_io_size - 1)) 958 return -EINVAL; 959 960 /* Make sure we have not already looked at this bud */ 961 bud = ubifs_search_bud(c, lnum); 962 if (bud) { 963 if (bud->jhead == jhead && bud->start <= offs) 964 return 1; 965 ubifs_err(c, "bud at LEB %d:%d was already referred", lnum, offs); 966 return -EINVAL; 967 } 968 969 return 0; 970 } 971 972 /** 973 * replay_log_leb - replay a log logical eraseblock. 974 * @c: UBIFS file-system description object 975 * @lnum: log logical eraseblock to replay 976 * @offs: offset to start replaying from 977 * @sbuf: scan buffer 978 * 979 * This function replays a log LEB and returns zero in case of success, %1 if 980 * this is the last LEB in the log, and a negative error code in case of 981 * failure. 982 */ 983 static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) 984 { 985 int err; 986 struct ubifs_scan_leb *sleb; 987 struct ubifs_scan_node *snod; 988 const struct ubifs_cs_node *node; 989 990 dbg_mnt("replay log LEB %d:%d", lnum, offs); 991 sleb = ubifs_scan(c, lnum, offs, sbuf, c->need_recovery); 992 if (IS_ERR(sleb)) { 993 if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery) 994 return PTR_ERR(sleb); 995 /* 996 * Note, the below function will recover this log LEB only if 997 * it is the last, because unclean reboots can possibly corrupt 998 * only the tail of the log. 999 */ 1000 sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); 1001 if (IS_ERR(sleb)) 1002 return PTR_ERR(sleb); 1003 } 1004 1005 if (sleb->nodes_cnt == 0) { 1006 err = 1; 1007 goto out; 1008 } 1009 1010 node = sleb->buf; 1011 snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list); 1012 if (c->cs_sqnum == 0) { 1013 /* 1014 * This is the first log LEB we are looking at, make sure that 1015 * the first node is a commit start node. Also record its 1016 * sequence number so that UBIFS can determine where the log 1017 * ends, because all nodes which were have higher sequence 1018 * numbers. 1019 */ 1020 if (snod->type != UBIFS_CS_NODE) { 1021 ubifs_err(c, "first log node at LEB %d:%d is not CS node", 1022 lnum, offs); 1023 goto out_dump; 1024 } 1025 if (le64_to_cpu(node->cmt_no) != c->cmt_no) { 1026 ubifs_err(c, "first CS node at LEB %d:%d has wrong commit number %llu expected %llu", 1027 lnum, offs, 1028 (unsigned long long)le64_to_cpu(node->cmt_no), 1029 c->cmt_no); 1030 goto out_dump; 1031 } 1032 1033 c->cs_sqnum = le64_to_cpu(node->ch.sqnum); 1034 dbg_mnt("commit start sqnum %llu", c->cs_sqnum); 1035 1036 err = ubifs_shash_init(c, c->log_hash); 1037 if (err) 1038 goto out; 1039 1040 err = ubifs_shash_update(c, c->log_hash, node, UBIFS_CS_NODE_SZ); 1041 if (err < 0) 1042 goto out; 1043 } 1044 1045 if (snod->sqnum < c->cs_sqnum) { 1046 /* 1047 * This means that we reached end of log and now 1048 * look to the older log data, which was already 1049 * committed but the eraseblock was not erased (UBIFS 1050 * only un-maps it). So this basically means we have to 1051 * exit with "end of log" code. 1052 */ 1053 err = 1; 1054 goto out; 1055 } 1056 1057 /* Make sure the first node sits at offset zero of the LEB */ 1058 if (snod->offs != 0) { 1059 ubifs_err(c, "first node is not at zero offset"); 1060 goto out_dump; 1061 } 1062 1063 list_for_each_entry(snod, &sleb->nodes, list) { 1064 cond_resched(); 1065 1066 if (snod->sqnum >= SQNUM_WATERMARK) { 1067 ubifs_err(c, "file system's life ended"); 1068 goto out_dump; 1069 } 1070 1071 if (snod->sqnum < c->cs_sqnum) { 1072 ubifs_err(c, "bad sqnum %llu, commit sqnum %llu", 1073 snod->sqnum, c->cs_sqnum); 1074 goto out_dump; 1075 } 1076 1077 if (snod->sqnum > c->max_sqnum) 1078 c->max_sqnum = snod->sqnum; 1079 1080 switch (snod->type) { 1081 case UBIFS_REF_NODE: { 1082 const struct ubifs_ref_node *ref = snod->node; 1083 1084 err = validate_ref(c, ref); 1085 if (err == 1) 1086 break; /* Already have this bud */ 1087 if (err) 1088 goto out_dump; 1089 1090 err = ubifs_shash_update(c, c->log_hash, ref, 1091 UBIFS_REF_NODE_SZ); 1092 if (err) 1093 goto out; 1094 1095 err = add_replay_bud(c, le32_to_cpu(ref->lnum), 1096 le32_to_cpu(ref->offs), 1097 le32_to_cpu(ref->jhead), 1098 snod->sqnum); 1099 if (err) 1100 goto out; 1101 1102 break; 1103 } 1104 case UBIFS_CS_NODE: 1105 /* Make sure it sits at the beginning of LEB */ 1106 if (snod->offs != 0) { 1107 ubifs_err(c, "unexpected node in log"); 1108 goto out_dump; 1109 } 1110 break; 1111 default: 1112 ubifs_err(c, "unexpected node in log"); 1113 goto out_dump; 1114 } 1115 } 1116 1117 if (sleb->endpt || c->lhead_offs >= c->leb_size) { 1118 c->lhead_lnum = lnum; 1119 c->lhead_offs = sleb->endpt; 1120 } 1121 1122 err = !sleb->endpt; 1123 out: 1124 ubifs_scan_destroy(sleb); 1125 return err; 1126 1127 out_dump: 1128 ubifs_err(c, "log error detected while replaying the log at LEB %d:%d", 1129 lnum, offs + snod->offs); 1130 ubifs_dump_node(c, snod->node, c->leb_size - snod->offs); 1131 ubifs_scan_destroy(sleb); 1132 return -EINVAL; 1133 } 1134 1135 /** 1136 * take_ihead - update the status of the index head in lprops to 'taken'. 1137 * @c: UBIFS file-system description object 1138 * 1139 * This function returns the amount of free space in the index head LEB or a 1140 * negative error code. 1141 */ 1142 static int take_ihead(struct ubifs_info *c) 1143 { 1144 const struct ubifs_lprops *lp; 1145 int err, free; 1146 1147 ubifs_get_lprops(c); 1148 1149 lp = ubifs_lpt_lookup_dirty(c, c->ihead_lnum); 1150 if (IS_ERR(lp)) { 1151 err = PTR_ERR(lp); 1152 goto out; 1153 } 1154 1155 free = lp->free; 1156 1157 lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, 1158 lp->flags | LPROPS_TAKEN, 0); 1159 if (IS_ERR(lp)) { 1160 err = PTR_ERR(lp); 1161 goto out; 1162 } 1163 1164 err = free; 1165 out: 1166 ubifs_release_lprops(c); 1167 return err; 1168 } 1169 1170 /** 1171 * ubifs_replay_journal - replay journal. 1172 * @c: UBIFS file-system description object 1173 * 1174 * This function scans the journal, replays and cleans it up. It makes sure all 1175 * memory data structures related to uncommitted journal are built (dirty TNC 1176 * tree, tree of buds, modified lprops, etc). 1177 */ 1178 int ubifs_replay_journal(struct ubifs_info *c) 1179 { 1180 int err, lnum, free; 1181 1182 BUILD_BUG_ON(UBIFS_TRUN_KEY > 5); 1183 1184 /* Update the status of the index head in lprops to 'taken' */ 1185 free = take_ihead(c); 1186 if (free < 0) 1187 return free; /* Error code */ 1188 1189 if (c->ihead_offs != c->leb_size - free) { 1190 ubifs_err(c, "bad index head LEB %d:%d", c->ihead_lnum, 1191 c->ihead_offs); 1192 return -EINVAL; 1193 } 1194 1195 dbg_mnt("start replaying the journal"); 1196 c->replaying = 1; 1197 lnum = c->ltail_lnum = c->lhead_lnum; 1198 1199 do { 1200 err = replay_log_leb(c, lnum, 0, c->sbuf); 1201 if (err == 1) { 1202 if (lnum != c->lhead_lnum) 1203 /* We hit the end of the log */ 1204 break; 1205 1206 /* 1207 * The head of the log must always start with the 1208 * "commit start" node on a properly formatted UBIFS. 1209 * But we found no nodes at all, which means that 1210 * something went wrong and we cannot proceed mounting 1211 * the file-system. 1212 */ 1213 ubifs_err(c, "no UBIFS nodes found at the log head LEB %d:%d, possibly corrupted", 1214 lnum, 0); 1215 err = -EINVAL; 1216 } 1217 if (err) 1218 goto out; 1219 lnum = ubifs_next_log_lnum(c, lnum); 1220 } while (lnum != c->ltail_lnum); 1221 1222 err = replay_buds(c); 1223 if (err) 1224 goto out; 1225 1226 err = apply_replay_list(c); 1227 if (err) 1228 goto out; 1229 1230 err = set_buds_lprops(c); 1231 if (err) 1232 goto out; 1233 1234 /* 1235 * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable 1236 * to roughly estimate index growth. Things like @c->bi.min_idx_lebs 1237 * depend on it. This means we have to initialize it to make sure 1238 * budgeting works properly. 1239 */ 1240 c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt); 1241 c->bi.uncommitted_idx *= c->max_idx_node_sz; 1242 1243 ubifs_assert(c, c->bud_bytes <= c->max_bud_bytes || c->need_recovery); 1244 dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, highest_inum %lu", 1245 c->lhead_lnum, c->lhead_offs, c->max_sqnum, 1246 (unsigned long)c->highest_inum); 1247 out: 1248 destroy_replay_list(c); 1249 destroy_bud_list(c); 1250 c->replaying = 0; 1251 return err; 1252 } 1253