1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * This file is part of UBIFS. 4 * 5 * Copyright (C) 2006-2008 Nokia Corporation. 6 * 7 * Authors: Adrian Hunter 8 * Artem Bityutskiy (Битюцкий Артём) 9 */ 10 11 /* 12 * This file contains journal replay code. It runs when the file-system is being 13 * mounted and requires no locking. 14 * 15 * The larger is the journal, the longer it takes to scan it, so the longer it 16 * takes to mount UBIFS. This is why the journal has limited size which may be 17 * changed depending on the system requirements. But a larger journal gives 18 * faster I/O speed because it writes the index less frequently. So this is a 19 * trade-off. Also, the journal is indexed by the in-memory index (TNC), so the 20 * larger is the journal, the more memory its index may consume. 21 */ 22 23 #include "ubifs.h" 24 #include <linux/list_sort.h> 25 #include <crypto/hash.h> 26 27 /** 28 * struct replay_entry - replay list entry. 29 * @lnum: logical eraseblock number of the node 30 * @offs: node offset 31 * @len: node length 32 * @deletion: non-zero if this entry corresponds to a node deletion 33 * @sqnum: node sequence number 34 * @list: links the replay list 35 * @key: node key 36 * @nm: directory entry name 37 * @old_size: truncation old size 38 * @new_size: truncation new size 39 * 40 * The replay process first scans all buds and builds the replay list, then 41 * sorts the replay list in nodes sequence number order, and then inserts all 42 * the replay entries to the TNC. 43 */ 44 struct replay_entry { 45 int lnum; 46 int offs; 47 int len; 48 u8 hash[UBIFS_HASH_ARR_SZ]; 49 unsigned int deletion:1; 50 unsigned long long sqnum; 51 struct list_head list; 52 union ubifs_key key; 53 union { 54 struct fscrypt_name nm; 55 struct { 56 loff_t old_size; 57 loff_t new_size; 58 }; 59 }; 60 }; 61 62 /** 63 * struct bud_entry - entry in the list of buds to replay. 64 * @list: next bud in the list 65 * @bud: bud description object 66 * @sqnum: reference node sequence number 67 * @free: free bytes in the bud 68 * @dirty: dirty bytes in the bud 69 */ 70 struct bud_entry { 71 struct list_head list; 72 struct ubifs_bud *bud; 73 unsigned long long sqnum; 74 int free; 75 int dirty; 76 }; 77 78 /** 79 * set_bud_lprops - set free and dirty space used by a bud. 80 * @c: UBIFS file-system description object 81 * @b: bud entry which describes the bud 82 * 83 * This function makes sure the LEB properties of bud @b are set correctly 84 * after the replay. Returns zero in case of success and a negative error code 85 * in case of failure. 86 */ 87 static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b) 88 { 89 const struct ubifs_lprops *lp; 90 int err = 0, dirty; 91 92 ubifs_get_lprops(c); 93 94 lp = ubifs_lpt_lookup_dirty(c, b->bud->lnum); 95 if (IS_ERR(lp)) { 96 err = PTR_ERR(lp); 97 goto out; 98 } 99 100 dirty = lp->dirty; 101 if (b->bud->start == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { 102 /* 103 * The LEB was added to the journal with a starting offset of 104 * zero which means the LEB must have been empty. The LEB 105 * property values should be @lp->free == @c->leb_size and 106 * @lp->dirty == 0, but that is not the case. The reason is that 107 * the LEB had been garbage collected before it became the bud, 108 * and there was no commit in between. The garbage collector 109 * resets the free and dirty space without recording it 110 * anywhere except lprops, so if there was no commit then 111 * lprops does not have that information. 112 * 113 * We do not need to adjust free space because the scan has told 114 * us the exact value which is recorded in the replay entry as 115 * @b->free. 116 * 117 * However we do need to subtract from the dirty space the 118 * amount of space that the garbage collector reclaimed, which 119 * is the whole LEB minus the amount of space that was free. 120 */ 121 dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum, 122 lp->free, lp->dirty); 123 dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum, 124 lp->free, lp->dirty); 125 dirty -= c->leb_size - lp->free; 126 /* 127 * If the replay order was perfect the dirty space would now be 128 * zero. The order is not perfect because the journal heads 129 * race with each other. This is not a problem but is does mean 130 * that the dirty space may temporarily exceed c->leb_size 131 * during the replay. 132 */ 133 if (dirty != 0) 134 dbg_mnt("LEB %d lp: %d free %d dirty replay: %d free %d dirty", 135 b->bud->lnum, lp->free, lp->dirty, b->free, 136 b->dirty); 137 } 138 lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty, 139 lp->flags | LPROPS_TAKEN, 0); 140 if (IS_ERR(lp)) { 141 err = PTR_ERR(lp); 142 goto out; 143 } 144 145 /* Make sure the journal head points to the latest bud */ 146 err = ubifs_wbuf_seek_nolock(&c->jheads[b->bud->jhead].wbuf, 147 b->bud->lnum, c->leb_size - b->free); 148 149 out: 150 ubifs_release_lprops(c); 151 return err; 152 } 153 154 /** 155 * set_buds_lprops - set free and dirty space for all replayed buds. 156 * @c: UBIFS file-system description object 157 * 158 * This function sets LEB properties for all replayed buds. Returns zero in 159 * case of success and a negative error code in case of failure. 160 */ 161 static int set_buds_lprops(struct ubifs_info *c) 162 { 163 struct bud_entry *b; 164 int err; 165 166 list_for_each_entry(b, &c->replay_buds, list) { 167 err = set_bud_lprops(c, b); 168 if (err) 169 return err; 170 } 171 172 return 0; 173 } 174 175 /** 176 * trun_remove_range - apply a replay entry for a truncation to the TNC. 177 * @c: UBIFS file-system description object 178 * @r: replay entry of truncation 179 */ 180 static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r) 181 { 182 unsigned min_blk, max_blk; 183 union ubifs_key min_key, max_key; 184 ino_t ino; 185 186 min_blk = r->new_size / UBIFS_BLOCK_SIZE; 187 if (r->new_size & (UBIFS_BLOCK_SIZE - 1)) 188 min_blk += 1; 189 190 max_blk = r->old_size / UBIFS_BLOCK_SIZE; 191 if ((r->old_size & (UBIFS_BLOCK_SIZE - 1)) == 0) 192 max_blk -= 1; 193 194 ino = key_inum(c, &r->key); 195 196 data_key_init(c, &min_key, ino, min_blk); 197 data_key_init(c, &max_key, ino, max_blk); 198 199 return ubifs_tnc_remove_range(c, &min_key, &max_key); 200 } 201 202 /** 203 * inode_still_linked - check whether inode in question will be re-linked. 204 * @c: UBIFS file-system description object 205 * @rino: replay entry to test 206 * 207 * O_TMPFILE files can be re-linked, this means link count goes from 0 to 1. 208 * This case needs special care, otherwise all references to the inode will 209 * be removed upon the first replay entry of an inode with link count 0 210 * is found. 211 */ 212 static bool inode_still_linked(struct ubifs_info *c, struct replay_entry *rino) 213 { 214 struct replay_entry *r; 215 216 ubifs_assert(c, rino->deletion); 217 ubifs_assert(c, key_type(c, &rino->key) == UBIFS_INO_KEY); 218 219 /* 220 * Find the most recent entry for the inode behind @rino and check 221 * whether it is a deletion. 222 */ 223 list_for_each_entry_reverse(r, &c->replay_list, list) { 224 ubifs_assert(c, r->sqnum >= rino->sqnum); 225 if (key_inum(c, &r->key) == key_inum(c, &rino->key) && 226 key_type(c, &r->key) == UBIFS_INO_KEY) 227 return r->deletion == 0; 228 229 } 230 231 ubifs_assert(c, 0); 232 return false; 233 } 234 235 /** 236 * apply_replay_entry - apply a replay entry to the TNC. 237 * @c: UBIFS file-system description object 238 * @r: replay entry to apply 239 * 240 * Apply a replay entry to the TNC. 241 */ 242 static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) 243 { 244 int err; 245 246 dbg_mntk(&r->key, "LEB %d:%d len %d deletion %d sqnum %llu key ", 247 r->lnum, r->offs, r->len, r->deletion, r->sqnum); 248 249 if (is_hash_key(c, &r->key)) { 250 if (r->deletion) 251 err = ubifs_tnc_remove_nm(c, &r->key, &r->nm); 252 else 253 err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs, 254 r->len, r->hash, &r->nm); 255 } else { 256 if (r->deletion) 257 switch (key_type(c, &r->key)) { 258 case UBIFS_INO_KEY: 259 { 260 ino_t inum = key_inum(c, &r->key); 261 262 if (inode_still_linked(c, r)) { 263 err = 0; 264 break; 265 } 266 267 err = ubifs_tnc_remove_ino(c, inum); 268 break; 269 } 270 case UBIFS_TRUN_KEY: 271 err = trun_remove_range(c, r); 272 break; 273 default: 274 err = ubifs_tnc_remove(c, &r->key); 275 break; 276 } 277 else 278 err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs, 279 r->len, r->hash); 280 if (err) 281 return err; 282 283 if (c->need_recovery) 284 err = ubifs_recover_size_accum(c, &r->key, r->deletion, 285 r->new_size); 286 } 287 288 return err; 289 } 290 291 /** 292 * replay_entries_cmp - compare 2 replay entries. 293 * @priv: UBIFS file-system description object 294 * @a: first replay entry 295 * @b: second replay entry 296 * 297 * This is a comparios function for 'list_sort()' which compares 2 replay 298 * entries @a and @b by comparing their sequence number. Returns %1 if @a has 299 * greater sequence number and %-1 otherwise. 300 */ 301 static int replay_entries_cmp(void *priv, const struct list_head *a, 302 const struct list_head *b) 303 { 304 struct ubifs_info *c = priv; 305 struct replay_entry *ra, *rb; 306 307 cond_resched(); 308 if (a == b) 309 return 0; 310 311 ra = list_entry(a, struct replay_entry, list); 312 rb = list_entry(b, struct replay_entry, list); 313 ubifs_assert(c, ra->sqnum != rb->sqnum); 314 if (ra->sqnum > rb->sqnum) 315 return 1; 316 return -1; 317 } 318 319 /** 320 * apply_replay_list - apply the replay list to the TNC. 321 * @c: UBIFS file-system description object 322 * 323 * Apply all entries in the replay list to the TNC. Returns zero in case of 324 * success and a negative error code in case of failure. 325 */ 326 static int apply_replay_list(struct ubifs_info *c) 327 { 328 struct replay_entry *r; 329 int err; 330 331 list_sort(c, &c->replay_list, &replay_entries_cmp); 332 333 list_for_each_entry(r, &c->replay_list, list) { 334 cond_resched(); 335 336 err = apply_replay_entry(c, r); 337 if (err) 338 return err; 339 } 340 341 return 0; 342 } 343 344 /** 345 * destroy_replay_list - destroy the replay. 346 * @c: UBIFS file-system description object 347 * 348 * Destroy the replay list. 349 */ 350 static void destroy_replay_list(struct ubifs_info *c) 351 { 352 struct replay_entry *r, *tmp; 353 354 list_for_each_entry_safe(r, tmp, &c->replay_list, list) { 355 if (is_hash_key(c, &r->key)) 356 kfree(fname_name(&r->nm)); 357 list_del(&r->list); 358 kfree(r); 359 } 360 } 361 362 /** 363 * insert_node - insert a node to the replay list 364 * @c: UBIFS file-system description object 365 * @lnum: node logical eraseblock number 366 * @offs: node offset 367 * @len: node length 368 * @key: node key 369 * @sqnum: sequence number 370 * @deletion: non-zero if this is a deletion 371 * @used: number of bytes in use in a LEB 372 * @old_size: truncation old size 373 * @new_size: truncation new size 374 * 375 * This function inserts a scanned non-direntry node to the replay list. The 376 * replay list contains @struct replay_entry elements, and we sort this list in 377 * sequence number order before applying it. The replay list is applied at the 378 * very end of the replay process. Since the list is sorted in sequence number 379 * order, the older modifications are applied first. This function returns zero 380 * in case of success and a negative error code in case of failure. 381 */ 382 static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, 383 const u8 *hash, union ubifs_key *key, 384 unsigned long long sqnum, int deletion, int *used, 385 loff_t old_size, loff_t new_size) 386 { 387 struct replay_entry *r; 388 389 dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs); 390 391 if (key_inum(c, key) >= c->highest_inum) 392 c->highest_inum = key_inum(c, key); 393 394 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 395 if (!r) 396 return -ENOMEM; 397 398 if (!deletion) 399 *used += ALIGN(len, 8); 400 r->lnum = lnum; 401 r->offs = offs; 402 r->len = len; 403 ubifs_copy_hash(c, hash, r->hash); 404 r->deletion = !!deletion; 405 r->sqnum = sqnum; 406 key_copy(c, key, &r->key); 407 r->old_size = old_size; 408 r->new_size = new_size; 409 410 list_add_tail(&r->list, &c->replay_list); 411 return 0; 412 } 413 414 /** 415 * insert_dent - insert a directory entry node into the replay list. 416 * @c: UBIFS file-system description object 417 * @lnum: node logical eraseblock number 418 * @offs: node offset 419 * @len: node length 420 * @key: node key 421 * @name: directory entry name 422 * @nlen: directory entry name length 423 * @sqnum: sequence number 424 * @deletion: non-zero if this is a deletion 425 * @used: number of bytes in use in a LEB 426 * 427 * This function inserts a scanned directory entry node or an extended 428 * attribute entry to the replay list. Returns zero in case of success and a 429 * negative error code in case of failure. 430 */ 431 static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, 432 const u8 *hash, union ubifs_key *key, 433 const char *name, int nlen, unsigned long long sqnum, 434 int deletion, int *used) 435 { 436 struct replay_entry *r; 437 char *nbuf; 438 439 dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs); 440 if (key_inum(c, key) >= c->highest_inum) 441 c->highest_inum = key_inum(c, key); 442 443 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 444 if (!r) 445 return -ENOMEM; 446 447 nbuf = kmalloc(nlen + 1, GFP_KERNEL); 448 if (!nbuf) { 449 kfree(r); 450 return -ENOMEM; 451 } 452 453 if (!deletion) 454 *used += ALIGN(len, 8); 455 r->lnum = lnum; 456 r->offs = offs; 457 r->len = len; 458 ubifs_copy_hash(c, hash, r->hash); 459 r->deletion = !!deletion; 460 r->sqnum = sqnum; 461 key_copy(c, key, &r->key); 462 fname_len(&r->nm) = nlen; 463 memcpy(nbuf, name, nlen); 464 nbuf[nlen] = '\0'; 465 fname_name(&r->nm) = nbuf; 466 467 list_add_tail(&r->list, &c->replay_list); 468 return 0; 469 } 470 471 /** 472 * ubifs_validate_entry - validate directory or extended attribute entry node. 473 * @c: UBIFS file-system description object 474 * @dent: the node to validate 475 * 476 * This function validates directory or extended attribute entry node @dent. 477 * Returns zero if the node is all right and a %-EINVAL if not. 478 */ 479 int ubifs_validate_entry(struct ubifs_info *c, 480 const struct ubifs_dent_node *dent) 481 { 482 int key_type = key_type_flash(c, dent->key); 483 int nlen = le16_to_cpu(dent->nlen); 484 485 if (le32_to_cpu(dent->ch.len) != nlen + UBIFS_DENT_NODE_SZ + 1 || 486 dent->type >= UBIFS_ITYPES_CNT || 487 nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 || 488 (key_type == UBIFS_XENT_KEY && strnlen(dent->name, nlen) != nlen) || 489 le64_to_cpu(dent->inum) > MAX_INUM) { 490 ubifs_err(c, "bad %s node", key_type == UBIFS_DENT_KEY ? 491 "directory entry" : "extended attribute entry"); 492 return -EINVAL; 493 } 494 495 if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) { 496 ubifs_err(c, "bad key type %d", key_type); 497 return -EINVAL; 498 } 499 500 return 0; 501 } 502 503 /** 504 * is_last_bud - check if the bud is the last in the journal head. 505 * @c: UBIFS file-system description object 506 * @bud: bud description object 507 * 508 * This function checks if bud @bud is the last bud in its journal head. This 509 * information is then used by 'replay_bud()' to decide whether the bud can 510 * have corruptions or not. Indeed, only last buds can be corrupted by power 511 * cuts. Returns %1 if this is the last bud, and %0 if not. 512 */ 513 static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud) 514 { 515 struct ubifs_jhead *jh = &c->jheads[bud->jhead]; 516 struct ubifs_bud *next; 517 uint32_t data; 518 int err; 519 520 if (list_is_last(&bud->list, &jh->buds_list)) 521 return 1; 522 523 /* 524 * The following is a quirk to make sure we work correctly with UBIFS 525 * images used with older UBIFS. 526 * 527 * Normally, the last bud will be the last in the journal head's list 528 * of bud. However, there is one exception if the UBIFS image belongs 529 * to older UBIFS. This is fairly unlikely: one would need to use old 530 * UBIFS, then have a power cut exactly at the right point, and then 531 * try to mount this image with new UBIFS. 532 * 533 * The exception is: it is possible to have 2 buds A and B, A goes 534 * before B, and B is the last, bud B is contains no data, and bud A is 535 * corrupted at the end. The reason is that in older versions when the 536 * journal code switched the next bud (from A to B), it first added a 537 * log reference node for the new bud (B), and only after this it 538 * synchronized the write-buffer of current bud (A). But later this was 539 * changed and UBIFS started to always synchronize the write-buffer of 540 * the bud (A) before writing the log reference for the new bud (B). 541 * 542 * But because older UBIFS always synchronized A's write-buffer before 543 * writing to B, we can recognize this exceptional situation but 544 * checking the contents of bud B - if it is empty, then A can be 545 * treated as the last and we can recover it. 546 * 547 * TODO: remove this piece of code in a couple of years (today it is 548 * 16.05.2011). 549 */ 550 next = list_entry(bud->list.next, struct ubifs_bud, list); 551 if (!list_is_last(&next->list, &jh->buds_list)) 552 return 0; 553 554 err = ubifs_leb_read(c, next->lnum, (char *)&data, next->start, 4, 1); 555 if (err) 556 return 0; 557 558 return data == 0xFFFFFFFF; 559 } 560 561 /* authenticate_sleb_hash is split out for stack usage */ 562 static int noinline_for_stack 563 authenticate_sleb_hash(struct ubifs_info *c, 564 struct shash_desc *log_hash, u8 *hash) 565 { 566 SHASH_DESC_ON_STACK(hash_desc, c->hash_tfm); 567 568 hash_desc->tfm = c->hash_tfm; 569 570 ubifs_shash_copy_state(c, log_hash, hash_desc); 571 return crypto_shash_final(hash_desc, hash); 572 } 573 574 /** 575 * authenticate_sleb - authenticate one scan LEB 576 * @c: UBIFS file-system description object 577 * @sleb: the scan LEB to authenticate 578 * @log_hash: 579 * @is_last: if true, this is the last LEB 580 * 581 * This function iterates over the buds of a single LEB authenticating all buds 582 * with the authentication nodes on this LEB. Authentication nodes are written 583 * after some buds and contain a HMAC covering the authentication node itself 584 * and the buds between the last authentication node and the current 585 * authentication node. It can happen that the last buds cannot be authenticated 586 * because a powercut happened when some nodes were written but not the 587 * corresponding authentication node. This function returns the number of nodes 588 * that could be authenticated or a negative error code. 589 */ 590 static int authenticate_sleb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, 591 struct shash_desc *log_hash, int is_last) 592 { 593 int n_not_auth = 0; 594 struct ubifs_scan_node *snod; 595 int n_nodes = 0; 596 int err; 597 u8 hash[UBIFS_HASH_ARR_SZ]; 598 u8 hmac[UBIFS_HMAC_ARR_SZ]; 599 600 if (!ubifs_authenticated(c)) 601 return sleb->nodes_cnt; 602 603 list_for_each_entry(snod, &sleb->nodes, list) { 604 605 n_nodes++; 606 607 if (snod->type == UBIFS_AUTH_NODE) { 608 struct ubifs_auth_node *auth = snod->node; 609 610 err = authenticate_sleb_hash(c, log_hash, hash); 611 if (err) 612 goto out; 613 614 err = crypto_shash_tfm_digest(c->hmac_tfm, hash, 615 c->hash_len, hmac); 616 if (err) 617 goto out; 618 619 err = ubifs_check_hmac(c, auth->hmac, hmac); 620 if (err) { 621 err = -EPERM; 622 goto out; 623 } 624 n_not_auth = 0; 625 } else { 626 err = crypto_shash_update(log_hash, snod->node, 627 snod->len); 628 if (err) 629 goto out; 630 n_not_auth++; 631 } 632 } 633 634 /* 635 * A powercut can happen when some nodes were written, but not yet 636 * the corresponding authentication node. This may only happen on 637 * the last bud though. 638 */ 639 if (n_not_auth) { 640 if (is_last) { 641 dbg_mnt("%d unauthenticated nodes found on LEB %d, Ignoring them", 642 n_not_auth, sleb->lnum); 643 err = 0; 644 } else { 645 dbg_mnt("%d unauthenticated nodes found on non-last LEB %d", 646 n_not_auth, sleb->lnum); 647 err = -EPERM; 648 } 649 } else { 650 err = 0; 651 } 652 out: 653 return err ? err : n_nodes - n_not_auth; 654 } 655 656 /** 657 * replay_bud - replay a bud logical eraseblock. 658 * @c: UBIFS file-system description object 659 * @b: bud entry which describes the bud 660 * 661 * This function replays bud @bud, recovers it if needed, and adds all nodes 662 * from this bud to the replay list. Returns zero in case of success and a 663 * negative error code in case of failure. 664 */ 665 static int replay_bud(struct ubifs_info *c, struct bud_entry *b) 666 { 667 int is_last = is_last_bud(c, b->bud); 668 int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start; 669 int n_nodes, n = 0; 670 struct ubifs_scan_leb *sleb; 671 struct ubifs_scan_node *snod; 672 673 dbg_mnt("replay bud LEB %d, head %d, offs %d, is_last %d", 674 lnum, b->bud->jhead, offs, is_last); 675 676 if (c->need_recovery && is_last) 677 /* 678 * Recover only last LEBs in the journal heads, because power 679 * cuts may cause corruptions only in these LEBs, because only 680 * these LEBs could possibly be written to at the power cut 681 * time. 682 */ 683 sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, b->bud->jhead); 684 else 685 sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); 686 if (IS_ERR(sleb)) 687 return PTR_ERR(sleb); 688 689 n_nodes = authenticate_sleb(c, sleb, b->bud->log_hash, is_last); 690 if (n_nodes < 0) { 691 err = n_nodes; 692 goto out; 693 } 694 695 ubifs_shash_copy_state(c, b->bud->log_hash, 696 c->jheads[b->bud->jhead].log_hash); 697 698 /* 699 * The bud does not have to start from offset zero - the beginning of 700 * the 'lnum' LEB may contain previously committed data. One of the 701 * things we have to do in replay is to correctly update lprops with 702 * newer information about this LEB. 703 * 704 * At this point lprops thinks that this LEB has 'c->leb_size - offs' 705 * bytes of free space because it only contain information about 706 * committed data. 707 * 708 * But we know that real amount of free space is 'c->leb_size - 709 * sleb->endpt', and the space in the 'lnum' LEB between 'offs' and 710 * 'sleb->endpt' is used by bud data. We have to correctly calculate 711 * how much of these data are dirty and update lprops with this 712 * information. 713 * 714 * The dirt in that LEB region is comprised of padding nodes, deletion 715 * nodes, truncation nodes and nodes which are obsoleted by subsequent 716 * nodes in this LEB. So instead of calculating clean space, we 717 * calculate used space ('used' variable). 718 */ 719 720 list_for_each_entry(snod, &sleb->nodes, list) { 721 u8 hash[UBIFS_HASH_ARR_SZ]; 722 int deletion = 0; 723 724 cond_resched(); 725 726 if (snod->sqnum >= SQNUM_WATERMARK) { 727 ubifs_err(c, "file system's life ended"); 728 goto out_dump; 729 } 730 731 ubifs_node_calc_hash(c, snod->node, hash); 732 733 if (snod->sqnum > c->max_sqnum) 734 c->max_sqnum = snod->sqnum; 735 736 switch (snod->type) { 737 case UBIFS_INO_NODE: 738 { 739 struct ubifs_ino_node *ino = snod->node; 740 loff_t new_size = le64_to_cpu(ino->size); 741 742 if (le32_to_cpu(ino->nlink) == 0) 743 deletion = 1; 744 err = insert_node(c, lnum, snod->offs, snod->len, hash, 745 &snod->key, snod->sqnum, deletion, 746 &used, 0, new_size); 747 break; 748 } 749 case UBIFS_DATA_NODE: 750 { 751 struct ubifs_data_node *dn = snod->node; 752 loff_t new_size = le32_to_cpu(dn->size) + 753 key_block(c, &snod->key) * 754 UBIFS_BLOCK_SIZE; 755 756 err = insert_node(c, lnum, snod->offs, snod->len, hash, 757 &snod->key, snod->sqnum, deletion, 758 &used, 0, new_size); 759 break; 760 } 761 case UBIFS_DENT_NODE: 762 case UBIFS_XENT_NODE: 763 { 764 struct ubifs_dent_node *dent = snod->node; 765 766 err = ubifs_validate_entry(c, dent); 767 if (err) 768 goto out_dump; 769 770 err = insert_dent(c, lnum, snod->offs, snod->len, hash, 771 &snod->key, dent->name, 772 le16_to_cpu(dent->nlen), snod->sqnum, 773 !le64_to_cpu(dent->inum), &used); 774 break; 775 } 776 case UBIFS_TRUN_NODE: 777 { 778 struct ubifs_trun_node *trun = snod->node; 779 loff_t old_size = le64_to_cpu(trun->old_size); 780 loff_t new_size = le64_to_cpu(trun->new_size); 781 union ubifs_key key; 782 783 /* Validate truncation node */ 784 if (old_size < 0 || old_size > c->max_inode_sz || 785 new_size < 0 || new_size > c->max_inode_sz || 786 old_size <= new_size) { 787 ubifs_err(c, "bad truncation node"); 788 goto out_dump; 789 } 790 791 /* 792 * Create a fake truncation key just to use the same 793 * functions which expect nodes to have keys. 794 */ 795 trun_key_init(c, &key, le32_to_cpu(trun->inum)); 796 err = insert_node(c, lnum, snod->offs, snod->len, hash, 797 &key, snod->sqnum, 1, &used, 798 old_size, new_size); 799 break; 800 } 801 case UBIFS_AUTH_NODE: 802 break; 803 default: 804 ubifs_err(c, "unexpected node type %d in bud LEB %d:%d", 805 snod->type, lnum, snod->offs); 806 err = -EINVAL; 807 goto out_dump; 808 } 809 if (err) 810 goto out; 811 812 n++; 813 if (n == n_nodes) 814 break; 815 } 816 817 ubifs_assert(c, ubifs_search_bud(c, lnum)); 818 ubifs_assert(c, sleb->endpt - offs >= used); 819 ubifs_assert(c, sleb->endpt % c->min_io_size == 0); 820 821 b->dirty = sleb->endpt - offs - used; 822 b->free = c->leb_size - sleb->endpt; 823 dbg_mnt("bud LEB %d replied: dirty %d, free %d", 824 lnum, b->dirty, b->free); 825 826 out: 827 ubifs_scan_destroy(sleb); 828 return err; 829 830 out_dump: 831 ubifs_err(c, "bad node is at LEB %d:%d", lnum, snod->offs); 832 ubifs_dump_node(c, snod->node, c->leb_size - snod->offs); 833 ubifs_scan_destroy(sleb); 834 return -EINVAL; 835 } 836 837 /** 838 * replay_buds - replay all buds. 839 * @c: UBIFS file-system description object 840 * 841 * This function returns zero in case of success and a negative error code in 842 * case of failure. 843 */ 844 static int replay_buds(struct ubifs_info *c) 845 { 846 struct bud_entry *b; 847 int err; 848 unsigned long long prev_sqnum = 0; 849 850 list_for_each_entry(b, &c->replay_buds, list) { 851 err = replay_bud(c, b); 852 if (err) 853 return err; 854 855 ubifs_assert(c, b->sqnum > prev_sqnum); 856 prev_sqnum = b->sqnum; 857 } 858 859 return 0; 860 } 861 862 /** 863 * destroy_bud_list - destroy the list of buds to replay. 864 * @c: UBIFS file-system description object 865 */ 866 static void destroy_bud_list(struct ubifs_info *c) 867 { 868 struct bud_entry *b; 869 870 while (!list_empty(&c->replay_buds)) { 871 b = list_entry(c->replay_buds.next, struct bud_entry, list); 872 list_del(&b->list); 873 kfree(b); 874 } 875 } 876 877 /** 878 * add_replay_bud - add a bud to the list of buds to replay. 879 * @c: UBIFS file-system description object 880 * @lnum: bud logical eraseblock number to replay 881 * @offs: bud start offset 882 * @jhead: journal head to which this bud belongs 883 * @sqnum: reference node sequence number 884 * 885 * This function returns zero in case of success and a negative error code in 886 * case of failure. 887 */ 888 static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, 889 unsigned long long sqnum) 890 { 891 struct ubifs_bud *bud; 892 struct bud_entry *b; 893 int err; 894 895 dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead); 896 897 bud = kmalloc(sizeof(struct ubifs_bud), GFP_KERNEL); 898 if (!bud) 899 return -ENOMEM; 900 901 b = kmalloc(sizeof(struct bud_entry), GFP_KERNEL); 902 if (!b) { 903 err = -ENOMEM; 904 goto out; 905 } 906 907 bud->lnum = lnum; 908 bud->start = offs; 909 bud->jhead = jhead; 910 bud->log_hash = ubifs_hash_get_desc(c); 911 if (IS_ERR(bud->log_hash)) { 912 err = PTR_ERR(bud->log_hash); 913 goto out; 914 } 915 916 ubifs_shash_copy_state(c, c->log_hash, bud->log_hash); 917 918 ubifs_add_bud(c, bud); 919 920 b->bud = bud; 921 b->sqnum = sqnum; 922 list_add_tail(&b->list, &c->replay_buds); 923 924 return 0; 925 out: 926 kfree(bud); 927 kfree(b); 928 929 return err; 930 } 931 932 /** 933 * validate_ref - validate a reference node. 934 * @c: UBIFS file-system description object 935 * @ref: the reference node to validate 936 * 937 * This function returns %1 if a bud reference already exists for the LEB. %0 is 938 * returned if the reference node is new, otherwise %-EINVAL is returned if 939 * validation failed. 940 */ 941 static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref) 942 { 943 struct ubifs_bud *bud; 944 int lnum = le32_to_cpu(ref->lnum); 945 unsigned int offs = le32_to_cpu(ref->offs); 946 unsigned int jhead = le32_to_cpu(ref->jhead); 947 948 /* 949 * ref->offs may point to the end of LEB when the journal head points 950 * to the end of LEB and we write reference node for it during commit. 951 * So this is why we require 'offs > c->leb_size'. 952 */ 953 if (jhead >= c->jhead_cnt || lnum >= c->leb_cnt || 954 lnum < c->main_first || offs > c->leb_size || 955 offs & (c->min_io_size - 1)) 956 return -EINVAL; 957 958 /* Make sure we have not already looked at this bud */ 959 bud = ubifs_search_bud(c, lnum); 960 if (bud) { 961 if (bud->jhead == jhead && bud->start <= offs) 962 return 1; 963 ubifs_err(c, "bud at LEB %d:%d was already referred", lnum, offs); 964 return -EINVAL; 965 } 966 967 return 0; 968 } 969 970 /** 971 * replay_log_leb - replay a log logical eraseblock. 972 * @c: UBIFS file-system description object 973 * @lnum: log logical eraseblock to replay 974 * @offs: offset to start replaying from 975 * @sbuf: scan buffer 976 * 977 * This function replays a log LEB and returns zero in case of success, %1 if 978 * this is the last LEB in the log, and a negative error code in case of 979 * failure. 980 */ 981 static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) 982 { 983 int err; 984 struct ubifs_scan_leb *sleb; 985 struct ubifs_scan_node *snod; 986 const struct ubifs_cs_node *node; 987 988 dbg_mnt("replay log LEB %d:%d", lnum, offs); 989 sleb = ubifs_scan(c, lnum, offs, sbuf, c->need_recovery); 990 if (IS_ERR(sleb)) { 991 if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery) 992 return PTR_ERR(sleb); 993 /* 994 * Note, the below function will recover this log LEB only if 995 * it is the last, because unclean reboots can possibly corrupt 996 * only the tail of the log. 997 */ 998 sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); 999 if (IS_ERR(sleb)) 1000 return PTR_ERR(sleb); 1001 } 1002 1003 if (sleb->nodes_cnt == 0) { 1004 err = 1; 1005 goto out; 1006 } 1007 1008 node = sleb->buf; 1009 snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list); 1010 if (c->cs_sqnum == 0) { 1011 /* 1012 * This is the first log LEB we are looking at, make sure that 1013 * the first node is a commit start node. Also record its 1014 * sequence number so that UBIFS can determine where the log 1015 * ends, because all nodes which were have higher sequence 1016 * numbers. 1017 */ 1018 if (snod->type != UBIFS_CS_NODE) { 1019 ubifs_err(c, "first log node at LEB %d:%d is not CS node", 1020 lnum, offs); 1021 goto out_dump; 1022 } 1023 if (le64_to_cpu(node->cmt_no) != c->cmt_no) { 1024 ubifs_err(c, "first CS node at LEB %d:%d has wrong commit number %llu expected %llu", 1025 lnum, offs, 1026 (unsigned long long)le64_to_cpu(node->cmt_no), 1027 c->cmt_no); 1028 goto out_dump; 1029 } 1030 1031 c->cs_sqnum = le64_to_cpu(node->ch.sqnum); 1032 dbg_mnt("commit start sqnum %llu", c->cs_sqnum); 1033 1034 err = ubifs_shash_init(c, c->log_hash); 1035 if (err) 1036 goto out; 1037 1038 err = ubifs_shash_update(c, c->log_hash, node, UBIFS_CS_NODE_SZ); 1039 if (err < 0) 1040 goto out; 1041 } 1042 1043 if (snod->sqnum < c->cs_sqnum) { 1044 /* 1045 * This means that we reached end of log and now 1046 * look to the older log data, which was already 1047 * committed but the eraseblock was not erased (UBIFS 1048 * only un-maps it). So this basically means we have to 1049 * exit with "end of log" code. 1050 */ 1051 err = 1; 1052 goto out; 1053 } 1054 1055 /* Make sure the first node sits at offset zero of the LEB */ 1056 if (snod->offs != 0) { 1057 ubifs_err(c, "first node is not at zero offset"); 1058 goto out_dump; 1059 } 1060 1061 list_for_each_entry(snod, &sleb->nodes, list) { 1062 cond_resched(); 1063 1064 if (snod->sqnum >= SQNUM_WATERMARK) { 1065 ubifs_err(c, "file system's life ended"); 1066 goto out_dump; 1067 } 1068 1069 if (snod->sqnum < c->cs_sqnum) { 1070 ubifs_err(c, "bad sqnum %llu, commit sqnum %llu", 1071 snod->sqnum, c->cs_sqnum); 1072 goto out_dump; 1073 } 1074 1075 if (snod->sqnum > c->max_sqnum) 1076 c->max_sqnum = snod->sqnum; 1077 1078 switch (snod->type) { 1079 case UBIFS_REF_NODE: { 1080 const struct ubifs_ref_node *ref = snod->node; 1081 1082 err = validate_ref(c, ref); 1083 if (err == 1) 1084 break; /* Already have this bud */ 1085 if (err) 1086 goto out_dump; 1087 1088 err = ubifs_shash_update(c, c->log_hash, ref, 1089 UBIFS_REF_NODE_SZ); 1090 if (err) 1091 goto out; 1092 1093 err = add_replay_bud(c, le32_to_cpu(ref->lnum), 1094 le32_to_cpu(ref->offs), 1095 le32_to_cpu(ref->jhead), 1096 snod->sqnum); 1097 if (err) 1098 goto out; 1099 1100 break; 1101 } 1102 case UBIFS_CS_NODE: 1103 /* Make sure it sits at the beginning of LEB */ 1104 if (snod->offs != 0) { 1105 ubifs_err(c, "unexpected node in log"); 1106 goto out_dump; 1107 } 1108 break; 1109 default: 1110 ubifs_err(c, "unexpected node in log"); 1111 goto out_dump; 1112 } 1113 } 1114 1115 if (sleb->endpt || c->lhead_offs >= c->leb_size) { 1116 c->lhead_lnum = lnum; 1117 c->lhead_offs = sleb->endpt; 1118 } 1119 1120 err = !sleb->endpt; 1121 out: 1122 ubifs_scan_destroy(sleb); 1123 return err; 1124 1125 out_dump: 1126 ubifs_err(c, "log error detected while replaying the log at LEB %d:%d", 1127 lnum, offs + snod->offs); 1128 ubifs_dump_node(c, snod->node, c->leb_size - snod->offs); 1129 ubifs_scan_destroy(sleb); 1130 return -EINVAL; 1131 } 1132 1133 /** 1134 * take_ihead - update the status of the index head in lprops to 'taken'. 1135 * @c: UBIFS file-system description object 1136 * 1137 * This function returns the amount of free space in the index head LEB or a 1138 * negative error code. 1139 */ 1140 static int take_ihead(struct ubifs_info *c) 1141 { 1142 const struct ubifs_lprops *lp; 1143 int err, free; 1144 1145 ubifs_get_lprops(c); 1146 1147 lp = ubifs_lpt_lookup_dirty(c, c->ihead_lnum); 1148 if (IS_ERR(lp)) { 1149 err = PTR_ERR(lp); 1150 goto out; 1151 } 1152 1153 free = lp->free; 1154 1155 lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, 1156 lp->flags | LPROPS_TAKEN, 0); 1157 if (IS_ERR(lp)) { 1158 err = PTR_ERR(lp); 1159 goto out; 1160 } 1161 1162 err = free; 1163 out: 1164 ubifs_release_lprops(c); 1165 return err; 1166 } 1167 1168 /** 1169 * ubifs_replay_journal - replay journal. 1170 * @c: UBIFS file-system description object 1171 * 1172 * This function scans the journal, replays and cleans it up. It makes sure all 1173 * memory data structures related to uncommitted journal are built (dirty TNC 1174 * tree, tree of buds, modified lprops, etc). 1175 */ 1176 int ubifs_replay_journal(struct ubifs_info *c) 1177 { 1178 int err, lnum, free; 1179 1180 BUILD_BUG_ON(UBIFS_TRUN_KEY > 5); 1181 1182 /* Update the status of the index head in lprops to 'taken' */ 1183 free = take_ihead(c); 1184 if (free < 0) 1185 return free; /* Error code */ 1186 1187 if (c->ihead_offs != c->leb_size - free) { 1188 ubifs_err(c, "bad index head LEB %d:%d", c->ihead_lnum, 1189 c->ihead_offs); 1190 return -EINVAL; 1191 } 1192 1193 dbg_mnt("start replaying the journal"); 1194 c->replaying = 1; 1195 lnum = c->ltail_lnum = c->lhead_lnum; 1196 1197 do { 1198 err = replay_log_leb(c, lnum, 0, c->sbuf); 1199 if (err == 1) { 1200 if (lnum != c->lhead_lnum) 1201 /* We hit the end of the log */ 1202 break; 1203 1204 /* 1205 * The head of the log must always start with the 1206 * "commit start" node on a properly formatted UBIFS. 1207 * But we found no nodes at all, which means that 1208 * something went wrong and we cannot proceed mounting 1209 * the file-system. 1210 */ 1211 ubifs_err(c, "no UBIFS nodes found at the log head LEB %d:%d, possibly corrupted", 1212 lnum, 0); 1213 err = -EINVAL; 1214 } 1215 if (err) 1216 goto out; 1217 lnum = ubifs_next_log_lnum(c, lnum); 1218 } while (lnum != c->ltail_lnum); 1219 1220 err = replay_buds(c); 1221 if (err) 1222 goto out; 1223 1224 err = apply_replay_list(c); 1225 if (err) 1226 goto out; 1227 1228 err = set_buds_lprops(c); 1229 if (err) 1230 goto out; 1231 1232 /* 1233 * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable 1234 * to roughly estimate index growth. Things like @c->bi.min_idx_lebs 1235 * depend on it. This means we have to initialize it to make sure 1236 * budgeting works properly. 1237 */ 1238 c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt); 1239 c->bi.uncommitted_idx *= c->max_idx_node_sz; 1240 1241 ubifs_assert(c, c->bud_bytes <= c->max_bud_bytes || c->need_recovery); 1242 dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, highest_inum %lu", 1243 c->lhead_lnum, c->lhead_offs, c->max_sqnum, 1244 (unsigned long)c->highest_inum); 1245 out: 1246 destroy_replay_list(c); 1247 destroy_bud_list(c); 1248 c->replaying = 0; 1249 return err; 1250 } 1251