1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * This file is part of UBIFS. 4 * 5 * Copyright (C) 2006-2008 Nokia Corporation. 6 * Copyright (C) 2006, 2007 University of Szeged, Hungary 7 * 8 * Authors: Artem Bityutskiy (Битюцкий Артём) 9 * Adrian Hunter 10 * Zoltan Sogor 11 */ 12 13 /* 14 * This file implements UBIFS I/O subsystem which provides various I/O-related 15 * helper functions (reading/writing/checking/validating nodes) and implements 16 * write-buffering support. Write buffers help to save space which otherwise 17 * would have been wasted for padding to the nearest minimal I/O unit boundary. 18 * Instead, data first goes to the write-buffer and is flushed when the 19 * buffer is full or when it is not used for some time (by timer). This is 20 * similar to the mechanism is used by JFFS2. 21 * 22 * UBIFS distinguishes between minimum write size (@c->min_io_size) and maximum 23 * write size (@c->max_write_size). The latter is the maximum amount of bytes 24 * the underlying flash is able to program at a time, and writing in 25 * @c->max_write_size units should presumably be faster. Obviously, 26 * @c->min_io_size <= @c->max_write_size. Write-buffers are of 27 * @c->max_write_size bytes in size for maximum performance. However, when a 28 * write-buffer is flushed, only the portion of it (aligned to @c->min_io_size 29 * boundary) which contains data is written, not the whole write-buffer, 30 * because this is more space-efficient. 31 * 32 * This optimization adds few complications to the code. Indeed, on the one 33 * hand, we want to write in optimal @c->max_write_size bytes chunks, which 34 * also means aligning writes at the @c->max_write_size bytes offsets. On the 35 * other hand, we do not want to waste space when synchronizing the write 36 * buffer, so during synchronization we writes in smaller chunks. And this makes 37 * the next write offset to be not aligned to @c->max_write_size bytes. 
So we
 * have to make sure that the write-buffer offset (@wbuf->offs) becomes aligned
 * to @c->max_write_size bytes again. We do this by temporarily shrinking
 * write-buffer size (@wbuf->size).
 *
 * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by
 * mutexes defined inside these objects. Since sometimes upper-level code
 * has to lock the write-buffer (e.g. journal space reservation code), many
 * functions related to write-buffers have "nolock" suffix which means that the
 * caller has to lock the write-buffer before calling this function.
 *
 * UBIFS stores nodes at 64 bit-aligned addresses. If the node length is not
 * aligned, UBIFS starts the next node from the aligned address, and the padded
 * bytes may contain any rubbish. In other words, UBIFS does not put padding
 * bytes in those small gaps. Common headers of nodes store real node lengths,
 * not aligned lengths. Indexing nodes also store real lengths in branches.
 *
 * UBIFS uses padding when it pads to the next min. I/O unit. In this case it
 * uses padding nodes or padding bytes, if the padding node does not fit.
 *
 * All UBIFS nodes are protected by CRC checksums and UBIFS checks CRC when
 * they are read from the flash media.
 */

#include <linux/crc32.h>
#include <linux/slab.h>
#include "ubifs.h"

/**
 * ubifs_ro_mode - switch UBIFS to read-only mode.
 * @c: UBIFS file-system description object
 * @err: error code which is the reason of switching to R/O mode
 *
 * This is a one-way, idempotent transition: only the first call flips the
 * flags (guarded by @c->ro_error).  Data CRC checking is re-enabled because
 * after an I/O error we can no longer trust the medium.
 */
void ubifs_ro_mode(struct ubifs_info *c, int err)
{
	if (!c->ro_error) {
		c->ro_error = 1;
		/* Force CRC checking from now on - the medium is suspect */
		c->no_chk_data_crc = 0;
		c->vfs_sb->s_flags |= SB_RDONLY;
		ubifs_warn(c, "switched to read-only mode, error %d", err);
		dump_stack();
	}
}

/*
 * Below are simple wrappers over UBI I/O functions which include some
 * additional checks and UBIFS debugging stuff. See corresponding UBI function
 * for more information.
 */

/**
 * ubifs_leb_read - read data from a logical eraseblock.
 * @c: UBIFS file-system description object
 * @lnum: logical eraseblock number to read from
 * @buf: buffer to read to
 * @offs: offset within the LEB to read from
 * @len: how many bytes to read
 * @even_ebadmsg: print an error message even in case of %-EBADMSG
 *
 * Wrapper over 'ubi_read()' which complains on failure.  Returns zero on
 * success and a negative error code on failure.
 */
int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs,
		   int len, int even_ebadmsg)
{
	int err;

	err = ubi_read(c->ubi, lnum, buf, offs, len);
	/*
	 * In case of %-EBADMSG print the error message only if the
	 * @even_ebadmsg is true.
	 */
	if (err && (err != -EBADMSG || even_ebadmsg)) {
		ubifs_err(c, "reading %d bytes from LEB %d:%d failed, error %d",
			  len, lnum, offs, err);
		dump_stack();
	}
	return err;
}

/**
 * ubifs_leb_write - write data to a logical eraseblock.
 * @c: UBIFS file-system description object
 * @lnum: logical eraseblock number to write to
 * @buf: data to write
 * @offs: offset within the LEB to write to
 * @len: how many bytes to write
 *
 * Wrapper over 'ubi_leb_write()' (or the debugging variant when failure
 * injection testing is enabled).  Any write failure switches the file-system
 * to read-only mode.  Returns zero on success and a negative error code on
 * failure.
 */
int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs,
		    int len)
{
	int err;

	ubifs_assert(c, !c->ro_media && !c->ro_mount);
	if (c->ro_error)
		return -EROFS;
	if (!dbg_is_tst_rcvry(c))
		err = ubi_leb_write(c->ubi, lnum, buf, offs, len);
	else
		err = dbg_leb_write(c, lnum, buf, offs, len);
	if (err) {
		ubifs_err(c, "writing %d bytes to LEB %d:%d failed, error %d",
			  len, lnum, offs, err);
		ubifs_ro_mode(c, err);
		dump_stack();
	}
	return err;
}

/**
 * ubifs_leb_change - atomically change the contents of a logical eraseblock.
 * @c: UBIFS file-system description object
 * @lnum: logical eraseblock number to change
 * @buf: new contents
 * @len: length of the new contents
 *
 * Wrapper over 'ubi_leb_change()'.  A failure switches the file-system to
 * read-only mode.  Returns zero on success and a negative error code on
 * failure.
 */
int ubifs_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len)
{
	int err;

	ubifs_assert(c, !c->ro_media && !c->ro_mount);
	if (c->ro_error)
		return -EROFS;
	if (!dbg_is_tst_rcvry(c))
		err = ubi_leb_change(c->ubi, lnum, buf, len);
	else
		err = dbg_leb_change(c, lnum, buf, len);
	if (err) {
		ubifs_err(c, "changing %d bytes in LEB %d failed, error %d",
			  len, lnum, err);
		ubifs_ro_mode(c, err);
		dump_stack();
	}
	return err;
}

/**
 * ubifs_leb_unmap - un-map a logical eraseblock.
 * @c: UBIFS file-system description object
 * @lnum: logical eraseblock number to un-map
 *
 * Wrapper over 'ubi_leb_unmap()'.  A failure switches the file-system to
 * read-only mode.  Returns zero on success and a negative error code on
 * failure.
 */
int ubifs_leb_unmap(struct ubifs_info *c, int lnum)
{
	int err;

	ubifs_assert(c, !c->ro_media && !c->ro_mount);
	if (c->ro_error)
		return -EROFS;
	if (!dbg_is_tst_rcvry(c))
		err = ubi_leb_unmap(c->ubi, lnum);
	else
		err = dbg_leb_unmap(c, lnum);
	if (err) {
		ubifs_err(c, "unmap LEB %d failed, error %d", lnum, err);
		ubifs_ro_mode(c, err);
		dump_stack();
	}
	return err;
}

/**
 * ubifs_leb_map - map a logical eraseblock to a physical eraseblock.
 * @c: UBIFS file-system description object
 * @lnum: logical eraseblock number to map
 *
 * Wrapper over 'ubi_leb_map()'.  A failure switches the file-system to
 * read-only mode.  Returns zero on success and a negative error code on
 * failure.
 */
int ubifs_leb_map(struct ubifs_info *c, int lnum)
{
	int err;

	ubifs_assert(c, !c->ro_media && !c->ro_mount);
	if (c->ro_error)
		return -EROFS;
	if (!dbg_is_tst_rcvry(c))
		err = ubi_leb_map(c->ubi, lnum);
	else
		err = dbg_leb_map(c, lnum);
	if (err) {
		ubifs_err(c, "mapping LEB %d failed, error %d", lnum, err);
		ubifs_ro_mode(c, err);
		dump_stack();
	}
	return err;
}

/**
 * ubifs_is_mapped - check whether a logical eraseblock is mapped.
 * @c: UBIFS file-system description object
 * @lnum: logical eraseblock number to check
 *
 * Wrapper over 'ubi_is_mapped()'.  Returns %1 if mapped, %0 if not, and a
 * negative error code on failure.  Note: does NOT switch to R/O mode,
 * since this is a read-only query.
 */
int ubifs_is_mapped(const struct ubifs_info *c, int lnum)
{
	int err;

	err = ubi_is_mapped(c->ubi, lnum);
	if (err < 0) {
		ubifs_err(c, "ubi_is_mapped failed for LEB %d, error %d",
			  lnum, err);
		dump_stack();
	}
	return err;
}

/* Bump the bad-magic counter; @stats may be NULL early in mount */
static void record_magic_error(struct ubifs_stats_info *stats)
{
	if (stats)
		stats->magic_errors++;
}

/* Bump the bad-node-type counter; @stats may be NULL early in mount */
static void record_node_error(struct ubifs_stats_info *stats)
{
	if (stats)
		stats->node_errors++;
}

/* Bump the bad-CRC counter; @stats may be NULL early in mount */
static void record_crc_error(struct ubifs_stats_info *stats)
{
	if (stats)
		stats->crc_errors++;
}

/**
 * ubifs_check_node - check node.
 * @c: UBIFS file-system description object
 * @buf: node to check
 * @len: node length
 * @lnum: logical eraseblock number
 * @offs: offset within the logical eraseblock
 * @quiet: print no messages
 * @must_chk_crc: indicates whether to always check the CRC
 *
 * This function checks node magic number and CRC checksum. This function also
 * validates node length to prevent UBIFS from becoming crazy when an attacker
 * feeds it a file-system image with incorrect nodes. For example, too large
 * node length in the common header could cause UBIFS to read memory outside of
 * allocated buffer when checking the CRC checksum.
 *
 * This function may skip data nodes CRC checking if @c->no_chk_data_crc is
 * true, which is controlled by corresponding UBIFS mount option.
However, if
 * @must_chk_crc is true, then @c->no_chk_data_crc is ignored and CRC is
 * checked. Similarly, if @c->mounting or @c->remounting_rw is true (we are
 * mounting or re-mounting to R/W mode), @c->no_chk_data_crc is ignored and CRC
 * is checked. This is because during mounting or re-mounting from R/O mode to
 * R/W mode we may read journal nodes (when replaying the journal or doing the
 * recovery) and the journal nodes may potentially be corrupted, so checking is
 * required.
 *
 * This function returns zero in case of success and %-EUCLEAN in case of bad
 * CRC or magic.
 */
int ubifs_check_node(const struct ubifs_info *c, const void *buf, int len,
		     int lnum, int offs, int quiet, int must_chk_crc)
{
	int err = -EINVAL, type, node_len;
	uint32_t crc, node_crc, magic;
	const struct ubifs_ch *ch = buf;

	ubifs_assert(c, lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
	ubifs_assert(c, !(offs & 7) && offs < c->leb_size);

	magic = le32_to_cpu(ch->magic);
	if (magic != UBIFS_NODE_MAGIC) {
		if (!quiet)
			ubifs_err(c, "bad magic %#08x, expected %#08x",
				  magic, UBIFS_NODE_MAGIC);
		record_magic_error(c->stats);
		err = -EUCLEAN;
		goto out;
	}

	type = ch->node_type;
	if (type < 0 || type >= UBIFS_NODE_TYPES_CNT) {
		if (!quiet)
			ubifs_err(c, "bad node type %d", type);
		record_node_error(c->stats);
		goto out;
	}

	/*
	 * Validate the length from the (untrusted) common header before it is
	 * used as a CRC range - it must fit inside the LEB and within the
	 * per-type legal length range.
	 */
	node_len = le32_to_cpu(ch->len);
	if (node_len + offs > c->leb_size)
		goto out_len;

	if (c->ranges[type].max_len == 0) {
		/* Fixed-length node type */
		if (node_len != c->ranges[type].len)
			goto out_len;
	} else if (node_len < c->ranges[type].min_len ||
		   node_len > c->ranges[type].max_len)
		goto out_len;

	/* Optionally skip data-node CRC checking (mount option) */
	if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->mounting &&
	    !c->remounting_rw && c->no_chk_data_crc)
		return 0;

	/* CRC covers everything after the 8-byte magic/CRC words */
	crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
	node_crc = le32_to_cpu(ch->crc);
	if (crc != node_crc) {
		if (!quiet)
			ubifs_err(c, "bad CRC: calculated %#08x, read %#08x",
				  crc, node_crc);
		record_crc_error(c->stats);
		err = -EUCLEAN;
		goto out;
	}

	return 0;

out_len:
	if (!quiet)
		ubifs_err(c, "bad node length %d", node_len);
out:
	if (!quiet) {
		ubifs_err(c, "bad node at LEB %d:%d", lnum, offs);
		ubifs_dump_node(c, buf, len);
		dump_stack();
	}
	return err;
}

/**
 * ubifs_pad - pad flash space.
 * @c: UBIFS file-system description object
 * @buf: buffer to put padding to
 * @pad: how many bytes to pad
 *
 * The flash media obliges us to write only in chunks of %c->min_io_size and
 * when we have to write less data we add padding node to the write-buffer and
 * pad it to the next minimal I/O unit's boundary. Padding nodes help when the
 * media is being scanned. If the amount of wasted space is not enough to fit a
 * padding node which takes %UBIFS_PAD_NODE_SZ bytes, we write padding bytes
 * pattern (%UBIFS_PADDING_BYTE).
 *
 * Padding nodes are also used to fill gaps when the "commit-in-gaps" method is
 * used.
 */
void ubifs_pad(const struct ubifs_info *c, void *buf, int pad)
{
	ubifs_assert(c, pad >= 0);

	if (pad >= UBIFS_PAD_NODE_SZ) {
		/* Enough room: emit a proper padding node, then zeros */
		struct ubifs_ch *ch = buf;
		struct ubifs_pad_node *pad_node = buf;

		ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC);
		ch->node_type = UBIFS_PAD_NODE;
		ch->group_type = UBIFS_NO_NODE_GROUP;
		ch->padding[0] = ch->padding[1] = 0;
		ch->sqnum = 0;
		ch->len = cpu_to_le32(UBIFS_PAD_NODE_SZ);
		pad -= UBIFS_PAD_NODE_SZ;
		pad_node->pad_len = cpu_to_le32(pad);
		ubifs_crc_node(buf, UBIFS_PAD_NODE_SZ);
		memset(buf + UBIFS_PAD_NODE_SZ, 0, pad);
	} else if (pad > 0)
		/* Too little space, padding node won't fit */
		memset(buf, UBIFS_PADDING_BYTE, pad);
}

/**
 * next_sqnum - get next sequence number.
 * @c: UBIFS file-system description object
 *
 * Atomically bump and return the global node sequence number.  Warns when
 * approaching the 64-bit watermark and forces read-only mode on overflow.
 */
static unsigned long long next_sqnum(struct ubifs_info *c)
{
	unsigned long long sqnum;

	spin_lock(&c->cnt_lock);
	sqnum = ++c->max_sqnum;
	spin_unlock(&c->cnt_lock);

	if (unlikely(sqnum >= SQNUM_WARN_WATERMARK)) {
		if (sqnum >= SQNUM_WATERMARK) {
			ubifs_err(c, "sequence number overflow %llu, end of life",
				  sqnum);
			ubifs_ro_mode(c, -EINVAL);
		}
		ubifs_warn(c, "running out of sequence numbers, end of life soon");
	}

	return sqnum;
}

/**
 * ubifs_init_node - fill in the common header of a node.
 * @c: UBIFS file-system description object
 * @node: the node to initialize
 * @len: node length
 * @pad: if the buffer has to be padded out to the next min. I/O unit
 *
 * Fills the common header (magic, length, sequence number) but NOT the CRC -
 * the caller computes the CRC later, after any further modifications (e.g.
 * HMAC insertion).  Note: @ch->node_type is left for the caller to set.
 */
void ubifs_init_node(struct ubifs_info *c, void *node, int len, int pad)
{
	struct ubifs_ch *ch = node;
	unsigned long long sqnum = next_sqnum(c);

	ubifs_assert(c, len >= UBIFS_CH_SZ);

	ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC);
	ch->len = cpu_to_le32(len);
	ch->group_type = UBIFS_NO_NODE_GROUP;
	ch->sqnum = cpu_to_le64(sqnum);
	ch->padding[0] = ch->padding[1] = 0;

	if (pad) {
		/* Nodes are 8-byte aligned; pad the rest of the min. I/O unit */
		len = ALIGN(len, 8);
		pad = ALIGN(len, c->min_io_size) - len;
		ubifs_pad(c, node + len, pad);
	}
}

/**
 * ubifs_crc_node - compute and store the CRC of a node.
 * @node: the node
 * @len: node length
 *
 * The CRC covers the node contents after the first 8 bytes (magic + CRC
 * fields themselves are excluded).
 */
void ubifs_crc_node(void *node, int len)
{
	struct ubifs_ch *ch = node;
	uint32_t crc;

	crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8);
	ch->crc = cpu_to_le32(crc);
}

/**
 * ubifs_prepare_node_hmac - prepare node to be written to flash.
 * @c: UBIFS file-system description object
 * @node: the node to pad
 * @len: node length
 * @hmac_offs: offset of the HMAC in the node
 * @pad: if the buffer has to be padded
 *
 * This function prepares node at @node to be written to the media - it
 * calculates node CRC, fills the common header, and adds proper padding up to
 * the next minimum I/O unit if @pad is not zero. if @hmac_offs is positive then
 * a HMAC is inserted into the node at the given offset.
 *
 * This function returns 0 for success or a negative error code otherwise.
 */
int ubifs_prepare_node_hmac(struct ubifs_info *c, void *node, int len,
			    int hmac_offs, int pad)
{
	int err;

	ubifs_init_node(c, node, len, pad);

	/* HMAC must be inserted before the CRC is computed over the node */
	if (hmac_offs > 0) {
		err = ubifs_node_insert_hmac(c, node, len, hmac_offs);
		if (err)
			return err;
	}

	ubifs_crc_node(node, len);

	return 0;
}

/**
 * ubifs_prepare_node - prepare node to be written to flash.
 * @c: UBIFS file-system description object
 * @node: the node to pad
 * @len: node length
 * @pad: if the buffer has to be padded
 *
 * This function prepares node at @node to be written to the media - it
 * calculates node CRC, fills the common header, and adds proper padding up to
 * the next minimum I/O unit if @pad is not zero.
 */
void ubifs_prepare_node(struct ubifs_info *c, void *node, int len, int pad)
{
	/*
	 * Deliberately ignore return value since this function can only fail
	 * when a hmac offset is given.
	 */
	ubifs_prepare_node_hmac(c, node, len, 0, pad);
}

/**
 * ubifs_prep_grp_node - prepare node of a group to be written to flash.
 * @c: UBIFS file-system description object
 * @node: the node to pad
 * @len: node length
 * @last: indicates the last node of the group
 *
 * This function prepares node at @node to be written to the media - it
 * calculates node CRC and fills the common header.
 */
void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last)
{
	struct ubifs_ch *ch = node;
	unsigned long long sqnum = next_sqnum(c);

	ubifs_assert(c, len >= UBIFS_CH_SZ);

	ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC);
	ch->len = cpu_to_le32(len);
	/* Group type tells the scanner whether more group members follow */
	if (last)
		ch->group_type = UBIFS_LAST_OF_NODE_GROUP;
	else
		ch->group_type = UBIFS_IN_NODE_GROUP;
	ch->sqnum = cpu_to_le64(sqnum);
	ch->padding[0] = ch->padding[1] = 0;
	ubifs_crc_node(node, len);
}

/**
 * wbuf_timer_callback_nolock - write-buffer timer callback function.
 * @timer: timer data (write-buffer descriptor)
 *
 * This function is called when the write-buffer timer expires.  It only
 * marks the write-buffer as needing synchronization and wakes the
 * background thread - the actual flush happens in process context.
 */
static enum hrtimer_restart wbuf_timer_callback_nolock(struct hrtimer *timer)
{
	struct ubifs_wbuf *wbuf = container_of(timer, struct ubifs_wbuf, timer);

	dbg_io("jhead %s", dbg_jhead(wbuf->jhead));
	wbuf->need_sync = 1;
	wbuf->c->need_wbuf_sync = 1;
	ubifs_wake_up_bgt(wbuf->c);
	return HRTIMER_NORESTART;
}

/**
 * new_wbuf_timer_nolock - start new write-buffer timer.
 * @c: UBIFS file-system description object
 * @wbuf: write-buffer descriptor
 *
 * Arms the flush timer with a soft limit derived from
 * @dirty_writeback_interval and a 10% slack range, unless timers are
 * disabled for this write-buffer.
 */
static void new_wbuf_timer_nolock(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
{
	ktime_t softlimit = ms_to_ktime(dirty_writeback_interval * 10);
	unsigned long long delta = dirty_writeback_interval;

	/* centi to milli, milli to nano, then 10% */
	delta *= 10ULL * NSEC_PER_MSEC / 10ULL;

	ubifs_assert(c, !hrtimer_active(&wbuf->timer));
	ubifs_assert(c, delta <= ULONG_MAX);

	if (wbuf->no_timer)
		return;
	dbg_io("set timer for jhead %s, %llu-%llu millisecs",
	       dbg_jhead(wbuf->jhead),
	       div_u64(ktime_to_ns(softlimit), USEC_PER_SEC),
	       div_u64(ktime_to_ns(softlimit) + delta, USEC_PER_SEC));
	hrtimer_start_range_ns(&wbuf->timer, softlimit, delta,
			       HRTIMER_MODE_REL);
}

/**
 * cancel_wbuf_timer_nolock - cancel write-buffer timer.
 * @wbuf: write-buffer descriptor
 *
 * Also clears the pending @need_sync request, so a timer that already
 * fired will not trigger a redundant synchronization.
 */
static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
{
	if (wbuf->no_timer)
		return;
	wbuf->need_sync = 0;
	hrtimer_cancel(&wbuf->timer);
}

/**
 * ubifs_wbuf_sync_nolock - synchronize write-buffer.
 * @wbuf: write-buffer to synchronize
 *
 * This function synchronizes write-buffer @buf and returns zero in case of
 * success or a negative error code in case of failure.
 *
 * Note, although write-buffers are of @c->max_write_size, this function does
 * not necessarily writes all @c->max_write_size bytes to the flash. Instead,
 * if the write-buffer is only partially filled with data, only the used part
 * of the write-buffer (aligned on @c->min_io_size boundary) is synchronized.
 * This way we waste less space.
 */
int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
{
	struct ubifs_info *c = wbuf->c;
	int err, dirt, sync_len;

	cancel_wbuf_timer_nolock(wbuf);
	if (!wbuf->used || wbuf->lnum == -1)
		/* Write-buffer is empty or not seeked */
		return 0;

	dbg_io("LEB %d:%d, %d bytes, jhead %s",
	       wbuf->lnum, wbuf->offs, wbuf->used, dbg_jhead(wbuf->jhead));
	ubifs_assert(c, !(wbuf->avail & 7));
	ubifs_assert(c, wbuf->offs + wbuf->size <= c->leb_size);
	ubifs_assert(c, wbuf->size >= c->min_io_size);
	ubifs_assert(c, wbuf->size <= c->max_write_size);
	ubifs_assert(c, wbuf->size % c->min_io_size == 0);
	ubifs_assert(c, !c->ro_media && !c->ro_mount);
	if (c->leb_size - wbuf->offs >= c->max_write_size)
		ubifs_assert(c, !((wbuf->offs + wbuf->size) % c->max_write_size));

	if (c->ro_error)
		return -EROFS;

	/*
	 * Do not write whole write buffer but write only the minimum necessary
	 * amount of min. I/O units.
	 */
	sync_len = ALIGN(wbuf->used, c->min_io_size);
	dirt = sync_len - wbuf->used;
	if (dirt)
		ubifs_pad(c, wbuf->buf + wbuf->used, dirt);
	err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, sync_len);
	if (err)
		return err;

	spin_lock(&wbuf->lock);
	wbuf->offs += sync_len;
	/*
	 * Now @wbuf->offs is not necessarily aligned to @c->max_write_size.
	 * But our goal is to optimize writes and make sure we write in
	 * @c->max_write_size chunks and to @c->max_write_size-aligned offset.
	 * Thus, if @wbuf->offs is not aligned to @c->max_write_size now, make
	 * sure that @wbuf->offs + @wbuf->size is aligned to
	 * @c->max_write_size. This way we make sure that after next
	 * write-buffer flush we are again at the optimal offset (aligned to
	 * @c->max_write_size).
	 */
	if (c->leb_size - wbuf->offs < c->max_write_size)
		wbuf->size = c->leb_size - wbuf->offs;
	else if (wbuf->offs & (c->max_write_size - 1))
		wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs;
	else
		wbuf->size = c->max_write_size;
	wbuf->avail = wbuf->size;
	wbuf->used = 0;
	wbuf->next_ino = 0;
	spin_unlock(&wbuf->lock);

	/* Let the journal head owner account the dirtied (padding) space */
	if (wbuf->sync_callback)
		err = wbuf->sync_callback(c, wbuf->lnum,
					  c->leb_size - wbuf->offs, dirt);
	return err;
}

/**
 * ubifs_wbuf_seek_nolock - seek write-buffer.
 * @wbuf: write-buffer
 * @lnum: logical eraseblock number to seek to
 * @offs: logical eraseblock offset to seek to
 *
 * This function targets the write-buffer to logical eraseblock @lnum:@offs.
 * The write-buffer has to be empty. Returns zero in case of success and a
 * negative error code in case of failure.
 */
int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs)
{
	const struct ubifs_info *c = wbuf->c;

	dbg_io("LEB %d:%d, jhead %s", lnum, offs, dbg_jhead(wbuf->jhead));
	ubifs_assert(c, lnum >= 0 && lnum < c->leb_cnt);
	ubifs_assert(c, offs >= 0 && offs <= c->leb_size);
	ubifs_assert(c, offs % c->min_io_size == 0 && !(offs & 7));
	ubifs_assert(c, lnum != wbuf->lnum);
	ubifs_assert(c, wbuf->used == 0);

	spin_lock(&wbuf->lock);
	wbuf->lnum = lnum;
	wbuf->offs = offs;
	/* Same size-shrinking logic as in 'ubifs_wbuf_sync_nolock()' */
	if (c->leb_size - wbuf->offs < c->max_write_size)
		wbuf->size = c->leb_size - wbuf->offs;
	else if (wbuf->offs & (c->max_write_size - 1))
		wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs;
	else
		wbuf->size = c->max_write_size;
	wbuf->avail = wbuf->size;
	wbuf->used = 0;
	spin_unlock(&wbuf->lock);

	return 0;
}

/**
 * ubifs_bg_wbufs_sync - synchronize write-buffers.
657 * @c: UBIFS file-system description object 658 * 659 * This function is called by background thread to synchronize write-buffers. 660 * Returns zero in case of success and a negative error code in case of 661 * failure. 662 */ 663 int ubifs_bg_wbufs_sync(struct ubifs_info *c) 664 { 665 int err, i; 666 667 ubifs_assert(c, !c->ro_media && !c->ro_mount); 668 if (!c->need_wbuf_sync) 669 return 0; 670 c->need_wbuf_sync = 0; 671 672 if (c->ro_error) { 673 err = -EROFS; 674 goto out_timers; 675 } 676 677 dbg_io("synchronize"); 678 for (i = 0; i < c->jhead_cnt; i++) { 679 struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf; 680 681 cond_resched(); 682 683 /* 684 * If the mutex is locked then wbuf is being changed, so 685 * synchronization is not necessary. 686 */ 687 if (mutex_is_locked(&wbuf->io_mutex)) 688 continue; 689 690 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); 691 if (!wbuf->need_sync) { 692 mutex_unlock(&wbuf->io_mutex); 693 continue; 694 } 695 696 err = ubifs_wbuf_sync_nolock(wbuf); 697 mutex_unlock(&wbuf->io_mutex); 698 if (err) { 699 ubifs_err(c, "cannot sync write-buffer, error %d", err); 700 ubifs_ro_mode(c, err); 701 goto out_timers; 702 } 703 } 704 705 return 0; 706 707 out_timers: 708 /* Cancel all timers to prevent repeated errors */ 709 for (i = 0; i < c->jhead_cnt; i++) { 710 struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf; 711 712 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); 713 cancel_wbuf_timer_nolock(wbuf); 714 mutex_unlock(&wbuf->io_mutex); 715 } 716 return err; 717 } 718 719 /** 720 * ubifs_wbuf_write_nolock - write data to flash via write-buffer. 721 * @wbuf: write-buffer 722 * @buf: node to write 723 * @len: node length 724 * 725 * This function writes data to flash via write-buffer @wbuf. This means that 726 * the last piece of the node won't reach the flash media immediately if it 727 * does not take whole max. write unit (@c->max_write_size). 
Instead, the node 728 * will sit in RAM until the write-buffer is synchronized (e.g., by timer, or 729 * because more data are appended to the write-buffer). 730 * 731 * This function returns zero in case of success and a negative error code in 732 * case of failure. If the node cannot be written because there is no more 733 * space in this logical eraseblock, %-ENOSPC is returned. 734 */ 735 int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) 736 { 737 struct ubifs_info *c = wbuf->c; 738 int err, n, written = 0, aligned_len = ALIGN(len, 8); 739 740 dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len, 741 dbg_ntype(((struct ubifs_ch *)buf)->node_type), 742 dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs + wbuf->used); 743 ubifs_assert(c, len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt); 744 ubifs_assert(c, wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0); 745 ubifs_assert(c, !(wbuf->offs & 7) && wbuf->offs <= c->leb_size); 746 ubifs_assert(c, wbuf->avail > 0 && wbuf->avail <= wbuf->size); 747 ubifs_assert(c, wbuf->size >= c->min_io_size); 748 ubifs_assert(c, wbuf->size <= c->max_write_size); 749 ubifs_assert(c, wbuf->size % c->min_io_size == 0); 750 ubifs_assert(c, mutex_is_locked(&wbuf->io_mutex)); 751 ubifs_assert(c, !c->ro_media && !c->ro_mount); 752 ubifs_assert(c, !c->space_fixup); 753 if (c->leb_size - wbuf->offs >= c->max_write_size) 754 ubifs_assert(c, !((wbuf->offs + wbuf->size) % c->max_write_size)); 755 756 if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) { 757 err = -ENOSPC; 758 goto out; 759 } 760 761 cancel_wbuf_timer_nolock(wbuf); 762 763 if (c->ro_error) 764 return -EROFS; 765 766 if (aligned_len <= wbuf->avail) { 767 /* 768 * The node is not very large and fits entirely within 769 * write-buffer. 
770 */ 771 memcpy(wbuf->buf + wbuf->used, buf, len); 772 if (aligned_len > len) { 773 ubifs_assert(c, aligned_len - len < 8); 774 ubifs_pad(c, wbuf->buf + wbuf->used + len, aligned_len - len); 775 } 776 777 if (aligned_len == wbuf->avail) { 778 dbg_io("flush jhead %s wbuf to LEB %d:%d", 779 dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); 780 err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, 781 wbuf->offs, wbuf->size); 782 if (err) 783 goto out; 784 785 spin_lock(&wbuf->lock); 786 wbuf->offs += wbuf->size; 787 if (c->leb_size - wbuf->offs >= c->max_write_size) 788 wbuf->size = c->max_write_size; 789 else 790 wbuf->size = c->leb_size - wbuf->offs; 791 wbuf->avail = wbuf->size; 792 wbuf->used = 0; 793 wbuf->next_ino = 0; 794 spin_unlock(&wbuf->lock); 795 } else { 796 spin_lock(&wbuf->lock); 797 wbuf->avail -= aligned_len; 798 wbuf->used += aligned_len; 799 spin_unlock(&wbuf->lock); 800 } 801 802 goto exit; 803 } 804 805 if (wbuf->used) { 806 /* 807 * The node is large enough and does not fit entirely within 808 * current available space. We have to fill and flush 809 * write-buffer and switch to the next max. write unit. 810 */ 811 dbg_io("flush jhead %s wbuf to LEB %d:%d", 812 dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); 813 memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail); 814 err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, 815 wbuf->size); 816 if (err) 817 goto out; 818 819 wbuf->offs += wbuf->size; 820 len -= wbuf->avail; 821 aligned_len -= wbuf->avail; 822 written += wbuf->avail; 823 } else if (wbuf->offs & (c->max_write_size - 1)) { 824 /* 825 * The write-buffer offset is not aligned to 826 * @c->max_write_size and @wbuf->size is less than 827 * @c->max_write_size. Write @wbuf->size bytes to make sure the 828 * following writes are done in optimal @c->max_write_size 829 * chunks. 
830 */ 831 dbg_io("write %d bytes to LEB %d:%d", 832 wbuf->size, wbuf->lnum, wbuf->offs); 833 err = ubifs_leb_write(c, wbuf->lnum, buf, wbuf->offs, 834 wbuf->size); 835 if (err) 836 goto out; 837 838 wbuf->offs += wbuf->size; 839 len -= wbuf->size; 840 aligned_len -= wbuf->size; 841 written += wbuf->size; 842 } 843 844 /* 845 * The remaining data may take more whole max. write units, so write the 846 * remains multiple to max. write unit size directly to the flash media. 847 * We align node length to 8-byte boundary because we anyway flash wbuf 848 * if the remaining space is less than 8 bytes. 849 */ 850 n = aligned_len >> c->max_write_shift; 851 if (n) { 852 int m = n - 1; 853 854 dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, 855 wbuf->offs); 856 857 if (m) { 858 /* '(n-1)<<c->max_write_shift < len' is always true. */ 859 m <<= c->max_write_shift; 860 err = ubifs_leb_write(c, wbuf->lnum, buf + written, 861 wbuf->offs, m); 862 if (err) 863 goto out; 864 wbuf->offs += m; 865 aligned_len -= m; 866 len -= m; 867 written += m; 868 } 869 870 /* 871 * The non-written len of buf may be less than 'n' because 872 * parameter 'len' is not 8 bytes aligned, so here we read 873 * min(len, n) bytes from buf. 874 */ 875 n = 1 << c->max_write_shift; 876 memcpy(wbuf->buf, buf + written, min(len, n)); 877 if (n > len) { 878 ubifs_assert(c, n - len < 8); 879 ubifs_pad(c, wbuf->buf + len, n - len); 880 } 881 882 err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, n); 883 if (err) 884 goto out; 885 wbuf->offs += n; 886 aligned_len -= n; 887 len -= min(len, n); 888 written += n; 889 } 890 891 spin_lock(&wbuf->lock); 892 if (aligned_len) { 893 /* 894 * And now we have what's left and what does not take whole 895 * max. write unit, so write it to the write-buffer and we are 896 * done. 
897 */ 898 memcpy(wbuf->buf, buf + written, len); 899 if (aligned_len > len) { 900 ubifs_assert(c, aligned_len - len < 8); 901 ubifs_pad(c, wbuf->buf + len, aligned_len - len); 902 } 903 } 904 905 if (c->leb_size - wbuf->offs >= c->max_write_size) 906 wbuf->size = c->max_write_size; 907 else 908 wbuf->size = c->leb_size - wbuf->offs; 909 wbuf->avail = wbuf->size - aligned_len; 910 wbuf->used = aligned_len; 911 wbuf->next_ino = 0; 912 spin_unlock(&wbuf->lock); 913 914 exit: 915 if (wbuf->sync_callback) { 916 int free = c->leb_size - wbuf->offs - wbuf->used; 917 918 err = wbuf->sync_callback(c, wbuf->lnum, free, 0); 919 if (err) 920 goto out; 921 } 922 923 if (wbuf->used) 924 new_wbuf_timer_nolock(c, wbuf); 925 926 return 0; 927 928 out: 929 ubifs_err(c, "cannot write %d bytes to LEB %d:%d, error %d", 930 len, wbuf->lnum, wbuf->offs, err); 931 ubifs_dump_node(c, buf, written + len); 932 dump_stack(); 933 ubifs_dump_leb(c, wbuf->lnum); 934 return err; 935 } 936 937 /** 938 * ubifs_write_node_hmac - write node to the media. 939 * @c: UBIFS file-system description object 940 * @buf: the node to write 941 * @len: node length 942 * @lnum: logical eraseblock number 943 * @offs: offset within the logical eraseblock 944 * @hmac_offs: offset of the HMAC within the node 945 * 946 * This function automatically fills node magic number, assigns sequence 947 * number, and calculates node CRC checksum. The length of the @buf buffer has 948 * to be aligned to the minimal I/O unit size. This function automatically 949 * appends padding node and padding bytes if needed. Returns zero in case of 950 * success and a negative error code in case of failure. 
 */
int ubifs_write_node_hmac(struct ubifs_info *c, void *buf, int len, int lnum,
			  int offs, int hmac_offs)
{
	/* Writes are done in whole min. I/O units */
	int err, buf_len = ALIGN(len, c->min_io_size);

	dbg_io("LEB %d:%d, %s, length %d (aligned %d)",
	       lnum, offs, dbg_ntype(((struct ubifs_ch *)buf)->node_type), len,
	       buf_len);
	ubifs_assert(c, lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
	ubifs_assert(c, offs % c->min_io_size == 0 && offs < c->leb_size);
	ubifs_assert(c, !c->ro_media && !c->ro_mount);
	ubifs_assert(c, !c->space_fixup);

	if (c->ro_error)
		return -EROFS;

	err = ubifs_prepare_node_hmac(c, buf, len, hmac_offs, 1);
	if (err)
		return err;

	err = ubifs_leb_write(c, lnum, buf, offs, buf_len);
	if (err)
		ubifs_dump_node(c, buf, len);

	return err;
}

/**
 * ubifs_write_node - write node to the media.
 * @c: UBIFS file-system description object
 * @buf: the node to write
 * @len: node length
 * @lnum: logical eraseblock number
 * @offs: offset within the logical eraseblock
 *
 * This function automatically fills node magic number, assigns sequence
 * number, and calculates node CRC checksum. The length of the @buf buffer has
 * to be aligned to the minimal I/O unit size. This function automatically
 * appends padding node and padding bytes if needed. Returns zero in case of
 * success and a negative error code in case of failure.
 */
int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum,
		     int offs)
{
	/* Convenience wrapper: hmac_offs < 0 means "no HMAC" */
	return ubifs_write_node_hmac(c, buf, len, lnum, offs, -1);
}

/**
 * ubifs_read_node_wbuf - read node from the media or write-buffer.
1001 * @wbuf: wbuf to check for un-written data 1002 * @buf: buffer to read to 1003 * @type: node type 1004 * @len: node length 1005 * @lnum: logical eraseblock number 1006 * @offs: offset within the logical eraseblock 1007 * 1008 * This function reads a node of known type and length, checks it and stores 1009 * in @buf. If the node partially or fully sits in the write-buffer, this 1010 * function takes data from the buffer, otherwise it reads the flash media. 1011 * Returns zero in case of success, %-EUCLEAN if CRC mismatched and a negative 1012 * error code in case of failure. 1013 */ 1014 int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, 1015 int lnum, int offs) 1016 { 1017 const struct ubifs_info *c = wbuf->c; 1018 int err, rlen, overlap; 1019 struct ubifs_ch *ch = buf; 1020 1021 dbg_io("LEB %d:%d, %s, length %d, jhead %s", lnum, offs, 1022 dbg_ntype(type), len, dbg_jhead(wbuf->jhead)); 1023 ubifs_assert(c, wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0); 1024 ubifs_assert(c, !(offs & 7) && offs < c->leb_size); 1025 ubifs_assert(c, type >= 0 && type < UBIFS_NODE_TYPES_CNT); 1026 1027 spin_lock(&wbuf->lock); 1028 overlap = (lnum == wbuf->lnum && offs + len > wbuf->offs); 1029 if (!overlap) { 1030 /* We may safely unlock the write-buffer and read the data */ 1031 spin_unlock(&wbuf->lock); 1032 return ubifs_read_node(c, buf, type, len, lnum, offs); 1033 } 1034 1035 /* Don't read under wbuf */ 1036 rlen = wbuf->offs - offs; 1037 if (rlen < 0) 1038 rlen = 0; 1039 1040 /* Copy the rest from the write-buffer */ 1041 memcpy(buf + rlen, wbuf->buf + offs + rlen - wbuf->offs, len - rlen); 1042 spin_unlock(&wbuf->lock); 1043 1044 if (rlen > 0) { 1045 /* Read everything that goes before write-buffer */ 1046 err = ubifs_leb_read(c, lnum, buf, offs, rlen, 0); 1047 if (err && err != -EBADMSG) 1048 return err; 1049 } 1050 1051 if (type != ch->node_type) { 1052 ubifs_err(c, "bad node type (%d but expected %d)", 1053 ch->node_type, type); 1054 
goto out; 1055 } 1056 1057 err = ubifs_check_node(c, buf, len, lnum, offs, 0, 0); 1058 if (err) { 1059 ubifs_err(c, "expected node type %d", type); 1060 return err; 1061 } 1062 1063 rlen = le32_to_cpu(ch->len); 1064 if (rlen != len) { 1065 ubifs_err(c, "bad node length %d, expected %d", rlen, len); 1066 goto out; 1067 } 1068 1069 return 0; 1070 1071 out: 1072 ubifs_err(c, "bad node at LEB %d:%d", lnum, offs); 1073 ubifs_dump_node(c, buf, len); 1074 dump_stack(); 1075 return -EINVAL; 1076 } 1077 1078 /** 1079 * ubifs_read_node - read node. 1080 * @c: UBIFS file-system description object 1081 * @buf: buffer to read to 1082 * @type: node type 1083 * @len: node length (not aligned) 1084 * @lnum: logical eraseblock number 1085 * @offs: offset within the logical eraseblock 1086 * 1087 * This function reads a node of known type and length, checks it and 1088 * stores in @buf. Returns zero in case of success, %-EUCLEAN if CRC mismatched 1089 * and a negative error code in case of failure. 1090 */ 1091 int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, 1092 int lnum, int offs) 1093 { 1094 int err, l; 1095 struct ubifs_ch *ch = buf; 1096 1097 dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len); 1098 ubifs_assert(c, lnum >= 0 && lnum < c->leb_cnt && offs >= 0); 1099 ubifs_assert(c, len >= UBIFS_CH_SZ && offs + len <= c->leb_size); 1100 ubifs_assert(c, !(offs & 7) && offs < c->leb_size); 1101 ubifs_assert(c, type >= 0 && type < UBIFS_NODE_TYPES_CNT); 1102 1103 err = ubifs_leb_read(c, lnum, buf, offs, len, 0); 1104 if (err && err != -EBADMSG) 1105 return err; 1106 1107 if (type != ch->node_type) { 1108 ubifs_errc(c, "bad node type (%d but expected %d)", 1109 ch->node_type, type); 1110 goto out; 1111 } 1112 1113 err = ubifs_check_node(c, buf, len, lnum, offs, 0, 0); 1114 if (err) { 1115 ubifs_errc(c, "expected node type %d", type); 1116 return err; 1117 } 1118 1119 l = le32_to_cpu(ch->len); 1120 if (l != len) { 1121 ubifs_errc(c, "bad 
node length %d, expected %d", l, len); 1122 goto out; 1123 } 1124 1125 return 0; 1126 1127 out: 1128 ubifs_errc(c, "bad node at LEB %d:%d, LEB mapping status %d", lnum, 1129 offs, ubi_is_mapped(c->ubi, lnum)); 1130 if (!c->probing) { 1131 ubifs_dump_node(c, buf, len); 1132 dump_stack(); 1133 } 1134 return -EINVAL; 1135 } 1136 1137 /** 1138 * ubifs_wbuf_init - initialize write-buffer. 1139 * @c: UBIFS file-system description object 1140 * @wbuf: write-buffer to initialize 1141 * 1142 * This function initializes write-buffer. Returns zero in case of success 1143 * %-ENOMEM in case of failure. 1144 */ 1145 int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf) 1146 { 1147 size_t size; 1148 1149 wbuf->buf = kmalloc(c->max_write_size, GFP_KERNEL); 1150 if (!wbuf->buf) 1151 return -ENOMEM; 1152 1153 size = (c->max_write_size / UBIFS_CH_SZ + 1) * sizeof(ino_t); 1154 wbuf->inodes = kmalloc(size, GFP_KERNEL); 1155 if (!wbuf->inodes) { 1156 kfree(wbuf->buf); 1157 wbuf->buf = NULL; 1158 return -ENOMEM; 1159 } 1160 1161 wbuf->used = 0; 1162 wbuf->lnum = wbuf->offs = -1; 1163 /* 1164 * If the LEB starts at the max. write size aligned address, then 1165 * write-buffer size has to be set to @c->max_write_size. Otherwise, 1166 * set it to something smaller so that it ends at the closest max. 1167 * write size boundary. 1168 */ 1169 size = c->max_write_size - (c->leb_start % c->max_write_size); 1170 wbuf->avail = wbuf->size = size; 1171 wbuf->sync_callback = NULL; 1172 mutex_init(&wbuf->io_mutex); 1173 spin_lock_init(&wbuf->lock); 1174 wbuf->c = c; 1175 wbuf->next_ino = 0; 1176 1177 hrtimer_setup(&wbuf->timer, wbuf_timer_callback_nolock, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 1178 return 0; 1179 } 1180 1181 /** 1182 * ubifs_wbuf_add_ino_nolock - add an inode number into the wbuf inode array. 1183 * @wbuf: the write-buffer where to add 1184 * @inum: the inode number 1185 * 1186 * This function adds an inode number to the inode array of the write-buffer. 
 */
void ubifs_wbuf_add_ino_nolock(struct ubifs_wbuf *wbuf, ino_t inum)
{
	if (!wbuf->buf)
		/* NOR flash or something similar */
		return;

	spin_lock(&wbuf->lock);
	/* Record the inode only if the write-buffer actually holds data */
	if (wbuf->used)
		wbuf->inodes[wbuf->next_ino++] = inum;
	spin_unlock(&wbuf->lock);
}

/**
 * wbuf_has_ino - returns if the wbuf contains data from the inode.
 * @wbuf: the write-buffer
 * @inum: the inode number
 *
 * This function returns with %1 if the write-buffer contains some data from the
 * given inode otherwise it returns with %0.
 */
static int wbuf_has_ino(struct ubifs_wbuf *wbuf, ino_t inum)
{
	int i, ret = 0;

	/* Linear scan of the recorded inode numbers under the wbuf spinlock */
	spin_lock(&wbuf->lock);
	for (i = 0; i < wbuf->next_ino; i++)
		if (inum == wbuf->inodes[i]) {
			ret = 1;
			break;
		}
	spin_unlock(&wbuf->lock);

	return ret;
}

/**
 * ubifs_sync_wbufs_by_inode - synchronize write-buffers for an inode.
 * @c: UBIFS file-system description object
 * @inode: inode to synchronize
 *
 * This function synchronizes write-buffers which contain nodes belonging to
 * @inode. Returns zero in case of success and a negative error code in case of
 * failure.
 */
int ubifs_sync_wbufs_by_inode(struct ubifs_info *c, struct inode *inode)
{
	int i, err = 0;

	for (i = 0; i < c->jhead_cnt; i++) {
		struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf;

		if (i == GCHD)
			/*
			 * GC head is special, do not look at it. Even if the
			 * head contains something related to this inode, it is
			 * a _copy_ of corresponding on-flash node which sits
			 * somewhere else.
			 */
			continue;

		/* Quick check before taking the (possibly contended) mutex */
		if (!wbuf_has_ino(wbuf, inode->i_ino))
			continue;

		mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
		/*
		 * Re-check under the I/O mutex: the first check held only the
		 * wbuf spinlock, so the buffer contents may have changed in
		 * between.
		 */
		if (wbuf_has_ino(wbuf, inode->i_ino))
			err = ubifs_wbuf_sync_nolock(wbuf);
		mutex_unlock(&wbuf->io_mutex);

		if (err) {
			/* A failed sync is fatal - force read-only mode */
			ubifs_ro_mode(c, err);
			return err;
		}
	}
	return 0;
}