1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Swap block device support for MTDs 4 * Turns an MTD device into a swap device with block wear leveling 5 * 6 * Copyright © 2007,2011 Nokia Corporation. All rights reserved. 7 * 8 * Authors: Jarkko Lavinen <jarkko.lavinen@nokia.com> 9 * 10 * Based on Richard Purdie's earlier implementation in 2007. Background 11 * support and lock-less operation written by Adrian Hunter. 12 */ 13 14 #include <linux/kernel.h> 15 #include <linux/module.h> 16 #include <linux/mtd/mtd.h> 17 #include <linux/mtd/blktrans.h> 18 #include <linux/rbtree.h> 19 #include <linux/sched.h> 20 #include <linux/slab.h> 21 #include <linux/vmalloc.h> 22 #include <linux/genhd.h> 23 #include <linux/swap.h> 24 #include <linux/debugfs.h> 25 #include <linux/seq_file.h> 26 #include <linux/device.h> 27 #include <linux/math64.h> 28 29 #define MTDSWAP_PREFIX "mtdswap" 30 31 /* 32 * The number of free eraseblocks when GC should stop 33 */ 34 #define CLEAN_BLOCK_THRESHOLD 20 35 36 /* 37 * Number of free eraseblocks below which GC can also collect low frag 38 * blocks. 39 */ 40 #define LOW_FRAG_GC_THRESHOLD 5 41 42 /* 43 * Wear level cost amortization. We want to do wear leveling on the background 44 * without disturbing gc too much. This is made by defining max GC frequency. 45 * Frequency value 6 means 1/6 of the GC passes will pick an erase block based 46 * on the biggest wear difference rather than the biggest dirtiness. 47 * 48 * The lower freq2 should be chosen so that it makes sure the maximum erase 49 * difference will decrease even if a malicious application is deliberately 50 * trying to make erase differences large. 51 */ 52 #define MAX_ERASE_DIFF 4000 53 #define COLLECT_NONDIRTY_BASE MAX_ERASE_DIFF 54 #define COLLECT_NONDIRTY_FREQ1 6 55 #define COLLECT_NONDIRTY_FREQ2 4 56 57 #define PAGE_UNDEF UINT_MAX 58 #define BLOCK_UNDEF UINT_MAX 59 #define BLOCK_ERROR (UINT_MAX - 1) 60 #define BLOCK_MAX (UINT_MAX - 2) 61 62 #define EBLOCK_BAD (1 << 0) 63 #define EBLOCK_NOMAGIC (1 << 1) 64 #define EBLOCK_BITFLIP (1 << 2) 65 #define EBLOCK_FAILED (1 << 3) 66 #define EBLOCK_READERR (1 << 4) 67 #define EBLOCK_IDX_SHIFT 5 68 69 struct swap_eb { 70 struct rb_node rb; 71 struct rb_root *root; 72 73 unsigned int flags; 74 unsigned int active_count; 75 unsigned int erase_count; 76 unsigned int pad; /* speeds up pointer decrement */ 77 }; 78 79 #define MTDSWAP_ECNT_MIN(rbroot) (rb_entry(rb_first(rbroot), struct swap_eb, \ 80 rb)->erase_count) 81 #define MTDSWAP_ECNT_MAX(rbroot) (rb_entry(rb_last(rbroot), struct swap_eb, \ 82 rb)->erase_count) 83 84 struct mtdswap_tree { 85 struct rb_root root; 86 unsigned int count; 87 }; 88 89 enum { 90 MTDSWAP_CLEAN, 91 MTDSWAP_USED, 92 MTDSWAP_LOWFRAG, 93 MTDSWAP_HIFRAG, 94 MTDSWAP_DIRTY, 95 MTDSWAP_BITFLIP, 96 MTDSWAP_FAILING, 97 MTDSWAP_TREE_CNT, 98 }; 99 100 struct mtdswap_dev { 101 struct mtd_blktrans_dev *mbd_dev; 102 struct mtd_info *mtd; 103 struct device *dev; 104 105 unsigned int *page_data; 106 unsigned int *revmap; 107 108 unsigned int eblks; 109 unsigned int spare_eblks; 110 unsigned int pages_per_eblk; 111 unsigned int max_erase_count; 112 struct swap_eb *eb_data; 113 114 struct mtdswap_tree trees[MTDSWAP_TREE_CNT]; 115 116 unsigned long long sect_read_count; 117 unsigned long long sect_write_count; 118 unsigned long long mtd_write_count; 119 unsigned long long mtd_read_count; 120 unsigned long long discard_count; 121 unsigned long long discard_page_count; 122 123 unsigned int curr_write_pos; 124 struct swap_eb *curr_write; 125 126 char *page_buf; 127 char *oob_buf; 128 }; 129 130 struct mtdswap_oobdata { 131 __le16 magic; 132 __le32 count; 133 } __packed; 134 135 #define MTDSWAP_MAGIC_CLEAN 0x2095 136 #define MTDSWAP_MAGIC_DIRTY (MTDSWAP_MAGIC_CLEAN + 1) 137 #define MTDSWAP_TYPE_CLEAN 0 138 #define MTDSWAP_TYPE_DIRTY 1 139 #define MTDSWAP_OOBSIZE sizeof(struct mtdswap_oobdata) 140 141 #define MTDSWAP_ERASE_RETRIES 3 /* Before marking erase block bad */ 142 #define MTDSWAP_IO_RETRIES 3 143 144 enum { 145 MTDSWAP_SCANNED_CLEAN, 146 MTDSWAP_SCANNED_DIRTY, 147 MTDSWAP_SCANNED_BITFLIP, 148 MTDSWAP_SCANNED_BAD, 149 }; 150 151 /* 152 * In the worst case mtdswap_writesect() has allocated the last clean 153 * page from the current block and is then pre-empted by the GC 154 * thread. The thread can consume a full erase block when moving a 155 * block. 156 */ 157 #define MIN_SPARE_EBLOCKS 2 158 #define MIN_ERASE_BLOCKS (MIN_SPARE_EBLOCKS + 1) 159 160 #define TREE_ROOT(d, name) (&d->trees[MTDSWAP_ ## name].root) 161 #define TREE_EMPTY(d, name) (TREE_ROOT(d, name)->rb_node == NULL) 162 #define TREE_NONEMPTY(d, name) (!TREE_EMPTY(d, name)) 163 #define TREE_COUNT(d, name) (d->trees[MTDSWAP_ ## name].count) 164 165 #define MTDSWAP_MBD_TO_MTDSWAP(dev) ((struct mtdswap_dev *)dev->priv) 166 167 static char partitions[128] = ""; 168 module_param_string(partitions, partitions, sizeof(partitions), 0444); 169 MODULE_PARM_DESC(partitions, "MTD partition numbers to use as swap " 170 "partitions=\"1,3,5\""); 171 172 static unsigned int spare_eblocks = 10; 173 module_param(spare_eblocks, uint, 0444); 174 MODULE_PARM_DESC(spare_eblocks, "Percentage of spare erase blocks for " 175 "garbage collection (default 10%)"); 176 177 static bool header; /* false */ 178 module_param(header, bool, 0444); 179 MODULE_PARM_DESC(header, 180 "Include builtin swap header (default 0, without header)"); 181 182 static int mtdswap_gc(struct mtdswap_dev *d, unsigned int background); 183 184 static loff_t mtdswap_eb_offset(struct mtdswap_dev *d, struct swap_eb *eb) 185 { 186 return (loff_t)(eb - d->eb_data) * d->mtd->erasesize; 187 } 188 189 static void mtdswap_eb_detach(struct mtdswap_dev *d, struct swap_eb *eb) 190 { 191 unsigned int oldidx; 192 struct mtdswap_tree *tp; 193 194 if (eb->root) { 195 tp = container_of(eb->root, struct mtdswap_tree, root); 196 oldidx = tp - &d->trees[0]; 197 198 d->trees[oldidx].count--; 199 rb_erase(&eb->rb, eb->root); 200 } 201 } 202 203 static void __mtdswap_rb_add(struct rb_root *root, struct swap_eb *eb) 204 { 205 struct rb_node **p, *parent = NULL; 206 struct swap_eb *cur; 207 208 p = &root->rb_node; 209 while (*p) { 210 parent = *p; 211 cur = rb_entry(parent, struct swap_eb, rb); 212 if (eb->erase_count > cur->erase_count) 213 p = &(*p)->rb_right; 214 else 215 p = &(*p)->rb_left; 216 } 217 218 rb_link_node(&eb->rb, parent, p); 219 rb_insert_color(&eb->rb, root); 220 } 221 222 static void mtdswap_rb_add(struct mtdswap_dev *d, struct swap_eb *eb, int idx) 223 { 224 struct rb_root *root; 225 226 if (eb->root == &d->trees[idx].root) 227 return; 228 229 mtdswap_eb_detach(d, eb); 230 root = &d->trees[idx].root; 231 __mtdswap_rb_add(root, eb); 232 eb->root = root; 233 d->trees[idx].count++; 234 } 235 236 static struct rb_node *mtdswap_rb_index(struct rb_root *root, unsigned int idx) 237 { 238 struct rb_node *p; 239 unsigned int i; 240 241 p = rb_first(root); 242 i = 0; 243 while (i < idx && p) { 244 p = rb_next(p); 245 i++; 246 } 247 248 return p; 249 } 250 251 static int mtdswap_handle_badblock(struct mtdswap_dev *d, struct swap_eb *eb) 252 { 253 int ret; 254 loff_t offset; 255 256 d->spare_eblks--; 257 eb->flags |= EBLOCK_BAD; 258 mtdswap_eb_detach(d, eb); 259 eb->root = NULL; 260 261 /* badblocks not supported */ 262 if (!mtd_can_have_bb(d->mtd)) 263 return 1; 264 265 offset = mtdswap_eb_offset(d, eb); 266 dev_warn(d->dev, "Marking bad block at %08llx\n", offset); 267 ret = mtd_block_markbad(d->mtd, offset); 268 269 if (ret) { 270 dev_warn(d->dev, "Mark block bad failed for block at %08llx " 271 "error %d\n", offset, ret); 272 return ret; 273 } 274 275 return 1; 276 277 } 278 279 static int mtdswap_handle_write_error(struct mtdswap_dev *d, struct swap_eb *eb) 280 { 281 unsigned int marked = eb->flags & EBLOCK_FAILED; 282 struct swap_eb *curr_write = d->curr_write; 283 284 eb->flags |= EBLOCK_FAILED; 285 if (curr_write == eb) { 286 d->curr_write = NULL; 287 288 if (!marked && d->curr_write_pos != 0) { 289 mtdswap_rb_add(d, eb, MTDSWAP_FAILING); 290 return 0; 291 } 292 } 293 294 return mtdswap_handle_badblock(d, eb); 295 } 296 297 static int mtdswap_read_oob(struct mtdswap_dev *d, loff_t from, 298 struct mtd_oob_ops *ops) 299 { 300 int ret = mtd_read_oob(d->mtd, from, ops); 301 302 if (mtd_is_bitflip(ret)) 303 return ret; 304 305 if (ret) { 306 dev_warn(d->dev, "Read OOB failed %d for block at %08llx\n", 307 ret, from); 308 return ret; 309 } 310 311 if (ops->oobretlen < ops->ooblen) { 312 dev_warn(d->dev, "Read OOB return short read (%zd bytes not " 313 "%zd) for block at %08llx\n", 314 ops->oobretlen, ops->ooblen, from); 315 return -EIO; 316 } 317 318 return 0; 319 } 320 321 static int mtdswap_read_markers(struct mtdswap_dev *d, struct swap_eb *eb) 322 { 323 struct mtdswap_oobdata *data, *data2; 324 int ret; 325 loff_t offset; 326 struct mtd_oob_ops ops; 327 328 offset = mtdswap_eb_offset(d, eb); 329 330 /* Check first if the block is bad. */ 331 if (mtd_can_have_bb(d->mtd) && mtd_block_isbad(d->mtd, offset)) 332 return MTDSWAP_SCANNED_BAD; 333 334 ops.ooblen = 2 * d->mtd->oobavail; 335 ops.oobbuf = d->oob_buf; 336 ops.ooboffs = 0; 337 ops.datbuf = NULL; 338 ops.mode = MTD_OPS_AUTO_OOB; 339 340 ret = mtdswap_read_oob(d, offset, &ops); 341 342 if (ret && !mtd_is_bitflip(ret)) 343 return ret; 344 345 data = (struct mtdswap_oobdata *)d->oob_buf; 346 data2 = (struct mtdswap_oobdata *) 347 (d->oob_buf + d->mtd->oobavail); 348 349 if (le16_to_cpu(data->magic) == MTDSWAP_MAGIC_CLEAN) { 350 eb->erase_count = le32_to_cpu(data->count); 351 if (mtd_is_bitflip(ret)) 352 ret = MTDSWAP_SCANNED_BITFLIP; 353 else { 354 if (le16_to_cpu(data2->magic) == MTDSWAP_MAGIC_DIRTY) 355 ret = MTDSWAP_SCANNED_DIRTY; 356 else 357 ret = MTDSWAP_SCANNED_CLEAN; 358 } 359 } else { 360 eb->flags |= EBLOCK_NOMAGIC; 361 ret = MTDSWAP_SCANNED_DIRTY; 362 } 363 364 return ret; 365 } 366 367 static int mtdswap_write_marker(struct mtdswap_dev *d, struct swap_eb *eb, 368 u16 marker) 369 { 370 struct mtdswap_oobdata n; 371 int ret; 372 loff_t offset; 373 struct mtd_oob_ops ops; 374 375 ops.ooboffs = 0; 376 ops.oobbuf = (uint8_t *)&n; 377 ops.mode = MTD_OPS_AUTO_OOB; 378 ops.datbuf = NULL; 379 380 if (marker == MTDSWAP_TYPE_CLEAN) { 381 n.magic = cpu_to_le16(MTDSWAP_MAGIC_CLEAN); 382 n.count = cpu_to_le32(eb->erase_count); 383 ops.ooblen = MTDSWAP_OOBSIZE; 384 offset = mtdswap_eb_offset(d, eb); 385 } else { 386 n.magic = cpu_to_le16(MTDSWAP_MAGIC_DIRTY); 387 ops.ooblen = sizeof(n.magic); 388 offset = mtdswap_eb_offset(d, eb) + d->mtd->writesize; 389 } 390 391 ret = mtd_write_oob(d->mtd, offset, &ops); 392 393 if (ret) { 394 dev_warn(d->dev, "Write OOB failed for block at %08llx " 395 "error %d\n", offset, ret); 396 if (ret == -EIO || mtd_is_eccerr(ret)) 397 mtdswap_handle_write_error(d, eb); 398 return ret; 399 } 400 401 if (ops.oobretlen != ops.ooblen) { 402 dev_warn(d->dev, "Short OOB write for block at %08llx: " 403 "%zd not %zd\n", 404 offset, ops.oobretlen, ops.ooblen); 405 return ret; 406 } 407 408 return 0; 409 } 410 411 /* 412 * Are there any erase blocks without MAGIC_CLEAN header, presumably 413 * because power was cut off after erase but before header write? We 414 * need to guestimate the erase count. 415 */ 416 static void mtdswap_check_counts(struct mtdswap_dev *d) 417 { 418 struct rb_root hist_root = RB_ROOT; 419 struct rb_node *medrb; 420 struct swap_eb *eb; 421 unsigned int i, cnt, median; 422 423 cnt = 0; 424 for (i = 0; i < d->eblks; i++) { 425 eb = d->eb_data + i; 426 427 if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_BAD | EBLOCK_READERR)) 428 continue; 429 430 __mtdswap_rb_add(&hist_root, eb); 431 cnt++; 432 } 433 434 if (cnt == 0) 435 return; 436 437 medrb = mtdswap_rb_index(&hist_root, cnt / 2); 438 median = rb_entry(medrb, struct swap_eb, rb)->erase_count; 439 440 d->max_erase_count = MTDSWAP_ECNT_MAX(&hist_root); 441 442 for (i = 0; i < d->eblks; i++) { 443 eb = d->eb_data + i; 444 445 if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_READERR)) 446 eb->erase_count = median; 447 448 if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_BAD | EBLOCK_READERR)) 449 continue; 450 451 rb_erase(&eb->rb, &hist_root); 452 } 453 } 454 455 static void mtdswap_scan_eblks(struct mtdswap_dev *d) 456 { 457 int status; 458 unsigned int i, idx; 459 struct swap_eb *eb; 460 461 for (i = 0; i < d->eblks; i++) { 462 eb = d->eb_data + i; 463 464 status = mtdswap_read_markers(d, eb); 465 if (status < 0) 466 eb->flags |= EBLOCK_READERR; 467 else if (status == MTDSWAP_SCANNED_BAD) { 468 eb->flags |= EBLOCK_BAD; 469 continue; 470 } 471 472 switch (status) { 473 case MTDSWAP_SCANNED_CLEAN: 474 idx = MTDSWAP_CLEAN; 475 break; 476 case MTDSWAP_SCANNED_DIRTY: 477 case MTDSWAP_SCANNED_BITFLIP: 478 idx = MTDSWAP_DIRTY; 479 break; 480 default: 481 idx = MTDSWAP_FAILING; 482 } 483 484 eb->flags |= (idx << EBLOCK_IDX_SHIFT); 485 } 486 487 mtdswap_check_counts(d); 488 489 for (i = 0; i < d->eblks; i++) { 490 eb = d->eb_data + i; 491 492 if (eb->flags & EBLOCK_BAD) 493 continue; 494 495 idx = eb->flags >> EBLOCK_IDX_SHIFT; 496 mtdswap_rb_add(d, eb, idx); 497 } 498 } 499 500 /* 501 * Place eblk into a tree corresponding to its number of active blocks 502 * it contains. 503 */ 504 static void mtdswap_store_eb(struct mtdswap_dev *d, struct swap_eb *eb) 505 { 506 unsigned int weight = eb->active_count; 507 unsigned int maxweight = d->pages_per_eblk; 508 509 if (eb == d->curr_write) 510 return; 511 512 if (eb->flags & EBLOCK_BITFLIP) 513 mtdswap_rb_add(d, eb, MTDSWAP_BITFLIP); 514 else if (eb->flags & (EBLOCK_READERR | EBLOCK_FAILED)) 515 mtdswap_rb_add(d, eb, MTDSWAP_FAILING); 516 if (weight == maxweight) 517 mtdswap_rb_add(d, eb, MTDSWAP_USED); 518 else if (weight == 0) 519 mtdswap_rb_add(d, eb, MTDSWAP_DIRTY); 520 else if (weight > (maxweight/2)) 521 mtdswap_rb_add(d, eb, MTDSWAP_LOWFRAG); 522 else 523 mtdswap_rb_add(d, eb, MTDSWAP_HIFRAG); 524 } 525 526 static int mtdswap_erase_block(struct mtdswap_dev *d, struct swap_eb *eb) 527 { 528 struct mtd_info *mtd = d->mtd; 529 struct erase_info erase; 530 unsigned int retries = 0; 531 int ret; 532 533 eb->erase_count++; 534 if (eb->erase_count > d->max_erase_count) 535 d->max_erase_count = eb->erase_count; 536 537 retry: 538 memset(&erase, 0, sizeof(struct erase_info)); 539 erase.addr = mtdswap_eb_offset(d, eb); 540 erase.len = mtd->erasesize; 541 542 ret = mtd_erase(mtd, &erase); 543 if (ret) { 544 if (retries++ < MTDSWAP_ERASE_RETRIES) { 545 dev_warn(d->dev, 546 "erase of erase block %#llx on %s failed", 547 erase.addr, mtd->name); 548 yield(); 549 goto retry; 550 } 551 552 dev_err(d->dev, "Cannot erase erase block %#llx on %s\n", 553 erase.addr, mtd->name); 554 555 mtdswap_handle_badblock(d, eb); 556 return -EIO; 557 } 558 559 return 0; 560 } 561 562 static int mtdswap_map_free_block(struct mtdswap_dev *d, unsigned int page, 563 unsigned int *block) 564 { 565 int ret; 566 struct swap_eb *old_eb = d->curr_write; 567 struct rb_root *clean_root; 568 struct swap_eb *eb; 569 570 if (old_eb == NULL || d->curr_write_pos >= d->pages_per_eblk) { 571 do { 572 if (TREE_EMPTY(d, CLEAN)) 573 return -ENOSPC; 574 575 clean_root = TREE_ROOT(d, CLEAN); 576 eb = rb_entry(rb_first(clean_root), struct swap_eb, rb); 577 rb_erase(&eb->rb, clean_root); 578 eb->root = NULL; 579 TREE_COUNT(d, CLEAN)--; 580 581 ret = mtdswap_write_marker(d, eb, MTDSWAP_TYPE_DIRTY); 582 } while (ret == -EIO || mtd_is_eccerr(ret)); 583 584 if (ret) 585 return ret; 586 587 d->curr_write_pos = 0; 588 d->curr_write = eb; 589 if (old_eb) 590 mtdswap_store_eb(d, old_eb); 591 } 592 593 *block = (d->curr_write - d->eb_data) * d->pages_per_eblk + 594 d->curr_write_pos; 595 596 d->curr_write->active_count++; 597 d->revmap[*block] = page; 598 d->curr_write_pos++; 599 600 return 0; 601 } 602 603 static unsigned int mtdswap_free_page_cnt(struct mtdswap_dev *d) 604 { 605 return TREE_COUNT(d, CLEAN) * d->pages_per_eblk + 606 d->pages_per_eblk - d->curr_write_pos; 607 } 608 609 static unsigned int mtdswap_enough_free_pages(struct mtdswap_dev *d) 610 { 611 return mtdswap_free_page_cnt(d) > d->pages_per_eblk; 612 } 613 614 static int mtdswap_write_block(struct mtdswap_dev *d, char *buf, 615 unsigned int page, unsigned int *bp, int gc_context) 616 { 617 struct mtd_info *mtd = d->mtd; 618 struct swap_eb *eb; 619 size_t retlen; 620 loff_t writepos; 621 int ret; 622 623 retry: 624 if (!gc_context) 625 while (!mtdswap_enough_free_pages(d)) 626 if (mtdswap_gc(d, 0) > 0) 627 return -ENOSPC; 628 629 ret = mtdswap_map_free_block(d, page, bp); 630 eb = d->eb_data + (*bp / d->pages_per_eblk); 631 632 if (ret == -EIO || mtd_is_eccerr(ret)) { 633 d->curr_write = NULL; 634 eb->active_count--; 635 d->revmap[*bp] = PAGE_UNDEF; 636 goto retry; 637 } 638 639 if (ret < 0) 640 return ret; 641 642 writepos = (loff_t)*bp << PAGE_SHIFT; 643 ret = mtd_write(mtd, writepos, PAGE_SIZE, &retlen, buf); 644 if (ret == -EIO || mtd_is_eccerr(ret)) { 645 d->curr_write_pos--; 646 eb->active_count--; 647 d->revmap[*bp] = PAGE_UNDEF; 648 mtdswap_handle_write_error(d, eb); 649 goto retry; 650 } 651 652 if (ret < 0) { 653 dev_err(d->dev, "Write to MTD device failed: %d (%zd written)", 654 ret, retlen); 655 goto err; 656 } 657 658 if (retlen != PAGE_SIZE) { 659 dev_err(d->dev, "Short write to MTD device: %zd written", 660 retlen); 661 ret = -EIO; 662 goto err; 663 } 664 665 return ret; 666 667 err: 668 d->curr_write_pos--; 669 eb->active_count--; 670 d->revmap[*bp] = PAGE_UNDEF; 671 672 return ret; 673 } 674 675 static int mtdswap_move_block(struct mtdswap_dev *d, unsigned int oldblock, 676 unsigned int *newblock) 677 { 678 struct mtd_info *mtd = d->mtd; 679 struct swap_eb *eb, *oldeb; 680 int ret; 681 size_t retlen; 682 unsigned int page, retries; 683 loff_t readpos; 684 685 page = d->revmap[oldblock]; 686 readpos = (loff_t) oldblock << PAGE_SHIFT; 687 retries = 0; 688 689 retry: 690 ret = mtd_read(mtd, readpos, PAGE_SIZE, &retlen, d->page_buf); 691 692 if (ret < 0 && !mtd_is_bitflip(ret)) { 693 oldeb = d->eb_data + oldblock / d->pages_per_eblk; 694 oldeb->flags |= EBLOCK_READERR; 695 696 dev_err(d->dev, "Read Error: %d (block %u)\n", ret, 697 oldblock); 698 retries++; 699 if (retries < MTDSWAP_IO_RETRIES) 700 goto retry; 701 702 goto read_error; 703 } 704 705 if (retlen != PAGE_SIZE) { 706 dev_err(d->dev, "Short read: %zd (block %u)\n", retlen, 707 oldblock); 708 ret = -EIO; 709 goto read_error; 710 } 711 712 ret = mtdswap_write_block(d, d->page_buf, page, newblock, 1); 713 if (ret < 0) { 714 d->page_data[page] = BLOCK_ERROR; 715 dev_err(d->dev, "Write error: %d\n", ret); 716 return ret; 717 } 718 719 eb = d->eb_data + *newblock / d->pages_per_eblk; 720 d->page_data[page] = *newblock; 721 d->revmap[oldblock] = PAGE_UNDEF; 722 eb = d->eb_data + oldblock / d->pages_per_eblk; 723 eb->active_count--; 724 725 return 0; 726 727 read_error: 728 d->page_data[page] = BLOCK_ERROR; 729 d->revmap[oldblock] = PAGE_UNDEF; 730 return ret; 731 } 732 733 static int mtdswap_gc_eblock(struct mtdswap_dev *d, struct swap_eb *eb) 734 { 735 unsigned int i, block, eblk_base, newblock; 736 int ret, errcode; 737 738 errcode = 0; 739 eblk_base = (eb - d->eb_data) * d->pages_per_eblk; 740 741 for (i = 0; i < d->pages_per_eblk; i++) { 742 if (d->spare_eblks < MIN_SPARE_EBLOCKS) 743 return -ENOSPC; 744 745 block = eblk_base + i; 746 if (d->revmap[block] == PAGE_UNDEF) 747 continue; 748 749 ret = mtdswap_move_block(d, block, &newblock); 750 if (ret < 0 && !errcode) 751 errcode = ret; 752 } 753 754 return errcode; 755 } 756 757 static int __mtdswap_choose_gc_tree(struct mtdswap_dev *d) 758 { 759 int idx, stopat; 760 761 if (TREE_COUNT(d, CLEAN) < LOW_FRAG_GC_THRESHOLD) 762 stopat = MTDSWAP_LOWFRAG; 763 else 764 stopat = MTDSWAP_HIFRAG; 765 766 for (idx = MTDSWAP_BITFLIP; idx >= stopat; idx--) 767 if (d->trees[idx].root.rb_node != NULL) 768 return idx; 769 770 return -1; 771 } 772 773 static int mtdswap_wlfreq(unsigned int maxdiff) 774 { 775 unsigned int h, x, y, dist, base; 776 777 /* 778 * Calculate linear ramp down from f1 to f2 when maxdiff goes from 779 * MAX_ERASE_DIFF to MAX_ERASE_DIFF + COLLECT_NONDIRTY_BASE. Similar 780 * to triangle with height f1 - f1 and width COLLECT_NONDIRTY_BASE. 781 */ 782 783 dist = maxdiff - MAX_ERASE_DIFF; 784 if (dist > COLLECT_NONDIRTY_BASE) 785 dist = COLLECT_NONDIRTY_BASE; 786 787 /* 788 * Modelling the slop as right angular triangle with base 789 * COLLECT_NONDIRTY_BASE and height freq1 - freq2. The ratio y/x is 790 * equal to the ratio h/base. 791 */ 792 h = COLLECT_NONDIRTY_FREQ1 - COLLECT_NONDIRTY_FREQ2; 793 base = COLLECT_NONDIRTY_BASE; 794 795 x = dist - base; 796 y = (x * h + base / 2) / base; 797 798 return COLLECT_NONDIRTY_FREQ2 + y; 799 } 800 801 static int mtdswap_choose_wl_tree(struct mtdswap_dev *d) 802 { 803 static unsigned int pick_cnt; 804 unsigned int i, idx = -1, wear, max; 805 struct rb_root *root; 806 807 max = 0; 808 for (i = 0; i <= MTDSWAP_DIRTY; i++) { 809 root = &d->trees[i].root; 810 if (root->rb_node == NULL) 811 continue; 812 813 wear = d->max_erase_count - MTDSWAP_ECNT_MIN(root); 814 if (wear > max) { 815 max = wear; 816 idx = i; 817 } 818 } 819 820 if (max > MAX_ERASE_DIFF && pick_cnt >= mtdswap_wlfreq(max) - 1) { 821 pick_cnt = 0; 822 return idx; 823 } 824 825 pick_cnt++; 826 return -1; 827 } 828 829 static int mtdswap_choose_gc_tree(struct mtdswap_dev *d, 830 unsigned int background) 831 { 832 int idx; 833 834 if (TREE_NONEMPTY(d, FAILING) && 835 (background || (TREE_EMPTY(d, CLEAN) && TREE_EMPTY(d, DIRTY)))) 836 return MTDSWAP_FAILING; 837 838 idx = mtdswap_choose_wl_tree(d); 839 if (idx >= MTDSWAP_CLEAN) 840 return idx; 841 842 return __mtdswap_choose_gc_tree(d); 843 } 844 845 static struct swap_eb *mtdswap_pick_gc_eblk(struct mtdswap_dev *d, 846 unsigned int background) 847 { 848 struct rb_root *rp = NULL; 849 struct swap_eb *eb = NULL; 850 int idx; 851 852 if (background && TREE_COUNT(d, CLEAN) > CLEAN_BLOCK_THRESHOLD && 853 TREE_EMPTY(d, DIRTY) && TREE_EMPTY(d, FAILING)) 854 return NULL; 855 856 idx = mtdswap_choose_gc_tree(d, background); 857 if (idx < 0) 858 return NULL; 859 860 rp = &d->trees[idx].root; 861 eb = rb_entry(rb_first(rp), struct swap_eb, rb); 862 863 rb_erase(&eb->rb, rp); 864 eb->root = NULL; 865 d->trees[idx].count--; 866 return eb; 867 } 868 869 static unsigned int mtdswap_test_patt(unsigned int i) 870 { 871 return i % 2 ? 0x55555555 : 0xAAAAAAAA; 872 } 873 874 static unsigned int mtdswap_eblk_passes(struct mtdswap_dev *d, 875 struct swap_eb *eb) 876 { 877 struct mtd_info *mtd = d->mtd; 878 unsigned int test, i, j, patt, mtd_pages; 879 loff_t base, pos; 880 unsigned int *p1 = (unsigned int *)d->page_buf; 881 unsigned char *p2 = (unsigned char *)d->oob_buf; 882 struct mtd_oob_ops ops; 883 int ret; 884 885 ops.mode = MTD_OPS_AUTO_OOB; 886 ops.len = mtd->writesize; 887 ops.ooblen = mtd->oobavail; 888 ops.ooboffs = 0; 889 ops.datbuf = d->page_buf; 890 ops.oobbuf = d->oob_buf; 891 base = mtdswap_eb_offset(d, eb); 892 mtd_pages = d->pages_per_eblk * PAGE_SIZE / mtd->writesize; 893 894 for (test = 0; test < 2; test++) { 895 pos = base; 896 for (i = 0; i < mtd_pages; i++) { 897 patt = mtdswap_test_patt(test + i); 898 memset(d->page_buf, patt, mtd->writesize); 899 memset(d->oob_buf, patt, mtd->oobavail); 900 ret = mtd_write_oob(mtd, pos, &ops); 901 if (ret) 902 goto error; 903 904 pos += mtd->writesize; 905 } 906 907 pos = base; 908 for (i = 0; i < mtd_pages; i++) { 909 ret = mtd_read_oob(mtd, pos, &ops); 910 if (ret) 911 goto error; 912 913 patt = mtdswap_test_patt(test + i); 914 for (j = 0; j < mtd->writesize/sizeof(int); j++) 915 if (p1[j] != patt) 916 goto error; 917 918 for (j = 0; j < mtd->oobavail; j++) 919 if (p2[j] != (unsigned char)patt) 920 goto error; 921 922 pos += mtd->writesize; 923 } 924 925 ret = mtdswap_erase_block(d, eb); 926 if (ret) 927 goto error; 928 } 929 930 eb->flags &= ~EBLOCK_READERR; 931 return 1; 932 933 error: 934 mtdswap_handle_badblock(d, eb); 935 return 0; 936 } 937 938 static int mtdswap_gc(struct mtdswap_dev *d, unsigned int background) 939 { 940 struct swap_eb *eb; 941 int ret; 942 943 if (d->spare_eblks < MIN_SPARE_EBLOCKS) 944 return 1; 945 946 eb = mtdswap_pick_gc_eblk(d, background); 947 if (!eb) 948 return 1; 949 950 ret = mtdswap_gc_eblock(d, eb); 951 if (ret == -ENOSPC) 952 return 1; 953 954 if (eb->flags & EBLOCK_FAILED) { 955 mtdswap_handle_badblock(d, eb); 956 return 0; 957 } 958 959 eb->flags &= ~EBLOCK_BITFLIP; 960 ret = mtdswap_erase_block(d, eb); 961 if ((eb->flags & EBLOCK_READERR) && 962 (ret || !mtdswap_eblk_passes(d, eb))) 963 return 0; 964 965 if (ret == 0) 966 ret = mtdswap_write_marker(d, eb, MTDSWAP_TYPE_CLEAN); 967 968 if (ret == 0) 969 mtdswap_rb_add(d, eb, MTDSWAP_CLEAN); 970 else if (ret != -EIO && !mtd_is_eccerr(ret)) 971 mtdswap_rb_add(d, eb, MTDSWAP_DIRTY); 972 973 return 0; 974 } 975 976 static void mtdswap_background(struct mtd_blktrans_dev *dev) 977 { 978 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 979 int ret; 980 981 while (1) { 982 ret = mtdswap_gc(d, 1); 983 if (ret || mtd_blktrans_cease_background(dev)) 984 return; 985 } 986 } 987 988 static void mtdswap_cleanup(struct mtdswap_dev *d) 989 { 990 vfree(d->eb_data); 991 vfree(d->revmap); 992 vfree(d->page_data); 993 kfree(d->oob_buf); 994 kfree(d->page_buf); 995 } 996 997 static int mtdswap_flush(struct mtd_blktrans_dev *dev) 998 { 999 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 1000 1001 mtd_sync(d->mtd); 1002 return 0; 1003 } 1004 1005 static unsigned int mtdswap_badblocks(struct mtd_info *mtd, uint64_t size) 1006 { 1007 loff_t offset; 1008 unsigned int badcnt; 1009 1010 badcnt = 0; 1011 1012 if (mtd_can_have_bb(mtd)) 1013 for (offset = 0; offset < size; offset += mtd->erasesize) 1014 if (mtd_block_isbad(mtd, offset)) 1015 badcnt++; 1016 1017 return badcnt; 1018 } 1019 1020 static int mtdswap_writesect(struct mtd_blktrans_dev *dev, 1021 unsigned long page, char *buf) 1022 { 1023 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 1024 unsigned int newblock, mapped; 1025 struct swap_eb *eb; 1026 int ret; 1027 1028 d->sect_write_count++; 1029 1030 if (d->spare_eblks < MIN_SPARE_EBLOCKS) 1031 return -ENOSPC; 1032 1033 if (header) { 1034 /* Ignore writes to the header page */ 1035 if (unlikely(page == 0)) 1036 return 0; 1037 1038 page--; 1039 } 1040 1041 mapped = d->page_data[page]; 1042 if (mapped <= BLOCK_MAX) { 1043 eb = d->eb_data + (mapped / d->pages_per_eblk); 1044 eb->active_count--; 1045 mtdswap_store_eb(d, eb); 1046 d->page_data[page] = BLOCK_UNDEF; 1047 d->revmap[mapped] = PAGE_UNDEF; 1048 } 1049 1050 ret = mtdswap_write_block(d, buf, page, &newblock, 0); 1051 d->mtd_write_count++; 1052 1053 if (ret < 0) 1054 return ret; 1055 1056 d->page_data[page] = newblock; 1057 1058 return 0; 1059 } 1060 1061 /* Provide a dummy swap header for the kernel */ 1062 static int mtdswap_auto_header(struct mtdswap_dev *d, char *buf) 1063 { 1064 union swap_header *hd = (union swap_header *)(buf); 1065 1066 memset(buf, 0, PAGE_SIZE - 10); 1067 1068 hd->info.version = 1; 1069 hd->info.last_page = d->mbd_dev->size - 1; 1070 hd->info.nr_badpages = 0; 1071 1072 memcpy(buf + PAGE_SIZE - 10, "SWAPSPACE2", 10); 1073 1074 return 0; 1075 } 1076 1077 static int mtdswap_readsect(struct mtd_blktrans_dev *dev, 1078 unsigned long page, char *buf) 1079 { 1080 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 1081 struct mtd_info *mtd = d->mtd; 1082 unsigned int realblock, retries; 1083 loff_t readpos; 1084 struct swap_eb *eb; 1085 size_t retlen; 1086 int ret; 1087 1088 d->sect_read_count++; 1089 1090 if (header) { 1091 if (unlikely(page == 0)) 1092 return mtdswap_auto_header(d, buf); 1093 1094 page--; 1095 } 1096 1097 realblock = d->page_data[page]; 1098 if (realblock > BLOCK_MAX) { 1099 memset(buf, 0x0, PAGE_SIZE); 1100 if (realblock == BLOCK_UNDEF) 1101 return 0; 1102 else 1103 return -EIO; 1104 } 1105 1106 eb = d->eb_data + (realblock / d->pages_per_eblk); 1107 BUG_ON(d->revmap[realblock] == PAGE_UNDEF); 1108 1109 readpos = (loff_t)realblock << PAGE_SHIFT; 1110 retries = 0; 1111 1112 retry: 1113 ret = mtd_read(mtd, readpos, PAGE_SIZE, &retlen, buf); 1114 1115 d->mtd_read_count++; 1116 if (mtd_is_bitflip(ret)) { 1117 eb->flags |= EBLOCK_BITFLIP; 1118 mtdswap_rb_add(d, eb, MTDSWAP_BITFLIP); 1119 ret = 0; 1120 } 1121 1122 if (ret < 0) { 1123 dev_err(d->dev, "Read error %d\n", ret); 1124 eb->flags |= EBLOCK_READERR; 1125 mtdswap_rb_add(d, eb, MTDSWAP_FAILING); 1126 retries++; 1127 if (retries < MTDSWAP_IO_RETRIES) 1128 goto retry; 1129 1130 return ret; 1131 } 1132 1133 if (retlen != PAGE_SIZE) { 1134 dev_err(d->dev, "Short read %zd\n", retlen); 1135 return -EIO; 1136 } 1137 1138 return 0; 1139 } 1140 1141 static int mtdswap_discard(struct mtd_blktrans_dev *dev, unsigned long first, 1142 unsigned nr_pages) 1143 { 1144 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 1145 unsigned long page; 1146 struct swap_eb *eb; 1147 unsigned int mapped; 1148 1149 d->discard_count++; 1150 1151 for (page = first; page < first + nr_pages; page++) { 1152 mapped = d->page_data[page]; 1153 if (mapped <= BLOCK_MAX) { 1154 eb = d->eb_data + (mapped / d->pages_per_eblk); 1155 eb->active_count--; 1156 mtdswap_store_eb(d, eb); 1157 d->page_data[page] = BLOCK_UNDEF; 1158 d->revmap[mapped] = PAGE_UNDEF; 1159 d->discard_page_count++; 1160 } else if (mapped == BLOCK_ERROR) { 1161 d->page_data[page] = BLOCK_UNDEF; 1162 d->discard_page_count++; 1163 } 1164 } 1165 1166 return 0; 1167 } 1168 1169 static int mtdswap_show(struct seq_file *s, void *data) 1170 { 1171 struct mtdswap_dev *d = (struct mtdswap_dev *) s->private; 1172 unsigned long sum; 1173 unsigned int count[MTDSWAP_TREE_CNT]; 1174 unsigned int min[MTDSWAP_TREE_CNT]; 1175 unsigned int max[MTDSWAP_TREE_CNT]; 1176 unsigned int i, cw = 0, cwp = 0, cwecount = 0, bb_cnt, mapped, pages; 1177 uint64_t use_size; 1178 static const char * const name[] = { 1179 "clean", "used", "low", "high", "dirty", "bitflip", "failing" 1180 }; 1181 1182 mutex_lock(&d->mbd_dev->lock); 1183 1184 for (i = 0; i < MTDSWAP_TREE_CNT; i++) { 1185 struct rb_root *root = &d->trees[i].root; 1186 1187 if (root->rb_node) { 1188 count[i] = d->trees[i].count; 1189 min[i] = MTDSWAP_ECNT_MIN(root); 1190 max[i] = MTDSWAP_ECNT_MAX(root); 1191 } else 1192 count[i] = 0; 1193 } 1194 1195 if (d->curr_write) { 1196 cw = 1; 1197 cwp = d->curr_write_pos; 1198 cwecount = d->curr_write->erase_count; 1199 } 1200 1201 sum = 0; 1202 for (i = 0; i < d->eblks; i++) 1203 sum += d->eb_data[i].erase_count; 1204 1205 use_size = (uint64_t)d->eblks * d->mtd->erasesize; 1206 bb_cnt = mtdswap_badblocks(d->mtd, use_size); 1207 1208 mapped = 0; 1209 pages = d->mbd_dev->size; 1210 for (i = 0; i < pages; i++) 1211 if (d->page_data[i] != BLOCK_UNDEF) 1212 mapped++; 1213 1214 mutex_unlock(&d->mbd_dev->lock); 1215 1216 for (i = 0; i < MTDSWAP_TREE_CNT; i++) { 1217 if (!count[i]) 1218 continue; 1219 1220 if (min[i] != max[i]) 1221 seq_printf(s, "%s:\t%5d erase blocks, erased min %d, " 1222 "max %d times\n", 1223 name[i], count[i], min[i], max[i]); 1224 else 1225 seq_printf(s, "%s:\t%5d erase blocks, all erased %d " 1226 "times\n", name[i], count[i], min[i]); 1227 } 1228 1229 if (bb_cnt) 1230 seq_printf(s, "bad:\t%5u erase blocks\n", bb_cnt); 1231 1232 if (cw) 1233 seq_printf(s, "current erase block: %u pages used, %u free, " 1234 "erased %u times\n", 1235 cwp, d->pages_per_eblk - cwp, cwecount); 1236 1237 seq_printf(s, "total erasures: %lu\n", sum); 1238 1239 seq_puts(s, "\n"); 1240 1241 seq_printf(s, "mtdswap_readsect count: %llu\n", d->sect_read_count); 1242 seq_printf(s, "mtdswap_writesect count: %llu\n", d->sect_write_count); 1243 seq_printf(s, "mtdswap_discard count: %llu\n", d->discard_count); 1244 seq_printf(s, "mtd read count: %llu\n", d->mtd_read_count); 1245 seq_printf(s, "mtd write count: %llu\n", d->mtd_write_count); 1246 seq_printf(s, "discarded pages count: %llu\n", d->discard_page_count); 1247 1248 seq_puts(s, "\n"); 1249 seq_printf(s, "total pages: %u\n", pages); 1250 seq_printf(s, "pages mapped: %u\n", mapped); 1251 1252 return 0; 1253 } 1254 DEFINE_SHOW_ATTRIBUTE(mtdswap); 1255 1256 static int mtdswap_add_debugfs(struct mtdswap_dev *d) 1257 { 1258 struct dentry *root = d->mtd->dbg.dfs_dir; 1259 1260 if (!IS_ENABLED(CONFIG_DEBUG_FS)) 1261 return 0; 1262 1263 if (IS_ERR_OR_NULL(root)) 1264 return -1; 1265 1266 debugfs_create_file("mtdswap_stats", S_IRUSR, root, d, &mtdswap_fops); 1267 1268 return 0; 1269 } 1270 1271 static int mtdswap_init(struct mtdswap_dev *d, unsigned int eblocks, 1272 unsigned int spare_cnt) 1273 { 1274 struct mtd_info *mtd = d->mbd_dev->mtd; 1275 unsigned int i, eblk_bytes, pages, blocks; 1276 int ret = -ENOMEM; 1277 1278 d->mtd = mtd; 1279 d->eblks = eblocks; 1280 d->spare_eblks = spare_cnt; 1281 d->pages_per_eblk = mtd->erasesize >> PAGE_SHIFT; 1282 1283 pages = d->mbd_dev->size; 1284 blocks = eblocks * d->pages_per_eblk; 1285 1286 for (i = 0; i < MTDSWAP_TREE_CNT; i++) 1287 d->trees[i].root = RB_ROOT; 1288 1289 d->page_data = vmalloc(array_size(pages, sizeof(int))); 1290 if (!d->page_data) 1291 goto page_data_fail; 1292 1293 d->revmap = vmalloc(array_size(blocks, sizeof(int))); 1294 if (!d->revmap) 1295 goto revmap_fail; 1296 1297 eblk_bytes = sizeof(struct swap_eb)*d->eblks; 1298 d->eb_data = vzalloc(eblk_bytes); 1299 if (!d->eb_data) 1300 goto eb_data_fail; 1301 1302 for (i = 0; i < pages; i++) 1303 d->page_data[i] = BLOCK_UNDEF; 1304 1305 for (i = 0; i < blocks; i++) 1306 d->revmap[i] = PAGE_UNDEF; 1307 1308 d->page_buf = kmalloc(PAGE_SIZE, GFP_KERNEL); 1309 if (!d->page_buf) 1310 goto page_buf_fail; 1311 1312 d->oob_buf = kmalloc_array(2, mtd->oobavail, GFP_KERNEL); 1313 if (!d->oob_buf) 1314 goto oob_buf_fail; 1315 1316 mtdswap_scan_eblks(d); 1317 1318 return 0; 1319 1320 oob_buf_fail: 1321 kfree(d->page_buf); 1322 page_buf_fail: 1323 vfree(d->eb_data); 1324 eb_data_fail: 1325 vfree(d->revmap); 1326 revmap_fail: 1327 vfree(d->page_data); 1328 page_data_fail: 1329 printk(KERN_ERR "%s: init failed (%d)\n", MTDSWAP_PREFIX, ret); 1330 return ret; 1331 } 1332 1333 static void mtdswap_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd) 1334 { 1335 struct mtdswap_dev *d; 1336 struct mtd_blktrans_dev *mbd_dev; 1337 char *parts; 1338 char *this_opt; 1339 unsigned long part; 1340 unsigned int eblocks, eavailable, bad_blocks, spare_cnt; 1341 uint64_t swap_size, use_size, size_limit; 1342 int ret; 1343 1344 parts = &partitions[0]; 1345 if (!*parts) 1346 return; 1347 1348 while ((this_opt = strsep(&parts, ",")) != NULL) { 1349 if (kstrtoul(this_opt, 0, &part) < 0) 1350 return; 1351 1352 if (mtd->index == part) 1353 break; 1354 } 1355 1356 if (mtd->index != part) 1357 return; 1358 1359 if (mtd->erasesize < PAGE_SIZE || mtd->erasesize % PAGE_SIZE) { 1360 printk(KERN_ERR "%s: Erase size %u not multiple of PAGE_SIZE " 1361 "%lu\n", MTDSWAP_PREFIX, mtd->erasesize, PAGE_SIZE); 1362 return; 1363 } 1364 1365 if (PAGE_SIZE % mtd->writesize || mtd->writesize > PAGE_SIZE) { 1366 printk(KERN_ERR "%s: PAGE_SIZE %lu not multiple of write size" 1367 " %u\n", MTDSWAP_PREFIX, PAGE_SIZE, mtd->writesize); 1368 return; 1369 } 1370 1371 if (!mtd->oobsize || mtd->oobavail < MTDSWAP_OOBSIZE) { 1372 printk(KERN_ERR "%s: Not enough free bytes in OOB, " 1373 "%d available, %zu needed.\n", 1374 MTDSWAP_PREFIX, mtd->oobavail, MTDSWAP_OOBSIZE); 1375 return; 1376 } 1377 1378 if (spare_eblocks > 100) 1379 spare_eblocks = 100; 1380 1381 use_size = mtd->size; 1382 size_limit = (uint64_t) BLOCK_MAX * PAGE_SIZE; 1383 1384 if (mtd->size > size_limit) { 1385 printk(KERN_WARNING "%s: Device too large. Limiting size to " 1386 "%llu bytes\n", MTDSWAP_PREFIX, size_limit); 1387 use_size = size_limit; 1388 } 1389 1390 eblocks = mtd_div_by_eb(use_size, mtd); 1391 use_size = (uint64_t)eblocks * mtd->erasesize; 1392 bad_blocks = mtdswap_badblocks(mtd, use_size); 1393 eavailable = eblocks - bad_blocks; 1394 1395 if (eavailable < MIN_ERASE_BLOCKS) { 1396 printk(KERN_ERR "%s: Not enough erase blocks. %u available, " 1397 "%d needed\n", MTDSWAP_PREFIX, eavailable, 1398 MIN_ERASE_BLOCKS); 1399 return; 1400 } 1401 1402 spare_cnt = div_u64((uint64_t)eavailable * spare_eblocks, 100); 1403 1404 if (spare_cnt < MIN_SPARE_EBLOCKS) 1405 spare_cnt = MIN_SPARE_EBLOCKS; 1406 1407 if (spare_cnt > eavailable - 1) 1408 spare_cnt = eavailable - 1; 1409 1410 swap_size = (uint64_t)(eavailable - spare_cnt) * mtd->erasesize + 1411 (header ? PAGE_SIZE : 0); 1412 1413 printk(KERN_INFO "%s: Enabling MTD swap on device %lu, size %llu KB, " 1414 "%u spare, %u bad blocks\n", 1415 MTDSWAP_PREFIX, part, swap_size / 1024, spare_cnt, bad_blocks); 1416 1417 d = kzalloc(sizeof(struct mtdswap_dev), GFP_KERNEL); 1418 if (!d) 1419 return; 1420 1421 mbd_dev = kzalloc(sizeof(struct mtd_blktrans_dev), GFP_KERNEL); 1422 if (!mbd_dev) { 1423 kfree(d); 1424 return; 1425 } 1426 1427 d->mbd_dev = mbd_dev; 1428 mbd_dev->priv = d; 1429 1430 mbd_dev->mtd = mtd; 1431 mbd_dev->devnum = mtd->index; 1432 mbd_dev->size = swap_size >> PAGE_SHIFT; 1433 mbd_dev->tr = tr; 1434 1435 if (!(mtd->flags & MTD_WRITEABLE)) 1436 mbd_dev->readonly = 1; 1437 1438 if (mtdswap_init(d, eblocks, spare_cnt) < 0) 1439 goto init_failed; 1440 1441 if (add_mtd_blktrans_dev(mbd_dev) < 0) 1442 goto cleanup; 1443 1444 d->dev = disk_to_dev(mbd_dev->disk); 1445 1446 ret = mtdswap_add_debugfs(d); 1447 if (ret < 0) 1448 goto debugfs_failed; 1449 1450 return; 1451 1452 debugfs_failed: 1453 del_mtd_blktrans_dev(mbd_dev); 1454 1455 cleanup: 1456 mtdswap_cleanup(d); 1457 1458 init_failed: 1459 kfree(mbd_dev); 1460 kfree(d); 1461 } 1462 1463 static void mtdswap_remove_dev(struct mtd_blktrans_dev *dev) 1464 { 1465 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 1466 1467 del_mtd_blktrans_dev(dev); 1468 mtdswap_cleanup(d); 1469 kfree(d); 1470 } 1471 1472 static struct mtd_blktrans_ops mtdswap_ops = { 1473 .name = "mtdswap", 1474 .major = 0, 1475 .part_bits = 0, 1476 .blksize = PAGE_SIZE, 1477 .flush = mtdswap_flush, 1478 .readsect = mtdswap_readsect, 1479 .writesect = mtdswap_writesect, 1480 .discard = mtdswap_discard, 1481 .background = mtdswap_background, 1482 .add_mtd = mtdswap_add_mtd, 1483 .remove_dev = mtdswap_remove_dev, 1484 .owner = THIS_MODULE, 1485 }; 1486 1487 static int __init mtdswap_modinit(void) 1488 { 1489 return register_mtd_blktrans(&mtdswap_ops); 1490 } 1491 1492 static void __exit mtdswap_modexit(void) 1493 { 1494 deregister_mtd_blktrans(&mtdswap_ops); 1495 } 1496 1497 module_init(mtdswap_modinit); 1498 module_exit(mtdswap_modexit); 1499 1500 1501 MODULE_LICENSE("GPL"); 1502 MODULE_AUTHOR("Jarkko Lavinen <jarkko.lavinen@nokia.com>"); 1503 MODULE_DESCRIPTION("Block device access to an MTD suitable for using as " 1504 "swap space"); 1505