1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Swap block device support for MTDs 4 * Turns an MTD device into a swap device with block wear leveling 5 * 6 * Copyright © 2007,2011 Nokia Corporation. All rights reserved. 7 * 8 * Authors: Jarkko Lavinen <jarkko.lavinen@nokia.com> 9 * 10 * Based on Richard Purdie's earlier implementation in 2007. Background 11 * support and lock-less operation written by Adrian Hunter. 12 */ 13 14 #include <linux/kernel.h> 15 #include <linux/module.h> 16 #include <linux/mtd/mtd.h> 17 #include <linux/mtd/blktrans.h> 18 #include <linux/rbtree.h> 19 #include <linux/sched.h> 20 #include <linux/slab.h> 21 #include <linux/vmalloc.h> 22 #include <linux/genhd.h> 23 #include <linux/swap.h> 24 #include <linux/debugfs.h> 25 #include <linux/seq_file.h> 26 #include <linux/device.h> 27 #include <linux/math64.h> 28 29 #define MTDSWAP_PREFIX "mtdswap" 30 31 /* 32 * The number of free eraseblocks when GC should stop 33 */ 34 #define CLEAN_BLOCK_THRESHOLD 20 35 36 /* 37 * Number of free eraseblocks below which GC can also collect low frag 38 * blocks. 39 */ 40 #define LOW_FRAG_GC_THRESHOLD 5 41 42 /* 43 * Wear level cost amortization. We want to do wear leveling on the background 44 * without disturbing gc too much. This is made by defining max GC frequency. 45 * Frequency value 6 means 1/6 of the GC passes will pick an erase block based 46 * on the biggest wear difference rather than the biggest dirtiness. 47 * 48 * The lower freq2 should be chosen so that it makes sure the maximum erase 49 * difference will decrease even if a malicious application is deliberately 50 * trying to make erase differences large. 51 */ 52 #define MAX_ERASE_DIFF 4000 53 #define COLLECT_NONDIRTY_BASE MAX_ERASE_DIFF 54 #define COLLECT_NONDIRTY_FREQ1 6 55 #define COLLECT_NONDIRTY_FREQ2 4 56 57 #define PAGE_UNDEF UINT_MAX 58 #define BLOCK_UNDEF UINT_MAX 59 #define BLOCK_ERROR (UINT_MAX - 1) 60 #define BLOCK_MAX (UINT_MAX - 2) 61 62 #define EBLOCK_BAD (1 << 0) 63 #define EBLOCK_NOMAGIC (1 << 1) 64 #define EBLOCK_BITFLIP (1 << 2) 65 #define EBLOCK_FAILED (1 << 3) 66 #define EBLOCK_READERR (1 << 4) 67 #define EBLOCK_IDX_SHIFT 5 68 69 struct swap_eb { 70 struct rb_node rb; 71 struct rb_root *root; 72 73 unsigned int flags; 74 unsigned int active_count; 75 unsigned int erase_count; 76 unsigned int pad; /* speeds up pointer decrement */ 77 }; 78 79 #define MTDSWAP_ECNT_MIN(rbroot) (rb_entry(rb_first(rbroot), struct swap_eb, \ 80 rb)->erase_count) 81 #define MTDSWAP_ECNT_MAX(rbroot) (rb_entry(rb_last(rbroot), struct swap_eb, \ 82 rb)->erase_count) 83 84 struct mtdswap_tree { 85 struct rb_root root; 86 unsigned int count; 87 }; 88 89 enum { 90 MTDSWAP_CLEAN, 91 MTDSWAP_USED, 92 MTDSWAP_LOWFRAG, 93 MTDSWAP_HIFRAG, 94 MTDSWAP_DIRTY, 95 MTDSWAP_BITFLIP, 96 MTDSWAP_FAILING, 97 MTDSWAP_TREE_CNT, 98 }; 99 100 struct mtdswap_dev { 101 struct mtd_blktrans_dev *mbd_dev; 102 struct mtd_info *mtd; 103 struct device *dev; 104 105 unsigned int *page_data; 106 unsigned int *revmap; 107 108 unsigned int eblks; 109 unsigned int spare_eblks; 110 unsigned int pages_per_eblk; 111 unsigned int max_erase_count; 112 struct swap_eb *eb_data; 113 114 struct mtdswap_tree trees[MTDSWAP_TREE_CNT]; 115 116 unsigned long long sect_read_count; 117 unsigned long long sect_write_count; 118 unsigned long long mtd_write_count; 119 unsigned long long mtd_read_count; 120 unsigned long long discard_count; 121 unsigned long long discard_page_count; 122 123 unsigned int curr_write_pos; 124 struct swap_eb *curr_write; 125 126 char *page_buf; 127 char *oob_buf; 128 }; 129 130 struct mtdswap_oobdata { 131 __le16 magic; 132 __le32 count; 133 } __packed; 134 135 #define MTDSWAP_MAGIC_CLEAN 0x2095 136 #define MTDSWAP_MAGIC_DIRTY (MTDSWAP_MAGIC_CLEAN + 1) 137 #define MTDSWAP_TYPE_CLEAN 0 138 #define MTDSWAP_TYPE_DIRTY 1 139 #define MTDSWAP_OOBSIZE sizeof(struct mtdswap_oobdata) 140 141 #define MTDSWAP_ERASE_RETRIES 3 /* Before marking erase block bad */ 142 #define MTDSWAP_IO_RETRIES 3 143 144 enum { 145 MTDSWAP_SCANNED_CLEAN, 146 MTDSWAP_SCANNED_DIRTY, 147 MTDSWAP_SCANNED_BITFLIP, 148 MTDSWAP_SCANNED_BAD, 149 }; 150 151 /* 152 * In the worst case mtdswap_writesect() has allocated the last clean 153 * page from the current block and is then pre-empted by the GC 154 * thread. The thread can consume a full erase block when moving a 155 * block. 156 */ 157 #define MIN_SPARE_EBLOCKS 2 158 #define MIN_ERASE_BLOCKS (MIN_SPARE_EBLOCKS + 1) 159 160 #define TREE_ROOT(d, name) (&d->trees[MTDSWAP_ ## name].root) 161 #define TREE_EMPTY(d, name) (TREE_ROOT(d, name)->rb_node == NULL) 162 #define TREE_NONEMPTY(d, name) (!TREE_EMPTY(d, name)) 163 #define TREE_COUNT(d, name) (d->trees[MTDSWAP_ ## name].count) 164 165 #define MTDSWAP_MBD_TO_MTDSWAP(dev) ((struct mtdswap_dev *)dev->priv) 166 167 static char partitions[128] = ""; 168 module_param_string(partitions, partitions, sizeof(partitions), 0444); 169 MODULE_PARM_DESC(partitions, "MTD partition numbers to use as swap " 170 "partitions=\"1,3,5\""); 171 172 static unsigned int spare_eblocks = 10; 173 module_param(spare_eblocks, uint, 0444); 174 MODULE_PARM_DESC(spare_eblocks, "Percentage of spare erase blocks for " 175 "garbage collection (default 10%)"); 176 177 static bool header; /* false */ 178 module_param(header, bool, 0444); 179 MODULE_PARM_DESC(header, 180 "Include builtin swap header (default 0, without header)"); 181 182 static int mtdswap_gc(struct mtdswap_dev *d, unsigned int background); 183 184 static loff_t mtdswap_eb_offset(struct mtdswap_dev *d, struct swap_eb *eb) 185 { 186 return (loff_t)(eb - d->eb_data) * d->mtd->erasesize; 187 } 188 189 static void mtdswap_eb_detach(struct mtdswap_dev *d, struct swap_eb *eb) 190 { 191 unsigned int oldidx; 192 struct mtdswap_tree *tp; 193 194 if (eb->root) { 195 tp = container_of(eb->root, struct mtdswap_tree, root); 196 oldidx = tp - &d->trees[0]; 197 198 d->trees[oldidx].count--; 199 rb_erase(&eb->rb, eb->root); 200 } 201 } 202 203 static void __mtdswap_rb_add(struct rb_root *root, struct swap_eb *eb) 204 { 205 struct rb_node **p, *parent = NULL; 206 struct swap_eb *cur; 207 208 p = &root->rb_node; 209 while (*p) { 210 parent = *p; 211 cur = rb_entry(parent, struct swap_eb, rb); 212 if (eb->erase_count > cur->erase_count) 213 p = &(*p)->rb_right; 214 else 215 p = &(*p)->rb_left; 216 } 217 218 rb_link_node(&eb->rb, parent, p); 219 rb_insert_color(&eb->rb, root); 220 } 221 222 static void mtdswap_rb_add(struct mtdswap_dev *d, struct swap_eb *eb, int idx) 223 { 224 struct rb_root *root; 225 226 if (eb->root == &d->trees[idx].root) 227 return; 228 229 mtdswap_eb_detach(d, eb); 230 root = &d->trees[idx].root; 231 __mtdswap_rb_add(root, eb); 232 eb->root = root; 233 d->trees[idx].count++; 234 } 235 236 static struct rb_node *mtdswap_rb_index(struct rb_root *root, unsigned int idx) 237 { 238 struct rb_node *p; 239 unsigned int i; 240 241 p = rb_first(root); 242 i = 0; 243 while (i < idx && p) { 244 p = rb_next(p); 245 i++; 246 } 247 248 return p; 249 } 250 251 static int mtdswap_handle_badblock(struct mtdswap_dev *d, struct swap_eb *eb) 252 { 253 int ret; 254 loff_t offset; 255 256 d->spare_eblks--; 257 eb->flags |= EBLOCK_BAD; 258 mtdswap_eb_detach(d, eb); 259 eb->root = NULL; 260 261 /* badblocks not supported */ 262 if (!mtd_can_have_bb(d->mtd)) 263 return 1; 264 265 offset = mtdswap_eb_offset(d, eb); 266 dev_warn(d->dev, "Marking bad block at %08llx\n", offset); 267 ret = mtd_block_markbad(d->mtd, offset); 268 269 if (ret) { 270 dev_warn(d->dev, "Mark block bad failed for block at %08llx " 271 "error %d\n", offset, ret); 272 return ret; 273 } 274 275 return 1; 276 277 } 278 279 static int mtdswap_handle_write_error(struct mtdswap_dev *d, struct swap_eb *eb) 280 { 281 unsigned int marked = eb->flags & EBLOCK_FAILED; 282 struct swap_eb *curr_write = d->curr_write; 283 284 eb->flags |= EBLOCK_FAILED; 285 if (curr_write == eb) { 286 d->curr_write = NULL; 287 288 if (!marked && d->curr_write_pos != 0) { 289 mtdswap_rb_add(d, eb, MTDSWAP_FAILING); 290 return 0; 291 } 292 } 293 294 return mtdswap_handle_badblock(d, eb); 295 } 296 297 static int mtdswap_read_oob(struct mtdswap_dev *d, loff_t from, 298 struct mtd_oob_ops *ops) 299 { 300 int ret = mtd_read_oob(d->mtd, from, ops); 301 302 if (mtd_is_bitflip(ret)) 303 return ret; 304 305 if (ret) { 306 dev_warn(d->dev, "Read OOB failed %d for block at %08llx\n", 307 ret, from); 308 return ret; 309 } 310 311 if (ops->oobretlen < ops->ooblen) { 312 dev_warn(d->dev, "Read OOB return short read (%zd bytes not " 313 "%zd) for block at %08llx\n", 314 ops->oobretlen, ops->ooblen, from); 315 return -EIO; 316 } 317 318 return 0; 319 } 320 321 static int mtdswap_read_markers(struct mtdswap_dev *d, struct swap_eb *eb) 322 { 323 struct mtdswap_oobdata *data, *data2; 324 int ret; 325 loff_t offset; 326 struct mtd_oob_ops ops; 327 328 offset = mtdswap_eb_offset(d, eb); 329 330 /* Check first if the block is bad. */ 331 if (mtd_can_have_bb(d->mtd) && mtd_block_isbad(d->mtd, offset)) 332 return MTDSWAP_SCANNED_BAD; 333 334 ops.ooblen = 2 * d->mtd->oobavail; 335 ops.oobbuf = d->oob_buf; 336 ops.ooboffs = 0; 337 ops.datbuf = NULL; 338 ops.mode = MTD_OPS_AUTO_OOB; 339 340 ret = mtdswap_read_oob(d, offset, &ops); 341 342 if (ret && !mtd_is_bitflip(ret)) 343 return ret; 344 345 data = (struct mtdswap_oobdata *)d->oob_buf; 346 data2 = (struct mtdswap_oobdata *) 347 (d->oob_buf + d->mtd->oobavail); 348 349 if (le16_to_cpu(data->magic) == MTDSWAP_MAGIC_CLEAN) { 350 eb->erase_count = le32_to_cpu(data->count); 351 if (mtd_is_bitflip(ret)) 352 ret = MTDSWAP_SCANNED_BITFLIP; 353 else { 354 if (le16_to_cpu(data2->magic) == MTDSWAP_MAGIC_DIRTY) 355 ret = MTDSWAP_SCANNED_DIRTY; 356 else 357 ret = MTDSWAP_SCANNED_CLEAN; 358 } 359 } else { 360 eb->flags |= EBLOCK_NOMAGIC; 361 ret = MTDSWAP_SCANNED_DIRTY; 362 } 363 364 return ret; 365 } 366 367 static int mtdswap_write_marker(struct mtdswap_dev *d, struct swap_eb *eb, 368 u16 marker) 369 { 370 struct mtdswap_oobdata n; 371 int ret; 372 loff_t offset; 373 struct mtd_oob_ops ops; 374 375 ops.ooboffs = 0; 376 ops.oobbuf = (uint8_t *)&n; 377 ops.mode = MTD_OPS_AUTO_OOB; 378 ops.datbuf = NULL; 379 380 if (marker == MTDSWAP_TYPE_CLEAN) { 381 n.magic = cpu_to_le16(MTDSWAP_MAGIC_CLEAN); 382 n.count = cpu_to_le32(eb->erase_count); 383 ops.ooblen = MTDSWAP_OOBSIZE; 384 offset = mtdswap_eb_offset(d, eb); 385 } else { 386 n.magic = cpu_to_le16(MTDSWAP_MAGIC_DIRTY); 387 ops.ooblen = sizeof(n.magic); 388 offset = mtdswap_eb_offset(d, eb) + d->mtd->writesize; 389 } 390 391 ret = mtd_write_oob(d->mtd, offset, &ops); 392 393 if (ret) { 394 dev_warn(d->dev, "Write OOB failed for block at %08llx " 395 "error %d\n", offset, ret); 396 if (ret == -EIO || mtd_is_eccerr(ret)) 397 mtdswap_handle_write_error(d, eb); 398 return ret; 399 } 400 401 if (ops.oobretlen != ops.ooblen) { 402 dev_warn(d->dev, "Short OOB write for block at %08llx: " 403 "%zd not %zd\n", 404 offset, ops.oobretlen, ops.ooblen); 405 return ret; 406 } 407 408 return 0; 409 } 410 411 /* 412 * Are there any erase blocks without MAGIC_CLEAN header, presumably 413 * because power was cut off after erase but before header write? We 414 * need to guestimate the erase count. 415 */ 416 static void mtdswap_check_counts(struct mtdswap_dev *d) 417 { 418 struct rb_root hist_root = RB_ROOT; 419 struct rb_node *medrb; 420 struct swap_eb *eb; 421 unsigned int i, cnt, median; 422 423 cnt = 0; 424 for (i = 0; i < d->eblks; i++) { 425 eb = d->eb_data + i; 426 427 if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_BAD | EBLOCK_READERR)) 428 continue; 429 430 __mtdswap_rb_add(&hist_root, eb); 431 cnt++; 432 } 433 434 if (cnt == 0) 435 return; 436 437 medrb = mtdswap_rb_index(&hist_root, cnt / 2); 438 median = rb_entry(medrb, struct swap_eb, rb)->erase_count; 439 440 d->max_erase_count = MTDSWAP_ECNT_MAX(&hist_root); 441 442 for (i = 0; i < d->eblks; i++) { 443 eb = d->eb_data + i; 444 445 if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_READERR)) 446 eb->erase_count = median; 447 448 if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_BAD | EBLOCK_READERR)) 449 continue; 450 451 rb_erase(&eb->rb, &hist_root); 452 } 453 } 454 455 static void mtdswap_scan_eblks(struct mtdswap_dev *d) 456 { 457 int status; 458 unsigned int i, idx; 459 struct swap_eb *eb; 460 461 for (i = 0; i < d->eblks; i++) { 462 eb = d->eb_data + i; 463 464 status = mtdswap_read_markers(d, eb); 465 if (status < 0) 466 eb->flags |= EBLOCK_READERR; 467 else if (status == MTDSWAP_SCANNED_BAD) { 468 eb->flags |= EBLOCK_BAD; 469 continue; 470 } 471 472 switch (status) { 473 case MTDSWAP_SCANNED_CLEAN: 474 idx = MTDSWAP_CLEAN; 475 break; 476 case MTDSWAP_SCANNED_DIRTY: 477 case MTDSWAP_SCANNED_BITFLIP: 478 idx = MTDSWAP_DIRTY; 479 break; 480 default: 481 idx = MTDSWAP_FAILING; 482 } 483 484 eb->flags |= (idx << EBLOCK_IDX_SHIFT); 485 } 486 487 mtdswap_check_counts(d); 488 489 for (i = 0; i < d->eblks; i++) { 490 eb = d->eb_data + i; 491 492 if (eb->flags & EBLOCK_BAD) 493 continue; 494 495 idx = eb->flags >> EBLOCK_IDX_SHIFT; 496 mtdswap_rb_add(d, eb, idx); 497 } 498 } 499 500 /* 501 * Place eblk into a tree corresponding to its number of active blocks 502 * it contains. 503 */ 504 static void mtdswap_store_eb(struct mtdswap_dev *d, struct swap_eb *eb) 505 { 506 unsigned int weight = eb->active_count; 507 unsigned int maxweight = d->pages_per_eblk; 508 509 if (eb == d->curr_write) 510 return; 511 512 if (eb->flags & EBLOCK_BITFLIP) 513 mtdswap_rb_add(d, eb, MTDSWAP_BITFLIP); 514 else if (eb->flags & (EBLOCK_READERR | EBLOCK_FAILED)) 515 mtdswap_rb_add(d, eb, MTDSWAP_FAILING); 516 if (weight == maxweight) 517 mtdswap_rb_add(d, eb, MTDSWAP_USED); 518 else if (weight == 0) 519 mtdswap_rb_add(d, eb, MTDSWAP_DIRTY); 520 else if (weight > (maxweight/2)) 521 mtdswap_rb_add(d, eb, MTDSWAP_LOWFRAG); 522 else 523 mtdswap_rb_add(d, eb, MTDSWAP_HIFRAG); 524 } 525 526 static int mtdswap_erase_block(struct mtdswap_dev *d, struct swap_eb *eb) 527 { 528 struct mtd_info *mtd = d->mtd; 529 struct erase_info erase; 530 unsigned int retries = 0; 531 int ret; 532 533 eb->erase_count++; 534 if (eb->erase_count > d->max_erase_count) 535 d->max_erase_count = eb->erase_count; 536 537 retry: 538 memset(&erase, 0, sizeof(struct erase_info)); 539 erase.addr = mtdswap_eb_offset(d, eb); 540 erase.len = mtd->erasesize; 541 542 ret = mtd_erase(mtd, &erase); 543 if (ret) { 544 if (retries++ < MTDSWAP_ERASE_RETRIES) { 545 dev_warn(d->dev, 546 "erase of erase block %#llx on %s failed", 547 erase.addr, mtd->name); 548 yield(); 549 goto retry; 550 } 551 552 dev_err(d->dev, "Cannot erase erase block %#llx on %s\n", 553 erase.addr, mtd->name); 554 555 mtdswap_handle_badblock(d, eb); 556 return -EIO; 557 } 558 559 return 0; 560 } 561 562 static int mtdswap_map_free_block(struct mtdswap_dev *d, unsigned int page, 563 unsigned int *block) 564 { 565 int ret; 566 struct swap_eb *old_eb = d->curr_write; 567 struct rb_root *clean_root; 568 struct swap_eb *eb; 569 570 if (old_eb == NULL || d->curr_write_pos >= d->pages_per_eblk) { 571 do { 572 if (TREE_EMPTY(d, CLEAN)) 573 return -ENOSPC; 574 575 clean_root = TREE_ROOT(d, CLEAN); 576 eb = rb_entry(rb_first(clean_root), struct swap_eb, rb); 577 rb_erase(&eb->rb, clean_root); 578 eb->root = NULL; 579 TREE_COUNT(d, CLEAN)--; 580 581 ret = mtdswap_write_marker(d, eb, MTDSWAP_TYPE_DIRTY); 582 } while (ret == -EIO || mtd_is_eccerr(ret)); 583 584 if (ret) 585 return ret; 586 587 d->curr_write_pos = 0; 588 d->curr_write = eb; 589 if (old_eb) 590 mtdswap_store_eb(d, old_eb); 591 } 592 593 *block = (d->curr_write - d->eb_data) * d->pages_per_eblk + 594 d->curr_write_pos; 595 596 d->curr_write->active_count++; 597 d->revmap[*block] = page; 598 d->curr_write_pos++; 599 600 return 0; 601 } 602 603 static unsigned int mtdswap_free_page_cnt(struct mtdswap_dev *d) 604 { 605 return TREE_COUNT(d, CLEAN) * d->pages_per_eblk + 606 d->pages_per_eblk - d->curr_write_pos; 607 } 608 609 static unsigned int mtdswap_enough_free_pages(struct mtdswap_dev *d) 610 { 611 return mtdswap_free_page_cnt(d) > d->pages_per_eblk; 612 } 613 614 static int mtdswap_write_block(struct mtdswap_dev *d, char *buf, 615 unsigned int page, unsigned int *bp, int gc_context) 616 { 617 struct mtd_info *mtd = d->mtd; 618 struct swap_eb *eb; 619 size_t retlen; 620 loff_t writepos; 621 int ret; 622 623 retry: 624 if (!gc_context) 625 while (!mtdswap_enough_free_pages(d)) 626 if (mtdswap_gc(d, 0) > 0) 627 return -ENOSPC; 628 629 ret = mtdswap_map_free_block(d, page, bp); 630 eb = d->eb_data + (*bp / d->pages_per_eblk); 631 632 if (ret == -EIO || mtd_is_eccerr(ret)) { 633 d->curr_write = NULL; 634 eb->active_count--; 635 d->revmap[*bp] = PAGE_UNDEF; 636 goto retry; 637 } 638 639 if (ret < 0) 640 return ret; 641 642 writepos = (loff_t)*bp << PAGE_SHIFT; 643 ret = mtd_write(mtd, writepos, PAGE_SIZE, &retlen, buf); 644 if (ret == -EIO || mtd_is_eccerr(ret)) { 645 d->curr_write_pos--; 646 eb->active_count--; 647 d->revmap[*bp] = PAGE_UNDEF; 648 mtdswap_handle_write_error(d, eb); 649 goto retry; 650 } 651 652 if (ret < 0) { 653 dev_err(d->dev, "Write to MTD device failed: %d (%zd written)", 654 ret, retlen); 655 goto err; 656 } 657 658 if (retlen != PAGE_SIZE) { 659 dev_err(d->dev, "Short write to MTD device: %zd written", 660 retlen); 661 ret = -EIO; 662 goto err; 663 } 664 665 return ret; 666 667 err: 668 d->curr_write_pos--; 669 eb->active_count--; 670 d->revmap[*bp] = PAGE_UNDEF; 671 672 return ret; 673 } 674 675 static int mtdswap_move_block(struct mtdswap_dev *d, unsigned int oldblock, 676 unsigned int *newblock) 677 { 678 struct mtd_info *mtd = d->mtd; 679 struct swap_eb *eb, *oldeb; 680 int ret; 681 size_t retlen; 682 unsigned int page, retries; 683 loff_t readpos; 684 685 page = d->revmap[oldblock]; 686 readpos = (loff_t) oldblock << PAGE_SHIFT; 687 retries = 0; 688 689 retry: 690 ret = mtd_read(mtd, readpos, PAGE_SIZE, &retlen, d->page_buf); 691 692 if (ret < 0 && !mtd_is_bitflip(ret)) { 693 oldeb = d->eb_data + oldblock / d->pages_per_eblk; 694 oldeb->flags |= EBLOCK_READERR; 695 696 dev_err(d->dev, "Read Error: %d (block %u)\n", ret, 697 oldblock); 698 retries++; 699 if (retries < MTDSWAP_IO_RETRIES) 700 goto retry; 701 702 goto read_error; 703 } 704 705 if (retlen != PAGE_SIZE) { 706 dev_err(d->dev, "Short read: %zd (block %u)\n", retlen, 707 oldblock); 708 ret = -EIO; 709 goto read_error; 710 } 711 712 ret = mtdswap_write_block(d, d->page_buf, page, newblock, 1); 713 if (ret < 0) { 714 d->page_data[page] = BLOCK_ERROR; 715 dev_err(d->dev, "Write error: %d\n", ret); 716 return ret; 717 } 718 719 eb = d->eb_data + *newblock / d->pages_per_eblk; 720 d->page_data[page] = *newblock; 721 d->revmap[oldblock] = PAGE_UNDEF; 722 eb = d->eb_data + oldblock / d->pages_per_eblk; 723 eb->active_count--; 724 725 return 0; 726 727 read_error: 728 d->page_data[page] = BLOCK_ERROR; 729 d->revmap[oldblock] = PAGE_UNDEF; 730 return ret; 731 } 732 733 static int mtdswap_gc_eblock(struct mtdswap_dev *d, struct swap_eb *eb) 734 { 735 unsigned int i, block, eblk_base, newblock; 736 int ret, errcode; 737 738 errcode = 0; 739 eblk_base = (eb - d->eb_data) * d->pages_per_eblk; 740 741 for (i = 0; i < d->pages_per_eblk; i++) { 742 if (d->spare_eblks < MIN_SPARE_EBLOCKS) 743 return -ENOSPC; 744 745 block = eblk_base + i; 746 if (d->revmap[block] == PAGE_UNDEF) 747 continue; 748 749 ret = mtdswap_move_block(d, block, &newblock); 750 if (ret < 0 && !errcode) 751 errcode = ret; 752 } 753 754 return errcode; 755 } 756 757 static int __mtdswap_choose_gc_tree(struct mtdswap_dev *d) 758 { 759 int idx, stopat; 760 761 if (TREE_COUNT(d, CLEAN) < LOW_FRAG_GC_THRESHOLD) 762 stopat = MTDSWAP_LOWFRAG; 763 else 764 stopat = MTDSWAP_HIFRAG; 765 766 for (idx = MTDSWAP_BITFLIP; idx >= stopat; idx--) 767 if (d->trees[idx].root.rb_node != NULL) 768 return idx; 769 770 return -1; 771 } 772 773 static int mtdswap_wlfreq(unsigned int maxdiff) 774 { 775 unsigned int h, x, y, dist, base; 776 777 /* 778 * Calculate linear ramp down from f1 to f2 when maxdiff goes from 779 * MAX_ERASE_DIFF to MAX_ERASE_DIFF + COLLECT_NONDIRTY_BASE. Similar 780 * to triangle with height f1 - f1 and width COLLECT_NONDIRTY_BASE. 781 */ 782 783 dist = maxdiff - MAX_ERASE_DIFF; 784 if (dist > COLLECT_NONDIRTY_BASE) 785 dist = COLLECT_NONDIRTY_BASE; 786 787 /* 788 * Modelling the slop as right angular triangle with base 789 * COLLECT_NONDIRTY_BASE and height freq1 - freq2. The ratio y/x is 790 * equal to the ratio h/base. 791 */ 792 h = COLLECT_NONDIRTY_FREQ1 - COLLECT_NONDIRTY_FREQ2; 793 base = COLLECT_NONDIRTY_BASE; 794 795 x = dist - base; 796 y = (x * h + base / 2) / base; 797 798 return COLLECT_NONDIRTY_FREQ2 + y; 799 } 800 801 static int mtdswap_choose_wl_tree(struct mtdswap_dev *d) 802 { 803 static unsigned int pick_cnt; 804 unsigned int i, idx = -1, wear, max; 805 struct rb_root *root; 806 807 max = 0; 808 for (i = 0; i <= MTDSWAP_DIRTY; i++) { 809 root = &d->trees[i].root; 810 if (root->rb_node == NULL) 811 continue; 812 813 wear = d->max_erase_count - MTDSWAP_ECNT_MIN(root); 814 if (wear > max) { 815 max = wear; 816 idx = i; 817 } 818 } 819 820 if (max > MAX_ERASE_DIFF && pick_cnt >= mtdswap_wlfreq(max) - 1) { 821 pick_cnt = 0; 822 return idx; 823 } 824 825 pick_cnt++; 826 return -1; 827 } 828 829 static int mtdswap_choose_gc_tree(struct mtdswap_dev *d, 830 unsigned int background) 831 { 832 int idx; 833 834 if (TREE_NONEMPTY(d, FAILING) && 835 (background || (TREE_EMPTY(d, CLEAN) && TREE_EMPTY(d, DIRTY)))) 836 return MTDSWAP_FAILING; 837 838 idx = mtdswap_choose_wl_tree(d); 839 if (idx >= MTDSWAP_CLEAN) 840 return idx; 841 842 return __mtdswap_choose_gc_tree(d); 843 } 844 845 static struct swap_eb *mtdswap_pick_gc_eblk(struct mtdswap_dev *d, 846 unsigned int background) 847 { 848 struct rb_root *rp = NULL; 849 struct swap_eb *eb = NULL; 850 int idx; 851 852 if (background && TREE_COUNT(d, CLEAN) > CLEAN_BLOCK_THRESHOLD && 853 TREE_EMPTY(d, DIRTY) && TREE_EMPTY(d, FAILING)) 854 return NULL; 855 856 idx = mtdswap_choose_gc_tree(d, background); 857 if (idx < 0) 858 return NULL; 859 860 rp = &d->trees[idx].root; 861 eb = rb_entry(rb_first(rp), struct swap_eb, rb); 862 863 rb_erase(&eb->rb, rp); 864 eb->root = NULL; 865 d->trees[idx].count--; 866 return eb; 867 } 868 869 static unsigned int mtdswap_test_patt(unsigned int i) 870 { 871 return i % 2 ? 0x55555555 : 0xAAAAAAAA; 872 } 873 874 static unsigned int mtdswap_eblk_passes(struct mtdswap_dev *d, 875 struct swap_eb *eb) 876 { 877 struct mtd_info *mtd = d->mtd; 878 unsigned int test, i, j, patt, mtd_pages; 879 loff_t base, pos; 880 unsigned int *p1 = (unsigned int *)d->page_buf; 881 unsigned char *p2 = (unsigned char *)d->oob_buf; 882 struct mtd_oob_ops ops; 883 int ret; 884 885 ops.mode = MTD_OPS_AUTO_OOB; 886 ops.len = mtd->writesize; 887 ops.ooblen = mtd->oobavail; 888 ops.ooboffs = 0; 889 ops.datbuf = d->page_buf; 890 ops.oobbuf = d->oob_buf; 891 base = mtdswap_eb_offset(d, eb); 892 mtd_pages = d->pages_per_eblk * PAGE_SIZE / mtd->writesize; 893 894 for (test = 0; test < 2; test++) { 895 pos = base; 896 for (i = 0; i < mtd_pages; i++) { 897 patt = mtdswap_test_patt(test + i); 898 memset(d->page_buf, patt, mtd->writesize); 899 memset(d->oob_buf, patt, mtd->oobavail); 900 ret = mtd_write_oob(mtd, pos, &ops); 901 if (ret) 902 goto error; 903 904 pos += mtd->writesize; 905 } 906 907 pos = base; 908 for (i = 0; i < mtd_pages; i++) { 909 ret = mtd_read_oob(mtd, pos, &ops); 910 if (ret) 911 goto error; 912 913 patt = mtdswap_test_patt(test + i); 914 for (j = 0; j < mtd->writesize/sizeof(int); j++) 915 if (p1[j] != patt) 916 goto error; 917 918 for (j = 0; j < mtd->oobavail; j++) 919 if (p2[j] != (unsigned char)patt) 920 goto error; 921 922 pos += mtd->writesize; 923 } 924 925 ret = mtdswap_erase_block(d, eb); 926 if (ret) 927 goto error; 928 } 929 930 eb->flags &= ~EBLOCK_READERR; 931 return 1; 932 933 error: 934 mtdswap_handle_badblock(d, eb); 935 return 0; 936 } 937 938 static int mtdswap_gc(struct mtdswap_dev *d, unsigned int background) 939 { 940 struct swap_eb *eb; 941 int ret; 942 943 if (d->spare_eblks < MIN_SPARE_EBLOCKS) 944 return 1; 945 946 eb = mtdswap_pick_gc_eblk(d, background); 947 if (!eb) 948 return 1; 949 950 ret = mtdswap_gc_eblock(d, eb); 951 if (ret == -ENOSPC) 952 return 1; 953 954 if (eb->flags & EBLOCK_FAILED) { 955 mtdswap_handle_badblock(d, eb); 956 return 0; 957 } 958 959 eb->flags &= ~EBLOCK_BITFLIP; 960 ret = mtdswap_erase_block(d, eb); 961 if ((eb->flags & EBLOCK_READERR) && 962 (ret || !mtdswap_eblk_passes(d, eb))) 963 return 0; 964 965 if (ret == 0) 966 ret = mtdswap_write_marker(d, eb, MTDSWAP_TYPE_CLEAN); 967 968 if (ret == 0) 969 mtdswap_rb_add(d, eb, MTDSWAP_CLEAN); 970 else if (ret != -EIO && !mtd_is_eccerr(ret)) 971 mtdswap_rb_add(d, eb, MTDSWAP_DIRTY); 972 973 return 0; 974 } 975 976 static void mtdswap_background(struct mtd_blktrans_dev *dev) 977 { 978 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 979 int ret; 980 981 while (1) { 982 ret = mtdswap_gc(d, 1); 983 if (ret || mtd_blktrans_cease_background(dev)) 984 return; 985 } 986 } 987 988 static void mtdswap_cleanup(struct mtdswap_dev *d) 989 { 990 vfree(d->eb_data); 991 vfree(d->revmap); 992 vfree(d->page_data); 993 kfree(d->oob_buf); 994 kfree(d->page_buf); 995 } 996 997 static int mtdswap_flush(struct mtd_blktrans_dev *dev) 998 { 999 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 1000 1001 mtd_sync(d->mtd); 1002 return 0; 1003 } 1004 1005 static unsigned int mtdswap_badblocks(struct mtd_info *mtd, uint64_t size) 1006 { 1007 loff_t offset; 1008 unsigned int badcnt; 1009 1010 badcnt = 0; 1011 1012 if (mtd_can_have_bb(mtd)) 1013 for (offset = 0; offset < size; offset += mtd->erasesize) 1014 if (mtd_block_isbad(mtd, offset)) 1015 badcnt++; 1016 1017 return badcnt; 1018 } 1019 1020 static int mtdswap_writesect(struct mtd_blktrans_dev *dev, 1021 unsigned long page, char *buf) 1022 { 1023 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 1024 unsigned int newblock, mapped; 1025 struct swap_eb *eb; 1026 int ret; 1027 1028 d->sect_write_count++; 1029 1030 if (d->spare_eblks < MIN_SPARE_EBLOCKS) 1031 return -ENOSPC; 1032 1033 if (header) { 1034 /* Ignore writes to the header page */ 1035 if (unlikely(page == 0)) 1036 return 0; 1037 1038 page--; 1039 } 1040 1041 mapped = d->page_data[page]; 1042 if (mapped <= BLOCK_MAX) { 1043 eb = d->eb_data + (mapped / d->pages_per_eblk); 1044 eb->active_count--; 1045 mtdswap_store_eb(d, eb); 1046 d->page_data[page] = BLOCK_UNDEF; 1047 d->revmap[mapped] = PAGE_UNDEF; 1048 } 1049 1050 ret = mtdswap_write_block(d, buf, page, &newblock, 0); 1051 d->mtd_write_count++; 1052 1053 if (ret < 0) 1054 return ret; 1055 1056 eb = d->eb_data + (newblock / d->pages_per_eblk); 1057 d->page_data[page] = newblock; 1058 1059 return 0; 1060 } 1061 1062 /* Provide a dummy swap header for the kernel */ 1063 static int mtdswap_auto_header(struct mtdswap_dev *d, char *buf) 1064 { 1065 union swap_header *hd = (union swap_header *)(buf); 1066 1067 memset(buf, 0, PAGE_SIZE - 10); 1068 1069 hd->info.version = 1; 1070 hd->info.last_page = d->mbd_dev->size - 1; 1071 hd->info.nr_badpages = 0; 1072 1073 memcpy(buf + PAGE_SIZE - 10, "SWAPSPACE2", 10); 1074 1075 return 0; 1076 } 1077 1078 static int mtdswap_readsect(struct mtd_blktrans_dev *dev, 1079 unsigned long page, char *buf) 1080 { 1081 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 1082 struct mtd_info *mtd = d->mtd; 1083 unsigned int realblock, retries; 1084 loff_t readpos; 1085 struct swap_eb *eb; 1086 size_t retlen; 1087 int ret; 1088 1089 d->sect_read_count++; 1090 1091 if (header) { 1092 if (unlikely(page == 0)) 1093 return mtdswap_auto_header(d, buf); 1094 1095 page--; 1096 } 1097 1098 realblock = d->page_data[page]; 1099 if (realblock > BLOCK_MAX) { 1100 memset(buf, 0x0, PAGE_SIZE); 1101 if (realblock == BLOCK_UNDEF) 1102 return 0; 1103 else 1104 return -EIO; 1105 } 1106 1107 eb = d->eb_data + (realblock / d->pages_per_eblk); 1108 BUG_ON(d->revmap[realblock] == PAGE_UNDEF); 1109 1110 readpos = (loff_t)realblock << PAGE_SHIFT; 1111 retries = 0; 1112 1113 retry: 1114 ret = mtd_read(mtd, readpos, PAGE_SIZE, &retlen, buf); 1115 1116 d->mtd_read_count++; 1117 if (mtd_is_bitflip(ret)) { 1118 eb->flags |= EBLOCK_BITFLIP; 1119 mtdswap_rb_add(d, eb, MTDSWAP_BITFLIP); 1120 ret = 0; 1121 } 1122 1123 if (ret < 0) { 1124 dev_err(d->dev, "Read error %d\n", ret); 1125 eb->flags |= EBLOCK_READERR; 1126 mtdswap_rb_add(d, eb, MTDSWAP_FAILING); 1127 retries++; 1128 if (retries < MTDSWAP_IO_RETRIES) 1129 goto retry; 1130 1131 return ret; 1132 } 1133 1134 if (retlen != PAGE_SIZE) { 1135 dev_err(d->dev, "Short read %zd\n", retlen); 1136 return -EIO; 1137 } 1138 1139 return 0; 1140 } 1141 1142 static int mtdswap_discard(struct mtd_blktrans_dev *dev, unsigned long first, 1143 unsigned nr_pages) 1144 { 1145 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 1146 unsigned long page; 1147 struct swap_eb *eb; 1148 unsigned int mapped; 1149 1150 d->discard_count++; 1151 1152 for (page = first; page < first + nr_pages; page++) { 1153 mapped = d->page_data[page]; 1154 if (mapped <= BLOCK_MAX) { 1155 eb = d->eb_data + (mapped / d->pages_per_eblk); 1156 eb->active_count--; 1157 mtdswap_store_eb(d, eb); 1158 d->page_data[page] = BLOCK_UNDEF; 1159 d->revmap[mapped] = PAGE_UNDEF; 1160 d->discard_page_count++; 1161 } else if (mapped == BLOCK_ERROR) { 1162 d->page_data[page] = BLOCK_UNDEF; 1163 d->discard_page_count++; 1164 } 1165 } 1166 1167 return 0; 1168 } 1169 1170 static int mtdswap_show(struct seq_file *s, void *data) 1171 { 1172 struct mtdswap_dev *d = (struct mtdswap_dev *) s->private; 1173 unsigned long sum; 1174 unsigned int count[MTDSWAP_TREE_CNT]; 1175 unsigned int min[MTDSWAP_TREE_CNT]; 1176 unsigned int max[MTDSWAP_TREE_CNT]; 1177 unsigned int i, cw = 0, cwp = 0, cwecount = 0, bb_cnt, mapped, pages; 1178 uint64_t use_size; 1179 static const char * const name[] = { 1180 "clean", "used", "low", "high", "dirty", "bitflip", "failing" 1181 }; 1182 1183 mutex_lock(&d->mbd_dev->lock); 1184 1185 for (i = 0; i < MTDSWAP_TREE_CNT; i++) { 1186 struct rb_root *root = &d->trees[i].root; 1187 1188 if (root->rb_node) { 1189 count[i] = d->trees[i].count; 1190 min[i] = MTDSWAP_ECNT_MIN(root); 1191 max[i] = MTDSWAP_ECNT_MAX(root); 1192 } else 1193 count[i] = 0; 1194 } 1195 1196 if (d->curr_write) { 1197 cw = 1; 1198 cwp = d->curr_write_pos; 1199 cwecount = d->curr_write->erase_count; 1200 } 1201 1202 sum = 0; 1203 for (i = 0; i < d->eblks; i++) 1204 sum += d->eb_data[i].erase_count; 1205 1206 use_size = (uint64_t)d->eblks * d->mtd->erasesize; 1207 bb_cnt = mtdswap_badblocks(d->mtd, use_size); 1208 1209 mapped = 0; 1210 pages = d->mbd_dev->size; 1211 for (i = 0; i < pages; i++) 1212 if (d->page_data[i] != BLOCK_UNDEF) 1213 mapped++; 1214 1215 mutex_unlock(&d->mbd_dev->lock); 1216 1217 for (i = 0; i < MTDSWAP_TREE_CNT; i++) { 1218 if (!count[i]) 1219 continue; 1220 1221 if (min[i] != max[i]) 1222 seq_printf(s, "%s:\t%5d erase blocks, erased min %d, " 1223 "max %d times\n", 1224 name[i], count[i], min[i], max[i]); 1225 else 1226 seq_printf(s, "%s:\t%5d erase blocks, all erased %d " 1227 "times\n", name[i], count[i], min[i]); 1228 } 1229 1230 if (bb_cnt) 1231 seq_printf(s, "bad:\t%5u erase blocks\n", bb_cnt); 1232 1233 if (cw) 1234 seq_printf(s, "current erase block: %u pages used, %u free, " 1235 "erased %u times\n", 1236 cwp, d->pages_per_eblk - cwp, cwecount); 1237 1238 seq_printf(s, "total erasures: %lu\n", sum); 1239 1240 seq_puts(s, "\n"); 1241 1242 seq_printf(s, "mtdswap_readsect count: %llu\n", d->sect_read_count); 1243 seq_printf(s, "mtdswap_writesect count: %llu\n", d->sect_write_count); 1244 seq_printf(s, "mtdswap_discard count: %llu\n", d->discard_count); 1245 seq_printf(s, "mtd read count: %llu\n", d->mtd_read_count); 1246 seq_printf(s, "mtd write count: %llu\n", d->mtd_write_count); 1247 seq_printf(s, "discarded pages count: %llu\n", d->discard_page_count); 1248 1249 seq_puts(s, "\n"); 1250 seq_printf(s, "total pages: %u\n", pages); 1251 seq_printf(s, "pages mapped: %u\n", mapped); 1252 1253 return 0; 1254 } 1255 DEFINE_SHOW_ATTRIBUTE(mtdswap); 1256 1257 static int mtdswap_add_debugfs(struct mtdswap_dev *d) 1258 { 1259 struct dentry *root = d->mtd->dbg.dfs_dir; 1260 struct dentry *dent; 1261 1262 if (!IS_ENABLED(CONFIG_DEBUG_FS)) 1263 return 0; 1264 1265 if (IS_ERR_OR_NULL(root)) 1266 return -1; 1267 1268 dent = debugfs_create_file("mtdswap_stats", S_IRUSR, root, d, 1269 &mtdswap_fops); 1270 if (!dent) { 1271 dev_err(d->dev, "debugfs_create_file failed\n"); 1272 return -1; 1273 } 1274 1275 return 0; 1276 } 1277 1278 static int mtdswap_init(struct mtdswap_dev *d, unsigned int eblocks, 1279 unsigned int spare_cnt) 1280 { 1281 struct mtd_info *mtd = d->mbd_dev->mtd; 1282 unsigned int i, eblk_bytes, pages, blocks; 1283 int ret = -ENOMEM; 1284 1285 d->mtd = mtd; 1286 d->eblks = eblocks; 1287 d->spare_eblks = spare_cnt; 1288 d->pages_per_eblk = mtd->erasesize >> PAGE_SHIFT; 1289 1290 pages = d->mbd_dev->size; 1291 blocks = eblocks * d->pages_per_eblk; 1292 1293 for (i = 0; i < MTDSWAP_TREE_CNT; i++) 1294 d->trees[i].root = RB_ROOT; 1295 1296 d->page_data = vmalloc(array_size(pages, sizeof(int))); 1297 if (!d->page_data) 1298 goto page_data_fail; 1299 1300 d->revmap = vmalloc(array_size(blocks, sizeof(int))); 1301 if (!d->revmap) 1302 goto revmap_fail; 1303 1304 eblk_bytes = sizeof(struct swap_eb)*d->eblks; 1305 d->eb_data = vzalloc(eblk_bytes); 1306 if (!d->eb_data) 1307 goto eb_data_fail; 1308 1309 for (i = 0; i < pages; i++) 1310 d->page_data[i] = BLOCK_UNDEF; 1311 1312 for (i = 0; i < blocks; i++) 1313 d->revmap[i] = PAGE_UNDEF; 1314 1315 d->page_buf = kmalloc(PAGE_SIZE, GFP_KERNEL); 1316 if (!d->page_buf) 1317 goto page_buf_fail; 1318 1319 d->oob_buf = kmalloc_array(2, mtd->oobavail, GFP_KERNEL); 1320 if (!d->oob_buf) 1321 goto oob_buf_fail; 1322 1323 mtdswap_scan_eblks(d); 1324 1325 return 0; 1326 1327 oob_buf_fail: 1328 kfree(d->page_buf); 1329 page_buf_fail: 1330 vfree(d->eb_data); 1331 eb_data_fail: 1332 vfree(d->revmap); 1333 revmap_fail: 1334 vfree(d->page_data); 1335 page_data_fail: 1336 printk(KERN_ERR "%s: init failed (%d)\n", MTDSWAP_PREFIX, ret); 1337 return ret; 1338 } 1339 1340 static void mtdswap_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd) 1341 { 1342 struct mtdswap_dev *d; 1343 struct mtd_blktrans_dev *mbd_dev; 1344 char *parts; 1345 char *this_opt; 1346 unsigned long part; 1347 unsigned int eblocks, eavailable, bad_blocks, spare_cnt; 1348 uint64_t swap_size, use_size, size_limit; 1349 int ret; 1350 1351 parts = &partitions[0]; 1352 if (!*parts) 1353 return; 1354 1355 while ((this_opt = strsep(&parts, ",")) != NULL) { 1356 if (kstrtoul(this_opt, 0, &part) < 0) 1357 return; 1358 1359 if (mtd->index == part) 1360 break; 1361 } 1362 1363 if (mtd->index != part) 1364 return; 1365 1366 if (mtd->erasesize < PAGE_SIZE || mtd->erasesize % PAGE_SIZE) { 1367 printk(KERN_ERR "%s: Erase size %u not multiple of PAGE_SIZE " 1368 "%lu\n", MTDSWAP_PREFIX, mtd->erasesize, PAGE_SIZE); 1369 return; 1370 } 1371 1372 if (PAGE_SIZE % mtd->writesize || mtd->writesize > PAGE_SIZE) { 1373 printk(KERN_ERR "%s: PAGE_SIZE %lu not multiple of write size" 1374 " %u\n", MTDSWAP_PREFIX, PAGE_SIZE, mtd->writesize); 1375 return; 1376 } 1377 1378 if (!mtd->oobsize || mtd->oobavail < MTDSWAP_OOBSIZE) { 1379 printk(KERN_ERR "%s: Not enough free bytes in OOB, " 1380 "%d available, %zu needed.\n", 1381 MTDSWAP_PREFIX, mtd->oobavail, MTDSWAP_OOBSIZE); 1382 return; 1383 } 1384 1385 if (spare_eblocks > 100) 1386 spare_eblocks = 100; 1387 1388 use_size = mtd->size; 1389 size_limit = (uint64_t) BLOCK_MAX * PAGE_SIZE; 1390 1391 if (mtd->size > size_limit) { 1392 printk(KERN_WARNING "%s: Device too large. Limiting size to " 1393 "%llu bytes\n", MTDSWAP_PREFIX, size_limit); 1394 use_size = size_limit; 1395 } 1396 1397 eblocks = mtd_div_by_eb(use_size, mtd); 1398 use_size = (uint64_t)eblocks * mtd->erasesize; 1399 bad_blocks = mtdswap_badblocks(mtd, use_size); 1400 eavailable = eblocks - bad_blocks; 1401 1402 if (eavailable < MIN_ERASE_BLOCKS) { 1403 printk(KERN_ERR "%s: Not enough erase blocks. %u available, " 1404 "%d needed\n", MTDSWAP_PREFIX, eavailable, 1405 MIN_ERASE_BLOCKS); 1406 return; 1407 } 1408 1409 spare_cnt = div_u64((uint64_t)eavailable * spare_eblocks, 100); 1410 1411 if (spare_cnt < MIN_SPARE_EBLOCKS) 1412 spare_cnt = MIN_SPARE_EBLOCKS; 1413 1414 if (spare_cnt > eavailable - 1) 1415 spare_cnt = eavailable - 1; 1416 1417 swap_size = (uint64_t)(eavailable - spare_cnt) * mtd->erasesize + 1418 (header ? PAGE_SIZE : 0); 1419 1420 printk(KERN_INFO "%s: Enabling MTD swap on device %lu, size %llu KB, " 1421 "%u spare, %u bad blocks\n", 1422 MTDSWAP_PREFIX, part, swap_size / 1024, spare_cnt, bad_blocks); 1423 1424 d = kzalloc(sizeof(struct mtdswap_dev), GFP_KERNEL); 1425 if (!d) 1426 return; 1427 1428 mbd_dev = kzalloc(sizeof(struct mtd_blktrans_dev), GFP_KERNEL); 1429 if (!mbd_dev) { 1430 kfree(d); 1431 return; 1432 } 1433 1434 d->mbd_dev = mbd_dev; 1435 mbd_dev->priv = d; 1436 1437 mbd_dev->mtd = mtd; 1438 mbd_dev->devnum = mtd->index; 1439 mbd_dev->size = swap_size >> PAGE_SHIFT; 1440 mbd_dev->tr = tr; 1441 1442 if (!(mtd->flags & MTD_WRITEABLE)) 1443 mbd_dev->readonly = 1; 1444 1445 if (mtdswap_init(d, eblocks, spare_cnt) < 0) 1446 goto init_failed; 1447 1448 if (add_mtd_blktrans_dev(mbd_dev) < 0) 1449 goto cleanup; 1450 1451 d->dev = disk_to_dev(mbd_dev->disk); 1452 1453 ret = mtdswap_add_debugfs(d); 1454 if (ret < 0) 1455 goto debugfs_failed; 1456 1457 return; 1458 1459 debugfs_failed: 1460 del_mtd_blktrans_dev(mbd_dev); 1461 1462 cleanup: 1463 mtdswap_cleanup(d); 1464 1465 init_failed: 1466 kfree(mbd_dev); 1467 kfree(d); 1468 } 1469 1470 static void mtdswap_remove_dev(struct mtd_blktrans_dev *dev) 1471 { 1472 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 1473 1474 del_mtd_blktrans_dev(dev); 1475 mtdswap_cleanup(d); 1476 kfree(d); 1477 } 1478 1479 static struct mtd_blktrans_ops mtdswap_ops = { 1480 .name = "mtdswap", 1481 .major = 0, 1482 .part_bits = 0, 1483 .blksize = PAGE_SIZE, 1484 .flush = mtdswap_flush, 1485 .readsect = mtdswap_readsect, 1486 .writesect = mtdswap_writesect, 1487 .discard = mtdswap_discard, 1488 .background = mtdswap_background, 1489 .add_mtd = mtdswap_add_mtd, 1490 .remove_dev = mtdswap_remove_dev, 1491 .owner = THIS_MODULE, 1492 }; 1493 1494 static int __init mtdswap_modinit(void) 1495 { 1496 return register_mtd_blktrans(&mtdswap_ops); 1497 } 1498 1499 static void __exit mtdswap_modexit(void) 1500 { 1501 deregister_mtd_blktrans(&mtdswap_ops); 1502 } 1503 1504 module_init(mtdswap_modinit); 1505 module_exit(mtdswap_modexit); 1506 1507 1508 MODULE_LICENSE("GPL"); 1509 MODULE_AUTHOR("Jarkko Lavinen <jarkko.lavinen@nokia.com>"); 1510 MODULE_DESCRIPTION("Block device access to an MTD suitable for using as " 1511 "swap space"); 1512