/*
 * Bad block management
 *
 * - Heavily based on MD badblocks code from Neil Brown
 *
 * Copyright (c) 2015, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#include <linux/badblocks.h>
#include <linux/seqlock.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/slab.h>

/**
 * badblocks_check() - check a given range for bad sectors
 * @bb: the badblocks structure that holds all badblock information
 * @s: sector (start) at which to check for badblocks
 * @sectors: number of sectors to check for badblocks
 * @first_bad: pointer to store location of the first badblock
 * @bad_sectors: pointer to store number of badblocks after @first_bad
 *
 * We can record which blocks on each device are 'bad' and so just
 * fail those blocks, or that stripe, rather than the whole device.
 * Entries in the bad-block table are 64 bits wide. This comprises:
 * Length of bad-range, in sectors: 0-511 for lengths 1-512
 * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
 * A 'shift' can be set so that larger blocks are tracked and
 * consequently larger devices can be covered.
 * 'Acknowledged' flag - 1 bit. - the most significant bit.
 *
 * Locking of the bad-block table uses a seqlock so badblocks_check
 * might need to retry if it is very unlucky.
 * We will sometimes want to check for bad blocks in a bi_end_io function,
 * so we use the write_seqlock_irq variant.
 *
 * When looking for a bad block we specify a range and want to
 * know if any block in the range is bad. So we binary-search
 * to the last range that starts at-or-before the given endpoint,
 * (or "before the sector after the target range")
 * then see if it ends after the given start.
 *
 * Return:
 *  0: there are no known bad blocks in the range
 *  1: there are known bad blocks which are all acknowledged
 * -1: there are bad blocks which have not yet been acknowledged in metadata.
 * plus the start/length of the first bad section we overlap.
 */
int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
			sector_t *first_bad, int *bad_sectors)
{
	int hi;
	int lo;
	u64 *p = bb->page;
	int rv;
	sector_t target = s + sectors;
	unsigned seq;

	if (bb->shift > 0) {
		/* round the start down, and the end up */
		s >>= bb->shift;
		target += (1<<bb->shift) - 1;
		target >>= bb->shift;
		sectors = target - s;
	}
	/* 'target' is now the first block after the bad range */

retry:
	seq = read_seqbegin(&bb->lock);
	lo = 0;
	rv = 0;
	hi = bb->count;

	/* Binary search between lo and hi for 'target'
	 * i.e. for the last range that starts before 'target'
	 */
	/* INVARIANT: ranges before 'lo' and at-or-after 'hi'
	 * are known not to be the last range before target.
	 * VARIANT: hi-lo is the number of possible
	 * ranges, and decreases until it reaches 1
	 */
	while (hi - lo > 1) {
		int mid = (lo + hi) / 2;
		sector_t a = BB_OFFSET(p[mid]);

		if (a < target)
			/* This could still be the one, earlier ranges
			 * could not.
			 */
			lo = mid;
		else
			/* This and later ranges are definitely out. */
			hi = mid;
	}
	/* 'lo' might be the last that started before target, but 'hi' isn't */
	if (hi > lo) {
		/* need to check all ranges that end after 's' to see if
		 * any are unacknowledged.
		 */
		while (lo >= 0 &&
		       BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
			if (BB_OFFSET(p[lo]) < target) {
				/* starts before the end, and finishes after
				 * the start, so they must overlap
				 */
				if (rv != -1 && BB_ACK(p[lo]))
					rv = 1;
				else
					rv = -1;
				*first_bad = BB_OFFSET(p[lo]);
				*bad_sectors = BB_LEN(p[lo]);
			}
			lo--;
		}
	}

	if (read_seqretry(&bb->lock, seq))
		goto retry;

	return rv;
}
EXPORT_SYMBOL_GPL(badblocks_check);
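
/*
 * Example: how the 64-bit entries described above pack together, and how a
 * caller might consult the table before issuing I/O. This is only an
 * illustrative sketch; the caller and its variables (bb, sector, nr_sectors)
 * are hypothetical. BB_MAKE(), BB_OFFSET(), BB_LEN() and BB_ACK() are the
 * packing helpers from <linux/badblocks.h>.
 *
 *	u64 entry = BB_MAKE(1000, 8, 1);
 *	// BB_OFFSET(entry) == 1000, BB_LEN(entry) == 8, BB_ACK(entry) == 1
 *
 *	sector_t first_bad;
 *	int bad_sectors;
 *
 *	switch (badblocks_check(bb, sector, nr_sectors,
 *				&first_bad, &bad_sectors)) {
 *	case 0:
 *		break;		// no known bad blocks in the range
 *	case 1:			// overlapping bad blocks, all acknowledged
 *	case -1:		// some overlapping bad blocks unacknowledged
 *		// avoid or fail [first_bad, first_bad + bad_sectors)
 *		break;
 *	}
 */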

/*
 * Clear the 'unacked_exist' flag if no unacknowledged entries remain in the
 * table.
 */
static void badblocks_update_acked(struct badblocks *bb)
{
	u64 *p = bb->page;
	int i;
	bool unacked = false;

	if (!bb->unacked_exist)
		return;

	for (i = 0; i < bb->count; i++) {
		if (!BB_ACK(p[i])) {
			unacked = true;
			break;
		}
	}

	if (!unacked)
		bb->unacked_exist = 0;
}

/**
 * badblocks_set() - Add a range of bad blocks to the table.
 * @bb: the badblocks structure that holds all badblock information
 * @s: first sector to mark as bad
 * @sectors: number of sectors to mark as bad
 * @acknowledged: whether to mark the bad sectors as acknowledged
 *
 * This might extend the table, or might contract it if two adjacent ranges
 * can be merged. We binary-search to find the 'insertion' point, then
 * decide how best to handle it.
 *
 * Return:
 *  0: success
 *  1: failed to set badblocks (out of space)
 */
int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
			int acknowledged)
{
	u64 *p;
	int lo, hi;
	int rv = 0;
	unsigned long flags;

	if (bb->shift < 0)
		/* badblocks are disabled */
		return 0;

	if (bb->shift) {
		/* round the start down, and the end up */
		sector_t next = s + sectors;

		s >>= bb->shift;
		next += (1<<bb->shift) - 1;
		next >>= bb->shift;
		sectors = next - s;
	}

	write_seqlock_irqsave(&bb->lock, flags);

	p = bb->page;
	lo = 0;
	hi = bb->count;
	/* Find the last range that starts at-or-before 's' */
	while (hi - lo > 1) {
		int mid = (lo + hi) / 2;
		sector_t a = BB_OFFSET(p[mid]);

		if (a <= s)
			lo = mid;
		else
			hi = mid;
	}
	if (hi > lo && BB_OFFSET(p[lo]) > s)
		hi = lo;

	if (hi > lo) {
		/* we found a range that might merge with the start
		 * of our new range
		 */
		sector_t a = BB_OFFSET(p[lo]);
		sector_t e = a + BB_LEN(p[lo]);
		int ack = BB_ACK(p[lo]);

		if (e >= s) {
			/* Yes, we can merge with a previous range */
			if (s == a && s + sectors >= e)
				/* new range covers old */
				ack = acknowledged;
			else
				ack = ack && acknowledged;

			if (e < s + sectors)
				e = s + sectors;
			if (e - a <= BB_MAX_LEN) {
				p[lo] = BB_MAKE(a, e-a, ack);
				s = e;
			} else {
				/* does not all fit in one range,
				 * make p[lo] maximal
				 */
				if (BB_LEN(p[lo]) != BB_MAX_LEN)
					p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
				s = a + BB_MAX_LEN;
			}
			sectors = e - s;
		}
	}
	if (sectors && hi < bb->count) {
		/* 'hi' points to the first range that starts after 's'.
		 * Maybe we can merge with the start of that range
		 */
		sector_t a = BB_OFFSET(p[hi]);
		sector_t e = a + BB_LEN(p[hi]);
		int ack = BB_ACK(p[hi]);

		if (a <= s + sectors) {
			/* merging is possible */
			if (e <= s + sectors) {
				/* full overlap */
				e = s + sectors;
				ack = acknowledged;
			} else
				ack = ack && acknowledged;

			a = s;
			if (e - a <= BB_MAX_LEN) {
				p[hi] = BB_MAKE(a, e-a, ack);
				s = e;
			} else {
				p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
				s = a + BB_MAX_LEN;
			}
			sectors = e - s;
			lo = hi;
			hi++;
		}
	}
	if (sectors == 0 && hi < bb->count) {
		/* we might be able to combine lo and hi */
		/* Note: 's' is at the end of 'lo' */
		sector_t a = BB_OFFSET(p[hi]);
		int lolen = BB_LEN(p[lo]);
		int hilen = BB_LEN(p[hi]);
		int newlen = lolen + hilen - (s - a);

		if (s >= a && newlen < BB_MAX_LEN) {
			/* yes, we can combine them */
			int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);

			p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
			memmove(p + hi, p + hi + 1,
				(bb->count - hi - 1) * 8);
			bb->count--;
		}
	}
	while (sectors) {
		/* didn't merge (it all).
		 * Need to add a range just before 'hi'
		 */
		if (bb->count >= MAX_BADBLOCKS) {
			/* No room for more */
			rv = 1;
			break;
		} else {
			int this_sectors = sectors;

			memmove(p + hi + 1, p + hi,
				(bb->count - hi) * 8);
			bb->count++;

			if (this_sectors > BB_MAX_LEN)
				this_sectors = BB_MAX_LEN;
			p[hi] = BB_MAKE(s, this_sectors, acknowledged);
			sectors -= this_sectors;
			s += this_sectors;
		}
	}

	bb->changed = 1;
	if (!acknowledged)
		bb->unacked_exist = 1;
	else
		badblocks_update_acked(bb);
	write_sequnlock_irqrestore(&bb->lock, flags);

	return rv;
}
EXPORT_SYMBOL_GPL(badblocks_set);
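
/*
 * Example of the merging behaviour above, as a sketch with made-up numbers
 * (assuming an initially empty table, bb->shift == 0 and a hypothetical
 * caller):
 *
 *	badblocks_set(bb, 100, 8, 1);	// table: { start 100, len 8, acked }
 *	badblocks_set(bb, 104, 8, 1);	// overlaps the tail of that entry, so
 *					// it is extended in place:
 *					// { start 100, len 12, acked }
 *	badblocks_set(bb, 96, 20, 0);	// completely covers the entry, which
 *					// becomes { start 96, len 20 }; since
 *					// the new range is unacknowledged,
 *					// bb->unacked_exist is set
 *
 * When a new range completely covers an existing entry, the entry takes the
 * new range's acknowledged state; otherwise it stays acknowledged only if
 * both the old entry and the new range are acknowledged.
 */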

/**
 * badblocks_clear() - Remove a range of bad blocks from the table.
 * @bb: the badblocks structure that holds all badblock information
 * @s: first sector to mark as no longer bad
 * @sectors: number of sectors to clear
 *
 * This may involve extending the table if we split a region,
 * but it must not fail. So if the table becomes full, we just
 * drop the remove request.
 *
 * Return:
 *  0: success
 *  -ENOSPC: failed to clear badblocks (table full and a range must be split)
 */
int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
{
	u64 *p;
	int lo, hi;
	sector_t target = s + sectors;
	int rv = 0;

	if (bb->shift > 0) {
		/* When clearing we round the start up and the end down.
		 * This should not matter as the shift should align with
		 * the block size and no rounding should ever be needed.
		 * However it is better to think a block is bad when it
		 * isn't than to think a block is not bad when it is.
		 */
		s += (1<<bb->shift) - 1;
		s >>= bb->shift;
		target >>= bb->shift;
		sectors = target - s;
	}

	write_seqlock_irq(&bb->lock);

	p = bb->page;
	lo = 0;
	hi = bb->count;
	/* Find the last range that starts before 'target' */
	while (hi - lo > 1) {
		int mid = (lo + hi) / 2;
		sector_t a = BB_OFFSET(p[mid]);

		if (a < target)
			lo = mid;
		else
			hi = mid;
	}
	if (hi > lo) {
		/* p[lo] is the last range that could overlap the
		 * current range. Earlier ranges could also overlap,
		 * but only this one can overlap the end of the range.
		 */
		if ((BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) &&
		    (BB_OFFSET(p[lo]) < target)) {
			/* Partial overlap, leave the tail of this range */
			int ack = BB_ACK(p[lo]);
			sector_t a = BB_OFFSET(p[lo]);
			sector_t end = a + BB_LEN(p[lo]);

			if (a < s) {
				/* we need to split this range */
				if (bb->count >= MAX_BADBLOCKS) {
					rv = -ENOSPC;
					goto out;
				}
				memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
				bb->count++;
				p[lo] = BB_MAKE(a, s-a, ack);
				lo++;
			}
			p[lo] = BB_MAKE(target, end - target, ack);
			/* there is no longer an overlap */
			hi = lo;
			lo--;
		}
		while (lo >= 0 &&
		       (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) &&
		       (BB_OFFSET(p[lo]) < target)) {
			/* This range does overlap */
			if (BB_OFFSET(p[lo]) < s) {
				/* Keep the early parts of this range. */
				int ack = BB_ACK(p[lo]);
				sector_t start = BB_OFFSET(p[lo]);

				p[lo] = BB_MAKE(start, s - start, ack);
				/* now lo doesn't overlap, so we can stop */
				break;
			}
			lo--;
		}
		/* 'lo' is strictly before, 'hi' is strictly after,
		 * anything between needs to be discarded
		 */
		if (hi - lo > 1) {
			memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
			bb->count -= (hi - lo - 1);
		}
	}

	badblocks_update_acked(bb);
	bb->changed = 1;
out:
	write_sequnlock_irq(&bb->lock);
	return rv;
}
EXPORT_SYMBOL_GPL(badblocks_clear);
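
/*
 * Example of the splitting behaviour above, as a sketch with made-up numbers
 * (assuming bb->shift == 0 and a table holding a single entry):
 *
 *	// table before: { start 100, len 20 }
 *	badblocks_clear(bb, 104, 8);
 *	// table after:  { start 100, len 4 }, { start 112, len 8 }
 *
 * Clearing the middle of an entry keeps its head and its tail, so one entry
 * becomes two; this is the only path that can fail with -ENOSPC, when the
 * table is already full.
 */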

/**
 * ack_all_badblocks() - Acknowledge all bad blocks in a list.
 * @bb: the badblocks structure that holds all badblock information
 *
 * This only succeeds if ->changed is clear. It is used by
 * in-kernel metadata updates.
 */
void ack_all_badblocks(struct badblocks *bb)
{
	if (bb->page == NULL || bb->changed)
		/* no point even trying */
		return;
	write_seqlock_irq(&bb->lock);

	if (bb->changed == 0 && bb->unacked_exist) {
		u64 *p = bb->page;
		int i;

		for (i = 0; i < bb->count; i++) {
			if (!BB_ACK(p[i])) {
				sector_t start = BB_OFFSET(p[i]);
				int len = BB_LEN(p[i]);

				p[i] = BB_MAKE(start, len, 1);
			}
		}
		bb->unacked_exist = 0;
	}
	write_sequnlock_irq(&bb->lock);
}
EXPORT_SYMBOL_GPL(ack_all_badblocks);

/**
 * badblocks_show() - sysfs access to bad-blocks list
 * @bb: the badblocks structure that holds all badblock information
 * @page: buffer to fill for a sysfs read
 * @unack: whether to show only unacknowledged badblocks
 *
 * Return:
 *  Length of returned data
 */
ssize_t badblocks_show(struct badblocks *bb, char *page, int unack)
{
	size_t len;
	int i;
	u64 *p = bb->page;
	unsigned seq;

	if (bb->shift < 0)
		return 0;

retry:
	seq = read_seqbegin(&bb->lock);

	len = 0;
	i = 0;

	while (len < PAGE_SIZE && i < bb->count) {
		sector_t s = BB_OFFSET(p[i]);
		unsigned int length = BB_LEN(p[i]);
		int ack = BB_ACK(p[i]);

		i++;

		if (unack && ack)
			continue;

		len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
				(unsigned long long)s << bb->shift,
				length << bb->shift);
	}
	if (unack && len == 0)
		bb->unacked_exist = 0;

	if (read_seqretry(&bb->lock, seq))
		goto retry;

	return len;
}
EXPORT_SYMBOL_GPL(badblocks_show);

/**
 * badblocks_store() - sysfs access to bad-blocks list
 * @bb: the badblocks structure that holds all badblock information
 * @page: buffer received from sysfs
 * @len: length of data received from sysfs
 * @unack: whether to leave the newly added badblocks unacknowledged
 *
 * Return:
 *  Length of the buffer processed or -ve error.
 */
ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
			int unack)
{
	unsigned long long sector;
	int length;
	char newline;

	switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
	case 3:
		if (newline != '\n')
			return -EINVAL;
		/* fall through */
	case 2:
		if (length <= 0)
			return -EINVAL;
		break;
	default:
		return -EINVAL;
	}

	if (badblocks_set(bb, sector, length, !unack))
		return -ENOSPC;
	else
		return len;
}
EXPORT_SYMBOL_GPL(badblocks_store);
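
/*
 * Example of the sysfs text format produced by badblocks_show() and accepted
 * by badblocks_store() above. Which attribute exposes it is up to the driver
 * using this code, so the attribute name below is only a placeholder:
 *
 *	# cat <badblocks attribute>
 *	1536 8
 *	2048 512
 *	# echo "4096 16" > <badblocks attribute>
 *
 * Each line is "<first bad sector> <number of sectors>", with values scaled
 * back up by bb->shift on output; a write adds the range via badblocks_set(),
 * acknowledged unless the caller passed @unack.
 */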

static int __badblocks_init(struct device *dev, struct badblocks *bb,
		int enable)
{
	bb->dev = dev;
	bb->count = 0;
	if (enable)
		bb->shift = 0;
	else
		bb->shift = -1;
	if (dev)
		bb->page = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL);
	else
		bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL);
	if (!bb->page) {
		bb->shift = -1;
		return -ENOMEM;
	}
	seqlock_init(&bb->lock);

	return 0;
}

/**
 * badblocks_init() - initialize the badblocks structure
 * @bb: the badblocks structure that holds all badblock information
 * @enable: whether to enable badblocks accounting
 *
 * Return:
 *  0: success
 *  -ve errno: on error
 */
int badblocks_init(struct badblocks *bb, int enable)
{
	return __badblocks_init(NULL, bb, enable);
}
EXPORT_SYMBOL_GPL(badblocks_init);

int devm_init_badblocks(struct device *dev, struct badblocks *bb)
{
	if (!bb)
		return -EINVAL;
	return __badblocks_init(dev, bb, 1);
}
EXPORT_SYMBOL_GPL(devm_init_badblocks);

/**
 * badblocks_exit() - free the badblocks structure
 * @bb: the badblocks structure that holds all badblock information
 */
void badblocks_exit(struct badblocks *bb)
{
	if (!bb)
		return;
	if (bb->dev)
		devm_kfree(bb->dev, bb->page);
	else
		kfree(bb->page);
	bb->page = NULL;
}
EXPORT_SYMBOL_GPL(badblocks_exit);
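
/*
 * Typical lifecycle, as a sketch of a hypothetical user of this API (the
 * variable names and error handling are illustrative only):
 *
 *	struct badblocks my_bb;
 *	int rc;
 *
 *	rc = badblocks_init(&my_bb, 1);	// or devm_init_badblocks(dev, &my_bb)
 *	if (rc)
 *		return rc;
 *
 *	badblocks_set(&my_bb, bad_sector, nr_bad, 0);	// record a media error
 *	// ...persist the table, clear my_bb.changed, then:
 *	ack_all_badblocks(&my_bb);
 *
 *	badblocks_exit(&my_bb);
 *
 * With the devm_ variant the table page is device-managed, so it is released
 * automatically on driver detach; badblocks_exit() simply frees it earlier
 * via devm_kfree().
 */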