/*
 * Bad block management
 *
 * - Heavily based on MD badblocks code from Neil Brown
 *
 * Copyright (c) 2015, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#include <linux/badblocks.h>
#include <linux/seqlock.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/slab.h>

/**
 * badblocks_check() - check a given range for bad sectors
 * @bb: the badblocks structure that holds all badblock information
 * @s: sector (start) at which to check for badblocks
 * @sectors: number of sectors to check for badblocks
 * @first_bad: pointer to store location of the first badblock
 * @bad_sectors: pointer to store number of badblocks after @first_bad
 *
 * We can record which blocks on each device are 'bad' and so just
 * fail those blocks, or that stripe, rather than the whole device.
 * Entries in the bad-block table are 64 bits wide.  This comprises:
 * Length of bad-range, in sectors: 0-511 for lengths 1-512
 * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
 *  A 'shift' can be set so that larger blocks are tracked and
 *  consequently larger devices can be covered.
 * 'Acknowledged' flag - 1 bit. - the most significant bit.
 *
 * Locking of the bad-block table uses a seqlock so badblocks_check
 * might need to retry if it is very unlucky.
 * We will sometimes want to check for bad blocks in a bi_end_io function,
 * so we use the write_seqlock_irq variant.
 *
 * When looking for a bad block we specify a range and want to
 * know if any block in the range is bad.  So we binary-search
 * to the last range that starts at-or-before the given endpoint
 * (i.e. before the sector just after the target range),
 * then see if it ends after the given start.
 *
 * Return:
 *  0: there are no known bad blocks in the range
 *  1: there are known bad blocks which are all acknowledged
 * -1: there are bad blocks which have not yet been acknowledged in metadata.
 * plus the start/length of the first bad section we overlap.
 */
int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
			sector_t *first_bad, int *bad_sectors)
{
	int hi;
	int lo;
	u64 *p = bb->page;
	int rv;
	sector_t target = s + sectors;
	unsigned seq;

	if (bb->shift > 0) {
		/* round the start down, and the end up */
		s >>= bb->shift;
		target += (1<<bb->shift) - 1;
		target >>= bb->shift;
		sectors = target - s;
	}
	/* 'target' is now the first block after the bad range */

retry:
	seq = read_seqbegin(&bb->lock);
	lo = 0;
	rv = 0;
	hi = bb->count;

	/* Binary search between lo and hi for 'target'
	 * i.e. for the last range that starts before 'target'
	 */
	/* INVARIANT: ranges before 'lo' and at-or-after 'hi'
	 * are known not to be the last range before target.
	 * VARIANT: hi-lo is the number of possible
	 * ranges, and decreases until it reaches 1
	 */
	while (hi - lo > 1) {
		int mid = (lo + hi) / 2;
		sector_t a = BB_OFFSET(p[mid]);

		if (a < target)
			/* This could still be the one, earlier ranges
			 * could not.
			 */
			lo = mid;
		else
			/* This and later ranges are definitely out. */
			hi = mid;
	}
	/* 'lo' might be the last that started before target, but 'hi' isn't */
	if (hi > lo) {
		/* need to check all ranges that end after 's' to see if
		 * any are unacknowledged.
		 */
		while (lo >= 0 &&
		       BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
			if (BB_OFFSET(p[lo]) < target) {
				/* starts before the end, and finishes after
				 * the start, so they must overlap
				 */
				if (rv != -1 && BB_ACK(p[lo]))
					rv = 1;
				else
					rv = -1;
				*first_bad = BB_OFFSET(p[lo]);
				*bad_sectors = BB_LEN(p[lo]);
			}
			lo--;
		}
	}

	if (read_seqretry(&bb->lock, seq))
		goto retry;

	return rv;
}
EXPORT_SYMBOL_GPL(badblocks_check);
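
/*
 * Example (illustrative only, not part of this file): with bb->shift == 0,
 * a single 64-bit entry packs the 'acknowledged' flag into bit 63, the
 * start sector into bits 62-9 and (length - 1) into bits 8-0, so an
 * acknowledged 8-sector range starting at sector 1024 is stored as
 * BB_MAKE(1024, 8, 1).  A caller that wants to trim a read around known
 * bad blocks might do something like the following (variable names are
 * hypothetical; this is similar in spirit to what MD does):
 *
 *	sector_t first_bad;
 *	int bad_sectors;
 *
 *	if (badblocks_check(bb, sector, nr_sectors,
 *			    &first_bad, &bad_sectors) == 1 &&
 *	    first_bad > sector)
 *		// all overlapping bad ranges are acknowledged; read
 *		// only up to the start of the first bad range
 *		nr_sectors = first_bad - sector;
 */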

/**
 * badblocks_set() - Add a range of bad blocks to the table.
 * @bb: the badblocks structure that holds all badblock information
 * @s: first sector to mark as bad
 * @sectors: number of sectors to mark as bad
 * @acknowledged: whether to mark the bad sectors as acknowledged
 *
 * This might extend the table, or might contract it if two adjacent ranges
 * can be merged.  We binary-search to find the 'insertion' point, then
 * decide how best to handle it.
 *
 * Return:
 *  0: success
 *  1: failed to set badblocks (out of space)
 */
int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
			int acknowledged)
{
	u64 *p;
	int lo, hi;
	int rv = 0;
	unsigned long flags;

	if (bb->shift < 0)
		/* badblocks are disabled */
		return 0;

	if (bb->shift) {
		/* round the start down, and the end up */
		sector_t next = s + sectors;

		s >>= bb->shift;
		next += (1<<bb->shift) - 1;
		next >>= bb->shift;
		sectors = next - s;
	}

	write_seqlock_irqsave(&bb->lock, flags);

	p = bb->page;
	lo = 0;
	hi = bb->count;
	/* Find the last range that starts at-or-before 's' */
	while (hi - lo > 1) {
		int mid = (lo + hi) / 2;
		sector_t a = BB_OFFSET(p[mid]);

		if (a <= s)
			lo = mid;
		else
			hi = mid;
	}
	if (hi > lo && BB_OFFSET(p[lo]) > s)
		hi = lo;

	if (hi > lo) {
		/* we found a range that might merge with the start
		 * of our new range
		 */
		sector_t a = BB_OFFSET(p[lo]);
		sector_t e = a + BB_LEN(p[lo]);
		int ack = BB_ACK(p[lo]);

		if (e >= s) {
			/* Yes, we can merge with a previous range */
			if (s == a && s + sectors >= e)
				/* new range covers old */
				ack = acknowledged;
			else
				ack = ack && acknowledged;

			if (e < s + sectors)
				e = s + sectors;
			if (e - a <= BB_MAX_LEN) {
				p[lo] = BB_MAKE(a, e-a, ack);
				s = e;
			} else {
				/* does not all fit in one range,
				 * make p[lo] maximal
				 */
				if (BB_LEN(p[lo]) != BB_MAX_LEN)
					p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
				s = a + BB_MAX_LEN;
			}
			sectors = e - s;
		}
	}
	if (sectors && hi < bb->count) {
		/* 'hi' points to the first range that starts after 's'.
		 * Maybe we can merge with the start of that range
		 */
		sector_t a = BB_OFFSET(p[hi]);
		sector_t e = a + BB_LEN(p[hi]);
		int ack = BB_ACK(p[hi]);

		if (a <= s + sectors) {
			/* merging is possible */
			if (e <= s + sectors) {
				/* full overlap */
				e = s + sectors;
				ack = acknowledged;
			} else
				ack = ack && acknowledged;

			a = s;
			if (e - a <= BB_MAX_LEN) {
				p[hi] = BB_MAKE(a, e-a, ack);
				s = e;
			} else {
				p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
				s = a + BB_MAX_LEN;
			}
			sectors = e - s;
			lo = hi;
			hi++;
		}
	}
	if (sectors == 0 && hi < bb->count) {
		/* we might be able to combine lo and hi */
		/* Note: 's' is at the end of 'lo' */
		sector_t a = BB_OFFSET(p[hi]);
		int lolen = BB_LEN(p[lo]);
		int hilen = BB_LEN(p[hi]);
		int newlen = lolen + hilen - (s - a);

		if (s >= a && newlen < BB_MAX_LEN) {
			/* yes, we can combine them */
			int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);

			p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
			memmove(p + hi, p + hi + 1,
				(bb->count - hi - 1) * 8);
			bb->count--;
		}
	}
	while (sectors) {
		/* didn't merge (it all).
		 * Need to add a range just before 'hi'
		 */
		if (bb->count >= MAX_BADBLOCKS) {
			/* No room for more */
			rv = 1;
			break;
		} else {
			int this_sectors = sectors;

			memmove(p + hi + 1, p + hi,
				(bb->count - hi) * 8);
			bb->count++;

			if (this_sectors > BB_MAX_LEN)
				this_sectors = BB_MAX_LEN;
			p[hi] = BB_MAKE(s, this_sectors, acknowledged);
			sectors -= this_sectors;
			s += this_sectors;
		}
	}

	bb->changed = 1;
	if (!acknowledged)
		bb->unacked_exist = 1;
	write_sequnlock_irqrestore(&bb->lock, flags);

	return rv;
}
EXPORT_SYMBOL_GPL(badblocks_set);
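
/*
 * Example (illustrative only): starting from an empty table,
 *
 *	badblocks_set(bb, 32, 8, 1);	// records [32, 40), acked
 *	badblocks_set(bb, 40, 8, 1);	// adjacent: extends it to [32, 48)
 *	badblocks_set(bb, 36, 4, 0);	// overlap: still one entry
 *
 * ends with a single table entry covering sectors 32-47 whose
 * 'acknowledged' bit is clear, because merging an unacknowledged range
 * into an acknowledged one takes 'ack && acknowledged' above.
 */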

/**
 * badblocks_clear() - Remove a range of bad blocks from the table.
 * @bb: the badblocks structure that holds all badblock information
 * @s: first sector to clear
 * @sectors: number of sectors to clear
 *
 * This may involve extending the table if we split a region,
 * but it must not fail.  So if the table becomes full, we just
 * drop the remove request.
 *
 * Return:
 *  0: success
 * -ENOSPC: failed to clear badblocks (no room left to split a range)
 */
int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
{
	u64 *p;
	int lo, hi;
	sector_t target = s + sectors;
	int rv = 0;

	if (bb->shift > 0) {
		/* When clearing we round the start up and the end down.
		 * This should not matter as the shift should align with
		 * the block size and no rounding should ever be needed.
		 * However it is better to think a block is bad when it
		 * isn't than to think a block is not bad when it is.
		 */
		s += (1<<bb->shift) - 1;
		s >>= bb->shift;
		target >>= bb->shift;
		sectors = target - s;
	}

	write_seqlock_irq(&bb->lock);

	p = bb->page;
	lo = 0;
	hi = bb->count;
	/* Find the last range that starts before 'target' */
	while (hi - lo > 1) {
		int mid = (lo + hi) / 2;
		sector_t a = BB_OFFSET(p[mid]);

		if (a < target)
			lo = mid;
		else
			hi = mid;
	}
	if (hi > lo) {
		/* p[lo] is the last range that could overlap the
		 * current range.  Earlier ranges could also overlap,
		 * but only this one can overlap the end of the range.
		 */
		if (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) {
			/* Partial overlap, leave the tail of this range */
			int ack = BB_ACK(p[lo]);
			sector_t a = BB_OFFSET(p[lo]);
			sector_t end = a + BB_LEN(p[lo]);

			if (a < s) {
				/* we need to split this range */
				if (bb->count >= MAX_BADBLOCKS) {
					rv = -ENOSPC;
					goto out;
				}
				memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
				bb->count++;
				p[lo] = BB_MAKE(a, s-a, ack);
				lo++;
			}
			p[lo] = BB_MAKE(target, end - target, ack);
			/* there is no longer an overlap */
			hi = lo;
			lo--;
		}
		while (lo >= 0 &&
		       BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
			/* This range does overlap */
			if (BB_OFFSET(p[lo]) < s) {
				/* Keep the early parts of this range. */
				int ack = BB_ACK(p[lo]);
				sector_t start = BB_OFFSET(p[lo]);

				p[lo] = BB_MAKE(start, s - start, ack);
				/* now p[lo] no longer overlaps, so stop */
				break;
			}
			lo--;
		}
		/* 'lo' is strictly before, 'hi' is strictly after,
		 * anything between needs to be discarded
		 */
		if (hi - lo > 1) {
			memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
			bb->count -= (hi - lo - 1);
		}
	}

	bb->changed = 1;
out:
	write_sequnlock_irq(&bb->lock);
	return rv;
}
EXPORT_SYMBOL_GPL(badblocks_clear);
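
/*
 * Example (illustrative only): clearing the middle of a recorded range
 * splits it in two,
 *
 *	badblocks_set(bb, 100, 30, 1);	// table: [100, 130)
 *	badblocks_clear(bb, 110, 10);	// table: [100, 110), [120, 130)
 *
 * which is why badblocks_clear() can fail with -ENOSPC even though it
 * only removes bad blocks: the split needs a free table slot.
 */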

/**
 * ack_all_badblocks() - Acknowledge all bad blocks in a list.
 * @bb: the badblocks structure that holds all badblock information
 *
 * This only succeeds if ->changed is clear.  It is used by
 * in-kernel metadata updates
 */
void ack_all_badblocks(struct badblocks *bb)
{
	if (bb->page == NULL || bb->changed)
		/* no point even trying */
		return;
	write_seqlock_irq(&bb->lock);

	if (bb->changed == 0 && bb->unacked_exist) {
		u64 *p = bb->page;
		int i;

		for (i = 0; i < bb->count; i++) {
			if (!BB_ACK(p[i])) {
				sector_t start = BB_OFFSET(p[i]);
				int len = BB_LEN(p[i]);

				p[i] = BB_MAKE(start, len, 1);
			}
		}
		bb->unacked_exist = 0;
	}
	write_sequnlock_irq(&bb->lock);
}
EXPORT_SYMBOL_GPL(ack_all_badblocks);

/**
 * badblocks_show() - sysfs access to bad-blocks list
 * @bb: the badblocks structure that holds all badblock information
 * @page: buffer received from sysfs
 * @unack: whether to show only unacknowledged badblocks
 *
 * Return:
 * Length of returned data
 */
ssize_t badblocks_show(struct badblocks *bb, char *page, int unack)
{
	size_t len;
	int i;
	u64 *p = bb->page;
	unsigned seq;

	if (bb->shift < 0)
		return 0;

retry:
	seq = read_seqbegin(&bb->lock);

	len = 0;
	i = 0;

	while (len < PAGE_SIZE && i < bb->count) {
		sector_t s = BB_OFFSET(p[i]);
		unsigned int length = BB_LEN(p[i]);
		int ack = BB_ACK(p[i]);

		i++;

		if (unack && ack)
			continue;

		len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
				(unsigned long long)s << bb->shift,
				length << bb->shift);
	}
	if (unack && len == 0)
		bb->unacked_exist = 0;

	if (read_seqretry(&bb->lock, seq))
		goto retry;

	return len;
}
EXPORT_SYMBOL_GPL(badblocks_show);

/**
 * badblocks_store() - sysfs access to bad-blocks list
 * @bb: the badblocks structure that holds all badblock information
 * @page: buffer received from sysfs
 * @len: length of data received from sysfs
 * @unack: whether to store the new badblocks as unacknowledged
 *
 * Return:
 * Length of the buffer processed or negative error.
 */
ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
			int unack)
{
	unsigned long long sector;
	int length;
	char newline;

	switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
	case 3:
		if (newline != '\n')
			return -EINVAL;
		/* fall through */
	case 2:
		if (length <= 0)
			return -EINVAL;
		break;
	default:
		return -EINVAL;
	}

	if (badblocks_set(bb, sector, length, !unack))
		return -ENOSPC;
	else
		return len;
}
EXPORT_SYMBOL_GPL(badblocks_store);
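
/*
 * Example (illustrative only): a driver that wires these helpers up to a
 * sysfs attribute (as the md and nvdimm drivers do) accepts writes of
 * the form "<sector> <length>", so from userspace:
 *
 *	# mark 8 sectors starting at sector 1024 as bad (acknowledged)
 *	echo "1024 8" > /sys/.../badblocks
 *	# reading back shows one "<sector> <length>" pair per line
 *	cat /sys/.../badblocks
 *	1024 8
 *
 * The exact attribute path depends on the driver; the one above is a
 * placeholder.
 */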

static int __badblocks_init(struct device *dev, struct badblocks *bb,
		int enable)
{
	bb->dev = dev;
	bb->count = 0;
	if (enable)
		bb->shift = 0;
	else
		bb->shift = -1;
	if (dev)
		bb->page = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL);
	else
		bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL);
	if (!bb->page) {
		bb->shift = -1;
		return -ENOMEM;
	}
	seqlock_init(&bb->lock);

	return 0;
}

/**
 * badblocks_init() - initialize the badblocks structure
 * @bb: the badblocks structure that holds all badblock information
 * @enable: whether to enable badblocks accounting
 *
 * Return:
 * 0: success
 * -ve errno: on error
 */
int badblocks_init(struct badblocks *bb, int enable)
{
	return __badblocks_init(NULL, bb, enable);
}
EXPORT_SYMBOL_GPL(badblocks_init);

/**
 * devm_init_badblocks() - device-managed counterpart of badblocks_init()
 * @dev: device the badblocks page is allocated against
 * @bb: the badblocks structure that holds all badblock information
 *
 * The backing page is allocated with devm_kzalloc(), so it is released
 * automatically when @dev is unbound.
 *
 * Return:
 * 0: success
 * -ve errno: on error
 */
int devm_init_badblocks(struct device *dev, struct badblocks *bb)
{
	if (!bb)
		return -EINVAL;
	return __badblocks_init(dev, bb, 1);
}
EXPORT_SYMBOL_GPL(devm_init_badblocks);

/**
 * badblocks_exit() - free the badblocks structure
 * @bb: the badblocks structure that holds all badblock information
 */
void badblocks_exit(struct badblocks *bb)
{
	if (!bb)
		return;
	if (bb->dev)
		devm_kfree(bb->dev, bb->page);
	else
		kfree(bb->page);
	bb->page = NULL;
}
EXPORT_SYMBOL_GPL(badblocks_exit);
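
/*
 * Example (illustrative only): typical lifecycle in a caller that does
 * not use the device-managed variant (names are hypothetical):
 *
 *	struct badblocks bb;
 *	int ret;
 *
 *	ret = badblocks_init(&bb, 1);	// enabled, shift == 0
 *	if (ret)
 *		return ret;
 *	...
 *	badblocks_set(&bb, sector, nr_sectors, 1);
 *	...
 *	badblocks_exit(&bb);		// frees the backing page
 *
 * With devm_init_badblocks(dev, &bb) the backing page is tied to the
 * device's lifetime instead.
 */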