/*
 * Block Translation Table
 * Copyright (c) 2014-2015, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */
#include <linux/highmem.h>
#include <linux/debugfs.h>
#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/mutex.h>
#include <linux/hdreg.h>
#include <linux/genhd.h>
#include <linux/sizes.h>
#include <linux/ndctl.h>
#include <linux/fs.h>
#include <linux/nd.h>
#include "btt.h"
#include "nd.h"

enum log_ent_request {
	LOG_NEW_ENT = 0,
	LOG_OLD_ENT
};

static struct device *to_dev(struct arena_info *arena)
{
	return &arena->nd_btt->dev;
}

static u64 adjust_initial_offset(struct nd_btt *nd_btt, u64 offset)
{
	return offset + nd_btt->initial_offset;
}

static int arena_read_bytes(struct arena_info *arena, resource_size_t offset,
		void *buf, size_t n, unsigned long flags)
{
	struct nd_btt *nd_btt = arena->nd_btt;
	struct nd_namespace_common *ndns = nd_btt->ndns;

	/* arena offsets may be shifted from the base of the device */
	offset = adjust_initial_offset(nd_btt, offset);
	return nvdimm_read_bytes(ndns, offset, buf, n, flags);
}

static int arena_write_bytes(struct arena_info *arena, resource_size_t offset,
		void *buf, size_t n, unsigned long flags)
{
	struct nd_btt *nd_btt = arena->nd_btt;
	struct nd_namespace_common *ndns = nd_btt->ndns;

	/* arena offsets may be shifted from the base of the device */
	offset = adjust_initial_offset(nd_btt, offset);
	return nvdimm_write_bytes(ndns, offset, buf, n, flags);
}

static int btt_info_write(struct arena_info *arena, struct btt_sb *super)
{
	int ret;

	/*
	 * infooff and info2off should always be at least 512B aligned.
	 * We rely on that to make sure rw_bytes does error clearing
	 * correctly, so make sure that is the case.
	 */
	dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->infooff, 512),
		"arena->infooff: %#llx is unaligned\n", arena->infooff);
	dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->info2off, 512),
		"arena->info2off: %#llx is unaligned\n", arena->info2off);

	ret = arena_write_bytes(arena, arena->info2off, super,
			sizeof(struct btt_sb), 0);
	if (ret)
		return ret;

	return arena_write_bytes(arena, arena->infooff, super,
			sizeof(struct btt_sb), 0);
}

static int btt_info_read(struct arena_info *arena, struct btt_sb *super)
{
	return arena_read_bytes(arena, arena->infooff, super,
			sizeof(struct btt_sb), 0);
}

/*
 * 'raw' version of btt_map write
 * Assumptions:
 *   mapping is in little-endian
 *   mapping contains 'E' and 'Z' flags as desired
 */
static int __btt_map_write(struct arena_info *arena, u32 lba, __le32 mapping,
		unsigned long flags)
{
	u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE);

	if (unlikely(lba >= arena->external_nlba))
		dev_err_ratelimited(to_dev(arena),
			"%s: lba %#x out of range (max: %#x)\n",
			__func__, lba, arena->external_nlba);
	return arena_write_bytes(arena, ns_off, &mapping, MAP_ENT_SIZE, flags);
}

static int btt_map_write(struct arena_info *arena, u32 lba, u32 mapping,
			u32 z_flag, u32 e_flag, unsigned long rwb_flags)
{
	u32 ze;
	__le32 mapping_le;

	/*
	 * This 'mapping' is supposed to be just the LBA mapping, without
	 * any flags set, so strip the flag bits.
	 */
	mapping = ent_lba(mapping);

	ze = (z_flag << 1) + e_flag;
	switch (ze) {
	case 0:
		/*
		 * We want to set neither of the Z or E flags, and
		 * in the actual layout, this means setting the bit
		 * positions of both to '1' to indicate a 'normal'
		 * map entry
		 */
		mapping |= MAP_ENT_NORMAL;
		break;
	case 1:
		mapping |= (1 << MAP_ERR_SHIFT);
		break;
	case 2:
		mapping |= (1 << MAP_TRIM_SHIFT);
		break;
	default:
		/*
		 * The case where Z and E are both sent in as '1' could be
		 * construed as a valid 'normal' case, but we decide not to,
		 * to avoid confusion
		 */
		dev_err_ratelimited(to_dev(arena),
			"Invalid use of Z and E flags\n");
		return -EIO;
	}

	mapping_le = cpu_to_le32(mapping);
	return __btt_map_write(arena, lba, mapping_le, rwb_flags);
}

static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping,
			int *trim, int *error, unsigned long rwb_flags)
{
	int ret;
	__le32 in;
	u32 raw_mapping, postmap, ze, z_flag, e_flag;
	u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE);

	if (unlikely(lba >= arena->external_nlba))
		dev_err_ratelimited(to_dev(arena),
			"%s: lba %#x out of range (max: %#x)\n",
			__func__, lba, arena->external_nlba);

	ret = arena_read_bytes(arena, ns_off, &in, MAP_ENT_SIZE, rwb_flags);
	if (ret)
		return ret;

	raw_mapping = le32_to_cpu(in);

	z_flag = ent_z_flag(raw_mapping);
	e_flag = ent_e_flag(raw_mapping);
	ze = (z_flag << 1) + e_flag;
	postmap = ent_lba(raw_mapping);

	/* Reuse the {z,e}_flag variables for *trim and *error */
	z_flag = 0;
	e_flag = 0;

	switch (ze) {
	case 0:
		/* Initial state. Return postmap = premap */
		*mapping = lba;
		break;
	case 1:
		*mapping = postmap;
		e_flag = 1;
		break;
	case 2:
		*mapping = postmap;
		z_flag = 1;
		break;
	case 3:
		*mapping = postmap;
		break;
	default:
		return -EIO;
	}

	if (trim)
		*trim = z_flag;
	if (error)
		*error = e_flag;

	return ret;
}

static int btt_log_read_pair(struct arena_info *arena, u32 lane,
			struct log_entry *ent)
{
	return arena_read_bytes(arena,
			arena->logoff + (2 * lane * LOG_ENT_SIZE), ent,
			2 * LOG_ENT_SIZE, 0);
}

static struct dentry *debugfs_root;

static void arena_debugfs_init(struct arena_info *a, struct dentry *parent,
				int idx)
{
	char dirname[32];
	struct dentry *d;

	/* If for some reason, parent bttN was not created, exit */
	if (!parent)
		return;

	snprintf(dirname, 32, "arena%d", idx);
	d = debugfs_create_dir(dirname, parent);
	if (IS_ERR_OR_NULL(d))
		return;
	a->debugfs_dir = d;

	debugfs_create_x64("size", S_IRUGO, d, &a->size);
	debugfs_create_x64("external_lba_start", S_IRUGO, d,
				&a->external_lba_start);
	debugfs_create_x32("internal_nlba", S_IRUGO, d, &a->internal_nlba);
	debugfs_create_u32("internal_lbasize", S_IRUGO, d,
				&a->internal_lbasize);
	debugfs_create_x32("external_nlba", S_IRUGO, d, &a->external_nlba);
	debugfs_create_u32("external_lbasize", S_IRUGO, d,
				&a->external_lbasize);
	debugfs_create_u32("nfree", S_IRUGO, d, &a->nfree);
	debugfs_create_u16("version_major", S_IRUGO, d, &a->version_major);
	debugfs_create_u16("version_minor", S_IRUGO, d, &a->version_minor);
	debugfs_create_x64("nextoff", S_IRUGO, d, &a->nextoff);
	debugfs_create_x64("infooff", S_IRUGO, d, &a->infooff);
	debugfs_create_x64("dataoff", S_IRUGO, d, &a->dataoff);
	debugfs_create_x64("mapoff", S_IRUGO, d, &a->mapoff);
	debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff);
	debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off);
	debugfs_create_x32("flags", S_IRUGO, d, &a->flags);
}

static void btt_debugfs_init(struct btt *btt)
{
	int i = 0;
	struct arena_info *arena;

	btt->debugfs_dir = debugfs_create_dir(dev_name(&btt->nd_btt->dev),
			debugfs_root);
	if (IS_ERR_OR_NULL(btt->debugfs_dir))
		return;

	list_for_each_entry(arena, &btt->arena_list, list) {
		arena_debugfs_init(arena, btt->debugfs_dir, i);
		i++;
	}
}

/*
 * This function accepts two log entries, and uses the
 * sequence number to find the 'older' entry.
 * It also updates the sequence number in this old entry to
 * make it the 'new' one if the mark_flag is set.
 * Finally, it returns which of the entries was the older one.
 *
 * TODO The logic feels a bit kludge-y. make it better..
 */
static int btt_log_get_old(struct log_entry *ent)
{
	int old;

	/*
	 * the first ever time this is seen, the entry goes into [0]
	 * the next time, the following logic works out to put this
	 * (next) entry into [1]
	 */
	if (ent[0].seq == 0) {
		ent[0].seq = cpu_to_le32(1);
		return 0;
	}

	if (ent[0].seq == ent[1].seq)
		return -EINVAL;
	if (le32_to_cpu(ent[0].seq) + le32_to_cpu(ent[1].seq) > 5)
		return -EINVAL;

	if (le32_to_cpu(ent[0].seq) < le32_to_cpu(ent[1].seq)) {
		if (le32_to_cpu(ent[1].seq) - le32_to_cpu(ent[0].seq) == 1)
			old = 0;
		else
			old = 1;
	} else {
		if (le32_to_cpu(ent[0].seq) - le32_to_cpu(ent[1].seq) == 1)
			old = 1;
		else
			old = 0;
	}

	return old;
}

/*
 * This function copies the desired (old/new) log entry into ent if
 * it is not NULL. It returns the sub-slot number (0 or 1)
 * where the desired log entry was found. Negative return values
 * indicate errors.
 */
static int btt_log_read(struct arena_info *arena, u32 lane,
			struct log_entry *ent, int old_flag)
{
	int ret;
	int old_ent, ret_ent;
	struct log_entry log[2];

	ret = btt_log_read_pair(arena, lane, log);
	if (ret)
		return -EIO;

	old_ent = btt_log_get_old(log);
	if (old_ent < 0 || old_ent > 1) {
		dev_err(to_dev(arena),
				"log corruption (%d): lane %d seq [%d, %d]\n",
				old_ent, lane, log[0].seq, log[1].seq);
		/* TODO set error state? */
		return -EIO;
	}

	ret_ent = (old_flag ? old_ent : (1 - old_ent));

	if (ent != NULL)
		memcpy(ent, &log[ret_ent], LOG_ENT_SIZE);

	return ret_ent;
}

/*
 * This function commits a log entry to media
 * It does _not_ prepare the freelist entry for the next write
 * btt_flog_write is the wrapper for updating the freelist elements
 */
static int __btt_log_write(struct arena_info *arena, u32 lane,
			u32 sub, struct log_entry *ent, unsigned long flags)
{
	int ret;
	/*
	 * Ignore the padding in log_entry for calculating log_half.
	 * The entry is 'committed' when we write the sequence number,
	 * and we want to ensure that that is the last thing written.
	 * We don't bother writing the padding as that would be extra
	 * media wear and write amplification
	 */
	unsigned int log_half = (LOG_ENT_SIZE - 2 * sizeof(u64)) / 2;
	u64 ns_off = arena->logoff + (((2 * lane) + sub) * LOG_ENT_SIZE);
	void *src = ent;

	/* split the 16B write into atomic, durable halves */
	ret = arena_write_bytes(arena, ns_off, src, log_half, flags);
	if (ret)
		return ret;

	ns_off += log_half;
	src += log_half;
	return arena_write_bytes(arena, ns_off, src, log_half, flags);
}

static int btt_flog_write(struct arena_info *arena, u32 lane, u32 sub,
			struct log_entry *ent)
{
	int ret;

	ret = __btt_log_write(arena, lane, sub, ent, NVDIMM_IO_ATOMIC);
	if (ret)
		return ret;

	/* prepare the next free entry */
	arena->freelist[lane].sub = 1 - arena->freelist[lane].sub;
	if (++(arena->freelist[lane].seq) == 4)
		arena->freelist[lane].seq = 1;
	if (ent_e_flag(ent->old_map))
		arena->freelist[lane].has_err = 1;
	arena->freelist[lane].block = le32_to_cpu(ent_lba(ent->old_map));

	return ret;
}

/*
 * This function initializes the BTT map to the initial state, which is
 * all-zeroes, and indicates an identity mapping
 */
static int btt_map_init(struct arena_info *arena)
{
	int ret = -EINVAL;
	void *zerobuf;
	size_t offset = 0;
	size_t chunk_size = SZ_2M;
	size_t mapsize = arena->logoff - arena->mapoff;

	zerobuf = kzalloc(chunk_size, GFP_KERNEL);
	if (!zerobuf)
		return -ENOMEM;

	/*
	 * mapoff should always be at least 512B aligned. We rely on that to
	 * make sure rw_bytes does error clearing correctly, so make sure that
	 * is the case.
	 */
	dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->mapoff, 512),
		"arena->mapoff: %#llx is unaligned\n", arena->mapoff);

	while (mapsize) {
		size_t size = min(mapsize, chunk_size);

		dev_WARN_ONCE(to_dev(arena), size < 512,
			"chunk size: %#zx is unaligned\n", size);
		ret = arena_write_bytes(arena, arena->mapoff + offset, zerobuf,
				size, 0);
		if (ret)
			goto free;

		offset += size;
		mapsize -= size;
		cond_resched();
	}

 free:
	kfree(zerobuf);
	return ret;
}

/*
 * This function initializes the BTT log with 'fake' entries pointing
 * to the initial reserved set of blocks as being free
 */
static int btt_log_init(struct arena_info *arena)
{
	size_t logsize = arena->info2off - arena->logoff;
	size_t chunk_size = SZ_4K, offset = 0;
	struct log_entry log;
	void *zerobuf;
	int ret;
	u32 i;

	zerobuf = kzalloc(chunk_size, GFP_KERNEL);
	if (!zerobuf)
		return -ENOMEM;
	/*
	 * logoff should always be at least 512B aligned. We rely on that to
	 * make sure rw_bytes does error clearing correctly, so make sure that
	 * is the case.
	 */
	dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->logoff, 512),
		"arena->logoff: %#llx is unaligned\n", arena->logoff);

	while (logsize) {
		size_t size = min(logsize, chunk_size);

		dev_WARN_ONCE(to_dev(arena), size < 512,
			"chunk size: %#zx is unaligned\n", size);
		ret = arena_write_bytes(arena, arena->logoff + offset, zerobuf,
				size, 0);
		if (ret)
			goto free;

		offset += size;
		logsize -= size;
		cond_resched();
	}

	for (i = 0; i < arena->nfree; i++) {
		log.lba = cpu_to_le32(i);
		log.old_map = cpu_to_le32(arena->external_nlba + i);
		log.new_map = cpu_to_le32(arena->external_nlba + i);
		log.seq = cpu_to_le32(LOG_SEQ_INIT);
		ret = __btt_log_write(arena, i, 0, &log, 0);
		if (ret)
			goto free;
	}

 free:
	kfree(zerobuf);
	return ret;
}

static u64 to_namespace_offset(struct arena_info *arena, u64 lba)
{
	return arena->dataoff + ((u64)lba * arena->internal_lbasize);
}

static int arena_clear_freelist_error(struct arena_info *arena, u32 lane)
{
	int ret = 0;

	if (arena->freelist[lane].has_err) {
		void *zero_page = page_address(ZERO_PAGE(0));
		u32 lba = arena->freelist[lane].block;
		u64 nsoff = to_namespace_offset(arena, lba);
		unsigned long len = arena->sector_size;

		mutex_lock(&arena->err_lock);

		while (len) {
			unsigned long chunk = min(len, PAGE_SIZE);

			ret = arena_write_bytes(arena, nsoff, zero_page,
				chunk, 0);
			if (ret)
				break;
			len -= chunk;
			nsoff += chunk;
			if (len == 0)
				arena->freelist[lane].has_err = 0;
		}
		mutex_unlock(&arena->err_lock);
	}
	return ret;
}

static int btt_freelist_init(struct arena_info *arena)
{
	int old, new, ret;
	u32 i, map_entry;
	struct log_entry log_new, log_old;

	arena->freelist = kcalloc(arena->nfree, sizeof(struct free_entry),
					GFP_KERNEL);
	if (!arena->freelist)
		return -ENOMEM;

	for (i = 0; i < arena->nfree; i++) {
		old = btt_log_read(arena, i, &log_old, LOG_OLD_ENT);
		if (old < 0)
			return old;

		new = btt_log_read(arena, i, &log_new, LOG_NEW_ENT);
		if (new < 0)
			return new;

		/* sub points to the next one to be overwritten */
		arena->freelist[i].sub = 1 - new;
		arena->freelist[i].seq = nd_inc_seq(le32_to_cpu(log_new.seq));
		arena->freelist[i].block = le32_to_cpu(log_new.old_map);

		/*
		 * FIXME: if error clearing fails during init, we want to make
		 * the BTT read-only
		 */
		if (ent_e_flag(log_new.old_map)) {
			ret = arena_clear_freelist_error(arena, i);
			if (ret)
				dev_err_ratelimited(to_dev(arena),
					"Unable to clear known errors\n");
		}

		/* This implies a newly created or untouched flog entry */
		if (log_new.old_map == log_new.new_map)
			continue;

		/* Check if map recovery is needed */
		ret = btt_map_read(arena, le32_to_cpu(log_new.lba), &map_entry,
				NULL, NULL, 0);
		if (ret)
			return ret;
		if ((le32_to_cpu(log_new.new_map) != map_entry) &&
				(le32_to_cpu(log_new.old_map) == map_entry)) {
			/*
			 * Last transaction wrote the flog, but wasn't able
			 * to complete the map write. So fix up the map.
			 */
			ret = btt_map_write(arena, le32_to_cpu(log_new.lba),
					le32_to_cpu(log_new.new_map), 0, 0, 0);
			if (ret)
				return ret;
		}
	}

	return 0;
}

static int btt_rtt_init(struct arena_info *arena)
{
	arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL);
	if (arena->rtt == NULL)
		return -ENOMEM;

	return 0;
}

static int btt_maplocks_init(struct arena_info *arena)
{
	u32 i;

	arena->map_locks = kcalloc(arena->nfree, sizeof(struct aligned_lock),
				GFP_KERNEL);
	if (!arena->map_locks)
		return -ENOMEM;

	for (i = 0; i < arena->nfree; i++)
		spin_lock_init(&arena->map_locks[i].lock);

	return 0;
}

static struct arena_info *alloc_arena(struct btt *btt, size_t size,
				size_t start, size_t arena_off)
{
	struct arena_info *arena;
	u64 logsize, mapsize, datasize;
	u64 available = size;

	arena = kzalloc(sizeof(struct arena_info), GFP_KERNEL);
	if (!arena)
		return NULL;
	arena->nd_btt = btt->nd_btt;
	arena->sector_size = btt->sector_size;

	if (!size)
		return arena;

	arena->size = size;
	arena->external_lba_start = start;
	arena->external_lbasize = btt->lbasize;
	arena->internal_lbasize = roundup(arena->external_lbasize,
					INT_LBASIZE_ALIGNMENT);
	arena->nfree = BTT_DEFAULT_NFREE;
	arena->version_major = btt->nd_btt->version_major;
	arena->version_minor = btt->nd_btt->version_minor;

	if (available % BTT_PG_SIZE)
		available -= (available % BTT_PG_SIZE);

	/* Two pages are reserved for the super block and its copy */
	available -= 2 * BTT_PG_SIZE;

	/* The log takes a fixed amount of space based on nfree */
	logsize = roundup(2 * arena->nfree * sizeof(struct log_entry),
				BTT_PG_SIZE);
	available -= logsize;

	/* Calculate optimal split between map and data area */
	arena->internal_nlba = div_u64(available - BTT_PG_SIZE,
			arena->internal_lbasize + MAP_ENT_SIZE);
	arena->external_nlba = arena->internal_nlba - arena->nfree;

	mapsize = roundup((arena->external_nlba * MAP_ENT_SIZE), BTT_PG_SIZE);
	datasize = available - mapsize;

	/* 'Absolute' values, relative to start of storage space */
	arena->infooff = arena_off;
	arena->dataoff = arena->infooff + BTT_PG_SIZE;
	arena->mapoff = arena->dataoff + datasize;
	arena->logoff = arena->mapoff + mapsize;
	arena->info2off = arena->logoff + logsize;
	return arena;
}

static void free_arenas(struct btt *btt)
{
	struct arena_info *arena, *next;

	list_for_each_entry_safe(arena, next, &btt->arena_list, list) {
		list_del(&arena->list);
		kfree(arena->rtt);
		kfree(arena->map_locks);
		kfree(arena->freelist);
		debugfs_remove_recursive(arena->debugfs_dir);
		kfree(arena);
	}
}

/*
 * This function reads an existing valid btt superblock and
 * populates the corresponding arena_info struct
 */
static void parse_arena_meta(struct arena_info *arena, struct btt_sb *super,
				u64 arena_off)
{
	arena->internal_nlba = le32_to_cpu(super->internal_nlba);
	arena->internal_lbasize = le32_to_cpu(super->internal_lbasize);
	arena->external_nlba = le32_to_cpu(super->external_nlba);
	arena->external_lbasize = le32_to_cpu(super->external_lbasize);
	arena->nfree = le32_to_cpu(super->nfree);
	arena->version_major = le16_to_cpu(super->version_major);
	arena->version_minor = le16_to_cpu(super->version_minor);

	arena->nextoff = (super->nextoff == 0) ? 0 : (arena_off +
			le64_to_cpu(super->nextoff));
	arena->infooff = arena_off;
	arena->dataoff = arena_off + le64_to_cpu(super->dataoff);
	arena->mapoff = arena_off + le64_to_cpu(super->mapoff);
	arena->logoff = arena_off + le64_to_cpu(super->logoff);
	arena->info2off = arena_off + le64_to_cpu(super->info2off);

	arena->size = (le64_to_cpu(super->nextoff) > 0)
		? (le64_to_cpu(super->nextoff))
		: (arena->info2off - arena->infooff + BTT_PG_SIZE);

	arena->flags = le32_to_cpu(super->flags);
}

static int discover_arenas(struct btt *btt)
{
	int ret = 0;
	struct arena_info *arena;
	struct btt_sb *super;
	size_t remaining = btt->rawsize;
	u64 cur_nlba = 0;
	size_t cur_off = 0;
	int num_arenas = 0;

	super = kzalloc(sizeof(*super), GFP_KERNEL);
	if (!super)
		return -ENOMEM;

	while (remaining) {
		/* Alloc memory for arena */
		arena = alloc_arena(btt, 0, 0, 0);
		if (!arena) {
			ret = -ENOMEM;
			goto out_super;
		}

		arena->infooff = cur_off;
		ret = btt_info_read(arena, super);
		if (ret)
			goto out;

		if (!nd_btt_arena_is_valid(btt->nd_btt, super)) {
			if (remaining == btt->rawsize) {
				btt->init_state = INIT_NOTFOUND;
				dev_info(to_dev(arena), "No existing arenas\n");
				goto out;
			} else {
				dev_err(to_dev(arena),
						"Found corrupted metadata!\n");
				ret = -ENODEV;
				goto out;
			}
		}

		arena->external_lba_start = cur_nlba;
		parse_arena_meta(arena, super, cur_off);

		mutex_init(&arena->err_lock);
		ret = btt_freelist_init(arena);
		if (ret)
			goto out;

		ret = btt_rtt_init(arena);
		if (ret)
			goto out;

		ret = btt_maplocks_init(arena);
		if (ret)
			goto out;

		list_add_tail(&arena->list, &btt->arena_list);

		remaining -= arena->size;
		cur_off += arena->size;
		cur_nlba += arena->external_nlba;
		num_arenas++;

		if (arena->nextoff == 0)
			break;
	}
	btt->num_arenas = num_arenas;
	btt->nlba = cur_nlba;
	btt->init_state = INIT_READY;

	kfree(super);
	return ret;

 out:
	kfree(arena);
	free_arenas(btt);
 out_super:
	kfree(super);
	return ret;
}

static int create_arenas(struct btt *btt)
{
	size_t remaining = btt->rawsize;
	size_t cur_off = 0;

	while (remaining) {
		struct arena_info *arena;
		size_t arena_size = min_t(u64, ARENA_MAX_SIZE, remaining);

		remaining -= arena_size;
		if (arena_size < ARENA_MIN_SIZE)
			break;

		arena = alloc_arena(btt, arena_size, btt->nlba, cur_off);
		if (!arena) {
			free_arenas(btt);
			return -ENOMEM;
		}
		btt->nlba += arena->external_nlba;
		if (remaining >= ARENA_MIN_SIZE)
			arena->nextoff = arena->size;
		else
			arena->nextoff = 0;
		cur_off += arena_size;
		list_add_tail(&arena->list, &btt->arena_list);
	}

	return 0;
}

/*
 * This function completes arena initialization by writing
 * all the metadata.
 * It is only called for an uninitialized arena when a write
 * to that arena occurs for the first time.
 */
static int btt_arena_write_layout(struct arena_info *arena)
{
	int ret;
	u64 sum;
	struct btt_sb *super;
	struct nd_btt *nd_btt = arena->nd_btt;
	const u8 *parent_uuid = nd_dev_to_uuid(&nd_btt->ndns->dev);

	ret = btt_map_init(arena);
	if (ret)
		return ret;

	ret = btt_log_init(arena);
	if (ret)
		return ret;

	super = kzalloc(sizeof(struct btt_sb), GFP_NOIO);
	if (!super)
		return -ENOMEM;

	strncpy(super->signature, BTT_SIG, BTT_SIG_LEN);
	memcpy(super->uuid, nd_btt->uuid, 16);
	memcpy(super->parent_uuid, parent_uuid, 16);
	super->flags = cpu_to_le32(arena->flags);
	super->version_major = cpu_to_le16(arena->version_major);
	super->version_minor = cpu_to_le16(arena->version_minor);
	super->external_lbasize = cpu_to_le32(arena->external_lbasize);
	super->external_nlba = cpu_to_le32(arena->external_nlba);
	super->internal_lbasize = cpu_to_le32(arena->internal_lbasize);
	super->internal_nlba = cpu_to_le32(arena->internal_nlba);
	super->nfree = cpu_to_le32(arena->nfree);
	super->infosize = cpu_to_le32(sizeof(struct btt_sb));
	super->nextoff = cpu_to_le64(arena->nextoff);
	/*
	 * Subtract arena->infooff (arena start) so numbers are relative
	 * to 'this' arena
	 */
	super->dataoff = cpu_to_le64(arena->dataoff - arena->infooff);
	super->mapoff = cpu_to_le64(arena->mapoff - arena->infooff);
	super->logoff = cpu_to_le64(arena->logoff - arena->infooff);
	super->info2off = cpu_to_le64(arena->info2off - arena->infooff);

	super->flags = 0;
	sum = nd_sb_checksum((struct nd_gen_sb *) super);
	super->checksum = cpu_to_le64(sum);

	ret = btt_info_write(arena, super);

	kfree(super);
	return ret;
}

/*
 * This function completes the initialization for the BTT namespace
 * such that it is ready to accept IOs
 */
static int btt_meta_init(struct btt *btt)
{
	int ret = 0;
	struct arena_info *arena;

	mutex_lock(&btt->init_lock);
	list_for_each_entry(arena, &btt->arena_list, list) {
		ret = btt_arena_write_layout(arena);
		if (ret)
			goto unlock;

		ret = btt_freelist_init(arena);
		if (ret)
			goto unlock;

		ret = btt_rtt_init(arena);
		if (ret)
			goto unlock;

		ret = btt_maplocks_init(arena);
		if (ret)
			goto unlock;
	}

	btt->init_state = INIT_READY;

 unlock:
	mutex_unlock(&btt->init_lock);
	return ret;
}

static u32 btt_meta_size(struct btt *btt)
{
	return btt->lbasize - btt->sector_size;
}

/*
 * This function calculates the arena in which the given LBA lies
 * by doing a linear walk. This is acceptable since we expect only
 * a few arenas. If we have backing devices that get much larger,
 * we can construct a balanced binary tree of arenas at init time
 * so that this range search becomes faster.
 */
static int lba_to_arena(struct btt *btt, sector_t sector, __u32 *premap,
		struct arena_info **arena)
{
	struct arena_info *arena_list;
	__u64 lba = div_u64(sector << SECTOR_SHIFT, btt->sector_size);

	list_for_each_entry(arena_list, &btt->arena_list, list) {
		if (lba < arena_list->external_nlba) {
			*arena = arena_list;
			*premap = lba;
			return 0;
		}
		lba -= arena_list->external_nlba;
	}

	return -EIO;
}

/*
 * The following (lock_map, unlock_map) are mostly just to improve
 * readability, since they index into an array of locks
 */
static void lock_map(struct arena_info *arena, u32 premap)
		__acquires(&arena->map_locks[idx].lock)
{
	u32 idx = (premap * MAP_ENT_SIZE / L1_CACHE_BYTES) % arena->nfree;

	spin_lock(&arena->map_locks[idx].lock);
}

static void unlock_map(struct arena_info *arena, u32 premap)
		__releases(&arena->map_locks[idx].lock)
{
	u32 idx = (premap * MAP_ENT_SIZE / L1_CACHE_BYTES) % arena->nfree;

	spin_unlock(&arena->map_locks[idx].lock);
}

static int btt_data_read(struct arena_info *arena, struct page *page,
			unsigned int off, u32 lba, u32 len)
{
	int ret;
	u64 nsoff = to_namespace_offset(arena, lba);
	void *mem = kmap_atomic(page);

	ret = arena_read_bytes(arena, nsoff, mem + off, len, NVDIMM_IO_ATOMIC);
	kunmap_atomic(mem);

	return ret;
}

static int btt_data_write(struct arena_info *arena, u32 lba,
			struct page *page, unsigned int off, u32 len)
{
	int ret;
	u64 nsoff = to_namespace_offset(arena, lba);
	void *mem = kmap_atomic(page);

	ret = arena_write_bytes(arena, nsoff, mem + off, len, NVDIMM_IO_ATOMIC);
	kunmap_atomic(mem);

	return ret;
}

static void zero_fill_data(struct page *page, unsigned int off, u32 len)
{
	void *mem = kmap_atomic(page);

	memset(mem + off, 0, len);
	kunmap_atomic(mem);
}

#ifdef CONFIG_BLK_DEV_INTEGRITY
static int btt_rw_integrity(struct btt *btt, struct bio_integrity_payload *bip,
			struct arena_info *arena, u32 postmap, int rw)
{
	unsigned int len = btt_meta_size(btt);
	u64 meta_nsoff;
	int ret = 0;

	if (bip == NULL)
		return 0;

	meta_nsoff = to_namespace_offset(arena, postmap) + btt->sector_size;

	while (len) {
		unsigned int cur_len;
		struct bio_vec bv;
		void *mem;

		bv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter);
		/*
		 * The 'bv' obtained from bvec_iter_bvec has its .bv_len and
		 * .bv_offset already adjusted for iter->bi_bvec_done, and we
		 * can use those directly
		 */

		cur_len = min(len, bv.bv_len);
		mem = kmap_atomic(bv.bv_page);
		if (rw)
			ret = arena_write_bytes(arena, meta_nsoff,
					mem + bv.bv_offset, cur_len,
					NVDIMM_IO_ATOMIC);
		else
			ret = arena_read_bytes(arena, meta_nsoff,
					mem + bv.bv_offset, cur_len,
					NVDIMM_IO_ATOMIC);

		kunmap_atomic(mem);
		if (ret)
			return ret;

		len -= cur_len;
		meta_nsoff += cur_len;
		if (!bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len))
			return -EIO;
	}

	return ret;
}

#else /* CONFIG_BLK_DEV_INTEGRITY */
static int btt_rw_integrity(struct btt *btt, struct bio_integrity_payload *bip,
			struct arena_info *arena, u32 postmap, int rw)
{
	return 0;
}
#endif

static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip,
			struct page *page, unsigned int off, sector_t sector,
			unsigned int len)
{
	int ret = 0;
	int t_flag, e_flag;
	struct arena_info *arena = NULL;
	u32 lane = 0, premap, postmap;

	while (len) {
		u32 cur_len;

		lane = nd_region_acquire_lane(btt->nd_region);

		ret = lba_to_arena(btt, sector, &premap, &arena);
		if (ret)
			goto out_lane;

		cur_len = min(btt->sector_size, len);

		ret = btt_map_read(arena, premap, &postmap, &t_flag, &e_flag,
				NVDIMM_IO_ATOMIC);
		if (ret)
			goto out_lane;

		/*
		 * We loop to make sure that the post map LBA didn't change
		 * from under us between writing the RTT and doing the actual
		 * read.
		 */
		while (1) {
			u32 new_map;
			int new_t, new_e;

			if (t_flag) {
				zero_fill_data(page, off, cur_len);
				goto out_lane;
			}

			if (e_flag) {
				ret = -EIO;
				goto out_lane;
			}

			arena->rtt[lane] = RTT_VALID | postmap;
			/*
			 * Barrier to make sure this write is not reordered
			 * to do the verification map_read before the RTT store
			 */
			barrier();

			ret = btt_map_read(arena, premap, &new_map, &new_t,
					&new_e, NVDIMM_IO_ATOMIC);
			if (ret)
				goto out_rtt;

			if ((postmap == new_map) && (t_flag == new_t) &&
					(e_flag == new_e))
				break;

			postmap = new_map;
			t_flag = new_t;
			e_flag = new_e;
		}

		ret = btt_data_read(arena, page, off, postmap, cur_len);
		if (ret) {
			int rc;

			/* Media error - set the e_flag */
			rc = btt_map_write(arena, premap, postmap, 0, 1,
					NVDIMM_IO_ATOMIC);
			goto out_rtt;
		}

		if (bip) {
			ret = btt_rw_integrity(btt, bip, arena, postmap, READ);
			if (ret)
				goto out_rtt;
		}

		arena->rtt[lane] = RTT_INVALID;
		nd_region_release_lane(btt->nd_region, lane);

		len -= cur_len;
		off += cur_len;
		sector += btt->sector_size >> SECTOR_SHIFT;
	}

	return 0;

 out_rtt:
	arena->rtt[lane] = RTT_INVALID;
 out_lane:
	nd_region_release_lane(btt->nd_region, lane);
	return ret;
}

/*
 * Normally, arena_{read,write}_bytes will take care of the initial offset
 * adjustment, but in the case of btt_is_badblock, where we query is_bad_pmem,
 * we need the final, raw namespace offset here
 */
static bool btt_is_badblock(struct btt *btt, struct arena_info *arena,
		u32 postmap)
{
	u64 nsoff = adjust_initial_offset(arena->nd_btt,
			to_namespace_offset(arena, postmap));
	sector_t phys_sector = nsoff >> 9;

	return is_bad_pmem(btt->phys_bb, phys_sector, arena->internal_lbasize);
}

static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
			sector_t sector, struct page *page, unsigned int off,
			unsigned int len)
{
	int ret = 0;
	struct arena_info *arena = NULL;
	u32 premap = 0, old_postmap, new_postmap, lane = 0, i;
	struct log_entry log;
	int sub;

	while (len) {
		u32 cur_len;
		int e_flag;

 retry:
		lane = nd_region_acquire_lane(btt->nd_region);

		ret = lba_to_arena(btt, sector, &premap, &arena);
		if (ret)
			goto out_lane;
		cur_len = min(btt->sector_size, len);

		if ((arena->flags & IB_FLAG_ERROR_MASK) != 0) {
			ret = -EIO;
			goto out_lane;
		}

		if (btt_is_badblock(btt, arena, arena->freelist[lane].block))
			arena->freelist[lane].has_err = 1;

		if (mutex_is_locked(&arena->err_lock)
				|| arena->freelist[lane].has_err) {
			nd_region_release_lane(btt->nd_region, lane);

			ret = arena_clear_freelist_error(arena, lane);
			if (ret)
				return ret;

			/* OK to acquire a different lane/free block */
			goto retry;
		}

		new_postmap = arena->freelist[lane].block;

		/* Wait if the new block is being read from */
		for (i = 0; i < arena->nfree; i++)
			while (arena->rtt[i] == (RTT_VALID | new_postmap))
				cpu_relax();

		if (new_postmap >= arena->internal_nlba) {
			ret = -EIO;
			goto out_lane;
		}

		ret = btt_data_write(arena, new_postmap, page, off, cur_len);
		if (ret)
			goto out_lane;

		if (bip) {
			ret = btt_rw_integrity(btt, bip, arena, new_postmap,
					WRITE);
			if (ret)
				goto out_lane;
		}

		lock_map(arena, premap);
		ret = btt_map_read(arena, premap, &old_postmap, NULL, &e_flag,
				NVDIMM_IO_ATOMIC);
		if (ret)
			goto out_map;
		if (old_postmap >= arena->internal_nlba) {
			ret = -EIO;
			goto out_map;
		}
		if (e_flag)
			set_e_flag(old_postmap);

		log.lba = cpu_to_le32(premap);
		log.old_map = cpu_to_le32(old_postmap);
		log.new_map = cpu_to_le32(new_postmap);
		log.seq = cpu_to_le32(arena->freelist[lane].seq);
		sub = arena->freelist[lane].sub;
		ret = btt_flog_write(arena, lane, sub, &log);
		if (ret)
			goto out_map;

		ret = btt_map_write(arena, premap, new_postmap, 0, 0,
				NVDIMM_IO_ATOMIC);
		if (ret)
			goto out_map;

		unlock_map(arena, premap);
		nd_region_release_lane(btt->nd_region, lane);

		if (e_flag) {
			ret = arena_clear_freelist_error(arena, lane);
			if (ret)
				return ret;
		}

		len -= cur_len;
		off += cur_len;
		sector += btt->sector_size >> SECTOR_SHIFT;
	}

	return 0;

 out_map:
	unlock_map(arena, premap);
 out_lane:
	nd_region_release_lane(btt->nd_region, lane);
	return ret;
}

static int btt_do_bvec(struct btt *btt, struct bio_integrity_payload *bip,
			struct page *page, unsigned int len, unsigned int off,
			bool is_write, sector_t sector)
{
	int ret;

	if (!is_write) {
		ret = btt_read_pg(btt, bip, page, off, sector, len);
		flush_dcache_page(page);
	} else {
		flush_dcache_page(page);
		ret = btt_write_pg(btt, bip, sector, page, off, len);
	}

	return ret;
}

static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
{
	struct bio_integrity_payload *bip = bio_integrity(bio);
	struct btt *btt = q->queuedata;
	struct bvec_iter iter;
	unsigned long start;
	struct bio_vec bvec;
	int err = 0;
	bool do_acct;

	if (!bio_integrity_prep(bio))
		return BLK_QC_T_NONE;

	do_acct = nd_iostat_start(bio, &start);
	bio_for_each_segment(bvec, bio, iter) {
		unsigned int len = bvec.bv_len;

		if (len > PAGE_SIZE || len < btt->sector_size ||
				len % btt->sector_size) {
			dev_err_ratelimited(&btt->nd_btt->dev,
				"unaligned bio segment (len: %d)\n", len);
			bio->bi_status = BLK_STS_IOERR;
			break;
		}

		err = btt_do_bvec(btt, bip, bvec.bv_page, len, bvec.bv_offset,
				op_is_write(bio_op(bio)), iter.bi_sector);
		if (err) {
			dev_err(&btt->nd_btt->dev,
					"io error in %s sector %lld, len %d,\n",
					(op_is_write(bio_op(bio))) ? "WRITE" :
					"READ",
					(unsigned long long) iter.bi_sector, len);
			bio->bi_status = errno_to_blk_status(err);
			break;
		}
	}
	if (do_acct)
		nd_iostat_end(bio, start);

	bio_endio(bio);
	return BLK_QC_T_NONE;
}

static int btt_rw_page(struct block_device *bdev, sector_t sector,
		struct page *page, bool is_write)
{
	struct btt *btt = bdev->bd_disk->private_data;
	int rc;
	unsigned int len;

	len = hpage_nr_pages(page) * PAGE_SIZE;
	rc = btt_do_bvec(btt, NULL, page, len, 0, is_write, sector);
	if (rc == 0)
		page_endio(page, is_write, 0);

	return rc;
}

static int btt_getgeo(struct block_device *bd, struct hd_geometry *geo)
{
	/* some standard values */
	geo->heads = 1 << 6;
	geo->sectors = 1 << 5;
	geo->cylinders = get_capacity(bd->bd_disk) >> 11;
	return 0;
}

static const struct block_device_operations btt_fops = {
	.owner =		THIS_MODULE,
	.rw_page =		btt_rw_page,
	.getgeo =		btt_getgeo,
	.revalidate_disk =	nvdimm_revalidate_disk,
};

static int btt_blk_init(struct btt *btt)
{
	struct nd_btt *nd_btt = btt->nd_btt;
	struct nd_namespace_common *ndns = nd_btt->ndns;

	/* create a new disk and request queue for btt */
	btt->btt_queue = blk_alloc_queue(GFP_KERNEL);
	if (!btt->btt_queue)
		return -ENOMEM;

	btt->btt_disk = alloc_disk(0);
	if (!btt->btt_disk) {
		blk_cleanup_queue(btt->btt_queue);
		return -ENOMEM;
	}

	nvdimm_namespace_disk_name(ndns, btt->btt_disk->disk_name);
	btt->btt_disk->first_minor = 0;
	btt->btt_disk->fops = &btt_fops;
	btt->btt_disk->private_data = btt;
	btt->btt_disk->queue = btt->btt_queue;
	btt->btt_disk->flags = GENHD_FL_EXT_DEVT;

	blk_queue_make_request(btt->btt_queue, btt_make_request);
	blk_queue_logical_block_size(btt->btt_queue, btt->sector_size);
	blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX);
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, btt->btt_queue);
	btt->btt_queue->queuedata = btt;

	set_capacity(btt->btt_disk, 0);
	device_add_disk(&btt->nd_btt->dev, btt->btt_disk);
	if (btt_meta_size(btt)) {
		int rc = nd_integrity_init(btt->btt_disk, btt_meta_size(btt));

		if (rc) {
			del_gendisk(btt->btt_disk);
			put_disk(btt->btt_disk);
			blk_cleanup_queue(btt->btt_queue);
			return rc;
		}
	}
	set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9);
	btt->nd_btt->size = btt->nlba * (u64)btt->sector_size;
	revalidate_disk(btt->btt_disk);

	return 0;
}

static void btt_blk_cleanup(struct btt *btt)
{
	del_gendisk(btt->btt_disk);
	put_disk(btt->btt_disk);
	blk_cleanup_queue(btt->btt_queue);
}

/**
 * btt_init - initialize a block translation table for the given device
 * @nd_btt:	device with BTT geometry and backing device info
 * @rawsize:	raw size in bytes of the backing device
 * @lbasize:	lba size of the backing device
 * @uuid:	A uuid for the backing device - this is stored on media
 * @maxlane:	maximum number of parallel requests the device can handle
 *
 * Initialize a Block Translation Table on a backing device to provide
 * single sector power fail atomicity.
 *
 * Context:
 * Might sleep.
 *
 * Returns:
 * Pointer to a new struct btt on success, NULL on failure.
 */
static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize,
		u32 lbasize, u8 *uuid, struct nd_region *nd_region)
{
	int ret;
	struct btt *btt;
	struct nd_namespace_io *nsio;
	struct device *dev = &nd_btt->dev;

	btt = devm_kzalloc(dev, sizeof(struct btt), GFP_KERNEL);
	if (!btt)
		return NULL;

	btt->nd_btt = nd_btt;
	btt->rawsize = rawsize;
	btt->lbasize = lbasize;
	btt->sector_size = ((lbasize >= 4096) ? 4096 : 512);
	INIT_LIST_HEAD(&btt->arena_list);
	mutex_init(&btt->init_lock);
	btt->nd_region = nd_region;
	nsio = to_nd_namespace_io(&nd_btt->ndns->dev);
	btt->phys_bb = &nsio->bb;

	ret = discover_arenas(btt);
	if (ret) {
		dev_err(dev, "init: error in arena_discover: %d\n", ret);
		return NULL;
	}

	if (btt->init_state != INIT_READY && nd_region->ro) {
		dev_warn(dev, "%s is read-only, unable to init btt metadata\n",
				dev_name(&nd_region->dev));
		return NULL;
	} else if (btt->init_state != INIT_READY) {
		btt->num_arenas = (rawsize / ARENA_MAX_SIZE) +
			((rawsize % ARENA_MAX_SIZE) ? 1 : 0);
		dev_dbg(dev, "init: %d arenas for %llu rawsize\n",
				btt->num_arenas, rawsize);

		ret = create_arenas(btt);
		if (ret) {
			dev_info(dev, "init: create_arenas: %d\n", ret);
			return NULL;
		}

		ret = btt_meta_init(btt);
		if (ret) {
			dev_err(dev, "init: error in meta_init: %d\n", ret);
			return NULL;
		}
	}

	ret = btt_blk_init(btt);
	if (ret) {
		dev_err(dev, "init: error in blk_init: %d\n", ret);
		return NULL;
	}

	btt_debugfs_init(btt);

	return btt;
}

/**
 * btt_fini - de-initialize a BTT
 * @btt:	the BTT handle that was generated by btt_init
 *
 * De-initialize a Block Translation Table on device removal
 *
 * Context:
 * Might sleep.
 */
static void btt_fini(struct btt *btt)
{
	if (btt) {
		btt_blk_cleanup(btt);
		free_arenas(btt);
		debugfs_remove_recursive(btt->debugfs_dir);
	}
}

int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns)
{
	struct nd_btt *nd_btt = to_nd_btt(ndns->claim);
	struct nd_region *nd_region;
	struct btt_sb *btt_sb;
	struct btt *btt;
	size_t rawsize;

	if (!nd_btt->uuid || !nd_btt->ndns || !nd_btt->lbasize) {
		dev_dbg(&nd_btt->dev, "incomplete btt configuration\n");
		return -ENODEV;
	}

	btt_sb = devm_kzalloc(&nd_btt->dev, sizeof(*btt_sb), GFP_KERNEL);
	if (!btt_sb)
		return -ENOMEM;

	/*
	 * If this returns < 0, that is ok as it just means there wasn't
	 * an existing BTT, and we're creating a new one. We still need to
	 * call this as we need the version dependent fields in nd_btt to be
	 * set correctly based on the holder class
	 */
	nd_btt_version(nd_btt, ndns, btt_sb);

	rawsize = nvdimm_namespace_capacity(ndns) - nd_btt->initial_offset;
	if (rawsize < ARENA_MIN_SIZE) {
		dev_dbg(&nd_btt->dev, "%s must be at least %ld bytes\n",
				dev_name(&ndns->dev),
				ARENA_MIN_SIZE + nd_btt->initial_offset);
		return -ENXIO;
	}
	nd_region = to_nd_region(nd_btt->dev.parent);
	btt = btt_init(nd_btt, rawsize, nd_btt->lbasize, nd_btt->uuid,
			nd_region);
	if (!btt)
		return -ENOMEM;
	nd_btt->btt = btt;

	return 0;
}
EXPORT_SYMBOL(nvdimm_namespace_attach_btt);

int nvdimm_namespace_detach_btt(struct nd_btt *nd_btt)
{
	struct btt *btt = nd_btt->btt;

	btt_fini(btt);
	nd_btt->btt = NULL;

	return 0;
}
EXPORT_SYMBOL(nvdimm_namespace_detach_btt);

static int __init nd_btt_init(void)
{
	int rc = 0;

	debugfs_root = debugfs_create_dir("btt", NULL);
	if (IS_ERR_OR_NULL(debugfs_root))
		rc = -ENXIO;

	return rc;
}

static void __exit nd_btt_exit(void)
{
	debugfs_remove_recursive(debugfs_root);
}

MODULE_ALIAS_ND_DEVICE(ND_DEVICE_BTT);
MODULE_AUTHOR("Vishal Verma <vishal.l.verma@linux.intel.com>");
MODULE_LICENSE("GPL v2");
module_init(nd_btt_init);
module_exit(nd_btt_exit);