// SPDX-License-Identifier: GPL-2.0

#define pr_fmt(fmt) "papr-scm: " fmt

#include <linux/of.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/ioport.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/sysfs.h>
#include <linux/ndctl.h>
#include <linux/sched.h>
#include <linux/libnvdimm.h>
#include <linux/platform_device.h>
#include <linux/delay.h>
#include <linux/seq_buf.h>
#include <linux/nd.h>

#include <asm/plpar_wrappers.h>
#include <uapi/linux/papr_pdsm.h>
#include <linux/papr_scm.h>
#include <asm/mce.h>
#include <linux/unaligned.h>
#include <linux/perf_event.h>

/* Target address passed to H_SCM_BIND_MEM by drc_pmem_bind() */
#define BIND_ANY_ADDR (~0ul)

/* Bitmask of the ND_CMD_* dimm commands this driver accepts */
#define PAPR_SCM_DIMM_CMD_MASK \
	((1ul << ND_CMD_GET_CONFIG_SIZE) | \
	 (1ul << ND_CMD_GET_CONFIG_DATA) | \
	 (1ul << ND_CMD_SET_CONFIG_DATA) | \
	 (1ul << ND_CMD_CALL))

/* Struct holding a single performance metric */
struct papr_scm_perf_stat {
	/* 8-byte statistic identifier; not NUL terminated */
	u8 stat_id[8];
	/* Statistic value, big-endian */
	__be64 stat_val;
} __packed;

/* Struct exchanged between kernel and PHYP for fetching drc perf stats */
struct papr_scm_perf_stats {
	u8 eye_catcher[8];
	/* Should be PAPR_SCM_PERF_STATS_VERSION */
	__be32 stats_version;
	/* Number of stats following */
	__be32 num_statistics;
	/* zero or more performance metrics */
	struct papr_scm_perf_stat scm_statistic[];
} __packed;

/* private struct associated with each region */
struct papr_scm_priv {
	struct platform_device *pdev;
	struct device_node *dn;
	/* Passed as the DRC argument to every H_SCM_* hcall in this file */
	uint32_t drc_index;
	uint64_t blocks;
	uint64_t block_size;
	/* Upper bound for config-data (metadata) offset + length */
	int metadata_size;
	bool is_volatile;
	bool hcall_flush_required;

	/* Address returned by the bind / query-binding hcalls */
	uint64_t bound_addr;

	struct nvdimm_bus_descriptor bus_desc;
	struct nvdimm_bus *bus;
	struct nvdimm *nvdimm;
	struct resource res;
	struct nd_region *region;
	struct nd_interleave_set nd_set;
	struct list_head region_list;

	/* Protect dimm health data from concurrent read/writes */
	struct mutex health_mutex;

	/* Last time the health information of the dimm was updated */
	unsigned long lasthealth_jiffies;

	/* Health information for the dimm */
	u64 health_bitmap;

	/* Holds the last known dirty shutdown counter value */
	u64 dirty_shutdown_counter;

	/* length of the stat buffer as expected by phyp */
	size_t stat_buffer_len;

	/* The bits which needs to be overridden */
	u64 health_bitmap_inject_mask;
};

/*
 * Flush callback for a region: issue H_SCM_FLUSH for the region's DRC until
 * the hypervisor stops reporting busy.
 *
 * The continuation token returned in ret_buf[0] is fed back into the next
 * call. A long-busy status sleeps for the hinted number of milliseconds and
 * is then treated as H_BUSY; a plain H_BUSY only yields the CPU.
 *
 * Return: 0 on success, -EIO for any other hcall status.
 */
static int papr_scm_pmem_flush(struct nd_region *nd_region,
			       struct bio *bio __maybe_unused)
{
	struct papr_scm_priv *p = nd_region_provider_data(nd_region);
	unsigned long ret_buf[PLPAR_HCALL_BUFSIZE], token = 0;
	long rc;

	dev_dbg(&p->pdev->dev, "flush drc 0x%x", p->drc_index);

	do {
		rc = plpar_hcall(H_SCM_FLUSH, ret_buf, p->drc_index, token);
		token = ret_buf[0];

		/* Check if we are stalled for some time */
		if (H_IS_LONG_BUSY(rc)) {
			msleep(get_longbusy_msecs(rc));
			rc = H_BUSY;
		} else if (rc == H_BUSY) {
			cond_resched();
		}
	} while (rc == H_BUSY);

	if (rc) {
		dev_err(&p->pdev->dev, "flush error: %ld", rc);
		rc = -EIO;
	} else {
		dev_dbg(&p->pdev->dev, "flush drc 0x%x complete", p->drc_index);
	}

	return rc;
}

static LIST_HEAD(papr_nd_regions);
static DEFINE_MUTEX(papr_ndr_lock);

/*
 * Bind all p->blocks of the DRC with H_SCM_BIND_MEM and record the resulting
 * address in p->bound_addr.
 *
 * Return: 0 on success; otherwise the hcall status is returned as-is (it is
 * not translated to an errno here).
 */
static int drc_pmem_bind(struct papr_scm_priv *p)
{
	unsigned long ret[PLPAR_HCALL_BUFSIZE];
	uint64_t saved = 0;
	uint64_t token;
	int64_t rc;

	/*
	 * When the hypervisor cannot map all the requested memory in a single
	 * hcall it returns H_BUSY and we call again with the token until
	 * we get H_SUCCESS. Aborting the retry loop before getting H_SUCCESS
	 * leave the system in an undefined state, so we wait.
	 */
	token = 0;

	do {
		rc = plpar_hcall(H_SCM_BIND_MEM, ret, p->drc_index, 0,
				 p->blocks, BIND_ANY_ADDR, token);
		token = ret[0];
		/* Keep the first non-zero ret[1]; later iterations don't replace it */
		if (!saved)
			saved = ret[1];
		cond_resched();
	} while (rc == H_BUSY);

	if (rc)
		return rc;

	p->bound_addr = saved;
	dev_dbg(&p->pdev->dev, "bound drc 0x%x to 0x%lx\n",
		p->drc_index, (unsigned long)saved);
	return rc;
}

/*
 * Unbind everything associated with the DRC via H_SCM_UNBIND_ALL, retrying
 * with the returned token while the hypervisor reports busy. Failures are
 * only logged; nothing is returned to the caller.
 */
static void drc_pmem_unbind(struct papr_scm_priv *p)
{
	unsigned long ret[PLPAR_HCALL_BUFSIZE];
	uint64_t token = 0;
	int64_t rc;

	dev_dbg(&p->pdev->dev, "unbind drc 0x%x\n", p->drc_index);

	/* NB: unbind has the same retry requirements as drc_pmem_bind() */
	do {

		/* Unbind of all SCM resources associated with drcIndex */
		rc = plpar_hcall(H_SCM_UNBIND_ALL, ret, H_UNBIND_SCOPE_DRC,
				 p->drc_index, token);
		token = ret[0];

		/* Check if we are stalled for some time */
		if (H_IS_LONG_BUSY(rc)) {
			msleep(get_longbusy_msecs(rc));
			rc = H_BUSY;
		} else if (rc == H_BUSY) {
			cond_resched();
		}

	} while (rc == H_BUSY);

	if (rc)
		dev_err(&p->pdev->dev, "unbind error: %lld\n", rc);
	else
		dev_dbg(&p->pdev->dev, "unbind drc 0x%x complete\n",
			p->drc_index);

	return;
}

/*
 * Reuse an existing binding if there is one, otherwise (re)bind.
 *
 * Queries the address of block 0 and of the last block. If both queries
 * succeed and the two addresses are exactly (blocks - 1) * block_size apart,
 * the start address is stored in p->bound_addr. On any query failure or
 * mismatch the DRC is unbound and bound again from scratch.
 *
 * Return: 0 when the existing binding is used, else drc_pmem_bind()'s result.
 */
static int drc_pmem_query_n_bind(struct papr_scm_priv *p)
{
	unsigned long start_addr;
	unsigned long end_addr;
	unsigned long ret[PLPAR_HCALL_BUFSIZE];
	int64_t rc;


	rc = plpar_hcall(H_SCM_QUERY_BLOCK_MEM_BINDING, ret,
			 p->drc_index, 0);
	if (rc)
		goto err_out;
	start_addr = ret[0];

	/* Make sure the full region is bound. */
	rc = plpar_hcall(H_SCM_QUERY_BLOCK_MEM_BINDING, ret,
			 p->drc_index, p->blocks - 1);
	if (rc)
		goto err_out;
	end_addr = ret[0];

	if ((end_addr - start_addr) != ((p->blocks - 1) * p->block_size))
		goto err_out;

	p->bound_addr = start_addr;
	dev_dbg(&p->pdev->dev, "bound drc 0x%x to 0x%lx\n", p->drc_index, start_addr);
	return rc;

err_out:
	dev_info(&p->pdev->dev,
		 "Failed to query, trying an unbind followed by bind");
	drc_pmem_unbind(p);
	return drc_pmem_bind(p);
}

/*
 * Query the Dimm performance stats from PHYP and copy them (if returned) to
 * provided struct papr_scm_perf_stats instance 'stats' that can hold at least
 * (num_stats + header) bytes.
 * - If buff_stats == NULL the return value is the size in bytes of the buffer
 *   needed to hold all supported performance-statistics.
 * - If buff_stats != NULL and num_stats == 0 then we copy all known
 *   performance-statistics to 'buff_stat' and expect to be large enough to
 *   hold them.
 * - if buff_stats != NULL and num_stats > 0 then copy the requested
 *   performance-statistics to buff_stats.
241 */ 242 static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p, 243 struct papr_scm_perf_stats *buff_stats, 244 unsigned int num_stats) 245 { 246 unsigned long ret[PLPAR_HCALL_BUFSIZE]; 247 size_t size; 248 s64 rc; 249 250 /* Setup the out buffer */ 251 if (buff_stats) { 252 memcpy(buff_stats->eye_catcher, 253 PAPR_SCM_PERF_STATS_EYECATCHER, 8); 254 buff_stats->stats_version = 255 cpu_to_be32(PAPR_SCM_PERF_STATS_VERSION); 256 buff_stats->num_statistics = 257 cpu_to_be32(num_stats); 258 259 /* 260 * Calculate the buffer size based on num-stats provided 261 * or use the prefetched max buffer length 262 */ 263 if (num_stats) 264 /* Calculate size from the num_stats */ 265 size = sizeof(struct papr_scm_perf_stats) + 266 num_stats * sizeof(struct papr_scm_perf_stat); 267 else 268 size = p->stat_buffer_len; 269 } else { 270 /* In case of no out buffer ignore the size */ 271 size = 0; 272 } 273 274 /* Do the HCALL asking PHYP for info */ 275 rc = plpar_hcall(H_SCM_PERFORMANCE_STATS, ret, p->drc_index, 276 buff_stats ? 
virt_to_phys(buff_stats) : 0, 277 size); 278 279 /* Check if the error was due to an unknown stat-id */ 280 if (rc == H_PARTIAL) { 281 dev_err(&p->pdev->dev, 282 "Unknown performance stats, Err:0x%016lX\n", ret[0]); 283 return -ENOENT; 284 } else if (rc == H_AUTHORITY) { 285 dev_info(&p->pdev->dev, 286 "Permission denied while accessing performance stats"); 287 return -EPERM; 288 } else if (rc == H_UNSUPPORTED) { 289 dev_dbg(&p->pdev->dev, "Performance stats unsupported\n"); 290 return -EOPNOTSUPP; 291 } else if (rc != H_SUCCESS) { 292 dev_err(&p->pdev->dev, 293 "Failed to query performance stats, Err:%lld\n", rc); 294 return -EIO; 295 296 } else if (!size) { 297 /* Handle case where stat buffer size was requested */ 298 dev_dbg(&p->pdev->dev, 299 "Performance stats size %ld\n", ret[0]); 300 return ret[0]; 301 } 302 303 /* Successfully fetched the requested stats from phyp */ 304 dev_dbg(&p->pdev->dev, 305 "Performance stats returned %d stats\n", 306 be32_to_cpu(buff_stats->num_statistics)); 307 return 0; 308 } 309 310 #ifdef CONFIG_PERF_EVENTS 311 #define to_nvdimm_pmu(_pmu) container_of(_pmu, struct nvdimm_pmu, pmu) 312 313 static const char * const nvdimm_events_map[] = { 314 [1] = "CtlResCt", 315 [2] = "CtlResTm", 316 [3] = "PonSecs ", 317 [4] = "MemLife ", 318 [5] = "CritRscU", 319 [6] = "HostLCnt", 320 [7] = "HostSCnt", 321 [8] = "HostSDur", 322 [9] = "HostLDur", 323 [10] = "MedRCnt ", 324 [11] = "MedWCnt ", 325 [12] = "MedRDur ", 326 [13] = "MedWDur ", 327 [14] = "CchRHCnt", 328 [15] = "CchWHCnt", 329 [16] = "FastWCnt", 330 }; 331 332 static int papr_scm_pmu_get_value(struct perf_event *event, struct device *dev, u64 *count) 333 { 334 struct papr_scm_perf_stat *stat; 335 struct papr_scm_perf_stats *stats; 336 struct papr_scm_priv *p = dev_get_drvdata(dev); 337 int rc, size; 338 339 /* Invalid eventcode */ 340 if (event->attr.config == 0 || event->attr.config >= ARRAY_SIZE(nvdimm_events_map)) 341 return -EINVAL; 342 343 /* Allocate request buffer enough to 
hold single performance stat */ 344 size = sizeof(struct papr_scm_perf_stats) + 345 sizeof(struct papr_scm_perf_stat); 346 347 if (!p) 348 return -EINVAL; 349 350 stats = kzalloc(size, GFP_KERNEL); 351 if (!stats) 352 return -ENOMEM; 353 354 stat = &stats->scm_statistic[0]; 355 memcpy(&stat->stat_id, 356 nvdimm_events_map[event->attr.config], 357 sizeof(stat->stat_id)); 358 stat->stat_val = 0; 359 360 rc = drc_pmem_query_stats(p, stats, 1); 361 if (rc < 0) { 362 kfree(stats); 363 return rc; 364 } 365 366 *count = be64_to_cpu(stat->stat_val); 367 kfree(stats); 368 return 0; 369 } 370 371 static int papr_scm_pmu_event_init(struct perf_event *event) 372 { 373 struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu); 374 struct papr_scm_priv *p; 375 376 if (!nd_pmu) 377 return -EINVAL; 378 379 /* test the event attr type for PMU enumeration */ 380 if (event->attr.type != event->pmu->type) 381 return -ENOENT; 382 383 /* it does not support event sampling mode */ 384 if (is_sampling_event(event)) 385 return -EOPNOTSUPP; 386 387 /* no branch sampling */ 388 if (has_branch_stack(event)) 389 return -EOPNOTSUPP; 390 391 p = (struct papr_scm_priv *)nd_pmu->dev->driver_data; 392 if (!p) 393 return -EINVAL; 394 395 /* Invalid eventcode */ 396 if (event->attr.config == 0 || event->attr.config > 16) 397 return -EINVAL; 398 399 return 0; 400 } 401 402 static int papr_scm_pmu_add(struct perf_event *event, int flags) 403 { 404 u64 count; 405 int rc; 406 struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu); 407 408 if (!nd_pmu) 409 return -EINVAL; 410 411 if (flags & PERF_EF_START) { 412 rc = papr_scm_pmu_get_value(event, nd_pmu->dev, &count); 413 if (rc) 414 return rc; 415 416 local64_set(&event->hw.prev_count, count); 417 } 418 419 return 0; 420 } 421 422 static void papr_scm_pmu_read(struct perf_event *event) 423 { 424 u64 prev, now; 425 int rc; 426 struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu); 427 428 if (!nd_pmu) 429 return; 430 431 rc = papr_scm_pmu_get_value(event, 
nd_pmu->dev, &now); 432 if (rc) 433 return; 434 435 prev = local64_xchg(&event->hw.prev_count, now); 436 local64_add(now - prev, &event->count); 437 } 438 439 static void papr_scm_pmu_del(struct perf_event *event, int flags) 440 { 441 papr_scm_pmu_read(event); 442 } 443 444 static void papr_scm_pmu_register(struct papr_scm_priv *p) 445 { 446 struct nvdimm_pmu *nd_pmu; 447 int rc, nodeid; 448 449 nd_pmu = kzalloc_obj(*nd_pmu); 450 if (!nd_pmu) { 451 rc = -ENOMEM; 452 goto pmu_err_print; 453 } 454 455 if (!p->stat_buffer_len) { 456 rc = -ENOENT; 457 goto pmu_check_events_err; 458 } 459 460 nd_pmu->pmu.task_ctx_nr = perf_invalid_context; 461 nd_pmu->pmu.name = nvdimm_name(p->nvdimm); 462 nd_pmu->pmu.event_init = papr_scm_pmu_event_init; 463 nd_pmu->pmu.read = papr_scm_pmu_read; 464 nd_pmu->pmu.add = papr_scm_pmu_add; 465 nd_pmu->pmu.del = papr_scm_pmu_del; 466 467 nd_pmu->pmu.capabilities = PERF_PMU_CAP_NO_INTERRUPT | 468 PERF_PMU_CAP_NO_EXCLUDE; 469 470 /*updating the cpumask variable */ 471 nodeid = numa_map_to_online_node(dev_to_node(&p->pdev->dev)); 472 nd_pmu->arch_cpumask = *cpumask_of_node(nodeid); 473 474 rc = register_nvdimm_pmu(nd_pmu, p->pdev); 475 if (rc) 476 goto pmu_check_events_err; 477 478 /* 479 * Set archdata.priv value to nvdimm_pmu structure, to handle the 480 * unregistering of pmu device. 481 */ 482 p->pdev->archdata.priv = nd_pmu; 483 return; 484 485 pmu_check_events_err: 486 kfree(nd_pmu); 487 pmu_err_print: 488 dev_info(&p->pdev->dev, "nvdimm pmu didn't register rc=%d\n", rc); 489 } 490 491 #else 492 static void papr_scm_pmu_register(struct papr_scm_priv *p) { } 493 #endif 494 495 /* 496 * Issue hcall to retrieve dimm health info and populate papr_scm_priv with the 497 * health information. 
498 */ 499 static int __drc_pmem_query_health(struct papr_scm_priv *p) 500 { 501 unsigned long ret[PLPAR_HCALL_BUFSIZE]; 502 u64 bitmap = 0; 503 long rc; 504 505 /* issue the hcall */ 506 rc = plpar_hcall(H_SCM_HEALTH, ret, p->drc_index); 507 if (rc == H_SUCCESS) 508 bitmap = ret[0] & ret[1]; 509 else if (rc == H_FUNCTION) 510 dev_info_once(&p->pdev->dev, 511 "Hcall H_SCM_HEALTH not implemented, assuming empty health bitmap"); 512 else { 513 514 dev_err(&p->pdev->dev, 515 "Failed to query health information, Err:%ld\n", rc); 516 return -ENXIO; 517 } 518 519 p->lasthealth_jiffies = jiffies; 520 /* Allow injecting specific health bits via inject mask. */ 521 if (p->health_bitmap_inject_mask) 522 bitmap = (bitmap & ~p->health_bitmap_inject_mask) | 523 p->health_bitmap_inject_mask; 524 WRITE_ONCE(p->health_bitmap, bitmap); 525 dev_dbg(&p->pdev->dev, 526 "Queried dimm health info. Bitmap:0x%016lx Mask:0x%016lx\n", 527 ret[0], ret[1]); 528 529 return 0; 530 } 531 532 /* Min interval in seconds for assuming stable dimm health */ 533 #define MIN_HEALTH_QUERY_INTERVAL 60 534 535 /* Query cached health info and if needed call drc_pmem_query_health */ 536 static int drc_pmem_query_health(struct papr_scm_priv *p) 537 { 538 unsigned long cache_timeout; 539 int rc; 540 541 /* Protect concurrent modifications to papr_scm_priv */ 542 rc = mutex_lock_interruptible(&p->health_mutex); 543 if (rc) 544 return rc; 545 546 /* Jiffies offset for which the health data is assumed to be same */ 547 cache_timeout = p->lasthealth_jiffies + 548 secs_to_jiffies(MIN_HEALTH_QUERY_INTERVAL); 549 550 /* Fetch new health info is its older than MIN_HEALTH_QUERY_INTERVAL */ 551 if (time_after(jiffies, cache_timeout)) 552 rc = __drc_pmem_query_health(p); 553 else 554 /* Assume cached health data is valid */ 555 rc = 0; 556 557 mutex_unlock(&p->health_mutex); 558 return rc; 559 } 560 561 static int papr_scm_meta_get(struct papr_scm_priv *p, 562 struct nd_cmd_get_config_data_hdr *hdr) 563 { 564 unsigned 
long data[PLPAR_HCALL_BUFSIZE]; 565 unsigned long offset, data_offset; 566 int len, read; 567 int64_t ret; 568 569 if ((hdr->in_offset + hdr->in_length) > p->metadata_size) 570 return -EINVAL; 571 572 for (len = hdr->in_length; len; len -= read) { 573 574 data_offset = hdr->in_length - len; 575 offset = hdr->in_offset + data_offset; 576 577 if (len >= 8) 578 read = 8; 579 else if (len >= 4) 580 read = 4; 581 else if (len >= 2) 582 read = 2; 583 else 584 read = 1; 585 586 ret = plpar_hcall(H_SCM_READ_METADATA, data, p->drc_index, 587 offset, read); 588 589 if (ret == H_PARAMETER) /* bad DRC index */ 590 return -ENODEV; 591 if (ret) 592 return -EINVAL; /* other invalid parameter */ 593 594 switch (read) { 595 case 8: 596 *(uint64_t *)(hdr->out_buf + data_offset) = be64_to_cpu(data[0]); 597 break; 598 case 4: 599 *(uint32_t *)(hdr->out_buf + data_offset) = be32_to_cpu(data[0] & 0xffffffff); 600 break; 601 602 case 2: 603 *(uint16_t *)(hdr->out_buf + data_offset) = be16_to_cpu(data[0] & 0xffff); 604 break; 605 606 case 1: 607 *(uint8_t *)(hdr->out_buf + data_offset) = (data[0] & 0xff); 608 break; 609 } 610 } 611 return 0; 612 } 613 614 static int papr_scm_meta_set(struct papr_scm_priv *p, 615 struct nd_cmd_set_config_hdr *hdr) 616 { 617 unsigned long offset, data_offset; 618 int len, wrote; 619 unsigned long data; 620 __be64 data_be; 621 int64_t ret; 622 623 if ((hdr->in_offset + hdr->in_length) > p->metadata_size) 624 return -EINVAL; 625 626 for (len = hdr->in_length; len; len -= wrote) { 627 628 data_offset = hdr->in_length - len; 629 offset = hdr->in_offset + data_offset; 630 631 if (len >= 8) { 632 data = *(uint64_t *)(hdr->in_buf + data_offset); 633 data_be = cpu_to_be64(data); 634 wrote = 8; 635 } else if (len >= 4) { 636 data = *(uint32_t *)(hdr->in_buf + data_offset); 637 data &= 0xffffffff; 638 data_be = cpu_to_be32(data); 639 wrote = 4; 640 } else if (len >= 2) { 641 data = *(uint16_t *)(hdr->in_buf + data_offset); 642 data &= 0xffff; 643 data_be = 
cpu_to_be16(data); 644 wrote = 2; 645 } else { 646 data_be = *(uint8_t *)(hdr->in_buf + data_offset); 647 data_be &= 0xff; 648 wrote = 1; 649 } 650 651 ret = plpar_hcall_norets(H_SCM_WRITE_METADATA, p->drc_index, 652 offset, data_be, wrote); 653 if (ret == H_PARAMETER) /* bad DRC index */ 654 return -ENODEV; 655 if (ret) 656 return -EINVAL; /* other invalid parameter */ 657 } 658 659 return 0; 660 } 661 662 /* 663 * Do a sanity checks on the inputs args to dimm-control function and return 664 * '0' if valid. Validation of PDSM payloads happens later in 665 * papr_scm_service_pdsm. 666 */ 667 static int is_cmd_valid(struct nvdimm *nvdimm, unsigned int cmd, void *buf, 668 unsigned int buf_len) 669 { 670 unsigned long cmd_mask = PAPR_SCM_DIMM_CMD_MASK; 671 struct nd_cmd_pkg *nd_cmd; 672 struct papr_scm_priv *p; 673 enum papr_pdsm pdsm; 674 675 /* Only dimm-specific calls are supported atm */ 676 if (!nvdimm) 677 return -EINVAL; 678 679 /* get the provider data from struct nvdimm */ 680 p = nvdimm_provider_data(nvdimm); 681 682 if (!test_bit(cmd, &cmd_mask)) { 683 dev_dbg(&p->pdev->dev, "Unsupported cmd=%u\n", cmd); 684 return -EINVAL; 685 } 686 687 /* For CMD_CALL verify pdsm request */ 688 if (cmd == ND_CMD_CALL) { 689 /* Verify the envelope and envelop size */ 690 if (!buf || 691 buf_len < (sizeof(struct nd_cmd_pkg) + ND_PDSM_HDR_SIZE)) { 692 dev_dbg(&p->pdev->dev, "Invalid pkg size=%u\n", 693 buf_len); 694 return -EINVAL; 695 } 696 697 /* Verify that the nd_cmd_pkg.nd_family is correct */ 698 nd_cmd = (struct nd_cmd_pkg *)buf; 699 700 if (nd_cmd->nd_family != NVDIMM_FAMILY_PAPR) { 701 dev_dbg(&p->pdev->dev, "Invalid pkg family=0x%llx\n", 702 nd_cmd->nd_family); 703 return -EINVAL; 704 } 705 706 pdsm = (enum papr_pdsm)nd_cmd->nd_command; 707 708 /* Verify if the pdsm command is valid */ 709 if (pdsm <= PAPR_PDSM_MIN || pdsm >= PAPR_PDSM_MAX) { 710 dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid PDSM\n", 711 pdsm); 712 return -EINVAL; 713 } 714 715 /* Have enough space 
to hold returned 'nd_pkg_pdsm' header */ 716 if (nd_cmd->nd_size_out < ND_PDSM_HDR_SIZE) { 717 dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid payload\n", 718 pdsm); 719 return -EINVAL; 720 } 721 } 722 723 /* Let the command be further processed */ 724 return 0; 725 } 726 727 static int papr_pdsm_fuel_gauge(struct papr_scm_priv *p, 728 union nd_pdsm_payload *payload) 729 { 730 int rc, size; 731 u64 statval; 732 struct papr_scm_perf_stat *stat; 733 struct papr_scm_perf_stats *stats; 734 735 /* Silently fail if fetching performance metrics isn't supported */ 736 if (!p->stat_buffer_len) 737 return 0; 738 739 /* Allocate request buffer enough to hold single performance stat */ 740 size = sizeof(struct papr_scm_perf_stats) + 741 sizeof(struct papr_scm_perf_stat); 742 743 stats = kzalloc(size, GFP_KERNEL); 744 if (!stats) 745 return -ENOMEM; 746 747 stat = &stats->scm_statistic[0]; 748 memcpy(&stat->stat_id, "MemLife ", sizeof(stat->stat_id)); 749 stat->stat_val = 0; 750 751 /* Fetch the fuel gauge and populate it in payload */ 752 rc = drc_pmem_query_stats(p, stats, 1); 753 if (rc < 0) { 754 dev_dbg(&p->pdev->dev, "Err(%d) fetching fuel gauge\n", rc); 755 goto free_stats; 756 } 757 758 statval = be64_to_cpu(stat->stat_val); 759 dev_dbg(&p->pdev->dev, 760 "Fetched fuel-gauge %llu", statval); 761 payload->health.extension_flags |= 762 PDSM_DIMM_HEALTH_RUN_GAUGE_VALID; 763 payload->health.dimm_fuel_gauge = statval; 764 765 rc = sizeof(struct nd_papr_pdsm_health); 766 767 free_stats: 768 kfree(stats); 769 return rc; 770 } 771 772 /* Add the dirty-shutdown-counter value to the pdsm */ 773 static int papr_pdsm_dsc(struct papr_scm_priv *p, 774 union nd_pdsm_payload *payload) 775 { 776 payload->health.extension_flags |= PDSM_DIMM_DSC_VALID; 777 payload->health.dimm_dsc = p->dirty_shutdown_counter; 778 779 return sizeof(struct nd_papr_pdsm_health); 780 } 781 782 /* Fetch the DIMM health info and populate it in provided package. 
*/ 783 static int papr_pdsm_health(struct papr_scm_priv *p, 784 union nd_pdsm_payload *payload) 785 { 786 int rc; 787 788 /* Ensure dimm health mutex is taken preventing concurrent access */ 789 rc = mutex_lock_interruptible(&p->health_mutex); 790 if (rc) 791 goto out; 792 793 /* Always fetch upto date dimm health data ignoring cached values */ 794 rc = __drc_pmem_query_health(p); 795 if (rc) { 796 mutex_unlock(&p->health_mutex); 797 goto out; 798 } 799 800 /* update health struct with various flags derived from health bitmap */ 801 payload->health = (struct nd_papr_pdsm_health) { 802 .extension_flags = 0, 803 .dimm_unarmed = !!(p->health_bitmap & PAPR_PMEM_UNARMED_MASK), 804 .dimm_bad_shutdown = !!(p->health_bitmap & PAPR_PMEM_BAD_SHUTDOWN_MASK), 805 .dimm_bad_restore = !!(p->health_bitmap & PAPR_PMEM_BAD_RESTORE_MASK), 806 .dimm_scrubbed = !!(p->health_bitmap & PAPR_PMEM_SCRUBBED_AND_LOCKED), 807 .dimm_locked = !!(p->health_bitmap & PAPR_PMEM_SCRUBBED_AND_LOCKED), 808 .dimm_encrypted = !!(p->health_bitmap & PAPR_PMEM_ENCRYPTED), 809 .dimm_health = PAPR_PDSM_DIMM_HEALTHY, 810 }; 811 812 /* Update field dimm_health based on health_bitmap flags */ 813 if (p->health_bitmap & PAPR_PMEM_HEALTH_FATAL) 814 payload->health.dimm_health = PAPR_PDSM_DIMM_FATAL; 815 else if (p->health_bitmap & PAPR_PMEM_HEALTH_CRITICAL) 816 payload->health.dimm_health = PAPR_PDSM_DIMM_CRITICAL; 817 else if (p->health_bitmap & PAPR_PMEM_HEALTH_UNHEALTHY) 818 payload->health.dimm_health = PAPR_PDSM_DIMM_UNHEALTHY; 819 820 /* struct populated hence can release the mutex now */ 821 mutex_unlock(&p->health_mutex); 822 823 /* Populate the fuel gauge meter in the payload */ 824 papr_pdsm_fuel_gauge(p, payload); 825 /* Populate the dirty-shutdown-counter field */ 826 papr_pdsm_dsc(p, payload); 827 828 rc = sizeof(struct nd_papr_pdsm_health); 829 830 out: 831 return rc; 832 } 833 834 /* Inject a smart error Add the dirty-shutdown-counter value to the pdsm */ 835 static int 
papr_pdsm_smart_inject(struct papr_scm_priv *p, 836 union nd_pdsm_payload *payload) 837 { 838 int rc; 839 u32 supported_flags = 0; 840 u64 inject_mask = 0, clear_mask = 0; 841 u64 mask; 842 843 /* Check for individual smart error flags and update inject/clear masks */ 844 if (payload->smart_inject.flags & PDSM_SMART_INJECT_HEALTH_FATAL) { 845 supported_flags |= PDSM_SMART_INJECT_HEALTH_FATAL; 846 if (payload->smart_inject.fatal_enable) 847 inject_mask |= PAPR_PMEM_HEALTH_FATAL; 848 else 849 clear_mask |= PAPR_PMEM_HEALTH_FATAL; 850 } 851 852 if (payload->smart_inject.flags & PDSM_SMART_INJECT_BAD_SHUTDOWN) { 853 supported_flags |= PDSM_SMART_INJECT_BAD_SHUTDOWN; 854 if (payload->smart_inject.unsafe_shutdown_enable) 855 inject_mask |= PAPR_PMEM_SHUTDOWN_DIRTY; 856 else 857 clear_mask |= PAPR_PMEM_SHUTDOWN_DIRTY; 858 } 859 860 dev_dbg(&p->pdev->dev, "[Smart-inject] inject_mask=%#llx clear_mask=%#llx\n", 861 inject_mask, clear_mask); 862 863 /* Prevent concurrent access to dimm health bitmap related members */ 864 rc = mutex_lock_interruptible(&p->health_mutex); 865 if (rc) 866 return rc; 867 868 /* Use inject/clear masks to set health_bitmap_inject_mask */ 869 mask = READ_ONCE(p->health_bitmap_inject_mask); 870 mask = (mask & ~clear_mask) | inject_mask; 871 WRITE_ONCE(p->health_bitmap_inject_mask, mask); 872 873 /* Invalidate cached health bitmap */ 874 p->lasthealth_jiffies = 0; 875 876 mutex_unlock(&p->health_mutex); 877 878 /* Return the supported flags back to userspace */ 879 payload->smart_inject.flags = supported_flags; 880 881 return sizeof(struct nd_papr_pdsm_health); 882 } 883 884 /* 885 * 'struct pdsm_cmd_desc' 886 * Identifies supported PDSMs' expected length of in/out payloads 887 * and pdsm service function. 888 * 889 * size_in : Size of input payload if any in the PDSM request. 890 * size_out : Size of output payload if any in the PDSM request. 891 * service : Service function for the PDSM request. 
Return semantics: 892 * rc < 0 : Error servicing PDSM and rc indicates the error. 893 * rc >=0 : Serviced successfully and 'rc' indicate number of 894 * bytes written to payload. 895 */ 896 struct pdsm_cmd_desc { 897 u32 size_in; 898 u32 size_out; 899 int (*service)(struct papr_scm_priv *dimm, 900 union nd_pdsm_payload *payload); 901 }; 902 903 /* Holds all supported PDSMs' command descriptors */ 904 static const struct pdsm_cmd_desc __pdsm_cmd_descriptors[] = { 905 [PAPR_PDSM_MIN] = { 906 .size_in = 0, 907 .size_out = 0, 908 .service = NULL, 909 }, 910 /* New PDSM command descriptors to be added below */ 911 912 [PAPR_PDSM_HEALTH] = { 913 .size_in = 0, 914 .size_out = sizeof(struct nd_papr_pdsm_health), 915 .service = papr_pdsm_health, 916 }, 917 918 [PAPR_PDSM_SMART_INJECT] = { 919 .size_in = sizeof(struct nd_papr_pdsm_smart_inject), 920 .size_out = sizeof(struct nd_papr_pdsm_smart_inject), 921 .service = papr_pdsm_smart_inject, 922 }, 923 /* Empty */ 924 [PAPR_PDSM_MAX] = { 925 .size_in = 0, 926 .size_out = 0, 927 .service = NULL, 928 }, 929 }; 930 931 /* Given a valid pdsm cmd return its command descriptor else return NULL */ 932 static inline const struct pdsm_cmd_desc *pdsm_cmd_desc(enum papr_pdsm cmd) 933 { 934 if (cmd >= 0 || cmd < ARRAY_SIZE(__pdsm_cmd_descriptors)) 935 return &__pdsm_cmd_descriptors[cmd]; 936 937 return NULL; 938 } 939 940 /* 941 * For a given pdsm request call an appropriate service function. 942 * Returns errors if any while handling the pdsm command package. 
943 */ 944 static int papr_scm_service_pdsm(struct papr_scm_priv *p, 945 struct nd_cmd_pkg *pkg) 946 { 947 /* Get the PDSM header and PDSM command */ 948 struct nd_pkg_pdsm *pdsm_pkg = (struct nd_pkg_pdsm *)pkg->nd_payload; 949 enum papr_pdsm pdsm = (enum papr_pdsm)pkg->nd_command; 950 const struct pdsm_cmd_desc *pdsc; 951 int rc; 952 953 /* Fetch corresponding pdsm descriptor for validation and servicing */ 954 pdsc = pdsm_cmd_desc(pdsm); 955 956 /* Validate pdsm descriptor */ 957 /* Ensure that reserved fields are 0 */ 958 if (pdsm_pkg->reserved[0] || pdsm_pkg->reserved[1]) { 959 dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid reserved field\n", 960 pdsm); 961 return -EINVAL; 962 } 963 964 /* If pdsm expects some input, then ensure that the size_in matches */ 965 if (pdsc->size_in && 966 pkg->nd_size_in != (pdsc->size_in + ND_PDSM_HDR_SIZE)) { 967 dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Mismatched size_in=%d\n", 968 pdsm, pkg->nd_size_in); 969 return -EINVAL; 970 } 971 972 /* If pdsm wants to return data, then ensure that size_out matches */ 973 if (pdsc->size_out && 974 pkg->nd_size_out != (pdsc->size_out + ND_PDSM_HDR_SIZE)) { 975 dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Mismatched size_out=%d\n", 976 pdsm, pkg->nd_size_out); 977 return -EINVAL; 978 } 979 980 /* Service the pdsm */ 981 if (pdsc->service) { 982 dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Servicing..\n", pdsm); 983 984 rc = pdsc->service(p, &pdsm_pkg->payload); 985 986 if (rc < 0) { 987 /* error encountered while servicing pdsm */ 988 pdsm_pkg->cmd_status = rc; 989 pkg->nd_fw_size = ND_PDSM_HDR_SIZE; 990 } else { 991 /* pdsm serviced and 'rc' bytes written to payload */ 992 pdsm_pkg->cmd_status = 0; 993 pkg->nd_fw_size = ND_PDSM_HDR_SIZE + rc; 994 } 995 } else { 996 dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Unsupported PDSM request\n", 997 pdsm); 998 pdsm_pkg->cmd_status = -ENOENT; 999 pkg->nd_fw_size = ND_PDSM_HDR_SIZE; 1000 } 1001 1002 return pdsm_pkg->cmd_status; 1003 } 1004 1005 static int papr_scm_ndctl(struct 
nvdimm_bus_descriptor *nd_desc, 1006 struct nvdimm *nvdimm, unsigned int cmd, void *buf, 1007 unsigned int buf_len, int *cmd_rc) 1008 { 1009 struct nd_cmd_get_config_size *get_size_hdr; 1010 struct nd_cmd_pkg *call_pkg = NULL; 1011 struct papr_scm_priv *p; 1012 int rc; 1013 1014 rc = is_cmd_valid(nvdimm, cmd, buf, buf_len); 1015 if (rc) { 1016 pr_debug("Invalid cmd=0x%x. Err=%d\n", cmd, rc); 1017 return rc; 1018 } 1019 1020 /* Use a local variable in case cmd_rc pointer is NULL */ 1021 if (!cmd_rc) 1022 cmd_rc = &rc; 1023 1024 p = nvdimm_provider_data(nvdimm); 1025 1026 switch (cmd) { 1027 case ND_CMD_GET_CONFIG_SIZE: 1028 get_size_hdr = buf; 1029 1030 get_size_hdr->status = 0; 1031 get_size_hdr->max_xfer = 8; 1032 get_size_hdr->config_size = p->metadata_size; 1033 *cmd_rc = 0; 1034 break; 1035 1036 case ND_CMD_GET_CONFIG_DATA: 1037 *cmd_rc = papr_scm_meta_get(p, buf); 1038 break; 1039 1040 case ND_CMD_SET_CONFIG_DATA: 1041 *cmd_rc = papr_scm_meta_set(p, buf); 1042 break; 1043 1044 case ND_CMD_CALL: 1045 call_pkg = (struct nd_cmd_pkg *)buf; 1046 *cmd_rc = papr_scm_service_pdsm(p, call_pkg); 1047 break; 1048 1049 default: 1050 dev_dbg(&p->pdev->dev, "Unknown command = %d\n", cmd); 1051 return -EINVAL; 1052 } 1053 1054 dev_dbg(&p->pdev->dev, "returned with cmd_rc = %d\n", *cmd_rc); 1055 1056 return 0; 1057 } 1058 1059 static ssize_t health_bitmap_inject_show(struct device *dev, 1060 struct device_attribute *attr, 1061 char *buf) 1062 { 1063 struct nvdimm *dimm = to_nvdimm(dev); 1064 struct papr_scm_priv *p = nvdimm_provider_data(dimm); 1065 1066 return sysfs_emit(buf, "%#llx\n", 1067 READ_ONCE(p->health_bitmap_inject_mask)); 1068 } 1069 1070 static DEVICE_ATTR_ADMIN_RO(health_bitmap_inject); 1071 1072 static ssize_t perf_stats_show(struct device *dev, 1073 struct device_attribute *attr, char *buf) 1074 { 1075 int index; 1076 ssize_t rc; 1077 struct seq_buf s; 1078 struct papr_scm_perf_stat *stat; 1079 struct papr_scm_perf_stats *stats; 1080 struct nvdimm *dimm = 
to_nvdimm(dev); 1081 struct papr_scm_priv *p = nvdimm_provider_data(dimm); 1082 1083 if (!p->stat_buffer_len) 1084 return -ENOENT; 1085 1086 /* Allocate the buffer for phyp where stats are written */ 1087 stats = kzalloc(p->stat_buffer_len, GFP_KERNEL); 1088 if (!stats) 1089 return -ENOMEM; 1090 1091 /* Ask phyp to return all dimm perf stats */ 1092 rc = drc_pmem_query_stats(p, stats, 0); 1093 if (rc) 1094 goto free_stats; 1095 /* 1096 * Go through the returned output buffer and print stats and 1097 * values. Since stat_id is essentially a char string of 1098 * 8 bytes, simply use the string format specifier to print it. 1099 */ 1100 seq_buf_init(&s, buf, PAGE_SIZE); 1101 for (index = 0, stat = stats->scm_statistic; 1102 index < be32_to_cpu(stats->num_statistics); 1103 ++index, ++stat) { 1104 seq_buf_printf(&s, "%.8s = 0x%016llX\n", 1105 stat->stat_id, 1106 be64_to_cpu(stat->stat_val)); 1107 } 1108 1109 free_stats: 1110 kfree(stats); 1111 return rc ? rc : (ssize_t)seq_buf_used(&s); 1112 } 1113 static DEVICE_ATTR_ADMIN_RO(perf_stats); 1114 1115 static ssize_t flags_show(struct device *dev, 1116 struct device_attribute *attr, char *buf) 1117 { 1118 struct nvdimm *dimm = to_nvdimm(dev); 1119 struct papr_scm_priv *p = nvdimm_provider_data(dimm); 1120 struct seq_buf s; 1121 u64 health; 1122 int rc; 1123 1124 rc = drc_pmem_query_health(p); 1125 if (rc) 1126 return rc; 1127 1128 /* Copy health_bitmap locally, check masks & update out buffer */ 1129 health = READ_ONCE(p->health_bitmap); 1130 1131 seq_buf_init(&s, buf, PAGE_SIZE); 1132 if (health & PAPR_PMEM_UNARMED_MASK) 1133 seq_buf_printf(&s, "not_armed "); 1134 1135 if (health & PAPR_PMEM_BAD_SHUTDOWN_MASK) 1136 seq_buf_printf(&s, "flush_fail "); 1137 1138 if (health & PAPR_PMEM_BAD_RESTORE_MASK) 1139 seq_buf_printf(&s, "restore_fail "); 1140 1141 if (health & PAPR_PMEM_ENCRYPTED) 1142 seq_buf_printf(&s, "encrypted "); 1143 1144 if (health & PAPR_PMEM_SMART_EVENT_MASK) 1145 seq_buf_printf(&s, "smart_notify "); 1146 1147 
if (health & PAPR_PMEM_SCRUBBED_AND_LOCKED) 1148 seq_buf_printf(&s, "scrubbed locked "); 1149 1150 if (seq_buf_used(&s)) 1151 seq_buf_printf(&s, "\n"); 1152 1153 return seq_buf_used(&s); 1154 } 1155 DEVICE_ATTR_RO(flags); 1156 1157 static ssize_t dirty_shutdown_show(struct device *dev, 1158 struct device_attribute *attr, char *buf) 1159 { 1160 struct nvdimm *dimm = to_nvdimm(dev); 1161 struct papr_scm_priv *p = nvdimm_provider_data(dimm); 1162 1163 return sysfs_emit(buf, "%llu\n", p->dirty_shutdown_counter); 1164 } 1165 DEVICE_ATTR_RO(dirty_shutdown); 1166 1167 static umode_t papr_nd_attribute_visible(struct kobject *kobj, 1168 struct attribute *attr, int n) 1169 { 1170 struct device *dev = kobj_to_dev(kobj); 1171 struct nvdimm *nvdimm = to_nvdimm(dev); 1172 struct papr_scm_priv *p = nvdimm_provider_data(nvdimm); 1173 1174 /* For if perf-stats not available remove perf_stats sysfs */ 1175 if (attr == &dev_attr_perf_stats.attr && p->stat_buffer_len == 0) 1176 return 0; 1177 1178 return attr->mode; 1179 } 1180 1181 /* papr_scm specific dimm attributes */ 1182 static struct attribute *papr_nd_attributes[] = { 1183 &dev_attr_flags.attr, 1184 &dev_attr_perf_stats.attr, 1185 &dev_attr_dirty_shutdown.attr, 1186 &dev_attr_health_bitmap_inject.attr, 1187 NULL, 1188 }; 1189 1190 static const struct attribute_group papr_nd_attribute_group = { 1191 .name = "papr", 1192 .is_visible = papr_nd_attribute_visible, 1193 .attrs = papr_nd_attributes, 1194 }; 1195 1196 static const struct attribute_group *papr_nd_attr_groups[] = { 1197 &papr_nd_attribute_group, 1198 NULL, 1199 }; 1200 1201 static int papr_scm_nvdimm_init(struct papr_scm_priv *p) 1202 { 1203 struct device *dev = &p->pdev->dev; 1204 struct nd_mapping_desc mapping; 1205 struct nd_region_desc ndr_desc; 1206 unsigned long dimm_flags; 1207 int target_nid, online_nid; 1208 1209 p->bus_desc.ndctl = papr_scm_ndctl; 1210 p->bus_desc.module = THIS_MODULE; 1211 p->bus_desc.of_node = p->pdev->dev.of_node; 1212 
p->bus_desc.provider_name = kstrdup(p->pdev->name, GFP_KERNEL); 1213 1214 /* Set the dimm command family mask to accept PDSMs */ 1215 set_bit(NVDIMM_FAMILY_PAPR, &p->bus_desc.dimm_family_mask); 1216 1217 if (!p->bus_desc.provider_name) 1218 return -ENOMEM; 1219 1220 p->bus = nvdimm_bus_register(NULL, &p->bus_desc); 1221 if (!p->bus) { 1222 dev_err(dev, "Error creating nvdimm bus %pOF\n", p->dn); 1223 kfree(p->bus_desc.provider_name); 1224 return -ENXIO; 1225 } 1226 1227 dimm_flags = 0; 1228 set_bit(NDD_LABELING, &dimm_flags); 1229 1230 /* 1231 * Check if the nvdimm is unarmed. No locking needed as we are still 1232 * initializing. Ignore error encountered if any. 1233 */ 1234 __drc_pmem_query_health(p); 1235 1236 if (p->health_bitmap & PAPR_PMEM_UNARMED_MASK) 1237 set_bit(NDD_UNARMED, &dimm_flags); 1238 1239 p->nvdimm = nvdimm_create(p->bus, p, papr_nd_attr_groups, 1240 dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL); 1241 if (!p->nvdimm) { 1242 dev_err(dev, "Error creating DIMM object for %pOF\n", p->dn); 1243 goto err; 1244 } 1245 1246 if (nvdimm_bus_check_dimm_count(p->bus, 1)) 1247 goto err; 1248 1249 /* now add the region */ 1250 1251 memset(&mapping, 0, sizeof(mapping)); 1252 mapping.nvdimm = p->nvdimm; 1253 mapping.start = 0; 1254 mapping.size = p->blocks * p->block_size; // XXX: potential overflow? 
1255 1256 memset(&ndr_desc, 0, sizeof(ndr_desc)); 1257 target_nid = dev_to_node(&p->pdev->dev); 1258 online_nid = numa_map_to_online_node(target_nid); 1259 ndr_desc.numa_node = online_nid; 1260 ndr_desc.target_node = target_nid; 1261 ndr_desc.res = &p->res; 1262 ndr_desc.of_node = p->dn; 1263 ndr_desc.provider_data = p; 1264 ndr_desc.mapping = &mapping; 1265 ndr_desc.num_mappings = 1; 1266 ndr_desc.nd_set = &p->nd_set; 1267 1268 if (p->hcall_flush_required) { 1269 set_bit(ND_REGION_ASYNC, &ndr_desc.flags); 1270 ndr_desc.flush = papr_scm_pmem_flush; 1271 } 1272 1273 if (p->is_volatile) 1274 p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc); 1275 else { 1276 set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags); 1277 p->region = nvdimm_pmem_region_create(p->bus, &ndr_desc); 1278 } 1279 if (!p->region) { 1280 dev_err(dev, "Error registering region %pR from %pOF\n", 1281 ndr_desc.res, p->dn); 1282 goto err; 1283 } 1284 if (target_nid != online_nid) 1285 dev_info(dev, "Region registered with target node %d and online node %d", 1286 target_nid, online_nid); 1287 1288 mutex_lock(&papr_ndr_lock); 1289 list_add_tail(&p->region_list, &papr_nd_regions); 1290 mutex_unlock(&papr_ndr_lock); 1291 1292 return 0; 1293 1294 err: nvdimm_bus_unregister(p->bus); 1295 kfree(p->bus_desc.provider_name); 1296 return -ENXIO; 1297 } 1298 1299 static void papr_scm_add_badblock(struct nd_region *region, 1300 struct nvdimm_bus *bus, u64 phys_addr) 1301 { 1302 u64 aligned_addr = ALIGN_DOWN(phys_addr, L1_CACHE_BYTES); 1303 1304 if (nvdimm_bus_add_badrange(bus, aligned_addr, L1_CACHE_BYTES)) { 1305 pr_err("Bad block registration for 0x%llx failed\n", phys_addr); 1306 return; 1307 } 1308 1309 pr_debug("Add memory range (0x%llx - 0x%llx) as bad range\n", 1310 aligned_addr, aligned_addr + L1_CACHE_BYTES); 1311 1312 nvdimm_region_notify(region, NVDIMM_REVALIDATE_POISON); 1313 } 1314 1315 static int handle_mce_ue(struct notifier_block *nb, unsigned long val, 1316 void *data) 1317 { 1318 struct 
machine_check_event *evt = data; 1319 struct papr_scm_priv *p; 1320 u64 phys_addr; 1321 bool found = false; 1322 1323 if (evt->error_type != MCE_ERROR_TYPE_UE) 1324 return NOTIFY_DONE; 1325 1326 if (list_empty(&papr_nd_regions)) 1327 return NOTIFY_DONE; 1328 1329 /* 1330 * The physical address obtained here is PAGE_SIZE aligned, so get the 1331 * exact address from the effective address 1332 */ 1333 phys_addr = evt->u.ue_error.physical_address + 1334 (evt->u.ue_error.effective_address & ~PAGE_MASK); 1335 1336 if (!evt->u.ue_error.physical_address_provided || 1337 !is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT))) 1338 return NOTIFY_DONE; 1339 1340 /* mce notifier is called from a process context, so mutex is safe */ 1341 mutex_lock(&papr_ndr_lock); 1342 list_for_each_entry(p, &papr_nd_regions, region_list) { 1343 if (phys_addr >= p->res.start && phys_addr <= p->res.end) { 1344 found = true; 1345 break; 1346 } 1347 } 1348 1349 if (found) 1350 papr_scm_add_badblock(p->region, p->bus, phys_addr); 1351 1352 mutex_unlock(&papr_ndr_lock); 1353 1354 return found ? 
NOTIFY_OK : NOTIFY_DONE; 1355 } 1356 1357 static struct notifier_block mce_ue_nb = { 1358 .notifier_call = handle_mce_ue 1359 }; 1360 1361 static int papr_scm_probe(struct platform_device *pdev) 1362 { 1363 struct device_node *dn = pdev->dev.of_node; 1364 u32 drc_index, metadata_size; 1365 u64 blocks, block_size; 1366 struct papr_scm_priv *p; 1367 u8 uuid_raw[UUID_SIZE]; 1368 const char *uuid_str; 1369 ssize_t stat_size; 1370 uuid_t uuid; 1371 int rc; 1372 1373 /* check we have all the required DT properties */ 1374 if (of_property_read_u32(dn, "ibm,my-drc-index", &drc_index)) { 1375 dev_err(&pdev->dev, "%pOF: missing drc-index!\n", dn); 1376 return -ENODEV; 1377 } 1378 1379 if (of_property_read_u64(dn, "ibm,block-size", &block_size)) { 1380 dev_err(&pdev->dev, "%pOF: missing block-size!\n", dn); 1381 return -ENODEV; 1382 } 1383 1384 if (of_property_read_u64(dn, "ibm,number-of-blocks", &blocks)) { 1385 dev_err(&pdev->dev, "%pOF: missing number-of-blocks!\n", dn); 1386 return -ENODEV; 1387 } 1388 1389 if (of_property_read_string(dn, "ibm,unit-guid", &uuid_str)) { 1390 dev_err(&pdev->dev, "%pOF: missing unit-guid!\n", dn); 1391 return -ENODEV; 1392 } 1393 1394 /* 1395 * open firmware platform device create won't update the NUMA 1396 * distance table. For PAPR SCM devices we use numa_map_to_online_node() 1397 * to find the nearest online NUMA node and that requires correct 1398 * distance table information. 
1399 */ 1400 update_numa_distance(dn); 1401 1402 p = kzalloc_obj(*p); 1403 if (!p) 1404 return -ENOMEM; 1405 1406 /* Initialize the dimm mutex */ 1407 mutex_init(&p->health_mutex); 1408 1409 /* optional DT properties */ 1410 of_property_read_u32(dn, "ibm,metadata-size", &metadata_size); 1411 1412 p->dn = dn; 1413 p->drc_index = drc_index; 1414 p->block_size = block_size; 1415 p->blocks = blocks; 1416 p->is_volatile = !of_property_read_bool(dn, "ibm,cache-flush-required"); 1417 p->hcall_flush_required = of_property_read_bool(dn, "ibm,hcall-flush-required"); 1418 1419 if (of_property_read_u64(dn, "ibm,persistence-failed-count", 1420 &p->dirty_shutdown_counter)) 1421 p->dirty_shutdown_counter = 0; 1422 1423 /* We just need to ensure that set cookies are unique across */ 1424 uuid_parse(uuid_str, &uuid); 1425 1426 /* 1427 * The cookie1 and cookie2 are not really little endian. 1428 * We store a raw buffer representation of the 1429 * uuid string so that we can compare this with the label 1430 * area cookie irrespective of the endian configuration 1431 * with which the kernel is built. 1432 * 1433 * Historically we stored the cookie in the below format. 
1434 * for a uuid string 72511b67-0b3b-42fd-8d1d-5be3cae8bcaa 1435 * cookie1 was 0xfd423b0b671b5172 1436 * cookie2 was 0xaabce8cae35b1d8d 1437 */ 1438 export_uuid(uuid_raw, &uuid); 1439 p->nd_set.cookie1 = get_unaligned_le64(&uuid_raw[0]); 1440 p->nd_set.cookie2 = get_unaligned_le64(&uuid_raw[8]); 1441 1442 /* might be zero */ 1443 p->metadata_size = metadata_size; 1444 p->pdev = pdev; 1445 1446 /* request the hypervisor to bind this region to somewhere in memory */ 1447 rc = drc_pmem_bind(p); 1448 1449 /* If phyp says drc memory still bound then force unbound and retry */ 1450 if (rc == H_OVERLAP) 1451 rc = drc_pmem_query_n_bind(p); 1452 1453 if (rc != H_SUCCESS) { 1454 dev_err(&p->pdev->dev, "bind err: %d\n", rc); 1455 rc = -ENXIO; 1456 goto err; 1457 } 1458 1459 /* setup the resource for the newly bound range */ 1460 p->res.start = p->bound_addr; 1461 p->res.end = p->bound_addr + p->blocks * p->block_size - 1; 1462 p->res.name = pdev->name; 1463 p->res.flags = IORESOURCE_MEM; 1464 1465 /* Try retrieving the stat buffer and see if its supported */ 1466 stat_size = drc_pmem_query_stats(p, NULL, 0); 1467 if (stat_size > 0) { 1468 p->stat_buffer_len = stat_size; 1469 dev_dbg(&p->pdev->dev, "Max perf-stat size %lu-bytes\n", 1470 p->stat_buffer_len); 1471 } 1472 1473 rc = papr_scm_nvdimm_init(p); 1474 if (rc) 1475 goto err2; 1476 1477 platform_set_drvdata(pdev, p); 1478 papr_scm_pmu_register(p); 1479 1480 return 0; 1481 1482 err2: drc_pmem_unbind(p); 1483 err: kfree(p); 1484 return rc; 1485 } 1486 1487 static void papr_scm_remove(struct platform_device *pdev) 1488 { 1489 struct papr_scm_priv *p = platform_get_drvdata(pdev); 1490 1491 mutex_lock(&papr_ndr_lock); 1492 list_del(&p->region_list); 1493 mutex_unlock(&papr_ndr_lock); 1494 1495 nvdimm_bus_unregister(p->bus); 1496 drc_pmem_unbind(p); 1497 1498 if (pdev->archdata.priv) 1499 unregister_nvdimm_pmu(pdev->archdata.priv); 1500 1501 pdev->archdata.priv = NULL; 1502 kfree(p->bus_desc.provider_name); 1503 kfree(p); 
1504 } 1505 1506 static const struct of_device_id papr_scm_match[] = { 1507 { .compatible = "ibm,pmemory" }, 1508 { .compatible = "ibm,pmemory-v2" }, 1509 { }, 1510 }; 1511 1512 static struct platform_driver papr_scm_driver = { 1513 .probe = papr_scm_probe, 1514 .remove = papr_scm_remove, 1515 .driver = { 1516 .name = "papr_scm", 1517 .of_match_table = papr_scm_match, 1518 }, 1519 }; 1520 1521 static int __init papr_scm_init(void) 1522 { 1523 int ret; 1524 1525 ret = platform_driver_register(&papr_scm_driver); 1526 if (!ret) 1527 mce_register_notifier(&mce_ue_nb); 1528 1529 return ret; 1530 } 1531 module_init(papr_scm_init); 1532 1533 static void __exit papr_scm_exit(void) 1534 { 1535 mce_unregister_notifier(&mce_ue_nb); 1536 platform_driver_unregister(&papr_scm_driver); 1537 } 1538 module_exit(papr_scm_exit); 1539 1540 MODULE_DEVICE_TABLE(of, papr_scm_match); 1541 MODULE_DESCRIPTION("PAPR Storage Class Memory interface driver"); 1542 MODULE_LICENSE("GPL"); 1543 MODULE_AUTHOR("IBM Corporation"); 1544