// SPDX-License-Identifier: GPL-2.0-only
/*
 * CXL EDAC memory feature driver.
 *
 * Copyright (c) 2024-2025 HiSilicon Limited.
 *
 * - Supports functions to configure EDAC features of
 *   CXL memory devices.
 * - Registers with the EDAC device subsystem driver to expose
 *   the feature sysfs attributes to the user for configuring
 *   CXL memory RAS features.
 */

#include <linux/cleanup.h>
#include <linux/edac.h>
#include <linux/limits.h>
#include <linux/unaligned.h>
#include <linux/xarray.h>
#include <cxl/features.h>
#include <cxl.h>
#include <cxlmem.h>
#include "core.h"
#include "trace.h"

#define CXL_NR_EDAC_DEV_FEATURES 7

#define CXL_SCRUB_NO_REGION -1

struct cxl_patrol_scrub_context {
	u8 instance;
	u16 get_feat_size;
	u16 set_feat_size;
	u8 get_version;
	u8 set_version;
	u16 effects;
	struct cxl_memdev *cxlmd;
	struct cxl_region *cxlr;
};

/*
 * See CXL spec rev 3.2 @8.2.10.9.11.1 Table 8-222 Device Patrol Scrub Control
 * Feature Readable Attributes.
 */
struct cxl_scrub_rd_attrbs {
	u8 scrub_cycle_cap;
	__le16 scrub_cycle_hours;
	u8 scrub_flags;
} __packed;

/*
 * See CXL spec rev 3.2 @8.2.10.9.11.1 Table 8-223 Device Patrol Scrub Control
 * Feature Writable Attributes.
 */
struct cxl_scrub_wr_attrbs {
	u8 scrub_cycle_hours;
	u8 scrub_flags;
} __packed;

#define CXL_SCRUB_CONTROL_CHANGEABLE BIT(0)
#define CXL_SCRUB_CONTROL_REALTIME BIT(1)
#define CXL_SCRUB_CONTROL_CYCLE_MASK GENMASK(7, 0)
#define CXL_SCRUB_CONTROL_MIN_CYCLE_MASK GENMASK(15, 8)
#define CXL_SCRUB_CONTROL_ENABLE BIT(0)

#define CXL_GET_SCRUB_CYCLE_CHANGEABLE(cap) \
	FIELD_GET(CXL_SCRUB_CONTROL_CHANGEABLE, cap)
#define CXL_GET_SCRUB_CYCLE(cycle) \
	FIELD_GET(CXL_SCRUB_CONTROL_CYCLE_MASK, cycle)
#define CXL_GET_SCRUB_MIN_CYCLE(cycle) \
	FIELD_GET(CXL_SCRUB_CONTROL_MIN_CYCLE_MASK, cycle)
#define CXL_GET_SCRUB_EN_STS(flags) FIELD_GET(CXL_SCRUB_CONTROL_ENABLE, flags)

#define CXL_SET_SCRUB_CYCLE(cycle) \
	FIELD_PREP(CXL_SCRUB_CONTROL_CYCLE_MASK, cycle)
#define CXL_SET_SCRUB_EN(en) FIELD_PREP(CXL_SCRUB_CONTROL_ENABLE, en)

static int cxl_mem_scrub_get_attrbs(struct cxl_mailbox *cxl_mbox, u8 *cap,
				    u16 *cycle, u8 *flags, u8 *min_cycle)
{
	size_t rd_data_size = sizeof(struct cxl_scrub_rd_attrbs);
	size_t data_size;
	struct cxl_scrub_rd_attrbs *rd_attrbs __free(kfree) =
		kzalloc(rd_data_size, GFP_KERNEL);
	if (!rd_attrbs)
		return -ENOMEM;

	data_size = cxl_get_feature(cxl_mbox, &CXL_FEAT_PATROL_SCRUB_UUID,
				    CXL_GET_FEAT_SEL_CURRENT_VALUE, rd_attrbs,
				    rd_data_size, 0, NULL);
	if (!data_size)
		return -EIO;

	*cap = rd_attrbs->scrub_cycle_cap;
	*cycle = le16_to_cpu(rd_attrbs->scrub_cycle_hours);
	*flags = rd_attrbs->scrub_flags;
	if (min_cycle)
		*min_cycle = CXL_GET_SCRUB_MIN_CYCLE(*cycle);

	return 0;
}

static int cxl_scrub_get_attrbs(struct cxl_patrol_scrub_context *cxl_ps_ctx,
				u8 *cap, u16 *cycle, u8 *flags, u8 *min_cycle)
{
	struct cxl_mailbox *cxl_mbox;
	struct cxl_region_params *p;
	struct cxl_memdev *cxlmd;
	struct cxl_region *cxlr;
	u8 min_scrub_cycle = 0;
	int i, ret;

	if (!cxl_ps_ctx->cxlr) {
		cxl_mbox = &cxl_ps_ctx->cxlmd->cxlds->cxl_mbox;
		return cxl_mem_scrub_get_attrbs(cxl_mbox, cap, cycle,
						flags, min_cycle);
	}

	ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
	if ((ret = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
		return ret;

	cxlr = cxl_ps_ctx->cxlr;
	p = &cxlr->params;

	for (i = 0; i < p->nr_targets; i++) {
		struct cxl_endpoint_decoder *cxled = p->targets[i];

		cxlmd = cxled_to_memdev(cxled);
		cxl_mbox = &cxlmd->cxlds->cxl_mbox;
		ret = cxl_mem_scrub_get_attrbs(cxl_mbox, cap, cycle, flags,
					       min_cycle);
		if (ret)
			return ret;

		/*
		 * The min_scrub_cycle of a region is the max of minimum scrub
		 * cycles supported by memdevs that back the region.
		 */
		if (min_cycle)
			min_scrub_cycle = max(*min_cycle, min_scrub_cycle);
	}

	if (min_cycle)
		*min_cycle = min_scrub_cycle;

	return 0;
}

static int cxl_scrub_set_attrbs_region(struct device *dev,
				       struct cxl_patrol_scrub_context *cxl_ps_ctx,
				       u8 cycle, u8 flags)
{
	struct cxl_scrub_wr_attrbs wr_attrbs;
	struct cxl_mailbox *cxl_mbox;
	struct cxl_region_params *p;
	struct cxl_memdev *cxlmd;
	struct cxl_region *cxlr;
	int ret, i;

	ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
	if ((ret = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
		return ret;

	cxlr = cxl_ps_ctx->cxlr;
	p = &cxlr->params;
	wr_attrbs.scrub_cycle_hours = cycle;
	wr_attrbs.scrub_flags = flags;

	for (i = 0; i < p->nr_targets; i++) {
		struct cxl_endpoint_decoder *cxled = p->targets[i];

		cxlmd = cxled_to_memdev(cxled);
		cxl_mbox = &cxlmd->cxlds->cxl_mbox;
		ret = cxl_set_feature(cxl_mbox, &CXL_FEAT_PATROL_SCRUB_UUID,
				      cxl_ps_ctx->set_version, &wr_attrbs,
				      sizeof(wr_attrbs),
				      CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET,
				      0, NULL);
		if (ret)
			return ret;

		if (cycle != cxlmd->scrub_cycle) {
			if (cxlmd->scrub_region_id != CXL_SCRUB_NO_REGION)
				dev_info(dev,
					 "Device scrub rate(%d hours) set by region%d rate overwritten by region%d scrub rate(%d hours)\n",
					 cxlmd->scrub_cycle,
					 cxlmd->scrub_region_id, cxlr->id,
					 cycle);

			cxlmd->scrub_cycle = cycle;
			cxlmd->scrub_region_id = cxlr->id;
		}
	}

	return 0;
}

static int cxl_scrub_set_attrbs_device(struct device *dev,
				       struct cxl_patrol_scrub_context *cxl_ps_ctx,
				       u8 cycle, u8 flags)
{
	struct cxl_scrub_wr_attrbs wr_attrbs;
	struct cxl_mailbox *cxl_mbox;
	struct cxl_memdev *cxlmd;
	int ret;

	wr_attrbs.scrub_cycle_hours = cycle;
	wr_attrbs.scrub_flags = flags;

	cxlmd = cxl_ps_ctx->cxlmd;
	cxl_mbox = &cxlmd->cxlds->cxl_mbox;
	ret = cxl_set_feature(cxl_mbox, &CXL_FEAT_PATROL_SCRUB_UUID,
			      cxl_ps_ctx->set_version, &wr_attrbs,
			      sizeof(wr_attrbs),
			      CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET, 0,
			      NULL);
	if (ret)
		return ret;

	if (cycle != cxlmd->scrub_cycle) {
		if (cxlmd->scrub_region_id != CXL_SCRUB_NO_REGION)
			dev_info(dev,
				 "Device scrub rate(%d hours) set by region%d rate overwritten with device local scrub rate(%d hours)\n",
				 cxlmd->scrub_cycle, cxlmd->scrub_region_id,
				 cycle);

		cxlmd->scrub_cycle = cycle;
		cxlmd->scrub_region_id = CXL_SCRUB_NO_REGION;
	}

	return 0;
}

static int cxl_scrub_set_attrbs(struct device *dev,
				struct cxl_patrol_scrub_context *cxl_ps_ctx,
				u8 cycle, u8 flags)
{
	if (cxl_ps_ctx->cxlr)
		return cxl_scrub_set_attrbs_region(dev, cxl_ps_ctx, cycle, flags);

	return cxl_scrub_set_attrbs_device(dev, cxl_ps_ctx, cycle, flags);
}

static int cxl_patrol_scrub_get_enabled_bg(struct device *dev, void *drv_data,
					   bool *enabled)
{
	struct cxl_patrol_scrub_context *ctx = drv_data;
	u8 cap, flags;
	u16 cycle;
	int ret;

	ret = cxl_scrub_get_attrbs(ctx, &cap, &cycle, &flags, NULL);
	if (ret)
		return ret;

	*enabled = CXL_GET_SCRUB_EN_STS(flags);

	return 0;
}

static int cxl_patrol_scrub_set_enabled_bg(struct device *dev, void *drv_data,
					   bool enable)
{
	struct cxl_patrol_scrub_context *ctx = drv_data;
	u8 cap, flags, wr_cycle;
	u16 rd_cycle;
	int ret;

	if (!capable(CAP_SYS_RAWIO))
		return -EPERM;

	ret = cxl_scrub_get_attrbs(ctx, &cap, &rd_cycle, &flags, NULL);
	if (ret)
		return ret;

	wr_cycle = CXL_GET_SCRUB_CYCLE(rd_cycle);
	flags = CXL_SET_SCRUB_EN(enable);

	return cxl_scrub_set_attrbs(dev, ctx, wr_cycle, flags);
}

static int cxl_patrol_scrub_get_min_scrub_cycle(struct device *dev,
						void *drv_data, u32 *min)
{
	struct cxl_patrol_scrub_context *ctx = drv_data;
	u8 cap, flags, min_cycle;
	u16 cycle;
	int ret;

	ret = cxl_scrub_get_attrbs(ctx, &cap, &cycle, &flags, &min_cycle);
	if (ret)
		return ret;

	*min = min_cycle * 3600;

	return 0;
}

static int cxl_patrol_scrub_get_max_scrub_cycle(struct device *dev,
						void *drv_data, u32 *max)
{
	*max = U8_MAX * 3600; /* Max set by register size */

	return 0;
}

static int cxl_patrol_scrub_get_scrub_cycle(struct device *dev, void *drv_data,
					    u32 *scrub_cycle_secs)
{
	struct cxl_patrol_scrub_context *ctx = drv_data;
	u8 cap, flags;
	u16 cycle;
	int ret;

	ret = cxl_scrub_get_attrbs(ctx, &cap, &cycle, &flags, NULL);
	if (ret)
		return ret;

	*scrub_cycle_secs = CXL_GET_SCRUB_CYCLE(cycle) * 3600;

	return 0;
}

static int cxl_patrol_scrub_set_scrub_cycle(struct device *dev, void *drv_data,
					    u32 scrub_cycle_secs)
{
	struct cxl_patrol_scrub_context *ctx = drv_data;
	u8 scrub_cycle_hours = scrub_cycle_secs / 3600;
	u8 cap, wr_cycle, flags, min_cycle;
	u16 rd_cycle;
	int ret;

	if (!capable(CAP_SYS_RAWIO))
		return -EPERM;

	ret = cxl_scrub_get_attrbs(ctx, &cap, &rd_cycle, &flags, &min_cycle);
	if (ret)
		return ret;

	if (!CXL_GET_SCRUB_CYCLE_CHANGEABLE(cap))
		return -EOPNOTSUPP;

	if (scrub_cycle_hours < min_cycle) {
		dev_dbg(dev, "Invalid CXL patrol scrub cycle(%d) to set\n",
			scrub_cycle_hours);
		dev_dbg(dev,
			"Minimum supported CXL patrol scrub cycle in hour %d\n",
			min_cycle);
		return -EINVAL;
	}
	wr_cycle = CXL_SET_SCRUB_CYCLE(scrub_cycle_hours);

	return cxl_scrub_set_attrbs(dev, ctx, wr_cycle, flags);
}

static const struct edac_scrub_ops cxl_ps_scrub_ops = {
	.get_enabled_bg = cxl_patrol_scrub_get_enabled_bg,
	.set_enabled_bg = cxl_patrol_scrub_set_enabled_bg,
	.get_min_cycle = cxl_patrol_scrub_get_min_scrub_cycle,
	.get_max_cycle = cxl_patrol_scrub_get_max_scrub_cycle,
	.get_cycle_duration = cxl_patrol_scrub_get_scrub_cycle,
	.set_cycle_duration = cxl_patrol_scrub_set_scrub_cycle,
};

static int cxl_memdev_scrub_init(struct cxl_memdev *cxlmd,
				 struct edac_dev_feature *ras_feature,
				 u8 scrub_inst)
{
	struct cxl_patrol_scrub_context *cxl_ps_ctx;
	struct cxl_feat_entry *feat_entry;
	u8 cap, flags;
	u16 cycle;
	int rc;

	feat_entry = cxl_feature_info(to_cxlfs(cxlmd->cxlds),
				      &CXL_FEAT_PATROL_SCRUB_UUID);
	if (IS_ERR(feat_entry))
		return -EOPNOTSUPP;

	if (!(le32_to_cpu(feat_entry->flags) & CXL_FEATURE_F_CHANGEABLE))
		return -EOPNOTSUPP;

	cxl_ps_ctx = devm_kzalloc(&cxlmd->dev, sizeof(*cxl_ps_ctx), GFP_KERNEL);
	if (!cxl_ps_ctx)
		return -ENOMEM;

	*cxl_ps_ctx = (struct cxl_patrol_scrub_context){
		.get_feat_size = le16_to_cpu(feat_entry->get_feat_size),
		.set_feat_size = le16_to_cpu(feat_entry->set_feat_size),
		.get_version = feat_entry->get_feat_ver,
		.set_version = feat_entry->set_feat_ver,
		.effects = le16_to_cpu(feat_entry->effects),
		.instance = scrub_inst,
		.cxlmd = cxlmd,
	};

	rc = cxl_mem_scrub_get_attrbs(&cxlmd->cxlds->cxl_mbox, &cap, &cycle,
				      &flags, NULL);
	if (rc)
		return rc;

	cxlmd->scrub_cycle = CXL_GET_SCRUB_CYCLE(cycle);
	cxlmd->scrub_region_id = CXL_SCRUB_NO_REGION;

	ras_feature->ft_type = RAS_FEAT_SCRUB;
	ras_feature->instance = cxl_ps_ctx->instance;
	ras_feature->scrub_ops = &cxl_ps_scrub_ops;
	ras_feature->ctx = cxl_ps_ctx;

	return 0;
}

static int cxl_region_scrub_init(struct cxl_region *cxlr,
				 struct edac_dev_feature *ras_feature,
				 u8 scrub_inst)
{
	struct cxl_patrol_scrub_context *cxl_ps_ctx;
	struct cxl_region_params *p = &cxlr->params;
	struct cxl_feat_entry *feat_entry = NULL;
	struct cxl_memdev *cxlmd;
	u8 cap, flags;
	u16 cycle;
	int i, rc;

	/*
	 * The cxl_region_rwsem must be held if the code below is used in a
	 * context other than when the region is in the probe state, as it is
	 * here.
	 */
	for (i = 0; i < p->nr_targets; i++) {
		struct cxl_endpoint_decoder *cxled = p->targets[i];

		cxlmd = cxled_to_memdev(cxled);
		feat_entry = cxl_feature_info(to_cxlfs(cxlmd->cxlds),
					      &CXL_FEAT_PATROL_SCRUB_UUID);
		if (IS_ERR(feat_entry))
			return -EOPNOTSUPP;

		if (!(le32_to_cpu(feat_entry->flags) &
		      CXL_FEATURE_F_CHANGEABLE))
			return -EOPNOTSUPP;

		rc = cxl_mem_scrub_get_attrbs(&cxlmd->cxlds->cxl_mbox, &cap,
					      &cycle, &flags, NULL);
		if (rc)
			return rc;

		cxlmd->scrub_cycle = CXL_GET_SCRUB_CYCLE(cycle);
		cxlmd->scrub_region_id = CXL_SCRUB_NO_REGION;
	}

	cxl_ps_ctx = devm_kzalloc(&cxlr->dev, sizeof(*cxl_ps_ctx), GFP_KERNEL);
	if (!cxl_ps_ctx)
		return -ENOMEM;

	*cxl_ps_ctx = (struct cxl_patrol_scrub_context){
		.get_feat_size = le16_to_cpu(feat_entry->get_feat_size),
		.set_feat_size = le16_to_cpu(feat_entry->set_feat_size),
		.get_version = feat_entry->get_feat_ver,
		.set_version = feat_entry->set_feat_ver,
		.effects = le16_to_cpu(feat_entry->effects),
		.instance = scrub_inst,
		.cxlr = cxlr,
	};

	ras_feature->ft_type = RAS_FEAT_SCRUB;
	ras_feature->instance = cxl_ps_ctx->instance;
	ras_feature->scrub_ops = &cxl_ps_scrub_ops;
	ras_feature->ctx = cxl_ps_ctx;

	return 0;
}

struct cxl_ecs_context {
	u16 num_media_frus;
	u16 get_feat_size;
	u16 set_feat_size;
	u8 get_version;
	u8 set_version;
	u16 effects;
	struct cxl_memdev *cxlmd;
};

/*
 * See CXL spec rev 3.2 @8.2.10.9.11.2 Table 8-225 DDR5 ECS Control Feature
 * Readable Attributes.
 */
struct cxl_ecs_fru_rd_attrbs {
	u8 ecs_cap;
	__le16 ecs_config;
	u8 ecs_flags;
} __packed;

struct cxl_ecs_rd_attrbs {
	u8 ecs_log_cap;
	struct cxl_ecs_fru_rd_attrbs fru_attrbs[];
} __packed;

/*
 * See CXL spec rev 3.2 @8.2.10.9.11.2 Table 8-226 DDR5 ECS Control Feature
 * Writable Attributes.
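 *
 * Editorial note (derived from the structs below, not from the spec text):
 * the writable payload mirrors the readable payload minus the per-FRU
 * capability and flag fields, i.e. only ecs_log_cap plus one ecs_config
 * word per media FRU can be written back.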
 */
struct cxl_ecs_fru_wr_attrbs {
	__le16 ecs_config;
} __packed;

struct cxl_ecs_wr_attrbs {
	u8 ecs_log_cap;
	struct cxl_ecs_fru_wr_attrbs fru_attrbs[];
} __packed;

#define CXL_ECS_LOG_ENTRY_TYPE_MASK GENMASK(1, 0)
#define CXL_ECS_REALTIME_REPORT_CAP_MASK BIT(0)
#define CXL_ECS_THRESHOLD_COUNT_MASK GENMASK(2, 0)
#define CXL_ECS_COUNT_MODE_MASK BIT(3)
#define CXL_ECS_RESET_COUNTER_MASK BIT(4)
#define CXL_ECS_RESET_COUNTER 1

enum {
	ECS_THRESHOLD_256 = 256,
	ECS_THRESHOLD_1024 = 1024,
	ECS_THRESHOLD_4096 = 4096,
};

enum {
	ECS_THRESHOLD_IDX_256 = 3,
	ECS_THRESHOLD_IDX_1024 = 4,
	ECS_THRESHOLD_IDX_4096 = 5,
};

static const u16 ecs_supp_threshold[] = {
	[ECS_THRESHOLD_IDX_256] = 256,
	[ECS_THRESHOLD_IDX_1024] = 1024,
	[ECS_THRESHOLD_IDX_4096] = 4096,
};

enum {
	ECS_LOG_ENTRY_TYPE_DRAM = 0x0,
	ECS_LOG_ENTRY_TYPE_MEM_MEDIA_FRU = 0x1,
};

enum cxl_ecs_count_mode {
	ECS_MODE_COUNTS_ROWS = 0,
	ECS_MODE_COUNTS_CODEWORDS = 1,
};

static int cxl_mem_ecs_get_attrbs(struct device *dev,
				  struct cxl_ecs_context *cxl_ecs_ctx,
				  int fru_id, u8 *log_cap, u16 *config)
{
	struct cxl_memdev *cxlmd = cxl_ecs_ctx->cxlmd;
	struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
	struct cxl_ecs_fru_rd_attrbs *fru_rd_attrbs;
	size_t rd_data_size;
	size_t data_size;

	rd_data_size = cxl_ecs_ctx->get_feat_size;

	struct cxl_ecs_rd_attrbs *rd_attrbs __free(kvfree) =
		kvzalloc(rd_data_size, GFP_KERNEL);
	if (!rd_attrbs)
		return -ENOMEM;

	data_size = cxl_get_feature(cxl_mbox, &CXL_FEAT_ECS_UUID,
				    CXL_GET_FEAT_SEL_CURRENT_VALUE, rd_attrbs,
				    rd_data_size, 0, NULL);
	if (!data_size)
		return -EIO;

	fru_rd_attrbs = rd_attrbs->fru_attrbs;
	*log_cap = rd_attrbs->ecs_log_cap;
	*config = le16_to_cpu(fru_rd_attrbs[fru_id].ecs_config);

	return 0;
}

static int cxl_mem_ecs_set_attrbs(struct device *dev,
				  struct cxl_ecs_context *cxl_ecs_ctx,
				  int fru_id, u8 log_cap, u16 config)
{
	struct cxl_memdev *cxlmd = cxl_ecs_ctx->cxlmd;
	struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
	struct cxl_ecs_fru_rd_attrbs *fru_rd_attrbs;
	struct cxl_ecs_fru_wr_attrbs *fru_wr_attrbs;
	size_t rd_data_size, wr_data_size;
	u16 num_media_frus, count;
	size_t data_size;

	num_media_frus = cxl_ecs_ctx->num_media_frus;
	rd_data_size = cxl_ecs_ctx->get_feat_size;
	wr_data_size = cxl_ecs_ctx->set_feat_size;
	struct cxl_ecs_rd_attrbs *rd_attrbs __free(kvfree) =
		kvzalloc(rd_data_size, GFP_KERNEL);
	if (!rd_attrbs)
		return -ENOMEM;

	data_size = cxl_get_feature(cxl_mbox, &CXL_FEAT_ECS_UUID,
				    CXL_GET_FEAT_SEL_CURRENT_VALUE, rd_attrbs,
				    rd_data_size, 0, NULL);
	if (!data_size)
		return -EIO;

	struct cxl_ecs_wr_attrbs *wr_attrbs __free(kvfree) =
		kvzalloc(wr_data_size, GFP_KERNEL);
	if (!wr_attrbs)
		return -ENOMEM;

	/*
	 * Fill writable attributes from the current attributes read
	 * for all the media FRUs.
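	 *
	 * This is a read-modify-write of the full set-feature payload:
	 * every FRU's current ecs_config is copied back unchanged and only
	 * the entry for fru_id is replaced with the caller's new config.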
	 */
	fru_rd_attrbs = rd_attrbs->fru_attrbs;
	fru_wr_attrbs = wr_attrbs->fru_attrbs;
	wr_attrbs->ecs_log_cap = log_cap;
	for (count = 0; count < num_media_frus; count++)
		fru_wr_attrbs[count].ecs_config =
			fru_rd_attrbs[count].ecs_config;

	fru_wr_attrbs[fru_id].ecs_config = cpu_to_le16(config);

	return cxl_set_feature(cxl_mbox, &CXL_FEAT_ECS_UUID,
			       cxl_ecs_ctx->set_version, wr_attrbs,
			       wr_data_size,
			       CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET,
			       0, NULL);
}

static u8 cxl_get_ecs_log_entry_type(u8 log_cap, u16 config)
{
	return FIELD_GET(CXL_ECS_LOG_ENTRY_TYPE_MASK, log_cap);
}

static u16 cxl_get_ecs_threshold(u8 log_cap, u16 config)
{
	u8 index = FIELD_GET(CXL_ECS_THRESHOLD_COUNT_MASK, config);

	return ecs_supp_threshold[index];
}

static u8 cxl_get_ecs_count_mode(u8 log_cap, u16 config)
{
	return FIELD_GET(CXL_ECS_COUNT_MODE_MASK, config);
}

#define CXL_ECS_GET_ATTR(attrb) \
	static int cxl_ecs_get_##attrb(struct device *dev, void *drv_data, \
				       int fru_id, u32 *val) \
	{ \
		struct cxl_ecs_context *ctx = drv_data; \
		u8 log_cap; \
		u16 config; \
		int ret; \
		\
		ret = cxl_mem_ecs_get_attrbs(dev, ctx, fru_id, &log_cap, \
					     &config); \
		if (ret) \
			return ret; \
		\
		*val = cxl_get_ecs_##attrb(log_cap, config); \
		\
		return 0; \
	}

CXL_ECS_GET_ATTR(log_entry_type)
CXL_ECS_GET_ATTR(count_mode)
CXL_ECS_GET_ATTR(threshold)

static int cxl_set_ecs_log_entry_type(struct device *dev, u8 *log_cap,
				      u16 *config, u32 val)
{
	if (val != ECS_LOG_ENTRY_TYPE_DRAM &&
	    val != ECS_LOG_ENTRY_TYPE_MEM_MEDIA_FRU)
		return -EINVAL;

	*log_cap = FIELD_PREP(CXL_ECS_LOG_ENTRY_TYPE_MASK, val);

	return 0;
}

static int cxl_set_ecs_threshold(struct device *dev, u8 *log_cap, u16 *config,
				 u32 val)
{
	*config &= ~CXL_ECS_THRESHOLD_COUNT_MASK;

	switch (val) {
	case ECS_THRESHOLD_256:
		*config |= FIELD_PREP(CXL_ECS_THRESHOLD_COUNT_MASK,
				      ECS_THRESHOLD_IDX_256);
		break;
	case ECS_THRESHOLD_1024:
		*config |= FIELD_PREP(CXL_ECS_THRESHOLD_COUNT_MASK,
				      ECS_THRESHOLD_IDX_1024);
		break;
	case ECS_THRESHOLD_4096:
		*config |= FIELD_PREP(CXL_ECS_THRESHOLD_COUNT_MASK,
				      ECS_THRESHOLD_IDX_4096);
		break;
	default:
		dev_dbg(dev, "Invalid CXL ECS threshold count(%u) to set\n",
			val);
		dev_dbg(dev, "Supported ECS threshold counts: %u, %u, %u\n",
			ECS_THRESHOLD_256, ECS_THRESHOLD_1024,
			ECS_THRESHOLD_4096);
		return -EINVAL;
	}

	return 0;
}

static int cxl_set_ecs_count_mode(struct device *dev, u8 *log_cap, u16 *config,
				  u32 val)
{
	if (val != ECS_MODE_COUNTS_ROWS && val != ECS_MODE_COUNTS_CODEWORDS) {
		dev_dbg(dev, "Invalid CXL ECS scrub mode(%d) to set\n", val);
		dev_dbg(dev,
			"Supported ECS Modes: 0: ECS counts rows with errors,"
			" 1: ECS counts codewords with errors\n");
		return -EINVAL;
	}

	*config &= ~CXL_ECS_COUNT_MODE_MASK;
	*config |= FIELD_PREP(CXL_ECS_COUNT_MODE_MASK, val);

	return 0;
}

static int cxl_set_ecs_reset_counter(struct device *dev, u8 *log_cap,
				     u16 *config, u32 val)
{
	if (val != CXL_ECS_RESET_COUNTER)
		return -EINVAL;

	*config &= ~CXL_ECS_RESET_COUNTER_MASK;
	*config |= FIELD_PREP(CXL_ECS_RESET_COUNTER_MASK, val);

	return 0;
}

#define CXL_ECS_SET_ATTR(attrb) \
	static int cxl_ecs_set_##attrb(struct device *dev, void *drv_data, \
				       int fru_id, u32 val) \
	{ \
		struct cxl_ecs_context *ctx = drv_data; \
		u8 log_cap; \
		u16 config; \
		int ret; \
		\
		if (!capable(CAP_SYS_RAWIO)) \
			return -EPERM; \
		\
		ret = cxl_mem_ecs_get_attrbs(dev, ctx, fru_id, &log_cap, \
					     &config); \
		if (ret) \
			return ret; \
		\
		ret = cxl_set_ecs_##attrb(dev, &log_cap, &config, val); \
		if (ret) \
			return ret; \
		\
		return cxl_mem_ecs_set_attrbs(dev, ctx, fru_id, log_cap, \
					      config); \
	}
CXL_ECS_SET_ATTR(log_entry_type)
CXL_ECS_SET_ATTR(count_mode)
CXL_ECS_SET_ATTR(reset_counter)
CXL_ECS_SET_ATTR(threshold)

static const struct edac_ecs_ops cxl_ecs_ops = {
	.get_log_entry_type = cxl_ecs_get_log_entry_type,
	.set_log_entry_type = cxl_ecs_set_log_entry_type,
	.get_mode = cxl_ecs_get_count_mode,
	.set_mode = cxl_ecs_set_count_mode,
	.reset = cxl_ecs_set_reset_counter,
	.get_threshold = cxl_ecs_get_threshold,
	.set_threshold = cxl_ecs_set_threshold,
};

static int cxl_memdev_ecs_init(struct cxl_memdev *cxlmd,
			       struct edac_dev_feature *ras_feature)
{
	struct cxl_ecs_context *cxl_ecs_ctx;
	struct cxl_feat_entry *feat_entry;
	int num_media_frus;

	feat_entry =
		cxl_feature_info(to_cxlfs(cxlmd->cxlds), &CXL_FEAT_ECS_UUID);
	if (IS_ERR(feat_entry))
		return -EOPNOTSUPP;

	if (!(le32_to_cpu(feat_entry->flags) & CXL_FEATURE_F_CHANGEABLE))
		return -EOPNOTSUPP;

	num_media_frus = (le16_to_cpu(feat_entry->get_feat_size) -
			  sizeof(struct cxl_ecs_rd_attrbs)) /
			 sizeof(struct cxl_ecs_fru_rd_attrbs);
	if (!num_media_frus)
		return -EOPNOTSUPP;

	cxl_ecs_ctx =
		devm_kzalloc(&cxlmd->dev, sizeof(*cxl_ecs_ctx), GFP_KERNEL);
	if (!cxl_ecs_ctx)
		return -ENOMEM;

	*cxl_ecs_ctx = (struct cxl_ecs_context){
		.get_feat_size = le16_to_cpu(feat_entry->get_feat_size),
		.set_feat_size = le16_to_cpu(feat_entry->set_feat_size),
		.get_version = feat_entry->get_feat_ver,
		.set_version = feat_entry->set_feat_ver,
		.effects = le16_to_cpu(feat_entry->effects),
		.num_media_frus = num_media_frus,
		.cxlmd = cxlmd,
	};

	ras_feature->ft_type = RAS_FEAT_ECS;
	ras_feature->ecs_ops = &cxl_ecs_ops;
	ras_feature->ctx = cxl_ecs_ctx;
	ras_feature->ecs_info.num_media_frus = num_media_frus;

	return 0;
}

/*
 * Perform Maintenance CXL 3.2 Spec 8.2.10.7.1
 */

/*
 * Perform Maintenance input payload
 * CXL rev 3.2 section 8.2.10.7.1 Table 8-117
 */
struct cxl_mbox_maintenance_hdr {
	u8 op_class;
	u8 op_subclass;
} __packed;

static int cxl_perform_maintenance(struct cxl_mailbox *cxl_mbox, u8 class,
				   u8 subclass, void *data_in,
				   size_t data_in_size)
{
	struct cxl_memdev_maintenance_pi {
		struct cxl_mbox_maintenance_hdr hdr;
		u8 data[];
	} __packed;
	struct cxl_mbox_cmd mbox_cmd;
	size_t hdr_size;

	struct cxl_memdev_maintenance_pi *pi __free(kvfree) =
		kvzalloc(cxl_mbox->payload_size, GFP_KERNEL);
	if (!pi)
		return -ENOMEM;

	pi->hdr.op_class = class;
	pi->hdr.op_subclass = subclass;
	hdr_size = sizeof(pi->hdr);
	/*
	 * Check minimum mbox payload size is available for
	 * the maintenance data transfer.
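	 *
	 * For example, a memory sparing request needs room for
	 * sizeof(struct cxl_mbox_maintenance_hdr) +
	 * sizeof(struct cxl_memdev_sparing_in_payload) bytes of mailbox
	 * payload space.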
	 */
	if (hdr_size + data_in_size > cxl_mbox->payload_size)
		return -ENOMEM;

	memcpy(pi->data, data_in, data_in_size);
	mbox_cmd = (struct cxl_mbox_cmd){
		.opcode = CXL_MBOX_OP_DO_MAINTENANCE,
		.size_in = hdr_size + data_in_size,
		.payload_in = pi,
	};

	return cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
}

/*
 * Support for checking whether the attributes of a memory operation
 * are from the current boot or not.
 */

struct cxl_mem_err_rec {
	struct xarray rec_gen_media;
	struct xarray rec_dram;
};

enum cxl_mem_repair_type {
	CXL_PPR,
	CXL_CACHELINE_SPARING,
	CXL_ROW_SPARING,
	CXL_BANK_SPARING,
	CXL_RANK_SPARING,
	CXL_REPAIR_MAX,
};

/**
 * struct cxl_mem_repair_attrbs - CXL memory repair attributes
 * @dpa: DPA of memory to repair
 * @nibble_mask: nibble mask, identifies one or more nibbles on the memory bus
 * @row: row of memory to repair
 * @column: column of memory to repair
 * @channel: channel of memory to repair
 * @sub_channel: sub channel of memory to repair
 * @rank: rank of memory to repair
 * @bank_group: bank group of memory to repair
 * @bank: bank of memory to repair
 * @repair_type: repair type, e.g. PPR, memory sparing, etc.
 */
struct cxl_mem_repair_attrbs {
	u64 dpa;
	u32 nibble_mask;
	u32 row;
	u16 column;
	u8 channel;
	u8 sub_channel;
	u8 rank;
	u8 bank_group;
	u8 bank;
	enum cxl_mem_repair_type repair_type;
};

static struct cxl_event_gen_media *
cxl_find_rec_gen_media(struct cxl_memdev *cxlmd,
		       struct cxl_mem_repair_attrbs *attrbs)
{
	struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
	struct cxl_event_gen_media *rec;

	if (!array_rec)
		return NULL;

	rec = xa_load(&array_rec->rec_gen_media, attrbs->dpa);
	if (!rec)
		return NULL;

	if (attrbs->repair_type == CXL_PPR)
		return rec;

	return NULL;
}

static struct cxl_event_dram *
cxl_find_rec_dram(struct cxl_memdev *cxlmd,
		  struct cxl_mem_repair_attrbs *attrbs)
{
	struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
	struct cxl_event_dram *rec;
	u16 validity_flags;

	if (!array_rec)
		return NULL;

	rec = xa_load(&array_rec->rec_dram, attrbs->dpa);
	if (!rec)
		return NULL;

	validity_flags = get_unaligned_le16(rec->media_hdr.validity_flags);
	if (!(validity_flags & CXL_DER_VALID_CHANNEL) ||
	    !(validity_flags & CXL_DER_VALID_RANK))
		return NULL;

	switch (attrbs->repair_type) {
	case CXL_PPR:
		if (!(validity_flags & CXL_DER_VALID_NIBBLE) ||
		    get_unaligned_le24(rec->nibble_mask) == attrbs->nibble_mask)
			return rec;
		break;
	case CXL_CACHELINE_SPARING:
		if (!(validity_flags & CXL_DER_VALID_BANK_GROUP) ||
		    !(validity_flags & CXL_DER_VALID_BANK) ||
		    !(validity_flags & CXL_DER_VALID_ROW) ||
		    !(validity_flags & CXL_DER_VALID_COLUMN))
			return NULL;

		if (rec->media_hdr.channel == attrbs->channel &&
		    rec->media_hdr.rank == attrbs->rank &&
		    rec->bank_group == attrbs->bank_group &&
		    rec->bank == attrbs->bank &&
		    get_unaligned_le24(rec->row) == attrbs->row &&
		    get_unaligned_le16(rec->column) == attrbs->column &&
		    (!(validity_flags & CXL_DER_VALID_NIBBLE) ||
		     get_unaligned_le24(rec->nibble_mask) ==
			     attrbs->nibble_mask) &&
		    (!(validity_flags & CXL_DER_VALID_SUB_CHANNEL) ||
		     rec->sub_channel == attrbs->sub_channel))
			return rec;
		break;
	case CXL_ROW_SPARING:
		if (!(validity_flags & CXL_DER_VALID_BANK_GROUP) ||
		    !(validity_flags & CXL_DER_VALID_BANK) ||
		    !(validity_flags & CXL_DER_VALID_ROW))
			return NULL;

		if (rec->media_hdr.channel == attrbs->channel &&
		    rec->media_hdr.rank == attrbs->rank &&
		    rec->bank_group == attrbs->bank_group &&
		    rec->bank == attrbs->bank &&
		    get_unaligned_le24(rec->row) == attrbs->row &&
		    (!(validity_flags & CXL_DER_VALID_NIBBLE) ||
		     get_unaligned_le24(rec->nibble_mask) ==
			     attrbs->nibble_mask))
			return rec;
		break;
	case CXL_BANK_SPARING:
		if (!(validity_flags & CXL_DER_VALID_BANK_GROUP) ||
		    !(validity_flags & CXL_DER_VALID_BANK))
			return NULL;

		if (rec->media_hdr.channel == attrbs->channel &&
		    rec->media_hdr.rank == attrbs->rank &&
		    rec->bank_group == attrbs->bank_group &&
		    rec->bank == attrbs->bank &&
		    (!(validity_flags & CXL_DER_VALID_NIBBLE) ||
		     get_unaligned_le24(rec->nibble_mask) ==
			     attrbs->nibble_mask))
			return rec;
		break;
	case CXL_RANK_SPARING:
		if (rec->media_hdr.channel == attrbs->channel &&
		    rec->media_hdr.rank == attrbs->rank &&
		    (!(validity_flags & CXL_DER_VALID_NIBBLE) ||
		     get_unaligned_le24(rec->nibble_mask) ==
			     attrbs->nibble_mask))
			return rec;
		break;
	default:
		return NULL;
	}

	return NULL;
}

#define CXL_MAX_STORAGE_DAYS 10
#define CXL_MAX_STORAGE_TIME_SECS (CXL_MAX_STORAGE_DAYS * 24 * 60 * 60)

static void cxl_del_expired_gmedia_recs(struct xarray *rec_xarray,
					struct cxl_event_gen_media *cur_rec)
{
	u64 cur_ts = le64_to_cpu(cur_rec->media_hdr.hdr.timestamp);
	struct cxl_event_gen_media *rec;
	unsigned long index;
	u64 delta_ts_secs;

	xa_for_each(rec_xarray, index, rec) {
		delta_ts_secs = (cur_ts -
			le64_to_cpu(rec->media_hdr.hdr.timestamp)) / 1000000000ULL;
		if (delta_ts_secs >= CXL_MAX_STORAGE_TIME_SECS) {
			xa_erase(rec_xarray, index);
			kfree(rec);
		}
	}
}

static void cxl_del_expired_dram_recs(struct xarray *rec_xarray,
				      struct cxl_event_dram *cur_rec)
{
	u64 cur_ts = le64_to_cpu(cur_rec->media_hdr.hdr.timestamp);
	struct cxl_event_dram *rec;
	unsigned long index;
	u64 delta_secs;

	xa_for_each(rec_xarray, index, rec) {
		delta_secs = (cur_ts -
			le64_to_cpu(rec->media_hdr.hdr.timestamp)) / 1000000000ULL;
		if (delta_secs >= CXL_MAX_STORAGE_TIME_SECS) {
			xa_erase(rec_xarray, index);
			kfree(rec);
		}
	}
}

#define CXL_MAX_REC_STORAGE_COUNT 200

static void cxl_del_overflow_old_recs(struct xarray *rec_xarray)
{
	void *err_rec;
	unsigned long index, count = 0;

	xa_for_each(rec_xarray, index, err_rec)
		count++;

	if (count <= CXL_MAX_REC_STORAGE_COUNT)
		return;

	count -= CXL_MAX_REC_STORAGE_COUNT;
	xa_for_each(rec_xarray, index, err_rec) {
		xa_erase(rec_xarray, index);
		kfree(err_rec);
		count--;
		if (!count)
			break;
	}
}

int cxl_store_rec_gen_media(struct cxl_memdev *cxlmd, union cxl_event *evt)
{
	struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
	struct cxl_event_gen_media *rec;
	void *old_rec;

	if (!IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR) || !array_rec)
		return 0;

	rec = kmemdup(&evt->gen_media, sizeof(*rec), GFP_KERNEL);
	if (!rec)
		return -ENOMEM;

	old_rec = xa_store(&array_rec->rec_gen_media,
			   le64_to_cpu(rec->media_hdr.phys_addr), rec,
			   GFP_KERNEL);
	if (xa_is_err(old_rec)) {
		kfree(rec);
		return xa_err(old_rec);
	}

	kfree(old_rec);

	cxl_del_expired_gmedia_recs(&array_rec->rec_gen_media, rec);
	cxl_del_overflow_old_recs(&array_rec->rec_gen_media);

	return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_store_rec_gen_media, "CXL");

int cxl_store_rec_dram(struct cxl_memdev *cxlmd, union cxl_event *evt)
{
	struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
	struct cxl_event_dram *rec;
	void *old_rec;

	if (!IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR) || !array_rec)
		return 0;

	rec = kmemdup(&evt->dram, sizeof(*rec), GFP_KERNEL);
	if (!rec)
		return -ENOMEM;

	old_rec = xa_store(&array_rec->rec_dram,
			   le64_to_cpu(rec->media_hdr.phys_addr), rec,
			   GFP_KERNEL);
	if (xa_is_err(old_rec)) {
		kfree(rec);
		return xa_err(old_rec);
	}

	kfree(old_rec);

	cxl_del_expired_dram_recs(&array_rec->rec_dram, rec);
	cxl_del_overflow_old_recs(&array_rec->rec_dram);

	return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_store_rec_dram, "CXL");

static bool cxl_is_memdev_memory_online(const struct cxl_memdev *cxlmd)
{
	struct cxl_port *port = cxlmd->endpoint;

	if (port && cxl_num_decoders_committed(port))
		return true;

	return false;
}

/*
 * CXL memory sparing control
 */
enum cxl_mem_sparing_granularity {
	CXL_MEM_SPARING_CACHELINE,
	CXL_MEM_SPARING_ROW,
	CXL_MEM_SPARING_BANK,
	CXL_MEM_SPARING_RANK,
	CXL_MEM_SPARING_MAX
};

struct cxl_mem_sparing_context {
	struct cxl_memdev *cxlmd;
	uuid_t repair_uuid;
	u16 get_feat_size;
	u16 set_feat_size;
	u16 effects;
	u8 instance;
	u8 get_version;
	u8 set_version;
	u8 op_class;
	u8 op_subclass;
	bool cap_safe_when_in_use;
	bool cap_hard_sparing;
	bool cap_soft_sparing;
	u8 channel;
	u8 rank;
	u8 bank_group;
	u32 nibble_mask;
	u64 dpa;
	u32 row;
	u16 column;
	u8 bank;
	u8 sub_channel;
	enum edac_mem_repair_type repair_type;
	bool persist_mode;
};

#define CXL_SPARING_RD_CAP_SAFE_IN_USE_MASK BIT(0)
#define CXL_SPARING_RD_CAP_HARD_SPARING_MASK BIT(1)
#define CXL_SPARING_RD_CAP_SOFT_SPARING_MASK BIT(2)

#define CXL_SPARING_WR_DEVICE_INITIATED_MASK BIT(0)

#define CXL_SPARING_QUERY_RESOURCE_FLAG BIT(0)
#define CXL_SET_HARD_SPARING_FLAG BIT(1)
#define CXL_SPARING_SUB_CHNL_VALID_FLAG BIT(2)
#define CXL_SPARING_NIB_MASK_VALID_FLAG BIT(3)

#define CXL_GET_SPARING_SAFE_IN_USE(flags) \
	(FIELD_GET(CXL_SPARING_RD_CAP_SAFE_IN_USE_MASK, \
		   flags) ^ 1)
#define CXL_GET_CAP_HARD_SPARING(flags) \
	FIELD_GET(CXL_SPARING_RD_CAP_HARD_SPARING_MASK, \
		  flags)
#define CXL_GET_CAP_SOFT_SPARING(flags) \
	FIELD_GET(CXL_SPARING_RD_CAP_SOFT_SPARING_MASK, \
		  flags)

#define CXL_SET_SPARING_QUERY_RESOURCE(val) \
	FIELD_PREP(CXL_SPARING_QUERY_RESOURCE_FLAG, val)
#define CXL_SET_HARD_SPARING(val) \
	FIELD_PREP(CXL_SET_HARD_SPARING_FLAG, val)
#define CXL_SET_SPARING_SUB_CHNL_VALID(val) \
	FIELD_PREP(CXL_SPARING_SUB_CHNL_VALID_FLAG, val)
#define CXL_SET_SPARING_NIB_MASK_VALID(val) \
	FIELD_PREP(CXL_SPARING_NIB_MASK_VALID_FLAG, val)

/*
 * See CXL spec rev 3.2 @8.2.10.7.2.3 Table 8-134 Memory Sparing Feature
 * Readable Attributes.
 */
struct cxl_memdev_repair_rd_attrbs_hdr {
	u8 max_op_latency;
	__le16 op_cap;
	__le16 op_mode;
	u8 op_class;
	u8 op_subclass;
	u8 rsvd[9];
} __packed;

struct cxl_memdev_sparing_rd_attrbs {
	struct cxl_memdev_repair_rd_attrbs_hdr hdr;
	u8 rsvd;
	__le16 restriction_flags;
} __packed;

/*
 * See CXL spec rev 3.2 @8.2.10.7.1.4 Table 8-120 Memory Sparing Input Payload.
 */
struct cxl_memdev_sparing_in_payload {
	u8 flags;
	u8 channel;
	u8 rank;
	u8 nibble_mask[3];
	u8 bank_group;
	u8 bank;
	u8 row[3];
	__le16 column;
	u8 sub_channel;
} __packed;

static int
cxl_mem_sparing_get_attrbs(struct cxl_mem_sparing_context *cxl_sparing_ctx)
{
	size_t rd_data_size = sizeof(struct cxl_memdev_sparing_rd_attrbs);
	struct cxl_memdev *cxlmd = cxl_sparing_ctx->cxlmd;
	struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
	u16 restriction_flags;
	size_t data_size;
	u16 return_code;
	struct cxl_memdev_sparing_rd_attrbs *rd_attrbs __free(kfree) =
		kzalloc(rd_data_size, GFP_KERNEL);
	if (!rd_attrbs)
		return -ENOMEM;

	data_size = cxl_get_feature(cxl_mbox, &cxl_sparing_ctx->repair_uuid,
				    CXL_GET_FEAT_SEL_CURRENT_VALUE, rd_attrbs,
				    rd_data_size, 0, &return_code);
	if (!data_size)
		return -EIO;

	cxl_sparing_ctx->op_class = rd_attrbs->hdr.op_class;
	cxl_sparing_ctx->op_subclass = rd_attrbs->hdr.op_subclass;
	restriction_flags = le16_to_cpu(rd_attrbs->restriction_flags);
	cxl_sparing_ctx->cap_safe_when_in_use =
		CXL_GET_SPARING_SAFE_IN_USE(restriction_flags);
	cxl_sparing_ctx->cap_hard_sparing =
		CXL_GET_CAP_HARD_SPARING(restriction_flags);
	cxl_sparing_ctx->cap_soft_sparing =
		CXL_GET_CAP_SOFT_SPARING(restriction_flags);

	return 0;
}

static struct cxl_event_dram *
cxl_mem_get_rec_dram(struct cxl_memdev *cxlmd,
		     struct cxl_mem_sparing_context *ctx)
{
	struct cxl_mem_repair_attrbs attrbs = { 0 };

	attrbs.dpa = ctx->dpa;
	attrbs.channel = ctx->channel;
	attrbs.rank = ctx->rank;
	attrbs.nibble_mask = ctx->nibble_mask;
	switch (ctx->repair_type) {
	case EDAC_REPAIR_CACHELINE_SPARING:
		attrbs.repair_type = CXL_CACHELINE_SPARING;
		attrbs.bank_group = ctx->bank_group;
		attrbs.bank = ctx->bank;
		attrbs.row = ctx->row;
		attrbs.column = ctx->column;
		attrbs.sub_channel = ctx->sub_channel;
		break;
	case EDAC_REPAIR_ROW_SPARING:
		attrbs.repair_type = CXL_ROW_SPARING;
		attrbs.bank_group = ctx->bank_group;
		attrbs.bank = ctx->bank;
		attrbs.row = ctx->row;
		break;
	case EDAC_REPAIR_BANK_SPARING:
		attrbs.repair_type = CXL_BANK_SPARING;
		attrbs.bank_group = ctx->bank_group;
		attrbs.bank = ctx->bank;
		break;
	case EDAC_REPAIR_RANK_SPARING:
		attrbs.repair_type = CXL_RANK_SPARING;
		break;
	default:
		return NULL;
	}

	return cxl_find_rec_dram(cxlmd, &attrbs);
}

static int
cxl_mem_perform_sparing(struct device *dev,
			struct cxl_mem_sparing_context *cxl_sparing_ctx)
{
	struct cxl_memdev *cxlmd = cxl_sparing_ctx->cxlmd;
	struct cxl_memdev_sparing_in_payload sparing_pi;
	struct cxl_event_dram *rec = NULL;
	u16 validity_flags = 0;
	int ret;

	ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
	if ((ret = ACQUIRE_ERR(rwsem_read_intr, &region_rwsem)))
		return ret;

	ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa);
	if ((ret = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem)))
		return ret;

	if (!cxl_sparing_ctx->cap_safe_when_in_use) {
		/* Memory to repair must be offline */
		if (cxl_is_memdev_memory_online(cxlmd))
			return -EBUSY;
	} else {
		if (cxl_is_memdev_memory_online(cxlmd)) {
			rec = cxl_mem_get_rec_dram(cxlmd, cxl_sparing_ctx);
			if (!rec)
				return -EINVAL;

			if (!get_unaligned_le16(rec->media_hdr.validity_flags))
				return -EINVAL;
		}
	}

	memset(&sparing_pi, 0, sizeof(sparing_pi));
	sparing_pi.flags = CXL_SET_SPARING_QUERY_RESOURCE(0);
	if (cxl_sparing_ctx->persist_mode)
		sparing_pi.flags |= CXL_SET_HARD_SPARING(1);

	if (rec)
		validity_flags = get_unaligned_le16(rec->media_hdr.validity_flags);

	switch (cxl_sparing_ctx->repair_type) {
	case EDAC_REPAIR_CACHELINE_SPARING:
		sparing_pi.column = cpu_to_le16(cxl_sparing_ctx->column);
		if (!rec || (validity_flags & CXL_DER_VALID_SUB_CHANNEL)) {
			sparing_pi.flags |= CXL_SET_SPARING_SUB_CHNL_VALID(1);
			sparing_pi.sub_channel = cxl_sparing_ctx->sub_channel;
		}
		fallthrough;
	case EDAC_REPAIR_ROW_SPARING:
		put_unaligned_le24(cxl_sparing_ctx->row, sparing_pi.row);
		fallthrough;
	case EDAC_REPAIR_BANK_SPARING:
		sparing_pi.bank_group = cxl_sparing_ctx->bank_group;
		sparing_pi.bank = cxl_sparing_ctx->bank;
		fallthrough;
	case EDAC_REPAIR_RANK_SPARING:
		sparing_pi.rank = cxl_sparing_ctx->rank;
		fallthrough;
	default:
		sparing_pi.channel = cxl_sparing_ctx->channel;
		if ((rec && (validity_flags & CXL_DER_VALID_NIBBLE)) ||
		    (!rec && (!cxl_sparing_ctx->nibble_mask ||
			      (cxl_sparing_ctx->nibble_mask & 0xFFFFFF)))) {
			sparing_pi.flags |= CXL_SET_SPARING_NIB_MASK_VALID(1);
			put_unaligned_le24(cxl_sparing_ctx->nibble_mask,
					   sparing_pi.nibble_mask);
		}
		break;
	}

	return cxl_perform_maintenance(&cxlmd->cxlds->cxl_mbox,
				       cxl_sparing_ctx->op_class,
				       cxl_sparing_ctx->op_subclass,
				       &sparing_pi, sizeof(sparing_pi));
}

static int cxl_mem_sparing_get_repair_type(struct device *dev, void *drv_data,
					   const char **repair_type)
{
	struct cxl_mem_sparing_context *ctx = drv_data;

	switch (ctx->repair_type) {
	case EDAC_REPAIR_CACHELINE_SPARING:
	case EDAC_REPAIR_ROW_SPARING:
	case EDAC_REPAIR_BANK_SPARING:
	case EDAC_REPAIR_RANK_SPARING:
		*repair_type = edac_repair_type[ctx->repair_type];
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

#define CXL_SPARING_GET_ATTR(attrb, data_type) \
	static int cxl_mem_sparing_get_##attrb( \
		struct device *dev, void *drv_data, data_type *val) \
	{ \
		struct cxl_mem_sparing_context *ctx = drv_data; \
		\
		*val = ctx->attrb; \
		\
		return 0; \
	}
CXL_SPARING_GET_ATTR(persist_mode, bool)
CXL_SPARING_GET_ATTR(dpa, u64)
CXL_SPARING_GET_ATTR(nibble_mask, u32)
CXL_SPARING_GET_ATTR(bank_group, u32)
CXL_SPARING_GET_ATTR(bank, u32)
CXL_SPARING_GET_ATTR(rank, u32)
CXL_SPARING_GET_ATTR(row, u32)
CXL_SPARING_GET_ATTR(column, u32)
CXL_SPARING_GET_ATTR(channel, u32)
CXL_SPARING_GET_ATTR(sub_channel, u32)

#define CXL_SPARING_SET_ATTR(attrb, data_type) \
	static int cxl_mem_sparing_set_##attrb(struct device *dev, \
					       void *drv_data, data_type val) \
	{ \
		struct cxl_mem_sparing_context *ctx = drv_data; \
		\
		ctx->attrb = val; \
		\
		return 0; \
	}
CXL_SPARING_SET_ATTR(nibble_mask, u32)
CXL_SPARING_SET_ATTR(bank_group, u32)
CXL_SPARING_SET_ATTR(bank, u32)
CXL_SPARING_SET_ATTR(rank, u32)
CXL_SPARING_SET_ATTR(row, u32)
CXL_SPARING_SET_ATTR(column, u32)
CXL_SPARING_SET_ATTR(channel, u32)
CXL_SPARING_SET_ATTR(sub_channel, u32)

static int cxl_mem_sparing_set_persist_mode(struct device *dev, void *drv_data,
					    bool persist_mode)
{
	struct cxl_mem_sparing_context *ctx = drv_data;

	if ((persist_mode && ctx->cap_hard_sparing) ||
	    (!persist_mode && ctx->cap_soft_sparing))
		ctx->persist_mode = persist_mode;
	else
		return -EOPNOTSUPP;

	return 0;
}

static int cxl_get_mem_sparing_safe_when_in_use(struct device *dev,
						void *drv_data, bool *safe)
{
	struct cxl_mem_sparing_context *ctx = drv_data;

	*safe = ctx->cap_safe_when_in_use;

	return 0;
}

static int cxl_mem_sparing_get_min_dpa(struct device *dev, void *drv_data,
				       u64 *min_dpa)
{
	struct cxl_mem_sparing_context *ctx = drv_data;
	struct cxl_memdev *cxlmd = ctx->cxlmd;
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	*min_dpa = cxlds->dpa_res.start;

	return 0;
}

static int cxl_mem_sparing_get_max_dpa(struct device *dev, void *drv_data,
				       u64 *max_dpa)
{
	struct cxl_mem_sparing_context *ctx = drv_data;
	struct cxl_memdev *cxlmd = ctx->cxlmd;
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	*max_dpa = cxlds->dpa_res.end;

	return 0;
}

static int cxl_mem_sparing_set_dpa(struct device *dev, void *drv_data, u64 dpa)
{
	struct cxl_mem_sparing_context *ctx = drv_data;
	struct cxl_memdev *cxlmd = ctx->cxlmd;
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	if (!cxl_resource_contains_addr(&cxlds->dpa_res, dpa))
		return -EINVAL;

	ctx->dpa = dpa;

	return 0;
}

static int cxl_do_mem_sparing(struct device *dev, void *drv_data, u32 val)
{
	struct cxl_mem_sparing_context *ctx = drv_data;

	if (val != EDAC_DO_MEM_REPAIR)
		return -EINVAL;

	return cxl_mem_perform_sparing(dev, ctx);
}

#define RANK_OPS \
	.get_repair_type = cxl_mem_sparing_get_repair_type, \
	.get_persist_mode = cxl_mem_sparing_get_persist_mode, \
	.set_persist_mode = cxl_mem_sparing_set_persist_mode, \
	.get_repair_safe_when_in_use = cxl_get_mem_sparing_safe_when_in_use, \
	.get_min_dpa = cxl_mem_sparing_get_min_dpa, \
	.get_max_dpa = cxl_mem_sparing_get_max_dpa, \
	.get_dpa = cxl_mem_sparing_get_dpa, \
	.set_dpa = cxl_mem_sparing_set_dpa, \
	.get_nibble_mask = cxl_mem_sparing_get_nibble_mask, \
	.set_nibble_mask = cxl_mem_sparing_set_nibble_mask, \
	.get_rank = cxl_mem_sparing_get_rank, \
	.set_rank = cxl_mem_sparing_set_rank, \
	.get_channel = cxl_mem_sparing_get_channel, \
	.set_channel = cxl_mem_sparing_set_channel, \
	.do_repair = cxl_do_mem_sparing

#define BANK_OPS \
	RANK_OPS, .get_bank_group = cxl_mem_sparing_get_bank_group, \
		  .set_bank_group = cxl_mem_sparing_set_bank_group, \
		  .get_bank = cxl_mem_sparing_get_bank, \
		  .set_bank = cxl_mem_sparing_set_bank

#define ROW_OPS \
	BANK_OPS, .get_row = cxl_mem_sparing_get_row, \
		  .set_row = cxl_mem_sparing_set_row

#define CACHELINE_OPS \
	ROW_OPS, .get_column = cxl_mem_sparing_get_column, \
		 .set_column = cxl_mem_sparing_set_column, \
		 .get_sub_channel = cxl_mem_sparing_get_sub_channel, \
		 .set_sub_channel = cxl_mem_sparing_set_sub_channel

static const struct edac_mem_repair_ops cxl_rank_sparing_ops = {
	RANK_OPS,
};

static const struct edac_mem_repair_ops cxl_bank_sparing_ops = {
	BANK_OPS,
};

static const struct edac_mem_repair_ops cxl_row_sparing_ops = {
	ROW_OPS,
};

static const struct edac_mem_repair_ops cxl_cacheline_sparing_ops = {
	CACHELINE_OPS,
};

struct cxl_mem_sparing_desc {
	const uuid_t repair_uuid;
	enum edac_mem_repair_type repair_type;
	const struct edac_mem_repair_ops *repair_ops;
};

static const struct cxl_mem_sparing_desc mem_sparing_desc[] = {
	{
		.repair_uuid = CXL_FEAT_CACHELINE_SPARING_UUID,
		.repair_type = EDAC_REPAIR_CACHELINE_SPARING,
		.repair_ops = &cxl_cacheline_sparing_ops,
	},
	{
		.repair_uuid = CXL_FEAT_ROW_SPARING_UUID,
		.repair_type = EDAC_REPAIR_ROW_SPARING,
		.repair_ops = &cxl_row_sparing_ops,
	},
	{
		.repair_uuid = CXL_FEAT_BANK_SPARING_UUID,
		.repair_type = EDAC_REPAIR_BANK_SPARING,
		.repair_ops = &cxl_bank_sparing_ops,
	},
	{
		.repair_uuid = CXL_FEAT_RANK_SPARING_UUID,
		.repair_type = EDAC_REPAIR_RANK_SPARING,
		.repair_ops = &cxl_rank_sparing_ops,
	},
};

static int cxl_memdev_sparing_init(struct cxl_memdev *cxlmd,
				   struct edac_dev_feature *ras_feature,
				   const struct cxl_mem_sparing_desc *desc,
				   u8 repair_inst)
{
	struct cxl_mem_sparing_context *cxl_sparing_ctx;
	struct cxl_feat_entry *feat_entry;
	int ret;

	feat_entry = cxl_feature_info(to_cxlfs(cxlmd->cxlds),
				      &desc->repair_uuid);
	if (IS_ERR(feat_entry))
		return -EOPNOTSUPP;

	if (!(le32_to_cpu(feat_entry->flags) & CXL_FEATURE_F_CHANGEABLE))
		return -EOPNOTSUPP;

	cxl_sparing_ctx = devm_kzalloc(&cxlmd->dev, sizeof(*cxl_sparing_ctx),
				       GFP_KERNEL);
	if (!cxl_sparing_ctx)
		return -ENOMEM;

	*cxl_sparing_ctx = (struct cxl_mem_sparing_context){
		.get_feat_size = le16_to_cpu(feat_entry->get_feat_size),
		.set_feat_size = le16_to_cpu(feat_entry->set_feat_size),
		.get_version = feat_entry->get_feat_ver,
		.set_version = feat_entry->set_feat_ver,
		.effects = le16_to_cpu(feat_entry->effects),
		.cxlmd = cxlmd,
		.repair_type = desc->repair_type,
		.instance = repair_inst++,
	};
	uuid_copy(&cxl_sparing_ctx->repair_uuid, &desc->repair_uuid);

	ret = cxl_mem_sparing_get_attrbs(cxl_sparing_ctx);
	if (ret)
		return ret;

	if ((cxl_sparing_ctx->cap_soft_sparing &&
	     cxl_sparing_ctx->cap_hard_sparing) ||
	    cxl_sparing_ctx->cap_soft_sparing)
		cxl_sparing_ctx->persist_mode = 0;
	else if (cxl_sparing_ctx->cap_hard_sparing)
		cxl_sparing_ctx->persist_mode = 1;
	else
		return -EOPNOTSUPP;

	ras_feature->ft_type = RAS_FEAT_MEM_REPAIR;
	ras_feature->instance = cxl_sparing_ctx->instance;
	ras_feature->mem_repair_ops = desc->repair_ops;
	ras_feature->ctx = cxl_sparing_ctx;

	return 0;
}

/*
 * CXL memory soft PPR & hard PPR control
 */
struct cxl_ppr_context {
	uuid_t repair_uuid;
	u8 instance;
	u16 get_feat_size;
	u16 set_feat_size;
	u8 get_version;
	u8 set_version;
	u16 effects;
	u8 op_class;
	u8 op_subclass;
	bool cap_dpa;
	bool cap_nib_mask;
	bool media_accessible;
	bool data_retained;
	struct cxl_memdev *cxlmd;
	enum edac_mem_repair_type repair_type;
	bool persist_mode;
	u64 dpa;
	u32 nibble_mask;
};

/*
 * See CXL rev 3.2 @8.2.10.7.2.1 Table 8-128 sPPR Feature Readable Attributes
 *
 * See CXL rev 3.2 @8.2.10.7.2.2 Table 8-131 hPPR Feature Readable Attributes
 */

#define CXL_PPR_OP_CAP_DEVICE_INITIATED BIT(0)
#define CXL_PPR_OP_MODE_DEV_INITIATED BIT(0)

#define CXL_PPR_FLAG_DPA_SUPPORT_MASK BIT(0)
#define CXL_PPR_FLAG_NIB_SUPPORT_MASK BIT(1)
#define CXL_PPR_FLAG_MEM_SPARING_EV_REC_SUPPORT_MASK BIT(2)
#define CXL_PPR_FLAG_DEV_INITED_PPR_AT_BOOT_CAP_MASK BIT(3)

#define CXL_PPR_RESTRICTION_FLAG_MEDIA_ACCESSIBLE_MASK BIT(0)
#define CXL_PPR_RESTRICTION_FLAG_DATA_RETAINED_MASK BIT(2)

#define CXL_PPR_SPARING_EV_REC_EN_MASK BIT(0)
#define CXL_PPR_DEV_INITED_PPR_AT_BOOT_EN_MASK BIT(1)

#define CXL_PPR_GET_CAP_DPA(flags) \
	FIELD_GET(CXL_PPR_FLAG_DPA_SUPPORT_MASK, flags)
#define CXL_PPR_GET_CAP_NIB_MASK(flags) \
	FIELD_GET(CXL_PPR_FLAG_NIB_SUPPORT_MASK, flags)
#define CXL_PPR_GET_MEDIA_ACCESSIBLE(restriction_flags) \
	(FIELD_GET(CXL_PPR_RESTRICTION_FLAG_MEDIA_ACCESSIBLE_MASK, \
		   restriction_flags) ^ 1)
#define CXL_PPR_GET_DATA_RETAINED(restriction_flags) \
	(FIELD_GET(CXL_PPR_RESTRICTION_FLAG_DATA_RETAINED_MASK, \
		   restriction_flags) ^ 1)

struct cxl_memdev_ppr_rd_attrbs {
	struct cxl_memdev_repair_rd_attrbs_hdr hdr;
	u8 ppr_flags;
	__le16 restriction_flags;
	u8 ppr_op_mode;
} __packed;

/*
 * See CXL rev 3.2 @8.2.10.7.1.2 Table 8-118 sPPR Maintenance Input Payload
 *
 * See CXL rev 3.2 @8.2.10.7.1.3 Table 8-119 hPPR Maintenance Input Payload
 */
struct cxl_memdev_ppr_maintenance_attrbs {
	u8 flags;
	__le64 dpa;
	u8 nibble_mask[3];
} __packed;

static int cxl_mem_ppr_get_attrbs(struct cxl_ppr_context *cxl_ppr_ctx)
{
	size_t rd_data_size = sizeof(struct cxl_memdev_ppr_rd_attrbs);
	struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
	struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
	u16 restriction_flags;
	size_t data_size;
	u16 return_code;

	struct cxl_memdev_ppr_rd_attrbs *rd_attrbs __free(kfree) =
		kmalloc(rd_data_size, GFP_KERNEL);
	if (!rd_attrbs)
		return -ENOMEM;

	data_size = cxl_get_feature(cxl_mbox, &cxl_ppr_ctx->repair_uuid,
				    CXL_GET_FEAT_SEL_CURRENT_VALUE, rd_attrbs,
				    rd_data_size, 0, &return_code);
	if (!data_size)
		return -EIO;

	cxl_ppr_ctx->op_class = rd_attrbs->hdr.op_class;
	cxl_ppr_ctx->op_subclass = rd_attrbs->hdr.op_subclass;
	cxl_ppr_ctx->cap_dpa = CXL_PPR_GET_CAP_DPA(rd_attrbs->ppr_flags);
	cxl_ppr_ctx->cap_nib_mask =
		CXL_PPR_GET_CAP_NIB_MASK(rd_attrbs->ppr_flags);

	restriction_flags = le16_to_cpu(rd_attrbs->restriction_flags);
	cxl_ppr_ctx->media_accessible =
		CXL_PPR_GET_MEDIA_ACCESSIBLE(restriction_flags);
	cxl_ppr_ctx->data_retained =
		CXL_PPR_GET_DATA_RETAINED(restriction_flags);

	return 0;
}

static int cxl_mem_perform_ppr(struct cxl_ppr_context *cxl_ppr_ctx)
{
	struct cxl_memdev_ppr_maintenance_attrbs maintenance_attrbs;
	struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
	struct cxl_mem_repair_attrbs attrbs = { 0 };
	int ret;

	ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
	if ((ret = ACQUIRE_ERR(rwsem_read_intr, &region_rwsem)))
		return ret;

	ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa);
	if ((ret = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem)))
		return ret;

	if (!cxl_ppr_ctx->media_accessible || !cxl_ppr_ctx->data_retained) {
		/* Memory to repair must be offline */
		if (cxl_is_memdev_memory_online(cxlmd))
			return -EBUSY;
	} else {
		if (cxl_is_memdev_memory_online(cxlmd)) {
			/* Check memory to repair is from the current boot */
			attrbs.repair_type = CXL_PPR;
			attrbs.dpa = cxl_ppr_ctx->dpa;
			attrbs.nibble_mask = cxl_ppr_ctx->nibble_mask;
			if (!cxl_find_rec_dram(cxlmd, &attrbs) &&
			    !cxl_find_rec_gen_media(cxlmd, &attrbs))
				return -EINVAL;
		}
	}

	memset(&maintenance_attrbs, 0, sizeof(maintenance_attrbs));
	maintenance_attrbs.flags = 0;
	maintenance_attrbs.dpa = cpu_to_le64(cxl_ppr_ctx->dpa);
	put_unaligned_le24(cxl_ppr_ctx->nibble_mask,
			   maintenance_attrbs.nibble_mask);

	return cxl_perform_maintenance(&cxlmd->cxlds->cxl_mbox,
				       cxl_ppr_ctx->op_class,
				       cxl_ppr_ctx->op_subclass,
				       &maintenance_attrbs,
				       sizeof(maintenance_attrbs));
}

static int cxl_ppr_get_repair_type(struct device *dev, void *drv_data,
				   const char **repair_type)
{
	*repair_type = edac_repair_type[EDAC_REPAIR_PPR];

	return 0;
}

static int cxl_ppr_get_persist_mode(struct device *dev, void *drv_data,
				    bool *persist_mode)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;

	*persist_mode = cxl_ppr_ctx->persist_mode;

	return 0;
}

static int cxl_get_ppr_safe_when_in_use(struct device *dev, void *drv_data,
					bool *safe)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;

	*safe = cxl_ppr_ctx->media_accessible & cxl_ppr_ctx->data_retained;

	return 0;
}

static int cxl_ppr_get_min_dpa(struct device *dev, void *drv_data, u64 *min_dpa)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
	struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	*min_dpa = cxlds->dpa_res.start;

	return 0;
}

static int cxl_ppr_get_max_dpa(struct device *dev, void *drv_data, u64 *max_dpa)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
	struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	*max_dpa = cxlds->dpa_res.end;

	return 0;
}

static int cxl_ppr_get_dpa(struct device *dev, void *drv_data, u64 *dpa)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;

	*dpa = cxl_ppr_ctx->dpa;

	return 0;
}

static int cxl_ppr_set_dpa(struct device *dev, void *drv_data, u64 dpa)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
	struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	if (!cxl_resource_contains_addr(&cxlds->dpa_res, dpa))
		return -EINVAL;

	cxl_ppr_ctx->dpa = dpa;

	return 0;
}

static int cxl_ppr_get_nibble_mask(struct device *dev, void *drv_data,
				   u32 *nibble_mask)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;

	*nibble_mask = cxl_ppr_ctx->nibble_mask;

	return 0;
}

static int cxl_ppr_set_nibble_mask(struct device *dev, void *drv_data,
				   u32 nibble_mask)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;

	cxl_ppr_ctx->nibble_mask = nibble_mask;

	return 0;
}

static int cxl_do_ppr(struct device *dev, void *drv_data, u32 val)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
	struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	if (val != EDAC_DO_MEM_REPAIR ||
	    !cxl_resource_contains_addr(&cxlds->dpa_res, cxl_ppr_ctx->dpa))
		return -EINVAL;

	return cxl_mem_perform_ppr(cxl_ppr_ctx);
}

static const struct edac_mem_repair_ops cxl_sppr_ops = {
	.get_repair_type = cxl_ppr_get_repair_type,
	.get_persist_mode = cxl_ppr_get_persist_mode,
	.get_repair_safe_when_in_use = cxl_get_ppr_safe_when_in_use,
	.get_min_dpa = cxl_ppr_get_min_dpa,
	.get_max_dpa = cxl_ppr_get_max_dpa,
	.get_dpa = cxl_ppr_get_dpa,
	.set_dpa = cxl_ppr_set_dpa,
	.get_nibble_mask = cxl_ppr_get_nibble_mask,
	.set_nibble_mask = cxl_ppr_set_nibble_mask,
	.do_repair = cxl_do_ppr,
};

static int cxl_memdev_soft_ppr_init(struct cxl_memdev *cxlmd,
				    struct edac_dev_feature *ras_feature,
				    u8 repair_inst)
{
	struct cxl_ppr_context *cxl_sppr_ctx;
	struct cxl_feat_entry *feat_entry;
	int ret;

	feat_entry = cxl_feature_info(to_cxlfs(cxlmd->cxlds),
				      &CXL_FEAT_SPPR_UUID);
	if (IS_ERR(feat_entry))
		return -EOPNOTSUPP;

	if (!(le32_to_cpu(feat_entry->flags) & CXL_FEATURE_F_CHANGEABLE))
		return -EOPNOTSUPP;

	cxl_sppr_ctx =
		devm_kzalloc(&cxlmd->dev, sizeof(*cxl_sppr_ctx), GFP_KERNEL);
	if (!cxl_sppr_ctx)
		return -ENOMEM;

	*cxl_sppr_ctx = (struct cxl_ppr_context){
		.get_feat_size = le16_to_cpu(feat_entry->get_feat_size),
		.set_feat_size = le16_to_cpu(feat_entry->set_feat_size),
		.get_version = feat_entry->get_feat_ver,
		.set_version = feat_entry->set_feat_ver,
		.effects = le16_to_cpu(feat_entry->effects),
		.cxlmd = cxlmd,
		.repair_type = EDAC_REPAIR_PPR,
		.persist_mode = 0,
		.instance = repair_inst,
	};
	uuid_copy(&cxl_sppr_ctx->repair_uuid, &CXL_FEAT_SPPR_UUID);

	ret = cxl_mem_ppr_get_attrbs(cxl_sppr_ctx);
	if (ret)
		return ret;

	ras_feature->ft_type = RAS_FEAT_MEM_REPAIR;
	ras_feature->instance = cxl_sppr_ctx->instance;
	ras_feature->mem_repair_ops = &cxl_sppr_ops;
	ras_feature->ctx = cxl_sppr_ctx;

	return 0;
}

int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd)
{
	struct edac_dev_feature ras_features[CXL_NR_EDAC_DEV_FEATURES];
	int num_ras_features = 0;
	u8 repair_inst = 0;
	int rc;

	if (IS_ENABLED(CONFIG_CXL_EDAC_SCRUB)) {
		rc = cxl_memdev_scrub_init(cxlmd, &ras_features[num_ras_features], 0);
		if (rc < 0 && rc != -EOPNOTSUPP)
			return rc;

		if (rc != -EOPNOTSUPP)
			num_ras_features++;
	}

	if (IS_ENABLED(CONFIG_CXL_EDAC_ECS)) {
		rc = cxl_memdev_ecs_init(cxlmd, &ras_features[num_ras_features]);
		if (rc < 0 && rc != -EOPNOTSUPP)
			return rc;

		if (rc != -EOPNOTSUPP)
			num_ras_features++;
	}

	if (IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR)) {
		for (int i = 0; i < CXL_MEM_SPARING_MAX; i++) {
			rc = cxl_memdev_sparing_init(cxlmd,
						     &ras_features[num_ras_features],
						     &mem_sparing_desc[i],
						     repair_inst);
			if (rc == -EOPNOTSUPP)
				continue;
			if (rc < 0)
				return rc;

			repair_inst++;
			num_ras_features++;
		}

		rc = cxl_memdev_soft_ppr_init(cxlmd, &ras_features[num_ras_features],
					      repair_inst);
		if (rc < 0 && rc != -EOPNOTSUPP)
			return rc;

		if (rc != -EOPNOTSUPP) {
			repair_inst++;
			num_ras_features++;
		}

		if (repair_inst) {
			struct cxl_mem_err_rec *array_rec =
				devm_kzalloc(&cxlmd->dev, sizeof(*array_rec),
					     GFP_KERNEL);
			if (!array_rec)
				return -ENOMEM;

			xa_init(&array_rec->rec_gen_media);
			xa_init(&array_rec->rec_dram);
			cxlmd->err_rec_array = array_rec;
		}
	}

	if (!num_ras_features)
		return -EINVAL;

	char *cxl_dev_name __free(kfree) =
		kasprintf(GFP_KERNEL, "cxl_%s", dev_name(&cxlmd->dev));
	if (!cxl_dev_name)
		return -ENOMEM;

	return edac_dev_register(&cxlmd->dev, cxl_dev_name, NULL,
				 num_ras_features, ras_features);
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_memdev_edac_register, "CXL");

int devm_cxl_region_edac_register(struct cxl_region *cxlr)
{
	struct edac_dev_feature ras_features[CXL_NR_EDAC_DEV_FEATURES];
	int num_ras_features = 0;
	int rc;

	if (!IS_ENABLED(CONFIG_CXL_EDAC_SCRUB))
		return 0;

	rc = cxl_region_scrub_init(cxlr, &ras_features[num_ras_features], 0);
	if (rc < 0)
		return rc;

	num_ras_features++;

	char *cxl_dev_name __free(kfree) =
		kasprintf(GFP_KERNEL, "cxl_%s", dev_name(&cxlr->dev));
	if (!cxl_dev_name)
		return -ENOMEM;

	return edac_dev_register(&cxlr->dev, cxl_dev_name, NULL,
				 num_ras_features, ras_features);
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_region_edac_register, "CXL");

void devm_cxl_memdev_edac_release(struct cxl_memdev *cxlmd)
{
	struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
	struct cxl_event_gen_media *rec_gen_media;
	struct cxl_event_dram *rec_dram;
	unsigned long index;

	if (!IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR) || !array_rec)
		return;

	xa_for_each(&array_rec->rec_dram, index, rec_dram)
		kfree(rec_dram);
	xa_destroy(&array_rec->rec_dram);

	xa_for_each(&array_rec->rec_gen_media, index, rec_gen_media)
		kfree(rec_gen_media);
	xa_destroy(&array_rec->rec_gen_media);
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_memdev_edac_release, "CXL");
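
/*
 * Illustrative call flow (a sketch, not part of this file): the memdev and
 * region drivers are expected to call the register/release helpers above
 * from their probe/remove paths. The names example_probe()/example_remove()
 * below are placeholders, not real callers.
 *
 *	static int example_probe(struct cxl_memdev *cxlmd)
 *	{
 *		int rc = devm_cxl_memdev_edac_register(cxlmd);
 *
 *		if (rc)
 *			return rc;
 *		...
 *	}
 *
 *	static void example_remove(struct cxl_memdev *cxlmd)
 *	{
 *		devm_cxl_memdev_edac_release(cxlmd);
 *	}
 */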