// SPDX-License-Identifier: GPL-2.0-only
/*
 * CXL EDAC memory feature driver.
 *
 * Copyright (c) 2024-2025 HiSilicon Limited.
 *
 * - Supports functions to configure the EDAC features of
 *   CXL memory devices.
 * - Registers with the EDAC device subsystem driver to expose
 *   the feature sysfs attributes to the user for configuring
 *   CXL memory RAS features.
 */

#include <linux/cleanup.h>
#include <linux/edac.h>
#include <linux/limits.h>
#include <linux/unaligned.h>
#include <linux/xarray.h>
#include <cxl/features.h>
#include <cxl.h>
#include <cxlmem.h>
#include "core.h"
#include "trace.h"

#define CXL_NR_EDAC_DEV_FEATURES 7

#define CXL_SCRUB_NO_REGION -1

struct cxl_patrol_scrub_context {
	u8 instance;
	u16 get_feat_size;
	u16 set_feat_size;
	u8 get_version;
	u8 set_version;
	u16 effects;
	struct cxl_memdev *cxlmd;
	struct cxl_region *cxlr;
};

/*
 * See CXL spec rev 3.2 @8.2.10.9.11.1 Table 8-222 Device Patrol Scrub Control
 * Feature Readable Attributes.
 */
struct cxl_scrub_rd_attrbs {
	u8 scrub_cycle_cap;
	__le16 scrub_cycle_hours;
	u8 scrub_flags;
} __packed;

/*
 * See CXL spec rev 3.2 @8.2.10.9.11.1 Table 8-223 Device Patrol Scrub Control
 * Feature Writable Attributes.
 */
struct cxl_scrub_wr_attrbs {
	u8 scrub_cycle_hours;
	u8 scrub_flags;
} __packed;

#define CXL_SCRUB_CONTROL_CHANGEABLE	BIT(0)
#define CXL_SCRUB_CONTROL_REALTIME	BIT(1)
#define CXL_SCRUB_CONTROL_CYCLE_MASK	GENMASK(7, 0)
#define CXL_SCRUB_CONTROL_MIN_CYCLE_MASK GENMASK(15, 8)
#define CXL_SCRUB_CONTROL_ENABLE	BIT(0)

#define CXL_GET_SCRUB_CYCLE_CHANGEABLE(cap) \
	FIELD_GET(CXL_SCRUB_CONTROL_CHANGEABLE, cap)
#define CXL_GET_SCRUB_CYCLE(cycle) \
	FIELD_GET(CXL_SCRUB_CONTROL_CYCLE_MASK, cycle)
#define CXL_GET_SCRUB_MIN_CYCLE(cycle) \
	FIELD_GET(CXL_SCRUB_CONTROL_MIN_CYCLE_MASK, cycle)
#define CXL_GET_SCRUB_EN_STS(flags) FIELD_GET(CXL_SCRUB_CONTROL_ENABLE, flags)

#define CXL_SET_SCRUB_CYCLE(cycle) \
	FIELD_PREP(CXL_SCRUB_CONTROL_CYCLE_MASK, cycle)
#define CXL_SET_SCRUB_EN(en) FIELD_PREP(CXL_SCRUB_CONTROL_ENABLE, en)

static int cxl_mem_scrub_get_attrbs(struct cxl_mailbox *cxl_mbox, u8 *cap,
				    u16 *cycle, u8 *flags, u8 *min_cycle)
{
	size_t rd_data_size = sizeof(struct cxl_scrub_rd_attrbs);
	size_t data_size;
	struct cxl_scrub_rd_attrbs *rd_attrbs __free(kfree) =
		kzalloc(rd_data_size, GFP_KERNEL);
	if (!rd_attrbs)
		return -ENOMEM;

	data_size = cxl_get_feature(cxl_mbox, &CXL_FEAT_PATROL_SCRUB_UUID,
				    CXL_GET_FEAT_SEL_CURRENT_VALUE, rd_attrbs,
				    rd_data_size, 0, NULL);
	if (!data_size)
		return -EIO;

	*cap = rd_attrbs->scrub_cycle_cap;
	*cycle = le16_to_cpu(rd_attrbs->scrub_cycle_hours);
	*flags = rd_attrbs->scrub_flags;
	if (min_cycle)
		*min_cycle = CXL_GET_SCRUB_MIN_CYCLE(*cycle);

	return 0;
}

static int cxl_scrub_get_attrbs(struct cxl_patrol_scrub_context *cxl_ps_ctx,
				u8 *cap, u16 *cycle, u8 *flags, u8 *min_cycle)
{
	struct cxl_mailbox *cxl_mbox;
	struct cxl_region_params *p;
	struct cxl_memdev *cxlmd;
	struct cxl_region *cxlr;
	u8 min_scrub_cycle = 0;
	int i, ret;

	if (!cxl_ps_ctx->cxlr) {
		cxl_mbox = &cxl_ps_ctx->cxlmd->cxlds->cxl_mbox;
		return cxl_mem_scrub_get_attrbs(cxl_mbox, cap, cycle,
						flags, min_cycle);
	}

	struct rw_semaphore *region_lock __free(rwsem_read_release) =
		rwsem_read_intr_acquire(&cxl_region_rwsem);
	if (!region_lock)
		return -EINTR;

	cxlr = cxl_ps_ctx->cxlr;
	p = &cxlr->params;

	for (i = 0; i < p->nr_targets; i++) {
		struct cxl_endpoint_decoder *cxled = p->targets[i];

		cxlmd = cxled_to_memdev(cxled);
		cxl_mbox = &cxlmd->cxlds->cxl_mbox;
		ret = cxl_mem_scrub_get_attrbs(cxl_mbox, cap, cycle, flags,
					       min_cycle);
		if (ret)
			return ret;

		/*
		 * The min_scrub_cycle of a region is the max of minimum scrub
		 * cycles supported by memdevs that back the region.
		 */
		if (min_cycle)
			min_scrub_cycle = max(*min_cycle, min_scrub_cycle);
	}

	if (min_cycle)
		*min_cycle = min_scrub_cycle;

	return 0;
}

static int cxl_scrub_set_attrbs_region(struct device *dev,
				       struct cxl_patrol_scrub_context *cxl_ps_ctx,
				       u8 cycle, u8 flags)
{
	struct cxl_scrub_wr_attrbs wr_attrbs;
	struct cxl_mailbox *cxl_mbox;
	struct cxl_region_params *p;
	struct cxl_memdev *cxlmd;
	struct cxl_region *cxlr;
	int ret, i;

	struct rw_semaphore *region_lock __free(rwsem_read_release) =
		rwsem_read_intr_acquire(&cxl_region_rwsem);
	if (!region_lock)
		return -EINTR;

	cxlr = cxl_ps_ctx->cxlr;
	p = &cxlr->params;
	wr_attrbs.scrub_cycle_hours = cycle;
	wr_attrbs.scrub_flags = flags;

	for (i = 0; i < p->nr_targets; i++) {
		struct cxl_endpoint_decoder *cxled = p->targets[i];

		cxlmd = cxled_to_memdev(cxled);
		cxl_mbox = &cxlmd->cxlds->cxl_mbox;
		ret = cxl_set_feature(cxl_mbox, &CXL_FEAT_PATROL_SCRUB_UUID,
				      cxl_ps_ctx->set_version, &wr_attrbs,
				      sizeof(wr_attrbs),
				      CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET,
				      0, NULL);
		if (ret)
			return ret;

		if (cycle != cxlmd->scrub_cycle) {
			if (cxlmd->scrub_region_id != CXL_SCRUB_NO_REGION)
				dev_info(dev,
					 "Device scrub rate(%d hours) set by region%d rate overwritten by region%d scrub rate(%d hours)\n",
					 cxlmd->scrub_cycle,
					 cxlmd->scrub_region_id, cxlr->id,
					 cycle);

			cxlmd->scrub_cycle = cycle;
			cxlmd->scrub_region_id = cxlr->id;
		}
	}

	return 0;
}

static int cxl_scrub_set_attrbs_device(struct device *dev,
				       struct cxl_patrol_scrub_context *cxl_ps_ctx,
				       u8 cycle, u8 flags)
{
	struct cxl_scrub_wr_attrbs wr_attrbs;
	struct cxl_mailbox *cxl_mbox;
	struct cxl_memdev *cxlmd;
	int ret;

	wr_attrbs.scrub_cycle_hours = cycle;
	wr_attrbs.scrub_flags = flags;

	cxlmd = cxl_ps_ctx->cxlmd;
	cxl_mbox = &cxlmd->cxlds->cxl_mbox;
	ret = cxl_set_feature(cxl_mbox, &CXL_FEAT_PATROL_SCRUB_UUID,
			      cxl_ps_ctx->set_version, &wr_attrbs,
			      sizeof(wr_attrbs),
			      CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET, 0,
			      NULL);
	if (ret)
		return ret;

	if (cycle != cxlmd->scrub_cycle) {
		if (cxlmd->scrub_region_id != CXL_SCRUB_NO_REGION)
			dev_info(dev,
				 "Device scrub rate(%d hours) set by region%d rate overwritten with device local scrub rate(%d hours)\n",
				 cxlmd->scrub_cycle, cxlmd->scrub_region_id,
				 cycle);

		cxlmd->scrub_cycle = cycle;
		cxlmd->scrub_region_id = CXL_SCRUB_NO_REGION;
	}

	return 0;
}

static int cxl_scrub_set_attrbs(struct device *dev,
				struct cxl_patrol_scrub_context *cxl_ps_ctx,
				u8 cycle, u8 flags)
{
	if (cxl_ps_ctx->cxlr)
		return cxl_scrub_set_attrbs_region(dev, cxl_ps_ctx, cycle, flags);

	return cxl_scrub_set_attrbs_device(dev, cxl_ps_ctx, cycle, flags);
}

static int cxl_patrol_scrub_get_enabled_bg(struct device *dev, void *drv_data,
					   bool *enabled)
{
	struct cxl_patrol_scrub_context *ctx = drv_data;
	u8 cap, flags;
	u16 cycle;
	int ret;

	ret = cxl_scrub_get_attrbs(ctx, &cap, &cycle, &flags, NULL);
	if (ret)
		return ret;

	*enabled = CXL_GET_SCRUB_EN_STS(flags);

	return 0;
}

static int cxl_patrol_scrub_set_enabled_bg(struct device *dev, void *drv_data,
					   bool enable)
{
	struct cxl_patrol_scrub_context *ctx = drv_data;
	u8 cap, flags, wr_cycle;
	u16 rd_cycle;
	int ret;

	if (!capable(CAP_SYS_RAWIO))
		return -EPERM;

	ret = cxl_scrub_get_attrbs(ctx, &cap, &rd_cycle, &flags, NULL);
	if (ret)
		return ret;

	wr_cycle = CXL_GET_SCRUB_CYCLE(rd_cycle);
	flags = CXL_SET_SCRUB_EN(enable);

	return cxl_scrub_set_attrbs(dev, ctx, wr_cycle, flags);
}

static int cxl_patrol_scrub_get_min_scrub_cycle(struct device *dev,
						void *drv_data, u32 *min)
{
	struct cxl_patrol_scrub_context *ctx = drv_data;
	u8 cap, flags, min_cycle;
	u16 cycle;
	int ret;

	ret = cxl_scrub_get_attrbs(ctx, &cap, &cycle, &flags, &min_cycle);
	if (ret)
		return ret;

	*min = min_cycle * 3600;

	return 0;
}

static int cxl_patrol_scrub_get_max_scrub_cycle(struct device *dev,
						void *drv_data, u32 *max)
{
	*max = U8_MAX * 3600; /* Max set by register size */

	return 0;
}

static int cxl_patrol_scrub_get_scrub_cycle(struct device *dev, void *drv_data,
					    u32 *scrub_cycle_secs)
{
	struct cxl_patrol_scrub_context *ctx = drv_data;
	u8 cap, flags;
	u16 cycle;
	int ret;

	ret = cxl_scrub_get_attrbs(ctx, &cap, &cycle, &flags, NULL);
	if (ret)
		return ret;

	*scrub_cycle_secs = CXL_GET_SCRUB_CYCLE(cycle) * 3600;

	return 0;
}

static int cxl_patrol_scrub_set_scrub_cycle(struct device *dev, void *drv_data,
					    u32 scrub_cycle_secs)
{
	struct cxl_patrol_scrub_context *ctx = drv_data;
	u8 scrub_cycle_hours = scrub_cycle_secs / 3600;
	u8 cap, wr_cycle, flags, min_cycle;
	u16 rd_cycle;
	int ret;

	if (!capable(CAP_SYS_RAWIO))
		return -EPERM;

	ret = cxl_scrub_get_attrbs(ctx, &cap, &rd_cycle, &flags, &min_cycle);
	if (ret)
		return ret;

	if (!CXL_GET_SCRUB_CYCLE_CHANGEABLE(cap))
		return -EOPNOTSUPP;

	if (scrub_cycle_hours < min_cycle) {
		dev_dbg(dev, "Invalid CXL patrol scrub cycle(%d) to set\n",
			scrub_cycle_hours);
		dev_dbg(dev,
			"Minimum supported CXL patrol scrub cycle in hour %d\n",
			min_cycle);
		return -EINVAL;
	}
	wr_cycle = CXL_SET_SCRUB_CYCLE(scrub_cycle_hours);

	return cxl_scrub_set_attrbs(dev, ctx, wr_cycle, flags);
}

static const struct edac_scrub_ops cxl_ps_scrub_ops = {
	.get_enabled_bg = cxl_patrol_scrub_get_enabled_bg,
	.set_enabled_bg = cxl_patrol_scrub_set_enabled_bg,
	.get_min_cycle = cxl_patrol_scrub_get_min_scrub_cycle,
	.get_max_cycle = cxl_patrol_scrub_get_max_scrub_cycle,
	.get_cycle_duration = cxl_patrol_scrub_get_scrub_cycle,
	.set_cycle_duration = cxl_patrol_scrub_set_scrub_cycle,
};

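/*
 * Illustrative only (not part of the driver): once these ops are registered
 * via edac_dev_register(), the patrol scrub controls are expected to surface
 * through the EDAC scrub sysfs interface. Paths and instance numbers below
 * are indicative and depend on enumeration order, e.g.:
 *
 *	# set a 12 hour patrol scrub cycle, then enable background scrub
 *	echo 43200 > /sys/bus/edac/devices/cxl_mem0/scrub0/current_cycle_duration
 *	echo 1 > /sys/bus/edac/devices/cxl_mem0/scrub0/enable_background
 */
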
static int cxl_memdev_scrub_init(struct cxl_memdev *cxlmd,
				 struct edac_dev_feature *ras_feature,
				 u8 scrub_inst)
{
	struct cxl_patrol_scrub_context *cxl_ps_ctx;
	struct cxl_feat_entry *feat_entry;
	u8 cap, flags;
	u16 cycle;
	int rc;

	feat_entry = cxl_feature_info(to_cxlfs(cxlmd->cxlds),
				      &CXL_FEAT_PATROL_SCRUB_UUID);
	if (IS_ERR(feat_entry))
		return -EOPNOTSUPP;

	if (!(le32_to_cpu(feat_entry->flags) & CXL_FEATURE_F_CHANGEABLE))
		return -EOPNOTSUPP;

	cxl_ps_ctx = devm_kzalloc(&cxlmd->dev, sizeof(*cxl_ps_ctx), GFP_KERNEL);
	if (!cxl_ps_ctx)
		return -ENOMEM;

	*cxl_ps_ctx = (struct cxl_patrol_scrub_context){
		.get_feat_size = le16_to_cpu(feat_entry->get_feat_size),
		.set_feat_size = le16_to_cpu(feat_entry->set_feat_size),
		.get_version = feat_entry->get_feat_ver,
		.set_version = feat_entry->set_feat_ver,
		.effects = le16_to_cpu(feat_entry->effects),
		.instance = scrub_inst,
		.cxlmd = cxlmd,
	};

	rc = cxl_mem_scrub_get_attrbs(&cxlmd->cxlds->cxl_mbox, &cap, &cycle,
				      &flags, NULL);
	if (rc)
		return rc;

	cxlmd->scrub_cycle = CXL_GET_SCRUB_CYCLE(cycle);
	cxlmd->scrub_region_id = CXL_SCRUB_NO_REGION;

	ras_feature->ft_type = RAS_FEAT_SCRUB;
	ras_feature->instance = cxl_ps_ctx->instance;
	ras_feature->scrub_ops = &cxl_ps_scrub_ops;
	ras_feature->ctx = cxl_ps_ctx;

	return 0;
}

static int cxl_region_scrub_init(struct cxl_region *cxlr,
				 struct edac_dev_feature *ras_feature,
				 u8 scrub_inst)
{
	struct cxl_patrol_scrub_context *cxl_ps_ctx;
	struct cxl_region_params *p = &cxlr->params;
	struct cxl_feat_entry *feat_entry = NULL;
	struct cxl_memdev *cxlmd;
	u8 cap, flags;
	u16 cycle;
	int i, rc;

	/*
	 * The cxl_region_rwsem must be held if the code below is used in a
	 * context other than when the region is in the probe state, as shown
	 * here.
	 */
	for (i = 0; i < p->nr_targets; i++) {
		struct cxl_endpoint_decoder *cxled = p->targets[i];

		cxlmd = cxled_to_memdev(cxled);
		feat_entry = cxl_feature_info(to_cxlfs(cxlmd->cxlds),
					      &CXL_FEAT_PATROL_SCRUB_UUID);
		if (IS_ERR(feat_entry))
			return -EOPNOTSUPP;

		if (!(le32_to_cpu(feat_entry->flags) &
		      CXL_FEATURE_F_CHANGEABLE))
			return -EOPNOTSUPP;

		rc = cxl_mem_scrub_get_attrbs(&cxlmd->cxlds->cxl_mbox, &cap,
					      &cycle, &flags, NULL);
		if (rc)
			return rc;

		cxlmd->scrub_cycle = CXL_GET_SCRUB_CYCLE(cycle);
		cxlmd->scrub_region_id = CXL_SCRUB_NO_REGION;
	}

	cxl_ps_ctx = devm_kzalloc(&cxlr->dev, sizeof(*cxl_ps_ctx), GFP_KERNEL);
	if (!cxl_ps_ctx)
		return -ENOMEM;

	*cxl_ps_ctx = (struct cxl_patrol_scrub_context){
		.get_feat_size = le16_to_cpu(feat_entry->get_feat_size),
		.set_feat_size = le16_to_cpu(feat_entry->set_feat_size),
		.get_version = feat_entry->get_feat_ver,
		.set_version = feat_entry->set_feat_ver,
		.effects = le16_to_cpu(feat_entry->effects),
		.instance = scrub_inst,
		.cxlr = cxlr,
	};

	ras_feature->ft_type = RAS_FEAT_SCRUB;
	ras_feature->instance = cxl_ps_ctx->instance;
	ras_feature->scrub_ops = &cxl_ps_scrub_ops;
	ras_feature->ctx = cxl_ps_ctx;

	return 0;
}

struct cxl_ecs_context {
	u16 num_media_frus;
	u16 get_feat_size;
	u16 set_feat_size;
	u8 get_version;
	u8 set_version;
	u16 effects;
	struct cxl_memdev *cxlmd;
};

/*
 * See CXL spec rev 3.2 @8.2.10.9.11.2 Table 8-225 DDR5 ECS Control Feature
 * Readable Attributes.
 */
struct cxl_ecs_fru_rd_attrbs {
	u8 ecs_cap;
	__le16 ecs_config;
	u8 ecs_flags;
} __packed;

struct cxl_ecs_rd_attrbs {
	u8 ecs_log_cap;
	struct cxl_ecs_fru_rd_attrbs fru_attrbs[];
} __packed;

/*
 * See CXL spec rev 3.2 @8.2.10.9.11.2 Table 8-226 DDR5 ECS Control Feature
 * Writable Attributes.
 */
struct cxl_ecs_fru_wr_attrbs {
	__le16 ecs_config;
} __packed;

struct cxl_ecs_wr_attrbs {
	u8 ecs_log_cap;
	struct cxl_ecs_fru_wr_attrbs fru_attrbs[];
} __packed;

#define CXL_ECS_LOG_ENTRY_TYPE_MASK	GENMASK(1, 0)
#define CXL_ECS_REALTIME_REPORT_CAP_MASK BIT(0)
#define CXL_ECS_THRESHOLD_COUNT_MASK	GENMASK(2, 0)
#define CXL_ECS_COUNT_MODE_MASK		BIT(3)
#define CXL_ECS_RESET_COUNTER_MASK	BIT(4)
#define CXL_ECS_RESET_COUNTER		1

enum {
	ECS_THRESHOLD_256 = 256,
	ECS_THRESHOLD_1024 = 1024,
	ECS_THRESHOLD_4096 = 4096,
};

enum {
	ECS_THRESHOLD_IDX_256 = 3,
	ECS_THRESHOLD_IDX_1024 = 4,
	ECS_THRESHOLD_IDX_4096 = 5,
};

static const u16 ecs_supp_threshold[] = {
	[ECS_THRESHOLD_IDX_256] = 256,
	[ECS_THRESHOLD_IDX_1024] = 1024,
	[ECS_THRESHOLD_IDX_4096] = 4096,
};

enum {
	ECS_LOG_ENTRY_TYPE_DRAM = 0x0,
	ECS_LOG_ENTRY_TYPE_MEM_MEDIA_FRU = 0x1,
};

enum cxl_ecs_count_mode {
	ECS_MODE_COUNTS_ROWS = 0,
	ECS_MODE_COUNTS_CODEWORDS = 1,
};

static int cxl_mem_ecs_get_attrbs(struct device *dev,
				  struct cxl_ecs_context *cxl_ecs_ctx,
				  int fru_id, u8 *log_cap, u16 *config)
{
	struct cxl_memdev *cxlmd = cxl_ecs_ctx->cxlmd;
	struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
	struct cxl_ecs_fru_rd_attrbs *fru_rd_attrbs;
	size_t rd_data_size;
	size_t data_size;

	rd_data_size = cxl_ecs_ctx->get_feat_size;

	struct cxl_ecs_rd_attrbs *rd_attrbs __free(kvfree) =
		kvzalloc(rd_data_size, GFP_KERNEL);
	if (!rd_attrbs)
		return -ENOMEM;

	data_size = cxl_get_feature(cxl_mbox, &CXL_FEAT_ECS_UUID,
				    CXL_GET_FEAT_SEL_CURRENT_VALUE, rd_attrbs,
				    rd_data_size, 0, NULL);
	if (!data_size)
		return -EIO;

	fru_rd_attrbs = rd_attrbs->fru_attrbs;
	*log_cap = rd_attrbs->ecs_log_cap;
	*config = le16_to_cpu(fru_rd_attrbs[fru_id].ecs_config);

	return 0;
}

static int cxl_mem_ecs_set_attrbs(struct device *dev,
				  struct cxl_ecs_context *cxl_ecs_ctx,
				  int fru_id, u8 log_cap, u16 config)
{
	struct cxl_memdev *cxlmd = cxl_ecs_ctx->cxlmd;
	struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
	struct cxl_ecs_fru_rd_attrbs *fru_rd_attrbs;
	struct cxl_ecs_fru_wr_attrbs *fru_wr_attrbs;
	size_t rd_data_size, wr_data_size;
	u16 num_media_frus, count;
	size_t data_size;

	num_media_frus = cxl_ecs_ctx->num_media_frus;
	rd_data_size = cxl_ecs_ctx->get_feat_size;
	wr_data_size = cxl_ecs_ctx->set_feat_size;
	struct cxl_ecs_rd_attrbs *rd_attrbs __free(kvfree) =
		kvzalloc(rd_data_size, GFP_KERNEL);
	if (!rd_attrbs)
		return -ENOMEM;

	data_size = cxl_get_feature(cxl_mbox, &CXL_FEAT_ECS_UUID,
				    CXL_GET_FEAT_SEL_CURRENT_VALUE, rd_attrbs,
				    rd_data_size, 0, NULL);
	if (!data_size)
		return -EIO;

	struct cxl_ecs_wr_attrbs *wr_attrbs __free(kvfree) =
		kvzalloc(wr_data_size, GFP_KERNEL);
	if (!wr_attrbs)
		return -ENOMEM;

	/*
	 * Fill writable attributes from the current attributes read
	 * for all the media FRUs.
	 */
	fru_rd_attrbs = rd_attrbs->fru_attrbs;
	fru_wr_attrbs = wr_attrbs->fru_attrbs;
	wr_attrbs->ecs_log_cap = log_cap;
	for (count = 0; count < num_media_frus; count++)
		fru_wr_attrbs[count].ecs_config =
			fru_rd_attrbs[count].ecs_config;

	fru_wr_attrbs[fru_id].ecs_config = cpu_to_le16(config);

	return cxl_set_feature(cxl_mbox, &CXL_FEAT_ECS_UUID,
			       cxl_ecs_ctx->set_version, wr_attrbs,
			       wr_data_size,
			       CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET,
			       0, NULL);
}

static u8 cxl_get_ecs_log_entry_type(u8 log_cap, u16 config)
{
	return FIELD_GET(CXL_ECS_LOG_ENTRY_TYPE_MASK, log_cap);
}

static u16 cxl_get_ecs_threshold(u8 log_cap, u16 config)
{
	u8 index = FIELD_GET(CXL_ECS_THRESHOLD_COUNT_MASK, config);

	return ecs_supp_threshold[index];
}

static u8 cxl_get_ecs_count_mode(u8 log_cap, u16 config)
{
	return FIELD_GET(CXL_ECS_COUNT_MODE_MASK, config);
}

#define CXL_ECS_GET_ATTR(attrb)                                            \
	static int cxl_ecs_get_##attrb(struct device *dev, void *drv_data, \
				       int fru_id, u32 *val)               \
	{                                                                  \
		struct cxl_ecs_context *ctx = drv_data;                    \
		u8 log_cap;                                                \
		u16 config;                                                \
		int ret;                                                   \
									   \
		ret = cxl_mem_ecs_get_attrbs(dev, ctx, fru_id, &log_cap,   \
					     &config);                     \
		if (ret)                                                   \
			return ret;                                        \
									   \
		*val = cxl_get_ecs_##attrb(log_cap, config);               \
									   \
		return 0;                                                  \
	}

CXL_ECS_GET_ATTR(log_entry_type)
CXL_ECS_GET_ATTR(count_mode)
CXL_ECS_GET_ATTR(threshold)

static int cxl_set_ecs_log_entry_type(struct device *dev, u8 *log_cap,
				      u16 *config, u32 val)
{
	if (val != ECS_LOG_ENTRY_TYPE_DRAM &&
	    val != ECS_LOG_ENTRY_TYPE_MEM_MEDIA_FRU)
		return -EINVAL;

	*log_cap = FIELD_PREP(CXL_ECS_LOG_ENTRY_TYPE_MASK, val);

	return 0;
}

static int cxl_set_ecs_threshold(struct device *dev, u8 *log_cap, u16 *config,
				 u32 val)
{
	*config &= ~CXL_ECS_THRESHOLD_COUNT_MASK;

	switch (val) {
	case ECS_THRESHOLD_256:
		*config |= FIELD_PREP(CXL_ECS_THRESHOLD_COUNT_MASK,
				      ECS_THRESHOLD_IDX_256);
		break;
	case ECS_THRESHOLD_1024:
		*config |= FIELD_PREP(CXL_ECS_THRESHOLD_COUNT_MASK,
				      ECS_THRESHOLD_IDX_1024);
		break;
	case ECS_THRESHOLD_4096:
		*config |= FIELD_PREP(CXL_ECS_THRESHOLD_COUNT_MASK,
				      ECS_THRESHOLD_IDX_4096);
		break;
	default:
		dev_dbg(dev, "Invalid CXL ECS threshold count(%d) to set\n",
			val);
		dev_dbg(dev, "Supported ECS threshold counts: %u, %u, %u\n",
			ECS_THRESHOLD_256, ECS_THRESHOLD_1024,
			ECS_THRESHOLD_4096);
		return -EINVAL;
	}

	return 0;
}

static int cxl_set_ecs_count_mode(struct device *dev, u8 *log_cap, u16 *config,
				  u32 val)
{
	if (val != ECS_MODE_COUNTS_ROWS && val != ECS_MODE_COUNTS_CODEWORDS) {
		dev_dbg(dev, "Invalid CXL ECS scrub mode(%d) to set\n", val);
		dev_dbg(dev,
			"Supported ECS Modes: 0: ECS counts rows with errors,"
			" 1: ECS counts codewords with errors\n");
		return -EINVAL;
	}

	*config &= ~CXL_ECS_COUNT_MODE_MASK;
	*config |= FIELD_PREP(CXL_ECS_COUNT_MODE_MASK, val);

	return 0;
}

static int cxl_set_ecs_reset_counter(struct device *dev, u8 *log_cap,
				     u16 *config, u32 val)
{
	if (val != CXL_ECS_RESET_COUNTER)
		return -EINVAL;

	*config &= ~CXL_ECS_RESET_COUNTER_MASK;
	*config |= FIELD_PREP(CXL_ECS_RESET_COUNTER_MASK, val);

	return 0;
}

#define CXL_ECS_SET_ATTR(attrb)                                            \
	static int cxl_ecs_set_##attrb(struct device *dev, void *drv_data, \
				       int fru_id, u32 val)                \
	{                                                                  \
		struct cxl_ecs_context *ctx = drv_data;                    \
		u8 log_cap;                                                \
		u16 config;                                                \
		int ret;                                                   \
									   \
		if (!capable(CAP_SYS_RAWIO))                               \
			return -EPERM;                                     \
									   \
		ret = cxl_mem_ecs_get_attrbs(dev, ctx, fru_id, &log_cap,   \
					     &config);                     \
		if (ret)                                                   \
			return ret;                                        \
									   \
		ret = cxl_set_ecs_##attrb(dev, &log_cap, &config, val);    \
		if (ret)                                                   \
			return ret;                                        \
									   \
		return cxl_mem_ecs_set_attrbs(dev, ctx, fru_id, log_cap,   \
					      config);                     \
	}
CXL_ECS_SET_ATTR(log_entry_type)
CXL_ECS_SET_ATTR(count_mode)
CXL_ECS_SET_ATTR(reset_counter)
CXL_ECS_SET_ATTR(threshold)

static const struct edac_ecs_ops cxl_ecs_ops = {
	.get_log_entry_type = cxl_ecs_get_log_entry_type,
	.set_log_entry_type = cxl_ecs_set_log_entry_type,
	.get_mode = cxl_ecs_get_count_mode,
	.set_mode = cxl_ecs_set_count_mode,
	.reset = cxl_ecs_set_reset_counter,
	.get_threshold = cxl_ecs_get_threshold,
	.set_threshold = cxl_ecs_set_threshold,
};

static int cxl_memdev_ecs_init(struct cxl_memdev *cxlmd,
			       struct edac_dev_feature *ras_feature)
{
	struct cxl_ecs_context *cxl_ecs_ctx;
	struct cxl_feat_entry *feat_entry;
	int num_media_frus;

	feat_entry =
		cxl_feature_info(to_cxlfs(cxlmd->cxlds), &CXL_FEAT_ECS_UUID);
	if (IS_ERR(feat_entry))
		return -EOPNOTSUPP;

	if (!(le32_to_cpu(feat_entry->flags) & CXL_FEATURE_F_CHANGEABLE))
		return -EOPNOTSUPP;

	num_media_frus = (le16_to_cpu(feat_entry->get_feat_size) -
			  sizeof(struct cxl_ecs_rd_attrbs)) /
			 sizeof(struct cxl_ecs_fru_rd_attrbs);
	if (!num_media_frus)
		return -EOPNOTSUPP;

	cxl_ecs_ctx =
		devm_kzalloc(&cxlmd->dev, sizeof(*cxl_ecs_ctx), GFP_KERNEL);
	if (!cxl_ecs_ctx)
		return -ENOMEM;

	*cxl_ecs_ctx = (struct cxl_ecs_context){
		.get_feat_size = le16_to_cpu(feat_entry->get_feat_size),
		.set_feat_size = le16_to_cpu(feat_entry->set_feat_size),
		.get_version = feat_entry->get_feat_ver,
		.set_version = feat_entry->set_feat_ver,
		.effects = le16_to_cpu(feat_entry->effects),
		.num_media_frus = num_media_frus,
		.cxlmd = cxlmd,
	};

	ras_feature->ft_type = RAS_FEAT_ECS;
	ras_feature->ecs_ops = &cxl_ecs_ops;
	ras_feature->ctx = cxl_ecs_ctx;
	ras_feature->ecs_info.num_media_frus = num_media_frus;

	return 0;
}

/*
 * Perform Maintenance CXL 3.2 Spec 8.2.10.7.1
 */

/*
 * Perform Maintenance input payload
 * CXL rev 3.2 section 8.2.10.7.1 Table 8-117
 */
struct cxl_mbox_maintenance_hdr {
	u8 op_class;
	u8 op_subclass;
} __packed;

static int cxl_perform_maintenance(struct cxl_mailbox *cxl_mbox, u8 class,
				   u8 subclass, void *data_in,
				   size_t data_in_size)
{
	struct cxl_memdev_maintenance_pi {
		struct cxl_mbox_maintenance_hdr hdr;
		u8 data[];
	} __packed;
	struct cxl_mbox_cmd mbox_cmd;
	size_t hdr_size;

	struct cxl_memdev_maintenance_pi *pi __free(kvfree) =
		kvzalloc(cxl_mbox->payload_size, GFP_KERNEL);
	if (!pi)
		return -ENOMEM;

	pi->hdr.op_class = class;
	pi->hdr.op_subclass = subclass;
	hdr_size = sizeof(pi->hdr);
	/*
	 * Check minimum mbox payload size is available for
	 * the maintenance data transfer.
	 */
	if (hdr_size + data_in_size > cxl_mbox->payload_size)
		return -ENOMEM;

	memcpy(pi->data, data_in, data_in_size);
	mbox_cmd = (struct cxl_mbox_cmd){
		.opcode = CXL_MBOX_OP_DO_MAINTENANCE,
		.size_in = hdr_size + data_in_size,
		.payload_in = pi,
	};

	return cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
}

/*
 * Support for checking whether a memory operation's attributes
 * are from the current boot or not.
 */

struct cxl_mem_err_rec {
	struct xarray rec_gen_media;
	struct xarray rec_dram;
};

enum cxl_mem_repair_type {
	CXL_PPR,
	CXL_CACHELINE_SPARING,
	CXL_ROW_SPARING,
	CXL_BANK_SPARING,
	CXL_RANK_SPARING,
	CXL_REPAIR_MAX,
};

/**
 * struct cxl_mem_repair_attrbs - CXL memory repair attributes
 * @dpa: DPA of memory to repair
 * @nibble_mask: nibble mask, identifies one or more nibbles on the memory bus
 * @row: row of memory to repair
 * @column: column of memory to repair
 * @channel: channel of memory to repair
 * @sub_channel: sub channel of memory to repair
 * @rank: rank of memory to repair
 * @bank_group: bank group of memory to repair
 * @bank: bank of memory to repair
 * @repair_type: repair type. For eg. PPR, memory sparing etc.
 */
struct cxl_mem_repair_attrbs {
	u64 dpa;
	u32 nibble_mask;
	u32 row;
	u16 column;
	u8 channel;
	u8 sub_channel;
	u8 rank;
	u8 bank_group;
	u8 bank;
	enum cxl_mem_repair_type repair_type;
};

static struct cxl_event_gen_media *
cxl_find_rec_gen_media(struct cxl_memdev *cxlmd,
		       struct cxl_mem_repair_attrbs *attrbs)
{
	struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
	struct cxl_event_gen_media *rec;

	if (!array_rec)
		return NULL;

	rec = xa_load(&array_rec->rec_gen_media, attrbs->dpa);
	if (!rec)
		return NULL;

	if (attrbs->repair_type == CXL_PPR)
		return rec;

	return NULL;
}

static struct cxl_event_dram *
cxl_find_rec_dram(struct cxl_memdev *cxlmd,
		  struct cxl_mem_repair_attrbs *attrbs)
{
	struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
	struct cxl_event_dram *rec;
	u16 validity_flags;

	if (!array_rec)
		return NULL;

	rec = xa_load(&array_rec->rec_dram, attrbs->dpa);
	if (!rec)
		return NULL;

	validity_flags = get_unaligned_le16(rec->media_hdr.validity_flags);
	if (!(validity_flags & CXL_DER_VALID_CHANNEL) ||
	    !(validity_flags & CXL_DER_VALID_RANK))
		return NULL;

	switch (attrbs->repair_type) {
	case CXL_PPR:
		if (!(validity_flags & CXL_DER_VALID_NIBBLE) ||
		    get_unaligned_le24(rec->nibble_mask) == attrbs->nibble_mask)
			return rec;
		break;
	case CXL_CACHELINE_SPARING:
		if (!(validity_flags & CXL_DER_VALID_BANK_GROUP) ||
		    !(validity_flags & CXL_DER_VALID_BANK) ||
		    !(validity_flags & CXL_DER_VALID_ROW) ||
		    !(validity_flags & CXL_DER_VALID_COLUMN))
			return NULL;

		if (rec->media_hdr.channel == attrbs->channel &&
		    rec->media_hdr.rank == attrbs->rank &&
		    rec->bank_group == attrbs->bank_group &&
		    rec->bank == attrbs->bank &&
		    get_unaligned_le24(rec->row) == attrbs->row &&
		    get_unaligned_le16(rec->column) == attrbs->column &&
		    (!(validity_flags & CXL_DER_VALID_NIBBLE) ||
		     get_unaligned_le24(rec->nibble_mask) ==
			     attrbs->nibble_mask) &&
		    (!(validity_flags & CXL_DER_VALID_SUB_CHANNEL) ||
		     rec->sub_channel == attrbs->sub_channel))
			return rec;
		break;
	case CXL_ROW_SPARING:
		if (!(validity_flags & CXL_DER_VALID_BANK_GROUP) ||
		    !(validity_flags & CXL_DER_VALID_BANK) ||
		    !(validity_flags & CXL_DER_VALID_ROW))
			return NULL;

		if (rec->media_hdr.channel == attrbs->channel &&
		    rec->media_hdr.rank == attrbs->rank &&
		    rec->bank_group == attrbs->bank_group &&
		    rec->bank == attrbs->bank &&
		    get_unaligned_le24(rec->row) == attrbs->row &&
		    (!(validity_flags & CXL_DER_VALID_NIBBLE) ||
		     get_unaligned_le24(rec->nibble_mask) ==
			     attrbs->nibble_mask))
			return rec;
		break;
	case CXL_BANK_SPARING:
		if (!(validity_flags & CXL_DER_VALID_BANK_GROUP) ||
		    !(validity_flags & CXL_DER_VALID_BANK))
			return NULL;

		if (rec->media_hdr.channel == attrbs->channel &&
		    rec->media_hdr.rank == attrbs->rank &&
		    rec->bank_group == attrbs->bank_group &&
		    rec->bank == attrbs->bank &&
		    (!(validity_flags & CXL_DER_VALID_NIBBLE) ||
		     get_unaligned_le24(rec->nibble_mask) ==
			     attrbs->nibble_mask))
			return rec;
		break;
	case CXL_RANK_SPARING:
		if (rec->media_hdr.channel == attrbs->channel &&
		    rec->media_hdr.rank == attrbs->rank &&
		    (!(validity_flags & CXL_DER_VALID_NIBBLE) ||
		     get_unaligned_le24(rec->nibble_mask) ==
			     attrbs->nibble_mask))
			return rec;
		break;
	default:
		return NULL;
	}

	return NULL;
}

#define CXL_MAX_STORAGE_DAYS 10
#define CXL_MAX_STORAGE_TIME_SECS (CXL_MAX_STORAGE_DAYS * 24 * 60 * 60)

static void cxl_del_expired_gmedia_recs(struct xarray *rec_xarray,
					struct cxl_event_gen_media *cur_rec)
{
	u64 cur_ts = le64_to_cpu(cur_rec->media_hdr.hdr.timestamp);
	struct cxl_event_gen_media *rec;
	unsigned long index;
	u64 delta_ts_secs;

	xa_for_each(rec_xarray, index, rec) {
		delta_ts_secs = (cur_ts -
			le64_to_cpu(rec->media_hdr.hdr.timestamp)) / 1000000000ULL;
		if (delta_ts_secs >= CXL_MAX_STORAGE_TIME_SECS) {
			xa_erase(rec_xarray, index);
			kfree(rec);
		}
	}
}

static void cxl_del_expired_dram_recs(struct xarray *rec_xarray,
				      struct cxl_event_dram *cur_rec)
{
	u64 cur_ts = le64_to_cpu(cur_rec->media_hdr.hdr.timestamp);
	struct cxl_event_dram *rec;
	unsigned long index;
	u64 delta_secs;

	xa_for_each(rec_xarray, index, rec) {
		delta_secs = (cur_ts -
			le64_to_cpu(rec->media_hdr.hdr.timestamp)) / 1000000000ULL;
		if (delta_secs >= CXL_MAX_STORAGE_TIME_SECS) {
			xa_erase(rec_xarray, index);
			kfree(rec);
		}
	}
}

#define CXL_MAX_REC_STORAGE_COUNT 200

static void cxl_del_overflow_old_recs(struct xarray *rec_xarray)
{
	void *err_rec;
	unsigned long index, count = 0;

	xa_for_each(rec_xarray, index, err_rec)
		count++;

	if (count <= CXL_MAX_REC_STORAGE_COUNT)
		return;

	count -= CXL_MAX_REC_STORAGE_COUNT;
	xa_for_each(rec_xarray, index, err_rec) {
		xa_erase(rec_xarray, index);
		kfree(err_rec);
		count--;
		if (!count)
			break;
	}
}

int cxl_store_rec_gen_media(struct cxl_memdev *cxlmd, union cxl_event *evt)
{
	struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
	struct cxl_event_gen_media *rec;
	void *old_rec;

	if (!IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR) || !array_rec)
		return 0;

	rec = kmemdup(&evt->gen_media, sizeof(*rec), GFP_KERNEL);
	if (!rec)
		return -ENOMEM;

	old_rec = xa_store(&array_rec->rec_gen_media,
			   le64_to_cpu(rec->media_hdr.phys_addr), rec,
			   GFP_KERNEL);
	if (xa_is_err(old_rec)) {
		kfree(rec);
		return xa_err(old_rec);
	}

	kfree(old_rec);

	cxl_del_expired_gmedia_recs(&array_rec->rec_gen_media, rec);
	cxl_del_overflow_old_recs(&array_rec->rec_gen_media);

	return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_store_rec_gen_media, "CXL");

int cxl_store_rec_dram(struct cxl_memdev *cxlmd, union cxl_event *evt)
{
	struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
	struct cxl_event_dram *rec;
	void *old_rec;

	if (!IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR) || !array_rec)
		return 0;

	rec = kmemdup(&evt->dram, sizeof(*rec), GFP_KERNEL);
	if (!rec)
		return -ENOMEM;

	old_rec = xa_store(&array_rec->rec_dram,
			   le64_to_cpu(rec->media_hdr.phys_addr), rec,
			   GFP_KERNEL);
	if (xa_is_err(old_rec)) {
		kfree(rec);
		return xa_err(old_rec);
	}

	kfree(old_rec);

	cxl_del_expired_dram_recs(&array_rec->rec_dram, rec);
	cxl_del_overflow_old_recs(&array_rec->rec_dram);

	return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_store_rec_dram, "CXL");

static bool cxl_is_memdev_memory_online(const struct cxl_memdev *cxlmd)
{
	struct cxl_port *port = cxlmd->endpoint;

	if (port && cxl_num_decoders_committed(port))
		return true;

	return false;
}

/*
 * CXL memory sparing control
 */
enum cxl_mem_sparing_granularity {
	CXL_MEM_SPARING_CACHELINE,
	CXL_MEM_SPARING_ROW,
	CXL_MEM_SPARING_BANK,
	CXL_MEM_SPARING_RANK,
	CXL_MEM_SPARING_MAX
};

struct cxl_mem_sparing_context {
	struct cxl_memdev *cxlmd;
	uuid_t repair_uuid;
	u16 get_feat_size;
	u16 set_feat_size;
	u16 effects;
	u8 instance;
	u8 get_version;
	u8 set_version;
	u8 op_class;
	u8 op_subclass;
	bool cap_safe_when_in_use;
	bool cap_hard_sparing;
	bool cap_soft_sparing;
	u8 channel;
	u8 rank;
	u8 bank_group;
	u32 nibble_mask;
	u64 dpa;
	u32 row;
	u16 column;
	u8 bank;
	u8 sub_channel;
	enum edac_mem_repair_type repair_type;
	bool persist_mode;
};

#define CXL_SPARING_RD_CAP_SAFE_IN_USE_MASK	BIT(0)
#define CXL_SPARING_RD_CAP_HARD_SPARING_MASK	BIT(1)
#define CXL_SPARING_RD_CAP_SOFT_SPARING_MASK	BIT(2)

#define CXL_SPARING_WR_DEVICE_INITIATED_MASK	BIT(0)

#define CXL_SPARING_QUERY_RESOURCE_FLAG		BIT(0)
#define CXL_SET_HARD_SPARING_FLAG		BIT(1)
#define CXL_SPARING_SUB_CHNL_VALID_FLAG		BIT(2)
#define CXL_SPARING_NIB_MASK_VALID_FLAG		BIT(3)

#define CXL_GET_SPARING_SAFE_IN_USE(flags) \
	(FIELD_GET(CXL_SPARING_RD_CAP_SAFE_IN_USE_MASK, \
		   flags) ^ 1)
#define CXL_GET_CAP_HARD_SPARING(flags) \
	FIELD_GET(CXL_SPARING_RD_CAP_HARD_SPARING_MASK, \
		  flags)
#define CXL_GET_CAP_SOFT_SPARING(flags) \
	FIELD_GET(CXL_SPARING_RD_CAP_SOFT_SPARING_MASK, \
		  flags)

#define CXL_SET_SPARING_QUERY_RESOURCE(val) \
	FIELD_PREP(CXL_SPARING_QUERY_RESOURCE_FLAG, val)
#define CXL_SET_HARD_SPARING(val) \
	FIELD_PREP(CXL_SET_HARD_SPARING_FLAG, val)
#define CXL_SET_SPARING_SUB_CHNL_VALID(val) \
	FIELD_PREP(CXL_SPARING_SUB_CHNL_VALID_FLAG, val)
#define CXL_SET_SPARING_NIB_MASK_VALID(val) \
	FIELD_PREP(CXL_SPARING_NIB_MASK_VALID_FLAG, val)

/*
 * See CXL spec rev 3.2 @8.2.10.7.2.3 Table 8-134 Memory Sparing Feature
 * Readable Attributes.
 */
struct cxl_memdev_repair_rd_attrbs_hdr {
	u8 max_op_latency;
	__le16 op_cap;
	__le16 op_mode;
	u8 op_class;
	u8 op_subclass;
	u8 rsvd[9];
} __packed;

struct cxl_memdev_sparing_rd_attrbs {
	struct cxl_memdev_repair_rd_attrbs_hdr hdr;
	u8 rsvd;
	__le16 restriction_flags;
} __packed;

/*
 * See CXL spec rev 3.2 @8.2.10.7.1.4 Table 8-120 Memory Sparing Input Payload.
 */
struct cxl_memdev_sparing_in_payload {
	u8 flags;
	u8 channel;
	u8 rank;
	u8 nibble_mask[3];
	u8 bank_group;
	u8 bank;
	u8 row[3];
	__le16 column;
	u8 sub_channel;
} __packed;

static int
cxl_mem_sparing_get_attrbs(struct cxl_mem_sparing_context *cxl_sparing_ctx)
{
	size_t rd_data_size = sizeof(struct cxl_memdev_sparing_rd_attrbs);
	struct cxl_memdev *cxlmd = cxl_sparing_ctx->cxlmd;
	struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
	u16 restriction_flags;
	size_t data_size;
	u16 return_code;
	struct cxl_memdev_sparing_rd_attrbs *rd_attrbs __free(kfree) =
		kzalloc(rd_data_size, GFP_KERNEL);
	if (!rd_attrbs)
		return -ENOMEM;

	data_size = cxl_get_feature(cxl_mbox, &cxl_sparing_ctx->repair_uuid,
				    CXL_GET_FEAT_SEL_CURRENT_VALUE, rd_attrbs,
				    rd_data_size, 0, &return_code);
	if (!data_size)
		return -EIO;

	cxl_sparing_ctx->op_class = rd_attrbs->hdr.op_class;
	cxl_sparing_ctx->op_subclass = rd_attrbs->hdr.op_subclass;
	restriction_flags = le16_to_cpu(rd_attrbs->restriction_flags);
	cxl_sparing_ctx->cap_safe_when_in_use =
		CXL_GET_SPARING_SAFE_IN_USE(restriction_flags);
	cxl_sparing_ctx->cap_hard_sparing =
		CXL_GET_CAP_HARD_SPARING(restriction_flags);
	cxl_sparing_ctx->cap_soft_sparing =
		CXL_GET_CAP_SOFT_SPARING(restriction_flags);

	return 0;
}

static struct cxl_event_dram *
cxl_mem_get_rec_dram(struct cxl_memdev *cxlmd,
		     struct cxl_mem_sparing_context *ctx)
{
	struct cxl_mem_repair_attrbs attrbs = { 0 };

	attrbs.dpa = ctx->dpa;
	attrbs.channel = ctx->channel;
	attrbs.rank = ctx->rank;
	attrbs.nibble_mask = ctx->nibble_mask;
	switch (ctx->repair_type) {
	case EDAC_REPAIR_CACHELINE_SPARING:
		attrbs.repair_type = CXL_CACHELINE_SPARING;
		attrbs.bank_group = ctx->bank_group;
		attrbs.bank = ctx->bank;
		attrbs.row = ctx->row;
		attrbs.column = ctx->column;
		attrbs.sub_channel = ctx->sub_channel;
		break;
	case EDAC_REPAIR_ROW_SPARING:
		attrbs.repair_type = CXL_ROW_SPARING;
		attrbs.bank_group = ctx->bank_group;
		attrbs.bank = ctx->bank;
		attrbs.row = ctx->row;
		break;
	case EDAC_REPAIR_BANK_SPARING:
		attrbs.repair_type = CXL_BANK_SPARING;
		attrbs.bank_group = ctx->bank_group;
		attrbs.bank = ctx->bank;
		break;
	case EDAC_REPAIR_RANK_SPARING:
		attrbs.repair_type = CXL_RANK_SPARING;
		break;
	default:
		return NULL;
	}

	return cxl_find_rec_dram(cxlmd, &attrbs);
}

static int
cxl_mem_perform_sparing(struct device *dev,
			struct cxl_mem_sparing_context *cxl_sparing_ctx)
{
	struct cxl_memdev *cxlmd = cxl_sparing_ctx->cxlmd;
	struct cxl_memdev_sparing_in_payload sparing_pi;
	struct cxl_event_dram *rec = NULL;
	u16 validity_flags = 0;

	struct rw_semaphore *region_lock __free(rwsem_read_release) =
		rwsem_read_intr_acquire(&cxl_region_rwsem);
	if (!region_lock)
		return -EINTR;

	struct rw_semaphore *dpa_lock __free(rwsem_read_release) =
		rwsem_read_intr_acquire(&cxl_dpa_rwsem);
	if (!dpa_lock)
		return -EINTR;

	if (!cxl_sparing_ctx->cap_safe_when_in_use) {
		/* Memory to repair must be offline */
		if (cxl_is_memdev_memory_online(cxlmd))
			return -EBUSY;
	} else {
		if (cxl_is_memdev_memory_online(cxlmd)) {
			rec = cxl_mem_get_rec_dram(cxlmd, cxl_sparing_ctx);
			if (!rec)
				return -EINVAL;

			if (!get_unaligned_le16(rec->media_hdr.validity_flags))
				return -EINVAL;
		}
	}

	memset(&sparing_pi, 0, sizeof(sparing_pi));
	sparing_pi.flags = CXL_SET_SPARING_QUERY_RESOURCE(0);
	if (cxl_sparing_ctx->persist_mode)
		sparing_pi.flags |= CXL_SET_HARD_SPARING(1);

	if (rec)
		validity_flags = get_unaligned_le16(rec->media_hdr.validity_flags);

	switch (cxl_sparing_ctx->repair_type) {
	case EDAC_REPAIR_CACHELINE_SPARING:
		sparing_pi.column = cpu_to_le16(cxl_sparing_ctx->column);
		if (!rec || (validity_flags & CXL_DER_VALID_SUB_CHANNEL)) {
			sparing_pi.flags |= CXL_SET_SPARING_SUB_CHNL_VALID(1);
			sparing_pi.sub_channel = cxl_sparing_ctx->sub_channel;
		}
		fallthrough;
	case EDAC_REPAIR_ROW_SPARING:
		put_unaligned_le24(cxl_sparing_ctx->row, sparing_pi.row);
		fallthrough;
	case EDAC_REPAIR_BANK_SPARING:
		sparing_pi.bank_group = cxl_sparing_ctx->bank_group;
		sparing_pi.bank = cxl_sparing_ctx->bank;
		fallthrough;
	case EDAC_REPAIR_RANK_SPARING:
		sparing_pi.rank = cxl_sparing_ctx->rank;
		fallthrough;
	default:
		sparing_pi.channel = cxl_sparing_ctx->channel;
		if ((rec && (validity_flags & CXL_DER_VALID_NIBBLE)) ||
		    (!rec && (!cxl_sparing_ctx->nibble_mask ||
			      (cxl_sparing_ctx->nibble_mask & 0xFFFFFF)))) {
			sparing_pi.flags |= CXL_SET_SPARING_NIB_MASK_VALID(1);
			put_unaligned_le24(cxl_sparing_ctx->nibble_mask,
					   sparing_pi.nibble_mask);
		}
		break;
	}

	return cxl_perform_maintenance(&cxlmd->cxlds->cxl_mbox,
				       cxl_sparing_ctx->op_class,
				       cxl_sparing_ctx->op_subclass,
				       &sparing_pi, sizeof(sparing_pi));
}

static int cxl_mem_sparing_get_repair_type(struct device *dev, void *drv_data,
					    const char **repair_type)
{
	struct cxl_mem_sparing_context *ctx = drv_data;

	switch (ctx->repair_type) {
	case EDAC_REPAIR_CACHELINE_SPARING:
	case EDAC_REPAIR_ROW_SPARING:
	case EDAC_REPAIR_BANK_SPARING:
	case EDAC_REPAIR_RANK_SPARING:
		*repair_type = edac_repair_type[ctx->repair_type];
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

#define CXL_SPARING_GET_ATTR(attrb, data_type)                       \
	static int cxl_mem_sparing_get_##attrb(                      \
		struct device *dev, void *drv_data, data_type *val)  \
	{                                                            \
		struct cxl_mem_sparing_context *ctx = drv_data;      \
								     \
		*val = ctx->attrb;                                   \
								     \
		return 0;                                            \
	}
CXL_SPARING_GET_ATTR(persist_mode, bool)
CXL_SPARING_GET_ATTR(dpa, u64)
CXL_SPARING_GET_ATTR(nibble_mask, u32)
CXL_SPARING_GET_ATTR(bank_group, u32)
CXL_SPARING_GET_ATTR(bank, u32)
CXL_SPARING_GET_ATTR(rank, u32)
CXL_SPARING_GET_ATTR(row, u32)
CXL_SPARING_GET_ATTR(column, u32)
CXL_SPARING_GET_ATTR(channel, u32)
CXL_SPARING_GET_ATTR(sub_channel, u32)

#define CXL_SPARING_SET_ATTR(attrb, data_type)                                \
	static int cxl_mem_sparing_set_##attrb(struct device *dev,           \
					       void *drv_data, data_type val) \
	{                                                                     \
		struct cxl_mem_sparing_context *ctx = drv_data;               \
									      \
		ctx->attrb = val;                                             \
									      \
		return 0;                                                     \
	}
CXL_SPARING_SET_ATTR(nibble_mask, u32)
CXL_SPARING_SET_ATTR(bank_group, u32)
CXL_SPARING_SET_ATTR(bank, u32)
CXL_SPARING_SET_ATTR(rank, u32)
CXL_SPARING_SET_ATTR(row, u32)
CXL_SPARING_SET_ATTR(column, u32)
CXL_SPARING_SET_ATTR(channel, u32)
CXL_SPARING_SET_ATTR(sub_channel, u32)

static int cxl_mem_sparing_set_persist_mode(struct device *dev, void *drv_data,
					    bool persist_mode)
{
	struct cxl_mem_sparing_context *ctx = drv_data;

	if ((persist_mode && ctx->cap_hard_sparing) ||
	    (!persist_mode && ctx->cap_soft_sparing))
		ctx->persist_mode = persist_mode;
	else
		return -EOPNOTSUPP;

	return 0;
}

static int cxl_get_mem_sparing_safe_when_in_use(struct device *dev,
						void *drv_data, bool *safe)
{
	struct cxl_mem_sparing_context *ctx = drv_data;

	*safe = ctx->cap_safe_when_in_use;

	return 0;
}

static int cxl_mem_sparing_get_min_dpa(struct device *dev, void *drv_data,
				       u64 *min_dpa)
{
	struct cxl_mem_sparing_context *ctx = drv_data;
	struct cxl_memdev *cxlmd = ctx->cxlmd;
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	*min_dpa = cxlds->dpa_res.start;

	return 0;
}

static int cxl_mem_sparing_get_max_dpa(struct device *dev, void *drv_data,
				       u64 *max_dpa)
{
	struct cxl_mem_sparing_context *ctx = drv_data;
	struct cxl_memdev *cxlmd = ctx->cxlmd;
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	*max_dpa = cxlds->dpa_res.end;

	return 0;
}

static int cxl_mem_sparing_set_dpa(struct device *dev, void *drv_data, u64 dpa)
{
	struct cxl_mem_sparing_context *ctx = drv_data;
	struct cxl_memdev *cxlmd = ctx->cxlmd;
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	if (dpa < cxlds->dpa_res.start || dpa > cxlds->dpa_res.end)
		return -EINVAL;

	ctx->dpa = dpa;

	return 0;
}

static int cxl_do_mem_sparing(struct device *dev, void *drv_data, u32 val)
{
	struct cxl_mem_sparing_context *ctx = drv_data;

	if (val != EDAC_DO_MEM_REPAIR)
		return -EINVAL;

	return cxl_mem_perform_sparing(dev, ctx);
}

#define RANK_OPS                                                              \
	.get_repair_type = cxl_mem_sparing_get_repair_type,                  \
	.get_persist_mode = cxl_mem_sparing_get_persist_mode,                \
	.set_persist_mode = cxl_mem_sparing_set_persist_mode,                \
	.get_repair_safe_when_in_use = cxl_get_mem_sparing_safe_when_in_use, \
	.get_min_dpa = cxl_mem_sparing_get_min_dpa,                           \
	.get_max_dpa = cxl_mem_sparing_get_max_dpa,                           \
	.get_dpa = cxl_mem_sparing_get_dpa,                                   \
	.set_dpa = cxl_mem_sparing_set_dpa,                                   \
	.get_nibble_mask = cxl_mem_sparing_get_nibble_mask,                   \
	.set_nibble_mask = cxl_mem_sparing_set_nibble_mask,                   \
	.get_rank = cxl_mem_sparing_get_rank,                                 \
	.set_rank = cxl_mem_sparing_set_rank,                                 \
	.get_channel = cxl_mem_sparing_get_channel,                           \
	.set_channel = cxl_mem_sparing_set_channel,                           \
	.do_repair = cxl_do_mem_sparing

#define BANK_OPS                                                    \
	RANK_OPS, .get_bank_group = cxl_mem_sparing_get_bank_group, \
	.set_bank_group = cxl_mem_sparing_set_bank_group,           \
	.get_bank = cxl_mem_sparing_get_bank,                       \
	.set_bank = cxl_mem_sparing_set_bank

#define ROW_OPS                                       \
	BANK_OPS, .get_row = cxl_mem_sparing_get_row, \
	.set_row = cxl_mem_sparing_set_row

#define CACHELINE_OPS                                           \
	ROW_OPS, .get_column = cxl_mem_sparing_get_column,      \
	.set_column = cxl_mem_sparing_set_column,               \
	.get_sub_channel = cxl_mem_sparing_get_sub_channel,     \
	.set_sub_channel = cxl_mem_sparing_set_sub_channel

static const struct edac_mem_repair_ops cxl_rank_sparing_ops = {
	RANK_OPS,
};

static const struct edac_mem_repair_ops cxl_bank_sparing_ops = {
	BANK_OPS,
};

static const struct edac_mem_repair_ops cxl_row_sparing_ops = {
	ROW_OPS,
};

static const struct edac_mem_repair_ops cxl_cacheline_sparing_ops = {
	CACHELINE_OPS,
};

struct cxl_mem_sparing_desc {
	const uuid_t repair_uuid;
	enum edac_mem_repair_type repair_type;
	const struct edac_mem_repair_ops *repair_ops;
};

static const struct cxl_mem_sparing_desc mem_sparing_desc[] = {
	{
		.repair_uuid = CXL_FEAT_CACHELINE_SPARING_UUID,
		.repair_type = EDAC_REPAIR_CACHELINE_SPARING,
		.repair_ops = &cxl_cacheline_sparing_ops,
	},
	{
		.repair_uuid = CXL_FEAT_ROW_SPARING_UUID,
		.repair_type = EDAC_REPAIR_ROW_SPARING,
		.repair_ops = &cxl_row_sparing_ops,
	},
	{
		.repair_uuid = CXL_FEAT_BANK_SPARING_UUID,
		.repair_type = EDAC_REPAIR_BANK_SPARING,
		.repair_ops = &cxl_bank_sparing_ops,
	},
	{
		.repair_uuid = CXL_FEAT_RANK_SPARING_UUID,
		.repair_type = EDAC_REPAIR_RANK_SPARING,
		.repair_ops = &cxl_rank_sparing_ops,
	},
};

static int cxl_memdev_sparing_init(struct cxl_memdev *cxlmd,
				   struct edac_dev_feature *ras_feature,
				   const struct cxl_mem_sparing_desc *desc,
				   u8 repair_inst)
{
	struct cxl_mem_sparing_context *cxl_sparing_ctx;
	struct cxl_feat_entry *feat_entry;
	int ret;

	feat_entry = cxl_feature_info(to_cxlfs(cxlmd->cxlds),
				      &desc->repair_uuid);
	if (IS_ERR(feat_entry))
		return -EOPNOTSUPP;

	if (!(le32_to_cpu(feat_entry->flags) & CXL_FEATURE_F_CHANGEABLE))
		return -EOPNOTSUPP;

	cxl_sparing_ctx = devm_kzalloc(&cxlmd->dev, sizeof(*cxl_sparing_ctx),
				       GFP_KERNEL);
	if (!cxl_sparing_ctx)
		return -ENOMEM;

	*cxl_sparing_ctx = (struct cxl_mem_sparing_context){
		.get_feat_size = le16_to_cpu(feat_entry->get_feat_size),
		.set_feat_size = le16_to_cpu(feat_entry->set_feat_size),
		.get_version = feat_entry->get_feat_ver,
		.set_version = feat_entry->set_feat_ver,
		.effects = le16_to_cpu(feat_entry->effects),
		.cxlmd = cxlmd,
		.repair_type = desc->repair_type,
		.instance = repair_inst++,
	};
	uuid_copy(&cxl_sparing_ctx->repair_uuid, &desc->repair_uuid);

	ret = cxl_mem_sparing_get_attrbs(cxl_sparing_ctx);
	if (ret)
		return ret;

	if ((cxl_sparing_ctx->cap_soft_sparing &&
	     cxl_sparing_ctx->cap_hard_sparing) ||
	    cxl_sparing_ctx->cap_soft_sparing)
		cxl_sparing_ctx->persist_mode = 0;
	else if (cxl_sparing_ctx->cap_hard_sparing)
		cxl_sparing_ctx->persist_mode = 1;
	else
		return -EOPNOTSUPP;

	ras_feature->ft_type = RAS_FEAT_MEM_REPAIR;
	ras_feature->instance = cxl_sparing_ctx->instance;
	ras_feature->mem_repair_ops = desc->repair_ops;
	ras_feature->ctx = cxl_sparing_ctx;

	return 0;
}

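/*
 * Illustrative only (not part of the driver): each sparing granularity above
 * is registered as an EDAC mem_repair feature instance, so a repair is
 * expected to be driven from user space roughly as below. Paths, instance
 * numbers and the exact attribute set are indicative and depend on the
 * sparing granularity and enumeration order:
 *
 *	cd /sys/bus/edac/devices/cxl_mem0/mem_repair1
 *	echo 0x3000 > dpa	# target DPA, checked against dpa_res
 *	echo 1 > channel
 *	echo 0 > rank
 *	echo 1 > repair		# EDAC_DO_MEM_REPAIR -> cxl_do_mem_sparing()
 */
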
/*
 * CXL memory soft PPR & hard PPR control
 */
struct cxl_ppr_context {
	uuid_t repair_uuid;
	u8 instance;
	u16 get_feat_size;
	u16 set_feat_size;
	u8 get_version;
	u8 set_version;
	u16 effects;
	u8 op_class;
	u8 op_subclass;
	bool cap_dpa;
	bool cap_nib_mask;
	bool media_accessible;
	bool data_retained;
	struct cxl_memdev *cxlmd;
	enum edac_mem_repair_type repair_type;
	bool persist_mode;
	u64 dpa;
	u32 nibble_mask;
};

/*
 * See CXL rev 3.2 @8.2.10.7.2.1 Table 8-128 sPPR Feature Readable Attributes
 *
 * See CXL rev 3.2 @8.2.10.7.2.2 Table 8-131 hPPR Feature Readable Attributes
 */

#define CXL_PPR_OP_CAP_DEVICE_INITIATED	BIT(0)
#define CXL_PPR_OP_MODE_DEV_INITIATED	BIT(0)

#define CXL_PPR_FLAG_DPA_SUPPORT_MASK			BIT(0)
#define CXL_PPR_FLAG_NIB_SUPPORT_MASK			BIT(1)
#define CXL_PPR_FLAG_MEM_SPARING_EV_REC_SUPPORT_MASK	BIT(2)
#define CXL_PPR_FLAG_DEV_INITED_PPR_AT_BOOT_CAP_MASK	BIT(3)

#define CXL_PPR_RESTRICTION_FLAG_MEDIA_ACCESSIBLE_MASK	BIT(0)
#define CXL_PPR_RESTRICTION_FLAG_DATA_RETAINED_MASK	BIT(2)

#define CXL_PPR_SPARING_EV_REC_EN_MASK			BIT(0)
#define CXL_PPR_DEV_INITED_PPR_AT_BOOT_EN_MASK		BIT(1)

#define CXL_PPR_GET_CAP_DPA(flags) \
	FIELD_GET(CXL_PPR_FLAG_DPA_SUPPORT_MASK, flags)
#define CXL_PPR_GET_CAP_NIB_MASK(flags) \
	FIELD_GET(CXL_PPR_FLAG_NIB_SUPPORT_MASK, flags)
#define CXL_PPR_GET_MEDIA_ACCESSIBLE(restriction_flags) \
	(FIELD_GET(CXL_PPR_RESTRICTION_FLAG_MEDIA_ACCESSIBLE_MASK, \
		   restriction_flags) ^ 1)
#define CXL_PPR_GET_DATA_RETAINED(restriction_flags) \
	(FIELD_GET(CXL_PPR_RESTRICTION_FLAG_DATA_RETAINED_MASK, \
		   restriction_flags) ^ 1)

struct cxl_memdev_ppr_rd_attrbs {
	struct cxl_memdev_repair_rd_attrbs_hdr hdr;
	u8 ppr_flags;
	__le16 restriction_flags;
	u8 ppr_op_mode;
} __packed;

/*
 * See CXL rev 3.2 @8.2.10.7.1.2 Table 8-118 sPPR Maintenance Input Payload
 *
 * See CXL rev 3.2 @8.2.10.7.1.3 Table 8-119 hPPR Maintenance Input Payload
 */
struct cxl_memdev_ppr_maintenance_attrbs {
	u8 flags;
	__le64 dpa;
	u8 nibble_mask[3];
} __packed;

static int cxl_mem_ppr_get_attrbs(struct cxl_ppr_context *cxl_ppr_ctx)
{
	size_t rd_data_size = sizeof(struct cxl_memdev_ppr_rd_attrbs);
	struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
	struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
	u16 restriction_flags;
	size_t data_size;
	u16 return_code;

	struct cxl_memdev_ppr_rd_attrbs *rd_attrbs __free(kfree) =
		kmalloc(rd_data_size, GFP_KERNEL);
	if (!rd_attrbs)
		return -ENOMEM;

	data_size = cxl_get_feature(cxl_mbox, &cxl_ppr_ctx->repair_uuid,
				    CXL_GET_FEAT_SEL_CURRENT_VALUE, rd_attrbs,
				    rd_data_size, 0, &return_code);
	if (!data_size)
		return -EIO;

	cxl_ppr_ctx->op_class = rd_attrbs->hdr.op_class;
	cxl_ppr_ctx->op_subclass = rd_attrbs->hdr.op_subclass;
	cxl_ppr_ctx->cap_dpa = CXL_PPR_GET_CAP_DPA(rd_attrbs->ppr_flags);
	cxl_ppr_ctx->cap_nib_mask =
		CXL_PPR_GET_CAP_NIB_MASK(rd_attrbs->ppr_flags);

	restriction_flags = le16_to_cpu(rd_attrbs->restriction_flags);
	cxl_ppr_ctx->media_accessible =
		CXL_PPR_GET_MEDIA_ACCESSIBLE(restriction_flags);
	cxl_ppr_ctx->data_retained =
		CXL_PPR_GET_DATA_RETAINED(restriction_flags);

	return 0;
}

static int cxl_mem_perform_ppr(struct cxl_ppr_context *cxl_ppr_ctx)
{
	struct cxl_memdev_ppr_maintenance_attrbs maintenance_attrbs;
	struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
	struct cxl_mem_repair_attrbs attrbs = { 0 };

	struct rw_semaphore *region_lock __free(rwsem_read_release) =
		rwsem_read_intr_acquire(&cxl_region_rwsem);
	if (!region_lock)
		return -EINTR;

	struct rw_semaphore *dpa_lock __free(rwsem_read_release) =
		rwsem_read_intr_acquire(&cxl_dpa_rwsem);
	if (!dpa_lock)
		return -EINTR;

	if (!cxl_ppr_ctx->media_accessible || !cxl_ppr_ctx->data_retained) {
		/* Memory to repair must be offline */
		if (cxl_is_memdev_memory_online(cxlmd))
			return -EBUSY;
	} else {
		if (cxl_is_memdev_memory_online(cxlmd)) {
			/* Check memory to repair is from the current boot */
			attrbs.repair_type = CXL_PPR;
			attrbs.dpa = cxl_ppr_ctx->dpa;
			attrbs.nibble_mask = cxl_ppr_ctx->nibble_mask;
			if (!cxl_find_rec_dram(cxlmd, &attrbs) &&
			    !cxl_find_rec_gen_media(cxlmd, &attrbs))
				return -EINVAL;
		}
	}

	memset(&maintenance_attrbs, 0, sizeof(maintenance_attrbs));
	maintenance_attrbs.flags = 0;
	maintenance_attrbs.dpa = cpu_to_le64(cxl_ppr_ctx->dpa);
	put_unaligned_le24(cxl_ppr_ctx->nibble_mask,
			   maintenance_attrbs.nibble_mask);

	return cxl_perform_maintenance(&cxlmd->cxlds->cxl_mbox,
				       cxl_ppr_ctx->op_class,
				       cxl_ppr_ctx->op_subclass,
				       &maintenance_attrbs,
				       sizeof(maintenance_attrbs));
}

static int cxl_ppr_get_repair_type(struct device *dev, void *drv_data,
				   const char **repair_type)
{
	*repair_type = edac_repair_type[EDAC_REPAIR_PPR];

	return 0;
}

static int cxl_ppr_get_persist_mode(struct device *dev, void *drv_data,
				    bool *persist_mode)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;

	*persist_mode = cxl_ppr_ctx->persist_mode;

	return 0;
}

static int cxl_get_ppr_safe_when_in_use(struct device *dev, void *drv_data,
					bool *safe)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;

	*safe = cxl_ppr_ctx->media_accessible & cxl_ppr_ctx->data_retained;

	return 0;
}

static int cxl_ppr_get_min_dpa(struct device *dev, void *drv_data, u64 *min_dpa)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
	struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	*min_dpa = cxlds->dpa_res.start;

	return 0;
}

static int cxl_ppr_get_max_dpa(struct device *dev, void *drv_data, u64 *max_dpa)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
	struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	*max_dpa = cxlds->dpa_res.end;

	return 0;
}

static int cxl_ppr_get_dpa(struct device *dev, void *drv_data, u64 *dpa)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;

	*dpa = cxl_ppr_ctx->dpa;

	return 0;
}

static int cxl_ppr_set_dpa(struct device *dev, void *drv_data, u64 dpa)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
	struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	if (dpa < cxlds->dpa_res.start || dpa > cxlds->dpa_res.end)
		return -EINVAL;

	cxl_ppr_ctx->dpa = dpa;

	return 0;
}

static int cxl_ppr_get_nibble_mask(struct device *dev, void *drv_data,
				   u32 *nibble_mask)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;

	*nibble_mask = cxl_ppr_ctx->nibble_mask;

	return 0;
}

static int cxl_ppr_set_nibble_mask(struct device *dev, void *drv_data,
				   u32 nibble_mask)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;

	cxl_ppr_ctx->nibble_mask = nibble_mask;

	return 0;
}

static int cxl_do_ppr(struct device *dev, void *drv_data, u32 val)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;

	if (!cxl_ppr_ctx->dpa || val != EDAC_DO_MEM_REPAIR)
		return -EINVAL;

	return cxl_mem_perform_ppr(cxl_ppr_ctx);
}

static const struct edac_mem_repair_ops cxl_sppr_ops = {
	.get_repair_type = cxl_ppr_get_repair_type,
	.get_persist_mode = cxl_ppr_get_persist_mode,
	.get_repair_safe_when_in_use = cxl_get_ppr_safe_when_in_use,
	.get_min_dpa = cxl_ppr_get_min_dpa,
	.get_max_dpa = cxl_ppr_get_max_dpa,
	.get_dpa = cxl_ppr_get_dpa,
	.set_dpa = cxl_ppr_set_dpa,
	.get_nibble_mask = cxl_ppr_get_nibble_mask,
	.set_nibble_mask = cxl_ppr_set_nibble_mask,
	.do_repair = cxl_do_ppr,
};

static int cxl_memdev_soft_ppr_init(struct cxl_memdev *cxlmd,
				    struct edac_dev_feature *ras_feature,
				    u8 repair_inst)
{
	struct cxl_ppr_context *cxl_sppr_ctx;
	struct cxl_feat_entry *feat_entry;
	int ret;

	feat_entry = cxl_feature_info(to_cxlfs(cxlmd->cxlds),
				      &CXL_FEAT_SPPR_UUID);
	if (IS_ERR(feat_entry))
		return -EOPNOTSUPP;

	if (!(le32_to_cpu(feat_entry->flags) & CXL_FEATURE_F_CHANGEABLE))
		return -EOPNOTSUPP;

	cxl_sppr_ctx =
		devm_kzalloc(&cxlmd->dev, sizeof(*cxl_sppr_ctx), GFP_KERNEL);
	if (!cxl_sppr_ctx)
		return -ENOMEM;

	*cxl_sppr_ctx = (struct cxl_ppr_context){
		.get_feat_size = le16_to_cpu(feat_entry->get_feat_size),
		.set_feat_size = le16_to_cpu(feat_entry->set_feat_size),
		.get_version = feat_entry->get_feat_ver,
		.set_version = feat_entry->set_feat_ver,
		.effects = le16_to_cpu(feat_entry->effects),
		.cxlmd = cxlmd,
		.repair_type = EDAC_REPAIR_PPR,
		.persist_mode = 0,
		.instance = repair_inst,
	};
	uuid_copy(&cxl_sppr_ctx->repair_uuid, &CXL_FEAT_SPPR_UUID);

	ret = cxl_mem_ppr_get_attrbs(cxl_sppr_ctx);
	if (ret)
		return ret;

	ras_feature->ft_type = RAS_FEAT_MEM_REPAIR;
	ras_feature->instance = cxl_sppr_ctx->instance;
	ras_feature->mem_repair_ops = &cxl_sppr_ops;
	ras_feature->ctx = cxl_sppr_ctx;

	return 0;
}

int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd)
{
	struct edac_dev_feature ras_features[CXL_NR_EDAC_DEV_FEATURES];
	int num_ras_features = 0;
	u8 repair_inst = 0;
	int rc;

	if (IS_ENABLED(CONFIG_CXL_EDAC_SCRUB)) {
		rc = cxl_memdev_scrub_init(cxlmd, &ras_features[num_ras_features], 0);
		if (rc < 0 && rc != -EOPNOTSUPP)
			return rc;

		if (rc != -EOPNOTSUPP)
			num_ras_features++;
	}

	if (IS_ENABLED(CONFIG_CXL_EDAC_ECS)) {
		rc = cxl_memdev_ecs_init(cxlmd, &ras_features[num_ras_features]);
		if (rc < 0 && rc != -EOPNOTSUPP)
			return rc;

		if (rc != -EOPNOTSUPP)
			num_ras_features++;
	}

	if (IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR)) {
		for (int i = 0; i < CXL_MEM_SPARING_MAX; i++) {
			rc = cxl_memdev_sparing_init(cxlmd,
						     &ras_features[num_ras_features],
						     &mem_sparing_desc[i], repair_inst);
			if (rc == -EOPNOTSUPP)
				continue;
			if (rc < 0)
				return rc;

			repair_inst++;
			num_ras_features++;
		}

		rc = cxl_memdev_soft_ppr_init(cxlmd, &ras_features[num_ras_features],
					      repair_inst);
		if (rc < 0 && rc != -EOPNOTSUPP)
			return rc;

		if (rc != -EOPNOTSUPP) {
			repair_inst++;
			num_ras_features++;
		}

		if (repair_inst) {
			struct cxl_mem_err_rec *array_rec =
				devm_kzalloc(&cxlmd->dev, sizeof(*array_rec),
					     GFP_KERNEL);
			if (!array_rec)
				return -ENOMEM;

			xa_init(&array_rec->rec_gen_media);
			xa_init(&array_rec->rec_dram);
			cxlmd->err_rec_array = array_rec;
		}
	}

	if (!num_ras_features)
		return -EINVAL;

	char *cxl_dev_name __free(kfree) =
		kasprintf(GFP_KERNEL, "cxl_%s", dev_name(&cxlmd->dev));
	if (!cxl_dev_name)
		return -ENOMEM;

	return edac_dev_register(&cxlmd->dev, cxl_dev_name, NULL,
				 num_ras_features, ras_features);
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_memdev_edac_register, "CXL");

int devm_cxl_region_edac_register(struct cxl_region *cxlr)
{
	struct edac_dev_feature ras_features[CXL_NR_EDAC_DEV_FEATURES];
	int num_ras_features = 0;
	int rc;

	if (!IS_ENABLED(CONFIG_CXL_EDAC_SCRUB))
		return 0;

	rc = cxl_region_scrub_init(cxlr, &ras_features[num_ras_features], 0);
	if (rc < 0)
		return rc;

	num_ras_features++;

	char *cxl_dev_name __free(kfree) =
		kasprintf(GFP_KERNEL, "cxl_%s", dev_name(&cxlr->dev));
	if (!cxl_dev_name)
		return -ENOMEM;

	return edac_dev_register(&cxlr->dev, cxl_dev_name, NULL,
				 num_ras_features, ras_features);
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_region_edac_register, "CXL");

void devm_cxl_memdev_edac_release(struct cxl_memdev *cxlmd)
{
	struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
	struct cxl_event_gen_media *rec_gen_media;
	struct cxl_event_dram *rec_dram;
	unsigned long index;

	if (!IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR) || !array_rec)
		return;

	xa_for_each(&array_rec->rec_dram, index, rec_dram)
		kfree(rec_dram);
	xa_destroy(&array_rec->rec_dram);

	xa_for_each(&array_rec->rec_gen_media, index, rec_gen_media)
		kfree(rec_gen_media);
	xa_destroy(&array_rec->rec_gen_media);
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_memdev_edac_release, "CXL");
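
/*
 * Illustrative only (assumed caller, not part of this file): the memdev and
 * region drivers are expected to invoke the registration helpers above from
 * their probe paths, roughly as:
 *
 *	rc = devm_cxl_memdev_edac_register(cxlmd);
 *	if (rc)
 *		dev_dbg(&cxlmd->dev, "CXL EDAC registration failed: %d\n", rc);
 *
 * and to call devm_cxl_memdev_edac_release() when tearing down the memdev,
 * which frees the cached gen_media/DRAM event records used to validate
 * repair requests.
 */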