1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * CXL EDAC memory feature driver.
4 *
5 * Copyright (c) 2024-2025 HiSilicon Limited.
6 *
7 * - Supports functions to configure EDAC features of the
8 * CXL memory devices.
9 * - Registers with the EDAC device subsystem driver to expose
10 * the features sysfs attributes to the user for configuring
11 * CXL memory RAS feature.
12 */
13
14 #include <linux/cleanup.h>
15 #include <linux/edac.h>
16 #include <linux/limits.h>
17 #include <linux/unaligned.h>
18 #include <linux/xarray.h>
19 #include <cxl/features.h>
20 #include <cxl.h>
21 #include <cxlmem.h>
22 #include "core.h"
23 #include "trace.h"
24
/*
 * NOTE(review): presumably the maximum number of EDAC features registered
 * per device; the user of this constant is not visible in this part of the
 * file - confirm against the registration code.
 */
#define CXL_NR_EDAC_DEV_FEATURES 7

/* Value of cxl_memdev::scrub_region_id when no region set the scrub rate */
#define CXL_SCRUB_NO_REGION -1
28
/*
 * Per-instance patrol scrub state, passed to the EDAC scrub callbacks as
 * their drv_data. Exactly one of @cxlmd / @cxlr is set by the init helpers.
 */
struct cxl_patrol_scrub_context {
	u8 instance;			/* EDAC scrub instance number */
	u16 get_feat_size;		/* Get Feature size from the feature entry */
	u16 set_feat_size;		/* Set Feature size from the feature entry */
	u8 get_version;			/* Get Feature version from the feature entry */
	u8 set_version;			/* Set Feature version, used for cxl_set_feature() */
	u16 effects;			/* effects field from the feature entry */
	struct cxl_memdev *cxlmd;	/* target memdev for a device instance */
	struct cxl_region *cxlr;	/* target region; NULL selects the device path */
};
39
40 /*
41 * See CXL spec rev 3.2 @8.2.10.9.11.1 Table 8-222 Device Patrol Scrub Control
42 * Feature Readable Attributes.
43 */
/* Payload layout returned by Get Feature for the patrol scrub feature. */
struct cxl_scrub_rd_attrbs {
	u8 scrub_cycle_cap;		/* see CXL_SCRUB_CONTROL_CHANGEABLE / _REALTIME */
	__le16 scrub_cycle_hours;	/* bits 7:0 current cycle, bits 15:8 minimum cycle */
	u8 scrub_flags;			/* bit 0: see CXL_SCRUB_CONTROL_ENABLE */
} __packed;
49
50 /*
51 * See CXL spec rev 3.2 @8.2.10.9.11.1 Table 8-223 Device Patrol Scrub Control
52 * Feature Writable Attributes.
53 */
/* Payload layout sent with Set Feature for the patrol scrub feature. */
struct cxl_scrub_wr_attrbs {
	u8 scrub_cycle_hours;	/* requested scrub cycle, in hours */
	u8 scrub_flags;		/* bit 0: see CXL_SCRUB_CONTROL_ENABLE */
} __packed;
58
/* Bits of cxl_scrub_rd_attrbs::scrub_cycle_cap */
#define CXL_SCRUB_CONTROL_CHANGEABLE BIT(0)
#define CXL_SCRUB_CONTROL_REALTIME BIT(1)
/* Fields of the 16-bit scrub cycle word read from the device */
#define CXL_SCRUB_CONTROL_CYCLE_MASK GENMASK(7, 0)
#define CXL_SCRUB_CONTROL_MIN_CYCLE_MASK GENMASK(15, 8)
/* Bit of the scrub flags byte */
#define CXL_SCRUB_CONTROL_ENABLE BIT(0)

/* Field extraction helpers for values read from the device */
#define CXL_GET_SCRUB_CYCLE_CHANGEABLE(cap) \
	FIELD_GET(CXL_SCRUB_CONTROL_CHANGEABLE, cap)
#define CXL_GET_SCRUB_CYCLE(cycle) \
	FIELD_GET(CXL_SCRUB_CONTROL_CYCLE_MASK, cycle)
#define CXL_GET_SCRUB_MIN_CYCLE(cycle) \
	FIELD_GET(CXL_SCRUB_CONTROL_MIN_CYCLE_MASK, cycle)
#define CXL_GET_SCRUB_EN_STS(flags) FIELD_GET(CXL_SCRUB_CONTROL_ENABLE, flags)

/* Field preparation helpers for values written to the device */
#define CXL_SET_SCRUB_CYCLE(cycle) \
	FIELD_PREP(CXL_SCRUB_CONTROL_CYCLE_MASK, cycle)
#define CXL_SET_SCRUB_EN(en) FIELD_PREP(CXL_SCRUB_CONTROL_ENABLE, en)
76
/*
 * Read the current patrol scrub attributes of a single device.
 *
 * @cap, @cycle and @flags receive the raw capability byte, the 16-bit cycle
 * word and the flags byte. @min_cycle is optional; when non-NULL it receives
 * the minimum-cycle field extracted from the cycle word.
 *
 * Returns 0 on success, -ENOMEM if the read buffer cannot be allocated, or
 * -EIO if cxl_get_feature() returned no data.
 */
static int cxl_mem_scrub_get_attrbs(struct cxl_mailbox *cxl_mbox, u8 *cap,
				    u16 *cycle, u8 *flags, u8 *min_cycle)
{
	struct cxl_scrub_rd_attrbs *attrs __free(kfree) =
		kzalloc(sizeof(*attrs), GFP_KERNEL);
	size_t nread;

	if (!attrs)
		return -ENOMEM;

	nread = cxl_get_feature(cxl_mbox, &CXL_FEAT_PATROL_SCRUB_UUID,
				CXL_GET_FEAT_SEL_CURRENT_VALUE, attrs,
				sizeof(*attrs), 0, NULL);
	if (nread == 0)
		return -EIO;

	*cap = attrs->scrub_cycle_cap;
	*cycle = le16_to_cpu(attrs->scrub_cycle_hours);
	*flags = attrs->scrub_flags;

	if (!min_cycle)
		return 0;

	*min_cycle = CXL_GET_SCRUB_MIN_CYCLE(*cycle);

	return 0;
}
101
/*
 * Read patrol scrub attributes for a scrub context.
 *
 * For a device context this is a single mailbox read. For a region context
 * every target memdev is queried under the region rwsem; @cap, @cycle and
 * @flags end up holding the values of the last target queried, while
 * @min_cycle (optional) receives the largest minimum cycle of all targets.
 *
 * Returns 0 on success, -EINTR if taking the region lock was interrupted,
 * or the error from reading a device.
 */
static int cxl_scrub_get_attrbs(struct cxl_patrol_scrub_context *cxl_ps_ctx,
				u8 *cap, u16 *cycle, u8 *flags, u8 *min_cycle)
{
	struct cxl_region *region = cxl_ps_ctx->cxlr;
	u8 region_min = 0;
	int idx, rc;

	if (!region)
		return cxl_mem_scrub_get_attrbs(
			&cxl_ps_ctx->cxlmd->cxlds->cxl_mbox, cap, cycle,
			flags, min_cycle);

	struct rw_semaphore *lock __free(rwsem_read_release) =
		rwsem_read_intr_acquire(&cxl_region_rwsem);
	if (!lock)
		return -EINTR;

	for (idx = 0; idx < region->params.nr_targets; idx++) {
		struct cxl_memdev *memdev =
			cxled_to_memdev(region->params.targets[idx]);

		rc = cxl_mem_scrub_get_attrbs(&memdev->cxlds->cxl_mbox, cap,
					      cycle, flags, min_cycle);
		if (rc)
			return rc;

		/*
		 * A cycle is only acceptable to the whole region if every
		 * backing memdev supports it, so track the largest minimum.
		 */
		if (min_cycle && *min_cycle > region_min)
			region_min = *min_cycle;
	}

	if (min_cycle)
		*min_cycle = region_min;

	return 0;
}
149
/*
 * Apply a scrub cycle and flags to every memdev backing the context's region.
 *
 * The same Set Feature payload is sent to each target in order; the first
 * failure aborts the walk and is returned, leaving earlier targets updated.
 * Each memdev's cached scrub cycle and owning region id are refreshed when
 * the cycle changes, with a log message if another region had set it.
 *
 * Returns 0 on success, -EINTR if taking the region lock was interrupted,
 * or the error from cxl_set_feature().
 */
static int cxl_scrub_set_attrbs_region(struct device *dev,
				       struct cxl_patrol_scrub_context *cxl_ps_ctx,
				       u8 cycle, u8 flags)
{
	struct cxl_scrub_wr_attrbs payload = {
		.scrub_cycle_hours = cycle,
		.scrub_flags = flags,
	};
	struct cxl_region *region;
	int idx, rc;

	struct rw_semaphore *lock __free(rwsem_read_release) =
		rwsem_read_intr_acquire(&cxl_region_rwsem);
	if (!lock)
		return -EINTR;

	region = cxl_ps_ctx->cxlr;

	for (idx = 0; idx < region->params.nr_targets; idx++) {
		struct cxl_memdev *memdev =
			cxled_to_memdev(region->params.targets[idx]);

		rc = cxl_set_feature(&memdev->cxlds->cxl_mbox,
				     &CXL_FEAT_PATROL_SCRUB_UUID,
				     cxl_ps_ctx->set_version, &payload,
				     sizeof(payload),
				     CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET,
				     0, NULL);
		if (rc)
			return rc;

		if (memdev->scrub_cycle == cycle)
			continue;

		if (memdev->scrub_region_id != CXL_SCRUB_NO_REGION)
			dev_info(dev,
				 "Device scrub rate(%d hours) set by region%d rate overwritten by region%d scrub rate(%d hours)\n",
				 memdev->scrub_cycle, memdev->scrub_region_id,
				 region->id, cycle);

		memdev->scrub_cycle = cycle;
		memdev->scrub_region_id = region->id;
	}

	return 0;
}
199
/*
 * Apply a scrub cycle and flags to the single memdev of a device context.
 *
 * On success the memdev's cached cycle is updated and its owning region id
 * is reset to CXL_SCRUB_NO_REGION when the cycle changed; a message is logged
 * if the previous rate had been set through a region.
 *
 * Returns 0 on success or the error from cxl_set_feature().
 */
static int cxl_scrub_set_attrbs_device(struct device *dev,
				       struct cxl_patrol_scrub_context *cxl_ps_ctx,
				       u8 cycle, u8 flags)
{
	struct cxl_memdev *memdev = cxl_ps_ctx->cxlmd;
	struct cxl_scrub_wr_attrbs payload = {
		.scrub_cycle_hours = cycle,
		.scrub_flags = flags,
	};
	int rc;

	rc = cxl_set_feature(&memdev->cxlds->cxl_mbox,
			     &CXL_FEAT_PATROL_SCRUB_UUID,
			     cxl_ps_ctx->set_version, &payload,
			     sizeof(payload),
			     CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET, 0,
			     NULL);
	if (rc)
		return rc;

	if (memdev->scrub_cycle == cycle)
		return 0;

	if (memdev->scrub_region_id != CXL_SCRUB_NO_REGION)
		dev_info(dev,
			 "Device scrub rate(%d hours) set by region%d rate overwritten with device local scrub rate(%d hours)\n",
			 memdev->scrub_cycle, memdev->scrub_region_id, cycle);

	memdev->scrub_cycle = cycle;
	memdev->scrub_region_id = CXL_SCRUB_NO_REGION;

	return 0;
}
235
/*
 * Dispatch a scrub attribute write to the region or the device
 * implementation, depending on whether the context is bound to a region.
 */
static int cxl_scrub_set_attrbs(struct device *dev,
				struct cxl_patrol_scrub_context *cxl_ps_ctx,
				u8 cycle, u8 flags)
{
	if (!cxl_ps_ctx->cxlr)
		return cxl_scrub_set_attrbs_device(dev, cxl_ps_ctx, cycle,
						   flags);

	return cxl_scrub_set_attrbs_region(dev, cxl_ps_ctx, cycle, flags);
}
245
/*
 * EDAC callback: report whether background patrol scrub is enabled, taken
 * from the enable bit of the scrub flags read from the device(s).
 */
static int cxl_patrol_scrub_get_enabled_bg(struct device *dev, void *drv_data,
					   bool *enabled)
{
	u16 cycle_word;
	u8 caps, scrub_flags;
	int rc;

	rc = cxl_scrub_get_attrbs(drv_data, &caps, &cycle_word, &scrub_flags,
				  NULL);
	if (rc)
		return rc;

	*enabled = CXL_GET_SCRUB_EN_STS(scrub_flags);

	return 0;
}
262
/*
 * EDAC callback: enable or disable background patrol scrub.
 *
 * Requires CAP_SYS_RAWIO. The current scrub cycle is read back and rewritten
 * unchanged together with the new enable flag.
 */
static int cxl_patrol_scrub_set_enabled_bg(struct device *dev, void *drv_data,
					   bool enable)
{
	struct cxl_patrol_scrub_context *scrub_ctx = drv_data;
	u8 caps, cur_flags;
	u16 cycle_word;
	int rc;

	if (!capable(CAP_SYS_RAWIO))
		return -EPERM;

	rc = cxl_scrub_get_attrbs(scrub_ctx, &caps, &cycle_word, &cur_flags,
				  NULL);
	if (rc)
		return rc;

	return cxl_scrub_set_attrbs(dev, scrub_ctx,
				    CXL_GET_SCRUB_CYCLE(cycle_word),
				    CXL_SET_SCRUB_EN(enable));
}
283
/*
 * EDAC callback: report the minimum supported scrub cycle in seconds
 * (the device value is in hours).
 */
static int cxl_patrol_scrub_get_min_scrub_cycle(struct device *dev,
						void *drv_data, u32 *min)
{
	u8 caps, scrub_flags, min_hours;
	u16 cycle_word;
	int rc;

	rc = cxl_scrub_get_attrbs(drv_data, &caps, &cycle_word, &scrub_flags,
				  &min_hours);
	if (rc)
		return rc;

	*min = min_hours * 3600;

	return 0;
}
300
/*
 * EDAC callback: report the maximum scrub cycle in seconds. The cycle field
 * is a single byte of hours, so the limit is U8_MAX hours.
 */
static int cxl_patrol_scrub_get_max_scrub_cycle(struct device *dev,
						void *drv_data, u32 *max)
{
	/* Max set by register size */
	*max = U8_MAX * 3600;
	return 0;
}
308
/*
 * EDAC callback: report the current scrub cycle in seconds (the device
 * value is in hours).
 */
static int cxl_patrol_scrub_get_scrub_cycle(struct device *dev, void *drv_data,
					    u32 *scrub_cycle_secs)
{
	u8 caps, scrub_flags;
	u16 cycle_word;
	int rc;

	rc = cxl_scrub_get_attrbs(drv_data, &caps, &cycle_word, &scrub_flags,
				  NULL);
	if (rc)
		return rc;

	*scrub_cycle_secs = CXL_GET_SCRUB_CYCLE(cycle_word) * 3600;

	return 0;
}
325
/*
 * EDAC callback: set the patrol scrub cycle.
 *
 * @scrub_cycle_secs is converted to whole hours (rounded down) because the
 * device field is expressed in hours. The current flags read from the device
 * are written back unchanged.
 *
 * Returns 0 on success, -EPERM without CAP_SYS_RAWIO, -EOPNOTSUPP if the
 * device reports the cycle as not changeable, -EINVAL if the requested cycle
 * is below the supported minimum or does not fit the 8-bit hours field, or
 * the error from reading/writing the device attributes.
 */
static int cxl_patrol_scrub_set_scrub_cycle(struct device *dev, void *drv_data,
					    u32 scrub_cycle_secs)
{
	struct cxl_patrol_scrub_context *ctx = drv_data;
	u32 req_hours = scrub_cycle_secs / 3600;
	u8 cap, wr_cycle, flags, min_cycle;
	u8 scrub_cycle_hours;
	u16 rd_cycle;
	int ret;

	if (!capable(CAP_SYS_RAWIO))
		return -EPERM;

	ret = cxl_scrub_get_attrbs(ctx, &cap, &rd_cycle, &flags, &min_cycle);
	if (ret)
		return ret;

	if (!CXL_GET_SCRUB_CYCLE_CHANGEABLE(cap))
		return -EOPNOTSUPP;

	/*
	 * The hours field is 8 bits wide. Reject larger requests instead of
	 * letting them wrap (e.g. 256 hours would otherwise become 0).
	 */
	if (req_hours > U8_MAX) {
		dev_dbg(dev,
			"CXL patrol scrub cycle(%u hours) exceeds maximum %d hours\n",
			req_hours, U8_MAX);
		return -EINVAL;
	}
	scrub_cycle_hours = req_hours;

	if (scrub_cycle_hours < min_cycle) {
		dev_dbg(dev, "Invalid CXL patrol scrub cycle(%d) to set\n",
			scrub_cycle_hours);
		dev_dbg(dev,
			"Minimum supported CXL patrol scrub cycle in hour %d\n",
			min_cycle);
		return -EINVAL;
	}
	wr_cycle = CXL_SET_SCRUB_CYCLE(scrub_cycle_hours);

	return cxl_scrub_set_attrbs(dev, ctx, wr_cycle, flags);
}
357
/*
 * EDAC scrub operations for CXL patrol scrub; shared by the memdev and
 * region instances (the context decides which path is taken).
 */
static const struct edac_scrub_ops cxl_ps_scrub_ops = {
	.get_enabled_bg = cxl_patrol_scrub_get_enabled_bg,
	.set_enabled_bg = cxl_patrol_scrub_set_enabled_bg,
	.get_min_cycle = cxl_patrol_scrub_get_min_scrub_cycle,
	.get_max_cycle = cxl_patrol_scrub_get_max_scrub_cycle,
	.get_cycle_duration = cxl_patrol_scrub_get_scrub_cycle,
	.set_cycle_duration = cxl_patrol_scrub_set_scrub_cycle,
};
366
/*
 * Set up a patrol scrub EDAC feature for a single memdev.
 *
 * Requires the device to advertise the patrol scrub feature as changeable.
 * Allocates a device-managed context, caches the current scrub cycle in the
 * memdev and fills in @ras_feature for registration.
 *
 * Returns 0 on success, -EOPNOTSUPP if the feature is missing or not
 * changeable, -ENOMEM on allocation failure, or the error from reading the
 * current attributes.
 */
static int cxl_memdev_scrub_init(struct cxl_memdev *cxlmd,
				 struct edac_dev_feature *ras_feature,
				 u8 scrub_inst)
{
	struct cxl_patrol_scrub_context *ctx;
	struct cxl_feat_entry *entry;
	u8 caps, scrub_flags;
	u16 cycle_word;
	int rc;

	entry = cxl_feature_info(to_cxlfs(cxlmd->cxlds),
				 &CXL_FEAT_PATROL_SCRUB_UUID);
	if (IS_ERR(entry))
		return -EOPNOTSUPP;

	if ((le32_to_cpu(entry->flags) & CXL_FEATURE_F_CHANGEABLE) == 0)
		return -EOPNOTSUPP;

	/* Zeroed allocation: ctx->cxlr stays NULL for a device context */
	ctx = devm_kzalloc(&cxlmd->dev, sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	ctx->get_feat_size = le16_to_cpu(entry->get_feat_size);
	ctx->set_feat_size = le16_to_cpu(entry->set_feat_size);
	ctx->get_version = entry->get_feat_ver;
	ctx->set_version = entry->set_feat_ver;
	ctx->effects = le16_to_cpu(entry->effects);
	ctx->instance = scrub_inst;
	ctx->cxlmd = cxlmd;

	rc = cxl_mem_scrub_get_attrbs(&cxlmd->cxlds->cxl_mbox, &caps,
				      &cycle_word, &scrub_flags, NULL);
	if (rc)
		return rc;

	cxlmd->scrub_cycle = CXL_GET_SCRUB_CYCLE(cycle_word);
	cxlmd->scrub_region_id = CXL_SCRUB_NO_REGION;

	ras_feature->ft_type = RAS_FEAT_SCRUB;
	ras_feature->instance = ctx->instance;
	ras_feature->scrub_ops = &cxl_ps_scrub_ops;
	ras_feature->ctx = ctx;

	return 0;
}
414
/*
 * Set up a patrol scrub EDAC feature for a region.
 *
 * Every target memdev must advertise the patrol scrub feature as changeable;
 * each one's current scrub cycle is cached and its owning region id reset.
 * The context's feature sizes/versions are taken from the last target's
 * feature entry.
 *
 * Returns 0 on success, -EOPNOTSUPP if the region has no targets or any
 * target lacks a changeable patrol scrub feature, -ENOMEM on allocation
 * failure, or the error from reading a target's current attributes.
 */
static int cxl_region_scrub_init(struct cxl_region *cxlr,
				 struct edac_dev_feature *ras_feature,
				 u8 scrub_inst)
{
	struct cxl_patrol_scrub_context *cxl_ps_ctx;
	struct cxl_region_params *p = &cxlr->params;
	struct cxl_feat_entry *feat_entry = NULL;
	struct cxl_memdev *cxlmd;
	u8 cap, flags;
	u16 cycle;
	int i, rc;

	/*
	 * The cxl_region_rwsem must be held if the code below is used in a context
	 * other than when the region is in the probe state, as shown here.
	 */
	for (i = 0; i < p->nr_targets; i++) {
		struct cxl_endpoint_decoder *cxled = p->targets[i];

		cxlmd = cxled_to_memdev(cxled);
		feat_entry = cxl_feature_info(to_cxlfs(cxlmd->cxlds),
					      &CXL_FEAT_PATROL_SCRUB_UUID);
		if (IS_ERR(feat_entry))
			return -EOPNOTSUPP;

		if (!(le32_to_cpu(feat_entry->flags) &
		      CXL_FEATURE_F_CHANGEABLE))
			return -EOPNOTSUPP;

		rc = cxl_mem_scrub_get_attrbs(&cxlmd->cxlds->cxl_mbox, &cap,
					      &cycle, &flags, NULL);
		if (rc)
			return rc;

		cxlmd->scrub_cycle = CXL_GET_SCRUB_CYCLE(cycle);
		cxlmd->scrub_region_id = CXL_SCRUB_NO_REGION;
	}

	/*
	 * With no targets the loop never ran and feat_entry is still NULL;
	 * there is nothing to scrub and nothing to describe the feature.
	 */
	if (!feat_entry)
		return -EOPNOTSUPP;

	cxl_ps_ctx = devm_kzalloc(&cxlr->dev, sizeof(*cxl_ps_ctx), GFP_KERNEL);
	if (!cxl_ps_ctx)
		return -ENOMEM;

	*cxl_ps_ctx = (struct cxl_patrol_scrub_context){
		.get_feat_size = le16_to_cpu(feat_entry->get_feat_size),
		.set_feat_size = le16_to_cpu(feat_entry->set_feat_size),
		.get_version = feat_entry->get_feat_ver,
		.set_version = feat_entry->set_feat_ver,
		.effects = le16_to_cpu(feat_entry->effects),
		.instance = scrub_inst,
		.cxlr = cxlr,
	};

	ras_feature->ft_type = RAS_FEAT_SCRUB;
	ras_feature->instance = cxl_ps_ctx->instance;
	ras_feature->scrub_ops = &cxl_ps_scrub_ops;
	ras_feature->ctx = cxl_ps_ctx;

	return 0;
}
474
/* Per-memdev ECS feature state, passed to the EDAC ECS callbacks as drv_data. */
struct cxl_ecs_context {
	u16 num_media_frus;		/* FRU count derived from the Get Feature size */
	u16 get_feat_size;		/* Get Feature size; sizes the read buffer */
	u16 set_feat_size;		/* Set Feature size; sizes the write buffer */
	u8 get_version;			/* Get Feature version from the feature entry */
	u8 set_version;			/* Set Feature version, used for cxl_set_feature() */
	u16 effects;			/* effects field from the feature entry */
	struct cxl_memdev *cxlmd;	/* memdev the feature belongs to */
};
484
485 /*
486 * See CXL spec rev 3.2 @8.2.10.9.11.2 Table 8-225 DDR5 ECS Control Feature
487 * Readable Attributes.
488 */
/* Readable ECS attributes of one media FRU. */
struct cxl_ecs_fru_rd_attrbs {
	u8 ecs_cap;
	__le16 ecs_config;	/* threshold / count mode / reset bits, see CXL_ECS_*_MASK */
	u8 ecs_flags;
} __packed;

/* Get Feature payload: one log capability byte followed by per-FRU entries. */
struct cxl_ecs_rd_attrbs {
	u8 ecs_log_cap;		/* bits 1:0 log entry type */
	struct cxl_ecs_fru_rd_attrbs fru_attrbs[];
} __packed;
499
500 /*
501 * See CXL spec rev 3.2 @8.2.10.9.11.2 Table 8-226 DDR5 ECS Control Feature
502 * Writable Attributes.
503 */
/* Writable ECS attributes of one media FRU. */
struct cxl_ecs_fru_wr_attrbs {
	__le16 ecs_config;	/* threshold / count mode / reset bits, see CXL_ECS_*_MASK */
} __packed;

/* Set Feature payload: one log capability byte followed by per-FRU entries. */
struct cxl_ecs_wr_attrbs {
	u8 ecs_log_cap;		/* bits 1:0 log entry type */
	struct cxl_ecs_fru_wr_attrbs fru_attrbs[];
} __packed;
512
/* Field of ecs_log_cap */
#define CXL_ECS_LOG_ENTRY_TYPE_MASK GENMASK(1, 0)
/* NOTE(review): not referenced in the visible code; name suggests an ecs_cap bit - confirm */
#define CXL_ECS_REALTIME_REPORT_CAP_MASK BIT(0)
/* Fields of the per-FRU ecs_config word */
#define CXL_ECS_THRESHOLD_COUNT_MASK GENMASK(2, 0)
#define CXL_ECS_COUNT_MODE_MASK BIT(3)
#define CXL_ECS_RESET_COUNTER_MASK BIT(4)
/* The only value accepted by the reset-counter setter */
#define CXL_ECS_RESET_COUNTER 1
519
/* Threshold counts accepted by the threshold setter */
enum {
	ECS_THRESHOLD_256 = 256,
	ECS_THRESHOLD_1024 = 1024,
	ECS_THRESHOLD_4096 = 4096,
};

/* Encodings of the threshold-count field (CXL_ECS_THRESHOLD_COUNT_MASK) */
enum {
	ECS_THRESHOLD_IDX_256 = 3,
	ECS_THRESHOLD_IDX_1024 = 4,
	ECS_THRESHOLD_IDX_4096 = 5,
};

/*
 * Maps a threshold-count field encoding to its count. Only encodings 3..5
 * are populated; entries 0..2 are implicitly zero and the table ends at 5.
 */
static const u16 ecs_supp_threshold[] = {
	[ECS_THRESHOLD_IDX_256] = 256,
	[ECS_THRESHOLD_IDX_1024] = 1024,
	[ECS_THRESHOLD_IDX_4096] = 4096,
};
537
/* Values of the log entry type field (CXL_ECS_LOG_ENTRY_TYPE_MASK) */
enum {
	ECS_LOG_ENTRY_TYPE_DRAM = 0x0,
	ECS_LOG_ENTRY_TYPE_MEM_MEDIA_FRU = 0x1,
};

/* Values of the count mode bit (CXL_ECS_COUNT_MODE_MASK) */
enum cxl_ecs_count_mode {
	ECS_MODE_COUNTS_ROWS = 0,	/* ECS counts rows with errors */
	ECS_MODE_COUNTS_CODEWORDS = 1,	/* ECS counts codewords with errors */
};
547
/*
 * Read the current ECS attributes and return the log capability byte and the
 * config word of media FRU @fru_id.
 *
 * @fru_id is used as an index without range checking; callers are expected
 * to pass a value below the context's num_media_frus.
 *
 * Returns 0 on success, -ENOMEM if the read buffer cannot be allocated, or
 * -EIO if cxl_get_feature() returned no data.
 */
static int cxl_mem_ecs_get_attrbs(struct device *dev,
				  struct cxl_ecs_context *cxl_ecs_ctx,
				  int fru_id, u8 *log_cap, u16 *config)
{
	struct cxl_mailbox *mbox = &cxl_ecs_ctx->cxlmd->cxlds->cxl_mbox;
	size_t buf_len = cxl_ecs_ctx->get_feat_size;

	struct cxl_ecs_rd_attrbs *cur __free(kvfree) =
		kvzalloc(buf_len, GFP_KERNEL);
	if (!cur)
		return -ENOMEM;

	if (!cxl_get_feature(mbox, &CXL_FEAT_ECS_UUID,
			     CXL_GET_FEAT_SEL_CURRENT_VALUE, cur, buf_len, 0,
			     NULL))
		return -EIO;

	*log_cap = cur->ecs_log_cap;
	*config = le16_to_cpu(cur->fru_attrbs[fru_id].ecs_config);

	return 0;
}
577
/*
 * Write the ECS attributes with a new log capability byte and a new config
 * word for media FRU @fru_id.
 *
 * Set Feature carries the config of every FRU, so the current values are
 * read first and copied for all FRUs before the entry of @fru_id is replaced.
 *
 * Returns 0 on success, -EINVAL if @fru_id is out of range or the
 * device-reported Set Feature size cannot hold an entry for every FRU,
 * -ENOMEM on allocation failure, -EIO if the current attributes cannot be
 * read, or the error from cxl_set_feature().
 */
static int cxl_mem_ecs_set_attrbs(struct device *dev,
				  struct cxl_ecs_context *cxl_ecs_ctx,
				  int fru_id, u8 log_cap, u16 config)
{
	struct cxl_memdev *cxlmd = cxl_ecs_ctx->cxlmd;
	struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
	struct cxl_ecs_fru_rd_attrbs *fru_rd_attrbs;
	struct cxl_ecs_fru_wr_attrbs *fru_wr_attrbs;
	size_t rd_data_size, wr_data_size;
	u16 num_media_frus, count;
	size_t data_size;

	num_media_frus = cxl_ecs_ctx->num_media_frus;
	rd_data_size = cxl_ecs_ctx->get_feat_size;
	wr_data_size = cxl_ecs_ctx->set_feat_size;

	if (fru_id < 0 || fru_id >= num_media_frus)
		return -EINVAL;

	/*
	 * num_media_frus is derived from the Get Feature size while the write
	 * buffer is sized from the Set Feature size. Both come from the
	 * device, so make sure the write buffer really has room for every FRU
	 * entry before filling it.
	 */
	if (wr_data_size < sizeof(struct cxl_ecs_wr_attrbs) +
			   num_media_frus * sizeof(struct cxl_ecs_fru_wr_attrbs))
		return -EINVAL;

	struct cxl_ecs_rd_attrbs *rd_attrbs __free(kvfree) =
		kvzalloc(rd_data_size, GFP_KERNEL);
	if (!rd_attrbs)
		return -ENOMEM;

	data_size = cxl_get_feature(cxl_mbox, &CXL_FEAT_ECS_UUID,
				    CXL_GET_FEAT_SEL_CURRENT_VALUE, rd_attrbs,
				    rd_data_size, 0, NULL);
	if (!data_size)
		return -EIO;

	struct cxl_ecs_wr_attrbs *wr_attrbs __free(kvfree) =
		kvzalloc(wr_data_size, GFP_KERNEL);
	if (!wr_attrbs)
		return -ENOMEM;

	/*
	 * Fill writable attributes from the current attributes read
	 * for all the media FRUs.
	 */
	fru_rd_attrbs = rd_attrbs->fru_attrbs;
	fru_wr_attrbs = wr_attrbs->fru_attrbs;
	wr_attrbs->ecs_log_cap = log_cap;
	for (count = 0; count < num_media_frus; count++)
		fru_wr_attrbs[count].ecs_config =
			fru_rd_attrbs[count].ecs_config;

	fru_wr_attrbs[fru_id].ecs_config = cpu_to_le16(config);

	return cxl_set_feature(cxl_mbox, &CXL_FEAT_ECS_UUID,
			       cxl_ecs_ctx->set_version, wr_attrbs,
			       wr_data_size,
			       CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET,
			       0, NULL);
}
628
/* Extract the log entry type field from @log_cap; @config is unused. */
static u8 cxl_get_ecs_log_entry_type(u8 log_cap, u16 config)
{
	u8 entry_type = FIELD_GET(CXL_ECS_LOG_ENTRY_TYPE_MASK, log_cap);

	return entry_type;
}
633
/*
 * Translate the threshold-count field of @config into a count; @log_cap is
 * unused.
 *
 * The field is 3 bits wide but only encodings ECS_THRESHOLD_IDX_256 ..
 * ECS_THRESHOLD_IDX_4096 have a table entry. Any other encoding returns 0
 * rather than indexing past the end of ecs_supp_threshold[].
 */
static u16 cxl_get_ecs_threshold(u8 log_cap, u16 config)
{
	u8 index = FIELD_GET(CXL_ECS_THRESHOLD_COUNT_MASK, config);

	if (index < ECS_THRESHOLD_IDX_256 || index > ECS_THRESHOLD_IDX_4096)
		return 0;

	return ecs_supp_threshold[index];
}
640
/* Extract the count mode bit from @config; @log_cap is unused. */
static u8 cxl_get_ecs_count_mode(u8 log_cap, u16 config)
{
	u8 mode = FIELD_GET(CXL_ECS_COUNT_MODE_MASK, config);

	return mode;
}
645
/*
 * Generate the EDAC getter cxl_ecs_get_<attrb>(): read the ECS attributes of
 * @fru_id from the device and decode them with cxl_get_ecs_<attrb>().
 */
#define CXL_ECS_GET_ATTR(attrb)						    \
	static int cxl_ecs_get_##attrb(struct device *dev, void *drv_data, \
				       int fru_id, u32 *val)		    \
	{								    \
		struct cxl_ecs_context *ctx = drv_data;			    \
		u8 log_cap;						    \
		u16 config;						    \
		int ret;						    \
									    \
		ret = cxl_mem_ecs_get_attrbs(dev, ctx, fru_id, &log_cap,    \
					     &config);			    \
		if (ret)						    \
			return ret;					    \
									    \
		*val = cxl_get_ecs_##attrb(log_cap, config);		    \
									    \
		return 0;						    \
	}

CXL_ECS_GET_ATTR(log_entry_type)
CXL_ECS_GET_ATTR(count_mode)
CXL_ECS_GET_ATTR(threshold)
668
669 static int cxl_set_ecs_log_entry_type(struct device *dev, u8 *log_cap,
670 u16 *config, u32 val)
671 {
672 if (val != ECS_LOG_ENTRY_TYPE_DRAM &&
673 val != ECS_LOG_ENTRY_TYPE_MEM_MEDIA_FRU)
674 return -EINVAL;
675
676 *log_cap = FIELD_PREP(CXL_ECS_LOG_ENTRY_TYPE_MASK, val);
677
678 return 0;
679 }
680
/*
 * Encode a threshold count (256, 1024 or 4096) into the threshold field of
 * @config. The field is cleared before @val is validated, so @config is
 * modified even when -EINVAL is returned for an unsupported count.
 */
static int cxl_set_ecs_threshold(struct device *dev, u8 *log_cap, u16 *config,
				 u32 val)
{
	u8 encoding;

	*config &= ~CXL_ECS_THRESHOLD_COUNT_MASK;

	if (val == ECS_THRESHOLD_256) {
		encoding = ECS_THRESHOLD_IDX_256;
	} else if (val == ECS_THRESHOLD_1024) {
		encoding = ECS_THRESHOLD_IDX_1024;
	} else if (val == ECS_THRESHOLD_4096) {
		encoding = ECS_THRESHOLD_IDX_4096;
	} else {
		dev_dbg(dev, "Invalid CXL ECS threshold count(%d) to set\n",
			val);
		dev_dbg(dev, "Supported ECS threshold counts: %u, %u, %u\n",
			ECS_THRESHOLD_256, ECS_THRESHOLD_1024,
			ECS_THRESHOLD_4096);
		return -EINVAL;
	}

	*config |= FIELD_PREP(CXL_ECS_THRESHOLD_COUNT_MASK, encoding);

	return 0;
}
710
/*
 * Encode a count mode (rows or codewords) into the count mode bit of
 * @config. Returns -EINVAL, leaving @config untouched, for any other value.
 */
static int cxl_set_ecs_count_mode(struct device *dev, u8 *log_cap, u16 *config,
				  u32 val)
{
	switch (val) {
	case ECS_MODE_COUNTS_ROWS:
	case ECS_MODE_COUNTS_CODEWORDS:
		break;
	default:
		dev_dbg(dev, "Invalid CXL ECS scrub mode(%d) to set\n", val);
		dev_dbg(dev,
			"Supported ECS Modes: 0: ECS counts rows with errors, 1: ECS counts codewords with errors\n");
		return -EINVAL;
	}

	*config = (*config & ~CXL_ECS_COUNT_MODE_MASK) |
		  FIELD_PREP(CXL_ECS_COUNT_MODE_MASK, val);

	return 0;
}
727
/*
 * Set the reset-counter bit in @config. Only CXL_ECS_RESET_COUNTER is
 * accepted; any other value returns -EINVAL and leaves @config untouched.
 */
static int cxl_set_ecs_reset_counter(struct device *dev, u8 *log_cap,
				     u16 *config, u32 val)
{
	u16 updated;

	if (val != CXL_ECS_RESET_COUNTER)
		return -EINVAL;

	updated = *config & ~CXL_ECS_RESET_COUNTER_MASK;
	updated |= FIELD_PREP(CXL_ECS_RESET_COUNTER_MASK, val);
	*config = updated;

	return 0;
}
739
/*
 * Generate the EDAC setter cxl_ecs_set_<attrb>(): requires CAP_SYS_RAWIO,
 * reads the current ECS attributes of @fru_id, lets cxl_set_ecs_<attrb>()
 * validate and encode @val, then writes the result back to the device.
 */
#define CXL_ECS_SET_ATTR(attrb)						    \
	static int cxl_ecs_set_##attrb(struct device *dev, void *drv_data, \
				       int fru_id, u32 val)		    \
	{								    \
		struct cxl_ecs_context *ctx = drv_data;			    \
		u8 log_cap;						    \
		u16 config;						    \
		int ret;						    \
									    \
		if (!capable(CAP_SYS_RAWIO))				    \
			return -EPERM;					    \
									    \
		ret = cxl_mem_ecs_get_attrbs(dev, ctx, fru_id, &log_cap,    \
					     &config);			    \
		if (ret)						    \
			return ret;					    \
									    \
		ret = cxl_set_ecs_##attrb(dev, &log_cap, &config, val);     \
		if (ret)						    \
			return ret;					    \
									    \
		return cxl_mem_ecs_set_attrbs(dev, ctx, fru_id, log_cap,    \
					      config);			    \
	}
CXL_ECS_SET_ATTR(log_entry_type)
CXL_ECS_SET_ATTR(count_mode)
CXL_ECS_SET_ATTR(reset_counter)
CXL_ECS_SET_ATTR(threshold)
768
/* EDAC ECS operations, wired to the macro-generated getters and setters. */
static const struct edac_ecs_ops cxl_ecs_ops = {
	.get_log_entry_type = cxl_ecs_get_log_entry_type,
	.set_log_entry_type = cxl_ecs_set_log_entry_type,
	.get_mode = cxl_ecs_get_count_mode,
	.set_mode = cxl_ecs_set_count_mode,
	.reset = cxl_ecs_set_reset_counter,
	.get_threshold = cxl_ecs_get_threshold,
	.set_threshold = cxl_ecs_set_threshold,
};
778
cxl_memdev_ecs_init(struct cxl_memdev * cxlmd,struct edac_dev_feature * ras_feature)779 static int cxl_memdev_ecs_init(struct cxl_memdev *cxlmd,
780 struct edac_dev_feature *ras_feature)
781 {
782 struct cxl_ecs_context *cxl_ecs_ctx;
783 struct cxl_feat_entry *feat_entry;
784 int num_media_frus;
785
786 feat_entry =
787 cxl_feature_info(to_cxlfs(cxlmd->cxlds), &CXL_FEAT_ECS_UUID);
788 if (IS_ERR(feat_entry))
789 return -EOPNOTSUPP;
790
791 if (!(le32_to_cpu(feat_entry->flags) & CXL_FEATURE_F_CHANGEABLE))
792 return -EOPNOTSUPP;
793
794 num_media_frus = (le16_to_cpu(feat_entry->get_feat_size) -
795 sizeof(struct cxl_ecs_rd_attrbs)) /
796 sizeof(struct cxl_ecs_fru_rd_attrbs);
797 if (!num_media_frus)
798 return -EOPNOTSUPP;
799
800 cxl_ecs_ctx =
801 devm_kzalloc(&cxlmd->dev, sizeof(*cxl_ecs_ctx), GFP_KERNEL);
802 if (!cxl_ecs_ctx)
803 return -ENOMEM;
804
805 *cxl_ecs_ctx = (struct cxl_ecs_context){
806 .get_feat_size = le16_to_cpu(feat_entry->get_feat_size),
807 .set_feat_size = le16_to_cpu(feat_entry->set_feat_size),
808 .get_version = feat_entry->get_feat_ver,
809 .set_version = feat_entry->set_feat_ver,
810 .effects = le16_to_cpu(feat_entry->effects),
811 .num_media_frus = num_media_frus,
812 .cxlmd = cxlmd,
813 };
814
815 ras_feature->ft_type = RAS_FEAT_ECS;
816 ras_feature->ecs_ops = &cxl_ecs_ops;
817 ras_feature->ctx = cxl_ecs_ctx;
818 ras_feature->ecs_info.num_media_frus = num_media_frus;
819
820 return 0;
821 }
822
823 /*
824 * Perform Maintenance CXL 3.2 Spec 8.2.10.7.1
825 */
826
827 /*
828 * Perform Maintenance input payload
829 * CXL rev 3.2 section 8.2.10.7.1 Table 8-117
830 */
/* Fixed header that precedes the operation-specific maintenance data. */
struct cxl_mbox_maintenance_hdr {
	u8 op_class;	/* maintenance operation class */
	u8 op_subclass;	/* maintenance operation subclass */
} __packed;
835
/*
 * Issue a Perform Maintenance mailbox command.
 *
 * Builds an input payload of the class/subclass header followed by
 * @data_in_size bytes copied from @data_in and sends it to the device.
 *
 * Returns the result of cxl_internal_send_cmd(), or -ENOMEM if the payload
 * buffer cannot be allocated or header plus data exceeds the mailbox
 * payload size.
 */
static int cxl_perform_maintenance(struct cxl_mailbox *cxl_mbox, u8 class,
				   u8 subclass, void *data_in,
				   size_t data_in_size)
{
	struct cxl_memdev_maintenance_pi {
		struct cxl_mbox_maintenance_hdr hdr;
		u8 data[];
	} __packed;
	size_t in_len = sizeof(struct cxl_mbox_maintenance_hdr) + data_in_size;

	struct cxl_memdev_maintenance_pi *payload __free(kvfree) =
		kvzalloc(cxl_mbox->payload_size, GFP_KERNEL);
	if (!payload)
		return -ENOMEM;

	/* The header and the maintenance data must fit in one mailbox payload */
	if (in_len > cxl_mbox->payload_size)
		return -ENOMEM;

	payload->hdr.op_class = class;
	payload->hdr.op_subclass = subclass;
	memcpy(payload->data, data_in, data_in_size);

	struct cxl_mbox_cmd cmd = {
		.opcode = CXL_MBOX_OP_DO_MAINTENANCE,
		.size_in = in_len,
		.payload_in = payload,
	};

	return cxl_internal_send_cmd(cxl_mbox, &cmd);
}
871
/*
 * Support for checking whether the attributes of a memory repair
 * operation match an error record reported during the current boot.
 */
876
/* Stored copies of error event records, one xarray per event type. */
struct cxl_mem_err_rec {
	struct xarray rec_gen_media;	/* general media event records */
	struct xarray rec_dram;		/* DRAM event records */
};
881
/* Repair operation kinds used when matching a request against stored records. */
enum cxl_mem_repair_type {
	CXL_PPR,
	CXL_CACHELINE_SPARING,
	CXL_ROW_SPARING,
	CXL_BANK_SPARING,
	CXL_RANK_SPARING,
	CXL_REPAIR_MAX,		/* number of repair types, not a valid type */
};
890
/**
 * struct cxl_mem_repair_attrbs - CXL memory repair attributes
 * @dpa: DPA of memory to repair; also the key used to look up stored records
 * @nibble_mask: nibble mask, identifies one or more nibbles on the memory bus
 * @row: row of memory to repair
 * @column: column of memory to repair
 * @channel: channel of memory to repair
 * @sub_channel: sub channel of memory to repair
 * @rank: rank of memory to repair
 * @bank_group: bank group of memory to repair
 * @bank: bank of memory to repair
 * @repair_type: repair type, e.g. PPR or one of the memory sparing types
 */
struct cxl_mem_repair_attrbs {
	u64 dpa;
	u32 nibble_mask;
	u32 row;
	u16 column;
	u8 channel;
	u8 sub_channel;
	u8 rank;
	u8 bank_group;
	u8 bank;
	enum cxl_mem_repair_type repair_type;
};
916
917 static struct cxl_event_gen_media *
cxl_find_rec_gen_media(struct cxl_memdev * cxlmd,struct cxl_mem_repair_attrbs * attrbs)918 cxl_find_rec_gen_media(struct cxl_memdev *cxlmd,
919 struct cxl_mem_repair_attrbs *attrbs)
920 {
921 struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
922 struct cxl_event_gen_media *rec;
923
924 if (!array_rec)
925 return NULL;
926
927 rec = xa_load(&array_rec->rec_gen_media, attrbs->dpa);
928 if (!rec)
929 return NULL;
930
931 if (attrbs->repair_type == CXL_PPR)
932 return rec;
933
934 return NULL;
935 }
936
937 static struct cxl_event_dram *
cxl_find_rec_dram(struct cxl_memdev * cxlmd,struct cxl_mem_repair_attrbs * attrbs)938 cxl_find_rec_dram(struct cxl_memdev *cxlmd,
939 struct cxl_mem_repair_attrbs *attrbs)
940 {
941 struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
942 struct cxl_event_dram *rec;
943 u16 validity_flags;
944
945 if (!array_rec)
946 return NULL;
947
948 rec = xa_load(&array_rec->rec_dram, attrbs->dpa);
949 if (!rec)
950 return NULL;
951
952 validity_flags = get_unaligned_le16(rec->media_hdr.validity_flags);
953 if (!(validity_flags & CXL_DER_VALID_CHANNEL) ||
954 !(validity_flags & CXL_DER_VALID_RANK))
955 return NULL;
956
957 switch (attrbs->repair_type) {
958 case CXL_PPR:
959 if (!(validity_flags & CXL_DER_VALID_NIBBLE) ||
960 get_unaligned_le24(rec->nibble_mask) == attrbs->nibble_mask)
961 return rec;
962 break;
963 case CXL_CACHELINE_SPARING:
964 if (!(validity_flags & CXL_DER_VALID_BANK_GROUP) ||
965 !(validity_flags & CXL_DER_VALID_BANK) ||
966 !(validity_flags & CXL_DER_VALID_ROW) ||
967 !(validity_flags & CXL_DER_VALID_COLUMN))
968 return NULL;
969
970 if (rec->media_hdr.channel == attrbs->channel &&
971 rec->media_hdr.rank == attrbs->rank &&
972 rec->bank_group == attrbs->bank_group &&
973 rec->bank == attrbs->bank &&
974 get_unaligned_le24(rec->row) == attrbs->row &&
975 get_unaligned_le16(rec->column) == attrbs->column &&
976 (!(validity_flags & CXL_DER_VALID_NIBBLE) ||
977 get_unaligned_le24(rec->nibble_mask) ==
978 attrbs->nibble_mask) &&
979 (!(validity_flags & CXL_DER_VALID_SUB_CHANNEL) ||
980 rec->sub_channel == attrbs->sub_channel))
981 return rec;
982 break;
983 case CXL_ROW_SPARING:
984 if (!(validity_flags & CXL_DER_VALID_BANK_GROUP) ||
985 !(validity_flags & CXL_DER_VALID_BANK) ||
986 !(validity_flags & CXL_DER_VALID_ROW))
987 return NULL;
988
989 if (rec->media_hdr.channel == attrbs->channel &&
990 rec->media_hdr.rank == attrbs->rank &&
991 rec->bank_group == attrbs->bank_group &&
992 rec->bank == attrbs->bank &&
993 get_unaligned_le24(rec->row) == attrbs->row &&
994 (!(validity_flags & CXL_DER_VALID_NIBBLE) ||
995 get_unaligned_le24(rec->nibble_mask) ==
996 attrbs->nibble_mask))
997 return rec;
998 break;
999 case CXL_BANK_SPARING:
1000 if (!(validity_flags & CXL_DER_VALID_BANK_GROUP) ||
1001 !(validity_flags & CXL_DER_VALID_BANK))
1002 return NULL;
1003
1004 if (rec->media_hdr.channel == attrbs->channel &&
1005 rec->media_hdr.rank == attrbs->rank &&
1006 rec->bank_group == attrbs->bank_group &&
1007 rec->bank == attrbs->bank &&
1008 (!(validity_flags & CXL_DER_VALID_NIBBLE) ||
1009 get_unaligned_le24(rec->nibble_mask) ==
1010 attrbs->nibble_mask))
1011 return rec;
1012 break;
1013 case CXL_RANK_SPARING:
1014 if (rec->media_hdr.channel == attrbs->channel &&
1015 rec->media_hdr.rank == attrbs->rank &&
1016 (!(validity_flags & CXL_DER_VALID_NIBBLE) ||
1017 get_unaligned_le24(rec->nibble_mask) ==
1018 attrbs->nibble_mask))
1019 return rec;
1020 break;
1021 default:
1022 return NULL;
1023 }
1024
1025 return NULL;
1026 }
1027
/* Stored event records at least this old (relative to a new record) are dropped */
#define CXL_MAX_STORAGE_DAYS 10
#define CXL_MAX_STORAGE_TIME_SECS (CXL_MAX_STORAGE_DAYS * 24 * 60 * 60)
1030
cxl_del_expired_gmedia_recs(struct xarray * rec_xarray,struct cxl_event_gen_media * cur_rec)1031 static void cxl_del_expired_gmedia_recs(struct xarray *rec_xarray,
1032 struct cxl_event_gen_media *cur_rec)
1033 {
1034 u64 cur_ts = le64_to_cpu(cur_rec->media_hdr.hdr.timestamp);
1035 struct cxl_event_gen_media *rec;
1036 unsigned long index;
1037 u64 delta_ts_secs;
1038
1039 xa_for_each(rec_xarray, index, rec) {
1040 delta_ts_secs = (cur_ts -
1041 le64_to_cpu(rec->media_hdr.hdr.timestamp)) / 1000000000ULL;
1042 if (delta_ts_secs >= CXL_MAX_STORAGE_TIME_SECS) {
1043 xa_erase(rec_xarray, index);
1044 kfree(rec);
1045 }
1046 }
1047 }
1048
cxl_del_expired_dram_recs(struct xarray * rec_xarray,struct cxl_event_dram * cur_rec)1049 static void cxl_del_expired_dram_recs(struct xarray *rec_xarray,
1050 struct cxl_event_dram *cur_rec)
1051 {
1052 u64 cur_ts = le64_to_cpu(cur_rec->media_hdr.hdr.timestamp);
1053 struct cxl_event_dram *rec;
1054 unsigned long index;
1055 u64 delta_secs;
1056
1057 xa_for_each(rec_xarray, index, rec) {
1058 delta_secs = (cur_ts -
1059 le64_to_cpu(rec->media_hdr.hdr.timestamp)) / 1000000000ULL;
1060 if (delta_secs >= CXL_MAX_STORAGE_TIME_SECS) {
1061 xa_erase(rec_xarray, index);
1062 kfree(rec);
1063 }
1064 }
1065 }
1066
1067 #define CXL_MAX_REC_STORAGE_COUNT 200
1068
cxl_del_overflow_old_recs(struct xarray * rec_xarray)1069 static void cxl_del_overflow_old_recs(struct xarray *rec_xarray)
1070 {
1071 void *err_rec;
1072 unsigned long index, count = 0;
1073
1074 xa_for_each(rec_xarray, index, err_rec)
1075 count++;
1076
1077 if (count <= CXL_MAX_REC_STORAGE_COUNT)
1078 return;
1079
1080 count -= CXL_MAX_REC_STORAGE_COUNT;
1081 xa_for_each(rec_xarray, index, err_rec) {
1082 xa_erase(rec_xarray, index);
1083 kfree(err_rec);
1084 count--;
1085 if (!count)
1086 break;
1087 }
1088 }
1089
cxl_store_rec_gen_media(struct cxl_memdev * cxlmd,union cxl_event * evt)1090 int cxl_store_rec_gen_media(struct cxl_memdev *cxlmd, union cxl_event *evt)
1091 {
1092 struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
1093 struct cxl_event_gen_media *rec;
1094 void *old_rec;
1095
1096 if (!IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR) || !array_rec)
1097 return 0;
1098
1099 rec = kmemdup(&evt->gen_media, sizeof(*rec), GFP_KERNEL);
1100 if (!rec)
1101 return -ENOMEM;
1102
1103 old_rec = xa_store(&array_rec->rec_gen_media,
1104 le64_to_cpu(rec->media_hdr.phys_addr), rec,
1105 GFP_KERNEL);
1106 if (xa_is_err(old_rec)) {
1107 kfree(rec);
1108 return xa_err(old_rec);
1109 }
1110
1111 kfree(old_rec);
1112
1113 cxl_del_expired_gmedia_recs(&array_rec->rec_gen_media, rec);
1114 cxl_del_overflow_old_recs(&array_rec->rec_gen_media);
1115
1116 return 0;
1117 }
1118 EXPORT_SYMBOL_NS_GPL(cxl_store_rec_gen_media, "CXL");
1119
cxl_store_rec_dram(struct cxl_memdev * cxlmd,union cxl_event * evt)1120 int cxl_store_rec_dram(struct cxl_memdev *cxlmd, union cxl_event *evt)
1121 {
1122 struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
1123 struct cxl_event_dram *rec;
1124 void *old_rec;
1125
1126 if (!IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR) || !array_rec)
1127 return 0;
1128
1129 rec = kmemdup(&evt->dram, sizeof(*rec), GFP_KERNEL);
1130 if (!rec)
1131 return -ENOMEM;
1132
1133 old_rec = xa_store(&array_rec->rec_dram,
1134 le64_to_cpu(rec->media_hdr.phys_addr), rec,
1135 GFP_KERNEL);
1136 if (xa_is_err(old_rec)) {
1137 kfree(rec);
1138 return xa_err(old_rec);
1139 }
1140
1141 kfree(old_rec);
1142
1143 cxl_del_expired_dram_recs(&array_rec->rec_dram, rec);
1144 cxl_del_overflow_old_recs(&array_rec->rec_dram);
1145
1146 return 0;
1147 }
1148 EXPORT_SYMBOL_NS_GPL(cxl_store_rec_dram, "CXL");
1149
cxl_is_memdev_memory_online(const struct cxl_memdev * cxlmd)1150 static bool cxl_is_memdev_memory_online(const struct cxl_memdev *cxlmd)
1151 {
1152 struct cxl_port *port = cxlmd->endpoint;
1153
1154 if (port && cxl_num_decoders_committed(port))
1155 return true;
1156
1157 return false;
1158 }
1159
/*
 * CXL memory sparing control
 */
/*
 * Sparing granularities. CXL_MEM_SPARING_MAX is the number of granularities
 * and is used as the loop bound when walking mem_sparing_desc[] at
 * registration time.
 */
enum cxl_mem_sparing_granularity {
	CXL_MEM_SPARING_CACHELINE,
	CXL_MEM_SPARING_ROW,
	CXL_MEM_SPARING_BANK,
	CXL_MEM_SPARING_RANK,
	CXL_MEM_SPARING_MAX
};
1170
/*
 * Per-feature driver state for one memory sparing EDAC instance.
 *
 * The feature sizes, versions, effects, @cxlmd, @repair_type and @instance
 * are filled in by cxl_memdev_sparing_init(). @op_class, @op_subclass and
 * the cap_* flags come from the feature's readable attributes. The remaining
 * fields (@channel ... @sub_channel, @dpa, @persist_mode) hold the repair
 * target and mode written through the cxl_mem_sparing_set_*() callbacks.
 */
struct cxl_mem_sparing_context {
	struct cxl_memdev *cxlmd;
	uuid_t repair_uuid;
	u16 get_feat_size;
	u16 set_feat_size;
	u16 effects;
	u8 instance;
	u8 get_version;
	u8 set_version;
	u8 op_class;
	u8 op_subclass;
	bool cap_safe_when_in_use;
	bool cap_hard_sparing;
	bool cap_soft_sparing;
	u8 channel;
	u8 rank;
	u8 bank_group;
	u32 nibble_mask;
	u64 dpa;
	u32 row;
	u16 column;
	u8 bank;
	u8 sub_channel;
	enum edac_mem_repair_type repair_type;
	/* true selects hard sparing, false soft sparing (see set_persist_mode) */
	bool persist_mode;
};
1197
/* Bits of the readable-attribute restriction flags. */
#define CXL_SPARING_RD_CAP_SAFE_IN_USE_MASK BIT(0)
#define CXL_SPARING_RD_CAP_HARD_SPARING_MASK BIT(1)
#define CXL_SPARING_RD_CAP_SOFT_SPARING_MASK BIT(2)

#define CXL_SPARING_WR_DEVICE_INITIATED_MASK BIT(0)

/* Bits of the 'flags' byte in the sparing maintenance input payload. */
#define CXL_SPARING_QUERY_RESOURCE_FLAG BIT(0)
#define CXL_SET_HARD_SPARING_FLAG BIT(1)
#define CXL_SPARING_SUB_CHNL_VALID_FLAG BIT(2)
#define CXL_SPARING_NIB_MASK_VALID_FLAG BIT(3)

/* Inverted: evaluates to 1 when bit 0 of @flags is clear. */
#define CXL_GET_SPARING_SAFE_IN_USE(flags) \
	(FIELD_GET(CXL_SPARING_RD_CAP_SAFE_IN_USE_MASK, \
		   flags) ^ 1)
#define CXL_GET_CAP_HARD_SPARING(flags) \
	FIELD_GET(CXL_SPARING_RD_CAP_HARD_SPARING_MASK, \
		  flags)
#define CXL_GET_CAP_SOFT_SPARING(flags) \
	FIELD_GET(CXL_SPARING_RD_CAP_SOFT_SPARING_MASK, \
		  flags)

/* Helpers that place @val into the matching payload flag bit. */
#define CXL_SET_SPARING_QUERY_RESOURCE(val) \
	FIELD_PREP(CXL_SPARING_QUERY_RESOURCE_FLAG, val)
#define CXL_SET_HARD_SPARING(val) \
	FIELD_PREP(CXL_SET_HARD_SPARING_FLAG, val)
#define CXL_SET_SPARING_SUB_CHNL_VALID(val) \
	FIELD_PREP(CXL_SPARING_SUB_CHNL_VALID_FLAG, val)
#define CXL_SET_SPARING_NIB_MASK_VALID(val) \
	FIELD_PREP(CXL_SPARING_NIB_MASK_VALID_FLAG, val)
1227
/*
 * See CXL spec rev 3.2 @8.2.10.7.2.3 Table 8-134 Memory Sparing Feature
 * Readable Attributes.
 */
/*
 * Header shared by the repair features' readable attributes; it is embedded
 * by both the sparing and the PPR attribute layouts. Packed to match the
 * device payload byte-for-byte.
 */
struct cxl_memdev_repair_rd_attrbs_hdr {
	u8 max_op_latency;
	__le16 op_cap;
	__le16 op_mode;
	u8 op_class;
	u8 op_subclass;
	u8 rsvd[9];
} __packed;

/* Memory sparing readable attributes: common header plus restriction flags. */
struct cxl_memdev_sparing_rd_attrbs {
	struct cxl_memdev_repair_rd_attrbs_hdr hdr;
	u8 rsvd;
	__le16 restriction_flags;
} __packed;
1246
/*
 * See CXL spec rev 3.2 @8.2.10.7.1.4 Table 8-120 Memory Sparing Input Payload.
 */
/*
 * Packed input payload handed to cxl_perform_maintenance() for a sparing
 * operation. @nibble_mask and @row are 24-bit little-endian values written
 * with put_unaligned_le24(); @flags carries the CXL_SPARING_*_FLAG bits.
 */
struct cxl_memdev_sparing_in_payload {
	u8 flags;
	u8 channel;
	u8 rank;
	u8 nibble_mask[3];
	u8 bank_group;
	u8 bank;
	u8 row[3];
	__le16 column;
	u8 sub_channel;
} __packed;
1261
1262 static int
cxl_mem_sparing_get_attrbs(struct cxl_mem_sparing_context * cxl_sparing_ctx)1263 cxl_mem_sparing_get_attrbs(struct cxl_mem_sparing_context *cxl_sparing_ctx)
1264 {
1265 size_t rd_data_size = sizeof(struct cxl_memdev_sparing_rd_attrbs);
1266 struct cxl_memdev *cxlmd = cxl_sparing_ctx->cxlmd;
1267 struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
1268 u16 restriction_flags;
1269 size_t data_size;
1270 u16 return_code;
1271 struct cxl_memdev_sparing_rd_attrbs *rd_attrbs __free(kfree) =
1272 kzalloc(rd_data_size, GFP_KERNEL);
1273 if (!rd_attrbs)
1274 return -ENOMEM;
1275
1276 data_size = cxl_get_feature(cxl_mbox, &cxl_sparing_ctx->repair_uuid,
1277 CXL_GET_FEAT_SEL_CURRENT_VALUE, rd_attrbs,
1278 rd_data_size, 0, &return_code);
1279 if (!data_size)
1280 return -EIO;
1281
1282 cxl_sparing_ctx->op_class = rd_attrbs->hdr.op_class;
1283 cxl_sparing_ctx->op_subclass = rd_attrbs->hdr.op_subclass;
1284 restriction_flags = le16_to_cpu(rd_attrbs->restriction_flags);
1285 cxl_sparing_ctx->cap_safe_when_in_use =
1286 CXL_GET_SPARING_SAFE_IN_USE(restriction_flags);
1287 cxl_sparing_ctx->cap_hard_sparing =
1288 CXL_GET_CAP_HARD_SPARING(restriction_flags);
1289 cxl_sparing_ctx->cap_soft_sparing =
1290 CXL_GET_CAP_SOFT_SPARING(restriction_flags);
1291
1292 return 0;
1293 }
1294
1295 static struct cxl_event_dram *
cxl_mem_get_rec_dram(struct cxl_memdev * cxlmd,struct cxl_mem_sparing_context * ctx)1296 cxl_mem_get_rec_dram(struct cxl_memdev *cxlmd,
1297 struct cxl_mem_sparing_context *ctx)
1298 {
1299 struct cxl_mem_repair_attrbs attrbs = { 0 };
1300
1301 attrbs.dpa = ctx->dpa;
1302 attrbs.channel = ctx->channel;
1303 attrbs.rank = ctx->rank;
1304 attrbs.nibble_mask = ctx->nibble_mask;
1305 switch (ctx->repair_type) {
1306 case EDAC_REPAIR_CACHELINE_SPARING:
1307 attrbs.repair_type = CXL_CACHELINE_SPARING;
1308 attrbs.bank_group = ctx->bank_group;
1309 attrbs.bank = ctx->bank;
1310 attrbs.row = ctx->row;
1311 attrbs.column = ctx->column;
1312 attrbs.sub_channel = ctx->sub_channel;
1313 break;
1314 case EDAC_REPAIR_ROW_SPARING:
1315 attrbs.repair_type = CXL_ROW_SPARING;
1316 attrbs.bank_group = ctx->bank_group;
1317 attrbs.bank = ctx->bank;
1318 attrbs.row = ctx->row;
1319 break;
1320 case EDAC_REPAIR_BANK_SPARING:
1321 attrbs.repair_type = CXL_BANK_SPARING;
1322 attrbs.bank_group = ctx->bank_group;
1323 attrbs.bank = ctx->bank;
1324 break;
1325 case EDAC_REPAIR_RANK_SPARING:
1326 attrbs.repair_type = CXL_RANK_SPARING;
1327 break;
1328 default:
1329 return NULL;
1330 }
1331
1332 return cxl_find_rec_dram(cxlmd, &attrbs);
1333 }
1334
/*
 * Issue a memory sparing maintenance operation for the target described by
 * @cxl_sparing_ctx.
 *
 * Both cxl_region_rwsem and cxl_dpa_rwsem are read-acquired for the duration
 * of the call and dropped automatically on return.
 *
 * Returns -EINTR if either lock acquisition yields NULL, -EBUSY if the
 * memory is online and the feature is not safe to use while in use, -EINVAL
 * if the memory is online and no matching DRAM event record (or one with
 * empty validity flags) is found, otherwise the result of
 * cxl_perform_maintenance(). @dev is unused.
 */
static int
cxl_mem_perform_sparing(struct device *dev,
			struct cxl_mem_sparing_context *cxl_sparing_ctx)
{
	struct cxl_memdev *cxlmd = cxl_sparing_ctx->cxlmd;
	struct cxl_memdev_sparing_in_payload sparing_pi;
	struct cxl_event_dram *rec = NULL;
	u16 validity_flags = 0;

	struct rw_semaphore *region_lock __free(rwsem_read_release) =
		rwsem_read_intr_acquire(&cxl_region_rwsem);
	if (!region_lock)
		return -EINTR;

	struct rw_semaphore *dpa_lock __free(rwsem_read_release) =
		rwsem_read_intr_acquire(&cxl_dpa_rwsem);
	if (!dpa_lock)
		return -EINTR;

	if (!cxl_sparing_ctx->cap_safe_when_in_use) {
		/* Memory to repair must be offline */
		if (cxl_is_memdev_memory_online(cxlmd))
			return -EBUSY;
	} else {
		/*
		 * Online repair is only allowed for a target that matches a
		 * stored DRAM event record. With the memory offline, rec stays
		 * NULL and the user-supplied attributes are used as-is.
		 */
		if (cxl_is_memdev_memory_online(cxlmd)) {
			rec = cxl_mem_get_rec_dram(cxlmd, cxl_sparing_ctx);
			if (!rec)
				return -EINVAL;

			if (!get_unaligned_le16(rec->media_hdr.validity_flags))
				return -EINVAL;
		}
	}

	memset(&sparing_pi, 0, sizeof(sparing_pi));
	sparing_pi.flags = CXL_SET_SPARING_QUERY_RESOURCE(0);
	if (cxl_sparing_ctx->persist_mode)
		sparing_pi.flags |= CXL_SET_HARD_SPARING(1);

	if (rec)
		validity_flags = get_unaligned_le16(rec->media_hdr.validity_flags);

	/*
	 * Cases are ordered from finest to coarsest granularity and fall
	 * through, so each granularity also fills in every field required by
	 * the coarser ones.
	 */
	switch (cxl_sparing_ctx->repair_type) {
	case EDAC_REPAIR_CACHELINE_SPARING:
		sparing_pi.column = cpu_to_le16(cxl_sparing_ctx->column);
		/* Without a record the sub-channel is always marked valid. */
		if (!rec || (validity_flags & CXL_DER_VALID_SUB_CHANNEL)) {
			sparing_pi.flags |= CXL_SET_SPARING_SUB_CHNL_VALID(1);
			sparing_pi.sub_channel = cxl_sparing_ctx->sub_channel;
		}
		fallthrough;
	case EDAC_REPAIR_ROW_SPARING:
		put_unaligned_le24(cxl_sparing_ctx->row, sparing_pi.row);
		fallthrough;
	case EDAC_REPAIR_BANK_SPARING:
		sparing_pi.bank_group = cxl_sparing_ctx->bank_group;
		sparing_pi.bank = cxl_sparing_ctx->bank;
		fallthrough;
	case EDAC_REPAIR_RANK_SPARING:
		sparing_pi.rank = cxl_sparing_ctx->rank;
		fallthrough;
	default:
		sparing_pi.channel = cxl_sparing_ctx->channel;
		/*
		 * With a record, the nibble mask is sent only if the record
		 * marks it valid. Without one, it is sent when it is zero or
		 * has any of its low 24 bits set.
		 */
		if ((rec && (validity_flags & CXL_DER_VALID_NIBBLE)) ||
		    (!rec && (!cxl_sparing_ctx->nibble_mask ||
			     (cxl_sparing_ctx->nibble_mask & 0xFFFFFF)))) {
			sparing_pi.flags |= CXL_SET_SPARING_NIB_MASK_VALID(1);
			put_unaligned_le24(cxl_sparing_ctx->nibble_mask,
					   sparing_pi.nibble_mask);
		}
		break;
	}

	return cxl_perform_maintenance(&cxlmd->cxlds->cxl_mbox,
				       cxl_sparing_ctx->op_class,
				       cxl_sparing_ctx->op_subclass,
				       &sparing_pi, sizeof(sparing_pi));
}
1412
cxl_mem_sparing_get_repair_type(struct device * dev,void * drv_data,const char ** repair_type)1413 static int cxl_mem_sparing_get_repair_type(struct device *dev, void *drv_data,
1414 const char **repair_type)
1415 {
1416 struct cxl_mem_sparing_context *ctx = drv_data;
1417
1418 switch (ctx->repair_type) {
1419 case EDAC_REPAIR_CACHELINE_SPARING:
1420 case EDAC_REPAIR_ROW_SPARING:
1421 case EDAC_REPAIR_BANK_SPARING:
1422 case EDAC_REPAIR_RANK_SPARING:
1423 *repair_type = edac_repair_type[ctx->repair_type];
1424 break;
1425 default:
1426 return -EINVAL;
1427 }
1428
1429 return 0;
1430 }
1431
/*
 * Generate cxl_mem_sparing_get_<attrb>(): an EDAC getter that copies the
 * context field @attrb into *val and returns 0. @data_type is the callback's
 * value type; it may be wider than the field it reads (e.g. u32 for the u8
 * bank/rank fields).
 */
#define CXL_SPARING_GET_ATTR(attrb, data_type)			    \
	static int cxl_mem_sparing_get_##attrb(			    \
		struct device *dev, void *drv_data, data_type *val) \
	{							    \
		struct cxl_mem_sparing_context *ctx = drv_data;	    \
								    \
		*val = ctx->attrb;				    \
								    \
		return 0;					    \
	}
CXL_SPARING_GET_ATTR(persist_mode, bool)
CXL_SPARING_GET_ATTR(dpa, u64)
CXL_SPARING_GET_ATTR(nibble_mask, u32)
CXL_SPARING_GET_ATTR(bank_group, u32)
CXL_SPARING_GET_ATTR(bank, u32)
CXL_SPARING_GET_ATTR(rank, u32)
CXL_SPARING_GET_ATTR(row, u32)
CXL_SPARING_GET_ATTR(column, u32)
CXL_SPARING_GET_ATTR(channel, u32)
CXL_SPARING_GET_ATTR(sub_channel, u32)
1452
/*
 * Generate cxl_mem_sparing_set_<attrb>(): an EDAC setter that stores @val in
 * the context field @attrb and returns 0. No range check is performed; a
 * value wider than the destination field is truncated by the assignment.
 */
#define CXL_SPARING_SET_ATTR(attrb, data_type)				       \
	static int cxl_mem_sparing_set_##attrb(struct device *dev,	       \
						void *drv_data, data_type val) \
	{								       \
		struct cxl_mem_sparing_context *ctx = drv_data;		       \
									       \
		ctx->attrb = val;					       \
									       \
		return 0;						       \
	}
CXL_SPARING_SET_ATTR(nibble_mask, u32)
CXL_SPARING_SET_ATTR(bank_group, u32)
CXL_SPARING_SET_ATTR(bank, u32)
CXL_SPARING_SET_ATTR(rank, u32)
CXL_SPARING_SET_ATTR(row, u32)
CXL_SPARING_SET_ATTR(column, u32)
CXL_SPARING_SET_ATTR(channel, u32)
CXL_SPARING_SET_ATTR(sub_channel, u32)
1471
1472 static int cxl_mem_sparing_set_persist_mode(struct device *dev, void *drv_data,
1473 bool persist_mode)
1474 {
1475 struct cxl_mem_sparing_context *ctx = drv_data;
1476
1477 if ((persist_mode && ctx->cap_hard_sparing) ||
1478 (!persist_mode && ctx->cap_soft_sparing))
1479 ctx->persist_mode = persist_mode;
1480 else
1481 return -EOPNOTSUPP;
1482
1483 return 0;
1484 }
1485
/* EDAC callback: report the cached "safe when in use" capability. */
static int cxl_get_mem_sparing_safe_when_in_use(struct device *dev,
						void *drv_data, bool *safe)
{
	const struct cxl_mem_sparing_context *sparing = drv_data;

	*safe = sparing->cap_safe_when_in_use;
	return 0;
}
1495
/* EDAC callback: lowest DPA accepted, i.e. the start of the device DPA resource. */
static int cxl_mem_sparing_get_min_dpa(struct device *dev, void *drv_data,
				       u64 *min_dpa)
{
	const struct cxl_mem_sparing_context *sparing = drv_data;

	*min_dpa = sparing->cxlmd->cxlds->dpa_res.start;
	return 0;
}
1507
/* EDAC callback: highest DPA accepted, i.e. the end of the device DPA resource. */
static int cxl_mem_sparing_get_max_dpa(struct device *dev, void *drv_data,
				       u64 *max_dpa)
{
	const struct cxl_mem_sparing_context *sparing = drv_data;

	*max_dpa = sparing->cxlmd->cxlds->dpa_res.end;
	return 0;
}
1519
/*
 * EDAC callback: record the DPA to repair. Returns -EINVAL when @dpa lies
 * outside the device DPA resource (bounds inclusive), 0 otherwise.
 */
static int cxl_mem_sparing_set_dpa(struct device *dev, void *drv_data, u64 dpa)
{
	struct cxl_mem_sparing_context *sparing = drv_data;
	struct cxl_dev_state *cxlds = sparing->cxlmd->cxlds;
	bool in_range = dpa >= cxlds->dpa_res.start &&
			dpa <= cxlds->dpa_res.end;

	if (!in_range)
		return -EINVAL;

	sparing->dpa = dpa;
	return 0;
}
1533
/*
 * EDAC callback: trigger the sparing operation. Any @val other than
 * EDAC_DO_MEM_REPAIR is rejected with -EINVAL.
 */
static int cxl_do_mem_sparing(struct device *dev, void *drv_data, u32 val)
{
	if (val == EDAC_DO_MEM_REPAIR)
		return cxl_mem_perform_sparing(dev, drv_data);

	return -EINVAL;
}
1543
/*
 * Initialiser fragments for struct edac_mem_repair_ops. Each granularity
 * builds on the next coarser one: RANK_OPS holds the callbacks common to all
 * sparing types, BANK_OPS adds bank group/bank, ROW_OPS adds row, and
 * CACHELINE_OPS adds column and sub-channel.
 */
#define RANK_OPS                                                             \
	.get_repair_type = cxl_mem_sparing_get_repair_type,                  \
	.get_persist_mode = cxl_mem_sparing_get_persist_mode,                \
	.set_persist_mode = cxl_mem_sparing_set_persist_mode,                \
	.get_repair_safe_when_in_use = cxl_get_mem_sparing_safe_when_in_use, \
	.get_min_dpa = cxl_mem_sparing_get_min_dpa,                          \
	.get_max_dpa = cxl_mem_sparing_get_max_dpa,                          \
	.get_dpa = cxl_mem_sparing_get_dpa,                                  \
	.set_dpa = cxl_mem_sparing_set_dpa,                                  \
	.get_nibble_mask = cxl_mem_sparing_get_nibble_mask,                  \
	.set_nibble_mask = cxl_mem_sparing_set_nibble_mask,                  \
	.get_rank = cxl_mem_sparing_get_rank,                                \
	.set_rank = cxl_mem_sparing_set_rank,                                \
	.get_channel = cxl_mem_sparing_get_channel,                          \
	.set_channel = cxl_mem_sparing_set_channel,                          \
	.do_repair = cxl_do_mem_sparing

#define BANK_OPS                                                    \
	RANK_OPS, .get_bank_group = cxl_mem_sparing_get_bank_group, \
	.set_bank_group = cxl_mem_sparing_set_bank_group,           \
	.get_bank = cxl_mem_sparing_get_bank,                       \
	.set_bank = cxl_mem_sparing_set_bank

#define ROW_OPS                                       \
	BANK_OPS, .get_row = cxl_mem_sparing_get_row, \
	.set_row = cxl_mem_sparing_set_row

#define CACHELINE_OPS                                               \
	ROW_OPS, .get_column = cxl_mem_sparing_get_column,          \
	.set_column = cxl_mem_sparing_set_column,                   \
	.get_sub_channel = cxl_mem_sparing_get_sub_channel,         \
	.set_sub_channel = cxl_mem_sparing_set_sub_channel
1576
/*
 * One EDAC memory repair ops table per sparing granularity; each exposes
 * only the attributes meaningful at that granularity.
 */
static const struct edac_mem_repair_ops cxl_rank_sparing_ops = {
	RANK_OPS,
};

static const struct edac_mem_repair_ops cxl_bank_sparing_ops = {
	BANK_OPS,
};

static const struct edac_mem_repair_ops cxl_row_sparing_ops = {
	ROW_OPS,
};

static const struct edac_mem_repair_ops cxl_cacheline_sparing_ops = {
	CACHELINE_OPS,
};
1592
/* Static description of one sparing feature: UUID, EDAC type and ops table. */
struct cxl_mem_sparing_desc {
	const uuid_t repair_uuid;
	enum edac_mem_repair_type repair_type;
	const struct edac_mem_repair_ops *repair_ops;
};

/*
 * Sparing features probed at memdev registration. The registration loop
 * walks this table with CXL_MEM_SPARING_MAX as its bound, so the number of
 * entries must stay in step with enum cxl_mem_sparing_granularity.
 */
static const struct cxl_mem_sparing_desc mem_sparing_desc[] = {
	{
		.repair_uuid = CXL_FEAT_CACHELINE_SPARING_UUID,
		.repair_type = EDAC_REPAIR_CACHELINE_SPARING,
		.repair_ops = &cxl_cacheline_sparing_ops,
	},
	{
		.repair_uuid = CXL_FEAT_ROW_SPARING_UUID,
		.repair_type = EDAC_REPAIR_ROW_SPARING,
		.repair_ops = &cxl_row_sparing_ops,
	},
	{
		.repair_uuid = CXL_FEAT_BANK_SPARING_UUID,
		.repair_type = EDAC_REPAIR_BANK_SPARING,
		.repair_ops = &cxl_bank_sparing_ops,
	},
	{
		.repair_uuid = CXL_FEAT_RANK_SPARING_UUID,
		.repair_type = EDAC_REPAIR_RANK_SPARING,
		.repair_ops = &cxl_rank_sparing_ops,
	},
};
1621
/*
 * Set up one memory sparing feature described by @desc for @cxlmd and fill
 * in @ras_feature for EDAC registration.
 *
 * The default persist mode is soft sparing when the device supports it,
 * hard sparing when only that is supported.
 *
 * Returns 0 on success, -EOPNOTSUPP if the feature is absent, not
 * changeable, or supports neither soft nor hard sparing, -ENOMEM on
 * allocation failure, or the error from reading the feature attributes.
 */
static int cxl_memdev_sparing_init(struct cxl_memdev *cxlmd,
				   struct edac_dev_feature *ras_feature,
				   const struct cxl_mem_sparing_desc *desc,
				   u8 repair_inst)
{
	struct cxl_mem_sparing_context *sparing;
	struct cxl_feat_entry *entry;
	int rc;

	entry = cxl_feature_info(to_cxlfs(cxlmd->cxlds), &desc->repair_uuid);
	if (IS_ERR(entry))
		return -EOPNOTSUPP;

	if (!(le32_to_cpu(entry->flags) & CXL_FEATURE_F_CHANGEABLE))
		return -EOPNOTSUPP;

	sparing = devm_kzalloc(&cxlmd->dev, sizeof(*sparing), GFP_KERNEL);
	if (!sparing)
		return -ENOMEM;

	*sparing = (struct cxl_mem_sparing_context){
		.cxlmd = cxlmd,
		.repair_type = desc->repair_type,
		.instance = repair_inst,
		.get_feat_size = le16_to_cpu(entry->get_feat_size),
		.set_feat_size = le16_to_cpu(entry->set_feat_size),
		.get_version = entry->get_feat_ver,
		.set_version = entry->set_feat_ver,
		.effects = le16_to_cpu(entry->effects),
	};
	uuid_copy(&sparing->repair_uuid, &desc->repair_uuid);

	rc = cxl_mem_sparing_get_attrbs(sparing);
	if (rc)
		return rc;

	/* Prefer soft sparing whenever the device offers it. */
	if (sparing->cap_soft_sparing)
		sparing->persist_mode = false;
	else if (sparing->cap_hard_sparing)
		sparing->persist_mode = true;
	else
		return -EOPNOTSUPP;

	ras_feature->ft_type = RAS_FEAT_MEM_REPAIR;
	ras_feature->instance = sparing->instance;
	ras_feature->mem_repair_ops = desc->repair_ops;
	ras_feature->ctx = sparing;

	return 0;
}
1676
/*
 * CXL memory soft PPR & hard PPR control
 */
/*
 * Per-feature driver state for a PPR EDAC instance.
 *
 * The feature sizes, versions, effects, @cxlmd, @repair_type, @persist_mode
 * and @instance are filled in at init time. @op_class, @op_subclass and the
 * cap_dpa/cap_nib_mask/media_accessible/data_retained flags come from the
 * feature's readable attributes. @dpa and @nibble_mask hold the repair
 * target written through the cxl_ppr_set_*() callbacks.
 */
struct cxl_ppr_context {
	uuid_t repair_uuid;
	u8 instance;
	u16 get_feat_size;
	u16 set_feat_size;
	u8 get_version;
	u8 set_version;
	u16 effects;
	u8 op_class;
	u8 op_subclass;
	bool cap_dpa;
	bool cap_nib_mask;
	bool media_accessible;
	bool data_retained;
	struct cxl_memdev *cxlmd;
	enum edac_mem_repair_type repair_type;
	bool persist_mode;
	u64 dpa;
	u32 nibble_mask;
};
1700
/*
 * See CXL rev 3.2 @8.2.10.7.2.1 Table 8-128 sPPR Feature Readable Attributes
 *
 * See CXL rev 3.2 @8.2.10.7.2.2 Table 8-131 hPPR Feature Readable Attributes
 */

#define CXL_PPR_OP_CAP_DEVICE_INITIATED BIT(0)
#define CXL_PPR_OP_MODE_DEV_INITIATED BIT(0)

/* Bits of the readable 'ppr_flags' byte. */
#define CXL_PPR_FLAG_DPA_SUPPORT_MASK BIT(0)
#define CXL_PPR_FLAG_NIB_SUPPORT_MASK BIT(1)
#define CXL_PPR_FLAG_MEM_SPARING_EV_REC_SUPPORT_MASK BIT(2)
#define CXL_PPR_FLAG_DEV_INITED_PPR_AT_BOOT_CAP_MASK BIT(3)

/* Bits of the readable 'restriction_flags' word. */
#define CXL_PPR_RESTRICTION_FLAG_MEDIA_ACCESSIBLE_MASK BIT(0)
#define CXL_PPR_RESTRICTION_FLAG_DATA_RETAINED_MASK BIT(2)

#define CXL_PPR_SPARING_EV_REC_EN_MASK BIT(0)
#define CXL_PPR_DEV_INITED_PPR_AT_BOOT_EN_MASK BIT(1)

#define CXL_PPR_GET_CAP_DPA(flags) \
	FIELD_GET(CXL_PPR_FLAG_DPA_SUPPORT_MASK, flags)
#define CXL_PPR_GET_CAP_NIB_MASK(flags) \
	FIELD_GET(CXL_PPR_FLAG_NIB_SUPPORT_MASK, flags)
/* The two restriction getters are inverted: 1 when the flag bit is clear. */
#define CXL_PPR_GET_MEDIA_ACCESSIBLE(restriction_flags)               \
	(FIELD_GET(CXL_PPR_RESTRICTION_FLAG_MEDIA_ACCESSIBLE_MASK,    \
		   restriction_flags) ^ 1)
#define CXL_PPR_GET_DATA_RETAINED(restriction_flags)                  \
	(FIELD_GET(CXL_PPR_RESTRICTION_FLAG_DATA_RETAINED_MASK,       \
		   restriction_flags) ^ 1)
1731
/*
 * PPR feature readable attributes: common repair header followed by the PPR
 * flags, restriction flags and PPR operation mode. Packed to match the
 * device payload.
 */
struct cxl_memdev_ppr_rd_attrbs {
	struct cxl_memdev_repair_rd_attrbs_hdr hdr;
	u8 ppr_flags;
	__le16 restriction_flags;
	u8 ppr_op_mode;
} __packed;
1738
/*
 * See CXL rev 3.2 @8.2.10.7.1.2 Table 8-118 sPPR Maintenance Input Payload
 *
 * See CXL rev 3.2 @8.2.10.7.1.3 Table 8-119 hPPR Maintenance Input Payload
 */
/*
 * Packed input payload handed to cxl_perform_maintenance() for a PPR
 * operation; @nibble_mask is a 24-bit little-endian value.
 */
struct cxl_memdev_ppr_maintenance_attrbs {
	u8 flags;
	__le64 dpa;
	u8 nibble_mask[3];
} __packed;
1749
cxl_mem_ppr_get_attrbs(struct cxl_ppr_context * cxl_ppr_ctx)1750 static int cxl_mem_ppr_get_attrbs(struct cxl_ppr_context *cxl_ppr_ctx)
1751 {
1752 size_t rd_data_size = sizeof(struct cxl_memdev_ppr_rd_attrbs);
1753 struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
1754 struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
1755 u16 restriction_flags;
1756 size_t data_size;
1757 u16 return_code;
1758
1759 struct cxl_memdev_ppr_rd_attrbs *rd_attrbs __free(kfree) =
1760 kmalloc(rd_data_size, GFP_KERNEL);
1761 if (!rd_attrbs)
1762 return -ENOMEM;
1763
1764 data_size = cxl_get_feature(cxl_mbox, &cxl_ppr_ctx->repair_uuid,
1765 CXL_GET_FEAT_SEL_CURRENT_VALUE, rd_attrbs,
1766 rd_data_size, 0, &return_code);
1767 if (!data_size)
1768 return -EIO;
1769
1770 cxl_ppr_ctx->op_class = rd_attrbs->hdr.op_class;
1771 cxl_ppr_ctx->op_subclass = rd_attrbs->hdr.op_subclass;
1772 cxl_ppr_ctx->cap_dpa = CXL_PPR_GET_CAP_DPA(rd_attrbs->ppr_flags);
1773 cxl_ppr_ctx->cap_nib_mask =
1774 CXL_PPR_GET_CAP_NIB_MASK(rd_attrbs->ppr_flags);
1775
1776 restriction_flags = le16_to_cpu(rd_attrbs->restriction_flags);
1777 cxl_ppr_ctx->media_accessible =
1778 CXL_PPR_GET_MEDIA_ACCESSIBLE(restriction_flags);
1779 cxl_ppr_ctx->data_retained =
1780 CXL_PPR_GET_DATA_RETAINED(restriction_flags);
1781
1782 return 0;
1783 }
1784
cxl_mem_perform_ppr(struct cxl_ppr_context * cxl_ppr_ctx)1785 static int cxl_mem_perform_ppr(struct cxl_ppr_context *cxl_ppr_ctx)
1786 {
1787 struct cxl_memdev_ppr_maintenance_attrbs maintenance_attrbs;
1788 struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
1789 struct cxl_mem_repair_attrbs attrbs = { 0 };
1790
1791 struct rw_semaphore *region_lock __free(rwsem_read_release) =
1792 rwsem_read_intr_acquire(&cxl_region_rwsem);
1793 if (!region_lock)
1794 return -EINTR;
1795
1796 struct rw_semaphore *dpa_lock __free(rwsem_read_release) =
1797 rwsem_read_intr_acquire(&cxl_dpa_rwsem);
1798 if (!dpa_lock)
1799 return -EINTR;
1800
1801 if (!cxl_ppr_ctx->media_accessible || !cxl_ppr_ctx->data_retained) {
1802 /* Memory to repair must be offline */
1803 if (cxl_is_memdev_memory_online(cxlmd))
1804 return -EBUSY;
1805 } else {
1806 if (cxl_is_memdev_memory_online(cxlmd)) {
1807 /* Check memory to repair is from the current boot */
1808 attrbs.repair_type = CXL_PPR;
1809 attrbs.dpa = cxl_ppr_ctx->dpa;
1810 attrbs.nibble_mask = cxl_ppr_ctx->nibble_mask;
1811 if (!cxl_find_rec_dram(cxlmd, &attrbs) &&
1812 !cxl_find_rec_gen_media(cxlmd, &attrbs))
1813 return -EINVAL;
1814 }
1815 }
1816
1817 memset(&maintenance_attrbs, 0, sizeof(maintenance_attrbs));
1818 maintenance_attrbs.flags = 0;
1819 maintenance_attrbs.dpa = cpu_to_le64(cxl_ppr_ctx->dpa);
1820 put_unaligned_le24(cxl_ppr_ctx->nibble_mask,
1821 maintenance_attrbs.nibble_mask);
1822
1823 return cxl_perform_maintenance(&cxlmd->cxlds->cxl_mbox,
1824 cxl_ppr_ctx->op_class,
1825 cxl_ppr_ctx->op_subclass,
1826 &maintenance_attrbs,
1827 sizeof(maintenance_attrbs));
1828 }
1829
cxl_ppr_get_repair_type(struct device * dev,void * drv_data,const char ** repair_type)1830 static int cxl_ppr_get_repair_type(struct device *dev, void *drv_data,
1831 const char **repair_type)
1832 {
1833 *repair_type = edac_repair_type[EDAC_REPAIR_PPR];
1834
1835 return 0;
1836 }
1837
/* EDAC callback: report the persist mode stored in the PPR context. */
static int cxl_ppr_get_persist_mode(struct device *dev, void *drv_data,
				    bool *persist_mode)
{
	const struct cxl_ppr_context *ppr = drv_data;

	*persist_mode = ppr->persist_mode;
	return 0;
}
1847
/*
 * EDAC callback: PPR is reported safe while memory is in use only when the
 * media stays accessible and data is retained during the operation.
 */
static int cxl_get_ppr_safe_when_in_use(struct device *dev, void *drv_data,
					bool *safe)
{
	const struct cxl_ppr_context *ppr = drv_data;

	*safe = ppr->media_accessible && ppr->data_retained;
	return 0;
}
1857
/* EDAC callback: lowest DPA accepted, i.e. the start of the device DPA resource. */
static int cxl_ppr_get_min_dpa(struct device *dev, void *drv_data, u64 *min_dpa)
{
	const struct cxl_ppr_context *ppr = drv_data;

	*min_dpa = ppr->cxlmd->cxlds->dpa_res.start;
	return 0;
}
1868
/* EDAC callback: highest DPA accepted, i.e. the end of the device DPA resource. */
static int cxl_ppr_get_max_dpa(struct device *dev, void *drv_data, u64 *max_dpa)
{
	const struct cxl_ppr_context *ppr = drv_data;

	*max_dpa = ppr->cxlmd->cxlds->dpa_res.end;
	return 0;
}
1879
/* EDAC callback: report the DPA currently selected for PPR. */
static int cxl_ppr_get_dpa(struct device *dev, void *drv_data, u64 *dpa)
{
	const struct cxl_ppr_context *ppr = drv_data;

	*dpa = ppr->dpa;
	return 0;
}
1888
/*
 * EDAC callback: record the DPA to repair. Returns -EINVAL when @dpa lies
 * outside the device DPA resource (bounds inclusive), 0 otherwise.
 */
static int cxl_ppr_set_dpa(struct device *dev, void *drv_data, u64 dpa)
{
	struct cxl_ppr_context *ppr = drv_data;
	struct cxl_dev_state *cxlds = ppr->cxlmd->cxlds;
	bool in_range = dpa >= cxlds->dpa_res.start &&
			dpa <= cxlds->dpa_res.end;

	if (!in_range)
		return -EINVAL;

	ppr->dpa = dpa;
	return 0;
}
1902
/* EDAC callback: report the nibble mask currently selected for PPR. */
static int cxl_ppr_get_nibble_mask(struct device *dev, void *drv_data,
				   u32 *nibble_mask)
{
	const struct cxl_ppr_context *ppr = drv_data;

	*nibble_mask = ppr->nibble_mask;
	return 0;
}
1912
/* EDAC callback: store the nibble mask to use for PPR; always succeeds. */
static int cxl_ppr_set_nibble_mask(struct device *dev, void *drv_data,
				   u32 nibble_mask)
{
	struct cxl_ppr_context *ppr = drv_data;

	ppr->nibble_mask = nibble_mask;
	return 0;
}
1922
/*
 * EDAC callback: trigger the PPR operation for the DPA stored in the
 * context.
 *
 * Returns -EINVAL if @val is not EDAC_DO_MEM_REPAIR or if the stored DPA
 * lies outside the device DPA resource, otherwise the result of
 * cxl_mem_perform_ppr().
 */
static int cxl_do_ppr(struct device *dev, void *drv_data, u32 val)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
	struct cxl_dev_state *cxlds = cxl_ppr_ctx->cxlmd->cxlds;

	if (val != EDAC_DO_MEM_REPAIR)
		return -EINVAL;

	/*
	 * DPA 0 can be a legitimate device address, so validate the target
	 * against the device DPA range (the same bounds cxl_ppr_set_dpa()
	 * enforces) rather than treating 0 as "unset".
	 */
	if (cxl_ppr_ctx->dpa < cxlds->dpa_res.start ||
	    cxl_ppr_ctx->dpa > cxlds->dpa_res.end)
		return -EINVAL;

	return cxl_mem_perform_ppr(cxl_ppr_ctx);
}
1932
/*
 * EDAC memory repair ops for soft PPR. There is no set_persist_mode
 * callback, so the persist mode chosen at init time cannot be changed
 * through this table.
 */
static const struct edac_mem_repair_ops cxl_sppr_ops = {
	.get_repair_type = cxl_ppr_get_repair_type,
	.get_persist_mode = cxl_ppr_get_persist_mode,
	.get_repair_safe_when_in_use = cxl_get_ppr_safe_when_in_use,
	.get_min_dpa = cxl_ppr_get_min_dpa,
	.get_max_dpa = cxl_ppr_get_max_dpa,
	.get_dpa = cxl_ppr_get_dpa,
	.set_dpa = cxl_ppr_set_dpa,
	.get_nibble_mask = cxl_ppr_get_nibble_mask,
	.set_nibble_mask = cxl_ppr_set_nibble_mask,
	.do_repair = cxl_do_ppr,
};
1945
/*
 * Set up the soft PPR feature for @cxlmd and fill in @ras_feature for EDAC
 * registration, using @repair_inst as the memory repair instance number.
 *
 * Returns 0 on success, -EOPNOTSUPP if the feature is absent or not
 * changeable, -ENOMEM on allocation failure, or the error from reading the
 * feature attributes.
 */
static int cxl_memdev_soft_ppr_init(struct cxl_memdev *cxlmd,
				    struct edac_dev_feature *ras_feature,
				    u8 repair_inst)
{
	struct cxl_ppr_context *sppr;
	struct cxl_feat_entry *entry;
	int rc;

	entry = cxl_feature_info(to_cxlfs(cxlmd->cxlds), &CXL_FEAT_SPPR_UUID);
	if (IS_ERR(entry))
		return -EOPNOTSUPP;

	if (!(le32_to_cpu(entry->flags) & CXL_FEATURE_F_CHANGEABLE))
		return -EOPNOTSUPP;

	sppr = devm_kzalloc(&cxlmd->dev, sizeof(*sppr), GFP_KERNEL);
	if (!sppr)
		return -ENOMEM;

	*sppr = (struct cxl_ppr_context){
		.cxlmd = cxlmd,
		.repair_type = EDAC_REPAIR_PPR,
		.persist_mode = false,
		.instance = repair_inst,
		.get_feat_size = le16_to_cpu(entry->get_feat_size),
		.set_feat_size = le16_to_cpu(entry->set_feat_size),
		.get_version = entry->get_feat_ver,
		.set_version = entry->set_feat_ver,
		.effects = le16_to_cpu(entry->effects),
	};
	uuid_copy(&sppr->repair_uuid, &CXL_FEAT_SPPR_UUID);

	rc = cxl_mem_ppr_get_attrbs(sppr);
	if (rc)
		return rc;

	ras_feature->ft_type = RAS_FEAT_MEM_REPAIR;
	ras_feature->instance = sppr->instance;
	ras_feature->mem_repair_ops = &cxl_sppr_ops;
	ras_feature->ctx = sppr;

	return 0;
}
1991
devm_cxl_memdev_edac_register(struct cxl_memdev * cxlmd)1992 int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd)
1993 {
1994 struct edac_dev_feature ras_features[CXL_NR_EDAC_DEV_FEATURES];
1995 int num_ras_features = 0;
1996 u8 repair_inst = 0;
1997 int rc;
1998
1999 if (IS_ENABLED(CONFIG_CXL_EDAC_SCRUB)) {
2000 rc = cxl_memdev_scrub_init(cxlmd, &ras_features[num_ras_features], 0);
2001 if (rc < 0 && rc != -EOPNOTSUPP)
2002 return rc;
2003
2004 if (rc != -EOPNOTSUPP)
2005 num_ras_features++;
2006 }
2007
2008 if (IS_ENABLED(CONFIG_CXL_EDAC_ECS)) {
2009 rc = cxl_memdev_ecs_init(cxlmd, &ras_features[num_ras_features]);
2010 if (rc < 0 && rc != -EOPNOTSUPP)
2011 return rc;
2012
2013 if (rc != -EOPNOTSUPP)
2014 num_ras_features++;
2015 }
2016
2017 if (IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR)) {
2018 for (int i = 0; i < CXL_MEM_SPARING_MAX; i++) {
2019 rc = cxl_memdev_sparing_init(cxlmd,
2020 &ras_features[num_ras_features],
2021 &mem_sparing_desc[i], repair_inst);
2022 if (rc == -EOPNOTSUPP)
2023 continue;
2024 if (rc < 0)
2025 return rc;
2026
2027 repair_inst++;
2028 num_ras_features++;
2029 }
2030
2031 rc = cxl_memdev_soft_ppr_init(cxlmd, &ras_features[num_ras_features],
2032 repair_inst);
2033 if (rc < 0 && rc != -EOPNOTSUPP)
2034 return rc;
2035
2036 if (rc != -EOPNOTSUPP) {
2037 repair_inst++;
2038 num_ras_features++;
2039 }
2040
2041 if (repair_inst) {
2042 struct cxl_mem_err_rec *array_rec =
2043 devm_kzalloc(&cxlmd->dev, sizeof(*array_rec),
2044 GFP_KERNEL);
2045 if (!array_rec)
2046 return -ENOMEM;
2047
2048 xa_init(&array_rec->rec_gen_media);
2049 xa_init(&array_rec->rec_dram);
2050 cxlmd->err_rec_array = array_rec;
2051 }
2052 }
2053
2054 if (!num_ras_features)
2055 return -EINVAL;
2056
2057 char *cxl_dev_name __free(kfree) =
2058 kasprintf(GFP_KERNEL, "cxl_%s", dev_name(&cxlmd->dev));
2059 if (!cxl_dev_name)
2060 return -ENOMEM;
2061
2062 return edac_dev_register(&cxlmd->dev, cxl_dev_name, NULL,
2063 num_ras_features, ras_features);
2064 }
2065 EXPORT_SYMBOL_NS_GPL(devm_cxl_memdev_edac_register, "CXL");
2066
devm_cxl_region_edac_register(struct cxl_region * cxlr)2067 int devm_cxl_region_edac_register(struct cxl_region *cxlr)
2068 {
2069 struct edac_dev_feature ras_features[CXL_NR_EDAC_DEV_FEATURES];
2070 int num_ras_features = 0;
2071 int rc;
2072
2073 if (!IS_ENABLED(CONFIG_CXL_EDAC_SCRUB))
2074 return 0;
2075
2076 rc = cxl_region_scrub_init(cxlr, &ras_features[num_ras_features], 0);
2077 if (rc < 0)
2078 return rc;
2079
2080 num_ras_features++;
2081
2082 char *cxl_dev_name __free(kfree) =
2083 kasprintf(GFP_KERNEL, "cxl_%s", dev_name(&cxlr->dev));
2084 if (!cxl_dev_name)
2085 return -ENOMEM;
2086
2087 return edac_dev_register(&cxlr->dev, cxl_dev_name, NULL,
2088 num_ras_features, ras_features);
2089 }
2090 EXPORT_SYMBOL_NS_GPL(devm_cxl_region_edac_register, "CXL");
2091
devm_cxl_memdev_edac_release(struct cxl_memdev * cxlmd)2092 void devm_cxl_memdev_edac_release(struct cxl_memdev *cxlmd)
2093 {
2094 struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
2095 struct cxl_event_gen_media *rec_gen_media;
2096 struct cxl_event_dram *rec_dram;
2097 unsigned long index;
2098
2099 if (!IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR) || !array_rec)
2100 return;
2101
2102 xa_for_each(&array_rec->rec_dram, index, rec_dram)
2103 kfree(rec_dram);
2104 xa_destroy(&array_rec->rec_dram);
2105
2106 xa_for_each(&array_rec->rec_gen_media, index, rec_gen_media)
2107 kfree(rec_gen_media);
2108 xa_destroy(&array_rec->rec_gen_media);
2109 }
2110 EXPORT_SYMBOL_NS_GPL(devm_cxl_memdev_edac_release, "CXL");
2111