1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright(c) 2022 Intel Corporation. All rights reserved. */
3 #include <linux/memregion.h>
4 #include <linux/genalloc.h>
5 #include <linux/debugfs.h>
6 #include <linux/device.h>
7 #include <linux/module.h>
8 #include <linux/memory.h>
9 #include <linux/slab.h>
10 #include <linux/uuid.h>
11 #include <linux/sort.h>
12 #include <linux/idr.h>
13 #include <linux/memory-tiers.h>
14 #include <linux/string_choices.h>
15 #include <cxlmem.h>
16 #include <cxl.h>
17 #include "core.h"
18
19 /**
20 * DOC: cxl core region
21 *
22 * CXL Regions represent mapped memory capacity in system physical address
23 * space. Whereas the CXL Root Decoders identify the bounds of potential CXL
24 * Memory ranges, Regions represent the active mapped capacity by the HDM
25 * Decoder Capability structures throughout the Host Bridges, Switches, and
26 * Endpoints in the topology.
27 *
28 * Region configuration has ordering constraints. UUID may be set at any time
29 * but is only visible for persistent regions.
30 * 1. Interleave granularity
31 * 2. Interleave size
32 * 3. Decoder targets
33 */
34
/*
 * Nodemask with one bit per node: a node's bit is set once the
 * access_coordinates for that node have been updated by the CXL memory
 * hotplug notifier.
 */
static nodemask_t nodemask_region_seen = NODE_MASK_NONE;

/* Forward declaration, used by the sysfs attribute helpers below */
static struct cxl_region *to_cxl_region(struct device *dev);
42
/*
 * Initializer for a read-only (mode 0444) device_attribute. The sysfs file
 * name is the stringified @_name and the ->show() handler is the function
 * named <_name>_access<_level>_show.
 */
#define __ACCESS_ATTR_RO(_level, _name) {				\
	.attr	= { .name = __stringify(_name), .mode = 0444 },		\
	.show	= _name##_access##_level##_show,			\
}

/*
 * Define the variable dev_attr_access<level>_<name>, initialized with
 * __ACCESS_ATTR_RO(). Storage class is supplied by the user of the macro.
 */
#define ACCESS_DEVICE_ATTR_RO(level, name)	\
	struct device_attribute dev_attr_access##level##_##name = __ACCESS_ATTR_RO(level, name)
50
/*
 * Generate the ->show() handler <attrib>_access<level>_show() together with
 * its static device_attribute for the @attrib field of cxlr->coord[@level].
 * The handler returns -ENOENT while the field is 0, otherwise it emits the
 * value with "%u\n".
 */
#define ACCESS_ATTR_RO(level, attrib)					      \
static ssize_t attrib##_access##level##_show(struct device *dev,	      \
					  struct device_attribute *attr,      \
					  char *buf)			      \
{									      \
	struct cxl_region *cxlr = to_cxl_region(dev);			      \
									      \
	if (cxlr->coord[level].attrib == 0)				      \
		return -ENOENT;						      \
									      \
	return sysfs_emit(buf, "%u\n", cxlr->coord[level].attrib);	      \
}									      \
static ACCESS_DEVICE_ATTR_RO(level, attrib)

/* Attributes backed by cxlr->coord[0] */
ACCESS_ATTR_RO(0, read_bandwidth);
ACCESS_ATTR_RO(0, read_latency);
ACCESS_ATTR_RO(0, write_bandwidth);
ACCESS_ATTR_RO(0, write_latency);
69
/* Address of the struct attribute embedded in dev_attr_access<level>_<attrib> */
#define ACCESS_ATTR_DECLARE(level, attrib)	\
	(&dev_attr_access##level##_##attrib.attr)

/* NULL-terminated attribute list for the "access0" sysfs group */
static struct attribute *access0_coordinate_attrs[] = {
	ACCESS_ATTR_DECLARE(0, read_bandwidth),
	ACCESS_ATTR_DECLARE(0, write_bandwidth),
	ACCESS_ATTR_DECLARE(0, read_latency),
	ACCESS_ATTR_DECLARE(0, write_latency),
	NULL
};
80
/* Attributes backed by cxlr->coord[1] */
ACCESS_ATTR_RO(1, read_bandwidth);
ACCESS_ATTR_RO(1, read_latency);
ACCESS_ATTR_RO(1, write_bandwidth);
ACCESS_ATTR_RO(1, write_latency);

/* NULL-terminated attribute list for the "access1" sysfs group */
static struct attribute *access1_coordinate_attrs[] = {
	ACCESS_ATTR_DECLARE(1, read_bandwidth),
	ACCESS_ATTR_DECLARE(1, write_bandwidth),
	ACCESS_ATTR_DECLARE(1, read_latency),
	ACCESS_ATTR_DECLARE(1, write_latency),
	NULL
};
93
/*
 * Generate cxl_region_access<level>_coordinate_visible(), the ->is_visible()
 * callback for the access<level> attribute group. An attribute is hidden
 * (mode 0) while the cxlr->coord[level] field it reports is 0; in every
 * other case the attribute's own mode is returned.
 */
#define ACCESS_VISIBLE(level)						       \
static umode_t cxl_region_access##level##_coordinate_visible(		       \
		struct kobject *kobj, struct attribute *a, int n)	       \
{									       \
	struct device *dev = kobj_to_dev(kobj);				       \
	struct cxl_region *cxlr = to_cxl_region(dev);			       \
									       \
	if (a == &dev_attr_access##level##_read_latency.attr &&		       \
	    cxlr->coord[level].read_latency == 0)			       \
		return 0;						       \
									       \
	if (a == &dev_attr_access##level##_write_latency.attr &&	       \
	    cxlr->coord[level].write_latency == 0)			       \
		return 0;						       \
									       \
	if (a == &dev_attr_access##level##_read_bandwidth.attr &&	       \
	    cxlr->coord[level].read_bandwidth == 0)			       \
		return 0;						       \
									       \
	if (a == &dev_attr_access##level##_write_bandwidth.attr &&	       \
	    cxlr->coord[level].write_bandwidth == 0)			       \
		return 0;						       \
									       \
	return a->mode;							       \
}

ACCESS_VISIBLE(0);
ACCESS_VISIBLE(1);
122
/* sysfs group "access0": coordinates stored in cxlr->coord[0] */
static const struct attribute_group cxl_region_access0_coordinate_group = {
	.name = "access0",
	.attrs = access0_coordinate_attrs,
	.is_visible = cxl_region_access0_coordinate_visible,
};

/* Accessor returning the address of the "access0" attribute group */
static const struct attribute_group *get_cxl_region_access0_group(void)
{
	return &cxl_region_access0_coordinate_group;
}
133
/* sysfs group "access1": coordinates stored in cxlr->coord[1] */
static const struct attribute_group cxl_region_access1_coordinate_group = {
	.name = "access1",
	.attrs = access1_coordinate_attrs,
	.is_visible = cxl_region_access1_coordinate_visible,
};

/* Accessor returning the address of the "access1" attribute group */
static const struct attribute_group *get_cxl_region_access1_group(void)
{
	return &cxl_region_access1_coordinate_group;
}
144
uuid_show(struct device * dev,struct device_attribute * attr,char * buf)145 static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
146 char *buf)
147 {
148 struct cxl_region *cxlr = to_cxl_region(dev);
149 struct cxl_region_params *p = &cxlr->params;
150 ssize_t rc;
151
152 ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
153 if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem)))
154 return rc;
155 if (cxlr->mode != CXL_PARTMODE_PMEM)
156 return sysfs_emit(buf, "\n");
157 return sysfs_emit(buf, "%pUb\n", &p->uuid);
158 }
159
is_dup(struct device * match,void * data)160 static int is_dup(struct device *match, void *data)
161 {
162 struct cxl_region_params *p;
163 struct cxl_region *cxlr;
164 uuid_t *uuid = data;
165
166 if (!is_cxl_region(match))
167 return 0;
168
169 lockdep_assert_held(&cxl_rwsem.region);
170 cxlr = to_cxl_region(match);
171 p = &cxlr->params;
172
173 if (uuid_equal(&p->uuid, uuid)) {
174 dev_dbg(match, "already has uuid: %pUb\n", uuid);
175 return -EBUSY;
176 }
177
178 return 0;
179 }
180
uuid_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t len)181 static ssize_t uuid_store(struct device *dev, struct device_attribute *attr,
182 const char *buf, size_t len)
183 {
184 struct cxl_region *cxlr = to_cxl_region(dev);
185 struct cxl_region_params *p = &cxlr->params;
186 uuid_t temp;
187 ssize_t rc;
188
189 if (len != UUID_STRING_LEN + 1)
190 return -EINVAL;
191
192 rc = uuid_parse(buf, &temp);
193 if (rc)
194 return rc;
195
196 if (uuid_is_null(&temp))
197 return -EINVAL;
198
199 ACQUIRE(rwsem_write_kill, region_rwsem)(&cxl_rwsem.region);
200 if ((rc = ACQUIRE_ERR(rwsem_write_kill, ®ion_rwsem)))
201 return rc;
202
203 if (uuid_equal(&p->uuid, &temp))
204 return len;
205
206 if (p->state >= CXL_CONFIG_ACTIVE)
207 return -EBUSY;
208
209 rc = bus_for_each_dev(&cxl_bus_type, NULL, &temp, is_dup);
210 if (rc < 0)
211 return rc;
212
213 uuid_copy(&p->uuid, &temp);
214
215 return len;
216 }
217 static DEVICE_ATTR_RW(uuid);
218
cxl_rr_load(struct cxl_port * port,struct cxl_region * cxlr)219 static struct cxl_region_ref *cxl_rr_load(struct cxl_port *port,
220 struct cxl_region *cxlr)
221 {
222 return xa_load(&port->regions, (unsigned long)cxlr);
223 }
224
cxl_region_invalidate_memregion(struct cxl_region * cxlr)225 static int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
226 {
227 if (!cpu_cache_has_invalidate_memregion()) {
228 if (IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST)) {
229 dev_info_once(
230 &cxlr->dev,
231 "Bypassing cpu_cache_invalidate_memregion() for testing!\n");
232 return 0;
233 }
234 dev_WARN(&cxlr->dev,
235 "Failed to synchronize CPU cache state\n");
236 return -ENXIO;
237 }
238
239 if (!cxlr->params.res)
240 return -ENXIO;
241 cpu_cache_invalidate_memregion(cxlr->params.res->start,
242 resource_size(cxlr->params.res));
243 return 0;
244 }
245
/*
 * Reset the decoders behind the first @count targets of @cxlr, visiting the
 * targets in reverse order (index @count - 1 down to 0).
 *
 * Nothing is done when CXL_REGION_F_LOCK is set on the region. The result of
 * cxl_region_invalidate_memregion() is not checked; teardown proceeds
 * regardless. CXL_REGION_F_NEEDS_RESET is set while decoders are being reset
 * and cleared once the loop over all targets has completed.
 */
static void cxl_region_decode_reset(struct cxl_region *cxlr, int count)
{
	struct cxl_region_params *p = &cxlr->params;
	int i;

	if (test_bit(CXL_REGION_F_LOCK, &cxlr->flags))
		return;

	/*
	 * Before region teardown attempt to flush, evict any data cached for
	 * this region, or scream loudly about missing arch / platform support
	 * for CXL teardown.
	 */
	cxl_region_invalidate_memregion(cxlr);

	for (i = count - 1; i >= 0; i--) {
		struct cxl_endpoint_decoder *cxled = p->targets[i];
		struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
		struct cxl_port *iter = cxled_to_port(cxled);
		struct cxl_dev_state *cxlds = cxlmd->cxlds;
		struct cxl_ep *ep;

		/* When cxlds->rcd is set only the endpoint decoder is reset */
		if (cxlds->rcd)
			goto endpoint_reset;

		/* Climb to the port whose parent is the CXL root */
		while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
			iter = to_cxl_port(iter->dev.parent);

		/*
		 * Follow the ep->next chain from that port, resetting the
		 * decoder each port recorded for this region.
		 *
		 * NOTE(review): unlike the teardown loop in
		 * cxl_region_decode_commit(), @ep is not checked before
		 * ep->next is read - confirm an ep always exists here.
		 */
		for (ep = cxl_ep_load(iter, cxlmd); iter;
		     iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
			struct cxl_region_ref *cxl_rr;
			struct cxl_decoder *cxld;

			cxl_rr = cxl_rr_load(iter, cxlr);
			cxld = cxl_rr->decoder;
			if (cxld->reset)
				cxld->reset(cxld);
			set_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
		}

endpoint_reset:
		cxled->cxld.reset(&cxled->cxld);
		set_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
	}

	/* all decoders associated with this region have been torn down */
	clear_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
}
294
commit_decoder(struct cxl_decoder * cxld)295 static int commit_decoder(struct cxl_decoder *cxld)
296 {
297 struct cxl_switch_decoder *cxlsd = NULL;
298
299 if (cxld->commit)
300 return cxld->commit(cxld);
301
302 if (is_switch_decoder(&cxld->dev))
303 cxlsd = to_cxl_switch_decoder(&cxld->dev);
304
305 if (dev_WARN_ONCE(&cxld->dev, !cxlsd || cxlsd->nr_targets > 1,
306 "->commit() is required\n"))
307 return -ENXIO;
308 return 0;
309 }
310
/*
 * Commit the decoders for every target of @cxlr, target 0 first.
 *
 * For each target the decoders are committed from the endpoint's port up to
 * (but not including) the CXL root. On the first failure the decoders of the
 * failing target are reset starting at the port that failed, then the
 * targets committed earlier are unwound with cxl_region_decode_reset().
 *
 * Returns 0 on success or the error reported by commit_decoder().
 */
static int cxl_region_decode_commit(struct cxl_region *cxlr)
{
	struct cxl_region_params *p = &cxlr->params;
	int i, rc = 0;

	for (i = 0; i < p->nr_targets; i++) {
		struct cxl_endpoint_decoder *cxled = p->targets[i];
		struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
		struct cxl_region_ref *cxl_rr;
		struct cxl_decoder *cxld;
		struct cxl_port *iter;
		struct cxl_ep *ep;

		/* commit bottom up */
		for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
		     iter = to_cxl_port(iter->dev.parent)) {
			cxl_rr = cxl_rr_load(iter, cxlr);
			cxld = cxl_rr->decoder;
			rc = commit_decoder(cxld);
			if (rc)
				break;
		}

		if (rc) {
			/* programming @iter failed, teardown */
			for (ep = cxl_ep_load(iter, cxlmd); ep && iter;
			     iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
				cxl_rr = cxl_rr_load(iter, cxlr);
				cxld = cxl_rr->decoder;
				if (cxld->reset)
					cxld->reset(cxld);
			}

			cxled->cxld.reset(&cxled->cxld);
			goto err;
		}
	}

	return 0;

err:
	/* undo the targets that were successfully committed */
	cxl_region_decode_reset(cxlr, i);
	return rc;
}
356
queue_reset(struct cxl_region * cxlr)357 static int queue_reset(struct cxl_region *cxlr)
358 {
359 struct cxl_region_params *p = &cxlr->params;
360 int rc;
361
362 ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
363 if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
364 return rc;
365
366 /* Already in the requested state? */
367 if (p->state < CXL_CONFIG_COMMIT)
368 return 0;
369
370 p->state = CXL_CONFIG_RESET_PENDING;
371
372 return 0;
373 }
374
__commit(struct cxl_region * cxlr)375 static int __commit(struct cxl_region *cxlr)
376 {
377 struct cxl_region_params *p = &cxlr->params;
378 int rc;
379
380 ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
381 if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
382 return rc;
383
384 /* Already in the requested state? */
385 if (p->state >= CXL_CONFIG_COMMIT)
386 return 0;
387
388 /* Not ready to commit? */
389 if (p->state < CXL_CONFIG_ACTIVE)
390 return -ENXIO;
391
392 /*
393 * Invalidate caches before region setup to drop any speculative
394 * consumption of this address space
395 */
396 rc = cxl_region_invalidate_memregion(cxlr);
397 if (rc)
398 return rc;
399
400 rc = cxl_region_decode_commit(cxlr);
401 if (rc)
402 return rc;
403
404 p->state = CXL_CONFIG_COMMIT;
405
406 return 0;
407 }
408
commit_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t len)409 static ssize_t commit_store(struct device *dev, struct device_attribute *attr,
410 const char *buf, size_t len)
411 {
412 struct cxl_region *cxlr = to_cxl_region(dev);
413 struct cxl_region_params *p = &cxlr->params;
414 bool commit;
415 ssize_t rc;
416
417 rc = kstrtobool(buf, &commit);
418 if (rc)
419 return rc;
420
421 if (commit) {
422 rc = __commit(cxlr);
423 if (rc)
424 return rc;
425 return len;
426 }
427
428 if (test_bit(CXL_REGION_F_LOCK, &cxlr->flags))
429 return -EPERM;
430
431 rc = queue_reset(cxlr);
432 if (rc)
433 return rc;
434
435 /*
436 * Unmap the region and depend the reset-pending state to ensure
437 * it does not go active again until post reset
438 */
439 device_release_driver(&cxlr->dev);
440
441 /*
442 * With the reset pending take cxl_rwsem.region unconditionally
443 * to ensure the reset gets handled before returning.
444 */
445 guard(rwsem_write)(&cxl_rwsem.region);
446
447 /*
448 * Revalidate that the reset is still pending in case another
449 * thread already handled this reset.
450 */
451 if (p->state == CXL_CONFIG_RESET_PENDING) {
452 cxl_region_decode_reset(cxlr, p->interleave_ways);
453 p->state = CXL_CONFIG_ACTIVE;
454 }
455
456 return len;
457 }
458
commit_show(struct device * dev,struct device_attribute * attr,char * buf)459 static ssize_t commit_show(struct device *dev, struct device_attribute *attr,
460 char *buf)
461 {
462 struct cxl_region *cxlr = to_cxl_region(dev);
463 struct cxl_region_params *p = &cxlr->params;
464 ssize_t rc;
465
466 ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
467 if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
468 return rc;
469 return sysfs_emit(buf, "%d\n", p->state >= CXL_CONFIG_COMMIT);
470 }
471 static DEVICE_ATTR_RW(commit);
472
interleave_ways_show(struct device * dev,struct device_attribute * attr,char * buf)473 static ssize_t interleave_ways_show(struct device *dev,
474 struct device_attribute *attr, char *buf)
475 {
476 struct cxl_region *cxlr = to_cxl_region(dev);
477 struct cxl_region_params *p = &cxlr->params;
478 int rc;
479
480 ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
481 if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
482 return rc;
483 return sysfs_emit(buf, "%d\n", p->interleave_ways);
484 }
485
486 static const struct attribute_group *get_cxl_region_target_group(void);
487
interleave_ways_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t len)488 static ssize_t interleave_ways_store(struct device *dev,
489 struct device_attribute *attr,
490 const char *buf, size_t len)
491 {
492 struct cxl_region *cxlr = to_cxl_region(dev);
493 struct cxl_root_decoder *cxlrd = cxlr->cxlrd;
494 struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
495 struct cxl_region_params *p = &cxlr->params;
496 unsigned int val, save;
497 int rc;
498 u8 iw;
499
500 rc = kstrtouint(buf, 0, &val);
501 if (rc)
502 return rc;
503
504 rc = ways_to_eiw(val, &iw);
505 if (rc)
506 return rc;
507
508 /*
509 * Even for x3, x6, and x12 interleaves the region interleave must be a
510 * power of 2 multiple of the host bridge interleave.
511 */
512 if (!is_power_of_2(val / cxld->interleave_ways) ||
513 (val % cxld->interleave_ways)) {
514 dev_dbg(&cxlr->dev, "invalid interleave: %d\n", val);
515 return -EINVAL;
516 }
517
518 ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
519 if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
520 return rc;
521
522 if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
523 return -EBUSY;
524
525 save = p->interleave_ways;
526 p->interleave_ways = val;
527 rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
528 if (rc) {
529 p->interleave_ways = save;
530 return rc;
531 }
532
533 return len;
534 }
535 static DEVICE_ATTR_RW(interleave_ways);
536
interleave_granularity_show(struct device * dev,struct device_attribute * attr,char * buf)537 static ssize_t interleave_granularity_show(struct device *dev,
538 struct device_attribute *attr,
539 char *buf)
540 {
541 struct cxl_region *cxlr = to_cxl_region(dev);
542 struct cxl_region_params *p = &cxlr->params;
543 int rc;
544
545 ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
546 if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
547 return rc;
548 return sysfs_emit(buf, "%d\n", p->interleave_granularity);
549 }
550
interleave_granularity_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t len)551 static ssize_t interleave_granularity_store(struct device *dev,
552 struct device_attribute *attr,
553 const char *buf, size_t len)
554 {
555 struct cxl_region *cxlr = to_cxl_region(dev);
556 struct cxl_root_decoder *cxlrd = cxlr->cxlrd;
557 struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
558 struct cxl_region_params *p = &cxlr->params;
559 int rc, val;
560 u16 ig;
561
562 rc = kstrtoint(buf, 0, &val);
563 if (rc)
564 return rc;
565
566 rc = granularity_to_eig(val, &ig);
567 if (rc)
568 return rc;
569
570 /*
571 * When the host-bridge is interleaved, disallow region granularity !=
572 * root granularity. Regions with a granularity less than the root
573 * interleave result in needing multiple endpoints to support a single
574 * slot in the interleave (possible to support in the future). Regions
575 * with a granularity greater than the root interleave result in invalid
576 * DPA translations (invalid to support).
577 */
578 if (cxld->interleave_ways > 1 && val != cxld->interleave_granularity)
579 return -EINVAL;
580
581 ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
582 if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
583 return rc;
584
585 if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
586 return -EBUSY;
587
588 p->interleave_granularity = val;
589
590 return len;
591 }
592 static DEVICE_ATTR_RW(interleave_granularity);
593
resource_show(struct device * dev,struct device_attribute * attr,char * buf)594 static ssize_t resource_show(struct device *dev, struct device_attribute *attr,
595 char *buf)
596 {
597 struct cxl_region *cxlr = to_cxl_region(dev);
598 struct cxl_region_params *p = &cxlr->params;
599 u64 resource = -1ULL;
600 int rc;
601
602 ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
603 if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
604 return rc;
605
606 if (p->res)
607 resource = p->res->start;
608 return sysfs_emit(buf, "%#llx\n", resource);
609 }
610 static DEVICE_ATTR_RO(resource);
611
mode_show(struct device * dev,struct device_attribute * attr,char * buf)612 static ssize_t mode_show(struct device *dev, struct device_attribute *attr,
613 char *buf)
614 {
615 struct cxl_region *cxlr = to_cxl_region(dev);
616 const char *desc;
617
618 if (cxlr->mode == CXL_PARTMODE_RAM)
619 desc = "ram";
620 else if (cxlr->mode == CXL_PARTMODE_PMEM)
621 desc = "pmem";
622 else
623 desc = "";
624
625 return sysfs_emit(buf, "%s\n", desc);
626 }
627 static DEVICE_ATTR_RO(mode);
628
/*
 * Allocate @size bytes of host physical address space for @cxlr out of the
 * root decoder's resource. Caller holds cxl_rwsem.region for write.
 *
 * Returns 0 on success or when the region already has a resource of exactly
 * @size; -EBUSY when a differently sized resource exists or the state is at
 * least CXL_CONFIG_INTERLEAVE_ACTIVE; -ENXIO when ways, granularity or (for
 * PMEM) the uuid are not set; -EINVAL when @size is not a multiple of
 * 256M * interleave_ways; or the alloc_free_mem_region() error.
 */
static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size)
{
	struct cxl_root_decoder *cxlrd = cxlr->cxlrd;
	struct cxl_region_params *p = &cxlr->params;
	struct resource *hpa;
	u64 rem = 0;

	lockdep_assert_held_write(&cxl_rwsem.region);

	if (p->res) {
		/* Same size is a no-op, a resize needs the old size freed */
		if (resource_size(p->res) == size)
			return 0;
		return -EBUSY;
	}

	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
		return -EBUSY;

	/* ways, granularity and uuid (if PMEM) must be set ahead of the HPA */
	if (!p->interleave_ways || !p->interleave_granularity)
		return -ENXIO;
	if (cxlr->mode == CXL_PARTMODE_PMEM && uuid_is_null(&p->uuid))
		return -ENXIO;

	div64_u64_rem(size, (u64)SZ_256M * p->interleave_ways, &rem);
	if (rem)
		return -EINVAL;

	hpa = alloc_free_mem_region(cxlrd->res, size, SZ_256M,
				    dev_name(&cxlr->dev));
	if (IS_ERR(hpa)) {
		dev_dbg(&cxlr->dev,
			"HPA allocation error (%ld) for size:%pap in %s %pr\n",
			PTR_ERR(hpa), &size, cxlrd->res->name, cxlrd->res);
		return PTR_ERR(hpa);
	}

	cxlr->hpa_range = DEFINE_RANGE(hpa->start, hpa->end);

	p->res = hpa;
	p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;

	return 0;
}
674
cxl_region_iomem_release(struct cxl_region * cxlr)675 static void cxl_region_iomem_release(struct cxl_region *cxlr)
676 {
677 struct cxl_region_params *p = &cxlr->params;
678
679 if (device_is_registered(&cxlr->dev))
680 lockdep_assert_held_write(&cxl_rwsem.region);
681 if (p->res) {
682 /*
683 * Autodiscovered regions may not have been able to insert their
684 * resource.
685 */
686 if (p->res->parent)
687 remove_resource(p->res);
688 kfree(p->res);
689 p->res = NULL;
690 }
691 }
692
free_hpa(struct cxl_region * cxlr)693 static int free_hpa(struct cxl_region *cxlr)
694 {
695 struct cxl_region_params *p = &cxlr->params;
696
697 lockdep_assert_held_write(&cxl_rwsem.region);
698
699 if (!p->res)
700 return 0;
701
702 if (p->state >= CXL_CONFIG_ACTIVE)
703 return -EBUSY;
704
705 cxlr->hpa_range = DEFINE_RANGE(0, -1);
706
707 cxl_region_iomem_release(cxlr);
708 p->state = CXL_CONFIG_IDLE;
709 return 0;
710 }
711
size_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t len)712 static ssize_t size_store(struct device *dev, struct device_attribute *attr,
713 const char *buf, size_t len)
714 {
715 struct cxl_region *cxlr = to_cxl_region(dev);
716 u64 val;
717 int rc;
718
719 rc = kstrtou64(buf, 0, &val);
720 if (rc)
721 return rc;
722
723 ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
724 if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
725 return rc;
726
727 if (val)
728 rc = alloc_hpa(cxlr, val);
729 else
730 rc = free_hpa(cxlr);
731
732 if (rc)
733 return rc;
734
735 return len;
736 }
737
size_show(struct device * dev,struct device_attribute * attr,char * buf)738 static ssize_t size_show(struct device *dev, struct device_attribute *attr,
739 char *buf)
740 {
741 struct cxl_region *cxlr = to_cxl_region(dev);
742 struct cxl_region_params *p = &cxlr->params;
743 u64 size = 0;
744 ssize_t rc;
745
746 ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
747 if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
748 return rc;
749 if (p->res)
750 size = resource_size(p->res);
751 return sysfs_emit(buf, "%#llx\n", size);
752 }
753 static DEVICE_ATTR_RW(size);
754
extended_linear_cache_size_show(struct device * dev,struct device_attribute * attr,char * buf)755 static ssize_t extended_linear_cache_size_show(struct device *dev,
756 struct device_attribute *attr,
757 char *buf)
758 {
759 struct cxl_region *cxlr = to_cxl_region(dev);
760 struct cxl_region_params *p = &cxlr->params;
761 ssize_t rc;
762
763 ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
764 if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
765 return rc;
766 return sysfs_emit(buf, "%pap\n", &p->cache_size);
767 }
768 static DEVICE_ATTR_RO(extended_linear_cache_size);
769
/*
 * NULL-terminated list of the region device's base attributes; visibility
 * of individual entries is decided by cxl_region_visible().
 */
static struct attribute *cxl_region_attrs[] = {
	&dev_attr_uuid.attr,
	&dev_attr_commit.attr,
	&dev_attr_interleave_ways.attr,
	&dev_attr_interleave_granularity.attr,
	&dev_attr_resource.attr,
	&dev_attr_size.attr,
	&dev_attr_mode.attr,
	&dev_attr_extended_linear_cache_size.attr,
	NULL,
};
781
cxl_region_visible(struct kobject * kobj,struct attribute * a,int n)782 static umode_t cxl_region_visible(struct kobject *kobj, struct attribute *a,
783 int n)
784 {
785 struct device *dev = kobj_to_dev(kobj);
786 struct cxl_region *cxlr = to_cxl_region(dev);
787
788 /*
789 * Support tooling that expects to find a 'uuid' attribute for all
790 * regions regardless of mode.
791 */
792 if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_PARTMODE_PMEM)
793 return 0444;
794
795 /*
796 * Don't display extended linear cache attribute if there is no
797 * extended linear cache.
798 */
799 if (a == &dev_attr_extended_linear_cache_size.attr &&
800 cxlr->params.cache_size == 0)
801 return 0;
802
803 return a->mode;
804 }
805
/* Unnamed attribute group holding the region's base attributes */
static const struct attribute_group cxl_region_group = {
	.attrs = cxl_region_attrs,
	.is_visible = cxl_region_visible,
};
810
show_targetN(struct cxl_region * cxlr,char * buf,int pos)811 static size_t show_targetN(struct cxl_region *cxlr, char *buf, int pos)
812 {
813 struct cxl_region_params *p = &cxlr->params;
814 struct cxl_endpoint_decoder *cxled;
815 int rc;
816
817 ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
818 if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
819 return rc;
820
821 if (pos >= p->interleave_ways) {
822 dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
823 p->interleave_ways);
824 return -ENXIO;
825 }
826
827 cxled = p->targets[pos];
828 if (!cxled)
829 return sysfs_emit(buf, "\n");
830 return sysfs_emit(buf, "%s\n", dev_name(&cxled->cxld.dev));
831 }
832
check_commit_order(struct device * dev,void * data)833 static int check_commit_order(struct device *dev, void *data)
834 {
835 struct cxl_decoder *cxld = to_cxl_decoder(dev);
836
837 /*
838 * if port->commit_end is not the only free decoder, then out of
839 * order shutdown has occurred, block further allocations until
840 * that is resolved
841 */
842 if (((cxld->flags & CXL_DECODER_F_ENABLE) == 0))
843 return -EBUSY;
844 return 0;
845 }
846
match_free_decoder(struct device * dev,const void * data)847 static int match_free_decoder(struct device *dev, const void *data)
848 {
849 struct cxl_port *port = to_cxl_port(dev->parent);
850 struct cxl_decoder *cxld;
851 int rc;
852
853 if (!is_switch_decoder(dev))
854 return 0;
855
856 cxld = to_cxl_decoder(dev);
857
858 if (cxld->id != port->commit_end + 1)
859 return 0;
860
861 if (cxld->region) {
862 dev_dbg(dev->parent,
863 "next decoder to commit (%s) is already reserved (%s)\n",
864 dev_name(dev), dev_name(&cxld->region->dev));
865 return 0;
866 }
867
868 rc = device_for_each_child_reverse_from(dev->parent, dev, NULL,
869 check_commit_order);
870 if (rc) {
871 dev_dbg(dev->parent,
872 "unable to allocate %s due to out of order shutdown\n",
873 dev_name(dev));
874 return 0;
875 }
876 return 1;
877 }
878
spa_maps_hpa(const struct cxl_region_params * p,const struct range * range)879 static bool spa_maps_hpa(const struct cxl_region_params *p,
880 const struct range *range)
881 {
882 if (!p->res)
883 return false;
884
885 /*
886 * The extended linear cache region is constructed by a 1:1 ratio
887 * where the SPA maps equal amounts of DRAM and CXL HPA capacity with
888 * CXL decoders at the high end of the SPA range.
889 */
890 return p->res->start + p->cache_size == range->start &&
891 p->res->end == range->end;
892 }
893
match_auto_decoder(struct device * dev,const void * data)894 static int match_auto_decoder(struct device *dev, const void *data)
895 {
896 const struct cxl_region_params *p = data;
897 struct cxl_decoder *cxld;
898 struct range *r;
899
900 if (!is_switch_decoder(dev))
901 return 0;
902
903 cxld = to_cxl_decoder(dev);
904 r = &cxld->hpa_range;
905
906 if (spa_maps_hpa(p, r))
907 return 1;
908
909 return 0;
910 }
911
912 /**
913 * cxl_port_pick_region_decoder() - assign or lookup a decoder for a region
914 * @port: a port in the ancestry of the endpoint implied by @cxled
915 * @cxled: endpoint decoder to be, or currently, mapped by @port
916 * @cxlr: region to establish, or validate, decode @port
917 *
918 * In the region creation path cxl_port_pick_region_decoder() is an
919 * allocator to find a free port. In the region assembly path, it is
920 * recalling the decoder that platform firmware picked for validation
921 * purposes.
922 *
923 * The result is recorded in a 'struct cxl_region_ref' in @port.
924 */
925 static struct cxl_decoder *
cxl_port_pick_region_decoder(struct cxl_port * port,struct cxl_endpoint_decoder * cxled,struct cxl_region * cxlr)926 cxl_port_pick_region_decoder(struct cxl_port *port,
927 struct cxl_endpoint_decoder *cxled,
928 struct cxl_region *cxlr)
929 {
930 struct device *dev;
931
932 if (port == cxled_to_port(cxled))
933 return &cxled->cxld;
934
935 if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags))
936 dev = device_find_child(&port->dev, &cxlr->params,
937 match_auto_decoder);
938 else
939 dev = device_find_child(&port->dev, NULL, match_free_decoder);
940 if (!dev)
941 return NULL;
942 /*
943 * This decoder is pinned registered as long as the endpoint decoder is
944 * registered, and endpoint decoder unregistration holds the
945 * cxl_rwsem.region over unregister events, so no need to hold on to
946 * this extra reference.
947 */
948 put_device(dev);
949 return to_cxl_decoder(dev);
950 }
951
auto_order_ok(struct cxl_port * port,struct cxl_region * cxlr_iter,struct cxl_decoder * cxld)952 static bool auto_order_ok(struct cxl_port *port, struct cxl_region *cxlr_iter,
953 struct cxl_decoder *cxld)
954 {
955 struct cxl_region_ref *rr = cxl_rr_load(port, cxlr_iter);
956 struct cxl_decoder *cxld_iter = rr->decoder;
957
958 /*
959 * Allow the out of order assembly of auto-discovered regions.
960 * Per CXL Spec 3.1 8.2.4.20.12 software must commit decoders
961 * in HPA order. Confirm that the decoder with the lesser HPA
962 * starting address has the lesser id.
963 */
964 dev_dbg(&cxld->dev, "check for HPA violation %s:%d < %s:%d\n",
965 dev_name(&cxld->dev), cxld->id,
966 dev_name(&cxld_iter->dev), cxld_iter->id);
967
968 if (cxld_iter->id > cxld->id)
969 return true;
970
971 return false;
972 }
973
974 static struct cxl_region_ref *
alloc_region_ref(struct cxl_port * port,struct cxl_region * cxlr,struct cxl_endpoint_decoder * cxled,struct cxl_decoder * cxld)975 alloc_region_ref(struct cxl_port *port, struct cxl_region *cxlr,
976 struct cxl_endpoint_decoder *cxled,
977 struct cxl_decoder *cxld)
978 {
979 struct cxl_region_params *p = &cxlr->params;
980 struct cxl_region_ref *cxl_rr, *iter;
981 unsigned long index;
982 int rc;
983
984 xa_for_each(&port->regions, index, iter) {
985 struct cxl_region_params *ip = &iter->region->params;
986
987 if (!ip->res || ip->res->start < p->res->start)
988 continue;
989
990 if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
991 if (auto_order_ok(port, iter->region, cxld))
992 continue;
993 }
994 dev_dbg(&cxlr->dev, "%s: HPA order violation %s:%pr vs %pr\n",
995 dev_name(&port->dev),
996 dev_name(&iter->region->dev), ip->res, p->res);
997
998 return ERR_PTR(-EBUSY);
999 }
1000
1001 cxl_rr = kzalloc_obj(*cxl_rr);
1002 if (!cxl_rr)
1003 return ERR_PTR(-ENOMEM);
1004 cxl_rr->port = port;
1005 cxl_rr->region = cxlr;
1006 cxl_rr->nr_targets = 1;
1007 xa_init(&cxl_rr->endpoints);
1008
1009 rc = xa_insert(&port->regions, (unsigned long)cxlr, cxl_rr, GFP_KERNEL);
1010 if (rc) {
1011 dev_dbg(&cxlr->dev,
1012 "%s: failed to track region reference: %d\n",
1013 dev_name(&port->dev), rc);
1014 kfree(cxl_rr);
1015 return ERR_PTR(rc);
1016 }
1017
1018 return cxl_rr;
1019 }
1020
cxl_rr_free_decoder(struct cxl_region_ref * cxl_rr)1021 static void cxl_rr_free_decoder(struct cxl_region_ref *cxl_rr)
1022 {
1023 struct cxl_region *cxlr = cxl_rr->region;
1024 struct cxl_decoder *cxld = cxl_rr->decoder;
1025
1026 if (!cxld)
1027 return;
1028
1029 dev_WARN_ONCE(&cxlr->dev, cxld->region != cxlr, "region mismatch\n");
1030 if (cxld->region == cxlr) {
1031 cxld->region = NULL;
1032 put_device(&cxlr->dev);
1033 }
1034 }
1035
free_region_ref(struct cxl_region_ref * cxl_rr)1036 static void free_region_ref(struct cxl_region_ref *cxl_rr)
1037 {
1038 struct cxl_port *port = cxl_rr->port;
1039 struct cxl_region *cxlr = cxl_rr->region;
1040
1041 cxl_rr_free_decoder(cxl_rr);
1042 xa_erase(&port->regions, (unsigned long)cxlr);
1043 xa_destroy(&cxl_rr->endpoints);
1044 kfree(cxl_rr);
1045 }
1046
cxl_rr_ep_add(struct cxl_region_ref * cxl_rr,struct cxl_endpoint_decoder * cxled)1047 static int cxl_rr_ep_add(struct cxl_region_ref *cxl_rr,
1048 struct cxl_endpoint_decoder *cxled)
1049 {
1050 int rc;
1051 struct cxl_port *port = cxl_rr->port;
1052 struct cxl_region *cxlr = cxl_rr->region;
1053 struct cxl_decoder *cxld = cxl_rr->decoder;
1054 struct cxl_ep *ep = cxl_ep_load(port, cxled_to_memdev(cxled));
1055
1056 if (ep) {
1057 rc = xa_insert(&cxl_rr->endpoints, (unsigned long)cxled, ep,
1058 GFP_KERNEL);
1059 if (rc)
1060 return rc;
1061 }
1062 cxl_rr->nr_eps++;
1063
1064 if (!cxld->region) {
1065 cxld->region = cxlr;
1066 get_device(&cxlr->dev);
1067 }
1068
1069 return 0;
1070 }
1071
cxl_rr_assign_decoder(struct cxl_port * port,struct cxl_region * cxlr,struct cxl_endpoint_decoder * cxled,struct cxl_region_ref * cxl_rr,struct cxl_decoder * cxld)1072 static int cxl_rr_assign_decoder(struct cxl_port *port, struct cxl_region *cxlr,
1073 struct cxl_endpoint_decoder *cxled,
1074 struct cxl_region_ref *cxl_rr,
1075 struct cxl_decoder *cxld)
1076 {
1077 if (cxld->region) {
1078 dev_dbg(&cxlr->dev, "%s: %s already attached to %s\n",
1079 dev_name(&port->dev), dev_name(&cxld->dev),
1080 dev_name(&cxld->region->dev));
1081 return -EBUSY;
1082 }
1083
1084 /*
1085 * Endpoints should already match the region type, but backstop that
1086 * assumption with an assertion. Switch-decoders change mapping-type
1087 * based on what is mapped when they are assigned to a region.
1088 */
1089 dev_WARN_ONCE(&cxlr->dev,
1090 port == cxled_to_port(cxled) &&
1091 cxld->target_type != cxlr->type,
1092 "%s:%s mismatch decoder type %d -> %d\n",
1093 dev_name(&cxled_to_memdev(cxled)->dev),
1094 dev_name(&cxld->dev), cxld->target_type, cxlr->type);
1095 cxld->target_type = cxlr->type;
1096 cxl_rr->decoder = cxld;
1097 return 0;
1098 }
1099
cxl_region_setup_flags(struct cxl_region * cxlr,struct cxl_decoder * cxld)1100 static void cxl_region_setup_flags(struct cxl_region *cxlr,
1101 struct cxl_decoder *cxld)
1102 {
1103 if (test_bit(CXL_DECODER_F_LOCK, &cxld->flags)) {
1104 set_bit(CXL_REGION_F_LOCK, &cxlr->flags);
1105 clear_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
1106 }
1107
1108 if (test_bit(CXL_DECODER_F_NORMALIZED_ADDRESSING, &cxld->flags))
1109 set_bit(CXL_REGION_F_NORMALIZED_ADDRESSING, &cxlr->flags);
1110 }
1111
1112 /**
1113 * cxl_port_attach_region() - track a region's interest in a port by endpoint
1114 * @port: port to add a new region reference 'struct cxl_region_ref'
1115 * @cxlr: region to attach to @port
1116 * @cxled: endpoint decoder used to create or further pin a region reference
1117 * @pos: interleave position of @cxled in @cxlr
1118 *
1119 * The attach event is an opportunity to validate CXL decode setup
1120 * constraints and record metadata needed for programming HDM decoders,
1121 * in particular decoder target lists.
1122 *
1123 * The steps are:
1124 *
1125 * - validate that there are no other regions with a higher HPA already
1126 * associated with @port
1127 * - establish a region reference if one is not already present
1128 *
1129 * - additionally allocate a decoder instance that will host @cxlr on
1130 * @port
1131 *
1132 * - pin the region reference by the endpoint
1133 * - account for how many entries in @port's target list are needed to
1134 * cover all of the added endpoints.
1135 */
cxl_port_attach_region(struct cxl_port * port,struct cxl_region * cxlr,struct cxl_endpoint_decoder * cxled,int pos)1136 static int cxl_port_attach_region(struct cxl_port *port,
1137 struct cxl_region *cxlr,
1138 struct cxl_endpoint_decoder *cxled, int pos)
1139 {
1140 struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1141 struct cxl_ep *ep = cxl_ep_load(port, cxlmd);
1142 struct cxl_region_ref *cxl_rr;
1143 bool nr_targets_inc = false;
1144 struct cxl_decoder *cxld;
1145 unsigned long index;
1146 int rc = -EBUSY;
1147
1148 lockdep_assert_held_write(&cxl_rwsem.region);
1149
1150 cxl_rr = cxl_rr_load(port, cxlr);
1151 if (cxl_rr) {
1152 struct cxl_ep *ep_iter;
1153 int found = 0;
1154
1155 /*
1156 * Walk the existing endpoints that have been attached to
1157 * @cxlr at @port and see if they share the same 'next' port
1158 * in the downstream direction. I.e. endpoints that share common
1159 * upstream switch.
1160 */
1161 xa_for_each(&cxl_rr->endpoints, index, ep_iter) {
1162 if (ep_iter == ep)
1163 continue;
1164 if (ep_iter->next == ep->next) {
1165 found++;
1166 break;
1167 }
1168 }
1169
1170 /*
1171 * New target port, or @port is an endpoint port that always
1172 * accounts its own local decode as a target.
1173 */
1174 if (!found || !ep->next) {
1175 cxl_rr->nr_targets++;
1176 nr_targets_inc = true;
1177 }
1178 } else {
1179 struct cxl_decoder *cxld;
1180
1181 cxld = cxl_port_pick_region_decoder(port, cxled, cxlr);
1182 if (!cxld) {
1183 dev_dbg(&cxlr->dev, "%s: no decoder available\n",
1184 dev_name(&port->dev));
1185 return -EBUSY;
1186 }
1187
1188 cxl_rr = alloc_region_ref(port, cxlr, cxled, cxld);
1189 if (IS_ERR(cxl_rr)) {
1190 dev_dbg(&cxlr->dev,
1191 "%s: failed to allocate region reference\n",
1192 dev_name(&port->dev));
1193 return PTR_ERR(cxl_rr);
1194 }
1195 nr_targets_inc = true;
1196
1197 rc = cxl_rr_assign_decoder(port, cxlr, cxled, cxl_rr, cxld);
1198 if (rc)
1199 goto out_erase;
1200 }
1201 cxld = cxl_rr->decoder;
1202
1203 /*
1204 * the number of targets should not exceed the target_count
1205 * of the decoder
1206 */
1207 if (is_switch_decoder(&cxld->dev)) {
1208 struct cxl_switch_decoder *cxlsd;
1209
1210 cxlsd = to_cxl_switch_decoder(&cxld->dev);
1211 if (cxl_rr->nr_targets > cxlsd->nr_targets) {
1212 dev_dbg(&cxlr->dev,
1213 "%s:%s %s add: %s:%s @ %d overflows targets: %d\n",
1214 dev_name(port->uport_dev), dev_name(&port->dev),
1215 dev_name(&cxld->dev), dev_name(&cxlmd->dev),
1216 dev_name(&cxled->cxld.dev), pos,
1217 cxlsd->nr_targets);
1218 rc = -ENXIO;
1219 goto out_erase;
1220 }
1221 }
1222
1223 cxl_region_setup_flags(cxlr, cxld);
1224
1225 rc = cxl_rr_ep_add(cxl_rr, cxled);
1226 if (rc) {
1227 dev_dbg(&cxlr->dev,
1228 "%s: failed to track endpoint %s:%s reference\n",
1229 dev_name(&port->dev), dev_name(&cxlmd->dev),
1230 dev_name(&cxld->dev));
1231 goto out_erase;
1232 }
1233
1234 dev_dbg(&cxlr->dev,
1235 "%s:%s %s add: %s:%s @ %d next: %s nr_eps: %d nr_targets: %d\n",
1236 dev_name(port->uport_dev), dev_name(&port->dev),
1237 dev_name(&cxld->dev), dev_name(&cxlmd->dev),
1238 dev_name(&cxled->cxld.dev), pos,
1239 ep ? ep->next ? dev_name(ep->next->uport_dev) :
1240 dev_name(&cxlmd->dev) :
1241 "none",
1242 cxl_rr->nr_eps, cxl_rr->nr_targets);
1243
1244 return 0;
1245 out_erase:
1246 if (nr_targets_inc)
1247 cxl_rr->nr_targets--;
1248 if (cxl_rr->nr_eps == 0)
1249 free_region_ref(cxl_rr);
1250 return rc;
1251 }
1252
cxl_port_detach_region(struct cxl_port * port,struct cxl_region * cxlr,struct cxl_endpoint_decoder * cxled)1253 static void cxl_port_detach_region(struct cxl_port *port,
1254 struct cxl_region *cxlr,
1255 struct cxl_endpoint_decoder *cxled)
1256 {
1257 struct cxl_region_ref *cxl_rr;
1258 struct cxl_ep *ep = NULL;
1259
1260 lockdep_assert_held_write(&cxl_rwsem.region);
1261
1262 cxl_rr = cxl_rr_load(port, cxlr);
1263 if (!cxl_rr)
1264 return;
1265
1266 /*
1267 * Endpoint ports do not carry cxl_ep references, and they
1268 * never target more than one endpoint by definition
1269 */
1270 if (cxl_rr->decoder == &cxled->cxld)
1271 cxl_rr->nr_eps--;
1272 else
1273 ep = xa_erase(&cxl_rr->endpoints, (unsigned long)cxled);
1274 if (ep) {
1275 struct cxl_ep *ep_iter;
1276 unsigned long index;
1277 int found = 0;
1278
1279 cxl_rr->nr_eps--;
1280 xa_for_each(&cxl_rr->endpoints, index, ep_iter) {
1281 if (ep_iter->next == ep->next) {
1282 found++;
1283 break;
1284 }
1285 }
1286 if (!found)
1287 cxl_rr->nr_targets--;
1288 }
1289
1290 if (cxl_rr->nr_eps == 0)
1291 free_region_ref(cxl_rr);
1292 }
1293
check_last_peer(struct cxl_endpoint_decoder * cxled,struct cxl_ep * ep,struct cxl_region_ref * cxl_rr,int distance)1294 static int check_last_peer(struct cxl_endpoint_decoder *cxled,
1295 struct cxl_ep *ep, struct cxl_region_ref *cxl_rr,
1296 int distance)
1297 {
1298 struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1299 struct cxl_region *cxlr = cxl_rr->region;
1300 struct cxl_region_params *p = &cxlr->params;
1301 struct cxl_endpoint_decoder *cxled_peer;
1302 struct cxl_port *port = cxl_rr->port;
1303 struct cxl_memdev *cxlmd_peer;
1304 struct cxl_ep *ep_peer;
1305 int pos = cxled->pos;
1306
1307 /*
1308 * If this position wants to share a dport with the last endpoint mapped
1309 * then that endpoint, at index 'position - distance', must also be
1310 * mapped by this dport.
1311 */
1312 if (pos < distance) {
1313 dev_dbg(&cxlr->dev, "%s:%s: cannot host %s:%s at %d\n",
1314 dev_name(port->uport_dev), dev_name(&port->dev),
1315 dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
1316 return -ENXIO;
1317 }
1318 cxled_peer = p->targets[pos - distance];
1319 cxlmd_peer = cxled_to_memdev(cxled_peer);
1320 ep_peer = cxl_ep_load(port, cxlmd_peer);
1321 if (ep->dport != ep_peer->dport) {
1322 dev_dbg(&cxlr->dev,
1323 "%s:%s: %s:%s pos %d mismatched peer %s:%s\n",
1324 dev_name(port->uport_dev), dev_name(&port->dev),
1325 dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos,
1326 dev_name(&cxlmd_peer->dev),
1327 dev_name(&cxled_peer->cxld.dev));
1328 return -ENXIO;
1329 }
1330
1331 return 0;
1332 }
1333
check_interleave_cap(struct cxl_decoder * cxld,int iw,int ig)1334 static int check_interleave_cap(struct cxl_decoder *cxld, int iw, int ig)
1335 {
1336 struct cxl_port *port = to_cxl_port(cxld->dev.parent);
1337 struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);
1338 unsigned int interleave_mask;
1339 u8 eiw;
1340 u16 eig;
1341 int high_pos, low_pos;
1342
1343 if (!test_bit(iw, &cxlhdm->iw_cap_mask))
1344 return -ENXIO;
1345 /*
1346 * Per CXL specification r3.1(8.2.4.20.13 Decoder Protection),
1347 * if eiw < 8:
1348 * DPAOFFSET[51: eig + 8] = HPAOFFSET[51: eig + 8 + eiw]
1349 * DPAOFFSET[eig + 7: 0] = HPAOFFSET[eig + 7: 0]
1350 *
1351 * when the eiw is 0, all the bits of HPAOFFSET[51: 0] are used, the
1352 * interleave bits are none.
1353 *
1354 * if eiw >= 8:
1355 * DPAOFFSET[51: eig + 8] = HPAOFFSET[51: eig + eiw] / 3
1356 * DPAOFFSET[eig + 7: 0] = HPAOFFSET[eig + 7: 0]
1357 *
1358 * when the eiw is 8, all the bits of HPAOFFSET[51: 0] are used, the
1359 * interleave bits are none.
1360 */
1361 ways_to_eiw(iw, &eiw);
1362 if (eiw == 0 || eiw == 8)
1363 return 0;
1364
1365 granularity_to_eig(ig, &eig);
1366 if (eiw > 8)
1367 high_pos = eiw + eig - 1;
1368 else
1369 high_pos = eiw + eig + 7;
1370 low_pos = eig + 8;
1371 interleave_mask = GENMASK(high_pos, low_pos);
1372 if (interleave_mask & ~cxlhdm->interleave_mask)
1373 return -ENXIO;
1374
1375 return 0;
1376 }
1377
cxl_port_setup_targets(struct cxl_port * port,struct cxl_region * cxlr,struct cxl_endpoint_decoder * cxled)1378 static int cxl_port_setup_targets(struct cxl_port *port,
1379 struct cxl_region *cxlr,
1380 struct cxl_endpoint_decoder *cxled)
1381 {
1382 struct cxl_root_decoder *cxlrd = cxlr->cxlrd;
1383 int parent_iw, parent_ig, ig, iw, rc, pos = cxled->pos;
1384 struct cxl_port *parent_port = to_cxl_port(port->dev.parent);
1385 struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr);
1386 struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1387 struct cxl_ep *ep = cxl_ep_load(port, cxlmd);
1388 struct cxl_region_params *p = &cxlr->params;
1389 struct cxl_decoder *cxld = cxl_rr->decoder;
1390 struct cxl_switch_decoder *cxlsd;
1391 struct cxl_port *iter = port;
1392 u16 eig, peig;
1393 u8 eiw, peiw;
1394
1395 /*
1396 * While root level decoders support x3, x6, x12, switch level
1397 * decoders only support powers of 2 up to x16.
1398 */
1399 if (!is_power_of_2(cxl_rr->nr_targets)) {
1400 dev_dbg(&cxlr->dev, "%s:%s: invalid target count %d\n",
1401 dev_name(port->uport_dev), dev_name(&port->dev),
1402 cxl_rr->nr_targets);
1403 return -EINVAL;
1404 }
1405
1406 cxlsd = to_cxl_switch_decoder(&cxld->dev);
1407 if (cxl_rr->nr_targets_set) {
1408 int i, distance = 1;
1409 struct cxl_region_ref *cxl_rr_iter;
1410
1411 /*
1412 * The "distance" between peer downstream ports represents which
1413 * endpoint positions in the region interleave a given port can
1414 * host.
1415 *
1416 * For example, at the root of a hierarchy the distance is
1417 * always 1 as every index targets a different host-bridge. At
1418 * each subsequent switch level those ports map every Nth region
1419 * position where N is the width of the switch == distance.
1420 */
1421 do {
1422 cxl_rr_iter = cxl_rr_load(iter, cxlr);
1423 distance *= cxl_rr_iter->nr_targets;
1424 iter = to_cxl_port(iter->dev.parent);
1425 } while (!is_cxl_root(iter));
1426 distance *= cxlrd->cxlsd.cxld.interleave_ways;
1427
1428 for (i = 0; i < cxl_rr->nr_targets_set; i++)
1429 if (ep->dport == cxlsd->target[i]) {
1430 rc = check_last_peer(cxled, ep, cxl_rr,
1431 distance);
1432 if (rc)
1433 return rc;
1434 goto out_target_set;
1435 }
1436 goto add_target;
1437 }
1438
1439 if (is_cxl_root(parent_port)) {
1440 /*
1441 * Root decoder IG is always set to value in CFMWS which
1442 * may be different than this region's IG. We can use the
1443 * region's IG here since interleave_granularity_store()
1444 * does not allow interleaved host-bridges with
1445 * root IG != region IG.
1446 */
1447 parent_ig = p->interleave_granularity;
1448 parent_iw = cxlrd->cxlsd.cxld.interleave_ways;
1449 /*
1450 * For purposes of address bit routing, use power-of-2 math for
1451 * switch ports.
1452 */
1453 if (!is_power_of_2(parent_iw))
1454 parent_iw /= 3;
1455 } else {
1456 struct cxl_region_ref *parent_rr;
1457 struct cxl_decoder *parent_cxld;
1458
1459 parent_rr = cxl_rr_load(parent_port, cxlr);
1460 parent_cxld = parent_rr->decoder;
1461 parent_ig = parent_cxld->interleave_granularity;
1462 parent_iw = parent_cxld->interleave_ways;
1463 }
1464
1465 rc = granularity_to_eig(parent_ig, &peig);
1466 if (rc) {
1467 dev_dbg(&cxlr->dev, "%s:%s: invalid parent granularity: %d\n",
1468 dev_name(parent_port->uport_dev),
1469 dev_name(&parent_port->dev), parent_ig);
1470 return rc;
1471 }
1472
1473 rc = ways_to_eiw(parent_iw, &peiw);
1474 if (rc) {
1475 dev_dbg(&cxlr->dev, "%s:%s: invalid parent interleave: %d\n",
1476 dev_name(parent_port->uport_dev),
1477 dev_name(&parent_port->dev), parent_iw);
1478 return rc;
1479 }
1480
1481 iw = cxl_rr->nr_targets;
1482 rc = ways_to_eiw(iw, &eiw);
1483 if (rc) {
1484 dev_dbg(&cxlr->dev, "%s:%s: invalid port interleave: %d\n",
1485 dev_name(port->uport_dev), dev_name(&port->dev), iw);
1486 return rc;
1487 }
1488
1489 /*
1490 * Interleave granularity is a multiple of @parent_port granularity.
1491 * Multiplier is the parent port interleave ways.
1492 */
1493 rc = granularity_to_eig(parent_ig * parent_iw, &eig);
1494 if (rc) {
1495 dev_dbg(&cxlr->dev,
1496 "%s: invalid granularity calculation (%d * %d)\n",
1497 dev_name(&parent_port->dev), parent_ig, parent_iw);
1498 return rc;
1499 }
1500
1501 rc = eig_to_granularity(eig, &ig);
1502 if (rc) {
1503 dev_dbg(&cxlr->dev, "%s:%s: invalid interleave: %d\n",
1504 dev_name(port->uport_dev), dev_name(&port->dev),
1505 256 << eig);
1506 return rc;
1507 }
1508
1509 if (iw > 8 || iw > cxlsd->nr_targets) {
1510 dev_dbg(&cxlr->dev,
1511 "%s:%s:%s: ways: %d overflows targets: %d\n",
1512 dev_name(port->uport_dev), dev_name(&port->dev),
1513 dev_name(&cxld->dev), iw, cxlsd->nr_targets);
1514 return -ENXIO;
1515 }
1516
1517 if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
1518 if (cxld->interleave_ways != iw ||
1519 (iw > 1 && cxld->interleave_granularity != ig) ||
1520 !spa_maps_hpa(p, &cxld->hpa_range) ||
1521 ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) {
1522 dev_err(&cxlr->dev,
1523 "%s:%s %s expected iw: %d ig: %d %pr\n",
1524 dev_name(port->uport_dev), dev_name(&port->dev),
1525 __func__, iw, ig, p->res);
1526 dev_err(&cxlr->dev,
1527 "%s:%s %s got iw: %d ig: %d state: %s %#llx:%#llx\n",
1528 dev_name(port->uport_dev), dev_name(&port->dev),
1529 __func__, cxld->interleave_ways,
1530 cxld->interleave_granularity,
1531 str_enabled_disabled(cxld->flags & CXL_DECODER_F_ENABLE),
1532 cxld->hpa_range.start, cxld->hpa_range.end);
1533 return -ENXIO;
1534 }
1535 } else {
1536 rc = check_interleave_cap(cxld, iw, ig);
1537 if (rc) {
1538 dev_dbg(&cxlr->dev,
1539 "%s:%s iw: %d ig: %d is not supported\n",
1540 dev_name(port->uport_dev),
1541 dev_name(&port->dev), iw, ig);
1542 return rc;
1543 }
1544
1545 cxld->interleave_ways = iw;
1546 cxld->interleave_granularity = ig;
1547 cxld->hpa_range = (struct range) {
1548 .start = p->res->start,
1549 .end = p->res->end,
1550 };
1551 }
1552 dev_dbg(&cxlr->dev, "%s:%s iw: %d ig: %d\n", dev_name(port->uport_dev),
1553 dev_name(&port->dev), iw, ig);
1554 add_target:
1555 if (cxl_rr->nr_targets_set == cxl_rr->nr_targets) {
1556 dev_dbg(&cxlr->dev,
1557 "%s:%s: targets full trying to add %s:%s at %d\n",
1558 dev_name(port->uport_dev), dev_name(&port->dev),
1559 dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
1560 return -ENXIO;
1561 }
1562 if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
1563 if (cxlsd->target[cxl_rr->nr_targets_set] != ep->dport) {
1564 dev_dbg(&cxlr->dev, "%s:%s: %s expected %s at %d\n",
1565 dev_name(port->uport_dev), dev_name(&port->dev),
1566 dev_name(&cxlsd->cxld.dev),
1567 dev_name(ep->dport->dport_dev),
1568 cxl_rr->nr_targets_set);
1569 return -ENXIO;
1570 }
1571 } else {
1572 cxlsd->target[cxl_rr->nr_targets_set] = ep->dport;
1573 cxlsd->cxld.target_map[cxl_rr->nr_targets_set] = ep->dport->port_id;
1574 }
1575 cxl_rr->nr_targets_set++;
1576 out_target_set:
1577 dev_dbg(&cxlr->dev, "%s:%s target[%d] = %s for %s:%s @ %d\n",
1578 dev_name(port->uport_dev), dev_name(&port->dev),
1579 cxl_rr->nr_targets_set - 1, dev_name(ep->dport->dport_dev),
1580 dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
1581
1582 return 0;
1583 }
1584
cxl_port_reset_targets(struct cxl_port * port,struct cxl_region * cxlr)1585 static void cxl_port_reset_targets(struct cxl_port *port,
1586 struct cxl_region *cxlr)
1587 {
1588 struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr);
1589 struct cxl_decoder *cxld;
1590
1591 /*
1592 * After the last endpoint has been detached the entire cxl_rr may now
1593 * be gone.
1594 */
1595 if (!cxl_rr)
1596 return;
1597 cxl_rr->nr_targets_set = 0;
1598
1599 cxld = cxl_rr->decoder;
1600 cxld->hpa_range = (struct range) {
1601 .start = 0,
1602 .end = -1,
1603 };
1604 }
1605
cxl_region_teardown_targets(struct cxl_region * cxlr)1606 static void cxl_region_teardown_targets(struct cxl_region *cxlr)
1607 {
1608 struct cxl_region_params *p = &cxlr->params;
1609 struct cxl_endpoint_decoder *cxled;
1610 struct cxl_dev_state *cxlds;
1611 struct cxl_memdev *cxlmd;
1612 struct cxl_port *iter;
1613 struct cxl_ep *ep;
1614 int i;
1615
1616 /*
1617 * In the auto-discovery case skip automatic teardown since the
1618 * address space is already active
1619 */
1620 if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags))
1621 return;
1622
1623 for (i = 0; i < p->nr_targets; i++) {
1624 cxled = p->targets[i];
1625 cxlmd = cxled_to_memdev(cxled);
1626 cxlds = cxlmd->cxlds;
1627
1628 if (cxlds->rcd)
1629 continue;
1630
1631 iter = cxled_to_port(cxled);
1632 while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
1633 iter = to_cxl_port(iter->dev.parent);
1634
1635 for (ep = cxl_ep_load(iter, cxlmd); iter;
1636 iter = ep->next, ep = cxl_ep_load(iter, cxlmd))
1637 cxl_port_reset_targets(iter, cxlr);
1638 }
1639 }
1640
cxl_region_setup_targets(struct cxl_region * cxlr)1641 static int cxl_region_setup_targets(struct cxl_region *cxlr)
1642 {
1643 struct cxl_region_params *p = &cxlr->params;
1644 struct cxl_endpoint_decoder *cxled;
1645 struct cxl_dev_state *cxlds;
1646 int i, rc, rch = 0, vh = 0;
1647 struct cxl_memdev *cxlmd;
1648 struct cxl_port *iter;
1649 struct cxl_ep *ep;
1650
1651 for (i = 0; i < p->nr_targets; i++) {
1652 cxled = p->targets[i];
1653 cxlmd = cxled_to_memdev(cxled);
1654 cxlds = cxlmd->cxlds;
1655
1656 /* validate that all targets agree on topology */
1657 if (!cxlds->rcd) {
1658 vh++;
1659 } else {
1660 rch++;
1661 continue;
1662 }
1663
1664 iter = cxled_to_port(cxled);
1665 while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
1666 iter = to_cxl_port(iter->dev.parent);
1667
1668 /*
1669 * Descend the topology tree programming / validating
1670 * targets while looking for conflicts.
1671 */
1672 for (ep = cxl_ep_load(iter, cxlmd); iter;
1673 iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
1674 rc = cxl_port_setup_targets(iter, cxlr, cxled);
1675 if (rc) {
1676 cxl_region_teardown_targets(cxlr);
1677 return rc;
1678 }
1679 }
1680 }
1681
1682 if (rch && vh) {
1683 dev_err(&cxlr->dev, "mismatched CXL topologies detected\n");
1684 cxl_region_teardown_targets(cxlr);
1685 return -ENXIO;
1686 }
1687
1688 return 0;
1689 }
1690
cxl_region_validate_position(struct cxl_region * cxlr,struct cxl_endpoint_decoder * cxled,int pos)1691 static int cxl_region_validate_position(struct cxl_region *cxlr,
1692 struct cxl_endpoint_decoder *cxled,
1693 int pos)
1694 {
1695 struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1696 struct cxl_region_params *p = &cxlr->params;
1697 int i;
1698
1699 if (pos < 0 || pos >= p->interleave_ways) {
1700 dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
1701 p->interleave_ways);
1702 return -ENXIO;
1703 }
1704
1705 if (p->targets[pos] == cxled)
1706 return 0;
1707
1708 if (p->targets[pos]) {
1709 struct cxl_endpoint_decoder *cxled_target = p->targets[pos];
1710 struct cxl_memdev *cxlmd_target = cxled_to_memdev(cxled_target);
1711
1712 dev_dbg(&cxlr->dev, "position %d already assigned to %s:%s\n",
1713 pos, dev_name(&cxlmd_target->dev),
1714 dev_name(&cxled_target->cxld.dev));
1715 return -EBUSY;
1716 }
1717
1718 for (i = 0; i < p->interleave_ways; i++) {
1719 struct cxl_endpoint_decoder *cxled_target;
1720 struct cxl_memdev *cxlmd_target;
1721
1722 cxled_target = p->targets[i];
1723 if (!cxled_target)
1724 continue;
1725
1726 cxlmd_target = cxled_to_memdev(cxled_target);
1727 if (cxlmd_target == cxlmd) {
1728 dev_dbg(&cxlr->dev,
1729 "%s already specified at position %d via: %s\n",
1730 dev_name(&cxlmd->dev), pos,
1731 dev_name(&cxled_target->cxld.dev));
1732 return -EBUSY;
1733 }
1734 }
1735
1736 return 0;
1737 }
1738
cxl_region_attach_position(struct cxl_region * cxlr,struct cxl_endpoint_decoder * cxled,const struct cxl_dport * dport,int pos)1739 static int cxl_region_attach_position(struct cxl_region *cxlr,
1740 struct cxl_endpoint_decoder *cxled,
1741 const struct cxl_dport *dport, int pos)
1742 {
1743 struct cxl_root_decoder *cxlrd = cxlr->cxlrd;
1744 struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1745 struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd;
1746 struct cxl_decoder *cxld = &cxlsd->cxld;
1747 int iw = cxld->interleave_ways;
1748 struct cxl_port *iter;
1749 int rc;
1750
1751 if (dport != cxlrd->cxlsd.target[pos % iw]) {
1752 dev_dbg(&cxlr->dev, "%s:%s invalid target position for %s\n",
1753 dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
1754 dev_name(&cxlrd->cxlsd.cxld.dev));
1755 return -ENXIO;
1756 }
1757
1758 for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
1759 iter = to_cxl_port(iter->dev.parent)) {
1760 rc = cxl_port_attach_region(iter, cxlr, cxled, pos);
1761 if (rc)
1762 goto err;
1763 }
1764
1765 return 0;
1766
1767 err:
1768 for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
1769 iter = to_cxl_port(iter->dev.parent))
1770 cxl_port_detach_region(iter, cxlr, cxled);
1771 return rc;
1772 }
1773
cxl_region_attach_auto(struct cxl_region * cxlr,struct cxl_endpoint_decoder * cxled,int pos)1774 static int cxl_region_attach_auto(struct cxl_region *cxlr,
1775 struct cxl_endpoint_decoder *cxled, int pos)
1776 {
1777 struct cxl_region_params *p = &cxlr->params;
1778
1779 if (cxled->state != CXL_DECODER_STATE_AUTO) {
1780 dev_err(&cxlr->dev,
1781 "%s: unable to add decoder to autodetected region\n",
1782 dev_name(&cxled->cxld.dev));
1783 return -EINVAL;
1784 }
1785
1786 if (pos >= 0) {
1787 dev_dbg(&cxlr->dev, "%s: expected auto position, not %d\n",
1788 dev_name(&cxled->cxld.dev), pos);
1789 return -EINVAL;
1790 }
1791
1792 if (p->nr_targets >= p->interleave_ways) {
1793 dev_err(&cxlr->dev, "%s: no more target slots available\n",
1794 dev_name(&cxled->cxld.dev));
1795 return -ENXIO;
1796 }
1797
1798 /*
1799 * Temporarily record the endpoint decoder into the target array. Yes,
1800 * this means that userspace can view devices in the wrong position
1801 * before the region activates, and must be careful to understand when
1802 * it might be racing region autodiscovery.
1803 */
1804 pos = p->nr_targets;
1805 p->targets[pos] = cxled;
1806 cxled->pos = pos;
1807 p->nr_targets++;
1808
1809 return 0;
1810 }
1811
cmp_interleave_pos(const void * a,const void * b)1812 static int cmp_interleave_pos(const void *a, const void *b)
1813 {
1814 struct cxl_endpoint_decoder *cxled_a = *(typeof(cxled_a) *)a;
1815 struct cxl_endpoint_decoder *cxled_b = *(typeof(cxled_b) *)b;
1816
1817 return cxled_a->pos - cxled_b->pos;
1818 }
1819
match_switch_decoder_by_range(struct device * dev,const void * data)1820 static int match_switch_decoder_by_range(struct device *dev,
1821 const void *data)
1822 {
1823 struct cxl_switch_decoder *cxlsd;
1824 const struct range *r1, *r2 = data;
1825
1826
1827 if (!is_switch_decoder(dev))
1828 return 0;
1829
1830 cxlsd = to_cxl_switch_decoder(dev);
1831 r1 = &cxlsd->cxld.hpa_range;
1832
1833 if (is_root_decoder(dev))
1834 return range_contains(r1, r2);
1835 return (r1->start == r2->start && r1->end == r2->end);
1836 }
1837
find_pos_and_ways(struct cxl_port * port,struct range * range,int * pos,int * ways)1838 static int find_pos_and_ways(struct cxl_port *port, struct range *range,
1839 int *pos, int *ways)
1840 {
1841 struct cxl_switch_decoder *cxlsd;
1842 struct cxl_port *parent;
1843 struct device *dev;
1844 int rc = -ENXIO;
1845
1846 parent = parent_port_of(port);
1847 if (!parent)
1848 return rc;
1849
1850 dev = device_find_child(&parent->dev, range,
1851 match_switch_decoder_by_range);
1852 if (!dev) {
1853 dev_err(port->uport_dev,
1854 "failed to find decoder mapping %#llx-%#llx\n",
1855 range->start, range->end);
1856 return rc;
1857 }
1858 cxlsd = to_cxl_switch_decoder(dev);
1859 *ways = cxlsd->cxld.interleave_ways;
1860
1861 for (int i = 0; i < *ways; i++) {
1862 if (cxlsd->target[i] == port->parent_dport) {
1863 *pos = i;
1864 rc = 0;
1865 break;
1866 }
1867 }
1868 put_device(dev);
1869
1870 if (rc)
1871 dev_err(port->uport_dev,
1872 "failed to find %s:%s in target list of %s\n",
1873 dev_name(&port->dev),
1874 dev_name(port->parent_dport->dport_dev),
1875 dev_name(&cxlsd->cxld.dev));
1876
1877 return rc;
1878 }
1879
1880 /**
1881 * cxl_calc_interleave_pos() - calculate an endpoint position in a region
1882 * @cxled: endpoint decoder member of given region
1883 * @hpa_range: translated HPA range of the endpoint
1884 *
1885 * The endpoint position is calculated by traversing the topology from
1886 * the endpoint to the root decoder and iteratively applying this
1887 * calculation:
1888 *
1889 * position = position * parent_ways + parent_pos;
1890 *
1891 * ...where @position is inferred from switch and root decoder target lists.
1892 *
1893 * Return: position >= 0 on success
1894 * -ENXIO on failure
1895 */
cxl_calc_interleave_pos(struct cxl_endpoint_decoder * cxled,struct range * hpa_range)1896 static int cxl_calc_interleave_pos(struct cxl_endpoint_decoder *cxled,
1897 struct range *hpa_range)
1898 {
1899 struct cxl_port *iter, *port = cxled_to_port(cxled);
1900 struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1901 int parent_ways = 0, parent_pos = 0, pos = 0;
1902 int rc;
1903
1904 /*
1905 * Example: the expected interleave order of the 4-way region shown
1906 * below is: mem0, mem2, mem1, mem3
1907 *
1908 * root_port
1909 * / \
1910 * host_bridge_0 host_bridge_1
1911 * | | | |
1912 * mem0 mem1 mem2 mem3
1913 *
1914 * In the example the calculator will iterate twice. The first iteration
1915 * uses the mem position in the host-bridge and the ways of the host-
1916 * bridge to generate the first, or local, position. The second
1917 * iteration uses the host-bridge position in the root_port and the ways
1918 * of the root_port to refine the position.
1919 *
1920 * A trace of the calculation per endpoint looks like this:
1921 * mem0: pos = 0 * 2 + 0 mem2: pos = 0 * 2 + 0
1922 * pos = 0 * 2 + 0 pos = 0 * 2 + 1
1923 * pos: 0 pos: 1
1924 *
1925 * mem1: pos = 0 * 2 + 1 mem3: pos = 0 * 2 + 1
1926 * pos = 1 * 2 + 0 pos = 1 * 2 + 1
1927 * pos: 2 pos = 3
1928 *
1929 * Note that while this example is simple, the method applies to more
1930 * complex topologies, including those with switches.
1931 */
1932
1933 /* Iterate from endpoint to root_port refining the position */
1934 for (iter = port; iter; iter = parent_port_of(iter)) {
1935 if (is_cxl_root(iter))
1936 break;
1937
1938 rc = find_pos_and_ways(iter, hpa_range, &parent_pos,
1939 &parent_ways);
1940 if (rc)
1941 return rc;
1942
1943 pos = pos * parent_ways + parent_pos;
1944 }
1945
1946 dev_dbg(&cxlmd->dev,
1947 "decoder:%s parent:%s port:%s range:%#llx-%#llx pos:%d\n",
1948 dev_name(&cxled->cxld.dev), dev_name(cxlmd->dev.parent),
1949 dev_name(&port->dev), hpa_range->start, hpa_range->end, pos);
1950
1951 return pos;
1952 }
1953
cxl_region_sort_targets(struct cxl_region * cxlr)1954 static int cxl_region_sort_targets(struct cxl_region *cxlr)
1955 {
1956 struct cxl_region_params *p = &cxlr->params;
1957 int i, rc = 0;
1958
1959 for (i = 0; i < p->nr_targets; i++) {
1960 struct cxl_endpoint_decoder *cxled = p->targets[i];
1961
1962 cxled->pos = cxl_calc_interleave_pos(cxled, &cxlr->hpa_range);
1963 /*
1964 * Record that sorting failed, but still continue to calc
1965 * cxled->pos so that follow-on code paths can reliably
1966 * do p->targets[cxled->pos] to self-reference their entry.
1967 */
1968 if (cxled->pos < 0)
1969 rc = -ENXIO;
1970 }
1971 /* Keep the cxlr target list in interleave position order */
1972 sort(p->targets, p->nr_targets, sizeof(p->targets[0]),
1973 cmp_interleave_pos, NULL);
1974
1975 dev_dbg(&cxlr->dev, "region sort %s\n", rc ? "failed" : "successful");
1976 return rc;
1977 }
1978
/*
 * Attach endpoint decoder @cxled to region @cxlr at interleave position @pos.
 *
 * The decoder is first validated against the region: interleave capability,
 * liveness (cxled->part), partition mode, region configuration state, free
 * target slots, reachability from the region's root decoder, target type,
 * and DPA allocation presence and size.
 *
 * For CXL_REGION_F_AUTO regions the decoder is handed to
 * cxl_region_attach_auto(); once interleave_ways targets have arrived they
 * are sorted, positioned and set up, and the region state becomes
 * CXL_CONFIG_COMMIT. Otherwise the decoder is placed at @pos and the region
 * state becomes CXL_CONFIG_ACTIVE when the last target is attached.
 *
 * The only caller visible in this file, __attach_target(), holds
 * cxl_rwsem.region for write and cxl_rwsem.dpa for read around this call.
 *
 * Return: 0 on success, negative errno on failure.
 */
static int cxl_region_attach(struct cxl_region *cxlr,
			     struct cxl_endpoint_decoder *cxled, int pos)
{
	struct cxl_root_decoder *cxlrd = cxlr->cxlrd;
	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	struct cxl_region_params *p = &cxlr->params;
	struct cxl_port *ep_port, *root_port;
	struct cxl_dport *dport;
	/* NOTE: this initial value is overwritten by the first assignment below */
	int rc = -ENXIO;

	rc = check_interleave_cap(&cxled->cxld, p->interleave_ways,
				  p->interleave_granularity);
	if (rc) {
		dev_dbg(&cxlr->dev, "%s iw: %d ig: %d is not supported\n",
			dev_name(&cxled->cxld.dev), p->interleave_ways,
			p->interleave_granularity);
		return rc;
	}

	/* __cxl_decoder_detach() sets part to -1 for DETACH_INVALIDATE */
	if (cxled->part < 0) {
		dev_dbg(&cxlr->dev, "%s dead\n", dev_name(&cxled->cxld.dev));
		return -ENODEV;
	}

	if (cxlds->part[cxled->part].mode != cxlr->mode) {
		dev_dbg(&cxlr->dev, "%s region mode: %d mismatch\n",
			dev_name(&cxled->cxld.dev), cxlr->mode);
		return -EINVAL;
	}

	/* all full of members, or interleave config not established? */
	if (p->state > CXL_CONFIG_INTERLEAVE_ACTIVE) {
		dev_dbg(&cxlr->dev, "region already active\n");
		return -EBUSY;
	}

	if (p->state < CXL_CONFIG_INTERLEAVE_ACTIVE) {
		dev_dbg(&cxlr->dev, "interleave config missing\n");
		return -ENXIO;
	}

	if (p->nr_targets >= p->interleave_ways) {
		dev_dbg(&cxlr->dev, "region already has %d endpoints\n",
			p->nr_targets);
		return -EINVAL;
	}

	/* the endpoint's host bridge must be a dport of the root decoder's port */
	ep_port = cxled_to_port(cxled);
	root_port = cxlrd_to_port(cxlrd);
	dport = cxl_find_dport_by_dev(root_port, ep_port->host_bridge);
	if (!dport) {
		dev_dbg(&cxlr->dev, "%s:%s invalid target for %s\n",
			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
			dev_name(cxlr->dev.parent));
		return -ENXIO;
	}

	if (cxled->cxld.target_type != cxlr->type) {
		dev_dbg(&cxlr->dev, "%s:%s type mismatch: %d vs %d\n",
			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
			cxled->cxld.target_type, cxlr->type);
		return -ENXIO;
	}

	if (!cxled->dpa_res) {
		dev_dbg(&cxlr->dev, "%s:%s: missing DPA allocation.\n",
			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev));
		return -ENXIO;
	}

	/* each target must contribute an equal share of the region, minus cache */
	if (resource_size(cxled->dpa_res) * p->interleave_ways + p->cache_size !=
	    resource_size(p->res)) {
		dev_dbg(&cxlr->dev,
			"%s:%s-size-%#llx * ways-%d + cache-%#llx != region-size-%#llx\n",
			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
			(u64)resource_size(cxled->dpa_res), p->interleave_ways,
			(u64)p->cache_size, (u64)resource_size(p->res));
		return -EINVAL;
	}

	cxl_region_perf_data_calculate(cxlr, cxled);

	if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
		int i;

		rc = cxl_region_attach_auto(cxlr, cxled, pos);
		if (rc)
			return rc;

		/* await more targets to arrive... */
		if (p->nr_targets < p->interleave_ways)
			return 0;

		/*
		 * All targets are here, which implies all PCI enumeration that
		 * affects this region has been completed. Walk the topology to
		 * sort the devices into their relative region decode position.
		 */
		rc = cxl_region_sort_targets(cxlr);
		if (rc)
			return rc;

		/* @cxled, ep_port and dport are reused for each sorted target */
		for (i = 0; i < p->nr_targets; i++) {
			cxled = p->targets[i];
			ep_port = cxled_to_port(cxled);
			dport = cxl_find_dport_by_dev(root_port,
						      ep_port->host_bridge);
			rc = cxl_region_attach_position(cxlr, cxled, dport, i);
			if (rc)
				return rc;
		}

		rc = cxl_region_setup_targets(cxlr);
		if (rc)
			return rc;

		/*
		 * If target setup succeeds in the autodiscovery case
		 * then the region is already committed.
		 */
		p->state = CXL_CONFIG_COMMIT;
		cxl_region_shared_upstream_bandwidth_update(cxlr);

		return 0;
	}

	rc = cxl_region_validate_position(cxlr, cxled, pos);
	if (rc)
		return rc;

	rc = cxl_region_attach_position(cxlr, cxled, dport, pos);
	if (rc)
		return rc;

	p->targets[pos] = cxled;
	cxled->pos = pos;
	p->nr_targets++;

	/* the last target to arrive triggers target setup for the whole region */
	if (p->nr_targets == p->interleave_ways) {
		rc = cxl_region_setup_targets(cxlr);
		if (rc)
			return rc;
		p->state = CXL_CONFIG_ACTIVE;
		cxl_region_shared_upstream_bandwidth_update(cxlr);
	}

	cxled->cxld.interleave_ways = p->interleave_ways;
	cxled->cxld.interleave_granularity = p->interleave_granularity;
	cxled->cxld.hpa_range = (struct range) {
		.start = p->res->start,
		.end = p->res->end,
	};

	if (p->nr_targets != p->interleave_ways)
		return 0;

	/*
	 * Test the auto-discovery position calculator function
	 * against this successfully created user-defined region.
	 * A fail message here means that this interleave config
	 * will fail when presented as CXL_REGION_F_AUTO.
	 */
	for (int i = 0; i < p->nr_targets; i++) {
		/* NOTE: shadows the @cxled parameter for the duration of the loop */
		struct cxl_endpoint_decoder *cxled = p->targets[i];
		int test_pos;

		test_pos = cxl_calc_interleave_pos(cxled, &cxlr->hpa_range);
		dev_dbg(&cxled->cxld.dev,
			"Test cxl_calc_interleave_pos(): %s test_pos:%d cxled->pos:%d\n",
			(test_pos == cxled->pos) ? "success" : "fail",
			test_pos, cxled->pos);
	}

	return 0;
}
2155
/*
 * Remove an endpoint decoder from its region.
 *
 * Either @cxled is NULL and the decoder is looked up at @pos in @cxlr, or
 * @cxled is given and the region is taken from cxled->cxld.region (the
 * @cxlr argument is then overwritten).
 *
 * With DETACH_INVALIDATE the decoder's partition index is set to -1, which
 * cxl_region_attach() rejects with -ENODEV.
 *
 * Must be called with cxl_rwsem.region held for write.
 *
 * Return: the region the decoder was detached from, with a device reference
 * taken via get_device() that the caller must drop, or NULL when there was
 * nothing to detach or the target bookkeeping was inconsistent.
 */
static struct cxl_region *
__cxl_decoder_detach(struct cxl_region *cxlr,
		     struct cxl_endpoint_decoder *cxled, int pos,
		     enum cxl_detach_mode mode)
{
	struct cxl_region_params *p;

	lockdep_assert_held_write(&cxl_rwsem.region);

	if (!cxled) {
		p = &cxlr->params;

		if (pos >= p->interleave_ways) {
			dev_dbg(&cxlr->dev, "position %d out of range %d\n",
				pos, p->interleave_ways);
			return NULL;
		}

		if (!p->targets[pos])
			return NULL;
		cxled = p->targets[pos];
	} else {
		cxlr = cxled->cxld.region;
		if (!cxlr)
			return NULL;
		p = &cxlr->params;
	}

	if (mode == DETACH_INVALIDATE)
		cxled->part = -1;

	/* a region beyond ACTIVE has its decode reset before targets are removed */
	if (p->state > CXL_CONFIG_ACTIVE) {
		cxl_region_decode_reset(cxlr, p->interleave_ways);
		p->state = CXL_CONFIG_ACTIVE;
	}

	/* walk from the endpoint port up to, but not including, the root */
	for (struct cxl_port *iter = cxled_to_port(cxled); !is_cxl_root(iter);
	     iter = to_cxl_port(iter->dev.parent))
		cxl_port_detach_region(iter, cxlr, cxled);

	/* the decoder must be recorded at its own position in the target list */
	if (cxled->pos < 0 || cxled->pos >= p->interleave_ways ||
	    p->targets[cxled->pos] != cxled) {
		struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);

		dev_WARN_ONCE(&cxlr->dev, 1, "expected %s:%s at position %d\n",
			      dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
			      cxled->pos);
		return NULL;
	}

	if (p->state == CXL_CONFIG_ACTIVE) {
		p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
		cxl_region_teardown_targets(cxlr);
	}
	p->targets[cxled->pos] = NULL;
	p->nr_targets--;
	/* start 0 / end -1 is the same "no range" value unregister_region() uses */
	cxled->cxld.hpa_range = (struct range) {
		.start = 0,
		.end = -1,
	};

	get_device(&cxlr->dev);
	return cxlr;
}
2220
/*
 * Cleanup a decoder's interest in a region. There are 2 cases to
 * handle, removing an unknown @cxled from a known position in a region
 * (detach_target()) or removing a known @cxled from an unknown @cxlr
 * (cxld_unregister())
 *
 * When the detachment finds a region release the region driver.
 *
 * Return: 0 when the detach was attempted (whether or not a region was
 * found), or the ACQUIRE_ERR() result when cxl_rwsem.region could not be
 * taken in a mode other than DETACH_INVALIDATE.
 */
int cxl_decoder_detach(struct cxl_region *cxlr,
		       struct cxl_endpoint_decoder *cxled, int pos,
		       enum cxl_detach_mode mode)
{
	struct cxl_region *detach;

	/* when the decoder is being destroyed lock unconditionally */
	if (mode == DETACH_INVALIDATE) {
		guard(rwsem_write)(&cxl_rwsem.region);
		detach = __cxl_decoder_detach(cxlr, cxled, pos, mode);
	} else {
		int rc;

		ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
		if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
			return rc;
		detach = __cxl_decoder_detach(cxlr, cxled, pos, mode);
	}

	/*
	 * The guard()/ACQUIRE() scopes end with their branch above, so the
	 * driver release below runs without cxl_rwsem.region held. The
	 * put_device() pairs with the get_device() in __cxl_decoder_detach().
	 */
	if (detach) {
		device_release_driver(&detach->dev);
		put_device(&detach->dev);
	}
	return 0;
}
2254
__attach_target(struct cxl_region * cxlr,struct cxl_endpoint_decoder * cxled,int pos,unsigned int state)2255 static int __attach_target(struct cxl_region *cxlr,
2256 struct cxl_endpoint_decoder *cxled, int pos,
2257 unsigned int state)
2258 {
2259 int rc;
2260
2261 if (state == TASK_INTERRUPTIBLE) {
2262 ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
2263 if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
2264 return rc;
2265 guard(rwsem_read)(&cxl_rwsem.dpa);
2266 return cxl_region_attach(cxlr, cxled, pos);
2267 }
2268 guard(rwsem_write)(&cxl_rwsem.region);
2269 guard(rwsem_read)(&cxl_rwsem.dpa);
2270 return cxl_region_attach(cxlr, cxled, pos);
2271 }
2272
attach_target(struct cxl_region * cxlr,struct cxl_endpoint_decoder * cxled,int pos,unsigned int state)2273 static int attach_target(struct cxl_region *cxlr,
2274 struct cxl_endpoint_decoder *cxled, int pos,
2275 unsigned int state)
2276 {
2277 int rc = __attach_target(cxlr, cxled, pos, state);
2278
2279 if (rc == 0)
2280 return 0;
2281
2282 dev_warn(cxled->cxld.dev.parent, "failed to attach %s to %s: %d\n",
2283 dev_name(&cxled->cxld.dev), dev_name(&cxlr->dev), rc);
2284 return rc;
2285 }
2286
/*
 * Detach whichever endpoint decoder currently occupies position @pos of
 * @cxlr, without invalidating the decoder (DETACH_ONLY).
 *
 * Return: the result of cxl_decoder_detach().
 */
static int detach_target(struct cxl_region *cxlr, int pos)
{
	return cxl_decoder_detach(cxlr, NULL, pos, DETACH_ONLY);
}
2291
store_targetN(struct cxl_region * cxlr,const char * buf,int pos,size_t len)2292 static size_t store_targetN(struct cxl_region *cxlr, const char *buf, int pos,
2293 size_t len)
2294 {
2295 int rc;
2296
2297 if (sysfs_streq(buf, "\n"))
2298 rc = detach_target(cxlr, pos);
2299 else {
2300 struct device *dev;
2301
2302 dev = bus_find_device_by_name(&cxl_bus_type, NULL, buf);
2303 if (!dev)
2304 return -ENODEV;
2305
2306 if (!is_endpoint_decoder(dev)) {
2307 rc = -EINVAL;
2308 goto out;
2309 }
2310
2311 rc = attach_target(cxlr, to_cxl_endpoint_decoder(dev), pos,
2312 TASK_INTERRUPTIBLE);
2313 out:
2314 put_device(dev);
2315 }
2316
2317 if (rc < 0)
2318 return rc;
2319 return len;
2320 }
2321
/*
 * TARGET_ATTR_RW(n) - define the read/write "target<n>" device attribute.
 *
 * Generates target<n>_show() and target<n>_store(), which forward to
 * show_targetN() and store_targetN() with position @n, plus the matching
 * dev_attr_target<n> via DEVICE_ATTR_RW(). One attribute is instantiated
 * for each of the positions 0 through 15.
 */
#define TARGET_ATTR_RW(n)                                              \
	static ssize_t target##n##_show(                               \
		struct device *dev, struct device_attribute *attr, char *buf) \
	{                                                              \
		return show_targetN(to_cxl_region(dev), buf, (n));     \
	}                                                              \
	static ssize_t target##n##_store(struct device *dev,           \
					 struct device_attribute *attr, \
					 const char *buf, size_t len)  \
	{                                                              \
		return store_targetN(to_cxl_region(dev), buf, (n), len); \
	}                                                              \
	static DEVICE_ATTR_RW(target##n)

TARGET_ATTR_RW(0);
TARGET_ATTR_RW(1);
TARGET_ATTR_RW(2);
TARGET_ATTR_RW(3);
TARGET_ATTR_RW(4);
TARGET_ATTR_RW(5);
TARGET_ATTR_RW(6);
TARGET_ATTR_RW(7);
TARGET_ATTR_RW(8);
TARGET_ATTR_RW(9);
TARGET_ATTR_RW(10);
TARGET_ATTR_RW(11);
TARGET_ATTR_RW(12);
TARGET_ATTR_RW(13);
TARGET_ATTR_RW(14);
TARGET_ATTR_RW(15);
2352
/*
 * NULL-terminated list of the target0..target15 attributes, in position
 * order. cxl_region_target_visible() compares an attribute's index in this
 * array against the region's interleave_ways.
 */
static struct attribute *target_attrs[] = {
	&dev_attr_target0.attr,
	&dev_attr_target1.attr,
	&dev_attr_target2.attr,
	&dev_attr_target3.attr,
	&dev_attr_target4.attr,
	&dev_attr_target5.attr,
	&dev_attr_target6.attr,
	&dev_attr_target7.attr,
	&dev_attr_target8.attr,
	&dev_attr_target9.attr,
	&dev_attr_target10.attr,
	&dev_attr_target11.attr,
	&dev_attr_target12.attr,
	&dev_attr_target13.attr,
	&dev_attr_target14.attr,
	&dev_attr_target15.attr,
	NULL,
};
2372
cxl_region_target_visible(struct kobject * kobj,struct attribute * a,int n)2373 static umode_t cxl_region_target_visible(struct kobject *kobj,
2374 struct attribute *a, int n)
2375 {
2376 struct device *dev = kobj_to_dev(kobj);
2377 struct cxl_region *cxlr = to_cxl_region(dev);
2378 struct cxl_region_params *p = &cxlr->params;
2379
2380 if (n < p->interleave_ways)
2381 return a->mode;
2382 return 0;
2383 }
2384
/*
 * Attribute group holding the targetN attributes; visibility of each entry
 * is decided by cxl_region_target_visible().
 */
static const struct attribute_group cxl_region_target_group = {
	.attrs = target_attrs,
	.is_visible = cxl_region_target_visible,
};
2389
/* Accessor returning the address of the file-local cxl_region_target_group. */
static const struct attribute_group *get_cxl_region_target_group(void)
{
	return &cxl_region_target_group;
}
2394
/*
 * NULL-terminated list of the attribute groups attached to every region
 * device through cxl_region_type.groups.
 */
static const struct attribute_group *region_groups[] = {
	&cxl_base_attribute_group,
	&cxl_region_group,
	&cxl_region_target_group,
	&cxl_region_access0_coordinate_group,
	&cxl_region_access1_coordinate_group,
	NULL,
};
2403
cxl_region_release(struct device * dev)2404 static void cxl_region_release(struct device *dev)
2405 {
2406 struct cxl_region *cxlr = to_cxl_region(dev);
2407 struct cxl_root_decoder *cxlrd = cxlr->cxlrd;
2408 int id = atomic_read(&cxlrd->region_id);
2409
2410 /*
2411 * Try to reuse the recently idled id rather than the cached
2412 * next id to prevent the region id space from increasing
2413 * unnecessarily.
2414 */
2415 if (cxlr->id < id)
2416 if (atomic_try_cmpxchg(&cxlrd->region_id, &id, cxlr->id)) {
2417 memregion_free(id);
2418 goto out;
2419 }
2420
2421 memregion_free(cxlr->id);
2422 out:
2423 put_device(dev->parent);
2424 kfree(cxlr);
2425 }
2426
/*
 * Device type shared by all region devices: supplies the type name, the
 * release callback and the sysfs attribute groups. is_cxl_region() and
 * to_cxl_region() identify regions by comparing against this object.
 */
const struct device_type cxl_region_type = {
	.name = "cxl_region",
	.release = cxl_region_release,
	.groups = region_groups
};
2432
/* Return true when @dev's device type is cxl_region_type. */
bool is_cxl_region(struct device *dev)
{
	return dev->type == &cxl_region_type;
}
EXPORT_SYMBOL_NS_GPL(is_cxl_region, "CXL");
2438
to_cxl_region(struct device * dev)2439 static struct cxl_region *to_cxl_region(struct device *dev)
2440 {
2441 if (dev_WARN_ONCE(dev, dev->type != &cxl_region_type,
2442 "not a cxl_region device\n"))
2443 return NULL;
2444
2445 return container_of(dev, struct cxl_region, dev);
2446 }
2447
unregister_region(void * _cxlr)2448 static void unregister_region(void *_cxlr)
2449 {
2450 struct cxl_region *cxlr = _cxlr;
2451 struct cxl_region_params *p = &cxlr->params;
2452 int i;
2453
2454 device_del(&cxlr->dev);
2455
2456 /*
2457 * Now that region sysfs is shutdown, the parameter block is now
2458 * read-only, so no need to hold the region rwsem to access the
2459 * region parameters.
2460 */
2461 for (i = 0; i < p->interleave_ways; i++)
2462 detach_target(cxlr, i);
2463
2464 cxlr->hpa_range = DEFINE_RANGE(0, -1);
2465
2466 cxl_region_iomem_release(cxlr);
2467 put_device(&cxlr->dev);
2468 }
2469
2470 static struct lock_class_key cxl_region_key;
2471
cxl_region_alloc(struct cxl_root_decoder * cxlrd,int id)2472 static struct cxl_region *cxl_region_alloc(struct cxl_root_decoder *cxlrd, int id)
2473 {
2474 struct cxl_region *cxlr;
2475 struct device *dev;
2476
2477 cxlr = kzalloc_obj(*cxlr);
2478 if (!cxlr) {
2479 memregion_free(id);
2480 return ERR_PTR(-ENOMEM);
2481 }
2482
2483 dev = &cxlr->dev;
2484 device_initialize(dev);
2485 lockdep_set_class(&dev->mutex, &cxl_region_key);
2486 dev->parent = &cxlrd->cxlsd.cxld.dev;
2487 /*
2488 * Keep root decoder pinned through cxl_region_release to fixup
2489 * region id allocations
2490 */
2491 get_device(dev->parent);
2492 cxlr->cxlrd = cxlrd;
2493 cxlr->id = id;
2494
2495 device_set_pm_not_required(dev);
2496 dev->bus = &cxl_bus_type;
2497 dev->type = &cxl_region_type;
2498 cxl_region_setup_flags(cxlr, &cxlrd->cxlsd.cxld);
2499
2500 return cxlr;
2501 }
2502
cxl_region_update_coordinates(struct cxl_region * cxlr,int nid)2503 static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid)
2504 {
2505 int cset = 0;
2506 int rc;
2507
2508 for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
2509 if (cxlr->coord[i].read_bandwidth) {
2510 node_update_perf_attrs(nid, &cxlr->coord[i], i);
2511 cset++;
2512 }
2513 }
2514
2515 if (!cset)
2516 return false;
2517
2518 rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_access0_group());
2519 if (rc)
2520 dev_dbg(&cxlr->dev, "Failed to update access0 group\n");
2521
2522 rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_access1_group());
2523 if (rc)
2524 dev_dbg(&cxlr->dev, "Failed to update access1 group\n");
2525
2526 return true;
2527 }
2528
cxl_region_perf_attrs_callback(struct notifier_block * nb,unsigned long action,void * arg)2529 static int cxl_region_perf_attrs_callback(struct notifier_block *nb,
2530 unsigned long action, void *arg)
2531 {
2532 struct cxl_region *cxlr = container_of(nb, struct cxl_region,
2533 node_notifier);
2534 struct node_notify *nn = arg;
2535 int nid = nn->nid;
2536 int region_nid;
2537
2538 if (action != NODE_ADDED_FIRST_MEMORY)
2539 return NOTIFY_DONE;
2540
2541 /*
2542 * No need to hold cxl_rwsem.region; region parameters are stable
2543 * within the cxl_region driver.
2544 */
2545 region_nid = phys_to_target_node(cxlr->params.res->start);
2546 if (nid != region_nid)
2547 return NOTIFY_DONE;
2548
2549 /* No action needed if node bit already set */
2550 if (node_test_and_set(nid, nodemask_region_seen))
2551 return NOTIFY_DONE;
2552
2553 if (!cxl_region_update_coordinates(cxlr, nid))
2554 return NOTIFY_DONE;
2555
2556 return NOTIFY_OK;
2557 }
2558
cxl_region_calculate_adistance(struct notifier_block * nb,unsigned long nid,void * data)2559 static int cxl_region_calculate_adistance(struct notifier_block *nb,
2560 unsigned long nid, void *data)
2561 {
2562 struct cxl_region *cxlr = container_of(nb, struct cxl_region,
2563 adist_notifier);
2564 struct access_coordinate *perf;
2565 int *adist = data;
2566 int region_nid;
2567
2568 /*
2569 * No need to hold cxl_rwsem.region; region parameters are stable
2570 * within the cxl_region driver.
2571 */
2572 region_nid = phys_to_target_node(cxlr->params.res->start);
2573 if (nid != region_nid)
2574 return NOTIFY_OK;
2575
2576 perf = &cxlr->coord[ACCESS_COORDINATE_CPU];
2577
2578 if (mt_perf_to_adistance(perf, adist))
2579 return NOTIFY_OK;
2580
2581 return NOTIFY_STOP;
2582 }
2583
/**
 * devm_cxl_add_region - Adds a region to a decoder
 * @cxlrd: root decoder
 * @id: memregion id to create, or memregion_free() on failure
 * @mode: mode for the endpoint decoders of this region
 * @type: select whether this is an expander or accelerator (type-2 or type-3)
 *
 * This is the second step of region initialization. Regions exist within an
 * address space which is mapped by a @cxlrd.
 *
 * Return: the new region if it was added to the @cxlrd, else an ERR_PTR()
 * encoded negative error code. The region will be named "regionZ" where Z is
 * the unique region number.
 */
static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
					      int id,
					      enum cxl_partition_mode mode,
					      enum cxl_decoder_type type)
{
	struct cxl_port *port = to_cxl_port(cxlrd->cxlsd.cxld.dev.parent);
	struct cxl_region *cxlr;
	struct device *dev;
	int rc;

	cxlr = cxl_region_alloc(cxlrd, id);
	if (IS_ERR(cxlr))
		return cxlr;
	cxlr->mode = mode;
	cxlr->type = type;

	dev = &cxlr->dev;
	rc = dev_set_name(dev, "region%d", id);
	if (rc)
		goto err;

	rc = device_add(dev);
	if (rc)
		goto err;

	/*
	 * Teardown is tied to the root port's uport device. No put_device()
	 * here on failure: the "_or_reset" variant is expected to have run
	 * unregister_region(), which drops the reference itself.
	 */
	rc = devm_add_action_or_reset(port->uport_dev, unregister_region, cxlr);
	if (rc)
		return ERR_PTR(rc);

	dev_dbg(port->uport_dev, "%s: created %s\n",
		dev_name(&cxlrd->cxlsd.cxld.dev), dev_name(dev));
	return cxlr;

err:
	/* dropping the reference hands cleanup to cxl_region_release() */
	put_device(dev);
	return ERR_PTR(rc);
}
2634
/*
 * Emit "region<id>" for the root decoder's current region_id value; this is
 * the id that __create_region() compares a create request against.
 */
static ssize_t __create_region_show(struct cxl_root_decoder *cxlrd, char *buf)
{
	return sysfs_emit(buf, "region%u\n", atomic_read(&cxlrd->region_id));
}
2639
/* sysfs show for create_pmem_region: see __create_region_show() */
static ssize_t create_pmem_region_show(struct device *dev,
				       struct device_attribute *attr, char *buf)
{
	return __create_region_show(to_cxl_root_decoder(dev), buf);
}
2645
/* sysfs show for create_ram_region: see __create_region_show() */
static ssize_t create_ram_region_show(struct device *dev,
				      struct device_attribute *attr, char *buf)
{
	return __create_region_show(to_cxl_root_decoder(dev), buf);
}
2651
__create_region(struct cxl_root_decoder * cxlrd,enum cxl_partition_mode mode,int id)2652 static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd,
2653 enum cxl_partition_mode mode, int id)
2654 {
2655 int rc;
2656
2657 switch (mode) {
2658 case CXL_PARTMODE_RAM:
2659 case CXL_PARTMODE_PMEM:
2660 break;
2661 default:
2662 dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %d\n", mode);
2663 return ERR_PTR(-EINVAL);
2664 }
2665
2666 rc = memregion_alloc(GFP_KERNEL);
2667 if (rc < 0)
2668 return ERR_PTR(rc);
2669
2670 if (atomic_cmpxchg(&cxlrd->region_id, id, rc) != id) {
2671 memregion_free(rc);
2672 return ERR_PTR(-EBUSY);
2673 }
2674
2675 return devm_cxl_add_region(cxlrd, id, mode, CXL_DECODER_HOSTONLYMEM);
2676 }
2677
create_region_store(struct device * dev,const char * buf,size_t len,enum cxl_partition_mode mode)2678 static ssize_t create_region_store(struct device *dev, const char *buf,
2679 size_t len, enum cxl_partition_mode mode)
2680 {
2681 struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
2682 struct cxl_region *cxlr;
2683 int rc, id;
2684
2685 rc = sscanf(buf, "region%d\n", &id);
2686 if (rc != 1)
2687 return -EINVAL;
2688
2689 cxlr = __create_region(cxlrd, mode, id);
2690 if (IS_ERR(cxlr))
2691 return PTR_ERR(cxlr);
2692
2693 return len;
2694 }
2695
/* sysfs store for create_pmem_region: create a CXL_PARTMODE_PMEM region */
static ssize_t create_pmem_region_store(struct device *dev,
					struct device_attribute *attr,
					const char *buf, size_t len)
{
	return create_region_store(dev, buf, len, CXL_PARTMODE_PMEM);
}
DEVICE_ATTR_RW(create_pmem_region);
2703
/* sysfs store for create_ram_region: create a CXL_PARTMODE_RAM region */
static ssize_t create_ram_region_store(struct device *dev,
				       struct device_attribute *attr,
				       const char *buf, size_t len)
{
	return create_region_store(dev, buf, len, CXL_PARTMODE_RAM);
}
DEVICE_ATTR_RW(create_ram_region);
2711
region_show(struct device * dev,struct device_attribute * attr,char * buf)2712 static ssize_t region_show(struct device *dev, struct device_attribute *attr,
2713 char *buf)
2714 {
2715 struct cxl_decoder *cxld = to_cxl_decoder(dev);
2716 ssize_t rc;
2717
2718 ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
2719 if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
2720 return rc;
2721
2722 if (cxld->region)
2723 return sysfs_emit(buf, "%s\n", dev_name(&cxld->region->dev));
2724 return sysfs_emit(buf, "\n");
2725 }
2726 DEVICE_ATTR_RO(region);
2727
2728 static struct cxl_region *
cxl_find_region_by_name(struct cxl_root_decoder * cxlrd,const char * name)2729 cxl_find_region_by_name(struct cxl_root_decoder *cxlrd, const char *name)
2730 {
2731 struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
2732 struct device *region_dev;
2733
2734 region_dev = device_find_child_by_name(&cxld->dev, name);
2735 if (!region_dev)
2736 return ERR_PTR(-ENODEV);
2737
2738 return to_cxl_region(region_dev);
2739 }
2740
/*
 * sysfs store for delete_region: look up the region named in @buf under this
 * root decoder and tear it down.
 *
 * Return: @len on success, or the lookup error from
 * cxl_find_region_by_name().
 */
static ssize_t delete_region_store(struct device *dev,
				   struct device_attribute *attr,
				   const char *buf, size_t len)
{
	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
	struct cxl_port *port = to_cxl_port(dev->parent);
	struct cxl_region *cxlr;

	cxlr = cxl_find_region_by_name(cxlrd, buf);
	if (IS_ERR(cxlr))
		return PTR_ERR(cxlr);

	/*
	 * Same (device, action, data) triple that devm_cxl_add_region()
	 * registered; presumably this runs unregister_region() now rather
	 * than at port teardown -- confirm against the devres API.
	 */
	devm_release_action(port->uport_dev, unregister_region, cxlr);
	/* drop the reference held since the cxl_find_region_by_name() lookup */
	put_device(&cxlr->dev);

	return len;
}
DEVICE_ATTR_WO(delete_region);
2759
cxl_pmem_region_release(struct device * dev)2760 static void cxl_pmem_region_release(struct device *dev)
2761 {
2762 struct cxl_pmem_region *cxlr_pmem = to_cxl_pmem_region(dev);
2763 int i;
2764
2765 for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
2766 struct cxl_memdev *cxlmd = cxlr_pmem->mapping[i].cxlmd;
2767
2768 put_device(&cxlmd->dev);
2769 }
2770
2771 kfree(cxlr_pmem);
2772 }
2773
/* NULL-terminated attribute groups for pmem region devices (base group only) */
static const struct attribute_group *cxl_pmem_region_attribute_groups[] = {
	&cxl_base_attribute_group,
	NULL,
};
2778
/*
 * Device type for pmem region devices: supplies the type name, the release
 * callback and the sysfs attribute groups. is_cxl_pmem_region() identifies
 * pmem regions by comparing against this object.
 */
const struct device_type cxl_pmem_region_type = {
	.name = "cxl_pmem_region",
	.release = cxl_pmem_region_release,
	.groups = cxl_pmem_region_attribute_groups,
};
2784
/* Return true when @dev's device type is cxl_pmem_region_type. */
bool is_cxl_pmem_region(struct device *dev)
{
	return dev->type == &cxl_pmem_region_type;
}
EXPORT_SYMBOL_NS_GPL(is_cxl_pmem_region, "CXL");
2790
to_cxl_pmem_region(struct device * dev)2791 struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev)
2792 {
2793 if (dev_WARN_ONCE(dev, !is_cxl_pmem_region(dev),
2794 "not a cxl_pmem_region device\n"))
2795 return NULL;
2796 return container_of(dev, struct cxl_pmem_region, dev);
2797 }
2798 EXPORT_SYMBOL_NS_GPL(to_cxl_pmem_region, "CXL");
2799
/*
 * State shared between cxl_get_poison_by_endpoint(), poison_by_decoder() and
 * cxl_get_poison_unmapped() while walking an endpoint port's decoders.
 */
struct cxl_poison_context {
	/* endpoint port being walked; its commit_end ends the decoder walk */
	struct cxl_port *port;
	/* partition index of the last committed decoder, -1 until one is found */
	int part;
	/* DPA just past the last committed decoder's allocation */
	u64 offset;
};
2805
cxl_get_poison_unmapped(struct cxl_memdev * cxlmd,struct cxl_poison_context * ctx)2806 static int cxl_get_poison_unmapped(struct cxl_memdev *cxlmd,
2807 struct cxl_poison_context *ctx)
2808 {
2809 struct cxl_dev_state *cxlds = cxlmd->cxlds;
2810 const struct resource *res;
2811 struct resource *p, *last;
2812 u64 offset, length;
2813 int rc = 0;
2814
2815 if (ctx->part < 0)
2816 return 0;
2817
2818 /*
2819 * Collect poison for the remaining unmapped resources after
2820 * poison is collected by committed endpoints decoders.
2821 */
2822 for (int i = ctx->part; i < cxlds->nr_partitions; i++) {
2823 res = &cxlds->part[i].res;
2824 for (p = res->child, last = NULL; p; p = p->sibling)
2825 last = p;
2826 if (last)
2827 offset = last->end + 1;
2828 else
2829 offset = res->start;
2830 length = res->end - offset + 1;
2831 if (!length)
2832 break;
2833 rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
2834 if (rc == -EFAULT && cxlds->part[i].mode == CXL_PARTMODE_RAM)
2835 continue;
2836 if (rc)
2837 break;
2838 }
2839
2840 return rc;
2841 }
2842
/*
 * device_for_each_child() callback used by cxl_get_poison_by_endpoint():
 * collect poison for one endpoint decoder's skipped range (if any) and its
 * DPA allocation.
 *
 * Children that are not endpoint decoders, or that have no DPA allocation,
 * are ignored. -EFAULT is treated as success when the decoder's partition
 * is in RAM mode.
 *
 * Return: 0 to continue the walk, 1 once the decoder whose id equals the
 * port's commit_end has been processed (ctx->offset and ctx->part are then
 * filled in), or a negative errno from cxl_mem_get_poison().
 */
static int poison_by_decoder(struct device *dev, void *arg)
{
	struct cxl_poison_context *ctx = arg;
	struct cxl_endpoint_decoder *cxled;
	enum cxl_partition_mode mode;
	struct cxl_dev_state *cxlds;
	struct cxl_memdev *cxlmd;
	u64 offset, length;
	int rc = 0;

	if (!is_endpoint_decoder(dev))
		return rc;

	cxled = to_cxl_endpoint_decoder(dev);
	if (!cxled->dpa_res)
		return rc;

	cxlmd = cxled_to_memdev(cxled);
	cxlds = cxlmd->cxlds;
	mode = cxlds->part[cxled->part].mode;

	/* the skipped range sits immediately below the decoder's allocation */
	if (cxled->skip) {
		offset = cxled->dpa_res->start - cxled->skip;
		length = cxled->skip;
		rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
		if (rc == -EFAULT && mode == CXL_PARTMODE_RAM)
			rc = 0;
		if (rc)
			return rc;
	}

	offset = cxled->dpa_res->start;
	length = cxled->dpa_res->end - offset + 1;
	rc = cxl_mem_get_poison(cxlmd, offset, length, cxled->cxld.region);
	if (rc == -EFAULT && mode == CXL_PARTMODE_RAM)
		rc = 0;
	if (rc)
		return rc;

	/* Iterate until commit_end is reached */
	if (cxled->cxld.id == ctx->port->commit_end) {
		ctx->offset = cxled->dpa_res->end + 1;
		ctx->part = cxled->part;
		return 1;
	}

	return 0;
}
2891
cxl_get_poison_by_endpoint(struct cxl_port * port)2892 int cxl_get_poison_by_endpoint(struct cxl_port *port)
2893 {
2894 struct cxl_poison_context ctx;
2895 int rc = 0;
2896
2897 ctx = (struct cxl_poison_context) {
2898 .port = port,
2899 .part = -1,
2900 };
2901
2902 rc = device_for_each_child(&port->dev, &ctx, poison_by_decoder);
2903 if (rc == 1)
2904 rc = cxl_get_poison_unmapped(to_cxl_memdev(port->uport_dev),
2905 &ctx);
2906
2907 return rc;
2908 }
2909
/* Search state for cxl_dpa_to_region() / __cxl_dpa_to_region() */
struct cxl_dpa_to_region_context {
	/* result: region of the decoder mapping @dpa, may be NULL */
	struct cxl_region *cxlr;
	/* input: device physical address being looked up */
	u64 dpa;
};
2914
/*
 * device_for_each_child() callback used by cxl_dpa_to_region(): check
 * whether endpoint decoder @dev maps ctx->dpa.
 *
 * Return: 0 to keep searching (not an endpoint decoder, no or empty DPA
 * allocation, or address not contained), 1 to stop once a mapping decoder
 * is found. ctx->cxlr is set to that decoder's region, which may be NULL.
 */
static int __cxl_dpa_to_region(struct device *dev, void *arg)
{
	struct cxl_dpa_to_region_context *ctx = arg;
	struct cxl_endpoint_decoder *cxled;
	struct cxl_region *cxlr;
	u64 dpa = ctx->dpa;

	if (!is_endpoint_decoder(dev))
		return 0;

	cxled = to_cxl_endpoint_decoder(dev);
	if (!cxled || !cxled->dpa_res || !resource_size(cxled->dpa_res))
		return 0;

	if (!cxl_resource_contains_addr(cxled->dpa_res, dpa))
		return 0;

	/*
	 * Stop the region search (return 1) when an endpoint mapping is
	 * found. The region may not be fully constructed so offering
	 * the cxlr in the context structure is not guaranteed.
	 */
	cxlr = cxled->cxld.region;
	if (cxlr)
		dev_dbg(dev, "dpa:0x%llx mapped in region:%s\n", dpa,
			dev_name(&cxlr->dev));
	else
		dev_dbg(dev, "dpa:0x%llx mapped in endpoint:%s\n", dpa,
			dev_name(dev));

	ctx->cxlr = cxlr;

	return 1;
}
2949
/*
 * Find the region, if any, that maps device physical address @dpa of @cxlmd.
 *
 * The endpoint's decoders are only searched when the memdev has an endpoint
 * port with at least one committed decoder.
 *
 * Return: the region of the decoder mapping @dpa, or NULL when no search was
 * done, no decoder maps @dpa, or the mapping decoder has no region.
 */
struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev *cxlmd, u64 dpa)
{
	struct cxl_dpa_to_region_context ctx = {
		.dpa = dpa,
	};
	struct cxl_port *port = cxlmd->endpoint;

	if (!port || !is_cxl_endpoint(port) ||
	    !cxl_num_decoders_committed(port))
		return NULL;

	device_for_each_child(&port->dev, &ctx, __cxl_dpa_to_region);

	return ctx.cxlr;
}
2964
/*
 * Check that @hpa falls inside the granule that interleave position @pos
 * owns within its interleave stripe (granularity * ways bytes) of @cxlr.
 *
 * Return: true when it does, false (with a debug message) otherwise.
 */
static bool cxl_is_hpa_in_chunk(u64 hpa, struct cxl_region *cxlr, int pos)
{
	struct cxl_region_params *p = &cxlr->params;
	int gran = p->interleave_granularity;
	int ways = p->interleave_ways;
	u64 offset;

	/* Is the hpa in an expected chunk for its pos(-ition) */
	offset = hpa - p->res->start;
	/* do_div() yields the remainder: the offset within one stripe */
	offset = do_div(offset, gran * ways);
	if ((offset >= pos * gran) && (offset < (pos + 1) * gran))
		return true;

	dev_dbg(&cxlr->dev,
		"Addr trans fail: hpa 0x%llx not in expected chunk\n", hpa);

	return false;
}
2983
/* Position passed by callers that have no interleave position to validate */
#define CXL_POS_ZERO 0
/**
 * cxl_validate_translation_params - validate address translation inputs
 * @eiw: encoded interleave ways
 * @eig: encoded interleave granularity
 * @pos: position in interleave
 *
 * Callers pass CXL_POS_ZERO when no position parameter needs validating.
 *
 * @eiw and @eig must decode via eiw_to_ways() and eig_to_granularity(), and
 * @pos must lie in [0, ways).
 *
 * Returns: 0 on success, -EINVAL on first invalid parameter
 */
int cxl_validate_translation_params(u8 eiw, u16 eig, int pos)
{
	int ways, gran;

	if (eiw_to_ways(eiw, &ways)) {
		pr_debug("%s: invalid eiw=%u\n", __func__, eiw);
		return -EINVAL;
	}
	if (eig_to_granularity(eig, &gran)) {
		pr_debug("%s: invalid eig=%u\n", __func__, eig);
		return -EINVAL;
	}
	if (pos < 0 || pos >= ways) {
		pr_debug("%s: invalid pos=%d for ways=%u\n", __func__, pos,
			 ways);
		return -EINVAL;
	}

	return 0;
}
EXPORT_SYMBOL_FOR_MODULES(cxl_validate_translation_params, "cxl_translate");
3016
cxl_calculate_dpa_offset(u64 hpa_offset,u8 eiw,u16 eig)3017 u64 cxl_calculate_dpa_offset(u64 hpa_offset, u8 eiw, u16 eig)
3018 {
3019 u64 dpa_offset, bits_lower, bits_upper, temp;
3020 int ret;
3021
3022 ret = cxl_validate_translation_params(eiw, eig, CXL_POS_ZERO);
3023 if (ret)
3024 return ULLONG_MAX;
3025
3026 /*
3027 * DPA offset: CXL Spec 3.2 Section 8.2.4.20.13
3028 * Lower bits [IG+7:0] pass through unchanged
3029 * (eiw < 8)
3030 * Per spec: DPAOffset[51:IG+8] = (HPAOffset[51:IG+IW+8] >> IW)
3031 * Clear the position bits to isolate upper section, then
3032 * reverse the left shift by eiw that occurred during DPA->HPA
3033 * (eiw >= 8)
3034 * Per spec: DPAOffset[51:IG+8] = HPAOffset[51:IG+IW] / 3
3035 * Extract upper bits from the correct bit range and divide by 3
3036 * to recover the original DPA upper bits
3037 */
3038 bits_lower = hpa_offset & GENMASK_ULL(eig + 7, 0);
3039 if (eiw < 8) {
3040 temp = hpa_offset &= ~GENMASK_ULL(eig + eiw + 8 - 1, 0);
3041 dpa_offset = temp >> eiw;
3042 } else {
3043 bits_upper = div64_u64(hpa_offset >> (eig + eiw), 3);
3044 dpa_offset = bits_upper << (eig + 8);
3045 }
3046 dpa_offset |= bits_lower;
3047
3048 return dpa_offset;
3049 }
3050 EXPORT_SYMBOL_FOR_MODULES(cxl_calculate_dpa_offset, "cxl_translate");
3051
cxl_calculate_position(u64 hpa_offset,u8 eiw,u16 eig)3052 int cxl_calculate_position(u64 hpa_offset, u8 eiw, u16 eig)
3053 {
3054 unsigned int ways = 0;
3055 u64 shifted, rem;
3056 int pos, ret;
3057
3058 ret = cxl_validate_translation_params(eiw, eig, CXL_POS_ZERO);
3059 if (ret)
3060 return ret;
3061
3062 if (!eiw)
3063 /* position is 0 if no interleaving */
3064 return 0;
3065
3066 /*
3067 * Interleave position: CXL Spec 3.2 Section 8.2.4.20.13
3068 * eiw < 8
3069 * Position is in the IW bits at HPA_OFFSET[IG+8+IW-1:IG+8].
3070 * Per spec "remove IW bits starting with bit position IG+8"
3071 * eiw >= 8
3072 * Position is not explicitly stored in HPA_OFFSET bits. It is
3073 * derived from the modulo operation of the upper bits using
3074 * the total number of interleave ways.
3075 */
3076 if (eiw < 8) {
3077 pos = (hpa_offset >> (eig + 8)) & GENMASK(eiw - 1, 0);
3078 } else {
3079 shifted = hpa_offset >> (eig + 8);
3080 eiw_to_ways(eiw, &ways);
3081 div64_u64_rem(shifted, ways, &rem);
3082 pos = rem;
3083 }
3084
3085 return pos;
3086 }
3087 EXPORT_SYMBOL_FOR_MODULES(cxl_calculate_position, "cxl_translate");
3088
cxl_calculate_hpa_offset(u64 dpa_offset,int pos,u8 eiw,u16 eig)3089 u64 cxl_calculate_hpa_offset(u64 dpa_offset, int pos, u8 eiw, u16 eig)
3090 {
3091 u64 mask_upper, hpa_offset, bits_upper;
3092 int ret;
3093
3094 ret = cxl_validate_translation_params(eiw, eig, pos);
3095 if (ret)
3096 return ULLONG_MAX;
3097
3098 /*
3099 * The device position in the region interleave set was removed
3100 * from the offset at HPA->DPA translation. To reconstruct the
3101 * HPA, place the 'pos' in the offset.
3102 *
3103 * The placement of 'pos' in the HPA is determined by interleave
3104 * ways and granularity and is defined in the CXL Spec 3.0 Section
3105 * 8.2.4.19.13 Implementation Note: Device Decode Logic
3106 */
3107
3108 mask_upper = GENMASK_ULL(51, eig + 8);
3109
3110 if (eiw < 8) {
3111 hpa_offset = (dpa_offset & mask_upper) << eiw;
3112 hpa_offset |= pos << (eig + 8);
3113 } else {
3114 bits_upper = (dpa_offset & mask_upper) >> (eig + 8);
3115 bits_upper = bits_upper * 3;
3116 hpa_offset = ((bits_upper << (eiw - 8)) + pos) << (eig + 8);
3117 }
3118
3119 /* The lower bits remain unchanged */
3120 hpa_offset |= dpa_offset & GENMASK_ULL(eig + 7, 0);
3121
3122 return hpa_offset;
3123 }
3124 EXPORT_SYMBOL_FOR_MODULES(cxl_calculate_hpa_offset, "cxl_translate");
3125
/*
 * decode_pos() - split an endpoint position into port and host-bridge parts
 *
 * Decode for 3-6-12 way interleaves as defined in the CXL Spec 4.0
 * 9.13.1.1 Legal Interleaving Configurations. Region creation should
 * prevent invalid combinations but sanity check here to avoid a silent
 * bad decode.
 *
 * Returns 0 and fills @pos_port / @pos_hb on success, -EINVAL when the
 * @region_ways / @hb_ways combination is not one of the accepted ones.
 */
static int decode_pos(int region_ways, int hb_ways, int pos, int *pos_port,
		      int *pos_hb)
{
	int per_hb;

	/* Both counts must be 3, 6 or 12 ... */
	if (hb_ways != 3 && hb_ways != 6 && hb_ways != 12)
		return -EINVAL;
	if (region_ways != 3 && region_ways != 6 && region_ways != 12)
		return -EINVAL;
	/* ... and the region cannot have fewer ways than the host bridges */
	if (region_ways < hb_ways)
		return -EINVAL;

	/*
	 * Each host bridge contributes an equal number of endpoints
	 * that are laid out contiguously per host bridge. Modulo
	 * selects the port within a host bridge and division selects
	 * the host bridge position.
	 */
	per_hb = region_ways / hb_ways;
	*pos_hb = pos / per_hb;
	*pos_port = pos % per_hb;

	return 0;
}
3165
/*
 * restore_parent() - reconstruct the address in the parent
 *
 * The 'gran - 1' mask relies on the CXL Spec requirement that interleave
 * granularity is always a power of two.
 *
 * The result is the sum of three terms:
 *   - the granularity-aligned part of @addr, distributed across all
 *     interleave @ways
 *   - the positional offset, @pos * @gran
 *   - the offset of @addr within its granule, restored unchanged
 */
static u64 restore_parent(u64 addr, u64 pos, u64 gran, u64 ways)
{
	u64 granule_mask = gran - 1;
	u64 in_granule = addr & granule_mask;
	u64 aligned = addr & ~granule_mask;

	return aligned * ways + pos * gran + in_granule;
}
3185
3186 /*
3187 * unaligned_dpa_to_hpa() translates a DPA to HPA when the region resource
3188 * start address is not aligned at Host Bridge Interleave Ways * 256MB.
3189 *
3190 * Unaligned start addresses only occur with MOD3 interleaves. All power-
3191 * of-two interleaves are guaranteed aligned.
3192 */
unaligned_dpa_to_hpa(struct cxl_decoder * cxld,struct cxl_region_params * p,int pos,u64 dpa)3193 static u64 unaligned_dpa_to_hpa(struct cxl_decoder *cxld,
3194 struct cxl_region_params *p, int pos, u64 dpa)
3195 {
3196 int ways_port = p->interleave_ways / cxld->interleave_ways;
3197 int gran_port = p->interleave_granularity;
3198 int gran_hb = cxld->interleave_granularity;
3199 int ways_hb = cxld->interleave_ways;
3200 int pos_port, pos_hb, gran_shift;
3201 u64 hpa_port = 0;
3202
3203 /* Decode an endpoint 'pos' into port and host-bridge components */
3204 if (decode_pos(p->interleave_ways, ways_hb, pos, &pos_port, &pos_hb)) {
3205 dev_dbg(&cxld->dev, "not supported for region ways:%d\n",
3206 p->interleave_ways);
3207 return ULLONG_MAX;
3208 }
3209
3210 /* Restore the port parent address if needed */
3211 if (gran_hb != gran_port)
3212 hpa_port = restore_parent(dpa, pos_port, gran_port, ways_port);
3213 else
3214 hpa_port = dpa;
3215
3216 /*
3217 * Complete the HPA reconstruction by restoring the address as if
3218 * each HB position is a candidate. Test against expected pos_hb
3219 * to confirm match.
3220 */
3221 gran_shift = ilog2(gran_hb);
3222 for (int position = 0; position < ways_hb; position++) {
3223 u64 shifted, hpa;
3224
3225 hpa = restore_parent(hpa_port, position, gran_hb, ways_hb);
3226 hpa += p->res->start;
3227
3228 shifted = hpa >> gran_shift;
3229 if (do_div(shifted, ways_hb) == pos_hb)
3230 return hpa;
3231 }
3232
3233 dev_dbg(&cxld->dev, "fail dpa:%#llx region:%pr pos:%d\n", dpa, p->res,
3234 pos);
3235 dev_dbg(&cxld->dev, " port-w/g/p:%d/%d/%d hb-w/g/p:%d/%d/%d\n",
3236 ways_port, gran_port, pos_port, ways_hb, gran_hb, pos_hb);
3237
3238 return ULLONG_MAX;
3239 }
3240
region_is_unaligned_mod3(struct cxl_region * cxlr)3241 static bool region_is_unaligned_mod3(struct cxl_region *cxlr)
3242 {
3243 struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
3244 struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
3245 struct cxl_region_params *p = &cxlr->params;
3246 int hbiw = cxld->interleave_ways;
3247 u64 rem;
3248
3249 if (is_power_of_2(hbiw))
3250 return false;
3251
3252 div64_u64_rem(p->res->start, (u64)hbiw * SZ_256M, &rem);
3253
3254 return (rem != 0);
3255 }
3256
/**
 * cxl_dpa_to_hpa() - translate a device physical address into the region
 * @cxlr: region the address is expected to map into
 * @cxlmd: memory device that owns @dpa
 * @dpa: device physical address to translate
 *
 * Return: the translated address, or ULLONG_MAX when the region uses
 * normalized addressing, @cxlmd is not a target of @cxlr, the endpoint
 * decoder has no DPA resource, or any translation step or the final
 * range/chunk validation fails.
 */
u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
		   u64 dpa)
{
	struct cxl_region_params *p = &cxlr->params;
	struct cxl_root_decoder *cxlrd = cxlr->cxlrd;
	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
	struct cxl_endpoint_decoder *cxled = NULL;
	u64 dpa_base, dpa_offset, hpa;
	bool unaligned;
	int pos;

	/*
	 * Conversion between SPA and DPA is not supported in
	 * Normalized Address mode.
	 */
	if (test_bit(CXL_REGION_F_NORMALIZED_ADDRESSING, &cxlr->flags))
		return ULLONG_MAX;

	/* Find the endpoint decoder through which @cxlmd joins the region */
	for (int i = 0; i < p->nr_targets; i++) {
		if (cxled_to_memdev(p->targets[i]) != cxlmd)
			continue;
		cxled = p->targets[i];
		break;
	}
	if (!cxled)
		return ULLONG_MAX;

	dpa_base = cxl_dpa_resource_start(cxled);
	if (dpa_base == RESOURCE_SIZE_MAX)
		return ULLONG_MAX;

	dpa_offset = dpa - dpa_base;
	pos = cxled->pos;

	unaligned = region_is_unaligned_mod3(cxlr);
	if (unaligned) {
		/* Unaligned calc for MOD3 interleaves not hbiw * 256MB aligned */
		hpa = unaligned_dpa_to_hpa(cxld, p, pos, dpa_offset);
		if (hpa == ULLONG_MAX)
			return ULLONG_MAX;
	} else {
		/*
		 * Aligned calc for all power-of-2 interleaves and for MOD3
		 * interleaves that are aligned at hbiw * 256MB
		 */
		u64 hpa_offset;
		u16 eig = 0;
		u8 eiw = 0;

		ways_to_eiw(p->interleave_ways, &eiw);
		granularity_to_eig(p->interleave_granularity, &eig);

		hpa_offset = cxl_calculate_hpa_offset(dpa_offset, pos, eiw, eig);
		if (hpa_offset == ULLONG_MAX)
			return ULLONG_MAX;

		/* Apply the hpa_offset to the region base address */
		hpa = p->res->start + hpa_offset;
	}

	hpa += p->cache_size;

	/* Root decoder translation overrides typical modulo decode */
	if (cxlrd->ops.hpa_to_spa)
		hpa = cxlrd->ops.hpa_to_spa(cxlrd, hpa);

	if (hpa == ULLONG_MAX)
		return ULLONG_MAX;

	if (!cxl_resource_contains_addr(p->res, hpa)) {
		dev_dbg(&cxlr->dev,
			"Addr trans fail: hpa 0x%llx not in region\n", hpa);
		return ULLONG_MAX;
	}

	/* Chunk check applies to aligned modulo decodes only */
	if (unaligned || cxlrd->ops.hpa_to_spa)
		return hpa;

	return cxl_is_hpa_in_chunk(hpa, cxlr, pos) ? hpa : ULLONG_MAX;
}
3338
3339 struct dpa_result {
3340 struct cxl_memdev *cxlmd;
3341 u64 dpa;
3342 };
3343
/*
 * unaligned_region_offset_to_dpa_result() - resolve a region offset to a
 * memdev and DPA for an unaligned MOD3 region
 *
 * Unaligned addresses are not algebraically invertible. Calculate a
 * dpa_offset independent of the target device and then enumerate and test
 * that dpa_offset against each candidate endpoint decoder.
 *
 * Returns 0 with @result filled in, or -ENXIO when no target reproduces
 * the HPA or the matching endpoint decoder has no DPA resource.
 */
static int unaligned_region_offset_to_dpa_result(struct cxl_region *cxlr,
						 u64 offset,
						 struct dpa_result *result)
{
	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
	struct cxl_region_params *p = &cxlr->params;
	u64 interleave_width, interleave_index;
	u64 gran, gran_offset, dpa_offset;
	u64 hpa = p->res->start + offset;
	u64 tmp = offset;

	gran = cxld->interleave_granularity;
	interleave_width = gran * cxld->interleave_ways;
	interleave_index = div64_u64(offset, interleave_width);
	/* do_div() leaves the quotient in @tmp and returns the remainder */
	gran_offset = do_div(tmp, gran);

	dpa_offset = interleave_index * gran + gran_offset;

	for (int i = 0; i < p->nr_targets; i++) {
		struct cxl_endpoint_decoder *cxled = p->targets[i];
		resource_size_t base;

		if (unaligned_dpa_to_hpa(cxld, p, cxled->pos, dpa_offset) != hpa)
			continue;

		/*
		 * RESOURCE_SIZE_MAX is a "no DPA resource" sentinel, as the
		 * other translation paths in this file treat it. Adding
		 * dpa_offset to it would wrap into a plausible looking DPA.
		 */
		base = cxl_dpa_resource_start(cxled);
		if (base == RESOURCE_SIZE_MAX) {
			dev_dbg(&cxlr->dev, "no DPA resource for %s\n",
				dev_name(&cxled->cxld.dev));
			return -ENXIO;
		}

		result->cxlmd = cxled_to_memdev(cxled);
		result->dpa = base + dpa_offset;
		return 0;
	}
	dev_err(&cxlr->dev,
		"failed to resolve HPA %#llx in unaligned MOD3 region\n", hpa);

	return -ENXIO;
}
3386
/*
 * region_offset_to_dpa_result() - resolve a region offset to a memdev and DPA
 *
 * Must be called with cxl_rwsem.region and cxl_rwsem.dpa held.
 *
 * Returns 0 with @result filled in, or -ENXIO when the offset cannot be
 * translated or no target sits at the computed position. On success
 * @result->dpa may still be RESOURCE_SIZE_MAX when the endpoint decoder
 * has no DPA resource; callers check for that.
 */
static int region_offset_to_dpa_result(struct cxl_region *cxlr, u64 offset,
				       struct dpa_result *result)
{
	struct cxl_root_decoder *cxlrd = cxlr->cxlrd;
	struct cxl_region_params *p = &cxlr->params;
	u64 hpa_offset = offset;
	u64 dpa_offset;
	u16 eig = 0;
	u8 eiw = 0;
	int pos;

	lockdep_assert_held(&cxl_rwsem.region);
	lockdep_assert_held(&cxl_rwsem.dpa);

	/* Input validation ensures valid ways and gran */
	granularity_to_eig(p->interleave_granularity, &eig);
	ways_to_eiw(p->interleave_ways, &eiw);

	/*
	 * If the root decoder has SPA to CXL HPA callback, use it. Otherwise
	 * CXL HPA is assumed to equal SPA.
	 */
	if (cxlrd->ops.spa_to_hpa) {
		u64 hpa = cxlrd->ops.spa_to_hpa(cxlrd, p->res->start + offset);

		if (hpa == ULLONG_MAX) {
			dev_dbg(&cxlr->dev, "HPA not found for %pr offset %#llx\n",
				p->res, offset);
			return -ENXIO;
		}
		hpa_offset = hpa - p->res->start;
	}

	if (region_is_unaligned_mod3(cxlr))
		return unaligned_region_offset_to_dpa_result(cxlr, offset,
							     result);

	pos = cxl_calculate_position(hpa_offset, eiw, eig);
	if (pos < 0 || pos >= p->nr_targets) {
		dev_dbg(&cxlr->dev, "Invalid position %d for %d targets\n",
			pos, p->nr_targets);
		return -ENXIO;
	}

	dpa_offset = cxl_calculate_dpa_offset(hpa_offset, eiw, eig);

	/* Look-up and return the result: a memdev and a DPA */
	for (int i = 0; i < p->nr_targets; i++) {
		struct cxl_endpoint_decoder *cxled = p->targets[i];
		u64 dpa;

		if (cxled->pos != pos)
			continue;

		/* Leave the RESOURCE_SIZE_MAX sentinel intact for the caller */
		dpa = cxl_dpa_resource_start(cxled);
		if (dpa != RESOURCE_SIZE_MAX)
			dpa += dpa_offset;

		result->dpa = dpa;
		result->cxlmd = cxled_to_memdev(cxled);

		return 0;
	}
	dev_err(&cxlr->dev, "No device found for position %d\n", pos);

	return -ENXIO;
}
3452
3453 static struct lock_class_key cxl_pmem_region_key;
3454
cxl_pmem_region_alloc(struct cxl_region * cxlr)3455 static int cxl_pmem_region_alloc(struct cxl_region *cxlr)
3456 {
3457 struct cxl_region_params *p = &cxlr->params;
3458 struct cxl_nvdimm_bridge *cxl_nvb;
3459 struct device *dev;
3460 int i;
3461
3462 guard(rwsem_read)(&cxl_rwsem.region);
3463 if (p->state != CXL_CONFIG_COMMIT)
3464 return -ENXIO;
3465
3466 struct cxl_pmem_region *cxlr_pmem __free(kfree) =
3467 kzalloc_flex(*cxlr_pmem, mapping, p->nr_targets);
3468 if (!cxlr_pmem)
3469 return -ENOMEM;
3470
3471 cxlr_pmem->hpa_range.start = p->res->start;
3472 cxlr_pmem->hpa_range.end = p->res->end;
3473
3474 /* Snapshot the region configuration underneath the cxl_rwsem.region */
3475 cxlr_pmem->nr_mappings = p->nr_targets;
3476 for (i = 0; i < p->nr_targets; i++) {
3477 struct cxl_endpoint_decoder *cxled = p->targets[i];
3478 struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
3479 struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
3480
3481 /*
3482 * Regions never span CXL root devices, so by definition the
3483 * bridge for one device is the same for all.
3484 */
3485 if (i == 0) {
3486 cxl_nvb = cxl_find_nvdimm_bridge(cxlmd->endpoint);
3487 if (!cxl_nvb)
3488 return -ENODEV;
3489 cxlr->cxl_nvb = cxl_nvb;
3490 }
3491 m->cxlmd = cxlmd;
3492 get_device(&cxlmd->dev);
3493 m->start = cxled->dpa_res->start;
3494 m->size = resource_size(cxled->dpa_res);
3495 m->position = i;
3496 }
3497
3498 dev = &cxlr_pmem->dev;
3499 device_initialize(dev);
3500 lockdep_set_class(&dev->mutex, &cxl_pmem_region_key);
3501 device_set_pm_not_required(dev);
3502 dev->parent = &cxlr->dev;
3503 dev->bus = &cxl_bus_type;
3504 dev->type = &cxl_pmem_region_type;
3505 cxlr_pmem->cxlr = cxlr;
3506 cxlr->cxlr_pmem = no_free_ptr(cxlr_pmem);
3507
3508 return 0;
3509 }
3510
/* Device-type release callback: free the cxl_dax_region that embeds @dev */
static void cxl_dax_region_release(struct device *dev)
{
	kfree(to_cxl_dax_region(dev));
}
3517
3518 static const struct attribute_group *cxl_dax_region_attribute_groups[] = {
3519 &cxl_base_attribute_group,
3520 NULL,
3521 };
3522
3523 const struct device_type cxl_dax_region_type = {
3524 .name = "cxl_dax_region",
3525 .release = cxl_dax_region_release,
3526 .groups = cxl_dax_region_attribute_groups,
3527 };
3528
is_cxl_dax_region(struct device * dev)3529 static bool is_cxl_dax_region(struct device *dev)
3530 {
3531 return dev->type == &cxl_dax_region_type;
3532 }
3533
to_cxl_dax_region(struct device * dev)3534 struct cxl_dax_region *to_cxl_dax_region(struct device *dev)
3535 {
3536 if (dev_WARN_ONCE(dev, !is_cxl_dax_region(dev),
3537 "not a cxl_dax_region device\n"))
3538 return NULL;
3539 return container_of(dev, struct cxl_dax_region, dev);
3540 }
3541 EXPORT_SYMBOL_NS_GPL(to_cxl_dax_region, "CXL");
3542
3543 static struct lock_class_key cxl_dax_region_key;
3544
cxl_dax_region_alloc(struct cxl_region * cxlr)3545 static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr)
3546 {
3547 struct cxl_region_params *p = &cxlr->params;
3548 struct cxl_dax_region *cxlr_dax;
3549 struct device *dev;
3550
3551 guard(rwsem_read)(&cxl_rwsem.region);
3552 if (p->state != CXL_CONFIG_COMMIT)
3553 return ERR_PTR(-ENXIO);
3554
3555 cxlr_dax = kzalloc_obj(*cxlr_dax);
3556 if (!cxlr_dax)
3557 return ERR_PTR(-ENOMEM);
3558
3559 cxlr_dax->hpa_range.start = p->res->start;
3560 cxlr_dax->hpa_range.end = p->res->end;
3561
3562 dev = &cxlr_dax->dev;
3563 cxlr_dax->cxlr = cxlr;
3564 device_initialize(dev);
3565 lockdep_set_class(&dev->mutex, &cxl_dax_region_key);
3566 device_set_pm_not_required(dev);
3567 dev->parent = &cxlr->dev;
3568 dev->bus = &cxl_bus_type;
3569 dev->type = &cxl_dax_region_type;
3570
3571 return cxlr_dax;
3572 }
3573
cxlr_pmem_unregister(void * _cxlr_pmem)3574 static void cxlr_pmem_unregister(void *_cxlr_pmem)
3575 {
3576 struct cxl_pmem_region *cxlr_pmem = _cxlr_pmem;
3577 struct cxl_region *cxlr = cxlr_pmem->cxlr;
3578 struct cxl_nvdimm_bridge *cxl_nvb = cxlr->cxl_nvb;
3579
3580 /*
3581 * Either the bridge is in ->remove() context under the device_lock(),
3582 * or cxlr_release_nvdimm() is cancelling the bridge's release action
3583 * for @cxlr_pmem and doing it itself (while manually holding the bridge
3584 * lock).
3585 */
3586 device_lock_assert(&cxl_nvb->dev);
3587 cxlr->cxlr_pmem = NULL;
3588 cxlr_pmem->cxlr = NULL;
3589 device_unregister(&cxlr_pmem->dev);
3590 }
3591
cxlr_release_nvdimm(void * _cxlr)3592 static void cxlr_release_nvdimm(void *_cxlr)
3593 {
3594 struct cxl_region *cxlr = _cxlr;
3595 struct cxl_nvdimm_bridge *cxl_nvb = cxlr->cxl_nvb;
3596
3597 scoped_guard(device, &cxl_nvb->dev) {
3598 if (cxlr->cxlr_pmem)
3599 devm_release_action(&cxl_nvb->dev, cxlr_pmem_unregister,
3600 cxlr->cxlr_pmem);
3601 }
3602 cxlr->cxl_nvb = NULL;
3603 put_device(&cxl_nvb->dev);
3604 }
3605
3606 /**
3607 * devm_cxl_add_pmem_region() - add a cxl_region-to-nd_region bridge
3608 * @cxlr: parent CXL region for this pmem region bridge device
3609 *
3610 * Return: 0 on success negative error code on failure.
3611 */
devm_cxl_add_pmem_region(struct cxl_region * cxlr)3612 static int devm_cxl_add_pmem_region(struct cxl_region *cxlr)
3613 {
3614 struct cxl_pmem_region *cxlr_pmem;
3615 struct cxl_nvdimm_bridge *cxl_nvb;
3616 struct device *dev;
3617 int rc;
3618
3619 rc = cxl_pmem_region_alloc(cxlr);
3620 if (rc)
3621 return rc;
3622 cxlr_pmem = cxlr->cxlr_pmem;
3623 cxl_nvb = cxlr->cxl_nvb;
3624
3625 dev = &cxlr_pmem->dev;
3626 rc = dev_set_name(dev, "pmem_region%d", cxlr->id);
3627 if (rc)
3628 goto err;
3629
3630 rc = device_add(dev);
3631 if (rc)
3632 goto err;
3633
3634 dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
3635 dev_name(dev));
3636
3637 scoped_guard(device, &cxl_nvb->dev) {
3638 if (cxl_nvb->dev.driver)
3639 rc = devm_add_action_or_reset(&cxl_nvb->dev,
3640 cxlr_pmem_unregister,
3641 cxlr_pmem);
3642 else
3643 rc = -ENXIO;
3644 }
3645
3646 if (rc)
3647 goto err_bridge;
3648
3649 /* @cxlr carries a reference on @cxl_nvb until cxlr_release_nvdimm */
3650 return devm_add_action_or_reset(&cxlr->dev, cxlr_release_nvdimm, cxlr);
3651
3652 err:
3653 put_device(dev);
3654 err_bridge:
3655 put_device(&cxl_nvb->dev);
3656 cxlr->cxl_nvb = NULL;
3657 return rc;
3658 }
3659
cxlr_dax_unregister(void * _cxlr_dax)3660 static void cxlr_dax_unregister(void *_cxlr_dax)
3661 {
3662 struct cxl_dax_region *cxlr_dax = _cxlr_dax;
3663
3664 device_unregister(&cxlr_dax->dev);
3665 }
3666
devm_cxl_add_dax_region(struct cxl_region * cxlr)3667 static int devm_cxl_add_dax_region(struct cxl_region *cxlr)
3668 {
3669 struct cxl_dax_region *cxlr_dax;
3670 struct device *dev;
3671 int rc;
3672
3673 cxlr_dax = cxl_dax_region_alloc(cxlr);
3674 if (IS_ERR(cxlr_dax))
3675 return PTR_ERR(cxlr_dax);
3676
3677 dev = &cxlr_dax->dev;
3678 rc = dev_set_name(dev, "dax_region%d", cxlr->id);
3679 if (rc)
3680 goto err;
3681
3682 rc = device_add(dev);
3683 if (rc)
3684 goto err;
3685
3686 dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
3687 dev_name(dev));
3688
3689 return devm_add_action_or_reset(&cxlr->dev, cxlr_dax_unregister,
3690 cxlr_dax);
3691 err:
3692 put_device(dev);
3693 return rc;
3694 }
3695
match_root_decoder(struct device * dev,const void * data)3696 static int match_root_decoder(struct device *dev, const void *data)
3697 {
3698 const struct range *r1, *r2 = data;
3699 struct cxl_root_decoder *cxlrd;
3700
3701 if (!is_root_decoder(dev))
3702 return 0;
3703
3704 cxlrd = to_cxl_root_decoder(dev);
3705 r1 = &cxlrd->cxlsd.cxld.hpa_range;
3706
3707 return range_contains(r1, r2);
3708 }
3709
cxl_root_setup_translation(struct cxl_root * cxl_root,struct cxl_region_context * ctx)3710 static int cxl_root_setup_translation(struct cxl_root *cxl_root,
3711 struct cxl_region_context *ctx)
3712 {
3713 if (!cxl_root->ops.translation_setup_root)
3714 return 0;
3715
3716 return cxl_root->ops.translation_setup_root(cxl_root, ctx);
3717 }
3718
3719 /*
3720 * Note, when finished with the device, drop the reference with
3721 * put_device() or use the put_cxl_root_decoder helper.
3722 */
3723 static struct cxl_root_decoder *
get_cxl_root_decoder(struct cxl_endpoint_decoder * cxled,struct cxl_region_context * ctx)3724 get_cxl_root_decoder(struct cxl_endpoint_decoder *cxled,
3725 struct cxl_region_context *ctx)
3726 {
3727 struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
3728 struct cxl_port *port = cxled_to_port(cxled);
3729 struct cxl_root *cxl_root __free(put_cxl_root) = find_cxl_root(port);
3730 struct device *cxlrd_dev;
3731 int rc;
3732
3733 /*
3734 * Adjust the endpoint's HPA range and interleaving
3735 * configuration to the root decoder’s memory space before
3736 * setting up the root decoder.
3737 */
3738 rc = cxl_root_setup_translation(cxl_root, ctx);
3739 if (rc) {
3740 dev_err(cxlmd->dev.parent,
3741 "%s:%s Failed to setup translation for address range %#llx:%#llx\n",
3742 dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
3743 ctx->hpa_range.start, ctx->hpa_range.end);
3744 return ERR_PTR(rc);
3745 }
3746
3747 cxlrd_dev = device_find_child(&cxl_root->port.dev, &ctx->hpa_range,
3748 match_root_decoder);
3749 if (!cxlrd_dev) {
3750 dev_err(cxlmd->dev.parent,
3751 "%s:%s no CXL window for range %#llx:%#llx\n",
3752 dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
3753 ctx->hpa_range.start, ctx->hpa_range.end);
3754 return ERR_PTR(-ENXIO);
3755 }
3756
3757 return to_cxl_root_decoder(cxlrd_dev);
3758 }
3759
match_region_by_range(struct device * dev,const void * data)3760 static int match_region_by_range(struct device *dev, const void *data)
3761 {
3762 struct cxl_region_params *p;
3763 struct cxl_region *cxlr;
3764 const struct range *r = data;
3765
3766 if (!is_cxl_region(dev))
3767 return 0;
3768
3769 cxlr = to_cxl_region(dev);
3770 p = &cxlr->params;
3771
3772 guard(rwsem_read)(&cxl_rwsem.region);
3773 return spa_maps_hpa(p, r);
3774 }
3775
cxl_extended_linear_cache_resize(struct cxl_region * cxlr,struct resource * res)3776 static int cxl_extended_linear_cache_resize(struct cxl_region *cxlr,
3777 struct resource *res)
3778 {
3779 struct cxl_root_decoder *cxlrd = cxlr->cxlrd;
3780 struct cxl_region_params *p = &cxlr->params;
3781 resource_size_t size = resource_size(res);
3782 resource_size_t cache_size, start;
3783
3784 cache_size = cxlrd->cache_size;
3785 if (!cache_size)
3786 return 0;
3787
3788 if (size != cache_size) {
3789 dev_warn(&cxlr->dev,
3790 "Extended Linear Cache size %pa != CXL size %pa. No Support!",
3791 &cache_size, &size);
3792 return -ENXIO;
3793 }
3794
3795 /*
3796 * Move the start of the range to where the cache range starts. The
3797 * implementation assumes that the cache range is in front of the
3798 * CXL range. This is not dictated by the HMAT spec but is how the
3799 * current known implementation is configured.
3800 *
3801 * The cache range is expected to be within the CFMWS. The adjusted
3802 * res->start should not be less than cxlrd->res->start.
3803 */
3804 start = res->start - cache_size;
3805 if (start < cxlrd->res->start)
3806 return -ENXIO;
3807
3808 res->start = start;
3809 p->cache_size = cache_size;
3810
3811 return 0;
3812 }
3813
__construct_region(struct cxl_region * cxlr,struct cxl_region_context * ctx)3814 static int __construct_region(struct cxl_region *cxlr,
3815 struct cxl_region_context *ctx)
3816 {
3817 struct cxl_endpoint_decoder *cxled = ctx->cxled;
3818 struct cxl_root_decoder *cxlrd = cxlr->cxlrd;
3819 struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
3820 struct range *hpa_range = &ctx->hpa_range;
3821 struct cxl_region_params *p;
3822 struct resource *res;
3823 int rc;
3824
3825 guard(rwsem_write)(&cxl_rwsem.region);
3826 p = &cxlr->params;
3827 if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
3828 dev_err(cxlmd->dev.parent,
3829 "%s:%s: %s autodiscovery interrupted\n",
3830 dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
3831 __func__);
3832 return -EBUSY;
3833 }
3834
3835 set_bit(CXL_REGION_F_AUTO, &cxlr->flags);
3836 cxlr->hpa_range = *hpa_range;
3837
3838 res = kmalloc_obj(*res);
3839 if (!res)
3840 return -ENOMEM;
3841
3842 *res = DEFINE_RES_MEM_NAMED(hpa_range->start, range_len(hpa_range),
3843 dev_name(&cxlr->dev));
3844
3845 rc = cxl_extended_linear_cache_resize(cxlr, res);
3846 if (rc && rc != -EOPNOTSUPP) {
3847 /*
3848 * Failing to support extended linear cache region resize does not
3849 * prevent the region from functioning. Only causes cxl list showing
3850 * incorrect region size.
3851 */
3852 dev_warn(cxlmd->dev.parent,
3853 "Extended linear cache calculation failed rc:%d\n", rc);
3854 }
3855
3856 rc = sysfs_update_group(&cxlr->dev.kobj, &cxl_region_group);
3857 if (rc)
3858 return rc;
3859
3860 rc = insert_resource(cxlrd->res, res);
3861 if (rc) {
3862 /*
3863 * Platform-firmware may not have split resources like "System
3864 * RAM" on CXL window boundaries see cxl_region_iomem_release()
3865 */
3866 dev_warn(cxlmd->dev.parent,
3867 "%s:%s: %s %s cannot insert resource\n",
3868 dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
3869 __func__, dev_name(&cxlr->dev));
3870 }
3871
3872 p->res = res;
3873 p->interleave_ways = ctx->interleave_ways;
3874 p->interleave_granularity = ctx->interleave_granularity;
3875 p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
3876
3877 rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
3878 if (rc)
3879 return rc;
3880
3881 dev_dbg(cxlmd->dev.parent, "%s:%s: %s %s res: %pr iw: %d ig: %d\n",
3882 dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), __func__,
3883 dev_name(&cxlr->dev), p->res, p->interleave_ways,
3884 p->interleave_granularity);
3885
3886 /* ...to match put_device() in cxl_add_to_region() */
3887 get_device(&cxlr->dev);
3888
3889 return 0;
3890 }
3891
3892 /* Establish an empty region covering the given HPA range */
construct_region(struct cxl_root_decoder * cxlrd,struct cxl_region_context * ctx)3893 static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
3894 struct cxl_region_context *ctx)
3895 {
3896 struct cxl_endpoint_decoder *cxled = ctx->cxled;
3897 struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
3898 struct cxl_port *port = cxlrd_to_port(cxlrd);
3899 struct cxl_dev_state *cxlds = cxlmd->cxlds;
3900 int rc, part = READ_ONCE(cxled->part);
3901 struct cxl_region *cxlr;
3902
3903 do {
3904 cxlr = __create_region(cxlrd, cxlds->part[part].mode,
3905 atomic_read(&cxlrd->region_id));
3906 } while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);
3907
3908 if (IS_ERR(cxlr)) {
3909 dev_err(cxlmd->dev.parent,
3910 "%s:%s: %s failed assign region: %ld\n",
3911 dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
3912 __func__, PTR_ERR(cxlr));
3913 return cxlr;
3914 }
3915
3916 rc = __construct_region(cxlr, ctx);
3917 if (rc) {
3918 devm_release_action(port->uport_dev, unregister_region, cxlr);
3919 return ERR_PTR(rc);
3920 }
3921
3922 return cxlr;
3923 }
3924
3925 static struct cxl_region *
cxl_find_region_by_range(struct cxl_root_decoder * cxlrd,struct range * hpa_range)3926 cxl_find_region_by_range(struct cxl_root_decoder *cxlrd,
3927 struct range *hpa_range)
3928 {
3929 struct device *region_dev;
3930
3931 region_dev = device_find_child(&cxlrd->cxlsd.cxld.dev, hpa_range,
3932 match_region_by_range);
3933 if (!region_dev)
3934 return NULL;
3935
3936 return to_cxl_region(region_dev);
3937 }
3938
cxl_add_to_region(struct cxl_endpoint_decoder * cxled)3939 int cxl_add_to_region(struct cxl_endpoint_decoder *cxled)
3940 {
3941 struct cxl_region_context ctx;
3942 struct cxl_region_params *p;
3943 bool attach = false;
3944 int rc;
3945
3946 ctx = (struct cxl_region_context) {
3947 .cxled = cxled,
3948 .hpa_range = cxled->cxld.hpa_range,
3949 .interleave_ways = cxled->cxld.interleave_ways,
3950 .interleave_granularity = cxled->cxld.interleave_granularity,
3951 };
3952
3953 struct cxl_root_decoder *cxlrd __free(put_cxl_root_decoder) =
3954 get_cxl_root_decoder(cxled, &ctx);
3955
3956 if (IS_ERR(cxlrd))
3957 return PTR_ERR(cxlrd);
3958
3959 /*
3960 * Ensure that, if multiple threads race to construct_region()
3961 * for the HPA range, one does the construction and the others
3962 * add to that.
3963 */
3964 mutex_lock(&cxlrd->range_lock);
3965 struct cxl_region *cxlr __free(put_cxl_region) =
3966 cxl_find_region_by_range(cxlrd, &ctx.hpa_range);
3967 if (!cxlr)
3968 cxlr = construct_region(cxlrd, &ctx);
3969 mutex_unlock(&cxlrd->range_lock);
3970
3971 rc = PTR_ERR_OR_ZERO(cxlr);
3972 if (rc)
3973 return rc;
3974
3975 attach_target(cxlr, cxled, -1, TASK_UNINTERRUPTIBLE);
3976
3977 scoped_guard(rwsem_read, &cxl_rwsem.region) {
3978 p = &cxlr->params;
3979 attach = p->state == CXL_CONFIG_COMMIT;
3980 }
3981
3982 if (attach) {
3983 /*
3984 * If device_attach() fails the range may still be active via
3985 * the platform-firmware memory map, otherwise the driver for
3986 * regions is local to this file, so driver matching can't fail.
3987 */
3988 if (device_attach(&cxlr->dev) < 0)
3989 dev_err(&cxlr->dev, "failed to enable, range: %pr\n",
3990 p->res);
3991 }
3992
3993 return rc;
3994 }
3995 EXPORT_SYMBOL_NS_GPL(cxl_add_to_region, "CXL");
3996
cxl_port_get_spa_cache_alias(struct cxl_port * endpoint,u64 spa)3997 u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, u64 spa)
3998 {
3999 struct cxl_region_ref *iter;
4000 unsigned long index;
4001
4002 if (!endpoint)
4003 return ~0ULL;
4004
4005 guard(rwsem_write)(&cxl_rwsem.region);
4006
4007 xa_for_each(&endpoint->regions, index, iter) {
4008 struct cxl_region_params *p = &iter->region->params;
4009
4010 if (cxl_resource_contains_addr(p->res, spa)) {
4011 if (!p->cache_size)
4012 return ~0ULL;
4013
4014 if (spa >= p->res->start + p->cache_size)
4015 return spa - p->cache_size;
4016
4017 return spa + p->cache_size;
4018 }
4019 }
4020
4021 return ~0ULL;
4022 }
4023 EXPORT_SYMBOL_NS_GPL(cxl_port_get_spa_cache_alias, "CXL");
4024
is_system_ram(struct resource * res,void * arg)4025 static int is_system_ram(struct resource *res, void *arg)
4026 {
4027 struct cxl_region *cxlr = arg;
4028 struct cxl_region_params *p = &cxlr->params;
4029
4030 dev_dbg(&cxlr->dev, "%pr has System RAM: %pr\n", p->res, res);
4031 return 1;
4032 }
4033
shutdown_notifiers(void * _cxlr)4034 static void shutdown_notifiers(void *_cxlr)
4035 {
4036 struct cxl_region *cxlr = _cxlr;
4037
4038 unregister_node_notifier(&cxlr->node_notifier);
4039 unregister_mt_adistance_algorithm(&cxlr->adist_notifier);
4040 }
4041
/* Teardown action: remove the debugfs entry @dentry and everything below it */
static void remove_debugfs(void *dentry)
{
	struct dentry *root = dentry;

	debugfs_remove_recursive(root);
}
4046
/*
 * validate_region_offset() - check that @offset addresses CXL capacity
 * inside the region
 *
 * The offset must lie at or beyond the extended linear cache, below the
 * region size, and resolve to an HPA within the region resource.
 *
 * Returns 0 when valid, -EINVAL otherwise.
 */
static int validate_region_offset(struct cxl_region *cxlr, u64 offset)
{
	struct cxl_region_params *p = &cxlr->params;
	resource_size_t region_size;
	u64 hpa;

	if (offset < p->cache_size) {
		dev_err(&cxlr->dev,
			"Offset %#llx is within extended linear cache %pa\n",
			offset, &p->cache_size);
		return -EINVAL;
	}

	region_size = resource_size(p->res);
	if (offset >= region_size) {
		dev_err(&cxlr->dev, "Offset %#llx exceeds region size %pa\n",
			offset, &region_size);
		return -EINVAL;
	}

	/* The first comparison catches wrap-around of start + offset */
	hpa = p->res->start + offset;
	if (hpa < p->res->start || hpa > p->res->end) {
		dev_err(&cxlr->dev, "HPA %#llx not in region %pr\n", hpa,
			p->res);
		return -EINVAL;
	}

	return 0;
}
4076
cxl_region_debugfs_poison_inject(void * data,u64 offset)4077 static int cxl_region_debugfs_poison_inject(void *data, u64 offset)
4078 {
4079 struct dpa_result result = { .dpa = ULLONG_MAX, .cxlmd = NULL };
4080 struct cxl_region *cxlr = data;
4081 int rc;
4082
4083 ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
4084 if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem)))
4085 return rc;
4086
4087 ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa);
4088 if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem)))
4089 return rc;
4090
4091 if (validate_region_offset(cxlr, offset))
4092 return -EINVAL;
4093
4094 offset -= cxlr->params.cache_size;
4095 rc = region_offset_to_dpa_result(cxlr, offset, &result);
4096 if (rc || !result.cxlmd || result.dpa == ULLONG_MAX) {
4097 dev_dbg(&cxlr->dev,
4098 "Failed to resolve DPA for region offset %#llx rc %d\n",
4099 offset, rc);
4100
4101 return rc ? rc : -EINVAL;
4102 }
4103
4104 return cxl_inject_poison_locked(result.cxlmd, result.dpa);
4105 }
4106
4107 DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_inject_fops, NULL,
4108 cxl_region_debugfs_poison_inject, "%llx\n");
4109
cxl_region_debugfs_poison_clear(void * data,u64 offset)4110 static int cxl_region_debugfs_poison_clear(void *data, u64 offset)
4111 {
4112 struct dpa_result result = { .dpa = ULLONG_MAX, .cxlmd = NULL };
4113 struct cxl_region *cxlr = data;
4114 int rc;
4115
4116 ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
4117 if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem)))
4118 return rc;
4119
4120 ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa);
4121 if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem)))
4122 return rc;
4123
4124 if (validate_region_offset(cxlr, offset))
4125 return -EINVAL;
4126
4127 offset -= cxlr->params.cache_size;
4128 rc = region_offset_to_dpa_result(cxlr, offset, &result);
4129 if (rc || !result.cxlmd || result.dpa == ULLONG_MAX) {
4130 dev_dbg(&cxlr->dev,
4131 "Failed to resolve DPA for region offset %#llx rc %d\n",
4132 offset, rc);
4133
4134 return rc ? rc : -EINVAL;
4135 }
4136
4137 return cxl_clear_poison_locked(result.cxlmd, result.dpa);
4138 }
4139
4140 DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_clear_fops, NULL,
4141 cxl_region_debugfs_poison_clear, "%llx\n");
4142
cxl_region_setup_poison(struct cxl_region * cxlr)4143 static int cxl_region_setup_poison(struct cxl_region *cxlr)
4144 {
4145 struct device *dev = &cxlr->dev;
4146 struct cxl_region_params *p = &cxlr->params;
4147 struct dentry *dentry;
4148
4149 /*
4150 * Do not enable poison injection in Normalized Address mode.
4151 * Conversion between SPA and DPA is required for this, but it is
4152 * not supported in this mode.
4153 */
4154 if (test_bit(CXL_REGION_F_NORMALIZED_ADDRESSING, &cxlr->flags))
4155 return 0;
4156
4157 /* Create poison attributes if all memdevs support the capabilities */
4158 for (int i = 0; i < p->nr_targets; i++) {
4159 struct cxl_endpoint_decoder *cxled = p->targets[i];
4160 struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
4161
4162 if (!cxl_memdev_has_poison_cmd(cxlmd, CXL_POISON_ENABLED_INJECT) ||
4163 !cxl_memdev_has_poison_cmd(cxlmd, CXL_POISON_ENABLED_CLEAR))
4164 return 0;
4165 }
4166
4167 dentry = cxl_debugfs_create_dir(dev_name(dev));
4168 debugfs_create_file("inject_poison", 0200, dentry, cxlr,
4169 &cxl_poison_inject_fops);
4170 debugfs_create_file("clear_poison", 0200, dentry, cxlr,
4171 &cxl_poison_clear_fops);
4172
4173 return devm_add_action_or_reset(dev, remove_debugfs, dentry);
4174 }
4175
cxl_region_can_probe(struct cxl_region * cxlr)4176 static int cxl_region_can_probe(struct cxl_region *cxlr)
4177 {
4178 struct cxl_region_params *p = &cxlr->params;
4179 int rc;
4180
4181 ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
4182 if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem))) {
4183 dev_dbg(&cxlr->dev, "probe interrupted\n");
4184 return rc;
4185 }
4186
4187 if (p->state < CXL_CONFIG_COMMIT) {
4188 dev_dbg(&cxlr->dev, "config state: %d\n", p->state);
4189 return -ENXIO;
4190 }
4191
4192 if (test_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags)) {
4193 dev_err(&cxlr->dev,
4194 "failed to activate, re-commit region and retry\n");
4195 return -ENXIO;
4196 }
4197
4198 return 0;
4199 }
4200
cxl_region_probe(struct device * dev)4201 static int cxl_region_probe(struct device *dev)
4202 {
4203 struct cxl_region *cxlr = to_cxl_region(dev);
4204 struct cxl_region_params *p = &cxlr->params;
4205 int rc;
4206
4207 rc = cxl_region_can_probe(cxlr);
4208 if (rc)
4209 return rc;
4210
4211 /*
4212 * From this point on any path that changes the region's state away from
4213 * CXL_CONFIG_COMMIT is also responsible for releasing the driver.
4214 */
4215
4216 cxlr->node_notifier.notifier_call = cxl_region_perf_attrs_callback;
4217 cxlr->node_notifier.priority = CXL_CALLBACK_PRI;
4218 register_node_notifier(&cxlr->node_notifier);
4219
4220 cxlr->adist_notifier.notifier_call = cxl_region_calculate_adistance;
4221 cxlr->adist_notifier.priority = 100;
4222 register_mt_adistance_algorithm(&cxlr->adist_notifier);
4223
4224 rc = devm_add_action_or_reset(&cxlr->dev, shutdown_notifiers, cxlr);
4225 if (rc)
4226 return rc;
4227
4228 rc = cxl_region_setup_poison(cxlr);
4229 if (rc)
4230 return rc;
4231
4232 switch (cxlr->mode) {
4233 case CXL_PARTMODE_PMEM:
4234 rc = devm_cxl_region_edac_register(cxlr);
4235 if (rc)
4236 dev_dbg(&cxlr->dev, "CXL EDAC registration for region_id=%d failed\n",
4237 cxlr->id);
4238
4239 return devm_cxl_add_pmem_region(cxlr);
4240 case CXL_PARTMODE_RAM:
4241 rc = devm_cxl_region_edac_register(cxlr);
4242 if (rc)
4243 dev_dbg(&cxlr->dev, "CXL EDAC registration for region_id=%d failed\n",
4244 cxlr->id);
4245
4246 /*
4247 * The region can not be manged by CXL if any portion of
4248 * it is already online as 'System RAM'
4249 */
4250 if (walk_iomem_res_desc(IORES_DESC_NONE,
4251 IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
4252 p->res->start, p->res->end, cxlr,
4253 is_system_ram) > 0)
4254 return 0;
4255 return devm_cxl_add_dax_region(cxlr);
4256 default:
4257 dev_dbg(&cxlr->dev, "unsupported region mode: %d\n",
4258 cxlr->mode);
4259 return -ENXIO;
4260 }
4261 }
4262
4263 static struct cxl_driver cxl_region_driver = {
4264 .name = "cxl_region",
4265 .probe = cxl_region_probe,
4266 .id = CXL_DEVICE_REGION,
4267 };
4268
cxl_region_init(void)4269 int cxl_region_init(void)
4270 {
4271 return cxl_driver_register(&cxl_region_driver);
4272 }
4273
cxl_region_exit(void)4274 void cxl_region_exit(void)
4275 {
4276 cxl_driver_unregister(&cxl_region_driver);
4277 }
4278
4279 MODULE_IMPORT_NS("CXL");
4280 MODULE_IMPORT_NS("DEVMEM");
4281 MODULE_ALIAS_CXL(CXL_DEVICE_REGION);
4282