xref: /linux/drivers/cxl/core/region.c (revision a5210135489ae7bc1ef1cb4a8157361dd7b468cd)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright(c) 2022 Intel Corporation. All rights reserved. */
3 #include <linux/memregion.h>
4 #include <linux/genalloc.h>
5 #include <linux/debugfs.h>
6 #include <linux/device.h>
7 #include <linux/module.h>
8 #include <linux/memory.h>
9 #include <linux/slab.h>
10 #include <linux/uuid.h>
11 #include <linux/sort.h>
12 #include <linux/idr.h>
13 #include <linux/memory-tiers.h>
14 #include <linux/string_choices.h>
15 #include <cxlmem.h>
16 #include <cxl.h>
17 #include "core.h"
18 
19 /**
20  * DOC: cxl core region
21  *
22  * CXL Regions represent mapped memory capacity in system physical address
23  * space. Whereas the CXL Root Decoders identify the bounds of potential CXL
24  * Memory ranges, Regions represent the capacity actively mapped by the HDM
25  * Decoder Capability structures throughout the Host Bridges, Switches, and
26  * Endpoints in the topology.
27  *
28  * Region configuration has ordering constraints. UUID may be set at any time
29  * but is only visible for persistent regions.
30  * 1. Interleave granularity
31  * 2. Interleave size
32  * 3. Decoder targets
33  */
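
/*
 * Illustrative sketch, not driver code: the ordering constraints above map
 * onto writing a region's sysfs attributes in sequence before binding. The
 * region path, decoder names, UUID, and sizes below are hypothetical; the
 * attribute names match the DEVICE_ATTR definitions in this file.
 *
 *	#include <stdio.h>
 *
 *	static void set_attr(const char *attr, const char *val)
 *	{
 *		char path[256];
 *		FILE *f;
 *
 *		snprintf(path, sizeof(path),
 *			 "/sys/bus/cxl/devices/region0/%s", attr);
 *		f = fopen(path, "w");
 *		if (!f)
 *			return;			// sketch: errors ignored
 *		fprintf(f, "%s\n", val);	// mimic 'echo', newline included
 *		fclose(f);
 *	}
 *
 *	int main(void)
 *	{
 *		set_attr("interleave_granularity", "256");
 *		set_attr("interleave_ways", "2");
 *		set_attr("uuid", "f1f9b3e0-0000-0000-0000-000000000001"); // pmem only
 *		set_attr("size", "0x20000000");		// 512M = 2 * 256M
 *		set_attr("target0", "decoder2.0");	// hypothetical endpoint decoders
 *		set_attr("target1", "decoder3.0");
 *		set_attr("commit", "1");
 *		return 0;
 *	}
 */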
34 
35 /*
36  * Nodemask that tracks, per node, whether the access_coordinates for the
37  * node have been updated by the CXL memory hotplug notifier.
38  */
39 static nodemask_t nodemask_region_seen = NODE_MASK_NONE;
40 
41 static struct cxl_region *to_cxl_region(struct device *dev);
42 
43 #define __ACCESS_ATTR_RO(_level, _name) {				\
44 	.attr	= { .name = __stringify(_name), .mode = 0444 },		\
45 	.show	= _name##_access##_level##_show,			\
46 }
47 
48 #define ACCESS_DEVICE_ATTR_RO(level, name)	\
49 	struct device_attribute dev_attr_access##level##_##name = __ACCESS_ATTR_RO(level, name)
50 
51 #define ACCESS_ATTR_RO(level, attrib)					      \
52 static ssize_t attrib##_access##level##_show(struct device *dev,	      \
53 					  struct device_attribute *attr,      \
54 					  char *buf)			      \
55 {									      \
56 	struct cxl_region *cxlr = to_cxl_region(dev);			      \
57 									      \
58 	if (cxlr->coord[level].attrib == 0)				      \
59 		return -ENOENT;						      \
60 									      \
61 	return sysfs_emit(buf, "%u\n", cxlr->coord[level].attrib);	      \
62 }									      \
63 static ACCESS_DEVICE_ATTR_RO(level, attrib)
64 
65 ACCESS_ATTR_RO(0, read_bandwidth);
66 ACCESS_ATTR_RO(0, read_latency);
67 ACCESS_ATTR_RO(0, write_bandwidth);
68 ACCESS_ATTR_RO(0, write_latency);
69 
70 #define ACCESS_ATTR_DECLARE(level, attrib)	\
71 	(&dev_attr_access##level##_##attrib.attr)
72 
73 static struct attribute *access0_coordinate_attrs[] = {
74 	ACCESS_ATTR_DECLARE(0, read_bandwidth),
75 	ACCESS_ATTR_DECLARE(0, write_bandwidth),
76 	ACCESS_ATTR_DECLARE(0, read_latency),
77 	ACCESS_ATTR_DECLARE(0, write_latency),
78 	NULL
79 };
80 
81 ACCESS_ATTR_RO(1, read_bandwidth);
82 ACCESS_ATTR_RO(1, read_latency);
83 ACCESS_ATTR_RO(1, write_bandwidth);
84 ACCESS_ATTR_RO(1, write_latency);
85 
86 static struct attribute *access1_coordinate_attrs[] = {
87 	ACCESS_ATTR_DECLARE(1, read_bandwidth),
88 	ACCESS_ATTR_DECLARE(1, write_bandwidth),
89 	ACCESS_ATTR_DECLARE(1, read_latency),
90 	ACCESS_ATTR_DECLARE(1, write_latency),
91 	NULL
92 };
93 
94 #define ACCESS_VISIBLE(level)						\
95 static umode_t cxl_region_access##level##_coordinate_visible(		\
96 		struct kobject *kobj, struct attribute *a, int n)	\
97 {									\
98 	struct device *dev = kobj_to_dev(kobj);				\
99 	struct cxl_region *cxlr = to_cxl_region(dev);			\
100 									\
101 	if (a == &dev_attr_access##level##_read_latency.attr &&		\
102 	    cxlr->coord[level].read_latency == 0)			\
103 		return 0;						\
104 									\
105 	if (a == &dev_attr_access##level##_write_latency.attr &&	\
106 	    cxlr->coord[level].write_latency == 0)			\
107 		return 0;						\
108 									\
109 	if (a == &dev_attr_access##level##_read_bandwidth.attr &&	\
110 	    cxlr->coord[level].read_bandwidth == 0)			\
111 		return 0;						\
112 									\
113 	if (a == &dev_attr_access##level##_write_bandwidth.attr &&	\
114 	    cxlr->coord[level].write_bandwidth == 0)			\
115 		return 0;						\
116 									\
117 	return a->mode;							\
118 }
119 
120 ACCESS_VISIBLE(0);
121 ACCESS_VISIBLE(1);
122 
123 static const struct attribute_group cxl_region_access0_coordinate_group = {
124 	.name = "access0",
125 	.attrs = access0_coordinate_attrs,
126 	.is_visible = cxl_region_access0_coordinate_visible,
127 };
128 
129 static const struct attribute_group *get_cxl_region_access0_group(void)
130 {
131 	return &cxl_region_access0_coordinate_group;
132 }
133 
134 static const struct attribute_group cxl_region_access1_coordinate_group = {
135 	.name = "access1",
136 	.attrs = access1_coordinate_attrs,
137 	.is_visible = cxl_region_access1_coordinate_visible,
138 };
139 
140 static const struct attribute_group *get_cxl_region_access1_group(void)
141 {
142 	return &cxl_region_access1_coordinate_group;
143 }
144 
145 static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
146 			 char *buf)
147 {
148 	struct cxl_region *cxlr = to_cxl_region(dev);
149 	struct cxl_region_params *p = &cxlr->params;
150 	ssize_t rc;
151 
152 	ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
153 	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &region_rwsem)))
154 		return rc;
155 	if (cxlr->mode != CXL_PARTMODE_PMEM)
156 		return sysfs_emit(buf, "\n");
157 	return sysfs_emit(buf, "%pUb\n", &p->uuid);
158 }
159 
160 static int is_dup(struct device *match, void *data)
161 {
162 	struct cxl_region_params *p;
163 	struct cxl_region *cxlr;
164 	uuid_t *uuid = data;
165 
166 	if (!is_cxl_region(match))
167 		return 0;
168 
169 	lockdep_assert_held(&cxl_rwsem.region);
170 	cxlr = to_cxl_region(match);
171 	p = &cxlr->params;
172 
173 	if (uuid_equal(&p->uuid, uuid)) {
174 		dev_dbg(match, "already has uuid: %pUb\n", uuid);
175 		return -EBUSY;
176 	}
177 
178 	return 0;
179 }
180 
181 static ssize_t uuid_store(struct device *dev, struct device_attribute *attr,
182 			  const char *buf, size_t len)
183 {
184 	struct cxl_region *cxlr = to_cxl_region(dev);
185 	struct cxl_region_params *p = &cxlr->params;
186 	uuid_t temp;
187 	ssize_t rc;
188 
189 	if (len != UUID_STRING_LEN + 1)
190 		return -EINVAL;
191 
192 	rc = uuid_parse(buf, &temp);
193 	if (rc)
194 		return rc;
195 
196 	if (uuid_is_null(&temp))
197 		return -EINVAL;
198 
199 	ACQUIRE(rwsem_write_kill, region_rwsem)(&cxl_rwsem.region);
200 	if ((rc = ACQUIRE_ERR(rwsem_write_kill, &region_rwsem)))
201 		return rc;
202 
203 	if (uuid_equal(&p->uuid, &temp))
204 		return len;
205 
206 	if (p->state >= CXL_CONFIG_ACTIVE)
207 		return -EBUSY;
208 
209 	rc = bus_for_each_dev(&cxl_bus_type, NULL, &temp, is_dup);
210 	if (rc < 0)
211 		return rc;
212 
213 	uuid_copy(&p->uuid, &temp);
214 
215 	return len;
216 }
217 static DEVICE_ATTR_RW(uuid);
218 
219 static struct cxl_region_ref *cxl_rr_load(struct cxl_port *port,
220 					  struct cxl_region *cxlr)
221 {
222 	return xa_load(&port->regions, (unsigned long)cxlr);
223 }
224 
225 static int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
226 {
227 	if (!cpu_cache_has_invalidate_memregion()) {
228 		if (IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST)) {
229 			dev_info_once(
230 				&cxlr->dev,
231 				"Bypassing cpu_cache_invalidate_memregion() for testing!\n");
232 			return 0;
233 		}
234 		dev_WARN(&cxlr->dev,
235 			"Failed to synchronize CPU cache state\n");
236 		return -ENXIO;
237 	}
238 
239 	if (!cxlr->params.res)
240 		return -ENXIO;
241 	cpu_cache_invalidate_memregion(cxlr->params.res->start,
242 				       resource_size(cxlr->params.res));
243 	return 0;
244 }
245 
246 static void cxl_region_decode_reset(struct cxl_region *cxlr, int count)
247 {
248 	struct cxl_region_params *p = &cxlr->params;
249 	int i;
250 
251 	if (test_bit(CXL_REGION_F_LOCK, &cxlr->flags))
252 		return;
253 
254 	/*
255 	 * Before region teardown, attempt to flush and evict any data cached for
256 	 * this region, or scream loudly about missing arch / platform support
257 	 * for CXL teardown.
258 	 */
259 	cxl_region_invalidate_memregion(cxlr);
260 
261 	for (i = count - 1; i >= 0; i--) {
262 		struct cxl_endpoint_decoder *cxled = p->targets[i];
263 		struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
264 		struct cxl_port *iter = cxled_to_port(cxled);
265 		struct cxl_dev_state *cxlds = cxlmd->cxlds;
266 		struct cxl_ep *ep;
267 
268 		if (cxlds->rcd)
269 			goto endpoint_reset;
270 
271 		while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
272 			iter = to_cxl_port(iter->dev.parent);
273 
274 		for (ep = cxl_ep_load(iter, cxlmd); iter;
275 		     iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
276 			struct cxl_region_ref *cxl_rr;
277 			struct cxl_decoder *cxld;
278 
279 			cxl_rr = cxl_rr_load(iter, cxlr);
280 			cxld = cxl_rr->decoder;
281 			if (cxld->reset)
282 				cxld->reset(cxld);
283 			set_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
284 		}
285 
286 endpoint_reset:
287 		cxled->cxld.reset(&cxled->cxld);
288 		set_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
289 	}
290 
291 	/* all decoders associated with this region have been torn down */
292 	clear_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
293 }
294 
295 static int commit_decoder(struct cxl_decoder *cxld)
296 {
297 	struct cxl_switch_decoder *cxlsd = NULL;
298 
299 	if (cxld->commit)
300 		return cxld->commit(cxld);
301 
302 	if (is_switch_decoder(&cxld->dev))
303 		cxlsd = to_cxl_switch_decoder(&cxld->dev);
304 
305 	if (dev_WARN_ONCE(&cxld->dev, !cxlsd || cxlsd->nr_targets > 1,
306 			  "->commit() is required\n"))
307 		return -ENXIO;
308 	return 0;
309 }
310 
311 static int cxl_region_decode_commit(struct cxl_region *cxlr)
312 {
313 	struct cxl_region_params *p = &cxlr->params;
314 	int i, rc = 0;
315 
316 	for (i = 0; i < p->nr_targets; i++) {
317 		struct cxl_endpoint_decoder *cxled = p->targets[i];
318 		struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
319 		struct cxl_region_ref *cxl_rr;
320 		struct cxl_decoder *cxld;
321 		struct cxl_port *iter;
322 		struct cxl_ep *ep;
323 
324 		/* commit bottom up */
325 		for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
326 		     iter = to_cxl_port(iter->dev.parent)) {
327 			cxl_rr = cxl_rr_load(iter, cxlr);
328 			cxld = cxl_rr->decoder;
329 			rc = commit_decoder(cxld);
330 			if (rc)
331 				break;
332 		}
333 
334 		if (rc) {
335 			/* programming @iter failed, teardown */
336 			for (ep = cxl_ep_load(iter, cxlmd); ep && iter;
337 			     iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
338 				cxl_rr = cxl_rr_load(iter, cxlr);
339 				cxld = cxl_rr->decoder;
340 				if (cxld->reset)
341 					cxld->reset(cxld);
342 			}
343 
344 			cxled->cxld.reset(&cxled->cxld);
345 			goto err;
346 		}
347 	}
348 
349 	return 0;
350 
351 err:
352 	/* undo the targets that were successfully committed */
353 	cxl_region_decode_reset(cxlr, i);
354 	return rc;
355 }
356 
357 static int queue_reset(struct cxl_region *cxlr)
358 {
359 	struct cxl_region_params *p = &cxlr->params;
360 	int rc;
361 
362 	ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
363 	if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
364 		return rc;
365 
366 	/* Already in the requested state? */
367 	if (p->state < CXL_CONFIG_COMMIT)
368 		return 0;
369 
370 	p->state = CXL_CONFIG_RESET_PENDING;
371 
372 	return 0;
373 }
374 
375 static int __commit(struct cxl_region *cxlr)
376 {
377 	struct cxl_region_params *p = &cxlr->params;
378 	int rc;
379 
380 	ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
381 	if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
382 		return rc;
383 
384 	/* Already in the requested state? */
385 	if (p->state >= CXL_CONFIG_COMMIT)
386 		return 0;
387 
388 	/* Not ready to commit? */
389 	if (p->state < CXL_CONFIG_ACTIVE)
390 		return -ENXIO;
391 
392 	/*
393 	 * Invalidate caches before region setup to drop any speculative
394 	 * consumption of this address space
395 	 */
396 	rc = cxl_region_invalidate_memregion(cxlr);
397 	if (rc)
398 		return rc;
399 
400 	rc = cxl_region_decode_commit(cxlr);
401 	if (rc)
402 		return rc;
403 
404 	p->state = CXL_CONFIG_COMMIT;
405 
406 	return 0;
407 }
408 
409 static ssize_t commit_store(struct device *dev, struct device_attribute *attr,
410 			    const char *buf, size_t len)
411 {
412 	struct cxl_region *cxlr = to_cxl_region(dev);
413 	struct cxl_region_params *p = &cxlr->params;
414 	bool commit;
415 	ssize_t rc;
416 
417 	rc = kstrtobool(buf, &commit);
418 	if (rc)
419 		return rc;
420 
421 	if (commit) {
422 		rc = __commit(cxlr);
423 		if (rc)
424 			return rc;
425 		return len;
426 	}
427 
428 	if (test_bit(CXL_REGION_F_LOCK, &cxlr->flags))
429 		return -EPERM;
430 
431 	rc = queue_reset(cxlr);
432 	if (rc)
433 		return rc;
434 
435 	/*
436 	 * Unmap the region and rely on the reset-pending state to ensure
437 	 * it does not go active again until after the reset
438 	 */
439 	device_release_driver(&cxlr->dev);
440 
441 	/*
442 	 * With the reset pending take cxl_rwsem.region unconditionally
443 	 * to ensure the reset gets handled before returning.
444 	 */
445 	guard(rwsem_write)(&cxl_rwsem.region);
446 
447 	/*
448 	 * Revalidate that the reset is still pending in case another
449 	 * thread already handled this reset.
450 	 */
451 	if (p->state == CXL_CONFIG_RESET_PENDING) {
452 		cxl_region_decode_reset(cxlr, p->interleave_ways);
453 		p->state = CXL_CONFIG_ACTIVE;
454 	}
455 
456 	return len;
457 }
458 
459 static ssize_t commit_show(struct device *dev, struct device_attribute *attr,
460 			   char *buf)
461 {
462 	struct cxl_region *cxlr = to_cxl_region(dev);
463 	struct cxl_region_params *p = &cxlr->params;
464 	ssize_t rc;
465 
466 	ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
467 	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
468 		return rc;
469 	return sysfs_emit(buf, "%d\n", p->state >= CXL_CONFIG_COMMIT);
470 }
471 static DEVICE_ATTR_RW(commit);
472 
473 static ssize_t interleave_ways_show(struct device *dev,
474 				    struct device_attribute *attr, char *buf)
475 {
476 	struct cxl_region *cxlr = to_cxl_region(dev);
477 	struct cxl_region_params *p = &cxlr->params;
478 	int rc;
479 
480 	ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
481 	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
482 		return rc;
483 	return sysfs_emit(buf, "%d\n", p->interleave_ways);
484 }
485 
486 static const struct attribute_group *get_cxl_region_target_group(void);
487 
488 static int set_interleave_ways(struct cxl_region *cxlr, int val)
489 {
490 	struct cxl_root_decoder *cxlrd = cxlr->cxlrd;
491 	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
492 	struct cxl_region_params *p = &cxlr->params;
493 	int save, rc;
494 	u8 iw;
495 
496 	rc = ways_to_eiw(val, &iw);
497 	if (rc)
498 		return rc;
499 
500 	/*
501 	 * Even for x3, x6, and x12 interleaves the region interleave must be a
502 	 * power of 2 multiple of the host bridge interleave.
503 	 */
504 	if (!is_power_of_2(val / cxld->interleave_ways) ||
505 	    (val % cxld->interleave_ways)) {
506 		dev_dbg(&cxlr->dev, "invalid interleave: %d\n", val);
507 		return -EINVAL;
508 	}
509 
510 	lockdep_assert_held_write(&cxl_rwsem.region);
511 
512 	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
513 		return -EBUSY;
514 
515 	save = p->interleave_ways;
516 	p->interleave_ways = val;
517 	rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
518 	if (rc)
519 		p->interleave_ways = save;
520 
521 	return rc;
522 }
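
/*
 * Illustrative sketch (standalone helper with a hypothetical name): the
 * check above requires the region interleave to be the root decoder
 * interleave scaled by a power of two.
 *
 *	static bool region_ways_ok(int region_ways, int root_ways)
 *	{
 *		int factor;
 *
 *		if (root_ways <= 0 || region_ways % root_ways)
 *			return false;
 *		factor = region_ways / root_ways;
 *		return factor && (factor & (factor - 1)) == 0;
 *	}
 *
 *	// root x3: region x3, x6, x12 pass; x9 is rejected (factor 3)
 *	// root x2: region x2, x4, x8 pass; x6 is rejected (factor 3)
 */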
523 
524 static ssize_t interleave_ways_store(struct device *dev,
525 				     struct device_attribute *attr,
526 				     const char *buf, size_t len)
527 {
528 	struct cxl_region *cxlr = to_cxl_region(dev);
529 	int val;
530 	int rc;
531 
532 	rc = kstrtoint(buf, 0, &val);
533 	if (rc)
534 		return rc;
535 
536 	ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
537 	if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
538 		return rc;
539 
540 	rc = set_interleave_ways(cxlr, val);
541 	if (rc)
542 		return rc;
543 
544 	return len;
545 }
546 static DEVICE_ATTR_RW(interleave_ways);
547 
548 static ssize_t interleave_granularity_show(struct device *dev,
549 					   struct device_attribute *attr,
550 					   char *buf)
551 {
552 	struct cxl_region *cxlr = to_cxl_region(dev);
553 	struct cxl_region_params *p = &cxlr->params;
554 	int rc;
555 
556 	ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
557 	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
558 		return rc;
559 	return sysfs_emit(buf, "%d\n", p->interleave_granularity);
560 }
561 
562 static int set_interleave_granularity(struct cxl_region *cxlr, int val)
563 {
564 	struct cxl_root_decoder *cxlrd = cxlr->cxlrd;
565 	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
566 	struct cxl_region_params *p = &cxlr->params;
567 	int rc;
568 	u16 ig;
569 
570 	rc = granularity_to_eig(val, &ig);
571 	if (rc)
572 		return rc;
573 
574 	/*
575 	 * When the host-bridge is interleaved, disallow region granularity !=
576 	 * root granularity. Regions with a granularity less than the root
577 	 * interleave result in needing multiple endpoints to support a single
578 	 * slot in the interleave (possible to support in the future). Regions
579 	 * with a granularity greater than the root interleave result in invalid
580 	 * DPA translations (invalid to support).
581 	 */
582 	if (cxld->interleave_ways > 1 && val != cxld->interleave_granularity)
583 		return -EINVAL;
584 
585 	lockdep_assert_held_write(&cxl_rwsem.region);
586 
587 	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
588 		return -EBUSY;
589 
590 	p->interleave_granularity = val;
591 	return 0;
592 }
593 
594 static ssize_t interleave_granularity_store(struct device *dev,
595 					    struct device_attribute *attr,
596 					    const char *buf, size_t len)
597 {
598 	struct cxl_region *cxlr = to_cxl_region(dev);
599 	int rc, val;
600 
601 	rc = kstrtoint(buf, 0, &val);
602 	if (rc)
603 		return rc;
604 
605 	ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
606 	if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
607 		return rc;
608 
609 	rc = set_interleave_granularity(cxlr, val);
610 	if (rc)
611 		return rc;
612 
613 	return len;
614 }
615 static DEVICE_ATTR_RW(interleave_granularity);
616 
617 static ssize_t resource_show(struct device *dev, struct device_attribute *attr,
618 			     char *buf)
619 {
620 	struct cxl_region *cxlr = to_cxl_region(dev);
621 	struct cxl_region_params *p = &cxlr->params;
622 	u64 resource = -1ULL;
623 	int rc;
624 
625 	ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
626 	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
627 		return rc;
628 
629 	if (p->res)
630 		resource = p->res->start;
631 	return sysfs_emit(buf, "%#llx\n", resource);
632 }
633 static DEVICE_ATTR_RO(resource);
634 
635 static ssize_t mode_show(struct device *dev, struct device_attribute *attr,
636 			 char *buf)
637 {
638 	struct cxl_region *cxlr = to_cxl_region(dev);
639 	const char *desc;
640 
641 	if (cxlr->mode == CXL_PARTMODE_RAM)
642 		desc = "ram";
643 	else if (cxlr->mode == CXL_PARTMODE_PMEM)
644 		desc = "pmem";
645 	else
646 		desc = "";
647 
648 	return sysfs_emit(buf, "%s\n", desc);
649 }
650 static DEVICE_ATTR_RO(mode);
651 
652 static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size)
653 {
654 	struct cxl_root_decoder *cxlrd = cxlr->cxlrd;
655 	struct cxl_region_params *p = &cxlr->params;
656 	struct resource *res;
657 	u64 remainder = 0;
658 
659 	lockdep_assert_held_write(&cxl_rwsem.region);
660 
661 	/* Nothing to do... */
662 	if (p->res && resource_size(p->res) == size)
663 		return 0;
664 
665 	/* To change size the old size must be freed first */
666 	if (p->res)
667 		return -EBUSY;
668 
669 	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
670 		return -EBUSY;
671 
672 	/* ways, granularity and uuid (if PMEM) need to be set before HPA */
673 	if (!p->interleave_ways || !p->interleave_granularity ||
674 	    (cxlr->mode == CXL_PARTMODE_PMEM && uuid_is_null(&p->uuid)))
675 		return -ENXIO;
676 
677 	div64_u64_rem(size, (u64)SZ_256M * p->interleave_ways, &remainder);
678 	if (remainder)
679 		return -EINVAL;
680 
681 	res = alloc_free_mem_region(cxlrd->res, size, SZ_256M,
682 				    dev_name(&cxlr->dev));
683 	if (IS_ERR(res)) {
684 		dev_dbg(&cxlr->dev,
685 			"HPA allocation error (%ld) for size:%pap in %s %pr\n",
686 			PTR_ERR(res), &size, cxlrd->res->name, cxlrd->res);
687 		return PTR_ERR(res);
688 	}
689 
690 	cxlr->hpa_range = DEFINE_RANGE(res->start, res->end);
691 
692 	p->res = res;
693 	p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
694 
695 	return 0;
696 }
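
/*
 * Illustrative sketch (user-space arithmetic, hypothetical helper): the
 * remainder check above means a region size must be a whole multiple of
 * SZ_256M times the interleave ways, so each endpoint contributes a
 * 256MiB-aligned share of the HPA window.
 *
 *	#include <stdbool.h>
 *	#include <stdint.h>
 *
 *	#define SZ_256M (256ULL << 20)
 *
 *	static bool hpa_size_ok(uint64_t size, unsigned int ways)
 *	{
 *		return ways && size && (size % (SZ_256M * ways)) == 0;
 *	}
 *
 *	// hpa_size_ok(2ULL << 30, 4) == true   (2GiB across 4 ways)
 *	// hpa_size_ok(1ULL << 30, 3) == false  (1GiB is not a multiple of 768MiB)
 */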
697 
698 static void cxl_region_iomem_release(struct cxl_region *cxlr)
699 {
700 	struct cxl_region_params *p = &cxlr->params;
701 
702 	if (device_is_registered(&cxlr->dev))
703 		lockdep_assert_held_write(&cxl_rwsem.region);
704 	if (p->res) {
705 		/*
706 		 * Autodiscovered regions may not have been able to insert their
707 		 * resource.
708 		 */
709 		if (p->res->parent)
710 			remove_resource(p->res);
711 		kfree(p->res);
712 		p->res = NULL;
713 	}
714 }
715 
716 static int free_hpa(struct cxl_region *cxlr)
717 {
718 	struct cxl_region_params *p = &cxlr->params;
719 
720 	lockdep_assert_held_write(&cxl_rwsem.region);
721 
722 	if (!p->res)
723 		return 0;
724 
725 	if (p->state >= CXL_CONFIG_ACTIVE)
726 		return -EBUSY;
727 
728 	cxlr->hpa_range = DEFINE_RANGE(0, -1);
729 
730 	cxl_region_iomem_release(cxlr);
731 	p->state = CXL_CONFIG_IDLE;
732 	return 0;
733 }
734 
735 static ssize_t size_store(struct device *dev, struct device_attribute *attr,
736 			  const char *buf, size_t len)
737 {
738 	struct cxl_region *cxlr = to_cxl_region(dev);
739 	u64 val;
740 	int rc;
741 
742 	rc = kstrtou64(buf, 0, &val);
743 	if (rc)
744 		return rc;
745 
746 	ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
747 	if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
748 		return rc;
749 
750 	if (val)
751 		rc = alloc_hpa(cxlr, val);
752 	else
753 		rc = free_hpa(cxlr);
754 
755 	if (rc)
756 		return rc;
757 
758 	return len;
759 }
760 
761 static ssize_t size_show(struct device *dev, struct device_attribute *attr,
762 			 char *buf)
763 {
764 	struct cxl_region *cxlr = to_cxl_region(dev);
765 	struct cxl_region_params *p = &cxlr->params;
766 	u64 size = 0;
767 	ssize_t rc;
768 
769 	ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
770 	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
771 		return rc;
772 	if (p->res)
773 		size = resource_size(p->res);
774 	return sysfs_emit(buf, "%#llx\n", size);
775 }
776 static DEVICE_ATTR_RW(size);
777 
778 static ssize_t extended_linear_cache_size_show(struct device *dev,
779 					       struct device_attribute *attr,
780 					       char *buf)
781 {
782 	struct cxl_region *cxlr = to_cxl_region(dev);
783 	struct cxl_region_params *p = &cxlr->params;
784 	ssize_t rc;
785 
786 	ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
787 	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
788 		return rc;
789 	return sysfs_emit(buf, "%pap\n", &p->cache_size);
790 }
791 static DEVICE_ATTR_RO(extended_linear_cache_size);
792 
793 static ssize_t locked_show(struct device *dev,
794 			   struct device_attribute *attr,
795 			   char *buf)
796 {
797 	struct cxl_region *cxlr = to_cxl_region(dev);
798 	int rc;
799 
800 	ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
801 	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
802 		return rc;
803 
804 	rc = test_bit(CXL_REGION_F_LOCK, &cxlr->flags);
805 	return sysfs_emit(buf, "%d\n", rc);
806 }
807 static DEVICE_ATTR_RO(locked);
808 
809 static struct attribute *cxl_region_attrs[] = {
810 	&dev_attr_uuid.attr,
811 	&dev_attr_commit.attr,
812 	&dev_attr_interleave_ways.attr,
813 	&dev_attr_interleave_granularity.attr,
814 	&dev_attr_resource.attr,
815 	&dev_attr_size.attr,
816 	&dev_attr_mode.attr,
817 	&dev_attr_extended_linear_cache_size.attr,
818 	&dev_attr_locked.attr,
819 	NULL,
820 };
821 
822 static umode_t cxl_region_visible(struct kobject *kobj, struct attribute *a,
823 				  int n)
824 {
825 	struct device *dev = kobj_to_dev(kobj);
826 	struct cxl_region *cxlr = to_cxl_region(dev);
827 
828 	/*
829 	 * Support tooling that expects to find a 'uuid' attribute for all
830 	 * regions regardless of mode.
831 	 */
832 	if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_PARTMODE_PMEM)
833 		return 0444;
834 
835 	/*
836 	 * Don't display extended linear cache attribute if there is no
837 	 * extended linear cache.
838 	 */
839 	if (a == &dev_attr_extended_linear_cache_size.attr &&
840 	    cxlr->params.cache_size == 0)
841 		return 0;
842 
843 	return a->mode;
844 }
845 
846 static const struct attribute_group cxl_region_group = {
847 	.attrs = cxl_region_attrs,
848 	.is_visible = cxl_region_visible,
849 };
850 
851 static size_t show_targetN(struct cxl_region *cxlr, char *buf, int pos)
852 {
853 	struct cxl_region_params *p = &cxlr->params;
854 	struct cxl_endpoint_decoder *cxled;
855 	int rc;
856 
857 	ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
858 	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
859 		return rc;
860 
861 	if (pos >= p->interleave_ways) {
862 		dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
863 			p->interleave_ways);
864 		return -ENXIO;
865 	}
866 
867 	cxled = p->targets[pos];
868 	if (!cxled)
869 		return sysfs_emit(buf, "\n");
870 	return sysfs_emit(buf, "%s\n", dev_name(&cxled->cxld.dev));
871 }
872 
873 static int check_commit_order(struct device *dev, void *data)
874 {
875 	struct cxl_decoder *cxld = to_cxl_decoder(dev);
876 
877 	/*
878 	 * If port->commit_end is not the only free decoder, then an out of
879 	 * order shutdown has occurred; block further allocations until
880 	 * that is resolved
881 	 */
882 	if ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)
883 		return -EBUSY;
884 	return 0;
885 }
886 
887 static int match_free_decoder(struct device *dev, const void *data)
888 {
889 	struct cxl_port *port = to_cxl_port(dev->parent);
890 	struct cxl_decoder *cxld;
891 	int rc;
892 
893 	if (!is_switch_decoder(dev))
894 		return 0;
895 
896 	cxld = to_cxl_decoder(dev);
897 
898 	if (cxld->id != port->commit_end + 1)
899 		return 0;
900 
901 	if (cxld->region) {
902 		dev_dbg(dev->parent,
903 			"next decoder to commit (%s) is already reserved (%s)\n",
904 			dev_name(dev), dev_name(&cxld->region->dev));
905 		return 0;
906 	}
907 
908 	rc = device_for_each_child_reverse_from(dev->parent, dev, NULL,
909 						check_commit_order);
910 	if (rc) {
911 		dev_dbg(dev->parent,
912 			"unable to allocate %s due to out of order shutdown\n",
913 			dev_name(dev));
914 		return 0;
915 	}
916 	return 1;
917 }
918 
919 static bool spa_maps_hpa(const struct cxl_region_params *p,
920 			 const struct range *range)
921 {
922 	if (!p->res)
923 		return false;
924 
925 	/*
926 	 * The extended linear cache region is constructed by a 1:1 ratio
927 	 * where the SPA maps equal amounts of DRAM and CXL HPA capacity with
928 	 * CXL decoders at the high end of the SPA range.
929 	 */
930 	return p->res->start + p->cache_size == range->start &&
931 		p->res->end == range->end;
932 }
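
/*
 * Illustrative sketch with hypothetical addresses: the extended linear
 * cache layout checked above places the CXL-decoded HPA range at the top
 * of the SPA window, behind an equal amount of DRAM cache.
 *
 *	// SPA window:  0x1000000000 - 0x17ffffffff  (32GiB resource)
 *	// cache_size:  0x400000000                  (16GiB DRAM in front)
 *	// CXL HPA:     0x1400000000 - 0x17ffffffff  (16GiB at the top)
 *	//
 *	// spa_maps_hpa() returns true because
 *	//   res->start + cache_size == range->start, and
 *	//   res->end                == range->end
 */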
933 
934 static int match_auto_decoder(struct device *dev, const void *data)
935 {
936 	const struct cxl_region_params *p = data;
937 	struct cxl_decoder *cxld;
938 	struct range *r;
939 
940 	if (!is_switch_decoder(dev))
941 		return 0;
942 
943 	cxld = to_cxl_decoder(dev);
944 	r = &cxld->hpa_range;
945 
946 	if (spa_maps_hpa(p, r))
947 		return 1;
948 
949 	return 0;
950 }
951 
952 /**
953  * cxl_port_pick_region_decoder() - assign or lookup a decoder for a region
954  * @port: a port in the ancestry of the endpoint implied by @cxled
955  * @cxled: endpoint decoder to be, or currently, mapped by @port
956  * @cxlr: region to establish, or validate, decode @port
957  *
958  * In the region creation path cxl_port_pick_region_decoder() is an
959  * allocator to find a free port. In the region assembly path, it is
960  * recalling the decoder that platform firmware picked for validation
961  * purposes.
962  *
963  * The result is recorded in a 'struct cxl_region_ref' in @port.
964  */
965 static struct cxl_decoder *
966 cxl_port_pick_region_decoder(struct cxl_port *port,
967 			     struct cxl_endpoint_decoder *cxled,
968 			     struct cxl_region *cxlr)
969 {
970 	struct device *dev;
971 
972 	if (port == cxled_to_port(cxled))
973 		return &cxled->cxld;
974 
975 	if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags))
976 		dev = device_find_child(&port->dev, &cxlr->params,
977 					match_auto_decoder);
978 	else
979 		dev = device_find_child(&port->dev, NULL, match_free_decoder);
980 	if (!dev)
981 		return NULL;
982 	/*
983 	 * This decoder stays pinned (registered) as long as the endpoint decoder is
984 	 * registered, and endpoint decoder unregistration holds the
985 	 * cxl_rwsem.region over unregister events, so no need to hold on to
986 	 * this extra reference.
987 	 */
988 	put_device(dev);
989 	return to_cxl_decoder(dev);
990 }
991 
992 static bool auto_order_ok(struct cxl_port *port, struct cxl_region *cxlr_iter,
993 			  struct cxl_decoder *cxld)
994 {
995 	struct cxl_region_ref *rr = cxl_rr_load(port, cxlr_iter);
996 	struct cxl_decoder *cxld_iter = rr->decoder;
997 
998 	/*
999 	 * Allow the out of order assembly of auto-discovered regions.
1000 	 * Per CXL Spec 3.1 8.2.4.20.12 software must commit decoders
1001 	 * in HPA order. Confirm that the decoder with the lesser HPA
1002 	 * starting address has the lesser id.
1003 	 */
1004 	dev_dbg(&cxld->dev, "check for HPA violation %s:%d < %s:%d\n",
1005 		dev_name(&cxld->dev), cxld->id,
1006 		dev_name(&cxld_iter->dev), cxld_iter->id);
1007 
1008 	if (cxld_iter->id > cxld->id)
1009 		return true;
1010 
1011 	return false;
1012 }
1013 
1014 static struct cxl_region_ref *
1015 alloc_region_ref(struct cxl_port *port, struct cxl_region *cxlr,
1016 		 struct cxl_endpoint_decoder *cxled,
1017 		 struct cxl_decoder *cxld)
1018 {
1019 	struct cxl_region_params *p = &cxlr->params;
1020 	struct cxl_region_ref *cxl_rr, *iter;
1021 	unsigned long index;
1022 	int rc;
1023 
1024 	xa_for_each(&port->regions, index, iter) {
1025 		struct cxl_region_params *ip = &iter->region->params;
1026 
1027 		if (!ip->res || ip->res->start < p->res->start)
1028 			continue;
1029 
1030 		if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
1031 			if (auto_order_ok(port, iter->region, cxld))
1032 				continue;
1033 		}
1034 		dev_dbg(&cxlr->dev, "%s: HPA order violation %s:%pr vs %pr\n",
1035 			dev_name(&port->dev),
1036 			dev_name(&iter->region->dev), ip->res, p->res);
1037 
1038 		return ERR_PTR(-EBUSY);
1039 	}
1040 
1041 	cxl_rr = kzalloc_obj(*cxl_rr);
1042 	if (!cxl_rr)
1043 		return ERR_PTR(-ENOMEM);
1044 	cxl_rr->port = port;
1045 	cxl_rr->region = cxlr;
1046 	cxl_rr->nr_targets = 1;
1047 	xa_init(&cxl_rr->endpoints);
1048 
1049 	rc = xa_insert(&port->regions, (unsigned long)cxlr, cxl_rr, GFP_KERNEL);
1050 	if (rc) {
1051 		dev_dbg(&cxlr->dev,
1052 			"%s: failed to track region reference: %d\n",
1053 			dev_name(&port->dev), rc);
1054 		kfree(cxl_rr);
1055 		return ERR_PTR(rc);
1056 	}
1057 
1058 	return cxl_rr;
1059 }
1060 
1061 static void cxl_rr_free_decoder(struct cxl_region_ref *cxl_rr)
1062 {
1063 	struct cxl_region *cxlr = cxl_rr->region;
1064 	struct cxl_decoder *cxld = cxl_rr->decoder;
1065 
1066 	if (!cxld)
1067 		return;
1068 
1069 	dev_WARN_ONCE(&cxlr->dev, cxld->region != cxlr, "region mismatch\n");
1070 	if (cxld->region == cxlr) {
1071 		cxld->region = NULL;
1072 		put_device(&cxlr->dev);
1073 	}
1074 }
1075 
1076 static void free_region_ref(struct cxl_region_ref *cxl_rr)
1077 {
1078 	struct cxl_port *port = cxl_rr->port;
1079 	struct cxl_region *cxlr = cxl_rr->region;
1080 
1081 	cxl_rr_free_decoder(cxl_rr);
1082 	xa_erase(&port->regions, (unsigned long)cxlr);
1083 	xa_destroy(&cxl_rr->endpoints);
1084 	kfree(cxl_rr);
1085 }
1086 
1087 static int cxl_rr_ep_add(struct cxl_region_ref *cxl_rr,
1088 			 struct cxl_endpoint_decoder *cxled)
1089 {
1090 	int rc;
1091 	struct cxl_port *port = cxl_rr->port;
1092 	struct cxl_region *cxlr = cxl_rr->region;
1093 	struct cxl_decoder *cxld = cxl_rr->decoder;
1094 	struct cxl_ep *ep = cxl_ep_load(port, cxled_to_memdev(cxled));
1095 
1096 	if (ep) {
1097 		rc = xa_insert(&cxl_rr->endpoints, (unsigned long)cxled, ep,
1098 			       GFP_KERNEL);
1099 		if (rc)
1100 			return rc;
1101 	}
1102 	cxl_rr->nr_eps++;
1103 
1104 	if (!cxld->region) {
1105 		cxld->region = cxlr;
1106 
1107 		/*
1108 		 * Now that cxld->region is set the intermediate staging state
1109 		 * can be cleared.
1110 		 */
1111 		if (cxld == &cxled->cxld &&
1112 		    cxled->state == CXL_DECODER_STATE_AUTO_STAGED)
1113 			cxled->state = CXL_DECODER_STATE_AUTO;
1114 		get_device(&cxlr->dev);
1115 	}
1116 
1117 	return 0;
1118 }
1119 
1120 static int cxl_rr_assign_decoder(struct cxl_port *port, struct cxl_region *cxlr,
1121 				 struct cxl_endpoint_decoder *cxled,
1122 				 struct cxl_region_ref *cxl_rr,
1123 				 struct cxl_decoder *cxld)
1124 {
1125 	if (cxld->region) {
1126 		dev_dbg(&cxlr->dev, "%s: %s already attached to %s\n",
1127 			dev_name(&port->dev), dev_name(&cxld->dev),
1128 			dev_name(&cxld->region->dev));
1129 		return -EBUSY;
1130 	}
1131 
1132 	/*
1133 	 * Endpoints should already match the region type, but backstop that
1134 	 * assumption with an assertion. Switch-decoders change mapping-type
1135 	 * based on what is mapped when they are assigned to a region.
1136 	 */
1137 	dev_WARN_ONCE(&cxlr->dev,
1138 		      port == cxled_to_port(cxled) &&
1139 			      cxld->target_type != cxlr->type,
1140 		      "%s:%s mismatch decoder type %d -> %d\n",
1141 		      dev_name(&cxled_to_memdev(cxled)->dev),
1142 		      dev_name(&cxld->dev), cxld->target_type, cxlr->type);
1143 	cxld->target_type = cxlr->type;
1144 	cxl_rr->decoder = cxld;
1145 	return 0;
1146 }
1147 
1148 static void cxl_region_setup_flags(struct cxl_region *cxlr,
1149 				   struct cxl_decoder *cxld)
1150 {
1151 	if (cxld->flags & CXL_DECODER_F_LOCK) {
1152 		set_bit(CXL_REGION_F_LOCK, &cxlr->flags);
1153 		clear_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
1154 	}
1155 
1156 	if (cxld->flags & CXL_DECODER_F_NORMALIZED_ADDRESSING)
1157 		set_bit(CXL_REGION_F_NORMALIZED_ADDRESSING, &cxlr->flags);
1158 }
1159 
1160 /**
1161  * cxl_port_attach_region() - track a region's interest in a port by endpoint
1162  * @port: port to add a new region reference 'struct cxl_region_ref'
1163  * @cxlr: region to attach to @port
1164  * @cxled: endpoint decoder used to create or further pin a region reference
1165  * @pos: interleave position of @cxled in @cxlr
1166  *
1167  * The attach event is an opportunity to validate CXL decode setup
1168  * constraints and record metadata needed for programming HDM decoders,
1169  * in particular decoder target lists.
1170  *
1171  * The steps are:
1172  *
1173  * - validate that there are no other regions with a higher HPA already
1174  *   associated with @port
1175  * - establish a region reference if one is not already present
1176  *
1177  *   - additionally allocate a decoder instance that will host @cxlr on
1178  *     @port
1179  *
1180  * - pin the region reference by the endpoint
1181  * - account for how many entries in @port's target list are needed to
1182  *   cover all of the added endpoints.
1183  */
1184 static int cxl_port_attach_region(struct cxl_port *port,
1185 				  struct cxl_region *cxlr,
1186 				  struct cxl_endpoint_decoder *cxled, int pos)
1187 {
1188 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1189 	struct cxl_ep *ep = cxl_ep_load(port, cxlmd);
1190 	struct cxl_region_ref *cxl_rr;
1191 	bool nr_targets_inc = false;
1192 	struct cxl_decoder *cxld;
1193 	unsigned long index;
1194 	int rc = -EBUSY;
1195 
1196 	lockdep_assert_held_write(&cxl_rwsem.region);
1197 
1198 	cxl_rr = cxl_rr_load(port, cxlr);
1199 	if (cxl_rr) {
1200 		struct cxl_ep *ep_iter;
1201 		int found = 0;
1202 
1203 		/*
1204 		 * Walk the existing endpoints that have been attached to
1205 		 * @cxlr at @port and see if they share the same 'next' port
1206 		 * in the downstream direction. I.e. endpoints that share a common
1207 		 * upstream switch.
1208 		 */
1209 		xa_for_each(&cxl_rr->endpoints, index, ep_iter) {
1210 			if (ep_iter == ep)
1211 				continue;
1212 			if (ep_iter->next == ep->next) {
1213 				found++;
1214 				break;
1215 			}
1216 		}
1217 
1218 		/*
1219 		 * New target port, or @port is an endpoint port that always
1220 		 * accounts its own local decode as a target.
1221 		 */
1222 		if (!found || !ep->next) {
1223 			cxl_rr->nr_targets++;
1224 			nr_targets_inc = true;
1225 		}
1226 	} else {
1227 		struct cxl_decoder *cxld;
1228 
1229 		cxld = cxl_port_pick_region_decoder(port, cxled, cxlr);
1230 		if (!cxld) {
1231 			dev_dbg(&cxlr->dev, "%s: no decoder available\n",
1232 				dev_name(&port->dev));
1233 			return -EBUSY;
1234 		}
1235 
1236 		cxl_rr = alloc_region_ref(port, cxlr, cxled, cxld);
1237 		if (IS_ERR(cxl_rr)) {
1238 			dev_dbg(&cxlr->dev,
1239 				"%s: failed to allocate region reference\n",
1240 				dev_name(&port->dev));
1241 			return PTR_ERR(cxl_rr);
1242 		}
1243 		nr_targets_inc = true;
1244 
1245 		rc = cxl_rr_assign_decoder(port, cxlr, cxled, cxl_rr, cxld);
1246 		if (rc)
1247 			goto out_erase;
1248 	}
1249 	cxld = cxl_rr->decoder;
1250 
1251 	/*
1252 	 * the number of targets should not exceed the target_count
1253 	 * of the decoder
1254 	 */
1255 	if (is_switch_decoder(&cxld->dev)) {
1256 		struct cxl_switch_decoder *cxlsd;
1257 
1258 		cxlsd = to_cxl_switch_decoder(&cxld->dev);
1259 		if (cxl_rr->nr_targets > cxlsd->nr_targets) {
1260 			dev_dbg(&cxlr->dev,
1261 				"%s:%s %s add: %s:%s @ %d overflows targets: %d\n",
1262 				dev_name(port->uport_dev), dev_name(&port->dev),
1263 				dev_name(&cxld->dev), dev_name(&cxlmd->dev),
1264 				dev_name(&cxled->cxld.dev), pos,
1265 				cxlsd->nr_targets);
1266 			rc = -ENXIO;
1267 			goto out_erase;
1268 		}
1269 	}
1270 
1271 	cxl_region_setup_flags(cxlr, cxld);
1272 
1273 	rc = cxl_rr_ep_add(cxl_rr, cxled);
1274 	if (rc) {
1275 		dev_dbg(&cxlr->dev,
1276 			"%s: failed to track endpoint %s:%s reference\n",
1277 			dev_name(&port->dev), dev_name(&cxlmd->dev),
1278 			dev_name(&cxld->dev));
1279 		goto out_erase;
1280 	}
1281 
1282 	dev_dbg(&cxlr->dev,
1283 		"%s:%s %s add: %s:%s @ %d next: %s nr_eps: %d nr_targets: %d\n",
1284 		dev_name(port->uport_dev), dev_name(&port->dev),
1285 		dev_name(&cxld->dev), dev_name(&cxlmd->dev),
1286 		dev_name(&cxled->cxld.dev), pos,
1287 		ep ? ep->next ? dev_name(ep->next->uport_dev) :
1288 				      dev_name(&cxlmd->dev) :
1289 			   "none",
1290 		cxl_rr->nr_eps, cxl_rr->nr_targets);
1291 
1292 	return 0;
1293 out_erase:
1294 	if (nr_targets_inc)
1295 		cxl_rr->nr_targets--;
1296 	if (cxl_rr->nr_eps == 0)
1297 		free_region_ref(cxl_rr);
1298 	return rc;
1299 }
1300 
1301 static void cxl_port_detach_region(struct cxl_port *port,
1302 				   struct cxl_region *cxlr,
1303 				   struct cxl_endpoint_decoder *cxled)
1304 {
1305 	struct cxl_region_ref *cxl_rr;
1306 	struct cxl_ep *ep = NULL;
1307 
1308 	lockdep_assert_held_write(&cxl_rwsem.region);
1309 
1310 	cxl_rr = cxl_rr_load(port, cxlr);
1311 	if (!cxl_rr)
1312 		return;
1313 
1314 	/*
1315 	 * Endpoint ports do not carry cxl_ep references, and they
1316 	 * never target more than one endpoint by definition
1317 	 */
1318 	if (cxl_rr->decoder == &cxled->cxld)
1319 		cxl_rr->nr_eps--;
1320 	else
1321 		ep = xa_erase(&cxl_rr->endpoints, (unsigned long)cxled);
1322 	if (ep) {
1323 		struct cxl_ep *ep_iter;
1324 		unsigned long index;
1325 		int found = 0;
1326 
1327 		cxl_rr->nr_eps--;
1328 		xa_for_each(&cxl_rr->endpoints, index, ep_iter) {
1329 			if (ep_iter->next == ep->next) {
1330 				found++;
1331 				break;
1332 			}
1333 		}
1334 		if (!found)
1335 			cxl_rr->nr_targets--;
1336 	}
1337 
1338 	if (cxl_rr->nr_eps == 0)
1339 		free_region_ref(cxl_rr);
1340 }
1341 
1342 static int check_last_peer(struct cxl_endpoint_decoder *cxled,
1343 			   struct cxl_ep *ep, struct cxl_region_ref *cxl_rr,
1344 			   int distance)
1345 {
1346 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1347 	struct cxl_region *cxlr = cxl_rr->region;
1348 	struct cxl_region_params *p = &cxlr->params;
1349 	struct cxl_endpoint_decoder *cxled_peer;
1350 	struct cxl_port *port = cxl_rr->port;
1351 	struct cxl_memdev *cxlmd_peer;
1352 	struct cxl_ep *ep_peer;
1353 	int pos = cxled->pos;
1354 
1355 	/*
1356 	 * If this position wants to share a dport with the last endpoint mapped
1357 	 * then that endpoint, at index 'position - distance', must also be
1358 	 * mapped by this dport.
1359 	 */
1360 	if (pos < distance) {
1361 		dev_dbg(&cxlr->dev, "%s:%s: cannot host %s:%s at %d\n",
1362 			dev_name(port->uport_dev), dev_name(&port->dev),
1363 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
1364 		return -ENXIO;
1365 	}
1366 	cxled_peer = p->targets[pos - distance];
1367 	cxlmd_peer = cxled_to_memdev(cxled_peer);
1368 	ep_peer = cxl_ep_load(port, cxlmd_peer);
1369 	if (ep->dport != ep_peer->dport) {
1370 		dev_dbg(&cxlr->dev,
1371 			"%s:%s: %s:%s pos %d mismatched peer %s:%s\n",
1372 			dev_name(port->uport_dev), dev_name(&port->dev),
1373 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos,
1374 			dev_name(&cxlmd_peer->dev),
1375 			dev_name(&cxled_peer->cxld.dev));
1376 		return -ENXIO;
1377 	}
1378 
1379 	return 0;
1380 }
1381 
1382 static int check_interleave_cap(struct cxl_decoder *cxld, int iw, int ig)
1383 {
1384 	struct cxl_port *port = to_cxl_port(cxld->dev.parent);
1385 	struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);
1386 	unsigned int interleave_mask;
1387 	u8 eiw;
1388 	u16 eig;
1389 	int high_pos, low_pos;
1390 
1391 	if (!test_bit(iw, &cxlhdm->iw_cap_mask))
1392 		return -ENXIO;
1393 	/*
1394 	 * Per CXL specification r3.1(8.2.4.20.13 Decoder Protection),
1395 	 * if eiw < 8:
1396 	 *   DPAOFFSET[51: eig + 8] = HPAOFFSET[51: eig + 8 + eiw]
1397 	 *   DPAOFFSET[eig + 7: 0]  = HPAOFFSET[eig + 7: 0]
1398 	 *
1399 	 *   when the eiw is 0, all the bits of HPAOFFSET[51: 0] are used, the
1400 	 *   interleave bits are none.
1401 	 *
1402 	 * if eiw >= 8:
1403 	 *   DPAOFFSET[51: eig + 8] = HPAOFFSET[51: eig + eiw] / 3
1404 	 *   DPAOFFSET[eig + 7: 0]  = HPAOFFSET[eig + 7: 0]
1405 	 *
1406 	 *   when the eiw is 8, all the bits of HPAOFFSET[51: 0] are used, the
1407 	 *   interleave bits are none.
1408 	 */
1409 	ways_to_eiw(iw, &eiw);
1410 	if (eiw == 0 || eiw == 8)
1411 		return 0;
1412 
1413 	granularity_to_eig(ig, &eig);
1414 	if (eiw > 8)
1415 		high_pos = eiw + eig - 1;
1416 	else
1417 		high_pos = eiw + eig + 7;
1418 	low_pos = eig + 8;
1419 	interleave_mask = GENMASK(high_pos, low_pos);
1420 	if (interleave_mask & ~cxlhdm->interleave_mask)
1421 		return -ENXIO;
1422 
1423 	return 0;
1424 }
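
/*
 * Illustrative sketch (standalone user-space reimplementation, not the
 * driver helpers): reproduces the HPA interleave-bit arithmetic above so a
 * ways/granularity pair can be mapped to the host physical address bits it
 * consumes. The eiw/eig encodings mirror ways_to_eiw() and
 * granularity_to_eig(); the values are examples only, and the eiw == 0 / 8
 * early-return cases above are not handled.
 *
 *	#include <stdio.h>
 *	#include <stdint.h>
 *
 *	static int eiw_of(unsigned int ways)	// 1,2,4,8,16 -> 0..4; 3,6,12 -> 8..10
 *	{
 *		switch (ways) {
 *		case 1: return 0; case 2: return 1; case 4: return 2;
 *		case 8: return 3; case 16: return 4;
 *		case 3: return 8; case 6: return 9; case 12: return 10;
 *		default: return -1;
 *		}
 *	}
 *
 *	static int eig_of(unsigned int gran)	// gran = 256 << eig
 *	{
 *		int eig = 0;
 *
 *		while (gran > 256) {
 *			gran >>= 1;
 *			eig++;
 *		}
 *		return eig;
 *	}
 *
 *	int main(void)
 *	{
 *		unsigned int iw = 4, ig = 1024;
 *		int eiw = eiw_of(iw), eig = eig_of(ig);
 *		int low = eig + 8;
 *		int high = eiw > 8 ? eiw + eig - 1 : eiw + eig + 7;
 *		uint64_t mask = (~0ULL >> (63 - high)) & ~((1ULL << low) - 1);
 *
 *		// x4 @ 1024B granularity consumes HPA bits [11:10] (mask 0xc00)
 *		printf("iw=%u ig=%u -> bits [%d:%d] mask %#llx\n",
 *		       iw, ig, high, low, (unsigned long long)mask);
 *		return 0;
 *	}
 */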
1425 
1426 static int cxl_port_setup_targets(struct cxl_port *port,
1427 				  struct cxl_region *cxlr,
1428 				  struct cxl_endpoint_decoder *cxled)
1429 {
1430 	struct cxl_root_decoder *cxlrd = cxlr->cxlrd;
1431 	int parent_iw, parent_ig, ig, iw, rc, pos = cxled->pos;
1432 	struct cxl_port *parent_port = to_cxl_port(port->dev.parent);
1433 	struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr);
1434 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1435 	struct cxl_ep *ep = cxl_ep_load(port, cxlmd);
1436 	struct cxl_region_params *p = &cxlr->params;
1437 	struct cxl_decoder *cxld = cxl_rr->decoder;
1438 	struct cxl_switch_decoder *cxlsd;
1439 	struct cxl_port *iter = port;
1440 	u16 eig, peig;
1441 	u8 eiw, peiw;
1442 
1443 	/*
1444 	 * While root level decoders support x3, x6, x12, switch level
1445 	 * decoders only support powers of 2 up to x16.
1446 	 */
1447 	if (!is_power_of_2(cxl_rr->nr_targets)) {
1448 		dev_dbg(&cxlr->dev, "%s:%s: invalid target count %d\n",
1449 			dev_name(port->uport_dev), dev_name(&port->dev),
1450 			cxl_rr->nr_targets);
1451 		return -EINVAL;
1452 	}
1453 
1454 	cxlsd = to_cxl_switch_decoder(&cxld->dev);
1455 	if (cxl_rr->nr_targets_set) {
1456 		int i, distance = 1;
1457 		struct cxl_region_ref *cxl_rr_iter;
1458 
1459 		/*
1460 		 * The "distance" between peer downstream ports represents which
1461 		 * endpoint positions in the region interleave a given port can
1462 		 * host.
1463 		 *
1464 		 * For example, at the root of a hierarchy the distance is
1465 		 * always 1 as every index targets a different host-bridge. At
1466 		 * each subsequent switch level those ports map every Nth region
1467 		 * position where N is the width of the switch == distance.
1468 		 */
1469 		do {
1470 			cxl_rr_iter = cxl_rr_load(iter, cxlr);
1471 			distance *= cxl_rr_iter->nr_targets;
1472 			iter = to_cxl_port(iter->dev.parent);
1473 		} while (!is_cxl_root(iter));
1474 		distance *= cxlrd->cxlsd.cxld.interleave_ways;
1475 
1476 		for (i = 0; i < cxl_rr->nr_targets_set; i++)
1477 			if (ep->dport == cxlsd->target[i]) {
1478 				rc = check_last_peer(cxled, ep, cxl_rr,
1479 						     distance);
1480 				if (rc)
1481 					return rc;
1482 				goto out_target_set;
1483 			}
1484 		goto add_target;
1485 	}
1486 
1487 	if (is_cxl_root(parent_port)) {
1488 		/*
1489 		 * Root decoder IG is always set to value in CFMWS which
1490 		 * may be different than this region's IG.  We can use the
1491 		 * region's IG here since interleave_granularity_store()
1492 		 * does not allow interleaved host-bridges with
1493 		 * root IG != region IG.
1494 		 */
1495 		parent_ig = p->interleave_granularity;
1496 		parent_iw = cxlrd->cxlsd.cxld.interleave_ways;
1497 		/*
1498 		 * For purposes of address bit routing, use power-of-2 math for
1499 		 * switch ports.
1500 		 */
1501 		if (!is_power_of_2(parent_iw))
1502 			parent_iw /= 3;
1503 	} else {
1504 		struct cxl_region_ref *parent_rr;
1505 		struct cxl_decoder *parent_cxld;
1506 
1507 		parent_rr = cxl_rr_load(parent_port, cxlr);
1508 		parent_cxld = parent_rr->decoder;
1509 		parent_ig = parent_cxld->interleave_granularity;
1510 		parent_iw = parent_cxld->interleave_ways;
1511 	}
1512 
1513 	rc = granularity_to_eig(parent_ig, &peig);
1514 	if (rc) {
1515 		dev_dbg(&cxlr->dev, "%s:%s: invalid parent granularity: %d\n",
1516 			dev_name(parent_port->uport_dev),
1517 			dev_name(&parent_port->dev), parent_ig);
1518 		return rc;
1519 	}
1520 
1521 	rc = ways_to_eiw(parent_iw, &peiw);
1522 	if (rc) {
1523 		dev_dbg(&cxlr->dev, "%s:%s: invalid parent interleave: %d\n",
1524 			dev_name(parent_port->uport_dev),
1525 			dev_name(&parent_port->dev), parent_iw);
1526 		return rc;
1527 	}
1528 
1529 	iw = cxl_rr->nr_targets;
1530 	rc = ways_to_eiw(iw, &eiw);
1531 	if (rc) {
1532 		dev_dbg(&cxlr->dev, "%s:%s: invalid port interleave: %d\n",
1533 			dev_name(port->uport_dev), dev_name(&port->dev), iw);
1534 		return rc;
1535 	}
1536 
1537 	/*
1538 	 * Interleave granularity is a multiple of @parent_port granularity.
1539 	 * Multiplier is the parent port interleave ways.
1540 	 */
1541 	rc = granularity_to_eig(parent_ig * parent_iw, &eig);
1542 	if (rc) {
1543 		dev_dbg(&cxlr->dev,
1544 			"%s: invalid granularity calculation (%d * %d)\n",
1545 			dev_name(&parent_port->dev), parent_ig, parent_iw);
1546 		return rc;
1547 	}
1548 
1549 	rc = eig_to_granularity(eig, &ig);
1550 	if (rc) {
1551 		dev_dbg(&cxlr->dev, "%s:%s: invalid interleave: %d\n",
1552 			dev_name(port->uport_dev), dev_name(&port->dev),
1553 			256 << eig);
1554 		return rc;
1555 	}
1556 
1557 	if (iw > 8 || iw > cxlsd->nr_targets) {
1558 		dev_dbg(&cxlr->dev,
1559 			"%s:%s:%s: ways: %d overflows targets: %d\n",
1560 			dev_name(port->uport_dev), dev_name(&port->dev),
1561 			dev_name(&cxld->dev), iw, cxlsd->nr_targets);
1562 		return -ENXIO;
1563 	}
1564 
1565 	if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
1566 		if (cxld->interleave_ways != iw ||
1567 		    (iw > 1 && cxld->interleave_granularity != ig) ||
1568 		    !spa_maps_hpa(p, &cxld->hpa_range) ||
1569 		    ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) {
1570 			dev_err(&cxlr->dev,
1571 				"%s:%s %s expected iw: %d ig: %d %pr\n",
1572 				dev_name(port->uport_dev), dev_name(&port->dev),
1573 				__func__, iw, ig, p->res);
1574 			dev_err(&cxlr->dev,
1575 				"%s:%s %s got iw: %d ig: %d state: %s %#llx:%#llx\n",
1576 				dev_name(port->uport_dev), dev_name(&port->dev),
1577 				__func__, cxld->interleave_ways,
1578 				cxld->interleave_granularity,
1579 				str_enabled_disabled(cxld->flags & CXL_DECODER_F_ENABLE),
1580 				cxld->hpa_range.start, cxld->hpa_range.end);
1581 			return -ENXIO;
1582 		}
1583 	} else {
1584 		rc = check_interleave_cap(cxld, iw, ig);
1585 		if (rc) {
1586 			dev_dbg(&cxlr->dev,
1587 				"%s:%s iw: %d ig: %d is not supported\n",
1588 				dev_name(port->uport_dev),
1589 				dev_name(&port->dev), iw, ig);
1590 			return rc;
1591 		}
1592 
1593 		cxld->interleave_ways = iw;
1594 		cxld->interleave_granularity = ig;
1595 		cxld->hpa_range = (struct range) {
1596 			.start = p->res->start,
1597 			.end = p->res->end,
1598 		};
1599 	}
1600 	dev_dbg(&cxlr->dev, "%s:%s iw: %d ig: %d\n", dev_name(port->uport_dev),
1601 		dev_name(&port->dev), iw, ig);
1602 add_target:
1603 	if (cxl_rr->nr_targets_set == cxl_rr->nr_targets) {
1604 		dev_dbg(&cxlr->dev,
1605 			"%s:%s: targets full trying to add %s:%s at %d\n",
1606 			dev_name(port->uport_dev), dev_name(&port->dev),
1607 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
1608 		return -ENXIO;
1609 	}
1610 	if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
1611 		if (cxlsd->target[cxl_rr->nr_targets_set] != ep->dport) {
1612 			dev_dbg(&cxlr->dev, "%s:%s: %s expected %s at %d\n",
1613 				dev_name(port->uport_dev), dev_name(&port->dev),
1614 				dev_name(&cxlsd->cxld.dev),
1615 				dev_name(ep->dport->dport_dev),
1616 				cxl_rr->nr_targets_set);
1617 			return -ENXIO;
1618 		}
1619 	} else {
1620 		cxlsd->target[cxl_rr->nr_targets_set] = ep->dport;
1621 		cxlsd->cxld.target_map[cxl_rr->nr_targets_set] = ep->dport->port_id;
1622 	}
1623 	cxl_rr->nr_targets_set++;
1624 out_target_set:
1625 	dev_dbg(&cxlr->dev, "%s:%s target[%d] = %s for %s:%s @ %d\n",
1626 		dev_name(port->uport_dev), dev_name(&port->dev),
1627 		cxl_rr->nr_targets_set - 1, dev_name(ep->dport->dport_dev),
1628 		dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
1629 
1630 	return 0;
1631 }
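
/*
 * Illustrative sketch (standalone arithmetic, hypothetical helper): the
 * granularity programmed at each hop above is the parent granularity
 * scaled by the parent interleave ways, so each level of the topology
 * decodes a distinct, higher set of HPA bits.
 *
 *	static unsigned int port_granularity(unsigned int parent_ig,
 *					     unsigned int parent_iw)
 *	{
 *		return parent_ig * parent_iw;
 *	}
 *
 *	// root: 2-way @ 256B  -> host bridge decoders program 512B granularity
 *	// HB:   2-way @ 512B  -> downstream switch decoders program 1024B
 */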
1632 
1633 static void cxl_port_reset_targets(struct cxl_port *port,
1634 				   struct cxl_region *cxlr)
1635 {
1636 	struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr);
1637 	struct cxl_decoder *cxld;
1638 
1639 	/*
1640 	 * After the last endpoint has been detached the entire cxl_rr may now
1641 	 * be gone.
1642 	 */
1643 	if (!cxl_rr)
1644 		return;
1645 	cxl_rr->nr_targets_set = 0;
1646 
1647 	cxld = cxl_rr->decoder;
1648 	cxld->hpa_range = (struct range) {
1649 		.start = 0,
1650 		.end = -1,
1651 	};
1652 }
1653 
1654 static void cxl_region_teardown_targets(struct cxl_region *cxlr)
1655 {
1656 	struct cxl_region_params *p = &cxlr->params;
1657 	struct cxl_endpoint_decoder *cxled;
1658 	struct cxl_dev_state *cxlds;
1659 	struct cxl_memdev *cxlmd;
1660 	struct cxl_port *iter;
1661 	struct cxl_ep *ep;
1662 	int i;
1663 
1664 	/*
1665 	 * In the auto-discovery case skip automatic teardown since the
1666 	 * address space is already active
1667 	 */
1668 	if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags))
1669 		return;
1670 
1671 	for (i = 0; i < p->nr_targets; i++) {
1672 		cxled = p->targets[i];
1673 		cxlmd = cxled_to_memdev(cxled);
1674 		cxlds = cxlmd->cxlds;
1675 
1676 		if (cxlds->rcd)
1677 			continue;
1678 
1679 		iter = cxled_to_port(cxled);
1680 		while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
1681 			iter = to_cxl_port(iter->dev.parent);
1682 
1683 		for (ep = cxl_ep_load(iter, cxlmd); iter;
1684 		     iter = ep->next, ep = cxl_ep_load(iter, cxlmd))
1685 			cxl_port_reset_targets(iter, cxlr);
1686 	}
1687 }
1688 
1689 static int cxl_region_setup_targets(struct cxl_region *cxlr)
1690 {
1691 	struct cxl_region_params *p = &cxlr->params;
1692 	struct cxl_endpoint_decoder *cxled;
1693 	struct cxl_dev_state *cxlds;
1694 	int i, rc, rch = 0, vh = 0;
1695 	struct cxl_memdev *cxlmd;
1696 	struct cxl_port *iter;
1697 	struct cxl_ep *ep;
1698 
1699 	for (i = 0; i < p->nr_targets; i++) {
1700 		cxled = p->targets[i];
1701 		cxlmd = cxled_to_memdev(cxled);
1702 		cxlds = cxlmd->cxlds;
1703 
1704 		/* validate that all targets agree on topology */
1705 		if (!cxlds->rcd) {
1706 			vh++;
1707 		} else {
1708 			rch++;
1709 			continue;
1710 		}
1711 
1712 		iter = cxled_to_port(cxled);
1713 		while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
1714 			iter = to_cxl_port(iter->dev.parent);
1715 
1716 		/*
1717 		 * Descend the topology tree programming / validating
1718 		 * targets while looking for conflicts.
1719 		 */
1720 		for (ep = cxl_ep_load(iter, cxlmd); iter;
1721 		     iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
1722 			rc = cxl_port_setup_targets(iter, cxlr, cxled);
1723 			if (rc) {
1724 				cxl_region_teardown_targets(cxlr);
1725 				return rc;
1726 			}
1727 		}
1728 	}
1729 
1730 	if (rch && vh) {
1731 		dev_err(&cxlr->dev, "mismatched CXL topologies detected\n");
1732 		cxl_region_teardown_targets(cxlr);
1733 		return -ENXIO;
1734 	}
1735 
1736 	return 0;
1737 }
1738 
1739 static int cxl_region_validate_position(struct cxl_region *cxlr,
1740 					struct cxl_endpoint_decoder *cxled,
1741 					int pos)
1742 {
1743 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1744 	struct cxl_region_params *p = &cxlr->params;
1745 	int i;
1746 
1747 	if (pos < 0 || pos >= p->interleave_ways) {
1748 		dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
1749 			p->interleave_ways);
1750 		return -ENXIO;
1751 	}
1752 
1753 	if (p->targets[pos] == cxled)
1754 		return 0;
1755 
1756 	if (p->targets[pos]) {
1757 		struct cxl_endpoint_decoder *cxled_target = p->targets[pos];
1758 		struct cxl_memdev *cxlmd_target = cxled_to_memdev(cxled_target);
1759 
1760 		dev_dbg(&cxlr->dev, "position %d already assigned to %s:%s\n",
1761 			pos, dev_name(&cxlmd_target->dev),
1762 			dev_name(&cxled_target->cxld.dev));
1763 		return -EBUSY;
1764 	}
1765 
1766 	for (i = 0; i < p->interleave_ways; i++) {
1767 		struct cxl_endpoint_decoder *cxled_target;
1768 		struct cxl_memdev *cxlmd_target;
1769 
1770 		cxled_target = p->targets[i];
1771 		if (!cxled_target)
1772 			continue;
1773 
1774 		cxlmd_target = cxled_to_memdev(cxled_target);
1775 		if (cxlmd_target == cxlmd) {
1776 			dev_dbg(&cxlr->dev,
1777 				"%s already specified at position %d via: %s\n",
1778 				dev_name(&cxlmd->dev), pos,
1779 				dev_name(&cxled_target->cxld.dev));
1780 			return -EBUSY;
1781 		}
1782 	}
1783 
1784 	return 0;
1785 }
1786 
1787 static int cxl_region_attach_position(struct cxl_region *cxlr,
1788 				      struct cxl_endpoint_decoder *cxled,
1789 				      const struct cxl_dport *dport, int pos)
1790 {
1791 	struct cxl_root_decoder *cxlrd = cxlr->cxlrd;
1792 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1793 	struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd;
1794 	struct cxl_decoder *cxld = &cxlsd->cxld;
1795 	int iw = cxld->interleave_ways;
1796 	struct cxl_port *iter;
1797 	int rc;
1798 
1799 	if (dport != cxlrd->cxlsd.target[pos % iw]) {
1800 		dev_dbg(&cxlr->dev, "%s:%s invalid target position for %s\n",
1801 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
1802 			dev_name(&cxlrd->cxlsd.cxld.dev));
1803 		return -ENXIO;
1804 	}
1805 
1806 	for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
1807 	     iter = to_cxl_port(iter->dev.parent)) {
1808 		rc = cxl_port_attach_region(iter, cxlr, cxled, pos);
1809 		if (rc)
1810 			goto err;
1811 	}
1812 
1813 	return 0;
1814 
1815 err:
1816 	for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
1817 	     iter = to_cxl_port(iter->dev.parent))
1818 		cxl_port_detach_region(iter, cxlr, cxled);
1819 	return rc;
1820 }
1821 
1822 static int cxl_region_attach_auto(struct cxl_region *cxlr,
1823 				  struct cxl_endpoint_decoder *cxled, int pos)
1824 {
1825 	struct cxl_region_params *p = &cxlr->params;
1826 
1827 	if (cxled->state != CXL_DECODER_STATE_AUTO) {
1828 		dev_err(&cxlr->dev,
1829 			"%s: unable to add decoder to autodetected region\n",
1830 			dev_name(&cxled->cxld.dev));
1831 		return -EINVAL;
1832 	}
1833 
1834 	if (pos >= 0) {
1835 		dev_dbg(&cxlr->dev, "%s: expected auto position, not %d\n",
1836 			dev_name(&cxled->cxld.dev), pos);
1837 		return -EINVAL;
1838 	}
1839 
1840 	if (p->nr_targets >= p->interleave_ways) {
1841 		dev_err(&cxlr->dev, "%s: no more target slots available\n",
1842 			dev_name(&cxled->cxld.dev));
1843 		return -ENXIO;
1844 	}
1845 
1846 	/*
1847 	 * Temporarily record the endpoint decoder into the target array. Yes,
1848 	 * this means that userspace can view devices in the wrong position
1849 	 * before the region activates, and must be careful to understand when
1850 	 * it might be racing region autodiscovery.
1851 	 */
1852 	pos = p->nr_targets;
1853 	p->targets[pos] = cxled;
1854 	cxled->pos = pos;
1855 	cxled->state = CXL_DECODER_STATE_AUTO_STAGED;
1856 	p->nr_targets++;
1857 
1858 	return 0;
1859 }
1860 
1861 static int cmp_interleave_pos(const void *a, const void *b)
1862 {
1863 	struct cxl_endpoint_decoder *cxled_a = *(typeof(cxled_a) *)a;
1864 	struct cxl_endpoint_decoder *cxled_b = *(typeof(cxled_b) *)b;
1865 
1866 	return cxled_a->pos - cxled_b->pos;
1867 }
1868 
1869 static int match_switch_decoder_by_range(struct device *dev,
1870 					 const void *data)
1871 {
1872 	struct cxl_switch_decoder *cxlsd;
1873 	const struct range *r1, *r2 = data;
1874 
1875 
1876 	if (!is_switch_decoder(dev))
1877 		return 0;
1878 
1879 	cxlsd = to_cxl_switch_decoder(dev);
1880 	r1 = &cxlsd->cxld.hpa_range;
1881 
1882 	if (is_root_decoder(dev))
1883 		return range_contains(r1, r2);
1884 	return (r1->start == r2->start && r1->end == r2->end);
1885 }
1886 
1887 static int find_pos_and_ways(struct cxl_port *port, struct range *range,
1888 			     int *pos, int *ways)
1889 {
1890 	struct cxl_switch_decoder *cxlsd;
1891 	struct cxl_port *parent;
1892 	struct device *dev;
1893 	int rc = -ENXIO;
1894 
1895 	parent = parent_port_of(port);
1896 	if (!parent)
1897 		return rc;
1898 
1899 	dev = device_find_child(&parent->dev, range,
1900 				match_switch_decoder_by_range);
1901 	if (!dev) {
1902 		dev_err(port->uport_dev,
1903 			"failed to find decoder mapping %#llx-%#llx\n",
1904 			range->start, range->end);
1905 		return rc;
1906 	}
1907 	cxlsd = to_cxl_switch_decoder(dev);
1908 	*ways = cxlsd->cxld.interleave_ways;
1909 
1910 	for (int i = 0; i < *ways; i++) {
1911 		if (cxlsd->target[i] == port->parent_dport) {
1912 			*pos = i;
1913 			rc = 0;
1914 			break;
1915 		}
1916 	}
1917 	put_device(dev);
1918 
1919 	if (rc)
1920 		dev_err(port->uport_dev,
1921 			"failed to find %s:%s in target list of %s\n",
1922 			dev_name(&port->dev),
1923 			dev_name(port->parent_dport->dport_dev),
1924 			dev_name(&cxlsd->cxld.dev));
1925 
1926 	return rc;
1927 }
1928 
1929 /**
1930  * cxl_calc_interleave_pos() - calculate an endpoint position in a region
1931  * @cxled: endpoint decoder member of given region
1932  * @hpa_range: translated HPA range of the endpoint
1933  *
1934  * The endpoint position is calculated by traversing the topology from
1935  * the endpoint to the root decoder and iteratively applying this
1936  * calculation:
1937  *
1938  *    position = position * parent_ways + parent_pos;
1939  *
1940  * ...where @position is inferred from switch and root decoder target lists.
1941  *
1942  * Return: position >= 0 on success
1943  *	   -ENXIO on failure
1944  */
1945 static int cxl_calc_interleave_pos(struct cxl_endpoint_decoder *cxled,
1946 				   struct range *hpa_range)
1947 {
1948 	struct cxl_port *iter, *port = cxled_to_port(cxled);
1949 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1950 	int parent_ways = 0, parent_pos = 0, pos = 0;
1951 	int rc;
1952 
1953 	/*
1954 	 * Example: the expected interleave order of the 4-way region shown
1955 	 * below is: mem0, mem2, mem1, mem3
1956 	 *
1957 	 *		  root_port
1958 	 *                 /      \
1959 	 *      host_bridge_0    host_bridge_1
1960 	 *        |    |           |    |
1961 	 *       mem0 mem1        mem2 mem3
1962 	 *
1963 	 * In the example the calculator will iterate twice. The first iteration
1964 	 * uses the mem position in the host-bridge and the ways of the host-
1965 	 * bridge to generate the first, or local, position. The second
1966 	 * iteration uses the host-bridge position in the root_port and the ways
1967 	 * of the root_port to refine the position.
1968 	 *
1969 	 * A trace of the calculation per endpoint looks like this:
1970 	 * mem0: pos = 0 * 2 + 0    mem2: pos = 0 * 2 + 0
1971 	 *       pos = 0 * 2 + 0          pos = 0 * 2 + 1
1972 	 *       pos: 0                   pos: 1
1973 	 *
1974 	 * mem1: pos = 0 * 2 + 1    mem3: pos = 0 * 2 + 1
1975 	 *       pos = 1 * 2 + 0          pos = 1 * 2 + 1
1976 	 *       pos: 2                   pos = 3
1977 	 *
1978 	 * Note that while this example is simple, the method applies to more
1979 	 * complex topologies, including those with switches.
1980 	 */
1981 
1982 	/* Iterate from endpoint to root_port refining the position */
1983 	for (iter = port; iter; iter = parent_port_of(iter)) {
1984 		if (is_cxl_root(iter))
1985 			break;
1986 
1987 		rc = find_pos_and_ways(iter, hpa_range, &parent_pos,
1988 				       &parent_ways);
1989 		if (rc)
1990 			return rc;
1991 
1992 		pos = pos * parent_ways + parent_pos;
1993 	}
1994 
1995 	dev_dbg(&cxlmd->dev,
1996 		"decoder:%s parent:%s port:%s range:%#llx-%#llx pos:%d\n",
1997 		dev_name(&cxled->cxld.dev), dev_name(cxlmd->dev.parent),
1998 		dev_name(&port->dev), hpa_range->start, hpa_range->end, pos);
1999 
2000 	return pos;
2001 }
2002 
2003 static int cxl_region_sort_targets(struct cxl_region *cxlr)
2004 {
2005 	struct cxl_region_params *p = &cxlr->params;
2006 	int i, rc = 0;
2007 
2008 	for (i = 0; i < p->nr_targets; i++) {
2009 		struct cxl_endpoint_decoder *cxled = p->targets[i];
2010 
2011 		cxled->pos = cxl_calc_interleave_pos(cxled, &cxlr->hpa_range);
2012 		/*
2013 		 * Record that sorting failed, but still continue to calc
2014 		 * cxled->pos so that follow-on code paths can reliably
2015 		 * do p->targets[cxled->pos] to self-reference their entry.
2016 		 */
2017 		if (cxled->pos < 0)
2018 			rc = -ENXIO;
2019 	}
2020 	/* Keep the cxlr target list in interleave position order */
2021 	sort(p->targets, p->nr_targets, sizeof(p->targets[0]),
2022 	     cmp_interleave_pos, NULL);
2023 
2024 	dev_dbg(&cxlr->dev, "region sort %s\n", rc ? "failed" : "successful");
2025 	return rc;
2026 }
2027 
2028 static int cxl_region_attach(struct cxl_region *cxlr,
2029 			     struct cxl_endpoint_decoder *cxled, int pos)
2030 {
2031 	struct cxl_root_decoder *cxlrd = cxlr->cxlrd;
2032 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
2033 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
2034 	struct cxl_region_params *p = &cxlr->params;
2035 	struct cxl_port *ep_port, *root_port;
2036 	struct cxl_dport *dport;
2037 	int rc = -ENXIO;
2038 
2039 	rc = check_interleave_cap(&cxled->cxld, p->interleave_ways,
2040 				  p->interleave_granularity);
2041 	if (rc) {
2042 		dev_dbg(&cxlr->dev, "%s iw: %d ig: %d is not supported\n",
2043 			dev_name(&cxled->cxld.dev), p->interleave_ways,
2044 			p->interleave_granularity);
2045 		return rc;
2046 	}
2047 
2048 	if (cxled->part < 0) {
2049 		dev_dbg(&cxlr->dev, "%s dead\n", dev_name(&cxled->cxld.dev));
2050 		return -ENODEV;
2051 	}
2052 
2053 	if (cxlds->part[cxled->part].mode != cxlr->mode) {
2054 		dev_dbg(&cxlr->dev, "%s region mode: %d mismatch\n",
2055 			dev_name(&cxled->cxld.dev), cxlr->mode);
2056 		return -EINVAL;
2057 	}
2058 
2059 	/* all full of members, or interleave config not established? */
2060 	if (p->state > CXL_CONFIG_INTERLEAVE_ACTIVE) {
2061 		dev_dbg(&cxlr->dev, "region already active\n");
2062 		return -EBUSY;
2063 	}
2064 
2065 	if (p->state < CXL_CONFIG_INTERLEAVE_ACTIVE) {
2066 		dev_dbg(&cxlr->dev, "interleave config missing\n");
2067 		return -ENXIO;
2068 	}
2069 
2070 	if (p->nr_targets >= p->interleave_ways) {
2071 		dev_dbg(&cxlr->dev, "region already has %d endpoints\n",
2072 			p->nr_targets);
2073 		return -EINVAL;
2074 	}
2075 
2076 	ep_port = cxled_to_port(cxled);
2077 	root_port = cxlrd_to_port(cxlrd);
2078 	dport = cxl_find_dport_by_dev(root_port, ep_port->host_bridge);
2079 	if (!dport) {
2080 		dev_dbg(&cxlr->dev, "%s:%s invalid target for %s\n",
2081 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
2082 			dev_name(cxlr->dev.parent));
2083 		return -ENXIO;
2084 	}
2085 
2086 	if (cxled->cxld.target_type != cxlr->type) {
2087 		dev_dbg(&cxlr->dev, "%s:%s type mismatch: %d vs %d\n",
2088 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
2089 			cxled->cxld.target_type, cxlr->type);
2090 		return -ENXIO;
2091 	}
2092 
2093 	if (!cxled->dpa_res) {
2094 		dev_dbg(&cxlr->dev, "%s:%s: missing DPA allocation.\n",
2095 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev));
2096 		return -ENXIO;
2097 	}
2098 
2099 	if (resource_size(cxled->dpa_res) * p->interleave_ways + p->cache_size !=
2100 	    resource_size(p->res)) {
2101 		dev_dbg(&cxlr->dev,
2102 			"%s:%s-size-%#llx * ways-%d + cache-%#llx != region-size-%#llx\n",
2103 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
2104 			(u64)resource_size(cxled->dpa_res), p->interleave_ways,
2105 			(u64)p->cache_size, (u64)resource_size(p->res));
2106 		return -EINVAL;
2107 	}
2108 
2109 	cxl_region_perf_data_calculate(cxlr, cxled);
2110 
2111 	if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
2112 		int i;
2113 
2114 		rc = cxl_region_attach_auto(cxlr, cxled, pos);
2115 		if (rc)
2116 			return rc;
2117 
2118 		/* await more targets to arrive... */
2119 		if (p->nr_targets < p->interleave_ways)
2120 			return 0;
2121 
2122 		/*
2123 		 * All targets are here, which implies all PCI enumeration that
2124 		 * affects this region has been completed. Walk the topology to
2125 		 * sort the devices into their relative region decode position.
2126 		 */
2127 		rc = cxl_region_sort_targets(cxlr);
2128 		if (rc)
2129 			return rc;
2130 
2131 		for (i = 0; i < p->nr_targets; i++) {
2132 			cxled = p->targets[i];
2133 			ep_port = cxled_to_port(cxled);
2134 			dport = cxl_find_dport_by_dev(root_port,
2135 						      ep_port->host_bridge);
2136 			rc = cxl_region_attach_position(cxlr, cxled, dport, i);
2137 			if (rc)
2138 				return rc;
2139 		}
2140 
2141 		rc = cxl_region_setup_targets(cxlr);
2142 		if (rc)
2143 			return rc;
2144 
2145 		/*
2146 		 * If target setup succeeds in the autodiscovery case
2147 		 * then the region is already committed.
2148 		 */
2149 		p->state = CXL_CONFIG_COMMIT;
2150 		cxl_region_shared_upstream_bandwidth_update(cxlr);
2151 
2152 		return 0;
2153 	}
2154 
2155 	rc = cxl_region_validate_position(cxlr, cxled, pos);
2156 	if (rc)
2157 		return rc;
2158 
2159 	rc = cxl_region_attach_position(cxlr, cxled, dport, pos);
2160 	if (rc)
2161 		return rc;
2162 
2163 	p->targets[pos] = cxled;
2164 	cxled->pos = pos;
2165 	p->nr_targets++;
2166 
2167 	if (p->nr_targets == p->interleave_ways) {
2168 		rc = cxl_region_setup_targets(cxlr);
2169 		if (rc)
2170 			return rc;
2171 		p->state = CXL_CONFIG_ACTIVE;
2172 		cxl_region_shared_upstream_bandwidth_update(cxlr);
2173 	}
2174 
2175 	cxled->cxld.interleave_ways = p->interleave_ways;
2176 	cxled->cxld.interleave_granularity = p->interleave_granularity;
2177 	cxled->cxld.hpa_range = (struct range) {
2178 		.start = p->res->start,
2179 		.end = p->res->end,
2180 	};
2181 
2182 	if (p->nr_targets != p->interleave_ways)
2183 		return 0;
2184 
2185 	/*
2186 	 * Test the auto-discovery position calculator function
2187 	 * against this successfully created user-defined region.
2188 	 * A fail message here means that this interleave config
2189 	 * will fail when presented as CXL_REGION_F_AUTO.
2190 	 */
2191 	for (int i = 0; i < p->nr_targets; i++) {
2192 		struct cxl_endpoint_decoder *cxled = p->targets[i];
2193 		int test_pos;
2194 
2195 		test_pos = cxl_calc_interleave_pos(cxled, &cxlr->hpa_range);
2196 		dev_dbg(&cxled->cxld.dev,
2197 			"Test cxl_calc_interleave_pos(): %s test_pos:%d cxled->pos:%d\n",
2198 			(test_pos == cxled->pos) ? "success" : "fail",
2199 			test_pos, cxled->pos);
2200 	}
2201 
2202 	return 0;
2203 }
2204 
2205 static int cxl_region_by_target(struct device *dev, const void *data)
2206 {
2207 	const struct cxl_endpoint_decoder *cxled = data;
2208 	struct cxl_region_params *p;
2209 	struct cxl_region *cxlr;
2210 
2211 	if (!is_cxl_region(dev))
2212 		return 0;
2213 
2214 	cxlr = to_cxl_region(dev);
2215 	p = &cxlr->params;
2216 	return p->targets[cxled->pos] == cxled;
2217 }
2218 
2219 /*
2220  * When an auto-region fails to assemble the decoder may be listed as a target,
2221  * but not fully attached.
2222  */
2223 static void cxl_cancel_auto_attach(struct cxl_endpoint_decoder *cxled)
2224 {
2225 	struct cxl_region_params *p;
2226 	struct cxl_region *cxlr;
2227 	int pos = cxled->pos;
2228 
2229 	if (cxled->state != CXL_DECODER_STATE_AUTO_STAGED)
2230 		return;
2231 
2232 	struct device *dev __free(put_device) =
2233 		bus_find_device(&cxl_bus_type, NULL, cxled, cxl_region_by_target);
2234 	if (!dev)
2235 		return;
2236 
2237 	cxlr = to_cxl_region(dev);
2238 	p = &cxlr->params;
2239 
2240 	p->nr_targets--;
2241 	cxled->state = CXL_DECODER_STATE_AUTO;
2242 	cxled->pos = -1;
2243 	p->targets[pos] = NULL;
2244 }
2245 
2246 static struct cxl_region *
2247 __cxl_decoder_detach(struct cxl_region *cxlr,
2248 		     struct cxl_endpoint_decoder *cxled, int pos,
2249 		     enum cxl_detach_mode mode)
2250 {
2251 	struct cxl_region_params *p;
2252 
2253 	lockdep_assert_held_write(&cxl_rwsem.region);
2254 
2255 	if (!cxled) {
2256 		p = &cxlr->params;
2257 
2258 		if (pos >= p->interleave_ways) {
2259 			dev_dbg(&cxlr->dev, "position %d out of range %d\n",
2260 				pos, p->interleave_ways);
2261 			return NULL;
2262 		}
2263 
2264 		if (!p->targets[pos])
2265 			return NULL;
2266 		cxled = p->targets[pos];
2267 	} else {
2268 		cxlr = cxled->cxld.region;
2269 		if (!cxlr) {
2270 			cxl_cancel_auto_attach(cxled);
2271 			return NULL;
2272 		}
2273 		p = &cxlr->params;
2274 	}
2275 
2276 	if (mode == DETACH_INVALIDATE)
2277 		cxled->part = -1;
2278 
2279 	if (p->state > CXL_CONFIG_ACTIVE) {
2280 		cxl_region_decode_reset(cxlr, p->interleave_ways);
2281 		p->state = CXL_CONFIG_ACTIVE;
2282 	}
2283 
2284 	for (struct cxl_port *iter = cxled_to_port(cxled); !is_cxl_root(iter);
2285 	     iter = to_cxl_port(iter->dev.parent))
2286 		cxl_port_detach_region(iter, cxlr, cxled);
2287 
2288 	if (cxled->pos < 0 || cxled->pos >= p->interleave_ways ||
2289 	    p->targets[cxled->pos] != cxled) {
2290 		struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
2291 
2292 		dev_WARN_ONCE(&cxlr->dev, 1, "expected %s:%s at position %d\n",
2293 			      dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
2294 			      cxled->pos);
2295 		return NULL;
2296 	}
2297 
2298 	if (p->state == CXL_CONFIG_ACTIVE) {
2299 		p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
2300 		cxl_region_teardown_targets(cxlr);
2301 	}
2302 	p->targets[cxled->pos] = NULL;
2303 	p->nr_targets--;
2304 	cxled->cxld.hpa_range = (struct range) {
2305 		.start = 0,
2306 		.end = -1,
2307 	};
2308 
2309 	get_device(&cxlr->dev);
2310 	return cxlr;
2311 }
2312 
2313 /*
2314  * Cleanup a decoder's interest in a region. There are 2 cases to
2315  * handle, removing an unknown @cxled from a known position in a region
2316  * (detach_target()) or removing a known @cxled from an unknown @cxlr
2317  * (cxld_unregister())
2318  *
2319  * When the detachment finds a region release the region driver.
2320  * When the detachment finds a region, release the region driver.
2321 int cxl_decoder_detach(struct cxl_region *cxlr,
2322 		       struct cxl_endpoint_decoder *cxled, int pos,
2323 		       enum cxl_detach_mode mode)
2324 {
2325 	struct cxl_region *detach;
2326 
2327 	/* when the decoder is being destroyed lock unconditionally */
2328 	if (mode == DETACH_INVALIDATE) {
2329 		guard(rwsem_write)(&cxl_rwsem.region);
2330 		detach = __cxl_decoder_detach(cxlr, cxled, pos, mode);
2331 	} else {
2332 		int rc;
2333 
2334 		ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
2335 		if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
2336 			return rc;
2337 		detach = __cxl_decoder_detach(cxlr, cxled, pos, mode);
2338 	}
2339 
2340 	if (detach) {
2341 		device_release_driver(&detach->dev);
2342 		put_device(&detach->dev);
2343 	}
2344 	return 0;
2345 }
2346 
2347 static int __attach_target(struct cxl_region *cxlr,
2348 			   struct cxl_endpoint_decoder *cxled, int pos,
2349 			   unsigned int state)
2350 {
2351 	int rc;
2352 
2353 	if (state == TASK_INTERRUPTIBLE) {
2354 		ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
2355 		if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
2356 			return rc;
2357 		guard(rwsem_read)(&cxl_rwsem.dpa);
2358 		return cxl_region_attach(cxlr, cxled, pos);
2359 	}
2360 	guard(rwsem_write)(&cxl_rwsem.region);
2361 	guard(rwsem_read)(&cxl_rwsem.dpa);
2362 	return cxl_region_attach(cxlr, cxled, pos);
2363 }
2364 
2365 static int attach_target(struct cxl_region *cxlr,
2366 			 struct cxl_endpoint_decoder *cxled, int pos,
2367 			 unsigned int state)
2368 {
2369 	int rc = __attach_target(cxlr, cxled, pos, state);
2370 
2371 	if (rc == 0)
2372 		return 0;
2373 
2374 	dev_warn(cxled->cxld.dev.parent, "failed to attach %s to %s: %d\n",
2375 		 dev_name(&cxled->cxld.dev), dev_name(&cxlr->dev), rc);
2376 	return rc;
2377 }
2378 
2379 static int detach_target(struct cxl_region *cxlr, int pos)
2380 {
2381 	return cxl_decoder_detach(cxlr, NULL, pos, DETACH_ONLY);
2382 }
2383 
2384 static ssize_t store_targetN(struct cxl_region *cxlr, const char *buf, int pos,
2385 			    size_t len)
2386 {
2387 	int rc;
2388 
2389 	if (sysfs_streq(buf, "\n"))
2390 		rc = detach_target(cxlr, pos);
2391 	else {
2392 		struct device *dev;
2393 
2394 		dev = bus_find_device_by_name(&cxl_bus_type, NULL, buf);
2395 		if (!dev)
2396 			return -ENODEV;
2397 
2398 		if (!is_endpoint_decoder(dev)) {
2399 			rc = -EINVAL;
2400 			goto out;
2401 		}
2402 
2403 		rc = attach_target(cxlr, to_cxl_endpoint_decoder(dev), pos,
2404 				   TASK_INTERRUPTIBLE);
2405 out:
2406 		put_device(dev);
2407 	}
2408 
2409 	if (rc < 0)
2410 		return rc;
2411 	return len;
2412 }
2413 
2414 #define TARGET_ATTR_RW(n)                                              \
2415 static ssize_t target##n##_show(                                       \
2416 	struct device *dev, struct device_attribute *attr, char *buf)  \
2417 {                                                                      \
2418 	return show_targetN(to_cxl_region(dev), buf, (n));             \
2419 }                                                                      \
2420 static ssize_t target##n##_store(struct device *dev,                   \
2421 				 struct device_attribute *attr,        \
2422 				 const char *buf, size_t len)          \
2423 {                                                                      \
2424 	return store_targetN(to_cxl_region(dev), buf, (n), len);       \
2425 }                                                                      \
2426 static DEVICE_ATTR_RW(target##n)
2427 
2428 TARGET_ATTR_RW(0);
2429 TARGET_ATTR_RW(1);
2430 TARGET_ATTR_RW(2);
2431 TARGET_ATTR_RW(3);
2432 TARGET_ATTR_RW(4);
2433 TARGET_ATTR_RW(5);
2434 TARGET_ATTR_RW(6);
2435 TARGET_ATTR_RW(7);
2436 TARGET_ATTR_RW(8);
2437 TARGET_ATTR_RW(9);
2438 TARGET_ATTR_RW(10);
2439 TARGET_ATTR_RW(11);
2440 TARGET_ATTR_RW(12);
2441 TARGET_ATTR_RW(13);
2442 TARGET_ATTR_RW(14);
2443 TARGET_ATTR_RW(15);
2444 
2445 static struct attribute *target_attrs[] = {
2446 	&dev_attr_target0.attr,
2447 	&dev_attr_target1.attr,
2448 	&dev_attr_target2.attr,
2449 	&dev_attr_target3.attr,
2450 	&dev_attr_target4.attr,
2451 	&dev_attr_target5.attr,
2452 	&dev_attr_target6.attr,
2453 	&dev_attr_target7.attr,
2454 	&dev_attr_target8.attr,
2455 	&dev_attr_target9.attr,
2456 	&dev_attr_target10.attr,
2457 	&dev_attr_target11.attr,
2458 	&dev_attr_target12.attr,
2459 	&dev_attr_target13.attr,
2460 	&dev_attr_target14.attr,
2461 	&dev_attr_target15.attr,
2462 	NULL,
2463 };
2464 
2465 static umode_t cxl_region_target_visible(struct kobject *kobj,
2466 					 struct attribute *a, int n)
2467 {
2468 	struct device *dev = kobj_to_dev(kobj);
2469 	struct cxl_region *cxlr = to_cxl_region(dev);
2470 	struct cxl_region_params *p = &cxlr->params;
2471 
2472 	if (n < p->interleave_ways)
2473 		return a->mode;
2474 	return 0;
2475 }
2476 
2477 static const struct attribute_group cxl_region_target_group = {
2478 	.attrs = target_attrs,
2479 	.is_visible = cxl_region_target_visible,
2480 };
2481 
2482 static const struct attribute_group *get_cxl_region_target_group(void)
2483 {
2484 	return &cxl_region_target_group;
2485 }
2486 
2487 static const struct attribute_group *region_groups[] = {
2488 	&cxl_base_attribute_group,
2489 	&cxl_region_group,
2490 	&cxl_region_target_group,
2491 	&cxl_region_access0_coordinate_group,
2492 	&cxl_region_access1_coordinate_group,
2493 	NULL,
2494 };
2495 
2496 static void cxl_region_release(struct device *dev)
2497 {
2498 	struct cxl_region *cxlr = to_cxl_region(dev);
2499 	struct cxl_root_decoder *cxlrd = cxlr->cxlrd;
2500 	int id = atomic_read(&cxlrd->region_id);
2501 
2502 	/*
2503 	 * Try to reuse the recently idled id rather than the cached
2504 	 * next id to prevent the region id space from increasing
2505 	 * unnecessarily.
2506 	 */
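	/*
	 * Illustrative example: if the cached next id is 2 and region1
	 * (id 1) is being released, the cmpxchg below moves region_id back
	 * to 1 and frees id 2, keeping the id space from growing past the
	 * peak region count.
	 */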
2507 	if (cxlr->id < id)
2508 		if (atomic_try_cmpxchg(&cxlrd->region_id, &id, cxlr->id)) {
2509 			memregion_free(id);
2510 			goto out;
2511 		}
2512 
2513 	memregion_free(cxlr->id);
2514 out:
2515 	put_device(dev->parent);
2516 	kfree(cxlr);
2517 }
2518 
2519 const struct device_type cxl_region_type = {
2520 	.name = "cxl_region",
2521 	.release = cxl_region_release,
2522 	.groups = region_groups
2523 };
2524 
2525 bool is_cxl_region(struct device *dev)
2526 {
2527 	return dev->type == &cxl_region_type;
2528 }
2529 EXPORT_SYMBOL_NS_GPL(is_cxl_region, "CXL");
2530 
2531 static struct cxl_region *to_cxl_region(struct device *dev)
2532 {
2533 	if (dev_WARN_ONCE(dev, dev->type != &cxl_region_type,
2534 			  "not a cxl_region device\n"))
2535 		return NULL;
2536 
2537 	return container_of(dev, struct cxl_region, dev);
2538 }
2539 
2540 static void unregister_region(void *_cxlr)
2541 {
2542 	struct cxl_region *cxlr = _cxlr;
2543 	struct cxl_region_params *p = &cxlr->params;
2544 	int i;
2545 
2546 	device_del(&cxlr->dev);
2547 
2548 	/*
2549 	 * Now that region sysfs is shut down, the parameter block is
2550 	 * read-only, so no need to hold the region rwsem to access the
2551 	 * region parameters.
2552 	 */
2553 	for (i = 0; i < p->interleave_ways; i++)
2554 		detach_target(cxlr, i);
2555 
2556 	cxlr->hpa_range = DEFINE_RANGE(0, -1);
2557 
2558 	cxl_region_iomem_release(cxlr);
2559 	put_device(&cxlr->dev);
2560 }
2561 
2562 static struct lock_class_key cxl_region_key;
2563 
2564 static struct cxl_region *cxl_region_alloc(struct cxl_root_decoder *cxlrd, int id)
2565 {
2566 	struct cxl_region *cxlr;
2567 	struct device *dev;
2568 
2569 	cxlr = kzalloc_obj(*cxlr);
2570 	if (!cxlr) {
2571 		memregion_free(id);
2572 		return ERR_PTR(-ENOMEM);
2573 	}
2574 
2575 	dev = &cxlr->dev;
2576 	device_initialize(dev);
2577 	lockdep_set_class(&dev->mutex, &cxl_region_key);
2578 	dev->parent = &cxlrd->cxlsd.cxld.dev;
2579 	/*
2580 	 * Keep root decoder pinned through cxl_region_release to fixup
2581 	 * region id allocations
2582 	 */
2583 	get_device(dev->parent);
2584 	cxlr->cxlrd = cxlrd;
2585 	cxlr->id = id;
2586 
2587 	device_set_pm_not_required(dev);
2588 	dev->bus = &cxl_bus_type;
2589 	dev->type = &cxl_region_type;
2590 	cxl_region_setup_flags(cxlr, &cxlrd->cxlsd.cxld);
2591 
2592 	return cxlr;
2593 }
2594 
2595 static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid)
2596 {
2597 	int cset = 0;
2598 	int rc;
2599 
2600 	for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
2601 		if (cxlr->coord[i].read_bandwidth) {
2602 			node_update_perf_attrs(nid, &cxlr->coord[i], i);
2603 			cset++;
2604 		}
2605 	}
2606 
2607 	if (!cset)
2608 		return false;
2609 
2610 	rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_access0_group());
2611 	if (rc)
2612 		dev_dbg(&cxlr->dev, "Failed to update access0 group\n");
2613 
2614 	rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_access1_group());
2615 	if (rc)
2616 		dev_dbg(&cxlr->dev, "Failed to update access1 group\n");
2617 
2618 	return true;
2619 }
2620 
2621 static int cxl_region_perf_attrs_callback(struct notifier_block *nb,
2622 					  unsigned long action, void *arg)
2623 {
2624 	struct cxl_region *cxlr = container_of(nb, struct cxl_region,
2625 					       node_notifier);
2626 	struct node_notify *nn = arg;
2627 	int nid = nn->nid;
2628 	int region_nid;
2629 
2630 	if (action != NODE_ADDED_FIRST_MEMORY)
2631 		return NOTIFY_DONE;
2632 
2633 	/*
2634 	 * No need to hold cxl_rwsem.region; region parameters are stable
2635 	 * within the cxl_region driver.
2636 	 */
2637 	region_nid = phys_to_target_node(cxlr->params.res->start);
2638 	if (nid != region_nid)
2639 		return NOTIFY_DONE;
2640 
2641 	/* No action needed if node bit already set */
2642 	if (node_test_and_set(nid, nodemask_region_seen))
2643 		return NOTIFY_DONE;
2644 
2645 	if (!cxl_region_update_coordinates(cxlr, nid))
2646 		return NOTIFY_DONE;
2647 
2648 	return NOTIFY_OK;
2649 }
2650 
2651 static int cxl_region_calculate_adistance(struct notifier_block *nb,
2652 					  unsigned long nid, void *data)
2653 {
2654 	struct cxl_region *cxlr = container_of(nb, struct cxl_region,
2655 					       adist_notifier);
2656 	struct access_coordinate *perf;
2657 	int *adist = data;
2658 	int region_nid;
2659 
2660 	/*
2661 	 * No need to hold cxl_rwsem.region; region parameters are stable
2662 	 * within the cxl_region driver.
2663 	 */
2664 	region_nid = phys_to_target_node(cxlr->params.res->start);
2665 	if (nid != region_nid)
2666 		return NOTIFY_OK;
2667 
2668 	perf = &cxlr->coord[ACCESS_COORDINATE_CPU];
2669 
2670 	if (mt_perf_to_adistance(perf, adist))
2671 		return NOTIFY_OK;
2672 
2673 	return NOTIFY_STOP;
2674 }
2675 
2676 /**
2677  * devm_cxl_add_region - Adds a region to a decoder
2678  * @cxlrd: root decoder
2679  * @id: memregion id to create, or memregion_free() on failure
2680  * @mode: mode for the endpoint decoders of this region
2681  * @type: select whether this is an expander or accelerator (type-2 or type-3)
2682  *
2683  * This is the second step of region initialization. Regions exist within an
2684  * address space which is mapped by a @cxlrd.
2685  *
2686  * Return: 0 if the region was added to the @cxlrd, else returns negative error
2687  * code. The region will be named "regionZ" where Z is the unique region number.
2688  */
2689 static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
2690 					      int id,
2691 					      enum cxl_partition_mode mode,
2692 					      enum cxl_decoder_type type)
2693 {
2694 	struct cxl_port *port = to_cxl_port(cxlrd->cxlsd.cxld.dev.parent);
2695 	struct cxl_region *cxlr;
2696 	struct device *dev;
2697 	int rc;
2698 
2699 	cxlr = cxl_region_alloc(cxlrd, id);
2700 	if (IS_ERR(cxlr))
2701 		return cxlr;
2702 	cxlr->mode = mode;
2703 	cxlr->type = type;
2704 
2705 	dev = &cxlr->dev;
2706 	rc = dev_set_name(dev, "region%d", id);
2707 	if (rc)
2708 		goto err;
2709 
2710 	rc = device_add(dev);
2711 	if (rc)
2712 		goto err;
2713 
2714 	rc = devm_add_action_or_reset(port->uport_dev, unregister_region, cxlr);
2715 	if (rc)
2716 		return ERR_PTR(rc);
2717 
2718 	dev_dbg(port->uport_dev, "%s: created %s\n",
2719 		dev_name(&cxlrd->cxlsd.cxld.dev), dev_name(dev));
2720 	return cxlr;
2721 
2722 err:
2723 	put_device(dev);
2724 	return ERR_PTR(rc);
2725 }
2726 
2727 static ssize_t __create_region_show(struct cxl_root_decoder *cxlrd, char *buf)
2728 {
2729 	return sysfs_emit(buf, "region%u\n", atomic_read(&cxlrd->region_id));
2730 }
2731 
2732 static ssize_t create_pmem_region_show(struct device *dev,
2733 				       struct device_attribute *attr, char *buf)
2734 {
2735 	return __create_region_show(to_cxl_root_decoder(dev), buf);
2736 }
2737 
2738 static ssize_t create_ram_region_show(struct device *dev,
2739 				      struct device_attribute *attr, char *buf)
2740 {
2741 	return __create_region_show(to_cxl_root_decoder(dev), buf);
2742 }
2743 
2744 static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd,
2745 					  enum cxl_partition_mode mode, int id,
2746 					  enum cxl_decoder_type target_type)
2747 {
2748 	int rc;
2749 
2750 	switch (mode) {
2751 	case CXL_PARTMODE_RAM:
2752 	case CXL_PARTMODE_PMEM:
2753 		break;
2754 	default:
2755 		dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %d\n", mode);
2756 		return ERR_PTR(-EINVAL);
2757 	}
2758 
2759 	rc = memregion_alloc(GFP_KERNEL);
2760 	if (rc < 0)
2761 		return ERR_PTR(rc);
2762 
2763 	if (atomic_cmpxchg(&cxlrd->region_id, id, rc) != id) {
2764 		memregion_free(rc);
2765 		return ERR_PTR(-EBUSY);
2766 	}
2767 
2768 	return devm_cxl_add_region(cxlrd, id, mode, target_type);
2769 }
2770 
2771 static ssize_t create_region_store(struct device *dev, const char *buf,
2772 				   size_t len, enum cxl_partition_mode mode)
2773 {
2774 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
2775 	struct cxl_region *cxlr;
2776 	int rc, id;
2777 
2778 	rc = sscanf(buf, "region%d\n", &id);
2779 	if (rc != 1)
2780 		return -EINVAL;
2781 
2782 	cxlr = __create_region(cxlrd, mode, id, CXL_DECODER_HOSTONLYMEM);
2783 	if (IS_ERR(cxlr))
2784 		return PTR_ERR(cxlr);
2785 
2786 	return len;
2787 }
2788 
2789 static ssize_t create_pmem_region_store(struct device *dev,
2790 					struct device_attribute *attr,
2791 					const char *buf, size_t len)
2792 {
2793 	return create_region_store(dev, buf, len, CXL_PARTMODE_PMEM);
2794 }
2795 DEVICE_ATTR_RW(create_pmem_region);
2796 
2797 static ssize_t create_ram_region_store(struct device *dev,
2798 				       struct device_attribute *attr,
2799 				       const char *buf, size_t len)
2800 {
2801 	return create_region_store(dev, buf, len, CXL_PARTMODE_RAM);
2802 }
2803 DEVICE_ATTR_RW(create_ram_region);
2804 
2805 static ssize_t region_show(struct device *dev, struct device_attribute *attr,
2806 			   char *buf)
2807 {
2808 	struct cxl_decoder *cxld = to_cxl_decoder(dev);
2809 	ssize_t rc;
2810 
2811 	ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
2812 	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
2813 		return rc;
2814 
2815 	if (cxld->region)
2816 		return sysfs_emit(buf, "%s\n", dev_name(&cxld->region->dev));
2817 	return sysfs_emit(buf, "\n");
2818 }
2819 DEVICE_ATTR_RO(region);
2820 
2821 static struct cxl_region *
2822 cxl_find_region_by_name(struct cxl_root_decoder *cxlrd, const char *name)
2823 {
2824 	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
2825 	struct device *region_dev;
2826 
2827 	region_dev = device_find_child_by_name(&cxld->dev, name);
2828 	if (!region_dev)
2829 		return ERR_PTR(-ENODEV);
2830 
2831 	return to_cxl_region(region_dev);
2832 }
2833 
2834 static ssize_t delete_region_store(struct device *dev,
2835 				   struct device_attribute *attr,
2836 				   const char *buf, size_t len)
2837 {
2838 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
2839 	struct cxl_port *port = to_cxl_port(dev->parent);
2840 	struct cxl_region *cxlr;
2841 
2842 	cxlr = cxl_find_region_by_name(cxlrd, buf);
2843 	if (IS_ERR(cxlr))
2844 		return PTR_ERR(cxlr);
2845 
2846 	devm_release_action(port->uport_dev, unregister_region, cxlr);
2847 	put_device(&cxlr->dev);
2848 
2849 	return len;
2850 }
2851 DEVICE_ATTR_WO(delete_region);
2852 
2853 struct cxl_poison_context {
2854 	struct cxl_port *port;
2855 	int part;
2856 	u64 offset;
2857 };
2858 
2859 static int cxl_get_poison_unmapped(struct cxl_memdev *cxlmd,
2860 				   struct cxl_poison_context *ctx)
2861 {
2862 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
2863 	const struct resource *res;
2864 	struct resource *p, *last;
2865 	u64 offset, length;
2866 	int rc = 0;
2867 
2868 	if (ctx->part < 0)
2869 		return 0;
2870 
2871 	/*
2872 	 * Collect poison for the remaining unmapped resources after
2873 	 * poison is collected by committed endpoint decoders.
2874 	 */
2875 	for (int i = ctx->part; i < cxlds->nr_partitions; i++) {
2876 		res = &cxlds->part[i].res;
2877 		for (p = res->child, last = NULL; p; p = p->sibling)
2878 			last = p;
2879 		if (last)
2880 			offset = last->end + 1;
2881 		else
2882 			offset = res->start;
2883 		length = res->end - offset + 1;
2884 		if (!length)
2885 			break;
2886 		rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
2887 		if (rc == -EFAULT && cxlds->part[i].mode == CXL_PARTMODE_RAM)
2888 			continue;
2889 		if (rc)
2890 			break;
2891 	}
2892 
2893 	return rc;
2894 }
2895 
2896 static int poison_by_decoder(struct device *dev, void *arg)
2897 {
2898 	struct cxl_poison_context *ctx = arg;
2899 	struct cxl_endpoint_decoder *cxled;
2900 	enum cxl_partition_mode mode;
2901 	struct cxl_dev_state *cxlds;
2902 	struct cxl_memdev *cxlmd;
2903 	u64 offset, length;
2904 	int rc = 0;
2905 
2906 	if (!is_endpoint_decoder(dev))
2907 		return rc;
2908 
2909 	cxled = to_cxl_endpoint_decoder(dev);
2910 	if (!cxled->dpa_res)
2911 		return rc;
2912 
2913 	cxlmd = cxled_to_memdev(cxled);
2914 	cxlds = cxlmd->cxlds;
2915 	mode = cxlds->part[cxled->part].mode;
2916 
2917 	if (cxled->skip) {
2918 		offset = cxled->dpa_res->start - cxled->skip;
2919 		length = cxled->skip;
2920 		rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
2921 		if (rc == -EFAULT && mode == CXL_PARTMODE_RAM)
2922 			rc = 0;
2923 		if (rc)
2924 			return rc;
2925 	}
2926 
2927 	offset = cxled->dpa_res->start;
2928 	length = cxled->dpa_res->end - offset + 1;
2929 	rc = cxl_mem_get_poison(cxlmd, offset, length, cxled->cxld.region);
2930 	if (rc == -EFAULT && mode == CXL_PARTMODE_RAM)
2931 		rc = 0;
2932 	if (rc)
2933 		return rc;
2934 
2935 	/* Iterate until commit_end is reached */
2936 	if (cxled->cxld.id == ctx->port->commit_end) {
2937 		ctx->offset = cxled->dpa_res->end + 1;
2938 		ctx->part = cxled->part;
2939 		return 1;
2940 	}
2941 
2942 	return 0;
2943 }
2944 
2945 int cxl_get_poison_by_endpoint(struct cxl_port *port)
2946 {
2947 	struct cxl_poison_context ctx;
2948 	int rc = 0;
2949 
2950 	ctx = (struct cxl_poison_context) {
2951 		.port = port,
2952 		.part = -1,
2953 	};
2954 
2955 	rc = device_for_each_child(&port->dev, &ctx, poison_by_decoder);
2956 	if (rc == 1)
2957 		rc = cxl_get_poison_unmapped(to_cxl_memdev(port->uport_dev),
2958 					     &ctx);
2959 
2960 	return rc;
2961 }
2962 
2963 struct cxl_dpa_to_region_context {
2964 	struct cxl_region *cxlr;
2965 	u64 dpa;
2966 };
2967 
2968 static int __cxl_dpa_to_region(struct device *dev, void *arg)
2969 {
2970 	struct cxl_dpa_to_region_context *ctx = arg;
2971 	struct cxl_endpoint_decoder *cxled;
2972 	struct cxl_region *cxlr;
2973 	u64 dpa = ctx->dpa;
2974 
2975 	if (!is_endpoint_decoder(dev))
2976 		return 0;
2977 
2978 	cxled = to_cxl_endpoint_decoder(dev);
2979 	if (!cxled || !cxled->dpa_res || !resource_size(cxled->dpa_res))
2980 		return 0;
2981 
2982 	if (!cxl_resource_contains_addr(cxled->dpa_res, dpa))
2983 		return 0;
2984 
2985 	/*
2986 	 * Stop the region search (return 1) when an endpoint mapping is
2987 	 * found. The region may not be fully constructed, so a valid
2988 	 * cxlr in the context structure is not guaranteed.
2989 	 */
2990 	cxlr = cxled->cxld.region;
2991 	if (cxlr)
2992 		dev_dbg(dev, "dpa:0x%llx mapped in region:%s\n", dpa,
2993 			dev_name(&cxlr->dev));
2994 	else
2995 		dev_dbg(dev, "dpa:0x%llx mapped in endpoint:%s\n", dpa,
2996 			dev_name(dev));
2997 
2998 	ctx->cxlr = cxlr;
2999 
3000 	return 1;
3001 }
3002 
3003 struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev *cxlmd, u64 dpa)
3004 {
3005 	struct cxl_dpa_to_region_context ctx;
3006 	struct cxl_port *port = cxlmd->endpoint;
3007 
3008 	if (!cxlmd->dev.driver)
3009 		return NULL;
3010 
3011 	ctx = (struct cxl_dpa_to_region_context) {
3012 		.dpa = dpa,
3013 	};
3014 	if (cxl_num_decoders_committed(port))
3015 		device_for_each_child(&port->dev, &ctx, __cxl_dpa_to_region);
3016 
3017 	return ctx.cxlr;
3018 }
3019 
3020 static bool cxl_is_hpa_in_chunk(u64 hpa, struct cxl_region *cxlr, int pos)
3021 {
3022 	struct cxl_region_params *p = &cxlr->params;
3023 	int gran = p->interleave_granularity;
3024 	int ways = p->interleave_ways;
3025 	u64 offset;
3026 
3027 	/* Is the hpa in an expected chunk for its pos(-ition) */
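	/*
	 * Worked example (illustrative values): gran = 256 and ways = 2 give
	 * a 512 byte stride; for pos = 1 the offset of @hpa within its stride
	 * must fall in [256, 512) for the address to be in the expected chunk.
	 */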
3028 	offset = hpa - p->res->start;
3029 	offset = do_div(offset, gran * ways);
3030 	if ((offset >= pos * gran) && (offset < (pos + 1) * gran))
3031 		return true;
3032 
3033 	dev_dbg(&cxlr->dev,
3034 		"Addr trans fail: hpa 0x%llx not in expected chunk\n", hpa);
3035 
3036 	return false;
3037 }
3038 
3039 #define CXL_POS_ZERO 0
3040 /**
3041  * cxl_validate_translation_params() - validate encoded interleave parameters
3042  * @eiw: encoded interleave ways
3043  * @eig: encoded interleave granularity
3044  * @pos: position in interleave
3045  *
3046  * Callers pass CXL_POS_ZERO when no position parameter needs validating.
3047  *
3048  * Returns: 0 on success, -EINVAL on first invalid parameter
3049  */
3050 int cxl_validate_translation_params(u8 eiw, u16 eig, int pos)
3051 {
3052 	int ways, gran;
3053 
3054 	if (eiw_to_ways(eiw, &ways)) {
3055 		pr_debug("%s: invalid eiw=%u\n", __func__, eiw);
3056 		return -EINVAL;
3057 	}
3058 	if (eig_to_granularity(eig, &gran)) {
3059 		pr_debug("%s: invalid eig=%u\n", __func__, eig);
3060 		return -EINVAL;
3061 	}
3062 	if (pos < 0 || pos >= ways) {
3063 		pr_debug("%s: invalid pos=%d for ways=%u\n", __func__, pos,
3064 			 ways);
3065 		return -EINVAL;
3066 	}
3067 
3068 	return 0;
3069 }
3070 EXPORT_SYMBOL_FOR_MODULES(cxl_validate_translation_params, "cxl_translate");
3071 
3072 u64 cxl_calculate_dpa_offset(u64 hpa_offset, u8 eiw, u16 eig)
3073 {
3074 	u64 dpa_offset, bits_lower, bits_upper, temp;
3075 	int ret;
3076 
3077 	ret = cxl_validate_translation_params(eiw, eig, CXL_POS_ZERO);
3078 	if (ret)
3079 		return ULLONG_MAX;
3080 
3081 	/*
3082 	 * DPA offset: CXL Spec 3.2 Section 8.2.4.20.13
3083 	 * Lower bits [IG+7:0] pass through unchanged
3084 	 * (eiw < 8)
3085 	 *	Per spec: DPAOffset[51:IG+8] = (HPAOffset[51:IG+IW+8] >> IW)
3086 	 *	Clear the position bits to isolate upper section, then
3087 	 *	reverse the left shift by eiw that occurred during DPA->HPA
3088 	 * (eiw >= 8)
3089 	 *	Per spec: DPAOffset[51:IG+8] = HPAOffset[51:IG+IW] / 3
3090 	 *	Extract upper bits from the correct bit range and divide by 3
3091 	 *	to recover the original DPA upper bits
3092 	 */
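	/*
	 * Worked example (illustrative values): eiw = 1 (2 ways), eig = 0
	 * (256B granularity), hpa_offset = 0x300: bits_lower = 0x00 and the
	 * upper section 0x200 >> 1 = 0x100, so dpa_offset = 0x100. For
	 * eiw = 8 (3 ways) and hpa_offset = 0x500: (0x500 >> 8) / 3 = 1, so
	 * dpa_offset = 0x100 as well.
	 */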
3093 	bits_lower = hpa_offset & GENMASK_ULL(eig + 7, 0);
3094 	if (eiw < 8) {
3095 		temp = hpa_offset &= ~GENMASK_ULL(eig + eiw + 8 - 1, 0);
3096 		dpa_offset = temp >> eiw;
3097 	} else {
3098 		bits_upper = div64_u64(hpa_offset >> (eig + eiw), 3);
3099 		dpa_offset = bits_upper << (eig + 8);
3100 	}
3101 	dpa_offset |= bits_lower;
3102 
3103 	return dpa_offset;
3104 }
3105 EXPORT_SYMBOL_FOR_MODULES(cxl_calculate_dpa_offset, "cxl_translate");
3106 
3107 int cxl_calculate_position(u64 hpa_offset, u8 eiw, u16 eig)
3108 {
3109 	unsigned int ways = 0;
3110 	u64 shifted, rem;
3111 	int pos, ret;
3112 
3113 	ret = cxl_validate_translation_params(eiw, eig, CXL_POS_ZERO);
3114 	if (ret)
3115 		return ret;
3116 
3117 	if (!eiw)
3118 		/* position is 0 if no interleaving */
3119 		return 0;
3120 
3121 	/*
3122 	 * Interleave position: CXL Spec 3.2 Section 8.2.4.20.13
3123 	 * eiw < 8
3124 	 *	Position is in the IW bits at HPA_OFFSET[IG+8+IW-1:IG+8].
3125 	 *	Per spec "remove IW bits starting with bit position IG+8"
3126 	 * eiw >= 8
3127 	 *	Position is not explicitly stored in HPA_OFFSET bits. It is
3128 	 *	derived from the modulo operation of the upper bits using
3129 	 *	the total number of interleave ways.
3130 	 */
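	/*
	 * Worked example (illustrative values): eiw = 1 (2 ways), eig = 0
	 * (256B granularity), hpa_offset = 0x300: pos = (0x300 >> 8) & 0x1 = 1.
	 * For eiw = 8 (3 ways) and hpa_offset = 0x500: (0x500 >> 8) % 3 = 2.
	 */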
3131 	if (eiw < 8) {
3132 		pos = (hpa_offset >> (eig + 8)) & GENMASK(eiw - 1, 0);
3133 	} else {
3134 		shifted = hpa_offset >> (eig + 8);
3135 		eiw_to_ways(eiw, &ways);
3136 		div64_u64_rem(shifted, ways, &rem);
3137 		pos = rem;
3138 	}
3139 
3140 	return pos;
3141 }
3142 EXPORT_SYMBOL_FOR_MODULES(cxl_calculate_position, "cxl_translate");
3143 
3144 u64 cxl_calculate_hpa_offset(u64 dpa_offset, int pos, u8 eiw, u16 eig)
3145 {
3146 	u64 mask_upper, hpa_offset, bits_upper;
3147 	int ret;
3148 
3149 	ret = cxl_validate_translation_params(eiw, eig, pos);
3150 	if (ret)
3151 		return ULLONG_MAX;
3152 
3153 	/*
3154 	 * The device position in the region interleave set was removed
3155 	 * from the offset at HPA->DPA translation. To reconstruct the
3156 	 * HPA, place the 'pos' in the offset.
3157 	 *
3158 	 * The placement of 'pos' in the HPA is determined by interleave
3159 	 * ways and granularity and is defined in the CXL Spec 3.0 Section
3160 	 * 8.2.4.19.13 Implementation Note: Device Decode Logic
3161 	 */
3162 
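	/*
	 * Worked example (illustrative values): dpa_offset = 0x100, pos = 1,
	 * eiw = 1 (2 ways), eig = 0 (256B granularity): the upper bits shift
	 * to 0x200 and pos << 8 adds 0x100, so hpa_offset = 0x300. For
	 * eiw = 8 (3 ways) and pos = 2: ((1 * 3) + 2) << 8 = 0x500. Both are
	 * the inverse of the cxl_calculate_dpa_offset() examples above.
	 */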
3163 	mask_upper = GENMASK_ULL(51, eig + 8);
3164 
3165 	if (eiw < 8) {
3166 		hpa_offset = (dpa_offset & mask_upper) << eiw;
3167 		hpa_offset |= pos << (eig + 8);
3168 	} else {
3169 		bits_upper = (dpa_offset & mask_upper) >> (eig + 8);
3170 		bits_upper = bits_upper * 3;
3171 		hpa_offset = ((bits_upper << (eiw - 8)) + pos) << (eig + 8);
3172 	}
3173 
3174 	/* The lower bits remain unchanged */
3175 	hpa_offset |= dpa_offset & GENMASK_ULL(eig + 7, 0);
3176 
3177 	return hpa_offset;
3178 }
3179 EXPORT_SYMBOL_FOR_MODULES(cxl_calculate_hpa_offset, "cxl_translate");
3180 
3181 static int decode_pos(int region_ways, int hb_ways, int pos, int *pos_port,
3182 		      int *pos_hb)
3183 {
3184 	int devices_per_hb;
3185 
3186 	/*
3187 	 * Decode for 3-6-12 way interleaves as defined in the CXL
3188 	 * Spec 4.0 9.13.1.1 Legal Interleaving Configurations.
3189 	 * Region creation should prevent invalid combinations but
3190 	 * sanity check here to avoid a silent bad decode.
3191 	 */
3192 	switch (hb_ways) {
3193 	case 3:
3194 		if (region_ways != 3 && region_ways != 6 && region_ways != 12)
3195 			return -EINVAL;
3196 		break;
3197 	case 6:
3198 		if (region_ways != 6 && region_ways != 12)
3199 			return -EINVAL;
3200 		break;
3201 	case 12:
3202 		if (region_ways != 12)
3203 			return -EINVAL;
3204 		break;
3205 	default:
3206 		return -EINVAL;
3207 	}
3208 	/*
3209 	 * Each host bridge contributes an equal number of endpoints
3210 	 * that are laid out contiguously per host bridge. Modulo
3211 	 * selects the port within a host bridge and division selects
3212 	 * the host bridge position.
3213 	 */
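	/*
	 * Illustrative example: a 6-way region across 3 host bridges has
	 * devices_per_hb = 2, so pos = 3 decodes to pos_port = 1 (3 % 2)
	 * and pos_hb = 1 (3 / 2).
	 */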
3214 	devices_per_hb = region_ways / hb_ways;
3215 	*pos_port = pos % devices_per_hb;
3216 	*pos_hb = pos / devices_per_hb;
3217 
3218 	return 0;
3219 }
3220 
3221 /*
3222  * restore_parent() reconstructs the address in the parent
3223  *
3224  * This math, specifically the bitmask creation 'mask = gran - 1' relies
3225  * on the CXL Spec requirement that interleave granularity is always a
3226  * power of two.
3227  *
3228  * [mask]		isolate the offset with the granularity
3229  * [addr & ~mask]	remove the offset leaving the aligned portion
3230  * [* ways]		distribute across all interleave ways
3231  * [+ (pos * gran)]	add the positional offset
3232  * [+ (addr & mask)]	restore the masked offset
3233  */
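 *
 * Illustrative example: addr = 0x140, pos = 1, gran = 0x100, ways = 3
 * gives (0x100 * 3) + (1 * 0x100) + 0x40 = 0x440, i.e. chunk index 4 of
 * the parent interleave plus the 0x40 offset within the chunk.
 */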
3234 static u64 restore_parent(u64 addr, u64 pos, u64 gran, u64 ways)
3235 {
3236 	u64 mask = gran - 1;
3237 
3238 	return ((addr & ~mask) * ways) + (pos * gran) + (addr & mask);
3239 }
3240 
3241 /*
3242  * unaligned_dpa_to_hpa() translates a DPA to HPA when the region resource
3243  * start address is not aligned at Host Bridge Interleave Ways * 256MB.
3244  *
3245  * Unaligned start addresses only occur with MOD3 interleaves. All power-
3246  * of-two interleaves are guaranteed aligned.
3247  */
3248 static u64 unaligned_dpa_to_hpa(struct cxl_decoder *cxld,
3249 				struct cxl_region_params *p, int pos, u64 dpa)
3250 {
3251 	int ways_port = p->interleave_ways / cxld->interleave_ways;
3252 	int gran_port = p->interleave_granularity;
3253 	int gran_hb = cxld->interleave_granularity;
3254 	int ways_hb = cxld->interleave_ways;
3255 	int pos_port, pos_hb, gran_shift;
3256 	u64 hpa_port = 0;
3257 
3258 	/* Decode an endpoint 'pos' into port and host-bridge components */
3259 	if (decode_pos(p->interleave_ways, ways_hb, pos, &pos_port, &pos_hb)) {
3260 		dev_dbg(&cxld->dev, "not supported for region ways:%d\n",
3261 			p->interleave_ways);
3262 		return ULLONG_MAX;
3263 	}
3264 
3265 	/* Restore the port parent address if needed */
3266 	if (gran_hb != gran_port)
3267 		hpa_port = restore_parent(dpa, pos_port, gran_port, ways_port);
3268 	else
3269 		hpa_port = dpa;
3270 
3271 	/*
3272 	 * Complete the HPA reconstruction by restoring the address as if
3273 	 * each HB position is a candidate. Test against expected pos_hb
3274 	 * to confirm match.
3275 	 */
3276 	gran_shift = ilog2(gran_hb);
3277 	for (int position = 0; position < ways_hb; position++) {
3278 		u64 shifted, hpa;
3279 
3280 		hpa = restore_parent(hpa_port, position, gran_hb, ways_hb);
3281 		hpa += p->res->start;
3282 
3283 		shifted = hpa >> gran_shift;
3284 		if (do_div(shifted, ways_hb) == pos_hb)
3285 			return hpa;
3286 	}
3287 
3288 	dev_dbg(&cxld->dev, "fail dpa:%#llx region:%pr pos:%d\n", dpa, p->res,
3289 		pos);
3290 	dev_dbg(&cxld->dev, "     port-w/g/p:%d/%d/%d hb-w/g/p:%d/%d/%d\n",
3291 		ways_port, gran_port, pos_port, ways_hb, gran_hb, pos_hb);
3292 
3293 	return ULLONG_MAX;
3294 }
3295 
3296 static bool region_is_unaligned_mod3(struct cxl_region *cxlr)
3297 {
3298 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
3299 	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
3300 	struct cxl_region_params *p = &cxlr->params;
3301 	int hbiw = cxld->interleave_ways;
3302 	u64 rem;
3303 
3304 	if (is_power_of_2(hbiw))
3305 		return false;
3306 
3307 	div64_u64_rem(p->res->start, (u64)hbiw * SZ_256M, &rem);
3308 
3309 	return (rem != 0);
3310 }
3311 
3312 u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
3313 		   u64 dpa)
3314 {
3315 	struct cxl_root_decoder *cxlrd = cxlr->cxlrd;
3316 	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
3317 	struct cxl_region_params *p = &cxlr->params;
3318 	struct cxl_endpoint_decoder *cxled = NULL;
3319 	u64 base, dpa_offset, hpa_offset, hpa;
3320 	bool unaligned = false;
3321 	u16 eig = 0;
3322 	u8 eiw = 0;
3323 	int pos;
3324 
3325 	/*
3326 	 * Conversion between SPA and DPA is not supported in
3327 	 * Normalized Address mode.
3328 	 */
3329 	if (test_bit(CXL_REGION_F_NORMALIZED_ADDRESSING, &cxlr->flags))
3330 		return ULLONG_MAX;
3331 
3332 	for (int i = 0; i < p->nr_targets; i++) {
3333 		if (cxlmd == cxled_to_memdev(p->targets[i])) {
3334 			cxled = p->targets[i];
3335 			break;
3336 		}
3337 	}
3338 	if (!cxled)
3339 		return ULLONG_MAX;
3340 
3341 	base = cxl_dpa_resource_start(cxled);
3342 	if (base == RESOURCE_SIZE_MAX)
3343 		return ULLONG_MAX;
3344 
3345 	dpa_offset = dpa - base;
3346 
3347 	/* Unaligned calc for MOD3 interleaves not hbiw * 256MB aligned */
3348 	unaligned = region_is_unaligned_mod3(cxlr);
3349 	if (unaligned) {
3350 		hpa = unaligned_dpa_to_hpa(cxld, p, cxled->pos, dpa_offset);
3351 		if (hpa == ULLONG_MAX)
3352 			return ULLONG_MAX;
3353 
3354 		goto skip_aligned;
3355 	}
3356 	/*
3357 	 * Aligned calc for all power-of-2 interleaves and for MOD3
3358 	 * interleaves that are aligned at hbiw * 256MB
3359 	 */
3360 	pos = cxled->pos;
3361 	ways_to_eiw(p->interleave_ways, &eiw);
3362 	granularity_to_eig(p->interleave_granularity, &eig);
3363 
3364 	hpa_offset = cxl_calculate_hpa_offset(dpa_offset, pos, eiw, eig);
3365 	if (hpa_offset == ULLONG_MAX)
3366 		return ULLONG_MAX;
3367 
3368 	/* Apply the hpa_offset to the region base address */
3369 	hpa = hpa_offset + p->res->start;
3370 
3371 skip_aligned:
3372 	hpa += p->cache_size;
3373 
3374 	/* Root decoder translation overrides typical modulo decode */
3375 	if (cxlrd->ops.hpa_to_spa)
3376 		hpa = cxlrd->ops.hpa_to_spa(cxlrd, hpa);
3377 
3378 	if (hpa == ULLONG_MAX)
3379 		return ULLONG_MAX;
3380 
3381 	if (!cxl_resource_contains_addr(p->res, hpa)) {
3382 		dev_dbg(&cxlr->dev,
3383 			"Addr trans fail: hpa 0x%llx not in region\n", hpa);
3384 		return ULLONG_MAX;
3385 	}
3386 	/* Chunk check applies to aligned modulo decodes only */
3387 	if (!unaligned && !cxlrd->ops.hpa_to_spa &&
3388 	    !cxl_is_hpa_in_chunk(hpa, cxlr, pos))
3389 		return ULLONG_MAX;
3390 
3391 	return hpa;
3392 }
3393 
3394 struct dpa_result {
3395 	struct cxl_memdev *cxlmd;
3396 	u64 dpa;
3397 };
3398 
3399 static int unaligned_region_offset_to_dpa_result(struct cxl_region *cxlr,
3400 						 u64 offset,
3401 						 struct dpa_result *result)
3402 {
3403 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
3404 	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
3405 	struct cxl_region_params *p = &cxlr->params;
3406 	u64 interleave_width, interleave_index;
3407 	u64 gran, gran_offset, dpa_offset;
3408 	u64 hpa = p->res->start + offset;
3409 	u64 tmp = offset;
3410 
3411 	/*
3412 	 * Unaligned addresses are not algebraically invertible. Calculate
3413 	 * a dpa_offset independent of the target device and then enumerate
3414 	 * and test that dpa_offset against each candidate endpoint decoder.
3415 	 */
3416 	gran = cxld->interleave_granularity;
3417 	interleave_width = gran * cxld->interleave_ways;
3418 	interleave_index = div64_u64(offset, interleave_width);
3419 	gran_offset = do_div(tmp, gran);
3420 
3421 	dpa_offset = interleave_index * gran + gran_offset;
3422 
3423 	for (int i = 0; i < p->nr_targets; i++) {
3424 		struct cxl_endpoint_decoder *cxled = p->targets[i];
3425 		int pos = cxled->pos;
3426 		u64 test_hpa;
3427 
3428 		test_hpa = unaligned_dpa_to_hpa(cxld, p, pos, dpa_offset);
3429 		if (test_hpa == hpa) {
3430 			result->cxlmd = cxled_to_memdev(cxled);
3431 			result->dpa =
3432 				cxl_dpa_resource_start(cxled) + dpa_offset;
3433 			return 0;
3434 		}
3435 	}
3436 	dev_err(&cxlr->dev,
3437 		"failed to resolve HPA %#llx in unaligned MOD3 region\n", hpa);
3438 
3439 	return -ENXIO;
3440 }
3441 
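/*
 * Translate an offset within the region back to the endpoint memdev and DPA
 * that back it. Aligned interleaves are inverted directly; unaligned MOD3
 * regions are handed off to unaligned_region_offset_to_dpa_result(). The
 * caller must hold cxl_rwsem.region and cxl_rwsem.dpa.
 */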
3442 static int region_offset_to_dpa_result(struct cxl_region *cxlr, u64 offset,
3443 				       struct dpa_result *result)
3444 {
3445 	struct cxl_region_params *p = &cxlr->params;
3446 	struct cxl_root_decoder *cxlrd = cxlr->cxlrd;
3447 	struct cxl_endpoint_decoder *cxled;
3448 	u64 hpa_offset = offset;
3449 	u64 dpa, dpa_offset;
3450 	u16 eig = 0;
3451 	u8 eiw = 0;
3452 	int pos;
3453 
3454 	lockdep_assert_held(&cxl_rwsem.region);
3455 	lockdep_assert_held(&cxl_rwsem.dpa);
3456 
3457 	/* Input validation ensures valid ways and gran */
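	/* Per the CXL spec encodings: granularity = 256 << eig, eiw encodes ways */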
3458 	granularity_to_eig(p->interleave_granularity, &eig);
3459 	ways_to_eiw(p->interleave_ways, &eiw);
3460 
3461 	/*
3462 	 * If the root decoder has an SPA to CXL HPA callback, use it. Otherwise
3463 	 * CXL HPA is assumed to equal SPA.
3464 	 */
3465 	if (cxlrd->ops.spa_to_hpa) {
3466 		hpa_offset = cxlrd->ops.spa_to_hpa(cxlrd, p->res->start + offset);
3467 		if (hpa_offset == ULLONG_MAX) {
3468 			dev_dbg(&cxlr->dev, "HPA not found for %pr offset %#llx\n",
3469 				p->res, offset);
3470 			return -ENXIO;
3471 		}
3472 		hpa_offset -= p->res->start;
3473 	}
3474 
3475 	if (region_is_unaligned_mod3(cxlr))
3476 		return unaligned_region_offset_to_dpa_result(cxlr, offset,
3477 							     result);
3478 
3479 	pos = cxl_calculate_position(hpa_offset, eiw, eig);
3480 	if (pos < 0 || pos >= p->nr_targets) {
3481 		dev_dbg(&cxlr->dev, "Invalid position %d for %d targets\n",
3482 			pos, p->nr_targets);
3483 		return -ENXIO;
3484 	}
3485 
3486 	dpa_offset = cxl_calculate_dpa_offset(hpa_offset, eiw, eig);
3487 
3488 	/* Look up and return the result: a memdev and a DPA */
3489 	for (int i = 0; i < p->nr_targets; i++) {
3490 		cxled = p->targets[i];
3491 		if (cxled->pos != pos)
3492 			continue;
3493 
3494 		dpa = cxl_dpa_resource_start(cxled);
3495 		if (dpa != RESOURCE_SIZE_MAX)
3496 			dpa += dpa_offset;
3497 
3498 		result->cxlmd = cxled_to_memdev(cxled);
3499 		result->dpa = dpa;
3500 
3501 		return 0;
3502 	}
3503 	dev_err(&cxlr->dev, "No device found for position %d\n", pos);
3504 
3505 	return -ENXIO;
3506 }
3507 
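/* Match a root decoder whose HPA range contains the range being looked up */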
3508 static int match_root_decoder(struct device *dev, const void *data)
3509 {
3510 	const struct range *r1, *r2 = data;
3511 	struct cxl_root_decoder *cxlrd;
3512 
3513 	if (!is_root_decoder(dev))
3514 		return 0;
3515 
3516 	cxlrd = to_cxl_root_decoder(dev);
3517 	r1 = &cxlrd->cxlsd.cxld.hpa_range;
3518 
3519 	return range_contains(r1, r2);
3520 }
3521 
3522 static int cxl_root_setup_translation(struct cxl_root *cxl_root,
3523 				      struct cxl_region_context *ctx)
3524 {
3525 	if (!cxl_root->ops.translation_setup_root)
3526 		return 0;
3527 
3528 	return cxl_root->ops.translation_setup_root(cxl_root, ctx);
3529 }
3530 
3531 /*
3532  * Note, when finished with the device, drop the reference with
3533  * put_device() or use the put_cxl_root_decoder helper.
3534  */
3535 static struct cxl_root_decoder *
3536 get_cxl_root_decoder(struct cxl_endpoint_decoder *cxled,
3537 		     struct cxl_region_context *ctx)
3538 {
3539 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
3540 	struct cxl_port *port = cxled_to_port(cxled);
3541 	struct cxl_root *cxl_root __free(put_cxl_root) = find_cxl_root(port);
3542 	struct device *cxlrd_dev;
3543 	int rc;
3544 
3545 	/*
3546 	 * Adjust the endpoint's HPA range and interleaving
3547 	 * configuration to the root decoder’s memory space before
3548 	 * setting up the root decoder.
3549 	 */
3550 	rc = cxl_root_setup_translation(cxl_root, ctx);
3551 	if (rc) {
3552 		dev_err(cxlmd->dev.parent,
3553 			"%s:%s Failed to setup translation for address range %#llx:%#llx\n",
3554 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
3555 			ctx->hpa_range.start, ctx->hpa_range.end);
3556 		return ERR_PTR(rc);
3557 	}
3558 
3559 	cxlrd_dev = device_find_child(&cxl_root->port.dev, &ctx->hpa_range,
3560 				      match_root_decoder);
3561 	if (!cxlrd_dev) {
3562 		dev_err(cxlmd->dev.parent,
3563 			"%s:%s no CXL window for range %#llx:%#llx\n",
3564 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
3565 			ctx->hpa_range.start, ctx->hpa_range.end);
3566 		return ERR_PTR(-ENXIO);
3567 	}
3568 
3569 	return to_cxl_root_decoder(cxlrd_dev);
3570 }
3571 
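/* Match a CXL region whose SPA mapping corresponds to the given HPA range */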
3572 static int match_region_by_range(struct device *dev, const void *data)
3573 {
3574 	struct cxl_region_params *p;
3575 	struct cxl_region *cxlr;
3576 	const struct range *r = data;
3577 
3578 	if (!is_cxl_region(dev))
3579 		return 0;
3580 
3581 	cxlr = to_cxl_region(dev);
3582 	p = &cxlr->params;
3583 
3584 	guard(rwsem_read)(&cxl_rwsem.region);
3585 	return spa_maps_hpa(p, r);
3586 }
3587 
3588 static int cxl_extended_linear_cache_resize(struct cxl_region *cxlr,
3589 					    struct resource *res)
3590 {
3591 	struct cxl_root_decoder *cxlrd = cxlr->cxlrd;
3592 	struct cxl_region_params *p = &cxlr->params;
3593 	resource_size_t size = resource_size(res);
3594 	resource_size_t cache_size, start;
3595 
3596 	cache_size = cxlrd->cache_size;
3597 	if (!cache_size)
3598 		return 0;
3599 
3600 	if (size != cache_size) {
3601 		dev_warn(&cxlr->dev,
3602 			 "Extended Linear Cache size %pa != CXL size %pa. No Support!",
3603 			 &cache_size, &size);
3604 		return -ENXIO;
3605 	}
3606 
3607 	/*
3608 	 * Move the start of the range to where the cache range starts. The
3609 	 * implementation assumes that the cache range is in front of the
3610 	 * CXL range. This is not dictated by the HMAT spec but is how the
3611 	 * current known implementation is configured.
3612 	 *
3613 	 * The cache range is expected to be within the CFMWS. The adjusted
3614 	 * res->start should not be less than cxlrd->res->start.
3615 	 */
3616 	start = res->start - cache_size;
3617 	if (start < cxlrd->res->start)
3618 		return -ENXIO;
3619 
3620 	res->start = start;
3621 	p->cache_size = cache_size;
3622 
3623 	return 0;
3624 }
3625 
3626 static int __construct_region(struct cxl_region *cxlr,
3627 			      struct cxl_region_context *ctx)
3628 {
3629 	struct cxl_endpoint_decoder *cxled = ctx->cxled;
3630 	struct cxl_root_decoder *cxlrd = cxlr->cxlrd;
3631 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
3632 	struct range *hpa_range = &ctx->hpa_range;
3633 	struct cxl_region_params *p;
3634 	struct resource *res;
3635 	int rc;
3636 
3637 	guard(rwsem_write)(&cxl_rwsem.region);
3638 	p = &cxlr->params;
3639 	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
3640 		dev_err(cxlmd->dev.parent,
3641 			"%s:%s: %s autodiscovery interrupted\n",
3642 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
3643 			__func__);
3644 		return -EBUSY;
3645 	}
3646 
3647 	set_bit(CXL_REGION_F_AUTO, &cxlr->flags);
3648 	cxlr->hpa_range = *hpa_range;
3649 
3650 	res = kmalloc_obj(*res);
3651 	if (!res)
3652 		return -ENOMEM;
3653 
3654 	*res = DEFINE_RES_MEM_NAMED(hpa_range->start, range_len(hpa_range),
3655 				    dev_name(&cxlr->dev));
3656 
3657 	rc = cxl_extended_linear_cache_resize(cxlr, res);
3658 	if (rc && rc != -EOPNOTSUPP) {
3659 		/*
3660 		 * Failing to resize for the extended linear cache does not
3661 		 * prevent the region from functioning; it only causes cxl list
3662 		 * to report an incorrect region size.
3663 		 */
3664 		dev_warn(cxlmd->dev.parent,
3665 			 "Extended linear cache calculation failed rc:%d\n", rc);
3666 	}
3667 
3668 	rc = sysfs_update_group(&cxlr->dev.kobj, &cxl_region_group);
3669 	if (rc) {
3670 		kfree(res);
3671 		return rc;
3672 	}
3673 
3674 	rc = insert_resource(cxlrd->res, res);
3675 	if (rc) {
3676 		/*
3677 		 * Platform firmware may not have split resources like "System
3678 		 * RAM" on CXL window boundaries, see cxl_region_iomem_release().
3679 		 */
3680 		dev_warn(cxlmd->dev.parent,
3681 			 "%s:%s: %s %s cannot insert resource\n",
3682 			 dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
3683 			 __func__, dev_name(&cxlr->dev));
3684 	}
3685 
3686 	p->res = res;
3687 	p->interleave_ways = ctx->interleave_ways;
3688 	p->interleave_granularity = ctx->interleave_granularity;
3689 	p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
3690 
3691 	rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
3692 	if (rc)
3693 		return rc;
3694 
3695 	dev_dbg(cxlmd->dev.parent, "%s:%s: %s %s res: %pr iw: %d ig: %d\n",
3696 		dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), __func__,
3697 		dev_name(&cxlr->dev), p->res, p->interleave_ways,
3698 		p->interleave_granularity);
3699 
3700 	/* ...to match put_device() in cxl_add_to_region() */
3701 	get_device(&cxlr->dev);
3702 
3703 	return 0;
3704 }
3705 
3706 /* Establish an empty region covering the given HPA range */
3707 static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
3708 					   struct cxl_region_context *ctx)
3709 {
3710 	struct cxl_endpoint_decoder *cxled = ctx->cxled;
3711 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
3712 	struct cxl_port *port = cxlrd_to_port(cxlrd);
3713 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
3714 	int rc, part = READ_ONCE(cxled->part);
3715 	struct cxl_region *cxlr;
3716 
3717 	do {
3718 		cxlr = __create_region(cxlrd, cxlds->part[part].mode,
3719 				       atomic_read(&cxlrd->region_id),
3720 				       cxled->cxld.target_type);
3721 	} while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);
3722 
3723 	if (IS_ERR(cxlr)) {
3724 		dev_err(cxlmd->dev.parent,
3725 			"%s:%s: %s failed assign region: %ld\n",
3726 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
3727 			__func__, PTR_ERR(cxlr));
3728 		return cxlr;
3729 	}
3730 
3731 	rc = __construct_region(cxlr, ctx);
3732 	if (rc) {
3733 		devm_release_action(port->uport_dev, unregister_region, cxlr);
3734 		return ERR_PTR(rc);
3735 	}
3736 
3737 	return cxlr;
3738 }
3739 
3740 static struct cxl_region *
3741 cxl_find_region_by_range(struct cxl_root_decoder *cxlrd,
3742 			 struct range *hpa_range)
3743 {
3744 	struct device *region_dev;
3745 
3746 	region_dev = device_find_child(&cxlrd->cxlsd.cxld.dev, hpa_range,
3747 				       match_region_by_range);
3748 	if (!region_dev)
3749 		return NULL;
3750 
3751 	return to_cxl_region(region_dev);
3752 }
3753 
3754 int cxl_add_to_region(struct cxl_endpoint_decoder *cxled)
3755 {
3756 	struct cxl_region_context ctx;
3757 	struct cxl_region_params *p;
3758 	bool attach = false;
3759 	int rc;
3760 
3761 	ctx = (struct cxl_region_context) {
3762 		.cxled = cxled,
3763 		.hpa_range = cxled->cxld.hpa_range,
3764 		.interleave_ways = cxled->cxld.interleave_ways,
3765 		.interleave_granularity = cxled->cxld.interleave_granularity,
3766 	};
3767 
3768 	struct cxl_root_decoder *cxlrd __free(put_cxl_root_decoder) =
3769 		get_cxl_root_decoder(cxled, &ctx);
3770 
3771 	if (IS_ERR(cxlrd))
3772 		return PTR_ERR(cxlrd);
3773 
3774 	/*
3775 	 * Ensure that, if multiple threads race to construct_region()
3776 	 * for the HPA range, one does the construction and the others
3777 	 * add to that.
3778 	 */
3779 	mutex_lock(&cxlrd->range_lock);
3780 	struct cxl_region *cxlr __free(put_cxl_region) =
3781 		cxl_find_region_by_range(cxlrd, &ctx.hpa_range);
3782 	if (!cxlr)
3783 		cxlr = construct_region(cxlrd, &ctx);
3784 	mutex_unlock(&cxlrd->range_lock);
3785 
3786 	rc = PTR_ERR_OR_ZERO(cxlr);
3787 	if (rc)
3788 		return rc;
3789 
3790 	attach_target(cxlr, cxled, -1, TASK_UNINTERRUPTIBLE);
3791 
3792 	scoped_guard(rwsem_read, &cxl_rwsem.region) {
3793 		p = &cxlr->params;
3794 		attach = p->state == CXL_CONFIG_COMMIT;
3795 	}
3796 
3797 	if (attach) {
3798 		/*
3799 		 * If device_attach() fails the range may still be active via
3800 		 * the platform-firmware memory map, otherwise the driver for
3801 		 * regions is local to this file, so driver matching can't fail.
3802 		 */
3803 		if (device_attach(&cxlr->dev) < 0)
3804 			dev_err(&cxlr->dev, "failed to enable, range: %pr\n",
3805 				p->res);
3806 	}
3807 
3808 	return rc;
3809 }
3810 EXPORT_SYMBOL_NS_GPL(cxl_add_to_region, "CXL");
3811 
3812 u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, u64 spa)
3813 {
3814 	struct cxl_region_ref *iter;
3815 	unsigned long index;
3816 
3817 	if (!endpoint)
3818 		return ~0ULL;
3819 
3820 	guard(rwsem_write)(&cxl_rwsem.region);
3821 
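	/*
	 * The extended linear cache alias is the same offset in the other
	 * half of the region: below the given SPA when it falls in the
	 * CXL-backed half, above it when it falls in the cache half.
	 */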
3822 	xa_for_each(&endpoint->regions, index, iter) {
3823 		struct cxl_region_params *p = &iter->region->params;
3824 
3825 		if (cxl_resource_contains_addr(p->res, spa)) {
3826 			if (!p->cache_size)
3827 				return ~0ULL;
3828 
3829 			if (spa >= p->res->start + p->cache_size)
3830 				return spa - p->cache_size;
3831 
3832 			return spa + p->cache_size;
3833 		}
3834 	}
3835 
3836 	return ~0ULL;
3837 }
3838 EXPORT_SYMBOL_NS_GPL(cxl_port_get_spa_cache_alias, "CXL");
3839 
3840 static int is_system_ram(struct resource *res, void *arg)
3841 {
3842 	struct cxl_region *cxlr = arg;
3843 	struct cxl_region_params *p = &cxlr->params;
3844 
3845 	dev_dbg(&cxlr->dev, "%pr has System RAM: %pr\n", p->res, res);
3846 	return 1;
3847 }
3848 
3849 static void shutdown_notifiers(void *_cxlr)
3850 {
3851 	struct cxl_region *cxlr = _cxlr;
3852 
3853 	unregister_node_notifier(&cxlr->node_notifier);
3854 	unregister_mt_adistance_algorithm(&cxlr->adist_notifier);
3855 }
3856 
3857 static void remove_debugfs(void *dentry)
3858 {
3859 	debugfs_remove_recursive(dentry);
3860 }
3861 
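/*
 * Poison inject/clear offsets must land in the CXL-backed portion of the
 * region: past any extended linear cache and within the region resource.
 */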
3862 static int validate_region_offset(struct cxl_region *cxlr, u64 offset)
3863 {
3864 	struct cxl_region_params *p = &cxlr->params;
3865 	resource_size_t region_size;
3866 	u64 hpa;
3867 
3868 	if (offset < p->cache_size) {
3869 		dev_err(&cxlr->dev,
3870 			"Offset %#llx is within extended linear cache %pa\n",
3871 			offset, &p->cache_size);
3872 		return -EINVAL;
3873 	}
3874 
3875 	region_size = resource_size(p->res);
3876 	if (offset >= region_size) {
3877 		dev_err(&cxlr->dev, "Offset %#llx exceeds region size %pa\n",
3878 			offset, &region_size);
3879 		return -EINVAL;
3880 	}
3881 
3882 	hpa = p->res->start + offset;
3883 	if (hpa < p->res->start || hpa > p->res->end) {
3884 		dev_err(&cxlr->dev, "HPA %#llx not in region %pr\n", hpa,
3885 			p->res);
3886 		return -EINVAL;
3887 	}
3888 
3889 	return 0;
3890 }
3891 
3892 static int cxl_region_debugfs_poison_inject(void *data, u64 offset)
3893 {
3894 	struct dpa_result result = { .dpa = ULLONG_MAX, .cxlmd = NULL };
3895 	struct cxl_region *cxlr = data;
3896 	int rc;
3897 
3898 	ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
3899 	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &region_rwsem)))
3900 		return rc;
3901 
3902 	ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa);
3903 	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem)))
3904 		return rc;
3905 
3906 	if (validate_region_offset(cxlr, offset))
3907 		return -EINVAL;
3908 
3909 	offset -= cxlr->params.cache_size;
3910 	rc = region_offset_to_dpa_result(cxlr, offset, &result);
3911 	if (rc || !result.cxlmd || result.dpa == ULLONG_MAX) {
3912 		dev_dbg(&cxlr->dev,
3913 			"Failed to resolve DPA for region offset %#llx rc %d\n",
3914 			offset, rc);
3915 
3916 		return rc ? rc : -EINVAL;
3917 	}
3918 
3919 	return cxl_inject_poison_locked(result.cxlmd, result.dpa);
3920 }
3921 
3922 DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_inject_fops, NULL,
3923 			 cxl_region_debugfs_poison_inject, "%llx\n");
3924 
3925 static int cxl_region_debugfs_poison_clear(void *data, u64 offset)
3926 {
3927 	struct dpa_result result = { .dpa = ULLONG_MAX, .cxlmd = NULL };
3928 	struct cxl_region *cxlr = data;
3929 	int rc;
3930 
3931 	ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
3932 	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &region_rwsem)))
3933 		return rc;
3934 
3935 	ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa);
3936 	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem)))
3937 		return rc;
3938 
3939 	if (validate_region_offset(cxlr, offset))
3940 		return -EINVAL;
3941 
3942 	offset -= cxlr->params.cache_size;
3943 	rc = region_offset_to_dpa_result(cxlr, offset, &result);
3944 	if (rc || !result.cxlmd || result.dpa == ULLONG_MAX) {
3945 		dev_dbg(&cxlr->dev,
3946 			"Failed to resolve DPA for region offset %#llx rc %d\n",
3947 			offset, rc);
3948 
3949 		return rc ? rc : -EINVAL;
3950 	}
3951 
3952 	return cxl_clear_poison_locked(result.cxlmd, result.dpa);
3953 }
3954 
3955 DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_clear_fops, NULL,
3956 			 cxl_region_debugfs_poison_clear, "%llx\n");
3957 
3958 static int cxl_region_setup_poison(struct cxl_region *cxlr)
3959 {
3960 	struct device *dev = &cxlr->dev;
3961 	struct cxl_region_params *p = &cxlr->params;
3962 	struct dentry *dentry;
3963 
3964 	/*
3965 	 * Do not enable poison injection in Normalized Address mode.
3966 	 * Conversion between SPA and DPA is required for this, but it is
3967 	 * not supported in this mode.
3968 	 */
3969 	if (test_bit(CXL_REGION_F_NORMALIZED_ADDRESSING, &cxlr->flags))
3970 		return 0;
3971 
3972 	/* Create poison attributes if all memdevs support the capabilities */
3973 	for (int i = 0; i < p->nr_targets; i++) {
3974 		struct cxl_endpoint_decoder *cxled = p->targets[i];
3975 		struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
3976 
3977 		if (!cxl_memdev_has_poison_cmd(cxlmd, CXL_POISON_ENABLED_INJECT) ||
3978 		    !cxl_memdev_has_poison_cmd(cxlmd, CXL_POISON_ENABLED_CLEAR))
3979 			return 0;
3980 	}
3981 
3982 	dentry = cxl_debugfs_create_dir(dev_name(dev));
3983 	debugfs_create_file("inject_poison", 0200, dentry, cxlr,
3984 			    &cxl_poison_inject_fops);
3985 	debugfs_create_file("clear_poison", 0200, dentry, cxlr,
3986 			    &cxl_poison_clear_fops);
3987 
3988 	return devm_add_action_or_reset(dev, remove_debugfs, dentry);
3989 }
3990 
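/* Match a committed region whose address resource fully contains @res */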
3991 static int region_contains_resource(struct device *dev, const void *data)
3992 {
3993 	const struct resource *res = data;
3994 	struct cxl_region *cxlr;
3995 	struct cxl_region_params *p;
3996 
3997 	if (!is_cxl_region(dev))
3998 		return 0;
3999 
4000 	cxlr = to_cxl_region(dev);
4001 	p = &cxlr->params;
4002 
4003 	if (p->state != CXL_CONFIG_COMMIT)
4004 		return 0;
4005 
4006 	if (!p->res)
4007 		return 0;
4008 
4009 	return resource_contains(p->res, res) ? 1 : 0;
4010 }
4011 
4012 bool cxl_region_contains_resource(const struct resource *res)
4013 {
4014 	guard(rwsem_read)(&cxl_rwsem.region);
4015 	struct device *dev __free(put_device) = bus_find_device(
4016 		&cxl_bus_type, NULL, res, region_contains_resource);
4017 	return !!dev;
4018 }
4019 EXPORT_SYMBOL_FOR_MODULES(cxl_region_contains_resource, "dax_hmem");
4020 
4021 static int cxl_region_can_probe(struct cxl_region *cxlr)
4022 {
4023 	struct cxl_region_params *p = &cxlr->params;
4024 	int rc;
4025 
4026 	ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
4027 	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem))) {
4028 		dev_dbg(&cxlr->dev, "probe interrupted\n");
4029 		return rc;
4030 	}
4031 
4032 	if (p->state < CXL_CONFIG_COMMIT) {
4033 		dev_dbg(&cxlr->dev, "config state: %d\n", p->state);
4034 		return -ENXIO;
4035 	}
4036 
4037 	if (test_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags)) {
4038 		dev_err(&cxlr->dev,
4039 			"failed to activate, re-commit region and retry\n");
4040 		return -ENXIO;
4041 	}
4042 
4043 	return 0;
4044 }
4045 
4046 static int cxl_region_probe(struct device *dev)
4047 {
4048 	struct cxl_region *cxlr = to_cxl_region(dev);
4049 	struct cxl_region_params *p = &cxlr->params;
4050 	int rc;
4051 
4052 	rc = cxl_region_can_probe(cxlr);
4053 	if (rc)
4054 		return rc;
4055 
4056 	/*
4057 	 * From this point on any path that changes the region's state away from
4058 	 * CXL_CONFIG_COMMIT is also responsible for releasing the driver.
4059 	 */
4060 
4061 	cxlr->node_notifier.notifier_call = cxl_region_perf_attrs_callback;
4062 	cxlr->node_notifier.priority = CXL_CALLBACK_PRI;
4063 	register_node_notifier(&cxlr->node_notifier);
4064 
4065 	cxlr->adist_notifier.notifier_call = cxl_region_calculate_adistance;
4066 	cxlr->adist_notifier.priority = 100;
4067 	register_mt_adistance_algorithm(&cxlr->adist_notifier);
4068 
4069 	rc = devm_add_action_or_reset(&cxlr->dev, shutdown_notifiers, cxlr);
4070 	if (rc)
4071 		return rc;
4072 
4073 	rc = cxl_region_setup_poison(cxlr);
4074 	if (rc)
4075 		return rc;
4076 
4077 	switch (cxlr->mode) {
4078 	case CXL_PARTMODE_PMEM:
4079 		rc = devm_cxl_region_edac_register(cxlr);
4080 		if (rc)
4081 			dev_dbg(&cxlr->dev, "CXL EDAC registration for region_id=%d failed\n",
4082 				cxlr->id);
4083 
4084 		return devm_cxl_add_pmem_region(cxlr);
4085 	case CXL_PARTMODE_RAM:
4086 		rc = devm_cxl_region_edac_register(cxlr);
4087 		if (rc)
4088 			dev_dbg(&cxlr->dev, "CXL EDAC registration for region_id=%d failed\n",
4089 				cxlr->id);
4090 
4091 		/*
4092 		 * The region cannot be managed by CXL if any portion of
4093 		 * it is already online as 'System RAM'.
4094 		 */
4095 		if (walk_iomem_res_desc(IORES_DESC_NONE,
4096 					IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
4097 					p->res->start, p->res->end, cxlr,
4098 					is_system_ram) > 0)
4099 			return 0;
4100 		return devm_cxl_add_dax_region(cxlr);
4101 	default:
4102 		dev_dbg(&cxlr->dev, "unsupported region mode: %d\n",
4103 			cxlr->mode);
4104 		return -ENXIO;
4105 	}
4106 }
4107 
4108 static struct cxl_driver cxl_region_driver = {
4109 	.name = "cxl_region",
4110 	.probe = cxl_region_probe,
4111 	.id = CXL_DEVICE_REGION,
4112 };
4113 
4114 int cxl_region_init(void)
4115 {
4116 	return cxl_driver_register(&cxl_region_driver);
4117 }
4118 
4119 void cxl_region_exit(void)
4120 {
4121 	cxl_driver_unregister(&cxl_region_driver);
4122 }
4123 
4124 MODULE_IMPORT_NS("CXL");
4125 MODULE_IMPORT_NS("DEVMEM");
4126 MODULE_ALIAS_CXL(CXL_DEVICE_REGION);
4127