xref: /linux/drivers/cxl/core/region.c (revision 3e7819886281e077e82006fe4804b0d6b0f5643b)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright(c) 2022 Intel Corporation. All rights reserved. */
3 #include <linux/memregion.h>
4 #include <linux/genalloc.h>
5 #include <linux/device.h>
6 #include <linux/module.h>
7 #include <linux/memory.h>
8 #include <linux/slab.h>
9 #include <linux/uuid.h>
10 #include <linux/sort.h>
11 #include <linux/idr.h>
12 #include <cxlmem.h>
13 #include <cxl.h>
14 #include "core.h"
15 
16 /**
17  * DOC: cxl core region
18  *
19  * CXL Regions represent mapped memory capacity in system physical address
20  * space. Whereas the CXL Root Decoders identify the bounds of potential CXL
21  * Memory ranges, Regions represent the active mapped capacity by the HDM
22  * Decoder Capability structures throughout the Host Bridges, Switches, and
23  * Endpoints in the topology.
24  *
25  * Region configuration has ordering constraints. UUID may be set at any time
26  * but is only visible for persistent regions.
27  * 1. Interleave granularity
28  * 2. Interleave size
29  * 3. Decoder targets
30  */
31 
/*
 * Forward declaration: the sysfs helpers below convert a 'struct device'
 * to its 'struct cxl_region' before the definition is seen.
 */
static struct cxl_region *to_cxl_region(struct device *dev);
33 
/*
 * Initializer for a read-only (mode 0444) 'struct device_attribute' whose
 * sysfs name is @_name and whose ->show() handler is the function named
 * <_name>_access<_level>_show.
 */
#define __ACCESS_ATTR_RO(_level, _name) {				\
	.attr	= { .name = __stringify(_name), .mode = 0444 },		\
	.show	= _name##_access##_level##_show,			\
}

/*
 * Define the variable dev_attr_access<level>_<name> using the
 * __ACCESS_ATTR_RO() initializer.
 */
#define ACCESS_DEVICE_ATTR_RO(level, name)	\
	struct device_attribute dev_attr_access##level##_##name = __ACCESS_ATTR_RO(level, name)
41 
/*
 * Generate the show handler <attrib>_access<level>_show() together with its
 * static device attribute. The handler prints cxlr->coord[level].<attrib>
 * as an unsigned decimal, or fails with -ENOENT when the value is 0.
 */
#define ACCESS_ATTR_RO(level, attrib)					      \
static ssize_t attrib##_access##level##_show(struct device *dev,	      \
					  struct device_attribute *attr,      \
					  char *buf)			      \
{									      \
	struct cxl_region *cxlr = to_cxl_region(dev);			      \
									      \
	if (cxlr->coord[level].attrib == 0)				      \
		return -ENOENT;						      \
									      \
	return sysfs_emit(buf, "%u\n", cxlr->coord[level].attrib);	      \
}									      \
static ACCESS_DEVICE_ATTR_RO(level, attrib)
55 
/* Show handlers and attributes backed by cxlr->coord[0] */
ACCESS_ATTR_RO(0, read_bandwidth);
ACCESS_ATTR_RO(0, read_latency);
ACCESS_ATTR_RO(0, write_bandwidth);
ACCESS_ATTR_RO(0, write_latency);

/* Address of the 'struct attribute' embedded in dev_attr_access<level>_<attrib> */
#define ACCESS_ATTR_DECLARE(level, attrib)	\
	(&dev_attr_access##level##_##attrib.attr)

/* NULL-terminated attribute list for the "access0" sysfs group */
static struct attribute *access0_coordinate_attrs[] = {
	ACCESS_ATTR_DECLARE(0, read_bandwidth),
	ACCESS_ATTR_DECLARE(0, write_bandwidth),
	ACCESS_ATTR_DECLARE(0, read_latency),
	ACCESS_ATTR_DECLARE(0, write_latency),
	NULL
};
71 
/* Show handlers and attributes backed by cxlr->coord[1] */
ACCESS_ATTR_RO(1, read_bandwidth);
ACCESS_ATTR_RO(1, read_latency);
ACCESS_ATTR_RO(1, write_bandwidth);
ACCESS_ATTR_RO(1, write_latency);

/* NULL-terminated attribute list for the "access1" sysfs group */
static struct attribute *access1_coordinate_attrs[] = {
	ACCESS_ATTR_DECLARE(1, read_bandwidth),
	ACCESS_ATTR_DECLARE(1, write_bandwidth),
	ACCESS_ATTR_DECLARE(1, read_latency),
	ACCESS_ATTR_DECLARE(1, write_latency),
	NULL
};
84 
/*
 * Generate cxl_region_access<level>_coordinate_visible(), the ->is_visible()
 * callback of the access<level> attribute group. An attribute is hidden
 * (mode 0) when its matching cxlr->coord[level] field is 0; otherwise the
 * attribute's declared mode is returned unchanged.
 */
#define ACCESS_VISIBLE(level)						\
static umode_t cxl_region_access##level##_coordinate_visible(		\
		struct kobject *kobj, struct attribute *a, int n)	\
{									\
	struct device *dev = kobj_to_dev(kobj);				\
	struct cxl_region *cxlr = to_cxl_region(dev);			\
									\
	if (a == &dev_attr_access##level##_read_latency.attr &&		\
	    cxlr->coord[level].read_latency == 0)			\
		return 0;						\
									\
	if (a == &dev_attr_access##level##_write_latency.attr &&	\
	    cxlr->coord[level].write_latency == 0)			\
		return 0;						\
									\
	if (a == &dev_attr_access##level##_read_bandwidth.attr &&	\
	    cxlr->coord[level].read_bandwidth == 0)			\
		return 0;						\
									\
	if (a == &dev_attr_access##level##_write_bandwidth.attr &&	\
	    cxlr->coord[level].write_bandwidth == 0)			\
		return 0;						\
									\
	return a->mode;							\
}

/* Visibility callbacks for the access0 and access1 groups */
ACCESS_VISIBLE(0);
ACCESS_VISIBLE(1);
113 
/* Named sysfs group "access0" exposing the coord[0] attributes */
static const struct attribute_group cxl_region_access0_coordinate_group = {
	.name = "access0",
	.attrs = access0_coordinate_attrs,
	.is_visible = cxl_region_access0_coordinate_visible,
};

/* Accessor returning the address of the "access0" attribute group */
static const struct attribute_group *get_cxl_region_access0_group(void)
{
	return &cxl_region_access0_coordinate_group;
}
124 
/* Named sysfs group "access1" exposing the coord[1] attributes */
static const struct attribute_group cxl_region_access1_coordinate_group = {
	.name = "access1",
	.attrs = access1_coordinate_attrs,
	.is_visible = cxl_region_access1_coordinate_visible,
};

/* Accessor returning the address of the "access1" attribute group */
static const struct attribute_group *get_cxl_region_access1_group(void)
{
	return &cxl_region_access1_coordinate_group;
}
135 
136 static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
137 			 char *buf)
138 {
139 	struct cxl_region *cxlr = to_cxl_region(dev);
140 	struct cxl_region_params *p = &cxlr->params;
141 	ssize_t rc;
142 
143 	rc = down_read_interruptible(&cxl_region_rwsem);
144 	if (rc)
145 		return rc;
146 	if (cxlr->mode != CXL_DECODER_PMEM)
147 		rc = sysfs_emit(buf, "\n");
148 	else
149 		rc = sysfs_emit(buf, "%pUb\n", &p->uuid);
150 	up_read(&cxl_region_rwsem);
151 
152 	return rc;
153 }
154 
155 static int is_dup(struct device *match, void *data)
156 {
157 	struct cxl_region_params *p;
158 	struct cxl_region *cxlr;
159 	uuid_t *uuid = data;
160 
161 	if (!is_cxl_region(match))
162 		return 0;
163 
164 	lockdep_assert_held(&cxl_region_rwsem);
165 	cxlr = to_cxl_region(match);
166 	p = &cxlr->params;
167 
168 	if (uuid_equal(&p->uuid, uuid)) {
169 		dev_dbg(match, "already has uuid: %pUb\n", uuid);
170 		return -EBUSY;
171 	}
172 
173 	return 0;
174 }
175 
176 static ssize_t uuid_store(struct device *dev, struct device_attribute *attr,
177 			  const char *buf, size_t len)
178 {
179 	struct cxl_region *cxlr = to_cxl_region(dev);
180 	struct cxl_region_params *p = &cxlr->params;
181 	uuid_t temp;
182 	ssize_t rc;
183 
184 	if (len != UUID_STRING_LEN + 1)
185 		return -EINVAL;
186 
187 	rc = uuid_parse(buf, &temp);
188 	if (rc)
189 		return rc;
190 
191 	if (uuid_is_null(&temp))
192 		return -EINVAL;
193 
194 	rc = down_write_killable(&cxl_region_rwsem);
195 	if (rc)
196 		return rc;
197 
198 	if (uuid_equal(&p->uuid, &temp))
199 		goto out;
200 
201 	rc = -EBUSY;
202 	if (p->state >= CXL_CONFIG_ACTIVE)
203 		goto out;
204 
205 	rc = bus_for_each_dev(&cxl_bus_type, NULL, &temp, is_dup);
206 	if (rc < 0)
207 		goto out;
208 
209 	uuid_copy(&p->uuid, &temp);
210 out:
211 	up_write(&cxl_region_rwsem);
212 
213 	if (rc)
214 		return rc;
215 	return len;
216 }
217 static DEVICE_ATTR_RW(uuid);
218 
219 static struct cxl_region_ref *cxl_rr_load(struct cxl_port *port,
220 					  struct cxl_region *cxlr)
221 {
222 	return xa_load(&port->regions, (unsigned long)cxlr);
223 }
224 
225 static int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
226 {
227 	if (!cpu_cache_has_invalidate_memregion()) {
228 		if (IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST)) {
229 			dev_info_once(
230 				&cxlr->dev,
231 				"Bypassing cpu_cache_invalidate_memregion() for testing!\n");
232 			return 0;
233 		} else {
234 			dev_err(&cxlr->dev,
235 				"Failed to synchronize CPU cache state\n");
236 			return -ENXIO;
237 		}
238 	}
239 
240 	cpu_cache_invalidate_memregion(IORES_DESC_CXL);
241 	return 0;
242 }
243 
/*
 * cxl_region_decode_reset() - reset the decoders backing a region
 * @cxlr: region being torn down
 * @count: number of entries in @cxlr's target array to reset; they are
 *	   walked from index @count - 1 down to 0
 *
 * CPU caches are invalidated first and the reset is abandoned if that fails.
 * For each target, unless its device state has ->rcd set, the walk starts at
 * the port whose parent is the CXL root and follows the cxl_ep ->next links,
 * invoking the optional ->reset() of each port's decoder for this region.
 * The endpoint decoder's ->reset() runs last. CXL_REGION_F_NEEDS_RESET is
 * set after each successful step and cleared only when every target has
 * been processed.
 *
 * Return: 0 on success, else the first error from cache invalidation or a
 * ->reset() callback.
 */
static int cxl_region_decode_reset(struct cxl_region *cxlr, int count)
{
	struct cxl_region_params *p = &cxlr->params;
	int i, rc = 0;

	/*
	 * Before region teardown attempt to flush, and if the flush
	 * fails cancel the region teardown for data consistency
	 * concerns
	 */
	rc = cxl_region_invalidate_memregion(cxlr);
	if (rc)
		return rc;

	for (i = count - 1; i >= 0; i--) {
		struct cxl_endpoint_decoder *cxled = p->targets[i];
		struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
		struct cxl_port *iter = cxled_to_port(cxled);
		struct cxl_dev_state *cxlds = cxlmd->cxlds;
		struct cxl_ep *ep;

		/* ->rcd devices skip the port walk; only the endpoint is reset */
		if (cxlds->rcd)
			goto endpoint_reset;

		/* climb to the port directly below the root */
		while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
			iter = to_cxl_port(iter->dev.parent);

		/* walk back down via ->next until it yields no further port */
		for (ep = cxl_ep_load(iter, cxlmd); iter;
		     iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
			struct cxl_region_ref *cxl_rr;
			struct cxl_decoder *cxld;

			cxl_rr = cxl_rr_load(iter, cxlr);
			cxld = cxl_rr->decoder;
			if (cxld->reset)
				rc = cxld->reset(cxld);
			if (rc)
				return rc;
			set_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
		}

endpoint_reset:
		rc = cxled->cxld.reset(&cxled->cxld);
		if (rc)
			return rc;
		set_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
	}

	/* all decoders associated with this region have been torn down */
	clear_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);

	return 0;
}
297 
298 static int commit_decoder(struct cxl_decoder *cxld)
299 {
300 	struct cxl_switch_decoder *cxlsd = NULL;
301 
302 	if (cxld->commit)
303 		return cxld->commit(cxld);
304 
305 	if (is_switch_decoder(&cxld->dev))
306 		cxlsd = to_cxl_switch_decoder(&cxld->dev);
307 
308 	if (dev_WARN_ONCE(&cxld->dev, !cxlsd || cxlsd->nr_targets > 1,
309 			  "->commit() is required\n"))
310 		return -ENXIO;
311 	return 0;
312 }
313 
/*
 * cxl_region_decode_commit() - commit every decoder backing a region
 * @cxlr: region to commit
 *
 * For each of the region's p->nr_targets targets, commit the decoders from
 * the endpoint port upward, stopping before the CXL root. If a commit
 * fails, the decoders from the failing port down the ->next chain get their
 * optional ->reset() called, the endpoint decoder is reset, and the targets
 * that were fully committed earlier are undone via
 * cxl_region_decode_reset().
 *
 * Return: 0 on success, else the error from the failing commit.
 */
static int cxl_region_decode_commit(struct cxl_region *cxlr)
{
	struct cxl_region_params *p = &cxlr->params;
	int i, rc = 0;

	for (i = 0; i < p->nr_targets; i++) {
		struct cxl_endpoint_decoder *cxled = p->targets[i];
		struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
		struct cxl_region_ref *cxl_rr;
		struct cxl_decoder *cxld;
		struct cxl_port *iter;
		struct cxl_ep *ep;

		/* commit bottom up */
		for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
		     iter = to_cxl_port(iter->dev.parent)) {
			cxl_rr = cxl_rr_load(iter, cxlr);
			cxld = cxl_rr->decoder;
			rc = commit_decoder(cxld);
			if (rc)
				break;
		}

		if (rc) {
			/* programming @iter failed, teardown */
			for (ep = cxl_ep_load(iter, cxlmd); ep && iter;
			     iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
				cxl_rr = cxl_rr_load(iter, cxlr);
				cxld = cxl_rr->decoder;
				if (cxld->reset)
					cxld->reset(cxld);
			}

			/* reset results are ignored here; rc keeps the commit error */
			cxled->cxld.reset(&cxled->cxld);
			goto err;
		}
	}

	return 0;

err:
	/* undo the targets that were successfully committed */
	cxl_region_decode_reset(cxlr, i);
	return rc;
}
359 
/*
 * sysfs 'commit' write handler.
 *
 * Accepts a boolean. Writing true commits the region's decoders and moves
 * the state to CXL_CONFIG_COMMIT; this requires the state to be at least
 * CXL_CONFIG_ACTIVE. Writing false marks the region
 * CXL_CONFIG_RESET_PENDING, releases the region driver with the rwsem
 * dropped, then resets the decoders if the reset is still pending. Writes
 * that match the current state are no-ops.
 *
 * Return: @len on success, negative errno otherwise.
 */
static ssize_t commit_store(struct device *dev, struct device_attribute *attr,
			    const char *buf, size_t len)
{
	struct cxl_region *cxlr = to_cxl_region(dev);
	struct cxl_region_params *p = &cxlr->params;
	bool commit;
	ssize_t rc;

	rc = kstrtobool(buf, &commit);
	if (rc)
		return rc;

	rc = down_write_killable(&cxl_region_rwsem);
	if (rc)
		return rc;

	/* Already in the requested state? */
	if (commit && p->state >= CXL_CONFIG_COMMIT)
		goto out;
	if (!commit && p->state < CXL_CONFIG_COMMIT)
		goto out;

	/* Not ready to commit? */
	if (commit && p->state < CXL_CONFIG_ACTIVE) {
		rc = -ENXIO;
		goto out;
	}

	/*
	 * Invalidate caches before region setup to drop any speculative
	 * consumption of this address space
	 */
	rc = cxl_region_invalidate_memregion(cxlr);
	if (rc)
		goto out;

	if (commit) {
		rc = cxl_region_decode_commit(cxlr);
		if (rc == 0)
			p->state = CXL_CONFIG_COMMIT;
	} else {
		p->state = CXL_CONFIG_RESET_PENDING;
		/* the driver is released with the rwsem dropped, then it is re-taken */
		up_write(&cxl_region_rwsem);
		device_release_driver(&cxlr->dev);
		down_write(&cxl_region_rwsem);

		/*
		 * The lock was dropped, so need to revalidate that the reset is
		 * still pending.
		 */
		if (p->state == CXL_CONFIG_RESET_PENDING) {
			rc = cxl_region_decode_reset(cxlr, p->interleave_ways);
			/*
			 * Revert to committed since there may still be active
			 * decoders associated with this region, or move forward
			 * to active to mark the reset successful
			 */
			if (rc)
				p->state = CXL_CONFIG_COMMIT;
			else
				p->state = CXL_CONFIG_ACTIVE;
		}
	}

out:
	up_write(&cxl_region_rwsem);

	if (rc)
		return rc;
	return len;
}
431 
432 static ssize_t commit_show(struct device *dev, struct device_attribute *attr,
433 			   char *buf)
434 {
435 	struct cxl_region *cxlr = to_cxl_region(dev);
436 	struct cxl_region_params *p = &cxlr->params;
437 	ssize_t rc;
438 
439 	rc = down_read_interruptible(&cxl_region_rwsem);
440 	if (rc)
441 		return rc;
442 	rc = sysfs_emit(buf, "%d\n", p->state >= CXL_CONFIG_COMMIT);
443 	up_read(&cxl_region_rwsem);
444 
445 	return rc;
446 }
447 static DEVICE_ATTR_RW(commit);
448 
449 static umode_t cxl_region_visible(struct kobject *kobj, struct attribute *a,
450 				  int n)
451 {
452 	struct device *dev = kobj_to_dev(kobj);
453 	struct cxl_region *cxlr = to_cxl_region(dev);
454 
455 	/*
456 	 * Support tooling that expects to find a 'uuid' attribute for all
457 	 * regions regardless of mode.
458 	 */
459 	if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_DECODER_PMEM)
460 		return 0444;
461 	return a->mode;
462 }
463 
464 static ssize_t interleave_ways_show(struct device *dev,
465 				    struct device_attribute *attr, char *buf)
466 {
467 	struct cxl_region *cxlr = to_cxl_region(dev);
468 	struct cxl_region_params *p = &cxlr->params;
469 	ssize_t rc;
470 
471 	rc = down_read_interruptible(&cxl_region_rwsem);
472 	if (rc)
473 		return rc;
474 	rc = sysfs_emit(buf, "%d\n", p->interleave_ways);
475 	up_read(&cxl_region_rwsem);
476 
477 	return rc;
478 }
479 
480 static const struct attribute_group *get_cxl_region_target_group(void);
481 
482 static ssize_t interleave_ways_store(struct device *dev,
483 				     struct device_attribute *attr,
484 				     const char *buf, size_t len)
485 {
486 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
487 	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
488 	struct cxl_region *cxlr = to_cxl_region(dev);
489 	struct cxl_region_params *p = &cxlr->params;
490 	unsigned int val, save;
491 	int rc;
492 	u8 iw;
493 
494 	rc = kstrtouint(buf, 0, &val);
495 	if (rc)
496 		return rc;
497 
498 	rc = ways_to_eiw(val, &iw);
499 	if (rc)
500 		return rc;
501 
502 	/*
503 	 * Even for x3, x6, and x12 interleaves the region interleave must be a
504 	 * power of 2 multiple of the host bridge interleave.
505 	 */
506 	if (!is_power_of_2(val / cxld->interleave_ways) ||
507 	    (val % cxld->interleave_ways)) {
508 		dev_dbg(&cxlr->dev, "invalid interleave: %d\n", val);
509 		return -EINVAL;
510 	}
511 
512 	rc = down_write_killable(&cxl_region_rwsem);
513 	if (rc)
514 		return rc;
515 	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
516 		rc = -EBUSY;
517 		goto out;
518 	}
519 
520 	save = p->interleave_ways;
521 	p->interleave_ways = val;
522 	rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
523 	if (rc)
524 		p->interleave_ways = save;
525 out:
526 	up_write(&cxl_region_rwsem);
527 	if (rc)
528 		return rc;
529 	return len;
530 }
531 static DEVICE_ATTR_RW(interleave_ways);
532 
533 static ssize_t interleave_granularity_show(struct device *dev,
534 					   struct device_attribute *attr,
535 					   char *buf)
536 {
537 	struct cxl_region *cxlr = to_cxl_region(dev);
538 	struct cxl_region_params *p = &cxlr->params;
539 	ssize_t rc;
540 
541 	rc = down_read_interruptible(&cxl_region_rwsem);
542 	if (rc)
543 		return rc;
544 	rc = sysfs_emit(buf, "%d\n", p->interleave_granularity);
545 	up_read(&cxl_region_rwsem);
546 
547 	return rc;
548 }
549 
550 static ssize_t interleave_granularity_store(struct device *dev,
551 					    struct device_attribute *attr,
552 					    const char *buf, size_t len)
553 {
554 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
555 	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
556 	struct cxl_region *cxlr = to_cxl_region(dev);
557 	struct cxl_region_params *p = &cxlr->params;
558 	int rc, val;
559 	u16 ig;
560 
561 	rc = kstrtoint(buf, 0, &val);
562 	if (rc)
563 		return rc;
564 
565 	rc = granularity_to_eig(val, &ig);
566 	if (rc)
567 		return rc;
568 
569 	/*
570 	 * When the host-bridge is interleaved, disallow region granularity !=
571 	 * root granularity. Regions with a granularity less than the root
572 	 * interleave result in needing multiple endpoints to support a single
573 	 * slot in the interleave (possible to support in the future). Regions
574 	 * with a granularity greater than the root interleave result in invalid
575 	 * DPA translations (invalid to support).
576 	 */
577 	if (cxld->interleave_ways > 1 && val != cxld->interleave_granularity)
578 		return -EINVAL;
579 
580 	rc = down_write_killable(&cxl_region_rwsem);
581 	if (rc)
582 		return rc;
583 	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
584 		rc = -EBUSY;
585 		goto out;
586 	}
587 
588 	p->interleave_granularity = val;
589 out:
590 	up_write(&cxl_region_rwsem);
591 	if (rc)
592 		return rc;
593 	return len;
594 }
595 static DEVICE_ATTR_RW(interleave_granularity);
596 
597 static ssize_t resource_show(struct device *dev, struct device_attribute *attr,
598 			     char *buf)
599 {
600 	struct cxl_region *cxlr = to_cxl_region(dev);
601 	struct cxl_region_params *p = &cxlr->params;
602 	u64 resource = -1ULL;
603 	ssize_t rc;
604 
605 	rc = down_read_interruptible(&cxl_region_rwsem);
606 	if (rc)
607 		return rc;
608 	if (p->res)
609 		resource = p->res->start;
610 	rc = sysfs_emit(buf, "%#llx\n", resource);
611 	up_read(&cxl_region_rwsem);
612 
613 	return rc;
614 }
615 static DEVICE_ATTR_RO(resource);
616 
617 static ssize_t mode_show(struct device *dev, struct device_attribute *attr,
618 			 char *buf)
619 {
620 	struct cxl_region *cxlr = to_cxl_region(dev);
621 
622 	return sysfs_emit(buf, "%s\n", cxl_decoder_mode_name(cxlr->mode));
623 }
624 static DEVICE_ATTR_RO(mode);
625 
626 static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size)
627 {
628 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
629 	struct cxl_region_params *p = &cxlr->params;
630 	struct resource *res;
631 	u64 remainder = 0;
632 
633 	lockdep_assert_held_write(&cxl_region_rwsem);
634 
635 	/* Nothing to do... */
636 	if (p->res && resource_size(p->res) == size)
637 		return 0;
638 
639 	/* To change size the old size must be freed first */
640 	if (p->res)
641 		return -EBUSY;
642 
643 	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
644 		return -EBUSY;
645 
646 	/* ways, granularity and uuid (if PMEM) need to be set before HPA */
647 	if (!p->interleave_ways || !p->interleave_granularity ||
648 	    (cxlr->mode == CXL_DECODER_PMEM && uuid_is_null(&p->uuid)))
649 		return -ENXIO;
650 
651 	div64_u64_rem(size, (u64)SZ_256M * p->interleave_ways, &remainder);
652 	if (remainder)
653 		return -EINVAL;
654 
655 	res = alloc_free_mem_region(cxlrd->res, size, SZ_256M,
656 				    dev_name(&cxlr->dev));
657 	if (IS_ERR(res)) {
658 		dev_dbg(&cxlr->dev,
659 			"HPA allocation error (%ld) for size:%pap in %s %pr\n",
660 			PTR_ERR(res), &size, cxlrd->res->name, cxlrd->res);
661 		return PTR_ERR(res);
662 	}
663 
664 	p->res = res;
665 	p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
666 
667 	return 0;
668 }
669 
670 static void cxl_region_iomem_release(struct cxl_region *cxlr)
671 {
672 	struct cxl_region_params *p = &cxlr->params;
673 
674 	if (device_is_registered(&cxlr->dev))
675 		lockdep_assert_held_write(&cxl_region_rwsem);
676 	if (p->res) {
677 		/*
678 		 * Autodiscovered regions may not have been able to insert their
679 		 * resource.
680 		 */
681 		if (p->res->parent)
682 			remove_resource(p->res);
683 		kfree(p->res);
684 		p->res = NULL;
685 	}
686 }
687 
688 static int free_hpa(struct cxl_region *cxlr)
689 {
690 	struct cxl_region_params *p = &cxlr->params;
691 
692 	lockdep_assert_held_write(&cxl_region_rwsem);
693 
694 	if (!p->res)
695 		return 0;
696 
697 	if (p->state >= CXL_CONFIG_ACTIVE)
698 		return -EBUSY;
699 
700 	cxl_region_iomem_release(cxlr);
701 	p->state = CXL_CONFIG_IDLE;
702 	return 0;
703 }
704 
705 static ssize_t size_store(struct device *dev, struct device_attribute *attr,
706 			  const char *buf, size_t len)
707 {
708 	struct cxl_region *cxlr = to_cxl_region(dev);
709 	u64 val;
710 	int rc;
711 
712 	rc = kstrtou64(buf, 0, &val);
713 	if (rc)
714 		return rc;
715 
716 	rc = down_write_killable(&cxl_region_rwsem);
717 	if (rc)
718 		return rc;
719 
720 	if (val)
721 		rc = alloc_hpa(cxlr, val);
722 	else
723 		rc = free_hpa(cxlr);
724 	up_write(&cxl_region_rwsem);
725 
726 	if (rc)
727 		return rc;
728 
729 	return len;
730 }
731 
732 static ssize_t size_show(struct device *dev, struct device_attribute *attr,
733 			 char *buf)
734 {
735 	struct cxl_region *cxlr = to_cxl_region(dev);
736 	struct cxl_region_params *p = &cxlr->params;
737 	u64 size = 0;
738 	ssize_t rc;
739 
740 	rc = down_read_interruptible(&cxl_region_rwsem);
741 	if (rc)
742 		return rc;
743 	if (p->res)
744 		size = resource_size(p->res);
745 	rc = sysfs_emit(buf, "%#llx\n", size);
746 	up_read(&cxl_region_rwsem);
747 
748 	return rc;
749 }
750 static DEVICE_ATTR_RW(size);
751 
/* NULL-terminated list of the base per-region sysfs attributes */
static struct attribute *cxl_region_attrs[] = {
	&dev_attr_uuid.attr,
	&dev_attr_commit.attr,
	&dev_attr_interleave_ways.attr,
	&dev_attr_interleave_granularity.attr,
	&dev_attr_resource.attr,
	&dev_attr_size.attr,
	&dev_attr_mode.attr,
	NULL,
};

/*
 * Unnamed attribute group for the list above; cxl_region_visible()
 * decides the effective mode of each attribute.
 */
static const struct attribute_group cxl_region_group = {
	.attrs = cxl_region_attrs,
	.is_visible = cxl_region_visible,
};
767 
768 static size_t show_targetN(struct cxl_region *cxlr, char *buf, int pos)
769 {
770 	struct cxl_region_params *p = &cxlr->params;
771 	struct cxl_endpoint_decoder *cxled;
772 	int rc;
773 
774 	rc = down_read_interruptible(&cxl_region_rwsem);
775 	if (rc)
776 		return rc;
777 
778 	if (pos >= p->interleave_ways) {
779 		dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
780 			p->interleave_ways);
781 		rc = -ENXIO;
782 		goto out;
783 	}
784 
785 	cxled = p->targets[pos];
786 	if (!cxled)
787 		rc = sysfs_emit(buf, "\n");
788 	else
789 		rc = sysfs_emit(buf, "%s\n", dev_name(&cxled->cxld.dev));
790 out:
791 	up_read(&cxl_region_rwsem);
792 
793 	return rc;
794 }
795 
796 static int match_free_decoder(struct device *dev, void *data)
797 {
798 	struct cxl_decoder *cxld;
799 	int *id = data;
800 
801 	if (!is_switch_decoder(dev))
802 		return 0;
803 
804 	cxld = to_cxl_decoder(dev);
805 
806 	/* enforce ordered allocation */
807 	if (cxld->id != *id)
808 		return 0;
809 
810 	if (!cxld->region)
811 		return 1;
812 
813 	(*id)++;
814 
815 	return 0;
816 }
817 
818 static int match_auto_decoder(struct device *dev, void *data)
819 {
820 	struct cxl_region_params *p = data;
821 	struct cxl_decoder *cxld;
822 	struct range *r;
823 
824 	if (!is_switch_decoder(dev))
825 		return 0;
826 
827 	cxld = to_cxl_decoder(dev);
828 	r = &cxld->hpa_range;
829 
830 	if (p->res && p->res->start == r->start && p->res->end == r->end)
831 		return 1;
832 
833 	return 0;
834 }
835 
836 static struct cxl_decoder *
837 cxl_region_find_decoder(struct cxl_port *port,
838 			struct cxl_endpoint_decoder *cxled,
839 			struct cxl_region *cxlr)
840 {
841 	struct device *dev;
842 	int id = 0;
843 
844 	if (port == cxled_to_port(cxled))
845 		return &cxled->cxld;
846 
847 	if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags))
848 		dev = device_find_child(&port->dev, &cxlr->params,
849 					match_auto_decoder);
850 	else
851 		dev = device_find_child(&port->dev, &id, match_free_decoder);
852 	if (!dev)
853 		return NULL;
854 	/*
855 	 * This decoder is pinned registered as long as the endpoint decoder is
856 	 * registered, and endpoint decoder unregistration holds the
857 	 * cxl_region_rwsem over unregister events, so no need to hold on to
858 	 * this extra reference.
859 	 */
860 	put_device(dev);
861 	return to_cxl_decoder(dev);
862 }
863 
864 static bool auto_order_ok(struct cxl_port *port, struct cxl_region *cxlr_iter,
865 			  struct cxl_decoder *cxld)
866 {
867 	struct cxl_region_ref *rr = cxl_rr_load(port, cxlr_iter);
868 	struct cxl_decoder *cxld_iter = rr->decoder;
869 
870 	/*
871 	 * Allow the out of order assembly of auto-discovered regions.
872 	 * Per CXL Spec 3.1 8.2.4.20.12 software must commit decoders
873 	 * in HPA order. Confirm that the decoder with the lesser HPA
874 	 * starting address has the lesser id.
875 	 */
876 	dev_dbg(&cxld->dev, "check for HPA violation %s:%d < %s:%d\n",
877 		dev_name(&cxld->dev), cxld->id,
878 		dev_name(&cxld_iter->dev), cxld_iter->id);
879 
880 	if (cxld_iter->id > cxld->id)
881 		return true;
882 
883 	return false;
884 }
885 
886 static struct cxl_region_ref *
887 alloc_region_ref(struct cxl_port *port, struct cxl_region *cxlr,
888 		 struct cxl_endpoint_decoder *cxled)
889 {
890 	struct cxl_region_params *p = &cxlr->params;
891 	struct cxl_region_ref *cxl_rr, *iter;
892 	unsigned long index;
893 	int rc;
894 
895 	xa_for_each(&port->regions, index, iter) {
896 		struct cxl_region_params *ip = &iter->region->params;
897 
898 		if (!ip->res || ip->res->start < p->res->start)
899 			continue;
900 
901 		if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
902 			struct cxl_decoder *cxld;
903 
904 			cxld = cxl_region_find_decoder(port, cxled, cxlr);
905 			if (auto_order_ok(port, iter->region, cxld))
906 				continue;
907 		}
908 		dev_dbg(&cxlr->dev, "%s: HPA order violation %s:%pr vs %pr\n",
909 			dev_name(&port->dev),
910 			dev_name(&iter->region->dev), ip->res, p->res);
911 
912 		return ERR_PTR(-EBUSY);
913 	}
914 
915 	cxl_rr = kzalloc(sizeof(*cxl_rr), GFP_KERNEL);
916 	if (!cxl_rr)
917 		return ERR_PTR(-ENOMEM);
918 	cxl_rr->port = port;
919 	cxl_rr->region = cxlr;
920 	cxl_rr->nr_targets = 1;
921 	xa_init(&cxl_rr->endpoints);
922 
923 	rc = xa_insert(&port->regions, (unsigned long)cxlr, cxl_rr, GFP_KERNEL);
924 	if (rc) {
925 		dev_dbg(&cxlr->dev,
926 			"%s: failed to track region reference: %d\n",
927 			dev_name(&port->dev), rc);
928 		kfree(cxl_rr);
929 		return ERR_PTR(rc);
930 	}
931 
932 	return cxl_rr;
933 }
934 
935 static void cxl_rr_free_decoder(struct cxl_region_ref *cxl_rr)
936 {
937 	struct cxl_region *cxlr = cxl_rr->region;
938 	struct cxl_decoder *cxld = cxl_rr->decoder;
939 
940 	if (!cxld)
941 		return;
942 
943 	dev_WARN_ONCE(&cxlr->dev, cxld->region != cxlr, "region mismatch\n");
944 	if (cxld->region == cxlr) {
945 		cxld->region = NULL;
946 		put_device(&cxlr->dev);
947 	}
948 }
949 
950 static void free_region_ref(struct cxl_region_ref *cxl_rr)
951 {
952 	struct cxl_port *port = cxl_rr->port;
953 	struct cxl_region *cxlr = cxl_rr->region;
954 
955 	cxl_rr_free_decoder(cxl_rr);
956 	xa_erase(&port->regions, (unsigned long)cxlr);
957 	xa_destroy(&cxl_rr->endpoints);
958 	kfree(cxl_rr);
959 }
960 
961 static int cxl_rr_ep_add(struct cxl_region_ref *cxl_rr,
962 			 struct cxl_endpoint_decoder *cxled)
963 {
964 	int rc;
965 	struct cxl_port *port = cxl_rr->port;
966 	struct cxl_region *cxlr = cxl_rr->region;
967 	struct cxl_decoder *cxld = cxl_rr->decoder;
968 	struct cxl_ep *ep = cxl_ep_load(port, cxled_to_memdev(cxled));
969 
970 	if (ep) {
971 		rc = xa_insert(&cxl_rr->endpoints, (unsigned long)cxled, ep,
972 			       GFP_KERNEL);
973 		if (rc)
974 			return rc;
975 	}
976 	cxl_rr->nr_eps++;
977 
978 	if (!cxld->region) {
979 		cxld->region = cxlr;
980 		get_device(&cxlr->dev);
981 	}
982 
983 	return 0;
984 }
985 
986 static int cxl_rr_alloc_decoder(struct cxl_port *port, struct cxl_region *cxlr,
987 				struct cxl_endpoint_decoder *cxled,
988 				struct cxl_region_ref *cxl_rr)
989 {
990 	struct cxl_decoder *cxld;
991 
992 	cxld = cxl_region_find_decoder(port, cxled, cxlr);
993 	if (!cxld) {
994 		dev_dbg(&cxlr->dev, "%s: no decoder available\n",
995 			dev_name(&port->dev));
996 		return -EBUSY;
997 	}
998 
999 	if (cxld->region) {
1000 		dev_dbg(&cxlr->dev, "%s: %s already attached to %s\n",
1001 			dev_name(&port->dev), dev_name(&cxld->dev),
1002 			dev_name(&cxld->region->dev));
1003 		return -EBUSY;
1004 	}
1005 
1006 	/*
1007 	 * Endpoints should already match the region type, but backstop that
1008 	 * assumption with an assertion. Switch-decoders change mapping-type
1009 	 * based on what is mapped when they are assigned to a region.
1010 	 */
1011 	dev_WARN_ONCE(&cxlr->dev,
1012 		      port == cxled_to_port(cxled) &&
1013 			      cxld->target_type != cxlr->type,
1014 		      "%s:%s mismatch decoder type %d -> %d\n",
1015 		      dev_name(&cxled_to_memdev(cxled)->dev),
1016 		      dev_name(&cxld->dev), cxld->target_type, cxlr->type);
1017 	cxld->target_type = cxlr->type;
1018 	cxl_rr->decoder = cxld;
1019 	return 0;
1020 }
1021 
/**
 * cxl_port_attach_region() - track a region's interest in a port by endpoint
 * @port: port to add a new region reference 'struct cxl_region_ref'
 * @cxlr: region to attach to @port
 * @cxled: endpoint decoder used to create or further pin a region reference
 * @pos: interleave position of @cxled in @cxlr
 *
 * The attach event is an opportunity to validate CXL decode setup
 * constraints and record metadata needed for programming HDM decoders,
 * in particular decoder target lists.
 *
 * The steps are:
 *
 * - validate that there are no other regions with a higher HPA already
 *   associated with @port
 * - establish a region reference if one is not already present
 *
 *   - additionally allocate a decoder instance that will host @cxlr on
 *     @port
 *
 * - pin the region reference by the endpoint
 * - account for how many entries in @port's target list are needed to
 *   cover all of the added endpoints.
 *
 * Must be called with cxl_region_rwsem held for write.
 *
 * Return: 0 on success; the error from alloc_region_ref(),
 * cxl_rr_alloc_decoder() or cxl_rr_ep_add() on failure; -ENXIO when the
 * targets needed exceed what the switch decoder provides.
 */
static int cxl_port_attach_region(struct cxl_port *port,
				  struct cxl_region *cxlr,
				  struct cxl_endpoint_decoder *cxled, int pos)
{
	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
	struct cxl_ep *ep = cxl_ep_load(port, cxlmd);
	struct cxl_region_ref *cxl_rr;
	bool nr_targets_inc = false;
	struct cxl_decoder *cxld;
	unsigned long index;
	int rc = -EBUSY;

	lockdep_assert_held_write(&cxl_region_rwsem);

	cxl_rr = cxl_rr_load(port, cxlr);
	if (cxl_rr) {
		struct cxl_ep *ep_iter;
		int found = 0;

		/*
		 * Walk the existing endpoints that have been attached to
		 * @cxlr at @port and see if they share the same 'next' port
		 * in the downstream direction. I.e. endpoints that share common
		 * upstream switch.
		 */
		xa_for_each(&cxl_rr->endpoints, index, ep_iter) {
			if (ep_iter == ep)
				continue;
			if (ep_iter->next == ep->next) {
				found++;
				break;
			}
		}

		/*
		 * New target port, or @port is an endpoint port that always
		 * accounts its own local decode as a target.
		 */
		if (!found || !ep->next) {
			cxl_rr->nr_targets++;
			nr_targets_inc = true;
		}
	} else {
		cxl_rr = alloc_region_ref(port, cxlr, cxled);
		if (IS_ERR(cxl_rr)) {
			dev_dbg(&cxlr->dev,
				"%s: failed to allocate region reference\n",
				dev_name(&port->dev));
			return PTR_ERR(cxl_rr);
		}
		/* a fresh reference starts with nr_targets == 1 */
		nr_targets_inc = true;

		rc = cxl_rr_alloc_decoder(port, cxlr, cxled, cxl_rr);
		if (rc)
			goto out_erase;
	}
	cxld = cxl_rr->decoder;

	/*
	 * the number of targets should not exceed the target_count
	 * of the decoder
	 */
	if (is_switch_decoder(&cxld->dev)) {
		struct cxl_switch_decoder *cxlsd;

		cxlsd = to_cxl_switch_decoder(&cxld->dev);
		if (cxl_rr->nr_targets > cxlsd->nr_targets) {
			dev_dbg(&cxlr->dev,
				"%s:%s %s add: %s:%s @ %d overflows targets: %d\n",
				dev_name(port->uport_dev), dev_name(&port->dev),
				dev_name(&cxld->dev), dev_name(&cxlmd->dev),
				dev_name(&cxled->cxld.dev), pos,
				cxlsd->nr_targets);
			rc = -ENXIO;
			goto out_erase;
		}
	}

	rc = cxl_rr_ep_add(cxl_rr, cxled);
	if (rc) {
		dev_dbg(&cxlr->dev,
			"%s: failed to track endpoint %s:%s reference\n",
			dev_name(&port->dev), dev_name(&cxlmd->dev),
			dev_name(&cxld->dev));
		goto out_erase;
	}

	dev_dbg(&cxlr->dev,
		"%s:%s %s add: %s:%s @ %d next: %s nr_eps: %d nr_targets: %d\n",
		dev_name(port->uport_dev), dev_name(&port->dev),
		dev_name(&cxld->dev), dev_name(&cxlmd->dev),
		dev_name(&cxled->cxld.dev), pos,
		ep ? ep->next ? dev_name(ep->next->uport_dev) :
				      dev_name(&cxlmd->dev) :
			   "none",
		cxl_rr->nr_eps, cxl_rr->nr_targets);

	return 0;
out_erase:
	/* undo this call's target accounting; free the ref if no endpoint pins it */
	if (nr_targets_inc)
		cxl_rr->nr_targets--;
	if (cxl_rr->nr_eps == 0)
		free_region_ref(cxl_rr);
	return rc;
}
1151 
1152 static void cxl_port_detach_region(struct cxl_port *port,
1153 				   struct cxl_region *cxlr,
1154 				   struct cxl_endpoint_decoder *cxled)
1155 {
1156 	struct cxl_region_ref *cxl_rr;
1157 	struct cxl_ep *ep = NULL;
1158 
1159 	lockdep_assert_held_write(&cxl_region_rwsem);
1160 
1161 	cxl_rr = cxl_rr_load(port, cxlr);
1162 	if (!cxl_rr)
1163 		return;
1164 
1165 	/*
1166 	 * Endpoint ports do not carry cxl_ep references, and they
1167 	 * never target more than one endpoint by definition
1168 	 */
1169 	if (cxl_rr->decoder == &cxled->cxld)
1170 		cxl_rr->nr_eps--;
1171 	else
1172 		ep = xa_erase(&cxl_rr->endpoints, (unsigned long)cxled);
1173 	if (ep) {
1174 		struct cxl_ep *ep_iter;
1175 		unsigned long index;
1176 		int found = 0;
1177 
1178 		cxl_rr->nr_eps--;
1179 		xa_for_each(&cxl_rr->endpoints, index, ep_iter) {
1180 			if (ep_iter->next == ep->next) {
1181 				found++;
1182 				break;
1183 			}
1184 		}
1185 		if (!found)
1186 			cxl_rr->nr_targets--;
1187 	}
1188 
1189 	if (cxl_rr->nr_eps == 0)
1190 		free_region_ref(cxl_rr);
1191 }
1192 
1193 static int check_last_peer(struct cxl_endpoint_decoder *cxled,
1194 			   struct cxl_ep *ep, struct cxl_region_ref *cxl_rr,
1195 			   int distance)
1196 {
1197 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1198 	struct cxl_region *cxlr = cxl_rr->region;
1199 	struct cxl_region_params *p = &cxlr->params;
1200 	struct cxl_endpoint_decoder *cxled_peer;
1201 	struct cxl_port *port = cxl_rr->port;
1202 	struct cxl_memdev *cxlmd_peer;
1203 	struct cxl_ep *ep_peer;
1204 	int pos = cxled->pos;
1205 
1206 	/*
1207 	 * If this position wants to share a dport with the last endpoint mapped
1208 	 * then that endpoint, at index 'position - distance', must also be
1209 	 * mapped by this dport.
1210 	 */
1211 	if (pos < distance) {
1212 		dev_dbg(&cxlr->dev, "%s:%s: cannot host %s:%s at %d\n",
1213 			dev_name(port->uport_dev), dev_name(&port->dev),
1214 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
1215 		return -ENXIO;
1216 	}
1217 	cxled_peer = p->targets[pos - distance];
1218 	cxlmd_peer = cxled_to_memdev(cxled_peer);
1219 	ep_peer = cxl_ep_load(port, cxlmd_peer);
1220 	if (ep->dport != ep_peer->dport) {
1221 		dev_dbg(&cxlr->dev,
1222 			"%s:%s: %s:%s pos %d mismatched peer %s:%s\n",
1223 			dev_name(port->uport_dev), dev_name(&port->dev),
1224 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos,
1225 			dev_name(&cxlmd_peer->dev),
1226 			dev_name(&cxled_peer->cxld.dev));
1227 		return -ENXIO;
1228 	}
1229 
1230 	return 0;
1231 }
1232 
1233 static int check_interleave_cap(struct cxl_decoder *cxld, int iw, int ig)
1234 {
1235 	struct cxl_port *port = to_cxl_port(cxld->dev.parent);
1236 	struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);
1237 	unsigned int interleave_mask;
1238 	u8 eiw;
1239 	u16 eig;
1240 	int high_pos, low_pos;
1241 
1242 	if (!test_bit(iw, &cxlhdm->iw_cap_mask))
1243 		return -ENXIO;
1244 	/*
1245 	 * Per CXL specification r3.1(8.2.4.20.13 Decoder Protection),
1246 	 * if eiw < 8:
1247 	 *   DPAOFFSET[51: eig + 8] = HPAOFFSET[51: eig + 8 + eiw]
1248 	 *   DPAOFFSET[eig + 7: 0]  = HPAOFFSET[eig + 7: 0]
1249 	 *
1250 	 *   when the eiw is 0, all the bits of HPAOFFSET[51: 0] are used, the
1251 	 *   interleave bits are none.
1252 	 *
1253 	 * if eiw >= 8:
1254 	 *   DPAOFFSET[51: eig + 8] = HPAOFFSET[51: eig + eiw] / 3
1255 	 *   DPAOFFSET[eig + 7: 0]  = HPAOFFSET[eig + 7: 0]
1256 	 *
1257 	 *   when the eiw is 8, all the bits of HPAOFFSET[51: 0] are used, the
1258 	 *   interleave bits are none.
1259 	 */
1260 	ways_to_eiw(iw, &eiw);
1261 	if (eiw == 0 || eiw == 8)
1262 		return 0;
1263 
1264 	granularity_to_eig(ig, &eig);
1265 	if (eiw > 8)
1266 		high_pos = eiw + eig - 1;
1267 	else
1268 		high_pos = eiw + eig + 7;
1269 	low_pos = eig + 8;
1270 	interleave_mask = GENMASK(high_pos, low_pos);
1271 	if (interleave_mask & ~cxlhdm->interleave_mask)
1272 		return -ENXIO;
1273 
1274 	return 0;
1275 }
1276 
/*
 * cxl_port_setup_targets() - program or validate @port's decoder for @cxlr
 * @port: port on the path from the host bridge down to @cxled
 * @cxlr: region being set up
 * @cxled: endpoint decoder whose path through @port is being accounted
 *
 * On the first call for a @port / @cxlr pair (nr_targets_set == 0) the
 * port's interleave ways and granularity are derived from its parent and
 * either written to the decoder or, for CXL_REGION_F_AUTO regions, compared
 * against what the decoder already holds. Subsequent calls skip that step
 * and only add (or validate) the dport leading to @cxled in the decoder's
 * target list.
 *
 * Return: 0 on success, negative errno on any validation failure.
 */
1277 static int cxl_port_setup_targets(struct cxl_port *port,
1278 				  struct cxl_region *cxlr,
1279 				  struct cxl_endpoint_decoder *cxled)
1280 {
1281 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
1282 	int parent_iw, parent_ig, ig, iw, rc, inc = 0, pos = cxled->pos;
1283 	struct cxl_port *parent_port = to_cxl_port(port->dev.parent);
1284 	struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr);
1285 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1286 	struct cxl_ep *ep = cxl_ep_load(port, cxlmd);
1287 	struct cxl_region_params *p = &cxlr->params;
1288 	struct cxl_decoder *cxld = cxl_rr->decoder;
1289 	struct cxl_switch_decoder *cxlsd;
1290 	u16 eig, peig;
1291 	u8 eiw, peiw;
1292 
1293 	/*
1294 	 * While root level decoders support x3, x6, x12, switch level
1295 	 * decoders only support powers of 2 up to x16.
1296 	 */
1297 	if (!is_power_of_2(cxl_rr->nr_targets)) {
1298 		dev_dbg(&cxlr->dev, "%s:%s: invalid target count %d\n",
1299 			dev_name(port->uport_dev), dev_name(&port->dev),
1300 			cxl_rr->nr_targets);
1301 		return -EINVAL;
1302 	}
1303 
1304 	cxlsd = to_cxl_switch_decoder(&cxld->dev);
	/* interleave settings were already handled by an earlier call */
1305 	if (cxl_rr->nr_targets_set) {
1306 		int i, distance;
1307 
1308 		/*
1309 		 * Passthrough decoders impose no distance requirements between
1310 		 * peers
1311 		 */
1312 		if (cxl_rr->nr_targets == 1)
1313 			distance = 0;
1314 		else
1315 			distance = p->nr_targets / cxl_rr->nr_targets;
		/*
		 * dport already present in the target list: only confirm the
		 * peer spacing; no new slot is consumed (inc stays 0).
		 */
1316 		for (i = 0; i < cxl_rr->nr_targets_set; i++)
1317 			if (ep->dport == cxlsd->target[i]) {
1318 				rc = check_last_peer(cxled, ep, cxl_rr,
1319 						     distance);
1320 				if (rc)
1321 					return rc;
1322 				goto out_target_set;
1323 			}
1324 		goto add_target;
1325 	}
1326 
1327 	if (is_cxl_root(parent_port)) {
1328 		/*
1329 		 * Root decoder IG is always set to value in CFMWS which
1330 		 * may be different than this region's IG.  We can use the
1331 		 * region's IG here since interleave_granularity_store()
1332 		 * does not allow interleaved host-bridges with
1333 		 * root IG != region IG.
1334 		 */
1335 		parent_ig = p->interleave_granularity;
1336 		parent_iw = cxlrd->cxlsd.cxld.interleave_ways;
1337 		/*
1338 		 * For purposes of address bit routing, use power-of-2 math for
1339 		 * switch ports.
1340 		 */
1341 		if (!is_power_of_2(parent_iw))
1342 			parent_iw /= 3;
1343 	} else {
1344 		struct cxl_region_ref *parent_rr;
1345 		struct cxl_decoder *parent_cxld;
1346 
		/* NOTE(review): parent_rr is used without a NULL check */
1347 		parent_rr = cxl_rr_load(parent_port, cxlr);
1348 		parent_cxld = parent_rr->decoder;
1349 		parent_ig = parent_cxld->interleave_granularity;
1350 		parent_iw = parent_cxld->interleave_ways;
1351 	}
1352 
	/* peig / peiw / eiw are computed for validation only; not read below */
1353 	rc = granularity_to_eig(parent_ig, &peig);
1354 	if (rc) {
1355 		dev_dbg(&cxlr->dev, "%s:%s: invalid parent granularity: %d\n",
1356 			dev_name(parent_port->uport_dev),
1357 			dev_name(&parent_port->dev), parent_ig);
1358 		return rc;
1359 	}
1360 
1361 	rc = ways_to_eiw(parent_iw, &peiw);
1362 	if (rc) {
1363 		dev_dbg(&cxlr->dev, "%s:%s: invalid parent interleave: %d\n",
1364 			dev_name(parent_port->uport_dev),
1365 			dev_name(&parent_port->dev), parent_iw);
1366 		return rc;
1367 	}
1368 
1369 	iw = cxl_rr->nr_targets;
1370 	rc = ways_to_eiw(iw, &eiw);
1371 	if (rc) {
1372 		dev_dbg(&cxlr->dev, "%s:%s: invalid port interleave: %d\n",
1373 			dev_name(port->uport_dev), dev_name(&port->dev), iw);
1374 		return rc;
1375 	}
1376 
1377 	/*
1378 	 * Interleave granularity is a multiple of @parent_port granularity.
1379 	 * Multiplier is the parent port interleave ways.
1380 	 */
1381 	rc = granularity_to_eig(parent_ig * parent_iw, &eig);
1382 	if (rc) {
1383 		dev_dbg(&cxlr->dev,
1384 			"%s: invalid granularity calculation (%d * %d)\n",
1385 			dev_name(&parent_port->dev), parent_ig, parent_iw);
1386 		return rc;
1387 	}
1388 
1389 	rc = eig_to_granularity(eig, &ig);
1390 	if (rc) {
1391 		dev_dbg(&cxlr->dev, "%s:%s: invalid interleave: %d\n",
1392 			dev_name(port->uport_dev), dev_name(&port->dev),
1393 			256 << eig);
1394 		return rc;
1395 	}
1396 
	/*
	 * NOTE(review): the comment at the top of this function says switch
	 * decoders support up to x16, yet iw > 8 is rejected here - confirm
	 * which limit is intended.
	 */
1397 	if (iw > 8 || iw > cxlsd->nr_targets) {
1398 		dev_dbg(&cxlr->dev,
1399 			"%s:%s:%s: ways: %d overflows targets: %d\n",
1400 			dev_name(port->uport_dev), dev_name(&port->dev),
1401 			dev_name(&cxld->dev), iw, cxlsd->nr_targets);
1402 		return -ENXIO;
1403 	}
1404 
	/*
	 * Auto-discovered regions: the decoder must already match the
	 * computed settings and be enabled. Otherwise: check capability and
	 * record the settings in the decoder.
	 */
1405 	if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
1406 		if (cxld->interleave_ways != iw ||
1407 		    cxld->interleave_granularity != ig ||
1408 		    cxld->hpa_range.start != p->res->start ||
1409 		    cxld->hpa_range.end != p->res->end ||
1410 		    ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) {
1411 			dev_err(&cxlr->dev,
1412 				"%s:%s %s expected iw: %d ig: %d %pr\n",
1413 				dev_name(port->uport_dev), dev_name(&port->dev),
1414 				__func__, iw, ig, p->res);
1415 			dev_err(&cxlr->dev,
1416 				"%s:%s %s got iw: %d ig: %d state: %s %#llx:%#llx\n",
1417 				dev_name(port->uport_dev), dev_name(&port->dev),
1418 				__func__, cxld->interleave_ways,
1419 				cxld->interleave_granularity,
1420 				(cxld->flags & CXL_DECODER_F_ENABLE) ?
1421 					"enabled" :
1422 					"disabled",
1423 				cxld->hpa_range.start, cxld->hpa_range.end);
1424 			return -ENXIO;
1425 		}
1426 	} else {
1427 		rc = check_interleave_cap(cxld, iw, ig);
1428 		if (rc) {
1429 			dev_dbg(&cxlr->dev,
1430 				"%s:%s iw: %d ig: %d is not supported\n",
1431 				dev_name(port->uport_dev),
1432 				dev_name(&port->dev), iw, ig);
1433 			return rc;
1434 		}
1435 
1436 		cxld->interleave_ways = iw;
1437 		cxld->interleave_granularity = ig;
1438 		cxld->hpa_range = (struct range) {
1439 			.start = p->res->start,
1440 			.end = p->res->end,
1441 		};
1442 	}
1443 	dev_dbg(&cxlr->dev, "%s:%s iw: %d ig: %d\n", dev_name(port->uport_dev),
1444 		dev_name(&port->dev), iw, ig);
	/* consume the next free target slot for ep->dport */
1445 add_target:
1446 	if (cxl_rr->nr_targets_set == cxl_rr->nr_targets) {
1447 		dev_dbg(&cxlr->dev,
1448 			"%s:%s: targets full trying to add %s:%s at %d\n",
1449 			dev_name(port->uport_dev), dev_name(&port->dev),
1450 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
1451 		return -ENXIO;
1452 	}
1453 	if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
1454 		if (cxlsd->target[cxl_rr->nr_targets_set] != ep->dport) {
1455 			dev_dbg(&cxlr->dev, "%s:%s: %s expected %s at %d\n",
1456 				dev_name(port->uport_dev), dev_name(&port->dev),
1457 				dev_name(&cxlsd->cxld.dev),
1458 				dev_name(ep->dport->dport_dev),
1459 				cxl_rr->nr_targets_set);
1460 			return -ENXIO;
1461 		}
1462 	} else
1463 		cxlsd->target[cxl_rr->nr_targets_set] = ep->dport;
1464 	inc = 1;
	/*
	 * NOTE(review): when reached from the "dport already present" path
	 * (inc == 0) the index printed below is the last programmed slot,
	 * which is not necessarily the slot that holds ep->dport.
	 */
1465 out_target_set:
1466 	cxl_rr->nr_targets_set += inc;
1467 	dev_dbg(&cxlr->dev, "%s:%s target[%d] = %s for %s:%s @ %d\n",
1468 		dev_name(port->uport_dev), dev_name(&port->dev),
1469 		cxl_rr->nr_targets_set - 1, dev_name(ep->dport->dport_dev),
1470 		dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
1471 
1472 	return 0;
1473 }
1474 
1475 static void cxl_port_reset_targets(struct cxl_port *port,
1476 				   struct cxl_region *cxlr)
1477 {
1478 	struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr);
1479 	struct cxl_decoder *cxld;
1480 
1481 	/*
1482 	 * After the last endpoint has been detached the entire cxl_rr may now
1483 	 * be gone.
1484 	 */
1485 	if (!cxl_rr)
1486 		return;
1487 	cxl_rr->nr_targets_set = 0;
1488 
1489 	cxld = cxl_rr->decoder;
1490 	cxld->hpa_range = (struct range) {
1491 		.start = 0,
1492 		.end = -1,
1493 	};
1494 }
1495 
1496 static void cxl_region_teardown_targets(struct cxl_region *cxlr)
1497 {
1498 	struct cxl_region_params *p = &cxlr->params;
1499 	struct cxl_endpoint_decoder *cxled;
1500 	struct cxl_dev_state *cxlds;
1501 	struct cxl_memdev *cxlmd;
1502 	struct cxl_port *iter;
1503 	struct cxl_ep *ep;
1504 	int i;
1505 
1506 	/*
1507 	 * In the auto-discovery case skip automatic teardown since the
1508 	 * address space is already active
1509 	 */
1510 	if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags))
1511 		return;
1512 
1513 	for (i = 0; i < p->nr_targets; i++) {
1514 		cxled = p->targets[i];
1515 		cxlmd = cxled_to_memdev(cxled);
1516 		cxlds = cxlmd->cxlds;
1517 
1518 		if (cxlds->rcd)
1519 			continue;
1520 
1521 		iter = cxled_to_port(cxled);
1522 		while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
1523 			iter = to_cxl_port(iter->dev.parent);
1524 
1525 		for (ep = cxl_ep_load(iter, cxlmd); iter;
1526 		     iter = ep->next, ep = cxl_ep_load(iter, cxlmd))
1527 			cxl_port_reset_targets(iter, cxlr);
1528 	}
1529 }
1530 
1531 static int cxl_region_setup_targets(struct cxl_region *cxlr)
1532 {
1533 	struct cxl_region_params *p = &cxlr->params;
1534 	struct cxl_endpoint_decoder *cxled;
1535 	struct cxl_dev_state *cxlds;
1536 	int i, rc, rch = 0, vh = 0;
1537 	struct cxl_memdev *cxlmd;
1538 	struct cxl_port *iter;
1539 	struct cxl_ep *ep;
1540 
1541 	for (i = 0; i < p->nr_targets; i++) {
1542 		cxled = p->targets[i];
1543 		cxlmd = cxled_to_memdev(cxled);
1544 		cxlds = cxlmd->cxlds;
1545 
1546 		/* validate that all targets agree on topology */
1547 		if (!cxlds->rcd) {
1548 			vh++;
1549 		} else {
1550 			rch++;
1551 			continue;
1552 		}
1553 
1554 		iter = cxled_to_port(cxled);
1555 		while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
1556 			iter = to_cxl_port(iter->dev.parent);
1557 
1558 		/*
1559 		 * Descend the topology tree programming / validating
1560 		 * targets while looking for conflicts.
1561 		 */
1562 		for (ep = cxl_ep_load(iter, cxlmd); iter;
1563 		     iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
1564 			rc = cxl_port_setup_targets(iter, cxlr, cxled);
1565 			if (rc) {
1566 				cxl_region_teardown_targets(cxlr);
1567 				return rc;
1568 			}
1569 		}
1570 	}
1571 
1572 	if (rch && vh) {
1573 		dev_err(&cxlr->dev, "mismatched CXL topologies detected\n");
1574 		cxl_region_teardown_targets(cxlr);
1575 		return -ENXIO;
1576 	}
1577 
1578 	return 0;
1579 }
1580 
1581 static int cxl_region_validate_position(struct cxl_region *cxlr,
1582 					struct cxl_endpoint_decoder *cxled,
1583 					int pos)
1584 {
1585 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1586 	struct cxl_region_params *p = &cxlr->params;
1587 	int i;
1588 
1589 	if (pos < 0 || pos >= p->interleave_ways) {
1590 		dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
1591 			p->interleave_ways);
1592 		return -ENXIO;
1593 	}
1594 
1595 	if (p->targets[pos] == cxled)
1596 		return 0;
1597 
1598 	if (p->targets[pos]) {
1599 		struct cxl_endpoint_decoder *cxled_target = p->targets[pos];
1600 		struct cxl_memdev *cxlmd_target = cxled_to_memdev(cxled_target);
1601 
1602 		dev_dbg(&cxlr->dev, "position %d already assigned to %s:%s\n",
1603 			pos, dev_name(&cxlmd_target->dev),
1604 			dev_name(&cxled_target->cxld.dev));
1605 		return -EBUSY;
1606 	}
1607 
1608 	for (i = 0; i < p->interleave_ways; i++) {
1609 		struct cxl_endpoint_decoder *cxled_target;
1610 		struct cxl_memdev *cxlmd_target;
1611 
1612 		cxled_target = p->targets[i];
1613 		if (!cxled_target)
1614 			continue;
1615 
1616 		cxlmd_target = cxled_to_memdev(cxled_target);
1617 		if (cxlmd_target == cxlmd) {
1618 			dev_dbg(&cxlr->dev,
1619 				"%s already specified at position %d via: %s\n",
1620 				dev_name(&cxlmd->dev), pos,
1621 				dev_name(&cxled_target->cxld.dev));
1622 			return -EBUSY;
1623 		}
1624 	}
1625 
1626 	return 0;
1627 }
1628 
1629 static int cxl_region_attach_position(struct cxl_region *cxlr,
1630 				      struct cxl_root_decoder *cxlrd,
1631 				      struct cxl_endpoint_decoder *cxled,
1632 				      const struct cxl_dport *dport, int pos)
1633 {
1634 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1635 	struct cxl_port *iter;
1636 	int rc;
1637 
1638 	if (cxlrd->calc_hb(cxlrd, pos) != dport) {
1639 		dev_dbg(&cxlr->dev, "%s:%s invalid target position for %s\n",
1640 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
1641 			dev_name(&cxlrd->cxlsd.cxld.dev));
1642 		return -ENXIO;
1643 	}
1644 
1645 	for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
1646 	     iter = to_cxl_port(iter->dev.parent)) {
1647 		rc = cxl_port_attach_region(iter, cxlr, cxled, pos);
1648 		if (rc)
1649 			goto err;
1650 	}
1651 
1652 	return 0;
1653 
1654 err:
1655 	for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
1656 	     iter = to_cxl_port(iter->dev.parent))
1657 		cxl_port_detach_region(iter, cxlr, cxled);
1658 	return rc;
1659 }
1660 
1661 static int cxl_region_attach_auto(struct cxl_region *cxlr,
1662 				  struct cxl_endpoint_decoder *cxled, int pos)
1663 {
1664 	struct cxl_region_params *p = &cxlr->params;
1665 
1666 	if (cxled->state != CXL_DECODER_STATE_AUTO) {
1667 		dev_err(&cxlr->dev,
1668 			"%s: unable to add decoder to autodetected region\n",
1669 			dev_name(&cxled->cxld.dev));
1670 		return -EINVAL;
1671 	}
1672 
1673 	if (pos >= 0) {
1674 		dev_dbg(&cxlr->dev, "%s: expected auto position, not %d\n",
1675 			dev_name(&cxled->cxld.dev), pos);
1676 		return -EINVAL;
1677 	}
1678 
1679 	if (p->nr_targets >= p->interleave_ways) {
1680 		dev_err(&cxlr->dev, "%s: no more target slots available\n",
1681 			dev_name(&cxled->cxld.dev));
1682 		return -ENXIO;
1683 	}
1684 
1685 	/*
1686 	 * Temporarily record the endpoint decoder into the target array. Yes,
1687 	 * this means that userspace can view devices in the wrong position
1688 	 * before the region activates, and must be careful to understand when
1689 	 * it might be racing region autodiscovery.
1690 	 */
1691 	pos = p->nr_targets;
1692 	p->targets[pos] = cxled;
1693 	cxled->pos = pos;
1694 	p->nr_targets++;
1695 
1696 	return 0;
1697 }
1698 
1699 static int cmp_interleave_pos(const void *a, const void *b)
1700 {
1701 	struct cxl_endpoint_decoder *cxled_a = *(typeof(cxled_a) *)a;
1702 	struct cxl_endpoint_decoder *cxled_b = *(typeof(cxled_b) *)b;
1703 
1704 	return cxled_a->pos - cxled_b->pos;
1705 }
1706 
1707 static struct cxl_port *next_port(struct cxl_port *port)
1708 {
1709 	if (!port->parent_dport)
1710 		return NULL;
1711 	return port->parent_dport->port;
1712 }
1713 
1714 static int match_switch_decoder_by_range(struct device *dev, void *data)
1715 {
1716 	struct cxl_switch_decoder *cxlsd;
1717 	struct range *r1, *r2 = data;
1718 
1719 	if (!is_switch_decoder(dev))
1720 		return 0;
1721 
1722 	cxlsd = to_cxl_switch_decoder(dev);
1723 	r1 = &cxlsd->cxld.hpa_range;
1724 
1725 	if (is_root_decoder(dev))
1726 		return range_contains(r1, r2);
1727 	return (r1->start == r2->start && r1->end == r2->end);
1728 }
1729 
1730 static int find_pos_and_ways(struct cxl_port *port, struct range *range,
1731 			     int *pos, int *ways)
1732 {
1733 	struct cxl_switch_decoder *cxlsd;
1734 	struct cxl_port *parent;
1735 	struct device *dev;
1736 	int rc = -ENXIO;
1737 
1738 	parent = next_port(port);
1739 	if (!parent)
1740 		return rc;
1741 
1742 	dev = device_find_child(&parent->dev, range,
1743 				match_switch_decoder_by_range);
1744 	if (!dev) {
1745 		dev_err(port->uport_dev,
1746 			"failed to find decoder mapping %#llx-%#llx\n",
1747 			range->start, range->end);
1748 		return rc;
1749 	}
1750 	cxlsd = to_cxl_switch_decoder(dev);
1751 	*ways = cxlsd->cxld.interleave_ways;
1752 
1753 	for (int i = 0; i < *ways; i++) {
1754 		if (cxlsd->target[i] == port->parent_dport) {
1755 			*pos = i;
1756 			rc = 0;
1757 			break;
1758 		}
1759 	}
1760 	put_device(dev);
1761 
1762 	return rc;
1763 }
1764 
1765 /**
1766  * cxl_calc_interleave_pos() - calculate an endpoint position in a region
1767  * @cxled: endpoint decoder member of given region
1768  *
1769  * The endpoint position is calculated by traversing the topology from
1770  * the endpoint to the root decoder and iteratively applying this
1771  * calculation:
1772  *
1773  *    position = position * parent_ways + parent_pos;
1774  *
1775  * ...where @position is inferred from switch and root decoder target lists.
1776  *
1777  * Return: position >= 0 on success
1778  *	   -ENXIO on failure
1779  */
1780 static int cxl_calc_interleave_pos(struct cxl_endpoint_decoder *cxled)
1781 {
1782 	struct cxl_port *iter, *port = cxled_to_port(cxled);
1783 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1784 	struct range *range = &cxled->cxld.hpa_range;
1785 	int parent_ways = 0, parent_pos = 0, pos = 0;
1786 	int rc;
1787 
1788 	/*
1789 	 * Example: the expected interleave order of the 4-way region shown
1790 	 * below is: mem0, mem2, mem1, mem3
1791 	 *
1792 	 *		  root_port
1793 	 *                 /      \
1794 	 *      host_bridge_0    host_bridge_1
1795 	 *        |    |           |    |
1796 	 *       mem0 mem1        mem2 mem3
1797 	 *
1798 	 * In the example the calculator will iterate twice. The first iteration
1799 	 * uses the mem position in the host-bridge and the ways of the host-
1800 	 * bridge to generate the first, or local, position. The second
1801 	 * iteration uses the host-bridge position in the root_port and the ways
1802 	 * of the root_port to refine the position.
1803 	 *
1804 	 * A trace of the calculation per endpoint looks like this:
1805 	 * mem0: pos = 0 * 2 + 0    mem2: pos = 0 * 2 + 0
1806 	 *       pos = 0 * 2 + 0          pos = 0 * 2 + 1
1807 	 *       pos: 0                   pos: 1
1808 	 *
1809 	 * mem1: pos = 0 * 2 + 1    mem3: pos = 0 * 2 + 1
1810 	 *       pos = 1 * 2 + 0          pos = 1 * 2 + 1
1811 	 *       pos: 2                   pos = 3
1812 	 *
1813 	 * Note that while this example is simple, the method applies to more
1814 	 * complex topologies, including those with switches.
1815 	 */
1816 
1817 	/* Iterate from endpoint to root_port refining the position */
1818 	for (iter = port; iter; iter = next_port(iter)) {
1819 		if (is_cxl_root(iter))
1820 			break;
1821 
1822 		rc = find_pos_and_ways(iter, range, &parent_pos, &parent_ways);
1823 		if (rc)
1824 			return rc;
1825 
1826 		pos = pos * parent_ways + parent_pos;
1827 	}
1828 
1829 	dev_dbg(&cxlmd->dev,
1830 		"decoder:%s parent:%s port:%s range:%#llx-%#llx pos:%d\n",
1831 		dev_name(&cxled->cxld.dev), dev_name(cxlmd->dev.parent),
1832 		dev_name(&port->dev), range->start, range->end, pos);
1833 
1834 	return pos;
1835 }
1836 
1837 static int cxl_region_sort_targets(struct cxl_region *cxlr)
1838 {
1839 	struct cxl_region_params *p = &cxlr->params;
1840 	int i, rc = 0;
1841 
1842 	for (i = 0; i < p->nr_targets; i++) {
1843 		struct cxl_endpoint_decoder *cxled = p->targets[i];
1844 
1845 		cxled->pos = cxl_calc_interleave_pos(cxled);
1846 		/*
1847 		 * Record that sorting failed, but still continue to calc
1848 		 * cxled->pos so that follow-on code paths can reliably
1849 		 * do p->targets[cxled->pos] to self-reference their entry.
1850 		 */
1851 		if (cxled->pos < 0)
1852 			rc = -ENXIO;
1853 	}
1854 	/* Keep the cxlr target list in interleave position order */
1855 	sort(p->targets, p->nr_targets, sizeof(p->targets[0]),
1856 	     cmp_interleave_pos, NULL);
1857 
1858 	dev_dbg(&cxlr->dev, "region sort %s\n", rc ? "failed" : "successful");
1859 	return rc;
1860 }
1861 
/*
 * cxl_region_attach() - add endpoint decoder @cxled to region @cxlr
 * @cxlr: region to extend
 * @cxled: endpoint decoder being added
 * @pos: requested interleave position (user-created regions); expected to
 *       be negative for CXL_REGION_F_AUTO regions (see
 *       cxl_region_attach_auto())
 *
 * After validating interleave capability, mode, region state, free slots,
 * host-bridge reachability, target type and DPA size, one of two paths runs:
 *
 * - CXL_REGION_F_AUTO: the decoder is parked in the next free slot; once all
 *   interleave_ways decoders have arrived they are sorted, attached at each
 *   port, and the region state becomes CXL_CONFIG_COMMIT.
 * - otherwise: @pos is validated, the decoder is attached at each port and
 *   recorded at p->targets[@pos]; when the last slot fills, targets are set
 *   up and the state becomes CXL_CONFIG_ACTIVE.
 *
 * attach_target() calls this with cxl_region_rwsem held for write and
 * cxl_dpa_rwsem held for read.
 *
 * Return: 0 on success, negative errno otherwise.
 */
1862 static int cxl_region_attach(struct cxl_region *cxlr,
1863 			     struct cxl_endpoint_decoder *cxled, int pos)
1864 {
1865 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
1866 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1867 	struct cxl_region_params *p = &cxlr->params;
1868 	struct cxl_port *ep_port, *root_port;
1869 	struct cxl_dport *dport;
	/* NOTE(review): this initial value is overwritten right below */
1870 	int rc = -ENXIO;
1871 
1872 	rc = check_interleave_cap(&cxled->cxld, p->interleave_ways,
1873 				  p->interleave_granularity);
1874 	if (rc) {
1875 		dev_dbg(&cxlr->dev, "%s iw: %d ig: %d is not supported\n",
1876 			dev_name(&cxled->cxld.dev), p->interleave_ways,
1877 			p->interleave_granularity);
1878 		return rc;
1879 	}
1880 
1881 	if (cxled->mode != cxlr->mode) {
1882 		dev_dbg(&cxlr->dev, "%s region mode: %d mismatch: %d\n",
1883 			dev_name(&cxled->cxld.dev), cxlr->mode, cxled->mode);
1884 		return -EINVAL;
1885 	}
1886 
1887 	if (cxled->mode == CXL_DECODER_DEAD) {
1888 		dev_dbg(&cxlr->dev, "%s dead\n", dev_name(&cxled->cxld.dev));
1889 		return -ENODEV;
1890 	}
1891 
1892 	/* all full of members, or interleave config not established? */
1893 	if (p->state > CXL_CONFIG_INTERLEAVE_ACTIVE) {
1894 		dev_dbg(&cxlr->dev, "region already active\n");
1895 		return -EBUSY;
1896 	} else if (p->state < CXL_CONFIG_INTERLEAVE_ACTIVE) {
1897 		dev_dbg(&cxlr->dev, "interleave config missing\n");
1898 		return -ENXIO;
1899 	}
1900 
1901 	if (p->nr_targets >= p->interleave_ways) {
1902 		dev_dbg(&cxlr->dev, "region already has %d endpoints\n",
1903 			p->nr_targets);
1904 		return -EINVAL;
1905 	}
1906 
	/* the endpoint's host bridge must be a dport of the root decoder's port */
1907 	ep_port = cxled_to_port(cxled);
1908 	root_port = cxlrd_to_port(cxlrd);
1909 	dport = cxl_find_dport_by_dev(root_port, ep_port->host_bridge);
1910 	if (!dport) {
1911 		dev_dbg(&cxlr->dev, "%s:%s invalid target for %s\n",
1912 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
1913 			dev_name(cxlr->dev.parent));
1914 		return -ENXIO;
1915 	}
1916 
1917 	if (cxled->cxld.target_type != cxlr->type) {
1918 		dev_dbg(&cxlr->dev, "%s:%s type mismatch: %d vs %d\n",
1919 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
1920 			cxled->cxld.target_type, cxlr->type);
1921 		return -ENXIO;
1922 	}
1923 
1924 	if (!cxled->dpa_res) {
1925 		dev_dbg(&cxlr->dev, "%s:%s: missing DPA allocation.\n",
1926 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev));
1927 		return -ENXIO;
1928 	}
1929 
	/* each decoder must supply exactly 1/interleave_ways of the region */
1930 	if (resource_size(cxled->dpa_res) * p->interleave_ways !=
1931 	    resource_size(p->res)) {
1932 		dev_dbg(&cxlr->dev,
1933 			"%s:%s: decoder-size-%#llx * ways-%d != region-size-%#llx\n",
1934 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
1935 			(u64)resource_size(cxled->dpa_res), p->interleave_ways,
1936 			(u64)resource_size(p->res));
1937 		return -EINVAL;
1938 	}
1939 
1940 	cxl_region_perf_data_calculate(cxlr, cxled);
1941 
1942 	if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
1943 		int i;
1944 
1945 		rc = cxl_region_attach_auto(cxlr, cxled, pos);
1946 		if (rc)
1947 			return rc;
1948 
1949 		/* await more targets to arrive... */
1950 		if (p->nr_targets < p->interleave_ways)
1951 			return 0;
1952 
1953 		/*
1954 		 * All targets are here, which implies all PCI enumeration that
1955 		 * affects this region has been completed. Walk the topology to
1956 		 * sort the devices into their relative region decode position.
1957 		 */
1958 		rc = cxl_region_sort_targets(cxlr);
1959 		if (rc)
1960 			return rc;
1961 
		/* note: @cxled, @ep_port and @dport are reused as loop cursors */
1962 		for (i = 0; i < p->nr_targets; i++) {
1963 			cxled = p->targets[i];
1964 			ep_port = cxled_to_port(cxled);
1965 			dport = cxl_find_dport_by_dev(root_port,
1966 						      ep_port->host_bridge);
1967 			rc = cxl_region_attach_position(cxlr, cxlrd, cxled,
1968 							dport, i);
1969 			if (rc)
1970 				return rc;
1971 		}
1972 
1973 		rc = cxl_region_setup_targets(cxlr);
1974 		if (rc)
1975 			return rc;
1976 
1977 		/*
1978 		 * If target setup succeeds in the autodiscovery case
1979 		 * then the region is already committed.
1980 		 */
1981 		p->state = CXL_CONFIG_COMMIT;
1982 
1983 		return 0;
1984 	}
1985 
1986 	rc = cxl_region_validate_position(cxlr, cxled, pos);
1987 	if (rc)
1988 		return rc;
1989 
1990 	rc = cxl_region_attach_position(cxlr, cxlrd, cxled, dport, pos);
1991 	if (rc)
1992 		return rc;
1993 
1994 	p->targets[pos] = cxled;
1995 	cxled->pos = pos;
1996 	p->nr_targets++;
1997 
	/*
	 * NOTE(review): if cxl_region_setup_targets() fails here the function
	 * returns with p->targets[pos] / nr_targets already updated and the
	 * per-port attachments in place; no rollback is visible in this
	 * function - confirm this is intended.
	 */
1998 	if (p->nr_targets == p->interleave_ways) {
1999 		rc = cxl_region_setup_targets(cxlr);
2000 		if (rc)
2001 			return rc;
2002 		p->state = CXL_CONFIG_ACTIVE;
2003 	}
2004 
	/* mirror the region's interleave settings and HPA range into @cxled */
2005 	cxled->cxld.interleave_ways = p->interleave_ways;
2006 	cxled->cxld.interleave_granularity = p->interleave_granularity;
2007 	cxled->cxld.hpa_range = (struct range) {
2008 		.start = p->res->start,
2009 		.end = p->res->end,
2010 	};
2011 
2012 	if (p->nr_targets != p->interleave_ways)
2013 		return 0;
2014 
2015 	/*
2016 	 * Test the auto-discovery position calculator function
2017 	 * against this successfully created user-defined region.
2018 	 * A fail message here means that this interleave config
2019 	 * will fail when presented as CXL_REGION_F_AUTO.
2020 	 */
	/* the loop-local @cxled below shadows the function parameter */
2021 	for (int i = 0; i < p->nr_targets; i++) {
2022 		struct cxl_endpoint_decoder *cxled = p->targets[i];
2023 		int test_pos;
2024 
2025 		test_pos = cxl_calc_interleave_pos(cxled);
2026 		dev_dbg(&cxled->cxld.dev,
2027 			"Test cxl_calc_interleave_pos(): %s test_pos:%d cxled->pos:%d\n",
2028 			(test_pos == cxled->pos) ? "success" : "fail",
2029 			test_pos, cxled->pos);
2030 	}
2031 
2032 	return 0;
2033 }
2034 
/*
 * cxl_region_detach() - remove @cxled from the region it is attached to
 * @cxled: endpoint decoder to detach
 *
 * A decoder with no region is a successful no-op. Otherwise: a region past
 * CXL_CONFIG_ACTIVE has its decode reset first, @cxled is detached from
 * every port up to the CXL root, its slot in the region's target array is
 * cleared, and the region device's driver is released.
 *
 * Must be called with cxl_region_rwsem held for write. The lock is dropped
 * and re-acquired around device_release_driver().
 *
 * Return: 0 on success (including the "unexpected position" warning path),
 * or the error from cxl_region_decode_reset().
 */
2035 static int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
2036 {
2037 	struct cxl_port *iter, *ep_port = cxled_to_port(cxled);
2038 	struct cxl_region *cxlr = cxled->cxld.region;
2039 	struct cxl_region_params *p;
2040 	int rc = 0;
2041 
2042 	lockdep_assert_held_write(&cxl_region_rwsem);
2043 
2044 	if (!cxlr)
2045 		return 0;
2046 
2047 	p = &cxlr->params;
	/* hold a reference on the region device until the put_device() at out: */
2048 	get_device(&cxlr->dev);
2049 
2050 	if (p->state > CXL_CONFIG_ACTIVE) {
2051 		/*
2052 		 * TODO: tear down all impacted regions if a device is
2053 		 * removed out of order
2054 		 */
2055 		rc = cxl_region_decode_reset(cxlr, p->interleave_ways);
2056 		if (rc)
2057 			goto out;
2058 		p->state = CXL_CONFIG_ACTIVE;
2059 	}
2060 
2061 	for (iter = ep_port; !is_cxl_root(iter);
2062 	     iter = to_cxl_port(iter->dev.parent))
2063 		cxl_port_detach_region(iter, cxlr, cxled);
2064 
	/* sanity check: the decoder must sit at its own recorded position */
2065 	if (cxled->pos < 0 || cxled->pos >= p->interleave_ways ||
2066 	    p->targets[cxled->pos] != cxled) {
2067 		struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
2068 
2069 		dev_WARN_ONCE(&cxlr->dev, 1, "expected %s:%s at position %d\n",
2070 			      dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
2071 			      cxled->pos);
2072 		goto out;
2073 	}
2074 
2075 	if (p->state == CXL_CONFIG_ACTIVE) {
2076 		p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
2077 		cxl_region_teardown_targets(cxlr);
2078 	}
2079 	p->targets[cxled->pos] = NULL;
2080 	p->nr_targets--;
2081 	cxled->cxld.hpa_range = (struct range) {
2082 		.start = 0,
2083 		.end = -1,
2084 	};
2085 
2086 	/* notify the region driver that one of its targets has departed */
	/* cxl_region_rwsem is not held while device_release_driver() runs */
2087 	up_write(&cxl_region_rwsem);
2088 	device_release_driver(&cxlr->dev);
2089 	down_write(&cxl_region_rwsem);
2090 out:
2091 	put_device(&cxlr->dev);
2092 	return rc;
2093 }
2094 
/*
 * cxl_decoder_kill_region() - mark @cxled dead and detach it from its region
 * @cxled: endpoint decoder being retired
 *
 * Takes cxl_region_rwsem for write, sets the decoder's mode to
 * CXL_DECODER_DEAD (cxl_region_attach() rejects decoders in that mode) and
 * calls cxl_region_detach(). The detach return value is not propagated.
 */
2095 void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
2096 {
2097 	down_write(&cxl_region_rwsem);
2098 	cxled->mode = CXL_DECODER_DEAD;
2099 	cxl_region_detach(cxled);
2100 	up_write(&cxl_region_rwsem);
2101 }
2102 
2103 static int attach_target(struct cxl_region *cxlr,
2104 			 struct cxl_endpoint_decoder *cxled, int pos,
2105 			 unsigned int state)
2106 {
2107 	int rc = 0;
2108 
2109 	if (state == TASK_INTERRUPTIBLE)
2110 		rc = down_write_killable(&cxl_region_rwsem);
2111 	else
2112 		down_write(&cxl_region_rwsem);
2113 	if (rc)
2114 		return rc;
2115 
2116 	down_read(&cxl_dpa_rwsem);
2117 	rc = cxl_region_attach(cxlr, cxled, pos);
2118 	up_read(&cxl_dpa_rwsem);
2119 	up_write(&cxl_region_rwsem);
2120 	return rc;
2121 }
2122 
2123 static int detach_target(struct cxl_region *cxlr, int pos)
2124 {
2125 	struct cxl_region_params *p = &cxlr->params;
2126 	int rc;
2127 
2128 	rc = down_write_killable(&cxl_region_rwsem);
2129 	if (rc)
2130 		return rc;
2131 
2132 	if (pos >= p->interleave_ways) {
2133 		dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
2134 			p->interleave_ways);
2135 		rc = -ENXIO;
2136 		goto out;
2137 	}
2138 
2139 	if (!p->targets[pos]) {
2140 		rc = 0;
2141 		goto out;
2142 	}
2143 
2144 	rc = cxl_region_detach(p->targets[pos]);
2145 out:
2146 	up_write(&cxl_region_rwsem);
2147 	return rc;
2148 }
2149 
2150 static size_t store_targetN(struct cxl_region *cxlr, const char *buf, int pos,
2151 			    size_t len)
2152 {
2153 	int rc;
2154 
2155 	if (sysfs_streq(buf, "\n"))
2156 		rc = detach_target(cxlr, pos);
2157 	else {
2158 		struct device *dev;
2159 
2160 		dev = bus_find_device_by_name(&cxl_bus_type, NULL, buf);
2161 		if (!dev)
2162 			return -ENODEV;
2163 
2164 		if (!is_endpoint_decoder(dev)) {
2165 			rc = -EINVAL;
2166 			goto out;
2167 		}
2168 
2169 		rc = attach_target(cxlr, to_cxl_endpoint_decoder(dev), pos,
2170 				   TASK_INTERRUPTIBLE);
2171 out:
2172 		put_device(dev);
2173 	}
2174 
2175 	if (rc < 0)
2176 		return rc;
2177 	return len;
2178 }
2179 
/*
 * TARGET_ATTR_RW(n) - generate the read/write sysfs attribute "target<n>"
 *
 * Expands to target<n>_show() / target<n>_store(), which forward to
 * show_targetN() / store_targetN() with position @n, plus the matching
 * dev_attr_target<n> definition. One instance is emitted per supported
 * interleave position (0 through 15).
 */
#define TARGET_ATTR_RW(n)                                              \
static ssize_t target##n##_show(                                       \
	struct device *dev, struct device_attribute *attr, char *buf)  \
{                                                                      \
	return show_targetN(to_cxl_region(dev), buf, (n));             \
}                                                                      \
static ssize_t target##n##_store(struct device *dev,                   \
				 struct device_attribute *attr,        \
				 const char *buf, size_t len)          \
{                                                                      \
	return store_targetN(to_cxl_region(dev), buf, (n), len);       \
}                                                                      \
static DEVICE_ATTR_RW(target##n)

TARGET_ATTR_RW(0);
TARGET_ATTR_RW(1);
TARGET_ATTR_RW(2);
TARGET_ATTR_RW(3);
TARGET_ATTR_RW(4);
TARGET_ATTR_RW(5);
TARGET_ATTR_RW(6);
TARGET_ATTR_RW(7);
TARGET_ATTR_RW(8);
TARGET_ATTR_RW(9);
TARGET_ATTR_RW(10);
TARGET_ATTR_RW(11);
TARGET_ATTR_RW(12);
TARGET_ATTR_RW(13);
TARGET_ATTR_RW(14);
TARGET_ATTR_RW(15);
2210 
/*
 * All sixteen targetN attributes, in position order. The array index of each
 * entry equals its interleave position, which cxl_region_target_visible()
 * relies on when it compares the index against interleave_ways.
 */
static struct attribute *target_attrs[] = {
	&dev_attr_target0.attr,
	&dev_attr_target1.attr,
	&dev_attr_target2.attr,
	&dev_attr_target3.attr,
	&dev_attr_target4.attr,
	&dev_attr_target5.attr,
	&dev_attr_target6.attr,
	&dev_attr_target7.attr,
	&dev_attr_target8.attr,
	&dev_attr_target9.attr,
	&dev_attr_target10.attr,
	&dev_attr_target11.attr,
	&dev_attr_target12.attr,
	&dev_attr_target13.attr,
	&dev_attr_target14.attr,
	&dev_attr_target15.attr,
	NULL,
};
2230 
2231 static umode_t cxl_region_target_visible(struct kobject *kobj,
2232 					 struct attribute *a, int n)
2233 {
2234 	struct device *dev = kobj_to_dev(kobj);
2235 	struct cxl_region *cxlr = to_cxl_region(dev);
2236 	struct cxl_region_params *p = &cxlr->params;
2237 
2238 	if (n < p->interleave_ways)
2239 		return a->mode;
2240 	return 0;
2241 }
2242 
/* targetN attributes; per-attribute visibility depends on interleave_ways */
static const struct attribute_group cxl_region_target_group = {
	.attrs = target_attrs,
	.is_visible = cxl_region_target_visible,
};
2247 
/* Accessor for the targetN attribute group defined above its first user. */
static const struct attribute_group *get_cxl_region_target_group(void)
{
	return &cxl_region_target_group;
}
2252 
/* NULL-terminated list of sysfs attribute groups attached to cxl_region_type */
static const struct attribute_group *region_groups[] = {
	&cxl_base_attribute_group,
	&cxl_region_group,
	&cxl_region_target_group,
	&cxl_region_access0_coordinate_group,
	&cxl_region_access1_coordinate_group,
	NULL,
};
2261 
2262 static void cxl_region_release(struct device *dev)
2263 {
2264 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
2265 	struct cxl_region *cxlr = to_cxl_region(dev);
2266 	int id = atomic_read(&cxlrd->region_id);
2267 
2268 	/*
2269 	 * Try to reuse the recently idled id rather than the cached
2270 	 * next id to prevent the region id space from increasing
2271 	 * unnecessarily.
2272 	 */
2273 	if (cxlr->id < id)
2274 		if (atomic_try_cmpxchg(&cxlrd->region_id, &id, cxlr->id)) {
2275 			memregion_free(id);
2276 			goto out;
2277 		}
2278 
2279 	memregion_free(cxlr->id);
2280 out:
2281 	put_device(dev->parent);
2282 	kfree(cxlr);
2283 }
2284 
/* Device type shared by all CXL region devices; see is_cxl_region() */
const struct device_type cxl_region_type = {
	.name = "cxl_region",
	.release = cxl_region_release,
	.groups = region_groups
};
2290 
/* Return true when @dev is a CXL region device (its type is cxl_region_type) */
bool is_cxl_region(struct device *dev)
{
	return dev->type == &cxl_region_type;
}
EXPORT_SYMBOL_NS_GPL(is_cxl_region, CXL);
2296 
2297 static struct cxl_region *to_cxl_region(struct device *dev)
2298 {
2299 	if (dev_WARN_ONCE(dev, dev->type != &cxl_region_type,
2300 			  "not a cxl_region device\n"))
2301 		return NULL;
2302 
2303 	return container_of(dev, struct cxl_region, dev);
2304 }
2305 
2306 static void unregister_region(void *_cxlr)
2307 {
2308 	struct cxl_region *cxlr = _cxlr;
2309 	struct cxl_region_params *p = &cxlr->params;
2310 	int i;
2311 
2312 	unregister_memory_notifier(&cxlr->memory_notifier);
2313 	device_del(&cxlr->dev);
2314 
2315 	/*
2316 	 * Now that region sysfs is shutdown, the parameter block is now
2317 	 * read-only, so no need to hold the region rwsem to access the
2318 	 * region parameters.
2319 	 */
2320 	for (i = 0; i < p->interleave_ways; i++)
2321 		detach_target(cxlr, i);
2322 
2323 	cxl_region_iomem_release(cxlr);
2324 	put_device(&cxlr->dev);
2325 }
2326 
2327 static struct lock_class_key cxl_region_key;
2328 
2329 static struct cxl_region *cxl_region_alloc(struct cxl_root_decoder *cxlrd, int id)
2330 {
2331 	struct cxl_region *cxlr;
2332 	struct device *dev;
2333 
2334 	cxlr = kzalloc(sizeof(*cxlr), GFP_KERNEL);
2335 	if (!cxlr) {
2336 		memregion_free(id);
2337 		return ERR_PTR(-ENOMEM);
2338 	}
2339 
2340 	dev = &cxlr->dev;
2341 	device_initialize(dev);
2342 	lockdep_set_class(&dev->mutex, &cxl_region_key);
2343 	dev->parent = &cxlrd->cxlsd.cxld.dev;
2344 	/*
2345 	 * Keep root decoder pinned through cxl_region_release to fixup
2346 	 * region id allocations
2347 	 */
2348 	get_device(dev->parent);
2349 	device_set_pm_not_required(dev);
2350 	dev->bus = &cxl_bus_type;
2351 	dev->type = &cxl_region_type;
2352 	cxlr->id = id;
2353 
2354 	return cxlr;
2355 }
2356 
2357 static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid)
2358 {
2359 	int cset = 0;
2360 	int rc;
2361 
2362 	for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
2363 		if (cxlr->coord[i].read_bandwidth) {
2364 			rc = 0;
2365 			if (cxl_need_node_perf_attrs_update(nid))
2366 				node_set_perf_attrs(nid, &cxlr->coord[i], i);
2367 			else
2368 				rc = cxl_update_hmat_access_coordinates(nid, cxlr, i);
2369 
2370 			if (rc == 0)
2371 				cset++;
2372 		}
2373 	}
2374 
2375 	if (!cset)
2376 		return false;
2377 
2378 	rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_access0_group());
2379 	if (rc)
2380 		dev_dbg(&cxlr->dev, "Failed to update access0 group\n");
2381 
2382 	rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_access1_group());
2383 	if (rc)
2384 		dev_dbg(&cxlr->dev, "Failed to update access1 group\n");
2385 
2386 	return true;
2387 }
2388 
2389 static int cxl_region_perf_attrs_callback(struct notifier_block *nb,
2390 					  unsigned long action, void *arg)
2391 {
2392 	struct cxl_region *cxlr = container_of(nb, struct cxl_region,
2393 					       memory_notifier);
2394 	struct cxl_region_params *p = &cxlr->params;
2395 	struct cxl_endpoint_decoder *cxled = p->targets[0];
2396 	struct cxl_decoder *cxld = &cxled->cxld;
2397 	struct memory_notify *mnb = arg;
2398 	int nid = mnb->status_change_nid;
2399 	int region_nid;
2400 
2401 	if (nid == NUMA_NO_NODE || action != MEM_ONLINE)
2402 		return NOTIFY_DONE;
2403 
2404 	region_nid = phys_to_target_node(cxld->hpa_range.start);
2405 	if (nid != region_nid)
2406 		return NOTIFY_DONE;
2407 
2408 	if (!cxl_region_update_coordinates(cxlr, nid))
2409 		return NOTIFY_DONE;
2410 
2411 	return NOTIFY_OK;
2412 }
2413 
/**
 * devm_cxl_add_region - Adds a region to a decoder
 * @cxlrd: root decoder
 * @id: memregion id to use for the new region; ownership passes to this
 *	function, which releases it on failure (directly in
 *	cxl_region_alloc(), or via cxl_region_release() once the device
 *	has been initialized)
 * @mode: mode for the endpoint decoders of this region
 * @type: select whether this is an expander or accelerator (type-2 or type-3)
 *
 * This is the second step of region initialization. Regions exist within an
 * address space which is mapped by a @cxlrd.
 *
 * The region device is named "regionZ" where Z is @id. After the device is
 * added, a memory notifier is registered for it and unregister_region() is
 * installed as a devm action on the root port's uport device.
 *
 * Return: pointer to the new region on success, ERR_PTR() with a negative
 * error code on failure.
 */
static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
					      int id,
					      enum cxl_decoder_mode mode,
					      enum cxl_decoder_type type)
{
	struct cxl_port *port = to_cxl_port(cxlrd->cxlsd.cxld.dev.parent);
	struct cxl_region *cxlr;
	struct device *dev;
	int rc;

	cxlr = cxl_region_alloc(cxlrd, id);
	if (IS_ERR(cxlr))
		return cxlr;
	cxlr->mode = mode;
	cxlr->type = type;

	dev = &cxlr->dev;
	rc = dev_set_name(dev, "region%d", id);
	if (rc)
		goto err;

	rc = device_add(dev);
	if (rc)
		goto err;

	/* NOTE(review): the return value of register_memory_notifier() is ignored */
	cxlr->memory_notifier.notifier_call = cxl_region_perf_attrs_callback;
	cxlr->memory_notifier.priority = CXL_CALLBACK_PRI;
	register_memory_notifier(&cxlr->memory_notifier);

	/* on failure the "_or_reset" variant has already run unregister_region() */
	rc = devm_add_action_or_reset(port->uport_dev, unregister_region, cxlr);
	if (rc)
		return ERR_PTR(rc);

	dev_dbg(port->uport_dev, "%s: created %s\n",
		dev_name(&cxlrd->cxlsd.cxld.dev), dev_name(dev));
	return cxlr;

err:
	/* drops the initial reference; cxl_region_release() frees @id and @cxlr */
	put_device(dev);
	return ERR_PTR(rc);
}
2468 
2469 static ssize_t __create_region_show(struct cxl_root_decoder *cxlrd, char *buf)
2470 {
2471 	return sysfs_emit(buf, "region%u\n", atomic_read(&cxlrd->region_id));
2472 }
2473 
2474 static ssize_t create_pmem_region_show(struct device *dev,
2475 				       struct device_attribute *attr, char *buf)
2476 {
2477 	return __create_region_show(to_cxl_root_decoder(dev), buf);
2478 }
2479 
2480 static ssize_t create_ram_region_show(struct device *dev,
2481 				      struct device_attribute *attr, char *buf)
2482 {
2483 	return __create_region_show(to_cxl_root_decoder(dev), buf);
2484 }
2485 
2486 static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd,
2487 					  enum cxl_decoder_mode mode, int id)
2488 {
2489 	int rc;
2490 
2491 	switch (mode) {
2492 	case CXL_DECODER_RAM:
2493 	case CXL_DECODER_PMEM:
2494 		break;
2495 	default:
2496 		dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %d\n", mode);
2497 		return ERR_PTR(-EINVAL);
2498 	}
2499 
2500 	rc = memregion_alloc(GFP_KERNEL);
2501 	if (rc < 0)
2502 		return ERR_PTR(rc);
2503 
2504 	if (atomic_cmpxchg(&cxlrd->region_id, id, rc) != id) {
2505 		memregion_free(rc);
2506 		return ERR_PTR(-EBUSY);
2507 	}
2508 
2509 	return devm_cxl_add_region(cxlrd, id, mode, CXL_DECODER_HOSTONLYMEM);
2510 }
2511 
2512 static ssize_t create_pmem_region_store(struct device *dev,
2513 					struct device_attribute *attr,
2514 					const char *buf, size_t len)
2515 {
2516 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
2517 	struct cxl_region *cxlr;
2518 	int rc, id;
2519 
2520 	rc = sscanf(buf, "region%d\n", &id);
2521 	if (rc != 1)
2522 		return -EINVAL;
2523 
2524 	cxlr = __create_region(cxlrd, CXL_DECODER_PMEM, id);
2525 	if (IS_ERR(cxlr))
2526 		return PTR_ERR(cxlr);
2527 
2528 	return len;
2529 }
2530 DEVICE_ATTR_RW(create_pmem_region);
2531 
2532 static ssize_t create_ram_region_store(struct device *dev,
2533 				       struct device_attribute *attr,
2534 				       const char *buf, size_t len)
2535 {
2536 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
2537 	struct cxl_region *cxlr;
2538 	int rc, id;
2539 
2540 	rc = sscanf(buf, "region%d\n", &id);
2541 	if (rc != 1)
2542 		return -EINVAL;
2543 
2544 	cxlr = __create_region(cxlrd, CXL_DECODER_RAM, id);
2545 	if (IS_ERR(cxlr))
2546 		return PTR_ERR(cxlr);
2547 
2548 	return len;
2549 }
2550 DEVICE_ATTR_RW(create_ram_region);
2551 
2552 static ssize_t region_show(struct device *dev, struct device_attribute *attr,
2553 			   char *buf)
2554 {
2555 	struct cxl_decoder *cxld = to_cxl_decoder(dev);
2556 	ssize_t rc;
2557 
2558 	rc = down_read_interruptible(&cxl_region_rwsem);
2559 	if (rc)
2560 		return rc;
2561 
2562 	if (cxld->region)
2563 		rc = sysfs_emit(buf, "%s\n", dev_name(&cxld->region->dev));
2564 	else
2565 		rc = sysfs_emit(buf, "\n");
2566 	up_read(&cxl_region_rwsem);
2567 
2568 	return rc;
2569 }
2570 DEVICE_ATTR_RO(region);
2571 
2572 static struct cxl_region *
2573 cxl_find_region_by_name(struct cxl_root_decoder *cxlrd, const char *name)
2574 {
2575 	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
2576 	struct device *region_dev;
2577 
2578 	region_dev = device_find_child_by_name(&cxld->dev, name);
2579 	if (!region_dev)
2580 		return ERR_PTR(-ENODEV);
2581 
2582 	return to_cxl_region(region_dev);
2583 }
2584 
/*
 * sysfs store for delete_region: @buf names the region (a child of this root
 * decoder) to delete.
 *
 * Returns @len on success or the lookup error (-ENODEV) when no child with
 * that name exists.
 */
static ssize_t delete_region_store(struct device *dev,
				   struct device_attribute *attr,
				   const char *buf, size_t len)
{
	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
	struct cxl_port *port = to_cxl_port(dev->parent);
	struct cxl_region *cxlr;

	cxlr = cxl_find_region_by_name(cxlrd, buf);
	if (IS_ERR(cxlr))
		return PTR_ERR(cxlr);

	/*
	 * Run the unregister_region() action that devm_cxl_add_region()
	 * installed on the same uport device, then drop the reference that
	 * the name lookup returned.
	 */
	devm_release_action(port->uport_dev, unregister_region, cxlr);
	put_device(&cxlr->dev);

	return len;
}
DEVICE_ATTR_WO(delete_region);
2603 
2604 static void cxl_pmem_region_release(struct device *dev)
2605 {
2606 	struct cxl_pmem_region *cxlr_pmem = to_cxl_pmem_region(dev);
2607 	int i;
2608 
2609 	for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
2610 		struct cxl_memdev *cxlmd = cxlr_pmem->mapping[i].cxlmd;
2611 
2612 		put_device(&cxlmd->dev);
2613 	}
2614 
2615 	kfree(cxlr_pmem);
2616 }
2617 
/* pmem region devices only expose the common CXL base attributes */
static const struct attribute_group *cxl_pmem_region_attribute_groups[] = {
	&cxl_base_attribute_group,
	NULL,
};
2622 
/* Device type for pmem region bridge devices; see is_cxl_pmem_region() */
const struct device_type cxl_pmem_region_type = {
	.name = "cxl_pmem_region",
	.release = cxl_pmem_region_release,
	.groups = cxl_pmem_region_attribute_groups,
};
2628 
/* Return true when @dev is a pmem region device (type cxl_pmem_region_type) */
bool is_cxl_pmem_region(struct device *dev)
{
	return dev->type == &cxl_pmem_region_type;
}
EXPORT_SYMBOL_NS_GPL(is_cxl_pmem_region, CXL);
2634 
/*
 * Convert @dev to its containing struct cxl_pmem_region.
 *
 * Warns once and returns NULL when @dev is not a pmem region device.
 */
struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev)
{
	if (dev_WARN_ONCE(dev, !is_cxl_pmem_region(dev),
			  "not a cxl_pmem_region device\n"))
		return NULL;
	return container_of(dev, struct cxl_pmem_region, dev);
}
EXPORT_SYMBOL_NS_GPL(to_cxl_pmem_region, CXL);
2643 
/*
 * State carried across the per-decoder poison walk of one endpoint port.
 *
 * @port:   endpoint port being walked; its commit_end marks the last decoder
 * @mode:   mode of the decoder at commit_end, recorded when it is reached
 * @offset: first DPA past that decoder's resource, i.e. where the unmapped
 *          remainder begins
 */
struct cxl_poison_context {
	struct cxl_port *port;
	enum cxl_decoder_mode mode;
	u64 offset;
};
2649 
/*
 * Read the poison list for the device capacity that lies beyond the last
 * committed decoder, as recorded in @ctx by poison_by_decoder().
 *
 * Returns 0 on success (including "nothing left to scan") or the error from
 * cxl_mem_get_poison(). -EFAULT from the ram-remainder read is treated as
 * success.
 */
static int cxl_get_poison_unmapped(struct cxl_memdev *cxlmd,
				   struct cxl_poison_context *ctx)
{
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	u64 offset, length;
	int rc = 0;

	/*
	 * Collect poison for the remaining unmapped resources
	 * after poison is collected by committed endpoints.
	 *
	 * Knowing that PMEM must always follow RAM, get poison
	 * for unmapped resources based on the last decoder's mode:
	 *	ram: scan remains of ram range, then any pmem range
	 *	pmem: scan remains of pmem range
	 */

	if (ctx->mode == CXL_DECODER_RAM) {
		/* remainder of the ram partition past the last decoder */
		offset = ctx->offset;
		length = resource_size(&cxlds->ram_res) - offset;
		rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
		if (rc == -EFAULT)
			rc = 0;
		if (rc)
			return rc;
	}
	if (ctx->mode == CXL_DECODER_PMEM) {
		/* remainder of the whole DPA space past the last decoder */
		offset = ctx->offset;
		length = resource_size(&cxlds->dpa_res) - offset;
		if (!length)
			return 0;
	} else if (resource_size(&cxlds->pmem_res)) {
		/* last decoder was not pmem: scan the entire pmem partition */
		offset = cxlds->pmem_res.start;
		length = resource_size(&cxlds->pmem_res);
	} else {
		return 0;
	}

	return cxl_mem_get_poison(cxlmd, offset, length, NULL);
}
2690 
/*
 * device_for_each_child() callback: read the poison list for the DPA range
 * of one endpoint decoder (and for any skipped range in front of it).
 *
 * @dev: child device of the endpoint port; non-decoders are ignored
 * @arg: struct cxl_poison_context shared across the walk
 *
 * Returns 0 to continue the walk, 1 once the decoder at the port's
 * commit_end has been processed (ending the walk), or a negative error from
 * cxl_mem_get_poison().
 */
static int poison_by_decoder(struct device *dev, void *arg)
{
	struct cxl_poison_context *ctx = arg;
	struct cxl_endpoint_decoder *cxled;
	struct cxl_memdev *cxlmd;
	u64 offset, length;
	int rc = 0;

	if (!is_endpoint_decoder(dev))
		return rc;

	/* decoders without DPA capacity have nothing to scan */
	cxled = to_cxl_endpoint_decoder(dev);
	if (!cxled->dpa_res || !resource_size(cxled->dpa_res))
		return rc;

	/*
	 * Regions are only created with single mode decoders: pmem or ram.
	 * Linux does not support mixed mode decoders. This means that
	 * reading poison per endpoint decoder adheres to the requirement
	 * that poison reads of pmem and ram must be separated.
	 * CXL 3.0 Spec 8.2.9.8.4.1
	 */
	if (cxled->mode == CXL_DECODER_MIXED) {
		dev_dbg(dev, "poison list read unsupported in mixed mode\n");
		return rc;
	}

	cxlmd = cxled_to_memdev(cxled);
	if (cxled->skip) {
		/* the skipped DPA range sits immediately below the decoder's */
		offset = cxled->dpa_res->start - cxled->skip;
		length = cxled->skip;
		rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
		if (rc == -EFAULT && cxled->mode == CXL_DECODER_RAM)
			rc = 0;
		if (rc)
			return rc;
	}

	offset = cxled->dpa_res->start;
	length = cxled->dpa_res->end - offset + 1;
	rc = cxl_mem_get_poison(cxlmd, offset, length, cxled->cxld.region);
	if (rc == -EFAULT && cxled->mode == CXL_DECODER_RAM)
		rc = 0;
	if (rc)
		return rc;

	/* Iterate until commit_end is reached */
	if (cxled->cxld.id == ctx->port->commit_end) {
		/* remember where the unmapped remainder starts and its mode */
		ctx->offset = cxled->dpa_res->end + 1;
		ctx->mode = cxled->mode;
		return 1;
	}

	return 0;
}
2746 
2747 int cxl_get_poison_by_endpoint(struct cxl_port *port)
2748 {
2749 	struct cxl_poison_context ctx;
2750 	int rc = 0;
2751 
2752 	ctx = (struct cxl_poison_context) {
2753 		.port = port
2754 	};
2755 
2756 	rc = device_for_each_child(&port->dev, &ctx, poison_by_decoder);
2757 	if (rc == 1)
2758 		rc = cxl_get_poison_unmapped(to_cxl_memdev(port->uport_dev),
2759 					     &ctx);
2760 
2761 	return rc;
2762 }
2763 
/*
 * Search context for cxl_dpa_to_region().
 *
 * @cxlr: result; region of the decoder that maps @dpa (may stay NULL)
 * @dpa:  device physical address being looked up
 */
struct cxl_dpa_to_region_context {
	struct cxl_region *cxlr;
	u64 dpa;
};
2768 
2769 static int __cxl_dpa_to_region(struct device *dev, void *arg)
2770 {
2771 	struct cxl_dpa_to_region_context *ctx = arg;
2772 	struct cxl_endpoint_decoder *cxled;
2773 	struct cxl_region *cxlr;
2774 	u64 dpa = ctx->dpa;
2775 
2776 	if (!is_endpoint_decoder(dev))
2777 		return 0;
2778 
2779 	cxled = to_cxl_endpoint_decoder(dev);
2780 	if (!cxled || !cxled->dpa_res || !resource_size(cxled->dpa_res))
2781 		return 0;
2782 
2783 	if (dpa > cxled->dpa_res->end || dpa < cxled->dpa_res->start)
2784 		return 0;
2785 
2786 	/*
2787 	 * Stop the region search (return 1) when an endpoint mapping is
2788 	 * found. The region may not be fully constructed so offering
2789 	 * the cxlr in the context structure is not guaranteed.
2790 	 */
2791 	cxlr = cxled->cxld.region;
2792 	if (cxlr)
2793 		dev_dbg(dev, "dpa:0x%llx mapped in region:%s\n", dpa,
2794 			dev_name(&cxlr->dev));
2795 	else
2796 		dev_dbg(dev, "dpa:0x%llx mapped in endpoint:%s\n", dpa,
2797 			dev_name(dev));
2798 
2799 	ctx->cxlr = cxlr;
2800 
2801 	return 1;
2802 }
2803 
2804 struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev *cxlmd, u64 dpa)
2805 {
2806 	struct cxl_dpa_to_region_context ctx;
2807 	struct cxl_port *port;
2808 
2809 	ctx = (struct cxl_dpa_to_region_context) {
2810 		.dpa = dpa,
2811 	};
2812 	port = cxlmd->endpoint;
2813 	if (port && is_cxl_endpoint(port) && cxl_num_decoders_committed(port))
2814 		device_for_each_child(&port->dev, &ctx, __cxl_dpa_to_region);
2815 
2816 	return ctx.cxlr;
2817 }
2818 
/*
 * Sanity-check a translated host physical address.
 *
 * @hpa:  address to validate
 * @cxlr: region the address is expected to belong to
 * @pos:  interleave position of the device the address was derived from
 *
 * Returns true when @hpa lies inside the region's resource and, within its
 * (granularity * ways)-sized interleave stripe, inside the granule that
 * belongs to @pos. Logs a debug message and returns false otherwise.
 */
static bool cxl_is_hpa_in_range(u64 hpa, struct cxl_region *cxlr, int pos)
{
	struct cxl_region_params *p = &cxlr->params;
	int gran = p->interleave_granularity;
	int ways = p->interleave_ways;
	u64 offset;

	/* Is the hpa within this region at all */
	if (hpa < p->res->start || hpa > p->res->end) {
		dev_dbg(&cxlr->dev,
			"Addr trans fail: hpa 0x%llx not in region\n", hpa);
		return false;
	}

	/* Is the hpa in an expected chunk for its pos(-ition) */
	offset = hpa - p->res->start;
	/*
	 * do_div() divides its first argument in place and evaluates to the
	 * remainder; the assignment keeps only that remainder, i.e. the
	 * offset within one interleave stripe.
	 */
	offset = do_div(offset, gran * ways);
	if ((offset >= pos * gran) && (offset < (pos + 1) * gran))
		return true;

	dev_dbg(&cxlr->dev,
		"Addr trans fail: hpa 0x%llx not in expected chunk\n", hpa);

	return false;
}
2844 
/*
 * Translate a device physical address into a host physical address.
 *
 * @dpa:   device physical address on the memdev behind @cxled
 * @cxlr:  region that @cxled is a target of
 * @cxled: endpoint decoder mapping @dpa; its position selects the granule
 *
 * Returns the HPA, or ULLONG_MAX when the computed address fails the
 * cxl_is_hpa_in_range() sanity check.
 */
static u64 cxl_dpa_to_hpa(u64 dpa,  struct cxl_region *cxlr,
			  struct cxl_endpoint_decoder *cxled)
{
	u64 dpa_offset, hpa_offset, bits_upper, mask_upper, hpa;
	struct cxl_region_params *p = &cxlr->params;
	int pos = cxled->pos;
	u16 eig = 0;
	u8 eiw = 0;

	/* encoded interleave ways / granularity as used by the decode logic */
	ways_to_eiw(p->interleave_ways, &eiw);
	granularity_to_eig(p->interleave_granularity, &eig);

	/*
	 * The device position in the region interleave set was removed
	 * from the offset at HPA->DPA translation. To reconstruct the
	 * HPA, place the 'pos' in the offset.
	 *
	 * The placement of 'pos' in the HPA is determined by interleave
	 * ways and granularity and is defined in the CXL Spec 3.0 Section
	 * 8.2.4.19.13 Implementation Note: Device Decode Logic
	 */

	/* Remove the dpa base */
	dpa_offset = dpa - cxl_dpa_resource_start(cxled);

	/* everything at or above the granule-offset bits (bit eig + 8 upward) */
	mask_upper = GENMASK_ULL(51, eig + 8);

	if (eiw < 8) {
		/* shift the upper bits left by eiw and drop pos in below them */
		hpa_offset = (dpa_offset & mask_upper) << eiw;
		hpa_offset |= pos << (eig + 8);
	} else {
		/*
		 * NOTE(review): eiw >= 8 presumably encodes the 3/6/12-way
		 * interleaves, hence the multiply by 3 — confirm against the
		 * spec section cited above.
		 */
		bits_upper = (dpa_offset & mask_upper) >> (eig + 8);
		bits_upper = bits_upper * 3;
		hpa_offset = ((bits_upper << (eiw - 8)) + pos) << (eig + 8);
	}

	/* The lower bits remain unchanged */
	hpa_offset |= dpa_offset & GENMASK_ULL(eig + 7, 0);

	/* Apply the hpa_offset to the region base address */
	hpa = hpa_offset + p->res->start;

	if (!cxl_is_hpa_in_range(hpa, cxlr, cxled->pos))
		return ULLONG_MAX;

	return hpa;
}
2892 
2893 u64 cxl_trace_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
2894 		  u64 dpa)
2895 {
2896 	struct cxl_region_params *p = &cxlr->params;
2897 	struct cxl_endpoint_decoder *cxled = NULL;
2898 
2899 	for (int i = 0; i <  p->nr_targets; i++) {
2900 		cxled = p->targets[i];
2901 		if (cxlmd == cxled_to_memdev(cxled))
2902 			break;
2903 	}
2904 	if (!cxled || cxlmd != cxled_to_memdev(cxled))
2905 		return ULLONG_MAX;
2906 
2907 	return cxl_dpa_to_hpa(dpa, cxlr, cxled);
2908 }
2909 
static struct lock_class_key cxl_pmem_region_key;

/*
 * Allocate and initialize (but do not register) the pmem region bridge
 * device for @cxlr.
 *
 * On success the new object is stored in cxlr->cxlr_pmem and the nvdimm
 * bridge found for the first target is stored in cxlr->cxl_nvb.
 *
 * Returns 0 on success, -ENXIO when the region is not in the committed
 * state, -ENOMEM on allocation failure, or -ENODEV when no nvdimm bridge is
 * found.
 */
static int cxl_pmem_region_alloc(struct cxl_region *cxlr)
{
	struct cxl_region_params *p = &cxlr->params;
	struct cxl_nvdimm_bridge *cxl_nvb;
	struct device *dev;
	int i;

	/* read-held for the rest of the function; released on every return */
	guard(rwsem_read)(&cxl_region_rwsem);
	if (p->state != CXL_CONFIG_COMMIT)
		return -ENXIO;

	/* freed automatically on early return until no_free_ptr() below */
	struct cxl_pmem_region *cxlr_pmem __free(kfree) =
		kzalloc(struct_size(cxlr_pmem, mapping, p->nr_targets), GFP_KERNEL);
	if (!cxlr_pmem)
		return -ENOMEM;

	cxlr_pmem->hpa_range.start = p->res->start;
	cxlr_pmem->hpa_range.end = p->res->end;

	/* Snapshot the region configuration underneath the cxl_region_rwsem */
	cxlr_pmem->nr_mappings = p->nr_targets;
	for (i = 0; i < p->nr_targets; i++) {
		struct cxl_endpoint_decoder *cxled = p->targets[i];
		struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
		struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];

		/*
		 * Regions never span CXL root devices, so by definition the
		 * bridge for one device is the same for all.
		 */
		if (i == 0) {
			/* only exit inside the loop; no memdev ref taken yet */
			cxl_nvb = cxl_find_nvdimm_bridge(cxlmd->endpoint);
			if (!cxl_nvb)
				return -ENODEV;
			cxlr->cxl_nvb = cxl_nvb;
		}
		m->cxlmd = cxlmd;
		/* dropped again in cxl_pmem_region_release() */
		get_device(&cxlmd->dev);
		m->start = cxled->dpa_res->start;
		m->size = resource_size(cxled->dpa_res);
		m->position = i;
	}

	dev = &cxlr_pmem->dev;
	device_initialize(dev);
	lockdep_set_class(&dev->mutex, &cxl_pmem_region_key);
	device_set_pm_not_required(dev);
	dev->parent = &cxlr->dev;
	dev->bus = &cxl_bus_type;
	dev->type = &cxl_pmem_region_type;
	cxlr_pmem->cxlr = cxlr;
	/* ownership moves to the region; disarm the automatic kfree() */
	cxlr->cxlr_pmem = no_free_ptr(cxlr_pmem);

	return 0;
}
2967 
/* Device release callback for a dax region: free the containing object */
static void cxl_dax_region_release(struct device *dev)
{
	kfree(to_cxl_dax_region(dev));
}
2974 
/* dax region devices only expose the common CXL base attributes */
static const struct attribute_group *cxl_dax_region_attribute_groups[] = {
	&cxl_base_attribute_group,
	NULL,
};
2979 
/* Device type for dax region devices; see is_cxl_dax_region() */
const struct device_type cxl_dax_region_type = {
	.name = "cxl_dax_region",
	.release = cxl_dax_region_release,
	.groups = cxl_dax_region_attribute_groups,
};
2985 
/* Return true when @dev is a dax region device (type cxl_dax_region_type) */
static bool is_cxl_dax_region(struct device *dev)
{
	return dev->type == &cxl_dax_region_type;
}
2990 
/*
 * Convert @dev to its containing struct cxl_dax_region.
 *
 * Warns once and returns NULL when @dev is not a dax region device.
 */
struct cxl_dax_region *to_cxl_dax_region(struct device *dev)
{
	if (dev_WARN_ONCE(dev, !is_cxl_dax_region(dev),
			  "not a cxl_dax_region device\n"))
		return NULL;
	return container_of(dev, struct cxl_dax_region, dev);
}
EXPORT_SYMBOL_NS_GPL(to_cxl_dax_region, CXL);
2999 
3000 static struct lock_class_key cxl_dax_region_key;
3001 
3002 static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr)
3003 {
3004 	struct cxl_region_params *p = &cxlr->params;
3005 	struct cxl_dax_region *cxlr_dax;
3006 	struct device *dev;
3007 
3008 	down_read(&cxl_region_rwsem);
3009 	if (p->state != CXL_CONFIG_COMMIT) {
3010 		cxlr_dax = ERR_PTR(-ENXIO);
3011 		goto out;
3012 	}
3013 
3014 	cxlr_dax = kzalloc(sizeof(*cxlr_dax), GFP_KERNEL);
3015 	if (!cxlr_dax) {
3016 		cxlr_dax = ERR_PTR(-ENOMEM);
3017 		goto out;
3018 	}
3019 
3020 	cxlr_dax->hpa_range.start = p->res->start;
3021 	cxlr_dax->hpa_range.end = p->res->end;
3022 
3023 	dev = &cxlr_dax->dev;
3024 	cxlr_dax->cxlr = cxlr;
3025 	device_initialize(dev);
3026 	lockdep_set_class(&dev->mutex, &cxl_dax_region_key);
3027 	device_set_pm_not_required(dev);
3028 	dev->parent = &cxlr->dev;
3029 	dev->bus = &cxl_bus_type;
3030 	dev->type = &cxl_dax_region_type;
3031 out:
3032 	up_read(&cxl_region_rwsem);
3033 
3034 	return cxlr_dax;
3035 }
3036 
/*
 * devm action (registered on the nvdimm bridge device) that unregisters a
 * pmem region device and severs the links between it and its CXL region.
 * Must be called with the bridge's device lock held.
 */
static void cxlr_pmem_unregister(void *_cxlr_pmem)
{
	struct cxl_pmem_region *cxlr_pmem = _cxlr_pmem;
	struct cxl_region *cxlr = cxlr_pmem->cxlr;
	struct cxl_nvdimm_bridge *cxl_nvb = cxlr->cxl_nvb;

	/*
	 * Either the bridge is in ->remove() context under the device_lock(),
	 * or cxlr_release_nvdimm() is cancelling the bridge's release action
	 * for @cxlr_pmem and doing it itself (while manually holding the bridge
	 * lock).
	 */
	device_lock_assert(&cxl_nvb->dev);
	/* clear both back-pointers before the device goes away */
	cxlr->cxlr_pmem = NULL;
	cxlr_pmem->cxlr = NULL;
	device_unregister(&cxlr_pmem->dev);
}
3054 
/*
 * devm action (registered on the region device) that tears down the
 * region's pmem bridge device, if it still exists, and drops the region's
 * reference on the nvdimm bridge.
 */
static void cxlr_release_nvdimm(void *_cxlr)
{
	struct cxl_region *cxlr = _cxlr;
	struct cxl_nvdimm_bridge *cxl_nvb = cxlr->cxl_nvb;

	device_lock(&cxl_nvb->dev);
	/*
	 * cxlr_pmem is cleared by cxlr_pmem_unregister(); if it is still set
	 * the bridge has not run that action yet, so cancel and run it here.
	 */
	if (cxlr->cxlr_pmem)
		devm_release_action(&cxl_nvb->dev, cxlr_pmem_unregister,
				    cxlr->cxlr_pmem);
	device_unlock(&cxl_nvb->dev);
	cxlr->cxl_nvb = NULL;
	put_device(&cxl_nvb->dev);
}
3068 
/**
 * devm_cxl_add_pmem_region() - add a cxl_region-to-nd_region bridge
 * @cxlr: parent CXL region for this pmem region bridge device
 *
 * Allocates the pmem region device, registers it as "pmem_regionN" (N being
 * the region id), ties its unregistration to the nvdimm bridge driver's
 * lifetime and ties the release of the bridge reference to @cxlr's lifetime.
 *
 * Return: 0 on success negative error code on failure.
 */
static int devm_cxl_add_pmem_region(struct cxl_region *cxlr)
{
	struct cxl_pmem_region *cxlr_pmem;
	struct cxl_nvdimm_bridge *cxl_nvb;
	struct device *dev;
	int rc;

	rc = cxl_pmem_region_alloc(cxlr);
	if (rc)
		return rc;
	/* both were published on @cxlr by cxl_pmem_region_alloc() */
	cxlr_pmem = cxlr->cxlr_pmem;
	cxl_nvb = cxlr->cxl_nvb;

	dev = &cxlr_pmem->dev;
	rc = dev_set_name(dev, "pmem_region%d", cxlr->id);
	if (rc)
		goto err;

	rc = device_add(dev);
	if (rc)
		goto err;

	dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
		dev_name(dev));

	/*
	 * Only hand the device to the bridge if a driver is bound to it; the
	 * check and the registration happen under the bridge's device lock.
	 */
	device_lock(&cxl_nvb->dev);
	if (cxl_nvb->dev.driver)
		rc = devm_add_action_or_reset(&cxl_nvb->dev,
					      cxlr_pmem_unregister, cxlr_pmem);
	else
		rc = -ENXIO;
	device_unlock(&cxl_nvb->dev);

	/*
	 * NOTE(review): on the -ENXIO branch the already-added pmem region
	 * device is not unregistered before jumping to err_bridge — confirm
	 * whether that is intentional.
	 */
	if (rc)
		goto err_bridge;

	/* @cxlr carries a reference on @cxl_nvb until cxlr_release_nvdimm */
	return devm_add_action_or_reset(&cxlr->dev, cxlr_release_nvdimm, cxlr);

err:
	/*
	 * NOTE(review): cxlr->cxlr_pmem is not cleared on this path although
	 * the final put_device() releases the object — confirm no later
	 * reader relies on it.
	 */
	put_device(dev);
err_bridge:
	put_device(&cxl_nvb->dev);
	cxlr->cxl_nvb = NULL;
	return rc;
}
3121 
/* devm action (registered on the region device): unregister the dax region */
static void cxlr_dax_unregister(void *_cxlr_dax)
{
	struct cxl_dax_region *cxlr_dax = _cxlr_dax;

	device_unregister(&cxlr_dax->dev);
}
3128 
3129 static int devm_cxl_add_dax_region(struct cxl_region *cxlr)
3130 {
3131 	struct cxl_dax_region *cxlr_dax;
3132 	struct device *dev;
3133 	int rc;
3134 
3135 	cxlr_dax = cxl_dax_region_alloc(cxlr);
3136 	if (IS_ERR(cxlr_dax))
3137 		return PTR_ERR(cxlr_dax);
3138 
3139 	dev = &cxlr_dax->dev;
3140 	rc = dev_set_name(dev, "dax_region%d", cxlr->id);
3141 	if (rc)
3142 		goto err;
3143 
3144 	rc = device_add(dev);
3145 	if (rc)
3146 		goto err;
3147 
3148 	dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
3149 		dev_name(dev));
3150 
3151 	return devm_add_action_or_reset(&cxlr->dev, cxlr_dax_unregister,
3152 					cxlr_dax);
3153 err:
3154 	put_device(dev);
3155 	return rc;
3156 }
3157 
3158 static int match_root_decoder_by_range(struct device *dev, void *data)
3159 {
3160 	struct range *r1, *r2 = data;
3161 	struct cxl_root_decoder *cxlrd;
3162 
3163 	if (!is_root_decoder(dev))
3164 		return 0;
3165 
3166 	cxlrd = to_cxl_root_decoder(dev);
3167 	r1 = &cxlrd->cxlsd.cxld.hpa_range;
3168 	return range_contains(r1, r2);
3169 }
3170 
3171 static int match_region_by_range(struct device *dev, void *data)
3172 {
3173 	struct cxl_region_params *p;
3174 	struct cxl_region *cxlr;
3175 	struct range *r = data;
3176 	int rc = 0;
3177 
3178 	if (!is_cxl_region(dev))
3179 		return 0;
3180 
3181 	cxlr = to_cxl_region(dev);
3182 	p = &cxlr->params;
3183 
3184 	down_read(&cxl_region_rwsem);
3185 	if (p->res && p->res->start == r->start && p->res->end == r->end)
3186 		rc = 1;
3187 	up_read(&cxl_region_rwsem);
3188 
3189 	return rc;
3190 }
3191 
3192 /* Establish an empty region covering the given HPA range */
3193 static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
3194 					   struct cxl_endpoint_decoder *cxled)
3195 {
3196 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
3197 	struct cxl_port *port = cxlrd_to_port(cxlrd);
3198 	struct range *hpa = &cxled->cxld.hpa_range;
3199 	struct cxl_region_params *p;
3200 	struct cxl_region *cxlr;
3201 	struct resource *res;
3202 	int rc;
3203 
3204 	do {
3205 		cxlr = __create_region(cxlrd, cxled->mode,
3206 				       atomic_read(&cxlrd->region_id));
3207 	} while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);
3208 
3209 	if (IS_ERR(cxlr)) {
3210 		dev_err(cxlmd->dev.parent,
3211 			"%s:%s: %s failed assign region: %ld\n",
3212 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
3213 			__func__, PTR_ERR(cxlr));
3214 		return cxlr;
3215 	}
3216 
3217 	down_write(&cxl_region_rwsem);
3218 	p = &cxlr->params;
3219 	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
3220 		dev_err(cxlmd->dev.parent,
3221 			"%s:%s: %s autodiscovery interrupted\n",
3222 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
3223 			__func__);
3224 		rc = -EBUSY;
3225 		goto err;
3226 	}
3227 
3228 	set_bit(CXL_REGION_F_AUTO, &cxlr->flags);
3229 
3230 	res = kmalloc(sizeof(*res), GFP_KERNEL);
3231 	if (!res) {
3232 		rc = -ENOMEM;
3233 		goto err;
3234 	}
3235 
3236 	*res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa),
3237 				    dev_name(&cxlr->dev));
3238 	rc = insert_resource(cxlrd->res, res);
3239 	if (rc) {
3240 		/*
3241 		 * Platform-firmware may not have split resources like "System
3242 		 * RAM" on CXL window boundaries see cxl_region_iomem_release()
3243 		 */
3244 		dev_warn(cxlmd->dev.parent,
3245 			 "%s:%s: %s %s cannot insert resource\n",
3246 			 dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
3247 			 __func__, dev_name(&cxlr->dev));
3248 	}
3249 
3250 	p->res = res;
3251 	p->interleave_ways = cxled->cxld.interleave_ways;
3252 	p->interleave_granularity = cxled->cxld.interleave_granularity;
3253 	p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
3254 
3255 	rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
3256 	if (rc)
3257 		goto err;
3258 
3259 	dev_dbg(cxlmd->dev.parent, "%s:%s: %s %s res: %pr iw: %d ig: %d\n",
3260 		dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), __func__,
3261 		dev_name(&cxlr->dev), p->res, p->interleave_ways,
3262 		p->interleave_granularity);
3263 
3264 	/* ...to match put_device() in cxl_add_to_region() */
3265 	get_device(&cxlr->dev);
3266 	up_write(&cxl_region_rwsem);
3267 
3268 	return cxlr;
3269 
3270 err:
3271 	up_write(&cxl_region_rwsem);
3272 	devm_release_action(port->uport_dev, unregister_region, cxlr);
3273 	return ERR_PTR(rc);
3274 }
3275 
3276 int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled)
3277 {
3278 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
3279 	struct range *hpa = &cxled->cxld.hpa_range;
3280 	struct cxl_decoder *cxld = &cxled->cxld;
3281 	struct device *cxlrd_dev, *region_dev;
3282 	struct cxl_root_decoder *cxlrd;
3283 	struct cxl_region_params *p;
3284 	struct cxl_region *cxlr;
3285 	bool attach = false;
3286 	int rc;
3287 
3288 	cxlrd_dev = device_find_child(&root->dev, &cxld->hpa_range,
3289 				      match_root_decoder_by_range);
3290 	if (!cxlrd_dev) {
3291 		dev_err(cxlmd->dev.parent,
3292 			"%s:%s no CXL window for range %#llx:%#llx\n",
3293 			dev_name(&cxlmd->dev), dev_name(&cxld->dev),
3294 			cxld->hpa_range.start, cxld->hpa_range.end);
3295 		return -ENXIO;
3296 	}
3297 
3298 	cxlrd = to_cxl_root_decoder(cxlrd_dev);
3299 
3300 	/*
3301 	 * Ensure that if multiple threads race to construct_region() for @hpa
3302 	 * one does the construction and the others add to that.
3303 	 */
3304 	mutex_lock(&cxlrd->range_lock);
3305 	region_dev = device_find_child(&cxlrd->cxlsd.cxld.dev, hpa,
3306 				       match_region_by_range);
3307 	if (!region_dev) {
3308 		cxlr = construct_region(cxlrd, cxled);
3309 		region_dev = &cxlr->dev;
3310 	} else
3311 		cxlr = to_cxl_region(region_dev);
3312 	mutex_unlock(&cxlrd->range_lock);
3313 
3314 	rc = PTR_ERR_OR_ZERO(cxlr);
3315 	if (rc)
3316 		goto out;
3317 
3318 	attach_target(cxlr, cxled, -1, TASK_UNINTERRUPTIBLE);
3319 
3320 	down_read(&cxl_region_rwsem);
3321 	p = &cxlr->params;
3322 	attach = p->state == CXL_CONFIG_COMMIT;
3323 	up_read(&cxl_region_rwsem);
3324 
3325 	if (attach) {
3326 		/*
3327 		 * If device_attach() fails the range may still be active via
3328 		 * the platform-firmware memory map, otherwise the driver for
3329 		 * regions is local to this file, so driver matching can't fail.
3330 		 */
3331 		if (device_attach(&cxlr->dev) < 0)
3332 			dev_err(&cxlr->dev, "failed to enable, range: %pr\n",
3333 				p->res);
3334 	}
3335 
3336 	put_device(region_dev);
3337 out:
3338 	put_device(cxlrd_dev);
3339 	return rc;
3340 }
3341 EXPORT_SYMBOL_NS_GPL(cxl_add_to_region, CXL);
3342 
3343 static int is_system_ram(struct resource *res, void *arg)
3344 {
3345 	struct cxl_region *cxlr = arg;
3346 	struct cxl_region_params *p = &cxlr->params;
3347 
3348 	dev_dbg(&cxlr->dev, "%pr has System RAM: %pr\n", p->res, res);
3349 	return 1;
3350 }
3351 
3352 static int cxl_region_probe(struct device *dev)
3353 {
3354 	struct cxl_region *cxlr = to_cxl_region(dev);
3355 	struct cxl_region_params *p = &cxlr->params;
3356 	int rc;
3357 
3358 	rc = down_read_interruptible(&cxl_region_rwsem);
3359 	if (rc) {
3360 		dev_dbg(&cxlr->dev, "probe interrupted\n");
3361 		return rc;
3362 	}
3363 
3364 	if (p->state < CXL_CONFIG_COMMIT) {
3365 		dev_dbg(&cxlr->dev, "config state: %d\n", p->state);
3366 		rc = -ENXIO;
3367 		goto out;
3368 	}
3369 
3370 	if (test_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags)) {
3371 		dev_err(&cxlr->dev,
3372 			"failed to activate, re-commit region and retry\n");
3373 		rc = -ENXIO;
3374 		goto out;
3375 	}
3376 
3377 	/*
3378 	 * From this point on any path that changes the region's state away from
3379 	 * CXL_CONFIG_COMMIT is also responsible for releasing the driver.
3380 	 */
3381 out:
3382 	up_read(&cxl_region_rwsem);
3383 
3384 	if (rc)
3385 		return rc;
3386 
3387 	switch (cxlr->mode) {
3388 	case CXL_DECODER_PMEM:
3389 		return devm_cxl_add_pmem_region(cxlr);
3390 	case CXL_DECODER_RAM:
3391 		/*
3392 		 * The region can not be manged by CXL if any portion of
3393 		 * it is already online as 'System RAM'
3394 		 */
3395 		if (walk_iomem_res_desc(IORES_DESC_NONE,
3396 					IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
3397 					p->res->start, p->res->end, cxlr,
3398 					is_system_ram) > 0)
3399 			return 0;
3400 		return devm_cxl_add_dax_region(cxlr);
3401 	default:
3402 		dev_dbg(&cxlr->dev, "unsupported region mode: %d\n",
3403 			cxlr->mode);
3404 		return -ENXIO;
3405 	}
3406 }
3407 
/*
 * Driver definition for region devices: named "cxl_region", probed via
 * cxl_region_probe(), and tagged with the CXL_DEVICE_REGION id.
 * NOTE(review): presumably the id is what the CXL bus matches devices
 * against -- confirm in the bus match code. No .remove callback is set.
 */
static struct cxl_driver cxl_region_driver = {
	.name = "cxl_region",
	.probe = cxl_region_probe,
	.id = CXL_DEVICE_REGION,
};
3413 
/*
 * Register the region driver.
 *
 * Returns whatever cxl_driver_register() returns for cxl_region_driver.
 */
int cxl_region_init(void)
{
	return cxl_driver_register(&cxl_region_driver);
}
3418 
/* Unregister the region driver registered by cxl_region_init(). */
void cxl_region_exit(void)
{
	cxl_driver_unregister(&cxl_region_driver);
}
3423 
/*
 * Module metadata: import the CXL and DEVMEM symbol namespaces used by this
 * file. NOTE(review): MODULE_ALIAS_CXL() presumably emits a module alias for
 * CXL_DEVICE_REGION devices -- confirm against its definition.
 */
MODULE_IMPORT_NS(CXL);
MODULE_IMPORT_NS(DEVMEM);
MODULE_ALIAS_CXL(CXL_DEVICE_REGION);
3427