xref: /linux/drivers/cxl/core/pci.c (revision 660a5704603593393799b4cd116fac06d35471b2)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright(c) 2021 Intel Corporation. All rights reserved. */
3 #include <linux/io-64-nonatomic-lo-hi.h>
4 #include <linux/device.h>
5 #include <linux/delay.h>
6 #include <linux/pci.h>
7 #include <linux/pci-doe.h>
8 #include <cxlpci.h>
9 #include <cxlmem.h>
10 #include <cxl.h>
11 #include "core.h"
12 
13 /**
14  * DOC: cxl core pci
15  *
16  * Compute Express Link protocols are layered on top of PCIe. CXL core provides
17  * a set of helpers for CXL interactions which occur via PCIe.
18  */
19 
/*
 * Number of one-second polls cxl_await_media_ready() performs while waiting
 * for the device to report memory active. Exposed as a module parameter with
 * 0644 permissions.
 */
static unsigned short media_ready_timeout = 60;
module_param(media_ready_timeout, ushort, 0644);
MODULE_PARM_DESC(media_ready_timeout, "seconds to wait for media ready");
23 
/*
 * State shared between devm_cxl_port_enumerate_dports() and its
 * pci_walk_bus() callback, match_add_dports().
 */
struct cxl_walk_context {
	struct pci_bus *bus;	/* only devices directly on this bus are considered */
	struct cxl_port *port;	/* port that matching devices are added to as dports */
	int type;		/* PCIe port type a device must report to match */
	int error;		/* error from the dport registration that failed, if any */
	int count;		/* number of dports successfully added */
};
31 
32 static int match_add_dports(struct pci_dev *pdev, void *data)
33 {
34 	struct cxl_walk_context *ctx = data;
35 	struct cxl_port *port = ctx->port;
36 	int type = pci_pcie_type(pdev);
37 	struct cxl_register_map map;
38 	struct cxl_dport *dport;
39 	u32 lnkcap, port_num;
40 	int rc;
41 
42 	if (pdev->bus != ctx->bus)
43 		return 0;
44 	if (!pci_is_pcie(pdev))
45 		return 0;
46 	if (type != ctx->type)
47 		return 0;
48 	if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP,
49 				  &lnkcap))
50 		return 0;
51 
52 	rc = cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map);
53 	if (rc)
54 		dev_dbg(&port->dev, "failed to find component registers\n");
55 
56 	port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap);
57 	dport = devm_cxl_add_dport(port, &pdev->dev, port_num, map.resource);
58 	if (IS_ERR(dport)) {
59 		ctx->error = PTR_ERR(dport);
60 		return PTR_ERR(dport);
61 	}
62 	ctx->count++;
63 
64 	return 0;
65 }
66 
67 /**
68  * devm_cxl_port_enumerate_dports - enumerate downstream ports of the upstream port
69  * @port: cxl_port whose ->uport is the upstream of dports to be enumerated
70  *
71  * Returns a positive number of dports enumerated or a negative error
72  * code.
73  */
74 int devm_cxl_port_enumerate_dports(struct cxl_port *port)
75 {
76 	struct pci_bus *bus = cxl_port_to_pci_bus(port);
77 	struct cxl_walk_context ctx;
78 	int type;
79 
80 	if (!bus)
81 		return -ENXIO;
82 
83 	if (pci_is_root_bus(bus))
84 		type = PCI_EXP_TYPE_ROOT_PORT;
85 	else
86 		type = PCI_EXP_TYPE_DOWNSTREAM;
87 
88 	ctx = (struct cxl_walk_context) {
89 		.port = port,
90 		.bus = bus,
91 		.type = type,
92 	};
93 	pci_walk_bus(bus, match_add_dports, &ctx);
94 
95 	if (ctx.count == 0)
96 		return -ENODEV;
97 	if (ctx.error)
98 		return ctx.error;
99 	return ctx.count;
100 }
101 EXPORT_SYMBOL_NS_GPL(devm_cxl_port_enumerate_dports, CXL);
102 
103 /*
104  * Wait up to @media_ready_timeout for the device to report memory
105  * active.
106  */
107 int cxl_await_media_ready(struct cxl_dev_state *cxlds)
108 {
109 	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
110 	int d = cxlds->cxl_dvsec;
111 	bool active = false;
112 	u64 md_status;
113 	int rc, i;
114 
115 	for (i = media_ready_timeout; i; i--) {
116 		u32 temp;
117 
118 		rc = pci_read_config_dword(
119 			pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &temp);
120 		if (rc)
121 			return rc;
122 
123 		active = FIELD_GET(CXL_DVSEC_MEM_ACTIVE, temp);
124 		if (active)
125 			break;
126 		msleep(1000);
127 	}
128 
129 	if (!active) {
130 		dev_err(&pdev->dev,
131 			"timeout awaiting memory active after %d seconds\n",
132 			media_ready_timeout);
133 		return -ETIMEDOUT;
134 	}
135 
136 	md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
137 	if (!CXLMDEV_READY(md_status))
138 		return -EIO;
139 
140 	return 0;
141 }
142 EXPORT_SYMBOL_NS_GPL(cxl_await_media_ready, CXL);
143 
144 static int wait_for_valid(struct cxl_dev_state *cxlds)
145 {
146 	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
147 	int d = cxlds->cxl_dvsec, rc;
148 	u32 val;
149 
150 	/*
151 	 * Memory_Info_Valid: When set, indicates that the CXL Range 1 Size high
152 	 * and Size Low registers are valid. Must be set within 1 second of
153 	 * deassertion of reset to CXL device. Likely it is already set by the
154 	 * time this runs, but otherwise give a 1.5 second timeout in case of
155 	 * clock skew.
156 	 */
157 	rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val);
158 	if (rc)
159 		return rc;
160 
161 	if (val & CXL_DVSEC_MEM_INFO_VALID)
162 		return 0;
163 
164 	msleep(1500);
165 
166 	rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val);
167 	if (rc)
168 		return rc;
169 
170 	if (val & CXL_DVSEC_MEM_INFO_VALID)
171 		return 0;
172 
173 	return -ETIMEDOUT;
174 }
175 
176 static int cxl_set_mem_enable(struct cxl_dev_state *cxlds, u16 val)
177 {
178 	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
179 	int d = cxlds->cxl_dvsec;
180 	u16 ctrl;
181 	int rc;
182 
183 	rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl);
184 	if (rc < 0)
185 		return rc;
186 
187 	if ((ctrl & CXL_DVSEC_MEM_ENABLE) == val)
188 		return 1;
189 	ctrl &= ~CXL_DVSEC_MEM_ENABLE;
190 	ctrl |= val;
191 
192 	rc = pci_write_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, ctrl);
193 	if (rc < 0)
194 		return rc;
195 
196 	return 0;
197 }
198 
/* devm action registered by devm_cxl_enable_mem(): clear the mem enable field */
static void clear_mem_enable(void *cxlds)
{
	struct cxl_dev_state *state = cxlds;

	cxl_set_mem_enable(state, 0);
}
203 
204 static int devm_cxl_enable_mem(struct device *host, struct cxl_dev_state *cxlds)
205 {
206 	int rc;
207 
208 	rc = cxl_set_mem_enable(cxlds, CXL_DVSEC_MEM_ENABLE);
209 	if (rc < 0)
210 		return rc;
211 	if (rc > 0)
212 		return 0;
213 	return devm_add_action_or_reset(host, clear_mem_enable, cxlds);
214 }
215 
216 static bool range_contains(struct range *r1, struct range *r2)
217 {
218 	return r1->start <= r2->start && r1->end >= r2->end;
219 }
220 
221 /* require dvsec ranges to be covered by a locked platform window */
222 static int dvsec_range_allowed(struct device *dev, void *arg)
223 {
224 	struct range *dev_range = arg;
225 	struct cxl_decoder *cxld;
226 
227 	if (!is_root_decoder(dev))
228 		return 0;
229 
230 	cxld = to_cxl_decoder(dev);
231 
232 	if (!(cxld->flags & CXL_DECODER_F_LOCK))
233 		return 0;
234 	if (!(cxld->flags & CXL_DECODER_F_RAM))
235 		return 0;
236 
237 	return range_contains(&cxld->hpa_range, dev_range);
238 }
239 
240 static void disable_hdm(void *_cxlhdm)
241 {
242 	u32 global_ctrl;
243 	struct cxl_hdm *cxlhdm = _cxlhdm;
244 	void __iomem *hdm = cxlhdm->regs.hdm_decoder;
245 
246 	global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET);
247 	writel(global_ctrl & ~CXL_HDM_DECODER_ENABLE,
248 	       hdm + CXL_HDM_DECODER_CTRL_OFFSET);
249 }
250 
251 static int devm_cxl_enable_hdm(struct device *host, struct cxl_hdm *cxlhdm)
252 {
253 	void __iomem *hdm = cxlhdm->regs.hdm_decoder;
254 	u32 global_ctrl;
255 
256 	global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET);
257 	writel(global_ctrl | CXL_HDM_DECODER_ENABLE,
258 	       hdm + CXL_HDM_DECODER_CTRL_OFFSET);
259 
260 	return devm_add_action_or_reset(host, disable_hdm, cxlhdm);
261 }
262 
263 static bool __cxl_hdm_decode_init(struct cxl_dev_state *cxlds,
264 				  struct cxl_hdm *cxlhdm,
265 				  struct cxl_endpoint_dvsec_info *info)
266 {
267 	void __iomem *hdm = cxlhdm->regs.hdm_decoder;
268 	struct cxl_port *port = cxlhdm->port;
269 	struct device *dev = cxlds->dev;
270 	struct cxl_port *root;
271 	int i, rc, allowed;
272 	u32 global_ctrl;
273 
274 	global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET);
275 
276 	/*
277 	 * If the HDM Decoder Capability is already enabled then assume
278 	 * that some other agent like platform firmware set it up.
279 	 */
280 	if (global_ctrl & CXL_HDM_DECODER_ENABLE) {
281 		rc = devm_cxl_enable_mem(&port->dev, cxlds);
282 		if (rc)
283 			return false;
284 		return true;
285 	}
286 
287 	root = to_cxl_port(port->dev.parent);
288 	while (!is_cxl_root(root) && is_cxl_port(root->dev.parent))
289 		root = to_cxl_port(root->dev.parent);
290 	if (!is_cxl_root(root)) {
291 		dev_err(dev, "Failed to acquire root port for HDM enable\n");
292 		return false;
293 	}
294 
295 	for (i = 0, allowed = 0; info->mem_enabled && i < info->ranges; i++) {
296 		struct device *cxld_dev;
297 
298 		cxld_dev = device_find_child(&root->dev, &info->dvsec_range[i],
299 					     dvsec_range_allowed);
300 		if (!cxld_dev) {
301 			dev_dbg(dev, "DVSEC Range%d denied by platform\n", i);
302 			continue;
303 		}
304 		dev_dbg(dev, "DVSEC Range%d allowed by platform\n", i);
305 		put_device(cxld_dev);
306 		allowed++;
307 	}
308 
309 	if (!allowed) {
310 		cxl_set_mem_enable(cxlds, 0);
311 		info->mem_enabled = 0;
312 	}
313 
314 	/*
315 	 * Per CXL 2.0 Section 8.1.3.8.3 and 8.1.3.8.4 DVSEC CXL Range 1 Base
316 	 * [High,Low] when HDM operation is enabled the range register values
317 	 * are ignored by the device, but the spec also recommends matching the
318 	 * DVSEC Range 1,2 to HDM Decoder Range 0,1. So, non-zero info->ranges
319 	 * are expected even though Linux does not require or maintain that
320 	 * match. If at least one DVSEC range is enabled and allowed, skip HDM
321 	 * Decoder Capability Enable.
322 	 */
323 	if (info->mem_enabled)
324 		return false;
325 
326 	rc = devm_cxl_enable_hdm(&port->dev, cxlhdm);
327 	if (rc)
328 		return false;
329 
330 	rc = devm_cxl_enable_mem(&port->dev, cxlds);
331 	if (rc)
332 		return false;
333 
334 	return true;
335 }
336 
337 /**
338  * cxl_hdm_decode_init() - Setup HDM decoding for the endpoint
339  * @cxlds: Device state
340  * @cxlhdm: Mapped HDM decoder Capability
341  *
342  * Try to enable the endpoint's HDM Decoder Capability
343  */
344 int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm)
345 {
346 	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
347 	struct cxl_endpoint_dvsec_info info = { 0 };
348 	int hdm_count, rc, i, ranges = 0;
349 	struct device *dev = &pdev->dev;
350 	int d = cxlds->cxl_dvsec;
351 	u16 cap, ctrl;
352 
353 	if (!d) {
354 		dev_dbg(dev, "No DVSEC Capability\n");
355 		return -ENXIO;
356 	}
357 
358 	rc = pci_read_config_word(pdev, d + CXL_DVSEC_CAP_OFFSET, &cap);
359 	if (rc)
360 		return rc;
361 
362 	rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl);
363 	if (rc)
364 		return rc;
365 
366 	if (!(cap & CXL_DVSEC_MEM_CAPABLE)) {
367 		dev_dbg(dev, "Not MEM Capable\n");
368 		return -ENXIO;
369 	}
370 
371 	/*
372 	 * It is not allowed by spec for MEM.capable to be set and have 0 legacy
373 	 * HDM decoders (values > 2 are also undefined as of CXL 2.0). As this
374 	 * driver is for a spec defined class code which must be CXL.mem
375 	 * capable, there is no point in continuing to enable CXL.mem.
376 	 */
377 	hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap);
378 	if (!hdm_count || hdm_count > 2)
379 		return -EINVAL;
380 
381 	rc = wait_for_valid(cxlds);
382 	if (rc) {
383 		dev_dbg(dev, "Failure awaiting MEM_INFO_VALID (%d)\n", rc);
384 		return rc;
385 	}
386 
387 	/*
388 	 * The current DVSEC values are moot if the memory capability is
389 	 * disabled, and they will remain moot after the HDM Decoder
390 	 * capability is enabled.
391 	 */
392 	info.mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl);
393 	if (!info.mem_enabled)
394 		goto hdm_init;
395 
396 	for (i = 0; i < hdm_count; i++) {
397 		u64 base, size;
398 		u32 temp;
399 
400 		rc = pci_read_config_dword(
401 			pdev, d + CXL_DVSEC_RANGE_SIZE_HIGH(i), &temp);
402 		if (rc)
403 			return rc;
404 
405 		size = (u64)temp << 32;
406 
407 		rc = pci_read_config_dword(
408 			pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(i), &temp);
409 		if (rc)
410 			return rc;
411 
412 		size |= temp & CXL_DVSEC_MEM_SIZE_LOW_MASK;
413 
414 		rc = pci_read_config_dword(
415 			pdev, d + CXL_DVSEC_RANGE_BASE_HIGH(i), &temp);
416 		if (rc)
417 			return rc;
418 
419 		base = (u64)temp << 32;
420 
421 		rc = pci_read_config_dword(
422 			pdev, d + CXL_DVSEC_RANGE_BASE_LOW(i), &temp);
423 		if (rc)
424 			return rc;
425 
426 		base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK;
427 
428 		info.dvsec_range[i] = (struct range) {
429 			.start = base,
430 			.end = base + size - 1
431 		};
432 
433 		if (size)
434 			ranges++;
435 	}
436 
437 	info.ranges = ranges;
438 
439 	/*
440 	 * If DVSEC ranges are being used instead of HDM decoder registers there
441 	 * is no use in trying to manage those.
442 	 */
443 hdm_init:
444 	if (!__cxl_hdm_decode_init(cxlds, cxlhdm, &info)) {
445 		dev_err(dev,
446 			"Legacy range registers configuration prevents HDM operation.\n");
447 		return -EBUSY;
448 	}
449 
450 	return 0;
451 }
452 EXPORT_SYMBOL_NS_GPL(cxl_hdm_decode_init, CXL);
453 
/*
 * Field masks and values for the DOE table access DW, used by
 * CDAT_DOE_REQ() to build a request and by cxl_cdat_read_table() to parse
 * the response header.
 */
/* Bits 7:0 - request code; 0 selects a read */
#define CXL_DOE_TABLE_ACCESS_REQ_CODE		0x000000ff
#define   CXL_DOE_TABLE_ACCESS_REQ_CODE_READ	0
/* Bits 15:8 - table type; 0 selects CDAT */
#define CXL_DOE_TABLE_ACCESS_TABLE_TYPE		0x0000ff00
#define   CXL_DOE_TABLE_ACCESS_TABLE_TYPE_CDATA	0
/* Bits 31:16 - entry handle */
#define CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE	0xffff0000
/* Entry handle value on which cxl_cdat_read_table() stops iterating */
#define CXL_DOE_TABLE_ACCESS_LAST_ENTRY		0xffff
/* DOE protocol type paired with PCI_DVSEC_VENDOR_ID_CXL for table access */
#define CXL_DOE_PROTOCOL_TABLE_ACCESS 2
461 
462 static struct pci_doe_mb *find_cdat_doe(struct device *uport)
463 {
464 	struct cxl_memdev *cxlmd;
465 	struct cxl_dev_state *cxlds;
466 	unsigned long index;
467 	void *entry;
468 
469 	cxlmd = to_cxl_memdev(uport);
470 	cxlds = cxlmd->cxlds;
471 
472 	xa_for_each(&cxlds->doe_mbs, index, entry) {
473 		struct pci_doe_mb *cur = entry;
474 
475 		if (pci_doe_supports_prot(cur, PCI_DVSEC_VENDOR_ID_CXL,
476 					  CXL_DOE_PROTOCOL_TABLE_ACCESS))
477 			return cur;
478 	}
479 
480 	return NULL;
481 }
482 
/*
 * Build the single request DW for a table access: a read request of the
 * CDAT table type for the entry identified by @entry_handle.
 */
#define CDAT_DOE_REQ(entry_handle)					\
	(FIELD_PREP(CXL_DOE_TABLE_ACCESS_REQ_CODE,			\
		    CXL_DOE_TABLE_ACCESS_REQ_CODE_READ) |		\
	 FIELD_PREP(CXL_DOE_TABLE_ACCESS_TABLE_TYPE,			\
		    CXL_DOE_TABLE_ACCESS_TABLE_TYPE_CDATA) |		\
	 FIELD_PREP(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE, (entry_handle)))
489 
490 static void cxl_doe_task_complete(struct pci_doe_task *task)
491 {
492 	complete(task->private);
493 }
494 
/*
 * Everything needed for one CDAT DOE exchange, bundled so it can live on
 * the stack: DECLARE_CDAT_DOE_TASK() points @task at the other members.
 */
struct cdat_doe_task {
	u32 request_pl;			/* single request DW */
	u32 response_pl[32];		/* response buffer, 32 DWs */
	struct completion c;		/* signalled by cxl_doe_task_complete() */
	struct pci_doe_task task;	/* task handed to pci_doe_submit_task() */
};
501 
/*
 * Declare and initialize a struct cdat_doe_task named @cdt whose request
 * DW is @req. The embedded pci_doe_task is wired to the struct's own
 * request/response buffers and completes @cdt.c via
 * cxl_doe_task_complete(). @req is parenthesized so any expression can be
 * passed safely.
 */
#define DECLARE_CDAT_DOE_TASK(req, cdt)                       \
struct cdat_doe_task cdt = {                                  \
	.c = COMPLETION_INITIALIZER_ONSTACK(cdt.c),           \
	.request_pl = (req),				      \
	.task = {                                             \
		.prot.vid = PCI_DVSEC_VENDOR_ID_CXL,        \
		.prot.type = CXL_DOE_PROTOCOL_TABLE_ACCESS, \
		.request_pl = &cdt.request_pl,                \
		.request_pl_sz = sizeof(cdt.request_pl),      \
		.response_pl = cdt.response_pl,               \
		.response_pl_sz = sizeof(cdt.response_pl),    \
		.complete = cxl_doe_task_complete,            \
		.private = &cdt.c,                            \
	}                                                     \
}
517 
518 static int cxl_cdat_get_length(struct device *dev,
519 			       struct pci_doe_mb *cdat_doe,
520 			       size_t *length)
521 {
522 	DECLARE_CDAT_DOE_TASK(CDAT_DOE_REQ(0), t);
523 	int rc;
524 
525 	rc = pci_doe_submit_task(cdat_doe, &t.task);
526 	if (rc < 0) {
527 		dev_err(dev, "DOE submit failed: %d", rc);
528 		return rc;
529 	}
530 	wait_for_completion(&t.c);
531 	if (t.task.rv < sizeof(u32))
532 		return -EIO;
533 
534 	*length = t.response_pl[1];
535 	dev_dbg(dev, "CDAT length %zu\n", *length);
536 
537 	return 0;
538 }
539 
540 static int cxl_cdat_read_table(struct device *dev,
541 			       struct pci_doe_mb *cdat_doe,
542 			       struct cxl_cdat *cdat)
543 {
544 	size_t length = cdat->length;
545 	u32 *data = cdat->table;
546 	int entry_handle = 0;
547 
548 	do {
549 		DECLARE_CDAT_DOE_TASK(CDAT_DOE_REQ(entry_handle), t);
550 		size_t entry_dw;
551 		u32 *entry;
552 		int rc;
553 
554 		rc = pci_doe_submit_task(cdat_doe, &t.task);
555 		if (rc < 0) {
556 			dev_err(dev, "DOE submit failed: %d", rc);
557 			return rc;
558 		}
559 		wait_for_completion(&t.c);
560 		/* 1 DW header + 1 DW data min */
561 		if (t.task.rv < (2 * sizeof(u32)))
562 			return -EIO;
563 
564 		/* Get the CXL table access header entry handle */
565 		entry_handle = FIELD_GET(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE,
566 					 t.response_pl[0]);
567 		entry = t.response_pl + 1;
568 		entry_dw = t.task.rv / sizeof(u32);
569 		/* Skip Header */
570 		entry_dw -= 1;
571 		entry_dw = min(length / sizeof(u32), entry_dw);
572 		/* Prevent length < 1 DW from causing a buffer overflow */
573 		if (entry_dw) {
574 			memcpy(data, entry, entry_dw * sizeof(u32));
575 			length -= entry_dw * sizeof(u32);
576 			data += entry_dw;
577 		}
578 	} while (entry_handle != CXL_DOE_TABLE_ACCESS_LAST_ENTRY);
579 
580 	return 0;
581 }
582 
583 /**
584  * read_cdat_data - Read the CDAT data on this port
585  * @port: Port to read data from
586  *
587  * This call will sleep waiting for responses from the DOE mailbox.
588  */
589 void read_cdat_data(struct cxl_port *port)
590 {
591 	struct pci_doe_mb *cdat_doe;
592 	struct device *dev = &port->dev;
593 	struct device *uport = port->uport;
594 	size_t cdat_length;
595 	int rc;
596 
597 	cdat_doe = find_cdat_doe(uport);
598 	if (!cdat_doe) {
599 		dev_dbg(dev, "No CDAT mailbox\n");
600 		return;
601 	}
602 
603 	port->cdat_available = true;
604 
605 	if (cxl_cdat_get_length(dev, cdat_doe, &cdat_length)) {
606 		dev_dbg(dev, "No CDAT length\n");
607 		return;
608 	}
609 
610 	port->cdat.table = devm_kzalloc(dev, cdat_length, GFP_KERNEL);
611 	if (!port->cdat.table)
612 		return;
613 
614 	port->cdat.length = cdat_length;
615 	rc = cxl_cdat_read_table(dev, cdat_doe, &port->cdat);
616 	if (rc) {
617 		/* Don't leave table data allocated on error */
618 		devm_kfree(dev, port->cdat.table);
619 		port->cdat.table = NULL;
620 		port->cdat.length = 0;
621 		dev_err(dev, "CDAT data read error\n");
622 	}
623 }
624 EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL);
625