xref: /linux/drivers/cxl/core/pci.c (revision cc8c418b4fc09ed58ddd27b8e90ec797e9ca1e67)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright(c) 2021 Intel Corporation. All rights reserved. */
3 #include <linux/io-64-nonatomic-lo-hi.h>
4 #include <linux/device.h>
5 #include <linux/delay.h>
6 #include <linux/pci.h>
7 #include <linux/pci-doe.h>
8 #include <cxlpci.h>
9 #include <cxlmem.h>
10 #include <cxl.h>
11 #include "core.h"
12 
13 /**
14  * DOC: cxl core pci
15  *
16  * Compute Express Link protocols are layered on top of PCIe. The CXL core
17  * provides a set of helpers for CXL interactions that occur over PCIe.
18  */
19 
20 static unsigned short media_ready_timeout = 60;
21 module_param(media_ready_timeout, ushort, 0644);
22 MODULE_PARM_DESC(media_ready_timeout, "seconds to wait for media ready");
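/*
 * The 0644 permissions make this tunable writable at runtime, e.g. via
 * /sys/module/cxl_core/parameters/media_ready_timeout (path assumes this
 * file is built into the cxl_core module).
 */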
23 
24 struct cxl_walk_context {
25 	struct pci_bus *bus;
26 	struct cxl_port *port;
27 	int type;
28 	int error;
29 	int count;
30 };
31 
32 static int match_add_dports(struct pci_dev *pdev, void *data)
33 {
34 	struct cxl_walk_context *ctx = data;
35 	struct cxl_port *port = ctx->port;
36 	int type = pci_pcie_type(pdev);
37 	struct cxl_register_map map;
38 	struct cxl_dport *dport;
39 	u32 lnkcap, port_num;
40 	int rc;
41 
42 	if (pdev->bus != ctx->bus)
43 		return 0;
44 	if (!pci_is_pcie(pdev))
45 		return 0;
46 	if (type != ctx->type)
47 		return 0;
48 	if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP,
49 				  &lnkcap))
50 		return 0;
51 
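	/*
	 * A missing component register block is not fatal here: the lookup
	 * failure is only logged and the dport is still added below.
	 */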
52 	rc = cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map);
53 	if (rc)
54 		dev_dbg(&port->dev, "failed to find component registers\n");
55 
56 	port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap);
57 	dport = devm_cxl_add_dport(port, &pdev->dev, port_num,
58 				   cxl_regmap_to_base(pdev, &map));
59 	if (IS_ERR(dport)) {
60 		ctx->error = PTR_ERR(dport);
61 		return PTR_ERR(dport);
62 	}
63 	ctx->count++;
64 
65 	dev_dbg(&port->dev, "add dport%d: %s\n", port_num, dev_name(&pdev->dev));
66 
67 	return 0;
68 }
69 
70 /**
71  * devm_cxl_port_enumerate_dports - enumerate downstream ports of the upstream port
72  * @port: cxl_port whose ->uport is the upstream of dports to be enumerated
73  *
74  * Returns a positive number of dports enumerated or a negative error
75  * code.
76  */
77 int devm_cxl_port_enumerate_dports(struct cxl_port *port)
78 {
79 	struct pci_bus *bus = cxl_port_to_pci_bus(port);
80 	struct cxl_walk_context ctx;
81 	int type;
82 
83 	if (!bus)
84 		return -ENXIO;
85 
86 	if (pci_is_root_bus(bus))
87 		type = PCI_EXP_TYPE_ROOT_PORT;
88 	else
89 		type = PCI_EXP_TYPE_DOWNSTREAM;
90 
91 	ctx = (struct cxl_walk_context) {
92 		.port = port,
93 		.bus = bus,
94 		.type = type,
95 	};
96 	pci_walk_bus(bus, match_add_dports, &ctx);
97 
98 	if (ctx.count == 0)
99 		return -ENODEV;
100 	if (ctx.error)
101 		return ctx.error;
102 	return ctx.count;
103 }
104 EXPORT_SYMBOL_NS_GPL(devm_cxl_port_enumerate_dports, CXL);
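/*
 * Illustrative usage sketch (not taken from this file): a port driver
 * would typically call this during probe and treat a negative result
 * as fatal:
 *
 *	rc = devm_cxl_port_enumerate_dports(port);
 *	if (rc < 0)
 *		return rc;
 */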
105 
106 /*
107  * Wait up to @media_ready_timeout for the device to report memory
108  * active.
109  */
110 int cxl_await_media_ready(struct cxl_dev_state *cxlds)
111 {
112 	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
113 	int d = cxlds->cxl_dvsec;
114 	bool active = false;
115 	u64 md_status;
116 	int rc, i;
117 
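	/*
	 * Poll the Memory_Active bit in the DVSEC Range 1 Size Low register
	 * once per second, for up to media_ready_timeout seconds.
	 */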
118 	for (i = media_ready_timeout; i; i--) {
119 		u32 temp;
120 
121 		rc = pci_read_config_dword(
122 			pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &temp);
123 		if (rc)
124 			return rc;
125 
126 		active = FIELD_GET(CXL_DVSEC_MEM_ACTIVE, temp);
127 		if (active)
128 			break;
129 		msleep(1000);
130 	}
131 
132 	if (!active) {
133 		dev_err(&pdev->dev,
134 			"timeout awaiting memory active after %d seconds\n",
135 			media_ready_timeout);
136 		return -ETIMEDOUT;
137 	}
138 
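	/*
	 * Media is active per the DVSEC range register; additionally require
	 * the memory device status register to report the device as ready.
	 */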
139 	md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
140 	if (!CXLMDEV_READY(md_status))
141 		return -EIO;
142 
143 	return 0;
144 }
145 EXPORT_SYMBOL_NS_GPL(cxl_await_media_ready, CXL);
146 
147 static int wait_for_valid(struct cxl_dev_state *cxlds)
148 {
149 	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
150 	int d = cxlds->cxl_dvsec, rc;
151 	u32 val;
152 
153 	/*
154 	 * Memory_Info_Valid: When set, indicates that the CXL Range 1 Size
155 	 * High and Size Low registers are valid. Must be set within 1 second
156 	 * of deassertion of reset to the CXL device. Likely it is already set
157 	 * by the time this runs, but otherwise give a 1.5 second timeout in
158 	 * case of clock skew.
159 	 */
160 	rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val);
161 	if (rc)
162 		return rc;
163 
164 	if (val & CXL_DVSEC_MEM_INFO_VALID)
165 		return 0;
166 
167 	msleep(1500);
168 
169 	rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val);
170 	if (rc)
171 		return rc;
172 
173 	if (val & CXL_DVSEC_MEM_INFO_VALID)
174 		return 0;
175 
176 	return -ETIMEDOUT;
177 }
178 
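/*
 * Returns 1 if CXL_DVSEC_MEM_ENABLE already matched @val (nothing was
 * changed), 0 if the control register was updated, or a negative value
 * if the config access failed.
 */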
179 static int cxl_set_mem_enable(struct cxl_dev_state *cxlds, u16 val)
180 {
181 	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
182 	int d = cxlds->cxl_dvsec;
183 	u16 ctrl;
184 	int rc;
185 
186 	rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl);
187 	if (rc < 0)
188 		return rc;
189 
190 	if ((ctrl & CXL_DVSEC_MEM_ENABLE) == val)
191 		return 1;
192 	ctrl &= ~CXL_DVSEC_MEM_ENABLE;
193 	ctrl |= val;
194 
195 	rc = pci_write_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, ctrl);
196 	if (rc < 0)
197 		return rc;
198 
199 	return 0;
200 }
201 
202 static void clear_mem_enable(void *cxlds)
203 {
204 	cxl_set_mem_enable(cxlds, 0);
205 }
206 
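/*
 * Enable CXL.mem operation, and only register the cleanup action when
 * this call is the one that actually set the enable bit.
 */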
207 static int devm_cxl_enable_mem(struct device *host, struct cxl_dev_state *cxlds)
208 {
209 	int rc;
210 
211 	rc = cxl_set_mem_enable(cxlds, CXL_DVSEC_MEM_ENABLE);
212 	if (rc < 0)
213 		return rc;
214 	if (rc > 0)
215 		return 0;
216 	return devm_add_action_or_reset(host, clear_mem_enable, cxlds);
217 }
218 
219 static bool range_contains(struct range *r1, struct range *r2)
220 {
221 	return r1->start <= r2->start && r1->end >= r2->end;
222 }
223 
224 /* require dvsec ranges to be covered by a locked platform window */
225 static int dvsec_range_allowed(struct device *dev, void *arg)
226 {
227 	struct range *dev_range = arg;
228 	struct cxl_decoder *cxld;
229 
230 	if (!is_root_decoder(dev))
231 		return 0;
232 
233 	cxld = to_cxl_decoder(dev);
234 
235 	if (!(cxld->flags & CXL_DECODER_F_LOCK))
236 		return 0;
237 	if (!(cxld->flags & CXL_DECODER_F_RAM))
238 		return 0;
239 
240 	return range_contains(&cxld->hpa_range, dev_range);
241 }
242 
243 static void disable_hdm(void *_cxlhdm)
244 {
245 	u32 global_ctrl;
246 	struct cxl_hdm *cxlhdm = _cxlhdm;
247 	void __iomem *hdm = cxlhdm->regs.hdm_decoder;
248 
249 	global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET);
250 	writel(global_ctrl & ~CXL_HDM_DECODER_ENABLE,
251 	       hdm + CXL_HDM_DECODER_CTRL_OFFSET);
252 }
253 
254 static int devm_cxl_enable_hdm(struct device *host, struct cxl_hdm *cxlhdm)
255 {
256 	void __iomem *hdm = cxlhdm->regs.hdm_decoder;
257 	u32 global_ctrl;
258 
259 	global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET);
260 	writel(global_ctrl | CXL_HDM_DECODER_ENABLE,
261 	       hdm + CXL_HDM_DECODER_CTRL_OFFSET);
262 
263 	return devm_add_action_or_reset(host, disable_hdm, cxlhdm);
264 }
265 
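/*
 * Returns true if HDM decoder operation is usable (either already enabled
 * by firmware or enabled here), false on failure or when the legacy DVSEC
 * range configuration must remain in effect.
 */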
266 static bool __cxl_hdm_decode_init(struct cxl_dev_state *cxlds,
267 				  struct cxl_hdm *cxlhdm,
268 				  struct cxl_endpoint_dvsec_info *info)
269 {
270 	void __iomem *hdm = cxlhdm->regs.hdm_decoder;
271 	struct cxl_port *port = cxlhdm->port;
272 	struct device *dev = cxlds->dev;
273 	struct cxl_port *root;
274 	int i, rc, allowed;
275 	u32 global_ctrl;
276 
277 	global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET);
278 
279 	/*
280 	 * If the HDM Decoder Capability is already enabled then assume
281 	 * that some other agent like platform firmware set it up.
282 	 */
283 	if (global_ctrl & CXL_HDM_DECODER_ENABLE) {
284 		rc = devm_cxl_enable_mem(&port->dev, cxlds);
285 		if (rc)
286 			return false;
287 		return true;
288 	}
289 
290 	root = to_cxl_port(port->dev.parent);
291 	while (!is_cxl_root(root) && is_cxl_port(root->dev.parent))
292 		root = to_cxl_port(root->dev.parent);
293 	if (!is_cxl_root(root)) {
294 		dev_err(dev, "Failed to acquire root port for HDM enable\n");
295 		return false;
296 	}
297 
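	/*
	 * Check each active DVSEC range against the platform's locked RAM
	 * decoders. device_find_child() returns the matching decoder with a
	 * reference held, dropped via put_device() below.
	 */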
298 	for (i = 0, allowed = 0; info->mem_enabled && i < info->ranges; i++) {
299 		struct device *cxld_dev;
300 
301 		cxld_dev = device_find_child(&root->dev, &info->dvsec_range[i],
302 					     dvsec_range_allowed);
303 		if (!cxld_dev) {
304 			dev_dbg(dev, "DVSEC Range%d denied by platform\n", i);
305 			continue;
306 		}
307 		dev_dbg(dev, "DVSEC Range%d allowed by platform\n", i);
308 		put_device(cxld_dev);
309 		allowed++;
310 	}
311 
312 	if (!allowed) {
313 		cxl_set_mem_enable(cxlds, 0);
314 		info->mem_enabled = 0;
315 	}
316 
317 	/*
318 	 * Per CXL 2.0 Sections 8.1.3.8.3 and 8.1.3.8.4 (DVSEC CXL Range 1 Base
319 	 * [High, Low]), the range register values are ignored by the device
320 	 * once HDM operation is enabled, but the spec also recommends matching
321 	 * DVSEC Range 1,2 to HDM Decoder Range 0,1. So, a non-zero info->ranges
322 	 * is expected even though Linux neither requires nor maintains that
323 	 * match. If at least one DVSEC range is enabled and allowed, skip
324 	 * enabling the HDM Decoder Capability.
325 	 */
326 	if (info->mem_enabled)
327 		return false;
328 
329 	rc = devm_cxl_enable_hdm(&port->dev, cxlhdm);
330 	if (rc)
331 		return false;
332 
333 	rc = devm_cxl_enable_mem(&port->dev, cxlds);
334 	if (rc)
335 		return false;
336 
337 	return true;
338 }
339 
340 /**
341  * cxl_hdm_decode_init() - Setup HDM decoding for the endpoint
342  * @cxlds: Device state
343  * @cxlhdm: Mapped HDM decoder Capability
344  *
345  * Try to enable the endpoint's HDM Decoder Capability. Returns 0 on success or a negative error code.
346  */
347 int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm)
348 {
349 	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
350 	struct cxl_endpoint_dvsec_info info = { 0 };
351 	int hdm_count, rc, i, ranges = 0;
352 	struct device *dev = &pdev->dev;
353 	int d = cxlds->cxl_dvsec;
354 	u16 cap, ctrl;
355 
356 	if (!d) {
357 		dev_dbg(dev, "No DVSEC Capability\n");
358 		return -ENXIO;
359 	}
360 
361 	rc = pci_read_config_word(pdev, d + CXL_DVSEC_CAP_OFFSET, &cap);
362 	if (rc)
363 		return rc;
364 
365 	rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl);
366 	if (rc)
367 		return rc;
368 
369 	if (!(cap & CXL_DVSEC_MEM_CAPABLE)) {
370 		dev_dbg(dev, "Not MEM Capable\n");
371 		return -ENXIO;
372 	}
373 
374 	/*
375 	 * The spec does not allow MEM.capable to be set with 0 legacy HDM
376 	 * decoders (values > 2 are also undefined as of CXL 2.0). As this
377 	 * driver is for a spec-defined class code which must be CXL.mem
378 	 * capable, there is no point in continuing to enable CXL.mem.
379 	 */
380 	hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap);
381 	if (!hdm_count || hdm_count > 2)
382 		return -EINVAL;
383 
384 	rc = wait_for_valid(cxlds);
385 	if (rc) {
386 		dev_dbg(dev, "Failure awaiting MEM_INFO_VALID (%d)\n", rc);
387 		return rc;
388 	}
389 
390 	/*
391 	 * The current DVSEC values are moot if the memory capability is
392 	 * disabled, and they will remain moot after the HDM Decoder
393 	 * capability is enabled.
394 	 */
395 	info.mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl);
396 	if (!info.mem_enabled)
397 		goto hdm_init;
398 
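	/*
	 * Assemble the 64-bit size and base for each DVSEC range from the
	 * four 32-bit registers; ranges with a zero size are not counted.
	 */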
399 	for (i = 0; i < hdm_count; i++) {
400 		u64 base, size;
401 		u32 temp;
402 
403 		rc = pci_read_config_dword(
404 			pdev, d + CXL_DVSEC_RANGE_SIZE_HIGH(i), &temp);
405 		if (rc)
406 			return rc;
407 
408 		size = (u64)temp << 32;
409 
410 		rc = pci_read_config_dword(
411 			pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(i), &temp);
412 		if (rc)
413 			return rc;
414 
415 		size |= temp & CXL_DVSEC_MEM_SIZE_LOW_MASK;
416 
417 		rc = pci_read_config_dword(
418 			pdev, d + CXL_DVSEC_RANGE_BASE_HIGH(i), &temp);
419 		if (rc)
420 			return rc;
421 
422 		base = (u64)temp << 32;
423 
424 		rc = pci_read_config_dword(
425 			pdev, d + CXL_DVSEC_RANGE_BASE_LOW(i), &temp);
426 		if (rc)
427 			return rc;
428 
429 		base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK;
430 
431 		info.dvsec_range[i] = (struct range) {
432 			.start = base,
433 			.end = base + size - 1
434 		};
435 
436 		if (size)
437 			ranges++;
438 	}
439 
440 	info.ranges = ranges;
441 
442 	/*
443 	 * If DVSEC ranges are being used instead of the HDM decoder registers,
444 	 * there is no point in trying to manage the latter.
445 	 */
446 hdm_init:
447 	if (!__cxl_hdm_decode_init(cxlds, cxlhdm, &info)) {
448 		dev_err(dev,
449 			"Legacy range registers configuration prevents HDM operation.\n");
450 		return -EBUSY;
451 	}
452 
453 	return 0;
454 }
455 EXPORT_SYMBOL_NS_GPL(cxl_hdm_decode_init, CXL);
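/*
 * Illustrative caller sketch (an assumption, not taken from this file):
 * the endpoint's port driver is expected to map the HDM decoder capability
 * first and then let this helper arbitrate between HDM and DVSEC range
 * decode, e.g.:
 *
 *	rc = cxl_hdm_decode_init(cxlds, cxlhdm);
 *	if (rc)
 *		return rc;
 */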
456 
457 #define CXL_DOE_TABLE_ACCESS_REQ_CODE		0x000000ff
458 #define   CXL_DOE_TABLE_ACCESS_REQ_CODE_READ	0
459 #define CXL_DOE_TABLE_ACCESS_TABLE_TYPE		0x0000ff00
460 #define   CXL_DOE_TABLE_ACCESS_TABLE_TYPE_CDATA	0
461 #define CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE	0xffff0000
462 #define CXL_DOE_TABLE_ACCESS_LAST_ENTRY		0xffff
463 #define CXL_DOE_PROTOCOL_TABLE_ACCESS 2
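/*
 * A CDAT table access request is a single DW: the read request code in
 * bits[7:0], the table type in bits[15:8], and the entry handle in
 * bits[31:16]. A response entry handle of 0xffff marks the last entry.
 */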
464 
465 static struct pci_doe_mb *find_cdat_doe(struct device *uport)
466 {
467 	struct cxl_memdev *cxlmd;
468 	struct cxl_dev_state *cxlds;
469 	unsigned long index;
470 	void *entry;
471 
472 	cxlmd = to_cxl_memdev(uport);
473 	cxlds = cxlmd->cxlds;
474 
475 	xa_for_each(&cxlds->doe_mbs, index, entry) {
476 		struct pci_doe_mb *cur = entry;
477 
478 		if (pci_doe_supports_prot(cur, PCI_DVSEC_VENDOR_ID_CXL,
479 					  CXL_DOE_PROTOCOL_TABLE_ACCESS))
480 			return cur;
481 	}
482 
483 	return NULL;
484 }
485 
486 #define CDAT_DOE_REQ(entry_handle)					\
487 	(FIELD_PREP(CXL_DOE_TABLE_ACCESS_REQ_CODE,			\
488 		    CXL_DOE_TABLE_ACCESS_REQ_CODE_READ) |		\
489 	 FIELD_PREP(CXL_DOE_TABLE_ACCESS_TABLE_TYPE,			\
490 		    CXL_DOE_TABLE_ACCESS_TABLE_TYPE_CDATA) |		\
491 	 FIELD_PREP(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE, (entry_handle)))
492 
493 static void cxl_doe_task_complete(struct pci_doe_task *task)
494 {
495 	complete(task->private);
496 }
497 
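/*
 * One CDAT DOE exchange: a single request DW plus a 32-DW (128 byte)
 * response buffer, with completion signalled through the embedded
 * struct completion by cxl_doe_task_complete().
 */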
498 struct cdat_doe_task {
499 	u32 request_pl;
500 	u32 response_pl[32];
501 	struct completion c;
502 	struct pci_doe_task task;
503 };
504 
505 #define DECLARE_CDAT_DOE_TASK(req, cdt)                       \
506 struct cdat_doe_task cdt = {                                  \
507 	.c = COMPLETION_INITIALIZER_ONSTACK(cdt.c),           \
508 	.request_pl = req,				      \
509 	.task = {                                             \
510 		.prot.vid = PCI_DVSEC_VENDOR_ID_CXL,        \
511 		.prot.type = CXL_DOE_PROTOCOL_TABLE_ACCESS, \
512 		.request_pl = &cdt.request_pl,                \
513 		.request_pl_sz = sizeof(cdt.request_pl),      \
514 		.response_pl = cdt.response_pl,               \
515 		.response_pl_sz = sizeof(cdt.response_pl),    \
516 		.complete = cxl_doe_task_complete,            \
517 		.private = &cdt.c,                            \
518 	}                                                     \
519 }
520 
521 static int cxl_cdat_get_length(struct device *dev,
522 			       struct pci_doe_mb *cdat_doe,
523 			       size_t *length)
524 {
525 	DECLARE_CDAT_DOE_TASK(CDAT_DOE_REQ(0), t);
526 	int rc;
527 
528 	rc = pci_doe_submit_task(cdat_doe, &t.task);
529 	if (rc < 0) {
530 		dev_err(dev, "DOE submit failed: %d", rc);
531 		return rc;
532 	}
533 	wait_for_completion(&t.c);
534 	if (t.task.rv < sizeof(u32))
535 		return -EIO;
536 
537 	*length = t.response_pl[1];
538 	dev_dbg(dev, "CDAT length %zu\n", *length);
539 
540 	return 0;
541 }
542 
543 static int cxl_cdat_read_table(struct device *dev,
544 			       struct pci_doe_mb *cdat_doe,
545 			       struct cxl_cdat *cdat)
546 {
547 	size_t length = cdat->length;
548 	u32 *data = cdat->table;
549 	int entry_handle = 0;
550 
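	/*
	 * Read the table one entry per DOE exchange; each response header
	 * carries the handle of the next entry, and 0xffff indicates that
	 * the last entry has been read.
	 */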
551 	do {
552 		DECLARE_CDAT_DOE_TASK(CDAT_DOE_REQ(entry_handle), t);
553 		size_t entry_dw;
554 		u32 *entry;
555 		int rc;
556 
557 		rc = pci_doe_submit_task(cdat_doe, &t.task);
558 		if (rc < 0) {
559 			dev_err(dev, "DOE submit failed: %d", rc);
560 			return rc;
561 		}
562 		wait_for_completion(&t.c);
563 		/* 1 DW header + 1 DW data min */
564 		if (t.task.rv < (2 * sizeof(u32)))
565 			return -EIO;
566 
567 		/* Get the CXL table access header entry handle */
568 		entry_handle = FIELD_GET(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE,
569 					 t.response_pl[0]);
570 		entry = t.response_pl + 1;
571 		entry_dw = t.task.rv / sizeof(u32);
572 		/* Skip Header */
573 		entry_dw -= 1;
574 		entry_dw = min(length / sizeof(u32), entry_dw);
575 		/* Prevent length < 1 DW from causing a buffer overflow */
576 		if (entry_dw) {
577 			memcpy(data, entry, entry_dw * sizeof(u32));
578 			length -= entry_dw * sizeof(u32);
579 			data += entry_dw;
580 		}
581 	} while (entry_handle != CXL_DOE_TABLE_ACCESS_LAST_ENTRY);
582 
583 	return 0;
584 }
585 
586 /**
587  * read_cdat_data - Read the CDAT data on this port
588  * @port: Port to read data from
589  *
590  * This call will sleep waiting for responses from the DOE mailbox.
591  */
592 void read_cdat_data(struct cxl_port *port)
593 {
594 	struct pci_doe_mb *cdat_doe;
595 	struct device *dev = &port->dev;
596 	struct device *uport = port->uport;
597 	size_t cdat_length;
598 	int rc;
599 
600 	cdat_doe = find_cdat_doe(uport);
601 	if (!cdat_doe) {
602 		dev_dbg(dev, "No CDAT mailbox\n");
603 		return;
604 	}
605 
606 	port->cdat_available = true;
607 
608 	if (cxl_cdat_get_length(dev, cdat_doe, &cdat_length)) {
609 		dev_dbg(dev, "No CDAT length\n");
610 		return;
611 	}
612 
613 	port->cdat.table = devm_kzalloc(dev, cdat_length, GFP_KERNEL);
614 	if (!port->cdat.table)
615 		return;
616 
617 	port->cdat.length = cdat_length;
618 	rc = cxl_cdat_read_table(dev, cdat_doe, &port->cdat);
619 	if (rc) {
620 		/* Don't leave table data allocated on error */
621 		devm_kfree(dev, port->cdat.table);
622 		port->cdat.table = NULL;
623 		port->cdat.length = 0;
624 		dev_err(dev, "CDAT data read error\n");
625 	}
626 }
627 EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL);
628