// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/module.h>
#include <linux/sizes.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/io.h>
#include "cxlmem.h"
#include "pci.h"
#include "cxl.h"

/**
 * DOC: cxl pci
 *
 * This implements the PCI-exclusive functionality for a CXL device as defined
 * by the Compute Express Link specification. CXL devices may surface certain
 * functionality even when they are not CXL enabled. While this driver is
 * focused on the PCI-specific aspects of a CXL device, it binds to the CXL
 * memory device class code, and therefore the implementation of cxl_pci is
 * focused on CXL memory devices.
 *
 * The driver has several responsibilities, mainly:
 *  - Create the memX device and register it on the CXL bus.
 *  - Enumerate the device's register interface and map it.
 *  - Register an nvdimm bridge device with cxl_core.
 *  - Register a CXL mailbox with cxl_core.
 */

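/* Non-zero while the device owns the mailbox (doorbell is set) */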
#define cxl_doorbell_busy(cxlds)                                                \
	(readl((cxlds)->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET) &                  \
	 CXLDEV_MBOX_CTRL_DOORBELL)

/* CXL 2.0 - 8.2.8.4 */
#define CXL_MAILBOX_TIMEOUT_MS (2 * 1000)

static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds)
{
	const unsigned long start = jiffies;
	unsigned long end = start;

	while (cxl_doorbell_busy(cxlds)) {
		end = jiffies;

		if (time_after(end, start + msecs_to_jiffies(CXL_MAILBOX_TIMEOUT_MS))) {
			/* Check again in case preempted before timeout test */
			if (!cxl_doorbell_busy(cxlds))
				break;
			return -ETIMEDOUT;
		}
		cpu_relax();
	}

	dev_dbg(cxlds->dev, "Doorbell wait took %dms",
		jiffies_to_msecs(end) - jiffies_to_msecs(start));
	return 0;
}

static void cxl_pci_mbox_timeout(struct cxl_dev_state *cxlds,
				 struct cxl_mbox_cmd *mbox_cmd)
{
	struct device *dev = cxlds->dev;

	dev_dbg(dev, "Mailbox command (opcode: %#x size: %zub) timed out\n",
		mbox_cmd->opcode, mbox_cmd->size_in);
}

/**
 * __cxl_pci_mbox_send_cmd() - Execute a mailbox command
 * @cxlds: The device state to communicate with.
 * @mbox_cmd: Command to send to the memory device.
 *
 * Context: Any context. Expects mbox_mutex to be held.
 * Return: -ETIMEDOUT if timeout occurred waiting for completion. 0 on success.
 *         Caller should check the return code in @mbox_cmd to make sure it
 *         succeeded.
 *
 * This is a generic form of the CXL mailbox send command that only uses the
 * registers defined by the mailbox capability ID - CXL 2.0 8.2.8.4. Memory
 * devices, and perhaps other types of CXL devices, may have further information
 * available upon error conditions. Driver facilities wishing to send mailbox
 * commands should use the wrapper command.
 *
 * The CXL spec allows for up to two mailboxes. The intention is for the primary
 * mailbox to be OS controlled and the secondary mailbox to be used by system
 * firmware. This allows the OS and firmware to communicate with the device
 * without needing to coordinate with each other. The driver only uses the
 * primary mailbox.
 */
static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds,
				   struct cxl_mbox_cmd *mbox_cmd)
{
	void __iomem *payload = cxlds->regs.mbox + CXLDEV_MBOX_PAYLOAD_OFFSET;
	struct device *dev = cxlds->dev;
	u64 cmd_reg, status_reg;
	size_t out_len;
	int rc;

	lockdep_assert_held(&cxlds->mbox_mutex);

	/*
	 * Here are the steps from 8.2.8.4 of the CXL 2.0 spec.
	 *   1. Caller reads MB Control Register to verify doorbell is clear
	 *   2. Caller writes Command Register
	 *   3. Caller writes Command Payload Registers if input payload is non-empty
	 *   4. Caller writes MB Control Register to set doorbell
	 *   5. Caller either polls for doorbell to be clear or waits for interrupt if configured
	 *   6. Caller reads MB Status Register to fetch Return code
	 *   7. If command successful, Caller reads Command Register to get Payload Length
	 *   8. If output payload is non-empty, host reads Command Payload Registers
	 *
	 * Hardware is free to do whatever it wants before the doorbell is rung,
	 * and isn't allowed to change anything after it clears the doorbell. As
	 * such, steps 2 and 3 can happen in any order, and steps 6, 7, 8 can
	 * also happen in any order (though some orders might not make sense).
	 */

	/* #1 */
	if (cxl_doorbell_busy(cxlds)) {
		dev_err_ratelimited(dev, "Mailbox re-busy after acquiring\n");
		return -EBUSY;
	}

	cmd_reg = FIELD_PREP(CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK,
			     mbox_cmd->opcode);
	if (mbox_cmd->size_in) {
		if (WARN_ON(!mbox_cmd->payload_in))
			return -EINVAL;

		cmd_reg |= FIELD_PREP(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK,
				      mbox_cmd->size_in);
		memcpy_toio(payload, mbox_cmd->payload_in, mbox_cmd->size_in);
	}

	/* #2, #3 */
	writeq(cmd_reg, cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);

	/* #4 */
	dev_dbg(dev, "Sending command\n");
	writel(CXLDEV_MBOX_CTRL_DOORBELL,
	       cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);

	/* #5 */
	rc = cxl_pci_mbox_wait_for_doorbell(cxlds);
	if (rc == -ETIMEDOUT) {
		cxl_pci_mbox_timeout(cxlds, mbox_cmd);
		return rc;
	}

	/* #6 */
	status_reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_STATUS_OFFSET);
	mbox_cmd->return_code =
		FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg);

	if (mbox_cmd->return_code != 0) {
		dev_dbg(dev, "Mailbox operation had an error\n");
		return 0;
	}

	/* #7 */
	cmd_reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
	out_len = FIELD_GET(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, cmd_reg);

	/* #8 */
	if (out_len && mbox_cmd->payload_out) {
		/*
		 * Sanitize the copy. If hardware misbehaves, out_len can be
		 * greater than the max allowed size (the field has 21 bits
		 * available but the spec defines a 1M max). The caller may
		 * also have requested less data than the hardware supplied,
		 * even within spec.
		 */
		size_t n = min3(mbox_cmd->size_out, cxlds->payload_size, out_len);

		memcpy_fromio(mbox_cmd->payload_out, payload, n);
		mbox_cmd->size_out = n;
	} else {
		mbox_cmd->size_out = 0;
	}

	return 0;
}

/**
 * cxl_pci_mbox_get() - Acquire exclusive access to the mailbox.
 * @cxlds: The device state to gain access to.
 *
 * Context: Any context. Takes the mbox_mutex.
 * Return: 0 if exclusive access was acquired.
 */
static int cxl_pci_mbox_get(struct cxl_dev_state *cxlds)
{
	struct device *dev = cxlds->dev;
	u64 md_status;
	int rc;

	mutex_lock_io(&cxlds->mbox_mutex);

	/*
	 * XXX: There is some amount of ambiguity in the 2.0 version of the spec
	 * around the mailbox interface ready (8.2.8.5.1.1). The purpose of the
	 * bit is to allow firmware running on the device to notify the driver
	 * that it's ready to receive commands. It is unclear if the bit needs
	 * to be read for each mailbox transaction, i.e. whether the firmware
	 * can switch it on and off as needed. Second, there is no defined
	 * timeout for mailbox ready, like there is for the doorbell interface.
	 *
	 * Assumptions:
	 * 1. The firmware might toggle the Mailbox Interface Ready bit, so
	 *    check it for every command.
	 *
	 * 2. If the doorbell is clear, the firmware should have first set the
	 *    Mailbox Interface Ready bit. Therefore, waiting for the doorbell
	 *    to be ready is sufficient.
	 */
	rc = cxl_pci_mbox_wait_for_doorbell(cxlds);
	if (rc) {
		dev_warn(dev, "Mailbox interface not ready\n");
		goto out;
	}

	md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
	if (!((md_status & CXLMDEV_MBOX_IF_READY) && CXLMDEV_READY(md_status))) {
		dev_err(dev, "mbox: reported doorbell ready, but not mbox ready\n");
		rc = -EBUSY;
		goto out;
	}

	/*
	 * Hardware shouldn't allow a ready status while also having failure
	 * bits set. Report an error; this likely warrants a bug report.
	 */
	rc = -EFAULT;
	if (md_status & CXLMDEV_DEV_FATAL) {
		dev_err(dev, "mbox: reported ready, but fatal\n");
		goto out;
	}
	if (md_status & CXLMDEV_FW_HALT) {
		dev_err(dev, "mbox: reported ready, but halted\n");
		goto out;
	}
	if (CXLMDEV_RESET_NEEDED(md_status)) {
		dev_err(dev, "mbox: reported ready, but reset needed\n");
		goto out;
	}

	/* with lock held */
	return 0;

out:
	mutex_unlock(&cxlds->mbox_mutex);
	return rc;
}

/**
 * cxl_pci_mbox_put() - Release exclusive access to the mailbox.
 * @cxlds: The device state to communicate with.
 *
 * Context: Any context. Expects mbox_mutex to be held.
 */
static void cxl_pci_mbox_put(struct cxl_dev_state *cxlds)
{
	mutex_unlock(&cxlds->mbox_mutex);
}

static int cxl_pci_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
{
	int rc;

	rc = cxl_pci_mbox_get(cxlds);
	if (rc)
		return rc;

	rc = __cxl_pci_mbox_send_cmd(cxlds, cmd);
	cxl_pci_mbox_put(cxlds);

	return rc;
}

static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds)
{
	const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);

	cxlds->mbox_send = cxl_pci_mbox_send;
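	/*
	 * The Payload Size field encodes the size of the mailbox payload
	 * area as a power of two (2^n bytes).
	 */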
	cxlds->payload_size =
		1 << FIELD_GET(CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK, cap);

	/*
	 * CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register
	 *
	 * If the size is too small, mandatory commands will not work and so
	 * there's no point in going forward. If the size is too large, there's
	 * no harm in soft limiting it.
	 */
	cxlds->payload_size = min_t(size_t, cxlds->payload_size, SZ_1M);
	if (cxlds->payload_size < 256) {
		dev_err(cxlds->dev, "Mailbox is too small (%zub)",
			cxlds->payload_size);
		return -ENXIO;
	}

	dev_dbg(cxlds->dev, "Mailbox payload sized %zu",
		cxlds->payload_size);

	return 0;
}

static int cxl_map_regblock(struct pci_dev *pdev, struct cxl_register_map *map)
{
	void __iomem *addr;
	int bar = map->barno;
	struct device *dev = &pdev->dev;
	resource_size_t offset = map->block_offset;

	/* Basic sanity check that BAR is big enough */
	if (pci_resource_len(pdev, bar) < offset) {
		dev_err(dev, "BAR%d: %pr: too small (offset: %pa)\n", bar,
			&pdev->resource[bar], &offset);
		return -ENXIO;
	}

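	/* Map the whole BAR; the register block sits at block_offset within it */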
	addr = pci_iomap(pdev, bar, 0);
	if (!addr) {
		dev_err(dev, "failed to map registers\n");
		return -ENOMEM;
	}

	dev_dbg(dev, "Mapped CXL Memory Device resource bar %u @ %pa\n",
		bar, &offset);

	map->base = addr + map->block_offset;
	return 0;
}

static void cxl_unmap_regblock(struct pci_dev *pdev,
			       struct cxl_register_map *map)
{
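	/* Rewind past block_offset to recover the cookie pci_iomap() returned */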
	pci_iounmap(pdev, map->base - map->block_offset);
	map->base = NULL;
}

static int cxl_probe_regs(struct pci_dev *pdev, struct cxl_register_map *map)
{
	struct cxl_component_reg_map *comp_map;
	struct cxl_device_reg_map *dev_map;
	struct device *dev = &pdev->dev;
	void __iomem *base = map->base;

	switch (map->reg_type) {
	case CXL_REGLOC_RBI_COMPONENT:
		comp_map = &map->component_map;
		cxl_probe_component_regs(dev, base, comp_map);
		if (!comp_map->hdm_decoder.valid) {
			dev_err(dev, "HDM decoder registers not found\n");
			return -ENXIO;
		}

		dev_dbg(dev, "Set up component registers\n");
		break;
	case CXL_REGLOC_RBI_MEMDEV:
		dev_map = &map->device_map;
		cxl_probe_device_regs(dev, base, dev_map);
		if (!dev_map->status.valid || !dev_map->mbox.valid ||
		    !dev_map->memdev.valid) {
			dev_err(dev, "registers not found: %s%s%s\n",
				!dev_map->status.valid ? "status " : "",
				!dev_map->mbox.valid ? "mbox " : "",
				!dev_map->memdev.valid ? "memdev " : "");
			return -ENXIO;
		}

		dev_dbg(dev, "Set up device registers\n");
		break;
	default:
		break;
	}

	return 0;
}

static int cxl_map_regs(struct cxl_dev_state *cxlds, struct cxl_register_map *map)
{
	struct device *dev = cxlds->dev;
	struct pci_dev *pdev = to_pci_dev(dev);

	switch (map->reg_type) {
	case CXL_REGLOC_RBI_COMPONENT:
		cxl_map_component_regs(pdev, &cxlds->regs.component, map);
		dev_dbg(dev, "Mapping component registers...\n");
		break;
	case CXL_REGLOC_RBI_MEMDEV:
		cxl_map_device_regs(pdev, &cxlds->regs.device_regs, map);
		dev_dbg(dev, "Mapping device registers...\n");
		break;
	default:
		break;
	}

	return 0;
}

static void cxl_decode_regblock(u32 reg_lo, u32 reg_hi,
				struct cxl_register_map *map)
{
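	/*
	 * Each Register Locator entry is two dwords: reg_lo carries the BAR
	 * indicator, the register block type, and the low bits of the block
	 * offset; reg_hi holds the upper 32 bits of the offset.
	 */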
	map->block_offset =
		((u64)reg_hi << 32) | (reg_lo & CXL_REGLOC_ADDR_MASK);
	map->barno = FIELD_GET(CXL_REGLOC_BIR_MASK, reg_lo);
	map->reg_type = FIELD_GET(CXL_REGLOC_RBI_MASK, reg_lo);
}

/**
 * cxl_find_regblock() - Locate register blocks by type
 * @pdev: The CXL PCI device to enumerate.
 * @type: Register Block Indicator id
 * @map: Enumeration output, clobbered on error
 *
 * Return: 0 if register block enumerated, negative error code otherwise
 *
 * A CXL DVSEC may point to one or more register blocks; search for them
 * by @type.
 */
static int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
			     struct cxl_register_map *map)
{
	u32 regloc_size, regblocks;
	int regloc, i;

	regloc = pci_find_dvsec_capability(pdev, PCI_DVSEC_VENDOR_ID_CXL,
					   PCI_DVSEC_ID_CXL_REGLOC_DVSEC_ID);
	if (!regloc)
		return -ENXIO;

	pci_read_config_dword(pdev, regloc + PCI_DVSEC_HEADER1, &regloc_size);
	regloc_size = FIELD_GET(PCI_DVSEC_HEADER1_LENGTH_MASK, regloc_size);

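	/* Register block entries follow the DVSEC header, 8 bytes each */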
	regloc += PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET;
	regblocks = (regloc_size - PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET) / 8;

	for (i = 0; i < regblocks; i++, regloc += 8) {
		u32 reg_lo, reg_hi;

		pci_read_config_dword(pdev, regloc, &reg_lo);
		pci_read_config_dword(pdev, regloc + 4, &reg_hi);

		cxl_decode_regblock(reg_lo, reg_hi, map);

		if (map->reg_type == type)
			return 0;
	}

	return -ENODEV;
}

static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
			  struct cxl_register_map *map)
{
	int rc;

	rc = cxl_find_regblock(pdev, type, map);
	if (rc)
		return rc;

	rc = cxl_map_regblock(pdev, map);
	if (rc)
		return rc;

	rc = cxl_probe_regs(pdev, map);
	cxl_unmap_regblock(pdev, map);

	return rc;
}

static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct cxl_register_map map;
	struct cxl_memdev *cxlmd;
	struct cxl_dev_state *cxlds;
	int rc;

	/*
	 * Double check the anonymous union trickery in struct cxl_regs
	 * FIXME switch to struct_group()
	 */
	BUILD_BUG_ON(offsetof(struct cxl_regs, memdev) !=
		     offsetof(struct cxl_regs, device_regs.memdev));

	rc = pcim_enable_device(pdev);
	if (rc)
		return rc;

	cxlds = cxl_dev_state_create(&pdev->dev);
	if (IS_ERR(cxlds))
		return PTR_ERR(cxlds);

	rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map);
	if (rc)
		return rc;

	rc = cxl_map_regs(cxlds, &map);
	if (rc)
		return rc;

	rc = cxl_pci_setup_mailbox(cxlds);
	if (rc)
		return rc;

	rc = cxl_enumerate_cmds(cxlds);
	if (rc)
		return rc;

	rc = cxl_dev_state_identify(cxlds);
	if (rc)
		return rc;

	rc = cxl_mem_create_range_info(cxlds);
	if (rc)
		return rc;

	cxlmd = devm_cxl_add_memdev(cxlds);
	if (IS_ERR(cxlmd))
		return PTR_ERR(cxlmd);

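	/* Add an nvdimm device when persistent capacity is present */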
	if (range_len(&cxlds->pmem_range) && IS_ENABLED(CONFIG_CXL_PMEM))
		rc = devm_cxl_add_nvdimm(&pdev->dev, cxlmd);

	return rc;
}

static const struct pci_device_id cxl_mem_pci_tbl[] = {
	/* PCI class code for CXL.mem Type-3 Devices */
	{ PCI_DEVICE_CLASS((PCI_CLASS_MEMORY_CXL << 8 | CXL_MEMORY_PROGIF), ~0)},
	{ /* terminate list */ },
};
MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl);

static struct pci_driver cxl_pci_driver = {
	.name			= KBUILD_MODNAME,
	.id_table		= cxl_mem_pci_tbl,
	.probe			= cxl_pci_probe,
	.driver	= {
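		/* Probe may run asynchronously relative to other drivers */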
		.probe_type	= PROBE_PREFER_ASYNCHRONOUS,
	},
};

MODULE_LICENSE("GPL v2");
module_pci_driver(cxl_pci_driver);
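/* The cxl_core driver exports its symbols in the CXL namespace */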
MODULE_IMPORT_NS(CXL);