xref: /linux/arch/powerpc/platforms/powernv/ocxl.c (revision 3f2a5ba784b808109cac0aac921213e43143a216)
1 // SPDX-License-Identifier: GPL-2.0+
2 // Copyright 2017 IBM Corp.
3 #include <asm/pnv-ocxl.h>
4 #include <asm/opal.h>
5 #include <misc/ocxl-config.h>
6 #include "pci.h"
7 
/* Receive capability value reported for the POWER9 transaction layer */
#define PNV_OCXL_TL_P9_RECV_CAP		0x000000000000000Full
/* Number of actags available per link; actag ranges are pro-rated to fit */
#define PNV_OCXL_ACTAG_MAX		64
/* PASIDs are 20-bit, but on P9, NPU can only handle 15 bits */
#define PNV_OCXL_PASID_BITS		15
#define PNV_OCXL_PASID_MAX		((1 << PNV_OCXL_PASID_BITS) - 1)

/*
 * Fields of the dword read from the function DVSEC at
 * OCXL_DVSEC_FUNC_OFF_INDEX. AFU_PRESENT uses an unsigned constant:
 * shifting a signed 1 into bit 31 is undefined behavior in ISO C.
 */
#define AFU_PRESENT (1U << 31)
#define AFU_INDEX_MASK 0x3F000000
#define AFU_INDEX_SHIFT 24
/* Low bits of the 'actag supported' word of an AFU control DVSEC */
#define ACTAG_MASK 0xFFF
18 
19 
/* A contiguous range of actags assigned to one PCI function. */
struct actag_range {
	u16 start;	/* first actag of the range */
	u16 count;	/* number of actags in the range */
};
24 
25 struct npu_link {
26 	struct list_head list;
27 	int domain;
28 	int bus;
29 	int dev;
30 	u16 fn_desired_actags[8];
31 	struct actag_range fn_actags[8];
32 	bool assignment_done;
33 };
34 static struct list_head links_list = LIST_HEAD_INIT(links_list);
35 static DEFINE_MUTEX(links_list_lock);
36 
37 
38 /*
39  * opencapi actags handling:
40  *
41  * When sending commands, the opencapi device references the memory
42  * context it's targeting with an 'actag', which is really an alias
43  * for a (BDF, pasid) combination. When it receives a command, the NPU
44  * must do a lookup of the actag to identify the memory context. The
45  * hardware supports a finite number of actags per link (64 for
46  * POWER9).
47  *
48  * The device can carry multiple functions, and each function can have
49  * multiple AFUs. Each AFU advertises in its config space the number
50  * of desired actags. The host must configure in the config space of
51  * the AFU how many actags the AFU is really allowed to use (which can
52  * be less than what the AFU desires).
53  *
54  * When a PCI function is probed by the driver, it has no visibility
55  * about the other PCI functions and how many actags they'd like,
56  * which makes it impossible to distribute actags fairly among AFUs.
57  *
 * Unfortunately, the only way to know how many actags a function
 * desires is by looking at the data for each AFU in the config space
 * and adding them up. Similarly, the only way to know how many actags
 * all the functions of the physical device desire is by adding the
 * previously computed function counts. Then we can match that against
 * what the hardware supports.
64  *
 * To get a comprehensive view, we use a 'pci fixup': at the end of
 * PCI enumeration, each function counts how many actags its AFUs
 * desire and we save it in a 'npu_link' structure, shared between all
 * the PCI functions of the same device. Therefore, when the first
 * function is probed by the driver, we can get an idea of the total
 * count of desired actags for the device, and assign the actags to
 * the AFUs, by pro-rating if needed.
72  */
73 
74 static int find_dvsec_from_pos(struct pci_dev *dev, int dvsec_id, int pos)
75 {
76 	int vsec = pos;
77 	u16 vendor, id;
78 
79 	while ((vsec = pci_find_next_ext_capability(dev, vsec,
80 						    OCXL_EXT_CAP_ID_DVSEC))) {
81 		pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET,
82 				&vendor);
83 		pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id);
84 		if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id)
85 			return vsec;
86 	}
87 	return 0;
88 }
89 
90 static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx)
91 {
92 	int vsec = 0;
93 	u8 idx;
94 
95 	while ((vsec = find_dvsec_from_pos(dev, OCXL_DVSEC_AFU_CTRL_ID,
96 					   vsec))) {
97 		pci_read_config_byte(dev, vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX,
98 				&idx);
99 		if (idx == afu_idx)
100 			return vsec;
101 	}
102 	return 0;
103 }
104 
105 static int get_max_afu_index(struct pci_dev *dev, int *afu_idx)
106 {
107 	int pos;
108 	u32 val;
109 
110 	pos = pci_find_dvsec_capability(dev, PCI_VENDOR_ID_IBM,
111 					OCXL_DVSEC_FUNC_ID);
112 	if (!pos)
113 		return -ESRCH;
114 
115 	pci_read_config_dword(dev, pos + OCXL_DVSEC_FUNC_OFF_INDEX, &val);
116 	if (val & AFU_PRESENT)
117 		*afu_idx = (val & AFU_INDEX_MASK) >> AFU_INDEX_SHIFT;
118 	else
119 		*afu_idx = -1;
120 	return 0;
121 }
122 
123 static int get_actag_count(struct pci_dev *dev, int afu_idx, int *actag)
124 {
125 	int pos;
126 	u16 actag_sup;
127 
128 	pos = find_dvsec_afu_ctrl(dev, afu_idx);
129 	if (!pos)
130 		return -ESRCH;
131 
132 	pci_read_config_word(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP,
133 			&actag_sup);
134 	*actag = actag_sup & ACTAG_MASK;
135 	return 0;
136 }
137 
138 static struct npu_link *find_link(struct pci_dev *dev)
139 {
140 	struct npu_link *link;
141 
142 	list_for_each_entry(link, &links_list, list) {
143 		/* The functions of a device all share the same link */
144 		if (link->domain == pci_domain_nr(dev->bus) &&
145 			link->bus == dev->bus->number &&
146 			link->dev == PCI_SLOT(dev->devfn)) {
147 			return link;
148 		}
149 	}
150 
151 	/* link doesn't exist yet. Allocate one */
152 	link = kzalloc(sizeof(struct npu_link), GFP_KERNEL);
153 	if (!link)
154 		return NULL;
155 	link->domain = pci_domain_nr(dev->bus);
156 	link->bus = dev->bus->number;
157 	link->dev = PCI_SLOT(dev->devfn);
158 	list_add(&link->list, &links_list);
159 	return link;
160 }
161 
162 static void pnv_ocxl_fixup_actag(struct pci_dev *dev)
163 {
164 	struct pci_controller *hose = pci_bus_to_host(dev->bus);
165 	struct pnv_phb *phb = hose->private_data;
166 	struct npu_link *link;
167 	int rc, afu_idx = -1, i, actag;
168 
169 	if (!machine_is(powernv))
170 		return;
171 
172 	if (phb->type != PNV_PHB_NPU_OCAPI)
173 		return;
174 
175 	guard(mutex)(&links_list_lock);
176 
177 	link = find_link(dev);
178 	if (!link) {
179 		dev_warn(&dev->dev, "couldn't update actag information\n");
180 		return;
181 	}
182 
183 	/*
184 	 * Check how many actags are desired for the AFUs under that
185 	 * function and add it to the count for the link
186 	 */
187 	rc = get_max_afu_index(dev, &afu_idx);
188 	if (rc) {
189 		/* Most likely an invalid config space */
190 		dev_dbg(&dev->dev, "couldn't find AFU information\n");
191 		afu_idx = -1;
192 	}
193 
194 	link->fn_desired_actags[PCI_FUNC(dev->devfn)] = 0;
195 	for (i = 0; i <= afu_idx; i++) {
196 		/*
197 		 * AFU index 'holes' are allowed. So don't fail if we
198 		 * can't read the actag info for an index
199 		 */
200 		rc = get_actag_count(dev, i, &actag);
201 		if (rc)
202 			continue;
203 		link->fn_desired_actags[PCI_FUNC(dev->devfn)] += actag;
204 	}
205 	dev_dbg(&dev->dev, "total actags for function: %d\n",
206 		link->fn_desired_actags[PCI_FUNC(dev->devfn)]);
207 
208 }
209 DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_ocxl_fixup_actag);
210 
211 static u16 assign_fn_actags(u16 desired, u16 total)
212 {
213 	u16 count;
214 
215 	if (total <= PNV_OCXL_ACTAG_MAX)
216 		count = desired;
217 	else
218 		count = PNV_OCXL_ACTAG_MAX * desired / total;
219 
220 	return count;
221 }
222 
223 static void assign_actags(struct npu_link *link)
224 {
225 	u16 actag_count, range_start = 0, total_desired = 0;
226 	int i;
227 
228 	for (i = 0; i < 8; i++)
229 		total_desired += link->fn_desired_actags[i];
230 
231 	for (i = 0; i < 8; i++) {
232 		if (link->fn_desired_actags[i]) {
233 			actag_count = assign_fn_actags(
234 				link->fn_desired_actags[i],
235 				total_desired);
236 			link->fn_actags[i].start = range_start;
237 			link->fn_actags[i].count = actag_count;
238 			range_start += actag_count;
239 			WARN_ON(range_start >= PNV_OCXL_ACTAG_MAX);
240 		}
241 		pr_debug("link %x:%x:%x fct %d actags: start=%d count=%d (desired=%d)\n",
242 			link->domain, link->bus, link->dev, i,
243 			link->fn_actags[i].start, link->fn_actags[i].count,
244 			link->fn_desired_actags[i]);
245 	}
246 	link->assignment_done = true;
247 }
248 
249 int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled,
250 		u16 *supported)
251 {
252 	struct npu_link *link;
253 
254 	guard(mutex)(&links_list_lock);
255 
256 	link = find_link(dev);
257 	if (!link) {
258 		dev_err(&dev->dev, "actag information not found\n");
259 		return -ENODEV;
260 	}
261 	/*
262 	 * On p9, we only have 64 actags per link, so they must be
263 	 * shared by all the functions of the same adapter. We counted
264 	 * the desired actag counts during PCI enumeration, so that we
265 	 * can allocate a pro-rated number of actags to each function.
266 	 */
267 	if (!link->assignment_done)
268 		assign_actags(link);
269 
270 	*base      = link->fn_actags[PCI_FUNC(dev->devfn)].start;
271 	*enabled   = link->fn_actags[PCI_FUNC(dev->devfn)].count;
272 	*supported = link->fn_desired_actags[PCI_FUNC(dev->devfn)];
273 
274 	return 0;
275 }
276 EXPORT_SYMBOL_GPL(pnv_ocxl_get_actag);
277 
278 int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count)
279 {
280 	struct npu_link *link;
281 	int i, rc = -EINVAL;
282 
283 	/*
284 	 * The number of PASIDs (process address space ID) which can
285 	 * be used by a function depends on how many functions exist
286 	 * on the device. The NPU needs to be configured to know how
287 	 * many bits are available to PASIDs and how many are to be
288 	 * used by the function BDF identifier.
289 	 *
290 	 * We only support one AFU-carrying function for now.
291 	 */
292 	guard(mutex)(&links_list_lock);
293 
294 	link = find_link(dev);
295 	if (!link) {
296 		dev_err(&dev->dev, "actag information not found\n");
297 		return -ENODEV;
298 	}
299 
300 	for (i = 0; i < 8; i++)
301 		if (link->fn_desired_actags[i] && (i == PCI_FUNC(dev->devfn))) {
302 			*count = PNV_OCXL_PASID_MAX;
303 			rc = 0;
304 			break;
305 		}
306 
307 	dev_dbg(&dev->dev, "%d PASIDs available for function\n",
308 		rc ? 0 : *count);
309 	return rc;
310 }
311 EXPORT_SYMBOL_GPL(pnv_ocxl_get_pasid_count);
312 
313 static void set_templ_rate(unsigned int templ, unsigned int rate, char *buf)
314 {
315 	int shift, idx;
316 
317 	WARN_ON(templ > PNV_OCXL_TL_MAX_TEMPLATE);
318 	idx = (PNV_OCXL_TL_MAX_TEMPLATE - templ) / 2;
319 	shift = 4 * (1 - ((PNV_OCXL_TL_MAX_TEMPLATE - templ) % 2));
320 	buf[idx] |= rate << shift;
321 }
322 
323 int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap,
324 			char *rate_buf, int rate_buf_size)
325 {
326 	if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)
327 		return -EINVAL;
328 	/*
329 	 * The TL capabilities are a characteristic of the NPU, so
330 	 * we go with hard-coded values.
331 	 *
332 	 * The receiving rate of each template is encoded on 4 bits.
333 	 *
334 	 * On P9:
335 	 * - templates 0 -> 3 are supported
336 	 * - templates 0, 1 and 3 have a 0 receiving rate
337 	 * - template 2 has receiving rate of 1 (extra cycle)
338 	 */
339 	memset(rate_buf, 0, rate_buf_size);
340 	set_templ_rate(2, 1, rate_buf);
341 	*cap = PNV_OCXL_TL_P9_RECV_CAP;
342 	return 0;
343 }
344 EXPORT_SYMBOL_GPL(pnv_ocxl_get_tl_cap);
345 
346 int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap,
347 			uint64_t rate_buf_phys, int rate_buf_size)
348 {
349 	struct pci_controller *hose = pci_bus_to_host(dev->bus);
350 	struct pnv_phb *phb = hose->private_data;
351 	int rc;
352 
353 	if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)
354 		return -EINVAL;
355 
356 	rc = opal_npu_tl_set(phb->opal_id, dev->devfn, cap,
357 			rate_buf_phys, rate_buf_size);
358 	if (rc) {
359 		dev_err(&dev->dev, "Can't configure host TL: %d\n", rc);
360 		return -EINVAL;
361 	}
362 	return 0;
363 }
364 EXPORT_SYMBOL_GPL(pnv_ocxl_set_tl_conf);
365 
366 int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq)
367 {
368 	int rc;
369 
370 	rc = of_property_read_u32(dev->dev.of_node, "ibm,opal-xsl-irq", hwirq);
371 	if (rc) {
372 		dev_err(&dev->dev,
373 			"Can't get translation interrupt for device\n");
374 		return rc;
375 	}
376 	return 0;
377 }
378 EXPORT_SYMBOL_GPL(pnv_ocxl_get_xsl_irq);
379 
380 void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar,
381 			void __iomem *tfc, void __iomem *pe_handle)
382 {
383 	iounmap(dsisr);
384 	iounmap(dar);
385 	iounmap(tfc);
386 	iounmap(pe_handle);
387 }
388 EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_xsl_regs);
389 
390 int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr,
391 			void __iomem **dar, void __iomem **tfc,
392 			void __iomem **pe_handle)
393 {
394 	u64 reg;
395 	int i, j, rc = 0;
396 	void __iomem *regs[4];
397 
398 	/*
399 	 * opal stores the mmio addresses of the DSISR, DAR, TFC and
400 	 * PE_HANDLE registers in a device tree property, in that
401 	 * order
402 	 */
403 	for (i = 0; i < 4; i++) {
404 		rc = of_property_read_u64_index(dev->dev.of_node,
405 						"ibm,opal-xsl-mmio", i, &reg);
406 		if (rc)
407 			break;
408 		regs[i] = ioremap(reg, 8);
409 		if (!regs[i]) {
410 			rc = -EINVAL;
411 			break;
412 		}
413 	}
414 	if (rc) {
415 		dev_err(&dev->dev, "Can't map translation mmio registers\n");
416 		for (j = i - 1; j >= 0; j--)
417 			iounmap(regs[j]);
418 	} else {
419 		*dsisr = regs[0];
420 		*dar = regs[1];
421 		*tfc = regs[2];
422 		*pe_handle = regs[3];
423 	}
424 	return rc;
425 }
426 EXPORT_SYMBOL_GPL(pnv_ocxl_map_xsl_regs);
427 
/*
 * Platform data attached to a Shared Process Area: the identifiers
 * passed back to OPAL when releasing the SPA or clearing its cache.
 */
struct spa_data {
	u64 phb_opal_id;	/* OPAL id of the PHB the device sits on */
	u32 bdfn;		/* PCI id of the device, from pci_dev_id() */
};
432 
433 int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask,
434 		void **platform_data)
435 {
436 	struct pci_controller *hose = pci_bus_to_host(dev->bus);
437 	struct pnv_phb *phb = hose->private_data;
438 	struct spa_data *data;
439 	u32 bdfn;
440 	int rc;
441 
442 	data = kzalloc(sizeof(*data), GFP_KERNEL);
443 	if (!data)
444 		return -ENOMEM;
445 
446 	bdfn = pci_dev_id(dev);
447 	rc = opal_npu_spa_setup(phb->opal_id, bdfn, virt_to_phys(spa_mem),
448 				PE_mask);
449 	if (rc) {
450 		dev_err(&dev->dev, "Can't setup Shared Process Area: %d\n", rc);
451 		kfree(data);
452 		return rc;
453 	}
454 	data->phb_opal_id = phb->opal_id;
455 	data->bdfn = bdfn;
456 	*platform_data = (void *) data;
457 	return 0;
458 }
459 EXPORT_SYMBOL_GPL(pnv_ocxl_spa_setup);
460 
461 void pnv_ocxl_spa_release(void *platform_data)
462 {
463 	struct spa_data *data = (struct spa_data *) platform_data;
464 	int rc;
465 
466 	rc = opal_npu_spa_setup(data->phb_opal_id, data->bdfn, 0, 0);
467 	WARN_ON(rc);
468 	kfree(data);
469 }
470 EXPORT_SYMBOL_GPL(pnv_ocxl_spa_release);
471 
472 int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle)
473 {
474 	struct spa_data *data = (struct spa_data *) platform_data;
475 
476 	return opal_npu_spa_clear_cache(data->phb_opal_id, data->bdfn, pe_handle);
477 }
478 EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe_from_cache);
479 
480 int pnv_ocxl_map_lpar(struct pci_dev *dev, uint64_t lparid,
481 		      uint64_t lpcr, void __iomem **arva)
482 {
483 	struct pci_controller *hose = pci_bus_to_host(dev->bus);
484 	struct pnv_phb *phb = hose->private_data;
485 	u64 mmio_atsd;
486 	int rc;
487 
488 	/* ATSD physical address.
489 	 * ATSD LAUNCH register: write access initiates a shoot down to
490 	 * initiate the TLB Invalidate command.
491 	 */
492 	rc = of_property_read_u64_index(hose->dn, "ibm,mmio-atsd",
493 					0, &mmio_atsd);
494 	if (rc) {
495 		dev_info(&dev->dev, "No available ATSD found\n");
496 		return rc;
497 	}
498 
499 	/* Assign a register set to a Logical Partition and MMIO ATSD
500 	 * LPARID register to the required value.
501 	 */
502 	rc = opal_npu_map_lpar(phb->opal_id, pci_dev_id(dev),
503 			       lparid, lpcr);
504 	if (rc) {
505 		dev_err(&dev->dev, "Error mapping device to LPAR: %d\n", rc);
506 		return rc;
507 	}
508 
509 	*arva = ioremap(mmio_atsd, 24);
510 	if (!(*arva)) {
511 		dev_warn(&dev->dev, "ioremap failed - mmio_atsd: %#llx\n", mmio_atsd);
512 		rc = -ENOMEM;
513 	}
514 
515 	return rc;
516 }
517 EXPORT_SYMBOL_GPL(pnv_ocxl_map_lpar);
518 
519 void pnv_ocxl_unmap_lpar(void __iomem *arva)
520 {
521 	iounmap(arva);
522 }
523 EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_lpar);
524 
525 void pnv_ocxl_tlb_invalidate(void __iomem *arva,
526 			     unsigned long pid,
527 			     unsigned long addr,
528 			     unsigned long page_size)
529 {
530 	unsigned long timeout = jiffies + (HZ * PNV_OCXL_ATSD_TIMEOUT);
531 	u64 val = 0ull;
532 	int pend;
533 	u8 size;
534 
535 	if (!(arva))
536 		return;
537 
538 	if (addr) {
539 		/* load Abbreviated Virtual Address register with
540 		 * the necessary value
541 		 */
542 		val |= FIELD_PREP(PNV_OCXL_ATSD_AVA_AVA, addr >> (63-51));
543 		out_be64(arva + PNV_OCXL_ATSD_AVA, val);
544 	}
545 
546 	/* Write access initiates a shoot down to initiate the
547 	 * TLB Invalidate command
548 	 */
549 	val = PNV_OCXL_ATSD_LNCH_R;
550 	val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_RIC, 0b10);
551 	if (addr)
552 		val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b00);
553 	else {
554 		val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b01);
555 		val |= PNV_OCXL_ATSD_LNCH_OCAPI_SINGLETON;
556 	}
557 	val |= PNV_OCXL_ATSD_LNCH_PRS;
558 	/* Actual Page Size to be invalidated
559 	 * 000 4KB
560 	 * 101 64KB
561 	 * 001 2MB
562 	 * 010 1GB
563 	 */
564 	size = 0b101;
565 	if (page_size == 0x1000)
566 		size = 0b000;
567 	if (page_size == 0x200000)
568 		size = 0b001;
569 	if (page_size == 0x40000000)
570 		size = 0b010;
571 	val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_AP, size);
572 	val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_PID, pid);
573 	out_be64(arva + PNV_OCXL_ATSD_LNCH, val);
574 
575 	/* Poll the ATSD status register to determine when the
576 	 * TLB Invalidate has been completed.
577 	 */
578 	val = in_be64(arva + PNV_OCXL_ATSD_STAT);
579 	pend = val >> 63;
580 
581 	while (pend) {
582 		if (time_after_eq(jiffies, timeout)) {
583 			pr_err("%s - Timeout while reading XTS MMIO ATSD status register (val=%#llx, pidr=0x%lx)\n",
584 			       __func__, val, pid);
585 			return;
586 		}
587 		cpu_relax();
588 		val = in_be64(arva + PNV_OCXL_ATSD_STAT);
589 		pend = val >> 63;
590 	}
591 }
592 EXPORT_SYMBOL_GPL(pnv_ocxl_tlb_invalidate);
593