xref: /linux/arch/powerpc/platforms/powernv/pci.c (revision c532de5a67a70f8533d495f8f2aaa9a0491c3ad0)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Support PCI/PCIe on PowerNV platforms
4  *
5  * Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
6  */
7 
8 #include <linux/kernel.h>
9 #include <linux/pci.h>
10 #include <linux/delay.h>
11 #include <linux/string.h>
12 #include <linux/init.h>
13 #include <linux/irq.h>
14 #include <linux/io.h>
15 #include <linux/msi.h>
16 #include <linux/iommu.h>
17 #include <linux/sched/mm.h>
18 
19 #include <asm/sections.h>
20 #include <asm/io.h>
21 #include <asm/pci-bridge.h>
22 #include <asm/machdep.h>
23 #include <asm/msi_bitmap.h>
24 #include <asm/ppc-pci.h>
25 #include <asm/pnv-pci.h>
26 #include <asm/opal.h>
27 #include <asm/iommu.h>
28 #include <asm/tce.h>
29 #include <asm/firmware.h>
30 #include <asm/eeh_event.h>
31 #include <asm/eeh.h>
32 
33 #include "powernv.h"
34 #include "pci.h"
35 
/*
 * Held by pnv_pci_set_tunnel_bar() across its read of the current PBCQ
 * tunnel BAR and the subsequent update.
 */
static DEFINE_MUTEX(tunnel_mutex);
37 
38 int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id)
39 {
40 	struct device_node *node = np;
41 	u32 bdfn;
42 	u64 phbid;
43 	int ret;
44 
45 	ret = of_property_read_u32(np, "reg", &bdfn);
46 	if (ret)
47 		return -ENXIO;
48 
49 	bdfn = ((bdfn & 0x00ffff00) >> 8);
50 	for (node = np; node; node = of_get_parent(node)) {
51 		if (!PCI_DN(node)) {
52 			of_node_put(node);
53 			break;
54 		}
55 
56 		if (!of_device_is_compatible(node, "ibm,ioda2-phb") &&
57 		    !of_device_is_compatible(node, "ibm,ioda3-phb") &&
58 		    !of_device_is_compatible(node, "ibm,ioda2-npu2-opencapi-phb")) {
59 			of_node_put(node);
60 			continue;
61 		}
62 
63 		ret = of_property_read_u64(node, "ibm,opal-phbid", &phbid);
64 		if (ret) {
65 			of_node_put(node);
66 			return -ENXIO;
67 		}
68 
69 		if (of_device_is_compatible(node, "ibm,ioda2-npu2-opencapi-phb"))
70 			*id = PCI_PHB_SLOT_ID(phbid);
71 		else
72 			*id = PCI_SLOT_ID(phbid, bdfn);
73 		return 0;
74 	}
75 
76 	return -ENODEV;
77 }
78 EXPORT_SYMBOL_GPL(pnv_pci_get_slot_id);
79 
80 int pnv_pci_get_device_tree(uint32_t phandle, void *buf, uint64_t len)
81 {
82 	int64_t rc;
83 
84 	if (!opal_check_token(OPAL_GET_DEVICE_TREE))
85 		return -ENXIO;
86 
87 	rc = opal_get_device_tree(phandle, (uint64_t)buf, len);
88 	if (rc < OPAL_SUCCESS)
89 		return -EIO;
90 
91 	return rc;
92 }
93 EXPORT_SYMBOL_GPL(pnv_pci_get_device_tree);
94 
95 int pnv_pci_get_presence_state(uint64_t id, uint8_t *state)
96 {
97 	int64_t rc;
98 
99 	if (!opal_check_token(OPAL_PCI_GET_PRESENCE_STATE))
100 		return -ENXIO;
101 
102 	rc = opal_pci_get_presence_state(id, (uint64_t)state);
103 	if (rc != OPAL_SUCCESS)
104 		return -EIO;
105 
106 	return 0;
107 }
108 EXPORT_SYMBOL_GPL(pnv_pci_get_presence_state);
109 
110 int pnv_pci_get_power_state(uint64_t id, uint8_t *state)
111 {
112 	int64_t rc;
113 
114 	if (!opal_check_token(OPAL_PCI_GET_POWER_STATE))
115 		return -ENXIO;
116 
117 	rc = opal_pci_get_power_state(id, (uint64_t)state);
118 	if (rc != OPAL_SUCCESS)
119 		return -EIO;
120 
121 	return 0;
122 }
123 EXPORT_SYMBOL_GPL(pnv_pci_get_power_state);
124 
125 int pnv_pci_set_power_state(uint64_t id, uint8_t state, struct opal_msg *msg)
126 {
127 	struct opal_msg m;
128 	int token, ret;
129 	int64_t rc;
130 
131 	if (!opal_check_token(OPAL_PCI_SET_POWER_STATE))
132 		return -ENXIO;
133 
134 	token = opal_async_get_token_interruptible();
135 	if (unlikely(token < 0))
136 		return token;
137 
138 	rc = opal_pci_set_power_state(token, id, (uint64_t)&state);
139 	if (rc == OPAL_SUCCESS) {
140 		ret = 0;
141 		goto exit;
142 	} else if (rc != OPAL_ASYNC_COMPLETION) {
143 		ret = -EIO;
144 		goto exit;
145 	}
146 
147 	ret = opal_async_wait_response(token, &m);
148 	if (ret < 0)
149 		goto exit;
150 
151 	if (msg) {
152 		ret = 1;
153 		memcpy(msg, &m, sizeof(m));
154 	}
155 
156 exit:
157 	opal_async_release_token(token);
158 	return ret;
159 }
160 EXPORT_SYMBOL_GPL(pnv_pci_set_power_state);
161 
162 /* Nicely print the contents of the PE State Tables (PEST). */
163 static void pnv_pci_dump_pest(__be64 pestA[], __be64 pestB[], int pest_size)
164 {
165 	__be64 prevA = ULONG_MAX, prevB = ULONG_MAX;
166 	bool dup = false;
167 	int i;
168 
169 	for (i = 0; i < pest_size; i++) {
170 		__be64 peA = be64_to_cpu(pestA[i]);
171 		__be64 peB = be64_to_cpu(pestB[i]);
172 
173 		if (peA != prevA || peB != prevB) {
174 			if (dup) {
175 				pr_info("PE[..%03x] A/B: as above\n", i-1);
176 				dup = false;
177 			}
178 			prevA = peA;
179 			prevB = peB;
180 			if (peA & PNV_IODA_STOPPED_STATE ||
181 			    peB & PNV_IODA_STOPPED_STATE)
182 				pr_info("PE[%03x] A/B: %016llx %016llx\n",
183 					i, peA, peB);
184 		} else if (!dup && (peA & PNV_IODA_STOPPED_STATE ||
185 				    peB & PNV_IODA_STOPPED_STATE)) {
186 			dup = true;
187 		}
188 	}
189 }
190 
191 static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose,
192 					 struct OpalIoPhbErrorCommon *common)
193 {
194 	struct OpalIoP7IOCPhbErrorData *data;
195 
196 	data = (struct OpalIoP7IOCPhbErrorData *)common;
197 	pr_info("P7IOC PHB#%x Diag-data (Version: %d)\n",
198 		hose->global_number, be32_to_cpu(common->version));
199 
200 	if (data->brdgCtl)
201 		pr_info("brdgCtl:     %08x\n",
202 			be32_to_cpu(data->brdgCtl));
203 	if (data->portStatusReg || data->rootCmplxStatus ||
204 	    data->busAgentStatus)
205 		pr_info("UtlSts:      %08x %08x %08x\n",
206 			be32_to_cpu(data->portStatusReg),
207 			be32_to_cpu(data->rootCmplxStatus),
208 			be32_to_cpu(data->busAgentStatus));
209 	if (data->deviceStatus || data->slotStatus   ||
210 	    data->linkStatus   || data->devCmdStatus ||
211 	    data->devSecStatus)
212 		pr_info("RootSts:     %08x %08x %08x %08x %08x\n",
213 			be32_to_cpu(data->deviceStatus),
214 			be32_to_cpu(data->slotStatus),
215 			be32_to_cpu(data->linkStatus),
216 			be32_to_cpu(data->devCmdStatus),
217 			be32_to_cpu(data->devSecStatus));
218 	if (data->rootErrorStatus   || data->uncorrErrorStatus ||
219 	    data->corrErrorStatus)
220 		pr_info("RootErrSts:  %08x %08x %08x\n",
221 			be32_to_cpu(data->rootErrorStatus),
222 			be32_to_cpu(data->uncorrErrorStatus),
223 			be32_to_cpu(data->corrErrorStatus));
224 	if (data->tlpHdr1 || data->tlpHdr2 ||
225 	    data->tlpHdr3 || data->tlpHdr4)
226 		pr_info("RootErrLog:  %08x %08x %08x %08x\n",
227 			be32_to_cpu(data->tlpHdr1),
228 			be32_to_cpu(data->tlpHdr2),
229 			be32_to_cpu(data->tlpHdr3),
230 			be32_to_cpu(data->tlpHdr4));
231 	if (data->sourceId || data->errorClass ||
232 	    data->correlator)
233 		pr_info("RootErrLog1: %08x %016llx %016llx\n",
234 			be32_to_cpu(data->sourceId),
235 			be64_to_cpu(data->errorClass),
236 			be64_to_cpu(data->correlator));
237 	if (data->p7iocPlssr || data->p7iocCsr)
238 		pr_info("PhbSts:      %016llx %016llx\n",
239 			be64_to_cpu(data->p7iocPlssr),
240 			be64_to_cpu(data->p7iocCsr));
241 	if (data->lemFir)
242 		pr_info("Lem:         %016llx %016llx %016llx\n",
243 			be64_to_cpu(data->lemFir),
244 			be64_to_cpu(data->lemErrorMask),
245 			be64_to_cpu(data->lemWOF));
246 	if (data->phbErrorStatus)
247 		pr_info("PhbErr:      %016llx %016llx %016llx %016llx\n",
248 			be64_to_cpu(data->phbErrorStatus),
249 			be64_to_cpu(data->phbFirstErrorStatus),
250 			be64_to_cpu(data->phbErrorLog0),
251 			be64_to_cpu(data->phbErrorLog1));
252 	if (data->mmioErrorStatus)
253 		pr_info("OutErr:      %016llx %016llx %016llx %016llx\n",
254 			be64_to_cpu(data->mmioErrorStatus),
255 			be64_to_cpu(data->mmioFirstErrorStatus),
256 			be64_to_cpu(data->mmioErrorLog0),
257 			be64_to_cpu(data->mmioErrorLog1));
258 	if (data->dma0ErrorStatus)
259 		pr_info("InAErr:      %016llx %016llx %016llx %016llx\n",
260 			be64_to_cpu(data->dma0ErrorStatus),
261 			be64_to_cpu(data->dma0FirstErrorStatus),
262 			be64_to_cpu(data->dma0ErrorLog0),
263 			be64_to_cpu(data->dma0ErrorLog1));
264 	if (data->dma1ErrorStatus)
265 		pr_info("InBErr:      %016llx %016llx %016llx %016llx\n",
266 			be64_to_cpu(data->dma1ErrorStatus),
267 			be64_to_cpu(data->dma1FirstErrorStatus),
268 			be64_to_cpu(data->dma1ErrorLog0),
269 			be64_to_cpu(data->dma1ErrorLog1));
270 
271 	pnv_pci_dump_pest(data->pestA, data->pestB, OPAL_P7IOC_NUM_PEST_REGS);
272 }
273 
274 static void pnv_pci_dump_phb3_diag_data(struct pci_controller *hose,
275 					struct OpalIoPhbErrorCommon *common)
276 {
277 	struct OpalIoPhb3ErrorData *data;
278 
279 	data = (struct OpalIoPhb3ErrorData*)common;
280 	pr_info("PHB3 PHB#%x Diag-data (Version: %d)\n",
281 		hose->global_number, be32_to_cpu(common->version));
282 	if (data->brdgCtl)
283 		pr_info("brdgCtl:     %08x\n",
284 			be32_to_cpu(data->brdgCtl));
285 	if (data->portStatusReg || data->rootCmplxStatus ||
286 	    data->busAgentStatus)
287 		pr_info("UtlSts:      %08x %08x %08x\n",
288 			be32_to_cpu(data->portStatusReg),
289 			be32_to_cpu(data->rootCmplxStatus),
290 			be32_to_cpu(data->busAgentStatus));
291 	if (data->deviceStatus || data->slotStatus   ||
292 	    data->linkStatus   || data->devCmdStatus ||
293 	    data->devSecStatus)
294 		pr_info("RootSts:     %08x %08x %08x %08x %08x\n",
295 			be32_to_cpu(data->deviceStatus),
296 			be32_to_cpu(data->slotStatus),
297 			be32_to_cpu(data->linkStatus),
298 			be32_to_cpu(data->devCmdStatus),
299 			be32_to_cpu(data->devSecStatus));
300 	if (data->rootErrorStatus || data->uncorrErrorStatus ||
301 	    data->corrErrorStatus)
302 		pr_info("RootErrSts:  %08x %08x %08x\n",
303 			be32_to_cpu(data->rootErrorStatus),
304 			be32_to_cpu(data->uncorrErrorStatus),
305 			be32_to_cpu(data->corrErrorStatus));
306 	if (data->tlpHdr1 || data->tlpHdr2 ||
307 	    data->tlpHdr3 || data->tlpHdr4)
308 		pr_info("RootErrLog:  %08x %08x %08x %08x\n",
309 			be32_to_cpu(data->tlpHdr1),
310 			be32_to_cpu(data->tlpHdr2),
311 			be32_to_cpu(data->tlpHdr3),
312 			be32_to_cpu(data->tlpHdr4));
313 	if (data->sourceId || data->errorClass ||
314 	    data->correlator)
315 		pr_info("RootErrLog1: %08x %016llx %016llx\n",
316 			be32_to_cpu(data->sourceId),
317 			be64_to_cpu(data->errorClass),
318 			be64_to_cpu(data->correlator));
319 	if (data->nFir)
320 		pr_info("nFir:        %016llx %016llx %016llx\n",
321 			be64_to_cpu(data->nFir),
322 			be64_to_cpu(data->nFirMask),
323 			be64_to_cpu(data->nFirWOF));
324 	if (data->phbPlssr || data->phbCsr)
325 		pr_info("PhbSts:      %016llx %016llx\n",
326 			be64_to_cpu(data->phbPlssr),
327 			be64_to_cpu(data->phbCsr));
328 	if (data->lemFir)
329 		pr_info("Lem:         %016llx %016llx %016llx\n",
330 			be64_to_cpu(data->lemFir),
331 			be64_to_cpu(data->lemErrorMask),
332 			be64_to_cpu(data->lemWOF));
333 	if (data->phbErrorStatus)
334 		pr_info("PhbErr:      %016llx %016llx %016llx %016llx\n",
335 			be64_to_cpu(data->phbErrorStatus),
336 			be64_to_cpu(data->phbFirstErrorStatus),
337 			be64_to_cpu(data->phbErrorLog0),
338 			be64_to_cpu(data->phbErrorLog1));
339 	if (data->mmioErrorStatus)
340 		pr_info("OutErr:      %016llx %016llx %016llx %016llx\n",
341 			be64_to_cpu(data->mmioErrorStatus),
342 			be64_to_cpu(data->mmioFirstErrorStatus),
343 			be64_to_cpu(data->mmioErrorLog0),
344 			be64_to_cpu(data->mmioErrorLog1));
345 	if (data->dma0ErrorStatus)
346 		pr_info("InAErr:      %016llx %016llx %016llx %016llx\n",
347 			be64_to_cpu(data->dma0ErrorStatus),
348 			be64_to_cpu(data->dma0FirstErrorStatus),
349 			be64_to_cpu(data->dma0ErrorLog0),
350 			be64_to_cpu(data->dma0ErrorLog1));
351 	if (data->dma1ErrorStatus)
352 		pr_info("InBErr:      %016llx %016llx %016llx %016llx\n",
353 			be64_to_cpu(data->dma1ErrorStatus),
354 			be64_to_cpu(data->dma1FirstErrorStatus),
355 			be64_to_cpu(data->dma1ErrorLog0),
356 			be64_to_cpu(data->dma1ErrorLog1));
357 
358 	pnv_pci_dump_pest(data->pestA, data->pestB, OPAL_PHB3_NUM_PEST_REGS);
359 }
360 
361 static void pnv_pci_dump_phb4_diag_data(struct pci_controller *hose,
362 					struct OpalIoPhbErrorCommon *common)
363 {
364 	struct OpalIoPhb4ErrorData *data;
365 
366 	data = (struct OpalIoPhb4ErrorData*)common;
367 	pr_info("PHB4 PHB#%d Diag-data (Version: %d)\n",
368 		hose->global_number, be32_to_cpu(common->version));
369 	if (data->brdgCtl)
370 		pr_info("brdgCtl:    %08x\n",
371 			be32_to_cpu(data->brdgCtl));
372 	if (data->deviceStatus || data->slotStatus   ||
373 	    data->linkStatus   || data->devCmdStatus ||
374 	    data->devSecStatus)
375 		pr_info("RootSts:    %08x %08x %08x %08x %08x\n",
376 			be32_to_cpu(data->deviceStatus),
377 			be32_to_cpu(data->slotStatus),
378 			be32_to_cpu(data->linkStatus),
379 			be32_to_cpu(data->devCmdStatus),
380 			be32_to_cpu(data->devSecStatus));
381 	if (data->rootErrorStatus || data->uncorrErrorStatus ||
382 	    data->corrErrorStatus)
383 		pr_info("RootErrSts: %08x %08x %08x\n",
384 			be32_to_cpu(data->rootErrorStatus),
385 			be32_to_cpu(data->uncorrErrorStatus),
386 			be32_to_cpu(data->corrErrorStatus));
387 	if (data->tlpHdr1 || data->tlpHdr2 ||
388 	    data->tlpHdr3 || data->tlpHdr4)
389 		pr_info("RootErrLog: %08x %08x %08x %08x\n",
390 			be32_to_cpu(data->tlpHdr1),
391 			be32_to_cpu(data->tlpHdr2),
392 			be32_to_cpu(data->tlpHdr3),
393 			be32_to_cpu(data->tlpHdr4));
394 	if (data->sourceId)
395 		pr_info("sourceId:   %08x\n", be32_to_cpu(data->sourceId));
396 	if (data->nFir)
397 		pr_info("nFir:       %016llx %016llx %016llx\n",
398 			be64_to_cpu(data->nFir),
399 			be64_to_cpu(data->nFirMask),
400 			be64_to_cpu(data->nFirWOF));
401 	if (data->phbPlssr || data->phbCsr)
402 		pr_info("PhbSts:     %016llx %016llx\n",
403 			be64_to_cpu(data->phbPlssr),
404 			be64_to_cpu(data->phbCsr));
405 	if (data->lemFir)
406 		pr_info("Lem:        %016llx %016llx %016llx\n",
407 			be64_to_cpu(data->lemFir),
408 			be64_to_cpu(data->lemErrorMask),
409 			be64_to_cpu(data->lemWOF));
410 	if (data->phbErrorStatus)
411 		pr_info("PhbErr:     %016llx %016llx %016llx %016llx\n",
412 			be64_to_cpu(data->phbErrorStatus),
413 			be64_to_cpu(data->phbFirstErrorStatus),
414 			be64_to_cpu(data->phbErrorLog0),
415 			be64_to_cpu(data->phbErrorLog1));
416 	if (data->phbTxeErrorStatus)
417 		pr_info("PhbTxeErr:  %016llx %016llx %016llx %016llx\n",
418 			be64_to_cpu(data->phbTxeErrorStatus),
419 			be64_to_cpu(data->phbTxeFirstErrorStatus),
420 			be64_to_cpu(data->phbTxeErrorLog0),
421 			be64_to_cpu(data->phbTxeErrorLog1));
422 	if (data->phbRxeArbErrorStatus)
423 		pr_info("RxeArbErr:  %016llx %016llx %016llx %016llx\n",
424 			be64_to_cpu(data->phbRxeArbErrorStatus),
425 			be64_to_cpu(data->phbRxeArbFirstErrorStatus),
426 			be64_to_cpu(data->phbRxeArbErrorLog0),
427 			be64_to_cpu(data->phbRxeArbErrorLog1));
428 	if (data->phbRxeMrgErrorStatus)
429 		pr_info("RxeMrgErr:  %016llx %016llx %016llx %016llx\n",
430 			be64_to_cpu(data->phbRxeMrgErrorStatus),
431 			be64_to_cpu(data->phbRxeMrgFirstErrorStatus),
432 			be64_to_cpu(data->phbRxeMrgErrorLog0),
433 			be64_to_cpu(data->phbRxeMrgErrorLog1));
434 	if (data->phbRxeTceErrorStatus)
435 		pr_info("RxeTceErr:  %016llx %016llx %016llx %016llx\n",
436 			be64_to_cpu(data->phbRxeTceErrorStatus),
437 			be64_to_cpu(data->phbRxeTceFirstErrorStatus),
438 			be64_to_cpu(data->phbRxeTceErrorLog0),
439 			be64_to_cpu(data->phbRxeTceErrorLog1));
440 
441 	if (data->phbPblErrorStatus)
442 		pr_info("PblErr:     %016llx %016llx %016llx %016llx\n",
443 			be64_to_cpu(data->phbPblErrorStatus),
444 			be64_to_cpu(data->phbPblFirstErrorStatus),
445 			be64_to_cpu(data->phbPblErrorLog0),
446 			be64_to_cpu(data->phbPblErrorLog1));
447 	if (data->phbPcieDlpErrorStatus)
448 		pr_info("PcieDlp:    %016llx %016llx %016llx\n",
449 			be64_to_cpu(data->phbPcieDlpErrorLog1),
450 			be64_to_cpu(data->phbPcieDlpErrorLog2),
451 			be64_to_cpu(data->phbPcieDlpErrorStatus));
452 	if (data->phbRegbErrorStatus)
453 		pr_info("RegbErr:    %016llx %016llx %016llx %016llx\n",
454 			be64_to_cpu(data->phbRegbErrorStatus),
455 			be64_to_cpu(data->phbRegbFirstErrorStatus),
456 			be64_to_cpu(data->phbRegbErrorLog0),
457 			be64_to_cpu(data->phbRegbErrorLog1));
458 
459 
460 	pnv_pci_dump_pest(data->pestA, data->pestB, OPAL_PHB4_NUM_PEST_REGS);
461 }
462 
463 void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
464 				unsigned char *log_buff)
465 {
466 	struct OpalIoPhbErrorCommon *common;
467 
468 	if (!hose || !log_buff)
469 		return;
470 
471 	common = (struct OpalIoPhbErrorCommon *)log_buff;
472 	switch (be32_to_cpu(common->ioType)) {
473 	case OPAL_PHB_ERROR_DATA_TYPE_P7IOC:
474 		pnv_pci_dump_p7ioc_diag_data(hose, common);
475 		break;
476 	case OPAL_PHB_ERROR_DATA_TYPE_PHB3:
477 		pnv_pci_dump_phb3_diag_data(hose, common);
478 		break;
479 	case OPAL_PHB_ERROR_DATA_TYPE_PHB4:
480 		pnv_pci_dump_phb4_diag_data(hose, common);
481 		break;
482 	default:
483 		pr_warn("%s: Unrecognized ioType %d\n",
484 			__func__, be32_to_cpu(common->ioType));
485 	}
486 }
487 
488 static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no)
489 {
490 	unsigned long flags, rc;
491 	int has_diag, ret = 0;
492 
493 	spin_lock_irqsave(&phb->lock, flags);
494 
495 	/* Fetch PHB diag-data */
496 	rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag_data,
497 					 phb->diag_data_size);
498 	has_diag = (rc == OPAL_SUCCESS);
499 
500 	/* If PHB supports compound PE, to handle it */
501 	if (phb->unfreeze_pe) {
502 		ret = phb->unfreeze_pe(phb,
503 				       pe_no,
504 				       OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
505 	} else {
506 		rc = opal_pci_eeh_freeze_clear(phb->opal_id,
507 					     pe_no,
508 					     OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
509 		if (rc) {
510 			pr_warn("%s: Failure %ld clearing frozen "
511 				"PHB#%x-PE#%x\n",
512 				__func__, rc, phb->hose->global_number,
513 				pe_no);
514 			ret = -EIO;
515 		}
516 	}
517 
518 	/*
519 	 * For now, let's only display the diag buffer when we fail to clear
520 	 * the EEH status. We'll do more sensible things later when we have
521 	 * proper EEH support. We need to make sure we don't pollute ourselves
522 	 * with the normal errors generated when probing empty slots
523 	 */
524 	if (has_diag && ret)
525 		pnv_pci_dump_phb_diag_data(phb->hose, phb->diag_data);
526 
527 	spin_unlock_irqrestore(&phb->lock, flags);
528 }
529 
530 static void pnv_pci_config_check_eeh(struct pci_dn *pdn)
531 {
532 	struct pnv_phb *phb = pdn->phb->private_data;
533 	u8	fstate = 0;
534 	__be16	pcierr = 0;
535 	unsigned int pe_no;
536 	s64	rc;
537 
538 	/*
539 	 * Get the PE#. During the PCI probe stage, we might not
540 	 * setup that yet. So all ER errors should be mapped to
541 	 * reserved PE.
542 	 */
543 	pe_no = pdn->pe_number;
544 	if (pe_no == IODA_INVALID_PE) {
545 		pe_no = phb->ioda.reserved_pe_idx;
546 	}
547 
548 	/*
549 	 * Fetch frozen state. If the PHB support compound PE,
550 	 * we need handle that case.
551 	 */
552 	if (phb->get_pe_state) {
553 		fstate = phb->get_pe_state(phb, pe_no);
554 	} else {
555 		rc = opal_pci_eeh_freeze_status(phb->opal_id,
556 						pe_no,
557 						&fstate,
558 						&pcierr,
559 						NULL);
560 		if (rc) {
561 			pr_warn("%s: Failure %lld getting PHB#%x-PE#%x state\n",
562 				__func__, rc, phb->hose->global_number, pe_no);
563 			return;
564 		}
565 	}
566 
567 	pr_devel(" -> EEH check, bdfn=%04x PE#%x fstate=%x\n",
568 		 (pdn->busno << 8) | (pdn->devfn), pe_no, fstate);
569 
570 	/* Clear the frozen state if applicable */
571 	if (fstate == OPAL_EEH_STOPPED_MMIO_FREEZE ||
572 	    fstate == OPAL_EEH_STOPPED_DMA_FREEZE  ||
573 	    fstate == OPAL_EEH_STOPPED_MMIO_DMA_FREEZE) {
574 		/*
575 		 * If PHB supports compound PE, freeze it for
576 		 * consistency.
577 		 */
578 		if (phb->freeze_pe)
579 			phb->freeze_pe(phb, pe_no);
580 
581 		pnv_pci_handle_eeh_config(phb, pe_no);
582 	}
583 }
584 
585 int pnv_pci_cfg_read(struct pci_dn *pdn,
586 		     int where, int size, u32 *val)
587 {
588 	struct pnv_phb *phb = pdn->phb->private_data;
589 	u32 bdfn = (pdn->busno << 8) | pdn->devfn;
590 	s64 rc;
591 
592 	switch (size) {
593 	case 1: {
594 		u8 v8;
595 		rc = opal_pci_config_read_byte(phb->opal_id, bdfn, where, &v8);
596 		*val = (rc == OPAL_SUCCESS) ? v8 : 0xff;
597 		break;
598 	}
599 	case 2: {
600 		__be16 v16;
601 		rc = opal_pci_config_read_half_word(phb->opal_id, bdfn, where,
602 						   &v16);
603 		*val = (rc == OPAL_SUCCESS) ? be16_to_cpu(v16) : 0xffff;
604 		break;
605 	}
606 	case 4: {
607 		__be32 v32;
608 		rc = opal_pci_config_read_word(phb->opal_id, bdfn, where, &v32);
609 		*val = (rc == OPAL_SUCCESS) ? be32_to_cpu(v32) : 0xffffffff;
610 		break;
611 	}
612 	default:
613 		return PCIBIOS_FUNC_NOT_SUPPORTED;
614 	}
615 
616 	pr_devel("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
617 		 __func__, pdn->busno, pdn->devfn, where, size, *val);
618 	return PCIBIOS_SUCCESSFUL;
619 }
620 
621 int pnv_pci_cfg_write(struct pci_dn *pdn,
622 		      int where, int size, u32 val)
623 {
624 	struct pnv_phb *phb = pdn->phb->private_data;
625 	u32 bdfn = (pdn->busno << 8) | pdn->devfn;
626 
627 	pr_devel("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
628 		 __func__, pdn->busno, pdn->devfn, where, size, val);
629 	switch (size) {
630 	case 1:
631 		opal_pci_config_write_byte(phb->opal_id, bdfn, where, val);
632 		break;
633 	case 2:
634 		opal_pci_config_write_half_word(phb->opal_id, bdfn, where, val);
635 		break;
636 	case 4:
637 		opal_pci_config_write_word(phb->opal_id, bdfn, where, val);
638 		break;
639 	default:
640 		return PCIBIOS_FUNC_NOT_SUPPORTED;
641 	}
642 
643 	return PCIBIOS_SUCCESSFUL;
644 }
645 
646 #ifdef CONFIG_EEH
647 static bool pnv_pci_cfg_check(struct pci_dn *pdn)
648 {
649 	struct eeh_dev *edev = NULL;
650 	struct pnv_phb *phb = pdn->phb->private_data;
651 
652 	/* EEH not enabled ? */
653 	if (!(phb->flags & PNV_PHB_FLAG_EEH))
654 		return true;
655 
656 	/* PE reset or device removed ? */
657 	edev = pdn->edev;
658 	if (edev) {
659 		if (edev->pe &&
660 		    (edev->pe->state & EEH_PE_CFG_BLOCKED))
661 			return false;
662 
663 		if (edev->mode & EEH_DEV_REMOVED)
664 			return false;
665 	}
666 
667 	return true;
668 }
669 #else
/*
 * EEH is compiled out: config space access is never blocked.
 * Returns bool to match the CONFIG_EEH variant of this function.
 */
static inline bool pnv_pci_cfg_check(struct pci_dn *pdn)
{
	return true;
}
674 #endif /* CONFIG_EEH */
675 
676 static int pnv_pci_read_config(struct pci_bus *bus,
677 			       unsigned int devfn,
678 			       int where, int size, u32 *val)
679 {
680 	struct pci_dn *pdn;
681 	struct pnv_phb *phb;
682 	int ret;
683 
684 	*val = 0xFFFFFFFF;
685 	pdn = pci_get_pdn_by_devfn(bus, devfn);
686 	if (!pdn)
687 		return PCIBIOS_DEVICE_NOT_FOUND;
688 
689 	if (!pnv_pci_cfg_check(pdn))
690 		return PCIBIOS_DEVICE_NOT_FOUND;
691 
692 	ret = pnv_pci_cfg_read(pdn, where, size, val);
693 	phb = pdn->phb->private_data;
694 	if (phb->flags & PNV_PHB_FLAG_EEH && pdn->edev) {
695 		if (*val == EEH_IO_ERROR_VALUE(size) &&
696 		    eeh_dev_check_failure(pdn->edev))
697                         return PCIBIOS_DEVICE_NOT_FOUND;
698 	} else {
699 		pnv_pci_config_check_eeh(pdn);
700 	}
701 
702 	return ret;
703 }
704 
705 static int pnv_pci_write_config(struct pci_bus *bus,
706 				unsigned int devfn,
707 				int where, int size, u32 val)
708 {
709 	struct pci_dn *pdn;
710 	struct pnv_phb *phb;
711 	int ret;
712 
713 	pdn = pci_get_pdn_by_devfn(bus, devfn);
714 	if (!pdn)
715 		return PCIBIOS_DEVICE_NOT_FOUND;
716 
717 	if (!pnv_pci_cfg_check(pdn))
718 		return PCIBIOS_DEVICE_NOT_FOUND;
719 
720 	ret = pnv_pci_cfg_write(pdn, where, size, val);
721 	phb = pdn->phb->private_data;
722 	if (!(phb->flags & PNV_PHB_FLAG_EEH))
723 		pnv_pci_config_check_eeh(pdn);
724 
725 	return ret;
726 }
727 
728 struct pci_ops pnv_pci_ops = {
729 	.read  = pnv_pci_read_config,
730 	.write = pnv_pci_write_config,
731 };
732 
733 struct iommu_table *pnv_pci_table_alloc(int nid)
734 {
735 	struct iommu_table *tbl;
736 
737 	tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, nid);
738 	if (!tbl)
739 		return NULL;
740 
741 	INIT_LIST_HEAD_RCU(&tbl->it_group_list);
742 	kref_init(&tbl->it_kref);
743 
744 	return tbl;
745 }
746 
747 struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev)
748 {
749 	struct pci_controller *hose = pci_bus_to_host(dev->bus);
750 
751 	return of_node_get(hose->dn);
752 }
753 EXPORT_SYMBOL(pnv_pci_get_phb_node);
754 
755 int pnv_pci_set_tunnel_bar(struct pci_dev *dev, u64 addr, int enable)
756 {
757 	struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus);
758 	u64 tunnel_bar;
759 	__be64 val;
760 	int rc;
761 
762 	if (!opal_check_token(OPAL_PCI_GET_PBCQ_TUNNEL_BAR))
763 		return -ENXIO;
764 	if (!opal_check_token(OPAL_PCI_SET_PBCQ_TUNNEL_BAR))
765 		return -ENXIO;
766 
767 	mutex_lock(&tunnel_mutex);
768 	rc = opal_pci_get_pbcq_tunnel_bar(phb->opal_id, &val);
769 	if (rc != OPAL_SUCCESS) {
770 		rc = -EIO;
771 		goto out;
772 	}
773 	tunnel_bar = be64_to_cpu(val);
774 	if (enable) {
775 		/*
776 		* Only one device per PHB can use atomics.
777 		* Our policy is first-come, first-served.
778 		*/
779 		if (tunnel_bar) {
780 			if (tunnel_bar != addr)
781 				rc = -EBUSY;
782 			else
783 				rc = 0;	/* Setting same address twice is ok */
784 			goto out;
785 		}
786 	} else {
787 		/*
788 		* The device that owns atomics and wants to release
789 		* them must pass the same address with enable == 0.
790 		*/
791 		if (tunnel_bar != addr) {
792 			rc = -EPERM;
793 			goto out;
794 		}
795 		addr = 0x0ULL;
796 	}
797 	rc = opal_pci_set_pbcq_tunnel_bar(phb->opal_id, addr);
798 	rc = opal_error_code(rc);
799 out:
800 	mutex_unlock(&tunnel_mutex);
801 	return rc;
802 }
803 EXPORT_SYMBOL_GPL(pnv_pci_set_tunnel_bar);
804 
805 void pnv_pci_shutdown(void)
806 {
807 	struct pci_controller *hose;
808 
809 	list_for_each_entry(hose, &hose_list, list_node)
810 		if (hose->controller_ops.shutdown)
811 			hose->controller_ops.shutdown(hose);
812 }
813 
814 /* Fixup wrong class code in p7ioc and p8 root complex */
815 static void pnv_p7ioc_rc_quirk(struct pci_dev *dev)
816 {
817 	dev->class = PCI_CLASS_BRIDGE_PCI_NORMAL;
818 }
819 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IBM, 0x3b9, pnv_p7ioc_rc_quirk);
820 
821 void __init pnv_pci_init(void)
822 {
823 	struct device_node *np;
824 
825 	pci_add_flags(PCI_CAN_SKIP_ISA_ALIGN);
826 
827 	/* If we don't have OPAL, eg. in sim, just skip PCI probe */
828 	if (!firmware_has_feature(FW_FEATURE_OPAL))
829 		return;
830 
831 #ifdef CONFIG_PCIEPORTBUS
832 	/*
833 	 * On PowerNV PCIe devices are (currently) managed in cooperation
834 	 * with firmware. This isn't *strictly* required, but there's enough
835 	 * assumptions baked into both firmware and the platform code that
836 	 * it's unwise to allow the portbus services to be used.
837 	 *
838 	 * We need to fix this eventually, but for now set this flag to disable
839 	 * the portbus driver. The AER service isn't required since that AER
840 	 * events are handled via EEH. The pciehp hotplug driver can't work
841 	 * without kernel changes (and portbus binding breaks pnv_php). The
842 	 * other services also require some thinking about how we're going
843 	 * to integrate them.
844 	 */
845 	pcie_ports_disabled = true;
846 #endif
847 
848 	/* Look for ioda2 built-in PHB3's */
849 	for_each_compatible_node(np, NULL, "ibm,ioda2-phb")
850 		pnv_pci_init_ioda2_phb(np);
851 
852 	/* Look for ioda3 built-in PHB4's, we treat them as IODA2 */
853 	for_each_compatible_node(np, NULL, "ibm,ioda3-phb")
854 		pnv_pci_init_ioda2_phb(np);
855 
856 	/* Look for NPU2 OpenCAPI PHBs */
857 	for_each_compatible_node(np, NULL, "ibm,ioda2-npu2-opencapi-phb")
858 		pnv_pci_init_npu2_opencapi_phb(np);
859 
860 	/* Configure IOMMU DMA hooks */
861 	set_pci_dma_ops(&dma_iommu_ops);
862 }
863