xref: /illumos-gate/usr/src/uts/common/io/pciex/pciev.c (revision a38ee58261c5aa81028a4329e73da4016006aa99)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/ddi.h>
28 #include <sys/dditypes.h>
29 #include <sys/ddifm.h>
30 #include <sys/sunndi.h>
31 #include <sys/devops.h>
32 #include <sys/pcie.h>
33 #include <sys/pci_cap.h>
34 #include <sys/pcie_impl.h>
35 #include <sys/pathname.h>
36 
37 /*
38  * The below 2 global variables are for PCIe IOV Error Handling.  They must only
39  * be accessed during error handling under the protection of a error mutex.
40  */
41 static pcie_domains_t *pcie_faulty_domains = NULL;
42 static boolean_t pcie_faulty_all = B_FALSE;
43 
44 static void pcie_domain_list_destroy(pcie_domains_t *domain_ids);
45 static void pcie_bdf_list_add(pcie_req_id_t bdf,
46     pcie_req_id_list_t **rlist_p);
47 static void pcie_bdf_list_remove(pcie_req_id_t bdf,
48     pcie_req_id_list_t **rlist_p);
49 static void pcie_cache_domain_info(pcie_bus_t *bus_p);
50 static void pcie_uncache_domain_info(pcie_bus_t *bus_p);
51 
52 static void pcie_faulty_list_clear();
53 static void pcie_faulty_list_update(pcie_domains_t *pd,
54     pcie_domains_t **headp);
55 
56 dev_info_t *
57 pcie_find_dip_by_bdf(dev_info_t *rootp, pcie_req_id_t bdf)
58 {
59 	dev_info_t *dip;
60 	pcie_bus_t *bus_p;
61 	int bus_num;
62 
63 	dip = ddi_get_child(rootp);
64 	while (dip) {
65 		bus_p = PCIE_DIP2BUS(dip);
66 		if (bus_p && (bus_p->bus_bdf == bdf))
67 			return (dip);
68 		if (bus_p) {
69 			bus_num = (bdf >> 8) & 0xff;
70 			if ((bus_num >= bus_p->bus_bus_range.lo &&
71 			    bus_num <= bus_p->bus_bus_range.hi) ||
72 			    bus_p->bus_bus_range.hi == 0)
73 				return (pcie_find_dip_by_bdf(dip, bdf));
74 		}
75 		dip = ddi_get_next_sibling(dip);
76 	}
77 	return (NULL);
78 }
79 
80 /*
81  * Add a device bdf to the bdf list.
82  */
83 static void
84 pcie_bdf_list_add(pcie_req_id_t bdf, pcie_req_id_list_t **rlist_p)
85 {
86 	pcie_req_id_list_t *rl = PCIE_ZALLOC(pcie_req_id_list_t);
87 
88 	rl->bdf = bdf;
89 	rl->next = *rlist_p;
90 	*rlist_p = rl;
91 }
92 
93 /*
94  * Remove a bdf from the bdf list.
95  */
96 static void
97 pcie_bdf_list_remove(pcie_req_id_t bdf, pcie_req_id_list_t **rlist_p)
98 {
99 	pcie_req_id_list_t *rl_pre, *rl_next;
100 
101 	rl_pre = *rlist_p;
102 	if (rl_pre->bdf == bdf) {
103 		*rlist_p = rl_pre->next;
104 		kmem_free(rl_pre, sizeof (pcie_req_id_list_t));
105 		return;
106 	}
107 
108 	while (rl_pre->next) {
109 		rl_next = rl_pre->next;
110 		if (rl_next->bdf == bdf) {
111 			rl_pre->next = rl_next->next;
112 			kmem_free(rl_next, sizeof (pcie_req_id_list_t));
113 			break;
114 		} else
115 			rl_pre = rl_next;
116 	}
117 }
118 
119 /*
120  * Cache IOV domain info in all it's parent's pcie_domain_t
121  *
122  * The leaf devices's domain info must be set before calling this function.
123  */
124 void
125 pcie_cache_domain_info(pcie_bus_t *bus_p)
126 {
127 	boolean_t 	assigned = PCIE_IS_ASSIGNED(bus_p);
128 	boolean_t 	fma_dom = PCIE_ASSIGNED_TO_FMA_DOM(bus_p);
129 	uint_t		domain_id = PCIE_DOMAIN_ID_GET(bus_p);
130 	pcie_req_id_t	bdf = bus_p->bus_bdf;
131 	dev_info_t	*pdip;
132 	pcie_bus_t	*pbus_p;
133 	pcie_domain_t	*pdom_p;
134 
135 	ASSERT(!PCIE_IS_BDG(bus_p));
136 
137 	for (pdip = ddi_get_parent(PCIE_BUS2DIP(bus_p)); PCIE_DIP2BUS(pdip);
138 	    pdip = ddi_get_parent(pdip)) {
139 		pbus_p = PCIE_DIP2BUS(pdip);
140 		pdom_p = PCIE_BUS2DOM(pbus_p);
141 
142 		if (assigned) {
143 			if (domain_id)
144 				PCIE_DOMAIN_LIST_ADD(pbus_p, domain_id);
145 
146 			if (fma_dom)
147 				pdom_p->fmadom_count++;
148 			else {
149 				PCIE_BDF_LIST_ADD(pbus_p, bdf);
150 				pdom_p->nfmadom_count++;
151 			}
152 		} else
153 			pdom_p->rootdom_count++;
154 	}
155 }
156 
157 /*
158  * Clear the leaf device's domain info and uncache IOV domain info in all it's
159  * parent's pcie_domain_t
160  *
161  * The leaf devices's domain info is also cleared by calling this function.
162  */
163 void
164 pcie_uncache_domain_info(pcie_bus_t *bus_p)
165 {
166 	boolean_t 	assigned = PCIE_IS_ASSIGNED(bus_p);
167 	boolean_t 	fma_dom = PCIE_ASSIGNED_TO_FMA_DOM(bus_p);
168 	uint_t		domain_id = PCIE_DOMAIN_ID_GET(bus_p);
169 	pcie_domain_t	*dom_p = PCIE_BUS2DOM(bus_p), *pdom_p;
170 	pcie_bus_t	*pbus_p;
171 	dev_info_t	*pdip;
172 
173 	ASSERT(!PCIE_IS_BDG(bus_p));
174 	ASSERT((dom_p->fmadom_count + dom_p->nfmadom_count +
175 	    dom_p->rootdom_count) == 1);
176 
177 	/* Clear the domain information */
178 	if (domain_id) {
179 		PCIE_DOMAIN_ID_SET(bus_p, NULL);
180 		PCIE_DOMAIN_ID_DECR_REF_COUNT(bus_p);
181 	}
182 
183 	dom_p->fmadom_count = 0;
184 	dom_p->nfmadom_count = 0;
185 	dom_p->rootdom_count = 0;
186 
187 	for (pdip = ddi_get_parent(PCIE_BUS2DIP(bus_p)); PCIE_DIP2BUS(pdip);
188 	    pdip = ddi_get_parent(pdip)) {
189 		pbus_p = PCIE_DIP2BUS(pdip);
190 		pdom_p = PCIE_BUS2DOM(pbus_p);
191 
192 		if (assigned) {
193 			if (domain_id)
194 				PCIE_DOMAIN_LIST_REMOVE(pbus_p, domain_id);
195 
196 			if (fma_dom)
197 				pdom_p->fmadom_count--;
198 			else {
199 				pdom_p->nfmadom_count--;
200 				PCIE_BDF_LIST_REMOVE(pbus_p, bus_p->bus_bdf);
201 			}
202 		} else
203 			pdom_p->rootdom_count--;
204 	}
205 }
206 
207 
208 /*
209  * Initialize private data structure for IOV environments.
210  * o Allocate memory for iov data
211  * o Cache Domain ids.
212  */
213 void
214 pcie_init_dom(dev_info_t *dip)
215 {
216 	pcie_domain_t	*dom_p = PCIE_ZALLOC(pcie_domain_t);
217 	pcie_bus_t	*bus_p = PCIE_DIP2BUS(dip);
218 
219 	PCIE_BUS2DOM(bus_p) = dom_p;
220 
221 	/* Only leaf devices are assignable to IO Domains */
222 	if (PCIE_IS_BDG(bus_p))
223 		return;
224 
225 	/*
226 	 * At the time of init_dom in the root domain a device may or may not
227 	 * have been assigned to an IO Domain.
228 	 *
229 	 * LDOMS: the property "ddi-assigned" will be set for devices that is
230 	 * assignable to an IO domain and unusable in the root domain.  If the
231 	 * property exist assume it has been assigned to a non-fma domain until
232 	 * otherwise notified.  The domain id is unknown on LDOMS.
233 	 *
234 	 * Xen: the "ddi-assigned" property won't be set until Xen store calls
235 	 * pcie_loan_device is called.  In this function this will always look
236 	 * like the device is assigned to the root domain.  Domain ID caching
237 	 * will occur in pcie_loan_device function.
238 	 */
239 	if (ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
240 	    "ddi-assigned", -1) != -1) {
241 		dom_p->nfmadom_count = 1;
242 
243 		/* Prevent "assigned" device from detaching */
244 		ndi_hold_devi(dip);
245 	} else
246 		dom_p->rootdom_count = 1;
247 	(void) ddi_prop_remove(DDI_DEV_T_NONE, dip, "ddi-assigned");
248 
249 	pcie_cache_domain_info(bus_p);
250 }
251 
252 void
253 pcie_fini_dom(dev_info_t *dip)
254 {
255 	pcie_domain_t	*dom_p = PCIE_DIP2DOM(dip);
256 	pcie_bus_t	*bus_p = PCIE_DIP2BUS(dip);
257 
258 	if (PCIE_IS_BDG(bus_p))
259 		pcie_domain_list_destroy(PCIE_DOMAIN_LIST_GET(bus_p));
260 	else
261 		pcie_uncache_domain_info(bus_p);
262 
263 	kmem_free(dom_p, sizeof (pcie_domain_t));
264 }
265 
/*
 * PCIe Severity:
 *
 * PF_ERR_NO_ERROR	: no IOV Action
 * PF_ERR_CE		: no IOV Action
 * PF_ERR_NO_PANIC	: contains error telemetry, log domain info
 * PF_ERR_MATCHED_DEVICE: contains error telemetry, log domain info
 * PF_ERR_MATCHED_RC	: Error already taken care of, no further IOV Action
 * PF_ERR_MATCHED_PARENT: Error already taken care of, no further IOV Action
 * PF_ERR_PANIC		: contains error telemetry, log domain info
 *
 * For NO_PANIC, MATCHED_DEVICE and PANIC, IOV wants to look at the affected
 * devices and find the domains involved.
 *
 * If root domain does not own an affected device, IOV EH should change
 * PF_ERR_PANIC to PF_ERR_MATCH_DOM.
 *
 * pfd_p:	fault data of the device being examined
 * impl:	error handling state; the head of its fault data queue is
 *		taken to be the root device's entry
 *
 * Returns the severity flags to use for pfd_p.  This is the unmodified
 * pfd_p->pe_severity_flags when the root device is a bridge with nothing
 * assigned, when the severity contains PF_ERR_PANIC_DEADLOCK, or when none
 * of the severities handled below is present.
 *
 * Side effects: may set pcie_faulty_all, may add entries to the faulty
 * domain list via pcie_save_domain_id(), may set nfma_panic in an affected
 * device's pcie_domain_t, and overwrites pe_affected_bdf of pfd_p's affected
 * device data with the bdf of each affected device found.
 */
int
pciev_eh(pf_data_t *pfd_p, pf_impl_t *impl)
{
	int severity = pfd_p->pe_severity_flags;
	int iov_severity = severity;
	pcie_bus_t *a_bus_p;	/* Affected device's pcie_bus_t */
	pf_data_t *root_pfd_p = impl->pf_dq_head_p;
	pcie_bus_t *root_bus_p;

	/*
	 * check if all devices under the root device are unassigned.
	 * this function should quickly return in non-IOV environment.
	 */
	ASSERT(root_pfd_p != NULL);
	root_bus_p = PCIE_PFD2BUS(root_pfd_p);
	if (PCIE_BDG_IS_UNASSIGNED(root_bus_p))
		return (severity);

	if (severity & PF_ERR_PANIC_DEADLOCK) {
		/* no per-device analysis; just flag every domain as faulty */
		pcie_faulty_all = B_TRUE;

	} else if (severity & (PF_ERR_NO_PANIC | PF_ERR_MATCHED_DEVICE |
	    PF_ERR_PANIC | PF_ERR_PANIC_BAD_RESPONSE)) {

		uint16_t affected_flag, dev_affected_flags;
		/* both are used as counters but only tested for non-zero */
		uint_t is_panic = 0, is_aff_dev_found = 0;

		dev_affected_flags = PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags;
		/* adjust affected flags to leverage cached domain ids */
		if (dev_affected_flags & PF_AFFECTED_CHILDREN) {
			dev_affected_flags |= PF_AFFECTED_SELF;
			dev_affected_flags &= ~PF_AFFECTED_CHILDREN;
		}

		/* test each single-bit affected flag in turn */
		for (affected_flag = 1;
		    affected_flag <= PF_MAX_AFFECTED_FLAG;
		    affected_flag <<= 1) {
			a_bus_p = pciev_get_affected_dev(impl, pfd_p,
			    affected_flag, dev_affected_flags);

			/* flag not set, or no device could be resolved */
			if (a_bus_p == NULL)
				continue;

			is_aff_dev_found++;
			PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf =
			    a_bus_p->bus_bdf;

			/*
			 * If a leaf device is assigned to the root domain or if
			 * a bridge has children assigned to a root domain
			 * panic.
			 *
			 * If a leaf device or a child of a bridge is assigned
			 * to NFMA domain mark it for panic.  If assigned to FMA
			 * domain save the domain id.
			 */
			if (!PCIE_IS_BDG(a_bus_p) &&
			    !PCIE_IS_ASSIGNED(a_bus_p)) {
				/* unassigned leaf: nothing else to record */
				if (severity & PF_ERR_FATAL_FLAGS)
					is_panic++;
				continue;
			}

			if (PCIE_BDG_HAS_CHILDREN_ROOT_DOM(a_bus_p)) {
				if (severity & PF_ERR_FATAL_FLAGS)
					is_panic++;
			}

			if ((PCIE_ASSIGNED_TO_NFMA_DOM(a_bus_p) ||
			    PCIE_BDG_HAS_CHILDREN_NFMA_DOM(a_bus_p)) &&
			    (severity & PF_ERR_FATAL_FLAGS)) {
				PCIE_BUS2DOM(a_bus_p)->nfma_panic = B_TRUE;
				iov_severity |= PF_ERR_MATCH_DOM;
			}

			if (PCIE_ASSIGNED_TO_FMA_DOM(a_bus_p)) {
				/* leaf: record the device's own domain entry */
				pcie_save_domain_id(
				    &PCIE_BUS2DOM(a_bus_p)->domain.id);
				iov_severity |= PF_ERR_MATCH_DOM;
			}

			if (PCIE_BDG_HAS_CHILDREN_FMA_DOM(a_bus_p)) {
				/* bridge: record its cached domain id list */
				pcie_save_domain_id(
				    PCIE_DOMAIN_LIST_GET(a_bus_p));
				iov_severity |= PF_ERR_MATCH_DOM;
			}
		}

		/*
		 * Overwrite the severity only if affected device can be
		 * identified and root domain does not need to panic.
		 */
		if ((!is_panic) && is_aff_dev_found) {
			iov_severity &= ~PF_ERR_FATAL_FLAGS;
		}
	}

	return (iov_severity);
}
382 
383 /* ARGSUSED */
384 void
385 pciev_eh_exit(pf_data_t *root_pfd_p, uint_t intr_type)
386 {
387 	pcie_bus_t *root_bus_p;
388 
389 	/*
390 	 * check if all devices under the root device are unassigned.
391 	 * this function should quickly return in non-IOV environment.
392 	 */
393 	root_bus_p = PCIE_PFD2BUS(root_pfd_p);
394 	if (PCIE_BDG_IS_UNASSIGNED(root_bus_p))
395 		return;
396 
397 	pcie_faulty_list_clear();
398 }
399 
400 pcie_bus_t *
401 pciev_get_affected_dev(pf_impl_t *impl, pf_data_t *pfd_p,
402     uint16_t affected_flag, uint16_t dev_affected_flags)
403 {
404 	pcie_bus_t *bus_p = PCIE_PFD2BUS(pfd_p);
405 	uint16_t flag = affected_flag & dev_affected_flags;
406 	pcie_bus_t *temp_bus_p;
407 	pcie_req_id_t a_bdf;
408 	uint64_t a_addr;
409 	uint16_t cmd;
410 
411 	if (!flag)
412 		return (NULL);
413 
414 	switch (flag) {
415 	case PF_AFFECTED_ROOT:
416 		return (PCIE_DIP2BUS(bus_p->bus_rp_dip));
417 	case PF_AFFECTED_SELF:
418 		return (bus_p);
419 	case PF_AFFECTED_PARENT:
420 		return (PCIE_DIP2BUS(ddi_get_parent(PCIE_BUS2DIP(bus_p))));
421 	case PF_AFFECTED_BDF: /* may only be used for RC */
422 		a_bdf = PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf;
423 		if (!PCIE_CHECK_VALID_BDF(a_bdf))
424 			return (NULL);
425 
426 		temp_bus_p = pf_find_busp_by_bdf(impl, a_bdf);
427 		return (temp_bus_p);
428 	case PF_AFFECTED_AER:
429 		if (pf_tlp_decode(bus_p, PCIE_ADV_REG(pfd_p)) == DDI_SUCCESS) {
430 			temp_bus_p = pf_find_busp_by_aer(impl, pfd_p);
431 			return (temp_bus_p);
432 		}
433 		break;
434 	case PF_AFFECTED_SAER:
435 		if (pf_pci_decode(pfd_p, &cmd) == DDI_SUCCESS) {
436 			temp_bus_p = pf_find_busp_by_saer(impl, pfd_p);
437 			return (temp_bus_p);
438 		}
439 		break;
440 	case PF_AFFECTED_ADDR: /* ROOT only */
441 		a_addr = PCIE_ROOT_FAULT(pfd_p)->scan_addr;
442 		temp_bus_p = pf_find_busp_by_addr(impl, a_addr);
443 		return (temp_bus_p);
444 	}
445 
446 	return (NULL);
447 }
448 
/*
 * Type used for the pcie_domain_list_find() function.  It selects which of
 * the two link members of pcie_domains_t the search follows.
 */
typedef enum {
	PCIE_DOM_LIST_TYPE_CACHE = 1,	/* follow cached_next */
	PCIE_DOM_LIST_TYPE_FAULT = 2	/* follow faulty_next */
} pcie_dom_list_type_t;
454 
455 /*
456  * Check if a domain id is already in the linked list
457  */
458 static pcie_domains_t *
459 pcie_domain_list_find(uint_t domain_id, pcie_domains_t *pd_list_p,
460     pcie_dom_list_type_t type)
461 {
462 	while (pd_list_p) {
463 		if (pd_list_p->domain_id == domain_id)
464 			return (pd_list_p);
465 
466 		if (type == PCIE_DOM_LIST_TYPE_CACHE) {
467 			pd_list_p = pd_list_p->cached_next;
468 		} else if (type == PCIE_DOM_LIST_TYPE_FAULT) {
469 			pd_list_p = pd_list_p->faulty_next;
470 		} else {
471 			return (NULL);
472 		}
473 	}
474 
475 	return (NULL);
476 }
477 
478 /*
479  * Return true if a leaf device is assigned to a domain or a bridge device
480  * has children assigned to the domain
481  */
482 boolean_t
483 pcie_in_domain(pcie_bus_t *bus_p, uint_t domain_id)
484 {
485 	if (PCIE_IS_BDG(bus_p)) {
486 		pcie_domains_t *pd;
487 		pd = pcie_domain_list_find(domain_id,
488 		    PCIE_DOMAIN_LIST_GET(bus_p), PCIE_DOM_LIST_TYPE_CACHE);
489 		if (pd && pd->cached_count)
490 			return (B_TRUE);
491 		return (B_FALSE);
492 	} else {
493 		return (PCIE_DOMAIN_ID_GET(bus_p) == domain_id);
494 	}
495 }
496 
497 /*
498  * Add a domain id to a cached domain id list.
499  * If the domain already exists in the list, increment the reference count.
500  */
501 void
502 pcie_domain_list_add(uint_t domain_id, pcie_domains_t **pd_list_p)
503 {
504 	pcie_domains_t *pd;
505 
506 	pd = pcie_domain_list_find(domain_id, *pd_list_p,
507 	    PCIE_DOM_LIST_TYPE_CACHE);
508 
509 	if (pd == NULL) {
510 		pd = PCIE_ZALLOC(pcie_domains_t);
511 		pd->domain_id = domain_id;
512 		pd->cached_count = 1;
513 		pd->cached_next = *pd_list_p;
514 		*pd_list_p = pd;
515 	} else
516 		pd->cached_count++;
517 }
518 
519 /*
520  * Remove a domain id from a cached domain id list.
521  * Decrement the reference count.
522  */
523 void
524 pcie_domain_list_remove(uint_t domain_id, pcie_domains_t *pd_list_p)
525 {
526 	pcie_domains_t *pd;
527 
528 	pd = pcie_domain_list_find(domain_id, pd_list_p,
529 	    PCIE_DOM_LIST_TYPE_CACHE);
530 
531 	if (pd) {
532 		ASSERT((pd->cached_count)--);
533 	}
534 }
535 
536 /* destroy cached domain id list */
537 static void
538 pcie_domain_list_destroy(pcie_domains_t *domain_ids)
539 {
540 	pcie_domains_t *p = domain_ids;
541 	pcie_domains_t *next;
542 
543 	while (p) {
544 		next = p->cached_next;
545 		kmem_free(p, sizeof (pcie_domains_t));
546 		p = next;
547 	}
548 }
549 
550 static void
551 pcie_faulty_list_update(pcie_domains_t *pd,
552     pcie_domains_t **headp)
553 {
554 	if (pd == NULL)
555 		return;
556 
557 	if (*headp == NULL) {
558 		*headp = pd;
559 		pd->faulty_prev = NULL;
560 		pd->faulty_next = NULL;
561 		pd->faulty_count = 1;
562 	} else {
563 		pd->faulty_next = *headp;
564 		(*headp)->faulty_prev = pd;
565 		pd->faulty_prev = NULL;
566 		pd->faulty_count = 1;
567 		*headp = pd;
568 	}
569 }
570 
571 static void
572 pcie_faulty_list_clear()
573 {
574 	pcie_domains_t *pd = pcie_faulty_domains;
575 	pcie_domains_t *next;
576 
577 	/* unlink all domain structures from the faulty list */
578 	while (pd) {
579 		next = pd->faulty_next;
580 		pd->faulty_prev = NULL;
581 		pd->faulty_next = NULL;
582 		pd->faulty_count = 0;
583 		pd = next;
584 	}
585 	pcie_faulty_domains = NULL;
586 	pcie_faulty_all = B_FALSE;
587 }
588 
589 void
590 pcie_save_domain_id(pcie_domains_t *domain_ids)
591 {
592 	pcie_domains_t *old_list_p, *new_list_p, *pd;
593 
594 	if (pcie_faulty_all)
595 		return;
596 
597 	if (domain_ids == NULL)
598 		return;
599 
600 	old_list_p = pcie_faulty_domains;
601 	for (new_list_p = domain_ids; new_list_p;
602 	    new_list_p = new_list_p->cached_next) {
603 		if (!new_list_p->cached_count)
604 			continue;
605 
606 		/* search domain id in the faulty domain list */
607 		pd = pcie_domain_list_find(new_list_p->domain_id,
608 		    old_list_p, PCIE_DOM_LIST_TYPE_FAULT);
609 		if (pd)
610 			pd->faulty_count++;
611 		else
612 			pcie_faulty_list_update(new_list_p,
613 			    &pcie_faulty_domains);
614 	}
615 }
616