xref: /illumos-gate/usr/src/uts/common/io/pciex/pciev.c (revision 528737823843346cf95a4a701612f82089135554)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Copyright (c) 2017, Joyent, Inc.
28  */
29 
30 #include <sys/types.h>
31 #include <sys/ddi.h>
32 #include <sys/dditypes.h>
33 #include <sys/ddifm.h>
34 #include <sys/sunndi.h>
35 #include <sys/devops.h>
36 #include <sys/pcie.h>
37 #include <sys/pci_cap.h>
38 #include <sys/pcie_impl.h>
39 #include <sys/pathname.h>
40 
/*
 * The two global variables below are for PCIe IOV Error Handling.  They must
 * only be accessed during error handling under the protection of an error
 * mutex.
 *
 * pcie_faulty_domains: head of the list of domain structures that are linked
 *	together through their faulty_next/faulty_prev pointers.
 * pcie_faulty_all: set by pciev_eh() when the severity includes
 *	PF_ERR_PANIC_DEADLOCK; while it is set pcie_save_domain_id() records
 *	nothing.  Reset by pcie_faulty_list_clear().
 */
static pcie_domains_t *pcie_faulty_domains = NULL;
static boolean_t pcie_faulty_all = B_FALSE;
47 
48 static void pcie_domain_list_destroy(pcie_domains_t *domain_ids);
49 static void pcie_bdf_list_add(pcie_req_id_t bdf,
50     pcie_req_id_list_t **rlist_p);
51 static void pcie_bdf_list_remove(pcie_req_id_t bdf,
52     pcie_req_id_list_t **rlist_p);
53 static void pcie_cache_domain_info(pcie_bus_t *bus_p);
54 static void pcie_uncache_domain_info(pcie_bus_t *bus_p);
55 
56 static void pcie_faulty_list_clear();
57 static void pcie_faulty_list_update(pcie_domains_t *pd,
58     pcie_domains_t **headp);
59 
60 dev_info_t *
61 pcie_find_dip_by_bdf(dev_info_t *rootp, pcie_req_id_t bdf)
62 {
63 	dev_info_t *dip;
64 	pcie_bus_t *bus_p;
65 	int bus_num;
66 
67 	dip = ddi_get_child(rootp);
68 	while (dip) {
69 		bus_p = PCIE_DIP2BUS(dip);
70 		if (bus_p && (bus_p->bus_bdf == bdf))
71 			return (dip);
72 		if (bus_p) {
73 			bus_num = (bdf >> 8) & 0xff;
74 			if ((bus_num >= bus_p->bus_bus_range.lo &&
75 			    bus_num <= bus_p->bus_bus_range.hi) ||
76 			    bus_p->bus_bus_range.hi == 0)
77 				return (pcie_find_dip_by_bdf(dip, bdf));
78 		}
79 		dip = ddi_get_next_sibling(dip);
80 	}
81 	return (NULL);
82 }
83 
84 /*
85  * Add a device bdf to the bdf list.
86  */
87 static void
88 pcie_bdf_list_add(pcie_req_id_t bdf, pcie_req_id_list_t **rlist_p)
89 {
90 	pcie_req_id_list_t *rl = PCIE_ZALLOC(pcie_req_id_list_t);
91 
92 	rl->bdf = bdf;
93 	rl->next = *rlist_p;
94 	*rlist_p = rl;
95 }
96 
97 /*
98  * Remove a bdf from the bdf list.
99  */
100 static void
101 pcie_bdf_list_remove(pcie_req_id_t bdf, pcie_req_id_list_t **rlist_p)
102 {
103 	pcie_req_id_list_t *rl_pre, *rl_next;
104 
105 	rl_pre = *rlist_p;
106 	if (rl_pre->bdf == bdf) {
107 		*rlist_p = rl_pre->next;
108 		kmem_free(rl_pre, sizeof (pcie_req_id_list_t));
109 		return;
110 	}
111 
112 	while (rl_pre->next) {
113 		rl_next = rl_pre->next;
114 		if (rl_next->bdf == bdf) {
115 			rl_pre->next = rl_next->next;
116 			kmem_free(rl_next, sizeof (pcie_req_id_list_t));
117 			break;
118 		} else
119 			rl_pre = rl_next;
120 	}
121 }
122 
123 /*
124  * Cache IOV domain info in all it's parent's pcie_domain_t
125  *
126  * The leaf devices's domain info must be set before calling this function.
127  */
128 void
129 pcie_cache_domain_info(pcie_bus_t *bus_p)
130 {
131 	boolean_t	assigned = PCIE_IS_ASSIGNED(bus_p);
132 	boolean_t	fma_dom = PCIE_ASSIGNED_TO_FMA_DOM(bus_p);
133 	uint_t		domain_id = PCIE_DOMAIN_ID_GET(bus_p);
134 	pcie_req_id_t	bdf = bus_p->bus_bdf;
135 	dev_info_t	*pdip;
136 	pcie_bus_t	*pbus_p;
137 	pcie_domain_t	*pdom_p;
138 
139 	ASSERT(!PCIE_IS_BDG(bus_p));
140 
141 	for (pdip = ddi_get_parent(PCIE_BUS2DIP(bus_p)); PCIE_DIP2BUS(pdip);
142 	    pdip = ddi_get_parent(pdip)) {
143 		pbus_p = PCIE_DIP2BUS(pdip);
144 		pdom_p = PCIE_BUS2DOM(pbus_p);
145 
146 		if (assigned) {
147 			if (domain_id)
148 				PCIE_DOMAIN_LIST_ADD(pbus_p, domain_id);
149 
150 			if (fma_dom)
151 				pdom_p->fmadom_count++;
152 			else {
153 				PCIE_BDF_LIST_ADD(pbus_p, bdf);
154 				pdom_p->nfmadom_count++;
155 			}
156 		} else
157 			pdom_p->rootdom_count++;
158 	}
159 }
160 
161 /*
162  * Clear the leaf device's domain info and uncache IOV domain info in all it's
163  * parent's pcie_domain_t
164  *
165  * The leaf devices's domain info is also cleared by calling this function.
166  */
167 void
168 pcie_uncache_domain_info(pcie_bus_t *bus_p)
169 {
170 	boolean_t	assigned = PCIE_IS_ASSIGNED(bus_p);
171 	boolean_t	fma_dom = PCIE_ASSIGNED_TO_FMA_DOM(bus_p);
172 	uint_t		domain_id = PCIE_DOMAIN_ID_GET(bus_p);
173 	pcie_domain_t	*dom_p = PCIE_BUS2DOM(bus_p), *pdom_p;
174 	pcie_bus_t	*pbus_p;
175 	dev_info_t	*pdip;
176 
177 	ASSERT(!PCIE_IS_BDG(bus_p));
178 	ASSERT((dom_p->fmadom_count + dom_p->nfmadom_count +
179 	    dom_p->rootdom_count) == 1);
180 
181 	/* Clear the domain information */
182 	if (domain_id) {
183 		PCIE_DOMAIN_ID_SET(bus_p, 0);
184 		PCIE_DOMAIN_ID_DECR_REF_COUNT(bus_p);
185 	}
186 
187 	dom_p->fmadom_count = 0;
188 	dom_p->nfmadom_count = 0;
189 	dom_p->rootdom_count = 0;
190 
191 	for (pdip = ddi_get_parent(PCIE_BUS2DIP(bus_p)); PCIE_DIP2BUS(pdip);
192 	    pdip = ddi_get_parent(pdip)) {
193 		pbus_p = PCIE_DIP2BUS(pdip);
194 		pdom_p = PCIE_BUS2DOM(pbus_p);
195 
196 		if (assigned) {
197 			if (domain_id)
198 				PCIE_DOMAIN_LIST_REMOVE(pbus_p, domain_id);
199 
200 			if (fma_dom)
201 				pdom_p->fmadom_count--;
202 			else {
203 				pdom_p->nfmadom_count--;
204 				PCIE_BDF_LIST_REMOVE(pbus_p, bus_p->bus_bdf);
205 			}
206 		} else
207 			pdom_p->rootdom_count--;
208 	}
209 }
210 
211 
212 /*
213  * Initialize private data structure for IOV environments.
214  * o Allocate memory for iov data
215  * o Cache Domain ids.
216  */
217 void
218 pcie_init_dom(dev_info_t *dip)
219 {
220 	pcie_domain_t	*dom_p = PCIE_ZALLOC(pcie_domain_t);
221 	pcie_bus_t	*bus_p = PCIE_DIP2BUS(dip);
222 
223 	PCIE_BUS2DOM(bus_p) = dom_p;
224 
225 	/* Only leaf devices are assignable to IO Domains */
226 	if (PCIE_IS_BDG(bus_p))
227 		return;
228 
229 	/*
230 	 * At the time of init_dom in the root domain a device may or may not
231 	 * have been assigned to an IO Domain.
232 	 *
233 	 * LDOMS: the property "ddi-assigned" will be set for devices that is
234 	 * assignable to an IO domain and unusable in the root domain.  If the
235 	 * property exist assume it has been assigned to a non-fma domain until
236 	 * otherwise notified.  The domain id is unknown on LDOMS.
237 	 *
238 	 * Xen: the "ddi-assigned" property won't be set until Xen store calls
239 	 * pcie_loan_device is called.  In this function this will always look
240 	 * like the device is assigned to the root domain.  Domain ID caching
241 	 * will occur in pcie_loan_device function.
242 	 */
243 	if (ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
244 	    "ddi-assigned", -1) != -1) {
245 		dom_p->nfmadom_count = 1;
246 
247 		/* Prevent "assigned" device from detaching */
248 		ndi_hold_devi(dip);
249 	} else
250 		dom_p->rootdom_count = 1;
251 	(void) ddi_prop_remove(DDI_DEV_T_NONE, dip, "ddi-assigned");
252 
253 	pcie_cache_domain_info(bus_p);
254 }
255 
256 void
257 pcie_fini_dom(dev_info_t *dip)
258 {
259 	pcie_domain_t	*dom_p = PCIE_DIP2DOM(dip);
260 	pcie_bus_t	*bus_p = PCIE_DIP2BUS(dip);
261 
262 	if (PCIE_IS_BDG(bus_p))
263 		pcie_domain_list_destroy(PCIE_DOMAIN_LIST_GET(bus_p));
264 	else
265 		pcie_uncache_domain_info(bus_p);
266 
267 	kmem_free(dom_p, sizeof (pcie_domain_t));
268 }
269 
/*
 * IOV error handling for one fault data entry: adjust the severity of pfd_p
 * according to which domains own the affected devices.
 *
 * PCIe Severity:
 *
 * PF_ERR_NO_ERROR	: no IOV Action
 * PF_ERR_CE		: no IOV Action
 * PF_ERR_NO_PANIC	: contains error telemetry, log domain info
 * PF_ERR_MATCHED_DEVICE: contains error telemetry, log domain info
 * PF_ERR_MATCHED_RC	: Error already taken care of, no further IOV Action
 * PF_ERR_MATCHED_PARENT: Error already taken care of, no further IOV Action
 * PF_ERR_PANIC		: contains error telemetry, log domain info
 *
 * For NO_PANIC, MATCHED_DEVICE and PANIC, IOV wants to look at the affected
 * devices and find the domains involved.  The code below treats
 * PF_ERR_BAD_RESPONSE the same way.
 *
 * If root domain does not own an affected device, IOV EH should change
 * PF_ERR_PANIC to PF_ERR_MATCH_DOM.
 *
 * pfd_p: fault data being evaluated; its pe_affected_bdf is overwritten with
 *	the bdf of each affected device that is found.
 * impl:  fault handling state; pf_dq_head_p must point at the root's fault
 *	data.
 *
 * Returns the (possibly adjusted) severity flags.  The original severity is
 * returned unchanged when PCIE_BDG_IS_UNASSIGNED() holds for the root.
 */
int
pciev_eh(pf_data_t *pfd_p, pf_impl_t *impl)
{
	int severity = pfd_p->pe_severity_flags;
	int iov_severity = severity;
	pcie_bus_t *a_bus_p;	/* Affected device's pcie_bus_t */
	pf_data_t *root_pfd_p = impl->pf_dq_head_p;
	pcie_bus_t *root_bus_p;

	/*
	 * Check if all devices under the root device are unassigned.
	 * This function should quickly return in a non-IOV environment.
	 */
	ASSERT(root_pfd_p != NULL);
	root_bus_p = PCIE_PFD2BUS(root_pfd_p);
	if (PCIE_BDG_IS_UNASSIGNED(root_bus_p))
		return (severity);

	if (severity & PF_ERR_PANIC_DEADLOCK) {
		/* No per-domain bookkeeping: flag everything as faulty. */
		pcie_faulty_all = B_TRUE;

	} else if (severity & (PF_ERR_NO_PANIC | PF_ERR_MATCHED_DEVICE |
	    PF_ERR_PANIC | PF_ERR_BAD_RESPONSE)) {

		uint16_t affected_flag, dev_affected_flags;
		uint_t is_panic = 0, is_aff_dev_found = 0;

		dev_affected_flags = PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags;
		/* adjust affected flags to leverage cached domain ids */
		if (dev_affected_flags & PF_AFFECTED_CHILDREN) {
			dev_affected_flags |= PF_AFFECTED_SELF;
			dev_affected_flags &= ~PF_AFFECTED_CHILDREN;
		}

		/*
		 * Try every single-bit affected flag in turn; flags that are
		 * not set for this device, or that do not resolve to a
		 * device, yield NULL and are skipped.
		 */
		for (affected_flag = 1;
		    affected_flag <= PF_MAX_AFFECTED_FLAG;
		    affected_flag <<= 1) {
			a_bus_p = pciev_get_affected_dev(impl, pfd_p,
			    affected_flag, dev_affected_flags);

			if (a_bus_p == NULL)
				continue;

			is_aff_dev_found++;
			PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf =
			    a_bus_p->bus_bdf;

			/*
			 * If a leaf device is assigned to the root domain or if
			 * a bridge has children assigned to a root domain
			 * panic.
			 *
			 * If a leaf device or a child of a bridge is assigned
			 * to NFMA domain mark it for panic.  If assigned to FMA
			 * domain save the domain id.
			 */
			if (!PCIE_IS_BDG(a_bus_p) &&
			    !PCIE_IS_ASSIGNED(a_bus_p)) {
				if (severity & PF_ERR_FATAL_FLAGS)
					is_panic++;
				continue;
			}

			if (PCIE_BDG_HAS_CHILDREN_ROOT_DOM(a_bus_p)) {
				if (severity & PF_ERR_FATAL_FLAGS)
					is_panic++;
			}

			if ((PCIE_ASSIGNED_TO_NFMA_DOM(a_bus_p) ||
			    PCIE_BDG_HAS_CHILDREN_NFMA_DOM(a_bus_p)) &&
			    (severity & PF_ERR_FATAL_FLAGS)) {
				PCIE_BUS2DOM(a_bus_p)->nfma_panic = B_TRUE;
				iov_severity |= PF_ERR_MATCH_DOM;
			}

			/* Leaf in an FMA domain: record its own domain id. */
			if (PCIE_ASSIGNED_TO_FMA_DOM(a_bus_p)) {
				pcie_save_domain_id(
				    &PCIE_BUS2DOM(a_bus_p)->domain.id);
				iov_severity |= PF_ERR_MATCH_DOM;
			}

			/* Bridge: record the cached ids of its children. */
			if (PCIE_BDG_HAS_CHILDREN_FMA_DOM(a_bus_p)) {
				pcie_save_domain_id(
				    PCIE_DOMAIN_LIST_GET(a_bus_p));
				iov_severity |= PF_ERR_MATCH_DOM;
			}
		}

		/*
		 * Overwrite the severity only if affected device can be
		 * identified and root domain does not need to panic.
		 */
		if ((!is_panic) && is_aff_dev_found) {
			iov_severity &= ~PF_ERR_FATAL_FLAGS;
		}
	}

	return (iov_severity);
}
386 
387 /* ARGSUSED */
388 void
389 pciev_eh_exit(pf_data_t *root_pfd_p, uint_t intr_type)
390 {
391 	pcie_bus_t *root_bus_p;
392 
393 	/*
394 	 * check if all devices under the root device are unassigned.
395 	 * this function should quickly return in non-IOV environment.
396 	 */
397 	root_bus_p = PCIE_PFD2BUS(root_pfd_p);
398 	if (PCIE_BDG_IS_UNASSIGNED(root_bus_p))
399 		return;
400 
401 	pcie_faulty_list_clear();
402 }
403 
404 pcie_bus_t *
405 pciev_get_affected_dev(pf_impl_t *impl, pf_data_t *pfd_p,
406     uint16_t affected_flag, uint16_t dev_affected_flags)
407 {
408 	pcie_bus_t *bus_p = PCIE_PFD2BUS(pfd_p);
409 	uint16_t flag = affected_flag & dev_affected_flags;
410 	pcie_bus_t *temp_bus_p;
411 	pcie_req_id_t a_bdf;
412 	uint64_t a_addr;
413 	uint16_t cmd;
414 
415 	if (!flag)
416 		return (NULL);
417 
418 	switch (flag) {
419 	case PF_AFFECTED_ROOT:
420 		return (PCIE_DIP2BUS(bus_p->bus_rp_dip));
421 	case PF_AFFECTED_SELF:
422 		return (bus_p);
423 	case PF_AFFECTED_PARENT:
424 		return (PCIE_DIP2BUS(ddi_get_parent(PCIE_BUS2DIP(bus_p))));
425 	case PF_AFFECTED_BDF: /* may only be used for RC */
426 		a_bdf = PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf;
427 		if (!PCIE_CHECK_VALID_BDF(a_bdf))
428 			return (NULL);
429 
430 		temp_bus_p = pf_find_busp_by_bdf(impl, a_bdf);
431 		return (temp_bus_p);
432 	case PF_AFFECTED_AER:
433 		if (pf_tlp_decode(bus_p, PCIE_ADV_REG(pfd_p)) == DDI_SUCCESS) {
434 			temp_bus_p = pf_find_busp_by_aer(impl, pfd_p);
435 			return (temp_bus_p);
436 		}
437 		break;
438 	case PF_AFFECTED_SAER:
439 		if (pf_pci_decode(pfd_p, &cmd) == DDI_SUCCESS) {
440 			temp_bus_p = pf_find_busp_by_saer(impl, pfd_p);
441 			return (temp_bus_p);
442 		}
443 		break;
444 	case PF_AFFECTED_ADDR: /* ROOT only */
445 		a_addr = PCIE_ROOT_FAULT(pfd_p)->scan_addr;
446 		temp_bus_p = pf_find_busp_by_addr(impl, a_addr);
447 		return (temp_bus_p);
448 	}
449 
450 	return (NULL);
451 }
452 
/*
 * Tells pcie_domain_list_find() which link of a pcie_domains_t to follow
 * while it walks a list.
 */
typedef enum {
	PCIE_DOM_LIST_TYPE_CACHE = 1,	/* follow cached_next */
	PCIE_DOM_LIST_TYPE_FAULT	/* follow faulty_next; value is 2 */
} pcie_dom_list_type_t;
458 
459 /*
460  * Check if a domain id is already in the linked list
461  */
462 static pcie_domains_t *
463 pcie_domain_list_find(uint_t domain_id, pcie_domains_t *pd_list_p,
464     pcie_dom_list_type_t type)
465 {
466 	while (pd_list_p) {
467 		if (pd_list_p->domain_id == domain_id)
468 			return (pd_list_p);
469 
470 		if (type == PCIE_DOM_LIST_TYPE_CACHE) {
471 			pd_list_p = pd_list_p->cached_next;
472 		} else if (type == PCIE_DOM_LIST_TYPE_FAULT) {
473 			pd_list_p = pd_list_p->faulty_next;
474 		} else {
475 			return (NULL);
476 		}
477 	}
478 
479 	return (NULL);
480 }
481 
482 /*
483  * Return true if a leaf device is assigned to a domain or a bridge device
484  * has children assigned to the domain
485  */
486 boolean_t
487 pcie_in_domain(pcie_bus_t *bus_p, uint_t domain_id)
488 {
489 	if (PCIE_IS_BDG(bus_p)) {
490 		pcie_domains_t *pd;
491 		pd = pcie_domain_list_find(domain_id,
492 		    PCIE_DOMAIN_LIST_GET(bus_p), PCIE_DOM_LIST_TYPE_CACHE);
493 		if (pd && pd->cached_count)
494 			return (B_TRUE);
495 		return (B_FALSE);
496 	} else {
497 		return (PCIE_DOMAIN_ID_GET(bus_p) == domain_id);
498 	}
499 }
500 
501 /*
502  * Add a domain id to a cached domain id list.
503  * If the domain already exists in the list, increment the reference count.
504  */
505 void
506 pcie_domain_list_add(uint_t domain_id, pcie_domains_t **pd_list_p)
507 {
508 	pcie_domains_t *pd;
509 
510 	pd = pcie_domain_list_find(domain_id, *pd_list_p,
511 	    PCIE_DOM_LIST_TYPE_CACHE);
512 
513 	if (pd == NULL) {
514 		pd = PCIE_ZALLOC(pcie_domains_t);
515 		pd->domain_id = domain_id;
516 		pd->cached_count = 1;
517 		pd->cached_next = *pd_list_p;
518 		*pd_list_p = pd;
519 	} else
520 		pd->cached_count++;
521 }
522 
523 /*
524  * Remove a domain id from a cached domain id list.
525  * Decrement the reference count.
526  */
527 void
528 pcie_domain_list_remove(uint_t domain_id, pcie_domains_t *pd_list_p)
529 {
530 	pcie_domains_t *pd;
531 
532 	pd = pcie_domain_list_find(domain_id, pd_list_p,
533 	    PCIE_DOM_LIST_TYPE_CACHE);
534 
535 	if (pd) {
536 		ASSERT((pd->cached_count)--);
537 	}
538 }
539 
540 /* destroy cached domain id list */
541 static void
542 pcie_domain_list_destroy(pcie_domains_t *domain_ids)
543 {
544 	pcie_domains_t *p = domain_ids;
545 	pcie_domains_t *next;
546 
547 	while (p) {
548 		next = p->cached_next;
549 		kmem_free(p, sizeof (pcie_domains_t));
550 		p = next;
551 	}
552 }
553 
554 static void
555 pcie_faulty_list_update(pcie_domains_t *pd,
556     pcie_domains_t **headp)
557 {
558 	if (pd == NULL)
559 		return;
560 
561 	if (*headp == NULL) {
562 		*headp = pd;
563 		pd->faulty_prev = NULL;
564 		pd->faulty_next = NULL;
565 		pd->faulty_count = 1;
566 	} else {
567 		pd->faulty_next = *headp;
568 		(*headp)->faulty_prev = pd;
569 		pd->faulty_prev = NULL;
570 		pd->faulty_count = 1;
571 		*headp = pd;
572 	}
573 }
574 
575 static void
576 pcie_faulty_list_clear()
577 {
578 	pcie_domains_t *pd = pcie_faulty_domains;
579 	pcie_domains_t *next;
580 
581 	/* unlink all domain structures from the faulty list */
582 	while (pd) {
583 		next = pd->faulty_next;
584 		pd->faulty_prev = NULL;
585 		pd->faulty_next = NULL;
586 		pd->faulty_count = 0;
587 		pd = next;
588 	}
589 	pcie_faulty_domains = NULL;
590 	pcie_faulty_all = B_FALSE;
591 }
592 
593 void
594 pcie_save_domain_id(pcie_domains_t *domain_ids)
595 {
596 	pcie_domains_t *old_list_p, *new_list_p, *pd;
597 
598 	if (pcie_faulty_all)
599 		return;
600 
601 	if (domain_ids == NULL)
602 		return;
603 
604 	old_list_p = pcie_faulty_domains;
605 	for (new_list_p = domain_ids; new_list_p;
606 	    new_list_p = new_list_p->cached_next) {
607 		if (!new_list_p->cached_count)
608 			continue;
609 
610 		/* search domain id in the faulty domain list */
611 		pd = pcie_domain_list_find(new_list_p->domain_id,
612 		    old_list_p, PCIE_DOM_LIST_TYPE_FAULT);
613 		if (pd)
614 			pd->faulty_count++;
615 		else
616 			pcie_faulty_list_update(new_list_p,
617 			    &pcie_faulty_domains);
618 	}
619 }
620