1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Copyright (c) 2017, Joyent, Inc.
28 */
29
30 #include <sys/types.h>
31 #include <sys/ddi.h>
32 #include <sys/dditypes.h>
33 #include <sys/ddifm.h>
34 #include <sys/sunndi.h>
35 #include <sys/devops.h>
36 #include <sys/pcie.h>
37 #include <sys/pci_cap.h>
38 #include <sys/pcie_impl.h>
39 #include <sys/pathname.h>
40
/*
 * The below 2 global variables are for PCIe IOV Error Handling. They must only
 * be accessed during error handling under the protection of an error mutex.
 */
/* Head of the list of IO domains touched by the error currently being handled. */
static pcie_domains_t *pcie_faulty_domains = NULL;
/* Set on PF_ERR_PANIC_DEADLOCK: treat every domain as faulty, skip per-domain tracking. */
static boolean_t pcie_faulty_all = B_FALSE;
47
48 static void pcie_domain_list_destroy(pcie_domains_t *domain_ids);
49 static void pcie_bdf_list_add(pcie_req_id_t bdf,
50 pcie_req_id_list_t **rlist_p);
51 static void pcie_bdf_list_remove(pcie_req_id_t bdf,
52 pcie_req_id_list_t **rlist_p);
53 static void pcie_cache_domain_info(pcie_bus_t *bus_p);
54 static void pcie_uncache_domain_info(pcie_bus_t *bus_p);
55
56 static void pcie_faulty_list_clear();
57 static void pcie_faulty_list_update(pcie_domains_t *pd,
58 pcie_domains_t **headp);
59
60 dev_info_t *
pcie_find_dip_by_bdf(dev_info_t * rootp,pcie_req_id_t bdf)61 pcie_find_dip_by_bdf(dev_info_t *rootp, pcie_req_id_t bdf)
62 {
63 dev_info_t *dip;
64 pcie_bus_t *bus_p;
65 int bus_num;
66
67 dip = ddi_get_child(rootp);
68 while (dip) {
69 bus_p = PCIE_DIP2BUS(dip);
70 if (bus_p && (bus_p->bus_bdf == bdf))
71 return (dip);
72 if (bus_p) {
73 bus_num = (bdf >> 8) & 0xff;
74 if ((bus_num >= bus_p->bus_bus_range.lo &&
75 bus_num <= bus_p->bus_bus_range.hi) ||
76 bus_p->bus_bus_range.hi == 0)
77 return (pcie_find_dip_by_bdf(dip, bdf));
78 }
79 dip = ddi_get_next_sibling(dip);
80 }
81 return (NULL);
82 }
83
84 /*
85 * Add a device bdf to the bdf list.
86 */
87 static void
pcie_bdf_list_add(pcie_req_id_t bdf,pcie_req_id_list_t ** rlist_p)88 pcie_bdf_list_add(pcie_req_id_t bdf, pcie_req_id_list_t **rlist_p)
89 {
90 pcie_req_id_list_t *rl = PCIE_ZALLOC(pcie_req_id_list_t);
91
92 rl->bdf = bdf;
93 rl->next = *rlist_p;
94 *rlist_p = rl;
95 }
96
97 /*
98 * Remove a bdf from the bdf list.
99 */
100 static void
pcie_bdf_list_remove(pcie_req_id_t bdf,pcie_req_id_list_t ** rlist_p)101 pcie_bdf_list_remove(pcie_req_id_t bdf, pcie_req_id_list_t **rlist_p)
102 {
103 pcie_req_id_list_t *rl_pre, *rl_next;
104
105 rl_pre = *rlist_p;
106 if (rl_pre->bdf == bdf) {
107 *rlist_p = rl_pre->next;
108 kmem_free(rl_pre, sizeof (pcie_req_id_list_t));
109 return;
110 }
111
112 while (rl_pre->next) {
113 rl_next = rl_pre->next;
114 if (rl_next->bdf == bdf) {
115 rl_pre->next = rl_next->next;
116 kmem_free(rl_next, sizeof (pcie_req_id_list_t));
117 break;
118 } else
119 rl_pre = rl_next;
120 }
121 }
122
123 /*
124 * Cache IOV domain info in all it's parent's pcie_domain_t
125 *
126 * The leaf devices's domain info must be set before calling this function.
127 */
128 void
pcie_cache_domain_info(pcie_bus_t * bus_p)129 pcie_cache_domain_info(pcie_bus_t *bus_p)
130 {
131 boolean_t assigned = PCIE_IS_ASSIGNED(bus_p);
132 boolean_t fma_dom = PCIE_ASSIGNED_TO_FMA_DOM(bus_p);
133 uint_t domain_id = PCIE_DOMAIN_ID_GET(bus_p);
134 pcie_req_id_t bdf = bus_p->bus_bdf;
135 dev_info_t *pdip;
136 pcie_bus_t *pbus_p;
137 pcie_domain_t *pdom_p;
138
139 ASSERT(!PCIE_IS_BDG(bus_p));
140
141 for (pdip = ddi_get_parent(PCIE_BUS2DIP(bus_p)); PCIE_DIP2BUS(pdip);
142 pdip = ddi_get_parent(pdip)) {
143 pbus_p = PCIE_DIP2BUS(pdip);
144 pdom_p = PCIE_BUS2DOM(pbus_p);
145
146 if (assigned) {
147 if (domain_id)
148 PCIE_DOMAIN_LIST_ADD(pbus_p, domain_id);
149
150 if (fma_dom)
151 pdom_p->fmadom_count++;
152 else {
153 PCIE_BDF_LIST_ADD(pbus_p, bdf);
154 pdom_p->nfmadom_count++;
155 }
156 } else
157 pdom_p->rootdom_count++;
158 }
159 }
160
161 /*
162 * Clear the leaf device's domain info and uncache IOV domain info in all it's
163 * parent's pcie_domain_t
164 *
165 * The leaf devices's domain info is also cleared by calling this function.
166 */
167 void
pcie_uncache_domain_info(pcie_bus_t * bus_p)168 pcie_uncache_domain_info(pcie_bus_t *bus_p)
169 {
170 boolean_t assigned = PCIE_IS_ASSIGNED(bus_p);
171 boolean_t fma_dom = PCIE_ASSIGNED_TO_FMA_DOM(bus_p);
172 uint_t domain_id = PCIE_DOMAIN_ID_GET(bus_p);
173 pcie_domain_t *dom_p = PCIE_BUS2DOM(bus_p), *pdom_p;
174 pcie_bus_t *pbus_p;
175 dev_info_t *pdip;
176
177 ASSERT(!PCIE_IS_BDG(bus_p));
178 ASSERT((dom_p->fmadom_count + dom_p->nfmadom_count +
179 dom_p->rootdom_count) == 1);
180
181 /* Clear the domain information */
182 if (domain_id) {
183 PCIE_DOMAIN_ID_SET(bus_p, 0);
184 PCIE_DOMAIN_ID_DECR_REF_COUNT(bus_p);
185 }
186
187 dom_p->fmadom_count = 0;
188 dom_p->nfmadom_count = 0;
189 dom_p->rootdom_count = 0;
190
191 for (pdip = ddi_get_parent(PCIE_BUS2DIP(bus_p)); PCIE_DIP2BUS(pdip);
192 pdip = ddi_get_parent(pdip)) {
193 pbus_p = PCIE_DIP2BUS(pdip);
194 pdom_p = PCIE_BUS2DOM(pbus_p);
195
196 if (assigned) {
197 if (domain_id)
198 PCIE_DOMAIN_LIST_REMOVE(pbus_p, domain_id);
199
200 if (fma_dom)
201 pdom_p->fmadom_count--;
202 else {
203 pdom_p->nfmadom_count--;
204 PCIE_BDF_LIST_REMOVE(pbus_p, bus_p->bus_bdf);
205 }
206 } else
207 pdom_p->rootdom_count--;
208 }
209 }
210
211
212 /*
213 * Initialize private data structure for IOV environments.
214 * o Allocate memory for iov data
215 * o Cache Domain ids.
216 */
217 void
pcie_init_dom(dev_info_t * dip)218 pcie_init_dom(dev_info_t *dip)
219 {
220 pcie_domain_t *dom_p = PCIE_ZALLOC(pcie_domain_t);
221 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
222
223 PCIE_BUS2DOM(bus_p) = dom_p;
224
225 /* Only leaf devices are assignable to IO Domains */
226 if (PCIE_IS_BDG(bus_p))
227 return;
228
229 /*
230 * At the time of init_dom in the root domain a device may or may not
231 * have been assigned to an IO Domain.
232 *
233 * LDOMS: the property "ddi-assigned" will be set for devices that is
234 * assignable to an IO domain and unusable in the root domain. If the
235 * property exist assume it has been assigned to a non-fma domain until
236 * otherwise notified. The domain id is unknown on LDOMS.
237 *
238 * Xen: the "ddi-assigned" property won't be set until Xen store calls
239 * pcie_loan_device is called. In this function this will always look
240 * like the device is assigned to the root domain. Domain ID caching
241 * will occur in pcie_loan_device function.
242 */
243 if (ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
244 "ddi-assigned", -1) != -1) {
245 dom_p->nfmadom_count = 1;
246
247 /* Prevent "assigned" device from detaching */
248 ndi_hold_devi(dip);
249 } else
250 dom_p->rootdom_count = 1;
251 (void) ddi_prop_remove(DDI_DEV_T_NONE, dip, "ddi-assigned");
252
253 pcie_cache_domain_info(bus_p);
254 }
255
256 void
pcie_fini_dom(dev_info_t * dip)257 pcie_fini_dom(dev_info_t *dip)
258 {
259 pcie_domain_t *dom_p = PCIE_DIP2DOM(dip);
260 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
261
262 if (PCIE_IS_BDG(bus_p))
263 pcie_domain_list_destroy(PCIE_DOMAIN_LIST_GET(bus_p));
264 else
265 pcie_uncache_domain_info(bus_p);
266
267 kmem_free(dom_p, sizeof (pcie_domain_t));
268 }
269
270 /*
271 * PCIe Severity:
272 *
273 * PF_ERR_NO_ERROR : no IOV Action
274 * PF_ERR_CE : no IOV Action
275 * PF_ERR_NO_PANIC : contains error telemetry, log domain info
276 * PF_ERR_MATCHED_DEVICE: contains error telemetry, log domain info
277 * PF_ERR_MATCHED_RC : Error already taken care of, no further IOV Action
278 * PF_ERR_MATCHED_PARENT: Error already taken care of, no further IOV Action
279 * PF_ERR_PANIC : contains error telemetry, log domain info
280 *
281 * For NO_PANIC, MATCHED_DEVICE and PANIC, IOV wants to look at the affected
282 * devices and find the domains involved.
283 *
284 * If root domain does not own an affected device, IOV EH should change
285 * PF_ERR_PANIC to PF_ERR_MATCH_DOM.
286 */
int
pciev_eh(pf_data_t *pfd_p, pf_impl_t *impl)
{
	int severity = pfd_p->pe_severity_flags;
	int iov_severity = severity;	/* returned, possibly downgraded */
	pcie_bus_t *a_bus_p;	/* Affected device's pcie_bus_t */
	pf_data_t *root_pfd_p = impl->pf_dq_head_p;
	pcie_bus_t *root_bus_p;

	/*
	 * check if all devices under the root device are unassigned.
	 * this function should quickly return in non-IOV environment.
	 */
	ASSERT(root_pfd_p != NULL);
	root_bus_p = PCIE_PFD2BUS(root_pfd_p);
	if (PCIE_BDG_IS_UNASSIGNED(root_bus_p))
		return (severity);

	if (severity & PF_ERR_PANIC_DEADLOCK) {
		/*
		 * Deadlock: per-domain tracking is not possible, flag all
		 * domains as faulty (consumed later via pciev_eh_exit /
		 * pcie_faulty_list_clear).
		 */
		pcie_faulty_all = B_TRUE;

	} else if (severity & (PF_ERR_NO_PANIC | PF_ERR_MATCHED_DEVICE |
	    PF_ERR_PANIC | PF_ERR_BAD_RESPONSE)) {

		uint16_t affected_flag, dev_affected_flags;
		uint_t is_panic = 0, is_aff_dev_found = 0;

		dev_affected_flags = PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags;
		/* adjust affected flags to leverage cached domain ids */
		if (dev_affected_flags & PF_AFFECTED_CHILDREN) {
			dev_affected_flags |= PF_AFFECTED_SELF;
			dev_affected_flags &= ~PF_AFFECTED_CHILDREN;
		}

		/* Walk each single-bit affected-device category in turn. */
		for (affected_flag = 1;
		    affected_flag <= PF_MAX_AFFECTED_FLAG;
		    affected_flag <<= 1) {
			a_bus_p = pciev_get_affected_dev(impl, pfd_p,
			    affected_flag, dev_affected_flags);

			if (a_bus_p == NULL)
				continue;

			is_aff_dev_found++;
			/* Record the (last) affected device's BDF. */
			PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf =
			    a_bus_p->bus_bdf;

			/*
			 * If a leaf device is assigned to the root domain or if
			 * a bridge has children assigned to a root domain
			 * panic.
			 *
			 * If a leaf device or a child of a bridge is assigned
			 * to NFMA domain mark it for panic. If assigned to FMA
			 * domain save the domain id.
			 */
			if (!PCIE_IS_BDG(a_bus_p) &&
			    !PCIE_IS_ASSIGNED(a_bus_p)) {
				/* unassigned leaf owned by the root domain */
				if (severity & PF_ERR_FATAL_FLAGS)
					is_panic++;
				continue;
			}

			if (PCIE_BDG_HAS_CHILDREN_ROOT_DOM(a_bus_p)) {
				if (severity & PF_ERR_FATAL_FLAGS)
					is_panic++;
			}

			if ((PCIE_ASSIGNED_TO_NFMA_DOM(a_bus_p) ||
			    PCIE_BDG_HAS_CHILDREN_NFMA_DOM(a_bus_p)) &&
			    (severity & PF_ERR_FATAL_FLAGS)) {
				/* non-FMA domain must do the panicking itself */
				PCIE_BUS2DOM(a_bus_p)->nfma_panic = B_TRUE;
				iov_severity |= PF_ERR_MATCH_DOM;
			}

			if (PCIE_ASSIGNED_TO_FMA_DOM(a_bus_p)) {
				pcie_save_domain_id(
				    &PCIE_BUS2DOM(a_bus_p)->domain.id);
				iov_severity |= PF_ERR_MATCH_DOM;
			}

			if (PCIE_BDG_HAS_CHILDREN_FMA_DOM(a_bus_p)) {
				pcie_save_domain_id(
				    PCIE_DOMAIN_LIST_GET(a_bus_p));
				iov_severity |= PF_ERR_MATCH_DOM;
			}
		}

		/*
		 * Overwrite the severity only if affected device can be
		 * identified and root domain does not need to panic.
		 */
		if ((!is_panic) && is_aff_dev_found) {
			iov_severity &= ~PF_ERR_FATAL_FLAGS;
		}
	}

	return (iov_severity);
}
386
387 /* ARGSUSED */
388 void
pciev_eh_exit(pf_data_t * root_pfd_p,uint_t intr_type)389 pciev_eh_exit(pf_data_t *root_pfd_p, uint_t intr_type)
390 {
391 pcie_bus_t *root_bus_p;
392
393 /*
394 * check if all devices under the root device are unassigned.
395 * this function should quickly return in non-IOV environment.
396 */
397 root_bus_p = PCIE_PFD2BUS(root_pfd_p);
398 if (PCIE_BDG_IS_UNASSIGNED(root_bus_p))
399 return;
400
401 pcie_faulty_list_clear();
402 }
403
404 pcie_bus_t *
pciev_get_affected_dev(pf_impl_t * impl,pf_data_t * pfd_p,uint16_t affected_flag,uint16_t dev_affected_flags)405 pciev_get_affected_dev(pf_impl_t *impl, pf_data_t *pfd_p,
406 uint16_t affected_flag, uint16_t dev_affected_flags)
407 {
408 pcie_bus_t *bus_p = PCIE_PFD2BUS(pfd_p);
409 uint16_t flag = affected_flag & dev_affected_flags;
410 pcie_bus_t *temp_bus_p;
411 pcie_req_id_t a_bdf;
412 uint64_t a_addr;
413 uint16_t cmd;
414
415 if (!flag)
416 return (NULL);
417
418 switch (flag) {
419 case PF_AFFECTED_ROOT:
420 return (PCIE_DIP2BUS(bus_p->bus_rp_dip));
421 case PF_AFFECTED_SELF:
422 return (bus_p);
423 case PF_AFFECTED_PARENT:
424 return (PCIE_DIP2BUS(ddi_get_parent(PCIE_BUS2DIP(bus_p))));
425 case PF_AFFECTED_BDF: /* may only be used for RC */
426 a_bdf = PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf;
427 if (!PCIE_CHECK_VALID_BDF(a_bdf))
428 return (NULL);
429
430 temp_bus_p = pf_find_busp_by_bdf(impl, a_bdf);
431 return (temp_bus_p);
432 case PF_AFFECTED_AER:
433 if (pf_tlp_decode(bus_p, PCIE_ADV_REG(pfd_p)) == DDI_SUCCESS) {
434 temp_bus_p = pf_find_busp_by_aer(impl, pfd_p);
435 return (temp_bus_p);
436 }
437 break;
438 case PF_AFFECTED_SAER:
439 if (pf_pci_decode(pfd_p, &cmd) == DDI_SUCCESS) {
440 temp_bus_p = pf_find_busp_by_saer(impl, pfd_p);
441 return (temp_bus_p);
442 }
443 break;
444 case PF_AFFECTED_ADDR: /* ROOT only */
445 a_addr = PCIE_ROOT_FAULT(pfd_p)->scan_addr;
446 temp_bus_p = pf_find_busp_by_addr(impl, a_addr);
447 return (temp_bus_p);
448 }
449
450 return (NULL);
451 }
452
/* type used for pcie_domain_list_find() function */
typedef enum {
	PCIE_DOM_LIST_TYPE_CACHE = 1,	/* traverse via cached_next links */
	PCIE_DOM_LIST_TYPE_FAULT = 2	/* traverse via faulty_next links */
} pcie_dom_list_type_t;
458
459 /*
460 * Check if a domain id is already in the linked list
461 */
462 static pcie_domains_t *
pcie_domain_list_find(uint_t domain_id,pcie_domains_t * pd_list_p,pcie_dom_list_type_t type)463 pcie_domain_list_find(uint_t domain_id, pcie_domains_t *pd_list_p,
464 pcie_dom_list_type_t type)
465 {
466 while (pd_list_p) {
467 if (pd_list_p->domain_id == domain_id)
468 return (pd_list_p);
469
470 if (type == PCIE_DOM_LIST_TYPE_CACHE) {
471 pd_list_p = pd_list_p->cached_next;
472 } else if (type == PCIE_DOM_LIST_TYPE_FAULT) {
473 pd_list_p = pd_list_p->faulty_next;
474 } else {
475 return (NULL);
476 }
477 }
478
479 return (NULL);
480 }
481
482 /*
483 * Return true if a leaf device is assigned to a domain or a bridge device
484 * has children assigned to the domain
485 */
486 boolean_t
pcie_in_domain(pcie_bus_t * bus_p,uint_t domain_id)487 pcie_in_domain(pcie_bus_t *bus_p, uint_t domain_id)
488 {
489 if (PCIE_IS_BDG(bus_p)) {
490 pcie_domains_t *pd;
491 pd = pcie_domain_list_find(domain_id,
492 PCIE_DOMAIN_LIST_GET(bus_p), PCIE_DOM_LIST_TYPE_CACHE);
493 if (pd && pd->cached_count)
494 return (B_TRUE);
495 return (B_FALSE);
496 } else {
497 return (PCIE_DOMAIN_ID_GET(bus_p) == domain_id);
498 }
499 }
500
501 /*
502 * Add a domain id to a cached domain id list.
503 * If the domain already exists in the list, increment the reference count.
504 */
505 void
pcie_domain_list_add(uint_t domain_id,pcie_domains_t ** pd_list_p)506 pcie_domain_list_add(uint_t domain_id, pcie_domains_t **pd_list_p)
507 {
508 pcie_domains_t *pd;
509
510 pd = pcie_domain_list_find(domain_id, *pd_list_p,
511 PCIE_DOM_LIST_TYPE_CACHE);
512
513 if (pd == NULL) {
514 pd = PCIE_ZALLOC(pcie_domains_t);
515 pd->domain_id = domain_id;
516 pd->cached_count = 1;
517 pd->cached_next = *pd_list_p;
518 *pd_list_p = pd;
519 } else
520 pd->cached_count++;
521 }
522
523 /*
524 * Remove a domain id from a cached domain id list.
525 * Decrement the reference count.
526 */
527 void
pcie_domain_list_remove(uint_t domain_id,pcie_domains_t * pd_list_p)528 pcie_domain_list_remove(uint_t domain_id, pcie_domains_t *pd_list_p)
529 {
530 pcie_domains_t *pd;
531
532 pd = pcie_domain_list_find(domain_id, pd_list_p,
533 PCIE_DOM_LIST_TYPE_CACHE);
534
535 if (pd) {
536 ASSERT((pd->cached_count)--);
537 }
538 }
539
540 /* destroy cached domain id list */
541 static void
pcie_domain_list_destroy(pcie_domains_t * domain_ids)542 pcie_domain_list_destroy(pcie_domains_t *domain_ids)
543 {
544 pcie_domains_t *p = domain_ids;
545 pcie_domains_t *next;
546
547 while (p) {
548 next = p->cached_next;
549 kmem_free(p, sizeof (pcie_domains_t));
550 p = next;
551 }
552 }
553
554 static void
pcie_faulty_list_update(pcie_domains_t * pd,pcie_domains_t ** headp)555 pcie_faulty_list_update(pcie_domains_t *pd,
556 pcie_domains_t **headp)
557 {
558 if (pd == NULL)
559 return;
560
561 if (*headp == NULL) {
562 *headp = pd;
563 pd->faulty_prev = NULL;
564 pd->faulty_next = NULL;
565 pd->faulty_count = 1;
566 } else {
567 pd->faulty_next = *headp;
568 (*headp)->faulty_prev = pd;
569 pd->faulty_prev = NULL;
570 pd->faulty_count = 1;
571 *headp = pd;
572 }
573 }
574
575 static void
pcie_faulty_list_clear()576 pcie_faulty_list_clear()
577 {
578 pcie_domains_t *pd = pcie_faulty_domains;
579 pcie_domains_t *next;
580
581 /* unlink all domain structures from the faulty list */
582 while (pd) {
583 next = pd->faulty_next;
584 pd->faulty_prev = NULL;
585 pd->faulty_next = NULL;
586 pd->faulty_count = 0;
587 pd = next;
588 }
589 pcie_faulty_domains = NULL;
590 pcie_faulty_all = B_FALSE;
591 }
592
593 void
pcie_save_domain_id(pcie_domains_t * domain_ids)594 pcie_save_domain_id(pcie_domains_t *domain_ids)
595 {
596 pcie_domains_t *old_list_p, *new_list_p, *pd;
597
598 if (pcie_faulty_all)
599 return;
600
601 if (domain_ids == NULL)
602 return;
603
604 old_list_p = pcie_faulty_domains;
605 for (new_list_p = domain_ids; new_list_p;
606 new_list_p = new_list_p->cached_next) {
607 if (!new_list_p->cached_count)
608 continue;
609
610 /* search domain id in the faulty domain list */
611 pd = pcie_domain_list_find(new_list_p->domain_id,
612 old_list_p, PCIE_DOM_LIST_TYPE_FAULT);
613 if (pd)
614 pd->faulty_count++;
615 else
616 pcie_faulty_list_update(new_list_p,
617 &pcie_faulty_domains);
618 }
619 }
620