1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include <sys/types.h>
27 #include <sys/ddi.h>
28 #include <sys/dditypes.h>
29 #include <sys/ddifm.h>
30 #include <sys/sunndi.h>
31 #include <sys/devops.h>
32 #include <sys/pcie.h>
33 #include <sys/pci_cap.h>
34 #include <sys/pcie_impl.h>
35 #include <sys/pathname.h>
36
37 /*
38 * The below 2 global variables are for PCIe IOV Error Handling. They must only
39 * be accessed during error handling under the protection of a error mutex.
40 */
41 static pcie_domains_t *pcie_faulty_domains = NULL;
42 static boolean_t pcie_faulty_all = B_FALSE;
43
44 static void pcie_domain_list_destroy(pcie_domains_t *domain_ids);
45 static void pcie_bdf_list_add(pcie_req_id_t bdf,
46 pcie_req_id_list_t **rlist_p);
47 static void pcie_bdf_list_remove(pcie_req_id_t bdf,
48 pcie_req_id_list_t **rlist_p);
49 static void pcie_cache_domain_info(pcie_bus_t *bus_p);
50 static void pcie_uncache_domain_info(pcie_bus_t *bus_p);
51
52 static void pcie_faulty_list_clear();
53 static void pcie_faulty_list_update(pcie_domains_t *pd,
54 pcie_domains_t **headp);
55
56 dev_info_t *
pcie_find_dip_by_bdf(dev_info_t * rootp,pcie_req_id_t bdf)57 pcie_find_dip_by_bdf(dev_info_t *rootp, pcie_req_id_t bdf)
58 {
59 dev_info_t *dip;
60 pcie_bus_t *bus_p;
61 int bus_num;
62
63 dip = ddi_get_child(rootp);
64 while (dip) {
65 bus_p = PCIE_DIP2BUS(dip);
66 if (bus_p && (bus_p->bus_bdf == bdf))
67 return (dip);
68 if (bus_p) {
69 bus_num = (bdf >> 8) & 0xff;
70 if ((bus_num >= bus_p->bus_bus_range.lo &&
71 bus_num <= bus_p->bus_bus_range.hi) ||
72 bus_p->bus_bus_range.hi == 0)
73 return (pcie_find_dip_by_bdf(dip, bdf));
74 }
75 dip = ddi_get_next_sibling(dip);
76 }
77 return (NULL);
78 }
79
80 /*
81 * Add a device bdf to the bdf list.
82 */
83 static void
pcie_bdf_list_add(pcie_req_id_t bdf,pcie_req_id_list_t ** rlist_p)84 pcie_bdf_list_add(pcie_req_id_t bdf, pcie_req_id_list_t **rlist_p)
85 {
86 pcie_req_id_list_t *rl = PCIE_ZALLOC(pcie_req_id_list_t);
87
88 rl->bdf = bdf;
89 rl->next = *rlist_p;
90 *rlist_p = rl;
91 }
92
93 /*
94 * Remove a bdf from the bdf list.
95 */
96 static void
pcie_bdf_list_remove(pcie_req_id_t bdf,pcie_req_id_list_t ** rlist_p)97 pcie_bdf_list_remove(pcie_req_id_t bdf, pcie_req_id_list_t **rlist_p)
98 {
99 pcie_req_id_list_t *rl_pre, *rl_next;
100
101 rl_pre = *rlist_p;
102 if (rl_pre->bdf == bdf) {
103 *rlist_p = rl_pre->next;
104 kmem_free(rl_pre, sizeof (pcie_req_id_list_t));
105 return;
106 }
107
108 while (rl_pre->next) {
109 rl_next = rl_pre->next;
110 if (rl_next->bdf == bdf) {
111 rl_pre->next = rl_next->next;
112 kmem_free(rl_next, sizeof (pcie_req_id_list_t));
113 break;
114 } else
115 rl_pre = rl_next;
116 }
117 }
118
/*
 * Cache IOV domain info in all it's parent's pcie_domain_t
 *
 * The leaf devices's domain info must be set before calling this function.
 *
 * Every ancestor that has a pcie_bus_t attached gets this leaf accounted
 * in exactly one of three counters: fmadom_count (assigned to an
 * FMA-capable domain), nfmadom_count (assigned to a non-FMA domain; the
 * leaf's bdf is also recorded on the ancestor's bdf list), or
 * rootdom_count (still owned by the root domain).
 */
void
pcie_cache_domain_info(pcie_bus_t *bus_p)
{
	boolean_t assigned = PCIE_IS_ASSIGNED(bus_p);
	boolean_t fma_dom = PCIE_ASSIGNED_TO_FMA_DOM(bus_p);
	uint_t domain_id = PCIE_DOMAIN_ID_GET(bus_p);
	pcie_req_id_t bdf = bus_p->bus_bdf;
	dev_info_t *pdip;
	pcie_bus_t *pbus_p;
	pcie_domain_t *pdom_p;

	/* Only leaf devices may be assigned to IO domains */
	ASSERT(!PCIE_IS_BDG(bus_p));

	/* Walk up from the leaf's parent while a pcie_bus_t is attached */
	for (pdip = ddi_get_parent(PCIE_BUS2DIP(bus_p)); PCIE_DIP2BUS(pdip);
	    pdip = ddi_get_parent(pdip)) {
		pbus_p = PCIE_DIP2BUS(pdip);
		pdom_p = PCIE_BUS2DOM(pbus_p);

		if (assigned) {
			/* Domain id may be unknown (e.g. 0 on LDOMS) */
			if (domain_id)
				PCIE_DOMAIN_LIST_ADD(pbus_p, domain_id);

			if (fma_dom)
				pdom_p->fmadom_count++;
			else {
				/* Track the bdf for non-FMA domain leaves */
				PCIE_BDF_LIST_ADD(pbus_p, bdf);
				pdom_p->nfmadom_count++;
			}
		} else
			pdom_p->rootdom_count++;
	}
}
156
/*
 * Clear the leaf device's domain info and uncache IOV domain info in all it's
 * parent's pcie_domain_t
 *
 * The leaf devices's domain info is also cleared by calling this function.
 *
 * This is the exact inverse of pcie_cache_domain_info(): the same three
 * counters (fmadom/nfmadom/rootdom) are decremented on every ancestor
 * that has a pcie_bus_t attached.
 */
void
pcie_uncache_domain_info(pcie_bus_t *bus_p)
{
	boolean_t assigned = PCIE_IS_ASSIGNED(bus_p);
	boolean_t fma_dom = PCIE_ASSIGNED_TO_FMA_DOM(bus_p);
	uint_t domain_id = PCIE_DOMAIN_ID_GET(bus_p);
	pcie_domain_t *dom_p = PCIE_BUS2DOM(bus_p), *pdom_p;
	pcie_bus_t *pbus_p;
	dev_info_t *pdip;

	/* Only a leaf device, counted exactly once, may be uncached */
	ASSERT(!PCIE_IS_BDG(bus_p));
	ASSERT((dom_p->fmadom_count + dom_p->nfmadom_count +
	    dom_p->rootdom_count) == 1);

	/* Clear the domain information */
	if (domain_id) {
		PCIE_DOMAIN_ID_SET(bus_p, NULL);
		PCIE_DOMAIN_ID_DECR_REF_COUNT(bus_p);
	}

	dom_p->fmadom_count = 0;
	dom_p->nfmadom_count = 0;
	dom_p->rootdom_count = 0;

	/* Reverse the bookkeeping done by pcie_cache_domain_info() */
	for (pdip = ddi_get_parent(PCIE_BUS2DIP(bus_p)); PCIE_DIP2BUS(pdip);
	    pdip = ddi_get_parent(pdip)) {
		pbus_p = PCIE_DIP2BUS(pdip);
		pdom_p = PCIE_BUS2DOM(pbus_p);

		if (assigned) {
			if (domain_id)
				PCIE_DOMAIN_LIST_REMOVE(pbus_p, domain_id);

			if (fma_dom)
				pdom_p->fmadom_count--;
			else {
				pdom_p->nfmadom_count--;
				PCIE_BDF_LIST_REMOVE(pbus_p, bus_p->bus_bdf);
			}
		} else
			pdom_p->rootdom_count--;
	}
}
206
207
/*
 * Initialize private data structure for IOV environments.
 * o Allocate memory for iov data
 * o Cache Domain ids.
 *
 * Called per dev_info node; the allocated pcie_domain_t is hung off the
 * node's pcie_bus_t and released by pcie_fini_dom().
 */
void
pcie_init_dom(dev_info_t *dip)
{
	pcie_domain_t *dom_p = PCIE_ZALLOC(pcie_domain_t);
	pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);

	PCIE_BUS2DOM(bus_p) = dom_p;

	/* Only leaf devices are assignable to IO Domains */
	if (PCIE_IS_BDG(bus_p))
		return;

	/*
	 * At the time of init_dom in the root domain a device may or may not
	 * have been assigned to an IO Domain.
	 *
	 * LDOMS: the property "ddi-assigned" will be set for devices that is
	 * assignable to an IO domain and unusable in the root domain. If the
	 * property exist assume it has been assigned to a non-fma domain until
	 * otherwise notified. The domain id is unknown on LDOMS.
	 *
	 * Xen: the "ddi-assigned" property won't be set until Xen store calls
	 * pcie_loan_device is called. In this function this will always look
	 * like the device is assigned to the root domain. Domain ID caching
	 * will occur in pcie_loan_device function.
	 */
	if (ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "ddi-assigned", -1) != -1) {
		dom_p->nfmadom_count = 1;

		/* Prevent "assigned" device from detaching */
		ndi_hold_devi(dip);
	} else
		dom_p->rootdom_count = 1;
	/*
	 * NOTE(review): the property is removed unconditionally here --
	 * presumably later assignment changes are communicated through
	 * another path (e.g. pcie_loan_device); confirm against callers.
	 */
	(void) ddi_prop_remove(DDI_DEV_T_NONE, dip, "ddi-assigned");

	/* Propagate this leaf's domain state into all ancestors */
	pcie_cache_domain_info(bus_p);
}
251
252 void
pcie_fini_dom(dev_info_t * dip)253 pcie_fini_dom(dev_info_t *dip)
254 {
255 pcie_domain_t *dom_p = PCIE_DIP2DOM(dip);
256 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
257
258 if (PCIE_IS_BDG(bus_p))
259 pcie_domain_list_destroy(PCIE_DOMAIN_LIST_GET(bus_p));
260 else
261 pcie_uncache_domain_info(bus_p);
262
263 kmem_free(dom_p, sizeof (pcie_domain_t));
264 }
265
/*
 * PCIe Severity:
 *
 * PF_ERR_NO_ERROR : no IOV Action
 * PF_ERR_CE : no IOV Action
 * PF_ERR_NO_PANIC : contains error telemetry, log domain info
 * PF_ERR_MATCHED_DEVICE: contains error telemetry, log domain info
 * PF_ERR_MATCHED_RC : Error already taken care of, no further IOV Action
 * PF_ERR_MATCHED_PARENT: Error already taken care of, no further IOV Action
 * PF_ERR_PANIC : contains error telemetry, log domain info
 *
 * For NO_PANIC, MATCHED_DEVICE and PANIC, IOV wants to look at the affected
 * devices and find the domains involved.
 *
 * If root domain does not own an affected device, IOV EH should change
 * PF_ERR_PANIC to PF_ERR_MATCH_DOM.
 *
 * Returns the (possibly adjusted) severity flags for this pf_data_t.
 */
int
pciev_eh(pf_data_t *pfd_p, pf_impl_t *impl)
{
	int severity = pfd_p->pe_severity_flags;
	int iov_severity = severity;
	pcie_bus_t *a_bus_p;	/* Affected device's pcie_bus_t */
	pf_data_t *root_pfd_p = impl->pf_dq_head_p;
	pcie_bus_t *root_bus_p;

	/*
	 * check if all devices under the root device are unassigned.
	 * this function should quickly return in non-IOV environment.
	 */
	ASSERT(root_pfd_p != NULL);
	root_bus_p = PCIE_PFD2BUS(root_pfd_p);
	if (PCIE_BDG_IS_UNASSIGNED(root_bus_p))
		return (severity);

	if (severity & PF_ERR_PANIC_DEADLOCK) {
		/* Deadlock severity: flag every domain as faulty */
		pcie_faulty_all = B_TRUE;

	} else if (severity & (PF_ERR_NO_PANIC | PF_ERR_MATCHED_DEVICE |
	    PF_ERR_PANIC | PF_ERR_PANIC_BAD_RESPONSE)) {

		uint16_t affected_flag, dev_affected_flags;
		uint_t is_panic = 0, is_aff_dev_found = 0;

		dev_affected_flags = PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags;
		/* adjust affected flags to leverage cached domain ids */
		if (dev_affected_flags & PF_AFFECTED_CHILDREN) {
			dev_affected_flags |= PF_AFFECTED_SELF;
			dev_affected_flags &= ~PF_AFFECTED_CHILDREN;
		}

		/* Walk each single-bit affected flag in turn */
		for (affected_flag = 1;
		    affected_flag <= PF_MAX_AFFECTED_FLAG;
		    affected_flag <<= 1) {
			a_bus_p = pciev_get_affected_dev(impl, pfd_p,
			    affected_flag, dev_affected_flags);

			if (a_bus_p == NULL)
				continue;

			is_aff_dev_found++;
			PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf =
			    a_bus_p->bus_bdf;

			/*
			 * If a leaf device is assigned to the root domain or if
			 * a bridge has children assigned to a root domain
			 * panic.
			 *
			 * If a leaf device or a child of a bridge is assigned
			 * to NFMA domain mark it for panic. If assigned to FMA
			 * domain save the domain id.
			 */
			if (!PCIE_IS_BDG(a_bus_p) &&
			    !PCIE_IS_ASSIGNED(a_bus_p)) {
				if (severity & PF_ERR_FATAL_FLAGS)
					is_panic++;
				continue;
			}

			if (PCIE_BDG_HAS_CHILDREN_ROOT_DOM(a_bus_p)) {
				if (severity & PF_ERR_FATAL_FLAGS)
					is_panic++;
			}

			if ((PCIE_ASSIGNED_TO_NFMA_DOM(a_bus_p) ||
			    PCIE_BDG_HAS_CHILDREN_NFMA_DOM(a_bus_p)) &&
			    (severity & PF_ERR_FATAL_FLAGS)) {
				PCIE_BUS2DOM(a_bus_p)->nfma_panic = B_TRUE;
				iov_severity |= PF_ERR_MATCH_DOM;
			}

			if (PCIE_ASSIGNED_TO_FMA_DOM(a_bus_p)) {
				pcie_save_domain_id(
				    &PCIE_BUS2DOM(a_bus_p)->domain.id);
				iov_severity |= PF_ERR_MATCH_DOM;
			}

			if (PCIE_BDG_HAS_CHILDREN_FMA_DOM(a_bus_p)) {
				pcie_save_domain_id(
				    PCIE_DOMAIN_LIST_GET(a_bus_p));
				iov_severity |= PF_ERR_MATCH_DOM;
			}
		}

		/*
		 * Overwrite the severity only if affected device can be
		 * identified and root domain does not need to panic.
		 */
		if ((!is_panic) && is_aff_dev_found) {
			iov_severity &= ~PF_ERR_FATAL_FLAGS;
		}
	}

	return (iov_severity);
}
382
383 /* ARGSUSED */
384 void
pciev_eh_exit(pf_data_t * root_pfd_p,uint_t intr_type)385 pciev_eh_exit(pf_data_t *root_pfd_p, uint_t intr_type)
386 {
387 pcie_bus_t *root_bus_p;
388
389 /*
390 * check if all devices under the root device are unassigned.
391 * this function should quickly return in non-IOV environment.
392 */
393 root_bus_p = PCIE_PFD2BUS(root_pfd_p);
394 if (PCIE_BDG_IS_UNASSIGNED(root_bus_p))
395 return;
396
397 pcie_faulty_list_clear();
398 }
399
400 pcie_bus_t *
pciev_get_affected_dev(pf_impl_t * impl,pf_data_t * pfd_p,uint16_t affected_flag,uint16_t dev_affected_flags)401 pciev_get_affected_dev(pf_impl_t *impl, pf_data_t *pfd_p,
402 uint16_t affected_flag, uint16_t dev_affected_flags)
403 {
404 pcie_bus_t *bus_p = PCIE_PFD2BUS(pfd_p);
405 uint16_t flag = affected_flag & dev_affected_flags;
406 pcie_bus_t *temp_bus_p;
407 pcie_req_id_t a_bdf;
408 uint64_t a_addr;
409 uint16_t cmd;
410
411 if (!flag)
412 return (NULL);
413
414 switch (flag) {
415 case PF_AFFECTED_ROOT:
416 return (PCIE_DIP2BUS(bus_p->bus_rp_dip));
417 case PF_AFFECTED_SELF:
418 return (bus_p);
419 case PF_AFFECTED_PARENT:
420 return (PCIE_DIP2BUS(ddi_get_parent(PCIE_BUS2DIP(bus_p))));
421 case PF_AFFECTED_BDF: /* may only be used for RC */
422 a_bdf = PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf;
423 if (!PCIE_CHECK_VALID_BDF(a_bdf))
424 return (NULL);
425
426 temp_bus_p = pf_find_busp_by_bdf(impl, a_bdf);
427 return (temp_bus_p);
428 case PF_AFFECTED_AER:
429 if (pf_tlp_decode(bus_p, PCIE_ADV_REG(pfd_p)) == DDI_SUCCESS) {
430 temp_bus_p = pf_find_busp_by_aer(impl, pfd_p);
431 return (temp_bus_p);
432 }
433 break;
434 case PF_AFFECTED_SAER:
435 if (pf_pci_decode(pfd_p, &cmd) == DDI_SUCCESS) {
436 temp_bus_p = pf_find_busp_by_saer(impl, pfd_p);
437 return (temp_bus_p);
438 }
439 break;
440 case PF_AFFECTED_ADDR: /* ROOT only */
441 a_addr = PCIE_ROOT_FAULT(pfd_p)->scan_addr;
442 temp_bus_p = pf_find_busp_by_addr(impl, a_addr);
443 return (temp_bus_p);
444 }
445
446 return (NULL);
447 }
448
/*
 * Discriminator for pcie_domain_list_find(): selects which link field
 * (cached_next vs. faulty_next) is followed while walking the list.
 */
typedef enum {
	PCIE_DOM_LIST_TYPE_CACHE = 1,
	PCIE_DOM_LIST_TYPE_FAULT = 2
} pcie_dom_list_type_t;
454
455 /*
456 * Check if a domain id is already in the linked list
457 */
458 static pcie_domains_t *
pcie_domain_list_find(uint_t domain_id,pcie_domains_t * pd_list_p,pcie_dom_list_type_t type)459 pcie_domain_list_find(uint_t domain_id, pcie_domains_t *pd_list_p,
460 pcie_dom_list_type_t type)
461 {
462 while (pd_list_p) {
463 if (pd_list_p->domain_id == domain_id)
464 return (pd_list_p);
465
466 if (type == PCIE_DOM_LIST_TYPE_CACHE) {
467 pd_list_p = pd_list_p->cached_next;
468 } else if (type == PCIE_DOM_LIST_TYPE_FAULT) {
469 pd_list_p = pd_list_p->faulty_next;
470 } else {
471 return (NULL);
472 }
473 }
474
475 return (NULL);
476 }
477
478 /*
479 * Return true if a leaf device is assigned to a domain or a bridge device
480 * has children assigned to the domain
481 */
482 boolean_t
pcie_in_domain(pcie_bus_t * bus_p,uint_t domain_id)483 pcie_in_domain(pcie_bus_t *bus_p, uint_t domain_id)
484 {
485 if (PCIE_IS_BDG(bus_p)) {
486 pcie_domains_t *pd;
487 pd = pcie_domain_list_find(domain_id,
488 PCIE_DOMAIN_LIST_GET(bus_p), PCIE_DOM_LIST_TYPE_CACHE);
489 if (pd && pd->cached_count)
490 return (B_TRUE);
491 return (B_FALSE);
492 } else {
493 return (PCIE_DOMAIN_ID_GET(bus_p) == domain_id);
494 }
495 }
496
497 /*
498 * Add a domain id to a cached domain id list.
499 * If the domain already exists in the list, increment the reference count.
500 */
501 void
pcie_domain_list_add(uint_t domain_id,pcie_domains_t ** pd_list_p)502 pcie_domain_list_add(uint_t domain_id, pcie_domains_t **pd_list_p)
503 {
504 pcie_domains_t *pd;
505
506 pd = pcie_domain_list_find(domain_id, *pd_list_p,
507 PCIE_DOM_LIST_TYPE_CACHE);
508
509 if (pd == NULL) {
510 pd = PCIE_ZALLOC(pcie_domains_t);
511 pd->domain_id = domain_id;
512 pd->cached_count = 1;
513 pd->cached_next = *pd_list_p;
514 *pd_list_p = pd;
515 } else
516 pd->cached_count++;
517 }
518
519 /*
520 * Remove a domain id from a cached domain id list.
521 * Decrement the reference count.
522 */
523 void
pcie_domain_list_remove(uint_t domain_id,pcie_domains_t * pd_list_p)524 pcie_domain_list_remove(uint_t domain_id, pcie_domains_t *pd_list_p)
525 {
526 pcie_domains_t *pd;
527
528 pd = pcie_domain_list_find(domain_id, pd_list_p,
529 PCIE_DOM_LIST_TYPE_CACHE);
530
531 if (pd) {
532 ASSERT((pd->cached_count)--);
533 }
534 }
535
536 /* destroy cached domain id list */
537 static void
pcie_domain_list_destroy(pcie_domains_t * domain_ids)538 pcie_domain_list_destroy(pcie_domains_t *domain_ids)
539 {
540 pcie_domains_t *p = domain_ids;
541 pcie_domains_t *next;
542
543 while (p) {
544 next = p->cached_next;
545 kmem_free(p, sizeof (pcie_domains_t));
546 p = next;
547 }
548 }
549
550 static void
pcie_faulty_list_update(pcie_domains_t * pd,pcie_domains_t ** headp)551 pcie_faulty_list_update(pcie_domains_t *pd,
552 pcie_domains_t **headp)
553 {
554 if (pd == NULL)
555 return;
556
557 if (*headp == NULL) {
558 *headp = pd;
559 pd->faulty_prev = NULL;
560 pd->faulty_next = NULL;
561 pd->faulty_count = 1;
562 } else {
563 pd->faulty_next = *headp;
564 (*headp)->faulty_prev = pd;
565 pd->faulty_prev = NULL;
566 pd->faulty_count = 1;
567 *headp = pd;
568 }
569 }
570
571 static void
pcie_faulty_list_clear()572 pcie_faulty_list_clear()
573 {
574 pcie_domains_t *pd = pcie_faulty_domains;
575 pcie_domains_t *next;
576
577 /* unlink all domain structures from the faulty list */
578 while (pd) {
579 next = pd->faulty_next;
580 pd->faulty_prev = NULL;
581 pd->faulty_next = NULL;
582 pd->faulty_count = 0;
583 pd = next;
584 }
585 pcie_faulty_domains = NULL;
586 pcie_faulty_all = B_FALSE;
587 }
588
589 void
pcie_save_domain_id(pcie_domains_t * domain_ids)590 pcie_save_domain_id(pcie_domains_t *domain_ids)
591 {
592 pcie_domains_t *old_list_p, *new_list_p, *pd;
593
594 if (pcie_faulty_all)
595 return;
596
597 if (domain_ids == NULL)
598 return;
599
600 old_list_p = pcie_faulty_domains;
601 for (new_list_p = domain_ids; new_list_p;
602 new_list_p = new_list_p->cached_next) {
603 if (!new_list_p->cached_count)
604 continue;
605
606 /* search domain id in the faulty domain list */
607 pd = pcie_domain_list_find(new_list_p->domain_id,
608 old_list_p, PCIE_DOM_LIST_TYPE_FAULT);
609 if (pd)
610 pd->faulty_count++;
611 else
612 pcie_faulty_list_update(new_list_p,
613 &pcie_faulty_domains);
614 }
615 }
616