1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2019 Joyent, Inc.
25 * Copyright 2024 Oxide Computer Company
26 */
27
28 /*
29 * PCIe Initialization
30 * -------------------
31 *
32 * The PCIe subsystem is split about and initializes itself in a couple of
33 * different places. This is due to the platform-specific nature of initializing
34 * resources and the nature of the SPARC PROM and how that influenced the
35 * subsystem. Note that traditional PCI (mostly seen these days in Virtual
36 * Machines) follows most of the same basic path outlined here, but skips a
37 * large chunk of PCIe-specific initialization.
38 *
39 * First, there is an initial device discovery phase that is taken care of by
40 * the platform. This is where we discover the set of devices that are present
41 * at system power on. These devices may or may not be hot-pluggable. In
42 * particular, this happens in a platform-specific way right now. In general, we
43 * expect most discovery to be driven by scanning each bus, device, and
44 * function, and seeing what actually exists and responds to configuration space
45 * reads. This is driven via pci_boot.c on x86. This may be seeded by something
46 * like device tree, a PROM, supplemented with ACPI, or by knowledge that the
47 * underlying platform has.
48 *
49 * As a part of this discovery process, the full set of resources that exist in
50 * the system for PCIe are:
51 *
52 * o PCI buses
53 * o Prefetchable Memory
54 * o Non-prefetchable memory
55 * o I/O ports
56 *
57 * This process is driven by a platform's PCI platform Resource Discovery (PRD)
58 * module. The PRD definitions can be found in <sys/plat/pci_prd.h> and are used
59 * to discover these resources, which will be converted into the initial set of
60 * the standard properties in the system: 'regs', 'available', 'ranges', etc.
61 * Currently it is up to platform-specific code (which should ideally be
62 * consolidated at some point) to set up all these properties.
63 *
64 * As a part of the discovery process, the platform code will create a device
65 * node (dev_info_t) for each discovered function and will create a PCIe nexus
66 * for each overall root complex that exists in the system. Most root complexes
67 * will have multiple root ports, each of which is the foundation of an
68 * independent PCIe bus due to the point-to-point nature of PCIe. When a root
 * complex is found, a nexus driver such as npe (Nexus for PCI Express) is
70 * attached. In the case of a non-PCIe-capable system this is where the older
71 * pci nexus driver would be used instead.
72 *
73 * To track data about a given device on a bus, a 'pcie_bus_t' structure is
74 * created for and assigned to every PCIe-based dev_info_t. This can be used to
75 * find the root port and get basic information about the device, its faults,
76 * and related information. This contains pointers to the corresponding root
77 * port as well.
78 *
79 * A root complex has its pcie_bus_t initialized as part of the device discovery
80 * process. That is, because we're trying to bootstrap the actual tree and most
81 * platforms don't have a representation for this that's explicitly
82 * discoverable, this is created manually. See callers of pcie_rc_init_bus().
83 *
84 * For other devices, bridges, and switches, the process is split into two.
85 * There is an initial pcie_bus_t that is created which will exist before we go
86 * through the actual driver attachment process. For example, on x86 this is
87 * done as part of the device and function discovery. The second pass of
88 * initialization is done only after the nexus driver actually is attached and
89 * it goes through and finishes processing all of its children.
90 *
91 * Child Initialization
92 * --------------------
93 *
94 * Generally speaking, the platform will first enumerate all PCIe devices that
 * are in the system before it actually creates a device tree. This is part of
96 * the bus/device/function scanning that is performed and from that dev_info_t
97 * nodes are created for each discovered device and are inserted into the
98 * broader device tree. Later in boot, the actual device tree is walked and the
99 * nodes go through the standard dev_info_t initialization process (DS_PROTO,
100 * DS_LINKED, DS_BOUND, etc.).
101 *
102 * PCIe-specific initialization can roughly be broken into the following pieces:
103 *
104 * 1. Platform initial discovery and resource assignment
105 * 2. The pcie_bus_t initialization
106 * 3. Nexus driver child initialization
107 * 4. Fabric initialization
108 * 5. Device driver-specific initialization
109 *
110 * The first part of this (1) and (2) are discussed in the previous section.
111 * Part (1) in particular is a combination of the PRD (platform resource
112 * discovery) and general device initialization. After this, because we have a
113 * device tree, most of the standard nexus initialization happens.
114 *
115 * (5) is somewhat simple, so let's get into it before we discuss (3) and (4).
116 * This is the last thing that is called and that happens after all of the
117 * others are done. This is the logic that occurs in a driver's attach(9E) entry
118 * point. This is always device-specific and generally speaking should not be
119 * manipulating standard PCIe registers directly on their own. For example, the
120 * MSI/MSI-X, AER, Serial Number, etc. capabilities will be automatically dealt
121 * with by the framework in (3) and (4) below. In many cases, particularly
122 * things that are part of (4), adjusting them in the individual driver is not
123 * safe.
124 *
125 * Finally, let's talk about (3) and (4) as these are related. The NDI provides
126 * for a standard hook for a nexus to initialize its children. In our platforms,
127 * there are basically two possible PCIe nexus drivers: there is the generic
128 * pcieb -- PCIe bridge -- driver which is used for standard root ports,
129 * switches, etc. Then there is the platform-specific primary nexus driver,
130 * which is being slowly consolidated into a single one where it makes sense. An
131 * example of this is npe.
132 *
133 * Each of these has a child initialization function which is called from their
134 * DDI_CTLOPS_INITCHILD operation on the bus_ctl function pointer. This goes
135 * through and initializes a large number of different pieces of PCIe-based
136 * settings through the common pcie_initchild() function. This takes care of
137 * things like:
138 *
139 * o Advanced Error Reporting
140 * o Alternative Routing
141 * o Capturing information around link speed, width, serial numbers, etc.
142 * o Setting common properties around aborts
143 *
144 * There are a few caveats with this that need to be kept in mind:
145 *
146 * o A dev_info_t indicates a specific function. This means that a
147 * multi-function device will not all be initialized at the same time and
148 * there is no guarantee that all children will be initialized before one of
149 * them is attached.
150 * o A child is only initialized if we have found a driver that matches an
151 * alias in the dev_info_t's compatible array property. While a lot of
152 * multi-function devices are often multiple instances of the same thing
153 * (e.g. a multi-port NIC with a function / NIC), this is not always the
154 * case and one cannot make any assumptions here.
155 *
156 * This in turn leads to the next form of initialization that takes place in the
157 * case of (4). This is where we take care of things that need to be consistent
158 * across either entire devices or more generally across an entire root port and
159 * all of its children. There are a few different examples of this:
160 *
161 * o Setting the maximum packet size
162 * o Determining the tag width
163 *
164 * Note that features which are only based on function 0, such as ASPM (Active
165 * State Power Management), hardware autonomous width disable, etc. ultimately
166 * do not go through this path today. There are some implications here in that
167 * today several of these things are captured on functions which may not have
168 * any control here. This is an area of needed improvement.
169 *
170 * The settings in (4) are initialized in a common way, via
171 * pcie_fabric_setup(). This is called into from two different parts of
172 * the stack:
173 *
174 * 1. When we attach a root port, which is driven by pcieb.
175 * 2. When we have a hotplug event that adds a device.
176 *
177 * In general here we are going to use the term 'fabric' to refer to everything
178 * that is downstream of a root port. This corresponds to what the PCIe
179 * specification calls a 'hierarchy domain'. Strictly speaking, this is fine
180 * until peer-to-peer requests begin to happen that cause you to need to forward
181 * things across root ports. At that point the scope of the fabric increases and
182 * these settings become more complicated. We currently optimize for the much
183 * more common case, which is that each root port is effectively independent
184 * from a PCIe transaction routing perspective.
185 *
186 * Put differently, we use the term 'fabric' to refer to a set of PCIe devices
187 * that can route transactions to one another, which is generally constrained to
188 * everything under a root port and that root ports are independent. If this
189 * constraint changes, then all one needs to do is replace the discussion of the
190 * root port below with the broader root complex and system.
191 *
192 * A challenge with these settings is that once they're set and devices are
193 * actively making requests, we cannot really change them without resetting the
194 * links and cancelling all outstanding transactions via device resets. Because
195 * this is not something that we want to do, we instead look at how and when we
196 * set this to constrain what's going on.
197 *
198 * Because of this we basically say that if a given fabric has more than one
199 * hot-plug capable device that's encountered, then we have to use safe defaults
200 * (which we can allow an operator to tune eventually via pcieadm). If we have a
201 * mix of non-hotpluggable slots with downstream endpoints present and
202 * hot-pluggable slots, then we're in this case. If we don't have hot-pluggable
203 * slots, then we can have an arbitrarily complex setup. Let's look at a few of
204 * these visually:
205 *
206 * In the following diagrams, RP stands for Root Port, EP stands for Endpoint.
207 * If something is hot-pluggable, then we label it with (HP).
208 *
209 * (1) RP --> EP
210 * (2) RP --> Switch --> EP
211 * +--> EP
212 * +--> EP
213 *
214 * (3) RP --> Switch --> EP
215 * +--> EP
216 * +--> Switch --> EP
217 * +--> EP
218 * +--> EP
219 *
220 *
221 * (4) RP (HP) --> EP
222 * (5) RP (HP) --> Switch --> EP
223 * +--> EP
224 * +--> EP
225 *
226 * (6) RP --> Switch (HP) --> EP
227 * (7) RP (HP) --> Switch (HP) --> EP
228 *
229 * If we look at all of these, these are all cases where it's safe for us to set
230 * things based on all devices. (1), (2), and (3) are straightforward because
231 * they have no hot-pluggable elements. This means that nothing should come/go
232 * on the system and we can set up fabric-wide properties as part of the root
233 * port.
234 *
235 * Case (4) is the most standard one that we encounter for hot-plug. Here you
236 * have a root port directly connected to an endpoint. The most common example
237 * would be an NVMe device plugged into a root port. Case (5) is interesting to
238 * highlight. While there is a switch and multiple endpoints there, they are
239 * showing up as a unit. This ends up being a weirder variant of (4), but it is
240 * safe for us to set advanced properties because we can figure out what the
241 * total set should be.
242 *
243 * Now, the more interesting bits here are (6) and (7). The reason that (6)
244 * works is that ultimately there is only a single down-stream port here that is
245 * hot-pluggable and all non-hotpluggable ports do not have a device present,
246 * which suggests that they will never have a device present. (7) also could be
247 * made to work by making the observation that if there's truly only one
248 * endpoint in a fabric, it doesn't matter how many switches there are that are
249 * hot-pluggable. This would only hold if we can assume for some reason that no
250 * other endpoints could be added.
251 *
252 * In turn, let's look at several cases that we believe aren't safe:
253 *
254 * (8) RP --> Switch --> EP
255 * +--> EP
256 * (HP) +--> EP
257 *
258 * (9) RP --> Switch (HP) +--> EP
259 * (HP) +--> EP
260 *
261 * (10) RP (HP) --> Switch (HP) +--> EP
262 * (HP) +--> EP
263 *
264 * All of these are situations where it's much more explicitly unsafe. Let's
265 * take (8). The problem here is that the devices on the non-hotpluggable
266 * downstream switches are always there and we should assume all device drivers
267 * will be active and performing I/O when the hot-pluggable slot changes. If the
268 * hot-pluggable slot has a lower max payload size, then we're mostly out of
269 * luck. The case of (9) is very similar to (8), just that we have more hot-plug
270 * capable slots.
271 *
272 * Finally (10) is a case of multiple instances of hotplug. (9) and (10) are the
273 * more general case of (6) and (7). While we can try to detect (6) and (7) more
274 * generally or try to make it safe, we're going to start with a simpler form of
275 * detection for this, which roughly follows the following rules:
276 *
277 * o If there are no hot-pluggable slots in an entire fabric, then we can set
278 * all fabric properties based on device capabilities.
279 * o If we encounter a hot-pluggable slot, we can only set fabric properties
280 * based on device capabilities if:
281 *
282 * 1. The hotpluggable slot is a root port.
283 * 2. There are no other hotpluggable devices downstream of it.
284 *
285 * Otherwise, if neither of the above is true, then we must use the basic PCIe
286 * defaults for various fabric-wide properties (discussed below). Even in these
287 * more complicated cases, device-specific properties such as the configuration
288 * of AERs, ASPM, etc. are still handled in the general pcie_init_bus() and
289 * related discussed earlier here.
290 *
291 * Because the only fabrics that we'll change are those that correspond to root
292 * ports, we will only call into the actual fabric feature setup when one of
293 * those changes. This has the side effect of simplifying locking. When we make
294 * changes here we need to be able to hold the entire device tree under the root
295 * port (including the root port and its parent). This is much harder to do
296 * safely when starting in the middle of the tree.
297 *
298 * Handling of Specific Properties
299 * -------------------------------
300 *
301 * This section goes into the rationale behind how we initialize and program
302 * various parts of the PCIe stack.
303 *
304 * 5-, 8-, 10- AND 14-BIT TAGS
305 *
306 * Tags are part of PCIe transactions and when combined with a device identifier
307 * are used to uniquely identify a transaction. In PCIe parlance, a Requester
308 * (someone who initiates a PCIe request) sets a unique tag in the request and
309 * the Completer (someone who processes and responds to a PCIe request) echoes
310 * the tag back. This means that a requester generally is responsible for
311 * ensuring that they don't reuse a tag between transactions.
312 *
313 * Thus the number of tags that a device has relates to the number of
314 * outstanding transactions that it can have, which are usually tied to the
315 * number of outstanding DMA transfers. The size of these transactions is also
316 * then scoped by the handling of the Maximum Packet Payload.
317 *
318 * In PCIe 1.0, devices default to a 5-bit tag. There was also an option to
319 * support an 8-bit tag. The 8-bit extended tag did not distinguish between a
320 * Requester or Completer. There was a bit to indicate device support of 8-bit
321 * tags in the Device Capabilities Register of the PCIe Capability and a
322 * separate bit to enable it in the Device Control Register of the PCIe
323 * Capability.
324 *
325 * In PCIe 4.0, support for a 10-bit tag was added. The specification broke
326 * apart the support bit into multiple pieces. In particular, in the Device
327 * Capabilities 2 register of the PCIe Capability there is a separate bit to
328 * indicate whether the device supports 10-bit completions and 10-bit requests.
329 * All PCIe 4.0 compliant devices are required to support 10-bit tags if they
330 * operate at 16.0 GT/s speed (a PCIe Gen 4 compliant device does not have to
331 * operate at Gen 4 speeds).
332 *
333 * This allows a device to support 10-bit completions but not 10-bit requests.
334 * A device that supports 10-bit requests is required to support 10-bit
335 * completions. There is no ability to enable or disable 10-bit completion
336 * support in the Device Capabilities 2 register. There is only a bit to enable
337 * 10-bit requests. This distinction makes our life easier as this means that as
338 * long as the entire fabric supports 10-bit completions, it doesn't matter if
339 * not all devices support 10-bit requests and we can enable them as required.
340 * More on this in a bit.
341 *
342 * In PCIe 6.0, another set of bits was added for 14-bit tags. These follow the
343 * same pattern as the 10-bit tags. The biggest difference is that the
344 * capabilities and control for these are found in the Device Capabilities 3
345 * and Device Control 3 register of the Device 3 Extended Capability. Similar to
346 * what we see with 10-bit tags, requesters are required to support the
347 * completer capability. The only control bit is for whether or not they enable
348 * a 14-bit requester.
349 *
 * PCIe switches sit between root ports and endpoints and show up to software
 * as a set of bridges. Bridges generally don't have to know about tags as they
 * are usually neither requesters nor completers (unless directly talking to
 * the bridge instance). That is, they are generally required to forward
 * packets without modifying them. This works until we deal with switch error
 * handling. At that point, the switch may try to interpret the transaction and
 * if it doesn't understand the tagging scheme in use, return the transaction
 * with the wrong tag and also an incorrectly diagnosed error (usually a
 * malformed TLP).
359 *
360 * With all this, we construct a somewhat simple policy of how and when we
361 * enable extended tags:
362 *
363 * o If we have a complex hotplug-capable fabric (based on the discussion
364 * earlier in fabric-specific settings), then we cannot enable any of the
365 * 8-bit, 10-bit, and 14-bit tagging features. This is due to the issues
366 * with intermediate PCIe switches and related.
367 *
368 * o If every device supports 8-bit capable tags, then we will go through and
369 * enable those everywhere.
370 *
371 * o If every device supports 10-bit capable completions, then we will enable
372 * 10-bit requester on every device that supports it.
373 *
374 * o If every device supports 14-bit capable completions, then we will enable
375 * 14-bit requesters on every device that supports it.
376 *
377 * This is the simpler end of the policy and one that is relatively easy to
378 * implement. While we could attempt to relax the constraint that every device
379 * in the fabric implement these features by making assumptions about peer-to-
380 * peer requests (that is devices at the same layer in the tree won't talk to
381 * one another), that is a lot of complexity. For now, we leave such an
382 * implementation to those who need it in the future.
383 *
384 * MAX PAYLOAD SIZE
385 *
386 * When performing transactions on the PCIe bus, a given transaction has a
387 * maximum allowed size. This size is called the MPS or 'Maximum Payload Size'.
388 * A given device reports its maximum supported size in the Device Capabilities
389 * register of the PCIe Capability. It is then set in the Device Control
390 * register.
391 *
392 * One of the challenges with this value is that different functions of a device
393 * have independent values, but strictly speaking are required to actually have
394 * the same value programmed in all of them lest device behavior goes awry. When
395 * a device has the ARI (alternative routing ID) capability enabled, then only
396 * function 0 controls the actual payload size.
397 *
398 * The settings for this need to be consistent throughout the fabric. A
399 * Transmitter is not allowed to create a TLP that exceeds its maximum packet
400 * size and a Receiver is not allowed to receive a packet that exceeds its
401 * maximum packet size. In all of these cases, this would result in something
402 * like a malformed TLP error.
403 *
404 * Effectively, this means that everything on a given fabric must have the same
405 * value programmed in its Device Control register for this value. While in the
406 * case of tags, switches generally weren't completers or requesters, here every
407 * device along the path is subject to this. This makes the actual value that we
408 * set throughout the fabric even more important and the constraints of hotplug
409 * even worse to deal with.
410 *
411 * Because a hotplug device can be inserted with any packet size, if we hit
412 * anything other than the simple hotplug cases discussed in the fabric-specific
413 * settings section, then we must use the smallest size of 128 byte payloads.
414 * This is because a device could be plugged in that supports something smaller
415 * than we had otherwise set. If there are other active devices, those could not
416 * be changed without quiescing the entire fabric. As such our algorithm is as
417 * follows:
418 *
419 * 1. Scan the entire fabric, keeping track of the smallest seen MPS in the
420 * Device Capabilities Register.
421 * 2. If we have a complex fabric, program each Device Control register with
422 * a 128 byte maximum payload size, otherwise, program it with the
423 * discovered value.
424 *
425 *
426 * MAX READ REQUEST SIZE
427 *
428 * The maximum read request size (mrrs) is a much more confusing thing when
429 * compared to the maximum payload size counterpart. The maximum payload size
430 * (MPS) above is what restricts the actual size of a TLP. The mrrs value
431 * is used to control part of the behavior of Memory Read Request, which is not
432 * strictly speaking subject to the MPS. A PCIe device is allowed to respond to
 * a Memory Read Request with fewer bytes than were actually requested in a
434 * single completion. In general, the default size that a root complex and its
435 * root port will reply to are based around the length of a cache line.
436 *
437 * What this ultimately controls is the number of requests that the Requester
438 * has to make and trades off bandwidth, bus sharing, and related here. For
439 * example, if the maximum read request size is 4 KiB, then the requester would
440 * only issue a single read request asking for 4 KiB. It would still receive
441 * these as multiple packets in units of the MPS. If however, the maximum read
442 * request was only say 512 B, then it would need to make 8 separate requests,
443 * potentially increasing latency. On the other hand, if systems are relying on
444 * total requests for QoS, then it's important to set it to something that's
445 * closer to the actual MPS.
446 *
447 * Traditionally, the OS has not been the most straightforward about this. It's
448 * important to remember that setting this up is also somewhat in the realm of
449 * system firmware. Due to the PCI Firmware specification, the firmware may have
450 * set up a value for not just the MRRS but also the MPS. As such, our logic
451 * basically left the MRRS alone and used whatever the device had there as long
452 * as we weren't shrinking the device's MPS. If we were, then we'd set it to the
453 * MPS. If the device was a root port, then it was just left at a system wide
454 * and PCIe default of 512 bytes.
455 *
456 * If we survey firmware (which isn't easy due to its nature), we have seen most
457 * cases where the firmware just doesn't do anything and leaves it to the
458 * device's default, which is basically just the PCIe default, unless it has a
459 * specific knowledge of something like say wanting to do something for an NVMe
460 * device. The same is generally true of other systems, leaving it at its
461 * default unless otherwise set by a device driver.
462 *
463 * Because this value doesn't really have the same constraints as other fabric
464 * properties, this becomes much simpler and we instead opt to set it as part of
465 * the device node initialization. In addition, there are no real rules about
466 * different functions having different values here as it doesn't really impact
467 * the TLP processing the same way that the MPS does.
468 *
469 * While we should add a fuller way of setting this and allowing operator
470 * override of the MRRS based on things like device class, etc. that is driven
 * by pcieadm, that is left to the future. For now we opt to keep all devices
 * at their default (512 bytes or whatever firmware left behind) and we
 * ensure that root ports always have the mrrs set to 512.
474 */
475
476 #include <sys/sysmacros.h>
477 #include <sys/types.h>
478 #include <sys/kmem.h>
479 #include <sys/modctl.h>
480 #include <sys/ddi.h>
481 #include <sys/sunddi.h>
482 #include <sys/sunndi.h>
483 #include <sys/fm/protocol.h>
484 #include <sys/fm/util.h>
485 #include <sys/promif.h>
486 #include <sys/disp.h>
487 #include <sys/stat.h>
488 #include <sys/file.h>
489 #include <sys/pci_cap.h>
490 #include <sys/pci_impl.h>
491 #include <sys/pcie_impl.h>
492 #include <sys/hotplug/pci/pcie_hp.h>
493 #include <sys/hotplug/pci/pciehpc.h>
494 #include <sys/hotplug/pci/pcishpc.h>
495 #include <sys/hotplug/pci/pcicfg.h>
496 #include <sys/pci_cfgacc.h>
497 #include <sys/sysevent.h>
498 #include <sys/sysevent/eventdefs.h>
499 #include <sys/sysevent/pcie.h>
500
/* Local function prototypes */
static void pcie_init_pfd(dev_info_t *);
static void pcie_fini_pfd(dev_info_t *);

/*
 * Debug-only state and helpers. These declarations exist only when the module
 * is built with DEBUG defined.
 */
#ifdef DEBUG
uint_t pcie_debug_flags = 0;
static void pcie_print_bus(pcie_bus_t *bus_p);
void pcie_dbg(char *fmt, ...);
#endif /* DEBUG */
510
/*
 * Variable to control default PCI-Express config settings. The value is built
 * from PCI_COMM_* flags; presumably it is applied to a function's PCI command
 * register -- the consumer is not visible in this part of the file.
 */
ushort_t pcie_command_default =
    PCI_COMM_SERR_ENABLE |
    PCI_COMM_WAIT_CYC_ENAB |
    PCI_COMM_PARITY_DETECT |
    PCI_COMM_ME |
    PCI_COMM_MAE |
    PCI_COMM_IO;

/*
 * xxx_fw are bits that are controlled by FW and should not be modified.
 *
 * NOTE(review): PCI_COMM_WAIT_CYC_ENAB is listed both in pcie_command_default
 * above and in this firmware-owned mask -- confirm that the overlap is
 * intentional.
 */
ushort_t pcie_command_default_fw =
    PCI_COMM_SPEC_CYC |
    PCI_COMM_MEMWR_INVAL |
    PCI_COMM_PALETTE_SNOOP |
    PCI_COMM_WAIT_CYC_ENAB |
    0xF800; /* Reserved Bits */

/*
 * Firmware-owned bits built from PCI_BCNF_BCNTRL_* flags (presumably the
 * bridge control register -- confirm against the consumer).
 */
ushort_t pcie_bdg_command_default_fw =
    PCI_BCNF_BCNTRL_ISA_ENABLE |
    PCI_BCNF_BCNTRL_VGA_ENABLE |
    0xF000; /* Reserved Bits */

/*
 * PCI-Express Base error defaults: all four PCIE_DEVCTL_*_REPORTING_EN flags
 * (CE, NFE, FE and UR) are set.
 */
ushort_t pcie_base_err_default =
    PCIE_DEVCTL_CE_REPORTING_EN |
    PCIE_DEVCTL_NFE_REPORTING_EN |
    PCIE_DEVCTL_FE_REPORTING_EN |
    PCIE_DEVCTL_UR_REPORTING_EN;
539
/*
 * This contains default values and masks that are used to manipulate the device
 * control register and ensure that it is in a normal state. The mask controls
 * things that are managed by pcie_fabric_setup(), firmware, or other sources
 * and therefore should be preserved unless we're explicitly trying to change
 * it.
 *
 * The default value sets PCIE_DEVCTL_RO_EN and a 512-byte maximum read
 * request size. The mask covers the maximum read request size field, the
 * maximum payload size field, and the extended tag field enable bit.
 */
uint16_t pcie_devctl_default = PCIE_DEVCTL_RO_EN | PCIE_DEVCTL_MAX_READ_REQ_512;
uint16_t pcie_devctl_default_mask = PCIE_DEVCTL_MAX_READ_REQ_MASK |
    PCIE_DEVCTL_MAX_PAYLOAD_MASK | PCIE_DEVCTL_EXT_TAG_FIELD_EN;
550
551 /* PCI-Express AER Root Control Register */
552 #define PCIE_ROOT_SYS_ERR (PCIE_ROOTCTL_SYS_ERR_ON_CE_EN | \
553 PCIE_ROOTCTL_SYS_ERR_ON_NFE_EN | \
554 PCIE_ROOTCTL_SYS_ERR_ON_FE_EN)
555
556 ushort_t pcie_root_ctrl_default =
557 PCIE_ROOTCTL_SYS_ERR_ON_CE_EN |
558 PCIE_ROOTCTL_SYS_ERR_ON_NFE_EN |
559 PCIE_ROOTCTL_SYS_ERR_ON_FE_EN;
560
561 /* PCI-Express Root Error Command Register */
562 ushort_t pcie_root_error_cmd_default =
563 PCIE_AER_RE_CMD_CE_REP_EN |
564 PCIE_AER_RE_CMD_NFE_REP_EN |
565 PCIE_AER_RE_CMD_FE_REP_EN;
566
/*
 * ECRC settings in the PCIe AER Control Register. By default both the ECRC
 * generation enable and ECRC check enable flags are set.
 */
uint32_t pcie_ecrc_value =
    PCIE_AER_CTL_ECRC_GEN_ENA |
    PCIE_AER_CTL_ECRC_CHECK_ENA;
571
/*
 * If a particular platform wants to disable certain errors such as UR/MA,
 * instead of using #defines have the platform's PCIe Root Complex driver set
 * these masks using the pcie_get_XXX_mask and pcie_set_XXX_mask functions. For
 * x86 the closest thing to a PCIe root complex driver is NPE. For SPARC the
 * closest PCIe root complex driver is PX.
 *
 * pcie_serr_disable_flag : disable SERR only (in RCR and command reg) x86
 * systems may want to disable SERR in general. For root ports, enabling SERR
 * causes NMIs which are not handled and results in a watchdog timeout error.
 *
 * All four values default to zero.
 */
uint32_t pcie_aer_uce_mask = 0; /* AER UE Mask */
uint32_t pcie_aer_ce_mask = 0; /* AER CE Mask */
uint32_t pcie_aer_suce_mask = 0; /* AER Secondary UE Mask */
uint32_t pcie_serr_disable_flag = 0; /* Disable SERR */

/*
 * Default severities needed for eversholt. Error handling doesn't care.
 * The trailing backslashes below are plain line continuations; they are not
 * part of a macro definition.
 */
uint32_t pcie_aer_uce_severity = PCIE_AER_UCE_MTLP | PCIE_AER_UCE_RO | \
    PCIE_AER_UCE_FCP | PCIE_AER_UCE_SD | PCIE_AER_UCE_DLP | \
    PCIE_AER_UCE_TRAINING;
uint32_t pcie_aer_suce_severity = PCIE_AER_SUCE_SERR_ASSERT | \
    PCIE_AER_SUCE_UC_ADDR_ERR | PCIE_AER_SUCE_UC_ATTR_ERR | \
    PCIE_AER_SUCE_USC_MSG_DATA_ERR;

/*
 * Defaults to 0. Presumably a tunable that disables use of ARI when set to a
 * non-zero value -- the consumer is not visible in this part of the file.
 */
int pcie_disable_ari = 0;
597
/*
 * On some platforms, such as the AMD B450 chipset, we've seen an odd
 * relationship between enabling link bandwidth notifications and AERs about
 * ECRC errors. This provides a mechanism to disable it.
 */
int pcie_disable_lbw = 0;

/*
 * Amount of time to wait for an in-progress retraining. The default is to try
 * 500 times in 10ms chunks, thus a total of 5s.
 */
uint32_t pcie_link_retrain_count = 500;
uint32_t pcie_link_retrain_delay_ms = 10;

/*
 * Taskq used for PCIe link management. It is not created at module load time:
 * pcie_init() creates it on first use while holding pcie_link_tq_mutex, and
 * _fini() destroys it if it was ever created. The mutex itself is initialized
 * in _init() and destroyed in _fini() (or on a failed _init()).
 */
taskq_t *pcie_link_tq;
kmutex_t pcie_link_tq_mutex;

/* Forward declarations for functions defined later in this file. */
static int pcie_link_bw_intr(dev_info_t *);
static void pcie_capture_speeds(dev_info_t *);

dev_info_t *pcie_get_rc_dip(dev_info_t *dip);
619
/*
 * modload support
 *
 * This is a miscellaneous module (mod_miscops). The modlinkage below is what
 * _init(), _fini() and _info() hand to mod_install(), mod_remove() and
 * mod_info() respectively.
 */

static struct modlmisc modlmisc = {
	&mod_miscops,	/* Type of module */
	"PCI Express Framework Module"
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modlmisc,
	NULL
};
634
/*
 * Global Variables needed for a non-atomic version of ddi_fm_ereport_post.
 * Currently used to send the pci.fabric ereports whose payload depends on the
 * type of PCI device it is being sent for.
 *
 * All three are set up in _init(): pcie_nv_buf is an ERPT_DATA_SZ byte buffer,
 * pcie_nvap is the nv allocator created on top of that buffer, and pcie_nvl is
 * the nvlist created from that allocator. They are released in _fini(), or in
 * _init() itself if mod_install() fails.
 */
char *pcie_nv_buf;
nv_alloc_t *pcie_nvap;
nvlist_t *pcie_nvl;
643
644 int
_init(void)645 _init(void)
646 {
647 int rval;
648
649 pcie_nv_buf = kmem_alloc(ERPT_DATA_SZ, KM_SLEEP);
650 pcie_nvap = fm_nva_xcreate(pcie_nv_buf, ERPT_DATA_SZ);
651 pcie_nvl = fm_nvlist_create(pcie_nvap);
652 mutex_init(&pcie_link_tq_mutex, NULL, MUTEX_DRIVER, NULL);
653
654 if ((rval = mod_install(&modlinkage)) != 0) {
655 mutex_destroy(&pcie_link_tq_mutex);
656 fm_nvlist_destroy(pcie_nvl, FM_NVA_RETAIN);
657 fm_nva_xdestroy(pcie_nvap);
658 kmem_free(pcie_nv_buf, ERPT_DATA_SZ);
659 }
660 return (rval);
661 }
662
663 int
_fini()664 _fini()
665 {
666 int rval;
667
668 if ((rval = mod_remove(&modlinkage)) == 0) {
669 if (pcie_link_tq != NULL) {
670 taskq_destroy(pcie_link_tq);
671 }
672 mutex_destroy(&pcie_link_tq_mutex);
673 fm_nvlist_destroy(pcie_nvl, FM_NVA_RETAIN);
674 fm_nva_xdestroy(pcie_nvap);
675 kmem_free(pcie_nv_buf, ERPT_DATA_SZ);
676 }
677 return (rval);
678 }
679
680 int
_info(struct modinfo * modinfop)681 _info(struct modinfo *modinfop)
682 {
683 return (mod_info(&modlinkage, modinfop));
684 }
685
686 /* ARGSUSED */
687 int
pcie_init(dev_info_t * dip,caddr_t arg)688 pcie_init(dev_info_t *dip, caddr_t arg)
689 {
690 int ret = DDI_SUCCESS;
691
692 /*
693 * Our _init function is too early to create a taskq. Create the pcie
694 * link management taskq here now instead.
695 */
696 mutex_enter(&pcie_link_tq_mutex);
697 if (pcie_link_tq == NULL) {
698 pcie_link_tq = taskq_create("pcie_link", 1, minclsyspri, 0, 0,
699 0);
700 }
701 mutex_exit(&pcie_link_tq_mutex);
702
703
704 /*
705 * Create a "devctl" minor node to support DEVCTL_DEVICE_*
706 * and DEVCTL_BUS_* ioctls to this bus.
707 */
708 if ((ret = ddi_create_minor_node(dip, "devctl", S_IFCHR,
709 PCI_MINOR_NUM(ddi_get_instance(dip), PCI_DEVCTL_MINOR),
710 DDI_NT_NEXUS, 0)) != DDI_SUCCESS) {
711 PCIE_DBG("Failed to create devctl minor node for %s%d\n",
712 ddi_driver_name(dip), ddi_get_instance(dip));
713
714 return (ret);
715 }
716
717 if ((ret = pcie_hp_init(dip, arg)) != DDI_SUCCESS) {
718 /*
719 * On some x86 platforms, we observed unexpected hotplug
720 * initialization failures in recent years. The known cause
721 * is a hardware issue: while the problem PCI bridges have
722 * the Hotplug Capable registers set, the machine actually
723 * does not implement the expected ACPI object.
724 *
725 * We don't want to stop PCI driver attach and system boot
726 * just because of this hotplug initialization failure.
727 * Continue with a debug message printed.
728 */
729 PCIE_DBG("%s%d: Failed setting hotplug framework\n",
730 ddi_driver_name(dip), ddi_get_instance(dip));
731
732 #if defined(__sparc)
733 ddi_remove_minor_node(dip, "devctl");
734
735 return (ret);
736 #endif /* defined(__sparc) */
737 }
738
739 return (DDI_SUCCESS);
740 }
741
742 /* ARGSUSED */
743 int
pcie_uninit(dev_info_t * dip)744 pcie_uninit(dev_info_t *dip)
745 {
746 int ret = DDI_SUCCESS;
747
748 if (pcie_ari_is_enabled(dip) == PCIE_ARI_FORW_ENABLED)
749 (void) pcie_ari_disable(dip);
750
751 if ((ret = pcie_hp_uninit(dip)) != DDI_SUCCESS) {
752 PCIE_DBG("Failed to uninitialize hotplug for %s%d\n",
753 ddi_driver_name(dip), ddi_get_instance(dip));
754
755 return (ret);
756 }
757
758 if (pcie_link_bw_supported(dip)) {
759 (void) pcie_link_bw_disable(dip);
760 }
761
762 ddi_remove_minor_node(dip, "devctl");
763
764 return (ret);
765 }
766
767 /*
768 * PCIe module interface for enabling hotplug interrupt.
769 *
770 * It should be called after pcie_init() is done and bus driver's
771 * interrupt handlers have being attached.
772 */
773 int
pcie_hpintr_enable(dev_info_t * dip)774 pcie_hpintr_enable(dev_info_t *dip)
775 {
776 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
777 pcie_hp_ctrl_t *ctrl_p = PCIE_GET_HP_CTRL(dip);
778
779 if (PCIE_IS_PCIE_HOTPLUG_ENABLED(bus_p)) {
780 (void) (ctrl_p->hc_ops.enable_hpc_intr)(ctrl_p);
781 } else if (PCIE_IS_PCI_HOTPLUG_ENABLED(bus_p)) {
782 (void) pcishpc_enable_irqs(ctrl_p);
783 }
784 return (DDI_SUCCESS);
785 }
786
787 /*
788 * PCIe module interface for disabling hotplug interrupt.
789 *
790 * It should be called before pcie_uninit() is called and bus driver's
791 * interrupt handlers is dettached.
792 */
793 int
pcie_hpintr_disable(dev_info_t * dip)794 pcie_hpintr_disable(dev_info_t *dip)
795 {
796 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
797 pcie_hp_ctrl_t *ctrl_p = PCIE_GET_HP_CTRL(dip);
798
799 if (PCIE_IS_PCIE_HOTPLUG_ENABLED(bus_p)) {
800 (void) (ctrl_p->hc_ops.disable_hpc_intr)(ctrl_p);
801 } else if (PCIE_IS_PCI_HOTPLUG_ENABLED(bus_p)) {
802 (void) pcishpc_disable_irqs(ctrl_p);
803 }
804 return (DDI_SUCCESS);
805 }
806
807 /* ARGSUSED */
808 int
pcie_intr(dev_info_t * dip)809 pcie_intr(dev_info_t *dip)
810 {
811 int hp, lbw;
812
813 hp = pcie_hp_intr(dip);
814 lbw = pcie_link_bw_intr(dip);
815
816 if (hp == DDI_INTR_CLAIMED || lbw == DDI_INTR_CLAIMED) {
817 return (DDI_INTR_CLAIMED);
818 }
819
820 return (DDI_INTR_UNCLAIMED);
821 }
822
823 /* ARGSUSED */
824 int
pcie_open(dev_info_t * dip,dev_t * devp,int flags,int otyp,cred_t * credp)825 pcie_open(dev_info_t *dip, dev_t *devp, int flags, int otyp, cred_t *credp)
826 {
827 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
828
829 /*
830 * Make sure the open is for the right file type.
831 */
832 if (otyp != OTYP_CHR)
833 return (EINVAL);
834
835 /*
836 * Handle the open by tracking the device state.
837 */
838 if ((bus_p->bus_soft_state == PCI_SOFT_STATE_OPEN_EXCL) ||
839 ((flags & FEXCL) &&
840 (bus_p->bus_soft_state != PCI_SOFT_STATE_CLOSED))) {
841 return (EBUSY);
842 }
843
844 if (flags & FEXCL)
845 bus_p->bus_soft_state = PCI_SOFT_STATE_OPEN_EXCL;
846 else
847 bus_p->bus_soft_state = PCI_SOFT_STATE_OPEN;
848
849 return (0);
850 }
851
852 /* ARGSUSED */
853 int
pcie_close(dev_info_t * dip,dev_t dev,int flags,int otyp,cred_t * credp)854 pcie_close(dev_info_t *dip, dev_t dev, int flags, int otyp, cred_t *credp)
855 {
856 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
857
858 if (otyp != OTYP_CHR)
859 return (EINVAL);
860
861 bus_p->bus_soft_state = PCI_SOFT_STATE_CLOSED;
862
863 return (0);
864 }
865
866 /* ARGSUSED */
867 int
pcie_ioctl(dev_info_t * dip,dev_t dev,int cmd,intptr_t arg,int mode,cred_t * credp,int * rvalp)868 pcie_ioctl(dev_info_t *dip, dev_t dev, int cmd, intptr_t arg, int mode,
869 cred_t *credp, int *rvalp)
870 {
871 struct devctl_iocdata *dcp;
872 uint_t bus_state;
873 int rv = DDI_SUCCESS;
874
875 /*
876 * We can use the generic implementation for devctl ioctl
877 */
878 switch (cmd) {
879 case DEVCTL_DEVICE_GETSTATE:
880 case DEVCTL_DEVICE_ONLINE:
881 case DEVCTL_DEVICE_OFFLINE:
882 case DEVCTL_BUS_GETSTATE:
883 return (ndi_devctl_ioctl(dip, cmd, arg, mode, 0));
884 default:
885 break;
886 }
887
888 /*
889 * read devctl ioctl data
890 */
891 if (ndi_dc_allochdl((void *)arg, &dcp) != NDI_SUCCESS)
892 return (EFAULT);
893
894 switch (cmd) {
895 case DEVCTL_BUS_QUIESCE:
896 if (ndi_get_bus_state(dip, &bus_state) == NDI_SUCCESS)
897 if (bus_state == BUS_QUIESCED)
898 break;
899 (void) ndi_set_bus_state(dip, BUS_QUIESCED);
900 break;
901 case DEVCTL_BUS_UNQUIESCE:
902 if (ndi_get_bus_state(dip, &bus_state) == NDI_SUCCESS)
903 if (bus_state == BUS_ACTIVE)
904 break;
905 (void) ndi_set_bus_state(dip, BUS_ACTIVE);
906 break;
907 case DEVCTL_BUS_RESET:
908 case DEVCTL_BUS_RESETALL:
909 case DEVCTL_DEVICE_RESET:
910 rv = ENOTSUP;
911 break;
912 default:
913 rv = ENOTTY;
914 }
915
916 ndi_dc_freehdl(dcp);
917 return (rv);
918 }
919
920 /* ARGSUSED */
921 int
pcie_prop_op(dev_t dev,dev_info_t * dip,ddi_prop_op_t prop_op,int flags,char * name,caddr_t valuep,int * lengthp)922 pcie_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
923 int flags, char *name, caddr_t valuep, int *lengthp)
924 {
925 if (dev == DDI_DEV_T_ANY)
926 goto skip;
927
928 if (PCIE_IS_HOTPLUG_CAPABLE(dip) &&
929 strcmp(name, "pci-occupant") == 0) {
930 int pci_dev = PCI_MINOR_NUM_TO_PCI_DEVNUM(getminor(dev));
931
932 pcie_hp_create_occupant_props(dip, dev, pci_dev);
933 }
934
935 skip:
936 return (ddi_prop_op(dev, dip, prop_op, flags, name, valuep, lengthp));
937 }
938
939 int
pcie_init_cfghdl(dev_info_t * cdip)940 pcie_init_cfghdl(dev_info_t *cdip)
941 {
942 pcie_bus_t *bus_p;
943 ddi_acc_handle_t eh = NULL;
944
945 bus_p = PCIE_DIP2BUS(cdip);
946 if (bus_p == NULL)
947 return (DDI_FAILURE);
948
949 /* Create an config access special to error handling */
950 if (pci_config_setup(cdip, &eh) != DDI_SUCCESS) {
951 cmn_err(CE_WARN, "Cannot setup config access"
952 " for BDF 0x%x\n", bus_p->bus_bdf);
953 return (DDI_FAILURE);
954 }
955
956 bus_p->bus_cfg_hdl = eh;
957 return (DDI_SUCCESS);
958 }
959
960 void
pcie_fini_cfghdl(dev_info_t * cdip)961 pcie_fini_cfghdl(dev_info_t *cdip)
962 {
963 pcie_bus_t *bus_p = PCIE_DIP2BUS(cdip);
964
965 pci_config_teardown(&bus_p->bus_cfg_hdl);
966 }
967
968 void
pcie_determine_serial(dev_info_t * dip)969 pcie_determine_serial(dev_info_t *dip)
970 {
971 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
972 ddi_acc_handle_t h;
973 uint16_t cap;
974 uchar_t serial[8];
975 uint32_t low, high;
976
977 if (!PCIE_IS_PCIE(bus_p))
978 return;
979
980 h = bus_p->bus_cfg_hdl;
981
982 if ((PCI_CAP_LOCATE(h, PCI_CAP_XCFG_SPC(PCIE_EXT_CAP_ID_SER), &cap)) ==
983 DDI_FAILURE)
984 return;
985
986 high = PCI_XCAP_GET32(h, 0, cap, PCIE_SER_SID_UPPER_DW);
987 low = PCI_XCAP_GET32(h, 0, cap, PCIE_SER_SID_LOWER_DW);
988
989 /*
990 * Here, we're trying to figure out if we had an invalid PCIe read. From
991 * looking at the contents of the value, it can be hard to tell the
992 * difference between a value that has all 1s correctly versus if we had
993 * an error. In this case, we only assume it's invalid if both register
994 * reads are invalid. We also only use 32-bit reads as we're not sure if
995 * all devices will support these as 64-bit reads, while we know that
996 * they'll support these as 32-bit reads.
997 */
998 if (high == PCI_EINVAL32 && low == PCI_EINVAL32)
999 return;
1000
1001 serial[0] = low & 0xff;
1002 serial[1] = (low >> 8) & 0xff;
1003 serial[2] = (low >> 16) & 0xff;
1004 serial[3] = (low >> 24) & 0xff;
1005 serial[4] = high & 0xff;
1006 serial[5] = (high >> 8) & 0xff;
1007 serial[6] = (high >> 16) & 0xff;
1008 serial[7] = (high >> 24) & 0xff;
1009
1010 (void) ndi_prop_update_byte_array(DDI_DEV_T_NONE, dip, "pcie-serial",
1011 serial, sizeof (serial));
1012 }
1013
1014 static void
pcie_determine_aspm(dev_info_t * dip)1015 pcie_determine_aspm(dev_info_t *dip)
1016 {
1017 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
1018 uint32_t linkcap;
1019 uint16_t linkctl;
1020
1021 if (!PCIE_IS_PCIE(bus_p))
1022 return;
1023
1024 linkcap = PCIE_CAP_GET(32, bus_p, PCIE_LINKCAP);
1025 linkctl = PCIE_CAP_GET(16, bus_p, PCIE_LINKCTL);
1026
1027 switch (linkcap & PCIE_LINKCAP_ASPM_SUP_MASK) {
1028 case PCIE_LINKCAP_ASPM_SUP_L0S:
1029 (void) ndi_prop_update_string(DDI_DEV_T_NONE, dip,
1030 "pcie-aspm-support", "l0s");
1031 break;
1032 case PCIE_LINKCAP_ASPM_SUP_L1:
1033 (void) ndi_prop_update_string(DDI_DEV_T_NONE, dip,
1034 "pcie-aspm-support", "l1");
1035 break;
1036 case PCIE_LINKCAP_ASPM_SUP_L0S_L1:
1037 (void) ndi_prop_update_string(DDI_DEV_T_NONE, dip,
1038 "pcie-aspm-support", "l0s,l1");
1039 break;
1040 default:
1041 return;
1042 }
1043
1044 switch (linkctl & PCIE_LINKCTL_ASPM_CTL_MASK) {
1045 case PCIE_LINKCTL_ASPM_CTL_DIS:
1046 (void) ndi_prop_update_string(DDI_DEV_T_NONE, dip,
1047 "pcie-aspm-state", "disabled");
1048 break;
1049 case PCIE_LINKCTL_ASPM_CTL_L0S:
1050 (void) ndi_prop_update_string(DDI_DEV_T_NONE, dip,
1051 "pcie-aspm-state", "l0s");
1052 break;
1053 case PCIE_LINKCTL_ASPM_CTL_L1:
1054 (void) ndi_prop_update_string(DDI_DEV_T_NONE, dip,
1055 "pcie-aspm-state", "l1");
1056 break;
1057 case PCIE_LINKCTL_ASPM_CTL_L0S_L1:
1058 (void) ndi_prop_update_string(DDI_DEV_T_NONE, dip,
1059 "pcie-aspm-state", "l0s,l1");
1060 break;
1061 }
1062 }
1063
/*
 * PCI-Express child device initialization. Note, this only will be called on a
 * device or function if we actually attach a device driver to it.
 *
 * This function sets up the error-handling config access handle, programs the
 * command (and, for bridges, bridge control) registers, and for PCIe devices
 * puts the device control register into its default state, enables PCIe error
 * reporting and records serial number, ASPM and link speed information.
 * Note, tagging, the max packet size, and related are all set up before this
 * point; that is performed in pcie_fabric_setup().
 *
 * @param   cdip   child's dip (device's dip)
 * @return  DDI_SUCCESS, or DDI_FAILURE if cdip has no pcie_bus_t or the
 *          config access handle cannot be set up
 */
/* ARGSUSED */
int
pcie_initchild(dev_info_t *cdip)
{
	uint16_t tmp16, reg16;
	pcie_bus_t *bus_p;
	uint32_t devid, venid;

	bus_p = PCIE_DIP2BUS(cdip);
	if (bus_p == NULL) {
		PCIE_DBG("%s: BUS not found.\n",
		    ddi_driver_name(cdip));

		return (DDI_FAILURE);
	}

	/* Everything below accesses config space through bus_cfg_hdl. */
	if (pcie_init_cfghdl(cdip) != DDI_SUCCESS)
		return (DDI_FAILURE);

	/*
	 * Update pcie_bus_t with real Vendor Id Device Id.
	 *
	 * For assigned devices in IOV environment, the OBP will return
	 * faked device id/vendor id on configuration read and for both
	 * properties in root domain. translate_devid() function will
	 * update the properties with real device-id/vendor-id on such
	 * platforms, so that we can utilize the properties here to get
	 * real device-id/vendor-id and overwrite the faked ids.
	 *
	 * For unassigned devices or devices in non-IOV environment, the
	 * operation below won't make a difference.
	 *
	 * The IOV implementation only supports assignment of PCIE
	 * endpoint devices. Devices under pci-pci bridges don't need
	 * operation like this.
	 */
	devid = ddi_prop_get_int(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
	    "device-id", -1);
	venid = ddi_prop_get_int(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
	    "vendor-id", -1);
	/* Packed as device id in the upper 16 bits, vendor id in the lower. */
	bus_p->bus_dev_ven_id = (devid << 16) | (venid & 0xffff);

	/*
	 * Clear the device's status register by writing back the value that
	 * was just read (write-one-to-clear semantics are assumed here).
	 */
	reg16 = PCIE_GET(16, bus_p, PCI_CONF_STAT);
	PCIE_PUT(16, bus_p, PCI_CONF_STAT, reg16);

	/*
	 * Setup the device's command register: keep only the bits selected by
	 * pcie_command_default_fw from the current value and OR in
	 * pcie_command_default.
	 */
	reg16 = PCIE_GET(16, bus_p, PCI_CONF_COMM);
	tmp16 = (reg16 & pcie_command_default_fw) | pcie_command_default;

	/* The SERR disable tunable only applies to PCIe devices. */
	if (pcie_serr_disable_flag && PCIE_IS_PCIE(bus_p))
		tmp16 &= ~PCI_COMM_SERR_ENABLE;

	PCIE_PUT(16, bus_p, PCI_CONF_COMM, tmp16);
	/* Note: the debug call is passed reg16, the value read before the update. */
	PCIE_DBG_CFG(cdip, bus_p, "COMMAND", 16, PCI_CONF_COMM, reg16);

	/*
	 * If the device has a bus control register then program it
	 * based on the settings in the command register.
	 */
	if (PCIE_IS_BDG(bus_p)) {
		/* Clear the device's secondary status register */
		reg16 = PCIE_GET(16, bus_p, PCI_BCNF_SEC_STATUS);
		PCIE_PUT(16, bus_p, PCI_BCNF_SEC_STATUS, reg16);

		/* Setup the device's secondary command register */
		reg16 = PCIE_GET(16, bus_p, PCI_BCNF_BCNTRL);
		tmp16 = (reg16 & pcie_bdg_command_default_fw);

		tmp16 |= PCI_BCNF_BCNTRL_SERR_ENABLE;
		/*
		 * Workaround for this Nvidia bridge. Don't enable the SERR
		 * enable bit in the bridge control register as it could lead to
		 * bogus NMIs. The constant is in the bus_dev_ven_id layout
		 * built above: device id 0x0370, vendor id 0x10DE.
		 */
		if (bus_p->bus_dev_ven_id == 0x037010DE)
			tmp16 &= ~PCI_BCNF_BCNTRL_SERR_ENABLE;

		if (pcie_command_default & PCI_COMM_PARITY_DETECT)
			tmp16 |= PCI_BCNF_BCNTRL_PARITY_ENABLE;

		/*
		 * Enable Master Abort Mode only if URs have not been masked.
		 * For PCI and PCIe-PCI bridges, enabling this bit causes a
		 * Master Aborts/UR to be forwarded as a UR/TA or SERR. If this
		 * bit is masked, posted requests are dropped and non-posted
		 * requests are returned with -1.
		 */
		if (pcie_aer_uce_mask & PCIE_AER_UCE_UR)
			tmp16 &= ~PCI_BCNF_BCNTRL_MAST_AB_MODE;
		else
			tmp16 |= PCI_BCNF_BCNTRL_MAST_AB_MODE;
		PCIE_PUT(16, bus_p, PCI_BCNF_BCNTRL, tmp16);
		PCIE_DBG_CFG(cdip, bus_p, "SEC CMD", 16, PCI_BCNF_BCNTRL,
		    reg16);
	}

	if (PCIE_IS_PCIE(bus_p)) {
		/*
		 * Get the device control register into an initial state that
		 * makes sense. The maximum payload, tagging, and related will
		 * be dealt with in pcie_fabric_setup().
		 *
		 * Bits set in pcie_devctl_default_mask keep their current
		 * register value; all other bits come from
		 * pcie_devctl_default.
		 */
		reg16 = PCIE_CAP_GET(16, bus_p, PCIE_DEVCTL);
		tmp16 = (reg16 & pcie_devctl_default_mask) |
		    (pcie_devctl_default & ~pcie_devctl_default_mask);
		PCIE_CAP_PUT(16, bus_p, PCIE_DEVCTL, tmp16);
		PCIE_DBG_CAP(cdip, bus_p, "DEVCTL", 16, PCIE_DEVCTL, reg16);

		/* Enable PCIe errors */
		pcie_enable_errors(cdip);

		pcie_determine_serial(cdip);

		pcie_determine_aspm(cdip);

		pcie_capture_speeds(cdip);
	}

	/*
	 * The child is only marked as an ARI device when its parent has ARI
	 * forwarding enabled and the child itself reports being ARI capable.
	 */
	bus_p->bus_ari = B_FALSE;
	if ((pcie_ari_is_enabled(ddi_get_parent(cdip))
	    == PCIE_ARI_FORW_ENABLED) && (pcie_ari_device(cdip)
	    == PCIE_ARI_DEVICE)) {
		bus_p->bus_ari = B_TRUE;
	}

	return (DDI_SUCCESS);
}
1204
1205 static void
pcie_init_pfd(dev_info_t * dip)1206 pcie_init_pfd(dev_info_t *dip)
1207 {
1208 pf_data_t *pfd_p = PCIE_ZALLOC(pf_data_t);
1209 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
1210
1211 PCIE_DIP2PFD(dip) = pfd_p;
1212
1213 pfd_p->pe_bus_p = bus_p;
1214 pfd_p->pe_severity_flags = 0;
1215 pfd_p->pe_severity_mask = 0;
1216 pfd_p->pe_orig_severity_flags = 0;
1217 pfd_p->pe_lock = B_FALSE;
1218 pfd_p->pe_valid = B_FALSE;
1219
1220 /* Allocate the root fault struct for both RC and RP */
1221 if (PCIE_IS_ROOT(bus_p)) {
1222 PCIE_ROOT_FAULT(pfd_p) = PCIE_ZALLOC(pf_root_fault_t);
1223 PCIE_ROOT_FAULT(pfd_p)->scan_bdf = PCIE_INVALID_BDF;
1224 PCIE_ROOT_EH_SRC(pfd_p) = PCIE_ZALLOC(pf_root_eh_src_t);
1225 }
1226
1227 PCI_ERR_REG(pfd_p) = PCIE_ZALLOC(pf_pci_err_regs_t);
1228 PFD_AFFECTED_DEV(pfd_p) = PCIE_ZALLOC(pf_affected_dev_t);
1229 PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf = PCIE_INVALID_BDF;
1230
1231 if (PCIE_IS_BDG(bus_p))
1232 PCI_BDG_ERR_REG(pfd_p) = PCIE_ZALLOC(pf_pci_bdg_err_regs_t);
1233
1234 if (PCIE_IS_PCIE(bus_p)) {
1235 PCIE_ERR_REG(pfd_p) = PCIE_ZALLOC(pf_pcie_err_regs_t);
1236
1237 if (PCIE_IS_RP(bus_p))
1238 PCIE_RP_REG(pfd_p) =
1239 PCIE_ZALLOC(pf_pcie_rp_err_regs_t);
1240
1241 PCIE_ADV_REG(pfd_p) = PCIE_ZALLOC(pf_pcie_adv_err_regs_t);
1242 PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_bdf = PCIE_INVALID_BDF;
1243
1244 if (PCIE_IS_RP(bus_p)) {
1245 PCIE_ADV_RP_REG(pfd_p) =
1246 PCIE_ZALLOC(pf_pcie_adv_rp_err_regs_t);
1247 PCIE_ADV_RP_REG(pfd_p)->pcie_rp_ce_src_id =
1248 PCIE_INVALID_BDF;
1249 PCIE_ADV_RP_REG(pfd_p)->pcie_rp_ue_src_id =
1250 PCIE_INVALID_BDF;
1251 } else if (PCIE_IS_PCIE_BDG(bus_p)) {
1252 PCIE_ADV_BDG_REG(pfd_p) =
1253 PCIE_ZALLOC(pf_pcie_adv_bdg_err_regs_t);
1254 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf =
1255 PCIE_INVALID_BDF;
1256 }
1257
1258 if (PCIE_IS_PCIE_BDG(bus_p) && PCIE_IS_PCIX(bus_p)) {
1259 PCIX_BDG_ERR_REG(pfd_p) =
1260 PCIE_ZALLOC(pf_pcix_bdg_err_regs_t);
1261
1262 if (PCIX_ECC_VERSION_CHECK(bus_p)) {
1263 PCIX_BDG_ECC_REG(pfd_p, 0) =
1264 PCIE_ZALLOC(pf_pcix_ecc_regs_t);
1265 PCIX_BDG_ECC_REG(pfd_p, 1) =
1266 PCIE_ZALLOC(pf_pcix_ecc_regs_t);
1267 }
1268 }
1269
1270 PCIE_SLOT_REG(pfd_p) = PCIE_ZALLOC(pf_pcie_slot_regs_t);
1271 PCIE_SLOT_REG(pfd_p)->pcie_slot_regs_valid = B_FALSE;
1272 PCIE_SLOT_REG(pfd_p)->pcie_slot_cap = 0;
1273 PCIE_SLOT_REG(pfd_p)->pcie_slot_control = 0;
1274 PCIE_SLOT_REG(pfd_p)->pcie_slot_status = 0;
1275
1276 } else if (PCIE_IS_PCIX(bus_p)) {
1277 if (PCIE_IS_BDG(bus_p)) {
1278 PCIX_BDG_ERR_REG(pfd_p) =
1279 PCIE_ZALLOC(pf_pcix_bdg_err_regs_t);
1280
1281 if (PCIX_ECC_VERSION_CHECK(bus_p)) {
1282 PCIX_BDG_ECC_REG(pfd_p, 0) =
1283 PCIE_ZALLOC(pf_pcix_ecc_regs_t);
1284 PCIX_BDG_ECC_REG(pfd_p, 1) =
1285 PCIE_ZALLOC(pf_pcix_ecc_regs_t);
1286 }
1287 } else {
1288 PCIX_ERR_REG(pfd_p) = PCIE_ZALLOC(pf_pcix_err_regs_t);
1289
1290 if (PCIX_ECC_VERSION_CHECK(bus_p))
1291 PCIX_ECC_REG(pfd_p) =
1292 PCIE_ZALLOC(pf_pcix_ecc_regs_t);
1293 }
1294 }
1295 }
1296
1297 static void
pcie_fini_pfd(dev_info_t * dip)1298 pcie_fini_pfd(dev_info_t *dip)
1299 {
1300 pf_data_t *pfd_p = PCIE_DIP2PFD(dip);
1301 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
1302
1303 if (PCIE_IS_PCIE(bus_p)) {
1304 if (PCIE_IS_PCIE_BDG(bus_p) && PCIE_IS_PCIX(bus_p)) {
1305 if (PCIX_ECC_VERSION_CHECK(bus_p)) {
1306 kmem_free(PCIX_BDG_ECC_REG(pfd_p, 0),
1307 sizeof (pf_pcix_ecc_regs_t));
1308 kmem_free(PCIX_BDG_ECC_REG(pfd_p, 1),
1309 sizeof (pf_pcix_ecc_regs_t));
1310 }
1311
1312 kmem_free(PCIX_BDG_ERR_REG(pfd_p),
1313 sizeof (pf_pcix_bdg_err_regs_t));
1314 }
1315
1316 if (PCIE_IS_RP(bus_p))
1317 kmem_free(PCIE_ADV_RP_REG(pfd_p),
1318 sizeof (pf_pcie_adv_rp_err_regs_t));
1319 else if (PCIE_IS_PCIE_BDG(bus_p))
1320 kmem_free(PCIE_ADV_BDG_REG(pfd_p),
1321 sizeof (pf_pcie_adv_bdg_err_regs_t));
1322
1323 kmem_free(PCIE_ADV_REG(pfd_p),
1324 sizeof (pf_pcie_adv_err_regs_t));
1325
1326 if (PCIE_IS_RP(bus_p))
1327 kmem_free(PCIE_RP_REG(pfd_p),
1328 sizeof (pf_pcie_rp_err_regs_t));
1329
1330 kmem_free(PCIE_ERR_REG(pfd_p), sizeof (pf_pcie_err_regs_t));
1331 } else if (PCIE_IS_PCIX(bus_p)) {
1332 if (PCIE_IS_BDG(bus_p)) {
1333 if (PCIX_ECC_VERSION_CHECK(bus_p)) {
1334 kmem_free(PCIX_BDG_ECC_REG(pfd_p, 0),
1335 sizeof (pf_pcix_ecc_regs_t));
1336 kmem_free(PCIX_BDG_ECC_REG(pfd_p, 1),
1337 sizeof (pf_pcix_ecc_regs_t));
1338 }
1339
1340 kmem_free(PCIX_BDG_ERR_REG(pfd_p),
1341 sizeof (pf_pcix_bdg_err_regs_t));
1342 } else {
1343 if (PCIX_ECC_VERSION_CHECK(bus_p))
1344 kmem_free(PCIX_ECC_REG(pfd_p),
1345 sizeof (pf_pcix_ecc_regs_t));
1346
1347 kmem_free(PCIX_ERR_REG(pfd_p),
1348 sizeof (pf_pcix_err_regs_t));
1349 }
1350 }
1351
1352 if (PCIE_IS_BDG(bus_p))
1353 kmem_free(PCI_BDG_ERR_REG(pfd_p),
1354 sizeof (pf_pci_bdg_err_regs_t));
1355
1356 kmem_free(PFD_AFFECTED_DEV(pfd_p), sizeof (pf_affected_dev_t));
1357 kmem_free(PCI_ERR_REG(pfd_p), sizeof (pf_pci_err_regs_t));
1358
1359 if (PCIE_IS_ROOT(bus_p)) {
1360 kmem_free(PCIE_ROOT_FAULT(pfd_p), sizeof (pf_root_fault_t));
1361 kmem_free(PCIE_ROOT_EH_SRC(pfd_p), sizeof (pf_root_eh_src_t));
1362 }
1363
1364 kmem_free(PCIE_DIP2PFD(dip), sizeof (pf_data_t));
1365
1366 PCIE_DIP2PFD(dip) = NULL;
1367 }
1368
1369
/*
 * Special functions to allocate pf_data_t's for PCIe root complexes.
 * Note: Root Complex not Root Port
 *
 * pcie_rc_init_pfd() fills in a caller-supplied pf_data_t: the pf_data_t
 * itself is not allocated here, only the structures that hang off it. The
 * matching teardown is pcie_rc_fini_pfd().
 *
 * NOTE(review): the allocation order below looks significant; several of the
 * accessor macros may resolve through a structure allocated by an earlier
 * statement. Confirm the macro definitions before reordering.
 */
void
pcie_rc_init_pfd(dev_info_t *dip, pf_data_t *pfd_p)
{
	/* The root complex uses its "down" bus structure. */
	pfd_p->pe_bus_p = PCIE_DIP2DOWNBUS(dip);
	pfd_p->pe_severity_flags = 0;
	pfd_p->pe_severity_mask = 0;
	pfd_p->pe_orig_severity_flags = 0;
	pfd_p->pe_lock = B_FALSE;
	pfd_p->pe_valid = B_FALSE;

	PCIE_ROOT_FAULT(pfd_p) = PCIE_ZALLOC(pf_root_fault_t);
	PCIE_ROOT_FAULT(pfd_p)->scan_bdf = PCIE_INVALID_BDF;
	PCIE_ROOT_EH_SRC(pfd_p) = PCIE_ZALLOC(pf_root_eh_src_t);
	PCI_ERR_REG(pfd_p) = PCIE_ZALLOC(pf_pci_err_regs_t);
	PFD_AFFECTED_DEV(pfd_p) = PCIE_ZALLOC(pf_affected_dev_t);
	PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf = PCIE_INVALID_BDF;
	PCI_BDG_ERR_REG(pfd_p) = PCIE_ZALLOC(pf_pci_bdg_err_regs_t);
	PCIE_ERR_REG(pfd_p) = PCIE_ZALLOC(pf_pcie_err_regs_t);
	PCIE_RP_REG(pfd_p) = PCIE_ZALLOC(pf_pcie_rp_err_regs_t);
	PCIE_ADV_REG(pfd_p) = PCIE_ZALLOC(pf_pcie_adv_err_regs_t);
	PCIE_ADV_RP_REG(pfd_p) = PCIE_ZALLOC(pf_pcie_adv_rp_err_regs_t);
	PCIE_ADV_RP_REG(pfd_p)->pcie_rp_ce_src_id = PCIE_INVALID_BDF;
	PCIE_ADV_RP_REG(pfd_p)->pcie_rp_ue_src_id = PCIE_INVALID_BDF;

	/* Seed the recorded UE severity with the global default. */
	PCIE_ADV_REG(pfd_p)->pcie_ue_sev = pcie_aer_uce_severity;
}
1400
/*
 * Free everything pcie_rc_init_pfd() allocated for a root complex
 * pf_data_t. The pf_data_t itself is not freed here.
 *
 * NOTE(review): the free order looks significant; some accessor macros may
 * resolve through a structure that is freed by a later statement. Confirm
 * the macro definitions before reordering.
 */
void
pcie_rc_fini_pfd(pf_data_t *pfd_p)
{
	kmem_free(PCIE_ADV_RP_REG(pfd_p), sizeof (pf_pcie_adv_rp_err_regs_t));
	kmem_free(PCIE_ADV_REG(pfd_p), sizeof (pf_pcie_adv_err_regs_t));
	kmem_free(PCIE_RP_REG(pfd_p), sizeof (pf_pcie_rp_err_regs_t));
	kmem_free(PCIE_ERR_REG(pfd_p), sizeof (pf_pcie_err_regs_t));
	kmem_free(PCI_BDG_ERR_REG(pfd_p), sizeof (pf_pci_bdg_err_regs_t));
	kmem_free(PFD_AFFECTED_DEV(pfd_p), sizeof (pf_affected_dev_t));
	kmem_free(PCI_ERR_REG(pfd_p), sizeof (pf_pci_err_regs_t));
	kmem_free(PCIE_ROOT_FAULT(pfd_p), sizeof (pf_root_fault_t));
	kmem_free(PCIE_ROOT_EH_SRC(pfd_p), sizeof (pf_root_eh_src_t));
}
1414
1415 /*
1416 * init pcie_bus_t for root complex
1417 *
 * Only a few of the fields in pcie_bus_t are valid for a root complex.
1419 * The fields that are bracketed are initialized in this routine:
1420 *
1421 * dev_info_t * <bus_dip>
1422 * dev_info_t * bus_rp_dip
1423 * ddi_acc_handle_t bus_cfg_hdl
1424 * uint_t <bus_fm_flags>
1425 * pcie_req_id_t bus_bdf
1426 * pcie_req_id_t bus_rp_bdf
1427 * uint32_t bus_dev_ven_id
1428 * uint8_t bus_rev_id
1429 * uint8_t <bus_hdr_type>
1430 * uint16_t <bus_dev_type>
1431 * uint8_t bus_bdg_secbus
1432 * uint16_t bus_pcie_off
1433 * uint16_t <bus_aer_off>
1434 * uint16_t bus_pcix_off
1435 * uint16_t bus_ecc_ver
1436 * pci_bus_range_t bus_bus_range
1437 * ppb_ranges_t * bus_addr_ranges
1438 * int bus_addr_entries
1439 * pci_regspec_t * bus_assigned_addr
1440 * int bus_assigned_entries
1441 * pf_data_t * bus_pfd
1442 * pcie_domain_t * <bus_dom>
1443 * int bus_mps
1444 * uint64_t bus_cfgacc_base
1445 * void * bus_plat_private
1446 */
1447 void
pcie_rc_init_bus(dev_info_t * dip)1448 pcie_rc_init_bus(dev_info_t *dip)
1449 {
1450 pcie_bus_t *bus_p;
1451
1452 bus_p = (pcie_bus_t *)kmem_zalloc(sizeof (pcie_bus_t), KM_SLEEP);
1453 bus_p->bus_dip = dip;
1454 bus_p->bus_dev_type = PCIE_PCIECAP_DEV_TYPE_RC_PSEUDO;
1455 bus_p->bus_hdr_type = PCI_HEADER_ONE;
1456
1457 /* Fake that there are AER logs */
1458 bus_p->bus_aer_off = (uint16_t)-1;
1459
1460 /* Needed only for handle lookup */
1461 atomic_or_uint(&bus_p->bus_fm_flags, PF_FM_READY);
1462
1463 ndi_set_bus_private(dip, B_FALSE, DEVI_PORT_TYPE_PCI, bus_p);
1464
1465 PCIE_BUS2DOM(bus_p) = PCIE_ZALLOC(pcie_domain_t);
1466 }
1467
1468 void
pcie_rc_fini_bus(dev_info_t * dip)1469 pcie_rc_fini_bus(dev_info_t *dip)
1470 {
1471 pcie_bus_t *bus_p = PCIE_DIP2DOWNBUS(dip);
1472 ndi_set_bus_private(dip, B_FALSE, 0, NULL);
1473 kmem_free(PCIE_BUS2DOM(bus_p), sizeof (pcie_domain_t));
1474 kmem_free(bus_p, sizeof (pcie_bus_t));
1475 }
1476
1477 static int
pcie_width_to_int(pcie_link_width_t width)1478 pcie_width_to_int(pcie_link_width_t width)
1479 {
1480 switch (width) {
1481 case PCIE_LINK_WIDTH_X1:
1482 return (1);
1483 case PCIE_LINK_WIDTH_X2:
1484 return (2);
1485 case PCIE_LINK_WIDTH_X4:
1486 return (4);
1487 case PCIE_LINK_WIDTH_X8:
1488 return (8);
1489 case PCIE_LINK_WIDTH_X12:
1490 return (12);
1491 case PCIE_LINK_WIDTH_X16:
1492 return (16);
1493 case PCIE_LINK_WIDTH_X32:
1494 return (32);
1495 default:
1496 return (0);
1497 }
1498 }
1499
1500 /*
1501 * Return the speed in Transfers / second. This is a signed quantity to match
1502 * the ndi/ddi property interfaces.
1503 */
1504 static int64_t
pcie_speed_to_int(pcie_link_speed_t speed)1505 pcie_speed_to_int(pcie_link_speed_t speed)
1506 {
1507 switch (speed) {
1508 case PCIE_LINK_SPEED_2_5:
1509 return (2500000000LL);
1510 case PCIE_LINK_SPEED_5:
1511 return (5000000000LL);
1512 case PCIE_LINK_SPEED_8:
1513 return (8000000000LL);
1514 case PCIE_LINK_SPEED_16:
1515 return (16000000000LL);
1516 case PCIE_LINK_SPEED_32:
1517 return (32000000000LL);
1518 case PCIE_LINK_SPEED_64:
1519 return (64000000000LL);
1520 default:
1521 return (0);
1522 }
1523 }
1524
1525 /*
1526 * Translate the recorded speed information into devinfo properties.
1527 */
1528 static void
pcie_speeds_to_devinfo(dev_info_t * dip,pcie_bus_t * bus_p)1529 pcie_speeds_to_devinfo(dev_info_t *dip, pcie_bus_t *bus_p)
1530 {
1531 if (bus_p->bus_max_width != PCIE_LINK_WIDTH_UNKNOWN) {
1532 (void) ndi_prop_update_int(DDI_DEV_T_NONE, dip,
1533 "pcie-link-maximum-width",
1534 pcie_width_to_int(bus_p->bus_max_width));
1535 }
1536
1537 if (bus_p->bus_cur_width != PCIE_LINK_WIDTH_UNKNOWN) {
1538 (void) ndi_prop_update_int(DDI_DEV_T_NONE, dip,
1539 "pcie-link-current-width",
1540 pcie_width_to_int(bus_p->bus_cur_width));
1541 }
1542
1543 if (bus_p->bus_cur_speed != PCIE_LINK_SPEED_UNKNOWN) {
1544 (void) ndi_prop_update_int64(DDI_DEV_T_NONE, dip,
1545 "pcie-link-current-speed",
1546 pcie_speed_to_int(bus_p->bus_cur_speed));
1547 }
1548
1549 if (bus_p->bus_max_speed != PCIE_LINK_SPEED_UNKNOWN) {
1550 (void) ndi_prop_update_int64(DDI_DEV_T_NONE, dip,
1551 "pcie-link-maximum-speed",
1552 pcie_speed_to_int(bus_p->bus_max_speed));
1553 }
1554
1555 if (bus_p->bus_target_speed != PCIE_LINK_SPEED_UNKNOWN) {
1556 (void) ndi_prop_update_int64(DDI_DEV_T_NONE, dip,
1557 "pcie-link-target-speed",
1558 pcie_speed_to_int(bus_p->bus_target_speed));
1559 }
1560
1561 if ((bus_p->bus_speed_flags & PCIE_LINK_F_ADMIN_TARGET) != 0) {
1562 (void) ndi_prop_create_boolean(DDI_DEV_T_NONE, dip,
1563 "pcie-link-admin-target-speed");
1564 }
1565
1566 if (bus_p->bus_sup_speed != PCIE_LINK_SPEED_UNKNOWN) {
1567 int64_t speeds[PCIE_NSPEEDS];
1568 uint_t nspeeds = 0;
1569
1570 if (bus_p->bus_sup_speed & PCIE_LINK_SPEED_2_5) {
1571 speeds[nspeeds++] =
1572 pcie_speed_to_int(PCIE_LINK_SPEED_2_5);
1573 }
1574
1575 if (bus_p->bus_sup_speed & PCIE_LINK_SPEED_5) {
1576 speeds[nspeeds++] =
1577 pcie_speed_to_int(PCIE_LINK_SPEED_5);
1578 }
1579
1580 if (bus_p->bus_sup_speed & PCIE_LINK_SPEED_8) {
1581 speeds[nspeeds++] =
1582 pcie_speed_to_int(PCIE_LINK_SPEED_8);
1583 }
1584
1585 if (bus_p->bus_sup_speed & PCIE_LINK_SPEED_16) {
1586 speeds[nspeeds++] =
1587 pcie_speed_to_int(PCIE_LINK_SPEED_16);
1588 }
1589
1590 if (bus_p->bus_sup_speed & PCIE_LINK_SPEED_32) {
1591 speeds[nspeeds++] =
1592 pcie_speed_to_int(PCIE_LINK_SPEED_32);
1593 }
1594
1595 if (bus_p->bus_sup_speed & PCIE_LINK_SPEED_64) {
1596 speeds[nspeeds++] =
1597 pcie_speed_to_int(PCIE_LINK_SPEED_64);
1598 }
1599
1600 (void) ndi_prop_update_int64_array(DDI_DEV_T_NONE, dip,
1601 "pcie-link-supported-speeds", speeds, nspeeds);
1602 }
1603 }
1604
1605 /*
1606 * We need to capture the supported, maximum, and current device speed and
1607 * width. The way that this has been done has changed over time.
1608 *
1609 * Prior to PCIe Gen 3, there were only current and supported speed fields.
1610 * These were found in the link status and link capabilities registers of the
1611 * PCI express capability. With the change to PCIe Gen 3, the information in the
1612 * link capabilities changed to the maximum value. The supported speeds vector
1613 * was moved to the link capabilities 2 register.
1614 *
1615 * Now, a device may not implement some of these registers. To determine whether
1616 * or not it's here, we have to do the following. First, we need to check the
1617 * revision of the PCI express capability. The link capabilities 2 register did
1618 * not exist prior to version 2 of this capability. If a modern device does not
1619 * implement it, it is supposed to return zero for the register.
1620 */
1621 static void
pcie_capture_speeds(dev_info_t * dip)1622 pcie_capture_speeds(dev_info_t *dip)
1623 {
1624 uint16_t vers, status;
1625 uint32_t cap, cap2, ctl2;
1626 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
1627 dev_info_t *rcdip;
1628
1629 if (!PCIE_IS_PCIE(bus_p))
1630 return;
1631
1632 rcdip = pcie_get_rc_dip(dip);
1633 if (bus_p->bus_cfg_hdl == NULL) {
1634 vers = pci_cfgacc_get16(rcdip, bus_p->bus_bdf,
1635 bus_p->bus_pcie_off + PCIE_PCIECAP);
1636 } else {
1637 vers = PCIE_CAP_GET(16, bus_p, PCIE_PCIECAP);
1638 }
1639 if (vers == PCI_EINVAL16)
1640 return;
1641 vers &= PCIE_PCIECAP_VER_MASK;
1642
1643 /*
1644 * Verify the capability's version.
1645 */
1646 switch (vers) {
1647 case PCIE_PCIECAP_VER_1_0:
1648 cap2 = 0;
1649 ctl2 = 0;
1650 break;
1651 case PCIE_PCIECAP_VER_2_0:
1652 if (bus_p->bus_cfg_hdl == NULL) {
1653 cap2 = pci_cfgacc_get32(rcdip, bus_p->bus_bdf,
1654 bus_p->bus_pcie_off + PCIE_LINKCAP2);
1655 ctl2 = pci_cfgacc_get16(rcdip, bus_p->bus_bdf,
1656 bus_p->bus_pcie_off + PCIE_LINKCTL2);
1657 } else {
1658 cap2 = PCIE_CAP_GET(32, bus_p, PCIE_LINKCAP2);
1659 ctl2 = PCIE_CAP_GET(16, bus_p, PCIE_LINKCTL2);
1660 }
1661 if (cap2 == PCI_EINVAL32)
1662 cap2 = 0;
1663 if (ctl2 == PCI_EINVAL16)
1664 ctl2 = 0;
1665 break;
1666 default:
1667 /* Don't try and handle an unknown version */
1668 return;
1669 }
1670
1671 if (bus_p->bus_cfg_hdl == NULL) {
1672 status = pci_cfgacc_get16(rcdip, bus_p->bus_bdf,
1673 bus_p->bus_pcie_off + PCIE_LINKSTS);
1674 cap = pci_cfgacc_get32(rcdip, bus_p->bus_bdf,
1675 bus_p->bus_pcie_off + PCIE_LINKCAP);
1676 } else {
1677 status = PCIE_CAP_GET(16, bus_p, PCIE_LINKSTS);
1678 cap = PCIE_CAP_GET(32, bus_p, PCIE_LINKCAP);
1679 }
1680 if (status == PCI_EINVAL16 || cap == PCI_EINVAL32)
1681 return;
1682
1683 mutex_enter(&bus_p->bus_speed_mutex);
1684
1685 switch (status & PCIE_LINKSTS_SPEED_MASK) {
1686 case PCIE_LINKSTS_SPEED_2_5:
1687 bus_p->bus_cur_speed = PCIE_LINK_SPEED_2_5;
1688 break;
1689 case PCIE_LINKSTS_SPEED_5:
1690 bus_p->bus_cur_speed = PCIE_LINK_SPEED_5;
1691 break;
1692 case PCIE_LINKSTS_SPEED_8:
1693 bus_p->bus_cur_speed = PCIE_LINK_SPEED_8;
1694 break;
1695 case PCIE_LINKSTS_SPEED_16:
1696 bus_p->bus_cur_speed = PCIE_LINK_SPEED_16;
1697 break;
1698 case PCIE_LINKSTS_SPEED_32:
1699 bus_p->bus_cur_speed = PCIE_LINK_SPEED_32;
1700 break;
1701 case PCIE_LINKSTS_SPEED_64:
1702 bus_p->bus_cur_speed = PCIE_LINK_SPEED_64;
1703 break;
1704 default:
1705 bus_p->bus_cur_speed = PCIE_LINK_SPEED_UNKNOWN;
1706 break;
1707 }
1708
1709 switch (status & PCIE_LINKSTS_NEG_WIDTH_MASK) {
1710 case PCIE_LINKSTS_NEG_WIDTH_X1:
1711 bus_p->bus_cur_width = PCIE_LINK_WIDTH_X1;
1712 break;
1713 case PCIE_LINKSTS_NEG_WIDTH_X2:
1714 bus_p->bus_cur_width = PCIE_LINK_WIDTH_X2;
1715 break;
1716 case PCIE_LINKSTS_NEG_WIDTH_X4:
1717 bus_p->bus_cur_width = PCIE_LINK_WIDTH_X4;
1718 break;
1719 case PCIE_LINKSTS_NEG_WIDTH_X8:
1720 bus_p->bus_cur_width = PCIE_LINK_WIDTH_X8;
1721 break;
1722 case PCIE_LINKSTS_NEG_WIDTH_X12:
1723 bus_p->bus_cur_width = PCIE_LINK_WIDTH_X12;
1724 break;
1725 case PCIE_LINKSTS_NEG_WIDTH_X16:
1726 bus_p->bus_cur_width = PCIE_LINK_WIDTH_X16;
1727 break;
1728 case PCIE_LINKSTS_NEG_WIDTH_X32:
1729 bus_p->bus_cur_width = PCIE_LINK_WIDTH_X32;
1730 break;
1731 default:
1732 bus_p->bus_cur_width = PCIE_LINK_WIDTH_UNKNOWN;
1733 break;
1734 }
1735
1736 switch (cap & PCIE_LINKCAP_MAX_WIDTH_MASK) {
1737 case PCIE_LINKCAP_MAX_WIDTH_X1:
1738 bus_p->bus_max_width = PCIE_LINK_WIDTH_X1;
1739 break;
1740 case PCIE_LINKCAP_MAX_WIDTH_X2:
1741 bus_p->bus_max_width = PCIE_LINK_WIDTH_X2;
1742 break;
1743 case PCIE_LINKCAP_MAX_WIDTH_X4:
1744 bus_p->bus_max_width = PCIE_LINK_WIDTH_X4;
1745 break;
1746 case PCIE_LINKCAP_MAX_WIDTH_X8:
1747 bus_p->bus_max_width = PCIE_LINK_WIDTH_X8;
1748 break;
1749 case PCIE_LINKCAP_MAX_WIDTH_X12:
1750 bus_p->bus_max_width = PCIE_LINK_WIDTH_X12;
1751 break;
1752 case PCIE_LINKCAP_MAX_WIDTH_X16:
1753 bus_p->bus_max_width = PCIE_LINK_WIDTH_X16;
1754 break;
1755 case PCIE_LINKCAP_MAX_WIDTH_X32:
1756 bus_p->bus_max_width = PCIE_LINK_WIDTH_X32;
1757 break;
1758 default:
1759 bus_p->bus_max_width = PCIE_LINK_WIDTH_UNKNOWN;
1760 break;
1761 }
1762
1763 /*
1764 * If we have the Link Capabilities 2, then we can get the supported
1765 * speeds from it and treat the bits in Link Capabilities 1 as the
1766 * maximum. If we don't, then we need to follow the Implementation Note
1767 * in the standard under Link Capabilities 2. Effectively, this means
1768 * that if the value of 10b is set in Link Capabilities register, that
1769 * it supports both 2.5 and 5 GT/s speeds.
1770 */
1771 if (cap2 != 0) {
1772 if (cap2 & PCIE_LINKCAP2_SPEED_2_5)
1773 bus_p->bus_sup_speed |= PCIE_LINK_SPEED_2_5;
1774 if (cap2 & PCIE_LINKCAP2_SPEED_5)
1775 bus_p->bus_sup_speed |= PCIE_LINK_SPEED_5;
1776 if (cap2 & PCIE_LINKCAP2_SPEED_8)
1777 bus_p->bus_sup_speed |= PCIE_LINK_SPEED_8;
1778 if (cap2 & PCIE_LINKCAP2_SPEED_16)
1779 bus_p->bus_sup_speed |= PCIE_LINK_SPEED_16;
1780 if (cap2 & PCIE_LINKCAP2_SPEED_32)
1781 bus_p->bus_sup_speed |= PCIE_LINK_SPEED_32;
1782 if (cap2 & PCIE_LINKCAP2_SPEED_64)
1783 bus_p->bus_sup_speed |= PCIE_LINK_SPEED_64;
1784
1785 switch (cap & PCIE_LINKCAP_MAX_SPEED_MASK) {
1786 case PCIE_LINKCAP_MAX_SPEED_2_5:
1787 bus_p->bus_max_speed = PCIE_LINK_SPEED_2_5;
1788 break;
1789 case PCIE_LINKCAP_MAX_SPEED_5:
1790 bus_p->bus_max_speed = PCIE_LINK_SPEED_5;
1791 break;
1792 case PCIE_LINKCAP_MAX_SPEED_8:
1793 bus_p->bus_max_speed = PCIE_LINK_SPEED_8;
1794 break;
1795 case PCIE_LINKCAP_MAX_SPEED_16:
1796 bus_p->bus_max_speed = PCIE_LINK_SPEED_16;
1797 break;
1798 case PCIE_LINKCAP_MAX_SPEED_32:
1799 bus_p->bus_max_speed = PCIE_LINK_SPEED_32;
1800 break;
1801 case PCIE_LINKCAP_MAX_SPEED_64:
1802 bus_p->bus_max_speed = PCIE_LINK_SPEED_64;
1803 break;
1804 default:
1805 bus_p->bus_max_speed = PCIE_LINK_SPEED_UNKNOWN;
1806 break;
1807 }
1808 } else {
1809 if (cap & PCIE_LINKCAP_MAX_SPEED_5) {
1810 bus_p->bus_max_speed = PCIE_LINK_SPEED_5;
1811 bus_p->bus_sup_speed = PCIE_LINK_SPEED_2_5 |
1812 PCIE_LINK_SPEED_5;
1813 } else if (cap & PCIE_LINKCAP_MAX_SPEED_2_5) {
1814 bus_p->bus_max_speed = PCIE_LINK_SPEED_2_5;
1815 bus_p->bus_sup_speed = PCIE_LINK_SPEED_2_5;
1816 }
1817 }
1818
1819 switch (ctl2 & PCIE_LINKCTL2_TARGET_SPEED_MASK) {
1820 case PCIE_LINKCTL2_TARGET_SPEED_2_5:
1821 bus_p->bus_target_speed = PCIE_LINK_SPEED_2_5;
1822 break;
1823 case PCIE_LINKCTL2_TARGET_SPEED_5:
1824 bus_p->bus_target_speed = PCIE_LINK_SPEED_5;
1825 break;
1826 case PCIE_LINKCTL2_TARGET_SPEED_8:
1827 bus_p->bus_target_speed = PCIE_LINK_SPEED_8;
1828 break;
1829 case PCIE_LINKCTL2_TARGET_SPEED_16:
1830 bus_p->bus_target_speed = PCIE_LINK_SPEED_16;
1831 break;
1832 case PCIE_LINKCTL2_TARGET_SPEED_32:
1833 bus_p->bus_target_speed = PCIE_LINK_SPEED_32;
1834 break;
1835 case PCIE_LINKCTL2_TARGET_SPEED_64:
1836 bus_p->bus_target_speed = PCIE_LINK_SPEED_64;
1837 break;
1838 default:
1839 bus_p->bus_target_speed = PCIE_LINK_SPEED_UNKNOWN;
1840 break;
1841 }
1842
1843 pcie_speeds_to_devinfo(dip, bus_p);
1844 mutex_exit(&bus_p->bus_speed_mutex);
1845 }
1846
1847 /*
1848 * partially init pcie_bus_t for device (dip,bdf) for accessing pci
1849 * config space
1850 *
1851 * This routine is invoked during boot, either after creating a devinfo node
1852 * (x86 case) or during px driver attach (sparc case); it is also invoked
1853 * in hotplug context after a devinfo node is created.
1854 *
1855 * The fields that are bracketed are initialized if flag PCIE_BUS_INITIAL
1856 * is set:
1857 *
1858 * dev_info_t * <bus_dip>
1859 * dev_info_t * <bus_rp_dip>
1860 * ddi_acc_handle_t bus_cfg_hdl
1861 * uint_t bus_fm_flags
1862 * pcie_req_id_t <bus_bdf>
1863 * pcie_req_id_t <bus_rp_bdf>
1864 * uint32_t <bus_dev_ven_id>
1865 * uint8_t <bus_rev_id>
1866 * uint8_t <bus_hdr_type>
1867 * uint16_t <bus_dev_type>
 * uint8_t		bus_bdg_secbus
1869 * uint16_t <bus_pcie_off>
1870 * uint16_t <bus_aer_off>
1871 * uint16_t <bus_pcix_off>
1872 * uint16_t <bus_ecc_ver>
1873 * pci_bus_range_t bus_bus_range
1874 * ppb_ranges_t * bus_addr_ranges
1875 * int bus_addr_entries
1876 * pci_regspec_t * bus_assigned_addr
1877 * int bus_assigned_entries
1878 * pf_data_t * bus_pfd
1879 * pcie_domain_t * bus_dom
1880 * int bus_mps
1881 * uint64_t bus_cfgacc_base
1882 * void * bus_plat_private
1883 *
1884 * The fields that are bracketed are initialized if flag PCIE_BUS_FINAL
1885 * is set:
1886 *
1887 * dev_info_t * bus_dip
1888 * dev_info_t * bus_rp_dip
1889 * ddi_acc_handle_t bus_cfg_hdl
1890 * uint_t bus_fm_flags
1891 * pcie_req_id_t bus_bdf
1892 * pcie_req_id_t bus_rp_bdf
1893 * uint32_t bus_dev_ven_id
1894 * uint8_t bus_rev_id
1895 * uint8_t bus_hdr_type
1896 * uint16_t bus_dev_type
1897 * uint8_t <bus_bdg_secbus>
1898 * uint16_t bus_pcie_off
1899 * uint16_t bus_aer_off
1900 * uint16_t bus_pcix_off
1901 * uint16_t bus_ecc_ver
1902 * pci_bus_range_t <bus_bus_range>
1903 * ppb_ranges_t * <bus_addr_ranges>
1904 * int <bus_addr_entries>
1905 * pci_regspec_t * <bus_assigned_addr>
1906 * int <bus_assigned_entries>
1907 * pf_data_t * <bus_pfd>
1908 * pcie_domain_t * bus_dom
1909 * int bus_mps
1910 * uint64_t bus_cfgacc_base
1911 * void * <bus_plat_private>
1912 */
1913
1914 pcie_bus_t *
pcie_init_bus(dev_info_t * dip,pcie_req_id_t bdf,uint8_t flags)1915 pcie_init_bus(dev_info_t *dip, pcie_req_id_t bdf, uint8_t flags)
1916 {
1917 uint16_t status, base, baseptr, num_cap;
1918 uint32_t capid;
1919 int range_size;
1920 pcie_bus_t *bus_p = NULL;
1921 dev_info_t *rcdip;
1922 dev_info_t *pdip;
1923 const char *errstr = NULL;
1924
1925 if (!(flags & PCIE_BUS_INITIAL))
1926 goto initial_done;
1927
1928 bus_p = kmem_zalloc(sizeof (pcie_bus_t), KM_SLEEP);
1929
1930 bus_p->bus_dip = dip;
1931 bus_p->bus_bdf = bdf;
1932
1933 rcdip = pcie_get_rc_dip(dip);
1934 ASSERT(rcdip != NULL);
1935
1936 /* Save the Vendor ID, Device ID and revision ID */
1937 bus_p->bus_dev_ven_id = pci_cfgacc_get32(rcdip, bdf, PCI_CONF_VENID);
1938 bus_p->bus_rev_id = pci_cfgacc_get8(rcdip, bdf, PCI_CONF_REVID);
1939 /* Save the Header Type */
1940 bus_p->bus_hdr_type = pci_cfgacc_get8(rcdip, bdf, PCI_CONF_HEADER);
1941 bus_p->bus_hdr_type &= PCI_HEADER_TYPE_M;
1942
1943 /*
1944 * Figure out the device type and all the relavant capability offsets
1945 */
1946 /* set default value */
1947 bus_p->bus_dev_type = PCIE_PCIECAP_DEV_TYPE_PCI_PSEUDO;
1948
1949 status = pci_cfgacc_get16(rcdip, bdf, PCI_CONF_STAT);
1950 if (status == PCI_CAP_EINVAL16 || !(status & PCI_STAT_CAP))
1951 goto caps_done; /* capability not supported */
1952
1953 /* Relevant conventional capabilities first */
1954
1955 /* Conventional caps: PCI_CAP_ID_PCI_E, PCI_CAP_ID_PCIX */
1956 num_cap = 2;
1957
1958 switch (bus_p->bus_hdr_type) {
1959 case PCI_HEADER_ZERO:
1960 baseptr = PCI_CONF_CAP_PTR;
1961 break;
1962 case PCI_HEADER_PPB:
1963 baseptr = PCI_BCNF_CAP_PTR;
1964 break;
1965 case PCI_HEADER_CARDBUS:
1966 baseptr = PCI_CBUS_CAP_PTR;
1967 break;
1968 default:
1969 cmn_err(CE_WARN, "%s: unexpected pci header type:%x",
1970 __func__, bus_p->bus_hdr_type);
1971 goto caps_done;
1972 }
1973
1974 base = baseptr;
1975 for (base = pci_cfgacc_get8(rcdip, bdf, base); base && num_cap;
1976 base = pci_cfgacc_get8(rcdip, bdf, base + PCI_CAP_NEXT_PTR)) {
1977 capid = pci_cfgacc_get8(rcdip, bdf, base);
1978 uint16_t pcap;
1979
1980 switch (capid) {
1981 case PCI_CAP_ID_PCI_E:
1982 bus_p->bus_pcie_off = base;
1983 pcap = pci_cfgacc_get16(rcdip, bdf, base +
1984 PCIE_PCIECAP);
1985 bus_p->bus_dev_type = pcap & PCIE_PCIECAP_DEV_TYPE_MASK;
1986 bus_p->bus_pcie_vers = pcap & PCIE_PCIECAP_VER_MASK;
1987
1988 /* Check and save PCIe hotplug capability information */
1989 if ((PCIE_IS_RP(bus_p) || PCIE_IS_SWD(bus_p)) &&
1990 (pci_cfgacc_get16(rcdip, bdf, base + PCIE_PCIECAP)
1991 & PCIE_PCIECAP_SLOT_IMPL) &&
1992 (pci_cfgacc_get32(rcdip, bdf, base + PCIE_SLOTCAP)
1993 & PCIE_SLOTCAP_HP_CAPABLE))
1994 bus_p->bus_hp_sup_modes |= PCIE_NATIVE_HP_MODE;
1995
1996 num_cap--;
1997 break;
1998 case PCI_CAP_ID_PCIX:
1999 bus_p->bus_pcix_off = base;
2000 if (PCIE_IS_BDG(bus_p))
2001 bus_p->bus_ecc_ver =
2002 pci_cfgacc_get16(rcdip, bdf, base +
2003 PCI_PCIX_SEC_STATUS) & PCI_PCIX_VER_MASK;
2004 else
2005 bus_p->bus_ecc_ver =
2006 pci_cfgacc_get16(rcdip, bdf, base +
2007 PCI_PCIX_COMMAND) & PCI_PCIX_VER_MASK;
2008 num_cap--;
2009 break;
2010 default:
2011 break;
2012 }
2013 }
2014
2015 /* Check and save PCI hotplug (SHPC) capability information */
2016 if (PCIE_IS_BDG(bus_p)) {
2017 base = baseptr;
2018 for (base = pci_cfgacc_get8(rcdip, bdf, base);
2019 base; base = pci_cfgacc_get8(rcdip, bdf,
2020 base + PCI_CAP_NEXT_PTR)) {
2021 capid = pci_cfgacc_get8(rcdip, bdf, base);
2022 if (capid == PCI_CAP_ID_PCI_HOTPLUG) {
2023 bus_p->bus_pci_hp_off = base;
2024 bus_p->bus_hp_sup_modes |= PCIE_PCI_HP_MODE;
2025 break;
2026 }
2027 }
2028 }
2029
2030 /* Then, relevant extended capabilities */
2031
2032 if (!PCIE_IS_PCIE(bus_p))
2033 goto caps_done;
2034
2035 /* Extended caps: PCIE_EXT_CAP_ID_AER */
2036 for (base = PCIE_EXT_CAP; base; base = (capid >>
2037 PCIE_EXT_CAP_NEXT_PTR_SHIFT) & PCIE_EXT_CAP_NEXT_PTR_MASK) {
2038 capid = pci_cfgacc_get32(rcdip, bdf, base);
2039 if (capid == PCI_CAP_EINVAL32)
2040 break;
2041 switch ((capid >> PCIE_EXT_CAP_ID_SHIFT) &
2042 PCIE_EXT_CAP_ID_MASK) {
2043 case PCIE_EXT_CAP_ID_AER:
2044 bus_p->bus_aer_off = base;
2045 break;
2046 case PCIE_EXT_CAP_ID_DEV3:
2047 bus_p->bus_dev3_off = base;
2048 break;
2049 }
2050 }
2051
2052 caps_done:
2053 /* save RP dip and RP bdf */
2054 if (PCIE_IS_RP(bus_p)) {
2055 bus_p->bus_rp_dip = dip;
2056 bus_p->bus_rp_bdf = bus_p->bus_bdf;
2057
2058 bus_p->bus_fab = PCIE_ZALLOC(pcie_fabric_data_t);
2059 } else {
2060 for (pdip = ddi_get_parent(dip); pdip;
2061 pdip = ddi_get_parent(pdip)) {
2062 pcie_bus_t *parent_bus_p = PCIE_DIP2BUS(pdip);
2063
2064 /*
2065 * If RP dip and RP bdf in parent's bus_t have
2066 * been initialized, simply use these instead of
2067 * continuing up to the RC.
2068 */
2069 if (parent_bus_p->bus_rp_dip != NULL) {
2070 bus_p->bus_rp_dip = parent_bus_p->bus_rp_dip;
2071 bus_p->bus_rp_bdf = parent_bus_p->bus_rp_bdf;
2072 break;
2073 }
2074
2075 /*
2076 * When debugging be aware that some NVIDIA x86
2077 * architectures have 2 nodes for each RP, One at Bus
2078 * 0x0 and one at Bus 0x80. The requester is from Bus
2079 * 0x80
2080 */
2081 if (PCIE_IS_ROOT(parent_bus_p)) {
2082 bus_p->bus_rp_dip = pdip;
2083 bus_p->bus_rp_bdf = parent_bus_p->bus_bdf;
2084 break;
2085 }
2086 }
2087 }
2088
2089 bus_p->bus_soft_state = PCI_SOFT_STATE_CLOSED;
2090 (void) atomic_swap_uint(&bus_p->bus_fm_flags, 0);
2091
2092 ndi_set_bus_private(dip, B_TRUE, DEVI_PORT_TYPE_PCI, (void *)bus_p);
2093
2094 if (PCIE_IS_HOTPLUG_CAPABLE(dip))
2095 (void) ndi_prop_create_boolean(DDI_DEV_T_NONE, dip,
2096 "hotplug-capable");
2097
2098 initial_done:
2099 if (!(flags & PCIE_BUS_FINAL))
2100 goto final_done;
2101
2102 /* already initialized? */
2103 bus_p = PCIE_DIP2BUS(dip);
2104
2105 /* Save the Range information if device is a switch/bridge */
2106 if (PCIE_IS_BDG(bus_p)) {
2107 /* get "bus_range" property */
2108 range_size = sizeof (pci_bus_range_t);
2109 if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
2110 "bus-range", (caddr_t)&bus_p->bus_bus_range, &range_size)
2111 != DDI_PROP_SUCCESS) {
2112 errstr = "Cannot find \"bus-range\" property";
2113 cmn_err(CE_WARN,
2114 "PCIE init err info failed BDF 0x%x:%s\n",
2115 bus_p->bus_bdf, errstr);
2116 }
2117
2118 /* get secondary bus number */
2119 rcdip = pcie_get_rc_dip(dip);
2120 ASSERT(rcdip != NULL);
2121
2122 bus_p->bus_bdg_secbus = pci_cfgacc_get8(rcdip,
2123 bus_p->bus_bdf, PCI_BCNF_SECBUS);
2124
2125 /* Get "ranges" property */
2126 if (ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
2127 "ranges", (caddr_t)&bus_p->bus_addr_ranges,
2128 &bus_p->bus_addr_entries) != DDI_PROP_SUCCESS)
2129 bus_p->bus_addr_entries = 0;
2130 bus_p->bus_addr_entries /= sizeof (ppb_ranges_t);
2131 }
2132
2133 /* save "assigned-addresses" property array, ignore failues */
2134 if (ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
2135 "assigned-addresses", (caddr_t)&bus_p->bus_assigned_addr,
2136 &bus_p->bus_assigned_entries) == DDI_PROP_SUCCESS)
2137 bus_p->bus_assigned_entries /= sizeof (pci_regspec_t);
2138 else
2139 bus_p->bus_assigned_entries = 0;
2140
2141 pcie_init_pfd(dip);
2142
2143 pcie_init_plat(dip);
2144
2145 pcie_capture_speeds(dip);
2146
2147 final_done:
2148
2149 PCIE_DBG("Add %s(dip 0x%p, bdf 0x%x, secbus 0x%x)\n",
2150 ddi_driver_name(dip), (void *)dip, bus_p->bus_bdf,
2151 bus_p->bus_bdg_secbus);
2152 #ifdef DEBUG
2153 if (bus_p != NULL) {
2154 pcie_print_bus(bus_p);
2155 }
2156 #endif
2157
2158 return (bus_p);
2159 }
2160
2161 /*
2162 * Invoked before destroying devinfo node, mostly during hotplug
2163 * operation to free pcie_bus_t data structure
2164 */
2165 /* ARGSUSED */
2166 void
pcie_fini_bus(dev_info_t * dip,uint8_t flags)2167 pcie_fini_bus(dev_info_t *dip, uint8_t flags)
2168 {
2169 pcie_bus_t *bus_p = PCIE_DIP2UPBUS(dip);
2170 ASSERT(bus_p);
2171
2172 if (flags & PCIE_BUS_INITIAL) {
2173 pcie_fini_plat(dip);
2174 pcie_fini_pfd(dip);
2175
2176 if (PCIE_IS_RP(bus_p)) {
2177 kmem_free(bus_p->bus_fab, sizeof (pcie_fabric_data_t));
2178 bus_p->bus_fab = NULL;
2179 }
2180
2181 kmem_free(bus_p->bus_assigned_addr,
2182 (sizeof (pci_regspec_t) * bus_p->bus_assigned_entries));
2183 kmem_free(bus_p->bus_addr_ranges,
2184 (sizeof (ppb_ranges_t) * bus_p->bus_addr_entries));
2185 /* zero out the fields that have been destroyed */
2186 bus_p->bus_assigned_addr = NULL;
2187 bus_p->bus_addr_ranges = NULL;
2188 bus_p->bus_assigned_entries = 0;
2189 bus_p->bus_addr_entries = 0;
2190 }
2191
2192 if (flags & PCIE_BUS_FINAL) {
2193 if (PCIE_IS_HOTPLUG_CAPABLE(dip)) {
2194 (void) ndi_prop_remove(DDI_DEV_T_NONE, dip,
2195 "hotplug-capable");
2196 }
2197
2198 ndi_set_bus_private(dip, B_TRUE, 0, NULL);
2199 kmem_free(bus_p, sizeof (pcie_bus_t));
2200 }
2201 }
2202
2203 int
pcie_postattach_child(dev_info_t * cdip)2204 pcie_postattach_child(dev_info_t *cdip)
2205 {
2206 pcie_bus_t *bus_p = PCIE_DIP2BUS(cdip);
2207
2208 if (!bus_p)
2209 return (DDI_FAILURE);
2210
2211 return (pcie_enable_ce(cdip));
2212 }
2213
2214 /*
2215 * PCI-Express child device de-initialization.
2216 * This function disables generic pci-express interrupts and error
2217 * handling.
2218 */
2219 void
pcie_uninitchild(dev_info_t * cdip)2220 pcie_uninitchild(dev_info_t *cdip)
2221 {
2222 pcie_disable_errors(cdip);
2223 pcie_fini_cfghdl(cdip);
2224 pcie_fini_dom(cdip);
2225 }
2226
2227 /*
2228 * find the root complex dip
2229 */
2230 dev_info_t *
pcie_get_rc_dip(dev_info_t * dip)2231 pcie_get_rc_dip(dev_info_t *dip)
2232 {
2233 dev_info_t *rcdip;
2234 pcie_bus_t *rc_bus_p;
2235
2236 for (rcdip = ddi_get_parent(dip); rcdip;
2237 rcdip = ddi_get_parent(rcdip)) {
2238 rc_bus_p = PCIE_DIP2BUS(rcdip);
2239 if (rc_bus_p && PCIE_IS_RC(rc_bus_p))
2240 break;
2241 }
2242
2243 return (rcdip);
2244 }
2245
2246 boolean_t
pcie_is_pci_device(dev_info_t * dip)2247 pcie_is_pci_device(dev_info_t *dip)
2248 {
2249 dev_info_t *pdip;
2250 char *device_type;
2251
2252 pdip = ddi_get_parent(dip);
2253 if (pdip == NULL)
2254 return (B_FALSE);
2255
2256 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, DDI_PROP_DONTPASS,
2257 "device_type", &device_type) != DDI_PROP_SUCCESS)
2258 return (B_FALSE);
2259
2260 if (strcmp(device_type, "pciex") != 0 &&
2261 strcmp(device_type, "pci") != 0) {
2262 ddi_prop_free(device_type);
2263 return (B_FALSE);
2264 }
2265
2266 ddi_prop_free(device_type);
2267 return (B_TRUE);
2268 }
2269
2270 typedef struct {
2271 boolean_t init;
2272 uint8_t flags;
2273 } pcie_bus_arg_t;
2274
2275 /*ARGSUSED*/
2276 static int
pcie_fab_do_init_fini(dev_info_t * dip,void * arg)2277 pcie_fab_do_init_fini(dev_info_t *dip, void *arg)
2278 {
2279 pcie_req_id_t bdf;
2280 pcie_bus_arg_t *bus_arg = (pcie_bus_arg_t *)arg;
2281
2282 if (!pcie_is_pci_device(dip))
2283 goto out;
2284
2285 if (bus_arg->init) {
2286 if (pcie_get_bdf_from_dip(dip, &bdf) != DDI_SUCCESS)
2287 goto out;
2288
2289 (void) pcie_init_bus(dip, bdf, bus_arg->flags);
2290 } else {
2291 (void) pcie_fini_bus(dip, bus_arg->flags);
2292 }
2293
2294 return (DDI_WALK_CONTINUE);
2295
2296 out:
2297 return (DDI_WALK_PRUNECHILD);
2298 }
2299
2300 void
pcie_fab_init_bus(dev_info_t * rcdip,uint8_t flags)2301 pcie_fab_init_bus(dev_info_t *rcdip, uint8_t flags)
2302 {
2303 dev_info_t *dip = ddi_get_child(rcdip);
2304 pcie_bus_arg_t arg;
2305
2306 arg.init = B_TRUE;
2307 arg.flags = flags;
2308
2309 ndi_devi_enter(rcdip);
2310 ddi_walk_devs(dip, pcie_fab_do_init_fini, &arg);
2311 ndi_devi_exit(rcdip);
2312 }
2313
2314 void
pcie_fab_fini_bus(dev_info_t * rcdip,uint8_t flags)2315 pcie_fab_fini_bus(dev_info_t *rcdip, uint8_t flags)
2316 {
2317 dev_info_t *dip = ddi_get_child(rcdip);
2318 pcie_bus_arg_t arg;
2319
2320 arg.init = B_FALSE;
2321 arg.flags = flags;
2322
2323 ndi_devi_enter(rcdip);
2324 ddi_walk_devs(dip, pcie_fab_do_init_fini, &arg);
2325 ndi_devi_exit(rcdip);
2326 }
2327
2328 void
pcie_enable_errors(dev_info_t * dip)2329 pcie_enable_errors(dev_info_t *dip)
2330 {
2331 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
2332 uint16_t reg16, tmp16;
2333 uint32_t reg32, tmp32;
2334
2335 ASSERT(bus_p);
2336
2337 /*
2338 * Clear any pending errors
2339 */
2340 pcie_clear_errors(dip);
2341
2342 if (!PCIE_IS_PCIE(bus_p))
2343 return;
2344
2345 /*
2346 * Enable Baseline Error Handling but leave CE reporting off (poweron
2347 * default).
2348 */
2349 if ((reg16 = PCIE_CAP_GET(16, bus_p, PCIE_DEVCTL)) !=
2350 PCI_CAP_EINVAL16) {
2351 tmp16 = (reg16 & pcie_devctl_default_mask) |
2352 (pcie_devctl_default & ~pcie_devctl_default_mask) |
2353 (pcie_base_err_default & ~PCIE_DEVCTL_CE_REPORTING_EN);
2354
2355 PCIE_CAP_PUT(16, bus_p, PCIE_DEVCTL, tmp16);
2356 PCIE_DBG_CAP(dip, bus_p, "DEVCTL", 16, PCIE_DEVCTL, reg16);
2357 }
2358
2359 /* Enable Root Port Baseline Error Receiving */
2360 if (PCIE_IS_ROOT(bus_p) &&
2361 (reg16 = PCIE_CAP_GET(16, bus_p, PCIE_ROOTCTL)) !=
2362 PCI_CAP_EINVAL16) {
2363
2364 tmp16 = pcie_serr_disable_flag ?
2365 (pcie_root_ctrl_default & ~PCIE_ROOT_SYS_ERR) :
2366 pcie_root_ctrl_default;
2367 PCIE_CAP_PUT(16, bus_p, PCIE_ROOTCTL, tmp16);
2368 PCIE_DBG_CAP(dip, bus_p, "ROOT DEVCTL", 16, PCIE_ROOTCTL,
2369 reg16);
2370 }
2371
2372 /*
2373 * Enable PCI-Express Advanced Error Handling if Exists
2374 */
2375 if (!PCIE_HAS_AER(bus_p))
2376 return;
2377
2378 /* Set Uncorrectable Severity */
2379 if ((reg32 = PCIE_AER_GET(32, bus_p, PCIE_AER_UCE_SERV)) !=
2380 PCI_CAP_EINVAL32) {
2381 tmp32 = pcie_aer_uce_severity;
2382
2383 PCIE_AER_PUT(32, bus_p, PCIE_AER_UCE_SERV, tmp32);
2384 PCIE_DBG_AER(dip, bus_p, "AER UCE SEV", 32, PCIE_AER_UCE_SERV,
2385 reg32);
2386 }
2387
2388 /* Enable Uncorrectable errors */
2389 if ((reg32 = PCIE_AER_GET(32, bus_p, PCIE_AER_UCE_MASK)) !=
2390 PCI_CAP_EINVAL32) {
2391 tmp32 = pcie_aer_uce_mask;
2392
2393 PCIE_AER_PUT(32, bus_p, PCIE_AER_UCE_MASK, tmp32);
2394 PCIE_DBG_AER(dip, bus_p, "AER UCE MASK", 32, PCIE_AER_UCE_MASK,
2395 reg32);
2396 }
2397
2398 /* Enable ECRC generation and checking */
2399 if ((reg32 = PCIE_AER_GET(32, bus_p, PCIE_AER_CTL)) !=
2400 PCI_CAP_EINVAL32) {
2401 tmp32 = reg32 | pcie_ecrc_value;
2402 PCIE_AER_PUT(32, bus_p, PCIE_AER_CTL, tmp32);
2403 PCIE_DBG_AER(dip, bus_p, "AER CTL", 32, PCIE_AER_CTL, reg32);
2404 }
2405
2406 /* Enable Secondary Uncorrectable errors if this is a bridge */
2407 if (!PCIE_IS_PCIE_BDG(bus_p))
2408 goto root;
2409
2410 /* Set Uncorrectable Severity */
2411 if ((reg32 = PCIE_AER_GET(32, bus_p, PCIE_AER_SUCE_SERV)) !=
2412 PCI_CAP_EINVAL32) {
2413 tmp32 = pcie_aer_suce_severity;
2414
2415 PCIE_AER_PUT(32, bus_p, PCIE_AER_SUCE_SERV, tmp32);
2416 PCIE_DBG_AER(dip, bus_p, "AER SUCE SEV", 32, PCIE_AER_SUCE_SERV,
2417 reg32);
2418 }
2419
2420 if ((reg32 = PCIE_AER_GET(32, bus_p, PCIE_AER_SUCE_MASK)) !=
2421 PCI_CAP_EINVAL32) {
2422 PCIE_AER_PUT(32, bus_p, PCIE_AER_SUCE_MASK, pcie_aer_suce_mask);
2423 PCIE_DBG_AER(dip, bus_p, "AER SUCE MASK", 32,
2424 PCIE_AER_SUCE_MASK, reg32);
2425 }
2426
2427 root:
2428 /*
2429 * Enable Root Control this is a Root device
2430 */
2431 if (!PCIE_IS_ROOT(bus_p))
2432 return;
2433
2434 if ((reg16 = PCIE_AER_GET(16, bus_p, PCIE_AER_RE_CMD)) !=
2435 PCI_CAP_EINVAL16) {
2436 PCIE_AER_PUT(16, bus_p, PCIE_AER_RE_CMD,
2437 pcie_root_error_cmd_default);
2438 PCIE_DBG_AER(dip, bus_p, "AER Root Err Cmd", 16,
2439 PCIE_AER_RE_CMD, reg16);
2440 }
2441 }
2442
2443 /*
2444 * This function is used for enabling CE reporting and setting the AER CE mask.
2445 * When called from outside the pcie module it should always be preceded by
2446 * a call to pcie_enable_errors.
2447 */
2448 int
pcie_enable_ce(dev_info_t * dip)2449 pcie_enable_ce(dev_info_t *dip)
2450 {
2451 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
2452 uint16_t device_sts, device_ctl;
2453 uint32_t tmp_pcie_aer_ce_mask;
2454
2455 if (!PCIE_IS_PCIE(bus_p))
2456 return (DDI_SUCCESS);
2457
2458 /*
2459 * The "pcie_ce_mask" property is used to control both the CE reporting
2460 * enable field in the device control register and the AER CE mask. We
2461 * leave CE reporting disabled if pcie_ce_mask is set to -1.
2462 */
2463
2464 tmp_pcie_aer_ce_mask = (uint32_t)ddi_prop_get_int(DDI_DEV_T_ANY, dip,
2465 DDI_PROP_DONTPASS, "pcie_ce_mask", pcie_aer_ce_mask);
2466
2467 if (tmp_pcie_aer_ce_mask == (uint32_t)-1) {
2468 /*
2469 * Nothing to do since CE reporting has already been disabled.
2470 */
2471 return (DDI_SUCCESS);
2472 }
2473
2474 if (PCIE_HAS_AER(bus_p)) {
2475 /* Enable AER CE */
2476 PCIE_AER_PUT(32, bus_p, PCIE_AER_CE_MASK, tmp_pcie_aer_ce_mask);
2477 PCIE_DBG_AER(dip, bus_p, "AER CE MASK", 32, PCIE_AER_CE_MASK,
2478 0);
2479
2480 /* Clear any pending AER CE errors */
2481 PCIE_AER_PUT(32, bus_p, PCIE_AER_CE_STS, -1);
2482 }
2483
2484 /* clear any pending CE errors */
2485 if ((device_sts = PCIE_CAP_GET(16, bus_p, PCIE_DEVSTS)) !=
2486 PCI_CAP_EINVAL16)
2487 PCIE_CAP_PUT(16, bus_p, PCIE_DEVSTS,
2488 device_sts & (~PCIE_DEVSTS_CE_DETECTED));
2489
2490 /* Enable CE reporting */
2491 device_ctl = PCIE_CAP_GET(16, bus_p, PCIE_DEVCTL);
2492 PCIE_CAP_PUT(16, bus_p, PCIE_DEVCTL,
2493 (device_ctl & (~PCIE_DEVCTL_ERR_MASK)) | pcie_base_err_default);
2494 PCIE_DBG_CAP(dip, bus_p, "DEVCTL", 16, PCIE_DEVCTL, device_ctl);
2495
2496 return (DDI_SUCCESS);
2497 }
2498
2499 /* ARGSUSED */
2500 void
pcie_disable_errors(dev_info_t * dip)2501 pcie_disable_errors(dev_info_t *dip)
2502 {
2503 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
2504 uint16_t device_ctl;
2505 uint32_t aer_reg;
2506
2507 if (!PCIE_IS_PCIE(bus_p))
2508 return;
2509
2510 /*
2511 * Disable PCI-Express Baseline Error Handling
2512 */
2513 device_ctl = PCIE_CAP_GET(16, bus_p, PCIE_DEVCTL);
2514 device_ctl &= ~PCIE_DEVCTL_ERR_MASK;
2515 PCIE_CAP_PUT(16, bus_p, PCIE_DEVCTL, device_ctl);
2516
2517 /*
2518 * Disable PCI-Express Advanced Error Handling if Exists
2519 */
2520 if (!PCIE_HAS_AER(bus_p))
2521 goto root;
2522
2523 /* Disable Uncorrectable errors */
2524 PCIE_AER_PUT(32, bus_p, PCIE_AER_UCE_MASK, PCIE_AER_UCE_BITS);
2525
2526 /* Disable Correctable errors */
2527 PCIE_AER_PUT(32, bus_p, PCIE_AER_CE_MASK, PCIE_AER_CE_BITS);
2528
2529 /* Disable ECRC generation and checking */
2530 if ((aer_reg = PCIE_AER_GET(32, bus_p, PCIE_AER_CTL)) !=
2531 PCI_CAP_EINVAL32) {
2532 aer_reg &= ~(PCIE_AER_CTL_ECRC_GEN_ENA |
2533 PCIE_AER_CTL_ECRC_CHECK_ENA);
2534
2535 PCIE_AER_PUT(32, bus_p, PCIE_AER_CTL, aer_reg);
2536 }
2537 /*
2538 * Disable Secondary Uncorrectable errors if this is a bridge
2539 */
2540 if (!PCIE_IS_PCIE_BDG(bus_p))
2541 goto root;
2542
2543 PCIE_AER_PUT(32, bus_p, PCIE_AER_SUCE_MASK, PCIE_AER_SUCE_BITS);
2544
2545 root:
2546 /*
2547 * disable Root Control this is a Root device
2548 */
2549 if (!PCIE_IS_ROOT(bus_p))
2550 return;
2551
2552 if (!pcie_serr_disable_flag) {
2553 device_ctl = PCIE_CAP_GET(16, bus_p, PCIE_ROOTCTL);
2554 device_ctl &= ~PCIE_ROOT_SYS_ERR;
2555 PCIE_CAP_PUT(16, bus_p, PCIE_ROOTCTL, device_ctl);
2556 }
2557
2558 if (!PCIE_HAS_AER(bus_p))
2559 return;
2560
2561 if ((device_ctl = PCIE_CAP_GET(16, bus_p, PCIE_AER_RE_CMD)) !=
2562 PCI_CAP_EINVAL16) {
2563 device_ctl &= ~pcie_root_error_cmd_default;
2564 PCIE_CAP_PUT(16, bus_p, PCIE_AER_RE_CMD, device_ctl);
2565 }
2566 }
2567
2568 /*
2569 * Extract bdf from "reg" property.
2570 */
2571 int
pcie_get_bdf_from_dip(dev_info_t * dip,pcie_req_id_t * bdf)2572 pcie_get_bdf_from_dip(dev_info_t *dip, pcie_req_id_t *bdf)
2573 {
2574 pci_regspec_t *regspec;
2575 int reglen;
2576
2577 if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
2578 "reg", (int **)®spec, (uint_t *)®len) != DDI_SUCCESS)
2579 return (DDI_FAILURE);
2580
2581 if (reglen < (sizeof (pci_regspec_t) / sizeof (int))) {
2582 ddi_prop_free(regspec);
2583 return (DDI_FAILURE);
2584 }
2585
2586 /* Get phys_hi from first element. All have same bdf. */
2587 *bdf = (regspec->pci_phys_hi & (PCI_REG_BDFR_M ^ PCI_REG_REG_M)) >> 8;
2588
2589 ddi_prop_free(regspec);
2590 return (DDI_SUCCESS);
2591 }
2592
2593 dev_info_t *
pcie_get_my_childs_dip(dev_info_t * dip,dev_info_t * rdip)2594 pcie_get_my_childs_dip(dev_info_t *dip, dev_info_t *rdip)
2595 {
2596 dev_info_t *cdip = rdip;
2597
2598 for (; ddi_get_parent(cdip) != dip; cdip = ddi_get_parent(cdip))
2599 ;
2600
2601 return (cdip);
2602 }
2603
2604 uint32_t
pcie_get_bdf_for_dma_xfer(dev_info_t * dip,dev_info_t * rdip)2605 pcie_get_bdf_for_dma_xfer(dev_info_t *dip, dev_info_t *rdip)
2606 {
2607 dev_info_t *cdip;
2608
2609 /*
2610 * As part of the probing, the PCI fcode interpreter may setup a DMA
2611 * request if a given card has a fcode on it using dip and rdip of the
2612 * hotplug connector i.e, dip and rdip of px/pcieb driver. In this
2613 * case, return a invalid value for the bdf since we cannot get to the
2614 * bdf value of the actual device which will be initiating this DMA.
2615 */
2616 if (rdip == dip)
2617 return (PCIE_INVALID_BDF);
2618
2619 cdip = pcie_get_my_childs_dip(dip, rdip);
2620
2621 /*
2622 * For a given rdip, return the bdf value of dip's (px or pcieb)
2623 * immediate child or secondary bus-id if dip is a PCIe2PCI bridge.
2624 *
2625 * XXX - For now, return a invalid bdf value for all PCI and PCI-X
2626 * devices since this needs more work.
2627 */
2628 return (PCI_GET_PCIE2PCI_SECBUS(cdip) ?
2629 PCIE_INVALID_BDF : PCI_GET_BDF(cdip));
2630 }
2631
2632 uint32_t
pcie_get_aer_uce_mask()2633 pcie_get_aer_uce_mask()
2634 {
2635 return (pcie_aer_uce_mask);
2636 }
2637 uint32_t
pcie_get_aer_ce_mask()2638 pcie_get_aer_ce_mask()
2639 {
2640 return (pcie_aer_ce_mask);
2641 }
2642 uint32_t
pcie_get_aer_suce_mask()2643 pcie_get_aer_suce_mask()
2644 {
2645 return (pcie_aer_suce_mask);
2646 }
2647 uint32_t
pcie_get_serr_mask()2648 pcie_get_serr_mask()
2649 {
2650 return (pcie_serr_disable_flag);
2651 }
2652
2653 void
pcie_set_aer_uce_mask(uint32_t mask)2654 pcie_set_aer_uce_mask(uint32_t mask)
2655 {
2656 pcie_aer_uce_mask = mask;
2657 if (mask & PCIE_AER_UCE_UR)
2658 pcie_base_err_default &= ~PCIE_DEVCTL_UR_REPORTING_EN;
2659 else
2660 pcie_base_err_default |= PCIE_DEVCTL_UR_REPORTING_EN;
2661
2662 if (mask & PCIE_AER_UCE_ECRC)
2663 pcie_ecrc_value = 0;
2664 }
2665
2666 void
pcie_set_aer_ce_mask(uint32_t mask)2667 pcie_set_aer_ce_mask(uint32_t mask)
2668 {
2669 pcie_aer_ce_mask = mask;
2670 }
2671 void
pcie_set_aer_suce_mask(uint32_t mask)2672 pcie_set_aer_suce_mask(uint32_t mask)
2673 {
2674 pcie_aer_suce_mask = mask;
2675 }
2676 void
pcie_set_serr_mask(uint32_t mask)2677 pcie_set_serr_mask(uint32_t mask)
2678 {
2679 pcie_serr_disable_flag = mask;
2680 }
2681
2682 /*
2683 * Is the rdip a child of dip. Used for checking certain CTLOPS from bubbling
2684 * up erronously. Ex. ISA ctlops to a PCI-PCI Bridge.
2685 */
2686 boolean_t
pcie_is_child(dev_info_t * dip,dev_info_t * rdip)2687 pcie_is_child(dev_info_t *dip, dev_info_t *rdip)
2688 {
2689 dev_info_t *cdip = ddi_get_child(dip);
2690 for (; cdip; cdip = ddi_get_next_sibling(cdip))
2691 if (cdip == rdip)
2692 break;
2693 return (cdip != NULL);
2694 }
2695
2696 boolean_t
pcie_is_link_disabled(dev_info_t * dip)2697 pcie_is_link_disabled(dev_info_t *dip)
2698 {
2699 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
2700
2701 if (PCIE_IS_PCIE(bus_p)) {
2702 if (PCIE_CAP_GET(16, bus_p, PCIE_LINKCTL) &
2703 PCIE_LINKCTL_LINK_DISABLE)
2704 return (B_TRUE);
2705 }
2706 return (B_FALSE);
2707 }
2708
2709 /*
2710 * Determines if there are any root ports attached to a root complex.
2711 *
2712 * dip - dip of root complex
2713 *
2714 * Returns - DDI_SUCCESS if there is at least one root port otherwise
2715 * DDI_FAILURE.
2716 */
2717 int
pcie_root_port(dev_info_t * dip)2718 pcie_root_port(dev_info_t *dip)
2719 {
2720 int port_type;
2721 uint16_t cap_ptr;
2722 ddi_acc_handle_t config_handle;
2723 dev_info_t *cdip = ddi_get_child(dip);
2724
2725 /*
2726 * Determine if any of the children of the passed in dip
2727 * are root ports.
2728 */
2729 for (; cdip; cdip = ddi_get_next_sibling(cdip)) {
2730
2731 if (pci_config_setup(cdip, &config_handle) != DDI_SUCCESS)
2732 continue;
2733
2734 if ((PCI_CAP_LOCATE(config_handle, PCI_CAP_ID_PCI_E,
2735 &cap_ptr)) == DDI_FAILURE) {
2736 pci_config_teardown(&config_handle);
2737 continue;
2738 }
2739
2740 port_type = PCI_CAP_GET16(config_handle, 0, cap_ptr,
2741 PCIE_PCIECAP) & PCIE_PCIECAP_DEV_TYPE_MASK;
2742
2743 pci_config_teardown(&config_handle);
2744
2745 if (port_type == PCIE_PCIECAP_DEV_TYPE_ROOT)
2746 return (DDI_SUCCESS);
2747 }
2748
2749 /* No root ports were found */
2750
2751 return (DDI_FAILURE);
2752 }
2753
2754 /*
2755 * Function that determines if a device a PCIe device.
2756 *
2757 * dip - dip of device.
2758 *
2759 * returns - DDI_SUCCESS if device is a PCIe device, otherwise DDI_FAILURE.
2760 */
2761 int
pcie_dev(dev_info_t * dip)2762 pcie_dev(dev_info_t *dip)
2763 {
2764 /* get parent device's device_type property */
2765 char *device_type;
2766 int rc = DDI_FAILURE;
2767 dev_info_t *pdip = ddi_get_parent(dip);
2768
2769 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip,
2770 DDI_PROP_DONTPASS, "device_type", &device_type)
2771 != DDI_PROP_SUCCESS) {
2772 return (DDI_FAILURE);
2773 }
2774
2775 if (strcmp(device_type, "pciex") == 0)
2776 rc = DDI_SUCCESS;
2777 else
2778 rc = DDI_FAILURE;
2779
2780 ddi_prop_free(device_type);
2781 return (rc);
2782 }
2783
2784 void
pcie_set_rber_fatal(dev_info_t * dip,boolean_t val)2785 pcie_set_rber_fatal(dev_info_t *dip, boolean_t val)
2786 {
2787 pcie_bus_t *bus_p = PCIE_DIP2UPBUS(dip);
2788 bus_p->bus_pfd->pe_rber_fatal = val;
2789 }
2790
2791 /*
2792 * Return parent Root Port's pe_rber_fatal value.
2793 */
2794 boolean_t
pcie_get_rber_fatal(dev_info_t * dip)2795 pcie_get_rber_fatal(dev_info_t *dip)
2796 {
2797 pcie_bus_t *bus_p = PCIE_DIP2UPBUS(dip);
2798 pcie_bus_t *rp_bus_p = PCIE_DIP2UPBUS(bus_p->bus_rp_dip);
2799 return (rp_bus_p->bus_pfd->pe_rber_fatal);
2800 }
2801
2802 int
pcie_ari_supported(dev_info_t * dip)2803 pcie_ari_supported(dev_info_t *dip)
2804 {
2805 uint32_t devcap2;
2806 uint16_t pciecap;
2807 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
2808 uint8_t dev_type;
2809
2810 PCIE_DBG("pcie_ari_supported: dip=%p\n", dip);
2811
2812 if (bus_p == NULL)
2813 return (PCIE_ARI_FORW_NOT_SUPPORTED);
2814
2815 dev_type = bus_p->bus_dev_type;
2816
2817 if ((dev_type != PCIE_PCIECAP_DEV_TYPE_DOWN) &&
2818 (dev_type != PCIE_PCIECAP_DEV_TYPE_ROOT))
2819 return (PCIE_ARI_FORW_NOT_SUPPORTED);
2820
2821 if (pcie_disable_ari) {
2822 PCIE_DBG("pcie_ari_supported: dip=%p: ARI Disabled\n", dip);
2823 return (PCIE_ARI_FORW_NOT_SUPPORTED);
2824 }
2825
2826 pciecap = PCIE_CAP_GET(16, bus_p, PCIE_PCIECAP);
2827
2828 if ((pciecap & PCIE_PCIECAP_VER_MASK) < PCIE_PCIECAP_VER_2_0) {
2829 PCIE_DBG("pcie_ari_supported: dip=%p: Not 2.0\n", dip);
2830 return (PCIE_ARI_FORW_NOT_SUPPORTED);
2831 }
2832
2833 devcap2 = PCIE_CAP_GET(32, bus_p, PCIE_DEVCAP2);
2834
2835 PCIE_DBG("pcie_ari_supported: dip=%p: DevCap2=0x%x\n",
2836 dip, devcap2);
2837
2838 if (devcap2 & PCIE_DEVCAP2_ARI_FORWARD) {
2839 PCIE_DBG("pcie_ari_supported: "
2840 "dip=%p: ARI Forwarding is supported\n", dip);
2841 return (PCIE_ARI_FORW_SUPPORTED);
2842 }
2843 return (PCIE_ARI_FORW_NOT_SUPPORTED);
2844 }
2845
2846 int
pcie_ari_enable(dev_info_t * dip)2847 pcie_ari_enable(dev_info_t *dip)
2848 {
2849 uint16_t devctl2;
2850 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
2851
2852 PCIE_DBG("pcie_ari_enable: dip=%p\n", dip);
2853
2854 if (pcie_ari_supported(dip) == PCIE_ARI_FORW_NOT_SUPPORTED)
2855 return (DDI_FAILURE);
2856
2857 devctl2 = PCIE_CAP_GET(16, bus_p, PCIE_DEVCTL2);
2858 devctl2 |= PCIE_DEVCTL2_ARI_FORWARD_EN;
2859 PCIE_CAP_PUT(16, bus_p, PCIE_DEVCTL2, devctl2);
2860
2861 PCIE_DBG("pcie_ari_enable: dip=%p: writing 0x%x to DevCtl2\n",
2862 dip, devctl2);
2863
2864 return (DDI_SUCCESS);
2865 }
2866
2867 int
pcie_ari_disable(dev_info_t * dip)2868 pcie_ari_disable(dev_info_t *dip)
2869 {
2870 uint16_t devctl2;
2871 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
2872
2873 PCIE_DBG("pcie_ari_disable: dip=%p\n", dip);
2874
2875 if (pcie_ari_supported(dip) == PCIE_ARI_FORW_NOT_SUPPORTED)
2876 return (DDI_FAILURE);
2877
2878 devctl2 = PCIE_CAP_GET(16, bus_p, PCIE_DEVCTL2);
2879 devctl2 &= ~PCIE_DEVCTL2_ARI_FORWARD_EN;
2880 PCIE_CAP_PUT(16, bus_p, PCIE_DEVCTL2, devctl2);
2881
2882 PCIE_DBG("pcie_ari_disable: dip=%p: writing 0x%x to DevCtl2\n",
2883 dip, devctl2);
2884
2885 return (DDI_SUCCESS);
2886 }
2887
2888 int
pcie_ari_is_enabled(dev_info_t * dip)2889 pcie_ari_is_enabled(dev_info_t *dip)
2890 {
2891 uint16_t devctl2;
2892 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
2893
2894 PCIE_DBG("pcie_ari_is_enabled: dip=%p\n", dip);
2895
2896 if (pcie_ari_supported(dip) == PCIE_ARI_FORW_NOT_SUPPORTED)
2897 return (PCIE_ARI_FORW_DISABLED);
2898
2899 devctl2 = PCIE_CAP_GET(32, bus_p, PCIE_DEVCTL2);
2900
2901 PCIE_DBG("pcie_ari_is_enabled: dip=%p: DevCtl2=0x%x\n",
2902 dip, devctl2);
2903
2904 if (devctl2 & PCIE_DEVCTL2_ARI_FORWARD_EN) {
2905 PCIE_DBG("pcie_ari_is_enabled: "
2906 "dip=%p: ARI Forwarding is enabled\n", dip);
2907 return (PCIE_ARI_FORW_ENABLED);
2908 }
2909
2910 return (PCIE_ARI_FORW_DISABLED);
2911 }
2912
2913 int
pcie_ari_device(dev_info_t * dip)2914 pcie_ari_device(dev_info_t *dip)
2915 {
2916 ddi_acc_handle_t handle;
2917 uint16_t cap_ptr;
2918
2919 PCIE_DBG("pcie_ari_device: dip=%p\n", dip);
2920
2921 /*
2922 * XXX - This function may be called before the bus_p structure
2923 * has been populated. This code can be changed to remove
2924 * pci_config_setup()/pci_config_teardown() when the RFE
2925 * to populate the bus_p structures early in boot is putback.
2926 */
2927
2928 /* First make sure it is a PCIe device */
2929
2930 if (pci_config_setup(dip, &handle) != DDI_SUCCESS)
2931 return (PCIE_NOT_ARI_DEVICE);
2932
2933 if ((PCI_CAP_LOCATE(handle, PCI_CAP_ID_PCI_E, &cap_ptr))
2934 != DDI_SUCCESS) {
2935 pci_config_teardown(&handle);
2936 return (PCIE_NOT_ARI_DEVICE);
2937 }
2938
2939 /* Locate the ARI Capability */
2940
2941 if ((PCI_CAP_LOCATE(handle, PCI_CAP_XCFG_SPC(PCIE_EXT_CAP_ID_ARI),
2942 &cap_ptr)) == DDI_FAILURE) {
2943 pci_config_teardown(&handle);
2944 return (PCIE_NOT_ARI_DEVICE);
2945 }
2946
2947 /* ARI Capability was found so it must be a ARI device */
2948 PCIE_DBG("pcie_ari_device: ARI Device dip=%p\n", dip);
2949
2950 pci_config_teardown(&handle);
2951 return (PCIE_ARI_DEVICE);
2952 }
2953
2954 int
pcie_ari_get_next_function(dev_info_t * dip,int * func)2955 pcie_ari_get_next_function(dev_info_t *dip, int *func)
2956 {
2957 uint32_t val;
2958 uint16_t cap_ptr, next_function;
2959 ddi_acc_handle_t handle;
2960
2961 /*
2962 * XXX - This function may be called before the bus_p structure
2963 * has been populated. This code can be changed to remove
2964 * pci_config_setup()/pci_config_teardown() when the RFE
2965 * to populate the bus_p structures early in boot is putback.
2966 */
2967
2968 if (pci_config_setup(dip, &handle) != DDI_SUCCESS)
2969 return (DDI_FAILURE);
2970
2971 if ((PCI_CAP_LOCATE(handle,
2972 PCI_CAP_XCFG_SPC(PCIE_EXT_CAP_ID_ARI), &cap_ptr)) == DDI_FAILURE) {
2973 pci_config_teardown(&handle);
2974 return (DDI_FAILURE);
2975 }
2976
2977 val = PCI_CAP_GET32(handle, 0, cap_ptr, PCIE_ARI_CAP);
2978
2979 next_function = (val >> PCIE_ARI_CAP_NEXT_FUNC_SHIFT) &
2980 PCIE_ARI_CAP_NEXT_FUNC_MASK;
2981
2982 pci_config_teardown(&handle);
2983
2984 *func = next_function;
2985
2986 return (DDI_SUCCESS);
2987 }
2988
2989 dev_info_t *
pcie_func_to_dip(dev_info_t * dip,pcie_req_id_t function)2990 pcie_func_to_dip(dev_info_t *dip, pcie_req_id_t function)
2991 {
2992 pcie_req_id_t child_bdf;
2993 dev_info_t *cdip;
2994
2995 for (cdip = ddi_get_child(dip); cdip;
2996 cdip = ddi_get_next_sibling(cdip)) {
2997
2998 if (pcie_get_bdf_from_dip(cdip, &child_bdf) == DDI_FAILURE)
2999 return (NULL);
3000
3001 if ((child_bdf & PCIE_REQ_ID_ARI_FUNC_MASK) == function)
3002 return (cdip);
3003 }
3004 return (NULL);
3005 }
3006
3007 #ifdef DEBUG
3008
3009 static void
pcie_print_bus(pcie_bus_t * bus_p)3010 pcie_print_bus(pcie_bus_t *bus_p)
3011 {
3012 pcie_dbg("\tbus_dip = 0x%p\n", bus_p->bus_dip);
3013 pcie_dbg("\tbus_fm_flags = 0x%x\n", bus_p->bus_fm_flags);
3014
3015 pcie_dbg("\tbus_bdf = 0x%x\n", bus_p->bus_bdf);
3016 pcie_dbg("\tbus_dev_ven_id = 0x%x\n", bus_p->bus_dev_ven_id);
3017 pcie_dbg("\tbus_rev_id = 0x%x\n", bus_p->bus_rev_id);
3018 pcie_dbg("\tbus_hdr_type = 0x%x\n", bus_p->bus_hdr_type);
3019 pcie_dbg("\tbus_dev_type = 0x%x\n", bus_p->bus_dev_type);
3020 pcie_dbg("\tbus_bdg_secbus = 0x%x\n", bus_p->bus_bdg_secbus);
3021 pcie_dbg("\tbus_pcie_off = 0x%x\n", bus_p->bus_pcie_off);
3022 pcie_dbg("\tbus_aer_off = 0x%x\n", bus_p->bus_aer_off);
3023 pcie_dbg("\tbus_pcix_off = 0x%x\n", bus_p->bus_pcix_off);
3024 pcie_dbg("\tbus_ecc_ver = 0x%x\n", bus_p->bus_ecc_ver);
3025 }
3026
3027 /*
3028 * For debugging purposes set pcie_dbg_print != 0 to see printf messages
3029 * during interrupt.
3030 *
3031 * When a proper solution is in place this code will disappear.
3032 * Potential solutions are:
3033 * o circular buffers
3034 * o taskq to print at lower pil
3035 */
3036 int pcie_dbg_print = 0;
3037 void
pcie_dbg(char * fmt,...)3038 pcie_dbg(char *fmt, ...)
3039 {
3040 va_list ap;
3041
3042 if (!pcie_debug_flags) {
3043 return;
3044 }
3045 va_start(ap, fmt);
3046 if (servicing_interrupt()) {
3047 if (pcie_dbg_print) {
3048 prom_vprintf(fmt, ap);
3049 }
3050 } else {
3051 prom_vprintf(fmt, ap);
3052 }
3053 va_end(ap);
3054 }
3055 #endif /* DEBUG */
3056
3057 boolean_t
pcie_link_bw_supported(dev_info_t * dip)3058 pcie_link_bw_supported(dev_info_t *dip)
3059 {
3060 uint32_t linkcap;
3061 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
3062
3063 if (!PCIE_IS_PCIE(bus_p)) {
3064 return (B_FALSE);
3065 }
3066
3067 if (!PCIE_IS_RP(bus_p) && !PCIE_IS_SWD(bus_p)) {
3068 return (B_FALSE);
3069 }
3070
3071 linkcap = PCIE_CAP_GET(32, bus_p, PCIE_LINKCAP);
3072 return ((linkcap & PCIE_LINKCAP_LINK_BW_NOTIFY_CAP) != 0);
3073 }
3074
3075 int
pcie_link_bw_enable(dev_info_t * dip)3076 pcie_link_bw_enable(dev_info_t *dip)
3077 {
3078 uint16_t linkctl;
3079 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
3080
3081 if (pcie_disable_lbw != 0) {
3082 return (DDI_FAILURE);
3083 }
3084
3085 if (!pcie_link_bw_supported(dip)) {
3086 return (DDI_FAILURE);
3087 }
3088
3089 mutex_init(&bus_p->bus_lbw_mutex, NULL, MUTEX_DRIVER, NULL);
3090 cv_init(&bus_p->bus_lbw_cv, NULL, CV_DRIVER, NULL);
3091 linkctl = PCIE_CAP_GET(16, bus_p, PCIE_LINKCTL);
3092 linkctl |= PCIE_LINKCTL_LINK_BW_INTR_EN;
3093 linkctl |= PCIE_LINKCTL_LINK_AUTO_BW_INTR_EN;
3094 PCIE_CAP_PUT(16, bus_p, PCIE_LINKCTL, linkctl);
3095
3096 bus_p->bus_lbw_pbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
3097 bus_p->bus_lbw_cbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
3098 bus_p->bus_lbw_state |= PCIE_LBW_S_ENABLED;
3099
3100 return (DDI_SUCCESS);
3101 }
3102
3103 int
pcie_link_bw_disable(dev_info_t * dip)3104 pcie_link_bw_disable(dev_info_t *dip)
3105 {
3106 uint16_t linkctl;
3107 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
3108
3109 if ((bus_p->bus_lbw_state & PCIE_LBW_S_ENABLED) == 0) {
3110 return (DDI_FAILURE);
3111 }
3112
3113 mutex_enter(&bus_p->bus_lbw_mutex);
3114 while ((bus_p->bus_lbw_state &
3115 (PCIE_LBW_S_DISPATCHED | PCIE_LBW_S_RUNNING)) != 0) {
3116 cv_wait(&bus_p->bus_lbw_cv, &bus_p->bus_lbw_mutex);
3117 }
3118 mutex_exit(&bus_p->bus_lbw_mutex);
3119
3120 linkctl = PCIE_CAP_GET(16, bus_p, PCIE_LINKCTL);
3121 linkctl &= ~PCIE_LINKCTL_LINK_BW_INTR_EN;
3122 linkctl &= ~PCIE_LINKCTL_LINK_AUTO_BW_INTR_EN;
3123 PCIE_CAP_PUT(16, bus_p, PCIE_LINKCTL, linkctl);
3124
3125 bus_p->bus_lbw_state &= ~PCIE_LBW_S_ENABLED;
3126 kmem_free(bus_p->bus_lbw_pbuf, MAXPATHLEN);
3127 kmem_free(bus_p->bus_lbw_cbuf, MAXPATHLEN);
3128 bus_p->bus_lbw_pbuf = NULL;
3129 bus_p->bus_lbw_cbuf = NULL;
3130
3131 mutex_destroy(&bus_p->bus_lbw_mutex);
3132 cv_destroy(&bus_p->bus_lbw_cv);
3133
3134 return (DDI_SUCCESS);
3135 }
3136
3137 void
pcie_link_bw_taskq(void * arg)3138 pcie_link_bw_taskq(void *arg)
3139 {
3140 dev_info_t *dip = arg;
3141 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
3142 dev_info_t *cdip;
3143 boolean_t again;
3144 sysevent_t *se;
3145 sysevent_value_t se_val;
3146 sysevent_id_t eid;
3147 sysevent_attr_list_t *ev_attr_list;
3148
3149 top:
3150 ndi_devi_enter(dip);
3151 se = NULL;
3152 ev_attr_list = NULL;
3153 mutex_enter(&bus_p->bus_lbw_mutex);
3154 bus_p->bus_lbw_state &= ~PCIE_LBW_S_DISPATCHED;
3155 bus_p->bus_lbw_state |= PCIE_LBW_S_RUNNING;
3156 mutex_exit(&bus_p->bus_lbw_mutex);
3157
3158 /*
3159 * Update our own speeds as we've likely changed something.
3160 */
3161 pcie_capture_speeds(dip);
3162
3163 /*
3164 * Walk our children. We only care about updating this on function 0
3165 * because the PCIe specification requires that these all be the same
3166 * otherwise.
3167 */
3168 for (cdip = ddi_get_child(dip); cdip != NULL;
3169 cdip = ddi_get_next_sibling(cdip)) {
3170 pcie_bus_t *cbus_p = PCIE_DIP2BUS(cdip);
3171
3172 if (cbus_p == NULL) {
3173 continue;
3174 }
3175
3176 if ((cbus_p->bus_bdf & PCIE_REQ_ID_FUNC_MASK) != 0) {
3177 continue;
3178 }
3179
3180 /*
3181 * It's possible that this can fire while a child is otherwise
3182 * only partially constructed. Therefore, if we don't have the
3183 * config handle, don't bother updating the child.
3184 */
3185 if (cbus_p->bus_cfg_hdl == NULL) {
3186 continue;
3187 }
3188
3189 pcie_capture_speeds(cdip);
3190 break;
3191 }
3192
3193 se = sysevent_alloc(EC_PCIE, ESC_PCIE_LINK_STATE,
3194 ILLUMOS_KERN_PUB "pcie", SE_SLEEP);
3195
3196 (void) ddi_pathname(dip, bus_p->bus_lbw_pbuf);
3197 se_val.value_type = SE_DATA_TYPE_STRING;
3198 se_val.value.sv_string = bus_p->bus_lbw_pbuf;
3199 if (sysevent_add_attr(&ev_attr_list, PCIE_EV_DETECTOR_PATH, &se_val,
3200 SE_SLEEP) != 0) {
3201 ndi_devi_exit(dip);
3202 goto err;
3203 }
3204
3205 if (cdip != NULL) {
3206 (void) ddi_pathname(cdip, bus_p->bus_lbw_cbuf);
3207
3208 se_val.value_type = SE_DATA_TYPE_STRING;
3209 se_val.value.sv_string = bus_p->bus_lbw_cbuf;
3210
3211 /*
3212 * If this fails, that's OK. We'd rather get the event off and
3213 * there's a chance that there may not be anything there for us.
3214 */
3215 (void) sysevent_add_attr(&ev_attr_list, PCIE_EV_CHILD_PATH,
3216 &se_val, SE_SLEEP);
3217 }
3218
3219 ndi_devi_exit(dip);
3220
3221 /*
3222 * Before we generate and send down a sysevent, we need to tell the
3223 * system that parts of the devinfo cache need to be invalidated. While
3224 * the function below takes several args, it ignores them all. Because
3225 * this is a global invalidation, we don't bother trying to do much more
3226 * than requesting a global invalidation, lest we accidentally kick off
3227 * several in a row.
3228 */
3229 ddi_prop_cache_invalidate(DDI_DEV_T_NONE, NULL, NULL, 0);
3230
3231 if (sysevent_attach_attributes(se, ev_attr_list) != 0) {
3232 goto err;
3233 }
3234 ev_attr_list = NULL;
3235
3236 if (log_sysevent(se, SE_SLEEP, &eid) != 0) {
3237 goto err;
3238 }
3239
3240 err:
3241 sysevent_free_attr(ev_attr_list);
3242 sysevent_free(se);
3243
3244 mutex_enter(&bus_p->bus_lbw_mutex);
3245 bus_p->bus_lbw_state &= ~PCIE_LBW_S_RUNNING;
3246 cv_broadcast(&bus_p->bus_lbw_cv);
3247 again = (bus_p->bus_lbw_state & PCIE_LBW_S_DISPATCHED) != 0;
3248 mutex_exit(&bus_p->bus_lbw_mutex);
3249
3250 if (again) {
3251 goto top;
3252 }
3253 }
3254
3255 int
pcie_link_bw_intr(dev_info_t * dip)3256 pcie_link_bw_intr(dev_info_t *dip)
3257 {
3258 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
3259 uint16_t linksts;
3260 uint16_t flags = PCIE_LINKSTS_LINK_BW_MGMT | PCIE_LINKSTS_AUTO_BW;
3261 hrtime_t now;
3262
3263 if ((bus_p->bus_lbw_state & PCIE_LBW_S_ENABLED) == 0) {
3264 return (DDI_INTR_UNCLAIMED);
3265 }
3266
3267 linksts = PCIE_CAP_GET(16, bus_p, PCIE_LINKSTS);
3268 if ((linksts & flags) == 0) {
3269 return (DDI_INTR_UNCLAIMED);
3270 }
3271
3272 now = gethrtime();
3273
3274 /*
3275 * Check if we've already dispatched this event. If we have already
3276 * dispatched it, then there's nothing else to do, we coalesce multiple
3277 * events.
3278 */
3279 mutex_enter(&bus_p->bus_lbw_mutex);
3280 bus_p->bus_lbw_nevents++;
3281 bus_p->bus_lbw_last_ts = now;
3282 if ((bus_p->bus_lbw_state & PCIE_LBW_S_DISPATCHED) == 0) {
3283 if ((bus_p->bus_lbw_state & PCIE_LBW_S_RUNNING) == 0) {
3284 taskq_dispatch_ent(pcie_link_tq, pcie_link_bw_taskq,
3285 dip, 0, &bus_p->bus_lbw_ent);
3286 }
3287
3288 bus_p->bus_lbw_state |= PCIE_LBW_S_DISPATCHED;
3289 }
3290 mutex_exit(&bus_p->bus_lbw_mutex);
3291
3292 PCIE_CAP_PUT(16, bus_p, PCIE_LINKSTS, flags);
3293 return (DDI_INTR_CLAIMED);
3294 }
3295
3296 int
pcie_link_set_target(dev_info_t * dip,pcie_link_speed_t speed)3297 pcie_link_set_target(dev_info_t *dip, pcie_link_speed_t speed)
3298 {
3299 uint16_t ctl2, rval;
3300 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
3301
3302 if (!PCIE_IS_PCIE(bus_p)) {
3303 return (ENOTSUP);
3304 }
3305
3306 if (!PCIE_IS_RP(bus_p) && !PCIE_IS_SWD(bus_p)) {
3307 return (ENOTSUP);
3308 }
3309
3310 if (bus_p->bus_pcie_vers < 2) {
3311 return (ENOTSUP);
3312 }
3313
3314 switch (speed) {
3315 case PCIE_LINK_SPEED_2_5:
3316 rval = PCIE_LINKCTL2_TARGET_SPEED_2_5;
3317 break;
3318 case PCIE_LINK_SPEED_5:
3319 rval = PCIE_LINKCTL2_TARGET_SPEED_5;
3320 break;
3321 case PCIE_LINK_SPEED_8:
3322 rval = PCIE_LINKCTL2_TARGET_SPEED_8;
3323 break;
3324 case PCIE_LINK_SPEED_16:
3325 rval = PCIE_LINKCTL2_TARGET_SPEED_16;
3326 break;
3327 case PCIE_LINK_SPEED_32:
3328 rval = PCIE_LINKCTL2_TARGET_SPEED_32;
3329 break;
3330 case PCIE_LINK_SPEED_64:
3331 rval = PCIE_LINKCTL2_TARGET_SPEED_64;
3332 break;
3333 default:
3334 return (EINVAL);
3335 }
3336
3337 mutex_enter(&bus_p->bus_speed_mutex);
3338 if ((bus_p->bus_sup_speed & speed) == 0) {
3339 mutex_exit(&bus_p->bus_speed_mutex);
3340 return (ENOTSUP);
3341 }
3342
3343 bus_p->bus_target_speed = speed;
3344 bus_p->bus_speed_flags |= PCIE_LINK_F_ADMIN_TARGET;
3345
3346 ctl2 = PCIE_CAP_GET(16, bus_p, PCIE_LINKCTL2);
3347 ctl2 &= ~PCIE_LINKCTL2_TARGET_SPEED_MASK;
3348 ctl2 |= rval;
3349 PCIE_CAP_PUT(16, bus_p, PCIE_LINKCTL2, ctl2);
3350 mutex_exit(&bus_p->bus_speed_mutex);
3351
3352 /*
3353 * Make sure our updates have been reflected in devinfo.
3354 */
3355 pcie_capture_speeds(dip);
3356
3357 return (0);
3358 }
3359
3360 int
pcie_link_retrain(dev_info_t * dip)3361 pcie_link_retrain(dev_info_t *dip)
3362 {
3363 uint16_t ctl;
3364 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
3365
3366 if (!PCIE_IS_PCIE(bus_p)) {
3367 return (ENOTSUP);
3368 }
3369
3370 if (!PCIE_IS_RP(bus_p) && !PCIE_IS_SWD(bus_p)) {
3371 return (ENOTSUP);
3372 }
3373
3374 /*
3375 * The PCIe specification suggests that we make sure that the link isn't
3376 * in training before issuing this command in case there was a state
3377 * machine transition prior to when we got here. We wait and then go
3378 * ahead and issue the command anyways.
3379 */
3380 for (uint32_t i = 0; i < pcie_link_retrain_count; i++) {
3381 uint16_t sts;
3382
3383 sts = PCIE_CAP_GET(16, bus_p, PCIE_LINKSTS);
3384 if ((sts & PCIE_LINKSTS_LINK_TRAINING) == 0)
3385 break;
3386 delay(drv_usectohz(pcie_link_retrain_delay_ms * 1000));
3387 }
3388
3389 ctl = PCIE_CAP_GET(16, bus_p, PCIE_LINKCTL);
3390 ctl |= PCIE_LINKCTL_RETRAIN_LINK;
3391 PCIE_CAP_PUT(16, bus_p, PCIE_LINKCTL, ctl);
3392
3393 /*
3394 * Wait again to see if it clears before returning to the user.
3395 */
3396 for (uint32_t i = 0; i < pcie_link_retrain_count; i++) {
3397 uint16_t sts;
3398
3399 sts = PCIE_CAP_GET(16, bus_p, PCIE_LINKSTS);
3400 if ((sts & PCIE_LINKSTS_LINK_TRAINING) == 0)
3401 break;
3402 delay(drv_usectohz(pcie_link_retrain_delay_ms * 1000));
3403 }
3404
3405 return (0);
3406 }
3407
3408 /*
3409 * Here we're going through and grabbing information about a given PCIe device.
3410 * Our situation is a little bit complicated at this point. This gets invoked
3411 * both during early initialization and during hotplug events. We cannot rely on
3412 * the device node having been fully set up, that is, while the pcie_bus_t
3413 * normally contains a ddi_acc_handle_t for configuration space, that may not be
3414 * valid yet as this can occur before child initialization or we may be dealing
3415 * with a function that will never have a handle.
3416 *
3417 * However, we should always have a fully furnished pcie_bus_t, which means that
3418 * we can get its bdf and use that to access the devices configuration space.
3419 */
3420 static int
pcie_fabric_feature_scan(dev_info_t * dip,void * arg)3421 pcie_fabric_feature_scan(dev_info_t *dip, void *arg)
3422 {
3423 pcie_bus_t *bus_p;
3424 uint32_t devcap;
3425 uint16_t mps;
3426 dev_info_t *rcdip;
3427 pcie_fabric_data_t *fab = arg;
3428
3429 /*
3430 * Skip over non-PCIe devices. If we encounter something here, we don't
3431 * bother going through any of its children because we don't have reason
3432 * to believe that a PCIe device that this will impact will exist below
3433 * this. While it is possible that there's a PCIe fabric downstream an
3434 * intermediate old PCI/PCI-X bus, at that point, we'll still trigger
3435 * our complex fabric detection and use the minimums.
3436 *
3437 * The reason this doesn't trigger an immediate flagging as a complex
3438 * case like the one below is because we could be scanning a device that
3439 * is a nexus driver and has children already (albeit that would be
3440 * somewhat surprising as we don't anticipate being called at this
3441 * point).
3442 */
3443 if (pcie_dev(dip) != DDI_SUCCESS) {
3444 return (DDI_WALK_PRUNECHILD);
3445 }
3446
3447 /*
3448 * If we fail to find a pcie_bus_t for some reason, that's somewhat
3449 * surprising. We log this fact and set the complex flag and indicate it
3450 * was because of this case. This immediately transitions us to a
3451 * "complex" case which means use the minimal, safe, settings.
3452 */
3453 bus_p = PCIE_DIP2BUS(dip);
3454 if (bus_p == NULL) {
3455 dev_err(dip, CE_WARN, "failed to find associated pcie_bus_t "
3456 "during fabric scan");
3457 fab->pfd_flags |= PCIE_FABRIC_F_COMPLEX;
3458 return (DDI_WALK_TERMINATE);
3459 }
3460
3461 /*
3462 * In a similar case, there is hardware out there which is a PCIe
3463 * device, but does not advertise a PCIe capability. An example of this
3464 * is the IDT Tsi382A which can hide its PCIe capability. If this is
3465 * the case, we immediately terminate scanning and flag this as a
3466 * 'complex' case which causes us to use guaranteed safe settings.
3467 */
3468 if (bus_p->bus_pcie_off == 0) {
3469 dev_err(dip, CE_WARN, "encountered PCIe device without PCIe "
3470 "capability");
3471 fab->pfd_flags |= PCIE_FABRIC_F_COMPLEX;
3472 return (DDI_WALK_TERMINATE);
3473 }
3474
3475 rcdip = pcie_get_rc_dip(dip);
3476
3477 /*
3478 * First, start by determining what the device's tagging and max packet
3479 * size is. All PCIe devices will always have the 8-bit tag information
3480 * as this has existed since PCIe 1.0. 10-bit tagging requires a V2
3481 * PCIe capability. 14-bit requires the DEV3 cap. If we are missing a
3482 * version or capability, then we always treat that as lacking the bits
3483 * in the fabric.
3484 */
3485 ASSERT3U(bus_p->bus_pcie_off, !=, 0);
3486 devcap = pci_cfgacc_get32(rcdip, bus_p->bus_bdf, bus_p->bus_pcie_off +
3487 PCIE_DEVCAP);
3488 mps = devcap & PCIE_DEVCAP_MAX_PAYLOAD_MASK;
3489 if (mps < fab->pfd_mps_found) {
3490 fab->pfd_mps_found = mps;
3491 }
3492
3493 if ((devcap & PCIE_DEVCAP_EXT_TAG_8BIT) == 0) {
3494 fab->pfd_tag_found &= ~PCIE_TAG_8B;
3495 }
3496
3497 if (bus_p->bus_pcie_vers == PCIE_PCIECAP_VER_2_0) {
3498 uint32_t devcap2 = pci_cfgacc_get32(rcdip, bus_p->bus_bdf,
3499 bus_p->bus_pcie_off + PCIE_DEVCAP2);
3500 if ((devcap2 & PCIE_DEVCAP2_10B_TAG_COMP_SUP) == 0) {
3501 fab->pfd_tag_found &= ~PCIE_TAG_10B_COMP;
3502 }
3503 } else {
3504 fab->pfd_tag_found &= ~PCIE_TAG_10B_COMP;
3505 }
3506
3507 if (bus_p->bus_dev3_off != 0) {
3508 uint32_t devcap3 = pci_cfgacc_get32(rcdip, bus_p->bus_bdf,
3509 bus_p->bus_dev3_off + PCIE_DEVCAP3);
3510 if ((devcap3 & PCIE_DEVCAP3_14B_TAG_COMP_SUP) == 0) {
3511 fab->pfd_tag_found &= ~PCIE_TAG_14B_COMP;
3512 }
3513 } else {
3514 fab->pfd_tag_found &= ~PCIE_TAG_14B_COMP;
3515 }
3516
3517 /*
3518 * Now that we have captured device information, we must go and ask
3519 * questions of the topology here. The big theory statement enumerates
3520 * several types of cases. The big question we need to answer is have we
3521 * encountered a hotpluggable bridge that means we need to mark this as
3522 * complex.
3523 *
3524 * The big theory statement notes several different kinds of hotplug
3525 * topologies that exist that we can theoretically support. Right now we
3526 * opt to keep our lives simple and focus solely on (4) and (5). These
3527 * can both be summarized by a single, fairly straightforward rule:
3528 *
3529 * The only allowed hotpluggable entity is a root port.
3530 *
3531 * The reason that this can work and detect cases like (6), (7), and our
3532 * other invalid ones is that the hotplug code will scan and find all
3533 * children before we are called into here.
3534 */
3535 if (bus_p->bus_hp_sup_modes != 0) {
3536 /*
3537 * We opt to terminate in this case because there's no value in
3538 * scanning the rest of the tree at this point.
3539 */
3540 if (!PCIE_IS_RP(bus_p)) {
3541 fab->pfd_flags |= PCIE_FABRIC_F_COMPLEX;
3542 return (DDI_WALK_TERMINATE);
3543 }
3544
3545 fab->pfd_flags |= PCIE_FABRIC_F_RP_HP;
3546 }
3547
3548 /*
3549 * As our walk starts at a root port, we need to make sure that we don't
3550 * pick up any of its siblings and their children as those would be
3551 * different PCIe fabric domains for us to scan. In many hardware
3552 * platforms multiple root ports are all at the same level in the tree.
3553 */
3554 if (bus_p->bus_rp_dip == dip) {
3555 return (DDI_WALK_PRUNESIB);
3556 }
3557
3558 return (DDI_WALK_CONTINUE);
3559 }
3560
3561 static int
pcie_fabric_feature_set(dev_info_t * dip,void * arg)3562 pcie_fabric_feature_set(dev_info_t *dip, void *arg)
3563 {
3564 pcie_bus_t *bus_p;
3565 dev_info_t *rcdip;
3566 pcie_fabric_data_t *fab = arg;
3567 uint32_t devcap, devctl;
3568
3569 if (pcie_dev(dip) != DDI_SUCCESS) {
3570 return (DDI_WALK_PRUNECHILD);
3571 }
3572
3573 /*
3574 * The missing bus_t sent us into the complex case previously. We still
3575 * need to make sure all devices have values we expect here and thus
3576 * don't terminate like the above. The same is true for the case where
3577 * there is no PCIe capability.
3578 */
3579 bus_p = PCIE_DIP2BUS(dip);
3580 if (bus_p == NULL || bus_p->bus_pcie_off == 0) {
3581 return (DDI_WALK_CONTINUE);
3582 }
3583 rcdip = pcie_get_rc_dip(dip);
3584
3585 devcap = pci_cfgacc_get32(rcdip, bus_p->bus_bdf, bus_p->bus_pcie_off +
3586 PCIE_DEVCAP);
3587 devctl = pci_cfgacc_get16(rcdip, bus_p->bus_bdf, bus_p->bus_pcie_off +
3588 PCIE_DEVCTL);
3589
3590 if ((devcap & PCIE_DEVCAP_EXT_TAG_8BIT) != 0 &&
3591 (fab->pfd_tag_act & PCIE_TAG_8B) != 0) {
3592 devctl |= PCIE_DEVCTL_EXT_TAG_FIELD_EN;
3593 }
3594
3595 devctl &= ~PCIE_DEVCTL_MAX_PAYLOAD_MASK;
3596 ASSERT0(fab->pfd_mps_act & ~PCIE_DEVCAP_MAX_PAYLOAD_MASK);
3597 devctl |= fab->pfd_mps_act << PCIE_DEVCTL_MAX_PAYLOAD_SHIFT;
3598
3599 pci_cfgacc_put16(rcdip, bus_p->bus_bdf, bus_p->bus_pcie_off +
3600 PCIE_DEVCTL, devctl);
3601
3602 if (bus_p->bus_pcie_vers == PCIE_PCIECAP_VER_2_0 &&
3603 (fab->pfd_tag_act & PCIE_TAG_10B_COMP) != 0) {
3604 uint32_t devcap2 = pci_cfgacc_get32(rcdip, bus_p->bus_bdf,
3605 bus_p->bus_pcie_off + PCIE_DEVCAP2);
3606
3607 if ((devcap2 & PCIE_DEVCAP2_10B_TAG_REQ_SUP) == 0) {
3608 uint16_t devctl2 = pci_cfgacc_get16(rcdip,
3609 bus_p->bus_bdf, bus_p->bus_pcie_off + PCIE_DEVCTL2);
3610 devctl2 |= PCIE_DEVCTL2_10B_TAG_REQ_EN;
3611 pci_cfgacc_put16(rcdip, bus_p->bus_bdf,
3612 bus_p->bus_pcie_off + PCIE_DEVCTL2, devctl2);
3613 }
3614 }
3615
3616 if (bus_p->bus_dev3_off != 0 &&
3617 (fab->pfd_tag_act & PCIE_TAG_14B_COMP) != 0) {
3618 uint32_t devcap3 = pci_cfgacc_get32(rcdip, bus_p->bus_bdf,
3619 bus_p->bus_dev3_off + PCIE_DEVCAP3);
3620
3621 if ((devcap3 & PCIE_DEVCAP3_14B_TAG_REQ_SUP) == 0) {
3622 uint16_t devctl3 = pci_cfgacc_get16(rcdip,
3623 bus_p->bus_bdf, bus_p->bus_dev3_off + PCIE_DEVCTL3);
3624 devctl3 |= PCIE_DEVCTL3_14B_TAG_REQ_EN;
3625 pci_cfgacc_put16(rcdip, bus_p->bus_bdf,
3626 bus_p->bus_pcie_off + PCIE_DEVCTL2, devctl3);
3627 }
3628 }
3629
3630 /*
3631 * As our walk starts at a root port, we need to make sure that we don't
3632 * pick up any of its siblings and their children as those would be
3633 * different PCIe fabric domains for us to scan. In many hardware
3634 * platforms multiple root ports are all at the same level in the tree.
3635 */
3636 if (bus_p->bus_rp_dip == dip) {
3637 return (DDI_WALK_PRUNESIB);
3638 }
3639
3640 return (DDI_WALK_CONTINUE);
3641 }
3642
3643 /*
3644 * This is used to scan and determine the total set of PCIe fabric settings that
3645 * we should have in the system for everything downstream of this specified root
3646 * port. Note, it is only really safe to call this while working from the
3647 * perspective of a root port as we will be walking down the entire device tree.
3648 *
3649 * However, our callers, particularly hoptlug, don't have all the information
3650 * we'd like. In particular, we need to check that:
3651 *
3652 * o This is actually a PCIe device.
3653 * o That this is a root port (see the big theory statement to understand this
3654 * constraint).
3655 */
3656 void
pcie_fabric_setup(dev_info_t * dip)3657 pcie_fabric_setup(dev_info_t *dip)
3658 {
3659 pcie_bus_t *bus_p;
3660 pcie_fabric_data_t *fab;
3661 dev_info_t *pdip;
3662
3663 bus_p = PCIE_DIP2BUS(dip);
3664 if (bus_p == NULL || !PCIE_IS_RP(bus_p)) {
3665 return;
3666 }
3667
3668 VERIFY3P(bus_p->bus_fab, !=, NULL);
3669 fab = bus_p->bus_fab;
3670
3671 /*
3672 * For us to call ddi_walk_devs(), our parent needs to be held.
3673 * ddi_walk_devs() will take care of grabbing our dip as part of its
3674 * walk before we iterate over our children.
3675 *
3676 * A reasonable question to ask here is why is it safe to ask for our
3677 * parent? In this case, because we have entered here through some
3678 * thread that's operating on us whether as part of attach or a hotplug
3679 * event, our dip somewhat by definition has to be valid. If we were
3680 * looking at our dip's children and then asking them for a parent, then
3681 * that would be a race condition.
3682 */
3683 pdip = ddi_get_parent(dip);
3684 VERIFY3P(pdip, !=, NULL);
3685 ndi_devi_enter(pdip);
3686 fab->pfd_flags |= PCIE_FABRIC_F_SCANNING;
3687
3688 /*
3689 * Reinitialize the tracking structure to basically set the maximum
3690 * caps. These will be chipped away during the scan.
3691 */
3692 fab->pfd_mps_found = PCIE_DEVCAP_MAX_PAYLOAD_4096;
3693 fab->pfd_tag_found = PCIE_TAG_ALL;
3694 fab->pfd_flags &= ~PCIE_FABRIC_F_COMPLEX;
3695
3696 ddi_walk_devs(dip, pcie_fabric_feature_scan, fab);
3697
3698 if ((fab->pfd_flags & PCIE_FABRIC_F_COMPLEX) != 0) {
3699 fab->pfd_tag_act = PCIE_TAG_5B;
3700 fab->pfd_mps_act = PCIE_DEVCAP_MAX_PAYLOAD_128;
3701 } else {
3702 fab->pfd_tag_act = fab->pfd_tag_found;
3703 fab->pfd_mps_act = fab->pfd_mps_found;
3704 }
3705
3706 ddi_walk_devs(dip, pcie_fabric_feature_set, fab);
3707
3708 fab->pfd_flags &= ~PCIE_FABRIC_F_SCANNING;
3709 ndi_devi_exit(pdip);
3710 }
3711