xref: /illumos-gate/usr/src/uts/intel/io/amdzen/amdzen.c (revision 784279176e68a516c9e391eb98dda7bd543fa6dd)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2019, Joyent, Inc.
14  * Copyright 2023 Oxide Computer Company
15  */
16 
17 /*
18  * Nexus Driver for AMD Zen family systems. The purpose of this driver is to
19  * provide access to the following resources in a single, centralized fashion:
20  *
21  *  - The per-chip Data Fabric
22  *  - The North Bridge
23  *  - The System Management Network (SMN)
24  *
25  * This is a nexus driver as once we have attached to all the requisite
26  * components, we will enumerate child devices which consume this functionality.
27  *
28  * ------------------------
29  * Mapping Devices Together
30  * ------------------------
31  *
32  * The operating system needs to expose things like temperature sensors and DRAM
33  * configuration registers in terms of things that are meaningful to the system
34  * such as logical CPUs, cores, etc. This driver attaches to the PCI devices
35  * that represent the northbridge, data fabrics, and dies. Note that there are
36  * multiple northbridge and DF devices (one each per die) and this driver maps
37  * all of these three things together. Unfortunately, this requires some
38  * acrobatics as there is no direct way to map a northbridge to its
39  * corresponding die. Instead, we map a CPU die to a data fabric PCI device and
40  * a data fabric PCI device to a corresponding northbridge PCI device. This
41  * transitive relationship allows us to map between northbridge and die.
42  *
43  * As each data fabric device is attached, based on vendor and device portions
44  * of the PCI ID, we add it to the DF stubs list in the global amdzen_t
45  * structure, amdzen_data->azn_df_stubs. We must now map these to logical CPUs.
46  *
47  * In current Zen based products, there is a direct mapping between processor
48  * nodes and a data fabric PCI device: all of the devices are on PCI Bus 0 and
49  * start from Device 0x18, so device 0x18 maps to processor node 0, 0x19 to
50  * processor node 1, etc. This means that to map a logical CPU to a data fabric
51  * device, we take its processor node id, add it to 0x18 and find the PCI device
52  * that is on bus 0 with that ID number. We already discovered the DF devices as
53  * described above.
54  *
55  * The northbridge PCI device has a well-defined device and function, but the
56  * bus that it is on varies. Each die has its own set of assigned PCI buses and
57  * its northbridge device is on the first die-specific bus. This implies that
58  * the northbridges do not show up on PCI bus 0, as that is the PCI bus that all
59  * of the data fabric devices are on and is not assigned to any particular die.
60  * Additionally, while the northbridge on the lowest-numbered PCI bus
61  * intuitively corresponds to processor node zero, hardware does not guarantee
62  * this. Because we don't want to be at the mercy of firmware, we don't rely on
63  * this ordering assumption, though we have yet to find a system that deviates
64  * from it, either.
65  *
66  * One of the registers in the data fabric device's function 0
67  * (AMDZEN_DF_F0_CFG_ADDR_CTL) happens to identify the first PCI bus that is
68  * associated with the processor node. This means that we can map a data fabric
69  * device to a northbridge by finding the northbridge whose PCI bus ID matches
70  * the value in the corresponding data fabric's AMDZEN_DF_F0_CFG_ADDR_CTL.
71  *
72  * Given all of the above, we can map a northbridge to a data fabric device and
73  * a die to a data fabric device. Because these are 1:1 mappings, there is a
74  * transitive relationship from northbridge to die, and therefore we know which
75  * northbridge is associated with which processor die. This is summarized in the
76  * following image:
77  *
78  *  +-------+     +------------------------------------+     +--------------+
79  *  | Die 0 |---->| Data Fabric PCI BDF 0/18/0         |---->| Northbridge  |
80  *  +-------+     | AMDZEN_DF_F0_CFG_ADDR_CTL: bus 10  |     | PCI  10/0/0  |
81  *     ...        +------------------------------------+     +--------------+
82  *  +-------+     +------------------------------------+     +--------------+
83  *  | Die n |---->| Data Fabric PCI BDF 0/18+n/0       |---->| Northbridge  |
84  *  +-------+     | AMDZEN_DF_F0_CFG_ADDR_CTL: bus 133 |     | PCI 133/0/0  |
85  *                +------------------------------------+     +--------------+
86  *
87  * Note, the PCI buses used by the northbridges here are arbitrary examples that
88  * do not necessarily reflect actual hardware values; however, the
89  * bus/device/function (BDF) of the data fabric accurately models hardware. All
90  * BDF values are in hex.
91  *
92  * Starting with the Rome generation of processors (Family 17h Model 30-3Fh),
93  * AMD has multiple northbridges on a given die. All of these northbridges share
94  * the same data fabric and system management network port. From our perspective
95  * this means that some of the northbridge devices will be redundant and that we
96  * no longer have a 1:1 mapping between the northbridge and the data fabric
97  * devices. Every data fabric will have a northbridge, but not every northbridge
98  * will have a data fabric device mapped. Because we're always trying to map
99  * from a die to a northbridge and not the reverse, the fact that there are
100  * extra northbridge devices hanging around that we don't know about shouldn't
101  * be a problem.
102  *
103  * -------------------------------
104  * Attach and Detach Complications
105  * -------------------------------
106  *
107  * We need to map different PCI devices together. Each device is attached to a
108  * amdzen_stub driver to facilitate integration with the rest of the kernel PCI
109  * machinery and so we have to manage multiple dev_info_t structures, each of
110  * which may be independently attached and detached.
111  *
112  * This is not particularly complex for attach: our _init routine allocates the
113  * necessary mutex and list structures at module load time, and as each stub is
114  * attached, it calls into this code to be added to the appropriate list. When
115  * the nexus itself is attached, we walk the PCI device tree accumulating a
116  * counter for all devices we expect to be attached. Once the scan is complete
117  * and all such devices are accounted for (stub registration may be happening
118  * asynchronously with respect to nexus attach), we initialize the nexus device
119  * and the attach is complete.
120  *
121  * Most other device drivers support instances that can be brought back after
122  * detach, provided they are associated with an active minor node in the
123  * /devices file system. This driver is different. Once a stub device has been
124  * attached, we do not permit detaching the nexus driver instance, as the kernel
125  * does not give us interlocking guarantees between nexus and stub driver attach
126  * and detach. It is simplest to just unconditionally fail detach once a stub
127  * has attached.
128  *
129  * ---------------
130  * Exposed Devices
131  * ---------------
132  *
133  * Rather than try and have all of the different functions that could be
134  * provided in one driver, we have a nexus driver that tries to load child
135  * pseudo-device drivers that provide specific pieces of functionality.
136  *
137  * -------
138  * Locking
139  * -------
140  *
141  * The amdzen_data structure contains a single lock, azn_mutex.
142  *
143  * The various client functions here are intended for our nexus's direct
144  * children, but have been designed in case someone else should depend on this
145  * driver. Once a DF has been discovered, the set of entities inside of it
146  * (adf_nents, adf_ents[]) is considered static, constant data, and iteration
147  * over them does not require locking. However, the discovery of the amd_df_t
148  * does. In addition, locking is required whenever performing register accesses
149  * to the DF or SMN.
150  *
151  * To summarize, one must hold the lock in the following circumstances:
152  *
153  *  - Looking up DF structures
154  *  - Reading or writing to DF registers
155  *  - Reading or writing to SMN registers
156  *
157  * In general, it is preferred that the lock be held across an entire client
158  * operation if possible. The only time this becomes an issue is when we have
159  * callbacks into our callers (ala amdzen_c_df_iter()) as they may recursively
160  * call into us.
161  */
162 
163 #include <sys/modctl.h>
164 #include <sys/conf.h>
165 #include <sys/devops.h>
166 #include <sys/ddi.h>
167 #include <sys/sunddi.h>
168 #include <sys/pci.h>
169 #include <sys/sysmacros.h>
170 #include <sys/sunndi.h>
171 #include <sys/x86_archext.h>
172 #include <sys/cpuvar.h>
173 
174 #include <sys/amdzen/df.h>
175 #include "amdzen_client.h"
176 #include "amdzen.h"
177 
178 amdzen_t *amdzen_data;
179 
/*
 * Northbridge IDs that this driver is interested in. Each entry is preceded by
 * the processor families and models that it is associated with.
 */
static const uint16_t amdzen_nb_ids[] = {
	/* Zen uarch: Family 17h Ryzen, Epyc Models 00h-0fh */
	0x1450,
	/* Zen uarch: Family 17h Raven Ridge, Kestrel, Dali Models 10h-2fh */
	0x15d0,
	/* Zen 2/Zen 3 uarch: Family 17h/19h Rome, Milan, Matisse, Vermeer */
	0x1480,
	/* Zen 2/3 uarch: Family 17h/19h Renoir, Cezanne, Van Gogh */
	0x1630,
	/* Family 19h Genoa */
	0x14a4,
	/* Family 17h Mendocino, Family 19h Rembrandt */
	0x14b5,
	/* Family 19h Raphael */
	0x14d8,
};
199 
200 typedef struct {
201 	char *acd_name;
202 	amdzen_child_t acd_addr;
203 } amdzen_child_data_t;
204 
205 static const amdzen_child_data_t amdzen_children[] = {
206 	{ "smntemp", AMDZEN_C_SMNTEMP },
207 	{ "usmn", AMDZEN_C_USMN },
208 	{ "zen_udf", AMDZEN_C_ZEN_UDF },
209 	{ "zen_umc", AMDZEN_C_ZEN_UMC }
210 };
211 
212 static uint8_t
213 amdzen_stub_get8(amdzen_stub_t *stub, off_t reg)
214 {
215 	return (pci_config_get8(stub->azns_cfgspace, reg));
216 }
217 
218 static uint16_t
219 amdzen_stub_get16(amdzen_stub_t *stub, off_t reg)
220 {
221 	return (pci_config_get16(stub->azns_cfgspace, reg));
222 }
223 
224 static uint32_t
225 amdzen_stub_get32(amdzen_stub_t *stub, off_t reg)
226 {
227 	return (pci_config_get32(stub->azns_cfgspace, reg));
228 }
229 
230 static uint64_t
231 amdzen_stub_get64(amdzen_stub_t *stub, off_t reg)
232 {
233 	return (pci_config_get64(stub->azns_cfgspace, reg));
234 }
235 
236 static void
237 amdzen_stub_put8(amdzen_stub_t *stub, off_t reg, uint8_t val)
238 {
239 	pci_config_put8(stub->azns_cfgspace, reg, val);
240 }
241 
242 static void
243 amdzen_stub_put16(amdzen_stub_t *stub, off_t reg, uint16_t val)
244 {
245 	pci_config_put16(stub->azns_cfgspace, reg, val);
246 }
247 
248 static void
249 amdzen_stub_put32(amdzen_stub_t *stub, off_t reg, uint32_t val)
250 {
251 	pci_config_put32(stub->azns_cfgspace, reg, val);
252 }
253 
254 static uint64_t
255 amdzen_df_read_regdef(amdzen_t *azn, amdzen_df_t *df, const df_reg_def_t def,
256     uint8_t inst, boolean_t do_64)
257 {
258 	df_reg_def_t ficaa;
259 	df_reg_def_t ficad;
260 	uint32_t val = 0;
261 	df_rev_t df_rev = azn->azn_dfs[0].adf_rev;
262 
263 	VERIFY(MUTEX_HELD(&azn->azn_mutex));
264 	ASSERT3U(def.drd_gens & df_rev, ==, df_rev);
265 	val = DF_FICAA_V2_SET_TARG_INST(val, 1);
266 	val = DF_FICAA_V2_SET_FUNC(val, def.drd_func);
267 	val = DF_FICAA_V2_SET_INST(val, inst);
268 	val = DF_FICAA_V2_SET_64B(val, do_64 ? 1 : 0);
269 
270 	switch (df_rev) {
271 	case DF_REV_2:
272 	case DF_REV_3:
273 	case DF_REV_3P5:
274 		ficaa = DF_FICAA_V2;
275 		ficad = DF_FICAD_LO_V2;
276 		/*
277 		 * Both here and in the DFv4 case, the register ignores the
278 		 * lower 2 bits. That is we can only address and encode things
279 		 * in units of 4 bytes.
280 		 */
281 		val = DF_FICAA_V2_SET_REG(val, def.drd_reg >> 2);
282 		break;
283 	case DF_REV_4:
284 		ficaa = DF_FICAA_V4;
285 		ficad = DF_FICAD_LO_V4;
286 		val = DF_FICAA_V4_SET_REG(val, def.drd_reg >> 2);
287 		break;
288 	default:
289 		panic("encountered unexpected DF rev: %u", df_rev);
290 	}
291 
292 	amdzen_stub_put32(df->adf_funcs[ficaa.drd_func], ficaa.drd_reg, val);
293 	if (do_64) {
294 		return (amdzen_stub_get64(df->adf_funcs[ficad.drd_func],
295 		    ficad.drd_reg));
296 	} else {
297 		return (amdzen_stub_get32(df->adf_funcs[ficad.drd_func],
298 		    ficad.drd_reg));
299 	}
300 }
301 
302 /*
303  * Perform a targeted 32-bit indirect read to a specific instance and function.
304  */
305 static uint32_t
306 amdzen_df_read32(amdzen_t *azn, amdzen_df_t *df, uint8_t inst,
307     const df_reg_def_t def)
308 {
309 	return (amdzen_df_read_regdef(azn, df, def, inst, B_FALSE));
310 }
311 
312 /*
313  * For a broadcast read, just go to the underlying PCI function and perform a
314  * read. At this point in time, we don't believe we need to use the FICAA/FICAD
315  * to access it (though it does have a broadcast mode).
316  */
317 static uint32_t
318 amdzen_df_read32_bcast(amdzen_t *azn, amdzen_df_t *df, const df_reg_def_t def)
319 {
320 	VERIFY(MUTEX_HELD(&azn->azn_mutex));
321 	return (amdzen_stub_get32(df->adf_funcs[def.drd_func], def.drd_reg));
322 }
323 
324 static uint32_t
325 amdzen_smn_read(amdzen_t *azn, amdzen_df_t *df, const smn_reg_t reg)
326 {
327 	const uint32_t base_addr = SMN_REG_ADDR_BASE(reg);
328 	const uint32_t addr_off = SMN_REG_ADDR_OFF(reg);
329 
330 	VERIFY(SMN_REG_IS_NATURALLY_ALIGNED(reg));
331 	VERIFY(MUTEX_HELD(&azn->azn_mutex));
332 	amdzen_stub_put32(df->adf_nb, AMDZEN_NB_SMN_ADDR, base_addr);
333 
334 	switch (SMN_REG_SIZE(reg)) {
335 	case 1:
336 		return ((uint32_t)amdzen_stub_get8(df->adf_nb,
337 		    AMDZEN_NB_SMN_DATA + addr_off));
338 	case 2:
339 		return ((uint32_t)amdzen_stub_get16(df->adf_nb,
340 		    AMDZEN_NB_SMN_DATA + addr_off));
341 	case 4:
342 		return (amdzen_stub_get32(df->adf_nb, AMDZEN_NB_SMN_DATA));
343 	default:
344 		panic("unreachable invalid SMN register size %u",
345 		    SMN_REG_SIZE(reg));
346 	}
347 }
348 
349 static void
350 amdzen_smn_write(amdzen_t *azn, amdzen_df_t *df, const smn_reg_t reg,
351     const uint32_t val)
352 {
353 	const uint32_t base_addr = SMN_REG_ADDR_BASE(reg);
354 	const uint32_t addr_off = SMN_REG_ADDR_OFF(reg);
355 
356 	VERIFY(SMN_REG_IS_NATURALLY_ALIGNED(reg));
357 	VERIFY(SMN_REG_VALUE_FITS(reg, val));
358 	VERIFY(MUTEX_HELD(&azn->azn_mutex));
359 	amdzen_stub_put32(df->adf_nb, AMDZEN_NB_SMN_ADDR, base_addr);
360 
361 	switch (SMN_REG_SIZE(reg)) {
362 	case 1:
363 		amdzen_stub_put8(df->adf_nb, AMDZEN_NB_SMN_DATA + addr_off,
364 		    (uint8_t)val);
365 		break;
366 	case 2:
367 		amdzen_stub_put16(df->adf_nb, AMDZEN_NB_SMN_DATA + addr_off,
368 		    (uint16_t)val);
369 		break;
370 	case 4:
371 		amdzen_stub_put32(df->adf_nb, AMDZEN_NB_SMN_DATA, val);
372 		break;
373 	default:
374 		panic("unreachable invalid SMN register size %u",
375 		    SMN_REG_SIZE(reg));
376 	}
377 }
378 
379 static amdzen_df_t *
380 amdzen_df_find(amdzen_t *azn, uint_t dfno)
381 {
382 	uint_t i;
383 
384 	ASSERT(MUTEX_HELD(&azn->azn_mutex));
385 	if (dfno >= azn->azn_ndfs) {
386 		return (NULL);
387 	}
388 
389 	for (i = 0; i < azn->azn_ndfs; i++) {
390 		amdzen_df_t *df = &azn->azn_dfs[i];
391 		if ((df->adf_flags & AMDZEN_DF_F_VALID) == 0) {
392 			continue;
393 		}
394 
395 		if (dfno == 0) {
396 			return (df);
397 		}
398 		dfno--;
399 	}
400 
401 	return (NULL);
402 }
403 
404 /*
405  * Client functions that are used by nexus children.
406  */
407 int
408 amdzen_c_smn_read(uint_t dfno, const smn_reg_t reg, uint32_t *valp)
409 {
410 	amdzen_df_t *df;
411 	amdzen_t *azn = amdzen_data;
412 
413 	if (!SMN_REG_SIZE_IS_VALID(reg))
414 		return (EINVAL);
415 	if (!SMN_REG_IS_NATURALLY_ALIGNED(reg))
416 		return (EINVAL);
417 
418 	mutex_enter(&azn->azn_mutex);
419 	df = amdzen_df_find(azn, dfno);
420 	if (df == NULL) {
421 		mutex_exit(&azn->azn_mutex);
422 		return (ENOENT);
423 	}
424 
425 	if ((df->adf_flags & AMDZEN_DF_F_FOUND_NB) == 0) {
426 		mutex_exit(&azn->azn_mutex);
427 		return (ENXIO);
428 	}
429 
430 	*valp = amdzen_smn_read(azn, df, reg);
431 	mutex_exit(&azn->azn_mutex);
432 	return (0);
433 }
434 
435 int
436 amdzen_c_smn_write(uint_t dfno, const smn_reg_t reg, const uint32_t val)
437 {
438 	amdzen_df_t *df;
439 	amdzen_t *azn = amdzen_data;
440 
441 	if (!SMN_REG_SIZE_IS_VALID(reg))
442 		return (EINVAL);
443 	if (!SMN_REG_IS_NATURALLY_ALIGNED(reg))
444 		return (EINVAL);
445 	if (!SMN_REG_VALUE_FITS(reg, val))
446 		return (EOVERFLOW);
447 
448 	mutex_enter(&azn->azn_mutex);
449 	df = amdzen_df_find(azn, dfno);
450 	if (df == NULL) {
451 		mutex_exit(&azn->azn_mutex);
452 		return (ENOENT);
453 	}
454 
455 	if ((df->adf_flags & AMDZEN_DF_F_FOUND_NB) == 0) {
456 		mutex_exit(&azn->azn_mutex);
457 		return (ENXIO);
458 	}
459 
460 	amdzen_smn_write(azn, df, reg, val);
461 	mutex_exit(&azn->azn_mutex);
462 	return (0);
463 }
464 
465 uint_t
466 amdzen_c_df_count(void)
467 {
468 	uint_t ret;
469 	amdzen_t *azn = amdzen_data;
470 
471 	mutex_enter(&azn->azn_mutex);
472 	ret = azn->azn_ndfs;
473 	mutex_exit(&azn->azn_mutex);
474 	return (ret);
475 }
476 
477 df_rev_t
478 amdzen_c_df_rev(void)
479 {
480 	amdzen_df_t *df;
481 	amdzen_t *azn = amdzen_data;
482 	df_rev_t rev;
483 
484 	/*
485 	 * Always use the first DF instance to determine what we're using. Our
486 	 * current assumption, which seems to generally be true, is that the
487 	 * given DF revisions are the same in a given system when the DFs are
488 	 * directly connected.
489 	 */
490 	mutex_enter(&azn->azn_mutex);
491 	df = amdzen_df_find(azn, 0);
492 	if (df == NULL) {
493 		rev = DF_REV_UNKNOWN;
494 	} else {
495 		rev = df->adf_rev;
496 	}
497 	mutex_exit(&azn->azn_mutex);
498 
499 	return (rev);
500 }
501 
502 int
503 amdzen_c_df_read32(uint_t dfno, uint8_t inst, const df_reg_def_t def,
504     uint32_t *valp)
505 {
506 	amdzen_df_t *df;
507 	amdzen_t *azn = amdzen_data;
508 
509 	mutex_enter(&azn->azn_mutex);
510 	df = amdzen_df_find(azn, dfno);
511 	if (df == NULL) {
512 		mutex_exit(&azn->azn_mutex);
513 		return (ENOENT);
514 	}
515 
516 	*valp = amdzen_df_read_regdef(azn, df, def, inst, B_FALSE);
517 	mutex_exit(&azn->azn_mutex);
518 
519 	return (0);
520 }
521 
522 int
523 amdzen_c_df_read64(uint_t dfno, uint8_t inst, const df_reg_def_t def,
524     uint64_t *valp)
525 {
526 	amdzen_df_t *df;
527 	amdzen_t *azn = amdzen_data;
528 
529 	mutex_enter(&azn->azn_mutex);
530 	df = amdzen_df_find(azn, dfno);
531 	if (df == NULL) {
532 		mutex_exit(&azn->azn_mutex);
533 		return (ENOENT);
534 	}
535 
536 	*valp = amdzen_df_read_regdef(azn, df, def, inst, B_TRUE);
537 	mutex_exit(&azn->azn_mutex);
538 
539 	return (0);
540 }
541 
542 int
543 amdzen_c_df_iter(uint_t dfno, zen_df_type_t type, amdzen_c_iter_f func,
544     void *arg)
545 {
546 	amdzen_df_t *df;
547 	amdzen_t *azn = amdzen_data;
548 	df_type_t df_type;
549 	uint8_t df_subtype;
550 
551 	/*
552 	 * Unlike other calls here, we hold our lock only to find the DF here.
553 	 * The main reason for this is the nature of the callback function.
554 	 * Folks are iterating over instances so they can call back into us. If
555 	 * you look at the locking statement, the thing that is most volatile
556 	 * right here and what we need to protect is the DF itself and
557 	 * subsequent register accesses to it. The actual data about which
558 	 * entities exist is static and so once we have found a DF we should
559 	 * hopefully be in good shape as they only come, but don't go.
560 	 */
561 	mutex_enter(&azn->azn_mutex);
562 	df = amdzen_df_find(azn, dfno);
563 	if (df == NULL) {
564 		mutex_exit(&azn->azn_mutex);
565 		return (ENOENT);
566 	}
567 	mutex_exit(&azn->azn_mutex);
568 
569 	switch (type) {
570 	case ZEN_DF_TYPE_CS_UMC:
571 		df_type = DF_TYPE_CS;
572 		/*
573 		 * In the original Zeppelin DFv2 die there was no subtype field
574 		 * used for the CS. The UMC is the only type and has a subtype
575 		 * of zero.
576 		 */
577 		if (df->adf_rev != DF_REV_2) {
578 			df_subtype = DF_CS_SUBTYPE_UMC;
579 		} else {
580 			df_subtype = 0;
581 		}
582 		break;
583 	case ZEN_DF_TYPE_CCM_CPU:
584 		/*
585 		 * While the wording of the PPR is a little weird, the CCM still
586 		 * has subtype 0 in DFv4 systems; however, what's said to be for
587 		 * the CPU appears to apply to the ACM.
588 		 */
589 		df_type = DF_TYPE_CCM;
590 		df_subtype = 0;
591 		break;
592 	default:
593 		return (EINVAL);
594 	}
595 
596 	for (uint_t i = 0; i < df->adf_nents; i++) {
597 		amdzen_df_ent_t *ent = &df->adf_ents[i];
598 
599 		/*
600 		 * Some DF components are not considered enabled and therefore
601 		 * will end up having bogus values in their ID fields. If we do
602 		 * not have an enable flag set, we must skip this node.
603 		 */
604 		if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0)
605 			continue;
606 
607 		if (ent->adfe_type == df_type &&
608 		    ent->adfe_subtype == df_subtype) {
609 			int ret = func(dfno, ent->adfe_fabric_id,
610 			    ent->adfe_inst_id, arg);
611 			if (ret != 0) {
612 				return (ret);
613 			}
614 		}
615 	}
616 
617 	return (0);
618 }
619 
620 int
621 amdzen_c_df_fabric_decomp(df_fabric_decomp_t *decomp)
622 {
623 	const amdzen_df_t *df;
624 	amdzen_t *azn = amdzen_data;
625 
626 	mutex_enter(&azn->azn_mutex);
627 	df = amdzen_df_find(azn, 0);
628 	if (df == NULL) {
629 		mutex_exit(&azn->azn_mutex);
630 		return (ENOENT);
631 	}
632 
633 	*decomp = df->adf_decomp;
634 	mutex_exit(&azn->azn_mutex);
635 	return (0);
636 }
637 
638 static boolean_t
639 amdzen_create_child(amdzen_t *azn, const amdzen_child_data_t *acd)
640 {
641 	int ret;
642 	dev_info_t *child;
643 
644 	if (ndi_devi_alloc(azn->azn_dip, acd->acd_name,
645 	    (pnode_t)DEVI_SID_NODEID, &child) != NDI_SUCCESS) {
646 		dev_err(azn->azn_dip, CE_WARN, "!failed to allocate child "
647 		    "dip for %s", acd->acd_name);
648 		return (B_FALSE);
649 	}
650 
651 	ddi_set_parent_data(child, (void *)acd);
652 	if ((ret = ndi_devi_online(child, 0)) != NDI_SUCCESS) {
653 		dev_err(azn->azn_dip, CE_WARN, "!failed to online child "
654 		    "dip %s: %d", acd->acd_name, ret);
655 		return (B_FALSE);
656 	}
657 
658 	return (B_TRUE);
659 }
660 
661 static boolean_t
662 amdzen_map_dfs(amdzen_t *azn)
663 {
664 	amdzen_stub_t *stub;
665 
666 	ASSERT(MUTEX_HELD(&azn->azn_mutex));
667 
668 	for (stub = list_head(&azn->azn_df_stubs); stub != NULL;
669 	    stub = list_next(&azn->azn_df_stubs, stub)) {
670 		amdzen_df_t *df;
671 		uint_t dfno;
672 
673 		dfno = stub->azns_dev - AMDZEN_DF_FIRST_DEVICE;
674 		if (dfno > AMDZEN_MAX_DFS) {
675 			dev_err(stub->azns_dip, CE_WARN, "encountered df "
676 			    "device with illegal DF PCI b/d/f: 0x%x/%x/%x",
677 			    stub->azns_bus, stub->azns_dev, stub->azns_func);
678 			goto err;
679 		}
680 
681 		df = &azn->azn_dfs[dfno];
682 
683 		if (stub->azns_func >= AMDZEN_MAX_DF_FUNCS) {
684 			dev_err(stub->azns_dip, CE_WARN, "encountered df "
685 			    "device with illegal DF PCI b/d/f: 0x%x/%x/%x",
686 			    stub->azns_bus, stub->azns_dev, stub->azns_func);
687 			goto err;
688 		}
689 
690 		if (df->adf_funcs[stub->azns_func] != NULL) {
691 			dev_err(stub->azns_dip, CE_WARN, "encountered "
692 			    "duplicate df device with DF PCI b/d/f: 0x%x/%x/%x",
693 			    stub->azns_bus, stub->azns_dev, stub->azns_func);
694 			goto err;
695 		}
696 		df->adf_funcs[stub->azns_func] = stub;
697 	}
698 
699 	return (B_TRUE);
700 
701 err:
702 	azn->azn_flags |= AMDZEN_F_DEVICE_ERROR;
703 	return (B_FALSE);
704 }
705 
706 static boolean_t
707 amdzen_check_dfs(amdzen_t *azn)
708 {
709 	uint_t i;
710 	boolean_t ret = B_TRUE;
711 
712 	for (i = 0; i < AMDZEN_MAX_DFS; i++) {
713 		amdzen_df_t *df = &azn->azn_dfs[i];
714 		uint_t count = 0;
715 
716 		/*
717 		 * We require all platforms to have DFs functions 0-6. Not all
718 		 * platforms have DF function 7.
719 		 */
720 		for (uint_t func = 0; func < AMDZEN_MAX_DF_FUNCS - 1; func++) {
721 			if (df->adf_funcs[func] != NULL) {
722 				count++;
723 			}
724 		}
725 
726 		if (count == 0)
727 			continue;
728 
729 		if (count != 7) {
730 			ret = B_FALSE;
731 			dev_err(azn->azn_dip, CE_WARN, "df %u devices "
732 			    "incomplete", i);
733 		} else {
734 			df->adf_flags |= AMDZEN_DF_F_VALID;
735 			azn->azn_ndfs++;
736 		}
737 	}
738 
739 	return (ret);
740 }
741 
/*
 * Table of 0x2b ID values. The values are ascending but not contiguous: 12-15
 * and 28-29 do not appear. Each row below is one contiguous run.
 *
 * NOTE(review): the consumer of this table is not part of this excerpt;
 * presumably it maps a dense index onto the disjoint ID space that
 * amdzen_is_rome_style() refers to. Confirm against the caller.
 */
static const uint8_t amdzen_df_rome_ids[0x2b] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
	16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
	30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
	48
};
747 
748 /*
749  * Check the first df entry to see if it belongs to Rome or Milan. If so, then
750  * it uses the disjoint ID space.
751  */
752 static boolean_t
753 amdzen_is_rome_style(uint_t id)
754 {
755 	return (id == 0x1490 || id == 0x1650);
756 }
757 
758 /*
759  * To be able to do most other things we want to do, we must first determine
760  * what revision of the DF (data fabric) that we're using.
761  *
762  * Snapshot the df version. This was added explicitly in DFv4.0, around the Zen
763  * 4 timeframe and allows us to tell apart different version of the DF register
764  * set, most usefully when various subtypes were added.
765  *
766  * Older versions can theoretically be told apart based on usage of reserved
767  * registers. We walk these in the following order, starting with the newest rev
768  * and walking backwards to tell things apart:
769  *
770  *   o v3.5 -> Check function 1, register 0x150. This was reserved prior
771  *             to this point. This is actually DF_FIDMASK0_V3P5. We are supposed
772  *             to check bits [7:0].
773  *
774  *   o v3.0 -> Check function 1, register 0x208. The low byte (7:0) was
775  *             changed to indicate a component mask. This is non-zero
776  *             in the 3.0 generation. This is actually DF_FIDMASK_V2.
777  *
778  *   o v2.0 -> This is just the not that case. Presumably v1 wasn't part
779  *             of the Zen generation.
780  *
781  * Because we don't know what version we are yet, we do not use the normal
782  * versioned register accesses which would check what DF version we are and
783  * would want to use the normal indirect register accesses (which also require
784  * us to know the version). We instead do direct broadcast reads.
785  */
786 static void
787 amdzen_determine_df_vers(amdzen_t *azn, amdzen_df_t *df)
788 {
789 	uint32_t val;
790 	df_reg_def_t rd = DF_FBICNT;
791 
792 	val = amdzen_stub_get32(df->adf_funcs[rd.drd_func], rd.drd_reg);
793 	df->adf_major = DF_FBICNT_V4_GET_MAJOR(val);
794 	df->adf_minor = DF_FBICNT_V4_GET_MINOR(val);
795 	if (df->adf_major == 0 && df->adf_minor == 0) {
796 		rd = DF_FIDMASK0_V3P5;
797 		val = amdzen_stub_get32(df->adf_funcs[rd.drd_func], rd.drd_reg);
798 		if (bitx32(val, 7, 0) != 0) {
799 			df->adf_major = 3;
800 			df->adf_minor = 5;
801 			df->adf_rev = DF_REV_3P5;
802 		} else {
803 			rd = DF_FIDMASK_V2;
804 			val = amdzen_stub_get32(df->adf_funcs[rd.drd_func],
805 			    rd.drd_reg);
806 			if (bitx32(val, 7, 0) != 0) {
807 				df->adf_major = 3;
808 				df->adf_minor = 0;
809 				df->adf_rev = DF_REV_3;
810 			} else {
811 				df->adf_major = 2;
812 				df->adf_minor = 0;
813 				df->adf_rev = DF_REV_2;
814 			}
815 		}
816 	} else if (df->adf_major == 4 && df->adf_minor == 0) {
817 		df->adf_rev = DF_REV_4;
818 	} else {
819 		df->adf_rev = DF_REV_UNKNOWN;
820 	}
821 }
822 
823 /*
824  * All of the different versions of the DF have different ways of getting at and
825  * answering the question of how do I break a fabric ID into a corresponding
826  * socket, die, and component. Importantly the goal here is to obtain, cache,
827  * and normalize:
828  *
829  *  o The DF System Configuration
830  *  o The various Mask registers
831  *  o The Node ID
832  */
833 static void
834 amdzen_determine_fabric_decomp(amdzen_t *azn, amdzen_df_t *df)
835 {
836 	uint32_t mask;
837 	df_fabric_decomp_t *decomp = &df->adf_decomp;
838 
839 	switch (df->adf_rev) {
840 	case DF_REV_2:
841 		df->adf_syscfg = amdzen_df_read32_bcast(azn, df, DF_SYSCFG_V2);
842 		switch (DF_SYSCFG_V2_GET_MY_TYPE(df->adf_syscfg)) {
843 		case DF_DIE_TYPE_CPU:
844 			mask = amdzen_df_read32_bcast(azn, df,
845 			    DF_DIEMASK_CPU_V2);
846 			break;
847 		case DF_DIE_TYPE_APU:
848 			mask = amdzen_df_read32_bcast(azn, df,
849 			    DF_DIEMASK_APU_V2);
850 			break;
851 		default:
852 			panic("DF thinks we're not on a CPU!");
853 		}
854 		df->adf_mask0 = mask;
855 
856 		/*
857 		 * DFv2 is a bit different in how the fabric mask register is
858 		 * phrased. Logically a fabric ID is broken into something that
859 		 * uniquely identifies a "node" (a particular die on a socket)
860 		 * and something that identifies a "component", e.g. a memory
861 		 * controller.
862 		 *
863 		 * Starting with DFv3, these registers logically called out how
864 		 * to separate the fabric ID first into a node and a component.
865 		 * Then the node was then broken down into a socket and die. In
866 		 * DFv2, there is no separate mask and shift of a node. Instead
867 		 * the socket and die are absolute offsets into the fabric ID
868 		 * rather than relative offsets into the node ID. As such, when
869 		 * we encounter DFv2, we fake up a node mask and shift and make
870 		 * it look like DFv3+.
871 		 */
872 		decomp->dfd_node_mask = DF_DIEMASK_V2_GET_SOCK_MASK(mask) |
873 		    DF_DIEMASK_V2_GET_DIE_MASK(mask);
874 		decomp->dfd_node_shift = DF_DIEMASK_V2_GET_DIE_SHIFT(mask);
875 		decomp->dfd_comp_mask = DF_DIEMASK_V2_GET_COMP_MASK(mask);
876 		decomp->dfd_comp_shift = 0;
877 
878 		decomp->dfd_sock_mask = DF_DIEMASK_V2_GET_SOCK_MASK(mask) >>
879 		    decomp->dfd_node_shift;
880 		decomp->dfd_die_mask = DF_DIEMASK_V2_GET_DIE_MASK(mask) >>
881 		    decomp->dfd_node_shift;
882 		decomp->dfd_sock_shift = DF_DIEMASK_V2_GET_SOCK_SHIFT(mask) -
883 		    decomp->dfd_node_shift;
884 		decomp->dfd_die_shift = DF_DIEMASK_V2_GET_DIE_SHIFT(mask) -
885 		    decomp->dfd_node_shift;
886 		ASSERT3U(decomp->dfd_die_shift, ==, 0);
887 		break;
888 	case DF_REV_3:
889 		df->adf_syscfg = amdzen_df_read32_bcast(azn, df, DF_SYSCFG_V3);
890 		df->adf_mask0 =  amdzen_df_read32_bcast(azn, df,
891 		    DF_FIDMASK0_V3);
892 		df->adf_mask1 =  amdzen_df_read32_bcast(azn, df,
893 		    DF_FIDMASK1_V3);
894 
895 		decomp->dfd_sock_mask =
896 		    DF_FIDMASK1_V3_GET_SOCK_MASK(df->adf_mask1);
897 		decomp->dfd_sock_shift =
898 		    DF_FIDMASK1_V3_GET_SOCK_SHIFT(df->adf_mask1);
899 		decomp->dfd_die_mask =
900 		    DF_FIDMASK1_V3_GET_DIE_MASK(df->adf_mask1);
901 		decomp->dfd_die_shift = 0;
902 		decomp->dfd_node_mask =
903 		    DF_FIDMASK0_V3_GET_NODE_MASK(df->adf_mask0);
904 		decomp->dfd_node_shift =
905 		    DF_FIDMASK1_V3_GET_NODE_SHIFT(df->adf_mask1);
906 		decomp->dfd_comp_mask =
907 		    DF_FIDMASK0_V3_GET_COMP_MASK(df->adf_mask0);
908 		decomp->dfd_comp_shift = 0;
909 		break;
910 	case DF_REV_3P5:
911 		df->adf_syscfg = amdzen_df_read32_bcast(azn, df,
912 		    DF_SYSCFG_V3P5);
913 		df->adf_mask0 =  amdzen_df_read32_bcast(azn, df,
914 		    DF_FIDMASK0_V3P5);
915 		df->adf_mask1 =  amdzen_df_read32_bcast(azn, df,
916 		    DF_FIDMASK1_V3P5);
917 		df->adf_mask2 =  amdzen_df_read32_bcast(azn, df,
918 		    DF_FIDMASK2_V3P5);
919 
920 		decomp->dfd_sock_mask =
921 		    DF_FIDMASK2_V3P5_GET_SOCK_MASK(df->adf_mask2);
922 		decomp->dfd_sock_shift =
923 		    DF_FIDMASK1_V3P5_GET_SOCK_SHIFT(df->adf_mask1);
924 		decomp->dfd_die_mask =
925 		    DF_FIDMASK2_V3P5_GET_DIE_MASK(df->adf_mask2);
926 		decomp->dfd_die_shift = 0;
927 		decomp->dfd_node_mask =
928 		    DF_FIDMASK0_V3P5_GET_NODE_MASK(df->adf_mask0);
929 		decomp->dfd_node_shift =
930 		    DF_FIDMASK1_V3P5_GET_NODE_SHIFT(df->adf_mask1);
931 		decomp->dfd_comp_mask =
932 		    DF_FIDMASK0_V3P5_GET_COMP_MASK(df->adf_mask0);
933 		decomp->dfd_comp_shift = 0;
934 		break;
935 	case DF_REV_4:
936 		df->adf_syscfg = amdzen_df_read32_bcast(azn, df, DF_SYSCFG_V4);
937 		df->adf_mask0 =  amdzen_df_read32_bcast(azn, df,
938 		    DF_FIDMASK0_V4);
939 		df->adf_mask1 =  amdzen_df_read32_bcast(azn, df,
940 		    DF_FIDMASK1_V4);
941 		df->adf_mask2 =  amdzen_df_read32_bcast(azn, df,
942 		    DF_FIDMASK2_V4);
943 
944 		/*
945 		 * The DFv4 registers are at a different location in the DF;
946 		 * however, the actual layout of fields is the same as DFv3.5.
947 		 * This is why you see V3P5 below.
948 		 */
949 		decomp->dfd_sock_mask =
950 		    DF_FIDMASK2_V3P5_GET_SOCK_MASK(df->adf_mask2);
951 		decomp->dfd_sock_shift =
952 		    DF_FIDMASK1_V3P5_GET_SOCK_SHIFT(df->adf_mask1);
953 		decomp->dfd_die_mask =
954 		    DF_FIDMASK2_V3P5_GET_DIE_MASK(df->adf_mask2);
955 		decomp->dfd_die_shift = 0;
956 		decomp->dfd_node_mask =
957 		    DF_FIDMASK0_V3P5_GET_NODE_MASK(df->adf_mask0);
958 		decomp->dfd_node_shift =
959 		    DF_FIDMASK1_V3P5_GET_NODE_SHIFT(df->adf_mask1);
960 		decomp->dfd_comp_mask =
961 		    DF_FIDMASK0_V3P5_GET_COMP_MASK(df->adf_mask0);
962 		decomp->dfd_comp_shift = 0;
963 		break;
964 	default:
965 		panic("encountered suspicious, previously rejected DF "
966 		    "rev: 0x%x", df->adf_rev);
967 	}
968 }
969 
970 /*
971  * Initialize our knowledge about a given series of nodes on the data fabric.
972  */
973 static void
974 amdzen_setup_df(amdzen_t *azn, amdzen_df_t *df)
975 {
976 	uint_t i;
977 	uint32_t val;
978 
979 	amdzen_determine_df_vers(azn, df);
980 
981 	switch (df->adf_rev) {
982 	case DF_REV_2:
983 	case DF_REV_3:
984 	case DF_REV_3P5:
985 		val = amdzen_df_read32_bcast(azn, df, DF_CFG_ADDR_CTL_V2);
986 		break;
987 	case DF_REV_4:
988 		val = amdzen_df_read32_bcast(azn, df, DF_CFG_ADDR_CTL_V4);
989 		break;
990 	default:
991 		dev_err(azn->azn_dip, CE_WARN, "encountered unsupported DF "
992 		    "revision: 0x%x", df->adf_rev);
993 		return;
994 	}
995 	df->adf_nb_busno = DF_CFG_ADDR_CTL_GET_BUS_NUM(val);
996 	val = amdzen_df_read32_bcast(azn, df, DF_FBICNT);
997 	df->adf_nents = DF_FBICNT_GET_COUNT(val);
998 	if (df->adf_nents == 0)
999 		return;
1000 	df->adf_ents = kmem_zalloc(sizeof (amdzen_df_ent_t) * df->adf_nents,
1001 	    KM_SLEEP);
1002 
1003 	for (i = 0; i < df->adf_nents; i++) {
1004 		amdzen_df_ent_t *dfe = &df->adf_ents[i];
1005 		uint8_t inst = i;
1006 
1007 		/*
1008 		 * Unfortunately, Rome uses a discontinuous instance ID pattern
1009 		 * while everything else we can find uses a contiguous instance
1010 		 * ID pattern. This means that for Rome, we need to adjust the
1011 		 * indexes that we iterate over, though the total number of
1012 		 * entries is right. This was carried over into Milan, but not
1013 		 * Genoa.
1014 		 */
1015 		if (amdzen_is_rome_style(df->adf_funcs[0]->azns_did)) {
1016 			if (inst > ARRAY_SIZE(amdzen_df_rome_ids)) {
1017 				dev_err(azn->azn_dip, CE_WARN, "Rome family "
1018 				    "processor reported more ids than the PPR, "
1019 				    "resetting %u to instance zero", inst);
1020 				inst = 0;
1021 			} else {
1022 				inst = amdzen_df_rome_ids[inst];
1023 			}
1024 		}
1025 
1026 		dfe->adfe_drvid = inst;
1027 		dfe->adfe_info0 = amdzen_df_read32(azn, df, inst, DF_FBIINFO0);
1028 		dfe->adfe_info1 = amdzen_df_read32(azn, df, inst, DF_FBIINFO1);
1029 		dfe->adfe_info2 = amdzen_df_read32(azn, df, inst, DF_FBIINFO2);
1030 		dfe->adfe_info3 = amdzen_df_read32(azn, df, inst, DF_FBIINFO3);
1031 
1032 		dfe->adfe_type = DF_FBIINFO0_GET_TYPE(dfe->adfe_info0);
1033 		dfe->adfe_subtype = DF_FBIINFO0_GET_SUBTYPE(dfe->adfe_info0);
1034 
1035 		/*
1036 		 * The enabled flag was not present in Zen 1. Simulate it by
1037 		 * checking for a non-zero register instead.
1038 		 */
1039 		if (DF_FBIINFO0_V3_GET_ENABLED(dfe->adfe_info0) ||
1040 		    (df->adf_rev == DF_REV_2 && dfe->adfe_info0 != 0)) {
1041 			dfe->adfe_flags |= AMDZEN_DFE_F_ENABLED;
1042 		}
1043 		if (DF_FBIINFO0_GET_HAS_MCA(dfe->adfe_info0)) {
1044 			dfe->adfe_flags |= AMDZEN_DFE_F_MCA;
1045 		}
1046 		dfe->adfe_inst_id = DF_FBIINFO3_GET_INSTID(dfe->adfe_info3);
1047 		switch (df->adf_rev) {
1048 		case DF_REV_2:
1049 			dfe->adfe_fabric_id =
1050 			    DF_FBIINFO3_V2_GET_BLOCKID(dfe->adfe_info3);
1051 			break;
1052 		case DF_REV_3:
1053 			dfe->adfe_fabric_id =
1054 			    DF_FBIINFO3_V3_GET_BLOCKID(dfe->adfe_info3);
1055 			break;
1056 		case DF_REV_3P5:
1057 			dfe->adfe_fabric_id =
1058 			    DF_FBIINFO3_V3P5_GET_BLOCKID(dfe->adfe_info3);
1059 			break;
1060 		case DF_REV_4:
1061 			dfe->adfe_fabric_id =
1062 			    DF_FBIINFO3_V4_GET_BLOCKID(dfe->adfe_info3);
1063 			break;
1064 		default:
1065 			panic("encountered suspicious, previously rejected DF "
1066 			    "rev: 0x%x", df->adf_rev);
1067 		}
1068 	}
1069 
1070 	amdzen_determine_fabric_decomp(azn, df);
1071 }
1072 
1073 static void
1074 amdzen_find_nb(amdzen_t *azn, amdzen_df_t *df)
1075 {
1076 	amdzen_stub_t *stub;
1077 
1078 	for (stub = list_head(&azn->azn_nb_stubs); stub != NULL;
1079 	    stub = list_next(&azn->azn_nb_stubs, stub)) {
1080 		if (stub->azns_bus == df->adf_nb_busno) {
1081 			df->adf_flags |= AMDZEN_DF_F_FOUND_NB;
1082 			df->adf_nb = stub;
1083 			return;
1084 		}
1085 	}
1086 }
1087 
1088 static void
1089 amdzen_nexus_init(void *arg)
1090 {
1091 	uint_t i;
1092 	amdzen_t *azn = arg;
1093 
1094 	/*
1095 	 * First go through all of the stubs and assign the DF entries.
1096 	 */
1097 	mutex_enter(&azn->azn_mutex);
1098 	if (!amdzen_map_dfs(azn) || !amdzen_check_dfs(azn)) {
1099 		azn->azn_flags |= AMDZEN_F_MAP_ERROR;
1100 		goto done;
1101 	}
1102 
1103 	for (i = 0; i < AMDZEN_MAX_DFS; i++) {
1104 		amdzen_df_t *df = &azn->azn_dfs[i];
1105 
1106 		if ((df->adf_flags & AMDZEN_DF_F_VALID) == 0)
1107 			continue;
1108 		amdzen_setup_df(azn, df);
1109 		amdzen_find_nb(azn, df);
1110 	}
1111 
1112 	/*
1113 	 * Not all children may be installed. As such, we do not treat the
1114 	 * failure of a child as fatal to the driver.
1115 	 */
1116 	mutex_exit(&azn->azn_mutex);
1117 	for (i = 0; i < ARRAY_SIZE(amdzen_children); i++) {
1118 		(void) amdzen_create_child(azn, &amdzen_children[i]);
1119 	}
1120 	mutex_enter(&azn->azn_mutex);
1121 
1122 done:
1123 	azn->azn_flags &= ~AMDZEN_F_ATTACH_DISPATCHED;
1124 	azn->azn_flags |= AMDZEN_F_ATTACH_COMPLETE;
1125 	azn->azn_taskqid = TASKQID_INVALID;
1126 	cv_broadcast(&azn->azn_cv);
1127 	mutex_exit(&azn->azn_mutex);
1128 }
1129 
1130 static int
1131 amdzen_stub_scan_cb(dev_info_t *dip, void *arg)
1132 {
1133 	amdzen_t *azn = arg;
1134 	uint16_t vid, did;
1135 	int *regs;
1136 	uint_t nregs, i;
1137 	boolean_t match = B_FALSE;
1138 
1139 	if (dip == ddi_root_node()) {
1140 		return (DDI_WALK_CONTINUE);
1141 	}
1142 
1143 	/*
1144 	 * If a node in question is not a pci node, then we have no interest in
1145 	 * it as all the stubs that we care about are related to pci devices.
1146 	 */
1147 	if (strncmp("pci", ddi_get_name(dip), 3) != 0) {
1148 		return (DDI_WALK_PRUNECHILD);
1149 	}
1150 
1151 	/*
1152 	 * If we can't get a device or vendor ID and prove that this is an AMD
1153 	 * part, then we don't care about it.
1154 	 */
1155 	vid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
1156 	    "vendor-id", PCI_EINVAL16);
1157 	did = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
1158 	    "device-id", PCI_EINVAL16);
1159 	if (vid == PCI_EINVAL16 || did == PCI_EINVAL16) {
1160 		return (DDI_WALK_CONTINUE);
1161 	}
1162 
1163 	if (vid != AMDZEN_PCI_VID_AMD && vid != AMDZEN_PCI_VID_HYGON) {
1164 		return (DDI_WALK_CONTINUE);
1165 	}
1166 
1167 	for (i = 0; i < ARRAY_SIZE(amdzen_nb_ids); i++) {
1168 		if (amdzen_nb_ids[i] == did) {
1169 			match = B_TRUE;
1170 		}
1171 	}
1172 
1173 	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
1174 	    "reg", &regs, &nregs) != DDI_PROP_SUCCESS) {
1175 		return (DDI_WALK_CONTINUE);
1176 	}
1177 
1178 	if (nregs == 0) {
1179 		ddi_prop_free(regs);
1180 		return (DDI_WALK_CONTINUE);
1181 	}
1182 
1183 	if (PCI_REG_BUS_G(regs[0]) == AMDZEN_DF_BUSNO &&
1184 	    PCI_REG_DEV_G(regs[0]) >= AMDZEN_DF_FIRST_DEVICE) {
1185 		match = B_TRUE;
1186 	}
1187 
1188 	ddi_prop_free(regs);
1189 	if (match) {
1190 		mutex_enter(&azn->azn_mutex);
1191 		azn->azn_nscanned++;
1192 		mutex_exit(&azn->azn_mutex);
1193 	}
1194 
1195 	return (DDI_WALK_CONTINUE);
1196 }
1197 
1198 static void
1199 amdzen_stub_scan(void *arg)
1200 {
1201 	amdzen_t *azn = arg;
1202 
1203 	mutex_enter(&azn->azn_mutex);
1204 	azn->azn_nscanned = 0;
1205 	mutex_exit(&azn->azn_mutex);
1206 
1207 	ddi_walk_devs(ddi_root_node(), amdzen_stub_scan_cb, azn);
1208 
1209 	mutex_enter(&azn->azn_mutex);
1210 	azn->azn_flags &= ~AMDZEN_F_SCAN_DISPATCHED;
1211 	azn->azn_flags |= AMDZEN_F_SCAN_COMPLETE;
1212 
1213 	if (azn->azn_nscanned == 0) {
1214 		azn->azn_flags |= AMDZEN_F_UNSUPPORTED;
1215 		azn->azn_taskqid = TASKQID_INVALID;
1216 		cv_broadcast(&azn->azn_cv);
1217 	} else if (azn->azn_npresent == azn->azn_nscanned) {
1218 		azn->azn_flags |= AMDZEN_F_ATTACH_DISPATCHED;
1219 		azn->azn_taskqid = taskq_dispatch(system_taskq,
1220 		    amdzen_nexus_init, azn, TQ_SLEEP);
1221 	}
1222 	mutex_exit(&azn->azn_mutex);
1223 }
1224 
1225 /*
1226  * Unfortunately we can't really let the stubs detach as we may need them to be
1227  * available for client operations. We may be able to improve this if we know
1228  * that the actual nexus is going away. However, as long as it's active, we need
1229  * all the stubs.
1230  */
1231 int
1232 amdzen_detach_stub(dev_info_t *dip, ddi_detach_cmd_t cmd)
1233 {
1234 	if (cmd == DDI_SUSPEND) {
1235 		return (DDI_SUCCESS);
1236 	}
1237 
1238 	return (DDI_FAILURE);
1239 }
1240 
1241 int
1242 amdzen_attach_stub(dev_info_t *dip, ddi_attach_cmd_t cmd)
1243 {
1244 	int *regs, reg;
1245 	uint_t nregs, i;
1246 	uint16_t vid, did;
1247 	amdzen_stub_t *stub;
1248 	amdzen_t *azn = amdzen_data;
1249 	boolean_t valid = B_FALSE;
1250 	boolean_t nb = B_FALSE;
1251 
1252 	if (cmd == DDI_RESUME) {
1253 		return (DDI_SUCCESS);
1254 	} else if (cmd != DDI_ATTACH) {
1255 		return (DDI_FAILURE);
1256 	}
1257 
1258 	/*
1259 	 * Make sure that the stub that we've been asked to attach is a pci type
1260 	 * device. If not, then there is no reason for us to proceed.
1261 	 */
1262 	if (strncmp("pci", ddi_get_name(dip), 3) != 0) {
1263 		dev_err(dip, CE_WARN, "asked to attach a bad AMD Zen nexus "
1264 		    "stub: %s", ddi_get_name(dip));
1265 		return (DDI_FAILURE);
1266 	}
1267 	vid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
1268 	    "vendor-id", PCI_EINVAL16);
1269 	did = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
1270 	    "device-id", PCI_EINVAL16);
1271 	if (vid == PCI_EINVAL16 || did == PCI_EINVAL16) {
1272 		dev_err(dip, CE_WARN, "failed to get PCI ID properties");
1273 		return (DDI_FAILURE);
1274 	}
1275 
1276 	if (vid != AMDZEN_PCI_VID_AMD && vid != AMDZEN_PCI_VID_HYGON) {
1277 		dev_err(dip, CE_WARN, "expected vendor ID (0x%x), found 0x%x",
1278 		    cpuid_getvendor(CPU) == X86_VENDOR_HYGON ?
1279 		    AMDZEN_PCI_VID_HYGON : AMDZEN_PCI_VID_AMD, vid);
1280 		return (DDI_FAILURE);
1281 	}
1282 
1283 	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
1284 	    "reg", &regs, &nregs) != DDI_PROP_SUCCESS) {
1285 		dev_err(dip, CE_WARN, "failed to get 'reg' property");
1286 		return (DDI_FAILURE);
1287 	}
1288 
1289 	if (nregs == 0) {
1290 		ddi_prop_free(regs);
1291 		dev_err(dip, CE_WARN, "missing 'reg' property values");
1292 		return (DDI_FAILURE);
1293 	}
1294 	reg = *regs;
1295 	ddi_prop_free(regs);
1296 
1297 	for (i = 0; i < ARRAY_SIZE(amdzen_nb_ids); i++) {
1298 		if (amdzen_nb_ids[i] == did) {
1299 			valid = B_TRUE;
1300 			nb = B_TRUE;
1301 		}
1302 	}
1303 
1304 	if (!valid && PCI_REG_BUS_G(reg) == AMDZEN_DF_BUSNO &&
1305 	    PCI_REG_DEV_G(reg) >= AMDZEN_DF_FIRST_DEVICE) {
1306 		valid = B_TRUE;
1307 		nb = B_FALSE;
1308 	}
1309 
1310 	if (!valid) {
1311 		dev_err(dip, CE_WARN, "device %s didn't match the nexus list",
1312 		    ddi_get_name(dip));
1313 		return (DDI_FAILURE);
1314 	}
1315 
1316 	stub = kmem_alloc(sizeof (amdzen_stub_t), KM_SLEEP);
1317 	if (pci_config_setup(dip, &stub->azns_cfgspace) != DDI_SUCCESS) {
1318 		dev_err(dip, CE_WARN, "failed to set up config space");
1319 		kmem_free(stub, sizeof (amdzen_stub_t));
1320 		return (DDI_FAILURE);
1321 	}
1322 
1323 	stub->azns_dip = dip;
1324 	stub->azns_vid = vid;
1325 	stub->azns_did = did;
1326 	stub->azns_bus = PCI_REG_BUS_G(reg);
1327 	stub->azns_dev = PCI_REG_DEV_G(reg);
1328 	stub->azns_func = PCI_REG_FUNC_G(reg);
1329 	ddi_set_driver_private(dip, stub);
1330 
1331 	mutex_enter(&azn->azn_mutex);
1332 	azn->azn_npresent++;
1333 	if (nb) {
1334 		list_insert_tail(&azn->azn_nb_stubs, stub);
1335 	} else {
1336 		list_insert_tail(&azn->azn_df_stubs, stub);
1337 	}
1338 
1339 	if ((azn->azn_flags & AMDZEN_F_TASKQ_MASK) == AMDZEN_F_SCAN_COMPLETE &&
1340 	    azn->azn_nscanned == azn->azn_npresent) {
1341 		azn->azn_flags |= AMDZEN_F_ATTACH_DISPATCHED;
1342 		azn->azn_taskqid = taskq_dispatch(system_taskq,
1343 		    amdzen_nexus_init, azn, TQ_SLEEP);
1344 	}
1345 	mutex_exit(&azn->azn_mutex);
1346 
1347 	return (DDI_SUCCESS);
1348 }
1349 
1350 static int
1351 amdzen_bus_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
1352     void *arg, void *result)
1353 {
1354 	char buf[32];
1355 	dev_info_t *child;
1356 	const amdzen_child_data_t *acd;
1357 
1358 	switch (ctlop) {
1359 	case DDI_CTLOPS_REPORTDEV:
1360 		if (rdip == NULL) {
1361 			return (DDI_FAILURE);
1362 		}
1363 		cmn_err(CE_CONT, "amdzen nexus: %s@%s, %s%d\n",
1364 		    ddi_node_name(rdip), ddi_get_name_addr(rdip),
1365 		    ddi_driver_name(rdip), ddi_get_instance(rdip));
1366 		break;
1367 	case DDI_CTLOPS_INITCHILD:
1368 		child = arg;
1369 		if (child == NULL) {
1370 			dev_err(dip, CE_WARN, "!no child passed for "
1371 			    "DDI_CTLOPS_INITCHILD");
1372 		}
1373 
1374 		acd = ddi_get_parent_data(child);
1375 		if (acd == NULL) {
1376 			dev_err(dip, CE_WARN, "!missing child parent data");
1377 			return (DDI_FAILURE);
1378 		}
1379 
1380 		if (snprintf(buf, sizeof (buf), "%d", acd->acd_addr) >=
1381 		    sizeof (buf)) {
1382 			dev_err(dip, CE_WARN, "!failed to construct device "
1383 			    "addr due to overflow");
1384 			return (DDI_FAILURE);
1385 		}
1386 
1387 		ddi_set_name_addr(child, buf);
1388 		break;
1389 	case DDI_CTLOPS_UNINITCHILD:
1390 		child = arg;
1391 		if (child == NULL) {
1392 			dev_err(dip, CE_WARN, "!no child passed for "
1393 			    "DDI_CTLOPS_UNINITCHILD");
1394 		}
1395 
1396 		ddi_set_name_addr(child, NULL);
1397 		break;
1398 	default:
1399 		return (ddi_ctlops(dip, rdip, ctlop, arg, result));
1400 	}
1401 	return (DDI_SUCCESS);
1402 }
1403 
1404 static int
1405 amdzen_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
1406 {
1407 	amdzen_t *azn = amdzen_data;
1408 
1409 	if (cmd == DDI_RESUME) {
1410 		return (DDI_SUCCESS);
1411 	} else if (cmd != DDI_ATTACH) {
1412 		return (DDI_FAILURE);
1413 	}
1414 
1415 	mutex_enter(&azn->azn_mutex);
1416 	if (azn->azn_dip != NULL) {
1417 		dev_err(dip, CE_WARN, "driver is already attached!");
1418 		mutex_exit(&azn->azn_mutex);
1419 		return (DDI_FAILURE);
1420 	}
1421 
1422 	azn->azn_dip = dip;
1423 	azn->azn_taskqid = taskq_dispatch(system_taskq, amdzen_stub_scan,
1424 	    azn, TQ_SLEEP);
1425 	azn->azn_flags |= AMDZEN_F_SCAN_DISPATCHED;
1426 	mutex_exit(&azn->azn_mutex);
1427 
1428 	return (DDI_SUCCESS);
1429 }
1430 
1431 static int
1432 amdzen_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
1433 {
1434 	amdzen_t *azn = amdzen_data;
1435 
1436 	if (cmd == DDI_SUSPEND) {
1437 		return (DDI_SUCCESS);
1438 	} else if (cmd != DDI_DETACH) {
1439 		return (DDI_FAILURE);
1440 	}
1441 
1442 	mutex_enter(&azn->azn_mutex);
1443 	while (azn->azn_taskqid != TASKQID_INVALID) {
1444 		cv_wait(&azn->azn_cv, &azn->azn_mutex);
1445 	}
1446 
1447 	/*
1448 	 * If we've attached any stub drivers, e.g. this platform is important
1449 	 * for us, then we fail detach.
1450 	 */
1451 	if (!list_is_empty(&azn->azn_df_stubs) ||
1452 	    !list_is_empty(&azn->azn_nb_stubs)) {
1453 		mutex_exit(&azn->azn_mutex);
1454 		return (DDI_FAILURE);
1455 	}
1456 
1457 	azn->azn_dip = NULL;
1458 	mutex_exit(&azn->azn_mutex);
1459 
1460 	return (DDI_SUCCESS);
1461 }
1462 
1463 static void
1464 amdzen_free(void)
1465 {
1466 	if (amdzen_data == NULL) {
1467 		return;
1468 	}
1469 
1470 	VERIFY(list_is_empty(&amdzen_data->azn_df_stubs));
1471 	list_destroy(&amdzen_data->azn_df_stubs);
1472 	VERIFY(list_is_empty(&amdzen_data->azn_nb_stubs));
1473 	list_destroy(&amdzen_data->azn_nb_stubs);
1474 	cv_destroy(&amdzen_data->azn_cv);
1475 	mutex_destroy(&amdzen_data->azn_mutex);
1476 	kmem_free(amdzen_data, sizeof (amdzen_t));
1477 	amdzen_data = NULL;
1478 }
1479 
1480 static void
1481 amdzen_alloc(void)
1482 {
1483 	amdzen_data = kmem_zalloc(sizeof (amdzen_t), KM_SLEEP);
1484 	mutex_init(&amdzen_data->azn_mutex, NULL, MUTEX_DRIVER, NULL);
1485 	list_create(&amdzen_data->azn_df_stubs, sizeof (amdzen_stub_t),
1486 	    offsetof(amdzen_stub_t, azns_link));
1487 	list_create(&amdzen_data->azn_nb_stubs, sizeof (amdzen_stub_t),
1488 	    offsetof(amdzen_stub_t, azns_link));
1489 	cv_init(&amdzen_data->azn_cv, NULL, CV_DRIVER, NULL);
1490 }
1491 
1492 struct bus_ops amdzen_bus_ops = {
1493 	.busops_rev = BUSO_REV,
1494 	.bus_map = nullbusmap,
1495 	.bus_dma_map = ddi_no_dma_map,
1496 	.bus_dma_allochdl = ddi_no_dma_allochdl,
1497 	.bus_dma_freehdl = ddi_no_dma_freehdl,
1498 	.bus_dma_bindhdl = ddi_no_dma_bindhdl,
1499 	.bus_dma_unbindhdl = ddi_no_dma_unbindhdl,
1500 	.bus_dma_flush = ddi_no_dma_flush,
1501 	.bus_dma_win = ddi_no_dma_win,
1502 	.bus_dma_ctl = ddi_no_dma_mctl,
1503 	.bus_prop_op = ddi_bus_prop_op,
1504 	.bus_ctl = amdzen_bus_ctl
1505 };
1506 
1507 static struct dev_ops amdzen_dev_ops = {
1508 	.devo_rev = DEVO_REV,
1509 	.devo_refcnt = 0,
1510 	.devo_getinfo = nodev,
1511 	.devo_identify = nulldev,
1512 	.devo_probe = nulldev,
1513 	.devo_attach = amdzen_attach,
1514 	.devo_detach = amdzen_detach,
1515 	.devo_reset = nodev,
1516 	.devo_quiesce = ddi_quiesce_not_needed,
1517 	.devo_bus_ops = &amdzen_bus_ops
1518 };
1519 
1520 static struct modldrv amdzen_modldrv = {
1521 	.drv_modops = &mod_driverops,
1522 	.drv_linkinfo = "AMD Zen Nexus Driver",
1523 	.drv_dev_ops = &amdzen_dev_ops
1524 };
1525 
1526 static struct modlinkage amdzen_modlinkage = {
1527 	.ml_rev = MODREV_1,
1528 	.ml_linkage = { &amdzen_modldrv, NULL }
1529 };
1530 
1531 int
1532 _init(void)
1533 {
1534 	int ret;
1535 
1536 	if (cpuid_getvendor(CPU) != X86_VENDOR_AMD &&
1537 	    cpuid_getvendor(CPU) != X86_VENDOR_HYGON) {
1538 		return (ENOTSUP);
1539 	}
1540 
1541 	if ((ret = mod_install(&amdzen_modlinkage)) == 0) {
1542 		amdzen_alloc();
1543 	}
1544 
1545 	return (ret);
1546 }
1547 
1548 int
1549 _info(struct modinfo *modinfop)
1550 {
1551 	return (mod_info(&amdzen_modlinkage, modinfop));
1552 }
1553 
1554 int
1555 _fini(void)
1556 {
1557 	int ret;
1558 
1559 	if ((ret = mod_remove(&amdzen_modlinkage)) == 0) {
1560 		amdzen_free();
1561 	}
1562 
1563 	return (ret);
1564 }
1565