xref: /illumos-gate/usr/src/uts/intel/io/amdzen/amdzen.c (revision cb1bb6c32d034ea24e8549ef763c9c2b79413eb8)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2019, Joyent, Inc.
14  * Copyright 2022 Oxide Computer Company
15  */
16 
17 /*
18  * Nexus Driver for AMD Zen family systems. The purpose of this driver is to
19  * provide access to the following resources in a single, centralized fashion:
20  *
21  *  - The per-chip Data Fabric
22  *  - The North Bridge
23  *  - The System Management Network (SMN)
24  *
25  * This is a nexus driver as once we have attached to all the requisite
26  * components, we will enumerate child devices which consume this functionality.
27  *
28  * ------------------------
29  * Mapping Devices Together
30  * ------------------------
31  *
32  * The operating system needs to expose things like temperature sensors and DRAM
33  * configuration registers in terms that are meaningful to the system such as
34  * logical CPUs, cores, etc. This driver attaches to the PCI IDs that represent
35  * the northbridge and data fabric; however, there are multiple PCI devices (one
36  * per die) that exist. This driver does manage to map all of these three things
37  * together; however, it requires some acrobatics. Unfortunately, there's no
38  * direct way to map a northbridge to its corresponding die. However, we can map
39  * a CPU die to a data fabric PCI device and a data fabric PCI device to a
40  * corresponding northbridge PCI device.
41  *
42  * In current Zen based products, there is a direct mapping between processor
43  * nodes and a data fabric PCI device. All of the devices are on PCI Bus 0 and
44  * start from Device 0x18. Device 0x18 maps to processor node 0, 0x19 to
45  * processor node 1, etc. This means that to map a logical CPU to a data fabric
46  * device, we take its processor node id, add it to 0x18 and find the PCI device
47  * that is on bus 0, device 0x18. As each data fabric device is attached based
48  * on its PCI ID, we add it to the global list, amd_nbdf_dfs that is in the
49  * amd_f17nbdf_t structure.
50  *
51  * The northbridge PCI device has a defined device and function, but the PCI bus
52  * that it's on can vary. Each die has its own series of PCI buses that are
53  * assigned to it and the northbridge PCI device is on the first of die-specific
54  * PCI bus for each die. This also means that the northbridge will not show up
55  * on PCI bus 0, which is the PCI bus that all of the data fabric devices are
56  * on. While conventionally the northbridge with the lowest PCI bus value
57  * would correspond to processor node zero, hardware does not guarantee that at
58  * all. Because we don't want to be at the mercy of firmware, we don't rely on
59  * this ordering, even though we have yet to find a system that deviates from
60  * this scheme.
61  *
62  * One of the registers in the data fabric device's function 0
63  * (AMDZEN_DF_F0_CFG_ADDR_CTL) happens to have the first PCI bus that is
64  * associated with the processor node. This means that we can map a data fabric
65  * device to a northbridge by finding the northbridge whose PCI bus matches the
66  * value in the corresponding data fabric's AMDZEN_DF_F0_CFG_ADDR_CTL.
67  *
68  * We can map a northbridge to a data fabric device and a data fabric device to
69  * a die. Because these are generally 1:1 mappings, there is a transitive
70  * relationship and therefore we know which northbridge is associated with which
71  * processor die. This is summarized in the following image:
72  *
73  *  +-------+    +-----------------------------------+        +--------------+
74  *  | Die 0 |--->| Data Fabric PCI BDF 0/18/0        |------->| Northbridge  |
75  *  +-------+    | AMDZEN_DF_F0_CFG_ADDR_CTL: bus 10 |        | PCI  10/0/0  |
76  *     ...       +-----------------------------------+        +--------------+
77  *  +-------+     +------------------------------------+        +--------------+
78  *  | Die n |---->| Data Fabric PCI BDF 0/18+n/0       |------->| Northbridge  |
79  *  +-------+     | AMDZEN_DF_F0_CFG_ADDR_CTL: bus 133 |        | PCI 133/0/0  |
80  *                +------------------------------------+        +--------------+
81  *
82  * Note, the PCI buses used by the northbridges here are arbitrary. They do not
83  * reflect the actual values by hardware; however, the bus/device/function (BDF)
84  * of the data fabric accurately models hardware. All of the BDF values are in
85  * hex.
86  *
87  * Starting with the Rome generation of processors (Family 17h Model 30-3Fh),
88  * AMD has multiple northbridges that exist on a given die. All of these
89  * northbridges share the same data fabric and system management network port.
90  * From our perspective this means that some of the northbridge devices will be
91  * redundant and that we will no longer have a 1:1 mapping between the
92  * northbridge and the data fabric devices. Every data fabric will have a
93  * northbridge, but not every northbridge will have a data fabric device mapped.
94  * Because we're always trying to map from a die to a northbridge and not the
95  * reverse, the fact that there are extra northbridge devices hanging around
96  * that we don't know about shouldn't be a problem.
97  *
98  * -------------------------------
99  * Attach and Detach Complications
100  * -------------------------------
101  *
102  * Because we need to map different PCI devices together, this means that we
103  * have multiple dev_info_t structures that we need to manage. Each of these is
104  * independently attached and detached. While this is easily managed for attach,
105  * it is not for detach. Each of these devices is a 'stub'.
106  *
107  * Once a device has been detached it will only come back if we have an active
108  * minor node that will be accessed. This means that if they are detached,
109  * nothing would ever cause them to be reattached. The system also doesn't
110  * provide us a way or any guarantees around making sure that we're attached to
111  * all such devices before we detach. As a result, unfortunately, it's easier to
112  * basically have detach always fail.
113  *
114  * ---------------
115  * Exposed Devices
116  * ---------------
117  *
118  * Rather than try and have all of the different functions that could be
119  * provided by one driver, we instead have created a nexus driver that will
120  * itself try and load children. Children are all pseudo-device drivers that
121  * provide different pieces of functionality that use this.
122  *
123  * -------
124  * Locking
125  * -------
126  *
127  * The amdzen_data structure contains a single lock, azn_mutex. The various
128  * client functions are intended for direct children of our nexus, but have been
129  * designed in case someone else depends on this driver despite not being a
130  * child. Once a DF has been discovered, the set of entities inside of it
131  * (adf_nents, adf_ents[]) is considered static, constant data. This means that
132  * iterating over it in and of itself does not require locking; however, the
133  * discovery of the amd_df_t does. In addition, whenever performing register
134  * accesses to the DF or SMN, those require locking. This means that one must
135  * hold the lock in the following circumstances:
136  *
137  *   o Looking up DF structures
138  *   o Reading or writing to DF registers
139  *   o Reading or writing to SMN registers
140  *
141  * In general, it is preferred that the lock be held across an entire client
142  * operation if possible. The only time this becomes an issue are when we have
143  * callbacks into our callers (ala amdzen_c_df_iter()) as they will likely
144  * recursively call into us.
145  */
146 
147 #include <sys/modctl.h>
148 #include <sys/conf.h>
149 #include <sys/devops.h>
150 #include <sys/ddi.h>
151 #include <sys/sunddi.h>
152 #include <sys/pci.h>
153 #include <sys/sysmacros.h>
154 #include <sys/sunndi.h>
155 #include <sys/x86_archext.h>
156 #include <sys/cpuvar.h>
157 
158 #include <sys/amdzen/df.h>
159 #include "amdzen_client.h"
160 #include "amdzen.h"
161 
/*
 * Global driver state. All of the client entry points (amdzen_c_*) reach the
 * driver through this pointer rather than a per-instance soft state lookup.
 */
amdzen_t *amdzen_data;
163 
164 /*
165  * Array of northbridge IDs that we care about.
166  */
167 static const uint16_t amdzen_nb_ids[] = {
168 	/* Family 17h Ryzen, Epyc Models 00h-0fh (Zen uarch) */
169 	0x1450,
170 	/* Family 17h Raven Ridge, Kestrel, Dali Models 10h-2fh (Zen uarch) */
171 	0x15d0,
172 	/* Family 17h/19h Rome, Milan, Matisse, Vermeer Zen 2/Zen 3 uarch */
173 	0x1480,
174 	/* Family 17h/19h Renoir, Cezanne Zen 2/3 uarch) */
175 	0x1630
176 };
177 
/*
 * Describes one child pseudo-device that this nexus enumerates: the node name
 * to create and the AMDZEN_C_* identifier stashed as its parent data.
 */
typedef struct {
	char *acd_name;
	amdzen_child_t acd_addr;
} amdzen_child_data_t;
182 
/*
 * The set of child pseudo-device drivers that we enumerate under this nexus.
 */
static const amdzen_child_data_t amdzen_children[] = {
	{ "smntemp", AMDZEN_C_SMNTEMP },
	{ "usmn", AMDZEN_C_USMN },
	{ "zen_udf", AMDZEN_C_ZEN_UDF },
	{ "zen_umc", AMDZEN_C_ZEN_UMC }
};
189 
190 /*
191  * Provide a caller with the notion of what CPU family their device falls into.
192  * This is useful for client drivers that want to make decisions based on model
193  * ranges.
194  */
195 zen_family_t
196 amdzen_c_family(void)
197 {
198 	uint_t vendor, family, model;
199 	zen_family_t ret = ZEN_FAMILY_UNKNOWN;
200 
201 	vendor = cpuid_getvendor(CPU);
202 	family = cpuid_getfamily(CPU);
203 	model = cpuid_getmodel(CPU);
204 
205 	switch (family) {
206 	case 0x17:
207 		if (vendor != X86_VENDOR_AMD)
208 			break;
209 		if (model < 0x10) {
210 			ret = ZEN_FAMILY_NAPLES;
211 		} else if (model >= 0x10 && model < 0x30) {
212 			ret = ZEN_FAMILY_DALI;
213 		} else if (model >= 0x30 && model < 0x40) {
214 			ret = ZEN_FAMILY_ROME;
215 		} else if (model >= 0x60 && model < 0x70) {
216 			ret = ZEN_FAMILY_RENOIR;
217 		} else if (model >= 0x70 && model < 0x80) {
218 			ret = ZEN_FAMILY_MATISSE;
219 		} else if (model >= 0x90 && model < 0xa0) {
220 			ret = ZEN_FAMILY_VAN_GOGH;
221 		} else if (model >= 0xa0 && model < 0xb0) {
222 			ret = ZEN_FAMILY_MENDOCINO;
223 		}
224 		break;
225 	case 0x18:
226 		if (vendor != X86_VENDOR_HYGON)
227 			break;
228 		if (model < 0x10)
229 			ret = ZEN_FAMILY_DHYANA;
230 		break;
231 	case 0x19:
232 		if (vendor != X86_VENDOR_AMD)
233 			break;
234 		if (model < 0x10) {
235 			ret = ZEN_FAMILY_MILAN;
236 		} else if (model >= 0x10 && model < 0x20) {
237 			ret = ZEN_FAMILY_GENOA;
238 		} else if (model >= 0x20 && model < 0x30) {
239 			ret = ZEN_FAMILY_VERMEER;
240 		} else if (model >= 0x40 && model < 0x50) {
241 			ret = ZEN_FAMILY_REMBRANDT;
242 		} else if (model >= 0x50 && model < 0x60) {
243 			ret = ZEN_FAMILY_CEZANNE;
244 		} else if (model >= 0x60 && model < 0x70) {
245 			ret = ZEN_FAMILY_RAPHAEL;
246 		}
247 		break;
248 	default:
249 		break;
250 	}
251 
252 	return (ret);
253 }
254 
255 static uint32_t
256 amdzen_stub_get32(amdzen_stub_t *stub, off_t reg)
257 {
258 	return (pci_config_get32(stub->azns_cfgspace, reg));
259 }
260 
261 static uint64_t
262 amdzen_stub_get64(amdzen_stub_t *stub, off_t reg)
263 {
264 	return (pci_config_get64(stub->azns_cfgspace, reg));
265 }
266 
267 static void
268 amdzen_stub_put32(amdzen_stub_t *stub, off_t reg, uint32_t val)
269 {
270 	pci_config_put32(stub->azns_cfgspace, reg, val);
271 }
272 
/*
 * Perform an indirect DF register read targeted at a single instance using the
 * FICAA/FICAD (Fabric Indirect Config Access Address/Data) register pair: we
 * program FICAA with the function, instance, and register of interest and then
 * read the result back out of FICAD. Because this is a multi-register
 * sequence, the caller must hold azn_mutex across the whole operation.
 */
static uint64_t
amdzen_df_read_regdef(amdzen_t *azn, amdzen_df_t *df, const df_reg_def_t def,
    uint8_t inst, boolean_t do_64)
{
	df_reg_def_t ficaa;
	df_reg_def_t ficad;
	uint32_t val = 0;
	/*
	 * The DF revision is taken from DF 0 on the assumption that all DFs in
	 * the system share a revision (see amdzen_c_df_rev()).
	 */
	df_rev_t df_rev = azn->azn_dfs[0].adf_rev;

	VERIFY(MUTEX_HELD(&azn->azn_mutex));
	/* The register definition must be valid for this DF generation. */
	ASSERT3U(def.drd_gens & df_rev, ==, df_rev);
	/* Target a specific instance rather than performing a broadcast. */
	val = DF_FICAA_V2_SET_TARG_INST(val, 1);
	val = DF_FICAA_V2_SET_FUNC(val, def.drd_func);
	val = DF_FICAA_V2_SET_INST(val, inst);
	val = DF_FICAA_V2_SET_64B(val, do_64 ? 1 : 0);

	switch (df_rev) {
	case DF_REV_2:
	case DF_REV_3:
	case DF_REV_3P5:
		ficaa = DF_FICAA_V2;
		ficad = DF_FICAD_LO_V2;
		/*
		 * Both here and in the DFv4 case, the register ignores the
		 * lower 2 bits. That is we can only address and encode things
		 * in units of 4 bytes.
		 */
		val = DF_FICAA_V2_SET_REG(val, def.drd_reg >> 2);
		break;
	case DF_REV_4:
		ficaa = DF_FICAA_V4;
		ficad = DF_FICAD_LO_V4;
		val = DF_FICAA_V4_SET_REG(val, def.drd_reg >> 2);
		break;
	default:
		panic("encountered unexpected DF rev: %u", df_rev);
	}

	/* Program the address register, then read the data register back. */
	amdzen_stub_put32(df->adf_funcs[ficaa.drd_func], ficaa.drd_reg, val);
	if (do_64) {
		return (amdzen_stub_get64(df->adf_funcs[ficad.drd_func],
		    ficad.drd_reg));
	} else {
		return (amdzen_stub_get32(df->adf_funcs[ficad.drd_func],
		    ficad.drd_reg));
	}
}
320 
321 /*
322  * Perform a targeted 32-bit indirect read to a specific instance and function.
323  */
324 static uint32_t
325 amdzen_df_read32(amdzen_t *azn, amdzen_df_t *df, uint8_t inst,
326     const df_reg_def_t def)
327 {
328 	return (amdzen_df_read_regdef(azn, df, def, inst, B_FALSE));
329 }
330 
331 /*
332  * For a broadcast read, just go to the underlying PCI function and perform a
333  * read. At this point in time, we don't believe we need to use the FICAA/FICAD
334  * to access it (though it does have a broadcast mode).
335  */
336 static uint32_t
337 amdzen_df_read32_bcast(amdzen_t *azn, amdzen_df_t *df, const df_reg_def_t def)
338 {
339 	VERIFY(MUTEX_HELD(&azn->azn_mutex));
340 	return (amdzen_stub_get32(df->adf_funcs[def.drd_func], def.drd_reg));
341 }
342 
343 
/*
 * Read a 32-bit register over the SMN (System Management Network). SMN access
 * is indirect through an address/data register pair in the northbridge's PCI
 * config space, so the caller must hold azn_mutex across the two accesses.
 */
static uint32_t
amdzen_smn_read32(amdzen_t *azn, amdzen_df_t *df, uint32_t reg)
{
	VERIFY(MUTEX_HELD(&azn->azn_mutex));
	amdzen_stub_put32(df->adf_nb, AMDZEN_NB_SMN_ADDR, reg);
	return (amdzen_stub_get32(df->adf_nb, AMDZEN_NB_SMN_DATA));
}
351 
/*
 * Write a 32-bit register over the SMN. As with amdzen_smn_read32(), the
 * address must be programmed before the data, hence the lock requirement.
 */
static void
amdzen_smn_write32(amdzen_t *azn, amdzen_df_t *df, uint32_t reg, uint32_t val)
{
	VERIFY(MUTEX_HELD(&azn->azn_mutex));
	amdzen_stub_put32(df->adf_nb, AMDZEN_NB_SMN_ADDR, reg);
	amdzen_stub_put32(df->adf_nb, AMDZEN_NB_SMN_DATA, val);
}
359 
360 static amdzen_df_t *
361 amdzen_df_find(amdzen_t *azn, uint_t dfno)
362 {
363 	uint_t i;
364 
365 	ASSERT(MUTEX_HELD(&azn->azn_mutex));
366 	if (dfno >= azn->azn_ndfs) {
367 		return (NULL);
368 	}
369 
370 	for (i = 0; i < azn->azn_ndfs; i++) {
371 		amdzen_df_t *df = &azn->azn_dfs[i];
372 		if ((df->adf_flags & AMDZEN_DF_F_VALID) == 0) {
373 			continue;
374 		}
375 
376 		if (dfno == 0) {
377 			return (df);
378 		}
379 		dfno--;
380 	}
381 
382 	return (NULL);
383 }
384 
385 /*
386  * Client functions that are used by nexus children.
387  */
388 int
389 amdzen_c_smn_read32(uint_t dfno, uint32_t reg, uint32_t *valp)
390 {
391 	amdzen_df_t *df;
392 	amdzen_t *azn = amdzen_data;
393 
394 	mutex_enter(&azn->azn_mutex);
395 	df = amdzen_df_find(azn, dfno);
396 	if (df == NULL) {
397 		mutex_exit(&azn->azn_mutex);
398 		return (ENOENT);
399 	}
400 
401 	if ((df->adf_flags & AMDZEN_DF_F_FOUND_NB) == 0) {
402 		mutex_exit(&azn->azn_mutex);
403 		return (ENXIO);
404 	}
405 
406 	*valp = amdzen_smn_read32(azn, df, reg);
407 	mutex_exit(&azn->azn_mutex);
408 	return (0);
409 }
410 
411 int
412 amdzen_c_smn_write32(uint_t dfno, uint32_t reg, uint32_t val)
413 {
414 	amdzen_df_t *df;
415 	amdzen_t *azn = amdzen_data;
416 
417 	mutex_enter(&azn->azn_mutex);
418 	df = amdzen_df_find(azn, dfno);
419 	if (df == NULL) {
420 		mutex_exit(&azn->azn_mutex);
421 		return (ENOENT);
422 	}
423 
424 	if ((df->adf_flags & AMDZEN_DF_F_FOUND_NB) == 0) {
425 		mutex_exit(&azn->azn_mutex);
426 		return (ENXIO);
427 	}
428 
429 	amdzen_smn_write32(azn, df, reg, val);
430 	mutex_exit(&azn->azn_mutex);
431 	return (0);
432 }
433 
434 
435 uint_t
436 amdzen_c_df_count(void)
437 {
438 	uint_t ret;
439 	amdzen_t *azn = amdzen_data;
440 
441 	mutex_enter(&azn->azn_mutex);
442 	ret = azn->azn_ndfs;
443 	mutex_exit(&azn->azn_mutex);
444 	return (ret);
445 }
446 
447 df_rev_t
448 amdzen_c_df_rev(void)
449 {
450 	amdzen_df_t *df;
451 	amdzen_t *azn = amdzen_data;
452 	df_rev_t rev;
453 
454 	/*
455 	 * Always use the first DF instance to determine what we're using. Our
456 	 * current assumption, which seems to generally be true, is that the
457 	 * given DF revisions are the same in a given system when the DFs are
458 	 * directly connected.
459 	 */
460 	mutex_enter(&azn->azn_mutex);
461 	df = amdzen_df_find(azn, 0);
462 	if (df == NULL) {
463 		rev = DF_REV_UNKNOWN;
464 	} else {
465 		rev = df->adf_rev;
466 	}
467 	mutex_exit(&azn->azn_mutex);
468 
469 	return (rev);
470 }
471 
472 int
473 amdzen_c_df_read32(uint_t dfno, uint8_t inst, const df_reg_def_t def,
474     uint32_t *valp)
475 {
476 	amdzen_df_t *df;
477 	amdzen_t *azn = amdzen_data;
478 
479 	mutex_enter(&azn->azn_mutex);
480 	df = amdzen_df_find(azn, dfno);
481 	if (df == NULL) {
482 		mutex_exit(&azn->azn_mutex);
483 		return (ENOENT);
484 	}
485 
486 	*valp = amdzen_df_read_regdef(azn, df, def, inst, B_FALSE);
487 	mutex_exit(&azn->azn_mutex);
488 
489 	return (0);
490 }
491 
492 int
493 amdzen_c_df_read64(uint_t dfno, uint8_t inst, const df_reg_def_t def,
494     uint64_t *valp)
495 {
496 	amdzen_df_t *df;
497 	amdzen_t *azn = amdzen_data;
498 
499 	mutex_enter(&azn->azn_mutex);
500 	df = amdzen_df_find(azn, dfno);
501 	if (df == NULL) {
502 		mutex_exit(&azn->azn_mutex);
503 		return (ENOENT);
504 	}
505 
506 	*valp = amdzen_df_read_regdef(azn, df, def, inst, B_TRUE);
507 	mutex_exit(&azn->azn_mutex);
508 
509 	return (0);
510 }
511 
/*
 * Client entry point: iterate over all enabled DF entities of the given
 * logical type on DF dfno, invoking func(dfno, fabric_id, inst_id, arg) for
 * each. Iteration stops early if the callback returns non-zero, and that
 * value is returned to the caller. Returns ENOENT if the DF does not exist
 * and EINVAL for an unrecognized type.
 */
int
amdzen_c_df_iter(uint_t dfno, zen_df_type_t type, amdzen_c_iter_f func,
    void *arg)
{
	amdzen_df_t *df;
	amdzen_t *azn = amdzen_data;
	df_type_t df_type;
	uint8_t df_subtype;

	/*
	 * Unlike other calls here, we hold our lock only to find the DF here.
	 * The main reason for this is the nature of the callback function.
	 * Folks are iterating over instances so they can call back into us. If
	 * you look at the locking statement, the thing that is most volatile
	 * right here and what we need to protect is the DF itself and
	 * subsequent register accesses to it. The actual data about which
	 * entities exist is static and so once we have found a DF we should
	 * hopefully be in good shape as they only come, but don't go.
	 */
	mutex_enter(&azn->azn_mutex);
	df = amdzen_df_find(azn, dfno);
	if (df == NULL) {
		mutex_exit(&azn->azn_mutex);
		return (ENOENT);
	}
	mutex_exit(&azn->azn_mutex);

	/* Translate the generic zen_df_type_t into a DF type/subtype pair. */
	switch (type) {
	case ZEN_DF_TYPE_CS_UMC:
		df_type = DF_TYPE_CS;
		/*
		 * In the original Zeppelin DFv2 die there was no subtype field
		 * used for the CS. The UMC is the only type and has a subtype
		 * of zero.
		 */
		if (df->adf_rev != DF_REV_2) {
			df_subtype = DF_CS_SUBTYPE_UMC;
		} else {
			df_subtype = 0;
		}
		break;
	case ZEN_DF_TYPE_CCM_CPU:
		df_type = DF_TYPE_CCM;
		/*
		 * In the Genoa/DFv4 timeframe, with the introduction of CXL and
		 * related, a subtype was added here where as previously it was
		 * always zero.
		 */
		if (df->adf_major >= 4) {
			df_subtype = DF_CCM_SUBTYPE_CPU;
		} else {
			df_subtype = 0;
		}
		break;
	default:
		return (EINVAL);
	}

	for (uint_t i = 0; i < df->adf_nents; i++) {
		amdzen_df_ent_t *ent = &df->adf_ents[i];

		/*
		 * Some DF components are not considered enabled and therefore
		 * will end up having bogus values in their ID fields. If we do
		 * not have an enable flag set, we must skip this node.
		 */
		if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0)
			continue;

		if (ent->adfe_type == df_type &&
		    ent->adfe_subtype == df_subtype) {
			int ret = func(dfno, ent->adfe_fabric_id,
			    ent->adfe_inst_id, arg);
			if (ret != 0) {
				return (ret);
			}
		}
	}

	return (0);
}
593 
594 int
595 amdzen_c_df_fabric_decomp(df_fabric_decomp_t *decomp)
596 {
597 	const amdzen_df_t *df;
598 	amdzen_t *azn = amdzen_data;
599 
600 	mutex_enter(&azn->azn_mutex);
601 	df = amdzen_df_find(azn, 0);
602 	if (df == NULL) {
603 		mutex_exit(&azn->azn_mutex);
604 		return (ENOENT);
605 	}
606 
607 	*decomp = df->adf_decomp;
608 	mutex_exit(&azn->azn_mutex);
609 	return (0);
610 }
611 
612 static boolean_t
613 amdzen_create_child(amdzen_t *azn, const amdzen_child_data_t *acd)
614 {
615 	int ret;
616 	dev_info_t *child;
617 
618 	if (ndi_devi_alloc(azn->azn_dip, acd->acd_name,
619 	    (pnode_t)DEVI_SID_NODEID, &child) != NDI_SUCCESS) {
620 		dev_err(azn->azn_dip, CE_WARN, "!failed to allocate child "
621 		    "dip for %s", acd->acd_name);
622 		return (B_FALSE);
623 	}
624 
625 	ddi_set_parent_data(child, (void *)acd);
626 	if ((ret = ndi_devi_online(child, 0)) != NDI_SUCCESS) {
627 		dev_err(azn->azn_dip, CE_WARN, "!failed to online child "
628 		    "dip %s: %d", acd->acd_name, ret);
629 		return (B_FALSE);
630 	}
631 
632 	return (B_TRUE);
633 }
634 
635 static boolean_t
636 amdzen_map_dfs(amdzen_t *azn)
637 {
638 	amdzen_stub_t *stub;
639 
640 	ASSERT(MUTEX_HELD(&azn->azn_mutex));
641 
642 	for (stub = list_head(&azn->azn_df_stubs); stub != NULL;
643 	    stub = list_next(&azn->azn_df_stubs, stub)) {
644 		amdzen_df_t *df;
645 		uint_t dfno;
646 
647 		dfno = stub->azns_dev - AMDZEN_DF_FIRST_DEVICE;
648 		if (dfno > AMDZEN_MAX_DFS) {
649 			dev_err(stub->azns_dip, CE_WARN, "encountered df "
650 			    "device with illegal DF PCI b/d/f: 0x%x/%x/%x",
651 			    stub->azns_bus, stub->azns_dev, stub->azns_func);
652 			goto err;
653 		}
654 
655 		df = &azn->azn_dfs[dfno];
656 
657 		if (stub->azns_func >= AMDZEN_MAX_DF_FUNCS) {
658 			dev_err(stub->azns_dip, CE_WARN, "encountered df "
659 			    "device with illegal DF PCI b/d/f: 0x%x/%x/%x",
660 			    stub->azns_bus, stub->azns_dev, stub->azns_func);
661 			goto err;
662 		}
663 
664 		if (df->adf_funcs[stub->azns_func] != NULL) {
665 			dev_err(stub->azns_dip, CE_WARN, "encountered "
666 			    "duplicate df device with DF PCI b/d/f: 0x%x/%x/%x",
667 			    stub->azns_bus, stub->azns_dev, stub->azns_func);
668 			goto err;
669 		}
670 		df->adf_funcs[stub->azns_func] = stub;
671 	}
672 
673 	return (B_TRUE);
674 
675 err:
676 	azn->azn_flags |= AMDZEN_F_DEVICE_ERROR;
677 	return (B_FALSE);
678 }
679 
680 static boolean_t
681 amdzen_check_dfs(amdzen_t *azn)
682 {
683 	uint_t i;
684 	boolean_t ret = B_TRUE;
685 
686 	for (i = 0; i < AMDZEN_MAX_DFS; i++) {
687 		amdzen_df_t *df = &azn->azn_dfs[i];
688 		uint_t count = 0;
689 
690 		/*
691 		 * We require all platforms to have DFs functions 0-6. Not all
692 		 * platforms have DF function 7.
693 		 */
694 		for (uint_t func = 0; func < AMDZEN_MAX_DF_FUNCS - 1; func++) {
695 			if (df->adf_funcs[func] != NULL) {
696 				count++;
697 			}
698 		}
699 
700 		if (count == 0)
701 			continue;
702 
703 		if (count != 7) {
704 			ret = B_FALSE;
705 			dev_err(azn->azn_dip, CE_WARN, "df %u devices "
706 			    "incomplete", i);
707 		} else {
708 			df->adf_flags |= AMDZEN_DF_F_VALID;
709 			azn->azn_ndfs++;
710 		}
711 	}
712 
713 	return (ret);
714 }
715 
/*
 * Rome (and Milan) use a discontiguous DF instance ID space: hardware IDs
 * 12-15 and 28-29 are skipped. This table maps a contiguous logical index to
 * the actual hardware instance ID; it has 0x2b (43) entries.
 */
static const uint8_t amdzen_df_rome_ids[0x2b] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, 20, 21, 22, 23,
	24, 25, 26, 27, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
	44, 45, 46, 47, 48
};
721 
722 /*
723  * Check the first df entry to see if it belongs to Rome or Milan. If so, then
724  * it uses the disjoint ID space.
725  */
726 static boolean_t
727 amdzen_is_rome_style(uint_t id)
728 {
729 	return (id == 0x1490 || id == 0x1650);
730 }
731 
732 /*
733  * To be able to do most other things we want to do, we must first determine
734  * what revision of the DF (data fabric) that we're using.
735  *
736  * Snapshot the df version. This was added explicitly in DFv4.0, around the Zen
737  * 4 timeframe and allows us to tell apart different version of the DF register
738  * set, most usefully when various subtypes were added.
739  *
740  * Older versions can theoretically be told apart based on usage of reserved
741  * registers. We walk these in the following order, starting with the newest rev
742  * and walking backwards to tell things apart:
743  *
744  *   o v3.5 -> Check function 1, register 0x150. This was reserved prior
745  *             to this point. This is actually DF_FIDMASK0_V3P5. We are supposed
746  *             to check bits [7:0].
747  *
748  *   o v3.0 -> Check function 1, register 0x208. The low byte (7:0) was
749  *             changed to indicate a component mask. This is non-zero
750  *             in the 3.0 generation. This is actually DF_FIDMASK_V2.
751  *
 *   o v2.0 -> This is simply the fallback case when neither of the above
 *             registers is set. Presumably v1 wasn't part of the Zen
 *             generation.
754  *
755  * Because we don't know what version we are yet, we do not use the normal
756  * versioned register accesses which would check what DF version we are and
757  * would want to use the normal indirect register accesses (which also require
758  * us to know the version). We instead do direct broadcast reads.
759  */
static void
amdzen_determine_df_vers(amdzen_t *azn, amdzen_df_t *df)
{
	uint32_t val;
	df_reg_def_t rd = DF_FBICNT;

	/*
	 * Start with the explicit version fields introduced in DFv4. On older
	 * parts these bits read as zero, which triggers the reserved-register
	 * probing described in the block comment above.
	 */
	val = amdzen_stub_get32(df->adf_funcs[rd.drd_func], rd.drd_reg);
	df->adf_major = DF_FBICNT_V4_GET_MAJOR(val);
	df->adf_minor = DF_FBICNT_V4_GET_MINOR(val);
	if (df->adf_major == 0 && df->adf_minor == 0) {
		/* Pre-v4 part: probe v3.5's mask register (reserved before). */
		rd = DF_FIDMASK0_V3P5;
		val = amdzen_stub_get32(df->adf_funcs[rd.drd_func], rd.drd_reg);
		if (bitx32(val, 7, 0) != 0) {
			df->adf_major = 3;
			df->adf_minor = 5;
			df->adf_rev = DF_REV_3P5;
		} else {
			/* Not v3.5: the v2/v3 mask register tells them apart. */
			rd = DF_FIDMASK_V2;
			val = amdzen_stub_get32(df->adf_funcs[rd.drd_func],
			    rd.drd_reg);
			if (bitx32(val, 7, 0) != 0) {
				df->adf_major = 3;
				df->adf_minor = 0;
				df->adf_rev = DF_REV_3;
			} else {
				df->adf_major = 2;
				df->adf_minor = 0;
				df->adf_rev = DF_REV_2;
			}
		}
	} else if (df->adf_major == 4 && df->adf_minor == 0) {
		df->adf_rev = DF_REV_4;
	} else {
		/* A version we don't (yet) know how to handle. */
		df->adf_rev = DF_REV_UNKNOWN;
	}
}
796 
797 /*
798  * All of the different versions of the DF have different ways of getting at and
799  * answering the question of how do I break a fabric ID into a corresponding
800  * socket, die, and component. Importantly the goal here is to obtain, cache,
801  * and normalize:
802  *
803  *  o The DF System Configuration
804  *  o The various Mask registers
805  *  o The Node ID
806  */
static void
amdzen_determine_fabric_decomp(amdzen_t *azn, amdzen_df_t *df)
{
	uint32_t mask;
	df_fabric_decomp_t *decomp = &df->adf_decomp;

	/*
	 * For each DF revision: snapshot the system configuration and mask
	 * registers, then normalize the per-revision mask/shift layouts into
	 * the common df_fabric_decomp_t form.
	 */
	switch (df->adf_rev) {
	case DF_REV_2:
		df->adf_syscfg = amdzen_df_read32_bcast(azn, df, DF_SYSCFG_V2);
		switch (DF_SYSCFG_V2_GET_MY_TYPE(df->adf_syscfg)) {
		case DF_DIE_TYPE_CPU:
			mask = amdzen_df_read32_bcast(azn, df,
			    DF_DIEMASK_CPU_V2);
			break;
		case DF_DIE_TYPE_APU:
			mask = amdzen_df_read32_bcast(azn, df,
			    DF_DIEMASK_APU_V2);
			break;
		default:
			panic("DF thinks we're not on a CPU!");
		}
		df->adf_mask0 = mask;

		/*
		 * DFv2 is a bit different in how the fabric mask register is
		 * phrased. Logically a fabric ID is broken into something that
		 * uniquely identifies a "node" (a particular die on a socket)
		 * and something that identifies a "component", e.g. a memory
		 * controller.
		 *
		 * Starting with DFv3, these registers logically called out how
		 * to separate the fabric ID first into a node and a component.
		 * Then the node was then broken down into a socket and die. In
		 * DFv2, there is no separate mask and shift of a node. Instead
		 * the socket and die are absolute offsets into the fabric ID
		 * rather than relative offsets into the node ID. As such, when
		 * we encounter DFv2, we fake up a node mask and shift and make
		 * it look like DFv3+.
		 */
		decomp->dfd_node_mask = DF_DIEMASK_V2_GET_SOCK_MASK(mask) |
		    DF_DIEMASK_V2_GET_DIE_MASK(mask);
		decomp->dfd_node_shift = DF_DIEMASK_V2_GET_DIE_SHIFT(mask);
		decomp->dfd_comp_mask = DF_DIEMASK_V2_GET_COMP_MASK(mask);
		decomp->dfd_comp_shift = 0;

		/*
		 * The socket/die masks and shifts are made relative to the
		 * synthetic node shift computed above.
		 */
		decomp->dfd_sock_mask = DF_DIEMASK_V2_GET_SOCK_MASK(mask) >>
		    decomp->dfd_node_shift;
		decomp->dfd_die_mask = DF_DIEMASK_V2_GET_DIE_MASK(mask) >>
		    decomp->dfd_node_shift;
		decomp->dfd_sock_shift = DF_DIEMASK_V2_GET_SOCK_SHIFT(mask) -
		    decomp->dfd_node_shift;
		decomp->dfd_die_shift = DF_DIEMASK_V2_GET_DIE_SHIFT(mask) -
		    decomp->dfd_node_shift;
		ASSERT3U(decomp->dfd_die_shift, ==, 0);
		break;
	case DF_REV_3:
		df->adf_syscfg = amdzen_df_read32_bcast(azn, df, DF_SYSCFG_V3);
		df->adf_mask0 =  amdzen_df_read32_bcast(azn, df,
		    DF_FIDMASK0_V3);
		df->adf_mask1 =  amdzen_df_read32_bcast(azn, df,
		    DF_FIDMASK1_V3);

		decomp->dfd_sock_mask =
		    DF_FIDMASK1_V3_GET_SOCK_MASK(df->adf_mask1);
		decomp->dfd_sock_shift =
		    DF_FIDMASK1_V3_GET_SOCK_SHIFT(df->adf_mask1);
		decomp->dfd_die_mask =
		    DF_FIDMASK1_V3_GET_DIE_MASK(df->adf_mask1);
		decomp->dfd_die_shift = 0;
		decomp->dfd_node_mask =
		    DF_FIDMASK0_V3_GET_NODE_MASK(df->adf_mask0);
		decomp->dfd_node_shift =
		    DF_FIDMASK1_V3_GET_NODE_SHIFT(df->adf_mask1);
		decomp->dfd_comp_mask =
		    DF_FIDMASK0_V3_GET_COMP_MASK(df->adf_mask0);
		decomp->dfd_comp_shift = 0;
		break;
	case DF_REV_3P5:
		df->adf_syscfg = amdzen_df_read32_bcast(azn, df,
		    DF_SYSCFG_V3P5);
		df->adf_mask0 =  amdzen_df_read32_bcast(azn, df,
		    DF_FIDMASK0_V3P5);
		df->adf_mask1 =  amdzen_df_read32_bcast(azn, df,
		    DF_FIDMASK1_V3P5);
		df->adf_mask2 =  amdzen_df_read32_bcast(azn, df,
		    DF_FIDMASK2_V3P5);

		decomp->dfd_sock_mask =
		    DF_FIDMASK2_V3P5_GET_SOCK_MASK(df->adf_mask2);
		decomp->dfd_sock_shift =
		    DF_FIDMASK1_V3P5_GET_SOCK_SHIFT(df->adf_mask1);
		decomp->dfd_die_mask =
		    DF_FIDMASK2_V3P5_GET_DIE_MASK(df->adf_mask2);
		decomp->dfd_die_shift = 0;
		decomp->dfd_node_mask =
		    DF_FIDMASK0_V3P5_GET_NODE_MASK(df->adf_mask0);
		decomp->dfd_node_shift =
		    DF_FIDMASK1_V3P5_GET_NODE_SHIFT(df->adf_mask1);
		decomp->dfd_comp_mask =
		    DF_FIDMASK0_V3P5_GET_COMP_MASK(df->adf_mask0);
		decomp->dfd_comp_shift = 0;
		break;
	case DF_REV_4:
		df->adf_syscfg = amdzen_df_read32_bcast(azn, df, DF_SYSCFG_V4);
		df->adf_mask0 =  amdzen_df_read32_bcast(azn, df,
		    DF_FIDMASK0_V4);
		df->adf_mask1 =  amdzen_df_read32_bcast(azn, df,
		    DF_FIDMASK1_V4);
		df->adf_mask2 =  amdzen_df_read32_bcast(azn, df,
		    DF_FIDMASK2_V4);

		/*
		 * The DFv4 registers are at a different location in the DF;
		 * however, the actual layout of fields is the same as DFv3.5.
		 * This is why you see V3P5 below.
		 */
		decomp->dfd_sock_mask =
		    DF_FIDMASK2_V3P5_GET_SOCK_MASK(df->adf_mask2);
		decomp->dfd_sock_shift =
		    DF_FIDMASK1_V3P5_GET_SOCK_SHIFT(df->adf_mask1);
		decomp->dfd_die_mask =
		    DF_FIDMASK2_V3P5_GET_DIE_MASK(df->adf_mask2);
		decomp->dfd_die_shift = 0;
		decomp->dfd_node_mask =
		    DF_FIDMASK0_V3P5_GET_NODE_MASK(df->adf_mask0);
		decomp->dfd_node_shift =
		    DF_FIDMASK1_V3P5_GET_NODE_SHIFT(df->adf_mask1);
		decomp->dfd_comp_mask =
		    DF_FIDMASK0_V3P5_GET_COMP_MASK(df->adf_mask0);
		decomp->dfd_comp_shift = 0;
		break;
	default:
		panic("encountered suspicious, previously rejected DF "
		    "rev: 0x%x", df->adf_rev);
	}
}
943 
944 /*
945  * Initialize our knowledge about a given series of nodes on the data fabric.
946  */
947 static void
948 amdzen_setup_df(amdzen_t *azn, amdzen_df_t *df)
949 {
950 	uint_t i;
951 	uint32_t val;
952 
953 	amdzen_determine_df_vers(azn, df);
954 
955 	switch (df->adf_rev) {
956 	case DF_REV_2:
957 	case DF_REV_3:
958 	case DF_REV_3P5:
959 		val = amdzen_df_read32_bcast(azn, df, DF_CFG_ADDR_CTL_V2);
960 		break;
961 	case DF_REV_4:
962 		val = amdzen_df_read32_bcast(azn, df, DF_CFG_ADDR_CTL_V4);
963 		break;
964 	default:
965 		dev_err(azn->azn_dip, CE_WARN, "encountered unsupported DF "
966 		    "revision: 0x%x", df->adf_rev);
967 		return;
968 	}
969 	df->adf_nb_busno = DF_CFG_ADDR_CTL_GET_BUS_NUM(val);
970 	val = amdzen_df_read32_bcast(azn, df, DF_FBICNT);
971 	df->adf_nents = DF_FBICNT_GET_COUNT(val);
972 	if (df->adf_nents == 0)
973 		return;
974 	df->adf_ents = kmem_zalloc(sizeof (amdzen_df_ent_t) * df->adf_nents,
975 	    KM_SLEEP);
976 
977 	for (i = 0; i < df->adf_nents; i++) {
978 		amdzen_df_ent_t *dfe = &df->adf_ents[i];
979 		uint8_t inst = i;
980 
981 		/*
982 		 * Unfortunately, Rome uses a discontinuous instance ID pattern
983 		 * while everything else we can find uses a contiguous instance
984 		 * ID pattern.  This means that for Rome, we need to adjust the
985 		 * indexes that we iterate over, though the total number of
986 		 * entries is right. This was carried over into Milan, but not
987 		 * Genoa.
988 		 */
989 		if (amdzen_is_rome_style(df->adf_funcs[0]->azns_did)) {
990 			if (inst > ARRAY_SIZE(amdzen_df_rome_ids)) {
991 				dev_err(azn->azn_dip, CE_WARN, "Rome family "
992 				    "processor reported more ids than the PPR, "
993 				    "resetting %u to instance zero", inst);
994 				inst = 0;
995 			} else {
996 				inst = amdzen_df_rome_ids[inst];
997 			}
998 		}
999 
1000 		dfe->adfe_drvid = inst;
1001 		dfe->adfe_info0 = amdzen_df_read32(azn, df, inst, DF_FBIINFO0);
1002 		dfe->adfe_info1 = amdzen_df_read32(azn, df, inst, DF_FBIINFO1);
1003 		dfe->adfe_info2 = amdzen_df_read32(azn, df, inst, DF_FBIINFO2);
1004 		dfe->adfe_info3 = amdzen_df_read32(azn, df, inst, DF_FBIINFO3);
1005 
1006 		dfe->adfe_type = DF_FBIINFO0_GET_TYPE(dfe->adfe_info0);
1007 		dfe->adfe_subtype = DF_FBIINFO0_GET_SUBTYPE(dfe->adfe_info0);
1008 
1009 		/*
1010 		 * The enabled flag was not present in Zen 1. Simulate it by
1011 		 * checking for a non-zero register instead.
1012 		 */
1013 		if (DF_FBIINFO0_V3_GET_ENABLED(dfe->adfe_info0) ||
1014 		    (df->adf_rev == DF_REV_2 && dfe->adfe_info0 != 0)) {
1015 			dfe->adfe_flags |= AMDZEN_DFE_F_ENABLED;
1016 		}
1017 		if (DF_FBIINFO0_GET_HAS_MCA(dfe->adfe_info0)) {
1018 			dfe->adfe_flags |= AMDZEN_DFE_F_MCA;
1019 		}
1020 		dfe->adfe_inst_id = DF_FBIINFO3_GET_INSTID(dfe->adfe_info3);
1021 		switch (df->adf_rev) {
1022 		case DF_REV_2:
1023 			dfe->adfe_fabric_id =
1024 			    DF_FBIINFO3_V2_GET_BLOCKID(dfe->adfe_info3);
1025 			break;
1026 		case DF_REV_3:
1027 			dfe->adfe_fabric_id =
1028 			    DF_FBIINFO3_V3_GET_BLOCKID(dfe->adfe_info3);
1029 			break;
1030 		case DF_REV_3P5:
1031 			dfe->adfe_fabric_id =
1032 			    DF_FBIINFO3_V3P5_GET_BLOCKID(dfe->adfe_info3);
1033 			break;
1034 		case DF_REV_4:
1035 			dfe->adfe_fabric_id =
1036 			    DF_FBIINFO3_V4_GET_BLOCKID(dfe->adfe_info3);
1037 			break;
1038 		default:
1039 			panic("encountered suspicious, previously rejected DF "
1040 			    "rev: 0x%x", df->adf_rev);
1041 		}
1042 	}
1043 
1044 	amdzen_determine_fabric_decomp(azn, df);
1045 }
1046 
1047 static void
1048 amdzen_find_nb(amdzen_t *azn, amdzen_df_t *df)
1049 {
1050 	amdzen_stub_t *stub;
1051 
1052 	for (stub = list_head(&azn->azn_nb_stubs); stub != NULL;
1053 	    stub = list_next(&azn->azn_nb_stubs, stub)) {
1054 		if (stub->azns_bus == df->adf_nb_busno) {
1055 			df->adf_flags |= AMDZEN_DF_F_FOUND_NB;
1056 			df->adf_nb = stub;
1057 			return;
1058 		}
1059 	}
1060 }
1061 
/*
 * Taskq callback that performs the second phase of attach, once all of the
 * stub drivers we scanned for have shown up. It maps stubs to data fabrics,
 * sets up each DF, pairs it with its northbridge, and then enumerates our
 * child devices. On exit it always clears the dispatched flag, marks attach
 * complete, and wakes anyone waiting in amdzen_detach().
 */
static void
amdzen_nexus_init(void *arg)
{
	uint_t i;
	amdzen_t *azn = arg;

	/*
	 * First go through all of the stubs and assign the DF entries.
	 */
	mutex_enter(&azn->azn_mutex);
	if (!amdzen_map_dfs(azn) || !amdzen_check_dfs(azn)) {
		azn->azn_flags |= AMDZEN_F_MAP_ERROR;
		goto done;
	}

	for (i = 0; i < AMDZEN_MAX_DFS; i++) {
		amdzen_df_t *df = &azn->azn_dfs[i];

		/* Skip slots that never had a DF mapped into them. */
		if ((df->adf_flags & AMDZEN_DF_F_VALID) == 0)
			continue;
		amdzen_setup_df(azn, df);
		amdzen_find_nb(azn, df);
	}

	/*
	 * Not all children may be installed. As such, we do not treat the
	 * failure of a child as fatal to the driver.
	 */
	mutex_exit(&azn->azn_mutex);
	for (i = 0; i < ARRAY_SIZE(amdzen_children); i++) {
		(void) amdzen_create_child(azn, &amdzen_children[i]);
	}
	mutex_enter(&azn->azn_mutex);

done:
	/*
	 * Attach is finished (successfully or not): publish the state change
	 * and wake any thread blocked on azn_cv (e.g. amdzen_detach()).
	 */
	azn->azn_flags &= ~AMDZEN_F_ATTACH_DISPATCHED;
	azn->azn_flags |= AMDZEN_F_ATTACH_COMPLETE;
	azn->azn_taskqid = TASKQID_INVALID;
	cv_broadcast(&azn->azn_cv);
	mutex_exit(&azn->azn_mutex);
}
1103 
1104 static int
1105 amdzen_stub_scan_cb(dev_info_t *dip, void *arg)
1106 {
1107 	amdzen_t *azn = arg;
1108 	uint16_t vid, did;
1109 	int *regs;
1110 	uint_t nregs, i;
1111 	boolean_t match = B_FALSE;
1112 
1113 	if (dip == ddi_root_node()) {
1114 		return (DDI_WALK_CONTINUE);
1115 	}
1116 
1117 	/*
1118 	 * If a node in question is not a pci node, then we have no interest in
1119 	 * it as all the stubs that we care about are related to pci devices.
1120 	 */
1121 	if (strncmp("pci", ddi_get_name(dip), 3) != 0) {
1122 		return (DDI_WALK_PRUNECHILD);
1123 	}
1124 
1125 	/*
1126 	 * If we can't get a device or vendor ID and prove that this is an AMD
1127 	 * part, then we don't care about it.
1128 	 */
1129 	vid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
1130 	    "vendor-id", PCI_EINVAL16);
1131 	did = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
1132 	    "device-id", PCI_EINVAL16);
1133 	if (vid == PCI_EINVAL16 || did == PCI_EINVAL16) {
1134 		return (DDI_WALK_CONTINUE);
1135 	}
1136 
1137 	if (vid != AMDZEN_PCI_VID_AMD && vid != AMDZEN_PCI_VID_HYGON) {
1138 		return (DDI_WALK_CONTINUE);
1139 	}
1140 
1141 	for (i = 0; i < ARRAY_SIZE(amdzen_nb_ids); i++) {
1142 		if (amdzen_nb_ids[i] == did) {
1143 			match = B_TRUE;
1144 		}
1145 	}
1146 
1147 	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
1148 	    "reg", &regs, &nregs) != DDI_PROP_SUCCESS) {
1149 		return (DDI_WALK_CONTINUE);
1150 	}
1151 
1152 	if (nregs == 0) {
1153 		ddi_prop_free(regs);
1154 		return (DDI_WALK_CONTINUE);
1155 	}
1156 
1157 	if (PCI_REG_BUS_G(regs[0]) == AMDZEN_DF_BUSNO &&
1158 	    PCI_REG_DEV_G(regs[0]) >= AMDZEN_DF_FIRST_DEVICE) {
1159 		match = B_TRUE;
1160 	}
1161 
1162 	ddi_prop_free(regs);
1163 	if (match) {
1164 		mutex_enter(&azn->azn_mutex);
1165 		azn->azn_nscanned++;
1166 		mutex_exit(&azn->azn_mutex);
1167 	}
1168 
1169 	return (DDI_WALK_CONTINUE);
1170 }
1171 
/*
 * Taskq callback dispatched from amdzen_attach(). Walks the whole device tree
 * counting expected stub devices (azn_nscanned). Once the count is known we
 * either declare the platform unsupported (nothing found), or, if every
 * expected stub has already attached, kick off amdzen_nexus_init(). If stubs
 * are still outstanding, the last one to attach dispatches the init instead
 * (see amdzen_attach_stub()).
 */
static void
amdzen_stub_scan(void *arg)
{
	amdzen_t *azn = arg;

	mutex_enter(&azn->azn_mutex);
	azn->azn_nscanned = 0;
	mutex_exit(&azn->azn_mutex);

	/* Walk the tree without the lock held; the callback re-takes it. */
	ddi_walk_devs(ddi_root_node(), amdzen_stub_scan_cb, azn);

	mutex_enter(&azn->azn_mutex);
	azn->azn_flags &= ~AMDZEN_F_SCAN_DISPATCHED;
	azn->azn_flags |= AMDZEN_F_SCAN_COMPLETE;

	if (azn->azn_nscanned == 0) {
		/* No stubs at all: this platform is not for us. */
		azn->azn_flags |= AMDZEN_F_UNSUPPORTED;
		azn->azn_taskqid = TASKQID_INVALID;
		cv_broadcast(&azn->azn_cv);
	} else if (azn->azn_npresent == azn->azn_nscanned) {
		/* All expected stubs are already attached; start init now. */
		azn->azn_flags |= AMDZEN_F_ATTACH_DISPATCHED;
		azn->azn_taskqid = taskq_dispatch(system_taskq,
		    amdzen_nexus_init, azn, TQ_SLEEP);
	}
	mutex_exit(&azn->azn_mutex);
}
1198 
1199 /*
1200  * Unfortunately we can't really let the stubs detach as we may need them to be
1201  * available for client operations. We may be able to improve this if we know
1202  * that the actual nexus is going away. However, as long as it's active, we need
1203  * all the stubs.
1204  */
1205 int
1206 amdzen_detach_stub(dev_info_t *dip, ddi_detach_cmd_t cmd)
1207 {
1208 	if (cmd == DDI_SUSPEND) {
1209 		return (DDI_SUCCESS);
1210 	}
1211 
1212 	return (DDI_FAILURE);
1213 }
1214 
/*
 * Attach entry point shared by all of the stub drivers. Validates that the
 * device really is one of the PCI functions we care about (a known
 * northbridge device ID, or a data fabric function by bus/device address),
 * sets up config space access, and records the stub on the appropriate list.
 * If this was the last outstanding stub after the scan completed, it also
 * dispatches amdzen_nexus_init() to finish bringing up the nexus.
 */
int
amdzen_attach_stub(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int *regs, reg;
	uint_t nregs, i;
	uint16_t vid, did;
	amdzen_stub_t *stub;
	amdzen_t *azn = amdzen_data;
	boolean_t valid = B_FALSE;
	boolean_t nb = B_FALSE;

	if (cmd == DDI_RESUME) {
		return (DDI_SUCCESS);
	} else if (cmd != DDI_ATTACH) {
		return (DDI_FAILURE);
	}

	/*
	 * Make sure that the stub that we've been asked to attach is a pci type
	 * device. If not, then there is no reason for us to proceed.
	 */
	if (strncmp("pci", ddi_get_name(dip), 3) != 0) {
		dev_err(dip, CE_WARN, "asked to attach a bad AMD Zen nexus "
		    "stub: %s", ddi_get_name(dip));
		return (DDI_FAILURE);
	}
	vid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "vendor-id", PCI_EINVAL16);
	did = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "device-id", PCI_EINVAL16);
	if (vid == PCI_EINVAL16 || did == PCI_EINVAL16) {
		dev_err(dip, CE_WARN, "failed to get PCI ID properties");
		return (DDI_FAILURE);
	}

	if (vid != AMDZEN_PCI_VID_AMD && vid != AMDZEN_PCI_VID_HYGON) {
		dev_err(dip, CE_WARN, "expected vendor ID (0x%x), found 0x%x",
		    cpuid_getvendor(CPU) == X86_VENDOR_HYGON ?
		    AMDZEN_PCI_VID_HYGON : AMDZEN_PCI_VID_AMD, vid);
		return (DDI_FAILURE);
	}

	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "reg", &regs, &nregs) != DDI_PROP_SUCCESS) {
		dev_err(dip, CE_WARN, "failed to get 'reg' property");
		return (DDI_FAILURE);
	}

	if (nregs == 0) {
		ddi_prop_free(regs);
		dev_err(dip, CE_WARN, "missing 'reg' property values");
		return (DDI_FAILURE);
	}
	/* Only the first 'reg' entry (the config-space address) matters. */
	reg = *regs;
	ddi_prop_free(regs);

	/* A northbridge is identified purely by its PCI device ID. */
	for (i = 0; i < ARRAY_SIZE(amdzen_nb_ids); i++) {
		if (amdzen_nb_ids[i] == did) {
			valid = B_TRUE;
			nb = B_TRUE;
		}
	}

	/*
	 * Data fabric functions are instead identified by their location:
	 * they sit on the DF bus at or above the first DF device slot.
	 */
	if (!valid && PCI_REG_BUS_G(reg) == AMDZEN_DF_BUSNO &&
	    PCI_REG_DEV_G(reg) >= AMDZEN_DF_FIRST_DEVICE) {
		valid = B_TRUE;
		nb = B_FALSE;
	}

	if (!valid) {
		dev_err(dip, CE_WARN, "device %s didn't match the nexus list",
		    ddi_get_name(dip));
		return (DDI_FAILURE);
	}

	stub = kmem_alloc(sizeof (amdzen_stub_t), KM_SLEEP);
	if (pci_config_setup(dip, &stub->azns_cfgspace) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "failed to set up config space");
		kmem_free(stub, sizeof (amdzen_stub_t));
		return (DDI_FAILURE);
	}

	stub->azns_dip = dip;
	stub->azns_vid = vid;
	stub->azns_did = did;
	stub->azns_bus = PCI_REG_BUS_G(reg);
	stub->azns_dev = PCI_REG_DEV_G(reg);
	stub->azns_func = PCI_REG_FUNC_G(reg);
	ddi_set_driver_private(dip, stub);

	mutex_enter(&azn->azn_mutex);
	azn->azn_npresent++;
	if (nb) {
		list_insert_tail(&azn->azn_nb_stubs, stub);
	} else {
		list_insert_tail(&azn->azn_df_stubs, stub);
	}

	/*
	 * If the scan has finished and this stub was the last one it was
	 * waiting on, it falls to us to dispatch the nexus initialization.
	 */
	if ((azn->azn_flags & AMDZEN_F_TASKQ_MASK) == AMDZEN_F_SCAN_COMPLETE &&
	    azn->azn_nscanned == azn->azn_npresent) {
		azn->azn_flags |= AMDZEN_F_ATTACH_DISPATCHED;
		azn->azn_taskqid = taskq_dispatch(system_taskq,
		    amdzen_nexus_init, azn, TQ_SLEEP);
	}
	mutex_exit(&azn->azn_mutex);

	return (DDI_SUCCESS);
}
1323 
1324 static int
1325 amdzen_bus_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
1326     void *arg, void *result)
1327 {
1328 	char buf[32];
1329 	dev_info_t *child;
1330 	const amdzen_child_data_t *acd;
1331 
1332 	switch (ctlop) {
1333 	case DDI_CTLOPS_REPORTDEV:
1334 		if (rdip == NULL) {
1335 			return (DDI_FAILURE);
1336 		}
1337 		cmn_err(CE_CONT, "amdzen nexus: %s@%s, %s%d\n",
1338 		    ddi_node_name(rdip), ddi_get_name_addr(rdip),
1339 		    ddi_driver_name(rdip), ddi_get_instance(rdip));
1340 		break;
1341 	case DDI_CTLOPS_INITCHILD:
1342 		child = arg;
1343 		if (child == NULL) {
1344 			dev_err(dip, CE_WARN, "!no child passed for "
1345 			    "DDI_CTLOPS_INITCHILD");
1346 		}
1347 
1348 		acd = ddi_get_parent_data(child);
1349 		if (acd == NULL) {
1350 			dev_err(dip, CE_WARN, "!missing child parent data");
1351 			return (DDI_FAILURE);
1352 		}
1353 
1354 		if (snprintf(buf, sizeof (buf), "%d", acd->acd_addr) >=
1355 		    sizeof (buf)) {
1356 			dev_err(dip, CE_WARN, "!failed to construct device "
1357 			    "addr due to overflow");
1358 			return (DDI_FAILURE);
1359 		}
1360 
1361 		ddi_set_name_addr(child, buf);
1362 		break;
1363 	case DDI_CTLOPS_UNINITCHILD:
1364 		child = arg;
1365 		if (child == NULL) {
1366 			dev_err(dip, CE_WARN, "!no child passed for "
1367 			    "DDI_CTLOPS_UNINITCHILD");
1368 		}
1369 
1370 		ddi_set_name_addr(child, NULL);
1371 		break;
1372 	default:
1373 		return (ddi_ctlops(dip, rdip, ctlop, arg, result));
1374 	}
1375 	return (DDI_SUCCESS);
1376 }
1377 
1378 static int
1379 amdzen_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
1380 {
1381 	amdzen_t *azn = amdzen_data;
1382 
1383 	if (cmd == DDI_RESUME) {
1384 		return (DDI_SUCCESS);
1385 	} else if (cmd != DDI_ATTACH) {
1386 		return (DDI_FAILURE);
1387 	}
1388 
1389 	mutex_enter(&azn->azn_mutex);
1390 	if (azn->azn_dip != NULL) {
1391 		dev_err(dip, CE_WARN, "driver is already attached!");
1392 		mutex_exit(&azn->azn_mutex);
1393 		return (DDI_FAILURE);
1394 	}
1395 
1396 	azn->azn_dip = dip;
1397 	azn->azn_taskqid = taskq_dispatch(system_taskq, amdzen_stub_scan,
1398 	    azn, TQ_SLEEP);
1399 	azn->azn_flags |= AMDZEN_F_SCAN_DISPATCHED;
1400 	mutex_exit(&azn->azn_mutex);
1401 
1402 	return (DDI_SUCCESS);
1403 }
1404 
1405 static int
1406 amdzen_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
1407 {
1408 	amdzen_t *azn = amdzen_data;
1409 
1410 	if (cmd == DDI_SUSPEND) {
1411 		return (DDI_SUCCESS);
1412 	} else if (cmd != DDI_DETACH) {
1413 		return (DDI_FAILURE);
1414 	}
1415 
1416 	mutex_enter(&azn->azn_mutex);
1417 	while (azn->azn_taskqid != TASKQID_INVALID) {
1418 		cv_wait(&azn->azn_cv, &azn->azn_mutex);
1419 	}
1420 
1421 	/*
1422 	 * If we've attached any stub drivers, e.g. this platform is important
1423 	 * for us, then we fail detach.
1424 	 */
1425 	if (!list_is_empty(&azn->azn_df_stubs) ||
1426 	    !list_is_empty(&azn->azn_nb_stubs)) {
1427 		mutex_exit(&azn->azn_mutex);
1428 		return (DDI_FAILURE);
1429 	}
1430 
1431 	azn->azn_dip = NULL;
1432 	mutex_exit(&azn->azn_mutex);
1433 
1434 	return (DDI_SUCCESS);
1435 }
1436 
1437 static void
1438 amdzen_free(void)
1439 {
1440 	if (amdzen_data == NULL) {
1441 		return;
1442 	}
1443 
1444 	VERIFY(list_is_empty(&amdzen_data->azn_df_stubs));
1445 	list_destroy(&amdzen_data->azn_df_stubs);
1446 	VERIFY(list_is_empty(&amdzen_data->azn_nb_stubs));
1447 	list_destroy(&amdzen_data->azn_nb_stubs);
1448 	cv_destroy(&amdzen_data->azn_cv);
1449 	mutex_destroy(&amdzen_data->azn_mutex);
1450 	kmem_free(amdzen_data, sizeof (amdzen_t));
1451 	amdzen_data = NULL;
1452 }
1453 
1454 static void
1455 amdzen_alloc(void)
1456 {
1457 	amdzen_data = kmem_zalloc(sizeof (amdzen_t), KM_SLEEP);
1458 	mutex_init(&amdzen_data->azn_mutex, NULL, MUTEX_DRIVER, NULL);
1459 	list_create(&amdzen_data->azn_df_stubs, sizeof (amdzen_stub_t),
1460 	    offsetof(amdzen_stub_t, azns_link));
1461 	list_create(&amdzen_data->azn_nb_stubs, sizeof (amdzen_stub_t),
1462 	    offsetof(amdzen_stub_t, azns_link));
1463 	cv_init(&amdzen_data->azn_cv, NULL, CV_DRIVER, NULL);
1464 }
1465 
/*
 * Bus operations for the nexus. We are a purely logical nexus: children get
 * no address-space mapping or DMA services from us, so those entry points are
 * all stubbed out. The only interesting operation is bus_ctl, which handles
 * child naming.
 */
struct bus_ops amdzen_bus_ops = {
	.busops_rev = BUSO_REV,
	.bus_map = nullbusmap,
	.bus_dma_map = ddi_no_dma_map,
	.bus_dma_allochdl = ddi_no_dma_allochdl,
	.bus_dma_freehdl = ddi_no_dma_freehdl,
	.bus_dma_bindhdl = ddi_no_dma_bindhdl,
	.bus_dma_unbindhdl = ddi_no_dma_unbindhdl,
	.bus_dma_flush = ddi_no_dma_flush,
	.bus_dma_win = ddi_no_dma_win,
	.bus_dma_ctl = ddi_no_dma_mctl,
	.bus_prop_op = ddi_bus_prop_op,
	.bus_ctl = amdzen_bus_ctl
};
1480 
/*
 * Character/driver operations. No cb_ops are provided; this driver exists
 * purely to act as a nexus, so only attach/detach and the bus ops matter.
 * No quiesce work is required as we perform no DMA and own no interrupts.
 */
static struct dev_ops amdzen_dev_ops = {
	.devo_rev = DEVO_REV,
	.devo_refcnt = 0,
	.devo_getinfo = nodev,
	.devo_identify = nulldev,
	.devo_probe = nulldev,
	.devo_attach = amdzen_attach,
	.devo_detach = amdzen_detach,
	.devo_reset = nodev,
	.devo_quiesce = ddi_quiesce_not_needed,
	.devo_bus_ops = &amdzen_bus_ops
};
1493 
/* Module linkage: we are a single driver module. */
static struct modldrv amdzen_modldrv = {
	.drv_modops = &mod_driverops,
	.drv_linkinfo = "AMD Zen Nexus Driver",
	.drv_dev_ops = &amdzen_dev_ops
};
1499 
/* Loadable-module linkage handed to mod_install()/mod_remove(). */
static struct modlinkage amdzen_modlinkage = {
	.ml_rev = MODREV_1,
	.ml_linkage = { &amdzen_modldrv, NULL }
};
1504 
1505 int
1506 _init(void)
1507 {
1508 	int ret;
1509 
1510 	if (cpuid_getvendor(CPU) != X86_VENDOR_AMD &&
1511 	    cpuid_getvendor(CPU) != X86_VENDOR_HYGON) {
1512 		return (ENOTSUP);
1513 	}
1514 
1515 	if ((ret = mod_install(&amdzen_modlinkage)) == 0) {
1516 		amdzen_alloc();
1517 	}
1518 
1519 	return (ret);
1520 }
1521 
/* Standard module information entry point; defers to mod_info(). */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&amdzen_modlinkage, modinfop));
}
1527 
1528 int
1529 _fini(void)
1530 {
1531 	int ret;
1532 
1533 	if ((ret = mod_remove(&amdzen_modlinkage)) == 0) {
1534 		amdzen_free();
1535 	}
1536 
1537 	return (ret);
1538 }
1539