xref: /illumos-gate/usr/src/uts/intel/io/amdzen/amdzen.c (revision ccac1493decd9d71005b164e6dc843a90409d7b7)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2019, Joyent, Inc.
14  * Copyright 2023 Oxide Computer Company
15  */
16 
17 /*
18  * Nexus Driver for AMD Zen family systems. The purpose of this driver is to
19  * provide access to the following resources in a single, centralized fashion:
20  *
21  *  - The per-chip Data Fabric
22  *  - The North Bridge
23  *  - The System Management Network (SMN)
24  *
25  * This is a nexus driver as once we have attached to all the requisite
26  * components, we will enumerate child devices which consume this functionality.
27  *
28  * ------------------------
29  * Mapping Devices Together
30  * ------------------------
31  *
32  * The operating system needs to expose things like temperature sensors and DRAM
33  * configuration registers in terms of things that are meaningful to the system
34  * such as logical CPUs, cores, etc. This driver attaches to the PCI devices
35  * that represent the northbridge, data fabrics, and dies. Note that there are
36  * multiple northbridge and DF devices (one each per die) and this driver maps
37  * all of these three things together. Unfortunately, this requires some
38  * acrobatics as there is no direct way to map a northbridge to its
39  * corresponding die. Instead, we map a CPU die to a data fabric PCI device and
40  * a data fabric PCI device to a corresponding northbridge PCI device. This
 * transitive relationship allows us to map between northbridge and die.
42  *
43  * As each data fabric device is attached, based on vendor and device portions
44  * of the PCI ID, we add it to the DF stubs list in the global amdzen_t
45  * structure, amdzen_data->azn_df_stubs. We must now map these to logical CPUs.
46  *
47  * In current Zen based products, there is a direct mapping between processor
48  * nodes and a data fabric PCI device: all of the devices are on PCI Bus 0 and
49  * start from Device 0x18, so device 0x18 maps to processor node 0, 0x19 to
50  * processor node 1, etc. This means that to map a logical CPU to a data fabric
51  * device, we take its processor node id, add it to 0x18 and find the PCI device
52  * that is on bus 0 with that ID number. We already discovered the DF devices as
53  * described above.
54  *
55  * The northbridge PCI device has a well-defined device and function, but the
56  * bus that it is on varies. Each die has its own set of assigned PCI buses and
57  * its northbridge device is on the first die-specific bus. This implies that
58  * the northbridges do not show up on PCI bus 0, as that is the PCI bus that all
59  * of the data fabric devices are on and is not assigned to any particular die.
60  * Additionally, while the northbridge on the lowest-numbered PCI bus
61  * intuitively corresponds to processor node zero, hardware does not guarantee
62  * this. Because we don't want to be at the mercy of firmware, we don't rely on
63  * this ordering assumption, though we have yet to find a system that deviates
64  * from it, either.
65  *
66  * One of the registers in the data fabric device's function 0
67  * (AMDZEN_DF_F0_CFG_ADDR_CTL) happens to identify the first PCI bus that is
68  * associated with the processor node. This means that we can map a data fabric
69  * device to a northbridge by finding the northbridge whose PCI bus ID matches
70  * the value in the corresponding data fabric's AMDZEN_DF_F0_CFG_ADDR_CTL.
71  *
72  * Given all of the above, we can map a northbridge to a data fabric device and
73  * a die to a data fabric device. Because these are 1:1 mappings, there is a
 * transitive relationship from northbridge to die, and therefore we know which
75  * northbridge is associated with which processor die. This is summarized in the
76  * following image:
77  *
78  *  +-------+     +------------------------------------+     +--------------+
79  *  | Die 0 |---->| Data Fabric PCI BDF 0/18/0         |---->| Northbridge  |
80  *  +-------+     | AMDZEN_DF_F0_CFG_ADDR_CTL: bus 10  |     | PCI  10/0/0  |
81  *     ...        +------------------------------------+     +--------------+
82  *  +-------+     +------------------------------------+     +--------------+
83  *  | Die n |---->| Data Fabric PCI BDF 0/18+n/0       |---->| Northbridge  |
84  *  +-------+     | AMDZEN_DF_F0_CFG_ADDR_CTL: bus 133 |     | PCI 133/0/0  |
85  *                +------------------------------------+     +--------------+
86  *
87  * Note, the PCI buses used by the northbridges here are arbitrary examples that
88  * do not necessarily reflect actual hardware values; however, the
89  * bus/device/function (BDF) of the data fabric accurately models hardware. All
90  * BDF values are in hex.
91  *
92  * Starting with the Rome generation of processors (Family 17h Model 30-3Fh),
93  * AMD has multiple northbridges on a given die. All of these northbridges share
94  * the same data fabric and system management network port. From our perspective
95  * this means that some of the northbridge devices will be redundant and that we
96  * no longer have a 1:1 mapping between the northbridge and the data fabric
97  * devices. Every data fabric will have a northbridge, but not every northbridge
98  * will have a data fabric device mapped. Because we're always trying to map
99  * from a die to a northbridge and not the reverse, the fact that there are
100  * extra northbridge devices hanging around that we don't know about shouldn't
101  * be a problem.
102  *
103  * -------------------------------
104  * Attach and Detach Complications
105  * -------------------------------
106  *
107  * We need to map different PCI devices together. Each device is attached to a
108  * amdzen_stub driver to facilitate integration with the rest of the kernel PCI
109  * machinery and so we have to manage multiple dev_info_t structures, each of
110  * which may be independently attached and detached.
111  *
112  * This is not particularly complex for attach: our _init routine allocates the
113  * necessary mutex and list structures at module load time, and as each stub is
114  * attached, it calls into this code to be added to the appropriate list. When
115  * the nexus itself is attached, we walk the PCI device tree accumulating a
116  * counter for all devices we expect to be attached. Once the scan is complete
117  * and all such devices are accounted for (stub registration may be happening
118  * asynchronously with respect to nexus attach), we initialize the nexus device
119  * and the attach is complete.
120  *
121  * Most other device drivers support instances that can be brought back after
122  * detach, provided they are associated with an active minor node in the
123  * /devices file system. This driver is different. Once a stub device has been
124  * attached, we do not permit detaching the nexus driver instance, as the kernel
125  * does not give us interlocking guarantees between nexus and stub driver attach
126  * and detach. It is simplest to just unconditionally fail detach once a stub
127  * has attached.
128  *
129  * ---------------
130  * Exposed Devices
131  * ---------------
132  *
133  * Rather than try and have all of the different functions that could be
134  * provided in one driver, we have a nexus driver that tries to load child
135  * pseudo-device drivers that provide specific pieces of functionality.
136  *
137  * -------
138  * Locking
139  * -------
140  *
141  * The amdzen_data structure contains a single lock, azn_mutex.
142  *
143  * The various client functions here are intended for our nexus's direct
 * children, but have been designed in case someone else should depend on this
145  * driver. Once a DF has been discovered, the set of entities inside of it
146  * (adf_nents, adf_ents[]) is considered static, constant data, and iteration
147  * over them does not require locking. However, the discovery of the amd_df_t
148  * does. In addition, locking is required whenever performing register accesses
149  * to the DF or SMN.
150  *
151  * To summarize, one must hold the lock in the following circumstances:
152  *
153  *  - Looking up DF structures
154  *  - Reading or writing to DF registers
155  *  - Reading or writing to SMN registers
156  *
157  * In general, it is preferred that the lock be held across an entire client
 * operation if possible. The only time this becomes an issue is when we have
159  * callbacks into our callers (ala amdzen_c_df_iter()) as they may recursively
160  * call into us.
161  */
162 
163 #include <sys/modctl.h>
164 #include <sys/conf.h>
165 #include <sys/devops.h>
166 #include <sys/ddi.h>
167 #include <sys/sunddi.h>
168 #include <sys/pci.h>
169 #include <sys/sysmacros.h>
170 #include <sys/sunndi.h>
171 #include <sys/x86_archext.h>
172 #include <sys/cpuvar.h>
173 
174 #include <sys/amdzen/df.h>
175 #include "amdzen_client.h"
176 #include "amdzen.h"
177 
178 amdzen_t *amdzen_data;
179 
/*
 * Northbridge IDs that this driver recognizes. Each entry is annotated with
 * the processor families that are known to use it.
 */
static const uint16_t amdzen_nb_ids[] = {
	/* Zen: Family 17h Models 00h-0fh (Ryzen, Epyc) */
	0x1450,
	/* Zen: Family 17h Models 10h-2fh (Raven Ridge, Kestrel, Dali) */
	0x15d0,
	/* Zen 2/Zen 3: Family 17h/19h (Rome, Milan, Matisse, Vermeer) */
	0x1480,
	/* Zen 2/Zen 3: Family 17h/19h (Renoir, Cezanne, Van Gogh) */
	0x1630,
	/* Family 19h (Genoa, Bergamo) */
	0x14a4,
	/* Family 17h (Mendocino) and Family 19h (Rembrandt) */
	0x14b5,
	/* Family 19h (Raphael) */
	0x14d8,
	/* Family 19h (Phoenix) */
	0x14e8,
};
201 
/*
 * Describes a single child device of the nexus: the node name used when the
 * child's dev_info_t is allocated and the amdzen_child_t value that identifies
 * it.
 */
typedef struct {
	/* Node name handed to ndi_devi_alloc() for this child. */
	char *acd_name;
	/* The amdzen_child_t value that identifies this child. */
	amdzen_child_t acd_addr;
} amdzen_child_data_t;
206 
207 static const amdzen_child_data_t amdzen_children[] = {
208 	{ "smntemp", AMDZEN_C_SMNTEMP },
209 	{ "usmn", AMDZEN_C_USMN },
210 	{ "zen_udf", AMDZEN_C_ZEN_UDF },
211 	{ "zen_umc", AMDZEN_C_ZEN_UMC }
212 };
213 
214 static uint8_t
215 amdzen_stub_get8(amdzen_stub_t *stub, off_t reg)
216 {
217 	return (pci_config_get8(stub->azns_cfgspace, reg));
218 }
219 
220 static uint16_t
221 amdzen_stub_get16(amdzen_stub_t *stub, off_t reg)
222 {
223 	return (pci_config_get16(stub->azns_cfgspace, reg));
224 }
225 
226 static uint32_t
227 amdzen_stub_get32(amdzen_stub_t *stub, off_t reg)
228 {
229 	return (pci_config_get32(stub->azns_cfgspace, reg));
230 }
231 
232 static uint64_t
233 amdzen_stub_get64(amdzen_stub_t *stub, off_t reg)
234 {
235 	return (pci_config_get64(stub->azns_cfgspace, reg));
236 }
237 
/*
 * Write the 8-bit value 'val' at offset 'reg' through the stub's PCI
 * configuration space handle.
 */
static void
amdzen_stub_put8(amdzen_stub_t *stub, off_t reg, uint8_t val)
{
	pci_config_put8(stub->azns_cfgspace, reg, val);
}
243 
/*
 * Write the 16-bit value 'val' at offset 'reg' through the stub's PCI
 * configuration space handle.
 */
static void
amdzen_stub_put16(amdzen_stub_t *stub, off_t reg, uint16_t val)
{
	pci_config_put16(stub->azns_cfgspace, reg, val);
}
249 
/*
 * Write the 32-bit value 'val' at offset 'reg' through the stub's PCI
 * configuration space handle.
 */
static void
amdzen_stub_put32(amdzen_stub_t *stub, off_t reg, uint32_t val)
{
	pci_config_put32(stub->azns_cfgspace, reg, val);
}
255 
256 static uint64_t
257 amdzen_df_read_regdef(amdzen_t *azn, amdzen_df_t *df, const df_reg_def_t def,
258     uint8_t inst, boolean_t do_64)
259 {
260 	df_reg_def_t ficaa;
261 	df_reg_def_t ficad;
262 	uint32_t val = 0;
263 	df_rev_t df_rev = azn->azn_dfs[0].adf_rev;
264 
265 	VERIFY(MUTEX_HELD(&azn->azn_mutex));
266 	ASSERT3U(def.drd_gens & df_rev, ==, df_rev);
267 	val = DF_FICAA_V2_SET_TARG_INST(val, 1);
268 	val = DF_FICAA_V2_SET_FUNC(val, def.drd_func);
269 	val = DF_FICAA_V2_SET_INST(val, inst);
270 	val = DF_FICAA_V2_SET_64B(val, do_64 ? 1 : 0);
271 
272 	switch (df_rev) {
273 	case DF_REV_2:
274 	case DF_REV_3:
275 	case DF_REV_3P5:
276 		ficaa = DF_FICAA_V2;
277 		ficad = DF_FICAD_LO_V2;
278 		/*
279 		 * Both here and in the DFv4 case, the register ignores the
280 		 * lower 2 bits. That is we can only address and encode things
281 		 * in units of 4 bytes.
282 		 */
283 		val = DF_FICAA_V2_SET_REG(val, def.drd_reg >> 2);
284 		break;
285 	case DF_REV_4:
286 		ficaa = DF_FICAA_V4;
287 		ficad = DF_FICAD_LO_V4;
288 		val = DF_FICAA_V4_SET_REG(val, def.drd_reg >> 2);
289 		break;
290 	default:
291 		panic("encountered unexpected DF rev: %u", df_rev);
292 	}
293 
294 	amdzen_stub_put32(df->adf_funcs[ficaa.drd_func], ficaa.drd_reg, val);
295 	if (do_64) {
296 		return (amdzen_stub_get64(df->adf_funcs[ficad.drd_func],
297 		    ficad.drd_reg));
298 	} else {
299 		return (amdzen_stub_get32(df->adf_funcs[ficad.drd_func],
300 		    ficad.drd_reg));
301 	}
302 }
303 
304 /*
305  * Perform a targeted 32-bit indirect read to a specific instance and function.
306  */
307 static uint32_t
308 amdzen_df_read32(amdzen_t *azn, amdzen_df_t *df, uint8_t inst,
309     const df_reg_def_t def)
310 {
311 	return (amdzen_df_read_regdef(azn, df, def, inst, B_FALSE));
312 }
313 
314 /*
315  * For a broadcast read, just go to the underlying PCI function and perform a
316  * read. At this point in time, we don't believe we need to use the FICAA/FICAD
317  * to access it (though it does have a broadcast mode).
318  */
319 static uint32_t
320 amdzen_df_read32_bcast(amdzen_t *azn, amdzen_df_t *df, const df_reg_def_t def)
321 {
322 	VERIFY(MUTEX_HELD(&azn->azn_mutex));
323 	return (amdzen_stub_get32(df->adf_funcs[def.drd_func], def.drd_reg));
324 }
325 
326 static uint32_t
327 amdzen_smn_read(amdzen_t *azn, amdzen_df_t *df, const smn_reg_t reg)
328 {
329 	const uint32_t base_addr = SMN_REG_ADDR_BASE(reg);
330 	const uint32_t addr_off = SMN_REG_ADDR_OFF(reg);
331 
332 	VERIFY(SMN_REG_IS_NATURALLY_ALIGNED(reg));
333 	VERIFY(MUTEX_HELD(&azn->azn_mutex));
334 	amdzen_stub_put32(df->adf_nb, AMDZEN_NB_SMN_ADDR, base_addr);
335 
336 	switch (SMN_REG_SIZE(reg)) {
337 	case 1:
338 		return ((uint32_t)amdzen_stub_get8(df->adf_nb,
339 		    AMDZEN_NB_SMN_DATA + addr_off));
340 	case 2:
341 		return ((uint32_t)amdzen_stub_get16(df->adf_nb,
342 		    AMDZEN_NB_SMN_DATA + addr_off));
343 	case 4:
344 		return (amdzen_stub_get32(df->adf_nb, AMDZEN_NB_SMN_DATA));
345 	default:
346 		panic("unreachable invalid SMN register size %u",
347 		    SMN_REG_SIZE(reg));
348 	}
349 }
350 
351 static void
352 amdzen_smn_write(amdzen_t *azn, amdzen_df_t *df, const smn_reg_t reg,
353     const uint32_t val)
354 {
355 	const uint32_t base_addr = SMN_REG_ADDR_BASE(reg);
356 	const uint32_t addr_off = SMN_REG_ADDR_OFF(reg);
357 
358 	VERIFY(SMN_REG_IS_NATURALLY_ALIGNED(reg));
359 	VERIFY(SMN_REG_VALUE_FITS(reg, val));
360 	VERIFY(MUTEX_HELD(&azn->azn_mutex));
361 	amdzen_stub_put32(df->adf_nb, AMDZEN_NB_SMN_ADDR, base_addr);
362 
363 	switch (SMN_REG_SIZE(reg)) {
364 	case 1:
365 		amdzen_stub_put8(df->adf_nb, AMDZEN_NB_SMN_DATA + addr_off,
366 		    (uint8_t)val);
367 		break;
368 	case 2:
369 		amdzen_stub_put16(df->adf_nb, AMDZEN_NB_SMN_DATA + addr_off,
370 		    (uint16_t)val);
371 		break;
372 	case 4:
373 		amdzen_stub_put32(df->adf_nb, AMDZEN_NB_SMN_DATA, val);
374 		break;
375 	default:
376 		panic("unreachable invalid SMN register size %u",
377 		    SMN_REG_SIZE(reg));
378 	}
379 }
380 
381 static amdzen_df_t *
382 amdzen_df_find(amdzen_t *azn, uint_t dfno)
383 {
384 	uint_t i;
385 
386 	ASSERT(MUTEX_HELD(&azn->azn_mutex));
387 	if (dfno >= azn->azn_ndfs) {
388 		return (NULL);
389 	}
390 
391 	for (i = 0; i < azn->azn_ndfs; i++) {
392 		amdzen_df_t *df = &azn->azn_dfs[i];
393 		if ((df->adf_flags & AMDZEN_DF_F_VALID) == 0) {
394 			continue;
395 		}
396 
397 		if (dfno == 0) {
398 			return (df);
399 		}
400 		dfno--;
401 	}
402 
403 	return (NULL);
404 }
405 
406 /*
407  * Client functions that are used by nexus children.
408  */
409 int
410 amdzen_c_smn_read(uint_t dfno, const smn_reg_t reg, uint32_t *valp)
411 {
412 	amdzen_df_t *df;
413 	amdzen_t *azn = amdzen_data;
414 
415 	if (!SMN_REG_SIZE_IS_VALID(reg))
416 		return (EINVAL);
417 	if (!SMN_REG_IS_NATURALLY_ALIGNED(reg))
418 		return (EINVAL);
419 
420 	mutex_enter(&azn->azn_mutex);
421 	df = amdzen_df_find(azn, dfno);
422 	if (df == NULL) {
423 		mutex_exit(&azn->azn_mutex);
424 		return (ENOENT);
425 	}
426 
427 	if ((df->adf_flags & AMDZEN_DF_F_FOUND_NB) == 0) {
428 		mutex_exit(&azn->azn_mutex);
429 		return (ENXIO);
430 	}
431 
432 	*valp = amdzen_smn_read(azn, df, reg);
433 	mutex_exit(&azn->azn_mutex);
434 	return (0);
435 }
436 
437 int
438 amdzen_c_smn_write(uint_t dfno, const smn_reg_t reg, const uint32_t val)
439 {
440 	amdzen_df_t *df;
441 	amdzen_t *azn = amdzen_data;
442 
443 	if (!SMN_REG_SIZE_IS_VALID(reg))
444 		return (EINVAL);
445 	if (!SMN_REG_IS_NATURALLY_ALIGNED(reg))
446 		return (EINVAL);
447 	if (!SMN_REG_VALUE_FITS(reg, val))
448 		return (EOVERFLOW);
449 
450 	mutex_enter(&azn->azn_mutex);
451 	df = amdzen_df_find(azn, dfno);
452 	if (df == NULL) {
453 		mutex_exit(&azn->azn_mutex);
454 		return (ENOENT);
455 	}
456 
457 	if ((df->adf_flags & AMDZEN_DF_F_FOUND_NB) == 0) {
458 		mutex_exit(&azn->azn_mutex);
459 		return (ENXIO);
460 	}
461 
462 	amdzen_smn_write(azn, df, reg, val);
463 	mutex_exit(&azn->azn_mutex);
464 	return (0);
465 }
466 
467 uint_t
468 amdzen_c_df_count(void)
469 {
470 	uint_t ret;
471 	amdzen_t *azn = amdzen_data;
472 
473 	mutex_enter(&azn->azn_mutex);
474 	ret = azn->azn_ndfs;
475 	mutex_exit(&azn->azn_mutex);
476 	return (ret);
477 }
478 
479 df_rev_t
480 amdzen_c_df_rev(void)
481 {
482 	amdzen_df_t *df;
483 	amdzen_t *azn = amdzen_data;
484 	df_rev_t rev;
485 
486 	/*
487 	 * Always use the first DF instance to determine what we're using. Our
488 	 * current assumption, which seems to generally be true, is that the
489 	 * given DF revisions are the same in a given system when the DFs are
490 	 * directly connected.
491 	 */
492 	mutex_enter(&azn->azn_mutex);
493 	df = amdzen_df_find(azn, 0);
494 	if (df == NULL) {
495 		rev = DF_REV_UNKNOWN;
496 	} else {
497 		rev = df->adf_rev;
498 	}
499 	mutex_exit(&azn->azn_mutex);
500 
501 	return (rev);
502 }
503 
504 int
505 amdzen_c_df_read32(uint_t dfno, uint8_t inst, const df_reg_def_t def,
506     uint32_t *valp)
507 {
508 	amdzen_df_t *df;
509 	amdzen_t *azn = amdzen_data;
510 
511 	mutex_enter(&azn->azn_mutex);
512 	df = amdzen_df_find(azn, dfno);
513 	if (df == NULL) {
514 		mutex_exit(&azn->azn_mutex);
515 		return (ENOENT);
516 	}
517 
518 	*valp = amdzen_df_read_regdef(azn, df, def, inst, B_FALSE);
519 	mutex_exit(&azn->azn_mutex);
520 
521 	return (0);
522 }
523 
524 int
525 amdzen_c_df_read64(uint_t dfno, uint8_t inst, const df_reg_def_t def,
526     uint64_t *valp)
527 {
528 	amdzen_df_t *df;
529 	amdzen_t *azn = amdzen_data;
530 
531 	mutex_enter(&azn->azn_mutex);
532 	df = amdzen_df_find(azn, dfno);
533 	if (df == NULL) {
534 		mutex_exit(&azn->azn_mutex);
535 		return (ENOENT);
536 	}
537 
538 	*valp = amdzen_df_read_regdef(azn, df, def, inst, B_TRUE);
539 	mutex_exit(&azn->azn_mutex);
540 
541 	return (0);
542 }
543 
544 int
545 amdzen_c_df_iter(uint_t dfno, zen_df_type_t type, amdzen_c_iter_f func,
546     void *arg)
547 {
548 	amdzen_df_t *df;
549 	amdzen_t *azn = amdzen_data;
550 	df_type_t df_type;
551 	uint8_t df_subtype;
552 
553 	/*
554 	 * Unlike other calls here, we hold our lock only to find the DF here.
555 	 * The main reason for this is the nature of the callback function.
556 	 * Folks are iterating over instances so they can call back into us. If
557 	 * you look at the locking statement, the thing that is most volatile
558 	 * right here and what we need to protect is the DF itself and
559 	 * subsequent register accesses to it. The actual data about which
560 	 * entities exist is static and so once we have found a DF we should
561 	 * hopefully be in good shape as they only come, but don't go.
562 	 */
563 	mutex_enter(&azn->azn_mutex);
564 	df = amdzen_df_find(azn, dfno);
565 	if (df == NULL) {
566 		mutex_exit(&azn->azn_mutex);
567 		return (ENOENT);
568 	}
569 	mutex_exit(&azn->azn_mutex);
570 
571 	switch (type) {
572 	case ZEN_DF_TYPE_CS_UMC:
573 		df_type = DF_TYPE_CS;
574 		/*
575 		 * In the original Zeppelin DFv2 die there was no subtype field
576 		 * used for the CS. The UMC is the only type and has a subtype
577 		 * of zero.
578 		 */
579 		if (df->adf_rev != DF_REV_2) {
580 			df_subtype = DF_CS_SUBTYPE_UMC;
581 		} else {
582 			df_subtype = 0;
583 		}
584 		break;
585 	case ZEN_DF_TYPE_CCM_CPU:
586 		/*
587 		 * While the wording of the PPR is a little weird, the CCM still
588 		 * has subtype 0 in DFv4 systems; however, what's said to be for
589 		 * the CPU appears to apply to the ACM.
590 		 */
591 		df_type = DF_TYPE_CCM;
592 		df_subtype = 0;
593 		break;
594 	default:
595 		return (EINVAL);
596 	}
597 
598 	for (uint_t i = 0; i < df->adf_nents; i++) {
599 		amdzen_df_ent_t *ent = &df->adf_ents[i];
600 
601 		/*
602 		 * Some DF components are not considered enabled and therefore
603 		 * will end up having bogus values in their ID fields. If we do
604 		 * not have an enable flag set, we must skip this node.
605 		 */
606 		if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0)
607 			continue;
608 
609 		if (ent->adfe_type == df_type &&
610 		    ent->adfe_subtype == df_subtype) {
611 			int ret = func(dfno, ent->adfe_fabric_id,
612 			    ent->adfe_inst_id, arg);
613 			if (ret != 0) {
614 				return (ret);
615 			}
616 		}
617 	}
618 
619 	return (0);
620 }
621 
622 int
623 amdzen_c_df_fabric_decomp(df_fabric_decomp_t *decomp)
624 {
625 	const amdzen_df_t *df;
626 	amdzen_t *azn = amdzen_data;
627 
628 	mutex_enter(&azn->azn_mutex);
629 	df = amdzen_df_find(azn, 0);
630 	if (df == NULL) {
631 		mutex_exit(&azn->azn_mutex);
632 		return (ENOENT);
633 	}
634 
635 	*decomp = df->adf_decomp;
636 	mutex_exit(&azn->azn_mutex);
637 	return (0);
638 }
639 
640 static boolean_t
641 amdzen_create_child(amdzen_t *azn, const amdzen_child_data_t *acd)
642 {
643 	int ret;
644 	dev_info_t *child;
645 
646 	if (ndi_devi_alloc(azn->azn_dip, acd->acd_name,
647 	    (pnode_t)DEVI_SID_NODEID, &child) != NDI_SUCCESS) {
648 		dev_err(azn->azn_dip, CE_WARN, "!failed to allocate child "
649 		    "dip for %s", acd->acd_name);
650 		return (B_FALSE);
651 	}
652 
653 	ddi_set_parent_data(child, (void *)acd);
654 	if ((ret = ndi_devi_online(child, 0)) != NDI_SUCCESS) {
655 		dev_err(azn->azn_dip, CE_WARN, "!failed to online child "
656 		    "dip %s: %d", acd->acd_name, ret);
657 		return (B_FALSE);
658 	}
659 
660 	return (B_TRUE);
661 }
662 
663 static boolean_t
664 amdzen_map_dfs(amdzen_t *azn)
665 {
666 	amdzen_stub_t *stub;
667 
668 	ASSERT(MUTEX_HELD(&azn->azn_mutex));
669 
670 	for (stub = list_head(&azn->azn_df_stubs); stub != NULL;
671 	    stub = list_next(&azn->azn_df_stubs, stub)) {
672 		amdzen_df_t *df;
673 		uint_t dfno;
674 
675 		dfno = stub->azns_dev - AMDZEN_DF_FIRST_DEVICE;
676 		if (dfno > AMDZEN_MAX_DFS) {
677 			dev_err(stub->azns_dip, CE_WARN, "encountered df "
678 			    "device with illegal DF PCI b/d/f: 0x%x/%x/%x",
679 			    stub->azns_bus, stub->azns_dev, stub->azns_func);
680 			goto err;
681 		}
682 
683 		df = &azn->azn_dfs[dfno];
684 
685 		if (stub->azns_func >= AMDZEN_MAX_DF_FUNCS) {
686 			dev_err(stub->azns_dip, CE_WARN, "encountered df "
687 			    "device with illegal DF PCI b/d/f: 0x%x/%x/%x",
688 			    stub->azns_bus, stub->azns_dev, stub->azns_func);
689 			goto err;
690 		}
691 
692 		if (df->adf_funcs[stub->azns_func] != NULL) {
693 			dev_err(stub->azns_dip, CE_WARN, "encountered "
694 			    "duplicate df device with DF PCI b/d/f: 0x%x/%x/%x",
695 			    stub->azns_bus, stub->azns_dev, stub->azns_func);
696 			goto err;
697 		}
698 		df->adf_funcs[stub->azns_func] = stub;
699 	}
700 
701 	return (B_TRUE);
702 
703 err:
704 	azn->azn_flags |= AMDZEN_F_DEVICE_ERROR;
705 	return (B_FALSE);
706 }
707 
708 static boolean_t
709 amdzen_check_dfs(amdzen_t *azn)
710 {
711 	uint_t i;
712 	boolean_t ret = B_TRUE;
713 
714 	for (i = 0; i < AMDZEN_MAX_DFS; i++) {
715 		amdzen_df_t *df = &azn->azn_dfs[i];
716 		uint_t count = 0;
717 
718 		/*
719 		 * We require all platforms to have DFs functions 0-6. Not all
720 		 * platforms have DF function 7.
721 		 */
722 		for (uint_t func = 0; func < AMDZEN_MAX_DF_FUNCS - 1; func++) {
723 			if (df->adf_funcs[func] != NULL) {
724 				count++;
725 			}
726 		}
727 
728 		if (count == 0)
729 			continue;
730 
731 		if (count != 7) {
732 			ret = B_FALSE;
733 			dev_err(azn->azn_dip, CE_WARN, "df %u devices "
734 			    "incomplete", i);
735 		} else {
736 			df->adf_flags |= AMDZEN_DF_F_VALID;
737 			azn->azn_ndfs++;
738 		}
739 	}
740 
741 	return (ret);
742 }
743 
/*
 * Table of 0x2b IDs, laid out one contiguous run per group below. The values
 * are not dense: 12-15 and 28-29 do not appear.
 *
 * NOTE(review): presumably these are the IDs used by Rome-style DFs (see
 * amdzen_is_rome_style()); confirm against the code that consumes this table.
 */
static const uint8_t amdzen_df_rome_ids[0x2b] = {
	/* 0 through 11 */
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
	/* 16 through 27 */
	16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
	/* 30 through 48 */
	30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46,
	47, 48
};
749 
750 /*
751  * Check the first df entry to see if it belongs to Rome or Milan. If so, then
752  * it uses the disjoint ID space.
753  */
754 static boolean_t
755 amdzen_is_rome_style(uint_t id)
756 {
757 	return (id == 0x1490 || id == 0x1650);
758 }
759 
760 /*
761  * To be able to do most other things we want to do, we must first determine
762  * what revision of the DF (data fabric) that we're using.
763  *
764  * Snapshot the df version. This was added explicitly in DFv4.0, around the Zen
765  * 4 timeframe and allows us to tell apart different version of the DF register
766  * set, most usefully when various subtypes were added.
767  *
768  * Older versions can theoretically be told apart based on usage of reserved
769  * registers. We walk these in the following order, starting with the newest rev
770  * and walking backwards to tell things apart:
771  *
772  *   o v3.5 -> Check function 1, register 0x150. This was reserved prior
773  *             to this point. This is actually DF_FIDMASK0_V3P5. We are supposed
774  *             to check bits [7:0].
775  *
776  *   o v3.0 -> Check function 1, register 0x208. The low byte (7:0) was
777  *             changed to indicate a component mask. This is non-zero
778  *             in the 3.0 generation. This is actually DF_FIDMASK_V2.
779  *
780  *   o v2.0 -> This is just the not that case. Presumably v1 wasn't part
781  *             of the Zen generation.
782  *
783  * Because we don't know what version we are yet, we do not use the normal
784  * versioned register accesses which would check what DF version we are and
785  * would want to use the normal indirect register accesses (which also require
786  * us to know the version). We instead do direct broadcast reads.
787  */
788 static void
789 amdzen_determine_df_vers(amdzen_t *azn, amdzen_df_t *df)
790 {
791 	uint32_t val;
792 	df_reg_def_t rd = DF_FBICNT;
793 
794 	val = amdzen_stub_get32(df->adf_funcs[rd.drd_func], rd.drd_reg);
795 	df->adf_major = DF_FBICNT_V4_GET_MAJOR(val);
796 	df->adf_minor = DF_FBICNT_V4_GET_MINOR(val);
797 	if (df->adf_major == 0 && df->adf_minor == 0) {
798 		rd = DF_FIDMASK0_V3P5;
799 		val = amdzen_stub_get32(df->adf_funcs[rd.drd_func], rd.drd_reg);
800 		if (bitx32(val, 7, 0) != 0) {
801 			df->adf_major = 3;
802 			df->adf_minor = 5;
803 			df->adf_rev = DF_REV_3P5;
804 		} else {
805 			rd = DF_FIDMASK_V2;
806 			val = amdzen_stub_get32(df->adf_funcs[rd.drd_func],
807 			    rd.drd_reg);
808 			if (bitx32(val, 7, 0) != 0) {
809 				df->adf_major = 3;
810 				df->adf_minor = 0;
811 				df->adf_rev = DF_REV_3;
812 			} else {
813 				df->adf_major = 2;
814 				df->adf_minor = 0;
815 				df->adf_rev = DF_REV_2;
816 			}
817 		}
818 	} else if (df->adf_major == 4 && df->adf_minor == 0) {
819 		df->adf_rev = DF_REV_4;
820 	} else {
821 		df->adf_rev = DF_REV_UNKNOWN;
822 	}
823 }
824 
825 /*
826  * All of the different versions of the DF have different ways of getting at and
827  * answering the question of how do I break a fabric ID into a corresponding
828  * socket, die, and component. Importantly the goal here is to obtain, cache,
829  * and normalize:
830  *
831  *  o The DF System Configuration
832  *  o The various Mask registers
833  *  o The Node ID
834  */
835 static void
836 amdzen_determine_fabric_decomp(amdzen_t *azn, amdzen_df_t *df)
837 {
838 	uint32_t mask;
839 	df_fabric_decomp_t *decomp = &df->adf_decomp;
840 
841 	switch (df->adf_rev) {
842 	case DF_REV_2:
843 		df->adf_syscfg = amdzen_df_read32_bcast(azn, df, DF_SYSCFG_V2);
844 		switch (DF_SYSCFG_V2_GET_MY_TYPE(df->adf_syscfg)) {
845 		case DF_DIE_TYPE_CPU:
846 			mask = amdzen_df_read32_bcast(azn, df,
847 			    DF_DIEMASK_CPU_V2);
848 			break;
849 		case DF_DIE_TYPE_APU:
850 			mask = amdzen_df_read32_bcast(azn, df,
851 			    DF_DIEMASK_APU_V2);
852 			break;
853 		default:
854 			panic("DF thinks we're not on a CPU!");
855 		}
856 		df->adf_mask0 = mask;
857 
858 		/*
859 		 * DFv2 is a bit different in how the fabric mask register is
860 		 * phrased. Logically a fabric ID is broken into something that
861 		 * uniquely identifies a "node" (a particular die on a socket)
862 		 * and something that identifies a "component", e.g. a memory
863 		 * controller.
864 		 *
865 		 * Starting with DFv3, these registers logically called out how
866 		 * to separate the fabric ID first into a node and a component.
867 		 * Then the node was then broken down into a socket and die. In
868 		 * DFv2, there is no separate mask and shift of a node. Instead
869 		 * the socket and die are absolute offsets into the fabric ID
870 		 * rather than relative offsets into the node ID. As such, when
871 		 * we encounter DFv2, we fake up a node mask and shift and make
872 		 * it look like DFv3+.
873 		 */
874 		decomp->dfd_node_mask = DF_DIEMASK_V2_GET_SOCK_MASK(mask) |
875 		    DF_DIEMASK_V2_GET_DIE_MASK(mask);
876 		decomp->dfd_node_shift = DF_DIEMASK_V2_GET_DIE_SHIFT(mask);
877 		decomp->dfd_comp_mask = DF_DIEMASK_V2_GET_COMP_MASK(mask);
878 		decomp->dfd_comp_shift = 0;
879 
880 		decomp->dfd_sock_mask = DF_DIEMASK_V2_GET_SOCK_MASK(mask) >>
881 		    decomp->dfd_node_shift;
882 		decomp->dfd_die_mask = DF_DIEMASK_V2_GET_DIE_MASK(mask) >>
883 		    decomp->dfd_node_shift;
884 		decomp->dfd_sock_shift = DF_DIEMASK_V2_GET_SOCK_SHIFT(mask) -
885 		    decomp->dfd_node_shift;
886 		decomp->dfd_die_shift = DF_DIEMASK_V2_GET_DIE_SHIFT(mask) -
887 		    decomp->dfd_node_shift;
888 		ASSERT3U(decomp->dfd_die_shift, ==, 0);
889 		break;
890 	case DF_REV_3:
891 		df->adf_syscfg = amdzen_df_read32_bcast(azn, df, DF_SYSCFG_V3);
892 		df->adf_mask0 =  amdzen_df_read32_bcast(azn, df,
893 		    DF_FIDMASK0_V3);
894 		df->adf_mask1 =  amdzen_df_read32_bcast(azn, df,
895 		    DF_FIDMASK1_V3);
896 
897 		decomp->dfd_sock_mask =
898 		    DF_FIDMASK1_V3_GET_SOCK_MASK(df->adf_mask1);
899 		decomp->dfd_sock_shift =
900 		    DF_FIDMASK1_V3_GET_SOCK_SHIFT(df->adf_mask1);
901 		decomp->dfd_die_mask =
902 		    DF_FIDMASK1_V3_GET_DIE_MASK(df->adf_mask1);
903 		decomp->dfd_die_shift = 0;
904 		decomp->dfd_node_mask =
905 		    DF_FIDMASK0_V3_GET_NODE_MASK(df->adf_mask0);
906 		decomp->dfd_node_shift =
907 		    DF_FIDMASK1_V3_GET_NODE_SHIFT(df->adf_mask1);
908 		decomp->dfd_comp_mask =
909 		    DF_FIDMASK0_V3_GET_COMP_MASK(df->adf_mask0);
910 		decomp->dfd_comp_shift = 0;
911 		break;
912 	case DF_REV_3P5:
913 		df->adf_syscfg = amdzen_df_read32_bcast(azn, df,
914 		    DF_SYSCFG_V3P5);
915 		df->adf_mask0 =  amdzen_df_read32_bcast(azn, df,
916 		    DF_FIDMASK0_V3P5);
917 		df->adf_mask1 =  amdzen_df_read32_bcast(azn, df,
918 		    DF_FIDMASK1_V3P5);
919 		df->adf_mask2 =  amdzen_df_read32_bcast(azn, df,
920 		    DF_FIDMASK2_V3P5);
921 
922 		decomp->dfd_sock_mask =
923 		    DF_FIDMASK2_V3P5_GET_SOCK_MASK(df->adf_mask2);
924 		decomp->dfd_sock_shift =
925 		    DF_FIDMASK1_V3P5_GET_SOCK_SHIFT(df->adf_mask1);
926 		decomp->dfd_die_mask =
927 		    DF_FIDMASK2_V3P5_GET_DIE_MASK(df->adf_mask2);
928 		decomp->dfd_die_shift = 0;
929 		decomp->dfd_node_mask =
930 		    DF_FIDMASK0_V3P5_GET_NODE_MASK(df->adf_mask0);
931 		decomp->dfd_node_shift =
932 		    DF_FIDMASK1_V3P5_GET_NODE_SHIFT(df->adf_mask1);
933 		decomp->dfd_comp_mask =
934 		    DF_FIDMASK0_V3P5_GET_COMP_MASK(df->adf_mask0);
935 		decomp->dfd_comp_shift = 0;
936 		break;
937 	case DF_REV_4:
938 		df->adf_syscfg = amdzen_df_read32_bcast(azn, df, DF_SYSCFG_V4);
939 		df->adf_mask0 =  amdzen_df_read32_bcast(azn, df,
940 		    DF_FIDMASK0_V4);
941 		df->adf_mask1 =  amdzen_df_read32_bcast(azn, df,
942 		    DF_FIDMASK1_V4);
943 		df->adf_mask2 =  amdzen_df_read32_bcast(azn, df,
944 		    DF_FIDMASK2_V4);
945 
946 		/*
947 		 * The DFv4 registers are at a different location in the DF;
948 		 * however, the actual layout of fields is the same as DFv3.5.
949 		 * This is why you see V3P5 below.
950 		 */
951 		decomp->dfd_sock_mask =
952 		    DF_FIDMASK2_V3P5_GET_SOCK_MASK(df->adf_mask2);
953 		decomp->dfd_sock_shift =
954 		    DF_FIDMASK1_V3P5_GET_SOCK_SHIFT(df->adf_mask1);
955 		decomp->dfd_die_mask =
956 		    DF_FIDMASK2_V3P5_GET_DIE_MASK(df->adf_mask2);
957 		decomp->dfd_die_shift = 0;
958 		decomp->dfd_node_mask =
959 		    DF_FIDMASK0_V3P5_GET_NODE_MASK(df->adf_mask0);
960 		decomp->dfd_node_shift =
961 		    DF_FIDMASK1_V3P5_GET_NODE_SHIFT(df->adf_mask1);
962 		decomp->dfd_comp_mask =
963 		    DF_FIDMASK0_V3P5_GET_COMP_MASK(df->adf_mask0);
964 		decomp->dfd_comp_shift = 0;
965 		break;
966 	default:
967 		panic("encountered suspicious, previously rejected DF "
968 		    "rev: 0x%x", df->adf_rev);
969 	}
970 }
971 
972 /*
973  * Initialize our knowledge about a given series of nodes on the data fabric.
974  */
975 static void
976 amdzen_setup_df(amdzen_t *azn, amdzen_df_t *df)
977 {
978 	uint_t i;
979 	uint32_t val;
980 
981 	amdzen_determine_df_vers(azn, df);
982 
983 	switch (df->adf_rev) {
984 	case DF_REV_2:
985 	case DF_REV_3:
986 	case DF_REV_3P5:
987 		val = amdzen_df_read32_bcast(azn, df, DF_CFG_ADDR_CTL_V2);
988 		break;
989 	case DF_REV_4:
990 		val = amdzen_df_read32_bcast(azn, df, DF_CFG_ADDR_CTL_V4);
991 		break;
992 	default:
993 		dev_err(azn->azn_dip, CE_WARN, "encountered unsupported DF "
994 		    "revision: 0x%x", df->adf_rev);
995 		return;
996 	}
997 	df->adf_nb_busno = DF_CFG_ADDR_CTL_GET_BUS_NUM(val);
998 	val = amdzen_df_read32_bcast(azn, df, DF_FBICNT);
999 	df->adf_nents = DF_FBICNT_GET_COUNT(val);
1000 	if (df->adf_nents == 0)
1001 		return;
1002 	df->adf_ents = kmem_zalloc(sizeof (amdzen_df_ent_t) * df->adf_nents,
1003 	    KM_SLEEP);
1004 
1005 	for (i = 0; i < df->adf_nents; i++) {
1006 		amdzen_df_ent_t *dfe = &df->adf_ents[i];
1007 		uint8_t inst = i;
1008 
1009 		/*
1010 		 * Unfortunately, Rome uses a discontinuous instance ID pattern
1011 		 * while everything else we can find uses a contiguous instance
1012 		 * ID pattern. This means that for Rome, we need to adjust the
1013 		 * indexes that we iterate over, though the total number of
1014 		 * entries is right. This was carried over into Milan, but not
1015 		 * Genoa.
1016 		 */
1017 		if (amdzen_is_rome_style(df->adf_funcs[0]->azns_did)) {
1018 			if (inst > ARRAY_SIZE(amdzen_df_rome_ids)) {
1019 				dev_err(azn->azn_dip, CE_WARN, "Rome family "
1020 				    "processor reported more ids than the PPR, "
1021 				    "resetting %u to instance zero", inst);
1022 				inst = 0;
1023 			} else {
1024 				inst = amdzen_df_rome_ids[inst];
1025 			}
1026 		}
1027 
1028 		dfe->adfe_drvid = inst;
1029 		dfe->adfe_info0 = amdzen_df_read32(azn, df, inst, DF_FBIINFO0);
1030 		dfe->adfe_info1 = amdzen_df_read32(azn, df, inst, DF_FBIINFO1);
1031 		dfe->adfe_info2 = amdzen_df_read32(azn, df, inst, DF_FBIINFO2);
1032 		dfe->adfe_info3 = amdzen_df_read32(azn, df, inst, DF_FBIINFO3);
1033 
1034 		dfe->adfe_type = DF_FBIINFO0_GET_TYPE(dfe->adfe_info0);
1035 		dfe->adfe_subtype = DF_FBIINFO0_GET_SUBTYPE(dfe->adfe_info0);
1036 
1037 		/*
1038 		 * The enabled flag was not present in Zen 1. Simulate it by
1039 		 * checking for a non-zero register instead.
1040 		 */
1041 		if (DF_FBIINFO0_V3_GET_ENABLED(dfe->adfe_info0) ||
1042 		    (df->adf_rev == DF_REV_2 && dfe->adfe_info0 != 0)) {
1043 			dfe->adfe_flags |= AMDZEN_DFE_F_ENABLED;
1044 		}
1045 		if (DF_FBIINFO0_GET_HAS_MCA(dfe->adfe_info0)) {
1046 			dfe->adfe_flags |= AMDZEN_DFE_F_MCA;
1047 		}
1048 		dfe->adfe_inst_id = DF_FBIINFO3_GET_INSTID(dfe->adfe_info3);
1049 		switch (df->adf_rev) {
1050 		case DF_REV_2:
1051 			dfe->adfe_fabric_id =
1052 			    DF_FBIINFO3_V2_GET_BLOCKID(dfe->adfe_info3);
1053 			break;
1054 		case DF_REV_3:
1055 			dfe->adfe_fabric_id =
1056 			    DF_FBIINFO3_V3_GET_BLOCKID(dfe->adfe_info3);
1057 			break;
1058 		case DF_REV_3P5:
1059 			dfe->adfe_fabric_id =
1060 			    DF_FBIINFO3_V3P5_GET_BLOCKID(dfe->adfe_info3);
1061 			break;
1062 		case DF_REV_4:
1063 			dfe->adfe_fabric_id =
1064 			    DF_FBIINFO3_V4_GET_BLOCKID(dfe->adfe_info3);
1065 			break;
1066 		default:
1067 			panic("encountered suspicious, previously rejected DF "
1068 			    "rev: 0x%x", df->adf_rev);
1069 		}
1070 	}
1071 
1072 	amdzen_determine_fabric_decomp(azn, df);
1073 }
1074 
1075 static void
1076 amdzen_find_nb(amdzen_t *azn, amdzen_df_t *df)
1077 {
1078 	amdzen_stub_t *stub;
1079 
1080 	for (stub = list_head(&azn->azn_nb_stubs); stub != NULL;
1081 	    stub = list_next(&azn->azn_nb_stubs, stub)) {
1082 		if (stub->azns_bus == df->adf_nb_busno) {
1083 			df->adf_flags |= AMDZEN_DF_F_FOUND_NB;
1084 			df->adf_nb = stub;
1085 			return;
1086 		}
1087 	}
1088 }
1089 
1090 static void
1091 amdzen_nexus_init(void *arg)
1092 {
1093 	uint_t i;
1094 	amdzen_t *azn = arg;
1095 
1096 	/*
1097 	 * First go through all of the stubs and assign the DF entries.
1098 	 */
1099 	mutex_enter(&azn->azn_mutex);
1100 	if (!amdzen_map_dfs(azn) || !amdzen_check_dfs(azn)) {
1101 		azn->azn_flags |= AMDZEN_F_MAP_ERROR;
1102 		goto done;
1103 	}
1104 
1105 	for (i = 0; i < AMDZEN_MAX_DFS; i++) {
1106 		amdzen_df_t *df = &azn->azn_dfs[i];
1107 
1108 		if ((df->adf_flags & AMDZEN_DF_F_VALID) == 0)
1109 			continue;
1110 		amdzen_setup_df(azn, df);
1111 		amdzen_find_nb(azn, df);
1112 	}
1113 
1114 	/*
1115 	 * Not all children may be installed. As such, we do not treat the
1116 	 * failure of a child as fatal to the driver.
1117 	 */
1118 	mutex_exit(&azn->azn_mutex);
1119 	for (i = 0; i < ARRAY_SIZE(amdzen_children); i++) {
1120 		(void) amdzen_create_child(azn, &amdzen_children[i]);
1121 	}
1122 	mutex_enter(&azn->azn_mutex);
1123 
1124 done:
1125 	azn->azn_flags &= ~AMDZEN_F_ATTACH_DISPATCHED;
1126 	azn->azn_flags |= AMDZEN_F_ATTACH_COMPLETE;
1127 	azn->azn_taskqid = TASKQID_INVALID;
1128 	cv_broadcast(&azn->azn_cv);
1129 	mutex_exit(&azn->azn_mutex);
1130 }
1131 
1132 static int
1133 amdzen_stub_scan_cb(dev_info_t *dip, void *arg)
1134 {
1135 	amdzen_t *azn = arg;
1136 	uint16_t vid, did;
1137 	int *regs;
1138 	uint_t nregs, i;
1139 	boolean_t match = B_FALSE;
1140 
1141 	if (dip == ddi_root_node()) {
1142 		return (DDI_WALK_CONTINUE);
1143 	}
1144 
1145 	/*
1146 	 * If a node in question is not a pci node, then we have no interest in
1147 	 * it as all the stubs that we care about are related to pci devices.
1148 	 */
1149 	if (strncmp("pci", ddi_get_name(dip), 3) != 0) {
1150 		return (DDI_WALK_PRUNECHILD);
1151 	}
1152 
1153 	/*
1154 	 * If we can't get a device or vendor ID and prove that this is an AMD
1155 	 * part, then we don't care about it.
1156 	 */
1157 	vid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
1158 	    "vendor-id", PCI_EINVAL16);
1159 	did = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
1160 	    "device-id", PCI_EINVAL16);
1161 	if (vid == PCI_EINVAL16 || did == PCI_EINVAL16) {
1162 		return (DDI_WALK_CONTINUE);
1163 	}
1164 
1165 	if (vid != AMDZEN_PCI_VID_AMD && vid != AMDZEN_PCI_VID_HYGON) {
1166 		return (DDI_WALK_CONTINUE);
1167 	}
1168 
1169 	for (i = 0; i < ARRAY_SIZE(amdzen_nb_ids); i++) {
1170 		if (amdzen_nb_ids[i] == did) {
1171 			match = B_TRUE;
1172 		}
1173 	}
1174 
1175 	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
1176 	    "reg", &regs, &nregs) != DDI_PROP_SUCCESS) {
1177 		return (DDI_WALK_CONTINUE);
1178 	}
1179 
1180 	if (nregs == 0) {
1181 		ddi_prop_free(regs);
1182 		return (DDI_WALK_CONTINUE);
1183 	}
1184 
1185 	if (PCI_REG_BUS_G(regs[0]) == AMDZEN_DF_BUSNO &&
1186 	    PCI_REG_DEV_G(regs[0]) >= AMDZEN_DF_FIRST_DEVICE) {
1187 		match = B_TRUE;
1188 	}
1189 
1190 	ddi_prop_free(regs);
1191 	if (match) {
1192 		mutex_enter(&azn->azn_mutex);
1193 		azn->azn_nscanned++;
1194 		mutex_exit(&azn->azn_mutex);
1195 	}
1196 
1197 	return (DDI_WALK_CONTINUE);
1198 }
1199 
1200 static void
1201 amdzen_stub_scan(void *arg)
1202 {
1203 	amdzen_t *azn = arg;
1204 
1205 	mutex_enter(&azn->azn_mutex);
1206 	azn->azn_nscanned = 0;
1207 	mutex_exit(&azn->azn_mutex);
1208 
1209 	ddi_walk_devs(ddi_root_node(), amdzen_stub_scan_cb, azn);
1210 
1211 	mutex_enter(&azn->azn_mutex);
1212 	azn->azn_flags &= ~AMDZEN_F_SCAN_DISPATCHED;
1213 	azn->azn_flags |= AMDZEN_F_SCAN_COMPLETE;
1214 
1215 	if (azn->azn_nscanned == 0) {
1216 		azn->azn_flags |= AMDZEN_F_UNSUPPORTED;
1217 		azn->azn_taskqid = TASKQID_INVALID;
1218 		cv_broadcast(&azn->azn_cv);
1219 	} else if (azn->azn_npresent == azn->azn_nscanned) {
1220 		azn->azn_flags |= AMDZEN_F_ATTACH_DISPATCHED;
1221 		azn->azn_taskqid = taskq_dispatch(system_taskq,
1222 		    amdzen_nexus_init, azn, TQ_SLEEP);
1223 	}
1224 	mutex_exit(&azn->azn_mutex);
1225 }
1226 
1227 /*
1228  * Unfortunately we can't really let the stubs detach as we may need them to be
1229  * available for client operations. We may be able to improve this if we know
1230  * that the actual nexus is going away. However, as long as it's active, we need
1231  * all the stubs.
1232  */
1233 int
1234 amdzen_detach_stub(dev_info_t *dip, ddi_detach_cmd_t cmd)
1235 {
1236 	if (cmd == DDI_SUSPEND) {
1237 		return (DDI_SUCCESS);
1238 	}
1239 
1240 	return (DDI_FAILURE);
1241 }
1242 
1243 int
1244 amdzen_attach_stub(dev_info_t *dip, ddi_attach_cmd_t cmd)
1245 {
1246 	int *regs, reg;
1247 	uint_t nregs, i;
1248 	uint16_t vid, did;
1249 	amdzen_stub_t *stub;
1250 	amdzen_t *azn = amdzen_data;
1251 	boolean_t valid = B_FALSE;
1252 	boolean_t nb = B_FALSE;
1253 
1254 	if (cmd == DDI_RESUME) {
1255 		return (DDI_SUCCESS);
1256 	} else if (cmd != DDI_ATTACH) {
1257 		return (DDI_FAILURE);
1258 	}
1259 
1260 	/*
1261 	 * Make sure that the stub that we've been asked to attach is a pci type
1262 	 * device. If not, then there is no reason for us to proceed.
1263 	 */
1264 	if (strncmp("pci", ddi_get_name(dip), 3) != 0) {
1265 		dev_err(dip, CE_WARN, "asked to attach a bad AMD Zen nexus "
1266 		    "stub: %s", ddi_get_name(dip));
1267 		return (DDI_FAILURE);
1268 	}
1269 	vid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
1270 	    "vendor-id", PCI_EINVAL16);
1271 	did = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
1272 	    "device-id", PCI_EINVAL16);
1273 	if (vid == PCI_EINVAL16 || did == PCI_EINVAL16) {
1274 		dev_err(dip, CE_WARN, "failed to get PCI ID properties");
1275 		return (DDI_FAILURE);
1276 	}
1277 
1278 	if (vid != AMDZEN_PCI_VID_AMD && vid != AMDZEN_PCI_VID_HYGON) {
1279 		dev_err(dip, CE_WARN, "expected vendor ID (0x%x), found 0x%x",
1280 		    cpuid_getvendor(CPU) == X86_VENDOR_HYGON ?
1281 		    AMDZEN_PCI_VID_HYGON : AMDZEN_PCI_VID_AMD, vid);
1282 		return (DDI_FAILURE);
1283 	}
1284 
1285 	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
1286 	    "reg", &regs, &nregs) != DDI_PROP_SUCCESS) {
1287 		dev_err(dip, CE_WARN, "failed to get 'reg' property");
1288 		return (DDI_FAILURE);
1289 	}
1290 
1291 	if (nregs == 0) {
1292 		ddi_prop_free(regs);
1293 		dev_err(dip, CE_WARN, "missing 'reg' property values");
1294 		return (DDI_FAILURE);
1295 	}
1296 	reg = *regs;
1297 	ddi_prop_free(regs);
1298 
1299 	for (i = 0; i < ARRAY_SIZE(amdzen_nb_ids); i++) {
1300 		if (amdzen_nb_ids[i] == did) {
1301 			valid = B_TRUE;
1302 			nb = B_TRUE;
1303 		}
1304 	}
1305 
1306 	if (!valid && PCI_REG_BUS_G(reg) == AMDZEN_DF_BUSNO &&
1307 	    PCI_REG_DEV_G(reg) >= AMDZEN_DF_FIRST_DEVICE) {
1308 		valid = B_TRUE;
1309 		nb = B_FALSE;
1310 	}
1311 
1312 	if (!valid) {
1313 		dev_err(dip, CE_WARN, "device %s didn't match the nexus list",
1314 		    ddi_get_name(dip));
1315 		return (DDI_FAILURE);
1316 	}
1317 
1318 	stub = kmem_alloc(sizeof (amdzen_stub_t), KM_SLEEP);
1319 	if (pci_config_setup(dip, &stub->azns_cfgspace) != DDI_SUCCESS) {
1320 		dev_err(dip, CE_WARN, "failed to set up config space");
1321 		kmem_free(stub, sizeof (amdzen_stub_t));
1322 		return (DDI_FAILURE);
1323 	}
1324 
1325 	stub->azns_dip = dip;
1326 	stub->azns_vid = vid;
1327 	stub->azns_did = did;
1328 	stub->azns_bus = PCI_REG_BUS_G(reg);
1329 	stub->azns_dev = PCI_REG_DEV_G(reg);
1330 	stub->azns_func = PCI_REG_FUNC_G(reg);
1331 	ddi_set_driver_private(dip, stub);
1332 
1333 	mutex_enter(&azn->azn_mutex);
1334 	azn->azn_npresent++;
1335 	if (nb) {
1336 		list_insert_tail(&azn->azn_nb_stubs, stub);
1337 	} else {
1338 		list_insert_tail(&azn->azn_df_stubs, stub);
1339 	}
1340 
1341 	if ((azn->azn_flags & AMDZEN_F_TASKQ_MASK) == AMDZEN_F_SCAN_COMPLETE &&
1342 	    azn->azn_nscanned == azn->azn_npresent) {
1343 		azn->azn_flags |= AMDZEN_F_ATTACH_DISPATCHED;
1344 		azn->azn_taskqid = taskq_dispatch(system_taskq,
1345 		    amdzen_nexus_init, azn, TQ_SLEEP);
1346 	}
1347 	mutex_exit(&azn->azn_mutex);
1348 
1349 	return (DDI_SUCCESS);
1350 }
1351 
1352 static int
1353 amdzen_bus_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
1354     void *arg, void *result)
1355 {
1356 	char buf[32];
1357 	dev_info_t *child;
1358 	const amdzen_child_data_t *acd;
1359 
1360 	switch (ctlop) {
1361 	case DDI_CTLOPS_REPORTDEV:
1362 		if (rdip == NULL) {
1363 			return (DDI_FAILURE);
1364 		}
1365 		cmn_err(CE_CONT, "amdzen nexus: %s@%s, %s%d\n",
1366 		    ddi_node_name(rdip), ddi_get_name_addr(rdip),
1367 		    ddi_driver_name(rdip), ddi_get_instance(rdip));
1368 		break;
1369 	case DDI_CTLOPS_INITCHILD:
1370 		child = arg;
1371 		if (child == NULL) {
1372 			dev_err(dip, CE_WARN, "!no child passed for "
1373 			    "DDI_CTLOPS_INITCHILD");
1374 		}
1375 
1376 		acd = ddi_get_parent_data(child);
1377 		if (acd == NULL) {
1378 			dev_err(dip, CE_WARN, "!missing child parent data");
1379 			return (DDI_FAILURE);
1380 		}
1381 
1382 		if (snprintf(buf, sizeof (buf), "%d", acd->acd_addr) >=
1383 		    sizeof (buf)) {
1384 			dev_err(dip, CE_WARN, "!failed to construct device "
1385 			    "addr due to overflow");
1386 			return (DDI_FAILURE);
1387 		}
1388 
1389 		ddi_set_name_addr(child, buf);
1390 		break;
1391 	case DDI_CTLOPS_UNINITCHILD:
1392 		child = arg;
1393 		if (child == NULL) {
1394 			dev_err(dip, CE_WARN, "!no child passed for "
1395 			    "DDI_CTLOPS_UNINITCHILD");
1396 		}
1397 
1398 		ddi_set_name_addr(child, NULL);
1399 		break;
1400 	default:
1401 		return (ddi_ctlops(dip, rdip, ctlop, arg, result));
1402 	}
1403 	return (DDI_SUCCESS);
1404 }
1405 
1406 static int
1407 amdzen_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
1408 {
1409 	amdzen_t *azn = amdzen_data;
1410 
1411 	if (cmd == DDI_RESUME) {
1412 		return (DDI_SUCCESS);
1413 	} else if (cmd != DDI_ATTACH) {
1414 		return (DDI_FAILURE);
1415 	}
1416 
1417 	mutex_enter(&azn->azn_mutex);
1418 	if (azn->azn_dip != NULL) {
1419 		dev_err(dip, CE_WARN, "driver is already attached!");
1420 		mutex_exit(&azn->azn_mutex);
1421 		return (DDI_FAILURE);
1422 	}
1423 
1424 	azn->azn_dip = dip;
1425 	azn->azn_taskqid = taskq_dispatch(system_taskq, amdzen_stub_scan,
1426 	    azn, TQ_SLEEP);
1427 	azn->azn_flags |= AMDZEN_F_SCAN_DISPATCHED;
1428 	mutex_exit(&azn->azn_mutex);
1429 
1430 	return (DDI_SUCCESS);
1431 }
1432 
1433 static int
1434 amdzen_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
1435 {
1436 	amdzen_t *azn = amdzen_data;
1437 
1438 	if (cmd == DDI_SUSPEND) {
1439 		return (DDI_SUCCESS);
1440 	} else if (cmd != DDI_DETACH) {
1441 		return (DDI_FAILURE);
1442 	}
1443 
1444 	mutex_enter(&azn->azn_mutex);
1445 	while (azn->azn_taskqid != TASKQID_INVALID) {
1446 		cv_wait(&azn->azn_cv, &azn->azn_mutex);
1447 	}
1448 
1449 	/*
1450 	 * If we've attached any stub drivers, e.g. this platform is important
1451 	 * for us, then we fail detach.
1452 	 */
1453 	if (!list_is_empty(&azn->azn_df_stubs) ||
1454 	    !list_is_empty(&azn->azn_nb_stubs)) {
1455 		mutex_exit(&azn->azn_mutex);
1456 		return (DDI_FAILURE);
1457 	}
1458 
1459 	azn->azn_dip = NULL;
1460 	mutex_exit(&azn->azn_mutex);
1461 
1462 	return (DDI_SUCCESS);
1463 }
1464 
1465 static void
1466 amdzen_free(void)
1467 {
1468 	if (amdzen_data == NULL) {
1469 		return;
1470 	}
1471 
1472 	VERIFY(list_is_empty(&amdzen_data->azn_df_stubs));
1473 	list_destroy(&amdzen_data->azn_df_stubs);
1474 	VERIFY(list_is_empty(&amdzen_data->azn_nb_stubs));
1475 	list_destroy(&amdzen_data->azn_nb_stubs);
1476 	cv_destroy(&amdzen_data->azn_cv);
1477 	mutex_destroy(&amdzen_data->azn_mutex);
1478 	kmem_free(amdzen_data, sizeof (amdzen_t));
1479 	amdzen_data = NULL;
1480 }
1481 
1482 static void
1483 amdzen_alloc(void)
1484 {
1485 	amdzen_data = kmem_zalloc(sizeof (amdzen_t), KM_SLEEP);
1486 	mutex_init(&amdzen_data->azn_mutex, NULL, MUTEX_DRIVER, NULL);
1487 	list_create(&amdzen_data->azn_df_stubs, sizeof (amdzen_stub_t),
1488 	    offsetof(amdzen_stub_t, azns_link));
1489 	list_create(&amdzen_data->azn_nb_stubs, sizeof (amdzen_stub_t),
1490 	    offsetof(amdzen_stub_t, azns_link));
1491 	cv_init(&amdzen_data->azn_cv, NULL, CV_DRIVER, NULL);
1492 }
1493 
/*
 * Bus operations vector for the nexus. The only entry that is implemented in
 * this file is bus_ctl, which is handled by amdzen_bus_ctl(). bus_map is
 * nullbusmap, every DMA entry is the corresponding ddi_no_dma_* routine, and
 * property operations go to ddi_bus_prop_op.
 */
struct bus_ops amdzen_bus_ops = {
	.busops_rev = BUSO_REV,
	.bus_map = nullbusmap,
	.bus_dma_map = ddi_no_dma_map,
	.bus_dma_allochdl = ddi_no_dma_allochdl,
	.bus_dma_freehdl = ddi_no_dma_freehdl,
	.bus_dma_bindhdl = ddi_no_dma_bindhdl,
	.bus_dma_unbindhdl = ddi_no_dma_unbindhdl,
	.bus_dma_flush = ddi_no_dma_flush,
	.bus_dma_win = ddi_no_dma_win,
	.bus_dma_ctl = ddi_no_dma_mctl,
	.bus_prop_op = ddi_bus_prop_op,
	.bus_ctl = amdzen_bus_ctl
};
1508 
/*
 * Device operations for the nexus. Attach and detach are handled by
 * amdzen_attach() and amdzen_detach(), and the bus operations come from
 * amdzen_bus_ops. The remaining entries are generic nodev/nulldev routines,
 * with quiesce set to ddi_quiesce_not_needed.
 */
static struct dev_ops amdzen_dev_ops = {
	.devo_rev = DEVO_REV,
	.devo_refcnt = 0,
	.devo_getinfo = nodev,
	.devo_identify = nulldev,
	.devo_probe = nulldev,
	.devo_attach = amdzen_attach,
	.devo_detach = amdzen_detach,
	.devo_reset = nodev,
	.devo_quiesce = ddi_quiesce_not_needed,
	.devo_bus_ops = &amdzen_bus_ops
};
1521 
/*
 * Driver linkage structure: ties the module operations for a driver and the
 * driver's description string to amdzen_dev_ops.
 */
static struct modldrv amdzen_modldrv = {
	.drv_modops = &mod_driverops,
	.drv_linkinfo = "AMD Zen Nexus Driver",
	.drv_dev_ops = &amdzen_dev_ops
};
1527 
/*
 * Module linkage that _init(), _info(), and _fini() pass to mod_install(),
 * mod_info(), and mod_remove(). Its NULL-terminated linkage array contains
 * only amdzen_modldrv.
 */
static struct modlinkage amdzen_modlinkage = {
	.ml_rev = MODREV_1,
	.ml_linkage = { &amdzen_modldrv, NULL }
};
1532 
1533 int
1534 _init(void)
1535 {
1536 	int ret;
1537 
1538 	if (cpuid_getvendor(CPU) != X86_VENDOR_AMD &&
1539 	    cpuid_getvendor(CPU) != X86_VENDOR_HYGON) {
1540 		return (ENOTSUP);
1541 	}
1542 
1543 	if ((ret = mod_install(&amdzen_modlinkage)) == 0) {
1544 		amdzen_alloc();
1545 	}
1546 
1547 	return (ret);
1548 }
1549 
1550 int
1551 _info(struct modinfo *modinfop)
1552 {
1553 	return (mod_info(&amdzen_modlinkage, modinfop));
1554 }
1555 
1556 int
1557 _fini(void)
1558 {
1559 	int ret;
1560 
1561 	if ((ret = mod_remove(&amdzen_modlinkage)) == 0) {
1562 		amdzen_free();
1563 	}
1564 
1565 	return (ret);
1566 }
1567