xref: /illumos-gate/usr/src/uts/intel/io/amdzen/amdzen.c (revision 92279cb6e70fd12428e1d9e6270e7e2d877cbeec)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2019, Joyent, Inc.
14  * Copyright 2025 Oxide Computer Company
15  */
16 
17 /*
18  * Nexus Driver for AMD Zen family systems. The purpose of this driver is to
19  * provide access to the following resources in a single, centralized fashion:
20  *
21  *  - The per-chip Data Fabric
22  *  - The North Bridge
23  *  - The System Management Network (SMN)
24  *
25  * This is a nexus driver as once we have attached to all the requisite
26  * components, we will enumerate child devices which consume this functionality.
27  *
28  * ------------------------
29  * Mapping Devices Together
30  * ------------------------
31  *
32  * The operating system needs to expose things like temperature sensors and DRAM
33  * configuration registers in terms of things that are meaningful to the system
34  * such as logical CPUs, cores, etc. This driver attaches to the PCI devices
35  * that represent the northbridge, data fabrics, and dies. Note that there are
36  * multiple northbridge and DF devices (one each per die) and this driver maps
37  * all of these three things together. Unfortunately, this requires some
38  * acrobatics as there is no direct way to map a northbridge to its
39  * corresponding die. Instead, we map a CPU die to a data fabric PCI device and
40  * a data fabric PCI device to a corresponding northbridge PCI device. This
 * transitive relationship allows us to map between northbridge and die.
42  *
43  * As each data fabric device is attached, based on vendor and device portions
44  * of the PCI ID, we add it to the DF stubs list in the global amdzen_t
45  * structure, amdzen_data->azn_df_stubs. We must now map these to logical CPUs.
46  *
47  * In current Zen based products, there is a direct mapping between processor
48  * nodes and a data fabric PCI device: all of the devices are on PCI Bus 0 and
49  * start from Device 0x18, so device 0x18 maps to processor node 0, 0x19 to
50  * processor node 1, etc. This means that to map a logical CPU to a data fabric
51  * device, we take its processor node id, add it to 0x18 and find the PCI device
52  * that is on bus 0 with that ID number. We already discovered the DF devices as
53  * described above.
54  *
55  * The northbridge PCI device has a well-defined device and function, but the
56  * bus that it is on varies. Each die has its own set of assigned PCI buses and
57  * its northbridge device is on the first die-specific bus. This implies that
58  * the northbridges do not show up on PCI bus 0, as that is the PCI bus that all
59  * of the data fabric devices are on and is not assigned to any particular die.
60  * Additionally, while the northbridge on the lowest-numbered PCI bus
61  * intuitively corresponds to processor node zero, hardware does not guarantee
62  * this. Because we don't want to be at the mercy of firmware, we don't rely on
63  * this ordering assumption, though we have yet to find a system that deviates
64  * from it, either.
65  *
66  * One of the registers in the data fabric device's function 0
67  * (AMDZEN_DF_F0_CFG_ADDR_CTL) happens to identify the first PCI bus that is
68  * associated with the processor node. This means that we can map a data fabric
69  * device to a northbridge by finding the northbridge whose PCI bus ID matches
70  * the value in the corresponding data fabric's AMDZEN_DF_F0_CFG_ADDR_CTL.
71  *
72  * Given all of the above, we can map a northbridge to a data fabric device and
73  * a die to a data fabric device. Because these are 1:1 mappings, there is a
 * transitive relationship from northbridge to die, and therefore we know which
75  * northbridge is associated with which processor die. This is summarized in the
76  * following image:
77  *
78  *  +-------+     +------------------------------------+     +--------------+
79  *  | Die 0 |---->| Data Fabric PCI BDF 0/18/0         |---->| Northbridge  |
80  *  +-------+     | AMDZEN_DF_F0_CFG_ADDR_CTL: bus 10  |     | PCI  10/0/0  |
81  *     ...        +------------------------------------+     +--------------+
82  *  +-------+     +------------------------------------+     +--------------+
83  *  | Die n |---->| Data Fabric PCI BDF 0/18+n/0       |---->| Northbridge  |
84  *  +-------+     | AMDZEN_DF_F0_CFG_ADDR_CTL: bus 133 |     | PCI 133/0/0  |
85  *                +------------------------------------+     +--------------+
86  *
87  * Note, the PCI buses used by the northbridges here are arbitrary examples that
88  * do not necessarily reflect actual hardware values; however, the
89  * bus/device/function (BDF) of the data fabric accurately models hardware. All
90  * BDF values are in hex.
91  *
92  * Starting with the Rome generation of processors (Family 17h Model 30-3Fh),
93  * AMD has multiple northbridges on a given die. All of these northbridges share
94  * the same data fabric and system management network port. From our perspective
95  * this means that some of the northbridge devices will be redundant and that we
96  * no longer have a 1:1 mapping between the northbridge and the data fabric
97  * devices. Every data fabric will have a northbridge, but not every northbridge
98  * will have a data fabric device mapped. Because we're always trying to map
99  * from a die to a northbridge and not the reverse, the fact that there are
100  * extra northbridge devices hanging around that we don't know about shouldn't
101  * be a problem.
102  *
103  * -------------------------------
104  * Attach and Detach Complications
105  * -------------------------------
106  *
107  * We need to map different PCI devices together. Each device is attached to a
108  * amdzen_stub driver to facilitate integration with the rest of the kernel PCI
109  * machinery and so we have to manage multiple dev_info_t structures, each of
110  * which may be independently attached and detached.
111  *
112  * This is not particularly complex for attach: our _init routine allocates the
113  * necessary mutex and list structures at module load time, and as each stub is
114  * attached, it calls into this code to be added to the appropriate list. When
115  * the nexus itself is attached, we walk the PCI device tree accumulating a
116  * counter for all devices we expect to be attached. Once the scan is complete
117  * and all such devices are accounted for (stub registration may be happening
118  * asynchronously with respect to nexus attach), we initialize the nexus device
119  * and the attach is complete.
120  *
121  * Most other device drivers support instances that can be brought back after
122  * detach, provided they are associated with an active minor node in the
123  * /devices file system. This driver is different. Once a stub device has been
124  * attached, we do not permit detaching the nexus driver instance, as the kernel
125  * does not give us interlocking guarantees between nexus and stub driver attach
126  * and detach. It is simplest to just unconditionally fail detach once a stub
127  * has attached.
128  *
129  * ---------------
130  * Exposed Devices
131  * ---------------
132  *
133  * Rather than try and have all of the different functions that could be
134  * provided in one driver, we have a nexus driver that tries to load child
135  * pseudo-device drivers that provide specific pieces of functionality.
136  *
137  * -------
138  * Locking
139  * -------
140  *
141  * The amdzen_data structure contains a single lock, azn_mutex.
142  *
143  * The various client functions here are intended for our nexus's direct
 * children, but have been designed in case someone else should depend on this
145  * driver. Once a DF has been discovered, the set of entities inside of it
146  * (adf_nents, adf_ents[]) is considered static, constant data, and iteration
147  * over them does not require locking. However, the discovery of the amd_df_t
148  * does. In addition, locking is required whenever performing register accesses
149  * to the DF or SMN.
150  *
151  * To summarize, one must hold the lock in the following circumstances:
152  *
153  *  - Looking up DF structures
154  *  - Reading or writing to DF registers
155  *  - Reading or writing to SMN registers
156  *
157  * In general, it is preferred that the lock be held across an entire client
 * operation if possible. The only time this becomes an issue is when we have
159  * callbacks into our callers (ala amdzen_c_df_iter()) as they may recursively
160  * call into us.
161  */
162 
163 #include <sys/modctl.h>
164 #include <sys/conf.h>
165 #include <sys/devops.h>
166 #include <sys/ddi.h>
167 #include <sys/sunddi.h>
168 #include <sys/pci.h>
169 #include <sys/sysmacros.h>
170 #include <sys/sunndi.h>
171 #include <sys/x86_archext.h>
172 #include <sys/cpuvar.h>
173 #include <sys/policy.h>
174 #include <sys/stat.h>
175 #include <sys/sunddi.h>
176 #include <sys/bitmap.h>
177 #include <sys/stdbool.h>
178 
179 #include <sys/amdzen/df.h>
180 #include <sys/amdzen/ccd.h>
181 #include "amdzen.h"
182 #include "amdzen_client.h"
183 #include "amdzen_topo.h"
184 
185 amdzen_t *amdzen_data;
186 
187 /*
188  * Internal minor nodes for devices that the nexus provides itself.
189  */
190 #define	AMDZEN_MINOR_TOPO	0
191 
192 /*
193  * Array of northbridge IDs that we care about.
194  */
195 static const uint16_t amdzen_nb_ids[] = {
196 	/* Family 17h Ryzen, Epyc Models 00h-0fh (Zen uarch) */
197 	0x1450,
198 	/* Family 17h Raven Ridge, Kestrel, Dali Models 10h-2fh (Zen uarch) */
199 	0x15d0,
200 	/* Family 17h/19h Rome, Milan, Matisse, Vermeer Zen 2/Zen 3 uarch */
201 	0x1480,
202 	/* Family 17h/19h Renoir, Cezanne, Van Gogh Zen 2/3 uarch */
203 	0x1630,
204 	/* Family 19h Genoa and Bergamo */
205 	0x14a4,
206 	/* Family 17h Mendocino, Family 19h Rembrandt */
207 	0x14b5,
208 	/* Family 19h Raphael, Family 1Ah 40-4fh */
209 	0x14d8,
210 	/* Family 19h Phoenix */
211 	0x14e8,
212 	/* Family 1Ah Turin */
213 	0x153a,
214 	/* Family 1Ah 20-2fh, 70-77h */
215 	0x1507,
216 	/* Family 1Ah 60-6fh */
217 	0x1122
218 };
219 
220 typedef struct {
221 	char *acd_name;
222 	amdzen_child_t acd_addr;
223 	/*
224 	 * This indicates whether or not we should issue warnings to users when
225 	 * something happens specific to this instance. The main reason we don't
226 	 * want to is for optional devices that may not be installed as they are
227 	 * for development purposes (e.g. usmn, zen_udf); however, if there is
228 	 * an issue with the others we still want to know.
229 	 */
230 	bool acd_warn;
231 } amdzen_child_data_t;
232 
233 static const amdzen_child_data_t amdzen_children[] = {
234 	{ "smntemp", AMDZEN_C_SMNTEMP, true },
235 	{ "usmn", AMDZEN_C_USMN, false },
236 	{ "zen_udf", AMDZEN_C_ZEN_UDF, false },
237 	{ "zen_umc", AMDZEN_C_ZEN_UMC, true }
238 };
239 
240 static uint8_t
amdzen_stub_get8(amdzen_stub_t * stub,off_t reg)241 amdzen_stub_get8(amdzen_stub_t *stub, off_t reg)
242 {
243 	return (pci_config_get8(stub->azns_cfgspace, reg));
244 }
245 
246 static uint16_t
amdzen_stub_get16(amdzen_stub_t * stub,off_t reg)247 amdzen_stub_get16(amdzen_stub_t *stub, off_t reg)
248 {
249 	return (pci_config_get16(stub->azns_cfgspace, reg));
250 }
251 
252 static uint32_t
amdzen_stub_get32(amdzen_stub_t * stub,off_t reg)253 amdzen_stub_get32(amdzen_stub_t *stub, off_t reg)
254 {
255 	return (pci_config_get32(stub->azns_cfgspace, reg));
256 }
257 
258 static uint64_t
amdzen_stub_get64(amdzen_stub_t * stub,off_t reg)259 amdzen_stub_get64(amdzen_stub_t *stub, off_t reg)
260 {
261 	return (pci_config_get64(stub->azns_cfgspace, reg));
262 }
263 
264 static void
amdzen_stub_put8(amdzen_stub_t * stub,off_t reg,uint8_t val)265 amdzen_stub_put8(amdzen_stub_t *stub, off_t reg, uint8_t val)
266 {
267 	pci_config_put8(stub->azns_cfgspace, reg, val);
268 }
269 
270 static void
amdzen_stub_put16(amdzen_stub_t * stub,off_t reg,uint16_t val)271 amdzen_stub_put16(amdzen_stub_t *stub, off_t reg, uint16_t val)
272 {
273 	pci_config_put16(stub->azns_cfgspace, reg, val);
274 }
275 
276 static void
amdzen_stub_put32(amdzen_stub_t * stub,off_t reg,uint32_t val)277 amdzen_stub_put32(amdzen_stub_t *stub, off_t reg, uint32_t val)
278 {
279 	pci_config_put32(stub->azns_cfgspace, reg, val);
280 }
281 
282 static uint64_t
amdzen_df_read_regdef(amdzen_t * azn,amdzen_df_t * df,const df_reg_def_t def,uint8_t inst,boolean_t do_64)283 amdzen_df_read_regdef(amdzen_t *azn, amdzen_df_t *df, const df_reg_def_t def,
284     uint8_t inst, boolean_t do_64)
285 {
286 	df_reg_def_t ficaa;
287 	df_reg_def_t ficad;
288 	uint32_t val = 0;
289 	df_rev_t df_rev = azn->azn_dfs[0].adf_rev;
290 	VERIFY(df_reg_valid(df_rev, def));
291 
292 	VERIFY(MUTEX_HELD(&azn->azn_mutex));
293 	val = DF_FICAA_V2_SET_TARG_INST(val, 1);
294 	val = DF_FICAA_V2_SET_FUNC(val, def.drd_func);
295 	val = DF_FICAA_V2_SET_INST(val, inst);
296 	val = DF_FICAA_V2_SET_64B(val, do_64 ? 1 : 0);
297 
298 	switch (df_rev) {
299 	case DF_REV_2:
300 	case DF_REV_3:
301 	case DF_REV_3P5:
302 		ficaa = DF_FICAA_V2;
303 		ficad = DF_FICAD_LO_V2;
304 		val = DF_FICAA_V2_SET_REG(val, def.drd_reg >>
305 		    DF_FICAA_REG_SHIFT);
306 		break;
307 	case DF_REV_4:
308 	case DF_REV_4D2:
309 		ficaa = DF_FICAA_V4;
310 		ficad = DF_FICAD_LO_V4;
311 		val = DF_FICAA_V4_SET_REG(val, def.drd_reg >>
312 		    DF_FICAA_REG_SHIFT);
313 		break;
314 	default:
315 		panic("encountered unexpected DF rev: %u", df_rev);
316 	}
317 
318 	amdzen_stub_put32(df->adf_funcs[ficaa.drd_func], ficaa.drd_reg, val);
319 	if (do_64) {
320 		return (amdzen_stub_get64(df->adf_funcs[ficad.drd_func],
321 		    ficad.drd_reg));
322 	} else {
323 		return (amdzen_stub_get32(df->adf_funcs[ficad.drd_func],
324 		    ficad.drd_reg));
325 	}
326 }
327 
328 /*
329  * Perform a targeted 32-bit indirect read to a specific instance and function.
330  */
331 static uint32_t
amdzen_df_read32(amdzen_t * azn,amdzen_df_t * df,uint8_t inst,const df_reg_def_t def)332 amdzen_df_read32(amdzen_t *azn, amdzen_df_t *df, uint8_t inst,
333     const df_reg_def_t def)
334 {
335 	return (amdzen_df_read_regdef(azn, df, def, inst, B_FALSE));
336 }
337 
338 /*
339  * For a broadcast read, just go to the underlying PCI function and perform a
340  * read. At this point in time, we don't believe we need to use the FICAA/FICAD
341  * to access it (though it does have a broadcast mode).
342  */
343 static uint32_t
amdzen_df_read32_bcast(amdzen_t * azn,amdzen_df_t * df,const df_reg_def_t def)344 amdzen_df_read32_bcast(amdzen_t *azn, amdzen_df_t *df, const df_reg_def_t def)
345 {
346 	VERIFY(MUTEX_HELD(&azn->azn_mutex));
347 	return (amdzen_stub_get32(df->adf_funcs[def.drd_func], def.drd_reg));
348 }
349 
350 static uint32_t
amdzen_smn_read(amdzen_t * azn,amdzen_df_t * df,const smn_reg_t reg)351 amdzen_smn_read(amdzen_t *azn, amdzen_df_t *df, const smn_reg_t reg)
352 {
353 	const uint32_t base_addr = SMN_REG_ADDR_BASE(reg);
354 	const uint32_t addr_off = SMN_REG_ADDR_OFF(reg);
355 
356 	VERIFY(SMN_REG_IS_NATURALLY_ALIGNED(reg));
357 	VERIFY(MUTEX_HELD(&azn->azn_mutex));
358 	amdzen_stub_put32(df->adf_nb, AMDZEN_NB_SMN_ADDR, base_addr);
359 
360 	switch (SMN_REG_SIZE(reg)) {
361 	case 1:
362 		return ((uint32_t)amdzen_stub_get8(df->adf_nb,
363 		    AMDZEN_NB_SMN_DATA + addr_off));
364 	case 2:
365 		return ((uint32_t)amdzen_stub_get16(df->adf_nb,
366 		    AMDZEN_NB_SMN_DATA + addr_off));
367 	case 4:
368 		return (amdzen_stub_get32(df->adf_nb, AMDZEN_NB_SMN_DATA));
369 	default:
370 		panic("unreachable invalid SMN register size %u",
371 		    SMN_REG_SIZE(reg));
372 	}
373 }
374 
375 static void
amdzen_smn_write(amdzen_t * azn,amdzen_df_t * df,const smn_reg_t reg,const uint32_t val)376 amdzen_smn_write(amdzen_t *azn, amdzen_df_t *df, const smn_reg_t reg,
377     const uint32_t val)
378 {
379 	const uint32_t base_addr = SMN_REG_ADDR_BASE(reg);
380 	const uint32_t addr_off = SMN_REG_ADDR_OFF(reg);
381 
382 	VERIFY(SMN_REG_IS_NATURALLY_ALIGNED(reg));
383 	VERIFY(SMN_REG_VALUE_FITS(reg, val));
384 	VERIFY(MUTEX_HELD(&azn->azn_mutex));
385 	amdzen_stub_put32(df->adf_nb, AMDZEN_NB_SMN_ADDR, base_addr);
386 
387 	switch (SMN_REG_SIZE(reg)) {
388 	case 1:
389 		amdzen_stub_put8(df->adf_nb, AMDZEN_NB_SMN_DATA + addr_off,
390 		    (uint8_t)val);
391 		break;
392 	case 2:
393 		amdzen_stub_put16(df->adf_nb, AMDZEN_NB_SMN_DATA + addr_off,
394 		    (uint16_t)val);
395 		break;
396 	case 4:
397 		amdzen_stub_put32(df->adf_nb, AMDZEN_NB_SMN_DATA, val);
398 		break;
399 	default:
400 		panic("unreachable invalid SMN register size %u",
401 		    SMN_REG_SIZE(reg));
402 	}
403 }
404 
405 /*
406  * This is an unfortunate necessity due to the evolution of the CCM DF values.
407  */
408 static inline boolean_t
amdzen_df_at_least(const amdzen_df_t * df,uint8_t major,uint8_t minor)409 amdzen_df_at_least(const amdzen_df_t *df, uint8_t major, uint8_t minor)
410 {
411 	return (df->adf_major > major || (df->adf_major == major &&
412 	    df->adf_minor >= minor));
413 }
414 
415 static amdzen_df_t *
amdzen_df_find(amdzen_t * azn,uint_t dfno)416 amdzen_df_find(amdzen_t *azn, uint_t dfno)
417 {
418 	uint_t i;
419 
420 	ASSERT(MUTEX_HELD(&azn->azn_mutex));
421 	if (dfno >= azn->azn_ndfs) {
422 		return (NULL);
423 	}
424 
425 	for (i = 0; i < azn->azn_ndfs; i++) {
426 		amdzen_df_t *df = &azn->azn_dfs[i];
427 		if ((df->adf_flags & AMDZEN_DF_F_VALID) == 0) {
428 			continue;
429 		}
430 
431 		if (dfno == 0) {
432 			return (df);
433 		}
434 		dfno--;
435 	}
436 
437 	return (NULL);
438 }
439 
440 static amdzen_df_ent_t *
amdzen_df_ent_find_by_instid(amdzen_df_t * df,uint8_t instid)441 amdzen_df_ent_find_by_instid(amdzen_df_t *df, uint8_t instid)
442 {
443 	for (uint_t i = 0; i < df->adf_nents; i++) {
444 		amdzen_df_ent_t *ent = &df->adf_ents[i];
445 
446 		if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0) {
447 			continue;
448 		}
449 
450 		if (ent->adfe_inst_id == instid) {
451 			return (ent);
452 		}
453 	}
454 
455 	return (NULL);
456 }
457 
458 /*
459  * Client functions that are used by nexus children.
460  */
461 int
amdzen_c_smn_read(uint_t dfno,const smn_reg_t reg,uint32_t * valp)462 amdzen_c_smn_read(uint_t dfno, const smn_reg_t reg, uint32_t *valp)
463 {
464 	amdzen_df_t *df;
465 	amdzen_t *azn = amdzen_data;
466 
467 	if (!SMN_REG_SIZE_IS_VALID(reg))
468 		return (EINVAL);
469 	if (!SMN_REG_IS_NATURALLY_ALIGNED(reg))
470 		return (EINVAL);
471 
472 	mutex_enter(&azn->azn_mutex);
473 	df = amdzen_df_find(azn, dfno);
474 	if (df == NULL) {
475 		mutex_exit(&azn->azn_mutex);
476 		return (ENOENT);
477 	}
478 
479 	if ((df->adf_flags & AMDZEN_DF_F_FOUND_NB) == 0) {
480 		mutex_exit(&azn->azn_mutex);
481 		return (ENXIO);
482 	}
483 
484 	*valp = amdzen_smn_read(azn, df, reg);
485 	mutex_exit(&azn->azn_mutex);
486 	return (0);
487 }
488 
489 int
amdzen_c_smn_write(uint_t dfno,const smn_reg_t reg,const uint32_t val)490 amdzen_c_smn_write(uint_t dfno, const smn_reg_t reg, const uint32_t val)
491 {
492 	amdzen_df_t *df;
493 	amdzen_t *azn = amdzen_data;
494 
495 	if (!SMN_REG_SIZE_IS_VALID(reg))
496 		return (EINVAL);
497 	if (!SMN_REG_IS_NATURALLY_ALIGNED(reg))
498 		return (EINVAL);
499 	if (!SMN_REG_VALUE_FITS(reg, val))
500 		return (EOVERFLOW);
501 
502 	mutex_enter(&azn->azn_mutex);
503 	df = amdzen_df_find(azn, dfno);
504 	if (df == NULL) {
505 		mutex_exit(&azn->azn_mutex);
506 		return (ENOENT);
507 	}
508 
509 	if ((df->adf_flags & AMDZEN_DF_F_FOUND_NB) == 0) {
510 		mutex_exit(&azn->azn_mutex);
511 		return (ENXIO);
512 	}
513 
514 	amdzen_smn_write(azn, df, reg, val);
515 	mutex_exit(&azn->azn_mutex);
516 	return (0);
517 }
518 
519 uint_t
amdzen_c_df_count(void)520 amdzen_c_df_count(void)
521 {
522 	uint_t ret;
523 	amdzen_t *azn = amdzen_data;
524 
525 	mutex_enter(&azn->azn_mutex);
526 	ret = azn->azn_ndfs;
527 	mutex_exit(&azn->azn_mutex);
528 	return (ret);
529 }
530 
531 df_rev_t
amdzen_c_df_rev(void)532 amdzen_c_df_rev(void)
533 {
534 	amdzen_df_t *df;
535 	amdzen_t *azn = amdzen_data;
536 	df_rev_t rev;
537 
538 	/*
539 	 * Always use the first DF instance to determine what we're using. Our
540 	 * current assumption, which seems to generally be true, is that the
541 	 * given DF revisions are the same in a given system when the DFs are
542 	 * directly connected.
543 	 */
544 	mutex_enter(&azn->azn_mutex);
545 	df = amdzen_df_find(azn, 0);
546 	if (df == NULL) {
547 		rev = DF_REV_UNKNOWN;
548 	} else {
549 		rev = df->adf_rev;
550 	}
551 	mutex_exit(&azn->azn_mutex);
552 
553 	return (rev);
554 }
555 
556 int
amdzen_c_df_read32(uint_t dfno,uint8_t inst,const df_reg_def_t def,uint32_t * valp)557 amdzen_c_df_read32(uint_t dfno, uint8_t inst, const df_reg_def_t def,
558     uint32_t *valp)
559 {
560 	amdzen_df_t *df;
561 	amdzen_t *azn = amdzen_data;
562 
563 	mutex_enter(&azn->azn_mutex);
564 	df = amdzen_df_find(azn, dfno);
565 	if (df == NULL) {
566 		mutex_exit(&azn->azn_mutex);
567 		return (ENOENT);
568 	}
569 
570 	if (df->adf_rev == DF_REV_UNKNOWN) {
571 		mutex_exit(&azn->azn_mutex);
572 		return (ENOTSUP);
573 	}
574 
575 	*valp = amdzen_df_read_regdef(azn, df, def, inst, B_FALSE);
576 	mutex_exit(&azn->azn_mutex);
577 
578 	return (0);
579 }
580 
581 int
amdzen_c_df_read64(uint_t dfno,uint8_t inst,const df_reg_def_t def,uint64_t * valp)582 amdzen_c_df_read64(uint_t dfno, uint8_t inst, const df_reg_def_t def,
583     uint64_t *valp)
584 {
585 	amdzen_df_t *df;
586 	amdzen_t *azn = amdzen_data;
587 
588 	mutex_enter(&azn->azn_mutex);
589 	df = amdzen_df_find(azn, dfno);
590 	if (df == NULL) {
591 		mutex_exit(&azn->azn_mutex);
592 		return (ENOENT);
593 	}
594 
595 	if (df->adf_rev == DF_REV_UNKNOWN) {
596 		mutex_exit(&azn->azn_mutex);
597 		return (ENOTSUP);
598 	}
599 
600 	*valp = amdzen_df_read_regdef(azn, df, def, inst, B_TRUE);
601 	mutex_exit(&azn->azn_mutex);
602 
603 	return (0);
604 }
605 
606 int
amdzen_c_df_read32_bcast(uint_t dfno,const df_reg_def_t def,uint32_t * valp)607 amdzen_c_df_read32_bcast(uint_t dfno, const df_reg_def_t def, uint32_t *valp)
608 {
609 	amdzen_df_t *df;
610 	amdzen_t *azn = amdzen_data;
611 
612 	mutex_enter(&azn->azn_mutex);
613 	df = amdzen_df_find(azn, dfno);
614 	if (df == NULL) {
615 		mutex_exit(&azn->azn_mutex);
616 		return (ENOENT);
617 	}
618 
619 	if (df->adf_rev == DF_REV_UNKNOWN) {
620 		mutex_exit(&azn->azn_mutex);
621 		return (ENOTSUP);
622 	}
623 
624 	*valp = amdzen_df_read32_bcast(azn, df, def);
625 	mutex_exit(&azn->azn_mutex);
626 
627 	return (0);
628 }
629 
630 int
amdzen_c_df_iter(uint_t dfno,zen_df_type_t type,amdzen_c_iter_f func,void * arg)631 amdzen_c_df_iter(uint_t dfno, zen_df_type_t type, amdzen_c_iter_f func,
632     void *arg)
633 {
634 	amdzen_df_t *df;
635 	amdzen_t *azn = amdzen_data;
636 	df_type_t df_type;
637 	uint8_t df_subtype;
638 
639 	/*
640 	 * Unlike other calls here, we hold our lock only to find the DF here.
641 	 * The main reason for this is the nature of the callback function.
642 	 * Folks are iterating over instances so they can call back into us. If
643 	 * you look at the locking statement, the thing that is most volatile
644 	 * right here and what we need to protect is the DF itself and
645 	 * subsequent register accesses to it. The actual data about which
646 	 * entities exist is static and so once we have found a DF we should
647 	 * hopefully be in good shape as they only come, but don't go.
648 	 */
649 	mutex_enter(&azn->azn_mutex);
650 	df = amdzen_df_find(azn, dfno);
651 	if (df == NULL) {
652 		mutex_exit(&azn->azn_mutex);
653 		return (ENOENT);
654 	}
655 	mutex_exit(&azn->azn_mutex);
656 
657 	switch (type) {
658 	case ZEN_DF_TYPE_CS_UMC:
659 		df_type = DF_TYPE_CS;
660 		/*
661 		 * In the original Zeppelin DFv2 die there was no subtype field
662 		 * used for the CS. The UMC is the only type and has a subtype
663 		 * of zero.
664 		 */
665 		if (df->adf_rev != DF_REV_2) {
666 			df_subtype = DF_CS_SUBTYPE_UMC;
667 		} else {
668 			df_subtype = 0;
669 		}
670 		break;
671 	case ZEN_DF_TYPE_CCM_CPU:
672 		df_type = DF_TYPE_CCM;
673 
674 		if (df->adf_rev >= DF_REV_4 && amdzen_df_at_least(df, 4, 1)) {
675 			df_subtype = DF_CCM_SUBTYPE_CPU_V4P1;
676 		} else {
677 			df_subtype = DF_CCM_SUBTYPE_CPU_V2;
678 		}
679 		break;
680 	default:
681 		return (EINVAL);
682 	}
683 
684 	for (uint_t i = 0; i < df->adf_nents; i++) {
685 		amdzen_df_ent_t *ent = &df->adf_ents[i];
686 
687 		/*
688 		 * Some DF components are not considered enabled and therefore
689 		 * will end up having bogus values in their ID fields. If we do
690 		 * not have an enable flag set, we must skip this node.
691 		 */
692 		if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0)
693 			continue;
694 
695 		if (ent->adfe_type == df_type &&
696 		    ent->adfe_subtype == df_subtype) {
697 			int ret = func(dfno, ent->adfe_fabric_id,
698 			    ent->adfe_inst_id, arg);
699 			if (ret != 0) {
700 				return (ret);
701 			}
702 		}
703 	}
704 
705 	return (0);
706 }
707 
708 int
amdzen_c_df_fabric_decomp(df_fabric_decomp_t * decomp)709 amdzen_c_df_fabric_decomp(df_fabric_decomp_t *decomp)
710 {
711 	const amdzen_df_t *df;
712 	amdzen_t *azn = amdzen_data;
713 
714 	mutex_enter(&azn->azn_mutex);
715 	df = amdzen_df_find(azn, 0);
716 	if (df == NULL) {
717 		mutex_exit(&azn->azn_mutex);
718 		return (ENOENT);
719 	}
720 
721 	*decomp = df->adf_decomp;
722 	mutex_exit(&azn->azn_mutex);
723 	return (0);
724 }
725 
726 static boolean_t
amdzen_create_child(amdzen_t * azn,const amdzen_child_data_t * acd)727 amdzen_create_child(amdzen_t *azn, const amdzen_child_data_t *acd)
728 {
729 	int ret;
730 	dev_info_t *child;
731 
732 	if (ndi_devi_alloc(azn->azn_dip, acd->acd_name,
733 	    (pnode_t)DEVI_SID_NODEID, &child) != NDI_SUCCESS) {
734 		dev_err(azn->azn_dip, CE_WARN, "!failed to allocate child "
735 		    "dip for %s", acd->acd_name);
736 		return (B_FALSE);
737 	}
738 
739 	ddi_set_parent_data(child, (void *)acd);
740 	if ((ret = ndi_devi_online(child, 0)) != NDI_SUCCESS) {
741 		if (acd->acd_warn) {
742 			dev_err(azn->azn_dip, CE_WARN, "!failed to online "
743 			    "child dip %s: %d", acd->acd_name, ret);
744 		}
745 		return (B_FALSE);
746 	}
747 
748 	return (B_TRUE);
749 }
750 
751 static boolean_t
amdzen_map_dfs(amdzen_t * azn)752 amdzen_map_dfs(amdzen_t *azn)
753 {
754 	amdzen_stub_t *stub;
755 
756 	ASSERT(MUTEX_HELD(&azn->azn_mutex));
757 
758 	for (stub = list_head(&azn->azn_df_stubs); stub != NULL;
759 	    stub = list_next(&azn->azn_df_stubs, stub)) {
760 		amdzen_df_t *df;
761 		uint_t dfno;
762 
763 		dfno = stub->azns_dev - AMDZEN_DF_FIRST_DEVICE;
764 		if (dfno > AMDZEN_MAX_DFS) {
765 			dev_err(stub->azns_dip, CE_WARN, "encountered df "
766 			    "device with illegal DF PCI b/d/f: 0x%x/%x/%x",
767 			    stub->azns_bus, stub->azns_dev, stub->azns_func);
768 			goto err;
769 		}
770 
771 		df = &azn->azn_dfs[dfno];
772 
773 		if (stub->azns_func >= AMDZEN_MAX_DF_FUNCS) {
774 			dev_err(stub->azns_dip, CE_WARN, "encountered df "
775 			    "device with illegal DF PCI b/d/f: 0x%x/%x/%x",
776 			    stub->azns_bus, stub->azns_dev, stub->azns_func);
777 			goto err;
778 		}
779 
780 		if (df->adf_funcs[stub->azns_func] != NULL) {
781 			dev_err(stub->azns_dip, CE_WARN, "encountered "
782 			    "duplicate df device with DF PCI b/d/f: 0x%x/%x/%x",
783 			    stub->azns_bus, stub->azns_dev, stub->azns_func);
784 			goto err;
785 		}
786 		df->adf_funcs[stub->azns_func] = stub;
787 	}
788 
789 	return (B_TRUE);
790 
791 err:
792 	azn->azn_flags |= AMDZEN_F_DEVICE_ERROR;
793 	return (B_FALSE);
794 }
795 
796 static boolean_t
amdzen_check_dfs(amdzen_t * azn)797 amdzen_check_dfs(amdzen_t *azn)
798 {
799 	uint_t i;
800 	boolean_t ret = B_TRUE;
801 
802 	for (i = 0; i < AMDZEN_MAX_DFS; i++) {
803 		amdzen_df_t *df = &azn->azn_dfs[i];
804 		uint_t count = 0;
805 
806 		/*
807 		 * We require all platforms to have DFs functions 0-6. Not all
808 		 * platforms have DF function 7.
809 		 */
810 		for (uint_t func = 0; func < AMDZEN_MAX_DF_FUNCS - 1; func++) {
811 			if (df->adf_funcs[func] != NULL) {
812 				count++;
813 			}
814 		}
815 
816 		if (count == 0)
817 			continue;
818 
819 		if (count != 7) {
820 			ret = B_FALSE;
821 			dev_err(azn->azn_dip, CE_WARN, "df %u devices "
822 			    "incomplete", i);
823 		} else {
824 			df->adf_flags |= AMDZEN_DF_F_VALID;
825 			azn->azn_ndfs++;
826 		}
827 	}
828 
829 	return (ret);
830 }
831 
832 static const uint8_t amdzen_df_rome_ids[0x2b] = {
833 	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, 20, 21, 22, 23,
834 	24, 25, 26, 27, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
835 	44, 45, 46, 47, 48
836 };
837 
838 /*
839  * Check the first df entry to see if it belongs to Rome or Milan. If so, then
840  * it uses the disjoint ID space.
841  */
842 static boolean_t
amdzen_is_rome_style(uint_t id)843 amdzen_is_rome_style(uint_t id)
844 {
845 	return (id == 0x1490 || id == 0x1650);
846 }
847 
848 /*
849  * Deal with the differences between between how a CCM subtype is indicated
850  * across CPU generations.
851  */
852 static boolean_t
amdzen_dfe_is_ccm(const amdzen_df_t * df,const amdzen_df_ent_t * ent)853 amdzen_dfe_is_ccm(const amdzen_df_t *df, const amdzen_df_ent_t *ent)
854 {
855 	if (ent->adfe_type != DF_TYPE_CCM) {
856 		return (B_FALSE);
857 	}
858 
859 	if (df->adf_rev >= DF_REV_4 && amdzen_df_at_least(df, 4, 1)) {
860 		return (ent->adfe_subtype == DF_CCM_SUBTYPE_CPU_V4P1);
861 	} else {
862 		return (ent->adfe_subtype == DF_CCM_SUBTYPE_CPU_V2);
863 	}
864 }
865 
866 /*
867  * To be able to do most other things we want to do, we must first determine
868  * what revision of the DF (data fabric) that we're using.
869  *
870  * Snapshot the df version. This was added explicitly in DFv4.0, around the Zen
871  * 4 timeframe and allows us to tell apart different version of the DF register
872  * set, most usefully when various subtypes were added.
873  *
874  * Older versions can theoretically be told apart based on usage of reserved
875  * registers. We walk these in the following order, starting with the newest rev
876  * and walking backwards to tell things apart:
877  *
878  *   o v3.5 -> Check function 1, register 0x150. This was reserved prior
879  *             to this point. This is actually DF_FIDMASK0_V3P5. We are supposed
880  *             to check bits [7:0].
881  *
882  *   o v3.0 -> Check function 1, register 0x208. The low byte (7:0) was
883  *             changed to indicate a component mask. This is non-zero
884  *             in the 3.0 generation. This is actually DF_FIDMASK_V2.
885  *
 *   o v2.0 -> The remaining case, when neither check above hits. Presumably
887  *             of the Zen generation.
888  *
889  * Because we don't know what version we are yet, we do not use the normal
890  * versioned register accesses which would check what DF version we are and
891  * would want to use the normal indirect register accesses (which also require
892  * us to know the version). We instead do direct broadcast reads.
893  */
894 static void
amdzen_determine_df_vers(amdzen_t * azn,amdzen_df_t * df)895 amdzen_determine_df_vers(amdzen_t *azn, amdzen_df_t *df)
896 {
897 	uint32_t val;
898 	df_reg_def_t rd = DF_FBICNT;
899 
900 	val = amdzen_stub_get32(df->adf_funcs[rd.drd_func], rd.drd_reg);
901 	df->adf_major = DF_FBICNT_V4_GET_MAJOR(val);
902 	df->adf_minor = DF_FBICNT_V4_GET_MINOR(val);
903 	if (df->adf_major == 0 && df->adf_minor == 0) {
904 		rd = DF_FIDMASK0_V3P5;
905 		val = amdzen_stub_get32(df->adf_funcs[rd.drd_func], rd.drd_reg);
906 		if (bitx32(val, 7, 0) != 0) {
907 			df->adf_major = 3;
908 			df->adf_minor = 5;
909 			df->adf_rev = DF_REV_3P5;
910 		} else {
911 			rd = DF_FIDMASK_V2;
912 			val = amdzen_stub_get32(df->adf_funcs[rd.drd_func],
913 			    rd.drd_reg);
914 			if (bitx32(val, 7, 0) != 0) {
915 				df->adf_major = 3;
916 				df->adf_minor = 0;
917 				df->adf_rev = DF_REV_3;
918 			} else {
919 				df->adf_major = 2;
920 				df->adf_minor = 0;
921 				df->adf_rev = DF_REV_2;
922 			}
923 		}
924 	} else if (df->adf_major == 4 && df->adf_minor >= 2) {
925 		/*
926 		 * These are devices that have the newer memory layout that
927 		 * moves the DF::DramBaseAddress to 0x200. Please see the df.h
928 		 * theory statement for more information.
929 		 */
930 		df->adf_rev = DF_REV_4D2;
931 	} else if (df->adf_major == 4) {
932 		df->adf_rev = DF_REV_4;
933 	} else {
934 		df->adf_rev = DF_REV_UNKNOWN;
935 	}
936 }
937 
938 /*
939  * All of the different versions of the DF have different ways of getting at and
940  * answering the question of how do I break a fabric ID into a corresponding
941  * socket, die, and component. Importantly the goal here is to obtain, cache,
942  * and normalize:
943  *
944  *  o The DF System Configuration
945  *  o The various Mask registers
946  *  o The Node ID
947  */
948 static void
amdzen_determine_fabric_decomp(amdzen_t * azn,amdzen_df_t * df)949 amdzen_determine_fabric_decomp(amdzen_t *azn, amdzen_df_t *df)
950 {
951 	uint32_t mask;
952 	df_fabric_decomp_t *decomp = &df->adf_decomp;
953 
954 	switch (df->adf_rev) {
955 	case DF_REV_2:
956 		df->adf_syscfg = amdzen_df_read32_bcast(azn, df, DF_SYSCFG_V2);
957 		switch (DF_SYSCFG_V2_GET_MY_TYPE(df->adf_syscfg)) {
958 		case DF_DIE_TYPE_CPU:
959 			mask = amdzen_df_read32_bcast(azn, df,
960 			    DF_DIEMASK_CPU_V2);
961 			break;
962 		case DF_DIE_TYPE_APU:
963 			mask = amdzen_df_read32_bcast(azn, df,
964 			    DF_DIEMASK_APU_V2);
965 			break;
966 		default:
967 			panic("DF thinks we're not on a CPU!");
968 		}
969 		df->adf_mask0 = mask;
970 
971 		/*
972 		 * DFv2 is a bit different in how the fabric mask register is
973 		 * phrased. Logically a fabric ID is broken into something that
974 		 * uniquely identifies a "node" (a particular die on a socket)
975 		 * and something that identifies a "component", e.g. a memory
976 		 * controller.
977 		 *
978 		 * Starting with DFv3, these registers logically called out how
979 		 * to separate the fabric ID first into a node and a component.
980 		 * Then the node was then broken down into a socket and die. In
981 		 * DFv2, there is no separate mask and shift of a node. Instead
982 		 * the socket and die are absolute offsets into the fabric ID
983 		 * rather than relative offsets into the node ID. As such, when
984 		 * we encounter DFv2, we fake up a node mask and shift and make
985 		 * it look like DFv3+.
986 		 */
987 		decomp->dfd_node_mask = DF_DIEMASK_V2_GET_SOCK_MASK(mask) |
988 		    DF_DIEMASK_V2_GET_DIE_MASK(mask);
989 		decomp->dfd_node_shift = DF_DIEMASK_V2_GET_DIE_SHIFT(mask);
990 		decomp->dfd_comp_mask = DF_DIEMASK_V2_GET_COMP_MASK(mask);
991 		decomp->dfd_comp_shift = 0;
992 
993 		decomp->dfd_sock_mask = DF_DIEMASK_V2_GET_SOCK_MASK(mask) >>
994 		    decomp->dfd_node_shift;
995 		decomp->dfd_die_mask = DF_DIEMASK_V2_GET_DIE_MASK(mask) >>
996 		    decomp->dfd_node_shift;
997 		decomp->dfd_sock_shift = DF_DIEMASK_V2_GET_SOCK_SHIFT(mask) -
998 		    decomp->dfd_node_shift;
999 		decomp->dfd_die_shift = DF_DIEMASK_V2_GET_DIE_SHIFT(mask) -
1000 		    decomp->dfd_node_shift;
1001 		ASSERT3U(decomp->dfd_die_shift, ==, 0);
1002 
1003 		/*
1004 		 * There is no register in the actual data fabric with the node
1005 		 * ID in DFv2 that we have found. Instead we take the first
1006 		 * entity's fabric ID and transform it into the node id.
1007 		 */
1008 		df->adf_nodeid = (df->adf_ents[0].adfe_fabric_id &
1009 		    decomp->dfd_node_mask) >> decomp->dfd_node_shift;
1010 		break;
1011 	case DF_REV_3:
1012 		df->adf_syscfg = amdzen_df_read32_bcast(azn, df, DF_SYSCFG_V3);
1013 		df->adf_mask0 =  amdzen_df_read32_bcast(azn, df,
1014 		    DF_FIDMASK0_V3);
1015 		df->adf_mask1 =  amdzen_df_read32_bcast(azn, df,
1016 		    DF_FIDMASK1_V3);
1017 
1018 		decomp->dfd_sock_mask =
1019 		    DF_FIDMASK1_V3_GET_SOCK_MASK(df->adf_mask1);
1020 		decomp->dfd_sock_shift =
1021 		    DF_FIDMASK1_V3_GET_SOCK_SHIFT(df->adf_mask1);
1022 		decomp->dfd_die_mask =
1023 		    DF_FIDMASK1_V3_GET_DIE_MASK(df->adf_mask1);
1024 		decomp->dfd_die_shift = 0;
1025 		decomp->dfd_node_mask =
1026 		    DF_FIDMASK0_V3_GET_NODE_MASK(df->adf_mask0);
1027 		decomp->dfd_node_shift =
1028 		    DF_FIDMASK1_V3_GET_NODE_SHIFT(df->adf_mask1);
1029 		decomp->dfd_comp_mask =
1030 		    DF_FIDMASK0_V3_GET_COMP_MASK(df->adf_mask0);
1031 		decomp->dfd_comp_shift = 0;
1032 
1033 		df->adf_nodeid = DF_SYSCFG_V3_GET_NODE_ID(df->adf_syscfg);
1034 		break;
1035 	case DF_REV_3P5:
1036 		df->adf_syscfg = amdzen_df_read32_bcast(azn, df,
1037 		    DF_SYSCFG_V3P5);
1038 		df->adf_mask0 =  amdzen_df_read32_bcast(azn, df,
1039 		    DF_FIDMASK0_V3P5);
1040 		df->adf_mask1 =  amdzen_df_read32_bcast(azn, df,
1041 		    DF_FIDMASK1_V3P5);
1042 		df->adf_mask2 =  amdzen_df_read32_bcast(azn, df,
1043 		    DF_FIDMASK2_V3P5);
1044 
1045 		decomp->dfd_sock_mask =
1046 		    DF_FIDMASK2_V3P5_GET_SOCK_MASK(df->adf_mask2);
1047 		decomp->dfd_sock_shift =
1048 		    DF_FIDMASK1_V3P5_GET_SOCK_SHIFT(df->adf_mask1);
1049 		decomp->dfd_die_mask =
1050 		    DF_FIDMASK2_V3P5_GET_DIE_MASK(df->adf_mask2);
1051 		decomp->dfd_die_shift = 0;
1052 		decomp->dfd_node_mask =
1053 		    DF_FIDMASK0_V3P5_GET_NODE_MASK(df->adf_mask0);
1054 		decomp->dfd_node_shift =
1055 		    DF_FIDMASK1_V3P5_GET_NODE_SHIFT(df->adf_mask1);
1056 		decomp->dfd_comp_mask =
1057 		    DF_FIDMASK0_V3P5_GET_COMP_MASK(df->adf_mask0);
1058 		decomp->dfd_comp_shift = 0;
1059 
1060 		df->adf_nodeid = DF_SYSCFG_V3P5_GET_NODE_ID(df->adf_syscfg);
1061 		break;
1062 	case DF_REV_4:
1063 	case DF_REV_4D2:
1064 		df->adf_syscfg = amdzen_df_read32_bcast(azn, df, DF_SYSCFG_V4);
1065 		df->adf_mask0 =  amdzen_df_read32_bcast(azn, df,
1066 		    DF_FIDMASK0_V4);
1067 		df->adf_mask1 =  amdzen_df_read32_bcast(azn, df,
1068 		    DF_FIDMASK1_V4);
1069 		df->adf_mask2 =  amdzen_df_read32_bcast(azn, df,
1070 		    DF_FIDMASK2_V4);
1071 
1072 		/*
1073 		 * The DFv4 registers are at a different location in the DF;
1074 		 * however, the actual layout of fields is the same as DFv3.5.
1075 		 * This is why you see V3P5 below.
1076 		 */
1077 		decomp->dfd_sock_mask =
1078 		    DF_FIDMASK2_V3P5_GET_SOCK_MASK(df->adf_mask2);
1079 		decomp->dfd_sock_shift =
1080 		    DF_FIDMASK1_V3P5_GET_SOCK_SHIFT(df->adf_mask1);
1081 		decomp->dfd_die_mask =
1082 		    DF_FIDMASK2_V3P5_GET_DIE_MASK(df->adf_mask2);
1083 		decomp->dfd_die_shift = 0;
1084 		decomp->dfd_node_mask =
1085 		    DF_FIDMASK0_V3P5_GET_NODE_MASK(df->adf_mask0);
1086 		decomp->dfd_node_shift =
1087 		    DF_FIDMASK1_V3P5_GET_NODE_SHIFT(df->adf_mask1);
1088 		decomp->dfd_comp_mask =
1089 		    DF_FIDMASK0_V3P5_GET_COMP_MASK(df->adf_mask0);
1090 		decomp->dfd_comp_shift = 0;
1091 
1092 		df->adf_nodeid = DF_SYSCFG_V4_GET_NODE_ID(df->adf_syscfg);
1093 		break;
1094 	default:
1095 		panic("encountered suspicious, previously rejected DF "
1096 		    "rev: 0x%x", df->adf_rev);
1097 	}
1098 }
1099 
1100 /*
1101  * The purpose of this function is to map CCMs to the corresponding CCDs that
1102  * exist. This is not an obvious thing as there is no direct mapping in the data
1103  * fabric between these IDs.
1104  *
1105  * Prior to DFv4, a given CCM was only ever connected to at most one CCD.
1106  * Starting in DFv4 a given CCM may have one or two SDP (scalable data ports)
1107  * that connect to CCDs. These may be connected to the same CCD or a different
1108  * one. When both ports are enabled we must check whether or not the port is
1109  * considered to be in wide mode. When wide mode is enabled then the two ports
1110  * are connected to a single CCD. If wide mode is disabled then the two ports
1111  * are connected to separate CCDs.
1112  *
1113  * The physical number of a CCD, which is how we determine the SMN aperture to
1114  * use, is based on the CCM ID. In most sockets we have seen up to a maximum of
1115  * 8 CCMs. When a CCM is connected to more than one CCD we have determined based
1116  * on some hints from AMD's ACPI information that the numbering is assumed to be
1117  * that CCM's number plus the total number of CCMs.
1118  *
1119  * More concretely, the SP5 Genoa/Bergamo Zen 4 platform has 8 CCMs. When there
1120  * are more than 8 CCDs installed then CCM 0 maps to CCDs 0 and 8. CCM 1 to CCDs
1121  * 1 and 9, etc. CCMs 4-7 map 1:1 to CCDs 4-7. However, the placement of CCDs
1122  * within the package has changed across generations.
1123  *
1124  * Notably in Rome and Milan (Zen 2/3) it appears that each quadrant had an
1125  * increasing number of CCDs. So CCDs 0/1 were together, 2/3, 4/5, and 6/7. This
1126  * meant that in cases where only a subset of CCDs were populated it'd forcibly
1127  * disable the higher CCD in a group (but with DFv3 the CCM would still be
1128  * enabled). So a 4 CCD config would generally enable CCDs 0, 2, 4, and 6 say.
1129  * This was almost certainly done to balance the NUMA config.
1130  *
1131  * Instead, starting in Genoa (Zen 4) the CCMs are round-robined around the
1132  * quadrants so CCMs (CCDs) 0 (0/8) and 4 (4) are together, 1 (1/9) and 5 (5),
1133  * etc. This is also why we more often see disabled CCMs in Genoa, but not in
1134  * Rome/Milan.
1135  *
1136  * When we're operating in wide mode and therefore both SDPs are connected to a
1137  * single CCD, we've always found that the lower CCD index will be used by the
1138  * system and the higher one is not considered present. Therefore, when
1139  * operating in wide mode, we need to make sure that whenever we have a non-zero
1140  * value for SDPs being connected that we rewrite this to only appear as a
1141  * single CCD is present. It's conceivable (though hard to imagine) that we
1142  * could get a value of 0b10 indicating that only the upper SDP link is active
1143  * for some reason.
1144  */
1145 static void
amdzen_setup_df_ccm(amdzen_t * azn,amdzen_df_t * df,amdzen_df_ent_t * dfe,uint32_t ccmno)1146 amdzen_setup_df_ccm(amdzen_t *azn, amdzen_df_t *df, amdzen_df_ent_t *dfe,
1147     uint32_t ccmno)
1148 {
1149 	amdzen_ccm_data_t *ccm = &dfe->adfe_data.aded_ccm;
1150 	uint32_t ccd_en;
1151 	boolean_t wide_en;
1152 
1153 	if (df->adf_rev >= DF_REV_4) {
1154 		uint32_t val = amdzen_df_read32(azn, df, dfe->adfe_inst_id,
1155 		    DF_CCD_EN_V4);
1156 		ccd_en = DF_CCD_EN_V4_GET_CCD_EN(val);
1157 
1158 		if (df->adf_rev == DF_REV_4D2) {
1159 			wide_en = DF_CCD_EN_V4D2_GET_WIDE_EN(val);
1160 		} else {
1161 			val = amdzen_df_read32(azn, df, dfe->adfe_inst_id,
1162 			    DF_CCMCFG4_V4);
1163 			wide_en = DF_CCMCFG4_V4_GET_WIDE_EN(val);
1164 		}
1165 
1166 		if (wide_en != 0 && ccd_en != 0) {
1167 			ccd_en = 0x1;
1168 		}
1169 	} else {
1170 		ccd_en = 0x1;
1171 	}
1172 
1173 	for (uint32_t i = 0; i < DF_MAX_CCDS_PER_CCM; i++) {
1174 		ccm->acd_ccd_en[i] = (ccd_en & (1 << i)) != 0;
1175 		if (ccm->acd_ccd_en[i] == 0)
1176 			continue;
1177 		ccm->acd_ccd_id[i] = ccmno + i * df->adf_nccm;
1178 		ccm->acd_nccds++;
1179 	}
1180 }
1181 
1182 /*
1183  * Initialize our knowledge about a given series of nodes on the data fabric.
1184  */
1185 static void
amdzen_setup_df(amdzen_t * azn,amdzen_df_t * df)1186 amdzen_setup_df(amdzen_t *azn, amdzen_df_t *df)
1187 {
1188 	uint_t i;
1189 	uint32_t val, ccmno;
1190 
1191 	amdzen_determine_df_vers(azn, df);
1192 
1193 	switch (df->adf_rev) {
1194 	case DF_REV_2:
1195 	case DF_REV_3:
1196 	case DF_REV_3P5:
1197 		val = amdzen_df_read32_bcast(azn, df, DF_CFG_ADDR_CTL_V2);
1198 		break;
1199 	case DF_REV_4:
1200 	case DF_REV_4D2:
1201 		val = amdzen_df_read32_bcast(azn, df, DF_CFG_ADDR_CTL_V4);
1202 		break;
1203 	default:
1204 		dev_err(azn->azn_dip, CE_WARN, "encountered unsupported DF "
1205 		    "revision: 0x%x", df->adf_rev);
1206 		return;
1207 	}
1208 	df->adf_nb_busno = DF_CFG_ADDR_CTL_GET_BUS_NUM(val);
1209 	val = amdzen_df_read32_bcast(azn, df, DF_FBICNT);
1210 	df->adf_nents = DF_FBICNT_GET_COUNT(val);
1211 	if (df->adf_nents == 0)
1212 		return;
1213 	df->adf_ents = kmem_zalloc(sizeof (amdzen_df_ent_t) * df->adf_nents,
1214 	    KM_SLEEP);
1215 
1216 	for (i = 0; i < df->adf_nents; i++) {
1217 		amdzen_df_ent_t *dfe = &df->adf_ents[i];
1218 		uint8_t inst = i;
1219 
1220 		/*
1221 		 * Unfortunately, Rome uses a discontinuous instance ID pattern
1222 		 * while everything else we can find uses a contiguous instance
1223 		 * ID pattern. This means that for Rome, we need to adjust the
1224 		 * indexes that we iterate over, though the total number of
1225 		 * entries is right. This was carried over into Milan, but not
1226 		 * Genoa.
1227 		 */
1228 		if (amdzen_is_rome_style(df->adf_funcs[0]->azns_did)) {
1229 			if (inst >= ARRAY_SIZE(amdzen_df_rome_ids)) {
1230 				dev_err(azn->azn_dip, CE_WARN, "Rome family "
1231 				    "processor reported more ids than the PPR, "
1232 				    "resetting %u to instance zero", inst);
1233 				inst = 0;
1234 			} else {
1235 				inst = amdzen_df_rome_ids[inst];
1236 			}
1237 		}
1238 
1239 		dfe->adfe_drvid = inst;
1240 		dfe->adfe_info0 = amdzen_df_read32(azn, df, inst, DF_FBIINFO0);
1241 		if (df->adf_rev <= DF_REV_4) {
1242 			dfe->adfe_info1 = amdzen_df_read32(azn, df, inst,
1243 			    DF_FBIINFO1);
1244 			dfe->adfe_info2 = amdzen_df_read32(azn, df, inst,
1245 			    DF_FBIINFO2);
1246 		}
1247 		dfe->adfe_info3 = amdzen_df_read32(azn, df, inst, DF_FBIINFO3);
1248 
1249 		dfe->adfe_type = DF_FBIINFO0_GET_TYPE(dfe->adfe_info0);
1250 		dfe->adfe_subtype = DF_FBIINFO0_GET_SUBTYPE(dfe->adfe_info0);
1251 
1252 		/*
1253 		 * The enabled flag was not present in Zen 1. Simulate it by
1254 		 * checking for a non-zero register instead.
1255 		 */
1256 		if (DF_FBIINFO0_V3_GET_ENABLED(dfe->adfe_info0) ||
1257 		    (df->adf_rev == DF_REV_2 && dfe->adfe_info0 != 0)) {
1258 			dfe->adfe_flags |= AMDZEN_DFE_F_ENABLED;
1259 		}
1260 		if (DF_FBIINFO0_GET_HAS_MCA(dfe->adfe_info0)) {
1261 			dfe->adfe_flags |= AMDZEN_DFE_F_MCA;
1262 		}
1263 
1264 		/*
1265 		 * Starting with DFv4 there is no instance ID in the fabric info
1266 		 * 3 register, so we instead grab it out of the driver ID which
1267 		 * is what it should be anyways.
1268 		 */
1269 		if (df->adf_rev >= DF_REV_4) {
1270 			dfe->adfe_inst_id = dfe->adfe_drvid;
1271 		} else {
1272 			dfe->adfe_inst_id =
1273 			    DF_FBIINFO3_GET_INSTID(dfe->adfe_info3);
1274 		}
1275 
1276 		switch (df->adf_rev) {
1277 		case DF_REV_2:
1278 			dfe->adfe_fabric_id =
1279 			    DF_FBIINFO3_V2_GET_BLOCKID(dfe->adfe_info3);
1280 			break;
1281 		case DF_REV_3:
1282 			dfe->adfe_fabric_id =
1283 			    DF_FBIINFO3_V3_GET_BLOCKID(dfe->adfe_info3);
1284 			break;
1285 		case DF_REV_3P5:
1286 			dfe->adfe_fabric_id =
1287 			    DF_FBIINFO3_V3P5_GET_BLOCKID(dfe->adfe_info3);
1288 			break;
1289 		case DF_REV_4:
1290 		case DF_REV_4D2:
1291 			dfe->adfe_fabric_id =
1292 			    DF_FBIINFO3_V4_GET_BLOCKID(dfe->adfe_info3);
1293 			break;
1294 		default:
1295 			panic("encountered suspicious, previously rejected DF "
1296 			    "rev: 0x%x", df->adf_rev);
1297 		}
1298 
1299 		/*
1300 		 * Record information about a subset of DF entities that we've
1301 		 * found. Currently we're tracking this only for CCMs.
1302 		 */
1303 		if ((dfe->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0)
1304 			continue;
1305 
1306 		if (amdzen_dfe_is_ccm(df, dfe)) {
1307 			df->adf_nccm++;
1308 		}
1309 	}
1310 
1311 	/*
1312 	 * Now that we have filled in all of our info, attempt to fill in
1313 	 * specific information about different types of instances.
1314 	 */
1315 	ccmno = 0;
1316 	for (uint_t i = 0; i < df->adf_nents; i++) {
1317 		amdzen_df_ent_t *dfe = &df->adf_ents[i];
1318 
1319 		if ((dfe->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0)
1320 			continue;
1321 
1322 		/*
1323 		 * Perform type and sub-type specific initialization. Currently
1324 		 * limited to CCMs.
1325 		 */
1326 		switch (dfe->adfe_type) {
1327 		case DF_TYPE_CCM:
1328 			amdzen_setup_df_ccm(azn, df, dfe, ccmno);
1329 			ccmno++;
1330 			break;
1331 		default:
1332 			break;
1333 		}
1334 	}
1335 
1336 	amdzen_determine_fabric_decomp(azn, df);
1337 }
1338 
1339 static void
amdzen_find_nb(amdzen_t * azn,amdzen_df_t * df)1340 amdzen_find_nb(amdzen_t *azn, amdzen_df_t *df)
1341 {
1342 	amdzen_stub_t *stub;
1343 
1344 	for (stub = list_head(&azn->azn_nb_stubs); stub != NULL;
1345 	    stub = list_next(&azn->azn_nb_stubs, stub)) {
1346 		if (stub->azns_bus == df->adf_nb_busno) {
1347 			df->adf_flags |= AMDZEN_DF_F_FOUND_NB;
1348 			df->adf_nb = stub;
1349 			return;
1350 		}
1351 	}
1352 }
1353 
1354 /*
1355  * We need to be careful using this function as different AMD generations have
1356  * acted in different ways when there is a missing CCD. We've found that in
1357  * hardware where the CCM is enabled but there is no CCD attached, it generally
1358  * is safe (i.e. DFv3 on Rome), but on DFv4 if we ask for a CCD that would
1359  * correspond to a disabled CCM then the firmware may inject a fatal error
1360  * (which is hopefully something missing in our RAS/MCA-X enablement).
1361  *
1362  * Put differently if this doesn't correspond to an Enabled CCM and you know the
1363  * number of valid CCDs on this, don't use it.
1364  */
1365 static boolean_t
amdzen_ccd_present(amdzen_t * azn,amdzen_df_t * df,uint32_t ccdno)1366 amdzen_ccd_present(amdzen_t *azn, amdzen_df_t *df, uint32_t ccdno)
1367 {
1368 	smn_reg_t die_reg = SMUPWR_CCD_DIE_ID(ccdno);
1369 	uint32_t val = amdzen_smn_read(azn, df, die_reg);
1370 	if (val == SMN_EINVAL32) {
1371 		return (B_FALSE);
1372 	}
1373 
1374 	ASSERT3U(ccdno, ==, SMUPWR_CCD_DIE_ID_GET(val));
1375 	return (B_TRUE);
1376 }
1377 
1378 static uint32_t
amdzen_ccd_thread_en(amdzen_t * azn,amdzen_df_t * df,uint32_t ccdno)1379 amdzen_ccd_thread_en(amdzen_t *azn, amdzen_df_t *df, uint32_t ccdno)
1380 {
1381 	smn_reg_t reg;
1382 
1383 	if (uarchrev_uarch(azn->azn_uarchrev) >= X86_UARCH_AMD_ZEN5) {
1384 		reg = L3SOC_THREAD_EN(ccdno);
1385 	} else {
1386 		reg = SMUPWR_THREAD_EN(ccdno);
1387 	}
1388 
1389 	return (amdzen_smn_read(azn, df, reg));
1390 }
1391 
1392 static uint32_t
amdzen_ccd_core_en(amdzen_t * azn,amdzen_df_t * df,uint32_t ccdno)1393 amdzen_ccd_core_en(amdzen_t *azn, amdzen_df_t *df, uint32_t ccdno)
1394 {
1395 	smn_reg_t reg;
1396 
1397 	if (uarchrev_uarch(azn->azn_uarchrev) >= X86_UARCH_AMD_ZEN5) {
1398 		reg = L3SOC_CORE_EN(ccdno);
1399 	} else {
1400 		reg = SMUPWR_CORE_EN(ccdno);
1401 	}
1402 
1403 	return (amdzen_smn_read(azn, df, reg));
1404 }
1405 
1406 static void
amdzen_ccd_info(amdzen_t * azn,amdzen_df_t * df,uint32_t ccdno,uint32_t * nccxp,uint32_t * nlcorep,uint32_t * nthrp)1407 amdzen_ccd_info(amdzen_t *azn, amdzen_df_t *df, uint32_t ccdno, uint32_t *nccxp,
1408     uint32_t *nlcorep, uint32_t *nthrp)
1409 {
1410 	uint32_t nccx, nlcore, smt;
1411 
1412 	if (uarchrev_uarch(azn->azn_uarchrev) >= X86_UARCH_AMD_ZEN5) {
1413 		smn_reg_t reg = L3SOC_THREAD_CFG(ccdno);
1414 		uint32_t val = amdzen_smn_read(azn, df, reg);
1415 		nccx = L3SOC_THREAD_CFG_GET_COMPLEX_COUNT(val) + 1;
1416 		nlcore = L3SOC_THREAD_CFG_GET_CORE_COUNT(val) + 1;
1417 		smt = L3SOC_THREAD_CFG_GET_SMT_MODE(val);
1418 	} else {
1419 		smn_reg_t reg = SMUPWR_THREAD_CFG(ccdno);
1420 		uint32_t val = amdzen_smn_read(azn, df, reg);
1421 		nccx = SMUPWR_THREAD_CFG_GET_COMPLEX_COUNT(val) + 1;
1422 		nlcore = SMUPWR_THREAD_CFG_GET_CORE_COUNT(val) + 1;
1423 		smt = SMUPWR_THREAD_CFG_GET_SMT_MODE(val);
1424 	}
1425 
1426 	if (nccxp != NULL) {
1427 		*nccxp = nccx;
1428 	}
1429 
1430 	if (nlcorep != NULL) {
1431 		*nlcorep = nlcore;
1432 	}
1433 
1434 	if (nthrp != NULL) {
1435 		/* The L3::L3SOC and SMU::PWR values are the same here */
1436 		if (smt == SMUPWR_THREAD_CFG_SMT_MODE_SMT) {
1437 			*nthrp = 2;
1438 		} else {
1439 			*nthrp = 1;
1440 		}
1441 	}
1442 }
1443 
1444 static void
amdzen_initpkg_to_apic(amdzen_t * azn,const uint32_t pkg0,const uint32_t pkg7)1445 amdzen_initpkg_to_apic(amdzen_t *azn, const uint32_t pkg0, const uint32_t pkg7)
1446 {
1447 	uint32_t nsock, nccd, nccx, ncore, nthr, extccx;
1448 	uint32_t nsock_bits, nccd_bits, nccx_bits, ncore_bits, nthr_bits;
1449 	amdzen_apic_decomp_t *apic = &azn->azn_apic_decomp;
1450 
1451 	/*
1452 	 * These are all 0 based values, meaning that we need to add one to each
1453 	 * of them. However, we skip this because to calculate the number of
1454 	 * bits to cover an entity we would subtract one.
1455 	 */
1456 	nthr = SCFCTP_PMREG_INITPKG0_GET_SMTEN(pkg0);
1457 	ncore = SCFCTP_PMREG_INITPKG7_GET_N_CORES(pkg7);
1458 	nccx = SCFCTP_PMREG_INITPKG7_GET_N_CCXS(pkg7);
1459 	nccd = SCFCTP_PMREG_INITPKG7_GET_N_DIES(pkg7);
1460 	nsock = SCFCTP_PMREG_INITPKG7_GET_N_SOCKETS(pkg7);
1461 
1462 	if (uarchrev_uarch(azn->azn_uarchrev) >= X86_UARCH_AMD_ZEN4) {
1463 		extccx = SCFCTP_PMREG_INITPKG7_ZEN4_GET_16TAPIC(pkg7);
1464 	} else {
1465 		extccx = 0;
1466 	}
1467 
1468 	nthr_bits = highbit(nthr);
1469 	ncore_bits = highbit(ncore);
1470 	nccx_bits = highbit(nccx);
1471 	nccd_bits = highbit(nccd);
1472 	nsock_bits = highbit(nsock);
1473 
1474 	apic->aad_thread_shift = 0;
1475 	apic->aad_thread_mask = (1 << nthr_bits) - 1;
1476 
1477 	apic->aad_core_shift = nthr_bits;
1478 	if (ncore_bits > 0) {
1479 		apic->aad_core_mask = (1 << ncore_bits) - 1;
1480 		apic->aad_core_mask <<= apic->aad_core_shift;
1481 	} else {
1482 		apic->aad_core_mask = 0;
1483 	}
1484 
1485 	/*
1486 	 * The APIC_16T_MODE bit indicates that the total shift to start the CCX
1487 	 * should be at 4 bits if it's not. It doesn't mean that the CCX portion
1488 	 * of the value should take up four bits. In the common Genoa case,
1489 	 * nccx_bits will be zero.
1490 	 */
1491 	apic->aad_ccx_shift = apic->aad_core_shift + ncore_bits;
1492 	if (extccx != 0 && apic->aad_ccx_shift < 4) {
1493 		apic->aad_ccx_shift = 4;
1494 	}
1495 	if (nccx_bits > 0) {
1496 		apic->aad_ccx_mask = (1 << nccx_bits) - 1;
1497 		apic->aad_ccx_mask <<= apic->aad_ccx_shift;
1498 	} else {
1499 		apic->aad_ccx_mask = 0;
1500 	}
1501 
1502 	apic->aad_ccd_shift = apic->aad_ccx_shift + nccx_bits;
1503 	if (nccd_bits > 0) {
1504 		apic->aad_ccd_mask = (1 << nccd_bits) - 1;
1505 		apic->aad_ccd_mask <<= apic->aad_ccd_shift;
1506 	} else {
1507 		apic->aad_ccd_mask = 0;
1508 	}
1509 
1510 	apic->aad_sock_shift = apic->aad_ccd_shift + nccd_bits;
1511 	if (nsock_bits > 0) {
1512 		apic->aad_sock_mask = (1 << nsock_bits) - 1;
1513 		apic->aad_sock_mask <<= apic->aad_sock_shift;
1514 	} else {
1515 		apic->aad_sock_mask = 0;
1516 	}
1517 
1518 	/*
1519 	 * Currently all supported Zen 2+ platforms only have a single die per
1520 	 * socket as compared to Zen 1. So this is always kept at zero.
1521 	 */
1522 	apic->aad_die_mask = 0;
1523 	apic->aad_die_shift = 0;
1524 }
1525 
1526 /*
1527  * We would like to determine what the logical APIC decomposition is on Zen 3
1528  * and newer family parts. While there is information added to CPUID in the form
1529  * of leaf 8X26, that isn't present in Zen 3, so instead we go to what we
1530  * believe is the underlying source of the CPUID data.
1531  *
1532  * Fundamentally there are a series of registers in SMN space that relate to the
1533  * SCFCTP. Coincidentally, there is one of these for each core and there are a
1534  * pair of related SMN registers. L3::SCFCTP::PMREG_INITPKG0 contains
 * information about a given core's logical and physical IDs. More interestingly
1536  * for this particular case, L3::SCFCTP::PMREG_INITPKG7, contains the overall
1537  * total number of logical entities. We've been promised that this has to be
1538  * the same across the fabric. That's all well and good, but this begs the
1539  * question of how do we actually get there. The above is a core-specific
1540  * register and requires that we understand information about which CCDs and
1541  * CCXs are actually present.
1542  *
1543  * So we are starting with a data fabric that has some CCM present. The CCM
1544  * entries in the data fabric may be tagged with our ENABLED flag.
1545  * Unfortunately, that can be true regardless of whether or not it's actually
1546  * present or not. As a result, we go to another chunk of SMN space registers,
1547  * SMU::PWR. These contain information about the CCDs, the physical cores that
1548  * are enabled, and related. So we will first walk the DF entities and see if we
1549  * can read its SMN::PWR::CCD_DIE_ID. If we get back a value of all 1s then
1550  * there is nothing present. Otherwise, we should get back something that
1551  * matches information in the data fabric.
1552  *
1553  * With that in hand, we can read the SMU::PWR::CORE_ENABLE register to
1554  * determine which physical cores are enabled in the CCD/CCX. That will finally
1555  * give us an index to get to our friend INITPKG7.
1556  */
1557 static boolean_t
amdzen_determine_apic_decomp_initpkg(amdzen_t * azn)1558 amdzen_determine_apic_decomp_initpkg(amdzen_t *azn)
1559 {
1560 	amdzen_df_t *df = &azn->azn_dfs[0];
1561 	uint32_t ccdno = 0;
1562 
1563 	for (uint_t i = 0; i < df->adf_nents; i++) {
1564 		const amdzen_df_ent_t *ent = &df->adf_ents[i];
1565 		if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0)
1566 			continue;
1567 
1568 		if (amdzen_dfe_is_ccm(df, ent)) {
1569 			uint32_t val, nccx, pkg7, pkg0;
1570 			smn_reg_t pkg7_reg, pkg0_reg;
1571 			int core_bit;
1572 			uint8_t pccxno, pcoreno;
1573 
1574 			if (!amdzen_ccd_present(azn, df, ccdno)) {
1575 				ccdno++;
1576 				continue;
1577 			}
1578 
1579 			/*
1580 			 * This die actually exists. Switch over to the core
1581 			 * enable register to find one to ask about physically.
1582 			 */
1583 			amdzen_ccd_info(azn, df, ccdno, &nccx, NULL, NULL);
1584 			val = amdzen_ccd_core_en(azn, df, ccdno);
1585 			if (val == 0) {
1586 				ccdno++;
1587 				continue;
1588 			}
1589 
1590 			/*
1591 			 * There exists an enabled physical core. Find the first
1592 			 * index of it and map it to the corresponding CCD and
1593 			 * CCX. ddi_ffs is the bit index, but we want the
1594 			 * physical core number, hence the -1.
1595 			 */
1596 			core_bit = ddi_ffs(val);
1597 			ASSERT3S(core_bit, !=, 0);
1598 			pcoreno = core_bit - 1;
1599 
1600 			/*
1601 			 * Unfortunately SMU::PWR::THREAD_CONFIGURATION gives us
1602 			 * the Number of logical cores that are present in the
1603 			 * complex, not the total number of physical cores.
1604 			 * Right now we do assume that the physical and logical
1605 			 * ccx numbering is equivalent (we have no other way of
1606 			 * knowing if it is or isn't right now) and that we'd
1607 			 * always have CCX0 before CCX1. AMD seems to suggest we
1608 			 * can assume this, though it is a worrisome assumption.
1609 			 */
1610 			pccxno = pcoreno / azn->azn_ncore_per_ccx;
1611 			ASSERT3U(pccxno, <, nccx);
1612 			pkg7_reg = SCFCTP_PMREG_INITPKG7(ccdno, pccxno,
1613 			    pcoreno);
1614 			pkg7 = amdzen_smn_read(azn, df, pkg7_reg);
1615 			pkg0_reg = SCFCTP_PMREG_INITPKG0(ccdno, pccxno,
1616 			    pcoreno);
1617 			pkg0 = amdzen_smn_read(azn, df, pkg0_reg);
1618 			amdzen_initpkg_to_apic(azn, pkg0, pkg7);
1619 			return (B_TRUE);
1620 		}
1621 	}
1622 
1623 	return (B_FALSE);
1624 }
1625 
1626 /*
1627  * We have the fun job of trying to figure out what the correct form of the APIC
1628  * decomposition should be and how to break that into its logical components.
1629  * The way that we get at this is generation-specific unfortunately. Here's how
1630  * it works out:
1631  *
1632  * Zen 1-2	This era of CPUs are deceptively simple. The PPR for a given
1633  *		family defines exactly how the APIC ID is broken into logical
1634  *		components and it's fixed. That is, depending on whether or
1635  *		not SMT is enabled. Zen 1 and Zen 2 use different schemes for
1636  *		constructing this. The way that we're supposed to check if SMT
1637  *		is enabled is to use AMD leaf 8X1E and ask how many threads per
1638  *		core there are. We use the x86 feature set to determine that
1639  *		instead.
1640  *
1641  *		More specifically the Zen 1 scheme is 7 bits long. The bits have
1642  *		the following meanings.
1643  *
1644  *		[6]   Socket ID
1645  *		[5:4] Node ID
1646  *		[3]   Logical CCX ID
1647  *		With SMT		Without SMT
1648  *		[2:1] Logical Core ID	[2]   hardcoded to zero
1649  *		[0] Thread ID		[1:0] Logical Core ID
1650  *
1651  *		The following is the Zen 2 scheme assuming SMT. The Zen 2 scheme
1652  *		without SMT shifts everything to the right by one bit.
1653  *
1654  *		[7]   Socket ID
1655  *		[6:4] Logical CCD ID
1656  *		[3]   Logical CCX ID
1657  *		[2:1] Logical Core ID
1658  *		[0]   Thread ID
1659  *
1660  * Zen 3	Zen 3 CPUs moved past the fixed APIC ID format that Zen 1 and
1661  *		Zen 2 had, but also don't give us the nice way of discovering
 *		this via CPUID that Zen 4 did. The APIC ID uses a given
1663  *		number of bits for each logical component that exists, but the
1664  *		exact number varies based on what's actually present. To get at
1665  *		this we use a piece of data that is embedded in the SCFCTP
1666  *		(Scalable Control Fabric, Clocks, Test, Power Gating). This can
1667  *		be used to determine how many logical entities of each kind the
1668  *		system thinks exist. While we could use the various CPUID
1669  *		topology items to try to speed this up, they don't tell us the
1670  *		die information that we need to do this.
1671  *
1672  * Zen 4+	Zen 4 introduced CPUID leaf 8000_0026h which gives us a means
1673  *		for determining how to extract the CCD, CCX, and related pieces
1674  *		out of the device. One thing we have to be aware of is that when
1675  *		the CCD and CCX shift are the same, that means that there is
1676  *		only a single CCX and therefore have to take that into account
1677  *		appropriately. This is the case generally on Zen 4 platforms,
1678  *		but not on Bergamo. Until we can confirm the actual CPUID leaf
1679  *		values that we receive in the cases of Bergamo and others, we
1680  *		opt instead to use the same SCFCTP scheme as Zen 3.
1681  */
1682 static boolean_t
amdzen_determine_apic_decomp(amdzen_t * azn)1683 amdzen_determine_apic_decomp(amdzen_t *azn)
1684 {
1685 	amdzen_apic_decomp_t *apic = &azn->azn_apic_decomp;
1686 	boolean_t smt = is_x86_feature(x86_featureset, X86FSET_HTT);
1687 
1688 	switch (uarchrev_uarch(azn->azn_uarchrev)) {
1689 	case X86_UARCH_AMD_ZEN1:
1690 	case X86_UARCH_AMD_ZENPLUS:
1691 		apic->aad_sock_mask = 0x40;
1692 		apic->aad_sock_shift = 6;
1693 		apic->aad_die_mask = 0x30;
1694 		apic->aad_die_shift = 4;
1695 		apic->aad_ccd_mask = 0;
1696 		apic->aad_ccd_shift = 0;
1697 		apic->aad_ccx_mask = 0x08;
1698 		apic->aad_ccx_shift = 3;
1699 
1700 		if (smt) {
1701 			apic->aad_core_mask = 0x06;
1702 			apic->aad_core_shift = 1;
1703 			apic->aad_thread_mask = 0x1;
1704 			apic->aad_thread_shift = 0;
1705 		} else {
1706 			apic->aad_core_mask = 0x03;
1707 			apic->aad_core_shift = 0;
1708 			apic->aad_thread_mask = 0;
1709 			apic->aad_thread_shift = 0;
1710 		}
1711 		break;
1712 	case X86_UARCH_AMD_ZEN2:
1713 		if (smt) {
1714 			apic->aad_sock_mask = 0x80;
1715 			apic->aad_sock_shift = 7;
1716 			apic->aad_die_mask = 0;
1717 			apic->aad_die_shift = 0;
1718 			apic->aad_ccd_mask = 0x70;
1719 			apic->aad_ccd_shift = 4;
1720 			apic->aad_ccx_mask = 0x08;
1721 			apic->aad_ccx_shift = 3;
1722 			apic->aad_core_mask = 0x06;
1723 			apic->aad_core_shift = 1;
1724 			apic->aad_thread_mask = 0x01;
1725 			apic->aad_thread_shift = 0;
1726 		} else {
1727 			apic->aad_sock_mask = 0x40;
1728 			apic->aad_sock_shift = 6;
1729 			apic->aad_die_mask = 0;
1730 			apic->aad_die_shift = 0;
1731 			apic->aad_ccd_mask = 0x38;
1732 			apic->aad_ccd_shift = 3;
1733 			apic->aad_ccx_mask = 0x04;
1734 			apic->aad_ccx_shift = 2;
1735 			apic->aad_core_mask = 0x3;
1736 			apic->aad_core_shift = 0;
1737 			apic->aad_thread_mask = 0;
1738 			apic->aad_thread_shift = 0;
1739 		}
1740 		break;
1741 	case X86_UARCH_AMD_ZEN3:
1742 	case X86_UARCH_AMD_ZEN4:
1743 	case X86_UARCH_AMD_ZEN5:
1744 		return (amdzen_determine_apic_decomp_initpkg(azn));
1745 	default:
1746 		return (B_FALSE);
1747 	}
1748 	return (B_TRUE);
1749 }
1750 
1751 /*
1752  * Snapshot the number of cores that can exist in a CCX based on the Zen
1753  * microarchitecture revision. In Zen 1-4 this has been a constant number
1754  * regardless of the actual CPU Family. In Zen 5 this varies based upon whether
1755  * or not dense dies are being used.
1756  */
1757 static void
amdzen_determine_ncore_per_ccx(amdzen_t * azn)1758 amdzen_determine_ncore_per_ccx(amdzen_t *azn)
1759 {
1760 	switch (uarchrev_uarch(azn->azn_uarchrev)) {
1761 	case X86_UARCH_AMD_ZEN1:
1762 	case X86_UARCH_AMD_ZENPLUS:
1763 	case X86_UARCH_AMD_ZEN2:
1764 		azn->azn_ncore_per_ccx = 4;
1765 		break;
1766 	case X86_UARCH_AMD_ZEN3:
1767 	case X86_UARCH_AMD_ZEN4:
1768 		azn->azn_ncore_per_ccx = 8;
1769 		break;
1770 	case X86_UARCH_AMD_ZEN5:
1771 		if (chiprev_family(azn->azn_chiprev) ==
1772 		    X86_PF_AMD_DENSE_TURIN) {
1773 			azn->azn_ncore_per_ccx = 16;
1774 		} else {
1775 			azn->azn_ncore_per_ccx = 8;
1776 		}
1777 		break;
1778 	default:
1779 		panic("asked about non-Zen or unknown uarch");
1780 	}
1781 }
1782 
1783 /*
1784  * Attempt to determine a logical CCD number of a given CCD where we don't have
1785  * hardware support for L3::SCFCTP::PMREG_INITPKG* (e.g. pre-Zen 3 systems).
 * The CCD numbers that we have are in the physical space. Likely because of
1787  * how the orientation of CCM numbers map to physical locations and the layout
1788  * of them within the package, we haven't found a good way using the core DFv3
1789  * registers to determine if a given CCD is actually present or not as generally
1790  * all the CCMs are left enabled. Instead we use SMU::PWR::DIE_ID as a proxy to
1791  * determine CCD presence.
1792  */
1793 static uint32_t
amdzen_ccd_log_id_zen2(amdzen_t * azn,amdzen_df_t * df,const amdzen_df_ent_t * targ)1794 amdzen_ccd_log_id_zen2(amdzen_t *azn, amdzen_df_t *df,
1795     const amdzen_df_ent_t *targ)
1796 {
1797 	uint32_t smnid = 0;
1798 	uint32_t logid = 0;
1799 
1800 	for (uint_t i = 0; i < df->adf_nents; i++) {
1801 		const amdzen_df_ent_t *ent = &df->adf_ents[i];
1802 
1803 		if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0) {
1804 			continue;
1805 		}
1806 
1807 		if (ent->adfe_inst_id == targ->adfe_inst_id) {
1808 			return (logid);
1809 		}
1810 
1811 		if (ent->adfe_type == targ->adfe_type &&
1812 		    ent->adfe_subtype == targ->adfe_subtype) {
1813 			boolean_t present = amdzen_ccd_present(azn, df, smnid);
1814 			smnid++;
1815 			if (present) {
1816 				logid++;
1817 			}
1818 		}
1819 	}
1820 
1821 	panic("asked to match against invalid DF entity %p in df %p", targ, df);
1822 }
1823 
1824 static void
amdzen_ccd_fill_core_initpkg0(amdzen_t * azn,amdzen_df_t * df,amdzen_topo_ccd_t * ccd,amdzen_topo_ccx_t * ccx,amdzen_topo_core_t * core,boolean_t * ccd_set,boolean_t * ccx_set)1825 amdzen_ccd_fill_core_initpkg0(amdzen_t *azn, amdzen_df_t *df,
1826     amdzen_topo_ccd_t *ccd, amdzen_topo_ccx_t *ccx, amdzen_topo_core_t *core,
1827     boolean_t *ccd_set, boolean_t *ccx_set)
1828 {
1829 	smn_reg_t pkg0_reg;
1830 	uint32_t pkg0;
1831 
1832 	pkg0_reg = SCFCTP_PMREG_INITPKG0(ccd->atccd_phys_no, ccx->atccx_phys_no,
1833 	    core->atcore_phys_no);
1834 	pkg0 = amdzen_smn_read(azn, df, pkg0_reg);
1835 	core->atcore_log_no = SCFCTP_PMREG_INITPKG0_GET_LOG_CORE(pkg0);
1836 
1837 	if (!*ccx_set) {
1838 		ccx->atccx_log_no = SCFCTP_PMREG_INITPKG0_GET_LOG_CCX(pkg0);
1839 		*ccx_set = B_TRUE;
1840 	}
1841 
1842 	if (!*ccd_set) {
1843 		ccd->atccd_log_no = SCFCTP_PMREG_INITPKG0_GET_LOG_DIE(pkg0);
1844 		*ccd_set = B_TRUE;
1845 	}
1846 }
1847 
1848 /*
1849  * Attempt to fill in the physical topology information for this given CCD.
1850  * There are a few steps to this that we undertake to perform this as follows:
1851  *
1852  * 1) First we determine whether the CCD is actually present or not by reading
1853  * SMU::PWR::DIE_ID. CCDs that are not installed will still have an enabled DF
 * entry it appears, but the request for the die ID will return an invalid
1855  * read (all 1s). This die ID should match what we think of as the SMN number
1856  * below. If not, we're in trouble and the rest of this is in question.
1857  *
1858  * 2) We use the SMU::PWR registers to determine how many logical and physical
1859  * cores are present in this CCD and how they are split amongst the CCX. Here we
1860  * need to encode the CPU to CCX core size rankings. Through this process we
1861  * determine and fill out which threads and cores are enabled.
1862  *
 * 3) In Zen 3+ we then will read each core's INITPKG0 values to ensure that we
1864  * have a proper physical to logical mapping, at which point we can fill in the
1865  * APIC IDs. For Zen 2, we will set the AMDZEN_TOPO_CCD_F_CORE_PHYS_UNKNOWN to
1866  * indicate that we just mapped the first logical processor to the first enabled
1867  * core.
1868  *
1869  * 4) Once we have the logical IDs determined we will construct the APIC ID that
1870  * we expect this to have.
1871  *
1872  * Steps (2) - (4) are intertwined and done together.
1873  */
static void
amdzen_ccd_fill_topo(amdzen_t *azn, amdzen_df_t *df, amdzen_df_ent_t *ent,
    amdzen_topo_ccd_t *ccd)
{
	uint32_t nccx, core_en, thread_en;
	uint32_t nlcore_per_ccx, nthreads_per_core;
	uint32_t sockid, dieid, compid;
	const uint32_t ccdno = ccd->atccd_phys_no;
	const x86_uarch_t uarch = uarchrev_uarch(azn->azn_uarchrev);
	boolean_t pkg0_ids, logccd_set = B_FALSE;

	/* Step (1): verify the CCD is actually present via SMU::PWR::DIE_ID. */
	ASSERT(MUTEX_HELD(&azn->azn_mutex));
	if (!amdzen_ccd_present(azn, df, ccdno)) {
		ccd->atccd_err = AMDZEN_TOPO_CCD_E_CCD_MISSING;
		return;
	}

	/*
	 * Step (2): gather the CCX count, logical cores per CCX, threads per
	 * core, and the core/thread enable bitmasks from SMU::PWR.
	 */
	amdzen_ccd_info(azn, df, ccdno, &nccx, &nlcore_per_ccx,
	    &nthreads_per_core);
	ASSERT3U(nccx, <=, AMDZEN_TOPO_CCD_MAX_CCX);

	core_en = amdzen_ccd_core_en(azn, df, ccdno);
	thread_en = amdzen_ccd_thread_en(azn, df, ccdno);

	/*
	 * The BSP is never enabled in a conventional sense and therefore the
	 * bit is reserved and left as 0. As the BSP should be in the first CCD,
	 * we go through and OR back in the bit lest we think the thread isn't
	 * enabled.
	 */
	if (ccdno == 0) {
		thread_en |= 1;
	}

	/* Zen 3+ can use INITPKG0 for physical-to-logical core mapping. */
	ccd->atccd_phys_no = ccdno;
	if (uarch >= X86_UARCH_AMD_ZEN3) {
		pkg0_ids = B_TRUE;
	} else {
		ccd->atccd_flags |= AMDZEN_TOPO_CCD_F_CORE_PHYS_UNKNOWN;
		pkg0_ids = B_FALSE;

		/*
		 * Determine the CCD logical ID for Zen 2 now since this doesn't
		 * rely upon needing a valid physical core.
		 */
		ccd->atccd_log_no = amdzen_ccd_log_id_zen2(azn, df, ent);
		logccd_set = B_TRUE;
	}

	/*
	 * To construct the APIC ID we need to know the socket and die (not CCD)
	 * this is on. We deconstruct the CCD's fabric ID to determine that.
	 */
	zen_fabric_id_decompose(&df->adf_decomp, ent->adfe_fabric_id, &sockid,
	    &dieid, &compid);

	/*
	 * At this point we have all the information about the CCD, the number
	 * of CCX instances, and which physical cores and threads are enabled.
	 * Currently we assume that if we have one CCX enabled, then it is
	 * always CCX0. We cannot find evidence of a two CCX supporting part
	 * that doesn't always ship with both CCXs present and enabled.
	 */
	ccd->atccd_nlog_ccx = ccd->atccd_nphys_ccx = nccx;
	for (uint32_t ccxno = 0; ccxno < nccx; ccxno++) {
		amdzen_topo_ccx_t *ccx = &ccd->atccd_ccx[ccxno];
		/* Slice this CCX's cores out of the CCD-wide enable mask. */
		const uint32_t core_mask = (1 << azn->azn_ncore_per_ccx) - 1;
		const uint32_t core_shift = ccxno * azn->azn_ncore_per_ccx;
		const uint32_t ccx_core_en = (core_en >> core_shift) &
		    core_mask;
		boolean_t logccx_set = B_FALSE;

		ccd->atccd_ccx_en[ccxno] = 1;
		ccx->atccx_phys_no = ccxno;
		ccx->atccx_nphys_cores = azn->azn_ncore_per_ccx;
		ccx->atccx_nlog_cores = nlcore_per_ccx;

		/* Without INITPKG0 (Zen 2) logical CCX == physical CCX. */
		if (!pkg0_ids) {
			ccx->atccx_log_no = ccx->atccx_phys_no;
			logccx_set = B_TRUE;
		}

		for (uint32_t coreno = 0, logcorezen2 = 0;
		    coreno < azn->azn_ncore_per_ccx; coreno++) {
			amdzen_topo_core_t *core = &ccx->atccx_cores[coreno];

			if ((ccx_core_en & (1 << coreno)) == 0) {
				continue;
			}

			ccx->atccx_core_en[coreno] = 1;
			core->atcore_phys_no = coreno;

			/*
			 * Now that we have the physical core number present, we
			 * must determine the logical core number and fill out
			 * the logical CCX/CCD if it has not been set. We must
			 * do this before we attempt to look at which threads
			 * are enabled, because that operates based upon logical
			 * core number.
			 *
			 * For Zen 2 we do not have INITPKG0 at our disposal. We
			 * currently assume (and tag for userland with the
			 * AMDZEN_TOPO_CCD_F_CORE_PHYS_UNKNOWN flag) that we are
			 * mapping logical cores to physicals in the order of
			 * appearance.
			 */
			if (pkg0_ids) {
				amdzen_ccd_fill_core_initpkg0(azn, df, ccd, ccx,
				    core, &logccd_set, &logccx_set);
			} else {
				core->atcore_log_no = logcorezen2;
				logcorezen2++;
			}

			/*
			 * Determining which bits to use for the thread is a bit
			 * weird here. Thread IDs within a CCX are logical, but
			 * there are always physically spaced CCX sizes. See the
			 * comment at the definition for SMU::PWR::THREAD_ENABLE
			 * for more information.
			 */
			const uint32_t thread_shift = (ccx->atccx_nphys_cores *
			    ccx->atccx_log_no + core->atcore_log_no) *
			    nthreads_per_core;
			const uint32_t thread_mask = (nthreads_per_core << 1) -
			    1;
			const uint32_t core_thread_en = (thread_en >>
			    thread_shift) & thread_mask;
			core->atcore_nthreads = nthreads_per_core;
			core->atcore_thr_en[0] = core_thread_en & 0x01;
			core->atcore_thr_en[1] = core_thread_en & 0x02;
#ifdef	DEBUG
			if (nthreads_per_core == 1) {
				VERIFY0(core->atcore_thr_en[1]);
			}
#endif
			/* Step (4): compose the APIC ID for each thread. */
			for (uint32_t thrno = 0; thrno < core->atcore_nthreads;
			    thrno++) {
				ASSERT3U(core->atcore_thr_en[thrno], !=, 0);

				zen_apic_id_compose(&azn->azn_apic_decomp,
				    sockid, dieid, ccd->atccd_log_no,
				    ccx->atccx_log_no, core->atcore_log_no,
				    thrno, &core->atcore_apicids[thrno]);

			}
		}

		ASSERT3U(logccx_set, ==, B_TRUE);
		ASSERT3U(logccd_set, ==, B_TRUE);
	}
}
2027 
/*
 * Taskq callback that completes nexus initialization once all of the stub
 * drivers have attached: map the stubs to DFs, set up each valid DF and find
 * its corresponding northbridge, determine the core and APIC topology, and
 * finally enumerate our child devices.
 */
static void
amdzen_nexus_init(void *arg)
{
	uint_t i;
	amdzen_t *azn = arg;

	/*
	 * Assign the requisite identifying information for this CPU.
	 */
	azn->azn_uarchrev = cpuid_getuarchrev(CPU);
	azn->azn_chiprev = cpuid_getchiprev(CPU);

	/*
	 * Go through all of the stubs and assign the DF entries.
	 */
	mutex_enter(&azn->azn_mutex);
	if (!amdzen_map_dfs(azn) || !amdzen_check_dfs(azn)) {
		azn->azn_flags |= AMDZEN_F_MAP_ERROR;
		goto done;
	}

	for (i = 0; i < AMDZEN_MAX_DFS; i++) {
		amdzen_df_t *df = &azn->azn_dfs[i];

		if ((df->adf_flags & AMDZEN_DF_F_VALID) == 0)
			continue;
		amdzen_setup_df(azn, df);
		amdzen_find_nb(azn, df);
	}

	amdzen_determine_ncore_per_ccx(azn);

	if (amdzen_determine_apic_decomp(azn)) {
		azn->azn_flags |= AMDZEN_F_APIC_DECOMP_VALID;
	}

	/*
	 * Not all children may be installed. As such, we do not treat the
	 * failure of a child as fatal to the driver.
	 */
	mutex_exit(&azn->azn_mutex);
	for (i = 0; i < ARRAY_SIZE(amdzen_children); i++) {
		(void) amdzen_create_child(azn, &amdzen_children[i]);
	}
	mutex_enter(&azn->azn_mutex);

done:
	/* Mark attach finished and wake anyone blocked on the taskq state. */
	azn->azn_flags &= ~AMDZEN_F_ATTACH_DISPATCHED;
	azn->azn_flags |= AMDZEN_F_ATTACH_COMPLETE;
	azn->azn_taskqid = TASKQID_INVALID;
	cv_broadcast(&azn->azn_cv);
	mutex_exit(&azn->azn_mutex);
}
2081 
2082 static int
amdzen_stub_scan_cb(dev_info_t * dip,void * arg)2083 amdzen_stub_scan_cb(dev_info_t *dip, void *arg)
2084 {
2085 	amdzen_t *azn = arg;
2086 	uint16_t vid, did;
2087 	int *regs;
2088 	uint_t nregs, i;
2089 	boolean_t match = B_FALSE;
2090 
2091 	if (dip == ddi_root_node()) {
2092 		return (DDI_WALK_CONTINUE);
2093 	}
2094 
2095 	/*
2096 	 * If a node in question is not a pci node, then we have no interest in
2097 	 * it as all the stubs that we care about are related to pci devices.
2098 	 */
2099 	if (strncmp("pci", ddi_get_name(dip), 3) != 0) {
2100 		return (DDI_WALK_PRUNECHILD);
2101 	}
2102 
2103 	/*
2104 	 * If we can't get a device or vendor ID and prove that this is an AMD
2105 	 * part, then we don't care about it.
2106 	 */
2107 	vid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
2108 	    "vendor-id", PCI_EINVAL16);
2109 	did = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
2110 	    "device-id", PCI_EINVAL16);
2111 	if (vid == PCI_EINVAL16 || did == PCI_EINVAL16) {
2112 		return (DDI_WALK_CONTINUE);
2113 	}
2114 
2115 	if (vid != AMDZEN_PCI_VID_AMD && vid != AMDZEN_PCI_VID_HYGON) {
2116 		return (DDI_WALK_CONTINUE);
2117 	}
2118 
2119 	for (i = 0; i < ARRAY_SIZE(amdzen_nb_ids); i++) {
2120 		if (amdzen_nb_ids[i] == did) {
2121 			match = B_TRUE;
2122 		}
2123 	}
2124 
2125 	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
2126 	    "reg", &regs, &nregs) != DDI_PROP_SUCCESS) {
2127 		return (DDI_WALK_CONTINUE);
2128 	}
2129 
2130 	if (nregs == 0) {
2131 		ddi_prop_free(regs);
2132 		return (DDI_WALK_CONTINUE);
2133 	}
2134 
2135 	if (PCI_REG_BUS_G(regs[0]) == AMDZEN_DF_BUSNO &&
2136 	    PCI_REG_DEV_G(regs[0]) >= AMDZEN_DF_FIRST_DEVICE) {
2137 		match = B_TRUE;
2138 	}
2139 
2140 	ddi_prop_free(regs);
2141 	if (match) {
2142 		mutex_enter(&azn->azn_mutex);
2143 		azn->azn_nscanned++;
2144 		mutex_exit(&azn->azn_mutex);
2145 	}
2146 
2147 	return (DDI_WALK_CONTINUE);
2148 }
2149 
/*
 * Taskq callback that walks the device tree to count the stub devices we
 * expect to attach (azn_nscanned). If nothing was found the platform is
 * flagged as unsupported; if everything we counted has already attached, the
 * nexus initialization task is dispatched immediately. Otherwise the last
 * arriving stub's attach will dispatch it (see amdzen_attach_stub()).
 */
static void
amdzen_stub_scan(void *arg)
{
	amdzen_t *azn = arg;

	mutex_enter(&azn->azn_mutex);
	azn->azn_nscanned = 0;
	mutex_exit(&azn->azn_mutex);

	ddi_walk_devs(ddi_root_node(), amdzen_stub_scan_cb, azn);

	mutex_enter(&azn->azn_mutex);
	azn->azn_flags &= ~AMDZEN_F_SCAN_DISPATCHED;
	azn->azn_flags |= AMDZEN_F_SCAN_COMPLETE;

	if (azn->azn_nscanned == 0) {
		azn->azn_flags |= AMDZEN_F_UNSUPPORTED;
		azn->azn_taskqid = TASKQID_INVALID;
		cv_broadcast(&azn->azn_cv);
	} else if (azn->azn_npresent == azn->azn_nscanned) {
		azn->azn_flags |= AMDZEN_F_ATTACH_DISPATCHED;
		azn->azn_taskqid = taskq_dispatch(system_taskq,
		    amdzen_nexus_init, azn, TQ_SLEEP);
	}
	mutex_exit(&azn->azn_mutex);
}
2176 
2177 /*
2178  * Unfortunately we can't really let the stubs detach as we may need them to be
2179  * available for client operations. We may be able to improve this if we know
2180  * that the actual nexus is going away. However, as long as it's active, we need
2181  * all the stubs.
2182  */
2183 int
amdzen_detach_stub(dev_info_t * dip,ddi_detach_cmd_t cmd)2184 amdzen_detach_stub(dev_info_t *dip, ddi_detach_cmd_t cmd)
2185 {
2186 	if (cmd == DDI_SUSPEND) {
2187 		return (DDI_SUCCESS);
2188 	}
2189 
2190 	return (DDI_FAILURE);
2191 }
2192 
/*
 * Attach entry point for the stub drivers. Validate that the device is one of
 * the northbridge or data fabric PCI functions that we care about, set up
 * config space access, and track it on the appropriate stub list. If this was
 * the last stub the scan was waiting on, dispatch the nexus initialization
 * task.
 */
int
amdzen_attach_stub(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int *regs, reg;
	uint_t nregs, i;
	uint16_t vid, did;
	amdzen_stub_t *stub;
	amdzen_t *azn = amdzen_data;
	boolean_t valid = B_FALSE;
	boolean_t nb = B_FALSE;

	if (cmd == DDI_RESUME) {
		return (DDI_SUCCESS);
	} else if (cmd != DDI_ATTACH) {
		return (DDI_FAILURE);
	}

	/*
	 * Make sure that the stub that we've been asked to attach is a pci type
	 * device. If not, then there is no reason for us to proceed.
	 */
	if (strncmp("pci", ddi_get_name(dip), 3) != 0) {
		dev_err(dip, CE_WARN, "asked to attach a bad AMD Zen nexus "
		    "stub: %s", ddi_get_name(dip));
		return (DDI_FAILURE);
	}
	vid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "vendor-id", PCI_EINVAL16);
	did = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "device-id", PCI_EINVAL16);
	if (vid == PCI_EINVAL16 || did == PCI_EINVAL16) {
		dev_err(dip, CE_WARN, "failed to get PCI ID properties");
		return (DDI_FAILURE);
	}

	if (vid != AMDZEN_PCI_VID_AMD && vid != AMDZEN_PCI_VID_HYGON) {
		dev_err(dip, CE_WARN, "expected vendor ID (0x%x), found 0x%x",
		    cpuid_getvendor(CPU) == X86_VENDOR_HYGON ?
		    AMDZEN_PCI_VID_HYGON : AMDZEN_PCI_VID_AMD, vid);
		return (DDI_FAILURE);
	}

	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "reg", &regs, &nregs) != DDI_PROP_SUCCESS) {
		dev_err(dip, CE_WARN, "failed to get 'reg' property");
		return (DDI_FAILURE);
	}

	if (nregs == 0) {
		ddi_prop_free(regs);
		dev_err(dip, CE_WARN, "missing 'reg' property values");
		return (DDI_FAILURE);
	}
	reg = *regs;
	ddi_prop_free(regs);

	/* Northbridge stubs are identified by their PCI device ID. */
	for (i = 0; i < ARRAY_SIZE(amdzen_nb_ids); i++) {
		if (amdzen_nb_ids[i] == did) {
			valid = B_TRUE;
			nb = B_TRUE;
		}
	}

	/* DF stubs instead live at a fixed bus and device number range. */
	if (!valid && PCI_REG_BUS_G(reg) == AMDZEN_DF_BUSNO &&
	    PCI_REG_DEV_G(reg) >= AMDZEN_DF_FIRST_DEVICE) {
		valid = B_TRUE;
		nb = B_FALSE;
	}

	if (!valid) {
		dev_err(dip, CE_WARN, "device %s didn't match the nexus list",
		    ddi_get_name(dip));
		return (DDI_FAILURE);
	}

	stub = kmem_alloc(sizeof (amdzen_stub_t), KM_SLEEP);
	if (pci_config_setup(dip, &stub->azns_cfgspace) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "failed to set up config space");
		kmem_free(stub, sizeof (amdzen_stub_t));
		return (DDI_FAILURE);
	}

	stub->azns_dip = dip;
	stub->azns_vid = vid;
	stub->azns_did = did;
	stub->azns_bus = PCI_REG_BUS_G(reg);
	stub->azns_dev = PCI_REG_DEV_G(reg);
	stub->azns_func = PCI_REG_FUNC_G(reg);
	ddi_set_driver_private(dip, stub);

	mutex_enter(&azn->azn_mutex);
	azn->azn_npresent++;
	if (nb) {
		list_insert_tail(&azn->azn_nb_stubs, stub);
	} else {
		list_insert_tail(&azn->azn_df_stubs, stub);
	}

	/*
	 * If the scan already finished and this was the last stub it counted,
	 * kick off the nexus initialization.
	 */
	if ((azn->azn_flags & AMDZEN_F_TASKQ_MASK) == AMDZEN_F_SCAN_COMPLETE &&
	    azn->azn_nscanned == azn->azn_npresent) {
		azn->azn_flags |= AMDZEN_F_ATTACH_DISPATCHED;
		azn->azn_taskqid = taskq_dispatch(system_taskq,
		    amdzen_nexus_init, azn, TQ_SLEEP);
	}
	mutex_exit(&azn->azn_mutex);

	return (DDI_SUCCESS);
}
2301 
2302 static int
amdzen_bus_ctl(dev_info_t * dip,dev_info_t * rdip,ddi_ctl_enum_t ctlop,void * arg,void * result)2303 amdzen_bus_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
2304     void *arg, void *result)
2305 {
2306 	char buf[32];
2307 	dev_info_t *child;
2308 	const amdzen_child_data_t *acd;
2309 
2310 	switch (ctlop) {
2311 	case DDI_CTLOPS_REPORTDEV:
2312 		if (rdip == NULL) {
2313 			return (DDI_FAILURE);
2314 		}
2315 		cmn_err(CE_CONT, "amdzen nexus: %s@%s, %s%d\n",
2316 		    ddi_node_name(rdip), ddi_get_name_addr(rdip),
2317 		    ddi_driver_name(rdip), ddi_get_instance(rdip));
2318 		break;
2319 	case DDI_CTLOPS_INITCHILD:
2320 		child = arg;
2321 		if (child == NULL) {
2322 			dev_err(dip, CE_WARN, "!no child passed for "
2323 			    "DDI_CTLOPS_INITCHILD");
2324 		}
2325 
2326 		acd = ddi_get_parent_data(child);
2327 		if (acd == NULL) {
2328 			dev_err(dip, CE_WARN, "!missing child parent data");
2329 			return (DDI_FAILURE);
2330 		}
2331 
2332 		if (snprintf(buf, sizeof (buf), "%d", acd->acd_addr) >=
2333 		    sizeof (buf)) {
2334 			dev_err(dip, CE_WARN, "!failed to construct device "
2335 			    "addr due to overflow");
2336 			return (DDI_FAILURE);
2337 		}
2338 
2339 		ddi_set_name_addr(child, buf);
2340 		break;
2341 	case DDI_CTLOPS_UNINITCHILD:
2342 		child = arg;
2343 		if (child == NULL) {
2344 			dev_err(dip, CE_WARN, "!no child passed for "
2345 			    "DDI_CTLOPS_UNINITCHILD");
2346 		}
2347 
2348 		ddi_set_name_addr(child, NULL);
2349 		break;
2350 	default:
2351 		return (ddi_ctlops(dip, rdip, ctlop, arg, result));
2352 	}
2353 	return (DDI_SUCCESS);
2354 }
2355 
2356 static int
amdzen_topo_open(dev_t * devp,int flag,int otyp,cred_t * credp)2357 amdzen_topo_open(dev_t *devp, int flag, int otyp, cred_t *credp)
2358 {
2359 	minor_t m;
2360 	amdzen_t *azn = amdzen_data;
2361 
2362 	if (crgetzoneid(credp) != GLOBAL_ZONEID ||
2363 	    secpolicy_sys_config(credp, B_FALSE) != 0) {
2364 		return (EPERM);
2365 	}
2366 
2367 	if ((flag & (FEXCL | FNDELAY | FNONBLOCK)) != 0) {
2368 		return (EINVAL);
2369 	}
2370 
2371 	if (otyp != OTYP_CHR) {
2372 		return (EINVAL);
2373 	}
2374 
2375 	m = getminor(*devp);
2376 	if (m != AMDZEN_MINOR_TOPO) {
2377 		return (ENXIO);
2378 	}
2379 
2380 	mutex_enter(&azn->azn_mutex);
2381 	if ((azn->azn_flags & AMDZEN_F_IOCTL_MASK) !=
2382 	    AMDZEN_F_ATTACH_COMPLETE) {
2383 		mutex_exit(&azn->azn_mutex);
2384 		return (ENOTSUP);
2385 	}
2386 	mutex_exit(&azn->azn_mutex);
2387 
2388 	return (0);
2389 }
2390 
2391 static int
amdzen_topo_ioctl_base(amdzen_t * azn,intptr_t arg,int mode)2392 amdzen_topo_ioctl_base(amdzen_t *azn, intptr_t arg, int mode)
2393 {
2394 	amdzen_topo_base_t base;
2395 
2396 	bzero(&base, sizeof (base));
2397 	mutex_enter(&azn->azn_mutex);
2398 	base.atb_ndf = azn->azn_ndfs;
2399 
2400 	if ((azn->azn_flags & AMDZEN_F_APIC_DECOMP_VALID) == 0) {
2401 		mutex_exit(&azn->azn_mutex);
2402 		return (ENOTSUP);
2403 	}
2404 
2405 	base.atb_apic_decomp = azn->azn_apic_decomp;
2406 	for (uint_t i = 0; i < azn->azn_ndfs; i++) {
2407 		const amdzen_df_t *df = &azn->azn_dfs[i];
2408 
2409 		base.atb_maxdfent = MAX(base.atb_maxdfent, df->adf_nents);
2410 		if (i == 0) {
2411 			base.atb_rev = df->adf_rev;
2412 			base.atb_df_decomp = df->adf_decomp;
2413 		}
2414 	}
2415 	mutex_exit(&azn->azn_mutex);
2416 
2417 	if (ddi_copyout(&base, (void *)(uintptr_t)arg, sizeof (base),
2418 	    mode & FKIOCTL) != 0) {
2419 		return (EFAULT);
2420 	}
2421 
2422 	return (0);
2423 }
2424 
2425 /*
2426  * Fill in the peers. We only have this information prior to DF 4D2.  The way we
 * do this is to just fill in all the entries and then zero out the ones that
2428  * aren't valid.
2429  */
2430 static void
amdzen_topo_ioctl_df_fill_peers(const amdzen_df_t * df,const amdzen_df_ent_t * ent,amdzen_topo_df_ent_t * topo_ent)2431 amdzen_topo_ioctl_df_fill_peers(const amdzen_df_t *df,
2432     const amdzen_df_ent_t *ent, amdzen_topo_df_ent_t *topo_ent)
2433 {
2434 	topo_ent->atde_npeers = DF_FBIINFO0_GET_FTI_PCNT(ent->adfe_info0);
2435 
2436 	if (df->adf_rev >= DF_REV_4D2) {
2437 		bzero(topo_ent->atde_peers, sizeof (topo_ent->atde_npeers));
2438 		return;
2439 	}
2440 
2441 	topo_ent->atde_peers[0] = DF_FBINFO1_GET_FTI0_NINSTID(ent->adfe_info1);
2442 	topo_ent->atde_peers[1] = DF_FBINFO1_GET_FTI1_NINSTID(ent->adfe_info1);
2443 	topo_ent->atde_peers[2] = DF_FBINFO1_GET_FTI2_NINSTID(ent->adfe_info1);
2444 	topo_ent->atde_peers[3] = DF_FBINFO1_GET_FTI3_NINSTID(ent->adfe_info1);
2445 	topo_ent->atde_peers[4] = DF_FBINFO2_GET_FTI4_NINSTID(ent->adfe_info2);
2446 	topo_ent->atde_peers[5] = DF_FBINFO2_GET_FTI5_NINSTID(ent->adfe_info2);
2447 
2448 	for (uint32_t i = topo_ent->atde_npeers; i < AMDZEN_TOPO_DF_MAX_PEERS;
2449 	    i++) {
2450 		topo_ent->atde_peers[i] = 0;
2451 	}
2452 }
2453 
2454 static void
amdzen_topo_ioctl_df_fill_ccm(const amdzen_df_ent_t * ent,amdzen_topo_df_ent_t * topo_ent)2455 amdzen_topo_ioctl_df_fill_ccm(const amdzen_df_ent_t *ent,
2456     amdzen_topo_df_ent_t *topo_ent)
2457 {
2458 	const amdzen_ccm_data_t *ccm = &ent->adfe_data.aded_ccm;
2459 	amdzen_topo_ccm_data_t *topo_ccm = &topo_ent->atde_data.atded_ccm;
2460 
2461 	topo_ccm->atcd_nccds = ccm->acd_nccds;
2462 	for (uint32_t i = 0; i < DF_MAX_CCDS_PER_CCM; i++) {
2463 		topo_ccm->atcd_ccd_en[i] = ccm->acd_ccd_en[i];
2464 		topo_ccm->atcd_ccd_ids[i] = ccm->acd_ccd_id[i];
2465 	}
2466 }
2467 
/*
 * Handler for AMDZEN_TOPO_IOCTL_DF: copy out the requested DF's identity and
 * decomposition information and, if the caller supplied an entry buffer, as
 * many enabled DF entries as fit. Supports both ILP32 and LP64 callers via
 * the data-model conversion dance below.
 */
static int
amdzen_topo_ioctl_df(amdzen_t *azn, intptr_t arg, int mode)
{
	uint_t model;
	uint32_t max_ents, nwritten;
	const amdzen_df_t *df;
	amdzen_topo_df_t topo_df;
#ifdef	_MULTI_DATAMODEL
	amdzen_topo_df32_t topo_df32;
#endif

	/* Copy in the request, normalizing an ILP32 caller's structure. */
	model = ddi_model_convert_from(mode);
	switch (model) {
#ifdef	_MULTI_DATAMODEL
	case DDI_MODEL_ILP32:
		if (ddi_copyin((void *)(uintptr_t)arg, &topo_df32,
		    sizeof (topo_df32), mode & FKIOCTL) != 0) {
			return (EFAULT);
		}
		bzero(&topo_df, sizeof (topo_df));
		topo_df.atd_dfno = topo_df32.atd_dfno;
		topo_df.atd_df_buf_nents = topo_df32.atd_df_buf_nents;
		topo_df.atd_df_ents = (void *)(uintptr_t)topo_df32.atd_df_ents;
		break;
#endif
	case DDI_MODEL_NONE:
		if (ddi_copyin((void *)(uintptr_t)arg, &topo_df,
		    sizeof (topo_df), mode & FKIOCTL) != 0) {
			return (EFAULT);
		}
		break;
	default:
		return (ENOTSUP);
	}

	mutex_enter(&azn->azn_mutex);
	if (topo_df.atd_dfno >= azn->azn_ndfs) {
		mutex_exit(&azn->azn_mutex);
		return (EINVAL);
	}

	/* Fill in the DF-wide identity from our cached state. */
	df = &azn->azn_dfs[topo_df.atd_dfno];
	topo_df.atd_nodeid = df->adf_nodeid;
	topo_df.atd_sockid = (df->adf_nodeid & df->adf_decomp.dfd_sock_mask) >>
	    df->adf_decomp.dfd_sock_shift;
	topo_df.atd_dieid = (df->adf_nodeid & df->adf_decomp.dfd_die_mask) >>
	    df->adf_decomp.dfd_die_shift;
	topo_df.atd_rev = df->adf_rev;
	topo_df.atd_major = df->adf_major;
	topo_df.atd_minor = df->adf_minor;
	topo_df.atd_df_act_nents = df->adf_nents;
	max_ents = MIN(topo_df.atd_df_buf_nents, df->adf_nents);

	/* A NULL buffer means the caller only wants the sizing information. */
	if (topo_df.atd_df_ents == NULL) {
		topo_df.atd_df_buf_nvalid = 0;
		mutex_exit(&azn->azn_mutex);
		goto copyout;
	}

	nwritten = 0;
	for (uint32_t i = 0; i < max_ents; i++) {
		amdzen_topo_df_ent_t topo_ent;
		const amdzen_df_ent_t *ent = &df->adf_ents[i];

		/*
		 * We opt not to include disabled elements right now. They
		 * generally don't have a valid type and there isn't much useful
		 * information we can get from them. This can be changed if we
		 * find a use case for them for userland topo.
		 */
		if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0)
			continue;

		bzero(&topo_ent, sizeof (topo_ent));
		topo_ent.atde_type = ent->adfe_type;
		topo_ent.atde_subtype = ent->adfe_subtype;
		topo_ent.atde_fabric_id = ent->adfe_fabric_id;
		topo_ent.atde_inst_id = ent->adfe_inst_id;
		amdzen_topo_ioctl_df_fill_peers(df, ent, &topo_ent);

		if (amdzen_dfe_is_ccm(df, ent)) {
			amdzen_topo_ioctl_df_fill_ccm(ent, &topo_ent);
		}

		if (ddi_copyout(&topo_ent, &topo_df.atd_df_ents[nwritten],
		    sizeof (topo_ent), mode & FKIOCTL) != 0) {
			mutex_exit(&azn->azn_mutex);
			return (EFAULT);
		}
		nwritten++;
	}
	mutex_exit(&azn->azn_mutex);

	topo_df.atd_df_buf_nvalid = nwritten;
copyout:
	/* Copy the results back out in the caller's data model. */
	switch (model) {
#ifdef	_MULTI_DATAMODEL
	case DDI_MODEL_ILP32:
		topo_df32.atd_nodeid = topo_df.atd_nodeid;
		topo_df32.atd_sockid = topo_df.atd_sockid;
		topo_df32.atd_dieid = topo_df.atd_dieid;
		topo_df32.atd_rev = topo_df.atd_rev;
		topo_df32.atd_major = topo_df.atd_major;
		topo_df32.atd_minor = topo_df.atd_minor;
		topo_df32.atd_df_buf_nvalid = topo_df.atd_df_buf_nvalid;
		topo_df32.atd_df_act_nents = topo_df.atd_df_act_nents;

		if (ddi_copyout(&topo_df32, (void *)(uintptr_t)arg,
		    sizeof (topo_df32), mode & FKIOCTL) != 0) {
			return (EFAULT);
		}
		break;
#endif
	case DDI_MODEL_NONE:
		if (ddi_copyout(&topo_df, (void *)(uintptr_t)arg,
		    sizeof (topo_df), mode & FKIOCTL) != 0) {
			return (EFAULT);
		}
		break;
	default:
		break;
	}


	return (0);
}
2594 
/*
 * Handler for AMDZEN_TOPO_IOCTL_CCD: look up (lazily constructing and caching
 * on first use) the detailed topology for the CCD the caller identified by DF
 * number, CCM instance ID, and physical CCD number, and copy it out.
 * Request-specific failures are reported through atccd_err rather than an
 * errno so the caller can distinguish them.
 */
static int
amdzen_topo_ioctl_ccd(amdzen_t *azn, intptr_t arg, int mode)
{
	amdzen_topo_ccd_t ccd, *ccdp;
	amdzen_df_t *df;
	amdzen_df_ent_t *ent;
	amdzen_ccm_data_t *ccm;
	uint32_t ccdno;
	size_t copyin_size = offsetof(amdzen_topo_ccd_t, atccd_err);

	/*
	 * Only copy in the identifying information so that way we can ensure
	 * the rest of the structure we return to the user doesn't contain
	 * anything unexpected in it.
	 */
	bzero(&ccd, sizeof (ccd));
	if (ddi_copyin((void *)(uintptr_t)arg, &ccd, copyin_size,
	    mode & FKIOCTL) != 0) {
		return (EFAULT);
	}

	mutex_enter(&azn->azn_mutex);
	if ((azn->azn_flags & AMDZEN_F_APIC_DECOMP_VALID) == 0) {
		ccd.atccd_err = AMDZEN_TOPO_CCD_E_NO_APIC_DECOMP;
		goto copyout;
	}

	df = amdzen_df_find(azn, ccd.atccd_dfno);
	if (df == NULL) {
		ccd.atccd_err = AMDZEN_TOPO_CCD_E_BAD_DFNO;
		goto copyout;
	}

	/*
	 * We don't have enough information to know how to construct this
	 * information in Zen 1 at this time, so refuse.
	 */
	if (df->adf_rev <= DF_REV_2) {
		ccd.atccd_err = AMDZEN_TOPO_CCD_E_SOC_UNSUPPORTED;
		goto copyout;
	}

	ent = amdzen_df_ent_find_by_instid(df, ccd.atccd_instid);
	if (ent == NULL) {
		ccd.atccd_err = AMDZEN_TOPO_CCD_E_BAD_INSTID;
		goto copyout;
	}

	if (!amdzen_dfe_is_ccm(df, ent)) {
		ccd.atccd_err = AMDZEN_TOPO_CCD_E_NOT_A_CCD;
		goto copyout;
	}

	/* Verify the requested physical CCD actually sits behind this CCM. */
	ccm = &ent->adfe_data.aded_ccm;
	for (ccdno = 0; ccdno < DF_MAX_CCDS_PER_CCM; ccdno++) {
		if (ccm->acd_ccd_en[ccdno] != 0 &&
		    ccm->acd_ccd_id[ccdno] == ccd.atccd_phys_no) {
			break;
		}
	}

	if (ccdno == DF_MAX_CCDS_PER_CCM) {
		ccd.atccd_err = AMDZEN_TOPO_CCD_E_NOT_A_CCD;
		goto copyout;
	}

	if (ccm->acd_ccd_data[ccdno] == NULL) {
		/*
		 * We don't actually have this data. Go fill it out and save it
		 * for future use.
		 */
		ccdp = kmem_zalloc(sizeof (amdzen_topo_ccd_t), KM_NOSLEEP_LAZY);
		if (ccdp == NULL) {
			mutex_exit(&azn->azn_mutex);
			return (ENOMEM);
		}

		ccdp->atccd_dfno = ccd.atccd_dfno;
		ccdp->atccd_instid = ccd.atccd_instid;
		ccdp->atccd_phys_no = ccd.atccd_phys_no;
		amdzen_ccd_fill_topo(azn, df, ent, ccdp);
		ccm->acd_ccd_data[ccdno] = ccdp;
	}
	ASSERT3P(ccm->acd_ccd_data[ccdno], !=, NULL);
	bcopy(ccm->acd_ccd_data[ccdno], &ccd, sizeof (ccd));

copyout:
	mutex_exit(&azn->azn_mutex);
	if (ddi_copyout(&ccd, (void *)(uintptr_t)arg, sizeof (ccd),
	    mode & FKIOCTL) != 0) {
		return (EFAULT);
	}

	return (0);
}
2690 
2691 static int
amdzen_topo_ioctl(dev_t dev,int cmd,intptr_t arg,int mode,cred_t * credp,int * rvalp)2692 amdzen_topo_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
2693     cred_t *credp, int *rvalp)
2694 {
2695 	int ret;
2696 	amdzen_t *azn = amdzen_data;
2697 
2698 	if (getminor(dev) != AMDZEN_MINOR_TOPO) {
2699 		return (ENXIO);
2700 	}
2701 
2702 	if ((mode & FREAD) == 0) {
2703 		return (EBADF);
2704 	}
2705 
2706 	switch (cmd) {
2707 	case AMDZEN_TOPO_IOCTL_BASE:
2708 		ret = amdzen_topo_ioctl_base(azn, arg, mode);
2709 		break;
2710 	case AMDZEN_TOPO_IOCTL_DF:
2711 		ret = amdzen_topo_ioctl_df(azn, arg, mode);
2712 		break;
2713 	case AMDZEN_TOPO_IOCTL_CCD:
2714 		ret = amdzen_topo_ioctl_ccd(azn, arg, mode);
2715 		break;
2716 	default:
2717 		ret = ENOTTY;
2718 		break;
2719 	}
2720 
2721 	return (ret);
2722 }
2723 
2724 static int
amdzen_topo_close(dev_t dev,int flag,int otyp,cred_t * credp)2725 amdzen_topo_close(dev_t dev, int flag, int otyp, cred_t *credp)
2726 {
2727 	if (otyp != OTYP_CHR) {
2728 		return (EINVAL);
2729 	}
2730 
2731 	if (getminor(dev) != AMDZEN_MINOR_TOPO) {
2732 		return (ENXIO);
2733 	}
2734 
2735 	return (0);
2736 }
2737 
/*
 * Standard attach(9E) entry point. The driver is a single global instance
 * backed by amdzen_data: we refuse a second attach, create the "topo"
 * character minor node used by the topology ioctl interface, and then kick
 * off the asynchronous PCI stub scan on the system taskq.
 */
static int
amdzen_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	amdzen_t *azn = amdzen_data;

	/* Nothing is needed on resume; only a fresh attach does work. */
	if (cmd == DDI_RESUME) {
		return (DDI_SUCCESS);
	} else if (cmd != DDI_ATTACH) {
		return (DDI_FAILURE);
	}

	mutex_enter(&azn->azn_mutex);
	/* A non-NULL azn_dip means a prior attach already claimed us. */
	if (azn->azn_dip != NULL) {
		dev_err(dip, CE_WARN, "driver is already attached!");
		mutex_exit(&azn->azn_mutex);
		return (DDI_FAILURE);
	}

	if (ddi_create_minor_node(dip, "topo", S_IFCHR, AMDZEN_MINOR_TOPO,
	    DDI_PSEUDO, 0) != 0) {
		dev_err(dip, CE_WARN, "failed to create topo minor node!");
		mutex_exit(&azn->azn_mutex);
		return (DDI_FAILURE);
	}

	/*
	 * Publish ourselves and dispatch the stub scan. The scan presumably
	 * clears azn_taskqid and signals azn_cv when it completes -- that
	 * logic is outside this view; amdzen_detach() waits on it.
	 */
	azn->azn_dip = dip;
	azn->azn_taskqid = taskq_dispatch(system_taskq, amdzen_stub_scan,
	    azn, TQ_SLEEP);
	azn->azn_flags |= AMDZEN_F_SCAN_DISPATCHED;
	mutex_exit(&azn->azn_mutex);

	return (DDI_SUCCESS);
}
2771 
/*
 * Standard detach(9E) entry point. Before tearing anything down we wait for
 * the asynchronous stub scan dispatched in amdzen_attach() to finish, then
 * refuse to detach entirely if any stub drivers have attached, since that
 * indicates this platform actively relies on us.
 */
static int
amdzen_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	amdzen_t *azn = amdzen_data;

	/* Nothing to do for suspend; only a real detach does work. */
	if (cmd == DDI_SUSPEND) {
		return (DDI_SUCCESS);
	} else if (cmd != DDI_DETACH) {
		return (DDI_FAILURE);
	}

	mutex_enter(&azn->azn_mutex);
	/*
	 * Wait for any outstanding taskq work. The scan presumably resets
	 * azn_taskqid to TASKQID_INVALID and broadcasts azn_cv on completion
	 * -- that logic is outside this view.
	 */
	while (azn->azn_taskqid != TASKQID_INVALID) {
		cv_wait(&azn->azn_cv, &azn->azn_mutex);
	}

	/*
	 * If we've attached any stub drivers, e.g. this platform is important
	 * for us, then we fail detach.
	 */
	if (!list_is_empty(&azn->azn_df_stubs) ||
	    !list_is_empty(&azn->azn_nb_stubs)) {
		mutex_exit(&azn->azn_mutex);
		return (DDI_FAILURE);
	}

	ddi_remove_minor_node(azn->azn_dip, NULL);
	azn->azn_dip = NULL;
	mutex_exit(&azn->azn_mutex);

	return (DDI_SUCCESS);
}
2804 
2805 static void
amdzen_free(void)2806 amdzen_free(void)
2807 {
2808 	if (amdzen_data == NULL) {
2809 		return;
2810 	}
2811 
2812 	VERIFY(list_is_empty(&amdzen_data->azn_df_stubs));
2813 	list_destroy(&amdzen_data->azn_df_stubs);
2814 	VERIFY(list_is_empty(&amdzen_data->azn_nb_stubs));
2815 	list_destroy(&amdzen_data->azn_nb_stubs);
2816 	cv_destroy(&amdzen_data->azn_cv);
2817 	mutex_destroy(&amdzen_data->azn_mutex);
2818 	kmem_free(amdzen_data, sizeof (amdzen_t));
2819 	amdzen_data = NULL;
2820 }
2821 
2822 static void
amdzen_alloc(void)2823 amdzen_alloc(void)
2824 {
2825 	amdzen_data = kmem_zalloc(sizeof (amdzen_t), KM_SLEEP);
2826 	mutex_init(&amdzen_data->azn_mutex, NULL, MUTEX_DRIVER, NULL);
2827 	list_create(&amdzen_data->azn_df_stubs, sizeof (amdzen_stub_t),
2828 	    offsetof(amdzen_stub_t, azns_link));
2829 	list_create(&amdzen_data->azn_nb_stubs, sizeof (amdzen_stub_t),
2830 	    offsetof(amdzen_stub_t, azns_link));
2831 	cv_init(&amdzen_data->azn_cv, NULL, CV_DRIVER, NULL);
2832 }
2833 
/*
 * Character device entry points for the "topo" minor node. Only open, close,
 * and ioctl are implemented; everything else is nodev/nochpoll stubs.
 */
static struct cb_ops amdzen_topo_cb_ops = {
	.cb_open = amdzen_topo_open,
	.cb_close = amdzen_topo_close,
	.cb_strategy = nodev,
	.cb_print = nodev,
	.cb_dump = nodev,
	.cb_read = nodev,
	.cb_write = nodev,
	.cb_ioctl = amdzen_topo_ioctl,
	.cb_devmap = nodev,
	.cb_mmap = nodev,
	.cb_segmap = nodev,
	.cb_chpoll = nochpoll,
	.cb_prop_op = ddi_prop_op,
	.cb_flag = D_MP,
	.cb_rev = CB_REV,
	.cb_aread = nodev,
	.cb_awrite = nodev
};
2853 
/*
 * Bus operations we provide as a nexus for our child devices. Mapping and
 * all DMA operations are ddi_no_* stubs; bus_ctl is the only interesting
 * entry point.
 */
struct bus_ops amdzen_bus_ops = {
	.busops_rev = BUSO_REV,
	.bus_map = nullbusmap,
	.bus_dma_map = ddi_no_dma_map,
	.bus_dma_allochdl = ddi_no_dma_allochdl,
	.bus_dma_freehdl = ddi_no_dma_freehdl,
	.bus_dma_bindhdl = ddi_no_dma_bindhdl,
	.bus_dma_unbindhdl = ddi_no_dma_unbindhdl,
	.bus_dma_flush = ddi_no_dma_flush,
	.bus_dma_win = ddi_no_dma_win,
	.bus_dma_ctl = ddi_no_dma_mctl,
	.bus_prop_op = ddi_bus_prop_op,
	.bus_ctl = amdzen_bus_ctl
};
2868 
/*
 * Device operations vector tying together the attach/detach entry points,
 * our nexus bus ops, and the topo character device cb_ops.
 */
static struct dev_ops amdzen_dev_ops = {
	.devo_rev = DEVO_REV,
	.devo_refcnt = 0,
	.devo_getinfo = nodev,
	.devo_identify = nulldev,
	.devo_probe = nulldev,
	.devo_attach = amdzen_attach,
	.devo_detach = amdzen_detach,
	.devo_reset = nodev,
	.devo_quiesce = ddi_quiesce_not_needed,
	.devo_bus_ops = &amdzen_bus_ops,
	.devo_cb_ops = &amdzen_topo_cb_ops
};
2882 
/* Loadable module description for this driver. */
static struct modldrv amdzen_modldrv = {
	.drv_modops = &mod_driverops,
	.drv_linkinfo = "AMD Zen Nexus Driver",
	.drv_dev_ops = &amdzen_dev_ops
};
2888 
/* Module linkage: a single driver module. */
static struct modlinkage amdzen_modlinkage = {
	.ml_rev = MODREV_1,
	.ml_linkage = { &amdzen_modldrv, NULL }
};
2893 
2894 int
_init(void)2895 _init(void)
2896 {
2897 	int ret;
2898 
2899 	if (cpuid_getvendor(CPU) != X86_VENDOR_AMD &&
2900 	    cpuid_getvendor(CPU) != X86_VENDOR_HYGON) {
2901 		return (ENOTSUP);
2902 	}
2903 
2904 	if ((ret = mod_install(&amdzen_modlinkage)) == 0) {
2905 		amdzen_alloc();
2906 	}
2907 
2908 	return (ret);
2909 }
2910 
/*
 * Standard _info(9E) entry point: report module information via the
 * modlinkage.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&amdzen_modlinkage, modinfop));
}
2916 
2917 int
_fini(void)2918 _fini(void)
2919 {
2920 	int ret;
2921 
2922 	if ((ret = mod_remove(&amdzen_modlinkage)) == 0) {
2923 		amdzen_free();
2924 	}
2925 
2926 	return (ret);
2927 }
2928