xref: /illumos-gate/usr/src/uts/intel/io/amdzen/amdzen.c (revision 852deac253162fc67cf47230429574fc4c2b38ee)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2019, Joyent, Inc.
14  * Copyright 2024 Oxide Computer Company
15  */
16 
17 /*
18  * Nexus Driver for AMD Zen family systems. The purpose of this driver is to
19  * provide access to the following resources in a single, centralized fashion:
20  *
21  *  - The per-chip Data Fabric
22  *  - The North Bridge
23  *  - The System Management Network (SMN)
24  *
25  * This is a nexus driver as once we have attached to all the requisite
26  * components, we will enumerate child devices which consume this functionality.
27  *
28  * ------------------------
29  * Mapping Devices Together
30  * ------------------------
31  *
32  * The operating system needs to expose things like temperature sensors and DRAM
33  * configuration registers in terms of things that are meaningful to the system
34  * such as logical CPUs, cores, etc. This driver attaches to the PCI devices
35  * that represent the northbridge, data fabrics, and dies. Note that there are
36  * multiple northbridge and DF devices (one each per die) and this driver maps
37  * all of these three things together. Unfortunately, this requires some
38  * acrobatics as there is no direct way to map a northbridge to its
39  * corresponding die. Instead, we map a CPU die to a data fabric PCI device and
40  * a data fabric PCI device to a corresponding northbridge PCI device. This
 41  * transitive relationship allows us to map between northbridge and die.
42  *
43  * As each data fabric device is attached, based on vendor and device portions
44  * of the PCI ID, we add it to the DF stubs list in the global amdzen_t
45  * structure, amdzen_data->azn_df_stubs. We must now map these to logical CPUs.
46  *
47  * In current Zen based products, there is a direct mapping between processor
48  * nodes and a data fabric PCI device: all of the devices are on PCI Bus 0 and
49  * start from Device 0x18, so device 0x18 maps to processor node 0, 0x19 to
50  * processor node 1, etc. This means that to map a logical CPU to a data fabric
51  * device, we take its processor node id, add it to 0x18 and find the PCI device
52  * that is on bus 0 with that ID number. We already discovered the DF devices as
53  * described above.
54  *
55  * The northbridge PCI device has a well-defined device and function, but the
56  * bus that it is on varies. Each die has its own set of assigned PCI buses and
57  * its northbridge device is on the first die-specific bus. This implies that
58  * the northbridges do not show up on PCI bus 0, as that is the PCI bus that all
59  * of the data fabric devices are on and is not assigned to any particular die.
60  * Additionally, while the northbridge on the lowest-numbered PCI bus
61  * intuitively corresponds to processor node zero, hardware does not guarantee
62  * this. Because we don't want to be at the mercy of firmware, we don't rely on
63  * this ordering assumption, though we have yet to find a system that deviates
64  * from it, either.
65  *
66  * One of the registers in the data fabric device's function 0
67  * (AMDZEN_DF_F0_CFG_ADDR_CTL) happens to identify the first PCI bus that is
68  * associated with the processor node. This means that we can map a data fabric
69  * device to a northbridge by finding the northbridge whose PCI bus ID matches
70  * the value in the corresponding data fabric's AMDZEN_DF_F0_CFG_ADDR_CTL.
71  *
72  * Given all of the above, we can map a northbridge to a data fabric device and
73  * a die to a data fabric device. Because these are 1:1 mappings, there is a
 74  * transitive relationship from northbridge to die, and therefore we know which
75  * northbridge is associated with which processor die. This is summarized in the
76  * following image:
77  *
78  *  +-------+     +------------------------------------+     +--------------+
79  *  | Die 0 |---->| Data Fabric PCI BDF 0/18/0         |---->| Northbridge  |
80  *  +-------+     | AMDZEN_DF_F0_CFG_ADDR_CTL: bus 10  |     | PCI  10/0/0  |
81  *     ...        +------------------------------------+     +--------------+
82  *  +-------+     +------------------------------------+     +--------------+
83  *  | Die n |---->| Data Fabric PCI BDF 0/18+n/0       |---->| Northbridge  |
84  *  +-------+     | AMDZEN_DF_F0_CFG_ADDR_CTL: bus 133 |     | PCI 133/0/0  |
85  *                +------------------------------------+     +--------------+
86  *
87  * Note, the PCI buses used by the northbridges here are arbitrary examples that
88  * do not necessarily reflect actual hardware values; however, the
89  * bus/device/function (BDF) of the data fabric accurately models hardware. All
90  * BDF values are in hex.
91  *
92  * Starting with the Rome generation of processors (Family 17h Model 30-3Fh),
93  * AMD has multiple northbridges on a given die. All of these northbridges share
94  * the same data fabric and system management network port. From our perspective
95  * this means that some of the northbridge devices will be redundant and that we
96  * no longer have a 1:1 mapping between the northbridge and the data fabric
97  * devices. Every data fabric will have a northbridge, but not every northbridge
98  * will have a data fabric device mapped. Because we're always trying to map
99  * from a die to a northbridge and not the reverse, the fact that there are
100  * extra northbridge devices hanging around that we don't know about shouldn't
101  * be a problem.
102  *
103  * -------------------------------
104  * Attach and Detach Complications
105  * -------------------------------
106  *
107  * We need to map different PCI devices together. Each device is attached to a
108  * amdzen_stub driver to facilitate integration with the rest of the kernel PCI
109  * machinery and so we have to manage multiple dev_info_t structures, each of
110  * which may be independently attached and detached.
111  *
112  * This is not particularly complex for attach: our _init routine allocates the
113  * necessary mutex and list structures at module load time, and as each stub is
114  * attached, it calls into this code to be added to the appropriate list. When
115  * the nexus itself is attached, we walk the PCI device tree accumulating a
116  * counter for all devices we expect to be attached. Once the scan is complete
117  * and all such devices are accounted for (stub registration may be happening
118  * asynchronously with respect to nexus attach), we initialize the nexus device
119  * and the attach is complete.
120  *
121  * Most other device drivers support instances that can be brought back after
122  * detach, provided they are associated with an active minor node in the
123  * /devices file system. This driver is different. Once a stub device has been
124  * attached, we do not permit detaching the nexus driver instance, as the kernel
125  * does not give us interlocking guarantees between nexus and stub driver attach
126  * and detach. It is simplest to just unconditionally fail detach once a stub
127  * has attached.
128  *
129  * ---------------
130  * Exposed Devices
131  * ---------------
132  *
133  * Rather than try and have all of the different functions that could be
134  * provided in one driver, we have a nexus driver that tries to load child
135  * pseudo-device drivers that provide specific pieces of functionality.
136  *
137  * -------
138  * Locking
139  * -------
140  *
141  * The amdzen_data structure contains a single lock, azn_mutex.
142  *
143  * The various client functions here are intended for our nexus's direct
 144  * children, but have been designed in case someone else should depend on this
145  * driver. Once a DF has been discovered, the set of entities inside of it
146  * (adf_nents, adf_ents[]) is considered static, constant data, and iteration
147  * over them does not require locking. However, the discovery of the amd_df_t
148  * does. In addition, locking is required whenever performing register accesses
149  * to the DF or SMN.
150  *
151  * To summarize, one must hold the lock in the following circumstances:
152  *
153  *  - Looking up DF structures
154  *  - Reading or writing to DF registers
155  *  - Reading or writing to SMN registers
156  *
157  * In general, it is preferred that the lock be held across an entire client
 158  * operation if possible. The only time this becomes an issue is when we have
159  * callbacks into our callers (ala amdzen_c_df_iter()) as they may recursively
160  * call into us.
161  */
162 
163 #include <sys/modctl.h>
164 #include <sys/conf.h>
165 #include <sys/devops.h>
166 #include <sys/ddi.h>
167 #include <sys/sunddi.h>
168 #include <sys/pci.h>
169 #include <sys/sysmacros.h>
170 #include <sys/sunndi.h>
171 #include <sys/x86_archext.h>
172 #include <sys/cpuvar.h>
173 #include <sys/policy.h>
174 #include <sys/stat.h>
175 #include <sys/sunddi.h>
176 #include <sys/bitmap.h>
177 #include <sys/stdbool.h>
178 
179 #include <sys/amdzen/df.h>
180 #include <sys/amdzen/ccd.h>
181 #include "amdzen.h"
182 #include "amdzen_client.h"
183 #include "amdzen_topo.h"
184 
185 amdzen_t *amdzen_data;
186 
187 /*
188  * Internal minor nodes for devices that the nexus provides itself.
189  */
190 #define	AMDZEN_MINOR_TOPO	0
191 
192 /*
193  * Array of northbridge IDs that we care about.
194  */
195 static const uint16_t amdzen_nb_ids[] = {
196 	/* Family 17h Ryzen, Epyc Models 00h-0fh (Zen uarch) */
197 	0x1450,
198 	/* Family 17h Raven Ridge, Kestrel, Dali Models 10h-2fh (Zen uarch) */
199 	0x15d0,
200 	/* Family 17h/19h Rome, Milan, Matisse, Vermeer Zen 2/Zen 3 uarch */
201 	0x1480,
202 	/* Family 17h/19h Renoir, Cezanne, Van Gogh Zen 2/3 uarch */
203 	0x1630,
204 	/* Family 19h Genoa and Bergamo */
205 	0x14a4,
206 	/* Family 17h Mendocino, Family 19h Rembrandt */
207 	0x14b5,
208 	/* Family 19h Raphael, Family 1Ah 40-4fh */
209 	0x14d8,
210 	/* Family 19h Phoenix */
211 	0x14e8,
212 	/* Family 1Ah Turin */
213 	0x153a,
214 	/* Family 1Ah 20-2fh */
215 	0x1507
216 };
217 
218 typedef struct {
219 	char *acd_name;
220 	amdzen_child_t acd_addr;
221 	/*
222 	 * This indicates whether or not we should issue warnings to users when
223 	 * something happens specific to this instance. The main reason we don't
224 	 * want to is for optional devices that may not be installed as they are
225 	 * for development purposes (e.g. usmn, zen_udf); however, if there is
226 	 * an issue with the others we still want to know.
227 	 */
228 	bool acd_warn;
229 } amdzen_child_data_t;
230 
231 static const amdzen_child_data_t amdzen_children[] = {
232 	{ "smntemp", AMDZEN_C_SMNTEMP, true },
233 	{ "usmn", AMDZEN_C_USMN, false },
234 	{ "zen_udf", AMDZEN_C_ZEN_UDF, false },
235 	{ "zen_umc", AMDZEN_C_ZEN_UMC, true }
236 };
237 
238 static uint8_t
amdzen_stub_get8(amdzen_stub_t * stub,off_t reg)239 amdzen_stub_get8(amdzen_stub_t *stub, off_t reg)
240 {
241 	return (pci_config_get8(stub->azns_cfgspace, reg));
242 }
243 
244 static uint16_t
amdzen_stub_get16(amdzen_stub_t * stub,off_t reg)245 amdzen_stub_get16(amdzen_stub_t *stub, off_t reg)
246 {
247 	return (pci_config_get16(stub->azns_cfgspace, reg));
248 }
249 
250 static uint32_t
amdzen_stub_get32(amdzen_stub_t * stub,off_t reg)251 amdzen_stub_get32(amdzen_stub_t *stub, off_t reg)
252 {
253 	return (pci_config_get32(stub->azns_cfgspace, reg));
254 }
255 
256 static uint64_t
amdzen_stub_get64(amdzen_stub_t * stub,off_t reg)257 amdzen_stub_get64(amdzen_stub_t *stub, off_t reg)
258 {
259 	return (pci_config_get64(stub->azns_cfgspace, reg));
260 }
261 
262 static void
amdzen_stub_put8(amdzen_stub_t * stub,off_t reg,uint8_t val)263 amdzen_stub_put8(amdzen_stub_t *stub, off_t reg, uint8_t val)
264 {
265 	pci_config_put8(stub->azns_cfgspace, reg, val);
266 }
267 
268 static void
amdzen_stub_put16(amdzen_stub_t * stub,off_t reg,uint16_t val)269 amdzen_stub_put16(amdzen_stub_t *stub, off_t reg, uint16_t val)
270 {
271 	pci_config_put16(stub->azns_cfgspace, reg, val);
272 }
273 
274 static void
amdzen_stub_put32(amdzen_stub_t * stub,off_t reg,uint32_t val)275 amdzen_stub_put32(amdzen_stub_t *stub, off_t reg, uint32_t val)
276 {
277 	pci_config_put32(stub->azns_cfgspace, reg, val);
278 }
279 
280 static uint64_t
amdzen_df_read_regdef(amdzen_t * azn,amdzen_df_t * df,const df_reg_def_t def,uint8_t inst,boolean_t do_64)281 amdzen_df_read_regdef(amdzen_t *azn, amdzen_df_t *df, const df_reg_def_t def,
282     uint8_t inst, boolean_t do_64)
283 {
284 	df_reg_def_t ficaa;
285 	df_reg_def_t ficad;
286 	uint32_t val = 0;
287 	df_rev_t df_rev = azn->azn_dfs[0].adf_rev;
288 	VERIFY(df_reg_valid(df_rev, def));
289 
290 	VERIFY(MUTEX_HELD(&azn->azn_mutex));
291 	val = DF_FICAA_V2_SET_TARG_INST(val, 1);
292 	val = DF_FICAA_V2_SET_FUNC(val, def.drd_func);
293 	val = DF_FICAA_V2_SET_INST(val, inst);
294 	val = DF_FICAA_V2_SET_64B(val, do_64 ? 1 : 0);
295 
296 	switch (df_rev) {
297 	case DF_REV_2:
298 	case DF_REV_3:
299 	case DF_REV_3P5:
300 		ficaa = DF_FICAA_V2;
301 		ficad = DF_FICAD_LO_V2;
302 		val = DF_FICAA_V2_SET_REG(val, def.drd_reg >>
303 		    DF_FICAA_REG_SHIFT);
304 		break;
305 	case DF_REV_4:
306 	case DF_REV_4D2:
307 		ficaa = DF_FICAA_V4;
308 		ficad = DF_FICAD_LO_V4;
309 		val = DF_FICAA_V4_SET_REG(val, def.drd_reg >>
310 		    DF_FICAA_REG_SHIFT);
311 		break;
312 	default:
313 		panic("encountered unexpected DF rev: %u", df_rev);
314 	}
315 
316 	amdzen_stub_put32(df->adf_funcs[ficaa.drd_func], ficaa.drd_reg, val);
317 	if (do_64) {
318 		return (amdzen_stub_get64(df->adf_funcs[ficad.drd_func],
319 		    ficad.drd_reg));
320 	} else {
321 		return (amdzen_stub_get32(df->adf_funcs[ficad.drd_func],
322 		    ficad.drd_reg));
323 	}
324 }
325 
326 /*
327  * Perform a targeted 32-bit indirect read to a specific instance and function.
328  */
329 static uint32_t
amdzen_df_read32(amdzen_t * azn,amdzen_df_t * df,uint8_t inst,const df_reg_def_t def)330 amdzen_df_read32(amdzen_t *azn, amdzen_df_t *df, uint8_t inst,
331     const df_reg_def_t def)
332 {
333 	return (amdzen_df_read_regdef(azn, df, def, inst, B_FALSE));
334 }
335 
336 /*
337  * For a broadcast read, just go to the underlying PCI function and perform a
338  * read. At this point in time, we don't believe we need to use the FICAA/FICAD
339  * to access it (though it does have a broadcast mode).
340  */
341 static uint32_t
amdzen_df_read32_bcast(amdzen_t * azn,amdzen_df_t * df,const df_reg_def_t def)342 amdzen_df_read32_bcast(amdzen_t *azn, amdzen_df_t *df, const df_reg_def_t def)
343 {
344 	VERIFY(MUTEX_HELD(&azn->azn_mutex));
345 	return (amdzen_stub_get32(df->adf_funcs[def.drd_func], def.drd_reg));
346 }
347 
348 static uint32_t
amdzen_smn_read(amdzen_t * azn,amdzen_df_t * df,const smn_reg_t reg)349 amdzen_smn_read(amdzen_t *azn, amdzen_df_t *df, const smn_reg_t reg)
350 {
351 	const uint32_t base_addr = SMN_REG_ADDR_BASE(reg);
352 	const uint32_t addr_off = SMN_REG_ADDR_OFF(reg);
353 
354 	VERIFY(SMN_REG_IS_NATURALLY_ALIGNED(reg));
355 	VERIFY(MUTEX_HELD(&azn->azn_mutex));
356 	amdzen_stub_put32(df->adf_nb, AMDZEN_NB_SMN_ADDR, base_addr);
357 
358 	switch (SMN_REG_SIZE(reg)) {
359 	case 1:
360 		return ((uint32_t)amdzen_stub_get8(df->adf_nb,
361 		    AMDZEN_NB_SMN_DATA + addr_off));
362 	case 2:
363 		return ((uint32_t)amdzen_stub_get16(df->adf_nb,
364 		    AMDZEN_NB_SMN_DATA + addr_off));
365 	case 4:
366 		return (amdzen_stub_get32(df->adf_nb, AMDZEN_NB_SMN_DATA));
367 	default:
368 		panic("unreachable invalid SMN register size %u",
369 		    SMN_REG_SIZE(reg));
370 	}
371 }
372 
373 static void
amdzen_smn_write(amdzen_t * azn,amdzen_df_t * df,const smn_reg_t reg,const uint32_t val)374 amdzen_smn_write(amdzen_t *azn, amdzen_df_t *df, const smn_reg_t reg,
375     const uint32_t val)
376 {
377 	const uint32_t base_addr = SMN_REG_ADDR_BASE(reg);
378 	const uint32_t addr_off = SMN_REG_ADDR_OFF(reg);
379 
380 	VERIFY(SMN_REG_IS_NATURALLY_ALIGNED(reg));
381 	VERIFY(SMN_REG_VALUE_FITS(reg, val));
382 	VERIFY(MUTEX_HELD(&azn->azn_mutex));
383 	amdzen_stub_put32(df->adf_nb, AMDZEN_NB_SMN_ADDR, base_addr);
384 
385 	switch (SMN_REG_SIZE(reg)) {
386 	case 1:
387 		amdzen_stub_put8(df->adf_nb, AMDZEN_NB_SMN_DATA + addr_off,
388 		    (uint8_t)val);
389 		break;
390 	case 2:
391 		amdzen_stub_put16(df->adf_nb, AMDZEN_NB_SMN_DATA + addr_off,
392 		    (uint16_t)val);
393 		break;
394 	case 4:
395 		amdzen_stub_put32(df->adf_nb, AMDZEN_NB_SMN_DATA, val);
396 		break;
397 	default:
398 		panic("unreachable invalid SMN register size %u",
399 		    SMN_REG_SIZE(reg));
400 	}
401 }
402 
403 /*
404  * This is an unfortunate necessity due to the evolution of the CCM DF values.
405  */
406 static inline boolean_t
amdzen_df_at_least(const amdzen_df_t * df,uint8_t major,uint8_t minor)407 amdzen_df_at_least(const amdzen_df_t *df, uint8_t major, uint8_t minor)
408 {
409 	return (df->adf_major > major || (df->adf_major == major &&
410 	    df->adf_minor >= minor));
411 }
412 
413 static amdzen_df_t *
amdzen_df_find(amdzen_t * azn,uint_t dfno)414 amdzen_df_find(amdzen_t *azn, uint_t dfno)
415 {
416 	uint_t i;
417 
418 	ASSERT(MUTEX_HELD(&azn->azn_mutex));
419 	if (dfno >= azn->azn_ndfs) {
420 		return (NULL);
421 	}
422 
423 	for (i = 0; i < azn->azn_ndfs; i++) {
424 		amdzen_df_t *df = &azn->azn_dfs[i];
425 		if ((df->adf_flags & AMDZEN_DF_F_VALID) == 0) {
426 			continue;
427 		}
428 
429 		if (dfno == 0) {
430 			return (df);
431 		}
432 		dfno--;
433 	}
434 
435 	return (NULL);
436 }
437 
438 static amdzen_df_ent_t *
amdzen_df_ent_find_by_instid(amdzen_df_t * df,uint8_t instid)439 amdzen_df_ent_find_by_instid(amdzen_df_t *df, uint8_t instid)
440 {
441 	for (uint_t i = 0; i < df->adf_nents; i++) {
442 		amdzen_df_ent_t *ent = &df->adf_ents[i];
443 
444 		if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0) {
445 			continue;
446 		}
447 
448 		if (ent->adfe_inst_id == instid) {
449 			return (ent);
450 		}
451 	}
452 
453 	return (NULL);
454 }
455 
456 /*
457  * Client functions that are used by nexus children.
458  */
459 int
amdzen_c_smn_read(uint_t dfno,const smn_reg_t reg,uint32_t * valp)460 amdzen_c_smn_read(uint_t dfno, const smn_reg_t reg, uint32_t *valp)
461 {
462 	amdzen_df_t *df;
463 	amdzen_t *azn = amdzen_data;
464 
465 	if (!SMN_REG_SIZE_IS_VALID(reg))
466 		return (EINVAL);
467 	if (!SMN_REG_IS_NATURALLY_ALIGNED(reg))
468 		return (EINVAL);
469 
470 	mutex_enter(&azn->azn_mutex);
471 	df = amdzen_df_find(azn, dfno);
472 	if (df == NULL) {
473 		mutex_exit(&azn->azn_mutex);
474 		return (ENOENT);
475 	}
476 
477 	if ((df->adf_flags & AMDZEN_DF_F_FOUND_NB) == 0) {
478 		mutex_exit(&azn->azn_mutex);
479 		return (ENXIO);
480 	}
481 
482 	*valp = amdzen_smn_read(azn, df, reg);
483 	mutex_exit(&azn->azn_mutex);
484 	return (0);
485 }
486 
487 int
amdzen_c_smn_write(uint_t dfno,const smn_reg_t reg,const uint32_t val)488 amdzen_c_smn_write(uint_t dfno, const smn_reg_t reg, const uint32_t val)
489 {
490 	amdzen_df_t *df;
491 	amdzen_t *azn = amdzen_data;
492 
493 	if (!SMN_REG_SIZE_IS_VALID(reg))
494 		return (EINVAL);
495 	if (!SMN_REG_IS_NATURALLY_ALIGNED(reg))
496 		return (EINVAL);
497 	if (!SMN_REG_VALUE_FITS(reg, val))
498 		return (EOVERFLOW);
499 
500 	mutex_enter(&azn->azn_mutex);
501 	df = amdzen_df_find(azn, dfno);
502 	if (df == NULL) {
503 		mutex_exit(&azn->azn_mutex);
504 		return (ENOENT);
505 	}
506 
507 	if ((df->adf_flags & AMDZEN_DF_F_FOUND_NB) == 0) {
508 		mutex_exit(&azn->azn_mutex);
509 		return (ENXIO);
510 	}
511 
512 	amdzen_smn_write(azn, df, reg, val);
513 	mutex_exit(&azn->azn_mutex);
514 	return (0);
515 }
516 
517 uint_t
amdzen_c_df_count(void)518 amdzen_c_df_count(void)
519 {
520 	uint_t ret;
521 	amdzen_t *azn = amdzen_data;
522 
523 	mutex_enter(&azn->azn_mutex);
524 	ret = azn->azn_ndfs;
525 	mutex_exit(&azn->azn_mutex);
526 	return (ret);
527 }
528 
529 df_rev_t
amdzen_c_df_rev(void)530 amdzen_c_df_rev(void)
531 {
532 	amdzen_df_t *df;
533 	amdzen_t *azn = amdzen_data;
534 	df_rev_t rev;
535 
536 	/*
537 	 * Always use the first DF instance to determine what we're using. Our
538 	 * current assumption, which seems to generally be true, is that the
539 	 * given DF revisions are the same in a given system when the DFs are
540 	 * directly connected.
541 	 */
542 	mutex_enter(&azn->azn_mutex);
543 	df = amdzen_df_find(azn, 0);
544 	if (df == NULL) {
545 		rev = DF_REV_UNKNOWN;
546 	} else {
547 		rev = df->adf_rev;
548 	}
549 	mutex_exit(&azn->azn_mutex);
550 
551 	return (rev);
552 }
553 
554 int
amdzen_c_df_read32(uint_t dfno,uint8_t inst,const df_reg_def_t def,uint32_t * valp)555 amdzen_c_df_read32(uint_t dfno, uint8_t inst, const df_reg_def_t def,
556     uint32_t *valp)
557 {
558 	amdzen_df_t *df;
559 	amdzen_t *azn = amdzen_data;
560 
561 	mutex_enter(&azn->azn_mutex);
562 	df = amdzen_df_find(azn, dfno);
563 	if (df == NULL) {
564 		mutex_exit(&azn->azn_mutex);
565 		return (ENOENT);
566 	}
567 
568 	if (df->adf_rev == DF_REV_UNKNOWN) {
569 		mutex_exit(&azn->azn_mutex);
570 		return (ENOTSUP);
571 	}
572 
573 	*valp = amdzen_df_read_regdef(azn, df, def, inst, B_FALSE);
574 	mutex_exit(&azn->azn_mutex);
575 
576 	return (0);
577 }
578 
579 int
amdzen_c_df_read64(uint_t dfno,uint8_t inst,const df_reg_def_t def,uint64_t * valp)580 amdzen_c_df_read64(uint_t dfno, uint8_t inst, const df_reg_def_t def,
581     uint64_t *valp)
582 {
583 	amdzen_df_t *df;
584 	amdzen_t *azn = amdzen_data;
585 
586 	mutex_enter(&azn->azn_mutex);
587 	df = amdzen_df_find(azn, dfno);
588 	if (df == NULL) {
589 		mutex_exit(&azn->azn_mutex);
590 		return (ENOENT);
591 	}
592 
593 	if (df->adf_rev == DF_REV_UNKNOWN) {
594 		mutex_exit(&azn->azn_mutex);
595 		return (ENOTSUP);
596 	}
597 
598 	*valp = amdzen_df_read_regdef(azn, df, def, inst, B_TRUE);
599 	mutex_exit(&azn->azn_mutex);
600 
601 	return (0);
602 }
603 
604 int
amdzen_c_df_iter(uint_t dfno,zen_df_type_t type,amdzen_c_iter_f func,void * arg)605 amdzen_c_df_iter(uint_t dfno, zen_df_type_t type, amdzen_c_iter_f func,
606     void *arg)
607 {
608 	amdzen_df_t *df;
609 	amdzen_t *azn = amdzen_data;
610 	df_type_t df_type;
611 	uint8_t df_subtype;
612 
613 	/*
614 	 * Unlike other calls here, we hold our lock only to find the DF here.
615 	 * The main reason for this is the nature of the callback function.
616 	 * Folks are iterating over instances so they can call back into us. If
617 	 * you look at the locking statement, the thing that is most volatile
618 	 * right here and what we need to protect is the DF itself and
619 	 * subsequent register accesses to it. The actual data about which
620 	 * entities exist is static and so once we have found a DF we should
621 	 * hopefully be in good shape as they only come, but don't go.
622 	 */
623 	mutex_enter(&azn->azn_mutex);
624 	df = amdzen_df_find(azn, dfno);
625 	if (df == NULL) {
626 		mutex_exit(&azn->azn_mutex);
627 		return (ENOENT);
628 	}
629 	mutex_exit(&azn->azn_mutex);
630 
631 	switch (type) {
632 	case ZEN_DF_TYPE_CS_UMC:
633 		df_type = DF_TYPE_CS;
634 		/*
635 		 * In the original Zeppelin DFv2 die there was no subtype field
636 		 * used for the CS. The UMC is the only type and has a subtype
637 		 * of zero.
638 		 */
639 		if (df->adf_rev != DF_REV_2) {
640 			df_subtype = DF_CS_SUBTYPE_UMC;
641 		} else {
642 			df_subtype = 0;
643 		}
644 		break;
645 	case ZEN_DF_TYPE_CCM_CPU:
646 		df_type = DF_TYPE_CCM;
647 
648 		if (df->adf_rev >= DF_REV_4 && amdzen_df_at_least(df, 4, 1)) {
649 			df_subtype = DF_CCM_SUBTYPE_CPU_V4P1;
650 		} else {
651 			df_subtype = DF_CCM_SUBTYPE_CPU_V2;
652 		}
653 		break;
654 	default:
655 		return (EINVAL);
656 	}
657 
658 	for (uint_t i = 0; i < df->adf_nents; i++) {
659 		amdzen_df_ent_t *ent = &df->adf_ents[i];
660 
661 		/*
662 		 * Some DF components are not considered enabled and therefore
663 		 * will end up having bogus values in their ID fields. If we do
664 		 * not have an enable flag set, we must skip this node.
665 		 */
666 		if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0)
667 			continue;
668 
669 		if (ent->adfe_type == df_type &&
670 		    ent->adfe_subtype == df_subtype) {
671 			int ret = func(dfno, ent->adfe_fabric_id,
672 			    ent->adfe_inst_id, arg);
673 			if (ret != 0) {
674 				return (ret);
675 			}
676 		}
677 	}
678 
679 	return (0);
680 }
681 
682 int
amdzen_c_df_fabric_decomp(df_fabric_decomp_t * decomp)683 amdzen_c_df_fabric_decomp(df_fabric_decomp_t *decomp)
684 {
685 	const amdzen_df_t *df;
686 	amdzen_t *azn = amdzen_data;
687 
688 	mutex_enter(&azn->azn_mutex);
689 	df = amdzen_df_find(azn, 0);
690 	if (df == NULL) {
691 		mutex_exit(&azn->azn_mutex);
692 		return (ENOENT);
693 	}
694 
695 	*decomp = df->adf_decomp;
696 	mutex_exit(&azn->azn_mutex);
697 	return (0);
698 }
699 
700 static boolean_t
amdzen_create_child(amdzen_t * azn,const amdzen_child_data_t * acd)701 amdzen_create_child(amdzen_t *azn, const amdzen_child_data_t *acd)
702 {
703 	int ret;
704 	dev_info_t *child;
705 
706 	if (ndi_devi_alloc(azn->azn_dip, acd->acd_name,
707 	    (pnode_t)DEVI_SID_NODEID, &child) != NDI_SUCCESS) {
708 		dev_err(azn->azn_dip, CE_WARN, "!failed to allocate child "
709 		    "dip for %s", acd->acd_name);
710 		return (B_FALSE);
711 	}
712 
713 	ddi_set_parent_data(child, (void *)acd);
714 	if ((ret = ndi_devi_online(child, 0)) != NDI_SUCCESS) {
715 		if (acd->acd_warn) {
716 			dev_err(azn->azn_dip, CE_WARN, "!failed to online "
717 			    "child dip %s: %d", acd->acd_name, ret);
718 		}
719 		return (B_FALSE);
720 	}
721 
722 	return (B_TRUE);
723 }
724 
725 static boolean_t
amdzen_map_dfs(amdzen_t * azn)726 amdzen_map_dfs(amdzen_t *azn)
727 {
728 	amdzen_stub_t *stub;
729 
730 	ASSERT(MUTEX_HELD(&azn->azn_mutex));
731 
732 	for (stub = list_head(&azn->azn_df_stubs); stub != NULL;
733 	    stub = list_next(&azn->azn_df_stubs, stub)) {
734 		amdzen_df_t *df;
735 		uint_t dfno;
736 
737 		dfno = stub->azns_dev - AMDZEN_DF_FIRST_DEVICE;
738 		if (dfno > AMDZEN_MAX_DFS) {
739 			dev_err(stub->azns_dip, CE_WARN, "encountered df "
740 			    "device with illegal DF PCI b/d/f: 0x%x/%x/%x",
741 			    stub->azns_bus, stub->azns_dev, stub->azns_func);
742 			goto err;
743 		}
744 
745 		df = &azn->azn_dfs[dfno];
746 
747 		if (stub->azns_func >= AMDZEN_MAX_DF_FUNCS) {
748 			dev_err(stub->azns_dip, CE_WARN, "encountered df "
749 			    "device with illegal DF PCI b/d/f: 0x%x/%x/%x",
750 			    stub->azns_bus, stub->azns_dev, stub->azns_func);
751 			goto err;
752 		}
753 
754 		if (df->adf_funcs[stub->azns_func] != NULL) {
755 			dev_err(stub->azns_dip, CE_WARN, "encountered "
756 			    "duplicate df device with DF PCI b/d/f: 0x%x/%x/%x",
757 			    stub->azns_bus, stub->azns_dev, stub->azns_func);
758 			goto err;
759 		}
760 		df->adf_funcs[stub->azns_func] = stub;
761 	}
762 
763 	return (B_TRUE);
764 
765 err:
766 	azn->azn_flags |= AMDZEN_F_DEVICE_ERROR;
767 	return (B_FALSE);
768 }
769 
770 static boolean_t
amdzen_check_dfs(amdzen_t * azn)771 amdzen_check_dfs(amdzen_t *azn)
772 {
773 	uint_t i;
774 	boolean_t ret = B_TRUE;
775 
776 	for (i = 0; i < AMDZEN_MAX_DFS; i++) {
777 		amdzen_df_t *df = &azn->azn_dfs[i];
778 		uint_t count = 0;
779 
780 		/*
781 		 * We require all platforms to have DFs functions 0-6. Not all
782 		 * platforms have DF function 7.
783 		 */
784 		for (uint_t func = 0; func < AMDZEN_MAX_DF_FUNCS - 1; func++) {
785 			if (df->adf_funcs[func] != NULL) {
786 				count++;
787 			}
788 		}
789 
790 		if (count == 0)
791 			continue;
792 
793 		if (count != 7) {
794 			ret = B_FALSE;
795 			dev_err(azn->azn_dip, CE_WARN, "df %u devices "
796 			    "incomplete", i);
797 		} else {
798 			df->adf_flags |= AMDZEN_DF_F_VALID;
799 			azn->azn_ndfs++;
800 		}
801 	}
802 
803 	return (ret);
804 }
805 
806 static const uint8_t amdzen_df_rome_ids[0x2b] = {
807 	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, 20, 21, 22, 23,
808 	24, 25, 26, 27, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
809 	44, 45, 46, 47, 48
810 };
811 
812 /*
813  * Check the first df entry to see if it belongs to Rome or Milan. If so, then
814  * it uses the disjoint ID space.
815  */
816 static boolean_t
amdzen_is_rome_style(uint_t id)817 amdzen_is_rome_style(uint_t id)
818 {
819 	return (id == 0x1490 || id == 0x1650);
820 }
821 
822 /*
 823  * Deal with the differences between how a CCM subtype is indicated
824  * across CPU generations.
825  */
826 static boolean_t
amdzen_dfe_is_ccm(const amdzen_df_t * df,const amdzen_df_ent_t * ent)827 amdzen_dfe_is_ccm(const amdzen_df_t *df, const amdzen_df_ent_t *ent)
828 {
829 	if (ent->adfe_type != DF_TYPE_CCM) {
830 		return (B_FALSE);
831 	}
832 
833 	if (df->adf_rev >= DF_REV_4 && amdzen_df_at_least(df, 4, 1)) {
834 		return (ent->adfe_subtype == DF_CCM_SUBTYPE_CPU_V4P1);
835 	} else {
836 		return (ent->adfe_subtype == DF_CCM_SUBTYPE_CPU_V2);
837 	}
838 }
839 
840 /*
841  * To be able to do most other things we want to do, we must first determine
842  * what revision of the DF (data fabric) that we're using.
843  *
844  * Snapshot the df version. This was added explicitly in DFv4.0, around the Zen
845  * 4 timeframe and allows us to tell apart different version of the DF register
846  * set, most usefully when various subtypes were added.
847  *
848  * Older versions can theoretically be told apart based on usage of reserved
849  * registers. We walk these in the following order, starting with the newest rev
850  * and walking backwards to tell things apart:
851  *
852  *   o v3.5 -> Check function 1, register 0x150. This was reserved prior
853  *             to this point. This is actually DF_FIDMASK0_V3P5. We are supposed
854  *             to check bits [7:0].
855  *
856  *   o v3.0 -> Check function 1, register 0x208. The low byte (7:0) was
857  *             changed to indicate a component mask. This is non-zero
858  *             in the 3.0 generation. This is actually DF_FIDMASK_V2.
859  *
 860  *   o v2.0 -> This is simply the remaining case. Presumably v1 wasn't part
861  *             of the Zen generation.
862  *
863  * Because we don't know what version we are yet, we do not use the normal
864  * versioned register accesses which would check what DF version we are and
865  * would want to use the normal indirect register accesses (which also require
866  * us to know the version). We instead do direct broadcast reads.
867  */
868 static void
amdzen_determine_df_vers(amdzen_t * azn,amdzen_df_t * df)869 amdzen_determine_df_vers(amdzen_t *azn, amdzen_df_t *df)
870 {
871 	uint32_t val;
872 	df_reg_def_t rd = DF_FBICNT;
873 
874 	val = amdzen_stub_get32(df->adf_funcs[rd.drd_func], rd.drd_reg);
875 	df->adf_major = DF_FBICNT_V4_GET_MAJOR(val);
876 	df->adf_minor = DF_FBICNT_V4_GET_MINOR(val);
877 	if (df->adf_major == 0 && df->adf_minor == 0) {
878 		rd = DF_FIDMASK0_V3P5;
879 		val = amdzen_stub_get32(df->adf_funcs[rd.drd_func], rd.drd_reg);
880 		if (bitx32(val, 7, 0) != 0) {
881 			df->adf_major = 3;
882 			df->adf_minor = 5;
883 			df->adf_rev = DF_REV_3P5;
884 		} else {
885 			rd = DF_FIDMASK_V2;
886 			val = amdzen_stub_get32(df->adf_funcs[rd.drd_func],
887 			    rd.drd_reg);
888 			if (bitx32(val, 7, 0) != 0) {
889 				df->adf_major = 3;
890 				df->adf_minor = 0;
891 				df->adf_rev = DF_REV_3;
892 			} else {
893 				df->adf_major = 2;
894 				df->adf_minor = 0;
895 				df->adf_rev = DF_REV_2;
896 			}
897 		}
898 	} else if (df->adf_major == 4 && df->adf_minor >= 2) {
899 		/*
900 		 * These are devices that have the newer memory layout that
901 		 * moves the DF::DramBaseAddress to 0x200. Please see the df.h
902 		 * theory statement for more information.
903 		 */
904 		df->adf_rev = DF_REV_4D2;
905 	} else if (df->adf_major == 4) {
906 		df->adf_rev = DF_REV_4;
907 	} else {
908 		df->adf_rev = DF_REV_UNKNOWN;
909 	}
910 }
911 
912 /*
913  * All of the different versions of the DF have different ways of getting at and
914  * answering the question of how do I break a fabric ID into a corresponding
915  * socket, die, and component. Importantly the goal here is to obtain, cache,
916  * and normalize:
917  *
918  *  o The DF System Configuration
919  *  o The various Mask registers
920  *  o The Node ID
921  */
static void
amdzen_determine_fabric_decomp(amdzen_t *azn, amdzen_df_t *df)
{
	uint32_t mask;
	df_fabric_decomp_t *decomp = &df->adf_decomp;

	switch (df->adf_rev) {
	case DF_REV_2:
		df->adf_syscfg = amdzen_df_read32_bcast(azn, df, DF_SYSCFG_V2);
		/*
		 * Which die-mask register to read depends on whether the DF
		 * describes itself as a CPU or an APU style die.
		 */
		switch (DF_SYSCFG_V2_GET_MY_TYPE(df->adf_syscfg)) {
		case DF_DIE_TYPE_CPU:
			mask = amdzen_df_read32_bcast(azn, df,
			    DF_DIEMASK_CPU_V2);
			break;
		case DF_DIE_TYPE_APU:
			mask = amdzen_df_read32_bcast(azn, df,
			    DF_DIEMASK_APU_V2);
			break;
		default:
			panic("DF thinks we're not on a CPU!");
		}
		df->adf_mask0 = mask;

		/*
		 * DFv2 is a bit different in how the fabric mask register is
		 * phrased. Logically a fabric ID is broken into something that
		 * uniquely identifies a "node" (a particular die on a socket)
		 * and something that identifies a "component", e.g. a memory
		 * controller.
		 *
		 * Starting with DFv3, these registers logically called out how
		 * to separate the fabric ID first into a node and a component.
		 * Then the node was then broken down into a socket and die. In
		 * DFv2, there is no separate mask and shift of a node. Instead
		 * the socket and die are absolute offsets into the fabric ID
		 * rather than relative offsets into the node ID. As such, when
		 * we encounter DFv2, we fake up a node mask and shift and make
		 * it look like DFv3+.
		 */
		decomp->dfd_node_mask = DF_DIEMASK_V2_GET_SOCK_MASK(mask) |
		    DF_DIEMASK_V2_GET_DIE_MASK(mask);
		decomp->dfd_node_shift = DF_DIEMASK_V2_GET_DIE_SHIFT(mask);
		decomp->dfd_comp_mask = DF_DIEMASK_V2_GET_COMP_MASK(mask);
		decomp->dfd_comp_shift = 0;

		/*
		 * Rebase the absolute socket/die masks and shifts so that they
		 * are relative to the node ID synthesized above. By
		 * construction the die then starts at bit 0 of the node, which
		 * the assertion below checks.
		 */
		decomp->dfd_sock_mask = DF_DIEMASK_V2_GET_SOCK_MASK(mask) >>
		    decomp->dfd_node_shift;
		decomp->dfd_die_mask = DF_DIEMASK_V2_GET_DIE_MASK(mask) >>
		    decomp->dfd_node_shift;
		decomp->dfd_sock_shift = DF_DIEMASK_V2_GET_SOCK_SHIFT(mask) -
		    decomp->dfd_node_shift;
		decomp->dfd_die_shift = DF_DIEMASK_V2_GET_DIE_SHIFT(mask) -
		    decomp->dfd_node_shift;
		ASSERT3U(decomp->dfd_die_shift, ==, 0);

		/*
		 * There is no register in the actual data fabric with the node
		 * ID in DFv2 that we have found. Instead we take the first
		 * entity's fabric ID and transform it into the node id.
		 */
		df->adf_nodeid = (df->adf_ents[0].adfe_fabric_id &
		    decomp->dfd_node_mask) >> decomp->dfd_node_shift;
		break;
	case DF_REV_3:
		/* DFv3 spreads the decomposition over two mask registers. */
		df->adf_syscfg = amdzen_df_read32_bcast(azn, df, DF_SYSCFG_V3);
		df->adf_mask0 =  amdzen_df_read32_bcast(azn, df,
		    DF_FIDMASK0_V3);
		df->adf_mask1 =  amdzen_df_read32_bcast(azn, df,
		    DF_FIDMASK1_V3);

		decomp->dfd_sock_mask =
		    DF_FIDMASK1_V3_GET_SOCK_MASK(df->adf_mask1);
		decomp->dfd_sock_shift =
		    DF_FIDMASK1_V3_GET_SOCK_SHIFT(df->adf_mask1);
		decomp->dfd_die_mask =
		    DF_FIDMASK1_V3_GET_DIE_MASK(df->adf_mask1);
		decomp->dfd_die_shift = 0;
		decomp->dfd_node_mask =
		    DF_FIDMASK0_V3_GET_NODE_MASK(df->adf_mask0);
		decomp->dfd_node_shift =
		    DF_FIDMASK1_V3_GET_NODE_SHIFT(df->adf_mask1);
		decomp->dfd_comp_mask =
		    DF_FIDMASK0_V3_GET_COMP_MASK(df->adf_mask0);
		decomp->dfd_comp_shift = 0;

		df->adf_nodeid = DF_SYSCFG_V3_GET_NODE_ID(df->adf_syscfg);
		break;
	case DF_REV_3P5:
		/* DFv3.5 grew a third fabric ID mask register. */
		df->adf_syscfg = amdzen_df_read32_bcast(azn, df,
		    DF_SYSCFG_V3P5);
		df->adf_mask0 =  amdzen_df_read32_bcast(azn, df,
		    DF_FIDMASK0_V3P5);
		df->adf_mask1 =  amdzen_df_read32_bcast(azn, df,
		    DF_FIDMASK1_V3P5);
		df->adf_mask2 =  amdzen_df_read32_bcast(azn, df,
		    DF_FIDMASK2_V3P5);

		decomp->dfd_sock_mask =
		    DF_FIDMASK2_V3P5_GET_SOCK_MASK(df->adf_mask2);
		decomp->dfd_sock_shift =
		    DF_FIDMASK1_V3P5_GET_SOCK_SHIFT(df->adf_mask1);
		decomp->dfd_die_mask =
		    DF_FIDMASK2_V3P5_GET_DIE_MASK(df->adf_mask2);
		decomp->dfd_die_shift = 0;
		decomp->dfd_node_mask =
		    DF_FIDMASK0_V3P5_GET_NODE_MASK(df->adf_mask0);
		decomp->dfd_node_shift =
		    DF_FIDMASK1_V3P5_GET_NODE_SHIFT(df->adf_mask1);
		decomp->dfd_comp_mask =
		    DF_FIDMASK0_V3P5_GET_COMP_MASK(df->adf_mask0);
		decomp->dfd_comp_shift = 0;

		df->adf_nodeid = DF_SYSCFG_V3P5_GET_NODE_ID(df->adf_syscfg);
		break;
	case DF_REV_4:
	case DF_REV_4D2:
		df->adf_syscfg = amdzen_df_read32_bcast(azn, df, DF_SYSCFG_V4);
		df->adf_mask0 =  amdzen_df_read32_bcast(azn, df,
		    DF_FIDMASK0_V4);
		df->adf_mask1 =  amdzen_df_read32_bcast(azn, df,
		    DF_FIDMASK1_V4);
		df->adf_mask2 =  amdzen_df_read32_bcast(azn, df,
		    DF_FIDMASK2_V4);

		/*
		 * The DFv4 registers are at a different location in the DF;
		 * however, the actual layout of fields is the same as DFv3.5.
		 * This is why you see V3P5 below.
		 */
		decomp->dfd_sock_mask =
		    DF_FIDMASK2_V3P5_GET_SOCK_MASK(df->adf_mask2);
		decomp->dfd_sock_shift =
		    DF_FIDMASK1_V3P5_GET_SOCK_SHIFT(df->adf_mask1);
		decomp->dfd_die_mask =
		    DF_FIDMASK2_V3P5_GET_DIE_MASK(df->adf_mask2);
		decomp->dfd_die_shift = 0;
		decomp->dfd_node_mask =
		    DF_FIDMASK0_V3P5_GET_NODE_MASK(df->adf_mask0);
		decomp->dfd_node_shift =
		    DF_FIDMASK1_V3P5_GET_NODE_SHIFT(df->adf_mask1);
		decomp->dfd_comp_mask =
		    DF_FIDMASK0_V3P5_GET_COMP_MASK(df->adf_mask0);
		decomp->dfd_comp_shift = 0;

		df->adf_nodeid = DF_SYSCFG_V4_GET_NODE_ID(df->adf_syscfg);
		break;
	default:
		/* amdzen_setup_df() rejects unsupported revisions earlier. */
		panic("encountered suspicious, previously rejected DF "
		    "rev: 0x%x", df->adf_rev);
	}
}
1073 
1074 /*
1075  * The purpose of this function is to map CCMs to the corresponding CCDs that
1076  * exist. This is not an obvious thing as there is no direct mapping in the data
1077  * fabric between these IDs.
1078  *
1079  * Prior to DFv4, a given CCM was only ever connected to at most one CCD.
1080  * Starting in DFv4 a given CCM may have one or two SDP (scalable data ports)
1081  * that connect to CCDs. These may be connected to the same CCD or a different
1082  * one. When both ports are enabled we must check whether or not the port is
1083  * considered to be in wide mode. When wide mode is enabled then the two ports
1084  * are connected to a single CCD. If wide mode is disabled then the two ports
1085  * are connected to separate CCDs.
1086  *
1087  * The physical number of a CCD, which is how we determine the SMN aperture to
1088  * use, is based on the CCM ID. In most sockets we have seen up to a maximum of
1089  * 8 CCMs. When a CCM is connected to more than one CCD we have determined based
1090  * on some hints from AMD's ACPI information that the numbering is assumed to be
1091  * that CCM's number plus the total number of CCMs.
1092  *
1093  * More concretely, the SP5 Genoa/Bergamo Zen 4 platform has 8 CCMs. When there
1094  * are more than 8 CCDs installed then CCM 0 maps to CCDs 0 and 8. CCM 1 to CCDs
1095  * 1 and 9, etc. CCMs 4-7 map 1:1 to CCDs 4-7. However, the placement of CCDs
1096  * within the package has changed across generations.
1097  *
1098  * Notably in Rome and Milan (Zen 2/3) it appears that each quadrant had an
1099  * increasing number of CCDs. So CCDs 0/1 were together, 2/3, 4/5, and 6/7. This
1100  * meant that in cases where only a subset of CCDs were populated it'd forcibly
1101  * disable the higher CCD in a group (but with DFv3 the CCM would still be
1102  * enabled). So a 4 CCD config would generally enable CCDs 0, 2, 4, and 6 say.
1103  * This was almost certainly done to balance the NUMA config.
1104  *
1105  * Instead, starting in Genoa (Zen 4) the CCMs are round-robined around the
1106  * quadrants so CCMs (CCDs) 0 (0/8) and 4 (4) are together, 1 (1/9) and 5 (5),
1107  * etc. This is also why we more often see disabled CCMs in Genoa, but not in
1108  * Rome/Milan.
1109  *
1110  * When we're operating in wide mode and therefore both SDPs are connected to a
1111  * single CCD, we've always found that the lower CCD index will be used by the
1112  * system and the higher one is not considered present. Therefore, when
1113  * operating in wide mode, we need to make sure that whenever we have a non-zero
1114  * value for SDPs being connected that we rewrite this to only appear as a
1115  * single CCD is present. It's conceivable (though hard to imagine) that we
1116  * could get a value of 0b10 indicating that only the upper SDP link is active
1117  * for some reason.
1118  */
static void
amdzen_setup_df_ccm(amdzen_t *azn, amdzen_df_t *df, amdzen_df_ent_t *dfe,
    uint32_t ccmno)
{
	amdzen_ccm_data_t *ccm = &dfe->adfe_data.aded_ccm;
	uint32_t ccd_en;
	boolean_t wide_en;

	if (df->adf_rev >= DF_REV_4) {
		uint32_t val = amdzen_df_read32(azn, df, dfe->adfe_inst_id,
		    DF_CCD_EN_V4);
		ccd_en = DF_CCD_EN_V4_GET_CCD_EN(val);

		/*
		 * The wide-mode indication lives in the CCD enable register
		 * itself on DFv4.2 parts; otherwise it must be read from
		 * CCMCFG4.
		 */
		if (df->adf_rev == DF_REV_4D2) {
			wide_en = DF_CCD_EN_V4D2_GET_WIDE_EN(val);
		} else {
			val = amdzen_df_read32(azn, df, dfe->adfe_inst_id,
			    DF_CCMCFG4_V4);
			wide_en = DF_CCMCFG4_V4_GET_WIDE_EN(val);
		}

		/*
		 * In wide mode both SDPs are connected to a single CCD, so
		 * collapse any non-zero enable mask to just the lower CCD
		 * (see the block comment above).
		 */
		if (wide_en != 0 && ccd_en != 0) {
			ccd_en = 0x1;
		}
	} else {
		/* Prior to DFv4 a CCM is connected to at most one CCD. */
		ccd_en = 0x1;
	}

	for (uint32_t i = 0; i < DF_MAX_CCDS_PER_CCM; i++) {
		ccm->acd_ccd_en[i] = (ccd_en & (1 << i)) != 0;
		if (ccm->acd_ccd_en[i] == 0)
			continue;
		/*
		 * Physical CCD number: the CCM number plus the total CCM count
		 * for each additional SDP, per the numbering scheme above.
		 */
		ccm->acd_ccd_id[i] = ccmno + i * df->adf_nccm;
		ccm->acd_nccds++;
	}
}
1155 
1156 /*
1157  * Initialize our knowledge about a given series of nodes on the data fabric.
1158  */
static void
amdzen_setup_df(amdzen_t *azn, amdzen_df_t *df)
{
	uint_t i;
	uint32_t val, ccmno;

	amdzen_determine_df_vers(azn, df);

	/*
	 * The location of the configuration address control register (which
	 * tells us the northbridge bus number) depends on the DF revision.
	 */
	switch (df->adf_rev) {
	case DF_REV_2:
	case DF_REV_3:
	case DF_REV_3P5:
		val = amdzen_df_read32_bcast(azn, df, DF_CFG_ADDR_CTL_V2);
		break;
	case DF_REV_4:
	case DF_REV_4D2:
		val = amdzen_df_read32_bcast(azn, df, DF_CFG_ADDR_CTL_V4);
		break;
	default:
		dev_err(azn->azn_dip, CE_WARN, "encountered unsupported DF "
		    "revision: 0x%x", df->adf_rev);
		return;
	}
	df->adf_nb_busno = DF_CFG_ADDR_CTL_GET_BUS_NUM(val);
	/* FBICNT tells us how many fabric entities this DF instance has. */
	val = amdzen_df_read32_bcast(azn, df, DF_FBICNT);
	df->adf_nents = DF_FBICNT_GET_COUNT(val);
	if (df->adf_nents == 0)
		return;
	df->adf_ents = kmem_zalloc(sizeof (amdzen_df_ent_t) * df->adf_nents,
	    KM_SLEEP);

	for (i = 0; i < df->adf_nents; i++) {
		amdzen_df_ent_t *dfe = &df->adf_ents[i];
		uint8_t inst = i;

		/*
		 * Unfortunately, Rome uses a discontinuous instance ID pattern
		 * while everything else we can find uses a contiguous instance
		 * ID pattern. This means that for Rome, we need to adjust the
		 * indexes that we iterate over, though the total number of
		 * entries is right. This was carried over into Milan, but not
		 * Genoa.
		 */
		if (amdzen_is_rome_style(df->adf_funcs[0]->azns_did)) {
			if (inst >= ARRAY_SIZE(amdzen_df_rome_ids)) {
				dev_err(azn->azn_dip, CE_WARN, "Rome family "
				    "processor reported more ids than the PPR, "
				    "resetting %u to instance zero", inst);
				inst = 0;
			} else {
				inst = amdzen_df_rome_ids[inst];
			}
		}

		dfe->adfe_drvid = inst;
		dfe->adfe_info0 = amdzen_df_read32(azn, df, inst, DF_FBIINFO0);
		/* FBIINFO1 and FBIINFO2 are only read through DFv4. */
		if (df->adf_rev <= DF_REV_4) {
			dfe->adfe_info1 = amdzen_df_read32(azn, df, inst,
			    DF_FBIINFO1);
			dfe->adfe_info2 = amdzen_df_read32(azn, df, inst,
			    DF_FBIINFO2);
		}
		dfe->adfe_info3 = amdzen_df_read32(azn, df, inst, DF_FBIINFO3);

		dfe->adfe_type = DF_FBIINFO0_GET_TYPE(dfe->adfe_info0);
		dfe->adfe_subtype = DF_FBIINFO0_GET_SUBTYPE(dfe->adfe_info0);

		/*
		 * The enabled flag was not present in Zen 1. Simulate it by
		 * checking for a non-zero register instead.
		 */
		if (DF_FBIINFO0_V3_GET_ENABLED(dfe->adfe_info0) ||
		    (df->adf_rev == DF_REV_2 && dfe->adfe_info0 != 0)) {
			dfe->adfe_flags |= AMDZEN_DFE_F_ENABLED;
		}
		if (DF_FBIINFO0_GET_HAS_MCA(dfe->adfe_info0)) {
			dfe->adfe_flags |= AMDZEN_DFE_F_MCA;
		}

		/*
		 * Starting with DFv4 there is no instance ID in the fabric info
		 * 3 register, so we instead grab it out of the driver ID which
		 * is what it should be anyways.
		 */
		if (df->adf_rev >= DF_REV_4) {
			dfe->adfe_inst_id = dfe->adfe_drvid;
		} else {
			dfe->adfe_inst_id =
			    DF_FBIINFO3_GET_INSTID(dfe->adfe_info3);
		}

		/* The fabric ID's field position in FBIINFO3 varies by rev. */
		switch (df->adf_rev) {
		case DF_REV_2:
			dfe->adfe_fabric_id =
			    DF_FBIINFO3_V2_GET_BLOCKID(dfe->adfe_info3);
			break;
		case DF_REV_3:
			dfe->adfe_fabric_id =
			    DF_FBIINFO3_V3_GET_BLOCKID(dfe->adfe_info3);
			break;
		case DF_REV_3P5:
			dfe->adfe_fabric_id =
			    DF_FBIINFO3_V3P5_GET_BLOCKID(dfe->adfe_info3);
			break;
		case DF_REV_4:
		case DF_REV_4D2:
			dfe->adfe_fabric_id =
			    DF_FBIINFO3_V4_GET_BLOCKID(dfe->adfe_info3);
			break;
		default:
			panic("encountered suspicious, previously rejected DF "
			    "rev: 0x%x", df->adf_rev);
		}

		/*
		 * Record information about a subset of DF entities that we've
		 * found. Currently we're tracking this only for CCMs.
		 */
		if ((dfe->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0)
			continue;

		if (amdzen_dfe_is_ccm(df, dfe)) {
			df->adf_nccm++;
		}
	}

	/*
	 * Now that we have filled in all of our info, attempt to fill in
	 * specific information about different types of instances.
	 */
	ccmno = 0;
	for (uint_t i = 0; i < df->adf_nents; i++) {
		amdzen_df_ent_t *dfe = &df->adf_ents[i];

		if ((dfe->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0)
			continue;

		/*
		 * Perform type and sub-type specific initialization. Currently
		 * limited to CCMs.
		 */
		switch (dfe->adfe_type) {
		case DF_TYPE_CCM:
			amdzen_setup_df_ccm(azn, df, dfe, ccmno);
			ccmno++;
			break;
		default:
			break;
		}
	}

	amdzen_determine_fabric_decomp(azn, df);
}
1312 
1313 static void
amdzen_find_nb(amdzen_t * azn,amdzen_df_t * df)1314 amdzen_find_nb(amdzen_t *azn, amdzen_df_t *df)
1315 {
1316 	amdzen_stub_t *stub;
1317 
1318 	for (stub = list_head(&azn->azn_nb_stubs); stub != NULL;
1319 	    stub = list_next(&azn->azn_nb_stubs, stub)) {
1320 		if (stub->azns_bus == df->adf_nb_busno) {
1321 			df->adf_flags |= AMDZEN_DF_F_FOUND_NB;
1322 			df->adf_nb = stub;
1323 			return;
1324 		}
1325 	}
1326 }
1327 
1328 /*
1329  * We need to be careful using this function as different AMD generations have
1330  * acted in different ways when there is a missing CCD. We've found that in
1331  * hardware where the CCM is enabled but there is no CCD attached, it generally
1332  * is safe (i.e. DFv3 on Rome), but on DFv4 if we ask for a CCD that would
1333  * correspond to a disabled CCM then the firmware may inject a fatal error
1334  * (which is hopefully something missing in our RAS/MCA-X enablement).
1335  *
1336  * Put differently if this doesn't correspond to an Enabled CCM and you know the
1337  * number of valid CCDs on this, don't use it.
1338  */
1339 static boolean_t
amdzen_ccd_present(amdzen_t * azn,amdzen_df_t * df,uint32_t ccdno)1340 amdzen_ccd_present(amdzen_t *azn, amdzen_df_t *df, uint32_t ccdno)
1341 {
1342 	smn_reg_t die_reg = SMUPWR_CCD_DIE_ID(ccdno);
1343 	uint32_t val = amdzen_smn_read(azn, df, die_reg);
1344 	if (val == SMN_EINVAL32) {
1345 		return (B_FALSE);
1346 	}
1347 
1348 	ASSERT3U(ccdno, ==, SMUPWR_CCD_DIE_ID_GET(val));
1349 	return (B_TRUE);
1350 }
1351 
1352 static uint32_t
amdzen_ccd_thread_en(amdzen_t * azn,amdzen_df_t * df,uint32_t ccdno)1353 amdzen_ccd_thread_en(amdzen_t *azn, amdzen_df_t *df, uint32_t ccdno)
1354 {
1355 	smn_reg_t reg;
1356 
1357 	if (uarchrev_uarch(azn->azn_uarchrev) >= X86_UARCH_AMD_ZEN5) {
1358 		reg = L3SOC_THREAD_EN(ccdno);
1359 	} else {
1360 		reg = SMUPWR_THREAD_EN(ccdno);
1361 	}
1362 
1363 	return (amdzen_smn_read(azn, df, reg));
1364 }
1365 
1366 static uint32_t
amdzen_ccd_core_en(amdzen_t * azn,amdzen_df_t * df,uint32_t ccdno)1367 amdzen_ccd_core_en(amdzen_t *azn, amdzen_df_t *df, uint32_t ccdno)
1368 {
1369 	smn_reg_t reg;
1370 
1371 	if (uarchrev_uarch(azn->azn_uarchrev) >= X86_UARCH_AMD_ZEN5) {
1372 		reg = L3SOC_CORE_EN(ccdno);
1373 	} else {
1374 		reg = SMUPWR_CORE_EN(ccdno);
1375 	}
1376 
1377 	return (amdzen_smn_read(azn, df, reg));
1378 }
1379 
1380 static void
amdzen_ccd_info(amdzen_t * azn,amdzen_df_t * df,uint32_t ccdno,uint32_t * nccxp,uint32_t * nlcorep,uint32_t * nthrp)1381 amdzen_ccd_info(amdzen_t *azn, amdzen_df_t *df, uint32_t ccdno, uint32_t *nccxp,
1382     uint32_t *nlcorep, uint32_t *nthrp)
1383 {
1384 	uint32_t nccx, nlcore, smt;
1385 
1386 	if (uarchrev_uarch(azn->azn_uarchrev) >= X86_UARCH_AMD_ZEN5) {
1387 		smn_reg_t reg = L3SOC_THREAD_CFG(ccdno);
1388 		uint32_t val = amdzen_smn_read(azn, df, reg);
1389 		nccx = L3SOC_THREAD_CFG_GET_COMPLEX_COUNT(val) + 1;
1390 		nlcore = L3SOC_THREAD_CFG_GET_CORE_COUNT(val) + 1;
1391 		smt = L3SOC_THREAD_CFG_GET_SMT_MODE(val);
1392 	} else {
1393 		smn_reg_t reg = SMUPWR_THREAD_CFG(ccdno);
1394 		uint32_t val = amdzen_smn_read(azn, df, reg);
1395 		nccx = SMUPWR_THREAD_CFG_GET_COMPLEX_COUNT(val) + 1;
1396 		nlcore = SMUPWR_THREAD_CFG_GET_CORE_COUNT(val) + 1;
1397 		smt = SMUPWR_THREAD_CFG_GET_SMT_MODE(val);
1398 	}
1399 
1400 	if (nccxp != NULL) {
1401 		*nccxp = nccx;
1402 	}
1403 
1404 	if (nlcorep != NULL) {
1405 		*nlcorep = nlcore;
1406 	}
1407 
1408 	if (nthrp != NULL) {
1409 		/* The L3::L3SOC and SMU::PWR values are the same here */
1410 		if (smt == SMUPWR_THREAD_CFG_SMT_MODE_SMT) {
1411 			*nthrp = 2;
1412 		} else {
1413 			*nthrp = 1;
1414 		}
1415 	}
1416 }
1417 
/*
 * Translate the logical entity counts in the SCFCTP INITPKG0/INITPKG7
 * registers into the per-level APIC ID masks and shifts that we cache in
 * azn_apic_decomp.
 */
static void
amdzen_initpkg_to_apic(amdzen_t *azn, const uint32_t pkg0, const uint32_t pkg7)
{
	uint32_t nsock, nccd, nccx, ncore, nthr, extccx;
	uint32_t nsock_bits, nccd_bits, nccx_bits, ncore_bits, nthr_bits;
	amdzen_apic_decomp_t *apic = &azn->azn_apic_decomp;

	/*
	 * These are all 0 based values, meaning that we need to add one to each
	 * of them. However, we skip this because to calculate the number of
	 * bits to cover an entity we would subtract one.
	 */
	nthr = SCFCTP_PMREG_INITPKG0_GET_SMTEN(pkg0);
	ncore = SCFCTP_PMREG_INITPKG7_GET_N_CORES(pkg7);
	nccx = SCFCTP_PMREG_INITPKG7_GET_N_CCXS(pkg7);
	nccd = SCFCTP_PMREG_INITPKG7_GET_N_DIES(pkg7);
	nsock = SCFCTP_PMREG_INITPKG7_GET_N_SOCKETS(pkg7);

	/* The 16T APIC mode indicator only exists starting in Zen 4. */
	if (uarchrev_uarch(azn->azn_uarchrev) >= X86_UARCH_AMD_ZEN4) {
		extccx = SCFCTP_PMREG_INITPKG7_ZEN4_GET_16TAPIC(pkg7);
	} else {
		extccx = 0;
	}

	/*
	 * highbit() on the 0-based maximum gives the number of bits needed to
	 * represent each level of the hierarchy.
	 */
	nthr_bits = highbit(nthr);
	ncore_bits = highbit(ncore);
	nccx_bits = highbit(nccx);
	nccd_bits = highbit(nccd);
	nsock_bits = highbit(nsock);

	/* Threads always occupy the lowest bits of the APIC ID. */
	apic->aad_thread_shift = 0;
	apic->aad_thread_mask = (1 << nthr_bits) - 1;

	apic->aad_core_shift = nthr_bits;
	if (ncore_bits > 0) {
		apic->aad_core_mask = (1 << ncore_bits) - 1;
		apic->aad_core_mask <<= apic->aad_core_shift;
	} else {
		apic->aad_core_mask = 0;
	}

	/*
	 * The APIC_16T_MODE bit indicates that the total shift to start the CCX
	 * should be at 4 bits if it's not. It doesn't mean that the CCX portion
	 * of the value should take up four bits. In the common Genoa case,
	 * nccx_bits will be zero.
	 */
	apic->aad_ccx_shift = apic->aad_core_shift + ncore_bits;
	if (extccx != 0 && apic->aad_ccx_shift < 4) {
		apic->aad_ccx_shift = 4;
	}
	if (nccx_bits > 0) {
		apic->aad_ccx_mask = (1 << nccx_bits) - 1;
		apic->aad_ccx_mask <<= apic->aad_ccx_shift;
	} else {
		apic->aad_ccx_mask = 0;
	}

	apic->aad_ccd_shift = apic->aad_ccx_shift + nccx_bits;
	if (nccd_bits > 0) {
		apic->aad_ccd_mask = (1 << nccd_bits) - 1;
		apic->aad_ccd_mask <<= apic->aad_ccd_shift;
	} else {
		apic->aad_ccd_mask = 0;
	}

	apic->aad_sock_shift = apic->aad_ccd_shift + nccd_bits;
	if (nsock_bits > 0) {
		apic->aad_sock_mask = (1 << nsock_bits) - 1;
		apic->aad_sock_mask <<= apic->aad_sock_shift;
	} else {
		apic->aad_sock_mask = 0;
	}

	/*
	 * Currently all supported Zen 2+ platforms only have a single die per
	 * socket as compared to Zen 1. So this is always kept at zero.
	 */
	apic->aad_die_mask = 0;
	apic->aad_die_shift = 0;
}
1499 
1500 /*
1501  * We would like to determine what the logical APIC decomposition is on Zen 3
1502  * and newer family parts. While there is information added to CPUID in the form
1503  * of leaf 8X26, that isn't present in Zen 3, so instead we go to what we
1504  * believe is the underlying source of the CPUID data.
1505  *
1506  * Fundamentally there are a series of registers in SMN space that relate to the
1507  * SCFCTP. Coincidentally, there is one of these for each core and there are a
1508  * pair of related SMN registers. L3::SCFCTP::PMREG_INITPKG0 contains
 * information about a given core's logical and physical IDs. More interestingly
1510  * for this particular case, L3::SCFCTP::PMREG_INITPKG7, contains the overall
1511  * total number of logical entities. We've been promised that this has to be
1512  * the same across the fabric. That's all well and good, but this begs the
1513  * question of how do we actually get there. The above is a core-specific
1514  * register and requires that we understand information about which CCDs and
1515  * CCXs are actually present.
1516  *
1517  * So we are starting with a data fabric that has some CCM present. The CCM
1518  * entries in the data fabric may be tagged with our ENABLED flag.
1519  * Unfortunately, that can be true regardless of whether or not it's actually
1520  * present or not. As a result, we go to another chunk of SMN space registers,
1521  * SMU::PWR. These contain information about the CCDs, the physical cores that
1522  * are enabled, and related. So we will first walk the DF entities and see if we
1523  * can read its SMN::PWR::CCD_DIE_ID. If we get back a value of all 1s then
1524  * there is nothing present. Otherwise, we should get back something that
1525  * matches information in the data fabric.
1526  *
1527  * With that in hand, we can read the SMU::PWR::CORE_ENABLE register to
1528  * determine which physical cores are enabled in the CCD/CCX. That will finally
1529  * give us an index to get to our friend INITPKG7.
1530  */
static boolean_t
amdzen_determine_apic_decomp_initpkg(amdzen_t *azn)
{
	amdzen_df_t *df = &azn->azn_dfs[0];
	uint32_t ccdno = 0;

	/*
	 * Walk the DF entities of the first DF instance; ccdno tracks the
	 * physical CCD number corresponding to each CCM we encounter.
	 */
	for (uint_t i = 0; i < df->adf_nents; i++) {
		const amdzen_df_ent_t *ent = &df->adf_ents[i];
		if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0)
			continue;

		if (amdzen_dfe_is_ccm(df, ent)) {
			uint32_t val, nccx, pkg7, pkg0;
			smn_reg_t pkg7_reg, pkg0_reg;
			int core_bit;
			uint8_t pccxno, pcoreno;

			if (!amdzen_ccd_present(azn, df, ccdno)) {
				ccdno++;
				continue;
			}

			/*
			 * This die actually exists. Switch over to the core
			 * enable register to find one to ask about physically.
			 */
			amdzen_ccd_info(azn, df, ccdno, &nccx, NULL, NULL);
			val = amdzen_ccd_core_en(azn, df, ccdno);
			if (val == 0) {
				ccdno++;
				continue;
			}

			/*
			 * There exists an enabled physical core. Find the first
			 * index of it and map it to the corresponding CCD and
			 * CCX. ddi_ffs is the bit index, but we want the
			 * physical core number, hence the -1.
			 */
			core_bit = ddi_ffs(val);
			ASSERT3S(core_bit, !=, 0);
			pcoreno = core_bit - 1;

			/*
			 * Unfortunately SMU::PWR::THREAD_CONFIGURATION gives us
			 * the Number of logical cores that are present in the
			 * complex, not the total number of physical cores.
			 * Right now we do assume that the physical and logical
			 * ccx numbering is equivalent (we have no other way of
			 * knowing if it is or isn't right now) and that we'd
			 * always have CCX0 before CCX1. AMD seems to suggest we
			 * can assume this, though it is a worrisome assumption.
			 */
			pccxno = pcoreno / azn->azn_ncore_per_ccx;
			ASSERT3U(pccxno, <, nccx);
			/*
			 * Read INITPKG0/7 for the chosen core and derive the
			 * APIC decomposition from their entity counts.
			 */
			pkg7_reg = SCFCTP_PMREG_INITPKG7(ccdno, pccxno,
			    pcoreno);
			pkg7 = amdzen_smn_read(azn, df, pkg7_reg);
			pkg0_reg = SCFCTP_PMREG_INITPKG0(ccdno, pccxno,
			    pcoreno);
			pkg0 = amdzen_smn_read(azn, df, pkg0_reg);
			amdzen_initpkg_to_apic(azn, pkg0, pkg7);
			return (B_TRUE);
		}
	}

	return (B_FALSE);
}
1599 
1600 /*
1601  * We have the fun job of trying to figure out what the correct form of the APIC
1602  * decomposition should be and how to break that into its logical components.
1603  * The way that we get at this is generation-specific unfortunately. Here's how
1604  * it works out:
1605  *
1606  * Zen 1-2	This era of CPUs are deceptively simple. The PPR for a given
1607  *		family defines exactly how the APIC ID is broken into logical
1608  *		components and it's fixed. That is, depending on whether or
1609  *		not SMT is enabled. Zen 1 and Zen 2 use different schemes for
1610  *		constructing this. The way that we're supposed to check if SMT
1611  *		is enabled is to use AMD leaf 8X1E and ask how many threads per
1612  *		core there are. We use the x86 feature set to determine that
1613  *		instead.
1614  *
1615  *		More specifically the Zen 1 scheme is 7 bits long. The bits have
1616  *		the following meanings.
1617  *
1618  *		[6]   Socket ID
1619  *		[5:4] Node ID
1620  *		[3]   Logical CCX ID
1621  *		With SMT		Without SMT
1622  *		[2:1] Logical Core ID	[2]   hardcoded to zero
1623  *		[0] Thread ID		[1:0] Logical Core ID
1624  *
1625  *		The following is the Zen 2 scheme assuming SMT. The Zen 2 scheme
1626  *		without SMT shifts everything to the right by one bit.
1627  *
1628  *		[7]   Socket ID
1629  *		[6:4] Logical CCD ID
1630  *		[3]   Logical CCX ID
1631  *		[2:1] Logical Core ID
1632  *		[0]   Thread ID
1633  *
1634  * Zen 3	Zen 3 CPUs moved past the fixed APIC ID format that Zen 1 and
1635  *		Zen 2 had, but also don't give us the nice way of discovering
 *		this via CPUID that Zen 4 did. The APIC ID uses a given
1637  *		number of bits for each logical component that exists, but the
1638  *		exact number varies based on what's actually present. To get at
1639  *		this we use a piece of data that is embedded in the SCFCTP
1640  *		(Scalable Control Fabric, Clocks, Test, Power Gating). This can
1641  *		be used to determine how many logical entities of each kind the
1642  *		system thinks exist. While we could use the various CPUID
1643  *		topology items to try to speed this up, they don't tell us the
1644  *		die information that we need to do this.
1645  *
1646  * Zen 4+	Zen 4 introduced CPUID leaf 8000_0026h which gives us a means
1647  *		for determining how to extract the CCD, CCX, and related pieces
1648  *		out of the device. One thing we have to be aware of is that when
1649  *		the CCD and CCX shift are the same, that means that there is
1650  *		only a single CCX and therefore have to take that into account
1651  *		appropriately. This is the case generally on Zen 4 platforms,
1652  *		but not on Bergamo. Until we can confirm the actual CPUID leaf
1653  *		values that we receive in the cases of Bergamo and others, we
1654  *		opt instead to use the same SCFCTP scheme as Zen 3.
1655  */
static boolean_t
amdzen_determine_apic_decomp(amdzen_t *azn)
{
	amdzen_apic_decomp_t *apic = &azn->azn_apic_decomp;
	boolean_t smt = is_x86_feature(x86_featureset, X86FSET_HTT);

	switch (uarchrev_uarch(azn->azn_uarchrev)) {
	case X86_UARCH_AMD_ZEN1:
	case X86_UARCH_AMD_ZENPLUS:
		/*
		 * Fixed Zen 1 layout from the block comment above:
		 * [6] socket, [5:4] node, [3] CCX, then core/thread depending
		 * on SMT.
		 */
		apic->aad_sock_mask = 0x40;
		apic->aad_sock_shift = 6;
		apic->aad_die_mask = 0x30;
		apic->aad_die_shift = 4;
		apic->aad_ccd_mask = 0;
		apic->aad_ccd_shift = 0;
		apic->aad_ccx_mask = 0x08;
		apic->aad_ccx_shift = 3;

		if (smt) {
			apic->aad_core_mask = 0x06;
			apic->aad_core_shift = 1;
			apic->aad_thread_mask = 0x1;
			apic->aad_thread_shift = 0;
		} else {
			apic->aad_core_mask = 0x03;
			apic->aad_core_shift = 0;
			apic->aad_thread_mask = 0;
			apic->aad_thread_shift = 0;
		}
		break;
	case X86_UARCH_AMD_ZEN2:
		/*
		 * Fixed Zen 2 layout from the block comment above; without SMT
		 * everything shifts right by one bit.
		 */
		if (smt) {
			apic->aad_sock_mask = 0x80;
			apic->aad_sock_shift = 7;
			apic->aad_die_mask = 0;
			apic->aad_die_shift = 0;
			apic->aad_ccd_mask = 0x70;
			apic->aad_ccd_shift = 4;
			apic->aad_ccx_mask = 0x08;
			apic->aad_ccx_shift = 3;
			apic->aad_core_mask = 0x06;
			apic->aad_core_shift = 1;
			apic->aad_thread_mask = 0x01;
			apic->aad_thread_shift = 0;
		} else {
			apic->aad_sock_mask = 0x40;
			apic->aad_sock_shift = 6;
			apic->aad_die_mask = 0;
			apic->aad_die_shift = 0;
			apic->aad_ccd_mask = 0x38;
			apic->aad_ccd_shift = 3;
			apic->aad_ccx_mask = 0x04;
			apic->aad_ccx_shift = 2;
			apic->aad_core_mask = 0x3;
			apic->aad_core_shift = 0;
			apic->aad_thread_mask = 0;
			apic->aad_thread_shift = 0;
		}
		break;
	case X86_UARCH_AMD_ZEN3:
	case X86_UARCH_AMD_ZEN4:
	case X86_UARCH_AMD_ZEN5:
		/* Zen 3+ derive the layout from the SCFCTP INITPKG data. */
		return (amdzen_determine_apic_decomp_initpkg(azn));
	default:
		/* Unknown microarchitecture: signal failure to the caller. */
		return (B_FALSE);
	}
	return (B_TRUE);
}
1724 
1725 /*
1726  * Snapshot the number of cores that can exist in a CCX based on the Zen
1727  * microarchitecture revision. In Zen 1-4 this has been a constant number
1728  * regardless of the actual CPU Family. In Zen 5 this varies based upon whether
1729  * or not dense dies are being used.
1730  */
1731 static void
amdzen_determine_ncore_per_ccx(amdzen_t * azn)1732 amdzen_determine_ncore_per_ccx(amdzen_t *azn)
1733 {
1734 	switch (uarchrev_uarch(azn->azn_uarchrev)) {
1735 	case X86_UARCH_AMD_ZEN1:
1736 	case X86_UARCH_AMD_ZENPLUS:
1737 	case X86_UARCH_AMD_ZEN2:
1738 		azn->azn_ncore_per_ccx = 4;
1739 		break;
1740 	case X86_UARCH_AMD_ZEN3:
1741 	case X86_UARCH_AMD_ZEN4:
1742 		azn->azn_ncore_per_ccx = 8;
1743 		break;
1744 	case X86_UARCH_AMD_ZEN5:
1745 		if (chiprev_family(azn->azn_chiprev) ==
1746 		    X86_PF_AMD_DENSE_TURIN) {
1747 			azn->azn_ncore_per_ccx = 16;
1748 		} else {
1749 			azn->azn_ncore_per_ccx = 8;
1750 		}
1751 		break;
1752 	default:
1753 		panic("asked about non-Zen or unknown uarch");
1754 	}
1755 }
1756 
1757 /*
1758  * Attempt to determine a logical CCD number of a given CCD where we don't have
1759  * hardware support for L3::SCFCTP::PMREG_INITPKG* (e.g. pre-Zen 3 systems).
1760  * The CCD numbers that we have are the in the physical space. Likely because of
1761  * how the orientation of CCM numbers map to physical locations and the layout
1762  * of them within the package, we haven't found a good way using the core DFv3
1763  * registers to determine if a given CCD is actually present or not as generally
1764  * all the CCMs are left enabled. Instead we use SMU::PWR::DIE_ID as a proxy to
1765  * determine CCD presence.
1766  */
1767 static uint32_t
amdzen_ccd_log_id_zen2(amdzen_t * azn,amdzen_df_t * df,const amdzen_df_ent_t * targ)1768 amdzen_ccd_log_id_zen2(amdzen_t *azn, amdzen_df_t *df,
1769     const amdzen_df_ent_t *targ)
1770 {
1771 	uint32_t smnid = 0;
1772 	uint32_t logid = 0;
1773 
1774 	for (uint_t i = 0; i < df->adf_nents; i++) {
1775 		const amdzen_df_ent_t *ent = &df->adf_ents[i];
1776 
1777 		if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0) {
1778 			continue;
1779 		}
1780 
1781 		if (ent->adfe_inst_id == targ->adfe_inst_id) {
1782 			return (logid);
1783 		}
1784 
1785 		if (ent->adfe_type == targ->adfe_type &&
1786 		    ent->adfe_subtype == targ->adfe_subtype) {
1787 			boolean_t present = amdzen_ccd_present(azn, df, smnid);
1788 			smnid++;
1789 			if (present) {
1790 				logid++;
1791 			}
1792 		}
1793 	}
1794 
1795 	panic("asked to match against invalid DF entity %p in df %p", targ, df);
1796 }
1797 
1798 static void
amdzen_ccd_fill_core_initpkg0(amdzen_t * azn,amdzen_df_t * df,amdzen_topo_ccd_t * ccd,amdzen_topo_ccx_t * ccx,amdzen_topo_core_t * core,boolean_t * ccd_set,boolean_t * ccx_set)1799 amdzen_ccd_fill_core_initpkg0(amdzen_t *azn, amdzen_df_t *df,
1800     amdzen_topo_ccd_t *ccd, amdzen_topo_ccx_t *ccx, amdzen_topo_core_t *core,
1801     boolean_t *ccd_set, boolean_t *ccx_set)
1802 {
1803 	smn_reg_t pkg0_reg;
1804 	uint32_t pkg0;
1805 
1806 	pkg0_reg = SCFCTP_PMREG_INITPKG0(ccd->atccd_phys_no, ccx->atccx_phys_no,
1807 	    core->atcore_phys_no);
1808 	pkg0 = amdzen_smn_read(azn, df, pkg0_reg);
1809 	core->atcore_log_no = SCFCTP_PMREG_INITPKG0_GET_LOG_CORE(pkg0);
1810 
1811 	if (!*ccx_set) {
1812 		ccx->atccx_log_no = SCFCTP_PMREG_INITPKG0_GET_LOG_CCX(pkg0);
1813 		*ccx_set = B_TRUE;
1814 	}
1815 
1816 	if (!*ccd_set) {
1817 		ccd->atccd_log_no = SCFCTP_PMREG_INITPKG0_GET_LOG_DIE(pkg0);
1818 		*ccd_set = B_TRUE;
1819 	}
1820 }
1821 
1822 /*
1823  * Attempt to fill in the physical topology information for this given CCD.
1824  * There are a few steps to this that we undertake to perform this as follows:
1825  *
1826  * 1) First we determine whether the CCD is actually present or not by reading
1827  * SMU::PWR::DIE_ID. CCDs that are not installed will still have an enabled DF
1828  * entry it appears, but the request for the die ID will returns an invalid
1829  * read (all 1s). This die ID should match what we think of as the SMN number
1830  * below. If not, we're in trouble and the rest of this is in question.
1831  *
1832  * 2) We use the SMU::PWR registers to determine how many logical and physical
1833  * cores are present in this CCD and how they are split amongst the CCX. Here we
1834  * need to encode the CPU to CCX core size rankings. Through this process we
1835  * determine and fill out which threads and cores are enabled.
1836  *
1837  * 3) In Zen 3+ we then will read each core's INITPK0 values to ensure that we
1838  * have a proper physical to logical mapping, at which point we can fill in the
1839  * APIC IDs. For Zen 2, we will set the AMDZEN_TOPO_CCD_F_CORE_PHYS_UNKNOWN to
1840  * indicate that we just mapped the first logical processor to the first enabled
1841  * core.
1842  *
1843  * 4) Once we have the logical IDs determined we will construct the APIC ID that
1844  * we expect this to have.
1845  *
1846  * Steps (2) - (4) are intertwined and done together.
1847  */
static void
amdzen_ccd_fill_topo(amdzen_t *azn, amdzen_df_t *df, amdzen_df_ent_t *ent,
    amdzen_topo_ccd_t *ccd)
{
	uint32_t nccx, core_en, thread_en;
	uint32_t nlcore_per_ccx, nthreads_per_core;
	uint32_t sockid, dieid, compid;
	const uint32_t ccdno = ccd->atccd_phys_no;
	const x86_uarch_t uarch = uarchrev_uarch(azn->azn_uarchrev);
	boolean_t pkg0_ids, logccd_set = B_FALSE;

	ASSERT(MUTEX_HELD(&azn->azn_mutex));
	if (!amdzen_ccd_present(azn, df, ccdno)) {
		ccd->atccd_err = AMDZEN_TOPO_CCD_E_CCD_MISSING;
		return;
	}

	/* Step (2): how many CCXs, logical cores, and threads this CCD has. */
	amdzen_ccd_info(azn, df, ccdno, &nccx, &nlcore_per_ccx,
	    &nthreads_per_core);
	ASSERT3U(nccx, <=, AMDZEN_TOPO_CCD_MAX_CCX);

	core_en = amdzen_ccd_core_en(azn, df, ccdno);
	thread_en = amdzen_ccd_thread_en(azn, df, ccdno);

	/*
	 * The BSP is never enabled in a conventional sense and therefore the
	 * bit is reserved and left as 0. As the BSP should be in the first CCD,
	 * we go through and OR back in the bit lest we think the thread isn't
	 * enabled.
	 */
	if (ccdno == 0) {
		thread_en |= 1;
	}

	ccd->atccd_phys_no = ccdno;
	/* Step (3): choose Zen 3+ INITPKG0-based IDs vs. Zen 2 ordering. */
	if (uarch >= X86_UARCH_AMD_ZEN3) {
		pkg0_ids = B_TRUE;
	} else {
		ccd->atccd_flags |= AMDZEN_TOPO_CCD_F_CORE_PHYS_UNKNOWN;
		pkg0_ids = B_FALSE;

		/*
		 * Determine the CCD logical ID for Zen 2 now since this doesn't
		 * rely upon needing a valid physical core.
		 */
		ccd->atccd_log_no = amdzen_ccd_log_id_zen2(azn, df, ent);
		logccd_set = B_TRUE;
	}

	/*
	 * To construct the APIC ID we need to know the socket and die (not CCD)
	 * this is on. We deconstruct the CCD's fabric ID to determine that.
	 * Note that compid is decomposed but not needed for the APIC ID.
	 */
	zen_fabric_id_decompose(&df->adf_decomp, ent->adfe_fabric_id, &sockid,
	    &dieid, &compid);

	/*
	 * At this point we have all the information about the CCD, the number
	 * of CCX instances, and which physical cores and threads are enabled.
	 * Currently we assume that if we have one CCX enabled, then it is
	 * always CCX0. We cannot find evidence of a two CCX supporting part
	 * that doesn't always ship with both CCXs present and enabled.
	 */
	ccd->atccd_nlog_ccx = ccd->atccd_nphys_ccx = nccx;
	for (uint32_t ccxno = 0; ccxno < nccx; ccxno++) {
		amdzen_topo_ccx_t *ccx = &ccd->atccd_ccx[ccxno];
		/* Slice this CCX's cores out of the CCD-wide enable bitmap. */
		const uint32_t core_mask = (1 << azn->azn_ncore_per_ccx) - 1;
		const uint32_t core_shift = ccxno * azn->azn_ncore_per_ccx;
		const uint32_t ccx_core_en = (core_en >> core_shift) &
		    core_mask;
		boolean_t logccx_set = B_FALSE;

		ccd->atccd_ccx_en[ccxno] = 1;
		ccx->atccx_phys_no = ccxno;
		ccx->atccx_nphys_cores = azn->azn_ncore_per_ccx;
		ccx->atccx_nlog_cores = nlcore_per_ccx;

		/* Without INITPKG0 the logical CCX mirrors the physical one. */
		if (!pkg0_ids) {
			ccx->atccx_log_no = ccx->atccx_phys_no;
			logccx_set = B_TRUE;
		}

		for (uint32_t coreno = 0, logcorezen2 = 0;
		    coreno < azn->azn_ncore_per_ccx; coreno++) {
			amdzen_topo_core_t *core = &ccx->atccx_cores[coreno];

			if ((ccx_core_en & (1 << coreno)) == 0) {
				continue;
			}

			ccx->atccx_core_en[coreno] = 1;
			core->atcore_phys_no = coreno;

			/*
			 * Now that we have the physical core number present, we
			 * must determine the logical core number and fill out
			 * the logical CCX/CCD if it has not been set. We must
			 * do this before we attempt to look at which threads
			 * are enabled, because that operates based upon logical
			 * core number.
			 *
			 * For Zen 2 we do not have INITPKG0 at our disposal. We
			 * currently assume (and tag for userland with the
			 * AMDZEN_TOPO_CCD_F_CORE_PHYS_UNKNOWN flag) that we are
			 * mapping logical cores to physicals in the order of
			 * appearance.
			 */
			if (pkg0_ids) {
				amdzen_ccd_fill_core_initpkg0(azn, df, ccd, ccx,
				    core, &logccd_set, &logccx_set);
			} else {
				core->atcore_log_no = logcorezen2;
				logcorezen2++;
			}

			/*
			 * Determining which bits to use for the thread is a bit
			 * weird here. Thread IDs within a CCX are logical, but
			 * there are always physically spaced CCX sizes. See the
			 * comment at the definition for SMU::PWR::THREAD_ENABLE
			 * for more information.
			 */
			const uint32_t thread_shift = (ccx->atccx_nphys_cores *
			    ccx->atccx_log_no + core->atcore_log_no) *
			    nthreads_per_core;
			/* Mask is 0x1 for 1 thread/core, 0x3 for 2. */
			const uint32_t thread_mask = (nthreads_per_core << 1) -
			    1;
			const uint32_t core_thread_en = (thread_en >>
			    thread_shift) & thread_mask;
			core->atcore_nthreads = nthreads_per_core;
			core->atcore_thr_en[0] = core_thread_en & 0x01;
			core->atcore_thr_en[1] = core_thread_en & 0x02;
#ifdef	DEBUG
			if (nthreads_per_core == 1) {
				VERIFY0(core->atcore_thr_en[1]);
			}
#endif
			/* Step (4): compose the expected APIC ID per thread. */
			for (uint32_t thrno = 0; thrno < core->atcore_nthreads;
			    thrno++) {
				ASSERT3U(core->atcore_thr_en[thrno], !=, 0);

				zen_apic_id_compose(&azn->azn_apic_decomp,
				    sockid, dieid, ccd->atccd_log_no,
				    ccx->atccx_log_no, core->atcore_log_no,
				    thrno, &core->atcore_apicids[thrno]);

			}
		}

		ASSERT3U(logccx_set, ==, B_TRUE);
		ASSERT3U(logccd_set, ==, B_TRUE);
	}
}
2001 
/*
 * Taskq callback that finishes bringing up the nexus once every expected stub
 * has attached: record the CPU's uarch/chip revision, map the DF stubs into
 * per-DF state, set up APIC decomposition, and enumerate child devices. On
 * exit the attach state flags are updated and waiters on azn_cv are woken.
 */
static void
amdzen_nexus_init(void *arg)
{
	uint_t i;
	amdzen_t *azn = arg;

	/*
	 * Assign the requisite identifying information for this CPU.
	 */
	azn->azn_uarchrev = cpuid_getuarchrev(CPU);
	azn->azn_chiprev = cpuid_getchiprev(CPU);

	/*
	 * Go through all of the stubs and assign the DF entries.
	 */
	mutex_enter(&azn->azn_mutex);
	if (!amdzen_map_dfs(azn) || !amdzen_check_dfs(azn)) {
		azn->azn_flags |= AMDZEN_F_MAP_ERROR;
		goto done;
	}

	for (i = 0; i < AMDZEN_MAX_DFS; i++) {
		amdzen_df_t *df = &azn->azn_dfs[i];

		if ((df->adf_flags & AMDZEN_DF_F_VALID) == 0)
			continue;
		amdzen_setup_df(azn, df);
		amdzen_find_nb(azn, df);
	}

	amdzen_determine_ncore_per_ccx(azn);

	if (amdzen_determine_apic_decomp(azn)) {
		azn->azn_flags |= AMDZEN_F_APIC_DECOMP_VALID;
	}

	/*
	 * Not all children may be installed. As such, we do not treat the
	 * failure of a child as fatal to the driver.
	 *
	 * The mutex is dropped across child creation as that path may block.
	 */
	mutex_exit(&azn->azn_mutex);
	for (i = 0; i < ARRAY_SIZE(amdzen_children); i++) {
		(void) amdzen_create_child(azn, &amdzen_children[i]);
	}
	mutex_enter(&azn->azn_mutex);

done:
	/* Transition DISPATCHED -> COMPLETE and wake anyone waiting on us. */
	azn->azn_flags &= ~AMDZEN_F_ATTACH_DISPATCHED;
	azn->azn_flags |= AMDZEN_F_ATTACH_COMPLETE;
	azn->azn_taskqid = TASKQID_INVALID;
	cv_broadcast(&azn->azn_cv);
	mutex_exit(&azn->azn_mutex);
}
2055 
2056 static int
amdzen_stub_scan_cb(dev_info_t * dip,void * arg)2057 amdzen_stub_scan_cb(dev_info_t *dip, void *arg)
2058 {
2059 	amdzen_t *azn = arg;
2060 	uint16_t vid, did;
2061 	int *regs;
2062 	uint_t nregs, i;
2063 	boolean_t match = B_FALSE;
2064 
2065 	if (dip == ddi_root_node()) {
2066 		return (DDI_WALK_CONTINUE);
2067 	}
2068 
2069 	/*
2070 	 * If a node in question is not a pci node, then we have no interest in
2071 	 * it as all the stubs that we care about are related to pci devices.
2072 	 */
2073 	if (strncmp("pci", ddi_get_name(dip), 3) != 0) {
2074 		return (DDI_WALK_PRUNECHILD);
2075 	}
2076 
2077 	/*
2078 	 * If we can't get a device or vendor ID and prove that this is an AMD
2079 	 * part, then we don't care about it.
2080 	 */
2081 	vid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
2082 	    "vendor-id", PCI_EINVAL16);
2083 	did = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
2084 	    "device-id", PCI_EINVAL16);
2085 	if (vid == PCI_EINVAL16 || did == PCI_EINVAL16) {
2086 		return (DDI_WALK_CONTINUE);
2087 	}
2088 
2089 	if (vid != AMDZEN_PCI_VID_AMD && vid != AMDZEN_PCI_VID_HYGON) {
2090 		return (DDI_WALK_CONTINUE);
2091 	}
2092 
2093 	for (i = 0; i < ARRAY_SIZE(amdzen_nb_ids); i++) {
2094 		if (amdzen_nb_ids[i] == did) {
2095 			match = B_TRUE;
2096 		}
2097 	}
2098 
2099 	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
2100 	    "reg", &regs, &nregs) != DDI_PROP_SUCCESS) {
2101 		return (DDI_WALK_CONTINUE);
2102 	}
2103 
2104 	if (nregs == 0) {
2105 		ddi_prop_free(regs);
2106 		return (DDI_WALK_CONTINUE);
2107 	}
2108 
2109 	if (PCI_REG_BUS_G(regs[0]) == AMDZEN_DF_BUSNO &&
2110 	    PCI_REG_DEV_G(regs[0]) >= AMDZEN_DF_FIRST_DEVICE) {
2111 		match = B_TRUE;
2112 	}
2113 
2114 	ddi_prop_free(regs);
2115 	if (match) {
2116 		mutex_enter(&azn->azn_mutex);
2117 		azn->azn_nscanned++;
2118 		mutex_exit(&azn->azn_mutex);
2119 	}
2120 
2121 	return (DDI_WALK_CONTINUE);
2122 }
2123 
/*
 * Taskq callback that walks the device tree counting expected stub devices
 * (via amdzen_stub_scan_cb). Once the count is known, either mark the system
 * unsupported (nothing found) or, if every counted stub has already attached,
 * dispatch amdzen_nexus_init to complete setup.
 */
static void
amdzen_stub_scan(void *arg)
{
	amdzen_t *azn = arg;

	mutex_enter(&azn->azn_mutex);
	azn->azn_nscanned = 0;
	mutex_exit(&azn->azn_mutex);

	/* The walk itself runs unlocked; the callback takes the mutex. */
	ddi_walk_devs(ddi_root_node(), amdzen_stub_scan_cb, azn);

	mutex_enter(&azn->azn_mutex);
	azn->azn_flags &= ~AMDZEN_F_SCAN_DISPATCHED;
	azn->azn_flags |= AMDZEN_F_SCAN_COMPLETE;

	if (azn->azn_nscanned == 0) {
		azn->azn_flags |= AMDZEN_F_UNSUPPORTED;
		azn->azn_taskqid = TASKQID_INVALID;
		cv_broadcast(&azn->azn_cv);
	} else if (azn->azn_npresent == azn->azn_nscanned) {
		/*
		 * All stubs already attached; otherwise the last stub to
		 * attach kicks off amdzen_nexus_init (see amdzen_attach_stub).
		 */
		azn->azn_flags |= AMDZEN_F_ATTACH_DISPATCHED;
		azn->azn_taskqid = taskq_dispatch(system_taskq,
		    amdzen_nexus_init, azn, TQ_SLEEP);
	}
	mutex_exit(&azn->azn_mutex);
}
2150 
2151 /*
2152  * Unfortunately we can't really let the stubs detach as we may need them to be
2153  * available for client operations. We may be able to improve this if we know
2154  * that the actual nexus is going away. However, as long as it's active, we need
2155  * all the stubs.
2156  */
2157 int
amdzen_detach_stub(dev_info_t * dip,ddi_detach_cmd_t cmd)2158 amdzen_detach_stub(dev_info_t *dip, ddi_detach_cmd_t cmd)
2159 {
2160 	if (cmd == DDI_SUSPEND) {
2161 		return (DDI_SUCCESS);
2162 	}
2163 
2164 	return (DDI_FAILURE);
2165 }
2166 
/*
 * Attach entry point for the stub driver. Validates that the device really is
 * an expected AMD/Hygon northbridge or DF function, maps its config space,
 * and enqueues it on the appropriate stub list. If this was the last expected
 * stub and the scan has finished, nexus initialization is dispatched.
 */
int
amdzen_attach_stub(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int *regs, reg;
	uint_t nregs, i;
	uint16_t vid, did;
	amdzen_stub_t *stub;
	amdzen_t *azn = amdzen_data;
	boolean_t valid = B_FALSE;
	boolean_t nb = B_FALSE;

	if (cmd == DDI_RESUME) {
		return (DDI_SUCCESS);
	} else if (cmd != DDI_ATTACH) {
		return (DDI_FAILURE);
	}

	/*
	 * Make sure that the stub that we've been asked to attach is a pci type
	 * device. If not, then there is no reason for us to proceed.
	 */
	if (strncmp("pci", ddi_get_name(dip), 3) != 0) {
		dev_err(dip, CE_WARN, "asked to attach a bad AMD Zen nexus "
		    "stub: %s", ddi_get_name(dip));
		return (DDI_FAILURE);
	}
	vid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "vendor-id", PCI_EINVAL16);
	did = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "device-id", PCI_EINVAL16);
	if (vid == PCI_EINVAL16 || did == PCI_EINVAL16) {
		dev_err(dip, CE_WARN, "failed to get PCI ID properties");
		return (DDI_FAILURE);
	}

	if (vid != AMDZEN_PCI_VID_AMD && vid != AMDZEN_PCI_VID_HYGON) {
		dev_err(dip, CE_WARN, "expected vendor ID (0x%x), found 0x%x",
		    cpuid_getvendor(CPU) == X86_VENDOR_HYGON ?
		    AMDZEN_PCI_VID_HYGON : AMDZEN_PCI_VID_AMD, vid);
		return (DDI_FAILURE);
	}

	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "reg", &regs, &nregs) != DDI_PROP_SUCCESS) {
		dev_err(dip, CE_WARN, "failed to get 'reg' property");
		return (DDI_FAILURE);
	}

	if (nregs == 0) {
		ddi_prop_free(regs);
		dev_err(dip, CE_WARN, "missing 'reg' property values");
		return (DDI_FAILURE);
	}
	/* Only the first 'reg' entry is needed for bus/dev/func decoding. */
	reg = *regs;
	ddi_prop_free(regs);

	/* Northbridge stubs are matched purely by device ID. */
	for (i = 0; i < ARRAY_SIZE(amdzen_nb_ids); i++) {
		if (amdzen_nb_ids[i] == did) {
			valid = B_TRUE;
			nb = B_TRUE;
		}
	}

	/* DF function stubs are matched by their bus/device placement. */
	if (!valid && PCI_REG_BUS_G(reg) == AMDZEN_DF_BUSNO &&
	    PCI_REG_DEV_G(reg) >= AMDZEN_DF_FIRST_DEVICE) {
		valid = B_TRUE;
		nb = B_FALSE;
	}

	if (!valid) {
		dev_err(dip, CE_WARN, "device %s didn't match the nexus list",
		    ddi_get_name(dip));
		return (DDI_FAILURE);
	}

	stub = kmem_alloc(sizeof (amdzen_stub_t), KM_SLEEP);
	if (pci_config_setup(dip, &stub->azns_cfgspace) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "failed to set up config space");
		kmem_free(stub, sizeof (amdzen_stub_t));
		return (DDI_FAILURE);
	}

	stub->azns_dip = dip;
	stub->azns_vid = vid;
	stub->azns_did = did;
	stub->azns_bus = PCI_REG_BUS_G(reg);
	stub->azns_dev = PCI_REG_DEV_G(reg);
	stub->azns_func = PCI_REG_FUNC_G(reg);
	ddi_set_driver_private(dip, stub);

	mutex_enter(&azn->azn_mutex);
	azn->azn_npresent++;
	if (nb) {
		list_insert_tail(&azn->azn_nb_stubs, stub);
	} else {
		list_insert_tail(&azn->azn_df_stubs, stub);
	}

	/*
	 * If the scan has completed and we are the last stub it counted,
	 * kick off the deferred nexus initialization.
	 */
	if ((azn->azn_flags & AMDZEN_F_TASKQ_MASK) == AMDZEN_F_SCAN_COMPLETE &&
	    azn->azn_nscanned == azn->azn_npresent) {
		azn->azn_flags |= AMDZEN_F_ATTACH_DISPATCHED;
		azn->azn_taskqid = taskq_dispatch(system_taskq,
		    amdzen_nexus_init, azn, TQ_SLEEP);
	}
	mutex_exit(&azn->azn_mutex);

	return (DDI_SUCCESS);
}
2275 
2276 static int
amdzen_bus_ctl(dev_info_t * dip,dev_info_t * rdip,ddi_ctl_enum_t ctlop,void * arg,void * result)2277 amdzen_bus_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
2278     void *arg, void *result)
2279 {
2280 	char buf[32];
2281 	dev_info_t *child;
2282 	const amdzen_child_data_t *acd;
2283 
2284 	switch (ctlop) {
2285 	case DDI_CTLOPS_REPORTDEV:
2286 		if (rdip == NULL) {
2287 			return (DDI_FAILURE);
2288 		}
2289 		cmn_err(CE_CONT, "amdzen nexus: %s@%s, %s%d\n",
2290 		    ddi_node_name(rdip), ddi_get_name_addr(rdip),
2291 		    ddi_driver_name(rdip), ddi_get_instance(rdip));
2292 		break;
2293 	case DDI_CTLOPS_INITCHILD:
2294 		child = arg;
2295 		if (child == NULL) {
2296 			dev_err(dip, CE_WARN, "!no child passed for "
2297 			    "DDI_CTLOPS_INITCHILD");
2298 		}
2299 
2300 		acd = ddi_get_parent_data(child);
2301 		if (acd == NULL) {
2302 			dev_err(dip, CE_WARN, "!missing child parent data");
2303 			return (DDI_FAILURE);
2304 		}
2305 
2306 		if (snprintf(buf, sizeof (buf), "%d", acd->acd_addr) >=
2307 		    sizeof (buf)) {
2308 			dev_err(dip, CE_WARN, "!failed to construct device "
2309 			    "addr due to overflow");
2310 			return (DDI_FAILURE);
2311 		}
2312 
2313 		ddi_set_name_addr(child, buf);
2314 		break;
2315 	case DDI_CTLOPS_UNINITCHILD:
2316 		child = arg;
2317 		if (child == NULL) {
2318 			dev_err(dip, CE_WARN, "!no child passed for "
2319 			    "DDI_CTLOPS_UNINITCHILD");
2320 		}
2321 
2322 		ddi_set_name_addr(child, NULL);
2323 		break;
2324 	default:
2325 		return (ddi_ctlops(dip, rdip, ctlop, arg, result));
2326 	}
2327 	return (DDI_SUCCESS);
2328 }
2329 
2330 static int
amdzen_topo_open(dev_t * devp,int flag,int otyp,cred_t * credp)2331 amdzen_topo_open(dev_t *devp, int flag, int otyp, cred_t *credp)
2332 {
2333 	minor_t m;
2334 	amdzen_t *azn = amdzen_data;
2335 
2336 	if (crgetzoneid(credp) != GLOBAL_ZONEID ||
2337 	    secpolicy_sys_config(credp, B_FALSE) != 0) {
2338 		return (EPERM);
2339 	}
2340 
2341 	if ((flag & (FEXCL | FNDELAY | FNONBLOCK)) != 0) {
2342 		return (EINVAL);
2343 	}
2344 
2345 	if (otyp != OTYP_CHR) {
2346 		return (EINVAL);
2347 	}
2348 
2349 	m = getminor(*devp);
2350 	if (m != AMDZEN_MINOR_TOPO) {
2351 		return (ENXIO);
2352 	}
2353 
2354 	mutex_enter(&azn->azn_mutex);
2355 	if ((azn->azn_flags & AMDZEN_F_IOCTL_MASK) !=
2356 	    AMDZEN_F_ATTACH_COMPLETE) {
2357 		mutex_exit(&azn->azn_mutex);
2358 		return (ENOTSUP);
2359 	}
2360 	mutex_exit(&azn->azn_mutex);
2361 
2362 	return (0);
2363 }
2364 
2365 static int
amdzen_topo_ioctl_base(amdzen_t * azn,intptr_t arg,int mode)2366 amdzen_topo_ioctl_base(amdzen_t *azn, intptr_t arg, int mode)
2367 {
2368 	amdzen_topo_base_t base;
2369 
2370 	bzero(&base, sizeof (base));
2371 	mutex_enter(&azn->azn_mutex);
2372 	base.atb_ndf = azn->azn_ndfs;
2373 
2374 	if ((azn->azn_flags & AMDZEN_F_APIC_DECOMP_VALID) == 0) {
2375 		mutex_exit(&azn->azn_mutex);
2376 		return (ENOTSUP);
2377 	}
2378 
2379 	base.atb_apic_decomp = azn->azn_apic_decomp;
2380 	for (uint_t i = 0; i < azn->azn_ndfs; i++) {
2381 		const amdzen_df_t *df = &azn->azn_dfs[i];
2382 
2383 		base.atb_maxdfent = MAX(base.atb_maxdfent, df->adf_nents);
2384 		if (i == 0) {
2385 			base.atb_rev = df->adf_rev;
2386 			base.atb_df_decomp = df->adf_decomp;
2387 		}
2388 	}
2389 	mutex_exit(&azn->azn_mutex);
2390 
2391 	if (ddi_copyout(&base, (void *)(uintptr_t)arg, sizeof (base),
2392 	    mode & FKIOCTL) != 0) {
2393 		return (EFAULT);
2394 	}
2395 
2396 	return (0);
2397 }
2398 
2399 /*
2400  * Fill in the peers. We only have this information prior to DF 4D2.  The way we
2401  * do is this is to just fill in all the entries and then zero out the ones that
2402  * aren't valid.
2403  */
2404 static void
amdzen_topo_ioctl_df_fill_peers(const amdzen_df_t * df,const amdzen_df_ent_t * ent,amdzen_topo_df_ent_t * topo_ent)2405 amdzen_topo_ioctl_df_fill_peers(const amdzen_df_t *df,
2406     const amdzen_df_ent_t *ent, amdzen_topo_df_ent_t *topo_ent)
2407 {
2408 	topo_ent->atde_npeers = DF_FBIINFO0_GET_FTI_PCNT(ent->adfe_info0);
2409 
2410 	if (df->adf_rev >= DF_REV_4D2) {
2411 		bzero(topo_ent->atde_peers, sizeof (topo_ent->atde_npeers));
2412 		return;
2413 	}
2414 
2415 	topo_ent->atde_peers[0] = DF_FBINFO1_GET_FTI0_NINSTID(ent->adfe_info1);
2416 	topo_ent->atde_peers[1] = DF_FBINFO1_GET_FTI1_NINSTID(ent->adfe_info1);
2417 	topo_ent->atde_peers[2] = DF_FBINFO1_GET_FTI2_NINSTID(ent->adfe_info1);
2418 	topo_ent->atde_peers[3] = DF_FBINFO1_GET_FTI3_NINSTID(ent->adfe_info1);
2419 	topo_ent->atde_peers[4] = DF_FBINFO2_GET_FTI4_NINSTID(ent->adfe_info2);
2420 	topo_ent->atde_peers[5] = DF_FBINFO2_GET_FTI5_NINSTID(ent->adfe_info2);
2421 
2422 	for (uint32_t i = topo_ent->atde_npeers; i < AMDZEN_TOPO_DF_MAX_PEERS;
2423 	    i++) {
2424 		topo_ent->atde_peers[i] = 0;
2425 	}
2426 }
2427 
2428 static void
amdzen_topo_ioctl_df_fill_ccm(const amdzen_df_ent_t * ent,amdzen_topo_df_ent_t * topo_ent)2429 amdzen_topo_ioctl_df_fill_ccm(const amdzen_df_ent_t *ent,
2430     amdzen_topo_df_ent_t *topo_ent)
2431 {
2432 	const amdzen_ccm_data_t *ccm = &ent->adfe_data.aded_ccm;
2433 	amdzen_topo_ccm_data_t *topo_ccm = &topo_ent->atde_data.atded_ccm;
2434 
2435 	topo_ccm->atcd_nccds = ccm->acd_nccds;
2436 	for (uint32_t i = 0; i < DF_MAX_CCDS_PER_CCM; i++) {
2437 		topo_ccm->atcd_ccd_en[i] = ccm->acd_ccd_en[i];
2438 		topo_ccm->atcd_ccd_ids[i] = ccm->acd_ccd_id[i];
2439 	}
2440 }
2441 
/*
 * Handle AMDZEN_TOPO_IOCTL_DF: copy out information about a single DF and,
 * when the caller supplied a buffer, each of its enabled entities. Supports
 * both ILP32 and LP64 callers via the usual data-model translation.
 */
static int
amdzen_topo_ioctl_df(amdzen_t *azn, intptr_t arg, int mode)
{
	uint_t model;
	uint32_t max_ents, nwritten;
	const amdzen_df_t *df;
	amdzen_topo_df_t topo_df;
#ifdef	_MULTI_DATAMODEL
	amdzen_topo_df32_t topo_df32;
#endif

	/*
	 * Copy in the caller's request, translating the 32-bit layout into
	 * the native structure when needed.
	 */
	model = ddi_model_convert_from(mode);
	switch (model) {
#ifdef	_MULTI_DATAMODEL
	case DDI_MODEL_ILP32:
		if (ddi_copyin((void *)(uintptr_t)arg, &topo_df32,
		    sizeof (topo_df32), mode & FKIOCTL) != 0) {
			return (EFAULT);
		}
		bzero(&topo_df, sizeof (topo_df));
		topo_df.atd_dfno = topo_df32.atd_dfno;
		topo_df.atd_df_buf_nents = topo_df32.atd_df_buf_nents;
		topo_df.atd_df_ents = (void *)(uintptr_t)topo_df32.atd_df_ents;
		break;
#endif
	case DDI_MODEL_NONE:
		if (ddi_copyin((void *)(uintptr_t)arg, &topo_df,
		    sizeof (topo_df), mode & FKIOCTL) != 0) {
			return (EFAULT);
		}
		break;
	default:
		return (ENOTSUP);
	}

	mutex_enter(&azn->azn_mutex);
	if (topo_df.atd_dfno >= azn->azn_ndfs) {
		mutex_exit(&azn->azn_mutex);
		return (EINVAL);
	}

	/* Fill in the DF-wide identification fields. */
	df = &azn->azn_dfs[topo_df.atd_dfno];
	topo_df.atd_nodeid = df->adf_nodeid;
	topo_df.atd_sockid = (df->adf_nodeid & df->adf_decomp.dfd_sock_mask) >>
	    df->adf_decomp.dfd_sock_shift;
	topo_df.atd_dieid = (df->adf_nodeid & df->adf_decomp.dfd_die_mask) >>
	    df->adf_decomp.dfd_die_shift;
	topo_df.atd_rev = df->adf_rev;
	topo_df.atd_major = df->adf_major;
	topo_df.atd_minor = df->adf_minor;
	topo_df.atd_df_act_nents = df->adf_nents;
	max_ents = MIN(topo_df.atd_df_buf_nents, df->adf_nents);

	/*
	 * With no entity buffer the caller only wants the header (e.g. to
	 * size a subsequent request).
	 */
	if (topo_df.atd_df_ents == NULL) {
		topo_df.atd_df_buf_nvalid = 0;
		mutex_exit(&azn->azn_mutex);
		goto copyout;
	}

	nwritten = 0;
	for (uint32_t i = 0; i < max_ents; i++) {
		amdzen_topo_df_ent_t topo_ent;
		const amdzen_df_ent_t *ent = &df->adf_ents[i];

		/*
		 * We opt not to include disabled elements right now. They
		 * generally don't have a valid type and there isn't much useful
		 * information we can get from them. This can be changed if we
		 * find a use case for them for userland topo.
		 */
		if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0)
			continue;

		bzero(&topo_ent, sizeof (topo_ent));
		topo_ent.atde_type = ent->adfe_type;
		topo_ent.atde_subtype = ent->adfe_subtype;
		topo_ent.atde_fabric_id = ent->adfe_fabric_id;
		topo_ent.atde_inst_id = ent->adfe_inst_id;
		amdzen_topo_ioctl_df_fill_peers(df, ent, &topo_ent);

		if (amdzen_dfe_is_ccm(df, ent)) {
			amdzen_topo_ioctl_df_fill_ccm(ent, &topo_ent);
		}

		/* Copy each entity directly into the user's array. */
		if (ddi_copyout(&topo_ent, &topo_df.atd_df_ents[nwritten],
		    sizeof (topo_ent), mode & FKIOCTL) != 0) {
			mutex_exit(&azn->azn_mutex);
			return (EFAULT);
		}
		nwritten++;
	}
	mutex_exit(&azn->azn_mutex);

	topo_df.atd_df_buf_nvalid = nwritten;
copyout:
	/* Translate the results back into the caller's data model. */
	switch (model) {
#ifdef	_MULTI_DATAMODEL
	case DDI_MODEL_ILP32:
		topo_df32.atd_nodeid = topo_df.atd_nodeid;
		topo_df32.atd_sockid = topo_df.atd_sockid;
		topo_df32.atd_dieid = topo_df.atd_dieid;
		topo_df32.atd_rev = topo_df.atd_rev;
		topo_df32.atd_major = topo_df.atd_major;
		topo_df32.atd_minor = topo_df.atd_minor;
		topo_df32.atd_df_buf_nvalid = topo_df.atd_df_buf_nvalid;
		topo_df32.atd_df_act_nents = topo_df.atd_df_act_nents;

		if (ddi_copyout(&topo_df32, (void *)(uintptr_t)arg,
		    sizeof (topo_df32), mode & FKIOCTL) != 0) {
			return (EFAULT);
		}
		break;
#endif
	case DDI_MODEL_NONE:
		if (ddi_copyout(&topo_df, (void *)(uintptr_t)arg,
		    sizeof (topo_df), mode & FKIOCTL) != 0) {
			return (EFAULT);
		}
		break;
	default:
		break;
	}


	return (0);
}
2568 
/*
 * Handle AMDZEN_TOPO_IOCTL_CCD: return the detailed topology of a single CCD
 * identified by DF number, CCM instance ID, and physical CCD number. The
 * expensive SMN-based discovery is done once and cached on the CCM's
 * acd_ccd_data slot; subsequent requests are served from that cache.
 */
static int
amdzen_topo_ioctl_ccd(amdzen_t *azn, intptr_t arg, int mode)
{
	amdzen_topo_ccd_t ccd, *ccdp;
	amdzen_df_t *df;
	amdzen_df_ent_t *ent;
	amdzen_ccm_data_t *ccm;
	uint32_t ccdno;
	size_t copyin_size = offsetof(amdzen_topo_ccd_t, atccd_err);

	/*
	 * Only copy in the identifying information so that way we can ensure
	 * the rest of the structure we return to the user doesn't contain
	 * anything unexpected in it.
	 */
	bzero(&ccd, sizeof (ccd));
	if (ddi_copyin((void *)(uintptr_t)arg, &ccd, copyin_size,
	    mode & FKIOCTL) != 0) {
		return (EFAULT);
	}

	/*
	 * Validation failures are reported through atccd_err in the copied-out
	 * structure rather than an ioctl error, hence the goto copyout below.
	 */
	mutex_enter(&azn->azn_mutex);
	if ((azn->azn_flags & AMDZEN_F_APIC_DECOMP_VALID) == 0) {
		ccd.atccd_err = AMDZEN_TOPO_CCD_E_NO_APIC_DECOMP;
		goto copyout;
	}

	df = amdzen_df_find(azn, ccd.atccd_dfno);
	if (df == NULL) {
		ccd.atccd_err = AMDZEN_TOPO_CCD_E_BAD_DFNO;
		goto copyout;
	}

	/*
	 * We don't have enough information to know how to construct this
	 * information in Zen 1 at this time, so refuse.
	 */
	if (df->adf_rev <= DF_REV_2) {
		ccd.atccd_err = AMDZEN_TOPO_CCD_E_SOC_UNSUPPORTED;
		goto copyout;
	}

	ent = amdzen_df_ent_find_by_instid(df, ccd.atccd_instid);
	if (ent == NULL) {
		ccd.atccd_err = AMDZEN_TOPO_CCD_E_BAD_INSTID;
		goto copyout;
	}

	if (!amdzen_dfe_is_ccm(df, ent)) {
		ccd.atccd_err = AMDZEN_TOPO_CCD_E_NOT_A_CCD;
		goto copyout;
	}

	/* Find which of the CCM's CCD slots matches the requested CCD. */
	ccm = &ent->adfe_data.aded_ccm;
	for (ccdno = 0; ccdno < DF_MAX_CCDS_PER_CCM; ccdno++) {
		if (ccm->acd_ccd_en[ccdno] != 0 &&
		    ccm->acd_ccd_id[ccdno] == ccd.atccd_phys_no) {
			break;
		}
	}

	if (ccdno == DF_MAX_CCDS_PER_CCM) {
		ccd.atccd_err = AMDZEN_TOPO_CCD_E_NOT_A_CCD;
		goto copyout;
	}

	if (ccm->acd_ccd_data[ccdno] == NULL) {
		/*
		 * We don't actually have this data. Go fill it out and save it
		 * for future use.
		 */
		ccdp = kmem_zalloc(sizeof (amdzen_topo_ccd_t), KM_NOSLEEP_LAZY);
		if (ccdp == NULL) {
			mutex_exit(&azn->azn_mutex);
			return (ENOMEM);
		}

		ccdp->atccd_dfno = ccd.atccd_dfno;
		ccdp->atccd_instid = ccd.atccd_instid;
		ccdp->atccd_phys_no = ccd.atccd_phys_no;
		amdzen_ccd_fill_topo(azn, df, ent, ccdp);
		ccm->acd_ccd_data[ccdno] = ccdp;
	}
	ASSERT3P(ccm->acd_ccd_data[ccdno], !=, NULL);
	bcopy(ccm->acd_ccd_data[ccdno], &ccd, sizeof (ccd));

copyout:
	mutex_exit(&azn->azn_mutex);
	if (ddi_copyout(&ccd, (void *)(uintptr_t)arg, sizeof (ccd),
	    mode & FKIOCTL) != 0) {
		return (EFAULT);
	}

	return (0);
}
2664 
2665 static int
amdzen_topo_ioctl(dev_t dev,int cmd,intptr_t arg,int mode,cred_t * credp,int * rvalp)2666 amdzen_topo_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
2667     cred_t *credp, int *rvalp)
2668 {
2669 	int ret;
2670 	amdzen_t *azn = amdzen_data;
2671 
2672 	if (getminor(dev) != AMDZEN_MINOR_TOPO) {
2673 		return (ENXIO);
2674 	}
2675 
2676 	if ((mode & FREAD) == 0) {
2677 		return (EBADF);
2678 	}
2679 
2680 	switch (cmd) {
2681 	case AMDZEN_TOPO_IOCTL_BASE:
2682 		ret = amdzen_topo_ioctl_base(azn, arg, mode);
2683 		break;
2684 	case AMDZEN_TOPO_IOCTL_DF:
2685 		ret = amdzen_topo_ioctl_df(azn, arg, mode);
2686 		break;
2687 	case AMDZEN_TOPO_IOCTL_CCD:
2688 		ret = amdzen_topo_ioctl_ccd(azn, arg, mode);
2689 		break;
2690 	default:
2691 		ret = ENOTTY;
2692 		break;
2693 	}
2694 
2695 	return (ret);
2696 }
2697 
2698 static int
amdzen_topo_close(dev_t dev,int flag,int otyp,cred_t * credp)2699 amdzen_topo_close(dev_t dev, int flag, int otyp, cred_t *credp)
2700 {
2701 	if (otyp != OTYP_CHR) {
2702 		return (EINVAL);
2703 	}
2704 
2705 	if (getminor(dev) != AMDZEN_MINOR_TOPO) {
2706 		return (ENXIO);
2707 	}
2708 
2709 	return (0);
2710 }
2711 
2712 static int
amdzen_attach(dev_info_t * dip,ddi_attach_cmd_t cmd)2713 amdzen_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
2714 {
2715 	amdzen_t *azn = amdzen_data;
2716 
2717 	if (cmd == DDI_RESUME) {
2718 		return (DDI_SUCCESS);
2719 	} else if (cmd != DDI_ATTACH) {
2720 		return (DDI_FAILURE);
2721 	}
2722 
2723 	mutex_enter(&azn->azn_mutex);
2724 	if (azn->azn_dip != NULL) {
2725 		dev_err(dip, CE_WARN, "driver is already attached!");
2726 		mutex_exit(&azn->azn_mutex);
2727 		return (DDI_FAILURE);
2728 	}
2729 
2730 	if (ddi_create_minor_node(dip, "topo", S_IFCHR, AMDZEN_MINOR_TOPO,
2731 	    DDI_PSEUDO, 0) != 0) {
2732 		dev_err(dip, CE_WARN, "failed to create topo minor node!");
2733 		mutex_exit(&azn->azn_mutex);
2734 		return (DDI_FAILURE);
2735 	}
2736 
2737 	azn->azn_dip = dip;
2738 	azn->azn_taskqid = taskq_dispatch(system_taskq, amdzen_stub_scan,
2739 	    azn, TQ_SLEEP);
2740 	azn->azn_flags |= AMDZEN_F_SCAN_DISPATCHED;
2741 	mutex_exit(&azn->azn_mutex);
2742 
2743 	return (DDI_SUCCESS);
2744 }
2745 
2746 static int
amdzen_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)2747 amdzen_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
2748 {
2749 	amdzen_t *azn = amdzen_data;
2750 
2751 	if (cmd == DDI_SUSPEND) {
2752 		return (DDI_SUCCESS);
2753 	} else if (cmd != DDI_DETACH) {
2754 		return (DDI_FAILURE);
2755 	}
2756 
2757 	mutex_enter(&azn->azn_mutex);
2758 	while (azn->azn_taskqid != TASKQID_INVALID) {
2759 		cv_wait(&azn->azn_cv, &azn->azn_mutex);
2760 	}
2761 
2762 	/*
2763 	 * If we've attached any stub drivers, e.g. this platform is important
2764 	 * for us, then we fail detach.
2765 	 */
2766 	if (!list_is_empty(&azn->azn_df_stubs) ||
2767 	    !list_is_empty(&azn->azn_nb_stubs)) {
2768 		mutex_exit(&azn->azn_mutex);
2769 		return (DDI_FAILURE);
2770 	}
2771 
2772 	ddi_remove_minor_node(azn->azn_dip, NULL);
2773 	azn->azn_dip = NULL;
2774 	mutex_exit(&azn->azn_mutex);
2775 
2776 	return (DDI_SUCCESS);
2777 }
2778 
2779 static void
amdzen_free(void)2780 amdzen_free(void)
2781 {
2782 	if (amdzen_data == NULL) {
2783 		return;
2784 	}
2785 
2786 	VERIFY(list_is_empty(&amdzen_data->azn_df_stubs));
2787 	list_destroy(&amdzen_data->azn_df_stubs);
2788 	VERIFY(list_is_empty(&amdzen_data->azn_nb_stubs));
2789 	list_destroy(&amdzen_data->azn_nb_stubs);
2790 	cv_destroy(&amdzen_data->azn_cv);
2791 	mutex_destroy(&amdzen_data->azn_mutex);
2792 	kmem_free(amdzen_data, sizeof (amdzen_t));
2793 	amdzen_data = NULL;
2794 }
2795 
2796 static void
amdzen_alloc(void)2797 amdzen_alloc(void)
2798 {
2799 	amdzen_data = kmem_zalloc(sizeof (amdzen_t), KM_SLEEP);
2800 	mutex_init(&amdzen_data->azn_mutex, NULL, MUTEX_DRIVER, NULL);
2801 	list_create(&amdzen_data->azn_df_stubs, sizeof (amdzen_stub_t),
2802 	    offsetof(amdzen_stub_t, azns_link));
2803 	list_create(&amdzen_data->azn_nb_stubs, sizeof (amdzen_stub_t),
2804 	    offsetof(amdzen_stub_t, azns_link));
2805 	cv_init(&amdzen_data->azn_cv, NULL, CV_DRIVER, NULL);
2806 }
2807 
/*
 * Character device entry points for the "topo" minor node. Only open,
 * close, and ioctl are meaningful here; all other operations are
 * unsupported.
 */
static struct cb_ops amdzen_topo_cb_ops = {
	.cb_open = amdzen_topo_open,
	.cb_close = amdzen_topo_close,
	.cb_strategy = nodev,
	.cb_print = nodev,
	.cb_dump = nodev,
	.cb_read = nodev,
	.cb_write = nodev,
	.cb_ioctl = amdzen_topo_ioctl,
	.cb_devmap = nodev,
	.cb_mmap = nodev,
	.cb_segmap = nodev,
	.cb_chpoll = nochpoll,
	.cb_prop_op = ddi_prop_op,
	.cb_flag = D_MP,
	.cb_rev = CB_REV,
	.cb_aread = nodev,
	.cb_awrite = nodev
};
2827 
/*
 * Bus operations for the children that this nexus enumerates. Our children
 * do not perform DMA or mappings through us, so everything other than
 * property operations and bus_ctl is stubbed out.
 */
struct bus_ops amdzen_bus_ops = {
	.busops_rev = BUSO_REV,
	.bus_map = nullbusmap,
	.bus_dma_map = ddi_no_dma_map,
	.bus_dma_allochdl = ddi_no_dma_allochdl,
	.bus_dma_freehdl = ddi_no_dma_freehdl,
	.bus_dma_bindhdl = ddi_no_dma_bindhdl,
	.bus_dma_unbindhdl = ddi_no_dma_unbindhdl,
	.bus_dma_flush = ddi_no_dma_flush,
	.bus_dma_win = ddi_no_dma_win,
	.bus_dma_ctl = ddi_no_dma_mctl,
	.bus_prop_op = ddi_bus_prop_op,
	.bus_ctl = amdzen_bus_ctl
};
2842 
/*
 * Device operations. As a nexus driver we supply both bus_ops (for the
 * children we enumerate) and cb_ops (for the topology character device).
 */
static struct dev_ops amdzen_dev_ops = {
	.devo_rev = DEVO_REV,
	.devo_refcnt = 0,
	.devo_getinfo = nodev,
	.devo_identify = nulldev,
	.devo_probe = nulldev,
	.devo_attach = amdzen_attach,
	.devo_detach = amdzen_detach,
	.devo_reset = nodev,
	.devo_quiesce = ddi_quiesce_not_needed,
	.devo_bus_ops = &amdzen_bus_ops,
	.devo_cb_ops = &amdzen_topo_cb_ops
};
2856 
/* Module linkage: this is a driver module. */
static struct modldrv amdzen_modldrv = {
	.drv_modops = &mod_driverops,
	.drv_linkinfo = "AMD Zen Nexus Driver",
	.drv_dev_ops = &amdzen_dev_ops
};
2862 
/* Module linkage vector handed to mod_install()/mod_remove(). */
static struct modlinkage amdzen_modlinkage = {
	.ml_rev = MODREV_1,
	.ml_linkage = { &amdzen_modldrv, NULL }
};
2867 
2868 int
_init(void)2869 _init(void)
2870 {
2871 	int ret;
2872 
2873 	if (cpuid_getvendor(CPU) != X86_VENDOR_AMD &&
2874 	    cpuid_getvendor(CPU) != X86_VENDOR_HYGON) {
2875 		return (ENOTSUP);
2876 	}
2877 
2878 	if ((ret = mod_install(&amdzen_modlinkage)) == 0) {
2879 		amdzen_alloc();
2880 	}
2881 
2882 	return (ret);
2883 }
2884 
/* Report module information via the standard mod_info(9F) path. */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&amdzen_modlinkage, modinfop));
}
2890 
2891 int
_fini(void)2892 _fini(void)
2893 {
2894 	int ret;
2895 
2896 	if ((ret = mod_remove(&amdzen_modlinkage)) == 0) {
2897 		amdzen_free();
2898 	}
2899 
2900 	return (ret);
2901 }
2902