1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2019, Joyent, Inc.
14  * Copyright 2025 Oxide Computer Company
15  */
16 
17 /*
18  * Nexus Driver for AMD Zen family systems. The purpose of this driver is to
19  * provide access to the following resources in a single, centralized fashion:
20  *
21  *  - The per-chip Data Fabric
22  *  - The North Bridge
23  *  - The System Management Network (SMN)
24  *
25  * This is a nexus driver as once we have attached to all the requisite
26  * components, we will enumerate child devices which consume this functionality.
27  *
28  * ------------------------
29  * Mapping Devices Together
30  * ------------------------
31  *
32  * The operating system needs to expose things like temperature sensors and DRAM
33  * configuration registers in terms of things that are meaningful to the system
34  * such as logical CPUs, cores, etc. This driver attaches to the PCI devices
35  * that represent the northbridge, data fabrics, and dies. Note that there are
36  * multiple northbridge and DF devices (one each per die) and this driver maps
37  * all of these three things together. Unfortunately, this requires some
38  * acrobatics as there is no direct way to map a northbridge to its
39  * corresponding die. Instead, we map a CPU die to a data fabric PCI device and
40  * a data fabric PCI device to a corresponding northbridge PCI device. This
 * transitive relationship allows us to map between northbridge and die.
42  *
43  * As each data fabric device is attached, based on vendor and device portions
44  * of the PCI ID, we add it to the DF stubs list in the global amdzen_t
45  * structure, amdzen_data->azn_df_stubs. We must now map these to logical CPUs.
46  *
47  * In current Zen based products, there is a direct mapping between processor
48  * nodes and a data fabric PCI device: all of the devices are on PCI Bus 0 and
49  * start from Device 0x18, so device 0x18 maps to processor node 0, 0x19 to
50  * processor node 1, etc. This means that to map a logical CPU to a data fabric
51  * device, we take its processor node id, add it to 0x18 and find the PCI device
52  * that is on bus 0 with that ID number. We already discovered the DF devices as
53  * described above.
54  *
55  * The northbridge PCI device has a well-defined device and function, but the
56  * bus that it is on varies. Each die has its own set of assigned PCI buses and
57  * its northbridge device is on the first die-specific bus. This implies that
58  * the northbridges do not show up on PCI bus 0, as that is the PCI bus that all
59  * of the data fabric devices are on and is not assigned to any particular die.
60  * Additionally, while the northbridge on the lowest-numbered PCI bus
61  * intuitively corresponds to processor node zero, hardware does not guarantee
62  * this. Because we don't want to be at the mercy of firmware, we don't rely on
63  * this ordering assumption, though we have yet to find a system that deviates
64  * from it, either.
65  *
66  * One of the registers in the data fabric device's function 0
67  * (AMDZEN_DF_F0_CFG_ADDR_CTL) happens to identify the first PCI bus that is
68  * associated with the processor node. This means that we can map a data fabric
69  * device to a northbridge by finding the northbridge whose PCI bus ID matches
70  * the value in the corresponding data fabric's AMDZEN_DF_F0_CFG_ADDR_CTL.
71  *
72  * Given all of the above, we can map a northbridge to a data fabric device and
73  * a die to a data fabric device. Because these are 1:1 mappings, there is a
 * transitive relationship from northbridge to die, and therefore we know which
75  * northbridge is associated with which processor die. This is summarized in the
76  * following image:
77  *
78  *  +-------+     +------------------------------------+     +--------------+
79  *  | Die 0 |---->| Data Fabric PCI BDF 0/18/0         |---->| Northbridge  |
80  *  +-------+     | AMDZEN_DF_F0_CFG_ADDR_CTL: bus 10  |     | PCI  10/0/0  |
81  *     ...        +------------------------------------+     +--------------+
82  *  +-------+     +------------------------------------+     +--------------+
83  *  | Die n |---->| Data Fabric PCI BDF 0/18+n/0       |---->| Northbridge  |
84  *  +-------+     | AMDZEN_DF_F0_CFG_ADDR_CTL: bus 133 |     | PCI 133/0/0  |
85  *                +------------------------------------+     +--------------+
86  *
87  * Note, the PCI buses used by the northbridges here are arbitrary examples that
88  * do not necessarily reflect actual hardware values; however, the
89  * bus/device/function (BDF) of the data fabric accurately models hardware. All
90  * BDF values are in hex.
91  *
92  * Starting with the Rome generation of processors (Family 17h Model 30-3Fh),
93  * AMD has multiple northbridges on a given die. All of these northbridges share
94  * the same data fabric and system management network port. From our perspective
95  * this means that some of the northbridge devices will be redundant and that we
96  * no longer have a 1:1 mapping between the northbridge and the data fabric
97  * devices. Every data fabric will have a northbridge, but not every northbridge
98  * will have a data fabric device mapped. Because we're always trying to map
99  * from a die to a northbridge and not the reverse, the fact that there are
100  * extra northbridge devices hanging around that we don't know about shouldn't
101  * be a problem.
102  *
103  * -------------------------------
104  * Attach and Detach Complications
105  * -------------------------------
106  *
107  * We need to map different PCI devices together. Each device is attached to a
108  * amdzen_stub driver to facilitate integration with the rest of the kernel PCI
109  * machinery and so we have to manage multiple dev_info_t structures, each of
110  * which may be independently attached and detached.
111  *
112  * This is not particularly complex for attach: our _init routine allocates the
113  * necessary mutex and list structures at module load time, and as each stub is
114  * attached, it calls into this code to be added to the appropriate list. When
115  * the nexus itself is attached, we walk the PCI device tree accumulating a
116  * counter for all devices we expect to be attached. Once the scan is complete
117  * and all such devices are accounted for (stub registration may be happening
118  * asynchronously with respect to nexus attach), we initialize the nexus device
119  * and the attach is complete.
120  *
121  * Most other device drivers support instances that can be brought back after
122  * detach, provided they are associated with an active minor node in the
123  * /devices file system. This driver is different. Once a stub device has been
124  * attached, we do not permit detaching the nexus driver instance, as the kernel
125  * does not give us interlocking guarantees between nexus and stub driver attach
126  * and detach. It is simplest to just unconditionally fail detach once a stub
127  * has attached.
128  *
129  * ---------------
130  * Exposed Devices
131  * ---------------
132  *
133  * Rather than try and have all of the different functions that could be
134  * provided in one driver, we have a nexus driver that tries to load child
135  * pseudo-device drivers that provide specific pieces of functionality.
136  *
137  * -------
138  * Locking
139  * -------
140  *
141  * The amdzen_data structure contains a single lock, azn_mutex.
142  *
143  * The various client functions here are intended for our nexus's direct
 * children, but have been designed in case someone else should depend on this
145  * driver. Once a DF has been discovered, the set of entities inside of it
146  * (adf_nents, adf_ents[]) is considered static, constant data, and iteration
147  * over them does not require locking. However, the discovery of the amd_df_t
148  * does. In addition, locking is required whenever performing register accesses
149  * to the DF or SMN.
150  *
151  * To summarize, one must hold the lock in the following circumstances:
152  *
153  *  - Looking up DF structures
154  *  - Reading or writing to DF registers
155  *  - Reading or writing to SMN registers
156  *
157  * In general, it is preferred that the lock be held across an entire client
 * operation if possible. The only time this becomes an issue is when we have
159  * callbacks into our callers (ala amdzen_c_df_iter()) as they may recursively
160  * call into us.
161  */
162 
163 #include <sys/modctl.h>
164 #include <sys/conf.h>
165 #include <sys/devops.h>
166 #include <sys/ddi.h>
167 #include <sys/sunddi.h>
168 #include <sys/pci.h>
169 #include <sys/sysmacros.h>
170 #include <sys/sunndi.h>
171 #include <sys/x86_archext.h>
172 #include <sys/cpuvar.h>
173 #include <sys/policy.h>
174 #include <sys/stat.h>
175 #include <sys/sunddi.h>
176 #include <sys/bitmap.h>
177 #include <sys/stdbool.h>
178 
179 #include <sys/amdzen/df.h>
180 #include <sys/amdzen/ccd.h>
181 #include "amdzen.h"
182 #include "amdzen_client.h"
183 #include "amdzen_topo.h"
184 
185 amdzen_t *amdzen_data;
186 
187 /*
188  * Internal minor nodes for devices that the nexus provides itself.
189  */
190 #define	AMDZEN_MINOR_TOPO	0
191 
/*
 * The set of northbridge IDs that this driver is interested in. Each entry is
 * preceded by a note naming the processors that it covers.
 */
static const uint16_t amdzen_nb_ids[] = {
	/* Zen uarch: Family 17h Models 00h-0fh (Ryzen, Epyc) */
	0x1450,
	/* Zen uarch: Family 17h Models 10h-2fh (Raven Ridge, Kestrel, Dali) */
	0x15d0,
	/* Zen 2/Zen 3 uarch: Family 17h/19h Rome, Milan, Matisse, Vermeer */
	0x1480,
	/* Zen 2/3 uarch: Family 17h/19h Renoir, Cezanne, Van Gogh */
	0x1630,
	/* Family 19h: Genoa and Bergamo */
	0x14a4,
	/* Family 17h Mendocino and Family 19h Rembrandt */
	0x14b5,
	/* Family 19h Raphael and Family 1Ah Models 40-4fh */
	0x14d8,
	/* Family 19h: Phoenix */
	0x14e8,
	/* Family 1Ah: Turin */
	0x153a,
	/* Family 1Ah Models 20-2fh and 70-77h */
	0x1507,
	/* Family 1Ah Models 60-6fh */
	0x1122
};
219 
220 typedef struct {
221 	char *acd_name;
222 	amdzen_child_t acd_addr;
223 	/*
224 	 * This indicates whether or not we should issue warnings to users when
225 	 * something happens specific to this instance. The main reason we don't
226 	 * want to is for optional devices that may not be installed as they are
227 	 * for development purposes (e.g. usmn, zen_udf); however, if there is
228 	 * an issue with the others we still want to know.
229 	 */
230 	bool acd_warn;
231 } amdzen_child_data_t;
232 
233 static const amdzen_child_data_t amdzen_children[] = {
234 	{ "smntemp", AMDZEN_C_SMNTEMP, true },
235 	{ "usmn", AMDZEN_C_USMN, false },
236 	{ "zen_udf", AMDZEN_C_ZEN_UDF, false },
237 	{ "zen_umc", AMDZEN_C_ZEN_UMC, true }
238 };
239 
240 static uint8_t
amdzen_stub_get8(amdzen_stub_t * stub,off_t reg)241 amdzen_stub_get8(amdzen_stub_t *stub, off_t reg)
242 {
243 	return (pci_config_get8(stub->azns_cfgspace, reg));
244 }
245 
246 static uint16_t
amdzen_stub_get16(amdzen_stub_t * stub,off_t reg)247 amdzen_stub_get16(amdzen_stub_t *stub, off_t reg)
248 {
249 	return (pci_config_get16(stub->azns_cfgspace, reg));
250 }
251 
252 static uint32_t
amdzen_stub_get32(amdzen_stub_t * stub,off_t reg)253 amdzen_stub_get32(amdzen_stub_t *stub, off_t reg)
254 {
255 	return (pci_config_get32(stub->azns_cfgspace, reg));
256 }
257 
258 static uint64_t
amdzen_stub_get64(amdzen_stub_t * stub,off_t reg)259 amdzen_stub_get64(amdzen_stub_t *stub, off_t reg)
260 {
261 	return (pci_config_get64(stub->azns_cfgspace, reg));
262 }
263 
264 static void
amdzen_stub_put8(amdzen_stub_t * stub,off_t reg,uint8_t val)265 amdzen_stub_put8(amdzen_stub_t *stub, off_t reg, uint8_t val)
266 {
267 	pci_config_put8(stub->azns_cfgspace, reg, val);
268 }
269 
270 static void
amdzen_stub_put16(amdzen_stub_t * stub,off_t reg,uint16_t val)271 amdzen_stub_put16(amdzen_stub_t *stub, off_t reg, uint16_t val)
272 {
273 	pci_config_put16(stub->azns_cfgspace, reg, val);
274 }
275 
276 static void
amdzen_stub_put32(amdzen_stub_t * stub,off_t reg,uint32_t val)277 amdzen_stub_put32(amdzen_stub_t *stub, off_t reg, uint32_t val)
278 {
279 	pci_config_put32(stub->azns_cfgspace, reg, val);
280 }
281 
282 static uint64_t
amdzen_df_read_regdef(amdzen_t * azn,amdzen_df_t * df,const df_reg_def_t def,uint8_t inst,boolean_t do_64)283 amdzen_df_read_regdef(amdzen_t *azn, amdzen_df_t *df, const df_reg_def_t def,
284     uint8_t inst, boolean_t do_64)
285 {
286 	df_reg_def_t ficaa;
287 	df_reg_def_t ficad;
288 	uint32_t val = 0;
289 	df_rev_t df_rev = azn->azn_dfs[0].adf_rev;
290 	VERIFY(df_reg_valid(df_rev, def));
291 
292 	VERIFY(MUTEX_HELD(&azn->azn_mutex));
293 	val = DF_FICAA_V2_SET_TARG_INST(val, 1);
294 	val = DF_FICAA_V2_SET_FUNC(val, def.drd_func);
295 	val = DF_FICAA_V2_SET_INST(val, inst);
296 	val = DF_FICAA_V2_SET_64B(val, do_64 ? 1 : 0);
297 
298 	switch (df_rev) {
299 	case DF_REV_2:
300 	case DF_REV_3:
301 	case DF_REV_3P5:
302 		ficaa = DF_FICAA_V2;
303 		ficad = DF_FICAD_LO_V2;
304 		val = DF_FICAA_V2_SET_REG(val, def.drd_reg >>
305 		    DF_FICAA_REG_SHIFT);
306 		break;
307 	case DF_REV_4:
308 	case DF_REV_4D2:
309 		ficaa = DF_FICAA_V4;
310 		ficad = DF_FICAD_LO_V4;
311 		val = DF_FICAA_V4_SET_REG(val, def.drd_reg >>
312 		    DF_FICAA_REG_SHIFT);
313 		break;
314 	default:
315 		panic("encountered unexpected DF rev: %u", df_rev);
316 	}
317 
318 	amdzen_stub_put32(df->adf_funcs[ficaa.drd_func], ficaa.drd_reg, val);
319 	if (do_64) {
320 		return (amdzen_stub_get64(df->adf_funcs[ficad.drd_func],
321 		    ficad.drd_reg));
322 	} else {
323 		return (amdzen_stub_get32(df->adf_funcs[ficad.drd_func],
324 		    ficad.drd_reg));
325 	}
326 }
327 
328 /*
329  * Perform a targeted 32-bit indirect read to a specific instance and function.
330  */
331 static uint32_t
amdzen_df_read32(amdzen_t * azn,amdzen_df_t * df,uint8_t inst,const df_reg_def_t def)332 amdzen_df_read32(amdzen_t *azn, amdzen_df_t *df, uint8_t inst,
333     const df_reg_def_t def)
334 {
335 	return (amdzen_df_read_regdef(azn, df, def, inst, B_FALSE));
336 }
337 
338 /*
339  * For a broadcast read, just go to the underlying PCI function and perform a
340  * read. At this point in time, we don't believe we need to use the FICAA/FICAD
341  * to access it (though it does have a broadcast mode).
342  */
343 static uint32_t
amdzen_df_read32_bcast(amdzen_t * azn,amdzen_df_t * df,const df_reg_def_t def)344 amdzen_df_read32_bcast(amdzen_t *azn, amdzen_df_t *df, const df_reg_def_t def)
345 {
346 	VERIFY(MUTEX_HELD(&azn->azn_mutex));
347 	return (amdzen_stub_get32(df->adf_funcs[def.drd_func], def.drd_reg));
348 }
349 
350 static uint64_t
amdzen_df_read64_bcast(amdzen_t * azn,amdzen_df_t * df,const df_reg_def_t def)351 amdzen_df_read64_bcast(amdzen_t *azn, amdzen_df_t *df, const df_reg_def_t def)
352 {
353 	VERIFY(MUTEX_HELD(&azn->azn_mutex));
354 	return (amdzen_stub_get64(df->adf_funcs[def.drd_func], def.drd_reg));
355 }
356 
357 static uint32_t
amdzen_smn_read(amdzen_t * azn,amdzen_df_t * df,const smn_reg_t reg)358 amdzen_smn_read(amdzen_t *azn, amdzen_df_t *df, const smn_reg_t reg)
359 {
360 	const uint32_t base_addr = SMN_REG_ADDR_BASE(reg);
361 	const uint32_t addr_off = SMN_REG_ADDR_OFF(reg);
362 
363 	VERIFY(SMN_REG_IS_NATURALLY_ALIGNED(reg));
364 	VERIFY(MUTEX_HELD(&azn->azn_mutex));
365 	amdzen_stub_put32(df->adf_nb, AMDZEN_NB_SMN_ADDR, base_addr);
366 
367 	switch (SMN_REG_SIZE(reg)) {
368 	case 1:
369 		return ((uint32_t)amdzen_stub_get8(df->adf_nb,
370 		    AMDZEN_NB_SMN_DATA + addr_off));
371 	case 2:
372 		return ((uint32_t)amdzen_stub_get16(df->adf_nb,
373 		    AMDZEN_NB_SMN_DATA + addr_off));
374 	case 4:
375 		return (amdzen_stub_get32(df->adf_nb, AMDZEN_NB_SMN_DATA));
376 	default:
377 		panic("unreachable invalid SMN register size %u",
378 		    SMN_REG_SIZE(reg));
379 	}
380 }
381 
382 static void
amdzen_smn_write(amdzen_t * azn,amdzen_df_t * df,const smn_reg_t reg,const uint32_t val)383 amdzen_smn_write(amdzen_t *azn, amdzen_df_t *df, const smn_reg_t reg,
384     const uint32_t val)
385 {
386 	const uint32_t base_addr = SMN_REG_ADDR_BASE(reg);
387 	const uint32_t addr_off = SMN_REG_ADDR_OFF(reg);
388 
389 	VERIFY(SMN_REG_IS_NATURALLY_ALIGNED(reg));
390 	VERIFY(SMN_REG_VALUE_FITS(reg, val));
391 	VERIFY(MUTEX_HELD(&azn->azn_mutex));
392 	amdzen_stub_put32(df->adf_nb, AMDZEN_NB_SMN_ADDR, base_addr);
393 
394 	switch (SMN_REG_SIZE(reg)) {
395 	case 1:
396 		amdzen_stub_put8(df->adf_nb, AMDZEN_NB_SMN_DATA + addr_off,
397 		    (uint8_t)val);
398 		break;
399 	case 2:
400 		amdzen_stub_put16(df->adf_nb, AMDZEN_NB_SMN_DATA + addr_off,
401 		    (uint16_t)val);
402 		break;
403 	case 4:
404 		amdzen_stub_put32(df->adf_nb, AMDZEN_NB_SMN_DATA, val);
405 		break;
406 	default:
407 		panic("unreachable invalid SMN register size %u",
408 		    SMN_REG_SIZE(reg));
409 	}
410 }
411 
412 /*
413  * This is an unfortunate necessity due to the evolution of the CCM DF values.
414  */
415 static inline boolean_t
amdzen_df_at_least(const amdzen_df_t * df,uint8_t major,uint8_t minor)416 amdzen_df_at_least(const amdzen_df_t *df, uint8_t major, uint8_t minor)
417 {
418 	return (df->adf_major > major || (df->adf_major == major &&
419 	    df->adf_minor >= minor));
420 }
421 
422 static amdzen_df_t *
amdzen_df_find(amdzen_t * azn,uint_t dfno)423 amdzen_df_find(amdzen_t *azn, uint_t dfno)
424 {
425 	uint_t i;
426 
427 	ASSERT(MUTEX_HELD(&azn->azn_mutex));
428 	if (dfno >= azn->azn_ndfs) {
429 		return (NULL);
430 	}
431 
432 	for (i = 0; i < azn->azn_ndfs; i++) {
433 		amdzen_df_t *df = &azn->azn_dfs[i];
434 		if ((df->adf_flags & AMDZEN_DF_F_VALID) == 0) {
435 			continue;
436 		}
437 
438 		if (dfno == 0) {
439 			return (df);
440 		}
441 		dfno--;
442 	}
443 
444 	return (NULL);
445 }
446 
447 static amdzen_df_ent_t *
amdzen_df_ent_find_by_instid(amdzen_df_t * df,uint8_t instid)448 amdzen_df_ent_find_by_instid(amdzen_df_t *df, uint8_t instid)
449 {
450 	for (uint_t i = 0; i < df->adf_nents; i++) {
451 		amdzen_df_ent_t *ent = &df->adf_ents[i];
452 
453 		if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0) {
454 			continue;
455 		}
456 
457 		if (ent->adfe_inst_id == instid) {
458 			return (ent);
459 		}
460 	}
461 
462 	return (NULL);
463 }
464 
465 /*
466  * Client functions that are used by nexus children.
467  */
468 int
amdzen_c_smn_read(uint_t dfno,const smn_reg_t reg,uint32_t * valp)469 amdzen_c_smn_read(uint_t dfno, const smn_reg_t reg, uint32_t *valp)
470 {
471 	amdzen_df_t *df;
472 	amdzen_t *azn = amdzen_data;
473 
474 	if (!SMN_REG_SIZE_IS_VALID(reg))
475 		return (EINVAL);
476 	if (!SMN_REG_IS_NATURALLY_ALIGNED(reg))
477 		return (EINVAL);
478 
479 	mutex_enter(&azn->azn_mutex);
480 	df = amdzen_df_find(azn, dfno);
481 	if (df == NULL) {
482 		mutex_exit(&azn->azn_mutex);
483 		return (ENOENT);
484 	}
485 
486 	if ((df->adf_flags & AMDZEN_DF_F_FOUND_NB) == 0) {
487 		mutex_exit(&azn->azn_mutex);
488 		return (ENXIO);
489 	}
490 
491 	*valp = amdzen_smn_read(azn, df, reg);
492 	mutex_exit(&azn->azn_mutex);
493 	return (0);
494 }
495 
496 int
amdzen_c_smn_write(uint_t dfno,const smn_reg_t reg,const uint32_t val)497 amdzen_c_smn_write(uint_t dfno, const smn_reg_t reg, const uint32_t val)
498 {
499 	amdzen_df_t *df;
500 	amdzen_t *azn = amdzen_data;
501 
502 	if (!SMN_REG_SIZE_IS_VALID(reg))
503 		return (EINVAL);
504 	if (!SMN_REG_IS_NATURALLY_ALIGNED(reg))
505 		return (EINVAL);
506 	if (!SMN_REG_VALUE_FITS(reg, val))
507 		return (EOVERFLOW);
508 
509 	mutex_enter(&azn->azn_mutex);
510 	df = amdzen_df_find(azn, dfno);
511 	if (df == NULL) {
512 		mutex_exit(&azn->azn_mutex);
513 		return (ENOENT);
514 	}
515 
516 	if ((df->adf_flags & AMDZEN_DF_F_FOUND_NB) == 0) {
517 		mutex_exit(&azn->azn_mutex);
518 		return (ENXIO);
519 	}
520 
521 	amdzen_smn_write(azn, df, reg, val);
522 	mutex_exit(&azn->azn_mutex);
523 	return (0);
524 }
525 
526 uint_t
amdzen_c_df_count(void)527 amdzen_c_df_count(void)
528 {
529 	uint_t ret;
530 	amdzen_t *azn = amdzen_data;
531 
532 	mutex_enter(&azn->azn_mutex);
533 	ret = azn->azn_ndfs;
534 	mutex_exit(&azn->azn_mutex);
535 	return (ret);
536 }
537 
538 df_rev_t
amdzen_c_df_rev(void)539 amdzen_c_df_rev(void)
540 {
541 	amdzen_df_t *df;
542 	amdzen_t *azn = amdzen_data;
543 	df_rev_t rev;
544 
545 	/*
546 	 * Always use the first DF instance to determine what we're using. Our
547 	 * current assumption, which seems to generally be true, is that the
548 	 * given DF revisions are the same in a given system when the DFs are
549 	 * directly connected.
550 	 */
551 	mutex_enter(&azn->azn_mutex);
552 	df = amdzen_df_find(azn, 0);
553 	if (df == NULL) {
554 		rev = DF_REV_UNKNOWN;
555 	} else {
556 		rev = df->adf_rev;
557 	}
558 	mutex_exit(&azn->azn_mutex);
559 
560 	return (rev);
561 }
562 
563 int
amdzen_c_df_read32(uint_t dfno,uint8_t inst,const df_reg_def_t def,uint32_t * valp)564 amdzen_c_df_read32(uint_t dfno, uint8_t inst, const df_reg_def_t def,
565     uint32_t *valp)
566 {
567 	amdzen_df_t *df;
568 	amdzen_t *azn = amdzen_data;
569 
570 	mutex_enter(&azn->azn_mutex);
571 	df = amdzen_df_find(azn, dfno);
572 	if (df == NULL) {
573 		mutex_exit(&azn->azn_mutex);
574 		return (ENOENT);
575 	}
576 
577 	if (df->adf_rev == DF_REV_UNKNOWN) {
578 		mutex_exit(&azn->azn_mutex);
579 		return (ENOTSUP);
580 	}
581 
582 	*valp = amdzen_df_read_regdef(azn, df, def, inst, B_FALSE);
583 	mutex_exit(&azn->azn_mutex);
584 
585 	return (0);
586 }
587 
588 int
amdzen_c_df_read64(uint_t dfno,uint8_t inst,const df_reg_def_t def,uint64_t * valp)589 amdzen_c_df_read64(uint_t dfno, uint8_t inst, const df_reg_def_t def,
590     uint64_t *valp)
591 {
592 	amdzen_df_t *df;
593 	amdzen_t *azn = amdzen_data;
594 
595 	mutex_enter(&azn->azn_mutex);
596 	df = amdzen_df_find(azn, dfno);
597 	if (df == NULL) {
598 		mutex_exit(&azn->azn_mutex);
599 		return (ENOENT);
600 	}
601 
602 	if (df->adf_rev == DF_REV_UNKNOWN) {
603 		mutex_exit(&azn->azn_mutex);
604 		return (ENOTSUP);
605 	}
606 
607 	*valp = amdzen_df_read_regdef(azn, df, def, inst, B_TRUE);
608 	mutex_exit(&azn->azn_mutex);
609 
610 	return (0);
611 }
612 
613 int
amdzen_c_df_read32_bcast(uint_t dfno,const df_reg_def_t def,uint32_t * valp)614 amdzen_c_df_read32_bcast(uint_t dfno, const df_reg_def_t def, uint32_t *valp)
615 {
616 	amdzen_df_t *df;
617 	amdzen_t *azn = amdzen_data;
618 
619 	mutex_enter(&azn->azn_mutex);
620 	df = amdzen_df_find(azn, dfno);
621 	if (df == NULL) {
622 		mutex_exit(&azn->azn_mutex);
623 		return (ENOENT);
624 	}
625 
626 	if (df->adf_rev == DF_REV_UNKNOWN) {
627 		mutex_exit(&azn->azn_mutex);
628 		return (ENOTSUP);
629 	}
630 
631 	*valp = amdzen_df_read32_bcast(azn, df, def);
632 	mutex_exit(&azn->azn_mutex);
633 
634 	return (0);
635 }
636 
637 int
amdzen_c_df_read64_bcast(uint_t dfno,const df_reg_def_t def,uint64_t * valp)638 amdzen_c_df_read64_bcast(uint_t dfno, const df_reg_def_t def, uint64_t *valp)
639 {
640 	amdzen_df_t *df;
641 	amdzen_t *azn = amdzen_data;
642 
643 	mutex_enter(&azn->azn_mutex);
644 	df = amdzen_df_find(azn, dfno);
645 	if (df == NULL) {
646 		mutex_exit(&azn->azn_mutex);
647 		return (ENOENT);
648 	}
649 
650 	if (df->adf_rev == DF_REV_UNKNOWN) {
651 		mutex_exit(&azn->azn_mutex);
652 		return (ENOTSUP);
653 	}
654 
655 	*valp = amdzen_df_read64_bcast(azn, df, def);
656 	mutex_exit(&azn->azn_mutex);
657 
658 	return (0);
659 }
660 
661 int
amdzen_c_df_iter(uint_t dfno,zen_df_type_t type,amdzen_c_iter_f func,void * arg)662 amdzen_c_df_iter(uint_t dfno, zen_df_type_t type, amdzen_c_iter_f func,
663     void *arg)
664 {
665 	amdzen_df_t *df;
666 	amdzen_t *azn = amdzen_data;
667 	df_type_t df_type;
668 	uint8_t df_subtype;
669 
670 	/*
671 	 * Unlike other calls here, we hold our lock only to find the DF here.
672 	 * The main reason for this is the nature of the callback function.
673 	 * Folks are iterating over instances so they can call back into us. If
674 	 * you look at the locking statement, the thing that is most volatile
675 	 * right here and what we need to protect is the DF itself and
676 	 * subsequent register accesses to it. The actual data about which
677 	 * entities exist is static and so once we have found a DF we should
678 	 * hopefully be in good shape as they only come, but don't go.
679 	 */
680 	mutex_enter(&azn->azn_mutex);
681 	df = amdzen_df_find(azn, dfno);
682 	if (df == NULL) {
683 		mutex_exit(&azn->azn_mutex);
684 		return (ENOENT);
685 	}
686 	mutex_exit(&azn->azn_mutex);
687 
688 	switch (type) {
689 	case ZEN_DF_TYPE_CS_UMC:
690 		df_type = DF_TYPE_CS;
691 		/*
692 		 * In the original Zeppelin DFv2 die there was no subtype field
693 		 * used for the CS. The UMC is the only type and has a subtype
694 		 * of zero.
695 		 */
696 		if (df->adf_rev != DF_REV_2) {
697 			df_subtype = DF_CS_SUBTYPE_UMC;
698 		} else {
699 			df_subtype = 0;
700 		}
701 		break;
702 	case ZEN_DF_TYPE_CCM_CPU:
703 		df_type = DF_TYPE_CCM;
704 
705 		if (df->adf_rev >= DF_REV_4 && amdzen_df_at_least(df, 4, 1)) {
706 			df_subtype = DF_CCM_SUBTYPE_CPU_V4P1;
707 		} else {
708 			df_subtype = DF_CCM_SUBTYPE_CPU_V2;
709 		}
710 		break;
711 	default:
712 		return (EINVAL);
713 	}
714 
715 	for (uint_t i = 0; i < df->adf_nents; i++) {
716 		amdzen_df_ent_t *ent = &df->adf_ents[i];
717 
718 		/*
719 		 * Some DF components are not considered enabled and therefore
720 		 * will end up having bogus values in their ID fields. If we do
721 		 * not have an enable flag set, we must skip this node.
722 		 */
723 		if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0)
724 			continue;
725 
726 		if (ent->adfe_type == df_type &&
727 		    ent->adfe_subtype == df_subtype) {
728 			int ret = func(dfno, ent->adfe_fabric_id,
729 			    ent->adfe_inst_id, arg);
730 			if (ret != 0) {
731 				return (ret);
732 			}
733 		}
734 	}
735 
736 	return (0);
737 }
738 
739 int
amdzen_c_df_fabric_decomp(df_fabric_decomp_t * decomp)740 amdzen_c_df_fabric_decomp(df_fabric_decomp_t *decomp)
741 {
742 	const amdzen_df_t *df;
743 	amdzen_t *azn = amdzen_data;
744 
745 	mutex_enter(&azn->azn_mutex);
746 	df = amdzen_df_find(azn, 0);
747 	if (df == NULL) {
748 		mutex_exit(&azn->azn_mutex);
749 		return (ENOENT);
750 	}
751 
752 	*decomp = df->adf_decomp;
753 	mutex_exit(&azn->azn_mutex);
754 	return (0);
755 }
756 
757 static boolean_t
amdzen_create_child(amdzen_t * azn,const amdzen_child_data_t * acd)758 amdzen_create_child(amdzen_t *azn, const amdzen_child_data_t *acd)
759 {
760 	int ret;
761 	dev_info_t *child;
762 
763 	if (ndi_devi_alloc(azn->azn_dip, acd->acd_name,
764 	    (pnode_t)DEVI_SID_NODEID, &child) != NDI_SUCCESS) {
765 		dev_err(azn->azn_dip, CE_WARN, "!failed to allocate child "
766 		    "dip for %s", acd->acd_name);
767 		return (B_FALSE);
768 	}
769 
770 	ddi_set_parent_data(child, (void *)acd);
771 	if ((ret = ndi_devi_online(child, 0)) != NDI_SUCCESS) {
772 		if (acd->acd_warn) {
773 			dev_err(azn->azn_dip, CE_WARN, "!failed to online "
774 			    "child dip %s: %d", acd->acd_name, ret);
775 		}
776 		return (B_FALSE);
777 	}
778 
779 	return (B_TRUE);
780 }
781 
782 static boolean_t
amdzen_map_dfs(amdzen_t * azn)783 amdzen_map_dfs(amdzen_t *azn)
784 {
785 	amdzen_stub_t *stub;
786 
787 	ASSERT(MUTEX_HELD(&azn->azn_mutex));
788 
789 	for (stub = list_head(&azn->azn_df_stubs); stub != NULL;
790 	    stub = list_next(&azn->azn_df_stubs, stub)) {
791 		amdzen_df_t *df;
792 		uint_t dfno;
793 
794 		dfno = stub->azns_dev - AMDZEN_DF_FIRST_DEVICE;
795 		if (dfno > AMDZEN_MAX_DFS) {
796 			dev_err(stub->azns_dip, CE_WARN, "encountered df "
797 			    "device with illegal DF PCI b/d/f: 0x%x/%x/%x",
798 			    stub->azns_bus, stub->azns_dev, stub->azns_func);
799 			goto err;
800 		}
801 
802 		df = &azn->azn_dfs[dfno];
803 
804 		if (stub->azns_func >= AMDZEN_MAX_DF_FUNCS) {
805 			dev_err(stub->azns_dip, CE_WARN, "encountered df "
806 			    "device with illegal DF PCI b/d/f: 0x%x/%x/%x",
807 			    stub->azns_bus, stub->azns_dev, stub->azns_func);
808 			goto err;
809 		}
810 
811 		if (df->adf_funcs[stub->azns_func] != NULL) {
812 			dev_err(stub->azns_dip, CE_WARN, "encountered "
813 			    "duplicate df device with DF PCI b/d/f: 0x%x/%x/%x",
814 			    stub->azns_bus, stub->azns_dev, stub->azns_func);
815 			goto err;
816 		}
817 		df->adf_funcs[stub->azns_func] = stub;
818 	}
819 
820 	return (B_TRUE);
821 
822 err:
823 	azn->azn_flags |= AMDZEN_F_DEVICE_ERROR;
824 	return (B_FALSE);
825 }
826 
827 static boolean_t
amdzen_check_dfs(amdzen_t * azn)828 amdzen_check_dfs(amdzen_t *azn)
829 {
830 	uint_t i;
831 	boolean_t ret = B_TRUE;
832 
833 	for (i = 0; i < AMDZEN_MAX_DFS; i++) {
834 		amdzen_df_t *df = &azn->azn_dfs[i];
835 		uint_t count = 0;
836 
837 		/*
838 		 * We require all platforms to have DFs functions 0-6. Not all
839 		 * platforms have DF function 7.
840 		 */
841 		for (uint_t func = 0; func < AMDZEN_MAX_DF_FUNCS - 1; func++) {
842 			if (df->adf_funcs[func] != NULL) {
843 				count++;
844 			}
845 		}
846 
847 		if (count == 0)
848 			continue;
849 
850 		if (count != 7) {
851 			ret = B_FALSE;
852 			dev_err(azn->azn_dip, CE_WARN, "df %u devices "
853 			    "incomplete", i);
854 		} else {
855 			df->adf_flags |= AMDZEN_DF_F_VALID;
856 			azn->azn_ndfs++;
857 		}
858 	}
859 
860 	return (ret);
861 }
862 
/*
 * A table of 0x2b (43) ID values. Going by its name it applies to Rome-style
 * data fabrics. The values are ascending but not contiguous: 12-15 and 28-29
 * do not appear. NOTE(review): how this table is indexed is not visible in
 * this part of the file; confirm against its consumers.
 */
static const uint8_t amdzen_df_rome_ids[0x2b] = {
	/* 0-11 */
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
	/* 16-27 */
	16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
	/* 30-48 */
	30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46,
	47, 48
};
868 
869 /*
870  * Check the first df entry to see if it belongs to Rome or Milan. If so, then
871  * it uses the disjoint ID space.
872  */
873 static boolean_t
amdzen_is_rome_style(uint_t id)874 amdzen_is_rome_style(uint_t id)
875 {
876 	return (id == 0x1490 || id == 0x1650);
877 }
878 
879 /*
880  * Deal with the differences between between how a CCM subtype is indicated
881  * across CPU generations.
882  */
883 static boolean_t
amdzen_dfe_is_ccm(const amdzen_df_t * df,const amdzen_df_ent_t * ent)884 amdzen_dfe_is_ccm(const amdzen_df_t *df, const amdzen_df_ent_t *ent)
885 {
886 	if (ent->adfe_type != DF_TYPE_CCM) {
887 		return (B_FALSE);
888 	}
889 
890 	if (df->adf_rev >= DF_REV_4 && amdzen_df_at_least(df, 4, 1)) {
891 		return (ent->adfe_subtype == DF_CCM_SUBTYPE_CPU_V4P1);
892 	} else {
893 		return (ent->adfe_subtype == DF_CCM_SUBTYPE_CPU_V2);
894 	}
895 }
896 
897 /*
898  * To be able to do most other things we want to do, we must first determine
899  * what revision of the DF (data fabric) that we're using.
900  *
901  * Snapshot the df version. This was added explicitly in DFv4.0, around the Zen
902  * 4 timeframe and allows us to tell apart different version of the DF register
903  * set, most usefully when various subtypes were added.
904  *
905  * Older versions can theoretically be told apart based on usage of reserved
906  * registers. We walk these in the following order, starting with the newest rev
907  * and walking backwards to tell things apart:
908  *
909  *   o v3.5 -> Check function 1, register 0x150. This was reserved prior
910  *             to this point. This is actually DF_FIDMASK0_V3P5. We are supposed
911  *             to check bits [7:0].
912  *
913  *   o v3.0 -> Check function 1, register 0x208. The low byte (7:0) was
914  *             changed to indicate a component mask. This is non-zero
915  *             in the 3.0 generation. This is actually DF_FIDMASK_V2.
916  *
917  *   o v2.0 -> This is just the not that case. Presumably v1 wasn't part
918  *             of the Zen generation.
919  *
920  * Because we don't know what version we are yet, we do not use the normal
921  * versioned register accesses which would check what DF version we are and
922  * would want to use the normal indirect register accesses (which also require
923  * us to know the version). We instead do direct broadcast reads.
924  */
925 static void
amdzen_determine_df_vers(amdzen_t * azn,amdzen_df_t * df)926 amdzen_determine_df_vers(amdzen_t *azn, amdzen_df_t *df)
927 {
928 	uint32_t val;
929 	df_reg_def_t rd = DF_FBICNT;
930 
931 	val = amdzen_stub_get32(df->adf_funcs[rd.drd_func], rd.drd_reg);
932 	df->adf_major = DF_FBICNT_V4_GET_MAJOR(val);
933 	df->adf_minor = DF_FBICNT_V4_GET_MINOR(val);
934 	if (df->adf_major == 0 && df->adf_minor == 0) {
935 		rd = DF_FIDMASK0_V3P5;
936 		val = amdzen_stub_get32(df->adf_funcs[rd.drd_func], rd.drd_reg);
937 		if (bitx32(val, 7, 0) != 0) {
938 			df->adf_major = 3;
939 			df->adf_minor = 5;
940 			df->adf_rev = DF_REV_3P5;
941 		} else {
942 			rd = DF_FIDMASK_V2;
943 			val = amdzen_stub_get32(df->adf_funcs[rd.drd_func],
944 			    rd.drd_reg);
945 			if (bitx32(val, 7, 0) != 0) {
946 				df->adf_major = 3;
947 				df->adf_minor = 0;
948 				df->adf_rev = DF_REV_3;
949 			} else {
950 				df->adf_major = 2;
951 				df->adf_minor = 0;
952 				df->adf_rev = DF_REV_2;
953 			}
954 		}
955 	} else if (df->adf_major == 4 && df->adf_minor >= 2) {
956 		/*
957 		 * These are devices that have the newer memory layout that
958 		 * moves the DF::DramBaseAddress to 0x200. Please see the df.h
959 		 * theory statement for more information.
960 		 */
961 		df->adf_rev = DF_REV_4D2;
962 	} else if (df->adf_major == 4) {
963 		df->adf_rev = DF_REV_4;
964 	} else {
965 		df->adf_rev = DF_REV_UNKNOWN;
966 	}
967 }
968 
969 /*
970  * All of the different versions of the DF have different ways of getting at and
971  * answering the question of how do I break a fabric ID into a corresponding
972  * socket, die, and component. Importantly the goal here is to obtain, cache,
973  * and normalize:
974  *
975  *  o The DF System Configuration
976  *  o The various Mask registers
977  *  o The Node ID
978  */
979 static void
amdzen_determine_fabric_decomp(amdzen_t * azn,amdzen_df_t * df)980 amdzen_determine_fabric_decomp(amdzen_t *azn, amdzen_df_t *df)
981 {
982 	uint32_t mask;
983 	df_fabric_decomp_t *decomp = &df->adf_decomp;
984 
985 	switch (df->adf_rev) {
986 	case DF_REV_2:
987 		df->adf_syscfg = amdzen_df_read32_bcast(azn, df, DF_SYSCFG_V2);
988 		switch (DF_SYSCFG_V2_GET_MY_TYPE(df->adf_syscfg)) {
989 		case DF_DIE_TYPE_CPU:
990 			mask = amdzen_df_read32_bcast(azn, df,
991 			    DF_DIEMASK_CPU_V2);
992 			break;
993 		case DF_DIE_TYPE_APU:
994 			mask = amdzen_df_read32_bcast(azn, df,
995 			    DF_DIEMASK_APU_V2);
996 			break;
997 		default:
998 			panic("DF thinks we're not on a CPU!");
999 		}
1000 		df->adf_mask0 = mask;
1001 
1002 		/*
1003 		 * DFv2 is a bit different in how the fabric mask register is
1004 		 * phrased. Logically a fabric ID is broken into something that
1005 		 * uniquely identifies a "node" (a particular die on a socket)
1006 		 * and something that identifies a "component", e.g. a memory
1007 		 * controller.
1008 		 *
1009 		 * Starting with DFv3, these registers logically called out how
1010 		 * to separate the fabric ID first into a node and a component.
1011 		 * Then the node was then broken down into a socket and die. In
1012 		 * DFv2, there is no separate mask and shift of a node. Instead
1013 		 * the socket and die are absolute offsets into the fabric ID
1014 		 * rather than relative offsets into the node ID. As such, when
1015 		 * we encounter DFv2, we fake up a node mask and shift and make
1016 		 * it look like DFv3+.
1017 		 */
1018 		decomp->dfd_node_mask = DF_DIEMASK_V2_GET_SOCK_MASK(mask) |
1019 		    DF_DIEMASK_V2_GET_DIE_MASK(mask);
1020 		decomp->dfd_node_shift = DF_DIEMASK_V2_GET_DIE_SHIFT(mask);
1021 		decomp->dfd_comp_mask = DF_DIEMASK_V2_GET_COMP_MASK(mask);
1022 		decomp->dfd_comp_shift = 0;
1023 
1024 		decomp->dfd_sock_mask = DF_DIEMASK_V2_GET_SOCK_MASK(mask) >>
1025 		    decomp->dfd_node_shift;
1026 		decomp->dfd_die_mask = DF_DIEMASK_V2_GET_DIE_MASK(mask) >>
1027 		    decomp->dfd_node_shift;
1028 		decomp->dfd_sock_shift = DF_DIEMASK_V2_GET_SOCK_SHIFT(mask) -
1029 		    decomp->dfd_node_shift;
1030 		decomp->dfd_die_shift = DF_DIEMASK_V2_GET_DIE_SHIFT(mask) -
1031 		    decomp->dfd_node_shift;
1032 		ASSERT3U(decomp->dfd_die_shift, ==, 0);
1033 
1034 		/*
1035 		 * There is no register in the actual data fabric with the node
1036 		 * ID in DFv2 that we have found. Instead we take the first
1037 		 * entity's fabric ID and transform it into the node id.
1038 		 */
1039 		df->adf_nodeid = (df->adf_ents[0].adfe_fabric_id &
1040 		    decomp->dfd_node_mask) >> decomp->dfd_node_shift;
1041 		break;
1042 	case DF_REV_3:
1043 		df->adf_syscfg = amdzen_df_read32_bcast(azn, df, DF_SYSCFG_V3);
1044 		df->adf_mask0 =  amdzen_df_read32_bcast(azn, df,
1045 		    DF_FIDMASK0_V3);
1046 		df->adf_mask1 =  amdzen_df_read32_bcast(azn, df,
1047 		    DF_FIDMASK1_V3);
1048 
1049 		decomp->dfd_sock_mask =
1050 		    DF_FIDMASK1_V3_GET_SOCK_MASK(df->adf_mask1);
1051 		decomp->dfd_sock_shift =
1052 		    DF_FIDMASK1_V3_GET_SOCK_SHIFT(df->adf_mask1);
1053 		decomp->dfd_die_mask =
1054 		    DF_FIDMASK1_V3_GET_DIE_MASK(df->adf_mask1);
1055 		decomp->dfd_die_shift = 0;
1056 		decomp->dfd_node_mask =
1057 		    DF_FIDMASK0_V3_GET_NODE_MASK(df->adf_mask0);
1058 		decomp->dfd_node_shift =
1059 		    DF_FIDMASK1_V3_GET_NODE_SHIFT(df->adf_mask1);
1060 		decomp->dfd_comp_mask =
1061 		    DF_FIDMASK0_V3_GET_COMP_MASK(df->adf_mask0);
1062 		decomp->dfd_comp_shift = 0;
1063 
1064 		df->adf_nodeid = DF_SYSCFG_V3_GET_NODE_ID(df->adf_syscfg);
1065 		break;
1066 	case DF_REV_3P5:
1067 		df->adf_syscfg = amdzen_df_read32_bcast(azn, df,
1068 		    DF_SYSCFG_V3P5);
1069 		df->adf_mask0 =  amdzen_df_read32_bcast(azn, df,
1070 		    DF_FIDMASK0_V3P5);
1071 		df->adf_mask1 =  amdzen_df_read32_bcast(azn, df,
1072 		    DF_FIDMASK1_V3P5);
1073 		df->adf_mask2 =  amdzen_df_read32_bcast(azn, df,
1074 		    DF_FIDMASK2_V3P5);
1075 
1076 		decomp->dfd_sock_mask =
1077 		    DF_FIDMASK2_V3P5_GET_SOCK_MASK(df->adf_mask2);
1078 		decomp->dfd_sock_shift =
1079 		    DF_FIDMASK1_V3P5_GET_SOCK_SHIFT(df->adf_mask1);
1080 		decomp->dfd_die_mask =
1081 		    DF_FIDMASK2_V3P5_GET_DIE_MASK(df->adf_mask2);
1082 		decomp->dfd_die_shift = 0;
1083 		decomp->dfd_node_mask =
1084 		    DF_FIDMASK0_V3P5_GET_NODE_MASK(df->adf_mask0);
1085 		decomp->dfd_node_shift =
1086 		    DF_FIDMASK1_V3P5_GET_NODE_SHIFT(df->adf_mask1);
1087 		decomp->dfd_comp_mask =
1088 		    DF_FIDMASK0_V3P5_GET_COMP_MASK(df->adf_mask0);
1089 		decomp->dfd_comp_shift = 0;
1090 
1091 		df->adf_nodeid = DF_SYSCFG_V3P5_GET_NODE_ID(df->adf_syscfg);
1092 		break;
1093 	case DF_REV_4:
1094 	case DF_REV_4D2:
1095 		df->adf_syscfg = amdzen_df_read32_bcast(azn, df, DF_SYSCFG_V4);
1096 		df->adf_mask0 =  amdzen_df_read32_bcast(azn, df,
1097 		    DF_FIDMASK0_V4);
1098 		df->adf_mask1 =  amdzen_df_read32_bcast(azn, df,
1099 		    DF_FIDMASK1_V4);
1100 		df->adf_mask2 =  amdzen_df_read32_bcast(azn, df,
1101 		    DF_FIDMASK2_V4);
1102 
1103 		/*
1104 		 * The DFv4 registers are at a different location in the DF;
1105 		 * however, the actual layout of fields is the same as DFv3.5.
1106 		 * This is why you see V3P5 below.
1107 		 */
1108 		decomp->dfd_sock_mask =
1109 		    DF_FIDMASK2_V3P5_GET_SOCK_MASK(df->adf_mask2);
1110 		decomp->dfd_sock_shift =
1111 		    DF_FIDMASK1_V3P5_GET_SOCK_SHIFT(df->adf_mask1);
1112 		decomp->dfd_die_mask =
1113 		    DF_FIDMASK2_V3P5_GET_DIE_MASK(df->adf_mask2);
1114 		decomp->dfd_die_shift = 0;
1115 		decomp->dfd_node_mask =
1116 		    DF_FIDMASK0_V3P5_GET_NODE_MASK(df->adf_mask0);
1117 		decomp->dfd_node_shift =
1118 		    DF_FIDMASK1_V3P5_GET_NODE_SHIFT(df->adf_mask1);
1119 		decomp->dfd_comp_mask =
1120 		    DF_FIDMASK0_V3P5_GET_COMP_MASK(df->adf_mask0);
1121 		decomp->dfd_comp_shift = 0;
1122 
1123 		df->adf_nodeid = DF_SYSCFG_V4_GET_NODE_ID(df->adf_syscfg);
1124 		break;
1125 	default:
1126 		panic("encountered suspicious, previously rejected DF "
1127 		    "rev: 0x%x", df->adf_rev);
1128 	}
1129 }
1130 
1131 /*
1132  * The purpose of this function is to map CCMs to the corresponding CCDs that
1133  * exist. This is not an obvious thing as there is no direct mapping in the data
1134  * fabric between these IDs.
1135  *
1136  * Prior to DFv4, a given CCM was only ever connected to at most one CCD.
1137  * Starting in DFv4 a given CCM may have one or two SDP (scalable data ports)
1138  * that connect to CCDs. These may be connected to the same CCD or a different
1139  * one. When both ports are enabled we must check whether or not the port is
1140  * considered to be in wide mode. When wide mode is enabled then the two ports
1141  * are connected to a single CCD. If wide mode is disabled then the two ports
1142  * are connected to separate CCDs.
1143  *
1144  * The physical number of a CCD, which is how we determine the SMN aperture to
1145  * use, is based on the CCM ID. In most sockets we have seen up to a maximum of
1146  * 8 CCMs. When a CCM is connected to more than one CCD we have determined based
1147  * on some hints from AMD's ACPI information that the numbering is assumed to be
1148  * that CCM's number plus the total number of CCMs.
1149  *
1150  * More concretely, the SP5 Genoa/Bergamo Zen 4 platform has 8 CCMs. When there
1151  * are more than 8 CCDs installed then CCM 0 maps to CCDs 0 and 8. CCM 1 to CCDs
1152  * 1 and 9, etc. CCMs 4-7 map 1:1 to CCDs 4-7. However, the placement of CCDs
1153  * within the package has changed across generations.
1154  *
1155  * Notably in Rome and Milan (Zen 2/3) it appears that each quadrant had an
1156  * increasing number of CCDs. So CCDs 0/1 were together, 2/3, 4/5, and 6/7. This
1157  * meant that in cases where only a subset of CCDs were populated it'd forcibly
1158  * disable the higher CCD in a group (but with DFv3 the CCM would still be
1159  * enabled). So a 4 CCD config would generally enable CCDs 0, 2, 4, and 6 say.
1160  * This was almost certainly done to balance the NUMA config.
1161  *
1162  * Instead, starting in Genoa (Zen 4) the CCMs are round-robined around the
1163  * quadrants so CCMs (CCDs) 0 (0/8) and 4 (4) are together, 1 (1/9) and 5 (5),
1164  * etc. This is also why we more often see disabled CCMs in Genoa, but not in
1165  * Rome/Milan.
1166  *
1167  * When we're operating in wide mode and therefore both SDPs are connected to a
1168  * single CCD, we've always found that the lower CCD index will be used by the
1169  * system and the higher one is not considered present. Therefore, when
1170  * operating in wide mode, we need to make sure that whenever we have a non-zero
 * value for SDPs being connected that we rewrite this so that it appears as
 * though only a single CCD is present. It's conceivable (though hard to
 * imagine) that we could get a value of 0b10 indicating that only the upper
 * SDP link is active for some reason.
1175  */
1176 static void
amdzen_setup_df_ccm(amdzen_t * azn,amdzen_df_t * df,amdzen_df_ent_t * dfe,uint32_t ccmno)1177 amdzen_setup_df_ccm(amdzen_t *azn, amdzen_df_t *df, amdzen_df_ent_t *dfe,
1178     uint32_t ccmno)
1179 {
1180 	amdzen_ccm_data_t *ccm = &dfe->adfe_data.aded_ccm;
1181 	uint32_t ccd_en;
1182 	boolean_t wide_en;
1183 
1184 	if (df->adf_rev >= DF_REV_4) {
1185 		uint32_t val = amdzen_df_read32(azn, df, dfe->adfe_inst_id,
1186 		    DF_CCD_EN_V4);
1187 		ccd_en = DF_CCD_EN_V4_GET_CCD_EN(val);
1188 
1189 		if (df->adf_rev == DF_REV_4D2) {
1190 			wide_en = DF_CCD_EN_V4D2_GET_WIDE_EN(val);
1191 		} else {
1192 			val = amdzen_df_read32(azn, df, dfe->adfe_inst_id,
1193 			    DF_CCMCFG4_V4);
1194 			wide_en = DF_CCMCFG4_V4_GET_WIDE_EN(val);
1195 		}
1196 
1197 		if (wide_en != 0 && ccd_en != 0) {
1198 			ccd_en = 0x1;
1199 		}
1200 	} else {
1201 		ccd_en = 0x1;
1202 	}
1203 
1204 	for (uint32_t i = 0; i < DF_MAX_CCDS_PER_CCM; i++) {
1205 		ccm->acd_ccd_en[i] = (ccd_en & (1 << i)) != 0;
1206 		if (ccm->acd_ccd_en[i] == 0)
1207 			continue;
1208 		ccm->acd_ccd_id[i] = ccmno + i * df->adf_nccm;
1209 		ccm->acd_nccds++;
1210 	}
1211 }
1212 
1213 /*
1214  * Initialize our knowledge about a given series of nodes on the data fabric.
1215  */
1216 static void
amdzen_setup_df(amdzen_t * azn,amdzen_df_t * df)1217 amdzen_setup_df(amdzen_t *azn, amdzen_df_t *df)
1218 {
1219 	uint_t i;
1220 	uint32_t val, ccmno;
1221 
1222 	amdzen_determine_df_vers(azn, df);
1223 
1224 	switch (df->adf_rev) {
1225 	case DF_REV_2:
1226 	case DF_REV_3:
1227 	case DF_REV_3P5:
1228 		val = amdzen_df_read32_bcast(azn, df, DF_CFG_ADDR_CTL_V2);
1229 		break;
1230 	case DF_REV_4:
1231 	case DF_REV_4D2:
1232 		val = amdzen_df_read32_bcast(azn, df, DF_CFG_ADDR_CTL_V4);
1233 		break;
1234 	default:
1235 		dev_err(azn->azn_dip, CE_WARN, "encountered unsupported DF "
1236 		    "revision: 0x%x", df->adf_rev);
1237 		return;
1238 	}
1239 	df->adf_nb_busno = DF_CFG_ADDR_CTL_GET_BUS_NUM(val);
1240 	val = amdzen_df_read32_bcast(azn, df, DF_FBICNT);
1241 	df->adf_nents = DF_FBICNT_GET_COUNT(val);
1242 	if (df->adf_nents == 0)
1243 		return;
1244 	df->adf_ents = kmem_zalloc(sizeof (amdzen_df_ent_t) * df->adf_nents,
1245 	    KM_SLEEP);
1246 
1247 	for (i = 0; i < df->adf_nents; i++) {
1248 		amdzen_df_ent_t *dfe = &df->adf_ents[i];
1249 		uint8_t inst = i;
1250 
1251 		/*
1252 		 * Unfortunately, Rome uses a discontinuous instance ID pattern
1253 		 * while everything else we can find uses a contiguous instance
1254 		 * ID pattern. This means that for Rome, we need to adjust the
1255 		 * indexes that we iterate over, though the total number of
1256 		 * entries is right. This was carried over into Milan, but not
1257 		 * Genoa.
1258 		 */
1259 		if (amdzen_is_rome_style(df->adf_funcs[0]->azns_did)) {
1260 			if (inst >= ARRAY_SIZE(amdzen_df_rome_ids)) {
1261 				dev_err(azn->azn_dip, CE_WARN, "Rome family "
1262 				    "processor reported more ids than the PPR, "
1263 				    "resetting %u to instance zero", inst);
1264 				inst = 0;
1265 			} else {
1266 				inst = amdzen_df_rome_ids[inst];
1267 			}
1268 		}
1269 
1270 		dfe->adfe_drvid = inst;
1271 		dfe->adfe_info0 = amdzen_df_read32(azn, df, inst, DF_FBIINFO0);
1272 		if (df->adf_rev <= DF_REV_4) {
1273 			dfe->adfe_info1 = amdzen_df_read32(azn, df, inst,
1274 			    DF_FBIINFO1);
1275 			dfe->adfe_info2 = amdzen_df_read32(azn, df, inst,
1276 			    DF_FBIINFO2);
1277 		}
1278 		dfe->adfe_info3 = amdzen_df_read32(azn, df, inst, DF_FBIINFO3);
1279 
1280 		dfe->adfe_type = DF_FBIINFO0_GET_TYPE(dfe->adfe_info0);
1281 		dfe->adfe_subtype = DF_FBIINFO0_GET_SUBTYPE(dfe->adfe_info0);
1282 
1283 		/*
1284 		 * The enabled flag was not present in Zen 1. Simulate it by
1285 		 * checking for a non-zero register instead.
1286 		 */
1287 		if (DF_FBIINFO0_V3_GET_ENABLED(dfe->adfe_info0) ||
1288 		    (df->adf_rev == DF_REV_2 && dfe->adfe_info0 != 0)) {
1289 			dfe->adfe_flags |= AMDZEN_DFE_F_ENABLED;
1290 		}
1291 		if (DF_FBIINFO0_GET_HAS_MCA(dfe->adfe_info0)) {
1292 			dfe->adfe_flags |= AMDZEN_DFE_F_MCA;
1293 		}
1294 
1295 		/*
1296 		 * Starting with DFv4 there is no instance ID in the fabric info
1297 		 * 3 register, so we instead grab it out of the driver ID which
1298 		 * is what it should be anyways.
1299 		 */
1300 		if (df->adf_rev >= DF_REV_4) {
1301 			dfe->adfe_inst_id = dfe->adfe_drvid;
1302 		} else {
1303 			dfe->adfe_inst_id =
1304 			    DF_FBIINFO3_GET_INSTID(dfe->adfe_info3);
1305 		}
1306 
1307 		switch (df->adf_rev) {
1308 		case DF_REV_2:
1309 			dfe->adfe_fabric_id =
1310 			    DF_FBIINFO3_V2_GET_BLOCKID(dfe->adfe_info3);
1311 			break;
1312 		case DF_REV_3:
1313 			dfe->adfe_fabric_id =
1314 			    DF_FBIINFO3_V3_GET_BLOCKID(dfe->adfe_info3);
1315 			break;
1316 		case DF_REV_3P5:
1317 			dfe->adfe_fabric_id =
1318 			    DF_FBIINFO3_V3P5_GET_BLOCKID(dfe->adfe_info3);
1319 			break;
1320 		case DF_REV_4:
1321 		case DF_REV_4D2:
1322 			dfe->adfe_fabric_id =
1323 			    DF_FBIINFO3_V4_GET_BLOCKID(dfe->adfe_info3);
1324 			break;
1325 		default:
1326 			panic("encountered suspicious, previously rejected DF "
1327 			    "rev: 0x%x", df->adf_rev);
1328 		}
1329 
1330 		/*
1331 		 * Record information about a subset of DF entities that we've
1332 		 * found. Currently we're tracking this only for CCMs.
1333 		 */
1334 		if ((dfe->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0)
1335 			continue;
1336 
1337 		if (amdzen_dfe_is_ccm(df, dfe)) {
1338 			df->adf_nccm++;
1339 		}
1340 	}
1341 
1342 	/*
1343 	 * Now that we have filled in all of our info, attempt to fill in
1344 	 * specific information about different types of instances.
1345 	 */
1346 	ccmno = 0;
1347 	for (uint_t i = 0; i < df->adf_nents; i++) {
1348 		amdzen_df_ent_t *dfe = &df->adf_ents[i];
1349 
1350 		if ((dfe->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0)
1351 			continue;
1352 
1353 		/*
1354 		 * Perform type and sub-type specific initialization. Currently
1355 		 * limited to CCMs.
1356 		 */
1357 		switch (dfe->adfe_type) {
1358 		case DF_TYPE_CCM:
1359 			amdzen_setup_df_ccm(azn, df, dfe, ccmno);
1360 			ccmno++;
1361 			break;
1362 		default:
1363 			break;
1364 		}
1365 	}
1366 
1367 	amdzen_determine_fabric_decomp(azn, df);
1368 }
1369 
1370 static void
amdzen_find_nb(amdzen_t * azn,amdzen_df_t * df)1371 amdzen_find_nb(amdzen_t *azn, amdzen_df_t *df)
1372 {
1373 	amdzen_stub_t *stub;
1374 
1375 	for (stub = list_head(&azn->azn_nb_stubs); stub != NULL;
1376 	    stub = list_next(&azn->azn_nb_stubs, stub)) {
1377 		if (stub->azns_bus == df->adf_nb_busno) {
1378 			df->adf_flags |= AMDZEN_DF_F_FOUND_NB;
1379 			df->adf_nb = stub;
1380 			return;
1381 		}
1382 	}
1383 }
1384 
1385 /*
1386  * We need to be careful using this function as different AMD generations have
1387  * acted in different ways when there is a missing CCD. We've found that in
1388  * hardware where the CCM is enabled but there is no CCD attached, it generally
1389  * is safe (i.e. DFv3 on Rome), but on DFv4 if we ask for a CCD that would
1390  * correspond to a disabled CCM then the firmware may inject a fatal error
1391  * (which is hopefully something missing in our RAS/MCA-X enablement).
1392  *
1393  * Put differently if this doesn't correspond to an Enabled CCM and you know the
1394  * number of valid CCDs on this, don't use it.
1395  */
1396 static boolean_t
amdzen_ccd_present(amdzen_t * azn,amdzen_df_t * df,uint32_t ccdno)1397 amdzen_ccd_present(amdzen_t *azn, amdzen_df_t *df, uint32_t ccdno)
1398 {
1399 	smn_reg_t die_reg = SMUPWR_CCD_DIE_ID(ccdno);
1400 	uint32_t val = amdzen_smn_read(azn, df, die_reg);
1401 	if (val == SMN_EINVAL32) {
1402 		return (B_FALSE);
1403 	}
1404 
1405 	ASSERT3U(ccdno, ==, SMUPWR_CCD_DIE_ID_GET(val));
1406 	return (B_TRUE);
1407 }
1408 
1409 static uint32_t
amdzen_ccd_thread_en(amdzen_t * azn,amdzen_df_t * df,uint32_t ccdno)1410 amdzen_ccd_thread_en(amdzen_t *azn, amdzen_df_t *df, uint32_t ccdno)
1411 {
1412 	smn_reg_t reg;
1413 
1414 	if (uarchrev_uarch(azn->azn_uarchrev) >= X86_UARCH_AMD_ZEN5) {
1415 		reg = L3SOC_THREAD_EN(ccdno);
1416 	} else {
1417 		reg = SMUPWR_THREAD_EN(ccdno);
1418 	}
1419 
1420 	return (amdzen_smn_read(azn, df, reg));
1421 }
1422 
1423 static uint32_t
amdzen_ccd_core_en(amdzen_t * azn,amdzen_df_t * df,uint32_t ccdno)1424 amdzen_ccd_core_en(amdzen_t *azn, amdzen_df_t *df, uint32_t ccdno)
1425 {
1426 	smn_reg_t reg;
1427 
1428 	if (uarchrev_uarch(azn->azn_uarchrev) >= X86_UARCH_AMD_ZEN5) {
1429 		reg = L3SOC_CORE_EN(ccdno);
1430 	} else {
1431 		reg = SMUPWR_CORE_EN(ccdno);
1432 	}
1433 
1434 	return (amdzen_smn_read(azn, df, reg));
1435 }
1436 
1437 static void
amdzen_ccd_info(amdzen_t * azn,amdzen_df_t * df,uint32_t ccdno,uint32_t * nccxp,uint32_t * nlcorep,uint32_t * nthrp)1438 amdzen_ccd_info(amdzen_t *azn, amdzen_df_t *df, uint32_t ccdno, uint32_t *nccxp,
1439     uint32_t *nlcorep, uint32_t *nthrp)
1440 {
1441 	uint32_t nccx, nlcore, smt;
1442 
1443 	if (uarchrev_uarch(azn->azn_uarchrev) >= X86_UARCH_AMD_ZEN5) {
1444 		smn_reg_t reg = L3SOC_THREAD_CFG(ccdno);
1445 		uint32_t val = amdzen_smn_read(azn, df, reg);
1446 		nccx = L3SOC_THREAD_CFG_GET_COMPLEX_COUNT(val) + 1;
1447 		nlcore = L3SOC_THREAD_CFG_GET_CORE_COUNT(val) + 1;
1448 		smt = L3SOC_THREAD_CFG_GET_SMT_MODE(val);
1449 	} else {
1450 		smn_reg_t reg = SMUPWR_THREAD_CFG(ccdno);
1451 		uint32_t val = amdzen_smn_read(azn, df, reg);
1452 		nccx = SMUPWR_THREAD_CFG_GET_COMPLEX_COUNT(val) + 1;
1453 		nlcore = SMUPWR_THREAD_CFG_GET_CORE_COUNT(val) + 1;
1454 		smt = SMUPWR_THREAD_CFG_GET_SMT_MODE(val);
1455 	}
1456 
1457 	if (nccxp != NULL) {
1458 		*nccxp = nccx;
1459 	}
1460 
1461 	if (nlcorep != NULL) {
1462 		*nlcorep = nlcore;
1463 	}
1464 
1465 	if (nthrp != NULL) {
1466 		/* The L3::L3SOC and SMU::PWR values are the same here */
1467 		if (smt == SMUPWR_THREAD_CFG_SMT_MODE_SMT) {
1468 			*nthrp = 2;
1469 		} else {
1470 			*nthrp = 1;
1471 		}
1472 	}
1473 }
1474 
1475 static void
amdzen_initpkg_to_apic(amdzen_t * azn,const uint32_t pkg0,const uint32_t pkg7)1476 amdzen_initpkg_to_apic(amdzen_t *azn, const uint32_t pkg0, const uint32_t pkg7)
1477 {
1478 	uint32_t nsock, nccd, nccx, ncore, nthr, extccx;
1479 	uint32_t nsock_bits, nccd_bits, nccx_bits, ncore_bits, nthr_bits;
1480 	amdzen_apic_decomp_t *apic = &azn->azn_apic_decomp;
1481 
1482 	/*
1483 	 * These are all 0 based values, meaning that we need to add one to each
1484 	 * of them. However, we skip this because to calculate the number of
1485 	 * bits to cover an entity we would subtract one.
1486 	 */
1487 	nthr = SCFCTP_PMREG_INITPKG0_GET_SMTEN(pkg0);
1488 	ncore = SCFCTP_PMREG_INITPKG7_GET_N_CORES(pkg7);
1489 	nccx = SCFCTP_PMREG_INITPKG7_GET_N_CCXS(pkg7);
1490 	nccd = SCFCTP_PMREG_INITPKG7_GET_N_DIES(pkg7);
1491 	nsock = SCFCTP_PMREG_INITPKG7_GET_N_SOCKETS(pkg7);
1492 
1493 	if (uarchrev_uarch(azn->azn_uarchrev) >= X86_UARCH_AMD_ZEN4) {
1494 		extccx = SCFCTP_PMREG_INITPKG7_ZEN4_GET_16TAPIC(pkg7);
1495 	} else {
1496 		extccx = 0;
1497 	}
1498 
1499 	nthr_bits = highbit(nthr);
1500 	ncore_bits = highbit(ncore);
1501 	nccx_bits = highbit(nccx);
1502 	nccd_bits = highbit(nccd);
1503 	nsock_bits = highbit(nsock);
1504 
1505 	apic->aad_thread_shift = 0;
1506 	apic->aad_thread_mask = (1 << nthr_bits) - 1;
1507 
1508 	apic->aad_core_shift = nthr_bits;
1509 	if (ncore_bits > 0) {
1510 		apic->aad_core_mask = (1 << ncore_bits) - 1;
1511 		apic->aad_core_mask <<= apic->aad_core_shift;
1512 	} else {
1513 		apic->aad_core_mask = 0;
1514 	}
1515 
1516 	/*
1517 	 * The APIC_16T_MODE bit indicates that the total shift to start the CCX
1518 	 * should be at 4 bits if it's not. It doesn't mean that the CCX portion
1519 	 * of the value should take up four bits. In the common Genoa case,
1520 	 * nccx_bits will be zero.
1521 	 */
1522 	apic->aad_ccx_shift = apic->aad_core_shift + ncore_bits;
1523 	if (extccx != 0 && apic->aad_ccx_shift < 4) {
1524 		apic->aad_ccx_shift = 4;
1525 	}
1526 	if (nccx_bits > 0) {
1527 		apic->aad_ccx_mask = (1 << nccx_bits) - 1;
1528 		apic->aad_ccx_mask <<= apic->aad_ccx_shift;
1529 	} else {
1530 		apic->aad_ccx_mask = 0;
1531 	}
1532 
1533 	apic->aad_ccd_shift = apic->aad_ccx_shift + nccx_bits;
1534 	if (nccd_bits > 0) {
1535 		apic->aad_ccd_mask = (1 << nccd_bits) - 1;
1536 		apic->aad_ccd_mask <<= apic->aad_ccd_shift;
1537 	} else {
1538 		apic->aad_ccd_mask = 0;
1539 	}
1540 
1541 	apic->aad_sock_shift = apic->aad_ccd_shift + nccd_bits;
1542 	if (nsock_bits > 0) {
1543 		apic->aad_sock_mask = (1 << nsock_bits) - 1;
1544 		apic->aad_sock_mask <<= apic->aad_sock_shift;
1545 	} else {
1546 		apic->aad_sock_mask = 0;
1547 	}
1548 
1549 	/*
1550 	 * Currently all supported Zen 2+ platforms only have a single die per
1551 	 * socket as compared to Zen 1. So this is always kept at zero.
1552 	 */
1553 	apic->aad_die_mask = 0;
1554 	apic->aad_die_shift = 0;
1555 }
1556 
1557 /*
1558  * We would like to determine what the logical APIC decomposition is on Zen 3
1559  * and newer family parts. While there is information added to CPUID in the form
1560  * of leaf 8X26, that isn't present in Zen 3, so instead we go to what we
1561  * believe is the underlying source of the CPUID data.
1562  *
1563  * Fundamentally there are a series of registers in SMN space that relate to the
1564  * SCFCTP. Coincidentally, there is one of these for each core and there are a
1565  * pair of related SMN registers. L3::SCFCTP::PMREG_INITPKG0 contains
 * information about a given core's logical and physical IDs. More interestingly
1567  * for this particular case, L3::SCFCTP::PMREG_INITPKG7, contains the overall
1568  * total number of logical entities. We've been promised that this has to be
1569  * the same across the fabric. That's all well and good, but this begs the
1570  * question of how do we actually get there. The above is a core-specific
1571  * register and requires that we understand information about which CCDs and
1572  * CCXs are actually present.
1573  *
1574  * So we are starting with a data fabric that has some CCM present. The CCM
1575  * entries in the data fabric may be tagged with our ENABLED flag.
1576  * Unfortunately, that can be true regardless of whether or not it's actually
1577  * present or not. As a result, we go to another chunk of SMN space registers,
1578  * SMU::PWR. These contain information about the CCDs, the physical cores that
1579  * are enabled, and related. So we will first walk the DF entities and see if we
 * can read its SMU::PWR::CCD_DIE_ID. If we get back a value of all 1s then
1581  * there is nothing present. Otherwise, we should get back something that
1582  * matches information in the data fabric.
1583  *
1584  * With that in hand, we can read the SMU::PWR::CORE_ENABLE register to
1585  * determine which physical cores are enabled in the CCD/CCX. That will finally
1586  * give us an index to get to our friend INITPKG7.
1587  */
1588 static boolean_t
amdzen_determine_apic_decomp_initpkg(amdzen_t * azn)1589 amdzen_determine_apic_decomp_initpkg(amdzen_t *azn)
1590 {
1591 	amdzen_df_t *df = &azn->azn_dfs[0];
1592 	uint32_t ccdno = 0;
1593 
1594 	for (uint_t i = 0; i < df->adf_nents; i++) {
1595 		const amdzen_df_ent_t *ent = &df->adf_ents[i];
1596 		if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0)
1597 			continue;
1598 
1599 		if (amdzen_dfe_is_ccm(df, ent)) {
1600 			uint32_t val, nccx, pkg7, pkg0;
1601 			smn_reg_t pkg7_reg, pkg0_reg;
1602 			int core_bit;
1603 			uint8_t pccxno, pcoreno;
1604 
1605 			if (!amdzen_ccd_present(azn, df, ccdno)) {
1606 				ccdno++;
1607 				continue;
1608 			}
1609 
1610 			/*
1611 			 * This die actually exists. Switch over to the core
1612 			 * enable register to find one to ask about physically.
1613 			 */
1614 			amdzen_ccd_info(azn, df, ccdno, &nccx, NULL, NULL);
1615 			val = amdzen_ccd_core_en(azn, df, ccdno);
1616 			if (val == 0) {
1617 				ccdno++;
1618 				continue;
1619 			}
1620 
1621 			/*
1622 			 * There exists an enabled physical core. Find the first
1623 			 * index of it and map it to the corresponding CCD and
1624 			 * CCX. ddi_ffs is the bit index, but we want the
1625 			 * physical core number, hence the -1.
1626 			 */
1627 			core_bit = ddi_ffs(val);
1628 			ASSERT3S(core_bit, !=, 0);
1629 			pcoreno = core_bit - 1;
1630 
1631 			/*
1632 			 * Unfortunately SMU::PWR::THREAD_CONFIGURATION gives us
1633 			 * the Number of logical cores that are present in the
1634 			 * complex, not the total number of physical cores.
1635 			 * Right now we do assume that the physical and logical
1636 			 * ccx numbering is equivalent (we have no other way of
1637 			 * knowing if it is or isn't right now) and that we'd
1638 			 * always have CCX0 before CCX1. AMD seems to suggest we
1639 			 * can assume this, though it is a worrisome assumption.
1640 			 */
1641 			pccxno = pcoreno / azn->azn_ncore_per_ccx;
1642 			ASSERT3U(pccxno, <, nccx);
1643 			pkg7_reg = SCFCTP_PMREG_INITPKG7(ccdno, pccxno,
1644 			    pcoreno);
1645 			pkg7 = amdzen_smn_read(azn, df, pkg7_reg);
1646 			pkg0_reg = SCFCTP_PMREG_INITPKG0(ccdno, pccxno,
1647 			    pcoreno);
1648 			pkg0 = amdzen_smn_read(azn, df, pkg0_reg);
1649 			amdzen_initpkg_to_apic(azn, pkg0, pkg7);
1650 			return (B_TRUE);
1651 		}
1652 	}
1653 
1654 	return (B_FALSE);
1655 }
1656 
1657 /*
1658  * We have the fun job of trying to figure out what the correct form of the APIC
1659  * decomposition should be and how to break that into its logical components.
1660  * The way that we get at this is generation-specific unfortunately. Here's how
1661  * it works out:
1662  *
1663  * Zen 1-2	This era of CPUs are deceptively simple. The PPR for a given
1664  *		family defines exactly how the APIC ID is broken into logical
1665  *		components and it's fixed. That is, depending on whether or
1666  *		not SMT is enabled. Zen 1 and Zen 2 use different schemes for
1667  *		constructing this. The way that we're supposed to check if SMT
1668  *		is enabled is to use AMD leaf 8X1E and ask how many threads per
1669  *		core there are. We use the x86 feature set to determine that
1670  *		instead.
1671  *
1672  *		More specifically the Zen 1 scheme is 7 bits long. The bits have
1673  *		the following meanings.
1674  *
1675  *		[6]   Socket ID
1676  *		[5:4] Node ID
1677  *		[3]   Logical CCX ID
1678  *		With SMT		Without SMT
1679  *		[2:1] Logical Core ID	[2]   hardcoded to zero
1680  *		[0] Thread ID		[1:0] Logical Core ID
1681  *
1682  *		The following is the Zen 2 scheme assuming SMT. The Zen 2 scheme
1683  *		without SMT shifts everything to the right by one bit.
1684  *
1685  *		[7]   Socket ID
1686  *		[6:4] Logical CCD ID
1687  *		[3]   Logical CCX ID
1688  *		[2:1] Logical Core ID
1689  *		[0]   Thread ID
1690  *
1691  * Zen 3	Zen 3 CPUs moved past the fixed APIC ID format that Zen 1 and
1692  *		Zen 2 had, but also don't give us the nice way of discovering
 *		this via CPUID that Zen 4 did. The APIC ID uses a given
1694  *		number of bits for each logical component that exists, but the
1695  *		exact number varies based on what's actually present. To get at
1696  *		this we use a piece of data that is embedded in the SCFCTP
1697  *		(Scalable Control Fabric, Clocks, Test, Power Gating). This can
1698  *		be used to determine how many logical entities of each kind the
1699  *		system thinks exist. While we could use the various CPUID
1700  *		topology items to try to speed this up, they don't tell us the
1701  *		die information that we need to do this.
1702  *
1703  * Zen 4+	Zen 4 introduced CPUID leaf 8000_0026h which gives us a means
1704  *		for determining how to extract the CCD, CCX, and related pieces
1705  *		out of the device. One thing we have to be aware of is that when
1706  *		the CCD and CCX shift are the same, that means that there is
1707  *		only a single CCX and therefore have to take that into account
1708  *		appropriately. This is the case generally on Zen 4 platforms,
1709  *		but not on Bergamo. Until we can confirm the actual CPUID leaf
1710  *		values that we receive in the cases of Bergamo and others, we
1711  *		opt instead to use the same SCFCTP scheme as Zen 3.
1712  */
1713 static boolean_t
amdzen_determine_apic_decomp(amdzen_t * azn)1714 amdzen_determine_apic_decomp(amdzen_t *azn)
1715 {
1716 	amdzen_apic_decomp_t *apic = &azn->azn_apic_decomp;
1717 	boolean_t smt = is_x86_feature(x86_featureset, X86FSET_HTT);
1718 
1719 	switch (uarchrev_uarch(azn->azn_uarchrev)) {
1720 	case X86_UARCH_AMD_ZEN1:
1721 	case X86_UARCH_AMD_ZENPLUS:
1722 		apic->aad_sock_mask = 0x40;
1723 		apic->aad_sock_shift = 6;
1724 		apic->aad_die_mask = 0x30;
1725 		apic->aad_die_shift = 4;
1726 		apic->aad_ccd_mask = 0;
1727 		apic->aad_ccd_shift = 0;
1728 		apic->aad_ccx_mask = 0x08;
1729 		apic->aad_ccx_shift = 3;
1730 
1731 		if (smt) {
1732 			apic->aad_core_mask = 0x06;
1733 			apic->aad_core_shift = 1;
1734 			apic->aad_thread_mask = 0x1;
1735 			apic->aad_thread_shift = 0;
1736 		} else {
1737 			apic->aad_core_mask = 0x03;
1738 			apic->aad_core_shift = 0;
1739 			apic->aad_thread_mask = 0;
1740 			apic->aad_thread_shift = 0;
1741 		}
1742 		break;
1743 	case X86_UARCH_AMD_ZEN2:
1744 		if (smt) {
1745 			apic->aad_sock_mask = 0x80;
1746 			apic->aad_sock_shift = 7;
1747 			apic->aad_die_mask = 0;
1748 			apic->aad_die_shift = 0;
1749 			apic->aad_ccd_mask = 0x70;
1750 			apic->aad_ccd_shift = 4;
1751 			apic->aad_ccx_mask = 0x08;
1752 			apic->aad_ccx_shift = 3;
1753 			apic->aad_core_mask = 0x06;
1754 			apic->aad_core_shift = 1;
1755 			apic->aad_thread_mask = 0x01;
1756 			apic->aad_thread_shift = 0;
1757 		} else {
1758 			apic->aad_sock_mask = 0x40;
1759 			apic->aad_sock_shift = 6;
1760 			apic->aad_die_mask = 0;
1761 			apic->aad_die_shift = 0;
1762 			apic->aad_ccd_mask = 0x38;
1763 			apic->aad_ccd_shift = 3;
1764 			apic->aad_ccx_mask = 0x04;
1765 			apic->aad_ccx_shift = 2;
1766 			apic->aad_core_mask = 0x3;
1767 			apic->aad_core_shift = 0;
1768 			apic->aad_thread_mask = 0;
1769 			apic->aad_thread_shift = 0;
1770 		}
1771 		break;
1772 	case X86_UARCH_AMD_ZEN3:
1773 	case X86_UARCH_AMD_ZEN4:
1774 	case X86_UARCH_AMD_ZEN5:
1775 		return (amdzen_determine_apic_decomp_initpkg(azn));
1776 	default:
1777 		return (B_FALSE);
1778 	}
1779 	return (B_TRUE);
1780 }
1781 
1782 /*
1783  * Snapshot the number of cores that can exist in a CCX based on the Zen
1784  * microarchitecture revision. In Zen 1-4 this has been a constant number
1785  * regardless of the actual CPU Family. In Zen 5 this varies based upon whether
1786  * or not dense dies are being used.
1787  */
1788 static void
amdzen_determine_ncore_per_ccx(amdzen_t * azn)1789 amdzen_determine_ncore_per_ccx(amdzen_t *azn)
1790 {
1791 	switch (uarchrev_uarch(azn->azn_uarchrev)) {
1792 	case X86_UARCH_AMD_ZEN1:
1793 	case X86_UARCH_AMD_ZENPLUS:
1794 	case X86_UARCH_AMD_ZEN2:
1795 		azn->azn_ncore_per_ccx = 4;
1796 		break;
1797 	case X86_UARCH_AMD_ZEN3:
1798 	case X86_UARCH_AMD_ZEN4:
1799 		azn->azn_ncore_per_ccx = 8;
1800 		break;
1801 	case X86_UARCH_AMD_ZEN5:
1802 		if (chiprev_family(azn->azn_chiprev) ==
1803 		    X86_PF_AMD_DENSE_TURIN) {
1804 			azn->azn_ncore_per_ccx = 16;
1805 		} else {
1806 			azn->azn_ncore_per_ccx = 8;
1807 		}
1808 		break;
1809 	default:
1810 		panic("asked about non-Zen or unknown uarch");
1811 	}
1812 }
1813 
1814 /*
1815  * Attempt to determine a logical CCD number of a given CCD where we don't have
1816  * hardware support for L3::SCFCTP::PMREG_INITPKG* (e.g. pre-Zen 3 systems).
1817  * The CCD numbers that we have are the in the physical space. Likely because of
1818  * how the orientation of CCM numbers map to physical locations and the layout
1819  * of them within the package, we haven't found a good way using the core DFv3
1820  * registers to determine if a given CCD is actually present or not as generally
1821  * all the CCMs are left enabled. Instead we use SMU::PWR::DIE_ID as a proxy to
1822  * determine CCD presence.
1823  */
1824 static uint32_t
amdzen_ccd_log_id_zen2(amdzen_t * azn,amdzen_df_t * df,const amdzen_df_ent_t * targ)1825 amdzen_ccd_log_id_zen2(amdzen_t *azn, amdzen_df_t *df,
1826     const amdzen_df_ent_t *targ)
1827 {
1828 	uint32_t smnid = 0;
1829 	uint32_t logid = 0;
1830 
1831 	for (uint_t i = 0; i < df->adf_nents; i++) {
1832 		const amdzen_df_ent_t *ent = &df->adf_ents[i];
1833 
1834 		if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0) {
1835 			continue;
1836 		}
1837 
1838 		if (ent->adfe_inst_id == targ->adfe_inst_id) {
1839 			return (logid);
1840 		}
1841 
1842 		if (ent->adfe_type == targ->adfe_type &&
1843 		    ent->adfe_subtype == targ->adfe_subtype) {
1844 			boolean_t present = amdzen_ccd_present(azn, df, smnid);
1845 			smnid++;
1846 			if (present) {
1847 				logid++;
1848 			}
1849 		}
1850 	}
1851 
1852 	panic("asked to match against invalid DF entity %p in df %p", targ, df);
1853 }
1854 
1855 static void
amdzen_ccd_fill_core_initpkg0(amdzen_t * azn,amdzen_df_t * df,amdzen_topo_ccd_t * ccd,amdzen_topo_ccx_t * ccx,amdzen_topo_core_t * core,boolean_t * ccd_set,boolean_t * ccx_set)1856 amdzen_ccd_fill_core_initpkg0(amdzen_t *azn, amdzen_df_t *df,
1857     amdzen_topo_ccd_t *ccd, amdzen_topo_ccx_t *ccx, amdzen_topo_core_t *core,
1858     boolean_t *ccd_set, boolean_t *ccx_set)
1859 {
1860 	smn_reg_t pkg0_reg;
1861 	uint32_t pkg0;
1862 
1863 	pkg0_reg = SCFCTP_PMREG_INITPKG0(ccd->atccd_phys_no, ccx->atccx_phys_no,
1864 	    core->atcore_phys_no);
1865 	pkg0 = amdzen_smn_read(azn, df, pkg0_reg);
1866 	core->atcore_log_no = SCFCTP_PMREG_INITPKG0_GET_LOG_CORE(pkg0);
1867 
1868 	if (!*ccx_set) {
1869 		ccx->atccx_log_no = SCFCTP_PMREG_INITPKG0_GET_LOG_CCX(pkg0);
1870 		*ccx_set = B_TRUE;
1871 	}
1872 
1873 	if (!*ccd_set) {
1874 		ccd->atccd_log_no = SCFCTP_PMREG_INITPKG0_GET_LOG_DIE(pkg0);
1875 		*ccd_set = B_TRUE;
1876 	}
1877 }
1878 
1879 /*
1880  * Attempt to fill in the physical topology information for this given CCD.
1881  * There are a few steps to this that we undertake to perform this as follows:
1882  *
1883  * 1) First we determine whether the CCD is actually present or not by reading
1884  * SMU::PWR::DIE_ID. CCDs that are not installed will still have an enabled DF
1885  * entry it appears, but the request for the die ID will returns an invalid
1886  * read (all 1s). This die ID should match what we think of as the SMN number
1887  * below. If not, we're in trouble and the rest of this is in question.
1888  *
1889  * 2) We use the SMU::PWR registers to determine how many logical and physical
1890  * cores are present in this CCD and how they are split amongst the CCX. Here we
1891  * need to encode the CPU to CCX core size rankings. Through this process we
1892  * determine and fill out which threads and cores are enabled.
1893  *
1894  * 3) In Zen 3+ we then will read each core's INITPK0 values to ensure that we
1895  * have a proper physical to logical mapping, at which point we can fill in the
1896  * APIC IDs. For Zen 2, we will set the AMDZEN_TOPO_CCD_F_CORE_PHYS_UNKNOWN to
1897  * indicate that we just mapped the first logical processor to the first enabled
1898  * core.
1899  *
1900  * 4) Once we have the logical IDs determined we will construct the APIC ID that
1901  * we expect this to have.
1902  *
1903  * Steps (2) - (4) are intertwined and done together.
1904  */
1905 static void
amdzen_ccd_fill_topo(amdzen_t * azn,amdzen_df_t * df,amdzen_df_ent_t * ent,amdzen_topo_ccd_t * ccd)1906 amdzen_ccd_fill_topo(amdzen_t *azn, amdzen_df_t *df, amdzen_df_ent_t *ent,
1907     amdzen_topo_ccd_t *ccd)
1908 {
1909 	uint32_t nccx, core_en, thread_en;
1910 	uint32_t nlcore_per_ccx, nthreads_per_core;
1911 	uint32_t sockid, dieid, compid;
1912 	const uint32_t ccdno = ccd->atccd_phys_no;
1913 	const x86_uarch_t uarch = uarchrev_uarch(azn->azn_uarchrev);
1914 	boolean_t pkg0_ids, logccd_set = B_FALSE;
1915 
1916 	ASSERT(MUTEX_HELD(&azn->azn_mutex));
1917 	if (!amdzen_ccd_present(azn, df, ccdno)) {
1918 		ccd->atccd_err = AMDZEN_TOPO_CCD_E_CCD_MISSING;
1919 		return;
1920 	}
1921 
1922 	amdzen_ccd_info(azn, df, ccdno, &nccx, &nlcore_per_ccx,
1923 	    &nthreads_per_core);
1924 	ASSERT3U(nccx, <=, AMDZEN_TOPO_CCD_MAX_CCX);
1925 
1926 	core_en = amdzen_ccd_core_en(azn, df, ccdno);
1927 	thread_en = amdzen_ccd_thread_en(azn, df, ccdno);
1928 
1929 	/*
1930 	 * The BSP is never enabled in a conventional sense and therefore the
1931 	 * bit is reserved and left as 0. As the BSP should be in the first CCD,
1932 	 * we go through and OR back in the bit lest we think the thread isn't
1933 	 * enabled.
1934 	 */
1935 	if (ccdno == 0) {
1936 		thread_en |= 1;
1937 	}
1938 
1939 	ccd->atccd_phys_no = ccdno;
1940 	if (uarch >= X86_UARCH_AMD_ZEN3) {
1941 		pkg0_ids = B_TRUE;
1942 	} else {
1943 		ccd->atccd_flags |= AMDZEN_TOPO_CCD_F_CORE_PHYS_UNKNOWN;
1944 		pkg0_ids = B_FALSE;
1945 
1946 		/*
1947 		 * Determine the CCD logical ID for Zen 2 now since this doesn't
1948 		 * rely upon needing a valid physical core.
1949 		 */
1950 		ccd->atccd_log_no = amdzen_ccd_log_id_zen2(azn, df, ent);
1951 		logccd_set = B_TRUE;
1952 	}
1953 
1954 	/*
1955 	 * To construct the APIC ID we need to know the socket and die (not CCD)
1956 	 * this is on. We deconstruct the CCD's fabric ID to determine that.
1957 	 */
1958 	zen_fabric_id_decompose(&df->adf_decomp, ent->adfe_fabric_id, &sockid,
1959 	    &dieid, &compid);
1960 
1961 	/*
1962 	 * At this point we have all the information about the CCD, the number
1963 	 * of CCX instances, and which physical cores and threads are enabled.
1964 	 * Currently we assume that if we have one CCX enabled, then it is
1965 	 * always CCX0. We cannot find evidence of a two CCX supporting part
1966 	 * that doesn't always ship with both CCXs present and enabled.
1967 	 */
1968 	ccd->atccd_nlog_ccx = ccd->atccd_nphys_ccx = nccx;
1969 	for (uint32_t ccxno = 0; ccxno < nccx; ccxno++) {
1970 		amdzen_topo_ccx_t *ccx = &ccd->atccd_ccx[ccxno];
1971 		const uint32_t core_mask = (1 << azn->azn_ncore_per_ccx) - 1;
1972 		const uint32_t core_shift = ccxno * azn->azn_ncore_per_ccx;
1973 		const uint32_t ccx_core_en = (core_en >> core_shift) &
1974 		    core_mask;
1975 		boolean_t logccx_set = B_FALSE;
1976 
1977 		ccd->atccd_ccx_en[ccxno] = 1;
1978 		ccx->atccx_phys_no = ccxno;
1979 		ccx->atccx_nphys_cores = azn->azn_ncore_per_ccx;
1980 		ccx->atccx_nlog_cores = nlcore_per_ccx;
1981 
1982 		if (!pkg0_ids) {
1983 			ccx->atccx_log_no = ccx->atccx_phys_no;
1984 			logccx_set = B_TRUE;
1985 		}
1986 
1987 		for (uint32_t coreno = 0, logcorezen2 = 0;
1988 		    coreno < azn->azn_ncore_per_ccx; coreno++) {
1989 			amdzen_topo_core_t *core = &ccx->atccx_cores[coreno];
1990 
1991 			if ((ccx_core_en & (1 << coreno)) == 0) {
1992 				continue;
1993 			}
1994 
1995 			ccx->atccx_core_en[coreno] = 1;
1996 			core->atcore_phys_no = coreno;
1997 
1998 			/*
1999 			 * Now that we have the physical core number present, we
2000 			 * must determine the logical core number and fill out
2001 			 * the logical CCX/CCD if it has not been set. We must
2002 			 * do this before we attempt to look at which threads
2003 			 * are enabled, because that operates based upon logical
2004 			 * core number.
2005 			 *
2006 			 * For Zen 2 we do not have INITPKG0 at our disposal. We
2007 			 * currently assume (and tag for userland with the
2008 			 * AMDZEN_TOPO_CCD_F_CORE_PHYS_UNKNOWN flag) that we are
2009 			 * mapping logical cores to physicals in the order of
2010 			 * appearance.
2011 			 */
2012 			if (pkg0_ids) {
2013 				amdzen_ccd_fill_core_initpkg0(azn, df, ccd, ccx,
2014 				    core, &logccd_set, &logccx_set);
2015 			} else {
2016 				core->atcore_log_no = logcorezen2;
2017 				logcorezen2++;
2018 			}
2019 
2020 			/*
2021 			 * Determining which bits to use for the thread is a bit
2022 			 * weird here. Thread IDs within a CCX are logical, but
2023 			 * there are always physically spaced CCX sizes. See the
2024 			 * comment at the definition for SMU::PWR::THREAD_ENABLE
2025 			 * for more information.
2026 			 */
2027 			const uint32_t thread_shift = (ccx->atccx_nphys_cores *
2028 			    ccx->atccx_log_no + core->atcore_log_no) *
2029 			    nthreads_per_core;
2030 			const uint32_t thread_mask = (nthreads_per_core << 1) -
2031 			    1;
2032 			const uint32_t core_thread_en = (thread_en >>
2033 			    thread_shift) & thread_mask;
2034 			core->atcore_nthreads = nthreads_per_core;
2035 			core->atcore_thr_en[0] = core_thread_en & 0x01;
2036 			core->atcore_thr_en[1] = core_thread_en & 0x02;
2037 #ifdef	DEBUG
2038 			if (nthreads_per_core == 1) {
2039 				VERIFY0(core->atcore_thr_en[1]);
2040 			}
2041 #endif
2042 			for (uint32_t thrno = 0; thrno < core->atcore_nthreads;
2043 			    thrno++) {
2044 				ASSERT3U(core->atcore_thr_en[thrno], !=, 0);
2045 
2046 				zen_apic_id_compose(&azn->azn_apic_decomp,
2047 				    sockid, dieid, ccd->atccd_log_no,
2048 				    ccx->atccx_log_no, core->atcore_log_no,
2049 				    thrno, &core->atcore_apicids[thrno]);
2050 
2051 			}
2052 		}
2053 
2054 		ASSERT3U(logccx_set, ==, B_TRUE);
2055 		ASSERT3U(logccd_set, ==, B_TRUE);
2056 	}
2057 }
2058 
2059 static void
amdzen_nexus_init(void * arg)2060 amdzen_nexus_init(void *arg)
2061 {
2062 	uint_t i;
2063 	amdzen_t *azn = arg;
2064 
2065 	/*
2066 	 * Assign the requisite identifying information for this CPU.
2067 	 */
2068 	azn->azn_uarchrev = cpuid_getuarchrev(CPU);
2069 	azn->azn_chiprev = cpuid_getchiprev(CPU);
2070 
2071 	/*
2072 	 * Go through all of the stubs and assign the DF entries.
2073 	 */
2074 	mutex_enter(&azn->azn_mutex);
2075 	if (!amdzen_map_dfs(azn) || !amdzen_check_dfs(azn)) {
2076 		azn->azn_flags |= AMDZEN_F_MAP_ERROR;
2077 		goto done;
2078 	}
2079 
2080 	for (i = 0; i < AMDZEN_MAX_DFS; i++) {
2081 		amdzen_df_t *df = &azn->azn_dfs[i];
2082 
2083 		if ((df->adf_flags & AMDZEN_DF_F_VALID) == 0)
2084 			continue;
2085 		amdzen_setup_df(azn, df);
2086 		amdzen_find_nb(azn, df);
2087 	}
2088 
2089 	amdzen_determine_ncore_per_ccx(azn);
2090 
2091 	if (amdzen_determine_apic_decomp(azn)) {
2092 		azn->azn_flags |= AMDZEN_F_APIC_DECOMP_VALID;
2093 	}
2094 
2095 	/*
2096 	 * Not all children may be installed. As such, we do not treat the
2097 	 * failure of a child as fatal to the driver.
2098 	 */
2099 	mutex_exit(&azn->azn_mutex);
2100 	for (i = 0; i < ARRAY_SIZE(amdzen_children); i++) {
2101 		(void) amdzen_create_child(azn, &amdzen_children[i]);
2102 	}
2103 	mutex_enter(&azn->azn_mutex);
2104 
2105 done:
2106 	azn->azn_flags &= ~AMDZEN_F_ATTACH_DISPATCHED;
2107 	azn->azn_flags |= AMDZEN_F_ATTACH_COMPLETE;
2108 	azn->azn_taskqid = TASKQID_INVALID;
2109 	cv_broadcast(&azn->azn_cv);
2110 	mutex_exit(&azn->azn_mutex);
2111 }
2112 
2113 static int
amdzen_stub_scan_cb(dev_info_t * dip,void * arg)2114 amdzen_stub_scan_cb(dev_info_t *dip, void *arg)
2115 {
2116 	amdzen_t *azn = arg;
2117 	uint16_t vid, did;
2118 	int *regs;
2119 	uint_t nregs, i;
2120 	boolean_t match = B_FALSE;
2121 
2122 	if (dip == ddi_root_node()) {
2123 		return (DDI_WALK_CONTINUE);
2124 	}
2125 
2126 	/*
2127 	 * If a node in question is not a pci node, then we have no interest in
2128 	 * it as all the stubs that we care about are related to pci devices.
2129 	 */
2130 	if (strncmp("pci", ddi_get_name(dip), 3) != 0) {
2131 		return (DDI_WALK_PRUNECHILD);
2132 	}
2133 
2134 	/*
2135 	 * If we can't get a device or vendor ID and prove that this is an AMD
2136 	 * part, then we don't care about it.
2137 	 */
2138 	vid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
2139 	    "vendor-id", PCI_EINVAL16);
2140 	did = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
2141 	    "device-id", PCI_EINVAL16);
2142 	if (vid == PCI_EINVAL16 || did == PCI_EINVAL16) {
2143 		return (DDI_WALK_CONTINUE);
2144 	}
2145 
2146 	if (vid != AMDZEN_PCI_VID_AMD && vid != AMDZEN_PCI_VID_HYGON) {
2147 		return (DDI_WALK_CONTINUE);
2148 	}
2149 
2150 	for (i = 0; i < ARRAY_SIZE(amdzen_nb_ids); i++) {
2151 		if (amdzen_nb_ids[i] == did) {
2152 			match = B_TRUE;
2153 		}
2154 	}
2155 
2156 	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
2157 	    "reg", ®s, &nregs) != DDI_PROP_SUCCESS) {
2158 		return (DDI_WALK_CONTINUE);
2159 	}
2160 
2161 	if (nregs == 0) {
2162 		ddi_prop_free(regs);
2163 		return (DDI_WALK_CONTINUE);
2164 	}
2165 
2166 	if (PCI_REG_BUS_G(regs[0]) == AMDZEN_DF_BUSNO &&
2167 	    PCI_REG_DEV_G(regs[0]) >= AMDZEN_DF_FIRST_DEVICE) {
2168 		match = B_TRUE;
2169 	}
2170 
2171 	ddi_prop_free(regs);
2172 	if (match) {
2173 		mutex_enter(&azn->azn_mutex);
2174 		azn->azn_nscanned++;
2175 		mutex_exit(&azn->azn_mutex);
2176 	}
2177 
2178 	return (DDI_WALK_CONTINUE);
2179 }
2180 
2181 static void
amdzen_stub_scan(void * arg)2182 amdzen_stub_scan(void *arg)
2183 {
2184 	amdzen_t *azn = arg;
2185 
2186 	mutex_enter(&azn->azn_mutex);
2187 	azn->azn_nscanned = 0;
2188 	mutex_exit(&azn->azn_mutex);
2189 
2190 	ddi_walk_devs(ddi_root_node(), amdzen_stub_scan_cb, azn);
2191 
2192 	mutex_enter(&azn->azn_mutex);
2193 	azn->azn_flags &= ~AMDZEN_F_SCAN_DISPATCHED;
2194 	azn->azn_flags |= AMDZEN_F_SCAN_COMPLETE;
2195 
2196 	if (azn->azn_nscanned == 0) {
2197 		azn->azn_flags |= AMDZEN_F_UNSUPPORTED;
2198 		azn->azn_taskqid = TASKQID_INVALID;
2199 		cv_broadcast(&azn->azn_cv);
2200 	} else if (azn->azn_npresent == azn->azn_nscanned) {
2201 		azn->azn_flags |= AMDZEN_F_ATTACH_DISPATCHED;
2202 		azn->azn_taskqid = taskq_dispatch(system_taskq,
2203 		    amdzen_nexus_init, azn, TQ_SLEEP);
2204 	}
2205 	mutex_exit(&azn->azn_mutex);
2206 }
2207 
2208 /*
2209  * Unfortunately we can't really let the stubs detach as we may need them to be
2210  * available for client operations. We may be able to improve this if we know
2211  * that the actual nexus is going away. However, as long as it's active, we need
2212  * all the stubs.
2213  */
2214 int
amdzen_detach_stub(dev_info_t * dip,ddi_detach_cmd_t cmd)2215 amdzen_detach_stub(dev_info_t *dip, ddi_detach_cmd_t cmd)
2216 {
2217 	if (cmd == DDI_SUSPEND) {
2218 		return (DDI_SUCCESS);
2219 	}
2220 
2221 	return (DDI_FAILURE);
2222 }
2223 
2224 int
amdzen_attach_stub(dev_info_t * dip,ddi_attach_cmd_t cmd)2225 amdzen_attach_stub(dev_info_t *dip, ddi_attach_cmd_t cmd)
2226 {
2227 	int *regs, reg;
2228 	uint_t nregs, i;
2229 	uint16_t vid, did;
2230 	amdzen_stub_t *stub;
2231 	amdzen_t *azn = amdzen_data;
2232 	boolean_t valid = B_FALSE;
2233 	boolean_t nb = B_FALSE;
2234 
2235 	if (cmd == DDI_RESUME) {
2236 		return (DDI_SUCCESS);
2237 	} else if (cmd != DDI_ATTACH) {
2238 		return (DDI_FAILURE);
2239 	}
2240 
2241 	/*
2242 	 * Make sure that the stub that we've been asked to attach is a pci type
2243 	 * device. If not, then there is no reason for us to proceed.
2244 	 */
2245 	if (strncmp("pci", ddi_get_name(dip), 3) != 0) {
2246 		dev_err(dip, CE_WARN, "asked to attach a bad AMD Zen nexus "
2247 		    "stub: %s", ddi_get_name(dip));
2248 		return (DDI_FAILURE);
2249 	}
2250 	vid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
2251 	    "vendor-id", PCI_EINVAL16);
2252 	did = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
2253 	    "device-id", PCI_EINVAL16);
2254 	if (vid == PCI_EINVAL16 || did == PCI_EINVAL16) {
2255 		dev_err(dip, CE_WARN, "failed to get PCI ID properties");
2256 		return (DDI_FAILURE);
2257 	}
2258 
2259 	if (vid != AMDZEN_PCI_VID_AMD && vid != AMDZEN_PCI_VID_HYGON) {
2260 		dev_err(dip, CE_WARN, "expected vendor ID (0x%x), found 0x%x",
2261 		    cpuid_getvendor(CPU) == X86_VENDOR_HYGON ?
2262 		    AMDZEN_PCI_VID_HYGON : AMDZEN_PCI_VID_AMD, vid);
2263 		return (DDI_FAILURE);
2264 	}
2265 
2266 	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
2267 	    "reg", ®s, &nregs) != DDI_PROP_SUCCESS) {
2268 		dev_err(dip, CE_WARN, "failed to get 'reg' property");
2269 		return (DDI_FAILURE);
2270 	}
2271 
2272 	if (nregs == 0) {
2273 		ddi_prop_free(regs);
2274 		dev_err(dip, CE_WARN, "missing 'reg' property values");
2275 		return (DDI_FAILURE);
2276 	}
2277 	reg = *regs;
2278 	ddi_prop_free(regs);
2279 
2280 	for (i = 0; i < ARRAY_SIZE(amdzen_nb_ids); i++) {
2281 		if (amdzen_nb_ids[i] == did) {
2282 			valid = B_TRUE;
2283 			nb = B_TRUE;
2284 		}
2285 	}
2286 
2287 	if (!valid && PCI_REG_BUS_G(reg) == AMDZEN_DF_BUSNO &&
2288 	    PCI_REG_DEV_G(reg) >= AMDZEN_DF_FIRST_DEVICE) {
2289 		valid = B_TRUE;
2290 		nb = B_FALSE;
2291 	}
2292 
2293 	if (!valid) {
2294 		dev_err(dip, CE_WARN, "device %s didn't match the nexus list",
2295 		    ddi_get_name(dip));
2296 		return (DDI_FAILURE);
2297 	}
2298 
2299 	stub = kmem_alloc(sizeof (amdzen_stub_t), KM_SLEEP);
2300 	if (pci_config_setup(dip, &stub->azns_cfgspace) != DDI_SUCCESS) {
2301 		dev_err(dip, CE_WARN, "failed to set up config space");
2302 		kmem_free(stub, sizeof (amdzen_stub_t));
2303 		return (DDI_FAILURE);
2304 	}
2305 
2306 	stub->azns_dip = dip;
2307 	stub->azns_vid = vid;
2308 	stub->azns_did = did;
2309 	stub->azns_bus = PCI_REG_BUS_G(reg);
2310 	stub->azns_dev = PCI_REG_DEV_G(reg);
2311 	stub->azns_func = PCI_REG_FUNC_G(reg);
2312 	ddi_set_driver_private(dip, stub);
2313 
2314 	mutex_enter(&azn->azn_mutex);
2315 	azn->azn_npresent++;
2316 	if (nb) {
2317 		list_insert_tail(&azn->azn_nb_stubs, stub);
2318 	} else {
2319 		list_insert_tail(&azn->azn_df_stubs, stub);
2320 	}
2321 
2322 	if ((azn->azn_flags & AMDZEN_F_TASKQ_MASK) == AMDZEN_F_SCAN_COMPLETE &&
2323 	    azn->azn_nscanned == azn->azn_npresent) {
2324 		azn->azn_flags |= AMDZEN_F_ATTACH_DISPATCHED;
2325 		azn->azn_taskqid = taskq_dispatch(system_taskq,
2326 		    amdzen_nexus_init, azn, TQ_SLEEP);
2327 	}
2328 	mutex_exit(&azn->azn_mutex);
2329 
2330 	return (DDI_SUCCESS);
2331 }
2332 
2333 static int
amdzen_bus_ctl(dev_info_t * dip,dev_info_t * rdip,ddi_ctl_enum_t ctlop,void * arg,void * result)2334 amdzen_bus_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
2335     void *arg, void *result)
2336 {
2337 	char buf[32];
2338 	dev_info_t *child;
2339 	const amdzen_child_data_t *acd;
2340 
2341 	switch (ctlop) {
2342 	case DDI_CTLOPS_REPORTDEV:
2343 		if (rdip == NULL) {
2344 			return (DDI_FAILURE);
2345 		}
2346 		cmn_err(CE_CONT, "amdzen nexus: %s@%s, %s%d\n",
2347 		    ddi_node_name(rdip), ddi_get_name_addr(rdip),
2348 		    ddi_driver_name(rdip), ddi_get_instance(rdip));
2349 		break;
2350 	case DDI_CTLOPS_INITCHILD:
2351 		child = arg;
2352 		if (child == NULL) {
2353 			dev_err(dip, CE_WARN, "!no child passed for "
2354 			    "DDI_CTLOPS_INITCHILD");
2355 		}
2356 
2357 		acd = ddi_get_parent_data(child);
2358 		if (acd == NULL) {
2359 			dev_err(dip, CE_WARN, "!missing child parent data");
2360 			return (DDI_FAILURE);
2361 		}
2362 
2363 		if (snprintf(buf, sizeof (buf), "%d", acd->acd_addr) >=
2364 		    sizeof (buf)) {
2365 			dev_err(dip, CE_WARN, "!failed to construct device "
2366 			    "addr due to overflow");
2367 			return (DDI_FAILURE);
2368 		}
2369 
2370 		ddi_set_name_addr(child, buf);
2371 		break;
2372 	case DDI_CTLOPS_UNINITCHILD:
2373 		child = arg;
2374 		if (child == NULL) {
2375 			dev_err(dip, CE_WARN, "!no child passed for "
2376 			    "DDI_CTLOPS_UNINITCHILD");
2377 		}
2378 
2379 		ddi_set_name_addr(child, NULL);
2380 		break;
2381 	default:
2382 		return (ddi_ctlops(dip, rdip, ctlop, arg, result));
2383 	}
2384 	return (DDI_SUCCESS);
2385 }
2386 
2387 static int
amdzen_topo_open(dev_t * devp,int flag,int otyp,cred_t * credp)2388 amdzen_topo_open(dev_t *devp, int flag, int otyp, cred_t *credp)
2389 {
2390 	minor_t m;
2391 	amdzen_t *azn = amdzen_data;
2392 
2393 	if (crgetzoneid(credp) != GLOBAL_ZONEID ||
2394 	    secpolicy_sys_config(credp, B_FALSE) != 0) {
2395 		return (EPERM);
2396 	}
2397 
2398 	if ((flag & (FEXCL | FNDELAY | FNONBLOCK)) != 0) {
2399 		return (EINVAL);
2400 	}
2401 
2402 	if (otyp != OTYP_CHR) {
2403 		return (EINVAL);
2404 	}
2405 
2406 	m = getminor(*devp);
2407 	if (m != AMDZEN_MINOR_TOPO) {
2408 		return (ENXIO);
2409 	}
2410 
2411 	mutex_enter(&azn->azn_mutex);
2412 	if ((azn->azn_flags & AMDZEN_F_IOCTL_MASK) !=
2413 	    AMDZEN_F_ATTACH_COMPLETE) {
2414 		mutex_exit(&azn->azn_mutex);
2415 		return (ENOTSUP);
2416 	}
2417 	mutex_exit(&azn->azn_mutex);
2418 
2419 	return (0);
2420 }
2421 
2422 static int
amdzen_topo_ioctl_base(amdzen_t * azn,intptr_t arg,int mode)2423 amdzen_topo_ioctl_base(amdzen_t *azn, intptr_t arg, int mode)
2424 {
2425 	amdzen_topo_base_t base;
2426 
2427 	bzero(&base, sizeof (base));
2428 	mutex_enter(&azn->azn_mutex);
2429 	base.atb_ndf = azn->azn_ndfs;
2430 
2431 	if ((azn->azn_flags & AMDZEN_F_APIC_DECOMP_VALID) == 0) {
2432 		mutex_exit(&azn->azn_mutex);
2433 		return (ENOTSUP);
2434 	}
2435 
2436 	base.atb_apic_decomp = azn->azn_apic_decomp;
2437 	for (uint_t i = 0; i < azn->azn_ndfs; i++) {
2438 		const amdzen_df_t *df = &azn->azn_dfs[i];
2439 
2440 		base.atb_maxdfent = MAX(base.atb_maxdfent, df->adf_nents);
2441 		if (i == 0) {
2442 			base.atb_rev = df->adf_rev;
2443 			base.atb_df_decomp = df->adf_decomp;
2444 		}
2445 	}
2446 	mutex_exit(&azn->azn_mutex);
2447 
2448 	if (ddi_copyout(&base, (void *)(uintptr_t)arg, sizeof (base),
2449 	    mode & FKIOCTL) != 0) {
2450 		return (EFAULT);
2451 	}
2452 
2453 	return (0);
2454 }
2455 
2456 /*
2457  * Fill in the peers. We only have this information prior to DF 4D2.  The way we
2458  * do is this is to just fill in all the entries and then zero out the ones that
2459  * aren't valid.
2460  */
2461 static void
amdzen_topo_ioctl_df_fill_peers(const amdzen_df_t * df,const amdzen_df_ent_t * ent,amdzen_topo_df_ent_t * topo_ent)2462 amdzen_topo_ioctl_df_fill_peers(const amdzen_df_t *df,
2463     const amdzen_df_ent_t *ent, amdzen_topo_df_ent_t *topo_ent)
2464 {
2465 	topo_ent->atde_npeers = DF_FBIINFO0_GET_FTI_PCNT(ent->adfe_info0);
2466 
2467 	if (df->adf_rev >= DF_REV_4D2) {
2468 		bzero(topo_ent->atde_peers, sizeof (topo_ent->atde_npeers));
2469 		return;
2470 	}
2471 
2472 	topo_ent->atde_peers[0] = DF_FBINFO1_GET_FTI0_NINSTID(ent->adfe_info1);
2473 	topo_ent->atde_peers[1] = DF_FBINFO1_GET_FTI1_NINSTID(ent->adfe_info1);
2474 	topo_ent->atde_peers[2] = DF_FBINFO1_GET_FTI2_NINSTID(ent->adfe_info1);
2475 	topo_ent->atde_peers[3] = DF_FBINFO1_GET_FTI3_NINSTID(ent->adfe_info1);
2476 	topo_ent->atde_peers[4] = DF_FBINFO2_GET_FTI4_NINSTID(ent->adfe_info2);
2477 	topo_ent->atde_peers[5] = DF_FBINFO2_GET_FTI5_NINSTID(ent->adfe_info2);
2478 
2479 	for (uint32_t i = topo_ent->atde_npeers; i < AMDZEN_TOPO_DF_MAX_PEERS;
2480 	    i++) {
2481 		topo_ent->atde_peers[i] = 0;
2482 	}
2483 }
2484 
2485 static void
amdzen_topo_ioctl_df_fill_ccm(const amdzen_df_ent_t * ent,amdzen_topo_df_ent_t * topo_ent)2486 amdzen_topo_ioctl_df_fill_ccm(const amdzen_df_ent_t *ent,
2487     amdzen_topo_df_ent_t *topo_ent)
2488 {
2489 	const amdzen_ccm_data_t *ccm = &ent->adfe_data.aded_ccm;
2490 	amdzen_topo_ccm_data_t *topo_ccm = &topo_ent->atde_data.atded_ccm;
2491 
2492 	topo_ccm->atcd_nccds = ccm->acd_nccds;
2493 	for (uint32_t i = 0; i < DF_MAX_CCDS_PER_CCM; i++) {
2494 		topo_ccm->atcd_ccd_en[i] = ccm->acd_ccd_en[i];
2495 		topo_ccm->atcd_ccd_ids[i] = ccm->acd_ccd_id[i];
2496 	}
2497 }
2498 
2499 static int
amdzen_topo_ioctl_df(amdzen_t * azn,intptr_t arg,int mode)2500 amdzen_topo_ioctl_df(amdzen_t *azn, intptr_t arg, int mode)
2501 {
2502 	uint_t model;
2503 	uint32_t max_ents, nwritten;
2504 	const amdzen_df_t *df;
2505 	amdzen_topo_df_t topo_df;
2506 #ifdef	_MULTI_DATAMODEL
2507 	amdzen_topo_df32_t topo_df32;
2508 #endif
2509 
2510 	model = ddi_model_convert_from(mode);
2511 	switch (model) {
2512 #ifdef	_MULTI_DATAMODEL
2513 	case DDI_MODEL_ILP32:
2514 		if (ddi_copyin((void *)(uintptr_t)arg, &topo_df32,
2515 		    sizeof (topo_df32), mode & FKIOCTL) != 0) {
2516 			return (EFAULT);
2517 		}
2518 		bzero(&topo_df, sizeof (topo_df));
2519 		topo_df.atd_dfno = topo_df32.atd_dfno;
2520 		topo_df.atd_df_buf_nents = topo_df32.atd_df_buf_nents;
2521 		topo_df.atd_df_ents = (void *)(uintptr_t)topo_df32.atd_df_ents;
2522 		break;
2523 #endif
2524 	case DDI_MODEL_NONE:
2525 		if (ddi_copyin((void *)(uintptr_t)arg, &topo_df,
2526 		    sizeof (topo_df), mode & FKIOCTL) != 0) {
2527 			return (EFAULT);
2528 		}
2529 		break;
2530 	default:
2531 		return (ENOTSUP);
2532 	}
2533 
2534 	mutex_enter(&azn->azn_mutex);
2535 	if (topo_df.atd_dfno >= azn->azn_ndfs) {
2536 		mutex_exit(&azn->azn_mutex);
2537 		return (EINVAL);
2538 	}
2539 
2540 	df = &azn->azn_dfs[topo_df.atd_dfno];
2541 	topo_df.atd_nodeid = df->adf_nodeid;
2542 	topo_df.atd_sockid = (df->adf_nodeid & df->adf_decomp.dfd_sock_mask) >>
2543 	    df->adf_decomp.dfd_sock_shift;
2544 	topo_df.atd_dieid = (df->adf_nodeid & df->adf_decomp.dfd_die_mask) >>
2545 	    df->adf_decomp.dfd_die_shift;
2546 	topo_df.atd_rev = df->adf_rev;
2547 	topo_df.atd_major = df->adf_major;
2548 	topo_df.atd_minor = df->adf_minor;
2549 	topo_df.atd_df_act_nents = df->adf_nents;
2550 	max_ents = MIN(topo_df.atd_df_buf_nents, df->adf_nents);
2551 
2552 	if (topo_df.atd_df_ents == NULL) {
2553 		topo_df.atd_df_buf_nvalid = 0;
2554 		mutex_exit(&azn->azn_mutex);
2555 		goto copyout;
2556 	}
2557 
2558 	nwritten = 0;
2559 	for (uint32_t i = 0; i < max_ents; i++) {
2560 		amdzen_topo_df_ent_t topo_ent;
2561 		const amdzen_df_ent_t *ent = &df->adf_ents[i];
2562 
2563 		/*
2564 		 * We opt not to include disabled elements right now. They
2565 		 * generally don't have a valid type and there isn't much useful
2566 		 * information we can get from them. This can be changed if we
2567 		 * find a use case for them for userland topo.
2568 		 */
2569 		if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0)
2570 			continue;
2571 
2572 		bzero(&topo_ent, sizeof (topo_ent));
2573 		topo_ent.atde_type = ent->adfe_type;
2574 		topo_ent.atde_subtype = ent->adfe_subtype;
2575 		topo_ent.atde_fabric_id = ent->adfe_fabric_id;
2576 		topo_ent.atde_inst_id = ent->adfe_inst_id;
2577 		amdzen_topo_ioctl_df_fill_peers(df, ent, &topo_ent);
2578 
2579 		if (amdzen_dfe_is_ccm(df, ent)) {
2580 			amdzen_topo_ioctl_df_fill_ccm(ent, &topo_ent);
2581 		}
2582 
2583 		if (ddi_copyout(&topo_ent, &topo_df.atd_df_ents[nwritten],
2584 		    sizeof (topo_ent), mode & FKIOCTL) != 0) {
2585 			mutex_exit(&azn->azn_mutex);
2586 			return (EFAULT);
2587 		}
2588 		nwritten++;
2589 	}
2590 	mutex_exit(&azn->azn_mutex);
2591 
2592 	topo_df.atd_df_buf_nvalid = nwritten;
2593 copyout:
2594 	switch (model) {
2595 #ifdef	_MULTI_DATAMODEL
2596 	case DDI_MODEL_ILP32:
2597 		topo_df32.atd_nodeid = topo_df.atd_nodeid;
2598 		topo_df32.atd_sockid = topo_df.atd_sockid;
2599 		topo_df32.atd_dieid = topo_df.atd_dieid;
2600 		topo_df32.atd_rev = topo_df.atd_rev;
2601 		topo_df32.atd_major = topo_df.atd_major;
2602 		topo_df32.atd_minor = topo_df.atd_minor;
2603 		topo_df32.atd_df_buf_nvalid = topo_df.atd_df_buf_nvalid;
2604 		topo_df32.atd_df_act_nents = topo_df.atd_df_act_nents;
2605 
2606 		if (ddi_copyout(&topo_df32, (void *)(uintptr_t)arg,
2607 		    sizeof (topo_df32), mode & FKIOCTL) != 0) {
2608 			return (EFAULT);
2609 		}
2610 		break;
2611 #endif
2612 	case DDI_MODEL_NONE:
2613 		if (ddi_copyout(&topo_df, (void *)(uintptr_t)arg,
2614 		    sizeof (topo_df), mode & FKIOCTL) != 0) {
2615 			return (EFAULT);
2616 		}
2617 		break;
2618 	default:
2619 		break;
2620 	}
2621 
2622 
2623 	return (0);
2624 }
2625 
2626 static int
amdzen_topo_ioctl_ccd(amdzen_t * azn,intptr_t arg,int mode)2627 amdzen_topo_ioctl_ccd(amdzen_t *azn, intptr_t arg, int mode)
2628 {
2629 	amdzen_topo_ccd_t ccd, *ccdp;
2630 	amdzen_df_t *df;
2631 	amdzen_df_ent_t *ent;
2632 	amdzen_ccm_data_t *ccm;
2633 	uint32_t ccdno;
2634 	size_t copyin_size = offsetof(amdzen_topo_ccd_t, atccd_err);
2635 
2636 	/*
2637 	 * Only copy in the identifying information so that way we can ensure
2638 	 * the rest of the structure we return to the user doesn't contain
2639 	 * anything unexpected in it.
2640 	 */
2641 	bzero(&ccd, sizeof (ccd));
2642 	if (ddi_copyin((void *)(uintptr_t)arg, &ccd, copyin_size,
2643 	    mode & FKIOCTL) != 0) {
2644 		return (EFAULT);
2645 	}
2646 
2647 	mutex_enter(&azn->azn_mutex);
2648 	if ((azn->azn_flags & AMDZEN_F_APIC_DECOMP_VALID) == 0) {
2649 		ccd.atccd_err = AMDZEN_TOPO_CCD_E_NO_APIC_DECOMP;
2650 		goto copyout;
2651 	}
2652 
2653 	df = amdzen_df_find(azn, ccd.atccd_dfno);
2654 	if (df == NULL) {
2655 		ccd.atccd_err = AMDZEN_TOPO_CCD_E_BAD_DFNO;
2656 		goto copyout;
2657 	}
2658 
2659 	/*
2660 	 * We don't have enough information to know how to construct this
2661 	 * information in Zen 1 at this time, so refuse.
2662 	 */
2663 	if (df->adf_rev <= DF_REV_2) {
2664 		ccd.atccd_err = AMDZEN_TOPO_CCD_E_SOC_UNSUPPORTED;
2665 		goto copyout;
2666 	}
2667 
2668 	ent = amdzen_df_ent_find_by_instid(df, ccd.atccd_instid);
2669 	if (ent == NULL) {
2670 		ccd.atccd_err = AMDZEN_TOPO_CCD_E_BAD_INSTID;
2671 		goto copyout;
2672 	}
2673 
2674 	if (!amdzen_dfe_is_ccm(df, ent)) {
2675 		ccd.atccd_err = AMDZEN_TOPO_CCD_E_NOT_A_CCD;
2676 		goto copyout;
2677 	}
2678 
2679 	ccm = &ent->adfe_data.aded_ccm;
2680 	for (ccdno = 0; ccdno < DF_MAX_CCDS_PER_CCM; ccdno++) {
2681 		if (ccm->acd_ccd_en[ccdno] != 0 &&
2682 		    ccm->acd_ccd_id[ccdno] == ccd.atccd_phys_no) {
2683 			break;
2684 		}
2685 	}
2686 
2687 	if (ccdno == DF_MAX_CCDS_PER_CCM) {
2688 		ccd.atccd_err = AMDZEN_TOPO_CCD_E_NOT_A_CCD;
2689 		goto copyout;
2690 	}
2691 
2692 	if (ccm->acd_ccd_data[ccdno] == NULL) {
2693 		/*
2694 		 * We don't actually have this data. Go fill it out and save it
2695 		 * for future use.
2696 		 */
2697 		ccdp = kmem_zalloc(sizeof (amdzen_topo_ccd_t), KM_NOSLEEP_LAZY);
2698 		if (ccdp == NULL) {
2699 			mutex_exit(&azn->azn_mutex);
2700 			return (ENOMEM);
2701 		}
2702 
2703 		ccdp->atccd_dfno = ccd.atccd_dfno;
2704 		ccdp->atccd_instid = ccd.atccd_instid;
2705 		ccdp->atccd_phys_no = ccd.atccd_phys_no;
2706 		amdzen_ccd_fill_topo(azn, df, ent, ccdp);
2707 		ccm->acd_ccd_data[ccdno] = ccdp;
2708 	}
2709 	ASSERT3P(ccm->acd_ccd_data[ccdno], !=, NULL);
2710 	bcopy(ccm->acd_ccd_data[ccdno], &ccd, sizeof (ccd));
2711 
2712 copyout:
2713 	mutex_exit(&azn->azn_mutex);
2714 	if (ddi_copyout(&ccd, (void *)(uintptr_t)arg, sizeof (ccd),
2715 	    mode & FKIOCTL) != 0) {
2716 		return (EFAULT);
2717 	}
2718 
2719 	return (0);
2720 }
2721 
2722 static int
amdzen_topo_ioctl(dev_t dev,int cmd,intptr_t arg,int mode,cred_t * credp,int * rvalp)2723 amdzen_topo_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
2724     cred_t *credp, int *rvalp)
2725 {
2726 	int ret;
2727 	amdzen_t *azn = amdzen_data;
2728 
2729 	if (getminor(dev) != AMDZEN_MINOR_TOPO) {
2730 		return (ENXIO);
2731 	}
2732 
2733 	if ((mode & FREAD) == 0) {
2734 		return (EBADF);
2735 	}
2736 
2737 	switch (cmd) {
2738 	case AMDZEN_TOPO_IOCTL_BASE:
2739 		ret = amdzen_topo_ioctl_base(azn, arg, mode);
2740 		break;
2741 	case AMDZEN_TOPO_IOCTL_DF:
2742 		ret = amdzen_topo_ioctl_df(azn, arg, mode);
2743 		break;
2744 	case AMDZEN_TOPO_IOCTL_CCD:
2745 		ret = amdzen_topo_ioctl_ccd(azn, arg, mode);
2746 		break;
2747 	default:
2748 		ret = ENOTTY;
2749 		break;
2750 	}
2751 
2752 	return (ret);
2753 }
2754 
2755 static int
amdzen_topo_close(dev_t dev,int flag,int otyp,cred_t * credp)2756 amdzen_topo_close(dev_t dev, int flag, int otyp, cred_t *credp)
2757 {
2758 	if (otyp != OTYP_CHR) {
2759 		return (EINVAL);
2760 	}
2761 
2762 	if (getminor(dev) != AMDZEN_MINOR_TOPO) {
2763 		return (ENXIO);
2764 	}
2765 
2766 	return (0);
2767 }
2768 
2769 static int
amdzen_attach(dev_info_t * dip,ddi_attach_cmd_t cmd)2770 amdzen_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
2771 {
2772 	amdzen_t *azn = amdzen_data;
2773 
2774 	if (cmd == DDI_RESUME) {
2775 		return (DDI_SUCCESS);
2776 	} else if (cmd != DDI_ATTACH) {
2777 		return (DDI_FAILURE);
2778 	}
2779 
2780 	mutex_enter(&azn->azn_mutex);
2781 	if (azn->azn_dip != NULL) {
2782 		dev_err(dip, CE_WARN, "driver is already attached!");
2783 		mutex_exit(&azn->azn_mutex);
2784 		return (DDI_FAILURE);
2785 	}
2786 
2787 	if (ddi_create_minor_node(dip, "topo", S_IFCHR, AMDZEN_MINOR_TOPO,
2788 	    DDI_PSEUDO, 0) != 0) {
2789 		dev_err(dip, CE_WARN, "failed to create topo minor node!");
2790 		mutex_exit(&azn->azn_mutex);
2791 		return (DDI_FAILURE);
2792 	}
2793 
2794 	azn->azn_dip = dip;
2795 	azn->azn_taskqid = taskq_dispatch(system_taskq, amdzen_stub_scan,
2796 	    azn, TQ_SLEEP);
2797 	azn->azn_flags |= AMDZEN_F_SCAN_DISPATCHED;
2798 	mutex_exit(&azn->azn_mutex);
2799 
2800 	return (DDI_SUCCESS);
2801 }
2802 
2803 static int
amdzen_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)2804 amdzen_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
2805 {
2806 	amdzen_t *azn = amdzen_data;
2807 
2808 	if (cmd == DDI_SUSPEND) {
2809 		return (DDI_SUCCESS);
2810 	} else if (cmd != DDI_DETACH) {
2811 		return (DDI_FAILURE);
2812 	}
2813 
2814 	mutex_enter(&azn->azn_mutex);
2815 	while (azn->azn_taskqid != TASKQID_INVALID) {
2816 		cv_wait(&azn->azn_cv, &azn->azn_mutex);
2817 	}
2818 
2819 	/*
2820 	 * If we've attached any stub drivers, e.g. this platform is important
2821 	 * for us, then we fail detach.
2822 	 */
2823 	if (!list_is_empty(&azn->azn_df_stubs) ||
2824 	    !list_is_empty(&azn->azn_nb_stubs)) {
2825 		mutex_exit(&azn->azn_mutex);
2826 		return (DDI_FAILURE);
2827 	}
2828 
2829 	ddi_remove_minor_node(azn->azn_dip, NULL);
2830 	azn->azn_dip = NULL;
2831 	mutex_exit(&azn->azn_mutex);
2832 
2833 	return (DDI_SUCCESS);
2834 }
2835 
2836 static void
amdzen_free(void)2837 amdzen_free(void)
2838 {
2839 	if (amdzen_data == NULL) {
2840 		return;
2841 	}
2842 
2843 	VERIFY(list_is_empty(&amdzen_data->azn_df_stubs));
2844 	list_destroy(&amdzen_data->azn_df_stubs);
2845 	VERIFY(list_is_empty(&amdzen_data->azn_nb_stubs));
2846 	list_destroy(&amdzen_data->azn_nb_stubs);
2847 	cv_destroy(&amdzen_data->azn_cv);
2848 	mutex_destroy(&amdzen_data->azn_mutex);
2849 	kmem_free(amdzen_data, sizeof (amdzen_t));
2850 	amdzen_data = NULL;
2851 }
2852 
2853 static void
amdzen_alloc(void)2854 amdzen_alloc(void)
2855 {
2856 	amdzen_data = kmem_zalloc(sizeof (amdzen_t), KM_SLEEP);
2857 	mutex_init(&amdzen_data->azn_mutex, NULL, MUTEX_DRIVER, NULL);
2858 	list_create(&amdzen_data->azn_df_stubs, sizeof (amdzen_stub_t),
2859 	    offsetof(amdzen_stub_t, azns_link));
2860 	list_create(&amdzen_data->azn_nb_stubs, sizeof (amdzen_stub_t),
2861 	    offsetof(amdzen_stub_t, azns_link));
2862 	cv_init(&amdzen_data->azn_cv, NULL, CV_DRIVER, NULL);
2863 }
2864 
2865 static struct cb_ops amdzen_topo_cb_ops = {
2866 	.cb_open = amdzen_topo_open,
2867 	.cb_close = amdzen_topo_close,
2868 	.cb_strategy = nodev,
2869 	.cb_print = nodev,
2870 	.cb_dump = nodev,
2871 	.cb_read = nodev,
2872 	.cb_write = nodev,
2873 	.cb_ioctl = amdzen_topo_ioctl,
2874 	.cb_devmap = nodev,
2875 	.cb_mmap = nodev,
2876 	.cb_segmap = nodev,
2877 	.cb_chpoll = nochpoll,
2878 	.cb_prop_op = ddi_prop_op,
2879 	.cb_flag = D_MP,
2880 	.cb_rev = CB_REV,
2881 	.cb_aread = nodev,
2882 	.cb_awrite = nodev
2883 };
2884 
2885 struct bus_ops amdzen_bus_ops = {
2886 	.busops_rev = BUSO_REV,
2887 	.bus_map = nullbusmap,
2888 	.bus_dma_map = ddi_no_dma_map,
2889 	.bus_dma_allochdl = ddi_no_dma_allochdl,
2890 	.bus_dma_freehdl = ddi_no_dma_freehdl,
2891 	.bus_dma_bindhdl = ddi_no_dma_bindhdl,
2892 	.bus_dma_unbindhdl = ddi_no_dma_unbindhdl,
2893 	.bus_dma_flush = ddi_no_dma_flush,
2894 	.bus_dma_win = ddi_no_dma_win,
2895 	.bus_dma_ctl = ddi_no_dma_mctl,
2896 	.bus_prop_op = ddi_bus_prop_op,
2897 	.bus_ctl = amdzen_bus_ctl
2898 };
2899 
2900 static struct dev_ops amdzen_dev_ops = {
2901 	.devo_rev = DEVO_REV,
2902 	.devo_refcnt = 0,
2903 	.devo_getinfo = nodev,
2904 	.devo_identify = nulldev,
2905 	.devo_probe = nulldev,
2906 	.devo_attach = amdzen_attach,
2907 	.devo_detach = amdzen_detach,
2908 	.devo_reset = nodev,
2909 	.devo_quiesce = ddi_quiesce_not_needed,
2910 	.devo_bus_ops = &amdzen_bus_ops,
2911 	.devo_cb_ops = &amdzen_topo_cb_ops
2912 };
2913 
2914 static struct modldrv amdzen_modldrv = {
2915 	.drv_modops = &mod_driverops,
2916 	.drv_linkinfo = "AMD Zen Nexus Driver",
2917 	.drv_dev_ops = &amdzen_dev_ops
2918 };
2919 
2920 static struct modlinkage amdzen_modlinkage = {
2921 	.ml_rev = MODREV_1,
2922 	.ml_linkage = { &amdzen_modldrv, NULL }
2923 };
2924 
2925 int
_init(void)2926 _init(void)
2927 {
2928 	int ret;
2929 
2930 	if (cpuid_getvendor(CPU) != X86_VENDOR_AMD &&
2931 	    cpuid_getvendor(CPU) != X86_VENDOR_HYGON) {
2932 		return (ENOTSUP);
2933 	}
2934 
2935 	if ((ret = mod_install(&amdzen_modlinkage)) == 0) {
2936 		amdzen_alloc();
2937 	}
2938 
2939 	return (ret);
2940 }
2941 
2942 int
_info(struct modinfo * modinfop)2943 _info(struct modinfo *modinfop)
2944 {
2945 	return (mod_info(&amdzen_modlinkage, modinfop));
2946 }
2947 
2948 int
_fini(void)2949 _fini(void)
2950 {
2951 	int ret;
2952 
2953 	if ((ret = mod_remove(&amdzen_modlinkage)) == 0) {
2954 		amdzen_free();
2955 	}
2956 
2957 	return (ret);
2958 }
2959