xref: /illumos-gate/usr/src/uts/intel/io/amdzen/amdzen.c (revision 05ce3950cb6a645887911ba82ec91e3c06c5ad7c)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2019, Joyent, Inc.
14  * Copyright 2024 Oxide Computer Company
15  */
16 
17 /*
18  * Nexus Driver for AMD Zen family systems. The purpose of this driver is to
19  * provide access to the following resources in a single, centralized fashion:
20  *
21  *  - The per-chip Data Fabric
22  *  - The North Bridge
23  *  - The System Management Network (SMN)
24  *
25  * This is a nexus driver as once we have attached to all the requisite
26  * components, we will enumerate child devices which consume this functionality.
27  *
28  * ------------------------
29  * Mapping Devices Together
30  * ------------------------
31  *
32  * The operating system needs to expose things like temperature sensors and DRAM
33  * configuration registers in terms of things that are meaningful to the system
34  * such as logical CPUs, cores, etc. This driver attaches to the PCI devices
35  * that represent the northbridge, data fabrics, and dies. Note that there are
36  * multiple northbridge and DF devices (one each per die) and this driver maps
37  * all of these three things together. Unfortunately, this requires some
38  * acrobatics as there is no direct way to map a northbridge to its
39  * corresponding die. Instead, we map a CPU die to a data fabric PCI device and
40  * a data fabric PCI device to a corresponding northbridge PCI device. This
 * transitive relationship allows us to map between northbridge and die.
42  *
43  * As each data fabric device is attached, based on vendor and device portions
44  * of the PCI ID, we add it to the DF stubs list in the global amdzen_t
45  * structure, amdzen_data->azn_df_stubs. We must now map these to logical CPUs.
46  *
47  * In current Zen based products, there is a direct mapping between processor
48  * nodes and a data fabric PCI device: all of the devices are on PCI Bus 0 and
49  * start from Device 0x18, so device 0x18 maps to processor node 0, 0x19 to
50  * processor node 1, etc. This means that to map a logical CPU to a data fabric
51  * device, we take its processor node id, add it to 0x18 and find the PCI device
52  * that is on bus 0 with that ID number. We already discovered the DF devices as
53  * described above.
54  *
55  * The northbridge PCI device has a well-defined device and function, but the
56  * bus that it is on varies. Each die has its own set of assigned PCI buses and
57  * its northbridge device is on the first die-specific bus. This implies that
58  * the northbridges do not show up on PCI bus 0, as that is the PCI bus that all
59  * of the data fabric devices are on and is not assigned to any particular die.
60  * Additionally, while the northbridge on the lowest-numbered PCI bus
61  * intuitively corresponds to processor node zero, hardware does not guarantee
62  * this. Because we don't want to be at the mercy of firmware, we don't rely on
63  * this ordering assumption, though we have yet to find a system that deviates
64  * from it, either.
65  *
66  * One of the registers in the data fabric device's function 0
67  * (AMDZEN_DF_F0_CFG_ADDR_CTL) happens to identify the first PCI bus that is
68  * associated with the processor node. This means that we can map a data fabric
69  * device to a northbridge by finding the northbridge whose PCI bus ID matches
70  * the value in the corresponding data fabric's AMDZEN_DF_F0_CFG_ADDR_CTL.
71  *
72  * Given all of the above, we can map a northbridge to a data fabric device and
73  * a die to a data fabric device. Because these are 1:1 mappings, there is a
 * transitive relationship from northbridge to die, and therefore we know which
75  * northbridge is associated with which processor die. This is summarized in the
76  * following image:
77  *
78  *  +-------+     +------------------------------------+     +--------------+
79  *  | Die 0 |---->| Data Fabric PCI BDF 0/18/0         |---->| Northbridge  |
80  *  +-------+     | AMDZEN_DF_F0_CFG_ADDR_CTL: bus 10  |     | PCI  10/0/0  |
81  *     ...        +------------------------------------+     +--------------+
82  *  +-------+     +------------------------------------+     +--------------+
83  *  | Die n |---->| Data Fabric PCI BDF 0/18+n/0       |---->| Northbridge  |
84  *  +-------+     | AMDZEN_DF_F0_CFG_ADDR_CTL: bus 133 |     | PCI 133/0/0  |
85  *                +------------------------------------+     +--------------+
86  *
87  * Note, the PCI buses used by the northbridges here are arbitrary examples that
88  * do not necessarily reflect actual hardware values; however, the
89  * bus/device/function (BDF) of the data fabric accurately models hardware. All
90  * BDF values are in hex.
91  *
92  * Starting with the Rome generation of processors (Family 17h Model 30-3Fh),
93  * AMD has multiple northbridges on a given die. All of these northbridges share
94  * the same data fabric and system management network port. From our perspective
95  * this means that some of the northbridge devices will be redundant and that we
96  * no longer have a 1:1 mapping between the northbridge and the data fabric
97  * devices. Every data fabric will have a northbridge, but not every northbridge
98  * will have a data fabric device mapped. Because we're always trying to map
99  * from a die to a northbridge and not the reverse, the fact that there are
100  * extra northbridge devices hanging around that we don't know about shouldn't
101  * be a problem.
102  *
103  * -------------------------------
104  * Attach and Detach Complications
105  * -------------------------------
106  *
107  * We need to map different PCI devices together. Each device is attached to a
108  * amdzen_stub driver to facilitate integration with the rest of the kernel PCI
109  * machinery and so we have to manage multiple dev_info_t structures, each of
110  * which may be independently attached and detached.
111  *
112  * This is not particularly complex for attach: our _init routine allocates the
113  * necessary mutex and list structures at module load time, and as each stub is
114  * attached, it calls into this code to be added to the appropriate list. When
115  * the nexus itself is attached, we walk the PCI device tree accumulating a
116  * counter for all devices we expect to be attached. Once the scan is complete
117  * and all such devices are accounted for (stub registration may be happening
118  * asynchronously with respect to nexus attach), we initialize the nexus device
119  * and the attach is complete.
120  *
121  * Most other device drivers support instances that can be brought back after
122  * detach, provided they are associated with an active minor node in the
123  * /devices file system. This driver is different. Once a stub device has been
124  * attached, we do not permit detaching the nexus driver instance, as the kernel
125  * does not give us interlocking guarantees between nexus and stub driver attach
126  * and detach. It is simplest to just unconditionally fail detach once a stub
127  * has attached.
128  *
129  * ---------------
130  * Exposed Devices
131  * ---------------
132  *
133  * Rather than try and have all of the different functions that could be
134  * provided in one driver, we have a nexus driver that tries to load child
135  * pseudo-device drivers that provide specific pieces of functionality.
136  *
137  * -------
138  * Locking
139  * -------
140  *
141  * The amdzen_data structure contains a single lock, azn_mutex.
142  *
143  * The various client functions here are intended for our nexus's direct
 * children, but have been designed in case someone else should depend on this
145  * driver. Once a DF has been discovered, the set of entities inside of it
146  * (adf_nents, adf_ents[]) is considered static, constant data, and iteration
147  * over them does not require locking. However, the discovery of the amd_df_t
148  * does. In addition, locking is required whenever performing register accesses
149  * to the DF or SMN.
150  *
151  * To summarize, one must hold the lock in the following circumstances:
152  *
153  *  - Looking up DF structures
154  *  - Reading or writing to DF registers
155  *  - Reading or writing to SMN registers
156  *
157  * In general, it is preferred that the lock be held across an entire client
158  * operation if possible. The only time this becomes an issue are when we have
159  * callbacks into our callers (ala amdzen_c_df_iter()) as they may recursively
160  * call into us.
161  */
162 
163 #include <sys/modctl.h>
164 #include <sys/conf.h>
165 #include <sys/devops.h>
166 #include <sys/ddi.h>
167 #include <sys/sunddi.h>
168 #include <sys/pci.h>
169 #include <sys/sysmacros.h>
170 #include <sys/sunndi.h>
171 #include <sys/x86_archext.h>
172 #include <sys/cpuvar.h>
173 #include <sys/policy.h>
174 #include <sys/stat.h>
175 #include <sys/sunddi.h>
176 #include <sys/bitmap.h>
177 #include <sys/stdbool.h>
178 
179 #include <sys/amdzen/df.h>
180 #include <sys/amdzen/ccd.h>
181 #include "amdzen.h"
182 #include "amdzen_client.h"
183 #include "amdzen_topo.h"
184 
/*
 * Global driver state shared between the nexus instance and the stub
 * instances; the mutex and lists inside it are set up at module load time in
 * _init() (see the "Attach and Detach Complications" section above).
 */
amdzen_t *amdzen_data;

/*
 * Internal minor nodes for devices that the nexus provides itself.
 */
#define	AMDZEN_MINOR_TOPO	0
191 
/*
 * Array of northbridge IDs that we care about. These are PCI device IDs,
 * presumably matched against attaching stub devices to recognize a
 * northbridge -- confirm against the stub registration path later in this
 * file.
 */
static const uint16_t amdzen_nb_ids[] = {
	/* Family 17h Ryzen, Epyc Models 00h-0fh (Zen uarch) */
	0x1450,
	/* Family 17h Raven Ridge, Kestrel, Dali Models 10h-2fh (Zen uarch) */
	0x15d0,
	/* Family 17h/19h Rome, Milan, Matisse, Vermeer Zen 2/Zen 3 uarch */
	0x1480,
	/* Family 17h/19h Renoir, Cezanne, Van Gogh Zen 2/3 uarch */
	0x1630,
	/* Family 19h Genoa and Bergamo */
	0x14a4,
	/* Family 17h Mendocino, Family 19h Rembrandt */
	0x14b5,
	/* Family 19h Raphael, Family 1Ah 40-4fh */
	0x14d8,
	/* Family 19h Phoenix */
	0x14e8,
	/* Family 1Ah Turin */
	0x153a,
	/* Family 1Ah 20-2fh */
	0x1507
};
217 
/*
 * Static description of one child pseudo-device that the nexus creates.
 */
typedef struct {
	/* devinfo node name of the child driver to instantiate. */
	char *acd_name;
	/* Identifier for this child; passed along as parent data. */
	amdzen_child_t acd_addr;
	/*
	 * This indicates whether or not we should issue warnings to users when
	 * something happens specific to this instance. The main reason we don't
	 * want to is for optional devices that may not be installed as they are
	 * for development purposes (e.g. usmn, zen_udf); however, if there is
	 * an issue with the others we still want to know.
	 */
	bool acd_warn;
} amdzen_child_data_t;
230 
/*
 * The set of child pseudo-devices the nexus attempts to create; see the
 * "Exposed Devices" section of the theory statement above.
 */
static const amdzen_child_data_t amdzen_children[] = {
	{ "smntemp", AMDZEN_C_SMNTEMP, true },
	{ "usmn", AMDZEN_C_USMN, false },
	{ "zen_udf", AMDZEN_C_ZEN_UDF, false },
	{ "zen_umc", AMDZEN_C_ZEN_UMC, true }
};
237 
/*
 * Thin wrappers around pci_config_get*() that read a register of the given
 * width from a stub's mapped PCI configuration space.
 */
static uint8_t
amdzen_stub_get8(amdzen_stub_t *stub, off_t reg)
{
	return (pci_config_get8(stub->azns_cfgspace, reg));
}

static uint16_t
amdzen_stub_get16(amdzen_stub_t *stub, off_t reg)
{
	return (pci_config_get16(stub->azns_cfgspace, reg));
}

static uint32_t
amdzen_stub_get32(amdzen_stub_t *stub, off_t reg)
{
	return (pci_config_get32(stub->azns_cfgspace, reg));
}

static uint64_t
amdzen_stub_get64(amdzen_stub_t *stub, off_t reg)
{
	return (pci_config_get64(stub->azns_cfgspace, reg));
}
261 
/*
 * Thin wrappers around pci_config_put*() that write a register of the given
 * width in a stub's mapped PCI configuration space. Note that there is no
 * 64-bit put variant; nothing here needs one.
 */
static void
amdzen_stub_put8(amdzen_stub_t *stub, off_t reg, uint8_t val)
{
	pci_config_put8(stub->azns_cfgspace, reg, val);
}

static void
amdzen_stub_put16(amdzen_stub_t *stub, off_t reg, uint16_t val)
{
	pci_config_put16(stub->azns_cfgspace, reg, val);
}

static void
amdzen_stub_put32(amdzen_stub_t *stub, off_t reg, uint32_t val)
{
	pci_config_put32(stub->azns_cfgspace, reg, val);
}
279 
/*
 * Perform an indirect DF register read targeted at a single instance using
 * the FICAA/FICAD (indirect config access address/data) register pair: FICAA
 * is programmed with the function, register offset, and target instance, and
 * the result is then read back through FICAD. The FICAA register layout and
 * the location of the pair changed in DFv4, hence the switch below. Requires
 * azn_mutex, as the FICAA/FICAD pair is shared state.
 */
static uint64_t
amdzen_df_read_regdef(amdzen_t *azn, amdzen_df_t *df, const df_reg_def_t def,
    uint8_t inst, boolean_t do_64)
{
	df_reg_def_t ficaa;
	df_reg_def_t ficad;
	uint32_t val = 0;
	/*
	 * NOTE(review): the revision used for validation and register
	 * selection comes from the first DF, not the df argument; this
	 * presumes all DFs in the system share one revision (the same
	 * assumption amdzen_c_df_rev() documents) -- confirm.
	 */
	df_rev_t df_rev = azn->azn_dfs[0].adf_rev;
	VERIFY(df_reg_valid(df_rev, def));

	VERIFY(MUTEX_HELD(&azn->azn_mutex));
	val = DF_FICAA_V2_SET_TARG_INST(val, 1);
	val = DF_FICAA_V2_SET_FUNC(val, def.drd_func);
	val = DF_FICAA_V2_SET_INST(val, inst);
	val = DF_FICAA_V2_SET_64B(val, do_64 ? 1 : 0);

	switch (df_rev) {
	case DF_REV_2:
	case DF_REV_3:
	case DF_REV_3P5:
		ficaa = DF_FICAA_V2;
		ficad = DF_FICAD_LO_V2;
		val = DF_FICAA_V2_SET_REG(val, def.drd_reg >>
		    DF_FICAA_REG_SHIFT);
		break;
	case DF_REV_4:
	case DF_REV_4D2:
		ficaa = DF_FICAA_V4;
		ficad = DF_FICAD_LO_V4;
		val = DF_FICAA_V4_SET_REG(val, def.drd_reg >>
		    DF_FICAA_REG_SHIFT);
		break;
	default:
		panic("encountered unexpected DF rev: %u", df_rev);
	}

	/* Program the address, then read data back in the requested width. */
	amdzen_stub_put32(df->adf_funcs[ficaa.drd_func], ficaa.drd_reg, val);
	if (do_64) {
		return (amdzen_stub_get64(df->adf_funcs[ficad.drd_func],
		    ficad.drd_reg));
	} else {
		return (amdzen_stub_get32(df->adf_funcs[ficad.drd_func],
		    ficad.drd_reg));
	}
}
325 
/*
 * Perform a targeted 32-bit indirect read to a specific instance and function.
 * This is a convenience wrapper around amdzen_df_read_regdef() with the
 * 64-bit flag cleared; azn_mutex must be held by the caller.
 */
static uint32_t
amdzen_df_read32(amdzen_t *azn, amdzen_df_t *df, uint8_t inst,
    const df_reg_def_t def)
{
	return (amdzen_df_read_regdef(azn, df, def, inst, B_FALSE));
}
335 
/*
 * For a broadcast read, just go to the underlying PCI function and perform a
 * read. At this point in time, we don't believe we need to use the FICAA/FICAD
 * to access it (though it does have a broadcast mode). azn_mutex must be held
 * to serialize with other DF register accesses.
 */
static uint32_t
amdzen_df_read32_bcast(amdzen_t *azn, amdzen_df_t *df, const df_reg_def_t def)
{
	VERIFY(MUTEX_HELD(&azn->azn_mutex));
	return (amdzen_stub_get32(df->adf_funcs[def.drd_func], def.drd_reg));
}
347 
/*
 * Read a register over the SMN (System Management Network). SMN access is
 * indirect: the register's base address is written to the northbridge's SMN
 * address register and the value is then read back through its data register.
 * For sub-word (1- or 2-byte) registers, the offset within the data window
 * selects the correct bytes. azn_mutex must be held since the address/data
 * pair is shared state.
 */
static uint32_t
amdzen_smn_read(amdzen_t *azn, amdzen_df_t *df, const smn_reg_t reg)
{
	const uint32_t base_addr = SMN_REG_ADDR_BASE(reg);
	const uint32_t addr_off = SMN_REG_ADDR_OFF(reg);

	VERIFY(SMN_REG_IS_NATURALLY_ALIGNED(reg));
	VERIFY(MUTEX_HELD(&azn->azn_mutex));
	amdzen_stub_put32(df->adf_nb, AMDZEN_NB_SMN_ADDR, base_addr);

	switch (SMN_REG_SIZE(reg)) {
	case 1:
		return ((uint32_t)amdzen_stub_get8(df->adf_nb,
		    AMDZEN_NB_SMN_DATA + addr_off));
	case 2:
		return ((uint32_t)amdzen_stub_get16(df->adf_nb,
		    AMDZEN_NB_SMN_DATA + addr_off));
	case 4:
		/* Natural alignment means addr_off is zero here. */
		return (amdzen_stub_get32(df->adf_nb, AMDZEN_NB_SMN_DATA));
	default:
		panic("unreachable invalid SMN register size %u",
		    SMN_REG_SIZE(reg));
	}
}
372 
/*
 * Write a register over the SMN. The mirror image of amdzen_smn_read(): the
 * base address goes into the northbridge's SMN address register and the value
 * is then written through the data register, using the sub-word offset for 1-
 * and 2-byte registers. The caller must have validated the value fits (also
 * VERIFYd here) and must hold azn_mutex.
 */
static void
amdzen_smn_write(amdzen_t *azn, amdzen_df_t *df, const smn_reg_t reg,
    const uint32_t val)
{
	const uint32_t base_addr = SMN_REG_ADDR_BASE(reg);
	const uint32_t addr_off = SMN_REG_ADDR_OFF(reg);

	VERIFY(SMN_REG_IS_NATURALLY_ALIGNED(reg));
	VERIFY(SMN_REG_VALUE_FITS(reg, val));
	VERIFY(MUTEX_HELD(&azn->azn_mutex));
	amdzen_stub_put32(df->adf_nb, AMDZEN_NB_SMN_ADDR, base_addr);

	switch (SMN_REG_SIZE(reg)) {
	case 1:
		amdzen_stub_put8(df->adf_nb, AMDZEN_NB_SMN_DATA + addr_off,
		    (uint8_t)val);
		break;
	case 2:
		amdzen_stub_put16(df->adf_nb, AMDZEN_NB_SMN_DATA + addr_off,
		    (uint16_t)val);
		break;
	case 4:
		/* Natural alignment means addr_off is zero here. */
		amdzen_stub_put32(df->adf_nb, AMDZEN_NB_SMN_DATA, val);
		break;
	default:
		panic("unreachable invalid SMN register size %u",
		    SMN_REG_SIZE(reg));
	}
}
402 
403 /*
404  * This is an unfortunate necessity due to the evolution of the CCM DF values.
405  */
406 static inline boolean_t
amdzen_df_at_least(const amdzen_df_t * df,uint8_t major,uint8_t minor)407 amdzen_df_at_least(const amdzen_df_t *df, uint8_t major, uint8_t minor)
408 {
409 	return (df->adf_major > major || (df->adf_major == major &&
410 	    df->adf_minor >= minor));
411 }
412 
413 static amdzen_df_t *
amdzen_df_find(amdzen_t * azn,uint_t dfno)414 amdzen_df_find(amdzen_t *azn, uint_t dfno)
415 {
416 	uint_t i;
417 
418 	ASSERT(MUTEX_HELD(&azn->azn_mutex));
419 	if (dfno >= azn->azn_ndfs) {
420 		return (NULL);
421 	}
422 
423 	for (i = 0; i < azn->azn_ndfs; i++) {
424 		amdzen_df_t *df = &azn->azn_dfs[i];
425 		if ((df->adf_flags & AMDZEN_DF_F_VALID) == 0) {
426 			continue;
427 		}
428 
429 		if (dfno == 0) {
430 			return (df);
431 		}
432 		dfno--;
433 	}
434 
435 	return (NULL);
436 }
437 
438 static amdzen_df_ent_t *
amdzen_df_ent_find_by_instid(amdzen_df_t * df,uint8_t instid)439 amdzen_df_ent_find_by_instid(amdzen_df_t *df, uint8_t instid)
440 {
441 	for (uint_t i = 0; i < df->adf_nents; i++) {
442 		amdzen_df_ent_t *ent = &df->adf_ents[i];
443 
444 		if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0) {
445 			continue;
446 		}
447 
448 		if (ent->adfe_inst_id == instid) {
449 			return (ent);
450 		}
451 	}
452 
453 	return (NULL);
454 }
455 
456 /*
457  * Client functions that are used by nexus children.
458  */
459 int
amdzen_c_smn_read(uint_t dfno,const smn_reg_t reg,uint32_t * valp)460 amdzen_c_smn_read(uint_t dfno, const smn_reg_t reg, uint32_t *valp)
461 {
462 	amdzen_df_t *df;
463 	amdzen_t *azn = amdzen_data;
464 
465 	if (!SMN_REG_SIZE_IS_VALID(reg))
466 		return (EINVAL);
467 	if (!SMN_REG_IS_NATURALLY_ALIGNED(reg))
468 		return (EINVAL);
469 
470 	mutex_enter(&azn->azn_mutex);
471 	df = amdzen_df_find(azn, dfno);
472 	if (df == NULL) {
473 		mutex_exit(&azn->azn_mutex);
474 		return (ENOENT);
475 	}
476 
477 	if ((df->adf_flags & AMDZEN_DF_F_FOUND_NB) == 0) {
478 		mutex_exit(&azn->azn_mutex);
479 		return (ENXIO);
480 	}
481 
482 	*valp = amdzen_smn_read(azn, df, reg);
483 	mutex_exit(&azn->azn_mutex);
484 	return (0);
485 }
486 
487 int
amdzen_c_smn_write(uint_t dfno,const smn_reg_t reg,const uint32_t val)488 amdzen_c_smn_write(uint_t dfno, const smn_reg_t reg, const uint32_t val)
489 {
490 	amdzen_df_t *df;
491 	amdzen_t *azn = amdzen_data;
492 
493 	if (!SMN_REG_SIZE_IS_VALID(reg))
494 		return (EINVAL);
495 	if (!SMN_REG_IS_NATURALLY_ALIGNED(reg))
496 		return (EINVAL);
497 	if (!SMN_REG_VALUE_FITS(reg, val))
498 		return (EOVERFLOW);
499 
500 	mutex_enter(&azn->azn_mutex);
501 	df = amdzen_df_find(azn, dfno);
502 	if (df == NULL) {
503 		mutex_exit(&azn->azn_mutex);
504 		return (ENOENT);
505 	}
506 
507 	if ((df->adf_flags & AMDZEN_DF_F_FOUND_NB) == 0) {
508 		mutex_exit(&azn->azn_mutex);
509 		return (ENXIO);
510 	}
511 
512 	amdzen_smn_write(azn, df, reg, val);
513 	mutex_exit(&azn->azn_mutex);
514 	return (0);
515 }
516 
517 uint_t
amdzen_c_df_count(void)518 amdzen_c_df_count(void)
519 {
520 	uint_t ret;
521 	amdzen_t *azn = amdzen_data;
522 
523 	mutex_enter(&azn->azn_mutex);
524 	ret = azn->azn_ndfs;
525 	mutex_exit(&azn->azn_mutex);
526 	return (ret);
527 }
528 
529 df_rev_t
amdzen_c_df_rev(void)530 amdzen_c_df_rev(void)
531 {
532 	amdzen_df_t *df;
533 	amdzen_t *azn = amdzen_data;
534 	df_rev_t rev;
535 
536 	/*
537 	 * Always use the first DF instance to determine what we're using. Our
538 	 * current assumption, which seems to generally be true, is that the
539 	 * given DF revisions are the same in a given system when the DFs are
540 	 * directly connected.
541 	 */
542 	mutex_enter(&azn->azn_mutex);
543 	df = amdzen_df_find(azn, 0);
544 	if (df == NULL) {
545 		rev = DF_REV_UNKNOWN;
546 	} else {
547 		rev = df->adf_rev;
548 	}
549 	mutex_exit(&azn->azn_mutex);
550 
551 	return (rev);
552 }
553 
554 int
amdzen_c_df_read32(uint_t dfno,uint8_t inst,const df_reg_def_t def,uint32_t * valp)555 amdzen_c_df_read32(uint_t dfno, uint8_t inst, const df_reg_def_t def,
556     uint32_t *valp)
557 {
558 	amdzen_df_t *df;
559 	amdzen_t *azn = amdzen_data;
560 
561 	mutex_enter(&azn->azn_mutex);
562 	df = amdzen_df_find(azn, dfno);
563 	if (df == NULL) {
564 		mutex_exit(&azn->azn_mutex);
565 		return (ENOENT);
566 	}
567 
568 	if (df->adf_rev == DF_REV_UNKNOWN) {
569 		mutex_exit(&azn->azn_mutex);
570 		return (ENOTSUP);
571 	}
572 
573 	*valp = amdzen_df_read_regdef(azn, df, def, inst, B_FALSE);
574 	mutex_exit(&azn->azn_mutex);
575 
576 	return (0);
577 }
578 
579 int
amdzen_c_df_read64(uint_t dfno,uint8_t inst,const df_reg_def_t def,uint64_t * valp)580 amdzen_c_df_read64(uint_t dfno, uint8_t inst, const df_reg_def_t def,
581     uint64_t *valp)
582 {
583 	amdzen_df_t *df;
584 	amdzen_t *azn = amdzen_data;
585 
586 	mutex_enter(&azn->azn_mutex);
587 	df = amdzen_df_find(azn, dfno);
588 	if (df == NULL) {
589 		mutex_exit(&azn->azn_mutex);
590 		return (ENOENT);
591 	}
592 
593 	if (df->adf_rev == DF_REV_UNKNOWN) {
594 		mutex_exit(&azn->azn_mutex);
595 		return (ENOTSUP);
596 	}
597 
598 	*valp = amdzen_df_read_regdef(azn, df, def, inst, B_TRUE);
599 	mutex_exit(&azn->azn_mutex);
600 
601 	return (0);
602 }
603 
604 int
amdzen_c_df_read32_bcast(uint_t dfno,const df_reg_def_t def,uint32_t * valp)605 amdzen_c_df_read32_bcast(uint_t dfno, const df_reg_def_t def, uint32_t *valp)
606 {
607 	amdzen_df_t *df;
608 	amdzen_t *azn = amdzen_data;
609 
610 	mutex_enter(&azn->azn_mutex);
611 	df = amdzen_df_find(azn, dfno);
612 	if (df == NULL) {
613 		mutex_exit(&azn->azn_mutex);
614 		return (ENOENT);
615 	}
616 
617 	if (df->adf_rev == DF_REV_UNKNOWN) {
618 		mutex_exit(&azn->azn_mutex);
619 		return (ENOTSUP);
620 	}
621 
622 	*valp = amdzen_df_read32_bcast(azn, df, def);
623 	mutex_exit(&azn->azn_mutex);
624 
625 	return (0);
626 }
627 
/*
 * Client entry point: iterate over all enabled DF entities of the requested
 * logical type in DF dfno, invoking func(dfno, fabric_id, inst_id, arg) for
 * each match. A non-zero return from the callback stops the walk and is
 * returned to the caller; otherwise returns 0, ENOENT if the DF does not
 * exist, or EINVAL for an unknown type. The callback is invoked without
 * azn_mutex held so that it may call back into this driver.
 */
int
amdzen_c_df_iter(uint_t dfno, zen_df_type_t type, amdzen_c_iter_f func,
    void *arg)
{
	amdzen_df_t *df;
	amdzen_t *azn = amdzen_data;
	df_type_t df_type;
	uint8_t df_subtype;

	/*
	 * Unlike other calls here, we hold our lock only to find the DF here.
	 * The main reason for this is the nature of the callback function.
	 * Folks are iterating over instances so they can call back into us. If
	 * you look at the locking statement, the thing that is most volatile
	 * right here and what we need to protect is the DF itself and
	 * subsequent register accesses to it. The actual data about which
	 * entities exist is static and so once we have found a DF we should
	 * hopefully be in good shape as they only come, but don't go.
	 */
	mutex_enter(&azn->azn_mutex);
	df = amdzen_df_find(azn, dfno);
	if (df == NULL) {
		mutex_exit(&azn->azn_mutex);
		return (ENOENT);
	}
	mutex_exit(&azn->azn_mutex);

	/* Translate the logical type into the (type, subtype) pair to match. */
	switch (type) {
	case ZEN_DF_TYPE_CS_UMC:
		df_type = DF_TYPE_CS;
		/*
		 * In the original Zeppelin DFv2 die there was no subtype field
		 * used for the CS. The UMC is the only type and has a subtype
		 * of zero.
		 */
		if (df->adf_rev != DF_REV_2) {
			df_subtype = DF_CS_SUBTYPE_UMC;
		} else {
			df_subtype = 0;
		}
		break;
	case ZEN_DF_TYPE_CCM_CPU:
		df_type = DF_TYPE_CCM;

		/* The CPU CCM subtype value was renumbered in DFv4.1. */
		if (df->adf_rev >= DF_REV_4 && amdzen_df_at_least(df, 4, 1)) {
			df_subtype = DF_CCM_SUBTYPE_CPU_V4P1;
		} else {
			df_subtype = DF_CCM_SUBTYPE_CPU_V2;
		}
		break;
	default:
		return (EINVAL);
	}

	for (uint_t i = 0; i < df->adf_nents; i++) {
		amdzen_df_ent_t *ent = &df->adf_ents[i];

		/*
		 * Some DF components are not considered enabled and therefore
		 * will end up having bogus values in their ID fields. If we do
		 * not have an enable flag set, we must skip this node.
		 */
		if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0)
			continue;

		if (ent->adfe_type == df_type &&
		    ent->adfe_subtype == df_subtype) {
			int ret = func(dfno, ent->adfe_fabric_id,
			    ent->adfe_inst_id, arg);
			if (ret != 0) {
				return (ret);
			}
		}
	}

	return (0);
}
705 
706 int
amdzen_c_df_fabric_decomp(df_fabric_decomp_t * decomp)707 amdzen_c_df_fabric_decomp(df_fabric_decomp_t *decomp)
708 {
709 	const amdzen_df_t *df;
710 	amdzen_t *azn = amdzen_data;
711 
712 	mutex_enter(&azn->azn_mutex);
713 	df = amdzen_df_find(azn, 0);
714 	if (df == NULL) {
715 		mutex_exit(&azn->azn_mutex);
716 		return (ENOENT);
717 	}
718 
719 	*decomp = df->adf_decomp;
720 	mutex_exit(&azn->azn_mutex);
721 	return (0);
722 }
723 
/*
 * Allocate and online one child pseudo-device of the nexus, as described by
 * acd. Returns B_TRUE on success, B_FALSE on failure; failures to online are
 * only warned about when the child is not an optional development device
 * (acd_warn).
 */
static boolean_t
amdzen_create_child(amdzen_t *azn, const amdzen_child_data_t *acd)
{
	int ret;
	dev_info_t *child;

	if (ndi_devi_alloc(azn->azn_dip, acd->acd_name,
	    (pnode_t)DEVI_SID_NODEID, &child) != NDI_SUCCESS) {
		dev_err(azn->azn_dip, CE_WARN, "!failed to allocate child "
		    "dip for %s", acd->acd_name);
		return (B_FALSE);
	}

	/* Stash the child data so the child driver can identify itself. */
	ddi_set_parent_data(child, (void *)acd);
	if ((ret = ndi_devi_online(child, 0)) != NDI_SUCCESS) {
		if (acd->acd_warn) {
			dev_err(azn->azn_dip, CE_WARN, "!failed to online "
			    "child dip %s: %d", acd->acd_name, ret);
		}
		/*
		 * NOTE(review): the allocated dip is not freed on this path;
		 * confirm whether ndi_devi_free() is warranted here or the
		 * node is intentionally left for later cleanup.
		 */
		return (B_FALSE);
	}

	return (B_TRUE);
}
748 
749 static boolean_t
amdzen_map_dfs(amdzen_t * azn)750 amdzen_map_dfs(amdzen_t *azn)
751 {
752 	amdzen_stub_t *stub;
753 
754 	ASSERT(MUTEX_HELD(&azn->azn_mutex));
755 
756 	for (stub = list_head(&azn->azn_df_stubs); stub != NULL;
757 	    stub = list_next(&azn->azn_df_stubs, stub)) {
758 		amdzen_df_t *df;
759 		uint_t dfno;
760 
761 		dfno = stub->azns_dev - AMDZEN_DF_FIRST_DEVICE;
762 		if (dfno > AMDZEN_MAX_DFS) {
763 			dev_err(stub->azns_dip, CE_WARN, "encountered df "
764 			    "device with illegal DF PCI b/d/f: 0x%x/%x/%x",
765 			    stub->azns_bus, stub->azns_dev, stub->azns_func);
766 			goto err;
767 		}
768 
769 		df = &azn->azn_dfs[dfno];
770 
771 		if (stub->azns_func >= AMDZEN_MAX_DF_FUNCS) {
772 			dev_err(stub->azns_dip, CE_WARN, "encountered df "
773 			    "device with illegal DF PCI b/d/f: 0x%x/%x/%x",
774 			    stub->azns_bus, stub->azns_dev, stub->azns_func);
775 			goto err;
776 		}
777 
778 		if (df->adf_funcs[stub->azns_func] != NULL) {
779 			dev_err(stub->azns_dip, CE_WARN, "encountered "
780 			    "duplicate df device with DF PCI b/d/f: 0x%x/%x/%x",
781 			    stub->azns_bus, stub->azns_dev, stub->azns_func);
782 			goto err;
783 		}
784 		df->adf_funcs[stub->azns_func] = stub;
785 	}
786 
787 	return (B_TRUE);
788 
789 err:
790 	azn->azn_flags |= AMDZEN_F_DEVICE_ERROR;
791 	return (B_FALSE);
792 }
793 
794 static boolean_t
amdzen_check_dfs(amdzen_t * azn)795 amdzen_check_dfs(amdzen_t *azn)
796 {
797 	uint_t i;
798 	boolean_t ret = B_TRUE;
799 
800 	for (i = 0; i < AMDZEN_MAX_DFS; i++) {
801 		amdzen_df_t *df = &azn->azn_dfs[i];
802 		uint_t count = 0;
803 
804 		/*
805 		 * We require all platforms to have DFs functions 0-6. Not all
806 		 * platforms have DF function 7.
807 		 */
808 		for (uint_t func = 0; func < AMDZEN_MAX_DF_FUNCS - 1; func++) {
809 			if (df->adf_funcs[func] != NULL) {
810 				count++;
811 			}
812 		}
813 
814 		if (count == 0)
815 			continue;
816 
817 		if (count != 7) {
818 			ret = B_FALSE;
819 			dev_err(azn->azn_dip, CE_WARN, "df %u devices "
820 			    "incomplete", i);
821 		} else {
822 			df->adf_flags |= AMDZEN_DF_F_VALID;
823 			azn->azn_ndfs++;
824 		}
825 	}
826 
827 	return (ret);
828 }
829 
/*
 * Component IDs used by Rome/Milan-style parts, whose fabric component ID
 * space is disjoint -- note the gaps (12-15 and 28-29 are skipped).
 * Presumably indexed by a dense logical component number; confirm against
 * its use later in this file.
 */
static const uint8_t amdzen_df_rome_ids[0x2b] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, 20, 21, 22, 23,
	24, 25, 26, 27, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
	44, 45, 46, 47, 48
};
835 
836 /*
837  * Check the first df entry to see if it belongs to Rome or Milan. If so, then
838  * it uses the disjoint ID space.
839  */
840 static boolean_t
amdzen_is_rome_style(uint_t id)841 amdzen_is_rome_style(uint_t id)
842 {
843 	return (id == 0x1490 || id == 0x1650);
844 }
845 
846 /*
 * Deal with the differences between how a CCM subtype is indicated
848  * across CPU generations.
849  */
850 static boolean_t
amdzen_dfe_is_ccm(const amdzen_df_t * df,const amdzen_df_ent_t * ent)851 amdzen_dfe_is_ccm(const amdzen_df_t *df, const amdzen_df_ent_t *ent)
852 {
853 	if (ent->adfe_type != DF_TYPE_CCM) {
854 		return (B_FALSE);
855 	}
856 
857 	if (df->adf_rev >= DF_REV_4 && amdzen_df_at_least(df, 4, 1)) {
858 		return (ent->adfe_subtype == DF_CCM_SUBTYPE_CPU_V4P1);
859 	} else {
860 		return (ent->adfe_subtype == DF_CCM_SUBTYPE_CPU_V2);
861 	}
862 }
863 
864 /*
865  * To be able to do most other things we want to do, we must first determine
866  * what revision of the DF (data fabric) that we're using.
867  *
868  * Snapshot the df version. This was added explicitly in DFv4.0, around the Zen
869  * 4 timeframe and allows us to tell apart different version of the DF register
870  * set, most usefully when various subtypes were added.
871  *
872  * Older versions can theoretically be told apart based on usage of reserved
873  * registers. We walk these in the following order, starting with the newest rev
874  * and walking backwards to tell things apart:
875  *
876  *   o v3.5 -> Check function 1, register 0x150. This was reserved prior
877  *             to this point. This is actually DF_FIDMASK0_V3P5. We are supposed
878  *             to check bits [7:0].
879  *
880  *   o v3.0 -> Check function 1, register 0x208. The low byte (7:0) was
881  *             changed to indicate a component mask. This is non-zero
882  *             in the 3.0 generation. This is actually DF_FIDMASK_V2.
883  *
 *   o v2.0 -> Neither of the above matched. Presumably v1 wasn't part
885  *             of the Zen generation.
886  *
887  * Because we don't know what version we are yet, we do not use the normal
888  * versioned register accesses which would check what DF version we are and
889  * would want to use the normal indirect register accesses (which also require
890  * us to know the version). We instead do direct broadcast reads.
891  */
static void
amdzen_determine_df_vers(amdzen_t *azn, amdzen_df_t *df)
{
	uint32_t val;
	df_reg_def_t rd = DF_FBICNT;

	/*
	 * FBICNT's major/minor fields were added in DFv4 (see the theory
	 * statement above); on earlier revisions both read as zero, which is
	 * what sends us down the probing path below.
	 */
	val = amdzen_stub_get32(df->adf_funcs[rd.drd_func], rd.drd_reg);
	df->adf_major = DF_FBICNT_V4_GET_MAJOR(val);
	df->adf_minor = DF_FBICNT_V4_GET_MINOR(val);
	if (df->adf_major == 0 && df->adf_minor == 0) {
		/*
		 * Pre-v4 part: probe the formerly-reserved registers, newest
		 * revision first, per the ordering documented above.
		 */
		rd = DF_FIDMASK0_V3P5;
		val = amdzen_stub_get32(df->adf_funcs[rd.drd_func], rd.drd_reg);
		if (bitx32(val, 7, 0) != 0) {
			df->adf_major = 3;
			df->adf_minor = 5;
			df->adf_rev = DF_REV_3P5;
		} else {
			rd = DF_FIDMASK_V2;
			val = amdzen_stub_get32(df->adf_funcs[rd.drd_func],
			    rd.drd_reg);
			if (bitx32(val, 7, 0) != 0) {
				df->adf_major = 3;
				df->adf_minor = 0;
				df->adf_rev = DF_REV_3;
			} else {
				/* Neither probe hit; this must be DFv2. */
				df->adf_major = 2;
				df->adf_minor = 0;
				df->adf_rev = DF_REV_2;
			}
		}
	} else if (df->adf_major == 4 && df->adf_minor >= 2) {
		/*
		 * These are devices that have the newer memory layout that
		 * moves the DF::DramBaseAddress to 0x200. Please see the df.h
		 * theory statement for more information.
		 */
		df->adf_rev = DF_REV_4D2;
	} else if (df->adf_major == 4) {
		df->adf_rev = DF_REV_4;
	} else {
		df->adf_rev = DF_REV_UNKNOWN;
	}
}
935 
936 /*
937  * All of the different versions of the DF have different ways of getting at and
938  * answering the question of how do I break a fabric ID into a corresponding
939  * socket, die, and component. Importantly the goal here is to obtain, cache,
940  * and normalize:
941  *
942  *  o The DF System Configuration
943  *  o The various Mask registers
944  *  o The Node ID
945  */
946 static void
amdzen_determine_fabric_decomp(amdzen_t * azn,amdzen_df_t * df)947 amdzen_determine_fabric_decomp(amdzen_t *azn, amdzen_df_t *df)
948 {
949 	uint32_t mask;
950 	df_fabric_decomp_t *decomp = &df->adf_decomp;
951 
952 	switch (df->adf_rev) {
953 	case DF_REV_2:
954 		df->adf_syscfg = amdzen_df_read32_bcast(azn, df, DF_SYSCFG_V2);
955 		switch (DF_SYSCFG_V2_GET_MY_TYPE(df->adf_syscfg)) {
956 		case DF_DIE_TYPE_CPU:
957 			mask = amdzen_df_read32_bcast(azn, df,
958 			    DF_DIEMASK_CPU_V2);
959 			break;
960 		case DF_DIE_TYPE_APU:
961 			mask = amdzen_df_read32_bcast(azn, df,
962 			    DF_DIEMASK_APU_V2);
963 			break;
964 		default:
965 			panic("DF thinks we're not on a CPU!");
966 		}
967 		df->adf_mask0 = mask;
968 
969 		/*
970 		 * DFv2 is a bit different in how the fabric mask register is
971 		 * phrased. Logically a fabric ID is broken into something that
972 		 * uniquely identifies a "node" (a particular die on a socket)
973 		 * and something that identifies a "component", e.g. a memory
974 		 * controller.
975 		 *
976 		 * Starting with DFv3, these registers logically called out how
977 		 * to separate the fabric ID first into a node and a component.
978 		 * Then the node was then broken down into a socket and die. In
979 		 * DFv2, there is no separate mask and shift of a node. Instead
980 		 * the socket and die are absolute offsets into the fabric ID
981 		 * rather than relative offsets into the node ID. As such, when
982 		 * we encounter DFv2, we fake up a node mask and shift and make
983 		 * it look like DFv3+.
984 		 */
985 		decomp->dfd_node_mask = DF_DIEMASK_V2_GET_SOCK_MASK(mask) |
986 		    DF_DIEMASK_V2_GET_DIE_MASK(mask);
987 		decomp->dfd_node_shift = DF_DIEMASK_V2_GET_DIE_SHIFT(mask);
988 		decomp->dfd_comp_mask = DF_DIEMASK_V2_GET_COMP_MASK(mask);
989 		decomp->dfd_comp_shift = 0;
990 
991 		decomp->dfd_sock_mask = DF_DIEMASK_V2_GET_SOCK_MASK(mask) >>
992 		    decomp->dfd_node_shift;
993 		decomp->dfd_die_mask = DF_DIEMASK_V2_GET_DIE_MASK(mask) >>
994 		    decomp->dfd_node_shift;
995 		decomp->dfd_sock_shift = DF_DIEMASK_V2_GET_SOCK_SHIFT(mask) -
996 		    decomp->dfd_node_shift;
997 		decomp->dfd_die_shift = DF_DIEMASK_V2_GET_DIE_SHIFT(mask) -
998 		    decomp->dfd_node_shift;
999 		ASSERT3U(decomp->dfd_die_shift, ==, 0);
1000 
1001 		/*
1002 		 * There is no register in the actual data fabric with the node
1003 		 * ID in DFv2 that we have found. Instead we take the first
1004 		 * entity's fabric ID and transform it into the node id.
1005 		 */
1006 		df->adf_nodeid = (df->adf_ents[0].adfe_fabric_id &
1007 		    decomp->dfd_node_mask) >> decomp->dfd_node_shift;
1008 		break;
1009 	case DF_REV_3:
1010 		df->adf_syscfg = amdzen_df_read32_bcast(azn, df, DF_SYSCFG_V3);
1011 		df->adf_mask0 =  amdzen_df_read32_bcast(azn, df,
1012 		    DF_FIDMASK0_V3);
1013 		df->adf_mask1 =  amdzen_df_read32_bcast(azn, df,
1014 		    DF_FIDMASK1_V3);
1015 
1016 		decomp->dfd_sock_mask =
1017 		    DF_FIDMASK1_V3_GET_SOCK_MASK(df->adf_mask1);
1018 		decomp->dfd_sock_shift =
1019 		    DF_FIDMASK1_V3_GET_SOCK_SHIFT(df->adf_mask1);
1020 		decomp->dfd_die_mask =
1021 		    DF_FIDMASK1_V3_GET_DIE_MASK(df->adf_mask1);
1022 		decomp->dfd_die_shift = 0;
1023 		decomp->dfd_node_mask =
1024 		    DF_FIDMASK0_V3_GET_NODE_MASK(df->adf_mask0);
1025 		decomp->dfd_node_shift =
1026 		    DF_FIDMASK1_V3_GET_NODE_SHIFT(df->adf_mask1);
1027 		decomp->dfd_comp_mask =
1028 		    DF_FIDMASK0_V3_GET_COMP_MASK(df->adf_mask0);
1029 		decomp->dfd_comp_shift = 0;
1030 
1031 		df->adf_nodeid = DF_SYSCFG_V3_GET_NODE_ID(df->adf_syscfg);
1032 		break;
1033 	case DF_REV_3P5:
1034 		df->adf_syscfg = amdzen_df_read32_bcast(azn, df,
1035 		    DF_SYSCFG_V3P5);
1036 		df->adf_mask0 =  amdzen_df_read32_bcast(azn, df,
1037 		    DF_FIDMASK0_V3P5);
1038 		df->adf_mask1 =  amdzen_df_read32_bcast(azn, df,
1039 		    DF_FIDMASK1_V3P5);
1040 		df->adf_mask2 =  amdzen_df_read32_bcast(azn, df,
1041 		    DF_FIDMASK2_V3P5);
1042 
1043 		decomp->dfd_sock_mask =
1044 		    DF_FIDMASK2_V3P5_GET_SOCK_MASK(df->adf_mask2);
1045 		decomp->dfd_sock_shift =
1046 		    DF_FIDMASK1_V3P5_GET_SOCK_SHIFT(df->adf_mask1);
1047 		decomp->dfd_die_mask =
1048 		    DF_FIDMASK2_V3P5_GET_DIE_MASK(df->adf_mask2);
1049 		decomp->dfd_die_shift = 0;
1050 		decomp->dfd_node_mask =
1051 		    DF_FIDMASK0_V3P5_GET_NODE_MASK(df->adf_mask0);
1052 		decomp->dfd_node_shift =
1053 		    DF_FIDMASK1_V3P5_GET_NODE_SHIFT(df->adf_mask1);
1054 		decomp->dfd_comp_mask =
1055 		    DF_FIDMASK0_V3P5_GET_COMP_MASK(df->adf_mask0);
1056 		decomp->dfd_comp_shift = 0;
1057 
1058 		df->adf_nodeid = DF_SYSCFG_V3P5_GET_NODE_ID(df->adf_syscfg);
1059 		break;
1060 	case DF_REV_4:
1061 	case DF_REV_4D2:
1062 		df->adf_syscfg = amdzen_df_read32_bcast(azn, df, DF_SYSCFG_V4);
1063 		df->adf_mask0 =  amdzen_df_read32_bcast(azn, df,
1064 		    DF_FIDMASK0_V4);
1065 		df->adf_mask1 =  amdzen_df_read32_bcast(azn, df,
1066 		    DF_FIDMASK1_V4);
1067 		df->adf_mask2 =  amdzen_df_read32_bcast(azn, df,
1068 		    DF_FIDMASK2_V4);
1069 
1070 		/*
1071 		 * The DFv4 registers are at a different location in the DF;
1072 		 * however, the actual layout of fields is the same as DFv3.5.
1073 		 * This is why you see V3P5 below.
1074 		 */
1075 		decomp->dfd_sock_mask =
1076 		    DF_FIDMASK2_V3P5_GET_SOCK_MASK(df->adf_mask2);
1077 		decomp->dfd_sock_shift =
1078 		    DF_FIDMASK1_V3P5_GET_SOCK_SHIFT(df->adf_mask1);
1079 		decomp->dfd_die_mask =
1080 		    DF_FIDMASK2_V3P5_GET_DIE_MASK(df->adf_mask2);
1081 		decomp->dfd_die_shift = 0;
1082 		decomp->dfd_node_mask =
1083 		    DF_FIDMASK0_V3P5_GET_NODE_MASK(df->adf_mask0);
1084 		decomp->dfd_node_shift =
1085 		    DF_FIDMASK1_V3P5_GET_NODE_SHIFT(df->adf_mask1);
1086 		decomp->dfd_comp_mask =
1087 		    DF_FIDMASK0_V3P5_GET_COMP_MASK(df->adf_mask0);
1088 		decomp->dfd_comp_shift = 0;
1089 
1090 		df->adf_nodeid = DF_SYSCFG_V4_GET_NODE_ID(df->adf_syscfg);
1091 		break;
1092 	default:
1093 		panic("encountered suspicious, previously rejected DF "
1094 		    "rev: 0x%x", df->adf_rev);
1095 	}
1096 }
1097 
1098 /*
1099  * The purpose of this function is to map CCMs to the corresponding CCDs that
1100  * exist. This is not an obvious thing as there is no direct mapping in the data
1101  * fabric between these IDs.
1102  *
1103  * Prior to DFv4, a given CCM was only ever connected to at most one CCD.
1104  * Starting in DFv4 a given CCM may have one or two SDP (scalable data ports)
1105  * that connect to CCDs. These may be connected to the same CCD or a different
1106  * one. When both ports are enabled we must check whether or not the port is
1107  * considered to be in wide mode. When wide mode is enabled then the two ports
1108  * are connected to a single CCD. If wide mode is disabled then the two ports
1109  * are connected to separate CCDs.
1110  *
1111  * The physical number of a CCD, which is how we determine the SMN aperture to
1112  * use, is based on the CCM ID. In most sockets we have seen up to a maximum of
1113  * 8 CCMs. When a CCM is connected to more than one CCD we have determined based
1114  * on some hints from AMD's ACPI information that the numbering is assumed to be
1115  * that CCM's number plus the total number of CCMs.
1116  *
1117  * More concretely, the SP5 Genoa/Bergamo Zen 4 platform has 8 CCMs. When there
1118  * are more than 8 CCDs installed then CCM 0 maps to CCDs 0 and 8. CCM 1 to CCDs
1119  * 1 and 9, etc. CCMs 4-7 map 1:1 to CCDs 4-7. However, the placement of CCDs
1120  * within the package has changed across generations.
1121  *
1122  * Notably in Rome and Milan (Zen 2/3) it appears that each quadrant had an
1123  * increasing number of CCDs. So CCDs 0/1 were together, 2/3, 4/5, and 6/7. This
1124  * meant that in cases where only a subset of CCDs were populated it'd forcibly
1125  * disable the higher CCD in a group (but with DFv3 the CCM would still be
1126  * enabled). So a 4 CCD config would generally enable CCDs 0, 2, 4, and 6 say.
1127  * This was almost certainly done to balance the NUMA config.
1128  *
1129  * Instead, starting in Genoa (Zen 4) the CCMs are round-robined around the
1130  * quadrants so CCMs (CCDs) 0 (0/8) and 4 (4) are together, 1 (1/9) and 5 (5),
1131  * etc. This is also why we more often see disabled CCMs in Genoa, but not in
1132  * Rome/Milan.
1133  *
1134  * When we're operating in wide mode and therefore both SDPs are connected to a
1135  * single CCD, we've always found that the lower CCD index will be used by the
1136  * system and the higher one is not considered present. Therefore, when
1137  * operating in wide mode, we need to make sure that whenever we have a non-zero
1138  * value for SDPs being connected that we rewrite this to only appear as a
1139  * single CCD is present. It's conceivable (though hard to imagine) that we
1140  * could get a value of 0b10 indicating that only the upper SDP link is active
1141  * for some reason.
1142  */
1143 static void
amdzen_setup_df_ccm(amdzen_t * azn,amdzen_df_t * df,amdzen_df_ent_t * dfe,uint32_t ccmno)1144 amdzen_setup_df_ccm(amdzen_t *azn, amdzen_df_t *df, amdzen_df_ent_t *dfe,
1145     uint32_t ccmno)
1146 {
1147 	amdzen_ccm_data_t *ccm = &dfe->adfe_data.aded_ccm;
1148 	uint32_t ccd_en;
1149 	boolean_t wide_en;
1150 
1151 	if (df->adf_rev >= DF_REV_4) {
1152 		uint32_t val = amdzen_df_read32(azn, df, dfe->adfe_inst_id,
1153 		    DF_CCD_EN_V4);
1154 		ccd_en = DF_CCD_EN_V4_GET_CCD_EN(val);
1155 
1156 		if (df->adf_rev == DF_REV_4D2) {
1157 			wide_en = DF_CCD_EN_V4D2_GET_WIDE_EN(val);
1158 		} else {
1159 			val = amdzen_df_read32(azn, df, dfe->adfe_inst_id,
1160 			    DF_CCMCFG4_V4);
1161 			wide_en = DF_CCMCFG4_V4_GET_WIDE_EN(val);
1162 		}
1163 
1164 		if (wide_en != 0 && ccd_en != 0) {
1165 			ccd_en = 0x1;
1166 		}
1167 	} else {
1168 		ccd_en = 0x1;
1169 	}
1170 
1171 	for (uint32_t i = 0; i < DF_MAX_CCDS_PER_CCM; i++) {
1172 		ccm->acd_ccd_en[i] = (ccd_en & (1 << i)) != 0;
1173 		if (ccm->acd_ccd_en[i] == 0)
1174 			continue;
1175 		ccm->acd_ccd_id[i] = ccmno + i * df->adf_nccm;
1176 		ccm->acd_nccds++;
1177 	}
1178 }
1179 
1180 /*
1181  * Initialize our knowledge about a given series of nodes on the data fabric.
1182  */
1183 static void
amdzen_setup_df(amdzen_t * azn,amdzen_df_t * df)1184 amdzen_setup_df(amdzen_t *azn, amdzen_df_t *df)
1185 {
1186 	uint_t i;
1187 	uint32_t val, ccmno;
1188 
1189 	amdzen_determine_df_vers(azn, df);
1190 
1191 	switch (df->adf_rev) {
1192 	case DF_REV_2:
1193 	case DF_REV_3:
1194 	case DF_REV_3P5:
1195 		val = amdzen_df_read32_bcast(azn, df, DF_CFG_ADDR_CTL_V2);
1196 		break;
1197 	case DF_REV_4:
1198 	case DF_REV_4D2:
1199 		val = amdzen_df_read32_bcast(azn, df, DF_CFG_ADDR_CTL_V4);
1200 		break;
1201 	default:
1202 		dev_err(azn->azn_dip, CE_WARN, "encountered unsupported DF "
1203 		    "revision: 0x%x", df->adf_rev);
1204 		return;
1205 	}
1206 	df->adf_nb_busno = DF_CFG_ADDR_CTL_GET_BUS_NUM(val);
1207 	val = amdzen_df_read32_bcast(azn, df, DF_FBICNT);
1208 	df->adf_nents = DF_FBICNT_GET_COUNT(val);
1209 	if (df->adf_nents == 0)
1210 		return;
1211 	df->adf_ents = kmem_zalloc(sizeof (amdzen_df_ent_t) * df->adf_nents,
1212 	    KM_SLEEP);
1213 
1214 	for (i = 0; i < df->adf_nents; i++) {
1215 		amdzen_df_ent_t *dfe = &df->adf_ents[i];
1216 		uint8_t inst = i;
1217 
1218 		/*
1219 		 * Unfortunately, Rome uses a discontinuous instance ID pattern
1220 		 * while everything else we can find uses a contiguous instance
1221 		 * ID pattern. This means that for Rome, we need to adjust the
1222 		 * indexes that we iterate over, though the total number of
1223 		 * entries is right. This was carried over into Milan, but not
1224 		 * Genoa.
1225 		 */
1226 		if (amdzen_is_rome_style(df->adf_funcs[0]->azns_did)) {
1227 			if (inst >= ARRAY_SIZE(amdzen_df_rome_ids)) {
1228 				dev_err(azn->azn_dip, CE_WARN, "Rome family "
1229 				    "processor reported more ids than the PPR, "
1230 				    "resetting %u to instance zero", inst);
1231 				inst = 0;
1232 			} else {
1233 				inst = amdzen_df_rome_ids[inst];
1234 			}
1235 		}
1236 
1237 		dfe->adfe_drvid = inst;
1238 		dfe->adfe_info0 = amdzen_df_read32(azn, df, inst, DF_FBIINFO0);
1239 		if (df->adf_rev <= DF_REV_4) {
1240 			dfe->adfe_info1 = amdzen_df_read32(azn, df, inst,
1241 			    DF_FBIINFO1);
1242 			dfe->adfe_info2 = amdzen_df_read32(azn, df, inst,
1243 			    DF_FBIINFO2);
1244 		}
1245 		dfe->adfe_info3 = amdzen_df_read32(azn, df, inst, DF_FBIINFO3);
1246 
1247 		dfe->adfe_type = DF_FBIINFO0_GET_TYPE(dfe->adfe_info0);
1248 		dfe->adfe_subtype = DF_FBIINFO0_GET_SUBTYPE(dfe->adfe_info0);
1249 
1250 		/*
1251 		 * The enabled flag was not present in Zen 1. Simulate it by
1252 		 * checking for a non-zero register instead.
1253 		 */
1254 		if (DF_FBIINFO0_V3_GET_ENABLED(dfe->adfe_info0) ||
1255 		    (df->adf_rev == DF_REV_2 && dfe->adfe_info0 != 0)) {
1256 			dfe->adfe_flags |= AMDZEN_DFE_F_ENABLED;
1257 		}
1258 		if (DF_FBIINFO0_GET_HAS_MCA(dfe->adfe_info0)) {
1259 			dfe->adfe_flags |= AMDZEN_DFE_F_MCA;
1260 		}
1261 
1262 		/*
1263 		 * Starting with DFv4 there is no instance ID in the fabric info
1264 		 * 3 register, so we instead grab it out of the driver ID which
1265 		 * is what it should be anyways.
1266 		 */
1267 		if (df->adf_rev >= DF_REV_4) {
1268 			dfe->adfe_inst_id = dfe->adfe_drvid;
1269 		} else {
1270 			dfe->adfe_inst_id =
1271 			    DF_FBIINFO3_GET_INSTID(dfe->adfe_info3);
1272 		}
1273 
1274 		switch (df->adf_rev) {
1275 		case DF_REV_2:
1276 			dfe->adfe_fabric_id =
1277 			    DF_FBIINFO3_V2_GET_BLOCKID(dfe->adfe_info3);
1278 			break;
1279 		case DF_REV_3:
1280 			dfe->adfe_fabric_id =
1281 			    DF_FBIINFO3_V3_GET_BLOCKID(dfe->adfe_info3);
1282 			break;
1283 		case DF_REV_3P5:
1284 			dfe->adfe_fabric_id =
1285 			    DF_FBIINFO3_V3P5_GET_BLOCKID(dfe->adfe_info3);
1286 			break;
1287 		case DF_REV_4:
1288 		case DF_REV_4D2:
1289 			dfe->adfe_fabric_id =
1290 			    DF_FBIINFO3_V4_GET_BLOCKID(dfe->adfe_info3);
1291 			break;
1292 		default:
1293 			panic("encountered suspicious, previously rejected DF "
1294 			    "rev: 0x%x", df->adf_rev);
1295 		}
1296 
1297 		/*
1298 		 * Record information about a subset of DF entities that we've
1299 		 * found. Currently we're tracking this only for CCMs.
1300 		 */
1301 		if ((dfe->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0)
1302 			continue;
1303 
1304 		if (amdzen_dfe_is_ccm(df, dfe)) {
1305 			df->adf_nccm++;
1306 		}
1307 	}
1308 
1309 	/*
1310 	 * Now that we have filled in all of our info, attempt to fill in
1311 	 * specific information about different types of instances.
1312 	 */
1313 	ccmno = 0;
1314 	for (uint_t i = 0; i < df->adf_nents; i++) {
1315 		amdzen_df_ent_t *dfe = &df->adf_ents[i];
1316 
1317 		if ((dfe->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0)
1318 			continue;
1319 
1320 		/*
1321 		 * Perform type and sub-type specific initialization. Currently
1322 		 * limited to CCMs.
1323 		 */
1324 		switch (dfe->adfe_type) {
1325 		case DF_TYPE_CCM:
1326 			amdzen_setup_df_ccm(azn, df, dfe, ccmno);
1327 			ccmno++;
1328 			break;
1329 		default:
1330 			break;
1331 		}
1332 	}
1333 
1334 	amdzen_determine_fabric_decomp(azn, df);
1335 }
1336 
1337 static void
amdzen_find_nb(amdzen_t * azn,amdzen_df_t * df)1338 amdzen_find_nb(amdzen_t *azn, amdzen_df_t *df)
1339 {
1340 	amdzen_stub_t *stub;
1341 
1342 	for (stub = list_head(&azn->azn_nb_stubs); stub != NULL;
1343 	    stub = list_next(&azn->azn_nb_stubs, stub)) {
1344 		if (stub->azns_bus == df->adf_nb_busno) {
1345 			df->adf_flags |= AMDZEN_DF_F_FOUND_NB;
1346 			df->adf_nb = stub;
1347 			return;
1348 		}
1349 	}
1350 }
1351 
1352 /*
1353  * We need to be careful using this function as different AMD generations have
1354  * acted in different ways when there is a missing CCD. We've found that in
1355  * hardware where the CCM is enabled but there is no CCD attached, it generally
1356  * is safe (i.e. DFv3 on Rome), but on DFv4 if we ask for a CCD that would
1357  * correspond to a disabled CCM then the firmware may inject a fatal error
1358  * (which is hopefully something missing in our RAS/MCA-X enablement).
1359  *
1360  * Put differently if this doesn't correspond to an Enabled CCM and you know the
1361  * number of valid CCDs on this, don't use it.
1362  */
1363 static boolean_t
amdzen_ccd_present(amdzen_t * azn,amdzen_df_t * df,uint32_t ccdno)1364 amdzen_ccd_present(amdzen_t *azn, amdzen_df_t *df, uint32_t ccdno)
1365 {
1366 	smn_reg_t die_reg = SMUPWR_CCD_DIE_ID(ccdno);
1367 	uint32_t val = amdzen_smn_read(azn, df, die_reg);
1368 	if (val == SMN_EINVAL32) {
1369 		return (B_FALSE);
1370 	}
1371 
1372 	ASSERT3U(ccdno, ==, SMUPWR_CCD_DIE_ID_GET(val));
1373 	return (B_TRUE);
1374 }
1375 
1376 static uint32_t
amdzen_ccd_thread_en(amdzen_t * azn,amdzen_df_t * df,uint32_t ccdno)1377 amdzen_ccd_thread_en(amdzen_t *azn, amdzen_df_t *df, uint32_t ccdno)
1378 {
1379 	smn_reg_t reg;
1380 
1381 	if (uarchrev_uarch(azn->azn_uarchrev) >= X86_UARCH_AMD_ZEN5) {
1382 		reg = L3SOC_THREAD_EN(ccdno);
1383 	} else {
1384 		reg = SMUPWR_THREAD_EN(ccdno);
1385 	}
1386 
1387 	return (amdzen_smn_read(azn, df, reg));
1388 }
1389 
1390 static uint32_t
amdzen_ccd_core_en(amdzen_t * azn,amdzen_df_t * df,uint32_t ccdno)1391 amdzen_ccd_core_en(amdzen_t *azn, amdzen_df_t *df, uint32_t ccdno)
1392 {
1393 	smn_reg_t reg;
1394 
1395 	if (uarchrev_uarch(azn->azn_uarchrev) >= X86_UARCH_AMD_ZEN5) {
1396 		reg = L3SOC_CORE_EN(ccdno);
1397 	} else {
1398 		reg = SMUPWR_CORE_EN(ccdno);
1399 	}
1400 
1401 	return (amdzen_smn_read(azn, df, reg));
1402 }
1403 
1404 static void
amdzen_ccd_info(amdzen_t * azn,amdzen_df_t * df,uint32_t ccdno,uint32_t * nccxp,uint32_t * nlcorep,uint32_t * nthrp)1405 amdzen_ccd_info(amdzen_t *azn, amdzen_df_t *df, uint32_t ccdno, uint32_t *nccxp,
1406     uint32_t *nlcorep, uint32_t *nthrp)
1407 {
1408 	uint32_t nccx, nlcore, smt;
1409 
1410 	if (uarchrev_uarch(azn->azn_uarchrev) >= X86_UARCH_AMD_ZEN5) {
1411 		smn_reg_t reg = L3SOC_THREAD_CFG(ccdno);
1412 		uint32_t val = amdzen_smn_read(azn, df, reg);
1413 		nccx = L3SOC_THREAD_CFG_GET_COMPLEX_COUNT(val) + 1;
1414 		nlcore = L3SOC_THREAD_CFG_GET_CORE_COUNT(val) + 1;
1415 		smt = L3SOC_THREAD_CFG_GET_SMT_MODE(val);
1416 	} else {
1417 		smn_reg_t reg = SMUPWR_THREAD_CFG(ccdno);
1418 		uint32_t val = amdzen_smn_read(azn, df, reg);
1419 		nccx = SMUPWR_THREAD_CFG_GET_COMPLEX_COUNT(val) + 1;
1420 		nlcore = SMUPWR_THREAD_CFG_GET_CORE_COUNT(val) + 1;
1421 		smt = SMUPWR_THREAD_CFG_GET_SMT_MODE(val);
1422 	}
1423 
1424 	if (nccxp != NULL) {
1425 		*nccxp = nccx;
1426 	}
1427 
1428 	if (nlcorep != NULL) {
1429 		*nlcorep = nlcore;
1430 	}
1431 
1432 	if (nthrp != NULL) {
1433 		/* The L3::L3SOC and SMU::PWR values are the same here */
1434 		if (smt == SMUPWR_THREAD_CFG_SMT_MODE_SMT) {
1435 			*nthrp = 2;
1436 		} else {
1437 			*nthrp = 1;
1438 		}
1439 	}
1440 }
1441 
1442 static void
amdzen_initpkg_to_apic(amdzen_t * azn,const uint32_t pkg0,const uint32_t pkg7)1443 amdzen_initpkg_to_apic(amdzen_t *azn, const uint32_t pkg0, const uint32_t pkg7)
1444 {
1445 	uint32_t nsock, nccd, nccx, ncore, nthr, extccx;
1446 	uint32_t nsock_bits, nccd_bits, nccx_bits, ncore_bits, nthr_bits;
1447 	amdzen_apic_decomp_t *apic = &azn->azn_apic_decomp;
1448 
1449 	/*
1450 	 * These are all 0 based values, meaning that we need to add one to each
1451 	 * of them. However, we skip this because to calculate the number of
1452 	 * bits to cover an entity we would subtract one.
1453 	 */
1454 	nthr = SCFCTP_PMREG_INITPKG0_GET_SMTEN(pkg0);
1455 	ncore = SCFCTP_PMREG_INITPKG7_GET_N_CORES(pkg7);
1456 	nccx = SCFCTP_PMREG_INITPKG7_GET_N_CCXS(pkg7);
1457 	nccd = SCFCTP_PMREG_INITPKG7_GET_N_DIES(pkg7);
1458 	nsock = SCFCTP_PMREG_INITPKG7_GET_N_SOCKETS(pkg7);
1459 
1460 	if (uarchrev_uarch(azn->azn_uarchrev) >= X86_UARCH_AMD_ZEN4) {
1461 		extccx = SCFCTP_PMREG_INITPKG7_ZEN4_GET_16TAPIC(pkg7);
1462 	} else {
1463 		extccx = 0;
1464 	}
1465 
1466 	nthr_bits = highbit(nthr);
1467 	ncore_bits = highbit(ncore);
1468 	nccx_bits = highbit(nccx);
1469 	nccd_bits = highbit(nccd);
1470 	nsock_bits = highbit(nsock);
1471 
1472 	apic->aad_thread_shift = 0;
1473 	apic->aad_thread_mask = (1 << nthr_bits) - 1;
1474 
1475 	apic->aad_core_shift = nthr_bits;
1476 	if (ncore_bits > 0) {
1477 		apic->aad_core_mask = (1 << ncore_bits) - 1;
1478 		apic->aad_core_mask <<= apic->aad_core_shift;
1479 	} else {
1480 		apic->aad_core_mask = 0;
1481 	}
1482 
1483 	/*
1484 	 * The APIC_16T_MODE bit indicates that the total shift to start the CCX
1485 	 * should be at 4 bits if it's not. It doesn't mean that the CCX portion
1486 	 * of the value should take up four bits. In the common Genoa case,
1487 	 * nccx_bits will be zero.
1488 	 */
1489 	apic->aad_ccx_shift = apic->aad_core_shift + ncore_bits;
1490 	if (extccx != 0 && apic->aad_ccx_shift < 4) {
1491 		apic->aad_ccx_shift = 4;
1492 	}
1493 	if (nccx_bits > 0) {
1494 		apic->aad_ccx_mask = (1 << nccx_bits) - 1;
1495 		apic->aad_ccx_mask <<= apic->aad_ccx_shift;
1496 	} else {
1497 		apic->aad_ccx_mask = 0;
1498 	}
1499 
1500 	apic->aad_ccd_shift = apic->aad_ccx_shift + nccx_bits;
1501 	if (nccd_bits > 0) {
1502 		apic->aad_ccd_mask = (1 << nccd_bits) - 1;
1503 		apic->aad_ccd_mask <<= apic->aad_ccd_shift;
1504 	} else {
1505 		apic->aad_ccd_mask = 0;
1506 	}
1507 
1508 	apic->aad_sock_shift = apic->aad_ccd_shift + nccd_bits;
1509 	if (nsock_bits > 0) {
1510 		apic->aad_sock_mask = (1 << nsock_bits) - 1;
1511 		apic->aad_sock_mask <<= apic->aad_sock_shift;
1512 	} else {
1513 		apic->aad_sock_mask = 0;
1514 	}
1515 
1516 	/*
1517 	 * Currently all supported Zen 2+ platforms only have a single die per
1518 	 * socket as compared to Zen 1. So this is always kept at zero.
1519 	 */
1520 	apic->aad_die_mask = 0;
1521 	apic->aad_die_shift = 0;
1522 }
1523 
1524 /*
1525  * We would like to determine what the logical APIC decomposition is on Zen 3
1526  * and newer family parts. While there is information added to CPUID in the form
1527  * of leaf 8X26, that isn't present in Zen 3, so instead we go to what we
1528  * believe is the underlying source of the CPUID data.
1529  *
1530  * Fundamentally there are a series of registers in SMN space that relate to the
1531  * SCFCTP. Coincidentally, there is one of these for each core and there are a
1532  * pair of related SMN registers. L3::SCFCTP::PMREG_INITPKG0 contains
1533  * information about a given's core logical and physical IDs. More interestingly
1534  * for this particular case, L3::SCFCTP::PMREG_INITPKG7, contains the overall
1535  * total number of logical entities. We've been promised that this has to be
1536  * the same across the fabric. That's all well and good, but this begs the
1537  * question of how do we actually get there. The above is a core-specific
1538  * register and requires that we understand information about which CCDs and
1539  * CCXs are actually present.
1540  *
1541  * So we are starting with a data fabric that has some CCM present. The CCM
1542  * entries in the data fabric may be tagged with our ENABLED flag.
1543  * Unfortunately, that can be true regardless of whether or not it's actually
1544  * present or not. As a result, we go to another chunk of SMN space registers,
1545  * SMU::PWR. These contain information about the CCDs, the physical cores that
1546  * are enabled, and related. So we will first walk the DF entities and see if we
1547  * can read its SMN::PWR::CCD_DIE_ID. If we get back a value of all 1s then
1548  * there is nothing present. Otherwise, we should get back something that
1549  * matches information in the data fabric.
1550  *
1551  * With that in hand, we can read the SMU::PWR::CORE_ENABLE register to
1552  * determine which physical cores are enabled in the CCD/CCX. That will finally
1553  * give us an index to get to our friend INITPKG7.
1554  */
1555 static boolean_t
amdzen_determine_apic_decomp_initpkg(amdzen_t * azn)1556 amdzen_determine_apic_decomp_initpkg(amdzen_t *azn)
1557 {
1558 	amdzen_df_t *df = &azn->azn_dfs[0];
1559 	uint32_t ccdno = 0;
1560 
1561 	for (uint_t i = 0; i < df->adf_nents; i++) {
1562 		const amdzen_df_ent_t *ent = &df->adf_ents[i];
1563 		if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0)
1564 			continue;
1565 
1566 		if (amdzen_dfe_is_ccm(df, ent)) {
1567 			uint32_t val, nccx, pkg7, pkg0;
1568 			smn_reg_t pkg7_reg, pkg0_reg;
1569 			int core_bit;
1570 			uint8_t pccxno, pcoreno;
1571 
1572 			if (!amdzen_ccd_present(azn, df, ccdno)) {
1573 				ccdno++;
1574 				continue;
1575 			}
1576 
1577 			/*
1578 			 * This die actually exists. Switch over to the core
1579 			 * enable register to find one to ask about physically.
1580 			 */
1581 			amdzen_ccd_info(azn, df, ccdno, &nccx, NULL, NULL);
1582 			val = amdzen_ccd_core_en(azn, df, ccdno);
1583 			if (val == 0) {
1584 				ccdno++;
1585 				continue;
1586 			}
1587 
1588 			/*
1589 			 * There exists an enabled physical core. Find the first
1590 			 * index of it and map it to the corresponding CCD and
1591 			 * CCX. ddi_ffs is the bit index, but we want the
1592 			 * physical core number, hence the -1.
1593 			 */
1594 			core_bit = ddi_ffs(val);
1595 			ASSERT3S(core_bit, !=, 0);
1596 			pcoreno = core_bit - 1;
1597 
1598 			/*
1599 			 * Unfortunately SMU::PWR::THREAD_CONFIGURATION gives us
1600 			 * the Number of logical cores that are present in the
1601 			 * complex, not the total number of physical cores.
1602 			 * Right now we do assume that the physical and logical
1603 			 * ccx numbering is equivalent (we have no other way of
1604 			 * knowing if it is or isn't right now) and that we'd
1605 			 * always have CCX0 before CCX1. AMD seems to suggest we
1606 			 * can assume this, though it is a worrisome assumption.
1607 			 */
1608 			pccxno = pcoreno / azn->azn_ncore_per_ccx;
1609 			ASSERT3U(pccxno, <, nccx);
1610 			pkg7_reg = SCFCTP_PMREG_INITPKG7(ccdno, pccxno,
1611 			    pcoreno);
1612 			pkg7 = amdzen_smn_read(azn, df, pkg7_reg);
1613 			pkg0_reg = SCFCTP_PMREG_INITPKG0(ccdno, pccxno,
1614 			    pcoreno);
1615 			pkg0 = amdzen_smn_read(azn, df, pkg0_reg);
1616 			amdzen_initpkg_to_apic(azn, pkg0, pkg7);
1617 			return (B_TRUE);
1618 		}
1619 	}
1620 
1621 	return (B_FALSE);
1622 }
1623 
1624 /*
1625  * We have the fun job of trying to figure out what the correct form of the APIC
1626  * decomposition should be and how to break that into its logical components.
1627  * The way that we get at this is generation-specific unfortunately. Here's how
1628  * it works out:
1629  *
1630  * Zen 1-2	This era of CPUs are deceptively simple. The PPR for a given
1631  *		family defines exactly how the APIC ID is broken into logical
1632  *		components and it's fixed. That is, depending on whether or
1633  *		not SMT is enabled. Zen 1 and Zen 2 use different schemes for
1634  *		constructing this. The way that we're supposed to check if SMT
1635  *		is enabled is to use AMD leaf 8X1E and ask how many threads per
1636  *		core there are. We use the x86 feature set to determine that
1637  *		instead.
1638  *
1639  *		More specifically the Zen 1 scheme is 7 bits long. The bits have
1640  *		the following meanings.
1641  *
1642  *		[6]   Socket ID
1643  *		[5:4] Node ID
1644  *		[3]   Logical CCX ID
1645  *		With SMT		Without SMT
1646  *		[2:1] Logical Core ID	[2]   hardcoded to zero
1647  *		[0] Thread ID		[1:0] Logical Core ID
1648  *
1649  *		The following is the Zen 2 scheme assuming SMT. The Zen 2 scheme
1650  *		without SMT shifts everything to the right by one bit.
1651  *
1652  *		[7]   Socket ID
1653  *		[6:4] Logical CCD ID
1654  *		[3]   Logical CCX ID
1655  *		[2:1] Logical Core ID
1656  *		[0]   Thread ID
1657  *
1658  * Zen 3	Zen 3 CPUs moved past the fixed APIC ID format that Zen 1 and
1659  *		Zen 2 had, but also don't give us the nice way of discovering
 *		this via CPUID that Zen 4 did. The APIC ID uses a given
1661  *		number of bits for each logical component that exists, but the
1662  *		exact number varies based on what's actually present. To get at
1663  *		this we use a piece of data that is embedded in the SCFCTP
1664  *		(Scalable Control Fabric, Clocks, Test, Power Gating). This can
1665  *		be used to determine how many logical entities of each kind the
1666  *		system thinks exist. While we could use the various CPUID
1667  *		topology items to try to speed this up, they don't tell us the
1668  *		die information that we need to do this.
1669  *
1670  * Zen 4+	Zen 4 introduced CPUID leaf 8000_0026h which gives us a means
1671  *		for determining how to extract the CCD, CCX, and related pieces
1672  *		out of the device. One thing we have to be aware of is that when
1673  *		the CCD and CCX shift are the same, that means that there is
1674  *		only a single CCX and therefore have to take that into account
1675  *		appropriately. This is the case generally on Zen 4 platforms,
1676  *		but not on Bergamo. Until we can confirm the actual CPUID leaf
1677  *		values that we receive in the cases of Bergamo and others, we
1678  *		opt instead to use the same SCFCTP scheme as Zen 3.
1679  */
1680 static boolean_t
amdzen_determine_apic_decomp(amdzen_t * azn)1681 amdzen_determine_apic_decomp(amdzen_t *azn)
1682 {
1683 	amdzen_apic_decomp_t *apic = &azn->azn_apic_decomp;
1684 	boolean_t smt = is_x86_feature(x86_featureset, X86FSET_HTT);
1685 
1686 	switch (uarchrev_uarch(azn->azn_uarchrev)) {
1687 	case X86_UARCH_AMD_ZEN1:
1688 	case X86_UARCH_AMD_ZENPLUS:
1689 		apic->aad_sock_mask = 0x40;
1690 		apic->aad_sock_shift = 6;
1691 		apic->aad_die_mask = 0x30;
1692 		apic->aad_die_shift = 4;
1693 		apic->aad_ccd_mask = 0;
1694 		apic->aad_ccd_shift = 0;
1695 		apic->aad_ccx_mask = 0x08;
1696 		apic->aad_ccx_shift = 3;
1697 
1698 		if (smt) {
1699 			apic->aad_core_mask = 0x06;
1700 			apic->aad_core_shift = 1;
1701 			apic->aad_thread_mask = 0x1;
1702 			apic->aad_thread_shift = 0;
1703 		} else {
1704 			apic->aad_core_mask = 0x03;
1705 			apic->aad_core_shift = 0;
1706 			apic->aad_thread_mask = 0;
1707 			apic->aad_thread_shift = 0;
1708 		}
1709 		break;
1710 	case X86_UARCH_AMD_ZEN2:
1711 		if (smt) {
1712 			apic->aad_sock_mask = 0x80;
1713 			apic->aad_sock_shift = 7;
1714 			apic->aad_die_mask = 0;
1715 			apic->aad_die_shift = 0;
1716 			apic->aad_ccd_mask = 0x70;
1717 			apic->aad_ccd_shift = 4;
1718 			apic->aad_ccx_mask = 0x08;
1719 			apic->aad_ccx_shift = 3;
1720 			apic->aad_core_mask = 0x06;
1721 			apic->aad_core_shift = 1;
1722 			apic->aad_thread_mask = 0x01;
1723 			apic->aad_thread_shift = 0;
1724 		} else {
1725 			apic->aad_sock_mask = 0x40;
1726 			apic->aad_sock_shift = 6;
1727 			apic->aad_die_mask = 0;
1728 			apic->aad_die_shift = 0;
1729 			apic->aad_ccd_mask = 0x38;
1730 			apic->aad_ccd_shift = 3;
1731 			apic->aad_ccx_mask = 0x04;
1732 			apic->aad_ccx_shift = 2;
1733 			apic->aad_core_mask = 0x3;
1734 			apic->aad_core_shift = 0;
1735 			apic->aad_thread_mask = 0;
1736 			apic->aad_thread_shift = 0;
1737 		}
1738 		break;
1739 	case X86_UARCH_AMD_ZEN3:
1740 	case X86_UARCH_AMD_ZEN4:
1741 	case X86_UARCH_AMD_ZEN5:
1742 		return (amdzen_determine_apic_decomp_initpkg(azn));
1743 	default:
1744 		return (B_FALSE);
1745 	}
1746 	return (B_TRUE);
1747 }
1748 
1749 /*
1750  * Snapshot the number of cores that can exist in a CCX based on the Zen
1751  * microarchitecture revision. In Zen 1-4 this has been a constant number
1752  * regardless of the actual CPU Family. In Zen 5 this varies based upon whether
1753  * or not dense dies are being used.
1754  */
1755 static void
amdzen_determine_ncore_per_ccx(amdzen_t * azn)1756 amdzen_determine_ncore_per_ccx(amdzen_t *azn)
1757 {
1758 	switch (uarchrev_uarch(azn->azn_uarchrev)) {
1759 	case X86_UARCH_AMD_ZEN1:
1760 	case X86_UARCH_AMD_ZENPLUS:
1761 	case X86_UARCH_AMD_ZEN2:
1762 		azn->azn_ncore_per_ccx = 4;
1763 		break;
1764 	case X86_UARCH_AMD_ZEN3:
1765 	case X86_UARCH_AMD_ZEN4:
1766 		azn->azn_ncore_per_ccx = 8;
1767 		break;
1768 	case X86_UARCH_AMD_ZEN5:
1769 		if (chiprev_family(azn->azn_chiprev) ==
1770 		    X86_PF_AMD_DENSE_TURIN) {
1771 			azn->azn_ncore_per_ccx = 16;
1772 		} else {
1773 			azn->azn_ncore_per_ccx = 8;
1774 		}
1775 		break;
1776 	default:
1777 		panic("asked about non-Zen or unknown uarch");
1778 	}
1779 }
1780 
1781 /*
1782  * Attempt to determine a logical CCD number of a given CCD where we don't have
1783  * hardware support for L3::SCFCTP::PMREG_INITPKG* (e.g. pre-Zen 3 systems).
 * The CCD numbers that we have are in the physical space. Likely because of
1785  * how the orientation of CCM numbers map to physical locations and the layout
1786  * of them within the package, we haven't found a good way using the core DFv3
1787  * registers to determine if a given CCD is actually present or not as generally
1788  * all the CCMs are left enabled. Instead we use SMU::PWR::DIE_ID as a proxy to
1789  * determine CCD presence.
1790  */
1791 static uint32_t
amdzen_ccd_log_id_zen2(amdzen_t * azn,amdzen_df_t * df,const amdzen_df_ent_t * targ)1792 amdzen_ccd_log_id_zen2(amdzen_t *azn, amdzen_df_t *df,
1793     const amdzen_df_ent_t *targ)
1794 {
1795 	uint32_t smnid = 0;
1796 	uint32_t logid = 0;
1797 
1798 	for (uint_t i = 0; i < df->adf_nents; i++) {
1799 		const amdzen_df_ent_t *ent = &df->adf_ents[i];
1800 
1801 		if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0) {
1802 			continue;
1803 		}
1804 
1805 		if (ent->adfe_inst_id == targ->adfe_inst_id) {
1806 			return (logid);
1807 		}
1808 
1809 		if (ent->adfe_type == targ->adfe_type &&
1810 		    ent->adfe_subtype == targ->adfe_subtype) {
1811 			boolean_t present = amdzen_ccd_present(azn, df, smnid);
1812 			smnid++;
1813 			if (present) {
1814 				logid++;
1815 			}
1816 		}
1817 	}
1818 
1819 	panic("asked to match against invalid DF entity %p in df %p", targ, df);
1820 }
1821 
1822 static void
amdzen_ccd_fill_core_initpkg0(amdzen_t * azn,amdzen_df_t * df,amdzen_topo_ccd_t * ccd,amdzen_topo_ccx_t * ccx,amdzen_topo_core_t * core,boolean_t * ccd_set,boolean_t * ccx_set)1823 amdzen_ccd_fill_core_initpkg0(amdzen_t *azn, amdzen_df_t *df,
1824     amdzen_topo_ccd_t *ccd, amdzen_topo_ccx_t *ccx, amdzen_topo_core_t *core,
1825     boolean_t *ccd_set, boolean_t *ccx_set)
1826 {
1827 	smn_reg_t pkg0_reg;
1828 	uint32_t pkg0;
1829 
1830 	pkg0_reg = SCFCTP_PMREG_INITPKG0(ccd->atccd_phys_no, ccx->atccx_phys_no,
1831 	    core->atcore_phys_no);
1832 	pkg0 = amdzen_smn_read(azn, df, pkg0_reg);
1833 	core->atcore_log_no = SCFCTP_PMREG_INITPKG0_GET_LOG_CORE(pkg0);
1834 
1835 	if (!*ccx_set) {
1836 		ccx->atccx_log_no = SCFCTP_PMREG_INITPKG0_GET_LOG_CCX(pkg0);
1837 		*ccx_set = B_TRUE;
1838 	}
1839 
1840 	if (!*ccd_set) {
1841 		ccd->atccd_log_no = SCFCTP_PMREG_INITPKG0_GET_LOG_DIE(pkg0);
1842 		*ccd_set = B_TRUE;
1843 	}
1844 }
1845 
1846 /*
1847  * Attempt to fill in the physical topology information for this given CCD.
1848  * There are a few steps to this that we undertake to perform this as follows:
1849  *
1850  * 1) First we determine whether the CCD is actually present or not by reading
1851  * SMU::PWR::DIE_ID. CCDs that are not installed will still have an enabled DF
 * entry it appears, but the request for the die ID will return an invalid
1853  * read (all 1s). This die ID should match what we think of as the SMN number
1854  * below. If not, we're in trouble and the rest of this is in question.
1855  *
1856  * 2) We use the SMU::PWR registers to determine how many logical and physical
1857  * cores are present in this CCD and how they are split amongst the CCX. Here we
1858  * need to encode the CPU to CCX core size rankings. Through this process we
1859  * determine and fill out which threads and cores are enabled.
1860  *
 * 3) In Zen 3+ we then will read each core's INITPKG0 values to ensure that we
1862  * have a proper physical to logical mapping, at which point we can fill in the
1863  * APIC IDs. For Zen 2, we will set the AMDZEN_TOPO_CCD_F_CORE_PHYS_UNKNOWN to
1864  * indicate that we just mapped the first logical processor to the first enabled
1865  * core.
1866  *
1867  * 4) Once we have the logical IDs determined we will construct the APIC ID that
1868  * we expect this to have.
1869  *
1870  * Steps (2) - (4) are intertwined and done together.
1871  */
static void
amdzen_ccd_fill_topo(amdzen_t *azn, amdzen_df_t *df, amdzen_df_ent_t *ent,
    amdzen_topo_ccd_t *ccd)
{
	uint32_t nccx, core_en, thread_en;
	uint32_t nlcore_per_ccx, nthreads_per_core;
	uint32_t sockid, dieid, compid;
	const uint32_t ccdno = ccd->atccd_phys_no;
	const x86_uarch_t uarch = uarchrev_uarch(azn->azn_uarchrev);
	boolean_t pkg0_ids, logccd_set = B_FALSE;

	/* Step (1): confirm the CCD is physically present before going on. */
	ASSERT(MUTEX_HELD(&azn->azn_mutex));
	if (!amdzen_ccd_present(azn, df, ccdno)) {
		ccd->atccd_err = AMDZEN_TOPO_CCD_E_CCD_MISSING;
		return;
	}

	/*
	 * Step (2): pull the CCX count, logical cores per CCX, threads per
	 * core, and the core/thread enable bitmaps from the SMU::PWR space.
	 */
	amdzen_ccd_info(azn, df, ccdno, &nccx, &nlcore_per_ccx,
	    &nthreads_per_core);
	ASSERT3U(nccx, <=, AMDZEN_TOPO_CCD_MAX_CCX);

	core_en = amdzen_ccd_core_en(azn, df, ccdno);
	thread_en = amdzen_ccd_thread_en(azn, df, ccdno);

	/*
	 * The BSP is never enabled in a conventional sense and therefore the
	 * bit is reserved and left as 0. As the BSP should be in the first CCD,
	 * we go through and OR back in the bit lest we think the thread isn't
	 * enabled.
	 */
	if (ccdno == 0) {
		thread_en |= 1;
	}

	/*
	 * pkg0_ids selects between the two logical-ID schemes: Zen 3+ reads
	 * INITPKG0 per-core; Zen 2 infers ordering from present CCDs.
	 */
	ccd->atccd_phys_no = ccdno;
	if (uarch >= X86_UARCH_AMD_ZEN3) {
		pkg0_ids = B_TRUE;
	} else {
		ccd->atccd_flags |= AMDZEN_TOPO_CCD_F_CORE_PHYS_UNKNOWN;
		pkg0_ids = B_FALSE;

		/*
		 * Determine the CCD logical ID for Zen 2 now since this doesn't
		 * rely upon needing a valid physical core.
		 */
		ccd->atccd_log_no = amdzen_ccd_log_id_zen2(azn, df, ent);
		logccd_set = B_TRUE;
	}

	/*
	 * To construct the APIC ID we need to know the socket and die (not CCD)
	 * this is on. We deconstruct the CCD's fabric ID to determine that.
	 */
	zen_fabric_id_decompose(&df->adf_decomp, ent->adfe_fabric_id, &sockid,
	    &dieid, &compid);

	/*
	 * At this point we have all the information about the CCD, the number
	 * of CCX instances, and which physical cores and threads are enabled.
	 * Currently we assume that if we have one CCX enabled, then it is
	 * always CCX0. We cannot find evidence of a two CCX supporting part
	 * that doesn't always ship with both CCXs present and enabled.
	 */
	ccd->atccd_nlog_ccx = ccd->atccd_nphys_ccx = nccx;
	for (uint32_t ccxno = 0; ccxno < nccx; ccxno++) {
		amdzen_topo_ccx_t *ccx = &ccd->atccd_ccx[ccxno];
		const uint32_t core_mask = (1 << azn->azn_ncore_per_ccx) - 1;
		const uint32_t core_shift = ccxno * azn->azn_ncore_per_ccx;
		const uint32_t ccx_core_en = (core_en >> core_shift) &
		    core_mask;
		boolean_t logccx_set = B_FALSE;

		ccd->atccd_ccx_en[ccxno] = 1;
		ccx->atccx_phys_no = ccxno;
		ccx->atccx_nphys_cores = azn->azn_ncore_per_ccx;
		ccx->atccx_nlog_cores = nlcore_per_ccx;

		/* Without INITPKG0, logical CCX numbers mirror physical. */
		if (!pkg0_ids) {
			ccx->atccx_log_no = ccx->atccx_phys_no;
			logccx_set = B_TRUE;
		}

		/* logcorezen2 counts enabled cores in order of appearance. */
		for (uint32_t coreno = 0, logcorezen2 = 0;
		    coreno < azn->azn_ncore_per_ccx; coreno++) {
			amdzen_topo_core_t *core = &ccx->atccx_cores[coreno];

			if ((ccx_core_en & (1 << coreno)) == 0) {
				continue;
			}

			ccx->atccx_core_en[coreno] = 1;
			core->atcore_phys_no = coreno;

			/*
			 * Now that we have the physical core number present, we
			 * must determine the logical core number and fill out
			 * the logical CCX/CCD if it has not been set. We must
			 * do this before we attempt to look at which threads
			 * are enabled, because that operates based upon logical
			 * core number.
			 *
			 * For Zen 2 we do not have INITPKG0 at our disposal. We
			 * currently assume (and tag for userland with the
			 * AMDZEN_TOPO_CCD_F_CORE_PHYS_UNKNOWN flag) that we are
			 * mapping logical cores to physicals in the order of
			 * appearance.
			 */
			if (pkg0_ids) {
				amdzen_ccd_fill_core_initpkg0(azn, df, ccd, ccx,
				    core, &logccd_set, &logccx_set);
			} else {
				core->atcore_log_no = logcorezen2;
				logcorezen2++;
			}

			/*
			 * Determining which bits to use for the thread is a bit
			 * weird here. Thread IDs within a CCX are logical, but
			 * there are always physically spaced CCX sizes. See the
			 * comment at the definition for SMU::PWR::THREAD_ENABLE
			 * for more information.
			 */
			const uint32_t thread_shift = (ccx->atccx_nphys_cores *
			    ccx->atccx_log_no + core->atcore_log_no) *
			    nthreads_per_core;
			const uint32_t thread_mask = (nthreads_per_core << 1) -
			    1;
			const uint32_t core_thread_en = (thread_en >>
			    thread_shift) & thread_mask;
			core->atcore_nthreads = nthreads_per_core;
			core->atcore_thr_en[0] = core_thread_en & 0x01;
			core->atcore_thr_en[1] = core_thread_en & 0x02;
#ifdef	DEBUG
			if (nthreads_per_core == 1) {
				VERIFY0(core->atcore_thr_en[1]);
			}
#endif
			/*
			 * Step (4): with logical CCD/CCX/core numbers known,
			 * compose the expected APIC ID for each thread.
			 */
			for (uint32_t thrno = 0; thrno < core->atcore_nthreads;
			    thrno++) {
				ASSERT3U(core->atcore_thr_en[thrno], !=, 0);

				zen_apic_id_compose(&azn->azn_apic_decomp,
				    sockid, dieid, ccd->atccd_log_no,
				    ccx->atccx_log_no, core->atcore_log_no,
				    thrno, &core->atcore_apicids[thrno]);

			}
		}

		ASSERT3U(logccx_set, ==, B_TRUE);
		ASSERT3U(logccd_set, ==, B_TRUE);
	}
}
2025 
/*
 * Taskq callback that performs the second phase of attach once all of the
 * expected stubs have attached: map and validate the DFs, determine the APIC
 * and core topology parameters, and enumerate our child devices. Completion is
 * signalled via the AMDZEN_F_ATTACH_* flags and azn_cv.
 */
static void
amdzen_nexus_init(void *arg)
{
	uint_t i;
	amdzen_t *azn = arg;

	/*
	 * Assign the requisite identifying information for this CPU.
	 */
	azn->azn_uarchrev = cpuid_getuarchrev(CPU);
	azn->azn_chiprev = cpuid_getchiprev(CPU);

	/*
	 * Go through all of the stubs and assign the DF entries.
	 */
	mutex_enter(&azn->azn_mutex);
	if (!amdzen_map_dfs(azn) || !amdzen_check_dfs(azn)) {
		azn->azn_flags |= AMDZEN_F_MAP_ERROR;
		goto done;
	}

	for (i = 0; i < AMDZEN_MAX_DFS; i++) {
		amdzen_df_t *df = &azn->azn_dfs[i];

		if ((df->adf_flags & AMDZEN_DF_F_VALID) == 0)
			continue;
		amdzen_setup_df(azn, df);
		amdzen_find_nb(azn, df);
	}

	amdzen_determine_ncore_per_ccx(azn);

	/* APIC decomposition may legitimately fail on unsupported uarchs. */
	if (amdzen_determine_apic_decomp(azn)) {
		azn->azn_flags |= AMDZEN_F_APIC_DECOMP_VALID;
	}

	/*
	 * Not all children may be installed. As such, we do not treat the
	 * failure of a child as fatal to the driver.
	 */
	mutex_exit(&azn->azn_mutex);
	for (i = 0; i < ARRAY_SIZE(amdzen_children); i++) {
		(void) amdzen_create_child(azn, &amdzen_children[i]);
	}
	mutex_enter(&azn->azn_mutex);

done:
	/* Mark attach finished (success or not) and wake any waiters. */
	azn->azn_flags &= ~AMDZEN_F_ATTACH_DISPATCHED;
	azn->azn_flags |= AMDZEN_F_ATTACH_COMPLETE;
	azn->azn_taskqid = TASKQID_INVALID;
	cv_broadcast(&azn->azn_cv);
	mutex_exit(&azn->azn_mutex);
}
2079 
2080 static int
amdzen_stub_scan_cb(dev_info_t * dip,void * arg)2081 amdzen_stub_scan_cb(dev_info_t *dip, void *arg)
2082 {
2083 	amdzen_t *azn = arg;
2084 	uint16_t vid, did;
2085 	int *regs;
2086 	uint_t nregs, i;
2087 	boolean_t match = B_FALSE;
2088 
2089 	if (dip == ddi_root_node()) {
2090 		return (DDI_WALK_CONTINUE);
2091 	}
2092 
2093 	/*
2094 	 * If a node in question is not a pci node, then we have no interest in
2095 	 * it as all the stubs that we care about are related to pci devices.
2096 	 */
2097 	if (strncmp("pci", ddi_get_name(dip), 3) != 0) {
2098 		return (DDI_WALK_PRUNECHILD);
2099 	}
2100 
2101 	/*
2102 	 * If we can't get a device or vendor ID and prove that this is an AMD
2103 	 * part, then we don't care about it.
2104 	 */
2105 	vid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
2106 	    "vendor-id", PCI_EINVAL16);
2107 	did = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
2108 	    "device-id", PCI_EINVAL16);
2109 	if (vid == PCI_EINVAL16 || did == PCI_EINVAL16) {
2110 		return (DDI_WALK_CONTINUE);
2111 	}
2112 
2113 	if (vid != AMDZEN_PCI_VID_AMD && vid != AMDZEN_PCI_VID_HYGON) {
2114 		return (DDI_WALK_CONTINUE);
2115 	}
2116 
2117 	for (i = 0; i < ARRAY_SIZE(amdzen_nb_ids); i++) {
2118 		if (amdzen_nb_ids[i] == did) {
2119 			match = B_TRUE;
2120 		}
2121 	}
2122 
2123 	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
2124 	    "reg", &regs, &nregs) != DDI_PROP_SUCCESS) {
2125 		return (DDI_WALK_CONTINUE);
2126 	}
2127 
2128 	if (nregs == 0) {
2129 		ddi_prop_free(regs);
2130 		return (DDI_WALK_CONTINUE);
2131 	}
2132 
2133 	if (PCI_REG_BUS_G(regs[0]) == AMDZEN_DF_BUSNO &&
2134 	    PCI_REG_DEV_G(regs[0]) >= AMDZEN_DF_FIRST_DEVICE) {
2135 		match = B_TRUE;
2136 	}
2137 
2138 	ddi_prop_free(regs);
2139 	if (match) {
2140 		mutex_enter(&azn->azn_mutex);
2141 		azn->azn_nscanned++;
2142 		mutex_exit(&azn->azn_mutex);
2143 	}
2144 
2145 	return (DDI_WALK_CONTINUE);
2146 }
2147 
/*
 * Taskq callback that walks the device tree counting the stub devices we
 * expect to attach. Once the count is known we either declare the platform
 * unsupported (nothing found) or, if every expected stub has already
 * attached, dispatch the second attach phase (amdzen_nexus_init).
 */
static void
amdzen_stub_scan(void *arg)
{
	amdzen_t *azn = arg;

	mutex_enter(&azn->azn_mutex);
	azn->azn_nscanned = 0;
	mutex_exit(&azn->azn_mutex);

	/* The walk itself is done without the lock; the cb takes it. */
	ddi_walk_devs(ddi_root_node(), amdzen_stub_scan_cb, azn);

	mutex_enter(&azn->azn_mutex);
	azn->azn_flags &= ~AMDZEN_F_SCAN_DISPATCHED;
	azn->azn_flags |= AMDZEN_F_SCAN_COMPLETE;

	if (azn->azn_nscanned == 0) {
		azn->azn_flags |= AMDZEN_F_UNSUPPORTED;
		azn->azn_taskqid = TASKQID_INVALID;
		cv_broadcast(&azn->azn_cv);
	} else if (azn->azn_npresent == azn->azn_nscanned) {
		/*
		 * All stubs already attached; otherwise the last stub's
		 * attach (amdzen_attach_stub) dispatches the init instead.
		 */
		azn->azn_flags |= AMDZEN_F_ATTACH_DISPATCHED;
		azn->azn_taskqid = taskq_dispatch(system_taskq,
		    amdzen_nexus_init, azn, TQ_SLEEP);
	}
	mutex_exit(&azn->azn_mutex);
}
2174 
2175 /*
2176  * Unfortunately we can't really let the stubs detach as we may need them to be
2177  * available for client operations. We may be able to improve this if we know
2178  * that the actual nexus is going away. However, as long as it's active, we need
2179  * all the stubs.
2180  */
2181 int
amdzen_detach_stub(dev_info_t * dip,ddi_detach_cmd_t cmd)2182 amdzen_detach_stub(dev_info_t *dip, ddi_detach_cmd_t cmd)
2183 {
2184 	if (cmd == DDI_SUSPEND) {
2185 		return (DDI_SUCCESS);
2186 	}
2187 
2188 	return (DDI_FAILURE);
2189 }
2190 
/*
 * Attach entry point for the stub drivers. We validate that the device is a
 * PCI device we recognize (either a northbridge by device ID or a DF device
 * by bus/device number), set up config space access, and register the stub
 * with the nexus. If this is the last expected stub, kick off the second
 * attach phase (amdzen_nexus_init).
 */
int
amdzen_attach_stub(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int *regs, reg;
	uint_t nregs, i;
	uint16_t vid, did;
	amdzen_stub_t *stub;
	amdzen_t *azn = amdzen_data;
	boolean_t valid = B_FALSE;
	boolean_t nb = B_FALSE;

	if (cmd == DDI_RESUME) {
		return (DDI_SUCCESS);
	} else if (cmd != DDI_ATTACH) {
		return (DDI_FAILURE);
	}

	/*
	 * Make sure that the stub that we've been asked to attach is a pci type
	 * device. If not, then there is no reason for us to proceed.
	 */
	if (strncmp("pci", ddi_get_name(dip), 3) != 0) {
		dev_err(dip, CE_WARN, "asked to attach a bad AMD Zen nexus "
		    "stub: %s", ddi_get_name(dip));
		return (DDI_FAILURE);
	}
	vid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "vendor-id", PCI_EINVAL16);
	did = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "device-id", PCI_EINVAL16);
	if (vid == PCI_EINVAL16 || did == PCI_EINVAL16) {
		dev_err(dip, CE_WARN, "failed to get PCI ID properties");
		return (DDI_FAILURE);
	}

	if (vid != AMDZEN_PCI_VID_AMD && vid != AMDZEN_PCI_VID_HYGON) {
		dev_err(dip, CE_WARN, "expected vendor ID (0x%x), found 0x%x",
		    cpuid_getvendor(CPU) == X86_VENDOR_HYGON ?
		    AMDZEN_PCI_VID_HYGON : AMDZEN_PCI_VID_AMD, vid);
		return (DDI_FAILURE);
	}

	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "reg", &regs, &nregs) != DDI_PROP_SUCCESS) {
		dev_err(dip, CE_WARN, "failed to get 'reg' property");
		return (DDI_FAILURE);
	}

	if (nregs == 0) {
		ddi_prop_free(regs);
		dev_err(dip, CE_WARN, "missing 'reg' property values");
		return (DDI_FAILURE);
	}
	/* Only the first 'reg' entry (the config-space BDF) is needed. */
	reg = *regs;
	ddi_prop_free(regs);

	/* Known northbridge device IDs mark this stub as the NB. */
	for (i = 0; i < ARRAY_SIZE(amdzen_nb_ids); i++) {
		if (amdzen_nb_ids[i] == did) {
			valid = B_TRUE;
			nb = B_TRUE;
		}
	}

	/* Otherwise, devices on the DF bus are data fabric stubs. */
	if (!valid && PCI_REG_BUS_G(reg) == AMDZEN_DF_BUSNO &&
	    PCI_REG_DEV_G(reg) >= AMDZEN_DF_FIRST_DEVICE) {
		valid = B_TRUE;
		nb = B_FALSE;
	}

	if (!valid) {
		dev_err(dip, CE_WARN, "device %s didn't match the nexus list",
		    ddi_get_name(dip));
		return (DDI_FAILURE);
	}

	stub = kmem_alloc(sizeof (amdzen_stub_t), KM_SLEEP);
	if (pci_config_setup(dip, &stub->azns_cfgspace) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "failed to set up config space");
		kmem_free(stub, sizeof (amdzen_stub_t));
		return (DDI_FAILURE);
	}

	stub->azns_dip = dip;
	stub->azns_vid = vid;
	stub->azns_did = did;
	stub->azns_bus = PCI_REG_BUS_G(reg);
	stub->azns_dev = PCI_REG_DEV_G(reg);
	stub->azns_func = PCI_REG_FUNC_G(reg);
	ddi_set_driver_private(dip, stub);

	mutex_enter(&azn->azn_mutex);
	azn->azn_npresent++;
	if (nb) {
		list_insert_tail(&azn->azn_nb_stubs, stub);
	} else {
		list_insert_tail(&azn->azn_df_stubs, stub);
	}

	/*
	 * If the scan has finished and this was the last outstanding stub,
	 * it falls to us to dispatch the second attach phase.
	 */
	if ((azn->azn_flags & AMDZEN_F_TASKQ_MASK) == AMDZEN_F_SCAN_COMPLETE &&
	    azn->azn_nscanned == azn->azn_npresent) {
		azn->azn_flags |= AMDZEN_F_ATTACH_DISPATCHED;
		azn->azn_taskqid = taskq_dispatch(system_taskq,
		    amdzen_nexus_init, azn, TQ_SLEEP);
	}
	mutex_exit(&azn->azn_mutex);

	return (DDI_SUCCESS);
}
2299 
2300 static int
amdzen_bus_ctl(dev_info_t * dip,dev_info_t * rdip,ddi_ctl_enum_t ctlop,void * arg,void * result)2301 amdzen_bus_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
2302     void *arg, void *result)
2303 {
2304 	char buf[32];
2305 	dev_info_t *child;
2306 	const amdzen_child_data_t *acd;
2307 
2308 	switch (ctlop) {
2309 	case DDI_CTLOPS_REPORTDEV:
2310 		if (rdip == NULL) {
2311 			return (DDI_FAILURE);
2312 		}
2313 		cmn_err(CE_CONT, "amdzen nexus: %s@%s, %s%d\n",
2314 		    ddi_node_name(rdip), ddi_get_name_addr(rdip),
2315 		    ddi_driver_name(rdip), ddi_get_instance(rdip));
2316 		break;
2317 	case DDI_CTLOPS_INITCHILD:
2318 		child = arg;
2319 		if (child == NULL) {
2320 			dev_err(dip, CE_WARN, "!no child passed for "
2321 			    "DDI_CTLOPS_INITCHILD");
2322 		}
2323 
2324 		acd = ddi_get_parent_data(child);
2325 		if (acd == NULL) {
2326 			dev_err(dip, CE_WARN, "!missing child parent data");
2327 			return (DDI_FAILURE);
2328 		}
2329 
2330 		if (snprintf(buf, sizeof (buf), "%d", acd->acd_addr) >=
2331 		    sizeof (buf)) {
2332 			dev_err(dip, CE_WARN, "!failed to construct device "
2333 			    "addr due to overflow");
2334 			return (DDI_FAILURE);
2335 		}
2336 
2337 		ddi_set_name_addr(child, buf);
2338 		break;
2339 	case DDI_CTLOPS_UNINITCHILD:
2340 		child = arg;
2341 		if (child == NULL) {
2342 			dev_err(dip, CE_WARN, "!no child passed for "
2343 			    "DDI_CTLOPS_UNINITCHILD");
2344 		}
2345 
2346 		ddi_set_name_addr(child, NULL);
2347 		break;
2348 	default:
2349 		return (ddi_ctlops(dip, rdip, ctlop, arg, result));
2350 	}
2351 	return (DDI_SUCCESS);
2352 }
2353 
2354 static int
amdzen_topo_open(dev_t * devp,int flag,int otyp,cred_t * credp)2355 amdzen_topo_open(dev_t *devp, int flag, int otyp, cred_t *credp)
2356 {
2357 	minor_t m;
2358 	amdzen_t *azn = amdzen_data;
2359 
2360 	if (crgetzoneid(credp) != GLOBAL_ZONEID ||
2361 	    secpolicy_sys_config(credp, B_FALSE) != 0) {
2362 		return (EPERM);
2363 	}
2364 
2365 	if ((flag & (FEXCL | FNDELAY | FNONBLOCK)) != 0) {
2366 		return (EINVAL);
2367 	}
2368 
2369 	if (otyp != OTYP_CHR) {
2370 		return (EINVAL);
2371 	}
2372 
2373 	m = getminor(*devp);
2374 	if (m != AMDZEN_MINOR_TOPO) {
2375 		return (ENXIO);
2376 	}
2377 
2378 	mutex_enter(&azn->azn_mutex);
2379 	if ((azn->azn_flags & AMDZEN_F_IOCTL_MASK) !=
2380 	    AMDZEN_F_ATTACH_COMPLETE) {
2381 		mutex_exit(&azn->azn_mutex);
2382 		return (ENOTSUP);
2383 	}
2384 	mutex_exit(&azn->azn_mutex);
2385 
2386 	return (0);
2387 }
2388 
2389 static int
amdzen_topo_ioctl_base(amdzen_t * azn,intptr_t arg,int mode)2390 amdzen_topo_ioctl_base(amdzen_t *azn, intptr_t arg, int mode)
2391 {
2392 	amdzen_topo_base_t base;
2393 
2394 	bzero(&base, sizeof (base));
2395 	mutex_enter(&azn->azn_mutex);
2396 	base.atb_ndf = azn->azn_ndfs;
2397 
2398 	if ((azn->azn_flags & AMDZEN_F_APIC_DECOMP_VALID) == 0) {
2399 		mutex_exit(&azn->azn_mutex);
2400 		return (ENOTSUP);
2401 	}
2402 
2403 	base.atb_apic_decomp = azn->azn_apic_decomp;
2404 	for (uint_t i = 0; i < azn->azn_ndfs; i++) {
2405 		const amdzen_df_t *df = &azn->azn_dfs[i];
2406 
2407 		base.atb_maxdfent = MAX(base.atb_maxdfent, df->adf_nents);
2408 		if (i == 0) {
2409 			base.atb_rev = df->adf_rev;
2410 			base.atb_df_decomp = df->adf_decomp;
2411 		}
2412 	}
2413 	mutex_exit(&azn->azn_mutex);
2414 
2415 	if (ddi_copyout(&base, (void *)(uintptr_t)arg, sizeof (base),
2416 	    mode & FKIOCTL) != 0) {
2417 		return (EFAULT);
2418 	}
2419 
2420 	return (0);
2421 }
2422 
2423 /*
2424  * Fill in the peers. We only have this information prior to DF 4D2.  The way we
 * do this is to just fill in all the entries and then zero out the ones that
2426  * aren't valid.
2427  */
2428 static void
amdzen_topo_ioctl_df_fill_peers(const amdzen_df_t * df,const amdzen_df_ent_t * ent,amdzen_topo_df_ent_t * topo_ent)2429 amdzen_topo_ioctl_df_fill_peers(const amdzen_df_t *df,
2430     const amdzen_df_ent_t *ent, amdzen_topo_df_ent_t *topo_ent)
2431 {
2432 	topo_ent->atde_npeers = DF_FBIINFO0_GET_FTI_PCNT(ent->adfe_info0);
2433 
2434 	if (df->adf_rev >= DF_REV_4D2) {
2435 		bzero(topo_ent->atde_peers, sizeof (topo_ent->atde_npeers));
2436 		return;
2437 	}
2438 
2439 	topo_ent->atde_peers[0] = DF_FBINFO1_GET_FTI0_NINSTID(ent->adfe_info1);
2440 	topo_ent->atde_peers[1] = DF_FBINFO1_GET_FTI1_NINSTID(ent->adfe_info1);
2441 	topo_ent->atde_peers[2] = DF_FBINFO1_GET_FTI2_NINSTID(ent->adfe_info1);
2442 	topo_ent->atde_peers[3] = DF_FBINFO1_GET_FTI3_NINSTID(ent->adfe_info1);
2443 	topo_ent->atde_peers[4] = DF_FBINFO2_GET_FTI4_NINSTID(ent->adfe_info2);
2444 	topo_ent->atde_peers[5] = DF_FBINFO2_GET_FTI5_NINSTID(ent->adfe_info2);
2445 
2446 	for (uint32_t i = topo_ent->atde_npeers; i < AMDZEN_TOPO_DF_MAX_PEERS;
2447 	    i++) {
2448 		topo_ent->atde_peers[i] = 0;
2449 	}
2450 }
2451 
2452 static void
amdzen_topo_ioctl_df_fill_ccm(const amdzen_df_ent_t * ent,amdzen_topo_df_ent_t * topo_ent)2453 amdzen_topo_ioctl_df_fill_ccm(const amdzen_df_ent_t *ent,
2454     amdzen_topo_df_ent_t *topo_ent)
2455 {
2456 	const amdzen_ccm_data_t *ccm = &ent->adfe_data.aded_ccm;
2457 	amdzen_topo_ccm_data_t *topo_ccm = &topo_ent->atde_data.atded_ccm;
2458 
2459 	topo_ccm->atcd_nccds = ccm->acd_nccds;
2460 	for (uint32_t i = 0; i < DF_MAX_CCDS_PER_CCM; i++) {
2461 		topo_ccm->atcd_ccd_en[i] = ccm->acd_ccd_en[i];
2462 		topo_ccm->atcd_ccd_ids[i] = ccm->acd_ccd_id[i];
2463 	}
2464 }
2465 
/*
 * Handle AMDZEN_TOPO_IOCTL_DF: copy out information about a single DF and as
 * many of its entities as fit in the caller's buffer. Supports both ILP32 and
 * LP64 callers via ddi_model_convert_from().
 */
static int
amdzen_topo_ioctl_df(amdzen_t *azn, intptr_t arg, int mode)
{
	uint_t model;
	uint32_t max_ents, nwritten;
	const amdzen_df_t *df;
	amdzen_topo_df_t topo_df;
#ifdef	_MULTI_DATAMODEL
	amdzen_topo_df32_t topo_df32;
#endif

	model = ddi_model_convert_from(mode);
	switch (model) {
#ifdef	_MULTI_DATAMODEL
	case DDI_MODEL_ILP32:
		/* 32-bit caller: translate into the native structure. */
		if (ddi_copyin((void *)(uintptr_t)arg, &topo_df32,
		    sizeof (topo_df32), mode & FKIOCTL) != 0) {
			return (EFAULT);
		}
		bzero(&topo_df, sizeof (topo_df));
		topo_df.atd_dfno = topo_df32.atd_dfno;
		topo_df.atd_df_buf_nents = topo_df32.atd_df_buf_nents;
		topo_df.atd_df_ents = (void *)(uintptr_t)topo_df32.atd_df_ents;
		break;
#endif
	case DDI_MODEL_NONE:
		if (ddi_copyin((void *)(uintptr_t)arg, &topo_df,
		    sizeof (topo_df), mode & FKIOCTL) != 0) {
			return (EFAULT);
		}
		break;
	default:
		return (ENOTSUP);
	}

	mutex_enter(&azn->azn_mutex);
	if (topo_df.atd_dfno >= azn->azn_ndfs) {
		mutex_exit(&azn->azn_mutex);
		return (EINVAL);
	}

	/* Fill in the DF-wide identification from our cached state. */
	df = &azn->azn_dfs[topo_df.atd_dfno];
	topo_df.atd_nodeid = df->adf_nodeid;
	topo_df.atd_sockid = (df->adf_nodeid & df->adf_decomp.dfd_sock_mask) >>
	    df->adf_decomp.dfd_sock_shift;
	topo_df.atd_dieid = (df->adf_nodeid & df->adf_decomp.dfd_die_mask) >>
	    df->adf_decomp.dfd_die_shift;
	topo_df.atd_rev = df->adf_rev;
	topo_df.atd_major = df->adf_major;
	topo_df.atd_minor = df->adf_minor;
	topo_df.atd_df_act_nents = df->adf_nents;
	max_ents = MIN(topo_df.atd_df_buf_nents, df->adf_nents);

	/* A NULL entity buffer means the caller only wants the sizes. */
	if (topo_df.atd_df_ents == NULL) {
		topo_df.atd_df_buf_nvalid = 0;
		mutex_exit(&azn->azn_mutex);
		goto copyout;
	}

	nwritten = 0;
	for (uint32_t i = 0; i < max_ents; i++) {
		amdzen_topo_df_ent_t topo_ent;
		const amdzen_df_ent_t *ent = &df->adf_ents[i];

		/*
		 * We opt not to include disabled elements right now. They
		 * generally don't have a valid type and there isn't much useful
		 * information we can get from them. This can be changed if we
		 * find a use case for them for userland topo.
		 */
		if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0)
			continue;

		bzero(&topo_ent, sizeof (topo_ent));
		topo_ent.atde_type = ent->adfe_type;
		topo_ent.atde_subtype = ent->adfe_subtype;
		topo_ent.atde_fabric_id = ent->adfe_fabric_id;
		topo_ent.atde_inst_id = ent->adfe_inst_id;
		amdzen_topo_ioctl_df_fill_peers(df, ent, &topo_ent);

		if (amdzen_dfe_is_ccm(df, ent)) {
			amdzen_topo_ioctl_df_fill_ccm(ent, &topo_ent);
		}

		/* Entities are copied out one at a time as they're built. */
		if (ddi_copyout(&topo_ent, &topo_df.atd_df_ents[nwritten],
		    sizeof (topo_ent), mode & FKIOCTL) != 0) {
			mutex_exit(&azn->azn_mutex);
			return (EFAULT);
		}
		nwritten++;
	}
	mutex_exit(&azn->azn_mutex);

	topo_df.atd_df_buf_nvalid = nwritten;
copyout:
	switch (model) {
#ifdef	_MULTI_DATAMODEL
	case DDI_MODEL_ILP32:
		/* Translate the results back for a 32-bit caller. */
		topo_df32.atd_nodeid = topo_df.atd_nodeid;
		topo_df32.atd_sockid = topo_df.atd_sockid;
		topo_df32.atd_dieid = topo_df.atd_dieid;
		topo_df32.atd_rev = topo_df.atd_rev;
		topo_df32.atd_major = topo_df.atd_major;
		topo_df32.atd_minor = topo_df.atd_minor;
		topo_df32.atd_df_buf_nvalid = topo_df.atd_df_buf_nvalid;
		topo_df32.atd_df_act_nents = topo_df.atd_df_act_nents;

		if (ddi_copyout(&topo_df32, (void *)(uintptr_t)arg,
		    sizeof (topo_df32), mode & FKIOCTL) != 0) {
			return (EFAULT);
		}
		break;
#endif
	case DDI_MODEL_NONE:
		if (ddi_copyout(&topo_df, (void *)(uintptr_t)arg,
		    sizeof (topo_df), mode & FKIOCTL) != 0) {
			return (EFAULT);
		}
		break;
	default:
		break;
	}


	return (0);
}
2592 
/*
 * Handle AMDZEN_TOPO_IOCTL_CCD: return the detailed topology of one CCD,
 * identified by the caller's (dfno, instid, phys_no) tuple. Results are
 * computed on first use via amdzen_ccd_fill_topo() and cached on the CCM
 * entity. Errors are reported to userland in atccd_err with a 0 return.
 */
static int
amdzen_topo_ioctl_ccd(amdzen_t *azn, intptr_t arg, int mode)
{
	amdzen_topo_ccd_t ccd, *ccdp;
	amdzen_df_t *df;
	amdzen_df_ent_t *ent;
	amdzen_ccm_data_t *ccm;
	uint32_t ccdno;
	size_t copyin_size = offsetof(amdzen_topo_ccd_t, atccd_err);

	/*
	 * Only copy in the identifying information so that way we can ensure
	 * the rest of the structure we return to the user doesn't contain
	 * anything unexpected in it.
	 */
	bzero(&ccd, sizeof (ccd));
	if (ddi_copyin((void *)(uintptr_t)arg, &ccd, copyin_size,
	    mode & FKIOCTL) != 0) {
		return (EFAULT);
	}

	mutex_enter(&azn->azn_mutex);
	if ((azn->azn_flags & AMDZEN_F_APIC_DECOMP_VALID) == 0) {
		ccd.atccd_err = AMDZEN_TOPO_CCD_E_NO_APIC_DECOMP;
		goto copyout;
	}

	df = amdzen_df_find(azn, ccd.atccd_dfno);
	if (df == NULL) {
		ccd.atccd_err = AMDZEN_TOPO_CCD_E_BAD_DFNO;
		goto copyout;
	}

	/*
	 * We don't have enough information to know how to construct this
	 * information in Zen 1 at this time, so refuse.
	 */
	if (df->adf_rev <= DF_REV_2) {
		ccd.atccd_err = AMDZEN_TOPO_CCD_E_SOC_UNSUPPORTED;
		goto copyout;
	}

	ent = amdzen_df_ent_find_by_instid(df, ccd.atccd_instid);
	if (ent == NULL) {
		ccd.atccd_err = AMDZEN_TOPO_CCD_E_BAD_INSTID;
		goto copyout;
	}

	if (!amdzen_dfe_is_ccm(df, ent)) {
		ccd.atccd_err = AMDZEN_TOPO_CCD_E_NOT_A_CCD;
		goto copyout;
	}

	/* Find which of the CCM's CCDs matches the requested physical ID. */
	ccm = &ent->adfe_data.aded_ccm;
	for (ccdno = 0; ccdno < DF_MAX_CCDS_PER_CCM; ccdno++) {
		if (ccm->acd_ccd_en[ccdno] != 0 &&
		    ccm->acd_ccd_id[ccdno] == ccd.atccd_phys_no) {
			break;
		}
	}

	if (ccdno == DF_MAX_CCDS_PER_CCM) {
		ccd.atccd_err = AMDZEN_TOPO_CCD_E_NOT_A_CCD;
		goto copyout;
	}

	if (ccm->acd_ccd_data[ccdno] == NULL) {
		/*
		 * We don't actually have this data. Go fill it out and save it
		 * for future use.
		 */
		ccdp = kmem_zalloc(sizeof (amdzen_topo_ccd_t), KM_NOSLEEP_LAZY);
		if (ccdp == NULL) {
			mutex_exit(&azn->azn_mutex);
			return (ENOMEM);
		}

		ccdp->atccd_dfno = ccd.atccd_dfno;
		ccdp->atccd_instid = ccd.atccd_instid;
		ccdp->atccd_phys_no = ccd.atccd_phys_no;
		amdzen_ccd_fill_topo(azn, df, ent, ccdp);
		ccm->acd_ccd_data[ccdno] = ccdp;
	}
	ASSERT3P(ccm->acd_ccd_data[ccdno], !=, NULL);
	bcopy(ccm->acd_ccd_data[ccdno], &ccd, sizeof (ccd));

copyout:
	mutex_exit(&azn->azn_mutex);
	if (ddi_copyout(&ccd, (void *)(uintptr_t)arg, sizeof (ccd),
	    mode & FKIOCTL) != 0) {
		return (EFAULT);
	}

	return (0);
}
2688 
2689 static int
amdzen_topo_ioctl(dev_t dev,int cmd,intptr_t arg,int mode,cred_t * credp,int * rvalp)2690 amdzen_topo_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
2691     cred_t *credp, int *rvalp)
2692 {
2693 	int ret;
2694 	amdzen_t *azn = amdzen_data;
2695 
2696 	if (getminor(dev) != AMDZEN_MINOR_TOPO) {
2697 		return (ENXIO);
2698 	}
2699 
2700 	if ((mode & FREAD) == 0) {
2701 		return (EBADF);
2702 	}
2703 
2704 	switch (cmd) {
2705 	case AMDZEN_TOPO_IOCTL_BASE:
2706 		ret = amdzen_topo_ioctl_base(azn, arg, mode);
2707 		break;
2708 	case AMDZEN_TOPO_IOCTL_DF:
2709 		ret = amdzen_topo_ioctl_df(azn, arg, mode);
2710 		break;
2711 	case AMDZEN_TOPO_IOCTL_CCD:
2712 		ret = amdzen_topo_ioctl_ccd(azn, arg, mode);
2713 		break;
2714 	default:
2715 		ret = ENOTTY;
2716 		break;
2717 	}
2718 
2719 	return (ret);
2720 }
2721 
2722 static int
amdzen_topo_close(dev_t dev,int flag,int otyp,cred_t * credp)2723 amdzen_topo_close(dev_t dev, int flag, int otyp, cred_t *credp)
2724 {
2725 	if (otyp != OTYP_CHR) {
2726 		return (EINVAL);
2727 	}
2728 
2729 	if (getminor(dev) != AMDZEN_MINOR_TOPO) {
2730 		return (ENXIO);
2731 	}
2732 
2733 	return (0);
2734 }
2735 
2736 static int
amdzen_attach(dev_info_t * dip,ddi_attach_cmd_t cmd)2737 amdzen_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
2738 {
2739 	amdzen_t *azn = amdzen_data;
2740 
2741 	if (cmd == DDI_RESUME) {
2742 		return (DDI_SUCCESS);
2743 	} else if (cmd != DDI_ATTACH) {
2744 		return (DDI_FAILURE);
2745 	}
2746 
2747 	mutex_enter(&azn->azn_mutex);
2748 	if (azn->azn_dip != NULL) {
2749 		dev_err(dip, CE_WARN, "driver is already attached!");
2750 		mutex_exit(&azn->azn_mutex);
2751 		return (DDI_FAILURE);
2752 	}
2753 
2754 	if (ddi_create_minor_node(dip, "topo", S_IFCHR, AMDZEN_MINOR_TOPO,
2755 	    DDI_PSEUDO, 0) != 0) {
2756 		dev_err(dip, CE_WARN, "failed to create topo minor node!");
2757 		mutex_exit(&azn->azn_mutex);
2758 		return (DDI_FAILURE);
2759 	}
2760 
2761 	azn->azn_dip = dip;
2762 	azn->azn_taskqid = taskq_dispatch(system_taskq, amdzen_stub_scan,
2763 	    azn, TQ_SLEEP);
2764 	azn->azn_flags |= AMDZEN_F_SCAN_DISPATCHED;
2765 	mutex_exit(&azn->azn_mutex);
2766 
2767 	return (DDI_SUCCESS);
2768 }
2769 
2770 static int
amdzen_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)2771 amdzen_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
2772 {
2773 	amdzen_t *azn = amdzen_data;
2774 
2775 	if (cmd == DDI_SUSPEND) {
2776 		return (DDI_SUCCESS);
2777 	} else if (cmd != DDI_DETACH) {
2778 		return (DDI_FAILURE);
2779 	}
2780 
2781 	mutex_enter(&azn->azn_mutex);
2782 	while (azn->azn_taskqid != TASKQID_INVALID) {
2783 		cv_wait(&azn->azn_cv, &azn->azn_mutex);
2784 	}
2785 
2786 	/*
2787 	 * If we've attached any stub drivers, e.g. this platform is important
2788 	 * for us, then we fail detach.
2789 	 */
2790 	if (!list_is_empty(&azn->azn_df_stubs) ||
2791 	    !list_is_empty(&azn->azn_nb_stubs)) {
2792 		mutex_exit(&azn->azn_mutex);
2793 		return (DDI_FAILURE);
2794 	}
2795 
2796 	ddi_remove_minor_node(azn->azn_dip, NULL);
2797 	azn->azn_dip = NULL;
2798 	mutex_exit(&azn->azn_mutex);
2799 
2800 	return (DDI_SUCCESS);
2801 }
2802 
2803 static void
amdzen_free(void)2804 amdzen_free(void)
2805 {
2806 	if (amdzen_data == NULL) {
2807 		return;
2808 	}
2809 
2810 	VERIFY(list_is_empty(&amdzen_data->azn_df_stubs));
2811 	list_destroy(&amdzen_data->azn_df_stubs);
2812 	VERIFY(list_is_empty(&amdzen_data->azn_nb_stubs));
2813 	list_destroy(&amdzen_data->azn_nb_stubs);
2814 	cv_destroy(&amdzen_data->azn_cv);
2815 	mutex_destroy(&amdzen_data->azn_mutex);
2816 	kmem_free(amdzen_data, sizeof (amdzen_t));
2817 	amdzen_data = NULL;
2818 }
2819 
2820 static void
amdzen_alloc(void)2821 amdzen_alloc(void)
2822 {
2823 	amdzen_data = kmem_zalloc(sizeof (amdzen_t), KM_SLEEP);
2824 	mutex_init(&amdzen_data->azn_mutex, NULL, MUTEX_DRIVER, NULL);
2825 	list_create(&amdzen_data->azn_df_stubs, sizeof (amdzen_stub_t),
2826 	    offsetof(amdzen_stub_t, azns_link));
2827 	list_create(&amdzen_data->azn_nb_stubs, sizeof (amdzen_stub_t),
2828 	    offsetof(amdzen_stub_t, azns_link));
2829 	cv_init(&amdzen_data->azn_cv, NULL, CV_DRIVER, NULL);
2830 }
2831 
/*
 * Character device operations for the topology minor node. Only open, close,
 * and ioctl are meaningful; everything else is unsupported (nodev).
 */
static struct cb_ops amdzen_topo_cb_ops = {
	.cb_open = amdzen_topo_open,
	.cb_close = amdzen_topo_close,
	.cb_strategy = nodev,
	.cb_print = nodev,
	.cb_dump = nodev,
	.cb_read = nodev,
	.cb_write = nodev,
	.cb_ioctl = amdzen_topo_ioctl,
	.cb_devmap = nodev,
	.cb_mmap = nodev,
	.cb_segmap = nodev,
	.cb_chpoll = nochpoll,
	.cb_prop_op = ddi_prop_op,
	.cb_flag = D_MP,	/* safe for concurrent entry */
	.cb_rev = CB_REV,
	.cb_aread = nodev,
	.cb_awrite = nodev
};
2851 
/*
 * Bus operations for our role as a nexus. We do not provide register mapping
 * or DMA services to children (the ddi_no_dma_* stubs fail such requests);
 * child configuration is handled through amdzen_bus_ctl.
 */
struct bus_ops amdzen_bus_ops = {
	.busops_rev = BUSO_REV,
	.bus_map = nullbusmap,
	.bus_dma_map = ddi_no_dma_map,
	.bus_dma_allochdl = ddi_no_dma_allochdl,
	.bus_dma_freehdl = ddi_no_dma_freehdl,
	.bus_dma_bindhdl = ddi_no_dma_bindhdl,
	.bus_dma_unbindhdl = ddi_no_dma_unbindhdl,
	.bus_dma_flush = ddi_no_dma_flush,
	.bus_dma_win = ddi_no_dma_win,
	.bus_dma_ctl = ddi_no_dma_mctl,
	.bus_prop_op = ddi_bus_prop_op,
	.bus_ctl = amdzen_bus_ctl
};
2866 
/*
 * Device operations: ties together our attach/detach entry points, the nexus
 * bus ops, and the character device ops for the topology minor.
 */
static struct dev_ops amdzen_dev_ops = {
	.devo_rev = DEVO_REV,
	.devo_refcnt = 0,
	.devo_getinfo = nodev,
	.devo_identify = nulldev,
	.devo_probe = nulldev,
	.devo_attach = amdzen_attach,
	.devo_detach = amdzen_detach,
	.devo_reset = nodev,
	.devo_quiesce = ddi_quiesce_not_needed,
	.devo_bus_ops = &amdzen_bus_ops,
	.devo_cb_ops = &amdzen_topo_cb_ops
};
2880 
/* Loadable driver module description. */
static struct modldrv amdzen_modldrv = {
	.drv_modops = &mod_driverops,
	.drv_linkinfo = "AMD Zen Nexus Driver",
	.drv_dev_ops = &amdzen_dev_ops
};
2886 
/* Module linkage: a single driver module. */
static struct modlinkage amdzen_modlinkage = {
	.ml_rev = MODREV_1,
	.ml_linkage = { &amdzen_modldrv, NULL }
};
2891 
2892 int
_init(void)2893 _init(void)
2894 {
2895 	int ret;
2896 
2897 	if (cpuid_getvendor(CPU) != X86_VENDOR_AMD &&
2898 	    cpuid_getvendor(CPU) != X86_VENDOR_HYGON) {
2899 		return (ENOTSUP);
2900 	}
2901 
2902 	if ((ret = mod_install(&amdzen_modlinkage)) == 0) {
2903 		amdzen_alloc();
2904 	}
2905 
2906 	return (ret);
2907 }
2908 
/* Module information entry point; reports our modlinkage. */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&amdzen_modlinkage, modinfop));
}
2914 
2915 int
_fini(void)2916 _fini(void)
2917 {
2918 	int ret;
2919 
2920 	if ((ret = mod_remove(&amdzen_modlinkage)) == 0) {
2921 		amdzen_free();
2922 	}
2923 
2924 	return (ret);
2925 }
2926