xref: /illumos-gate/usr/src/lib/fm/topo/modules/common/zen/topo_zen.c (revision e98d23eb02da9d91bda0f65ef0da3203adbbbf49)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2025 Oxide Computer Company
14  */
15 
16 /*
17  * This module implements a series of enumeration methods that tie into the
18  * amdzen(4D) nexus driver. This module is currently built out of the various
19  * x86 platform directories (though it'd be nice if we could just make this
20  * ISA-specific rather than platform-specific).
21  */
22 
23 #include <sys/fm/protocol.h>
24 #include <fm/topo_mod.h>
25 #include <sys/types.h>
26 #include <sys/stat.h>
27 #include <fcntl.h>
28 #include <errno.h>
29 #include <strings.h>
30 #include <unistd.h>
31 #include <sys/devfm.h>
32 #include <sys/x86_archext.h>
33 
34 #include "topo_zen_impl.h"
35 
/*
 * This is the path to the device node that amdzen(4D) creates for us to ask it
 * questions. It is opened read-only by topo_zen_init() and the resulting file
 * descriptor is the one used for the AMDZEN_TOPO_IOCTL_* requests issued by
 * this module.
 */
static const char *topo_zen_dev = "/devices/pseudo/amdzen@0:topo";
41 
42 static inline boolean_t
topo_zen_df_at_least(const amdzen_topo_df_t * df,uint8_t major,uint8_t minor)43 topo_zen_df_at_least(const amdzen_topo_df_t *df, uint8_t major, uint8_t minor)
44 {
45 	return (df->atd_major > major || (df->atd_major == major &&
46 	    df->atd_minor >= minor));
47 }
48 
49 /*
50  * Helper to determine whether or not a given DF entity's type is that of a CCM
51  * or not as this has changed across the various DF versions.
52  */
53 static boolean_t
topo_zen_fabric_is_ccm(const amdzen_topo_df_t * df,const amdzen_topo_df_ent_t * ent)54 topo_zen_fabric_is_ccm(const amdzen_topo_df_t *df,
55     const amdzen_topo_df_ent_t *ent)
56 {
57 	if (ent->atde_type != DF_TYPE_CCM) {
58 		return (B_FALSE);
59 	}
60 
61 	if (df->atd_rev >= DF_REV_4 && topo_zen_df_at_least(df, 4, 1)) {
62 		return (ent->atde_subtype == DF_CCM_SUBTYPE_CPU_V4P1);
63 	} else {
64 		return (ent->atde_subtype == DF_CCM_SUBTYPE_CPU_V2);
65 	}
66 }
67 
68 /*
69  * Clean up all data that is associated with an attempt to enumerate the socket.
70  * The structure itself is assumed to be on the stack or handled elsewhere. It
71  * must have been initialized prior to calling this. Don't give us stack
72  * garbage.
73  */
74 static void
topo_zen_enum_cleanup_sock(topo_mod_t * mod,zen_topo_enum_sock_t * sock)75 topo_zen_enum_cleanup_sock(topo_mod_t *mod, zen_topo_enum_sock_t *sock)
76 {
77 	if (sock->ztes_kstat != NULL) {
78 		(void) kstat_close(sock->ztes_kstat);
79 		sock->ztes_kstat = NULL;
80 	}
81 
82 	if (sock->ztes_cpus != NULL) {
83 		for (uint_t i = 0; i < sock->ztes_ncpus; i++) {
84 			nvlist_free(sock->ztes_cpus[i]);
85 		}
86 		umem_free(sock->ztes_cpus, sizeof (nvlist_t *) *
87 		    sock->ztes_ncpus);
88 		sock->ztes_cpus = NULL;
89 	}
90 
91 	if (sock->ztes_fm_agent != NULL) {
92 		fmd_agent_cache_info_free(sock->ztes_fm_agent,
93 		    &sock->ztes_cache);
94 		fmd_agent_close(sock->ztes_fm_agent);
95 		sock->ztes_fm_agent = NULL;
96 	}
97 
98 	if (sock->ztes_tn_ccd != NULL) {
99 		topo_mod_free(mod, sock->ztes_tn_ccd, sock->ztes_nccd *
100 		    sizeof (zen_topo_enum_ccd_t));
101 		sock->ztes_tn_ccd = NULL;
102 	}
103 
104 	if (sock->ztes_ccd != NULL) {
105 		topo_mod_free(mod, sock->ztes_ccd, sock->ztes_nccd *
106 		    sizeof (amdzen_topo_ccd_t));
107 		sock->ztes_ccd = NULL;
108 	}
109 }
110 
111 static int
topo_zen_enum_chip_gather_ccd(topo_mod_t * mod,const zen_topo_t * zen,zen_topo_enum_sock_t * sock,const amdzen_topo_df_ent_t * dfe,uint32_t ccdno,uint32_t phys_ccdno)112 topo_zen_enum_chip_gather_ccd(topo_mod_t *mod, const zen_topo_t *zen,
113     zen_topo_enum_sock_t *sock,
114     const amdzen_topo_df_ent_t *dfe, uint32_t ccdno, uint32_t phys_ccdno)
115 {
116 	amdzen_topo_ccd_t *ccd;
117 
118 	ccd = &sock->ztes_ccd[ccdno];
119 	ccd->atccd_dfno = sock->ztes_df->atd_dfno;
120 	ccd->atccd_instid = dfe->atde_inst_id;
121 	ccd->atccd_phys_no = phys_ccdno;
122 	if (ioctl(zen->zt_fd, AMDZEN_TOPO_IOCTL_CCD, ccd) != 0) {
123 		topo_mod_dprintf(mod, "failed to get CCD information "
124 		    "for DF/CCD 0x%x/0x%x: %s\n", sock->ztes_df->atd_dfno,
125 		    ccd->atccd_instid, strerror(errno));
126 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
127 	}
128 
129 	switch (ccd->atccd_err) {
130 	case AMDZEN_TOPO_CCD_E_OK:
131 		sock->ztes_nccd_valid++;
132 		break;
133 	/*
134 	 * We ignore errors about CCDs being missing. This is fine
135 	 * because on systems without a full CCD complement this will
136 	 * happen and is expected. We make sure we have at least one
137 	 * valid CCD before continuing.
138 	 */
139 	case AMDZEN_TOPO_CCD_E_CCD_MISSING:
140 		break;
141 	default:
142 		topo_mod_dprintf(mod, "DF CCM fabric 0x%x, CCD 0x%x "
143 		    "didn't give us valid info: found error 0x%x\n",
144 		    dfe->atde_fabric_id, phys_ccdno, ccd->atccd_err);
145 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
146 	}
147 
148 	return (0);
149 }
150 
151 
152 /*
153  * Go through all of our disparate sources and gather information that we'll
154  * need to process and perform enumeration. We need to gather the following
155  * disparate pieces of information:
156  *
157  * 1) We need to determine what's going on with all the CCDs and ask the
158  * amdzen(4D) driver for information.
159  *
160  * 2) We need to use the FM agent to ask /dev/fm to get all the CPU information
161  * for this system.
162  *
163  * 3) We use the same system to go get all the actual cache information for this
164  * system.
165  *
166  * 4) We grab some of the chip-wide information such as the socket and brand
167  * string information through kstats, with information about a valid CPU ID.
168  */
169 static int
topo_zen_enum_chip_gather(topo_mod_t * mod,const zen_topo_t * zen,const amdzen_topo_df_t * df,zen_topo_enum_sock_t * sock)170 topo_zen_enum_chip_gather(topo_mod_t *mod, const zen_topo_t *zen,
171     const amdzen_topo_df_t *df, zen_topo_enum_sock_t *sock)
172 {
173 	uint32_t nccd = 0;
174 
175 	sock->ztes_df = df;
176 	for (uint32_t i = 0; i < df->atd_df_buf_nvalid; i++) {
177 		const amdzen_topo_df_ent_t *dfe = &df->atd_df_ents[i];
178 		if (topo_zen_fabric_is_ccm(df, dfe)) {
179 			nccd += dfe->atde_data.atded_ccm.atcd_nccds;
180 		}
181 	}
182 
183 	if (nccd == 0) {
184 		topo_mod_dprintf(mod, "no CCDs found! Not much more we can "
185 		    "do... Something probably went wrong\n");
186 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
187 	}
188 
189 	sock->ztes_nccd = nccd;
190 	sock->ztes_ccd = topo_mod_zalloc(mod, sizeof (amdzen_topo_ccd_t) *
191 	    sock->ztes_nccd);
192 	if (sock->ztes_ccd == NULL) {
193 		topo_mod_dprintf(mod, "failed to allocate memory for "
194 		    "ztes_ccd[]\n");
195 		return (topo_mod_seterrno(mod, EMOD_NOMEM));
196 	}
197 
198 	sock->ztes_tn_ccd = topo_mod_zalloc(mod, sizeof (zen_topo_enum_ccd_t) *
199 	    sock->ztes_nccd);
200 
201 	for (uint32_t i = 0, ccdno = 0; i < df->atd_df_buf_nvalid; i++) {
202 		const amdzen_topo_df_ent_t *dfe = &df->atd_df_ents[i];
203 		const amdzen_topo_ccm_data_t *ccm;
204 
205 		if (!topo_zen_fabric_is_ccm(df, dfe)) {
206 			continue;
207 		}
208 
209 		ccm = &dfe->atde_data.atded_ccm;
210 		for (uint32_t ccm_ccdno = 0; ccm_ccdno < ccm->atcd_nccds;
211 		    ccm_ccdno++) {
212 			if (ccm->atcd_ccd_en[ccm_ccdno] == 0) {
213 				continue;
214 			}
215 
216 			if (topo_zen_enum_chip_gather_ccd(mod, zen, sock, dfe,
217 			    ccdno, ccm->atcd_ccd_ids[ccm_ccdno]) != 0) {
218 				return (-1);
219 			}
220 
221 			ccdno++;
222 		}
223 	}
224 
225 	topo_mod_dprintf(mod, "found %u CCDs\n", sock->ztes_nccd_valid);
226 	if (sock->ztes_nccd_valid == 0) {
227 		topo_mod_dprintf(mod, "somehow we ended up with no CCDs with "
228 		    "valid topo information. Something went very wrong.\n");
229 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
230 	}
231 
232 	sock->ztes_fm_agent = fmd_agent_open(FMD_AGENT_VERSION);
233 	if (sock->ztes_fm_agent == NULL) {
234 		topo_mod_dprintf(mod, "failed to open FMD agent: %s\n",
235 		    strerror(errno));
236 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
237 	}
238 
239 	if (fmd_agent_physcpu_info(sock->ztes_fm_agent, &sock->ztes_cpus,
240 	    &sock->ztes_ncpus) != 0) {
241 		topo_mod_dprintf(mod, "failed to get FM agent CPU "
242 		    "information: %s\n",
243 		    strerror(fmd_agent_errno(sock->ztes_fm_agent)));
244 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
245 	}
246 
247 	topo_mod_dprintf(mod, "got %u CPUs worth of data from the FM agent\n",
248 	    sock->ztes_ncpus);
249 
250 	if (fmd_agent_cache_info(sock->ztes_fm_agent, &sock->ztes_cache) != 0) {
251 		topo_mod_dprintf(mod, "failed to get FM agent cache "
252 		    "information: %s\n",
253 		    strerror(fmd_agent_errno(sock->ztes_fm_agent)));
254 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
255 	}
256 
257 	if (sock->ztes_cache.fmc_ncpus != sock->ztes_ncpus) {
258 		topo_mod_dprintf(mod, "/dev/fm gave us %u CPUs, but %u CPUs "
259 		    "for cache information: cannot continue\n",
260 		    sock->ztes_ncpus, sock->ztes_cache.fmc_ncpus);
261 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
262 	}
263 
264 	sock->ztes_kstat = kstat_open();
265 	if (sock->ztes_kstat == NULL) {
266 		topo_mod_dprintf(mod, "failed to open kstat driver: %s\n",
267 		    strerror(errno));
268 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
269 	}
270 
271 	return (0);
272 }
273 
274 typedef enum {
275 	ZEN_TOPO_CACHE_UNKNOWN,
276 	ZEN_TOPO_CACHE_CORE_L1D,
277 	ZEN_TOPO_CACHE_CORE_L1I,
278 	ZEN_TOPO_CACHE_CORE_L2,
279 	ZEN_TOPO_CACHE_CCX_L3
280 } zen_topo_cache_type_t;
281 
282 typedef struct {
283 	uint32_t		ztcm_level;
284 	fm_cache_info_type_t	ztcm_type;
285 	boolean_t		ztcm_core;
286 	zen_topo_cache_type_t	ztcm_cache;
287 } zen_topo_cache_map_t;
288 
289 const zen_topo_cache_map_t zen_topo_cache_map[] = {
290 	{ 1, FM_CACHE_INFO_T_DATA, B_TRUE, ZEN_TOPO_CACHE_CORE_L1D },
291 	{ 1, FM_CACHE_INFO_T_INSTR, B_TRUE, ZEN_TOPO_CACHE_CORE_L1I },
292 	{ 2, FM_CACHE_INFO_T_DATA | FM_CACHE_INFO_T_INSTR |
293 	    FM_CACHE_INFO_T_UNIFIED, B_TRUE, ZEN_TOPO_CACHE_CORE_L2 },
294 	{ 3, FM_CACHE_INFO_T_DATA | FM_CACHE_INFO_T_INSTR |
295 	    FM_CACHE_INFO_T_UNIFIED, B_FALSE, ZEN_TOPO_CACHE_CCX_L3 }
296 };
297 
298 static zen_topo_cache_type_t
zen_topo_determine_cache(topo_mod_t * mod,uint32_t level,uint32_t type,uint32_t shift)299 zen_topo_determine_cache(topo_mod_t *mod, uint32_t level, uint32_t type,
300     uint32_t shift)
301 {
302 	for (size_t i = 0; i < ARRAY_SIZE(zen_topo_cache_map); i++) {
303 		const zen_topo_cache_map_t *map = &zen_topo_cache_map[i];
304 
305 		if (map->ztcm_level == level && map->ztcm_type == type) {
306 			return (map->ztcm_cache);
307 		}
308 	}
309 
310 	return (ZEN_TOPO_CACHE_UNKNOWN);
311 }
312 
313 /*
314  * We have mapped a logical CPU to a position in the hierarchy. We must now walk
315  * its caches and attempt to install them up the chain. We assume that there
316  * there are four caches right now: an L1i, L1d, L2, and L3 cache.
317  *
318  * Note, AMD has mixed designs with 1 CCX and 2 CCXs. When there is only 1 CCX
319  * then we often describe the CCX and CCD as equivalent though if you look at
320  * the PPR it describes each CCD as having a single CCX. This is why the L3
321  * cache lives on the CCX right now.
322  *
323  * Historically we tried to leverage the APIC shift information that the kernel
324  * provides around the number of CPUs that shared a cache and map that to the
325  * APIC ID decomposition information that we had. Unfortunately, this heuristic
326  * was useful, but inaccurate. In particular the CPUID interface gives us a
327  * count of logical CPUs that share something. If you had less CPUs in a CCD
328  * than the APIC split would be at, then this would fail. A prime example is a
329  * 32 CPU where there are 4 cores in each of 8 CCDs. This would result in 8
330  * logical CPUs sharing the CPU; however, the APIC split was often shifting over
331  * at 4 because the CCD design was for up to 8 cores.
332  */
333 static boolean_t
topo_zen_map_caches(topo_mod_t * mod,zen_topo_enum_sock_t * sock,zen_topo_enum_ccx_t * ccx,zen_topo_enum_core_t * core,uint32_t cpuno)334 topo_zen_map_caches(topo_mod_t *mod, zen_topo_enum_sock_t *sock,
335     zen_topo_enum_ccx_t *ccx, zen_topo_enum_core_t *core, uint32_t cpuno)
336 {
337 	fmd_agent_cpu_cache_t *cpu_cache = &sock->ztes_cache.fmc_cpus[cpuno];
338 	if (cpu_cache->fmcc_ncaches == 0) {
339 		return (B_TRUE);
340 	}
341 
342 	/*
343 	 * For each cache that we discover we need to do the following:
344 	 *
345 	 *  o Determine the type of cache that this is. While the upper layers
346 	 *    guarantee us the L1 caches come before L2 and L2 before L3, we
347 	 *    don't care.
348 	 *  o If a cache is already there, it should have the same ID as the one
349 	 *    that we already have.
350 	 */
351 	for (uint_t i = 0; i < cpu_cache->fmcc_ncaches; i++) {
352 		nvlist_t *nvl = cpu_cache->fmcc_caches[i];
353 		nvlist_t **cachep = NULL;
354 		zen_topo_cache_type_t ct;
355 		uint32_t level, type, shift;
356 		uint64_t id, alt_id;
357 
358 		if (nvlist_lookup_pairs(nvl, 0,
359 		    FM_CACHE_INFO_LEVEL, DATA_TYPE_UINT32, &level,
360 		    FM_CACHE_INFO_TYPE, DATA_TYPE_UINT32, &type,
361 		    FM_CACHE_INFO_ID, DATA_TYPE_UINT64, &id,
362 		    FM_CACHE_INFO_X86_APIC_SHIFT, DATA_TYPE_UINT32, &shift,
363 		    NULL) != 0) {
364 			topo_mod_dprintf(mod, "missing required nvlist fields "
365 			    "from FM CPU %u cache %u\n", cpuno, i);
366 			return (B_FALSE);
367 		}
368 
369 		ct = zen_topo_determine_cache(mod, level, type, shift);
370 		switch (ct) {
371 		case ZEN_TOPO_CACHE_UNKNOWN:
372 			topo_mod_dprintf(mod, "failed to map CPU %u cache %u "
373 			    "with id 0x%" PRIx64 " level %u, type 0x%x, APIC "
374 			    "shift 0x%x to a known type\n", cpuno, i, id, level,
375 			    type, shift);
376 			return (B_FALSE);
377 		case ZEN_TOPO_CACHE_CORE_L1D:
378 			cachep = &core->ztcore_l1d;
379 			break;
380 		case ZEN_TOPO_CACHE_CORE_L1I:
381 			cachep = &core->ztcore_l1i;
382 			break;
383 		case ZEN_TOPO_CACHE_CORE_L2:
384 			cachep = &core->ztcore_l2;
385 			break;
386 		case ZEN_TOPO_CACHE_CCX_L3:
387 			cachep = &ccx->ztccx_l3;
388 			break;
389 		}
390 
391 		if (*cachep == NULL) {
392 			*cachep = nvl;
393 			continue;
394 		}
395 
396 		alt_id = fnvlist_lookup_uint64(*cachep, FM_CACHE_INFO_ID);
397 		if (alt_id != id) {
398 			topo_mod_dprintf(mod, "wanted to map CPU %u cache %u "
399 			    "with id 0x%" PRIx64 " level %u, type 0x%x, APIC "
400 			    "shift 0x%x to Zen cache type 0x%x, but cache with "
401 			    "id 0x%" PRIx64 " already present", cpuno, i,
402 			    id, level, type, shift, ct, alt_id);
403 			return (B_FALSE);
404 		}
405 	}
406 
407 	return (B_TRUE);
408 }
409 
410 static boolean_t
topo_zen_map_logcpu_to_phys(topo_mod_t * mod,zen_topo_enum_sock_t * sock,nvlist_t * cpu_nvl,uint32_t cpuno,uint32_t apicid)411 topo_zen_map_logcpu_to_phys(topo_mod_t *mod, zen_topo_enum_sock_t *sock,
412     nvlist_t *cpu_nvl, uint32_t cpuno, uint32_t apicid)
413 {
414 	for (uint32_t ccdno = 0; ccdno < sock->ztes_nccd; ccdno++) {
415 		amdzen_topo_ccd_t *ccd = &sock->ztes_ccd[ccdno];
416 		if (ccd->atccd_err != AMDZEN_TOPO_CCD_E_OK)
417 			continue;
418 
419 		for (uint32_t ccxno = 0; ccxno < ccd->atccd_nphys_ccx;
420 		    ccxno++) {
421 			amdzen_topo_ccx_t *ccx;
422 			if (ccd->atccd_ccx_en[ccxno] == 0)
423 				continue;
424 
425 			ccx = &ccd->atccd_ccx[ccxno];
426 			for (uint32_t coreno = 0;
427 			    coreno < ccx->atccx_nphys_cores; coreno++) {
428 				amdzen_topo_core_t *core;
429 				if (ccx->atccx_core_en[coreno] == 0)
430 					continue;
431 
432 				core = &ccx->atccx_cores[coreno];
433 				for (uint32_t thrno = 0;
434 				    thrno < core->atcore_nthreads; thrno++) {
435 					zen_topo_enum_ccd_t *zt_ccd;
436 					zen_topo_enum_ccx_t *zt_ccx;
437 					zen_topo_enum_core_t *zt_core;
438 
439 					if (core->atcore_thr_en[thrno] == 0)
440 						continue;
441 
442 					if (core->atcore_apicids[thrno] !=
443 					    apicid) {
444 						continue;
445 					}
446 
447 					/*
448 					 * We have a match. Make sure we haven't
449 					 * already used it.
450 					 */
451 					zt_ccd = &sock->ztes_tn_ccd[ccdno];
452 					zt_ccx = &zt_ccd->ztccd_ccx[ccxno];
453 					zt_core = &zt_ccx->ztccx_core[coreno];
454 
455 					if (zt_core->ztcore_nvls[thrno] !=
456 					    NULL) {
457 						topo_mod_dprintf(mod, "APIC ID "
458 						    "0x%x mapped to CCD/CCX/"
459 						    "Core/Thread 0x%x/0x%x/"
460 						    "0x%x/0x%x, but found "
461 						    "another nvlist already "
462 						    "there\n", apicid, ccdno,
463 						    ccxno, coreno, thrno);
464 						return (B_FALSE);
465 					}
466 
467 					zt_core->ztcore_nvls[thrno] = cpu_nvl;
468 
469 					/*
470 					 * Now that we have successfully mapped
471 					 * a core into the tree go install the
472 					 * logical CPU's cache information up
473 					 * the tree.
474 					 */
475 					return (topo_zen_map_caches(mod, sock,
476 					    zt_ccx, zt_core, cpuno));
477 				}
478 			}
479 		}
480 	}
481 
482 	topo_mod_dprintf(mod, "failed to find a CPU for apic 0x%x\n",
483 	    apicid);
484 	return (B_FALSE);
485 }
486 
487 /*
488  * Using information from the given logical CPU that we know is part of our
489  * socket that we're enumerating, attempt to go through and load information
490  * about the chip itself such as the family, model, stepping, brand string, etc.
491  * This comes from both the /dev/fm information that we have in cpu_nvl and from
492  * kstats.
493  */
494 static int
topo_zen_map_common_chip_info(topo_mod_t * mod,zen_topo_enum_sock_t * sock,nvlist_t * cpu_nvl)495 topo_zen_map_common_chip_info(topo_mod_t *mod, zen_topo_enum_sock_t *sock,
496     nvlist_t *cpu_nvl)
497 {
498 	char name[KSTAT_STRLEN];
499 	int32_t cpu_id;
500 	uint32_t sockid;
501 	char *rev, *ident;
502 	kstat_t *ks;
503 	const kstat_named_t *knp;
504 
505 	if (nvlist_lookup_pairs(cpu_nvl, 0,
506 	    FM_PHYSCPU_INFO_CPU_ID, DATA_TYPE_INT32, &cpu_id,
507 	    FM_PHYSCPU_INFO_CHIP_REV, DATA_TYPE_STRING, &rev,
508 	    FM_PHYSCPU_INFO_SOCKET_TYPE, DATA_TYPE_UINT32, &sockid,
509 	    FM_PHYSCPU_INFO_FAMILY, DATA_TYPE_INT32, &sock->ztes_cpu_fam,
510 	    FM_PHYSCPU_INFO_MODEL, DATA_TYPE_INT32, &sock->ztes_cpu_model,
511 	    FM_PHYSCPU_INFO_STEPPING, DATA_TYPE_INT32, &sock->ztes_cpu_step,
512 	    NULL) != 0) {
513 		topo_mod_dprintf(mod, "missing required nvlist fields "
514 		    "from FM physcpu info chip ident\n");
515 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
516 	}
517 
518 	/*
519 	 * Some CPUs have PPIN disabled so we look for it separately here. The
520 	 * rest of the aspects are required.
521 	 */
522 	if (nvlist_lookup_string(cpu_nvl, FM_PHYSCPU_INFO_CHIP_IDENTSTR,
523 	    &ident) != 0) {
524 		ident = NULL;
525 	}
526 
527 	/*
528 	 * If we can not fully identify a revision, the kernel will indicate so
529 	 * with a '?' in the name where normally a stepping would show up. See
530 	 * amd_revmap[] in uts/intel/os/cpuid_subr.c. In such a case, we do not
531 	 * want to propagate such a revision.
532 	 */
533 	if (strchr(rev, '?') == NULL) {
534 		sock->ztes_cpu_rev = rev;
535 	}
536 	sock->ztes_cpu_serial = ident;
537 
538 	if (snprintf(name, sizeof (name), "cpu_info%d", cpu_id) >=
539 	    sizeof (name)) {
540 		topo_mod_dprintf(mod, "failed to construct kstat name: "
541 		    "overflow");
542 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
543 	}
544 
545 	ks = kstat_lookup(sock->ztes_kstat, "cpu_info", cpu_id, name);
546 	if (ks == NULL) {
547 		topo_mod_dprintf(mod, "failed to find 'cpu_info:%d:%s': %s",
548 		    cpu_id, name, strerror(errno));
549 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
550 	}
551 
552 	if (kstat_read(sock->ztes_kstat, ks, NULL) == -1) {
553 		topo_mod_dprintf(mod, "failed to read kstat 'cpu_info:%d:%s': "
554 		    "%s", cpu_id, name, strerror(errno));
555 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
556 	}
557 
558 	knp = kstat_data_lookup(ks, "brand");
559 	if (knp == NULL) {
560 		topo_mod_dprintf(mod, "failed to find 'cpu_info:%d:%s:brand\n",
561 		    cpu_id, name);
562 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
563 
564 	}
565 	sock->ztes_cpu_brand = KSTAT_NAMED_STR_PTR(knp);
566 
567 	if (sockid == X86_SOCKET_UNKNOWN) {
568 		return (0);
569 	}
570 
571 	knp = kstat_data_lookup(ks, "socket_type");
572 	if (knp == NULL) {
573 		topo_mod_dprintf(mod, "failed to find 'cpu_info:%d:%s:"
574 		    "socket_type\n", cpu_id, name);
575 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
576 	}
577 	sock->ztes_cpu_sock = KSTAT_NAMED_STR_PTR(knp);
578 
579 	return (0);
580 }
581 
582 static int
topo_zen_enum_chip_map(topo_mod_t * mod,zen_topo_enum_sock_t * sock)583 topo_zen_enum_chip_map(topo_mod_t *mod, zen_topo_enum_sock_t *sock)
584 {
585 	/*
586 	 * We have an arrray of information from /dev/fm that describes each
587 	 * logical CPU. We would like to map that to a given place in physical
588 	 * topology, which we do via the APIC ID. We will then also determine
589 	 * how caches are mapped together.
590 	 */
591 	for (uint_t i = 0; i < sock->ztes_ncpus; i++) {
592 		int32_t apicid, sockid;
593 		nvlist_t *cpu_nvl = sock->ztes_cpus[i];
594 
595 		if (nvlist_lookup_pairs(cpu_nvl, 0,
596 		    FM_PHYSCPU_INFO_CHIP_ID, DATA_TYPE_INT32, &sockid,
597 		    FM_PHYSCPU_INFO_STRAND_APICID, DATA_TYPE_INT32, &apicid,
598 		    NULL) != 0) {
599 			topo_mod_dprintf(mod, "missing required nvlist fields "
600 			    "from FM physcpu info for CPU %u\n", i);
601 			return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
602 		}
603 
604 		/*
605 		 * This logical CPU isn't for our socket, ignore it.
606 		 */
607 		if (sockid != sock->ztes_sockid) {
608 			continue;
609 		}
610 
611 		if (!topo_zen_map_logcpu_to_phys(mod, sock, cpu_nvl, i,
612 		    (uint32_t)apicid)) {
613 			return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
614 		}
615 	}
616 
617 	/*
618 	 * Now that we have each logical CPU taken care of, we want to fill in
619 	 * information about the common CPU.
620 	 */
621 	for (uint_t i = 0; i < sock->ztes_ncpus; i++) {
622 		int32_t sockid;
623 		nvlist_t *cpu_nvl = sock->ztes_cpus[i];
624 
625 		if (nvlist_lookup_pairs(cpu_nvl, 0,
626 		    FM_PHYSCPU_INFO_CHIP_ID, DATA_TYPE_INT32, &sockid,
627 		    NULL) != 0) {
628 			topo_mod_dprintf(mod, "missing required nvlist fields "
629 			    "from FM physcpu info for CPU %u\n", i);
630 			return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
631 		}
632 
633 		/*
634 		 * This logical CPU isn't for our socket, ignore it.
635 		 */
636 		if (sockid != sock->ztes_sockid) {
637 			continue;
638 		}
639 
640 		return (topo_zen_map_common_chip_info(mod, sock, cpu_nvl));
641 	}
642 
643 	topo_mod_dprintf(mod, "no logical CPUs match our target socket %u!\n",
644 	    sock->ztes_sockid);
645 	return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
646 }
647 
648 static int
topo_zen_enum(topo_mod_t * mod,tnode_t * pnode,const char * name,topo_instance_t min,topo_instance_t max,void * modarg,void * data)649 topo_zen_enum(topo_mod_t *mod, tnode_t *pnode, const char *name,
650     topo_instance_t min, topo_instance_t max, void *modarg, void *data)
651 {
652 	int ret;
653 	zen_topo_t *zen = topo_mod_getspecific(mod);
654 	amdzen_topo_df_t *df = NULL;
655 	topo_zen_chip_t *chip;
656 	zen_topo_enum_sock_t sock;
657 
658 	topo_mod_dprintf(mod, "asked to enum %s [%" PRIu64 ", %" PRIu64 "] on "
659 	    "%s%" PRIu64 "\n", name, min, max, topo_node_name(pnode),
660 	    topo_node_instance(pnode));
661 
662 	/*
663 	 * Currently we only support enumerating a given chip.
664 	 */
665 	if (strcmp(name, CHIP) != 0) {
666 		topo_mod_dprintf(mod, "cannot enumerate %s: unknown type\n",
667 		    name);
668 		return (-1);
669 	}
670 
671 	if (data == NULL) {
672 		topo_mod_dprintf(mod, "cannot enumerate %s: missing required "
673 		    "data\n", name);
674 		return (topo_mod_seterrno(mod, EMOD_METHOD_INVAL));
675 	}
676 
677 	if (min != max) {
678 		topo_mod_dprintf(mod, "cannot enumerate %s: multiple instances "
679 		    "requested\n", name);
680 		return (topo_mod_seterrno(mod, EMOD_METHOD_INVAL));
681 	}
682 
683 	chip = data;
684 	for (uint32_t i = 0; i < zen->zt_base.atb_ndf; i++) {
685 		if (zen->zt_dfs[i].atd_sockid == chip->tzc_sockid) {
686 			df = &zen->zt_dfs[i];
687 			break;
688 		}
689 	}
690 
691 	if (df == NULL) {
692 		topo_mod_dprintf(mod, "no matching DF with socket %u",
693 		    chip->tzc_sockid);
694 		return (topo_mod_seterrno(mod, EMOD_METHOD_INVAL));
695 	}
696 
697 	/*
698 	 * In our supported platforms there is either a single DF instance per
699 	 * die (DFv3+ aka Zen 2+) or we have the older style Zen 1 (aka DFv2)
700 	 * systems where there are multiple dies within the package. We don't
701 	 * support Zen 1/DFv2 based systems right now.
702 	 */
703 	if (zen->zt_base.atb_rev == DF_REV_UNKNOWN) {
704 		topo_mod_dprintf(mod, "DF base revision is unknown, cannot "
705 		    "proceed\n");
706 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
707 	}
708 
709 	if (zen->zt_base.atb_rev == DF_REV_2) {
710 		topo_mod_dprintf(mod, "DFv2 multiple dies are not currently "
711 		    "supported\n");
712 		return (topo_mod_seterrno(mod, EMOD_METHOD_NOTSUP));
713 	}
714 
715 	/*
716 	 * We want to create our "chip" node at the top of this. To do that,
717 	 * we'd like to know things like the CPU's PPIN and other information
718 	 * like the socket type and related. To do this we will start by getting
719 	 * information about the physical CPU information from devfm. That will
720 	 * be combined with our knowledge of how APIC IDs map to data fabric
721 	 * elements.
722 	 */
723 	bzero(&sock, sizeof (sock));
724 	sock.ztes_sockid = chip->tzc_sockid;
725 	if ((ret = topo_zen_enum_chip_gather(mod, zen, df, &sock)) != 0) {
726 		topo_zen_enum_cleanup_sock(mod, &sock);
727 		return (ret);
728 	}
729 
730 	/*
731 	 * Determine the mapping of all the logical CPU entries and their data
732 	 * that we found to the CCD mapping.
733 	 */
734 	if ((ret = topo_zen_enum_chip_map(mod, &sock)) != 0) {
735 		return (ret);
736 	}
737 
738 	ret = topo_zen_build_chip(mod, pnode, min, &sock);
739 	topo_zen_enum_cleanup_sock(mod, &sock);
740 
741 	return (ret);
742 }
743 
744 static const topo_modops_t topo_zen_ops = {
745 	topo_zen_enum, NULL
746 };
747 
748 static topo_modinfo_t topo_zen_mod = {
749 	"AMD Zen Enumerator", FM_FMRI_SCHEME_HC, TOPO_MOD_ZEN_VERS,
750 	    &topo_zen_ops
751 };
752 
753 static void
topo_zen_cleanup(topo_mod_t * mod,zen_topo_t * zen)754 topo_zen_cleanup(topo_mod_t *mod, zen_topo_t *zen)
755 {
756 	if (zen->zt_dfs != NULL) {
757 		for (uint32_t i = 0; i < zen->zt_base.atb_ndf; i++) {
758 			size_t entsize;
759 
760 			if (zen->zt_dfs[i].atd_df_ents == NULL)
761 				continue;
762 			entsize = sizeof (amdzen_topo_df_ent_t) *
763 			    zen->zt_base.atb_maxdfent;
764 			topo_mod_free(mod, zen->zt_dfs[i].atd_df_ents,
765 			    entsize);
766 		}
767 		topo_mod_free(mod, zen->zt_dfs, sizeof (amdzen_topo_df_t) *
768 		    zen->zt_base.atb_ndf);
769 	}
770 
771 	if (zen->zt_fd >= 0) {
772 		(void) close(zen->zt_fd);
773 		zen->zt_fd = -1;
774 	}
775 	topo_mod_free(mod, zen, sizeof (zen_topo_t));
776 }
777 
778 static int
topo_zen_init(topo_mod_t * mod,zen_topo_t * zen)779 topo_zen_init(topo_mod_t *mod, zen_topo_t *zen)
780 {
781 	zen->zt_fd = open(topo_zen_dev, O_RDONLY);
782 	if (zen->zt_fd < 0) {
783 		topo_mod_dprintf(mod, "failed to open %s: %s\n", topo_zen_dev,
784 		    strerror(errno));
785 		return (-1);
786 	}
787 
788 	if (ioctl(zen->zt_fd, AMDZEN_TOPO_IOCTL_BASE, &zen->zt_base) != 0) {
789 		topo_mod_dprintf(mod, "failed to get base Zen topology "
790 		    "information: %s\n", strerror(errno));
791 		return (-1);
792 	}
793 
794 	/*
795 	 * Get all of the basic DF information now.
796 	 */
797 	zen->zt_dfs = topo_mod_zalloc(mod, sizeof (amdzen_topo_df_t) *
798 	    zen->zt_base.atb_ndf);
799 	if (zen->zt_dfs == NULL) {
800 		topo_mod_dprintf(mod, "failed to allocate space for %u DF "
801 		    "entries: %s\n", zen->zt_base.atb_ndf,
802 		    topo_strerror(EMOD_NOMEM));
803 		return (-1);
804 	}
805 
806 	for (uint32_t i = 0; i < zen->zt_base.atb_ndf; i++) {
807 		amdzen_topo_df_t *topo_df = &zen->zt_dfs[i];
808 
809 		topo_df->atd_df_ents = topo_mod_zalloc(mod,
810 		    sizeof (amdzen_topo_df_ent_t) * zen->zt_base.atb_maxdfent);
811 		if (topo_df->atd_df_ents == NULL) {
812 			topo_mod_dprintf(mod, "failed to allocate space for "
813 			    "DF %u's DF ents: %s\n", i,
814 			    topo_strerror(EMOD_NOMEM));
815 			return (-1);
816 		}
817 		topo_df->atd_df_buf_nents = zen->zt_base.atb_maxdfent;
818 		topo_df->atd_dfno = i;
819 
820 		if (ioctl(zen->zt_fd, AMDZEN_TOPO_IOCTL_DF, topo_df) != 0) {
821 			topo_mod_dprintf(mod, "failed to get information for "
822 			    "DF %u: %s", i, strerror(errno));
823 			return (-1);
824 		}
825 	}
826 
827 	return (0);
828 }
829 
830 int
_topo_init(topo_mod_t * mod,topo_version_t version)831 _topo_init(topo_mod_t *mod, topo_version_t version)
832 {
833 	zen_topo_t *zen = NULL;
834 
835 	if (getenv("TOPOZENDEBUG") != NULL) {
836 		topo_mod_setdebug(mod);
837 	}
838 	topo_mod_dprintf(mod, "module initializing\n");
839 
840 	zen = topo_mod_zalloc(mod, sizeof (zen_topo_t));
841 	if (zen == NULL) {
842 		topo_mod_dprintf(mod, "failed to allocate zen_topo_t: %s\n",
843 		    topo_strerror(EMOD_NOMEM));
844 		return (-1);
845 	}
846 
847 	if (topo_zen_init(mod, zen) != 0) {
848 		topo_zen_cleanup(mod, zen);
849 		return (-1);
850 	}
851 
852 	if (topo_mod_register(mod, &topo_zen_mod, TOPO_VERSION) != 0) {
853 		topo_zen_cleanup(mod, zen);
854 		return (-1);
855 	}
856 
857 	topo_mod_setspecific(mod, zen);
858 	return (0);
859 }
860 
861 void
_topo_fini(topo_mod_t * mod)862 _topo_fini(topo_mod_t *mod)
863 {
864 	zen_topo_t *zen;
865 
866 	if ((zen = topo_mod_getspecific(mod)) == NULL) {
867 		return;
868 	}
869 
870 	topo_mod_setspecific(mod, NULL);
871 	topo_zen_cleanup(mod, zen);
872 	topo_mod_unregister(mod);
873 }
874