xref: /illumos-gate/usr/src/lib/fm/topo/modules/common/zen/topo_zen.c (revision 92101ea43dccf9afc9af34c02a30de381d3fe66a)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2023 Oxide Computer Company
14  */
15 
16 /*
17  * This module implements a series of enumeration methods that tie into the
18  * amdzen(4D) nexus driver. This module is currently built out of the various
19  * x86 platform directories (though it'd be nice if we could just make this
20  * ISA-specific rather than platform-specific).
21  */
22 
23 #include <sys/fm/protocol.h>
24 #include <fm/topo_mod.h>
25 #include <sys/types.h>
26 #include <sys/stat.h>
27 #include <fcntl.h>
28 #include <errno.h>
29 #include <strings.h>
30 #include <unistd.h>
31 #include <sys/devfm.h>
32 #include <sys/x86_archext.h>
33 
34 #include "topo_zen_impl.h"
35 
/*
 * Path of the minor node exported by amdzen(4D). The module opens this path
 * once at initialization time and issues its topology ioctls on the resulting
 * descriptor.
 */
static const char *topo_zen_dev = "/devices/pseudo/amdzen@0:topo";
41 
42 /*
43  * Helper to indicate whether or not the given DF entry matches the type that we
44  * expect and is fair to use, that is it is enabled. Note, this may correspond
45  * to something in the DF that doesn't actually exist in the SoC, but we don't
46  * know that yet.
47  */
48 static boolean_t
49 topo_zen_fabric_match(const amdzen_topo_df_ent_t *ent, df_type_t type,
50     uint8_t subtype)
51 {
52 	return (ent->atde_type == type && ent->atde_subtype == subtype);
53 }
54 
55 /*
56  * Clean up all data that is associated with an attempt to enumerate the socket.
57  * The structure itself is assumed to be on the stack or handled elsewhere. It
58  * must have been initialized prior to calling this. Don't give us stack
59  * garbage.
60  */
61 static void
62 topo_zen_enum_cleanup_sock(topo_mod_t *mod, zen_topo_enum_sock_t *sock)
63 {
64 	if (sock->ztes_kstat != NULL) {
65 		(void) kstat_close(sock->ztes_kstat);
66 		sock->ztes_kstat = NULL;
67 	}
68 
69 
70 	if (sock->ztes_cpus != NULL) {
71 		for (uint_t i = 0; i < sock->ztes_ncpus; i++) {
72 			nvlist_free(sock->ztes_cpus[i]);
73 		}
74 		umem_free(sock->ztes_cpus, sizeof (nvlist_t *) *
75 		    sock->ztes_ncpus);
76 		sock->ztes_cpus = NULL;
77 	}
78 
79 	if (sock->ztes_fm_agent != NULL) {
80 		fmd_agent_cache_info_free(sock->ztes_fm_agent,
81 		    &sock->ztes_cache);
82 		fmd_agent_close(sock->ztes_fm_agent);
83 		sock->ztes_fm_agent = NULL;
84 	}
85 
86 	if (sock->ztes_tn_ccd != NULL) {
87 		topo_mod_free(mod, sock->ztes_tn_ccd, sock->ztes_nccd *
88 		    sizeof (zen_topo_enum_ccd_t));
89 		sock->ztes_tn_ccd = NULL;
90 	}
91 
92 	if (sock->ztes_ccd != NULL) {
93 		topo_mod_free(mod, sock->ztes_ccd, sock->ztes_nccd *
94 		    sizeof (amdzen_topo_ccd_t));
95 		sock->ztes_ccd = NULL;
96 	}
97 }
98 
99 static int
100 topo_zen_enum_chip_gather_ccd(topo_mod_t *mod, const zen_topo_t *zen,
101     zen_topo_enum_sock_t *sock,
102     const amdzen_topo_df_ent_t *dfe, uint32_t ccdno, uint32_t phys_ccdno)
103 {
104 	amdzen_topo_ccd_t *ccd;
105 
106 	ccd = &sock->ztes_ccd[ccdno];
107 	ccd->atccd_dfno = sock->ztes_df->atd_dfno;
108 	ccd->atccd_instid = dfe->atde_inst_id;
109 	ccd->atccd_phys_no = phys_ccdno;
110 	if (ioctl(zen->zt_fd, AMDZEN_TOPO_IOCTL_CCD, ccd) != 0) {
111 		topo_mod_dprintf(mod, "failed to get CCD information "
112 		    "for DF/CCD 0x%x/0x%x: %s\n", sock->ztes_df->atd_dfno,
113 		    ccd->atccd_instid, strerror(errno));
114 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
115 	}
116 
117 	switch (ccd->atccd_err) {
118 	case AMDZEN_TOPO_CCD_E_OK:
119 		sock->ztes_nccd_valid++;
120 		break;
121 	/*
122 	 * We ignore errors about CCDs being missing. This is fine
123 	 * because on systems without a full CCD complement this will
124 	 * happen and is expected. We make sure we have at least one
125 	 * valid CCD before continuing.
126 	 */
127 	case AMDZEN_TOPO_CCD_E_CCD_MISSING:
128 		break;
129 	default:
130 		topo_mod_dprintf(mod, "DF CCM fabric 0x%x, CCD 0x%x "
131 		    "didn't give us valid info: found error 0x%x\n",
132 		    dfe->atde_fabric_id, phys_ccdno, ccd->atccd_err);
133 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
134 	}
135 
136 	return (0);
137 }
138 
139 
140 /*
141  * Go through all of our disparate sources and gather information that we'll
142  * need to process and perform enumeration. We need to gather the following
143  * disparate pieces of information:
144  *
145  * 1) We need to determine what's going on with all the CCDs and ask the
146  * amdzen(4D) driver for information.
147  *
148  * 2) We need to use the FM agent to ask /dev/fm to get all the CPU information
149  * for this system.
150  *
151  * 3) We use the same system to go get all the actual cache information for this
152  * system.
153  *
154  * 4) We grab some of the chip-wide information such as the socket and brand
155  * string information through kstats, with information about a valid CPU ID.
156  */
157 static int
158 topo_zen_enum_chip_gather(topo_mod_t *mod, const zen_topo_t *zen,
159     const amdzen_topo_df_t *df, zen_topo_enum_sock_t *sock)
160 {
161 	uint32_t nccd = 0;
162 
163 	sock->ztes_df = df;
164 	for (uint32_t i = 0; i < df->atd_df_buf_nvalid; i++) {
165 		const amdzen_topo_df_ent_t *dfe = &df->atd_df_ents[i];
166 		if (topo_zen_fabric_match(dfe, DF_TYPE_CCM,
167 		    DF_CCM_SUBTYPE_CPU)) {
168 			nccd += dfe->atde_data.atded_ccm.atcd_nccds;
169 		}
170 	}
171 
172 	if (nccd == 0) {
173 		topo_mod_dprintf(mod, "no CCDs found! Not much more we can "
174 		    "do... Something probably went wrong\n");
175 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
176 	}
177 
178 	sock->ztes_nccd = nccd;
179 	sock->ztes_ccd = topo_mod_zalloc(mod, sizeof (amdzen_topo_ccd_t) *
180 	    sock->ztes_nccd);
181 	if (sock->ztes_ccd == NULL) {
182 		topo_mod_dprintf(mod, "failed to allocate memory for "
183 		    "ztes_ccd[]\n");
184 		return (topo_mod_seterrno(mod, EMOD_NOMEM));
185 	}
186 
187 	sock->ztes_tn_ccd = topo_mod_zalloc(mod, sizeof (zen_topo_enum_ccd_t) *
188 	    sock->ztes_nccd);
189 
190 	for (uint32_t i = 0, ccdno = 0; i < df->atd_df_buf_nvalid; i++) {
191 		const amdzen_topo_df_ent_t *dfe = &df->atd_df_ents[i];
192 		const amdzen_topo_ccm_data_t *ccm;
193 
194 		if (!topo_zen_fabric_match(dfe, DF_TYPE_CCM,
195 		    DF_CCM_SUBTYPE_CPU)) {
196 			continue;
197 		}
198 
199 		ccm = &dfe->atde_data.atded_ccm;
200 		for (uint32_t ccm_ccdno = 0; ccm_ccdno < ccm->atcd_nccds;
201 		    ccm_ccdno++) {
202 			if (ccm->atcd_ccd_en[ccm_ccdno] == 0) {
203 				continue;
204 			}
205 
206 			if (topo_zen_enum_chip_gather_ccd(mod, zen, sock, dfe,
207 			    ccdno, ccm->atcd_ccd_ids[ccm_ccdno]) != 0) {
208 				return (-1);
209 			}
210 
211 			ccdno++;
212 		}
213 	}
214 
215 	topo_mod_dprintf(mod, "found %u CCDs\n", sock->ztes_nccd_valid);
216 	if (sock->ztes_nccd_valid == 0) {
217 		topo_mod_dprintf(mod, "somehow we ended up with no CCDs with "
218 		    "valid topo information. Something went very wrong.\n");
219 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
220 	}
221 
222 	sock->ztes_fm_agent = fmd_agent_open(FMD_AGENT_VERSION);
223 	if (sock->ztes_fm_agent == NULL) {
224 		topo_mod_dprintf(mod, "failed to open FMD agent: %s\n",
225 		    strerror(errno));
226 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
227 	}
228 
229 	if (fmd_agent_physcpu_info(sock->ztes_fm_agent, &sock->ztes_cpus,
230 	    &sock->ztes_ncpus) != 0) {
231 		topo_mod_dprintf(mod, "failed to get FM agent CPU "
232 		    "information: %s\n",
233 		    strerror(fmd_agent_errno(sock->ztes_fm_agent)));
234 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
235 	}
236 
237 	topo_mod_dprintf(mod, "got %u CPUs worth of data from the FM agent\n",
238 	    sock->ztes_ncpus);
239 
240 	if (fmd_agent_cache_info(sock->ztes_fm_agent, &sock->ztes_cache) != 0) {
241 		topo_mod_dprintf(mod, "failed to get FM agent cache "
242 		    "information: %s\n",
243 		    strerror(fmd_agent_errno(sock->ztes_fm_agent)));
244 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
245 	}
246 
247 	if (sock->ztes_cache.fmc_ncpus != sock->ztes_ncpus) {
248 		topo_mod_dprintf(mod, "/dev/fm gave us %u CPUs, but %u CPUs "
249 		    "for cache information: cannot continue\n",
250 		    sock->ztes_ncpus, sock->ztes_cache.fmc_ncpus);
251 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
252 	}
253 
254 	sock->ztes_kstat = kstat_open();
255 	if (sock->ztes_kstat == NULL) {
256 		topo_mod_dprintf(mod, "failed to open kstat driver: %s\n",
257 		    strerror(errno));
258 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
259 	}
260 
261 	return (0);
262 }
263 
/*
 * The kinds of cache that this module knows how to place in the topology.
 * ZEN_TOPO_CACHE_UNKNOWN is what the classifier hands back when nothing in
 * its table matches.
 */
typedef enum {
	ZEN_TOPO_CACHE_UNKNOWN = 0,
	ZEN_TOPO_CACHE_CORE_L1D,	/* L1 data cache, stored on the core */
	ZEN_TOPO_CACHE_CORE_L1I,	/* L1 instr cache, stored on the core */
	ZEN_TOPO_CACHE_CORE_L2,		/* L2 cache, stored on the core */
	ZEN_TOPO_CACHE_CCX_L3		/* L3 cache, stored on the CCX */
} zen_topo_cache_type_t;
271 
272 typedef struct {
273 	uint32_t		ztcm_level;
274 	fm_cache_info_type_t	ztcm_type;
275 	boolean_t		ztcm_core;
276 	zen_topo_cache_type_t	ztcm_cache;
277 } zen_topo_cache_map_t;
278 
279 const zen_topo_cache_map_t zen_topo_cache_map[] = {
280 	{ 1, FM_CACHE_INFO_T_DATA, B_TRUE, ZEN_TOPO_CACHE_CORE_L1D },
281 	{ 1, FM_CACHE_INFO_T_INSTR, B_TRUE, ZEN_TOPO_CACHE_CORE_L1I },
282 	{ 2, FM_CACHE_INFO_T_DATA | FM_CACHE_INFO_T_INSTR |
283 	    FM_CACHE_INFO_T_UNIFIED, B_TRUE, ZEN_TOPO_CACHE_CORE_L2 },
284 	{ 3, FM_CACHE_INFO_T_DATA | FM_CACHE_INFO_T_INSTR |
285 	    FM_CACHE_INFO_T_UNIFIED, B_FALSE, ZEN_TOPO_CACHE_CCX_L3 }
286 };
287 
288 static zen_topo_cache_type_t
289 zen_topo_determine_cache(topo_mod_t *mod, uint32_t level, uint32_t type,
290     uint32_t shift)
291 {
292 	zen_topo_t *zen = topo_mod_getspecific(mod);
293 
294 	for (size_t i = 0; i < ARRAY_SIZE(zen_topo_cache_map); i++) {
295 		const zen_topo_cache_map_t *map = &zen_topo_cache_map[i];
296 		uint32_t apic;
297 
298 		if (map->ztcm_level != level || map->ztcm_type != type) {
299 			continue;
300 		}
301 
302 		if (map->ztcm_core) {
303 			apic = zen->zt_base.atb_apic_decomp.aad_core_shift;
304 		} else {
305 			apic = zen->zt_base.atb_apic_decomp.aad_ccx_shift;
306 		}
307 
308 		if (shift == apic) {
309 			return (map->ztcm_cache);
310 		}
311 	}
312 
313 	return (ZEN_TOPO_CACHE_UNKNOWN);
314 }
315 
316 /*
317  * We have mapped a logical CPU to a position in the hierarchy. We must now walk
318  * its caches and attempt to install them up the chain. We assume that there
319  * there are four caches right now: an L1i, L1d, L2, and L3 cache. We will
320  * verify that these are shared at the points in the hierarchy that we expect.
321  * Note, AMD has mixed designs with 1 CCX and 2 CCXs. When there is only 1 CCX
322  * then we often describe the CCX and CCD as equivalent though if you look at
323  * the PPR it describes each CCD as having a single CCX. This is why the L3
324  * cache lives on the CCX right now.
325  */
326 static boolean_t
327 topo_zen_map_caches(topo_mod_t *mod, zen_topo_enum_sock_t *sock,
328     zen_topo_enum_ccx_t *ccx, zen_topo_enum_core_t *core, uint32_t cpuno)
329 {
330 	fmd_agent_cpu_cache_t *cpu_cache = &sock->ztes_cache.fmc_cpus[cpuno];
331 	if (cpu_cache->fmcc_ncaches == 0) {
332 		return (B_TRUE);
333 	}
334 
335 	/*
336 	 * For each cache that we discover we need to do the following:
337 	 *
338 	 *  o Determine the type of cache that this is. While the upper layers
339 	 *    guarantee us the L1 caches come before L2 and L2 before L3, we
340 	 *    don't care.
341 	 *  o Use the APIC shift and our APIC decomp to confirm the level of the
342 	 *    hierarchy this should operate at.
343 	 *  o If a cache is already there, it should have the same ID as the one
344 	 *    that we already have.
345 	 */
346 	for (uint_t i = 0; i < cpu_cache->fmcc_ncaches; i++) {
347 		nvlist_t *nvl = cpu_cache->fmcc_caches[i];
348 		nvlist_t **cachep = NULL;
349 		zen_topo_cache_type_t ct;
350 		uint32_t level, type, shift;
351 		uint64_t id, alt_id;
352 
353 		if (nvlist_lookup_pairs(nvl, 0,
354 		    FM_CACHE_INFO_LEVEL, DATA_TYPE_UINT32, &level,
355 		    FM_CACHE_INFO_TYPE, DATA_TYPE_UINT32, &type,
356 		    FM_CACHE_INFO_ID, DATA_TYPE_UINT64, &id,
357 		    FM_CACHE_INFO_X86_APIC_SHIFT, DATA_TYPE_UINT32, &shift,
358 		    NULL) != 0) {
359 			topo_mod_dprintf(mod, "missing required nvlist fields "
360 			    "from FM CPU %u cache %u\n", cpuno, i);
361 			return (B_FALSE);
362 		}
363 
364 		ct = zen_topo_determine_cache(mod, level, type, shift);
365 		switch (ct) {
366 		case ZEN_TOPO_CACHE_UNKNOWN:
367 			topo_mod_dprintf(mod, "failed to map CPU %u cache %u "
368 			    "with id 0x%" PRIx64 " level %u, type 0x%x, APIC "
369 			    "shift 0x%x to a known type\n", cpuno, i, id, level,
370 			    type, shift);
371 			return (B_FALSE);
372 		case ZEN_TOPO_CACHE_CORE_L1D:
373 			cachep = &core->ztcore_l1d;
374 			break;
375 		case ZEN_TOPO_CACHE_CORE_L1I:
376 			cachep = &core->ztcore_l1i;
377 			break;
378 		case ZEN_TOPO_CACHE_CORE_L2:
379 			cachep = &core->ztcore_l2;
380 			break;
381 		case ZEN_TOPO_CACHE_CCX_L3:
382 			cachep = &ccx->ztccx_l3;
383 			break;
384 		}
385 
386 		if (*cachep == NULL) {
387 			*cachep = nvl;
388 			continue;
389 		}
390 
391 		alt_id = fnvlist_lookup_uint64(*cachep, FM_CACHE_INFO_ID);
392 		if (alt_id != id) {
393 			topo_mod_dprintf(mod, "wanted to map CPU %u cache %u "
394 			    "with id 0x%" PRIx64 " level %u, type 0x%x, APIC "
395 			    "shift 0x%x to Zen cache type 0x%x, but cache with "
396 			    "id 0x%" PRIx64 " already present", cpuno, i,
397 			    id, level, type, shift, ct, alt_id);
398 			return (B_FALSE);
399 		}
400 	}
401 
402 	return (B_TRUE);
403 }
404 
405 static boolean_t
406 topo_zen_map_logcpu_to_phys(topo_mod_t *mod, zen_topo_enum_sock_t *sock,
407     nvlist_t *cpu_nvl, uint32_t cpuno, uint32_t apicid)
408 {
409 	for (uint32_t ccdno = 0; ccdno < sock->ztes_nccd; ccdno++) {
410 		amdzen_topo_ccd_t *ccd = &sock->ztes_ccd[ccdno];
411 		if (ccd->atccd_err != AMDZEN_TOPO_CCD_E_OK)
412 			continue;
413 
414 		for (uint32_t ccxno = 0; ccxno < ccd->atccd_nphys_ccx;
415 		    ccxno++) {
416 			amdzen_topo_ccx_t *ccx;
417 			if (ccd->atccd_ccx_en[ccxno] == 0)
418 				continue;
419 
420 			ccx = &ccd->atccd_ccx[ccxno];
421 			for (uint32_t coreno = 0;
422 			    coreno < ccx->atccx_nphys_cores; coreno++) {
423 				amdzen_topo_core_t *core;
424 				if (ccx->atccx_core_en[coreno] == 0)
425 					continue;
426 
427 				core = &ccx->atccx_cores[coreno];
428 				for (uint32_t thrno = 0;
429 				    thrno < core->atcore_nthreads; thrno++) {
430 					zen_topo_enum_ccd_t *zt_ccd;
431 					zen_topo_enum_ccx_t *zt_ccx;
432 					zen_topo_enum_core_t *zt_core;
433 
434 					if (core->atcore_thr_en[thrno] == 0)
435 						continue;
436 
437 					if (core->atcore_apicids[thrno] !=
438 					    apicid) {
439 						continue;
440 					}
441 
442 					/*
443 					 * We have a match. Make sure we haven't
444 					 * already used it.
445 					 */
446 					zt_ccd = &sock->ztes_tn_ccd[ccdno];
447 					zt_ccx = &zt_ccd->ztccd_ccx[ccxno];
448 					zt_core = &zt_ccx->ztccx_core[coreno];
449 
450 					if (zt_core->ztcore_nvls[thrno] !=
451 					    NULL) {
452 						topo_mod_dprintf(mod, "APIC ID "
453 						    "0x%x mapped to CCD/CCX/"
454 						    "Core/Thread 0x%x/0x%x/"
455 						    "0x%x/0x%x, but found "
456 						    "another nvlist already "
457 						    "there\n", apicid, ccdno,
458 						    ccxno, coreno, thrno);
459 						return (B_FALSE);
460 					}
461 
462 					zt_core->ztcore_nvls[thrno] = cpu_nvl;
463 
464 					/*
465 					 * Now that we have successfully mapped
466 					 * a core into the tree go install the
467 					 * logical CPU's cache information up
468 					 * the tree.
469 					 */
470 					return (topo_zen_map_caches(mod, sock,
471 					    zt_ccx, zt_core, cpuno));
472 				}
473 			}
474 		}
475 	}
476 
477 	topo_mod_dprintf(mod, "failed to find a CPU for apic 0x%x\n",
478 	    apicid);
479 	return (B_FALSE);
480 }
481 
482 /*
483  * Using information from the given logical CPU that we know is part of our
484  * socket that we're enumerating, attempt to go through and load information
485  * about the chip itself such as the family, model, stepping, brand string, etc.
486  * This comes from both the /dev/fm information that we have in cpu_nvl and from
487  * kstats.
488  */
489 static int
490 topo_zen_map_common_chip_info(topo_mod_t *mod, zen_topo_enum_sock_t *sock,
491     nvlist_t *cpu_nvl)
492 {
493 	char name[KSTAT_STRLEN];
494 	int32_t cpu_id;
495 	uint32_t sockid;
496 	char *rev, *ident;
497 	kstat_t *ks;
498 	const kstat_named_t *knp;
499 
500 	if (nvlist_lookup_pairs(cpu_nvl, 0,
501 	    FM_PHYSCPU_INFO_CPU_ID, DATA_TYPE_INT32, &cpu_id,
502 	    FM_PHYSCPU_INFO_CHIP_IDENTSTR, DATA_TYPE_STRING, &ident,
503 	    FM_PHYSCPU_INFO_CHIP_REV, DATA_TYPE_STRING, &rev,
504 	    FM_PHYSCPU_INFO_SOCKET_TYPE, DATA_TYPE_UINT32, &sockid,
505 	    FM_PHYSCPU_INFO_FAMILY, DATA_TYPE_INT32, &sock->ztes_cpu_fam,
506 	    FM_PHYSCPU_INFO_MODEL, DATA_TYPE_INT32, &sock->ztes_cpu_model,
507 	    FM_PHYSCPU_INFO_STEPPING, DATA_TYPE_INT32, &sock->ztes_cpu_step,
508 	    NULL) != 0) {
509 		topo_mod_dprintf(mod, "missing required nvlist fields "
510 		    "from FM physcpu info chip ident\n");
511 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
512 	}
513 
514 	/*
515 	 * If we can not fully identify a revision, the kernel will indicate so
516 	 * with a '?' in the name where normally a stepping would show up. See
517 	 * amd_revmap[] in uts/intel/os/cpuid_subr.c. In such a case, we do not
518 	 * want to propagate such a revision.
519 	 */
520 	if (strchr(rev, '?') == NULL) {
521 		sock->ztes_cpu_rev = rev;
522 	}
523 	sock->ztes_cpu_serial = ident;
524 
525 	if (snprintf(name, sizeof (name), "cpu_info%d", cpu_id) >=
526 	    sizeof (name)) {
527 		topo_mod_dprintf(mod, "failed to construct kstat name: "
528 		    "overflow");
529 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
530 	}
531 
532 	ks = kstat_lookup(sock->ztes_kstat, "cpu_info", cpu_id, name);
533 	if (ks == NULL) {
534 		topo_mod_dprintf(mod, "failed to find 'cpu_info:%d:%s': %s",
535 		    cpu_id, name, strerror(errno));
536 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
537 	}
538 
539 	if (kstat_read(sock->ztes_kstat, ks, NULL) == -1) {
540 		topo_mod_dprintf(mod, "failed to read kstat 'cpu_info:%d:%s': "
541 		    "%s", cpu_id, name, strerror(errno));
542 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
543 	}
544 
545 	knp = kstat_data_lookup(ks, "brand");
546 	if (knp == NULL) {
547 		topo_mod_dprintf(mod, "failed to find 'cpu_info:%d:%s:brand\n",
548 		    cpu_id, name);
549 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
550 
551 	}
552 	sock->ztes_cpu_brand = KSTAT_NAMED_STR_PTR(knp);
553 
554 	if (sockid == X86_SOCKET_UNKNOWN) {
555 		return (0);
556 	}
557 
558 	knp = kstat_data_lookup(ks, "socket_type");
559 	if (knp == NULL) {
560 		topo_mod_dprintf(mod, "failed to find 'cpu_info:%d:%s:"
561 		    "socket_type\n", cpu_id, name);
562 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
563 	}
564 	sock->ztes_cpu_sock = KSTAT_NAMED_STR_PTR(knp);
565 
566 	return (0);
567 }
568 
569 static int
570 topo_zen_enum_chip_map(topo_mod_t *mod, zen_topo_enum_sock_t *sock)
571 {
572 	/*
573 	 * We have an arrray of information from /dev/fm that describes each
574 	 * logical CPU. We would like to map that to a given place in physical
575 	 * topology, which we do via the APIC ID. We will then also determine
576 	 * how caches are mapped together.
577 	 */
578 	for (uint_t i = 0; i < sock->ztes_ncpus; i++) {
579 		int32_t apicid, sockid;
580 		nvlist_t *cpu_nvl = sock->ztes_cpus[i];
581 
582 		if (nvlist_lookup_pairs(cpu_nvl, 0,
583 		    FM_PHYSCPU_INFO_CHIP_ID, DATA_TYPE_INT32, &sockid,
584 		    FM_PHYSCPU_INFO_STRAND_APICID, DATA_TYPE_INT32, &apicid,
585 		    NULL) != 0) {
586 			topo_mod_dprintf(mod, "missing required nvlist fields "
587 			    "from FM physcpu info for CPU %u\n", i);
588 			return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
589 		}
590 
591 		/*
592 		 * This logical CPU isn't for our socket, ignore it.
593 		 */
594 		if (sockid != sock->ztes_sockid) {
595 			continue;
596 		}
597 
598 		if (!topo_zen_map_logcpu_to_phys(mod, sock, cpu_nvl, i,
599 		    (uint32_t)apicid)) {
600 			return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
601 		}
602 	}
603 
604 	/*
605 	 * Now that we have each logical CPU taken care of, we want to fill in
606 	 * information about the common CPU.
607 	 */
608 	for (uint_t i = 0; i < sock->ztes_ncpus; i++) {
609 		int32_t sockid;
610 		nvlist_t *cpu_nvl = sock->ztes_cpus[i];
611 
612 		if (nvlist_lookup_pairs(cpu_nvl, 0,
613 		    FM_PHYSCPU_INFO_CHIP_ID, DATA_TYPE_INT32, &sockid,
614 		    NULL) != 0) {
615 			topo_mod_dprintf(mod, "missing required nvlist fields "
616 			    "from FM physcpu info for CPU %u\n", i);
617 			return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
618 		}
619 
620 		/*
621 		 * This logical CPU isn't for our socket, ignore it.
622 		 */
623 		if (sockid != sock->ztes_sockid) {
624 			continue;
625 		}
626 
627 		return (topo_zen_map_common_chip_info(mod, sock, cpu_nvl));
628 	}
629 
630 	topo_mod_dprintf(mod, "no logical CPUs match our target socket %u!\n",
631 	    sock->ztes_sockid);
632 	return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
633 }
634 
635 static int
636 topo_zen_enum(topo_mod_t *mod, tnode_t *pnode, const char *name,
637     topo_instance_t min, topo_instance_t max, void *modarg, void *data)
638 {
639 	int ret;
640 	zen_topo_t *zen = topo_mod_getspecific(mod);
641 	amdzen_topo_df_t *df = NULL;
642 	topo_zen_chip_t *chip;
643 	zen_topo_enum_sock_t sock;
644 
645 	topo_mod_dprintf(mod, "asked to enum %s [%" PRIu64 ", %" PRIu64 "] on "
646 	    "%s%" PRIu64 "\n", name, min, max, topo_node_name(pnode),
647 	    topo_node_instance(pnode));
648 
649 	/*
650 	 * Currently we only support enumerating a given chip.
651 	 */
652 	if (strcmp(name, CHIP) != 0) {
653 		topo_mod_dprintf(mod, "cannot enumerate %s: unknown type\n",
654 		    name);
655 		return (-1);
656 	}
657 
658 	if (data == NULL) {
659 		topo_mod_dprintf(mod, "cannot enumerate %s: missing required "
660 		    "data\n", name);
661 		return (topo_mod_seterrno(mod, EMOD_METHOD_INVAL));
662 	}
663 
664 	if (min != max) {
665 		topo_mod_dprintf(mod, "cannot enumerate %s: multiple instances "
666 		    "requested\n", name);
667 		return (topo_mod_seterrno(mod, EMOD_METHOD_INVAL));
668 	}
669 
670 	chip = data;
671 	for (uint32_t i = 0; i < zen->zt_base.atb_ndf; i++) {
672 		if (zen->zt_dfs[i].atd_sockid == chip->tzc_sockid) {
673 			df = &zen->zt_dfs[i];
674 			break;
675 		}
676 	}
677 
678 	if (df == NULL) {
679 		topo_mod_dprintf(mod, "no matching DF with socket %u",
680 		    chip->tzc_sockid);
681 		return (topo_mod_seterrno(mod, EMOD_METHOD_INVAL));
682 	}
683 
684 	/*
685 	 * In our supported platforms there is either a single DF instance per
686 	 * die (DFv3+ aka Zen 2+) or we have the older style Zen 1 (aka DFv2)
687 	 * systems where there are multiple dies within the package. We don't
688 	 * support Zen 1/DFv2 based systems right now.
689 	 */
690 	if (zen->zt_base.atb_rev == DF_REV_UNKNOWN) {
691 		topo_mod_dprintf(mod, "DF base revision is unknown, cannot "
692 		    "proceed\n");
693 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
694 	}
695 
696 	if (zen->zt_base.atb_rev == DF_REV_2) {
697 		topo_mod_dprintf(mod, "DFv2 multiple dies are not currently "
698 		    "supported\n");
699 		return (topo_mod_seterrno(mod, EMOD_METHOD_NOTSUP));
700 	}
701 
702 	/*
703 	 * We want to create our "chip" node at the top of this. To do that,
704 	 * we'd like to know things like the CPU's PPIN and other information
705 	 * like the socket type and related. To do this we will start by getting
706 	 * information about the physical CPU information from devfm. That will
707 	 * be combined with our knowledge of how APIC IDs map to data fabric
708 	 * elements.
709 	 */
710 	bzero(&sock, sizeof (sock));
711 	sock.ztes_sockid = chip->tzc_sockid;
712 	if ((ret = topo_zen_enum_chip_gather(mod, zen, df, &sock)) != 0) {
713 		topo_zen_enum_cleanup_sock(mod, &sock);
714 		return (ret);
715 	}
716 
717 	/*
718 	 * Determine the mapping of all the logical CPU entries and their data
719 	 * that we found to the CCD mapping.
720 	 */
721 	if ((ret = topo_zen_enum_chip_map(mod, &sock)) != 0) {
722 		return (ret);
723 	}
724 
725 	ret = topo_zen_build_chip(mod, pnode, min, &sock);
726 	topo_zen_enum_cleanup_sock(mod, &sock);
727 
728 	return (ret);
729 }
730 
731 static const topo_modops_t topo_zen_ops = {
732 	topo_zen_enum, NULL
733 };
734 
735 static topo_modinfo_t topo_zen_mod = {
736 	"AMD Zen Enumerator", FM_FMRI_SCHEME_HC, TOPO_MOD_ZEN_VERS,
737 	    &topo_zen_ops
738 };
739 
740 static void
741 topo_zen_cleanup(topo_mod_t *mod, zen_topo_t *zen)
742 {
743 	if (zen->zt_dfs != NULL) {
744 		for (uint32_t i = 0; i < zen->zt_base.atb_ndf; i++) {
745 			size_t entsize;
746 
747 			if (zen->zt_dfs[i].atd_df_ents == NULL)
748 				continue;
749 			entsize = sizeof (amdzen_topo_df_ent_t) *
750 			    zen->zt_base.atb_maxdfent;
751 			topo_mod_free(mod, zen->zt_dfs[i].atd_df_ents,
752 			    entsize);
753 		}
754 		topo_mod_free(mod, zen->zt_dfs, sizeof (amdzen_topo_df_t) *
755 		    zen->zt_base.atb_ndf);
756 	}
757 
758 	if (zen->zt_fd >= 0) {
759 		(void) close(zen->zt_fd);
760 		zen->zt_fd = -1;
761 	}
762 	topo_mod_free(mod, zen, sizeof (zen_topo_t));
763 }
764 
765 static int
766 topo_zen_init(topo_mod_t *mod, zen_topo_t *zen)
767 {
768 	zen->zt_fd = open(topo_zen_dev, O_RDONLY);
769 	if (zen->zt_fd < 0) {
770 		topo_mod_dprintf(mod, "failed to open %s: %s\n", topo_zen_dev,
771 		    strerror(errno));
772 		return (-1);
773 	}
774 
775 	if (ioctl(zen->zt_fd, AMDZEN_TOPO_IOCTL_BASE, &zen->zt_base) != 0) {
776 		topo_mod_dprintf(mod, "failed to get base Zen topology "
777 		    "information: %s\n", strerror(errno));
778 		return (-1);
779 	}
780 
781 	/*
782 	 * Get all of the basic DF information now.
783 	 */
784 	zen->zt_dfs = topo_mod_zalloc(mod, sizeof (amdzen_topo_df_t) *
785 	    zen->zt_base.atb_ndf);
786 	if (zen->zt_dfs == NULL) {
787 		topo_mod_dprintf(mod, "failed to allocate space for %u DF "
788 		    "entries: %s\n", zen->zt_base.atb_ndf,
789 		    topo_strerror(EMOD_NOMEM));
790 		return (-1);
791 	}
792 
793 	for (uint32_t i = 0; i < zen->zt_base.atb_ndf; i++) {
794 		amdzen_topo_df_t *topo_df = &zen->zt_dfs[i];
795 
796 		topo_df->atd_df_ents = topo_mod_zalloc(mod,
797 		    sizeof (amdzen_topo_df_ent_t) * zen->zt_base.atb_maxdfent);
798 		if (topo_df->atd_df_ents == NULL) {
799 			topo_mod_dprintf(mod, "failed to allocate space for "
800 			    "DF %u's DF ents: %s\n", i,
801 			    topo_strerror(EMOD_NOMEM));
802 			return (-1);
803 		}
804 		topo_df->atd_df_buf_nents = zen->zt_base.atb_maxdfent;
805 		topo_df->atd_dfno = i;
806 
807 		if (ioctl(zen->zt_fd, AMDZEN_TOPO_IOCTL_DF, topo_df) != 0) {
808 			topo_mod_dprintf(mod, "failed to get information for "
809 			    "DF %u: %s", i, strerror(errno));
810 			return (-1);
811 		}
812 	}
813 
814 	return (0);
815 }
816 
817 int
818 _topo_init(topo_mod_t *mod, topo_version_t version)
819 {
820 	zen_topo_t *zen = NULL;
821 
822 	if (getenv("TOPOZENDEBUG") != NULL) {
823 		topo_mod_setdebug(mod);
824 	}
825 	topo_mod_dprintf(mod, "module initializing\n");
826 
827 	zen = topo_mod_zalloc(mod, sizeof (zen_topo_t));
828 	if (zen == NULL) {
829 		topo_mod_dprintf(mod, "failed to allocate zen_topo_t: %s\n",
830 		    topo_strerror(EMOD_NOMEM));
831 		return (-1);
832 	}
833 
834 	if (topo_zen_init(mod, zen) != 0) {
835 		topo_zen_cleanup(mod, zen);
836 		return (-1);
837 	}
838 
839 	if (topo_mod_register(mod, &topo_zen_mod, TOPO_VERSION) != 0) {
840 		topo_zen_cleanup(mod, zen);
841 		return (-1);
842 	}
843 
844 	topo_mod_setspecific(mod, zen);
845 	return (0);
846 }
847 
848 void
849 _topo_fini(topo_mod_t *mod)
850 {
851 	zen_topo_t *zen;
852 
853 	if ((zen = topo_mod_getspecific(mod)) == NULL) {
854 		return;
855 	}
856 
857 	topo_mod_setspecific(mod, NULL);
858 	topo_zen_cleanup(mod, zen);
859 	topo_mod_unregister(mod);
860 }
861