1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2025 Oxide Computer Company
14 */
15
16 /*
17 * This module implements a series of enumeration methods that tie into the
18 * amdzen(4D) nexus driver. This module is currently built out of the various
19 * x86 platform directories (though it'd be nice if we could just make this
20 * ISA-specific rather than platform-specific).
21 */
22
23 #include <sys/fm/protocol.h>
24 #include <fm/topo_mod.h>
25 #include <sys/types.h>
26 #include <sys/stat.h>
27 #include <fcntl.h>
28 #include <errno.h>
29 #include <strings.h>
30 #include <unistd.h>
31 #include <sys/devfm.h>
32 #include <sys/x86_archext.h>
33
34 #include "topo_zen_impl.h"
35
/*
 * Device node exported by amdzen(4D). We open it and issue topology ioctls
 * against it to learn about the data fabric and its CCDs.
 */
static const char *topo_zen_dev =
    "/devices/pseudo/amdzen@0:topo";
41
42 static inline boolean_t
topo_zen_df_at_least(const amdzen_topo_df_t * df,uint8_t major,uint8_t minor)43 topo_zen_df_at_least(const amdzen_topo_df_t *df, uint8_t major, uint8_t minor)
44 {
45 return (df->atd_major > major || (df->atd_major == major &&
46 df->atd_minor >= minor));
47 }
48
49 /*
50 * Helper to determine whether or not a given DF entity's type is that of a CCM
51 * or not as this has changed across the various DF versions.
52 */
53 static boolean_t
topo_zen_fabric_is_ccm(const amdzen_topo_df_t * df,const amdzen_topo_df_ent_t * ent)54 topo_zen_fabric_is_ccm(const amdzen_topo_df_t *df,
55 const amdzen_topo_df_ent_t *ent)
56 {
57 if (ent->atde_type != DF_TYPE_CCM) {
58 return (B_FALSE);
59 }
60
61 if (df->atd_rev >= DF_REV_4 && topo_zen_df_at_least(df, 4, 1)) {
62 return (ent->atde_subtype == DF_CCM_SUBTYPE_CPU_V4P1);
63 } else {
64 return (ent->atde_subtype == DF_CCM_SUBTYPE_CPU_V2);
65 }
66 }
67
68 /*
69 * Clean up all data that is associated with an attempt to enumerate the socket.
70 * The structure itself is assumed to be on the stack or handled elsewhere. It
71 * must have been initialized prior to calling this. Don't give us stack
72 * garbage.
73 */
74 static void
topo_zen_enum_cleanup_sock(topo_mod_t * mod,zen_topo_enum_sock_t * sock)75 topo_zen_enum_cleanup_sock(topo_mod_t *mod, zen_topo_enum_sock_t *sock)
76 {
77 if (sock->ztes_kstat != NULL) {
78 (void) kstat_close(sock->ztes_kstat);
79 sock->ztes_kstat = NULL;
80 }
81
82 if (sock->ztes_cpus != NULL) {
83 for (uint_t i = 0; i < sock->ztes_ncpus; i++) {
84 nvlist_free(sock->ztes_cpus[i]);
85 }
86 umem_free(sock->ztes_cpus, sizeof (nvlist_t *) *
87 sock->ztes_ncpus);
88 sock->ztes_cpus = NULL;
89 }
90
91 if (sock->ztes_fm_agent != NULL) {
92 fmd_agent_cache_info_free(sock->ztes_fm_agent,
93 &sock->ztes_cache);
94 fmd_agent_close(sock->ztes_fm_agent);
95 sock->ztes_fm_agent = NULL;
96 }
97
98 if (sock->ztes_tn_ccd != NULL) {
99 topo_mod_free(mod, sock->ztes_tn_ccd, sock->ztes_nccd *
100 sizeof (zen_topo_enum_ccd_t));
101 sock->ztes_tn_ccd = NULL;
102 }
103
104 if (sock->ztes_ccd != NULL) {
105 topo_mod_free(mod, sock->ztes_ccd, sock->ztes_nccd *
106 sizeof (amdzen_topo_ccd_t));
107 sock->ztes_ccd = NULL;
108 }
109 }
110
111 static int
topo_zen_enum_chip_gather_ccd(topo_mod_t * mod,const zen_topo_t * zen,zen_topo_enum_sock_t * sock,const amdzen_topo_df_ent_t * dfe,uint32_t ccdno,uint32_t phys_ccdno)112 topo_zen_enum_chip_gather_ccd(topo_mod_t *mod, const zen_topo_t *zen,
113 zen_topo_enum_sock_t *sock,
114 const amdzen_topo_df_ent_t *dfe, uint32_t ccdno, uint32_t phys_ccdno)
115 {
116 amdzen_topo_ccd_t *ccd;
117
118 ccd = &sock->ztes_ccd[ccdno];
119 ccd->atccd_dfno = sock->ztes_df->atd_dfno;
120 ccd->atccd_instid = dfe->atde_inst_id;
121 ccd->atccd_phys_no = phys_ccdno;
122 if (ioctl(zen->zt_fd, AMDZEN_TOPO_IOCTL_CCD, ccd) != 0) {
123 topo_mod_dprintf(mod, "failed to get CCD information "
124 "for DF/CCD 0x%x/0x%x: %s\n", sock->ztes_df->atd_dfno,
125 ccd->atccd_instid, strerror(errno));
126 return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
127 }
128
129 switch (ccd->atccd_err) {
130 case AMDZEN_TOPO_CCD_E_OK:
131 sock->ztes_nccd_valid++;
132 break;
133 /*
134 * We ignore errors about CCDs being missing. This is fine
135 * because on systems without a full CCD complement this will
136 * happen and is expected. We make sure we have at least one
137 * valid CCD before continuing.
138 */
139 case AMDZEN_TOPO_CCD_E_CCD_MISSING:
140 break;
141 default:
142 topo_mod_dprintf(mod, "DF CCM fabric 0x%x, CCD 0x%x "
143 "didn't give us valid info: found error 0x%x\n",
144 dfe->atde_fabric_id, phys_ccdno, ccd->atccd_err);
145 return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
146 }
147
148 return (0);
149 }
150
151
152 /*
153 * Go through all of our disparate sources and gather information that we'll
154 * need to process and perform enumeration. We need to gather the following
155 * disparate pieces of information:
156 *
157 * 1) We need to determine what's going on with all the CCDs and ask the
158 * amdzen(4D) driver for information.
159 *
160 * 2) We need to use the FM agent to ask /dev/fm to get all the CPU information
161 * for this system.
162 *
163 * 3) We use the same system to go get all the actual cache information for this
164 * system.
165 *
166 * 4) We grab some of the chip-wide information such as the socket and brand
167 * string information through kstats, with information about a valid CPU ID.
168 */
169 static int
topo_zen_enum_chip_gather(topo_mod_t * mod,const zen_topo_t * zen,const amdzen_topo_df_t * df,zen_topo_enum_sock_t * sock)170 topo_zen_enum_chip_gather(topo_mod_t *mod, const zen_topo_t *zen,
171 const amdzen_topo_df_t *df, zen_topo_enum_sock_t *sock)
172 {
173 uint32_t nccd = 0;
174
175 sock->ztes_df = df;
176 for (uint32_t i = 0; i < df->atd_df_buf_nvalid; i++) {
177 const amdzen_topo_df_ent_t *dfe = &df->atd_df_ents[i];
178 if (topo_zen_fabric_is_ccm(df, dfe)) {
179 nccd += dfe->atde_data.atded_ccm.atcd_nccds;
180 }
181 }
182
183 if (nccd == 0) {
184 topo_mod_dprintf(mod, "no CCDs found! Not much more we can "
185 "do... Something probably went wrong\n");
186 return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
187 }
188
189 sock->ztes_nccd = nccd;
190 sock->ztes_ccd = topo_mod_zalloc(mod, sizeof (amdzen_topo_ccd_t) *
191 sock->ztes_nccd);
192 if (sock->ztes_ccd == NULL) {
193 topo_mod_dprintf(mod, "failed to allocate memory for "
194 "ztes_ccd[]\n");
195 return (topo_mod_seterrno(mod, EMOD_NOMEM));
196 }
197
198 sock->ztes_tn_ccd = topo_mod_zalloc(mod, sizeof (zen_topo_enum_ccd_t) *
199 sock->ztes_nccd);
200
201 for (uint32_t i = 0, ccdno = 0; i < df->atd_df_buf_nvalid; i++) {
202 const amdzen_topo_df_ent_t *dfe = &df->atd_df_ents[i];
203 const amdzen_topo_ccm_data_t *ccm;
204
205 if (!topo_zen_fabric_is_ccm(df, dfe)) {
206 continue;
207 }
208
209 ccm = &dfe->atde_data.atded_ccm;
210 for (uint32_t ccm_ccdno = 0; ccm_ccdno < ccm->atcd_nccds;
211 ccm_ccdno++) {
212 if (ccm->atcd_ccd_en[ccm_ccdno] == 0) {
213 continue;
214 }
215
216 if (topo_zen_enum_chip_gather_ccd(mod, zen, sock, dfe,
217 ccdno, ccm->atcd_ccd_ids[ccm_ccdno]) != 0) {
218 return (-1);
219 }
220
221 ccdno++;
222 }
223 }
224
225 topo_mod_dprintf(mod, "found %u CCDs\n", sock->ztes_nccd_valid);
226 if (sock->ztes_nccd_valid == 0) {
227 topo_mod_dprintf(mod, "somehow we ended up with no CCDs with "
228 "valid topo information. Something went very wrong.\n");
229 return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
230 }
231
232 sock->ztes_fm_agent = fmd_agent_open(FMD_AGENT_VERSION);
233 if (sock->ztes_fm_agent == NULL) {
234 topo_mod_dprintf(mod, "failed to open FMD agent: %s\n",
235 strerror(errno));
236 return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
237 }
238
239 if (fmd_agent_physcpu_info(sock->ztes_fm_agent, &sock->ztes_cpus,
240 &sock->ztes_ncpus) != 0) {
241 topo_mod_dprintf(mod, "failed to get FM agent CPU "
242 "information: %s\n",
243 strerror(fmd_agent_errno(sock->ztes_fm_agent)));
244 return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
245 }
246
247 topo_mod_dprintf(mod, "got %u CPUs worth of data from the FM agent\n",
248 sock->ztes_ncpus);
249
250 if (fmd_agent_cache_info(sock->ztes_fm_agent, &sock->ztes_cache) != 0) {
251 topo_mod_dprintf(mod, "failed to get FM agent cache "
252 "information: %s\n",
253 strerror(fmd_agent_errno(sock->ztes_fm_agent)));
254 return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
255 }
256
257 if (sock->ztes_cache.fmc_ncpus != sock->ztes_ncpus) {
258 topo_mod_dprintf(mod, "/dev/fm gave us %u CPUs, but %u CPUs "
259 "for cache information: cannot continue\n",
260 sock->ztes_ncpus, sock->ztes_cache.fmc_ncpus);
261 return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
262 }
263
264 sock->ztes_kstat = kstat_open();
265 if (sock->ztes_kstat == NULL) {
266 topo_mod_dprintf(mod, "failed to open kstat driver: %s\n",
267 strerror(errno));
268 return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
269 }
270
271 return (0);
272 }
273
274 typedef enum {
275 ZEN_TOPO_CACHE_UNKNOWN,
276 ZEN_TOPO_CACHE_CORE_L1D,
277 ZEN_TOPO_CACHE_CORE_L1I,
278 ZEN_TOPO_CACHE_CORE_L2,
279 ZEN_TOPO_CACHE_CCX_L3
280 } zen_topo_cache_type_t;
281
282 typedef struct {
283 uint32_t ztcm_level;
284 fm_cache_info_type_t ztcm_type;
285 boolean_t ztcm_core;
286 zen_topo_cache_type_t ztcm_cache;
287 } zen_topo_cache_map_t;
288
289 const zen_topo_cache_map_t zen_topo_cache_map[] = {
290 { 1, FM_CACHE_INFO_T_DATA, B_TRUE, ZEN_TOPO_CACHE_CORE_L1D },
291 { 1, FM_CACHE_INFO_T_INSTR, B_TRUE, ZEN_TOPO_CACHE_CORE_L1I },
292 { 2, FM_CACHE_INFO_T_DATA | FM_CACHE_INFO_T_INSTR |
293 FM_CACHE_INFO_T_UNIFIED, B_TRUE, ZEN_TOPO_CACHE_CORE_L2 },
294 { 3, FM_CACHE_INFO_T_DATA | FM_CACHE_INFO_T_INSTR |
295 FM_CACHE_INFO_T_UNIFIED, B_FALSE, ZEN_TOPO_CACHE_CCX_L3 }
296 };
297
298 static zen_topo_cache_type_t
zen_topo_determine_cache(topo_mod_t * mod,uint32_t level,uint32_t type,uint32_t shift)299 zen_topo_determine_cache(topo_mod_t *mod, uint32_t level, uint32_t type,
300 uint32_t shift)
301 {
302 for (size_t i = 0; i < ARRAY_SIZE(zen_topo_cache_map); i++) {
303 const zen_topo_cache_map_t *map = &zen_topo_cache_map[i];
304
305 if (map->ztcm_level == level && map->ztcm_type == type) {
306 return (map->ztcm_cache);
307 }
308 }
309
310 return (ZEN_TOPO_CACHE_UNKNOWN);
311 }
312
313 /*
314 * We have mapped a logical CPU to a position in the hierarchy. We must now walk
 * its caches and attempt to install them up the chain. We assume that there
 * are four caches right now: an L1i, L1d, L2, and L3 cache.
317 *
318 * Note, AMD has mixed designs with 1 CCX and 2 CCXs. When there is only 1 CCX
319 * then we often describe the CCX and CCD as equivalent though if you look at
320 * the PPR it describes each CCD as having a single CCX. This is why the L3
321 * cache lives on the CCX right now.
322 *
323 * Historically we tried to leverage the APIC shift information that the kernel
324 * provides around the number of CPUs that shared a cache and map that to the
325 * APIC ID decomposition information that we had. Unfortunately, this heuristic
326 * was useful, but inaccurate. In particular the CPUID interface gives us a
327 * count of logical CPUs that share something. If you had less CPUs in a CCD
328 * than the APIC split would be at, then this would fail. A prime example is a
 * 32 core CPU where there are 4 cores in each of 8 CCDs. This would result in
 * 8 logical CPUs sharing the cache; however, the APIC split was often shifting
 * over at 4 because the CCD design was for up to 8 cores.
332 */
333 static boolean_t
topo_zen_map_caches(topo_mod_t * mod,zen_topo_enum_sock_t * sock,zen_topo_enum_ccx_t * ccx,zen_topo_enum_core_t * core,uint32_t cpuno)334 topo_zen_map_caches(topo_mod_t *mod, zen_topo_enum_sock_t *sock,
335 zen_topo_enum_ccx_t *ccx, zen_topo_enum_core_t *core, uint32_t cpuno)
336 {
337 fmd_agent_cpu_cache_t *cpu_cache = &sock->ztes_cache.fmc_cpus[cpuno];
338 if (cpu_cache->fmcc_ncaches == 0) {
339 return (B_TRUE);
340 }
341
342 /*
343 * For each cache that we discover we need to do the following:
344 *
345 * o Determine the type of cache that this is. While the upper layers
346 * guarantee us the L1 caches come before L2 and L2 before L3, we
347 * don't care.
348 * o If a cache is already there, it should have the same ID as the one
349 * that we already have.
350 */
351 for (uint_t i = 0; i < cpu_cache->fmcc_ncaches; i++) {
352 nvlist_t *nvl = cpu_cache->fmcc_caches[i];
353 nvlist_t **cachep = NULL;
354 zen_topo_cache_type_t ct;
355 uint32_t level, type, shift;
356 uint64_t id, alt_id;
357
358 if (nvlist_lookup_pairs(nvl, 0,
359 FM_CACHE_INFO_LEVEL, DATA_TYPE_UINT32, &level,
360 FM_CACHE_INFO_TYPE, DATA_TYPE_UINT32, &type,
361 FM_CACHE_INFO_ID, DATA_TYPE_UINT64, &id,
362 FM_CACHE_INFO_X86_APIC_SHIFT, DATA_TYPE_UINT32, &shift,
363 NULL) != 0) {
364 topo_mod_dprintf(mod, "missing required nvlist fields "
365 "from FM CPU %u cache %u\n", cpuno, i);
366 return (B_FALSE);
367 }
368
369 ct = zen_topo_determine_cache(mod, level, type, shift);
370 switch (ct) {
371 case ZEN_TOPO_CACHE_UNKNOWN:
372 topo_mod_dprintf(mod, "failed to map CPU %u cache %u "
373 "with id 0x%" PRIx64 " level %u, type 0x%x, APIC "
374 "shift 0x%x to a known type\n", cpuno, i, id, level,
375 type, shift);
376 return (B_FALSE);
377 case ZEN_TOPO_CACHE_CORE_L1D:
378 cachep = &core->ztcore_l1d;
379 break;
380 case ZEN_TOPO_CACHE_CORE_L1I:
381 cachep = &core->ztcore_l1i;
382 break;
383 case ZEN_TOPO_CACHE_CORE_L2:
384 cachep = &core->ztcore_l2;
385 break;
386 case ZEN_TOPO_CACHE_CCX_L3:
387 cachep = &ccx->ztccx_l3;
388 break;
389 }
390
391 if (*cachep == NULL) {
392 *cachep = nvl;
393 continue;
394 }
395
396 alt_id = fnvlist_lookup_uint64(*cachep, FM_CACHE_INFO_ID);
397 if (alt_id != id) {
398 topo_mod_dprintf(mod, "wanted to map CPU %u cache %u "
399 "with id 0x%" PRIx64 " level %u, type 0x%x, APIC "
400 "shift 0x%x to Zen cache type 0x%x, but cache with "
401 "id 0x%" PRIx64 " already present", cpuno, i,
402 id, level, type, shift, ct, alt_id);
403 return (B_FALSE);
404 }
405 }
406
407 return (B_TRUE);
408 }
409
410 static boolean_t
topo_zen_map_logcpu_to_phys(topo_mod_t * mod,zen_topo_enum_sock_t * sock,nvlist_t * cpu_nvl,uint32_t cpuno,uint32_t apicid)411 topo_zen_map_logcpu_to_phys(topo_mod_t *mod, zen_topo_enum_sock_t *sock,
412 nvlist_t *cpu_nvl, uint32_t cpuno, uint32_t apicid)
413 {
414 for (uint32_t ccdno = 0; ccdno < sock->ztes_nccd; ccdno++) {
415 amdzen_topo_ccd_t *ccd = &sock->ztes_ccd[ccdno];
416 if (ccd->atccd_err != AMDZEN_TOPO_CCD_E_OK)
417 continue;
418
419 for (uint32_t ccxno = 0; ccxno < ccd->atccd_nphys_ccx;
420 ccxno++) {
421 amdzen_topo_ccx_t *ccx;
422 if (ccd->atccd_ccx_en[ccxno] == 0)
423 continue;
424
425 ccx = &ccd->atccd_ccx[ccxno];
426 for (uint32_t coreno = 0;
427 coreno < ccx->atccx_nphys_cores; coreno++) {
428 amdzen_topo_core_t *core;
429 if (ccx->atccx_core_en[coreno] == 0)
430 continue;
431
432 core = &ccx->atccx_cores[coreno];
433 for (uint32_t thrno = 0;
434 thrno < core->atcore_nthreads; thrno++) {
435 zen_topo_enum_ccd_t *zt_ccd;
436 zen_topo_enum_ccx_t *zt_ccx;
437 zen_topo_enum_core_t *zt_core;
438
439 if (core->atcore_thr_en[thrno] == 0)
440 continue;
441
442 if (core->atcore_apicids[thrno] !=
443 apicid) {
444 continue;
445 }
446
447 /*
448 * We have a match. Make sure we haven't
449 * already used it.
450 */
451 zt_ccd = &sock->ztes_tn_ccd[ccdno];
452 zt_ccx = &zt_ccd->ztccd_ccx[ccxno];
453 zt_core = &zt_ccx->ztccx_core[coreno];
454
455 if (zt_core->ztcore_nvls[thrno] !=
456 NULL) {
457 topo_mod_dprintf(mod, "APIC ID "
458 "0x%x mapped to CCD/CCX/"
459 "Core/Thread 0x%x/0x%x/"
460 "0x%x/0x%x, but found "
461 "another nvlist already "
462 "there\n", apicid, ccdno,
463 ccxno, coreno, thrno);
464 return (B_FALSE);
465 }
466
467 zt_core->ztcore_nvls[thrno] = cpu_nvl;
468
469 /*
470 * Now that we have successfully mapped
471 * a core into the tree go install the
472 * logical CPU's cache information up
473 * the tree.
474 */
475 return (topo_zen_map_caches(mod, sock,
476 zt_ccx, zt_core, cpuno));
477 }
478 }
479 }
480 }
481
482 topo_mod_dprintf(mod, "failed to find a CPU for apic 0x%x\n",
483 apicid);
484 return (B_FALSE);
485 }
486
487 /*
488 * Using information from the given logical CPU that we know is part of our
489 * socket that we're enumerating, attempt to go through and load information
490 * about the chip itself such as the family, model, stepping, brand string, etc.
491 * This comes from both the /dev/fm information that we have in cpu_nvl and from
492 * kstats.
493 */
494 static int
topo_zen_map_common_chip_info(topo_mod_t * mod,zen_topo_enum_sock_t * sock,nvlist_t * cpu_nvl)495 topo_zen_map_common_chip_info(topo_mod_t *mod, zen_topo_enum_sock_t *sock,
496 nvlist_t *cpu_nvl)
497 {
498 char name[KSTAT_STRLEN];
499 int32_t cpu_id;
500 uint32_t sockid;
501 char *rev, *ident;
502 kstat_t *ks;
503 const kstat_named_t *knp;
504
505 if (nvlist_lookup_pairs(cpu_nvl, 0,
506 FM_PHYSCPU_INFO_CPU_ID, DATA_TYPE_INT32, &cpu_id,
507 FM_PHYSCPU_INFO_CHIP_REV, DATA_TYPE_STRING, &rev,
508 FM_PHYSCPU_INFO_SOCKET_TYPE, DATA_TYPE_UINT32, &sockid,
509 FM_PHYSCPU_INFO_FAMILY, DATA_TYPE_INT32, &sock->ztes_cpu_fam,
510 FM_PHYSCPU_INFO_MODEL, DATA_TYPE_INT32, &sock->ztes_cpu_model,
511 FM_PHYSCPU_INFO_STEPPING, DATA_TYPE_INT32, &sock->ztes_cpu_step,
512 NULL) != 0) {
513 topo_mod_dprintf(mod, "missing required nvlist fields "
514 "from FM physcpu info chip ident\n");
515 return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
516 }
517
518 /*
519 * Some CPUs have PPIN disabled so we look for it separately here. The
520 * rest of the aspects are required.
521 */
522 if (nvlist_lookup_string(cpu_nvl, FM_PHYSCPU_INFO_CHIP_IDENTSTR,
523 &ident) != 0) {
524 ident = NULL;
525 }
526
527 /*
528 * If we can not fully identify a revision, the kernel will indicate so
529 * with a '?' in the name where normally a stepping would show up. See
530 * amd_revmap[] in uts/intel/os/cpuid_subr.c. In such a case, we do not
531 * want to propagate such a revision.
532 */
533 if (strchr(rev, '?') == NULL) {
534 sock->ztes_cpu_rev = rev;
535 }
536 sock->ztes_cpu_serial = ident;
537
538 if (snprintf(name, sizeof (name), "cpu_info%d", cpu_id) >=
539 sizeof (name)) {
540 topo_mod_dprintf(mod, "failed to construct kstat name: "
541 "overflow");
542 return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
543 }
544
545 ks = kstat_lookup(sock->ztes_kstat, "cpu_info", cpu_id, name);
546 if (ks == NULL) {
547 topo_mod_dprintf(mod, "failed to find 'cpu_info:%d:%s': %s",
548 cpu_id, name, strerror(errno));
549 return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
550 }
551
552 if (kstat_read(sock->ztes_kstat, ks, NULL) == -1) {
553 topo_mod_dprintf(mod, "failed to read kstat 'cpu_info:%d:%s': "
554 "%s", cpu_id, name, strerror(errno));
555 return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
556 }
557
558 knp = kstat_data_lookup(ks, "brand");
559 if (knp == NULL) {
560 topo_mod_dprintf(mod, "failed to find 'cpu_info:%d:%s:brand\n",
561 cpu_id, name);
562 return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
563
564 }
565 sock->ztes_cpu_brand = KSTAT_NAMED_STR_PTR(knp);
566
567 if (sockid == X86_SOCKET_UNKNOWN) {
568 return (0);
569 }
570
571 knp = kstat_data_lookup(ks, "socket_type");
572 if (knp == NULL) {
573 topo_mod_dprintf(mod, "failed to find 'cpu_info:%d:%s:"
574 "socket_type\n", cpu_id, name);
575 return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
576 }
577 sock->ztes_cpu_sock = KSTAT_NAMED_STR_PTR(knp);
578
579 return (0);
580 }
581
582 static int
topo_zen_enum_chip_map(topo_mod_t * mod,zen_topo_enum_sock_t * sock)583 topo_zen_enum_chip_map(topo_mod_t *mod, zen_topo_enum_sock_t *sock)
584 {
585 /*
586 * We have an arrray of information from /dev/fm that describes each
587 * logical CPU. We would like to map that to a given place in physical
588 * topology, which we do via the APIC ID. We will then also determine
589 * how caches are mapped together.
590 */
591 for (uint_t i = 0; i < sock->ztes_ncpus; i++) {
592 int32_t apicid, sockid;
593 nvlist_t *cpu_nvl = sock->ztes_cpus[i];
594
595 if (nvlist_lookup_pairs(cpu_nvl, 0,
596 FM_PHYSCPU_INFO_CHIP_ID, DATA_TYPE_INT32, &sockid,
597 FM_PHYSCPU_INFO_STRAND_APICID, DATA_TYPE_INT32, &apicid,
598 NULL) != 0) {
599 topo_mod_dprintf(mod, "missing required nvlist fields "
600 "from FM physcpu info for CPU %u\n", i);
601 return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
602 }
603
604 /*
605 * This logical CPU isn't for our socket, ignore it.
606 */
607 if (sockid != sock->ztes_sockid) {
608 continue;
609 }
610
611 if (!topo_zen_map_logcpu_to_phys(mod, sock, cpu_nvl, i,
612 (uint32_t)apicid)) {
613 return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
614 }
615 }
616
617 /*
618 * Now that we have each logical CPU taken care of, we want to fill in
619 * information about the common CPU.
620 */
621 for (uint_t i = 0; i < sock->ztes_ncpus; i++) {
622 int32_t sockid;
623 nvlist_t *cpu_nvl = sock->ztes_cpus[i];
624
625 if (nvlist_lookup_pairs(cpu_nvl, 0,
626 FM_PHYSCPU_INFO_CHIP_ID, DATA_TYPE_INT32, &sockid,
627 NULL) != 0) {
628 topo_mod_dprintf(mod, "missing required nvlist fields "
629 "from FM physcpu info for CPU %u\n", i);
630 return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
631 }
632
633 /*
634 * This logical CPU isn't for our socket, ignore it.
635 */
636 if (sockid != sock->ztes_sockid) {
637 continue;
638 }
639
640 return (topo_zen_map_common_chip_info(mod, sock, cpu_nvl));
641 }
642
643 topo_mod_dprintf(mod, "no logical CPUs match our target socket %u!\n",
644 sock->ztes_sockid);
645 return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
646 }
647
648 static int
topo_zen_enum(topo_mod_t * mod,tnode_t * pnode,const char * name,topo_instance_t min,topo_instance_t max,void * modarg,void * data)649 topo_zen_enum(topo_mod_t *mod, tnode_t *pnode, const char *name,
650 topo_instance_t min, topo_instance_t max, void *modarg, void *data)
651 {
652 int ret;
653 zen_topo_t *zen = topo_mod_getspecific(mod);
654 amdzen_topo_df_t *df = NULL;
655 topo_zen_chip_t *chip;
656 zen_topo_enum_sock_t sock;
657
658 topo_mod_dprintf(mod, "asked to enum %s [%" PRIu64 ", %" PRIu64 "] on "
659 "%s%" PRIu64 "\n", name, min, max, topo_node_name(pnode),
660 topo_node_instance(pnode));
661
662 /*
663 * Currently we only support enumerating a given chip.
664 */
665 if (strcmp(name, CHIP) != 0) {
666 topo_mod_dprintf(mod, "cannot enumerate %s: unknown type\n",
667 name);
668 return (-1);
669 }
670
671 if (data == NULL) {
672 topo_mod_dprintf(mod, "cannot enumerate %s: missing required "
673 "data\n", name);
674 return (topo_mod_seterrno(mod, EMOD_METHOD_INVAL));
675 }
676
677 if (min != max) {
678 topo_mod_dprintf(mod, "cannot enumerate %s: multiple instances "
679 "requested\n", name);
680 return (topo_mod_seterrno(mod, EMOD_METHOD_INVAL));
681 }
682
683 chip = data;
684 for (uint32_t i = 0; i < zen->zt_base.atb_ndf; i++) {
685 if (zen->zt_dfs[i].atd_sockid == chip->tzc_sockid) {
686 df = &zen->zt_dfs[i];
687 break;
688 }
689 }
690
691 if (df == NULL) {
692 topo_mod_dprintf(mod, "no matching DF with socket %u",
693 chip->tzc_sockid);
694 return (topo_mod_seterrno(mod, EMOD_METHOD_INVAL));
695 }
696
697 /*
698 * In our supported platforms there is either a single DF instance per
699 * die (DFv3+ aka Zen 2+) or we have the older style Zen 1 (aka DFv2)
700 * systems where there are multiple dies within the package. We don't
701 * support Zen 1/DFv2 based systems right now.
702 */
703 if (zen->zt_base.atb_rev == DF_REV_UNKNOWN) {
704 topo_mod_dprintf(mod, "DF base revision is unknown, cannot "
705 "proceed\n");
706 return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
707 }
708
709 if (zen->zt_base.atb_rev == DF_REV_2) {
710 topo_mod_dprintf(mod, "DFv2 multiple dies are not currently "
711 "supported\n");
712 return (topo_mod_seterrno(mod, EMOD_METHOD_NOTSUP));
713 }
714
715 /*
716 * We want to create our "chip" node at the top of this. To do that,
717 * we'd like to know things like the CPU's PPIN and other information
718 * like the socket type and related. To do this we will start by getting
719 * information about the physical CPU information from devfm. That will
720 * be combined with our knowledge of how APIC IDs map to data fabric
721 * elements.
722 */
723 bzero(&sock, sizeof (sock));
724 sock.ztes_sockid = chip->tzc_sockid;
725 if ((ret = topo_zen_enum_chip_gather(mod, zen, df, &sock)) != 0) {
726 topo_zen_enum_cleanup_sock(mod, &sock);
727 return (ret);
728 }
729
730 /*
731 * Determine the mapping of all the logical CPU entries and their data
732 * that we found to the CCD mapping.
733 */
734 if ((ret = topo_zen_enum_chip_map(mod, &sock)) != 0) {
735 return (ret);
736 }
737
738 ret = topo_zen_build_chip(mod, pnode, min, &sock);
739 topo_zen_enum_cleanup_sock(mod, &sock);
740
741 return (ret);
742 }
743
744 static const topo_modops_t topo_zen_ops = {
745 topo_zen_enum, NULL
746 };
747
748 static topo_modinfo_t topo_zen_mod = {
749 "AMD Zen Enumerator", FM_FMRI_SCHEME_HC, TOPO_MOD_ZEN_VERS,
750 &topo_zen_ops
751 };
752
753 static void
topo_zen_cleanup(topo_mod_t * mod,zen_topo_t * zen)754 topo_zen_cleanup(topo_mod_t *mod, zen_topo_t *zen)
755 {
756 if (zen->zt_dfs != NULL) {
757 for (uint32_t i = 0; i < zen->zt_base.atb_ndf; i++) {
758 size_t entsize;
759
760 if (zen->zt_dfs[i].atd_df_ents == NULL)
761 continue;
762 entsize = sizeof (amdzen_topo_df_ent_t) *
763 zen->zt_base.atb_maxdfent;
764 topo_mod_free(mod, zen->zt_dfs[i].atd_df_ents,
765 entsize);
766 }
767 topo_mod_free(mod, zen->zt_dfs, sizeof (amdzen_topo_df_t) *
768 zen->zt_base.atb_ndf);
769 }
770
771 if (zen->zt_fd >= 0) {
772 (void) close(zen->zt_fd);
773 zen->zt_fd = -1;
774 }
775 topo_mod_free(mod, zen, sizeof (zen_topo_t));
776 }
777
778 static int
topo_zen_init(topo_mod_t * mod,zen_topo_t * zen)779 topo_zen_init(topo_mod_t *mod, zen_topo_t *zen)
780 {
781 zen->zt_fd = open(topo_zen_dev, O_RDONLY);
782 if (zen->zt_fd < 0) {
783 topo_mod_dprintf(mod, "failed to open %s: %s\n", topo_zen_dev,
784 strerror(errno));
785 return (-1);
786 }
787
788 if (ioctl(zen->zt_fd, AMDZEN_TOPO_IOCTL_BASE, &zen->zt_base) != 0) {
789 topo_mod_dprintf(mod, "failed to get base Zen topology "
790 "information: %s\n", strerror(errno));
791 return (-1);
792 }
793
794 /*
795 * Get all of the basic DF information now.
796 */
797 zen->zt_dfs = topo_mod_zalloc(mod, sizeof (amdzen_topo_df_t) *
798 zen->zt_base.atb_ndf);
799 if (zen->zt_dfs == NULL) {
800 topo_mod_dprintf(mod, "failed to allocate space for %u DF "
801 "entries: %s\n", zen->zt_base.atb_ndf,
802 topo_strerror(EMOD_NOMEM));
803 return (-1);
804 }
805
806 for (uint32_t i = 0; i < zen->zt_base.atb_ndf; i++) {
807 amdzen_topo_df_t *topo_df = &zen->zt_dfs[i];
808
809 topo_df->atd_df_ents = topo_mod_zalloc(mod,
810 sizeof (amdzen_topo_df_ent_t) * zen->zt_base.atb_maxdfent);
811 if (topo_df->atd_df_ents == NULL) {
812 topo_mod_dprintf(mod, "failed to allocate space for "
813 "DF %u's DF ents: %s\n", i,
814 topo_strerror(EMOD_NOMEM));
815 return (-1);
816 }
817 topo_df->atd_df_buf_nents = zen->zt_base.atb_maxdfent;
818 topo_df->atd_dfno = i;
819
820 if (ioctl(zen->zt_fd, AMDZEN_TOPO_IOCTL_DF, topo_df) != 0) {
821 topo_mod_dprintf(mod, "failed to get information for "
822 "DF %u: %s", i, strerror(errno));
823 return (-1);
824 }
825 }
826
827 return (0);
828 }
829
830 int
_topo_init(topo_mod_t * mod,topo_version_t version)831 _topo_init(topo_mod_t *mod, topo_version_t version)
832 {
833 zen_topo_t *zen = NULL;
834
835 if (getenv("TOPOZENDEBUG") != NULL) {
836 topo_mod_setdebug(mod);
837 }
838 topo_mod_dprintf(mod, "module initializing\n");
839
840 zen = topo_mod_zalloc(mod, sizeof (zen_topo_t));
841 if (zen == NULL) {
842 topo_mod_dprintf(mod, "failed to allocate zen_topo_t: %s\n",
843 topo_strerror(EMOD_NOMEM));
844 return (-1);
845 }
846
847 if (topo_zen_init(mod, zen) != 0) {
848 topo_zen_cleanup(mod, zen);
849 return (-1);
850 }
851
852 if (topo_mod_register(mod, &topo_zen_mod, TOPO_VERSION) != 0) {
853 topo_zen_cleanup(mod, zen);
854 return (-1);
855 }
856
857 topo_mod_setspecific(mod, zen);
858 return (0);
859 }
860
861 void
_topo_fini(topo_mod_t * mod)862 _topo_fini(topo_mod_t *mod)
863 {
864 zen_topo_t *zen;
865
866 if ((zen = topo_mod_getspecific(mod)) == NULL) {
867 return;
868 }
869
870 topo_mod_setspecific(mod, NULL);
871 topo_zen_cleanup(mod, zen);
872 topo_mod_unregister(mod);
873 }
874