1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2024 Oxide Computer Company
14 */
15
16 #ifndef _SYS_AMDZEN_CCD_H
17 #define _SYS_AMDZEN_CCD_H
18
19 /*
20 * SMN registers that are CCD-specific (core complex die) but are spread across
21 * multiple functional units. This could be combined with <sys/amdzen/ccx.h>
22 * once the duplication between that and <sys/controlregs.h> is dealt with.
23 *
 * Currently this covers three different groups:
25 *
26 * SMU::PWR This group describes information about the CCD and, unlike the
27 * DF CCM entries, this is only present if an actual die is
28 * present in the package. These registers are always present
29 * starting in Zen 2.
30 *
31 * L3::SCFCTP The Scalable Control Fabric, Clocks, Test, and Power Gating
32 * registers exist on a per-core basis within each CCD. The first
33 * point that we can find that this exists started in Zen 3.
34 *
35 * L3::SOC This was added starting in Zen 5 and contains several of the
36 * registers that used to exist in SMU::PWR.
37 *
 * The register naming and fields generally follow the conventions that the DF
39 * and UMC have laid out. The one divergence right now is that the functional
40 * blocks only exist starting in a given Zen uarch (e.g. Zen 2). Once we have
41 * divergences from that introduction point then like the MSRs and others we
42 * will introduce the generation-specific part of the name.
43 */
44
45 #include <sys/bitext.h>
46 #include <sys/debug.h>
47 #include <sys/types.h>
48 #include <sys/amdzen/smn.h>
49
50 #ifdef __cplusplus
51 extern "C" {
52 #endif
53
54 /*
 * SMU::PWR registers, per-CCD. This functional unit is present starting in Zen
 * 2 based platforms. Note that there is another aperture at 0x4008_1000 that is
57 * documented to alias CCD 0. It's not really clear what if any utility that's
58 * supposed to have, except that the name given to these aliases contains
59 * "LOCAL" which implies that perhaps rather than aliasing CCD 0 it instead is
60 * decoded by the unit on the originating CCD. We don't use that in any case.
61 *
 * Once SoCs started supporting more than 8 CCDs with Zen 4, they added a second
 * aperture that starts at 4a08_1000h and uses the same shifts. This leads to
 * some awkwardness below, as the aperture has to be chosen based on the CCD
 * number. We should investigate using the uarch to determine these limits at
 * some point in the future, like we have done with some of our DF registers.
67 *
68 * Starting in Zen 5, a chunk of the registers described here are all now in
69 * the L3::SOC block.
70 */
71 static inline smn_reg_t
amdzen_smupwr_smn_reg(const uint8_t ccdno,const smn_reg_def_t def,const uint16_t reginst)72 amdzen_smupwr_smn_reg(const uint8_t ccdno, const smn_reg_def_t def,
73 const uint16_t reginst)
74 {
75 const uint32_t APERTURE_BASE = 0x30081000;
76 const uint32_t APERTURE_HI_BASE = 0x4a081000;
77 const uint32_t APERTURE_MASK = 0xfffff000;
78 CTASSERT((APERTURE_BASE & ~APERTURE_MASK) == 0);
79 CTASSERT((APERTURE_HI_BASE & ~APERTURE_MASK) == 0);
80
81 const uint32_t ccdno32 = (const uint32_t)ccdno;
82 const uint32_t reginst32 = (const uint32_t)reginst;
83 const uint32_t size32 = (def.srd_size == 0) ? 4 :
84 (const uint32_t)def.srd_size;
85
86 const uint32_t stride = (def.srd_stride == 0) ? size32 : def.srd_stride;
87 const uint32_t nents = (def.srd_nents == 0) ? 1 :
88 (const uint32_t)def.srd_nents;
89
90 ASSERT(size32 == 1 || size32 == 2 || size32 == 4);
91 ASSERT3S(def.srd_unit, ==, SMN_UNIT_SMUPWR);
92 ASSERT3U(ccdno32, <, 16);
93 ASSERT3U(nents, >, reginst32);
94
95 uint32_t aperture_base, aperture_off;
96 if (ccdno >= 8) {
97 aperture_base = APERTURE_HI_BASE;
98 aperture_off = (ccdno32 - 8) << 25;
99 } else {
100 aperture_base = APERTURE_BASE;
101 aperture_off = ccdno32 << 25;
102 }
103 ASSERT3U(aperture_off, <=, UINT32_MAX - aperture_base);
104
105 const uint32_t aperture = aperture_base + aperture_off;
106 ASSERT0(aperture & ~APERTURE_MASK);
107
108 const uint32_t reg = def.srd_reg + reginst32 * stride;
109 ASSERT0(reg & APERTURE_MASK);
110
111 return (SMN_MAKE_REG_SIZED(aperture + reg, size32));
112 }
113
114 /*
115 * SMU::PWR::CCD_DIE_ID - does what it says.
116 */
/*CSTYLED*/
#define	D_SMUPWR_CCD_DIE_ID	(const smn_reg_def_t){	\
	.srd_unit = SMN_UNIT_SMUPWR,	\
	.srd_reg = 0x00	\
}
/* Builds the register for the CCD numbered `ccd`. */
#define	SMUPWR_CCD_DIE_ID(ccd)	\
	amdzen_smupwr_smn_reg((ccd), D_SMUPWR_CCD_DIE_ID, 0)
/* Extracts bits 3:0 from a value read out of the register. */
#define	SMUPWR_CCD_DIE_ID_GET(_val)	bitx32((_val), 3, 0)
125
126 /*
127 * SMU::PWR::THREAD_ENABLE - also does what it says; this is a bitmap of each of
128 * the possible threads. If the bit is set, the thread runs. Clearing bits
129 * is not allowed. A bit set in here corresponds to a logical thread, though
130 * the exact layout is a bit tricky in the multi-CCX case. When there are two
131 * core complexes on the die, all of CCX0's possible threads will come first,
132 * followed by all of CCX1's. However, while this always describes _logical_
133 * threads, the spacing is based upon the width of the total possible physical
134 * cores in the CCX.
135 *
136 * For example, consider a Zen 2 system. It has 2 core complexes with 4 cores
137 * each. Regardless of how many logical cores and threads are enabled in each
138 * complex, CCX0 logical thread 0 always starts at bit 0 and CCX1 logical thread
139 * 0 always starts at bit 8. In a system that only has 3/4 cores enabled then
140 * we'd see this register set to 0x3f3f. In Zen 3 and non-Bergamo Zen 4, this
141 * is the same width, but there is only one core complex. In Bergamo, this is
142 * instead 32-bits wide with CCX1 thread 0 starting at bit 16. All of this is
143 * to say that even though these bits correspond to logical threads, the CCX
144 * resets the bit position.
145 *
 * However, if we move to a case where SMT is disabled then the CCX starting
 * point is still the same, but there will not be a gap for threads within
 * the CCX. So bit 0 will be logical CPU 0 thread 0, bit 1 logical CPU 1 thread
 * 0, etc.
150 */
/*CSTYLED*/
#define	D_SMUPWR_THREAD_EN	(const smn_reg_def_t){	\
	.srd_unit = SMN_UNIT_SMUPWR,	\
	.srd_reg = 0x18	\
}
/* Builds the register for the CCD numbered `ccd`. */
#define	SMUPWR_THREAD_EN(ccd)	\
	amdzen_smupwr_smn_reg((ccd), D_SMUPWR_THREAD_EN, 0)
/*
 * Read or set the single bit at position `_thr`. The position is expanded
 * twice, so callers must not pass an expression with side effects.
 */
#define	SMUPWR_THREAD_EN_GET_T(_val, _thr)	\
	bitx32((_val), (_thr), (_thr))
#define	SMUPWR_THREAD_EN_SET_T(_val, _thr)	\
	bitset32((_val), (_thr), (_thr), 1)
160
161 /*
162 * SMU::PWR::THREAD_CONFIGURATION - provides core and CCX counts for the die as
163 * well as whether SMT is enabled, and a bit to enable or disable SMT *after the
164 * next warm reset* (which we don't use).
165 */
/*CSTYLED*/
#define	D_SMUPWR_THREAD_CFG	(const smn_reg_def_t){	\
	.srd_unit = SMN_UNIT_SMUPWR,	\
	.srd_reg = 0x1c	\
}
/* Builds the register for the CCD numbered `ccd`. */
#define	SMUPWR_THREAD_CFG(ccd)	\
	amdzen_smupwr_smn_reg((ccd), D_SMUPWR_THREAD_CFG, 0)
/* SMT mode is the single bit 8; its named values follow. */
#define	SMUPWR_THREAD_CFG_GET_SMT_MODE(_val)	bitx32((_val), 8, 8)
#define	SMUPWR_THREAD_CFG_SMT_MODE_1T		1
#define	SMUPWR_THREAD_CFG_SMT_MODE_SMT		0
/* Complex count is bits 7:4 and core count is bits 3:0. */
#define	SMUPWR_THREAD_CFG_GET_COMPLEX_COUNT(_val)	bitx32((_val), 7, 4)
#define	SMUPWR_THREAD_CFG_GET_CORE_COUNT(_val)	bitx32((_val), 3, 0)
178
179 /*
180 * SMU::PWR::SOFT_DOWNCORE - provides a bitmap of cores that may exist; setting
181 * each bit disables the corresponding core. Presumably after a warm reset.
182 */
/*CSTYLED*/
#define	D_SMUPWR_SOFT_DOWNCORE	(const smn_reg_def_t){	\
	.srd_unit = SMN_UNIT_SMUPWR,	\
	.srd_reg = 0x20	\
}
/* Builds the register for the CCD numbered `ccd`. */
#define	SMUPWR_SOFT_DOWNCORE(ccd)	\
	amdzen_smupwr_smn_reg((ccd), D_SMUPWR_SOFT_DOWNCORE, 0)
/*
 * The disable bitmap is bits 7:0. The _C variants operate on the single bit
 * at position `_core`, which is expanded twice and so must be free of side
 * effects.
 */
#define	SMUPWR_SOFT_DOWNCORE_GET_DISCORE(_val)	bitx32((_val), 7, 0)
#define	SMUPWR_SOFT_DOWNCORE_GET_DISCORE_C(_val, _core)	\
	bitx32((_val), (_core), (_core))
#define	SMUPWR_SOFT_DOWNCORE_SET_DISCORE(_val, _new)	\
	bitset32((_val), 7, 0, (_new))
#define	SMUPWR_SOFT_DOWNCORE_SET_DISCORE_C(_val, _core)	\
	bitset32((_val), (_core), (_core), 1)
194
195 /*
196 * SMU::PWR::CORE_ENABLE - nominally writable, this register contains a bitmap
197 * of cores; a bit that is set means the core whose physical ID is that bit
198 * position is enabled. The effect of modifying this register, if any, is
199 * undocumented and unknown.
200 */
/*CSTYLED*/
#define	D_SMUPWR_CORE_EN	(const smn_reg_def_t){	\
	.srd_unit = SMN_UNIT_SMUPWR,	\
	.srd_reg = 0x24	\
}
/* Builds the register for the CCD numbered `ccd`. */
#define	SMUPWR_CORE_EN(ccd)	\
	amdzen_smupwr_smn_reg((ccd), D_SMUPWR_CORE_EN, 0)
/*
 * The enable bitmap is bits 7:0. The _C variants operate on the single bit
 * at position `_core`, which is expanded twice and so must be free of side
 * effects.
 */
#define	SMUPWR_CORE_EN_GET(_val)	bitx32((_val), 7, 0)
#define	SMUPWR_CORE_EN_GET_C(_val, _core)	\
	bitx32((_val), (_core), (_core))
#define	SMUPWR_CORE_EN_SET(_val, _new)	bitset32((_val), 7, 0, (_new))
#define	SMUPWR_CORE_EN_SET_C(_val, _core)	\
	bitset32((_val), (_core), (_core), 1)
212
213 /*
214 * L3::SOC registers, per-CCD. This functional unit is present starting in Zen
215 * 5 based platforms. This covers a majority of the things that are described
216 * above in the SMU::PWR section, except for SMU::PWR::CCD_DIE_ID. CCDs are at a
217 * 23-bit stride.
218 */
219 static inline smn_reg_t
amdzen_l3soc_smn_reg(const uint8_t ccdno,const smn_reg_def_t def,const uint16_t reginst)220 amdzen_l3soc_smn_reg(const uint8_t ccdno, const smn_reg_def_t def,
221 const uint16_t reginst)
222 {
223 const uint32_t APERTURE_BASE = 0x203c0000;
224 const uint32_t APERTURE_MASK = 0xfffc0000;
225 CTASSERT((APERTURE_BASE & ~APERTURE_MASK) == 0);
226
227 const uint32_t ccdno32 = (const uint32_t)ccdno;
228 const uint32_t reginst32 = (const uint32_t)reginst;
229 const uint32_t size32 = (def.srd_size == 0) ? 4 :
230 (const uint32_t)def.srd_size;
231
232 const uint32_t stride = (def.srd_stride == 0) ? size32 : def.srd_stride;
233 const uint32_t nents = (def.srd_nents == 0) ? 1 :
234 (const uint32_t)def.srd_nents;
235
236 ASSERT(size32 == 1 || size32 == 2 || size32 == 4);
237 ASSERT3S(def.srd_unit, ==, SMN_UNIT_L3SOC);
238 ASSERT3U(ccdno32, <, 16);
239 ASSERT3U(nents, >, reginst32);
240
241 uint32_t aperture_base, aperture_off;
242 aperture_base = APERTURE_BASE;
243 aperture_off = ccdno32 << 23;
244 ASSERT3U(aperture_off, <=, UINT32_MAX - aperture_base);
245
246 const uint32_t aperture = aperture_base + aperture_off;
247 ASSERT0(aperture & ~APERTURE_MASK);
248
249 const uint32_t reg = def.srd_reg + reginst32 * stride;
250 ASSERT0(reg & APERTURE_MASK);
251
252 return (SMN_MAKE_REG_SIZED(aperture + reg, size32));
253
254 }
255
256 /*
257 * L3::L3SOC::CcxThreadEnable0 - the Zen 5+ variant of SMU::PWR::THREAD_ENABLE.
258 * See the description there.
259 */
/*CSTYLED*/
#define	D_L3SOC_THREAD_EN	(const smn_reg_def_t){	\
	.srd_unit = SMN_UNIT_L3SOC,	\
	.srd_reg = 0x20	\
}
/* Builds the register for the CCD numbered `ccd`. */
#define	L3SOC_THREAD_EN(ccd)	\
	amdzen_l3soc_smn_reg((ccd), D_L3SOC_THREAD_EN, 0)
/*
 * Read or set the single bit at position `_thr`. The position is expanded
 * twice, so callers must not pass an expression with side effects.
 */
#define	L3SOC_THREAD_EN_GET_T(_val, _thr)	\
	bitx32((_val), (_thr), (_thr))
#define	L3SOC_THREAD_EN_SET_T(_val, _thr)	\
	bitset32((_val), (_thr), (_thr), 1)
269
270 /*
271 * L3::L3SOC::CcxThreadConfiguration - the Zen 5+ variant of
272 * SMU::PWR::THREAD_CONFIGURATION. Indicates information about enabled cores,
273 * complexes, and SMT. The fields have similar semantics but are at different
274 * locations.
275 */
/*CSTYLED*/
#define	D_L3SOC_THREAD_CFG	(const smn_reg_def_t){	\
	.srd_unit = SMN_UNIT_L3SOC,	\
	.srd_reg = 0x30	\
}
/* Builds the register for the CCD numbered `ccd`. */
#define	L3SOC_THREAD_CFG(ccd)	\
	amdzen_l3soc_smn_reg((ccd), D_L3SOC_THREAD_CFG, 0)
/* SMT mode is the single bit 10; its named values follow. */
#define	L3SOC_THREAD_CFG_GET_SMT_MODE(_val)	bitx32((_val), 10, 10)
#define	L3SOC_THREAD_CFG_SMT_MODE_1T		1
#define	L3SOC_THREAD_CFG_SMT_MODE_SMT		0
/* Complex count is bits 9:6 and core count is bits 3:0. */
#define	L3SOC_THREAD_CFG_GET_COMPLEX_COUNT(_val)	bitx32((_val), 9, 6)
#define	L3SOC_THREAD_CFG_GET_CORE_COUNT(_val)	bitx32((_val), 3, 0)
288
289 /*
290 * L3::L3SOC::CcxSoftDownCore0 - see SMU::PWR::SOFT_DOWNCORE.
291 */
/*CSTYLED*/
#define	D_L3SOC_SOFT_DOWNCORE	(const smn_reg_def_t){	\
	.srd_unit = SMN_UNIT_L3SOC,	\
	.srd_reg = 0x34	\
}
/* Builds the register for the CCD numbered `ccd`. */
#define	L3SOC_SOFT_DOWNCORE(ccd)	\
	amdzen_l3soc_smn_reg((ccd), D_L3SOC_SOFT_DOWNCORE, 0)
/*
 * The disable bitmap is bits 15:0. The _C variants operate on the single bit
 * at position `_core`, which is expanded twice and so must be free of side
 * effects.
 */
#define	L3SOC_SOFT_DOWNCORE_GET_DISCORE(_val)	bitx32((_val), 15, 0)
#define	L3SOC_SOFT_DOWNCORE_GET_DISCORE_C(_val, _core)	\
	bitx32((_val), (_core), (_core))
#define	L3SOC_SOFT_DOWNCORE_SET_DISCORE(_val, _new)	\
	bitset32((_val), 15, 0, (_new))
#define	L3SOC_SOFT_DOWNCORE_SET_DISCORE_C(_val, _core)	\
	bitset32((_val), (_core), (_core), 1)
303
304 /*
305 * L3::L3SOC::CcxCoreEnable0 -- see SMU::PWR::CORE_ENABLE.
306 */
/*CSTYLED*/
#define	D_L3SOC_CORE_EN	(const smn_reg_def_t){	\
	.srd_unit = SMN_UNIT_L3SOC,	\
	.srd_reg = 0x3c	\
}
/* Builds the register for the CCD numbered `ccd`. */
#define	L3SOC_CORE_EN(ccd)	\
	amdzen_l3soc_smn_reg((ccd), D_L3SOC_CORE_EN, 0)
/*
 * The enable bitmap is bits 15:0. The _C variants operate on the single bit
 * at position `_core`, which is expanded twice and so must be free of side
 * effects.
 */
#define	L3SOC_CORE_EN_GET(_val)	bitx32((_val), 15, 0)
#define	L3SOC_CORE_EN_GET_C(_val, _core)	\
	bitx32((_val), (_core), (_core))
#define	L3SOC_CORE_EN_SET(_val, _new)	bitset32((_val), 15, 0, (_new))
#define	L3SOC_CORE_EN_SET_C(_val, _core)	\
	bitset32((_val), (_core), (_core), 1)
318
319 /*
320 * SCFCTP registers. A copy of these exists for each core. One thing to be aware
321 * of is that not all cores are enabled and this requires looking at the
322 * SMU::PWR/L3::SOC registers above or the DF::CoreEnable. The aperture for
323 * these starts at 2000_0000h. Each core is then spaced 2_0000h apart while each
324 * CCD has a 23-bit stride and each CCX has a 22 bit stride. The number of cores
325 * and CCXes varies based upon the generation. We size this based on what we
326 * anticipate the maximums to be.
327 *
 * In the future, it'd be good to have a way to constrain the values we accept
 * to something less than the maximum across all products, but this is often
 * used before we have fully fleshed out the uarchrev part of CPUID making it
 * challenging at the moment.
332 */
333 #define SCFCTP_CORE_STRIDE 0x20000
334 #define SCFCTP_MAX_ENTS 16
335 static inline smn_reg_t
amdzen_scfctp_smn_reg(const uint8_t ccdno,const uint8_t ccxno,const smn_reg_def_t def,const uint16_t reginst)336 amdzen_scfctp_smn_reg(const uint8_t ccdno, const uint8_t ccxno,
337 const smn_reg_def_t def, const uint16_t reginst)
338 {
339 const uint32_t APERTURE_BASE = 0x20000000;
340 const uint32_t APERTURE_MASK = 0xffc00000;
341 CTASSERT((APERTURE_BASE & ~APERTURE_MASK) == 0);
342
343 const uint32_t ccdno32 = (const uint32_t)ccdno;
344 const uint32_t ccxno32 = (const uint32_t)ccxno;
345 const uint32_t reginst32 = (const uint32_t)reginst;
346 const uint32_t size32 = (def.srd_size == 0) ? 4 :
347 (const uint32_t)def.srd_size;
348
349 const uint32_t stride = (def.srd_stride == 0) ? 4 : def.srd_stride;
350 const uint32_t nents = (def.srd_nents == 0) ? 1 :
351 (const uint32_t)def.srd_nents;
352
353 ASSERT(size32 == 1 || size32 == 2 || size32 == 4);
354 ASSERT3S(def.srd_unit, ==, SMN_UNIT_SCFCTP);
355 ASSERT3U(stride, ==, SCFCTP_CORE_STRIDE);
356 ASSERT3U(nents, ==, SCFCTP_MAX_ENTS);
357 ASSERT3U(ccdno32, <, 16);
358 ASSERT3U(ccxno32, <, 2);
359 ASSERT3U(nents, >, reginst32);
360
361 const uint32_t aperture_off = (ccdno32 << 23) + (ccxno << 22);
362 ASSERT3U(aperture_off, <=, UINT32_MAX - APERTURE_BASE);
363
364 const uint32_t aperture = APERTURE_BASE + aperture_off;
365 ASSERT0(aperture & ~APERTURE_MASK);
366
367 const uint32_t reg = def.srd_reg + reginst32 * stride;
368 ASSERT0(reg & APERTURE_MASK);
369
370 return (SMN_MAKE_REG_SIZED(aperture + reg, size32));
371 }
372
373 /*
374 * L3::SCFCTP::PMREG_INITPKG0 - Nominally writable, this register contains
375 * information allowing us to discover where this core fits into the logical and
376 * physical topology of the processor.
377 */
/*CSTYLED*/
#define	D_SCFCTP_PMREG_INITPKG0	(const smn_reg_def_t){	\
	.srd_unit = SMN_UNIT_SCFCTP,	\
	.srd_reg = 0x2fd0,	\
	.srd_nents = SCFCTP_MAX_ENTS,	\
	.srd_stride = SCFCTP_CORE_STRIDE	\
}
/* Builds the register for core `core` of CCX `ccx` on CCD `ccd`. */
#define	SCFCTP_PMREG_INITPKG0(ccd, ccx, core)	\
	amdzen_scfctp_smn_reg((ccd), (ccx), D_SCFCTP_PMREG_INITPKG0, (core))
/* Logical die, CCX and core numbers: bits 22:19, 18 and 17:14. */
#define	SCFCTP_PMREG_INITPKG0_GET_LOG_DIE(_val)	bitx32((_val), 22, 19)
#define	SCFCTP_PMREG_INITPKG0_GET_LOG_CCX(_val)	bitx32((_val), 18, 18)
#define	SCFCTP_PMREG_INITPKG0_GET_LOG_CORE(_val)	bitx32((_val), 17, 14)
/* Socket: bits 13:12. */
#define	SCFCTP_PMREG_INITPKG0_GET_SOCKET(_val)	bitx32((_val), 13, 12)
/* Physical die, CCX and core numbers: bits 11:8, 7 and 6:3. */
#define	SCFCTP_PMREG_INITPKG0_GET_PHYS_DIE(_val)	bitx32((_val), 11, 8)
#define	SCFCTP_PMREG_INITPKG0_GET_PHYS_CCX(_val)	bitx32((_val), 7, 7)
#define	SCFCTP_PMREG_INITPKG0_GET_PHYS_CORE(_val)	bitx32((_val), 6, 3)
/* SMTEN: bits 2:0. */
#define	SCFCTP_PMREG_INITPKG0_GET_SMTEN(_val)	bitx32((_val), 2, 0)
395
396 /*
397 * L3::SCFCTP::PMREG_INITPKG7 - Similarly, this register describes this
398 * processor's overall internal core topology. The most notable addition to this
399 * register has been the addition of a bit which causes the APIC ID for the CCX
400 * to be shifted and covered by at least 4 bits. That is, if the number of bits
401 * required to cover SCFCTP_PMREG_INITPKG7_GET_N_CCXS is less than 4, it should
402 * be assumed to require 4 bits.
403 */
/*CSTYLED*/
#define	D_SCFCTP_PMREG_INITPKG7	(const smn_reg_def_t){	\
	.srd_unit = SMN_UNIT_SCFCTP,	\
	.srd_reg = 0x2fec,	\
	.srd_nents = SCFCTP_MAX_ENTS,	\
	.srd_stride = SCFCTP_CORE_STRIDE	\
}
/* Builds the register for core `core` of CCX `ccx` on CCD `ccd`. */
#define	SCFCTP_PMREG_INITPKG7(ccd, ccx, core)	\
	amdzen_scfctp_smn_reg((ccd), (ccx), D_SCFCTP_PMREG_INITPKG7, (core))
/* Socket, die, CCX and core counts: bits 26:25, 24:21, 20 and 19:16. */
#define	SCFCTP_PMREG_INITPKG7_GET_N_SOCKETS(_val)	bitx32((_val), 26, 25)
#define	SCFCTP_PMREG_INITPKG7_GET_N_DIES(_val)	bitx32((_val), 24, 21)
#define	SCFCTP_PMREG_INITPKG7_GET_N_CCXS(_val)	bitx32((_val), 20, 20)
#define	SCFCTP_PMREG_INITPKG7_GET_N_CORES(_val)	bitx32((_val), 19, 16)
/*
 * Single-bit flags in bits 11 through 7, followed by SOCID in bits 6:3.
 * NOTE(review): the ZEN3/ZEN4 tags in two of the names presumably mark
 * generation-specific fields -- confirm against the relevant PPR.
 */
#define	SCFCTP_PMREG_INITPKG7_ZEN4_GET_16TAPIC(_val)	bitx32((_val), 11, 11)
#define	SCFCTP_PMREG_INITPKG7_GET_CHIDXHASHEN(_val)	bitx32((_val), 10, 10)
#define	SCFCTP_PMREG_INITPKG7_GET_S3(_val)	bitx32((_val), 9, 9)
#define	SCFCTP_PMREG_INITPKG7_ZEN3_GET_S0I3(_val)	bitx32((_val), 8, 8)
#define	SCFCTP_PMREG_INITPKG7_GET_CORETYPEISARM(_val)	bitx32((_val), 7, 7)
#define	SCFCTP_PMREG_INITPKG7_GET_SOCID(_val)	bitx32((_val), 6, 3)
424 #ifdef __cplusplus
425 }
426 #endif
427
428 #endif /* _SYS_AMDZEN_CCD_H */
429