xref: /illumos-gate/usr/src/uts/intel/sys/amdzen/ccd.h (revision a89c0811c892ec231725fe10817ef95dda813c06)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2023 Oxide Computer Company
14  */
15 
16 #ifndef _SYS_AMDZEN_CCD_H
17 #define	_SYS_AMDZEN_CCD_H
18 
19 /*
20  * SMN registers that are CCD-specific (core complex die) but are spread across
21  * multiple functional units. This could be combined with <sys/amdzen/ccx.h>
22  * once the duplication between that and <sys/controlregs.h> is dealt with.
23  *
24  * Currently this covers two different groups:
25  *
26  * SMU::PWR	This group describes information about the CCD and, unlike the
27  *		DF CCM entries, this is only present if an actual die is
28  *		present in the package. These registers are always present
29  *		starting in Zen 2.
30  *
31  * L3::SCFCTP	The Scalable Control Fabric, Clocks, Test, and Power Gating
32  *		registers exist on a per-core basis within each CCD. The first
33  *		point that we can find that this exists started in Zen 3.
34  *
35  * The register naming and fields generally follow the conventions that the DF
36  * and UMC have laid out. The one divergence right now is that the functional
37  * blocks only exist starting in a given Zen uarch (e.g. Zen 2). Once we have
38  * divergences from that introduction point then like the MSRs and others we
39  * will introduce the generation-specific part of the name.
40  */
41 
42 #include <sys/bitext.h>
43 #include <sys/debug.h>
44 #include <sys/types.h>
45 #include <sys/amdzen/smn.h>
46 
47 #ifdef __cplusplus
48 extern "C" {
49 #endif
50 
51 /*
52  * SMU::PWR registers, per-CCD.  This functional unit is present starting in Zen
53  * based platforms.  Note that there is another aperture at 0x4008_1000 that is
54  * documented to alias CCD 0.  It's not really clear what if any utility that's
55  * supposed to have, except that the name given to these aliases contains
56  * "LOCAL" which implies that perhaps rather than aliasing CCD 0 it instead is
57  * decoded by the unit on the originating CCD.  We don't use that in any case.
58  *
59  * Because Genoa supports up to 12 CCDs, they did add a second aperture that
60  * starts at 4a08_1000h and uses the same shifts. This leads to some awkwardness
61  * below. This does make it harder to get at this. We should investigate
62  * including the uarch to determine limits at some point in the future, like we
63  * have done with some of our DF registers.
64  */
65 static inline smn_reg_t
66 amdzen_smupwr_smn_reg(const uint8_t ccdno, const smn_reg_def_t def,
67     const uint16_t reginst)
68 {
69 	const uint32_t APERTURE_BASE = 0x30081000;
70 	const uint32_t APERTURE_HI_BASE = 0x4a081000;
71 	const uint32_t APERTURE_MASK = 0xfffff000;
72 	CTASSERT((APERTURE_BASE & ~APERTURE_MASK) == 0);
73 	CTASSERT((APERTURE_HI_BASE & ~APERTURE_MASK) == 0);
74 
75 	const uint32_t ccdno32 = (const uint32_t)ccdno;
76 	const uint32_t reginst32 = (const uint32_t)reginst;
77 	const uint32_t size32 = (def.srd_size == 0) ? 4 :
78 	    (const uint32_t)def.srd_size;
79 
80 	const uint32_t stride = (def.srd_stride == 0) ? size32 : def.srd_stride;
81 	const uint32_t nents = (def.srd_nents == 0) ? 1 :
82 	    (const uint32_t)def.srd_nents;
83 
84 	ASSERT(size32 == 1 || size32 == 2 || size32 == 4);
85 	ASSERT3S(def.srd_unit, ==, SMN_UNIT_SMUPWR);
86 	ASSERT3U(ccdno32, <, 12);
87 	ASSERT3U(nents, >, reginst32);
88 
89 	uint32_t aperture_base, aperture_off;
90 	if (ccdno >= 8) {
91 		aperture_base = APERTURE_HI_BASE;
92 		aperture_off = (ccdno32 - 8) << 25;
93 	} else {
94 		aperture_base = APERTURE_BASE;
95 		aperture_off = ccdno32 << 25;
96 	}
97 	ASSERT3U(aperture_off, <=, UINT32_MAX - aperture_base);
98 
99 	const uint32_t aperture = aperture_base + aperture_off;
100 	ASSERT0(aperture & ~APERTURE_MASK);
101 
102 	const uint32_t reg = def.srd_reg + reginst32 * stride;
103 	ASSERT0(reg & APERTURE_MASK);
104 
105 	return (SMN_MAKE_REG_SIZED(aperture + reg, size32));
106 }
107 
108 /*
109  * SMU::PWR::CCD_DIE_ID - does what it says.
110  */
/*CSTYLED*/
#define	D_SMUPWR_CCD_DIE_ID	(const smn_reg_def_t){	\
	.srd_unit = SMN_UNIT_SMUPWR,			\
	.srd_reg = 0x00					\
}
/* SMN handle for instance 0 of this register on CCD 'ccd'. */
#define	SMUPWR_CCD_DIE_ID(ccd)	\
    amdzen_smupwr_smn_reg(ccd, D_SMUPWR_CCD_DIE_ID, 0)
/* Extracts bits 3:0 of the register value '_val'. */
#define	SMUPWR_CCD_DIE_ID_GET(_val)	bitx32(_val, 3, 0)
119 
120 /*
121  * SMU::PWR::THREAD_ENABLE - also does what it says; this is a bitmap of each of
122  * the possible threads.  If the bit is set, the thread runs.  Clearing bits
123  * is not allowed.  A bit set in here corresponds to a logical thread, though
124  * the exact layout is a bit tricky in the multi-CCX case.  When there are two
125  * core complexes on the die, all of CCX0's possible threads will come first,
126  * followed by all of CCX1's.  However, while this always describes _logical_
127  * threads, the spacing is based upon the width of the total possible physical
128  * cores in the CCX.
129  *
130  * For example, consider a Zen 2 system. It has 2 core complexes with 4 cores
131  * each. Regardless of how many logical cores and threads are enabled in each
132  * complex, CCX0 logical thread 0 always starts at bit 0 and CCX1 logical thread
133  * 0 always starts at bit 8. In a system that only has 3/4 cores enabled then
134  * we'd see this register set to 0x3f3f.  In Zen 3 and non-Bergamo Zen 4, this
135  * is the same width, but there is only one core complex.  In Bergamo, this is
136  * instead 32-bits wide with CCX1 thread 0 starting at bit 16.  All of this is
137  * to say that even though these bits correspond to logical threads, the CCX
138  * resets the bit position.
139  *
140  * However, if we move to a case where SMT is disabled then the CCX starting
141  * point is still the same, but there will not be a gap for threads within
142  * the CCX. So bit 0 will be logical CPU 0 thread 0, bit 1 logical CPU 1 thread
143  * 0, etc.
144  */
/*CSTYLED*/
#define	D_SMUPWR_THREAD_EN	(const smn_reg_def_t){	\
	.srd_unit = SMN_UNIT_SMUPWR,			\
	.srd_reg = 0x18					\
}
/* SMN handle for instance 0 of this register on CCD 'ccd'. */
#define	SMUPWR_THREAD_EN(ccd)	\
    amdzen_smupwr_smn_reg(ccd, D_SMUPWR_THREAD_EN, 0)
/*
 * Single-bit accessors for bit position '_thr' of the register value '_val'.
 * The SET form passes the value 1 to bitset32().  '_thr' is expanded twice,
 * so it must not have side effects.
 */
#define	SMUPWR_THREAD_EN_GET_T(_val, _thr)	\
    bitx32(_val, _thr, _thr)
#define	SMUPWR_THREAD_EN_SET_T(_val, _thr)	\
    bitset32(_val, _thr, _thr, 1)
154 
155 /*
156  * SMU::PWR::THREAD_CONFIGURATION - provides core and CCX counts for the die as
157  * well as whether SMT is enabled, and a bit to enable or disable SMT *after the
158  * next warm reset* (which we don't use).
159  */
/*CSTYLED*/
#define	D_SMUPWR_THREAD_CFG	(const smn_reg_def_t){	\
	.srd_unit = SMN_UNIT_SMUPWR,			\
	.srd_reg = 0x1c					\
}
/* SMN handle for instance 0 of this register on CCD 'ccd'. */
#define	SMUPWR_THREAD_CFG(ccd)	\
    amdzen_smupwr_smn_reg(ccd, D_SMUPWR_THREAD_CFG, 0)
/* Field extractors, listed from the least significant bit upwards. */
#define	SMUPWR_THREAD_CFG_GET_CORE_COUNT(_val)		bitx32(_val, 3, 0)
#define	SMUPWR_THREAD_CFG_GET_COMPLEX_COUNT(_val)	bitx32(_val, 7, 4)
#define	SMUPWR_THREAD_CFG_GET_SMT_MODE(_val)		bitx32(_val, 8, 8)
/* Values of the single-bit SMT_MODE field. */
#define	SMUPWR_THREAD_CFG_SMT_MODE_SMT		0
#define	SMUPWR_THREAD_CFG_SMT_MODE_1T		1
172 
173 /*
174  * SMU::PWR::SOFT_DOWNCORE - provides a bitmap of cores that may exist; setting
175  * each bit disables the corresponding core.  Presumably after a warm reset.
176  */
/*CSTYLED*/
#define	D_SMUPWR_SOFT_DOWNCORE	(const smn_reg_def_t){	\
	.srd_unit = SMN_UNIT_SMUPWR,			\
	.srd_reg = 0x20					\
}
/* SMN handle for instance 0 of this register on CCD 'ccd'. */
#define	SMUPWR_SOFT_DOWNCORE(ccd)	\
    amdzen_smupwr_smn_reg(ccd, D_SMUPWR_SOFT_DOWNCORE, 0)
/* Whole-field accessors covering bits 7:0 of the register value '_reg'. */
#define	SMUPWR_SOFT_DOWNCORE_GET_DISCORE(_reg)	\
    bitx32(_reg, 7, 0)
#define	SMUPWR_SOFT_DOWNCORE_SET_DISCORE(_reg, _map)	\
    bitset32(_reg, 7, 0, _map)
/*
 * Per-core accessors for bit position '_core'.  The SET form passes the value
 * 1 to bitset32().  '_core' is expanded twice, so it must not have side
 * effects.
 */
#define	SMUPWR_SOFT_DOWNCORE_GET_DISCORE_C(_reg, _core)	\
    bitx32(_reg, _core, _core)
#define	SMUPWR_SOFT_DOWNCORE_SET_DISCORE_C(_reg, _core)	\
    bitset32(_reg, _core, _core, 1)
188 
189 /*
190  * SMU::PWR::CORE_ENABLE - nominally writable, this register contains a bitmap
191  * of cores; a bit that is set means the core whose physical ID is that bit
192  * position is enabled.  The effect of modifying this register, if any, is
193  * undocumented and unknown.
194  */
/*CSTYLED*/
#define	D_SMUPWR_CORE_EN	(const smn_reg_def_t){	\
	.srd_unit = SMN_UNIT_SMUPWR,			\
	.srd_reg = 0x24					\
}
/* SMN handle for instance 0 of this register on CCD 'ccd'. */
#define	SMUPWR_CORE_EN(ccd)	\
    amdzen_smupwr_smn_reg(ccd, D_SMUPWR_CORE_EN, 0)
/* Whole-field accessors covering bits 7:0 of the register value '_reg'. */
#define	SMUPWR_CORE_EN_GET(_reg)		bitx32(_reg, 7, 0)
#define	SMUPWR_CORE_EN_SET(_reg, _map)		bitset32(_reg, 7, 0, _map)
/*
 * Per-core accessors for bit position '_core'.  The SET form passes the value
 * 1 to bitset32().  '_core' is expanded twice, so it must not have side
 * effects.
 */
#define	SMUPWR_CORE_EN_GET_C(_reg, _core)	\
    bitx32(_reg, _core, _core)
#define	SMUPWR_CORE_EN_SET_C(_reg, _core)	\
    bitset32(_reg, _core, _core, 1)
206 
207 /*
208  * SCFCTP registers. A copy of these exists for each core. One thing to be aware
209  * of is that not all cores are enabled and this requires looking at the
210  * SMU::PWR registers above or the DF::CoreEnable. The aperture for these starts
211  * at 2000_0000h. Each core is then spaced 2_0000h apart while each CCD has a
212  * 23-bit stride and each CCX has a 22 bit stride. The number of cores per CCX
213  * still caps at 8, which is what the various .srd_nents entries should be for
214  * all registers in this space. The number of CCDs does vary per platform, but
215  * we size this for the current largest number of 12 in Genoa and two CCXs.
216  *
217  * In the future, it'd be good to have a way to constrain the values we accept
218  * to something less than the maximum across all products, but this is often
219  * used before we have fully flushed out the uarchrev part of CPUID making it
220  * challenging at the moment.
221  */
222 #define	SCFCTP_CORE_STRIDE	0x20000
223 static inline smn_reg_t
224 amdzen_scfctp_smn_reg(const uint8_t ccdno, const uint8_t ccxno,
225     const smn_reg_def_t def, const uint16_t reginst)
226 {
227 	const uint32_t APERTURE_BASE = 0x20000000;
228 	const uint32_t APERTURE_MASK = SMN_APERTURE_MASK;
229 	CTASSERT((APERTURE_BASE & ~APERTURE_MASK) == 0);
230 
231 	const uint32_t ccdno32 = (const uint32_t)ccdno;
232 	const uint32_t ccxno32 = (const uint32_t)ccxno;
233 	const uint32_t reginst32 = (const uint32_t)reginst;
234 	const uint32_t size32 = (def.srd_size == 0) ? 4 :
235 	    (const uint32_t)def.srd_size;
236 
237 	const uint32_t stride = (def.srd_stride == 0) ? 4 : def.srd_stride;
238 	const uint32_t nents = (def.srd_nents == 0) ? 1 :
239 	    (const uint32_t)def.srd_nents;
240 
241 	ASSERT(size32 == 1 || size32 == 2 || size32 == 4);
242 	ASSERT3S(def.srd_unit, ==, SMN_UNIT_SCFCTP);
243 	ASSERT3U(stride, ==, SCFCTP_CORE_STRIDE);
244 	ASSERT3U(nents, ==, 8);
245 	ASSERT3U(ccdno32, <, 12);
246 	ASSERT3U(ccxno32, <, 2);
247 	ASSERT3U(nents, >, reginst32);
248 
249 	const uint32_t aperture_off = (ccdno32 << 23) + (ccxno << 22);
250 	ASSERT3U(aperture_off, <=, UINT32_MAX - APERTURE_BASE);
251 
252 	const uint32_t aperture = APERTURE_BASE + aperture_off;
253 	ASSERT0(aperture & ~APERTURE_MASK);
254 
255 	const uint32_t reg = def.srd_reg + reginst32 * stride;
256 	ASSERT0(reg & APERTURE_MASK);
257 
258 	return (SMN_MAKE_REG_SIZED(aperture + reg, size32));
259 }
260 
261 /*
262  * L3::SCFCTP::PMREG_INITPKG0 - Nominally writable, this register contains
263  * information allowing us to discover where this core fits into the logical and
264  * physical topology of the processor.
265  */
/*CSTYLED*/
#define	D_SCFCTP_PMREG_INITPKG0	(const smn_reg_def_t){	\
	.srd_unit = SMN_UNIT_SCFCTP,			\
	.srd_reg = 0x2fd0,				\
	.srd_nents = 8,					\
	.srd_stride = SCFCTP_CORE_STRIDE		\
}
/* SMN handle for the copy of this register belonging to the given core. */
#define	SCFCTP_PMREG_INITPKG0(_ccd, _ccx, _core)	\
    amdzen_scfctp_smn_reg(_ccd, _ccx, D_SCFCTP_PMREG_INITPKG0, _core)
/* Field extractors, listed from the least significant bit upwards. */
#define	SCFCTP_PMREG_INITPKG0_GET_SMTEN(_val)		bitx32(_val, 2, 0)
#define	SCFCTP_PMREG_INITPKG0_GET_PHYS_CORE(_val)	bitx32(_val, 6, 3)
#define	SCFCTP_PMREG_INITPKG0_GET_PHYS_CCX(_val)	bitx32(_val, 7, 7)
#define	SCFCTP_PMREG_INITPKG0_GET_PHYS_DIE(_val)	bitx32(_val, 11, 8)
#define	SCFCTP_PMREG_INITPKG0_GET_SOCKET(_val)		bitx32(_val, 13, 12)
#define	SCFCTP_PMREG_INITPKG0_GET_LOG_CORE(_val)	bitx32(_val, 17, 14)
#define	SCFCTP_PMREG_INITPKG0_GET_LOG_CCX(_val)		bitx32(_val, 18, 18)
#define	SCFCTP_PMREG_INITPKG0_GET_LOG_DIE(_val)		bitx32(_val, 22, 19)
283 
284 /*
285  * L3::SCFCTP::PMREG_INITPKG7 - Similarly, this register describes this
286  * processor's overall internal core topology. The most notable addition to this
287  * register has been the addition of a bit which causes the APIC ID for the CCX
288  * to be shifted and covered by at least 4 bits. That is, if the number of bits
289  * required to cover SCFCTP_PMREG_INITPKG7_GET_N_CCXS is less than 4, it should
290  * be assumed to require 4 bits.
291  */
/*CSTYLED*/
#define	D_SCFCTP_PMREG_INITPKG7	(const smn_reg_def_t){	\
	.srd_unit = SMN_UNIT_SCFCTP,			\
	.srd_reg = 0x2fec,				\
	.srd_nents = 8,					\
	.srd_stride = SCFCTP_CORE_STRIDE		\
}
/* SMN handle for the copy of this register belonging to the given core. */
#define	SCFCTP_PMREG_INITPKG7(_ccd, _ccx, _core)	\
    amdzen_scfctp_smn_reg(_ccd, _ccx, D_SCFCTP_PMREG_INITPKG7, _core)
/* Field extractors, listed from the least significant bit upwards. */
#define	SCFCTP_PMREG_INITPKG7_GET_SOCID(_val)		bitx32(_val, 6, 3)
#define	SCFCTP_PMREG_INITPKG7_GET_CORETYPEISARM(_val)	bitx32(_val, 7, 7)
#define	SCFCTP_PMREG_INITPKG7_ZEN3_GET_S0I3(_val)	bitx32(_val, 8, 8)
#define	SCFCTP_PMREG_INITPKG7_GET_S3(_val)		bitx32(_val, 9, 9)
#define	SCFCTP_PMREG_INITPKG7_GET_CHIDXHASHEN(_val)	bitx32(_val, 10, 10)
#define	SCFCTP_PMREG_INITPKG7_ZEN4_GET_16TAPIC(_val)	bitx32(_val, 11, 11)
#define	SCFCTP_PMREG_INITPKG7_GET_N_CORES(_val)		bitx32(_val, 19, 16)
#define	SCFCTP_PMREG_INITPKG7_GET_N_CCXS(_val)		bitx32(_val, 20, 20)
#define	SCFCTP_PMREG_INITPKG7_GET_N_DIES(_val)		bitx32(_val, 24, 21)
#define	SCFCTP_PMREG_INITPKG7_GET_N_SOCKETS(_val)	bitx32(_val, 26, 25)
311 
312 #ifdef __cplusplus
313 }
314 #endif
315 
316 #endif /* _SYS_AMDZEN_CCD_H */
317