1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2024 Oxide Computer Company 14 */ 15 16 #ifndef _SYS_AMDZEN_CCD_H 17 #define _SYS_AMDZEN_CCD_H 18 19 /* 20 * SMN registers that are CCD-specific (core complex die) but are spread across 21 * multiple functional units. This could be combined with <sys/amdzen/ccx.h> 22 * once the duplication between that and <sys/controlregs.h> is dealt with. 23 * 24 * Currently this covers two different groups: 25 * 26 * SMU::PWR This group describes information about the CCD and, unlike the 27 * DF CCM entries, this is only present if an actual die is 28 * present in the package. These registers are always present 29 * starting in Zen 2. 30 * 31 * L3::SCFCTP The Scalable Control Fabric, Clocks, Test, and Power Gating 32 * registers exist on a per-core basis within each CCD. The first 33 * point that we can find that this exists started in Zen 3. 34 * 35 * L3::SOC This was added starting in Zen 5 and contains several of the 36 * registers that used to exist in SMU::PWR. 37 * 38 * The register naming and fields generally follows the conventions that the DF 39 * and UMC have laid out. The one divergence right now is that the functional 40 * blocks only exist starting in a given Zen uarch (e.g. Zen 2). Once we have 41 * divergences from that introduction point then like the MSRs and others we 42 * will introduce the generation-specific part of the name. 43 */ 44 45 #include <sys/bitext.h> 46 #include <sys/debug.h> 47 #include <sys/types.h> 48 #include <sys/amdzen/smn.h> 49 50 #ifdef __cplusplus 51 extern "C" { 52 #endif 53 54 /* 55 * SMU::PWR registers, per-CCD. This functional unit is present starting in Zen 56 * based platforms. Note that there is another aperture at 0x4008_1000 that is 57 * documented to alias CCD 0. It's not really clear what if any utility that's 58 * supposed to have, except that the name given to these aliases contains 59 * "LOCAL" which implies that perhaps rather than aliasing CCD 0 it instead is 60 * decoded by the unit on the originating CCD. We don't use that in any case. 61 * 62 * Once SoCs started supporting more than 8 CCDs with Zen 4, they added a second 63 * aperture that starts at 4a08_1000h and uses the same shifts. This leads to 64 * some awkwardness below. This does make it harder to get at this. We should 65 * investigate to include the uarch to determine limits at some point in the 66 * future like we have done with some of our DF registers. 67 * 68 * Starting in Zen 5, a chunk of the registers described here are all now in 69 * the L3::SOC block. 70 */ 71 static inline smn_reg_t 72 amdzen_smupwr_smn_reg(const uint8_t ccdno, const smn_reg_def_t def, 73 const uint16_t reginst) 74 { 75 const uint32_t APERTURE_BASE = 0x30081000; 76 const uint32_t APERTURE_HI_BASE = 0x4a081000; 77 const uint32_t APERTURE_MASK = 0xfffff000; 78 CTASSERT((APERTURE_BASE & ~APERTURE_MASK) == 0); 79 CTASSERT((APERTURE_HI_BASE & ~APERTURE_MASK) == 0); 80 81 const uint32_t ccdno32 = (const uint32_t)ccdno; 82 const uint32_t reginst32 = (const uint32_t)reginst; 83 const uint32_t size32 = (def.srd_size == 0) ? 4 : 84 (const uint32_t)def.srd_size; 85 86 const uint32_t stride = (def.srd_stride == 0) ? size32 : def.srd_stride; 87 const uint32_t nents = (def.srd_nents == 0) ? 1 : 88 (const uint32_t)def.srd_nents; 89 90 ASSERT(size32 == 1 || size32 == 2 || size32 == 4); 91 ASSERT3S(def.srd_unit, ==, SMN_UNIT_SMUPWR); 92 ASSERT3U(ccdno32, <, 16); 93 ASSERT3U(nents, >, reginst32); 94 95 uint32_t aperture_base, aperture_off; 96 if (ccdno >= 8) { 97 aperture_base = APERTURE_HI_BASE; 98 aperture_off = (ccdno32 - 8) << 25; 99 } else { 100 aperture_base = APERTURE_BASE; 101 aperture_off = ccdno32 << 25; 102 } 103 ASSERT3U(aperture_off, <=, UINT32_MAX - aperture_base); 104 105 const uint32_t aperture = aperture_base + aperture_off; 106 ASSERT0(aperture & ~APERTURE_MASK); 107 108 const uint32_t reg = def.srd_reg + reginst32 * stride; 109 ASSERT0(reg & APERTURE_MASK); 110 111 return (SMN_MAKE_REG_SIZED(aperture + reg, size32)); 112 } 113 114 /* 115 * SMU::PWR::CCD_DIE_ID - does what it says. 116 */ 117 /*CSTYLED*/ 118 #define D_SMUPWR_CCD_DIE_ID (const smn_reg_def_t){ \ 119 .srd_unit = SMN_UNIT_SMUPWR, \ 120 .srd_reg = 0x00 \ 121 } 122 #define SMUPWR_CCD_DIE_ID(c) \ 123 amdzen_smupwr_smn_reg(c, D_SMUPWR_CCD_DIE_ID, 0) 124 #define SMUPWR_CCD_DIE_ID_GET(_r) bitx32(_r, 3, 0) 125 126 /* 127 * SMU::PWR::THREAD_ENABLE - also does what it says; this is a bitmap of each of 128 * the possible threads. If the bit is set, the thread runs. Clearing bits 129 * is not allowed. A bit set in here corresponds to a logical thread, though 130 * the exact layout is a bit tricky in the multi-CCX case. When there are two 131 * core complexes on the die, all of CCX0's possible threads will come first, 132 * followed by all of CCX1's. However, while this always describes _logical_ 133 * threads, the spacing is based upon the width of the total possible physical 134 * cores in the CCX. 135 * 136 * For example, consider a Zen 2 system. It has 2 core complexes with 4 cores 137 * each. Regardless of how many logical cores and threads are enabled in each 138 * complex, CCX0 logical thread 0 always starts at bit 0 and CCX1 logical thread 139 * 0 always starts at bit 8. In a system that only has 3/4 cores enabled then 140 * we'd see this register set to 0x3f3f. In Zen 3 and non-Bergamo Zen 4, this 141 * is the same width, but there is only one core complex. In Bergamo, this is 142 * instead 32-bits wide with CCX1 thread 0 starting at bit 16. All of this is 143 * to say that even though these bits correspond to logical threads, the CCX 144 * resets the bit position. 145 * 146 * However, if we move to a case where SMT is disabled then the CCX starting 147 * point is still the same, but the there will not be a gap for threads within 148 * the CCX. So bit 0 will be logical CPU 0 thread 0, bit 1 logical CPU 1 thread 149 * 0, etc. 150 */ 151 /*CSTYLED*/ 152 #define D_SMUPWR_THREAD_EN (const smn_reg_def_t){ \ 153 .srd_unit = SMN_UNIT_SMUPWR, \ 154 .srd_reg = 0x18 \ 155 } 156 #define SMUPWR_THREAD_EN(c) \ 157 amdzen_smupwr_smn_reg(c, D_SMUPWR_THREAD_EN, 0) 158 #define SMUPWR_THREAD_EN_GET_T(_r, _t) bitx32(_r, _t, _t) 159 #define SMUPWR_THREAD_EN_SET_T(_r, _t) bitset32(_r, _t, _t, 1) 160 161 /* 162 * SMU::PWR::THREAD_CONFIGURATION - provides core and CCX counts for the die as 163 * well as whether SMT is enabled, and a bit to enable or disable SMT *after the 164 * next warm reset* (which we don't use). 165 */ 166 /*CSTYLED*/ 167 #define D_SMUPWR_THREAD_CFG (const smn_reg_def_t){ \ 168 .srd_unit = SMN_UNIT_SMUPWR, \ 169 .srd_reg = 0x1c \ 170 } 171 #define SMUPWR_THREAD_CFG(c) \ 172 amdzen_smupwr_smn_reg(c, D_SMUPWR_THREAD_CFG, 0) 173 #define SMUPWR_THREAD_CFG_GET_SMT_MODE(_r) bitx32(_r, 8, 8) 174 #define SMUPWR_THREAD_CFG_SMT_MODE_1T 1 175 #define SMUPWR_THREAD_CFG_SMT_MODE_SMT 0 176 #define SMUPWR_THREAD_CFG_GET_COMPLEX_COUNT(_r) bitx32(_r, 7, 4) 177 #define SMUPWR_THREAD_CFG_GET_CORE_COUNT(_r) bitx32(_r, 3, 0) 178 179 /* 180 * SMU::PWR::SOFT_DOWNCORE - provides a bitmap of cores that may exist; setting 181 * each bit disables the corresponding core. Presumably after a warm reset. 182 */ 183 /*CSTYLED*/ 184 #define D_SMUPWR_SOFT_DOWNCORE (const smn_reg_def_t){ \ 185 .srd_unit = SMN_UNIT_SMUPWR, \ 186 .srd_reg = 0x20 \ 187 } 188 #define SMUPWR_SOFT_DOWNCORE(c) \ 189 amdzen_smupwr_smn_reg(c, D_SMUPWR_SOFT_DOWNCORE, 0) 190 #define SMUPWR_SOFT_DOWNCORE_GET_DISCORE(_r) bitx32(_r, 7, 0) 191 #define SMUPWR_SOFT_DOWNCORE_GET_DISCORE_C(_r, _c) bitx32(_r, _c, _c) 192 #define SMUPWR_SOFT_DOWNCORE_SET_DISCORE(_r, _v) bitset32(_r, 7, 0, _v) 193 #define SMUPWR_SOFT_DOWNCORE_SET_DISCORE_C(_r, _c) bitset32(_r, _c, _c, 1) 194 195 /* 196 * SMU::PWR::CORE_ENABLE - nominally writable, this register contains a bitmap 197 * of cores; a bit that is set means the core whose physical ID is that bit 198 * position is enabled. The effect of modifying this register, if any, is 199 * undocumented and unknown. 200 */ 201 /*CSTYLED*/ 202 #define D_SMUPWR_CORE_EN (const smn_reg_def_t){ \ 203 .srd_unit = SMN_UNIT_SMUPWR, \ 204 .srd_reg = 0x24 \ 205 } 206 #define SMUPWR_CORE_EN(c) \ 207 amdzen_smupwr_smn_reg(c, D_SMUPWR_CORE_EN, 0) 208 #define SMUPWR_CORE_EN_GET(_r) bitx32(_r, 7, 0) 209 #define SMUPWR_CORE_EN_GET_C(_r, _c) bitx32(_r, _c, _c) 210 #define SMUPWR_CORE_EN_SET(_r, _v) bitset32(_r, 7, 0, _v) 211 #define SMUPWR_CORE_EN_SET_C(_r, _c) bitset32(_r, _c, _c, 1) 212 213 /* 214 * L3::SOC registers, per-CCD. This functional unit is present starting in Zen 215 * 5 based platforms. This covers a majority of the things that are described 216 * above in the SMU::PWR section, except for SMU::PWR::CCD_DIE_ID. CCDs are at a 217 * 23-bit stride. 218 */ 219 static inline smn_reg_t 220 amdzen_l3soc_smn_reg(const uint8_t ccdno, const smn_reg_def_t def, 221 const uint16_t reginst) 222 { 223 const uint32_t APERTURE_BASE = 0x203c0000; 224 const uint32_t APERTURE_MASK = 0xfffc0000; 225 CTASSERT((APERTURE_BASE & ~APERTURE_MASK) == 0); 226 227 const uint32_t ccdno32 = (const uint32_t)ccdno; 228 const uint32_t reginst32 = (const uint32_t)reginst; 229 const uint32_t size32 = (def.srd_size == 0) ? 4 : 230 (const uint32_t)def.srd_size; 231 232 const uint32_t stride = (def.srd_stride == 0) ? size32 : def.srd_stride; 233 const uint32_t nents = (def.srd_nents == 0) ? 1 : 234 (const uint32_t)def.srd_nents; 235 236 ASSERT(size32 == 1 || size32 == 2 || size32 == 4); 237 ASSERT3S(def.srd_unit, ==, SMN_UNIT_L3SOC); 238 ASSERT3U(ccdno32, <, 16); 239 ASSERT3U(nents, >, reginst32); 240 241 uint32_t aperture_base, aperture_off; 242 aperture_base = APERTURE_BASE; 243 aperture_off = ccdno32 << 23; 244 ASSERT3U(aperture_off, <=, UINT32_MAX - aperture_base); 245 246 const uint32_t aperture = aperture_base + aperture_off; 247 ASSERT0(aperture & ~APERTURE_MASK); 248 249 const uint32_t reg = def.srd_reg + reginst32 * stride; 250 ASSERT0(reg & APERTURE_MASK); 251 252 return (SMN_MAKE_REG_SIZED(aperture + reg, size32)); 253 254 } 255 256 /* 257 * L3::L3SOC::CcxThreadEnable0 - the Zen 5+ variant of SMU::PWR::THREAD_ENABLE. 258 * See the description there. 259 */ 260 /*CSTYLED*/ 261 #define D_L3SOC_THREAD_EN (const smn_reg_def_t){ \ 262 .srd_unit = SMN_UNIT_L3SOC, \ 263 .srd_reg = 0x20 \ 264 } 265 #define L3SOC_THREAD_EN(c) \ 266 amdzen_l3soc_smn_reg(c, D_L3SOC_THREAD_EN, 0) 267 #define L3SOC_THREAD_EN_GET_T(_r, _t) bitx32(_r, _t, _t) 268 #define L3SOC_THREAD_EN_SET_T(_r, _t) bitset32(_r, _t, _t, 1) 269 270 /* 271 * L3::L3SOC::CcxThreadConfiguration - the Zen 5+ variant of 272 * SMU::PWR::THREAD_CONFIGURATION. Indicates information about enabled cores, 273 * complexes, and SMT. The fields have similar semantics but are at different 274 * locations. 275 */ 276 /*CSTYLED*/ 277 #define D_L3SOC_THREAD_CFG (const smn_reg_def_t){ \ 278 .srd_unit = SMN_UNIT_L3SOC, \ 279 .srd_reg = 0x30 \ 280 } 281 #define L3SOC_THREAD_CFG(c) \ 282 amdzen_l3soc_smn_reg(c, D_L3SOC_THREAD_CFG, 0) 283 #define L3SOC_THREAD_CFG_GET_SMT_MODE(_r) bitx32(_r, 10, 10) 284 #define L3SOC_THREAD_CFG_SMT_MODE_1T 1 285 #define L3SOC_THREAD_CFG_SMT_MODE_SMT 0 286 #define L3SOC_THREAD_CFG_GET_COMPLEX_COUNT(_r) bitx32(_r, 9, 6) 287 #define L3SOC_THREAD_CFG_GET_CORE_COUNT(_r) bitx32(_r, 3, 0) 288 289 /* 290 * L3::L3SOC::CcxSoftDownCore0 - see SMU::PWR::SOFT_DOWNCORE. 291 */ 292 /*CSTYLED*/ 293 #define D_L3SOC_SOFT_DOWNCORE (const smn_reg_def_t){ \ 294 .srd_unit = SMN_UNIT_L3SOC, \ 295 .srd_reg = 0x34 \ 296 } 297 #define L3SOC_SOFT_DOWNCORE(c) \ 298 amdzen_l3soc_smn_reg(c, D_L3SOC_SOFT_DOWNCORE, 0) 299 #define L3SOC_SOFT_DOWNCORE_GET_DISCORE(_r) bitx32(_r, 15, 0) 300 #define L3SOC_SOFT_DOWNCORE_GET_DISCORE_C(_r, _c) bitx32(_r, _c, _c) 301 #define L3SOC_SOFT_DOWNCORE_SET_DISCORE(_r, _v) bitset32(_r, 15, 0, _v) 302 #define L3SOC_SOFT_DOWNCORE_SET_DISCORE_C(_r, _c) bitset32(_r, _c, _c, 1) 303 304 /* 305 * L3::L3SOC::CcxCoreEnable0 -- see SMU::PWR::CORE_ENABLE. 306 */ 307 /*CSTYLED*/ 308 #define D_L3SOC_CORE_EN (const smn_reg_def_t){ \ 309 .srd_unit = SMN_UNIT_L3SOC, \ 310 .srd_reg = 0x3c \ 311 } 312 #define L3SOC_CORE_EN(c) \ 313 amdzen_l3soc_smn_reg(c, D_L3SOC_CORE_EN, 0) 314 #define L3SOC_CORE_EN_GET(_r) bitx32(_r, 15, 0) 315 #define L3SOC_CORE_EN_GET_C(_r, _c) bitx32(_r, _c, _c) 316 #define L3SOC_CORE_EN_SET(_r, _v) bitset32(_r, 15, 0, _v) 317 #define L3SOC_CORE_EN_SET_C(_r, _c) bitset32(_r, _c, _c, 1) 318 319 /* 320 * SCFCTP registers. A copy of these exists for each core. One thing to be aware 321 * of is that not all cores are enabled and this requires looking at the 322 * SMU::PWR/L3::SOC registers above or the DF::CoreEnable. The aperture for 323 * these starts at 2000_0000h. Each core is then spaced 2_0000h apart while each 324 * CCD has a 23-bit stride and each CCX has a 22 bit stride. The number of cores 325 * and CCXes varies based upon the generation. We size this based on what we 326 * anticipate the maximums to be. 327 * 328 * In the future, it'd be good to have a way to constrain the values we accept 329 * to something less than the maximum across all products, but this is often 330 * used before we have fully flushed out the uarchrev part of CPUID making it 331 * challenging at the moment. 332 */ 333 #define SCFCTP_CORE_STRIDE 0x20000 334 #define SCFCTP_MAX_ENTS 16 335 static inline smn_reg_t 336 amdzen_scfctp_smn_reg(const uint8_t ccdno, const uint8_t ccxno, 337 const smn_reg_def_t def, const uint16_t reginst) 338 { 339 const uint32_t APERTURE_BASE = 0x20000000; 340 const uint32_t APERTURE_MASK = 0xffc00000; 341 CTASSERT((APERTURE_BASE & ~APERTURE_MASK) == 0); 342 343 const uint32_t ccdno32 = (const uint32_t)ccdno; 344 const uint32_t ccxno32 = (const uint32_t)ccxno; 345 const uint32_t reginst32 = (const uint32_t)reginst; 346 const uint32_t size32 = (def.srd_size == 0) ? 4 : 347 (const uint32_t)def.srd_size; 348 349 const uint32_t stride = (def.srd_stride == 0) ? 4 : def.srd_stride; 350 const uint32_t nents = (def.srd_nents == 0) ? 1 : 351 (const uint32_t)def.srd_nents; 352 353 ASSERT(size32 == 1 || size32 == 2 || size32 == 4); 354 ASSERT3S(def.srd_unit, ==, SMN_UNIT_SCFCTP); 355 ASSERT3U(stride, ==, SCFCTP_CORE_STRIDE); 356 ASSERT3U(nents, ==, SCFCTP_MAX_ENTS); 357 ASSERT3U(ccdno32, <, 16); 358 ASSERT3U(ccxno32, <, 2); 359 ASSERT3U(nents, >, reginst32); 360 361 const uint32_t aperture_off = (ccdno32 << 23) + (ccxno << 22); 362 ASSERT3U(aperture_off, <=, UINT32_MAX - APERTURE_BASE); 363 364 const uint32_t aperture = APERTURE_BASE + aperture_off; 365 ASSERT0(aperture & ~APERTURE_MASK); 366 367 const uint32_t reg = def.srd_reg + reginst32 * stride; 368 ASSERT0(reg & APERTURE_MASK); 369 370 return (SMN_MAKE_REG_SIZED(aperture + reg, size32)); 371 } 372 373 /* 374 * L3::SCFCTP::PMREG_INITPKG0 - Nominally writable, this register contains 375 * information allowing us to discover where this core fits into the logical and 376 * physical topology of the processor. 377 */ 378 /*CSTYLED*/ 379 #define D_SCFCTP_PMREG_INITPKG0 (const smn_reg_def_t){ \ 380 .srd_unit = SMN_UNIT_SCFCTP, \ 381 .srd_reg = 0x2fd0, \ 382 .srd_nents = SCFCTP_MAX_ENTS, \ 383 .srd_stride = SCFCTP_CORE_STRIDE \ 384 } 385 #define SCFCTP_PMREG_INITPKG0(ccd, ccx, core) \ 386 amdzen_scfctp_smn_reg(ccd, ccx, D_SCFCTP_PMREG_INITPKG0, core) 387 #define SCFCTP_PMREG_INITPKG0_GET_LOG_DIE(_r) bitx32(_r, 22, 19) 388 #define SCFCTP_PMREG_INITPKG0_GET_LOG_CCX(_r) bitx32(_r, 18, 18) 389 #define SCFCTP_PMREG_INITPKG0_GET_LOG_CORE(_r) bitx32(_r, 17, 14) 390 #define SCFCTP_PMREG_INITPKG0_GET_SOCKET(_r) bitx32(_r, 13, 12) 391 #define SCFCTP_PMREG_INITPKG0_GET_PHYS_DIE(_r) bitx32(_r, 11, 8) 392 #define SCFCTP_PMREG_INITPKG0_GET_PHYS_CCX(_r) bitx32(_r, 7, 7) 393 #define SCFCTP_PMREG_INITPKG0_GET_PHYS_CORE(_r) bitx32(_r, 6, 3) 394 #define SCFCTP_PMREG_INITPKG0_GET_SMTEN(_r) bitx32(_r, 2, 0) 395 396 /* 397 * L3::SCFCTP::PMREG_INITPKG7 - Similarly, this register describes this 398 * processor's overall internal core topology. The most notable addition to this 399 * register has been the addition of a bit which causes the APIC ID for the CCX 400 * to be shifted and covered by at least 4 bits. That is, if the number of bits 401 * required to cover SCFCTP_PMREG_INITPKG7_GET_N_CCXS is less than 4, it should 402 * be assumed to require 4 bits. 403 */ 404 /*CSTYLED*/ 405 #define D_SCFCTP_PMREG_INITPKG7 (const smn_reg_def_t){ \ 406 .srd_unit = SMN_UNIT_SCFCTP, \ 407 .srd_reg = 0x2fec, \ 408 .srd_nents = SCFCTP_MAX_ENTS, \ 409 .srd_stride = SCFCTP_CORE_STRIDE \ 410 } 411 #define SCFCTP_PMREG_INITPKG7(ccd, ccx, core) \ 412 amdzen_scfctp_smn_reg(ccd, ccx, D_SCFCTP_PMREG_INITPKG7, core) 413 #define SCFCTP_PMREG_INITPKG7_GET_N_SOCKETS(_r) bitx32(_r, 26, 25) 414 #define SCFCTP_PMREG_INITPKG7_GET_N_DIES(_r) bitx32(_r, 24, 21) 415 #define SCFCTP_PMREG_INITPKG7_GET_N_CCXS(_r) bitx32(_r, 20, 20) 416 #define SCFCTP_PMREG_INITPKG7_GET_N_CORES(_r) bitx32(_r, 19, 16) 417 #define SCFCTP_PMREG_INITPKG7_ZEN4_GET_16TAPIC(_r) bitx32(_r, 11, 11) 418 #define SCFCTP_PMREG_INITPKG7_GET_CHIDXHASHEN(_r) bitx32(_r, 10, 10) 419 #define SCFCTP_PMREG_INITPKG7_GET_S3(_r) bitx32(_r, 9, 9) 420 #define SCFCTP_PMREG_INITPKG7_ZEN3_GET_S0I3(_r) bitx32(_r, 8, 8) 421 #define SCFCTP_PMREG_INITPKG7_GET_CORETYPEISARM(_r) bitx32(_r, 7, 7) 422 #define SCFCTP_PMREG_INITPKG7_GET_SOCID(_r) bitx32(_r, 6, 3) 423 424 #ifdef __cplusplus 425 } 426 #endif 427 428 #endif /* _SYS_AMDZEN_CCD_H */ 429