1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2023 Oxide Computer Company 14 */ 15 16 #ifndef _SYS_AMDZEN_CCD_H 17 #define _SYS_AMDZEN_CCD_H 18 19 /* 20 * SMN registers that are CCD-specific (core complex die) but are spread across 21 * multiple functional units. This could be combined with <sys/amdzen/ccx.h> 22 * once the duplication between that and <sys/controlregs.h> is dealt with. 23 * 24 * Currently this covers two different groups: 25 * 26 * SMU::PWR This group describes information about the CCD and, unlike the 27 * DF CCM entries, this is only present if an actual die is 28 * present in the package. These registers are always present 29 * starting in Zen 2. 30 * 31 * L3::SCFCTP The Scalable Control Fabric, Clocks, Test, and Power Gating 32 * registers exist on a per-core basis within each CCD. The first 33 * point that we can find that this exists started in Zen 3. 34 * 35 * The register naming and fields generally follows the conventions that the DF 36 * and UMC have laid out. The one divergence right now is that the functional 37 * blocks only exist starting in a given Zen uarch (e.g. Zen 2). Once we have 38 * divergences from that introduction point then like the MSRs and others we 39 * will introduce the generation-specific part of the name. 40 */ 41 42 #include <sys/bitext.h> 43 #include <sys/debug.h> 44 #include <sys/types.h> 45 #include <sys/amdzen/smn.h> 46 47 #ifdef __cplusplus 48 extern "C" { 49 #endif 50 51 /* 52 * SMU::PWR registers, per-CCD. This functional unit is present starting in Zen 53 * based platforms. Note that there is another aperture at 0x4008_1000 that is 54 * documented to alias CCD 0. It's not really clear what if any utility that's 55 * supposed to have, except that the name given to these aliases contains 56 * "LOCAL" which implies that perhaps rather than aliasing CCD 0 it instead is 57 * decoded by the unit on the originating CCD. We don't use that in any case. 58 * 59 * Because Genoa supports up to 12 CCDs, they did add a second aperture that 60 * starts at 4a08_1000h and uses the same shifts. This leads to some awkwardness 61 * below. This does make it harder to get at this. We should investigate to 62 * include the uarch to determine limits at some point in the future like we 63 * have done with some of our DF registers. 64 */ 65 static inline smn_reg_t 66 amdzen_smupwr_smn_reg(const uint8_t ccdno, const smn_reg_def_t def, 67 const uint16_t reginst) 68 { 69 const uint32_t APERTURE_BASE = 0x30081000; 70 const uint32_t APERTURE_HI_BASE = 0x4a081000; 71 const uint32_t APERTURE_MASK = 0xfffff000; 72 CTASSERT((APERTURE_BASE & ~APERTURE_MASK) == 0); 73 CTASSERT((APERTURE_HI_BASE & ~APERTURE_MASK) == 0); 74 75 const uint32_t ccdno32 = (const uint32_t)ccdno; 76 const uint32_t reginst32 = (const uint32_t)reginst; 77 const uint32_t size32 = (def.srd_size == 0) ? 4 : 78 (const uint32_t)def.srd_size; 79 80 const uint32_t stride = (def.srd_stride == 0) ? size32 : def.srd_stride; 81 const uint32_t nents = (def.srd_nents == 0) ? 1 : 82 (const uint32_t)def.srd_nents; 83 84 ASSERT(size32 == 1 || size32 == 2 || size32 == 4); 85 ASSERT3S(def.srd_unit, ==, SMN_UNIT_SMUPWR); 86 ASSERT3U(ccdno32, <, 12); 87 ASSERT3U(nents, >, reginst32); 88 89 uint32_t aperture_base, aperture_off; 90 if (ccdno >= 8) { 91 aperture_base = APERTURE_HI_BASE; 92 aperture_off = (ccdno32 - 8) << 25; 93 } else { 94 aperture_base = APERTURE_BASE; 95 aperture_off = ccdno32 << 25; 96 } 97 ASSERT3U(aperture_off, <=, UINT32_MAX - aperture_base); 98 99 const uint32_t aperture = aperture_base + aperture_off; 100 ASSERT0(aperture & ~APERTURE_MASK); 101 102 const uint32_t reg = def.srd_reg + reginst32 * stride; 103 ASSERT0(reg & APERTURE_MASK); 104 105 return (SMN_MAKE_REG_SIZED(aperture + reg, size32)); 106 } 107 108 /* 109 * SMU::PWR::CCD_DIE_ID - does what it says. 110 */ 111 /*CSTYLED*/ 112 #define D_SMUPWR_CCD_DIE_ID (const smn_reg_def_t){ \ 113 .srd_unit = SMN_UNIT_SMUPWR, \ 114 .srd_reg = 0x00 \ 115 } 116 #define SMUPWR_CCD_DIE_ID(c) \ 117 amdzen_smupwr_smn_reg(c, D_SMUPWR_CCD_DIE_ID, 0) 118 #define SMUPWR_CCD_DIE_ID_GET(_r) bitx32(_r, 3, 0) 119 120 /* 121 * SMU::PWR::THREAD_ENABLE - also does what it says; this is a bitmap of each of 122 * the possible threads. If the bit is set, the thread runs. Clearing bits 123 * is not allowed. A bit set in here corresponds to a logical thread, though 124 * the exact layout is a bit tricky in the multi-CCX case. When there are two 125 * core complexes on the die, all of CCX0's possible threads will come first, 126 * followed by all of CCX1's. However, while this always describes _logical_ 127 * threads, the spacing is based upon the width of the total possible physical 128 * cores in the CCX. 129 * 130 * For example, consider a Zen 2 system. It has 2 core complexes with 4 cores 131 * each. Regardless of how many logical cores and threads are enabled in each 132 * complex, CCX0 logical thread 0 always starts at bit 0 and CCX1 logical thread 133 * 0 always starts at bit 8. In a system that only has 3/4 cores enabled then 134 * we'd see this register set to 0x3f3f. In Zen 3 and non-Bergamo Zen 4, this 135 * is the same width, but there is only one core complex. In Bergamo, this is 136 * instead 32-bits wide with CCX1 thread 0 starting at bit 16. All of this is 137 * to say that even though these bits correspond to logical threads, the CCX 138 * resets the bit position. 139 * 140 * However, if we move to a case where SMT is disabled then the CCX starting 141 * point is still the same, but the there will not be a gap for threads within 142 * the CCX. So bit 0 will be logical CPU 0 thread 0, bit 1 logical CPU 1 thread 143 * 0, etc. 144 */ 145 /*CSTYLED*/ 146 #define D_SMUPWR_THREAD_EN (const smn_reg_def_t){ \ 147 .srd_unit = SMN_UNIT_SMUPWR, \ 148 .srd_reg = 0x18 \ 149 } 150 #define SMUPWR_THREAD_EN(c) \ 151 amdzen_smupwr_smn_reg(c, D_SMUPWR_THREAD_EN, 0) 152 #define SMUPWR_THREAD_EN_GET_T(_r, _t) bitx32(_r, _t, _t) 153 #define SMUPWR_THREAD_EN_SET_T(_r, _t) bitset32(_r, _t, _t, 1) 154 155 /* 156 * SMU::PWR::THREAD_CONFIGURATION - provides core and CCX counts for the die as 157 * well as whether SMT is enabled, and a bit to enable or disable SMT *after the 158 * next warm reset* (which we don't use). 159 */ 160 /*CSTYLED*/ 161 #define D_SMUPWR_THREAD_CFG (const smn_reg_def_t){ \ 162 .srd_unit = SMN_UNIT_SMUPWR, \ 163 .srd_reg = 0x1c \ 164 } 165 #define SMUPWR_THREAD_CFG(c) \ 166 amdzen_smupwr_smn_reg(c, D_SMUPWR_THREAD_CFG, 0) 167 #define SMUPWR_THREAD_CFG_GET_SMT_MODE(_r) bitx32(_r, 8, 8) 168 #define SMUPWR_THREAD_CFG_SMT_MODE_1T 1 169 #define SMUPWR_THREAD_CFG_SMT_MODE_SMT 0 170 #define SMUPWR_THREAD_CFG_GET_COMPLEX_COUNT(_r) bitx32(_r, 7, 4) 171 #define SMUPWR_THREAD_CFG_GET_CORE_COUNT(_r) bitx32(_r, 3, 0) 172 173 /* 174 * SMU::PWR::SOFT_DOWNCORE - provides a bitmap of cores that may exist; setting 175 * each bit disables the corresponding core. Presumably after a warm reset. 176 */ 177 /*CSTYLED*/ 178 #define D_SMUPWR_SOFT_DOWNCORE (const smn_reg_def_t){ \ 179 .srd_unit = SMN_UNIT_SMUPWR, \ 180 .srd_reg = 0x20 \ 181 } 182 #define SMUPWR_SOFT_DOWNCORE(c) \ 183 amdzen_smupwr_smn_reg(c, D_SMUPWR_SOFT_DOWNCORE, 0) 184 #define SMUPWR_SOFT_DOWNCORE_GET_DISCORE(_r) bitx32(_r, 7, 0) 185 #define SMUPWR_SOFT_DOWNCORE_GET_DISCORE_C(_r, _c) bitx32(_r, _c, _c) 186 #define SMUPWR_SOFT_DOWNCORE_SET_DISCORE(_r, _v) bitset32(_r, 7, 0, _v) 187 #define SMUPWR_SOFT_DOWNCORE_SET_DISCORE_C(_r, _c) bitset32(_r, _c, _c, 1) 188 189 /* 190 * SMU::PWR::CORE_ENABLE - nominally writable, this register contains a bitmap 191 * of cores; a bit that is set means the core whose physical ID is that bit 192 * position is enabled. The effect of modifying this register, if any, is 193 * undocumented and unknown. 194 */ 195 /*CSTYLED*/ 196 #define D_SMUPWR_CORE_EN (const smn_reg_def_t){ \ 197 .srd_unit = SMN_UNIT_SMUPWR, \ 198 .srd_reg = 0x24 \ 199 } 200 #define SMUPWR_CORE_EN(c) \ 201 amdzen_smupwr_smn_reg(c, D_SMUPWR_CORE_EN, 0) 202 #define SMUPWR_CORE_EN_GET(_r) bitx32(_r, 7, 0) 203 #define SMUPWR_CORE_EN_GET_C(_r, _c) bitx32(_r, _c, _c) 204 #define SMUPWR_CORE_EN_SET(_r, _v) bitset32(_r, 7, 0, _v) 205 #define SMUPWR_CORE_EN_SET_C(_r, _c) bitset32(_r, _c, _c, 1) 206 207 /* 208 * SCFCTP registers. A copy of these exists for each core. One thing to be aware 209 * of is that not all cores are enabled and this requires like at the SMU::PWR 210 * registers above or the DF::CoreEnable. The aperture for these starts at 211 * 2000_0000h. Each core is then spaced 2_0000h apart while each CCD has a 212 * 23-bit stride and each CCX has a 22 bit stride. The number of cores per CCX 213 * still caps at 8, which is what the various .srd_nents entries should be for 214 * all registers in this space. The number of CCDs does vary per platform, but 215 * we size this for the current largest number of 12 in Genoa and two CCXs. 216 * 217 * In the future, it'd be good to have a way to constrain the values we accept 218 * to something less than the maximum across all products, but this is often 219 * used before we have fully flushed out the uarchrev part of CPUID making it 220 * challenging at the moment. 221 */ 222 #define SCFCTP_CORE_STRIDE 0x20000 223 static inline smn_reg_t 224 amdzen_scfctp_smn_reg(const uint8_t ccdno, const uint8_t ccxno, 225 const smn_reg_def_t def, const uint16_t reginst) 226 { 227 const uint32_t APERTURE_BASE = 0x20000000; 228 const uint32_t APERTURE_MASK = SMN_APERTURE_MASK; 229 CTASSERT((APERTURE_BASE & ~APERTURE_MASK) == 0); 230 231 const uint32_t ccdno32 = (const uint32_t)ccdno; 232 const uint32_t ccxno32 = (const uint32_t)ccxno; 233 const uint32_t reginst32 = (const uint32_t)reginst; 234 const uint32_t size32 = (def.srd_size == 0) ? 4 : 235 (const uint32_t)def.srd_size; 236 237 const uint32_t stride = (def.srd_stride == 0) ? 4 : def.srd_stride; 238 const uint32_t nents = (def.srd_nents == 0) ? 1 : 239 (const uint32_t)def.srd_nents; 240 241 ASSERT(size32 == 1 || size32 == 2 || size32 == 4); 242 ASSERT3S(def.srd_unit, ==, SMN_UNIT_SCFCTP); 243 ASSERT3U(stride, ==, SCFCTP_CORE_STRIDE); 244 ASSERT3U(nents, ==, 8); 245 ASSERT3U(ccdno32, <, 12); 246 ASSERT3U(ccxno32, <, 2); 247 ASSERT3U(nents, >, reginst32); 248 249 const uint32_t aperture_off = (ccdno32 << 23) + (ccxno << 22); 250 ASSERT3U(aperture_off, <=, UINT32_MAX - APERTURE_BASE); 251 252 const uint32_t aperture = APERTURE_BASE + aperture_off; 253 ASSERT0(aperture & ~APERTURE_MASK); 254 255 const uint32_t reg = def.srd_reg + reginst32 * stride; 256 ASSERT0(reg & APERTURE_MASK); 257 258 return (SMN_MAKE_REG_SIZED(aperture + reg, size32)); 259 } 260 261 /* 262 * L3::SCFCTP::PMREG_INITPKG0 - Nominally writable, this register contains 263 * information allowing us to discover where this core fits into the logical and 264 * physical topology of the processor. 265 */ 266 /*CSTYLED*/ 267 #define D_SCFCTP_PMREG_INITPKG0 (const smn_reg_def_t){ \ 268 .srd_unit = SMN_UNIT_SCFCTP, \ 269 .srd_reg = 0x2fd0, \ 270 .srd_nents = 8, \ 271 .srd_stride = SCFCTP_CORE_STRIDE \ 272 } 273 #define SCFCTP_PMREG_INITPKG0(ccd, ccx, core) \ 274 amdzen_scfctp_smn_reg(ccd, ccx, D_SCFCTP_PMREG_INITPKG0, core) 275 #define SCFCTP_PMREG_INITPKG0_GET_LOG_DIE(_r) bitx32(_r, 22, 19) 276 #define SCFCTP_PMREG_INITPKG0_GET_LOG_CCX(_r) bitx32(_r, 18, 18) 277 #define SCFCTP_PMREG_INITPKG0_GET_LOG_CORE(_r) bitx32(_r, 17, 14) 278 #define SCFCTP_PMREG_INITPKG0_GET_SOCKET(_r) bitx32(_r, 13, 12) 279 #define SCFCTP_PMREG_INITPKG0_GET_PHYS_DIE(_r) bitx32(_r, 11, 8) 280 #define SCFCTP_PMREG_INITPKG0_GET_PHYS_CCX(_r) bitx32(_r, 7, 7) 281 #define SCFCTP_PMREG_INITPKG0_GET_PHYS_CORE(_r) bitx32(_r, 6, 3) 282 #define SCFCTP_PMREG_INITPKG0_GET_SMTEN(_r) bitx32(_r, 2, 0) 283 284 /* 285 * L3::SCFCTP::PMREG_INITPKG7 - Similarly, this register describes this 286 * processor's overall internal core topology. The most notable addition to this 287 * register has been the addition of a bit which causes the APIC ID for the CCX 288 * to be shifted and covered by at least 4 bits. That is, if the number of bits 289 * required to cover SCFCTP_PMREG_INITPKG7_GET_N_CCXS is less than 4, it should 290 * be assumed to require 4 bits. 291 */ 292 /*CSTYLED*/ 293 #define D_SCFCTP_PMREG_INITPKG7 (const smn_reg_def_t){ \ 294 .srd_unit = SMN_UNIT_SCFCTP, \ 295 .srd_reg = 0x2fec, \ 296 .srd_nents = 8, \ 297 .srd_stride = SCFCTP_CORE_STRIDE \ 298 } 299 #define SCFCTP_PMREG_INITPKG7(ccd, ccx, core) \ 300 amdzen_scfctp_smn_reg(ccd, ccx, D_SCFCTP_PMREG_INITPKG7, core) 301 #define SCFCTP_PMREG_INITPKG7_GET_N_SOCKETS(_r) bitx32(_r, 26, 25) 302 #define SCFCTP_PMREG_INITPKG7_GET_N_DIES(_r) bitx32(_r, 24, 21) 303 #define SCFCTP_PMREG_INITPKG7_GET_N_CCXS(_r) bitx32(_r, 20, 20) 304 #define SCFCTP_PMREG_INITPKG7_GET_N_CORES(_r) bitx32(_r, 19, 16) 305 #define SCFCTP_PMREG_INITPKG7_ZEN4_GET_16TAPIC(_r) bitx32(_r, 11, 11) 306 #define SCFCTP_PMREG_INITPKG7_GET_CHIDXHASHEN(_r) bitx32(_r, 10, 10) 307 #define SCFCTP_PMREG_INITPKG7_GET_S3(_r) bitx32(_r, 9, 9) 308 #define SCFCTP_PMREG_INITPKG7_ZEN3_GET_S0I3(_r) bitx32(_r, 8, 8) 309 #define SCFCTP_PMREG_INITPKG7_GET_CORETYPEISARM(_r) bitx32(_r, 7, 7) 310 #define SCFCTP_PMREG_INITPKG7_GET_SOCID(_r) bitx32(_r, 6, 3) 311 312 #ifdef __cplusplus 313 } 314 #endif 315 316 #endif /* _SYS_AMDZEN_CCD_H */ 317