// SPDX-License-Identifier: GPL-2.0
/*
 * Routines to identify caches on Intel CPU.
 *
 * Changes:
 * Venkatesh Pallipadi : Adding cache identification through cpuid(4)
 * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
 * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD.
 */

#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/cpuhotplug.h>
#include <linux/stop_machine.h>

#include <asm/amd_nb.h>
#include <asm/cacheinfo.h>
#include <asm/cpufeature.h>
#include <asm/cpuid.h>
#include <asm/mtrr.h>
#include <asm/smp.h>
#include <asm/tlbflush.h>

#include "cpu.h"

/* Shared last level cache maps */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);

/* Shared L2 cache maps */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map);

static cpumask_var_t cpu_cacheinfo_mask;

/* Kernel controls MTRR and/or PAT MSRs. */
unsigned int memory_caching_control __ro_after_init;

struct _cache_table {
	u8 descriptor;
	enum _cache_table_type type;
	short size;
};

#define MB(x)	((x) * 1024)

/* All the cache descriptor types we care about (no TLB or
   trace cache entries) */

static const struct _cache_table cache_table[] =
{
	{ 0x06, CACHE_L1_INST,	8 },	/* 4-way set assoc, 32 byte line size */
	{ 0x08, CACHE_L1_INST,	16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x09, CACHE_L1_INST,	32 },	/* 4-way set assoc, 64 byte line size */
	{ 0x0a, CACHE_L1_DATA,	8 },	/* 2 way set assoc, 32 byte line size */
	{ 0x0c, CACHE_L1_DATA,	16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x0d, CACHE_L1_DATA,	16 },	/* 4-way set assoc, 64 byte line size */
	{ 0x0e, CACHE_L1_DATA,	24 },	/* 6-way set assoc, 64 byte line size */
	{ 0x21, CACHE_L2,	256 },	/* 8-way set assoc, 64 byte line size */
	{ 0x22, CACHE_L3,	512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x23, CACHE_L3,	MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x25, CACHE_L3,	MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x29, CACHE_L3,	MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x2c, CACHE_L1_DATA,	32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x30, CACHE_L1_INST,	32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x39, CACHE_L2,	128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3a, CACHE_L2,	192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3b, CACHE_L2,	128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
	{ 0x3c, CACHE_L2,	256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3d, CACHE_L2,	384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3e, CACHE_L2,	512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3f, CACHE_L2,	256 },	/* 2-way set assoc, 64 byte line size */
	{ 0x41, CACHE_L2,	128 },	/* 4-way set assoc, 32 byte line size */
	{ 0x42, CACHE_L2,	256 },	/* 4-way set assoc, 32 byte line size */
	{ 0x43, CACHE_L2,	512 },	/* 4-way set assoc, 32 byte line size */
	{ 0x44, CACHE_L2,	MB(1) },	/* 4-way set assoc, 32 byte line size */
	{ 0x45, CACHE_L2,	MB(2) },	/* 4-way set assoc, 32 byte line size */
	{ 0x46, CACHE_L3,	MB(4) },	/* 4-way set assoc, 64 byte line size */
	{ 0x47, CACHE_L3,	MB(8) },	/* 8-way set assoc, 64 byte line size */
	{ 0x48, CACHE_L2,	MB(3) },	/* 12-way set assoc, 64 byte line size */
	{ 0x49, CACHE_L3,	MB(4) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4a, CACHE_L3,	MB(6) },	/* 12-way set assoc, 64 byte line size */
	{ 0x4b, CACHE_L3,	MB(8) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4c, CACHE_L3,	MB(12) },	/* 12-way set assoc, 64 byte line size */
	{ 0x4d, CACHE_L3,	MB(16) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4e, CACHE_L2,	MB(6) },	/* 24-way set assoc, 64 byte line size */
	{ 0x60, CACHE_L1_DATA,	16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x66, CACHE_L1_DATA,	8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x67, CACHE_L1_DATA,	16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x68, CACHE_L1_DATA,	32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x78, CACHE_L2,	MB(1) },	/* 4-way set assoc, 64 byte line size */
	{ 0x79, CACHE_L2,	128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7a, CACHE_L2,	256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7b, CACHE_L2,	512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7c, CACHE_L2,	MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7d, CACHE_L2,	MB(2) },	/* 8-way set assoc, 64 byte line size */
	{ 0x7f, CACHE_L2,	512 },	/* 2-way set assoc, 64 byte line size */
	{ 0x80, CACHE_L2,	512 },	/* 8-way set assoc, 64 byte line size */
	{ 0x82, CACHE_L2,	256 },	/* 8-way set assoc, 32 byte line size */
	{ 0x83, CACHE_L2,	512 },	/* 8-way set assoc, 32 byte line size */
	{ 0x84, CACHE_L2,	MB(1) },	/* 8-way set assoc, 32 byte line size */
	{ 0x85, CACHE_L2,	MB(2) },	/* 8-way set assoc, 32 byte line size */
	{ 0x86, CACHE_L2,	512 },	/* 4-way set assoc, 64 byte line size */
	{ 0x87, CACHE_L2,	MB(1) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd0, CACHE_L3,	512 },	/* 4-way set assoc, 64 byte line size */
	{ 0xd1, CACHE_L3,	MB(1) },	/* 4-way set assoc, 64 byte line size */
	{ 0xd2, CACHE_L3,	MB(2) },	/* 4-way set assoc, 64 byte line size */
	{ 0xd6, CACHE_L3,	MB(1) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd7, CACHE_L3,	MB(2) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd8, CACHE_L3,	MB(4) },	/* 12-way set assoc, 64 byte line size */
	{ 0xdc, CACHE_L3,	MB(2) },	/* 12-way set assoc, 64 byte line size */
	{ 0xdd, CACHE_L3,	MB(4) },	/* 12-way set assoc, 64 byte line size */
	{ 0xde, CACHE_L3,	MB(8) },	/* 12-way set assoc, 64 byte line size */
	{ 0xe2, CACHE_L3,	MB(2) },	/* 16-way set assoc, 64 byte line size */
	{ 0xe3, CACHE_L3,	MB(4) },	/* 16-way set assoc, 64 byte line size */
	{ 0xe4, CACHE_L3,	MB(8) },	/* 16-way set assoc, 64 byte line size */
	{ 0xea, CACHE_L3,	MB(12) },	/* 24-way set assoc, 64 byte line size */
	{ 0xeb, CACHE_L3,	MB(18) },	/* 24-way set assoc, 64 byte line size */
	{ 0xec, CACHE_L3,	MB(24) },	/* 24-way set assoc, 64 byte line size */
};
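
/*
 * The descriptor table above is only consumed on the legacy CPUID(0x2)
 * path of init_intel_cacheinfo(): every matching descriptor byte
 * contributes its size (in KB) to the per-level totals, e.g. descriptor
 * 0x2c adds 32 KB to the L1 data total.  Detailed geometry (ways, line
 * size, sets) is only reported via the deterministic CPUID(4) leaf.
 */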

enum _cache_type {
	CTYPE_NULL = 0,
	CTYPE_DATA = 1,
	CTYPE_INST = 2,
	CTYPE_UNIFIED = 3
};

union _cpuid4_leaf_eax {
	struct {
		enum _cache_type	type:5;
		unsigned int		level:3;
		unsigned int		is_self_initializing:1;
		unsigned int		is_fully_associative:1;
		unsigned int		reserved:4;
		unsigned int		num_threads_sharing:12;
		unsigned int		num_cores_on_die:6;
	} split;
	u32 full;
};

union _cpuid4_leaf_ebx {
	struct {
		unsigned int		coherency_line_size:12;
		unsigned int		physical_line_partition:10;
		unsigned int		ways_of_associativity:10;
	} split;
	u32 full;
};

union _cpuid4_leaf_ecx {
	struct {
		unsigned int		number_of_sets:32;
	} split;
	u32 full;
};

struct _cpuid4_info {
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	unsigned int id;
	unsigned long size;
};
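
/*
 * The bit-fields above mirror the register layout of CPUID(4) and its
 * AMD/Hygon counterpart CPUID(0x8000001d).  The hardware reports line
 * size, partitions, ways and sets as "value - 1", which is why every
 * consumer below adds 1 before using them.
 */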

/*
 * Fallback AMD CPUID(4) emulation
 * AMD CPUs with TOPOEXT can just use CPUID(0x8000001d)
 */

union l1_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:8;
		unsigned assoc:8;
		unsigned size_in_kb:8;
	};
	unsigned val;
};

union l2_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;
		unsigned size_in_kb:16;
	};
	unsigned val;
};

union l3_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;
		unsigned res:2;
		unsigned size_encoded:14;
	};
	unsigned val;
};

static const unsigned short assocs[] = {
	[1] = 1,
	[2] = 2,
	[4] = 4,
	[6] = 8,
	[8] = 16,
	[0xa] = 32,
	[0xb] = 48,
	[0xc] = 64,
	[0xd] = 96,
	[0xe] = 128,
	[0xf] = 0xffff /* fully associative - no way to show this currently */
};

static const unsigned char levels[] = { 1, 1, 2, 3 };
static const unsigned char types[] = { 1, 2, 3, 3 };

static const enum cache_type cache_type_map[] = {
	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
	[CTYPE_DATA] = CACHE_TYPE_DATA,
	[CTYPE_INST] = CACHE_TYPE_INST,
	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
};

static void legacy_amd_cpuid4(int index, union _cpuid4_leaf_eax *eax,
			      union _cpuid4_leaf_ebx *ebx, union _cpuid4_leaf_ecx *ecx)
{
	unsigned int dummy, line_size, lines_per_tag, assoc, size_in_kb;
	union l1_cache l1i, l1d;
	union l2_cache l2;
	union l3_cache l3;
	union l1_cache *l1 = &l1d;

	eax->full = 0;
	ebx->full = 0;
	ecx->full = 0;

	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);

	switch (index) {
	case 1:
		l1 = &l1i;
		fallthrough;
	case 0:
		if (!l1->val)
			return;
		assoc		= assocs[l1->assoc];
		line_size	= l1->line_size;
		lines_per_tag	= l1->lines_per_tag;
		size_in_kb	= l1->size_in_kb;
		break;
	case 2:
		if (!l2.val)
			return;
		assoc		= assocs[l2.assoc];
		line_size	= l2.line_size;
		lines_per_tag	= l2.lines_per_tag;
		/* cpu_data has errata corrections for K7 applied */
		size_in_kb	= __this_cpu_read(cpu_info.x86_cache_size);
		break;
	case 3:
		if (!l3.val)
			return;
		assoc		= assocs[l3.assoc];
		line_size	= l3.line_size;
		lines_per_tag	= l3.lines_per_tag;
		size_in_kb	= l3.size_encoded * 512;
		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
			size_in_kb = size_in_kb >> 1;
			assoc	   = assoc >> 1;
		}
		break;
	default:
		return;
	}

	eax->split.is_self_initializing = 1;
	eax->split.type			= types[index];
	eax->split.level		= levels[index];
	eax->split.num_threads_sharing	= 0;
	eax->split.num_cores_on_die	= topology_num_cores_per_package();

	if (assoc == 0xffff)
		eax->split.is_fully_associative = 1;
	ebx->split.coherency_line_size		= line_size - 1;
	ebx->split.ways_of_associativity	= assoc - 1;
	ebx->split.physical_line_partition	= lines_per_tag - 1;
	ecx->split.number_of_sets		= (size_in_kb * 1024) / line_size /
		(ebx->split.ways_of_associativity + 1) - 1;
}

static int cpuid4_info_fill_done(struct _cpuid4_info *id4, union _cpuid4_leaf_eax eax,
				 union _cpuid4_leaf_ebx ebx, union _cpuid4_leaf_ecx ecx)
{
	if (eax.split.type == CTYPE_NULL)
		return -EIO;

	id4->eax = eax;
	id4->ebx = ebx;
	id4->ecx = ecx;
	id4->size = (ecx.split.number_of_sets	       + 1) *
		    (ebx.split.coherency_line_size     + 1) *
		    (ebx.split.physical_line_partition + 1) *
		    (ebx.split.ways_of_associativity   + 1);

	return 0;
}
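
/*
 * Example: a 32 KB, 8-way L1D with 64-byte lines is reported (by native
 * CPUID(4) or by the emulation above) as coherency_line_size = 63,
 * ways_of_associativity = 7, physical_line_partition = 0 and
 * number_of_sets = 63, so cpuid4_info_fill_done() computes
 * (63 + 1) * (63 + 1) * (0 + 1) * (7 + 1) = 32768 bytes.
 */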

static int amd_fill_cpuid4_info(int index, struct _cpuid4_info *id4)
{
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	u32 ignored;

	if (boot_cpu_has(X86_FEATURE_TOPOEXT) || boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
		cpuid_count(0x8000001d, index, &eax.full, &ebx.full, &ecx.full, &ignored);
	else
		legacy_amd_cpuid4(index, &eax, &ebx, &ecx);

	return cpuid4_info_fill_done(id4, eax, ebx, ecx);
}

static int intel_fill_cpuid4_info(int index, struct _cpuid4_info *id4)
{
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	u32 ignored;

	cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &ignored);

	return cpuid4_info_fill_done(id4, eax, ebx, ecx);
}

static int fill_cpuid4_info(int index, struct _cpuid4_info *id4)
{
	u8 cpu_vendor = boot_cpu_data.x86_vendor;

	return (cpu_vendor == X86_VENDOR_AMD || cpu_vendor == X86_VENDOR_HYGON) ?
		amd_fill_cpuid4_info(index, id4) :
		intel_fill_cpuid4_info(index, id4);
}

static int find_num_cache_leaves(struct cpuinfo_x86 *c)
{
	unsigned int		eax, ebx, ecx, edx, op;
	union _cpuid4_leaf_eax	cache_eax;
	int			i = -1;

	if (c->x86_vendor == X86_VENDOR_AMD ||
	    c->x86_vendor == X86_VENDOR_HYGON)
		op = 0x8000001d;
	else
		op = 4;

	do {
		++i;
		/* Do cpuid(op) loop to find out num_cache_leaves */
		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
		cache_eax.full = eax;
	} while (cache_eax.split.type != CTYPE_NULL);
	return i;
}
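
/*
 * Example: a CPU exposing L1D, L1I, L2 and L3 through the deterministic
 * leaf reports a valid cache type for subleaves 0-3 and CTYPE_NULL for
 * subleaf 4, so find_num_cache_leaves() returns 4.
 */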

void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id)
{
	/*
	 * We may have multiple LLCs if L3 caches exist, so check if we
	 * have an L3 cache by looking at the L3 cache CPUID leaf.
	 */
	if (!cpuid_edx(0x80000006))
		return;

	if (c->x86 < 0x17) {
		/* LLC is at the node level. */
		c->topo.llc_id = die_id;
	} else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
		/*
		 * LLC is at the core complex level.
		 * Core complex ID is ApicId[3] for these processors.
		 */
		c->topo.llc_id = c->topo.apicid >> 3;
	} else {
		/*
		 * LLC ID is calculated from the number of threads sharing the
		 * cache.
		 */
		u32 eax, ebx, ecx, edx, num_sharing_cache = 0;
		u32 llc_index = find_num_cache_leaves(c) - 1;

		cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx);
		/* EAX[25:14]: number of threads sharing this cache, minus 1. */
		if (eax)
			num_sharing_cache = ((eax >> 14) & 0xfff) + 1;

		if (num_sharing_cache) {
			int bits = get_count_order(num_sharing_cache);

			c->topo.llc_id = c->topo.apicid >> bits;
		}
	}
}

void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c)
{
	/*
	 * We may have multiple LLCs if L3 caches exist, so check if we
	 * have an L3 cache by looking at the L3 cache CPUID leaf.
	 */
	if (!cpuid_edx(0x80000006))
		return;

	/*
	 * LLC is at the core complex level.
	 * Core complex ID is ApicId[3] for these processors.
	 */
	c->topo.llc_id = c->topo.apicid >> 3;
}
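
/*
 * Example of the last branch in cacheinfo_amd_init_llc_id(): with 16
 * threads sharing the L3 (num_sharing_cache == 16, get_count_order()
 * == 4), the LLC ID is the APIC ID shifted right by four bits, so APIC
 * IDs 0x10-0x1f all map to LLC ID 1.
 */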

void init_amd_cacheinfo(struct cpuinfo_x86 *c)
{
	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);

	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
		ci->num_leaves = find_num_cache_leaves(c);
	} else if (c->extended_cpuid_level >= 0x80000006) {
		if (cpuid_edx(0x80000006) & 0xf000)
			ci->num_leaves = 4;
		else
			ci->num_leaves = 3;
	}
}

void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
{
	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);

	ci->num_leaves = find_num_cache_leaves(c);
}

static const struct _cache_table *cache_table_get(u8 desc)
{
	for (int i = 0; i < ARRAY_SIZE(cache_table); i++) {
		if (cache_table[i].descriptor == desc)
			return &cache_table[i];
	}

	return NULL;
}

void init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
	/* Cache sizes */
	unsigned int l1i = 0, l1d = 0, l2 = 0, l3 = 0;
	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);

	if (c->cpuid_level > 3) {
		/*
		 * There should be at least one leaf. A non-zero value means
		 * that the number of leaves has been initialized.
		 */
		if (!ci->num_leaves)
			ci->num_leaves = find_num_cache_leaves(c);

		/*
		 * Whenever possible use cpuid(4), the deterministic cache
		 * parameters leaf, to find the cache details.
		 */
		for (i = 0; i < ci->num_leaves; i++) {
			struct _cpuid4_info id4 = {};
			int retval;

			retval = intel_fill_cpuid4_info(i, &id4);
			if (retval < 0)
				continue;

			switch (id4.eax.split.level) {
			case 1:
				if (id4.eax.split.type == CTYPE_DATA)
					new_l1d = id4.size/1024;
				else if (id4.eax.split.type == CTYPE_INST)
					new_l1i = id4.size/1024;
				break;
			case 2:
				new_l2 = id4.size/1024;
				num_threads_sharing = 1 + id4.eax.split.num_threads_sharing;
				index_msb = get_count_order(num_threads_sharing);
				l2_id = c->topo.apicid & ~((1 << index_msb) - 1);
				break;
			case 3:
				new_l3 = id4.size/1024;
				num_threads_sharing = 1 + id4.eax.split.num_threads_sharing;
				index_msb = get_count_order(num_threads_sharing);
				l3_id = c->topo.apicid & ~((1 << index_msb) - 1);
				break;
			default:
				break;
			}
		}
	}

	/* Don't use CPUID(2) if CPUID(4) is supported. */
	if (!ci->num_leaves && c->cpuid_level > 1) {
		const struct _cache_table *entry;
		union leaf_0x2_regs regs;
		u8 *desc;

		cpuid_get_leaf_0x2_regs(&regs);
		for_each_leaf_0x2_desc(regs, desc) {
			entry = cache_table_get(*desc);
			if (!entry)
				continue;

			switch (entry->type) {
			case CACHE_L1_INST:	l1i += entry->size; break;
			case CACHE_L1_DATA:	l1d += entry->size; break;
			case CACHE_L2:		l2  += entry->size; break;
			case CACHE_L3:		l3  += entry->size; break;
			}
		}
	}

	if (new_l1d)
		l1d = new_l1d;

	if (new_l1i)
		l1i = new_l1i;

	if (new_l2) {
		l2 = new_l2;
		c->topo.llc_id = l2_id;
		c->topo.l2c_id = l2_id;
	}

	if (new_l3) {
		l3 = new_l3;
		c->topo.llc_id = l3_id;
	}

	/*
	 * If llc_id is not yet set, this means cpuid_level < 4, which in
	 * turn means that the only possibility is SMT (as indicated in
	 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
	 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
	 * c->topo.pkg_id.
	 */
	if (c->topo.llc_id == BAD_APICID)
		c->topo.llc_id = c->topo.pkg_id;

	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));

	if (!l2)
		cpu_detect_cache_sizes(c);
}
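
/*
 * Example of the cache ID masking in init_intel_cacheinfo(): if an L3 is
 * shared by 16 threads (num_threads_sharing reports 15), index_msb is 4
 * and l3_id = apicid & ~0xf, so every thread in that sharing domain ends
 * up with the same l3_id/llc_id.
 */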

static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
				    const struct _cpuid4_info *id4)
{
	struct cpu_cacheinfo *this_cpu_ci;
	struct cacheinfo *ci;
	int i, sibling;

	/*
	 * For L3, always use the pre-calculated cpu_llc_shared_mask
	 * to derive shared_cpu_map.
	 */
	if (index == 3) {
		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
			this_cpu_ci = get_cpu_cacheinfo(i);
			if (!this_cpu_ci->info_list)
				continue;
			ci = this_cpu_ci->info_list + index;
			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
				if (!cpu_online(sibling))
					continue;
				cpumask_set_cpu(sibling,
						&ci->shared_cpu_map);
			}
		}
	} else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
		unsigned int apicid, nshared, first, last;

		/* CPUs whose APIC IDs fall within [first, last] share this cache. */
		nshared = id4->eax.split.num_threads_sharing + 1;
		apicid = cpu_data(cpu).topo.apicid;
		first = apicid - (apicid % nshared);
		last = first + nshared - 1;

		for_each_online_cpu(i) {
			this_cpu_ci = get_cpu_cacheinfo(i);
			if (!this_cpu_ci->info_list)
				continue;

			apicid = cpu_data(i).topo.apicid;
			if ((apicid < first) || (apicid > last))
				continue;

			ci = this_cpu_ci->info_list + index;

			for_each_online_cpu(sibling) {
				apicid = cpu_data(sibling).topo.apicid;
				if ((apicid < first) || (apicid > last))
					continue;
				cpumask_set_cpu(sibling,
						&ci->shared_cpu_map);
			}
		}
	} else
		return 0;

	return 1;
}

static void __cache_cpumap_setup(unsigned int cpu, int index,
				 const struct _cpuid4_info *id4)
{
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cacheinfo *ci, *sibling_ci;
	unsigned long num_threads_sharing;
	int index_msb, i;
	struct cpuinfo_x86 *c = &cpu_data(cpu);

	if (c->x86_vendor == X86_VENDOR_AMD ||
	    c->x86_vendor == X86_VENDOR_HYGON) {
		if (__cache_amd_cpumap_setup(cpu, index, id4))
			return;
	}

	ci = this_cpu_ci->info_list + index;
	num_threads_sharing = 1 + id4->eax.split.num_threads_sharing;

	cpumask_set_cpu(cpu, &ci->shared_cpu_map);
	if (num_threads_sharing == 1)
		return;

	index_msb = get_count_order(num_threads_sharing);

	for_each_online_cpu(i)
		if (cpu_data(i).topo.apicid >> index_msb == c->topo.apicid >> index_msb) {
			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);

			if (i == cpu || !sib_cpu_ci->info_list)
				continue; /* skip if itself or no cacheinfo */
			sibling_ci = sib_cpu_ci->info_list + index;
			cpumask_set_cpu(i, &ci->shared_cpu_map);
			cpumask_set_cpu(cpu, &sibling_ci->shared_cpu_map);
		}
}
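
/*
 * Example of the generic shared_cpu_map setup above: with SMT2 and a
 * per-core L1 (num_threads_sharing reports 1), index_msb is 1, so the
 * two hyper-threads of a core (APIC IDs differing only in bit 0) end up
 * in each other's shared_cpu_map for that leaf.
 */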

static void ci_info_init(struct cacheinfo *ci, const struct _cpuid4_info *id4,
			 struct amd_northbridge *nb)
{
	ci->id				= id4->id;
	ci->attributes			= CACHE_ID;
	ci->level			= id4->eax.split.level;
	ci->type			= cache_type_map[id4->eax.split.type];
	ci->coherency_line_size		= id4->ebx.split.coherency_line_size + 1;
	ci->ways_of_associativity	= id4->ebx.split.ways_of_associativity + 1;
	ci->size			= id4->size;
	ci->number_of_sets		= id4->ecx.split.number_of_sets + 1;
	ci->physical_line_partition	= id4->ebx.split.physical_line_partition + 1;
	ci->priv			= nb;
}

int init_cache_level(unsigned int cpu)
{
	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);

	/* There should be at least one leaf. */
	if (!ci->num_leaves)
		return -ENOENT;

	return 0;
}

/*
 * The max shared threads number comes from CPUID.4:EAX[25-14] with input
 * ECX as cache index. Then right shift apicid by the number's order to get
 * cache id for this cache node.
 */
static void get_cache_id(int cpu, struct _cpuid4_info *id4)
{
	struct cpuinfo_x86 *c = &cpu_data(cpu);
	unsigned long num_threads_sharing;
	int index_msb;

	num_threads_sharing = 1 + id4->eax.split.num_threads_sharing;
	index_msb = get_count_order(num_threads_sharing);
	id4->id = c->topo.apicid >> index_msb;
}

int populate_cache_leaves(unsigned int cpu)
{
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cacheinfo *ci = this_cpu_ci->info_list;
	u8 cpu_vendor = boot_cpu_data.x86_vendor;
	struct amd_northbridge *nb = NULL;
	struct _cpuid4_info id4 = {};
	int idx, ret;

	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
		ret = fill_cpuid4_info(idx, &id4);
		if (ret)
			return ret;

		get_cache_id(cpu, &id4);

		if (cpu_vendor == X86_VENDOR_AMD || cpu_vendor == X86_VENDOR_HYGON)
			nb = amd_init_l3_cache(idx);

		ci_info_init(ci++, &id4, nb);
		__cache_cpumap_setup(cpu, idx, &id4);
	}
	this_cpu_ci->cpu_map_populated = true;

	return 0;
}

/*
 * Disable and enable caches. Needed for changing MTRRs and the PAT MSR.
 *
 * Since we are disabling the cache don't allow any interrupts,
 * they would run extremely slow and would only increase the pain.
 *
 * The caller must ensure that local interrupts are disabled and
 * are reenabled after cache_enable() has been called.
 */
static unsigned long saved_cr4;
static DEFINE_RAW_SPINLOCK(cache_disable_lock);

void cache_disable(void) __acquires(cache_disable_lock)
{
	unsigned long cr0;

	/*
	 * This is not ideal since the cache is only flushed/disabled for
	 * this CPU while the MTRRs are changed, but changing this requires
	 * more invasive changes to the way the kernel boots.
	 */

	raw_spin_lock(&cache_disable_lock);

	/* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
	cr0 = read_cr0() | X86_CR0_CD;
	write_cr0(cr0);

	/*
	 * Cache flushing is the most time-consuming step when programming
	 * the MTRRs. Fortunately, as per the Intel Software Development
	 * Manual, we can skip it if the processor supports cache self-
	 * snooping.
	 */
	if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
		wbinvd();

	/* Save value of CR4 and clear Page Global Enable (bit 7) */
	if (cpu_feature_enabled(X86_FEATURE_PGE)) {
		saved_cr4 = __read_cr4();
		__write_cr4(saved_cr4 & ~X86_CR4_PGE);
	}

	/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
	flush_tlb_local();

	if (cpu_feature_enabled(X86_FEATURE_MTRR))
		mtrr_disable();

	/* Again, only flush caches if we have to. */
	if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
		wbinvd();
}

void cache_enable(void) __releases(cache_disable_lock)
{
	/* Flush TLBs (no need to flush caches - they are disabled) */
	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
	flush_tlb_local();

	if (cpu_feature_enabled(X86_FEATURE_MTRR))
		mtrr_enable();

	/* Enable caches */
	write_cr0(read_cr0() & ~X86_CR0_CD);

	/* Restore value of CR4 */
	if (cpu_feature_enabled(X86_FEATURE_PGE))
		__write_cr4(saved_cr4);

	raw_spin_unlock(&cache_disable_lock);
}

static void cache_cpu_init(void)
{
	unsigned long flags;

	local_irq_save(flags);

	if (memory_caching_control & CACHE_MTRR) {
		cache_disable();
		mtrr_generic_set_state();
		cache_enable();
	}

	if (memory_caching_control & CACHE_PAT)
		pat_cpu_init();

	local_irq_restore(flags);
}

static bool cache_aps_delayed_init = true;

void set_cache_aps_delayed_init(bool val)
{
	cache_aps_delayed_init = val;
}

bool get_cache_aps_delayed_init(void)
{
	return cache_aps_delayed_init;
}

static int cache_rendezvous_handler(void *unused)
{
	if (get_cache_aps_delayed_init() || !cpu_online(smp_processor_id()))
		cache_cpu_init();

	return 0;
}

void __init cache_bp_init(void)
{
	mtrr_bp_init();
	pat_bp_init();

	if (memory_caching_control)
		cache_cpu_init();
}

void cache_bp_restore(void)
{
	if (memory_caching_control)
		cache_cpu_init();
}

static int cache_ap_online(unsigned int cpu)
{
	cpumask_set_cpu(cpu, cpu_cacheinfo_mask);

	if (!memory_caching_control || get_cache_aps_delayed_init())
		return 0;

	/*
	 * Ideally we should hold mtrr_mutex here to prevent MTRR entries
	 * from being changed, but this routine is called during CPU boot,
	 * where holding the lock would break things.
	 *
	 * This routine is called in two cases:
	 *
	 *  1. very early during software resume, when there absolutely
	 *     are no MTRR entry changes;
	 *
	 *  2. CPU hotadd time. We let mtrr_add/del_page hold the cpuhotplug
	 *     lock to prevent MTRR entry changes.
	 */
	stop_machine_from_inactive_cpu(cache_rendezvous_handler, NULL,
				       cpu_cacheinfo_mask);

	return 0;
}

static int cache_ap_offline(unsigned int cpu)
{
	cpumask_clear_cpu(cpu, cpu_cacheinfo_mask);
	return 0;
}

/*
 * Delayed cache initialization for all APs
 */
void cache_aps_init(void)
{
	if (!memory_caching_control || !get_cache_aps_delayed_init())
		return;

	stop_machine(cache_rendezvous_handler, NULL, cpu_online_mask);
	set_cache_aps_delayed_init(false);
}

static int __init cache_ap_register(void)
{
	zalloc_cpumask_var(&cpu_cacheinfo_mask, GFP_KERNEL);
	cpumask_set_cpu(smp_processor_id(), cpu_cacheinfo_mask);

	cpuhp_setup_state_nocalls(CPUHP_AP_CACHECTRL_STARTING,
				  "x86/cachectrl:starting",
				  cache_ap_online, cache_ap_offline);
	return 0;
}
early_initcall(cache_ap_register);