1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright 2012 by Oracle Inc 4 * Author: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> 5 * 6 * This code borrows ideas from 7 * https://lore.kernel.org/lkml/1322673664-14642-6-git-send-email-konrad.wilk@oracle.com 8 * so many thanks go to Kevin Tian <kevin.tian@intel.com> 9 * and Yu Ke <ke.yu@intel.com>. 10 */ 11 12 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 13 14 #include <linux/cpumask.h> 15 #include <linux/cpufreq.h> 16 #include <linux/freezer.h> 17 #include <linux/kernel.h> 18 #include <linux/kthread.h> 19 #include <linux/init.h> 20 #include <linux/module.h> 21 #include <linux/types.h> 22 #include <linux/syscore_ops.h> 23 #include <linux/acpi.h> 24 #include <acpi/processor.h> 25 #include <xen/xen.h> 26 #include <xen/interface/platform.h> 27 #include <asm/xen/hypercall.h> 28 29 static int no_hypercall; 30 MODULE_PARM_DESC(off, "Inhibit the hypercall."); 31 module_param_named(off, no_hypercall, int, 0400); 32 33 /* 34 * Note: Do not convert the acpi_id* below to cpumask_var_t or use cpumask_bit 35 * - as those shrink to nr_cpu_bits (which is dependent on possible_cpu), which 36 * can be less than what we want to put in. Instead use the 'nr_acpi_bits' 37 * which is dynamically computed based on the MADT or x2APIC table. 38 */ 39 static unsigned int nr_acpi_bits; 40 /* Mutex to protect the acpi_ids_done - for CPU hotplug use. */ 41 static DEFINE_MUTEX(acpi_ids_mutex); 42 /* Which ACPI ID we have processed from 'struct acpi_processor'. */ 43 static unsigned long *acpi_ids_done; 44 /* Which ACPI ID exist in the SSDT/DSDT processor definitions. */ 45 static unsigned long *acpi_id_present; 46 /* And if there is an _CST definition (or a PBLK) for the ACPI IDs */ 47 static unsigned long *acpi_id_cst_present; 48 /* Which ACPI P-State dependencies for a enumerated processor */ 49 static struct acpi_psd_package *acpi_psd; 50 51 static int push_cxx_to_hypervisor(struct acpi_processor *_pr) 52 { 53 struct xen_platform_op op = { 54 .cmd = XENPF_set_processor_pminfo, 55 .interface_version = XENPF_INTERFACE_VERSION, 56 .u.set_pminfo.id = _pr->acpi_id, 57 .u.set_pminfo.type = XEN_PM_CX, 58 }; 59 struct xen_processor_cx *dst_cx, *dst_cx_states = NULL; 60 struct acpi_processor_cx *cx; 61 unsigned int i, ok; 62 int ret = 0; 63 64 dst_cx_states = kcalloc(_pr->power.count, 65 sizeof(struct xen_processor_cx), GFP_KERNEL); 66 if (!dst_cx_states) 67 return -ENOMEM; 68 69 for (ok = 0, i = 1; i <= _pr->power.count; i++) { 70 cx = &_pr->power.states[i]; 71 if (!cx->valid) 72 continue; 73 74 dst_cx = &(dst_cx_states[ok++]); 75 76 dst_cx->reg.space_id = ACPI_ADR_SPACE_SYSTEM_IO; 77 if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) { 78 dst_cx->reg.bit_width = 8; 79 dst_cx->reg.bit_offset = 0; 80 dst_cx->reg.access_size = 1; 81 } else { 82 dst_cx->reg.space_id = ACPI_ADR_SPACE_FIXED_HARDWARE; 83 if (cx->entry_method == ACPI_CSTATE_FFH) { 84 /* NATIVE_CSTATE_BEYOND_HALT */ 85 dst_cx->reg.bit_offset = 2; 86 dst_cx->reg.bit_width = 1; /* VENDOR_INTEL */ 87 } 88 dst_cx->reg.access_size = 0; 89 } 90 dst_cx->reg.address = cx->address; 91 92 dst_cx->type = cx->type; 93 dst_cx->latency = cx->latency; 94 95 dst_cx->dpcnt = 0; 96 set_xen_guest_handle(dst_cx->dp, NULL); 97 } 98 if (!ok) { 99 pr_debug("No _Cx for ACPI CPU %u\n", _pr->acpi_id); 100 kfree(dst_cx_states); 101 return -EINVAL; 102 } 103 op.u.set_pminfo.power.count = ok; 104 op.u.set_pminfo.power.flags.bm_control = _pr->flags.bm_control; 105 op.u.set_pminfo.power.flags.bm_check = _pr->flags.bm_check; 106 op.u.set_pminfo.power.flags.has_cst = _pr->flags.has_cst; 107 op.u.set_pminfo.power.flags.power_setup_done = 108 _pr->flags.power_setup_done; 109 110 set_xen_guest_handle(op.u.set_pminfo.power.states, dst_cx_states); 111 112 if (!no_hypercall) 113 ret = HYPERVISOR_platform_op(&op); 114 115 if (!ret) { 116 pr_debug("ACPI CPU%u - C-states uploaded.\n", _pr->acpi_id); 117 for (i = 1; i <= _pr->power.count; i++) { 118 cx = &_pr->power.states[i]; 119 if (!cx->valid) 120 continue; 121 pr_debug(" C%d: %s %d uS\n", 122 cx->type, cx->desc, (u32)cx->latency); 123 } 124 } else if ((ret != -EINVAL) && (ret != -ENOSYS)) 125 /* EINVAL means the ACPI ID is incorrect - meaning the ACPI 126 * table is referencing a non-existing CPU - which can happen 127 * with broken ACPI tables. */ 128 pr_err("(CX): Hypervisor error (%d) for ACPI CPU%u\n", 129 ret, _pr->acpi_id); 130 131 kfree(dst_cx_states); 132 133 return ret; 134 } 135 static struct xen_processor_px * 136 xen_copy_pss_data(struct acpi_processor *_pr, 137 struct xen_processor_performance *dst_perf) 138 { 139 struct xen_processor_px *dst_states = NULL; 140 unsigned int i; 141 142 BUILD_BUG_ON(sizeof(struct xen_processor_px) != 143 sizeof(struct acpi_processor_px)); 144 145 dst_states = kcalloc(_pr->performance->state_count, 146 sizeof(struct xen_processor_px), GFP_KERNEL); 147 if (!dst_states) 148 return ERR_PTR(-ENOMEM); 149 150 dst_perf->state_count = _pr->performance->state_count; 151 for (i = 0; i < _pr->performance->state_count; i++) { 152 /* Fortunatly for us, they are both the same size */ 153 memcpy(&(dst_states[i]), &(_pr->performance->states[i]), 154 sizeof(struct acpi_processor_px)); 155 } 156 return dst_states; 157 } 158 static int xen_copy_psd_data(struct acpi_processor *_pr, 159 struct xen_processor_performance *dst) 160 { 161 struct acpi_psd_package *pdomain; 162 163 BUILD_BUG_ON(sizeof(struct xen_psd_package) != 164 sizeof(struct acpi_psd_package)); 165 166 /* This information is enumerated only if acpi_processor_preregister_performance 167 * has been called. 168 */ 169 dst->shared_type = _pr->performance->shared_type; 170 171 pdomain = &(_pr->performance->domain_info); 172 173 /* 'acpi_processor_preregister_performance' does not parse if the 174 * num_processors <= 1, but Xen still requires it. Do it manually here. 175 */ 176 if (pdomain->num_processors <= 1) { 177 if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ALL) 178 dst->shared_type = CPUFREQ_SHARED_TYPE_ALL; 179 else if (pdomain->coord_type == DOMAIN_COORD_TYPE_HW_ALL) 180 dst->shared_type = CPUFREQ_SHARED_TYPE_HW; 181 else if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ANY) 182 dst->shared_type = CPUFREQ_SHARED_TYPE_ANY; 183 184 } 185 memcpy(&(dst->domain_info), pdomain, sizeof(struct acpi_psd_package)); 186 return 0; 187 } 188 static int xen_copy_pct_data(struct acpi_pct_register *pct, 189 struct xen_pct_register *dst_pct) 190 { 191 /* It would be nice if you could just do 'memcpy(pct, dst_pct') but 192 * sadly the Xen structure did not have the proper padding so the 193 * descriptor field takes two (dst_pct) bytes instead of one (pct). 194 */ 195 dst_pct->descriptor = pct->descriptor; 196 dst_pct->length = pct->length; 197 dst_pct->space_id = pct->space_id; 198 dst_pct->bit_width = pct->bit_width; 199 dst_pct->bit_offset = pct->bit_offset; 200 dst_pct->reserved = pct->reserved; 201 dst_pct->address = pct->address; 202 return 0; 203 } 204 static int push_pxx_to_hypervisor(struct acpi_processor *_pr) 205 { 206 int ret = 0; 207 struct xen_platform_op op = { 208 .cmd = XENPF_set_processor_pminfo, 209 .interface_version = XENPF_INTERFACE_VERSION, 210 .u.set_pminfo.id = _pr->acpi_id, 211 .u.set_pminfo.type = XEN_PM_PX, 212 }; 213 struct xen_processor_performance *dst_perf; 214 struct xen_processor_px *dst_states = NULL; 215 216 dst_perf = &op.u.set_pminfo.perf; 217 218 dst_perf->platform_limit = _pr->performance_platform_limit; 219 dst_perf->flags |= XEN_PX_PPC; 220 xen_copy_pct_data(&(_pr->performance->control_register), 221 &dst_perf->control_register); 222 xen_copy_pct_data(&(_pr->performance->status_register), 223 &dst_perf->status_register); 224 dst_perf->flags |= XEN_PX_PCT; 225 dst_states = xen_copy_pss_data(_pr, dst_perf); 226 if (!IS_ERR_OR_NULL(dst_states)) { 227 set_xen_guest_handle(dst_perf->states, dst_states); 228 dst_perf->flags |= XEN_PX_PSS; 229 } 230 if (!xen_copy_psd_data(_pr, dst_perf)) 231 dst_perf->flags |= XEN_PX_PSD; 232 233 if (dst_perf->flags != (XEN_PX_PSD | XEN_PX_PSS | XEN_PX_PCT | XEN_PX_PPC)) { 234 pr_warn("ACPI CPU%u missing some P-state data (%x), skipping\n", 235 _pr->acpi_id, dst_perf->flags); 236 ret = -ENODEV; 237 goto err_free; 238 } 239 240 if (!no_hypercall) 241 ret = HYPERVISOR_platform_op(&op); 242 243 if (!ret) { 244 struct acpi_processor_performance *perf; 245 unsigned int i; 246 247 perf = _pr->performance; 248 pr_debug("ACPI CPU%u - P-states uploaded.\n", _pr->acpi_id); 249 for (i = 0; i < perf->state_count; i++) { 250 pr_debug(" %cP%d: %d MHz, %d mW, %d uS\n", 251 (i == perf->state ? '*' : ' '), i, 252 (u32) perf->states[i].core_frequency, 253 (u32) perf->states[i].power, 254 (u32) perf->states[i].transition_latency); 255 } 256 } else if ((ret != -EINVAL) && (ret != -ENOSYS)) 257 /* EINVAL means the ACPI ID is incorrect - meaning the ACPI 258 * table is referencing a non-existing CPU - which can happen 259 * with broken ACPI tables. */ 260 pr_warn("(_PXX): Hypervisor error (%d) for ACPI CPU%u\n", 261 ret, _pr->acpi_id); 262 err_free: 263 if (!IS_ERR_OR_NULL(dst_states)) 264 kfree(dst_states); 265 266 return ret; 267 } 268 static int upload_pm_data(struct acpi_processor *_pr) 269 { 270 int err = 0; 271 272 mutex_lock(&acpi_ids_mutex); 273 if (__test_and_set_bit(_pr->acpi_id, acpi_ids_done)) { 274 mutex_unlock(&acpi_ids_mutex); 275 return -EBUSY; 276 } 277 if (_pr->flags.power) 278 err = push_cxx_to_hypervisor(_pr); 279 280 if (_pr->performance && _pr->performance->states) 281 err |= push_pxx_to_hypervisor(_pr); 282 283 mutex_unlock(&acpi_ids_mutex); 284 return err; 285 } 286 static unsigned int __init get_max_acpi_id(void) 287 { 288 struct xenpf_pcpuinfo *info; 289 struct xen_platform_op op = { 290 .cmd = XENPF_get_cpuinfo, 291 .interface_version = XENPF_INTERFACE_VERSION, 292 }; 293 int ret = 0; 294 unsigned int i, last_cpu, max_acpi_id = 0; 295 296 info = &op.u.pcpu_info; 297 info->xen_cpuid = 0; 298 299 ret = HYPERVISOR_platform_op(&op); 300 if (ret) 301 return NR_CPUS; 302 303 /* The max_present is the same irregardless of the xen_cpuid */ 304 last_cpu = op.u.pcpu_info.max_present; 305 for (i = 0; i <= last_cpu; i++) { 306 info->xen_cpuid = i; 307 ret = HYPERVISOR_platform_op(&op); 308 if (ret) 309 continue; 310 max_acpi_id = max(info->acpi_id, max_acpi_id); 311 } 312 max_acpi_id *= 2; /* Slack for CPU hotplug support. */ 313 pr_debug("Max ACPI ID: %u\n", max_acpi_id); 314 return max_acpi_id; 315 } 316 /* 317 * The read_acpi_id and check_acpi_ids are there to support the Xen 318 * oddity of virtual CPUs != physical CPUs in the initial domain. 319 * The user can supply 'xen_max_vcpus=X' on the Xen hypervisor line 320 * which will band the amount of CPUs the initial domain can see. 321 * In general that is OK, except it plays havoc with any of the 322 * for_each_[present|online]_cpu macros which are banded to the virtual 323 * CPU amount. 324 */ 325 static acpi_status 326 read_acpi_id(acpi_handle handle, u32 lvl, void *context, void **rv) 327 { 328 u32 acpi_id; 329 acpi_status status; 330 acpi_object_type acpi_type; 331 unsigned long long tmp; 332 union acpi_object object = { 0 }; 333 struct acpi_buffer buffer = { sizeof(union acpi_object), &object }; 334 acpi_io_address pblk = 0; 335 336 status = acpi_get_type(handle, &acpi_type); 337 if (ACPI_FAILURE(status)) 338 return AE_OK; 339 340 switch (acpi_type) { 341 case ACPI_TYPE_PROCESSOR: 342 status = acpi_evaluate_object(handle, NULL, NULL, &buffer); 343 if (ACPI_FAILURE(status)) 344 return AE_OK; 345 acpi_id = object.processor.proc_id; 346 pblk = object.processor.pblk_address; 347 break; 348 case ACPI_TYPE_DEVICE: 349 status = acpi_evaluate_integer(handle, "_UID", NULL, &tmp); 350 if (ACPI_FAILURE(status)) 351 return AE_OK; 352 acpi_id = tmp; 353 break; 354 default: 355 return AE_OK; 356 } 357 if (invalid_phys_cpuid(acpi_get_phys_id(handle, 358 acpi_type == ACPI_TYPE_DEVICE, 359 acpi_id))) { 360 pr_debug("CPU with ACPI ID %u is unavailable\n", acpi_id); 361 return AE_OK; 362 } 363 /* There are more ACPI Processor objects than in x2APIC or MADT. 364 * This can happen with incorrect ACPI SSDT declerations. */ 365 if (acpi_id >= nr_acpi_bits) { 366 pr_debug("max acpi id %u, trying to set %u\n", 367 nr_acpi_bits - 1, acpi_id); 368 return AE_OK; 369 } 370 /* OK, There is a ACPI Processor object */ 371 __set_bit(acpi_id, acpi_id_present); 372 373 pr_debug("ACPI CPU%u w/ PBLK:0x%lx\n", acpi_id, (unsigned long)pblk); 374 375 /* It has P-state dependencies */ 376 if (!acpi_processor_get_psd(handle, &acpi_psd[acpi_id])) { 377 pr_debug("ACPI CPU%u w/ PST:coord_type = %llu domain = %llu\n", 378 acpi_id, acpi_psd[acpi_id].coord_type, 379 acpi_psd[acpi_id].domain); 380 } 381 382 status = acpi_evaluate_object(handle, "_CST", NULL, &buffer); 383 if (ACPI_FAILURE(status)) { 384 if (!pblk) 385 return AE_OK; 386 } 387 /* .. and it has a C-state */ 388 __set_bit(acpi_id, acpi_id_cst_present); 389 390 return AE_OK; 391 } 392 static int check_acpi_ids(struct acpi_processor *pr_backup) 393 { 394 395 if (!pr_backup) 396 return -ENODEV; 397 398 if (acpi_id_present && acpi_id_cst_present) 399 /* OK, done this once .. skip to uploading */ 400 goto upload; 401 402 /* All online CPUs have been processed at this stage. Now verify 403 * whether in fact "online CPUs" == physical CPUs. 404 */ 405 acpi_id_present = bitmap_zalloc(nr_acpi_bits, GFP_KERNEL); 406 if (!acpi_id_present) 407 return -ENOMEM; 408 409 acpi_id_cst_present = bitmap_zalloc(nr_acpi_bits, GFP_KERNEL); 410 if (!acpi_id_cst_present) { 411 bitmap_free(acpi_id_present); 412 return -ENOMEM; 413 } 414 415 acpi_psd = kcalloc(nr_acpi_bits, sizeof(struct acpi_psd_package), 416 GFP_KERNEL); 417 if (!acpi_psd) { 418 bitmap_free(acpi_id_present); 419 bitmap_free(acpi_id_cst_present); 420 return -ENOMEM; 421 } 422 423 acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, 424 ACPI_UINT32_MAX, 425 read_acpi_id, NULL, NULL, NULL); 426 acpi_get_devices(ACPI_PROCESSOR_DEVICE_HID, read_acpi_id, NULL, NULL); 427 428 upload: 429 if (!bitmap_equal(acpi_id_present, acpi_ids_done, nr_acpi_bits)) { 430 unsigned int i; 431 for_each_set_bit(i, acpi_id_present, nr_acpi_bits) { 432 pr_backup->acpi_id = i; 433 /* Mask out C-states if there are no _CST or PBLK */ 434 pr_backup->flags.power = test_bit(i, acpi_id_cst_present); 435 /* num_entries is non-zero if we evaluated _PSD */ 436 if (acpi_psd[i].num_entries) { 437 memcpy(&pr_backup->performance->domain_info, 438 &acpi_psd[i], 439 sizeof(struct acpi_psd_package)); 440 } 441 (void)upload_pm_data(pr_backup); 442 } 443 } 444 445 return 0; 446 } 447 448 /* acpi_perf_data is a pointer to percpu data. */ 449 static struct acpi_processor_performance __percpu *acpi_perf_data; 450 451 static void free_acpi_perf_data(void) 452 { 453 int i; 454 455 /* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */ 456 for_each_possible_cpu(i) 457 free_cpumask_var(per_cpu_ptr(acpi_perf_data, i) 458 ->shared_cpu_map); 459 free_percpu(acpi_perf_data); 460 } 461 462 static int xen_upload_processor_pm_data(void) 463 { 464 struct acpi_processor *pr_backup = NULL; 465 int i; 466 int rc = 0; 467 468 pr_info("Uploading Xen processor PM info\n"); 469 470 for_each_possible_cpu(i) { 471 struct acpi_processor *_pr; 472 _pr = per_cpu(processors, i /* APIC ID */); 473 if (!_pr) 474 continue; 475 476 if (!pr_backup) 477 pr_backup = kmemdup(_pr, sizeof(*_pr), GFP_KERNEL); 478 (void)upload_pm_data(_pr); 479 } 480 481 rc = check_acpi_ids(pr_backup); 482 kfree(pr_backup); 483 484 return rc; 485 } 486 487 static void xen_acpi_processor_resume_worker(struct work_struct *dummy) 488 { 489 int rc; 490 491 bitmap_zero(acpi_ids_done, nr_acpi_bits); 492 493 rc = xen_upload_processor_pm_data(); 494 if (rc != 0) 495 pr_info("ACPI data upload failed, error = %d\n", rc); 496 } 497 498 static void xen_acpi_processor_resume(void) 499 { 500 static DECLARE_WORK(wq, xen_acpi_processor_resume_worker); 501 502 /* 503 * xen_upload_processor_pm_data() calls non-atomic code. 504 * However, the context for xen_acpi_processor_resume is syscore 505 * with only the boot CPU online and in an atomic context. 506 * 507 * So defer the upload for some point safer. 508 */ 509 schedule_work(&wq); 510 } 511 512 static struct syscore_ops xap_syscore_ops = { 513 .resume = xen_acpi_processor_resume, 514 }; 515 516 static int __init xen_acpi_processor_init(void) 517 { 518 int i; 519 int rc; 520 521 if (!xen_initial_domain()) 522 return -ENODEV; 523 524 nr_acpi_bits = get_max_acpi_id() + 1; 525 acpi_ids_done = bitmap_zalloc(nr_acpi_bits, GFP_KERNEL); 526 if (!acpi_ids_done) 527 return -ENOMEM; 528 529 acpi_perf_data = alloc_percpu(struct acpi_processor_performance); 530 if (!acpi_perf_data) { 531 pr_debug("Memory allocation error for acpi_perf_data\n"); 532 bitmap_free(acpi_ids_done); 533 return -ENOMEM; 534 } 535 for_each_possible_cpu(i) { 536 if (!zalloc_cpumask_var_node( 537 &per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map, 538 GFP_KERNEL, cpu_to_node(i))) { 539 rc = -ENOMEM; 540 goto err_out; 541 } 542 } 543 544 /* Do initialization in ACPI core. It is OK to fail here. */ 545 (void)acpi_processor_preregister_performance(acpi_perf_data); 546 547 for_each_possible_cpu(i) { 548 struct acpi_processor *pr; 549 struct acpi_processor_performance *perf; 550 551 pr = per_cpu(processors, i); 552 perf = per_cpu_ptr(acpi_perf_data, i); 553 if (!pr) 554 continue; 555 556 pr->performance = perf; 557 rc = acpi_processor_get_performance_info(pr); 558 if (rc) 559 goto err_out; 560 } 561 562 rc = xen_upload_processor_pm_data(); 563 if (rc) 564 goto err_unregister; 565 566 register_syscore_ops(&xap_syscore_ops); 567 568 return 0; 569 err_unregister: 570 for_each_possible_cpu(i) 571 acpi_processor_unregister_performance(i); 572 573 err_out: 574 /* Freeing a NULL pointer is OK: alloc_percpu zeroes. */ 575 free_acpi_perf_data(); 576 bitmap_free(acpi_ids_done); 577 return rc; 578 } 579 static void __exit xen_acpi_processor_exit(void) 580 { 581 int i; 582 583 unregister_syscore_ops(&xap_syscore_ops); 584 bitmap_free(acpi_ids_done); 585 bitmap_free(acpi_id_present); 586 bitmap_free(acpi_id_cst_present); 587 kfree(acpi_psd); 588 for_each_possible_cpu(i) 589 acpi_processor_unregister_performance(i); 590 591 free_acpi_perf_data(); 592 } 593 594 MODULE_AUTHOR("Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>"); 595 MODULE_DESCRIPTION("Xen ACPI Processor P-states (and Cx) driver which uploads PM data to Xen hypervisor"); 596 MODULE_LICENSE("GPL"); 597 598 /* We want to be loaded before the CPU freq scaling drivers are loaded. 599 * They are loaded in late_initcall. */ 600 device_initcall(xen_acpi_processor_init); 601 module_exit(xen_acpi_processor_exit); 602