1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Common code for Intel Running Average Power Limit (RAPL) support. 4 * Copyright (c) 2019, Intel Corporation. 5 */ 6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 7 8 #include <linux/kernel.h> 9 #include <linux/module.h> 10 #include <linux/list.h> 11 #include <linux/types.h> 12 #include <linux/device.h> 13 #include <linux/slab.h> 14 #include <linux/log2.h> 15 #include <linux/bitmap.h> 16 #include <linux/delay.h> 17 #include <linux/sysfs.h> 18 #include <linux/cpu.h> 19 #include <linux/powercap.h> 20 #include <linux/suspend.h> 21 #include <linux/intel_rapl.h> 22 #include <linux/processor.h> 23 #include <linux/platform_device.h> 24 25 #include <asm/iosf_mbi.h> 26 #include <asm/cpu_device_id.h> 27 #include <asm/intel-family.h> 28 29 /* bitmasks for RAPL MSRs, used by primitive access functions */ 30 #define ENERGY_STATUS_MASK 0xffffffff 31 32 #define POWER_LIMIT1_MASK 0x7FFF 33 #define POWER_LIMIT1_ENABLE BIT(15) 34 #define POWER_LIMIT1_CLAMP BIT(16) 35 36 #define POWER_LIMIT2_MASK (0x7FFFULL<<32) 37 #define POWER_LIMIT2_ENABLE BIT_ULL(47) 38 #define POWER_LIMIT2_CLAMP BIT_ULL(48) 39 #define POWER_HIGH_LOCK BIT_ULL(63) 40 #define POWER_LOW_LOCK BIT(31) 41 42 #define POWER_LIMIT4_MASK 0x1FFF 43 44 #define TIME_WINDOW1_MASK (0x7FULL<<17) 45 #define TIME_WINDOW2_MASK (0x7FULL<<49) 46 47 #define POWER_UNIT_OFFSET 0 48 #define POWER_UNIT_MASK 0x0F 49 50 #define ENERGY_UNIT_OFFSET 0x08 51 #define ENERGY_UNIT_MASK 0x1F00 52 53 #define TIME_UNIT_OFFSET 0x10 54 #define TIME_UNIT_MASK 0xF0000 55 56 #define POWER_INFO_MAX_MASK (0x7fffULL<<32) 57 #define POWER_INFO_MIN_MASK (0x7fffULL<<16) 58 #define POWER_INFO_MAX_TIME_WIN_MASK (0x3fULL<<48) 59 #define POWER_INFO_THERMAL_SPEC_MASK 0x7fff 60 61 #define PERF_STATUS_THROTTLE_TIME_MASK 0xffffffff 62 #define PP_POLICY_MASK 0x1F 63 64 /* 65 * SPR has different layout for Psys Domain PowerLimit registers. 66 * There are 17 bits of PL1 and PL2 instead of 15 bits. 67 * The Enable bits and TimeWindow bits are also shifted as a result. 68 */ 69 #define PSYS_POWER_LIMIT1_MASK 0x1FFFF 70 #define PSYS_POWER_LIMIT1_ENABLE BIT(17) 71 72 #define PSYS_POWER_LIMIT2_MASK (0x1FFFFULL<<32) 73 #define PSYS_POWER_LIMIT2_ENABLE BIT_ULL(49) 74 75 #define PSYS_TIME_WINDOW1_MASK (0x7FULL<<19) 76 #define PSYS_TIME_WINDOW2_MASK (0x7FULL<<51) 77 78 /* bitmasks for RAPL TPMI, used by primitive access functions */ 79 #define TPMI_POWER_LIMIT_MASK 0x3FFFF 80 #define TPMI_POWER_LIMIT_ENABLE BIT_ULL(62) 81 #define TPMI_TIME_WINDOW_MASK (0x7FULL<<18) 82 #define TPMI_INFO_SPEC_MASK 0x3FFFF 83 #define TPMI_INFO_MIN_MASK (0x3FFFFULL << 18) 84 #define TPMI_INFO_MAX_MASK (0x3FFFFULL << 36) 85 #define TPMI_INFO_MAX_TIME_WIN_MASK (0x7FULL << 54) 86 87 /* Non HW constants */ 88 #define RAPL_PRIMITIVE_DERIVED BIT(1) /* not from raw data */ 89 #define RAPL_PRIMITIVE_DUMMY BIT(2) 90 91 #define TIME_WINDOW_MAX_MSEC 40000 92 #define TIME_WINDOW_MIN_MSEC 250 93 #define ENERGY_UNIT_SCALE 1000 /* scale from driver unit to powercap unit */ 94 enum unit_type { 95 ARBITRARY_UNIT, /* no translation */ 96 POWER_UNIT, 97 ENERGY_UNIT, 98 TIME_UNIT, 99 }; 100 101 /* per domain data, some are optional */ 102 #define NR_RAW_PRIMITIVES (NR_RAPL_PRIMITIVES - 2) 103 104 #define DOMAIN_STATE_INACTIVE BIT(0) 105 #define DOMAIN_STATE_POWER_LIMIT_SET BIT(1) 106 107 static const char *pl_names[NR_POWER_LIMITS] = { 108 [POWER_LIMIT1] = "long_term", 109 [POWER_LIMIT2] = "short_term", 110 [POWER_LIMIT4] = "peak_power", 111 }; 112 113 enum pl_prims { 114 PL_ENABLE, 115 PL_CLAMP, 116 PL_LIMIT, 117 PL_TIME_WINDOW, 118 PL_MAX_POWER, 119 PL_LOCK, 120 }; 121 122 static bool is_pl_valid(struct rapl_domain *rd, int pl) 123 { 124 if (pl < POWER_LIMIT1 || pl > POWER_LIMIT4) 125 return false; 126 return rd->rpl[pl].name ? true : false; 127 } 128 129 static int get_pl_lock_prim(struct rapl_domain *rd, int pl) 130 { 131 if (rd->rp->priv->type == RAPL_IF_TPMI) { 132 if (pl == POWER_LIMIT1) 133 return PL1_LOCK; 134 if (pl == POWER_LIMIT2) 135 return PL2_LOCK; 136 if (pl == POWER_LIMIT4) 137 return PL4_LOCK; 138 } 139 140 /* MSR/MMIO Interface doesn't have Lock bit for PL4 */ 141 if (pl == POWER_LIMIT4) 142 return -EINVAL; 143 144 /* 145 * Power Limit register that supports two power limits has a different 146 * bit position for the Lock bit. 147 */ 148 if (rd->rp->priv->limits[rd->id] & BIT(POWER_LIMIT2)) 149 return FW_HIGH_LOCK; 150 return FW_LOCK; 151 } 152 153 static int get_pl_prim(struct rapl_domain *rd, int pl, enum pl_prims prim) 154 { 155 switch (pl) { 156 case POWER_LIMIT1: 157 if (prim == PL_ENABLE) 158 return PL1_ENABLE; 159 if (prim == PL_CLAMP && rd->rp->priv->type != RAPL_IF_TPMI) 160 return PL1_CLAMP; 161 if (prim == PL_LIMIT) 162 return POWER_LIMIT1; 163 if (prim == PL_TIME_WINDOW) 164 return TIME_WINDOW1; 165 if (prim == PL_MAX_POWER) 166 return THERMAL_SPEC_POWER; 167 if (prim == PL_LOCK) 168 return get_pl_lock_prim(rd, pl); 169 return -EINVAL; 170 case POWER_LIMIT2: 171 if (prim == PL_ENABLE) 172 return PL2_ENABLE; 173 if (prim == PL_CLAMP && rd->rp->priv->type != RAPL_IF_TPMI) 174 return PL2_CLAMP; 175 if (prim == PL_LIMIT) 176 return POWER_LIMIT2; 177 if (prim == PL_TIME_WINDOW) 178 return TIME_WINDOW2; 179 if (prim == PL_MAX_POWER) 180 return MAX_POWER; 181 if (prim == PL_LOCK) 182 return get_pl_lock_prim(rd, pl); 183 return -EINVAL; 184 case POWER_LIMIT4: 185 if (prim == PL_LIMIT) 186 return POWER_LIMIT4; 187 if (prim == PL_ENABLE) 188 return PL4_ENABLE; 189 /* PL4 would be around two times PL2, use same prim as PL2. */ 190 if (prim == PL_MAX_POWER) 191 return MAX_POWER; 192 if (prim == PL_LOCK) 193 return get_pl_lock_prim(rd, pl); 194 return -EINVAL; 195 default: 196 return -EINVAL; 197 } 198 } 199 200 #define power_zone_to_rapl_domain(_zone) \ 201 container_of(_zone, struct rapl_domain, power_zone) 202 203 struct rapl_defaults { 204 u8 floor_freq_reg_addr; 205 int (*check_unit)(struct rapl_domain *rd); 206 void (*set_floor_freq)(struct rapl_domain *rd, bool mode); 207 u64 (*compute_time_window)(struct rapl_domain *rd, u64 val, 208 bool to_raw); 209 unsigned int dram_domain_energy_unit; 210 unsigned int psys_domain_energy_unit; 211 bool spr_psys_bits; 212 }; 213 static struct rapl_defaults *defaults_msr; 214 static const struct rapl_defaults defaults_tpmi; 215 216 static struct rapl_defaults *get_defaults(struct rapl_package *rp) 217 { 218 return rp->priv->defaults; 219 } 220 221 /* Sideband MBI registers */ 222 #define IOSF_CPU_POWER_BUDGET_CTL_BYT (0x2) 223 #define IOSF_CPU_POWER_BUDGET_CTL_TNG (0xdf) 224 225 #define PACKAGE_PLN_INT_SAVED BIT(0) 226 #define MAX_PRIM_NAME (32) 227 228 /* per domain data. used to describe individual knobs such that access function 229 * can be consolidated into one instead of many inline functions. 230 */ 231 struct rapl_primitive_info { 232 const char *name; 233 u64 mask; 234 int shift; 235 enum rapl_domain_reg_id id; 236 enum unit_type unit; 237 u32 flag; 238 }; 239 240 #define PRIMITIVE_INFO_INIT(p, m, s, i, u, f) { \ 241 .name = #p, \ 242 .mask = m, \ 243 .shift = s, \ 244 .id = i, \ 245 .unit = u, \ 246 .flag = f \ 247 } 248 249 static void rapl_init_domains(struct rapl_package *rp); 250 static int rapl_read_data_raw(struct rapl_domain *rd, 251 enum rapl_primitives prim, 252 bool xlate, u64 *data); 253 static int rapl_write_data_raw(struct rapl_domain *rd, 254 enum rapl_primitives prim, 255 unsigned long long value); 256 static int rapl_read_pl_data(struct rapl_domain *rd, int pl, 257 enum pl_prims pl_prim, 258 bool xlate, u64 *data); 259 static int rapl_write_pl_data(struct rapl_domain *rd, int pl, 260 enum pl_prims pl_prim, 261 unsigned long long value); 262 static u64 rapl_unit_xlate(struct rapl_domain *rd, 263 enum unit_type type, u64 value, int to_raw); 264 static void package_power_limit_irq_save(struct rapl_package *rp); 265 266 static LIST_HEAD(rapl_packages); /* guarded by CPU hotplug lock */ 267 268 static const char *const rapl_domain_names[] = { 269 "package", 270 "core", 271 "uncore", 272 "dram", 273 "psys", 274 }; 275 276 static int get_energy_counter(struct powercap_zone *power_zone, 277 u64 *energy_raw) 278 { 279 struct rapl_domain *rd; 280 u64 energy_now; 281 282 /* prevent CPU hotplug, make sure the RAPL domain does not go 283 * away while reading the counter. 284 */ 285 cpus_read_lock(); 286 rd = power_zone_to_rapl_domain(power_zone); 287 288 if (!rapl_read_data_raw(rd, ENERGY_COUNTER, true, &energy_now)) { 289 *energy_raw = energy_now; 290 cpus_read_unlock(); 291 292 return 0; 293 } 294 cpus_read_unlock(); 295 296 return -EIO; 297 } 298 299 static int get_max_energy_counter(struct powercap_zone *pcd_dev, u64 *energy) 300 { 301 struct rapl_domain *rd = power_zone_to_rapl_domain(pcd_dev); 302 303 *energy = rapl_unit_xlate(rd, ENERGY_UNIT, ENERGY_STATUS_MASK, 0); 304 return 0; 305 } 306 307 static int release_zone(struct powercap_zone *power_zone) 308 { 309 struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone); 310 struct rapl_package *rp = rd->rp; 311 312 /* package zone is the last zone of a package, we can free 313 * memory here since all children has been unregistered. 314 */ 315 if (rd->id == RAPL_DOMAIN_PACKAGE) { 316 kfree(rd); 317 rp->domains = NULL; 318 } 319 320 return 0; 321 322 } 323 324 static int find_nr_power_limit(struct rapl_domain *rd) 325 { 326 int i, nr_pl = 0; 327 328 for (i = 0; i < NR_POWER_LIMITS; i++) { 329 if (is_pl_valid(rd, i)) 330 nr_pl++; 331 } 332 333 return nr_pl; 334 } 335 336 static int set_domain_enable(struct powercap_zone *power_zone, bool mode) 337 { 338 struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone); 339 struct rapl_defaults *defaults = get_defaults(rd->rp); 340 int ret; 341 342 cpus_read_lock(); 343 ret = rapl_write_pl_data(rd, POWER_LIMIT1, PL_ENABLE, mode); 344 if (!ret && defaults->set_floor_freq) 345 defaults->set_floor_freq(rd, mode); 346 cpus_read_unlock(); 347 348 return ret; 349 } 350 351 static int get_domain_enable(struct powercap_zone *power_zone, bool *mode) 352 { 353 struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone); 354 u64 val; 355 int ret; 356 357 if (rd->rpl[POWER_LIMIT1].locked) { 358 *mode = false; 359 return 0; 360 } 361 cpus_read_lock(); 362 ret = rapl_read_pl_data(rd, POWER_LIMIT1, PL_ENABLE, true, &val); 363 if (!ret) 364 *mode = val; 365 cpus_read_unlock(); 366 367 return ret; 368 } 369 370 /* per RAPL domain ops, in the order of rapl_domain_type */ 371 static const struct powercap_zone_ops zone_ops[] = { 372 /* RAPL_DOMAIN_PACKAGE */ 373 { 374 .get_energy_uj = get_energy_counter, 375 .get_max_energy_range_uj = get_max_energy_counter, 376 .release = release_zone, 377 .set_enable = set_domain_enable, 378 .get_enable = get_domain_enable, 379 }, 380 /* RAPL_DOMAIN_PP0 */ 381 { 382 .get_energy_uj = get_energy_counter, 383 .get_max_energy_range_uj = get_max_energy_counter, 384 .release = release_zone, 385 .set_enable = set_domain_enable, 386 .get_enable = get_domain_enable, 387 }, 388 /* RAPL_DOMAIN_PP1 */ 389 { 390 .get_energy_uj = get_energy_counter, 391 .get_max_energy_range_uj = get_max_energy_counter, 392 .release = release_zone, 393 .set_enable = set_domain_enable, 394 .get_enable = get_domain_enable, 395 }, 396 /* RAPL_DOMAIN_DRAM */ 397 { 398 .get_energy_uj = get_energy_counter, 399 .get_max_energy_range_uj = get_max_energy_counter, 400 .release = release_zone, 401 .set_enable = set_domain_enable, 402 .get_enable = get_domain_enable, 403 }, 404 /* RAPL_DOMAIN_PLATFORM */ 405 { 406 .get_energy_uj = get_energy_counter, 407 .get_max_energy_range_uj = get_max_energy_counter, 408 .release = release_zone, 409 .set_enable = set_domain_enable, 410 .get_enable = get_domain_enable, 411 }, 412 }; 413 414 /* 415 * Constraint index used by powercap can be different than power limit (PL) 416 * index in that some PLs maybe missing due to non-existent MSRs. So we 417 * need to convert here by finding the valid PLs only (name populated). 418 */ 419 static int contraint_to_pl(struct rapl_domain *rd, int cid) 420 { 421 int i, j; 422 423 for (i = POWER_LIMIT1, j = 0; i < NR_POWER_LIMITS; i++) { 424 if (is_pl_valid(rd, i) && j++ == cid) { 425 pr_debug("%s: index %d\n", __func__, i); 426 return i; 427 } 428 } 429 pr_err("Cannot find matching power limit for constraint %d\n", cid); 430 431 return -EINVAL; 432 } 433 434 static int set_power_limit(struct powercap_zone *power_zone, int cid, 435 u64 power_limit) 436 { 437 struct rapl_domain *rd; 438 struct rapl_package *rp; 439 int ret = 0; 440 int id; 441 442 cpus_read_lock(); 443 rd = power_zone_to_rapl_domain(power_zone); 444 id = contraint_to_pl(rd, cid); 445 rp = rd->rp; 446 447 ret = rapl_write_pl_data(rd, id, PL_LIMIT, power_limit); 448 if (!ret) 449 package_power_limit_irq_save(rp); 450 cpus_read_unlock(); 451 return ret; 452 } 453 454 static int get_current_power_limit(struct powercap_zone *power_zone, int cid, 455 u64 *data) 456 { 457 struct rapl_domain *rd; 458 u64 val; 459 int ret = 0; 460 int id; 461 462 cpus_read_lock(); 463 rd = power_zone_to_rapl_domain(power_zone); 464 id = contraint_to_pl(rd, cid); 465 466 ret = rapl_read_pl_data(rd, id, PL_LIMIT, true, &val); 467 if (!ret) 468 *data = val; 469 470 cpus_read_unlock(); 471 472 return ret; 473 } 474 475 static int set_time_window(struct powercap_zone *power_zone, int cid, 476 u64 window) 477 { 478 struct rapl_domain *rd; 479 int ret = 0; 480 int id; 481 482 cpus_read_lock(); 483 rd = power_zone_to_rapl_domain(power_zone); 484 id = contraint_to_pl(rd, cid); 485 486 ret = rapl_write_pl_data(rd, id, PL_TIME_WINDOW, window); 487 488 cpus_read_unlock(); 489 return ret; 490 } 491 492 static int get_time_window(struct powercap_zone *power_zone, int cid, 493 u64 *data) 494 { 495 struct rapl_domain *rd; 496 u64 val; 497 int ret = 0; 498 int id; 499 500 cpus_read_lock(); 501 rd = power_zone_to_rapl_domain(power_zone); 502 id = contraint_to_pl(rd, cid); 503 504 ret = rapl_read_pl_data(rd, id, PL_TIME_WINDOW, true, &val); 505 if (!ret) 506 *data = val; 507 508 cpus_read_unlock(); 509 510 return ret; 511 } 512 513 static const char *get_constraint_name(struct powercap_zone *power_zone, 514 int cid) 515 { 516 struct rapl_domain *rd; 517 int id; 518 519 rd = power_zone_to_rapl_domain(power_zone); 520 id = contraint_to_pl(rd, cid); 521 if (id >= 0) 522 return rd->rpl[id].name; 523 524 return NULL; 525 } 526 527 static int get_max_power(struct powercap_zone *power_zone, int cid, u64 *data) 528 { 529 struct rapl_domain *rd; 530 u64 val; 531 int ret = 0; 532 int id; 533 534 cpus_read_lock(); 535 rd = power_zone_to_rapl_domain(power_zone); 536 id = contraint_to_pl(rd, cid); 537 538 ret = rapl_read_pl_data(rd, id, PL_MAX_POWER, true, &val); 539 if (!ret) 540 *data = val; 541 542 /* As a generalization rule, PL4 would be around two times PL2. */ 543 if (id == POWER_LIMIT4) 544 *data = *data * 2; 545 546 cpus_read_unlock(); 547 548 return ret; 549 } 550 551 static const struct powercap_zone_constraint_ops constraint_ops = { 552 .set_power_limit_uw = set_power_limit, 553 .get_power_limit_uw = get_current_power_limit, 554 .set_time_window_us = set_time_window, 555 .get_time_window_us = get_time_window, 556 .get_max_power_uw = get_max_power, 557 .get_name = get_constraint_name, 558 }; 559 560 /* Return the id used for read_raw/write_raw callback */ 561 static int get_rid(struct rapl_package *rp) 562 { 563 return rp->lead_cpu >= 0 ? rp->lead_cpu : rp->id; 564 } 565 566 /* called after domain detection and package level data are set */ 567 static void rapl_init_domains(struct rapl_package *rp) 568 { 569 enum rapl_domain_type i; 570 enum rapl_domain_reg_id j; 571 struct rapl_domain *rd = rp->domains; 572 573 for (i = 0; i < RAPL_DOMAIN_MAX; i++) { 574 unsigned int mask = rp->domain_map & (1 << i); 575 int t; 576 577 if (!mask) 578 continue; 579 580 rd->rp = rp; 581 582 if (i == RAPL_DOMAIN_PLATFORM && rp->id > 0) { 583 snprintf(rd->name, RAPL_DOMAIN_NAME_LENGTH, "psys-%d", 584 rp->lead_cpu >= 0 ? topology_physical_package_id(rp->lead_cpu) : 585 rp->id); 586 } else { 587 snprintf(rd->name, RAPL_DOMAIN_NAME_LENGTH, "%s", 588 rapl_domain_names[i]); 589 } 590 591 rd->id = i; 592 593 /* PL1 is supported by default */ 594 rp->priv->limits[i] |= BIT(POWER_LIMIT1); 595 596 for (t = POWER_LIMIT1; t < NR_POWER_LIMITS; t++) { 597 if (rp->priv->limits[i] & BIT(t)) 598 rd->rpl[t].name = pl_names[t]; 599 } 600 601 for (j = 0; j < RAPL_DOMAIN_REG_MAX; j++) 602 rd->regs[j] = rp->priv->regs[i][j]; 603 604 rd++; 605 } 606 } 607 608 static u64 rapl_unit_xlate(struct rapl_domain *rd, enum unit_type type, 609 u64 value, int to_raw) 610 { 611 u64 units = 1; 612 struct rapl_defaults *defaults = get_defaults(rd->rp); 613 u64 scale = 1; 614 615 switch (type) { 616 case POWER_UNIT: 617 units = rd->power_unit; 618 break; 619 case ENERGY_UNIT: 620 scale = ENERGY_UNIT_SCALE; 621 units = rd->energy_unit; 622 break; 623 case TIME_UNIT: 624 return defaults->compute_time_window(rd, value, to_raw); 625 case ARBITRARY_UNIT: 626 default: 627 return value; 628 } 629 630 if (to_raw) 631 return div64_u64(value, units) * scale; 632 633 value *= units; 634 635 return div64_u64(value, scale); 636 } 637 638 /* RAPL primitives for MSR and MMIO I/F */ 639 static struct rapl_primitive_info rpi_msr[NR_RAPL_PRIMITIVES] = { 640 /* name, mask, shift, msr index, unit divisor */ 641 [POWER_LIMIT1] = PRIMITIVE_INFO_INIT(POWER_LIMIT1, POWER_LIMIT1_MASK, 0, 642 RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), 643 [POWER_LIMIT2] = PRIMITIVE_INFO_INIT(POWER_LIMIT2, POWER_LIMIT2_MASK, 32, 644 RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), 645 [POWER_LIMIT4] = PRIMITIVE_INFO_INIT(POWER_LIMIT4, POWER_LIMIT4_MASK, 0, 646 RAPL_DOMAIN_REG_PL4, POWER_UNIT, 0), 647 [ENERGY_COUNTER] = PRIMITIVE_INFO_INIT(ENERGY_COUNTER, ENERGY_STATUS_MASK, 0, 648 RAPL_DOMAIN_REG_STATUS, ENERGY_UNIT, 0), 649 [FW_LOCK] = PRIMITIVE_INFO_INIT(FW_LOCK, POWER_LOW_LOCK, 31, 650 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), 651 [FW_HIGH_LOCK] = PRIMITIVE_INFO_INIT(FW_LOCK, POWER_HIGH_LOCK, 63, 652 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), 653 [PL1_ENABLE] = PRIMITIVE_INFO_INIT(PL1_ENABLE, POWER_LIMIT1_ENABLE, 15, 654 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), 655 [PL1_CLAMP] = PRIMITIVE_INFO_INIT(PL1_CLAMP, POWER_LIMIT1_CLAMP, 16, 656 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), 657 [PL2_ENABLE] = PRIMITIVE_INFO_INIT(PL2_ENABLE, POWER_LIMIT2_ENABLE, 47, 658 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), 659 [PL2_CLAMP] = PRIMITIVE_INFO_INIT(PL2_CLAMP, POWER_LIMIT2_CLAMP, 48, 660 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), 661 [PL4_ENABLE] = PRIMITIVE_INFO_INIT(PL4_ENABLE, POWER_LIMIT4_MASK, 0, 662 RAPL_DOMAIN_REG_PL4, ARBITRARY_UNIT, 0), 663 [TIME_WINDOW1] = PRIMITIVE_INFO_INIT(TIME_WINDOW1, TIME_WINDOW1_MASK, 17, 664 RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), 665 [TIME_WINDOW2] = PRIMITIVE_INFO_INIT(TIME_WINDOW2, TIME_WINDOW2_MASK, 49, 666 RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), 667 [THERMAL_SPEC_POWER] = PRIMITIVE_INFO_INIT(THERMAL_SPEC_POWER, POWER_INFO_THERMAL_SPEC_MASK, 668 0, RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0), 669 [MAX_POWER] = PRIMITIVE_INFO_INIT(MAX_POWER, POWER_INFO_MAX_MASK, 32, 670 RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0), 671 [MIN_POWER] = PRIMITIVE_INFO_INIT(MIN_POWER, POWER_INFO_MIN_MASK, 16, 672 RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0), 673 [MAX_TIME_WINDOW] = PRIMITIVE_INFO_INIT(MAX_TIME_WINDOW, POWER_INFO_MAX_TIME_WIN_MASK, 48, 674 RAPL_DOMAIN_REG_INFO, TIME_UNIT, 0), 675 [THROTTLED_TIME] = PRIMITIVE_INFO_INIT(THROTTLED_TIME, PERF_STATUS_THROTTLE_TIME_MASK, 0, 676 RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0), 677 [PRIORITY_LEVEL] = PRIMITIVE_INFO_INIT(PRIORITY_LEVEL, PP_POLICY_MASK, 0, 678 RAPL_DOMAIN_REG_POLICY, ARBITRARY_UNIT, 0), 679 [PSYS_POWER_LIMIT1] = PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT1, PSYS_POWER_LIMIT1_MASK, 0, 680 RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), 681 [PSYS_POWER_LIMIT2] = PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT2, PSYS_POWER_LIMIT2_MASK, 32, 682 RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), 683 [PSYS_PL1_ENABLE] = PRIMITIVE_INFO_INIT(PSYS_PL1_ENABLE, PSYS_POWER_LIMIT1_ENABLE, 17, 684 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), 685 [PSYS_PL2_ENABLE] = PRIMITIVE_INFO_INIT(PSYS_PL2_ENABLE, PSYS_POWER_LIMIT2_ENABLE, 49, 686 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), 687 [PSYS_TIME_WINDOW1] = PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW1, PSYS_TIME_WINDOW1_MASK, 19, 688 RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), 689 [PSYS_TIME_WINDOW2] = PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW2, PSYS_TIME_WINDOW2_MASK, 51, 690 RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), 691 /* non-hardware */ 692 [AVERAGE_POWER] = PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0, POWER_UNIT, 693 RAPL_PRIMITIVE_DERIVED), 694 }; 695 696 /* RAPL primitives for TPMI I/F */ 697 static struct rapl_primitive_info rpi_tpmi[NR_RAPL_PRIMITIVES] = { 698 /* name, mask, shift, msr index, unit divisor */ 699 [POWER_LIMIT1] = PRIMITIVE_INFO_INIT(POWER_LIMIT1, TPMI_POWER_LIMIT_MASK, 0, 700 RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), 701 [POWER_LIMIT2] = PRIMITIVE_INFO_INIT(POWER_LIMIT2, TPMI_POWER_LIMIT_MASK, 0, 702 RAPL_DOMAIN_REG_PL2, POWER_UNIT, 0), 703 [POWER_LIMIT4] = PRIMITIVE_INFO_INIT(POWER_LIMIT4, TPMI_POWER_LIMIT_MASK, 0, 704 RAPL_DOMAIN_REG_PL4, POWER_UNIT, 0), 705 [ENERGY_COUNTER] = PRIMITIVE_INFO_INIT(ENERGY_COUNTER, ENERGY_STATUS_MASK, 0, 706 RAPL_DOMAIN_REG_STATUS, ENERGY_UNIT, 0), 707 [PL1_LOCK] = PRIMITIVE_INFO_INIT(PL1_LOCK, POWER_HIGH_LOCK, 63, 708 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), 709 [PL2_LOCK] = PRIMITIVE_INFO_INIT(PL2_LOCK, POWER_HIGH_LOCK, 63, 710 RAPL_DOMAIN_REG_PL2, ARBITRARY_UNIT, 0), 711 [PL4_LOCK] = PRIMITIVE_INFO_INIT(PL4_LOCK, POWER_HIGH_LOCK, 63, 712 RAPL_DOMAIN_REG_PL4, ARBITRARY_UNIT, 0), 713 [PL1_ENABLE] = PRIMITIVE_INFO_INIT(PL1_ENABLE, TPMI_POWER_LIMIT_ENABLE, 62, 714 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), 715 [PL2_ENABLE] = PRIMITIVE_INFO_INIT(PL2_ENABLE, TPMI_POWER_LIMIT_ENABLE, 62, 716 RAPL_DOMAIN_REG_PL2, ARBITRARY_UNIT, 0), 717 [PL4_ENABLE] = PRIMITIVE_INFO_INIT(PL4_ENABLE, TPMI_POWER_LIMIT_ENABLE, 62, 718 RAPL_DOMAIN_REG_PL4, ARBITRARY_UNIT, 0), 719 [TIME_WINDOW1] = PRIMITIVE_INFO_INIT(TIME_WINDOW1, TPMI_TIME_WINDOW_MASK, 18, 720 RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), 721 [TIME_WINDOW2] = PRIMITIVE_INFO_INIT(TIME_WINDOW2, TPMI_TIME_WINDOW_MASK, 18, 722 RAPL_DOMAIN_REG_PL2, TIME_UNIT, 0), 723 [THERMAL_SPEC_POWER] = PRIMITIVE_INFO_INIT(THERMAL_SPEC_POWER, TPMI_INFO_SPEC_MASK, 0, 724 RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0), 725 [MAX_POWER] = PRIMITIVE_INFO_INIT(MAX_POWER, TPMI_INFO_MAX_MASK, 36, 726 RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0), 727 [MIN_POWER] = PRIMITIVE_INFO_INIT(MIN_POWER, TPMI_INFO_MIN_MASK, 18, 728 RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0), 729 [MAX_TIME_WINDOW] = PRIMITIVE_INFO_INIT(MAX_TIME_WINDOW, TPMI_INFO_MAX_TIME_WIN_MASK, 54, 730 RAPL_DOMAIN_REG_INFO, TIME_UNIT, 0), 731 [THROTTLED_TIME] = PRIMITIVE_INFO_INIT(THROTTLED_TIME, PERF_STATUS_THROTTLE_TIME_MASK, 0, 732 RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0), 733 /* non-hardware */ 734 [AVERAGE_POWER] = PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0, 735 POWER_UNIT, RAPL_PRIMITIVE_DERIVED), 736 }; 737 738 static struct rapl_primitive_info *get_rpi(struct rapl_package *rp, int prim) 739 { 740 struct rapl_primitive_info *rpi = rp->priv->rpi; 741 742 if (prim < 0 || prim > NR_RAPL_PRIMITIVES || !rpi) 743 return NULL; 744 745 return &rpi[prim]; 746 } 747 748 static int rapl_config(struct rapl_package *rp) 749 { 750 switch (rp->priv->type) { 751 /* MMIO I/F shares the same register layout as MSR registers */ 752 case RAPL_IF_MMIO: 753 case RAPL_IF_MSR: 754 rp->priv->defaults = (void *)defaults_msr; 755 rp->priv->rpi = (void *)rpi_msr; 756 break; 757 case RAPL_IF_TPMI: 758 rp->priv->defaults = (void *)&defaults_tpmi; 759 rp->priv->rpi = (void *)rpi_tpmi; 760 break; 761 default: 762 return -EINVAL; 763 } 764 return 0; 765 } 766 767 static enum rapl_primitives 768 prim_fixups(struct rapl_domain *rd, enum rapl_primitives prim) 769 { 770 struct rapl_defaults *defaults = get_defaults(rd->rp); 771 772 if (!defaults->spr_psys_bits) 773 return prim; 774 775 if (rd->id != RAPL_DOMAIN_PLATFORM) 776 return prim; 777 778 switch (prim) { 779 case POWER_LIMIT1: 780 return PSYS_POWER_LIMIT1; 781 case POWER_LIMIT2: 782 return PSYS_POWER_LIMIT2; 783 case PL1_ENABLE: 784 return PSYS_PL1_ENABLE; 785 case PL2_ENABLE: 786 return PSYS_PL2_ENABLE; 787 case TIME_WINDOW1: 788 return PSYS_TIME_WINDOW1; 789 case TIME_WINDOW2: 790 return PSYS_TIME_WINDOW2; 791 default: 792 return prim; 793 } 794 } 795 796 /* Read primitive data based on its related struct rapl_primitive_info. 797 * if xlate flag is set, return translated data based on data units, i.e. 798 * time, energy, and power. 799 * RAPL MSRs are non-architectual and are laid out not consistently across 800 * domains. Here we use primitive info to allow writing consolidated access 801 * functions. 802 * For a given primitive, it is processed by MSR mask and shift. Unit conversion 803 * is pre-assigned based on RAPL unit MSRs read at init time. 804 * 63-------------------------- 31--------------------------- 0 805 * | xxxxx (mask) | 806 * | |<- shift ----------------| 807 * 63-------------------------- 31--------------------------- 0 808 */ 809 static int rapl_read_data_raw(struct rapl_domain *rd, 810 enum rapl_primitives prim, bool xlate, u64 *data) 811 { 812 u64 value; 813 enum rapl_primitives prim_fixed = prim_fixups(rd, prim); 814 struct rapl_primitive_info *rpi = get_rpi(rd->rp, prim_fixed); 815 struct reg_action ra; 816 817 if (!rpi || !rpi->name || rpi->flag & RAPL_PRIMITIVE_DUMMY) 818 return -EINVAL; 819 820 ra.reg = rd->regs[rpi->id]; 821 if (!ra.reg.val) 822 return -EINVAL; 823 824 /* non-hardware data are collected by the polling thread */ 825 if (rpi->flag & RAPL_PRIMITIVE_DERIVED) { 826 *data = rd->rdd.primitives[prim]; 827 return 0; 828 } 829 830 ra.mask = rpi->mask; 831 832 if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) { 833 pr_debug("failed to read reg 0x%llx for %s:%s\n", ra.reg.val, rd->rp->name, rd->name); 834 return -EIO; 835 } 836 837 value = ra.value >> rpi->shift; 838 839 if (xlate) 840 *data = rapl_unit_xlate(rd, rpi->unit, value, 0); 841 else 842 *data = value; 843 844 return 0; 845 } 846 847 /* Similar use of primitive info in the read counterpart */ 848 static int rapl_write_data_raw(struct rapl_domain *rd, 849 enum rapl_primitives prim, 850 unsigned long long value) 851 { 852 enum rapl_primitives prim_fixed = prim_fixups(rd, prim); 853 struct rapl_primitive_info *rpi = get_rpi(rd->rp, prim_fixed); 854 u64 bits; 855 struct reg_action ra; 856 int ret; 857 858 if (!rpi || !rpi->name || rpi->flag & RAPL_PRIMITIVE_DUMMY) 859 return -EINVAL; 860 861 bits = rapl_unit_xlate(rd, rpi->unit, value, 1); 862 bits <<= rpi->shift; 863 bits &= rpi->mask; 864 865 memset(&ra, 0, sizeof(ra)); 866 867 ra.reg = rd->regs[rpi->id]; 868 ra.mask = rpi->mask; 869 ra.value = bits; 870 871 ret = rd->rp->priv->write_raw(get_rid(rd->rp), &ra); 872 873 return ret; 874 } 875 876 static int rapl_read_pl_data(struct rapl_domain *rd, int pl, 877 enum pl_prims pl_prim, bool xlate, u64 *data) 878 { 879 enum rapl_primitives prim = get_pl_prim(rd, pl, pl_prim); 880 881 if (!is_pl_valid(rd, pl)) 882 return -EINVAL; 883 884 return rapl_read_data_raw(rd, prim, xlate, data); 885 } 886 887 static int rapl_write_pl_data(struct rapl_domain *rd, int pl, 888 enum pl_prims pl_prim, 889 unsigned long long value) 890 { 891 enum rapl_primitives prim = get_pl_prim(rd, pl, pl_prim); 892 893 if (!is_pl_valid(rd, pl)) 894 return -EINVAL; 895 896 if (rd->rpl[pl].locked) { 897 pr_warn("%s:%s:%s locked by BIOS\n", rd->rp->name, rd->name, pl_names[pl]); 898 return -EACCES; 899 } 900 901 return rapl_write_data_raw(rd, prim, value); 902 } 903 /* 904 * Raw RAPL data stored in MSRs are in certain scales. We need to 905 * convert them into standard units based on the units reported in 906 * the RAPL unit MSRs. This is specific to CPUs as the method to 907 * calculate units differ on different CPUs. 908 * We convert the units to below format based on CPUs. 909 * i.e. 910 * energy unit: picoJoules : Represented in picoJoules by default 911 * power unit : microWatts : Represented in milliWatts by default 912 * time unit : microseconds: Represented in seconds by default 913 */ 914 static int rapl_check_unit_core(struct rapl_domain *rd) 915 { 916 struct reg_action ra; 917 u32 value; 918 919 ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT]; 920 ra.mask = ~0; 921 if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) { 922 pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n", 923 ra.reg.val, rd->rp->name, rd->name); 924 return -ENODEV; 925 } 926 927 value = (ra.value & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET; 928 rd->energy_unit = ENERGY_UNIT_SCALE * 1000000 / (1 << value); 929 930 value = (ra.value & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET; 931 rd->power_unit = 1000000 / (1 << value); 932 933 value = (ra.value & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET; 934 rd->time_unit = 1000000 / (1 << value); 935 936 pr_debug("Core CPU %s:%s energy=%dpJ, time=%dus, power=%duW\n", 937 rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit); 938 939 return 0; 940 } 941 942 static int rapl_check_unit_atom(struct rapl_domain *rd) 943 { 944 struct reg_action ra; 945 u32 value; 946 947 ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT]; 948 ra.mask = ~0; 949 if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) { 950 pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n", 951 ra.reg.val, rd->rp->name, rd->name); 952 return -ENODEV; 953 } 954 955 value = (ra.value & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET; 956 rd->energy_unit = ENERGY_UNIT_SCALE * 1 << value; 957 958 value = (ra.value & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET; 959 rd->power_unit = (1 << value) * 1000; 960 961 value = (ra.value & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET; 962 rd->time_unit = 1000000 / (1 << value); 963 964 pr_debug("Atom %s:%s energy=%dpJ, time=%dus, power=%duW\n", 965 rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit); 966 967 return 0; 968 } 969 970 static void power_limit_irq_save_cpu(void *info) 971 { 972 u32 l, h = 0; 973 struct rapl_package *rp = (struct rapl_package *)info; 974 975 /* save the state of PLN irq mask bit before disabling it */ 976 rdmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, &l, &h); 977 if (!(rp->power_limit_irq & PACKAGE_PLN_INT_SAVED)) { 978 rp->power_limit_irq = l & PACKAGE_THERM_INT_PLN_ENABLE; 979 rp->power_limit_irq |= PACKAGE_PLN_INT_SAVED; 980 } 981 l &= ~PACKAGE_THERM_INT_PLN_ENABLE; 982 wrmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); 983 } 984 985 /* REVISIT: 986 * When package power limit is set artificially low by RAPL, LVT 987 * thermal interrupt for package power limit should be ignored 988 * since we are not really exceeding the real limit. The intention 989 * is to avoid excessive interrupts while we are trying to save power. 990 * A useful feature might be routing the package_power_limit interrupt 991 * to userspace via eventfd. once we have a usecase, this is simple 992 * to do by adding an atomic notifier. 993 */ 994 995 static void package_power_limit_irq_save(struct rapl_package *rp) 996 { 997 if (rp->lead_cpu < 0) 998 return; 999 1000 if (!boot_cpu_has(X86_FEATURE_PTS) || !boot_cpu_has(X86_FEATURE_PLN)) 1001 return; 1002 1003 smp_call_function_single(rp->lead_cpu, power_limit_irq_save_cpu, rp, 1); 1004 } 1005 1006 /* 1007 * Restore per package power limit interrupt enable state. Called from cpu 1008 * hotplug code on package removal. 1009 */ 1010 static void package_power_limit_irq_restore(struct rapl_package *rp) 1011 { 1012 u32 l, h; 1013 1014 if (rp->lead_cpu < 0) 1015 return; 1016 1017 if (!boot_cpu_has(X86_FEATURE_PTS) || !boot_cpu_has(X86_FEATURE_PLN)) 1018 return; 1019 1020 /* irq enable state not saved, nothing to restore */ 1021 if (!(rp->power_limit_irq & PACKAGE_PLN_INT_SAVED)) 1022 return; 1023 1024 rdmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, &l, &h); 1025 1026 if (rp->power_limit_irq & PACKAGE_THERM_INT_PLN_ENABLE) 1027 l |= PACKAGE_THERM_INT_PLN_ENABLE; 1028 else 1029 l &= ~PACKAGE_THERM_INT_PLN_ENABLE; 1030 1031 wrmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); 1032 } 1033 1034 static void set_floor_freq_default(struct rapl_domain *rd, bool mode) 1035 { 1036 int i; 1037 1038 /* always enable clamp such that p-state can go below OS requested 1039 * range. power capping priority over guranteed frequency. 1040 */ 1041 rapl_write_pl_data(rd, POWER_LIMIT1, PL_CLAMP, mode); 1042 1043 for (i = POWER_LIMIT2; i < NR_POWER_LIMITS; i++) { 1044 rapl_write_pl_data(rd, i, PL_ENABLE, mode); 1045 rapl_write_pl_data(rd, i, PL_CLAMP, mode); 1046 } 1047 } 1048 1049 static void set_floor_freq_atom(struct rapl_domain *rd, bool enable) 1050 { 1051 static u32 power_ctrl_orig_val; 1052 struct rapl_defaults *defaults = get_defaults(rd->rp); 1053 u32 mdata; 1054 1055 if (!defaults->floor_freq_reg_addr) { 1056 pr_err("Invalid floor frequency config register\n"); 1057 return; 1058 } 1059 1060 if (!power_ctrl_orig_val) 1061 iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_CR_READ, 1062 defaults->floor_freq_reg_addr, 1063 &power_ctrl_orig_val); 1064 mdata = power_ctrl_orig_val; 1065 if (enable) { 1066 mdata &= ~(0x7f << 8); 1067 mdata |= 1 << 8; 1068 } 1069 iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_CR_WRITE, 1070 defaults->floor_freq_reg_addr, mdata); 1071 } 1072 1073 static u64 rapl_compute_time_window_core(struct rapl_domain *rd, u64 value, 1074 bool to_raw) 1075 { 1076 u64 f, y; /* fraction and exp. used for time unit */ 1077 1078 /* 1079 * Special processing based on 2^Y*(1+F/4), refer 1080 * to Intel Software Developer's manual Vol.3B: CH 14.9.3. 1081 */ 1082 if (!to_raw) { 1083 f = (value & 0x60) >> 5; 1084 y = value & 0x1f; 1085 value = (1 << y) * (4 + f) * rd->time_unit / 4; 1086 } else { 1087 if (value < rd->time_unit) 1088 return 0; 1089 1090 do_div(value, rd->time_unit); 1091 y = ilog2(value); 1092 1093 /* 1094 * The target hardware field is 7 bits wide, so return all ones 1095 * if the exponent is too large. 1096 */ 1097 if (y > 0x1f) 1098 return 0x7f; 1099 1100 f = div64_u64(4 * (value - (1ULL << y)), 1ULL << y); 1101 value = (y & 0x1f) | ((f & 0x3) << 5); 1102 } 1103 return value; 1104 } 1105 1106 static u64 rapl_compute_time_window_atom(struct rapl_domain *rd, u64 value, 1107 bool to_raw) 1108 { 1109 /* 1110 * Atom time unit encoding is straight forward val * time_unit, 1111 * where time_unit is default to 1 sec. Never 0. 1112 */ 1113 if (!to_raw) 1114 return (value) ? value * rd->time_unit : rd->time_unit; 1115 1116 value = div64_u64(value, rd->time_unit); 1117 1118 return value; 1119 } 1120 1121 /* TPMI Unit register has different layout */ 1122 #define TPMI_POWER_UNIT_OFFSET POWER_UNIT_OFFSET 1123 #define TPMI_POWER_UNIT_MASK POWER_UNIT_MASK 1124 #define TPMI_ENERGY_UNIT_OFFSET 0x06 1125 #define TPMI_ENERGY_UNIT_MASK 0x7C0 1126 #define TPMI_TIME_UNIT_OFFSET 0x0C 1127 #define TPMI_TIME_UNIT_MASK 0xF000 1128 1129 static int rapl_check_unit_tpmi(struct rapl_domain *rd) 1130 { 1131 struct reg_action ra; 1132 u32 value; 1133 1134 ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT]; 1135 ra.mask = ~0; 1136 if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) { 1137 pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n", 1138 ra.reg.val, rd->rp->name, rd->name); 1139 return -ENODEV; 1140 } 1141 1142 value = (ra.value & TPMI_ENERGY_UNIT_MASK) >> TPMI_ENERGY_UNIT_OFFSET; 1143 rd->energy_unit = ENERGY_UNIT_SCALE * 1000000 / (1 << value); 1144 1145 value = (ra.value & TPMI_POWER_UNIT_MASK) >> TPMI_POWER_UNIT_OFFSET; 1146 rd->power_unit = 1000000 / (1 << value); 1147 1148 value = (ra.value & TPMI_TIME_UNIT_MASK) >> TPMI_TIME_UNIT_OFFSET; 1149 rd->time_unit = 1000000 / (1 << value); 1150 1151 pr_debug("Core CPU %s:%s energy=%dpJ, time=%dus, power=%duW\n", 1152 rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit); 1153 1154 return 0; 1155 } 1156 1157 static const struct rapl_defaults defaults_tpmi = { 1158 .check_unit = rapl_check_unit_tpmi, 1159 /* Reuse existing logic, ignore the PL_CLAMP failures and enable all Power Limits */ 1160 .set_floor_freq = set_floor_freq_default, 1161 .compute_time_window = rapl_compute_time_window_core, 1162 }; 1163 1164 static const struct rapl_defaults rapl_defaults_core = { 1165 .floor_freq_reg_addr = 0, 1166 .check_unit = rapl_check_unit_core, 1167 .set_floor_freq = set_floor_freq_default, 1168 .compute_time_window = rapl_compute_time_window_core, 1169 }; 1170 1171 static const struct rapl_defaults rapl_defaults_hsw_server = { 1172 .check_unit = rapl_check_unit_core, 1173 .set_floor_freq = set_floor_freq_default, 1174 .compute_time_window = rapl_compute_time_window_core, 1175 .dram_domain_energy_unit = 15300, 1176 }; 1177 1178 static const struct rapl_defaults rapl_defaults_spr_server = { 1179 .check_unit = rapl_check_unit_core, 1180 .set_floor_freq = set_floor_freq_default, 1181 .compute_time_window = rapl_compute_time_window_core, 1182 .psys_domain_energy_unit = 1000000000, 1183 .spr_psys_bits = true, 1184 }; 1185 1186 static const struct rapl_defaults rapl_defaults_byt = { 1187 .floor_freq_reg_addr = IOSF_CPU_POWER_BUDGET_CTL_BYT, 1188 .check_unit = rapl_check_unit_atom, 1189 .set_floor_freq = set_floor_freq_atom, 1190 .compute_time_window = rapl_compute_time_window_atom, 1191 }; 1192 1193 static const struct rapl_defaults rapl_defaults_tng = { 1194 .floor_freq_reg_addr = IOSF_CPU_POWER_BUDGET_CTL_TNG, 1195 .check_unit = rapl_check_unit_atom, 1196 .set_floor_freq = set_floor_freq_atom, 1197 .compute_time_window = rapl_compute_time_window_atom, 1198 }; 1199 1200 static const struct rapl_defaults rapl_defaults_ann = { 1201 .floor_freq_reg_addr = 0, 1202 .check_unit = rapl_check_unit_atom, 1203 .set_floor_freq = NULL, 1204 .compute_time_window = rapl_compute_time_window_atom, 1205 }; 1206 1207 static const struct rapl_defaults rapl_defaults_cht = { 1208 .floor_freq_reg_addr = 0, 1209 .check_unit = rapl_check_unit_atom, 1210 .set_floor_freq = NULL, 1211 .compute_time_window = rapl_compute_time_window_atom, 1212 }; 1213 1214 static const struct rapl_defaults rapl_defaults_amd = { 1215 .check_unit = rapl_check_unit_core, 1216 }; 1217 1218 static const struct x86_cpu_id rapl_ids[] __initconst = { 1219 X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &rapl_defaults_core), 1220 X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &rapl_defaults_core), 1221 1222 X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &rapl_defaults_core), 1223 X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &rapl_defaults_core), 1224 1225 X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &rapl_defaults_core), 1226 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &rapl_defaults_core), 1227 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &rapl_defaults_core), 1228 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &rapl_defaults_hsw_server), 1229 1230 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &rapl_defaults_core), 1231 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &rapl_defaults_core), 1232 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &rapl_defaults_core), 1233 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &rapl_defaults_hsw_server), 1234 1235 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &rapl_defaults_core), 1236 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &rapl_defaults_core), 1237 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &rapl_defaults_hsw_server), 1238 X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &rapl_defaults_core), 1239 X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &rapl_defaults_core), 1240 X86_MATCH_INTEL_FAM6_MODEL(CANNONLAKE_L, &rapl_defaults_core), 1241 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &rapl_defaults_core), 1242 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &rapl_defaults_core), 1243 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI, &rapl_defaults_core), 1244 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &rapl_defaults_hsw_server), 1245 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &rapl_defaults_hsw_server), 1246 X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &rapl_defaults_core), 1247 X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &rapl_defaults_core), 1248 X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &rapl_defaults_core), 1249 X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &rapl_defaults_core), 1250 X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &rapl_defaults_core), 1251 X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &rapl_defaults_core), 1252 X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &rapl_defaults_core), 1253 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, &rapl_defaults_core), 1254 X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &rapl_defaults_core), 1255 X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &rapl_defaults_core), 1256 X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &rapl_defaults_core), 1257 X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, &rapl_defaults_core), 1258 X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &rapl_defaults_core), 1259 X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &rapl_defaults_spr_server), 1260 X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &rapl_defaults_spr_server), 1261 X86_MATCH_INTEL_FAM6_MODEL(LAKEFIELD, &rapl_defaults_core), 1262 1263 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &rapl_defaults_byt), 1264 X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &rapl_defaults_cht), 1265 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &rapl_defaults_tng), 1266 X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT_MID, &rapl_defaults_ann), 1267 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &rapl_defaults_core), 1268 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &rapl_defaults_core), 1269 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &rapl_defaults_core), 1270 X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, &rapl_defaults_core), 1271 X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &rapl_defaults_core), 1272 X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, &rapl_defaults_core), 1273 1274 X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &rapl_defaults_hsw_server), 1275 X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &rapl_defaults_hsw_server), 1276 1277 X86_MATCH_VENDOR_FAM(AMD, 0x17, &rapl_defaults_amd), 1278 X86_MATCH_VENDOR_FAM(AMD, 0x19, &rapl_defaults_amd), 1279 X86_MATCH_VENDOR_FAM(HYGON, 0x18, &rapl_defaults_amd), 1280 {} 1281 }; 1282 MODULE_DEVICE_TABLE(x86cpu, rapl_ids); 1283 1284 /* Read once for all raw primitive data for domains */ 1285 static void rapl_update_domain_data(struct rapl_package *rp) 1286 { 1287 int dmn, prim; 1288 u64 val; 1289 1290 for (dmn = 0; dmn < rp->nr_domains; dmn++) { 1291 pr_debug("update %s domain %s data\n", rp->name, 1292 rp->domains[dmn].name); 1293 /* exclude non-raw primitives */ 1294 for (prim = 0; prim < NR_RAW_PRIMITIVES; prim++) { 1295 struct rapl_primitive_info *rpi = get_rpi(rp, prim); 1296 1297 if (!rapl_read_data_raw(&rp->domains[dmn], prim, 1298 rpi->unit, &val)) 1299 rp->domains[dmn].rdd.primitives[prim] = val; 1300 } 1301 } 1302 1303 } 1304 1305 static int rapl_package_register_powercap(struct rapl_package *rp) 1306 { 1307 struct rapl_domain *rd; 1308 struct powercap_zone *power_zone = NULL; 1309 int nr_pl, ret; 1310 1311 /* Update the domain data of the new package */ 1312 rapl_update_domain_data(rp); 1313 1314 /* first we register package domain as the parent zone */ 1315 for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) { 1316 if (rd->id == RAPL_DOMAIN_PACKAGE) { 1317 nr_pl = find_nr_power_limit(rd); 1318 pr_debug("register package domain %s\n", rp->name); 1319 power_zone = powercap_register_zone(&rd->power_zone, 1320 rp->priv->control_type, rp->name, 1321 NULL, &zone_ops[rd->id], nr_pl, 1322 &constraint_ops); 1323 if (IS_ERR(power_zone)) { 1324 pr_debug("failed to register power zone %s\n", 1325 rp->name); 1326 return PTR_ERR(power_zone); 1327 } 1328 /* track parent zone in per package/socket data */ 1329 rp->power_zone = power_zone; 1330 /* done, only one package domain per socket */ 1331 break; 1332 } 1333 } 1334 if (!power_zone) { 1335 pr_err("no package domain found, unknown topology!\n"); 1336 return -ENODEV; 1337 } 1338 /* now register domains as children of the socket/package */ 1339 for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) { 1340 struct powercap_zone *parent = rp->power_zone; 1341 1342 if (rd->id == RAPL_DOMAIN_PACKAGE) 1343 continue; 1344 if (rd->id == RAPL_DOMAIN_PLATFORM) 1345 parent = NULL; 1346 /* number of power limits per domain varies */ 1347 nr_pl = find_nr_power_limit(rd); 1348 power_zone = powercap_register_zone(&rd->power_zone, 1349 rp->priv->control_type, 1350 rd->name, parent, 1351 &zone_ops[rd->id], nr_pl, 1352 &constraint_ops); 1353 1354 if (IS_ERR(power_zone)) { 1355 pr_debug("failed to register power_zone, %s:%s\n", 1356 rp->name, rd->name); 1357 ret = PTR_ERR(power_zone); 1358 goto err_cleanup; 1359 } 1360 } 1361 return 0; 1362 1363 err_cleanup: 1364 /* 1365 * Clean up previously initialized domains within the package if we 1366 * failed after the first domain setup. 1367 */ 1368 while (--rd >= rp->domains) { 1369 pr_debug("unregister %s domain %s\n", rp->name, rd->name); 1370 powercap_unregister_zone(rp->priv->control_type, 1371 &rd->power_zone); 1372 } 1373 1374 return ret; 1375 } 1376 1377 static int rapl_check_domain(int domain, struct rapl_package *rp) 1378 { 1379 struct reg_action ra; 1380 1381 switch (domain) { 1382 case RAPL_DOMAIN_PACKAGE: 1383 case RAPL_DOMAIN_PP0: 1384 case RAPL_DOMAIN_PP1: 1385 case RAPL_DOMAIN_DRAM: 1386 case RAPL_DOMAIN_PLATFORM: 1387 ra.reg = rp->priv->regs[domain][RAPL_DOMAIN_REG_STATUS]; 1388 break; 1389 default: 1390 pr_err("invalid domain id %d\n", domain); 1391 return -EINVAL; 1392 } 1393 /* make sure domain counters are available and contains non-zero 1394 * values, otherwise skip it. 1395 */ 1396 1397 ra.mask = ENERGY_STATUS_MASK; 1398 if (rp->priv->read_raw(get_rid(rp), &ra) || !ra.value) 1399 return -ENODEV; 1400 1401 return 0; 1402 } 1403 1404 /* 1405 * Get per domain energy/power/time unit. 1406 * RAPL Interfaces without per domain unit register will use the package 1407 * scope unit register to set per domain units. 1408 */ 1409 static int rapl_get_domain_unit(struct rapl_domain *rd) 1410 { 1411 struct rapl_defaults *defaults = get_defaults(rd->rp); 1412 int ret; 1413 1414 if (!rd->regs[RAPL_DOMAIN_REG_UNIT].val) { 1415 if (!rd->rp->priv->reg_unit.val) { 1416 pr_err("No valid Unit register found\n"); 1417 return -ENODEV; 1418 } 1419 rd->regs[RAPL_DOMAIN_REG_UNIT] = rd->rp->priv->reg_unit; 1420 } 1421 1422 if (!defaults->check_unit) { 1423 pr_err("missing .check_unit() callback\n"); 1424 return -ENODEV; 1425 } 1426 1427 ret = defaults->check_unit(rd); 1428 if (ret) 1429 return ret; 1430 1431 if (rd->id == RAPL_DOMAIN_DRAM && defaults->dram_domain_energy_unit) 1432 rd->energy_unit = defaults->dram_domain_energy_unit; 1433 if (rd->id == RAPL_DOMAIN_PLATFORM && defaults->psys_domain_energy_unit) 1434 rd->energy_unit = defaults->psys_domain_energy_unit; 1435 return 0; 1436 } 1437 1438 /* 1439 * Check if power limits are available. Two cases when they are not available: 1440 * 1. Locked by BIOS, in this case we still provide read-only access so that 1441 * users can see what limit is set by the BIOS. 1442 * 2. Some CPUs make some domains monitoring only which means PLx MSRs may not 1443 * exist at all. In this case, we do not show the constraints in powercap. 1444 * 1445 * Called after domains are detected and initialized. 1446 */ 1447 static void rapl_detect_powerlimit(struct rapl_domain *rd) 1448 { 1449 u64 val64; 1450 int i; 1451 1452 for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) { 1453 if (!rapl_read_pl_data(rd, i, PL_LOCK, false, &val64)) { 1454 if (val64) { 1455 rd->rpl[i].locked = true; 1456 pr_info("%s:%s:%s locked by BIOS\n", 1457 rd->rp->name, rd->name, pl_names[i]); 1458 } 1459 } 1460 1461 if (rapl_read_pl_data(rd, i, PL_ENABLE, false, &val64)) 1462 rd->rpl[i].name = NULL; 1463 } 1464 } 1465 1466 /* Detect active and valid domains for the given CPU, caller must 1467 * ensure the CPU belongs to the targeted package and CPU hotlug is disabled. 1468 */ 1469 static int rapl_detect_domains(struct rapl_package *rp) 1470 { 1471 struct rapl_domain *rd; 1472 int i; 1473 1474 for (i = 0; i < RAPL_DOMAIN_MAX; i++) { 1475 /* use physical package id to read counters */ 1476 if (!rapl_check_domain(i, rp)) { 1477 rp->domain_map |= 1 << i; 1478 pr_info("Found RAPL domain %s\n", rapl_domain_names[i]); 1479 } 1480 } 1481 rp->nr_domains = bitmap_weight(&rp->domain_map, RAPL_DOMAIN_MAX); 1482 if (!rp->nr_domains) { 1483 pr_debug("no valid rapl domains found in %s\n", rp->name); 1484 return -ENODEV; 1485 } 1486 pr_debug("found %d domains on %s\n", rp->nr_domains, rp->name); 1487 1488 rp->domains = kcalloc(rp->nr_domains, sizeof(struct rapl_domain), 1489 GFP_KERNEL); 1490 if (!rp->domains) 1491 return -ENOMEM; 1492 1493 rapl_init_domains(rp); 1494 1495 for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) { 1496 rapl_get_domain_unit(rd); 1497 rapl_detect_powerlimit(rd); 1498 } 1499 1500 return 0; 1501 } 1502 1503 /* called from CPU hotplug notifier, hotplug lock held */ 1504 void rapl_remove_package(struct rapl_package *rp) 1505 { 1506 struct rapl_domain *rd, *rd_package = NULL; 1507 1508 package_power_limit_irq_restore(rp); 1509 1510 for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) { 1511 int i; 1512 1513 for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) { 1514 rapl_write_pl_data(rd, i, PL_ENABLE, 0); 1515 rapl_write_pl_data(rd, i, PL_CLAMP, 0); 1516 } 1517 1518 if (rd->id == RAPL_DOMAIN_PACKAGE) { 1519 rd_package = rd; 1520 continue; 1521 } 1522 pr_debug("remove package, undo power limit on %s: %s\n", 1523 rp->name, rd->name); 1524 powercap_unregister_zone(rp->priv->control_type, 1525 &rd->power_zone); 1526 } 1527 /* do parent zone last */ 1528 powercap_unregister_zone(rp->priv->control_type, 1529 &rd_package->power_zone); 1530 list_del(&rp->plist); 1531 kfree(rp); 1532 } 1533 EXPORT_SYMBOL_GPL(rapl_remove_package); 1534 1535 /* caller to ensure CPU hotplug lock is held */ 1536 struct rapl_package *rapl_find_package_domain(int id, struct rapl_if_priv *priv, bool id_is_cpu) 1537 { 1538 struct rapl_package *rp; 1539 int uid; 1540 1541 if (id_is_cpu) 1542 uid = topology_logical_die_id(id); 1543 else 1544 uid = id; 1545 1546 list_for_each_entry(rp, &rapl_packages, plist) { 1547 if (rp->id == uid 1548 && rp->priv->control_type == priv->control_type) 1549 return rp; 1550 } 1551 1552 return NULL; 1553 } 1554 EXPORT_SYMBOL_GPL(rapl_find_package_domain); 1555 1556 /* called from CPU hotplug notifier, hotplug lock held */ 1557 struct rapl_package *rapl_add_package(int id, struct rapl_if_priv *priv, bool id_is_cpu) 1558 { 1559 struct rapl_package *rp; 1560 int ret; 1561 1562 rp = kzalloc(sizeof(struct rapl_package), GFP_KERNEL); 1563 if (!rp) 1564 return ERR_PTR(-ENOMEM); 1565 1566 if (id_is_cpu) { 1567 rp->id = topology_logical_die_id(id); 1568 rp->lead_cpu = id; 1569 if (topology_max_die_per_package() > 1) 1570 snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d-die-%d", 1571 topology_physical_package_id(id), topology_die_id(id)); 1572 else 1573 snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d", 1574 topology_physical_package_id(id)); 1575 } else { 1576 rp->id = id; 1577 rp->lead_cpu = -1; 1578 snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d", id); 1579 } 1580 1581 rp->priv = priv; 1582 ret = rapl_config(rp); 1583 if (ret) 1584 goto err_free_package; 1585 1586 /* check if the package contains valid domains */ 1587 if (rapl_detect_domains(rp)) { 1588 ret = -ENODEV; 1589 goto err_free_package; 1590 } 1591 ret = rapl_package_register_powercap(rp); 1592 if (!ret) { 1593 INIT_LIST_HEAD(&rp->plist); 1594 list_add(&rp->plist, &rapl_packages); 1595 return rp; 1596 } 1597 1598 err_free_package: 1599 kfree(rp->domains); 1600 kfree(rp); 1601 return ERR_PTR(ret); 1602 } 1603 EXPORT_SYMBOL_GPL(rapl_add_package); 1604 1605 static void power_limit_state_save(void) 1606 { 1607 struct rapl_package *rp; 1608 struct rapl_domain *rd; 1609 int ret, i; 1610 1611 cpus_read_lock(); 1612 list_for_each_entry(rp, &rapl_packages, plist) { 1613 if (!rp->power_zone) 1614 continue; 1615 rd = power_zone_to_rapl_domain(rp->power_zone); 1616 for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) { 1617 ret = rapl_read_pl_data(rd, i, PL_LIMIT, true, 1618 &rd->rpl[i].last_power_limit); 1619 if (ret) 1620 rd->rpl[i].last_power_limit = 0; 1621 } 1622 } 1623 cpus_read_unlock(); 1624 } 1625 1626 static void power_limit_state_restore(void) 1627 { 1628 struct rapl_package *rp; 1629 struct rapl_domain *rd; 1630 int i; 1631 1632 cpus_read_lock(); 1633 list_for_each_entry(rp, &rapl_packages, plist) { 1634 if (!rp->power_zone) 1635 continue; 1636 rd = power_zone_to_rapl_domain(rp->power_zone); 1637 for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) 1638 if (rd->rpl[i].last_power_limit) 1639 rapl_write_pl_data(rd, i, PL_LIMIT, 1640 rd->rpl[i].last_power_limit); 1641 } 1642 cpus_read_unlock(); 1643 } 1644 1645 static int rapl_pm_callback(struct notifier_block *nb, 1646 unsigned long mode, void *_unused) 1647 { 1648 switch (mode) { 1649 case PM_SUSPEND_PREPARE: 1650 power_limit_state_save(); 1651 break; 1652 case PM_POST_SUSPEND: 1653 power_limit_state_restore(); 1654 break; 1655 } 1656 return NOTIFY_OK; 1657 } 1658 1659 static struct notifier_block rapl_pm_notifier = { 1660 .notifier_call = rapl_pm_callback, 1661 }; 1662 1663 static struct platform_device *rapl_msr_platdev; 1664 1665 static int __init rapl_init(void) 1666 { 1667 const struct x86_cpu_id *id; 1668 int ret; 1669 1670 id = x86_match_cpu(rapl_ids); 1671 if (id) { 1672 defaults_msr = (struct rapl_defaults *)id->driver_data; 1673 1674 rapl_msr_platdev = platform_device_alloc("intel_rapl_msr", 0); 1675 if (!rapl_msr_platdev) 1676 return -ENOMEM; 1677 1678 ret = platform_device_add(rapl_msr_platdev); 1679 if (ret) { 1680 platform_device_put(rapl_msr_platdev); 1681 return ret; 1682 } 1683 } 1684 1685 ret = register_pm_notifier(&rapl_pm_notifier); 1686 if (ret && rapl_msr_platdev) { 1687 platform_device_del(rapl_msr_platdev); 1688 platform_device_put(rapl_msr_platdev); 1689 } 1690 1691 return ret; 1692 } 1693 1694 static void __exit rapl_exit(void) 1695 { 1696 platform_device_unregister(rapl_msr_platdev); 1697 unregister_pm_notifier(&rapl_pm_notifier); 1698 } 1699 1700 fs_initcall(rapl_init); 1701 module_exit(rapl_exit); 1702 1703 MODULE_DESCRIPTION("Intel Runtime Average Power Limit (RAPL) common code"); 1704 MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@intel.com>"); 1705 MODULE_LICENSE("GPL v2"); 1706