xref: /linux/drivers/thermal/intel/intel_hfi.c (revision 4331f070267ae8f76db1abbc7f4eeed4f06ae817)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Hardware Feedback Interface Driver
4  *
5  * Copyright (c) 2021, Intel Corporation.
6  *
7  * Authors: Aubrey Li <aubrey.li@linux.intel.com>
8  *          Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
9  *
10  *
11  * The Hardware Feedback Interface provides a performance and energy efficiency
12  * capability information for each CPU in the system. Depending on the processor
13  * model, hardware may periodically update these capabilities as a result of
14  * changes in the operating conditions (e.g., power limits or thermal
15  * constraints). On other processor models, there is a single HFI update
16  * at boot.
17  *
18  * This file provides functionality to process HFI updates and relay these
19  * updates to userspace.
20  */
21 
22 #define pr_fmt(fmt)  "intel-hfi: " fmt
23 
24 #include <linux/bitops.h>
25 #include <linux/cpufeature.h>
26 #include <linux/cpumask.h>
27 #include <linux/delay.h>
28 #include <linux/gfp.h>
29 #include <linux/io.h>
30 #include <linux/kernel.h>
31 #include <linux/math.h>
32 #include <linux/mutex.h>
33 #include <linux/percpu-defs.h>
34 #include <linux/printk.h>
35 #include <linux/processor.h>
36 #include <linux/slab.h>
37 #include <linux/spinlock.h>
38 #include <linux/string.h>
39 #include <linux/topology.h>
40 #include <linux/workqueue.h>
41 
42 #include <asm/msr.h>
43 
44 #include "intel_hfi.h"
45 #include "thermal_interrupt.h"
46 
47 #include "../thermal_netlink.h"
48 
/* Hardware Feedback Interface MSR configuration bits */
/* OR'ed into the table physical address written to MSR_IA32_HW_FEEDBACK_PTR. */
#define HW_FEEDBACK_PTR_VALID_BIT		BIT(0)
/* Set in MSR_IA32_HW_FEEDBACK_CONFIG to enable HFI; cleared to disable it. */
#define HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT	BIT(0)

/* CPUID detection and enumeration definitions for HFI */

/* CPUID leaf whose EDX register is decoded with union cpuid6_edx. */
#define CPUID_HFI_LEAF 6
56 
/*
 * Capability bits reported by hardware, as found in the low byte of
 * union cpuid6_edx.
 *
 * split.performance:		performance capability is supported
 * split.energy_efficiency:	energy efficiency capability is supported
 * split.__reserved:		remaining bits; cleared by hfi_parse_features()
 *				before counting the supported capabilities
 * bits:			all of the above viewed as a single byte
 */
union hfi_capabilities {
	struct {
		u8	performance:1;
		u8	energy_efficiency:1;
		u8	__reserved:6;
	} split;
	u8 bits;
};
65 
/*
 * Layout of the EDX register returned by CPUID leaf CPUID_HFI_LEAF.
 *
 * split.capabilities:	supported HFI capabilities
 * split.table_pages:	hfi_parse_features() uses this value plus one as the
 *			size of the HFI table in 4KB pages
 * split.__reserved:	not used by this driver
 * split.index:		stored by init_hfi_cpu_index() as the row of the
 *			executing CPU in its HFI table
 * full:		the raw 32-bit register value
 */
union cpuid6_edx {
	struct {
		union hfi_capabilities	capabilities;
		u32			table_pages:4;
		u32			__reserved:4;
		s32			index:16;
	} split;
	u32 full;
};
75 
/**
 * struct hfi_cpu_data - HFI capabilities per CPU
 * @perf_cap:		Performance capability
 * @ee_cap:		Energy efficiency capability
 *
 * Capabilities of a logical processor in the HFI table. These capabilities are
 * unitless. Each one is a single byte; get_hfi_caps() shifts them left by two
 * bits before handing them to thermal netlink.
 */
struct hfi_cpu_data {
	u8	perf_cap;
	u8	ee_cap;
} __packed;
88 
/**
 * struct hfi_hdr - Header of the HFI table
 * @perf_updated:	Hardware updated performance capabilities
 * @ee_updated:		Hardware updated energy efficiency capabilities
 *
 * Properties of the data in an HFI table. One byte per capability; the
 * structure is packed so that it has no padding.
 */
struct hfi_hdr {
	u8	perf_updated;
	u8	ee_updated;
} __packed;
100 
/**
 * struct hfi_instance - Representation of an HFI instance (i.e., a table)
 * @local_table:	Base of the local copy of the HFI table
 * @timestamp:		Timestamp of the last update of the local table.
 *			Located at the base of the local table.
 * @hdr:		Base address of the header of the local table
 * @data:		Base address of the data of the local table
 * @cpus:		CPUs represented in this HFI table instance
 * @hw_table:		Pointer to the HFI table of this instance
 * @update_work:	Delayed work to process HFI updates
 * @table_lock:		Lock to protect accesses to the table of this instance
 * @event_lock:		Lock to process HFI interrupts
 *
 * A set of parameters to parse and navigate a specific HFI table.
 *
 * @local_table and @timestamp share storage: they are two views of the same
 * pointer. A non-NULL @hdr is what the CPU hotplug callbacks use to tell that
 * the instance has been initialized.
 */
struct hfi_instance {
	union {
		void			*local_table;
		u64			*timestamp;
	};
	void			*hdr;
	void			*data;
	cpumask_var_t		cpus;
	void			*hw_table;
	struct delayed_work	update_work;
	/* Taken inside @event_lock by intel_hfi_process_event(). */
	raw_spinlock_t		table_lock;
	raw_spinlock_t		event_lock;
};
129 
/**
 * struct hfi_features - Supported HFI features
 * @nr_table_pages:	Size of the HFI table in 4KB pages
 * @cpu_stride:		Stride size to locate the capability data of a logical
 *			processor within the table (i.e., row stride)
 * @hdr_size:		Size of the table header
 *
 * Parameters and supported features that are common to all HFI instances.
 * All three fields are filled in once by hfi_parse_features(); @cpu_stride
 * and @hdr_size are in bytes.
 */
struct hfi_features {
	size_t		nr_table_pages;
	unsigned int	cpu_stride;
	unsigned int	hdr_size;
};
144 
/**
 * struct hfi_cpu_info - Per-CPU attributes to consume HFI data
 * @index:		Row of this CPU in its HFI table. A negative value means
 *			that the row has not been read from CPUID yet.
 * @hfi_instance:	Attributes of the HFI table to which this CPU belongs.
 *			NULL until intel_hfi_online() links the CPU.
 *
 * Parameters to link a logical processor to an HFI table and a row within it.
 */
struct hfi_cpu_info {
	s16			index;
	struct hfi_instance	*hfi_instance;
};
156 
/* Per-CPU link to an HFI instance; the row index starts out as "not read". */
static DEFINE_PER_CPU(struct hfi_cpu_info, hfi_cpu_info) = { .index = -1 };

/* Number of entries in, and base of, the array of HFI instances. */
static int max_hfi_instances;
static struct hfi_instance *hfi_instances;

/* Table geometry shared by all instances; see hfi_parse_features(). */
static struct hfi_features hfi_features;
/* Serializes changes to the instances' cpumasks and their (de)initialization. */
static DEFINE_MUTEX(hfi_instance_lock);

/* Workqueue on which the per-instance update_work is queued. */
static struct workqueue_struct *hfi_updates_wq;
/* Delay, in jiffies, between an HFI interrupt and the processing of the update. */
#define HFI_UPDATE_INTERVAL		HZ
/* Maximum number of CPU capabilities sent in a single thermal netlink event. */
#define HFI_MAX_THERM_NOTIFY_COUNT	16
168 
169 static void get_hfi_caps(struct hfi_instance *hfi_instance,
170 			 struct thermal_genl_cpu_caps *cpu_caps)
171 {
172 	int cpu, i = 0;
173 
174 	raw_spin_lock_irq(&hfi_instance->table_lock);
175 	for_each_cpu(cpu, hfi_instance->cpus) {
176 		struct hfi_cpu_data *caps;
177 		s16 index;
178 
179 		index = per_cpu(hfi_cpu_info, cpu).index;
180 		caps = hfi_instance->data + index * hfi_features.cpu_stride;
181 		cpu_caps[i].cpu = cpu;
182 
183 		/*
184 		 * Scale performance and energy efficiency to
185 		 * the [0, 1023] interval that thermal netlink uses.
186 		 */
187 		cpu_caps[i].performance = caps->perf_cap << 2;
188 		cpu_caps[i].efficiency = caps->ee_cap << 2;
189 
190 		++i;
191 	}
192 	raw_spin_unlock_irq(&hfi_instance->table_lock);
193 }
194 
195 /*
196  * Call update_capabilities() when there are changes in the HFI table.
197  */
198 static void update_capabilities(struct hfi_instance *hfi_instance)
199 {
200 	struct thermal_genl_cpu_caps *cpu_caps;
201 	int i = 0, cpu_count;
202 
203 	/* CPUs may come online/offline while processing an HFI update. */
204 	mutex_lock(&hfi_instance_lock);
205 
206 	cpu_count = cpumask_weight(hfi_instance->cpus);
207 
208 	/* No CPUs to report in this hfi_instance. */
209 	if (!cpu_count)
210 		goto out;
211 
212 	cpu_caps = kcalloc(cpu_count, sizeof(*cpu_caps), GFP_KERNEL);
213 	if (!cpu_caps)
214 		goto out;
215 
216 	get_hfi_caps(hfi_instance, cpu_caps);
217 
218 	if (cpu_count < HFI_MAX_THERM_NOTIFY_COUNT)
219 		goto last_cmd;
220 
221 	/* Process complete chunks of HFI_MAX_THERM_NOTIFY_COUNT capabilities. */
222 	for (i = 0;
223 	     (i + HFI_MAX_THERM_NOTIFY_COUNT) <= cpu_count;
224 	     i += HFI_MAX_THERM_NOTIFY_COUNT)
225 		thermal_genl_cpu_capability_event(HFI_MAX_THERM_NOTIFY_COUNT,
226 						  &cpu_caps[i]);
227 
228 	cpu_count = cpu_count - i;
229 
230 last_cmd:
231 	/* Process the remaining capabilities if any. */
232 	if (cpu_count)
233 		thermal_genl_cpu_capability_event(cpu_count, &cpu_caps[i]);
234 
235 	kfree(cpu_caps);
236 out:
237 	mutex_unlock(&hfi_instance_lock);
238 }
239 
240 static void hfi_update_work_fn(struct work_struct *work)
241 {
242 	struct hfi_instance *hfi_instance;
243 
244 	hfi_instance = container_of(to_delayed_work(work), struct hfi_instance,
245 				    update_work);
246 
247 	update_capabilities(hfi_instance);
248 }
249 
250 void intel_hfi_process_event(__u64 pkg_therm_status_msr_val)
251 {
252 	struct hfi_instance *hfi_instance;
253 	int cpu = smp_processor_id();
254 	struct hfi_cpu_info *info;
255 	u64 new_timestamp, msr, hfi;
256 
257 	if (!pkg_therm_status_msr_val)
258 		return;
259 
260 	info = &per_cpu(hfi_cpu_info, cpu);
261 	if (!info)
262 		return;
263 
264 	/*
265 	 * A CPU is linked to its HFI instance before the thermal vector in the
266 	 * local APIC is unmasked. Hence, info->hfi_instance cannot be NULL
267 	 * when receiving an HFI event.
268 	 */
269 	hfi_instance = info->hfi_instance;
270 	if (unlikely(!hfi_instance)) {
271 		pr_debug("Received event on CPU %d but instance was null", cpu);
272 		return;
273 	}
274 
275 	/*
276 	 * On most systems, all CPUs in the package receive a package-level
277 	 * thermal interrupt when there is an HFI update. It is sufficient to
278 	 * let a single CPU to acknowledge the update and queue work to
279 	 * process it. The remaining CPUs can resume their work.
280 	 */
281 	if (!raw_spin_trylock(&hfi_instance->event_lock))
282 		return;
283 
284 	rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr);
285 	hfi = msr & PACKAGE_THERM_STATUS_HFI_UPDATED;
286 	if (!hfi) {
287 		raw_spin_unlock(&hfi_instance->event_lock);
288 		return;
289 	}
290 
291 	/*
292 	 * Ack duplicate update. Since there is an active HFI
293 	 * status from HW, it must be a new event, not a case
294 	 * where a lagging CPU entered the locked region.
295 	 */
296 	new_timestamp = *(u64 *)hfi_instance->hw_table;
297 	if (*hfi_instance->timestamp == new_timestamp) {
298 		thermal_clear_package_intr_status(PACKAGE_LEVEL, PACKAGE_THERM_STATUS_HFI_UPDATED);
299 		raw_spin_unlock(&hfi_instance->event_lock);
300 		return;
301 	}
302 
303 	raw_spin_lock(&hfi_instance->table_lock);
304 
305 	/*
306 	 * Copy the updated table into our local copy. This includes the new
307 	 * timestamp.
308 	 */
309 	memcpy(hfi_instance->local_table, hfi_instance->hw_table,
310 	       hfi_features.nr_table_pages << PAGE_SHIFT);
311 
312 	/*
313 	 * Let hardware know that we are done reading the HFI table and it is
314 	 * free to update it again.
315 	 */
316 	thermal_clear_package_intr_status(PACKAGE_LEVEL, PACKAGE_THERM_STATUS_HFI_UPDATED);
317 
318 	raw_spin_unlock(&hfi_instance->table_lock);
319 	raw_spin_unlock(&hfi_instance->event_lock);
320 
321 	queue_delayed_work(hfi_updates_wq, &hfi_instance->update_work,
322 			   HFI_UPDATE_INTERVAL);
323 }
324 
325 static void init_hfi_cpu_index(struct hfi_cpu_info *info)
326 {
327 	union cpuid6_edx edx;
328 
329 	/* Do not re-read @cpu's index if it has already been initialized. */
330 	if (info->index > -1)
331 		return;
332 
333 	edx.full = cpuid_edx(CPUID_HFI_LEAF);
334 	info->index = edx.split.index;
335 }
336 
337 /*
338  * The format of the HFI table depends on the number of capabilities that the
339  * hardware supports. Keep a data structure to navigate the table.
340  */
341 static void init_hfi_instance(struct hfi_instance *hfi_instance)
342 {
343 	/* The HFI header is below the time-stamp. */
344 	hfi_instance->hdr = hfi_instance->local_table +
345 			    sizeof(*hfi_instance->timestamp);
346 
347 	/* The HFI data starts below the header. */
348 	hfi_instance->data = hfi_instance->hdr + hfi_features.hdr_size;
349 }
350 
351 /* Caller must hold hfi_instance_lock. */
352 static void hfi_enable(void)
353 {
354 	u64 msr_val;
355 
356 	rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
357 	msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT;
358 	wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
359 }
360 
361 static void hfi_set_hw_table(struct hfi_instance *hfi_instance)
362 {
363 	phys_addr_t hw_table_pa;
364 	u64 msr_val;
365 
366 	hw_table_pa = virt_to_phys(hfi_instance->hw_table);
367 	msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT;
368 	wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val);
369 }
370 
371 /* Caller must hold hfi_instance_lock. */
372 static void hfi_disable(void)
373 {
374 	u64 msr_val;
375 	int i;
376 
377 	rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
378 	msr_val &= ~HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT;
379 	wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
380 
381 	/*
382 	 * Wait for hardware to acknowledge the disabling of HFI. Some
383 	 * processors may not do it. Wait for ~2ms. This is a reasonable
384 	 * time for hardware to complete any pending actions on the HFI
385 	 * memory.
386 	 */
387 	for (i = 0; i < 2000; i++) {
388 		rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
389 		if (msr_val & PACKAGE_THERM_STATUS_HFI_UPDATED)
390 			break;
391 
392 		udelay(1);
393 		cpu_relax();
394 	}
395 }
396 
397 /**
398  * intel_hfi_online() - Enable HFI on @cpu
399  * @cpu:	CPU in which the HFI will be enabled
400  *
401  * Enable the HFI to be used in @cpu. The HFI is enabled at the die/package
402  * level. The first CPU in the die/package to come online does the full HFI
403  * initialization. Subsequent CPUs will just link themselves to the HFI
404  * instance of their die/package.
405  *
406  * This function is called before enabling the thermal vector in the local APIC
407  * in order to ensure that @cpu has an associated HFI instance when it receives
408  * an HFI event.
409  */
410 void intel_hfi_online(unsigned int cpu)
411 {
412 	struct hfi_instance *hfi_instance;
413 	struct hfi_cpu_info *info;
414 	u16 die_id;
415 
416 	/* Nothing to do if hfi_instances are missing. */
417 	if (!hfi_instances)
418 		return;
419 
420 	/*
421 	 * Link @cpu to the HFI instance of its package/die. It does not
422 	 * matter whether the instance has been initialized.
423 	 */
424 	info = &per_cpu(hfi_cpu_info, cpu);
425 	die_id = topology_logical_die_id(cpu);
426 	hfi_instance = info->hfi_instance;
427 	if (!hfi_instance) {
428 		if (die_id >= max_hfi_instances)
429 			return;
430 
431 		hfi_instance = &hfi_instances[die_id];
432 		info->hfi_instance = hfi_instance;
433 	}
434 
435 	init_hfi_cpu_index(info);
436 
437 	/*
438 	 * Now check if the HFI instance of the package/die of @cpu has been
439 	 * initialized (by checking its header). In such case, all we have to
440 	 * do is to add @cpu to this instance's cpumask and enable the instance
441 	 * if needed.
442 	 */
443 	mutex_lock(&hfi_instance_lock);
444 	if (hfi_instance->hdr)
445 		goto enable;
446 
447 	/*
448 	 * Hardware is programmed with the physical address of the first page
449 	 * frame of the table. Hence, the allocated memory must be page-aligned.
450 	 *
451 	 * Some processors do not forget the initial address of the HFI table
452 	 * even after having been reprogrammed. Keep using the same pages. Do
453 	 * not free them.
454 	 */
455 	hfi_instance->hw_table = alloc_pages_exact(hfi_features.nr_table_pages,
456 						   GFP_KERNEL | __GFP_ZERO);
457 	if (!hfi_instance->hw_table)
458 		goto unlock;
459 
460 	/*
461 	 * Allocate memory to keep a local copy of the table that
462 	 * hardware generates.
463 	 */
464 	hfi_instance->local_table = kzalloc(hfi_features.nr_table_pages << PAGE_SHIFT,
465 					    GFP_KERNEL);
466 	if (!hfi_instance->local_table)
467 		goto free_hw_table;
468 
469 	init_hfi_instance(hfi_instance);
470 
471 	INIT_DELAYED_WORK(&hfi_instance->update_work, hfi_update_work_fn);
472 	raw_spin_lock_init(&hfi_instance->table_lock);
473 	raw_spin_lock_init(&hfi_instance->event_lock);
474 
475 enable:
476 	cpumask_set_cpu(cpu, hfi_instance->cpus);
477 
478 	/* Enable this HFI instance if this is its first online CPU. */
479 	if (cpumask_weight(hfi_instance->cpus) == 1) {
480 		hfi_set_hw_table(hfi_instance);
481 		hfi_enable();
482 	}
483 
484 unlock:
485 	mutex_unlock(&hfi_instance_lock);
486 	return;
487 
488 free_hw_table:
489 	free_pages_exact(hfi_instance->hw_table, hfi_features.nr_table_pages);
490 	goto unlock;
491 }
492 
493 /**
494  * intel_hfi_offline() - Disable HFI on @cpu
495  * @cpu:	CPU in which the HFI will be disabled
496  *
497  * Remove @cpu from those covered by its HFI instance.
498  *
499  * On some processors, hardware remembers previous programming settings even
500  * after being reprogrammed. Thus, keep HFI enabled even if all CPUs in the
501  * die/package of @cpu are offline. See note in intel_hfi_online().
502  */
503 void intel_hfi_offline(unsigned int cpu)
504 {
505 	struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, cpu);
506 	struct hfi_instance *hfi_instance;
507 
508 	/*
509 	 * Check if @cpu as an associated, initialized (i.e., with a non-NULL
510 	 * header). Also, HFI instances are only initialized if X86_FEATURE_HFI
511 	 * is present.
512 	 */
513 	hfi_instance = info->hfi_instance;
514 	if (!hfi_instance)
515 		return;
516 
517 	if (!hfi_instance->hdr)
518 		return;
519 
520 	mutex_lock(&hfi_instance_lock);
521 	cpumask_clear_cpu(cpu, hfi_instance->cpus);
522 
523 	if (!cpumask_weight(hfi_instance->cpus))
524 		hfi_disable();
525 
526 	mutex_unlock(&hfi_instance_lock);
527 }
528 
529 static __init int hfi_parse_features(void)
530 {
531 	unsigned int nr_capabilities;
532 	union cpuid6_edx edx;
533 
534 	if (!boot_cpu_has(X86_FEATURE_HFI))
535 		return -ENODEV;
536 
537 	/*
538 	 * If we are here we know that CPUID_HFI_LEAF exists. Parse the
539 	 * supported capabilities and the size of the HFI table.
540 	 */
541 	edx.full = cpuid_edx(CPUID_HFI_LEAF);
542 
543 	if (!edx.split.capabilities.split.performance) {
544 		pr_debug("Performance reporting not supported! Not using HFI\n");
545 		return -ENODEV;
546 	}
547 
548 	/*
549 	 * The number of supported capabilities determines the number of
550 	 * columns in the HFI table. Exclude the reserved bits.
551 	 */
552 	edx.split.capabilities.split.__reserved = 0;
553 	nr_capabilities = hweight8(edx.split.capabilities.bits);
554 
555 	/* The number of 4KB pages required by the table */
556 	hfi_features.nr_table_pages = edx.split.table_pages + 1;
557 
558 	/*
559 	 * The header contains change indications for each supported feature.
560 	 * The size of the table header is rounded up to be a multiple of 8
561 	 * bytes.
562 	 */
563 	hfi_features.hdr_size = DIV_ROUND_UP(nr_capabilities, 8) * 8;
564 
565 	/*
566 	 * Data of each logical processor is also rounded up to be a multiple
567 	 * of 8 bytes.
568 	 */
569 	hfi_features.cpu_stride = DIV_ROUND_UP(nr_capabilities, 8) * 8;
570 
571 	return 0;
572 }
573 
574 void __init intel_hfi_init(void)
575 {
576 	struct hfi_instance *hfi_instance;
577 	int i, j;
578 
579 	if (hfi_parse_features())
580 		return;
581 
582 	/* There is one HFI instance per die/package. */
583 	max_hfi_instances = topology_max_packages() *
584 			    topology_max_die_per_package();
585 
586 	/*
587 	 * This allocation may fail. CPU hotplug callbacks must check
588 	 * for a null pointer.
589 	 */
590 	hfi_instances = kcalloc(max_hfi_instances, sizeof(*hfi_instances),
591 				GFP_KERNEL);
592 	if (!hfi_instances)
593 		return;
594 
595 	for (i = 0; i < max_hfi_instances; i++) {
596 		hfi_instance = &hfi_instances[i];
597 		if (!zalloc_cpumask_var(&hfi_instance->cpus, GFP_KERNEL))
598 			goto err_nomem;
599 	}
600 
601 	hfi_updates_wq = create_singlethread_workqueue("hfi-updates");
602 	if (!hfi_updates_wq)
603 		goto err_nomem;
604 
605 	return;
606 
607 err_nomem:
608 	for (j = 0; j < i; ++j) {
609 		hfi_instance = &hfi_instances[j];
610 		free_cpumask_var(hfi_instance->cpus);
611 	}
612 
613 	kfree(hfi_instances);
614 	hfi_instances = NULL;
615 }
616