xref: /linux/arch/x86/kernel/itmt.c (revision 6b8a024d25ebf7535eb4a3e926309aa693cfe1bd)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * itmt.c: Support Intel Turbo Boost Max Technology 3.0
4  *
5  * (C) Copyright 2016 Intel Corporation
6  * Author: Tim Chen <tim.c.chen@linux.intel.com>
7  *
8  * On platforms supporting Intel Turbo Boost Max Technology 3.0, (ITMT),
9  * the maximum turbo frequencies of some cores in a CPU package may be
10  * higher than for the other cores in the same package.  In that case,
11  * better performance can be achieved by making the scheduler prefer
12  * to run tasks on the CPUs with higher max turbo frequencies.
13  *
 * This file provides functions and data structures for enabling the
 * scheduler to favor scheduling on cores that can be boosted to a higher
 * frequency under ITMT.
17  */
18 
19 #include <linux/sched.h>
20 #include <linux/cpumask.h>
21 #include <linux/cpuset.h>
22 #include <linux/mutex.h>
23 #include <linux/sysctl.h>
24 #include <linux/nodemask.h>
25 
26 static DEFINE_MUTEX(itmt_update_mutex);
27 DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
28 
29 /* Boolean to track if system has ITMT capabilities */
30 static bool __read_mostly sched_itmt_capable;
31 
32 /*
33  * Boolean to control whether we want to move processes to cpu capable
34  * of higher turbo frequency for cpus supporting Intel Turbo Boost Max
35  * Technology 3.0.
36  *
37  * It can be set via /proc/sys/kernel/sched_itmt_enabled
38  */
39 unsigned int __read_mostly sysctl_sched_itmt_enabled;
40 
41 static int sched_itmt_update_handler(const struct ctl_table *table, int write,
42 				     void *buffer, size_t *lenp, loff_t *ppos)
43 {
44 	unsigned int old_sysctl;
45 	int ret;
46 
47 	mutex_lock(&itmt_update_mutex);
48 
49 	if (!sched_itmt_capable) {
50 		mutex_unlock(&itmt_update_mutex);
51 		return -EINVAL;
52 	}
53 
54 	old_sysctl = sysctl_sched_itmt_enabled;
55 	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
56 
57 	if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) {
58 		x86_topology_update = true;
59 		rebuild_sched_domains();
60 	}
61 
62 	mutex_unlock(&itmt_update_mutex);
63 
64 	return ret;
65 }
66 
67 static struct ctl_table itmt_kern_table[] = {
68 	{
69 		.procname	= "sched_itmt_enabled",
70 		.data		= &sysctl_sched_itmt_enabled,
71 		.maxlen		= sizeof(unsigned int),
72 		.mode		= 0644,
73 		.proc_handler	= sched_itmt_update_handler,
74 		.extra1		= SYSCTL_ZERO,
75 		.extra2		= SYSCTL_ONE,
76 	},
77 };
78 
79 static struct ctl_table_header *itmt_sysctl_header;
80 
81 /**
82  * sched_set_itmt_support() - Indicate platform supports ITMT
83  *
84  * This function is used by the OS to indicate to scheduler that the platform
85  * is capable of supporting the ITMT feature.
86  *
87  * The current scheme has the pstate driver detects if the system
88  * is ITMT capable and call sched_set_itmt_support.
89  *
90  * This must be done only after sched_set_itmt_core_prio
91  * has been called to set the cpus' priorities.
92  * It must not be called with cpu hot plug lock
93  * held as we need to acquire the lock to rebuild sched domains
94  * later.
95  *
96  * Return: 0 on success
97  */
98 int sched_set_itmt_support(void)
99 {
100 	mutex_lock(&itmt_update_mutex);
101 
102 	if (sched_itmt_capable) {
103 		mutex_unlock(&itmt_update_mutex);
104 		return 0;
105 	}
106 
107 	itmt_sysctl_header = register_sysctl("kernel", itmt_kern_table);
108 	if (!itmt_sysctl_header) {
109 		mutex_unlock(&itmt_update_mutex);
110 		return -ENOMEM;
111 	}
112 
113 	sched_itmt_capable = true;
114 
115 	sysctl_sched_itmt_enabled = 1;
116 
117 	x86_topology_update = true;
118 	rebuild_sched_domains();
119 
120 	mutex_unlock(&itmt_update_mutex);
121 
122 	return 0;
123 }
124 
125 /**
126  * sched_clear_itmt_support() - Revoke platform's support of ITMT
127  *
128  * This function is used by the OS to indicate that it has
129  * revoked the platform's support of ITMT feature.
130  *
131  * It must not be called with cpu hot plug lock
132  * held as we need to acquire the lock to rebuild sched domains
133  * later.
134  */
135 void sched_clear_itmt_support(void)
136 {
137 	mutex_lock(&itmt_update_mutex);
138 
139 	if (!sched_itmt_capable) {
140 		mutex_unlock(&itmt_update_mutex);
141 		return;
142 	}
143 	sched_itmt_capable = false;
144 
145 	if (itmt_sysctl_header) {
146 		unregister_sysctl_table(itmt_sysctl_header);
147 		itmt_sysctl_header = NULL;
148 	}
149 
150 	if (sysctl_sched_itmt_enabled) {
151 		/* disable sched_itmt if we are no longer ITMT capable */
152 		sysctl_sched_itmt_enabled = 0;
153 		x86_topology_update = true;
154 		rebuild_sched_domains();
155 	}
156 
157 	mutex_unlock(&itmt_update_mutex);
158 }
159 
160 int arch_asym_cpu_priority(int cpu)
161 {
162 	return per_cpu(sched_core_priority, cpu);
163 }
164 
165 /**
166  * sched_set_itmt_core_prio() - Set CPU priority based on ITMT
167  * @prio:	Priority of @cpu
168  * @cpu:	The CPU number
169  *
170  * The pstate driver will find out the max boost frequency
171  * and call this function to set a priority proportional
172  * to the max boost frequency. CPUs with higher boost
173  * frequency will receive higher priority.
174  *
175  * No need to rebuild sched domain after updating
176  * the CPU priorities. The sched domains have no
177  * dependency on CPU priorities.
178  */
179 void sched_set_itmt_core_prio(int prio, int cpu)
180 {
181 	per_cpu(sched_core_priority, cpu) = prio;
182 }
183