1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * itmt.c: Support Intel Turbo Boost Max Technology 3.0
4 *
5 * (C) Copyright 2016 Intel Corporation
6 * Author: Tim Chen <tim.c.chen@linux.intel.com>
7 *
8 * On platforms supporting Intel Turbo Boost Max Technology 3.0, (ITMT),
9 * the maximum turbo frequencies of some cores in a CPU package may be
10 * higher than for the other cores in the same package. In that case,
11 * better performance can be achieved by making the scheduler prefer
12 * to run tasks on the CPUs with higher max turbo frequencies.
13 *
14 * This file provides functions and data structures for enabling the
15 * scheduler to favor scheduling on cores that can be boosted to a higher
16 * frequency under ITMT.
17 */
18
19 #include <linux/sched.h>
20 #include <linux/cpumask.h>
21 #include <linux/cpuset.h>
22 #include <linux/debugfs.h>
23 #include <linux/mutex.h>
24 #include <linux/sysctl.h>
25 #include <linux/nodemask.h>
26
27 static DEFINE_MUTEX(itmt_update_mutex);
28 DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
29
30 /* Boolean to track if system has ITMT capabilities */
31 static bool __read_mostly sched_itmt_capable;
32
33 /*
34 * Boolean to control whether we want to move processes to cpu capable
35 * of higher turbo frequency for cpus supporting Intel Turbo Boost Max
36 * Technology 3.0.
37 *
38 * It can be set via /sys/kernel/debug/x86/sched_itmt_enabled
39 */
40 bool __read_mostly sysctl_sched_itmt_enabled;
41
sched_itmt_enabled_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)42 static ssize_t sched_itmt_enabled_write(struct file *filp,
43 const char __user *ubuf,
44 size_t cnt, loff_t *ppos)
45 {
46 ssize_t result;
47 bool orig;
48
49 guard(mutex)(&itmt_update_mutex);
50
51 orig = sysctl_sched_itmt_enabled;
52 result = debugfs_write_file_bool(filp, ubuf, cnt, ppos);
53
54 if (sysctl_sched_itmt_enabled != orig) {
55 x86_topology_update = true;
56 rebuild_sched_domains();
57 }
58
59 return result;
60 }
61
sched_core_priority_show(struct seq_file * s,void * unused)62 static int sched_core_priority_show(struct seq_file *s, void *unused)
63 {
64 int cpu;
65
66 seq_puts(s, "CPU #\tPriority\n");
67 for_each_possible_cpu(cpu)
68 seq_printf(s, "%d\t%d\n", cpu, arch_asym_cpu_priority(cpu));
69
70 return 0;
71 }
72 DEFINE_SHOW_ATTRIBUTE(sched_core_priority);
73
74 static const struct file_operations dfs_sched_itmt_fops = {
75 .read = debugfs_read_file_bool,
76 .write = sched_itmt_enabled_write,
77 .open = simple_open,
78 .llseek = default_llseek,
79 };
80
81 static struct dentry *dfs_sched_itmt;
82 static struct dentry *dfs_sched_core_prio;
83
84 /**
85 * sched_set_itmt_support() - Indicate platform supports ITMT
86 *
87 * This function is used by the OS to indicate to scheduler that the platform
88 * is capable of supporting the ITMT feature.
89 *
90 * The current scheme has the pstate driver detects if the system
91 * is ITMT capable and call sched_set_itmt_support.
92 *
93 * This must be done only after sched_set_itmt_core_prio
94 * has been called to set the cpus' priorities.
95 * It must not be called with cpu hot plug lock
96 * held as we need to acquire the lock to rebuild sched domains
97 * later.
98 *
99 * Return: 0 on success
100 */
sched_set_itmt_support(void)101 int sched_set_itmt_support(void)
102 {
103 guard(mutex)(&itmt_update_mutex);
104
105 if (sched_itmt_capable)
106 return 0;
107
108 dfs_sched_itmt = debugfs_create_file_unsafe("sched_itmt_enabled",
109 0644,
110 arch_debugfs_dir,
111 &sysctl_sched_itmt_enabled,
112 &dfs_sched_itmt_fops);
113 if (IS_ERR_OR_NULL(dfs_sched_itmt)) {
114 dfs_sched_itmt = NULL;
115 return -ENOMEM;
116 }
117
118 dfs_sched_core_prio = debugfs_create_file("sched_core_priority", 0644,
119 arch_debugfs_dir, NULL,
120 &sched_core_priority_fops);
121 if (IS_ERR_OR_NULL(dfs_sched_core_prio)) {
122 dfs_sched_core_prio = NULL;
123 return -ENOMEM;
124 }
125
126 sched_itmt_capable = true;
127
128 sysctl_sched_itmt_enabled = 1;
129
130 x86_topology_update = true;
131 rebuild_sched_domains();
132
133 return 0;
134 }
135
136 /**
137 * sched_clear_itmt_support() - Revoke platform's support of ITMT
138 *
139 * This function is used by the OS to indicate that it has
140 * revoked the platform's support of ITMT feature.
141 *
142 * It must not be called with cpu hot plug lock
143 * held as we need to acquire the lock to rebuild sched domains
144 * later.
145 */
sched_clear_itmt_support(void)146 void sched_clear_itmt_support(void)
147 {
148 guard(mutex)(&itmt_update_mutex);
149
150 if (!sched_itmt_capable)
151 return;
152
153 sched_itmt_capable = false;
154
155 debugfs_remove(dfs_sched_itmt);
156 dfs_sched_itmt = NULL;
157 debugfs_remove(dfs_sched_core_prio);
158 dfs_sched_core_prio = NULL;
159
160 if (sysctl_sched_itmt_enabled) {
161 /* disable sched_itmt if we are no longer ITMT capable */
162 sysctl_sched_itmt_enabled = 0;
163 x86_topology_update = true;
164 rebuild_sched_domains();
165 }
166 }
167
arch_asym_cpu_priority(int cpu)168 int arch_asym_cpu_priority(int cpu)
169 {
170 return per_cpu(sched_core_priority, cpu);
171 }
172
173 /**
174 * sched_set_itmt_core_prio() - Set CPU priority based on ITMT
175 * @prio: Priority of @cpu
176 * @cpu: The CPU number
177 *
178 * The pstate driver will find out the max boost frequency
179 * and call this function to set a priority proportional
180 * to the max boost frequency. CPUs with higher boost
181 * frequency will receive higher priority.
182 *
183 * No need to rebuild sched domain after updating
184 * the CPU priorities. The sched domains have no
185 * dependency on CPU priorities.
186 */
sched_set_itmt_core_prio(int prio,int cpu)187 void sched_set_itmt_core_prio(int prio, int cpu)
188 {
189 per_cpu(sched_core_priority, cpu) = prio;
190 }
191