1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * itmt.c: Support Intel Turbo Boost Max Technology 3.0 4 * 5 * (C) Copyright 2016 Intel Corporation 6 * Author: Tim Chen <tim.c.chen@linux.intel.com> 7 * 8 * On platforms supporting Intel Turbo Boost Max Technology 3.0, (ITMT), 9 * the maximum turbo frequencies of some cores in a CPU package may be 10 * higher than for the other cores in the same package. In that case, 11 * better performance can be achieved by making the scheduler prefer 12 * to run tasks on the CPUs with higher max turbo frequencies. 13 * 14 * This file provides functions and data structures for enabling the 15 * scheduler to favor scheduling on cores can be boosted to a higher 16 * frequency under ITMT. 17 */ 18 19 #include <linux/sched.h> 20 #include <linux/cpumask.h> 21 #include <linux/cpuset.h> 22 #include <linux/mutex.h> 23 #include <linux/sysctl.h> 24 #include <linux/nodemask.h> 25 26 static DEFINE_MUTEX(itmt_update_mutex); 27 DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority); 28 29 /* Boolean to track if system has ITMT capabilities */ 30 static bool __read_mostly sched_itmt_capable; 31 32 /* 33 * Boolean to control whether we want to move processes to cpu capable 34 * of higher turbo frequency for cpus supporting Intel Turbo Boost Max 35 * Technology 3.0. 36 * 37 * It can be set via /proc/sys/kernel/sched_itmt_enabled 38 */ 39 unsigned int __read_mostly sysctl_sched_itmt_enabled; 40 41 static int sched_itmt_update_handler(struct ctl_table *table, int write, 42 void *buffer, size_t *lenp, loff_t *ppos) 43 { 44 unsigned int old_sysctl; 45 int ret; 46 47 mutex_lock(&itmt_update_mutex); 48 49 if (!sched_itmt_capable) { 50 mutex_unlock(&itmt_update_mutex); 51 return -EINVAL; 52 } 53 54 old_sysctl = sysctl_sched_itmt_enabled; 55 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 56 57 if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) { 58 x86_topology_update = true; 59 rebuild_sched_domains(); 60 } 61 62 mutex_unlock(&itmt_update_mutex); 63 64 return ret; 65 } 66 67 static struct ctl_table itmt_kern_table[] = { 68 { 69 .procname = "sched_itmt_enabled", 70 .data = &sysctl_sched_itmt_enabled, 71 .maxlen = sizeof(unsigned int), 72 .mode = 0644, 73 .proc_handler = sched_itmt_update_handler, 74 .extra1 = SYSCTL_ZERO, 75 .extra2 = SYSCTL_ONE, 76 }, 77 {} 78 }; 79 80 static struct ctl_table itmt_root_table[] = { 81 { 82 .procname = "kernel", 83 .mode = 0555, 84 .child = itmt_kern_table, 85 }, 86 {} 87 }; 88 89 static struct ctl_table_header *itmt_sysctl_header; 90 91 /** 92 * sched_set_itmt_support() - Indicate platform supports ITMT 93 * 94 * This function is used by the OS to indicate to scheduler that the platform 95 * is capable of supporting the ITMT feature. 96 * 97 * The current scheme has the pstate driver detects if the system 98 * is ITMT capable and call sched_set_itmt_support. 99 * 100 * This must be done only after sched_set_itmt_core_prio 101 * has been called to set the cpus' priorities. 102 * It must not be called with cpu hot plug lock 103 * held as we need to acquire the lock to rebuild sched domains 104 * later. 105 * 106 * Return: 0 on success 107 */ 108 int sched_set_itmt_support(void) 109 { 110 mutex_lock(&itmt_update_mutex); 111 112 if (sched_itmt_capable) { 113 mutex_unlock(&itmt_update_mutex); 114 return 0; 115 } 116 117 itmt_sysctl_header = register_sysctl_table(itmt_root_table); 118 if (!itmt_sysctl_header) { 119 mutex_unlock(&itmt_update_mutex); 120 return -ENOMEM; 121 } 122 123 sched_itmt_capable = true; 124 125 sysctl_sched_itmt_enabled = 1; 126 127 x86_topology_update = true; 128 rebuild_sched_domains(); 129 130 mutex_unlock(&itmt_update_mutex); 131 132 return 0; 133 } 134 135 /** 136 * sched_clear_itmt_support() - Revoke platform's support of ITMT 137 * 138 * This function is used by the OS to indicate that it has 139 * revoked the platform's support of ITMT feature. 140 * 141 * It must not be called with cpu hot plug lock 142 * held as we need to acquire the lock to rebuild sched domains 143 * later. 144 */ 145 void sched_clear_itmt_support(void) 146 { 147 mutex_lock(&itmt_update_mutex); 148 149 if (!sched_itmt_capable) { 150 mutex_unlock(&itmt_update_mutex); 151 return; 152 } 153 sched_itmt_capable = false; 154 155 if (itmt_sysctl_header) { 156 unregister_sysctl_table(itmt_sysctl_header); 157 itmt_sysctl_header = NULL; 158 } 159 160 if (sysctl_sched_itmt_enabled) { 161 /* disable sched_itmt if we are no longer ITMT capable */ 162 sysctl_sched_itmt_enabled = 0; 163 x86_topology_update = true; 164 rebuild_sched_domains(); 165 } 166 167 mutex_unlock(&itmt_update_mutex); 168 } 169 170 int arch_asym_cpu_priority(int cpu) 171 { 172 return per_cpu(sched_core_priority, cpu); 173 } 174 175 /** 176 * sched_set_itmt_core_prio() - Set CPU priority based on ITMT 177 * @prio: Priority of cpu core 178 * @core_cpu: The cpu number associated with the core 179 * 180 * The pstate driver will find out the max boost frequency 181 * and call this function to set a priority proportional 182 * to the max boost frequency. CPU with higher boost 183 * frequency will receive higher priority. 184 * 185 * No need to rebuild sched domain after updating 186 * the CPU priorities. The sched domains have no 187 * dependency on CPU priorities. 188 */ 189 void sched_set_itmt_core_prio(int prio, int core_cpu) 190 { 191 int cpu, i = 1; 192 193 for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) { 194 int smt_prio; 195 196 /* 197 * Ensure that the siblings are moved to the end 198 * of the priority chain and only used when 199 * all other high priority cpus are out of capacity. 200 */ 201 smt_prio = prio * smp_num_siblings / i; 202 per_cpu(sched_core_priority, cpu) = smt_prio; 203 i++; 204 } 205 } 206