1 /* 2 * itmt.c: Support Intel Turbo Boost Max Technology 3.0 3 * 4 * (C) Copyright 2016 Intel Corporation 5 * Author: Tim Chen <tim.c.chen@linux.intel.com> 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License 9 * as published by the Free Software Foundation; version 2 10 * of the License. 11 * 12 * On platforms supporting Intel Turbo Boost Max Technology 3.0, (ITMT), 13 * the maximum turbo frequencies of some cores in a CPU package may be 14 * higher than for the other cores in the same package. In that case, 15 * better performance can be achieved by making the scheduler prefer 16 * to run tasks on the CPUs with higher max turbo frequencies. 17 * 18 * This file provides functions and data structures for enabling the 19 * scheduler to favor scheduling on cores can be boosted to a higher 20 * frequency under ITMT. 21 */ 22 23 #include <linux/sched.h> 24 #include <linux/cpumask.h> 25 #include <linux/cpuset.h> 26 #include <linux/mutex.h> 27 #include <linux/sched.h> 28 #include <linux/sysctl.h> 29 #include <linux/nodemask.h> 30 31 static DEFINE_MUTEX(itmt_update_mutex); 32 DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority); 33 34 /* Boolean to track if system has ITMT capabilities */ 35 static bool __read_mostly sched_itmt_capable; 36 37 /* 38 * Boolean to control whether we want to move processes to cpu capable 39 * of higher turbo frequency for cpus supporting Intel Turbo Boost Max 40 * Technology 3.0. 41 * 42 * It can be set via /proc/sys/kernel/sched_itmt_enabled 43 */ 44 unsigned int __read_mostly sysctl_sched_itmt_enabled; 45 46 static int sched_itmt_update_handler(struct ctl_table *table, int write, 47 void __user *buffer, size_t *lenp, 48 loff_t *ppos) 49 { 50 unsigned int old_sysctl; 51 int ret; 52 53 mutex_lock(&itmt_update_mutex); 54 55 if (!sched_itmt_capable) { 56 mutex_unlock(&itmt_update_mutex); 57 return -EINVAL; 58 } 59 60 old_sysctl = sysctl_sched_itmt_enabled; 61 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 62 63 if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) { 64 x86_topology_update = true; 65 rebuild_sched_domains(); 66 } 67 68 mutex_unlock(&itmt_update_mutex); 69 70 return ret; 71 } 72 73 static unsigned int zero; 74 static unsigned int one = 1; 75 static struct ctl_table itmt_kern_table[] = { 76 { 77 .procname = "sched_itmt_enabled", 78 .data = &sysctl_sched_itmt_enabled, 79 .maxlen = sizeof(unsigned int), 80 .mode = 0644, 81 .proc_handler = sched_itmt_update_handler, 82 .extra1 = &zero, 83 .extra2 = &one, 84 }, 85 {} 86 }; 87 88 static struct ctl_table itmt_root_table[] = { 89 { 90 .procname = "kernel", 91 .mode = 0555, 92 .child = itmt_kern_table, 93 }, 94 {} 95 }; 96 97 static struct ctl_table_header *itmt_sysctl_header; 98 99 /** 100 * sched_set_itmt_support() - Indicate platform supports ITMT 101 * 102 * This function is used by the OS to indicate to scheduler that the platform 103 * is capable of supporting the ITMT feature. 104 * 105 * The current scheme has the pstate driver detects if the system 106 * is ITMT capable and call sched_set_itmt_support. 107 * 108 * This must be done only after sched_set_itmt_core_prio 109 * has been called to set the cpus' priorities. 110 * It must not be called with cpu hot plug lock 111 * held as we need to acquire the lock to rebuild sched domains 112 * later. 113 * 114 * Return: 0 on success 115 */ 116 int sched_set_itmt_support(void) 117 { 118 mutex_lock(&itmt_update_mutex); 119 120 if (sched_itmt_capable) { 121 mutex_unlock(&itmt_update_mutex); 122 return 0; 123 } 124 125 itmt_sysctl_header = register_sysctl_table(itmt_root_table); 126 if (!itmt_sysctl_header) { 127 mutex_unlock(&itmt_update_mutex); 128 return -ENOMEM; 129 } 130 131 sched_itmt_capable = true; 132 133 sysctl_sched_itmt_enabled = 1; 134 135 if (sysctl_sched_itmt_enabled) { 136 x86_topology_update = true; 137 rebuild_sched_domains(); 138 } 139 140 mutex_unlock(&itmt_update_mutex); 141 142 return 0; 143 } 144 145 /** 146 * sched_clear_itmt_support() - Revoke platform's support of ITMT 147 * 148 * This function is used by the OS to indicate that it has 149 * revoked the platform's support of ITMT feature. 150 * 151 * It must not be called with cpu hot plug lock 152 * held as we need to acquire the lock to rebuild sched domains 153 * later. 154 */ 155 void sched_clear_itmt_support(void) 156 { 157 mutex_lock(&itmt_update_mutex); 158 159 if (!sched_itmt_capable) { 160 mutex_unlock(&itmt_update_mutex); 161 return; 162 } 163 sched_itmt_capable = false; 164 165 if (itmt_sysctl_header) { 166 unregister_sysctl_table(itmt_sysctl_header); 167 itmt_sysctl_header = NULL; 168 } 169 170 if (sysctl_sched_itmt_enabled) { 171 /* disable sched_itmt if we are no longer ITMT capable */ 172 sysctl_sched_itmt_enabled = 0; 173 x86_topology_update = true; 174 rebuild_sched_domains(); 175 } 176 177 mutex_unlock(&itmt_update_mutex); 178 } 179 180 int arch_asym_cpu_priority(int cpu) 181 { 182 return per_cpu(sched_core_priority, cpu); 183 } 184 185 /** 186 * sched_set_itmt_core_prio() - Set CPU priority based on ITMT 187 * @prio: Priority of cpu core 188 * @core_cpu: The cpu number associated with the core 189 * 190 * The pstate driver will find out the max boost frequency 191 * and call this function to set a priority proportional 192 * to the max boost frequency. CPU with higher boost 193 * frequency will receive higher priority. 194 * 195 * No need to rebuild sched domain after updating 196 * the CPU priorities. The sched domains have no 197 * dependency on CPU priorities. 198 */ 199 void sched_set_itmt_core_prio(int prio, int core_cpu) 200 { 201 int cpu, i = 1; 202 203 for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) { 204 int smt_prio; 205 206 /* 207 * Ensure that the siblings are moved to the end 208 * of the priority chain and only used when 209 * all other high priority cpus are out of capacity. 210 */ 211 smt_prio = prio * smp_num_siblings / i; 212 per_cpu(sched_core_priority, cpu) = smt_prio; 213 i++; 214 } 215 } 216