1 /* 2 * itmt.c: Support Intel Turbo Boost Max Technology 3.0 3 * 4 * (C) Copyright 2016 Intel Corporation 5 * Author: Tim Chen <tim.c.chen@linux.intel.com> 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License 9 * as published by the Free Software Foundation; version 2 10 * of the License. 11 * 12 * On platforms supporting Intel Turbo Boost Max Technology 3.0, (ITMT), 13 * the maximum turbo frequencies of some cores in a CPU package may be 14 * higher than for the other cores in the same package. In that case, 15 * better performance can be achieved by making the scheduler prefer 16 * to run tasks on the CPUs with higher max turbo frequencies. 17 * 18 * This file provides functions and data structures for enabling the 19 * scheduler to favor scheduling on cores can be boosted to a higher 20 * frequency under ITMT. 21 */ 22 23 #include <linux/sched.h> 24 #include <linux/cpumask.h> 25 #include <linux/cpuset.h> 26 #include <linux/mutex.h> 27 #include <linux/sched.h> 28 #include <linux/sysctl.h> 29 #include <linux/nodemask.h> 30 31 static DEFINE_MUTEX(itmt_update_mutex); 32 DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority); 33 34 /* Boolean to track if system has ITMT capabilities */ 35 static bool __read_mostly sched_itmt_capable; 36 37 /* 38 * Boolean to control whether we want to move processes to cpu capable 39 * of higher turbo frequency for cpus supporting Intel Turbo Boost Max 40 * Technology 3.0. 41 * 42 * It can be set via /proc/sys/kernel/sched_itmt_enabled 43 */ 44 unsigned int __read_mostly sysctl_sched_itmt_enabled; 45 46 static int sched_itmt_update_handler(struct ctl_table *table, int write, 47 void __user *buffer, size_t *lenp, 48 loff_t *ppos) 49 { 50 unsigned int old_sysctl; 51 int ret; 52 53 mutex_lock(&itmt_update_mutex); 54 55 if (!sched_itmt_capable) { 56 mutex_unlock(&itmt_update_mutex); 57 return -EINVAL; 58 } 59 60 old_sysctl = sysctl_sched_itmt_enabled; 61 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 62 63 if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) { 64 x86_topology_update = true; 65 rebuild_sched_domains(); 66 } 67 68 mutex_unlock(&itmt_update_mutex); 69 70 return ret; 71 } 72 73 static unsigned int zero; 74 static unsigned int one = 1; 75 static struct ctl_table itmt_kern_table[] = { 76 { 77 .procname = "sched_itmt_enabled", 78 .data = &sysctl_sched_itmt_enabled, 79 .maxlen = sizeof(unsigned int), 80 .mode = 0644, 81 .proc_handler = sched_itmt_update_handler, 82 .extra1 = &zero, 83 .extra2 = &one, 84 }, 85 {} 86 }; 87 88 static struct ctl_table itmt_root_table[] = { 89 { 90 .procname = "kernel", 91 .mode = 0555, 92 .child = itmt_kern_table, 93 }, 94 {} 95 }; 96 97 static struct ctl_table_header *itmt_sysctl_header; 98 99 /** 100 * sched_set_itmt_support() - Indicate platform supports ITMT 101 * 102 * This function is used by the OS to indicate to scheduler that the platform 103 * is capable of supporting the ITMT feature. 104 * 105 * The current scheme has the pstate driver detects if the system 106 * is ITMT capable and call sched_set_itmt_support. 107 * 108 * This must be done only after sched_set_itmt_core_prio 109 * has been called to set the cpus' priorities. 110 * It must not be called with cpu hot plug lock 111 * held as we need to acquire the lock to rebuild sched domains 112 * later. 113 * 114 * Return: 0 on success 115 */ 116 int sched_set_itmt_support(void) 117 { 118 mutex_lock(&itmt_update_mutex); 119 120 if (sched_itmt_capable) { 121 mutex_unlock(&itmt_update_mutex); 122 return 0; 123 } 124 125 itmt_sysctl_header = register_sysctl_table(itmt_root_table); 126 if (!itmt_sysctl_header) { 127 mutex_unlock(&itmt_update_mutex); 128 return -ENOMEM; 129 } 130 131 sched_itmt_capable = true; 132 133 sysctl_sched_itmt_enabled = 1; 134 135 x86_topology_update = true; 136 rebuild_sched_domains(); 137 138 mutex_unlock(&itmt_update_mutex); 139 140 return 0; 141 } 142 143 /** 144 * sched_clear_itmt_support() - Revoke platform's support of ITMT 145 * 146 * This function is used by the OS to indicate that it has 147 * revoked the platform's support of ITMT feature. 148 * 149 * It must not be called with cpu hot plug lock 150 * held as we need to acquire the lock to rebuild sched domains 151 * later. 152 */ 153 void sched_clear_itmt_support(void) 154 { 155 mutex_lock(&itmt_update_mutex); 156 157 if (!sched_itmt_capable) { 158 mutex_unlock(&itmt_update_mutex); 159 return; 160 } 161 sched_itmt_capable = false; 162 163 if (itmt_sysctl_header) { 164 unregister_sysctl_table(itmt_sysctl_header); 165 itmt_sysctl_header = NULL; 166 } 167 168 if (sysctl_sched_itmt_enabled) { 169 /* disable sched_itmt if we are no longer ITMT capable */ 170 sysctl_sched_itmt_enabled = 0; 171 x86_topology_update = true; 172 rebuild_sched_domains(); 173 } 174 175 mutex_unlock(&itmt_update_mutex); 176 } 177 178 int arch_asym_cpu_priority(int cpu) 179 { 180 return per_cpu(sched_core_priority, cpu); 181 } 182 183 /** 184 * sched_set_itmt_core_prio() - Set CPU priority based on ITMT 185 * @prio: Priority of cpu core 186 * @core_cpu: The cpu number associated with the core 187 * 188 * The pstate driver will find out the max boost frequency 189 * and call this function to set a priority proportional 190 * to the max boost frequency. CPU with higher boost 191 * frequency will receive higher priority. 192 * 193 * No need to rebuild sched domain after updating 194 * the CPU priorities. The sched domains have no 195 * dependency on CPU priorities. 196 */ 197 void sched_set_itmt_core_prio(int prio, int core_cpu) 198 { 199 int cpu, i = 1; 200 201 for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) { 202 int smt_prio; 203 204 /* 205 * Ensure that the siblings are moved to the end 206 * of the priority chain and only used when 207 * all other high priority cpus are out of capacity. 208 */ 209 smt_prio = prio * smp_num_siblings / i; 210 per_cpu(sched_core_priority, cpu) = smt_prio; 211 i++; 212 } 213 } 214