1 /* 2 * include/linux/topology.h 3 * 4 * Written by: Matthew Dobson, IBM Corporation 5 * 6 * Copyright (C) 2002, IBM Corp. 7 * 8 * All rights reserved. 9 * 10 * This program is free software; you can redistribute it and/or modify 11 * it under the terms of the GNU General Public License as published by 12 * the Free Software Foundation; either version 2 of the License, or 13 * (at your option) any later version. 14 * 15 * This program is distributed in the hope that it will be useful, but 16 * WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or 18 * NON INFRINGEMENT. See the GNU General Public License for more 19 * details. 20 * 21 * You should have received a copy of the GNU General Public License 22 * along with this program; if not, write to the Free Software 23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 24 * 25 * Send feedback to <colpatch@us.ibm.com> 26 */ 27 #ifndef _LINUX_TOPOLOGY_H 28 #define _LINUX_TOPOLOGY_H 29 30 #include <linux/arch_topology.h> 31 #include <linux/cpumask.h> 32 #include <linux/nodemask.h> 33 #include <linux/bitops.h> 34 #include <linux/mmzone.h> 35 #include <linux/smp.h> 36 #include <linux/percpu.h> 37 #include <asm/topology.h> 38 39 #ifndef nr_cpus_node 40 #define nr_cpus_node(node) cpumask_weight(cpumask_of_node(node)) 41 #endif 42 43 int arch_update_cpu_topology(void); 44 45 /* Conform to ACPI 2.0 SLIT distance definitions */ 46 #define LOCAL_DISTANCE 10 47 #define REMOTE_DISTANCE 20 48 #define DISTANCE_BITS 8 49 #ifndef node_distance 50 #define node_distance(from,to) ((from) == (to) ? LOCAL_DISTANCE : REMOTE_DISTANCE) 51 #endif 52 #ifndef RECLAIM_DISTANCE 53 /* 54 * If the distance between nodes in a system is larger than RECLAIM_DISTANCE 55 * (in whatever arch specific measurement units returned by node_distance()) 56 * and node_reclaim_mode is enabled then the VM will only call node_reclaim() 57 * on nodes within this distance. 58 */ 59 #define RECLAIM_DISTANCE 30 60 #endif 61 62 /* 63 * The following tunable allows platforms to override the default node 64 * reclaim distance (RECLAIM_DISTANCE) if remote memory accesses are 65 * sufficiently fast that the default value actually hurts 66 * performance. 67 * 68 * AMD EPYC machines use this because even though the 2-hop distance 69 * is 32 (3.2x slower than a local memory access) performance actually 70 * *improves* if allowed to reclaim memory and load balance tasks 71 * between NUMA nodes 2-hops apart. 72 */ 73 extern int __read_mostly node_reclaim_distance; 74 75 #ifndef PENALTY_FOR_NODE_WITH_CPUS 76 #define PENALTY_FOR_NODE_WITH_CPUS (1) 77 #endif 78 79 #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID 80 DECLARE_PER_CPU(int, numa_node); 81 82 #ifndef numa_node_id 83 /* Returns the number of the current Node. */ 84 static inline int numa_node_id(void) 85 { 86 return raw_cpu_read(numa_node); 87 } 88 #endif 89 90 #ifndef cpu_to_node 91 static inline int cpu_to_node(int cpu) 92 { 93 return per_cpu(numa_node, cpu); 94 } 95 #endif 96 97 #ifndef set_numa_node 98 static inline void set_numa_node(int node) 99 { 100 this_cpu_write(numa_node, node); 101 } 102 #endif 103 104 #ifndef set_cpu_numa_node 105 static inline void set_cpu_numa_node(int cpu, int node) 106 { 107 per_cpu(numa_node, cpu) = node; 108 } 109 #endif 110 111 #else /* !CONFIG_USE_PERCPU_NUMA_NODE_ID */ 112 113 /* Returns the number of the current Node. */ 114 #ifndef numa_node_id 115 static inline int numa_node_id(void) 116 { 117 return cpu_to_node(raw_smp_processor_id()); 118 } 119 #endif 120 121 #endif /* [!]CONFIG_USE_PERCPU_NUMA_NODE_ID */ 122 123 #ifdef CONFIG_HAVE_MEMORYLESS_NODES 124 125 /* 126 * N.B., Do NOT reference the '_numa_mem_' per cpu variable directly. 127 * It will not be defined when CONFIG_HAVE_MEMORYLESS_NODES is not defined. 128 * Use the accessor functions set_numa_mem(), numa_mem_id() and cpu_to_mem(). 129 */ 130 DECLARE_PER_CPU(int, _numa_mem_); 131 132 #ifndef set_numa_mem 133 static inline void set_numa_mem(int node) 134 { 135 this_cpu_write(_numa_mem_, node); 136 } 137 #endif 138 139 #ifndef numa_mem_id 140 /* Returns the number of the nearest Node with memory */ 141 static inline int numa_mem_id(void) 142 { 143 return raw_cpu_read(_numa_mem_); 144 } 145 #endif 146 147 #ifndef cpu_to_mem 148 static inline int cpu_to_mem(int cpu) 149 { 150 return per_cpu(_numa_mem_, cpu); 151 } 152 #endif 153 154 #ifndef set_cpu_numa_mem 155 static inline void set_cpu_numa_mem(int cpu, int node) 156 { 157 per_cpu(_numa_mem_, cpu) = node; 158 } 159 #endif 160 161 #else /* !CONFIG_HAVE_MEMORYLESS_NODES */ 162 163 #ifndef numa_mem_id 164 /* Returns the number of the nearest Node with memory */ 165 static inline int numa_mem_id(void) 166 { 167 return numa_node_id(); 168 } 169 #endif 170 171 #ifndef cpu_to_mem 172 static inline int cpu_to_mem(int cpu) 173 { 174 return cpu_to_node(cpu); 175 } 176 #endif 177 178 #endif /* [!]CONFIG_HAVE_MEMORYLESS_NODES */ 179 180 #if defined(topology_die_id) && defined(topology_die_cpumask) 181 #define TOPOLOGY_DIE_SYSFS 182 #endif 183 #if defined(topology_cluster_id) && defined(topology_cluster_cpumask) 184 #define TOPOLOGY_CLUSTER_SYSFS 185 #endif 186 #if defined(topology_book_id) && defined(topology_book_cpumask) 187 #define TOPOLOGY_BOOK_SYSFS 188 #endif 189 #if defined(topology_drawer_id) && defined(topology_drawer_cpumask) 190 #define TOPOLOGY_DRAWER_SYSFS 191 #endif 192 193 #ifndef topology_physical_package_id 194 #define topology_physical_package_id(cpu) ((void)(cpu), -1) 195 #endif 196 #ifndef topology_die_id 197 #define topology_die_id(cpu) ((void)(cpu), -1) 198 #endif 199 #ifndef topology_cluster_id 200 #define topology_cluster_id(cpu) ((void)(cpu), -1) 201 #endif 202 #ifndef topology_core_id 203 #define topology_core_id(cpu) ((void)(cpu), 0) 204 #endif 205 #ifndef topology_book_id 206 #define topology_book_id(cpu) ((void)(cpu), -1) 207 #endif 208 #ifndef topology_drawer_id 209 #define topology_drawer_id(cpu) ((void)(cpu), -1) 210 #endif 211 #ifndef topology_ppin 212 #define topology_ppin(cpu) ((void)(cpu), 0ull) 213 #endif 214 #ifndef topology_sibling_cpumask 215 #define topology_sibling_cpumask(cpu) cpumask_of(cpu) 216 #endif 217 #ifndef topology_core_cpumask 218 #define topology_core_cpumask(cpu) cpumask_of(cpu) 219 #endif 220 #ifndef topology_cluster_cpumask 221 #define topology_cluster_cpumask(cpu) cpumask_of(cpu) 222 #endif 223 #ifndef topology_die_cpumask 224 #define topology_die_cpumask(cpu) cpumask_of(cpu) 225 #endif 226 #ifndef topology_book_cpumask 227 #define topology_book_cpumask(cpu) cpumask_of(cpu) 228 #endif 229 #ifndef topology_drawer_cpumask 230 #define topology_drawer_cpumask(cpu) cpumask_of(cpu) 231 #endif 232 233 /* 234 * Defining cpu_smt_mask as cpumask_of that CPU helps to get 235 * rid of lot of ifdeffery all around the codebase in case of 236 * CONFIG_SCHED_SMT=n. It just means there are no other siblings, which 237 * is what is expected. 238 */ 239 #if defined(CONFIG_SCHED_SMT) 240 # if !defined(cpu_smt_mask) 241 static inline const struct cpumask *cpu_smt_mask(int cpu) 242 { 243 return topology_sibling_cpumask(cpu); 244 } 245 # endif 246 #else /* !CONFIG_SCHED_SMT */ 247 static inline const struct cpumask *cpu_smt_mask(int cpu) 248 { 249 return cpumask_of(cpu); 250 } 251 #endif 252 253 #ifndef topology_is_primary_thread 254 255 static inline bool topology_is_primary_thread(unsigned int cpu) 256 { 257 /* 258 * When disabling SMT, the primary thread of the SMT will remain 259 * enabled/active. Architectures that have a special primary thread 260 * (e.g. x86) need to override this function. Otherwise the first 261 * thread in the SMT can be made the primary thread. 262 * 263 * The sibling cpumask of an offline CPU always contains the CPU 264 * itself on architectures using the implementation of 265 * CONFIG_GENERIC_ARCH_TOPOLOGY for building their topology. 266 * Other architectures not using CONFIG_GENERIC_ARCH_TOPOLOGY for 267 * building their topology have to check whether to use this default 268 * implementation or to override it. 269 */ 270 return cpu == cpumask_first(topology_sibling_cpumask(cpu)); 271 } 272 #define topology_is_primary_thread topology_is_primary_thread 273 274 #endif 275 276 static inline const struct cpumask *cpu_node_mask(int cpu) 277 { 278 return cpumask_of_node(cpu_to_node(cpu)); 279 } 280 281 #ifdef CONFIG_NUMA 282 int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node); 283 extern const struct cpumask *sched_numa_hop_mask(unsigned int node, unsigned int hops); 284 #else 285 static __always_inline int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node) 286 { 287 return cpumask_nth_and(cpu, cpus, cpu_online_mask); 288 } 289 290 static inline const struct cpumask * 291 sched_numa_hop_mask(unsigned int node, unsigned int hops) 292 { 293 return ERR_PTR(-EOPNOTSUPP); 294 } 295 #endif /* CONFIG_NUMA */ 296 297 /** 298 * for_each_node_numadist() - iterate over nodes in increasing distance 299 * order, starting from a given node 300 * @node: the iteration variable and the starting node. 301 * @unvisited: a nodemask to keep track of the unvisited nodes. 302 * 303 * This macro iterates over NUMA node IDs in increasing distance from the 304 * starting @node and yields MAX_NUMNODES when all the nodes have been 305 * visited. 306 * 307 * Note that by the time the loop completes, the @unvisited nodemask will 308 * be fully cleared, unless the loop exits early. 309 * 310 * The difference between for_each_node() and for_each_node_numadist() is 311 * that the former allows to iterate over nodes in numerical order, whereas 312 * the latter iterates over nodes in increasing order of distance. 313 * 314 * This complexity of this iterator is O(N^2), where N represents the 315 * number of nodes, as each iteration involves scanning all nodes to 316 * find the one with the shortest distance. 317 * 318 * Requires rcu_lock to be held. 319 */ 320 #define for_each_node_numadist(node, unvisited) \ 321 for (int __start = (node), \ 322 (node) = nearest_node_nodemask((__start), &(unvisited)); \ 323 (node) < MAX_NUMNODES; \ 324 node_clear((node), (unvisited)), \ 325 (node) = nearest_node_nodemask((__start), &(unvisited))) 326 327 /** 328 * for_each_numa_hop_mask - iterate over cpumasks of increasing NUMA distance 329 * from a given node. 330 * @mask: the iteration variable. 331 * @node: the NUMA node to start the search from. 332 * 333 * Requires rcu_lock to be held. 334 * 335 * Yields cpu_online_mask for @node == NUMA_NO_NODE. 336 */ 337 #define for_each_numa_hop_mask(mask, node) \ 338 for (unsigned int __hops = 0; \ 339 mask = (node != NUMA_NO_NODE || __hops) ? \ 340 sched_numa_hop_mask(node, __hops) : \ 341 cpu_online_mask, \ 342 !IS_ERR_OR_NULL(mask); \ 343 __hops++) 344 345 DECLARE_PER_CPU(unsigned long, cpu_scale); 346 347 static inline unsigned long topology_get_cpu_scale(int cpu) 348 { 349 return per_cpu(cpu_scale, cpu); 350 } 351 352 void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity); 353 354 #endif /* _LINUX_TOPOLOGY_H */ 355