/*
 * cpu_rmap.c: CPU affinity reverse-map support
 * Copyright 2011 Solarflare Communications Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include <linux/cpu_rmap.h>
#ifdef CONFIG_GENERIC_HARDIRQS
#include <linux/interrupt.h>
#endif
#include <linux/export.h>

/*
 * These functions maintain a mapping from CPUs to some ordered set of
 * objects with CPU affinities. This can be seen as a reverse-map of
 * CPU affinity. However, we do not assume that the object affinities
 * cover all CPUs in the system. For those CPUs not directly covered
 * by object affinities, we attempt to find a nearest object based on
 * CPU topology.
 */

/**
 * alloc_cpu_rmap - allocate CPU affinity reverse-map
 * @size: Number of objects to be mapped
 * @flags: Allocation flags e.g. %GFP_KERNEL
 */
struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags)
{
	struct cpu_rmap *rmap;
	unsigned int cpu;
	size_t obj_offset;

	/* This is a silly number of objects, and we use u16 indices. */
	if (size > 0xffff)
		return NULL;

	/* Offset of object pointer array from base structure */
	obj_offset = ALIGN(offsetof(struct cpu_rmap, near[nr_cpu_ids]),
			   sizeof(void *));

	rmap = kzalloc(obj_offset + size * sizeof(rmap->obj[0]), flags);
	if (!rmap)
		return NULL;

	kref_init(&rmap->refcount);
	rmap->obj = (void **)((char *)rmap + obj_offset);

	/* Initially assign CPUs to objects on a rota, since we have
	 * no idea where the objects are. Use infinite distance, so
	 * any object with known distance is preferable. Include the
	 * CPUs that are not present/online, since we definitely want
	 * any newly-hotplugged CPUs to have some object assigned.
	 */
	for_each_possible_cpu(cpu) {
		rmap->near[cpu].index = cpu % size;
		rmap->near[cpu].dist = CPU_RMAP_DIST_INF;
	}

	rmap->size = size;
	return rmap;
}
EXPORT_SYMBOL(alloc_cpu_rmap);

/**
 * cpu_rmap_release - internal reclaiming helper called from kref_put
 * @ref: kref to struct cpu_rmap
 */
static void cpu_rmap_release(struct kref *ref)
{
	struct cpu_rmap *rmap = container_of(ref, struct cpu_rmap, refcount);
	kfree(rmap);
}

/**
 * cpu_rmap_get - internal helper to get new ref on a cpu_rmap
 * @rmap: reverse-map allocated with alloc_cpu_rmap()
 */
static inline void cpu_rmap_get(struct cpu_rmap *rmap)
{
	kref_get(&rmap->refcount);
}

/**
 * cpu_rmap_put - release ref on a cpu_rmap
 * @rmap: reverse-map allocated with alloc_cpu_rmap()
 */
int cpu_rmap_put(struct cpu_rmap *rmap)
{
	return kref_put(&rmap->refcount, cpu_rmap_release);
}
EXPORT_SYMBOL(cpu_rmap_put);
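
/*
 * Example: basic lifecycle of a reverse-map. This is an illustrative
 * sketch only, not part of this file; "struct my_queue", "my_queues" and
 * MY_NR_QUEUES are hypothetical driver names. A driver allocates the map,
 * registers each of its objects to obtain a u16 index, and drops its
 * reference once the objects go away:
 *
 *	struct cpu_rmap *rmap;
 *	unsigned int i;
 *
 *	rmap = alloc_cpu_rmap(MY_NR_QUEUES, GFP_KERNEL);
 *	if (!rmap)
 *		return -ENOMEM;
 *	for (i = 0; i < MY_NR_QUEUES; i++)
 *		my_queues[i].rmap_index = cpu_rmap_add(rmap, &my_queues[i]);
 *
 *	(use the map: see cpu_rmap_update() and the lookup helpers)
 *
 *	cpu_rmap_put(rmap);
 */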

/* Reevaluate nearest object for given CPU, comparing with the given
 * neighbours at the given distance.
 */
static bool cpu_rmap_copy_neigh(struct cpu_rmap *rmap, unsigned int cpu,
				const struct cpumask *mask, u16 dist)
{
	int neigh;

	for_each_cpu(neigh, mask) {
		if (rmap->near[cpu].dist > dist &&
		    rmap->near[neigh].dist <= dist) {
			rmap->near[cpu].index = rmap->near[neigh].index;
			rmap->near[cpu].dist = dist;
			return true;
		}
	}
	return false;
}

#ifdef DEBUG
static void debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix)
{
	unsigned index;
	unsigned int cpu;

	pr_info("cpu_rmap %p, %s:\n", rmap, prefix);

	for_each_possible_cpu(cpu) {
		index = rmap->near[cpu].index;
		pr_info("cpu %d -> obj %u (distance %u)\n",
			cpu, index, rmap->near[cpu].dist);
	}
}
#else
static inline void
debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix)
{
}
#endif

/**
 * cpu_rmap_add - add object to a rmap
 * @rmap: CPU rmap allocated with alloc_cpu_rmap()
 * @obj: Object to add to rmap
 *
 * Return index of object.
 */
int cpu_rmap_add(struct cpu_rmap *rmap, void *obj)
{
	u16 index;

	BUG_ON(rmap->used >= rmap->size);
	index = rmap->used++;
	rmap->obj[index] = obj;
	return index;
}
EXPORT_SYMBOL(cpu_rmap_add);

/**
 * cpu_rmap_update - update CPU rmap following a change of object affinity
 * @rmap: CPU rmap to update
 * @index: Index of object whose affinity changed
 * @affinity: New CPU affinity of object
 *
 * Returns %0 on success, or -ENOMEM if a temporary cpumask could not be
 * allocated.
 */
int cpu_rmap_update(struct cpu_rmap *rmap, u16 index,
		    const struct cpumask *affinity)
{
	cpumask_var_t update_mask;
	unsigned int cpu;

	if (unlikely(!zalloc_cpumask_var(&update_mask, GFP_KERNEL)))
		return -ENOMEM;

	/* Invalidate distance for all CPUs for which this used to be
	 * the nearest object. Mark those CPUs for update.
	 */
	for_each_online_cpu(cpu) {
		if (rmap->near[cpu].index == index) {
			rmap->near[cpu].dist = CPU_RMAP_DIST_INF;
			cpumask_set_cpu(cpu, update_mask);
		}
	}

	debug_print_rmap(rmap, "after invalidating old distances");

	/* Set distance to 0 for all CPUs in the new affinity mask.
	 * Mark all CPUs within their NUMA nodes for update.
	 */
	for_each_cpu(cpu, affinity) {
		rmap->near[cpu].index = index;
		rmap->near[cpu].dist = 0;
		cpumask_or(update_mask, update_mask,
			   cpumask_of_node(cpu_to_node(cpu)));
	}

	debug_print_rmap(rmap, "after updating neighbours");

	/* Update distances based on topology */
	for_each_cpu(cpu, update_mask) {
		if (cpu_rmap_copy_neigh(rmap, cpu,
					topology_thread_cpumask(cpu), 1))
			continue;
		if (cpu_rmap_copy_neigh(rmap, cpu,
					topology_core_cpumask(cpu), 2))
			continue;
		if (cpu_rmap_copy_neigh(rmap, cpu,
					cpumask_of_node(cpu_to_node(cpu)), 3))
			continue;
		/* We could continue into NUMA node distances, but for now
		 * we give up.
		 */
	}

	debug_print_rmap(rmap, "after copying neighbours");

	free_cpumask_var(update_mask);
	return 0;
}
EXPORT_SYMBOL(cpu_rmap_update);
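
/*
 * Example: keeping the map current and consuming it. An illustrative
 * sketch only; "my_queues" and my_queue_affinity() are hypothetical. The
 * owner reports affinity changes with cpu_rmap_update() (process context,
 * since it may allocate); readers then resolve the nearest object for the
 * CPU they are running on with the cpu_rmap_lookup_*() helpers declared in
 * <linux/cpu_rmap.h>:
 *
 *	writer:
 *		rc = cpu_rmap_update(rmap, my_queues[i].rmap_index,
 *				     my_queue_affinity(&my_queues[i]));
 *		if (rc)
 *			return rc;
 *
 *	reader (e.g. on a receive path):
 *		struct my_queue *q;
 *
 *		q = cpu_rmap_lookup_obj(rmap, raw_smp_processor_id());
 */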

#ifdef CONFIG_GENERIC_HARDIRQS

/* Glue between IRQ affinity notifiers and CPU rmaps */

struct irq_glue {
	struct irq_affinity_notify notify;
	struct cpu_rmap *rmap;
	u16 index;
};

/**
 * free_irq_cpu_rmap - free a CPU affinity reverse-map used for IRQs
 * @rmap: Reverse-map allocated with alloc_irq_cpu_rmap(), or %NULL
 *
 * Must be called in process context, before freeing the IRQs.
 */
void free_irq_cpu_rmap(struct cpu_rmap *rmap)
{
	struct irq_glue *glue;
	u16 index;

	if (!rmap)
		return;

	for (index = 0; index < rmap->used; index++) {
		glue = rmap->obj[index];
		irq_set_affinity_notifier(glue->notify.irq, NULL);
	}

	cpu_rmap_put(rmap);
}
EXPORT_SYMBOL(free_irq_cpu_rmap);

/**
 * irq_cpu_rmap_notify - callback for IRQ subsystem when IRQ affinity updated
 * @notify: struct irq_affinity_notify passed by irq/manage.c
 * @mask: cpu mask for new SMP affinity
 *
 * This is executed in workqueue context.
 */
static void
irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask)
{
	struct irq_glue *glue =
		container_of(notify, struct irq_glue, notify);
	int rc;

	rc = cpu_rmap_update(glue->rmap, glue->index, mask);
	if (rc)
		pr_warning("irq_cpu_rmap_notify: update failed: %d\n", rc);
}

/**
 * irq_cpu_rmap_release - reclaiming callback for IRQ subsystem
 * @ref: kref to struct irq_affinity_notify passed by irq/manage.c
 */
static void irq_cpu_rmap_release(struct kref *ref)
{
	struct irq_glue *glue =
		container_of(ref, struct irq_glue, notify.kref);

	cpu_rmap_put(glue->rmap);
	kfree(glue);
}

/**
 * irq_cpu_rmap_add - add an IRQ to a CPU affinity reverse-map
 * @rmap: The reverse-map
 * @irq: The IRQ number
 *
 * This adds an IRQ affinity notifier that will update the reverse-map
 * automatically.
 *
 * Must be called in process context, after the IRQ is allocated but
 * before it is bound with request_irq().
 */
int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq)
{
	struct irq_glue *glue = kzalloc(sizeof(*glue), GFP_KERNEL);
	int rc;

	if (!glue)
		return -ENOMEM;
	glue->notify.notify = irq_cpu_rmap_notify;
	glue->notify.release = irq_cpu_rmap_release;
	glue->rmap = rmap;
	cpu_rmap_get(rmap);
	glue->index = cpu_rmap_add(rmap, glue);
	rc = irq_set_affinity_notifier(irq, &glue->notify);
	if (rc) {
		cpu_rmap_put(glue->rmap);
		kfree(glue);
	}
	return rc;
}
EXPORT_SYMBOL(irq_cpu_rmap_add);

#endif /* CONFIG_GENERIC_HARDIRQS */
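
/*
 * Example: wiring the reverse-map to IRQ affinity, in the spirit of how
 * accelerated-RFS-capable network drivers use it. An illustrative sketch
 * only; "priv", my_nr_irqs and my_irq_vector() are hypothetical. After
 * allocating the map and before request_irq(), each IRQ is registered so
 * that affinity changes made through /proc/irq/<n>/smp_affinity update the
 * map automatically; free_irq_cpu_rmap() unregisters the notifiers and
 * drops the map's reference before the IRQs are freed:
 *
 *	priv->rx_cpu_rmap = alloc_irq_cpu_rmap(my_nr_irqs);
 *	if (!priv->rx_cpu_rmap)
 *		return -ENOMEM;
 *	for (i = 0; i < my_nr_irqs; i++) {
 *		rc = irq_cpu_rmap_add(priv->rx_cpu_rmap,
 *				      my_irq_vector(priv, i));
 *		if (rc)
 *			goto fail;
 *	}
 *	...
 *	free_irq_cpu_rmap(priv->rx_cpu_rmap);
 *	priv->rx_cpu_rmap = NULL;
 */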