1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * This file contains the routines for handling the MMU on those 4 * PowerPC implementations where the MMU is not using the hash 5 * table, such as 8xx, 4xx, BookE's etc... 6 * 7 * Copyright 2008 Ben Herrenschmidt <benh@kernel.crashing.org> 8 * IBM Corp. 9 * 10 * Derived from previous arch/powerpc/mm/mmu_context.c 11 * and arch/powerpc/include/asm/mmu_context.h 12 * 13 * TODO: 14 * 15 * - The global context lock will not scale very well 16 * - The maps should be dynamically allocated to allow for processors 17 * that support more PID bits at runtime 18 * - Implement flush_tlb_mm() by making the context stale and picking 19 * a new one 20 * - More aggressively clear stale map bits and maybe find some way to 21 * also clear mm->cpu_vm_mask bits when processes are migrated 22 */ 23 24 #include <linux/kernel.h> 25 #include <linux/mm.h> 26 #include <linux/init.h> 27 #include <linux/spinlock.h> 28 #include <linux/memblock.h> 29 #include <linux/notifier.h> 30 #include <linux/cpu.h> 31 #include <linux/slab.h> 32 33 #include <asm/mmu_context.h> 34 #include <asm/tlbflush.h> 35 #include <asm/smp.h> 36 #include <asm/kup.h> 37 38 #include <mm/mmu_decl.h> 39 40 /* 41 * Room for two PTE table pointers, usually the kernel and current user 42 * pointer to their respective root page table (pgdir). 43 */ 44 void *abatron_pteptrs[2]; 45 46 /* 47 * The MPC8xx has only 16 contexts. We rotate through them on each task switch. 48 * A better way would be to keep track of tasks that own contexts, and implement 49 * an LRU usage. That way very active tasks don't always have to pay the TLB 50 * reload overhead. The kernel pages are mapped shared, so the kernel can run on 51 * behalf of any task that makes a kernel entry. Shared does not mean they are 52 * not protected, just that the ASID comparison is not performed. -- Dan 53 * 54 * The IBM4xx has 256 contexts, so we can just rotate through these as a way of 55 * "switching" contexts. If the TID of the TLB is zero, the PID/TID comparison 56 * is disabled, so we can use a TID of zero to represent all kernel pages as 57 * shared among all contexts. -- Dan 58 * 59 * The IBM 47x core supports 16-bit PIDs, thus 65535 contexts. We should 60 * normally never have to steal though the facility is present if needed. 61 * -- BenH 62 */ 63 #define FIRST_CONTEXT 1 64 #if defined(CONFIG_PPC_8xx) 65 #define LAST_CONTEXT 16 66 #elif defined(CONFIG_PPC_47x) 67 #define LAST_CONTEXT 65535 68 #else 69 #define LAST_CONTEXT 255 70 #endif 71 72 static unsigned int next_context, nr_free_contexts; 73 static unsigned long *context_map; 74 static unsigned long *stale_map[NR_CPUS]; 75 static struct mm_struct **context_mm; 76 static DEFINE_RAW_SPINLOCK(context_lock); 77 78 #define CTX_MAP_SIZE \ 79 (sizeof(unsigned long) * (LAST_CONTEXT / BITS_PER_LONG + 1)) 80 81 82 /* Steal a context from a task that has one at the moment. 83 * 84 * This is used when we are running out of available PID numbers 85 * on the processors. 86 * 87 * This isn't an LRU system, it just frees up each context in 88 * turn (sort-of pseudo-random replacement :). This would be the 89 * place to implement an LRU scheme if anyone was motivated to do it. 90 * -- paulus 91 * 92 * For context stealing, we use a slightly different approach for 93 * SMP and UP. Basically, the UP one is simpler and doesn't use 94 * the stale map as we can just flush the local CPU 95 * -- benh 96 */ 97 static unsigned int steal_context_smp(unsigned int id) 98 { 99 struct mm_struct *mm; 100 unsigned int cpu, max, i; 101 102 max = LAST_CONTEXT - FIRST_CONTEXT; 103 104 /* Attempt to free next_context first and then loop until we manage */ 105 while (max--) { 106 /* Pick up the victim mm */ 107 mm = context_mm[id]; 108 109 /* We have a candidate victim, check if it's active, on SMP 110 * we cannot steal active contexts 111 */ 112 if (mm->context.active) { 113 id++; 114 if (id > LAST_CONTEXT) 115 id = FIRST_CONTEXT; 116 continue; 117 } 118 119 /* Mark this mm has having no context anymore */ 120 mm->context.id = MMU_NO_CONTEXT; 121 122 /* Mark it stale on all CPUs that used this mm. For threaded 123 * implementations, we set it on all threads on each core 124 * represented in the mask. A future implementation will use 125 * a core map instead but this will do for now. 126 */ 127 for_each_cpu(cpu, mm_cpumask(mm)) { 128 for (i = cpu_first_thread_sibling(cpu); 129 i <= cpu_last_thread_sibling(cpu); i++) { 130 if (stale_map[i]) 131 __set_bit(id, stale_map[i]); 132 } 133 cpu = i - 1; 134 } 135 return id; 136 } 137 138 /* This will happen if you have more CPUs than available contexts, 139 * all we can do here is wait a bit and try again 140 */ 141 raw_spin_unlock(&context_lock); 142 cpu_relax(); 143 raw_spin_lock(&context_lock); 144 145 /* This will cause the caller to try again */ 146 return MMU_NO_CONTEXT; 147 } 148 149 static unsigned int steal_all_contexts(void) 150 { 151 struct mm_struct *mm; 152 int cpu = smp_processor_id(); 153 unsigned int id; 154 155 for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) { 156 /* Pick up the victim mm */ 157 mm = context_mm[id]; 158 159 /* Mark this mm as having no context anymore */ 160 mm->context.id = MMU_NO_CONTEXT; 161 if (id != FIRST_CONTEXT) { 162 context_mm[id] = NULL; 163 __clear_bit(id, context_map); 164 } 165 if (IS_ENABLED(CONFIG_SMP)) 166 __clear_bit(id, stale_map[cpu]); 167 } 168 169 /* Flush the TLB for all contexts (not to be used on SMP) */ 170 _tlbil_all(); 171 172 nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT; 173 174 return FIRST_CONTEXT; 175 } 176 177 /* Note that this will also be called on SMP if all other CPUs are 178 * offlined, which means that it may be called for cpu != 0. For 179 * this to work, we somewhat assume that CPUs that are onlined 180 * come up with a fully clean TLB (or are cleaned when offlined) 181 */ 182 static unsigned int steal_context_up(unsigned int id) 183 { 184 struct mm_struct *mm; 185 int cpu = smp_processor_id(); 186 187 /* Pick up the victim mm */ 188 mm = context_mm[id]; 189 190 /* Flush the TLB for that context */ 191 local_flush_tlb_mm(mm); 192 193 /* Mark this mm has having no context anymore */ 194 mm->context.id = MMU_NO_CONTEXT; 195 196 /* XXX This clear should ultimately be part of local_flush_tlb_mm */ 197 if (IS_ENABLED(CONFIG_SMP)) 198 __clear_bit(id, stale_map[cpu]); 199 200 return id; 201 } 202 203 static void set_context(unsigned long id, pgd_t *pgd) 204 { 205 if (IS_ENABLED(CONFIG_PPC_8xx)) { 206 s16 offset = (s16)(__pa(swapper_pg_dir)); 207 208 /* 209 * Register M_TWB will contain base address of level 1 table minus the 210 * lower part of the kernel PGDIR base address, so that all accesses to 211 * level 1 table are done relative to lower part of kernel PGDIR base 212 * address. 213 */ 214 mtspr(SPRN_M_TWB, __pa(pgd) - offset); 215 216 /* Update context */ 217 mtspr(SPRN_M_CASID, id - 1); 218 219 /* sync */ 220 mb(); 221 } else if (kuap_is_disabled()) { 222 mtspr(SPRN_PID, id); 223 isync(); 224 } 225 } 226 227 void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, 228 struct task_struct *tsk) 229 { 230 unsigned int id; 231 unsigned int i, cpu = smp_processor_id(); 232 unsigned long *map; 233 234 /* No lockless fast path .. yet */ 235 raw_spin_lock(&context_lock); 236 237 if (IS_ENABLED(CONFIG_SMP)) { 238 /* Mark us active and the previous one not anymore */ 239 next->context.active++; 240 if (prev) { 241 WARN_ON(prev->context.active < 1); 242 prev->context.active--; 243 } 244 } 245 246 again: 247 248 /* If we already have a valid assigned context, skip all that */ 249 id = next->context.id; 250 if (likely(id != MMU_NO_CONTEXT)) 251 goto ctxt_ok; 252 253 /* We really don't have a context, let's try to acquire one */ 254 id = next_context; 255 if (id > LAST_CONTEXT) 256 id = FIRST_CONTEXT; 257 map = context_map; 258 259 /* No more free contexts, let's try to steal one */ 260 if (nr_free_contexts == 0) { 261 if (num_online_cpus() > 1) { 262 id = steal_context_smp(id); 263 if (id == MMU_NO_CONTEXT) 264 goto again; 265 goto stolen; 266 } 267 if (IS_ENABLED(CONFIG_PPC_8xx)) 268 id = steal_all_contexts(); 269 else 270 id = steal_context_up(id); 271 goto stolen; 272 } 273 nr_free_contexts--; 274 275 /* We know there's at least one free context, try to find it */ 276 while (__test_and_set_bit(id, map)) { 277 id = find_next_zero_bit(map, LAST_CONTEXT+1, id); 278 if (id > LAST_CONTEXT) 279 id = FIRST_CONTEXT; 280 } 281 stolen: 282 next_context = id + 1; 283 context_mm[id] = next; 284 next->context.id = id; 285 286 ctxt_ok: 287 288 /* If that context got marked stale on this CPU, then flush the 289 * local TLB for it and unmark it before we use it 290 */ 291 if (IS_ENABLED(CONFIG_SMP) && test_bit(id, stale_map[cpu])) { 292 local_flush_tlb_mm(next); 293 294 /* XXX This clear should ultimately be part of local_flush_tlb_mm */ 295 for (i = cpu_first_thread_sibling(cpu); 296 i <= cpu_last_thread_sibling(cpu); i++) { 297 if (stale_map[i]) 298 __clear_bit(id, stale_map[i]); 299 } 300 } 301 302 /* Flick the MMU and release lock */ 303 if (IS_ENABLED(CONFIG_BDI_SWITCH)) 304 abatron_pteptrs[1] = next->pgd; 305 set_context(id, next->pgd); 306 #if defined(CONFIG_BOOKE) && defined(CONFIG_PPC_KUAP) 307 tsk->thread.pid = id; 308 #endif 309 raw_spin_unlock(&context_lock); 310 } 311 312 /* 313 * Set up the context for a new address space. 314 */ 315 int init_new_context(struct task_struct *t, struct mm_struct *mm) 316 { 317 mm->context.id = MMU_NO_CONTEXT; 318 mm->context.active = 0; 319 pte_frag_set(&mm->context, NULL); 320 return 0; 321 } 322 323 /* 324 * We're finished using the context for an address space. 325 */ 326 void destroy_context(struct mm_struct *mm) 327 { 328 unsigned long flags; 329 unsigned int id; 330 331 if (mm->context.id == MMU_NO_CONTEXT) 332 return; 333 334 WARN_ON(mm->context.active != 0); 335 336 raw_spin_lock_irqsave(&context_lock, flags); 337 id = mm->context.id; 338 if (id != MMU_NO_CONTEXT) { 339 __clear_bit(id, context_map); 340 mm->context.id = MMU_NO_CONTEXT; 341 context_mm[id] = NULL; 342 nr_free_contexts++; 343 } 344 raw_spin_unlock_irqrestore(&context_lock, flags); 345 } 346 347 static int mmu_ctx_cpu_prepare(unsigned int cpu) 348 { 349 /* We don't touch CPU 0 map, it's allocated at aboot and kept 350 * around forever 351 */ 352 if (cpu == boot_cpuid) 353 return 0; 354 355 stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL); 356 return 0; 357 } 358 359 static int mmu_ctx_cpu_dead(unsigned int cpu) 360 { 361 #ifdef CONFIG_HOTPLUG_CPU 362 if (cpu == boot_cpuid) 363 return 0; 364 365 kfree(stale_map[cpu]); 366 stale_map[cpu] = NULL; 367 368 /* We also clear the cpu_vm_mask bits of CPUs going away */ 369 clear_tasks_mm_cpumask(cpu); 370 #endif 371 return 0; 372 } 373 374 /* 375 * Initialize the context management stuff. 376 */ 377 void __init mmu_context_init(void) 378 { 379 /* Mark init_mm as being active on all possible CPUs since 380 * we'll get called with prev == init_mm the first time 381 * we schedule on a given CPU 382 */ 383 init_mm.context.active = NR_CPUS; 384 385 /* 386 * Allocate the maps used by context management 387 */ 388 context_map = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES); 389 if (!context_map) 390 panic("%s: Failed to allocate %zu bytes\n", __func__, 391 CTX_MAP_SIZE); 392 context_mm = memblock_alloc(sizeof(void *) * (LAST_CONTEXT + 1), 393 SMP_CACHE_BYTES); 394 if (!context_mm) 395 panic("%s: Failed to allocate %zu bytes\n", __func__, 396 sizeof(void *) * (LAST_CONTEXT + 1)); 397 if (IS_ENABLED(CONFIG_SMP)) { 398 stale_map[boot_cpuid] = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES); 399 if (!stale_map[boot_cpuid]) 400 panic("%s: Failed to allocate %zu bytes\n", __func__, 401 CTX_MAP_SIZE); 402 403 cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE, 404 "powerpc/mmu/ctx:prepare", 405 mmu_ctx_cpu_prepare, mmu_ctx_cpu_dead); 406 } 407 408 printk(KERN_INFO 409 "MMU: Allocated %zu bytes of context maps for %d contexts\n", 410 2 * CTX_MAP_SIZE + (sizeof(void *) * (LAST_CONTEXT + 1)), 411 LAST_CONTEXT - FIRST_CONTEXT + 1); 412 413 /* 414 * Some processors have too few contexts to reserve one for 415 * init_mm, and require using context 0 for a normal task. 416 * Other processors reserve the use of context zero for the kernel. 417 * This code assumes FIRST_CONTEXT < 32. 418 */ 419 context_map[0] = (1 << FIRST_CONTEXT) - 1; 420 next_context = FIRST_CONTEXT; 421 nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT + 1; 422 } 423