Lines Matching +full:cpu +full:- +full:ns

1 // SPDX-License-Identifier: GPL-2.0-only
9 #include <linux/cpu.h>
18 #include <asm/nospec-branch.h>
39 * TLB flushing, formerly SMP-only
70 * Instead we have a small per-cpu array of ASIDs and cache the last few mm's
71 * that came by on this CPU, allowing cheaper switch_mm between processes on
72 * this CPU.
77 * ASID - [0, TLB_NR_DYN_ASIDS-1]
78 * the canonical identifier for an mm, dynamically allocated on each CPU
79 * [TLB_NR_DYN_ASIDS, MAX_ASID_AVAILABLE-1]
82 * kPCID - [1, MAX_ASID_AVAILABLE]
86 * uPCID - [2048 + 1, 2048 + MAX_ASID_AVAILABLE]
103 #define CR3_AVAIL_PCID_BITS (X86_CR3_PCID_BITS - PTI_CONSUMED_PCID_BITS)
106 * ASIDs are zero-based: 0->MAX_ASID_AVAILABLE are valid. -1 below to account
107 * for them being zero-based. Another -1 is because PCID 0 is reserved for
108 * use by non-PCID-aware users.
110 #define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_PCID_BITS) - 2)
133 * The dynamically-assigned ASIDs that get passed in are small in kern_pcid()
137 * If PCID is on, ASID-aware code paths put the ASID+1 into the in kern_pcid()
139 * situation in which PCID-unaware code saves CR3, loads some other in kern_pcid()
142 * that any bugs involving loading a PCID-enabled CR3 with in kern_pcid()
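
The ASID-to-PCID mapping described above is plain arithmetic: kPCID = ASID + 1 (so PCID 0 stays reserved for non-PCID-aware code), and with page-table isolation the user copy is the same value with the high switch bit set, which is where the 2048 offset in the uPCID range comes from. Below is a minimal standalone sketch of that arithmetic, assuming bit 11 (value 2048) is the user/kernel switch bit; the sketch_* helpers only mirror kern_pcid()/user_pcid() and are illustrative, not the kernel code.

#include <stdio.h>

#define PTI_USER_PCID_BIT	11				/* assumption: user/kernel switch bit */
#define PTI_USER_PCID_MASK	(1u << PTI_USER_PCID_BIT)	/* == 2048 */

/* kPCID: the zero-based ASID shifted up by one so PCID 0 is never used. */
static unsigned int sketch_kern_pcid(unsigned int asid)
{
	return asid + 1;
}

/* uPCID: the same value with the switch bit set, giving the 2048+N range above. */
static unsigned int sketch_user_pcid(unsigned int asid)
{
	return sketch_kern_pcid(asid) | PTI_USER_PCID_MASK;
}

int main(void)
{
	for (unsigned int asid = 0; asid < 6; asid++)
		printf("ASID %u -> kPCID %u, uPCID %u\n", asid,
		       sketch_kern_pcid(asid), sketch_user_pcid(asid));
	return 0;
}
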
179 * boot because all CPUs have the same capabilities: in build_cr3_noflush()
226 struct new_asid ns; in choose_new_asid() local
230 ns.asid = 0; in choose_new_asid()
231 ns.need_flush = 1; in choose_new_asid()
232 return ns; in choose_new_asid()
243 ns.asid = global_asid; in choose_new_asid()
244 ns.need_flush = 0; in choose_new_asid()
245 return ns; in choose_new_asid()
254 next->context.ctx_id) in choose_new_asid()
257 ns.asid = asid; in choose_new_asid()
258 ns.need_flush = (this_cpu_read(cpu_tlbstate.ctxs[asid].tlb_gen) < next_tlb_gen); in choose_new_asid()
259 return ns; in choose_new_asid()
263 * We don't currently own an ASID slot on this CPU. in choose_new_asid()
266 ns.asid = this_cpu_add_return(cpu_tlbstate.next_asid, 1) - 1; in choose_new_asid()
267 if (ns.asid >= TLB_NR_DYN_ASIDS) { in choose_new_asid()
268 ns.asid = 0; in choose_new_asid()
271 ns.need_flush = true; in choose_new_asid()
273 return ns; in choose_new_asid()
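
Pieced together, choose_new_asid() resolves to one of four outcomes: no PCID support (slot 0, flush), a process-wide global ASID (no flush), a per-CPU slot already caching this mm (flush only if its recorded generation is behind next_tlb_gen), or eviction of the round-robin next slot (flush). The following is a simplified userspace sketch of that decision order, with the per-CPU state modelled as plain arrays; TLB_NR_DYN_ASIDS is set to an arbitrary small value and the sketch_* naming marks everything here as illustrative rather than the kernel implementation.

#include <stdbool.h>
#include <stdint.h>

#define TLB_NR_DYN_ASIDS 6	/* assumption: a small per-CPU slot count */

struct new_asid {
	unsigned int asid : 15;
	unsigned int need_flush : 1;
};

/* Illustrative stand-ins for this CPU's cpu_tlbstate fields. */
static struct { uint64_t ctx_id; uint64_t tlb_gen; } ctxs[TLB_NR_DYN_ASIDS];
static uint16_t next_asid;

struct new_asid sketch_choose_new_asid(bool have_pcid, uint16_t global_asid,
				       uint64_t next_ctx_id, uint64_t next_tlb_gen)
{
	struct new_asid ns;
	uint16_t asid;

	if (!have_pcid) {		/* no PCID: always slot 0, always flush */
		ns.asid = 0;
		ns.need_flush = 1;
		return ns;
	}

	if (global_asid) {		/* broadcast-capable mm: reuse its global ASID */
		ns.asid = global_asid;
		ns.need_flush = 0;
		return ns;
	}

	for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
		if (ctxs[asid].ctx_id != next_ctx_id)
			continue;
		ns.asid = asid;		/* cached slot: flush only if its generation is stale */
		ns.need_flush = (ctxs[asid].tlb_gen < next_tlb_gen);
		return ns;
	}

	/* No slot owned by this mm: evict the round-robin next slot and flush. */
	asid = next_asid++;
	if (asid >= TLB_NR_DYN_ASIDS) {
		asid = 0;
		next_asid = 1;
	}
	ns.asid = asid;
	ns.need_flush = 1;
	return ns;
}
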
277 * Global ASIDs are allocated for multi-threaded processes that are
279 * processes the same PCID on every CPU, for use with hardware-assisted
288 static int global_asid_available = MAX_ASID_AVAILABLE - TLB_NR_DYN_ASIDS - 1;
293 * freed global ASIDs are safe to re-use.
305 * The TLB flush above makes it safe to re-use the previously in reset_global_asid_space()
323 if (last_global_asid >= MAX_ASID_AVAILABLE - 1) in allocate_global_asid()
338 global_asid_available--; in allocate_global_asid()
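
The global ASID pool above behaves like a bump allocator over a bitmap: allocation scans forward from the last handed-out ASID, freed ASIDs are only recycled after the wrap-around flush, and global_asid_available counts what is left. Here is a self-contained model of that scheme, using bool arrays and a plain loop in place of the kernel's bitmap helpers, and assuming 11 usable PCID bits; the sketch_* functions are stand-ins for reset_global_asid_space() and allocate_global_asid(), not their actual bodies.

#include <stdbool.h>
#include <stdint.h>

#define CR3_AVAIL_PCID_BITS	11	/* assumption: 12 PCID bits minus one consumed by PTI */
#define MAX_ASID_AVAILABLE	((1 << CR3_AVAIL_PCID_BITS) - 2)
#define TLB_NR_DYN_ASIDS	6	/* the dynamic per-CPU ASIDs are excluded from the pool */

static bool global_asid_used[MAX_ASID_AVAILABLE];
static bool global_asid_freed[MAX_ASID_AVAILABLE];
static uint16_t last_global_asid = TLB_NR_DYN_ASIDS;
static int global_asid_available = MAX_ASID_AVAILABLE - TLB_NR_DYN_ASIDS - 1;

/* Stand-in for reset_global_asid_space(): after the wrap-around flush, the
 * previously freed ASIDs become safe to hand out again. */
void sketch_reset_global_asid_space(void)
{
	for (int i = TLB_NR_DYN_ASIDS; i < MAX_ASID_AVAILABLE; i++) {
		if (global_asid_freed[i]) {
			global_asid_used[i] = false;
			global_asid_freed[i] = false;
		}
	}
	last_global_asid = TLB_NR_DYN_ASIDS;
}

/* Stand-in for allocate_global_asid(): bump forward from the last allocation,
 * wrapping via the reset above when the top of the space is reached. */
uint16_t sketch_allocate_global_asid(void)
{
	if (last_global_asid >= MAX_ASID_AVAILABLE - 1)
		sketch_reset_global_asid_space();

	for (uint16_t asid = last_global_asid; asid < MAX_ASID_AVAILABLE; asid++) {
		if (global_asid_used[asid])
			continue;
		global_asid_used[asid] = true;
		last_global_asid = asid;
		global_asid_available--;
		return asid;
	}
	return 0;	/* pool exhausted; the kernel warns here instead */
}
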
350 int cpu; in mm_active_cpus_exceeds() local
357 for_each_cpu(cpu, mm_cpumask(mm)) { in mm_active_cpus_exceeds()
359 if (per_cpu(cpu_tlbstate.loaded_mm, cpu) != mm) in mm_active_cpus_exceeds()
362 if (per_cpu(cpu_tlbstate_shared.is_lazy, cpu)) in mm_active_cpus_exceeds()
413 /* The global ASID can be re-used only after flush at wrap-around. */ in mm_free_global_asid()
415 __set_bit(mm->context.global_asid, global_asid_freed); in mm_free_global_asid()
417 mm->context.global_asid = 0; in mm_free_global_asid()
423 * Is the mm transitioning from a CPU-local ASID to a global ASID?
451 if ((current->pid & 0x1f) != (jiffies & 0x1f)) in consider_global_asid()
464 struct mm_struct *mm = info->mm; in finish_asid_transition()
466 int cpu; in finish_asid_transition() local
471 for_each_cpu(cpu, mm_cpumask(mm)) { in finish_asid_transition()
473 * The remote CPU is context switching. Wait for that to in finish_asid_transition()
477 while (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm, cpu)) == LOADED_MM_SWITCHING) in finish_asid_transition()
480 if (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm, cpu)) != mm) in finish_asid_transition()
484 * If at least one CPU is not using the global ASID yet, in finish_asid_transition()
488 * This can race with the CPU switching to another task; in finish_asid_transition()
491 if (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm_asid, cpu)) != bc_asid) { in finish_asid_transition()
492 flush_tlb_multi(mm_cpumask(info->mm), info); in finish_asid_transition()
503 bool pmd = info->stride_shift == PMD_SHIFT; in broadcast_tlb_flush()
504 unsigned long asid = mm_global_asid(info->mm); in broadcast_tlb_flush()
505 unsigned long addr = info->start; in broadcast_tlb_flush()
512 if (info->end == TLB_FLUSH_ALL) { in broadcast_tlb_flush()
520 if (info->stride_shift <= PMD_SHIFT) { in broadcast_tlb_flush()
521 nr = (info->end - addr) >> info->stride_shift; in broadcast_tlb_flush()
529 addr += nr << info->stride_shift; in broadcast_tlb_flush()
530 } while (addr < info->end); in broadcast_tlb_flush()
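
The broadcast flush loop above walks the range in hardware-sized chunks: each invalidation covers up to some maximum number of pages (nr), and the cursor advances by nr << stride_shift until it passes info->end. Below is a stripped-down model of just that chunking arithmetic, with a made-up COUNT_MAX standing in for the CPU's advertised invlpgb_count_max and the flush itself reduced to a printf.

#include <stdio.h>

#define STRIDE_SHIFT	12	/* 4 KiB pages for this example */
#define COUNT_MAX	8	/* assumption: max pages one broadcast invalidation may cover */

/* Walk [start, end) in chunks of at most COUNT_MAX pages, the way the
 * broadcast_tlb_flush() loop does with its per-instruction page limit. */
static void sketch_broadcast_flush_range(unsigned long start, unsigned long end)
{
	unsigned long addr = start;

	do {
		unsigned long nr = (end - addr) >> STRIDE_SHIFT;

		if (nr < 1)
			nr = 1;
		if (nr > COUNT_MAX)
			nr = COUNT_MAX;

		printf("flush %lu page(s) at %#lx\n", nr, addr);
		addr += nr << STRIDE_SHIFT;
	} while (addr < end);
}

int main(void)
{
	/* 19 pages: expect chunks of 8, 8 and 3. */
	sketch_broadcast_flush_range(0x100000, 0x100000 + 19 * 4096UL);
	return 0;
}
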
617 * Invoked from return to user/guest by a task that opted-in to L1D
619 * affinity settings or CPU hotplug. This is part of the paranoid L1D flush
645 clear_ti_thread_flag(&next->thread_info, TIF_SPEC_L1D_FLUSH); in l1d_flush_evaluate()
646 next->l1d_flush_kill.func = l1d_flush_force_sigbus; in l1d_flush_evaluate()
647 task_work_add(next, &next->l1d_flush_kill, TWA_RESUME); in l1d_flush_evaluate()
662 return (unsigned long)next->mm | spec_bits; in mm_mangle_tif_spec_bits()
669 if (!next || !next->mm) in cond_mitigation()
676 * Avoid user->user BTB/RSB poisoning by flushing them when switching in cond_mitigation()
677 * between processes. This stops one process from doing Spectre-v2 in cond_mitigation()
682 * same process. Using the mm pointer instead of mm->context.ctx_id in cond_mitigation()
705 * - the same user space task is scheduled out and later in cond_mitigation()
709 * - a user space task belonging to the same process is in cond_mitigation()
712 * - a user space task belonging to the same process is in cond_mitigation()
732 * last on this CPU. in cond_mitigation()
734 if ((prev_mm & ~LAST_USER_MM_SPEC_MASK) != (unsigned long)next->mm) in cond_mitigation()
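
The last-user-mm tracking in cond_mitigation() relies on pointer alignment: an mm_struct pointer has zeroed low bits, so the previous task's speculation-control flags can be OR-ed into them (mm_mangle_tif_spec_bits()) and masked back off when comparing against the next mm. The following is a small standalone illustration of that packing trick; the SPEC_* flag values and mask are invented for the example and are not the kernel's LAST_USER_MM_* definitions.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative flag bits living in the (always zero) low bits of an
 * aligned pointer; stand-ins for the real spec-control bits. */
#define SPEC_IBPB	0x1UL
#define SPEC_L1D_FLUSH	0x2UL
#define SPEC_MASK	(SPEC_IBPB | SPEC_L1D_FLUSH)

struct mm { long dummy; } __attribute__((aligned(8)));

static unsigned long mangle(struct mm *mm, unsigned long spec_bits)
{
	return (unsigned long)mm | spec_bits;	/* pointer identity + flags in one word */
}

static bool same_mm(unsigned long prev_mangled, struct mm *next)
{
	return (prev_mangled & ~SPEC_MASK) == (unsigned long)next;
}

int main(void)
{
	static struct mm a, b;
	unsigned long prev = mangle(&a, SPEC_IBPB);

	printf("prev had IBPB set: %d\n", !!(prev & SPEC_IBPB));
	printf("switching back to a, same mm: %d\n", same_mm(prev, &a));
	printf("switching to b, same mm: %d\n", same_mm(prev, &b));
	return 0;
}
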
756 atomic_read(&mm->context.perf_rdpmc_allowed))) { in cr4_update_pce_mm()
780 * 'current->active_mm' up to date.
788 unsigned cpu = smp_processor_id(); in switch_mm_irqs_off() local
790 struct new_asid ns; in switch_mm_irqs_off() local
808 if (WARN_ON_ONCE(__read_cr3() != build_cr3(prev->pgd, prev_asid, in switch_mm_irqs_off()
817 * Architecturally, the CPU could prefetch something in switch_mm_irqs_off()
830 * core serialization before returning to user-space, after in switch_mm_irqs_off()
831 * storing to rq->curr, when changing mm. This is because in switch_mm_irqs_off()
833 * to make them issue memory barriers. However, if another CPU in switch_mm_irqs_off()
835 * membarrier(), it can cause that CPU not to receive an IPI in switch_mm_irqs_off()
844 next->context.ctx_id); in switch_mm_irqs_off()
852 * Even in lazy TLB mode, the CPU should stay set in the in switch_mm_irqs_off()
858 !cpumask_test_cpu(cpu, mm_cpumask(next)))) in switch_mm_irqs_off()
859 cpumask_set_cpu(cpu, mm_cpumask(next)); in switch_mm_irqs_off()
863 next_tlb_gen = atomic64_read(&next->context.tlb_gen); in switch_mm_irqs_off()
864 ns = choose_new_asid(next, next_tlb_gen); in switch_mm_irqs_off()
876 * If the CPU is not in lazy TLB mode, we are just switching in switch_mm_irqs_off()
890 next_tlb_gen = atomic64_read(&next->context.tlb_gen); in switch_mm_irqs_off()
899 ns.asid = prev_asid; in switch_mm_irqs_off()
900 ns.need_flush = true; in switch_mm_irqs_off()
916 * Make sure this CPU is set in mm_cpumask() such that we'll in switch_mm_irqs_off()
928 * loaded_mm load can happen in native_flush_tlb_multi() -> in switch_mm_irqs_off()
934 if (next != &init_mm && !cpumask_test_cpu(cpu, mm_cpumask(next))) in switch_mm_irqs_off()
935 cpumask_set_cpu(cpu, mm_cpumask(next)); in switch_mm_irqs_off()
939 next_tlb_gen = atomic64_read(&next->context.tlb_gen); in switch_mm_irqs_off()
941 ns = choose_new_asid(next, next_tlb_gen); in switch_mm_irqs_off()
946 if (ns.need_flush) { in switch_mm_irqs_off()
947 VM_WARN_ON_ONCE(is_global_asid(ns.asid)); in switch_mm_irqs_off()
948 this_cpu_write(cpu_tlbstate.ctxs[ns.asid].ctx_id, next->context.ctx_id); in switch_mm_irqs_off()
949 this_cpu_write(cpu_tlbstate.ctxs[ns.asid].tlb_gen, next_tlb_gen); in switch_mm_irqs_off()
950 load_new_mm_cr3(next->pgd, ns.asid, new_lam, true); in switch_mm_irqs_off()
955 load_new_mm_cr3(next->pgd, ns.asid, new_lam, false); in switch_mm_irqs_off()
964 this_cpu_write(cpu_tlbstate.loaded_mm_asid, ns.asid); in switch_mm_irqs_off()
998 * temporary page-table mappings that are required for these write operations to
1003 * It is illegal to schedule while using a temporary mm -- the context switch
1005 * Use a real (non-temporary) mm in a kernel thread if you need to sleep.
1066 * Call this when reinitializing a CPU. It fixes the following potential
1069 * - The ASID changed from what cpu_tlbstate thinks it is (most likely
1070 * because the CPU was taken down and came back up with CR3's PCID
1071 * bits clear. CPU hotplug can do this.
1073 * - The TLB contains junk in slots corresponding to inactive ASIDs.
1075 * - The CPU went so far out to lunch that it may have missed a TLB
1087 WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd)); in initialize_tlbstate_and_flush()
1102 write_cr3(build_cr3(mm->pgd, 0, 0)); in initialize_tlbstate_and_flush()
1108 this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id); in initialize_tlbstate_and_flush()
1128 * - mm_tlb_gen: the latest generation. in flush_tlb_func()
1129 * - local_tlb_gen: the generation that this CPU has already caught in flush_tlb_func()
1131 * - f->new_tlb_gen: the generation that the requester of the flush in flush_tlb_func()
1138 bool local = smp_processor_id() == f->initiating_cpu; in flush_tlb_func()
1150 /* The CPU was left in the mm_cpumask of the target mm. Clear it. */ in flush_tlb_func()
1151 if (f->mm && f->mm != loaded_mm) { in flush_tlb_func()
1152 cpumask_clear_cpu(raw_smp_processor_id(), mm_cpumask(f->mm)); in flush_tlb_func()
1171 loaded_mm->context.ctx_id); in flush_tlb_func()
1176 * paging-structure cache to avoid speculatively reading in flush_tlb_func()
1189 if (unlikely(f->new_tlb_gen != TLB_GENERATION_INVALID && in flush_tlb_func()
1190 f->new_tlb_gen <= local_tlb_gen)) { in flush_tlb_func()
1192 * The TLB is already up to date with respect to f->new_tlb_gen. in flush_tlb_func()
1204 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen); in flush_tlb_func()
1209 * happen if two concurrent flushes happen -- the first flush to in flush_tlb_func()
1217 WARN_ON_ONCE(f->new_tlb_gen > mm_tlb_gen); in flush_tlb_func()
1222 * possible that f->new_tlb_gen <= local_tlb_gen), but we're in flush_tlb_func()
1231 * 1. f->new_tlb_gen == local_tlb_gen + 1. We have an invariant that in flush_tlb_func()
1234 * f->new_tlb_gen == 3, then we know that the flush needed to bring in flush_tlb_func()
1242 * processed on this CPU in reverse order, we'll see in flush_tlb_func()
1248 * 2. f->new_tlb_gen == mm_tlb_gen. This is purely an optimization. in flush_tlb_func()
1256 if (f->end != TLB_FLUSH_ALL && in flush_tlb_func()
1257 f->new_tlb_gen == local_tlb_gen + 1 && in flush_tlb_func()
1258 f->new_tlb_gen == mm_tlb_gen) { in flush_tlb_func()
1260 unsigned long addr = f->start; in flush_tlb_func()
1263 VM_WARN_ON(f->new_tlb_gen == TLB_GENERATION_INVALID); in flush_tlb_func()
1266 VM_WARN_ON(f->mm == NULL); in flush_tlb_func()
1268 nr_invalidate = (f->end - f->start) >> f->stride_shift; in flush_tlb_func()
1270 while (addr < f->end) { in flush_tlb_func()
1272 addr += 1UL << f->stride_shift; in flush_tlb_func()
1291 (f->mm == NULL) ? TLB_LOCAL_SHOOTDOWN : in flush_tlb_func()
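
The generation bookkeeping in flush_tlb_func() comes down to three numbers: mm_tlb_gen (the newest generation for the mm), local_tlb_gen (what this CPU has already caught up to), and f->new_tlb_gen (what the requester needs). Below is a compact sketch of the resulting decision: skip the flush when already caught up, do the cheap ranged flush only when this CPU is exactly one generation behind and that generation is also the newest, and otherwise fall back to a full flush; the enum and helper are illustrative, not kernel API.

#include <stdint.h>
#include <stdio.h>

enum flush_action { FLUSH_NOTHING, FLUSH_RANGE, FLUSH_ALL_FALLBACK };

/* Decide what flush_tlb_func()-style logic would do, given the three
 * generations described above and whether the request is ranged. */
static enum flush_action decide_flush(uint64_t local_tlb_gen, uint64_t mm_tlb_gen,
				      uint64_t new_tlb_gen, int ranged_request)
{
	if (new_tlb_gen <= local_tlb_gen)
		return FLUSH_NOTHING;		/* this CPU already caught up */

	if (ranged_request &&
	    new_tlb_gen == local_tlb_gen + 1 &&	/* exactly one generation behind */
	    new_tlb_gen == mm_tlb_gen)		/* and nothing newer is pending */
		return FLUSH_RANGE;

	return FLUSH_ALL_FALLBACK;		/* a partial flush cannot be proven safe */
}

int main(void)
{
	/* Caught up: local=3 already covers a request for gen 3. */
	printf("%d\n", decide_flush(3, 3, 3, 1) == FLUSH_NOTHING);
	/* One behind and newest: the ranged flush is enough. */
	printf("%d\n", decide_flush(2, 3, 3, 1) == FLUSH_RANGE);
	/* Two behind: intermediate requests were coalesced, flush everything. */
	printf("%d\n", decide_flush(1, 3, 3, 1) == FLUSH_ALL_FALLBACK);
	return 0;
}

After either kind of flush the local generation is advanced to mm_tlb_gen, which is why jumping straight over intermediate generations is safe.
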
1296 static bool should_flush_tlb(int cpu, void *data) in should_flush_tlb() argument
1298 struct mm_struct *loaded_mm = per_cpu(cpu_tlbstate.loaded_mm, cpu); in should_flush_tlb()
1309 if (per_cpu(cpu_tlbstate_shared.is_lazy, cpu)) in should_flush_tlb()
1313 if (!info->mm) in should_flush_tlb()
1317 * While switching, the remote CPU could have state from in should_flush_tlb()
1323 /* The target mm is loaded, and the CPU is not lazy. */ in should_flush_tlb()
1324 if (loaded_mm == info->mm) in should_flush_tlb()
1328 if (info->trim_cpumask) in should_flush_tlb()
1336 if (time_after(jiffies, READ_ONCE(mm->context.next_trim_cpumask))) { in should_trim_cpumask()
1337 WRITE_ONCE(mm->context.next_trim_cpumask, jiffies + HZ); in should_trim_cpumask()
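
should_trim_cpumask() rate-limits cpumask trimming to roughly once per second: the mm records the next permitted trim time in jiffies, and a flush only requests trimming once that deadline has passed, pushing it another HZ ahead. Here is a tiny model of the pattern with an integer counter standing in for jiffies; the wrap-safe time_after() comparison is replaced by a plain '>' for brevity.

#include <stdbool.h>
#include <stdio.h>

#define HZ 100	/* assumption: 100 ticks per second for the example */

static unsigned long jiffies;		/* stand-in tick counter */
static unsigned long next_trim;		/* the per-mm deadline in the kernel */

/* Allow one trim per second; callers in between get "false". */
static bool sketch_should_trim(void)
{
	if (jiffies > next_trim) {	/* the kernel uses wrap-safe time_after() */
		next_trim = jiffies + HZ;
		return true;
	}
	return false;
}

int main(void)
{
	for (jiffies = 0; jiffies <= 250; jiffies += 50)
		printf("tick %lu: trim=%d\n", jiffies, sketch_should_trim());
	return 0;
}
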
1355 if (info->end == TLB_FLUSH_ALL) in native_flush_tlb_multi()
1359 (info->end - info->start) >> PAGE_SHIFT); in native_flush_tlb_multi()
1363 * CPUs in lazy TLB mode. They will flush the CPU themselves in native_flush_tlb_multi()
1371 if (info->freed_tables || mm_in_asid_transition(info->mm)) in native_flush_tlb_multi()
1389 * flush is about 100 ns, so this caps the maximum overhead at
1390 * _about_ 3,000 ns.
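
For reference, the figure follows directly from the per-page flush ceiling: assuming the upstream default tlb_single_page_flush_ceiling of 33 pages, 33 flushes x ~100 ns is roughly 3,300 ns, which is the quoted "_about_ 3,000 ns" worst case; anything larger is handled with a single full flush instead.
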
1411 * Ensure that the following code is non-reentrant and flush_tlb_info in get_flush_tlb_info()
1413 * interrupt handlers and machine-check exception handlers. in get_flush_tlb_info()
1422 if ((end - start) >> stride_shift > tlb_single_page_flush_ceiling) { in get_flush_tlb_info()
1427 info->start = start; in get_flush_tlb_info()
1428 info->end = end; in get_flush_tlb_info()
1429 info->mm = mm; in get_flush_tlb_info()
1430 info->stride_shift = stride_shift; in get_flush_tlb_info()
1431 info->freed_tables = freed_tables; in get_flush_tlb_info()
1432 info->new_tlb_gen = new_tlb_gen; in get_flush_tlb_info()
1433 info->initiating_cpu = smp_processor_id(); in get_flush_tlb_info()
1434 info->trim_cpumask = 0; in get_flush_tlb_info()
1453 int cpu = get_cpu(); in flush_tlb_mm_range() local
1464 * a local TLB flush is needed. Optimize this use-case by calling in flush_tlb_mm_range()
1469 } else if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) { in flush_tlb_mm_range()
1470 info->trim_cpumask = should_trim_cpumask(mm); in flush_tlb_mm_range()
1495 /* First try (faster) hardware-assisted TLB invalidation. */ in flush_tlb_all()
1499 /* Fall back to the IPI-based invalidation. */ in flush_tlb_all()
1508 for (addr = info->start; addr < info->end; addr += nr << PAGE_SHIFT) { in invlpgb_kernel_range_flush()
1509 nr = (info->end - addr) >> PAGE_SHIFT; in invlpgb_kernel_range_flush()
1528 for (addr = f->start; addr < f->end; addr += PAGE_SIZE) in do_kernel_range_flush()
1557 if (info->end == TLB_FLUSH_ALL) in flush_tlb_kernel_range()
1575 build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd, in __get_current_cr3_fast()
1600 * If PTI is on, then the kernel is mapped with non-global PTEs, and in flush_tlb_one_kernel()
1639 * 'cpu_pcide' to ensure that *this* CPU will not trigger those in native_flush_tlb_one_user()
1672 * Read-modify-write to CR4 - protect it from preemption and in native_flush_tlb_global()
1690 * to the per CPU variable and to prevent being preempted between in native_flush_tlb_local()
1697 /* If current->mm == NULL then the read_cr3() "borrows" an mm */ in native_flush_tlb_local()
1721 * !PGE -> !PCID (setup_pcid()), thus every flush is total. in __flush_tlb_all()
1732 int cpu = get_cpu(); in arch_tlbbatch_flush() local
1738 * a local TLB flush is needed. Optimize this use-case by calling in arch_tlbbatch_flush()
1741 if (cpu_feature_enabled(X86_FEATURE_INVLPGB) && batch->unmapped_pages) { in arch_tlbbatch_flush()
1743 batch->unmapped_pages = false; in arch_tlbbatch_flush()
1744 } else if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) { in arch_tlbbatch_flush()
1745 flush_tlb_multi(&batch->cpumask, info); in arch_tlbbatch_flush()
1746 } else if (cpumask_test_cpu(cpu, &batch->cpumask)) { in arch_tlbbatch_flush()
1753 cpumask_clear(&batch->cpumask); in arch_tlbbatch_flush()
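
arch_tlbbatch_flush() picks the cheapest mechanism that still covers every CPU in the batch: an INVLPGB-based broadcast when the feature is available and pages were unmapped, an IPI-driven flush_tlb_multi() when remote CPUs are in the batch cpumask, or a purely local flush when only this CPU is; the cpumask is cleared afterwards either way. A condensed sketch of that three-way dispatch follows, with the mechanisms reduced to labels; sketch_tlbbatch_flush() is a placeholder, not a kernel function.

#include <stdbool.h>
#include <stdio.h>

/* Condensed decision order of the batched-unmap flush described above;
 * the string labels stand in for the real mechanisms. */
static const char *sketch_tlbbatch_flush(bool have_invlpgb, bool unmapped_pages,
					 bool remote_cpu_in_batch, bool this_cpu_in_batch)
{
	if (have_invlpgb && unmapped_pages)
		return "broadcast: hardware-assisted flush, no IPIs";
	if (remote_cpu_in_batch)
		return "IPI: flush_tlb_multi() over the batch cpumask";
	if (this_cpu_in_batch)
		return "local: flush only this CPU";
	return "nothing to flush";
}

int main(void)
{
	printf("%s\n", sketch_tlbbatch_flush(true, true, true, true));
	printf("%s\n", sketch_tlbbatch_flush(false, false, true, true));
	printf("%s\n", sketch_tlbbatch_flush(false, false, false, true));
	return 0;
}
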
1769 struct mm_struct *current_mm = current->mm; in nmi_uaccess_okay()
1775 * current_mm->pgd == __va(read_cr3_pa()). This may be slow, though, in nmi_uaccess_okay()
1786 VM_WARN_ON_ONCE(__pa(current_mm->pgd) != read_cr3_pa()); in nmi_uaccess_okay()
1808 len = min(count, sizeof(buf) - 1); in tlbflush_write_file()
1810 return -EFAULT; in tlbflush_write_file()
1814 return -EINVAL; in tlbflush_write_file()
1817 return -EINVAL; in tlbflush_write_file()