Lines Matching +full:atomic +full:- +full:threshold +full:- +full:us

1 // SPDX-License-Identifier: GPL-2.0-only
19 #include <asm/nospec-branch.h>
40 * TLB flushing, formerly SMP-only
71 * Instead we have a small per-cpu array of ASIDs and cache the last few mm's
78 * ASID - [0, TLB_NR_DYN_ASIDS-1]
80 * [TLB_NR_DYN_ASIDS, MAX_ASID_AVAILABLE-1]
83 * kPCID - [1, MAX_ASID_AVAILABLE]
87 * uPCID - [2048 + 1, 2048 + MAX_ASID_AVAILABLE]
104 #define CR3_AVAIL_PCID_BITS (X86_CR3_PCID_BITS - PTI_CONSUMED_PCID_BITS)
107 * ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid. -1 below to account
108 * for them being zero-based. Another -1 is because PCID 0 is reserved for
109 * use by non-PCID-aware users.
111 #define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_PCID_BITS) - 2)
134 * The dynamically-assigned ASIDs that get passed in are small in kern_pcid()
138 * If PCID is on, ASID-aware code paths put the ASID+1 into the in kern_pcid()
140 * situation in which PCID-unaware code saves CR3, loads some other in kern_pcid()
143 * that any bugs involving loading a PCID-enabled CR3 with in kern_pcid()
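
The kern_pcid() fragments above describe the ASID-to-PCID mapping: the PCID written into CR3 is ASID + 1 so that PCID 0 stays reserved for PCID-unaware code, and the KPTI user half gets the same value plus 2048. A minimal stand-alone sketch of that mapping, using illustrative constants rather than the kernel's headers:

#include <assert.h>

#define CR3_AVAIL_PCID_BITS     11      /* 12 PCID bits minus the one PTI consumes */
#define MAX_ASID_AVAILABLE      ((1 << CR3_AVAIL_PCID_BITS) - 2)
#define PTI_USER_PCID_BIT       11      /* setting this bit adds 2048 */

/* kPCID: ASID 0 becomes PCID 1, so PCID 0 stays reserved for non-PCID use. */
unsigned long model_kern_pcid(unsigned short asid)
{
        assert(asid < MAX_ASID_AVAILABLE);
        return asid + 1UL;
}

/* uPCID for the KPTI user half: the same value with bit 11 set (kPCID + 2048). */
unsigned long model_user_pcid(unsigned short asid)
{
        return model_kern_pcid(asid) | (1UL << PTI_USER_PCID_BIT);
}
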
255 next->context.ctx_id) in choose_new_asid()
267 ns.asid = this_cpu_add_return(cpu_tlbstate.next_asid, 1) - 1; in choose_new_asid()
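
choose_new_asid() above picks a per-CPU dynamic ASID: reuse a slot whose cached ctx_id already matches the incoming mm, otherwise take the next slot round-robin via next_asid and flush it. A simplified model of that choice (the real code also compares TLB generations and prefers an existing global ASID):

#define TLB_NR_DYN_ASIDS        6

struct tlb_ctx_model {
        unsigned long long ctx_id;      /* which mm this slot was last used for */
};

struct cpu_tlb_model {
        struct tlb_ctx_model ctxs[TLB_NR_DYN_ASIDS];
        unsigned int next_asid;
};

/* Returns the ASID to use; *need_flush is set when a slot is recycled. */
unsigned int model_choose_asid(struct cpu_tlb_model *cpu,
                               unsigned long long ctx_id, int *need_flush)
{
        unsigned int asid;

        for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
                if (cpu->ctxs[asid].ctx_id == ctx_id) {
                        *need_flush = 0;        /* slot already belongs to this mm */
                        return asid;
                }
        }

        /* No slot matches: recycle the next one round-robin and flush it. */
        asid = cpu->next_asid;
        cpu->next_asid = (cpu->next_asid + 1) % TLB_NR_DYN_ASIDS;
        cpu->ctxs[asid].ctx_id = ctx_id;
        *need_flush = 1;
        return asid;
}
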
278 * Global ASIDs are allocated for multi-threaded processes that are
280 * processes the same PCID on every CPU, for use with hardware-assisted
289 static int global_asid_available = MAX_ASID_AVAILABLE - TLB_NR_DYN_ASIDS - 1;
294 * freed global ASIDs are safe to re-use.
306 * The TLB flush above makes it safe to re-use the previously in reset_global_asid_space()
324 if (last_global_asid >= MAX_ASID_AVAILABLE - 1) in allocate_global_asid()
339 global_asid_available--; in allocate_global_asid()
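
Global ASIDs live above the TLB_NR_DYN_ASIDS range and are handed out sequentially; freed IDs only become usable again after the wrap-around flush in reset_global_asid_space(). A toy allocator capturing that flow, leaving out the used/freed bitmaps the kernel keeps:

#define TLB_NR_DYN_ASIDS        6
#define MAX_ASID_AVAILABLE      2046    /* (1 << 11) - 2, as defined above */

static unsigned int last_global_asid = TLB_NR_DYN_ASIDS;
static int global_asid_available = MAX_ASID_AVAILABLE - TLB_NR_DYN_ASIDS - 1;

/* Hand out the next global ASID, or 0 when the space must be flushed and reset. */
unsigned int model_allocate_global_asid(void)
{
        if (global_asid_available <= 0)
                return 0;

        if (last_global_asid >= MAX_ASID_AVAILABLE - 1)
                return 0;       /* wrap-around: flush, then reuse freed IDs */

        global_asid_available--;
        return last_global_asid++;
}
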
344 * Check whether a process is currently active on more than @threshold CPUs.
348 static bool mm_active_cpus_exceeds(struct mm_struct *mm, int threshold) in mm_active_cpus_exceeds() argument
354 if (cpumask_weight(mm_cpumask(mm)) <= threshold) in mm_active_cpus_exceeds()
366 if (++count > threshold) in mm_active_cpus_exceeds()
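
mm_active_cpus_exceeds() above answers "is this mm running on more than @threshold CPUs?" with an early exit as soon as the threshold is crossed, after the cheap cpumask_weight() upper-bound check. A stand-alone model over a plain array of per-CPU loaded-mm pointers, without the cpumask fast path:

#include <stdbool.h>

#define NR_CPUS_MODEL   8

struct mm_model { int id; };

/* The mm each model CPU currently has loaded; NULL means kernel/lazy. */
static struct mm_model *loaded_mm[NR_CPUS_MODEL];

bool model_active_cpus_exceeds(const struct mm_model *mm, int threshold)
{
        int cpu, count = 0;

        for (cpu = 0; cpu < NR_CPUS_MODEL; cpu++) {
                if (loaded_mm[cpu] != mm)
                        continue;
                if (++count > threshold)
                        return true;    /* stop early, the answer is known */
        }
        return false;
}
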
414 /* The global ASID can be re-used only after flush at wrap-around. */ in mm_free_global_asid()
416 __set_bit(mm->context.global_asid, global_asid_freed); in mm_free_global_asid()
418 mm->context.global_asid = 0; in mm_free_global_asid()
424 * Is the mm transitioning from a CPU-local ASID to a global ASID?
452 if ((current->pid & 0x1f) != (jiffies & 0x1f)) in consider_global_asid()
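
The pid/jiffies test in consider_global_asid() is a cheap sampling filter: a task only proceeds when the low five bits of jiffies happen to match the low five bits of its PID, so the heavier promotion check runs roughly 1 time in 32 and different tasks are spread across different times. The same idea in isolation:

#include <stdbool.h>

/*
 * True for a given id on roughly 1 out of every 32 tick values, with
 * different ids selecting different ticks. Purely illustrative.
 */
bool model_should_sample(unsigned long id, unsigned long tick)
{
        return (id & 0x1f) == (tick & 0x1f);
}
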
465 struct mm_struct *mm = info->mm; in finish_asid_transition()
493 flush_tlb_multi(mm_cpumask(info->mm), info); in finish_asid_transition()
504 bool pmd = info->stride_shift == PMD_SHIFT; in broadcast_tlb_flush()
505 unsigned long asid = mm_global_asid(info->mm); in broadcast_tlb_flush()
506 unsigned long addr = info->start; in broadcast_tlb_flush()
513 if (info->end == TLB_FLUSH_ALL) { in broadcast_tlb_flush()
521 if (info->stride_shift <= PMD_SHIFT) { in broadcast_tlb_flush()
522 nr = (info->end - addr) >> info->stride_shift; in broadcast_tlb_flush()
530 addr += nr << info->stride_shift; in broadcast_tlb_flush()
531 } while (addr < info->end); in broadcast_tlb_flush()
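
broadcast_tlb_flush() walks the range in chunks: each pass computes how many stride-sized pages remain, caps that at what one broadcast invalidation can cover, issues the flush, and advances. A model of the chunking with the INVLPGB itself abstracted behind a callback and the per-invocation cap (invlpgb_count_max in the kernel) passed in:

typedef void (*flush_fn)(unsigned long asid, unsigned long addr,
                         unsigned long nr_pages, unsigned int stride_shift);

void model_broadcast_flush_range(unsigned long asid, unsigned long start,
                                 unsigned long end, unsigned int stride_shift,
                                 unsigned long max_per_flush, flush_fn flush)
{
        unsigned long addr = start;

        do {
                /* Pages left in the range, capped to one invalidation's worth. */
                unsigned long nr = (end - addr) >> stride_shift;

                if (nr > max_per_flush)
                        nr = max_per_flush;
                if (nr == 0)
                        nr = 1;         /* always make forward progress */

                flush(asid, addr, nr, stride_shift);
                addr += nr << stride_shift;
        } while (addr < end);
}
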
591 * If so, our callers still expect us to flush the TLB, but there in leave_mm()
618 * Invoked from return to user/guest by a task that opted-in to L1D
646 clear_ti_thread_flag(&next->thread_info, TIF_SPEC_L1D_FLUSH); in l1d_flush_evaluate()
647 next->l1d_flush_kill.func = l1d_flush_force_sigbus; in l1d_flush_evaluate()
648 task_work_add(next, &next->l1d_flush_kill, TWA_RESUME); in l1d_flush_evaluate()
663 return (unsigned long)next->mm | spec_bits; in mm_mangle_tif_spec_bits()
670 if (!next || !next->mm) in cond_mitigation()
677 * Avoid user->user BTB/RSB poisoning by flushing them when switching in cond_mitigation()
678 * between processes. This stops one process from doing Spectre-v2 in cond_mitigation()
683 * same process. Using the mm pointer instead of mm->context.ctx_id in cond_mitigation()
706 * - the same user space task is scheduled out and later in cond_mitigation()
710 * - a user space task belonging to the same process is in cond_mitigation()
713 * - a user space task belonging to the same process is in cond_mitigation()
735 if ((prev_mm & ~LAST_USER_MM_SPEC_MASK) != (unsigned long)next->mm) in cond_mitigation()
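
cond_mitigation() can compare mms this way because mm pointers are aligned, which leaves their low bits free to carry the previous task's speculation-control flags; the mask strips those flags back off before the comparison. The trick in isolation, with hypothetical flag values standing in for the LAST_USER_MM_* bits:

#include <stdbool.h>

/* Hypothetical stand-ins for the LAST_USER_MM_* speculation flags. */
#define SPEC_FLAG_IBPB          (1UL << 0)
#define SPEC_FLAG_L1D_FLUSH     (1UL << 1)
#define SPEC_MASK               (SPEC_FLAG_IBPB | SPEC_FLAG_L1D_FLUSH)

struct mm_model { long dummy; };        /* pointer alignment keeps the low bits clear */

unsigned long pack_mm_and_spec_bits(struct mm_model *mm, unsigned long spec_bits)
{
        return (unsigned long)mm | (spec_bits & SPEC_MASK);
}

bool same_user_mm(unsigned long packed_prev, struct mm_model *next_mm)
{
        return (packed_prev & ~SPEC_MASK) == (unsigned long)next_mm;
}
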
757 atomic_read(&mm->context.perf_rdpmc_allowed))) { in cr4_update_pce_mm()
781 * 'current->active_mm' up to date.
795 /* We don't want flush_tlb_func() to run concurrently with us. */ in switch_mm_irqs_off()
809 if (WARN_ON_ONCE(__read_cr3() != build_cr3(prev->pgd, prev_asid, in switch_mm_irqs_off()
831 * core serialization before returning to user-space, after in switch_mm_irqs_off()
832 * storing to rq->curr, when changing mm. This is because in switch_mm_irqs_off()
845 next->context.ctx_id); in switch_mm_irqs_off()
864 next_tlb_gen = atomic64_read(&next->context.tlb_gen); in switch_mm_irqs_off()
891 next_tlb_gen = atomic64_read(&next->context.tlb_gen); in switch_mm_irqs_off()
920 * Rely on the smp_mb() implied by cpumask_set_cpu()'s atomic in switch_mm_irqs_off()
929 * loaded_mm load can happen in native_flush_tlb_multi() -> in switch_mm_irqs_off()
940 next_tlb_gen = atomic64_read(&next->context.tlb_gen); in switch_mm_irqs_off()
949 this_cpu_write(cpu_tlbstate.ctxs[ns.asid].ctx_id, next->context.ctx_id); in switch_mm_irqs_off()
951 load_new_mm_cr3(next->pgd, ns.asid, new_lam, true); in switch_mm_irqs_off()
956 load_new_mm_cr3(next->pgd, ns.asid, new_lam, false); in switch_mm_irqs_off()
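
The final true/false argument above selects a flushing or non-flushing CR3 load: when the chosen ASID is already up to date, the value written to CR3 carries the architectural no-flush bit (bit 63), so entries tagged with that PCID survive the switch. A sketch of how such a value is assembled, assuming the kPCID mapping from earlier and ignoring LAM bits:

#include <stdbool.h>
#include <stdint.h>

#define CR3_NOFLUSH_BIT 63      /* architectural "do not flush this PCID" bit */

uint64_t model_build_cr3(uint64_t pgd_phys, uint16_t asid, bool need_flush)
{
        uint64_t cr3 = pgd_phys | ((uint64_t)asid + 1);   /* kPCID = ASID + 1 */

        if (!need_flush)
                cr3 |= 1ULL << CR3_NOFLUSH_BIT;           /* keep this PCID's entries */

        return cr3;
}
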
984 * in a row. It will notify us that we're going back to a real mm by
999 * temporary page-table mappings that are required for these write operations to
1004 * It is illegal to schedule while using a temporary mm -- the context switch
1006 * Use a real (non-temporary) mm in a kernel thread if you need to sleep.
1070 * - The ASID changed from what cpu_tlbstate thinks it is (most likely
1074 * - The TLB contains junk in slots corresponding to inactive ASIDs.
1076 * - The CPU went so far out to lunch that it may have missed a TLB
1088 WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd)); in initialize_tlbstate_and_flush()
1103 write_cr3(build_cr3(mm->pgd, 0, 0)); in initialize_tlbstate_and_flush()
1109 this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id); in initialize_tlbstate_and_flush()
1129 * - mm_tlb_gen: the latest generation. in flush_tlb_func()
1130 * - local_tlb_gen: the generation that this CPU has already caught in flush_tlb_func()
1132 * - f->new_tlb_gen: the generation that the requester of the flush in flush_tlb_func()
1133 * wants us to catch up to. in flush_tlb_func()
1139 bool local = smp_processor_id() == f->initiating_cpu; in flush_tlb_func()
1152 if (f->mm && f->mm != loaded_mm) { in flush_tlb_func()
1153 cpumask_clear_cpu(raw_smp_processor_id(), mm_cpumask(f->mm)); in flush_tlb_func()
1172 loaded_mm->context.ctx_id); in flush_tlb_func()
1177 * paging-structure cache to avoid speculatively reading in flush_tlb_func()
1190 if (unlikely(f->new_tlb_gen != TLB_GENERATION_INVALID && in flush_tlb_func()
1191 f->new_tlb_gen <= local_tlb_gen)) { in flush_tlb_func()
1193 * The TLB is already up to date with respect to f->new_tlb_gen. in flush_tlb_func()
1205 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen); in flush_tlb_func()
1210 * happen if two concurrent flushes happen -- the first flush to in flush_tlb_func()
1211 * be handled can catch us all the way up, leaving no work for in flush_tlb_func()
1218 WARN_ON_ONCE(f->new_tlb_gen > mm_tlb_gen); in flush_tlb_func()
1223 * possible that f->new_tlb_gen <= local_tlb_gen), but we're in flush_tlb_func()
1232 * 1. f->new_tlb_gen == local_tlb_gen + 1. We have an invariant that in flush_tlb_func()
1235 * f->new_tlb_gen == 3, then we know that the flush needed to bring in flush_tlb_func()
1236 * us up to date for tlb_gen 3 is the partial flush we're in flush_tlb_func()
1249 * 2. f->new_tlb_gen == mm_tlb_gen. This is purely an optimization. in flush_tlb_func()
1257 if (f->end != TLB_FLUSH_ALL && in flush_tlb_func()
1258 f->new_tlb_gen == local_tlb_gen + 1 && in flush_tlb_func()
1259 f->new_tlb_gen == mm_tlb_gen) { in flush_tlb_func()
1261 unsigned long addr = f->start; in flush_tlb_func()
1264 VM_WARN_ON(f->new_tlb_gen == TLB_GENERATION_INVALID); in flush_tlb_func()
1267 VM_WARN_ON(f->mm == NULL); in flush_tlb_func()
1269 nr_invalidate = (f->end - f->start) >> f->stride_shift; in flush_tlb_func()
1271 while (addr < f->end) { in flush_tlb_func()
1273 addr += 1UL << f->stride_shift; in flush_tlb_func()
1292 (f->mm == NULL) ? TLB_LOCAL_SHOOTDOWN : in flush_tlb_func()
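
Taken together, the flush_tlb_func() fragments above implement a three-way decision over the generation counters: skip the request if this CPU already passed new_tlb_gen, do a ranged flush only when the request is exactly the next generation and also the latest one, and otherwise widen to a full flush that catches the CPU up in one step. A compact model of that decision, with the actual invalidations left out:

#define TLB_FLUSH_ALL   (~0UL)

enum flush_action { FLUSH_SKIP, FLUSH_PARTIAL, FLUSH_FULL };

struct flush_req_model {
        unsigned long start, end;
        unsigned int stride_shift;
        unsigned long long new_tlb_gen; /* generation the requester wants us at */
};

struct cpu_gen_model {
        unsigned long long local_tlb_gen;       /* generation this CPU has reached */
        unsigned long long mm_tlb_gen;          /* latest generation for the mm */
};

enum flush_action model_flush_decision(struct cpu_gen_model *cpu,
                                       const struct flush_req_model *f)
{
        if (f->new_tlb_gen <= cpu->local_tlb_gen)
                return FLUSH_SKIP;      /* an earlier flush already covered this */

        if (f->end != TLB_FLUSH_ALL &&
            f->new_tlb_gen == cpu->local_tlb_gen + 1 &&
            f->new_tlb_gen == cpu->mm_tlb_gen) {
                /* Exactly the next generation and also the latest: range flush. */
                cpu->local_tlb_gen = cpu->mm_tlb_gen;
                return FLUSH_PARTIAL;
        }

        /* Anything else is flushed wholesale, catching up in one step. */
        cpu->local_tlb_gen = cpu->mm_tlb_gen;
        return FLUSH_FULL;
}
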
1314 if (!info->mm) in should_flush_tlb()
1325 if (loaded_mm == info->mm) in should_flush_tlb()
1329 if (info->trim_cpumask) in should_flush_tlb()
1337 if (time_after(jiffies, READ_ONCE(mm->context.next_trim_cpumask))) { in should_trim_cpumask()
1338 WRITE_ONCE(mm->context.next_trim_cpumask, jiffies + HZ); in should_trim_cpumask()
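
should_trim_cpumask() rate-limits the trimming to about once per second: proceed only when jiffies has passed next_trim_cpumask, and push the deadline another HZ into the future when it has. The same pattern as a small helper, using a wrap-safe comparison in place of time_after():

#include <stdbool.h>

/* Wrap-safe "has the deadline passed?"; interval == HZ gives once per second. */
bool model_should_trim(unsigned long now, unsigned long *next_due,
                       unsigned long interval)
{
        if ((long)(now - *next_due) > 0) {
                *next_due = now + interval;
                return true;
        }
        return false;
}
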
1356 if (info->end == TLB_FLUSH_ALL) in native_flush_tlb_multi()
1360 (info->end - info->start) >> PAGE_SHIFT); in native_flush_tlb_multi()
1372 if (info->freed_tables || mm_in_asid_transition(info->mm)) in native_flush_tlb_multi()
1412 * Ensure that the following code is non-reentrant and flush_tlb_info in get_flush_tlb_info()
1414 * interrupt handlers and machine-check exception handlers. in get_flush_tlb_info()
1423 if ((end - start) >> stride_shift > tlb_single_page_flush_ceiling) { in get_flush_tlb_info()
1428 info->start = start; in get_flush_tlb_info()
1429 info->end = end; in get_flush_tlb_info()
1430 info->mm = mm; in get_flush_tlb_info()
1431 info->stride_shift = stride_shift; in get_flush_tlb_info()
1432 info->freed_tables = freed_tables; in get_flush_tlb_info()
1433 info->new_tlb_gen = new_tlb_gen; in get_flush_tlb_info()
1434 info->initiating_cpu = smp_processor_id(); in get_flush_tlb_info()
1435 info->trim_cpumask = 0; in get_flush_tlb_info()
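
tlb_single_page_flush_ceiling is the break-even point between per-page invalidation and flushing everything: if the range spans more stride-sized pages than the ceiling, get_flush_tlb_info() widens the request to a full flush before filling in the structure. A condensed model of that setup step (the default of 33 mirrors the kernel's tunable, which is writable through the debugfs handler near the end of this listing):

#define TLB_FLUSH_ALL   (~0UL)

struct flush_range_model {
        unsigned long start, end;
        unsigned int stride_shift;
};

static unsigned long single_page_flush_ceiling = 33;    /* model of the tunable */

void model_get_flush_info(struct flush_range_model *info, unsigned long start,
                          unsigned long end, unsigned int stride_shift)
{
        /* Too many individual invalidations? A full flush is cheaper. */
        if ((end - start) >> stride_shift > single_page_flush_ceiling) {
                start = 0;
                end = TLB_FLUSH_ALL;
        }

        info->start = start;
        info->end = end;
        info->stride_shift = stride_shift;
}
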
1465 * a local TLB flush is needed. Optimize this use-case by calling in flush_tlb_mm_range()
1471 info->trim_cpumask = should_trim_cpumask(mm); in flush_tlb_mm_range()
1496 /* First try (faster) hardware-assisted TLB invalidation. */ in flush_tlb_all()
1500 /* Fall back to the IPI-based invalidation. */ in flush_tlb_all()
1509 for (addr = info->start; addr < info->end; addr += nr << PAGE_SHIFT) { in invlpgb_kernel_range_flush()
1510 nr = (info->end - addr) >> PAGE_SHIFT; in invlpgb_kernel_range_flush()
1529 for (addr = f->start; addr < f->end; addr += PAGE_SIZE) in do_kernel_range_flush()
1558 if (info->end == TLB_FLUSH_ALL) in flush_tlb_kernel_range()
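
The two kernel-range paths above differ mainly in step size: the INVLPGB path advances by however many pages one instruction covered, while the IPI fallback in do_kernel_range_flush() invalidates a single page per iteration. The fallback loop with the one-page invalidation abstracted away:

typedef void (*invalidate_page_fn)(unsigned long addr);

/* One invalidation per page, mirroring the shape of do_kernel_range_flush(). */
void model_kernel_range_flush(unsigned long start, unsigned long end,
                              unsigned long page_size, invalidate_page_fn invlpg)
{
        unsigned long addr;

        for (addr = start; addr < end; addr += page_size)
                invlpg(addr);
}
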
1576 build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd, in __get_current_cr3_fast()
1601 * If PTI is on, then the kernel is mapped with non-global PTEs, and in flush_tlb_one_kernel()
1673 * Read-modify-write to CR4 - protect it from preemption and in native_flush_tlb_global()
1698 /* If current->mm == NULL then the read_cr3() "borrows" an mm */ in native_flush_tlb_local()
1722 * !PGE -> !PCID (setup_pcid()), thus every flush is total. in __flush_tlb_all()
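
native_flush_tlb_global() relies on the architectural side effect that writing CR4 with a changed PGE bit flushes the whole TLB, global entries included, which is why the read-modify-write mentioned above must be protected from preemption between the two writes. A sketch of the toggle with the privileged register access replaced by a model variable:

#include <stdint.h>

#define CR4_PGE (1ULL << 7)     /* Page Global Enable */

static uint64_t model_cr4 = CR4_PGE;    /* model register; assume PGE is on */

static uint64_t model_read_cr4(void)      { return model_cr4; }
static void model_write_cr4(uint64_t val) { model_cr4 = val; }

/* Toggling CR4.PGE flushes the entire TLB, global entries included. */
void model_flush_tlb_global(void)
{
        uint64_t cr4 = model_read_cr4();

        model_write_cr4(cr4 ^ CR4_PGE);  /* PGE changes: hardware drops everything */
        model_write_cr4(cr4);            /* restore the original value */
}
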
1739 * a local TLB flush is needed. Optimize this use-case by calling in arch_tlbbatch_flush()
1742 if (cpu_feature_enabled(X86_FEATURE_INVLPGB) && batch->unmapped_pages) { in arch_tlbbatch_flush()
1744 batch->unmapped_pages = false; in arch_tlbbatch_flush()
1745 } else if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) { in arch_tlbbatch_flush()
1746 flush_tlb_multi(&batch->cpumask, info); in arch_tlbbatch_flush()
1747 } else if (cpumask_test_cpu(cpu, &batch->cpumask)) { in arch_tlbbatch_flush()
1754 cpumask_clear(&batch->cpumask); in arch_tlbbatch_flush()
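
arch_tlbbatch_flush() above picks the cheapest way to finish a batched unmap: broadcast invalidation when the hardware supports it and pages were actually unmapped, a cross-CPU flush when other CPUs sit in the batch's cpumask, a purely local flush when only the current CPU does, and in every case the mask is cleared afterwards. A model of that dispatch with each mechanism reduced to an enum value (the CPU mask is modelled as a plain bitmask, so at most 64 CPUs):

#include <stdbool.h>

struct flush_batch_model {
        unsigned long cpumask;          /* bit n set: CPU n may hold stale entries */
        bool unmapped_pages;
};

enum batch_flush { BATCH_BROADCAST, BATCH_REMOTE, BATCH_LOCAL, BATCH_NONE };

enum batch_flush model_batch_flush(struct flush_batch_model *batch, int this_cpu,
                                   bool have_invlpgb)
{
        enum batch_flush kind = BATCH_NONE;
        unsigned long others = batch->cpumask & ~(1UL << this_cpu);

        if (have_invlpgb && batch->unmapped_pages) {
                kind = BATCH_BROADCAST;         /* hardware-assisted, no IPIs */
                batch->unmapped_pages = false;
        } else if (others) {
                kind = BATCH_REMOTE;            /* IPI the CPUs in the mask */
        } else if (batch->cpumask & (1UL << this_cpu)) {
                kind = BATCH_LOCAL;             /* only this CPU touched it */
        }

        batch->cpumask = 0;                     /* the batch is finished either way */
        return kind;
}
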
1770 struct mm_struct *current_mm = current->mm; in nmi_uaccess_okay()
1776 * current_mm->pgd == __va(read_cr3_pa()). This may be slow, though, in nmi_uaccess_okay()
1787 VM_WARN_ON_ONCE(__pa(current_mm->pgd) != read_cr3_pa()); in nmi_uaccess_okay()
1809 len = min(count, sizeof(buf) - 1); in tlbflush_write_file()
1811 return -EFAULT; in tlbflush_write_file()
1815 return -EINVAL; in tlbflush_write_file()
1818 return -EINVAL; in tlbflush_write_file()
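
The closing lines are the debugfs write handler that tunes tlb_single_page_flush_ceiling: the user buffer is copied into a small stack buffer, NUL-terminated, parsed as an integer, and rejected with -EINVAL when it does not parse or is negative. A user-space analogue of the same copy-parse-validate shape:

#include <errno.h>
#include <stdio.h>
#include <string.h>

static unsigned long single_page_flush_ceiling = 33;

/* Copy, NUL-terminate, parse, range-check; copy_from_user() becomes memcpy(). */
int model_write_ceiling(const char *user_buf, size_t count)
{
        char buf[32];
        int ceiling;
        size_t len = count < sizeof(buf) - 1 ? count : sizeof(buf) - 1;

        memcpy(buf, user_buf, len);
        buf[len] = '\0';

        if (sscanf(buf, "%d", &ceiling) != 1)
                return -EINVAL;

        if (ceiling < 0)
                return -EINVAL;

        single_page_flush_ceiling = ceiling;
        return 0;
}
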