#include <linux/init.h>

#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/export.h>
#include <linux/cpu.h>

#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/cache.h>
#include <asm/apic.h>
#include <asm/uv/uv.h>
#include <linux/debugfs.h>

/*
 *	TLB flushing, formerly SMP-only
 *		c/o Linus Torvalds.
 *
 *	These mean you can really definitely utterly forget about
 *	writing to user space from interrupts. (It's not allowed anyway).
 *
 *	Optimizations Manfred Spraul <manfred@colorfullife.com>
 *
 *	More scalable flush, from Andi Kleen
 *
 *	Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
 */

atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);

DEFINE_STATIC_KEY_TRUE(tlb_use_lazy_mode);

static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
			    u16 *new_asid, bool *need_flush)
{
	u16 asid;

	if (!static_cpu_has(X86_FEATURE_PCID)) {
		*new_asid = 0;
		*need_flush = true;
		return;
	}

	for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
		if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) !=
		    next->context.ctx_id)
			continue;

		*new_asid = asid;
		*need_flush = (this_cpu_read(cpu_tlbstate.ctxs[asid].tlb_gen) <
			       next_tlb_gen);
		return;
	}

	/*
	 * We don't currently own an ASID slot on this CPU.
	 * Allocate a slot.
	 */
	*new_asid = this_cpu_add_return(cpu_tlbstate.next_asid, 1) - 1;
	if (*new_asid >= TLB_NR_DYN_ASIDS) {
		*new_asid = 0;
		this_cpu_write(cpu_tlbstate.next_asid, 1);
	}
	*need_flush = true;
}

void leave_mm(int cpu)
{
	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);

	/*
	 * It's plausible that we're in lazy TLB mode while our mm is init_mm.
	 * If so, our callers still expect us to flush the TLB, but there
	 * aren't any user TLB entries in init_mm to worry about.
	 *
	 * This needs to happen before any other sanity checks due to
	 * intel_idle's shenanigans.
	 */
	if (loaded_mm == &init_mm)
		return;

	/* Warn if we're not lazy. */
	WARN_ON(!this_cpu_read(cpu_tlbstate.is_lazy));

	switch_mm(NULL, &init_mm, NULL);
}

void switch_mm(struct mm_struct *prev, struct mm_struct *next,
	       struct task_struct *tsk)
{
	unsigned long flags;

	local_irq_save(flags);
	switch_mm_irqs_off(prev, next, tsk);
	local_irq_restore(flags);
}

void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
			struct task_struct *tsk)
{
	struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
	u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
	unsigned cpu = smp_processor_id();
	u64 next_tlb_gen;

	/*
	 * NB: The scheduler will call us with prev == next when switching
	 * from lazy TLB mode to normal mode if active_mm isn't changing.
	 * When this happens, we don't assume that CR3 (and hence
	 * cpu_tlbstate.loaded_mm) matches next.
	 *
	 * NB: leave_mm() calls us with prev == NULL and tsk == NULL.
	 */

	/* We don't want flush_tlb_func_* to run concurrently with us. */
	if (IS_ENABLED(CONFIG_PROVE_LOCKING))
		WARN_ON_ONCE(!irqs_disabled());

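	/*
	 * Note on build_cr3()/build_cr3_noflush(): the real helpers live in
	 * asm/tlbflush.h; as a rough sketch (an assumption, not the exact
	 * definition) they combine the page-table root with the ASID:
	 *
	 *	build_cr3(mm, asid)         ~= __pa(mm->pgd) | asid
	 *	build_cr3_noflush(mm, asid) ~= build_cr3(mm, asid) | CR3_NOFLUSH
	 *
	 * where the noflush bit is CR3 bit 63, which tells the CPU not to
	 * flush TLB entries tagged with that PCID when CR3 is written.  When
	 * PCID is unsupported, only ASID 0 and plain build_cr3() are used.
	 */
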
	/*
	 * Verify that CR3 is what we think it is.  This will catch
	 * hypothetical buggy code that directly switches to swapper_pg_dir
	 * without going through leave_mm() / switch_mm_irqs_off() or that
	 * does something like write_cr3(read_cr3_pa()).
	 *
	 * Only do this check if CONFIG_DEBUG_VM=y because __read_cr3()
	 * isn't free.
	 */
#ifdef CONFIG_DEBUG_VM
	if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) {
		/*
		 * If we were to BUG here, we'd be very likely to kill
		 * the system so hard that we don't see the call trace.
		 * Try to recover instead by ignoring the error and doing
		 * a global flush to minimize the chance of corruption.
		 *
		 * (This is far from being a fully correct recovery.
		 *  Architecturally, the CPU could prefetch something
		 *  back into an incorrect ASID slot and leave it there
		 *  to cause trouble down the road.  It's better than
		 *  nothing, though.)
		 */
		__flush_tlb_all();
	}
#endif
	this_cpu_write(cpu_tlbstate.is_lazy, false);

	if (real_prev == next) {
		VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
			  next->context.ctx_id);

		/*
		 * We don't currently support having a real mm loaded without
		 * our cpu set in mm_cpumask().  We have all the bookkeeping
		 * in place to figure out whether we would need to flush
		 * if our cpu were cleared in mm_cpumask(), but we don't
		 * currently use it.
		 */
		if (WARN_ON_ONCE(real_prev != &init_mm &&
				 !cpumask_test_cpu(cpu, mm_cpumask(next))))
			cpumask_set_cpu(cpu, mm_cpumask(next));

		return;
	} else {
		u16 new_asid;
		bool need_flush;

		if (IS_ENABLED(CONFIG_VMAP_STACK)) {
			/*
			 * If our current stack is in vmalloc space and isn't
			 * mapped in the new pgd, we'll double-fault.  Forcibly
			 * map it.
			 */
			unsigned int index = pgd_index(current_stack_pointer);
			pgd_t *pgd = next->pgd + index;

			if (unlikely(pgd_none(*pgd)))
				set_pgd(pgd, init_mm.pgd[index]);
		}

		/* Stop remote flushes for the previous mm */
		VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(real_prev)) &&
				real_prev != &init_mm);
		cpumask_clear_cpu(cpu, mm_cpumask(real_prev));

		/*
		 * Start remote flushes and then read tlb_gen.
		 */
		cpumask_set_cpu(cpu, mm_cpumask(next));
		next_tlb_gen = atomic64_read(&next->context.tlb_gen);

		choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);

		if (need_flush) {
			this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
			this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
			write_cr3(build_cr3(next, new_asid));
			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
					TLB_FLUSH_ALL);
		} else {
			/* The new ASID is already up to date. */
			write_cr3(build_cr3_noflush(next, new_asid));
			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
		}

		this_cpu_write(cpu_tlbstate.loaded_mm, next);
		this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
	}

	load_mm_cr4(next);
	switch_ldt(real_prev, next);
}

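/*
 * For reference: the cpu_tlbstate per-CPU variable used throughout this file
 * is declared in asm/tlbflush.h.  As used here, it is assumed to look roughly
 * like the sketch below (field names match the accesses in this file; the
 * exact definition, and any additional fields, live in the header):
 *
 *	struct tlb_context {
 *		u64 ctx_id;	// mm identity, never reused
 *		u64 tlb_gen;	// flush generation this ASID has caught up to
 *	};
 *
 *	struct tlb_state {
 *		struct mm_struct *loaded_mm;
 *		u16 loaded_mm_asid;
 *		u16 next_asid;	// round-robin cursor for choose_new_asid()
 *		bool is_lazy;
 *		struct tlb_context ctxs[TLB_NR_DYN_ASIDS];
 *	};
 */
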
/*
 * enter_lazy_tlb() is a hint from the scheduler that we are entering a
 * kernel thread or other context without an mm.  Acceptable implementations
 * include doing nothing whatsoever, switching to init_mm, or various clever
 * lazy tricks to try to minimize TLB flushes.
 *
 * The scheduler reserves the right to call enter_lazy_tlb() several times
 * in a row.  It will notify us that we're going back to a real mm by
 * calling switch_mm_irqs_off().
 */
void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
	if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
		return;

	if (static_branch_unlikely(&tlb_use_lazy_mode)) {
		/*
		 * There's a significant optimization that may be possible
		 * here.  We have accurate enough TLB flush tracking that we
		 * don't need to maintain coherence of TLB per se when we're
		 * lazy.  We do, however, need to maintain coherence of
		 * paging-structure caches.  We could, in principle, leave our
		 * old mm loaded and only switch to init_mm when
		 * tlb_remove_page() happens.
		 */
		this_cpu_write(cpu_tlbstate.is_lazy, true);
	} else {
		switch_mm(NULL, &init_mm, NULL);
	}
}

/*
 * Call this when reinitializing a CPU.  It fixes the following potential
 * problems:
 *
 * - The ASID changed from what cpu_tlbstate thinks it is (most likely
 *   because the CPU was taken down and came back up with CR3's PCID
 *   bits clear).  CPU hotplug can do this.
 *
 * - The TLB contains junk in slots corresponding to inactive ASIDs.
 *
 * - The CPU went so far out to lunch that it may have missed a TLB
 *   flush.
 */
void initialize_tlbstate_and_flush(void)
{
	int i;
	struct mm_struct *mm = this_cpu_read(cpu_tlbstate.loaded_mm);
	u64 tlb_gen = atomic64_read(&init_mm.context.tlb_gen);
	unsigned long cr3 = __read_cr3();

	/* Assert that CR3 already references the right mm. */
	WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));

	/*
	 * Assert that CR4.PCIDE is set if needed.  (CR4.PCIDE initialization
	 * doesn't work like other CR4 bits because it can only be set from
	 * long mode.)
	 */
	WARN_ON(boot_cpu_has(X86_FEATURE_PCID) &&
		!(cr4_read_shadow() & X86_CR4_PCIDE));

	/* Force ASID 0 and force a TLB flush. */
	write_cr3(build_cr3(mm, 0));

	/* Reinitialize tlbstate. */
	this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
	this_cpu_write(cpu_tlbstate.next_asid, 1);
	this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
	this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);

	for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
		this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
}

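/*
 * Illustration of the tlb_gen mechanism (a sketch, not code that runs here):
 * a flush request bumps the mm-wide generation and each CPU/ASID records how
 * far it has caught up.  For example:
 *
 *	mm->context.tlb_gen == 1, cpu_tlbstate.ctxs[asid].tlb_gen == 1
 *	flush_tlb_mm_range() calls inc_mm_tlb_gen(mm)  -> mm_tlb_gen == 2
 *	flush_tlb_func_common() runs on each CPU in mm_cpumask(mm), flushes,
 *	and writes cpu_tlbstate.ctxs[asid].tlb_gen = 2
 *
 * A CPU whose local tlb_gen already equals mm_tlb_gen has nothing left to
 * do; that is the early-return case in flush_tlb_func_common() below.
 */
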
/*
 * flush_tlb_func_common()'s memory ordering requirement is that any
 * TLB fills that happen after we flush the TLB are ordered after we
 * read active_mm's tlb_gen.  We don't need any explicit barriers
 * because all x86 flush operations are serializing and the
 * atomic64_read operation won't be reordered by the compiler.
 */
static void flush_tlb_func_common(const struct flush_tlb_info *f,
				  bool local, enum tlb_flush_reason reason)
{
	/*
	 * We have three different tlb_gen values in here.  They are:
	 *
	 * - mm_tlb_gen:     the latest generation.
	 * - local_tlb_gen:  the generation that this CPU has already caught
	 *                   up to.
	 * - f->new_tlb_gen: the generation that the requester of the flush
	 *                   wants us to catch up to.
	 */
	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
	u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
	u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
	u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);

	/* This code cannot presently handle being reentered. */
	VM_WARN_ON(!irqs_disabled());

	if (unlikely(loaded_mm == &init_mm))
		return;

	VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) !=
		   loaded_mm->context.ctx_id);

	if (this_cpu_read(cpu_tlbstate.is_lazy)) {
		/*
		 * We're in lazy mode.  We need to at least flush our
		 * paging-structure cache to avoid speculatively reading
		 * garbage into our TLB.  Since switching to init_mm is barely
		 * slower than a minimal flush, just switch to init_mm.
		 */
		switch_mm_irqs_off(NULL, &init_mm, NULL);
		return;
	}

	if (unlikely(local_tlb_gen == mm_tlb_gen)) {
		/*
		 * There's nothing to do: we're already up to date.  This can
		 * happen if two concurrent flushes happen -- the first flush
		 * to be handled can catch us all the way up, leaving no work
		 * for the second flush.
		 */
		trace_tlb_flush(reason, 0);
		return;
	}

	WARN_ON_ONCE(local_tlb_gen > mm_tlb_gen);
	WARN_ON_ONCE(f->new_tlb_gen > mm_tlb_gen);

	/*
	 * If we get to this point, we know that our TLB is out of date.
	 * This does not strictly imply that we need to flush (it's
	 * possible that f->new_tlb_gen <= local_tlb_gen), but we're
	 * going to need to flush in the very near future, so we might
	 * as well get it over with.
	 *
	 * The only question is whether to do a full or partial flush.
	 *
	 * We do a partial flush if requested and two extra conditions
	 * are met:
	 *
	 * 1. f->new_tlb_gen == local_tlb_gen + 1.  We have an invariant that
	 *    we've always done all needed flushes to catch up to
	 *    local_tlb_gen.  If, for example, local_tlb_gen == 2 and
	 *    f->new_tlb_gen == 3, then we know that the flush needed to bring
	 *    us up to date for tlb_gen 3 is the partial flush we're
	 *    processing.
	 *
	 *    As an example of why this check is needed, suppose that there
	 *    are two concurrent flushes.  The first is a full flush that
	 *    changes context.tlb_gen from 1 to 2.  The second is a partial
	 *    flush that changes context.tlb_gen from 2 to 3.  If they get
	 *    processed on this CPU in reverse order, we'll see
	 *    local_tlb_gen == 1, mm_tlb_gen == 3, and end != TLB_FLUSH_ALL.
	 *    If we were to use __flush_tlb_single() and set local_tlb_gen to
	 *    3, we'd break the invariant: we'd update local_tlb_gen above
	 *    1 without the full flush that's needed for tlb_gen 2.
	 *
	 * 2. f->new_tlb_gen == mm_tlb_gen.  This is purely an optimization.
	 *    Partial TLB flushes are not all that much cheaper than full TLB
	 *    flushes, so it seems unlikely that it would be a performance win
	 *    to do a partial flush if that won't bring our TLB fully up to
	 *    date.  By doing a full flush instead, we can increase
	 *    local_tlb_gen all the way to mm_tlb_gen and we can probably
	 *    avoid another flush in the very near future.
	 */
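	/*
	 * Concrete example (illustrative only): if local_tlb_gen == 2,
	 * mm_tlb_gen == 3 and the request carries new_tlb_gen == 3 for a
	 * two-page range, both conditions hold and we issue two
	 * __flush_tlb_single() calls below.  Any other combination falls
	 * through to the full flush.
	 */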
	if (f->end != TLB_FLUSH_ALL &&
	    f->new_tlb_gen == local_tlb_gen + 1 &&
	    f->new_tlb_gen == mm_tlb_gen) {
		/* Partial flush */
		unsigned long addr;
		unsigned long nr_pages = (f->end - f->start) >> PAGE_SHIFT;

		addr = f->start;
		while (addr < f->end) {
			__flush_tlb_single(addr);
			addr += PAGE_SIZE;
		}
		if (local)
			count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_pages);
		trace_tlb_flush(reason, nr_pages);
	} else {
		/* Full flush. */
		local_flush_tlb();
		if (local)
			count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
		trace_tlb_flush(reason, TLB_FLUSH_ALL);
	}

	/* Both paths above update our state to mm_tlb_gen. */
	this_cpu_write(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen, mm_tlb_gen);
}

static void flush_tlb_func_local(void *info, enum tlb_flush_reason reason)
{
	const struct flush_tlb_info *f = info;

	flush_tlb_func_common(f, true, reason);
}

static void flush_tlb_func_remote(void *info)
{
	const struct flush_tlb_info *f = info;

	inc_irq_stat(irq_tlb_count);

	if (f->mm && f->mm != this_cpu_read(cpu_tlbstate.loaded_mm))
		return;

	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
	flush_tlb_func_common(f, false, TLB_REMOTE_SHOOTDOWN);
}

void native_flush_tlb_others(const struct cpumask *cpumask,
			     const struct flush_tlb_info *info)
{
	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
	if (info->end == TLB_FLUSH_ALL)
		trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL);
	else
		trace_tlb_flush(TLB_REMOTE_SEND_IPI,
				(info->end - info->start) >> PAGE_SHIFT);

	if (is_uv_system()) {
		/*
		 * This whole special case is confused.  UV has a "Broadcast
		 * Assist Unit", which seems to be a fancy way to send IPIs.
		 * Back when x86 used an explicit TLB flush IPI, UV was
		 * optimized to use its own mechanism.  These days, x86 uses
		 * smp_call_function_many(), but UV still uses a manual IPI,
		 * and that IPI's action is out of date -- it does a manual
		 * flush instead of calling flush_tlb_func_remote().  This
		 * means that the percpu tlb_gen variables won't be updated
		 * and we'll do pointless flushes on future context switches.
		 *
		 * Rather than hooking native_flush_tlb_others() here, I think
		 * that UV should be updated so that smp_call_function_many(),
		 * etc, are optimal on UV.
		 */
		unsigned int cpu;

		cpu = smp_processor_id();
		cpumask = uv_flush_tlb_others(cpumask, info);
		if (cpumask)
			smp_call_function_many(cpumask, flush_tlb_func_remote,
					       (void *)info, 1);
		return;
	}
	smp_call_function_many(cpumask, flush_tlb_func_remote,
			       (void *)info, 1);
}

/*
 * See Documentation/x86/tlb.txt for details.  We choose 33
 * because it is large enough to cover the vast majority (at
 * least 95%) of allocations, and is small enough that we are
 * confident it will not cause too much overhead.  Each single
 * flush is about 100 ns, so this caps the maximum overhead at
 * _about_ 3,000 ns.
 *
 * This is in units of pages.
 */
static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;

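/*
 * Worked example of the ceiling (illustrative, assuming 4 KB pages): a 64 KB
 * flush covers 16 pages, which is <= 33, so flush_tlb_mm_range() below passes
 * the range through and it is flushed one page at a time; a 1 MB flush covers
 * 256 pages, which exceeds the ceiling, so the whole TLB is flushed instead.
 */
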
void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
			unsigned long end, unsigned long vmflag)
{
	int cpu;

	struct flush_tlb_info info = {
		.mm = mm,
	};

	cpu = get_cpu();

	/* This is also a barrier that synchronizes with switch_mm(). */
	info.new_tlb_gen = inc_mm_tlb_gen(mm);

	/* Should we flush just the requested range? */
	if ((end != TLB_FLUSH_ALL) &&
	    !(vmflag & VM_HUGETLB) &&
	    ((end - start) >> PAGE_SHIFT) <= tlb_single_page_flush_ceiling) {
		info.start = start;
		info.end = end;
	} else {
		info.start = 0UL;
		info.end = TLB_FLUSH_ALL;
	}

	if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
		VM_WARN_ON(irqs_disabled());
		local_irq_disable();
		flush_tlb_func_local(&info, TLB_LOCAL_MM_SHOOTDOWN);
		local_irq_enable();
	}

	if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids)
		flush_tlb_others(mm_cpumask(mm), &info);

	put_cpu();
}

static void do_flush_tlb_all(void *info)
{
	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
	__flush_tlb_all();
}

void flush_tlb_all(void)
{
	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
	on_each_cpu(do_flush_tlb_all, NULL, 1);
}

static void do_kernel_range_flush(void *info)
{
	struct flush_tlb_info *f = info;
	unsigned long addr;

	/* Flush the range one page at a time with 'invlpg'. */
	for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
		__flush_tlb_single(addr);
}

void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	/* Balance as for a user space task's flush; a bit conservative. */
	if (end == TLB_FLUSH_ALL ||
	    (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) {
		on_each_cpu(do_flush_tlb_all, NULL, 1);
	} else {
		struct flush_tlb_info info;
		info.start = start;
		info.end = end;
		on_each_cpu(do_kernel_range_flush, &info, 1);
	}
}

void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
{
	struct flush_tlb_info info = {
		.mm = NULL,
		.start = 0UL,
		.end = TLB_FLUSH_ALL,
	};

	int cpu = get_cpu();

	if (cpumask_test_cpu(cpu, &batch->cpumask)) {
		VM_WARN_ON(irqs_disabled());
		local_irq_disable();
		flush_tlb_func_local(&info, TLB_LOCAL_SHOOTDOWN);
		local_irq_enable();
	}

	if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids)
		flush_tlb_others(&batch->cpumask, &info);

	cpumask_clear(&batch->cpumask);

	put_cpu();
}

static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf,
				  size_t count, loff_t *ppos)
{
	char buf[32];
	unsigned int len;

	len = sprintf(buf, "%lu\n", tlb_single_page_flush_ceiling);
	return simple_read_from_buffer(user_buf, count, ppos, buf, len);
}

static ssize_t tlbflush_write_file(struct file *file,
		 const char __user *user_buf, size_t count, loff_t *ppos)
{
	char buf[32];
	ssize_t len;
	int ceiling;

	len = min(count, sizeof(buf) - 1);
	if (copy_from_user(buf, user_buf, len))
		return -EFAULT;

	buf[len] = '\0';
	if (kstrtoint(buf, 0, &ceiling))
		return -EINVAL;

	if (ceiling < 0)
		return -EINVAL;

	tlb_single_page_flush_ceiling = ceiling;
	return count;
}

static const struct file_operations fops_tlbflush = {
	.read = tlbflush_read_file,
	.write = tlbflush_write_file,
	.llseek = default_llseek,
};

static int __init create_tlb_single_page_flush_ceiling(void)
{
	debugfs_create_file("tlb_single_page_flush_ceiling", S_IRUSR | S_IWUSR,
			    arch_debugfs_dir, NULL, &fops_tlbflush);
	return 0;
}
late_initcall(create_tlb_single_page_flush_ceiling);

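/*
 * Example usage of the knob above (assuming debugfs is mounted at
 * /sys/kernel/debug and arch_debugfs_dir corresponds to the "x86" directory):
 *
 *	# cat /sys/kernel/debug/x86/tlb_single_page_flush_ceiling
 *	33
 *	# echo 64 > /sys/kernel/debug/x86/tlb_single_page_flush_ceiling
 */
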
static ssize_t tlblazy_read_file(struct file *file, char __user *user_buf,
				 size_t count, loff_t *ppos)
{
	char buf[2];

	buf[0] = static_branch_likely(&tlb_use_lazy_mode) ? '1' : '0';
	buf[1] = '\n';

	return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
}

static ssize_t tlblazy_write_file(struct file *file,
		const char __user *user_buf, size_t count, loff_t *ppos)
{
	bool val;

	if (kstrtobool_from_user(user_buf, count, &val))
		return -EINVAL;

	if (val)
		static_branch_enable(&tlb_use_lazy_mode);
	else
		static_branch_disable(&tlb_use_lazy_mode);

	return count;
}

static const struct file_operations fops_tlblazy = {
	.read = tlblazy_read_file,
	.write = tlblazy_write_file,
	.llseek = default_llseek,
};

static int __init init_tlb_use_lazy_mode(void)
{
	if (boot_cpu_has(X86_FEATURE_PCID)) {
		/*
		 * Heuristic: with PCID on, switching to and from
		 * init_mm is reasonably fast, but remote flush IPIs
		 * are as expensive as ever, so turn off lazy TLB mode.
		 *
		 * We can't do this in setup_pcid() because static keys
		 * haven't been initialized yet, and it would blow up
		 * badly.
		 */
		static_branch_disable(&tlb_use_lazy_mode);
	}

	debugfs_create_file("tlb_use_lazy_mode", S_IRUSR | S_IWUSR,
			    arch_debugfs_dir, NULL, &fops_tlblazy);
	return 0;
}
late_initcall(init_tlb_use_lazy_mode);
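
/*
 * Example usage of the lazy-mode knob (same debugfs path assumption as above):
 *
 *	# cat /sys/kernel/debug/x86/tlb_use_lazy_mode
 *	0
 *	# echo 1 > /sys/kernel/debug/x86/tlb_use_lazy_mode
 *
 * (A PCID-capable system reads back 0 by default because
 * init_tlb_use_lazy_mode() disables the key above.)  kstrtobool_from_user()
 * also accepts forms like "y"/"n" and "on"/"off" on write.
 */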