#include <linux/gfp.h>
#include <linux/highmem.h>
#include <linux/kernel.h>
#include <linux/mmdebug.h>
#include <linux/mm_types.h>
#include <linux/mm_inline.h>
#include <linux/pagemap.h>
#include <linux/rcupdate.h>
#include <linux/smp.h>
#include <linux/swap.h>
#include <linux/rmap.h>

#include <asm/pgalloc.h>
#include <asm/tlb.h>

#ifndef CONFIG_MMU_GATHER_NO_GATHER

static bool tlb_next_batch(struct mmu_gather *tlb)
{
	struct mmu_gather_batch *batch;

	/* Limit batching if we have delayed rmaps pending */
	if (tlb->delayed_rmap && tlb->active != &tlb->local)
		return false;

	batch = tlb->active;
	if (batch->next) {
		tlb->active = batch->next;
		return true;
	}

	if (tlb->batch_count == MAX_GATHER_BATCH_COUNT)
		return false;

	batch = (void *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
	if (!batch)
		return false;

	tlb->batch_count++;
	batch->next = NULL;
	batch->nr = 0;
	batch->max = MAX_GATHER_BATCH;

	tlb->active->next = batch;
	tlb->active = batch;

	return true;
}
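/*
 * A rough picture (illustrative only, sizes approximate) of the gather list
 * after tlb_next_batch() has grown it a couple of times:
 *
 *	tlb->local  --next-->  batch #1  --next-->  batch #2  (== tlb->active)
 *	(embedded,             (one page,            (one page,
 *	 small __pages[])       MAX_GATHER_BATCH)     MAX_GATHER_BATCH)
 *
 * Extra batches are single pages allocated with GFP_NOWAIT | __GFP_NOWARN,
 * so allocation failure simply stops batching and makes the caller flush
 * more often; MAX_GATHER_BATCH_COUNT bounds the memory pinned here.
 */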
#ifdef CONFIG_SMP
static void tlb_flush_rmap_batch(struct mmu_gather_batch *batch, struct vm_area_struct *vma)
{
	struct encoded_page **pages = batch->encoded_pages;

	for (int i = 0; i < batch->nr; i++) {
		struct encoded_page *enc = pages[i];

		if (encoded_page_flags(enc) & ENCODED_PAGE_BIT_DELAY_RMAP) {
			struct page *page = encoded_page_ptr(enc);
			unsigned int nr_pages = 1;

			if (unlikely(encoded_page_flags(enc) &
				     ENCODED_PAGE_BIT_NR_PAGES_NEXT))
				nr_pages = encoded_nr_pages(pages[++i]);

			folio_remove_rmap_ptes(page_folio(page), page, nr_pages,
					       vma);
		}
	}
}

/**
 * tlb_flush_rmaps - do pending rmap removals after we have flushed the TLB
 * @tlb: the current mmu_gather
 * @vma: The memory area from which the pages are being removed.
 *
 * Note that because of how tlb_next_batch() above works, we will
 * never start multiple new batches with pending delayed rmaps, so
 * we only need to walk through the current active batch and the
 * original local one.
 */
void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma)
{
	if (!tlb->delayed_rmap)
		return;

	tlb_flush_rmap_batch(&tlb->local, vma);
	if (tlb->active != &tlb->local)
		tlb_flush_rmap_batch(tlb->active, vma);
	tlb->delayed_rmap = 0;
}
#endif
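/*
 * The delayed-rmap flow in short (a summary for orientation, nothing new
 * mechanically): callers such as zap_pte_range() may queue a page with
 * ENCODED_PAGE_BIT_DELAY_RMAP set, flush the TLB, and only then call
 * tlb_flush_rmaps() - all before the page table lock is dropped - so the
 * rmap only disappears once no CPU can still hold a stale TLB entry for
 * the page.
 */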
/*
 * We might end up freeing a lot of pages. Reschedule on a regular
 * basis to avoid soft lockups in configurations without full
 * preemption enabled. The magic number of 512 folios seems to work.
 */
#define MAX_NR_FOLIOS_PER_FREE		512

static void __tlb_batch_free_encoded_pages(struct mmu_gather_batch *batch)
{
	struct encoded_page **pages = batch->encoded_pages;
	unsigned int nr, nr_pages;

	while (batch->nr) {
		if (!page_poisoning_enabled_static() && !want_init_on_free()) {
			nr = min(MAX_NR_FOLIOS_PER_FREE, batch->nr);

			/*
			 * Make sure we cover page + nr_pages, and don't leave
			 * nr_pages behind when capping the number of entries.
			 */
			if (unlikely(encoded_page_flags(pages[nr - 1]) &
				     ENCODED_PAGE_BIT_NR_PAGES_NEXT))
				nr++;
		} else {
			/*
			 * With page poisoning and init_on_free, the time it
			 * takes to free memory grows proportionally with the
			 * actual memory size. Therefore, limit based on the
			 * actual memory size and not the number of involved
			 * folios.
			 */
			for (nr = 0, nr_pages = 0;
			     nr < batch->nr && nr_pages < MAX_NR_FOLIOS_PER_FREE;
			     nr++) {
				if (unlikely(encoded_page_flags(pages[nr]) &
					     ENCODED_PAGE_BIT_NR_PAGES_NEXT))
					nr_pages += encoded_nr_pages(pages[++nr]);
				else
					nr_pages++;
			}
		}

		free_pages_and_swap_cache(pages, nr);
		pages += nr;
		batch->nr -= nr;

		cond_resched();
	}
}

static void tlb_batch_pages_flush(struct mmu_gather *tlb)
{
	struct mmu_gather_batch *batch;

	for (batch = &tlb->local; batch && batch->nr; batch = batch->next)
		__tlb_batch_free_encoded_pages(batch);
	tlb->active = &tlb->local;
}

static void tlb_batch_list_free(struct mmu_gather *tlb)
{
	struct mmu_gather_batch *batch, *next;

	for (batch = tlb->local.next; batch; batch = next) {
		next = batch->next;
		free_pages((unsigned long)batch, 0);
	}
	tlb->local.next = NULL;
}
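/*
 * Layout of batch->encoded_pages[], illustrated (a sketch of the existing
 * encoding; page names and counts are made up):
 *
 *	[ encode_page(pageA, 0) ]				single page
 *	[ encode_page(pageB, ENCODED_PAGE_BIT_DELAY_RMAP) ]	rmap still pending
 *	[ encode_page(pageC, ENCODED_PAGE_BIT_NR_PAGES_NEXT) ]	first of a pair:
 *	[ encode_nr_pages(512) ]				pageC spans 512 pages
 *
 * A "page + nr_pages" pair occupies two consecutive slots, which is why the
 * producer below always keeps one spare slot and the consumers above step
 * over the extra entry when they see ENCODED_PAGE_BIT_NR_PAGES_NEXT.
 */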
static bool __tlb_remove_folio_pages_size(struct mmu_gather *tlb,
		struct page *page, unsigned int nr_pages, bool delay_rmap,
		int page_size)
{
	int flags = delay_rmap ? ENCODED_PAGE_BIT_DELAY_RMAP : 0;
	struct mmu_gather_batch *batch;

	VM_BUG_ON(!tlb->end);

#ifdef CONFIG_MMU_GATHER_PAGE_SIZE
	VM_WARN_ON(tlb->page_size != page_size);
	VM_WARN_ON_ONCE(nr_pages != 1 && page_size != PAGE_SIZE);
	VM_WARN_ON_ONCE(page_folio(page) != page_folio(page + nr_pages - 1));
#endif

	batch = tlb->active;
	/*
	 * Add the page and check if we are full. If so
	 * force a flush.
	 */
	if (likely(nr_pages == 1)) {
		batch->encoded_pages[batch->nr++] = encode_page(page, flags);
	} else {
		flags |= ENCODED_PAGE_BIT_NR_PAGES_NEXT;
		batch->encoded_pages[batch->nr++] = encode_page(page, flags);
		batch->encoded_pages[batch->nr++] = encode_nr_pages(nr_pages);
	}
	/*
	 * Make sure that we can always add another "page" + "nr_pages",
	 * requiring two entries instead of only a single one.
	 */
	if (batch->nr >= batch->max - 1) {
		if (!tlb_next_batch(tlb))
			return true;
		batch = tlb->active;
	}
	VM_BUG_ON_PAGE(batch->nr > batch->max - 1, page);

	return false;
}

bool __tlb_remove_folio_pages(struct mmu_gather *tlb, struct page *page,
		unsigned int nr_pages, bool delay_rmap)
{
	return __tlb_remove_folio_pages_size(tlb, page, nr_pages, delay_rmap,
					     PAGE_SIZE);
}

bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
		bool delay_rmap, int page_size)
{
	return __tlb_remove_folio_pages_size(tlb, page, 1, delay_rmap, page_size);
}

#endif /* MMU_GATHER_NO_GATHER */
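/*
 * Note on the return value of the helpers above: "true" means the gather
 * could not be extended, so the caller must drain it before queueing more
 * pages. A rough caller-side sketch (the real logic lives in zap_pte_range()
 * and friends in mm/memory.c; this is illustrative only):
 *
 *	if (__tlb_remove_page_size(tlb, page, delay_rmap, PAGE_SIZE)) {
 *		force_flush = 1;	// batch full: stop and flush first
 *		break;
 *	}
 */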
#ifdef CONFIG_MMU_GATHER_TABLE_FREE

static void __tlb_remove_table_free(struct mmu_table_batch *batch)
{
	int i;

	for (i = 0; i < batch->nr; i++)
		__tlb_remove_table(batch->tables[i]);

	free_page((unsigned long)batch);
}

#ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE

/*
 * Semi RCU freeing of the page directories.
 *
 * This is needed by some architectures to implement software pagetable walkers.
 *
 * gup_fast() and other software pagetable walkers do a lockless page-table
 * walk and therefore need some synchronization with the freeing of the page
 * directories. The chosen means to accomplish that is by disabling IRQs over
 * the walk.
 *
 * Architectures that use IPIs to flush TLBs will then automagically DTRT,
 * since we unlink the page, flush TLBs, free the page. Since the disabling of
 * IRQs delays the completion of the TLB flush we can never observe an already
 * freed page.
 *
 * Architectures that do not have this (PPC) need to delay the freeing by some
 * other means; this is that means.
 *
 * What we do is batch the freed directory pages (tables) and RCU free them.
 * We use the sched RCU variant, as that guarantees that IRQ/preempt disabling
 * holds off grace periods.
 *
 * However, in order to batch these pages we need to allocate storage; this
 * allocation is deep inside the MM code and can thus easily fail on memory
 * pressure. To guarantee progress we fall back to single table freeing, see
 * the implementation of tlb_remove_table_one().
 */

static void tlb_remove_table_smp_sync(void *arg)
{
	/* Simply deliver the interrupt */
}

void tlb_remove_table_sync_one(void)
{
	/*
	 * This isn't an RCU grace period and hence the page-tables cannot be
	 * assumed to be actually RCU-freed.
	 *
	 * It is however sufficient for software page-table walkers that rely
	 * on IRQ disabling.
	 */
	smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
}

static void tlb_remove_table_rcu(struct rcu_head *head)
{
	__tlb_remove_table_free(container_of(head, struct mmu_table_batch, rcu));
}

static void tlb_remove_table_free(struct mmu_table_batch *batch)
{
	call_rcu(&batch->rcu, tlb_remove_table_rcu);
}

#else /* !CONFIG_MMU_GATHER_RCU_TABLE_FREE */

static void tlb_remove_table_free(struct mmu_table_batch *batch)
{
	__tlb_remove_table_free(batch);
}

#endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */
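/*
 * Putting the pieces together - the resulting order of events for one table
 * page (a condensed restatement of the comment above, not extra machinery):
 *
 *	1. the table is unlinked from the paging tree (pXd_clear())
 *	2. tlb_remove_table() queues it; if the batch page can't be
 *	   allocated, tlb_remove_table_one() syncs via IPI and frees directly
 *	3. tlb_table_flush() invalidates the TLB / paging-structure caches
 *	   through tlb_table_invalidate()
 *	4. the table page is finally freed from RCU (or immediately, without
 *	   CONFIG_MMU_GATHER_RCU_TABLE_FREE)
 *
 * A lockless walker running with IRQs disabled therefore either holds off
 * step 4 or never dereferences the freed table.
 */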
/*
 * If we want tlb_remove_table() to imply TLB invalidates.
 */
static inline void tlb_table_invalidate(struct mmu_gather *tlb)
{
	if (tlb_needs_table_invalidate()) {
		/*
		 * Invalidate page-table caches used by hardware walkers. Then
		 * we still need to RCU-sched wait while freeing the pages
		 * because software walkers can still be in-flight.
		 */
		tlb_flush_mmu_tlbonly(tlb);
	}
}

static void tlb_remove_table_one(void *table)
{
	tlb_remove_table_sync_one();
	__tlb_remove_table(table);
}

static void tlb_table_flush(struct mmu_gather *tlb)
{
	struct mmu_table_batch **batch = &tlb->batch;

	if (*batch) {
		tlb_table_invalidate(tlb);
		tlb_remove_table_free(*batch);
		*batch = NULL;
	}
}

void tlb_remove_table(struct mmu_gather *tlb, void *table)
{
	struct mmu_table_batch **batch = &tlb->batch;

	if (*batch == NULL) {
		*batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
		if (*batch == NULL) {
			tlb_table_invalidate(tlb);
			tlb_remove_table_one(table);
			return;
		}
		(*batch)->nr = 0;
	}

	(*batch)->tables[(*batch)->nr++] = table;
	if ((*batch)->nr == MAX_TABLE_BATCH)
		tlb_table_flush(tlb);
}

static inline void tlb_table_init(struct mmu_gather *tlb)
{
	tlb->batch = NULL;
}

#else /* !CONFIG_MMU_GATHER_TABLE_FREE */

static inline void tlb_table_flush(struct mmu_gather *tlb) { }
static inline void tlb_table_init(struct mmu_gather *tlb) { }

#endif /* CONFIG_MMU_GATHER_TABLE_FREE */

static void tlb_flush_mmu_free(struct mmu_gather *tlb)
{
	tlb_table_flush(tlb);
#ifndef CONFIG_MMU_GATHER_NO_GATHER
	tlb_batch_pages_flush(tlb);
#endif
}

void tlb_flush_mmu(struct mmu_gather *tlb)
{
	tlb_flush_mmu_tlbonly(tlb);
	tlb_flush_mmu_free(tlb);
}
static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
			     bool fullmm)
{
	tlb->mm = mm;
	tlb->fullmm = fullmm;

#ifndef CONFIG_MMU_GATHER_NO_GATHER
	tlb->need_flush_all = 0;
	tlb->local.next = NULL;
	tlb->local.nr = 0;
	tlb->local.max = ARRAY_SIZE(tlb->__pages);
	tlb->active = &tlb->local;
	tlb->batch_count = 0;
#endif
	tlb->delayed_rmap = 0;

	tlb_table_init(tlb);
#ifdef CONFIG_MMU_GATHER_PAGE_SIZE
	tlb->page_size = 0;
#endif

	__tlb_reset_range(tlb);
	inc_tlb_flush_pending(tlb->mm);
}

/**
 * tlb_gather_mmu - initialize an mmu_gather structure for page-table tear-down
 * @tlb: the mmu_gather structure to initialize
 * @mm: the mm_struct of the target address space
 *
 * Called to initialize an (on-stack) mmu_gather structure for page-table
 * tear-down from @mm.
 */
void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm)
{
	__tlb_gather_mmu(tlb, mm, false);
}

/**
 * tlb_gather_mmu_fullmm - initialize an mmu_gather structure for page-table tear-down
 * @tlb: the mmu_gather structure to initialize
 * @mm: the mm_struct of the target address space
 *
 * In this case, @mm is without users and we're going to destroy the
 * full address space (exit/execve).
 *
 * Called to initialize an (on-stack) mmu_gather structure for page-table
 * tear-down from @mm.
 */
void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm)
{
	__tlb_gather_mmu(tlb, mm, true);
}
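/*
 * Caller-side lifecycle, sketched (roughly what unmap_region() does; the
 * helpers outside this file are shown for orientation only):
 *
 *	struct mmu_gather tlb;
 *
 *	tlb_gather_mmu(&tlb, mm);
 *	unmap_vmas(&tlb, ...);		// queues pages via __tlb_remove_page_size()
 *	free_pgtables(&tlb, ...);	// queues tables via tlb_remove_table()
 *	tlb_finish_mmu(&tlb);		// flush the TLB, free pages and tables
 */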
/**
 * tlb_finish_mmu - finish an mmu_gather structure
 * @tlb: the mmu_gather structure to finish
 *
 * Called at the end of the shootdown operation to free up any resources that
 * were required.
 */
void tlb_finish_mmu(struct mmu_gather *tlb)
{
	/*
	 * If parallel threads are doing PTE changes on the same range under a
	 * non-exclusive lock (e.g., mmap_lock read-side) but defer the TLB
	 * flush by batching, one thread may end up seeing inconsistent PTEs
	 * and be left with stale TLB entries. So flush the TLB forcefully if
	 * we detect parallel PTE batching threads.
	 *
	 * However, some syscalls, e.g. munmap(), may free page tables; these
	 * need to force-flush everything in the given range. Otherwise this
	 * may leave stale TLB entries on architectures, e.g. aarch64, that
	 * can restrict a TLB flush to a given page-table level.
	 */
	if (mm_tlb_flush_nested(tlb->mm)) {
		/*
		 * aarch64 yields better performance with fullmm by avoiding
		 * multiple CPUs spamming TLBI messages at the same time.
		 *
		 * On x86, non-fullmm doesn't yield a significant difference
		 * against fullmm.
		 */
		tlb->fullmm = 1;
		__tlb_reset_range(tlb);
		tlb->freed_tables = 1;
	}

	tlb_flush_mmu(tlb);

#ifndef CONFIG_MMU_GATHER_NO_GATHER
	tlb_batch_list_free(tlb);
#endif
	dec_tlb_flush_pending(tlb->mm);
}