11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * High memory handling common code and variables. 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de 51da177e4SLinus Torvalds * Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de 61da177e4SLinus Torvalds * 71da177e4SLinus Torvalds * 81da177e4SLinus Torvalds * Redesigned the x86 32-bit VM architecture to deal with 91da177e4SLinus Torvalds * 64-bit physical space. With current x86 CPUs this 101da177e4SLinus Torvalds * means up to 64 Gigabytes physical RAM. 111da177e4SLinus Torvalds * 121da177e4SLinus Torvalds * Rewrote high memory support to move the page cache into 131da177e4SLinus Torvalds * high memory. Implemented permanent (schedulable) kmaps 141da177e4SLinus Torvalds * based on Linus' idea. 151da177e4SLinus Torvalds * 161da177e4SLinus Torvalds * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com> 171da177e4SLinus Torvalds */ 181da177e4SLinus Torvalds 191da177e4SLinus Torvalds #include <linux/mm.h> 201da177e4SLinus Torvalds #include <linux/module.h> 211da177e4SLinus Torvalds #include <linux/swap.h> 221da177e4SLinus Torvalds #include <linux/bio.h> 231da177e4SLinus Torvalds #include <linux/pagemap.h> 241da177e4SLinus Torvalds #include <linux/mempool.h> 251da177e4SLinus Torvalds #include <linux/blkdev.h> 261da177e4SLinus Torvalds #include <linux/init.h> 271da177e4SLinus Torvalds #include <linux/hash.h> 281da177e4SLinus Torvalds #include <linux/highmem.h> 292056a782SJens Axboe #include <linux/blktrace_api.h> 301da177e4SLinus Torvalds #include <asm/tlbflush.h> 311da177e4SLinus Torvalds 321da177e4SLinus Torvalds /* 331da177e4SLinus Torvalds * Virtual_count is not a pure "count". 341da177e4SLinus Torvalds * 0 means that it is not mapped, and has not been mapped 351da177e4SLinus Torvalds * since a TLB flush - it is usable. 361da177e4SLinus Torvalds * 1 means that there are no users, but it has been mapped 371da177e4SLinus Torvalds * since the last TLB flush - so we can't use it. 381da177e4SLinus Torvalds * n means that there are (n-1) current users of it. 391da177e4SLinus Torvalds */ 401da177e4SLinus Torvalds #ifdef CONFIG_HIGHMEM 41260b2367SAl Viro 42c1f60a5aSChristoph Lameter unsigned long totalhigh_pages __read_mostly; 43c1f60a5aSChristoph Lameter 44c1f60a5aSChristoph Lameter unsigned int nr_free_highpages (void) 45c1f60a5aSChristoph Lameter { 46c1f60a5aSChristoph Lameter pg_data_t *pgdat; 47c1f60a5aSChristoph Lameter unsigned int pages = 0; 48c1f60a5aSChristoph Lameter 49c1f60a5aSChristoph Lameter for_each_online_pgdat(pgdat) 50d23ad423SChristoph Lameter pages += zone_page_state(&pgdat->node_zones[ZONE_HIGHMEM], 51d23ad423SChristoph Lameter NR_FREE_PAGES); 52c1f60a5aSChristoph Lameter 53c1f60a5aSChristoph Lameter return pages; 54c1f60a5aSChristoph Lameter } 55c1f60a5aSChristoph Lameter 561da177e4SLinus Torvalds static int pkmap_count[LAST_PKMAP]; 571da177e4SLinus Torvalds static unsigned int last_pkmap_nr; 581da177e4SLinus Torvalds static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock); 591da177e4SLinus Torvalds 601da177e4SLinus Torvalds pte_t * pkmap_page_table; 611da177e4SLinus Torvalds 621da177e4SLinus Torvalds static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait); 631da177e4SLinus Torvalds 641da177e4SLinus Torvalds static void flush_all_zero_pkmaps(void) 651da177e4SLinus Torvalds { 661da177e4SLinus Torvalds int i; 671da177e4SLinus Torvalds 681da177e4SLinus Torvalds flush_cache_kmaps(); 691da177e4SLinus Torvalds 701da177e4SLinus Torvalds for (i = 0; i < LAST_PKMAP; i++) { 711da177e4SLinus Torvalds struct page *page; 721da177e4SLinus Torvalds 731da177e4SLinus Torvalds /* 741da177e4SLinus Torvalds * zero means we don't have anything to do, 751da177e4SLinus Torvalds * >1 means that it is still in use. Only 761da177e4SLinus Torvalds * a count of 1 means that it is free but 771da177e4SLinus Torvalds * needs to be unmapped 781da177e4SLinus Torvalds */ 791da177e4SLinus Torvalds if (pkmap_count[i] != 1) 801da177e4SLinus Torvalds continue; 811da177e4SLinus Torvalds pkmap_count[i] = 0; 821da177e4SLinus Torvalds 831da177e4SLinus Torvalds /* sanity check */ 8475babcacSEric Sesterhenn BUG_ON(pte_none(pkmap_page_table[i])); 851da177e4SLinus Torvalds 861da177e4SLinus Torvalds /* 871da177e4SLinus Torvalds * Don't need an atomic fetch-and-clear op here; 881da177e4SLinus Torvalds * no-one has the page mapped, and cannot get at 891da177e4SLinus Torvalds * its virtual address (and hence PTE) without first 901da177e4SLinus Torvalds * getting the kmap_lock (which is held here). 911da177e4SLinus Torvalds * So no dangers, even with speculative execution. 921da177e4SLinus Torvalds */ 931da177e4SLinus Torvalds page = pte_page(pkmap_page_table[i]); 941da177e4SLinus Torvalds pte_clear(&init_mm, (unsigned long)page_address(page), 951da177e4SLinus Torvalds &pkmap_page_table[i]); 961da177e4SLinus Torvalds 971da177e4SLinus Torvalds set_page_address(page, NULL); 981da177e4SLinus Torvalds } 991da177e4SLinus Torvalds flush_tlb_kernel_range(PKMAP_ADDR(0), PKMAP_ADDR(LAST_PKMAP)); 1001da177e4SLinus Torvalds } 1011da177e4SLinus Torvalds 102*ce6234b5SJeremy Fitzhardinge /* Flush all unused kmap mappings in order to remove stray 103*ce6234b5SJeremy Fitzhardinge mappings. */ 104*ce6234b5SJeremy Fitzhardinge void kmap_flush_unused(void) 105*ce6234b5SJeremy Fitzhardinge { 106*ce6234b5SJeremy Fitzhardinge spin_lock(&kmap_lock); 107*ce6234b5SJeremy Fitzhardinge flush_all_zero_pkmaps(); 108*ce6234b5SJeremy Fitzhardinge spin_unlock(&kmap_lock); 109*ce6234b5SJeremy Fitzhardinge } 110*ce6234b5SJeremy Fitzhardinge 1111da177e4SLinus Torvalds static inline unsigned long map_new_virtual(struct page *page) 1121da177e4SLinus Torvalds { 1131da177e4SLinus Torvalds unsigned long vaddr; 1141da177e4SLinus Torvalds int count; 1151da177e4SLinus Torvalds 1161da177e4SLinus Torvalds start: 1171da177e4SLinus Torvalds count = LAST_PKMAP; 1181da177e4SLinus Torvalds /* Find an empty entry */ 1191da177e4SLinus Torvalds for (;;) { 1201da177e4SLinus Torvalds last_pkmap_nr = (last_pkmap_nr + 1) & LAST_PKMAP_MASK; 1211da177e4SLinus Torvalds if (!last_pkmap_nr) { 1221da177e4SLinus Torvalds flush_all_zero_pkmaps(); 1231da177e4SLinus Torvalds count = LAST_PKMAP; 1241da177e4SLinus Torvalds } 1251da177e4SLinus Torvalds if (!pkmap_count[last_pkmap_nr]) 1261da177e4SLinus Torvalds break; /* Found a usable entry */ 1271da177e4SLinus Torvalds if (--count) 1281da177e4SLinus Torvalds continue; 1291da177e4SLinus Torvalds 1301da177e4SLinus Torvalds /* 1311da177e4SLinus Torvalds * Sleep for somebody else to unmap their entries 1321da177e4SLinus Torvalds */ 1331da177e4SLinus Torvalds { 1341da177e4SLinus Torvalds DECLARE_WAITQUEUE(wait, current); 1351da177e4SLinus Torvalds 1361da177e4SLinus Torvalds __set_current_state(TASK_UNINTERRUPTIBLE); 1371da177e4SLinus Torvalds add_wait_queue(&pkmap_map_wait, &wait); 1381da177e4SLinus Torvalds spin_unlock(&kmap_lock); 1391da177e4SLinus Torvalds schedule(); 1401da177e4SLinus Torvalds remove_wait_queue(&pkmap_map_wait, &wait); 1411da177e4SLinus Torvalds spin_lock(&kmap_lock); 1421da177e4SLinus Torvalds 1431da177e4SLinus Torvalds /* Somebody else might have mapped it while we slept */ 1441da177e4SLinus Torvalds if (page_address(page)) 1451da177e4SLinus Torvalds return (unsigned long)page_address(page); 1461da177e4SLinus Torvalds 1471da177e4SLinus Torvalds /* Re-start */ 1481da177e4SLinus Torvalds goto start; 1491da177e4SLinus Torvalds } 1501da177e4SLinus Torvalds } 1511da177e4SLinus Torvalds vaddr = PKMAP_ADDR(last_pkmap_nr); 1521da177e4SLinus Torvalds set_pte_at(&init_mm, vaddr, 1531da177e4SLinus Torvalds &(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot)); 1541da177e4SLinus Torvalds 1551da177e4SLinus Torvalds pkmap_count[last_pkmap_nr] = 1; 1561da177e4SLinus Torvalds set_page_address(page, (void *)vaddr); 1571da177e4SLinus Torvalds 1581da177e4SLinus Torvalds return vaddr; 1591da177e4SLinus Torvalds } 1601da177e4SLinus Torvalds 1611da177e4SLinus Torvalds void fastcall *kmap_high(struct page *page) 1621da177e4SLinus Torvalds { 1631da177e4SLinus Torvalds unsigned long vaddr; 1641da177e4SLinus Torvalds 1651da177e4SLinus Torvalds /* 1661da177e4SLinus Torvalds * For highmem pages, we can't trust "virtual" until 1671da177e4SLinus Torvalds * after we have the lock. 1681da177e4SLinus Torvalds * 1691da177e4SLinus Torvalds * We cannot call this from interrupts, as it may block 1701da177e4SLinus Torvalds */ 1711da177e4SLinus Torvalds spin_lock(&kmap_lock); 1721da177e4SLinus Torvalds vaddr = (unsigned long)page_address(page); 1731da177e4SLinus Torvalds if (!vaddr) 1741da177e4SLinus Torvalds vaddr = map_new_virtual(page); 1751da177e4SLinus Torvalds pkmap_count[PKMAP_NR(vaddr)]++; 17675babcacSEric Sesterhenn BUG_ON(pkmap_count[PKMAP_NR(vaddr)] < 2); 1771da177e4SLinus Torvalds spin_unlock(&kmap_lock); 1781da177e4SLinus Torvalds return (void*) vaddr; 1791da177e4SLinus Torvalds } 1801da177e4SLinus Torvalds 1811da177e4SLinus Torvalds EXPORT_SYMBOL(kmap_high); 1821da177e4SLinus Torvalds 1831da177e4SLinus Torvalds void fastcall kunmap_high(struct page *page) 1841da177e4SLinus Torvalds { 1851da177e4SLinus Torvalds unsigned long vaddr; 1861da177e4SLinus Torvalds unsigned long nr; 1871da177e4SLinus Torvalds int need_wakeup; 1881da177e4SLinus Torvalds 1891da177e4SLinus Torvalds spin_lock(&kmap_lock); 1901da177e4SLinus Torvalds vaddr = (unsigned long)page_address(page); 19175babcacSEric Sesterhenn BUG_ON(!vaddr); 1921da177e4SLinus Torvalds nr = PKMAP_NR(vaddr); 1931da177e4SLinus Torvalds 1941da177e4SLinus Torvalds /* 1951da177e4SLinus Torvalds * A count must never go down to zero 1961da177e4SLinus Torvalds * without a TLB flush! 1971da177e4SLinus Torvalds */ 1981da177e4SLinus Torvalds need_wakeup = 0; 1991da177e4SLinus Torvalds switch (--pkmap_count[nr]) { 2001da177e4SLinus Torvalds case 0: 2011da177e4SLinus Torvalds BUG(); 2021da177e4SLinus Torvalds case 1: 2031da177e4SLinus Torvalds /* 2041da177e4SLinus Torvalds * Avoid an unnecessary wake_up() function call. 2051da177e4SLinus Torvalds * The common case is pkmap_count[] == 1, but 2061da177e4SLinus Torvalds * no waiters. 2071da177e4SLinus Torvalds * The tasks queued in the wait-queue are guarded 2081da177e4SLinus Torvalds * by both the lock in the wait-queue-head and by 2091da177e4SLinus Torvalds * the kmap_lock. As the kmap_lock is held here, 2101da177e4SLinus Torvalds * no need for the wait-queue-head's lock. Simply 2111da177e4SLinus Torvalds * test if the queue is empty. 2121da177e4SLinus Torvalds */ 2131da177e4SLinus Torvalds need_wakeup = waitqueue_active(&pkmap_map_wait); 2141da177e4SLinus Torvalds } 2151da177e4SLinus Torvalds spin_unlock(&kmap_lock); 2161da177e4SLinus Torvalds 2171da177e4SLinus Torvalds /* do wake-up, if needed, race-free outside of the spin lock */ 2181da177e4SLinus Torvalds if (need_wakeup) 2191da177e4SLinus Torvalds wake_up(&pkmap_map_wait); 2201da177e4SLinus Torvalds } 2211da177e4SLinus Torvalds 2221da177e4SLinus Torvalds EXPORT_SYMBOL(kunmap_high); 2231da177e4SLinus Torvalds #endif 2241da177e4SLinus Torvalds 2251da177e4SLinus Torvalds #if defined(HASHED_PAGE_VIRTUAL) 2261da177e4SLinus Torvalds 2271da177e4SLinus Torvalds #define PA_HASH_ORDER 7 2281da177e4SLinus Torvalds 2291da177e4SLinus Torvalds /* 2301da177e4SLinus Torvalds * Describes one page->virtual association 2311da177e4SLinus Torvalds */ 2321da177e4SLinus Torvalds struct page_address_map { 2331da177e4SLinus Torvalds struct page *page; 2341da177e4SLinus Torvalds void *virtual; 2351da177e4SLinus Torvalds struct list_head list; 2361da177e4SLinus Torvalds }; 2371da177e4SLinus Torvalds 2381da177e4SLinus Torvalds /* 2391da177e4SLinus Torvalds * page_address_map freelist, allocated from page_address_maps. 2401da177e4SLinus Torvalds */ 2411da177e4SLinus Torvalds static struct list_head page_address_pool; /* freelist */ 2421da177e4SLinus Torvalds static spinlock_t pool_lock; /* protects page_address_pool */ 2431da177e4SLinus Torvalds 2441da177e4SLinus Torvalds /* 2451da177e4SLinus Torvalds * Hash table bucket 2461da177e4SLinus Torvalds */ 2471da177e4SLinus Torvalds static struct page_address_slot { 2481da177e4SLinus Torvalds struct list_head lh; /* List of page_address_maps */ 2491da177e4SLinus Torvalds spinlock_t lock; /* Protect this bucket's list */ 2501da177e4SLinus Torvalds } ____cacheline_aligned_in_smp page_address_htable[1<<PA_HASH_ORDER]; 2511da177e4SLinus Torvalds 2521da177e4SLinus Torvalds static struct page_address_slot *page_slot(struct page *page) 2531da177e4SLinus Torvalds { 2541da177e4SLinus Torvalds return &page_address_htable[hash_ptr(page, PA_HASH_ORDER)]; 2551da177e4SLinus Torvalds } 2561da177e4SLinus Torvalds 2571da177e4SLinus Torvalds void *page_address(struct page *page) 2581da177e4SLinus Torvalds { 2591da177e4SLinus Torvalds unsigned long flags; 2601da177e4SLinus Torvalds void *ret; 2611da177e4SLinus Torvalds struct page_address_slot *pas; 2621da177e4SLinus Torvalds 2631da177e4SLinus Torvalds if (!PageHighMem(page)) 2641da177e4SLinus Torvalds return lowmem_page_address(page); 2651da177e4SLinus Torvalds 2661da177e4SLinus Torvalds pas = page_slot(page); 2671da177e4SLinus Torvalds ret = NULL; 2681da177e4SLinus Torvalds spin_lock_irqsave(&pas->lock, flags); 2691da177e4SLinus Torvalds if (!list_empty(&pas->lh)) { 2701da177e4SLinus Torvalds struct page_address_map *pam; 2711da177e4SLinus Torvalds 2721da177e4SLinus Torvalds list_for_each_entry(pam, &pas->lh, list) { 2731da177e4SLinus Torvalds if (pam->page == page) { 2741da177e4SLinus Torvalds ret = pam->virtual; 2751da177e4SLinus Torvalds goto done; 2761da177e4SLinus Torvalds } 2771da177e4SLinus Torvalds } 2781da177e4SLinus Torvalds } 2791da177e4SLinus Torvalds done: 2801da177e4SLinus Torvalds spin_unlock_irqrestore(&pas->lock, flags); 2811da177e4SLinus Torvalds return ret; 2821da177e4SLinus Torvalds } 2831da177e4SLinus Torvalds 2841da177e4SLinus Torvalds EXPORT_SYMBOL(page_address); 2851da177e4SLinus Torvalds 2861da177e4SLinus Torvalds void set_page_address(struct page *page, void *virtual) 2871da177e4SLinus Torvalds { 2881da177e4SLinus Torvalds unsigned long flags; 2891da177e4SLinus Torvalds struct page_address_slot *pas; 2901da177e4SLinus Torvalds struct page_address_map *pam; 2911da177e4SLinus Torvalds 2921da177e4SLinus Torvalds BUG_ON(!PageHighMem(page)); 2931da177e4SLinus Torvalds 2941da177e4SLinus Torvalds pas = page_slot(page); 2951da177e4SLinus Torvalds if (virtual) { /* Add */ 2961da177e4SLinus Torvalds BUG_ON(list_empty(&page_address_pool)); 2971da177e4SLinus Torvalds 2981da177e4SLinus Torvalds spin_lock_irqsave(&pool_lock, flags); 2991da177e4SLinus Torvalds pam = list_entry(page_address_pool.next, 3001da177e4SLinus Torvalds struct page_address_map, list); 3011da177e4SLinus Torvalds list_del(&pam->list); 3021da177e4SLinus Torvalds spin_unlock_irqrestore(&pool_lock, flags); 3031da177e4SLinus Torvalds 3041da177e4SLinus Torvalds pam->page = page; 3051da177e4SLinus Torvalds pam->virtual = virtual; 3061da177e4SLinus Torvalds 3071da177e4SLinus Torvalds spin_lock_irqsave(&pas->lock, flags); 3081da177e4SLinus Torvalds list_add_tail(&pam->list, &pas->lh); 3091da177e4SLinus Torvalds spin_unlock_irqrestore(&pas->lock, flags); 3101da177e4SLinus Torvalds } else { /* Remove */ 3111da177e4SLinus Torvalds spin_lock_irqsave(&pas->lock, flags); 3121da177e4SLinus Torvalds list_for_each_entry(pam, &pas->lh, list) { 3131da177e4SLinus Torvalds if (pam->page == page) { 3141da177e4SLinus Torvalds list_del(&pam->list); 3151da177e4SLinus Torvalds spin_unlock_irqrestore(&pas->lock, flags); 3161da177e4SLinus Torvalds spin_lock_irqsave(&pool_lock, flags); 3171da177e4SLinus Torvalds list_add_tail(&pam->list, &page_address_pool); 3181da177e4SLinus Torvalds spin_unlock_irqrestore(&pool_lock, flags); 3191da177e4SLinus Torvalds goto done; 3201da177e4SLinus Torvalds } 3211da177e4SLinus Torvalds } 3221da177e4SLinus Torvalds spin_unlock_irqrestore(&pas->lock, flags); 3231da177e4SLinus Torvalds } 3241da177e4SLinus Torvalds done: 3251da177e4SLinus Torvalds return; 3261da177e4SLinus Torvalds } 3271da177e4SLinus Torvalds 3281da177e4SLinus Torvalds static struct page_address_map page_address_maps[LAST_PKMAP]; 3291da177e4SLinus Torvalds 3301da177e4SLinus Torvalds void __init page_address_init(void) 3311da177e4SLinus Torvalds { 3321da177e4SLinus Torvalds int i; 3331da177e4SLinus Torvalds 3341da177e4SLinus Torvalds INIT_LIST_HEAD(&page_address_pool); 3351da177e4SLinus Torvalds for (i = 0; i < ARRAY_SIZE(page_address_maps); i++) 3361da177e4SLinus Torvalds list_add(&page_address_maps[i].list, &page_address_pool); 3371da177e4SLinus Torvalds for (i = 0; i < ARRAY_SIZE(page_address_htable); i++) { 3381da177e4SLinus Torvalds INIT_LIST_HEAD(&page_address_htable[i].lh); 3391da177e4SLinus Torvalds spin_lock_init(&page_address_htable[i].lock); 3401da177e4SLinus Torvalds } 3411da177e4SLinus Torvalds spin_lock_init(&pool_lock); 3421da177e4SLinus Torvalds } 3431da177e4SLinus Torvalds 3441da177e4SLinus Torvalds #endif /* defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) */ 345