xref: /linux/mm/highmem.c (revision ce6234b5298902aaec831a67d5f8d9bd2ef5a488)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * High memory handling common code and variables.
31da177e4SLinus Torvalds  *
41da177e4SLinus Torvalds  * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de
51da177e4SLinus Torvalds  *          Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de
61da177e4SLinus Torvalds  *
71da177e4SLinus Torvalds  *
81da177e4SLinus Torvalds  * Redesigned the x86 32-bit VM architecture to deal with
91da177e4SLinus Torvalds  * 64-bit physical space. With current x86 CPUs this
101da177e4SLinus Torvalds  * means up to 64 Gigabytes physical RAM.
111da177e4SLinus Torvalds  *
121da177e4SLinus Torvalds  * Rewrote high memory support to move the page cache into
131da177e4SLinus Torvalds  * high memory. Implemented permanent (schedulable) kmaps
141da177e4SLinus Torvalds  * based on Linus' idea.
151da177e4SLinus Torvalds  *
161da177e4SLinus Torvalds  * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
171da177e4SLinus Torvalds  */
181da177e4SLinus Torvalds 
191da177e4SLinus Torvalds #include <linux/mm.h>
201da177e4SLinus Torvalds #include <linux/module.h>
211da177e4SLinus Torvalds #include <linux/swap.h>
221da177e4SLinus Torvalds #include <linux/bio.h>
231da177e4SLinus Torvalds #include <linux/pagemap.h>
241da177e4SLinus Torvalds #include <linux/mempool.h>
251da177e4SLinus Torvalds #include <linux/blkdev.h>
261da177e4SLinus Torvalds #include <linux/init.h>
271da177e4SLinus Torvalds #include <linux/hash.h>
281da177e4SLinus Torvalds #include <linux/highmem.h>
292056a782SJens Axboe #include <linux/blktrace_api.h>
301da177e4SLinus Torvalds #include <asm/tlbflush.h>
311da177e4SLinus Torvalds 
/*
 * Virtual_count (the per-slot value kept in pkmap_count[]) is not a
 * pure "count".
 *  0 means that it is not mapped, and has not been mapped
 *    since a TLB flush - it is usable.
 *  1 means that there are no users, but it has been mapped
 *    since the last TLB flush - so we can't use it.
 *  n means that there are (n-1) current users of it.
 */
401da177e4SLinus Torvalds #ifdef CONFIG_HIGHMEM
41260b2367SAl Viro 
42c1f60a5aSChristoph Lameter unsigned long totalhigh_pages __read_mostly;
43c1f60a5aSChristoph Lameter 
44c1f60a5aSChristoph Lameter unsigned int nr_free_highpages (void)
45c1f60a5aSChristoph Lameter {
46c1f60a5aSChristoph Lameter 	pg_data_t *pgdat;
47c1f60a5aSChristoph Lameter 	unsigned int pages = 0;
48c1f60a5aSChristoph Lameter 
49c1f60a5aSChristoph Lameter 	for_each_online_pgdat(pgdat)
50d23ad423SChristoph Lameter 		pages += zone_page_state(&pgdat->node_zones[ZONE_HIGHMEM],
51d23ad423SChristoph Lameter 			NR_FREE_PAGES);
52c1f60a5aSChristoph Lameter 
53c1f60a5aSChristoph Lameter 	return pages;
54c1f60a5aSChristoph Lameter }
55c1f60a5aSChristoph Lameter 
561da177e4SLinus Torvalds static int pkmap_count[LAST_PKMAP];
571da177e4SLinus Torvalds static unsigned int last_pkmap_nr;
581da177e4SLinus Torvalds static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock);
591da177e4SLinus Torvalds 
601da177e4SLinus Torvalds pte_t * pkmap_page_table;
611da177e4SLinus Torvalds 
621da177e4SLinus Torvalds static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait);
631da177e4SLinus Torvalds 
641da177e4SLinus Torvalds static void flush_all_zero_pkmaps(void)
651da177e4SLinus Torvalds {
661da177e4SLinus Torvalds 	int i;
671da177e4SLinus Torvalds 
681da177e4SLinus Torvalds 	flush_cache_kmaps();
691da177e4SLinus Torvalds 
701da177e4SLinus Torvalds 	for (i = 0; i < LAST_PKMAP; i++) {
711da177e4SLinus Torvalds 		struct page *page;
721da177e4SLinus Torvalds 
731da177e4SLinus Torvalds 		/*
741da177e4SLinus Torvalds 		 * zero means we don't have anything to do,
751da177e4SLinus Torvalds 		 * >1 means that it is still in use. Only
761da177e4SLinus Torvalds 		 * a count of 1 means that it is free but
771da177e4SLinus Torvalds 		 * needs to be unmapped
781da177e4SLinus Torvalds 		 */
791da177e4SLinus Torvalds 		if (pkmap_count[i] != 1)
801da177e4SLinus Torvalds 			continue;
811da177e4SLinus Torvalds 		pkmap_count[i] = 0;
821da177e4SLinus Torvalds 
831da177e4SLinus Torvalds 		/* sanity check */
8475babcacSEric Sesterhenn 		BUG_ON(pte_none(pkmap_page_table[i]));
851da177e4SLinus Torvalds 
861da177e4SLinus Torvalds 		/*
871da177e4SLinus Torvalds 		 * Don't need an atomic fetch-and-clear op here;
881da177e4SLinus Torvalds 		 * no-one has the page mapped, and cannot get at
891da177e4SLinus Torvalds 		 * its virtual address (and hence PTE) without first
901da177e4SLinus Torvalds 		 * getting the kmap_lock (which is held here).
911da177e4SLinus Torvalds 		 * So no dangers, even with speculative execution.
921da177e4SLinus Torvalds 		 */
931da177e4SLinus Torvalds 		page = pte_page(pkmap_page_table[i]);
941da177e4SLinus Torvalds 		pte_clear(&init_mm, (unsigned long)page_address(page),
951da177e4SLinus Torvalds 			  &pkmap_page_table[i]);
961da177e4SLinus Torvalds 
971da177e4SLinus Torvalds 		set_page_address(page, NULL);
981da177e4SLinus Torvalds 	}
991da177e4SLinus Torvalds 	flush_tlb_kernel_range(PKMAP_ADDR(0), PKMAP_ADDR(LAST_PKMAP));
1001da177e4SLinus Torvalds }
1011da177e4SLinus Torvalds 
102*ce6234b5SJeremy Fitzhardinge /* Flush all unused kmap mappings in order to remove stray
103*ce6234b5SJeremy Fitzhardinge    mappings. */
104*ce6234b5SJeremy Fitzhardinge void kmap_flush_unused(void)
105*ce6234b5SJeremy Fitzhardinge {
106*ce6234b5SJeremy Fitzhardinge 	spin_lock(&kmap_lock);
107*ce6234b5SJeremy Fitzhardinge 	flush_all_zero_pkmaps();
108*ce6234b5SJeremy Fitzhardinge 	spin_unlock(&kmap_lock);
109*ce6234b5SJeremy Fitzhardinge }
110*ce6234b5SJeremy Fitzhardinge 
1111da177e4SLinus Torvalds static inline unsigned long map_new_virtual(struct page *page)
1121da177e4SLinus Torvalds {
1131da177e4SLinus Torvalds 	unsigned long vaddr;
1141da177e4SLinus Torvalds 	int count;
1151da177e4SLinus Torvalds 
1161da177e4SLinus Torvalds start:
1171da177e4SLinus Torvalds 	count = LAST_PKMAP;
1181da177e4SLinus Torvalds 	/* Find an empty entry */
1191da177e4SLinus Torvalds 	for (;;) {
1201da177e4SLinus Torvalds 		last_pkmap_nr = (last_pkmap_nr + 1) & LAST_PKMAP_MASK;
1211da177e4SLinus Torvalds 		if (!last_pkmap_nr) {
1221da177e4SLinus Torvalds 			flush_all_zero_pkmaps();
1231da177e4SLinus Torvalds 			count = LAST_PKMAP;
1241da177e4SLinus Torvalds 		}
1251da177e4SLinus Torvalds 		if (!pkmap_count[last_pkmap_nr])
1261da177e4SLinus Torvalds 			break;	/* Found a usable entry */
1271da177e4SLinus Torvalds 		if (--count)
1281da177e4SLinus Torvalds 			continue;
1291da177e4SLinus Torvalds 
1301da177e4SLinus Torvalds 		/*
1311da177e4SLinus Torvalds 		 * Sleep for somebody else to unmap their entries
1321da177e4SLinus Torvalds 		 */
1331da177e4SLinus Torvalds 		{
1341da177e4SLinus Torvalds 			DECLARE_WAITQUEUE(wait, current);
1351da177e4SLinus Torvalds 
1361da177e4SLinus Torvalds 			__set_current_state(TASK_UNINTERRUPTIBLE);
1371da177e4SLinus Torvalds 			add_wait_queue(&pkmap_map_wait, &wait);
1381da177e4SLinus Torvalds 			spin_unlock(&kmap_lock);
1391da177e4SLinus Torvalds 			schedule();
1401da177e4SLinus Torvalds 			remove_wait_queue(&pkmap_map_wait, &wait);
1411da177e4SLinus Torvalds 			spin_lock(&kmap_lock);
1421da177e4SLinus Torvalds 
1431da177e4SLinus Torvalds 			/* Somebody else might have mapped it while we slept */
1441da177e4SLinus Torvalds 			if (page_address(page))
1451da177e4SLinus Torvalds 				return (unsigned long)page_address(page);
1461da177e4SLinus Torvalds 
1471da177e4SLinus Torvalds 			/* Re-start */
1481da177e4SLinus Torvalds 			goto start;
1491da177e4SLinus Torvalds 		}
1501da177e4SLinus Torvalds 	}
1511da177e4SLinus Torvalds 	vaddr = PKMAP_ADDR(last_pkmap_nr);
1521da177e4SLinus Torvalds 	set_pte_at(&init_mm, vaddr,
1531da177e4SLinus Torvalds 		   &(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot));
1541da177e4SLinus Torvalds 
1551da177e4SLinus Torvalds 	pkmap_count[last_pkmap_nr] = 1;
1561da177e4SLinus Torvalds 	set_page_address(page, (void *)vaddr);
1571da177e4SLinus Torvalds 
1581da177e4SLinus Torvalds 	return vaddr;
1591da177e4SLinus Torvalds }
1601da177e4SLinus Torvalds 
1611da177e4SLinus Torvalds void fastcall *kmap_high(struct page *page)
1621da177e4SLinus Torvalds {
1631da177e4SLinus Torvalds 	unsigned long vaddr;
1641da177e4SLinus Torvalds 
1651da177e4SLinus Torvalds 	/*
1661da177e4SLinus Torvalds 	 * For highmem pages, we can't trust "virtual" until
1671da177e4SLinus Torvalds 	 * after we have the lock.
1681da177e4SLinus Torvalds 	 *
1691da177e4SLinus Torvalds 	 * We cannot call this from interrupts, as it may block
1701da177e4SLinus Torvalds 	 */
1711da177e4SLinus Torvalds 	spin_lock(&kmap_lock);
1721da177e4SLinus Torvalds 	vaddr = (unsigned long)page_address(page);
1731da177e4SLinus Torvalds 	if (!vaddr)
1741da177e4SLinus Torvalds 		vaddr = map_new_virtual(page);
1751da177e4SLinus Torvalds 	pkmap_count[PKMAP_NR(vaddr)]++;
17675babcacSEric Sesterhenn 	BUG_ON(pkmap_count[PKMAP_NR(vaddr)] < 2);
1771da177e4SLinus Torvalds 	spin_unlock(&kmap_lock);
1781da177e4SLinus Torvalds 	return (void*) vaddr;
1791da177e4SLinus Torvalds }
1801da177e4SLinus Torvalds 
1811da177e4SLinus Torvalds EXPORT_SYMBOL(kmap_high);
1821da177e4SLinus Torvalds 
1831da177e4SLinus Torvalds void fastcall kunmap_high(struct page *page)
1841da177e4SLinus Torvalds {
1851da177e4SLinus Torvalds 	unsigned long vaddr;
1861da177e4SLinus Torvalds 	unsigned long nr;
1871da177e4SLinus Torvalds 	int need_wakeup;
1881da177e4SLinus Torvalds 
1891da177e4SLinus Torvalds 	spin_lock(&kmap_lock);
1901da177e4SLinus Torvalds 	vaddr = (unsigned long)page_address(page);
19175babcacSEric Sesterhenn 	BUG_ON(!vaddr);
1921da177e4SLinus Torvalds 	nr = PKMAP_NR(vaddr);
1931da177e4SLinus Torvalds 
1941da177e4SLinus Torvalds 	/*
1951da177e4SLinus Torvalds 	 * A count must never go down to zero
1961da177e4SLinus Torvalds 	 * without a TLB flush!
1971da177e4SLinus Torvalds 	 */
1981da177e4SLinus Torvalds 	need_wakeup = 0;
1991da177e4SLinus Torvalds 	switch (--pkmap_count[nr]) {
2001da177e4SLinus Torvalds 	case 0:
2011da177e4SLinus Torvalds 		BUG();
2021da177e4SLinus Torvalds 	case 1:
2031da177e4SLinus Torvalds 		/*
2041da177e4SLinus Torvalds 		 * Avoid an unnecessary wake_up() function call.
2051da177e4SLinus Torvalds 		 * The common case is pkmap_count[] == 1, but
2061da177e4SLinus Torvalds 		 * no waiters.
2071da177e4SLinus Torvalds 		 * The tasks queued in the wait-queue are guarded
2081da177e4SLinus Torvalds 		 * by both the lock in the wait-queue-head and by
2091da177e4SLinus Torvalds 		 * the kmap_lock.  As the kmap_lock is held here,
2101da177e4SLinus Torvalds 		 * no need for the wait-queue-head's lock.  Simply
2111da177e4SLinus Torvalds 		 * test if the queue is empty.
2121da177e4SLinus Torvalds 		 */
2131da177e4SLinus Torvalds 		need_wakeup = waitqueue_active(&pkmap_map_wait);
2141da177e4SLinus Torvalds 	}
2151da177e4SLinus Torvalds 	spin_unlock(&kmap_lock);
2161da177e4SLinus Torvalds 
2171da177e4SLinus Torvalds 	/* do wake-up, if needed, race-free outside of the spin lock */
2181da177e4SLinus Torvalds 	if (need_wakeup)
2191da177e4SLinus Torvalds 		wake_up(&pkmap_map_wait);
2201da177e4SLinus Torvalds }
2211da177e4SLinus Torvalds 
2221da177e4SLinus Torvalds EXPORT_SYMBOL(kunmap_high);
2231da177e4SLinus Torvalds #endif
2241da177e4SLinus Torvalds 
2251da177e4SLinus Torvalds #if defined(HASHED_PAGE_VIRTUAL)
2261da177e4SLinus Torvalds 
2271da177e4SLinus Torvalds #define PA_HASH_ORDER	7
2281da177e4SLinus Torvalds 
2291da177e4SLinus Torvalds /*
2301da177e4SLinus Torvalds  * Describes one page->virtual association
2311da177e4SLinus Torvalds  */
2321da177e4SLinus Torvalds struct page_address_map {
2331da177e4SLinus Torvalds 	struct page *page;
2341da177e4SLinus Torvalds 	void *virtual;
2351da177e4SLinus Torvalds 	struct list_head list;
2361da177e4SLinus Torvalds };
2371da177e4SLinus Torvalds 
2381da177e4SLinus Torvalds /*
2391da177e4SLinus Torvalds  * page_address_map freelist, allocated from page_address_maps.
2401da177e4SLinus Torvalds  */
2411da177e4SLinus Torvalds static struct list_head page_address_pool;	/* freelist */
2421da177e4SLinus Torvalds static spinlock_t pool_lock;			/* protects page_address_pool */
2431da177e4SLinus Torvalds 
2441da177e4SLinus Torvalds /*
2451da177e4SLinus Torvalds  * Hash table bucket
2461da177e4SLinus Torvalds  */
2471da177e4SLinus Torvalds static struct page_address_slot {
2481da177e4SLinus Torvalds 	struct list_head lh;			/* List of page_address_maps */
2491da177e4SLinus Torvalds 	spinlock_t lock;			/* Protect this bucket's list */
2501da177e4SLinus Torvalds } ____cacheline_aligned_in_smp page_address_htable[1<<PA_HASH_ORDER];
2511da177e4SLinus Torvalds 
2521da177e4SLinus Torvalds static struct page_address_slot *page_slot(struct page *page)
2531da177e4SLinus Torvalds {
2541da177e4SLinus Torvalds 	return &page_address_htable[hash_ptr(page, PA_HASH_ORDER)];
2551da177e4SLinus Torvalds }
2561da177e4SLinus Torvalds 
2571da177e4SLinus Torvalds void *page_address(struct page *page)
2581da177e4SLinus Torvalds {
2591da177e4SLinus Torvalds 	unsigned long flags;
2601da177e4SLinus Torvalds 	void *ret;
2611da177e4SLinus Torvalds 	struct page_address_slot *pas;
2621da177e4SLinus Torvalds 
2631da177e4SLinus Torvalds 	if (!PageHighMem(page))
2641da177e4SLinus Torvalds 		return lowmem_page_address(page);
2651da177e4SLinus Torvalds 
2661da177e4SLinus Torvalds 	pas = page_slot(page);
2671da177e4SLinus Torvalds 	ret = NULL;
2681da177e4SLinus Torvalds 	spin_lock_irqsave(&pas->lock, flags);
2691da177e4SLinus Torvalds 	if (!list_empty(&pas->lh)) {
2701da177e4SLinus Torvalds 		struct page_address_map *pam;
2711da177e4SLinus Torvalds 
2721da177e4SLinus Torvalds 		list_for_each_entry(pam, &pas->lh, list) {
2731da177e4SLinus Torvalds 			if (pam->page == page) {
2741da177e4SLinus Torvalds 				ret = pam->virtual;
2751da177e4SLinus Torvalds 				goto done;
2761da177e4SLinus Torvalds 			}
2771da177e4SLinus Torvalds 		}
2781da177e4SLinus Torvalds 	}
2791da177e4SLinus Torvalds done:
2801da177e4SLinus Torvalds 	spin_unlock_irqrestore(&pas->lock, flags);
2811da177e4SLinus Torvalds 	return ret;
2821da177e4SLinus Torvalds }
2831da177e4SLinus Torvalds 
2841da177e4SLinus Torvalds EXPORT_SYMBOL(page_address);
2851da177e4SLinus Torvalds 
2861da177e4SLinus Torvalds void set_page_address(struct page *page, void *virtual)
2871da177e4SLinus Torvalds {
2881da177e4SLinus Torvalds 	unsigned long flags;
2891da177e4SLinus Torvalds 	struct page_address_slot *pas;
2901da177e4SLinus Torvalds 	struct page_address_map *pam;
2911da177e4SLinus Torvalds 
2921da177e4SLinus Torvalds 	BUG_ON(!PageHighMem(page));
2931da177e4SLinus Torvalds 
2941da177e4SLinus Torvalds 	pas = page_slot(page);
2951da177e4SLinus Torvalds 	if (virtual) {		/* Add */
2961da177e4SLinus Torvalds 		BUG_ON(list_empty(&page_address_pool));
2971da177e4SLinus Torvalds 
2981da177e4SLinus Torvalds 		spin_lock_irqsave(&pool_lock, flags);
2991da177e4SLinus Torvalds 		pam = list_entry(page_address_pool.next,
3001da177e4SLinus Torvalds 				struct page_address_map, list);
3011da177e4SLinus Torvalds 		list_del(&pam->list);
3021da177e4SLinus Torvalds 		spin_unlock_irqrestore(&pool_lock, flags);
3031da177e4SLinus Torvalds 
3041da177e4SLinus Torvalds 		pam->page = page;
3051da177e4SLinus Torvalds 		pam->virtual = virtual;
3061da177e4SLinus Torvalds 
3071da177e4SLinus Torvalds 		spin_lock_irqsave(&pas->lock, flags);
3081da177e4SLinus Torvalds 		list_add_tail(&pam->list, &pas->lh);
3091da177e4SLinus Torvalds 		spin_unlock_irqrestore(&pas->lock, flags);
3101da177e4SLinus Torvalds 	} else {		/* Remove */
3111da177e4SLinus Torvalds 		spin_lock_irqsave(&pas->lock, flags);
3121da177e4SLinus Torvalds 		list_for_each_entry(pam, &pas->lh, list) {
3131da177e4SLinus Torvalds 			if (pam->page == page) {
3141da177e4SLinus Torvalds 				list_del(&pam->list);
3151da177e4SLinus Torvalds 				spin_unlock_irqrestore(&pas->lock, flags);
3161da177e4SLinus Torvalds 				spin_lock_irqsave(&pool_lock, flags);
3171da177e4SLinus Torvalds 				list_add_tail(&pam->list, &page_address_pool);
3181da177e4SLinus Torvalds 				spin_unlock_irqrestore(&pool_lock, flags);
3191da177e4SLinus Torvalds 				goto done;
3201da177e4SLinus Torvalds 			}
3211da177e4SLinus Torvalds 		}
3221da177e4SLinus Torvalds 		spin_unlock_irqrestore(&pas->lock, flags);
3231da177e4SLinus Torvalds 	}
3241da177e4SLinus Torvalds done:
3251da177e4SLinus Torvalds 	return;
3261da177e4SLinus Torvalds }
3271da177e4SLinus Torvalds 
3281da177e4SLinus Torvalds static struct page_address_map page_address_maps[LAST_PKMAP];
3291da177e4SLinus Torvalds 
3301da177e4SLinus Torvalds void __init page_address_init(void)
3311da177e4SLinus Torvalds {
3321da177e4SLinus Torvalds 	int i;
3331da177e4SLinus Torvalds 
3341da177e4SLinus Torvalds 	INIT_LIST_HEAD(&page_address_pool);
3351da177e4SLinus Torvalds 	for (i = 0; i < ARRAY_SIZE(page_address_maps); i++)
3361da177e4SLinus Torvalds 		list_add(&page_address_maps[i].list, &page_address_pool);
3371da177e4SLinus Torvalds 	for (i = 0; i < ARRAY_SIZE(page_address_htable); i++) {
3381da177e4SLinus Torvalds 		INIT_LIST_HEAD(&page_address_htable[i].lh);
3391da177e4SLinus Torvalds 		spin_lock_init(&page_address_htable[i].lock);
3401da177e4SLinus Torvalds 	}
3411da177e4SLinus Torvalds 	spin_lock_init(&pool_lock);
3421da177e4SLinus Torvalds }
3431da177e4SLinus Torvalds 
#endif	/* defined(HASHED_PAGE_VIRTUAL) */
345