xref: /linux/mm/highmem.c (revision 2b64b2ed277ff23e785fbdb65098ee7e1252d64f)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * High memory handling common code and variables.
4  *
5  * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de
6  *          Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de
7  *
8  *
9  * Redesigned the x86 32-bit VM architecture to deal with
10  * 64-bit physical space. With current x86 CPUs this
11  * means up to 64 Gigabytes physical RAM.
12  *
13  * Rewrote high memory support to move the page cache into
14  * high memory. Implemented permanent (schedulable) kmaps
15  * based on Linus' idea.
16  *
17  * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
18  */
19 
20 #include <linux/mm.h>
21 #include <linux/export.h>
22 #include <linux/swap.h>
23 #include <linux/bio.h>
24 #include <linux/pagemap.h>
25 #include <linux/mempool.h>
26 #include <linux/blkdev.h>
27 #include <linux/init.h>
28 #include <linux/hash.h>
29 #include <linux/highmem.h>
30 #include <linux/kgdb.h>
31 #include <asm/tlbflush.h>
32 
33 
34 #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
/*
 * Per-CPU integer, defined whenever CONFIG_HIGHMEM or CONFIG_X86_32 is set.
 * It is not accessed directly anywhere in this file; it is only exported
 * (under CONFIG_HIGHMEM) for use by other code.
 * NOTE(review): presumably the per-CPU nesting index used by the
 * kmap_atomic() helpers - confirm against <linux/highmem.h>.
 */
DEFINE_PER_CPU(int, __kmap_atomic_idx);
36 #endif
37 
/*
 * An entry of pkmap_count[] is not a pure "count".
 *  0 means that the slot is not mapped, and has not been mapped
 *    since a TLB flush - it is usable.
 *  1 means that there are no users, but the slot has been mapped
 *    since the last TLB flush - so we can't use it.
 *  n means that there are (n-1) current users of it.
 */
46 #ifdef CONFIG_HIGHMEM
47 
/*
 * Architectures with an aliasing data cache may define the following family
 * of helper functions in their asm/highmem.h to control the cache color of
 * the virtual addresses at which physical memory pages are mapped by kmap.
 */
53 #ifndef get_pkmap_color
54 
/*
 * Default cache color for the virtual address at which @page gets mapped.
 * This fallback knows a single color only, so every page gets color 0 and
 * @page itself is never inspected.
 */
static inline unsigned int get_pkmap_color(struct page *page)
{
	const unsigned int default_color = 0;

	return default_color;
}
#define get_pkmap_color get_pkmap_color
63 
64 /*
65  * Get next index for mapping inside PKMAP region for page with given color.
66  */
67 static inline unsigned int get_next_pkmap_nr(unsigned int color)
68 {
69 	static unsigned int last_pkmap_nr;
70 
71 	last_pkmap_nr = (last_pkmap_nr + 1) & LAST_PKMAP_MASK;
72 	return last_pkmap_nr;
73 }
74 
/*
 * Tell whether the slot index @pkmap_nr has wrapped around the end of the
 * PKMAP region, i.e. is back at slot 0.  The caller reacts to a wrap by
 * trying to flush all unused PKMAP slots.  @color is ignored by this
 * default implementation.
 *
 * Returns 1 on wrap-around, 0 otherwise.
 */
static inline int no_more_pkmaps(unsigned int pkmap_nr, unsigned int color)
{
	return !pkmap_nr;
}
84 
/*
 * Get the number of PKMAP entries of the given color. If no free slot is
 * found after checking that many entries, kmap will sleep waiting for
 * someone to call kunmap and free a PKMAP slot.
 *
 * This default implementation ignores @color and always reports the whole
 * region, LAST_PKMAP entries.
 */
static inline int get_pkmap_entries_count(unsigned int color)
{
	return LAST_PKMAP;
}
94 
/*
 * Get head of a wait queue for PKMAP entries of the given color.
 * Wait queues for different mapping colors should be independent to avoid
 * unnecessary wakeups caused by freeing of slots of other colors.
 *
 * This default implementation ignores @color: all callers share the one
 * function-local static queue declared below.
 */
static inline wait_queue_head_t *get_pkmap_wait_queue_head(unsigned int color)
{
	/* Single queue shared by every color in the default configuration. */
	static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait);

	return &pkmap_map_wait;
}
106 #endif
107 
/*
 * Exported atomic counter; it is defined here but not modified anywhere in
 * this file.
 * NOTE(review): presumably the total number of highmem pages, maintained
 * by accessor helpers elsewhere - confirm against <linux/highmem.h>.
 */
atomic_long_t _totalhigh_pages __read_mostly;
EXPORT_SYMBOL(_totalhigh_pages);

/* Make the per-CPU __kmap_atomic_idx defined above available to modules. */
EXPORT_PER_CPU_SYMBOL(__kmap_atomic_idx);
112 
113 unsigned int nr_free_highpages (void)
114 {
115 	struct zone *zone;
116 	unsigned int pages = 0;
117 
118 	for_each_populated_zone(zone) {
119 		if (is_highmem(zone))
120 			pages += zone_page_state(zone, NR_FREE_PAGES);
121 	}
122 
123 	return pages;
124 }
125 
/*
 * Per-slot state of the PKMAP region, indexed by PKMAP_NR(vaddr):
 * 0 = usable, 1 = no users but still mapped (needs unmap + TLB flush),
 * n = (n - 1) current users.  Every access in this file is made with
 * kmap_lock held.
 */
static int pkmap_count[LAST_PKMAP];
/* Serializes the permanent-kmap bookkeeping; taken via lock_kmap*(). */
static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock);

/*
 * Page table entries backing the PKMAP slots: entry i maps PKMAP_ADDR(i).
 * The pointer is not assigned in this file.
 * NOTE(review): presumably set up by architecture init code - confirm.
 */
pte_t * pkmap_page_table;
130 
/*
 * Locking wrappers around kmap_lock.
 *
 * Most architectures have no use for kmap_high_get(), so let's abstract
 * the disabling of IRQ out of the locking in that case to save on a
 * potential useless overhead.
 *
 * With ARCH_NEEDS_KMAP_HIGH_GET:
 *   lock_kmap()/unlock_kmap()          take/release the lock with
 *                                      spin_lock_irq()/spin_unlock_irq();
 *   lock_kmap_any()/unlock_kmap_any()  use the irqsave/irqrestore variants
 *                                      with the caller-supplied @flags.
 * Without it, all four map to plain spin_lock()/spin_unlock(); @flags is
 * only evaluated as a (void) expression (presumably to keep the caller's
 * variable from being reported as unused).
 */
#ifdef ARCH_NEEDS_KMAP_HIGH_GET
#define lock_kmap()             spin_lock_irq(&kmap_lock)
#define unlock_kmap()           spin_unlock_irq(&kmap_lock)
#define lock_kmap_any(flags)    spin_lock_irqsave(&kmap_lock, flags)
#define unlock_kmap_any(flags)  spin_unlock_irqrestore(&kmap_lock, flags)
#else
#define lock_kmap()             spin_lock(&kmap_lock)
#define unlock_kmap()           spin_unlock(&kmap_lock)
#define lock_kmap_any(flags)    \
		do { spin_lock(&kmap_lock); (void)(flags); } while (0)
#define unlock_kmap_any(flags)  \
		do { spin_unlock(&kmap_lock); (void)(flags); } while (0)
#endif
149 
150 struct page *kmap_to_page(void *vaddr)
151 {
152 	unsigned long addr = (unsigned long)vaddr;
153 
154 	if (addr >= PKMAP_ADDR(0) && addr < PKMAP_ADDR(LAST_PKMAP)) {
155 		int i = PKMAP_NR(addr);
156 		return pte_page(pkmap_page_table[i]);
157 	}
158 
159 	return virt_to_page(addr);
160 }
161 EXPORT_SYMBOL(kmap_to_page);
162 
163 static void flush_all_zero_pkmaps(void)
164 {
165 	int i;
166 	int need_flush = 0;
167 
168 	flush_cache_kmaps();
169 
170 	for (i = 0; i < LAST_PKMAP; i++) {
171 		struct page *page;
172 
173 		/*
174 		 * zero means we don't have anything to do,
175 		 * >1 means that it is still in use. Only
176 		 * a count of 1 means that it is free but
177 		 * needs to be unmapped
178 		 */
179 		if (pkmap_count[i] != 1)
180 			continue;
181 		pkmap_count[i] = 0;
182 
183 		/* sanity check */
184 		BUG_ON(pte_none(pkmap_page_table[i]));
185 
186 		/*
187 		 * Don't need an atomic fetch-and-clear op here;
188 		 * no-one has the page mapped, and cannot get at
189 		 * its virtual address (and hence PTE) without first
190 		 * getting the kmap_lock (which is held here).
191 		 * So no dangers, even with speculative execution.
192 		 */
193 		page = pte_page(pkmap_page_table[i]);
194 		pte_clear(&init_mm, PKMAP_ADDR(i), &pkmap_page_table[i]);
195 
196 		set_page_address(page, NULL);
197 		need_flush = 1;
198 	}
199 	if (need_flush)
200 		flush_tlb_kernel_range(PKMAP_ADDR(0), PKMAP_ADDR(LAST_PKMAP));
201 }
202 
/**
 * kmap_flush_unused - flush all unused kmap mappings in order to remove stray mappings
 *
 * Takes kmap_lock through lock_kmap(), runs flush_all_zero_pkmaps() to
 * unmap every slot that has no users left, then releases the lock again.
 */
void kmap_flush_unused(void)
{
	lock_kmap();
	flush_all_zero_pkmaps();
	unlock_kmap();
}
212 
/*
 * Find a free PKMAP slot for @page, install the mapping and return its
 * virtual address.
 *
 * Called from kmap_high() with kmap_lock held.  When no slot is free the
 * function drops the lock, sleeps uninterruptibly on the wait queue of the
 * page's color, and retakes the lock before retrying, so the lock is NOT
 * held continuously across the call.
 *
 * On return the slot's pkmap_count is 1 ("mapped, no users yet") when a new
 * mapping was created; the caller adds its own reference.  If another task
 * mapped the page while we slept, the existing address is returned instead
 * and no new slot is consumed.
 */
static inline unsigned long map_new_virtual(struct page *page)
{
	unsigned long vaddr;
	int count;
	unsigned int last_pkmap_nr;
	unsigned int color = get_pkmap_color(page);

start:
	/* Budget: how many slots to probe before giving up and sleeping. */
	count = get_pkmap_entries_count(color);
	/* Find an empty entry */
	for (;;) {
		last_pkmap_nr = get_next_pkmap_nr(color);
		if (no_more_pkmaps(last_pkmap_nr, color)) {
			/*
			 * Wrapped around: recycle the unused-but-mapped
			 * slots and restart the probe budget.
			 */
			flush_all_zero_pkmaps();
			count = get_pkmap_entries_count(color);
		}
		if (!pkmap_count[last_pkmap_nr])
			break;	/* Found a usable entry */
		if (--count)
			continue;

		/*
		 * Sleep for somebody else to unmap their entries
		 */
		{
			DECLARE_WAITQUEUE(wait, current);
			wait_queue_head_t *pkmap_map_wait =
				get_pkmap_wait_queue_head(color);

			/*
			 * Set the task state and queue ourselves while still
			 * holding kmap_lock, and only then release it and
			 * schedule(); kunmap_high() checks the queue under
			 * the same lock.
			 */
			__set_current_state(TASK_UNINTERRUPTIBLE);
			add_wait_queue(pkmap_map_wait, &wait);
			unlock_kmap();
			schedule();
			remove_wait_queue(pkmap_map_wait, &wait);
			lock_kmap();

			/* Somebody else might have mapped it while we slept */
			if (page_address(page))
				return (unsigned long)page_address(page);

			/* Re-start */
			goto start;
		}
	}
	/* Install the PTE for the chosen slot and record the association. */
	vaddr = PKMAP_ADDR(last_pkmap_nr);
	set_pte_at(&init_mm, vaddr,
		   &(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot));

	/* 1 == mapped but without users; kmap_high() bumps it to 2. */
	pkmap_count[last_pkmap_nr] = 1;
	set_page_address(page, (void *)vaddr);

	return vaddr;
}
266 
267 /**
268  * kmap_high - map a highmem page into memory
269  * @page: &struct page to map
270  *
271  * Returns the page's virtual memory address.
272  *
273  * We cannot call this from interrupts, as it may block.
274  */
275 void *kmap_high(struct page *page)
276 {
277 	unsigned long vaddr;
278 
279 	/*
280 	 * For highmem pages, we can't trust "virtual" until
281 	 * after we have the lock.
282 	 */
283 	lock_kmap();
284 	vaddr = (unsigned long)page_address(page);
285 	if (!vaddr)
286 		vaddr = map_new_virtual(page);
287 	pkmap_count[PKMAP_NR(vaddr)]++;
288 	BUG_ON(pkmap_count[PKMAP_NR(vaddr)] < 2);
289 	unlock_kmap();
290 	return (void*) vaddr;
291 }
292 
293 EXPORT_SYMBOL(kmap_high);
294 
295 #ifdef ARCH_NEEDS_KMAP_HIGH_GET
296 /**
297  * kmap_high_get - pin a highmem page into memory
298  * @page: &struct page to pin
299  *
300  * Returns the page's current virtual memory address, or NULL if no mapping
301  * exists.  If and only if a non null address is returned then a
302  * matching call to kunmap_high() is necessary.
303  *
304  * This can be called from any context.
305  */
306 void *kmap_high_get(struct page *page)
307 {
308 	unsigned long vaddr, flags;
309 
310 	lock_kmap_any(flags);
311 	vaddr = (unsigned long)page_address(page);
312 	if (vaddr) {
313 		BUG_ON(pkmap_count[PKMAP_NR(vaddr)] < 1);
314 		pkmap_count[PKMAP_NR(vaddr)]++;
315 	}
316 	unlock_kmap_any(flags);
317 	return (void*) vaddr;
318 }
319 #endif
320 
321 /**
322  * kunmap_high - unmap a highmem page into memory
323  * @page: &struct page to unmap
324  *
325  * If ARCH_NEEDS_KMAP_HIGH_GET is not defined then this may be called
326  * only from user context.
327  */
328 void kunmap_high(struct page *page)
329 {
330 	unsigned long vaddr;
331 	unsigned long nr;
332 	unsigned long flags;
333 	int need_wakeup;
334 	unsigned int color = get_pkmap_color(page);
335 	wait_queue_head_t *pkmap_map_wait;
336 
337 	lock_kmap_any(flags);
338 	vaddr = (unsigned long)page_address(page);
339 	BUG_ON(!vaddr);
340 	nr = PKMAP_NR(vaddr);
341 
342 	/*
343 	 * A count must never go down to zero
344 	 * without a TLB flush!
345 	 */
346 	need_wakeup = 0;
347 	switch (--pkmap_count[nr]) {
348 	case 0:
349 		BUG();
350 	case 1:
351 		/*
352 		 * Avoid an unnecessary wake_up() function call.
353 		 * The common case is pkmap_count[] == 1, but
354 		 * no waiters.
355 		 * The tasks queued in the wait-queue are guarded
356 		 * by both the lock in the wait-queue-head and by
357 		 * the kmap_lock.  As the kmap_lock is held here,
358 		 * no need for the wait-queue-head's lock.  Simply
359 		 * test if the queue is empty.
360 		 */
361 		pkmap_map_wait = get_pkmap_wait_queue_head(color);
362 		need_wakeup = waitqueue_active(pkmap_map_wait);
363 	}
364 	unlock_kmap_any(flags);
365 
366 	/* do wake-up, if needed, race-free outside of the spin lock */
367 	if (need_wakeup)
368 		wake_up(pkmap_map_wait);
369 }
370 
371 EXPORT_SYMBOL(kunmap_high);
372 #endif
373 
374 #if defined(HASHED_PAGE_VIRTUAL)
375 
/* log2 of the bucket count: page_address_htable has 1 << PA_HASH_ORDER slots */
#define PA_HASH_ORDER	7

/*
 * Describes one page->virtual association
 */
struct page_address_map {
	struct page *page;	/* the highmem page */
	void *virtual;		/* virtual address recorded for that page */
	struct list_head list;	/* link in the owning hash bucket's list */
};

/*
 * Backing storage for the associations: one entry per PKMAP slot, picked
 * by set_page_address() with PKMAP_NR() of the virtual address.
 */
static struct page_address_map page_address_maps[LAST_PKMAP];
388 
/*
 * Hash table bucket
 *
 * The table has 1 << PA_HASH_ORDER buckets; page_slot() selects one by
 * hashing the struct page pointer.  Each bucket carries its own spinlock.
 */
static struct page_address_slot {
	struct list_head lh;			/* List of page_address_maps */
	spinlock_t lock;			/* Protect this bucket's list */
} ____cacheline_aligned_in_smp page_address_htable[1<<PA_HASH_ORDER];
396 
397 static struct page_address_slot *page_slot(const struct page *page)
398 {
399 	return &page_address_htable[hash_ptr(page, PA_HASH_ORDER)];
400 }
401 
402 /**
403  * page_address - get the mapped virtual address of a page
404  * @page: &struct page to get the virtual address of
405  *
406  * Returns the page's virtual address.
407  */
408 void *page_address(const struct page *page)
409 {
410 	unsigned long flags;
411 	void *ret;
412 	struct page_address_slot *pas;
413 
414 	if (!PageHighMem(page))
415 		return lowmem_page_address(page);
416 
417 	pas = page_slot(page);
418 	ret = NULL;
419 	spin_lock_irqsave(&pas->lock, flags);
420 	if (!list_empty(&pas->lh)) {
421 		struct page_address_map *pam;
422 
423 		list_for_each_entry(pam, &pas->lh, list) {
424 			if (pam->page == page) {
425 				ret = pam->virtual;
426 				goto done;
427 			}
428 		}
429 	}
430 done:
431 	spin_unlock_irqrestore(&pas->lock, flags);
432 	return ret;
433 }
434 
435 EXPORT_SYMBOL(page_address);
436 
437 /**
438  * set_page_address - set a page's virtual address
439  * @page: &struct page to set
440  * @virtual: virtual address to use
441  */
442 void set_page_address(struct page *page, void *virtual)
443 {
444 	unsigned long flags;
445 	struct page_address_slot *pas;
446 	struct page_address_map *pam;
447 
448 	BUG_ON(!PageHighMem(page));
449 
450 	pas = page_slot(page);
451 	if (virtual) {		/* Add */
452 		pam = &page_address_maps[PKMAP_NR((unsigned long)virtual)];
453 		pam->page = page;
454 		pam->virtual = virtual;
455 
456 		spin_lock_irqsave(&pas->lock, flags);
457 		list_add_tail(&pam->list, &pas->lh);
458 		spin_unlock_irqrestore(&pas->lock, flags);
459 	} else {		/* Remove */
460 		spin_lock_irqsave(&pas->lock, flags);
461 		list_for_each_entry(pam, &pas->lh, list) {
462 			if (pam->page == page) {
463 				list_del(&pam->list);
464 				spin_unlock_irqrestore(&pas->lock, flags);
465 				goto done;
466 			}
467 		}
468 		spin_unlock_irqrestore(&pas->lock, flags);
469 	}
470 done:
471 	return;
472 }
473 
474 void __init page_address_init(void)
475 {
476 	int i;
477 
478 	for (i = 0; i < ARRAY_SIZE(page_address_htable); i++) {
479 		INIT_LIST_HEAD(&page_address_htable[i].lh);
480 		spin_lock_init(&page_address_htable[i].lock);
481 	}
482 }
483 
#endif	/* defined(HASHED_PAGE_VIRTUAL) */
485