/*
 * High memory handling common code and variables.
 *
 * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de
 *          Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de
 *
 *
 * Redesigned the x86 32-bit VM architecture to deal with
 * 64-bit physical space. With current x86 CPUs this
 * means up to 64 Gigabytes physical RAM.
 *
 * Rewrote high memory support to move the page cache into
 * high memory. Implemented permanent (schedulable) kmaps
 * based on Linus' idea.
 *
 * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/swap.h>
#include <linux/bio.h>
#include <linux/pagemap.h>
#include <linux/mempool.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/highmem.h>
#include <asm/tlbflush.h>

/*
 * Virtual_count is not a pure "count".
 *  0 means that it is not mapped, and has not been mapped
 *    since a TLB flush - it is usable.
 *  1 means that there are no users, but it has been mapped
 *    since the last TLB flush - so we can't use it.
 *  n means that there are (n-1) current users of it.
 */
#ifdef CONFIG_HIGHMEM

unsigned long totalhigh_pages __read_mostly;
EXPORT_SYMBOL(totalhigh_pages);

unsigned int nr_free_highpages (void)
{
	pg_data_t *pgdat;
	unsigned int pages = 0;

	for_each_online_pgdat(pgdat) {
		pages += zone_page_state(&pgdat->node_zones[ZONE_HIGHMEM],
			NR_FREE_PAGES);
		if (zone_movable_is_highmem())
			pages += zone_page_state(
					&pgdat->node_zones[ZONE_MOVABLE],
					NR_FREE_PAGES);
	}

	return pages;
}

static int pkmap_count[LAST_PKMAP];
static unsigned int last_pkmap_nr;
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock);

pte_t * pkmap_page_table;

static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait);

/*
 * Most architectures have no use for kmap_high_get(), so let's abstract
 * the disabling of IRQ out of the locking in that case to save on a
 * potential useless overhead.
 */
#ifdef ARCH_NEEDS_KMAP_HIGH_GET
#define lock_kmap()             spin_lock_irq(&kmap_lock)
#define unlock_kmap()           spin_unlock_irq(&kmap_lock)
#define lock_kmap_any(flags)    spin_lock_irqsave(&kmap_lock, flags)
#define unlock_kmap_any(flags)  spin_unlock_irqrestore(&kmap_lock, flags)
#else
#define lock_kmap()             spin_lock(&kmap_lock)
#define unlock_kmap()           spin_unlock(&kmap_lock)
#define lock_kmap_any(flags)    \
		do { spin_lock(&kmap_lock); (void)(flags); } while (0)
#define unlock_kmap_any(flags)  \
		do { spin_unlock(&kmap_lock); (void)(flags); } while (0)
#endif

static void flush_all_zero_pkmaps(void)
{
	int i;
	int need_flush = 0;

	flush_cache_kmaps();

	for (i = 0; i < LAST_PKMAP; i++) {
		struct page *page;

		/*
		 * zero means we don't have anything to do,
		 * >1 means that it is still in use. Only
		 * a count of 1 means that it is free but
		 * needs to be unmapped.
		 */
		if (pkmap_count[i] != 1)
			continue;
		pkmap_count[i] = 0;

		/* sanity check */
		BUG_ON(pte_none(pkmap_page_table[i]));

		/*
		 * Don't need an atomic fetch-and-clear op here;
		 * no-one has the page mapped, and cannot get at
		 * its virtual address (and hence PTE) without first
		 * getting the kmap_lock (which is held here).
		 * So no dangers, even with speculative execution.
		 */
		page = pte_page(pkmap_page_table[i]);
		pte_clear(&init_mm, (unsigned long)page_address(page),
			  &pkmap_page_table[i]);

		set_page_address(page, NULL);
		need_flush = 1;
	}
	if (need_flush)
		flush_tlb_kernel_range(PKMAP_ADDR(0), PKMAP_ADDR(LAST_PKMAP));
}

/**
 * kmap_flush_unused - flush all unused kmap mappings in order to remove stray mappings
 */
void kmap_flush_unused(void)
{
	lock_kmap();
	flush_all_zero_pkmaps();
	unlock_kmap();
}

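/*
 * Illustration only (not part of this file's build): how the pkmap_count
 * convention documented above plays out across one kmap_high()/kunmap_high()
 * pair on a not-yet-mapped highmem page.  The helper name
 * pkmap_count_walkthrough() is made up for this sketch.
 */
#if 0
static void pkmap_count_walkthrough(struct page *page)
{
	void *vaddr;

	vaddr = kmap_high(page);	/* fresh slot: count becomes 2 (one user) */
	/* ... access the page through vaddr ... */
	kunmap_high(page);		/* count drops to 1: no users, but the
					 * mapping survives until a TLB flush */
	kmap_flush_unused();		/* flush_all_zero_pkmaps(): count -> 0,
					 * PTE cleared, kernel TLB range flushed */
}
#endif
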
static inline unsigned long map_new_virtual(struct page *page)
{
	unsigned long vaddr;
	int count;

start:
	count = LAST_PKMAP;
	/* Find an empty entry */
	for (;;) {
		last_pkmap_nr = (last_pkmap_nr + 1) & LAST_PKMAP_MASK;
		if (!last_pkmap_nr) {
			flush_all_zero_pkmaps();
			count = LAST_PKMAP;
		}
		if (!pkmap_count[last_pkmap_nr])
			break;	/* Found a usable entry */
		if (--count)
			continue;

		/*
		 * Sleep for somebody else to unmap their entries
		 */
		{
			DECLARE_WAITQUEUE(wait, current);

			__set_current_state(TASK_UNINTERRUPTIBLE);
			add_wait_queue(&pkmap_map_wait, &wait);
			unlock_kmap();
			schedule();
			remove_wait_queue(&pkmap_map_wait, &wait);
			lock_kmap();

			/* Somebody else might have mapped it while we slept */
			if (page_address(page))
				return (unsigned long)page_address(page);

			/* Re-start */
			goto start;
		}
	}
	vaddr = PKMAP_ADDR(last_pkmap_nr);
	set_pte_at(&init_mm, vaddr,
		   &(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot));

	pkmap_count[last_pkmap_nr] = 1;
	set_page_address(page, (void *)vaddr);

	return vaddr;
}

/**
 * kmap_high - map a highmem page into memory
 * @page: &struct page to map
 *
 * Returns the page's virtual memory address.
 *
 * We cannot call this from interrupts, as it may block.
 */
void *kmap_high(struct page *page)
{
	unsigned long vaddr;

	/*
	 * For highmem pages, we can't trust "virtual" until
	 * after we have the lock.
	 */
	lock_kmap();
	vaddr = (unsigned long)page_address(page);
	if (!vaddr)
		vaddr = map_new_virtual(page);
	pkmap_count[PKMAP_NR(vaddr)]++;
	BUG_ON(pkmap_count[PKMAP_NR(vaddr)] < 2);
	unlock_kmap();
	return (void*) vaddr;
}

EXPORT_SYMBOL(kmap_high);

#ifdef ARCH_NEEDS_KMAP_HIGH_GET
/**
 * kmap_high_get - pin a highmem page into memory
 * @page: &struct page to pin
 *
 * Returns the page's current virtual memory address, or NULL if no mapping
 * exists.  If and only if a non-NULL address is returned, a matching call
 * to kunmap_high() is necessary.
 *
 * This can be called from any context.
 */
void *kmap_high_get(struct page *page)
{
	unsigned long vaddr, flags;

	lock_kmap_any(flags);
	vaddr = (unsigned long)page_address(page);
	if (vaddr) {
		BUG_ON(pkmap_count[PKMAP_NR(vaddr)] < 1);
		pkmap_count[PKMAP_NR(vaddr)]++;
	}
	unlock_kmap_any(flags);
	return (void*) vaddr;
}
#endif

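/*
 * Illustration only (not part of this file's build): the "pin it if it is
 * already mapped" pattern that kmap_high_get() exists for on architectures
 * defining ARCH_NEEDS_KMAP_HIGH_GET.  The helper name example_peek_kmap()
 * is made up for this sketch.
 */
#if 0
static void example_peek_kmap(struct page *page)
{
	void *vaddr = kmap_high_get(page);	/* never sleeps */

	if (vaddr) {
		/* An existing kmap is now pinned; vaddr stays valid. */
		kunmap_high(page);		/* drop the extra reference */
	} else {
		/* No mapping existed; a caller would fall back to
		 * kmap() or kmap_atomic() here. */
	}
}
#endif
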
/**
 * kunmap_high - unmap a highmem page
 * @page: &struct page to unmap
 *
 * If ARCH_NEEDS_KMAP_HIGH_GET is not defined then this may be called
 * only from user context.
 */
void kunmap_high(struct page *page)
{
	unsigned long vaddr;
	unsigned long nr;
	unsigned long flags;
	int need_wakeup;

	lock_kmap_any(flags);
	vaddr = (unsigned long)page_address(page);
	BUG_ON(!vaddr);
	nr = PKMAP_NR(vaddr);

	/*
	 * A count must never go down to zero
	 * without a TLB flush!
	 */
	need_wakeup = 0;
	switch (--pkmap_count[nr]) {
	case 0:
		BUG();
	case 1:
		/*
		 * Avoid an unnecessary wake_up() function call.
		 * The common case is pkmap_count[] == 1, but
		 * no waiters.
		 * The tasks queued in the wait-queue are guarded
		 * by both the lock in the wait-queue-head and by
		 * the kmap_lock.  As the kmap_lock is held here,
		 * no need for the wait-queue-head's lock.  Simply
		 * test if the queue is empty.
		 */
		need_wakeup = waitqueue_active(&pkmap_map_wait);
	}
	unlock_kmap_any(flags);

	/* do wake-up, if needed, race-free outside of the spin lock */
	if (need_wakeup)
		wake_up(&pkmap_map_wait);
}

EXPORT_SYMBOL(kunmap_high);
#endif	/* CONFIG_HIGHMEM */

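/*
 * Illustration only (not part of this file's build): the usual route into
 * kmap_high()/kunmap_high() is the kmap()/kunmap() pair from
 * <linux/highmem.h>, which short-circuits to page_address() for lowmem
 * pages.  The helper name example_zero_page_slow() is made up; real code
 * would normally use clear_highpage() or an atomic kmap slot instead.
 */
#if 0
static void example_zero_page_slow(struct page *page)
{
	void *vaddr = kmap(page);	/* may sleep in map_new_virtual() */

	memset(vaddr, 0, PAGE_SIZE);
	kunmap(page);			/* drops the pkmap_count reference */
}
#endif
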
#if defined(HASHED_PAGE_VIRTUAL)

#define PA_HASH_ORDER	7

/*
 * Describes one page->virtual association
 */
struct page_address_map {
	struct page *page;
	void *virtual;
	struct list_head list;
};

/*
 * page_address_map freelist, allocated from page_address_maps.
 */
static struct list_head page_address_pool;	/* freelist */
static spinlock_t pool_lock;			/* protects page_address_pool */

/*
 * Hash table bucket
 */
static struct page_address_slot {
	struct list_head lh;			/* List of page_address_maps */
	spinlock_t lock;			/* Protect this bucket's list */
} ____cacheline_aligned_in_smp page_address_htable[1<<PA_HASH_ORDER];

static struct page_address_slot *page_slot(struct page *page)
{
	return &page_address_htable[hash_ptr(page, PA_HASH_ORDER)];
}

/**
 * page_address - get the mapped virtual address of a page
 * @page: &struct page to get the virtual address of
 *
 * Returns the page's virtual address.
 */
void *page_address(struct page *page)
{
	unsigned long flags;
	void *ret;
	struct page_address_slot *pas;

	if (!PageHighMem(page))
		return lowmem_page_address(page);

	pas = page_slot(page);
	ret = NULL;
	spin_lock_irqsave(&pas->lock, flags);
	if (!list_empty(&pas->lh)) {
		struct page_address_map *pam;

		list_for_each_entry(pam, &pas->lh, list) {
			if (pam->page == page) {
				ret = pam->virtual;
				goto done;
			}
		}
	}
done:
	spin_unlock_irqrestore(&pas->lock, flags);
	return ret;
}

EXPORT_SYMBOL(page_address);

/**
 * set_page_address - set a page's virtual address
 * @page: &struct page to set
 * @virtual: virtual address to use
 */
void set_page_address(struct page *page, void *virtual)
{
	unsigned long flags;
	struct page_address_slot *pas;
	struct page_address_map *pam;

	BUG_ON(!PageHighMem(page));

	pas = page_slot(page);
	if (virtual) {		/* Add */
		BUG_ON(list_empty(&page_address_pool));

		spin_lock_irqsave(&pool_lock, flags);
		pam = list_entry(page_address_pool.next,
				struct page_address_map, list);
		list_del(&pam->list);
		spin_unlock_irqrestore(&pool_lock, flags);

		pam->page = page;
		pam->virtual = virtual;

		spin_lock_irqsave(&pas->lock, flags);
		list_add_tail(&pam->list, &pas->lh);
		spin_unlock_irqrestore(&pas->lock, flags);
	} else {		/* Remove */
		spin_lock_irqsave(&pas->lock, flags);
		list_for_each_entry(pam, &pas->lh, list) {
			if (pam->page == page) {
				list_del(&pam->list);
				spin_unlock_irqrestore(&pas->lock, flags);
				spin_lock_irqsave(&pool_lock, flags);
				list_add_tail(&pam->list, &page_address_pool);
				spin_unlock_irqrestore(&pool_lock, flags);
				goto done;
			}
		}
		spin_unlock_irqrestore(&pas->lock, flags);
	}
done:
	return;
}

static struct page_address_map page_address_maps[LAST_PKMAP];

void __init page_address_init(void)
{
	int i;

	INIT_LIST_HEAD(&page_address_pool);
	for (i = 0; i < ARRAY_SIZE(page_address_maps); i++)
		list_add(&page_address_maps[i].list, &page_address_pool);
	for (i = 0; i < ARRAY_SIZE(page_address_htable); i++) {
		INIT_LIST_HEAD(&page_address_htable[i].lh);
		spin_lock_init(&page_address_htable[i].lock);
	}
	spin_lock_init(&pool_lock);
}

#endif	/* defined(HASHED_PAGE_VIRTUAL) */

#if defined(CONFIG_DEBUG_HIGHMEM) && defined(CONFIG_TRACE_IRQFLAGS_SUPPORT)

void debug_kmap_atomic(enum km_type type)
{
	static int warn_count = 10;

	if (unlikely(warn_count < 0))
		return;

	if (unlikely(in_interrupt())) {
		if (in_nmi()) {
			if (type != KM_NMI && type != KM_NMI_PTE) {
				WARN_ON(1);
				warn_count--;
			}
		} else if (in_irq()) {
			if (type != KM_IRQ0 && type != KM_IRQ1 &&
			    type != KM_BIO_SRC_IRQ && type != KM_BIO_DST_IRQ &&
			    type != KM_BOUNCE_READ && type != KM_IRQ_PTE) {
				WARN_ON(1);
				warn_count--;
			}
		} else if (!irqs_disabled()) {	/* softirq */
			if (type != KM_IRQ0 && type != KM_IRQ1 &&
			    type != KM_SOFTIRQ0 && type != KM_SOFTIRQ1 &&
			    type != KM_SKB_SUNRPC_DATA &&
			    type != KM_SKB_DATA_SOFTIRQ &&
			    type != KM_BOUNCE_READ) {
				WARN_ON(1);
				warn_count--;
			}
		}
	}

	if (type == KM_IRQ0 || type == KM_IRQ1 || type == KM_BOUNCE_READ ||
			type == KM_BIO_SRC_IRQ || type == KM_BIO_DST_IRQ ||
			type == KM_IRQ_PTE || type == KM_NMI ||
			type == KM_NMI_PTE) {
		if (!irqs_disabled()) {
			WARN_ON(1);
			warn_count--;
		}
	} else if (type == KM_SOFTIRQ0 || type == KM_SOFTIRQ1) {
		if (irq_count() == 0 && !irqs_disabled()) {
			WARN_ON(1);
			warn_count--;
		}
	}
}

#endif
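
/*
 * Illustration only (not part of this file's build): the kind of caller the
 * debug_kmap_atomic() checks above are policing.  From hard-IRQ context an
 * atomic kmap must use one of the KM_IRQ* slots; passing e.g. KM_USER0 there
 * would trip the WARN_ON().  The helper name example_irq_peek() is made up,
 * and the two-argument kmap_atomic()/kunmap_atomic() calls reflect the
 * km_type-based API this file is written against.
 */
#if 0
static void example_irq_peek(struct page *page)
{
	char *vaddr = kmap_atomic(page, KM_IRQ0);	/* slot vetted by debug_kmap_atomic() */

	/* ... inspect a few bytes of the page ... */
	kunmap_atomic(vaddr, KM_IRQ0);
}
#endif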