/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/t_lock.h>
#include <sys/vmem.h>
#include <sys/mman.h>
#include <sys/vm.h>
#include <sys/cpu.h>
#include <sys/cmn_err.h>
#include <sys/cpuvar.h>
#include <sys/atomic.h>
#include <vm/as.h>
#include <vm/hat.h>
#include <vm/page.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kpm.h>
#include <vm/hat_sfmmu.h>
#include <sys/debug.h>
#include <sys/cpu_module.h>
#include <sys/mem_cage.h>

/*
 * A quick way to generate a cache consistent address to map in a page.
 * users: ppcopy, pagezero, /proc, dev/mem
 *
 * The ppmapin/ppmapout routines provide a quick way of generating a cache
 * consistent address by reserving a given amount of kernel address space.
 * The base is PPMAPBASE and its size is PPMAPSIZE.  This memory is divided
 * into x number of sets, where x is the number of colors for the virtual
 * cache.  The number of colors is how many times a page can be mapped
 * simultaneously in the cache.  For direct map caches this translates to
 * the number of pages in the cache.
 * Each set is assigned a group of virtual pages from the reserved memory
 * depending on its virtual color.
 * When trying to assign a virtual address we first find the color of the
 * physical page in question (if applicable), then look for an available
 * virtual page in the set of the appropriate color.
 */
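/*
 * A minimal usage sketch (illustrative only; the protections and the
 * (caddr_t)-1 "don't care" hint are just one possible combination,
 * mirroring the slow paths of ppcopy() and pagezero() below):
 *
 *	caddr_t va;
 *
 *	va = ppmapin(pp, PROT_READ | PROT_WRITE, (caddr_t)-1);
 *	... access the page through va ...
 *	ppmapout(va);
 */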
#define	clsettoarray(color, set) ((color * nsets) + set)

int pp_slots = 4;	/* small default, tuned by cpu module */

/* tuned by cpu module, default is "safe" */
int pp_consistent_coloring = PPAGE_STORES_POLLUTE | PPAGE_LOADS_POLLUTE;

static caddr_t	ppmap_vaddrs[PPMAPSIZE / MMU_PAGESIZE];
static int	nsets;			/* number of sets */
static int	ppmap_pages;		/* generate align mask */
static int	ppmap_shift;		/* set selector */

#ifdef PPDEBUG
#define	MAXCOLORS	16	/* for debug only */
static int ppalloc_noslot = 0;	/* # of allocations from the kernel heap */
static int align_hits[MAXCOLORS];
static int pp_allocs;		/* # of ppmapin requests */
#endif /* PPDEBUG */

/*
 * There are only 64 TLB entries on spitfire, 16 on cheetah
 * (fully-associative TLB) so we allow the cpu module to tune the
 * number to use here via pp_slots.
 */
static struct ppmap_va {
	caddr_t	ppmap_slots[MAXPP_SLOTS];
} ppmap_va[NCPU];

void
ppmapinit(void)
{
	int color, nset, setsize;
	caddr_t va;

	ASSERT(pp_slots <= MAXPP_SLOTS);

	va = (caddr_t)PPMAPBASE;
	if (cache & CACHE_VAC) {
		int a;

		ppmap_pages = mmu_btop(shm_alignment);
		nsets = PPMAPSIZE / shm_alignment;
		setsize = shm_alignment;
		ppmap_shift = MMU_PAGESHIFT;
		a = ppmap_pages;
		while (a >>= 1)
			ppmap_shift++;
	} else {
		/*
		 * If we do not have a virtually indexed cache we simply
		 * have only one set containing all pages.
		 */
		ppmap_pages = 1;
		nsets = mmu_btop(PPMAPSIZE);
		setsize = MMU_PAGESIZE;
		ppmap_shift = MMU_PAGESHIFT;
	}
	for (color = 0; color < ppmap_pages; color++) {
		for (nset = 0; nset < nsets; nset++) {
			ppmap_vaddrs[clsettoarray(color, nset)] =
			    (caddr_t)((uintptr_t)va + (nset * setsize));
		}
		va += MMU_PAGESIZE;
	}
}
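/*
 * A worked example of the layout built above (the numbers are purely
 * illustrative and not tied to any particular cpu): with an 8K
 * MMU_PAGESIZE and a hypothetical 64K shm_alignment there are 8 colors,
 * so the reserved range is carved into PPMAPSIZE / 64K sets of 8 pages
 * each and
 *
 *	ppmap_vaddrs[clsettoarray(color, nset)] ==
 *	    PPMAPBASE + nset * shm_alignment + color * MMU_PAGESIZE
 *
 * ppmap_shift works out to log2(shm_alignment), which is what lets
 * ppmapout() recover the set index below with
 * ((uintptr_t)va >> ppmap_shift) & (nsets - 1).
 */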
/*
 * Allocate a cache consistent virtual address to map a page, pp,
 * with protection, vprot; and map it in the MMU, using the most
 * efficient means possible.  The argument avoid is a virtual address
 * hint which when masked yields an offset into a virtual cache
 * that should be avoided when allocating an address to map in a
 * page.  An avoid arg of -1 means you don't care, for instance pagezero.
 *
 * machine dependent, depends on virtual address space layout,
 * understands that all kernel addresses have bit 31 set.
 *
 * NOTE: For sun4 platforms the meaning of the hint argument is opposite from
 * that found in other architectures.  In other architectures the hint
 * (called avoid) was used to ask ppmapin to NOT use the specified cache color.
 * This was used to avoid virtual cache thrashing in the bcopy.  Unfortunately
 * in the case of a COW, this later on caused a cache aliasing conflict.  In
 * sun4, the bcopy routine uses the block ld/st instructions so we don't have
 * to worry about virtual cache thrashing.  Actually, by using the hint to
 * choose the right color we can almost guarantee a cache conflict will not
 * occur.
 */

caddr_t
ppmapin(page_t *pp, uint_t vprot, caddr_t hint)
{
	int color, nset, index, start;
	caddr_t va;

#ifdef PPDEBUG
	pp_allocs++;
#endif /* PPDEBUG */
	if (cache & CACHE_VAC) {
		color = sfmmu_get_ppvcolor(pp);
		if (color == -1) {
			if ((intptr_t)hint != -1L) {
				color = addr_to_vcolor(hint);
			} else {
				color = addr_to_vcolor(mmu_ptob(pp->p_pagenum));
			}
		}
	} else {
		/*
		 * For physical caches, we can pick any address we want.
		 */
		color = 0;
	}

	start = color;
	do {
		for (nset = 0; nset < nsets; nset++) {
			index = clsettoarray(color, nset);
			va = ppmap_vaddrs[index];
			if (va != NULL) {
#ifdef PPDEBUG
				align_hits[color]++;
#endif /* PPDEBUG */
				if (atomic_cas_ptr(&ppmap_vaddrs[index],
				    va, NULL) == va) {
					hat_memload(kas.a_hat, va, pp,
					    vprot | HAT_NOSYNC,
					    HAT_LOAD_LOCK);
					return (va);
				}
			}
		}
		/*
		 * first pick didn't succeed, try another
		 */
		if (++color == ppmap_pages)
			color = 0;
	} while (color != start);

#ifdef PPDEBUG
	ppalloc_noslot++;
#endif /* PPDEBUG */

	/*
	 * No free slots; get a random one from the kernel heap area.
	 */
	va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);

	hat_memload(kas.a_hat, va, pp, vprot | HAT_NOSYNC, HAT_LOAD_LOCK);

	return (va);
}

void
ppmapout(caddr_t va)
{
	int color, nset, index;

	if (va >= kernelheap && va < ekernelheap) {
		/*
		 * Space came from the kernel heap, flush the page and
		 * return the space.
		 */
		hat_unload(kas.a_hat, va, PAGESIZE,
		    (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));
		vmem_free(heap_arena, va, PAGESIZE);
	} else {
		/*
		 * Space came from ppmap_vaddrs[], give it back.
		 */
		color = addr_to_vcolor(va);
		ASSERT((cache & CACHE_VAC) ? (color < ppmap_pages) : 1);

		nset = ((uintptr_t)va >> ppmap_shift) & (nsets - 1);
		index = clsettoarray(color, nset);
		hat_unload(kas.a_hat, va, PAGESIZE,
		    (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));

		ASSERT(ppmap_vaddrs[index] == NULL);
		ppmap_vaddrs[index] = va;
	}
}
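/*
 * An example of the sun4 hint convention described in the NOTE above
 * (this is exactly what the slow path of ppcopy() does later in this
 * file): the source mapping's address is passed as the hint for the
 * destination page, so a destination with no virtual color assigned yet
 * is mapped at the same color as the source, which, as the NOTE says,
 * almost guarantees that no cache conflict will occur:
 *
 *	fm_va = ppmapin(fm_pp, PROT_READ, (caddr_t)-1);
 *	to_va = ppmapin(to_pp, PROT_READ | PROT_WRITE, fm_va);
 */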
#ifdef DEBUG
#define	PP_STAT_ADD(stat)	(stat)++
uint_t pload, ploadfail;
uint_t ppzero, ppzero_short;
#else
#define	PP_STAT_ADD(stat)
#endif /* DEBUG */

/*
 * Find a slot in the per CPU page copy area.  Load up a locked TLB entry
 * on the running cpu.  We don't call the hat layer to load the tte since
 * the mapping is only temporary.  If the thread migrates, it will take a
 * TLB miss trap and the TLB/TSB miss handler will panic since there is no
 * official hat record of this mapping.
 */
static caddr_t
pp_load_tlb(processorid_t cpu, caddr_t **pslot, page_t *pp, uint_t prot)
{
	struct ppmap_va	*ppmap;
	tte_t		tte;
	caddr_t		*myslot;
	caddr_t		va;
	long		i, start, stride;
	int		vcolor;
	uint_t		flags, strict_flag;

	PP_STAT_ADD(pload);

	ppmap = &ppmap_va[cpu];
	va = (caddr_t)(PPMAP_FAST_BASE + (MMU_PAGESIZE * MAXPP_SLOTS) * cpu);
	myslot = ppmap->ppmap_slots;
	ASSERT(addr_to_vcolor(va) == 0);

	if (prot & TTE_HWWR_INT) {
		flags = PPAGE_STORE_VCOLORING | PPAGE_STORES_POLLUTE;
		strict_flag = PPAGE_STORES_POLLUTE;
	} else {
		flags = PPAGE_LOAD_VCOLORING | PPAGE_LOADS_POLLUTE;
		strict_flag = PPAGE_LOADS_POLLUTE;
	}

	/*
	 * If consistent handling is required then keep the current
	 * vcolor of the page.  Furthermore, if loads or stores can
	 * pollute the VAC then using a "new" page (unassigned vcolor)
	 * won't work and we have to return a failure.
	 */
	if (pp_consistent_coloring & flags) {
		vcolor = sfmmu_get_ppvcolor(pp);
		if ((vcolor == -1) &&
		    (pp_consistent_coloring & strict_flag))
			return (NULL);
		/* else keep the current vcolor of the page */
	} else {
		vcolor = -1;
	}

	if (vcolor != -1) {
		va += MMU_PAGESIZE * vcolor;
		start = vcolor;
		stride = ppmap_pages;	/* number of colors */
		myslot += vcolor;
	} else {
		start = 0;
		stride = 1;
	}

	for (i = start; i < pp_slots; i += stride) {
		if (*myslot == NULL) {
			if (atomic_cas_ptr(myslot, NULL, va) == NULL)
				break;
		}
		myslot += stride;
		va += MMU_PAGESIZE * stride;
	}

	if (i >= pp_slots) {
		PP_STAT_ADD(ploadfail);
		return (NULL);
	}

	ASSERT(vcolor == -1 || addr_to_vcolor(va) == vcolor);

	/*
	 * Now we have a slot we can use, make the tte.
	 */
	tte.tte_inthi = TTE_VALID_INT | TTE_PFN_INTHI(pp->p_pagenum);
	tte.tte_intlo = TTE_PFN_INTLO(pp->p_pagenum) | TTE_CP_INT |
	    TTE_CV_INT | TTE_PRIV_INT | TTE_LCK_INT | prot;

	ASSERT(CPU->cpu_id == cpu);
	sfmmu_dtlb_ld_kva(va, &tte);

	*pslot = myslot;	/* Return ptr to the slot we used. */

	return (va);
}

static void
pp_unload_tlb(caddr_t *pslot, caddr_t va)
{
	ASSERT(*pslot == va);

	vtag_flushpage(va, (uint64_t)ksfmmup);
	*pslot = NULL;		/* release the slot */
}
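/*
 * A sketch of the calling convention for pp_load_tlb()/pp_unload_tlb()
 * (this is the pattern used by ppcopy_common() and pagezero() below):
 * preemption must stay disabled so the locked TLB entry is only ever
 * used on the cpu it was loaded on:
 *
 *	kpreempt_disable();
 *	cpu = CPU->cpu_id;
 *	va = pp_load_tlb(cpu, &slot, pp, prot);
 *	if (va != NULL) {
 *		... access the page through va ...
 *		pp_unload_tlb(slot, va);
 *	}
 *	kpreempt_enable();
 */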
/*
 * Common copy routine which attempts to use hwblkpagecopy.  If this routine
 * can't be used, failure (0) will be returned.  Otherwise, a PAGESIZE page
 * will be copied and success (1) will be returned.
 */
int
ppcopy_common(page_t *fm_pp, page_t *to_pp)
{
	caddr_t fm_va, to_va;
	caddr_t *fm_slot, *to_slot;
	processorid_t cpu;
	label_t ljb;
	int ret = 1;

	ASSERT(fm_pp != NULL && PAGE_LOCKED(fm_pp));
	ASSERT(to_pp != NULL && PAGE_LOCKED(to_pp));

	/*
	 * If we can't use VIS block loads and stores we can't use
	 * pp_load_tlb/pp_unload_tlb due to the possibility of
	 * d$ aliasing.
	 */
	if (!use_hw_bcopy && (cache & CACHE_VAC))
		return (0);

	kpreempt_disable();
	cpu = CPU->cpu_id;
	fm_va = pp_load_tlb(cpu, &fm_slot, fm_pp, 0);
	if (fm_va == NULL) {
		kpreempt_enable();
		return (0);
	}
	to_va = pp_load_tlb(cpu, &to_slot, to_pp, TTE_HWWR_INT);
	if (to_va == NULL) {
		pp_unload_tlb(fm_slot, fm_va);
		kpreempt_enable();
		return (0);
	}
	if (on_fault(&ljb)) {
		ret = 0;
		goto faulted;
	}
	hwblkpagecopy(fm_va, to_va);
	no_fault();
faulted:
	ASSERT(CPU->cpu_id == cpu);
	pp_unload_tlb(fm_slot, fm_va);
	pp_unload_tlb(to_slot, to_va);
	kpreempt_enable();
	return (ret);
}

/*
 * Routine to copy kernel pages during relocation.  It will copy one
 * PAGESIZE page to another PAGESIZE page.  This function may be called
 * above LOCK_LEVEL so it should not grab any locks.
 */
void
ppcopy_kernel__relocatable(page_t *fm_pp, page_t *to_pp)
{
	uint64_t fm_pa, to_pa;
	size_t nbytes;

	fm_pa = (uint64_t)(fm_pp->p_pagenum) << MMU_PAGESHIFT;
	to_pa = (uint64_t)(to_pp->p_pagenum) << MMU_PAGESHIFT;

	nbytes = MMU_PAGESIZE;

	for (; nbytes > 0; fm_pa += 32, to_pa += 32, nbytes -= 32)
		hw_pa_bcopy32(fm_pa, to_pa);
}

/*
 * Copy the data from the physical page represented by "fm_pp" to
 * that represented by "to_pp".
 *
 * Try the per cpu mapping first; if that fails, call ppmapin
 * to map the pages in.
 *
 * Returns one on success or zero on some sort of fault while doing the copy.
 */
int
ppcopy(page_t *fm_pp, page_t *to_pp)
{
	caddr_t fm_va, to_va;
	label_t ljb;
	int ret = 1;
	boolean_t use_kpm = B_FALSE;

	/* Try the fast path first */
	if (ppcopy_common(fm_pp, to_pp))
		return (1);

	/*
	 * Try to map using KPM if enabled and we are the cageout thread.
	 * If that fails, fall back to ppmapin/ppmapout.
	 */
	if (kpm_enable) {
		if (curthread == kcage_cageout_thread)
			use_kpm = B_TRUE;
	}

	if (use_kpm) {
		if ((fm_va = hat_kpm_mapin(fm_pp, NULL)) == NULL ||
		    (to_va = hat_kpm_mapin(to_pp, NULL)) == NULL) {
			if (fm_va != NULL)
				hat_kpm_mapout(fm_pp, NULL, fm_va);
			use_kpm = B_FALSE;
		}
	}

	if (use_kpm == B_FALSE) {
		/* do the slow path */
		fm_va = ppmapin(fm_pp, PROT_READ, (caddr_t)-1);
		to_va = ppmapin(to_pp, PROT_READ | PROT_WRITE, fm_va);
		if (on_fault(&ljb)) {
			ret = 0;
			goto faulted;
		}
	}
	bcopy(fm_va, to_va, PAGESIZE);
	no_fault();
faulted:
	/* unmap */
	if (use_kpm == B_TRUE) {
		hat_kpm_mapout(fm_pp, NULL, fm_va);
		hat_kpm_mapout(to_pp, NULL, to_va);
	} else {
		ppmapout(fm_va);
		ppmapout(to_va);
	}
	return (ret);
}

/*
 * Zero the physical page from off to off + len given by `pp'
 * without changing the reference and modified bits of the page.
 *
 * Again, we'll try the per cpu mapping first.
 */
void
pagezero(page_t *pp, uint_t off, uint_t len)
{
	caddr_t va;
	caddr_t *slot;
	int fast = 1;
	processorid_t cpu;
	extern int hwblkclr(void *, size_t);
	extern int use_hw_bzero;

	ASSERT((int)len > 0 && (int)off >= 0 && off + len <= PAGESIZE);
	ASSERT(PAGE_LOCKED(pp));

	PP_STAT_ADD(ppzero);

	if (len != MMU_PAGESIZE || !use_hw_bzero) {
		/*
		 * Since the fast path doesn't do anything about
		 * VAC coloring, we make sure bcopy h/w will be used.
		 */
		fast = 0;
		va = NULL;
		PP_STAT_ADD(ppzero_short);
	}

	kpreempt_disable();

	if (fast) {
		cpu = CPU->cpu_id;
		va = pp_load_tlb(cpu, &slot, pp, TTE_HWWR_INT);
	}

	if (va == NULL) {
		/*
		 * We get here when len != MMU_PAGESIZE, when use_hw_bzero
		 * is disabled, or when pp_load_tlb() fails to find a slot.
		 */
		va = ppmapin(pp, PROT_READ | PROT_WRITE, (caddr_t)-1);
		fast = 0;
	}

	if (hwblkclr(va + off, len)) {
		/*
		 * We may not have used the block commit ASI,
		 * so flush the I-$ manually.
		 */
		ASSERT(fast == 0);

		sync_icache(va + off, len);
	} else {
		/*
		 * We have used block commit, and flushed the I-$.  However,
		 * we may still have an instruction in the pipeline; only a
		 * flush instruction will invalidate that.
		 */
		doflush(va);
	}

	if (fast) {
		ASSERT(CPU->cpu_id == cpu);
		pp_unload_tlb(slot, va);
	} else {
		ppmapout(va);
	}

	kpreempt_enable();
}