/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/t_lock.h>
#include <sys/vmem.h>
#include <sys/mman.h>
#include <sys/vm.h>
#include <sys/cpu.h>
#include <sys/cmn_err.h>
#include <sys/cpuvar.h>
#include <sys/atomic.h>
#include <vm/as.h>
#include <vm/hat.h>
#include <vm/page.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kpm.h>
#include <vm/hat_sfmmu.h>
#include <sys/debug.h>
#include <sys/cpu_module.h>
#include <sys/mem_cage.h>

/*
 * A quick way to generate a cache consistent address to map in a page.
 * users: ppcopy, pagezero, /proc, dev/mem
 *
 * The ppmapin/ppmapout routines provide a quick way of generating a cache
 * consistent address by reserving a given amount of kernel address space.
 * The base is PPMAPBASE and its size is PPMAPSIZE.  This memory is divided
 * into x number of sets, where x is the number of colors for the virtual
 * cache.  The number of colors is how many times a page can be mapped
 * simultaneously in the cache.  For direct-mapped caches this translates to
 * the number of pages in the cache.
 * Each set will be assigned a group of virtual pages from the reserved memory
 * depending on its virtual color.
 * When trying to assign a virtual address we will find out the color for the
 * physical page in question (if applicable).  Then we will try to find an
 * available virtual page from the set of the appropriate color.
 */

#define	clsettoarray(color, set) ((color * nsets) + set)

int pp_slots = 4;		/* small default, tuned by cpu module */

/* tuned by cpu module, default is "safe" */
int pp_consistent_coloring = PPAGE_STORES_POLLUTE | PPAGE_LOADS_POLLUTE;

static caddr_t	ppmap_vaddrs[PPMAPSIZE / MMU_PAGESIZE];
static int	nsets;			/* number of sets */
static int	ppmap_pages;		/* generate align mask */
static int	ppmap_shift;		/* set selector */

#ifdef PPDEBUG
#define	MAXCOLORS	16		/* for debug only */
static int	ppalloc_noslot = 0;	/* # of allocations from kernelmap */
static int	align_hits[MAXCOLORS];
static int	pp_allocs;		/* # of ppmapin requests */
#endif /* PPDEBUG */

/*
 * There are only 64 TLB entries on spitfire, 16 on cheetah
 * (fully-associative TLB) so we allow the cpu module to tune the
 * number to use here via pp_slots.
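 *
 * Rough sketch of the per-CPU fast-copy window (illustrative only): each
 * CPU owns MAXPP_SLOTS virtual pages starting at
 * PPMAP_FAST_BASE + cpu * (MMU_PAGESIZE * MAXPP_SLOTS), of which only the
 * first pp_slots are handed out by pp_load_tlb() below; successive slots
 * step through the virtual colors one page at a time.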
 */
static struct ppmap_va {
	caddr_t	ppmap_slots[MAXPP_SLOTS];
} ppmap_va[NCPU];

void
ppmapinit(void)
{
	int color, nset, setsize;
	caddr_t va;

	ASSERT(pp_slots <= MAXPP_SLOTS);

	va = (caddr_t)PPMAPBASE;
	if (cache & CACHE_VAC) {
		int a;

		ppmap_pages = mmu_btop(shm_alignment);
		nsets = PPMAPSIZE / shm_alignment;
		setsize = shm_alignment;
		ppmap_shift = MMU_PAGESHIFT;
		a = ppmap_pages;
		while (a >>= 1)
			ppmap_shift++;
	} else {
		/*
		 * If we do not have a virtual indexed cache we simply
		 * have only one set containing all pages.
		 */
		ppmap_pages = 1;
		nsets = mmu_btop(PPMAPSIZE);
		setsize = MMU_PAGESIZE;
		ppmap_shift = MMU_PAGESHIFT;
	}
	for (color = 0; color < ppmap_pages; color++) {
		for (nset = 0; nset < nsets; nset++) {
			ppmap_vaddrs[clsettoarray(color, nset)] =
			    (caddr_t)((uintptr_t)va + (nset * setsize));
		}
		va += MMU_PAGESIZE;
	}
}

/*
 * Allocate a cache consistent virtual address to map a page, pp,
 * with protection, vprot; and map it in the MMU, using the most
 * efficient means possible.  The argument avoid is a virtual address
 * hint which when masked yields an offset into a virtual cache
 * that should be avoided when allocating an address to map in a
 * page.  An avoid arg of -1 means you don't care, for instance pagezero.
 *
 * machine dependent, depends on virtual address space layout,
 * understands that all kernel addresses have bit 31 set.
 *
 * NOTE: For sun4 platforms the meaning of the hint argument is opposite from
 * that found in other architectures.  In other architectures the hint
 * (called avoid) was used to ask ppmapin to NOT use the specified cache color.
 * This was used to avoid virtual cache thrashing in the bcopy.  Unfortunately
 * in the case of a COW, this later on caused a cache aliasing conflict.  In
 * sun4, the bcopy routine uses the block ld/st instructions so we don't have
 * to worry about virtual cache thrashing.  Actually, by using the hint to
 * choose the right color we can almost guarantee a cache conflict will not
 * occur.
 */

caddr_t
ppmapin(page_t *pp, uint_t vprot, caddr_t hint)
{
	int color, nset, index, start;
	caddr_t va;

#ifdef PPDEBUG
	pp_allocs++;
#endif /* PPDEBUG */
	if (cache & CACHE_VAC) {
		color = sfmmu_get_ppvcolor(pp);
		if (color == -1) {
			if ((intptr_t)hint != -1L) {
				color = addr_to_vcolor(hint);
			} else {
				color = addr_to_vcolor(mmu_ptob(pp->p_pagenum));
			}
		}

	} else {
		/*
		 * For physical caches, we can pick any address we want.
		 */
		color = 0;
	}

	start = color;
	do {
		for (nset = 0; nset < nsets; nset++) {
			index = clsettoarray(color, nset);
			va = ppmap_vaddrs[index];
			if (va != NULL) {
#ifdef PPDEBUG
				align_hits[color]++;
#endif /* PPDEBUG */
				if (casptr(&ppmap_vaddrs[index],
				    va, NULL) == va) {
					hat_memload(kas.a_hat, va, pp,
					    vprot | HAT_NOSYNC,
					    HAT_LOAD_LOCK);
					return (va);
				}
			}
		}
		/*
		 * first pick didn't succeed, try another
		 */
		if (++color == ppmap_pages)
			color = 0;
	} while (color != start);

#ifdef PPDEBUG
	ppalloc_noslot++;
#endif /* PPDEBUG */

	/*
	 * No free slots; get a random one from the kernel heap area.
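	 * ppmapout() can tell a heap_arena address from a ppmap_vaddrs[]
	 * address by checking it against the kernelheap/ekernelheap bounds,
	 * so the space will be given back to the proper allocator.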
	 */
	va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);

	hat_memload(kas.a_hat, va, pp, vprot | HAT_NOSYNC, HAT_LOAD_LOCK);

	return (va);
}

void
ppmapout(caddr_t va)
{
	int color, nset, index;

	if (va >= kernelheap && va < ekernelheap) {
		/*
		 * Space came from kernelmap, flush the page and
		 * return the space.
		 */
		hat_unload(kas.a_hat, va, PAGESIZE,
		    (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));
		vmem_free(heap_arena, va, PAGESIZE);
	} else {
		/*
		 * Space came from ppmap_vaddrs[], give it back.
		 */
		color = addr_to_vcolor(va);
		ASSERT((cache & CACHE_VAC) ? (color < ppmap_pages) : 1);

		nset = ((uintptr_t)va >> ppmap_shift) & (nsets - 1);
		index = clsettoarray(color, nset);
		hat_unload(kas.a_hat, va, PAGESIZE,
		    (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));

		ASSERT(ppmap_vaddrs[index] == NULL);
		ppmap_vaddrs[index] = va;
	}
}

#ifdef DEBUG
#define	PP_STAT_ADD(stat)	(stat)++
uint_t pload, ploadfail;
uint_t ppzero, ppzero_short;
#else
#define	PP_STAT_ADD(stat)
#endif /* DEBUG */

/*
 * Find a slot in per CPU page copy area.  Load up a locked TLB in the
 * running cpu.  We don't call hat layer to load up the tte since the
 * mapping is only temporary.  If the thread migrates it'll get a TLB
 * miss trap and TLB/TSB miss handler will panic since there is no
 * official hat record of this mapping.
 */
static caddr_t
pp_load_tlb(processorid_t cpu, caddr_t **pslot, page_t *pp, uint_t prot)
{
	struct ppmap_va *ppmap;
	tte_t tte;
	caddr_t *myslot;
	caddr_t va;
	long i, start, stride;
	int vcolor;
	uint_t flags, strict_flag;

	PP_STAT_ADD(pload);

	ppmap = &ppmap_va[cpu];
	va = (caddr_t)(PPMAP_FAST_BASE + (MMU_PAGESIZE * MAXPP_SLOTS) * cpu);
	myslot = ppmap->ppmap_slots;
	ASSERT(addr_to_vcolor(va) == 0);

	if (prot & TTE_HWWR_INT) {
		flags = PPAGE_STORE_VCOLORING | PPAGE_STORES_POLLUTE;
		strict_flag = PPAGE_STORES_POLLUTE;
	} else {
		flags = PPAGE_LOAD_VCOLORING | PPAGE_LOADS_POLLUTE;
		strict_flag = PPAGE_LOADS_POLLUTE;
	}

	/*
	 * If consistent handling is required then keep the current
	 * vcolor of the page.  Furthermore, if loads or stores can
	 * pollute the VAC then using a "new" page (unassigned vcolor)
	 * won't work and we have to return a failure.
	 */
	if (pp_consistent_coloring & flags) {
		vcolor = sfmmu_get_ppvcolor(pp);
		if ((vcolor == -1) &&
		    (pp_consistent_coloring & strict_flag))
			return (NULL);
		/* else keep the current vcolor of the page */
	} else {
		vcolor = -1;
	}

	if (vcolor != -1) {
		va += MMU_PAGESIZE * vcolor;
		start = vcolor;
		stride = ppmap_pages;	/* number of colors */
		myslot += vcolor;
	} else {
		start = 0;
		stride = 1;
	}

	for (i = start; i < pp_slots; i += stride) {
		if (*myslot == NULL) {
			if (casptr(myslot, NULL, va) == NULL)
				break;
		}
		myslot += stride;
		va += MMU_PAGESIZE * stride;
	}

	if (i >= pp_slots) {
		PP_STAT_ADD(ploadfail);
		return (NULL);
	}

	ASSERT(vcolor == -1 || addr_to_vcolor(va) == vcolor);

	/*
	 * Now we have a slot we can use, make the tte.
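	 * The tte is built by hand (valid, locked, privileged, cacheable,
	 * plus the caller's protection bits) and dropped straight into a
	 * locked dTLB entry; as noted above, no hat record is kept.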
	 */
	tte.tte_inthi = TTE_VALID_INT | TTE_PFN_INTHI(pp->p_pagenum);
	tte.tte_intlo = TTE_PFN_INTLO(pp->p_pagenum) | TTE_CP_INT |
	    TTE_CV_INT | TTE_PRIV_INT | TTE_LCK_INT | prot;

	ASSERT(CPU->cpu_id == cpu);
	sfmmu_dtlb_ld_kva(va, &tte);

	*pslot = myslot;	/* Return ptr to the slot we used. */

	return (va);
}

static void
pp_unload_tlb(caddr_t *pslot, caddr_t va)
{
	ASSERT(*pslot == va);

	vtag_flushpage(va, (uint64_t)ksfmmup);
	*pslot = NULL;		/* release the slot */
}

/*
 * Common copy routine which attempts to use hwblkpagecopy.  If this routine
 * can't be used, failure (0) will be returned.  Otherwise, a PAGESIZE page
 * will be copied and success (1) will be returned.
 */
int
ppcopy_common(page_t *fm_pp, page_t *to_pp)
{
	caddr_t fm_va, to_va;
	caddr_t *fm_slot, *to_slot;
	processorid_t cpu;
	label_t ljb;
	int ret = 1;

	ASSERT(fm_pp != NULL && PAGE_LOCKED(fm_pp));
	ASSERT(to_pp != NULL && PAGE_LOCKED(to_pp));

	/*
	 * If we can't use VIS block loads and stores we can't use
	 * pp_load_tlb/pp_unload_tlb due to the possibility of
	 * d$ aliasing.
	 */
	if (!use_hw_bcopy && (cache & CACHE_VAC))
		return (0);

	kpreempt_disable();
	cpu = CPU->cpu_id;
	fm_va = pp_load_tlb(cpu, &fm_slot, fm_pp, 0);
	if (fm_va == NULL) {
		kpreempt_enable();
		return (0);
	}
	to_va = pp_load_tlb(cpu, &to_slot, to_pp, TTE_HWWR_INT);
	if (to_va == NULL) {
		pp_unload_tlb(fm_slot, fm_va);
		kpreempt_enable();
		return (0);
	}
	if (on_fault(&ljb)) {
		ret = 0;
		goto faulted;
	}
	hwblkpagecopy(fm_va, to_va);
	no_fault();
faulted:
	ASSERT(CPU->cpu_id == cpu);
	pp_unload_tlb(fm_slot, fm_va);
	pp_unload_tlb(to_slot, to_va);
	kpreempt_enable();
	return (ret);
}

/*
 * Routine to copy kernel pages during relocation.  It will copy one
 * PAGESIZE page to another PAGESIZE page.  This function may be called
 * above LOCK_LEVEL so it should not grab any locks.
 */
void
ppcopy_kernel__relocatable(page_t *fm_pp, page_t *to_pp)
{
	uint64_t fm_pa, to_pa;
	size_t nbytes;

	fm_pa = (uint64_t)(fm_pp->p_pagenum) << MMU_PAGESHIFT;
	to_pa = (uint64_t)(to_pp->p_pagenum) << MMU_PAGESHIFT;

	nbytes = MMU_PAGESIZE;

	for (; nbytes > 0; fm_pa += 32, to_pa += 32, nbytes -= 32)
		hw_pa_bcopy32(fm_pa, to_pa);
}

/*
 * Copy the data from the physical page represented by "frompp" to
 * that represented by "topp".
 *
 * Try to use the per cpu mapping first; if that fails, call ppmapin
 * to load it.
 *
 * Returns one on success or zero on some sort of fault while doing the copy.
 */
int
ppcopy(page_t *fm_pp, page_t *to_pp)
{
	caddr_t fm_va, to_va;
	label_t ljb;
	int ret = 1;
	boolean_t use_kpm = B_FALSE;

	/* Try the fast path first */
	if (ppcopy_common(fm_pp, to_pp))
		return (1);

	/*
	 * Try to map using KPM if enabled and we are the cageout thread.
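	 * (kpm, when enabled, gives the kernel a standing mapping of
	 * physical memory, so hat_kpm_mapin() can produce an address
	 * without consuming the ppmap slots or heap space used above.)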
	 * If it fails, fall back to ppmapin/ppmapout.
	 */

	if (kpm_enable) {
		if (curthread == kcage_cageout_thread)
			use_kpm = B_TRUE;
	}

	if (use_kpm) {
		if ((fm_va = hat_kpm_mapin(fm_pp, NULL)) == NULL ||
		    (to_va = hat_kpm_mapin(to_pp, NULL)) == NULL) {
			if (fm_va != NULL)
				hat_kpm_mapout(fm_pp, NULL, fm_va);
			use_kpm = B_FALSE;
		}
	}

	if (use_kpm == B_FALSE) {
		/* do the slow path */
		fm_va = ppmapin(fm_pp, PROT_READ, (caddr_t)-1);
		to_va = ppmapin(to_pp, PROT_READ | PROT_WRITE, fm_va);
		if (on_fault(&ljb)) {
			ret = 0;
			goto faulted;
		}
	}
	bcopy(fm_va, to_va, PAGESIZE);
	no_fault();
faulted:
	/* unmap */
	if (use_kpm == B_TRUE) {
		hat_kpm_mapout(fm_pp, NULL, fm_va);
		hat_kpm_mapout(to_pp, NULL, to_va);
	} else {
		ppmapout(fm_va);
		ppmapout(to_va);
	}
	return (ret);
}

/*
 * Zero the physical page from off to off + len given by `pp'
 * without changing the reference and modified bits of page.
 *
 * Again, we'll try the per cpu mapping first.
 */
void
pagezero(page_t *pp, uint_t off, uint_t len)
{
	caddr_t va;
	caddr_t *slot;
	int fast = 1;
	processorid_t cpu;
	extern int hwblkclr(void *, size_t);
	extern int use_hw_bzero;

	ASSERT((int)len > 0 && (int)off >= 0 && off + len <= PAGESIZE);
	ASSERT(PAGE_LOCKED(pp));

	PP_STAT_ADD(ppzero);

	if (len != MMU_PAGESIZE || !use_hw_bzero) {
		/*
		 * Since the fast path doesn't do anything about
		 * VAC coloring, we make sure bcopy h/w will be used.
		 */
		fast = 0;
		va = NULL;
		PP_STAT_ADD(ppzero_short);
	}

	kpreempt_disable();

	if (fast) {
		cpu = CPU->cpu_id;
		va = pp_load_tlb(cpu, &slot, pp, TTE_HWWR_INT);
	}

	if (va == NULL) {
		/*
		 * We get here if len != MMU_PAGESIZE, use_hw_bzero is
		 * disabled, or pp_load_tlb() returned NULL.
		 */
		va = ppmapin(pp, PROT_READ | PROT_WRITE, (caddr_t)-1);
		fast = 0;
	}

	if (hwblkclr(va + off, len)) {
		/*
		 * We may not have used the block commit asi,
		 * so flush the I-$ manually.
		 */

		ASSERT(fast == 0);

		sync_icache(va + off, len);
	} else {
		/*
		 * We have used blk commit, and flushed the I-$.  However we
		 * still may have an instruction in the pipeline.  Only a flush
		 * instruction will invalidate that.
		 */
		doflush(va);
	}

	if (fast) {
		ASSERT(CPU->cpu_id == cpu);
		pp_unload_tlb(slot, va);
	} else {
		ppmapout(va);
	}

	kpreempt_enable();
}