/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/t_lock.h>
#include <sys/vmem.h>
#include <sys/mman.h>
#include <sys/vm.h>
#include <sys/cpu.h>
#include <sys/cmn_err.h>
#include <sys/cpuvar.h>
#include <sys/atomic.h>
#include <vm/as.h>
#include <vm/hat.h>
#include <vm/page.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>
#include <vm/hat_sfmmu.h>
#include <sys/debug.h>
#include <sys/cpu_module.h>

/*
 * A quick way to generate a cache consistent address to map in a page.
 * users: ppcopy, pagezero, /proc, dev/mem
 *
 * The ppmapin/ppmapout routines provide a quick way of generating a cache
 * consistent address by reserving a given amount of kernel address space.
 * The base is PPMAPBASE and its size is PPMAPSIZE.  This memory is divided
 * into x number of sets, where x is the number of colors for the virtual
 * cache.  The number of colors is the number of distinct locations in the
 * virtual cache at which a page can be mapped; for direct map caches this
 * translates to the number of pages in the cache.
 * Each set is assigned a group of virtual pages from the reserved memory
 * depending on its virtual color.
 * When trying to assign a virtual address we find out the color of the
 * physical page in question (if applicable) and then try to find an
 * available virtual page from the set of the appropriate color.
 */

#define	clsettoarray(color, set)	(((color) * nsets) + (set))

int pp_slots = 4;	/* small default, tuned by cpu module */

/* tuned by cpu module, default is "safe" */
int pp_consistent_coloring = PPAGE_STORES_POLLUTE | PPAGE_LOADS_POLLUTE;

static caddr_t	ppmap_vaddrs[PPMAPSIZE / MMU_PAGESIZE];
static int	nsets;			/* number of sets */
static int	ppmap_pages;		/* generate align mask */
static int	ppmap_shift;		/* set selector */

#ifdef PPDEBUG
#define	MAXCOLORS	16	/* for debug only */
static int ppalloc_noslot = 0;	/* # of allocations that fell back to the heap */
static int align_hits[MAXCOLORS];
static int pp_allocs;		/* # of ppmapin requests */
#endif /* PPDEBUG */

/*
 * There are only 64 TLB entries on spitfire, 16 on cheetah
 * (fully-associative TLB) so we allow the cpu module to tune the
 * number to use here via pp_slots.
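 *
 * Each cpu gets its own group of MAXPP_SLOTS virtual pages starting at
 * PPMAP_FAST_BASE + cpu * (MAXPP_SLOTS * MMU_PAGESIZE), of which the
 * first pp_slots are actually used.  A non-NULL ppmap_slots[] entry
 * marks the corresponding virtual page as in use; see pp_load_tlb().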
 */
static struct ppmap_va {
	caddr_t	ppmap_slots[MAXPP_SLOTS];
} ppmap_va[NCPU];

void
ppmapinit(void)
{
	int color, nset, setsize;
	caddr_t va;

	ASSERT(pp_slots <= MAXPP_SLOTS);

	va = (caddr_t)PPMAPBASE;
	if (cache & CACHE_VAC) {
		int a;

		ppmap_pages = mmu_btop(shm_alignment);
		nsets = PPMAPSIZE / shm_alignment;
		setsize = shm_alignment;
		ppmap_shift = MMU_PAGESHIFT;
		a = ppmap_pages;
		while (a >>= 1)
			ppmap_shift++;
	} else {
		/*
		 * If we do not have a virtual indexed cache we simply
		 * have only one set containing all pages.
		 */
		ppmap_pages = 1;
		nsets = mmu_btop(PPMAPSIZE);
		setsize = MMU_PAGESIZE;
		ppmap_shift = MMU_PAGESHIFT;
	}
	for (color = 0; color < ppmap_pages; color++) {
		for (nset = 0; nset < nsets; nset++) {
			ppmap_vaddrs[clsettoarray(color, nset)] =
			    (caddr_t)((uintptr_t)va + (nset * setsize));
		}
		va += MMU_PAGESIZE;
	}
}

/*
 * Allocate a cache consistent virtual address to map a page, pp,
 * with protection, vprot, and map it in the MMU using the most
 * efficient means possible.  The hint argument is a virtual address
 * whose virtual color is used to pick the mapping color when the page
 * has no virtual color of its own.  A hint of -1 means the caller
 * doesn't care, for instance pagezero.
 *
 * Machine dependent: depends on the virtual address space layout and
 * understands that all kernel addresses have bit 31 set.
 *
 * NOTE: For sun4 platforms the meaning of the hint argument is opposite from
 * that found in other architectures.  In other architectures the hint
 * (called avoid) was used to ask ppmapin NOT to use the specified cache color.
 * This was used to avoid virtual cache thrashing in the bcopy.  Unfortunately,
 * in the case of a COW, this later caused a cache aliasing conflict.  On
 * sun4, the bcopy routine uses the block ld/st instructions so we don't have
 * to worry about virtual cache thrashing.  In fact, by using the hint to
 * choose the right color we can almost guarantee a cache conflict will not
 * occur.
 */

caddr_t
ppmapin(page_t *pp, uint_t vprot, caddr_t hint)
{
	int color, nset, index, start;
	caddr_t va;

#ifdef PPDEBUG
	pp_allocs++;
#endif /* PPDEBUG */
	if (cache & CACHE_VAC) {
		color = sfmmu_get_ppvcolor(pp);
		if (color == -1) {
			if ((intptr_t)hint != -1L) {
				color = addr_to_vcolor(hint);
			} else {
				color = addr_to_vcolor(mmu_ptob(pp->p_pagenum));
			}
		}

	} else {
		/*
		 * For physical caches, we can pick any address we want.
		 */
		color = 0;
	}

	start = color;
	do {
		for (nset = 0; nset < nsets; nset++) {
			index = clsettoarray(color, nset);
			va = ppmap_vaddrs[index];
			if (va != NULL) {
#ifdef PPDEBUG
				align_hits[color]++;
#endif /* PPDEBUG */
				if (casptr(&ppmap_vaddrs[index],
				    va, NULL) == va) {
					hat_memload(kas.a_hat, va, pp,
					    vprot | HAT_NOSYNC,
					    HAT_LOAD_LOCK);
					return (va);
				}
			}
		}
		/*
		 * first pick didn't succeed, try another
		 */
		if (++color == ppmap_pages)
			color = 0;
	} while (color != start);

#ifdef PPDEBUG
	ppalloc_noslot++;
#endif /* PPDEBUG */

	/*
	 * No free slots; get a random one from the kernel heap area.
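	 * ppmapout() recognizes such an address by the fact that it
	 * falls within [kernelheap, ekernelheap) and returns it to
	 * heap_arena.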
	 */
	va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);

	hat_memload(kas.a_hat, va, pp, vprot | HAT_NOSYNC, HAT_LOAD_LOCK);

	return (va);
}

void
ppmapout(caddr_t va)
{
	int color, nset, index;

	if (va >= kernelheap && va < ekernelheap) {
		/*
		 * Space came from the kernel heap arena; flush the
		 * mapping and return the space.
		 */
		hat_unload(kas.a_hat, va, PAGESIZE,
		    (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));
		vmem_free(heap_arena, va, PAGESIZE);
	} else {
		/*
		 * Space came from ppmap_vaddrs[], give it back.
		 */
		color = addr_to_vcolor(va);
		ASSERT((cache & CACHE_VAC) ? (color < ppmap_pages) : 1);

		nset = ((uintptr_t)va >> ppmap_shift) & (nsets - 1);
		index = clsettoarray(color, nset);
		hat_unload(kas.a_hat, va, PAGESIZE,
		    (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));

		ASSERT(ppmap_vaddrs[index] == NULL);
		ppmap_vaddrs[index] = va;
	}
}

#ifdef DEBUG
#define	PP_STAT_ADD(stat)	(stat)++
uint_t pload, ploadfail;
uint_t ppzero, ppzero_short;
#else
#define	PP_STAT_ADD(stat)
#endif /* DEBUG */

/*
 * Find a slot in the per-CPU page copy area and load a locked TLB
 * entry on the running cpu.  We don't call the hat layer to load up
 * the tte since the mapping is only temporary.  If the thread
 * migrates, it will get a TLB miss trap and the TLB/TSB miss handler
 * will panic since there is no official hat record of this mapping.
 */
static caddr_t
pp_load_tlb(processorid_t cpu, caddr_t **pslot, page_t *pp, uint_t prot)
{
	struct ppmap_va	*ppmap;
	tte_t		tte;
	caddr_t		*myslot;
	caddr_t		va;
	long		i, start, stride;
	int		vcolor;
	uint_t		flags, strict_flag;

	PP_STAT_ADD(pload);

	ppmap = &ppmap_va[cpu];
	va = (caddr_t)(PPMAP_FAST_BASE + (MMU_PAGESIZE * MAXPP_SLOTS) * cpu);
	myslot = ppmap->ppmap_slots;
	ASSERT(addr_to_vcolor(va) == 0);

	if (prot & TTE_HWWR_INT) {
		flags = PPAGE_STORE_VCOLORING | PPAGE_STORES_POLLUTE;
		strict_flag = PPAGE_STORES_POLLUTE;
	} else {
		flags = PPAGE_LOAD_VCOLORING | PPAGE_LOADS_POLLUTE;
		strict_flag = PPAGE_LOADS_POLLUTE;
	}

	/*
	 * If consistent handling is required then keep the current
	 * vcolor of the page.  Furthermore, if loads or stores can
	 * pollute the VAC then using a "new" page (unassigned vcolor)
	 * won't work and we have to return a failure.
	 */
	if (pp_consistent_coloring & flags) {
		vcolor = sfmmu_get_ppvcolor(pp);
		if ((vcolor == -1) &&
		    (pp_consistent_coloring & strict_flag))
			return (NULL);
		/* else keep the current vcolor of the page */
	} else {
		vcolor = -1;
	}

	if (vcolor != -1) {
		va += MMU_PAGESIZE * vcolor;
		start = vcolor;
		stride = ppmap_pages;	/* number of colors */
		myslot += vcolor;
	} else {
		start = 0;
		stride = 1;
	}

	for (i = start; i < pp_slots; i += stride) {
		if (*myslot == NULL) {
			if (casptr(myslot, NULL, va) == NULL)
				break;
		}
		myslot += stride;
		va += MMU_PAGESIZE * stride;
	}

	if (i >= pp_slots) {
		PP_STAT_ADD(ploadfail);
		return (NULL);
	}

	ASSERT(vcolor == -1 || addr_to_vcolor(va) == vcolor);

	/*
	 * Now we have a slot we can use, make the tte.
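	 * The entry is valid, privileged, locked and cacheable (both
	 * physically and virtually), with the caller's protection bits
	 * OR-ed in; it is loaded straight into the D-TLB of the current
	 * cpu under KCONTEXT.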
	 */
	tte.tte_inthi = TTE_VALID_INT | TTE_PFN_INTHI(pp->p_pagenum);
	tte.tte_intlo = TTE_PFN_INTLO(pp->p_pagenum) | TTE_CP_INT |
	    TTE_CV_INT | TTE_PRIV_INT | TTE_LCK_INT | prot;

	ASSERT(CPU->cpu_id == cpu);
	sfmmu_dtlb_ld(va, KCONTEXT, &tte);

	*pslot = myslot;	/* Return ptr to the slot we used. */

	return (va);
}

static void
pp_unload_tlb(caddr_t *pslot, caddr_t va)
{
	ASSERT(*pslot == va);

	vtag_flushpage(va, KCONTEXT);
	*pslot = NULL;		/* release the slot */
}

/*
 * Common copy routine which attempts to use hwblkpagecopy.  If this
 * routine can't be used, failure (0) is returned.  Otherwise, a
 * PAGESIZE page is copied and success (1) is returned.
 */
int
ppcopy_common(page_t *fm_pp, page_t *to_pp)
{
	caddr_t fm_va, to_va;
	caddr_t *fm_slot, *to_slot;
	processorid_t cpu;

	ASSERT(PAGE_LOCKED(fm_pp));
	ASSERT(PAGE_LOCKED(to_pp));

	/*
	 * If we can't use VIS block loads and stores we can't use
	 * pp_load_tlb/pp_unload_tlb due to the possibility of
	 * d$ aliasing.
	 */
	if (!use_hw_bcopy && (cache & CACHE_VAC))
		return (0);

	kpreempt_disable();
	cpu = CPU->cpu_id;
	fm_va = pp_load_tlb(cpu, &fm_slot, fm_pp, 0);
	if (fm_va == NULL) {
		kpreempt_enable();
		return (0);
	}
	to_va = pp_load_tlb(cpu, &to_slot, to_pp, TTE_HWWR_INT);
	if (to_va == NULL) {
		pp_unload_tlb(fm_slot, fm_va);
		kpreempt_enable();
		return (0);
	}
	hwblkpagecopy(fm_va, to_va);
	ASSERT(CPU->cpu_id == cpu);
	pp_unload_tlb(fm_slot, fm_va);
	pp_unload_tlb(to_slot, to_va);
	kpreempt_enable();
	return (1);
}

/*
 * Routine to copy kernel pages during relocation.  It copies one
 * PAGESIZE page to another PAGESIZE page.  This function may be called
 * above LOCK_LEVEL, so it must not grab any locks.
 */
void
ppcopy_kernel__relocatable(page_t *fm_pp, page_t *to_pp)
{
	uint64_t fm_pa, to_pa;
	size_t nbytes;

	fm_pa = (uint64_t)(fm_pp->p_pagenum) << MMU_PAGESHIFT;
	to_pa = (uint64_t)(to_pp->p_pagenum) << MMU_PAGESHIFT;

	nbytes = MMU_PAGESIZE;

	for (; nbytes > 0; fm_pa += 32, to_pa += 32, nbytes -= 32)
		hw_pa_bcopy32(fm_pa, to_pa);
}

/*
 * Copy the data from the physical page represented by "fm_pp" to
 * that represented by "to_pp".
 *
 * Try the per cpu mapping first; if that fails, fall back to
 * ppmapin to map the pages.
 */
void
ppcopy(page_t *fm_pp, page_t *to_pp)
{
	caddr_t fm_va, to_va;

	/* Try the fast path first */
	if (ppcopy_common(fm_pp, to_pp))
		return;

	/* Fast path failed, so we need to do the slow path. */
	fm_va = ppmapin(fm_pp, PROT_READ, (caddr_t)-1);
	to_va = ppmapin(to_pp, PROT_READ | PROT_WRITE, fm_va);
	bcopy(fm_va, to_va, PAGESIZE);
	ppmapout(fm_va);
	ppmapout(to_va);
}

/*
 * Zero the physical page from off to off + len given by `pp'
 * without changing the reference and modified bits of the page.
 *
 * Again, we'll try the per cpu mapping first.
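 * The fast path is only attempted for a full MMU_PAGESIZE zero when
 * h/w block clear is available (use_hw_bzero); otherwise, or when no
 * per cpu slot is free, we fall back to ppmapin()/ppmapout().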
 */
void
pagezero(page_t *pp, uint_t off, uint_t len)
{
	caddr_t va;
	caddr_t *slot;
	int fast = 1;
	processorid_t cpu;
	extern int hwblkclr(void *, size_t);
	extern int use_hw_bzero;

	ASSERT((int)len > 0 && (int)off >= 0 && off + len <= PAGESIZE);
	ASSERT(PAGE_LOCKED(pp));

	PP_STAT_ADD(ppzero);

	if (len != MMU_PAGESIZE || !use_hw_bzero) {
		/*
		 * Since the fast path doesn't do anything about
		 * VAC coloring, we make sure bcopy h/w will be used.
		 */
		fast = 0;
		va = NULL;
		PP_STAT_ADD(ppzero_short);
	}

	kpreempt_disable();

	if (fast) {
		cpu = CPU->cpu_id;
		va = pp_load_tlb(cpu, &slot, pp, TTE_HWWR_INT);
	}

	if (va == NULL) {
		/*
		 * We get here if len != MMU_PAGESIZE, use_hw_bzero is
		 * disabled, or pp_load_tlb() returned NULL.
		 */
		va = ppmapin(pp, PROT_READ | PROT_WRITE, (caddr_t)-1);
		fast = 0;
	}

	if (hwblkclr(va + off, len)) {
		/*
		 * We may not have used the block commit asi,
		 * so flush the I-$ manually.
		 */

		ASSERT(fast == 0);

		sync_icache(va + off, len);
	} else {
		/*
		 * We have used the block commit asi and flushed the
		 * I-$.  However, we may still have an instruction in
		 * the pipeline; only a flush instruction will
		 * invalidate that.
		 */
		doflush(va);
	}

	if (fast) {
		ASSERT(CPU->cpu_id == cpu);
		pp_unload_tlb(slot, va);
	} else {
		ppmapout(va);
	}

	kpreempt_enable();
}