/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/t_lock.h>
#include <sys/vmem.h>
#include <sys/mman.h>
#include <sys/vm.h>
#include <sys/cpu.h>
#include <sys/cmn_err.h>
#include <sys/cpuvar.h>
#include <sys/atomic.h>
#include <vm/as.h>
#include <vm/hat.h>
#include <vm/page.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kpm.h>
#include <vm/hat_sfmmu.h>
#include <sys/debug.h>
#include <sys/cpu_module.h>

/*
 * A quick way to generate a cache consistent address to map in a page.
 * users: ppcopy, pagezero, /proc, dev/mem
 *
 * The ppmapin/ppmapout routines provide a quick way of generating a cache
 * consistent address by reserving a given amount of kernel address space.
 * The base is PPMAPBASE and its size is PPMAPSIZE.  This memory is divided
 * into x number of sets, where x is the number of colors for the virtual
 * cache.  The number of colors is how many times a page can be mapped
 * simultaneously in the cache.  For direct mapped caches this translates
 * to the number of pages in the cache.
 * Each set is assigned a group of virtual pages from the reserved memory
 * depending on its virtual color.
 * When trying to assign a virtual address, we first find the color of the
 * physical page in question (if applicable), then try to find an
 * available virtual page from the set of the appropriate color.
 */

int pp_slots = 4;		/* small default, tuned by cpu module */

/* tuned by cpu module, default is "safe" */
int pp_consistent_coloring = PPAGE_STORES_POLLUTE | PPAGE_LOADS_POLLUTE;

static caddr_t	ppmap_vaddrs[PPMAPSIZE / MMU_PAGESIZE];
static int	nsets;			/* number of sets */
static int	ppmap_shift;		/* set selector */

#ifdef	PPDEBUG
#define	MAXCOLORS	16		/* for debug only */
static int	ppalloc_noslot = 0;	/* # of allocations from kernel heap */
static int	align_hits;
static int	pp_allocs;		/* # of ppmapin requests */
#endif	/* PPDEBUG */

/*
 * There are only 64 TLB entries on spitfire, 16 on cheetah
 * (fully-associative TLB), so we allow the cpu module to tune the
 * number to use here via pp_slots.
 */
static struct ppmap_va {
	caddr_t	ppmap_slots[MAXPP_SLOTS];
} ppmap_va[NCPU];
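
/*
 * Typical use of this interface (an illustrative sketch only; ppcopy()
 * and pagezero() below are the real callers in this file).  A page is
 * mapped, operated on through the returned address, and unmapped:
 *
 *	caddr_t va;
 *
 *	va = ppmapin(pp, PROT_READ | PROT_WRITE, (caddr_t)-1);
 *	bzero(va, PAGESIZE);		(operate on the page through va)
 *	ppmapout(va);
 *
 * The (caddr_t)-1 hint means the caller has no color preference.
 */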

/* prevent compilation with VAC defined */
#ifdef VAC
#error "sun4v ppmapin and ppmapout do not support VAC"
#endif

void
ppmapinit(void)
{
	int	nset;
	caddr_t	va;

	ASSERT(pp_slots <= MAXPP_SLOTS);

	va = (caddr_t)PPMAPBASE;

	/*
	 * sun4v does not have a virtually indexed cache and simply
	 * has only one set containing all pages.
	 */
	nsets = mmu_btop(PPMAPSIZE);
	ppmap_shift = MMU_PAGESHIFT;

	for (nset = 0; nset < nsets; nset++) {
		ppmap_vaddrs[nset] =
		    (caddr_t)((uintptr_t)va + (nset * MMU_PAGESIZE));
	}
}

/*
 * Allocate a cache consistent virtual address to map a page, pp,
 * with protection, vprot, and map it in the MMU, using the most
 * efficient means possible.  The hint argument is a virtual address
 * which, when masked, yields the preferred cache color for the mapping
 * (see the NOTE below).  A hint of -1 means the caller does not care,
 * for instance pagezero.
 *
 * Machine dependent: depends on the virtual address space layout and
 * understands that all kernel addresses have bit 31 set.
 *
 * NOTE: For sun4 platforms the meaning of the hint argument is opposite from
 * that found in other architectures.  In other architectures the hint
 * (called avoid) was used to ask ppmapin NOT to use the specified cache color.
 * This was used to avoid virtual cache thrashing in bcopy.  Unfortunately,
 * in the case of a COW, this later on caused a cache aliasing conflict.  On
 * sun4, the bcopy routine uses the block ld/st instructions, so we don't have
 * to worry about virtual cache thrashing.  In fact, by using the hint to
 * choose the right color we can almost guarantee a cache conflict will not
 * occur.
 */

/*ARGSUSED2*/
caddr_t
ppmapin(page_t *pp, uint_t vprot, caddr_t hint)
{
	int	nset;
	caddr_t	va;

#ifdef PPDEBUG
	pp_allocs++;
#endif	/* PPDEBUG */

	/*
	 * On sun4v the caches are physically indexed, so we can pick
	 * any address we want.
	 */
	for (nset = 0; nset < nsets; nset++) {
		va = ppmap_vaddrs[nset];
		if (va != NULL) {
#ifdef PPDEBUG
			align_hits++;
#endif	/* PPDEBUG */
			if (atomic_cas_ptr(&ppmap_vaddrs[nset], va, NULL) ==
			    va) {
				hat_memload(kas.a_hat, va, pp,
				    vprot | HAT_NOSYNC,
				    HAT_LOAD_LOCK);
				return (va);
			}
		}
	}

#ifdef PPDEBUG
	ppalloc_noslot++;
#endif	/* PPDEBUG */

	/*
	 * No free slots; get one from the kernel heap area instead.
	 */
	va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);

	hat_memload(kas.a_hat, va, pp, vprot | HAT_NOSYNC, HAT_LOAD_LOCK);

	return (va);
}

void
ppmapout(caddr_t va)
{
	int	nset;

	if (va >= kernelheap && va < ekernelheap) {
		/*
		 * Space came from the kernel heap; flush the page and
		 * return the space.
		 */
		hat_unload(kas.a_hat, va, PAGESIZE,
		    (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));
		vmem_free(heap_arena, va, PAGESIZE);
	} else {
		/*
		 * Space came from ppmap_vaddrs[]; give it back.
		 */
		nset = ((uintptr_t)va >> ppmap_shift) & (nsets - 1);
		hat_unload(kas.a_hat, va, PAGESIZE,
		    (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));

		ASSERT(ppmap_vaddrs[nset] == NULL);
		ppmap_vaddrs[nset] = va;
	}
}
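
/*
 * A worked sketch of the slot arithmetic above.  With the values set up
 * in ppmapinit() (ppmap_shift == MMU_PAGESHIFT and nsets ==
 * mmu_btop(PPMAPSIZE)), and assuming PPMAPBASE is aligned on a PPMAPSIZE
 * boundary and PPMAPSIZE is a power-of-two number of pages, the index
 * computation in ppmapout() inverts the address assignment made in
 * ppmapinit():
 *
 *	va   == PPMAPBASE + nset * MMU_PAGESIZE
 *	nset == ((uintptr_t)va >> MMU_PAGESHIFT) & (nsets - 1)
 *
 * The mask (nsets - 1) works because, under those alignment assumptions,
 * it selects exactly the page-number bits that vary across ppmap_vaddrs[].
 */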

#ifdef DEBUG
#define	PP_STAT_ADD(stat)	(stat)++
uint_t pload, ploadfail;
uint_t ppzero, ppzero_short;
#else
#define	PP_STAT_ADD(stat)
#endif	/* DEBUG */

static void
pp_unload_tlb(caddr_t *pslot, caddr_t va)
{
	ASSERT(*pslot == va);

	vtag_flushpage(va, (uint64_t)ksfmmup);
	*pslot = NULL;			/* release the slot */
}

/*
 * Routine to copy kernel pages during relocation.  It will copy one
 * PAGESIZE page to another PAGESIZE page.  This function may be called
 * above LOCK_LEVEL, so it must not grab any locks.
 */
void
ppcopy_kernel__relocatable(page_t *fm_pp, page_t *to_pp)
{
	uint64_t fm_pa, to_pa;
	size_t nbytes;

	fm_pa = (uint64_t)(fm_pp->p_pagenum) << MMU_PAGESHIFT;
	to_pa = (uint64_t)(to_pp->p_pagenum) << MMU_PAGESHIFT;

	nbytes = MMU_PAGESIZE;

	/* copy 32 bytes at a time using physical addresses */
	for (; nbytes > 0; fm_pa += 32, to_pa += 32, nbytes -= 32)
		hw_pa_bcopy32(fm_pa, to_pa);
}

/*
 * Copy the data from the physical page represented by "fm_pp" to
 * that represented by "to_pp".
 *
 * Try to use a kpm mapping first; if that fails, fall back to
 * ppmapin/ppmapout.
 * Returns 1 on success, or 0 if a fault occurred during the copy.
 */
int
ppcopy(page_t *fm_pp, page_t *to_pp)
{
	caddr_t fm_va = NULL;
	caddr_t to_va;
	boolean_t fast;
	label_t ljb;
	int ret = 1;

	ASSERT(PAGE_LOCKED(fm_pp));
	ASSERT(PAGE_LOCKED(to_pp));

	/*
	 * Try to map using KPM if enabled.  If it fails, fall
	 * back to ppmapin/ppmapout.
	 */
	if ((kpm_enable == 0) ||
	    (fm_va = hat_kpm_mapin(fm_pp, NULL)) == NULL ||
	    (to_va = hat_kpm_mapin(to_pp, NULL)) == NULL) {
		if (fm_va != NULL)
			hat_kpm_mapout(fm_pp, NULL, fm_va);
		fm_va = ppmapin(fm_pp, PROT_READ, (caddr_t)-1);
		to_va = ppmapin(to_pp, PROT_READ | PROT_WRITE, fm_va);
		fast = B_FALSE;
	} else
		fast = B_TRUE;

	if (on_fault(&ljb)) {
		ret = 0;
		goto faulted;
	}
	bcopy(fm_va, to_va, PAGESIZE);
	no_fault();
faulted:

	/* Unmap */
	if (fast) {
		hat_kpm_mapout(fm_pp, NULL, fm_va);
		hat_kpm_mapout(to_pp, NULL, to_va);
	} else {
		ppmapout(fm_va);
		ppmapout(to_va);
	}
	return (ret);
}
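
/*
 * The on_fault()/no_fault() idiom used in ppcopy() above, shown in
 * isolation (an illustrative sketch):
 *
 *	label_t ljb;
 *
 *	if (on_fault(&ljb)) {
 *		(a fault occurred inside the protected region; control
 *		resumed here with on_fault() returning nonzero)
 *		return (0);
 *	}
 *	bcopy(fm_va, to_va, PAGESIZE);	(the protected access)
 *	no_fault();			(disarm the handler)
 *	return (1);
 *
 * ppcopy() uses a goto rather than an early return so that the mappings
 * are torn down on both the success and failure paths.
 */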

/*
 * Zero the physical page represented by "pp" from off to off + len,
 * without changing the reference and modified bits of the page.
 *
 * Again, try a kpm mapping first; if that fails, fall back to
 * ppmapin/ppmapout.
 */
void
pagezero(page_t *pp, uint_t off, uint_t len)
{
	caddr_t va;
	extern int hwblkclr(void *, size_t);
	extern int use_hw_bzero;
	boolean_t fast;

	ASSERT((int)len > 0 && (int)off >= 0 && off + len <= PAGESIZE);
	ASSERT(PAGE_LOCKED(pp));

	PP_STAT_ADD(ppzero);

	if (len != MMU_PAGESIZE || !use_hw_bzero) {
		PP_STAT_ADD(ppzero_short);
	}

	kpreempt_disable();

	/*
	 * Try to use KPM if enabled.  If that fails, fall back to
	 * ppmapin/ppmapout.
	 */
	if (kpm_enable != 0) {
		fast = B_TRUE;
		va = hat_kpm_mapin(pp, NULL);
	} else
		va = NULL;

	if (va == NULL) {
		fast = B_FALSE;
		va = ppmapin(pp, PROT_READ | PROT_WRITE, (caddr_t)-1);
	}

	if (!use_hw_bzero) {
		bzero(va + off, len);
		sync_icache(va + off, len);
	} else if (hwblkclr(va + off, len)) {
		/*
		 * We may not have used the block commit ASI,
		 * so flush the I-$ manually.
		 */
		sync_icache(va + off, len);
	} else {
		/*
		 * We have used block commit and flushed the I-$.
		 * However, we may still have an instruction in the
		 * pipeline; only a flush instruction will invalidate that.
		 */
		doflush(va);
	}

	if (fast) {
		hat_kpm_mapout(pp, NULL, va);
	} else {
		ppmapout(va);
	}
	kpreempt_enable();
}
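
/*
 * Illustrative callers of pagezero() (sketches only; "struct foo" is a
 * hypothetical type).  The page must be locked, and off + len must not
 * exceed PAGESIZE:
 *
 *	ASSERT(PAGE_LOCKED(pp));
 *	pagezero(pp, 0, MMU_PAGESIZE);		(zero the whole page)
 *	pagezero(pp, off, sizeof (struct foo));	(zero one embedded object)
 *
 * A partial zero is counted in ppzero_short.  When use_hw_bzero is set,
 * hwblkclr() is still attempted; its nonzero return value indicates the
 * block commit ASI may not have been used, in which case the I-$ is
 * flushed explicitly via sync_icache().
 */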