/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * UNIX machine dependent virtual memory support.
 */

#ifndef	_VM_DEP_H
#define	_VM_DEP_H

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#ifdef	__cplusplus
extern "C" {
#endif

#include <vm/hat_sfmmu.h>
#include <sys/archsystm.h>
#include <sys/memnode.h>

#define	GETTICK()	gettick()

/*
 * Per page size free lists. Allocated dynamically.
 */
#define	MAX_MEM_TYPES	2	/* 0 = reloc, 1 = noreloc */
#define	MTYPE_RELOC	0
#define	MTYPE_NORELOC	1

#define	PP_2_MTYPE(pp)	(PP_ISNORELOC(pp) ? MTYPE_NORELOC : MTYPE_RELOC)

#define	MTYPE_INIT(mtype, vp, vaddr, flags)				\
	mtype = (flags & PG_NORELOC) ? MTYPE_NORELOC : MTYPE_RELOC;

/*
 * macros to loop through the mtype range - noops for sparc
 */
#define	MTYPE_START(mnode, mtype, flags)
#define	MTYPE_NEXT(mnode, mtype, flags)		(-1)

/* mtype init for page_get_replacement_page */

#define	MTYPE_PGR_INIT(mtype, flags, pp, mnode)				\
	mtype = (flags & PG_NORELOC) ? MTYPE_NORELOC : MTYPE_RELOC;

#define	MNODETYPE_2_PFN(mnode, mtype, pfnlo, pfnhi)			\
	ASSERT(mtype != MTYPE_NORELOC);					\
	pfnlo = mem_node_config[mnode].physbase;			\
	pfnhi = mem_node_config[mnode].physmax;

/*
 * Internal PG_ flags.
 */
#define	PGI_RELOCONLY	0x10000	/* acts in the opposite sense to PG_NORELOC */
#define	PGI_NOCAGE	0x20000	/* indicates Cage is disabled */
#define	PGI_PGCPHIPRI	0x40000	/* page_get_contig_page priority allocation */
#define	PGI_PGCPSZC0	0x80000	/* relocate base pagesize page */

/*
 * PGI mtype flags - should not overlap PGI flags
 */
#define	PGI_MT_RANGE	0x1000000	/* mtype range */
#define	PGI_MT_NEXT	0x2000000	/* get next mtype */

extern page_t ***page_freelists[MMU_PAGE_SIZES][MAX_MEM_TYPES];
extern page_t ***page_cachelists[MAX_MEM_TYPES];

#define	PAGE_FREELISTS(mnode, szc, color, mtype)			\
	(*(page_freelists[szc][mtype][mnode] + (color)))

#define	PAGE_CACHELISTS(mnode, color, mtype)				\
	(*(page_cachelists[mtype][mnode] + (color)))
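/*
 * Example (illustrative only; pp, mnode, szc and bin are assumed to be
 * variables in the caller's scope, not part of this interface):
 *
 *	int	mtype = PP_2_MTYPE(pp);		 reloc vs. noreloc
 *	page_t	*first = PAGE_FREELISTS(mnode, szc, bin, mtype);
 *
 * PAGE_FREELISTS() simply dereferences the dynamically allocated
 * page_freelists[szc][mtype][mnode] array at the requested color to
 * yield the list head for that (mnode, size, color, type) combination.
 */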
/*
 * There are 'page_colors' colors/bins.  Spread them out under a
 * couple of locks.  There are mutexes for both the page freelist
 * and the page cachelist.  We want enough locks to make contention
 * reasonable, but not too many -- otherwise page_freelist_lock() gets
 * so expensive that it becomes the bottleneck!
 */
#define	NPC_MUTEX	16

extern kmutex_t	*fpc_mutex[NPC_MUTEX];
extern kmutex_t	*cpc_mutex[NPC_MUTEX];

/* Find the bin for the given page if it was of size szc */
#define	PP_2_BIN_SZC(pp, szc)						\
	(((pp->p_pagenum) & page_colors_mask) >>			\
	(hw_page_array[szc].hp_shift - hw_page_array[0].hp_shift))

#define	PP_2_BIN(pp)		(PP_2_BIN_SZC(pp, pp->p_szc))

#define	PP_2_MEM_NODE(pp)	(PFN_2_MEM_NODE(pp->p_pagenum))

#define	PC_BIN_MUTEX(mnode, bin, flags)	((flags & PG_FREE_LIST) ?	\
	&fpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode] :			\
	&cpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode])

#define	FPC_MUTEX(mnode, i)	(&fpc_mutex[i][mnode])
#define	CPC_MUTEX(mnode, i)	(&cpc_mutex[i][mnode])
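/*
 * Example (illustrative only; pp is an assumed example variable): a page's
 * bin, hashed into NPC_MUTEX buckets, selects which per-mnode mutex guards
 * the free list that the page lives on.
 *
 *	int		bin = PP_2_BIN(pp);
 *	kmutex_t	*pcm = PC_BIN_MUTEX(PP_2_MEM_NODE(pp), bin,
 *			    PG_FREE_LIST);
 *	mutex_enter(pcm);
 *	... manipulate the list ...
 *	mutex_exit(pcm);
 */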
#define	PFN_BASE(pfnum, szc)	(pfnum & ~((1 << PAGE_BSZS_SHIFT(szc)) - 1))

typedef	char	hpmctr_t;

#ifdef DEBUG
#define	CHK_LPG(pp, szc)	chk_lpg(pp, szc)
extern void	chk_lpg(page_t *, uchar_t);
#else
#define	CHK_LPG(pp, szc)
#endif

#ifdef DEBUG

/* page list count */
typedef	struct {
	pgcnt_t	plc_m_pgmax;
	pgcnt_t	plc_m_pgcnt;
	pgcnt_t	plc_m_clpgcnt;		/* cache list cnt */
	struct {
		pgcnt_t	plc_mt_pgmax;
		pgcnt_t	plc_mt_pgcnt;
		struct {
			pgcnt_t	plc_mts_pgcnt;
			int	plc_mts_colors;
			pgcnt_t	*plc_mtsc_pgcnt;
		} plc_mts[MMU_PAGE_SIZES];
	} plc_mt[MAX_MEM_TYPES];
} plcnt_t[MAX_MEM_NODES];

extern plcnt_t	plcnt;

#define	PLCNT_SZ(ctrs_sz) {						\
	int	szc;							\
	for (szc = 0; szc <= mmu_page_sizes; szc++) {			\
		int	colors = page_get_pagecolors(szc);		\
		ctrs_sz += (max_mem_nodes * MAX_MEM_TYPES *		\
		    colors * sizeof (pgcnt_t));				\
	}								\
}

#define	PLCNT_INIT(base) {						\
	int	mn, mt, szc, colors;					\
	for (szc = 0; szc < mmu_page_sizes; szc++) {			\
		colors = page_get_pagecolors(szc);			\
		for (mn = 0; mn < max_mem_nodes; mn++) {		\
			for (mt = 0; mt < MAX_MEM_TYPES; mt++) {	\
				plcnt[mn].plc_mt[mt].plc_mts[szc].	\
				    plc_mts_colors = colors;		\
				plcnt[mn].plc_mt[mt].plc_mts[szc].	\
				    plc_mtsc_pgcnt = (pgcnt_t *)base;	\
				base += (colors * sizeof (pgcnt_t));	\
			}						\
		}							\
	}								\
}

#define	PLCNT_DO(pp, mn, szc, cnt, flags) {				\
	int	mtype = PP_2_MTYPE(pp);					\
	int	bin = PP_2_BIN(pp);					\
	if (flags & (PG_LIST_ISINIT | PG_LIST_ISCAGE))			\
		atomic_add_long(&plcnt[mn].plc_mt[mtype].plc_mt_pgmax,	\
		    cnt);						\
	atomic_add_long(&mem_node_config[mn].cursize, cnt);		\
	if (flags & PG_CACHE_LIST)					\
		atomic_add_long(&plcnt[mn].plc_m_clpgcnt, cnt);		\
	atomic_add_long(&plcnt[mn].plc_m_pgcnt, cnt);			\
	atomic_add_long(&plcnt[mn].plc_mt[mtype].plc_mt_pgcnt, cnt);	\
	atomic_add_long(&plcnt[mn].plc_mt[mtype].plc_mts[szc].		\
	    plc_mts_pgcnt, cnt);					\
	atomic_add_long(&plcnt[mn].plc_mt[mtype].plc_mts[szc].		\
	    plc_mtsc_pgcnt[bin], cnt);					\
}

#define	PLCNT_INCR(pp, mn, szc, flags) {				\
	long	cnt = (1 << PAGE_BSZS_SHIFT(szc));			\
	if (flags & PG_LIST_ISINIT)					\
		plcnt[mn].plc_m_pgmax += cnt;				\
	PLCNT_DO(pp, mn, szc, cnt, flags);				\
}

#define	PLCNT_DECR(pp, mn, szc, flags) {				\
	long	cnt = ((-1) << PAGE_BSZS_SHIFT(szc));			\
	PLCNT_DO(pp, mn, szc, cnt, flags);				\
}

#else

#define	PLCNT_SZ(ctrs_sz)

#define	PLCNT_INIT(base)

#define	PLCNT_INCR(pp, mnode, szc, flags) {				\
	long	cnt = (1 << PAGE_BSZS_SHIFT(szc));			\
	atomic_add_long(&mem_node_config[mnode].cursize, cnt);		\
}

#define	PLCNT_DECR(pp, mnode, szc, flags) {				\
	long	cnt = ((-1) << PAGE_BSZS_SHIFT(szc));			\
	atomic_add_long(&mem_node_config[mnode].cursize, cnt);		\
}

#endif

/*
 * get the ecache setsize for the current cpu.
 */
#define	CPUSETSIZE()	(cpunodes[CPU->cpu_id].ecache_setsize)


#define	PAGE_BSZS_SHIFT(szc)	TTE_BSZS_SHIFT(szc)
/*
 * For sfmmu each larger page size is 8 times the size of the previous
 * page size.
 */
#define	FULL_REGION_CNT(rg_szc)	(8)

/*
 * The counter base must be per page_counter element to prevent
 * races when re-indexing, and the base page size element should
 * be aligned on a boundary of the given region size.
 *
 * We also round up the number of pages spanned by the counters
 * for a given region to PC_BASE_ALIGN in certain situations to simplify
 * the coding for some non-performance critical routines.
 */
#define	PC_BASE_ALIGN		((pfn_t)1 << PAGE_BSZS_SHIFT(mmu_page_sizes-1))
#define	PC_BASE_ALIGN_MASK	(PC_BASE_ALIGN - 1)

extern int ecache_alignsize;
#define	L2CACHE_ALIGN		ecache_alignsize

extern int consistent_coloring;
extern uint_t vac_colors_mask;
extern int vac_size;
extern int vac_shift;

/*
 * Auto large page selection support variables. Some CPU
 * implementations may differ from the defaults and will need
 * to change these.
 */
extern int auto_lpg_tlb_threshold;
extern int auto_lpg_minszc;
extern int auto_lpg_maxszc;
extern size_t auto_lpg_heap_default;
extern size_t auto_lpg_stack_default;
extern size_t auto_lpg_va_default;
extern size_t auto_lpg_remap_threshold;

/*
 * The AS_2_BIN macro controls the page coloring policy:
 *	0 (default) uses various vaddr bits
 *	1 virtual=paddr
 *	2 bin hopping
 */
#define	AS_2_BIN(as, seg, vp, addr, bin)				\
switch (consistent_coloring) {						\
	default:							\
		cmn_err(CE_WARN,					\
			"AS_2_BIN: bad consistent coloring value");	\
		/* assume default algorithm -> continue */		\
	case 0: {							\
		uint32_t ndx, new;					\
		int slew = 0;						\
									\
		if (vp != NULL && IS_SWAPVP(vp) &&			\
			seg->s_ops == &segvn_ops)			\
			slew = as_color_bin(as);			\
									\
		bin = (((uintptr_t)addr >> MMU_PAGESHIFT) +		\
			(((uintptr_t)addr >> page_coloring_shift) <<	\
			(vac_shift - MMU_PAGESHIFT)) + slew) &		\
			page_colors_mask;				\
									\
		break;							\
	}								\
	case 1:								\
		bin = ((uintptr_t)addr >> MMU_PAGESHIFT) &		\
			page_colors_mask;				\
		break;							\
	case 2: {							\
		int cnt = as_color_bin(as);				\
		/* make sure physical color aligns with vac color */	\
		while ((cnt & vac_colors_mask) !=			\
		    addr_to_vcolor(addr)) {				\
			cnt++;						\
		}							\
		bin = cnt = cnt & page_colors_mask;			\
		/* update per as page coloring fields */		\
		cnt = (cnt + 1) & page_colors_mask;			\
		if (cnt == (as_color_start(as) & page_colors_mask)) {	\
			cnt = as_color_start(as) = as_color_start(as) +	\
				PGCLR_LOOPFACTOR;			\
		}							\
		as_color_bin(as) = cnt & page_colors_mask;		\
		break;							\
	}								\
}									\
	ASSERT(bin <= page_colors_mask);

/*
 * Function to get an ecache color bin: F(as, cnt, vcolor).
 * The goals of this function are:
 * - to spread a process's physical pages across the entire ecache to
 *	maximize its use;
 * - to minimize vac flushes caused when we reuse a physical page on a
 *	different vac color than the one it was previously used with;
 * - to prevent all processes from using exactly the same colors and
 *	trashing each other.
 *
 * cnt is a bin ptr kept on a per-as basis. As we page_create we increment
 * the ptr so we spread out the physical pages to cover the entire ecache.
 * The virtual color is made a subset of the physical color in order to
 * minimize virtual cache flushing.
 * We add in the as to spread out different address spaces. This happens
 * when we initialize the start count value.
 * sizeof (struct as) is 60, so we shift by 3 to get into the bit range
 * that will tend to change. For example, on spitfire-based machines
 * (vcshft == 1) contiguous as's are spread by ~6 bins.
 * vcshft provides for proper virtual color alignment.
 * In theory cnt should be updated using cas only, but if we are off by
 * one or two it is no big deal.
 * We also keep a start value which is used to randomize which bin we
 * start counting at when it is time to start another loop. This avoids
 * contiguous allocations of ecache size all pointing at the same bin.
 * Why 3? It seems to work well; better than 7 or anything larger.
 */
#define	PGCLR_LOOPFACTOR 3

/*
 * When a bin is empty, and we can't satisfy a color request correctly,
 * we scan.  If we assume that the programs have reasonable spatial
 * behavior, then it will not be a good idea to use the adjacent color.
 * Using the adjacent color would result in virtually adjacent addresses
 * mapping into the same spot in the cache.  So, if we stumble across
 * an empty bin, skip a bunch before looking.  After the first skip,
 * then just look one bin at a time so we don't miss our cache on
 * every look. Be sure to check every bin.  Page_create() will panic
 * if we miss a page.
 *
 * This also explains the `<=' in the for loops in both page_get_freelist()
 * and page_get_cachelist().  Since we checked the target bin, skipped
 * a bunch, then continued one bin at a time, we wind up checking the
 * target bin twice to make sure we get all of the bins.
 */
#define	BIN_STEP	20

#ifdef VM_STATS
struct vmm_vmstats_str {
	ulong_t	pc_list_add_pages[MMU_PAGE_SIZES];
	ulong_t	pc_list_sub_pages1[MMU_PAGE_SIZES];
	ulong_t	pc_list_sub_pages2[MMU_PAGE_SIZES];
	ulong_t	pc_list_sub_pages3[MMU_PAGE_SIZES];
	ulong_t	pgf_alloc[MMU_PAGE_SIZES];
	ulong_t	pgf_allocok[MMU_PAGE_SIZES];
	ulong_t	pgf_allocokrem[MMU_PAGE_SIZES];
	ulong_t	pgf_allocfailed[MMU_PAGE_SIZES];
	ulong_t	pgf_allocdeferred;
	ulong_t	pgf_allocretry[MMU_PAGE_SIZES];
	ulong_t	pgc_alloc;
	ulong_t	pgc_allocok;
	ulong_t	pgc_allocokrem;
	ulong_t	pgc_allocokdeferred;
	ulong_t	pgc_allocfailed;
	ulong_t	pgcp_alloc[MMU_PAGE_SIZES];
	ulong_t	pgcp_allocfailed[MMU_PAGE_SIZES];
	ulong_t	pgcp_allocempty[MMU_PAGE_SIZES];
	ulong_t	pgcp_allocok[MMU_PAGE_SIZES];
	ulong_t	ptcp[MMU_PAGE_SIZES];
	ulong_t	ptcpfreethresh[MMU_PAGE_SIZES];
	ulong_t	ptcpfailexcl[MMU_PAGE_SIZES];
	ulong_t	ptcpfailszc[MMU_PAGE_SIZES];
	ulong_t	ptcpfailcage[MMU_PAGE_SIZES];
	ulong_t	ptcpok[MMU_PAGE_SIZES];
	ulong_t	pgmf_alloc[MMU_PAGE_SIZES];
	ulong_t	pgmf_allocfailed[MMU_PAGE_SIZES];
	ulong_t	pgmf_allocempty[MMU_PAGE_SIZES];
	ulong_t	pgmf_allocok[MMU_PAGE_SIZES];
	ulong_t	pgmc_alloc;
	ulong_t	pgmc_allocfailed;
	ulong_t	pgmc_allocempty;
	ulong_t	pgmc_allocok;
	ulong_t	ppr_reloc[MMU_PAGE_SIZES];
	ulong_t	ppr_relocok[MMU_PAGE_SIZES];
	ulong_t	ppr_relocnoroot[MMU_PAGE_SIZES];
	ulong_t	ppr_reloc_replnoroot[MMU_PAGE_SIZES];
	ulong_t	ppr_relocnolock[MMU_PAGE_SIZES];
	ulong_t	ppr_relocnomem[MMU_PAGE_SIZES];
	ulong_t	ppr_krelocfail[MMU_PAGE_SIZES];
	ulong_t	page_ctrs_coalesce;	/* page coalesce counter */
	ulong_t	page_ctrs_cands_skip;	/* candidates useful */
	ulong_t	page_ctrs_changed;	/* ctrs changed after locking */
	ulong_t	page_ctrs_failed;	/* page_freelist_coalesce failed */
	ulong_t	page_ctrs_coalesce_all;	/* page coalesce all counter */
	ulong_t	page_ctrs_cands_skip_all; /* candidates useful for all func */
};
extern struct vmm_vmstats_str vmm_vmstats;
#endif	/* VM_STATS */

/*
 * Used to hold off page relocations into the cage until OBP has completed
 * its boot-time handoff of its resources to the kernel.
 */
extern int page_relocate_ready;

/*
 * cpu/mmu-dependent vm variables may be reset at bootup.
 */
extern uint_t mmu_page_sizes;
extern uint_t max_mmu_page_sizes;
extern uint_t mmu_hashcnt;
extern uint_t max_mmu_hashcnt;
extern size_t mmu_ism_pagesize;
extern int mmu_exported_pagesize_mask;
extern uint_t mmu_exported_page_sizes;
extern uint_t szc_2_userszc[];
extern uint_t userszc_2_szc[];

#define	USERSZC_2_SZC(userszc)	(userszc_2_szc[userszc])
#define	SZC_2_USERSZC(szc)	(szc_2_userszc[szc])
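/*
 * Example (illustrative only): the two arrays above translate between the
 * internal page size code ordering and the ordering exported to user
 * programs, so a size code received from userland would be mapped with
 *
 *	uint_t szc = USERSZC_2_SZC(userszc);
 *
 * and mapped back with SZC_2_USERSZC(szc) before being handed out again.
 */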
448 */ 449 extern size_t map_pgszva(struct proc *p, caddr_t addr, size_t len); 450 extern size_t map_pgszheap(struct proc *p, caddr_t addr, size_t len); 451 extern size_t map_pgszstk(struct proc *p, caddr_t addr, size_t len); 452 453 /* 454 * Platform specific page routines 455 */ 456 extern void mach_page_add(page_t **, page_t *); 457 extern void mach_page_sub(page_t **, page_t *); 458 extern uint_t page_get_pagecolors(uint_t); 459 extern void ppcopy_kernel__relocatable(page_t *, page_t *); 460 #define ppcopy_kernel(p1, p2) ppcopy_kernel__relocatable(p1, p2) 461 462 /* 463 * platform specific large pages for kernel heap support 464 */ 465 extern size_t get_segkmem_lpsize(size_t lpsize); 466 extern size_t mmu_get_kernel_lpsize(size_t lpsize); 467 extern void mmu_init_kernel_pgsz(struct hat *hat); 468 extern void mmu_init_kcontext(); 469 extern uint64_t kcontextreg; 470 471 #ifdef __cplusplus 472 } 473 #endif 474 475 #endif /* _VM_DEP_H */ 476