1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * UNIX machine dependent virtual memory support. 28 */ 29 30 #ifndef _VM_DEP_H 31 #define _VM_DEP_H 32 33 #pragma ident "%Z%%M% %I% %E% SMI" 34 35 #ifdef __cplusplus 36 extern "C" { 37 #endif 38 39 #include <vm/hat_sfmmu.h> 40 #include <sys/archsystm.h> 41 #include <sys/memnode.h> 42 43 #define GETTICK() gettick() 44 45 /* 46 * Per page size free lists. Allocated dynamically. 47 */ 48 #define MAX_MEM_TYPES 2 /* 0 = reloc, 1 = noreloc */ 49 #define MTYPE_RELOC 0 50 #define MTYPE_NORELOC 1 51 52 #define PP_2_MTYPE(pp) (PP_ISNORELOC(pp) ? MTYPE_NORELOC : MTYPE_RELOC) 53 54 #define MTYPE_INIT(mtype, vp, vaddr, flags, pgsz) \ 55 mtype = (flags & PG_NORELOC) ? MTYPE_NORELOC : MTYPE_RELOC; 56 57 /* mtype init for page_get_replacement_page */ 58 #define MTYPE_PGR_INIT(mtype, flags, pp, mnode, pgcnt) \ 59 mtype = (flags & PG_NORELOC) ? MTYPE_NORELOC : MTYPE_RELOC; 60 61 #define MNODETYPE_2_PFN(mnode, mtype, pfnlo, pfnhi) \ 62 ASSERT(mtype != MTYPE_NORELOC); \ 63 pfnlo = mem_node_config[mnode].physbase; \ 64 pfnhi = mem_node_config[mnode].physmax; 65 66 /* 67 * candidate counters in vm_pagelist.c are indexed by color and range 68 */ 69 #define MAX_MNODE_MRANGES MAX_MEM_TYPES 70 #define MNODE_RANGE_CNT(mnode) MAX_MNODE_MRANGES 71 #define MNODE_MAX_MRANGE(mnode) (MAX_MEM_TYPES - 1) 72 #define MTYPE_2_MRANGE(mnode, mtype) (mtype) 73 74 /* 75 * Internal PG_ flags. 76 */ 77 #define PGI_RELOCONLY 0x10000 /* acts in the opposite sense to PG_NORELOC */ 78 #define PGI_NOCAGE 0x20000 /* indicates Cage is disabled */ 79 #define PGI_PGCPHIPRI 0x40000 /* page_get_contig_page priority allocation */ 80 #define PGI_PGCPSZC0 0x80000 /* relocate base pagesize page */ 81 82 /* 83 * PGI mtype flags - should not overlap PGI flags 84 */ 85 #define PGI_MT_RANGE 0x1000000 /* mtype range */ 86 #define PGI_MT_NEXT 0x2000000 /* get next mtype */ 87 88 extern page_t ***page_freelists[MMU_PAGE_SIZES][MAX_MEM_TYPES]; 89 extern page_t ***page_cachelists[MAX_MEM_TYPES]; 90 91 #define PAGE_FREELISTS(mnode, szc, color, mtype) \ 92 (*(page_freelists[szc][mtype][mnode] + (color))) 93 94 #define PAGE_CACHELISTS(mnode, color, mtype) \ 95 (*(page_cachelists[mtype][mnode] + (color))) 96 97 /* 98 * There are 'page_colors' colors/bins. Spread them out under a 99 * couple of locks. There are mutexes for both the page freelist 100 * and the page cachelist. We want enough locks to make contention 101 * reasonable, but not too many -- otherwise page_freelist_lock() gets 102 * so expensive that it becomes the bottleneck! 103 */ 104 #define NPC_MUTEX 16 105 106 extern kmutex_t *fpc_mutex[NPC_MUTEX]; 107 extern kmutex_t *cpc_mutex[NPC_MUTEX]; 108 109 /* 110 * cpu specific color conversion functions 111 */ 112 extern uint_t page_get_nsz_color_mask_cpu(uchar_t, uint_t); 113 #pragma weak page_get_nsz_color_mask_cpu 114 115 extern uint_t page_get_nsz_color_cpu(uchar_t, uint_t); 116 #pragma weak page_get_nsz_color_cpu 117 118 extern uint_t page_get_color_shift_cpu(uchar_t, uchar_t); 119 #pragma weak page_get_color_shift_cpu 120 121 extern pfn_t page_next_pfn_for_color_cpu(pfn_t, 122 uchar_t, uint_t, uint_t, uint_t); 123 #pragma weak page_next_pfn_for_color_cpu 124 125 extern uint_t page_pfn_2_color_cpu(pfn_t, uchar_t); 126 #pragma weak page_pfn_2_color_cpu 127 128 #define PAGE_GET_COLOR_SHIFT(szc, nszc) \ 129 ((&page_get_color_shift_cpu != NULL) ? \ 130 page_get_color_shift_cpu(szc, nszc) : \ 131 (hw_page_array[(nszc)].hp_shift - \ 132 hw_page_array[(szc)].hp_shift)) 133 134 #define PFN_2_COLOR(pfn, szc) \ 135 ((&page_pfn_2_color_cpu != NULL) ? \ 136 page_pfn_2_color_cpu(pfn, szc) : \ 137 ((pfn & (hw_page_array[0].hp_colors - 1)) >> \ 138 (hw_page_array[szc].hp_shift - \ 139 hw_page_array[0].hp_shift))) 140 141 #define PNUM_SIZE(szc) \ 142 (hw_page_array[(szc)].hp_pgcnt) 143 #define PNUM_SHIFT(szc) \ 144 (hw_page_array[(szc)].hp_shift - hw_page_array[0].hp_shift) 145 #define PAGE_GET_SHIFT(szc) \ 146 (hw_page_array[(szc)].hp_shift) 147 #define PAGE_GET_PAGECOLORS(szc) \ 148 (hw_page_array[(szc)].hp_colors) 149 150 /* 151 * This macro calculates the next sequential pfn with the specified 152 * color using color equivalency mask 153 */ 154 #define PAGE_NEXT_PFN_FOR_COLOR(pfn, szc, color, ceq_mask, color_mask) \ 155 ASSERT(((color) & ~(ceq_mask)) == 0); \ 156 if (&page_next_pfn_for_color_cpu == NULL) { \ 157 uint_t pfn_shift = PAGE_BSZS_SHIFT(szc); \ 158 pfn_t spfn = pfn >> pfn_shift; \ 159 pfn_t stride = (ceq_mask) + 1; \ 160 ASSERT((((ceq_mask) + 1) & (ceq_mask)) == 0); \ 161 if (((spfn ^ (color)) & (ceq_mask)) == 0) { \ 162 pfn += stride << pfn_shift; \ 163 } else { \ 164 pfn = (spfn & ~(pfn_t)(ceq_mask)) | (color); \ 165 pfn = (pfn > spfn ? pfn : pfn + stride) << pfn_shift; \ 166 } \ 167 } else { \ 168 pfn = page_next_pfn_for_color_cpu(pfn, szc, color, \ 169 ceq_mask, color_mask); \ 170 } 171 172 /* get the color equivalency mask for the next szc */ 173 #define PAGE_GET_NSZ_MASK(szc, mask) \ 174 ((&page_get_nsz_color_mask_cpu == NULL) ? \ 175 ((mask) >> (PAGE_GET_SHIFT((szc) + 1) - PAGE_GET_SHIFT(szc))) : \ 176 page_get_nsz_color_mask_cpu(szc, mask)) 177 178 /* get the color of the next szc */ 179 #define PAGE_GET_NSZ_COLOR(szc, color) \ 180 ((&page_get_nsz_color_cpu == NULL) ? \ 181 ((color) >> (PAGE_GET_SHIFT((szc) + 1) - PAGE_GET_SHIFT(szc))) : \ 182 page_get_nsz_color_cpu(szc, color)) 183 184 /* Find the bin for the given page if it was of size szc */ 185 #define PP_2_BIN_SZC(pp, szc) (PFN_2_COLOR(pp->p_pagenum, szc)) 186 187 #define PP_2_BIN(pp) (PP_2_BIN_SZC(pp, pp->p_szc)) 188 189 #define PP_2_MEM_NODE(pp) (PFN_2_MEM_NODE(pp->p_pagenum)) 190 191 #define PC_BIN_MUTEX(mnode, bin, flags) ((flags & PG_FREE_LIST) ? \ 192 &fpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode] : \ 193 &cpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode]) 194 195 #define FPC_MUTEX(mnode, i) (&fpc_mutex[i][mnode]) 196 #define CPC_MUTEX(mnode, i) (&cpc_mutex[i][mnode]) 197 198 #define PFN_BASE(pfnum, szc) (pfnum & ~((1 << PAGE_BSZS_SHIFT(szc)) - 1)) 199 200 /* 201 * this structure is used for walking free page lists 202 * controls when to split large pages into smaller pages, 203 * and when to coalesce smaller pages into larger pages 204 */ 205 typedef struct page_list_walker { 206 uint_t plw_colors; /* num of colors for szc */ 207 uint_t plw_color_mask; /* colors-1 */ 208 uint_t plw_bin_step; /* next bin: 1 or 2 */ 209 uint_t plw_count; /* loop count */ 210 uint_t plw_bin0; /* starting bin */ 211 uint_t plw_bin_marker; /* bin after initial jump */ 212 uint_t plw_bin_split_prev; /* last bin we tried to split */ 213 uint_t plw_do_split; /* set if OK to split */ 214 uint_t plw_split_next; /* next bin to split */ 215 uint_t plw_ceq_dif; /* number of different color groups */ 216 /* to check */ 217 uint_t plw_ceq_mask[MMU_PAGE_SIZES + 1]; /* color equiv mask */ 218 uint_t plw_bins[MMU_PAGE_SIZES + 1]; /* num of bins */ 219 } page_list_walker_t; 220 221 void page_list_walk_init(uchar_t szc, uint_t flags, uint_t bin, 222 int can_split, int use_ceq, page_list_walker_t *plw); 223 224 typedef char hpmctr_t; 225 226 #ifdef DEBUG 227 #define CHK_LPG(pp, szc) chk_lpg(pp, szc) 228 extern void chk_lpg(page_t *, uchar_t); 229 #else 230 #define CHK_LPG(pp, szc) 231 #endif 232 233 /* 234 * page list count per mnode and type. 235 */ 236 typedef struct { 237 pgcnt_t plc_mt_pgmax; /* max page cnt */ 238 pgcnt_t plc_mt_clpgcnt; /* cache list cnt */ 239 pgcnt_t plc_mt_flpgcnt; /* free list cnt - small pages */ 240 pgcnt_t plc_mt_lgpgcnt; /* free list cnt - large pages */ 241 #ifdef DEBUG 242 struct { 243 pgcnt_t plc_mts_pgcnt; /* per page size count */ 244 int plc_mts_colors; 245 pgcnt_t *plc_mtsc_pgcnt; /* per color bin count */ 246 } plc_mts[MMU_PAGE_SIZES]; 247 #endif 248 } plcnt_t[MAX_MEM_NODES][MAX_MEM_TYPES]; 249 250 #ifdef DEBUG 251 252 #define PLCNT_SZ(ctrs_sz) { \ 253 int szc; \ 254 for (szc = 0; szc < mmu_page_sizes; szc++) { \ 255 int colors = page_get_pagecolors(szc); \ 256 ctrs_sz += (max_mem_nodes * MAX_MEM_TYPES * \ 257 colors * sizeof (pgcnt_t)); \ 258 } \ 259 } 260 261 #define PLCNT_INIT(base) { \ 262 int mn, mt, szc, colors; \ 263 for (szc = 0; szc < mmu_page_sizes; szc++) { \ 264 colors = page_get_pagecolors(szc); \ 265 for (mn = 0; mn < max_mem_nodes; mn++) { \ 266 for (mt = 0; mt < MAX_MEM_TYPES; mt++) { \ 267 plcnt[mn][mt].plc_mts[szc]. \ 268 plc_mts_colors = colors; \ 269 plcnt[mn][mt].plc_mts[szc]. \ 270 plc_mtsc_pgcnt = (pgcnt_t *)base; \ 271 base += (colors * sizeof (pgcnt_t)); \ 272 } \ 273 } \ 274 } \ 275 } 276 277 #define PLCNT_DO(pp, mn, mtype, szc, cnt, flags) { \ 278 int bin = PP_2_BIN(pp); \ 279 if (flags & PG_CACHE_LIST) \ 280 atomic_add_long(&plcnt[mn][mtype].plc_mt_clpgcnt, cnt); \ 281 else if (szc) \ 282 atomic_add_long(&plcnt[mn][mtype].plc_mt_lgpgcnt, cnt); \ 283 else \ 284 atomic_add_long(&plcnt[mn][mtype].plc_mt_flpgcnt, cnt); \ 285 atomic_add_long(&plcnt[mn][mtype].plc_mts[szc].plc_mts_pgcnt, \ 286 cnt); \ 287 atomic_add_long(&plcnt[mn][mtype].plc_mts[szc]. \ 288 plc_mtsc_pgcnt[bin], cnt); \ 289 } 290 291 #else 292 293 #define PLCNT_SZ(ctrs_sz) 294 295 #define PLCNT_INIT(base) 296 297 /* PG_FREE_LIST may not be explicitly set in flags for large pages */ 298 299 #define PLCNT_DO(pp, mn, mtype, szc, cnt, flags) { \ 300 if (flags & PG_CACHE_LIST) \ 301 atomic_add_long(&plcnt[mn][mtype].plc_mt_clpgcnt, cnt); \ 302 else if (szc) \ 303 atomic_add_long(&plcnt[mn][mtype].plc_mt_lgpgcnt, cnt); \ 304 else \ 305 atomic_add_long(&plcnt[mn][mtype].plc_mt_flpgcnt, cnt); \ 306 } 307 308 #endif 309 310 #define PLCNT_INCR(pp, mn, mtype, szc, flags) { \ 311 long cnt = (1 << PAGE_BSZS_SHIFT(szc)); \ 312 PLCNT_DO(pp, mn, mtype, szc, cnt, flags); \ 313 } 314 315 #define PLCNT_DECR(pp, mn, mtype, szc, flags) { \ 316 long cnt = ((-1) << PAGE_BSZS_SHIFT(szc)); \ 317 PLCNT_DO(pp, mn, mtype, szc, cnt, flags); \ 318 } 319 320 /* 321 * macros to update page list max counts - done when pages transferred 322 * from RELOC to NORELOC mtype (kcage_init or kcage_assimilate_page). 323 */ 324 325 #define PLCNT_XFER_NORELOC(pp) { \ 326 long cnt = (1 << PAGE_BSZS_SHIFT((pp)->p_szc)); \ 327 int mn = PP_2_MEM_NODE(pp); \ 328 atomic_add_long(&plcnt[mn][MTYPE_NORELOC].plc_mt_pgmax, cnt); \ 329 atomic_add_long(&plcnt[mn][MTYPE_RELOC].plc_mt_pgmax, -cnt); \ 330 } 331 332 /* 333 * macro to modify the page list max counts when memory is added to 334 * the page lists during startup (add_physmem) or during a DR operation 335 * when memory is added (kphysm_add_memory_dynamic) or deleted 336 * (kphysm_del_cleanup). 337 */ 338 #define PLCNT_MODIFY_MAX(pfn, cnt) { \ 339 int mn = PFN_2_MEM_NODE(pfn); \ 340 atomic_add_long(&plcnt[mn][MTYPE_RELOC].plc_mt_pgmax, (cnt)); \ 341 } 342 343 extern plcnt_t plcnt; 344 345 #define MNODE_PGCNT(mn) \ 346 (plcnt[mn][MTYPE_RELOC].plc_mt_clpgcnt + \ 347 plcnt[mn][MTYPE_NORELOC].plc_mt_clpgcnt + \ 348 plcnt[mn][MTYPE_RELOC].plc_mt_flpgcnt + \ 349 plcnt[mn][MTYPE_NORELOC].plc_mt_flpgcnt + \ 350 plcnt[mn][MTYPE_RELOC].plc_mt_lgpgcnt + \ 351 plcnt[mn][MTYPE_NORELOC].plc_mt_lgpgcnt) 352 353 #define MNODETYPE_PGCNT(mn, mtype) \ 354 (plcnt[mn][mtype].plc_mt_clpgcnt + \ 355 plcnt[mn][mtype].plc_mt_flpgcnt + \ 356 plcnt[mn][mtype].plc_mt_lgpgcnt) 357 358 /* 359 * macros to loop through the mtype range - MTYPE_START returns -1 in 360 * mtype if no pages in mnode/mtype and possibly NEXT mtype. 361 */ 362 #define MTYPE_START(mnode, mtype, flags) { \ 363 if (plcnt[mnode][mtype].plc_mt_pgmax == 0) { \ 364 ASSERT(MNODETYPE_PGCNT(mnode, mtype) == 0); \ 365 MTYPE_NEXT(mnode, mtype, flags); \ 366 } \ 367 } 368 369 /* 370 * if allocation from the RELOC pool failed and there is sufficient cage 371 * memory, attempt to allocate from the NORELOC pool. 372 */ 373 #define MTYPE_NEXT(mnode, mtype, flags) { \ 374 if (!(flags & (PG_NORELOC | PGI_NOCAGE | PGI_RELOCONLY)) && \ 375 (kcage_freemem >= kcage_lotsfree)) { \ 376 if (plcnt[mnode][MTYPE_NORELOC].plc_mt_pgmax == 0) { \ 377 ASSERT(MNODETYPE_PGCNT(mnode, MTYPE_NORELOC) == 0); \ 378 mtype = -1; \ 379 } else { \ 380 mtype = MTYPE_NORELOC; \ 381 flags |= PG_NORELOC; \ 382 } \ 383 } else { \ 384 mtype = -1; \ 385 } \ 386 } 387 388 /* 389 * get the ecache setsize for the current cpu. 390 */ 391 #define CPUSETSIZE() (cpunodes[CPU->cpu_id].ecache_setsize) 392 #define CPUASSOC() (cpunodes[CPU->cpu_id].ecache_associativity) 393 394 extern struct cpu cpu0; 395 #define CPU0 &cpu0 396 397 #define PAGE_BSZS_SHIFT(szc) TTE_BSZS_SHIFT(szc) 398 /* 399 * For sfmmu each larger page is 8 times the size of the previous 400 * size page. 401 */ 402 #define FULL_REGION_CNT(rg_szc) (8) 403 404 /* 405 * The counter base must be per page_counter element to prevent 406 * races when re-indexing, and the base page size element should 407 * be aligned on a boundary of the given region size. 408 * 409 * We also round up the number of pages spanned by the counters 410 * for a given region to PC_BASE_ALIGN in certain situations to simplify 411 * the coding for some non-performance critical routines. 412 */ 413 #define PC_BASE_ALIGN ((pfn_t)1 << PAGE_BSZS_SHIFT(mmu_page_sizes-1)) 414 #define PC_BASE_ALIGN_MASK (PC_BASE_ALIGN - 1) 415 416 extern int ecache_alignsize; 417 #define L2CACHE_ALIGN ecache_alignsize 418 #define L2CACHE_ALIGN_MAX 512 419 420 extern int consistent_coloring; 421 extern uint_t vac_colors_mask; 422 extern int vac_size; 423 extern int vac_shift; 424 425 /* 426 * Auto large page selection support variables. Some CPU 427 * implementations may differ from the defaults and will need 428 * to change these. 429 */ 430 extern int auto_lpg_tlb_threshold; 431 extern int auto_lpg_minszc; 432 extern int auto_lpg_maxszc; 433 extern size_t auto_lpg_heap_default; 434 extern size_t auto_lpg_stack_default; 435 extern size_t auto_lpg_va_default; 436 extern size_t auto_lpg_remap_threshold; 437 extern pgcnt_t auto_lpg_min_physmem; 438 439 /* 440 * AS_2_BIN macro controls the page coloring policy. 441 * 0 (default) uses various vaddr bits 442 * 1 virtual=paddr 443 * 2 bin hopping 444 */ 445 #define AS_2_BIN(as, seg, vp, addr, bin, szc) \ 446 switch (consistent_coloring) { \ 447 default: \ 448 cmn_err(CE_WARN, \ 449 "AS_2_BIN: bad consistent coloring value"); \ 450 /* assume default algorithm -> continue */ \ 451 case 0: { \ 452 uint32_t ndx, new; \ 453 int slew = 0; \ 454 pfn_t pfn; \ 455 \ 456 if (vp != NULL && IS_SWAPVP(vp) && \ 457 seg->s_ops == &segvn_ops) \ 458 slew = as_color_bin(as); \ 459 \ 460 pfn = ((uintptr_t)addr >> MMU_PAGESHIFT) + \ 461 (((uintptr_t)addr >> page_coloring_shift) << \ 462 (vac_shift - MMU_PAGESHIFT)); \ 463 if ((szc) == 0 || \ 464 (szc == 1 && &page_pfn_2_color_cpu == NULL && \ 465 CPUASSOC() > PNUM_SIZE(1))) { \ 466 pfn += slew; \ 467 bin = PFN_2_COLOR(pfn, szc); \ 468 } else { \ 469 bin = PFN_2_COLOR(pfn, szc); \ 470 bin += slew >> (vac_shift - MMU_PAGESHIFT); \ 471 bin &= hw_page_array[(szc)].hp_colors - 1; \ 472 } \ 473 break; \ 474 } \ 475 case 1: \ 476 bin = PFN_2_COLOR(((uintptr_t)addr >> MMU_PAGESHIFT), \ 477 szc); \ 478 break; \ 479 case 2: { \ 480 int cnt = as_color_bin(as); \ 481 uint_t color_mask = page_get_pagecolors(0) - 1; \ 482 \ 483 /* make sure physical color aligns with vac color */ \ 484 while ((cnt & vac_colors_mask) != \ 485 addr_to_vcolor(addr)) { \ 486 cnt++; \ 487 } \ 488 bin = cnt = cnt & color_mask; \ 489 bin >>= PAGE_GET_COLOR_SHIFT(0, szc); \ 490 /* update per as page coloring fields */ \ 491 cnt = (cnt + 1) & color_mask; \ 492 if (cnt == (as_color_start(as) & color_mask)) { \ 493 cnt = as_color_start(as) = as_color_start(as) + \ 494 PGCLR_LOOPFACTOR; \ 495 } \ 496 as_color_bin(as) = cnt & color_mask; \ 497 break; \ 498 } \ 499 } \ 500 ASSERT(bin < page_get_pagecolors(szc)); 501 502 /* 503 * cpu private vm data - accessed thru CPU->cpu_vm_data 504 * vc_pnum_memseg: tracks last memseg visited in page_numtopp_nolock() 505 * vc_pnext_memseg: tracks last memseg visited in page_nextn() 506 * vc_kmptr: unaligned kmem pointer for this vm_cpu_data_t 507 * vc_kmsize: orignal kmem size for this vm_cpu_data_t 508 */ 509 510 typedef struct { 511 struct memseg *vc_pnum_memseg; 512 struct memseg *vc_pnext_memseg; 513 void *vc_kmptr; 514 size_t vc_kmsize; 515 } vm_cpu_data_t; 516 517 /* allocation size to ensure vm_cpu_data_t resides in its own cache line */ 518 #define VM_CPU_DATA_PADSIZE \ 519 (P2ROUNDUP(sizeof (vm_cpu_data_t), L2CACHE_ALIGN_MAX)) 520 521 /* for boot cpu before kmem is initialized */ 522 extern char vm_cpu_data0[]; 523 524 /* 525 * Function to get an ecache color bin: F(as, cnt, vcolor). 526 * the goal of this function is to: 527 * - to spread a processes' physical pages across the entire ecache to 528 * maximize its use. 529 * - to minimize vac flushes caused when we reuse a physical page on a 530 * different vac color than it was previously used. 531 * - to prevent all processes to use the same exact colors and trash each 532 * other. 533 * 534 * cnt is a bin ptr kept on a per as basis. As we page_create we increment 535 * the ptr so we spread out the physical pages to cover the entire ecache. 536 * The virtual color is made a subset of the physical color in order to 537 * in minimize virtual cache flushing. 538 * We add in the as to spread out different as. This happens when we 539 * initialize the start count value. 540 * sizeof(struct as) is 60 so we shift by 3 to get into the bit range 541 * that will tend to change. For example, on spitfire based machines 542 * (vcshft == 1) contigous as are spread bu ~6 bins. 543 * vcshft provides for proper virtual color alignment. 544 * In theory cnt should be updated using cas only but if we are off by one 545 * or 2 it is no big deal. 546 * We also keep a start value which is used to randomize on what bin we 547 * start counting when it is time to start another loop. This avoids 548 * contigous allocations of ecache size to point to the same bin. 549 * Why 3? Seems work ok. Better than 7 or anything larger. 550 */ 551 #define PGCLR_LOOPFACTOR 3 552 553 /* 554 * When a bin is empty, and we can't satisfy a color request correctly, 555 * we scan. If we assume that the programs have reasonable spatial 556 * behavior, then it will not be a good idea to use the adjacent color. 557 * Using the adjacent color would result in virtually adjacent addresses 558 * mapping into the same spot in the cache. So, if we stumble across 559 * an empty bin, skip a bunch before looking. After the first skip, 560 * then just look one bin at a time so we don't miss our cache on 561 * every look. Be sure to check every bin. Page_create() will panic 562 * if we miss a page. 563 * 564 * This also explains the `<=' in the for loops in both page_get_freelist() 565 * and page_get_cachelist(). Since we checked the target bin, skipped 566 * a bunch, then continued one a time, we wind up checking the target bin 567 * twice to make sure we get all of them bins. 568 */ 569 #define BIN_STEP 20 570 571 #ifdef VM_STATS 572 struct vmm_vmstats_str { 573 ulong_t pgf_alloc[MMU_PAGE_SIZES]; /* page_get_freelist */ 574 ulong_t pgf_allocok[MMU_PAGE_SIZES]; 575 ulong_t pgf_allocokrem[MMU_PAGE_SIZES]; 576 ulong_t pgf_allocfailed[MMU_PAGE_SIZES]; 577 ulong_t pgf_allocdeferred; 578 ulong_t pgf_allocretry[MMU_PAGE_SIZES]; 579 ulong_t pgc_alloc; /* page_get_cachelist */ 580 ulong_t pgc_allocok; 581 ulong_t pgc_allocokrem; 582 ulong_t pgc_allocokdeferred; 583 ulong_t pgc_allocfailed; 584 ulong_t pgcp_alloc[MMU_PAGE_SIZES]; /* page_get_contig_pages */ 585 ulong_t pgcp_allocfailed[MMU_PAGE_SIZES]; 586 ulong_t pgcp_allocempty[MMU_PAGE_SIZES]; 587 ulong_t pgcp_allocok[MMU_PAGE_SIZES]; 588 ulong_t ptcp[MMU_PAGE_SIZES]; /* page_trylock_contig_pages */ 589 ulong_t ptcpfreethresh[MMU_PAGE_SIZES]; 590 ulong_t ptcpfailexcl[MMU_PAGE_SIZES]; 591 ulong_t ptcpfailszc[MMU_PAGE_SIZES]; 592 ulong_t ptcpfailcage[MMU_PAGE_SIZES]; 593 ulong_t ptcpok[MMU_PAGE_SIZES]; 594 ulong_t pgmf_alloc[MMU_PAGE_SIZES]; /* page_get_mnode_freelist */ 595 ulong_t pgmf_allocfailed[MMU_PAGE_SIZES]; 596 ulong_t pgmf_allocempty[MMU_PAGE_SIZES]; 597 ulong_t pgmf_allocok[MMU_PAGE_SIZES]; 598 ulong_t pgmc_alloc; /* page_get_mnode_cachelist */ 599 ulong_t pgmc_allocfailed; 600 ulong_t pgmc_allocempty; 601 ulong_t pgmc_allocok; 602 ulong_t pladd_free[MMU_PAGE_SIZES]; /* page_list_add/sub */ 603 ulong_t plsub_free[MMU_PAGE_SIZES]; 604 ulong_t pladd_cache; 605 ulong_t plsub_cache; 606 ulong_t plsubpages_szcbig; 607 ulong_t plsubpages_szc0; 608 ulong_t pfs_req[MMU_PAGE_SIZES]; /* page_freelist_split */ 609 ulong_t pfs_demote[MMU_PAGE_SIZES]; 610 ulong_t pfc_coalok[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 611 ulong_t ppr_reloc[MMU_PAGE_SIZES]; /* page_relocate */ 612 ulong_t ppr_relocok[MMU_PAGE_SIZES]; 613 ulong_t ppr_relocnoroot[MMU_PAGE_SIZES]; 614 ulong_t ppr_reloc_replnoroot[MMU_PAGE_SIZES]; 615 ulong_t ppr_relocnolock[MMU_PAGE_SIZES]; 616 ulong_t ppr_relocnomem[MMU_PAGE_SIZES]; 617 ulong_t ppr_krelocfail[MMU_PAGE_SIZES]; 618 /* page coalesce counter */ 619 ulong_t page_ctrs_coalesce[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 620 /* candidates useful */ 621 ulong_t page_ctrs_cands_skip[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 622 /* ctrs changed after locking */ 623 ulong_t page_ctrs_changed[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 624 /* page_freelist_coalesce failed */ 625 ulong_t page_ctrs_failed[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 626 ulong_t page_ctrs_coalesce_all; /* page coalesce all counter */ 627 ulong_t page_ctrs_cands_skip_all; /* candidates useful for all func */ 628 }; 629 extern struct vmm_vmstats_str vmm_vmstats; 630 #endif /* VM_STATS */ 631 632 /* 633 * Used to hold off page relocations into the cage until OBP has completed 634 * its boot-time handoff of its resources to the kernel. 635 */ 636 extern int page_relocate_ready; 637 638 /* 639 * cpu/mmu-dependent vm variables may be reset at bootup. 640 */ 641 extern uint_t mmu_page_sizes; 642 extern uint_t max_mmu_page_sizes; 643 extern uint_t mmu_hashcnt; 644 extern uint_t max_mmu_hashcnt; 645 extern size_t mmu_ism_pagesize; 646 extern int mmu_exported_pagesize_mask; 647 extern uint_t mmu_exported_page_sizes; 648 extern uint_t szc_2_userszc[]; 649 extern uint_t userszc_2_szc[]; 650 651 #define USERSZC_2_SZC(userszc) (userszc_2_szc[userszc]) 652 #define SZC_2_USERSZC(szc) (szc_2_userszc[szc]) 653 654 /* 655 * Platform specific page routines 656 */ 657 extern void mach_page_add(page_t **, page_t *); 658 extern void mach_page_sub(page_t **, page_t *); 659 extern uint_t page_get_pagecolors(uint_t); 660 extern void ppcopy_kernel__relocatable(page_t *, page_t *); 661 #define ppcopy_kernel(p1, p2) ppcopy_kernel__relocatable(p1, p2) 662 663 /* 664 * platform specific large pages for kernel heap support 665 */ 666 extern size_t get_segkmem_lpsize(size_t lpsize); 667 extern size_t mmu_get_kernel_lpsize(size_t lpsize); 668 extern void mmu_init_kernel_pgsz(struct hat *hat); 669 extern void mmu_init_kcontext(); 670 extern uint64_t kcontextreg; 671 672 #ifdef __cplusplus 673 } 674 #endif 675 676 #endif /* _VM_DEP_H */ 677