1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * UNIX machine dependent virtual memory support. 28 */ 29 30 #ifndef _VM_DEP_H 31 #define _VM_DEP_H 32 33 #pragma ident "%Z%%M% %I% %E% SMI" 34 35 #ifdef __cplusplus 36 extern "C" { 37 #endif 38 39 #include <sys/clock.h> 40 #include <vm/hat_pte.h> 41 42 /* 43 * WARNING: vm_dep.h is included by files in common. As such, macros 44 * dependent upon PTE36 such as LARGEPAGESIZE cannot be used in this file. 45 */ 46 47 #define GETTICK() tsc_read() 48 49 /* memranges in descending order */ 50 extern pfn_t *memranges; 51 52 #define MEMRANGEHI(mtype) \ 53 ((mtype > 0) ? memranges[mtype - 1] - 1: physmax) 54 #define MEMRANGELO(mtype) (memranges[mtype]) 55 56 #define MTYPE_FREEMEM(mt) \ 57 (mnoderanges[mt].mnr_mt_clpgcnt + \ 58 mnoderanges[mt].mnr_mt_flpgcnt + \ 59 mnoderanges[mt].mnr_mt_lgpgcnt) 60 61 /* 62 * combined memory ranges from mnode and memranges[] to manage single 63 * mnode/mtype dimension in the page lists. 64 */ 65 typedef struct { 66 pfn_t mnr_pfnlo; 67 pfn_t mnr_pfnhi; 68 int mnr_mnode; 69 int mnr_memrange; /* index into memranges[] */ 70 /* maintain page list stats */ 71 pgcnt_t mnr_mt_pgmax; /* mnode/mtype max page cnt */ 72 pgcnt_t mnr_mt_clpgcnt; /* cache list cnt */ 73 pgcnt_t mnr_mt_flpgcnt; /* free list cnt - small pages */ 74 pgcnt_t mnr_mt_lgpgcnt; /* free list cnt - large pages */ 75 #ifdef DEBUG 76 struct mnr_mts { /* mnode/mtype szc stats */ 77 pgcnt_t mnr_mts_pgcnt; 78 int mnr_mts_colors; 79 pgcnt_t *mnr_mtsc_pgcnt; 80 } *mnr_mts; 81 #endif 82 } mnoderange_t; 83 84 #ifdef DEBUG 85 #define PLCNT_SZ(ctrs_sz) { \ 86 int szc, colors; \ 87 ctrs_sz += mnoderangecnt * sizeof (struct mnr_mts) * \ 88 mmu_page_sizes; \ 89 for (szc = 0; szc < mmu_page_sizes; szc++) { \ 90 colors = page_get_pagecolors(szc); \ 91 ctrs_sz += mnoderangecnt * sizeof (pgcnt_t) * colors; \ 92 } \ 93 } 94 95 #define PLCNT_INIT(addr) { \ 96 int mt, szc, colors; \ 97 for (mt = 0; mt < mnoderangecnt; mt++) { \ 98 mnoderanges[mt].mnr_mts = (struct mnr_mts *)addr; \ 99 addr += (sizeof (struct mnr_mts) * mmu_page_sizes); \ 100 for (szc = 0; szc < mmu_page_sizes; szc++) { \ 101 colors = page_get_pagecolors(szc); \ 102 mnoderanges[mt].mnr_mts[szc].mnr_mts_colors = \ 103 colors; \ 104 mnoderanges[mt].mnr_mts[szc].mnr_mtsc_pgcnt = \ 105 (pgcnt_t *)addr; \ 106 addr += (sizeof (pgcnt_t) * colors); \ 107 } \ 108 } \ 109 } 110 #define PLCNT_DO(pp, mtype, szc, cnt, flags) { \ 111 int bin = PP_2_BIN(pp); \ 112 if (flags & PG_CACHE_LIST) \ 113 atomic_add_long(&mnoderanges[mtype]. \ 114 mnr_mt_clpgcnt, cnt); \ 115 else if (szc) \ 116 atomic_add_long(&mnoderanges[mtype]. \ 117 mnr_mt_lgpgcnt, cnt); \ 118 else \ 119 atomic_add_long(&mnoderanges[mtype]. \ 120 mnr_mt_flpgcnt, cnt); \ 121 atomic_add_long(&mnoderanges[mtype].mnr_mts[szc]. \ 122 mnr_mts_pgcnt, cnt); \ 123 atomic_add_long(&mnoderanges[mtype].mnr_mts[szc]. \ 124 mnr_mtsc_pgcnt[bin], cnt); \ 125 } 126 #else 127 #define PLCNT_SZ(ctrs_sz) 128 #define PLCNT_INIT(base) 129 #define PLCNT_DO(pp, mtype, szc, cnt, flags) { \ 130 if (flags & PG_CACHE_LIST) \ 131 atomic_add_long(&mnoderanges[mtype]. \ 132 mnr_mt_clpgcnt, cnt); \ 133 else if (szc) \ 134 atomic_add_long(&mnoderanges[mtype]. \ 135 mnr_mt_lgpgcnt, cnt); \ 136 else \ 137 atomic_add_long(&mnoderanges[mtype]. \ 138 mnr_mt_flpgcnt, cnt); \ 139 } 140 #endif 141 142 #define PLCNT_INCR(pp, mnode, mtype, szc, flags) { \ 143 long cnt = (1 << PAGE_BSZS_SHIFT(szc)); \ 144 ASSERT(mtype == PP_2_MTYPE(pp)); \ 145 if (physmax4g && mtype <= mtype4g) \ 146 atomic_add_long(&freemem4g, cnt); \ 147 PLCNT_DO(pp, mtype, szc, cnt, flags); \ 148 } 149 150 #define PLCNT_DECR(pp, mnode, mtype, szc, flags) { \ 151 long cnt = ((-1) << PAGE_BSZS_SHIFT(szc)); \ 152 ASSERT(mtype == PP_2_MTYPE(pp)); \ 153 if (physmax4g && mtype <= mtype4g) \ 154 atomic_add_long(&freemem4g, cnt); \ 155 PLCNT_DO(pp, mtype, szc, cnt, flags); \ 156 } 157 158 /* 159 * macros to update page list max counts. no-op on x86. 160 */ 161 #define PLCNT_XFER_NORELOC(pp) 162 163 #define PLCNT_MODIFY_MAX(pfn, cnt) mtype_modify_max(pfn, (pgcnt_t)cnt) 164 165 extern mnoderange_t *mnoderanges; 166 extern int mnoderangecnt; 167 extern int mtype4g; 168 169 /* 170 * 4g memory management variables for systems with more than 4g of memory: 171 * 172 * physical memory below 4g is required for 32bit dma devices and, currently, 173 * for kmem memory. On systems with more than 4g of memory, the pool of memory 174 * below 4g can be depleted without any paging activity given that there is 175 * likely to be sufficient memory above 4g. 176 * 177 * physmax4g is set true if the largest pfn is over 4g. The rest of the 178 * 4g memory management code is enabled only when physmax4g is true. 179 * 180 * maxmem4g is the count of the maximum number of pages on the page lists 181 * with physical addresses below 4g. It can be a lot less then 4g given that 182 * BIOS may reserve large chunks of space below 4g for hot plug pci devices, 183 * agp aperture etc. 184 * 185 * freemem4g maintains the count of the number of available pages on the 186 * page lists with physical addresses below 4g. 187 * 188 * DESFREE4G specifies the desired amount of below 4g memory. It defaults to 189 * 6% (desfree4gshift = 4) of maxmem4g. 190 * 191 * RESTRICT4G_ALLOC returns true if freemem4g falls below DESFREE4G 192 * and the amount of physical memory above 4g is greater than freemem4g. 193 * In this case, page_get_* routines will restrict below 4g allocations 194 * for requests that don't specifically require it. 195 */ 196 197 extern int physmax4g; 198 extern pgcnt_t maxmem4g; 199 extern pgcnt_t freemem4g; 200 extern int lotsfree4gshift; 201 extern int desfree4gshift; 202 #define LOTSFREE4G (maxmem4g >> lotsfree4gshift) 203 #define DESFREE4G (maxmem4g >> desfree4gshift) 204 205 #define RESTRICT4G_ALLOC \ 206 (physmax4g && (freemem4g < DESFREE4G) && ((freemem4g << 1) < freemem)) 207 208 /* 209 * 16m memory management: 210 * 211 * reserve some amount of physical memory below 16m for legacy devices. 212 * 213 * RESTRICT16M_ALLOC returns true if an there are sufficient free pages above 214 * 16m or if the 16m pool drops below DESFREE16M. 215 * 216 * In this case, general page allocations via page_get_{free,cache}list 217 * routines will be restricted from allocating from the 16m pool. Allocations 218 * that require specific pfn ranges (page_get_anylist) and PG_PANIC allocations 219 * are not restricted. 220 */ 221 222 #define FREEMEM16M MTYPE_FREEMEM(0) 223 #define DESFREE16M desfree16m 224 #define RESTRICT16M_ALLOC(freemem, pgcnt, flags) \ 225 ((freemem != 0) && ((flags & PG_PANIC) == 0) && \ 226 ((freemem >= (FREEMEM16M)) || \ 227 (FREEMEM16M < (DESFREE16M + pgcnt)))) 228 extern pgcnt_t desfree16m; 229 230 extern int restricted_kmemalloc; 231 extern int memrange_num(pfn_t); 232 extern int pfn_2_mtype(pfn_t); 233 extern int mtype_func(int, int, uint_t); 234 extern void mtype_modify_max(pfn_t, long); 235 extern int mnode_pgcnt(int); 236 extern int mnode_range_cnt(int); 237 238 #define NUM_MEM_RANGES 4 /* memory range types */ 239 240 /* 241 * candidate counters in vm_pagelist.c are indexed by color and range 242 */ 243 #define MAX_MNODE_MRANGES NUM_MEM_RANGES 244 #define MNODE_RANGE_CNT(mnode) mnode_range_cnt(mnode) 245 #define MNODE_MAX_MRANGE(mnode) (memrange_num(mem_node_config[mnode].physbase)) 246 #define MTYPE_2_MRANGE(mnode, mtype) \ 247 (mnode_maxmrange[mnode] - mnoderanges[mtype].mnr_memrange) 248 249 /* 250 * Per page size free lists. Allocated dynamically. 251 * dimensions [mtype][mmu_page_sizes][colors] 252 * 253 * mtype specifies a physical memory range with a unique mnode. 254 */ 255 256 extern page_t ****page_freelists; 257 258 #define PAGE_FREELISTS(mnode, szc, color, mtype) \ 259 (*(page_freelists[mtype][szc] + (color))) 260 261 /* 262 * For now there is only a single size cache list. Allocated dynamically. 263 * dimensions [mtype][colors] 264 * 265 * mtype specifies a physical memory range with a unique mnode. 266 */ 267 extern page_t ***page_cachelists; 268 269 #define PAGE_CACHELISTS(mnode, color, mtype) \ 270 (*(page_cachelists[mtype] + (color))) 271 272 /* 273 * There are mutexes for both the page freelist 274 * and the page cachelist. We want enough locks to make contention 275 * reasonable, but not too many -- otherwise page_freelist_lock() gets 276 * so expensive that it becomes the bottleneck! 277 */ 278 279 #define NPC_MUTEX 16 280 281 extern kmutex_t *fpc_mutex[NPC_MUTEX]; 282 extern kmutex_t *cpc_mutex[NPC_MUTEX]; 283 284 extern page_t *page_get_mnode_freelist(int, uint_t, int, uchar_t, uint_t); 285 extern page_t *page_get_mnode_cachelist(uint_t, uint_t, int, int); 286 287 #define PAGE_GET_COLOR_SHIFT(szc, nszc) \ 288 (hw_page_array[(nszc)].hp_shift - hw_page_array[(szc)].hp_shift) 289 290 #define PFN_2_COLOR(pfn, szc) \ 291 (((pfn) & page_colors_mask) >> \ 292 (hw_page_array[szc].hp_shift - hw_page_array[0].hp_shift)) 293 294 #define PNUM_SIZE(szc) \ 295 (hw_page_array[(szc)].hp_pgcnt) 296 #define PNUM_SHIFT(szc) \ 297 (hw_page_array[(szc)].hp_shift - hw_page_array[0].hp_shift) 298 #define PAGE_GET_SHIFT(szc) \ 299 (hw_page_array[(szc)].hp_shift) 300 #define PAGE_GET_PAGECOLORS(szc) \ 301 (hw_page_array[(szc)].hp_colors) 302 303 /* 304 * This macro calculates the next sequential pfn with the specified 305 * color using color equivalency mask 306 */ 307 #define PAGE_NEXT_PFN_FOR_COLOR(pfn, szc, color, ceq_mask, color_mask) \ 308 ASSERT(((color) & ~(ceq_mask)) == 0); \ 309 { \ 310 uint_t pfn_shift = PAGE_BSZS_SHIFT(szc); \ 311 pfn_t spfn = pfn >> pfn_shift; \ 312 pfn_t stride = (ceq_mask) + 1; \ 313 ASSERT((((ceq_mask) + 1) & (ceq_mask)) == 0); \ 314 if (((spfn ^ (color)) & (ceq_mask)) == 0) { \ 315 pfn += stride << pfn_shift; \ 316 } else { \ 317 pfn = (spfn & ~(pfn_t)(ceq_mask)) | (color); \ 318 pfn = (pfn > spfn ? pfn : pfn + stride) << pfn_shift; \ 319 } \ 320 } 321 322 /* get the color equivalency mask for the next szc */ 323 #define PAGE_GET_NSZ_MASK(szc, mask) \ 324 ((mask) >> (PAGE_GET_SHIFT((szc) + 1) - PAGE_GET_SHIFT(szc))) 325 326 /* get the color of the next szc */ 327 #define PAGE_GET_NSZ_COLOR(szc, color) \ 328 ((color) >> (PAGE_GET_SHIFT((szc) + 1) - PAGE_GET_SHIFT(szc))) 329 330 /* Find the bin for the given page if it was of size szc */ 331 #define PP_2_BIN_SZC(pp, szc) (PFN_2_COLOR(pp->p_pagenum, szc)) 332 333 #define PP_2_BIN(pp) (PP_2_BIN_SZC(pp, pp->p_szc)) 334 335 #define PP_2_MEM_NODE(pp) (PFN_2_MEM_NODE(pp->p_pagenum)) 336 #define PP_2_MTYPE(pp) (pfn_2_mtype(pp->p_pagenum)) 337 #define PP_2_SZC(pp) (pp->p_szc) 338 339 #define SZCPAGES(szc) (1 << PAGE_BSZS_SHIFT(szc)) 340 #define PFN_BASE(pfnum, szc) (pfnum & ~(SZCPAGES(szc) - 1)) 341 342 /* 343 * this structure is used for walking free page lists 344 * controls when to split large pages into smaller pages, 345 * and when to coalesce smaller pages into larger pages 346 */ 347 typedef struct page_list_walker { 348 uint_t plw_colors; /* num of colors for szc */ 349 uint_t plw_color_mask; /* colors-1 */ 350 uint_t plw_bin_step; /* next bin: 1 or 2 */ 351 uint_t plw_count; /* loop count */ 352 uint_t plw_bin0; /* starting bin */ 353 uint_t plw_bin_marker; /* bin after initial jump */ 354 uint_t plw_bin_split_prev; /* last bin we tried to split */ 355 uint_t plw_do_split; /* set if OK to split */ 356 uint_t plw_split_next; /* next bin to split */ 357 uint_t plw_ceq_dif; /* number of different color groups */ 358 /* to check */ 359 uint_t plw_ceq_mask[MMU_PAGE_SIZES + 1]; /* color equiv mask */ 360 uint_t plw_bins[MMU_PAGE_SIZES + 1]; /* num of bins */ 361 } page_list_walker_t; 362 363 void page_list_walk_init(uchar_t szc, uint_t flags, uint_t bin, 364 int can_split, int use_ceq, page_list_walker_t *plw); 365 366 uint_t page_list_walk_next_bin(uchar_t szc, uint_t bin, 367 page_list_walker_t *plw); 368 369 extern struct cpu cpus[]; 370 #define CPU0 cpus 371 372 #if defined(__amd64) 373 374 /* 375 * set the mtype range (called from page_get_{free,cache}list) 376 * - set range to above 4g if the system has more than 4g of memory and the 377 * amount of memory below 4g runs low. If not, set range to above 16m if 378 * 16m threshold is reached otherwise set range to all of memory 379 * starting from the hi pfns. 380 * 381 * page_get_anylist gets its mtype range from the specified ddi_dma_attr_t. 382 */ 383 #define MTYPE_INIT(mtype, vp, vaddr, flags, pgsz) { \ 384 mtype = mnoderangecnt - 1; \ 385 if (RESTRICT4G_ALLOC) { \ 386 VM_STAT_ADD(vmm_vmstats.restrict4gcnt); \ 387 /* here only for > 4g systems */ \ 388 flags |= PGI_MT_RANGE4G; \ 389 } else if (RESTRICT16M_ALLOC(freemem, btop(pgsz), flags)) { \ 390 flags |= PGI_MT_RANGE16M; \ 391 } else { \ 392 VM_STAT_ADD(vmm_vmstats.unrestrict16mcnt); \ 393 VM_STAT_COND_ADD((flags & PG_PANIC), \ 394 vmm_vmstats.pgpanicalloc); \ 395 flags |= PGI_MT_RANGE0; \ 396 } \ 397 } 398 399 #elif defined(__i386) 400 401 /* 402 * set the mtype range 403 * - kmem requests needs to be below 4g if restricted_kmemalloc is set. 404 * - for non kmem requests, set range to above 4g if the amount of memory 405 * below 4g runs low. 406 */ 407 408 #define MTYPE_INIT(mtype, vp, vaddr, flags, pgsz) { \ 409 if (restricted_kmemalloc && (vp) == &kvp && \ 410 (caddr_t)(vaddr) >= kernelheap && \ 411 (caddr_t)(vaddr) < ekernelheap) { \ 412 ASSERT(physmax4g); \ 413 mtype = mtype4g; \ 414 if (RESTRICT16M_ALLOC(freemem4g - btop(pgsz), \ 415 btop(pgsz), flags)) { \ 416 flags |= PGI_MT_RANGE16M; \ 417 } else { \ 418 VM_STAT_ADD(vmm_vmstats.unrestrict16mcnt); \ 419 VM_STAT_COND_ADD((flags & PG_PANIC), \ 420 vmm_vmstats.pgpanicalloc); \ 421 flags |= PGI_MT_RANGE0; \ 422 } \ 423 } else { \ 424 mtype = mnoderangecnt - 1; \ 425 if (RESTRICT4G_ALLOC) { \ 426 VM_STAT_ADD(vmm_vmstats.restrict4gcnt); \ 427 /* here only for > 4g systems */ \ 428 flags |= PGI_MT_RANGE4G; \ 429 } else if (RESTRICT16M_ALLOC(freemem, btop(pgsz), \ 430 flags)) { \ 431 flags |= PGI_MT_RANGE16M; \ 432 } else { \ 433 VM_STAT_ADD(vmm_vmstats.unrestrict16mcnt); \ 434 VM_STAT_COND_ADD((flags & PG_PANIC), \ 435 vmm_vmstats.pgpanicalloc); \ 436 flags |= PGI_MT_RANGE0; \ 437 } \ 438 } \ 439 } 440 441 #endif /* __i386 */ 442 443 /* 444 * macros to loop through the mtype range (page_get_mnode_{free,cache,any}list, 445 * and page_get_contig_pages) 446 * 447 * MTYPE_START sets the initial mtype. -1 if the mtype range specified does 448 * not contain mnode. 449 * 450 * MTYPE_NEXT sets the next mtype. -1 if there are no more valid 451 * mtype in the range. 452 */ 453 454 #define MTYPE_START(mnode, mtype, flags) \ 455 (mtype = mtype_func(mnode, mtype, flags)) 456 457 #define MTYPE_NEXT(mnode, mtype, flags) { \ 458 if (flags & PGI_MT_RANGE) { \ 459 mtype = mtype_func(mnode, mtype, flags | PGI_MT_NEXT); \ 460 } else { \ 461 mtype = -1; \ 462 } \ 463 } 464 465 /* mtype init for page_get_replacement_page */ 466 467 #define MTYPE_PGR_INIT(mtype, flags, pp, mnode, pgcnt) { \ 468 mtype = mnoderangecnt - 1; \ 469 if (RESTRICT16M_ALLOC(freemem, pgcnt, flags)) { \ 470 flags |= PGI_MT_RANGE16M; \ 471 } else { \ 472 VM_STAT_ADD(vmm_vmstats.unrestrict16mcnt); \ 473 flags |= PGI_MT_RANGE0; \ 474 } \ 475 } 476 477 #define MNODE_PGCNT(mnode) mnode_pgcnt(mnode) 478 479 #define MNODETYPE_2_PFN(mnode, mtype, pfnlo, pfnhi) \ 480 ASSERT(mnoderanges[mtype].mnr_mnode == mnode); \ 481 pfnlo = mnoderanges[mtype].mnr_pfnlo; \ 482 pfnhi = mnoderanges[mtype].mnr_pfnhi; 483 484 #define PC_BIN_MUTEX(mnode, bin, flags) ((flags & PG_FREE_LIST) ? \ 485 &fpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode] : \ 486 &cpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode]) 487 488 #define FPC_MUTEX(mnode, i) (&fpc_mutex[i][mnode]) 489 #define CPC_MUTEX(mnode, i) (&cpc_mutex[i][mnode]) 490 491 #ifdef DEBUG 492 #define CHK_LPG(pp, szc) chk_lpg(pp, szc) 493 extern void chk_lpg(page_t *, uchar_t); 494 #else 495 #define CHK_LPG(pp, szc) 496 #endif 497 498 #define FULL_REGION_CNT(rg_szc) \ 499 (LEVEL_SIZE(rg_szc) >> LEVEL_SHIFT(rg_szc - 1)) 500 501 /* Return the leader for this mapping size */ 502 #define PP_GROUPLEADER(pp, szc) \ 503 (&(pp)[-(int)((pp)->p_pagenum & (SZCPAGES(szc)-1))]) 504 505 /* Return the root page for this page based on p_szc */ 506 #define PP_PAGEROOT(pp) ((pp)->p_szc == 0 ? (pp) : \ 507 PP_GROUPLEADER((pp), (pp)->p_szc)) 508 509 /* 510 * The counter base must be per page_counter element to prevent 511 * races when re-indexing, and the base page size element should 512 * be aligned on a boundary of the given region size. 513 * 514 * We also round up the number of pages spanned by the counters 515 * for a given region to PC_BASE_ALIGN in certain situations to simplify 516 * the coding for some non-performance critical routines. 517 */ 518 519 #define PC_BASE_ALIGN ((pfn_t)1 << PAGE_BSZS_SHIFT(MMU_PAGE_SIZES-1)) 520 #define PC_BASE_ALIGN_MASK (PC_BASE_ALIGN - 1) 521 522 /* 523 * cpu/mmu-dependent vm variables 524 */ 525 extern uint_t mmu_page_sizes; 526 extern uint_t mmu_exported_page_sizes; 527 528 /* For x86, userszc is the same as the kernel's szc */ 529 #define USERSZC_2_SZC(userszc) (userszc) 530 #define SZC_2_USERSZC(szc) (szc) 531 532 /* 533 * for hw_page_map_t, sized to hold the ratio of large page to base 534 * pagesize (1024 max) 535 */ 536 typedef short hpmctr_t; 537 538 /* 539 * get the setsize of the current cpu - assume homogenous for x86 540 */ 541 extern int l2cache_sz, l2cache_linesz, l2cache_assoc; 542 543 #define L2CACHE_ALIGN l2cache_linesz 544 #define L2CACHE_ALIGN_MAX 64 545 #define CPUSETSIZE() \ 546 (l2cache_assoc ? (l2cache_sz / l2cache_assoc) : MMU_PAGESIZE) 547 548 /* 549 * Return the log2(pagesize(szc) / MMU_PAGESIZE) --- or the shift count 550 * for the number of base pages in this pagesize 551 */ 552 #define PAGE_BSZS_SHIFT(szc) (LEVEL_SHIFT(szc) - MMU_PAGESHIFT) 553 554 /* 555 * Internal PG_ flags. 556 */ 557 #define PGI_RELOCONLY 0x010000 /* opposite of PG_NORELOC */ 558 #define PGI_NOCAGE 0x020000 /* cage is disabled */ 559 #define PGI_PGCPHIPRI 0x040000 /* page_get_contig_page pri alloc */ 560 #define PGI_PGCPSZC0 0x080000 /* relocate base pagesize page */ 561 562 /* 563 * PGI range flags - should not overlap PGI flags 564 */ 565 #define PGI_MT_RANGE0 0x1000000 /* mtype range to 0 */ 566 #define PGI_MT_RANGE16M 0x2000000 /* mtype range to 16m */ 567 #define PGI_MT_RANGE4G 0x4000000 /* mtype range to 4g */ 568 #define PGI_MT_NEXT 0x8000000 /* get next mtype */ 569 #define PGI_MT_RANGE (PGI_MT_RANGE0 | PGI_MT_RANGE16M | PGI_MT_RANGE4G) 570 571 /* 572 * Maximum and default values for user heap, stack, private and shared 573 * anonymous memory, and user text and initialized data. 574 * Used by map_pgsz*() routines. 575 */ 576 extern size_t max_uheap_lpsize; 577 extern size_t default_uheap_lpsize; 578 extern size_t max_ustack_lpsize; 579 extern size_t default_ustack_lpsize; 580 extern size_t max_privmap_lpsize; 581 extern size_t max_uidata_lpsize; 582 extern size_t max_utext_lpsize; 583 extern size_t max_shm_lpsize; 584 extern size_t mcntl0_lpsize; 585 586 /* 587 * Sanity control. Don't use large pages regardless of user 588 * settings if there's less than priv or shm_lpg_min_physmem memory installed. 589 * The units for this variable are 8K pages. 590 */ 591 extern pgcnt_t privm_lpg_min_physmem; 592 extern pgcnt_t shm_lpg_min_physmem; 593 594 /* 595 * hash as and addr to get a bin. 596 */ 597 598 #define AS_2_BIN(as, seg, vp, addr, bin, szc) \ 599 bin = (((((uintptr_t)(addr) >> PAGESHIFT) + ((uintptr_t)(as) >> 4)) \ 600 & page_colors_mask) >> \ 601 (hw_page_array[szc].hp_shift - hw_page_array[0].hp_shift)) 602 603 /* 604 * cpu private vm data - accessed thru CPU->cpu_vm_data 605 * vc_pnum_memseg: tracks last memseg visited in page_numtopp_nolock() 606 * vc_pnext_memseg: tracks last memseg visited in page_nextn() 607 * vc_kmptr: orignal unaligned kmem pointer for this vm_cpu_data_t 608 * vc_kmsize: orignal kmem size for this vm_cpu_data_t 609 */ 610 611 typedef struct { 612 struct memseg *vc_pnum_memseg; 613 struct memseg *vc_pnext_memseg; 614 void *vc_kmptr; 615 size_t vc_kmsize; 616 } vm_cpu_data_t; 617 618 /* allocation size to ensure vm_cpu_data_t resides in its own cache line */ 619 #define VM_CPU_DATA_PADSIZE \ 620 (P2ROUNDUP(sizeof (vm_cpu_data_t), L2CACHE_ALIGN_MAX)) 621 622 /* for boot cpu before kmem is initialized */ 623 extern char vm_cpu_data0[]; 624 625 /* 626 * When a bin is empty, and we can't satisfy a color request correctly, 627 * we scan. If we assume that the programs have reasonable spatial 628 * behavior, then it will not be a good idea to use the adjacent color. 629 * Using the adjacent color would result in virtually adjacent addresses 630 * mapping into the same spot in the cache. So, if we stumble across 631 * an empty bin, skip a bunch before looking. After the first skip, 632 * then just look one bin at a time so we don't miss our cache on 633 * every look. Be sure to check every bin. Page_create() will panic 634 * if we miss a page. 635 * 636 * This also explains the `<=' in the for loops in both page_get_freelist() 637 * and page_get_cachelist(). Since we checked the target bin, skipped 638 * a bunch, then continued one a time, we wind up checking the target bin 639 * twice to make sure we get all of them bins. 640 */ 641 #define BIN_STEP 19 642 643 #ifdef VM_STATS 644 struct vmm_vmstats_str { 645 ulong_t pgf_alloc[MMU_PAGE_SIZES]; /* page_get_freelist */ 646 ulong_t pgf_allocok[MMU_PAGE_SIZES]; 647 ulong_t pgf_allocokrem[MMU_PAGE_SIZES]; 648 ulong_t pgf_allocfailed[MMU_PAGE_SIZES]; 649 ulong_t pgf_allocdeferred; 650 ulong_t pgf_allocretry[MMU_PAGE_SIZES]; 651 ulong_t pgc_alloc; /* page_get_cachelist */ 652 ulong_t pgc_allocok; 653 ulong_t pgc_allocokrem; 654 ulong_t pgc_allocokdeferred; 655 ulong_t pgc_allocfailed; 656 ulong_t pgcp_alloc[MMU_PAGE_SIZES]; /* page_get_contig_pages */ 657 ulong_t pgcp_allocfailed[MMU_PAGE_SIZES]; 658 ulong_t pgcp_allocempty[MMU_PAGE_SIZES]; 659 ulong_t pgcp_allocok[MMU_PAGE_SIZES]; 660 ulong_t ptcp[MMU_PAGE_SIZES]; /* page_trylock_contig_pages */ 661 ulong_t ptcpfreethresh[MMU_PAGE_SIZES]; 662 ulong_t ptcpfailexcl[MMU_PAGE_SIZES]; 663 ulong_t ptcpfailszc[MMU_PAGE_SIZES]; 664 ulong_t ptcpfailcage[MMU_PAGE_SIZES]; 665 ulong_t ptcpok[MMU_PAGE_SIZES]; 666 ulong_t pgmf_alloc[MMU_PAGE_SIZES]; /* page_get_mnode_freelist */ 667 ulong_t pgmf_allocfailed[MMU_PAGE_SIZES]; 668 ulong_t pgmf_allocempty[MMU_PAGE_SIZES]; 669 ulong_t pgmf_allocok[MMU_PAGE_SIZES]; 670 ulong_t pgmc_alloc; /* page_get_mnode_cachelist */ 671 ulong_t pgmc_allocfailed; 672 ulong_t pgmc_allocempty; 673 ulong_t pgmc_allocok; 674 ulong_t pladd_free[MMU_PAGE_SIZES]; /* page_list_add/sub */ 675 ulong_t plsub_free[MMU_PAGE_SIZES]; 676 ulong_t pladd_cache; 677 ulong_t plsub_cache; 678 ulong_t plsubpages_szcbig; 679 ulong_t plsubpages_szc0; 680 ulong_t pfs_req[MMU_PAGE_SIZES]; /* page_freelist_split */ 681 ulong_t pfs_demote[MMU_PAGE_SIZES]; 682 ulong_t pfc_coalok[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 683 ulong_t ppr_reloc[MMU_PAGE_SIZES]; /* page_relocate */ 684 ulong_t ppr_relocnoroot[MMU_PAGE_SIZES]; 685 ulong_t ppr_reloc_replnoroot[MMU_PAGE_SIZES]; 686 ulong_t ppr_relocnolock[MMU_PAGE_SIZES]; 687 ulong_t ppr_relocnomem[MMU_PAGE_SIZES]; 688 ulong_t ppr_relocok[MMU_PAGE_SIZES]; 689 /* page coalesce counter */ 690 ulong_t page_ctrs_coalesce[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 691 /* candidates useful */ 692 ulong_t page_ctrs_cands_skip[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 693 /* ctrs changed after locking */ 694 ulong_t page_ctrs_changed[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 695 /* page_freelist_coalesce failed */ 696 ulong_t page_ctrs_failed[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 697 ulong_t page_ctrs_coalesce_all; /* page coalesce all counter */ 698 ulong_t page_ctrs_cands_skip_all; /* candidates useful for all func */ 699 ulong_t restrict4gcnt; 700 ulong_t unrestrict16mcnt; /* non-DMA 16m allocs allowed */ 701 ulong_t pgpanicalloc; /* PG_PANIC allocation */ 702 }; 703 extern struct vmm_vmstats_str vmm_vmstats; 704 #endif /* VM_STATS */ 705 706 extern size_t page_ctrs_sz(void); 707 extern caddr_t page_ctrs_alloc(caddr_t); 708 extern void page_ctr_sub(int, int, page_t *, int); 709 extern page_t *page_freelist_split(uchar_t, 710 uint_t, int, int, pfn_t, page_list_walker_t *); 711 extern page_t *page_freelist_coalesce(int, uchar_t, uint_t, uint_t, int, 712 pfn_t); 713 extern uint_t page_get_pagecolors(uint_t); 714 715 #ifdef __cplusplus 716 } 717 #endif 718 719 #endif /* _VM_DEP_H */ 720