1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * UNIX machine dependent virtual memory support. 28 */ 29 30 #ifndef _VM_DEP_H 31 #define _VM_DEP_H 32 33 #pragma ident "%Z%%M% %I% %E% SMI" 34 35 #ifdef __cplusplus 36 extern "C" { 37 #endif 38 39 #include <sys/clock.h> 40 #include <vm/hat_pte.h> 41 #include <sys/param.h> 42 43 /* 44 * WARNING: vm_dep.h is included by files in common. As such, macros 45 * dependent upon PTE36 such as LARGEPAGESIZE cannot be used in this file. 46 */ 47 48 #define GETTICK() tsc_read() 49 50 /* memranges in descending order */ 51 extern pfn_t *memranges; 52 53 #define MEMRANGEHI(mtype) \ 54 ((mtype > 0) ? memranges[mtype - 1] - 1: physmax) 55 #define MEMRANGELO(mtype) (memranges[mtype]) 56 57 #define MTYPE_FREEMEM(mt) \ 58 (mnoderanges[mt].mnr_mt_clpgcnt + \ 59 mnoderanges[mt].mnr_mt_flpgcnt + \ 60 mnoderanges[mt].mnr_mt_lgpgcnt) 61 62 /* 63 * combined memory ranges from mnode and memranges[] to manage single 64 * mnode/mtype dimension in the page lists. 65 */ 66 typedef struct { 67 pfn_t mnr_pfnlo; 68 pfn_t mnr_pfnhi; 69 int mnr_mnode; 70 int mnr_memrange; /* index into memranges[] */ 71 /* maintain page list stats */ 72 pgcnt_t mnr_mt_pgmax; /* mnode/mtype max page cnt */ 73 pgcnt_t mnr_mt_clpgcnt; /* cache list cnt */ 74 pgcnt_t mnr_mt_flpgcnt; /* free list cnt - small pages */ 75 pgcnt_t mnr_mt_lgpgcnt; /* free list cnt - large pages */ 76 #ifdef DEBUG 77 struct mnr_mts { /* mnode/mtype szc stats */ 78 pgcnt_t mnr_mts_pgcnt; 79 int mnr_mts_colors; 80 pgcnt_t *mnr_mtsc_pgcnt; 81 } *mnr_mts; 82 #endif 83 } mnoderange_t; 84 85 #ifdef DEBUG 86 #define PLCNT_SZ(ctrs_sz) { \ 87 int szc, colors; \ 88 ctrs_sz += mnoderangecnt * sizeof (struct mnr_mts) * \ 89 mmu_page_sizes; \ 90 for (szc = 0; szc < mmu_page_sizes; szc++) { \ 91 colors = page_get_pagecolors(szc); \ 92 ctrs_sz += mnoderangecnt * sizeof (pgcnt_t) * colors; \ 93 } \ 94 } 95 96 #define PLCNT_INIT(addr) { \ 97 int mt, szc, colors; \ 98 for (mt = 0; mt < mnoderangecnt; mt++) { \ 99 mnoderanges[mt].mnr_mts = (struct mnr_mts *)addr; \ 100 addr += (sizeof (struct mnr_mts) * mmu_page_sizes); \ 101 for (szc = 0; szc < mmu_page_sizes; szc++) { \ 102 colors = page_get_pagecolors(szc); \ 103 mnoderanges[mt].mnr_mts[szc].mnr_mts_colors = \ 104 colors; \ 105 mnoderanges[mt].mnr_mts[szc].mnr_mtsc_pgcnt = \ 106 (pgcnt_t *)addr; \ 107 addr += (sizeof (pgcnt_t) * colors); \ 108 } \ 109 } \ 110 } 111 #define PLCNT_DO(pp, mtype, szc, cnt, flags) { \ 112 int bin = PP_2_BIN(pp); \ 113 if (flags & PG_CACHE_LIST) \ 114 atomic_add_long(&mnoderanges[mtype]. \ 115 mnr_mt_clpgcnt, cnt); \ 116 else if (szc) \ 117 atomic_add_long(&mnoderanges[mtype]. \ 118 mnr_mt_lgpgcnt, cnt); \ 119 else \ 120 atomic_add_long(&mnoderanges[mtype]. \ 121 mnr_mt_flpgcnt, cnt); \ 122 atomic_add_long(&mnoderanges[mtype].mnr_mts[szc]. \ 123 mnr_mts_pgcnt, cnt); \ 124 atomic_add_long(&mnoderanges[mtype].mnr_mts[szc]. \ 125 mnr_mtsc_pgcnt[bin], cnt); \ 126 } 127 #else 128 #define PLCNT_SZ(ctrs_sz) 129 #define PLCNT_INIT(base) 130 #define PLCNT_DO(pp, mtype, szc, cnt, flags) { \ 131 if (flags & PG_CACHE_LIST) \ 132 atomic_add_long(&mnoderanges[mtype]. \ 133 mnr_mt_clpgcnt, cnt); \ 134 else if (szc) \ 135 atomic_add_long(&mnoderanges[mtype]. \ 136 mnr_mt_lgpgcnt, cnt); \ 137 else \ 138 atomic_add_long(&mnoderanges[mtype]. \ 139 mnr_mt_flpgcnt, cnt); \ 140 } 141 #endif 142 143 #define PLCNT_INCR(pp, mnode, mtype, szc, flags) { \ 144 long cnt = (1 << PAGE_BSZS_SHIFT(szc)); \ 145 ASSERT(mtype == PP_2_MTYPE(pp)); \ 146 if (physmax4g && mtype <= mtype4g) \ 147 atomic_add_long(&freemem4g, cnt); \ 148 PLCNT_DO(pp, mtype, szc, cnt, flags); \ 149 } 150 151 #define PLCNT_DECR(pp, mnode, mtype, szc, flags) { \ 152 long cnt = ((-1) << PAGE_BSZS_SHIFT(szc)); \ 153 ASSERT(mtype == PP_2_MTYPE(pp)); \ 154 if (physmax4g && mtype <= mtype4g) \ 155 atomic_add_long(&freemem4g, cnt); \ 156 PLCNT_DO(pp, mtype, szc, cnt, flags); \ 157 } 158 159 /* 160 * macros to update page list max counts. no-op on x86. 161 */ 162 #define PLCNT_XFER_NORELOC(pp) 163 164 #define PLCNT_MODIFY_MAX(pfn, cnt) mtype_modify_max(pfn, (pgcnt_t)cnt) 165 166 extern mnoderange_t *mnoderanges; 167 extern int mnoderangecnt; 168 extern int mtype4g; 169 170 /* 171 * 4g memory management variables for systems with more than 4g of memory: 172 * 173 * physical memory below 4g is required for 32bit dma devices and, currently, 174 * for kmem memory. On systems with more than 4g of memory, the pool of memory 175 * below 4g can be depleted without any paging activity given that there is 176 * likely to be sufficient memory above 4g. 177 * 178 * physmax4g is set true if the largest pfn is over 4g. The rest of the 179 * 4g memory management code is enabled only when physmax4g is true. 180 * 181 * maxmem4g is the count of the maximum number of pages on the page lists 182 * with physical addresses below 4g. It can be a lot less then 4g given that 183 * BIOS may reserve large chunks of space below 4g for hot plug pci devices, 184 * agp aperture etc. 185 * 186 * freemem4g maintains the count of the number of available pages on the 187 * page lists with physical addresses below 4g. 188 * 189 * DESFREE4G specifies the desired amount of below 4g memory. It defaults to 190 * 6% (desfree4gshift = 4) of maxmem4g. 191 * 192 * RESTRICT4G_ALLOC returns true if freemem4g falls below DESFREE4G 193 * and the amount of physical memory above 4g is greater than freemem4g. 194 * In this case, page_get_* routines will restrict below 4g allocations 195 * for requests that don't specifically require it. 196 */ 197 198 extern int physmax4g; 199 extern pgcnt_t maxmem4g; 200 extern pgcnt_t freemem4g; 201 extern int lotsfree4gshift; 202 extern int desfree4gshift; 203 #define LOTSFREE4G (maxmem4g >> lotsfree4gshift) 204 #define DESFREE4G (maxmem4g >> desfree4gshift) 205 206 #define RESTRICT4G_ALLOC \ 207 (physmax4g && (freemem4g < DESFREE4G) && ((freemem4g << 1) < freemem)) 208 209 /* 210 * 16m memory management: 211 * 212 * reserve some amount of physical memory below 16m for legacy devices. 213 * 214 * RESTRICT16M_ALLOC returns true if an there are sufficient free pages above 215 * 16m or if the 16m pool drops below DESFREE16M. 216 * 217 * In this case, general page allocations via page_get_{free,cache}list 218 * routines will be restricted from allocating from the 16m pool. Allocations 219 * that require specific pfn ranges (page_get_anylist) and PG_PANIC allocations 220 * are not restricted. 221 */ 222 223 #define FREEMEM16M MTYPE_FREEMEM(0) 224 #define DESFREE16M desfree16m 225 #define RESTRICT16M_ALLOC(freemem, pgcnt, flags) \ 226 ((freemem != 0) && ((flags & PG_PANIC) == 0) && \ 227 ((freemem >= (FREEMEM16M)) || \ 228 (FREEMEM16M < (DESFREE16M + pgcnt)))) 229 extern pgcnt_t desfree16m; 230 231 extern int restricted_kmemalloc; 232 extern int memrange_num(pfn_t); 233 extern int pfn_2_mtype(pfn_t); 234 extern int mtype_func(int, int, uint_t); 235 extern void mtype_modify_max(pfn_t, long); 236 extern int mnode_pgcnt(int); 237 extern int mnode_range_cnt(int); 238 239 #define NUM_MEM_RANGES 4 /* memory range types */ 240 241 /* 242 * candidate counters in vm_pagelist.c are indexed by color and range 243 */ 244 #define MAX_MNODE_MRANGES NUM_MEM_RANGES 245 #define MNODE_RANGE_CNT(mnode) mnode_range_cnt(mnode) 246 #define MNODE_MAX_MRANGE(mnode) (memrange_num(mem_node_config[mnode].physbase)) 247 #define MTYPE_2_MRANGE(mnode, mtype) \ 248 (mnode_maxmrange[mnode] - mnoderanges[mtype].mnr_memrange) 249 250 /* 251 * Per page size free lists. Allocated dynamically. 252 * dimensions [mtype][mmu_page_sizes][colors] 253 * 254 * mtype specifies a physical memory range with a unique mnode. 255 */ 256 257 extern page_t ****page_freelists; 258 259 #define PAGE_FREELISTS(mnode, szc, color, mtype) \ 260 (*(page_freelists[mtype][szc] + (color))) 261 262 /* 263 * For now there is only a single size cache list. Allocated dynamically. 264 * dimensions [mtype][colors] 265 * 266 * mtype specifies a physical memory range with a unique mnode. 267 */ 268 extern page_t ***page_cachelists; 269 270 #define PAGE_CACHELISTS(mnode, color, mtype) \ 271 (*(page_cachelists[mtype] + (color))) 272 273 /* 274 * There are mutexes for both the page freelist 275 * and the page cachelist. We want enough locks to make contention 276 * reasonable, but not too many -- otherwise page_freelist_lock() gets 277 * so expensive that it becomes the bottleneck! 278 */ 279 280 #define NPC_MUTEX 16 281 282 extern kmutex_t *fpc_mutex[NPC_MUTEX]; 283 extern kmutex_t *cpc_mutex[NPC_MUTEX]; 284 285 extern page_t *page_get_mnode_freelist(int, uint_t, int, uchar_t, uint_t); 286 extern page_t *page_get_mnode_cachelist(uint_t, uint_t, int, int); 287 288 #define PAGE_GET_COLOR_SHIFT(szc, nszc) \ 289 (hw_page_array[(nszc)].hp_shift - hw_page_array[(szc)].hp_shift) 290 291 #define PFN_2_COLOR(pfn, szc) \ 292 (((pfn) & page_colors_mask) >> \ 293 (hw_page_array[szc].hp_shift - hw_page_array[0].hp_shift)) 294 295 #define PNUM_SIZE(szc) \ 296 (hw_page_array[(szc)].hp_pgcnt) 297 #define PNUM_SHIFT(szc) \ 298 (hw_page_array[(szc)].hp_shift - hw_page_array[0].hp_shift) 299 #define PAGE_GET_SHIFT(szc) \ 300 (hw_page_array[(szc)].hp_shift) 301 #define PAGE_GET_PAGECOLORS(szc) \ 302 (hw_page_array[(szc)].hp_colors) 303 304 /* 305 * This macro calculates the next sequential pfn with the specified 306 * color using color equivalency mask 307 */ 308 #define PAGE_NEXT_PFN_FOR_COLOR(pfn, szc, color, ceq_mask, color_mask) \ 309 ASSERT(((color) & ~(ceq_mask)) == 0); \ 310 { \ 311 uint_t pfn_shift = PAGE_BSZS_SHIFT(szc); \ 312 pfn_t spfn = pfn >> pfn_shift; \ 313 pfn_t stride = (ceq_mask) + 1; \ 314 ASSERT((((ceq_mask) + 1) & (ceq_mask)) == 0); \ 315 if (((spfn ^ (color)) & (ceq_mask)) == 0) { \ 316 pfn += stride << pfn_shift; \ 317 } else { \ 318 pfn = (spfn & ~(pfn_t)(ceq_mask)) | (color); \ 319 pfn = (pfn > spfn ? pfn : pfn + stride) << pfn_shift; \ 320 } \ 321 } 322 323 /* get the color equivalency mask for the next szc */ 324 #define PAGE_GET_NSZ_MASK(szc, mask) \ 325 ((mask) >> (PAGE_GET_SHIFT((szc) + 1) - PAGE_GET_SHIFT(szc))) 326 327 /* get the color of the next szc */ 328 #define PAGE_GET_NSZ_COLOR(szc, color) \ 329 ((color) >> (PAGE_GET_SHIFT((szc) + 1) - PAGE_GET_SHIFT(szc))) 330 331 /* Find the bin for the given page if it was of size szc */ 332 #define PP_2_BIN_SZC(pp, szc) (PFN_2_COLOR(pp->p_pagenum, szc)) 333 334 #define PP_2_BIN(pp) (PP_2_BIN_SZC(pp, pp->p_szc)) 335 336 #define PP_2_MEM_NODE(pp) (PFN_2_MEM_NODE(pp->p_pagenum)) 337 #define PP_2_MTYPE(pp) (pfn_2_mtype(pfn_to_mfn(pp->p_pagenum))) 338 #define PP_2_SZC(pp) (pp->p_szc) 339 340 #define SZCPAGES(szc) (1 << PAGE_BSZS_SHIFT(szc)) 341 #define PFN_BASE(pfnum, szc) (pfnum & ~(SZCPAGES(szc) - 1)) 342 343 /* 344 * this structure is used for walking free page lists 345 * controls when to split large pages into smaller pages, 346 * and when to coalesce smaller pages into larger pages 347 */ 348 typedef struct page_list_walker { 349 uint_t plw_colors; /* num of colors for szc */ 350 uint_t plw_color_mask; /* colors-1 */ 351 uint_t plw_bin_step; /* next bin: 1 or 2 */ 352 uint_t plw_count; /* loop count */ 353 uint_t plw_bin0; /* starting bin */ 354 uint_t plw_bin_marker; /* bin after initial jump */ 355 uint_t plw_bin_split_prev; /* last bin we tried to split */ 356 uint_t plw_do_split; /* set if OK to split */ 357 uint_t plw_split_next; /* next bin to split */ 358 uint_t plw_ceq_dif; /* number of different color groups */ 359 /* to check */ 360 uint_t plw_ceq_mask[MMU_PAGE_SIZES + 1]; /* color equiv mask */ 361 uint_t plw_bins[MMU_PAGE_SIZES + 1]; /* num of bins */ 362 } page_list_walker_t; 363 364 void page_list_walk_init(uchar_t szc, uint_t flags, uint_t bin, 365 int can_split, int use_ceq, page_list_walker_t *plw); 366 367 uint_t page_list_walk_next_bin(uchar_t szc, uint_t bin, 368 page_list_walker_t *plw); 369 370 extern struct cpu cpus[]; 371 #define CPU0 cpus 372 373 #if defined(__amd64) 374 375 /* 376 * set the mtype range (called from page_get_{free,cache}list) 377 * - set range to above 4g if the system has more than 4g of memory and the 378 * amount of memory below 4g runs low. If not, set range to above 16m if 379 * 16m threshold is reached otherwise set range to all of memory 380 * starting from the hi pfns. 381 * 382 * page_get_anylist gets its mtype range from the specified ddi_dma_attr_t. 383 */ 384 #define MTYPE_INIT(mtype, vp, vaddr, flags, pgsz) { \ 385 mtype = mnoderangecnt - 1; \ 386 if (RESTRICT4G_ALLOC) { \ 387 VM_STAT_ADD(vmm_vmstats.restrict4gcnt); \ 388 /* here only for > 4g systems */ \ 389 flags |= PGI_MT_RANGE4G; \ 390 } else if (RESTRICT16M_ALLOC(freemem, btop(pgsz), flags)) { \ 391 flags |= PGI_MT_RANGE16M; \ 392 } else { \ 393 VM_STAT_ADD(vmm_vmstats.unrestrict16mcnt); \ 394 VM_STAT_COND_ADD((flags & PG_PANIC), \ 395 vmm_vmstats.pgpanicalloc); \ 396 flags |= PGI_MT_RANGE0; \ 397 } \ 398 } 399 400 #elif defined(__i386) 401 402 /* 403 * set the mtype range 404 * - kmem requests needs to be below 4g if restricted_kmemalloc is set. 405 * - for non kmem requests, set range to above 4g if the amount of memory 406 * below 4g runs low. 407 */ 408 409 #define MTYPE_INIT(mtype, vp, vaddr, flags, pgsz) { \ 410 if (restricted_kmemalloc && VN_ISKAS(vp) && \ 411 (caddr_t)(vaddr) >= kernelheap && \ 412 (caddr_t)(vaddr) < ekernelheap) { \ 413 ASSERT(physmax4g); \ 414 mtype = mtype4g; \ 415 if (RESTRICT16M_ALLOC(freemem4g - btop(pgsz), \ 416 btop(pgsz), flags)) { \ 417 flags |= PGI_MT_RANGE16M; \ 418 } else { \ 419 VM_STAT_ADD(vmm_vmstats.unrestrict16mcnt); \ 420 VM_STAT_COND_ADD((flags & PG_PANIC), \ 421 vmm_vmstats.pgpanicalloc); \ 422 flags |= PGI_MT_RANGE0; \ 423 } \ 424 } else { \ 425 mtype = mnoderangecnt - 1; \ 426 if (RESTRICT4G_ALLOC) { \ 427 VM_STAT_ADD(vmm_vmstats.restrict4gcnt); \ 428 /* here only for > 4g systems */ \ 429 flags |= PGI_MT_RANGE4G; \ 430 } else if (RESTRICT16M_ALLOC(freemem, btop(pgsz), \ 431 flags)) { \ 432 flags |= PGI_MT_RANGE16M; \ 433 } else { \ 434 VM_STAT_ADD(vmm_vmstats.unrestrict16mcnt); \ 435 VM_STAT_COND_ADD((flags & PG_PANIC), \ 436 vmm_vmstats.pgpanicalloc); \ 437 flags |= PGI_MT_RANGE0; \ 438 } \ 439 } \ 440 } 441 442 #endif /* __i386 */ 443 444 /* 445 * macros to loop through the mtype range (page_get_mnode_{free,cache,any}list, 446 * and page_get_contig_pages) 447 * 448 * MTYPE_START sets the initial mtype. -1 if the mtype range specified does 449 * not contain mnode. 450 * 451 * MTYPE_NEXT sets the next mtype. -1 if there are no more valid 452 * mtype in the range. 453 */ 454 455 #define MTYPE_START(mnode, mtype, flags) \ 456 (mtype = mtype_func(mnode, mtype, flags)) 457 458 #define MTYPE_NEXT(mnode, mtype, flags) { \ 459 if (flags & PGI_MT_RANGE) { \ 460 mtype = mtype_func(mnode, mtype, flags | PGI_MT_NEXT); \ 461 } else { \ 462 mtype = -1; \ 463 } \ 464 } 465 466 /* mtype init for page_get_replacement_page */ 467 468 #define MTYPE_PGR_INIT(mtype, flags, pp, mnode, pgcnt) { \ 469 mtype = mnoderangecnt - 1; \ 470 if (RESTRICT16M_ALLOC(freemem, pgcnt, flags)) { \ 471 flags |= PGI_MT_RANGE16M; \ 472 } else { \ 473 VM_STAT_ADD(vmm_vmstats.unrestrict16mcnt); \ 474 flags |= PGI_MT_RANGE0; \ 475 } \ 476 } 477 478 #define MNODE_PGCNT(mnode) mnode_pgcnt(mnode) 479 480 #define MNODETYPE_2_PFN(mnode, mtype, pfnlo, pfnhi) \ 481 ASSERT(mnoderanges[mtype].mnr_mnode == mnode); \ 482 pfnlo = mnoderanges[mtype].mnr_pfnlo; \ 483 pfnhi = mnoderanges[mtype].mnr_pfnhi; 484 485 #define PC_BIN_MUTEX(mnode, bin, flags) ((flags & PG_FREE_LIST) ? \ 486 &fpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode] : \ 487 &cpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode]) 488 489 #define FPC_MUTEX(mnode, i) (&fpc_mutex[i][mnode]) 490 #define CPC_MUTEX(mnode, i) (&cpc_mutex[i][mnode]) 491 492 #ifdef DEBUG 493 #define CHK_LPG(pp, szc) chk_lpg(pp, szc) 494 extern void chk_lpg(page_t *, uchar_t); 495 #else 496 #define CHK_LPG(pp, szc) 497 #endif 498 499 #define FULL_REGION_CNT(rg_szc) \ 500 (LEVEL_SIZE(rg_szc) >> LEVEL_SHIFT(rg_szc - 1)) 501 502 /* Return the leader for this mapping size */ 503 #define PP_GROUPLEADER(pp, szc) \ 504 (&(pp)[-(int)((pp)->p_pagenum & (SZCPAGES(szc)-1))]) 505 506 /* Return the root page for this page based on p_szc */ 507 #define PP_PAGEROOT(pp) ((pp)->p_szc == 0 ? (pp) : \ 508 PP_GROUPLEADER((pp), (pp)->p_szc)) 509 510 /* 511 * The counter base must be per page_counter element to prevent 512 * races when re-indexing, and the base page size element should 513 * be aligned on a boundary of the given region size. 514 * 515 * We also round up the number of pages spanned by the counters 516 * for a given region to PC_BASE_ALIGN in certain situations to simplify 517 * the coding for some non-performance critical routines. 518 */ 519 520 #define PC_BASE_ALIGN ((pfn_t)1 << PAGE_BSZS_SHIFT(MMU_PAGE_SIZES-1)) 521 #define PC_BASE_ALIGN_MASK (PC_BASE_ALIGN - 1) 522 523 /* 524 * cpu/mmu-dependent vm variables 525 */ 526 extern uint_t mmu_page_sizes; 527 extern uint_t mmu_exported_page_sizes; 528 529 /* For x86, userszc is the same as the kernel's szc */ 530 #define USERSZC_2_SZC(userszc) (userszc) 531 #define SZC_2_USERSZC(szc) (szc) 532 533 /* 534 * for hw_page_map_t, sized to hold the ratio of large page to base 535 * pagesize (1024 max) 536 */ 537 typedef short hpmctr_t; 538 539 /* 540 * get the setsize of the current cpu - assume homogenous for x86 541 */ 542 extern int l2cache_sz, l2cache_linesz, l2cache_assoc; 543 544 #define L2CACHE_ALIGN l2cache_linesz 545 #define L2CACHE_ALIGN_MAX 64 546 #define CPUSETSIZE() \ 547 (l2cache_assoc ? (l2cache_sz / l2cache_assoc) : MMU_PAGESIZE) 548 549 /* 550 * Return the log2(pagesize(szc) / MMU_PAGESIZE) --- or the shift count 551 * for the number of base pages in this pagesize 552 */ 553 #define PAGE_BSZS_SHIFT(szc) (LEVEL_SHIFT(szc) - MMU_PAGESHIFT) 554 555 /* 556 * Internal PG_ flags. 557 */ 558 #define PGI_RELOCONLY 0x010000 /* opposite of PG_NORELOC */ 559 #define PGI_NOCAGE 0x020000 /* cage is disabled */ 560 #define PGI_PGCPHIPRI 0x040000 /* page_get_contig_page pri alloc */ 561 #define PGI_PGCPSZC0 0x080000 /* relocate base pagesize page */ 562 563 /* 564 * PGI range flags - should not overlap PGI flags 565 */ 566 #define PGI_MT_RANGE0 0x1000000 /* mtype range to 0 */ 567 #define PGI_MT_RANGE16M 0x2000000 /* mtype range to 16m */ 568 #define PGI_MT_RANGE4G 0x4000000 /* mtype range to 4g */ 569 #define PGI_MT_NEXT 0x8000000 /* get next mtype */ 570 #define PGI_MT_RANGE (PGI_MT_RANGE0 | PGI_MT_RANGE16M | PGI_MT_RANGE4G) 571 572 /* 573 * Maximum and default values for user heap, stack, private and shared 574 * anonymous memory, and user text and initialized data. 575 * Used by map_pgsz*() routines. 576 */ 577 extern size_t max_uheap_lpsize; 578 extern size_t default_uheap_lpsize; 579 extern size_t max_ustack_lpsize; 580 extern size_t default_ustack_lpsize; 581 extern size_t max_privmap_lpsize; 582 extern size_t max_uidata_lpsize; 583 extern size_t max_utext_lpsize; 584 extern size_t max_shm_lpsize; 585 extern size_t mcntl0_lpsize; 586 587 /* 588 * Sanity control. Don't use large pages regardless of user 589 * settings if there's less than priv or shm_lpg_min_physmem memory installed. 590 * The units for this variable are 8K pages. 591 */ 592 extern pgcnt_t privm_lpg_min_physmem; 593 extern pgcnt_t shm_lpg_min_physmem; 594 595 /* 596 * hash as and addr to get a bin. 597 */ 598 599 #define AS_2_BIN(as, seg, vp, addr, bin, szc) \ 600 bin = (((((uintptr_t)(addr) >> PAGESHIFT) + ((uintptr_t)(as) >> 4)) \ 601 & page_colors_mask) >> \ 602 (hw_page_array[szc].hp_shift - hw_page_array[0].hp_shift)) 603 604 /* 605 * cpu private vm data - accessed thru CPU->cpu_vm_data 606 * vc_pnum_memseg: tracks last memseg visited in page_numtopp_nolock() 607 * vc_pnext_memseg: tracks last memseg visited in page_nextn() 608 * vc_kmptr: orignal unaligned kmem pointer for this vm_cpu_data_t 609 * vc_kmsize: orignal kmem size for this vm_cpu_data_t 610 */ 611 612 typedef struct { 613 struct memseg *vc_pnum_memseg; 614 struct memseg *vc_pnext_memseg; 615 void *vc_kmptr; 616 size_t vc_kmsize; 617 } vm_cpu_data_t; 618 619 /* allocation size to ensure vm_cpu_data_t resides in its own cache line */ 620 #define VM_CPU_DATA_PADSIZE \ 621 (P2ROUNDUP(sizeof (vm_cpu_data_t), L2CACHE_ALIGN_MAX)) 622 623 /* for boot cpu before kmem is initialized */ 624 extern char vm_cpu_data0[]; 625 626 /* 627 * When a bin is empty, and we can't satisfy a color request correctly, 628 * we scan. If we assume that the programs have reasonable spatial 629 * behavior, then it will not be a good idea to use the adjacent color. 630 * Using the adjacent color would result in virtually adjacent addresses 631 * mapping into the same spot in the cache. So, if we stumble across 632 * an empty bin, skip a bunch before looking. After the first skip, 633 * then just look one bin at a time so we don't miss our cache on 634 * every look. Be sure to check every bin. Page_create() will panic 635 * if we miss a page. 636 * 637 * This also explains the `<=' in the for loops in both page_get_freelist() 638 * and page_get_cachelist(). Since we checked the target bin, skipped 639 * a bunch, then continued one a time, we wind up checking the target bin 640 * twice to make sure we get all of them bins. 641 */ 642 #define BIN_STEP 19 643 644 #ifdef VM_STATS 645 struct vmm_vmstats_str { 646 ulong_t pgf_alloc[MMU_PAGE_SIZES]; /* page_get_freelist */ 647 ulong_t pgf_allocok[MMU_PAGE_SIZES]; 648 ulong_t pgf_allocokrem[MMU_PAGE_SIZES]; 649 ulong_t pgf_allocfailed[MMU_PAGE_SIZES]; 650 ulong_t pgf_allocdeferred; 651 ulong_t pgf_allocretry[MMU_PAGE_SIZES]; 652 ulong_t pgc_alloc; /* page_get_cachelist */ 653 ulong_t pgc_allocok; 654 ulong_t pgc_allocokrem; 655 ulong_t pgc_allocokdeferred; 656 ulong_t pgc_allocfailed; 657 ulong_t pgcp_alloc[MMU_PAGE_SIZES]; /* page_get_contig_pages */ 658 ulong_t pgcp_allocfailed[MMU_PAGE_SIZES]; 659 ulong_t pgcp_allocempty[MMU_PAGE_SIZES]; 660 ulong_t pgcp_allocok[MMU_PAGE_SIZES]; 661 ulong_t ptcp[MMU_PAGE_SIZES]; /* page_trylock_contig_pages */ 662 ulong_t ptcpfreethresh[MMU_PAGE_SIZES]; 663 ulong_t ptcpfailexcl[MMU_PAGE_SIZES]; 664 ulong_t ptcpfailszc[MMU_PAGE_SIZES]; 665 ulong_t ptcpfailcage[MMU_PAGE_SIZES]; 666 ulong_t ptcpok[MMU_PAGE_SIZES]; 667 ulong_t pgmf_alloc[MMU_PAGE_SIZES]; /* page_get_mnode_freelist */ 668 ulong_t pgmf_allocfailed[MMU_PAGE_SIZES]; 669 ulong_t pgmf_allocempty[MMU_PAGE_SIZES]; 670 ulong_t pgmf_allocok[MMU_PAGE_SIZES]; 671 ulong_t pgmc_alloc; /* page_get_mnode_cachelist */ 672 ulong_t pgmc_allocfailed; 673 ulong_t pgmc_allocempty; 674 ulong_t pgmc_allocok; 675 ulong_t pladd_free[MMU_PAGE_SIZES]; /* page_list_add/sub */ 676 ulong_t plsub_free[MMU_PAGE_SIZES]; 677 ulong_t pladd_cache; 678 ulong_t plsub_cache; 679 ulong_t plsubpages_szcbig; 680 ulong_t plsubpages_szc0; 681 ulong_t pfs_req[MMU_PAGE_SIZES]; /* page_freelist_split */ 682 ulong_t pfs_demote[MMU_PAGE_SIZES]; 683 ulong_t pfc_coalok[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 684 ulong_t ppr_reloc[MMU_PAGE_SIZES]; /* page_relocate */ 685 ulong_t ppr_relocnoroot[MMU_PAGE_SIZES]; 686 ulong_t ppr_reloc_replnoroot[MMU_PAGE_SIZES]; 687 ulong_t ppr_relocnolock[MMU_PAGE_SIZES]; 688 ulong_t ppr_relocnomem[MMU_PAGE_SIZES]; 689 ulong_t ppr_relocok[MMU_PAGE_SIZES]; 690 ulong_t ppr_copyfail; 691 /* page coalesce counter */ 692 ulong_t page_ctrs_coalesce[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 693 /* candidates useful */ 694 ulong_t page_ctrs_cands_skip[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 695 /* ctrs changed after locking */ 696 ulong_t page_ctrs_changed[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 697 /* page_freelist_coalesce failed */ 698 ulong_t page_ctrs_failed[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 699 ulong_t page_ctrs_coalesce_all; /* page coalesce all counter */ 700 ulong_t page_ctrs_cands_skip_all; /* candidates useful for all func */ 701 ulong_t restrict4gcnt; 702 ulong_t unrestrict16mcnt; /* non-DMA 16m allocs allowed */ 703 ulong_t pgpanicalloc; /* PG_PANIC allocation */ 704 }; 705 extern struct vmm_vmstats_str vmm_vmstats; 706 #endif /* VM_STATS */ 707 708 extern size_t page_ctrs_sz(void); 709 extern caddr_t page_ctrs_alloc(caddr_t); 710 extern void page_ctr_sub(int, int, page_t *, int); 711 extern page_t *page_freelist_split(uchar_t, 712 uint_t, int, int, pfn_t, page_list_walker_t *); 713 extern page_t *page_freelist_coalesce(int, uchar_t, uint_t, uint_t, int, 714 pfn_t); 715 extern uint_t page_get_pagecolors(uint_t); 716 717 #ifdef __cplusplus 718 } 719 #endif 720 721 #endif /* _VM_DEP_H */ 722