1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * UNIX machine dependent virtual memory support. 28 */ 29 30 #ifndef _VM_DEP_H 31 #define _VM_DEP_H 32 33 #pragma ident "%Z%%M% %I% %E% SMI" 34 35 #ifdef __cplusplus 36 extern "C" { 37 #endif 38 39 #include <sys/clock.h> 40 #include <vm/hat_pte.h> 41 #include <sys/param.h> 42 #include <sys/memnode.h> 43 44 /* 45 * WARNING: vm_dep.h is included by files in common. As such, macros 46 * dependent upon PTE36 such as LARGEPAGESIZE cannot be used in this file. 47 */ 48 49 #define GETTICK() tsc_read() 50 51 /* memranges in descending order */ 52 extern pfn_t *memranges; 53 54 #define MEMRANGEHI(mtype) \ 55 ((mtype > 0) ? memranges[mtype - 1] - 1: physmax) 56 #define MEMRANGELO(mtype) (memranges[mtype]) 57 58 #define MTYPE_FREEMEM(mt) \ 59 (mnoderanges[mt].mnr_mt_clpgcnt + \ 60 mnoderanges[mt].mnr_mt_flpgcnt + \ 61 mnoderanges[mt].mnr_mt_lgpgcnt) 62 63 /* 64 * combined memory ranges from mnode and memranges[] to manage single 65 * mnode/mtype dimension in the page lists. 66 */ 67 typedef struct { 68 pfn_t mnr_pfnlo; 69 pfn_t mnr_pfnhi; 70 int mnr_mnode; 71 int mnr_memrange; /* index into memranges[] */ 72 /* maintain page list stats */ 73 pgcnt_t mnr_mt_pgmax; /* mnode/mtype max page cnt */ 74 pgcnt_t mnr_mt_clpgcnt; /* cache list cnt */ 75 pgcnt_t mnr_mt_flpgcnt; /* free list cnt - small pages */ 76 pgcnt_t mnr_mt_lgpgcnt; /* free list cnt - large pages */ 77 #ifdef DEBUG 78 struct mnr_mts { /* mnode/mtype szc stats */ 79 pgcnt_t mnr_mts_pgcnt; 80 int mnr_mts_colors; 81 pgcnt_t *mnr_mtsc_pgcnt; 82 } *mnr_mts; 83 #endif 84 } mnoderange_t; 85 86 #ifdef DEBUG 87 #define PLCNT_SZ(ctrs_sz) { \ 88 int szc, colors; \ 89 ctrs_sz += mnoderangecnt * sizeof (struct mnr_mts) * \ 90 mmu_page_sizes; \ 91 for (szc = 0; szc < mmu_page_sizes; szc++) { \ 92 colors = page_get_pagecolors(szc); \ 93 ctrs_sz += mnoderangecnt * sizeof (pgcnt_t) * colors; \ 94 } \ 95 } 96 97 #define PLCNT_INIT(addr) { \ 98 int mt, szc, colors; \ 99 for (mt = 0; mt < mnoderangecnt; mt++) { \ 100 mnoderanges[mt].mnr_mts = (struct mnr_mts *)addr; \ 101 addr += (sizeof (struct mnr_mts) * mmu_page_sizes); \ 102 for (szc = 0; szc < mmu_page_sizes; szc++) { \ 103 colors = page_get_pagecolors(szc); \ 104 mnoderanges[mt].mnr_mts[szc].mnr_mts_colors = \ 105 colors; \ 106 mnoderanges[mt].mnr_mts[szc].mnr_mtsc_pgcnt = \ 107 (pgcnt_t *)addr; \ 108 addr += (sizeof (pgcnt_t) * colors); \ 109 } \ 110 } \ 111 } 112 #define PLCNT_DO(pp, mtype, szc, cnt, flags) { \ 113 int bin = PP_2_BIN(pp); \ 114 if (flags & PG_CACHE_LIST) \ 115 atomic_add_long(&mnoderanges[mtype]. \ 116 mnr_mt_clpgcnt, cnt); \ 117 else if (szc) \ 118 atomic_add_long(&mnoderanges[mtype]. \ 119 mnr_mt_lgpgcnt, cnt); \ 120 else \ 121 atomic_add_long(&mnoderanges[mtype]. \ 122 mnr_mt_flpgcnt, cnt); \ 123 atomic_add_long(&mnoderanges[mtype].mnr_mts[szc]. \ 124 mnr_mts_pgcnt, cnt); \ 125 atomic_add_long(&mnoderanges[mtype].mnr_mts[szc]. \ 126 mnr_mtsc_pgcnt[bin], cnt); \ 127 } 128 #else 129 #define PLCNT_SZ(ctrs_sz) 130 #define PLCNT_INIT(base) 131 #define PLCNT_DO(pp, mtype, szc, cnt, flags) { \ 132 if (flags & PG_CACHE_LIST) \ 133 atomic_add_long(&mnoderanges[mtype]. \ 134 mnr_mt_clpgcnt, cnt); \ 135 else if (szc) \ 136 atomic_add_long(&mnoderanges[mtype]. \ 137 mnr_mt_lgpgcnt, cnt); \ 138 else \ 139 atomic_add_long(&mnoderanges[mtype]. \ 140 mnr_mt_flpgcnt, cnt); \ 141 } 142 #endif 143 144 #define PLCNT_INCR(pp, mnode, mtype, szc, flags) { \ 145 long cnt = (1 << PAGE_BSZS_SHIFT(szc)); \ 146 ASSERT(mtype == PP_2_MTYPE(pp)); \ 147 if (physmax4g && mtype <= mtype4g) \ 148 atomic_add_long(&freemem4g, cnt); \ 149 PLCNT_DO(pp, mtype, szc, cnt, flags); \ 150 } 151 152 #define PLCNT_DECR(pp, mnode, mtype, szc, flags) { \ 153 long cnt = ((-1) << PAGE_BSZS_SHIFT(szc)); \ 154 ASSERT(mtype == PP_2_MTYPE(pp)); \ 155 if (physmax4g && mtype <= mtype4g) \ 156 atomic_add_long(&freemem4g, cnt); \ 157 PLCNT_DO(pp, mtype, szc, cnt, flags); \ 158 } 159 160 /* 161 * macros to update page list max counts. no-op on x86. 162 */ 163 #define PLCNT_XFER_NORELOC(pp) 164 165 #define PLCNT_MODIFY_MAX(pfn, cnt) mtype_modify_max(pfn, (pgcnt_t)cnt) 166 167 extern mnoderange_t *mnoderanges; 168 extern int mnoderangecnt; 169 extern int mtype4g; 170 171 /* 172 * 4g memory management variables for systems with more than 4g of memory: 173 * 174 * physical memory below 4g is required for 32bit dma devices and, currently, 175 * for kmem memory. On systems with more than 4g of memory, the pool of memory 176 * below 4g can be depleted without any paging activity given that there is 177 * likely to be sufficient memory above 4g. 178 * 179 * physmax4g is set true if the largest pfn is over 4g. The rest of the 180 * 4g memory management code is enabled only when physmax4g is true. 181 * 182 * maxmem4g is the count of the maximum number of pages on the page lists 183 * with physical addresses below 4g. It can be a lot less then 4g given that 184 * BIOS may reserve large chunks of space below 4g for hot plug pci devices, 185 * agp aperture etc. 186 * 187 * freemem4g maintains the count of the number of available pages on the 188 * page lists with physical addresses below 4g. 189 * 190 * DESFREE4G specifies the desired amount of below 4g memory. It defaults to 191 * 6% (desfree4gshift = 4) of maxmem4g. 192 * 193 * RESTRICT4G_ALLOC returns true if freemem4g falls below DESFREE4G 194 * and the amount of physical memory above 4g is greater than freemem4g. 195 * In this case, page_get_* routines will restrict below 4g allocations 196 * for requests that don't specifically require it. 197 */ 198 199 extern int physmax4g; 200 extern pgcnt_t maxmem4g; 201 extern pgcnt_t freemem4g; 202 extern int lotsfree4gshift; 203 extern int desfree4gshift; 204 #define LOTSFREE4G (maxmem4g >> lotsfree4gshift) 205 #define DESFREE4G (maxmem4g >> desfree4gshift) 206 207 #define RESTRICT4G_ALLOC \ 208 (physmax4g && (freemem4g < DESFREE4G) && ((freemem4g << 1) < freemem)) 209 210 /* 211 * 16m memory management: 212 * 213 * reserve some amount of physical memory below 16m for legacy devices. 214 * 215 * RESTRICT16M_ALLOC returns true if an there are sufficient free pages above 216 * 16m or if the 16m pool drops below DESFREE16M. 217 * 218 * In this case, general page allocations via page_get_{free,cache}list 219 * routines will be restricted from allocating from the 16m pool. Allocations 220 * that require specific pfn ranges (page_get_anylist) and PG_PANIC allocations 221 * are not restricted. 222 */ 223 224 #define FREEMEM16M MTYPE_FREEMEM(0) 225 #define DESFREE16M desfree16m 226 #define RESTRICT16M_ALLOC(freemem, pgcnt, flags) \ 227 ((freemem != 0) && ((flags & PG_PANIC) == 0) && \ 228 ((freemem >= (FREEMEM16M)) || \ 229 (FREEMEM16M < (DESFREE16M + pgcnt)))) 230 extern pgcnt_t desfree16m; 231 232 extern int restricted_kmemalloc; 233 extern int memrange_num(pfn_t); 234 extern int pfn_2_mtype(pfn_t); 235 extern int mtype_func(int, int, uint_t); 236 extern void mtype_modify_max(pfn_t, long); 237 extern int mnode_pgcnt(int); 238 extern int mnode_range_cnt(int); 239 240 #define NUM_MEM_RANGES 4 /* memory range types */ 241 242 /* 243 * candidate counters in vm_pagelist.c are indexed by color and range 244 */ 245 #define MAX_MNODE_MRANGES NUM_MEM_RANGES 246 #define MNODE_RANGE_CNT(mnode) mnode_range_cnt(mnode) 247 #define MNODE_MAX_MRANGE(mnode) (memrange_num(mem_node_config[mnode].physbase)) 248 #define MTYPE_2_MRANGE(mnode, mtype) \ 249 (mnode_maxmrange[mnode] - mnoderanges[mtype].mnr_memrange) 250 251 /* 252 * Per page size free lists. Allocated dynamically. 253 * dimensions [mtype][mmu_page_sizes][colors] 254 * 255 * mtype specifies a physical memory range with a unique mnode. 256 */ 257 258 extern page_t ****page_freelists; 259 260 #define PAGE_FREELISTS(mnode, szc, color, mtype) \ 261 (*(page_freelists[mtype][szc] + (color))) 262 263 /* 264 * For now there is only a single size cache list. Allocated dynamically. 265 * dimensions [mtype][colors] 266 * 267 * mtype specifies a physical memory range with a unique mnode. 268 */ 269 extern page_t ***page_cachelists; 270 271 #define PAGE_CACHELISTS(mnode, color, mtype) \ 272 (*(page_cachelists[mtype] + (color))) 273 274 /* 275 * There are mutexes for both the page freelist 276 * and the page cachelist. We want enough locks to make contention 277 * reasonable, but not too many -- otherwise page_freelist_lock() gets 278 * so expensive that it becomes the bottleneck! 279 */ 280 281 #define NPC_MUTEX 16 282 283 extern kmutex_t *fpc_mutex[NPC_MUTEX]; 284 extern kmutex_t *cpc_mutex[NPC_MUTEX]; 285 286 extern page_t *page_get_mnode_freelist(int, uint_t, int, uchar_t, uint_t); 287 extern page_t *page_get_mnode_cachelist(uint_t, uint_t, int, int); 288 289 /* mem node iterator is not used on x86 */ 290 #define MEM_NODE_ITERATOR_DECL(it) 291 #define MEM_NODE_ITERATOR_INIT(pfn, mnode, it) 292 293 /* 294 * interleaved_mnodes mode is never set on x86, therefore, 295 * simply return the limits of the given mnode, which then 296 * determines the length of hpm_counters array for the mnode. 297 */ 298 #define HPM_COUNTERS_LIMITS(mnode, physbase, physmax, first) \ 299 { \ 300 (physbase) = mem_node_config[(mnode)].physbase; \ 301 (physmax) = mem_node_config[(mnode)].physmax; \ 302 (first) = (mnode); \ 303 } 304 305 #define PAGE_CTRS_WRITE_LOCK(mnode) \ 306 { \ 307 rw_enter(&page_ctrs_rwlock[(mnode)], RW_WRITER);\ 308 page_freelist_lock(mnode); \ 309 } 310 311 #define PAGE_CTRS_WRITE_UNLOCK(mnode) \ 312 { \ 313 page_freelist_unlock(mnode); \ 314 rw_exit(&page_ctrs_rwlock[(mnode)]); \ 315 } 316 317 #define PAGE_GET_COLOR_SHIFT(szc, nszc) \ 318 (hw_page_array[(nszc)].hp_shift - hw_page_array[(szc)].hp_shift) 319 320 #define PAGE_CONVERT_COLOR(ncolor, szc, nszc) \ 321 ((ncolor) << PAGE_GET_COLOR_SHIFT((szc), (nszc))) 322 323 #define PFN_2_COLOR(pfn, szc, it) \ 324 (((pfn) & page_colors_mask) >> \ 325 (hw_page_array[szc].hp_shift - hw_page_array[0].hp_shift)) 326 327 #define PNUM_SIZE(szc) \ 328 (hw_page_array[(szc)].hp_pgcnt) 329 #define PNUM_SHIFT(szc) \ 330 (hw_page_array[(szc)].hp_shift - hw_page_array[0].hp_shift) 331 #define PAGE_GET_SHIFT(szc) \ 332 (hw_page_array[(szc)].hp_shift) 333 #define PAGE_GET_PAGECOLORS(szc) \ 334 (hw_page_array[(szc)].hp_colors) 335 336 /* 337 * This macro calculates the next sequential pfn with the specified 338 * color using color equivalency mask 339 */ 340 #define PAGE_NEXT_PFN_FOR_COLOR(pfn, szc, color, ceq_mask, color_mask, it) \ 341 ASSERT(((color) & ~(ceq_mask)) == 0); \ 342 { \ 343 uint_t pfn_shift = PAGE_BSZS_SHIFT(szc); \ 344 pfn_t spfn = pfn >> pfn_shift; \ 345 pfn_t stride = (ceq_mask) + 1; \ 346 ASSERT((((ceq_mask) + 1) & (ceq_mask)) == 0); \ 347 if (((spfn ^ (color)) & (ceq_mask)) == 0) { \ 348 pfn += stride << pfn_shift; \ 349 } else { \ 350 pfn = (spfn & ~(pfn_t)(ceq_mask)) | (color); \ 351 pfn = (pfn > spfn ? pfn : pfn + stride) << pfn_shift; \ 352 } \ 353 } 354 355 /* get the color equivalency mask for the next szc */ 356 #define PAGE_GET_NSZ_MASK(szc, mask) \ 357 ((mask) >> (PAGE_GET_SHIFT((szc) + 1) - PAGE_GET_SHIFT(szc))) 358 359 /* get the color of the next szc */ 360 #define PAGE_GET_NSZ_COLOR(szc, color) \ 361 ((color) >> (PAGE_GET_SHIFT((szc) + 1) - PAGE_GET_SHIFT(szc))) 362 363 /* Find the bin for the given page if it was of size szc */ 364 #define PP_2_BIN_SZC(pp, szc) (PFN_2_COLOR(pp->p_pagenum, szc, NULL)) 365 366 #define PP_2_BIN(pp) (PP_2_BIN_SZC(pp, pp->p_szc)) 367 368 #define PP_2_MEM_NODE(pp) (PFN_2_MEM_NODE(pp->p_pagenum)) 369 #define PP_2_MTYPE(pp) (pfn_2_mtype(pfn_to_mfn(pp->p_pagenum))) 370 #define PP_2_SZC(pp) (pp->p_szc) 371 372 #define SZCPAGES(szc) (1 << PAGE_BSZS_SHIFT(szc)) 373 #define PFN_BASE(pfnum, szc) (pfnum & ~(SZCPAGES(szc) - 1)) 374 375 /* 376 * this structure is used for walking free page lists 377 * controls when to split large pages into smaller pages, 378 * and when to coalesce smaller pages into larger pages 379 */ 380 typedef struct page_list_walker { 381 uint_t plw_colors; /* num of colors for szc */ 382 uint_t plw_color_mask; /* colors-1 */ 383 uint_t plw_bin_step; /* next bin: 1 or 2 */ 384 uint_t plw_count; /* loop count */ 385 uint_t plw_bin0; /* starting bin */ 386 uint_t plw_bin_marker; /* bin after initial jump */ 387 uint_t plw_bin_split_prev; /* last bin we tried to split */ 388 uint_t plw_do_split; /* set if OK to split */ 389 uint_t plw_split_next; /* next bin to split */ 390 uint_t plw_ceq_dif; /* number of different color groups */ 391 /* to check */ 392 uint_t plw_ceq_mask[MMU_PAGE_SIZES + 1]; /* color equiv mask */ 393 uint_t plw_bins[MMU_PAGE_SIZES + 1]; /* num of bins */ 394 } page_list_walker_t; 395 396 void page_list_walk_init(uchar_t szc, uint_t flags, uint_t bin, 397 int can_split, int use_ceq, page_list_walker_t *plw); 398 399 uint_t page_list_walk_next_bin(uchar_t szc, uint_t bin, 400 page_list_walker_t *plw); 401 402 extern struct cpu cpus[]; 403 #define CPU0 cpus 404 405 #if defined(__amd64) 406 407 /* 408 * set the mtype range (called from page_get_{free,cache}list) 409 * - set range to above 4g if the system has more than 4g of memory and the 410 * amount of memory below 4g runs low. If not, set range to above 16m if 411 * 16m threshold is reached otherwise set range to all of memory 412 * starting from the hi pfns. 413 * 414 * page_get_anylist gets its mtype range from the specified ddi_dma_attr_t. 415 */ 416 #define MTYPE_INIT(mtype, vp, vaddr, flags, pgsz) { \ 417 mtype = mnoderangecnt - 1; \ 418 if (RESTRICT4G_ALLOC) { \ 419 VM_STAT_ADD(vmm_vmstats.restrict4gcnt); \ 420 /* here only for > 4g systems */ \ 421 flags |= PGI_MT_RANGE4G; \ 422 } else if (RESTRICT16M_ALLOC(freemem, btop(pgsz), flags)) { \ 423 flags |= PGI_MT_RANGE16M; \ 424 } else { \ 425 VM_STAT_ADD(vmm_vmstats.unrestrict16mcnt); \ 426 VM_STAT_COND_ADD((flags & PG_PANIC), \ 427 vmm_vmstats.pgpanicalloc); \ 428 flags |= PGI_MT_RANGE0; \ 429 } \ 430 } 431 432 #elif defined(__i386) 433 434 /* 435 * set the mtype range 436 * - kmem requests needs to be below 4g if restricted_kmemalloc is set. 437 * - for non kmem requests, set range to above 4g if the amount of memory 438 * below 4g runs low. 439 */ 440 441 #define MTYPE_INIT(mtype, vp, vaddr, flags, pgsz) { \ 442 if (restricted_kmemalloc && VN_ISKAS(vp) && \ 443 (caddr_t)(vaddr) >= kernelheap && \ 444 (caddr_t)(vaddr) < ekernelheap) { \ 445 ASSERT(physmax4g); \ 446 mtype = mtype4g; \ 447 if (RESTRICT16M_ALLOC(freemem4g - btop(pgsz), \ 448 btop(pgsz), flags)) { \ 449 flags |= PGI_MT_RANGE16M; \ 450 } else { \ 451 VM_STAT_ADD(vmm_vmstats.unrestrict16mcnt); \ 452 VM_STAT_COND_ADD((flags & PG_PANIC), \ 453 vmm_vmstats.pgpanicalloc); \ 454 flags |= PGI_MT_RANGE0; \ 455 } \ 456 } else { \ 457 mtype = mnoderangecnt - 1; \ 458 if (RESTRICT4G_ALLOC) { \ 459 VM_STAT_ADD(vmm_vmstats.restrict4gcnt); \ 460 /* here only for > 4g systems */ \ 461 flags |= PGI_MT_RANGE4G; \ 462 } else if (RESTRICT16M_ALLOC(freemem, btop(pgsz), \ 463 flags)) { \ 464 flags |= PGI_MT_RANGE16M; \ 465 } else { \ 466 VM_STAT_ADD(vmm_vmstats.unrestrict16mcnt); \ 467 VM_STAT_COND_ADD((flags & PG_PANIC), \ 468 vmm_vmstats.pgpanicalloc); \ 469 flags |= PGI_MT_RANGE0; \ 470 } \ 471 } \ 472 } 473 474 #endif /* __i386 */ 475 476 /* 477 * macros to loop through the mtype range (page_get_mnode_{free,cache,any}list, 478 * and page_get_contig_pages) 479 * 480 * MTYPE_START sets the initial mtype. -1 if the mtype range specified does 481 * not contain mnode. 482 * 483 * MTYPE_NEXT sets the next mtype. -1 if there are no more valid 484 * mtype in the range. 485 */ 486 487 #define MTYPE_START(mnode, mtype, flags) \ 488 (mtype = mtype_func(mnode, mtype, flags)) 489 490 #define MTYPE_NEXT(mnode, mtype, flags) { \ 491 if (flags & PGI_MT_RANGE) { \ 492 mtype = mtype_func(mnode, mtype, flags | PGI_MT_NEXT); \ 493 } else { \ 494 mtype = -1; \ 495 } \ 496 } 497 498 /* mtype init for page_get_replacement_page */ 499 500 #define MTYPE_PGR_INIT(mtype, flags, pp, mnode, pgcnt) { \ 501 mtype = mnoderangecnt - 1; \ 502 if (RESTRICT16M_ALLOC(freemem, pgcnt, flags)) { \ 503 flags |= PGI_MT_RANGE16M; \ 504 } else { \ 505 VM_STAT_ADD(vmm_vmstats.unrestrict16mcnt); \ 506 flags |= PGI_MT_RANGE0; \ 507 } \ 508 } 509 510 #define MNODE_PGCNT(mnode) mnode_pgcnt(mnode) 511 512 #define MNODETYPE_2_PFN(mnode, mtype, pfnlo, pfnhi) \ 513 ASSERT(mnoderanges[mtype].mnr_mnode == mnode); \ 514 pfnlo = mnoderanges[mtype].mnr_pfnlo; \ 515 pfnhi = mnoderanges[mtype].mnr_pfnhi; 516 517 #define PC_BIN_MUTEX(mnode, bin, flags) ((flags & PG_FREE_LIST) ? \ 518 &fpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode] : \ 519 &cpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode]) 520 521 #define FPC_MUTEX(mnode, i) (&fpc_mutex[i][mnode]) 522 #define CPC_MUTEX(mnode, i) (&cpc_mutex[i][mnode]) 523 524 #ifdef DEBUG 525 #define CHK_LPG(pp, szc) chk_lpg(pp, szc) 526 extern void chk_lpg(page_t *, uchar_t); 527 #else 528 #define CHK_LPG(pp, szc) 529 #endif 530 531 #define FULL_REGION_CNT(rg_szc) \ 532 (LEVEL_SIZE(rg_szc) >> LEVEL_SHIFT(rg_szc - 1)) 533 534 /* Return the leader for this mapping size */ 535 #define PP_GROUPLEADER(pp, szc) \ 536 (&(pp)[-(int)((pp)->p_pagenum & (SZCPAGES(szc)-1))]) 537 538 /* Return the root page for this page based on p_szc */ 539 #define PP_PAGEROOT(pp) ((pp)->p_szc == 0 ? (pp) : \ 540 PP_GROUPLEADER((pp), (pp)->p_szc)) 541 542 /* 543 * The counter base must be per page_counter element to prevent 544 * races when re-indexing, and the base page size element should 545 * be aligned on a boundary of the given region size. 546 * 547 * We also round up the number of pages spanned by the counters 548 * for a given region to PC_BASE_ALIGN in certain situations to simplify 549 * the coding for some non-performance critical routines. 550 */ 551 552 #define PC_BASE_ALIGN ((pfn_t)1 << PAGE_BSZS_SHIFT(MMU_PAGE_SIZES-1)) 553 #define PC_BASE_ALIGN_MASK (PC_BASE_ALIGN - 1) 554 555 /* 556 * cpu/mmu-dependent vm variables 557 */ 558 extern uint_t mmu_page_sizes; 559 extern uint_t mmu_exported_page_sizes; 560 561 /* For x86, userszc is the same as the kernel's szc */ 562 #define USERSZC_2_SZC(userszc) (userszc) 563 #define SZC_2_USERSZC(szc) (szc) 564 565 /* 566 * for hw_page_map_t, sized to hold the ratio of large page to base 567 * pagesize (1024 max) 568 */ 569 typedef short hpmctr_t; 570 571 /* 572 * get the setsize of the current cpu - assume homogenous for x86 573 */ 574 extern int l2cache_sz, l2cache_linesz, l2cache_assoc; 575 576 #define L2CACHE_ALIGN l2cache_linesz 577 #define L2CACHE_ALIGN_MAX 64 578 #define CPUSETSIZE() \ 579 (l2cache_assoc ? (l2cache_sz / l2cache_assoc) : MMU_PAGESIZE) 580 581 /* 582 * Return the log2(pagesize(szc) / MMU_PAGESIZE) --- or the shift count 583 * for the number of base pages in this pagesize 584 */ 585 #define PAGE_BSZS_SHIFT(szc) (LEVEL_SHIFT(szc) - MMU_PAGESHIFT) 586 587 /* 588 * Internal PG_ flags. 589 */ 590 #define PGI_RELOCONLY 0x010000 /* opposite of PG_NORELOC */ 591 #define PGI_NOCAGE 0x020000 /* cage is disabled */ 592 #define PGI_PGCPHIPRI 0x040000 /* page_get_contig_page pri alloc */ 593 #define PGI_PGCPSZC0 0x080000 /* relocate base pagesize page */ 594 595 /* 596 * PGI range flags - should not overlap PGI flags 597 */ 598 #define PGI_MT_RANGE0 0x1000000 /* mtype range to 0 */ 599 #define PGI_MT_RANGE16M 0x2000000 /* mtype range to 16m */ 600 #define PGI_MT_RANGE4G 0x4000000 /* mtype range to 4g */ 601 #define PGI_MT_NEXT 0x8000000 /* get next mtype */ 602 #define PGI_MT_RANGE (PGI_MT_RANGE0 | PGI_MT_RANGE16M | PGI_MT_RANGE4G) 603 604 /* 605 * Maximum and default values for user heap, stack, private and shared 606 * anonymous memory, and user text and initialized data. 607 * Used by map_pgsz*() routines. 608 */ 609 extern size_t max_uheap_lpsize; 610 extern size_t default_uheap_lpsize; 611 extern size_t max_ustack_lpsize; 612 extern size_t default_ustack_lpsize; 613 extern size_t max_privmap_lpsize; 614 extern size_t max_uidata_lpsize; 615 extern size_t max_utext_lpsize; 616 extern size_t max_shm_lpsize; 617 extern size_t mcntl0_lpsize; 618 619 /* 620 * Sanity control. Don't use large pages regardless of user 621 * settings if there's less than priv or shm_lpg_min_physmem memory installed. 622 * The units for this variable are 8K pages. 623 */ 624 extern pgcnt_t privm_lpg_min_physmem; 625 extern pgcnt_t shm_lpg_min_physmem; 626 627 /* 628 * hash as and addr to get a bin. 629 */ 630 631 #define AS_2_BIN(as, seg, vp, addr, bin, szc) \ 632 bin = (((((uintptr_t)(addr) >> PAGESHIFT) + ((uintptr_t)(as) >> 4)) \ 633 & page_colors_mask) >> \ 634 (hw_page_array[szc].hp_shift - hw_page_array[0].hp_shift)) 635 636 /* 637 * cpu private vm data - accessed thru CPU->cpu_vm_data 638 * vc_pnum_memseg: tracks last memseg visited in page_numtopp_nolock() 639 * vc_pnext_memseg: tracks last memseg visited in page_nextn() 640 * vc_kmptr: orignal unaligned kmem pointer for this vm_cpu_data_t 641 * vc_kmsize: orignal kmem size for this vm_cpu_data_t 642 */ 643 644 typedef struct { 645 struct memseg *vc_pnum_memseg; 646 struct memseg *vc_pnext_memseg; 647 void *vc_kmptr; 648 size_t vc_kmsize; 649 } vm_cpu_data_t; 650 651 /* allocation size to ensure vm_cpu_data_t resides in its own cache line */ 652 #define VM_CPU_DATA_PADSIZE \ 653 (P2ROUNDUP(sizeof (vm_cpu_data_t), L2CACHE_ALIGN_MAX)) 654 655 /* for boot cpu before kmem is initialized */ 656 extern char vm_cpu_data0[]; 657 658 /* 659 * When a bin is empty, and we can't satisfy a color request correctly, 660 * we scan. If we assume that the programs have reasonable spatial 661 * behavior, then it will not be a good idea to use the adjacent color. 662 * Using the adjacent color would result in virtually adjacent addresses 663 * mapping into the same spot in the cache. So, if we stumble across 664 * an empty bin, skip a bunch before looking. After the first skip, 665 * then just look one bin at a time so we don't miss our cache on 666 * every look. Be sure to check every bin. Page_create() will panic 667 * if we miss a page. 668 * 669 * This also explains the `<=' in the for loops in both page_get_freelist() 670 * and page_get_cachelist(). Since we checked the target bin, skipped 671 * a bunch, then continued one a time, we wind up checking the target bin 672 * twice to make sure we get all of them bins. 673 */ 674 #define BIN_STEP 19 675 676 #ifdef VM_STATS 677 struct vmm_vmstats_str { 678 ulong_t pgf_alloc[MMU_PAGE_SIZES]; /* page_get_freelist */ 679 ulong_t pgf_allocok[MMU_PAGE_SIZES]; 680 ulong_t pgf_allocokrem[MMU_PAGE_SIZES]; 681 ulong_t pgf_allocfailed[MMU_PAGE_SIZES]; 682 ulong_t pgf_allocdeferred; 683 ulong_t pgf_allocretry[MMU_PAGE_SIZES]; 684 ulong_t pgc_alloc; /* page_get_cachelist */ 685 ulong_t pgc_allocok; 686 ulong_t pgc_allocokrem; 687 ulong_t pgc_allocokdeferred; 688 ulong_t pgc_allocfailed; 689 ulong_t pgcp_alloc[MMU_PAGE_SIZES]; /* page_get_contig_pages */ 690 ulong_t pgcp_allocfailed[MMU_PAGE_SIZES]; 691 ulong_t pgcp_allocempty[MMU_PAGE_SIZES]; 692 ulong_t pgcp_allocok[MMU_PAGE_SIZES]; 693 ulong_t ptcp[MMU_PAGE_SIZES]; /* page_trylock_contig_pages */ 694 ulong_t ptcpfreethresh[MMU_PAGE_SIZES]; 695 ulong_t ptcpfailexcl[MMU_PAGE_SIZES]; 696 ulong_t ptcpfailszc[MMU_PAGE_SIZES]; 697 ulong_t ptcpfailcage[MMU_PAGE_SIZES]; 698 ulong_t ptcpok[MMU_PAGE_SIZES]; 699 ulong_t pgmf_alloc[MMU_PAGE_SIZES]; /* page_get_mnode_freelist */ 700 ulong_t pgmf_allocfailed[MMU_PAGE_SIZES]; 701 ulong_t pgmf_allocempty[MMU_PAGE_SIZES]; 702 ulong_t pgmf_allocok[MMU_PAGE_SIZES]; 703 ulong_t pgmc_alloc; /* page_get_mnode_cachelist */ 704 ulong_t pgmc_allocfailed; 705 ulong_t pgmc_allocempty; 706 ulong_t pgmc_allocok; 707 ulong_t pladd_free[MMU_PAGE_SIZES]; /* page_list_add/sub */ 708 ulong_t plsub_free[MMU_PAGE_SIZES]; 709 ulong_t pladd_cache; 710 ulong_t plsub_cache; 711 ulong_t plsubpages_szcbig; 712 ulong_t plsubpages_szc0; 713 ulong_t pfs_req[MMU_PAGE_SIZES]; /* page_freelist_split */ 714 ulong_t pfs_demote[MMU_PAGE_SIZES]; 715 ulong_t pfc_coalok[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 716 ulong_t ppr_reloc[MMU_PAGE_SIZES]; /* page_relocate */ 717 ulong_t ppr_relocnoroot[MMU_PAGE_SIZES]; 718 ulong_t ppr_reloc_replnoroot[MMU_PAGE_SIZES]; 719 ulong_t ppr_relocnolock[MMU_PAGE_SIZES]; 720 ulong_t ppr_relocnomem[MMU_PAGE_SIZES]; 721 ulong_t ppr_relocok[MMU_PAGE_SIZES]; 722 ulong_t ppr_copyfail; 723 /* page coalesce counter */ 724 ulong_t page_ctrs_coalesce[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 725 /* candidates useful */ 726 ulong_t page_ctrs_cands_skip[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 727 /* ctrs changed after locking */ 728 ulong_t page_ctrs_changed[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 729 /* page_freelist_coalesce failed */ 730 ulong_t page_ctrs_failed[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 731 ulong_t page_ctrs_coalesce_all; /* page coalesce all counter */ 732 ulong_t page_ctrs_cands_skip_all; /* candidates useful for all func */ 733 ulong_t restrict4gcnt; 734 ulong_t unrestrict16mcnt; /* non-DMA 16m allocs allowed */ 735 ulong_t pgpanicalloc; /* PG_PANIC allocation */ 736 }; 737 extern struct vmm_vmstats_str vmm_vmstats; 738 #endif /* VM_STATS */ 739 740 extern size_t page_ctrs_sz(void); 741 extern caddr_t page_ctrs_alloc(caddr_t); 742 extern void page_ctr_sub(int, int, page_t *, int); 743 extern page_t *page_freelist_split(uchar_t, 744 uint_t, int, int, pfn_t, page_list_walker_t *); 745 extern page_t *page_freelist_coalesce(int, uchar_t, uint_t, uint_t, int, 746 pfn_t); 747 extern uint_t page_get_pagecolors(uint_t); 748 749 #ifdef __cplusplus 750 } 751 #endif 752 753 #endif /* _VM_DEP_H */ 754