1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 /* 25 * Copyright (c) 2010, Intel Corporation. 26 * All rights reserved. 27 */ 28 29 /* 30 * UNIX machine dependent virtual memory support. 31 */ 32 33 #ifndef _VM_DEP_H 34 #define _VM_DEP_H 35 36 37 #ifdef __cplusplus 38 extern "C" { 39 #endif 40 41 #include <sys/clock.h> 42 #include <vm/hat_pte.h> 43 #include <sys/param.h> 44 #include <sys/memnode.h> 45 46 /* 47 * WARNING: vm_dep.h is included by files in common. 48 */ 49 50 #define GETTICK() tsc_read() 51 /* 52 * Do not use this function for obtaining clock tick. This 53 * is called by callers who do not need to have a guarenteed 54 * correct tick value. The proper routine to use is tsc_read(). 55 */ 56 57 extern hrtime_t randtick(); 58 extern uint_t page_create_update_flags_x86(uint_t); 59 extern int kernel_page_update_flags_x86(uint_t *); 60 61 extern size_t plcnt_sz(size_t); 62 #define PLCNT_SZ(ctrs_sz) (ctrs_sz = plcnt_sz(ctrs_sz)) 63 64 extern caddr_t plcnt_init(caddr_t); 65 #define PLCNT_INIT(addr) (addr = plcnt_init(addr)) 66 67 extern void plcnt_inc_dec(page_t *, int, int, long, int); 68 #define PLCNT_INCR(pp, mnode, mtype, szc, flags) \ 69 plcnt_inc_dec(pp, mtype, szc, 1l << PAGE_BSZS_SHIFT(szc), flags) 70 #define PLCNT_DECR(pp, mnode, mtype, szc, flags) \ 71 plcnt_inc_dec(pp, mtype, szc, -1l << PAGE_BSZS_SHIFT(szc), flags) 72 73 /* 74 * macro to update page list max counts. no-op on x86. 75 */ 76 #define PLCNT_XFER_NORELOC(pp) 77 78 /* 79 * macro to modify the page list max counts when memory is added to 80 * the page lists during startup (add_physmem) or during a DR operation 81 * when memory is added (kphysm_add_memory_dynamic) or deleted 82 * (kphysm_del_cleanup). 83 */ 84 #define PLCNT_MODIFY_MAX(pfn, cnt) mtype_modify_max(pfn, cnt) 85 86 extern int memrange_num(pfn_t); 87 extern int pfn_2_mtype(pfn_t); 88 extern int mtype_func(int, int, uint_t); 89 extern void mtype_modify_max(pfn_t, long); 90 extern int mnode_pgcnt(int); 91 extern int mnode_range_cnt(int); 92 93 /* 94 * candidate counters in vm_pagelist.c are indexed by color and range 95 */ 96 #define NUM_MEM_RANGES 4 /* memory range types */ 97 #define MAX_MNODE_MRANGES NUM_MEM_RANGES 98 #define MNODE_RANGE_CNT(mnode) mnode_range_cnt(mnode) 99 #define MNODE_MAX_MRANGE(mnode) memrange_num(mem_node_config[mnode].physbase) 100 101 /* 102 * combined memory ranges from mnode and memranges[] to manage single 103 * mnode/mtype dimension in the page lists. 104 */ 105 typedef struct { 106 pfn_t mnr_pfnlo; 107 pfn_t mnr_pfnhi; 108 int mnr_mnode; 109 int mnr_memrange; /* index into memranges[] */ 110 int mnr_next; /* next lower PA mnoderange */ 111 int mnr_exists; 112 /* maintain page list stats */ 113 pgcnt_t mnr_mt_clpgcnt; /* cache list cnt */ 114 pgcnt_t mnr_mt_flpgcnt[MMU_PAGE_SIZES]; /* free list cnt per szc */ 115 pgcnt_t mnr_mt_totcnt; /* sum of cache and free lists */ 116 #ifdef DEBUG 117 struct mnr_mts { /* mnode/mtype szc stats */ 118 pgcnt_t mnr_mts_pgcnt; 119 int mnr_mts_colors; 120 pgcnt_t *mnr_mtsc_pgcnt; 121 } *mnr_mts; 122 #endif 123 } mnoderange_t; 124 125 #define MEMRANGEHI(mtype) \ 126 (((mtype) > 0) ? memranges[(mtype) - 1] - 1: physmax) 127 #define MEMRANGELO(mtype) (memranges[(mtype)]) 128 129 #define MTYPE_FREEMEM(mt) (mnoderanges[(mt)].mnr_mt_totcnt) 130 131 /* 132 * This was really badly defined, it implicitly uses mnode_maxmrange[] 133 * which is a static in vm_pagelist.c 134 */ 135 extern int mtype_2_mrange(int); 136 #define MTYPE_2_MRANGE(mnode, mtype) \ 137 (mnode_maxmrange[mnode] - mtype_2_mrange(mtype)) 138 139 /* 140 * this structure is used for walking free page lists, it 141 * controls when to split large pages into smaller pages, 142 * and when to coalesce smaller pages into larger pages 143 */ 144 typedef struct page_list_walker { 145 uint_t plw_colors; /* num of colors for szc */ 146 uint_t plw_color_mask; /* colors-1 */ 147 uint_t plw_bin_step; /* next bin: 1 or 2 */ 148 uint_t plw_count; /* loop count */ 149 uint_t plw_bin0; /* starting bin */ 150 uint_t plw_bin_marker; /* bin after initial jump */ 151 uint_t plw_bin_split_prev; /* last bin we tried to split */ 152 uint_t plw_do_split; /* set if OK to split */ 153 uint_t plw_split_next; /* next bin to split */ 154 uint_t plw_ceq_dif; /* number of different color groups */ 155 /* to check */ 156 uint_t plw_ceq_mask[MMU_PAGE_SIZES + 1]; /* color equiv mask */ 157 uint_t plw_bins[MMU_PAGE_SIZES + 1]; /* num of bins */ 158 } page_list_walker_t; 159 160 /* 161 * Page freelists are organized as two freelist types user and kernel, with 162 * their own policy and allocation routines. The definitions related to the 163 * freelist type structure are grouped below. 164 * 165 * The page size free lists themselves are allocated dynamically with 166 * dimensions [mtype][mmu_page_sizes][colors] 167 * 168 * mtype specifies a physical memory range with a unique mnode. 169 */ 170 171 #define MAX_PFLT_POLICIES 3 172 #define MAX_PFLT_TYPE 2 173 enum freelist_types {PFLT_USER, PFLT_KMEM}; 174 175 /* 176 * The kernel only needs a small number of page colors, far fewer than user 177 * programs. 178 */ 179 #define KFLT_PAGE_COLORS 16 180 181 typedef struct page_freelist_type page_freelist_type_t; 182 extern page_freelist_type_t flt_user; 183 extern page_freelist_type_t flt_kern; 184 extern page_freelist_type_t *ufltp; 185 extern page_freelist_type_t *kfltp; 186 187 void page_flt_init(page_freelist_type_t *, page_freelist_type_t *); 188 page_t *page_get_uflt(struct vnode *, u_offset_t, struct seg *, caddr_t, 189 size_t, uint_t, struct lgrp *); 190 page_t *page_get_kflt(struct vnode *, u_offset_t, struct seg *, caddr_t, 191 size_t, uint_t, struct lgrp *); 192 void page_kflt_walk_init(uchar_t, uint_t, uint_t, int, int, 193 page_list_walker_t *); 194 uint_t page_kflt_walk_next_bin(uchar_t, uint_t, page_list_walker_t *); 195 page_t *page_import_kflt(page_freelist_type_t *, uint_t, int, uchar_t, 196 uint_t, int *); 197 page_t *page_user_alloc_kflt(page_freelist_type_t *, int, uint_t, int, uchar_t, 198 uint_t); 199 void kflt_expand(void); 200 201 typedef page_t *(*pflt_get_func_p) (struct vnode *, u_offset_t, struct seg *, 202 caddr_t, size_t, uint_t, lgrp_t *); 203 typedef page_t *(*pflt_policy_func_p)(page_freelist_type_t *, int, uint_t, int, 204 uchar_t, uint_t); 205 typedef void (*pflt_list_walk_init_func_p)(uchar_t, uint_t, uint_t, int, int, 206 page_list_walker_t *); 207 typedef uint_t (*pflt_list_walk_next_func_p)(uchar_t, uint_t, 208 page_list_walker_t *); 209 210 struct page_freelist_type { 211 int pflt_type; /* type is user or kernel */ 212 pflt_get_func_p pflt_get_free; /* top-level alloc routine */ 213 pflt_list_walk_init_func_p pflt_walk_init; /* walker routines */ 214 pflt_list_walk_next_func_p pflt_walk_next; 215 int pflt_num_policies; /* the number of policy routines */ 216 /* 217 * the policy routines are called by the allocator routine 218 * to implement the actual allocation policies. 219 */ 220 pflt_policy_func_p pflt_policy[MAX_PFLT_POLICIES]; 221 page_t ****pflt_freelists; /* the page freelist arrays */ 222 }; 223 224 #if defined(__amd64) && !defined(__xpv) 225 #define PAGE_FREELISTP(is_kflt, mnode, szc, color, mtype) \ 226 ((is_kflt) ? \ 227 (page_t **)(kfltp->pflt_freelists[mtype] + (color)) : \ 228 ((ufltp->pflt_freelists[mtype][szc] + (color)))) 229 230 #define PAGE_GET_FREELISTS(pp, vp, off, seg, vaddr, size, flags, lgrp) \ 231 { \ 232 if (kflt_on && (((flags) & PG_KFLT) == PG_KFLT)) { \ 233 pp = kfltp->pflt_get_free(vp, off, seg, vaddr, size, \ 234 flags, lgrp); \ 235 } else { \ 236 pp = ufltp->pflt_get_free(vp, off, seg, vaddr, size, \ 237 flags, lgrp); \ 238 } \ 239 } 240 #else /* __amd64 && ! __xpv */ 241 #define PAGE_FREELISTP(is_kflt, mnode, szc, color, mtype) \ 242 ((ufltp->pflt_freelists[mtype][szc] + (color))) 243 244 #define PAGE_GET_FREELISTS(pp, vp, off, seg, vaddr, size, flags, lgrp) \ 245 pp = ufltp->pflt_get_free(vp, off, seg, vaddr, size, \ 246 flags, lgrp); 247 #endif /* __amd64 && ! __xpv */ 248 249 #define PAGE_FREELISTS(is_kflt, mnode, szc, color, mtype) \ 250 (*(PAGE_FREELISTP(is_kflt, mnode, szc, color, mtype))) 251 252 #define PAGE_GET_FREELISTS_POLICY(fp, i) \ 253 (fp->pflt_policy[i]) 254 255 #define PAGE_LIST_WALK_INIT(fp, szc, flags, bin, can_split, use_ceq, plw) \ 256 fp->pflt_walk_init(szc, flags, bin, can_split, use_ceq, plw) 257 258 #define PAGE_LIST_WALK_NEXT(fp, szc, bin, plw) \ 259 fp->pflt_walk_next(szc, bin, plw) 260 261 262 /* 263 * For now there is only a single size cache list. Allocated dynamically. 264 * dimensions [mtype][colors] 265 * 266 * mtype specifies a physical memory range with a unique mnode. 267 */ 268 extern page_t ***page_cachelists; 269 270 #define PAGE_CACHELISTS(mnode, color, mtype) \ 271 (*(page_cachelists[mtype] + (color))) 272 273 /* 274 * There are mutexes for the user page freelist, the kernel page freelist 275 * and the page cachelist. We want enough locks to make contention 276 * reasonable, but not too many -- otherwise page_freelist_lock() gets 277 * so expensive that it becomes the bottleneck! 278 */ 279 280 #define NPC_MUTEX 16 281 282 /* 283 * The kflt_disable variable is used to determine whether the kernel freelist 284 * is supported on this platform. 285 */ 286 extern int kflt_disable; 287 288 extern kmutex_t *fpc_mutex[NPC_MUTEX]; 289 extern kmutex_t *kfpc_mutex[NPC_MUTEX]; 290 extern kmutex_t *cpc_mutex[NPC_MUTEX]; 291 292 #define PC_ISKFLT(fltp) (fltp->pflt_type == PFLT_KMEM) 293 /* flag used by the kflt_export function when calling page_promote */ 294 #define PC_KFLT_EXPORT 0x4 295 296 extern page_t *page_get_mnode_freelist(page_freelist_type_t *, int, uint_t, 297 int, uchar_t, uint_t); 298 extern page_t *page_get_mnode_cachelist(uint_t, uint_t, int, int); 299 extern page_t *page_get_contig_pages(page_freelist_type_t *, int, uint_t, int, 300 uchar_t, uint_t); 301 extern void page_list_walk_init(uchar_t, uint_t, uint_t, int, int, 302 page_list_walker_t *); 303 extern uint_t page_list_walk_next_bin(uchar_t, uint_t, page_list_walker_t *); 304 305 extern void kflt_evict_wakeup(); 306 extern void kflt_freemem_add(pgcnt_t); 307 extern void kflt_freemem_sub(pgcnt_t); 308 309 /* mem node iterator is not used on x86 */ 310 #define MEM_NODE_ITERATOR_DECL(it) 311 #define MEM_NODE_ITERATOR_INIT(pfn, mnode, szc, it) 312 313 /* 314 * interleaved_mnodes mode is never set on x86, therefore, 315 * simply return the limits of the given mnode, which then 316 * determines the length of hpm_counters array for the mnode. 317 */ 318 #define HPM_COUNTERS_LIMITS(mnode, physbase, physmax, first) \ 319 { \ 320 (physbase) = mem_node_config[(mnode)].physbase; \ 321 (physmax) = mem_node_config[(mnode)].physmax; \ 322 (first) = (mnode); \ 323 } 324 325 #define PAGE_CTRS_WRITE_LOCK(mnode) \ 326 { \ 327 rw_enter(&page_ctrs_rwlock[(mnode)], RW_WRITER);\ 328 page_freelist_lock(mnode); \ 329 } 330 331 #define PAGE_CTRS_WRITE_UNLOCK(mnode) \ 332 { \ 333 page_freelist_unlock(mnode); \ 334 rw_exit(&page_ctrs_rwlock[(mnode)]); \ 335 } 336 337 /* 338 * macro to call page_ctrs_adjust() when memory is added 339 * during a DR operation. 340 */ 341 #define PAGE_CTRS_ADJUST(pfn, cnt, rv) { \ 342 spgcnt_t _cnt = (spgcnt_t)(cnt); \ 343 int _mn; \ 344 pgcnt_t _np; \ 345 pfn_t _pfn = (pfn); \ 346 pfn_t _endpfn = _pfn + _cnt; \ 347 while (_pfn < _endpfn) { \ 348 _mn = PFN_2_MEM_NODE(_pfn); \ 349 _np = MIN(_endpfn, mem_node_config[_mn].physmax + 1) - _pfn; \ 350 _pfn += _np; \ 351 if ((rv = page_ctrs_adjust(_mn)) != 0) \ 352 break; \ 353 } \ 354 } 355 356 #define PAGE_GET_COLOR_SHIFT(szc, nszc) \ 357 (hw_page_array[(nszc)].hp_shift - hw_page_array[(szc)].hp_shift) 358 359 #define PAGE_CONVERT_COLOR(ncolor, szc, nszc) \ 360 ((ncolor) << PAGE_GET_COLOR_SHIFT((szc), (nszc))) 361 362 #define PFN_2_COLOR(pfn, szc, it) \ 363 (((pfn) & page_colors_mask) >> \ 364 (hw_page_array[szc].hp_shift - hw_page_array[0].hp_shift)) 365 366 #define PNUM_SIZE(szc) \ 367 (hw_page_array[(szc)].hp_pgcnt) 368 #define PNUM_SHIFT(szc) \ 369 (hw_page_array[(szc)].hp_shift - hw_page_array[0].hp_shift) 370 #define PAGE_GET_SHIFT(szc) \ 371 (hw_page_array[(szc)].hp_shift) 372 #define PAGE_GET_PAGECOLORS(szc) \ 373 (hw_page_array[(szc)].hp_colors) 374 375 /* 376 * This macro calculates the next sequential pfn with the specified 377 * color using color equivalency mask 378 */ 379 #define PAGE_NEXT_PFN_FOR_COLOR(pfn, szc, color, ceq_mask, color_mask, it) \ 380 { \ 381 uint_t pfn_shift = PAGE_BSZS_SHIFT(szc); \ 382 pfn_t spfn = pfn >> pfn_shift; \ 383 pfn_t stride = (ceq_mask) + 1; \ 384 ASSERT(((color) & ~(ceq_mask)) == 0); \ 385 ASSERT((((ceq_mask) + 1) & (ceq_mask)) == 0); \ 386 if (((spfn ^ (color)) & (ceq_mask)) == 0) { \ 387 pfn += stride << pfn_shift; \ 388 } else { \ 389 pfn = (spfn & ~(pfn_t)(ceq_mask)) | (color); \ 390 pfn = (pfn > spfn ? pfn : pfn + stride) << pfn_shift; \ 391 } \ 392 } 393 394 #define USER_2_KMEM_BIN(bin) ((bin) & (KFLT_PAGE_COLORS - 1)) 395 396 /* get the color equivalency mask for the next szc */ 397 #define PAGE_GET_NSZ_MASK(szc, mask) \ 398 ((mask) >> (PAGE_GET_SHIFT((szc) + 1) - PAGE_GET_SHIFT(szc))) 399 400 /* get the color of the next szc */ 401 #define PAGE_GET_NSZ_COLOR(szc, color) \ 402 ((color) >> (PAGE_GET_SHIFT((szc) + 1) - PAGE_GET_SHIFT(szc))) 403 404 /* Find the bin for the given page if it was of size szc */ 405 #define PP_2_BIN_SZC(pp, szc) (PFN_2_COLOR(pp->p_pagenum, szc, NULL)) 406 407 #define PP_2_BIN(pp) ((PP_ISKFLT(pp)) ? \ 408 USER_2_KMEM_BIN(PP_2_BIN_SZC(pp, pp->p_szc)) : \ 409 (PP_2_BIN_SZC(pp, pp->p_szc))) 410 411 #define PP_2_MEM_NODE(pp) (PFN_2_MEM_NODE(pp->p_pagenum)) 412 #define PP_2_MTYPE(pp) (pfn_2_mtype(pp->p_pagenum)) 413 #define PP_2_SZC(pp) (pp->p_szc) 414 415 #define SZCPAGES(szc) (1 << PAGE_BSZS_SHIFT(szc)) 416 #define PFN_BASE(pfnum, szc) (pfnum & ~(SZCPAGES(szc) - 1)) 417 418 void page_list_walk_init(uchar_t szc, uint_t flags, uint_t bin, 419 int can_split, int use_ceq, page_list_walker_t *plw); 420 421 uint_t page_list_walk_next_bin(uchar_t szc, uint_t bin, 422 page_list_walker_t *plw); 423 424 extern struct cpu cpus[]; 425 #define CPU0 cpus 426 427 extern int mtype_init(vnode_t *, caddr_t, uint_t *, size_t); 428 #define MTYPE_INIT(mtype, vp, vaddr, flags, pgsz) \ 429 (mtype = mtype_init(vp, vaddr, &(flags), pgsz)) 430 431 /* 432 * macros to loop through the mtype range (page_get_mnode_{free,cache,any}list, 433 * and page_get_contig_pages) 434 * 435 * MTYPE_START sets the initial mtype. -1 if the mtype range specified does 436 * not contain mnode. 437 * 438 * MTYPE_NEXT sets the next mtype. -1 if there are no more valid 439 * mtype in the range. 440 */ 441 442 #define MTYPE_START(mnode, mtype, flags) \ 443 (mtype = mtype_func(mnode, mtype, flags)) 444 445 #define MTYPE_NEXT(mnode, mtype, flags) { \ 446 if (flags & PGI_MT_RANGE) { \ 447 mtype = mtype_func(mnode, mtype, flags | PGI_MT_NEXT); \ 448 } else { \ 449 mtype = -1; \ 450 } \ 451 } 452 453 extern int mtype_pgr_init(int *, page_t *, int, pgcnt_t); 454 #define MTYPE_PGR_INIT(mtype, flags, pp, mnode, pgcnt) \ 455 (mtype = mtype_pgr_init(&flags, pp, mnode, pgcnt)) 456 457 #define MNODE_PGCNT(mnode) mnode_pgcnt(mnode) 458 459 extern void mnodetype_2_pfn(int, int, pfn_t *, pfn_t *); 460 #define MNODETYPE_2_PFN(mnode, mtype, pfnlo, pfnhi) \ 461 mnodetype_2_pfn(mnode, mtype, &pfnlo, &pfnhi) 462 463 #define PC_FREELIST_BIN_MUTEX(is_kflt, mnode, bin, flags) \ 464 ((is_kflt) ? \ 465 (&kfpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode]) : \ 466 (&fpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode])) 467 468 #define PC_BIN_MUTEX(is_kflt, mnode, bin, flags) \ 469 ((flags & PG_FREE_LIST) ? \ 470 PC_FREELIST_BIN_MUTEX(is_kflt, mnode, bin, flags): \ 471 &cpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode]) 472 473 #define FPC_MUTEX(mnode, i) (&fpc_mutex[i][mnode]) 474 #define CPC_MUTEX(mnode, i) (&cpc_mutex[i][mnode]) 475 #define KFPC_MUTEX(mnode, i) (&kfpc_mutex[i][mnode]) 476 477 #ifdef DEBUG 478 #define CHK_LPG(pp, szc) chk_lpg(pp, szc) 479 extern void chk_lpg(page_t *, uchar_t); 480 #else 481 #define CHK_LPG(pp, szc) 482 #endif 483 484 #define FULL_REGION_CNT(rg_szc) \ 485 (LEVEL_SIZE(rg_szc) >> LEVEL_SHIFT(rg_szc - 1)) 486 487 /* Return the leader for this mapping size */ 488 #define PP_GROUPLEADER(pp, szc) \ 489 (&(pp)[-(int)((pp)->p_pagenum & (SZCPAGES(szc)-1))]) 490 491 /* Return the root page for this page based on p_szc */ 492 #define PP_PAGEROOT(pp) ((pp)->p_szc == 0 ? (pp) : \ 493 PP_GROUPLEADER((pp), (pp)->p_szc)) 494 495 /* 496 * The counter base must be per page_counter element to prevent 497 * races when re-indexing, and the base page size element should 498 * be aligned on a boundary of the given region size. 499 * 500 * We also round up the number of pages spanned by the counters 501 * for a given region to PC_BASE_ALIGN in certain situations to simplify 502 * the coding for some non-performance critical routines. 503 */ 504 505 #define PC_BASE_ALIGN ((pfn_t)1 << PAGE_BSZS_SHIFT(MMU_PAGE_SIZES-1)) 506 #define PC_BASE_ALIGN_MASK (PC_BASE_ALIGN - 1) 507 508 /* 509 * cpu/mmu-dependent vm variables 510 */ 511 extern uint_t mmu_page_sizes; 512 extern uint_t mmu_exported_page_sizes; 513 /* 514 * page sizes that legacy applications can see via getpagesizes(3c). 515 * Used to prevent legacy applications from inadvertantly using the 516 * 'new' large pagesizes (1g and above). 517 */ 518 extern uint_t mmu_legacy_page_sizes; 519 520 /* For x86, userszc is the same as the kernel's szc */ 521 #define USERSZC_2_SZC(userszc) (userszc) 522 #define SZC_2_USERSZC(szc) (szc) 523 524 /* 525 * for hw_page_map_t, sized to hold the ratio of large page to base 526 * pagesize (1024 max) 527 */ 528 typedef short hpmctr_t; 529 530 /* 531 * get the setsize of the current cpu - assume homogenous for x86 532 */ 533 extern int l2cache_sz, l2cache_linesz, l2cache_assoc; 534 535 #define L2CACHE_ALIGN l2cache_linesz 536 #define L2CACHE_ALIGN_MAX 64 537 #define CPUSETSIZE() \ 538 (l2cache_assoc ? (l2cache_sz / l2cache_assoc) : MMU_PAGESIZE) 539 540 /* 541 * Return the log2(pagesize(szc) / MMU_PAGESIZE) --- or the shift count 542 * for the number of base pages in this pagesize 543 */ 544 #define PAGE_BSZS_SHIFT(szc) (LEVEL_SHIFT(szc) - MMU_PAGESHIFT) 545 546 /* 547 * Internal PG_ flags. 548 */ 549 #define PGI_RELOCONLY 0x010000 /* opposite of PG_NORELOC */ 550 #define PGI_NOCAGE 0x020000 /* cage is disabled */ 551 #define PGI_PGCPHIPRI 0x040000 /* page_get_contig_page pri alloc */ 552 #define PGI_PGCPSZC0 0x080000 /* relocate base pagesize page */ 553 554 /* 555 * PGI range flags - should not overlap PGI flags 556 */ 557 #define PGI_MT_RANGE0 0x1000000 /* mtype range to 0 */ 558 #define PGI_MT_RANGE16M 0x2000000 /* mtype range to 16m */ 559 #define PGI_MT_RANGE4G 0x4000000 /* mtype range to 4g */ 560 #define PGI_MT_NEXT 0x8000000 /* get next mtype */ 561 #define PGI_MT_RANGE (PGI_MT_RANGE0 | PGI_MT_RANGE16M | PGI_MT_RANGE4G) 562 563 /* Flag to avoid allocating a page in page_import_kflt() */ 564 #define PGI_NOPGALLOC 0x10000000 565 566 /* 567 * Maximum and default values for user heap, stack, private and shared 568 * anonymous memory, and user text and initialized data. 569 * Used by map_pgsz*() routines. 570 */ 571 extern size_t max_uheap_lpsize; 572 extern size_t default_uheap_lpsize; 573 extern size_t max_ustack_lpsize; 574 extern size_t default_ustack_lpsize; 575 extern size_t max_privmap_lpsize; 576 extern size_t max_uidata_lpsize; 577 extern size_t max_utext_lpsize; 578 extern size_t max_shm_lpsize; 579 extern size_t mcntl0_lpsize; 580 581 /* 582 * Sanity control. Don't use large pages regardless of user 583 * settings if there's less than priv or shm_lpg_min_physmem memory installed. 584 * The units for this variable are 8K pages. 585 */ 586 extern pgcnt_t privm_lpg_min_physmem; 587 extern pgcnt_t shm_lpg_min_physmem; 588 589 /* 590 * hash as and addr to get a bin. 591 */ 592 593 #define AS_2_USER_BIN(as, seg, vp, addr, bin, szc) \ 594 bin = (((((uintptr_t)(addr) >> PAGESHIFT) + ((uintptr_t)(as) >> 4)) \ 595 & page_colors_mask) >> \ 596 (hw_page_array[szc].hp_shift - hw_page_array[0].hp_shift)) 597 598 #define AS_2_BIN(is_kflt, as, seg, vp, addr, bin, szc) { \ 599 AS_2_USER_BIN(as, seg, vp, addr, bin, szc); \ 600 if (is_kflt) { \ 601 bin = USER_2_KMEM_BIN(bin); \ 602 } \ 603 } 604 /* 605 * cpu private vm data - accessed thru CPU->cpu_vm_data 606 * vc_pnum_memseg: tracks last memseg visited in page_numtopp_nolock() 607 * vc_pnext_memseg: tracks last memseg visited in page_nextn() 608 * vc_kmptr: orignal unaligned kmem pointer for this vm_cpu_data_t 609 * vc_kmsize: orignal kmem size for this vm_cpu_data_t 610 */ 611 612 typedef struct { 613 struct memseg *vc_pnum_memseg; 614 struct memseg *vc_pnext_memseg; 615 void *vc_kmptr; 616 size_t vc_kmsize; 617 } vm_cpu_data_t; 618 619 /* allocation size to ensure vm_cpu_data_t resides in its own cache line */ 620 #define VM_CPU_DATA_PADSIZE \ 621 (P2ROUNDUP(sizeof (vm_cpu_data_t), L2CACHE_ALIGN_MAX)) 622 623 /* for boot cpu before kmem is initialized */ 624 extern char vm_cpu_data0[]; 625 626 /* 627 * When a bin is empty, and we can't satisfy a color request correctly, 628 * we scan. If we assume that the programs have reasonable spatial 629 * behavior, then it will not be a good idea to use the adjacent color. 630 * Using the adjacent color would result in virtually adjacent addresses 631 * mapping into the same spot in the cache. So, if we stumble across 632 * an empty bin, skip a bunch before looking. After the first skip, 633 * then just look one bin at a time so we don't miss our cache on 634 * every look. Be sure to check every bin. Page_create() will panic 635 * if we miss a page. 636 * 637 * This also explains the `<=' in the for loops in both page_get_freelist() 638 * and page_get_cachelist(). Since we checked the target bin, skipped 639 * a bunch, then continued one a time, we wind up checking the target bin 640 * twice to make sure we get all of them bins. 641 */ 642 #define BIN_STEP 19 643 644 #ifdef VM_STATS 645 struct vmm_vmstats_str { 646 /* page_get_uflt and page_get_kflt */ 647 ulong_t pgf_alloc[MMU_PAGE_SIZES][MAX_PFLT_TYPE]; 648 ulong_t pgf_allocok[MMU_PAGE_SIZES][MAX_PFLT_TYPE]; 649 ulong_t pgf_allocokrem[MMU_PAGE_SIZES][MAX_PFLT_TYPE]; 650 ulong_t pgf_allocfailed[MMU_PAGE_SIZES][MAX_PFLT_TYPE]; 651 ulong_t pgf_allocdeferred; 652 ulong_t pgf_allocretry[MMU_PAGE_SIZES][MAX_PFLT_TYPE]; 653 ulong_t pgik_allocok; /* page_import_kflt */ 654 ulong_t pgik_allocfailed; 655 ulong_t pgkx_allocok; /* kflt_expand */ 656 ulong_t pgkx_allocfailed; 657 ulong_t puak_allocok; /* page_user_alloc_kflt */ 658 ulong_t puak_allocfailed; 659 ulong_t pgexportok; /* kflt_export */ 660 ulong_t pgexportfail; 661 ulong_t pgkflt_disable; /* kflt_user_evict */ 662 ulong_t pgc_alloc; /* page_get_cachelist */ 663 ulong_t pgc_allocok; 664 ulong_t pgc_allocokrem; 665 ulong_t pgc_allocokdeferred; 666 ulong_t pgc_allocfailed; 667 ulong_t pgcp_alloc[MMU_PAGE_SIZES]; /* page_get_contig_pages */ 668 ulong_t pgcp_allocfailed[MMU_PAGE_SIZES]; 669 ulong_t pgcp_allocempty[MMU_PAGE_SIZES]; 670 ulong_t pgcp_allocok[MMU_PAGE_SIZES]; 671 ulong_t ptcp[MMU_PAGE_SIZES]; /* page_trylock_contig_pages */ 672 ulong_t ptcpfreethresh[MMU_PAGE_SIZES]; 673 ulong_t ptcpfailexcl[MMU_PAGE_SIZES]; 674 ulong_t ptcpfailszc[MMU_PAGE_SIZES]; 675 ulong_t ptcpfailcage[MMU_PAGE_SIZES]; 676 ulong_t ptcpfailkflt[MMU_PAGE_SIZES]; 677 ulong_t ptcpok[MMU_PAGE_SIZES]; 678 ulong_t pgmf_alloc[MMU_PAGE_SIZES]; /* page_get_mnode_freelist */ 679 ulong_t pgmf_allocfailed[MMU_PAGE_SIZES]; 680 ulong_t pgmf_allocempty[MMU_PAGE_SIZES]; 681 ulong_t pgmf_allocok[MMU_PAGE_SIZES]; 682 ulong_t pgmc_alloc; /* page_get_mnode_cachelist */ 683 ulong_t pgmc_allocfailed; 684 ulong_t pgmc_allocempty; 685 ulong_t pgmc_allocok; 686 ulong_t pladd_free[MMU_PAGE_SIZES]; /* page_list_add/sub */ 687 ulong_t plsub_free[MMU_PAGE_SIZES]; 688 ulong_t pladd_cache; 689 ulong_t plsub_cache; 690 ulong_t plsubpages_szcbig; 691 ulong_t plsubpages_szc0; 692 ulong_t pfs_req[MMU_PAGE_SIZES]; /* page_freelist_split */ 693 ulong_t pfs_demote[MMU_PAGE_SIZES]; 694 ulong_t pfc_coalok[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 695 ulong_t ppr_reloc[MMU_PAGE_SIZES]; /* page_relocate */ 696 ulong_t ppr_relocnoroot[MMU_PAGE_SIZES]; 697 ulong_t ppr_reloc_replnoroot[MMU_PAGE_SIZES]; 698 ulong_t ppr_relocnolock[MMU_PAGE_SIZES]; 699 ulong_t ppr_relocnomem[MMU_PAGE_SIZES]; 700 ulong_t ppr_relocok[MMU_PAGE_SIZES]; 701 ulong_t ppr_copyfail; 702 /* page coalesce counter */ 703 ulong_t page_ctrs_coalesce[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 704 /* candidates useful */ 705 ulong_t page_ctrs_cands_skip[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 706 /* ctrs changed after locking */ 707 ulong_t page_ctrs_changed[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 708 /* page_freelist_coalesce failed */ 709 ulong_t page_ctrs_failed[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 710 ulong_t page_ctrs_coalesce_all; /* page coalesce all counter */ 711 ulong_t page_ctrs_cands_skip_all; /* candidates useful for all func */ 712 ulong_t restrict4gcnt; 713 ulong_t unrestrict16mcnt; /* non-DMA 16m allocs allowed */ 714 ulong_t pgpanicalloc; /* PG_PANIC allocation */ 715 ulong_t pcf_deny[MMU_PAGE_SIZES]; /* page_chk_freelist */ 716 ulong_t pcf_allow[MMU_PAGE_SIZES]; 717 }; 718 extern struct vmm_vmstats_str vmm_vmstats; 719 #endif /* VM_STATS */ 720 721 extern size_t page_ctrs_sz(void); 722 extern caddr_t page_ctrs_alloc(caddr_t); 723 extern void page_ctr_sub(int, int, page_t *, int); 724 extern page_t *page_freelist_split(uchar_t, 725 uint_t, int, int, pfn_t, pfn_t, page_list_walker_t *); 726 extern page_t *page_freelist_coalesce(int, uchar_t, uint_t, uint_t, int, 727 pfn_t); 728 extern void page_freelist_coalesce_all(int); 729 extern uint_t page_get_pagecolors(uint_t); 730 extern void pfnzero(pfn_t, uint_t, uint_t); 731 732 #ifdef __cplusplus 733 } 734 #endif 735 736 #endif /* _VM_DEP_H */ 737