1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright 2019 Joyent, Inc. 24 */ 25 26 /* 27 * UNIX machine dependent virtual memory support. 28 */ 29 30 #ifndef _VM_DEP_H 31 #define _VM_DEP_H 32 33 #ifdef __cplusplus 34 extern "C" { 35 #endif 36 37 #include <vm/hat_sfmmu.h> 38 #include <sys/archsystm.h> 39 #include <sys/memnode.h> 40 41 #define GETTICK() gettick() 42 43 /* tick value that should be used for random values */ 44 extern u_longlong_t randtick(void); 45 46 /* 47 * Per page size free lists. Allocated dynamically. 48 */ 49 #define MAX_MEM_TYPES 2 /* 0 = reloc, 1 = noreloc */ 50 #define MTYPE_RELOC 0 51 #define MTYPE_NORELOC 1 52 53 #define PP_2_MTYPE(pp) (PP_ISNORELOC(pp) ? MTYPE_NORELOC : MTYPE_RELOC) 54 55 #define MTYPE_INIT(mtype, vp, vaddr, flags, pgsz) \ 56 mtype = (flags & PG_NORELOC) ? MTYPE_NORELOC : MTYPE_RELOC; 57 58 /* mtype init for page_get_replacement_page */ 59 #define MTYPE_PGR_INIT(mtype, flags, pp, pgcnt) \ 60 mtype = (flags & PG_NORELOC) ? MTYPE_NORELOC : MTYPE_RELOC; 61 62 #define MNODETYPE_2_PFN(mnode, mtype, pfnlo, pfnhi) \ 63 pfnlo = mem_node_config[mnode].physbase; \ 64 pfnhi = mem_node_config[mnode].physmax; 65 66 /* 67 * candidate counters in vm_pagelist.c are indexed by color and range 68 */ 69 #define MAX_MNODE_MRANGES MAX_MEM_TYPES 70 #define MNODE_RANGE_CNT(mnode) MAX_MNODE_MRANGES 71 #define MNODE_MAX_MRANGE(mnode) (MAX_MEM_TYPES - 1) 72 #define MTYPE_2_MRANGE(mnode, mtype) (mtype) 73 74 /* 75 * Internal PG_ flags. 76 */ 77 #define PGI_RELOCONLY 0x10000 /* acts in the opposite sense to PG_NORELOC */ 78 #define PGI_NOCAGE 0x20000 /* indicates Cage is disabled */ 79 #define PGI_PGCPHIPRI 0x40000 /* page_get_contig_page priority allocation */ 80 #define PGI_PGCPSZC0 0x80000 /* relocate base pagesize page */ 81 82 /* 83 * PGI mtype flags - should not overlap PGI flags 84 */ 85 #define PGI_MT_RANGE 0x1000000 /* mtype range */ 86 #define PGI_MT_NEXT 0x2000000 /* get next mtype */ 87 88 extern page_t ***page_freelists[MMU_PAGE_SIZES][MAX_MEM_TYPES]; 89 extern page_t ***page_cachelists[MAX_MEM_TYPES]; 90 91 #define PAGE_FREELISTS(mnode, szc, color, mtype) \ 92 (*(page_freelists[szc][mtype][mnode] + (color))) 93 94 #define PAGE_CACHELISTS(mnode, color, mtype) \ 95 (*(page_cachelists[mtype][mnode] + (color))) 96 97 /* 98 * There are 'page_colors' colors/bins. Spread them out under a 99 * couple of locks. There are mutexes for both the page freelist 100 * and the page cachelist. We want enough locks to make contention 101 * reasonable, but not too many -- otherwise page_freelist_lock() gets 102 * so expensive that it becomes the bottleneck! 103 */ 104 #define NPC_MUTEX 16 105 106 extern kmutex_t *fpc_mutex[NPC_MUTEX]; 107 extern kmutex_t *cpc_mutex[NPC_MUTEX]; 108 109 /* 110 * Iterator provides the info needed to convert RA to PA. 111 * MEM_NODE_ITERATOR_INIT() should be called before 112 * PAGE_NEXT_PFN_FOR_COLOR() if pfn was not obtained via a previous 113 * PAGE_NEXT_PFN_FOR_COLOR() call. Iterator caches color 2 hash 114 * translations requiring initializer call if color or ceq_mask changes, 115 * even if pfn doesn't. MEM_NODE_ITERATOR_INIT() must also be called before 116 * PFN_2_COLOR() that uses a valid iterator argument. 117 * 118 * plat_mem_node_iterator_init() starts from last mblock in continuation 119 * case which may be invalid because memory DR. To detect this situation 120 * mi_genid is checked against mpo_genid which is incremented after a 121 * memory DR operation. See also plat_slice_add()/plat_slice_del(). 122 */ 123 #ifdef sun4v 124 125 typedef struct mem_node_iterator { 126 uint_t mi_mnode; /* mnode in which to iterate */ 127 int mi_init; /* set to 1 when first init */ 128 int mi_genid; /* set/checked against mpo_genid */ 129 int mi_last_mblock; /* last mblock visited */ 130 uint_t mi_hash_ceq_mask; /* cached copy of ceq_mask */ 131 uint_t mi_hash_color; /* cached copy of color */ 132 uint_t mi_mnode_mask; /* number of mask bits */ 133 uint_t mi_mnode_pfn_shift; /* mnode position in pfn */ 134 pfn_t mi_mblock_base; /* first valid pfn in current mblock */ 135 pfn_t mi_mblock_end; /* last valid pfn in current mblock */ 136 pfn_t mi_ra_to_pa; /* ra adjustment for current mblock */ 137 pfn_t mi_mnode_pfn_mask; /* mask to obtain mnode id bits */ 138 } mem_node_iterator_t; 139 140 #define MEM_NODE_ITERATOR_DECL(it) \ 141 mem_node_iterator_t it 142 #define MEM_NODE_ITERATOR_INIT(pfn, mnode, szc, it) \ 143 (pfn) = plat_mem_node_iterator_init((pfn), (mnode), (szc), (it), 1) 144 145 extern pfn_t plat_mem_node_iterator_init(pfn_t, int, uchar_t, 146 mem_node_iterator_t *, int); 147 extern pfn_t plat_rapfn_to_papfn(pfn_t); 148 extern int interleaved_mnodes; 149 150 #else /* sun4v */ 151 152 #define MEM_NODE_ITERATOR_DECL(it) \ 153 void *it = NULL 154 #define MEM_NODE_ITERATOR_INIT(pfn, mnode, szc, it) 155 156 #endif /* sun4v */ 157 158 /* 159 * Return the mnode limits so that hpc_counters length and base 160 * index can be determined. When interleaved_mnodes is set, we 161 * create an array only for the first mnode that exists. All other 162 * mnodes will share the array in this case. 163 * If interleaved_mnodes is not set, simply return the limits for 164 * the given mnode. 165 */ 166 #define HPM_COUNTERS_LIMITS(mnode, physbase, physmax, first) \ 167 if (!interleaved_mnodes) { \ 168 (physbase) = mem_node_config[(mnode)].physbase; \ 169 (physmax) = mem_node_config[(mnode)].physmax; \ 170 (first) = (mnode); \ 171 } else if ((first) < 0) { \ 172 mem_node_max_range(&(physbase), &(physmax)); \ 173 (first) = (mnode); \ 174 } 175 176 #define PAGE_CTRS_WRITE_LOCK(mnode) \ 177 if (!interleaved_mnodes) { \ 178 rw_enter(&page_ctrs_rwlock[(mnode)], RW_WRITER); \ 179 page_freelist_lock(mnode); \ 180 } else { \ 181 /* changing shared hpm_counters */ \ 182 int _i; \ 183 for (_i = 0; _i < max_mem_nodes; _i++) { \ 184 rw_enter(&page_ctrs_rwlock[_i], RW_WRITER); \ 185 page_freelist_lock(_i); \ 186 } \ 187 } 188 189 #define PAGE_CTRS_WRITE_UNLOCK(mnode) \ 190 if (!interleaved_mnodes) { \ 191 page_freelist_unlock(mnode); \ 192 rw_exit(&page_ctrs_rwlock[(mnode)]); \ 193 } else { \ 194 int _i; \ 195 for (_i = 0; _i < max_mem_nodes; _i++) { \ 196 page_freelist_unlock(_i); \ 197 rw_exit(&page_ctrs_rwlock[_i]); \ 198 } \ 199 } 200 201 /* 202 * cpu specific color conversion functions 203 */ 204 extern uint_t page_get_nsz_color_mask_cpu(uchar_t, uint_t); 205 #pragma weak page_get_nsz_color_mask_cpu 206 207 extern uint_t page_get_nsz_color_cpu(uchar_t, uint_t); 208 #pragma weak page_get_nsz_color_cpu 209 210 extern uint_t page_get_color_shift_cpu(uchar_t, uchar_t); 211 #pragma weak page_get_color_shift_cpu 212 213 extern uint_t page_convert_color_cpu(uint_t, uchar_t, uchar_t); 214 #pragma weak page_convert_color_cpu 215 216 extern pfn_t page_next_pfn_for_color_cpu(pfn_t, 217 uchar_t, uint_t, uint_t, uint_t, void *); 218 #pragma weak page_next_pfn_for_color_cpu 219 220 extern uint_t page_pfn_2_color_cpu(pfn_t, uchar_t, void *); 221 #pragma weak page_pfn_2_color_cpu 222 223 #define PAGE_GET_COLOR_SHIFT(szc, nszc) \ 224 ((&page_get_color_shift_cpu != NULL) ? \ 225 page_get_color_shift_cpu(szc, nszc) : \ 226 (hw_page_array[(nszc)].hp_shift - \ 227 hw_page_array[(szc)].hp_shift)) 228 229 #define PAGE_CONVERT_COLOR(ncolor, szc, nszc) \ 230 ((&page_convert_color_cpu != NULL) ? \ 231 page_convert_color_cpu(ncolor, szc, nszc) : \ 232 ((ncolor) << PAGE_GET_COLOR_SHIFT((szc), (nszc)))) 233 234 #define PFN_2_COLOR(pfn, szc, it) \ 235 ((&page_pfn_2_color_cpu != NULL) ? \ 236 page_pfn_2_color_cpu(pfn, szc, it) : \ 237 ((pfn & (hw_page_array[0].hp_colors - 1)) >> \ 238 (hw_page_array[szc].hp_shift - \ 239 hw_page_array[0].hp_shift))) 240 241 #define PNUM_SIZE(szc) \ 242 (hw_page_array[(szc)].hp_pgcnt) 243 #define PNUM_SHIFT(szc) \ 244 (hw_page_array[(szc)].hp_shift - hw_page_array[0].hp_shift) 245 #define PAGE_GET_SHIFT(szc) \ 246 (hw_page_array[(szc)].hp_shift) 247 #define PAGE_GET_PAGECOLORS(szc) \ 248 (hw_page_array[(szc)].hp_colors) 249 250 /* 251 * This macro calculates the next sequential pfn with the specified 252 * color using color equivalency mask 253 */ 254 #define PAGE_NEXT_PFN_FOR_COLOR(pfn, szc, color, ceq_mask, color_mask, it) \ 255 { \ 256 ASSERT(((color) & ~(ceq_mask)) == 0); \ 257 if (&page_next_pfn_for_color_cpu == NULL) { \ 258 uint_t pfn_shift = PAGE_BSZS_SHIFT(szc); \ 259 pfn_t spfn = pfn >> pfn_shift; \ 260 pfn_t stride = (ceq_mask) + 1; \ 261 ASSERT((((ceq_mask) + 1) & (ceq_mask)) == 0); \ 262 if (((spfn ^ (color)) & (ceq_mask)) == 0) { \ 263 pfn += stride << pfn_shift; \ 264 } else { \ 265 pfn = (spfn & ~(pfn_t)(ceq_mask)) | (color); \ 266 pfn = (pfn > spfn ? pfn : pfn + stride) << \ 267 pfn_shift; \ 268 } \ 269 } else { \ 270 pfn = page_next_pfn_for_color_cpu(pfn, szc, color, \ 271 ceq_mask, color_mask, it); \ 272 } \ 273 } 274 275 /* get the color equivalency mask for the next szc */ 276 #define PAGE_GET_NSZ_MASK(szc, mask) \ 277 ((&page_get_nsz_color_mask_cpu == NULL) ? \ 278 ((mask) >> (PAGE_GET_SHIFT((szc) + 1) - PAGE_GET_SHIFT(szc))) : \ 279 page_get_nsz_color_mask_cpu(szc, mask)) 280 281 /* get the color of the next szc */ 282 #define PAGE_GET_NSZ_COLOR(szc, color) \ 283 ((&page_get_nsz_color_cpu == NULL) ? \ 284 ((color) >> (PAGE_GET_SHIFT((szc) + 1) - PAGE_GET_SHIFT(szc))) : \ 285 page_get_nsz_color_cpu(szc, color)) 286 287 /* Find the bin for the given page if it was of size szc */ 288 #define PP_2_BIN_SZC(pp, szc) (PFN_2_COLOR(pp->p_pagenum, szc, (void *)(-1))) 289 290 #define PP_2_BIN(pp) (PP_2_BIN_SZC(pp, pp->p_szc)) 291 292 #define PP_2_MEM_NODE(pp) (PFN_2_MEM_NODE(pp->p_pagenum)) 293 294 #define PC_BIN_MUTEX(mnode, bin, flags) ((flags & PG_FREE_LIST) ? \ 295 &fpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode] : \ 296 &cpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode]) 297 298 #define FPC_MUTEX(mnode, i) (&fpc_mutex[i][mnode]) 299 #define CPC_MUTEX(mnode, i) (&cpc_mutex[i][mnode]) 300 301 #define PFN_BASE(pfnum, szc) (pfnum & ~((1 << PAGE_BSZS_SHIFT(szc)) - 1)) 302 303 /* 304 * this structure is used for walking free page lists 305 * controls when to split large pages into smaller pages, 306 * and when to coalesce smaller pages into larger pages 307 */ 308 typedef struct page_list_walker { 309 uint_t plw_colors; /* num of colors for szc */ 310 uint_t plw_color_mask; /* colors-1 */ 311 uint_t plw_bin_step; /* next bin: 1 or 2 */ 312 uint_t plw_count; /* loop count */ 313 uint_t plw_bin0; /* starting bin */ 314 uint_t plw_bin_marker; /* bin after initial jump */ 315 uint_t plw_bin_split_prev; /* last bin we tried to split */ 316 uint_t plw_do_split; /* set if OK to split */ 317 uint_t plw_split_next; /* next bin to split */ 318 uint_t plw_ceq_dif; /* number of different color groups */ 319 /* to check */ 320 uint_t plw_ceq_mask[MMU_PAGE_SIZES + 1]; /* color equiv mask */ 321 uint_t plw_bins[MMU_PAGE_SIZES + 1]; /* num of bins */ 322 } page_list_walker_t; 323 324 void page_list_walk_init(uchar_t szc, uint_t flags, uint_t bin, 325 int can_split, int use_ceq, page_list_walker_t *plw); 326 327 typedef char hpmctr_t; 328 329 #ifdef DEBUG 330 #define CHK_LPG(pp, szc) chk_lpg(pp, szc) 331 extern void chk_lpg(page_t *, uchar_t); 332 #else 333 #define CHK_LPG(pp, szc) 334 #endif 335 336 /* 337 * page list count per mnode and type. 338 */ 339 typedef struct { 340 pgcnt_t plc_mt_pgmax; /* max page cnt */ 341 pgcnt_t plc_mt_clpgcnt; /* cache list cnt */ 342 pgcnt_t plc_mt_flpgcnt; /* free list cnt - small pages */ 343 pgcnt_t plc_mt_lgpgcnt; /* free list cnt - large pages */ 344 #ifdef DEBUG 345 struct { 346 pgcnt_t plc_mts_pgcnt; /* per page size count */ 347 int plc_mts_colors; 348 pgcnt_t *plc_mtsc_pgcnt; /* per color bin count */ 349 } plc_mts[MMU_PAGE_SIZES]; 350 #endif 351 } plcnt_t[MAX_MEM_NODES][MAX_MEM_TYPES]; 352 353 #ifdef DEBUG 354 355 #define PLCNT_SZ(ctrs_sz) { \ 356 int szc; \ 357 for (szc = 0; szc < mmu_page_sizes; szc++) { \ 358 int colors = page_get_pagecolors(szc); \ 359 ctrs_sz += (max_mem_nodes * MAX_MEM_TYPES * \ 360 colors * sizeof (pgcnt_t)); \ 361 } \ 362 } 363 364 #define PLCNT_INIT(base) { \ 365 int mn, mt, szc, colors; \ 366 for (szc = 0; szc < mmu_page_sizes; szc++) { \ 367 colors = page_get_pagecolors(szc); \ 368 for (mn = 0; mn < max_mem_nodes; mn++) { \ 369 for (mt = 0; mt < MAX_MEM_TYPES; mt++) { \ 370 plcnt[mn][mt].plc_mts[szc]. \ 371 plc_mts_colors = colors; \ 372 plcnt[mn][mt].plc_mts[szc]. \ 373 plc_mtsc_pgcnt = (pgcnt_t *)base; \ 374 base += (colors * sizeof (pgcnt_t)); \ 375 } \ 376 } \ 377 } \ 378 } 379 380 #define PLCNT_DO(pp, mn, mtype, szc, cnt, flags) { \ 381 int bin = PP_2_BIN(pp); \ 382 if (flags & PG_CACHE_LIST) \ 383 atomic_add_long(&plcnt[mn][mtype].plc_mt_clpgcnt, cnt); \ 384 else if (szc) \ 385 atomic_add_long(&plcnt[mn][mtype].plc_mt_lgpgcnt, cnt); \ 386 else \ 387 atomic_add_long(&plcnt[mn][mtype].plc_mt_flpgcnt, cnt); \ 388 atomic_add_long(&plcnt[mn][mtype].plc_mts[szc].plc_mts_pgcnt, \ 389 cnt); \ 390 atomic_add_long(&plcnt[mn][mtype].plc_mts[szc]. \ 391 plc_mtsc_pgcnt[bin], cnt); \ 392 } 393 394 #else 395 396 #define PLCNT_SZ(ctrs_sz) 397 398 #define PLCNT_INIT(base) 399 400 /* PG_FREE_LIST may not be explicitly set in flags for large pages */ 401 402 #define PLCNT_DO(pp, mn, mtype, szc, cnt, flags) { \ 403 if (flags & PG_CACHE_LIST) \ 404 atomic_add_long(&plcnt[mn][mtype].plc_mt_clpgcnt, cnt); \ 405 else if (szc) \ 406 atomic_add_long(&plcnt[mn][mtype].plc_mt_lgpgcnt, cnt); \ 407 else \ 408 atomic_add_long(&plcnt[mn][mtype].plc_mt_flpgcnt, cnt); \ 409 } 410 411 #endif 412 413 #define PLCNT_INCR(pp, mn, mtype, szc, flags) { \ 414 long cnt = (1 << PAGE_BSZS_SHIFT(szc)); \ 415 PLCNT_DO(pp, mn, mtype, szc, cnt, flags); \ 416 } 417 418 #define PLCNT_DECR(pp, mn, mtype, szc, flags) { \ 419 long cnt = ((-1) << PAGE_BSZS_SHIFT(szc)); \ 420 PLCNT_DO(pp, mn, mtype, szc, cnt, flags); \ 421 } 422 423 /* 424 * macros to update page list max counts - done when pages transferred 425 * from RELOC to NORELOC mtype (kcage_init or kcage_assimilate_page). 426 */ 427 428 #define PLCNT_XFER_NORELOC(pp) { \ 429 long cnt = (1 << PAGE_BSZS_SHIFT((pp)->p_szc)); \ 430 int mn = PP_2_MEM_NODE(pp); \ 431 atomic_add_long(&plcnt[mn][MTYPE_NORELOC].plc_mt_pgmax, cnt); \ 432 atomic_add_long(&plcnt[mn][MTYPE_RELOC].plc_mt_pgmax, -cnt); \ 433 } 434 435 /* 436 * macro to modify the page list max counts when memory is added to 437 * the page lists during startup (add_physmem) or during a DR operation 438 * when memory is added (kphysm_add_memory_dynamic) or deleted 439 * (kphysm_del_cleanup). 440 */ 441 #define PLCNT_MODIFY_MAX(pfn, cnt) { \ 442 spgcnt_t _cnt = (spgcnt_t)(cnt); \ 443 pgcnt_t _acnt = ABS(_cnt); \ 444 int _mn; \ 445 pgcnt_t _np; \ 446 if (&plat_mem_node_intersect_range != NULL) { \ 447 for (_mn = 0; _mn < max_mem_nodes; _mn++) { \ 448 plat_mem_node_intersect_range((pfn), _acnt, _mn, &_np);\ 449 if (_np == 0) \ 450 continue; \ 451 atomic_add_long(&plcnt[_mn][MTYPE_RELOC].plc_mt_pgmax, \ 452 (_cnt < 0) ? -_np : _np); \ 453 } \ 454 } else { \ 455 pfn_t _pfn = (pfn); \ 456 pfn_t _endpfn = _pfn + _acnt; \ 457 while (_pfn < _endpfn) { \ 458 _mn = PFN_2_MEM_NODE(_pfn); \ 459 _np = MIN(_endpfn, mem_node_config[_mn].physmax + 1) - \ 460 _pfn; \ 461 _pfn += _np; \ 462 atomic_add_long(&plcnt[_mn][MTYPE_RELOC].plc_mt_pgmax, \ 463 (_cnt < 0) ? -_np : _np); \ 464 } \ 465 } \ 466 } 467 468 /* 469 * macro to call page_ctrs_adjust() when memory is added 470 * during a DR operation. 471 */ 472 #define PAGE_CTRS_ADJUST(pfn, cnt, rv) { \ 473 spgcnt_t _cnt = (spgcnt_t)(cnt); \ 474 int _mn; \ 475 pgcnt_t _np; \ 476 rv = 0; \ 477 if (&plat_mem_node_intersect_range != NULL) { \ 478 for (_mn = 0; _mn < max_mem_nodes; _mn++) { \ 479 plat_mem_node_intersect_range((pfn), _cnt, _mn, &_np); \ 480 if (_np == 0) \ 481 continue; \ 482 if ((rv = page_ctrs_adjust(_mn)) != 0) \ 483 break; \ 484 } \ 485 } else { \ 486 pfn_t _pfn = (pfn); \ 487 pfn_t _endpfn = _pfn + _cnt; \ 488 while (_pfn < _endpfn) { \ 489 _mn = PFN_2_MEM_NODE(_pfn); \ 490 _np = MIN(_endpfn, mem_node_config[_mn].physmax + 1) - \ 491 _pfn; \ 492 _pfn += _np; \ 493 if ((rv = page_ctrs_adjust(_mn)) != 0) \ 494 break; \ 495 } \ 496 } \ 497 } 498 499 extern plcnt_t plcnt; 500 501 #define MNODE_PGCNT(mn) \ 502 (plcnt[mn][MTYPE_RELOC].plc_mt_clpgcnt + \ 503 plcnt[mn][MTYPE_NORELOC].plc_mt_clpgcnt + \ 504 plcnt[mn][MTYPE_RELOC].plc_mt_flpgcnt + \ 505 plcnt[mn][MTYPE_NORELOC].plc_mt_flpgcnt + \ 506 plcnt[mn][MTYPE_RELOC].plc_mt_lgpgcnt + \ 507 plcnt[mn][MTYPE_NORELOC].plc_mt_lgpgcnt) 508 509 #define MNODETYPE_PGCNT(mn, mtype) \ 510 (plcnt[mn][mtype].plc_mt_clpgcnt + \ 511 plcnt[mn][mtype].plc_mt_flpgcnt + \ 512 plcnt[mn][mtype].plc_mt_lgpgcnt) 513 514 /* 515 * macros to loop through the mtype range - MTYPE_START returns -1 in 516 * mtype if no pages in mnode/mtype and possibly NEXT mtype. 517 */ 518 #define MTYPE_START(mnode, mtype, flags) { \ 519 if (plcnt[mnode][mtype].plc_mt_pgmax == 0) { \ 520 ASSERT(mtype == MTYPE_RELOC || \ 521 MNODETYPE_PGCNT(mnode, mtype) == 0 || \ 522 plcnt[mnode][mtype].plc_mt_pgmax != 0); \ 523 MTYPE_NEXT(mnode, mtype, flags); \ 524 } \ 525 } 526 527 /* 528 * if allocation from the RELOC pool failed and there is sufficient cage 529 * memory, attempt to allocate from the NORELOC pool. 530 */ 531 #define MTYPE_NEXT(mnode, mtype, flags) { \ 532 if (!(flags & (PG_NORELOC | PGI_NOCAGE | PGI_RELOCONLY)) && \ 533 (kcage_freemem >= kcage_lotsfree)) { \ 534 if (plcnt[mnode][MTYPE_NORELOC].plc_mt_pgmax == 0) { \ 535 ASSERT(MNODETYPE_PGCNT(mnode, MTYPE_NORELOC) == 0 || \ 536 plcnt[mnode][MTYPE_NORELOC].plc_mt_pgmax != 0); \ 537 mtype = -1; \ 538 } else { \ 539 mtype = MTYPE_NORELOC; \ 540 flags |= PG_NORELOC; \ 541 } \ 542 } else { \ 543 mtype = -1; \ 544 } \ 545 } 546 547 /* 548 * get the ecache setsize for the current cpu. 549 */ 550 #define CPUSETSIZE() (cpunodes[CPU->cpu_id].ecache_setsize) 551 552 extern struct cpu cpu0; 553 #define CPU0 &cpu0 554 555 #define PAGE_BSZS_SHIFT(szc) TTE_BSZS_SHIFT(szc) 556 /* 557 * For sfmmu each larger page is 8 times the size of the previous 558 * size page. 559 */ 560 #define FULL_REGION_CNT(rg_szc) (8) 561 562 /* 563 * The counter base must be per page_counter element to prevent 564 * races when re-indexing, and the base page size element should 565 * be aligned on a boundary of the given region size. 566 * 567 * We also round up the number of pages spanned by the counters 568 * for a given region to PC_BASE_ALIGN in certain situations to simplify 569 * the coding for some non-performance critical routines. 570 */ 571 #define PC_BASE_ALIGN ((pfn_t)1 << PAGE_BSZS_SHIFT(mmu_page_sizes-1)) 572 #define PC_BASE_ALIGN_MASK (PC_BASE_ALIGN - 1) 573 574 extern int ecache_alignsize; 575 #define L2CACHE_ALIGN ecache_alignsize 576 #define L2CACHE_ALIGN_MAX 512 577 578 extern int update_proc_pgcolorbase_after_fork; 579 extern int consistent_coloring; 580 extern uint_t vac_colors_mask; 581 extern int vac_size; 582 extern int vac_shift; 583 584 /* 585 * Kernel mem segment in 64-bit space 586 */ 587 extern caddr_t kmem64_base, kmem64_end, kmem64_aligned_end; 588 extern int kmem64_alignsize, kmem64_szc; 589 extern uint64_t kmem64_pabase; 590 extern int max_bootlp_tteszc; 591 592 /* 593 * Maximum and default values for user heap, stack, private and shared 594 * anonymous memory, and user text and initialized data. 595 * 596 * Initial values are defined in architecture specific mach_vm_dep.c file. 597 * Used by map_pgsz*() routines. 598 */ 599 extern size_t max_uheap_lpsize; 600 extern size_t default_uheap_lpsize; 601 extern size_t max_ustack_lpsize; 602 extern size_t default_ustack_lpsize; 603 extern size_t max_privmap_lpsize; 604 extern size_t max_uidata_lpsize; 605 extern size_t max_utext_lpsize; 606 extern size_t max_shm_lpsize; 607 608 /* 609 * For adjusting the default lpsize, for DTLB-limited page sizes. 610 */ 611 extern void adjust_data_maxlpsize(size_t ismpagesize); 612 613 /* 614 * Sanity control. Don't use large pages regardless of user 615 * settings if there's less than priv or shm_lpg_min_physmem memory installed. 616 * The units for this variable are 8K pages. 617 */ 618 extern pgcnt_t privm_lpg_min_physmem; 619 extern pgcnt_t shm_lpg_min_physmem; 620 621 /* 622 * AS_2_BIN macro controls the page coloring policy. 623 * 0 (default) uses various vaddr bits 624 * 1 virtual=paddr 625 * 2 bin hopping 626 */ 627 #define AS_2_BIN(as, seg, vp, addr, bin, szc) \ 628 switch (consistent_coloring) { \ 629 default: \ 630 cmn_err(CE_WARN, \ 631 "AS_2_BIN: bad consistent coloring value"); \ 632 /* assume default algorithm -> continue */ \ 633 case 0: { \ 634 uint32_t ndx, new; \ 635 int slew = 0; \ 636 pfn_t pfn; \ 637 \ 638 if (vp != NULL && IS_SWAPVP(vp) && \ 639 seg->s_ops == &segvn_ops) \ 640 slew = as_color_bin(as); \ 641 \ 642 pfn = ((uintptr_t)addr >> MMU_PAGESHIFT) + \ 643 (((uintptr_t)addr >> page_coloring_shift) << \ 644 (vac_shift - MMU_PAGESHIFT)); \ 645 if ((szc) == 0 || &page_pfn_2_color_cpu == NULL) { \ 646 pfn += slew; \ 647 bin = PFN_2_COLOR(pfn, szc, NULL); \ 648 } else { \ 649 bin = PFN_2_COLOR(pfn, szc, NULL); \ 650 bin += slew >> (vac_shift - MMU_PAGESHIFT); \ 651 bin &= hw_page_array[(szc)].hp_colors - 1; \ 652 } \ 653 break; \ 654 } \ 655 case 1: \ 656 bin = PFN_2_COLOR(((uintptr_t)addr >> MMU_PAGESHIFT), \ 657 szc, NULL); \ 658 break; \ 659 case 2: { \ 660 int cnt = as_color_bin(as); \ 661 uint_t color_mask = page_get_pagecolors(0) - 1; \ 662 \ 663 /* make sure physical color aligns with vac color */ \ 664 while ((cnt & vac_colors_mask) != \ 665 addr_to_vcolor(addr)) { \ 666 cnt++; \ 667 } \ 668 bin = cnt = cnt & color_mask; \ 669 bin >>= PAGE_GET_COLOR_SHIFT(0, szc); \ 670 /* update per as page coloring fields */ \ 671 cnt = (cnt + 1) & color_mask; \ 672 if (cnt == (as_color_start(as) & color_mask)) { \ 673 cnt = as_color_start(as) = as_color_start(as) + \ 674 PGCLR_LOOPFACTOR; \ 675 } \ 676 as_color_bin(as) = cnt & color_mask; \ 677 break; \ 678 } \ 679 } \ 680 ASSERT(bin < page_get_pagecolors(szc)); 681 682 /* 683 * cpu private vm data - accessed thru CPU->cpu_vm_data 684 * vc_pnum_memseg: tracks last memseg visited in page_numtopp_nolock() 685 * vc_pnext_memseg: tracks last memseg visited in page_nextn() 686 * vc_kmptr: unaligned kmem pointer for this vm_cpu_data_t 687 * vc_kmsize: orignal kmem size for this vm_cpu_data_t 688 */ 689 690 typedef struct { 691 struct memseg *vc_pnum_memseg; 692 struct memseg *vc_pnext_memseg; 693 void *vc_kmptr; 694 size_t vc_kmsize; 695 } vm_cpu_data_t; 696 697 /* allocation size to ensure vm_cpu_data_t resides in its own cache line */ 698 #define VM_CPU_DATA_PADSIZE \ 699 (P2ROUNDUP(sizeof (vm_cpu_data_t), L2CACHE_ALIGN_MAX)) 700 701 /* 702 * Function to get an ecache color bin: F(as, cnt, vcolor). 703 * the goal of this function is to: 704 * - to spread a processes' physical pages across the entire ecache to 705 * maximize its use. 706 * - to minimize vac flushes caused when we reuse a physical page on a 707 * different vac color than it was previously used. 708 * - to prevent all processes to use the same exact colors and trash each 709 * other. 710 * 711 * cnt is a bin ptr kept on a per as basis. As we page_create we increment 712 * the ptr so we spread out the physical pages to cover the entire ecache. 713 * The virtual color is made a subset of the physical color in order to 714 * in minimize virtual cache flushing. 715 * We add in the as to spread out different as. This happens when we 716 * initialize the start count value. 717 * sizeof(struct as) is 60 so we shift by 3 to get into the bit range 718 * that will tend to change. For example, on spitfire based machines 719 * (vcshft == 1) contigous as are spread bu ~6 bins. 720 * vcshft provides for proper virtual color alignment. 721 * In theory cnt should be updated using cas only but if we are off by one 722 * or 2 it is no big deal. 723 * We also keep a start value which is used to randomize on what bin we 724 * start counting when it is time to start another loop. This avoids 725 * contigous allocations of ecache size to point to the same bin. 726 * Why 3? Seems work ok. Better than 7 or anything larger. 727 */ 728 #define PGCLR_LOOPFACTOR 3 729 730 /* 731 * When a bin is empty, and we can't satisfy a color request correctly, 732 * we scan. If we assume that the programs have reasonable spatial 733 * behavior, then it will not be a good idea to use the adjacent color. 734 * Using the adjacent color would result in virtually adjacent addresses 735 * mapping into the same spot in the cache. So, if we stumble across 736 * an empty bin, skip a bunch before looking. After the first skip, 737 * then just look one bin at a time so we don't miss our cache on 738 * every look. Be sure to check every bin. Page_create() will panic 739 * if we miss a page. 740 * 741 * This also explains the `<=' in the for loops in both page_get_freelist() 742 * and page_get_cachelist(). Since we checked the target bin, skipped 743 * a bunch, then continued one a time, we wind up checking the target bin 744 * twice to make sure we get all of them bins. 745 */ 746 #define BIN_STEP 20 747 748 #ifdef VM_STATS 749 struct vmm_vmstats_str { 750 ulong_t pgf_alloc[MMU_PAGE_SIZES]; /* page_get_freelist */ 751 ulong_t pgf_allocok[MMU_PAGE_SIZES]; 752 ulong_t pgf_allocokrem[MMU_PAGE_SIZES]; 753 ulong_t pgf_allocfailed[MMU_PAGE_SIZES]; 754 ulong_t pgf_allocdeferred; 755 ulong_t pgf_allocretry[MMU_PAGE_SIZES]; 756 ulong_t pgc_alloc; /* page_get_cachelist */ 757 ulong_t pgc_allocok; 758 ulong_t pgc_allocokrem; 759 ulong_t pgc_allocokdeferred; 760 ulong_t pgc_allocfailed; 761 ulong_t pgcp_alloc[MMU_PAGE_SIZES]; /* page_get_contig_pages */ 762 ulong_t pgcp_allocfailed[MMU_PAGE_SIZES]; 763 ulong_t pgcp_allocempty[MMU_PAGE_SIZES]; 764 ulong_t pgcp_allocok[MMU_PAGE_SIZES]; 765 ulong_t ptcp[MMU_PAGE_SIZES]; /* page_trylock_contig_pages */ 766 ulong_t ptcpfreethresh[MMU_PAGE_SIZES]; 767 ulong_t ptcpfailexcl[MMU_PAGE_SIZES]; 768 ulong_t ptcpfailszc[MMU_PAGE_SIZES]; 769 ulong_t ptcpfailcage[MMU_PAGE_SIZES]; 770 ulong_t ptcpok[MMU_PAGE_SIZES]; 771 ulong_t pgmf_alloc[MMU_PAGE_SIZES]; /* page_get_mnode_freelist */ 772 ulong_t pgmf_allocfailed[MMU_PAGE_SIZES]; 773 ulong_t pgmf_allocempty[MMU_PAGE_SIZES]; 774 ulong_t pgmf_allocok[MMU_PAGE_SIZES]; 775 ulong_t pgmc_alloc; /* page_get_mnode_cachelist */ 776 ulong_t pgmc_allocfailed; 777 ulong_t pgmc_allocempty; 778 ulong_t pgmc_allocok; 779 ulong_t pladd_free[MMU_PAGE_SIZES]; /* page_list_add/sub */ 780 ulong_t plsub_free[MMU_PAGE_SIZES]; 781 ulong_t pladd_cache; 782 ulong_t plsub_cache; 783 ulong_t plsubpages_szcbig; 784 ulong_t plsubpages_szc0; 785 ulong_t pfs_req[MMU_PAGE_SIZES]; /* page_freelist_split */ 786 ulong_t pfs_demote[MMU_PAGE_SIZES]; 787 ulong_t pfc_coalok[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 788 ulong_t ppr_reloc[MMU_PAGE_SIZES]; /* page_relocate */ 789 ulong_t ppr_relocok[MMU_PAGE_SIZES]; 790 ulong_t ppr_relocnoroot[MMU_PAGE_SIZES]; 791 ulong_t ppr_reloc_replnoroot[MMU_PAGE_SIZES]; 792 ulong_t ppr_relocnolock[MMU_PAGE_SIZES]; 793 ulong_t ppr_relocnomem[MMU_PAGE_SIZES]; 794 ulong_t ppr_krelocfail[MMU_PAGE_SIZES]; 795 ulong_t ppr_copyfail; 796 /* page coalesce counter */ 797 ulong_t page_ctrs_coalesce[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 798 /* candidates useful */ 799 ulong_t page_ctrs_cands_skip[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 800 /* ctrs changed after locking */ 801 ulong_t page_ctrs_changed[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 802 /* page_freelist_coalesce failed */ 803 ulong_t page_ctrs_failed[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 804 ulong_t page_ctrs_coalesce_all; /* page coalesce all counter */ 805 ulong_t page_ctrs_cands_skip_all; /* candidates useful for all func */ 806 }; 807 extern struct vmm_vmstats_str vmm_vmstats; 808 #endif /* VM_STATS */ 809 810 /* 811 * Used to hold off page relocations into the cage until OBP has completed 812 * its boot-time handoff of its resources to the kernel. 813 */ 814 extern int page_relocate_ready; 815 816 /* 817 * cpu/mmu-dependent vm variables may be reset at bootup. 818 */ 819 extern uint_t mmu_page_sizes; 820 extern uint_t max_mmu_page_sizes; 821 extern uint_t mmu_hashcnt; 822 extern uint_t max_mmu_hashcnt; 823 extern size_t mmu_ism_pagesize; 824 extern int mmu_exported_pagesize_mask; 825 extern uint_t mmu_exported_page_sizes; 826 extern uint_t szc_2_userszc[]; 827 extern uint_t userszc_2_szc[]; 828 829 #define mmu_legacy_page_sizes mmu_exported_page_sizes 830 #define USERSZC_2_SZC(userszc) (userszc_2_szc[userszc]) 831 #define SZC_2_USERSZC(szc) (szc_2_userszc[szc]) 832 833 /* 834 * Platform specific page routines 835 */ 836 extern void mach_page_add(page_t **, page_t *); 837 extern void mach_page_sub(page_t **, page_t *); 838 extern uint_t page_get_pagecolors(uint_t); 839 extern void ppcopy_kernel__relocatable(page_t *, page_t *); 840 #define ppcopy_kernel(p1, p2) ppcopy_kernel__relocatable(p1, p2) 841 842 /* 843 * platform specific large pages for kernel heap support 844 */ 845 extern size_t get_segkmem_lpsize(size_t lpsize); 846 extern size_t mmu_get_kernel_lpsize(size_t lpsize); 847 extern void mmu_init_kernel_pgsz(struct hat *hat); 848 extern void mmu_init_kcontext(); 849 extern uint64_t kcontextreg; 850 851 /* 852 * Nucleus data page allocator routines 853 */ 854 extern void ndata_alloc_init(struct memlist *, uintptr_t, uintptr_t); 855 extern void *ndata_alloc(struct memlist *, size_t, size_t); 856 extern void *ndata_extra_base(struct memlist *, size_t, caddr_t); 857 extern size_t ndata_maxsize(struct memlist *); 858 extern size_t ndata_spare(struct memlist *, size_t, size_t); 859 860 #ifdef __cplusplus 861 } 862 #endif 863 864 #endif /* _VM_DEP_H */ 865