17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5ee88d2b9Skchow * Common Development and Distribution License (the "License"). 6ee88d2b9Skchow * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 2256f33205SJonathan Adams * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 237c478bd9Sstevel@tonic-gate * Use is subject to license terms. 
247c478bd9Sstevel@tonic-gate */ 257c478bd9Sstevel@tonic-gate 267c478bd9Sstevel@tonic-gate #include <sys/types.h> 277c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 287c478bd9Sstevel@tonic-gate #include <sys/vmem.h> 297c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 307c478bd9Sstevel@tonic-gate #include <sys/systm.h> 317c478bd9Sstevel@tonic-gate #include <sys/machsystm.h> /* for page_freelist_coalesce() */ 327c478bd9Sstevel@tonic-gate #include <sys/errno.h> 337c478bd9Sstevel@tonic-gate #include <sys/memnode.h> 347c478bd9Sstevel@tonic-gate #include <sys/memlist.h> 357c478bd9Sstevel@tonic-gate #include <sys/memlist_impl.h> 367c478bd9Sstevel@tonic-gate #include <sys/tuneable.h> 377c478bd9Sstevel@tonic-gate #include <sys/proc.h> 387c478bd9Sstevel@tonic-gate #include <sys/disp.h> 397c478bd9Sstevel@tonic-gate #include <sys/debug.h> 407c478bd9Sstevel@tonic-gate #include <sys/vm.h> 417c478bd9Sstevel@tonic-gate #include <sys/callb.h> 427c478bd9Sstevel@tonic-gate #include <sys/memlist_plat.h> /* for installed_top_size() */ 437c478bd9Sstevel@tonic-gate #include <sys/condvar_impl.h> /* for CV_HAS_WAITERS() */ 447c478bd9Sstevel@tonic-gate #include <sys/dumphdr.h> /* for dump_resize() */ 457c478bd9Sstevel@tonic-gate #include <sys/atomic.h> /* for use in stats collection */ 467c478bd9Sstevel@tonic-gate #include <sys/rwlock.h> 477c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h> 487c478bd9Sstevel@tonic-gate #include <vm/seg_kmem.h> 497c478bd9Sstevel@tonic-gate #include <vm/seg_kpm.h> 507c478bd9Sstevel@tonic-gate #include <vm/page.h> 51e21bae1bSkchow #include <vm/vm_dep.h> 527c478bd9Sstevel@tonic-gate #define SUNDDI_IMPL /* so sunddi.h will not redefine splx() et al */ 537c478bd9Sstevel@tonic-gate #include <sys/sunddi.h> 547c478bd9Sstevel@tonic-gate #include <sys/mem_config.h> 557c478bd9Sstevel@tonic-gate #include <sys/mem_cage.h> 567c478bd9Sstevel@tonic-gate #include <sys/lgrp.h> 577c478bd9Sstevel@tonic-gate #include <sys/ddi.h> 587c478bd9Sstevel@tonic-gate #include 
<sys/modctl.h> 597c478bd9Sstevel@tonic-gate 607c478bd9Sstevel@tonic-gate extern struct memlist *phys_avail; 617c478bd9Sstevel@tonic-gate 627c478bd9Sstevel@tonic-gate extern uint_t page_ctrs_adjust(int); 63af4c679fSSean McEnroe void page_ctrs_cleanup(void); 647c478bd9Sstevel@tonic-gate static void kphysm_setup_post_add(pgcnt_t); 657c478bd9Sstevel@tonic-gate static int kphysm_setup_pre_del(pgcnt_t); 667c478bd9Sstevel@tonic-gate static void kphysm_setup_post_del(pgcnt_t, int); 677c478bd9Sstevel@tonic-gate 687c478bd9Sstevel@tonic-gate static int kphysm_split_memseg(pfn_t base, pgcnt_t npgs); 697c478bd9Sstevel@tonic-gate 707c478bd9Sstevel@tonic-gate static int delspan_reserve(pfn_t, pgcnt_t); 717c478bd9Sstevel@tonic-gate static void delspan_unreserve(pfn_t, pgcnt_t); 727c478bd9Sstevel@tonic-gate 739853d9e8SJason Beloro kmutex_t memseg_lists_lock; 749853d9e8SJason Beloro struct memseg *memseg_va_avail; 759853d9e8SJason Beloro struct memseg *memseg_alloc(void); 767c478bd9Sstevel@tonic-gate static struct memseg *memseg_delete_junk; 777c478bd9Sstevel@tonic-gate static struct memseg *memseg_edit_junk; 787c478bd9Sstevel@tonic-gate void memseg_remap_init(void); 799853d9e8SJason Beloro static void memseg_remap_to_dummy(struct memseg *); 807c478bd9Sstevel@tonic-gate static void kphysm_addmem_error_undospan(pfn_t, pgcnt_t); 817c478bd9Sstevel@tonic-gate static struct memseg *memseg_reuse(pgcnt_t); 827c478bd9Sstevel@tonic-gate 837c478bd9Sstevel@tonic-gate static struct kmem_cache *memseg_cache; 847c478bd9Sstevel@tonic-gate 857c478bd9Sstevel@tonic-gate /* 869853d9e8SJason Beloro * Interfaces to manage externally allocated 879853d9e8SJason Beloro * page_t memory (metadata) for a memseg. 
889853d9e8SJason Beloro */ 899853d9e8SJason Beloro #pragma weak memseg_alloc_meta 909853d9e8SJason Beloro #pragma weak memseg_free_meta 919853d9e8SJason Beloro #pragma weak memseg_get_metapfn 929853d9e8SJason Beloro #pragma weak memseg_remap_meta 939853d9e8SJason Beloro 949853d9e8SJason Beloro extern int ppvm_enable; 959853d9e8SJason Beloro extern page_t *ppvm_base; 969853d9e8SJason Beloro extern int memseg_alloc_meta(pfn_t, pgcnt_t, void **, pgcnt_t *); 979853d9e8SJason Beloro extern void memseg_free_meta(void *, pgcnt_t); 989853d9e8SJason Beloro extern pfn_t memseg_get_metapfn(void *, pgcnt_t); 999853d9e8SJason Beloro extern void memseg_remap_meta(struct memseg *); 1009853d9e8SJason Beloro static int memseg_is_dynamic(struct memseg *); 1019853d9e8SJason Beloro static int memseg_includes_meta(struct memseg *); 102af4c679fSSean McEnroe pfn_t memseg_get_start(struct memseg *); 1039853d9e8SJason Beloro static void memseg_cpu_vm_flush(void); 1049853d9e8SJason Beloro 1059853d9e8SJason Beloro int meta_alloc_enable; 1069853d9e8SJason Beloro 107a3114836SGerry Liu #ifdef DEBUG 108a3114836SGerry Liu static int memseg_debug; 109a3114836SGerry Liu #define MEMSEG_DEBUG(args...) if (memseg_debug) printf(args) 110a3114836SGerry Liu #else 111a3114836SGerry Liu #define MEMSEG_DEBUG(...) 112a3114836SGerry Liu #endif 113a3114836SGerry Liu 1149853d9e8SJason Beloro /* 1159853d9e8SJason Beloro * Add a chunk of memory to the system. 1167c478bd9Sstevel@tonic-gate * base: starting PAGESIZE page of new memory. 1177c478bd9Sstevel@tonic-gate * npgs: length in PAGESIZE pages. 1187c478bd9Sstevel@tonic-gate * 1197c478bd9Sstevel@tonic-gate * Adding mem this way doesn't increase the size of the hash tables; 1207c478bd9Sstevel@tonic-gate * growing them would be too hard. This should be OK, but adding memory 1217c478bd9Sstevel@tonic-gate * dynamically most likely means more hash misses, since the tables will 1227c478bd9Sstevel@tonic-gate * be smaller than they otherwise would be. 
 */
int
kphysm_add_memory_dynamic(pfn_t base, pgcnt_t npgs)
{
	page_t *pp;
	page_t *opp, *oepp, *segpp;
	struct memseg *seg;
	uint64_t avmem;
	pfn_t pfn;
	pfn_t pt_base = base;	/* original span start; base may be advanced */
	pgcnt_t tpgs = npgs;	/* original span length; npgs may shrink */
	pgcnt_t metapgs = 0;	/* pages consumed by page_t metadata */
	int exhausted;
	pfn_t pnum;
	int mnode;
	caddr_t vaddr;
	int reuse;
	int mlret;
	int rv;
	int flags;
	int meta_alloc = 0;	/* 1 if metadata came from existing memory */
	void *mapva;
	void *metabase = (void *)base;
	pgcnt_t nkpmpgs = 0;
	offset_t kpm_pages_off;

	cmn_err(CE_CONT,
	    "?kphysm_add_memory_dynamic: adding %ldK at 0x%" PRIx64 "\n",
	    npgs << (PAGESHIFT - 10), (uint64_t)base << PAGESHIFT);

	/*
	 * Add this span in the delete list to prevent interactions.
	 */
	if (!delspan_reserve(base, npgs)) {
		return (KPHYSM_ESPAN);
	}
	/*
	 * Check to see if any of the memory span has been added
	 * by trying an add to the installed memory list.  This
	 * forms the interlocking process for add.
	 */

	memlist_write_lock();

	mlret = memlist_add_span((uint64_t)(pt_base) << PAGESHIFT,
	    (uint64_t)(tpgs) << PAGESHIFT, &phys_install);

	if (mlret == MEML_SPANOP_OK)
		installed_top_size(phys_install, &physmax, &physinstalled);

	memlist_write_unlock();

	if (mlret != MEML_SPANOP_OK) {
		/* Span rejected: release the delete-list reservation. */
		if (mlret == MEML_SPANOP_EALLOC) {
			delspan_unreserve(pt_base, tpgs);
			return (KPHYSM_ERESOURCE);
		} else if (mlret == MEML_SPANOP_ESPAN) {
			delspan_unreserve(pt_base, tpgs);
			return (KPHYSM_ESPAN);
		} else {
			delspan_unreserve(pt_base, tpgs);
			return (KPHYSM_ERESOURCE);
		}
	}

	if (meta_alloc_enable) {
		/*
		 * Allocate the page_t's from existing memory;
		 * if that fails, allocate from the incoming memory.
		 */
		rv = memseg_alloc_meta(base, npgs, &metabase, &metapgs);
		if (rv == KPHYSM_OK) {
			ASSERT(metapgs);
			ASSERT(btopr(npgs * sizeof (page_t)) <= metapgs);
			meta_alloc = 1;
			goto mapalloc;
		}
	}

	/*
	 * We store the page_t's for this new memory in the first
	 * few pages of the chunk.  Here, we go and get'em ...
	 */

	/*
	 * The expression after the '-' gives the number of pages
	 * that will fit in the new memory based on a requirement
	 * of (PAGESIZE + sizeof (page_t)) bytes per page.
	 */
	metapgs = npgs - (((uint64_t)(npgs) << PAGESHIFT) /
	    (PAGESIZE + sizeof (page_t)));

	npgs -= metapgs;
	base += metapgs;

	ASSERT(btopr(npgs * sizeof (page_t)) <= metapgs);

	exhausted = (metapgs == 0 || npgs == 0);

	if (kpm_enable && !exhausted) {
		pgcnt_t start, end, nkpmpgs_prelim;
		size_t ptsz;

		/*
		 * A viable kpm large page mapping must not overlap two
		 * dynamic memsegs.  Therefore the total size is checked
		 * to be at least kpm_pgsz and also whether start and end
		 * points are at least kpm_pgsz aligned.
		 */
		if (ptokpmp(tpgs) < 1 || pmodkpmp(pt_base) ||
		    pmodkpmp(base + npgs)) {

			kphysm_addmem_error_undospan(pt_base, tpgs);

			/*
			 * There is no specific error code for violating
			 * kpm granularity constraints.
			 */
			return (KPHYSM_ENOTVIABLE);
		}

		/* Recompute the metadata size to cover the kpm_page array. */
		start = kpmptop(ptokpmp(base));
		end = kpmptop(ptokpmp(base + npgs));
		nkpmpgs_prelim = ptokpmp(end - start);
		ptsz = npgs * sizeof (page_t);
		metapgs = btopr(ptsz + nkpmpgs_prelim * KPMPAGE_T_SZ);
		exhausted = (tpgs <= metapgs);
		if (!exhausted) {
			npgs = tpgs - metapgs;
			base = pt_base + metapgs;

			/* final nkpmpgs */
			start = kpmptop(ptokpmp(base));
			nkpmpgs = ptokpmp(end - start);
			kpm_pages_off = ptsz +
			    (nkpmpgs_prelim - nkpmpgs) * KPMPAGE_T_SZ;
		}
	}

	/*
	 * Is memory area supplied too small?
	 */
	if (exhausted) {
		kphysm_addmem_error_undospan(pt_base, tpgs);
		/*
		 * There is no specific error code for 'too small'.
		 */
		return (KPHYSM_ERESOURCE);
	}

mapalloc:
	/*
	 * We may re-use a previously allocated VA space for the page_ts
	 * eventually, but we need to initialize and lock the pages first.
	 */

	/*
	 * Get an address in the kernel address map, map
	 * the page_t pages and see if we can touch them.
	 */

	mapva = vmem_alloc(heap_arena, ptob(metapgs), VM_NOSLEEP);
	if (mapva == NULL) {
		cmn_err(CE_WARN, "kphysm_add_memory_dynamic:"
		    " Can't allocate VA for page_ts");

		if (meta_alloc)
			memseg_free_meta(metabase, metapgs);
		kphysm_addmem_error_undospan(pt_base, tpgs);

		return (KPHYSM_ERESOURCE);
	}
	pp = mapva;

	if (physmax < (pt_base + tpgs))
		physmax = (pt_base + tpgs);

	/*
	 * In the remapping code we map one page at a time so we must do
	 * the same here to match mapping sizes.
	 */
	pfn = pt_base;
	vaddr = (caddr_t)pp;
	for (pnum = 0; pnum < metapgs; pnum++) {
		if (meta_alloc)
			pfn = memseg_get_metapfn(metabase, (pgcnt_t)pnum);
		hat_devload(kas.a_hat, vaddr, ptob(1), pfn,
		    PROT_READ | PROT_WRITE,
		    HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
		pfn++;
		vaddr += ptob(1);
	}

	/*
	 * Probe the new mapping through the DDI peek interface so a bad
	 * mapping is caught here rather than faulting later.
	 */
	if (ddi_peek32((dev_info_t *)NULL,
	    (int32_t *)pp, (int32_t *)0) == DDI_FAILURE) {

		cmn_err(CE_WARN, "kphysm_add_memory_dynamic:"
		    " Can't access pp array at 0x%p [phys 0x%lx]",
		    (void *)pp, pt_base);

		hat_unload(kas.a_hat, (caddr_t)pp, ptob(metapgs),
		    HAT_UNLOAD_UNMAP|HAT_UNLOAD_UNLOCK);

		vmem_free(heap_arena, mapva, ptob(metapgs));
		if (meta_alloc)
			memseg_free_meta(metabase, metapgs);
		kphysm_addmem_error_undospan(pt_base, tpgs);

		return (KPHYSM_EFAULT);
	}

	/*
	 * Add this memory slice to its memory node translation.
	 *
	 * Note that right now, each node may have only one slice;
	 * this may change with COD or in larger SSM systems with
	 * nested latency groups, so we must not assume that the
	 * node does not yet exist.
	 *
	 * Note that there may be multiple memory nodes associated with
	 * a single lgrp node on x86 systems.
	 */
	pnum = pt_base + tpgs - 1;
	mem_node_add_range(pt_base, pnum);

	/*
	 * Allocate or resize page counters as necessary to accommodate
	 * the increase in memory pages.
	 */
	mnode = PFN_2_MEM_NODE(pnum);
	PAGE_CTRS_ADJUST(base, npgs, rv);
	if (rv) {
		/* Roll back everything done above, in reverse order. */
		mem_node_del_range(pt_base, pnum);

		/* cleanup the page counters */
		page_ctrs_cleanup();

		hat_unload(kas.a_hat, (caddr_t)pp, ptob(metapgs),
		    HAT_UNLOAD_UNMAP|HAT_UNLOAD_UNLOCK);

		vmem_free(heap_arena, mapva, ptob(metapgs));
		if (meta_alloc)
			memseg_free_meta(metabase, metapgs);
		kphysm_addmem_error_undospan(pt_base, tpgs);

		return (KPHYSM_ERESOURCE);
	}

	/*
	 * Update the phys_avail memory list.
	 * The phys_install list was done at the start.
	 */

	memlist_write_lock();

	mlret = memlist_add_span((uint64_t)(base) << PAGESHIFT,
	    (uint64_t)(npgs) << PAGESHIFT, &phys_avail);
	ASSERT(mlret == MEML_SPANOP_OK);

	memlist_write_unlock();

	/* See if we can find a memseg to re-use. */
	if (meta_alloc) {
		seg = memseg_reuse(0);
		reuse = 1;	/* force unmapping of temp mapva */
		flags = MEMSEG_DYNAMIC | MEMSEG_META_ALLOC;
		/*
		 * There is a 1:1 fixed relationship between a pfn
		 * and a page_t VA.  The pfn is used as an index into
		 * the ppvm_base page_t table in order to calculate
		 * the page_t base address for a given pfn range.
		 */
		segpp = ppvm_base + base;
	} else {
		seg = memseg_reuse(metapgs);
		reuse = (seg != NULL);
		flags = MEMSEG_DYNAMIC | MEMSEG_META_INCL;
		segpp = pp;
	}

	/*
	 * Initialize the memseg structure representing this memory
	 * and add it to the existing list of memsegs.  Do some basic
	 * initialization and add the memory to the system.
	 * In order to prevent lock deadlocks, the add_physmem()
	 * code is repeated here, but split into several stages.
	 *
	 * If a memseg is reused, invalidate memseg pointers in
	 * all cpu vm caches.  We need to do this since the check
	 *	pp >= seg->pages && pp < seg->epages
	 * used in various places is not atomic and so the first compare
	 * can happen before reuse and the second compare after reuse.
	 * The invalidation ensures that a memseg is not dereferenced while
	 * its page/pfn pointers are changing.
	 */
	if (seg == NULL) {
		seg = memseg_alloc();
		ASSERT(seg != NULL);
		seg->msegflags = flags;
		MEMSEG_DEBUG("memseg_get: alloc seg=0x%p, pages=0x%p",
		    (void *)seg, (void *)(seg->pages));
		seg->pages = segpp;
	} else {
		ASSERT(seg->msegflags == flags);
		ASSERT(seg->pages_base == seg->pages_end);
		MEMSEG_DEBUG("memseg_get: reuse seg=0x%p, pages=0x%p",
		    (void *)seg, (void *)(seg->pages));
		if (meta_alloc) {
			memseg_cpu_vm_flush();
			seg->pages = segpp;
		}
	}

	seg->epages = seg->pages + npgs;
	seg->pages_base = base;
	seg->pages_end = base + npgs;

	/*
	 * Initialize metadata.  The page_ts are set to locked state
	 * ready to be freed.
	 */
	bzero((caddr_t)pp, ptob(metapgs));

	pfn = seg->pages_base;
	/* Save the original pp base in case we reuse a memseg. */
	opp = pp;
	oepp = opp + npgs;
	for (pp = opp; pp < oepp; pp++) {
		pp->p_pagenum = pfn;
		pfn++;
		page_iolock_init(pp);
		while (!page_lock(pp, SE_EXCL, (kmutex_t *)NULL, P_RECLAIM))
			continue;
		pp->p_offset = (u_offset_t)-1;
	}

	if (reuse) {
		/* Remap our page_ts to the re-used memseg VA space. */
		pfn = pt_base;
		vaddr = (caddr_t)seg->pages;
		for (pnum = 0; pnum < metapgs; pnum++) {
			if (meta_alloc)
				pfn = memseg_get_metapfn(metabase,
				    (pgcnt_t)pnum);
			hat_devload(kas.a_hat, vaddr, ptob(1), pfn,
			    PROT_READ | PROT_WRITE,
			    HAT_LOAD_REMAP | HAT_LOAD | HAT_LOAD_NOCONSIST);
			pfn++;
			vaddr += ptob(1);
		}

		/* Drop the temporary mapping and its VA now. */
		hat_unload(kas.a_hat, (caddr_t)opp, ptob(metapgs),
		    HAT_UNLOAD_UNMAP|HAT_UNLOAD_UNLOCK);

		vmem_free(heap_arena, mapva, ptob(metapgs));
	}

	/*
	 * NOTE(review): kpm_pages_off is only assigned on the kpm_enable
	 * path above; hat_kpm_addmem_mseg_update() presumably ignores it
	 * when nkpmpgs == 0 -- confirm against the platform HAT.
	 */
	hat_kpm_addmem_mseg_update(seg, nkpmpgs, kpm_pages_off);

	memsegs_lock(1);

	/*
	 * The new memseg is inserted at the beginning of the list.
	 * Not only does this save searching for the tail, but in the
	 * case of a re-used memseg, it solves the problem of what
	 * happens if some process has still got a pointer to the
	 * memseg and follows the next pointer to continue traversing
	 * the memsegs list.
	 */

	hat_kpm_addmem_mseg_insert(seg);

	/* Publish seg->next before seg becomes reachable via memsegs. */
	seg->next = memsegs;
	membar_producer();

	hat_kpm_addmem_memsegs_update(seg);

	memsegs = seg;

	build_pfn_hash();

	total_pages += npgs;

	/*
	 * Recalculate the paging parameters now total_pages has changed.
	 * This will also cause the clock hands to be reset before next use.
	 */
	setupclock(1);

	memsegs_unlock(1);

	PLCNT_MODIFY_MAX(seg->pages_base, (long)npgs);

	/*
	 * Free the pages outside the lock to avoid locking loops.
	 */
	for (pp = seg->pages; pp < seg->epages; pp++) {
		page_free(pp, 1);
	}

	/*
	 * Now that we've updated the appropriate memory lists we
	 * need to reset a number of globals, since we've increased memory.
	 * Several have already been updated for us as noted above.  The
	 * globals we're interested in at this point are:
	 *	physmax - highest page frame number.
	 *	physinstalled - number of pages currently installed (done earlier)
	 *	maxmem - max free pages in the system
	 *	physmem - physical memory pages available
	 *	availrmem - real memory available
	 */

	mutex_enter(&freemem_lock);
	maxmem += npgs;
	physmem += npgs;
	availrmem += npgs;
	availrmem_initial += npgs;

	mutex_exit(&freemem_lock);

	dump_resize();

	page_freelist_coalesce_all(mnode);

	kphysm_setup_post_add(npgs);

	cmn_err(CE_CONT, "?kphysm_add_memory_dynamic: mem = %ldK "
	    "(0x%" PRIx64 ")\n",
	    physinstalled << (PAGESHIFT - 10),
	    (uint64_t)physinstalled << PAGESHIFT);

	avmem = (uint64_t)freemem << PAGESHIFT;
	cmn_err(CE_CONT, "?kphysm_add_memory_dynamic: "
	    "avail mem = %" PRId64 "\n", avmem);

	/*
	 * Update lgroup generation number on single lgroup systems
	 */
	if (nlgrps == 1)
		lgrp_config(LGRP_CONFIG_GEN_UPDATE, 0, 0);

	/*
	 * Inform DDI of update
	 */
	ddi_mem_update((uint64_t)(pt_base) << PAGESHIFT,
	    (uint64_t)(tpgs) << PAGESHIFT);

	delspan_unreserve(pt_base, tpgs);

	return (KPHYSM_OK);		/* Successfully added system memory */
}

/*
 * There are various error conditions in kphysm_add_memory_dynamic()
 * which require a rollback of already changed global state.
 */
static void
kphysm_addmem_error_undospan(pfn_t pt_base, pgcnt_t tpgs)
{
	int mlret;

	/* Unreserve memory span. */
	memlist_write_lock();

	mlret = memlist_delete_span(
	    (uint64_t)(pt_base) << PAGESHIFT,
	    (uint64_t)(tpgs) << PAGESHIFT, &phys_install);

	ASSERT(mlret == MEML_SPANOP_OK);
	phys_install_has_changed();
	installed_top_size(phys_install, &physmax, &physinstalled);

	memlist_write_unlock();
	delspan_unreserve(pt_base, tpgs);
}

/*
 * Only return an available memseg of exactly the right size
 * if size is required.
/*
 * Only return an available memseg of exactly the right size
 * if size is required.
 * When the meta data area has its own virtual address space
 * we will need to manage this more carefully and do best fit
 * allocations, possibly splitting an available area.
 *
 * With metapgs == 0 the first segment whose metadata was allocated
 * separately (MEMSEG_META_ALLOC) is taken regardless of size.
 * Returns NULL if no suitable segment is on the memseg_va_avail list.
 */
struct memseg *
memseg_reuse(pgcnt_t metapgs)
{
	int type;
	struct memseg **segpp, *seg;

	mutex_enter(&memseg_lists_lock);

	segpp = &memseg_va_avail;
	for (; (seg = *segpp) != NULL; segpp = &seg->lnext) {
		caddr_t end;

		/*
		 * Make sure we are reusing the right segment type.
		 */
		type = metapgs ? MEMSEG_META_INCL : MEMSEG_META_ALLOC;

		if ((seg->msegflags & (MEMSEG_META_INCL | MEMSEG_META_ALLOC))
		    != type)
			continue;

		if (kpm_enable)
			end = hat_kpm_mseg_reuse(seg);
		else
			end = (caddr_t)seg->epages;

		/*
		 * Check for the right size if it is provided.
		 */
		if (!metapgs || btopr(end - (caddr_t)seg->pages) == metapgs) {
			/* Unlink the segment from the avail list. */
			*segpp = seg->lnext;
			seg->lnext = NULL;
			break;
		}
	}
	mutex_exit(&memseg_lists_lock);

	return (seg);
}

/* Generator for unique external memhandle_t values; see mem_handle_list_mutex. */
static uint_t handle_gen;

/*
 * One contiguous page-frame span that is part of an add or delete
 * operation currently in transit.
 */
struct memdelspan {
	struct memdelspan *mds_next;	/* singly linked list */
	pfn_t	mds_base;		/* first pfn of span */
	pgcnt_t	mds_npgs;		/* number of pages in span */
	uint_t	*mds_bitmap;		/* one bit per page in span */
	uint_t	*mds_bitmap_retired;	/* bits for retired pages */
};

/* Bits per bitmap word. */
#define	NBPBMW		(sizeof (uint_t) * NBBY)
/* Bytes needed for a span bitmap, rounded up to whole words. */
#define	MDS_BITMAPBYTES(MDSP) \
	((((MDSP)->mds_npgs + NBPBMW - 1) / NBPBMW) * sizeof (uint_t))

/*
 * Per-operation list of spans in transit; linked onto the global
 * transit_list_head so concurrent operations can detect overlap.
 */
struct transit_list {
	struct transit_list	*trl_next;
	struct memdelspan	*trl_spans;
	int			trl_collect;
};

struct transit_list_head {
	kmutex_t		trh_lock;	/* protects trh_head list */
	struct transit_list	*trh_head;
};

static struct transit_list_head transit_list_head;

struct mem_handle;
static void transit_list_collect(struct mem_handle *, int);
static void transit_list_insert(struct transit_list *);
static void transit_list_remove(struct transit_list *);

#ifdef DEBUG
#define	MEM_DEL_STATS
#endif /* DEBUG */

#ifdef MEM_DEL_STATS
static int mem_del_stat_print = 0;

/*
 * Counters recording the progress and outcome of a single memory delete
 * operation; one instance is embedded in each struct mem_handle.
 */
struct mem_del_stat {
	uint_t	nloop;
	uint_t	need_free;
	uint_t	free_loop;
	uint_t	free_low;
	uint_t	free_failed;
	uint_t	ncheck;
	uint_t	nopaget;
	uint_t	lockfail;
	uint_t	nfree;
	uint_t	nreloc;
	uint_t	nrelocfail;
	uint_t	already_done;
	uint_t	first_notfree;
	uint_t	npplocked;
	uint_t	nlockreloc;
	uint_t	nnorepl;
	uint_t	nmodreloc;
	uint_t	ndestroy;
	uint_t	nputpage;
	uint_t	nnoreclaim;
	uint_t	ndelay;
	uint_t	demotefail;
	uint64_t nticks_total;
	uint64_t nticks_pgrp;
	uint_t	retired;
	uint_t	toxic;
	uint_t	failing;
	uint_t	modtoxic;
	uint_t	npplkdtoxic;
	uint_t	gptlmodfail;
	uint_t	gptllckfail;
};
/*
 * The stat values are only incremented in the delete thread
 * so no locking or atomic required.
 * Expansions are fully parenthesized so the macros behave as single
 * expressions regardless of surrounding context.
 */
#define	MDSTAT_INCR(MHP, FLD)	((MHP)->mh_delstat.FLD++)
#define	MDSTAT_TOTAL(MHP, ntck)	((MHP)->mh_delstat.nticks_total += (ntck))
#define	MDSTAT_PGRP(MHP, ntck)	((MHP)->mh_delstat.nticks_pgrp += (ntck))
static void mem_del_stat_print_func(struct mem_handle *);
#define	MDSTAT_PRINT(MHP)	mem_del_stat_print_func((MHP))
#else /* MEM_DEL_STATS */
/*
 * In non-stats builds expand to ((void)0) rather than nothing, so a
 * bare "MDSTAT_INCR(mhp, x);" remains a well-formed statement and
 * cannot silently change control flow (e.g. after an unbraced if).
 */
#define	MDSTAT_INCR(MHP, FLD)	((void)0)
#define	MDSTAT_TOTAL(MHP, ntck)	((void)0)
#define	MDSTAT_PGRP(MHP, ntck)	((void)0)
#define	MDSTAT_PRINT(MHP)	((void)0)
#endif /* MEM_DEL_STATS */

/* Life-cycle states of a memory delete handle. */
typedef enum mhnd_state {MHND_FREE = 0, MHND_INIT, MHND_STARTING,
	MHND_RUNNING, MHND_DONE, MHND_RELEASE} mhnd_state_t;

/*
 * mh_mutex must be taken to examine or change mh_exthandle and mh_state.
 * The mutex may not be required for other fields, dependent on mh_state.
 */
struct mem_handle {
	kmutex_t	mh_mutex;		/* protects mh_exthandle, mh_state */
	struct mem_handle *mh_next;		/* mem_handle_head list link */
	memhandle_t	mh_exthandle;		/* value handed to callers */
	mhnd_state_t	mh_state;		/* mhnd_state_t life cycle */
	struct transit_list mh_transit;		/* spans being deleted */
	pgcnt_t		mh_phys_pages;		/* accumulated by kphysm_del_span */
	pgcnt_t		mh_vm_pages;		/* accumulated by kphysm_del_span */
	pgcnt_t		mh_hold_todo;
	void		(*mh_delete_complete)(void *, int error);
	void		*mh_delete_complete_arg;
	volatile uint_t mh_cancel;
	volatile uint_t mh_dr_aio_cleanup_cancel;
	volatile uint_t mh_aio_cleanup_done;
	kcondvar_t	mh_cv;
	kthread_id_t	mh_thread_id;		/* delete thread */
	page_t		*mh_deleted;	/* link through p_next */
#ifdef MEM_DEL_STATS
	struct mem_del_stat mh_delstat;
#endif /* MEM_DEL_STATS */
};

static struct mem_handle *mem_handle_head;
static kmutex_t mem_handle_list_mutex;

/*
 * Allocate a new handle, link it onto mem_handle_head and assign it a
 * unique external handle value.
 * Returns with mh_mutex held; the caller must release it.
 */
static struct mem_handle *
kphysm_allocate_mem_handle()
{
	struct mem_handle *mhp;

	mhp = kmem_zalloc(sizeof (struct mem_handle), KM_SLEEP);
	mutex_init(&mhp->mh_mutex, NULL, MUTEX_DEFAULT, NULL);
	mutex_enter(&mem_handle_list_mutex);
	mutex_enter(&mhp->mh_mutex);
	/* handle_gen is protected by list mutex. */
	mhp->mh_exthandle = (memhandle_t)(uintptr_t)(++handle_gen);
	mhp->mh_next = mem_handle_head;
	mem_handle_head = mhp;
	mutex_exit(&mem_handle_list_mutex);

	return (mhp);
}

/*
 * Unlink a handle (already in the MHND_FREE state) from mem_handle_head
 * and free it.  Called with mh_mutex held; the mutex is destroyed here.
 */
static void
kphysm_free_mem_handle(struct mem_handle *mhp)
{
	struct mem_handle **mhpp;

	ASSERT(mutex_owned(&mhp->mh_mutex));
	ASSERT(mhp->mh_state == MHND_FREE);
	/*
	 * Exit the mutex to preserve locking order. This is OK
	 * here as once in the FREE state, the handle cannot
	 * be found by a lookup.
	 */
	mutex_exit(&mhp->mh_mutex);

	mutex_enter(&mem_handle_list_mutex);
	mhpp = &mem_handle_head;
	while (*mhpp != NULL && *mhpp != mhp)
		mhpp = &(*mhpp)->mh_next;
	ASSERT(*mhpp == mhp);
	/*
	 * No need to lock the handle (mh_mutex) as only
	 * mh_next changing and this is the only thread that
	 * can be referencing mhp.
	 */
	*mhpp = mhp->mh_next;
	mutex_exit(&mem_handle_list_mutex);

	mutex_destroy(&mhp->mh_mutex);
	kmem_free(mhp, sizeof (struct mem_handle));
}
/*
 * This function finds the internal mem_handle corresponding to an
 * external handle and returns it with the mh_mutex held.
 * Returns NULL if no live (non-MHND_FREE) handle matches.
 */
static struct mem_handle *
kphysm_lookup_mem_handle(memhandle_t handle)
{
	struct mem_handle *mhp;

	mutex_enter(&mem_handle_list_mutex);
	for (mhp = mem_handle_head; mhp != NULL; mhp = mhp->mh_next) {
		if (mhp->mh_exthandle == handle) {
			mutex_enter(&mhp->mh_mutex);
			/*
			 * The state of the handle could have been changed
			 * by kphysm_del_release() while waiting for mh_mutex.
			 */
			if (mhp->mh_state == MHND_FREE) {
				mutex_exit(&mhp->mh_mutex);
				continue;
			}
			break;
		}
	}
	mutex_exit(&mem_handle_list_mutex);
	return (mhp);
}

/*
 * Public entry point: allocate a delete handle, move it to MHND_INIT
 * and return its external value through *xmhp.
 * Always returns KPHYSM_OK with the current KM_SLEEP allocator.
 */
int
kphysm_del_gethandle(memhandle_t *xmhp)
{
	struct mem_handle *mhp;

	mhp = kphysm_allocate_mem_handle();
	/*
	 * The handle is allocated using KM_SLEEP, so cannot fail.
	 * If the implementation is changed, the correct error to return
	 * here would be KPHYSM_ENOHANDLES.
	 */
	ASSERT(mhp->mh_state == MHND_FREE);
	mhp->mh_state = MHND_INIT;
	*xmhp = mhp->mh_exthandle;
	mutex_exit(&mhp->mh_mutex);
	return (KPHYSM_OK);
}
/*
 * Return non-zero if the half-open pfn ranges [b1, b1+l1) and
 * [b2, b2+l2) intersect.
 */
static int
overlapping(pfn_t b1, pgcnt_t l1, pfn_t b2, pgcnt_t l2)
{
	pfn_t e1, e2;

	e1 = b1 + l1;
	e2 = b2 + l2;

	return (!(b2 >= e1 || b1 >= e2));
}

static int can_remove_pgs(pgcnt_t);

/*
 * Intersect [base, base + npgs) with the phys_install memlist and
 * return the result as a (reverse-ordered) list of memdelspans.
 * Returns NULL when the span contains no installed physical memory.
 */
static struct memdelspan *
span_to_install(pfn_t base, pgcnt_t npgs)
{
	struct memdelspan *mdsp;
	struct memdelspan *mdsp_new;
	uint64_t address, size, thislen;
	struct memlist *mlp;

	mdsp_new = NULL;

	address = (uint64_t)base << PAGESHIFT;
	size = (uint64_t)npgs << PAGESHIFT;
	while (size != 0) {
		memlist_read_lock();
		/* Find the first installed chunk that could intersect. */
		for (mlp = phys_install; mlp != NULL; mlp = mlp->ml_next) {
			if (address >= (mlp->ml_address + mlp->ml_size))
				continue;
			if ((address + size) > mlp->ml_address)
				break;
		}
		if (mlp == NULL) {
			/* Nothing installed in the remainder of the span. */
			address += size;
			size = 0;
			thislen = 0;
		} else {
			/* Clip the front of the span to the chunk start. */
			if (address < mlp->ml_address) {
				size -= (mlp->ml_address - address);
				address = mlp->ml_address;
			}
			ASSERT(address >= mlp->ml_address);
			if ((address + size) >
			    (mlp->ml_address + mlp->ml_size)) {
				thislen =
				    mlp->ml_size - (address - mlp->ml_address);
			} else {
				thislen = size;
			}
		}
		memlist_read_unlock();
		/* TODO: phys_install could change now */
		if (thislen == 0)
			continue;
		mdsp = kmem_zalloc(sizeof (struct memdelspan), KM_SLEEP);
		mdsp->mds_base = btop(address);
		mdsp->mds_npgs = btop(thislen);
		mdsp->mds_next = mdsp_new;
		mdsp_new = mdsp;
		address += thislen;
		size -= thislen;
	}
	return (mdsp_new);
}

/*
 * Free an entire memdelspan list.  Note: the bitmaps, if any, are not
 * freed here; callers are responsible for them.
 */
static void
free_delspans(struct memdelspan *mdsp)
{
	struct memdelspan *amdsp;

	while ((amdsp = mdsp) != NULL) {
		mdsp = amdsp->mds_next;
		kmem_free(amdsp, sizeof (struct memdelspan));
	}
}
amdsp->mds_next; 9347c478bd9Sstevel@tonic-gate kmem_free(amdsp, sizeof (struct memdelspan)); 9357c478bd9Sstevel@tonic-gate } 9367c478bd9Sstevel@tonic-gate } 9377c478bd9Sstevel@tonic-gate 9387c478bd9Sstevel@tonic-gate /* 9397c478bd9Sstevel@tonic-gate * Concatenate lists. No list ordering is required. 9407c478bd9Sstevel@tonic-gate */ 9417c478bd9Sstevel@tonic-gate 9427c478bd9Sstevel@tonic-gate static void 9437c478bd9Sstevel@tonic-gate delspan_concat(struct memdelspan **mdspp, struct memdelspan *mdsp) 9447c478bd9Sstevel@tonic-gate { 9457c478bd9Sstevel@tonic-gate while (*mdspp != NULL) 9467c478bd9Sstevel@tonic-gate mdspp = &(*mdspp)->mds_next; 9477c478bd9Sstevel@tonic-gate 9487c478bd9Sstevel@tonic-gate *mdspp = mdsp; 9497c478bd9Sstevel@tonic-gate } 9507c478bd9Sstevel@tonic-gate 9517c478bd9Sstevel@tonic-gate /* 9527c478bd9Sstevel@tonic-gate * Given a new list of delspans, check there is no overlap with 9537c478bd9Sstevel@tonic-gate * all existing span activity (add or delete) and then concatenate 9547c478bd9Sstevel@tonic-gate * the new spans to the given list. 9557c478bd9Sstevel@tonic-gate * Return 1 for OK, 0 if overlapping. 
static int
delspan_insert(
	struct transit_list *my_tlp,
	struct memdelspan *mdsp_new)
{
	struct transit_list_head *trh;
	struct transit_list *tlp;
	int ret;

	trh = &transit_list_head;

	ASSERT(my_tlp != NULL);
	ASSERT(mdsp_new != NULL);

	ret = 1;
	mutex_enter(&trh->trh_lock);
	/* ASSERT(my_tlp->trl_spans == NULL || tlp_in_list(trh, my_tlp)); */
	/*
	 * Compare every new span against every span already in transit
	 * on any list; a single overlap rejects the whole insertion.
	 */
	for (tlp = trh->trh_head; tlp != NULL; tlp = tlp->trl_next) {
		struct memdelspan *mdsp;

		for (mdsp = tlp->trl_spans; mdsp != NULL;
		    mdsp = mdsp->mds_next) {
			struct memdelspan *nmdsp;

			for (nmdsp = mdsp_new; nmdsp != NULL;
			    nmdsp = nmdsp->mds_next) {
				if (overlapping(mdsp->mds_base, mdsp->mds_npgs,
				    nmdsp->mds_base, nmdsp->mds_npgs)) {
					ret = 0;
					goto done;
				}
			}
		}
	}
done:
	if (ret != 0) {
		/*
		 * A transit_list is only linked on the global head while
		 * it has spans; link it now if this is its first span.
		 */
		if (my_tlp->trl_spans == NULL)
			transit_list_insert(my_tlp);
		delspan_concat(&my_tlp->trl_spans, mdsp_new);
	}
	mutex_exit(&trh->trh_lock);
	return (ret);
}

/*
 * Remove (and free) the spans of my_tlp that lie wholly within
 * [base, base + npgs).  npgs == 0 means remove and free all spans.
 * The transit_list itself is unlinked once it has no spans left.
 */
static void
delspan_remove(
	struct transit_list *my_tlp,
	pfn_t base,
	pgcnt_t npgs)
{
	struct transit_list_head *trh;
	struct memdelspan *mdsp;

	trh = &transit_list_head;

	ASSERT(my_tlp != NULL);

	mutex_enter(&trh->trh_lock);
	if ((mdsp = my_tlp->trl_spans) != NULL) {
		if (npgs == 0) {
			/* Remove everything. */
			my_tlp->trl_spans = NULL;
			free_delspans(mdsp);
			transit_list_remove(my_tlp);
		} else {
			struct memdelspan **prv;

			prv = &my_tlp->trl_spans;
			while (mdsp != NULL) {
				pfn_t p_end;

				p_end = mdsp->mds_base + mdsp->mds_npgs;
				if (mdsp->mds_base >= base &&
				    p_end <= (base + npgs)) {
					/* Span fully contained: unlink/free. */
					*prv = mdsp->mds_next;
					mdsp->mds_next = NULL;
					free_delspans(mdsp);
				} else {
					prv = &mdsp->mds_next;
				}
				mdsp = *prv;
			}
			if (my_tlp->trl_spans == NULL)
				transit_list_remove(my_tlp);
		}
	}
	mutex_exit(&trh->trh_lock);
}
10367c478bd9Sstevel@tonic-gate mdsp = *prv; 10377c478bd9Sstevel@tonic-gate } 10387c478bd9Sstevel@tonic-gate if (my_tlp->trl_spans == NULL) 10397c478bd9Sstevel@tonic-gate transit_list_remove(my_tlp); 10407c478bd9Sstevel@tonic-gate } 10417c478bd9Sstevel@tonic-gate } 10427c478bd9Sstevel@tonic-gate mutex_exit(&trh->trh_lock); 10437c478bd9Sstevel@tonic-gate } 10447c478bd9Sstevel@tonic-gate 10457c478bd9Sstevel@tonic-gate /* 10467c478bd9Sstevel@tonic-gate * Reserve interface for add to stop delete before add finished. 10477c478bd9Sstevel@tonic-gate * This list is only accessed through the delspan_insert/remove 10487c478bd9Sstevel@tonic-gate * functions and so is fully protected by the mutex in struct transit_list. 10497c478bd9Sstevel@tonic-gate */ 10507c478bd9Sstevel@tonic-gate 10517c478bd9Sstevel@tonic-gate static struct transit_list reserve_transit; 10527c478bd9Sstevel@tonic-gate 10537c478bd9Sstevel@tonic-gate static int 10547c478bd9Sstevel@tonic-gate delspan_reserve(pfn_t base, pgcnt_t npgs) 10557c478bd9Sstevel@tonic-gate { 10567c478bd9Sstevel@tonic-gate struct memdelspan *mdsp; 10577c478bd9Sstevel@tonic-gate int ret; 10587c478bd9Sstevel@tonic-gate 10597c478bd9Sstevel@tonic-gate mdsp = kmem_zalloc(sizeof (struct memdelspan), KM_SLEEP); 10607c478bd9Sstevel@tonic-gate mdsp->mds_base = base; 10617c478bd9Sstevel@tonic-gate mdsp->mds_npgs = npgs; 10627c478bd9Sstevel@tonic-gate if ((ret = delspan_insert(&reserve_transit, mdsp)) == 0) { 10637c478bd9Sstevel@tonic-gate free_delspans(mdsp); 10647c478bd9Sstevel@tonic-gate } 10657c478bd9Sstevel@tonic-gate return (ret); 10667c478bd9Sstevel@tonic-gate } 10677c478bd9Sstevel@tonic-gate 10687c478bd9Sstevel@tonic-gate static void 10697c478bd9Sstevel@tonic-gate delspan_unreserve(pfn_t base, pgcnt_t npgs) 10707c478bd9Sstevel@tonic-gate { 10717c478bd9Sstevel@tonic-gate delspan_remove(&reserve_transit, base, npgs); 10727c478bd9Sstevel@tonic-gate } 10737c478bd9Sstevel@tonic-gate 10747c478bd9Sstevel@tonic-gate /* 
/*
 * Return whether memseg was created by kphysm_add_memory_dynamic().
 */
static int
memseg_is_dynamic(struct memseg *seg)
{
	return (seg->msegflags & MEMSEG_DYNAMIC);
}

/*
 * Register the span [base, base + npgs) for deletion on the given
 * handle.  The handle must be in the MHND_INIT state.  Every page in
 * the span is checked for relocatability (cage pages fail with
 * KPHYSM_ENONRELOC); memsegs partially covered by the span are split
 * via kphysm_split_memseg(), which forces a restart of the scan.
 */
int
kphysm_del_span(
	memhandle_t handle,
	pfn_t base,
	pgcnt_t npgs)
{
	struct mem_handle *mhp;
	struct memseg *seg;
	struct memdelspan *mdsp;
	struct memdelspan *mdsp_new;
	pgcnt_t phys_pages, vm_pages;
	pfn_t p_end;
	page_t *pp;
	int ret;

	/* Lookup returns with mh_mutex held on success. */
	mhp = kphysm_lookup_mem_handle(handle);
	if (mhp == NULL) {
		return (KPHYSM_EHANDLE);
	}
	if (mhp->mh_state != MHND_INIT) {
		mutex_exit(&mhp->mh_mutex);
		return (KPHYSM_ESEQUENCE);
	}

	/*
	 * Intersect the span with the installed memory list (phys_install).
	 */
	mdsp_new = span_to_install(base, npgs);
	if (mdsp_new == NULL) {
		/*
		 * No physical memory in this range. Is this an
		 * error? If an attempt to start the delete is made
		 * for OK returns from del_span such as this, start will
		 * return an error.
		 * Could return KPHYSM_ENOWORK.
		 */
		/*
		 * It is assumed that there are no error returns
		 * from span_to_install() due to kmem_alloc failure.
		 */
		mutex_exit(&mhp->mh_mutex);
		return (KPHYSM_OK);
	}
	/*
	 * Does this span overlap an existing span?
	 */
	if (delspan_insert(&mhp->mh_transit, mdsp_new) == 0) {
		/*
		 * Differentiate between already on list for this handle
		 * (KPHYSM_EDUP) and busy elsewhere (KPHYSM_EBUSY).
		 */
		ret = KPHYSM_EBUSY;
		for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
		    mdsp = mdsp->mds_next) {
			if (overlapping(mdsp->mds_base, mdsp->mds_npgs,
			    base, npgs)) {
				ret = KPHYSM_EDUP;
				break;
			}
		}
		mutex_exit(&mhp->mh_mutex);
		free_delspans(mdsp_new);
		return (ret);
	}
	/*
	 * At this point the spans in mdsp_new have been inserted into the
	 * list of spans for this handle and thereby to the global list of
	 * spans being processed. Each of these spans must now be checked
	 * for relocatability. As a side-effect segments in the memseg list
	 * may be split.
	 *
	 * Note that mdsp_new can no longer be used as it is now part of
	 * a larger list. Select elements of this larger list based
	 * on base and npgs.
	 */
restart:
	phys_pages = 0;
	vm_pages = 0;
	ret = KPHYSM_OK;
	for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
	    mdsp = mdsp->mds_next) {
		pgcnt_t pages_checked;

		if (!overlapping(mdsp->mds_base, mdsp->mds_npgs, base, npgs)) {
			continue;
		}
		p_end = mdsp->mds_base + mdsp->mds_npgs;
		/*
		 * The pages_checked count is a hack. All pages should be
		 * checked for relocatability. Those not covered by memsegs
		 * should be tested with arch_kphysm_del_span_ok().
		 */
		pages_checked = 0;
		for (seg = memsegs; seg; seg = seg->next) {
			pfn_t mseg_start;

			if (seg->pages_base >= p_end ||
			    seg->pages_end <= mdsp->mds_base) {
				/* Span and memseg don't overlap. */
				continue;
			}
			mseg_start = memseg_get_start(seg);
			/* Check that segment is suitable for delete. */
			if (memseg_includes_meta(seg)) {
				/*
				 * Check that this segment is completely
				 * within the span.
				 */
				if (mseg_start < mdsp->mds_base ||
				    seg->pages_end > p_end) {
					ret = KPHYSM_EBUSY;
					break;
				}
				pages_checked += seg->pages_end - mseg_start;
			} else {
				/*
				 * If this segment is larger than the span,
				 * try to split it. After the split, it
				 * is necessary to restart.
				 */
				if (seg->pages_base < mdsp->mds_base ||
				    seg->pages_end > p_end) {
					pfn_t abase;
					pgcnt_t anpgs;
					int s_ret;

					/* Split required. */
					if (mdsp->mds_base < seg->pages_base)
						abase = seg->pages_base;
					else
						abase = mdsp->mds_base;
					if (p_end > seg->pages_end)
						anpgs = seg->pages_end - abase;
					else
						anpgs = p_end - abase;
					s_ret = kphysm_split_memseg(abase,
					    anpgs);
					if (s_ret == 0) {
						/* Split failed. */
						ret = KPHYSM_ERESOURCE;
						break;
					}
					goto restart;
				}
				pages_checked +=
				    seg->pages_end - seg->pages_base;
			}
			/*
			 * The memseg is wholly within the delete span.
			 * The individual pages can now be checked.
			 */
			/* Cage test. */
			for (pp = seg->pages; pp < seg->epages; pp++) {
				if (PP_ISNORELOC(pp)) {
					ret = KPHYSM_ENONRELOC;
					break;
				}
			}
			if (ret != KPHYSM_OK) {
				break;
			}
			phys_pages += (seg->pages_end - mseg_start);
			vm_pages += MSEG_NPAGES(seg);
		}
		if (ret != KPHYSM_OK)
			break;
		/* Pages not covered by any memseg are not deletable. */
		if (pages_checked != mdsp->mds_npgs) {
			ret = KPHYSM_ENONRELOC;
			break;
		}
	}

	if (ret == KPHYSM_OK) {
		mhp->mh_phys_pages += phys_pages;
		mhp->mh_vm_pages += vm_pages;
	} else {
		/*
		 * Keep holding the mh_mutex to prevent it going away.
		 */
		delspan_remove(&mhp->mh_transit, base, npgs);
	}
	mutex_exit(&mhp->mh_mutex);
	return (ret);
}

int
kphysm_del_span_query(
	pfn_t base,
	pgcnt_t npgs,
	memquery_t *mqp)
{
	struct memdelspan *mdsp;
	struct memdelspan *mdsp_new;
	int done_first_nonreloc;

	mqp->phys_pages = 0;
	mqp->managed = 0;
	mqp->nonrelocatable = 0;
	mqp->first_nonrelocatable = 0;
	mqp->last_nonrelocatable = 0;

	mdsp_new = span_to_install(base, npgs);
	/*
	 * It is OK to proceed here if mdsp_new == NULL.
12877c478bd9Sstevel@tonic-gate */ 12887c478bd9Sstevel@tonic-gate done_first_nonreloc = 0; 12897c478bd9Sstevel@tonic-gate for (mdsp = mdsp_new; mdsp != NULL; mdsp = mdsp->mds_next) { 12907c478bd9Sstevel@tonic-gate pfn_t sbase; 12917c478bd9Sstevel@tonic-gate pgcnt_t snpgs; 12927c478bd9Sstevel@tonic-gate 12937c478bd9Sstevel@tonic-gate mqp->phys_pages += mdsp->mds_npgs; 12947c478bd9Sstevel@tonic-gate sbase = mdsp->mds_base; 12957c478bd9Sstevel@tonic-gate snpgs = mdsp->mds_npgs; 12967c478bd9Sstevel@tonic-gate while (snpgs != 0) { 12977c478bd9Sstevel@tonic-gate struct memseg *lseg, *seg; 12987c478bd9Sstevel@tonic-gate pfn_t p_end; 12997c478bd9Sstevel@tonic-gate page_t *pp; 13007c478bd9Sstevel@tonic-gate pfn_t mseg_start; 13017c478bd9Sstevel@tonic-gate 13027c478bd9Sstevel@tonic-gate p_end = sbase + snpgs; 13037c478bd9Sstevel@tonic-gate /* 13047c478bd9Sstevel@tonic-gate * Find the lowest addressed memseg that starts 13057c478bd9Sstevel@tonic-gate * after sbase and account for it. 13067c478bd9Sstevel@tonic-gate * This is to catch dynamic memsegs whose start 13077c478bd9Sstevel@tonic-gate * is hidden. 13087c478bd9Sstevel@tonic-gate */ 13097c478bd9Sstevel@tonic-gate seg = NULL; 13107c478bd9Sstevel@tonic-gate for (lseg = memsegs; lseg != NULL; lseg = lseg->next) { 13117c478bd9Sstevel@tonic-gate if ((lseg->pages_base >= sbase) || 13127c478bd9Sstevel@tonic-gate (lseg->pages_base < p_end && 13137c478bd9Sstevel@tonic-gate lseg->pages_end > sbase)) { 13147c478bd9Sstevel@tonic-gate if (seg == NULL || 13157c478bd9Sstevel@tonic-gate seg->pages_base > lseg->pages_base) 13167c478bd9Sstevel@tonic-gate seg = lseg; 13177c478bd9Sstevel@tonic-gate } 13187c478bd9Sstevel@tonic-gate } 13197c478bd9Sstevel@tonic-gate if (seg != NULL) { 13209853d9e8SJason Beloro mseg_start = memseg_get_start(seg); 13217c478bd9Sstevel@tonic-gate /* 13227c478bd9Sstevel@tonic-gate * Now have the full extent of the memseg so 13237c478bd9Sstevel@tonic-gate * do the range check. 
13247c478bd9Sstevel@tonic-gate */ 13257c478bd9Sstevel@tonic-gate if (mseg_start >= p_end || 13267c478bd9Sstevel@tonic-gate seg->pages_end <= sbase) { 13277c478bd9Sstevel@tonic-gate /* Span does not overlap memseg. */ 13287c478bd9Sstevel@tonic-gate seg = NULL; 13297c478bd9Sstevel@tonic-gate } 13307c478bd9Sstevel@tonic-gate } 13317c478bd9Sstevel@tonic-gate /* 13327c478bd9Sstevel@tonic-gate * Account for gap either before the segment if 13337c478bd9Sstevel@tonic-gate * there is one or to the end of the span. 13347c478bd9Sstevel@tonic-gate */ 13357c478bd9Sstevel@tonic-gate if (seg == NULL || mseg_start > sbase) { 13367c478bd9Sstevel@tonic-gate pfn_t a_end; 13377c478bd9Sstevel@tonic-gate 13387c478bd9Sstevel@tonic-gate a_end = (seg == NULL) ? p_end : mseg_start; 13397c478bd9Sstevel@tonic-gate /* 13407c478bd9Sstevel@tonic-gate * Check with arch layer for relocatability. 13417c478bd9Sstevel@tonic-gate */ 13427c478bd9Sstevel@tonic-gate if (arch_kphysm_del_span_ok(sbase, 13437c478bd9Sstevel@tonic-gate (a_end - sbase))) { 13447c478bd9Sstevel@tonic-gate /* 13457c478bd9Sstevel@tonic-gate * No non-relocatble pages in this 13467c478bd9Sstevel@tonic-gate * area, avoid the fine-grained 13477c478bd9Sstevel@tonic-gate * test. 
13487c478bd9Sstevel@tonic-gate */ 13497c478bd9Sstevel@tonic-gate snpgs -= (a_end - sbase); 13507c478bd9Sstevel@tonic-gate sbase = a_end; 13517c478bd9Sstevel@tonic-gate } 13527c478bd9Sstevel@tonic-gate while (sbase < a_end) { 13537c478bd9Sstevel@tonic-gate if (!arch_kphysm_del_span_ok(sbase, 13547c478bd9Sstevel@tonic-gate 1)) { 13557c478bd9Sstevel@tonic-gate mqp->nonrelocatable++; 13567c478bd9Sstevel@tonic-gate if (!done_first_nonreloc) { 13577c478bd9Sstevel@tonic-gate mqp-> 13587c478bd9Sstevel@tonic-gate first_nonrelocatable 13597c478bd9Sstevel@tonic-gate = sbase; 13607c478bd9Sstevel@tonic-gate done_first_nonreloc = 1; 13617c478bd9Sstevel@tonic-gate } 13627c478bd9Sstevel@tonic-gate mqp->last_nonrelocatable = 13637c478bd9Sstevel@tonic-gate sbase; 13647c478bd9Sstevel@tonic-gate } 13657c478bd9Sstevel@tonic-gate sbase++; 13667c478bd9Sstevel@tonic-gate snpgs--; 13677c478bd9Sstevel@tonic-gate } 13687c478bd9Sstevel@tonic-gate } 13697c478bd9Sstevel@tonic-gate if (seg != NULL) { 13707c478bd9Sstevel@tonic-gate ASSERT(mseg_start <= sbase); 13717c478bd9Sstevel@tonic-gate if (seg->pages_base != mseg_start && 13727c478bd9Sstevel@tonic-gate seg->pages_base > sbase) { 13737c478bd9Sstevel@tonic-gate pgcnt_t skip_pgs; 13747c478bd9Sstevel@tonic-gate 13757c478bd9Sstevel@tonic-gate /* 13767c478bd9Sstevel@tonic-gate * Skip the page_t area of a 13777c478bd9Sstevel@tonic-gate * dynamic memseg. 
13787c478bd9Sstevel@tonic-gate */ 13797c478bd9Sstevel@tonic-gate skip_pgs = seg->pages_base - sbase; 13807c478bd9Sstevel@tonic-gate if (snpgs <= skip_pgs) { 13817c478bd9Sstevel@tonic-gate sbase += snpgs; 13827c478bd9Sstevel@tonic-gate snpgs = 0; 13837c478bd9Sstevel@tonic-gate continue; 13847c478bd9Sstevel@tonic-gate } 13857c478bd9Sstevel@tonic-gate snpgs -= skip_pgs; 13867c478bd9Sstevel@tonic-gate sbase += skip_pgs; 13877c478bd9Sstevel@tonic-gate } 13887c478bd9Sstevel@tonic-gate ASSERT(snpgs != 0); 13897c478bd9Sstevel@tonic-gate ASSERT(seg->pages_base <= sbase); 13907c478bd9Sstevel@tonic-gate /* 13917c478bd9Sstevel@tonic-gate * The individual pages can now be checked. 13927c478bd9Sstevel@tonic-gate */ 13937c478bd9Sstevel@tonic-gate for (pp = seg->pages + 13947c478bd9Sstevel@tonic-gate (sbase - seg->pages_base); 13957c478bd9Sstevel@tonic-gate snpgs != 0 && pp < seg->epages; pp++) { 13967c478bd9Sstevel@tonic-gate mqp->managed++; 13977c478bd9Sstevel@tonic-gate if (PP_ISNORELOC(pp)) { 13987c478bd9Sstevel@tonic-gate mqp->nonrelocatable++; 13997c478bd9Sstevel@tonic-gate if (!done_first_nonreloc) { 14007c478bd9Sstevel@tonic-gate mqp-> 14017c478bd9Sstevel@tonic-gate first_nonrelocatable 14027c478bd9Sstevel@tonic-gate = sbase; 14037c478bd9Sstevel@tonic-gate done_first_nonreloc = 1; 14047c478bd9Sstevel@tonic-gate } 14057c478bd9Sstevel@tonic-gate mqp->last_nonrelocatable = 14067c478bd9Sstevel@tonic-gate sbase; 14077c478bd9Sstevel@tonic-gate } 14087c478bd9Sstevel@tonic-gate sbase++; 14097c478bd9Sstevel@tonic-gate snpgs--; 14107c478bd9Sstevel@tonic-gate } 14117c478bd9Sstevel@tonic-gate } 14127c478bd9Sstevel@tonic-gate } 14137c478bd9Sstevel@tonic-gate } 14147c478bd9Sstevel@tonic-gate 14157c478bd9Sstevel@tonic-gate free_delspans(mdsp_new); 14167c478bd9Sstevel@tonic-gate 14177c478bd9Sstevel@tonic-gate return (KPHYSM_OK); 14187c478bd9Sstevel@tonic-gate } 14197c478bd9Sstevel@tonic-gate 14207c478bd9Sstevel@tonic-gate /* 14217c478bd9Sstevel@tonic-gate * This release function can be 
called at any stage as follows: 14227c478bd9Sstevel@tonic-gate * _gethandle only called 14237c478bd9Sstevel@tonic-gate * _span(s) only called 14247c478bd9Sstevel@tonic-gate * _start called but failed 14257c478bd9Sstevel@tonic-gate * delete thread exited 14267c478bd9Sstevel@tonic-gate */ 14277c478bd9Sstevel@tonic-gate int 14287c478bd9Sstevel@tonic-gate kphysm_del_release(memhandle_t handle) 14297c478bd9Sstevel@tonic-gate { 14307c478bd9Sstevel@tonic-gate struct mem_handle *mhp; 14317c478bd9Sstevel@tonic-gate 14327c478bd9Sstevel@tonic-gate mhp = kphysm_lookup_mem_handle(handle); 14337c478bd9Sstevel@tonic-gate if (mhp == NULL) { 14347c478bd9Sstevel@tonic-gate return (KPHYSM_EHANDLE); 14357c478bd9Sstevel@tonic-gate } 14367c478bd9Sstevel@tonic-gate switch (mhp->mh_state) { 14377c478bd9Sstevel@tonic-gate case MHND_STARTING: 14387c478bd9Sstevel@tonic-gate case MHND_RUNNING: 14397c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 14407c478bd9Sstevel@tonic-gate return (KPHYSM_ENOTFINISHED); 14417c478bd9Sstevel@tonic-gate case MHND_FREE: 14427c478bd9Sstevel@tonic-gate ASSERT(mhp->mh_state != MHND_FREE); 14437c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 14447c478bd9Sstevel@tonic-gate return (KPHYSM_EHANDLE); 14457c478bd9Sstevel@tonic-gate case MHND_INIT: 14467c478bd9Sstevel@tonic-gate break; 14477c478bd9Sstevel@tonic-gate case MHND_DONE: 14487c478bd9Sstevel@tonic-gate break; 14497c478bd9Sstevel@tonic-gate case MHND_RELEASE: 14507c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 14517c478bd9Sstevel@tonic-gate return (KPHYSM_ESEQUENCE); 14527c478bd9Sstevel@tonic-gate default: 14537c478bd9Sstevel@tonic-gate #ifdef DEBUG 14547c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "kphysm_del_release(0x%p) state corrupt %d", 14557c478bd9Sstevel@tonic-gate (void *)mhp, mhp->mh_state); 14567c478bd9Sstevel@tonic-gate #endif /* DEBUG */ 14577c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 14587c478bd9Sstevel@tonic-gate return (KPHYSM_EHANDLE); 14597c478bd9Sstevel@tonic-gate } 
14607c478bd9Sstevel@tonic-gate /* 14617c478bd9Sstevel@tonic-gate * Set state so that we can wait if necessary. 14627c478bd9Sstevel@tonic-gate * Also this means that we have read/write access to all 14637c478bd9Sstevel@tonic-gate * fields except mh_exthandle and mh_state. 14647c478bd9Sstevel@tonic-gate */ 14657c478bd9Sstevel@tonic-gate mhp->mh_state = MHND_RELEASE; 14667c478bd9Sstevel@tonic-gate /* 14677c478bd9Sstevel@tonic-gate * The mem_handle cannot be de-allocated by any other operation 14687c478bd9Sstevel@tonic-gate * now, so no need to hold mh_mutex. 14697c478bd9Sstevel@tonic-gate */ 14707c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 14717c478bd9Sstevel@tonic-gate 14727c478bd9Sstevel@tonic-gate delspan_remove(&mhp->mh_transit, 0, 0); 14737c478bd9Sstevel@tonic-gate mhp->mh_phys_pages = 0; 14747c478bd9Sstevel@tonic-gate mhp->mh_vm_pages = 0; 14757c478bd9Sstevel@tonic-gate mhp->mh_hold_todo = 0; 14767c478bd9Sstevel@tonic-gate mhp->mh_delete_complete = NULL; 14777c478bd9Sstevel@tonic-gate mhp->mh_delete_complete_arg = NULL; 14787c478bd9Sstevel@tonic-gate mhp->mh_cancel = 0; 14797c478bd9Sstevel@tonic-gate 14807c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 14817c478bd9Sstevel@tonic-gate ASSERT(mhp->mh_state == MHND_RELEASE); 14827c478bd9Sstevel@tonic-gate mhp->mh_state = MHND_FREE; 14837c478bd9Sstevel@tonic-gate 14847c478bd9Sstevel@tonic-gate kphysm_free_mem_handle(mhp); 14857c478bd9Sstevel@tonic-gate 14867c478bd9Sstevel@tonic-gate return (KPHYSM_OK); 14877c478bd9Sstevel@tonic-gate } 14887c478bd9Sstevel@tonic-gate 14897c478bd9Sstevel@tonic-gate /* 14907c478bd9Sstevel@tonic-gate * This cancel function can only be called with the thread running. 
14917c478bd9Sstevel@tonic-gate */ 14927c478bd9Sstevel@tonic-gate int 14937c478bd9Sstevel@tonic-gate kphysm_del_cancel(memhandle_t handle) 14947c478bd9Sstevel@tonic-gate { 14957c478bd9Sstevel@tonic-gate struct mem_handle *mhp; 14967c478bd9Sstevel@tonic-gate 14977c478bd9Sstevel@tonic-gate mhp = kphysm_lookup_mem_handle(handle); 14987c478bd9Sstevel@tonic-gate if (mhp == NULL) { 14997c478bd9Sstevel@tonic-gate return (KPHYSM_EHANDLE); 15007c478bd9Sstevel@tonic-gate } 15017c478bd9Sstevel@tonic-gate if (mhp->mh_state != MHND_STARTING && mhp->mh_state != MHND_RUNNING) { 15027c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 15037c478bd9Sstevel@tonic-gate return (KPHYSM_ENOTRUNNING); 15047c478bd9Sstevel@tonic-gate } 15057c478bd9Sstevel@tonic-gate /* 15067c478bd9Sstevel@tonic-gate * Set the cancel flag and wake the delete thread up. 15077c478bd9Sstevel@tonic-gate * The thread may be waiting on I/O, so the effect of the cancel 15087c478bd9Sstevel@tonic-gate * may be delayed. 15097c478bd9Sstevel@tonic-gate */ 15107c478bd9Sstevel@tonic-gate if (mhp->mh_cancel == 0) { 15117c478bd9Sstevel@tonic-gate mhp->mh_cancel = KPHYSM_ECANCELLED; 15127c478bd9Sstevel@tonic-gate cv_signal(&mhp->mh_cv); 15137c478bd9Sstevel@tonic-gate } 15147c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 15157c478bd9Sstevel@tonic-gate return (KPHYSM_OK); 15167c478bd9Sstevel@tonic-gate } 15177c478bd9Sstevel@tonic-gate 15187c478bd9Sstevel@tonic-gate int 15197c478bd9Sstevel@tonic-gate kphysm_del_status( 15207c478bd9Sstevel@tonic-gate memhandle_t handle, 15217c478bd9Sstevel@tonic-gate memdelstat_t *mdstp) 15227c478bd9Sstevel@tonic-gate { 15237c478bd9Sstevel@tonic-gate struct mem_handle *mhp; 15247c478bd9Sstevel@tonic-gate 15257c478bd9Sstevel@tonic-gate mhp = kphysm_lookup_mem_handle(handle); 15267c478bd9Sstevel@tonic-gate if (mhp == NULL) { 15277c478bd9Sstevel@tonic-gate return (KPHYSM_EHANDLE); 15287c478bd9Sstevel@tonic-gate } 15297c478bd9Sstevel@tonic-gate /* 15307c478bd9Sstevel@tonic-gate * Calling 
kphysm_del_status() is allowed before the delete 15317c478bd9Sstevel@tonic-gate * is started to allow for status display. 15327c478bd9Sstevel@tonic-gate */ 15337c478bd9Sstevel@tonic-gate if (mhp->mh_state != MHND_INIT && mhp->mh_state != MHND_STARTING && 15347c478bd9Sstevel@tonic-gate mhp->mh_state != MHND_RUNNING) { 15357c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 15367c478bd9Sstevel@tonic-gate return (KPHYSM_ENOTRUNNING); 15377c478bd9Sstevel@tonic-gate } 15387c478bd9Sstevel@tonic-gate mdstp->phys_pages = mhp->mh_phys_pages; 15397c478bd9Sstevel@tonic-gate mdstp->managed = mhp->mh_vm_pages; 15407c478bd9Sstevel@tonic-gate mdstp->collected = mhp->mh_vm_pages - mhp->mh_hold_todo; 15417c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 15427c478bd9Sstevel@tonic-gate return (KPHYSM_OK); 15437c478bd9Sstevel@tonic-gate } 15447c478bd9Sstevel@tonic-gate 15457c478bd9Sstevel@tonic-gate static int mem_delete_additional_pages = 100; 15467c478bd9Sstevel@tonic-gate 15477c478bd9Sstevel@tonic-gate static int 15487c478bd9Sstevel@tonic-gate can_remove_pgs(pgcnt_t npgs) 15497c478bd9Sstevel@tonic-gate { 15507c478bd9Sstevel@tonic-gate /* 15517c478bd9Sstevel@tonic-gate * If all pageable pages were paged out, freemem would 15527c478bd9Sstevel@tonic-gate * equal availrmem. There is a minimum requirement for 15537c478bd9Sstevel@tonic-gate * availrmem. 15547c478bd9Sstevel@tonic-gate */ 15557c478bd9Sstevel@tonic-gate if ((availrmem - (tune.t_minarmem + mem_delete_additional_pages)) 15567c478bd9Sstevel@tonic-gate < npgs) 15577c478bd9Sstevel@tonic-gate return (0); 15587c478bd9Sstevel@tonic-gate /* TODO: check swap space, etc. 
*/ 15597c478bd9Sstevel@tonic-gate return (1); 15607c478bd9Sstevel@tonic-gate } 15617c478bd9Sstevel@tonic-gate 15627c478bd9Sstevel@tonic-gate static int 15637c478bd9Sstevel@tonic-gate get_availrmem(pgcnt_t npgs) 15647c478bd9Sstevel@tonic-gate { 15657c478bd9Sstevel@tonic-gate int ret; 15667c478bd9Sstevel@tonic-gate 15677c478bd9Sstevel@tonic-gate mutex_enter(&freemem_lock); 15687c478bd9Sstevel@tonic-gate ret = can_remove_pgs(npgs); 15697c478bd9Sstevel@tonic-gate if (ret != 0) 15707c478bd9Sstevel@tonic-gate availrmem -= npgs; 15717c478bd9Sstevel@tonic-gate mutex_exit(&freemem_lock); 15727c478bd9Sstevel@tonic-gate return (ret); 15737c478bd9Sstevel@tonic-gate } 15747c478bd9Sstevel@tonic-gate 15757c478bd9Sstevel@tonic-gate static void 15767c478bd9Sstevel@tonic-gate put_availrmem(pgcnt_t npgs) 15777c478bd9Sstevel@tonic-gate { 15787c478bd9Sstevel@tonic-gate mutex_enter(&freemem_lock); 15797c478bd9Sstevel@tonic-gate availrmem += npgs; 15807c478bd9Sstevel@tonic-gate mutex_exit(&freemem_lock); 15817c478bd9Sstevel@tonic-gate } 15827c478bd9Sstevel@tonic-gate 15837c478bd9Sstevel@tonic-gate #define FREEMEM_INCR 100 15847c478bd9Sstevel@tonic-gate static pgcnt_t freemem_incr = FREEMEM_INCR; 15857c478bd9Sstevel@tonic-gate #define DEL_FREE_WAIT_FRAC 4 15867c478bd9Sstevel@tonic-gate #define DEL_FREE_WAIT_TICKS ((hz+DEL_FREE_WAIT_FRAC-1)/DEL_FREE_WAIT_FRAC) 15877c478bd9Sstevel@tonic-gate 15887c478bd9Sstevel@tonic-gate #define DEL_BUSY_WAIT_FRAC 20 15897c478bd9Sstevel@tonic-gate #define DEL_BUSY_WAIT_TICKS ((hz+DEL_BUSY_WAIT_FRAC-1)/DEL_BUSY_WAIT_FRAC) 15907c478bd9Sstevel@tonic-gate 15917c478bd9Sstevel@tonic-gate static void kphysm_del_cleanup(struct mem_handle *); 15927c478bd9Sstevel@tonic-gate 15937c478bd9Sstevel@tonic-gate static void page_delete_collect(page_t *, struct mem_handle *); 15947c478bd9Sstevel@tonic-gate 15957c478bd9Sstevel@tonic-gate static pgcnt_t 15967c478bd9Sstevel@tonic-gate delthr_get_freemem(struct mem_handle *mhp) 15977c478bd9Sstevel@tonic-gate { 
15987c478bd9Sstevel@tonic-gate pgcnt_t free_get; 15997c478bd9Sstevel@tonic-gate int ret; 16007c478bd9Sstevel@tonic-gate 16017c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&mhp->mh_mutex)); 16027c478bd9Sstevel@tonic-gate 16037c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, need_free); 16047c478bd9Sstevel@tonic-gate /* 16057c478bd9Sstevel@tonic-gate * Get up to freemem_incr pages. 16067c478bd9Sstevel@tonic-gate */ 16077c478bd9Sstevel@tonic-gate free_get = freemem_incr; 16087c478bd9Sstevel@tonic-gate if (free_get > mhp->mh_hold_todo) 16097c478bd9Sstevel@tonic-gate free_get = mhp->mh_hold_todo; 16107c478bd9Sstevel@tonic-gate /* 16117c478bd9Sstevel@tonic-gate * Take free_get pages away from freemem, 16127c478bd9Sstevel@tonic-gate * waiting if necessary. 16137c478bd9Sstevel@tonic-gate */ 16147c478bd9Sstevel@tonic-gate 16157c478bd9Sstevel@tonic-gate while (!mhp->mh_cancel) { 16167c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 16177c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, free_loop); 16187c478bd9Sstevel@tonic-gate /* 16197c478bd9Sstevel@tonic-gate * Duplicate test from page_create_throttle() 16207c478bd9Sstevel@tonic-gate * but don't override with !PG_WAIT. 
16217c478bd9Sstevel@tonic-gate */ 16227c478bd9Sstevel@tonic-gate if (freemem < (free_get + throttlefree)) { 16237c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, free_low); 16247c478bd9Sstevel@tonic-gate ret = 0; 16257c478bd9Sstevel@tonic-gate } else { 16267c478bd9Sstevel@tonic-gate ret = page_create_wait(free_get, 0); 16277c478bd9Sstevel@tonic-gate if (ret == 0) { 16287c478bd9Sstevel@tonic-gate /* EMPTY */ 16297c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, free_failed); 16307c478bd9Sstevel@tonic-gate } 16317c478bd9Sstevel@tonic-gate } 16327c478bd9Sstevel@tonic-gate if (ret != 0) { 16337c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 16347c478bd9Sstevel@tonic-gate return (free_get); 16357c478bd9Sstevel@tonic-gate } 16367c478bd9Sstevel@tonic-gate 16377c478bd9Sstevel@tonic-gate /* 16387c478bd9Sstevel@tonic-gate * Put pressure on pageout. 16397c478bd9Sstevel@tonic-gate */ 16407c478bd9Sstevel@tonic-gate page_needfree(free_get); 16417c478bd9Sstevel@tonic-gate cv_signal(&proc_pageout->p_cv); 16427c478bd9Sstevel@tonic-gate 16437c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 1644d3d50737SRafael Vanoni (void) cv_reltimedwait(&mhp->mh_cv, &mhp->mh_mutex, 1645d3d50737SRafael Vanoni DEL_FREE_WAIT_TICKS, TR_CLOCK_TICK); 16467c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 16477c478bd9Sstevel@tonic-gate page_needfree(-(spgcnt_t)free_get); 16487c478bd9Sstevel@tonic-gate 16497c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 16507c478bd9Sstevel@tonic-gate } 16517c478bd9Sstevel@tonic-gate return (0); 16527c478bd9Sstevel@tonic-gate } 16537c478bd9Sstevel@tonic-gate 16547c478bd9Sstevel@tonic-gate #define DR_AIO_CLEANUP_DELAY 25000 /* 0.025secs, in usec */ 16557c478bd9Sstevel@tonic-gate #define DR_AIO_CLEANUP_MAXLOOPS_NODELAY 100 16567c478bd9Sstevel@tonic-gate /* 16577c478bd9Sstevel@tonic-gate * This function is run as a helper thread for delete_memory_thread. 
16587c478bd9Sstevel@tonic-gate * It is needed in order to force kaio cleanup, so that pages used in kaio 16597c478bd9Sstevel@tonic-gate * will be unlocked and subsequently relocated by delete_memory_thread. 16607c478bd9Sstevel@tonic-gate * The address of the delete_memory_threads's mem_handle is passed in to 16617c478bd9Sstevel@tonic-gate * this thread function, and is used to set the mh_aio_cleanup_done member 16627c478bd9Sstevel@tonic-gate * prior to calling thread_exit(). 16637c478bd9Sstevel@tonic-gate */ 16647c478bd9Sstevel@tonic-gate static void 16657c478bd9Sstevel@tonic-gate dr_aio_cleanup_thread(caddr_t amhp) 16667c478bd9Sstevel@tonic-gate { 16677c478bd9Sstevel@tonic-gate proc_t *procp; 16687c478bd9Sstevel@tonic-gate int (*aio_cleanup_dr_delete_memory)(proc_t *); 16697c478bd9Sstevel@tonic-gate int cleaned; 16707c478bd9Sstevel@tonic-gate int n = 0; 16717c478bd9Sstevel@tonic-gate struct mem_handle *mhp; 16727c478bd9Sstevel@tonic-gate volatile uint_t *pcancel; 16737c478bd9Sstevel@tonic-gate 16747c478bd9Sstevel@tonic-gate mhp = (struct mem_handle *)amhp; 16757c478bd9Sstevel@tonic-gate ASSERT(mhp != NULL); 16767c478bd9Sstevel@tonic-gate pcancel = &mhp->mh_dr_aio_cleanup_cancel; 16777c478bd9Sstevel@tonic-gate if (modload("sys", "kaio") == -1) { 16787c478bd9Sstevel@tonic-gate mhp->mh_aio_cleanup_done = 1; 16797c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "dr_aio_cleanup_thread: cannot load kaio"); 16807c478bd9Sstevel@tonic-gate thread_exit(); 16817c478bd9Sstevel@tonic-gate } 16827c478bd9Sstevel@tonic-gate aio_cleanup_dr_delete_memory = (int (*)(proc_t *)) 16837c478bd9Sstevel@tonic-gate modgetsymvalue("aio_cleanup_dr_delete_memory", 0); 16847c478bd9Sstevel@tonic-gate if (aio_cleanup_dr_delete_memory == NULL) { 16857c478bd9Sstevel@tonic-gate mhp->mh_aio_cleanup_done = 1; 16867c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, 16877c478bd9Sstevel@tonic-gate "aio_cleanup_dr_delete_memory not found in kaio"); 16887c478bd9Sstevel@tonic-gate thread_exit(); 
16897c478bd9Sstevel@tonic-gate } 16907c478bd9Sstevel@tonic-gate do { 16917c478bd9Sstevel@tonic-gate cleaned = 0; 16927c478bd9Sstevel@tonic-gate mutex_enter(&pidlock); 16937c478bd9Sstevel@tonic-gate for (procp = practive; (*pcancel == 0) && (procp != NULL); 16947c478bd9Sstevel@tonic-gate procp = procp->p_next) { 16957c478bd9Sstevel@tonic-gate mutex_enter(&procp->p_lock); 16967c478bd9Sstevel@tonic-gate if (procp->p_aio != NULL) { 16977c478bd9Sstevel@tonic-gate /* cleanup proc's outstanding kaio */ 16987c478bd9Sstevel@tonic-gate cleaned += 16997c478bd9Sstevel@tonic-gate (*aio_cleanup_dr_delete_memory)(procp); 17007c478bd9Sstevel@tonic-gate } 17017c478bd9Sstevel@tonic-gate mutex_exit(&procp->p_lock); 17027c478bd9Sstevel@tonic-gate } 17037c478bd9Sstevel@tonic-gate mutex_exit(&pidlock); 17047c478bd9Sstevel@tonic-gate if ((*pcancel == 0) && 17057c478bd9Sstevel@tonic-gate (!cleaned || (++n == DR_AIO_CLEANUP_MAXLOOPS_NODELAY))) { 17067c478bd9Sstevel@tonic-gate /* delay a bit before retrying all procs again */ 17077c478bd9Sstevel@tonic-gate delay(drv_usectohz(DR_AIO_CLEANUP_DELAY)); 17087c478bd9Sstevel@tonic-gate n = 0; 17097c478bd9Sstevel@tonic-gate } 17107c478bd9Sstevel@tonic-gate } while (*pcancel == 0); 17117c478bd9Sstevel@tonic-gate mhp->mh_aio_cleanup_done = 1; 17127c478bd9Sstevel@tonic-gate thread_exit(); 17137c478bd9Sstevel@tonic-gate } 17147c478bd9Sstevel@tonic-gate 17157c478bd9Sstevel@tonic-gate static void 17167c478bd9Sstevel@tonic-gate delete_memory_thread(caddr_t amhp) 17177c478bd9Sstevel@tonic-gate { 17187c478bd9Sstevel@tonic-gate struct mem_handle *mhp; 17197c478bd9Sstevel@tonic-gate struct memdelspan *mdsp; 17207c478bd9Sstevel@tonic-gate callb_cpr_t cprinfo; 17217c478bd9Sstevel@tonic-gate page_t *pp_targ; 17227c478bd9Sstevel@tonic-gate spgcnt_t freemem_left; 17237c478bd9Sstevel@tonic-gate void (*del_complete_funcp)(void *, int error); 17247c478bd9Sstevel@tonic-gate void *del_complete_arg; 17257c478bd9Sstevel@tonic-gate int comp_code; 
17267c478bd9Sstevel@tonic-gate int ret; 17277c478bd9Sstevel@tonic-gate int first_scan; 17287c478bd9Sstevel@tonic-gate uint_t szc; 17297c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS 17307c478bd9Sstevel@tonic-gate uint64_t start_total, ntick_total; 17317c478bd9Sstevel@tonic-gate uint64_t start_pgrp, ntick_pgrp; 17327c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 17337c478bd9Sstevel@tonic-gate 17347c478bd9Sstevel@tonic-gate mhp = (struct mem_handle *)amhp; 17357c478bd9Sstevel@tonic-gate 17367c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS 17377c478bd9Sstevel@tonic-gate start_total = ddi_get_lbolt(); 17387c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 17397c478bd9Sstevel@tonic-gate 17407c478bd9Sstevel@tonic-gate CALLB_CPR_INIT(&cprinfo, &mhp->mh_mutex, 17417c478bd9Sstevel@tonic-gate callb_generic_cpr, "memdel"); 17427c478bd9Sstevel@tonic-gate 17437c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 17447c478bd9Sstevel@tonic-gate ASSERT(mhp->mh_state == MHND_STARTING); 17457c478bd9Sstevel@tonic-gate 17467c478bd9Sstevel@tonic-gate mhp->mh_state = MHND_RUNNING; 17477c478bd9Sstevel@tonic-gate mhp->mh_thread_id = curthread; 17487c478bd9Sstevel@tonic-gate 17497c478bd9Sstevel@tonic-gate mhp->mh_hold_todo = mhp->mh_vm_pages; 17507c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 17517c478bd9Sstevel@tonic-gate 17527c478bd9Sstevel@tonic-gate /* Allocate the remap pages now, if necessary. */ 17537c478bd9Sstevel@tonic-gate memseg_remap_init(); 17547c478bd9Sstevel@tonic-gate 17557c478bd9Sstevel@tonic-gate /* 17567c478bd9Sstevel@tonic-gate * Subtract from availrmem now if possible as availrmem 17577c478bd9Sstevel@tonic-gate * may not be available by the end of the delete. 
17587c478bd9Sstevel@tonic-gate */ 17597c478bd9Sstevel@tonic-gate if (!get_availrmem(mhp->mh_vm_pages)) { 17607c478bd9Sstevel@tonic-gate comp_code = KPHYSM_ENOTVIABLE; 17617c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 17627c478bd9Sstevel@tonic-gate goto early_exit; 17637c478bd9Sstevel@tonic-gate } 17647c478bd9Sstevel@tonic-gate 17657c478bd9Sstevel@tonic-gate ret = kphysm_setup_pre_del(mhp->mh_vm_pages); 17667c478bd9Sstevel@tonic-gate 17677c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 17687c478bd9Sstevel@tonic-gate 17697c478bd9Sstevel@tonic-gate if (ret != 0) { 17707c478bd9Sstevel@tonic-gate mhp->mh_cancel = KPHYSM_EREFUSED; 17717c478bd9Sstevel@tonic-gate goto refused; 17727c478bd9Sstevel@tonic-gate } 17737c478bd9Sstevel@tonic-gate 17747c478bd9Sstevel@tonic-gate transit_list_collect(mhp, 1); 17757c478bd9Sstevel@tonic-gate 17767c478bd9Sstevel@tonic-gate for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL; 17777c478bd9Sstevel@tonic-gate mdsp = mdsp->mds_next) { 17787c478bd9Sstevel@tonic-gate ASSERT(mdsp->mds_bitmap == NULL); 17797c478bd9Sstevel@tonic-gate mdsp->mds_bitmap = kmem_zalloc(MDS_BITMAPBYTES(mdsp), KM_SLEEP); 17807c478bd9Sstevel@tonic-gate mdsp->mds_bitmap_retired = kmem_zalloc(MDS_BITMAPBYTES(mdsp), 17817c478bd9Sstevel@tonic-gate KM_SLEEP); 17827c478bd9Sstevel@tonic-gate } 17837c478bd9Sstevel@tonic-gate 17847c478bd9Sstevel@tonic-gate first_scan = 1; 17857c478bd9Sstevel@tonic-gate freemem_left = 0; 17867c478bd9Sstevel@tonic-gate /* 17877c478bd9Sstevel@tonic-gate * Start dr_aio_cleanup_thread, which periodically iterates 17887c478bd9Sstevel@tonic-gate * through the process list and invokes aio cleanup. 
This 17897c478bd9Sstevel@tonic-gate * is needed in order to avoid a deadly embrace between the 17907c478bd9Sstevel@tonic-gate * delete_memory_thread (waiting on writer lock for page, with the 17917c478bd9Sstevel@tonic-gate * exclusive-wanted bit set), kaio read request threads (waiting for a 17927c478bd9Sstevel@tonic-gate * reader lock on the same page that is wanted by the 17937c478bd9Sstevel@tonic-gate * delete_memory_thread), and threads waiting for kaio completion 17947c478bd9Sstevel@tonic-gate * (blocked on spt_amp->lock). 17957c478bd9Sstevel@tonic-gate */ 17967c478bd9Sstevel@tonic-gate mhp->mh_dr_aio_cleanup_cancel = 0; 17977c478bd9Sstevel@tonic-gate mhp->mh_aio_cleanup_done = 0; 17987c478bd9Sstevel@tonic-gate (void) thread_create(NULL, 0, dr_aio_cleanup_thread, 17997c478bd9Sstevel@tonic-gate (caddr_t)mhp, 0, &p0, TS_RUN, maxclsyspri - 1); 18007c478bd9Sstevel@tonic-gate while ((mhp->mh_hold_todo != 0) && (mhp->mh_cancel == 0)) { 18017c478bd9Sstevel@tonic-gate pgcnt_t collected; 18027c478bd9Sstevel@tonic-gate 18037c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, nloop); 18047c478bd9Sstevel@tonic-gate collected = 0; 18057c478bd9Sstevel@tonic-gate for (mdsp = mhp->mh_transit.trl_spans; (mdsp != NULL) && 18067c478bd9Sstevel@tonic-gate (mhp->mh_cancel == 0); mdsp = mdsp->mds_next) { 18077c478bd9Sstevel@tonic-gate pfn_t pfn, p_end; 18087c478bd9Sstevel@tonic-gate 18097c478bd9Sstevel@tonic-gate p_end = mdsp->mds_base + mdsp->mds_npgs; 18107c478bd9Sstevel@tonic-gate for (pfn = mdsp->mds_base; (pfn < p_end) && 18117c478bd9Sstevel@tonic-gate (mhp->mh_cancel == 0); pfn++) { 18127c478bd9Sstevel@tonic-gate page_t *pp, *tpp, *tpp_targ; 18137c478bd9Sstevel@tonic-gate pgcnt_t bit; 18147c478bd9Sstevel@tonic-gate struct vnode *vp; 18157c478bd9Sstevel@tonic-gate u_offset_t offset; 18167c478bd9Sstevel@tonic-gate int mod, result; 18177c478bd9Sstevel@tonic-gate spgcnt_t pgcnt; 18187c478bd9Sstevel@tonic-gate 18197c478bd9Sstevel@tonic-gate bit = pfn - mdsp->mds_base; 
18207c478bd9Sstevel@tonic-gate if ((mdsp->mds_bitmap[bit / NBPBMW] & 18217c478bd9Sstevel@tonic-gate (1 << (bit % NBPBMW))) != 0) { 18227c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, already_done); 18237c478bd9Sstevel@tonic-gate continue; 18247c478bd9Sstevel@tonic-gate } 18257c478bd9Sstevel@tonic-gate if (freemem_left == 0) { 18267c478bd9Sstevel@tonic-gate freemem_left += delthr_get_freemem(mhp); 18277c478bd9Sstevel@tonic-gate if (freemem_left == 0) 18287c478bd9Sstevel@tonic-gate break; 18297c478bd9Sstevel@tonic-gate } 18307c478bd9Sstevel@tonic-gate 18317c478bd9Sstevel@tonic-gate /* 18327c478bd9Sstevel@tonic-gate * Release mh_mutex - some of this 18337c478bd9Sstevel@tonic-gate * stuff takes some time (eg PUTPAGE). 18347c478bd9Sstevel@tonic-gate */ 18357c478bd9Sstevel@tonic-gate 18367c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 18377c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, ncheck); 18387c478bd9Sstevel@tonic-gate 18397c478bd9Sstevel@tonic-gate pp = page_numtopp_nolock(pfn); 18407c478bd9Sstevel@tonic-gate if (pp == NULL) { 18417c478bd9Sstevel@tonic-gate /* 18427c478bd9Sstevel@tonic-gate * Not covered by a page_t - will 18437c478bd9Sstevel@tonic-gate * be dealt with elsewhere. 18447c478bd9Sstevel@tonic-gate */ 18457c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, nopaget); 18467c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 18477c478bd9Sstevel@tonic-gate mdsp->mds_bitmap[bit / NBPBMW] |= 18487c478bd9Sstevel@tonic-gate (1 << (bit % NBPBMW)); 18497c478bd9Sstevel@tonic-gate continue; 18507c478bd9Sstevel@tonic-gate } 18517c478bd9Sstevel@tonic-gate 18527c478bd9Sstevel@tonic-gate if (!page_try_reclaim_lock(pp, SE_EXCL, 1853db874c57Selowe SE_EXCL_WANTED | SE_RETIRED)) { 18547c478bd9Sstevel@tonic-gate /* 1855db874c57Selowe * Page in use elsewhere. Skip it. 
18567c478bd9Sstevel@tonic-gate */ 18577c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, lockfail); 18587c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 18597c478bd9Sstevel@tonic-gate continue; 18607c478bd9Sstevel@tonic-gate } 18617c478bd9Sstevel@tonic-gate /* 18627c478bd9Sstevel@tonic-gate * See if the cage expanded into the delete. 18637c478bd9Sstevel@tonic-gate * This can happen as we have to allow the 18647c478bd9Sstevel@tonic-gate * cage to expand. 18657c478bd9Sstevel@tonic-gate */ 18667c478bd9Sstevel@tonic-gate if (PP_ISNORELOC(pp)) { 18677c478bd9Sstevel@tonic-gate page_unlock(pp); 18687c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 18697c478bd9Sstevel@tonic-gate mhp->mh_cancel = KPHYSM_ENONRELOC; 18707c478bd9Sstevel@tonic-gate break; 18717c478bd9Sstevel@tonic-gate } 1872db874c57Selowe if (PP_RETIRED(pp)) { 18737c478bd9Sstevel@tonic-gate /* 18747c478bd9Sstevel@tonic-gate * Page has been retired and is 18757c478bd9Sstevel@tonic-gate * not part of the cage so we 18767c478bd9Sstevel@tonic-gate * can now do the accounting for 18777c478bd9Sstevel@tonic-gate * it. 18787c478bd9Sstevel@tonic-gate */ 18797c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, retired); 18807c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 18817c478bd9Sstevel@tonic-gate mdsp->mds_bitmap[bit / NBPBMW] 18827c478bd9Sstevel@tonic-gate |= (1 << (bit % NBPBMW)); 18837c478bd9Sstevel@tonic-gate mdsp->mds_bitmap_retired[bit / 18847c478bd9Sstevel@tonic-gate NBPBMW] |= 18857c478bd9Sstevel@tonic-gate (1 << (bit % NBPBMW)); 18867c478bd9Sstevel@tonic-gate mhp->mh_hold_todo--; 18877c478bd9Sstevel@tonic-gate continue; 18887c478bd9Sstevel@tonic-gate } 18897c478bd9Sstevel@tonic-gate ASSERT(freemem_left != 0); 18907c478bd9Sstevel@tonic-gate if (PP_ISFREE(pp)) { 18917c478bd9Sstevel@tonic-gate /* 18927c478bd9Sstevel@tonic-gate * Like page_reclaim() only 'freemem' 18937c478bd9Sstevel@tonic-gate * processing is already done. 
18947c478bd9Sstevel@tonic-gate */ 18957c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, nfree); 18967c478bd9Sstevel@tonic-gate free_page_collect: 18977c478bd9Sstevel@tonic-gate if (PP_ISAGED(pp)) { 18987c478bd9Sstevel@tonic-gate page_list_sub(pp, 18997c478bd9Sstevel@tonic-gate PG_FREE_LIST); 19007c478bd9Sstevel@tonic-gate } else { 19017c478bd9Sstevel@tonic-gate page_list_sub(pp, 19027c478bd9Sstevel@tonic-gate PG_CACHE_LIST); 19037c478bd9Sstevel@tonic-gate } 19047c478bd9Sstevel@tonic-gate PP_CLRFREE(pp); 19057c478bd9Sstevel@tonic-gate PP_CLRAGED(pp); 19067c478bd9Sstevel@tonic-gate collected++; 19077c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 19087c478bd9Sstevel@tonic-gate page_delete_collect(pp, mhp); 19097c478bd9Sstevel@tonic-gate mdsp->mds_bitmap[bit / NBPBMW] |= 19107c478bd9Sstevel@tonic-gate (1 << (bit % NBPBMW)); 19117c478bd9Sstevel@tonic-gate freemem_left--; 19127c478bd9Sstevel@tonic-gate continue; 19137c478bd9Sstevel@tonic-gate } 19147c478bd9Sstevel@tonic-gate ASSERT(pp->p_vnode != NULL); 19157c478bd9Sstevel@tonic-gate if (first_scan) { 19167c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, first_notfree); 19177c478bd9Sstevel@tonic-gate page_unlock(pp); 19187c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 19197c478bd9Sstevel@tonic-gate continue; 19207c478bd9Sstevel@tonic-gate } 19217c478bd9Sstevel@tonic-gate /* 19227c478bd9Sstevel@tonic-gate * Keep stats on pages encountered that 1923db874c57Selowe * are marked for retirement. 19247c478bd9Sstevel@tonic-gate */ 1925db874c57Selowe if (PP_TOXIC(pp)) { 19267c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, toxic); 1927db874c57Selowe } else if (PP_PR_REQ(pp)) { 19287c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, failing); 19297c478bd9Sstevel@tonic-gate } 19307c478bd9Sstevel@tonic-gate /* 19317c478bd9Sstevel@tonic-gate * In certain cases below, special exceptions 19327c478bd9Sstevel@tonic-gate * are made for pages that are toxic. 
This 19337c478bd9Sstevel@tonic-gate * is because the current meaning of toxic 19347c478bd9Sstevel@tonic-gate * is that an uncorrectable error has been 19357c478bd9Sstevel@tonic-gate * previously associated with the page. 19367c478bd9Sstevel@tonic-gate */ 19377c478bd9Sstevel@tonic-gate if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) { 1938db874c57Selowe if (!PP_TOXIC(pp)) { 19397c478bd9Sstevel@tonic-gate /* 19407c478bd9Sstevel@tonic-gate * Must relocate locked in 19417c478bd9Sstevel@tonic-gate * memory pages. 19427c478bd9Sstevel@tonic-gate */ 19437c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS 19447c478bd9Sstevel@tonic-gate start_pgrp = ddi_get_lbolt(); 19457c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 19467c478bd9Sstevel@tonic-gate /* 19477c478bd9Sstevel@tonic-gate * Lock all constituent pages 19487c478bd9Sstevel@tonic-gate * of a large page to ensure 19497c478bd9Sstevel@tonic-gate * that p_szc won't change. 19507c478bd9Sstevel@tonic-gate */ 19517c478bd9Sstevel@tonic-gate if (!group_page_trylock(pp, 19527c478bd9Sstevel@tonic-gate SE_EXCL)) { 19537c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, 19547c478bd9Sstevel@tonic-gate gptllckfail); 19557c478bd9Sstevel@tonic-gate page_unlock(pp); 19567c478bd9Sstevel@tonic-gate mutex_enter( 19577c478bd9Sstevel@tonic-gate &mhp->mh_mutex); 19587c478bd9Sstevel@tonic-gate continue; 19597c478bd9Sstevel@tonic-gate } 19607c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, npplocked); 19617c478bd9Sstevel@tonic-gate pp_targ = 19627c478bd9Sstevel@tonic-gate page_get_replacement_page( 19637c478bd9Sstevel@tonic-gate pp, NULL, 0); 19647c478bd9Sstevel@tonic-gate if (pp_targ != NULL) { 19657c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS 19667c478bd9Sstevel@tonic-gate ntick_pgrp = 19677c478bd9Sstevel@tonic-gate (uint64_t) 19687c478bd9Sstevel@tonic-gate ddi_get_lbolt() - 19697c478bd9Sstevel@tonic-gate start_pgrp; 19707c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 19717c478bd9Sstevel@tonic-gate MDSTAT_PGRP(mhp, 19727c478bd9Sstevel@tonic-gate 
ntick_pgrp); 19737c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, 19747c478bd9Sstevel@tonic-gate nlockreloc); 19757c478bd9Sstevel@tonic-gate goto reloc; 19767c478bd9Sstevel@tonic-gate } 19777c478bd9Sstevel@tonic-gate group_page_unlock(pp); 19787c478bd9Sstevel@tonic-gate page_unlock(pp); 19797c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS 19807c478bd9Sstevel@tonic-gate ntick_pgrp = 19817c478bd9Sstevel@tonic-gate (uint64_t)ddi_get_lbolt() - 19827c478bd9Sstevel@tonic-gate start_pgrp; 19837c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 19847c478bd9Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp); 19857c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, nnorepl); 19867c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 19877c478bd9Sstevel@tonic-gate continue; 19887c478bd9Sstevel@tonic-gate } else { 19897c478bd9Sstevel@tonic-gate /* 19907c478bd9Sstevel@tonic-gate * Cannot do anything about 19917c478bd9Sstevel@tonic-gate * this page because it is 19927c478bd9Sstevel@tonic-gate * toxic. 19937c478bd9Sstevel@tonic-gate */ 19947c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, npplkdtoxic); 19957c478bd9Sstevel@tonic-gate page_unlock(pp); 19967c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 19977c478bd9Sstevel@tonic-gate continue; 19987c478bd9Sstevel@tonic-gate } 19997c478bd9Sstevel@tonic-gate } 20007c478bd9Sstevel@tonic-gate /* 20017c478bd9Sstevel@tonic-gate * Unload the mappings and check if mod bit 20027c478bd9Sstevel@tonic-gate * is set. 
20037c478bd9Sstevel@tonic-gate */ 2004ad23a2dbSjohansen ASSERT(!PP_ISKAS(pp)); 20057c478bd9Sstevel@tonic-gate (void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD); 20067c478bd9Sstevel@tonic-gate mod = hat_ismod(pp); 20077c478bd9Sstevel@tonic-gate 20087c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS 20097c478bd9Sstevel@tonic-gate start_pgrp = ddi_get_lbolt(); 20107c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 2011db874c57Selowe if (mod && !PP_TOXIC(pp)) { 20127c478bd9Sstevel@tonic-gate /* 20137c478bd9Sstevel@tonic-gate * Lock all constituent pages 20147c478bd9Sstevel@tonic-gate * of a large page to ensure 20157c478bd9Sstevel@tonic-gate * that p_szc won't change. 20167c478bd9Sstevel@tonic-gate */ 20177c478bd9Sstevel@tonic-gate if (!group_page_trylock(pp, SE_EXCL)) { 20187c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, gptlmodfail); 20197c478bd9Sstevel@tonic-gate page_unlock(pp); 20207c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 20217c478bd9Sstevel@tonic-gate continue; 20227c478bd9Sstevel@tonic-gate } 20237c478bd9Sstevel@tonic-gate pp_targ = page_get_replacement_page(pp, 20247c478bd9Sstevel@tonic-gate NULL, 0); 20257c478bd9Sstevel@tonic-gate if (pp_targ != NULL) { 20267c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, nmodreloc); 20277c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS 20287c478bd9Sstevel@tonic-gate ntick_pgrp = 20297c478bd9Sstevel@tonic-gate (uint64_t)ddi_get_lbolt() - 20307c478bd9Sstevel@tonic-gate start_pgrp; 20317c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 20327c478bd9Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp); 20337c478bd9Sstevel@tonic-gate goto reloc; 20347c478bd9Sstevel@tonic-gate } 20357c478bd9Sstevel@tonic-gate group_page_unlock(pp); 20367c478bd9Sstevel@tonic-gate } 20377c478bd9Sstevel@tonic-gate 20387c478bd9Sstevel@tonic-gate if (!page_try_demote_pages(pp)) { 20397c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, demotefail); 20407c478bd9Sstevel@tonic-gate page_unlock(pp); 20417c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS 
20427c478bd9Sstevel@tonic-gate ntick_pgrp = (uint64_t)ddi_get_lbolt() - 20437c478bd9Sstevel@tonic-gate start_pgrp; 20447c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 20457c478bd9Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp); 20467c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 20477c478bd9Sstevel@tonic-gate continue; 20487c478bd9Sstevel@tonic-gate } 20497c478bd9Sstevel@tonic-gate 20507c478bd9Sstevel@tonic-gate /* 20517c478bd9Sstevel@tonic-gate * Regular 'page-out'. 20527c478bd9Sstevel@tonic-gate */ 20537c478bd9Sstevel@tonic-gate if (!mod) { 20547c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, ndestroy); 20557c478bd9Sstevel@tonic-gate page_destroy(pp, 1); 20567c478bd9Sstevel@tonic-gate /* 20577c478bd9Sstevel@tonic-gate * page_destroy was called with 20587c478bd9Sstevel@tonic-gate * dontfree. As long as p_lckcnt 20597c478bd9Sstevel@tonic-gate * and p_cowcnt are both zero, the 20607c478bd9Sstevel@tonic-gate * only additional action of 20617c478bd9Sstevel@tonic-gate * page_destroy with !dontfree is to 20627c478bd9Sstevel@tonic-gate * call page_free, so we can collect 20637c478bd9Sstevel@tonic-gate * the page here. 
20647c478bd9Sstevel@tonic-gate */ 20657c478bd9Sstevel@tonic-gate collected++; 20667c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS 20677c478bd9Sstevel@tonic-gate ntick_pgrp = (uint64_t)ddi_get_lbolt() - 20687c478bd9Sstevel@tonic-gate start_pgrp; 20697c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 20707c478bd9Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp); 20717c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 20727c478bd9Sstevel@tonic-gate page_delete_collect(pp, mhp); 20737c478bd9Sstevel@tonic-gate mdsp->mds_bitmap[bit / NBPBMW] |= 20747c478bd9Sstevel@tonic-gate (1 << (bit % NBPBMW)); 20757c478bd9Sstevel@tonic-gate continue; 20767c478bd9Sstevel@tonic-gate } 20777c478bd9Sstevel@tonic-gate /* 20787c478bd9Sstevel@tonic-gate * The page is toxic and the mod bit is 20797c478bd9Sstevel@tonic-gate * set, we cannot do anything here to deal 20807c478bd9Sstevel@tonic-gate * with it. 20817c478bd9Sstevel@tonic-gate */ 2082db874c57Selowe if (PP_TOXIC(pp)) { 20837c478bd9Sstevel@tonic-gate page_unlock(pp); 20847c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS 20857c478bd9Sstevel@tonic-gate ntick_pgrp = (uint64_t)ddi_get_lbolt() - 20867c478bd9Sstevel@tonic-gate start_pgrp; 20877c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 20887c478bd9Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp); 20897c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, modtoxic); 20907c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 20917c478bd9Sstevel@tonic-gate continue; 20927c478bd9Sstevel@tonic-gate } 20937c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, nputpage); 20947c478bd9Sstevel@tonic-gate vp = pp->p_vnode; 20957c478bd9Sstevel@tonic-gate offset = pp->p_offset; 20967c478bd9Sstevel@tonic-gate VN_HOLD(vp); 20977c478bd9Sstevel@tonic-gate page_unlock(pp); 20987c478bd9Sstevel@tonic-gate (void) VOP_PUTPAGE(vp, offset, PAGESIZE, 2099da6c28aaSamw B_INVAL|B_FORCE, kcred, NULL); 21007c478bd9Sstevel@tonic-gate VN_RELE(vp); 21017c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS 21027c478bd9Sstevel@tonic-gate 
ntick_pgrp = (uint64_t)ddi_get_lbolt() - 21037c478bd9Sstevel@tonic-gate start_pgrp; 21047c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 21057c478bd9Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp); 21067c478bd9Sstevel@tonic-gate /* 21077c478bd9Sstevel@tonic-gate * Try to get the page back immediately 21087c478bd9Sstevel@tonic-gate * so that it can be collected. 21097c478bd9Sstevel@tonic-gate */ 21107c478bd9Sstevel@tonic-gate pp = page_numtopp_nolock(pfn); 21117c478bd9Sstevel@tonic-gate if (pp == NULL) { 21127c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, nnoreclaim); 21137c478bd9Sstevel@tonic-gate /* 21147c478bd9Sstevel@tonic-gate * This should not happen as this 21157c478bd9Sstevel@tonic-gate * thread is deleting the page. 21167c478bd9Sstevel@tonic-gate * If this code is generalized, this 21177c478bd9Sstevel@tonic-gate * becomes a reality. 21187c478bd9Sstevel@tonic-gate */ 21197c478bd9Sstevel@tonic-gate #ifdef DEBUG 21207c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, 21217c478bd9Sstevel@tonic-gate "delete_memory_thread(0x%p) " 21227c478bd9Sstevel@tonic-gate "pfn 0x%lx has no page_t", 21237c478bd9Sstevel@tonic-gate (void *)mhp, pfn); 21247c478bd9Sstevel@tonic-gate #endif /* DEBUG */ 21257c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 21267c478bd9Sstevel@tonic-gate continue; 21277c478bd9Sstevel@tonic-gate } 21287c478bd9Sstevel@tonic-gate if (page_try_reclaim_lock(pp, SE_EXCL, 2129db874c57Selowe SE_EXCL_WANTED | SE_RETIRED)) { 21307c478bd9Sstevel@tonic-gate if (PP_ISFREE(pp)) { 21317c478bd9Sstevel@tonic-gate goto free_page_collect; 21327c478bd9Sstevel@tonic-gate } 21337c478bd9Sstevel@tonic-gate page_unlock(pp); 21347c478bd9Sstevel@tonic-gate } 21357c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, nnoreclaim); 21367c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 21377c478bd9Sstevel@tonic-gate continue; 21387c478bd9Sstevel@tonic-gate 21397c478bd9Sstevel@tonic-gate reloc: 21407c478bd9Sstevel@tonic-gate /* 21417c478bd9Sstevel@tonic-gate * Got some freemem and a 
target 21427c478bd9Sstevel@tonic-gate * page, so move the data to avoid 21437c478bd9Sstevel@tonic-gate * I/O and lock problems. 21447c478bd9Sstevel@tonic-gate */ 21457c478bd9Sstevel@tonic-gate ASSERT(!page_iolock_assert(pp)); 21467c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, nreloc); 21477c478bd9Sstevel@tonic-gate /* 21487c478bd9Sstevel@tonic-gate * page_relocate() will return pgcnt: the 21497c478bd9Sstevel@tonic-gate * number of consecutive pages relocated. 21507c478bd9Sstevel@tonic-gate * If it is successful, pp will be a 21517c478bd9Sstevel@tonic-gate * linked list of the page structs that 21527c478bd9Sstevel@tonic-gate * were relocated. If page_relocate() is 21537c478bd9Sstevel@tonic-gate * unsuccessful, pp will be unmodified. 21547c478bd9Sstevel@tonic-gate */ 21557c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS 21567c478bd9Sstevel@tonic-gate start_pgrp = ddi_get_lbolt(); 21577c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 21587c478bd9Sstevel@tonic-gate result = page_relocate(&pp, &pp_targ, 0, 0, 21597c478bd9Sstevel@tonic-gate &pgcnt, NULL); 21607c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS 21617c478bd9Sstevel@tonic-gate ntick_pgrp = (uint64_t)ddi_get_lbolt() - 21627c478bd9Sstevel@tonic-gate start_pgrp; 21637c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 21647c478bd9Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp); 21657c478bd9Sstevel@tonic-gate if (result != 0) { 21667c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, nrelocfail); 21677c478bd9Sstevel@tonic-gate /* 21687c478bd9Sstevel@tonic-gate * We did not succeed. We need 21697c478bd9Sstevel@tonic-gate * to give the pp_targ pages back. 21707c478bd9Sstevel@tonic-gate * page_free(pp_targ, 1) without 21717c478bd9Sstevel@tonic-gate * the freemem accounting. 
21727c478bd9Sstevel@tonic-gate */ 21737c478bd9Sstevel@tonic-gate group_page_unlock(pp); 21747c478bd9Sstevel@tonic-gate page_free_replacement_page(pp_targ); 21757c478bd9Sstevel@tonic-gate page_unlock(pp); 21767c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 21777c478bd9Sstevel@tonic-gate continue; 21787c478bd9Sstevel@tonic-gate } 21797c478bd9Sstevel@tonic-gate 21807c478bd9Sstevel@tonic-gate /* 21817c478bd9Sstevel@tonic-gate * We will then collect pgcnt pages. 21827c478bd9Sstevel@tonic-gate */ 21837c478bd9Sstevel@tonic-gate ASSERT(pgcnt > 0); 21847c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 21857c478bd9Sstevel@tonic-gate /* 21867c478bd9Sstevel@tonic-gate * We need to make sure freemem_left is 21877c478bd9Sstevel@tonic-gate * large enough. 21887c478bd9Sstevel@tonic-gate */ 21897c478bd9Sstevel@tonic-gate while ((freemem_left < pgcnt) && 21907c478bd9Sstevel@tonic-gate (!mhp->mh_cancel)) { 21917c478bd9Sstevel@tonic-gate freemem_left += 21927c478bd9Sstevel@tonic-gate delthr_get_freemem(mhp); 21937c478bd9Sstevel@tonic-gate } 21947c478bd9Sstevel@tonic-gate 21957c478bd9Sstevel@tonic-gate /* 21967c478bd9Sstevel@tonic-gate * Do not proceed if mh_cancel is set. 21977c478bd9Sstevel@tonic-gate */ 21987c478bd9Sstevel@tonic-gate if (mhp->mh_cancel) { 21997c478bd9Sstevel@tonic-gate while (pp_targ != NULL) { 22007c478bd9Sstevel@tonic-gate /* 22017c478bd9Sstevel@tonic-gate * Unlink and unlock each page. 22027c478bd9Sstevel@tonic-gate */ 22037c478bd9Sstevel@tonic-gate tpp_targ = pp_targ; 22047c478bd9Sstevel@tonic-gate page_sub(&pp_targ, tpp_targ); 22057c478bd9Sstevel@tonic-gate page_unlock(tpp_targ); 22067c478bd9Sstevel@tonic-gate } 22077c478bd9Sstevel@tonic-gate /* 22087c478bd9Sstevel@tonic-gate * We need to give the pp pages back. 22097c478bd9Sstevel@tonic-gate * page_free(pp, 1) without the 22107c478bd9Sstevel@tonic-gate * freemem accounting. 
22117c478bd9Sstevel@tonic-gate */ 22127c478bd9Sstevel@tonic-gate page_free_replacement_page(pp); 22137c478bd9Sstevel@tonic-gate break; 22147c478bd9Sstevel@tonic-gate } 22157c478bd9Sstevel@tonic-gate 22167c478bd9Sstevel@tonic-gate /* Now remove pgcnt from freemem_left */ 22177c478bd9Sstevel@tonic-gate freemem_left -= pgcnt; 22187c478bd9Sstevel@tonic-gate ASSERT(freemem_left >= 0); 22197c478bd9Sstevel@tonic-gate szc = pp->p_szc; 22207c478bd9Sstevel@tonic-gate while (pp != NULL) { 22217c478bd9Sstevel@tonic-gate /* 22227c478bd9Sstevel@tonic-gate * pp and pp_targ were passed back as 22237c478bd9Sstevel@tonic-gate * a linked list of pages. 22247c478bd9Sstevel@tonic-gate * Unlink and unlock each page. 22257c478bd9Sstevel@tonic-gate */ 22267c478bd9Sstevel@tonic-gate tpp_targ = pp_targ; 22277c478bd9Sstevel@tonic-gate page_sub(&pp_targ, tpp_targ); 22287c478bd9Sstevel@tonic-gate page_unlock(tpp_targ); 22297c478bd9Sstevel@tonic-gate /* 22307c478bd9Sstevel@tonic-gate * The original page is now free 22317c478bd9Sstevel@tonic-gate * so remove it from the linked 22327c478bd9Sstevel@tonic-gate * list and collect it. 
22337c478bd9Sstevel@tonic-gate */ 22347c478bd9Sstevel@tonic-gate tpp = pp; 22357c478bd9Sstevel@tonic-gate page_sub(&pp, tpp); 22367c478bd9Sstevel@tonic-gate pfn = page_pptonum(tpp); 22377c478bd9Sstevel@tonic-gate collected++; 22387c478bd9Sstevel@tonic-gate ASSERT(PAGE_EXCL(tpp)); 22397c478bd9Sstevel@tonic-gate ASSERT(tpp->p_vnode == NULL); 22407c478bd9Sstevel@tonic-gate ASSERT(!hat_page_is_mapped(tpp)); 22417c478bd9Sstevel@tonic-gate ASSERT(tpp->p_szc == szc); 22427c478bd9Sstevel@tonic-gate tpp->p_szc = 0; 22437c478bd9Sstevel@tonic-gate page_delete_collect(tpp, mhp); 22447c478bd9Sstevel@tonic-gate bit = pfn - mdsp->mds_base; 22457c478bd9Sstevel@tonic-gate mdsp->mds_bitmap[bit / NBPBMW] |= 22467c478bd9Sstevel@tonic-gate (1 << (bit % NBPBMW)); 22477c478bd9Sstevel@tonic-gate } 22487c478bd9Sstevel@tonic-gate ASSERT(pp_targ == NULL); 22497c478bd9Sstevel@tonic-gate } 22507c478bd9Sstevel@tonic-gate } 22517c478bd9Sstevel@tonic-gate first_scan = 0; 22527c478bd9Sstevel@tonic-gate if ((mhp->mh_cancel == 0) && (mhp->mh_hold_todo != 0) && 22537c478bd9Sstevel@tonic-gate (collected == 0)) { 22547c478bd9Sstevel@tonic-gate /* 22557c478bd9Sstevel@tonic-gate * This code is needed as we cannot wait 22567c478bd9Sstevel@tonic-gate * for a page to be locked OR the delete to 22577c478bd9Sstevel@tonic-gate * be cancelled. Also, we must delay so 22587c478bd9Sstevel@tonic-gate * that other threads get a chance to run 22597c478bd9Sstevel@tonic-gate * on our cpu, otherwise page locks may be 22607c478bd9Sstevel@tonic-gate * held indefinitely by those threads. 
22617c478bd9Sstevel@tonic-gate */ 22627c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, ndelay); 22637c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cprinfo); 2264d3d50737SRafael Vanoni (void) cv_reltimedwait(&mhp->mh_cv, &mhp->mh_mutex, 2265d3d50737SRafael Vanoni DEL_BUSY_WAIT_TICKS, TR_CLOCK_TICK); 22667c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cprinfo, &mhp->mh_mutex); 22677c478bd9Sstevel@tonic-gate } 22687c478bd9Sstevel@tonic-gate } 22697c478bd9Sstevel@tonic-gate /* stop the dr aio cleanup thread */ 22707c478bd9Sstevel@tonic-gate mhp->mh_dr_aio_cleanup_cancel = 1; 22717c478bd9Sstevel@tonic-gate transit_list_collect(mhp, 0); 22727c478bd9Sstevel@tonic-gate if (freemem_left != 0) { 22737c478bd9Sstevel@tonic-gate /* Return any surplus. */ 22747c478bd9Sstevel@tonic-gate page_create_putback(freemem_left); 22757c478bd9Sstevel@tonic-gate freemem_left = 0; 22767c478bd9Sstevel@tonic-gate } 22777c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS 22787c478bd9Sstevel@tonic-gate ntick_total = (uint64_t)ddi_get_lbolt() - start_total; 22797c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 22807c478bd9Sstevel@tonic-gate MDSTAT_TOTAL(mhp, ntick_total); 22817c478bd9Sstevel@tonic-gate MDSTAT_PRINT(mhp); 22827c478bd9Sstevel@tonic-gate 22837c478bd9Sstevel@tonic-gate /* 22847c478bd9Sstevel@tonic-gate * If the memory delete was cancelled, exclusive-wanted bits must 2285db874c57Selowe * be cleared. If there are retired pages being deleted, they need 2286db874c57Selowe * to be unretired. 
22877c478bd9Sstevel@tonic-gate */ 22887c478bd9Sstevel@tonic-gate for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL; 22897c478bd9Sstevel@tonic-gate mdsp = mdsp->mds_next) { 22907c478bd9Sstevel@tonic-gate pfn_t pfn, p_end; 22917c478bd9Sstevel@tonic-gate 22927c478bd9Sstevel@tonic-gate p_end = mdsp->mds_base + mdsp->mds_npgs; 22937c478bd9Sstevel@tonic-gate for (pfn = mdsp->mds_base; pfn < p_end; pfn++) { 22947c478bd9Sstevel@tonic-gate page_t *pp; 22957c478bd9Sstevel@tonic-gate pgcnt_t bit; 22967c478bd9Sstevel@tonic-gate 22977c478bd9Sstevel@tonic-gate bit = pfn - mdsp->mds_base; 22987c478bd9Sstevel@tonic-gate if (mhp->mh_cancel) { 22997c478bd9Sstevel@tonic-gate pp = page_numtopp_nolock(pfn); 23007c478bd9Sstevel@tonic-gate if (pp != NULL) { 23017c478bd9Sstevel@tonic-gate if ((mdsp->mds_bitmap[bit / NBPBMW] & 23027c478bd9Sstevel@tonic-gate (1 << (bit % NBPBMW))) == 0) { 23037c478bd9Sstevel@tonic-gate page_lock_clr_exclwanted(pp); 23047c478bd9Sstevel@tonic-gate } 23057c478bd9Sstevel@tonic-gate } 23067c478bd9Sstevel@tonic-gate } else { 23077c478bd9Sstevel@tonic-gate pp = NULL; 23087c478bd9Sstevel@tonic-gate } 23097c478bd9Sstevel@tonic-gate if ((mdsp->mds_bitmap_retired[bit / NBPBMW] & 23107c478bd9Sstevel@tonic-gate (1 << (bit % NBPBMW))) != 0) { 23117c478bd9Sstevel@tonic-gate /* do we already have pp? */ 23127c478bd9Sstevel@tonic-gate if (pp == NULL) { 23137c478bd9Sstevel@tonic-gate pp = page_numtopp_nolock(pfn); 23147c478bd9Sstevel@tonic-gate } 23157c478bd9Sstevel@tonic-gate ASSERT(pp != NULL); 2316db874c57Selowe ASSERT(PP_RETIRED(pp)); 23177c478bd9Sstevel@tonic-gate if (mhp->mh_cancel != 0) { 2318db874c57Selowe page_unlock(pp); 23197c478bd9Sstevel@tonic-gate /* 23207c478bd9Sstevel@tonic-gate * To satisfy ASSERT below in 23217c478bd9Sstevel@tonic-gate * cancel code. 
23227c478bd9Sstevel@tonic-gate */ 23237c478bd9Sstevel@tonic-gate mhp->mh_hold_todo++; 23247c478bd9Sstevel@tonic-gate } else { 23258b464eb8Smec (void) page_unretire_pp(pp, 23268b464eb8Smec PR_UNR_CLEAN); 23277c478bd9Sstevel@tonic-gate } 23287c478bd9Sstevel@tonic-gate } 23297c478bd9Sstevel@tonic-gate } 23307c478bd9Sstevel@tonic-gate } 23317c478bd9Sstevel@tonic-gate /* 23327c478bd9Sstevel@tonic-gate * Free retired page bitmap and collected page bitmap 23337c478bd9Sstevel@tonic-gate */ 23347c478bd9Sstevel@tonic-gate for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL; 23357c478bd9Sstevel@tonic-gate mdsp = mdsp->mds_next) { 23367c478bd9Sstevel@tonic-gate ASSERT(mdsp->mds_bitmap_retired != NULL); 23377c478bd9Sstevel@tonic-gate kmem_free(mdsp->mds_bitmap_retired, MDS_BITMAPBYTES(mdsp)); 23387c478bd9Sstevel@tonic-gate mdsp->mds_bitmap_retired = NULL; /* Paranoia. */ 23397c478bd9Sstevel@tonic-gate ASSERT(mdsp->mds_bitmap != NULL); 23407c478bd9Sstevel@tonic-gate kmem_free(mdsp->mds_bitmap, MDS_BITMAPBYTES(mdsp)); 23417c478bd9Sstevel@tonic-gate mdsp->mds_bitmap = NULL; /* Paranoia. */ 23427c478bd9Sstevel@tonic-gate } 23437c478bd9Sstevel@tonic-gate 23447c478bd9Sstevel@tonic-gate /* wait for our dr aio cancel thread to exit */ 23457c478bd9Sstevel@tonic-gate while (!(mhp->mh_aio_cleanup_done)) { 23467c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cprinfo); 23477c478bd9Sstevel@tonic-gate delay(drv_usectohz(DR_AIO_CLEANUP_DELAY)); 23487c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cprinfo, &mhp->mh_mutex); 23497c478bd9Sstevel@tonic-gate } 23507c478bd9Sstevel@tonic-gate refused: 23517c478bd9Sstevel@tonic-gate if (mhp->mh_cancel != 0) { 23527c478bd9Sstevel@tonic-gate page_t *pp; 23537c478bd9Sstevel@tonic-gate 23547c478bd9Sstevel@tonic-gate comp_code = mhp->mh_cancel; 23557c478bd9Sstevel@tonic-gate /* 23567c478bd9Sstevel@tonic-gate * Go through list of deleted pages (mh_deleted) freeing 23577c478bd9Sstevel@tonic-gate * them. 
23587c478bd9Sstevel@tonic-gate */ 23597c478bd9Sstevel@tonic-gate while ((pp = mhp->mh_deleted) != NULL) { 23607c478bd9Sstevel@tonic-gate mhp->mh_deleted = pp->p_next; 23617c478bd9Sstevel@tonic-gate mhp->mh_hold_todo++; 23627c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 23637c478bd9Sstevel@tonic-gate /* Restore p_next. */ 23647c478bd9Sstevel@tonic-gate pp->p_next = pp->p_prev; 23657c478bd9Sstevel@tonic-gate if (PP_ISFREE(pp)) { 23667c478bd9Sstevel@tonic-gate cmn_err(CE_PANIC, 23677c478bd9Sstevel@tonic-gate "page %p is free", 23687c478bd9Sstevel@tonic-gate (void *)pp); 23697c478bd9Sstevel@tonic-gate } 23707c478bd9Sstevel@tonic-gate page_free(pp, 1); 23717c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 23727c478bd9Sstevel@tonic-gate } 23737c478bd9Sstevel@tonic-gate ASSERT(mhp->mh_hold_todo == mhp->mh_vm_pages); 23747c478bd9Sstevel@tonic-gate 23757c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 23767c478bd9Sstevel@tonic-gate put_availrmem(mhp->mh_vm_pages); 23777c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 23787c478bd9Sstevel@tonic-gate 23797c478bd9Sstevel@tonic-gate goto t_exit; 23807c478bd9Sstevel@tonic-gate } 23817c478bd9Sstevel@tonic-gate 23827c478bd9Sstevel@tonic-gate /* 23837c478bd9Sstevel@tonic-gate * All the pages are no longer in use and are exclusively locked. 23847c478bd9Sstevel@tonic-gate */ 23857c478bd9Sstevel@tonic-gate 23867c478bd9Sstevel@tonic-gate mhp->mh_deleted = NULL; 23877c478bd9Sstevel@tonic-gate 23887c478bd9Sstevel@tonic-gate kphysm_del_cleanup(mhp); 23897c478bd9Sstevel@tonic-gate 239073347c69Smb158278 /* 23919853d9e8SJason Beloro * mem_node_del_range needs to be after kphysm_del_cleanup so 239273347c69Smb158278 * that the mem_node_config[] will remain intact for the cleanup. 
239373347c69Smb158278 */ 239473347c69Smb158278 for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL; 239573347c69Smb158278 mdsp = mdsp->mds_next) { 23969853d9e8SJason Beloro mem_node_del_range(mdsp->mds_base, 23979853d9e8SJason Beloro mdsp->mds_base + mdsp->mds_npgs - 1); 239873347c69Smb158278 } 2399af4c679fSSean McEnroe /* cleanup the page counters */ 2400af4c679fSSean McEnroe page_ctrs_cleanup(); 240173347c69Smb158278 24027c478bd9Sstevel@tonic-gate comp_code = KPHYSM_OK; 24037c478bd9Sstevel@tonic-gate 24047c478bd9Sstevel@tonic-gate t_exit: 24057c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 24067c478bd9Sstevel@tonic-gate kphysm_setup_post_del(mhp->mh_vm_pages, 24077c478bd9Sstevel@tonic-gate (comp_code == KPHYSM_OK) ? 0 : 1); 24087c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 24097c478bd9Sstevel@tonic-gate 24107c478bd9Sstevel@tonic-gate early_exit: 24117c478bd9Sstevel@tonic-gate /* mhp->mh_mutex exited by CALLB_CPR_EXIT() */ 24127c478bd9Sstevel@tonic-gate mhp->mh_state = MHND_DONE; 24137c478bd9Sstevel@tonic-gate del_complete_funcp = mhp->mh_delete_complete; 24147c478bd9Sstevel@tonic-gate del_complete_arg = mhp->mh_delete_complete_arg; 24157c478bd9Sstevel@tonic-gate CALLB_CPR_EXIT(&cprinfo); 24167c478bd9Sstevel@tonic-gate (*del_complete_funcp)(del_complete_arg, comp_code); 24177c478bd9Sstevel@tonic-gate thread_exit(); 24187c478bd9Sstevel@tonic-gate /*NOTREACHED*/ 24197c478bd9Sstevel@tonic-gate } 24207c478bd9Sstevel@tonic-gate 24217c478bd9Sstevel@tonic-gate /* 24227c478bd9Sstevel@tonic-gate * Start the delete of the memory from the system. 
24237c478bd9Sstevel@tonic-gate */ 24247c478bd9Sstevel@tonic-gate int 24257c478bd9Sstevel@tonic-gate kphysm_del_start( 24267c478bd9Sstevel@tonic-gate memhandle_t handle, 24277c478bd9Sstevel@tonic-gate void (*complete)(void *, int), 24287c478bd9Sstevel@tonic-gate void *complete_arg) 24297c478bd9Sstevel@tonic-gate { 24307c478bd9Sstevel@tonic-gate struct mem_handle *mhp; 24317c478bd9Sstevel@tonic-gate 24327c478bd9Sstevel@tonic-gate mhp = kphysm_lookup_mem_handle(handle); 24337c478bd9Sstevel@tonic-gate if (mhp == NULL) { 24347c478bd9Sstevel@tonic-gate return (KPHYSM_EHANDLE); 24357c478bd9Sstevel@tonic-gate } 24367c478bd9Sstevel@tonic-gate switch (mhp->mh_state) { 24377c478bd9Sstevel@tonic-gate case MHND_FREE: 24387c478bd9Sstevel@tonic-gate ASSERT(mhp->mh_state != MHND_FREE); 24397c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 24407c478bd9Sstevel@tonic-gate return (KPHYSM_EHANDLE); 24417c478bd9Sstevel@tonic-gate case MHND_INIT: 24427c478bd9Sstevel@tonic-gate break; 24437c478bd9Sstevel@tonic-gate case MHND_STARTING: 24447c478bd9Sstevel@tonic-gate case MHND_RUNNING: 24457c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 24467c478bd9Sstevel@tonic-gate return (KPHYSM_ESEQUENCE); 24477c478bd9Sstevel@tonic-gate case MHND_DONE: 24487c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 24497c478bd9Sstevel@tonic-gate return (KPHYSM_ESEQUENCE); 24507c478bd9Sstevel@tonic-gate case MHND_RELEASE: 24517c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 24527c478bd9Sstevel@tonic-gate return (KPHYSM_ESEQUENCE); 24537c478bd9Sstevel@tonic-gate default: 24547c478bd9Sstevel@tonic-gate #ifdef DEBUG 24557c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "kphysm_del_start(0x%p) state corrupt %d", 24567c478bd9Sstevel@tonic-gate (void *)mhp, mhp->mh_state); 24577c478bd9Sstevel@tonic-gate #endif /* DEBUG */ 24587c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 24597c478bd9Sstevel@tonic-gate return (KPHYSM_EHANDLE); 24607c478bd9Sstevel@tonic-gate } 24617c478bd9Sstevel@tonic-gate 
24627c478bd9Sstevel@tonic-gate if (mhp->mh_transit.trl_spans == NULL) { 24637c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 24647c478bd9Sstevel@tonic-gate return (KPHYSM_ENOWORK); 24657c478bd9Sstevel@tonic-gate } 24667c478bd9Sstevel@tonic-gate 24677c478bd9Sstevel@tonic-gate ASSERT(complete != NULL); 24687c478bd9Sstevel@tonic-gate mhp->mh_delete_complete = complete; 24697c478bd9Sstevel@tonic-gate mhp->mh_delete_complete_arg = complete_arg; 24707c478bd9Sstevel@tonic-gate mhp->mh_state = MHND_STARTING; 24717c478bd9Sstevel@tonic-gate /* 24727c478bd9Sstevel@tonic-gate * Release the mutex in case thread_create sleeps. 24737c478bd9Sstevel@tonic-gate */ 24747c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 24757c478bd9Sstevel@tonic-gate 24767c478bd9Sstevel@tonic-gate /* 24777c478bd9Sstevel@tonic-gate * The "obvious" process for this thread is pageout (proc_pageout) 24787c478bd9Sstevel@tonic-gate * but this gives the thread too much power over freemem 24797c478bd9Sstevel@tonic-gate * which results in freemem starvation. 24807c478bd9Sstevel@tonic-gate */ 24817c478bd9Sstevel@tonic-gate (void) thread_create(NULL, 0, delete_memory_thread, mhp, 0, &p0, 24827c478bd9Sstevel@tonic-gate TS_RUN, maxclsyspri - 1); 24837c478bd9Sstevel@tonic-gate 24847c478bd9Sstevel@tonic-gate return (KPHYSM_OK); 24857c478bd9Sstevel@tonic-gate } 24867c478bd9Sstevel@tonic-gate 24877c478bd9Sstevel@tonic-gate static kmutex_t pp_dummy_lock; /* Protects init. of pp_dummy. */ 24887c478bd9Sstevel@tonic-gate static caddr_t pp_dummy; 24897c478bd9Sstevel@tonic-gate static pgcnt_t pp_dummy_npages; 24907c478bd9Sstevel@tonic-gate static pfn_t *pp_dummy_pfn; /* Array of dummy pfns. 
/*
 * Put each page_t in [pages, epages) into the deleted state: invalid
 * pfn, no offset, and a deleted page lock so that subsequent
 * page_lock() attempts fail rather than wait.
 */
static void
memseg_remap_init_pages(page_t *pages, page_t *epages)
{
	page_t *pp;

	for (pp = pages; pp < epages; pp++) {
		pp->p_pagenum = PFN_INVALID;	/* XXXX */
		pp->p_offset = (u_offset_t)-1;
		page_iolock_init(pp);
		while (!page_lock(pp, SE_EXCL, (kmutex_t *)NULL, P_RECLAIM))
			continue;
		page_lock_delete(pp);
	}
}

/*
 * One-time, idempotent initialization of the dummy page area.
 * Serialized by pp_dummy_lock; pp_dummy != NULL marks completion.
 */
void
memseg_remap_init()
{
	mutex_enter(&pp_dummy_lock);
	if (pp_dummy == NULL) {
		uint_t dpages;
		int i;

		/*
		 * dpages starts off as the size of the structure and
		 * ends up as the minimum number of pages that will
		 * hold a whole number of page_t structures.
		 */
		dpages = sizeof (page_t);
		ASSERT(dpages != 0);
		ASSERT(dpages <= MMU_PAGESIZE);

		/* Strip factors of two: final dpages is the odd part. */
		while ((dpages & 1) == 0)
			dpages >>= 1;

		pp_dummy_npages = dpages;
		/*
		 * Allocate pp_dummy pages directly from static_arena,
		 * since these are whole page allocations and are
		 * referenced by physical address. This also has the
		 * nice fringe benefit of hiding the memory from
		 * ::findleaks since it doesn't deal well with allocated
		 * kernel heap memory that doesn't have any mappings.
		 */
		pp_dummy = vmem_xalloc(static_arena, ptob(pp_dummy_npages),
		    PAGESIZE, 0, 0, NULL, NULL, VM_SLEEP);
		bzero(pp_dummy, ptob(pp_dummy_npages));
		ASSERT(((uintptr_t)pp_dummy & MMU_PAGEOFFSET) == 0);
		/* Record each dummy page's pfn for later remapping. */
		pp_dummy_pfn = kmem_alloc(sizeof (*pp_dummy_pfn) *
		    pp_dummy_npages, KM_SLEEP);
		for (i = 0; i < pp_dummy_npages; i++) {
			pp_dummy_pfn[i] = hat_getpfnum(kas.a_hat,
			    &pp_dummy[MMU_PAGESIZE * i]);
			ASSERT(pp_dummy_pfn[i] != PFN_INVALID);
		}
		/*
		 * Initialize the page_t's to a known 'deleted' state
		 * that matches the state of deleted pages.
		 */
		memseg_remap_init_pages((page_t *)pp_dummy,
		    (page_t *)(pp_dummy + ptob(pp_dummy_npages)));
		/* Remove kmem mappings for the pages for safety. */
		hat_unload(kas.a_hat, pp_dummy, ptob(pp_dummy_npages),
		    HAT_UNLOAD_UNLOCK);
		/* Leave pp_dummy pointer set as flag that init is done. */
	}
	mutex_exit(&pp_dummy_lock);
}

/*
 * Remap a page-aligned range of page_t's to dummy pages.
 */
void
remap_to_dummy(caddr_t va, pgcnt_t metapgs)
{
	int phase;

	ASSERT(IS_P2ALIGNED((uint64_t)(uintptr_t)va, PAGESIZE));

	/*
	 * We may start remapping at a non-zero page offset
	 * within the dummy pages since the low/high ends
	 * of the outgoing pp's could be shared by other
	 * memsegs (see memseg_remap_meta).
	 */
	phase = btop((uint64_t)(uintptr_t)va) % pp_dummy_npages;
	/*CONSTCOND*/
	ASSERT(PAGESIZE % sizeof (page_t) || phase == 0);

	/* Tile the dummy pages over the range, pp_dummy_npages at a time. */
	while (metapgs != 0) {
		pgcnt_t n;
		int i, j;

		n = pp_dummy_npages;
		if (n > metapgs)
			n = metapgs;
		for (i = 0; i < n; i++) {
			j = (i + phase) % pp_dummy_npages;
			hat_devload(kas.a_hat, va, ptob(1), pp_dummy_pfn[j],
			    PROT_READ,
			    HAT_LOAD | HAT_LOAD_NOCONSIST |
			    HAT_LOAD_REMAP);
			va += ptob(1);
		}
		metapgs -= n;
	}
}

/*
 * Remap a dynamically added memseg's page_t metadata onto the dummy
 * pages.  If the metadata is not included in the memseg itself it is
 * handled by memseg_remap_meta() instead.  Sets pages_end == pages_base
 * to flag the memseg as deleted (see kphysm_del_cleanup).
 */
static void
memseg_remap_to_dummy(struct memseg *seg)
{
	caddr_t pp;
	pgcnt_t metapgs;

	ASSERT(memseg_is_dynamic(seg));
	ASSERT(pp_dummy != NULL);


	if (!memseg_includes_meta(seg)) {
		memseg_remap_meta(seg);
		return;
	}

	pp = (caddr_t)seg->pages;
	metapgs = seg->pages_base - memseg_get_start(seg);
	ASSERT(metapgs != 0);

	seg->pages_end = seg->pages_base;

	remap_to_dummy(pp, metapgs);
}

/*
 * Transition all the deleted pages to the deleted state so that
 * page_lock will not wait. The page_lock_delete call will
 * also wake up any waiters.
 */
static void
memseg_lock_delete_all(struct memseg *seg)
{
	page_t *pp;

	for (pp = seg->pages; pp < seg->epages; pp++) {
		pp->p_pagenum = PFN_INVALID;	/* XXXX */
		page_lock_delete(pp);
	}
}

/*
 * Final phase of a successful memory delete: unlink the deleted memsegs
 * from the global memsegs list, rebuild the pfn hash, update page
 * accounting (total_pages, maxmem, physmem, availrmem_initial) and the
 * phys_avail/phys_install memlists, and recycle or park each memseg's
 * metadata.  Called with mhp->mh_mutex held; the mutex is dropped for
 * the bulk of the work and reacquired before returning.
 */
static void
kphysm_del_cleanup(struct mem_handle *mhp)
{
	struct memdelspan	*mdsp;
	struct memseg		*seg;
	struct memseg		**segpp;
	struct memseg		*seglist;
	pfn_t			p_end;
	uint64_t		avmem;
	pgcnt_t			avpgs;
	pgcnt_t			npgs;

	avpgs = mhp->mh_vm_pages;

	memsegs_lock(1);

	/*
	 * remove from main segment list.
	 */
	npgs = 0;
	seglist = NULL;
	for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
	    mdsp = mdsp->mds_next) {
		p_end = mdsp->mds_base + mdsp->mds_npgs;
		for (segpp = &memsegs; (seg = *segpp) != NULL; ) {
			if (seg->pages_base >= p_end ||
			    seg->pages_end <= mdsp->mds_base) {
				/* Span and memseg don't overlap. */
				segpp = &((*segpp)->next);
				continue;
			}
			/* A deleted memseg must lie wholly within the span. */
			ASSERT(seg->pages_base >= mdsp->mds_base);
			ASSERT(seg->pages_end <= p_end);

			PLCNT_MODIFY_MAX(seg->pages_base,
			    seg->pages_base - seg->pages_end);

			/* Hide the memseg from future scans. */
			hat_kpm_delmem_mseg_update(seg, segpp);
			*segpp = seg->next;
			membar_producer();	/* TODO: Needed? */
			npgs += MSEG_NPAGES(seg);

			/*
			 * Leave the deleted segment's next pointer intact
			 * in case a memsegs scanning loop is walking this
			 * segment concurrently.
			 */
			seg->lnext = seglist;
			seglist = seg;
		}
	}

	build_pfn_hash();

	ASSERT(npgs < total_pages);
	total_pages -= npgs;

	/*
	 * Recalculate the paging parameters now total_pages has changed.
	 * This will also cause the clock hands to be reset before next use.
	 */
	setupclock(1);

	memsegs_unlock(1);

	mutex_exit(&mhp->mh_mutex);

	/* Process the private list of unlinked memsegs (linked via lnext). */
	while ((seg = seglist) != NULL) {
		pfn_t mseg_start;
		pfn_t mseg_base, mseg_end;
		pgcnt_t mseg_npgs;
		int mlret;

		seglist = seg->lnext;

		/*
		 * Put the page_t's into the deleted state to stop
		 * cv_wait()s on the pages. When we remap, the dummy
		 * page_t's will be in the same state.
		 */
		memseg_lock_delete_all(seg);
		/*
		 * Collect up information based on pages_base and pages_end
		 * early so that we can flag early that the memseg has been
		 * deleted by setting pages_end == pages_base.
		 */
		mseg_base = seg->pages_base;
		mseg_end = seg->pages_end;
		mseg_npgs = MSEG_NPAGES(seg);
		mseg_start = memseg_get_start(seg);

		if (memseg_is_dynamic(seg)) {
			/* Remap the meta data to our special dummy area. */
			memseg_remap_to_dummy(seg);

			/* Make the VA range reusable by a future add. */
			mutex_enter(&memseg_lists_lock);
			seg->lnext = memseg_va_avail;
			memseg_va_avail = seg;
			mutex_exit(&memseg_lists_lock);
		} else {
			/*
			 * For memory whose page_ts were allocated
			 * at boot, we need to find a new use for
			 * the page_t memory.
			 * For the moment, just leak it.
			 * (It is held in the memseg_delete_junk list.)
			 */
			seg->pages_end = seg->pages_base;

			mutex_enter(&memseg_lists_lock);
			seg->lnext = memseg_delete_junk;
			memseg_delete_junk = seg;
			mutex_exit(&memseg_lists_lock);
		}

		/* Must not use seg now as it could be re-used. */

		memlist_write_lock();

		mlret = memlist_delete_span(
		    (uint64_t)(mseg_base) << PAGESHIFT,
		    (uint64_t)(mseg_npgs) << PAGESHIFT,
		    &phys_avail);
		ASSERT(mlret == MEML_SPANOP_OK);

		mlret = memlist_delete_span(
		    (uint64_t)(mseg_start) << PAGESHIFT,
		    (uint64_t)(mseg_end - mseg_start) <<
		    PAGESHIFT,
		    &phys_install);
		ASSERT(mlret == MEML_SPANOP_OK);
		phys_install_has_changed();

		memlist_write_unlock();
	}

	/* Recompute the installed-memory high-water marks. */
	memlist_read_lock();
	installed_top_size(phys_install, &physmax, &physinstalled);
	memlist_read_unlock();

	mutex_enter(&freemem_lock);
	maxmem -= avpgs;
	physmem -= avpgs;
	/* availrmem is adjusted during the delete. */
	availrmem_initial -= avpgs;

	mutex_exit(&freemem_lock);

	dump_resize();

	cmn_err(CE_CONT, "?kphysm_delete: mem = %ldK "
	    "(0x%" PRIx64 ")\n",
	    physinstalled << (PAGESHIFT - 10),
	    (uint64_t)physinstalled << PAGESHIFT);

	avmem = (uint64_t)freemem << PAGESHIFT;
	cmn_err(CE_CONT, "?kphysm_delete: "
	    "avail mem = %" PRId64 "\n", avmem);

	/*
	 * Update lgroup generation number on single lgroup systems
	 */
	if (nlgrps == 1)
		lgrp_config(LGRP_CONFIG_GEN_UPDATE, 0, 0);

	/* Successfully deleted system memory */
	mutex_enter(&mhp->mh_mutex);
}

/* Count of wakeups issued to waiters on vnode-less pages (debug stat). */
static uint_t mdel_nullvp_waiter;

/*
 * Take ownership of a captured page: hash it out of its vnode (if any),
 * wake any waiters on an anonymous page, and push it onto the handle's
 * mh_deleted list, decrementing the remaining-work count mh_hold_todo.
 */
static void
page_delete_collect(
	page_t *pp,
	struct mem_handle *mhp)
{
	if (pp->p_vnode) {
		page_hashout(pp, (kmutex_t *)NULL);
		/* do not do PP_SETAGED(pp); */
	} else {
		kmutex_t *sep;

		sep = page_se_mutex(pp);
		mutex_enter(sep);
		if (CV_HAS_WAITERS(&pp->p_cv)) {
			mdel_nullvp_waiter++;
			cv_broadcast(&pp->p_cv);
		}
		mutex_exit(sep);
	}
	/* The page must be solitary (not on any page list). */
	ASSERT(pp->p_next == pp->p_prev);
	ASSERT(pp->p_next == NULL || pp->p_next == pp);
	pp->p_next = mhp->mh_deleted;
	mhp->mh_deleted = pp;
	ASSERT(mhp->mh_hold_todo != 0);
	mhp->mh_hold_todo--;
}

/*
 * Set the handle's collect flag under the transit-list lock.
 */
static void
transit_list_collect(struct mem_handle *mhp, int v)
{
	struct transit_list_head *trh;

	trh = &transit_list_head;
	mutex_enter(&trh->trh_lock);
	mhp->mh_transit.trl_collect = v;
	mutex_exit(&trh->trh_lock);
}

/*
 * Push tlp onto the global transit list.  Caller holds trh_lock.
 */
static void
transit_list_insert(struct transit_list *tlp)
{
	struct transit_list_head *trh;

	trh = &transit_list_head;
	ASSERT(MUTEX_HELD(&trh->trh_lock));
	tlp->trl_next = trh->trh_head;
	trh->trh_head = tlp;
}

/*
 * Unlink tlp from the global transit list.  Caller holds trh_lock;
 * tlp is expected to be present (ASSERTed).
 */
static void
transit_list_remove(struct transit_list *tlp)
{
	struct transit_list_head *trh;
	struct transit_list **tlpp;

	trh = &transit_list_head;
	tlpp = &trh->trh_head;
	ASSERT(MUTEX_HELD(&trh->trh_lock));
	while (*tlpp != NULL && *tlpp != tlp)
		tlpp = &(*tlpp)->trl_next;
	ASSERT(*tlpp != NULL);
	if (*tlpp == tlp)
		*tlpp = tlp->trl_next;
	tlp->trl_next = NULL;
}

/*
 * Return the transit list whose spans contain pfnum, or NULL if none.
 * NOTE(review): callers appear to hold trh_lock (see
 * pfn_is_being_deleted); there is no ASSERT here — confirm before
 * adding new callers.
 */
static struct transit_list *
pfnum_to_transit_list(struct transit_list_head *trh, pfn_t pfnum)
{
	struct transit_list *tlp;

	for (tlp = trh->trh_head; tlp != NULL; tlp = tlp->trl_next) {
		struct memdelspan *mdsp;

		for (mdsp = tlp->trl_spans; mdsp != NULL;
		    mdsp = mdsp->mds_next) {
			if (pfnum >= mdsp->mds_base &&
			    pfnum < (mdsp->mds_base + mdsp->mds_npgs)) {
				return (tlp);
			}
		}
	}
	return (NULL);
}

/*
 * Return nonzero if pfnum lies within a span whose pages are currently
 * being collected for deletion.  Unlocked early exit when no deletes
 * are in flight (trh_head == NULL).
 */
int
pfn_is_being_deleted(pfn_t pfnum)
{
	struct transit_list_head *trh;
	struct transit_list *tlp;
	int ret;

	trh = &transit_list_head;
	if (trh->trh_head == NULL)
		return (0);

	mutex_enter(&trh->trh_lock);
	tlp = pfnum_to_transit_list(trh, pfnum);
	ret = (tlp != NULL && tlp->trl_collect);
	mutex_exit(&trh->trh_lock);

	return (ret);
}

#ifdef MEM_DEL_STATS
extern int hz;
/*
 * Print the per-delete statistics counters for this handle when
 * mem_del_stat_print is set (MEM_DEL_STATS kernels only).
 */
static void
mem_del_stat_print_func(struct mem_handle *mhp)
{
	uint64_t tmp;

	if (mem_del_stat_print) {
		printf("memory delete loop %x/%x, statistics%s\n",
		    (uint_t)mhp->mh_transit.trl_spans->mds_base,
		    (uint_t)mhp->mh_transit.trl_spans->mds_npgs,
		    (mhp->mh_cancel ? " (cancelled)" : ""));
		printf("\t%8u nloop\n", mhp->mh_delstat.nloop);
		printf("\t%8u need_free\n", mhp->mh_delstat.need_free);
		printf("\t%8u free_loop\n", mhp->mh_delstat.free_loop);
		printf("\t%8u free_low\n", mhp->mh_delstat.free_low);
		printf("\t%8u free_failed\n", mhp->mh_delstat.free_failed);
		printf("\t%8u ncheck\n", mhp->mh_delstat.ncheck);
		printf("\t%8u nopaget\n", mhp->mh_delstat.nopaget);
		printf("\t%8u lockfail\n", mhp->mh_delstat.lockfail);
		printf("\t%8u nfree\n", mhp->mh_delstat.nfree);
		printf("\t%8u nreloc\n", mhp->mh_delstat.nreloc);
		printf("\t%8u nrelocfail\n", mhp->mh_delstat.nrelocfail);
		printf("\t%8u already_done\n", mhp->mh_delstat.already_done);
		printf("\t%8u first_notfree\n", mhp->mh_delstat.first_notfree);
		printf("\t%8u npplocked\n", mhp->mh_delstat.npplocked);
		printf("\t%8u nlockreloc\n", mhp->mh_delstat.nlockreloc);
		printf("\t%8u nnorepl\n", mhp->mh_delstat.nnorepl);
		printf("\t%8u nmodreloc\n", mhp->mh_delstat.nmodreloc);
		printf("\t%8u ndestroy\n", mhp->mh_delstat.ndestroy);
		printf("\t%8u nputpage\n", mhp->mh_delstat.nputpage);
		printf("\t%8u nnoreclaim\n", mhp->mh_delstat.nnoreclaim);
		printf("\t%8u ndelay\n", mhp->mh_delstat.ndelay);
		printf("\t%8u demotefail\n", mhp->mh_delstat.demotefail);
		printf("\t%8u retired\n", mhp->mh_delstat.retired);
		printf("\t%8u toxic\n", mhp->mh_delstat.toxic);
		printf("\t%8u failing\n", mhp->mh_delstat.failing);
		printf("\t%8u modtoxic\n", mhp->mh_delstat.modtoxic);
		printf("\t%8u npplkdtoxic\n", mhp->mh_delstat.npplkdtoxic);
		printf("\t%8u gptlmodfail\n", mhp->mh_delstat.gptlmodfail);
		printf("\t%8u gptllckfail\n", mhp->mh_delstat.gptllckfail);
		tmp = mhp->mh_delstat.nticks_total / hz;  /* seconds */
		printf(
		    "\t%"PRIu64" nticks_total - %"PRIu64" min %"PRIu64" sec\n",
		    mhp->mh_delstat.nticks_total, tmp / 60, tmp % 60);

		tmp = mhp->mh_delstat.nticks_pgrp / hz;  /* seconds */
		printf(
		    "\t%"PRIu64" nticks_pgrp - %"PRIu64" min %"PRIu64" sec\n",
		    mhp->mh_delstat.nticks_pgrp, tmp / 60, tmp % 60);
	}
}
#endif /* MEM_DEL_STATS */

/*
 * Registry of (callback vector, argument) pairs notified on memory
 * add/delete events.  Protected by mem_callback_rwlock: writers for
 * register/unregister, readers while callbacks are invoked.
 */
struct mem_callback {
	kphysm_setup_vector_t	*vec;
	void			*arg;
};

#define	NMEMCALLBACKS		100

static struct mem_callback mem_callbacks[NMEMCALLBACKS];
static uint_t nmemcallbacks;
static krwlock_t mem_callback_rwlock;

/*
 * Register a callback vector.  Returns 0 on success, EINVAL for a bad
 * version or incomplete vector, EEXIST for a duplicate (vec, arg)
 * registration, or ENOMEM when the fixed-size table is full.
 */
int
kphysm_setup_func_register(kphysm_setup_vector_t *vec, void *arg)
{
	uint_t i, found;

	/*
	 * This test will become more complicated when the version must
	 * change.
	 */
	if (vec->version != KPHYSM_SETUP_VECTOR_VERSION)
		return (EINVAL);

	if (vec->post_add == NULL || vec->pre_del == NULL ||
	    vec->post_del == NULL)
		return (EINVAL);

	rw_enter(&mem_callback_rwlock, RW_WRITER);
	/* found is 1-based index of the first free slot (0 = none yet). */
	for (i = 0, found = 0; i < nmemcallbacks; i++) {
		if (mem_callbacks[i].vec == NULL && found == 0)
			found = i + 1;
		if (mem_callbacks[i].vec == vec &&
		    mem_callbacks[i].arg == arg) {
#ifdef DEBUG
			/* Catch this in DEBUG kernels. */
			cmn_err(CE_WARN, "kphysm_setup_func_register"
			    "(0x%p, 0x%p) duplicate registration from 0x%p",
			    (void *)vec, arg, (void *)caller());
#endif /* DEBUG */
			rw_exit(&mem_callback_rwlock);
			return (EEXIST);
		}
	}
	if (found != 0) {
		/* Reuse a previously vacated slot. */
		i = found - 1;
	} else {
		ASSERT(nmemcallbacks < NMEMCALLBACKS);
		if (nmemcallbacks == NMEMCALLBACKS) {
			rw_exit(&mem_callback_rwlock);
			return (ENOMEM);
		}
		i = nmemcallbacks++;
	}
	mem_callbacks[i].vec = vec;
	mem_callbacks[i].arg = arg;
	rw_exit(&mem_callback_rwlock);
	return (0);
}

/*
 * Remove a (vec, arg) registration.  A vacated interior slot is left
 * NULL for reuse; nmemcallbacks only shrinks when the last slot frees.
 * Silently does nothing if the pair is not registered.
 */
void
kphysm_setup_func_unregister(kphysm_setup_vector_t *vec, void *arg)
{
	uint_t i;

	rw_enter(&mem_callback_rwlock, RW_WRITER);
	for (i = 0; i < nmemcallbacks; i++) {
		if (mem_callbacks[i].vec == vec &&
		    mem_callbacks[i].arg == arg) {
			mem_callbacks[i].vec = NULL;
			mem_callbacks[i].arg = NULL;
			if (i == (nmemcallbacks - 1))
				nmemcallbacks--;
			break;
		}
	}
	rw_exit(&mem_callback_rwlock);
}

/*
 * Invoke every registered post_add callback with the number of pages
 * added.  The registry is held stable with the reader lock.
 */
static void
kphysm_setup_post_add(pgcnt_t delta_pages)
{
	uint_t i;

	rw_enter(&mem_callback_rwlock, RW_READER);
	for (i = 0; i < nmemcallbacks; i++) {
		if (mem_callbacks[i].vec != NULL) {
			(*mem_callbacks[i].vec->post_add)
			    (mem_callbacks[i].arg, delta_pages);
		}
	}
	rw_exit(&mem_callback_rwlock);
}

/*
 * Note the locking between pre_del and post_del: The reader lock is held
 * between the two calls to stop the set of functions from changing.
 */
30667c478bd9Sstevel@tonic-gate */ 30677c478bd9Sstevel@tonic-gate 30687c478bd9Sstevel@tonic-gate static int 30697c478bd9Sstevel@tonic-gate kphysm_setup_pre_del(pgcnt_t delta_pages) 30707c478bd9Sstevel@tonic-gate { 30717c478bd9Sstevel@tonic-gate uint_t i; 30727c478bd9Sstevel@tonic-gate int ret; 30737c478bd9Sstevel@tonic-gate int aret; 30747c478bd9Sstevel@tonic-gate 30757c478bd9Sstevel@tonic-gate ret = 0; 30767c478bd9Sstevel@tonic-gate rw_enter(&mem_callback_rwlock, RW_READER); 30777c478bd9Sstevel@tonic-gate for (i = 0; i < nmemcallbacks; i++) { 30787c478bd9Sstevel@tonic-gate if (mem_callbacks[i].vec != NULL) { 30797c478bd9Sstevel@tonic-gate aret = (*mem_callbacks[i].vec->pre_del) 30807c478bd9Sstevel@tonic-gate (mem_callbacks[i].arg, delta_pages); 30817c478bd9Sstevel@tonic-gate ret |= aret; 30827c478bd9Sstevel@tonic-gate } 30837c478bd9Sstevel@tonic-gate } 30847c478bd9Sstevel@tonic-gate 30857c478bd9Sstevel@tonic-gate return (ret); 30867c478bd9Sstevel@tonic-gate } 30877c478bd9Sstevel@tonic-gate 30887c478bd9Sstevel@tonic-gate static void 30897c478bd9Sstevel@tonic-gate kphysm_setup_post_del(pgcnt_t delta_pages, int cancelled) 30907c478bd9Sstevel@tonic-gate { 30917c478bd9Sstevel@tonic-gate uint_t i; 30927c478bd9Sstevel@tonic-gate 30937c478bd9Sstevel@tonic-gate for (i = 0; i < nmemcallbacks; i++) { 30947c478bd9Sstevel@tonic-gate if (mem_callbacks[i].vec != NULL) { 30957c478bd9Sstevel@tonic-gate (*mem_callbacks[i].vec->post_del) 30967c478bd9Sstevel@tonic-gate (mem_callbacks[i].arg, delta_pages, cancelled); 30977c478bd9Sstevel@tonic-gate } 30987c478bd9Sstevel@tonic-gate } 30997c478bd9Sstevel@tonic-gate rw_exit(&mem_callback_rwlock); 31007c478bd9Sstevel@tonic-gate } 31017c478bd9Sstevel@tonic-gate 31027c478bd9Sstevel@tonic-gate static int 31037c478bd9Sstevel@tonic-gate kphysm_split_memseg( 31047c478bd9Sstevel@tonic-gate pfn_t base, 31057c478bd9Sstevel@tonic-gate pgcnt_t npgs) 31067c478bd9Sstevel@tonic-gate { 31077c478bd9Sstevel@tonic-gate struct memseg *seg; 
31087c478bd9Sstevel@tonic-gate struct memseg **segpp; 31097c478bd9Sstevel@tonic-gate pgcnt_t size_low, size_high; 31107c478bd9Sstevel@tonic-gate struct memseg *seg_low, *seg_mid, *seg_high; 31117c478bd9Sstevel@tonic-gate 31127c478bd9Sstevel@tonic-gate /* 31137c478bd9Sstevel@tonic-gate * Lock the memsegs list against other updates now 31147c478bd9Sstevel@tonic-gate */ 31157c478bd9Sstevel@tonic-gate memsegs_lock(1); 31167c478bd9Sstevel@tonic-gate 31177c478bd9Sstevel@tonic-gate /* 31187c478bd9Sstevel@tonic-gate * Find boot time memseg that wholly covers this area. 31197c478bd9Sstevel@tonic-gate */ 31207c478bd9Sstevel@tonic-gate 31217c478bd9Sstevel@tonic-gate /* First find the memseg with page 'base' in it. */ 31227c478bd9Sstevel@tonic-gate for (segpp = &memsegs; (seg = *segpp) != NULL; 31237c478bd9Sstevel@tonic-gate segpp = &((*segpp)->next)) { 31247c478bd9Sstevel@tonic-gate if (base >= seg->pages_base && base < seg->pages_end) 31257c478bd9Sstevel@tonic-gate break; 31267c478bd9Sstevel@tonic-gate } 31277c478bd9Sstevel@tonic-gate if (seg == NULL) { 31287c478bd9Sstevel@tonic-gate memsegs_unlock(1); 31297c478bd9Sstevel@tonic-gate return (0); 31307c478bd9Sstevel@tonic-gate } 31319853d9e8SJason Beloro if (memseg_includes_meta(seg)) { 31327c478bd9Sstevel@tonic-gate memsegs_unlock(1); 31337c478bd9Sstevel@tonic-gate return (0); 31347c478bd9Sstevel@tonic-gate } 31357c478bd9Sstevel@tonic-gate if ((base + npgs) > seg->pages_end) { 31367c478bd9Sstevel@tonic-gate memsegs_unlock(1); 31377c478bd9Sstevel@tonic-gate return (0); 31387c478bd9Sstevel@tonic-gate } 31397c478bd9Sstevel@tonic-gate 31407c478bd9Sstevel@tonic-gate /* 31417c478bd9Sstevel@tonic-gate * Work out the size of the two segments that will 31427c478bd9Sstevel@tonic-gate * surround the new segment, one for low address 31437c478bd9Sstevel@tonic-gate * and one for high. 
31447c478bd9Sstevel@tonic-gate */ 31457c478bd9Sstevel@tonic-gate ASSERT(base >= seg->pages_base); 31467c478bd9Sstevel@tonic-gate size_low = base - seg->pages_base; 31477c478bd9Sstevel@tonic-gate ASSERT(seg->pages_end >= (base + npgs)); 31487c478bd9Sstevel@tonic-gate size_high = seg->pages_end - (base + npgs); 31497c478bd9Sstevel@tonic-gate 31507c478bd9Sstevel@tonic-gate /* 31517c478bd9Sstevel@tonic-gate * Sanity check. 31527c478bd9Sstevel@tonic-gate */ 31537c478bd9Sstevel@tonic-gate if ((size_low + size_high) == 0) { 31547c478bd9Sstevel@tonic-gate memsegs_unlock(1); 31557c478bd9Sstevel@tonic-gate return (0); 31567c478bd9Sstevel@tonic-gate } 31577c478bd9Sstevel@tonic-gate 31587c478bd9Sstevel@tonic-gate /* 31597c478bd9Sstevel@tonic-gate * Allocate the new structures. The old memseg will not be freed 31607c478bd9Sstevel@tonic-gate * as there may be a reference to it. 31617c478bd9Sstevel@tonic-gate */ 31627c478bd9Sstevel@tonic-gate seg_low = NULL; 31637c478bd9Sstevel@tonic-gate seg_high = NULL; 31647c478bd9Sstevel@tonic-gate 31659853d9e8SJason Beloro if (size_low != 0) 31669853d9e8SJason Beloro seg_low = memseg_alloc(); 31677c478bd9Sstevel@tonic-gate 31689853d9e8SJason Beloro seg_mid = memseg_alloc(); 31697c478bd9Sstevel@tonic-gate 31709853d9e8SJason Beloro if (size_high != 0) 31719853d9e8SJason Beloro seg_high = memseg_alloc(); 31727c478bd9Sstevel@tonic-gate 31737c478bd9Sstevel@tonic-gate /* 31747c478bd9Sstevel@tonic-gate * All allocation done now. 
31757c478bd9Sstevel@tonic-gate */ 31767c478bd9Sstevel@tonic-gate if (size_low != 0) { 31777c478bd9Sstevel@tonic-gate seg_low->pages = seg->pages; 31787c478bd9Sstevel@tonic-gate seg_low->epages = seg_low->pages + size_low; 31797c478bd9Sstevel@tonic-gate seg_low->pages_base = seg->pages_base; 31807c478bd9Sstevel@tonic-gate seg_low->pages_end = seg_low->pages_base + size_low; 31817c478bd9Sstevel@tonic-gate seg_low->next = seg_mid; 31829853d9e8SJason Beloro seg_low->msegflags = seg->msegflags; 31837c478bd9Sstevel@tonic-gate } 31847c478bd9Sstevel@tonic-gate if (size_high != 0) { 31857c478bd9Sstevel@tonic-gate seg_high->pages = seg->epages - size_high; 31867c478bd9Sstevel@tonic-gate seg_high->epages = seg_high->pages + size_high; 31877c478bd9Sstevel@tonic-gate seg_high->pages_base = seg->pages_end - size_high; 31887c478bd9Sstevel@tonic-gate seg_high->pages_end = seg_high->pages_base + size_high; 31897c478bd9Sstevel@tonic-gate seg_high->next = seg->next; 31909853d9e8SJason Beloro seg_high->msegflags = seg->msegflags; 31917c478bd9Sstevel@tonic-gate } 31927c478bd9Sstevel@tonic-gate 31937c478bd9Sstevel@tonic-gate seg_mid->pages = seg->pages + size_low; 31947c478bd9Sstevel@tonic-gate seg_mid->pages_base = seg->pages_base + size_low; 31957c478bd9Sstevel@tonic-gate seg_mid->epages = seg->epages - size_high; 31967c478bd9Sstevel@tonic-gate seg_mid->pages_end = seg->pages_end - size_high; 31977c478bd9Sstevel@tonic-gate seg_mid->next = (seg_high != NULL) ? seg_high : seg->next; 31989853d9e8SJason Beloro seg_mid->msegflags = seg->msegflags; 31997c478bd9Sstevel@tonic-gate 32007c478bd9Sstevel@tonic-gate /* 32017c478bd9Sstevel@tonic-gate * Update hat_kpm specific info of all involved memsegs and 32027c478bd9Sstevel@tonic-gate * allow hat_kpm specific global chain updates. 
32037c478bd9Sstevel@tonic-gate */ 32047c478bd9Sstevel@tonic-gate hat_kpm_split_mseg_update(seg, segpp, seg_low, seg_mid, seg_high); 32057c478bd9Sstevel@tonic-gate 32067c478bd9Sstevel@tonic-gate /* 32077c478bd9Sstevel@tonic-gate * At this point we have two equivalent memseg sub-chains, 32087c478bd9Sstevel@tonic-gate * seg and seg_low/seg_mid/seg_high, which both chain on to 32097c478bd9Sstevel@tonic-gate * the same place in the global chain. By re-writing the pointer 32107c478bd9Sstevel@tonic-gate * in the previous element we switch atomically from using the old 32117c478bd9Sstevel@tonic-gate * (seg) to the new. 32127c478bd9Sstevel@tonic-gate */ 32137c478bd9Sstevel@tonic-gate *segpp = (seg_low != NULL) ? seg_low : seg_mid; 32147c478bd9Sstevel@tonic-gate 32157c478bd9Sstevel@tonic-gate membar_enter(); 32167c478bd9Sstevel@tonic-gate 32177c478bd9Sstevel@tonic-gate build_pfn_hash(); 32187c478bd9Sstevel@tonic-gate memsegs_unlock(1); 32197c478bd9Sstevel@tonic-gate 32207c478bd9Sstevel@tonic-gate /* 32217c478bd9Sstevel@tonic-gate * We leave the old segment, 'seg', intact as there may be 32227c478bd9Sstevel@tonic-gate * references to it. Also, as the value of total_pages has not 32237c478bd9Sstevel@tonic-gate * changed and the memsegs list is effectively the same when 32247c478bd9Sstevel@tonic-gate * accessed via the old or the new pointer, we do not have to 32257c478bd9Sstevel@tonic-gate * cause pageout_scanner() to re-evaluate its hand pointers. 32267c478bd9Sstevel@tonic-gate * 32277c478bd9Sstevel@tonic-gate * We currently do not re-use or reclaim the page_t memory. 32287c478bd9Sstevel@tonic-gate * If we do, then this may have to change. 
32297c478bd9Sstevel@tonic-gate */ 32307c478bd9Sstevel@tonic-gate 32317c478bd9Sstevel@tonic-gate mutex_enter(&memseg_lists_lock); 32327c478bd9Sstevel@tonic-gate seg->lnext = memseg_edit_junk; 32337c478bd9Sstevel@tonic-gate memseg_edit_junk = seg; 32347c478bd9Sstevel@tonic-gate mutex_exit(&memseg_lists_lock); 32357c478bd9Sstevel@tonic-gate 32367c478bd9Sstevel@tonic-gate return (1); 32377c478bd9Sstevel@tonic-gate } 32387c478bd9Sstevel@tonic-gate 32397c478bd9Sstevel@tonic-gate /* 32407c478bd9Sstevel@tonic-gate * The sfmmu hat layer (e.g.) accesses some parts of the memseg 32417c478bd9Sstevel@tonic-gate * structure using physical addresses. Therefore a kmem_cache is 32427c478bd9Sstevel@tonic-gate * used with KMC_NOHASH to avoid page crossings within a memseg 32437c478bd9Sstevel@tonic-gate * structure. KMC_NOHASH requires that no external (outside of 32447c478bd9Sstevel@tonic-gate * slab) information is allowed. This, in turn, implies that the 32457c478bd9Sstevel@tonic-gate * cache's slabsize must be exactly a single page, since per-slab 32467c478bd9Sstevel@tonic-gate * information (e.g. the freelist for the slab) is kept at the 32477c478bd9Sstevel@tonic-gate * end of the slab, where it is easy to locate. Should be changed 32487c478bd9Sstevel@tonic-gate * when a more obvious kmem_cache interface/flag will become 32497c478bd9Sstevel@tonic-gate * available. 
32507c478bd9Sstevel@tonic-gate */ 32517c478bd9Sstevel@tonic-gate void 32527c478bd9Sstevel@tonic-gate mem_config_init() 32537c478bd9Sstevel@tonic-gate { 32547c478bd9Sstevel@tonic-gate memseg_cache = kmem_cache_create("memseg_cache", sizeof (struct memseg), 32557c478bd9Sstevel@tonic-gate 0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH); 32567c478bd9Sstevel@tonic-gate } 32579853d9e8SJason Beloro 32589853d9e8SJason Beloro struct memseg * 32599853d9e8SJason Beloro memseg_alloc() 32609853d9e8SJason Beloro { 32619853d9e8SJason Beloro struct memseg *seg; 32629853d9e8SJason Beloro 32639853d9e8SJason Beloro seg = kmem_cache_alloc(memseg_cache, KM_SLEEP); 32649853d9e8SJason Beloro bzero(seg, sizeof (struct memseg)); 32659853d9e8SJason Beloro 32669853d9e8SJason Beloro return (seg); 32679853d9e8SJason Beloro } 32689853d9e8SJason Beloro 32699853d9e8SJason Beloro /* 32709853d9e8SJason Beloro * Return whether the page_t memory for this memseg 32719853d9e8SJason Beloro * is included in the memseg itself. 
32729853d9e8SJason Beloro */ 32739853d9e8SJason Beloro static int 32749853d9e8SJason Beloro memseg_includes_meta(struct memseg *seg) 32759853d9e8SJason Beloro { 32769853d9e8SJason Beloro return (seg->msegflags & MEMSEG_META_INCL); 32779853d9e8SJason Beloro } 32789853d9e8SJason Beloro 32799853d9e8SJason Beloro pfn_t 32809853d9e8SJason Beloro memseg_get_start(struct memseg *seg) 32819853d9e8SJason Beloro { 32829853d9e8SJason Beloro pfn_t pt_start; 32839853d9e8SJason Beloro 32849853d9e8SJason Beloro if (memseg_includes_meta(seg)) { 32859853d9e8SJason Beloro pt_start = hat_getpfnum(kas.a_hat, (caddr_t)seg->pages); 32869853d9e8SJason Beloro 32879853d9e8SJason Beloro /* Meta data is required to be at the beginning */ 32889853d9e8SJason Beloro ASSERT(pt_start < seg->pages_base); 32899853d9e8SJason Beloro } else 32909853d9e8SJason Beloro pt_start = seg->pages_base; 32919853d9e8SJason Beloro 32929853d9e8SJason Beloro return (pt_start); 32939853d9e8SJason Beloro } 32949853d9e8SJason Beloro 32959853d9e8SJason Beloro /* 32969853d9e8SJason Beloro * Invalidate memseg pointers in cpu private vm data caches. 32979853d9e8SJason Beloro */ 32989853d9e8SJason Beloro static void 32999853d9e8SJason Beloro memseg_cpu_vm_flush() 33009853d9e8SJason Beloro { 33019853d9e8SJason Beloro cpu_t *cp; 33029853d9e8SJason Beloro vm_cpu_data_t *vc; 33039853d9e8SJason Beloro 33049853d9e8SJason Beloro mutex_enter(&cpu_lock); 3305*0ed5c46eSJosef 'Jeff' Sipek pause_cpus(NULL, NULL); 33069853d9e8SJason Beloro 33079853d9e8SJason Beloro cp = cpu_list; 33089853d9e8SJason Beloro do { 33099853d9e8SJason Beloro vc = cp->cpu_vm_data; 33109853d9e8SJason Beloro vc->vc_pnum_memseg = NULL; 33119853d9e8SJason Beloro vc->vc_pnext_memseg = NULL; 33129853d9e8SJason Beloro 33139853d9e8SJason Beloro } while ((cp = cp->cpu_next) != cpu_list); 33149853d9e8SJason Beloro 33159853d9e8SJason Beloro start_cpus(); 33169853d9e8SJason Beloro mutex_exit(&cpu_lock); 33179853d9e8SJason Beloro } 3318