xref: /titanic_50/usr/src/uts/common/os/mem_config.c (revision 0ed5c46e82c989cfa9726d9dae452e3d24ef83be)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5ee88d2b9Skchow  * Common Development and Distribution License (the "License").
6ee88d2b9Skchow  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
2256f33205SJonathan Adams  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate #include <sys/types.h>
277c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
287c478bd9Sstevel@tonic-gate #include <sys/vmem.h>
297c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
307c478bd9Sstevel@tonic-gate #include <sys/systm.h>
317c478bd9Sstevel@tonic-gate #include <sys/machsystm.h>	/* for page_freelist_coalesce() */
327c478bd9Sstevel@tonic-gate #include <sys/errno.h>
337c478bd9Sstevel@tonic-gate #include <sys/memnode.h>
347c478bd9Sstevel@tonic-gate #include <sys/memlist.h>
357c478bd9Sstevel@tonic-gate #include <sys/memlist_impl.h>
367c478bd9Sstevel@tonic-gate #include <sys/tuneable.h>
377c478bd9Sstevel@tonic-gate #include <sys/proc.h>
387c478bd9Sstevel@tonic-gate #include <sys/disp.h>
397c478bd9Sstevel@tonic-gate #include <sys/debug.h>
407c478bd9Sstevel@tonic-gate #include <sys/vm.h>
417c478bd9Sstevel@tonic-gate #include <sys/callb.h>
427c478bd9Sstevel@tonic-gate #include <sys/memlist_plat.h>	/* for installed_top_size() */
437c478bd9Sstevel@tonic-gate #include <sys/condvar_impl.h>	/* for CV_HAS_WAITERS() */
447c478bd9Sstevel@tonic-gate #include <sys/dumphdr.h>	/* for dump_resize() */
457c478bd9Sstevel@tonic-gate #include <sys/atomic.h>		/* for use in stats collection */
467c478bd9Sstevel@tonic-gate #include <sys/rwlock.h>
477c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
487c478bd9Sstevel@tonic-gate #include <vm/seg_kmem.h>
497c478bd9Sstevel@tonic-gate #include <vm/seg_kpm.h>
507c478bd9Sstevel@tonic-gate #include <vm/page.h>
51e21bae1bSkchow #include <vm/vm_dep.h>
527c478bd9Sstevel@tonic-gate #define	SUNDDI_IMPL		/* so sunddi.h will not redefine splx() et al */
537c478bd9Sstevel@tonic-gate #include <sys/sunddi.h>
547c478bd9Sstevel@tonic-gate #include <sys/mem_config.h>
557c478bd9Sstevel@tonic-gate #include <sys/mem_cage.h>
567c478bd9Sstevel@tonic-gate #include <sys/lgrp.h>
577c478bd9Sstevel@tonic-gate #include <sys/ddi.h>
587c478bd9Sstevel@tonic-gate #include <sys/modctl.h>
597c478bd9Sstevel@tonic-gate 
607c478bd9Sstevel@tonic-gate extern struct memlist *phys_avail;
617c478bd9Sstevel@tonic-gate 
627c478bd9Sstevel@tonic-gate extern uint_t page_ctrs_adjust(int);
63af4c679fSSean McEnroe void page_ctrs_cleanup(void);
647c478bd9Sstevel@tonic-gate static void kphysm_setup_post_add(pgcnt_t);
657c478bd9Sstevel@tonic-gate static int kphysm_setup_pre_del(pgcnt_t);
667c478bd9Sstevel@tonic-gate static void kphysm_setup_post_del(pgcnt_t, int);
677c478bd9Sstevel@tonic-gate 
687c478bd9Sstevel@tonic-gate static int kphysm_split_memseg(pfn_t base, pgcnt_t npgs);
697c478bd9Sstevel@tonic-gate 
707c478bd9Sstevel@tonic-gate static int delspan_reserve(pfn_t, pgcnt_t);
717c478bd9Sstevel@tonic-gate static void delspan_unreserve(pfn_t, pgcnt_t);
727c478bd9Sstevel@tonic-gate 
739853d9e8SJason Beloro kmutex_t memseg_lists_lock;
749853d9e8SJason Beloro struct memseg *memseg_va_avail;
759853d9e8SJason Beloro struct memseg *memseg_alloc(void);
767c478bd9Sstevel@tonic-gate static struct memseg *memseg_delete_junk;
777c478bd9Sstevel@tonic-gate static struct memseg *memseg_edit_junk;
787c478bd9Sstevel@tonic-gate void memseg_remap_init(void);
799853d9e8SJason Beloro static void memseg_remap_to_dummy(struct memseg *);
807c478bd9Sstevel@tonic-gate static void kphysm_addmem_error_undospan(pfn_t, pgcnt_t);
817c478bd9Sstevel@tonic-gate static struct memseg *memseg_reuse(pgcnt_t);
827c478bd9Sstevel@tonic-gate 
837c478bd9Sstevel@tonic-gate static struct kmem_cache *memseg_cache;
847c478bd9Sstevel@tonic-gate 
857c478bd9Sstevel@tonic-gate /*
869853d9e8SJason Beloro  * Interfaces to manage externally allocated
879853d9e8SJason Beloro  * page_t memory (metadata) for a memseg.
889853d9e8SJason Beloro  */
899853d9e8SJason Beloro #pragma weak	memseg_alloc_meta
909853d9e8SJason Beloro #pragma weak	memseg_free_meta
919853d9e8SJason Beloro #pragma weak	memseg_get_metapfn
929853d9e8SJason Beloro #pragma weak	memseg_remap_meta
939853d9e8SJason Beloro 
949853d9e8SJason Beloro extern int ppvm_enable;
959853d9e8SJason Beloro extern page_t *ppvm_base;
969853d9e8SJason Beloro extern int memseg_alloc_meta(pfn_t, pgcnt_t, void **, pgcnt_t *);
979853d9e8SJason Beloro extern void memseg_free_meta(void *, pgcnt_t);
989853d9e8SJason Beloro extern pfn_t memseg_get_metapfn(void *, pgcnt_t);
999853d9e8SJason Beloro extern void memseg_remap_meta(struct memseg *);
1009853d9e8SJason Beloro static int memseg_is_dynamic(struct memseg *);
1019853d9e8SJason Beloro static int memseg_includes_meta(struct memseg *);
102af4c679fSSean McEnroe pfn_t memseg_get_start(struct memseg *);
1039853d9e8SJason Beloro static void memseg_cpu_vm_flush(void);
1049853d9e8SJason Beloro 
1059853d9e8SJason Beloro int meta_alloc_enable;
1069853d9e8SJason Beloro 
107a3114836SGerry Liu #ifdef	DEBUG
108a3114836SGerry Liu static int memseg_debug;
109a3114836SGerry Liu #define	MEMSEG_DEBUG(args...) if (memseg_debug) printf(args)
110a3114836SGerry Liu #else
111a3114836SGerry Liu #define	MEMSEG_DEBUG(...)
112a3114836SGerry Liu #endif
113a3114836SGerry Liu 
/*
 * Add a chunk of memory to the system.
 * base: starting PAGESIZE page of new memory.
 * npgs: length in PAGESIZE pages.
 *
 * Returns KPHYSM_OK on success; on failure returns KPHYSM_ESPAN (span
 * overlaps an existing or in-transition span), KPHYSM_ERESOURCE (out of
 * resources or span too small to be viable), KPHYSM_ENOTVIABLE (kpm
 * alignment constraints violated) or KPHYSM_EFAULT (new memory not
 * readable), with all partially-made global changes rolled back.
 *
 * Adding mem this way doesn't increase the size of the hash tables;
 * growing them would be too hard.  This should be OK, but adding memory
 * dynamically most likely means more hash misses, since the tables will
 * be smaller than they otherwise would be.
 */
int
kphysm_add_memory_dynamic(pfn_t base, pgcnt_t npgs)
{
	page_t *pp;
	page_t		*opp, *oepp, *segpp;
	struct memseg	*seg;
	uint64_t	avmem;
	pfn_t		pfn;
	pfn_t		pt_base = base;		/* original span start */
	pgcnt_t		tpgs = npgs;		/* original span length */
	pgcnt_t		metapgs = 0;		/* pages of page_t metadata */
	int		exhausted;
	pfn_t		pnum;
	int		mnode;
	caddr_t		vaddr;
	int		reuse;
	int		mlret;
	int		rv;
	int		flags;
	int		meta_alloc = 0;	/* metadata from existing memory? */
	void		*mapva;
	void		*metabase = (void *)base;
	pgcnt_t		nkpmpgs = 0;
	offset_t	kpm_pages_off;

	cmn_err(CE_CONT,
	    "?kphysm_add_memory_dynamic: adding %ldK at 0x%" PRIx64 "\n",
	    npgs << (PAGESHIFT - 10), (uint64_t)base << PAGESHIFT);

	/*
	 * Add this span in the delete list to prevent interactions.
	 */
	if (!delspan_reserve(base, npgs)) {
		return (KPHYSM_ESPAN);
	}
	/*
	 * Check to see if any of the memory span has been added
	 * by trying an add to the installed memory list. This
	 * forms the interlocking process for add.
	 */

	memlist_write_lock();

	mlret = memlist_add_span((uint64_t)(pt_base) << PAGESHIFT,
	    (uint64_t)(tpgs) << PAGESHIFT, &phys_install);

	if (mlret == MEML_SPANOP_OK)
		installed_top_size(phys_install, &physmax, &physinstalled);

	memlist_write_unlock();

	if (mlret != MEML_SPANOP_OK) {
		/* Map memlist errors onto KPHYSM_* codes and back out. */
		if (mlret == MEML_SPANOP_EALLOC) {
			delspan_unreserve(pt_base, tpgs);
			return (KPHYSM_ERESOURCE);
		} else if (mlret == MEML_SPANOP_ESPAN) {
			delspan_unreserve(pt_base, tpgs);
			return (KPHYSM_ESPAN);
		} else {
			delspan_unreserve(pt_base, tpgs);
			return (KPHYSM_ERESOURCE);
		}
	}

	if (meta_alloc_enable) {
		/*
		 * Allocate the page_t's from existing memory;
		 * if that fails, allocate from the incoming memory.
		 */
		rv = memseg_alloc_meta(base, npgs, &metabase, &metapgs);
		if (rv == KPHYSM_OK) {
			ASSERT(metapgs);
			ASSERT(btopr(npgs * sizeof (page_t)) <= metapgs);
			meta_alloc = 1;
			goto mapalloc;
		}
	}

	/*
	 * We store the page_t's for this new memory in the first
	 * few pages of the chunk. Here, we go and get'em ...
	 */

	/*
	 * The expression after the '-' gives the number of pages
	 * that will fit in the new memory based on a requirement
	 * of (PAGESIZE + sizeof (page_t)) bytes per page.
	 */
	metapgs = npgs - (((uint64_t)(npgs) << PAGESHIFT) /
	    (PAGESIZE + sizeof (page_t)));

	/* Carve the metadata pages off the front of the span. */
	npgs -= metapgs;
	base += metapgs;

	ASSERT(btopr(npgs * sizeof (page_t)) <= metapgs);

	exhausted = (metapgs == 0 || npgs == 0);

	if (kpm_enable && !exhausted) {
		pgcnt_t start, end, nkpmpgs_prelim;
		size_t	ptsz;

		/*
		 * A viable kpm large page mapping must not overlap two
		 * dynamic memsegs. Therefore the total size is checked
		 * to be at least kpm_pgsz and also whether start and end
		 * points are at least kpm_pgsz aligned.
		 */
		if (ptokpmp(tpgs) < 1 || pmodkpmp(pt_base) ||
		    pmodkpmp(base + npgs)) {

			kphysm_addmem_error_undospan(pt_base, tpgs);

			/*
			 * There is no specific error code for violating
			 * kpm granularity constraints.
			 */
			return (KPHYSM_ENOTVIABLE);
		}

		/*
		 * Recompute metapgs to also cover the kpm_page structures
		 * (KPMPAGE_T_SZ each) for the kpm pages spanned.
		 */
		start = kpmptop(ptokpmp(base));
		end = kpmptop(ptokpmp(base + npgs));
		nkpmpgs_prelim = ptokpmp(end - start);
		ptsz = npgs * sizeof (page_t);
		metapgs = btopr(ptsz + nkpmpgs_prelim * KPMPAGE_T_SZ);
		exhausted = (tpgs <= metapgs);
		if (!exhausted) {
			npgs = tpgs - metapgs;
			base = pt_base + metapgs;

			/* final nkpmpgs */
			start = kpmptop(ptokpmp(base));
			nkpmpgs = ptokpmp(end - start);
			kpm_pages_off = ptsz +
			    (nkpmpgs_prelim - nkpmpgs) * KPMPAGE_T_SZ;
		}
	}

	/*
	 * Is memory area supplied too small?
	 */
	if (exhausted) {
		kphysm_addmem_error_undospan(pt_base, tpgs);
		/*
		 * There is no specific error code for 'too small'.
		 */
		return (KPHYSM_ERESOURCE);
	}

mapalloc:
	/*
	 * We may re-use a previously allocated VA space for the page_ts
	 * eventually, but we need to initialize and lock the pages first.
	 */

	/*
	 * Get an address in the kernel address map, map
	 * the page_t pages and see if we can touch them.
	 */

	mapva = vmem_alloc(heap_arena, ptob(metapgs), VM_NOSLEEP);
	if (mapva == NULL) {
		cmn_err(CE_WARN, "kphysm_add_memory_dynamic:"
		    " Can't allocate VA for page_ts");

		if (meta_alloc)
			memseg_free_meta(metabase, metapgs);
		kphysm_addmem_error_undospan(pt_base, tpgs);

		return (KPHYSM_ERESOURCE);
	}
	pp = mapva;

	if (physmax < (pt_base + tpgs))
		physmax = (pt_base + tpgs);

	/*
	 * In the remapping code we map one page at a time so we must do
	 * the same here to match mapping sizes.
	 */
	pfn = pt_base;
	vaddr = (caddr_t)pp;
	for (pnum = 0; pnum < metapgs; pnum++) {
		/* externally allocated metadata may be discontiguous */
		if (meta_alloc)
			pfn = memseg_get_metapfn(metabase, (pgcnt_t)pnum);
		hat_devload(kas.a_hat, vaddr, ptob(1), pfn,
		    PROT_READ | PROT_WRITE,
		    HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
		pfn++;
		vaddr += ptob(1);
	}

	/*
	 * Probe the first word of the metadata mapping; if the new
	 * memory is not actually accessible, fail with KPHYSM_EFAULT.
	 */
	if (ddi_peek32((dev_info_t *)NULL,
	    (int32_t *)pp, (int32_t *)0) == DDI_FAILURE) {

		cmn_err(CE_WARN, "kphysm_add_memory_dynamic:"
		    " Can't access pp array at 0x%p [phys 0x%lx]",
		    (void *)pp, pt_base);

		hat_unload(kas.a_hat, (caddr_t)pp, ptob(metapgs),
		    HAT_UNLOAD_UNMAP|HAT_UNLOAD_UNLOCK);

		vmem_free(heap_arena, mapva, ptob(metapgs));
		if (meta_alloc)
			memseg_free_meta(metabase, metapgs);
		kphysm_addmem_error_undospan(pt_base, tpgs);

		return (KPHYSM_EFAULT);
	}

	/*
	 * Add this memory slice to its memory node translation.
	 *
	 * Note that right now, each node may have only one slice;
	 * this may change with COD or in larger SSM systems with
	 * nested latency groups, so we must not assume that the
	 * node does not yet exist.
	 *
	 * Note that there may be multiple memory nodes associated with
	 * a single lgrp node on x86 systems.
	 */
	pnum = pt_base + tpgs - 1;
	mem_node_add_range(pt_base, pnum);

	/*
	 * Allocate or resize page counters as necessary to accommodate
	 * the increase in memory pages.
	 */
	mnode = PFN_2_MEM_NODE(pnum);
	PAGE_CTRS_ADJUST(base, npgs, rv);
	if (rv) {

		mem_node_del_range(pt_base, pnum);

		/* cleanup the  page counters */
		page_ctrs_cleanup();

		hat_unload(kas.a_hat, (caddr_t)pp, ptob(metapgs),
		    HAT_UNLOAD_UNMAP|HAT_UNLOAD_UNLOCK);

		vmem_free(heap_arena, mapva, ptob(metapgs));
		if (meta_alloc)
			memseg_free_meta(metabase, metapgs);
		kphysm_addmem_error_undospan(pt_base, tpgs);

		return (KPHYSM_ERESOURCE);
	}

	/*
	 * Update the phys_avail memory list.
	 * The phys_install list was done at the start.
	 */

	memlist_write_lock();

	mlret = memlist_add_span((uint64_t)(base) << PAGESHIFT,
	    (uint64_t)(npgs) << PAGESHIFT, &phys_avail);
	ASSERT(mlret == MEML_SPANOP_OK);

	memlist_write_unlock();

	/* See if we can find a memseg to re-use. */
	if (meta_alloc) {
		seg = memseg_reuse(0);
		reuse = 1;	/* force unmapping of temp mapva */
		flags = MEMSEG_DYNAMIC | MEMSEG_META_ALLOC;
		/*
		 * There is a 1:1 fixed relationship between a pfn
		 * and a page_t VA.  The pfn is used as an index into
		 * the ppvm_base page_t table in order to calculate
		 * the page_t base address for a given pfn range.
		 */
		segpp = ppvm_base + base;
	} else {
		seg = memseg_reuse(metapgs);
		reuse = (seg != NULL);
		flags = MEMSEG_DYNAMIC | MEMSEG_META_INCL;
		segpp = pp;
	}

	/*
	 * Initialize the memseg structure representing this memory
	 * and add it to the existing list of memsegs. Do some basic
	 * initialization and add the memory to the system.
	 * In order to prevent lock deadlocks, the add_physmem()
	 * code is repeated here, but split into several stages.
	 *
	 * If a memseg is reused, invalidate memseg pointers in
	 * all cpu vm caches.  We need to do this since the check
	 * 	pp >= seg->pages && pp < seg->epages
	 * used in various places is not atomic and so the first compare
	 * can happen before reuse and the second compare after reuse.
	 * The invalidation ensures that a memseg is not dereferenced while
	 * its page/pfn pointers are changing.
	 */
	if (seg == NULL) {
		seg = memseg_alloc();
		ASSERT(seg != NULL);
		seg->msegflags = flags;
		MEMSEG_DEBUG("memseg_get: alloc seg=0x%p, pages=0x%p",
		    (void *)seg, (void *)(seg->pages));
		seg->pages = segpp;
	} else {
		ASSERT(seg->msegflags == flags);
		ASSERT(seg->pages_base == seg->pages_end);
		MEMSEG_DEBUG("memseg_get: reuse seg=0x%p, pages=0x%p",
		    (void *)seg, (void *)(seg->pages));
		if (meta_alloc) {
			memseg_cpu_vm_flush();
			seg->pages = segpp;
		}
	}

	seg->epages = seg->pages + npgs;
	seg->pages_base = base;
	seg->pages_end = base + npgs;

	/*
	 * Initialize metadata. The page_ts are set to locked state
	 * ready to be freed.
	 */
	bzero((caddr_t)pp, ptob(metapgs));

	pfn = seg->pages_base;
	/* Save the original pp base in case we reuse a memseg. */
	opp = pp;
	oepp = opp + npgs;
	for (pp = opp; pp < oepp; pp++) {
		pp->p_pagenum = pfn;
		pfn++;
		page_iolock_init(pp);
		while (!page_lock(pp, SE_EXCL, (kmutex_t *)NULL, P_RECLAIM))
			continue;
		pp->p_offset = (u_offset_t)-1;
	}

	if (reuse) {
		/* Remap our page_ts to the re-used memseg VA space. */
		pfn = pt_base;
		vaddr = (caddr_t)seg->pages;
		for (pnum = 0; pnum < metapgs; pnum++) {
			if (meta_alloc)
				pfn = memseg_get_metapfn(metabase,
				    (pgcnt_t)pnum);
			hat_devload(kas.a_hat, vaddr, ptob(1), pfn,
			    PROT_READ | PROT_WRITE,
			    HAT_LOAD_REMAP | HAT_LOAD | HAT_LOAD_NOCONSIST);
			pfn++;
			vaddr += ptob(1);
		}

		/* The temporary mapping is no longer needed. */
		hat_unload(kas.a_hat, (caddr_t)opp, ptob(metapgs),
		    HAT_UNLOAD_UNMAP|HAT_UNLOAD_UNLOCK);

		vmem_free(heap_arena, mapva, ptob(metapgs));
	}

	hat_kpm_addmem_mseg_update(seg, nkpmpgs, kpm_pages_off);

	memsegs_lock(1);

	/*
	 * The new memseg is inserted at the beginning of the list.
	 * Not only does this save searching for the tail, but in the
	 * case of a re-used memseg, it solves the problem of what
	 * happens if some process has still got a pointer to the
	 * memseg and follows the next pointer to continue traversing
	 * the memsegs list.
	 */

	hat_kpm_addmem_mseg_insert(seg);

	seg->next = memsegs;
	/* Publish seg's contents before making it reachable via memsegs. */
	membar_producer();

	hat_kpm_addmem_memsegs_update(seg);

	memsegs = seg;

	build_pfn_hash();

	total_pages += npgs;

	/*
	 * Recalculate the paging parameters now total_pages has changed.
	 * This will also cause the clock hands to be reset before next use.
	 */
	setupclock(1);

	memsegs_unlock(1);

	PLCNT_MODIFY_MAX(seg->pages_base, (long)npgs);

	/*
	 * Free the pages outside the lock to avoid locking loops.
	 */
	for (pp = seg->pages; pp < seg->epages; pp++) {
		page_free(pp, 1);
	}

	/*
	 * Now that we've updated the appropriate memory lists we
	 * need to reset a number of globals, since we've increased memory.
	 * Several have already been updated for us as noted above. The
	 * globals we're interested in at this point are:
	 *   physmax - highest page frame number.
	 *   physinstalled - number of pages currently installed (done earlier)
	 *   maxmem - max free pages in the system
	 *   physmem - physical memory pages available
	 *   availrmem - real memory available
	 */

	mutex_enter(&freemem_lock);
	maxmem += npgs;
	physmem += npgs;
	availrmem += npgs;
	availrmem_initial += npgs;

	mutex_exit(&freemem_lock);

	dump_resize();

	page_freelist_coalesce_all(mnode);

	kphysm_setup_post_add(npgs);

	cmn_err(CE_CONT, "?kphysm_add_memory_dynamic: mem = %ldK "
	    "(0x%" PRIx64 ")\n",
	    physinstalled << (PAGESHIFT - 10),
	    (uint64_t)physinstalled << PAGESHIFT);

	avmem = (uint64_t)freemem << PAGESHIFT;
	cmn_err(CE_CONT, "?kphysm_add_memory_dynamic: "
	    "avail mem = %" PRId64 "\n", avmem);

	/*
	 * Update lgroup generation number on single lgroup systems
	 */
	if (nlgrps == 1)
		lgrp_config(LGRP_CONFIG_GEN_UPDATE, 0, 0);

	/*
	 * Inform DDI of update
	 */
	ddi_mem_update((uint64_t)(pt_base) << PAGESHIFT,
	    (uint64_t)(tpgs) << PAGESHIFT);

	delspan_unreserve(pt_base, tpgs);

	return (KPHYSM_OK);		/* Successfully added system memory */
}
5757c478bd9Sstevel@tonic-gate 
5767c478bd9Sstevel@tonic-gate /*
5777c478bd9Sstevel@tonic-gate  * There are various error conditions in kphysm_add_memory_dynamic()
5787c478bd9Sstevel@tonic-gate  * which require a rollback of already changed global state.
5797c478bd9Sstevel@tonic-gate  */
/*
 * Roll back the global state changed by a partially completed
 * kphysm_add_memory_dynamic(): remove the span from phys_install,
 * recompute the installed-memory bounds, and drop the delspan
 * reservation taken when the add began.
 */
static void
kphysm_addmem_error_undospan(pfn_t pt_base, pgcnt_t tpgs)
{
	int mlret;

	/* Unreserve memory span. */
	memlist_write_lock();

	mlret = memlist_delete_span(
	    (uint64_t)(pt_base) << PAGESHIFT,
	    (uint64_t)(tpgs) << PAGESHIFT, &phys_install);

	/* The span was added earlier, so deleting it cannot fail. */
	ASSERT(mlret == MEML_SPANOP_OK);
	phys_install_has_changed();
	installed_top_size(phys_install, &physmax, &physinstalled);

	memlist_write_unlock();
	delspan_unreserve(pt_base, tpgs);
}
5997c478bd9Sstevel@tonic-gate 
/*
 * Only return an available memseg of exactly the right size
 * if size is required.
 * When the meta data area has it's own virtual address space
 * we will need to manage this more carefully and do best fit
 * allocations, possibly splitting an available area.
 *
 * Returns NULL if no segment on the memseg_va_avail list matches.
 * A metapgs value of zero selects segments whose metadata was
 * allocated separately (MEMSEG_META_ALLOC) and skips the size check.
 */
struct memseg *
memseg_reuse(pgcnt_t metapgs)
{
	int type;
	struct memseg **segpp, *seg;

	mutex_enter(&memseg_lists_lock);

	segpp = &memseg_va_avail;
	for (; (seg = *segpp) != NULL; segpp = &seg->lnext) {
		caddr_t end;

		/*
		 * Make sure we are reusing the right segment type.
		 */
		type = metapgs ? MEMSEG_META_INCL : MEMSEG_META_ALLOC;

		if ((seg->msegflags & (MEMSEG_META_INCL | MEMSEG_META_ALLOC))
		    != type)
			continue;

		if (kpm_enable)
			end = hat_kpm_mseg_reuse(seg);
		else
			end = (caddr_t)seg->epages;

		/*
		 * Check for the right size if it is provided.
		 */
		if (!metapgs || btopr(end - (caddr_t)seg->pages) == metapgs) {
			/* Unlink the match from the avail list. */
			*segpp = seg->lnext;
			seg->lnext = NULL;
			break;
		}
	}
	mutex_exit(&memseg_lists_lock);

	return (seg);
}
6467c478bd9Sstevel@tonic-gate 
static uint_t handle_gen;	/* generator for unique external handles */

/*
 * One contiguous span of physical pages belonging to an in-progress
 * memory add or delete operation.
 */
struct memdelspan {
	struct memdelspan *mds_next;
	pfn_t		mds_base;	/* first page frame of the span */
	pgcnt_t		mds_npgs;	/* length of the span in pages */
	uint_t		*mds_bitmap;	/* one bit per page; sized below */
	uint_t		*mds_bitmap_retired;	/* one bit per page */
};

/* Bits per bitmap word. */
#define	NBPBMW		(sizeof (uint_t) * NBBY)
/* Bytes needed for a one-bit-per-page bitmap covering a span. */
#define	MDS_BITMAPBYTES(MDSP) \
	((((MDSP)->mds_npgs + NBPBMW - 1) / NBPBMW) * sizeof (uint_t))

/*
 * The set of spans for one operation, linked onto the global
 * transit_list_head while the operation is in progress.
 */
struct transit_list {
	struct transit_list	*trl_next;
	struct memdelspan	*trl_spans;
	int			trl_collect;	/* see transit_list_collect() */
};

struct transit_list_head {
	kmutex_t		trh_lock;	/* protects list and all spans */
	struct transit_list	*trh_head;
};

static struct transit_list_head transit_list_head;

struct mem_handle;
static void transit_list_collect(struct mem_handle *, int);
static void transit_list_insert(struct transit_list *);
static void transit_list_remove(struct transit_list *);

/* Memory-delete statistics are collected on DEBUG kernels only. */
#ifdef DEBUG
#define	MEM_DEL_STATS
#endif /* DEBUG */
6827c478bd9Sstevel@tonic-gate 
#ifdef MEM_DEL_STATS
/* NOTE(review): presumably a tunable enabling stat printing — confirm. */
static int mem_del_stat_print = 0;
/*
 * Per-delete event counters, accumulated via the MDSTAT_* macros as
 * the delete thread processes pages; field meanings correspond to
 * the MDSTAT_INCR() call sites.
 */
struct mem_del_stat {
	uint_t	nloop;
	uint_t	need_free;
	uint_t	free_loop;
	uint_t	free_low;
	uint_t	free_failed;
	uint_t	ncheck;
	uint_t	nopaget;
	uint_t	lockfail;
	uint_t	nfree;
	uint_t	nreloc;
	uint_t	nrelocfail;
	uint_t	already_done;
	uint_t	first_notfree;
	uint_t	npplocked;
	uint_t	nlockreloc;
	uint_t	nnorepl;
	uint_t	nmodreloc;
	uint_t	ndestroy;
	uint_t	nputpage;
	uint_t	nnoreclaim;
	uint_t	ndelay;
	uint_t	demotefail;
	uint64_t nticks_total;	/* total delete time, in ticks */
	uint64_t nticks_pgrp;
	uint_t	retired;
	uint_t	toxic;
	uint_t	failing;
	uint_t	modtoxic;
	uint_t	npplkdtoxic;
	uint_t	gptlmodfail;
	uint_t	gptllckfail;
};
/*
 * The stat values are only incremented in the delete thread
 * so no locking or atomic required.
 */
#define	MDSTAT_INCR(MHP, FLD)	(MHP)->mh_delstat.FLD++
#define	MDSTAT_TOTAL(MHP, ntck)	((MHP)->mh_delstat.nticks_total += (ntck))
#define	MDSTAT_PGRP(MHP, ntck)	((MHP)->mh_delstat.nticks_pgrp += (ntck))
static void mem_del_stat_print_func(struct mem_handle *);
#define	MDSTAT_PRINT(MHP)	mem_del_stat_print_func((MHP))
#else /* MEM_DEL_STATS */
/* Stats disabled: the macros compile away to nothing. */
#define	MDSTAT_INCR(MHP, FLD)
#define	MDSTAT_TOTAL(MHP, ntck)
#define	MDSTAT_PGRP(MHP, ntck)
#define	MDSTAT_PRINT(MHP)
#endif /* MEM_DEL_STATS */
7337c478bd9Sstevel@tonic-gate 
/*
 * Lifecycle states of a memory-delete handle.  MHND_FREE means the
 * handle is not in use; transitions of mh_state are made under
 * mh_mutex (see below).
 */
typedef enum mhnd_state {MHND_FREE = 0, MHND_INIT, MHND_STARTING,
	MHND_RUNNING, MHND_DONE, MHND_RELEASE} mhnd_state_t;

/*
 * mh_mutex must be taken to examine or change mh_exthandle and mh_state.
 * The mutex may not be required for other fields, dependent on mh_state.
 */
struct mem_handle {
	kmutex_t	mh_mutex;
	struct mem_handle *mh_next;
	memhandle_t	mh_exthandle;	/* identifier handed to clients */
	mhnd_state_t	mh_state;
	struct transit_list mh_transit;	/* spans being deleted */
	pgcnt_t		mh_phys_pages;
	pgcnt_t		mh_vm_pages;
	pgcnt_t		mh_hold_todo;
	void		(*mh_delete_complete)(void *, int error);
	void		*mh_delete_complete_arg;
	volatile uint_t mh_cancel;
	volatile uint_t mh_dr_aio_cleanup_cancel;
	volatile uint_t mh_aio_cleanup_done;
	kcondvar_t	mh_cv;
	kthread_id_t	mh_thread_id;
	page_t		*mh_deleted;	/* link through p_next */
#ifdef MEM_DEL_STATS
	struct mem_del_stat mh_delstat;
#endif /* MEM_DEL_STATS */
};

/* Global handle list; the links are protected by the list mutex. */
static struct mem_handle *mem_handle_head;
static kmutex_t mem_handle_list_mutex;
7657c478bd9Sstevel@tonic-gate 
/*
 * Allocate a new mem_handle, assign it a unique external identifier,
 * link it at the head of the global list, and return it with
 * mh_mutex held.  Cannot fail (KM_SLEEP allocation).
 */
static struct mem_handle *
kphysm_allocate_mem_handle()
{
	struct mem_handle *mhp;

	mhp = kmem_zalloc(sizeof (struct mem_handle), KM_SLEEP);
	mutex_init(&mhp->mh_mutex, NULL, MUTEX_DEFAULT, NULL);
	/* Lock order: list mutex before the handle's own mutex. */
	mutex_enter(&mem_handle_list_mutex);
	mutex_enter(&mhp->mh_mutex);
	/* handle_gen is protected by list mutex. */
	mhp->mh_exthandle = (memhandle_t)(uintptr_t)(++handle_gen);
	mhp->mh_next = mem_handle_head;
	mem_handle_head = mhp;
	mutex_exit(&mem_handle_list_mutex);

	return (mhp);
}
7837c478bd9Sstevel@tonic-gate 
/*
 * Unlink a handle from the global list and destroy it.  Entered with
 * mh_mutex held and the handle already in the MHND_FREE state;
 * mh_mutex is dropped (and later destroyed) here.
 */
static void
kphysm_free_mem_handle(struct mem_handle *mhp)
{
	struct mem_handle **mhpp;

	ASSERT(mutex_owned(&mhp->mh_mutex));
	ASSERT(mhp->mh_state == MHND_FREE);
	/*
	 * Exit the mutex to preserve locking order. This is OK
	 * here as once in the FREE state, the handle cannot
	 * be found by a lookup.
	 */
	mutex_exit(&mhp->mh_mutex);

	mutex_enter(&mem_handle_list_mutex);
	mhpp = &mem_handle_head;
	while (*mhpp != NULL && *mhpp != mhp)
		mhpp = &(*mhpp)->mh_next;
	ASSERT(*mhpp == mhp);
	/*
	 * No need to lock the handle (mh_mutex) as only
	 * mh_next changing and this is the only thread that
	 * can be referncing mhp.
	 */
	*mhpp = mhp->mh_next;
	mutex_exit(&mem_handle_list_mutex);

	mutex_destroy(&mhp->mh_mutex);
	kmem_free(mhp, sizeof (struct mem_handle));
}
8147c478bd9Sstevel@tonic-gate 
8157c478bd9Sstevel@tonic-gate /*
8167c478bd9Sstevel@tonic-gate  * This function finds the internal mem_handle corresponding to an
8177c478bd9Sstevel@tonic-gate  * external handle and returns it with the mh_mutex held.
8187c478bd9Sstevel@tonic-gate  */
static struct mem_handle *
kphysm_lookup_mem_handle(memhandle_t handle)
{
	struct mem_handle *mhp;

	mutex_enter(&mem_handle_list_mutex);
	for (mhp = mem_handle_head; mhp != NULL; mhp = mhp->mh_next) {
		if (mhp->mh_exthandle == handle) {
			mutex_enter(&mhp->mh_mutex);
			/*
			 * The state of the handle could have been changed
			 * by kphysm_del_release() while waiting for mh_mutex.
			 */
			if (mhp->mh_state == MHND_FREE) {
				mutex_exit(&mhp->mh_mutex);
				continue;
			}
			break;
		}
	}
	mutex_exit(&mem_handle_list_mutex);
	/* mhp is NULL here if no live handle matched. */
	return (mhp);
}
8427c478bd9Sstevel@tonic-gate 
/*
 * Public interface: allocate a delete handle for a client and return
 * its external identifier through *xmhp.  Currently always returns
 * KPHYSM_OK because the underlying allocation sleeps.
 */
int
kphysm_del_gethandle(memhandle_t *xmhp)
{
	struct mem_handle *mhp;

	mhp = kphysm_allocate_mem_handle();
	/*
	 * The handle is allocated using KM_SLEEP, so cannot fail.
	 * If the implementation is changed, the correct error to return
	 * here would be KPHYSM_ENOHANDLES.
	 */
	ASSERT(mhp->mh_state == MHND_FREE);
	mhp->mh_state = MHND_INIT;
	*xmhp = mhp->mh_exthandle;
	mutex_exit(&mhp->mh_mutex);
	return (KPHYSM_OK);
}
8607c478bd9Sstevel@tonic-gate 
8617c478bd9Sstevel@tonic-gate static int
overlapping(pfn_t b1,pgcnt_t l1,pfn_t b2,pgcnt_t l2)8627c478bd9Sstevel@tonic-gate overlapping(pfn_t b1, pgcnt_t l1, pfn_t b2, pgcnt_t l2)
8637c478bd9Sstevel@tonic-gate {
8647c478bd9Sstevel@tonic-gate 	pfn_t e1, e2;
8657c478bd9Sstevel@tonic-gate 
8667c478bd9Sstevel@tonic-gate 	e1 = b1 + l1;
8677c478bd9Sstevel@tonic-gate 	e2 = b2 + l2;
8687c478bd9Sstevel@tonic-gate 
8697c478bd9Sstevel@tonic-gate 	return (!(b2 >= e1 || b1 >= e2));
8707c478bd9Sstevel@tonic-gate }
8717c478bd9Sstevel@tonic-gate 
8727c478bd9Sstevel@tonic-gate static int can_remove_pgs(pgcnt_t);
8737c478bd9Sstevel@tonic-gate 
/*
 * Intersect the span [base, base + npgs) with the installed physical
 * memory list (phys_install) and return the overlap as a freshly
 * allocated list of memdelspans.  Returns NULL if nothing in the
 * span is installed.  Elements are prepended as they are found, so
 * the returned list is in descending address order.
 */
static struct memdelspan *
span_to_install(pfn_t base, pgcnt_t npgs)
{
	struct memdelspan *mdsp;
	struct memdelspan *mdsp_new;
	uint64_t address, size, thislen;
	struct memlist *mlp;

	mdsp_new = NULL;

	address = (uint64_t)base << PAGESHIFT;
	size = (uint64_t)npgs << PAGESHIFT;
	while (size != 0) {
		memlist_read_lock();
		/* Find the first installed chunk overlapping [address, +size). */
		for (mlp = phys_install; mlp != NULL; mlp = mlp->ml_next) {
			if (address >= (mlp->ml_address + mlp->ml_size))
				continue;
			if ((address + size) > mlp->ml_address)
				break;
		}
		if (mlp == NULL) {
			/* No installed memory remains in the span. */
			address += size;
			size = 0;
			thislen = 0;
		} else {
			/* Clip the front of the span to the chunk start. */
			if (address < mlp->ml_address) {
				size -= (mlp->ml_address - address);
				address = mlp->ml_address;
			}
			ASSERT(address >= mlp->ml_address);
			if ((address + size) >
			    (mlp->ml_address + mlp->ml_size)) {
				thislen =
				    mlp->ml_size - (address - mlp->ml_address);
			} else {
				thislen = size;
			}
		}
		memlist_read_unlock();
		/* TODO: phys_install could change now */
		if (thislen == 0)
			continue;
		mdsp = kmem_zalloc(sizeof (struct memdelspan), KM_SLEEP);
		mdsp->mds_base = btop(address);
		mdsp->mds_npgs = btop(thislen);
		mdsp->mds_next = mdsp_new;
		mdsp_new = mdsp;
		address += thislen;
		size -= thislen;
	}
	return (mdsp_new);
}
9267c478bd9Sstevel@tonic-gate 
9277c478bd9Sstevel@tonic-gate static void
free_delspans(struct memdelspan * mdsp)9287c478bd9Sstevel@tonic-gate free_delspans(struct memdelspan *mdsp)
9297c478bd9Sstevel@tonic-gate {
9307c478bd9Sstevel@tonic-gate 	struct memdelspan *amdsp;
9317c478bd9Sstevel@tonic-gate 
9327c478bd9Sstevel@tonic-gate 	while ((amdsp = mdsp) != NULL) {
9337c478bd9Sstevel@tonic-gate 		mdsp = amdsp->mds_next;
9347c478bd9Sstevel@tonic-gate 		kmem_free(amdsp, sizeof (struct memdelspan));
9357c478bd9Sstevel@tonic-gate 	}
9367c478bd9Sstevel@tonic-gate }
9377c478bd9Sstevel@tonic-gate 
9387c478bd9Sstevel@tonic-gate /*
9397c478bd9Sstevel@tonic-gate  * Concatenate lists. No list ordering is required.
9407c478bd9Sstevel@tonic-gate  */
9417c478bd9Sstevel@tonic-gate 
9427c478bd9Sstevel@tonic-gate static void
delspan_concat(struct memdelspan ** mdspp,struct memdelspan * mdsp)9437c478bd9Sstevel@tonic-gate delspan_concat(struct memdelspan **mdspp, struct memdelspan *mdsp)
9447c478bd9Sstevel@tonic-gate {
9457c478bd9Sstevel@tonic-gate 	while (*mdspp != NULL)
9467c478bd9Sstevel@tonic-gate 		mdspp = &(*mdspp)->mds_next;
9477c478bd9Sstevel@tonic-gate 
9487c478bd9Sstevel@tonic-gate 	*mdspp = mdsp;
9497c478bd9Sstevel@tonic-gate }
9507c478bd9Sstevel@tonic-gate 
9517c478bd9Sstevel@tonic-gate /*
9527c478bd9Sstevel@tonic-gate  * Given a new list of delspans, check there is no overlap with
9537c478bd9Sstevel@tonic-gate  * all existing span activity (add or delete) and then concatenate
9547c478bd9Sstevel@tonic-gate  * the new spans to the given list.
9557c478bd9Sstevel@tonic-gate  * Return 1 for OK, 0 if overlapping.
9567c478bd9Sstevel@tonic-gate  */
static int
delspan_insert(
	struct transit_list *my_tlp,
	struct memdelspan *mdsp_new)
{
	struct transit_list_head *trh;
	struct transit_list *tlp;
	int ret;

	trh = &transit_list_head;

	ASSERT(my_tlp != NULL);
	ASSERT(mdsp_new != NULL);

	ret = 1;
	mutex_enter(&trh->trh_lock);
	/* ASSERT(my_tlp->trl_spans == NULL || tlp_in_list(trh, my_tlp)); */
	/*
	 * Compare every new span against every span already in transit
	 * (on any list, including my_tlp's own).
	 */
	for (tlp = trh->trh_head; tlp != NULL; tlp = tlp->trl_next) {
		struct memdelspan *mdsp;

		for (mdsp = tlp->trl_spans; mdsp != NULL;
		    mdsp = mdsp->mds_next) {
			struct memdelspan *nmdsp;

			for (nmdsp = mdsp_new; nmdsp != NULL;
			    nmdsp = nmdsp->mds_next) {
				if (overlapping(mdsp->mds_base, mdsp->mds_npgs,
				    nmdsp->mds_base, nmdsp->mds_npgs)) {
					ret = 0;
					goto done;
				}
			}
		}
	}
done:
	if (ret != 0) {
		/*
		 * No overlap: publish my_tlp on the global list if this
		 * is its first span, then take ownership of mdsp_new.
		 */
		if (my_tlp->trl_spans == NULL)
			transit_list_insert(my_tlp);
		delspan_concat(&my_tlp->trl_spans, mdsp_new);
	}
	mutex_exit(&trh->trh_lock);
	return (ret);
}
10007c478bd9Sstevel@tonic-gate 
/*
 * Remove spans from my_tlp's list.  If npgs is zero, every span is
 * freed; otherwise only spans fully contained in [base, base + npgs)
 * are removed.  The transit list itself is dropped from the global
 * list when it becomes empty.
 */
static void
delspan_remove(
	struct transit_list *my_tlp,
	pfn_t base,
	pgcnt_t npgs)
{
	struct transit_list_head *trh;
	struct memdelspan *mdsp;

	trh = &transit_list_head;

	ASSERT(my_tlp != NULL);

	mutex_enter(&trh->trh_lock);
	if ((mdsp = my_tlp->trl_spans) != NULL) {
		if (npgs == 0) {
			/* Remove everything. */
			my_tlp->trl_spans = NULL;
			free_delspans(mdsp);
			transit_list_remove(my_tlp);
		} else {
			struct memdelspan **prv;

			/* Unlink and free only fully contained spans. */
			prv = &my_tlp->trl_spans;
			while (mdsp != NULL) {
				pfn_t p_end;

				p_end = mdsp->mds_base + mdsp->mds_npgs;
				if (mdsp->mds_base >= base &&
				    p_end <= (base + npgs)) {
					*prv = mdsp->mds_next;
					mdsp->mds_next = NULL;
					free_delspans(mdsp);
				} else {
					prv = &mdsp->mds_next;
				}
				mdsp = *prv;
			}
			if (my_tlp->trl_spans == NULL)
				transit_list_remove(my_tlp);
		}
	}
	mutex_exit(&trh->trh_lock);
}
10447c478bd9Sstevel@tonic-gate 
10457c478bd9Sstevel@tonic-gate /*
10467c478bd9Sstevel@tonic-gate  * Reserve interface for add to stop delete before add finished.
10477c478bd9Sstevel@tonic-gate  * This list is only accessed through the delspan_insert/remove
10487c478bd9Sstevel@tonic-gate  * functions and so is fully protected by the mutex in struct transit_list.
10497c478bd9Sstevel@tonic-gate  */
10507c478bd9Sstevel@tonic-gate 
10517c478bd9Sstevel@tonic-gate static struct transit_list reserve_transit;
10527c478bd9Sstevel@tonic-gate 
10537c478bd9Sstevel@tonic-gate static int
delspan_reserve(pfn_t base,pgcnt_t npgs)10547c478bd9Sstevel@tonic-gate delspan_reserve(pfn_t base, pgcnt_t npgs)
10557c478bd9Sstevel@tonic-gate {
10567c478bd9Sstevel@tonic-gate 	struct memdelspan *mdsp;
10577c478bd9Sstevel@tonic-gate 	int ret;
10587c478bd9Sstevel@tonic-gate 
10597c478bd9Sstevel@tonic-gate 	mdsp = kmem_zalloc(sizeof (struct memdelspan), KM_SLEEP);
10607c478bd9Sstevel@tonic-gate 	mdsp->mds_base = base;
10617c478bd9Sstevel@tonic-gate 	mdsp->mds_npgs = npgs;
10627c478bd9Sstevel@tonic-gate 	if ((ret = delspan_insert(&reserve_transit, mdsp)) == 0) {
10637c478bd9Sstevel@tonic-gate 		free_delspans(mdsp);
10647c478bd9Sstevel@tonic-gate 	}
10657c478bd9Sstevel@tonic-gate 	return (ret);
10667c478bd9Sstevel@tonic-gate }
10677c478bd9Sstevel@tonic-gate 
/* Release an add reservation previously taken by delspan_reserve(). */
static void
delspan_unreserve(pfn_t base, pgcnt_t npgs)
{
	delspan_remove(&reserve_transit, base, npgs);
}
10737c478bd9Sstevel@tonic-gate 
10747c478bd9Sstevel@tonic-gate /*
10757c478bd9Sstevel@tonic-gate  * Return whether memseg was created by kphysm_add_memory_dynamic().
10767c478bd9Sstevel@tonic-gate  */
static int
memseg_is_dynamic(struct memseg *seg)
{
	/* Returns the raw MEMSEG_DYNAMIC bit: nonzero, not necessarily 1. */
	return (seg->msegflags & MEMSEG_DYNAMIC);
}
10827c478bd9Sstevel@tonic-gate 
10837c478bd9Sstevel@tonic-gate int
kphysm_del_span(memhandle_t handle,pfn_t base,pgcnt_t npgs)10847c478bd9Sstevel@tonic-gate kphysm_del_span(
10857c478bd9Sstevel@tonic-gate 	memhandle_t handle,
10867c478bd9Sstevel@tonic-gate 	pfn_t base,
10877c478bd9Sstevel@tonic-gate 	pgcnt_t npgs)
10887c478bd9Sstevel@tonic-gate {
10897c478bd9Sstevel@tonic-gate 	struct mem_handle *mhp;
10907c478bd9Sstevel@tonic-gate 	struct memseg *seg;
10917c478bd9Sstevel@tonic-gate 	struct memdelspan *mdsp;
10927c478bd9Sstevel@tonic-gate 	struct memdelspan *mdsp_new;
10937c478bd9Sstevel@tonic-gate 	pgcnt_t phys_pages, vm_pages;
10947c478bd9Sstevel@tonic-gate 	pfn_t p_end;
10957c478bd9Sstevel@tonic-gate 	page_t *pp;
10967c478bd9Sstevel@tonic-gate 	int ret;
10977c478bd9Sstevel@tonic-gate 
10987c478bd9Sstevel@tonic-gate 	mhp = kphysm_lookup_mem_handle(handle);
10997c478bd9Sstevel@tonic-gate 	if (mhp == NULL) {
11007c478bd9Sstevel@tonic-gate 		return (KPHYSM_EHANDLE);
11017c478bd9Sstevel@tonic-gate 	}
11027c478bd9Sstevel@tonic-gate 	if (mhp->mh_state != MHND_INIT) {
11037c478bd9Sstevel@tonic-gate 		mutex_exit(&mhp->mh_mutex);
11047c478bd9Sstevel@tonic-gate 		return (KPHYSM_ESEQUENCE);
11057c478bd9Sstevel@tonic-gate 	}
11067c478bd9Sstevel@tonic-gate 
11077c478bd9Sstevel@tonic-gate 	/*
11087c478bd9Sstevel@tonic-gate 	 * Intersect the span with the installed memory list (phys_install).
11097c478bd9Sstevel@tonic-gate 	 */
11107c478bd9Sstevel@tonic-gate 	mdsp_new = span_to_install(base, npgs);
11117c478bd9Sstevel@tonic-gate 	if (mdsp_new == NULL) {
11127c478bd9Sstevel@tonic-gate 		/*
11137c478bd9Sstevel@tonic-gate 		 * No physical memory in this range. Is this an
11147c478bd9Sstevel@tonic-gate 		 * error? If an attempt to start the delete is made
11157c478bd9Sstevel@tonic-gate 		 * for OK returns from del_span such as this, start will
11167c478bd9Sstevel@tonic-gate 		 * return an error.
11177c478bd9Sstevel@tonic-gate 		 * Could return KPHYSM_ENOWORK.
11187c478bd9Sstevel@tonic-gate 		 */
11197c478bd9Sstevel@tonic-gate 		/*
11207c478bd9Sstevel@tonic-gate 		 * It is assumed that there are no error returns
11217c478bd9Sstevel@tonic-gate 		 * from span_to_install() due to kmem_alloc failure.
11227c478bd9Sstevel@tonic-gate 		 */
11237c478bd9Sstevel@tonic-gate 		mutex_exit(&mhp->mh_mutex);
11247c478bd9Sstevel@tonic-gate 		return (KPHYSM_OK);
11257c478bd9Sstevel@tonic-gate 	}
11267c478bd9Sstevel@tonic-gate 	/*
11277c478bd9Sstevel@tonic-gate 	 * Does this span overlap an existing span?
11287c478bd9Sstevel@tonic-gate 	 */
11297c478bd9Sstevel@tonic-gate 	if (delspan_insert(&mhp->mh_transit, mdsp_new) == 0) {
11307c478bd9Sstevel@tonic-gate 		/*
11317c478bd9Sstevel@tonic-gate 		 * Differentiate between already on list for this handle
11327c478bd9Sstevel@tonic-gate 		 * (KPHYSM_EDUP) and busy elsewhere (KPHYSM_EBUSY).
11337c478bd9Sstevel@tonic-gate 		 */
11347c478bd9Sstevel@tonic-gate 		ret = KPHYSM_EBUSY;
11357c478bd9Sstevel@tonic-gate 		for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
11367c478bd9Sstevel@tonic-gate 		    mdsp = mdsp->mds_next) {
11377c478bd9Sstevel@tonic-gate 			if (overlapping(mdsp->mds_base, mdsp->mds_npgs,
11387c478bd9Sstevel@tonic-gate 			    base, npgs)) {
11397c478bd9Sstevel@tonic-gate 				ret = KPHYSM_EDUP;
11407c478bd9Sstevel@tonic-gate 				break;
11417c478bd9Sstevel@tonic-gate 			}
11427c478bd9Sstevel@tonic-gate 		}
11437c478bd9Sstevel@tonic-gate 		mutex_exit(&mhp->mh_mutex);
11447c478bd9Sstevel@tonic-gate 		free_delspans(mdsp_new);
11457c478bd9Sstevel@tonic-gate 		return (ret);
11467c478bd9Sstevel@tonic-gate 	}
11477c478bd9Sstevel@tonic-gate 	/*
11487c478bd9Sstevel@tonic-gate 	 * At this point the spans in mdsp_new have been inserted into the
11497c478bd9Sstevel@tonic-gate 	 * list of spans for this handle and thereby to the global list of
11507c478bd9Sstevel@tonic-gate 	 * spans being processed. Each of these spans must now be checked
11517c478bd9Sstevel@tonic-gate 	 * for relocatability. As a side-effect segments in the memseg list
11527c478bd9Sstevel@tonic-gate 	 * may be split.
11537c478bd9Sstevel@tonic-gate 	 *
11547c478bd9Sstevel@tonic-gate 	 * Note that mdsp_new can no longer be used as it is now part of
11557c478bd9Sstevel@tonic-gate 	 * a larger list. Select elements of this larger list based
11567c478bd9Sstevel@tonic-gate 	 * on base and npgs.
11577c478bd9Sstevel@tonic-gate 	 */
11587c478bd9Sstevel@tonic-gate restart:
11597c478bd9Sstevel@tonic-gate 	phys_pages = 0;
11607c478bd9Sstevel@tonic-gate 	vm_pages = 0;
11617c478bd9Sstevel@tonic-gate 	ret = KPHYSM_OK;
11627c478bd9Sstevel@tonic-gate 	for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
11637c478bd9Sstevel@tonic-gate 	    mdsp = mdsp->mds_next) {
11647c478bd9Sstevel@tonic-gate 		pgcnt_t pages_checked;
11657c478bd9Sstevel@tonic-gate 
11667c478bd9Sstevel@tonic-gate 		if (!overlapping(mdsp->mds_base, mdsp->mds_npgs, base, npgs)) {
11677c478bd9Sstevel@tonic-gate 			continue;
11687c478bd9Sstevel@tonic-gate 		}
11697c478bd9Sstevel@tonic-gate 		p_end = mdsp->mds_base + mdsp->mds_npgs;
11707c478bd9Sstevel@tonic-gate 		/*
11717c478bd9Sstevel@tonic-gate 		 * The pages_checked count is a hack. All pages should be
11727c478bd9Sstevel@tonic-gate 		 * checked for relocatability. Those not covered by memsegs
11737c478bd9Sstevel@tonic-gate 		 * should be tested with arch_kphysm_del_span_ok().
11747c478bd9Sstevel@tonic-gate 		 */
11757c478bd9Sstevel@tonic-gate 		pages_checked = 0;
11767c478bd9Sstevel@tonic-gate 		for (seg = memsegs; seg; seg = seg->next) {
11777c478bd9Sstevel@tonic-gate 			pfn_t mseg_start;
11787c478bd9Sstevel@tonic-gate 
11797c478bd9Sstevel@tonic-gate 			if (seg->pages_base >= p_end ||
11807c478bd9Sstevel@tonic-gate 			    seg->pages_end <= mdsp->mds_base) {
11817c478bd9Sstevel@tonic-gate 				/* Span and memseg don't overlap. */
11827c478bd9Sstevel@tonic-gate 				continue;
11837c478bd9Sstevel@tonic-gate 			}
11849853d9e8SJason Beloro 			mseg_start = memseg_get_start(seg);
11857c478bd9Sstevel@tonic-gate 			/* Check that segment is suitable for delete. */
11869853d9e8SJason Beloro 			if (memseg_includes_meta(seg)) {
11877c478bd9Sstevel@tonic-gate 				/*
11889853d9e8SJason Beloro 				 * Check that this segment is completely
11899853d9e8SJason Beloro 				 * within the span.
11907c478bd9Sstevel@tonic-gate 				 */
11917c478bd9Sstevel@tonic-gate 				if (mseg_start < mdsp->mds_base ||
11927c478bd9Sstevel@tonic-gate 				    seg->pages_end > p_end) {
11937c478bd9Sstevel@tonic-gate 					ret = KPHYSM_EBUSY;
11947c478bd9Sstevel@tonic-gate 					break;
11957c478bd9Sstevel@tonic-gate 				}
11967c478bd9Sstevel@tonic-gate 				pages_checked += seg->pages_end - mseg_start;
11977c478bd9Sstevel@tonic-gate 			} else {
11987c478bd9Sstevel@tonic-gate 				/*
11997c478bd9Sstevel@tonic-gate 				 * If this segment is larger than the span,
12007c478bd9Sstevel@tonic-gate 				 * try to split it. After the split, it
12017c478bd9Sstevel@tonic-gate 				 * is necessary to restart.
12027c478bd9Sstevel@tonic-gate 				 */
12037c478bd9Sstevel@tonic-gate 				if (seg->pages_base < mdsp->mds_base ||
12047c478bd9Sstevel@tonic-gate 				    seg->pages_end > p_end) {
12057c478bd9Sstevel@tonic-gate 					pfn_t abase;
12067c478bd9Sstevel@tonic-gate 					pgcnt_t anpgs;
12077c478bd9Sstevel@tonic-gate 					int s_ret;
12087c478bd9Sstevel@tonic-gate 
12097c478bd9Sstevel@tonic-gate 					/* Split required.  */
12107c478bd9Sstevel@tonic-gate 					if (mdsp->mds_base < seg->pages_base)
12117c478bd9Sstevel@tonic-gate 						abase = seg->pages_base;
12127c478bd9Sstevel@tonic-gate 					else
12137c478bd9Sstevel@tonic-gate 						abase = mdsp->mds_base;
12147c478bd9Sstevel@tonic-gate 					if (p_end > seg->pages_end)
12157c478bd9Sstevel@tonic-gate 						anpgs = seg->pages_end - abase;
12167c478bd9Sstevel@tonic-gate 					else
12177c478bd9Sstevel@tonic-gate 						anpgs = p_end - abase;
12187c478bd9Sstevel@tonic-gate 					s_ret = kphysm_split_memseg(abase,
12197c478bd9Sstevel@tonic-gate 					    anpgs);
12207c478bd9Sstevel@tonic-gate 					if (s_ret == 0) {
12217c478bd9Sstevel@tonic-gate 						/* Split failed. */
12227c478bd9Sstevel@tonic-gate 						ret = KPHYSM_ERESOURCE;
12237c478bd9Sstevel@tonic-gate 						break;
12247c478bd9Sstevel@tonic-gate 					}
12257c478bd9Sstevel@tonic-gate 					goto restart;
12267c478bd9Sstevel@tonic-gate 				}
12277c478bd9Sstevel@tonic-gate 				pages_checked +=
12287c478bd9Sstevel@tonic-gate 				    seg->pages_end - seg->pages_base;
12297c478bd9Sstevel@tonic-gate 			}
12307c478bd9Sstevel@tonic-gate 			/*
12317c478bd9Sstevel@tonic-gate 			 * The memseg is wholly within the delete span.
12327c478bd9Sstevel@tonic-gate 			 * The individual pages can now be checked.
12337c478bd9Sstevel@tonic-gate 			 */
12347c478bd9Sstevel@tonic-gate 			/* Cage test. */
12357c478bd9Sstevel@tonic-gate 			for (pp = seg->pages; pp < seg->epages; pp++) {
12367c478bd9Sstevel@tonic-gate 				if (PP_ISNORELOC(pp)) {
12377c478bd9Sstevel@tonic-gate 					ret = KPHYSM_ENONRELOC;
12387c478bd9Sstevel@tonic-gate 					break;
12397c478bd9Sstevel@tonic-gate 				}
12407c478bd9Sstevel@tonic-gate 			}
12417c478bd9Sstevel@tonic-gate 			if (ret != KPHYSM_OK) {
12427c478bd9Sstevel@tonic-gate 				break;
12437c478bd9Sstevel@tonic-gate 			}
12447c478bd9Sstevel@tonic-gate 			phys_pages += (seg->pages_end - mseg_start);
12457c478bd9Sstevel@tonic-gate 			vm_pages += MSEG_NPAGES(seg);
12467c478bd9Sstevel@tonic-gate 		}
12477c478bd9Sstevel@tonic-gate 		if (ret != KPHYSM_OK)
12487c478bd9Sstevel@tonic-gate 			break;
12497c478bd9Sstevel@tonic-gate 		if (pages_checked != mdsp->mds_npgs) {
12507c478bd9Sstevel@tonic-gate 			ret = KPHYSM_ENONRELOC;
12517c478bd9Sstevel@tonic-gate 			break;
12527c478bd9Sstevel@tonic-gate 		}
12537c478bd9Sstevel@tonic-gate 	}
12547c478bd9Sstevel@tonic-gate 
12557c478bd9Sstevel@tonic-gate 	if (ret == KPHYSM_OK) {
12567c478bd9Sstevel@tonic-gate 		mhp->mh_phys_pages += phys_pages;
12577c478bd9Sstevel@tonic-gate 		mhp->mh_vm_pages += vm_pages;
12587c478bd9Sstevel@tonic-gate 	} else {
12597c478bd9Sstevel@tonic-gate 		/*
12607c478bd9Sstevel@tonic-gate 		 * Keep holding the mh_mutex to prevent it going away.
12617c478bd9Sstevel@tonic-gate 		 */
12627c478bd9Sstevel@tonic-gate 		delspan_remove(&mhp->mh_transit, base, npgs);
12637c478bd9Sstevel@tonic-gate 	}
12647c478bd9Sstevel@tonic-gate 	mutex_exit(&mhp->mh_mutex);
12657c478bd9Sstevel@tonic-gate 	return (ret);
12667c478bd9Sstevel@tonic-gate }
12677c478bd9Sstevel@tonic-gate 
/*
 * kphysm_del_span_query:
 * Report on the current deletability of the span [base, base + npgs)
 * without committing to a delete.  Fills in *mqp:
 *	phys_pages	- physical pages in the span
 *	managed		- pages covered by page_t structures (memsegs)
 *	nonrelocatable	- count of non-relocatable pages found
 *	first_/last_nonrelocatable - pfn bounds of those pages
 * Pages not covered by any memseg are judged by the arch layer via
 * arch_kphysm_del_span_ok().  Always returns KPHYSM_OK (only return
 * statement below).
 */
int
kphysm_del_span_query(
	pfn_t base,
	pgcnt_t npgs,
	memquery_t *mqp)
{
	struct memdelspan *mdsp;
	struct memdelspan *mdsp_new;
	int done_first_nonreloc;

	mqp->phys_pages = 0;
	mqp->managed = 0;
	mqp->nonrelocatable = 0;
	mqp->first_nonrelocatable = 0;
	mqp->last_nonrelocatable = 0;

	mdsp_new = span_to_install(base, npgs);
	/*
	 * It is OK to proceed here if mdsp_new == NULL.
	 */
	done_first_nonreloc = 0;
	for (mdsp = mdsp_new; mdsp != NULL; mdsp = mdsp->mds_next) {
		pfn_t sbase;
		pgcnt_t snpgs;

		mqp->phys_pages += mdsp->mds_npgs;
		sbase = mdsp->mds_base;
		snpgs = mdsp->mds_npgs;
		/* Consume the span front-to-back until no pages remain. */
		while (snpgs != 0) {
			struct memseg *lseg, *seg;
			pfn_t p_end;
			page_t *pp;
			pfn_t mseg_start;

			p_end = sbase + snpgs;
			/*
			 * Find the lowest addressed memseg that starts
			 * after sbase and account for it.
			 * This is to catch dynamic memsegs whose start
			 * is hidden.
			 */
			seg = NULL;
			for (lseg = memsegs; lseg != NULL; lseg = lseg->next) {
				if ((lseg->pages_base >= sbase) ||
				    (lseg->pages_base < p_end &&
				    lseg->pages_end > sbase)) {
					if (seg == NULL ||
					    seg->pages_base > lseg->pages_base)
						seg = lseg;
				}
			}
			if (seg != NULL) {
				/*
				 * mseg_start may precede pages_base for a
				 * dynamic memseg whose page_t area lives in
				 * the segment itself.
				 */
				mseg_start = memseg_get_start(seg);
				/*
				 * Now have the full extent of the memseg so
				 * do the range check.
				 */
				if (mseg_start >= p_end ||
				    seg->pages_end <= sbase) {
					/* Span does not overlap memseg. */
					seg = NULL;
				}
			}
			/*
			 * Account for gap either before the segment if
			 * there is one or to the end of the span.
			 */
			if (seg == NULL || mseg_start > sbase) {
				pfn_t a_end;

				a_end = (seg == NULL) ? p_end : mseg_start;
				/*
				 * Check with arch layer for relocatability.
				 */
				if (arch_kphysm_del_span_ok(sbase,
				    (a_end - sbase))) {
					/*
					 * No non-relocatable pages in this
					 * area, avoid the fine-grained
					 * test.
					 */
					snpgs -= (a_end - sbase);
					sbase = a_end;
				}
				/*
				 * Bulk test failed (or was skipped): probe
				 * each remaining page individually.  If the
				 * bulk test passed, sbase == a_end and this
				 * loop does not run.
				 */
				while (sbase < a_end) {
					if (!arch_kphysm_del_span_ok(sbase,
					    1)) {
						mqp->nonrelocatable++;
						if (!done_first_nonreloc) {
							mqp->
							    first_nonrelocatable
							    = sbase;
							done_first_nonreloc = 1;
						}
						mqp->last_nonrelocatable =
						    sbase;
					}
					sbase++;
					snpgs--;
				}
			}
			if (seg != NULL) {
				ASSERT(mseg_start <= sbase);
				if (seg->pages_base != mseg_start &&
				    seg->pages_base > sbase) {
					pgcnt_t skip_pgs;

					/*
					 * Skip the page_t area of a
					 * dynamic memseg.
					 */
					skip_pgs = seg->pages_base - sbase;
					if (snpgs <= skip_pgs) {
						sbase += snpgs;
						snpgs = 0;
						continue;
					}
					snpgs -= skip_pgs;
					sbase += skip_pgs;
				}
				ASSERT(snpgs != 0);
				ASSERT(seg->pages_base <= sbase);
				/*
				 * The individual pages can now be checked.
				 * Each page covered by a page_t counts as
				 * "managed"; cage (NORELOC) pages are
				 * recorded as non-relocatable.
				 */
				for (pp = seg->pages +
				    (sbase - seg->pages_base);
				    snpgs != 0 && pp < seg->epages; pp++) {
					mqp->managed++;
					if (PP_ISNORELOC(pp)) {
						mqp->nonrelocatable++;
						if (!done_first_nonreloc) {
							mqp->
							    first_nonrelocatable
							    = sbase;
							done_first_nonreloc = 1;
						}
						mqp->last_nonrelocatable =
						    sbase;
					}
					sbase++;
					snpgs--;
				}
			}
		}
	}

	free_delspans(mdsp_new);

	return (KPHYSM_OK);
}
14197c478bd9Sstevel@tonic-gate 
/*
 * This release function can be called at any stage as follows:
 *	_gethandle only called
 *	_span(s) only called
 *	_start called but failed
 *	delete thread exited
 *
 * It dismantles any delete spans still attached to the handle, resets
 * the handle's bookkeeping, and returns the mem_handle to the free
 * pool.  Returns KPHYSM_OK on success; KPHYSM_EHANDLE for a bad
 * handle, KPHYSM_ENOTFINISHED while a delete is in progress, and
 * KPHYSM_ESEQUENCE if a release is already underway.
 */
int
kphysm_del_release(memhandle_t handle)
{
	struct mem_handle *mhp;

	/*
	 * On success the lookup returns with mhp->mh_mutex held; every
	 * path below must drop it.
	 */
	mhp = kphysm_lookup_mem_handle(handle);
	if (mhp == NULL) {
		return (KPHYSM_EHANDLE);
	}
	switch (mhp->mh_state) {
	case MHND_STARTING:
	case MHND_RUNNING:
		/* Delete thread still active; cannot release yet. */
		mutex_exit(&mhp->mh_mutex);
		return (KPHYSM_ENOTFINISHED);
	case MHND_FREE:
		/*
		 * Lookup should never return a free handle; this
		 * always-false ASSERT fires on DEBUG kernels.
		 */
		ASSERT(mhp->mh_state != MHND_FREE);
		mutex_exit(&mhp->mh_mutex);
		return (KPHYSM_EHANDLE);
	case MHND_INIT:
		break;
	case MHND_DONE:
		break;
	case MHND_RELEASE:
		mutex_exit(&mhp->mh_mutex);
		return (KPHYSM_ESEQUENCE);
	default:
#ifdef DEBUG
		cmn_err(CE_WARN, "kphysm_del_release(0x%p) state corrupt %d",
		    (void *)mhp, mhp->mh_state);
#endif /* DEBUG */
		mutex_exit(&mhp->mh_mutex);
		return (KPHYSM_EHANDLE);
	}
	/*
	 * Set state so that we can wait if necessary.
	 * Also this means that we have read/write access to all
	 * fields except mh_exthandle and mh_state.
	 */
	mhp->mh_state = MHND_RELEASE;
	/*
	 * The mem_handle cannot be de-allocated by any other operation
	 * now, so no need to hold mh_mutex.
	 */
	mutex_exit(&mhp->mh_mutex);

	/*
	 * NOTE(review): (0, 0) appears to mean "remove all spans" --
	 * confirm against delspan_remove().
	 */
	delspan_remove(&mhp->mh_transit, 0, 0);
	mhp->mh_phys_pages = 0;
	mhp->mh_vm_pages = 0;
	mhp->mh_hold_todo = 0;
	mhp->mh_delete_complete = NULL;
	mhp->mh_delete_complete_arg = NULL;
	mhp->mh_cancel = 0;

	mutex_enter(&mhp->mh_mutex);
	ASSERT(mhp->mh_state == MHND_RELEASE);
	mhp->mh_state = MHND_FREE;

	/* Return the handle to the free pool. */
	kphysm_free_mem_handle(mhp);

	return (KPHYSM_OK);
}
14887c478bd9Sstevel@tonic-gate 
14897c478bd9Sstevel@tonic-gate /*
14907c478bd9Sstevel@tonic-gate  * This cancel function can only be called with the thread running.
14917c478bd9Sstevel@tonic-gate  */
14927c478bd9Sstevel@tonic-gate int
kphysm_del_cancel(memhandle_t handle)14937c478bd9Sstevel@tonic-gate kphysm_del_cancel(memhandle_t handle)
14947c478bd9Sstevel@tonic-gate {
14957c478bd9Sstevel@tonic-gate 	struct mem_handle *mhp;
14967c478bd9Sstevel@tonic-gate 
14977c478bd9Sstevel@tonic-gate 	mhp = kphysm_lookup_mem_handle(handle);
14987c478bd9Sstevel@tonic-gate 	if (mhp == NULL) {
14997c478bd9Sstevel@tonic-gate 		return (KPHYSM_EHANDLE);
15007c478bd9Sstevel@tonic-gate 	}
15017c478bd9Sstevel@tonic-gate 	if (mhp->mh_state != MHND_STARTING && mhp->mh_state != MHND_RUNNING) {
15027c478bd9Sstevel@tonic-gate 		mutex_exit(&mhp->mh_mutex);
15037c478bd9Sstevel@tonic-gate 		return (KPHYSM_ENOTRUNNING);
15047c478bd9Sstevel@tonic-gate 	}
15057c478bd9Sstevel@tonic-gate 	/*
15067c478bd9Sstevel@tonic-gate 	 * Set the cancel flag and wake the delete thread up.
15077c478bd9Sstevel@tonic-gate 	 * The thread may be waiting on I/O, so the effect of the cancel
15087c478bd9Sstevel@tonic-gate 	 * may be delayed.
15097c478bd9Sstevel@tonic-gate 	 */
15107c478bd9Sstevel@tonic-gate 	if (mhp->mh_cancel == 0) {
15117c478bd9Sstevel@tonic-gate 		mhp->mh_cancel = KPHYSM_ECANCELLED;
15127c478bd9Sstevel@tonic-gate 		cv_signal(&mhp->mh_cv);
15137c478bd9Sstevel@tonic-gate 	}
15147c478bd9Sstevel@tonic-gate 	mutex_exit(&mhp->mh_mutex);
15157c478bd9Sstevel@tonic-gate 	return (KPHYSM_OK);
15167c478bd9Sstevel@tonic-gate }
15177c478bd9Sstevel@tonic-gate 
/*
 * Copy the current delete progress counters for 'handle' into *mdstp.
 * Returns KPHYSM_EHANDLE for a bad handle, KPHYSM_ENOTRUNNING once
 * the delete has finished (or the handle is in any non-active state),
 * otherwise KPHYSM_OK.
 */
int
kphysm_del_status(
	memhandle_t handle,
	memdelstat_t *mdstp)
{
	struct mem_handle *mhp;

	/* On success the lookup returns with mh_mutex held. */
	mhp = kphysm_lookup_mem_handle(handle);
	if (mhp == NULL) {
		return (KPHYSM_EHANDLE);
	}
	/*
	 * Calling kphysm_del_status() is allowed before the delete
	 * is started to allow for status display.
	 */
	if (mhp->mh_state != MHND_INIT && mhp->mh_state != MHND_STARTING &&
	    mhp->mh_state != MHND_RUNNING) {
		mutex_exit(&mhp->mh_mutex);
		return (KPHYSM_ENOTRUNNING);
	}
	mdstp->phys_pages = mhp->mh_phys_pages;
	mdstp->managed = mhp->mh_vm_pages;
	/* Pages collected so far = managed total minus those still held. */
	mdstp->collected = mhp->mh_vm_pages - mhp->mh_hold_todo;
	mutex_exit(&mhp->mh_mutex);
	return (KPHYSM_OK);
}
15447c478bd9Sstevel@tonic-gate 
/*
 * Tunable safety margin: pages beyond tune.t_minarmem that must remain
 * in availrmem for a memory delete to proceed (see can_remove_pgs()).
 */
static int mem_delete_additional_pages = 100;
15467c478bd9Sstevel@tonic-gate 
15477c478bd9Sstevel@tonic-gate static int
can_remove_pgs(pgcnt_t npgs)15487c478bd9Sstevel@tonic-gate can_remove_pgs(pgcnt_t npgs)
15497c478bd9Sstevel@tonic-gate {
15507c478bd9Sstevel@tonic-gate 	/*
15517c478bd9Sstevel@tonic-gate 	 * If all pageable pages were paged out, freemem would
15527c478bd9Sstevel@tonic-gate 	 * equal availrmem.  There is a minimum requirement for
15537c478bd9Sstevel@tonic-gate 	 * availrmem.
15547c478bd9Sstevel@tonic-gate 	 */
15557c478bd9Sstevel@tonic-gate 	if ((availrmem - (tune.t_minarmem + mem_delete_additional_pages))
15567c478bd9Sstevel@tonic-gate 	    < npgs)
15577c478bd9Sstevel@tonic-gate 		return (0);
15587c478bd9Sstevel@tonic-gate 	/* TODO: check swap space, etc. */
15597c478bd9Sstevel@tonic-gate 	return (1);
15607c478bd9Sstevel@tonic-gate }
15617c478bd9Sstevel@tonic-gate 
15627c478bd9Sstevel@tonic-gate static int
get_availrmem(pgcnt_t npgs)15637c478bd9Sstevel@tonic-gate get_availrmem(pgcnt_t npgs)
15647c478bd9Sstevel@tonic-gate {
15657c478bd9Sstevel@tonic-gate 	int ret;
15667c478bd9Sstevel@tonic-gate 
15677c478bd9Sstevel@tonic-gate 	mutex_enter(&freemem_lock);
15687c478bd9Sstevel@tonic-gate 	ret = can_remove_pgs(npgs);
15697c478bd9Sstevel@tonic-gate 	if (ret != 0)
15707c478bd9Sstevel@tonic-gate 		availrmem -= npgs;
15717c478bd9Sstevel@tonic-gate 	mutex_exit(&freemem_lock);
15727c478bd9Sstevel@tonic-gate 	return (ret);
15737c478bd9Sstevel@tonic-gate }
15747c478bd9Sstevel@tonic-gate 
/*
 * Return npgs pages to availrmem, undoing a prior get_availrmem().
 */
static void
put_availrmem(pgcnt_t npgs)
{
	mutex_enter(&freemem_lock);
	availrmem += npgs;
	mutex_exit(&freemem_lock);
}
15827c478bd9Sstevel@tonic-gate 
#define	FREEMEM_INCR	100
/* Pages claimed from freemem per iteration of delthr_get_freemem(). */
static pgcnt_t freemem_incr = FREEMEM_INCR;
#define	DEL_FREE_WAIT_FRAC	4
/* hz/DEL_FREE_WAIT_FRAC ticks (~0.25s), rounded up: wait between free tries. */
#define	DEL_FREE_WAIT_TICKS	((hz+DEL_FREE_WAIT_FRAC-1)/DEL_FREE_WAIT_FRAC)

#define	DEL_BUSY_WAIT_FRAC	20
/* hz/DEL_BUSY_WAIT_FRAC ticks (~0.05s), rounded up: shorter busy-retry wait. */
#define	DEL_BUSY_WAIT_TICKS	((hz+DEL_BUSY_WAIT_FRAC-1)/DEL_BUSY_WAIT_FRAC)

/* Forward declarations for the delete-thread implementation below. */
static void kphysm_del_cleanup(struct mem_handle *);

static void page_delete_collect(page_t *, struct mem_handle *);
15947c478bd9Sstevel@tonic-gate 
/*
 * Reserve up to freemem_incr pages (capped at mh_hold_todo) from
 * freemem on behalf of the delete thread, applying pageout pressure
 * and waiting as necessary.  Returns the number of pages reserved, or
 * 0 if the delete was cancelled (mh_cancel set) while waiting.
 * Called and returns with mhp->mh_mutex held.
 */
static pgcnt_t
delthr_get_freemem(struct mem_handle *mhp)
{
	pgcnt_t free_get;
	int ret;

	ASSERT(MUTEX_HELD(&mhp->mh_mutex));

	MDSTAT_INCR(mhp, need_free);
	/*
	 * Get up to freemem_incr pages.
	 */
	free_get = freemem_incr;
	if (free_get > mhp->mh_hold_todo)
		free_get = mhp->mh_hold_todo;
	/*
	 * Take free_get pages away from freemem,
	 * waiting if necessary.
	 */

	while (!mhp->mh_cancel) {
		/* Drop mh_mutex while touching the freemem machinery. */
		mutex_exit(&mhp->mh_mutex);
		MDSTAT_INCR(mhp, free_loop);
		/*
		 * Duplicate test from page_create_throttle()
		 * but don't override with !PG_WAIT.
		 */
		if (freemem < (free_get + throttlefree)) {
			MDSTAT_INCR(mhp, free_low);
			ret = 0;
		} else {
			ret = page_create_wait(free_get, 0);
			if (ret == 0) {
				/* EMPTY */
				MDSTAT_INCR(mhp, free_failed);
			}
		}
		if (ret != 0) {
			/* Pages reserved; reacquire the lock and report. */
			mutex_enter(&mhp->mh_mutex);
			return (free_get);
		}

		/*
		 * Put pressure on pageout.
		 */
		page_needfree(free_get);
		cv_signal(&proc_pageout->p_cv);

		/*
		 * Wait a while for pageout to make progress; the wait is
		 * also broken early by cv_signal() from kphysm_del_cancel().
		 */
		mutex_enter(&mhp->mh_mutex);
		(void) cv_reltimedwait(&mhp->mh_cv, &mhp->mh_mutex,
		    DEL_FREE_WAIT_TICKS, TR_CLOCK_TICK);
		/* Withdraw the pageout pressure before retrying. */
		mutex_exit(&mhp->mh_mutex);
		page_needfree(-(spgcnt_t)free_get);

		mutex_enter(&mhp->mh_mutex);
	}
	/* Cancelled: nothing reserved. */
	return (0);
}
16537c478bd9Sstevel@tonic-gate 
#define	DR_AIO_CLEANUP_DELAY	25000	/* 0.025secs, in usec */
#define	DR_AIO_CLEANUP_MAXLOOPS_NODELAY	100
/*
 * This function is run as a helper thread for delete_memory_thread.
 * It is needed in order to force kaio cleanup, so that pages used in kaio
 * will be unlocked and subsequently relocated by delete_memory_thread.
 * The address of the delete_memory_thread's mem_handle is passed in to
 * this thread function, and is used to set the mh_aio_cleanup_done member
 * prior to calling thread_exit().
 */
static void
dr_aio_cleanup_thread(caddr_t amhp)
{
	proc_t *procp;
	int (*aio_cleanup_dr_delete_memory)(proc_t *);
	int cleaned;
	int n = 0;
	struct mem_handle *mhp;
	volatile uint_t *pcancel;

	mhp = (struct mem_handle *)amhp;
	ASSERT(mhp != NULL);
	/* Polled each pass; set elsewhere to terminate this thread. */
	pcancel = &mhp->mh_dr_aio_cleanup_cancel;
	if (modload("sys", "kaio") == -1) {
		mhp->mh_aio_cleanup_done = 1;
		cmn_err(CE_WARN, "dr_aio_cleanup_thread: cannot load kaio");
		thread_exit();
	}
	/*
	 * Resolve the cleanup entry point dynamically so this file has
	 * no static dependency on the kaio module.
	 */
	aio_cleanup_dr_delete_memory = (int (*)(proc_t *))
	    modgetsymvalue("aio_cleanup_dr_delete_memory", 0);
	if (aio_cleanup_dr_delete_memory == NULL) {
		mhp->mh_aio_cleanup_done = 1;
		cmn_err(CE_WARN,
	    "aio_cleanup_dr_delete_memory not found in kaio");
		thread_exit();
	}
	do {
		cleaned = 0;
		/* pidlock protects the practive process list walk. */
		mutex_enter(&pidlock);
		for (procp = practive; (*pcancel == 0) && (procp != NULL);
		    procp = procp->p_next) {
			mutex_enter(&procp->p_lock);
			if (procp->p_aio != NULL) {
				/* cleanup proc's outstanding kaio */
				cleaned +=
				    (*aio_cleanup_dr_delete_memory)(procp);
			}
			mutex_exit(&procp->p_lock);
		}
		mutex_exit(&pidlock);
		/*
		 * Back off when a pass found nothing to clean, or after
		 * DR_AIO_CLEANUP_MAXLOOPS_NODELAY consecutive busy passes,
		 * so we don't spin holding pidlock continuously.
		 */
		if ((*pcancel == 0) &&
		    (!cleaned || (++n == DR_AIO_CLEANUP_MAXLOOPS_NODELAY))) {
			/* delay a bit before retrying all procs again */
			delay(drv_usectohz(DR_AIO_CLEANUP_DELAY));
			n = 0;
		}
	} while (*pcancel == 0);
	/* Signal delete_memory_thread that we are gone, then exit. */
	mhp->mh_aio_cleanup_done = 1;
	thread_exit();
}
17147c478bd9Sstevel@tonic-gate 
17157c478bd9Sstevel@tonic-gate static void
delete_memory_thread(caddr_t amhp)17167c478bd9Sstevel@tonic-gate delete_memory_thread(caddr_t amhp)
17177c478bd9Sstevel@tonic-gate {
17187c478bd9Sstevel@tonic-gate 	struct mem_handle *mhp;
17197c478bd9Sstevel@tonic-gate 	struct memdelspan *mdsp;
17207c478bd9Sstevel@tonic-gate 	callb_cpr_t cprinfo;
17217c478bd9Sstevel@tonic-gate 	page_t *pp_targ;
17227c478bd9Sstevel@tonic-gate 	spgcnt_t freemem_left;
17237c478bd9Sstevel@tonic-gate 	void (*del_complete_funcp)(void *, int error);
17247c478bd9Sstevel@tonic-gate 	void *del_complete_arg;
17257c478bd9Sstevel@tonic-gate 	int comp_code;
17267c478bd9Sstevel@tonic-gate 	int ret;
17277c478bd9Sstevel@tonic-gate 	int first_scan;
17287c478bd9Sstevel@tonic-gate 	uint_t szc;
17297c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
17307c478bd9Sstevel@tonic-gate 	uint64_t start_total, ntick_total;
17317c478bd9Sstevel@tonic-gate 	uint64_t start_pgrp, ntick_pgrp;
17327c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
17337c478bd9Sstevel@tonic-gate 
17347c478bd9Sstevel@tonic-gate 	mhp = (struct mem_handle *)amhp;
17357c478bd9Sstevel@tonic-gate 
17367c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
17377c478bd9Sstevel@tonic-gate 	start_total = ddi_get_lbolt();
17387c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
17397c478bd9Sstevel@tonic-gate 
17407c478bd9Sstevel@tonic-gate 	CALLB_CPR_INIT(&cprinfo, &mhp->mh_mutex,
17417c478bd9Sstevel@tonic-gate 	    callb_generic_cpr, "memdel");
17427c478bd9Sstevel@tonic-gate 
17437c478bd9Sstevel@tonic-gate 	mutex_enter(&mhp->mh_mutex);
17447c478bd9Sstevel@tonic-gate 	ASSERT(mhp->mh_state == MHND_STARTING);
17457c478bd9Sstevel@tonic-gate 
17467c478bd9Sstevel@tonic-gate 	mhp->mh_state = MHND_RUNNING;
17477c478bd9Sstevel@tonic-gate 	mhp->mh_thread_id = curthread;
17487c478bd9Sstevel@tonic-gate 
17497c478bd9Sstevel@tonic-gate 	mhp->mh_hold_todo = mhp->mh_vm_pages;
17507c478bd9Sstevel@tonic-gate 	mutex_exit(&mhp->mh_mutex);
17517c478bd9Sstevel@tonic-gate 
17527c478bd9Sstevel@tonic-gate 	/* Allocate the remap pages now, if necessary. */
17537c478bd9Sstevel@tonic-gate 	memseg_remap_init();
17547c478bd9Sstevel@tonic-gate 
17557c478bd9Sstevel@tonic-gate 	/*
17567c478bd9Sstevel@tonic-gate 	 * Subtract from availrmem now if possible as availrmem
17577c478bd9Sstevel@tonic-gate 	 * may not be available by the end of the delete.
17587c478bd9Sstevel@tonic-gate 	 */
17597c478bd9Sstevel@tonic-gate 	if (!get_availrmem(mhp->mh_vm_pages)) {
17607c478bd9Sstevel@tonic-gate 		comp_code = KPHYSM_ENOTVIABLE;
17617c478bd9Sstevel@tonic-gate 		mutex_enter(&mhp->mh_mutex);
17627c478bd9Sstevel@tonic-gate 		goto early_exit;
17637c478bd9Sstevel@tonic-gate 	}
17647c478bd9Sstevel@tonic-gate 
17657c478bd9Sstevel@tonic-gate 	ret = kphysm_setup_pre_del(mhp->mh_vm_pages);
17667c478bd9Sstevel@tonic-gate 
17677c478bd9Sstevel@tonic-gate 	mutex_enter(&mhp->mh_mutex);
17687c478bd9Sstevel@tonic-gate 
17697c478bd9Sstevel@tonic-gate 	if (ret != 0) {
17707c478bd9Sstevel@tonic-gate 		mhp->mh_cancel = KPHYSM_EREFUSED;
17717c478bd9Sstevel@tonic-gate 		goto refused;
17727c478bd9Sstevel@tonic-gate 	}
17737c478bd9Sstevel@tonic-gate 
17747c478bd9Sstevel@tonic-gate 	transit_list_collect(mhp, 1);
17757c478bd9Sstevel@tonic-gate 
17767c478bd9Sstevel@tonic-gate 	for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
17777c478bd9Sstevel@tonic-gate 	    mdsp = mdsp->mds_next) {
17787c478bd9Sstevel@tonic-gate 		ASSERT(mdsp->mds_bitmap == NULL);
17797c478bd9Sstevel@tonic-gate 		mdsp->mds_bitmap = kmem_zalloc(MDS_BITMAPBYTES(mdsp), KM_SLEEP);
17807c478bd9Sstevel@tonic-gate 		mdsp->mds_bitmap_retired = kmem_zalloc(MDS_BITMAPBYTES(mdsp),
17817c478bd9Sstevel@tonic-gate 		    KM_SLEEP);
17827c478bd9Sstevel@tonic-gate 	}
17837c478bd9Sstevel@tonic-gate 
17847c478bd9Sstevel@tonic-gate 	first_scan = 1;
17857c478bd9Sstevel@tonic-gate 	freemem_left = 0;
17867c478bd9Sstevel@tonic-gate 	/*
17877c478bd9Sstevel@tonic-gate 	 * Start dr_aio_cleanup_thread, which periodically iterates
17887c478bd9Sstevel@tonic-gate 	 * through the process list and invokes aio cleanup.  This
17897c478bd9Sstevel@tonic-gate 	 * is needed in order to avoid a deadly embrace between the
17907c478bd9Sstevel@tonic-gate 	 * delete_memory_thread (waiting on writer lock for page, with the
17917c478bd9Sstevel@tonic-gate 	 * exclusive-wanted bit set), kaio read request threads (waiting for a
17927c478bd9Sstevel@tonic-gate 	 * reader lock on the same page that is wanted by the
17937c478bd9Sstevel@tonic-gate 	 * delete_memory_thread), and threads waiting for kaio completion
17947c478bd9Sstevel@tonic-gate 	 * (blocked on spt_amp->lock).
17957c478bd9Sstevel@tonic-gate 	 */
17967c478bd9Sstevel@tonic-gate 	mhp->mh_dr_aio_cleanup_cancel = 0;
17977c478bd9Sstevel@tonic-gate 	mhp->mh_aio_cleanup_done = 0;
17987c478bd9Sstevel@tonic-gate 	(void) thread_create(NULL, 0, dr_aio_cleanup_thread,
17997c478bd9Sstevel@tonic-gate 	    (caddr_t)mhp, 0, &p0, TS_RUN, maxclsyspri - 1);
18007c478bd9Sstevel@tonic-gate 	while ((mhp->mh_hold_todo != 0) && (mhp->mh_cancel == 0)) {
18017c478bd9Sstevel@tonic-gate 		pgcnt_t collected;
18027c478bd9Sstevel@tonic-gate 
18037c478bd9Sstevel@tonic-gate 		MDSTAT_INCR(mhp, nloop);
18047c478bd9Sstevel@tonic-gate 		collected = 0;
18057c478bd9Sstevel@tonic-gate 		for (mdsp = mhp->mh_transit.trl_spans; (mdsp != NULL) &&
18067c478bd9Sstevel@tonic-gate 		    (mhp->mh_cancel == 0); mdsp = mdsp->mds_next) {
18077c478bd9Sstevel@tonic-gate 			pfn_t pfn, p_end;
18087c478bd9Sstevel@tonic-gate 
18097c478bd9Sstevel@tonic-gate 			p_end = mdsp->mds_base + mdsp->mds_npgs;
18107c478bd9Sstevel@tonic-gate 			for (pfn = mdsp->mds_base; (pfn < p_end) &&
18117c478bd9Sstevel@tonic-gate 			    (mhp->mh_cancel == 0); pfn++) {
18127c478bd9Sstevel@tonic-gate 				page_t *pp, *tpp, *tpp_targ;
18137c478bd9Sstevel@tonic-gate 				pgcnt_t bit;
18147c478bd9Sstevel@tonic-gate 				struct vnode *vp;
18157c478bd9Sstevel@tonic-gate 				u_offset_t offset;
18167c478bd9Sstevel@tonic-gate 				int mod, result;
18177c478bd9Sstevel@tonic-gate 				spgcnt_t pgcnt;
18187c478bd9Sstevel@tonic-gate 
18197c478bd9Sstevel@tonic-gate 				bit = pfn - mdsp->mds_base;
18207c478bd9Sstevel@tonic-gate 				if ((mdsp->mds_bitmap[bit / NBPBMW] &
18217c478bd9Sstevel@tonic-gate 				    (1 << (bit % NBPBMW))) != 0) {
18227c478bd9Sstevel@tonic-gate 					MDSTAT_INCR(mhp, already_done);
18237c478bd9Sstevel@tonic-gate 					continue;
18247c478bd9Sstevel@tonic-gate 				}
18257c478bd9Sstevel@tonic-gate 				if (freemem_left == 0) {
18267c478bd9Sstevel@tonic-gate 					freemem_left += delthr_get_freemem(mhp);
18277c478bd9Sstevel@tonic-gate 					if (freemem_left == 0)
18287c478bd9Sstevel@tonic-gate 						break;
18297c478bd9Sstevel@tonic-gate 				}
18307c478bd9Sstevel@tonic-gate 
18317c478bd9Sstevel@tonic-gate 				/*
18327c478bd9Sstevel@tonic-gate 				 * Release mh_mutex - some of this
18337c478bd9Sstevel@tonic-gate 				 * stuff takes some time (eg PUTPAGE).
18347c478bd9Sstevel@tonic-gate 				 */
18357c478bd9Sstevel@tonic-gate 
18367c478bd9Sstevel@tonic-gate 				mutex_exit(&mhp->mh_mutex);
18377c478bd9Sstevel@tonic-gate 				MDSTAT_INCR(mhp, ncheck);
18387c478bd9Sstevel@tonic-gate 
18397c478bd9Sstevel@tonic-gate 				pp = page_numtopp_nolock(pfn);
18407c478bd9Sstevel@tonic-gate 				if (pp == NULL) {
18417c478bd9Sstevel@tonic-gate 					/*
18427c478bd9Sstevel@tonic-gate 					 * Not covered by a page_t - will
18437c478bd9Sstevel@tonic-gate 					 * be dealt with elsewhere.
18447c478bd9Sstevel@tonic-gate 					 */
18457c478bd9Sstevel@tonic-gate 					MDSTAT_INCR(mhp, nopaget);
18467c478bd9Sstevel@tonic-gate 					mutex_enter(&mhp->mh_mutex);
18477c478bd9Sstevel@tonic-gate 					mdsp->mds_bitmap[bit / NBPBMW] |=
18487c478bd9Sstevel@tonic-gate 					    (1 << (bit % NBPBMW));
18497c478bd9Sstevel@tonic-gate 					continue;
18507c478bd9Sstevel@tonic-gate 				}
18517c478bd9Sstevel@tonic-gate 
18527c478bd9Sstevel@tonic-gate 				if (!page_try_reclaim_lock(pp, SE_EXCL,
1853db874c57Selowe 				    SE_EXCL_WANTED | SE_RETIRED)) {
18547c478bd9Sstevel@tonic-gate 					/*
1855db874c57Selowe 					 * Page in use elsewhere.  Skip it.
18567c478bd9Sstevel@tonic-gate 					 */
18577c478bd9Sstevel@tonic-gate 					MDSTAT_INCR(mhp, lockfail);
18587c478bd9Sstevel@tonic-gate 					mutex_enter(&mhp->mh_mutex);
18597c478bd9Sstevel@tonic-gate 					continue;
18607c478bd9Sstevel@tonic-gate 				}
18617c478bd9Sstevel@tonic-gate 				/*
18627c478bd9Sstevel@tonic-gate 				 * See if the cage expanded into the delete.
18637c478bd9Sstevel@tonic-gate 				 * This can happen as we have to allow the
18647c478bd9Sstevel@tonic-gate 				 * cage to expand.
18657c478bd9Sstevel@tonic-gate 				 */
18667c478bd9Sstevel@tonic-gate 				if (PP_ISNORELOC(pp)) {
18677c478bd9Sstevel@tonic-gate 					page_unlock(pp);
18687c478bd9Sstevel@tonic-gate 					mutex_enter(&mhp->mh_mutex);
18697c478bd9Sstevel@tonic-gate 					mhp->mh_cancel = KPHYSM_ENONRELOC;
18707c478bd9Sstevel@tonic-gate 					break;
18717c478bd9Sstevel@tonic-gate 				}
1872db874c57Selowe 				if (PP_RETIRED(pp)) {
18737c478bd9Sstevel@tonic-gate 					/*
18747c478bd9Sstevel@tonic-gate 					 * Page has been retired and is
18757c478bd9Sstevel@tonic-gate 					 * not part of the cage so we
18767c478bd9Sstevel@tonic-gate 					 * can now do the accounting for
18777c478bd9Sstevel@tonic-gate 					 * it.
18787c478bd9Sstevel@tonic-gate 					 */
18797c478bd9Sstevel@tonic-gate 					MDSTAT_INCR(mhp, retired);
18807c478bd9Sstevel@tonic-gate 					mutex_enter(&mhp->mh_mutex);
18817c478bd9Sstevel@tonic-gate 					mdsp->mds_bitmap[bit / NBPBMW]
18827c478bd9Sstevel@tonic-gate 					    |= (1 << (bit % NBPBMW));
18837c478bd9Sstevel@tonic-gate 					mdsp->mds_bitmap_retired[bit /
18847c478bd9Sstevel@tonic-gate 					    NBPBMW] |=
18857c478bd9Sstevel@tonic-gate 					    (1 << (bit % NBPBMW));
18867c478bd9Sstevel@tonic-gate 					mhp->mh_hold_todo--;
18877c478bd9Sstevel@tonic-gate 					continue;
18887c478bd9Sstevel@tonic-gate 				}
18897c478bd9Sstevel@tonic-gate 				ASSERT(freemem_left != 0);
18907c478bd9Sstevel@tonic-gate 				if (PP_ISFREE(pp)) {
18917c478bd9Sstevel@tonic-gate 					/*
18927c478bd9Sstevel@tonic-gate 					 * Like page_reclaim() only 'freemem'
18937c478bd9Sstevel@tonic-gate 					 * processing is already done.
18947c478bd9Sstevel@tonic-gate 					 */
18957c478bd9Sstevel@tonic-gate 					MDSTAT_INCR(mhp, nfree);
18967c478bd9Sstevel@tonic-gate 				free_page_collect:
18977c478bd9Sstevel@tonic-gate 					if (PP_ISAGED(pp)) {
18987c478bd9Sstevel@tonic-gate 						page_list_sub(pp,
18997c478bd9Sstevel@tonic-gate 						    PG_FREE_LIST);
19007c478bd9Sstevel@tonic-gate 					} else {
19017c478bd9Sstevel@tonic-gate 						page_list_sub(pp,
19027c478bd9Sstevel@tonic-gate 						    PG_CACHE_LIST);
19037c478bd9Sstevel@tonic-gate 					}
19047c478bd9Sstevel@tonic-gate 					PP_CLRFREE(pp);
19057c478bd9Sstevel@tonic-gate 					PP_CLRAGED(pp);
19067c478bd9Sstevel@tonic-gate 					collected++;
19077c478bd9Sstevel@tonic-gate 					mutex_enter(&mhp->mh_mutex);
19087c478bd9Sstevel@tonic-gate 					page_delete_collect(pp, mhp);
19097c478bd9Sstevel@tonic-gate 					mdsp->mds_bitmap[bit / NBPBMW] |=
19107c478bd9Sstevel@tonic-gate 					    (1 << (bit % NBPBMW));
19117c478bd9Sstevel@tonic-gate 					freemem_left--;
19127c478bd9Sstevel@tonic-gate 					continue;
19137c478bd9Sstevel@tonic-gate 				}
19147c478bd9Sstevel@tonic-gate 				ASSERT(pp->p_vnode != NULL);
19157c478bd9Sstevel@tonic-gate 				if (first_scan) {
19167c478bd9Sstevel@tonic-gate 					MDSTAT_INCR(mhp, first_notfree);
19177c478bd9Sstevel@tonic-gate 					page_unlock(pp);
19187c478bd9Sstevel@tonic-gate 					mutex_enter(&mhp->mh_mutex);
19197c478bd9Sstevel@tonic-gate 					continue;
19207c478bd9Sstevel@tonic-gate 				}
19217c478bd9Sstevel@tonic-gate 				/*
19227c478bd9Sstevel@tonic-gate 				 * Keep stats on pages encountered that
1923db874c57Selowe 				 * are marked for retirement.
19247c478bd9Sstevel@tonic-gate 				 */
1925db874c57Selowe 				if (PP_TOXIC(pp)) {
19267c478bd9Sstevel@tonic-gate 					MDSTAT_INCR(mhp, toxic);
1927db874c57Selowe 				} else if (PP_PR_REQ(pp)) {
19287c478bd9Sstevel@tonic-gate 					MDSTAT_INCR(mhp, failing);
19297c478bd9Sstevel@tonic-gate 				}
19307c478bd9Sstevel@tonic-gate 				/*
19317c478bd9Sstevel@tonic-gate 				 * In certain cases below, special exceptions
19327c478bd9Sstevel@tonic-gate 				 * are made for pages that are toxic.  This
19337c478bd9Sstevel@tonic-gate 				 * is because the current meaning of toxic
19347c478bd9Sstevel@tonic-gate 				 * is that an uncorrectable error has been
19357c478bd9Sstevel@tonic-gate 				 * previously associated with the page.
19367c478bd9Sstevel@tonic-gate 				 */
19377c478bd9Sstevel@tonic-gate 				if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) {
1938db874c57Selowe 					if (!PP_TOXIC(pp)) {
19397c478bd9Sstevel@tonic-gate 						/*
19407c478bd9Sstevel@tonic-gate 						 * Must relocate locked in
19417c478bd9Sstevel@tonic-gate 						 * memory pages.
19427c478bd9Sstevel@tonic-gate 						 */
19437c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
19447c478bd9Sstevel@tonic-gate 						start_pgrp = ddi_get_lbolt();
19457c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
19467c478bd9Sstevel@tonic-gate 						/*
19477c478bd9Sstevel@tonic-gate 						 * Lock all constituent pages
19487c478bd9Sstevel@tonic-gate 						 * of a large page to ensure
19497c478bd9Sstevel@tonic-gate 						 * that p_szc won't change.
19507c478bd9Sstevel@tonic-gate 						 */
19517c478bd9Sstevel@tonic-gate 						if (!group_page_trylock(pp,
19527c478bd9Sstevel@tonic-gate 						    SE_EXCL)) {
19537c478bd9Sstevel@tonic-gate 							MDSTAT_INCR(mhp,
19547c478bd9Sstevel@tonic-gate 							    gptllckfail);
19557c478bd9Sstevel@tonic-gate 							page_unlock(pp);
19567c478bd9Sstevel@tonic-gate 							mutex_enter(
19577c478bd9Sstevel@tonic-gate 							    &mhp->mh_mutex);
19587c478bd9Sstevel@tonic-gate 							continue;
19597c478bd9Sstevel@tonic-gate 						}
19607c478bd9Sstevel@tonic-gate 						MDSTAT_INCR(mhp, npplocked);
19617c478bd9Sstevel@tonic-gate 						pp_targ =
19627c478bd9Sstevel@tonic-gate 						    page_get_replacement_page(
19637c478bd9Sstevel@tonic-gate 						    pp, NULL, 0);
19647c478bd9Sstevel@tonic-gate 						if (pp_targ != NULL) {
19657c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
19667c478bd9Sstevel@tonic-gate 							ntick_pgrp =
19677c478bd9Sstevel@tonic-gate 							    (uint64_t)
19687c478bd9Sstevel@tonic-gate 							    ddi_get_lbolt() -
19697c478bd9Sstevel@tonic-gate 							    start_pgrp;
19707c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
19717c478bd9Sstevel@tonic-gate 							MDSTAT_PGRP(mhp,
19727c478bd9Sstevel@tonic-gate 							    ntick_pgrp);
19737c478bd9Sstevel@tonic-gate 							MDSTAT_INCR(mhp,
19747c478bd9Sstevel@tonic-gate 							    nlockreloc);
19757c478bd9Sstevel@tonic-gate 							goto reloc;
19767c478bd9Sstevel@tonic-gate 						}
19777c478bd9Sstevel@tonic-gate 						group_page_unlock(pp);
19787c478bd9Sstevel@tonic-gate 						page_unlock(pp);
19797c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
19807c478bd9Sstevel@tonic-gate 						ntick_pgrp =
19817c478bd9Sstevel@tonic-gate 						    (uint64_t)ddi_get_lbolt() -
19827c478bd9Sstevel@tonic-gate 						    start_pgrp;
19837c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
19847c478bd9Sstevel@tonic-gate 						MDSTAT_PGRP(mhp, ntick_pgrp);
19857c478bd9Sstevel@tonic-gate 						MDSTAT_INCR(mhp, nnorepl);
19867c478bd9Sstevel@tonic-gate 						mutex_enter(&mhp->mh_mutex);
19877c478bd9Sstevel@tonic-gate 						continue;
19887c478bd9Sstevel@tonic-gate 					} else {
19897c478bd9Sstevel@tonic-gate 						/*
19907c478bd9Sstevel@tonic-gate 						 * Cannot do anything about
19917c478bd9Sstevel@tonic-gate 						 * this page because it is
19927c478bd9Sstevel@tonic-gate 						 * toxic.
19937c478bd9Sstevel@tonic-gate 						 */
19947c478bd9Sstevel@tonic-gate 						MDSTAT_INCR(mhp, npplkdtoxic);
19957c478bd9Sstevel@tonic-gate 						page_unlock(pp);
19967c478bd9Sstevel@tonic-gate 						mutex_enter(&mhp->mh_mutex);
19977c478bd9Sstevel@tonic-gate 						continue;
19987c478bd9Sstevel@tonic-gate 					}
19997c478bd9Sstevel@tonic-gate 				}
20007c478bd9Sstevel@tonic-gate 				/*
20017c478bd9Sstevel@tonic-gate 				 * Unload the mappings and check if mod bit
20027c478bd9Sstevel@tonic-gate 				 * is set.
20037c478bd9Sstevel@tonic-gate 				 */
2004ad23a2dbSjohansen 				ASSERT(!PP_ISKAS(pp));
20057c478bd9Sstevel@tonic-gate 				(void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD);
20067c478bd9Sstevel@tonic-gate 				mod = hat_ismod(pp);
20077c478bd9Sstevel@tonic-gate 
20087c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
20097c478bd9Sstevel@tonic-gate 				start_pgrp = ddi_get_lbolt();
20107c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
2011db874c57Selowe 				if (mod && !PP_TOXIC(pp)) {
20127c478bd9Sstevel@tonic-gate 					/*
20137c478bd9Sstevel@tonic-gate 					 * Lock all constituent pages
20147c478bd9Sstevel@tonic-gate 					 * of a large page to ensure
20157c478bd9Sstevel@tonic-gate 					 * that p_szc won't change.
20167c478bd9Sstevel@tonic-gate 					 */
20177c478bd9Sstevel@tonic-gate 					if (!group_page_trylock(pp, SE_EXCL)) {
20187c478bd9Sstevel@tonic-gate 						MDSTAT_INCR(mhp, gptlmodfail);
20197c478bd9Sstevel@tonic-gate 						page_unlock(pp);
20207c478bd9Sstevel@tonic-gate 						mutex_enter(&mhp->mh_mutex);
20217c478bd9Sstevel@tonic-gate 						continue;
20227c478bd9Sstevel@tonic-gate 					}
20237c478bd9Sstevel@tonic-gate 					pp_targ = page_get_replacement_page(pp,
20247c478bd9Sstevel@tonic-gate 					    NULL, 0);
20257c478bd9Sstevel@tonic-gate 					if (pp_targ != NULL) {
20267c478bd9Sstevel@tonic-gate 						MDSTAT_INCR(mhp, nmodreloc);
20277c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
20287c478bd9Sstevel@tonic-gate 						ntick_pgrp =
20297c478bd9Sstevel@tonic-gate 						    (uint64_t)ddi_get_lbolt() -
20307c478bd9Sstevel@tonic-gate 						    start_pgrp;
20317c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
20327c478bd9Sstevel@tonic-gate 						MDSTAT_PGRP(mhp, ntick_pgrp);
20337c478bd9Sstevel@tonic-gate 						goto reloc;
20347c478bd9Sstevel@tonic-gate 					}
20357c478bd9Sstevel@tonic-gate 					group_page_unlock(pp);
20367c478bd9Sstevel@tonic-gate 				}
20377c478bd9Sstevel@tonic-gate 
20387c478bd9Sstevel@tonic-gate 				if (!page_try_demote_pages(pp)) {
20397c478bd9Sstevel@tonic-gate 					MDSTAT_INCR(mhp, demotefail);
20407c478bd9Sstevel@tonic-gate 					page_unlock(pp);
20417c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
20427c478bd9Sstevel@tonic-gate 					ntick_pgrp = (uint64_t)ddi_get_lbolt() -
20437c478bd9Sstevel@tonic-gate 					    start_pgrp;
20447c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
20457c478bd9Sstevel@tonic-gate 					MDSTAT_PGRP(mhp, ntick_pgrp);
20467c478bd9Sstevel@tonic-gate 					mutex_enter(&mhp->mh_mutex);
20477c478bd9Sstevel@tonic-gate 					continue;
20487c478bd9Sstevel@tonic-gate 				}
20497c478bd9Sstevel@tonic-gate 
20507c478bd9Sstevel@tonic-gate 				/*
20517c478bd9Sstevel@tonic-gate 				 * Regular 'page-out'.
20527c478bd9Sstevel@tonic-gate 				 */
20537c478bd9Sstevel@tonic-gate 				if (!mod) {
20547c478bd9Sstevel@tonic-gate 					MDSTAT_INCR(mhp, ndestroy);
20557c478bd9Sstevel@tonic-gate 					page_destroy(pp, 1);
20567c478bd9Sstevel@tonic-gate 					/*
20577c478bd9Sstevel@tonic-gate 					 * page_destroy was called with
20587c478bd9Sstevel@tonic-gate 					 * dontfree. As long as p_lckcnt
20597c478bd9Sstevel@tonic-gate 					 * and p_cowcnt are both zero, the
20607c478bd9Sstevel@tonic-gate 					 * only additional action of
20617c478bd9Sstevel@tonic-gate 					 * page_destroy with !dontfree is to
20627c478bd9Sstevel@tonic-gate 					 * call page_free, so we can collect
20637c478bd9Sstevel@tonic-gate 					 * the page here.
20647c478bd9Sstevel@tonic-gate 					 */
20657c478bd9Sstevel@tonic-gate 					collected++;
20667c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
20677c478bd9Sstevel@tonic-gate 					ntick_pgrp = (uint64_t)ddi_get_lbolt() -
20687c478bd9Sstevel@tonic-gate 					    start_pgrp;
20697c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
20707c478bd9Sstevel@tonic-gate 					MDSTAT_PGRP(mhp, ntick_pgrp);
20717c478bd9Sstevel@tonic-gate 					mutex_enter(&mhp->mh_mutex);
20727c478bd9Sstevel@tonic-gate 					page_delete_collect(pp, mhp);
20737c478bd9Sstevel@tonic-gate 					mdsp->mds_bitmap[bit / NBPBMW] |=
20747c478bd9Sstevel@tonic-gate 					    (1 << (bit % NBPBMW));
20757c478bd9Sstevel@tonic-gate 					continue;
20767c478bd9Sstevel@tonic-gate 				}
20777c478bd9Sstevel@tonic-gate 				/*
20787c478bd9Sstevel@tonic-gate 				 * The page is toxic and the mod bit is
20797c478bd9Sstevel@tonic-gate 				 * set, we cannot do anything here to deal
20807c478bd9Sstevel@tonic-gate 				 * with it.
20817c478bd9Sstevel@tonic-gate 				 */
2082db874c57Selowe 				if (PP_TOXIC(pp)) {
20837c478bd9Sstevel@tonic-gate 					page_unlock(pp);
20847c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
20857c478bd9Sstevel@tonic-gate 					ntick_pgrp = (uint64_t)ddi_get_lbolt() -
20867c478bd9Sstevel@tonic-gate 					    start_pgrp;
20877c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
20887c478bd9Sstevel@tonic-gate 					MDSTAT_PGRP(mhp, ntick_pgrp);
20897c478bd9Sstevel@tonic-gate 					MDSTAT_INCR(mhp, modtoxic);
20907c478bd9Sstevel@tonic-gate 					mutex_enter(&mhp->mh_mutex);
20917c478bd9Sstevel@tonic-gate 					continue;
20927c478bd9Sstevel@tonic-gate 				}
20937c478bd9Sstevel@tonic-gate 				MDSTAT_INCR(mhp, nputpage);
20947c478bd9Sstevel@tonic-gate 				vp = pp->p_vnode;
20957c478bd9Sstevel@tonic-gate 				offset = pp->p_offset;
20967c478bd9Sstevel@tonic-gate 				VN_HOLD(vp);
20977c478bd9Sstevel@tonic-gate 				page_unlock(pp);
20987c478bd9Sstevel@tonic-gate 				(void) VOP_PUTPAGE(vp, offset, PAGESIZE,
2099da6c28aaSamw 				    B_INVAL|B_FORCE, kcred, NULL);
21007c478bd9Sstevel@tonic-gate 				VN_RELE(vp);
21017c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
21027c478bd9Sstevel@tonic-gate 				ntick_pgrp = (uint64_t)ddi_get_lbolt() -
21037c478bd9Sstevel@tonic-gate 				    start_pgrp;
21047c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
21057c478bd9Sstevel@tonic-gate 				MDSTAT_PGRP(mhp, ntick_pgrp);
21067c478bd9Sstevel@tonic-gate 				/*
21077c478bd9Sstevel@tonic-gate 				 * Try to get the page back immediately
21087c478bd9Sstevel@tonic-gate 				 * so that it can be collected.
21097c478bd9Sstevel@tonic-gate 				 */
21107c478bd9Sstevel@tonic-gate 				pp = page_numtopp_nolock(pfn);
21117c478bd9Sstevel@tonic-gate 				if (pp == NULL) {
21127c478bd9Sstevel@tonic-gate 					MDSTAT_INCR(mhp, nnoreclaim);
21137c478bd9Sstevel@tonic-gate 					/*
21147c478bd9Sstevel@tonic-gate 					 * This should not happen as this
21157c478bd9Sstevel@tonic-gate 					 * thread is deleting the page.
21167c478bd9Sstevel@tonic-gate 					 * If this code is generalized, this
21177c478bd9Sstevel@tonic-gate 					 * becomes a reality.
21187c478bd9Sstevel@tonic-gate 					 */
21197c478bd9Sstevel@tonic-gate #ifdef DEBUG
21207c478bd9Sstevel@tonic-gate 					cmn_err(CE_WARN,
21217c478bd9Sstevel@tonic-gate 					    "delete_memory_thread(0x%p) "
21227c478bd9Sstevel@tonic-gate 					    "pfn 0x%lx has no page_t",
21237c478bd9Sstevel@tonic-gate 					    (void *)mhp, pfn);
21247c478bd9Sstevel@tonic-gate #endif /* DEBUG */
21257c478bd9Sstevel@tonic-gate 					mutex_enter(&mhp->mh_mutex);
21267c478bd9Sstevel@tonic-gate 					continue;
21277c478bd9Sstevel@tonic-gate 				}
21287c478bd9Sstevel@tonic-gate 				if (page_try_reclaim_lock(pp, SE_EXCL,
2129db874c57Selowe 				    SE_EXCL_WANTED | SE_RETIRED)) {
21307c478bd9Sstevel@tonic-gate 					if (PP_ISFREE(pp)) {
21317c478bd9Sstevel@tonic-gate 						goto free_page_collect;
21327c478bd9Sstevel@tonic-gate 					}
21337c478bd9Sstevel@tonic-gate 					page_unlock(pp);
21347c478bd9Sstevel@tonic-gate 				}
21357c478bd9Sstevel@tonic-gate 				MDSTAT_INCR(mhp, nnoreclaim);
21367c478bd9Sstevel@tonic-gate 				mutex_enter(&mhp->mh_mutex);
21377c478bd9Sstevel@tonic-gate 				continue;
21387c478bd9Sstevel@tonic-gate 
21397c478bd9Sstevel@tonic-gate 			reloc:
21407c478bd9Sstevel@tonic-gate 				/*
21417c478bd9Sstevel@tonic-gate 				 * Got some freemem and a target
21427c478bd9Sstevel@tonic-gate 				 * page, so move the data to avoid
21437c478bd9Sstevel@tonic-gate 				 * I/O and lock problems.
21447c478bd9Sstevel@tonic-gate 				 */
21457c478bd9Sstevel@tonic-gate 				ASSERT(!page_iolock_assert(pp));
21467c478bd9Sstevel@tonic-gate 				MDSTAT_INCR(mhp, nreloc);
21477c478bd9Sstevel@tonic-gate 				/*
21487c478bd9Sstevel@tonic-gate 				 * page_relocate() will return pgcnt: the
21497c478bd9Sstevel@tonic-gate 				 * number of consecutive pages relocated.
21507c478bd9Sstevel@tonic-gate 				 * If it is successful, pp will be a
21517c478bd9Sstevel@tonic-gate 				 * linked list of the page structs that
21527c478bd9Sstevel@tonic-gate 				 * were relocated. If page_relocate() is
21537c478bd9Sstevel@tonic-gate 				 * unsuccessful, pp will be unmodified.
21547c478bd9Sstevel@tonic-gate 				 */
21557c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
21567c478bd9Sstevel@tonic-gate 				start_pgrp = ddi_get_lbolt();
21577c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
21587c478bd9Sstevel@tonic-gate 				result = page_relocate(&pp, &pp_targ, 0, 0,
21597c478bd9Sstevel@tonic-gate 				    &pgcnt, NULL);
21607c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
21617c478bd9Sstevel@tonic-gate 				ntick_pgrp = (uint64_t)ddi_get_lbolt() -
21627c478bd9Sstevel@tonic-gate 				    start_pgrp;
21637c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
21647c478bd9Sstevel@tonic-gate 				MDSTAT_PGRP(mhp, ntick_pgrp);
21657c478bd9Sstevel@tonic-gate 				if (result != 0) {
21667c478bd9Sstevel@tonic-gate 					MDSTAT_INCR(mhp, nrelocfail);
21677c478bd9Sstevel@tonic-gate 					/*
21687c478bd9Sstevel@tonic-gate 					 * We did not succeed. We need
21697c478bd9Sstevel@tonic-gate 					 * to give the pp_targ pages back.
21707c478bd9Sstevel@tonic-gate 					 * page_free(pp_targ, 1) without
21717c478bd9Sstevel@tonic-gate 					 * the freemem accounting.
21727c478bd9Sstevel@tonic-gate 					 */
21737c478bd9Sstevel@tonic-gate 					group_page_unlock(pp);
21747c478bd9Sstevel@tonic-gate 					page_free_replacement_page(pp_targ);
21757c478bd9Sstevel@tonic-gate 					page_unlock(pp);
21767c478bd9Sstevel@tonic-gate 					mutex_enter(&mhp->mh_mutex);
21777c478bd9Sstevel@tonic-gate 					continue;
21787c478bd9Sstevel@tonic-gate 				}
21797c478bd9Sstevel@tonic-gate 
21807c478bd9Sstevel@tonic-gate 				/*
21817c478bd9Sstevel@tonic-gate 				 * We will then collect pgcnt pages.
21827c478bd9Sstevel@tonic-gate 				 */
21837c478bd9Sstevel@tonic-gate 				ASSERT(pgcnt > 0);
21847c478bd9Sstevel@tonic-gate 				mutex_enter(&mhp->mh_mutex);
21857c478bd9Sstevel@tonic-gate 				/*
21867c478bd9Sstevel@tonic-gate 				 * We need to make sure freemem_left is
21877c478bd9Sstevel@tonic-gate 				 * large enough.
21887c478bd9Sstevel@tonic-gate 				 */
21897c478bd9Sstevel@tonic-gate 				while ((freemem_left < pgcnt) &&
21907c478bd9Sstevel@tonic-gate 				    (!mhp->mh_cancel)) {
21917c478bd9Sstevel@tonic-gate 					freemem_left +=
21927c478bd9Sstevel@tonic-gate 					    delthr_get_freemem(mhp);
21937c478bd9Sstevel@tonic-gate 				}
21947c478bd9Sstevel@tonic-gate 
21957c478bd9Sstevel@tonic-gate 				/*
21967c478bd9Sstevel@tonic-gate 				 * Do not proceed if mh_cancel is set.
21977c478bd9Sstevel@tonic-gate 				 */
21987c478bd9Sstevel@tonic-gate 				if (mhp->mh_cancel) {
21997c478bd9Sstevel@tonic-gate 					while (pp_targ != NULL) {
22007c478bd9Sstevel@tonic-gate 						/*
22017c478bd9Sstevel@tonic-gate 						 * Unlink and unlock each page.
22027c478bd9Sstevel@tonic-gate 						 */
22037c478bd9Sstevel@tonic-gate 						tpp_targ = pp_targ;
22047c478bd9Sstevel@tonic-gate 						page_sub(&pp_targ, tpp_targ);
22057c478bd9Sstevel@tonic-gate 						page_unlock(tpp_targ);
22067c478bd9Sstevel@tonic-gate 					}
22077c478bd9Sstevel@tonic-gate 					/*
22087c478bd9Sstevel@tonic-gate 					 * We need to give the pp pages back.
22097c478bd9Sstevel@tonic-gate 					 * page_free(pp, 1) without the
22107c478bd9Sstevel@tonic-gate 					 * freemem accounting.
22117c478bd9Sstevel@tonic-gate 					 */
22127c478bd9Sstevel@tonic-gate 					page_free_replacement_page(pp);
22137c478bd9Sstevel@tonic-gate 					break;
22147c478bd9Sstevel@tonic-gate 				}
22157c478bd9Sstevel@tonic-gate 
22167c478bd9Sstevel@tonic-gate 				/* Now remove pgcnt from freemem_left */
22177c478bd9Sstevel@tonic-gate 				freemem_left -= pgcnt;
22187c478bd9Sstevel@tonic-gate 				ASSERT(freemem_left >= 0);
22197c478bd9Sstevel@tonic-gate 				szc = pp->p_szc;
22207c478bd9Sstevel@tonic-gate 				while (pp != NULL) {
22217c478bd9Sstevel@tonic-gate 					/*
22227c478bd9Sstevel@tonic-gate 					 * pp and pp_targ were passed back as
22237c478bd9Sstevel@tonic-gate 					 * a linked list of pages.
22247c478bd9Sstevel@tonic-gate 					 * Unlink and unlock each page.
22257c478bd9Sstevel@tonic-gate 					 */
22267c478bd9Sstevel@tonic-gate 					tpp_targ = pp_targ;
22277c478bd9Sstevel@tonic-gate 					page_sub(&pp_targ, tpp_targ);
22287c478bd9Sstevel@tonic-gate 					page_unlock(tpp_targ);
22297c478bd9Sstevel@tonic-gate 					/*
22307c478bd9Sstevel@tonic-gate 					 * The original page is now free
22317c478bd9Sstevel@tonic-gate 					 * so remove it from the linked
22327c478bd9Sstevel@tonic-gate 					 * list and collect it.
22337c478bd9Sstevel@tonic-gate 					 */
22347c478bd9Sstevel@tonic-gate 					tpp = pp;
22357c478bd9Sstevel@tonic-gate 					page_sub(&pp, tpp);
22367c478bd9Sstevel@tonic-gate 					pfn = page_pptonum(tpp);
22377c478bd9Sstevel@tonic-gate 					collected++;
22387c478bd9Sstevel@tonic-gate 					ASSERT(PAGE_EXCL(tpp));
22397c478bd9Sstevel@tonic-gate 					ASSERT(tpp->p_vnode == NULL);
22407c478bd9Sstevel@tonic-gate 					ASSERT(!hat_page_is_mapped(tpp));
22417c478bd9Sstevel@tonic-gate 					ASSERT(tpp->p_szc == szc);
22427c478bd9Sstevel@tonic-gate 					tpp->p_szc = 0;
22437c478bd9Sstevel@tonic-gate 					page_delete_collect(tpp, mhp);
22447c478bd9Sstevel@tonic-gate 					bit = pfn - mdsp->mds_base;
22457c478bd9Sstevel@tonic-gate 					mdsp->mds_bitmap[bit / NBPBMW] |=
22467c478bd9Sstevel@tonic-gate 					    (1 << (bit % NBPBMW));
22477c478bd9Sstevel@tonic-gate 				}
22487c478bd9Sstevel@tonic-gate 				ASSERT(pp_targ == NULL);
22497c478bd9Sstevel@tonic-gate 			}
22507c478bd9Sstevel@tonic-gate 		}
22517c478bd9Sstevel@tonic-gate 		first_scan = 0;
22527c478bd9Sstevel@tonic-gate 		if ((mhp->mh_cancel == 0) && (mhp->mh_hold_todo != 0) &&
22537c478bd9Sstevel@tonic-gate 		    (collected == 0)) {
22547c478bd9Sstevel@tonic-gate 			/*
22557c478bd9Sstevel@tonic-gate 			 * This code is needed as we cannot wait
22567c478bd9Sstevel@tonic-gate 			 * for a page to be locked OR the delete to
22577c478bd9Sstevel@tonic-gate 			 * be cancelled.  Also, we must delay so
22587c478bd9Sstevel@tonic-gate 			 * that other threads get a chance to run
22597c478bd9Sstevel@tonic-gate 			 * on our cpu, otherwise page locks may be
22607c478bd9Sstevel@tonic-gate 			 * held indefinitely by those threads.
22617c478bd9Sstevel@tonic-gate 			 */
22627c478bd9Sstevel@tonic-gate 			MDSTAT_INCR(mhp, ndelay);
22637c478bd9Sstevel@tonic-gate 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
2264d3d50737SRafael Vanoni 			(void) cv_reltimedwait(&mhp->mh_cv, &mhp->mh_mutex,
2265d3d50737SRafael Vanoni 			    DEL_BUSY_WAIT_TICKS, TR_CLOCK_TICK);
22667c478bd9Sstevel@tonic-gate 			CALLB_CPR_SAFE_END(&cprinfo, &mhp->mh_mutex);
22677c478bd9Sstevel@tonic-gate 		}
22687c478bd9Sstevel@tonic-gate 	}
22697c478bd9Sstevel@tonic-gate 	/* stop the dr aio cleanup thread */
22707c478bd9Sstevel@tonic-gate 	mhp->mh_dr_aio_cleanup_cancel = 1;
22717c478bd9Sstevel@tonic-gate 	transit_list_collect(mhp, 0);
22727c478bd9Sstevel@tonic-gate 	if (freemem_left != 0) {
22737c478bd9Sstevel@tonic-gate 		/* Return any surplus. */
22747c478bd9Sstevel@tonic-gate 		page_create_putback(freemem_left);
22757c478bd9Sstevel@tonic-gate 		freemem_left = 0;
22767c478bd9Sstevel@tonic-gate 	}
22777c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
22787c478bd9Sstevel@tonic-gate 	ntick_total = (uint64_t)ddi_get_lbolt() - start_total;
22797c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
22807c478bd9Sstevel@tonic-gate 	MDSTAT_TOTAL(mhp, ntick_total);
22817c478bd9Sstevel@tonic-gate 	MDSTAT_PRINT(mhp);
22827c478bd9Sstevel@tonic-gate 
22837c478bd9Sstevel@tonic-gate 	/*
22847c478bd9Sstevel@tonic-gate 	 * If the memory delete was cancelled, exclusive-wanted bits must
2285db874c57Selowe 	 * be cleared. If there are retired pages being deleted, they need
2286db874c57Selowe 	 * to be unretired.
22877c478bd9Sstevel@tonic-gate 	 */
22887c478bd9Sstevel@tonic-gate 	for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
22897c478bd9Sstevel@tonic-gate 	    mdsp = mdsp->mds_next) {
22907c478bd9Sstevel@tonic-gate 		pfn_t pfn, p_end;
22917c478bd9Sstevel@tonic-gate 
22927c478bd9Sstevel@tonic-gate 		p_end = mdsp->mds_base + mdsp->mds_npgs;
22937c478bd9Sstevel@tonic-gate 		for (pfn = mdsp->mds_base; pfn < p_end; pfn++) {
22947c478bd9Sstevel@tonic-gate 			page_t *pp;
22957c478bd9Sstevel@tonic-gate 			pgcnt_t bit;
22967c478bd9Sstevel@tonic-gate 
22977c478bd9Sstevel@tonic-gate 			bit = pfn - mdsp->mds_base;
22987c478bd9Sstevel@tonic-gate 			if (mhp->mh_cancel) {
22997c478bd9Sstevel@tonic-gate 				pp = page_numtopp_nolock(pfn);
23007c478bd9Sstevel@tonic-gate 				if (pp != NULL) {
23017c478bd9Sstevel@tonic-gate 					if ((mdsp->mds_bitmap[bit / NBPBMW] &
23027c478bd9Sstevel@tonic-gate 					    (1 << (bit % NBPBMW))) == 0) {
23037c478bd9Sstevel@tonic-gate 						page_lock_clr_exclwanted(pp);
23047c478bd9Sstevel@tonic-gate 					}
23057c478bd9Sstevel@tonic-gate 				}
23067c478bd9Sstevel@tonic-gate 			} else {
23077c478bd9Sstevel@tonic-gate 				pp = NULL;
23087c478bd9Sstevel@tonic-gate 			}
23097c478bd9Sstevel@tonic-gate 			if ((mdsp->mds_bitmap_retired[bit / NBPBMW] &
23107c478bd9Sstevel@tonic-gate 			    (1 << (bit % NBPBMW))) != 0) {
23117c478bd9Sstevel@tonic-gate 				/* do we already have pp? */
23127c478bd9Sstevel@tonic-gate 				if (pp == NULL) {
23137c478bd9Sstevel@tonic-gate 					pp = page_numtopp_nolock(pfn);
23147c478bd9Sstevel@tonic-gate 				}
23157c478bd9Sstevel@tonic-gate 				ASSERT(pp != NULL);
2316db874c57Selowe 				ASSERT(PP_RETIRED(pp));
23177c478bd9Sstevel@tonic-gate 				if (mhp->mh_cancel != 0) {
2318db874c57Selowe 					page_unlock(pp);
23197c478bd9Sstevel@tonic-gate 					/*
23207c478bd9Sstevel@tonic-gate 					 * To satisfy ASSERT below in
23217c478bd9Sstevel@tonic-gate 					 * cancel code.
23227c478bd9Sstevel@tonic-gate 					 */
23237c478bd9Sstevel@tonic-gate 					mhp->mh_hold_todo++;
23247c478bd9Sstevel@tonic-gate 				} else {
23258b464eb8Smec 					(void) page_unretire_pp(pp,
23268b464eb8Smec 					    PR_UNR_CLEAN);
23277c478bd9Sstevel@tonic-gate 				}
23287c478bd9Sstevel@tonic-gate 			}
23297c478bd9Sstevel@tonic-gate 		}
23307c478bd9Sstevel@tonic-gate 	}
23317c478bd9Sstevel@tonic-gate 	/*
23327c478bd9Sstevel@tonic-gate 	 * Free retired page bitmap and collected page bitmap
23337c478bd9Sstevel@tonic-gate 	 */
23347c478bd9Sstevel@tonic-gate 	for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
23357c478bd9Sstevel@tonic-gate 	    mdsp = mdsp->mds_next) {
23367c478bd9Sstevel@tonic-gate 		ASSERT(mdsp->mds_bitmap_retired != NULL);
23377c478bd9Sstevel@tonic-gate 		kmem_free(mdsp->mds_bitmap_retired, MDS_BITMAPBYTES(mdsp));
23387c478bd9Sstevel@tonic-gate 		mdsp->mds_bitmap_retired = NULL;	/* Paranoia. */
23397c478bd9Sstevel@tonic-gate 		ASSERT(mdsp->mds_bitmap != NULL);
23407c478bd9Sstevel@tonic-gate 		kmem_free(mdsp->mds_bitmap, MDS_BITMAPBYTES(mdsp));
23417c478bd9Sstevel@tonic-gate 		mdsp->mds_bitmap = NULL;	/* Paranoia. */
23427c478bd9Sstevel@tonic-gate 	}
23437c478bd9Sstevel@tonic-gate 
23447c478bd9Sstevel@tonic-gate 	/* wait for our dr aio cancel thread to exit */
23457c478bd9Sstevel@tonic-gate 	while (!(mhp->mh_aio_cleanup_done)) {
23467c478bd9Sstevel@tonic-gate 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
23477c478bd9Sstevel@tonic-gate 		delay(drv_usectohz(DR_AIO_CLEANUP_DELAY));
23487c478bd9Sstevel@tonic-gate 		CALLB_CPR_SAFE_END(&cprinfo, &mhp->mh_mutex);
23497c478bd9Sstevel@tonic-gate 	}
23507c478bd9Sstevel@tonic-gate refused:
23517c478bd9Sstevel@tonic-gate 	if (mhp->mh_cancel != 0) {
23527c478bd9Sstevel@tonic-gate 		page_t *pp;
23537c478bd9Sstevel@tonic-gate 
23547c478bd9Sstevel@tonic-gate 		comp_code = mhp->mh_cancel;
23557c478bd9Sstevel@tonic-gate 		/*
23567c478bd9Sstevel@tonic-gate 		 * Go through list of deleted pages (mh_deleted) freeing
23577c478bd9Sstevel@tonic-gate 		 * them.
23587c478bd9Sstevel@tonic-gate 		 */
23597c478bd9Sstevel@tonic-gate 		while ((pp = mhp->mh_deleted) != NULL) {
23607c478bd9Sstevel@tonic-gate 			mhp->mh_deleted = pp->p_next;
23617c478bd9Sstevel@tonic-gate 			mhp->mh_hold_todo++;
23627c478bd9Sstevel@tonic-gate 			mutex_exit(&mhp->mh_mutex);
23637c478bd9Sstevel@tonic-gate 			/* Restore p_next. */
23647c478bd9Sstevel@tonic-gate 			pp->p_next = pp->p_prev;
23657c478bd9Sstevel@tonic-gate 			if (PP_ISFREE(pp)) {
23667c478bd9Sstevel@tonic-gate 				cmn_err(CE_PANIC,
23677c478bd9Sstevel@tonic-gate 				    "page %p is free",
23687c478bd9Sstevel@tonic-gate 				    (void *)pp);
23697c478bd9Sstevel@tonic-gate 			}
23707c478bd9Sstevel@tonic-gate 			page_free(pp, 1);
23717c478bd9Sstevel@tonic-gate 			mutex_enter(&mhp->mh_mutex);
23727c478bd9Sstevel@tonic-gate 		}
23737c478bd9Sstevel@tonic-gate 		ASSERT(mhp->mh_hold_todo == mhp->mh_vm_pages);
23747c478bd9Sstevel@tonic-gate 
23757c478bd9Sstevel@tonic-gate 		mutex_exit(&mhp->mh_mutex);
23767c478bd9Sstevel@tonic-gate 		put_availrmem(mhp->mh_vm_pages);
23777c478bd9Sstevel@tonic-gate 		mutex_enter(&mhp->mh_mutex);
23787c478bd9Sstevel@tonic-gate 
23797c478bd9Sstevel@tonic-gate 		goto t_exit;
23807c478bd9Sstevel@tonic-gate 	}
23817c478bd9Sstevel@tonic-gate 
23827c478bd9Sstevel@tonic-gate 	/*
23837c478bd9Sstevel@tonic-gate 	 * All the pages are no longer in use and are exclusively locked.
23847c478bd9Sstevel@tonic-gate 	 */
23857c478bd9Sstevel@tonic-gate 
23867c478bd9Sstevel@tonic-gate 	mhp->mh_deleted = NULL;
23877c478bd9Sstevel@tonic-gate 
23887c478bd9Sstevel@tonic-gate 	kphysm_del_cleanup(mhp);
23897c478bd9Sstevel@tonic-gate 
239073347c69Smb158278 	/*
23919853d9e8SJason Beloro 	 * mem_node_del_range needs to be after kphysm_del_cleanup so
239273347c69Smb158278 	 * that the mem_node_config[] will remain intact for the cleanup.
239373347c69Smb158278 	 */
239473347c69Smb158278 	for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
239573347c69Smb158278 	    mdsp = mdsp->mds_next) {
23969853d9e8SJason Beloro 		mem_node_del_range(mdsp->mds_base,
23979853d9e8SJason Beloro 		    mdsp->mds_base + mdsp->mds_npgs - 1);
239873347c69Smb158278 	}
2399af4c679fSSean McEnroe 	/* cleanup the page counters */
2400af4c679fSSean McEnroe 	page_ctrs_cleanup();
240173347c69Smb158278 
24027c478bd9Sstevel@tonic-gate 	comp_code = KPHYSM_OK;
24037c478bd9Sstevel@tonic-gate 
24047c478bd9Sstevel@tonic-gate t_exit:
24057c478bd9Sstevel@tonic-gate 	mutex_exit(&mhp->mh_mutex);
24067c478bd9Sstevel@tonic-gate 	kphysm_setup_post_del(mhp->mh_vm_pages,
24077c478bd9Sstevel@tonic-gate 	    (comp_code == KPHYSM_OK) ? 0 : 1);
24087c478bd9Sstevel@tonic-gate 	mutex_enter(&mhp->mh_mutex);
24097c478bd9Sstevel@tonic-gate 
24107c478bd9Sstevel@tonic-gate early_exit:
24117c478bd9Sstevel@tonic-gate 	/* mhp->mh_mutex exited by CALLB_CPR_EXIT() */
24127c478bd9Sstevel@tonic-gate 	mhp->mh_state = MHND_DONE;
24137c478bd9Sstevel@tonic-gate 	del_complete_funcp = mhp->mh_delete_complete;
24147c478bd9Sstevel@tonic-gate 	del_complete_arg = mhp->mh_delete_complete_arg;
24157c478bd9Sstevel@tonic-gate 	CALLB_CPR_EXIT(&cprinfo);
24167c478bd9Sstevel@tonic-gate 	(*del_complete_funcp)(del_complete_arg, comp_code);
24177c478bd9Sstevel@tonic-gate 	thread_exit();
24187c478bd9Sstevel@tonic-gate 	/*NOTREACHED*/
24197c478bd9Sstevel@tonic-gate }
24207c478bd9Sstevel@tonic-gate 
24217c478bd9Sstevel@tonic-gate /*
24227c478bd9Sstevel@tonic-gate  * Start the delete of the memory from the system.
24237c478bd9Sstevel@tonic-gate  */
24247c478bd9Sstevel@tonic-gate int
kphysm_del_start(memhandle_t handle,void (* complete)(void *,int),void * complete_arg)24257c478bd9Sstevel@tonic-gate kphysm_del_start(
24267c478bd9Sstevel@tonic-gate 	memhandle_t handle,
24277c478bd9Sstevel@tonic-gate 	void (*complete)(void *, int),
24287c478bd9Sstevel@tonic-gate 	void *complete_arg)
24297c478bd9Sstevel@tonic-gate {
24307c478bd9Sstevel@tonic-gate 	struct mem_handle *mhp;
24317c478bd9Sstevel@tonic-gate 
24327c478bd9Sstevel@tonic-gate 	mhp = kphysm_lookup_mem_handle(handle);
24337c478bd9Sstevel@tonic-gate 	if (mhp == NULL) {
24347c478bd9Sstevel@tonic-gate 		return (KPHYSM_EHANDLE);
24357c478bd9Sstevel@tonic-gate 	}
24367c478bd9Sstevel@tonic-gate 	switch (mhp->mh_state) {
24377c478bd9Sstevel@tonic-gate 	case MHND_FREE:
24387c478bd9Sstevel@tonic-gate 		ASSERT(mhp->mh_state != MHND_FREE);
24397c478bd9Sstevel@tonic-gate 		mutex_exit(&mhp->mh_mutex);
24407c478bd9Sstevel@tonic-gate 		return (KPHYSM_EHANDLE);
24417c478bd9Sstevel@tonic-gate 	case MHND_INIT:
24427c478bd9Sstevel@tonic-gate 		break;
24437c478bd9Sstevel@tonic-gate 	case MHND_STARTING:
24447c478bd9Sstevel@tonic-gate 	case MHND_RUNNING:
24457c478bd9Sstevel@tonic-gate 		mutex_exit(&mhp->mh_mutex);
24467c478bd9Sstevel@tonic-gate 		return (KPHYSM_ESEQUENCE);
24477c478bd9Sstevel@tonic-gate 	case MHND_DONE:
24487c478bd9Sstevel@tonic-gate 		mutex_exit(&mhp->mh_mutex);
24497c478bd9Sstevel@tonic-gate 		return (KPHYSM_ESEQUENCE);
24507c478bd9Sstevel@tonic-gate 	case MHND_RELEASE:
24517c478bd9Sstevel@tonic-gate 		mutex_exit(&mhp->mh_mutex);
24527c478bd9Sstevel@tonic-gate 		return (KPHYSM_ESEQUENCE);
24537c478bd9Sstevel@tonic-gate 	default:
24547c478bd9Sstevel@tonic-gate #ifdef DEBUG
24557c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "kphysm_del_start(0x%p) state corrupt %d",
24567c478bd9Sstevel@tonic-gate 		    (void *)mhp, mhp->mh_state);
24577c478bd9Sstevel@tonic-gate #endif /* DEBUG */
24587c478bd9Sstevel@tonic-gate 		mutex_exit(&mhp->mh_mutex);
24597c478bd9Sstevel@tonic-gate 		return (KPHYSM_EHANDLE);
24607c478bd9Sstevel@tonic-gate 	}
24617c478bd9Sstevel@tonic-gate 
24627c478bd9Sstevel@tonic-gate 	if (mhp->mh_transit.trl_spans == NULL) {
24637c478bd9Sstevel@tonic-gate 		mutex_exit(&mhp->mh_mutex);
24647c478bd9Sstevel@tonic-gate 		return (KPHYSM_ENOWORK);
24657c478bd9Sstevel@tonic-gate 	}
24667c478bd9Sstevel@tonic-gate 
24677c478bd9Sstevel@tonic-gate 	ASSERT(complete != NULL);
24687c478bd9Sstevel@tonic-gate 	mhp->mh_delete_complete = complete;
24697c478bd9Sstevel@tonic-gate 	mhp->mh_delete_complete_arg = complete_arg;
24707c478bd9Sstevel@tonic-gate 	mhp->mh_state = MHND_STARTING;
24717c478bd9Sstevel@tonic-gate 	/*
24727c478bd9Sstevel@tonic-gate 	 * Release the mutex in case thread_create sleeps.
24737c478bd9Sstevel@tonic-gate 	 */
24747c478bd9Sstevel@tonic-gate 	mutex_exit(&mhp->mh_mutex);
24757c478bd9Sstevel@tonic-gate 
24767c478bd9Sstevel@tonic-gate 	/*
24777c478bd9Sstevel@tonic-gate 	 * The "obvious" process for this thread is pageout (proc_pageout)
24787c478bd9Sstevel@tonic-gate 	 * but this gives the thread too much power over freemem
24797c478bd9Sstevel@tonic-gate 	 * which results in freemem starvation.
24807c478bd9Sstevel@tonic-gate 	 */
24817c478bd9Sstevel@tonic-gate 	(void) thread_create(NULL, 0, delete_memory_thread, mhp, 0, &p0,
24827c478bd9Sstevel@tonic-gate 	    TS_RUN, maxclsyspri - 1);
24837c478bd9Sstevel@tonic-gate 
24847c478bd9Sstevel@tonic-gate 	return (KPHYSM_OK);
24857c478bd9Sstevel@tonic-gate }
24867c478bd9Sstevel@tonic-gate 
static kmutex_t pp_dummy_lock;		/* Protects init. of pp_dummy. */
static caddr_t pp_dummy;	/* Dummy page_t area; NULL until initialized. */
static pgcnt_t pp_dummy_npages;		/* Number of pages backing pp_dummy. */
static pfn_t *pp_dummy_pfn;	/* Array of dummy pfns. */
24917c478bd9Sstevel@tonic-gate 
24927c478bd9Sstevel@tonic-gate static void
memseg_remap_init_pages(page_t * pages,page_t * epages)24937c478bd9Sstevel@tonic-gate memseg_remap_init_pages(page_t *pages, page_t *epages)
24947c478bd9Sstevel@tonic-gate {
24957c478bd9Sstevel@tonic-gate 	page_t *pp;
24967c478bd9Sstevel@tonic-gate 
24977c478bd9Sstevel@tonic-gate 	for (pp = pages; pp < epages; pp++) {
24987c478bd9Sstevel@tonic-gate 		pp->p_pagenum = PFN_INVALID;	/* XXXX */
24997c478bd9Sstevel@tonic-gate 		pp->p_offset = (u_offset_t)-1;
25007c478bd9Sstevel@tonic-gate 		page_iolock_init(pp);
25017c478bd9Sstevel@tonic-gate 		while (!page_lock(pp, SE_EXCL, (kmutex_t *)NULL, P_RECLAIM))
25027c478bd9Sstevel@tonic-gate 			continue;
25037c478bd9Sstevel@tonic-gate 		page_lock_delete(pp);
25047c478bd9Sstevel@tonic-gate 	}
25057c478bd9Sstevel@tonic-gate }
25067c478bd9Sstevel@tonic-gate 
/*
 * One-time initialization of the dummy page area that deleted memsegs'
 * page_t mappings are redirected to.  Allocates a small page-aligned
 * region, marks its page_t contents as deleted, records the backing
 * pfns, then unmaps it from the kernel so it is only reachable via the
 * remapped metadata.  Safe to call repeatedly; later calls are no-ops.
 */
void
memseg_remap_init()
{
	mutex_enter(&pp_dummy_lock);
	if (pp_dummy == NULL) {
		uint_t dpages;
		int i;

		/*
		 * dpages starts off as the size of the structure and
		 * ends up as the minimum number of pages that will
		 * hold a whole number of page_t structures.
		 */
		dpages = sizeof (page_t);
		ASSERT(dpages != 0);
		ASSERT(dpages <= MMU_PAGESIZE);

		/*
		 * Strip the factors of two: since the page size is a
		 * power of two, the odd part of sizeof (page_t) is the
		 * smallest page count whose total byte size is a
		 * multiple of sizeof (page_t).
		 */
		while ((dpages & 1) == 0)
			dpages >>= 1;

		pp_dummy_npages = dpages;
		/*
		 * Allocate pp_dummy pages directly from static_arena,
		 * since these are whole page allocations and are
		 * referenced by physical address.  This also has the
		 * nice fringe benefit of hiding the memory from
		 * ::findleaks since it doesn't deal well with allocated
		 * kernel heap memory that doesn't have any mappings.
		 */
		pp_dummy = vmem_xalloc(static_arena, ptob(pp_dummy_npages),
		    PAGESIZE, 0, 0, NULL, NULL, VM_SLEEP);
		bzero(pp_dummy, ptob(pp_dummy_npages));
		ASSERT(((uintptr_t)pp_dummy & MMU_PAGEOFFSET) == 0);
		/* Record the pfn backing each dummy page for remapping. */
		pp_dummy_pfn = kmem_alloc(sizeof (*pp_dummy_pfn) *
		    pp_dummy_npages, KM_SLEEP);
		for (i = 0; i < pp_dummy_npages; i++) {
			pp_dummy_pfn[i] = hat_getpfnum(kas.a_hat,
			    &pp_dummy[MMU_PAGESIZE * i]);
			ASSERT(pp_dummy_pfn[i] != PFN_INVALID);
		}
		/*
		 * Initialize the page_t's to a known 'deleted' state
		 * that matches the state of deleted pages.
		 */
		memseg_remap_init_pages((page_t *)pp_dummy,
		    (page_t *)(pp_dummy + ptob(pp_dummy_npages)));
		/* Remove kmem mappings for the pages for safety. */
		hat_unload(kas.a_hat, pp_dummy, ptob(pp_dummy_npages),
		    HAT_UNLOAD_UNLOCK);
		/* Leave pp_dummy pointer set as flag that init is done. */
	}
	mutex_exit(&pp_dummy_lock);
}
25607c478bd9Sstevel@tonic-gate 
/*
 * Remap a page-aligned range of page_t's to dummy pages.
 */
void
remap_to_dummy(caddr_t va, pgcnt_t metapgs)
{
	int phase;

	ASSERT(IS_P2ALIGNED((uint64_t)(uintptr_t)va, PAGESIZE));

	/*
	 * We may start remapping at a non-zero page offset
	 * within the dummy pages since the low/high ends
	 * of the outgoing pp's could be shared by other
	 * memsegs (see memseg_remap_meta).
	 */
	phase = btop((uint64_t)(uintptr_t)va) % pp_dummy_npages;
	/*CONSTCOND*/
	ASSERT(PAGESIZE % sizeof (page_t) || phase == 0);

	/* Replace each metadata page's mapping with a dummy pfn. */
	while (metapgs != 0) {
		pgcnt_t n;
		int i, j;

		/* Work in chunks of at most pp_dummy_npages pages. */
		n = pp_dummy_npages;
		if (n > metapgs)
			n = metapgs;
		for (i = 0; i < n; i++) {
			/* Cycle through the dummy pfns, offset by phase. */
			j = (i + phase) % pp_dummy_npages;
			hat_devload(kas.a_hat, va, ptob(1), pp_dummy_pfn[j],
			    PROT_READ,
			    HAT_LOAD | HAT_LOAD_NOCONSIST |
			    HAT_LOAD_REMAP);
			va += ptob(1);
		}
		metapgs -= n;
	}
}
25997c478bd9Sstevel@tonic-gate 
26009853d9e8SJason Beloro static void
memseg_remap_to_dummy(struct memseg * seg)26019853d9e8SJason Beloro memseg_remap_to_dummy(struct memseg *seg)
26029853d9e8SJason Beloro {
26039853d9e8SJason Beloro 	caddr_t pp;
26049853d9e8SJason Beloro 	pgcnt_t metapgs;
26059853d9e8SJason Beloro 
26069853d9e8SJason Beloro 	ASSERT(memseg_is_dynamic(seg));
26079853d9e8SJason Beloro 	ASSERT(pp_dummy != NULL);
26089853d9e8SJason Beloro 
26099853d9e8SJason Beloro 
26109853d9e8SJason Beloro 	if (!memseg_includes_meta(seg)) {
26119853d9e8SJason Beloro 		memseg_remap_meta(seg);
26129853d9e8SJason Beloro 		return;
26139853d9e8SJason Beloro 	}
26149853d9e8SJason Beloro 
26159853d9e8SJason Beloro 	pp = (caddr_t)seg->pages;
26169853d9e8SJason Beloro 	metapgs = seg->pages_base - memseg_get_start(seg);
26179853d9e8SJason Beloro 	ASSERT(metapgs != 0);
26189853d9e8SJason Beloro 
26199853d9e8SJason Beloro 	seg->pages_end = seg->pages_base;
26209853d9e8SJason Beloro 
26219853d9e8SJason Beloro 	remap_to_dummy(pp, metapgs);
26229853d9e8SJason Beloro }
26239853d9e8SJason Beloro 
26247c478bd9Sstevel@tonic-gate /*
26257c478bd9Sstevel@tonic-gate  * Transition all the deleted pages to the deleted state so that
26267c478bd9Sstevel@tonic-gate  * page_lock will not wait. The page_lock_delete call will
26277c478bd9Sstevel@tonic-gate  * also wake up any waiters.
26287c478bd9Sstevel@tonic-gate  */
26297c478bd9Sstevel@tonic-gate static void
memseg_lock_delete_all(struct memseg * seg)26307c478bd9Sstevel@tonic-gate memseg_lock_delete_all(struct memseg *seg)
26317c478bd9Sstevel@tonic-gate {
26327c478bd9Sstevel@tonic-gate 	page_t *pp;
26337c478bd9Sstevel@tonic-gate 
26347c478bd9Sstevel@tonic-gate 	for (pp = seg->pages; pp < seg->epages; pp++) {
26357c478bd9Sstevel@tonic-gate 		pp->p_pagenum = PFN_INVALID;	/* XXXX */
26367c478bd9Sstevel@tonic-gate 		page_lock_delete(pp);
26377c478bd9Sstevel@tonic-gate 	}
26387c478bd9Sstevel@tonic-gate }
26397c478bd9Sstevel@tonic-gate 
/*
 * Final phase of a successful memory delete: unlink the deleted
 * memsegs from the global memsegs list, retire (or leak) their page_t
 * metadata, shrink the phys_avail/phys_install memlists and update the
 * system-wide memory accounting.  Called from delete_memory_thread
 * with mhp->mh_mutex held; drops it for the heavyweight work and
 * returns with it reacquired.
 */
static void
kphysm_del_cleanup(struct mem_handle *mhp)
{
	struct memdelspan	*mdsp;
	struct memseg		*seg;
	struct memseg   	**segpp;
	struct memseg		*seglist;
	pfn_t			p_end;
	uint64_t		avmem;
	pgcnt_t			avpgs;
	pgcnt_t			npgs;

	avpgs = mhp->mh_vm_pages;

	memsegs_lock(1);

	/*
	 * remove from main segment list.
	 */
	npgs = 0;
	seglist = NULL;
	for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
	    mdsp = mdsp->mds_next) {
		p_end = mdsp->mds_base + mdsp->mds_npgs;
		for (segpp = &memsegs; (seg = *segpp) != NULL; ) {
			if (seg->pages_base >= p_end ||
			    seg->pages_end <= mdsp->mds_base) {
				/* Span and memseg don't overlap. */
				segpp = &((*segpp)->next);
				continue;
			}
			/* An overlapping memseg must lie wholly in the span. */
			ASSERT(seg->pages_base >= mdsp->mds_base);
			ASSERT(seg->pages_end <= p_end);

			/* Negative delta: decrement the page counters. */
			PLCNT_MODIFY_MAX(seg->pages_base,
			    seg->pages_base - seg->pages_end);

			/* Hide the memseg from future scans. */
			hat_kpm_delmem_mseg_update(seg, segpp);
			*segpp = seg->next;
			membar_producer();	/* TODO: Needed? */
			npgs += MSEG_NPAGES(seg);

			/*
			 * Leave the deleted segment's next pointer intact
			 * in case a memsegs scanning loop is walking this
			 * segment concurrently.
			 */
			seg->lnext = seglist;
			seglist = seg;
		}
	}

	/* The pfn-to-memseg hash must reflect the unlinked memsegs. */
	build_pfn_hash();

	ASSERT(npgs < total_pages);
	total_pages -= npgs;

	/*
	 * Recalculate the paging parameters now total_pages has changed.
	 * This will also cause the clock hands to be reset before next use.
	 */
	setupclock(1);

	memsegs_unlock(1);

	mutex_exit(&mhp->mh_mutex);

	/* Now retire each unlinked memseg, off the global list. */
	while ((seg = seglist) != NULL) {
		pfn_t mseg_start;
		pfn_t mseg_base, mseg_end;
		pgcnt_t mseg_npgs;
		int mlret;

		seglist = seg->lnext;

		/*
		 * Put the page_t's into the deleted state to stop
		 * cv_wait()s on the pages. When we remap, the dummy
		 * page_t's will be in the same state.
		 */
		memseg_lock_delete_all(seg);
		/*
		 * Collect up information based on pages_base and pages_end
		 * early so that we can flag early that the memseg has been
		 * deleted by setting pages_end == pages_base.
		 */
		mseg_base = seg->pages_base;
		mseg_end = seg->pages_end;
		mseg_npgs = MSEG_NPAGES(seg);
		mseg_start = memseg_get_start(seg);

		if (memseg_is_dynamic(seg)) {
			/* Remap the meta data to our special dummy area. */
			memseg_remap_to_dummy(seg);

			/* Recycle the memseg's VA range for future adds. */
			mutex_enter(&memseg_lists_lock);
			seg->lnext = memseg_va_avail;
			memseg_va_avail = seg;
			mutex_exit(&memseg_lists_lock);
		} else {
			/*
			 * For memory whose page_ts were allocated
			 * at boot, we need to find a new use for
			 * the page_t memory.
			 * For the moment, just leak it.
			 * (It is held in the memseg_delete_junk list.)
			 */
			seg->pages_end = seg->pages_base;

			mutex_enter(&memseg_lists_lock);
			seg->lnext = memseg_delete_junk;
			memseg_delete_junk = seg;
			mutex_exit(&memseg_lists_lock);
		}

		/* Must not use seg now as it could be re-used. */

		memlist_write_lock();

		/* Drop the usable page range from phys_avail. */
		mlret = memlist_delete_span(
		    (uint64_t)(mseg_base) << PAGESHIFT,
		    (uint64_t)(mseg_npgs) << PAGESHIFT,
		    &phys_avail);
		ASSERT(mlret == MEML_SPANOP_OK);

		/* Drop the full extent (metadata included) from phys_install. */
		mlret = memlist_delete_span(
		    (uint64_t)(mseg_start) << PAGESHIFT,
		    (uint64_t)(mseg_end - mseg_start) <<
		    PAGESHIFT,
		    &phys_install);
		ASSERT(mlret == MEML_SPANOP_OK);
		phys_install_has_changed();

		memlist_write_unlock();
	}

	/* Recompute physmax/physinstalled from the shrunken phys_install. */
	memlist_read_lock();
	installed_top_size(phys_install, &physmax, &physinstalled);
	memlist_read_unlock();

	mutex_enter(&freemem_lock);
	maxmem -= avpgs;
	physmem -= avpgs;
	/* availrmem is adjusted during the delete. */
	availrmem_initial -= avpgs;

	mutex_exit(&freemem_lock);

	/* Resize the crash dump to match the new memory configuration. */
	dump_resize();

	cmn_err(CE_CONT, "?kphysm_delete: mem = %ldK "
	    "(0x%" PRIx64 ")\n",
	    physinstalled << (PAGESHIFT - 10),
	    (uint64_t)physinstalled << PAGESHIFT);

	avmem = (uint64_t)freemem << PAGESHIFT;
	cmn_err(CE_CONT, "?kphysm_delete: "
	    "avail mem = %" PRId64 "\n", avmem);

	/*
	 * Update lgroup generation number on single lgroup systems
	 */
	if (nlgrps == 1)
		lgrp_config(LGRP_CONFIG_GEN_UPDATE, 0, 0);

	/* Successfully deleted system memory */
	mutex_enter(&mhp->mh_mutex);
}
28097c478bd9Sstevel@tonic-gate 
/* Diagnostic count of waiters woken on vnode-less pages during delete. */
static uint_t mdel_nullvp_waiter;
28117c478bd9Sstevel@tonic-gate 
/*
 * Collect a page that has been captured for deletion: dissociate it
 * from its vnode (if any) or wake any lock waiters, then chain it onto
 * the handle's mh_deleted list and decrement the count of pages still
 * to capture.
 * NOTE(review): mh_deleted and mh_hold_todo are updated directly, so
 * the caller presumably holds mhp->mh_mutex -- confirm before relying
 * on this.
 */
static void
page_delete_collect(
	page_t *pp,
	struct mem_handle *mhp)
{
	if (pp->p_vnode) {
		page_hashout(pp, (kmutex_t *)NULL);
		/* do not do PP_SETAGED(pp); */
	} else {
		kmutex_t *sep;

		/* No vnode: wake anyone blocked on this page's cv. */
		sep = page_se_mutex(pp);
		mutex_enter(sep);
		if (CV_HAS_WAITERS(&pp->p_cv)) {
			mdel_nullvp_waiter++;	/* diagnostic counter */
			cv_broadcast(&pp->p_cv);
		}
		mutex_exit(sep);
	}
	/* The page must not be linked on any page list. */
	ASSERT(pp->p_next == pp->p_prev);
	ASSERT(pp->p_next == NULL || pp->p_next == pp);
	/* Chain onto the handle's singly-linked deleted list via p_next. */
	pp->p_next = mhp->mh_deleted;
	mhp->mh_deleted = pp;
	ASSERT(mhp->mh_hold_todo != 0);
	mhp->mh_hold_todo--;
}
28387c478bd9Sstevel@tonic-gate 
28397c478bd9Sstevel@tonic-gate static void
transit_list_collect(struct mem_handle * mhp,int v)28407c478bd9Sstevel@tonic-gate transit_list_collect(struct mem_handle *mhp, int v)
28417c478bd9Sstevel@tonic-gate {
28427c478bd9Sstevel@tonic-gate 	struct transit_list_head *trh;
28437c478bd9Sstevel@tonic-gate 
28447c478bd9Sstevel@tonic-gate 	trh = &transit_list_head;
28457c478bd9Sstevel@tonic-gate 	mutex_enter(&trh->trh_lock);
28467c478bd9Sstevel@tonic-gate 	mhp->mh_transit.trl_collect = v;
28477c478bd9Sstevel@tonic-gate 	mutex_exit(&trh->trh_lock);
28487c478bd9Sstevel@tonic-gate }
28497c478bd9Sstevel@tonic-gate 
28507c478bd9Sstevel@tonic-gate static void
transit_list_insert(struct transit_list * tlp)28517c478bd9Sstevel@tonic-gate transit_list_insert(struct transit_list *tlp)
28527c478bd9Sstevel@tonic-gate {
28537c478bd9Sstevel@tonic-gate 	struct transit_list_head *trh;
28547c478bd9Sstevel@tonic-gate 
28557c478bd9Sstevel@tonic-gate 	trh = &transit_list_head;
28567c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&trh->trh_lock));
28577c478bd9Sstevel@tonic-gate 	tlp->trl_next = trh->trh_head;
28587c478bd9Sstevel@tonic-gate 	trh->trh_head = tlp;
28597c478bd9Sstevel@tonic-gate }
28607c478bd9Sstevel@tonic-gate 
28617c478bd9Sstevel@tonic-gate static void
transit_list_remove(struct transit_list * tlp)28627c478bd9Sstevel@tonic-gate transit_list_remove(struct transit_list *tlp)
28637c478bd9Sstevel@tonic-gate {
28647c478bd9Sstevel@tonic-gate 	struct transit_list_head *trh;
28657c478bd9Sstevel@tonic-gate 	struct transit_list **tlpp;
28667c478bd9Sstevel@tonic-gate 
28677c478bd9Sstevel@tonic-gate 	trh = &transit_list_head;
28687c478bd9Sstevel@tonic-gate 	tlpp = &trh->trh_head;
28697c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&trh->trh_lock));
28707c478bd9Sstevel@tonic-gate 	while (*tlpp != NULL && *tlpp != tlp)
28717c478bd9Sstevel@tonic-gate 		tlpp = &(*tlpp)->trl_next;
28727c478bd9Sstevel@tonic-gate 	ASSERT(*tlpp != NULL);
28737c478bd9Sstevel@tonic-gate 	if (*tlpp == tlp)
28747c478bd9Sstevel@tonic-gate 		*tlpp = tlp->trl_next;
28757c478bd9Sstevel@tonic-gate 	tlp->trl_next = NULL;
28767c478bd9Sstevel@tonic-gate }
28777c478bd9Sstevel@tonic-gate 
28787c478bd9Sstevel@tonic-gate static struct transit_list *
pfnum_to_transit_list(struct transit_list_head * trh,pfn_t pfnum)28797c478bd9Sstevel@tonic-gate pfnum_to_transit_list(struct transit_list_head *trh, pfn_t pfnum)
28807c478bd9Sstevel@tonic-gate {
28817c478bd9Sstevel@tonic-gate 	struct transit_list *tlp;
28827c478bd9Sstevel@tonic-gate 
28837c478bd9Sstevel@tonic-gate 	for (tlp = trh->trh_head; tlp != NULL; tlp = tlp->trl_next) {
28847c478bd9Sstevel@tonic-gate 		struct memdelspan *mdsp;
28857c478bd9Sstevel@tonic-gate 
28867c478bd9Sstevel@tonic-gate 		for (mdsp = tlp->trl_spans; mdsp != NULL;
28877c478bd9Sstevel@tonic-gate 		    mdsp = mdsp->mds_next) {
28887c478bd9Sstevel@tonic-gate 			if (pfnum >= mdsp->mds_base &&
28897c478bd9Sstevel@tonic-gate 			    pfnum < (mdsp->mds_base + mdsp->mds_npgs)) {
28907c478bd9Sstevel@tonic-gate 				return (tlp);
28917c478bd9Sstevel@tonic-gate 			}
28927c478bd9Sstevel@tonic-gate 		}
28937c478bd9Sstevel@tonic-gate 	}
28947c478bd9Sstevel@tonic-gate 	return (NULL);
28957c478bd9Sstevel@tonic-gate }
28967c478bd9Sstevel@tonic-gate 
28977c478bd9Sstevel@tonic-gate int
pfn_is_being_deleted(pfn_t pfnum)28987c478bd9Sstevel@tonic-gate pfn_is_being_deleted(pfn_t pfnum)
28997c478bd9Sstevel@tonic-gate {
29007c478bd9Sstevel@tonic-gate 	struct transit_list_head *trh;
29017c478bd9Sstevel@tonic-gate 	struct transit_list *tlp;
29027c478bd9Sstevel@tonic-gate 	int ret;
29037c478bd9Sstevel@tonic-gate 
29047c478bd9Sstevel@tonic-gate 	trh = &transit_list_head;
29057c478bd9Sstevel@tonic-gate 	if (trh->trh_head == NULL)
29067c478bd9Sstevel@tonic-gate 		return (0);
29077c478bd9Sstevel@tonic-gate 
29087c478bd9Sstevel@tonic-gate 	mutex_enter(&trh->trh_lock);
29097c478bd9Sstevel@tonic-gate 	tlp = pfnum_to_transit_list(trh, pfnum);
29107c478bd9Sstevel@tonic-gate 	ret = (tlp != NULL && tlp->trl_collect);
29117c478bd9Sstevel@tonic-gate 	mutex_exit(&trh->trh_lock);
29127c478bd9Sstevel@tonic-gate 
29137c478bd9Sstevel@tonic-gate 	return (ret);
29147c478bd9Sstevel@tonic-gate }
29157c478bd9Sstevel@tonic-gate 
#ifdef MEM_DEL_STATS
extern int hz;
/*
 * Dump the statistics accumulated in mhp->mh_delstat for one memory
 * delete loop to the console, gated on the mem_del_stat_print tunable.
 * The header line identifies the first span (base pfn / page count) and
 * whether the delete was cancelled; tick totals are converted to
 * min/sec using hz.
 */
static void
mem_del_stat_print_func(struct mem_handle *mhp)
{
	uint64_t tmp;

	if (mem_del_stat_print) {
		printf("memory delete loop %x/%x, statistics%s\n",
		    (uint_t)mhp->mh_transit.trl_spans->mds_base,
		    (uint_t)mhp->mh_transit.trl_spans->mds_npgs,
		    (mhp->mh_cancel ? " (cancelled)" : ""));
		printf("\t%8u nloop\n", mhp->mh_delstat.nloop);
		printf("\t%8u need_free\n", mhp->mh_delstat.need_free);
		printf("\t%8u free_loop\n", mhp->mh_delstat.free_loop);
		printf("\t%8u free_low\n", mhp->mh_delstat.free_low);
		printf("\t%8u free_failed\n", mhp->mh_delstat.free_failed);
		printf("\t%8u ncheck\n", mhp->mh_delstat.ncheck);
		printf("\t%8u nopaget\n", mhp->mh_delstat.nopaget);
		printf("\t%8u lockfail\n", mhp->mh_delstat.lockfail);
		printf("\t%8u nfree\n", mhp->mh_delstat.nfree);
		printf("\t%8u nreloc\n", mhp->mh_delstat.nreloc);
		printf("\t%8u nrelocfail\n", mhp->mh_delstat.nrelocfail);
		printf("\t%8u already_done\n", mhp->mh_delstat.already_done);
		printf("\t%8u first_notfree\n", mhp->mh_delstat.first_notfree);
		printf("\t%8u npplocked\n", mhp->mh_delstat.npplocked);
		printf("\t%8u nlockreloc\n", mhp->mh_delstat.nlockreloc);
		printf("\t%8u nnorepl\n", mhp->mh_delstat.nnorepl);
		printf("\t%8u nmodreloc\n", mhp->mh_delstat.nmodreloc);
		printf("\t%8u ndestroy\n", mhp->mh_delstat.ndestroy);
		printf("\t%8u nputpage\n", mhp->mh_delstat.nputpage);
		printf("\t%8u nnoreclaim\n", mhp->mh_delstat.nnoreclaim);
		printf("\t%8u ndelay\n", mhp->mh_delstat.ndelay);
		printf("\t%8u demotefail\n", mhp->mh_delstat.demotefail);
		printf("\t%8u retired\n", mhp->mh_delstat.retired);
		printf("\t%8u toxic\n", mhp->mh_delstat.toxic);
		printf("\t%8u failing\n", mhp->mh_delstat.failing);
		printf("\t%8u modtoxic\n", mhp->mh_delstat.modtoxic);
		printf("\t%8u npplkdtoxic\n", mhp->mh_delstat.npplkdtoxic);
		printf("\t%8u gptlmodfail\n", mhp->mh_delstat.gptlmodfail);
		printf("\t%8u gptllckfail\n", mhp->mh_delstat.gptllckfail);
		tmp = mhp->mh_delstat.nticks_total / hz;  /* seconds */
		printf(
		    "\t%"PRIu64" nticks_total - %"PRIu64" min %"PRIu64" sec\n",
		    mhp->mh_delstat.nticks_total, tmp / 60, tmp % 60);

		tmp = mhp->mh_delstat.nticks_pgrp / hz;  /* seconds */
		printf(
		    "\t%"PRIu64" nticks_pgrp - %"PRIu64" min %"PRIu64" sec\n",
		    mhp->mh_delstat.nticks_pgrp, tmp / 60, tmp % 60);
	}
}
#endif /* MEM_DEL_STATS */
29697c478bd9Sstevel@tonic-gate 
/*
 * One registered physical-memory-change client: the vector of entry
 * points (post_add/pre_del/post_del) plus the opaque argument handed
 * back on each call.
 */
struct mem_callback {
	kphysm_setup_vector_t	*vec;
	void			*arg;
};

#define	NMEMCALLBACKS		100

/* Fixed-size registration table; all access is under mem_callback_rwlock. */
static struct mem_callback mem_callbacks[NMEMCALLBACKS];
static uint_t nmemcallbacks;	/* high-water mark of slots ever used */
static krwlock_t mem_callback_rwlock;
29807c478bd9Sstevel@tonic-gate 
29817c478bd9Sstevel@tonic-gate int
kphysm_setup_func_register(kphysm_setup_vector_t * vec,void * arg)29827c478bd9Sstevel@tonic-gate kphysm_setup_func_register(kphysm_setup_vector_t *vec, void *arg)
29837c478bd9Sstevel@tonic-gate {
29847c478bd9Sstevel@tonic-gate 	uint_t i, found;
29857c478bd9Sstevel@tonic-gate 
29867c478bd9Sstevel@tonic-gate 	/*
29877c478bd9Sstevel@tonic-gate 	 * This test will become more complicated when the version must
29887c478bd9Sstevel@tonic-gate 	 * change.
29897c478bd9Sstevel@tonic-gate 	 */
29907c478bd9Sstevel@tonic-gate 	if (vec->version != KPHYSM_SETUP_VECTOR_VERSION)
29917c478bd9Sstevel@tonic-gate 		return (EINVAL);
29927c478bd9Sstevel@tonic-gate 
29937c478bd9Sstevel@tonic-gate 	if (vec->post_add == NULL || vec->pre_del == NULL ||
29947c478bd9Sstevel@tonic-gate 	    vec->post_del == NULL)
29957c478bd9Sstevel@tonic-gate 		return (EINVAL);
29967c478bd9Sstevel@tonic-gate 
29977c478bd9Sstevel@tonic-gate 	rw_enter(&mem_callback_rwlock, RW_WRITER);
29987c478bd9Sstevel@tonic-gate 	for (i = 0, found = 0; i < nmemcallbacks; i++) {
29997c478bd9Sstevel@tonic-gate 		if (mem_callbacks[i].vec == NULL && found == 0)
30007c478bd9Sstevel@tonic-gate 			found = i + 1;
30017c478bd9Sstevel@tonic-gate 		if (mem_callbacks[i].vec == vec &&
30027c478bd9Sstevel@tonic-gate 		    mem_callbacks[i].arg == arg) {
30037c478bd9Sstevel@tonic-gate #ifdef DEBUG
30047c478bd9Sstevel@tonic-gate 			/* Catch this in DEBUG kernels. */
30057c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "kphysm_setup_func_register"
30067c478bd9Sstevel@tonic-gate 			    "(0x%p, 0x%p) duplicate registration from 0x%p",
30077c478bd9Sstevel@tonic-gate 			    (void *)vec, arg, (void *)caller());
30087c478bd9Sstevel@tonic-gate #endif /* DEBUG */
30097c478bd9Sstevel@tonic-gate 			rw_exit(&mem_callback_rwlock);
30107c478bd9Sstevel@tonic-gate 			return (EEXIST);
30117c478bd9Sstevel@tonic-gate 		}
30127c478bd9Sstevel@tonic-gate 	}
30137c478bd9Sstevel@tonic-gate 	if (found != 0) {
30147c478bd9Sstevel@tonic-gate 		i = found - 1;
30157c478bd9Sstevel@tonic-gate 	} else {
30167c478bd9Sstevel@tonic-gate 		ASSERT(nmemcallbacks < NMEMCALLBACKS);
30177c478bd9Sstevel@tonic-gate 		if (nmemcallbacks == NMEMCALLBACKS) {
30187c478bd9Sstevel@tonic-gate 			rw_exit(&mem_callback_rwlock);
30197c478bd9Sstevel@tonic-gate 			return (ENOMEM);
30207c478bd9Sstevel@tonic-gate 		}
30217c478bd9Sstevel@tonic-gate 		i = nmemcallbacks++;
30227c478bd9Sstevel@tonic-gate 	}
30237c478bd9Sstevel@tonic-gate 	mem_callbacks[i].vec = vec;
30247c478bd9Sstevel@tonic-gate 	mem_callbacks[i].arg = arg;
30257c478bd9Sstevel@tonic-gate 	rw_exit(&mem_callback_rwlock);
30267c478bd9Sstevel@tonic-gate 	return (0);
30277c478bd9Sstevel@tonic-gate }
30287c478bd9Sstevel@tonic-gate 
30297c478bd9Sstevel@tonic-gate void
kphysm_setup_func_unregister(kphysm_setup_vector_t * vec,void * arg)30307c478bd9Sstevel@tonic-gate kphysm_setup_func_unregister(kphysm_setup_vector_t *vec, void *arg)
30317c478bd9Sstevel@tonic-gate {
30327c478bd9Sstevel@tonic-gate 	uint_t i;
30337c478bd9Sstevel@tonic-gate 
30347c478bd9Sstevel@tonic-gate 	rw_enter(&mem_callback_rwlock, RW_WRITER);
30357c478bd9Sstevel@tonic-gate 	for (i = 0; i < nmemcallbacks; i++) {
30367c478bd9Sstevel@tonic-gate 		if (mem_callbacks[i].vec == vec &&
30377c478bd9Sstevel@tonic-gate 		    mem_callbacks[i].arg == arg) {
30387c478bd9Sstevel@tonic-gate 			mem_callbacks[i].vec = NULL;
30397c478bd9Sstevel@tonic-gate 			mem_callbacks[i].arg = NULL;
30407c478bd9Sstevel@tonic-gate 			if (i == (nmemcallbacks - 1))
30417c478bd9Sstevel@tonic-gate 				nmemcallbacks--;
30427c478bd9Sstevel@tonic-gate 			break;
30437c478bd9Sstevel@tonic-gate 		}
30447c478bd9Sstevel@tonic-gate 	}
30457c478bd9Sstevel@tonic-gate 	rw_exit(&mem_callback_rwlock);
30467c478bd9Sstevel@tonic-gate }
30477c478bd9Sstevel@tonic-gate 
30487c478bd9Sstevel@tonic-gate static void
kphysm_setup_post_add(pgcnt_t delta_pages)30497c478bd9Sstevel@tonic-gate kphysm_setup_post_add(pgcnt_t delta_pages)
30507c478bd9Sstevel@tonic-gate {
30517c478bd9Sstevel@tonic-gate 	uint_t i;
30527c478bd9Sstevel@tonic-gate 
30537c478bd9Sstevel@tonic-gate 	rw_enter(&mem_callback_rwlock, RW_READER);
30547c478bd9Sstevel@tonic-gate 	for (i = 0; i < nmemcallbacks; i++) {
30557c478bd9Sstevel@tonic-gate 		if (mem_callbacks[i].vec != NULL) {
30567c478bd9Sstevel@tonic-gate 			(*mem_callbacks[i].vec->post_add)
30577c478bd9Sstevel@tonic-gate 			    (mem_callbacks[i].arg, delta_pages);
30587c478bd9Sstevel@tonic-gate 		}
30597c478bd9Sstevel@tonic-gate 	}
30607c478bd9Sstevel@tonic-gate 	rw_exit(&mem_callback_rwlock);
30617c478bd9Sstevel@tonic-gate }
30627c478bd9Sstevel@tonic-gate 
/*
 * Note the locking between pre_del and post_del: The reader lock is held
 * between the two calls to stop the set of functions from changing.
 */

/*
 * Invoke every registered pre_del callback with the number of pages
 * about to be deleted.  Returns the OR of all callback return values;
 * a nonzero result means at least one client objected to the delete.
 *
 * NOTE: this function deliberately returns with mem_callback_rwlock
 * still held as reader (see comment above); kphysm_setup_post_del()
 * releases it.
 */
static int
kphysm_setup_pre_del(pgcnt_t delta_pages)
{
	uint_t i;
	int ret;
	int aret;

	ret = 0;
	rw_enter(&mem_callback_rwlock, RW_READER);
	for (i = 0; i < nmemcallbacks; i++) {
		if (mem_callbacks[i].vec != NULL) {
			aret = (*mem_callbacks[i].vec->pre_del)
			    (mem_callbacks[i].arg, delta_pages);
			ret |= aret;
		}
	}

	/* Intentionally no rw_exit() here -- post_del drops the lock. */
	return (ret);
}
30877c478bd9Sstevel@tonic-gate 
/*
 * Invoke every registered post_del callback; cancelled indicates
 * whether the delete completed or was aborted.
 *
 * Runs with mem_callback_rwlock still held as reader from the
 * matching kphysm_setup_pre_del() call, and releases it on return.
 */
static void
kphysm_setup_post_del(pgcnt_t delta_pages, int cancelled)
{
	uint_t i;

	for (i = 0; i < nmemcallbacks; i++) {
		if (mem_callbacks[i].vec != NULL) {
			(*mem_callbacks[i].vec->post_del)
			    (mem_callbacks[i].arg, delta_pages, cancelled);
		}
	}
	rw_exit(&mem_callback_rwlock);
}
31017c478bd9Sstevel@tonic-gate 
/*
 * Split the boot-time memseg containing [base, base + npgs) into up to
 * three new memsegs: an optional low segment before base, a middle
 * segment covering exactly the span, and an optional high segment
 * after it.  This isolates the span in its own memseg so it can be
 * deleted independently.  Returns 1 on success, 0 if no suitable
 * memseg covers the span or no split is needed.  The old memseg is
 * never freed (references may still exist); it is parked on
 * memseg_edit_junk.
 */
static int
kphysm_split_memseg(
	pfn_t base,
	pgcnt_t npgs)
{
	struct memseg *seg;
	struct memseg **segpp;
	pgcnt_t size_low, size_high;
	struct memseg *seg_low, *seg_mid, *seg_high;

	/*
	 * Lock the memsegs list against other updates now
	 */
	memsegs_lock(1);

	/*
	 * Find boot time memseg that wholly covers this area.
	 */

	/* First find the memseg with page 'base' in it. */
	for (segpp = &memsegs; (seg = *segpp) != NULL;
	    segpp = &((*segpp)->next)) {
		if (base >= seg->pages_base && base < seg->pages_end)
			break;
	}
	if (seg == NULL) {
		memsegs_unlock(1);
		return (0);
	}
	/* Cannot split a memseg whose page_t metadata lives inside itself. */
	if (memseg_includes_meta(seg)) {
		memsegs_unlock(1);
		return (0);
	}
	/* The span must be wholly contained in this one memseg. */
	if ((base + npgs) > seg->pages_end) {
		memsegs_unlock(1);
		return (0);
	}

	/*
	 * Work out the size of the two segments that will
	 * surround the new segment, one for low address
	 * and one for high.
	 */
	ASSERT(base >= seg->pages_base);
	size_low = base - seg->pages_base;
	ASSERT(seg->pages_end >= (base + npgs));
	size_high = seg->pages_end - (base + npgs);

	/*
	 * Sanity check.
	 */
	if ((size_low + size_high) == 0) {
		/* Span is the entire memseg -- nothing to split. */
		memsegs_unlock(1);
		return (0);
	}

	/*
	 * Allocate the new structures. The old memseg will not be freed
	 * as there may be a reference to it.
	 */
	seg_low = NULL;
	seg_high = NULL;

	if (size_low != 0)
		seg_low = memseg_alloc();

	seg_mid = memseg_alloc();

	if (size_high != 0)
		seg_high = memseg_alloc();

	/*
	 * All allocation done now.
	 */
	if (size_low != 0) {
		seg_low->pages = seg->pages;
		seg_low->epages = seg_low->pages + size_low;
		seg_low->pages_base = seg->pages_base;
		seg_low->pages_end = seg_low->pages_base + size_low;
		seg_low->next = seg_mid;
		seg_low->msegflags = seg->msegflags;
	}
	if (size_high != 0) {
		seg_high->pages = seg->epages - size_high;
		seg_high->epages = seg_high->pages + size_high;
		seg_high->pages_base = seg->pages_end - size_high;
		seg_high->pages_end = seg_high->pages_base + size_high;
		seg_high->next = seg->next;
		seg_high->msegflags = seg->msegflags;
	}

	/* The middle segment reuses the old memseg's page_t range. */
	seg_mid->pages = seg->pages + size_low;
	seg_mid->pages_base = seg->pages_base + size_low;
	seg_mid->epages = seg->epages - size_high;
	seg_mid->pages_end = seg->pages_end - size_high;
	seg_mid->next = (seg_high != NULL) ? seg_high : seg->next;
	seg_mid->msegflags = seg->msegflags;

	/*
	 * Update hat_kpm specific info of all involved memsegs and
	 * allow hat_kpm specific global chain updates.
	 */
	hat_kpm_split_mseg_update(seg, segpp, seg_low, seg_mid, seg_high);

	/*
	 * At this point we have two equivalent memseg sub-chains,
	 * seg and seg_low/seg_mid/seg_high, which both chain on to
	 * the same place in the global chain. By re-writing the pointer
	 * in the previous element we switch atomically from using the old
	 * (seg) to the new.
	 */
	*segpp = (seg_low != NULL) ? seg_low : seg_mid;

	/* Ensure the new chain is globally visible before rebuilding hash. */
	membar_enter();

	build_pfn_hash();
	memsegs_unlock(1);

	/*
	 * We leave the old segment, 'seg', intact as there may be
	 * references to it. Also, as the value of total_pages has not
	 * changed and the memsegs list is effectively the same when
	 * accessed via the old or the new pointer, we do not have to
	 * cause pageout_scanner() to re-evaluate its hand pointers.
	 *
	 * We currently do not re-use or reclaim the page_t memory.
	 * If we do, then this may have to change.
	 */

	mutex_enter(&memseg_lists_lock);
	seg->lnext = memseg_edit_junk;
	memseg_edit_junk = seg;
	mutex_exit(&memseg_lists_lock);

	return (1);
}
32387c478bd9Sstevel@tonic-gate 
32397c478bd9Sstevel@tonic-gate /*
32407c478bd9Sstevel@tonic-gate  * The sfmmu hat layer (e.g.) accesses some parts of the memseg
32417c478bd9Sstevel@tonic-gate  * structure using physical addresses. Therefore a kmem_cache is
32427c478bd9Sstevel@tonic-gate  * used with KMC_NOHASH to avoid page crossings within a memseg
32437c478bd9Sstevel@tonic-gate  * structure. KMC_NOHASH requires that no external (outside of
32447c478bd9Sstevel@tonic-gate  * slab) information is allowed. This, in turn, implies that the
32457c478bd9Sstevel@tonic-gate  * cache's slabsize must be exactly a single page, since per-slab
32467c478bd9Sstevel@tonic-gate  * information (e.g. the freelist for the slab) is kept at the
32477c478bd9Sstevel@tonic-gate  * end of the slab, where it is easy to locate. Should be changed
32487c478bd9Sstevel@tonic-gate  * when a more obvious kmem_cache interface/flag will become
32497c478bd9Sstevel@tonic-gate  * available.
32507c478bd9Sstevel@tonic-gate  */
32517c478bd9Sstevel@tonic-gate void
mem_config_init()32527c478bd9Sstevel@tonic-gate mem_config_init()
32537c478bd9Sstevel@tonic-gate {
32547c478bd9Sstevel@tonic-gate 	memseg_cache = kmem_cache_create("memseg_cache", sizeof (struct memseg),
32557c478bd9Sstevel@tonic-gate 	    0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH);
32567c478bd9Sstevel@tonic-gate }
32579853d9e8SJason Beloro 
32589853d9e8SJason Beloro struct memseg *
memseg_alloc()32599853d9e8SJason Beloro memseg_alloc()
32609853d9e8SJason Beloro {
32619853d9e8SJason Beloro 	struct memseg *seg;
32629853d9e8SJason Beloro 
32639853d9e8SJason Beloro 	seg = kmem_cache_alloc(memseg_cache, KM_SLEEP);
32649853d9e8SJason Beloro 	bzero(seg, sizeof (struct memseg));
32659853d9e8SJason Beloro 
32669853d9e8SJason Beloro 	return (seg);
32679853d9e8SJason Beloro }
32689853d9e8SJason Beloro 
/*
 * Return whether the page_t memory for this memseg
 * is included in the memseg itself.
 */
/* Returns the MEMSEG_META_INCL bit (nonzero = metadata is internal). */
static int
memseg_includes_meta(struct memseg *seg)
{
	return (seg->msegflags & MEMSEG_META_INCL);
}
32789853d9e8SJason Beloro 
32799853d9e8SJason Beloro pfn_t
memseg_get_start(struct memseg * seg)32809853d9e8SJason Beloro memseg_get_start(struct memseg *seg)
32819853d9e8SJason Beloro {
32829853d9e8SJason Beloro 	pfn_t		pt_start;
32839853d9e8SJason Beloro 
32849853d9e8SJason Beloro 	if (memseg_includes_meta(seg)) {
32859853d9e8SJason Beloro 		pt_start = hat_getpfnum(kas.a_hat, (caddr_t)seg->pages);
32869853d9e8SJason Beloro 
32879853d9e8SJason Beloro 		/* Meta data is required to be at the beginning */
32889853d9e8SJason Beloro 		ASSERT(pt_start < seg->pages_base);
32899853d9e8SJason Beloro 	} else
32909853d9e8SJason Beloro 		pt_start = seg->pages_base;
32919853d9e8SJason Beloro 
32929853d9e8SJason Beloro 	return (pt_start);
32939853d9e8SJason Beloro }
32949853d9e8SJason Beloro 
32959853d9e8SJason Beloro /*
32969853d9e8SJason Beloro  * Invalidate memseg pointers in cpu private vm data caches.
32979853d9e8SJason Beloro  */
32989853d9e8SJason Beloro static void
memseg_cpu_vm_flush()32999853d9e8SJason Beloro memseg_cpu_vm_flush()
33009853d9e8SJason Beloro {
33019853d9e8SJason Beloro 	cpu_t *cp;
33029853d9e8SJason Beloro 	vm_cpu_data_t *vc;
33039853d9e8SJason Beloro 
33049853d9e8SJason Beloro 	mutex_enter(&cpu_lock);
3305*0ed5c46eSJosef 'Jeff' Sipek 	pause_cpus(NULL, NULL);
33069853d9e8SJason Beloro 
33079853d9e8SJason Beloro 	cp = cpu_list;
33089853d9e8SJason Beloro 	do {
33099853d9e8SJason Beloro 		vc = cp->cpu_vm_data;
33109853d9e8SJason Beloro 		vc->vc_pnum_memseg = NULL;
33119853d9e8SJason Beloro 		vc->vc_pnext_memseg = NULL;
33129853d9e8SJason Beloro 
33139853d9e8SJason Beloro 	} while ((cp = cp->cpu_next) != cpu_list);
33149853d9e8SJason Beloro 
33159853d9e8SJason Beloro 	start_cpus();
33169853d9e8SJason Beloro 	mutex_exit(&cpu_lock);
33179853d9e8SJason Beloro }
3318