17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate * CDDL HEADER START
37c478bd9Sstevel@tonic-gate *
47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the
5ee88d2b9Skchow * Common Development and Distribution License (the "License").
6ee88d2b9Skchow * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate *
87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate * and limitations under the License.
127c478bd9Sstevel@tonic-gate *
137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate *
197c478bd9Sstevel@tonic-gate * CDDL HEADER END
207c478bd9Sstevel@tonic-gate */
217c478bd9Sstevel@tonic-gate /*
2256f33205SJonathan Adams * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
237c478bd9Sstevel@tonic-gate * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate */
257c478bd9Sstevel@tonic-gate
267c478bd9Sstevel@tonic-gate #include <sys/types.h>
277c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
287c478bd9Sstevel@tonic-gate #include <sys/vmem.h>
297c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
307c478bd9Sstevel@tonic-gate #include <sys/systm.h>
317c478bd9Sstevel@tonic-gate #include <sys/machsystm.h> /* for page_freelist_coalesce() */
327c478bd9Sstevel@tonic-gate #include <sys/errno.h>
337c478bd9Sstevel@tonic-gate #include <sys/memnode.h>
347c478bd9Sstevel@tonic-gate #include <sys/memlist.h>
357c478bd9Sstevel@tonic-gate #include <sys/memlist_impl.h>
367c478bd9Sstevel@tonic-gate #include <sys/tuneable.h>
377c478bd9Sstevel@tonic-gate #include <sys/proc.h>
387c478bd9Sstevel@tonic-gate #include <sys/disp.h>
397c478bd9Sstevel@tonic-gate #include <sys/debug.h>
407c478bd9Sstevel@tonic-gate #include <sys/vm.h>
417c478bd9Sstevel@tonic-gate #include <sys/callb.h>
427c478bd9Sstevel@tonic-gate #include <sys/memlist_plat.h> /* for installed_top_size() */
437c478bd9Sstevel@tonic-gate #include <sys/condvar_impl.h> /* for CV_HAS_WAITERS() */
447c478bd9Sstevel@tonic-gate #include <sys/dumphdr.h> /* for dump_resize() */
457c478bd9Sstevel@tonic-gate #include <sys/atomic.h> /* for use in stats collection */
467c478bd9Sstevel@tonic-gate #include <sys/rwlock.h>
477c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
487c478bd9Sstevel@tonic-gate #include <vm/seg_kmem.h>
497c478bd9Sstevel@tonic-gate #include <vm/seg_kpm.h>
507c478bd9Sstevel@tonic-gate #include <vm/page.h>
51e21bae1bSkchow #include <vm/vm_dep.h>
527c478bd9Sstevel@tonic-gate #define SUNDDI_IMPL /* so sunddi.h will not redefine splx() et al */
537c478bd9Sstevel@tonic-gate #include <sys/sunddi.h>
547c478bd9Sstevel@tonic-gate #include <sys/mem_config.h>
557c478bd9Sstevel@tonic-gate #include <sys/mem_cage.h>
567c478bd9Sstevel@tonic-gate #include <sys/lgrp.h>
577c478bd9Sstevel@tonic-gate #include <sys/ddi.h>
587c478bd9Sstevel@tonic-gate #include <sys/modctl.h>
597c478bd9Sstevel@tonic-gate
607c478bd9Sstevel@tonic-gate extern struct memlist *phys_avail;
617c478bd9Sstevel@tonic-gate
627c478bd9Sstevel@tonic-gate extern uint_t page_ctrs_adjust(int);
63af4c679fSSean McEnroe void page_ctrs_cleanup(void);
647c478bd9Sstevel@tonic-gate static void kphysm_setup_post_add(pgcnt_t);
657c478bd9Sstevel@tonic-gate static int kphysm_setup_pre_del(pgcnt_t);
667c478bd9Sstevel@tonic-gate static void kphysm_setup_post_del(pgcnt_t, int);
677c478bd9Sstevel@tonic-gate
687c478bd9Sstevel@tonic-gate static int kphysm_split_memseg(pfn_t base, pgcnt_t npgs);
697c478bd9Sstevel@tonic-gate
707c478bd9Sstevel@tonic-gate static int delspan_reserve(pfn_t, pgcnt_t);
717c478bd9Sstevel@tonic-gate static void delspan_unreserve(pfn_t, pgcnt_t);
727c478bd9Sstevel@tonic-gate
739853d9e8SJason Beloro kmutex_t memseg_lists_lock;
749853d9e8SJason Beloro struct memseg *memseg_va_avail;
759853d9e8SJason Beloro struct memseg *memseg_alloc(void);
767c478bd9Sstevel@tonic-gate static struct memseg *memseg_delete_junk;
777c478bd9Sstevel@tonic-gate static struct memseg *memseg_edit_junk;
787c478bd9Sstevel@tonic-gate void memseg_remap_init(void);
799853d9e8SJason Beloro static void memseg_remap_to_dummy(struct memseg *);
807c478bd9Sstevel@tonic-gate static void kphysm_addmem_error_undospan(pfn_t, pgcnt_t);
817c478bd9Sstevel@tonic-gate static struct memseg *memseg_reuse(pgcnt_t);
827c478bd9Sstevel@tonic-gate
837c478bd9Sstevel@tonic-gate static struct kmem_cache *memseg_cache;
847c478bd9Sstevel@tonic-gate
857c478bd9Sstevel@tonic-gate /*
869853d9e8SJason Beloro * Interfaces to manage externally allocated
879853d9e8SJason Beloro * page_t memory (metadata) for a memseg.
889853d9e8SJason Beloro */
899853d9e8SJason Beloro #pragma weak memseg_alloc_meta
909853d9e8SJason Beloro #pragma weak memseg_free_meta
919853d9e8SJason Beloro #pragma weak memseg_get_metapfn
929853d9e8SJason Beloro #pragma weak memseg_remap_meta
939853d9e8SJason Beloro
949853d9e8SJason Beloro extern int ppvm_enable;
959853d9e8SJason Beloro extern page_t *ppvm_base;
969853d9e8SJason Beloro extern int memseg_alloc_meta(pfn_t, pgcnt_t, void **, pgcnt_t *);
979853d9e8SJason Beloro extern void memseg_free_meta(void *, pgcnt_t);
989853d9e8SJason Beloro extern pfn_t memseg_get_metapfn(void *, pgcnt_t);
999853d9e8SJason Beloro extern void memseg_remap_meta(struct memseg *);
1009853d9e8SJason Beloro static int memseg_is_dynamic(struct memseg *);
1019853d9e8SJason Beloro static int memseg_includes_meta(struct memseg *);
102af4c679fSSean McEnroe pfn_t memseg_get_start(struct memseg *);
1039853d9e8SJason Beloro static void memseg_cpu_vm_flush(void);
1049853d9e8SJason Beloro
1059853d9e8SJason Beloro int meta_alloc_enable;
1069853d9e8SJason Beloro
107a3114836SGerry Liu #ifdef DEBUG
108a3114836SGerry Liu static int memseg_debug;
109a3114836SGerry Liu #define MEMSEG_DEBUG(args...) if (memseg_debug) printf(args)
110a3114836SGerry Liu #else
111a3114836SGerry Liu #define MEMSEG_DEBUG(...)
112a3114836SGerry Liu #endif
113a3114836SGerry Liu
1149853d9e8SJason Beloro /*
1159853d9e8SJason Beloro * Add a chunk of memory to the system.
1167c478bd9Sstevel@tonic-gate * base: starting PAGESIZE page of new memory.
1177c478bd9Sstevel@tonic-gate * npgs: length in PAGESIZE pages.
1187c478bd9Sstevel@tonic-gate *
1197c478bd9Sstevel@tonic-gate * Adding mem this way doesn't increase the size of the hash tables;
1207c478bd9Sstevel@tonic-gate * growing them would be too hard. This should be OK, but adding memory
1217c478bd9Sstevel@tonic-gate * dynamically most likely means more hash misses, since the tables will
1227c478bd9Sstevel@tonic-gate * be smaller than they otherwise would be.
 *
 * Returns KPHYSM_OK on success.  Failure returns seen below:
 *   KPHYSM_ESPAN      - span overlaps memory already installed or
 *                       in transition (delspan reservation/memlist add).
 *   KPHYSM_ERESOURCE  - memlist/VA/page-counter allocation failed, or
 *                       the span is too small to hold its own metadata.
 *   KPHYSM_ENOTVIABLE - kpm large-page alignment constraints violated.
 *   KPHYSM_EFAULT     - the mapped page_t area could not be accessed.
1237c478bd9Sstevel@tonic-gate */
1247c478bd9Sstevel@tonic-gate int
kphysm_add_memory_dynamic(pfn_t base,pgcnt_t npgs)1257c478bd9Sstevel@tonic-gate kphysm_add_memory_dynamic(pfn_t base, pgcnt_t npgs)
1267c478bd9Sstevel@tonic-gate {
1277c478bd9Sstevel@tonic-gate page_t *pp;
1289853d9e8SJason Beloro page_t *opp, *oepp, *segpp;
1297c478bd9Sstevel@tonic-gate struct memseg *seg;
1307c478bd9Sstevel@tonic-gate uint64_t avmem;
1317c478bd9Sstevel@tonic-gate pfn_t pfn;
1327c478bd9Sstevel@tonic-gate pfn_t pt_base = base;
1337c478bd9Sstevel@tonic-gate pgcnt_t tpgs = npgs;
1349853d9e8SJason Beloro pgcnt_t metapgs = 0;
1357c478bd9Sstevel@tonic-gate int exhausted;
1367c478bd9Sstevel@tonic-gate pfn_t pnum;
1377c478bd9Sstevel@tonic-gate int mnode;
1387c478bd9Sstevel@tonic-gate caddr_t vaddr;
1397c478bd9Sstevel@tonic-gate int reuse;
1407c478bd9Sstevel@tonic-gate int mlret;
1419853d9e8SJason Beloro int rv;
1429853d9e8SJason Beloro int flags;
1439853d9e8SJason Beloro int meta_alloc = 0;
1447c478bd9Sstevel@tonic-gate void *mapva;
1459853d9e8SJason Beloro void *metabase = (void *)base;
1467c478bd9Sstevel@tonic-gate pgcnt_t nkpmpgs = 0;
1477c478bd9Sstevel@tonic-gate offset_t kpm_pages_off;
1487c478bd9Sstevel@tonic-gate
1497c478bd9Sstevel@tonic-gate cmn_err(CE_CONT,
1507c478bd9Sstevel@tonic-gate "?kphysm_add_memory_dynamic: adding %ldK at 0x%" PRIx64 "\n",
1517c478bd9Sstevel@tonic-gate npgs << (PAGESHIFT - 10), (uint64_t)base << PAGESHIFT);
1527c478bd9Sstevel@tonic-gate
1537c478bd9Sstevel@tonic-gate /*
1547c478bd9Sstevel@tonic-gate * Add this span in the delete list to prevent interactions.
1557c478bd9Sstevel@tonic-gate */
1567c478bd9Sstevel@tonic-gate if (!delspan_reserve(base, npgs)) {
1577c478bd9Sstevel@tonic-gate return (KPHYSM_ESPAN);
1587c478bd9Sstevel@tonic-gate }
1597c478bd9Sstevel@tonic-gate /*
1607c478bd9Sstevel@tonic-gate * Check to see if any of the memory span has been added
1617c478bd9Sstevel@tonic-gate * by trying an add to the installed memory list. This
1627c478bd9Sstevel@tonic-gate * forms the interlocking process for add.
1637c478bd9Sstevel@tonic-gate */
1647c478bd9Sstevel@tonic-gate
1657c478bd9Sstevel@tonic-gate memlist_write_lock();
1667c478bd9Sstevel@tonic-gate
1677c478bd9Sstevel@tonic-gate mlret = memlist_add_span((uint64_t)(pt_base) << PAGESHIFT,
1687c478bd9Sstevel@tonic-gate (uint64_t)(tpgs) << PAGESHIFT, &phys_install);
1697c478bd9Sstevel@tonic-gate
1707c478bd9Sstevel@tonic-gate if (mlret == MEML_SPANOP_OK)
1717c478bd9Sstevel@tonic-gate installed_top_size(phys_install, &physmax, &physinstalled);
1727c478bd9Sstevel@tonic-gate
1737c478bd9Sstevel@tonic-gate memlist_write_unlock();
1747c478bd9Sstevel@tonic-gate
1757c478bd9Sstevel@tonic-gate if (mlret != MEML_SPANOP_OK) {
1767c478bd9Sstevel@tonic-gate if (mlret == MEML_SPANOP_EALLOC) {
1777c478bd9Sstevel@tonic-gate delspan_unreserve(pt_base, tpgs);
1787c478bd9Sstevel@tonic-gate return (KPHYSM_ERESOURCE);
1799853d9e8SJason Beloro } else if (mlret == MEML_SPANOP_ESPAN) {
1807c478bd9Sstevel@tonic-gate delspan_unreserve(pt_base, tpgs);
1817c478bd9Sstevel@tonic-gate return (KPHYSM_ESPAN);
1827c478bd9Sstevel@tonic-gate } else {
1837c478bd9Sstevel@tonic-gate delspan_unreserve(pt_base, tpgs);
1847c478bd9Sstevel@tonic-gate return (KPHYSM_ERESOURCE);
1857c478bd9Sstevel@tonic-gate }
1867c478bd9Sstevel@tonic-gate }
1877c478bd9Sstevel@tonic-gate
1889853d9e8SJason Beloro if (meta_alloc_enable) {
1899853d9e8SJason Beloro /*
1909853d9e8SJason Beloro * Allocate the page_t's from existing memory;
1919853d9e8SJason Beloro * if that fails, allocate from the incoming memory.
1929853d9e8SJason Beloro */
1939853d9e8SJason Beloro rv = memseg_alloc_meta(base, npgs, &metabase, &metapgs);
1949853d9e8SJason Beloro if (rv == KPHYSM_OK) {
1959853d9e8SJason Beloro ASSERT(metapgs);
1969853d9e8SJason Beloro ASSERT(btopr(npgs * sizeof (page_t)) <= metapgs);
1979853d9e8SJason Beloro meta_alloc = 1;
1989853d9e8SJason Beloro goto mapalloc;
1999853d9e8SJason Beloro }
2009853d9e8SJason Beloro }
2019853d9e8SJason Beloro
2027c478bd9Sstevel@tonic-gate /*
2037c478bd9Sstevel@tonic-gate * We store the page_t's for this new memory in the first
2047c478bd9Sstevel@tonic-gate * few pages of the chunk. Here, we go and get'em ...
2057c478bd9Sstevel@tonic-gate */
2067c478bd9Sstevel@tonic-gate
2077c478bd9Sstevel@tonic-gate /*
2087c478bd9Sstevel@tonic-gate * The expression after the '-' gives the number of pages
2097c478bd9Sstevel@tonic-gate * that will fit in the new memory based on a requirement
2107c478bd9Sstevel@tonic-gate * of (PAGESIZE + sizeof (page_t)) bytes per page.
2117c478bd9Sstevel@tonic-gate */
2127c478bd9Sstevel@tonic-gate metapgs = npgs - (((uint64_t)(npgs) << PAGESHIFT) /
2137c478bd9Sstevel@tonic-gate (PAGESIZE + sizeof (page_t)));
2147c478bd9Sstevel@tonic-gate
2157c478bd9Sstevel@tonic-gate npgs -= metapgs;
2167c478bd9Sstevel@tonic-gate base += metapgs;
2177c478bd9Sstevel@tonic-gate
2187c478bd9Sstevel@tonic-gate ASSERT(btopr(npgs * sizeof (page_t)) <= metapgs);
2197c478bd9Sstevel@tonic-gate
/* If the metadata consumed the whole span, there is nothing left to add. */
2207c478bd9Sstevel@tonic-gate exhausted = (metapgs == 0 || npgs == 0);
2217c478bd9Sstevel@tonic-gate
2227c478bd9Sstevel@tonic-gate if (kpm_enable && !exhausted) {
2237c478bd9Sstevel@tonic-gate pgcnt_t start, end, nkpmpgs_prelim;
2247c478bd9Sstevel@tonic-gate size_t ptsz;
2257c478bd9Sstevel@tonic-gate
2267c478bd9Sstevel@tonic-gate /*
2277c478bd9Sstevel@tonic-gate * A viable kpm large page mapping must not overlap two
2287c478bd9Sstevel@tonic-gate * dynamic memsegs. Therefore the total size is checked
2297c478bd9Sstevel@tonic-gate * to be at least kpm_pgsz and also whether start and end
2307c478bd9Sstevel@tonic-gate * points are at least kpm_pgsz aligned.
2317c478bd9Sstevel@tonic-gate */
2327c478bd9Sstevel@tonic-gate if (ptokpmp(tpgs) < 1 || pmodkpmp(pt_base) ||
2337c478bd9Sstevel@tonic-gate pmodkpmp(base + npgs)) {
2347c478bd9Sstevel@tonic-gate
2357c478bd9Sstevel@tonic-gate kphysm_addmem_error_undospan(pt_base, tpgs);
2367c478bd9Sstevel@tonic-gate
2377c478bd9Sstevel@tonic-gate /*
2387c478bd9Sstevel@tonic-gate * There is no specific error code for violating
2397c478bd9Sstevel@tonic-gate * kpm granularity constraints.
2407c478bd9Sstevel@tonic-gate */
2417c478bd9Sstevel@tonic-gate return (KPHYSM_ENOTVIABLE);
2427c478bd9Sstevel@tonic-gate }
2437c478bd9Sstevel@tonic-gate
2447c478bd9Sstevel@tonic-gate start = kpmptop(ptokpmp(base));
2457c478bd9Sstevel@tonic-gate end = kpmptop(ptokpmp(base + npgs));
2467c478bd9Sstevel@tonic-gate nkpmpgs_prelim = ptokpmp(end - start);
2477c478bd9Sstevel@tonic-gate ptsz = npgs * sizeof (page_t);
2487c478bd9Sstevel@tonic-gate metapgs = btopr(ptsz + nkpmpgs_prelim * KPMPAGE_T_SZ);
2497c478bd9Sstevel@tonic-gate exhausted = (tpgs <= metapgs);
2507c478bd9Sstevel@tonic-gate if (!exhausted) {
2517c478bd9Sstevel@tonic-gate npgs = tpgs - metapgs;
2527c478bd9Sstevel@tonic-gate base = pt_base + metapgs;
2537c478bd9Sstevel@tonic-gate
2547c478bd9Sstevel@tonic-gate /* final nkpmpgs */
2557c478bd9Sstevel@tonic-gate start = kpmptop(ptokpmp(base));
2567c478bd9Sstevel@tonic-gate nkpmpgs = ptokpmp(end - start);
2577c478bd9Sstevel@tonic-gate kpm_pages_off = ptsz +
2587c478bd9Sstevel@tonic-gate (nkpmpgs_prelim - nkpmpgs) * KPMPAGE_T_SZ;
2597c478bd9Sstevel@tonic-gate }
2607c478bd9Sstevel@tonic-gate }
2617c478bd9Sstevel@tonic-gate
2627c478bd9Sstevel@tonic-gate /*
2637c478bd9Sstevel@tonic-gate * Is memory area supplied too small?
2647c478bd9Sstevel@tonic-gate */
2657c478bd9Sstevel@tonic-gate if (exhausted) {
2667c478bd9Sstevel@tonic-gate kphysm_addmem_error_undospan(pt_base, tpgs);
2677c478bd9Sstevel@tonic-gate /*
2687c478bd9Sstevel@tonic-gate * There is no specific error code for 'too small'.
2697c478bd9Sstevel@tonic-gate */
2707c478bd9Sstevel@tonic-gate return (KPHYSM_ERESOURCE);
2717c478bd9Sstevel@tonic-gate }
2727c478bd9Sstevel@tonic-gate
2739853d9e8SJason Beloro mapalloc:
2747c478bd9Sstevel@tonic-gate /*
2757c478bd9Sstevel@tonic-gate * We may re-use a previously allocated VA space for the page_ts
2767c478bd9Sstevel@tonic-gate * eventually, but we need to initialize and lock the pages first.
2777c478bd9Sstevel@tonic-gate */
2787c478bd9Sstevel@tonic-gate
2797c478bd9Sstevel@tonic-gate /*
2807c478bd9Sstevel@tonic-gate * Get an address in the kernel address map, map
2817c478bd9Sstevel@tonic-gate * the page_t pages and see if we can touch them.
2827c478bd9Sstevel@tonic-gate */
2837c478bd9Sstevel@tonic-gate
2847c478bd9Sstevel@tonic-gate mapva = vmem_alloc(heap_arena, ptob(metapgs), VM_NOSLEEP);
2857c478bd9Sstevel@tonic-gate if (mapva == NULL) {
2867c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "kphysm_add_memory_dynamic:"
2877c478bd9Sstevel@tonic-gate " Can't allocate VA for page_ts");
2887c478bd9Sstevel@tonic-gate
2899853d9e8SJason Beloro if (meta_alloc)
2909853d9e8SJason Beloro memseg_free_meta(metabase, metapgs);
2917c478bd9Sstevel@tonic-gate kphysm_addmem_error_undospan(pt_base, tpgs);
2927c478bd9Sstevel@tonic-gate
2937c478bd9Sstevel@tonic-gate return (KPHYSM_ERESOURCE);
2947c478bd9Sstevel@tonic-gate }
2957c478bd9Sstevel@tonic-gate pp = mapva;
2967c478bd9Sstevel@tonic-gate
2977c478bd9Sstevel@tonic-gate if (physmax < (pt_base + tpgs))
2987c478bd9Sstevel@tonic-gate physmax = (pt_base + tpgs);
2997c478bd9Sstevel@tonic-gate
3007c478bd9Sstevel@tonic-gate /*
3017c478bd9Sstevel@tonic-gate * In the remapping code we map one page at a time so we must do
3027c478bd9Sstevel@tonic-gate * the same here to match mapping sizes.
3037c478bd9Sstevel@tonic-gate */
3047c478bd9Sstevel@tonic-gate pfn = pt_base;
3057c478bd9Sstevel@tonic-gate vaddr = (caddr_t)pp;
3067c478bd9Sstevel@tonic-gate for (pnum = 0; pnum < metapgs; pnum++) {
3079853d9e8SJason Beloro if (meta_alloc)
3089853d9e8SJason Beloro pfn = memseg_get_metapfn(metabase, (pgcnt_t)pnum);
3097c478bd9Sstevel@tonic-gate hat_devload(kas.a_hat, vaddr, ptob(1), pfn,
3107c478bd9Sstevel@tonic-gate PROT_READ | PROT_WRITE,
3117c478bd9Sstevel@tonic-gate HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
3127c478bd9Sstevel@tonic-gate pfn++;
3137c478bd9Sstevel@tonic-gate vaddr += ptob(1);
3147c478bd9Sstevel@tonic-gate }
3157c478bd9Sstevel@tonic-gate
/* Probe the freshly mapped page_t area to make sure it is accessible. */
3167c478bd9Sstevel@tonic-gate if (ddi_peek32((dev_info_t *)NULL,
3177c478bd9Sstevel@tonic-gate (int32_t *)pp, (int32_t *)0) == DDI_FAILURE) {
3187c478bd9Sstevel@tonic-gate
31928e72544SJakub Jirsa cmn_err(CE_WARN, "kphysm_add_memory_dynamic:"
3207c478bd9Sstevel@tonic-gate " Can't access pp array at 0x%p [phys 0x%lx]",
3217c478bd9Sstevel@tonic-gate (void *)pp, pt_base);
3227c478bd9Sstevel@tonic-gate
3237c478bd9Sstevel@tonic-gate hat_unload(kas.a_hat, (caddr_t)pp, ptob(metapgs),
3247c478bd9Sstevel@tonic-gate HAT_UNLOAD_UNMAP|HAT_UNLOAD_UNLOCK);
3257c478bd9Sstevel@tonic-gate
3267c478bd9Sstevel@tonic-gate vmem_free(heap_arena, mapva, ptob(metapgs));
3279853d9e8SJason Beloro if (meta_alloc)
3289853d9e8SJason Beloro memseg_free_meta(metabase, metapgs);
3297c478bd9Sstevel@tonic-gate kphysm_addmem_error_undospan(pt_base, tpgs);
3307c478bd9Sstevel@tonic-gate
3317c478bd9Sstevel@tonic-gate return (KPHYSM_EFAULT);
3327c478bd9Sstevel@tonic-gate }
3337c478bd9Sstevel@tonic-gate
3347c478bd9Sstevel@tonic-gate /*
3357c478bd9Sstevel@tonic-gate * Add this memory slice to its memory node translation.
3367c478bd9Sstevel@tonic-gate *
3377c478bd9Sstevel@tonic-gate * Note that right now, each node may have only one slice;
3387c478bd9Sstevel@tonic-gate * this may change with COD or in larger SSM systems with
3397c478bd9Sstevel@tonic-gate * nested latency groups, so we must not assume that the
3407c478bd9Sstevel@tonic-gate * node does not yet exist.
341a3114836SGerry Liu *
342a3114836SGerry Liu * Note that there may be multiple memory nodes associated with
343a3114836SGerry Liu * a single lgrp node on x86 systems.
3447c478bd9Sstevel@tonic-gate */
34520c26ed3SChristopher Baumbauer - Sun Microsystems - San Diego United States pnum = pt_base + tpgs - 1;
3469853d9e8SJason Beloro mem_node_add_range(pt_base, pnum);
3477c478bd9Sstevel@tonic-gate
3487c478bd9Sstevel@tonic-gate /*
349da6c28aaSamw * Allocate or resize page counters as necessary to accommodate
3507c478bd9Sstevel@tonic-gate * the increase in memory pages.
3517c478bd9Sstevel@tonic-gate */
3527c478bd9Sstevel@tonic-gate mnode = PFN_2_MEM_NODE(pnum);
3539853d9e8SJason Beloro PAGE_CTRS_ADJUST(base, npgs, rv);
3549853d9e8SJason Beloro if (rv) {
3557c478bd9Sstevel@tonic-gate
3569853d9e8SJason Beloro mem_node_del_range(pt_base, pnum);
3577c478bd9Sstevel@tonic-gate
358af4c679fSSean McEnroe /* cleanup the page counters */
359af4c679fSSean McEnroe page_ctrs_cleanup();
360af4c679fSSean McEnroe
3617c478bd9Sstevel@tonic-gate hat_unload(kas.a_hat, (caddr_t)pp, ptob(metapgs),
3627c478bd9Sstevel@tonic-gate HAT_UNLOAD_UNMAP|HAT_UNLOAD_UNLOCK);
3637c478bd9Sstevel@tonic-gate
3647c478bd9Sstevel@tonic-gate vmem_free(heap_arena, mapva, ptob(metapgs));
3659853d9e8SJason Beloro if (meta_alloc)
3669853d9e8SJason Beloro memseg_free_meta(metabase, metapgs);
3677c478bd9Sstevel@tonic-gate kphysm_addmem_error_undospan(pt_base, tpgs);
3687c478bd9Sstevel@tonic-gate
3697c478bd9Sstevel@tonic-gate return (KPHYSM_ERESOURCE);
3707c478bd9Sstevel@tonic-gate }
3717c478bd9Sstevel@tonic-gate
3727c478bd9Sstevel@tonic-gate /*
3737c478bd9Sstevel@tonic-gate * Update the phys_avail memory list.
3747c478bd9Sstevel@tonic-gate * The phys_install list was done at the start.
3757c478bd9Sstevel@tonic-gate */
3767c478bd9Sstevel@tonic-gate
3777c478bd9Sstevel@tonic-gate memlist_write_lock();
3787c478bd9Sstevel@tonic-gate
3797c478bd9Sstevel@tonic-gate mlret = memlist_add_span((uint64_t)(base) << PAGESHIFT,
3807c478bd9Sstevel@tonic-gate (uint64_t)(npgs) << PAGESHIFT, &phys_avail);
3817c478bd9Sstevel@tonic-gate ASSERT(mlret == MEML_SPANOP_OK);
3827c478bd9Sstevel@tonic-gate
3837c478bd9Sstevel@tonic-gate memlist_write_unlock();
3847c478bd9Sstevel@tonic-gate
3857c478bd9Sstevel@tonic-gate /* See if we can find a memseg to re-use. */
3869853d9e8SJason Beloro if (meta_alloc) {
3879853d9e8SJason Beloro seg = memseg_reuse(0);
3889853d9e8SJason Beloro reuse = 1; /* force unmapping of temp mapva */
3899853d9e8SJason Beloro flags = MEMSEG_DYNAMIC | MEMSEG_META_ALLOC;
3909853d9e8SJason Beloro /*
3919853d9e8SJason Beloro * There is a 1:1 fixed relationship between a pfn
3929853d9e8SJason Beloro * and a page_t VA. The pfn is used as an index into
3939853d9e8SJason Beloro * the ppvm_base page_t table in order to calculate
3949853d9e8SJason Beloro * the page_t base address for a given pfn range.
3959853d9e8SJason Beloro */
3969853d9e8SJason Beloro segpp = ppvm_base + base;
3979853d9e8SJason Beloro } else {
3987c478bd9Sstevel@tonic-gate seg = memseg_reuse(metapgs);
3997c478bd9Sstevel@tonic-gate reuse = (seg != NULL);
4009853d9e8SJason Beloro flags = MEMSEG_DYNAMIC | MEMSEG_META_INCL;
4019853d9e8SJason Beloro segpp = pp;
4029853d9e8SJason Beloro }
4037c478bd9Sstevel@tonic-gate
4047c478bd9Sstevel@tonic-gate /*
4057c478bd9Sstevel@tonic-gate * Initialize the memseg structure representing this memory
4067c478bd9Sstevel@tonic-gate * and add it to the existing list of memsegs. Do some basic
4077c478bd9Sstevel@tonic-gate * initialization and add the memory to the system.
4087c478bd9Sstevel@tonic-gate * In order to prevent lock deadlocks, the add_physmem()
4097c478bd9Sstevel@tonic-gate * code is repeated here, but split into several stages.
4109853d9e8SJason Beloro *
4119853d9e8SJason Beloro * If a memseg is reused, invalidate memseg pointers in
4129853d9e8SJason Beloro * all cpu vm caches. We need to do this since the check
4139853d9e8SJason Beloro * pp >= seg->pages && pp < seg->epages
4149853d9e8SJason Beloro * used in various places is not atomic and so the first compare
4159853d9e8SJason Beloro * can happen before reuse and the second compare after reuse.
4169853d9e8SJason Beloro * The invalidation ensures that a memseg is not dereferenced while
4179853d9e8SJason Beloro * its page/pfn pointers are changing.
4187c478bd9Sstevel@tonic-gate */
4197c478bd9Sstevel@tonic-gate if (seg == NULL) {
4209853d9e8SJason Beloro seg = memseg_alloc();
4219853d9e8SJason Beloro ASSERT(seg != NULL);
4229853d9e8SJason Beloro seg->msegflags = flags;
4239853d9e8SJason Beloro MEMSEG_DEBUG("memseg_get: alloc seg=0x%p, pages=0x%p",
4249853d9e8SJason Beloro (void *)seg, (void *)(seg->pages));
4259853d9e8SJason Beloro seg->pages = segpp;
4267c478bd9Sstevel@tonic-gate } else {
4279853d9e8SJason Beloro ASSERT(seg->msegflags == flags);
4289853d9e8SJason Beloro ASSERT(seg->pages_base == seg->pages_end);
4299853d9e8SJason Beloro MEMSEG_DEBUG("memseg_get: reuse seg=0x%p, pages=0x%p",
4309853d9e8SJason Beloro (void *)seg, (void *)(seg->pages));
4319853d9e8SJason Beloro if (meta_alloc) {
4329853d9e8SJason Beloro memseg_cpu_vm_flush();
4339853d9e8SJason Beloro seg->pages = segpp;
4349853d9e8SJason Beloro }
4357c478bd9Sstevel@tonic-gate }
4367c478bd9Sstevel@tonic-gate
4377c478bd9Sstevel@tonic-gate seg->epages = seg->pages + npgs;
4387c478bd9Sstevel@tonic-gate seg->pages_base = base;
4397c478bd9Sstevel@tonic-gate seg->pages_end = base + npgs;
4407c478bd9Sstevel@tonic-gate
4417c478bd9Sstevel@tonic-gate /*
4427c478bd9Sstevel@tonic-gate * Initialize metadata. The page_ts are set to locked state
4437c478bd9Sstevel@tonic-gate * ready to be freed.
4447c478bd9Sstevel@tonic-gate */
4457c478bd9Sstevel@tonic-gate bzero((caddr_t)pp, ptob(metapgs));
4467c478bd9Sstevel@tonic-gate
4477c478bd9Sstevel@tonic-gate pfn = seg->pages_base;
4487c478bd9Sstevel@tonic-gate /* Save the original pp base in case we reuse a memseg. */
4497c478bd9Sstevel@tonic-gate opp = pp;
4507c478bd9Sstevel@tonic-gate oepp = opp + npgs;
4517c478bd9Sstevel@tonic-gate for (pp = opp; pp < oepp; pp++) {
4527c478bd9Sstevel@tonic-gate pp->p_pagenum = pfn;
4537c478bd9Sstevel@tonic-gate pfn++;
4547c478bd9Sstevel@tonic-gate page_iolock_init(pp);
4557c478bd9Sstevel@tonic-gate while (!page_lock(pp, SE_EXCL, (kmutex_t *)NULL, P_RECLAIM))
4567c478bd9Sstevel@tonic-gate continue;
4577c478bd9Sstevel@tonic-gate pp->p_offset = (u_offset_t)-1;
4587c478bd9Sstevel@tonic-gate }
4597c478bd9Sstevel@tonic-gate
4607c478bd9Sstevel@tonic-gate if (reuse) {
4617c478bd9Sstevel@tonic-gate /* Remap our page_ts to the re-used memseg VA space. */
4627c478bd9Sstevel@tonic-gate pfn = pt_base;
4637c478bd9Sstevel@tonic-gate vaddr = (caddr_t)seg->pages;
4647c478bd9Sstevel@tonic-gate for (pnum = 0; pnum < metapgs; pnum++) {
4659853d9e8SJason Beloro if (meta_alloc)
4669853d9e8SJason Beloro pfn = memseg_get_metapfn(metabase,
4679853d9e8SJason Beloro (pgcnt_t)pnum);
4687c478bd9Sstevel@tonic-gate hat_devload(kas.a_hat, vaddr, ptob(1), pfn,
4697c478bd9Sstevel@tonic-gate PROT_READ | PROT_WRITE,
4707c478bd9Sstevel@tonic-gate HAT_LOAD_REMAP | HAT_LOAD | HAT_LOAD_NOCONSIST);
4717c478bd9Sstevel@tonic-gate pfn++;
4727c478bd9Sstevel@tonic-gate vaddr += ptob(1);
4737c478bd9Sstevel@tonic-gate }
4747c478bd9Sstevel@tonic-gate
4757c478bd9Sstevel@tonic-gate hat_unload(kas.a_hat, (caddr_t)opp, ptob(metapgs),
4767c478bd9Sstevel@tonic-gate HAT_UNLOAD_UNMAP|HAT_UNLOAD_UNLOCK);
4777c478bd9Sstevel@tonic-gate
4787c478bd9Sstevel@tonic-gate vmem_free(heap_arena, mapva, ptob(metapgs));
4797c478bd9Sstevel@tonic-gate }
4807c478bd9Sstevel@tonic-gate
4817c478bd9Sstevel@tonic-gate hat_kpm_addmem_mseg_update(seg, nkpmpgs, kpm_pages_off);
4827c478bd9Sstevel@tonic-gate
4837c478bd9Sstevel@tonic-gate memsegs_lock(1);
4847c478bd9Sstevel@tonic-gate
4857c478bd9Sstevel@tonic-gate /*
4867c478bd9Sstevel@tonic-gate * The new memseg is inserted at the beginning of the list.
4877c478bd9Sstevel@tonic-gate * Not only does this save searching for the tail, but in the
4887c478bd9Sstevel@tonic-gate * case of a re-used memseg, it solves the problem of what
48928e72544SJakub Jirsa * happens if some process has still got a pointer to the
4907c478bd9Sstevel@tonic-gate * memseg and follows the next pointer to continue traversing
4917c478bd9Sstevel@tonic-gate * the memsegs list.
4927c478bd9Sstevel@tonic-gate */
4937c478bd9Sstevel@tonic-gate
4947c478bd9Sstevel@tonic-gate hat_kpm_addmem_mseg_insert(seg);
4957c478bd9Sstevel@tonic-gate
4967c478bd9Sstevel@tonic-gate seg->next = memsegs;
4977c478bd9Sstevel@tonic-gate membar_producer();
4987c478bd9Sstevel@tonic-gate
4997c478bd9Sstevel@tonic-gate hat_kpm_addmem_memsegs_update(seg);
5007c478bd9Sstevel@tonic-gate
5017c478bd9Sstevel@tonic-gate memsegs = seg;
5027c478bd9Sstevel@tonic-gate
5037c478bd9Sstevel@tonic-gate build_pfn_hash();
5047c478bd9Sstevel@tonic-gate
5057c478bd9Sstevel@tonic-gate total_pages += npgs;
5067c478bd9Sstevel@tonic-gate
5077c478bd9Sstevel@tonic-gate /*
5087c478bd9Sstevel@tonic-gate * Recalculate the paging parameters now total_pages has changed.
5097c478bd9Sstevel@tonic-gate * This will also cause the clock hands to be reset before next use.
5107c478bd9Sstevel@tonic-gate */
5117c478bd9Sstevel@tonic-gate setupclock(1);
5127c478bd9Sstevel@tonic-gate
5137c478bd9Sstevel@tonic-gate memsegs_unlock(1);
5147c478bd9Sstevel@tonic-gate
515ee88d2b9Skchow PLCNT_MODIFY_MAX(seg->pages_base, (long)npgs);
516ee88d2b9Skchow
5177c478bd9Sstevel@tonic-gate /*
5187c478bd9Sstevel@tonic-gate * Free the pages outside the lock to avoid locking loops.
5197c478bd9Sstevel@tonic-gate */
5207c478bd9Sstevel@tonic-gate for (pp = seg->pages; pp < seg->epages; pp++) {
5217c478bd9Sstevel@tonic-gate page_free(pp, 1);
5227c478bd9Sstevel@tonic-gate }
5237c478bd9Sstevel@tonic-gate
5247c478bd9Sstevel@tonic-gate /*
5257c478bd9Sstevel@tonic-gate * Now that we've updated the appropriate memory lists we
5267c478bd9Sstevel@tonic-gate * need to reset a number of globals, since we've increased memory.
5277c478bd9Sstevel@tonic-gate * Several have already been updated for us as noted above. The
5287c478bd9Sstevel@tonic-gate * globals we're interested in at this point are:
5297c478bd9Sstevel@tonic-gate * physmax - highest page frame number.
5307c478bd9Sstevel@tonic-gate * physinstalled - number of pages currently installed (done earlier)
5317c478bd9Sstevel@tonic-gate * maxmem - max free pages in the system
5327c478bd9Sstevel@tonic-gate * physmem - physical memory pages available
5337c478bd9Sstevel@tonic-gate * availrmem - real memory available
5347c478bd9Sstevel@tonic-gate */
5357c478bd9Sstevel@tonic-gate
5367c478bd9Sstevel@tonic-gate mutex_enter(&freemem_lock);
5377c478bd9Sstevel@tonic-gate maxmem += npgs;
5387c478bd9Sstevel@tonic-gate physmem += npgs;
5397c478bd9Sstevel@tonic-gate availrmem += npgs;
5407c478bd9Sstevel@tonic-gate availrmem_initial += npgs;
5417c478bd9Sstevel@tonic-gate
5427c478bd9Sstevel@tonic-gate mutex_exit(&freemem_lock);
5437c478bd9Sstevel@tonic-gate
5447c478bd9Sstevel@tonic-gate dump_resize();
5457c478bd9Sstevel@tonic-gate
5467c478bd9Sstevel@tonic-gate page_freelist_coalesce_all(mnode);
5477c478bd9Sstevel@tonic-gate
5487c478bd9Sstevel@tonic-gate kphysm_setup_post_add(npgs);
5497c478bd9Sstevel@tonic-gate
5507c478bd9Sstevel@tonic-gate cmn_err(CE_CONT, "?kphysm_add_memory_dynamic: mem = %ldK "
5517c478bd9Sstevel@tonic-gate "(0x%" PRIx64 ")\n",
5527c478bd9Sstevel@tonic-gate physinstalled << (PAGESHIFT - 10),
5537c478bd9Sstevel@tonic-gate (uint64_t)physinstalled << PAGESHIFT);
5547c478bd9Sstevel@tonic-gate
5557c478bd9Sstevel@tonic-gate avmem = (uint64_t)freemem << PAGESHIFT;
5567c478bd9Sstevel@tonic-gate cmn_err(CE_CONT, "?kphysm_add_memory_dynamic: "
5577c478bd9Sstevel@tonic-gate "avail mem = %" PRId64 "\n", avmem);
5587c478bd9Sstevel@tonic-gate
5597c478bd9Sstevel@tonic-gate /*
5607c478bd9Sstevel@tonic-gate * Update lgroup generation number on single lgroup systems
5617c478bd9Sstevel@tonic-gate */
5627c478bd9Sstevel@tonic-gate if (nlgrps == 1)
5637c478bd9Sstevel@tonic-gate lgrp_config(LGRP_CONFIG_GEN_UPDATE, 0, 0);
5647c478bd9Sstevel@tonic-gate
5653a634bfcSVikram Hegde /*
5663a634bfcSVikram Hegde * Inform DDI of update
5673a634bfcSVikram Hegde */
5683a634bfcSVikram Hegde ddi_mem_update((uint64_t)(pt_base) << PAGESHIFT,
5693a634bfcSVikram Hegde (uint64_t)(tpgs) << PAGESHIFT);
5703a634bfcSVikram Hegde
5717c478bd9Sstevel@tonic-gate delspan_unreserve(pt_base, tpgs);
5727c478bd9Sstevel@tonic-gate
573a3114836SGerry Liu return (KPHYSM_OK); /* Successfully added system memory */
5747c478bd9Sstevel@tonic-gate }
5757c478bd9Sstevel@tonic-gate
5767c478bd9Sstevel@tonic-gate /*
5777c478bd9Sstevel@tonic-gate * There are various error conditions in kphysm_add_memory_dynamic()
5787c478bd9Sstevel@tonic-gate * which require a rollback of already changed global state.
 *
 * pt_base/tpgs describe the full span (metadata pages included) that was
 * added to phys_install at the start of the add; it is deleted again here,
 * physmax/physinstalled are recomputed, and the delspan reservation taken
 * by the caller is dropped.
5797c478bd9Sstevel@tonic-gate */
5807c478bd9Sstevel@tonic-gate static void
kphysm_addmem_error_undospan(pfn_t pt_base,pgcnt_t tpgs)5817c478bd9Sstevel@tonic-gate kphysm_addmem_error_undospan(pfn_t pt_base, pgcnt_t tpgs)
5827c478bd9Sstevel@tonic-gate {
5837c478bd9Sstevel@tonic-gate int mlret;
5847c478bd9Sstevel@tonic-gate
5857c478bd9Sstevel@tonic-gate /* Unreserve memory span. */
5867c478bd9Sstevel@tonic-gate memlist_write_lock();
5877c478bd9Sstevel@tonic-gate
5887c478bd9Sstevel@tonic-gate mlret = memlist_delete_span(
5897c478bd9Sstevel@tonic-gate (uint64_t)(pt_base) << PAGESHIFT,
5907c478bd9Sstevel@tonic-gate (uint64_t)(tpgs) << PAGESHIFT, &phys_install);
5917c478bd9Sstevel@tonic-gate
5927c478bd9Sstevel@tonic-gate ASSERT(mlret == MEML_SPANOP_OK);
5937c478bd9Sstevel@tonic-gate phys_install_has_changed();
5947c478bd9Sstevel@tonic-gate installed_top_size(phys_install, &physmax, &physinstalled);
5957c478bd9Sstevel@tonic-gate
5967c478bd9Sstevel@tonic-gate memlist_write_unlock();
5977c478bd9Sstevel@tonic-gate delspan_unreserve(pt_base, tpgs);
5987c478bd9Sstevel@tonic-gate }
5997c478bd9Sstevel@tonic-gate
6007c478bd9Sstevel@tonic-gate /*
6019853d9e8SJason Beloro * Only return an available memseg of exactly the right size
6029853d9e8SJason Beloro * if size is required.
6037c478bd9Sstevel@tonic-gate * When the meta data area has it's own virtual address space
6047c478bd9Sstevel@tonic-gate * we will need to manage this more carefully and do best fit
605da6c28aaSamw * allocations, possibly splitting an available area.
6067c478bd9Sstevel@tonic-gate */
6079853d9e8SJason Beloro struct memseg *
memseg_reuse(pgcnt_t metapgs)6087c478bd9Sstevel@tonic-gate memseg_reuse(pgcnt_t metapgs)
6097c478bd9Sstevel@tonic-gate {
6109853d9e8SJason Beloro int type;
6117c478bd9Sstevel@tonic-gate struct memseg **segpp, *seg;
6127c478bd9Sstevel@tonic-gate
6137c478bd9Sstevel@tonic-gate mutex_enter(&memseg_lists_lock);
6147c478bd9Sstevel@tonic-gate
6157c478bd9Sstevel@tonic-gate segpp = &memseg_va_avail;
6167c478bd9Sstevel@tonic-gate for (; (seg = *segpp) != NULL; segpp = &seg->lnext) {
6177c478bd9Sstevel@tonic-gate caddr_t end;
6187c478bd9Sstevel@tonic-gate
6199853d9e8SJason Beloro /*
6209853d9e8SJason Beloro * Make sure we are reusing the right segment type.
6219853d9e8SJason Beloro */
6229853d9e8SJason Beloro type = metapgs ? MEMSEG_META_INCL : MEMSEG_META_ALLOC;
6239853d9e8SJason Beloro
6249853d9e8SJason Beloro if ((seg->msegflags & (MEMSEG_META_INCL | MEMSEG_META_ALLOC))
6259853d9e8SJason Beloro != type)
6269853d9e8SJason Beloro continue;
6279853d9e8SJason Beloro
6287c478bd9Sstevel@tonic-gate if (kpm_enable)
6297c478bd9Sstevel@tonic-gate end = hat_kpm_mseg_reuse(seg);
6307c478bd9Sstevel@tonic-gate else
6317c478bd9Sstevel@tonic-gate end = (caddr_t)seg->epages;
6327c478bd9Sstevel@tonic-gate
6339853d9e8SJason Beloro /*
6349853d9e8SJason Beloro * Check for the right size if it is provided.
6359853d9e8SJason Beloro */
6369853d9e8SJason Beloro if (!metapgs || btopr(end - (caddr_t)seg->pages) == metapgs) {
6377c478bd9Sstevel@tonic-gate *segpp = seg->lnext;
6387c478bd9Sstevel@tonic-gate seg->lnext = NULL;
6397c478bd9Sstevel@tonic-gate break;
6407c478bd9Sstevel@tonic-gate }
6417c478bd9Sstevel@tonic-gate }
6427c478bd9Sstevel@tonic-gate mutex_exit(&memseg_lists_lock);
6437c478bd9Sstevel@tonic-gate
6447c478bd9Sstevel@tonic-gate return (seg);
6457c478bd9Sstevel@tonic-gate }
6467c478bd9Sstevel@tonic-gate
/* Generation counter used to mint unique external memhandle_t values. */
static uint_t handle_gen;

/*
 * One contiguous span of physical pages taking part in an add or
 * delete operation.  Spans are chained through mds_next.
 */
struct memdelspan {
	struct memdelspan *mds_next;
	pfn_t mds_base;			/* first pfn of the span */
	pgcnt_t mds_npgs;		/* number of pages in the span */
	uint_t *mds_bitmap;		/* one bit per page (MDS_BITMAPBYTES) */
	uint_t *mds_bitmap_retired;	/* retired pages; same layout, presumably */
};

/* Bits per bitmap word. */
#define	NBPBMW (sizeof (uint_t) * NBBY)
/* Bytes needed for a one-bit-per-page bitmap covering span MDSP. */
#define	MDS_BITMAPBYTES(MDSP) \
	((((MDSP)->mds_npgs + NBPBMW - 1) / NBPBMW) * sizeof (uint_t))

/*
 * The set of spans belonging to one in-progress operation.
 */
struct transit_list {
	struct transit_list *trl_next;	/* next active operation's list */
	struct memdelspan *trl_spans;	/* this operation's spans */
	int trl_collect;		/* set via transit_list_collect() */
};

/*
 * Global list of all active transit lists; scanned by delspan_insert()
 * to detect span overlap between concurrent operations.
 */
struct transit_list_head {
	kmutex_t trh_lock;		/* protects trh_head and all span lists */
	struct transit_list *trh_head;
};

static struct transit_list_head transit_list_head;

/* Forward declarations. */
struct mem_handle;
static void transit_list_collect(struct mem_handle *, int);
static void transit_list_insert(struct transit_list *);
static void transit_list_remove(struct transit_list *);

/* Memory delete statistics are only compiled into DEBUG kernels. */
#ifdef DEBUG
#define	MEM_DEL_STATS
#endif /* DEBUG */
6827c478bd9Sstevel@tonic-gate
#ifdef MEM_DEL_STATS
/*
 * Debug knob: non-zero enables printing of the per-delete statistics,
 * presumably consulted by mem_del_stat_print_func() — confirm there.
 */
static int mem_del_stat_print = 0;

/*
 * Per-delete-operation event counters, embedded in struct mem_handle
 * (mh_delstat) and updated through the MDSTAT_* macros below.  The
 * nticks_* fields accumulate elapsed tick times; the remaining fields
 * count events in the delete thread's page processing.
 */
struct mem_del_stat {
	uint_t nloop;
	uint_t need_free;
	uint_t free_loop;
	uint_t free_low;
	uint_t free_failed;
	uint_t ncheck;
	uint_t nopaget;
	uint_t lockfail;
	uint_t nfree;
	uint_t nreloc;
	uint_t nrelocfail;
	uint_t already_done;
	uint_t first_notfree;
	uint_t npplocked;
	uint_t nlockreloc;
	uint_t nnorepl;
	uint_t nmodreloc;
	uint_t ndestroy;
	uint_t nputpage;
	uint_t nnoreclaim;
	uint_t ndelay;
	uint_t demotefail;
	uint64_t nticks_total;
	uint64_t nticks_pgrp;
	uint_t retired;
	uint_t toxic;
	uint_t failing;
	uint_t modtoxic;
	uint_t npplkdtoxic;
	uint_t gptlmodfail;
	uint_t gptllckfail;
};
/*
 * The stat values are only incremented in the delete thread
 * so no locking or atomic required.
 */
#define	MDSTAT_INCR(MHP, FLD)	(MHP)->mh_delstat.FLD++
#define	MDSTAT_TOTAL(MHP, ntck)	((MHP)->mh_delstat.nticks_total += (ntck))
#define	MDSTAT_PGRP(MHP, ntck)	((MHP)->mh_delstat.nticks_pgrp += (ntck))
static void mem_del_stat_print_func(struct mem_handle *);
#define	MDSTAT_PRINT(MHP)	mem_del_stat_print_func((MHP))
#else /* MEM_DEL_STATS */
/* Stats compiled out: the macros expand to nothing. */
#define	MDSTAT_INCR(MHP, FLD)
#define	MDSTAT_TOTAL(MHP, ntck)
#define	MDSTAT_PGRP(MHP, ntck)
#define	MDSTAT_PRINT(MHP)
#endif /* MEM_DEL_STATS */
7337c478bd9Sstevel@tonic-gate
/*
 * Life-cycle states of a memory-delete handle.  MHND_FREE must be zero
 * so that a kmem_zalloc()ed handle starts out in the FREE state.
 */
typedef enum mhnd_state {MHND_FREE = 0, MHND_INIT, MHND_STARTING,
	MHND_RUNNING, MHND_DONE, MHND_RELEASE} mhnd_state_t;

/*
 * mh_mutex must be taken to examine or change mh_exthandle and mh_state.
 * The mutex may not be required for other fields, dependent on mh_state.
 */
struct mem_handle {
	kmutex_t mh_mutex;
	struct mem_handle *mh_next;	/* link on mem_handle_head list */
	memhandle_t mh_exthandle;	/* opaque value handed to clients */
	mhnd_state_t mh_state;
	struct transit_list mh_transit;	/* spans of this operation */
	pgcnt_t mh_phys_pages;
	pgcnt_t mh_vm_pages;
	pgcnt_t mh_hold_todo;
	void (*mh_delete_complete)(void *, int error);	/* completion callback */
	void *mh_delete_complete_arg;
	volatile uint_t mh_cancel;
	volatile uint_t mh_dr_aio_cleanup_cancel;
	volatile uint_t mh_aio_cleanup_done;
	kcondvar_t mh_cv;
	kthread_id_t mh_thread_id;
	page_t *mh_deleted;	/* link through p_next */
#ifdef MEM_DEL_STATS
	struct mem_del_stat mh_delstat;
#endif /* MEM_DEL_STATS */
};

/* All allocated handles, protected by mem_handle_list_mutex. */
static struct mem_handle *mem_handle_head;
static kmutex_t mem_handle_list_mutex;
7657c478bd9Sstevel@tonic-gate
/*
 * Allocate a new zeroed mem_handle (zeroed means MHND_FREE state),
 * assign it the next external handle value and link it onto the
 * global list.  Returns with mh_mutex held.
 */
static struct mem_handle *
kphysm_allocate_mem_handle()
{
	struct mem_handle *mhp;

	mhp = kmem_zalloc(sizeof (struct mem_handle), KM_SLEEP);
	mutex_init(&mhp->mh_mutex, NULL, MUTEX_DEFAULT, NULL);
	/* Lock order: list mutex before the handle's own mutex. */
	mutex_enter(&mem_handle_list_mutex);
	mutex_enter(&mhp->mh_mutex);
	/* handle_gen is protected by list mutex. */
	mhp->mh_exthandle = (memhandle_t)(uintptr_t)(++handle_gen);
	mhp->mh_next = mem_handle_head;
	mem_handle_head = mhp;
	mutex_exit(&mem_handle_list_mutex);

	return (mhp);
}
7837c478bd9Sstevel@tonic-gate
/*
 * Unlink a handle from the global list and destroy it.  Called with
 * mh_mutex held and mh_state == MHND_FREE.
 */
static void
kphysm_free_mem_handle(struct mem_handle *mhp)
{
	struct mem_handle **mhpp;

	ASSERT(mutex_owned(&mhp->mh_mutex));
	ASSERT(mhp->mh_state == MHND_FREE);
	/*
	 * Exit the mutex to preserve locking order. This is OK
	 * here as once in the FREE state, the handle cannot
	 * be found by a lookup.
	 */
	mutex_exit(&mhp->mh_mutex);

	mutex_enter(&mem_handle_list_mutex);
	/* Find the list link that points at mhp. */
	mhpp = &mem_handle_head;
	while (*mhpp != NULL && *mhpp != mhp)
		mhpp = &(*mhpp)->mh_next;
	ASSERT(*mhpp == mhp);
	/*
	 * No need to lock the handle (mh_mutex) as only
	 * mh_next changing and this is the only thread that
	 * can be referencing mhp.
	 */
	*mhpp = mhp->mh_next;
	mutex_exit(&mem_handle_list_mutex);

	mutex_destroy(&mhp->mh_mutex);
	kmem_free(mhp, sizeof (struct mem_handle));
}
8147c478bd9Sstevel@tonic-gate
/*
 * This function finds the internal mem_handle corresponding to an
 * external handle and returns it with the mh_mutex held.
 * Returns NULL when no live (non-FREE) handle matches.
 */
static struct mem_handle *
kphysm_lookup_mem_handle(memhandle_t handle)
{
	struct mem_handle *mhp;

	mutex_enter(&mem_handle_list_mutex);
	for (mhp = mem_handle_head; mhp != NULL; mhp = mhp->mh_next) {
		if (mhp->mh_exthandle == handle) {
			mutex_enter(&mhp->mh_mutex);
			/*
			 * The state of the handle could have been changed
			 * by kphysm_del_release() while waiting for mh_mutex.
			 */
			if (mhp->mh_state == MHND_FREE) {
				mutex_exit(&mhp->mh_mutex);
				continue;
			}
			break;
		}
	}
	mutex_exit(&mem_handle_list_mutex);
	return (mhp);
}
8427c478bd9Sstevel@tonic-gate
8437c478bd9Sstevel@tonic-gate int
kphysm_del_gethandle(memhandle_t * xmhp)8447c478bd9Sstevel@tonic-gate kphysm_del_gethandle(memhandle_t *xmhp)
8457c478bd9Sstevel@tonic-gate {
8467c478bd9Sstevel@tonic-gate struct mem_handle *mhp;
8477c478bd9Sstevel@tonic-gate
8487c478bd9Sstevel@tonic-gate mhp = kphysm_allocate_mem_handle();
8497c478bd9Sstevel@tonic-gate /*
8507c478bd9Sstevel@tonic-gate * The handle is allocated using KM_SLEEP, so cannot fail.
8517c478bd9Sstevel@tonic-gate * If the implementation is changed, the correct error to return
8527c478bd9Sstevel@tonic-gate * here would be KPHYSM_ENOHANDLES.
8537c478bd9Sstevel@tonic-gate */
8547c478bd9Sstevel@tonic-gate ASSERT(mhp->mh_state == MHND_FREE);
8557c478bd9Sstevel@tonic-gate mhp->mh_state = MHND_INIT;
8567c478bd9Sstevel@tonic-gate *xmhp = mhp->mh_exthandle;
8577c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
8587c478bd9Sstevel@tonic-gate return (KPHYSM_OK);
8597c478bd9Sstevel@tonic-gate }
8607c478bd9Sstevel@tonic-gate
8617c478bd9Sstevel@tonic-gate static int
overlapping(pfn_t b1,pgcnt_t l1,pfn_t b2,pgcnt_t l2)8627c478bd9Sstevel@tonic-gate overlapping(pfn_t b1, pgcnt_t l1, pfn_t b2, pgcnt_t l2)
8637c478bd9Sstevel@tonic-gate {
8647c478bd9Sstevel@tonic-gate pfn_t e1, e2;
8657c478bd9Sstevel@tonic-gate
8667c478bd9Sstevel@tonic-gate e1 = b1 + l1;
8677c478bd9Sstevel@tonic-gate e2 = b2 + l2;
8687c478bd9Sstevel@tonic-gate
8697c478bd9Sstevel@tonic-gate return (!(b2 >= e1 || b1 >= e2));
8707c478bd9Sstevel@tonic-gate }
8717c478bd9Sstevel@tonic-gate
static int can_remove_pgs(pgcnt_t);

/*
 * Intersect the span [base, base + npgs) with the list of installed
 * physical memory (phys_install) and return the overlapping pieces as
 * a freshly allocated list of memdelspan structures.  Returns NULL
 * when no installed memory lies within the span.
 */
static struct memdelspan *
span_to_install(pfn_t base, pgcnt_t npgs)
{
	struct memdelspan *mdsp;
	struct memdelspan *mdsp_new;
	uint64_t address, size, thislen;
	struct memlist *mlp;

	mdsp_new = NULL;

	address = (uint64_t)base << PAGESHIFT;
	size = (uint64_t)npgs << PAGESHIFT;
	while (size != 0) {
		memlist_read_lock();
		/*
		 * Find the first phys_install element overlapping the
		 * remaining [address, address + size) range.
		 */
		for (mlp = phys_install; mlp != NULL; mlp = mlp->ml_next) {
			if (address >= (mlp->ml_address + mlp->ml_size))
				continue;
			if ((address + size) > mlp->ml_address)
				break;
		}
		if (mlp == NULL) {
			/* No installed memory in the remaining range. */
			address += size;
			size = 0;
			thislen = 0;
		} else {
			/* Clip the range to the element's start. */
			if (address < mlp->ml_address) {
				size -= (mlp->ml_address - address);
				address = mlp->ml_address;
			}
			ASSERT(address >= mlp->ml_address);
			/* thislen is the length of the overlap. */
			if ((address + size) >
			    (mlp->ml_address + mlp->ml_size)) {
				thislen =
				    mlp->ml_size - (address - mlp->ml_address);
			} else {
				thislen = size;
			}
		}
		memlist_read_unlock();
		/* TODO: phys_install could change now */
		if (thislen == 0)
			continue;
		/* Record the overlapping piece and advance past it. */
		mdsp = kmem_zalloc(sizeof (struct memdelspan), KM_SLEEP);
		mdsp->mds_base = btop(address);
		mdsp->mds_npgs = btop(thislen);
		mdsp->mds_next = mdsp_new;
		mdsp_new = mdsp;
		address += thislen;
		size -= thislen;
	}
	return (mdsp_new);
}
9267c478bd9Sstevel@tonic-gate
9277c478bd9Sstevel@tonic-gate static void
free_delspans(struct memdelspan * mdsp)9287c478bd9Sstevel@tonic-gate free_delspans(struct memdelspan *mdsp)
9297c478bd9Sstevel@tonic-gate {
9307c478bd9Sstevel@tonic-gate struct memdelspan *amdsp;
9317c478bd9Sstevel@tonic-gate
9327c478bd9Sstevel@tonic-gate while ((amdsp = mdsp) != NULL) {
9337c478bd9Sstevel@tonic-gate mdsp = amdsp->mds_next;
9347c478bd9Sstevel@tonic-gate kmem_free(amdsp, sizeof (struct memdelspan));
9357c478bd9Sstevel@tonic-gate }
9367c478bd9Sstevel@tonic-gate }
9377c478bd9Sstevel@tonic-gate
9387c478bd9Sstevel@tonic-gate /*
9397c478bd9Sstevel@tonic-gate * Concatenate lists. No list ordering is required.
9407c478bd9Sstevel@tonic-gate */
9417c478bd9Sstevel@tonic-gate
9427c478bd9Sstevel@tonic-gate static void
delspan_concat(struct memdelspan ** mdspp,struct memdelspan * mdsp)9437c478bd9Sstevel@tonic-gate delspan_concat(struct memdelspan **mdspp, struct memdelspan *mdsp)
9447c478bd9Sstevel@tonic-gate {
9457c478bd9Sstevel@tonic-gate while (*mdspp != NULL)
9467c478bd9Sstevel@tonic-gate mdspp = &(*mdspp)->mds_next;
9477c478bd9Sstevel@tonic-gate
9487c478bd9Sstevel@tonic-gate *mdspp = mdsp;
9497c478bd9Sstevel@tonic-gate }
9507c478bd9Sstevel@tonic-gate
/*
 * Given a new list of delspans, check there is no overlap with
 * all existing span activity (add or delete) and then concatenate
 * the new spans to the given list.
 * Return 1 for OK, 0 if overlapping.
 */
static int
delspan_insert(
	struct transit_list *my_tlp,
	struct memdelspan *mdsp_new)
{
	struct transit_list_head *trh;
	struct transit_list *tlp;
	int ret;

	trh = &transit_list_head;

	ASSERT(my_tlp != NULL);
	ASSERT(mdsp_new != NULL);

	ret = 1;
	mutex_enter(&trh->trh_lock);
	/* ASSERT(my_tlp->trl_spans == NULL || tlp_in_list(trh, my_tlp)); */
	for (tlp = trh->trh_head; tlp != NULL; tlp = tlp->trl_next) {
		struct memdelspan *mdsp;

		/*
		 * Compare every span of every active operation against
		 * every span of the new list.
		 */
		for (mdsp = tlp->trl_spans; mdsp != NULL;
		    mdsp = mdsp->mds_next) {
			struct memdelspan *nmdsp;

			for (nmdsp = mdsp_new; nmdsp != NULL;
			    nmdsp = nmdsp->mds_next) {
				if (overlapping(mdsp->mds_base, mdsp->mds_npgs,
				    nmdsp->mds_base, nmdsp->mds_npgs)) {
					ret = 0;
					goto done;
				}
			}
		}
	}
done:
	if (ret != 0) {
		/*
		 * No overlap: put my_tlp on the global list if these are
		 * its first spans, then append the new spans to it.
		 */
		if (my_tlp->trl_spans == NULL)
			transit_list_insert(my_tlp);
		delspan_concat(&my_tlp->trl_spans, mdsp_new);
	}
	mutex_exit(&trh->trh_lock);
	return (ret);
}
10007c478bd9Sstevel@tonic-gate
/*
 * Remove from my_tlp (and free) every span lying entirely within
 * [base, base + npgs).  An npgs value of 0 removes all spans.
 * When the list becomes empty, my_tlp is taken off the global
 * transit list.
 */
static void
delspan_remove(
	struct transit_list *my_tlp,
	pfn_t base,
	pgcnt_t npgs)
{
	struct transit_list_head *trh;
	struct memdelspan *mdsp;

	trh = &transit_list_head;

	ASSERT(my_tlp != NULL);

	mutex_enter(&trh->trh_lock);
	if ((mdsp = my_tlp->trl_spans) != NULL) {
		if (npgs == 0) {
			/* Remove and free the entire span list. */
			my_tlp->trl_spans = NULL;
			free_delspans(mdsp);
			transit_list_remove(my_tlp);
		} else {
			struct memdelspan **prv;

			/* Unlink only the spans contained in the range. */
			prv = &my_tlp->trl_spans;
			while (mdsp != NULL) {
				pfn_t p_end;

				p_end = mdsp->mds_base + mdsp->mds_npgs;
				if (mdsp->mds_base >= base &&
				    p_end <= (base + npgs)) {
					*prv = mdsp->mds_next;
					mdsp->mds_next = NULL;
					free_delspans(mdsp);
				} else {
					prv = &mdsp->mds_next;
				}
				mdsp = *prv;
			}
			if (my_tlp->trl_spans == NULL)
				transit_list_remove(my_tlp);
		}
	}
	mutex_exit(&trh->trh_lock);
}
10447c478bd9Sstevel@tonic-gate
10457c478bd9Sstevel@tonic-gate /*
10467c478bd9Sstevel@tonic-gate * Reserve interface for add to stop delete before add finished.
10477c478bd9Sstevel@tonic-gate * This list is only accessed through the delspan_insert/remove
10487c478bd9Sstevel@tonic-gate * functions and so is fully protected by the mutex in struct transit_list.
10497c478bd9Sstevel@tonic-gate */
10507c478bd9Sstevel@tonic-gate
10517c478bd9Sstevel@tonic-gate static struct transit_list reserve_transit;
10527c478bd9Sstevel@tonic-gate
10537c478bd9Sstevel@tonic-gate static int
delspan_reserve(pfn_t base,pgcnt_t npgs)10547c478bd9Sstevel@tonic-gate delspan_reserve(pfn_t base, pgcnt_t npgs)
10557c478bd9Sstevel@tonic-gate {
10567c478bd9Sstevel@tonic-gate struct memdelspan *mdsp;
10577c478bd9Sstevel@tonic-gate int ret;
10587c478bd9Sstevel@tonic-gate
10597c478bd9Sstevel@tonic-gate mdsp = kmem_zalloc(sizeof (struct memdelspan), KM_SLEEP);
10607c478bd9Sstevel@tonic-gate mdsp->mds_base = base;
10617c478bd9Sstevel@tonic-gate mdsp->mds_npgs = npgs;
10627c478bd9Sstevel@tonic-gate if ((ret = delspan_insert(&reserve_transit, mdsp)) == 0) {
10637c478bd9Sstevel@tonic-gate free_delspans(mdsp);
10647c478bd9Sstevel@tonic-gate }
10657c478bd9Sstevel@tonic-gate return (ret);
10667c478bd9Sstevel@tonic-gate }
10677c478bd9Sstevel@tonic-gate
/* Release a reservation previously taken with delspan_reserve(). */
static void
delspan_unreserve(pfn_t base, pgcnt_t npgs)
{
	delspan_remove(&reserve_transit, base, npgs);
}
10737c478bd9Sstevel@tonic-gate
/*
 * Return whether memseg was created by kphysm_add_memory_dynamic().
 * The value returned is the MEMSEG_DYNAMIC flag bit (non-zero) or 0.
 */
static int
memseg_is_dynamic(struct memseg *seg)
{
	return (seg->msegflags & MEMSEG_DYNAMIC);
}
10827c478bd9Sstevel@tonic-gate
10837c478bd9Sstevel@tonic-gate int
kphysm_del_span(memhandle_t handle,pfn_t base,pgcnt_t npgs)10847c478bd9Sstevel@tonic-gate kphysm_del_span(
10857c478bd9Sstevel@tonic-gate memhandle_t handle,
10867c478bd9Sstevel@tonic-gate pfn_t base,
10877c478bd9Sstevel@tonic-gate pgcnt_t npgs)
10887c478bd9Sstevel@tonic-gate {
10897c478bd9Sstevel@tonic-gate struct mem_handle *mhp;
10907c478bd9Sstevel@tonic-gate struct memseg *seg;
10917c478bd9Sstevel@tonic-gate struct memdelspan *mdsp;
10927c478bd9Sstevel@tonic-gate struct memdelspan *mdsp_new;
10937c478bd9Sstevel@tonic-gate pgcnt_t phys_pages, vm_pages;
10947c478bd9Sstevel@tonic-gate pfn_t p_end;
10957c478bd9Sstevel@tonic-gate page_t *pp;
10967c478bd9Sstevel@tonic-gate int ret;
10977c478bd9Sstevel@tonic-gate
10987c478bd9Sstevel@tonic-gate mhp = kphysm_lookup_mem_handle(handle);
10997c478bd9Sstevel@tonic-gate if (mhp == NULL) {
11007c478bd9Sstevel@tonic-gate return (KPHYSM_EHANDLE);
11017c478bd9Sstevel@tonic-gate }
11027c478bd9Sstevel@tonic-gate if (mhp->mh_state != MHND_INIT) {
11037c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
11047c478bd9Sstevel@tonic-gate return (KPHYSM_ESEQUENCE);
11057c478bd9Sstevel@tonic-gate }
11067c478bd9Sstevel@tonic-gate
11077c478bd9Sstevel@tonic-gate /*
11087c478bd9Sstevel@tonic-gate * Intersect the span with the installed memory list (phys_install).
11097c478bd9Sstevel@tonic-gate */
11107c478bd9Sstevel@tonic-gate mdsp_new = span_to_install(base, npgs);
11117c478bd9Sstevel@tonic-gate if (mdsp_new == NULL) {
11127c478bd9Sstevel@tonic-gate /*
11137c478bd9Sstevel@tonic-gate * No physical memory in this range. Is this an
11147c478bd9Sstevel@tonic-gate * error? If an attempt to start the delete is made
11157c478bd9Sstevel@tonic-gate * for OK returns from del_span such as this, start will
11167c478bd9Sstevel@tonic-gate * return an error.
11177c478bd9Sstevel@tonic-gate * Could return KPHYSM_ENOWORK.
11187c478bd9Sstevel@tonic-gate */
11197c478bd9Sstevel@tonic-gate /*
11207c478bd9Sstevel@tonic-gate * It is assumed that there are no error returns
11217c478bd9Sstevel@tonic-gate * from span_to_install() due to kmem_alloc failure.
11227c478bd9Sstevel@tonic-gate */
11237c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
11247c478bd9Sstevel@tonic-gate return (KPHYSM_OK);
11257c478bd9Sstevel@tonic-gate }
11267c478bd9Sstevel@tonic-gate /*
11277c478bd9Sstevel@tonic-gate * Does this span overlap an existing span?
11287c478bd9Sstevel@tonic-gate */
11297c478bd9Sstevel@tonic-gate if (delspan_insert(&mhp->mh_transit, mdsp_new) == 0) {
11307c478bd9Sstevel@tonic-gate /*
11317c478bd9Sstevel@tonic-gate * Differentiate between already on list for this handle
11327c478bd9Sstevel@tonic-gate * (KPHYSM_EDUP) and busy elsewhere (KPHYSM_EBUSY).
11337c478bd9Sstevel@tonic-gate */
11347c478bd9Sstevel@tonic-gate ret = KPHYSM_EBUSY;
11357c478bd9Sstevel@tonic-gate for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
11367c478bd9Sstevel@tonic-gate mdsp = mdsp->mds_next) {
11377c478bd9Sstevel@tonic-gate if (overlapping(mdsp->mds_base, mdsp->mds_npgs,
11387c478bd9Sstevel@tonic-gate base, npgs)) {
11397c478bd9Sstevel@tonic-gate ret = KPHYSM_EDUP;
11407c478bd9Sstevel@tonic-gate break;
11417c478bd9Sstevel@tonic-gate }
11427c478bd9Sstevel@tonic-gate }
11437c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
11447c478bd9Sstevel@tonic-gate free_delspans(mdsp_new);
11457c478bd9Sstevel@tonic-gate return (ret);
11467c478bd9Sstevel@tonic-gate }
11477c478bd9Sstevel@tonic-gate /*
11487c478bd9Sstevel@tonic-gate * At this point the spans in mdsp_new have been inserted into the
11497c478bd9Sstevel@tonic-gate * list of spans for this handle and thereby to the global list of
11507c478bd9Sstevel@tonic-gate * spans being processed. Each of these spans must now be checked
11517c478bd9Sstevel@tonic-gate * for relocatability. As a side-effect segments in the memseg list
11527c478bd9Sstevel@tonic-gate * may be split.
11537c478bd9Sstevel@tonic-gate *
11547c478bd9Sstevel@tonic-gate * Note that mdsp_new can no longer be used as it is now part of
11557c478bd9Sstevel@tonic-gate * a larger list. Select elements of this larger list based
11567c478bd9Sstevel@tonic-gate * on base and npgs.
11577c478bd9Sstevel@tonic-gate */
11587c478bd9Sstevel@tonic-gate restart:
11597c478bd9Sstevel@tonic-gate phys_pages = 0;
11607c478bd9Sstevel@tonic-gate vm_pages = 0;
11617c478bd9Sstevel@tonic-gate ret = KPHYSM_OK;
11627c478bd9Sstevel@tonic-gate for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
11637c478bd9Sstevel@tonic-gate mdsp = mdsp->mds_next) {
11647c478bd9Sstevel@tonic-gate pgcnt_t pages_checked;
11657c478bd9Sstevel@tonic-gate
11667c478bd9Sstevel@tonic-gate if (!overlapping(mdsp->mds_base, mdsp->mds_npgs, base, npgs)) {
11677c478bd9Sstevel@tonic-gate continue;
11687c478bd9Sstevel@tonic-gate }
11697c478bd9Sstevel@tonic-gate p_end = mdsp->mds_base + mdsp->mds_npgs;
11707c478bd9Sstevel@tonic-gate /*
11717c478bd9Sstevel@tonic-gate * The pages_checked count is a hack. All pages should be
11727c478bd9Sstevel@tonic-gate * checked for relocatability. Those not covered by memsegs
11737c478bd9Sstevel@tonic-gate * should be tested with arch_kphysm_del_span_ok().
11747c478bd9Sstevel@tonic-gate */
11757c478bd9Sstevel@tonic-gate pages_checked = 0;
11767c478bd9Sstevel@tonic-gate for (seg = memsegs; seg; seg = seg->next) {
11777c478bd9Sstevel@tonic-gate pfn_t mseg_start;
11787c478bd9Sstevel@tonic-gate
11797c478bd9Sstevel@tonic-gate if (seg->pages_base >= p_end ||
11807c478bd9Sstevel@tonic-gate seg->pages_end <= mdsp->mds_base) {
11817c478bd9Sstevel@tonic-gate /* Span and memseg don't overlap. */
11827c478bd9Sstevel@tonic-gate continue;
11837c478bd9Sstevel@tonic-gate }
11849853d9e8SJason Beloro mseg_start = memseg_get_start(seg);
11857c478bd9Sstevel@tonic-gate /* Check that segment is suitable for delete. */
11869853d9e8SJason Beloro if (memseg_includes_meta(seg)) {
11877c478bd9Sstevel@tonic-gate /*
11889853d9e8SJason Beloro * Check that this segment is completely
11899853d9e8SJason Beloro * within the span.
11907c478bd9Sstevel@tonic-gate */
11917c478bd9Sstevel@tonic-gate if (mseg_start < mdsp->mds_base ||
11927c478bd9Sstevel@tonic-gate seg->pages_end > p_end) {
11937c478bd9Sstevel@tonic-gate ret = KPHYSM_EBUSY;
11947c478bd9Sstevel@tonic-gate break;
11957c478bd9Sstevel@tonic-gate }
11967c478bd9Sstevel@tonic-gate pages_checked += seg->pages_end - mseg_start;
11977c478bd9Sstevel@tonic-gate } else {
11987c478bd9Sstevel@tonic-gate /*
11997c478bd9Sstevel@tonic-gate * If this segment is larger than the span,
12007c478bd9Sstevel@tonic-gate * try to split it. After the split, it
12017c478bd9Sstevel@tonic-gate * is necessary to restart.
12027c478bd9Sstevel@tonic-gate */
12037c478bd9Sstevel@tonic-gate if (seg->pages_base < mdsp->mds_base ||
12047c478bd9Sstevel@tonic-gate seg->pages_end > p_end) {
12057c478bd9Sstevel@tonic-gate pfn_t abase;
12067c478bd9Sstevel@tonic-gate pgcnt_t anpgs;
12077c478bd9Sstevel@tonic-gate int s_ret;
12087c478bd9Sstevel@tonic-gate
12097c478bd9Sstevel@tonic-gate /* Split required. */
12107c478bd9Sstevel@tonic-gate if (mdsp->mds_base < seg->pages_base)
12117c478bd9Sstevel@tonic-gate abase = seg->pages_base;
12127c478bd9Sstevel@tonic-gate else
12137c478bd9Sstevel@tonic-gate abase = mdsp->mds_base;
12147c478bd9Sstevel@tonic-gate if (p_end > seg->pages_end)
12157c478bd9Sstevel@tonic-gate anpgs = seg->pages_end - abase;
12167c478bd9Sstevel@tonic-gate else
12177c478bd9Sstevel@tonic-gate anpgs = p_end - abase;
12187c478bd9Sstevel@tonic-gate s_ret = kphysm_split_memseg(abase,
12197c478bd9Sstevel@tonic-gate anpgs);
12207c478bd9Sstevel@tonic-gate if (s_ret == 0) {
12217c478bd9Sstevel@tonic-gate /* Split failed. */
12227c478bd9Sstevel@tonic-gate ret = KPHYSM_ERESOURCE;
12237c478bd9Sstevel@tonic-gate break;
12247c478bd9Sstevel@tonic-gate }
12257c478bd9Sstevel@tonic-gate goto restart;
12267c478bd9Sstevel@tonic-gate }
12277c478bd9Sstevel@tonic-gate pages_checked +=
12287c478bd9Sstevel@tonic-gate seg->pages_end - seg->pages_base;
12297c478bd9Sstevel@tonic-gate }
12307c478bd9Sstevel@tonic-gate /*
12317c478bd9Sstevel@tonic-gate * The memseg is wholly within the delete span.
12327c478bd9Sstevel@tonic-gate * The individual pages can now be checked.
12337c478bd9Sstevel@tonic-gate */
12347c478bd9Sstevel@tonic-gate /* Cage test. */
12357c478bd9Sstevel@tonic-gate for (pp = seg->pages; pp < seg->epages; pp++) {
12367c478bd9Sstevel@tonic-gate if (PP_ISNORELOC(pp)) {
12377c478bd9Sstevel@tonic-gate ret = KPHYSM_ENONRELOC;
12387c478bd9Sstevel@tonic-gate break;
12397c478bd9Sstevel@tonic-gate }
12407c478bd9Sstevel@tonic-gate }
12417c478bd9Sstevel@tonic-gate if (ret != KPHYSM_OK) {
12427c478bd9Sstevel@tonic-gate break;
12437c478bd9Sstevel@tonic-gate }
12447c478bd9Sstevel@tonic-gate phys_pages += (seg->pages_end - mseg_start);
12457c478bd9Sstevel@tonic-gate vm_pages += MSEG_NPAGES(seg);
12467c478bd9Sstevel@tonic-gate }
12477c478bd9Sstevel@tonic-gate if (ret != KPHYSM_OK)
12487c478bd9Sstevel@tonic-gate break;
12497c478bd9Sstevel@tonic-gate if (pages_checked != mdsp->mds_npgs) {
12507c478bd9Sstevel@tonic-gate ret = KPHYSM_ENONRELOC;
12517c478bd9Sstevel@tonic-gate break;
12527c478bd9Sstevel@tonic-gate }
12537c478bd9Sstevel@tonic-gate }
12547c478bd9Sstevel@tonic-gate
12557c478bd9Sstevel@tonic-gate if (ret == KPHYSM_OK) {
12567c478bd9Sstevel@tonic-gate mhp->mh_phys_pages += phys_pages;
12577c478bd9Sstevel@tonic-gate mhp->mh_vm_pages += vm_pages;
12587c478bd9Sstevel@tonic-gate } else {
12597c478bd9Sstevel@tonic-gate /*
12607c478bd9Sstevel@tonic-gate * Keep holding the mh_mutex to prevent it going away.
12617c478bd9Sstevel@tonic-gate */
12627c478bd9Sstevel@tonic-gate delspan_remove(&mhp->mh_transit, base, npgs);
12637c478bd9Sstevel@tonic-gate }
12647c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
12657c478bd9Sstevel@tonic-gate return (ret);
12667c478bd9Sstevel@tonic-gate }
12677c478bd9Sstevel@tonic-gate
/*
 * Query a physical span [base, base + npgs) ahead of a prospective
 * memory delete.  Fills in *mqp with:
 *	phys_pages		- pages of physical memory in the span
 *	managed			- pages covered by a memseg (have page_t's)
 *	nonrelocatable		- count of non-relocatable pages
 *	first/last_nonrelocatable - pfn bounds of the non-relocatable set
 *
 * Pages not covered by a memseg are tested with
 * arch_kphysm_del_span_ok() (coarsely first, then page-by-page);
 * pages within a memseg are tested with PP_ISNORELOC().
 * Always returns KPHYSM_OK.
 */
int
kphysm_del_span_query(
	pfn_t base,
	pgcnt_t npgs,
	memquery_t *mqp)
{
	struct memdelspan *mdsp;
	struct memdelspan *mdsp_new;
	int done_first_nonreloc;

	mqp->phys_pages = 0;
	mqp->managed = 0;
	mqp->nonrelocatable = 0;
	mqp->first_nonrelocatable = 0;
	mqp->last_nonrelocatable = 0;

	mdsp_new = span_to_install(base, npgs);
	/*
	 * It is OK to proceed here if mdsp_new == NULL.
	 */
	done_first_nonreloc = 0;
	for (mdsp = mdsp_new; mdsp != NULL; mdsp = mdsp->mds_next) {
		pfn_t sbase;
		pgcnt_t snpgs;

		mqp->phys_pages += mdsp->mds_npgs;
		sbase = mdsp->mds_base;
		snpgs = mdsp->mds_npgs;
		/* Walk the span, consuming (sbase, snpgs) as we account. */
		while (snpgs != 0) {
			struct memseg *lseg, *seg;
			pfn_t p_end;
			page_t *pp;
			pfn_t mseg_start;

			p_end = sbase + snpgs;
			/*
			 * Find the lowest addressed memseg that starts
			 * after sbase and account for it.
			 * This is to catch dynamic memsegs whose start
			 * is hidden.
			 */
			seg = NULL;
			for (lseg = memsegs; lseg != NULL; lseg = lseg->next) {
				if ((lseg->pages_base >= sbase) ||
				    (lseg->pages_base < p_end &&
				    lseg->pages_end > sbase)) {
					if (seg == NULL ||
					    seg->pages_base > lseg->pages_base)
						seg = lseg;
				}
			}
			if (seg != NULL) {
				mseg_start = memseg_get_start(seg);
				/*
				 * Now have the full extent of the memseg so
				 * do the range check.
				 */
				if (mseg_start >= p_end ||
				    seg->pages_end <= sbase) {
					/* Span does not overlap memseg. */
					seg = NULL;
				}
			}
			/*
			 * Account for gap either before the segment if
			 * there is one or to the end of the span.
			 * (mseg_start is only read when seg != NULL,
			 * in which case it was set above.)
			 */
			if (seg == NULL || mseg_start > sbase) {
				pfn_t a_end;

				a_end = (seg == NULL) ? p_end : mseg_start;
				/*
				 * Check with arch layer for relocatability.
				 */
				if (arch_kphysm_del_span_ok(sbase,
				    (a_end - sbase))) {
					/*
					 * No non-relocatable pages in this
					 * area, avoid the fine-grained
					 * test.  (sbase reaches a_end, so
					 * the loop below is skipped.)
					 */
					snpgs -= (a_end - sbase);
					sbase = a_end;
				}
				/* Fine-grained, one page at a time. */
				while (sbase < a_end) {
					if (!arch_kphysm_del_span_ok(sbase,
					    1)) {
						mqp->nonrelocatable++;
						if (!done_first_nonreloc) {
							mqp->
							    first_nonrelocatable
							    = sbase;
							done_first_nonreloc = 1;
						}
						mqp->last_nonrelocatable =
						    sbase;
					}
					sbase++;
					snpgs--;
				}
			}
			if (seg != NULL) {
				ASSERT(mseg_start <= sbase);
				if (seg->pages_base != mseg_start &&
				    seg->pages_base > sbase) {
					pgcnt_t skip_pgs;

					/*
					 * Skip the page_t area of a
					 * dynamic memseg.
					 */
					skip_pgs = seg->pages_base - sbase;
					if (snpgs <= skip_pgs) {
						sbase += snpgs;
						snpgs = 0;
						continue;
					}
					snpgs -= skip_pgs;
					sbase += skip_pgs;
				}
				ASSERT(snpgs != 0);
				ASSERT(seg->pages_base <= sbase);
				/*
				 * The individual pages can now be checked.
				 */
				for (pp = seg->pages +
				    (sbase - seg->pages_base);
				    snpgs != 0 && pp < seg->epages; pp++) {
					mqp->managed++;
					if (PP_ISNORELOC(pp)) {
						mqp->nonrelocatable++;
						if (!done_first_nonreloc) {
							mqp->
							    first_nonrelocatable
							    = sbase;
							done_first_nonreloc = 1;
						}
						mqp->last_nonrelocatable =
						    sbase;
					}
					sbase++;
					snpgs--;
				}
			}
		}
	}

	free_delspans(mdsp_new);

	return (KPHYSM_OK);
}
14197c478bd9Sstevel@tonic-gate
14207c478bd9Sstevel@tonic-gate /*
14217c478bd9Sstevel@tonic-gate * This release function can be called at any stage as follows:
14227c478bd9Sstevel@tonic-gate * _gethandle only called
14237c478bd9Sstevel@tonic-gate * _span(s) only called
14247c478bd9Sstevel@tonic-gate * _start called but failed
14257c478bd9Sstevel@tonic-gate * delete thread exited
14267c478bd9Sstevel@tonic-gate */
14277c478bd9Sstevel@tonic-gate int
kphysm_del_release(memhandle_t handle)14287c478bd9Sstevel@tonic-gate kphysm_del_release(memhandle_t handle)
14297c478bd9Sstevel@tonic-gate {
14307c478bd9Sstevel@tonic-gate struct mem_handle *mhp;
14317c478bd9Sstevel@tonic-gate
14327c478bd9Sstevel@tonic-gate mhp = kphysm_lookup_mem_handle(handle);
14337c478bd9Sstevel@tonic-gate if (mhp == NULL) {
14347c478bd9Sstevel@tonic-gate return (KPHYSM_EHANDLE);
14357c478bd9Sstevel@tonic-gate }
14367c478bd9Sstevel@tonic-gate switch (mhp->mh_state) {
14377c478bd9Sstevel@tonic-gate case MHND_STARTING:
14387c478bd9Sstevel@tonic-gate case MHND_RUNNING:
14397c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
14407c478bd9Sstevel@tonic-gate return (KPHYSM_ENOTFINISHED);
14417c478bd9Sstevel@tonic-gate case MHND_FREE:
14427c478bd9Sstevel@tonic-gate ASSERT(mhp->mh_state != MHND_FREE);
14437c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
14447c478bd9Sstevel@tonic-gate return (KPHYSM_EHANDLE);
14457c478bd9Sstevel@tonic-gate case MHND_INIT:
14467c478bd9Sstevel@tonic-gate break;
14477c478bd9Sstevel@tonic-gate case MHND_DONE:
14487c478bd9Sstevel@tonic-gate break;
14497c478bd9Sstevel@tonic-gate case MHND_RELEASE:
14507c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
14517c478bd9Sstevel@tonic-gate return (KPHYSM_ESEQUENCE);
14527c478bd9Sstevel@tonic-gate default:
14537c478bd9Sstevel@tonic-gate #ifdef DEBUG
14547c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "kphysm_del_release(0x%p) state corrupt %d",
14557c478bd9Sstevel@tonic-gate (void *)mhp, mhp->mh_state);
14567c478bd9Sstevel@tonic-gate #endif /* DEBUG */
14577c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
14587c478bd9Sstevel@tonic-gate return (KPHYSM_EHANDLE);
14597c478bd9Sstevel@tonic-gate }
14607c478bd9Sstevel@tonic-gate /*
14617c478bd9Sstevel@tonic-gate * Set state so that we can wait if necessary.
14627c478bd9Sstevel@tonic-gate * Also this means that we have read/write access to all
14637c478bd9Sstevel@tonic-gate * fields except mh_exthandle and mh_state.
14647c478bd9Sstevel@tonic-gate */
14657c478bd9Sstevel@tonic-gate mhp->mh_state = MHND_RELEASE;
14667c478bd9Sstevel@tonic-gate /*
14677c478bd9Sstevel@tonic-gate * The mem_handle cannot be de-allocated by any other operation
14687c478bd9Sstevel@tonic-gate * now, so no need to hold mh_mutex.
14697c478bd9Sstevel@tonic-gate */
14707c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
14717c478bd9Sstevel@tonic-gate
14727c478bd9Sstevel@tonic-gate delspan_remove(&mhp->mh_transit, 0, 0);
14737c478bd9Sstevel@tonic-gate mhp->mh_phys_pages = 0;
14747c478bd9Sstevel@tonic-gate mhp->mh_vm_pages = 0;
14757c478bd9Sstevel@tonic-gate mhp->mh_hold_todo = 0;
14767c478bd9Sstevel@tonic-gate mhp->mh_delete_complete = NULL;
14777c478bd9Sstevel@tonic-gate mhp->mh_delete_complete_arg = NULL;
14787c478bd9Sstevel@tonic-gate mhp->mh_cancel = 0;
14797c478bd9Sstevel@tonic-gate
14807c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
14817c478bd9Sstevel@tonic-gate ASSERT(mhp->mh_state == MHND_RELEASE);
14827c478bd9Sstevel@tonic-gate mhp->mh_state = MHND_FREE;
14837c478bd9Sstevel@tonic-gate
14847c478bd9Sstevel@tonic-gate kphysm_free_mem_handle(mhp);
14857c478bd9Sstevel@tonic-gate
14867c478bd9Sstevel@tonic-gate return (KPHYSM_OK);
14877c478bd9Sstevel@tonic-gate }
14887c478bd9Sstevel@tonic-gate
14897c478bd9Sstevel@tonic-gate /*
14907c478bd9Sstevel@tonic-gate * This cancel function can only be called with the thread running.
14917c478bd9Sstevel@tonic-gate */
14927c478bd9Sstevel@tonic-gate int
kphysm_del_cancel(memhandle_t handle)14937c478bd9Sstevel@tonic-gate kphysm_del_cancel(memhandle_t handle)
14947c478bd9Sstevel@tonic-gate {
14957c478bd9Sstevel@tonic-gate struct mem_handle *mhp;
14967c478bd9Sstevel@tonic-gate
14977c478bd9Sstevel@tonic-gate mhp = kphysm_lookup_mem_handle(handle);
14987c478bd9Sstevel@tonic-gate if (mhp == NULL) {
14997c478bd9Sstevel@tonic-gate return (KPHYSM_EHANDLE);
15007c478bd9Sstevel@tonic-gate }
15017c478bd9Sstevel@tonic-gate if (mhp->mh_state != MHND_STARTING && mhp->mh_state != MHND_RUNNING) {
15027c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
15037c478bd9Sstevel@tonic-gate return (KPHYSM_ENOTRUNNING);
15047c478bd9Sstevel@tonic-gate }
15057c478bd9Sstevel@tonic-gate /*
15067c478bd9Sstevel@tonic-gate * Set the cancel flag and wake the delete thread up.
15077c478bd9Sstevel@tonic-gate * The thread may be waiting on I/O, so the effect of the cancel
15087c478bd9Sstevel@tonic-gate * may be delayed.
15097c478bd9Sstevel@tonic-gate */
15107c478bd9Sstevel@tonic-gate if (mhp->mh_cancel == 0) {
15117c478bd9Sstevel@tonic-gate mhp->mh_cancel = KPHYSM_ECANCELLED;
15127c478bd9Sstevel@tonic-gate cv_signal(&mhp->mh_cv);
15137c478bd9Sstevel@tonic-gate }
15147c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
15157c478bd9Sstevel@tonic-gate return (KPHYSM_OK);
15167c478bd9Sstevel@tonic-gate }
15177c478bd9Sstevel@tonic-gate
15187c478bd9Sstevel@tonic-gate int
kphysm_del_status(memhandle_t handle,memdelstat_t * mdstp)15197c478bd9Sstevel@tonic-gate kphysm_del_status(
15207c478bd9Sstevel@tonic-gate memhandle_t handle,
15217c478bd9Sstevel@tonic-gate memdelstat_t *mdstp)
15227c478bd9Sstevel@tonic-gate {
15237c478bd9Sstevel@tonic-gate struct mem_handle *mhp;
15247c478bd9Sstevel@tonic-gate
15257c478bd9Sstevel@tonic-gate mhp = kphysm_lookup_mem_handle(handle);
15267c478bd9Sstevel@tonic-gate if (mhp == NULL) {
15277c478bd9Sstevel@tonic-gate return (KPHYSM_EHANDLE);
15287c478bd9Sstevel@tonic-gate }
15297c478bd9Sstevel@tonic-gate /*
15307c478bd9Sstevel@tonic-gate * Calling kphysm_del_status() is allowed before the delete
15317c478bd9Sstevel@tonic-gate * is started to allow for status display.
15327c478bd9Sstevel@tonic-gate */
15337c478bd9Sstevel@tonic-gate if (mhp->mh_state != MHND_INIT && mhp->mh_state != MHND_STARTING &&
15347c478bd9Sstevel@tonic-gate mhp->mh_state != MHND_RUNNING) {
15357c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
15367c478bd9Sstevel@tonic-gate return (KPHYSM_ENOTRUNNING);
15377c478bd9Sstevel@tonic-gate }
15387c478bd9Sstevel@tonic-gate mdstp->phys_pages = mhp->mh_phys_pages;
15397c478bd9Sstevel@tonic-gate mdstp->managed = mhp->mh_vm_pages;
15407c478bd9Sstevel@tonic-gate mdstp->collected = mhp->mh_vm_pages - mhp->mh_hold_todo;
15417c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
15427c478bd9Sstevel@tonic-gate return (KPHYSM_OK);
15437c478bd9Sstevel@tonic-gate }
15447c478bd9Sstevel@tonic-gate
/*
 * Safety margin, in pages, that availrmem must retain beyond
 * tune.t_minarmem for a delete to be considered viable (tunable);
 * see can_remove_pgs().
 */
static int mem_delete_additional_pages = 100;
15467c478bd9Sstevel@tonic-gate
15477c478bd9Sstevel@tonic-gate static int
can_remove_pgs(pgcnt_t npgs)15487c478bd9Sstevel@tonic-gate can_remove_pgs(pgcnt_t npgs)
15497c478bd9Sstevel@tonic-gate {
15507c478bd9Sstevel@tonic-gate /*
15517c478bd9Sstevel@tonic-gate * If all pageable pages were paged out, freemem would
15527c478bd9Sstevel@tonic-gate * equal availrmem. There is a minimum requirement for
15537c478bd9Sstevel@tonic-gate * availrmem.
15547c478bd9Sstevel@tonic-gate */
15557c478bd9Sstevel@tonic-gate if ((availrmem - (tune.t_minarmem + mem_delete_additional_pages))
15567c478bd9Sstevel@tonic-gate < npgs)
15577c478bd9Sstevel@tonic-gate return (0);
15587c478bd9Sstevel@tonic-gate /* TODO: check swap space, etc. */
15597c478bd9Sstevel@tonic-gate return (1);
15607c478bd9Sstevel@tonic-gate }
15617c478bd9Sstevel@tonic-gate
15627c478bd9Sstevel@tonic-gate static int
get_availrmem(pgcnt_t npgs)15637c478bd9Sstevel@tonic-gate get_availrmem(pgcnt_t npgs)
15647c478bd9Sstevel@tonic-gate {
15657c478bd9Sstevel@tonic-gate int ret;
15667c478bd9Sstevel@tonic-gate
15677c478bd9Sstevel@tonic-gate mutex_enter(&freemem_lock);
15687c478bd9Sstevel@tonic-gate ret = can_remove_pgs(npgs);
15697c478bd9Sstevel@tonic-gate if (ret != 0)
15707c478bd9Sstevel@tonic-gate availrmem -= npgs;
15717c478bd9Sstevel@tonic-gate mutex_exit(&freemem_lock);
15727c478bd9Sstevel@tonic-gate return (ret);
15737c478bd9Sstevel@tonic-gate }
15747c478bd9Sstevel@tonic-gate
15757c478bd9Sstevel@tonic-gate static void
put_availrmem(pgcnt_t npgs)15767c478bd9Sstevel@tonic-gate put_availrmem(pgcnt_t npgs)
15777c478bd9Sstevel@tonic-gate {
15787c478bd9Sstevel@tonic-gate mutex_enter(&freemem_lock);
15797c478bd9Sstevel@tonic-gate availrmem += npgs;
15807c478bd9Sstevel@tonic-gate mutex_exit(&freemem_lock);
15817c478bd9Sstevel@tonic-gate }
15827c478bd9Sstevel@tonic-gate
/*
 * Number of pages the delete thread tries to obtain from freemem in
 * one pass (tunable via freemem_incr); see delthr_get_freemem().
 */
#define FREEMEM_INCR 100
static pgcnt_t freemem_incr = FREEMEM_INCR;
/*
 * Wait quantum, in clock ticks (1/DEL_FREE_WAIT_FRAC of a second,
 * rounded up), used while waiting for freemem to be replenished.
 */
#define DEL_FREE_WAIT_FRAC 4
#define DEL_FREE_WAIT_TICKS ((hz+DEL_FREE_WAIT_FRAC-1)/DEL_FREE_WAIT_FRAC)

/*
 * Shorter wait quantum (1/DEL_BUSY_WAIT_FRAC of a second).
 * NOTE(review): not referenced in this part of the file — presumably
 * a backoff used when a page is found busy; confirm against the
 * remainder of the file.
 */
#define DEL_BUSY_WAIT_FRAC 20
#define DEL_BUSY_WAIT_TICKS ((hz+DEL_BUSY_WAIT_FRAC-1)/DEL_BUSY_WAIT_FRAC)

/* Forward declarations; definitions appear later in this file. */
static void kphysm_del_cleanup(struct mem_handle *);

static void page_delete_collect(page_t *, struct mem_handle *);
15947c478bd9Sstevel@tonic-gate
/*
 * Obtain up to freemem_incr pages (bounded by mh_hold_todo) from
 * freemem on behalf of the delete thread, applying pressure to
 * pageout and waiting in DEL_FREE_WAIT_TICKS quanta when memory is
 * tight.  Returns the number of pages obtained, or 0 if mh_cancel
 * became non-zero while waiting.  Entered and exited with mh_mutex
 * held; the mutex is dropped around the blocking operations below.
 */
static pgcnt_t
delthr_get_freemem(struct mem_handle *mhp)
{
	pgcnt_t free_get;
	int ret;

	ASSERT(MUTEX_HELD(&mhp->mh_mutex));

	MDSTAT_INCR(mhp, need_free);
	/*
	 * Get up to freemem_incr pages.
	 */
	free_get = freemem_incr;
	if (free_get > mhp->mh_hold_todo)
		free_get = mhp->mh_hold_todo;
	/*
	 * Take free_get pages away from freemem,
	 * waiting if necessary.
	 */

	while (!mhp->mh_cancel) {
		mutex_exit(&mhp->mh_mutex);
		MDSTAT_INCR(mhp, free_loop);
		/*
		 * Duplicate test from page_create_throttle()
		 * but don't override with !PG_WAIT.
		 */
		if (freemem < (free_get + throttlefree)) {
			MDSTAT_INCR(mhp, free_low);
			ret = 0;
		} else {
			ret = page_create_wait(free_get, 0);
			if (ret == 0) {
				/* EMPTY */
				MDSTAT_INCR(mhp, free_failed);
			}
		}
		if (ret != 0) {
			/* Success: re-acquire the lock before returning. */
			mutex_enter(&mhp->mh_mutex);
			return (free_get);
		}

		/*
		 * Put pressure on pageout.
		 */
		page_needfree(free_get);
		cv_signal(&proc_pageout->p_cv);

		/*
		 * Wait for pageout progress, a cancel (mh_cv is
		 * signalled by kphysm_del_cancel()), or the timeout.
		 */
		mutex_enter(&mhp->mh_mutex);
		(void) cv_reltimedwait(&mhp->mh_cv, &mhp->mh_mutex,
		    DEL_FREE_WAIT_TICKS, TR_CLOCK_TICK);
		mutex_exit(&mhp->mh_mutex);
		page_needfree(-(spgcnt_t)free_get);

		mutex_enter(&mhp->mh_mutex);
	}
	/* Cancelled: no pages obtained. */
	return (0);
}
16537c478bd9Sstevel@tonic-gate
#define DR_AIO_CLEANUP_DELAY 25000 /* 0.025secs, in usec */
#define DR_AIO_CLEANUP_MAXLOOPS_NODELAY 100
/*
 * This function is run as a helper thread for delete_memory_thread.
 * It is needed in order to force kaio cleanup, so that pages used in kaio
 * will be unlocked and subsequently relocated by delete_memory_thread.
 * The address of the delete_memory_threads's mem_handle is passed in to
 * this thread function, and is used to set the mh_aio_cleanup_done member
 * prior to calling thread_exit().
 */
static void
dr_aio_cleanup_thread(caddr_t amhp)
{
	proc_t *procp;
	int (*aio_cleanup_dr_delete_memory)(proc_t *);
	int cleaned;
	int n = 0;
	struct mem_handle *mhp;
	volatile uint_t *pcancel;

	mhp = (struct mem_handle *)amhp;
	ASSERT(mhp != NULL);
	/*
	 * pcancel is volatile so the flag is re-read on every loop
	 * iteration; it is set externally to request that this
	 * thread exit (delete_memory_thread clears it at start).
	 */
	pcancel = &mhp->mh_dr_aio_cleanup_cancel;
	if (modload("sys", "kaio") == -1) {
		mhp->mh_aio_cleanup_done = 1;
		cmn_err(CE_WARN, "dr_aio_cleanup_thread: cannot load kaio");
		thread_exit();
	}
	/* Resolve the kaio entry point dynamically from the module. */
	aio_cleanup_dr_delete_memory = (int (*)(proc_t *))
	    modgetsymvalue("aio_cleanup_dr_delete_memory", 0);
	if (aio_cleanup_dr_delete_memory == NULL) {
		mhp->mh_aio_cleanup_done = 1;
		cmn_err(CE_WARN,
	    "aio_cleanup_dr_delete_memory not found in kaio");
		thread_exit();
	}
	do {
		cleaned = 0;
		/* Walk every active process, cleaning its kaio state. */
		mutex_enter(&pidlock);
		for (procp = practive; (*pcancel == 0) && (procp != NULL);
		    procp = procp->p_next) {
			mutex_enter(&procp->p_lock);
			if (procp->p_aio != NULL) {
				/* cleanup proc's outstanding kaio */
				cleaned +=
				    (*aio_cleanup_dr_delete_memory)(procp);
			}
			mutex_exit(&procp->p_lock);
		}
		mutex_exit(&pidlock);
		if ((*pcancel == 0) &&
		    (!cleaned || (++n == DR_AIO_CLEANUP_MAXLOOPS_NODELAY))) {
			/* delay a bit before retrying all procs again */
			delay(drv_usectohz(DR_AIO_CLEANUP_DELAY));
			n = 0;
		}
	} while (*pcancel == 0);
	/* Tell delete_memory_thread we are gone before exiting. */
	mhp->mh_aio_cleanup_done = 1;
	thread_exit();
}
17147c478bd9Sstevel@tonic-gate
17157c478bd9Sstevel@tonic-gate static void
delete_memory_thread(caddr_t amhp)17167c478bd9Sstevel@tonic-gate delete_memory_thread(caddr_t amhp)
17177c478bd9Sstevel@tonic-gate {
17187c478bd9Sstevel@tonic-gate struct mem_handle *mhp;
17197c478bd9Sstevel@tonic-gate struct memdelspan *mdsp;
17207c478bd9Sstevel@tonic-gate callb_cpr_t cprinfo;
17217c478bd9Sstevel@tonic-gate page_t *pp_targ;
17227c478bd9Sstevel@tonic-gate spgcnt_t freemem_left;
17237c478bd9Sstevel@tonic-gate void (*del_complete_funcp)(void *, int error);
17247c478bd9Sstevel@tonic-gate void *del_complete_arg;
17257c478bd9Sstevel@tonic-gate int comp_code;
17267c478bd9Sstevel@tonic-gate int ret;
17277c478bd9Sstevel@tonic-gate int first_scan;
17287c478bd9Sstevel@tonic-gate uint_t szc;
17297c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
17307c478bd9Sstevel@tonic-gate uint64_t start_total, ntick_total;
17317c478bd9Sstevel@tonic-gate uint64_t start_pgrp, ntick_pgrp;
17327c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
17337c478bd9Sstevel@tonic-gate
17347c478bd9Sstevel@tonic-gate mhp = (struct mem_handle *)amhp;
17357c478bd9Sstevel@tonic-gate
17367c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
17377c478bd9Sstevel@tonic-gate start_total = ddi_get_lbolt();
17387c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
17397c478bd9Sstevel@tonic-gate
17407c478bd9Sstevel@tonic-gate CALLB_CPR_INIT(&cprinfo, &mhp->mh_mutex,
17417c478bd9Sstevel@tonic-gate callb_generic_cpr, "memdel");
17427c478bd9Sstevel@tonic-gate
17437c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
17447c478bd9Sstevel@tonic-gate ASSERT(mhp->mh_state == MHND_STARTING);
17457c478bd9Sstevel@tonic-gate
17467c478bd9Sstevel@tonic-gate mhp->mh_state = MHND_RUNNING;
17477c478bd9Sstevel@tonic-gate mhp->mh_thread_id = curthread;
17487c478bd9Sstevel@tonic-gate
17497c478bd9Sstevel@tonic-gate mhp->mh_hold_todo = mhp->mh_vm_pages;
17507c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
17517c478bd9Sstevel@tonic-gate
17527c478bd9Sstevel@tonic-gate /* Allocate the remap pages now, if necessary. */
17537c478bd9Sstevel@tonic-gate memseg_remap_init();
17547c478bd9Sstevel@tonic-gate
17557c478bd9Sstevel@tonic-gate /*
17567c478bd9Sstevel@tonic-gate * Subtract from availrmem now if possible as availrmem
17577c478bd9Sstevel@tonic-gate * may not be available by the end of the delete.
17587c478bd9Sstevel@tonic-gate */
17597c478bd9Sstevel@tonic-gate if (!get_availrmem(mhp->mh_vm_pages)) {
17607c478bd9Sstevel@tonic-gate comp_code = KPHYSM_ENOTVIABLE;
17617c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
17627c478bd9Sstevel@tonic-gate goto early_exit;
17637c478bd9Sstevel@tonic-gate }
17647c478bd9Sstevel@tonic-gate
17657c478bd9Sstevel@tonic-gate ret = kphysm_setup_pre_del(mhp->mh_vm_pages);
17667c478bd9Sstevel@tonic-gate
17677c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
17687c478bd9Sstevel@tonic-gate
17697c478bd9Sstevel@tonic-gate if (ret != 0) {
17707c478bd9Sstevel@tonic-gate mhp->mh_cancel = KPHYSM_EREFUSED;
17717c478bd9Sstevel@tonic-gate goto refused;
17727c478bd9Sstevel@tonic-gate }
17737c478bd9Sstevel@tonic-gate
17747c478bd9Sstevel@tonic-gate transit_list_collect(mhp, 1);
17757c478bd9Sstevel@tonic-gate
17767c478bd9Sstevel@tonic-gate for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
17777c478bd9Sstevel@tonic-gate mdsp = mdsp->mds_next) {
17787c478bd9Sstevel@tonic-gate ASSERT(mdsp->mds_bitmap == NULL);
17797c478bd9Sstevel@tonic-gate mdsp->mds_bitmap = kmem_zalloc(MDS_BITMAPBYTES(mdsp), KM_SLEEP);
17807c478bd9Sstevel@tonic-gate mdsp->mds_bitmap_retired = kmem_zalloc(MDS_BITMAPBYTES(mdsp),
17817c478bd9Sstevel@tonic-gate KM_SLEEP);
17827c478bd9Sstevel@tonic-gate }
17837c478bd9Sstevel@tonic-gate
17847c478bd9Sstevel@tonic-gate first_scan = 1;
17857c478bd9Sstevel@tonic-gate freemem_left = 0;
17867c478bd9Sstevel@tonic-gate /*
17877c478bd9Sstevel@tonic-gate * Start dr_aio_cleanup_thread, which periodically iterates
17887c478bd9Sstevel@tonic-gate * through the process list and invokes aio cleanup. This
17897c478bd9Sstevel@tonic-gate * is needed in order to avoid a deadly embrace between the
17907c478bd9Sstevel@tonic-gate * delete_memory_thread (waiting on writer lock for page, with the
17917c478bd9Sstevel@tonic-gate * exclusive-wanted bit set), kaio read request threads (waiting for a
17927c478bd9Sstevel@tonic-gate * reader lock on the same page that is wanted by the
17937c478bd9Sstevel@tonic-gate * delete_memory_thread), and threads waiting for kaio completion
17947c478bd9Sstevel@tonic-gate * (blocked on spt_amp->lock).
17957c478bd9Sstevel@tonic-gate */
17967c478bd9Sstevel@tonic-gate mhp->mh_dr_aio_cleanup_cancel = 0;
17977c478bd9Sstevel@tonic-gate mhp->mh_aio_cleanup_done = 0;
17987c478bd9Sstevel@tonic-gate (void) thread_create(NULL, 0, dr_aio_cleanup_thread,
17997c478bd9Sstevel@tonic-gate (caddr_t)mhp, 0, &p0, TS_RUN, maxclsyspri - 1);
18007c478bd9Sstevel@tonic-gate while ((mhp->mh_hold_todo != 0) && (mhp->mh_cancel == 0)) {
18017c478bd9Sstevel@tonic-gate pgcnt_t collected;
18027c478bd9Sstevel@tonic-gate
18037c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, nloop);
18047c478bd9Sstevel@tonic-gate collected = 0;
18057c478bd9Sstevel@tonic-gate for (mdsp = mhp->mh_transit.trl_spans; (mdsp != NULL) &&
18067c478bd9Sstevel@tonic-gate (mhp->mh_cancel == 0); mdsp = mdsp->mds_next) {
18077c478bd9Sstevel@tonic-gate pfn_t pfn, p_end;
18087c478bd9Sstevel@tonic-gate
18097c478bd9Sstevel@tonic-gate p_end = mdsp->mds_base + mdsp->mds_npgs;
18107c478bd9Sstevel@tonic-gate for (pfn = mdsp->mds_base; (pfn < p_end) &&
18117c478bd9Sstevel@tonic-gate (mhp->mh_cancel == 0); pfn++) {
18127c478bd9Sstevel@tonic-gate page_t *pp, *tpp, *tpp_targ;
18137c478bd9Sstevel@tonic-gate pgcnt_t bit;
18147c478bd9Sstevel@tonic-gate struct vnode *vp;
18157c478bd9Sstevel@tonic-gate u_offset_t offset;
18167c478bd9Sstevel@tonic-gate int mod, result;
18177c478bd9Sstevel@tonic-gate spgcnt_t pgcnt;
18187c478bd9Sstevel@tonic-gate
18197c478bd9Sstevel@tonic-gate bit = pfn - mdsp->mds_base;
18207c478bd9Sstevel@tonic-gate if ((mdsp->mds_bitmap[bit / NBPBMW] &
18217c478bd9Sstevel@tonic-gate (1 << (bit % NBPBMW))) != 0) {
18227c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, already_done);
18237c478bd9Sstevel@tonic-gate continue;
18247c478bd9Sstevel@tonic-gate }
18257c478bd9Sstevel@tonic-gate if (freemem_left == 0) {
18267c478bd9Sstevel@tonic-gate freemem_left += delthr_get_freemem(mhp);
18277c478bd9Sstevel@tonic-gate if (freemem_left == 0)
18287c478bd9Sstevel@tonic-gate break;
18297c478bd9Sstevel@tonic-gate }
18307c478bd9Sstevel@tonic-gate
18317c478bd9Sstevel@tonic-gate /*
18327c478bd9Sstevel@tonic-gate * Release mh_mutex - some of this
18337c478bd9Sstevel@tonic-gate * stuff takes some time (eg PUTPAGE).
18347c478bd9Sstevel@tonic-gate */
18357c478bd9Sstevel@tonic-gate
18367c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
18377c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, ncheck);
18387c478bd9Sstevel@tonic-gate
18397c478bd9Sstevel@tonic-gate pp = page_numtopp_nolock(pfn);
18407c478bd9Sstevel@tonic-gate if (pp == NULL) {
18417c478bd9Sstevel@tonic-gate /*
18427c478bd9Sstevel@tonic-gate * Not covered by a page_t - will
18437c478bd9Sstevel@tonic-gate * be dealt with elsewhere.
18447c478bd9Sstevel@tonic-gate */
18457c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, nopaget);
18467c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
18477c478bd9Sstevel@tonic-gate mdsp->mds_bitmap[bit / NBPBMW] |=
18487c478bd9Sstevel@tonic-gate (1 << (bit % NBPBMW));
18497c478bd9Sstevel@tonic-gate continue;
18507c478bd9Sstevel@tonic-gate }
18517c478bd9Sstevel@tonic-gate
18527c478bd9Sstevel@tonic-gate if (!page_try_reclaim_lock(pp, SE_EXCL,
1853db874c57Selowe SE_EXCL_WANTED | SE_RETIRED)) {
18547c478bd9Sstevel@tonic-gate /*
1855db874c57Selowe * Page in use elsewhere. Skip it.
18567c478bd9Sstevel@tonic-gate */
18577c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, lockfail);
18587c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
18597c478bd9Sstevel@tonic-gate continue;
18607c478bd9Sstevel@tonic-gate }
18617c478bd9Sstevel@tonic-gate /*
18627c478bd9Sstevel@tonic-gate * See if the cage expanded into the delete.
18637c478bd9Sstevel@tonic-gate * This can happen as we have to allow the
18647c478bd9Sstevel@tonic-gate * cage to expand.
18657c478bd9Sstevel@tonic-gate */
18667c478bd9Sstevel@tonic-gate if (PP_ISNORELOC(pp)) {
18677c478bd9Sstevel@tonic-gate page_unlock(pp);
18687c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
18697c478bd9Sstevel@tonic-gate mhp->mh_cancel = KPHYSM_ENONRELOC;
18707c478bd9Sstevel@tonic-gate break;
18717c478bd9Sstevel@tonic-gate }
1872db874c57Selowe if (PP_RETIRED(pp)) {
18737c478bd9Sstevel@tonic-gate /*
18747c478bd9Sstevel@tonic-gate * Page has been retired and is
18757c478bd9Sstevel@tonic-gate * not part of the cage so we
18767c478bd9Sstevel@tonic-gate * can now do the accounting for
18777c478bd9Sstevel@tonic-gate * it.
18787c478bd9Sstevel@tonic-gate */
18797c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, retired);
18807c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
18817c478bd9Sstevel@tonic-gate mdsp->mds_bitmap[bit / NBPBMW]
18827c478bd9Sstevel@tonic-gate |= (1 << (bit % NBPBMW));
18837c478bd9Sstevel@tonic-gate mdsp->mds_bitmap_retired[bit /
18847c478bd9Sstevel@tonic-gate NBPBMW] |=
18857c478bd9Sstevel@tonic-gate (1 << (bit % NBPBMW));
18867c478bd9Sstevel@tonic-gate mhp->mh_hold_todo--;
18877c478bd9Sstevel@tonic-gate continue;
18887c478bd9Sstevel@tonic-gate }
18897c478bd9Sstevel@tonic-gate ASSERT(freemem_left != 0);
18907c478bd9Sstevel@tonic-gate if (PP_ISFREE(pp)) {
18917c478bd9Sstevel@tonic-gate /*
18927c478bd9Sstevel@tonic-gate * Like page_reclaim() only 'freemem'
18937c478bd9Sstevel@tonic-gate * processing is already done.
18947c478bd9Sstevel@tonic-gate */
18957c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, nfree);
18967c478bd9Sstevel@tonic-gate free_page_collect:
18977c478bd9Sstevel@tonic-gate if (PP_ISAGED(pp)) {
18987c478bd9Sstevel@tonic-gate page_list_sub(pp,
18997c478bd9Sstevel@tonic-gate PG_FREE_LIST);
19007c478bd9Sstevel@tonic-gate } else {
19017c478bd9Sstevel@tonic-gate page_list_sub(pp,
19027c478bd9Sstevel@tonic-gate PG_CACHE_LIST);
19037c478bd9Sstevel@tonic-gate }
19047c478bd9Sstevel@tonic-gate PP_CLRFREE(pp);
19057c478bd9Sstevel@tonic-gate PP_CLRAGED(pp);
19067c478bd9Sstevel@tonic-gate collected++;
19077c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
19087c478bd9Sstevel@tonic-gate page_delete_collect(pp, mhp);
19097c478bd9Sstevel@tonic-gate mdsp->mds_bitmap[bit / NBPBMW] |=
19107c478bd9Sstevel@tonic-gate (1 << (bit % NBPBMW));
19117c478bd9Sstevel@tonic-gate freemem_left--;
19127c478bd9Sstevel@tonic-gate continue;
19137c478bd9Sstevel@tonic-gate }
19147c478bd9Sstevel@tonic-gate ASSERT(pp->p_vnode != NULL);
19157c478bd9Sstevel@tonic-gate if (first_scan) {
19167c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, first_notfree);
19177c478bd9Sstevel@tonic-gate page_unlock(pp);
19187c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
19197c478bd9Sstevel@tonic-gate continue;
19207c478bd9Sstevel@tonic-gate }
19217c478bd9Sstevel@tonic-gate /*
19227c478bd9Sstevel@tonic-gate * Keep stats on pages encountered that
1923db874c57Selowe * are marked for retirement.
19247c478bd9Sstevel@tonic-gate */
1925db874c57Selowe if (PP_TOXIC(pp)) {
19267c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, toxic);
1927db874c57Selowe } else if (PP_PR_REQ(pp)) {
19287c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, failing);
19297c478bd9Sstevel@tonic-gate }
19307c478bd9Sstevel@tonic-gate /*
19317c478bd9Sstevel@tonic-gate * In certain cases below, special exceptions
19327c478bd9Sstevel@tonic-gate * are made for pages that are toxic. This
19337c478bd9Sstevel@tonic-gate * is because the current meaning of toxic
19347c478bd9Sstevel@tonic-gate * is that an uncorrectable error has been
19357c478bd9Sstevel@tonic-gate * previously associated with the page.
19367c478bd9Sstevel@tonic-gate */
19377c478bd9Sstevel@tonic-gate if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) {
1938db874c57Selowe if (!PP_TOXIC(pp)) {
19397c478bd9Sstevel@tonic-gate /*
19407c478bd9Sstevel@tonic-gate * Must relocate locked in
19417c478bd9Sstevel@tonic-gate * memory pages.
19427c478bd9Sstevel@tonic-gate */
19437c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
19447c478bd9Sstevel@tonic-gate start_pgrp = ddi_get_lbolt();
19457c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
19467c478bd9Sstevel@tonic-gate /*
19477c478bd9Sstevel@tonic-gate * Lock all constituent pages
19487c478bd9Sstevel@tonic-gate * of a large page to ensure
19497c478bd9Sstevel@tonic-gate * that p_szc won't change.
19507c478bd9Sstevel@tonic-gate */
19517c478bd9Sstevel@tonic-gate if (!group_page_trylock(pp,
19527c478bd9Sstevel@tonic-gate SE_EXCL)) {
19537c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp,
19547c478bd9Sstevel@tonic-gate gptllckfail);
19557c478bd9Sstevel@tonic-gate page_unlock(pp);
19567c478bd9Sstevel@tonic-gate mutex_enter(
19577c478bd9Sstevel@tonic-gate &mhp->mh_mutex);
19587c478bd9Sstevel@tonic-gate continue;
19597c478bd9Sstevel@tonic-gate }
19607c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, npplocked);
19617c478bd9Sstevel@tonic-gate pp_targ =
19627c478bd9Sstevel@tonic-gate page_get_replacement_page(
19637c478bd9Sstevel@tonic-gate pp, NULL, 0);
19647c478bd9Sstevel@tonic-gate if (pp_targ != NULL) {
19657c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
19667c478bd9Sstevel@tonic-gate ntick_pgrp =
19677c478bd9Sstevel@tonic-gate (uint64_t)
19687c478bd9Sstevel@tonic-gate ddi_get_lbolt() -
19697c478bd9Sstevel@tonic-gate start_pgrp;
19707c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
19717c478bd9Sstevel@tonic-gate MDSTAT_PGRP(mhp,
19727c478bd9Sstevel@tonic-gate ntick_pgrp);
19737c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp,
19747c478bd9Sstevel@tonic-gate nlockreloc);
19757c478bd9Sstevel@tonic-gate goto reloc;
19767c478bd9Sstevel@tonic-gate }
19777c478bd9Sstevel@tonic-gate group_page_unlock(pp);
19787c478bd9Sstevel@tonic-gate page_unlock(pp);
19797c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
19807c478bd9Sstevel@tonic-gate ntick_pgrp =
19817c478bd9Sstevel@tonic-gate (uint64_t)ddi_get_lbolt() -
19827c478bd9Sstevel@tonic-gate start_pgrp;
19837c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
19847c478bd9Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp);
19857c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, nnorepl);
19867c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
19877c478bd9Sstevel@tonic-gate continue;
19887c478bd9Sstevel@tonic-gate } else {
19897c478bd9Sstevel@tonic-gate /*
19907c478bd9Sstevel@tonic-gate * Cannot do anything about
19917c478bd9Sstevel@tonic-gate * this page because it is
19927c478bd9Sstevel@tonic-gate * toxic.
19937c478bd9Sstevel@tonic-gate */
19947c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, npplkdtoxic);
19957c478bd9Sstevel@tonic-gate page_unlock(pp);
19967c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
19977c478bd9Sstevel@tonic-gate continue;
19987c478bd9Sstevel@tonic-gate }
19997c478bd9Sstevel@tonic-gate }
20007c478bd9Sstevel@tonic-gate /*
20017c478bd9Sstevel@tonic-gate * Unload the mappings and check if mod bit
20027c478bd9Sstevel@tonic-gate * is set.
20037c478bd9Sstevel@tonic-gate */
2004ad23a2dbSjohansen ASSERT(!PP_ISKAS(pp));
20057c478bd9Sstevel@tonic-gate (void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD);
20067c478bd9Sstevel@tonic-gate mod = hat_ismod(pp);
20077c478bd9Sstevel@tonic-gate
20087c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
20097c478bd9Sstevel@tonic-gate start_pgrp = ddi_get_lbolt();
20107c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
2011db874c57Selowe if (mod && !PP_TOXIC(pp)) {
20127c478bd9Sstevel@tonic-gate /*
20137c478bd9Sstevel@tonic-gate * Lock all constituent pages
20147c478bd9Sstevel@tonic-gate * of a large page to ensure
20157c478bd9Sstevel@tonic-gate * that p_szc won't change.
20167c478bd9Sstevel@tonic-gate */
20177c478bd9Sstevel@tonic-gate if (!group_page_trylock(pp, SE_EXCL)) {
20187c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, gptlmodfail);
20197c478bd9Sstevel@tonic-gate page_unlock(pp);
20207c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
20217c478bd9Sstevel@tonic-gate continue;
20227c478bd9Sstevel@tonic-gate }
20237c478bd9Sstevel@tonic-gate pp_targ = page_get_replacement_page(pp,
20247c478bd9Sstevel@tonic-gate NULL, 0);
20257c478bd9Sstevel@tonic-gate if (pp_targ != NULL) {
20267c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, nmodreloc);
20277c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
20287c478bd9Sstevel@tonic-gate ntick_pgrp =
20297c478bd9Sstevel@tonic-gate (uint64_t)ddi_get_lbolt() -
20307c478bd9Sstevel@tonic-gate start_pgrp;
20317c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
20327c478bd9Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp);
20337c478bd9Sstevel@tonic-gate goto reloc;
20347c478bd9Sstevel@tonic-gate }
20357c478bd9Sstevel@tonic-gate group_page_unlock(pp);
20367c478bd9Sstevel@tonic-gate }
20377c478bd9Sstevel@tonic-gate
20387c478bd9Sstevel@tonic-gate if (!page_try_demote_pages(pp)) {
20397c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, demotefail);
20407c478bd9Sstevel@tonic-gate page_unlock(pp);
20417c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
20427c478bd9Sstevel@tonic-gate ntick_pgrp = (uint64_t)ddi_get_lbolt() -
20437c478bd9Sstevel@tonic-gate start_pgrp;
20447c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
20457c478bd9Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp);
20467c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
20477c478bd9Sstevel@tonic-gate continue;
20487c478bd9Sstevel@tonic-gate }
20497c478bd9Sstevel@tonic-gate
20507c478bd9Sstevel@tonic-gate /*
20517c478bd9Sstevel@tonic-gate * Regular 'page-out'.
20527c478bd9Sstevel@tonic-gate */
20537c478bd9Sstevel@tonic-gate if (!mod) {
20547c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, ndestroy);
20557c478bd9Sstevel@tonic-gate page_destroy(pp, 1);
20567c478bd9Sstevel@tonic-gate /*
20577c478bd9Sstevel@tonic-gate * page_destroy was called with
20587c478bd9Sstevel@tonic-gate * dontfree. As long as p_lckcnt
20597c478bd9Sstevel@tonic-gate * and p_cowcnt are both zero, the
20607c478bd9Sstevel@tonic-gate * only additional action of
20617c478bd9Sstevel@tonic-gate * page_destroy with !dontfree is to
20627c478bd9Sstevel@tonic-gate * call page_free, so we can collect
20637c478bd9Sstevel@tonic-gate * the page here.
20647c478bd9Sstevel@tonic-gate */
20657c478bd9Sstevel@tonic-gate collected++;
20667c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
20677c478bd9Sstevel@tonic-gate ntick_pgrp = (uint64_t)ddi_get_lbolt() -
20687c478bd9Sstevel@tonic-gate start_pgrp;
20697c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
20707c478bd9Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp);
20717c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
20727c478bd9Sstevel@tonic-gate page_delete_collect(pp, mhp);
20737c478bd9Sstevel@tonic-gate mdsp->mds_bitmap[bit / NBPBMW] |=
20747c478bd9Sstevel@tonic-gate (1 << (bit % NBPBMW));
20757c478bd9Sstevel@tonic-gate continue;
20767c478bd9Sstevel@tonic-gate }
20777c478bd9Sstevel@tonic-gate /*
20787c478bd9Sstevel@tonic-gate * The page is toxic and the mod bit is
20797c478bd9Sstevel@tonic-gate * set, we cannot do anything here to deal
20807c478bd9Sstevel@tonic-gate * with it.
20817c478bd9Sstevel@tonic-gate */
2082db874c57Selowe if (PP_TOXIC(pp)) {
20837c478bd9Sstevel@tonic-gate page_unlock(pp);
20847c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
20857c478bd9Sstevel@tonic-gate ntick_pgrp = (uint64_t)ddi_get_lbolt() -
20867c478bd9Sstevel@tonic-gate start_pgrp;
20877c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
20887c478bd9Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp);
20897c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, modtoxic);
20907c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
20917c478bd9Sstevel@tonic-gate continue;
20927c478bd9Sstevel@tonic-gate }
20937c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, nputpage);
20947c478bd9Sstevel@tonic-gate vp = pp->p_vnode;
20957c478bd9Sstevel@tonic-gate offset = pp->p_offset;
20967c478bd9Sstevel@tonic-gate VN_HOLD(vp);
20977c478bd9Sstevel@tonic-gate page_unlock(pp);
20987c478bd9Sstevel@tonic-gate (void) VOP_PUTPAGE(vp, offset, PAGESIZE,
2099da6c28aaSamw B_INVAL|B_FORCE, kcred, NULL);
21007c478bd9Sstevel@tonic-gate VN_RELE(vp);
21017c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
21027c478bd9Sstevel@tonic-gate ntick_pgrp = (uint64_t)ddi_get_lbolt() -
21037c478bd9Sstevel@tonic-gate start_pgrp;
21047c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
21057c478bd9Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp);
21067c478bd9Sstevel@tonic-gate /*
21077c478bd9Sstevel@tonic-gate * Try to get the page back immediately
21087c478bd9Sstevel@tonic-gate * so that it can be collected.
21097c478bd9Sstevel@tonic-gate */
21107c478bd9Sstevel@tonic-gate pp = page_numtopp_nolock(pfn);
21117c478bd9Sstevel@tonic-gate if (pp == NULL) {
21127c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, nnoreclaim);
21137c478bd9Sstevel@tonic-gate /*
21147c478bd9Sstevel@tonic-gate * This should not happen as this
21157c478bd9Sstevel@tonic-gate * thread is deleting the page.
21167c478bd9Sstevel@tonic-gate * If this code is generalized, this
21177c478bd9Sstevel@tonic-gate * becomes a reality.
21187c478bd9Sstevel@tonic-gate */
21197c478bd9Sstevel@tonic-gate #ifdef DEBUG
21207c478bd9Sstevel@tonic-gate cmn_err(CE_WARN,
21217c478bd9Sstevel@tonic-gate "delete_memory_thread(0x%p) "
21227c478bd9Sstevel@tonic-gate "pfn 0x%lx has no page_t",
21237c478bd9Sstevel@tonic-gate (void *)mhp, pfn);
21247c478bd9Sstevel@tonic-gate #endif /* DEBUG */
21257c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
21267c478bd9Sstevel@tonic-gate continue;
21277c478bd9Sstevel@tonic-gate }
21287c478bd9Sstevel@tonic-gate if (page_try_reclaim_lock(pp, SE_EXCL,
2129db874c57Selowe SE_EXCL_WANTED | SE_RETIRED)) {
21307c478bd9Sstevel@tonic-gate if (PP_ISFREE(pp)) {
21317c478bd9Sstevel@tonic-gate goto free_page_collect;
21327c478bd9Sstevel@tonic-gate }
21337c478bd9Sstevel@tonic-gate page_unlock(pp);
21347c478bd9Sstevel@tonic-gate }
21357c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, nnoreclaim);
21367c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
21377c478bd9Sstevel@tonic-gate continue;
21387c478bd9Sstevel@tonic-gate
21397c478bd9Sstevel@tonic-gate reloc:
21407c478bd9Sstevel@tonic-gate /*
21417c478bd9Sstevel@tonic-gate * Got some freemem and a target
21427c478bd9Sstevel@tonic-gate * page, so move the data to avoid
21437c478bd9Sstevel@tonic-gate * I/O and lock problems.
21447c478bd9Sstevel@tonic-gate */
21457c478bd9Sstevel@tonic-gate ASSERT(!page_iolock_assert(pp));
21467c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, nreloc);
21477c478bd9Sstevel@tonic-gate /*
21487c478bd9Sstevel@tonic-gate * page_relocate() will return pgcnt: the
21497c478bd9Sstevel@tonic-gate * number of consecutive pages relocated.
21507c478bd9Sstevel@tonic-gate * If it is successful, pp will be a
21517c478bd9Sstevel@tonic-gate * linked list of the page structs that
21527c478bd9Sstevel@tonic-gate * were relocated. If page_relocate() is
21537c478bd9Sstevel@tonic-gate * unsuccessful, pp will be unmodified.
21547c478bd9Sstevel@tonic-gate */
21557c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
21567c478bd9Sstevel@tonic-gate start_pgrp = ddi_get_lbolt();
21577c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
21587c478bd9Sstevel@tonic-gate result = page_relocate(&pp, &pp_targ, 0, 0,
21597c478bd9Sstevel@tonic-gate &pgcnt, NULL);
21607c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
21617c478bd9Sstevel@tonic-gate ntick_pgrp = (uint64_t)ddi_get_lbolt() -
21627c478bd9Sstevel@tonic-gate start_pgrp;
21637c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
21647c478bd9Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp);
21657c478bd9Sstevel@tonic-gate if (result != 0) {
21667c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, nrelocfail);
21677c478bd9Sstevel@tonic-gate /*
21687c478bd9Sstevel@tonic-gate * We did not succeed. We need
21697c478bd9Sstevel@tonic-gate * to give the pp_targ pages back.
21707c478bd9Sstevel@tonic-gate * page_free(pp_targ, 1) without
21717c478bd9Sstevel@tonic-gate * the freemem accounting.
21727c478bd9Sstevel@tonic-gate */
21737c478bd9Sstevel@tonic-gate group_page_unlock(pp);
21747c478bd9Sstevel@tonic-gate page_free_replacement_page(pp_targ);
21757c478bd9Sstevel@tonic-gate page_unlock(pp);
21767c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
21777c478bd9Sstevel@tonic-gate continue;
21787c478bd9Sstevel@tonic-gate }
21797c478bd9Sstevel@tonic-gate
21807c478bd9Sstevel@tonic-gate /*
21817c478bd9Sstevel@tonic-gate * We will then collect pgcnt pages.
21827c478bd9Sstevel@tonic-gate */
21837c478bd9Sstevel@tonic-gate ASSERT(pgcnt > 0);
21847c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
21857c478bd9Sstevel@tonic-gate /*
21867c478bd9Sstevel@tonic-gate * We need to make sure freemem_left is
21877c478bd9Sstevel@tonic-gate * large enough.
21887c478bd9Sstevel@tonic-gate */
21897c478bd9Sstevel@tonic-gate while ((freemem_left < pgcnt) &&
21907c478bd9Sstevel@tonic-gate (!mhp->mh_cancel)) {
21917c478bd9Sstevel@tonic-gate freemem_left +=
21927c478bd9Sstevel@tonic-gate delthr_get_freemem(mhp);
21937c478bd9Sstevel@tonic-gate }
21947c478bd9Sstevel@tonic-gate
21957c478bd9Sstevel@tonic-gate /*
21967c478bd9Sstevel@tonic-gate * Do not proceed if mh_cancel is set.
21977c478bd9Sstevel@tonic-gate */
21987c478bd9Sstevel@tonic-gate if (mhp->mh_cancel) {
21997c478bd9Sstevel@tonic-gate while (pp_targ != NULL) {
22007c478bd9Sstevel@tonic-gate /*
22017c478bd9Sstevel@tonic-gate * Unlink and unlock each page.
22027c478bd9Sstevel@tonic-gate */
22037c478bd9Sstevel@tonic-gate tpp_targ = pp_targ;
22047c478bd9Sstevel@tonic-gate page_sub(&pp_targ, tpp_targ);
22057c478bd9Sstevel@tonic-gate page_unlock(tpp_targ);
22067c478bd9Sstevel@tonic-gate }
22077c478bd9Sstevel@tonic-gate /*
22087c478bd9Sstevel@tonic-gate * We need to give the pp pages back.
22097c478bd9Sstevel@tonic-gate * page_free(pp, 1) without the
22107c478bd9Sstevel@tonic-gate * freemem accounting.
22117c478bd9Sstevel@tonic-gate */
22127c478bd9Sstevel@tonic-gate page_free_replacement_page(pp);
22137c478bd9Sstevel@tonic-gate break;
22147c478bd9Sstevel@tonic-gate }
22157c478bd9Sstevel@tonic-gate
22167c478bd9Sstevel@tonic-gate /* Now remove pgcnt from freemem_left */
22177c478bd9Sstevel@tonic-gate freemem_left -= pgcnt;
22187c478bd9Sstevel@tonic-gate ASSERT(freemem_left >= 0);
22197c478bd9Sstevel@tonic-gate szc = pp->p_szc;
22207c478bd9Sstevel@tonic-gate while (pp != NULL) {
22217c478bd9Sstevel@tonic-gate /*
22227c478bd9Sstevel@tonic-gate * pp and pp_targ were passed back as
22237c478bd9Sstevel@tonic-gate * a linked list of pages.
22247c478bd9Sstevel@tonic-gate * Unlink and unlock each page.
22257c478bd9Sstevel@tonic-gate */
22267c478bd9Sstevel@tonic-gate tpp_targ = pp_targ;
22277c478bd9Sstevel@tonic-gate page_sub(&pp_targ, tpp_targ);
22287c478bd9Sstevel@tonic-gate page_unlock(tpp_targ);
22297c478bd9Sstevel@tonic-gate /*
22307c478bd9Sstevel@tonic-gate * The original page is now free
22317c478bd9Sstevel@tonic-gate * so remove it from the linked
22327c478bd9Sstevel@tonic-gate * list and collect it.
22337c478bd9Sstevel@tonic-gate */
22347c478bd9Sstevel@tonic-gate tpp = pp;
22357c478bd9Sstevel@tonic-gate page_sub(&pp, tpp);
22367c478bd9Sstevel@tonic-gate pfn = page_pptonum(tpp);
22377c478bd9Sstevel@tonic-gate collected++;
22387c478bd9Sstevel@tonic-gate ASSERT(PAGE_EXCL(tpp));
22397c478bd9Sstevel@tonic-gate ASSERT(tpp->p_vnode == NULL);
22407c478bd9Sstevel@tonic-gate ASSERT(!hat_page_is_mapped(tpp));
22417c478bd9Sstevel@tonic-gate ASSERT(tpp->p_szc == szc);
22427c478bd9Sstevel@tonic-gate tpp->p_szc = 0;
22437c478bd9Sstevel@tonic-gate page_delete_collect(tpp, mhp);
22447c478bd9Sstevel@tonic-gate bit = pfn - mdsp->mds_base;
22457c478bd9Sstevel@tonic-gate mdsp->mds_bitmap[bit / NBPBMW] |=
22467c478bd9Sstevel@tonic-gate (1 << (bit % NBPBMW));
22477c478bd9Sstevel@tonic-gate }
22487c478bd9Sstevel@tonic-gate ASSERT(pp_targ == NULL);
22497c478bd9Sstevel@tonic-gate }
22507c478bd9Sstevel@tonic-gate }
22517c478bd9Sstevel@tonic-gate first_scan = 0;
22527c478bd9Sstevel@tonic-gate if ((mhp->mh_cancel == 0) && (mhp->mh_hold_todo != 0) &&
22537c478bd9Sstevel@tonic-gate (collected == 0)) {
22547c478bd9Sstevel@tonic-gate /*
22557c478bd9Sstevel@tonic-gate * This code is needed as we cannot wait
22567c478bd9Sstevel@tonic-gate * for a page to be locked OR the delete to
22577c478bd9Sstevel@tonic-gate * be cancelled. Also, we must delay so
22587c478bd9Sstevel@tonic-gate * that other threads get a chance to run
22597c478bd9Sstevel@tonic-gate * on our cpu, otherwise page locks may be
22607c478bd9Sstevel@tonic-gate * held indefinitely by those threads.
22617c478bd9Sstevel@tonic-gate */
22627c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, ndelay);
22637c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cprinfo);
2264d3d50737SRafael Vanoni (void) cv_reltimedwait(&mhp->mh_cv, &mhp->mh_mutex,
2265d3d50737SRafael Vanoni DEL_BUSY_WAIT_TICKS, TR_CLOCK_TICK);
22667c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cprinfo, &mhp->mh_mutex);
22677c478bd9Sstevel@tonic-gate }
22687c478bd9Sstevel@tonic-gate }
22697c478bd9Sstevel@tonic-gate /* stop the dr aio cleanup thread */
22707c478bd9Sstevel@tonic-gate mhp->mh_dr_aio_cleanup_cancel = 1;
22717c478bd9Sstevel@tonic-gate transit_list_collect(mhp, 0);
22727c478bd9Sstevel@tonic-gate if (freemem_left != 0) {
22737c478bd9Sstevel@tonic-gate /* Return any surplus. */
22747c478bd9Sstevel@tonic-gate page_create_putback(freemem_left);
22757c478bd9Sstevel@tonic-gate freemem_left = 0;
22767c478bd9Sstevel@tonic-gate }
22777c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
22787c478bd9Sstevel@tonic-gate ntick_total = (uint64_t)ddi_get_lbolt() - start_total;
22797c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
22807c478bd9Sstevel@tonic-gate MDSTAT_TOTAL(mhp, ntick_total);
22817c478bd9Sstevel@tonic-gate MDSTAT_PRINT(mhp);
22827c478bd9Sstevel@tonic-gate
22837c478bd9Sstevel@tonic-gate /*
22847c478bd9Sstevel@tonic-gate * If the memory delete was cancelled, exclusive-wanted bits must
2285db874c57Selowe * be cleared. If there are retired pages being deleted, they need
2286db874c57Selowe * to be unretired.
22877c478bd9Sstevel@tonic-gate */
22887c478bd9Sstevel@tonic-gate for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
22897c478bd9Sstevel@tonic-gate mdsp = mdsp->mds_next) {
22907c478bd9Sstevel@tonic-gate pfn_t pfn, p_end;
22917c478bd9Sstevel@tonic-gate
22927c478bd9Sstevel@tonic-gate p_end = mdsp->mds_base + mdsp->mds_npgs;
22937c478bd9Sstevel@tonic-gate for (pfn = mdsp->mds_base; pfn < p_end; pfn++) {
22947c478bd9Sstevel@tonic-gate page_t *pp;
22957c478bd9Sstevel@tonic-gate pgcnt_t bit;
22967c478bd9Sstevel@tonic-gate
22977c478bd9Sstevel@tonic-gate bit = pfn - mdsp->mds_base;
22987c478bd9Sstevel@tonic-gate if (mhp->mh_cancel) {
22997c478bd9Sstevel@tonic-gate pp = page_numtopp_nolock(pfn);
23007c478bd9Sstevel@tonic-gate if (pp != NULL) {
23017c478bd9Sstevel@tonic-gate if ((mdsp->mds_bitmap[bit / NBPBMW] &
23027c478bd9Sstevel@tonic-gate (1 << (bit % NBPBMW))) == 0) {
23037c478bd9Sstevel@tonic-gate page_lock_clr_exclwanted(pp);
23047c478bd9Sstevel@tonic-gate }
23057c478bd9Sstevel@tonic-gate }
23067c478bd9Sstevel@tonic-gate } else {
23077c478bd9Sstevel@tonic-gate pp = NULL;
23087c478bd9Sstevel@tonic-gate }
23097c478bd9Sstevel@tonic-gate if ((mdsp->mds_bitmap_retired[bit / NBPBMW] &
23107c478bd9Sstevel@tonic-gate (1 << (bit % NBPBMW))) != 0) {
23117c478bd9Sstevel@tonic-gate /* do we already have pp? */
23127c478bd9Sstevel@tonic-gate if (pp == NULL) {
23137c478bd9Sstevel@tonic-gate pp = page_numtopp_nolock(pfn);
23147c478bd9Sstevel@tonic-gate }
23157c478bd9Sstevel@tonic-gate ASSERT(pp != NULL);
2316db874c57Selowe ASSERT(PP_RETIRED(pp));
23177c478bd9Sstevel@tonic-gate if (mhp->mh_cancel != 0) {
2318db874c57Selowe page_unlock(pp);
23197c478bd9Sstevel@tonic-gate /*
23207c478bd9Sstevel@tonic-gate * To satisfy ASSERT below in
23217c478bd9Sstevel@tonic-gate * cancel code.
23227c478bd9Sstevel@tonic-gate */
23237c478bd9Sstevel@tonic-gate mhp->mh_hold_todo++;
23247c478bd9Sstevel@tonic-gate } else {
23258b464eb8Smec (void) page_unretire_pp(pp,
23268b464eb8Smec PR_UNR_CLEAN);
23277c478bd9Sstevel@tonic-gate }
23287c478bd9Sstevel@tonic-gate }
23297c478bd9Sstevel@tonic-gate }
23307c478bd9Sstevel@tonic-gate }
23317c478bd9Sstevel@tonic-gate /*
23327c478bd9Sstevel@tonic-gate * Free retired page bitmap and collected page bitmap
23337c478bd9Sstevel@tonic-gate */
23347c478bd9Sstevel@tonic-gate for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
23357c478bd9Sstevel@tonic-gate mdsp = mdsp->mds_next) {
23367c478bd9Sstevel@tonic-gate ASSERT(mdsp->mds_bitmap_retired != NULL);
23377c478bd9Sstevel@tonic-gate kmem_free(mdsp->mds_bitmap_retired, MDS_BITMAPBYTES(mdsp));
23387c478bd9Sstevel@tonic-gate mdsp->mds_bitmap_retired = NULL; /* Paranoia. */
23397c478bd9Sstevel@tonic-gate ASSERT(mdsp->mds_bitmap != NULL);
23407c478bd9Sstevel@tonic-gate kmem_free(mdsp->mds_bitmap, MDS_BITMAPBYTES(mdsp));
23417c478bd9Sstevel@tonic-gate mdsp->mds_bitmap = NULL; /* Paranoia. */
23427c478bd9Sstevel@tonic-gate }
23437c478bd9Sstevel@tonic-gate
23447c478bd9Sstevel@tonic-gate /* wait for our dr aio cancel thread to exit */
23457c478bd9Sstevel@tonic-gate while (!(mhp->mh_aio_cleanup_done)) {
23467c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cprinfo);
23477c478bd9Sstevel@tonic-gate delay(drv_usectohz(DR_AIO_CLEANUP_DELAY));
23487c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cprinfo, &mhp->mh_mutex);
23497c478bd9Sstevel@tonic-gate }
23507c478bd9Sstevel@tonic-gate refused:
23517c478bd9Sstevel@tonic-gate if (mhp->mh_cancel != 0) {
23527c478bd9Sstevel@tonic-gate page_t *pp;
23537c478bd9Sstevel@tonic-gate
23547c478bd9Sstevel@tonic-gate comp_code = mhp->mh_cancel;
23557c478bd9Sstevel@tonic-gate /*
23567c478bd9Sstevel@tonic-gate * Go through list of deleted pages (mh_deleted) freeing
23577c478bd9Sstevel@tonic-gate * them.
23587c478bd9Sstevel@tonic-gate */
23597c478bd9Sstevel@tonic-gate while ((pp = mhp->mh_deleted) != NULL) {
23607c478bd9Sstevel@tonic-gate mhp->mh_deleted = pp->p_next;
23617c478bd9Sstevel@tonic-gate mhp->mh_hold_todo++;
23627c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
23637c478bd9Sstevel@tonic-gate /* Restore p_next. */
23647c478bd9Sstevel@tonic-gate pp->p_next = pp->p_prev;
23657c478bd9Sstevel@tonic-gate if (PP_ISFREE(pp)) {
23667c478bd9Sstevel@tonic-gate cmn_err(CE_PANIC,
23677c478bd9Sstevel@tonic-gate "page %p is free",
23687c478bd9Sstevel@tonic-gate (void *)pp);
23697c478bd9Sstevel@tonic-gate }
23707c478bd9Sstevel@tonic-gate page_free(pp, 1);
23717c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
23727c478bd9Sstevel@tonic-gate }
23737c478bd9Sstevel@tonic-gate ASSERT(mhp->mh_hold_todo == mhp->mh_vm_pages);
23747c478bd9Sstevel@tonic-gate
23757c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
23767c478bd9Sstevel@tonic-gate put_availrmem(mhp->mh_vm_pages);
23777c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
23787c478bd9Sstevel@tonic-gate
23797c478bd9Sstevel@tonic-gate goto t_exit;
23807c478bd9Sstevel@tonic-gate }
23817c478bd9Sstevel@tonic-gate
23827c478bd9Sstevel@tonic-gate /*
23837c478bd9Sstevel@tonic-gate * All the pages are no longer in use and are exclusively locked.
23847c478bd9Sstevel@tonic-gate */
23857c478bd9Sstevel@tonic-gate
23867c478bd9Sstevel@tonic-gate mhp->mh_deleted = NULL;
23877c478bd9Sstevel@tonic-gate
23887c478bd9Sstevel@tonic-gate kphysm_del_cleanup(mhp);
23897c478bd9Sstevel@tonic-gate
239073347c69Smb158278 /*
23919853d9e8SJason Beloro * mem_node_del_range needs to be after kphysm_del_cleanup so
239273347c69Smb158278 * that the mem_node_config[] will remain intact for the cleanup.
239373347c69Smb158278 */
239473347c69Smb158278 for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
239573347c69Smb158278 mdsp = mdsp->mds_next) {
23969853d9e8SJason Beloro mem_node_del_range(mdsp->mds_base,
23979853d9e8SJason Beloro mdsp->mds_base + mdsp->mds_npgs - 1);
239873347c69Smb158278 }
2399af4c679fSSean McEnroe /* cleanup the page counters */
2400af4c679fSSean McEnroe page_ctrs_cleanup();
240173347c69Smb158278
24027c478bd9Sstevel@tonic-gate comp_code = KPHYSM_OK;
24037c478bd9Sstevel@tonic-gate
24047c478bd9Sstevel@tonic-gate t_exit:
24057c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
24067c478bd9Sstevel@tonic-gate kphysm_setup_post_del(mhp->mh_vm_pages,
24077c478bd9Sstevel@tonic-gate (comp_code == KPHYSM_OK) ? 0 : 1);
24087c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
24097c478bd9Sstevel@tonic-gate
24107c478bd9Sstevel@tonic-gate early_exit:
24117c478bd9Sstevel@tonic-gate /* mhp->mh_mutex exited by CALLB_CPR_EXIT() */
24127c478bd9Sstevel@tonic-gate mhp->mh_state = MHND_DONE;
24137c478bd9Sstevel@tonic-gate del_complete_funcp = mhp->mh_delete_complete;
24147c478bd9Sstevel@tonic-gate del_complete_arg = mhp->mh_delete_complete_arg;
24157c478bd9Sstevel@tonic-gate CALLB_CPR_EXIT(&cprinfo);
24167c478bd9Sstevel@tonic-gate (*del_complete_funcp)(del_complete_arg, comp_code);
24177c478bd9Sstevel@tonic-gate thread_exit();
24187c478bd9Sstevel@tonic-gate /*NOTREACHED*/
24197c478bd9Sstevel@tonic-gate }
24207c478bd9Sstevel@tonic-gate
24217c478bd9Sstevel@tonic-gate /*
24227c478bd9Sstevel@tonic-gate * Start the delete of the memory from the system.
24237c478bd9Sstevel@tonic-gate */
24247c478bd9Sstevel@tonic-gate int
kphysm_del_start(memhandle_t handle,void (* complete)(void *,int),void * complete_arg)24257c478bd9Sstevel@tonic-gate kphysm_del_start(
24267c478bd9Sstevel@tonic-gate memhandle_t handle,
24277c478bd9Sstevel@tonic-gate void (*complete)(void *, int),
24287c478bd9Sstevel@tonic-gate void *complete_arg)
24297c478bd9Sstevel@tonic-gate {
24307c478bd9Sstevel@tonic-gate struct mem_handle *mhp;
24317c478bd9Sstevel@tonic-gate
24327c478bd9Sstevel@tonic-gate mhp = kphysm_lookup_mem_handle(handle);
24337c478bd9Sstevel@tonic-gate if (mhp == NULL) {
24347c478bd9Sstevel@tonic-gate return (KPHYSM_EHANDLE);
24357c478bd9Sstevel@tonic-gate }
24367c478bd9Sstevel@tonic-gate switch (mhp->mh_state) {
24377c478bd9Sstevel@tonic-gate case MHND_FREE:
24387c478bd9Sstevel@tonic-gate ASSERT(mhp->mh_state != MHND_FREE);
24397c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
24407c478bd9Sstevel@tonic-gate return (KPHYSM_EHANDLE);
24417c478bd9Sstevel@tonic-gate case MHND_INIT:
24427c478bd9Sstevel@tonic-gate break;
24437c478bd9Sstevel@tonic-gate case MHND_STARTING:
24447c478bd9Sstevel@tonic-gate case MHND_RUNNING:
24457c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
24467c478bd9Sstevel@tonic-gate return (KPHYSM_ESEQUENCE);
24477c478bd9Sstevel@tonic-gate case MHND_DONE:
24487c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
24497c478bd9Sstevel@tonic-gate return (KPHYSM_ESEQUENCE);
24507c478bd9Sstevel@tonic-gate case MHND_RELEASE:
24517c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
24527c478bd9Sstevel@tonic-gate return (KPHYSM_ESEQUENCE);
24537c478bd9Sstevel@tonic-gate default:
24547c478bd9Sstevel@tonic-gate #ifdef DEBUG
24557c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "kphysm_del_start(0x%p) state corrupt %d",
24567c478bd9Sstevel@tonic-gate (void *)mhp, mhp->mh_state);
24577c478bd9Sstevel@tonic-gate #endif /* DEBUG */
24587c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
24597c478bd9Sstevel@tonic-gate return (KPHYSM_EHANDLE);
24607c478bd9Sstevel@tonic-gate }
24617c478bd9Sstevel@tonic-gate
24627c478bd9Sstevel@tonic-gate if (mhp->mh_transit.trl_spans == NULL) {
24637c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
24647c478bd9Sstevel@tonic-gate return (KPHYSM_ENOWORK);
24657c478bd9Sstevel@tonic-gate }
24667c478bd9Sstevel@tonic-gate
24677c478bd9Sstevel@tonic-gate ASSERT(complete != NULL);
24687c478bd9Sstevel@tonic-gate mhp->mh_delete_complete = complete;
24697c478bd9Sstevel@tonic-gate mhp->mh_delete_complete_arg = complete_arg;
24707c478bd9Sstevel@tonic-gate mhp->mh_state = MHND_STARTING;
24717c478bd9Sstevel@tonic-gate /*
24727c478bd9Sstevel@tonic-gate * Release the mutex in case thread_create sleeps.
24737c478bd9Sstevel@tonic-gate */
24747c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
24757c478bd9Sstevel@tonic-gate
24767c478bd9Sstevel@tonic-gate /*
24777c478bd9Sstevel@tonic-gate * The "obvious" process for this thread is pageout (proc_pageout)
24787c478bd9Sstevel@tonic-gate * but this gives the thread too much power over freemem
24797c478bd9Sstevel@tonic-gate * which results in freemem starvation.
24807c478bd9Sstevel@tonic-gate */
24817c478bd9Sstevel@tonic-gate (void) thread_create(NULL, 0, delete_memory_thread, mhp, 0, &p0,
24827c478bd9Sstevel@tonic-gate TS_RUN, maxclsyspri - 1);
24837c478bd9Sstevel@tonic-gate
24847c478bd9Sstevel@tonic-gate return (KPHYSM_OK);
24857c478bd9Sstevel@tonic-gate }
24867c478bd9Sstevel@tonic-gate
/*
 * Dummy-page state: one small, page-aligned area of "deleted" page_t's
 * that outgoing memseg metadata is remapped onto (see remap_to_dummy).
 * pp_dummy doubles as the init-done flag (non-NULL once initialized).
 */
static kmutex_t pp_dummy_lock;		/* Protects init. of pp_dummy. */
static caddr_t pp_dummy;		/* Dummy page_t area; NULL until init. */
static pgcnt_t pp_dummy_npages;		/* Pages in the pp_dummy area. */
static pfn_t *pp_dummy_pfn;		/* Array of dummy pfns. */
24917c478bd9Sstevel@tonic-gate
24927c478bd9Sstevel@tonic-gate static void
memseg_remap_init_pages(page_t * pages,page_t * epages)24937c478bd9Sstevel@tonic-gate memseg_remap_init_pages(page_t *pages, page_t *epages)
24947c478bd9Sstevel@tonic-gate {
24957c478bd9Sstevel@tonic-gate page_t *pp;
24967c478bd9Sstevel@tonic-gate
24977c478bd9Sstevel@tonic-gate for (pp = pages; pp < epages; pp++) {
24987c478bd9Sstevel@tonic-gate pp->p_pagenum = PFN_INVALID; /* XXXX */
24997c478bd9Sstevel@tonic-gate pp->p_offset = (u_offset_t)-1;
25007c478bd9Sstevel@tonic-gate page_iolock_init(pp);
25017c478bd9Sstevel@tonic-gate while (!page_lock(pp, SE_EXCL, (kmutex_t *)NULL, P_RECLAIM))
25027c478bd9Sstevel@tonic-gate continue;
25037c478bd9Sstevel@tonic-gate page_lock_delete(pp);
25047c478bd9Sstevel@tonic-gate }
25057c478bd9Sstevel@tonic-gate }
25067c478bd9Sstevel@tonic-gate
/*
 * One-time, idempotent setup of the dummy page_t area used when
 * remapping deleted memseg metadata.  Allocates pp_dummy_npages pages,
 * records their pfns, marks the page_t's they contain as deleted, and
 * finally drops the kernel mappings.  Serialized by pp_dummy_lock;
 * pp_dummy != NULL means initialization is already done.
 */
void
memseg_remap_init()
{
	mutex_enter(&pp_dummy_lock);
	if (pp_dummy == NULL) {
		uint_t dpages;
		int i;

		/*
		 * dpages starts off as the size of the structure and
		 * ends up as the minimum number of pages that will
		 * hold a whole number of page_t structures.
		 */
		dpages = sizeof (page_t);
		ASSERT(dpages != 0);
		ASSERT(dpages <= MMU_PAGESIZE);

		/*
		 * Strip factors of two: since MMU_PAGESIZE is a power
		 * of two, the odd part of sizeof (page_t) is the
		 * smallest page count whose total byte size is a
		 * multiple of sizeof (page_t).
		 */
		while ((dpages & 1) == 0)
			dpages >>= 1;

		pp_dummy_npages = dpages;
		/*
		 * Allocate pp_dummy pages directly from static_arena,
		 * since these are whole page allocations and are
		 * referenced by physical address. This also has the
		 * nice fringe benefit of hiding the memory from
		 * ::findleaks since it doesn't deal well with allocated
		 * kernel heap memory that doesn't have any mappings.
		 */
		pp_dummy = vmem_xalloc(static_arena, ptob(pp_dummy_npages),
		    PAGESIZE, 0, 0, NULL, NULL, VM_SLEEP);
		bzero(pp_dummy, ptob(pp_dummy_npages));
		ASSERT(((uintptr_t)pp_dummy & MMU_PAGEOFFSET) == 0);
		/* Record the backing pfn of each dummy page for remapping. */
		pp_dummy_pfn = kmem_alloc(sizeof (*pp_dummy_pfn) *
		    pp_dummy_npages, KM_SLEEP);
		for (i = 0; i < pp_dummy_npages; i++) {
			pp_dummy_pfn[i] = hat_getpfnum(kas.a_hat,
			    &pp_dummy[MMU_PAGESIZE * i]);
			ASSERT(pp_dummy_pfn[i] != PFN_INVALID);
		}
		/*
		 * Initialize the page_t's to a known 'deleted' state
		 * that matches the state of deleted pages.
		 */
		memseg_remap_init_pages((page_t *)pp_dummy,
		    (page_t *)(pp_dummy + ptob(pp_dummy_npages)));
		/* Remove kmem mappings for the pages for safety. */
		hat_unload(kas.a_hat, pp_dummy, ptob(pp_dummy_npages),
		    HAT_UNLOAD_UNLOCK);
		/* Leave pp_dummy pointer set as flag that init is done. */
	}
	mutex_exit(&pp_dummy_lock);
}
25607c478bd9Sstevel@tonic-gate
/*
 * Remap a page-aligned range of page_t's to dummy pages.
 * va is the (page-aligned) start of the metadata range and metapgs the
 * number of pages to remap; each page is redirected read-only onto the
 * pp_dummy area, cycling through the dummy pfns.
 */
void
remap_to_dummy(caddr_t va, pgcnt_t metapgs)
{
	int phase;

	ASSERT(IS_P2ALIGNED((uint64_t)(uintptr_t)va, PAGESIZE));

	/*
	 * We may start remapping at a non-zero page offset
	 * within the dummy pages since the low/high ends
	 * of the outgoing pp's could be shared by other
	 * memsegs (see memseg_remap_meta).
	 */
	phase = btop((uint64_t)(uintptr_t)va) % pp_dummy_npages;
	/*CONSTCOND*/
	ASSERT(PAGESIZE % sizeof (page_t) || phase == 0);

	/* Remap in chunks of at most pp_dummy_npages pages. */
	while (metapgs != 0) {
		pgcnt_t n;
		int i, j;

		n = pp_dummy_npages;
		if (n > metapgs)
			n = metapgs;
		for (i = 0; i < n; i++) {
			/* Rotate through the dummy pfns, offset by phase. */
			j = (i + phase) % pp_dummy_npages;
			hat_devload(kas.a_hat, va, ptob(1), pp_dummy_pfn[j],
			    PROT_READ,
			    HAT_LOAD | HAT_LOAD_NOCONSIST |
			    HAT_LOAD_REMAP);
			va += ptob(1);
		}
		metapgs -= n;
	}
}
25997c478bd9Sstevel@tonic-gate
26009853d9e8SJason Beloro static void
memseg_remap_to_dummy(struct memseg * seg)26019853d9e8SJason Beloro memseg_remap_to_dummy(struct memseg *seg)
26029853d9e8SJason Beloro {
26039853d9e8SJason Beloro caddr_t pp;
26049853d9e8SJason Beloro pgcnt_t metapgs;
26059853d9e8SJason Beloro
26069853d9e8SJason Beloro ASSERT(memseg_is_dynamic(seg));
26079853d9e8SJason Beloro ASSERT(pp_dummy != NULL);
26089853d9e8SJason Beloro
26099853d9e8SJason Beloro
26109853d9e8SJason Beloro if (!memseg_includes_meta(seg)) {
26119853d9e8SJason Beloro memseg_remap_meta(seg);
26129853d9e8SJason Beloro return;
26139853d9e8SJason Beloro }
26149853d9e8SJason Beloro
26159853d9e8SJason Beloro pp = (caddr_t)seg->pages;
26169853d9e8SJason Beloro metapgs = seg->pages_base - memseg_get_start(seg);
26179853d9e8SJason Beloro ASSERT(metapgs != 0);
26189853d9e8SJason Beloro
26199853d9e8SJason Beloro seg->pages_end = seg->pages_base;
26209853d9e8SJason Beloro
26219853d9e8SJason Beloro remap_to_dummy(pp, metapgs);
26229853d9e8SJason Beloro }
26239853d9e8SJason Beloro
26247c478bd9Sstevel@tonic-gate /*
26257c478bd9Sstevel@tonic-gate * Transition all the deleted pages to the deleted state so that
26267c478bd9Sstevel@tonic-gate * page_lock will not wait. The page_lock_delete call will
26277c478bd9Sstevel@tonic-gate * also wake up any waiters.
26287c478bd9Sstevel@tonic-gate */
26297c478bd9Sstevel@tonic-gate static void
memseg_lock_delete_all(struct memseg * seg)26307c478bd9Sstevel@tonic-gate memseg_lock_delete_all(struct memseg *seg)
26317c478bd9Sstevel@tonic-gate {
26327c478bd9Sstevel@tonic-gate page_t *pp;
26337c478bd9Sstevel@tonic-gate
26347c478bd9Sstevel@tonic-gate for (pp = seg->pages; pp < seg->epages; pp++) {
26357c478bd9Sstevel@tonic-gate pp->p_pagenum = PFN_INVALID; /* XXXX */
26367c478bd9Sstevel@tonic-gate page_lock_delete(pp);
26377c478bd9Sstevel@tonic-gate }
26387c478bd9Sstevel@tonic-gate }
26397c478bd9Sstevel@tonic-gate
/*
 * Final stage of a memory delete: unhook the deleted memsegs from the
 * system, remap or park their page_t metadata, remove the spans from
 * phys_avail/phys_install, and adjust the global memory accounting.
 * Called from delete_memory_thread with mhp->mh_mutex held; the mutex
 * is dropped for the bulk of the work and re-acquired before return.
 */
static void
kphysm_del_cleanup(struct mem_handle *mhp)
{
	struct memdelspan *mdsp;
	struct memseg *seg;
	struct memseg **segpp;
	struct memseg *seglist;
	pfn_t p_end;
	uint64_t avmem;
	pgcnt_t avpgs;
	pgcnt_t npgs;

	avpgs = mhp->mh_vm_pages;

	memsegs_lock(1);

	/*
	 * remove from main segment list.
	 */
	npgs = 0;
	seglist = NULL;
	for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
	    mdsp = mdsp->mds_next) {
		p_end = mdsp->mds_base + mdsp->mds_npgs;
		for (segpp = &memsegs; (seg = *segpp) != NULL; ) {
			if (seg->pages_base >= p_end ||
			    seg->pages_end <= mdsp->mds_base) {
				/* Span and memseg don't overlap. */
				segpp = &((*segpp)->next);
				continue;
			}
			/* Overlapping memsegs must lie wholly in the span. */
			ASSERT(seg->pages_base >= mdsp->mds_base);
			ASSERT(seg->pages_end <= p_end);

			/*
			 * Negative delta: back the segment's pages out of
			 * the per-lgroup page counters.
			 */
			PLCNT_MODIFY_MAX(seg->pages_base,
			    seg->pages_base - seg->pages_end);

			/* Hide the memseg from future scans. */
			hat_kpm_delmem_mseg_update(seg, segpp);
			*segpp = seg->next;
			membar_producer();	/* TODO: Needed? */
			npgs += MSEG_NPAGES(seg);

			/*
			 * Leave the deleted segment's next pointer intact
			 * in case a memsegs scanning loop is walking this
			 * segment concurrently.
			 */
			seg->lnext = seglist;
			seglist = seg;
		}
	}

	/* Rebuild the pfn-to-memseg lookup without the deleted segments. */
	build_pfn_hash();

	ASSERT(npgs < total_pages);
	total_pages -= npgs;

	/*
	 * Recalculate the paging parameters now total_pages has changed.
	 * This will also cause the clock hands to be reset before next use.
	 */
	setupclock(1);

	memsegs_unlock(1);

	mutex_exit(&mhp->mh_mutex);

	/* Dispose of each unhooked memseg in turn. */
	while ((seg = seglist) != NULL) {
		pfn_t mseg_start;
		pfn_t mseg_base, mseg_end;
		pgcnt_t mseg_npgs;
		int mlret;

		seglist = seg->lnext;

		/*
		 * Put the page_t's into the deleted state to stop
		 * cv_wait()s on the pages. When we remap, the dummy
		 * page_t's will be in the same state.
		 */
		memseg_lock_delete_all(seg);
		/*
		 * Collect up information based on pages_base and pages_end
		 * early so that we can flag early that the memseg has been
		 * deleted by setting pages_end == pages_base.
		 */
		mseg_base = seg->pages_base;
		mseg_end = seg->pages_end;
		mseg_npgs = MSEG_NPAGES(seg);
		mseg_start = memseg_get_start(seg);

		if (memseg_is_dynamic(seg)) {
			/* Remap the meta data to our special dummy area. */
			memseg_remap_to_dummy(seg);

			/* Recycle the memseg's virtual range later. */
			mutex_enter(&memseg_lists_lock);
			seg->lnext = memseg_va_avail;
			memseg_va_avail = seg;
			mutex_exit(&memseg_lists_lock);
		} else {
			/*
			 * For memory whose page_ts were allocated
			 * at boot, we need to find a new use for
			 * the page_t memory.
			 * For the moment, just leak it.
			 * (It is held in the memseg_delete_junk list.)
			 */
			seg->pages_end = seg->pages_base;

			mutex_enter(&memseg_lists_lock);
			seg->lnext = memseg_delete_junk;
			memseg_delete_junk = seg;
			mutex_exit(&memseg_lists_lock);
		}

		/* Must not use seg now as it could be re-used. */

		memlist_write_lock();

		/* Remove the usable pages from phys_avail. */
		mlret = memlist_delete_span(
		    (uint64_t)(mseg_base) << PAGESHIFT,
		    (uint64_t)(mseg_npgs) << PAGESHIFT,
		    &phys_avail);
		ASSERT(mlret == MEML_SPANOP_OK);

		/* Remove the whole segment (incl. metadata) from install. */
		mlret = memlist_delete_span(
		    (uint64_t)(mseg_start) << PAGESHIFT,
		    (uint64_t)(mseg_end - mseg_start) <<
		    PAGESHIFT,
		    &phys_install);
		ASSERT(mlret == MEML_SPANOP_OK);
		phys_install_has_changed();

		memlist_write_unlock();
	}

	/* Recompute physmax/physinstalled from the shrunken list. */
	memlist_read_lock();
	installed_top_size(phys_install, &physmax, &physinstalled);
	memlist_read_unlock();

	mutex_enter(&freemem_lock);
	maxmem -= avpgs;
	physmem -= avpgs;
	/* availrmem is adjusted during the delete. */
	availrmem_initial -= avpgs;

	mutex_exit(&freemem_lock);

	dump_resize();

	cmn_err(CE_CONT, "?kphysm_delete: mem = %ldK "
	    "(0x%" PRIx64 ")\n",
	    physinstalled << (PAGESHIFT - 10),
	    (uint64_t)physinstalled << PAGESHIFT);

	avmem = (uint64_t)freemem << PAGESHIFT;
	cmn_err(CE_CONT, "?kphysm_delete: "
	    "avail mem = %" PRId64 "\n", avmem);

	/*
	 * Update lgroup generation number on single lgroup systems
	 */
	if (nlgrps == 1)
		lgrp_config(LGRP_CONFIG_GEN_UPDATE, 0, 0);

	/* Successfully deleted system memory */
	mutex_enter(&mhp->mh_mutex);
}
28097c478bd9Sstevel@tonic-gate
/* Stat: deleted vnode-less pages that had waiters on p_cv (see below). */
static uint_t mdel_nullvp_waiter;
28117c478bd9Sstevel@tonic-gate
28127c478bd9Sstevel@tonic-gate static void
page_delete_collect(page_t * pp,struct mem_handle * mhp)28137c478bd9Sstevel@tonic-gate page_delete_collect(
28147c478bd9Sstevel@tonic-gate page_t *pp,
28157c478bd9Sstevel@tonic-gate struct mem_handle *mhp)
28167c478bd9Sstevel@tonic-gate {
28177c478bd9Sstevel@tonic-gate if (pp->p_vnode) {
28187c478bd9Sstevel@tonic-gate page_hashout(pp, (kmutex_t *)NULL);
28197c478bd9Sstevel@tonic-gate /* do not do PP_SETAGED(pp); */
28207c478bd9Sstevel@tonic-gate } else {
28217c478bd9Sstevel@tonic-gate kmutex_t *sep;
28227c478bd9Sstevel@tonic-gate
28237c478bd9Sstevel@tonic-gate sep = page_se_mutex(pp);
28247c478bd9Sstevel@tonic-gate mutex_enter(sep);
28257c478bd9Sstevel@tonic-gate if (CV_HAS_WAITERS(&pp->p_cv)) {
28267c478bd9Sstevel@tonic-gate mdel_nullvp_waiter++;
28277c478bd9Sstevel@tonic-gate cv_broadcast(&pp->p_cv);
28287c478bd9Sstevel@tonic-gate }
28297c478bd9Sstevel@tonic-gate mutex_exit(sep);
28307c478bd9Sstevel@tonic-gate }
28317c478bd9Sstevel@tonic-gate ASSERT(pp->p_next == pp->p_prev);
28327c478bd9Sstevel@tonic-gate ASSERT(pp->p_next == NULL || pp->p_next == pp);
28337c478bd9Sstevel@tonic-gate pp->p_next = mhp->mh_deleted;
28347c478bd9Sstevel@tonic-gate mhp->mh_deleted = pp;
28357c478bd9Sstevel@tonic-gate ASSERT(mhp->mh_hold_todo != 0);
28367c478bd9Sstevel@tonic-gate mhp->mh_hold_todo--;
28377c478bd9Sstevel@tonic-gate }
28387c478bd9Sstevel@tonic-gate
28397c478bd9Sstevel@tonic-gate static void
transit_list_collect(struct mem_handle * mhp,int v)28407c478bd9Sstevel@tonic-gate transit_list_collect(struct mem_handle *mhp, int v)
28417c478bd9Sstevel@tonic-gate {
28427c478bd9Sstevel@tonic-gate struct transit_list_head *trh;
28437c478bd9Sstevel@tonic-gate
28447c478bd9Sstevel@tonic-gate trh = &transit_list_head;
28457c478bd9Sstevel@tonic-gate mutex_enter(&trh->trh_lock);
28467c478bd9Sstevel@tonic-gate mhp->mh_transit.trl_collect = v;
28477c478bd9Sstevel@tonic-gate mutex_exit(&trh->trh_lock);
28487c478bd9Sstevel@tonic-gate }
28497c478bd9Sstevel@tonic-gate
28507c478bd9Sstevel@tonic-gate static void
transit_list_insert(struct transit_list * tlp)28517c478bd9Sstevel@tonic-gate transit_list_insert(struct transit_list *tlp)
28527c478bd9Sstevel@tonic-gate {
28537c478bd9Sstevel@tonic-gate struct transit_list_head *trh;
28547c478bd9Sstevel@tonic-gate
28557c478bd9Sstevel@tonic-gate trh = &transit_list_head;
28567c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&trh->trh_lock));
28577c478bd9Sstevel@tonic-gate tlp->trl_next = trh->trh_head;
28587c478bd9Sstevel@tonic-gate trh->trh_head = tlp;
28597c478bd9Sstevel@tonic-gate }
28607c478bd9Sstevel@tonic-gate
28617c478bd9Sstevel@tonic-gate static void
transit_list_remove(struct transit_list * tlp)28627c478bd9Sstevel@tonic-gate transit_list_remove(struct transit_list *tlp)
28637c478bd9Sstevel@tonic-gate {
28647c478bd9Sstevel@tonic-gate struct transit_list_head *trh;
28657c478bd9Sstevel@tonic-gate struct transit_list **tlpp;
28667c478bd9Sstevel@tonic-gate
28677c478bd9Sstevel@tonic-gate trh = &transit_list_head;
28687c478bd9Sstevel@tonic-gate tlpp = &trh->trh_head;
28697c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&trh->trh_lock));
28707c478bd9Sstevel@tonic-gate while (*tlpp != NULL && *tlpp != tlp)
28717c478bd9Sstevel@tonic-gate tlpp = &(*tlpp)->trl_next;
28727c478bd9Sstevel@tonic-gate ASSERT(*tlpp != NULL);
28737c478bd9Sstevel@tonic-gate if (*tlpp == tlp)
28747c478bd9Sstevel@tonic-gate *tlpp = tlp->trl_next;
28757c478bd9Sstevel@tonic-gate tlp->trl_next = NULL;
28767c478bd9Sstevel@tonic-gate }
28777c478bd9Sstevel@tonic-gate
28787c478bd9Sstevel@tonic-gate static struct transit_list *
pfnum_to_transit_list(struct transit_list_head * trh,pfn_t pfnum)28797c478bd9Sstevel@tonic-gate pfnum_to_transit_list(struct transit_list_head *trh, pfn_t pfnum)
28807c478bd9Sstevel@tonic-gate {
28817c478bd9Sstevel@tonic-gate struct transit_list *tlp;
28827c478bd9Sstevel@tonic-gate
28837c478bd9Sstevel@tonic-gate for (tlp = trh->trh_head; tlp != NULL; tlp = tlp->trl_next) {
28847c478bd9Sstevel@tonic-gate struct memdelspan *mdsp;
28857c478bd9Sstevel@tonic-gate
28867c478bd9Sstevel@tonic-gate for (mdsp = tlp->trl_spans; mdsp != NULL;
28877c478bd9Sstevel@tonic-gate mdsp = mdsp->mds_next) {
28887c478bd9Sstevel@tonic-gate if (pfnum >= mdsp->mds_base &&
28897c478bd9Sstevel@tonic-gate pfnum < (mdsp->mds_base + mdsp->mds_npgs)) {
28907c478bd9Sstevel@tonic-gate return (tlp);
28917c478bd9Sstevel@tonic-gate }
28927c478bd9Sstevel@tonic-gate }
28937c478bd9Sstevel@tonic-gate }
28947c478bd9Sstevel@tonic-gate return (NULL);
28957c478bd9Sstevel@tonic-gate }
28967c478bd9Sstevel@tonic-gate
28977c478bd9Sstevel@tonic-gate int
pfn_is_being_deleted(pfn_t pfnum)28987c478bd9Sstevel@tonic-gate pfn_is_being_deleted(pfn_t pfnum)
28997c478bd9Sstevel@tonic-gate {
29007c478bd9Sstevel@tonic-gate struct transit_list_head *trh;
29017c478bd9Sstevel@tonic-gate struct transit_list *tlp;
29027c478bd9Sstevel@tonic-gate int ret;
29037c478bd9Sstevel@tonic-gate
29047c478bd9Sstevel@tonic-gate trh = &transit_list_head;
29057c478bd9Sstevel@tonic-gate if (trh->trh_head == NULL)
29067c478bd9Sstevel@tonic-gate return (0);
29077c478bd9Sstevel@tonic-gate
29087c478bd9Sstevel@tonic-gate mutex_enter(&trh->trh_lock);
29097c478bd9Sstevel@tonic-gate tlp = pfnum_to_transit_list(trh, pfnum);
29107c478bd9Sstevel@tonic-gate ret = (tlp != NULL && tlp->trl_collect);
29117c478bd9Sstevel@tonic-gate mutex_exit(&trh->trh_lock);
29127c478bd9Sstevel@tonic-gate
29137c478bd9Sstevel@tonic-gate return (ret);
29147c478bd9Sstevel@tonic-gate }
29157c478bd9Sstevel@tonic-gate
#ifdef MEM_DEL_STATS
extern int hz;
/*
 * Dump the per-delete statistics accumulated in mhp->mh_delstat to the
 * console when mem_del_stat_print is set.  Only built into
 * MEM_DEL_STATS kernels.
 */
static void
mem_del_stat_print_func(struct mem_handle *mhp)
{
	uint64_t tmp;

	if (mem_del_stat_print) {
		printf("memory delete loop %x/%x, statistics%s\n",
		    (uint_t)mhp->mh_transit.trl_spans->mds_base,
		    (uint_t)mhp->mh_transit.trl_spans->mds_npgs,
		    (mhp->mh_cancel ? " (cancelled)" : ""));
		printf("\t%8u nloop\n", mhp->mh_delstat.nloop);
		printf("\t%8u need_free\n", mhp->mh_delstat.need_free);
		printf("\t%8u free_loop\n", mhp->mh_delstat.free_loop);
		printf("\t%8u free_low\n", mhp->mh_delstat.free_low);
		printf("\t%8u free_failed\n", mhp->mh_delstat.free_failed);
		printf("\t%8u ncheck\n", mhp->mh_delstat.ncheck);
		printf("\t%8u nopaget\n", mhp->mh_delstat.nopaget);
		printf("\t%8u lockfail\n", mhp->mh_delstat.lockfail);
		printf("\t%8u nfree\n", mhp->mh_delstat.nfree);
		printf("\t%8u nreloc\n", mhp->mh_delstat.nreloc);
		printf("\t%8u nrelocfail\n", mhp->mh_delstat.nrelocfail);
		printf("\t%8u already_done\n", mhp->mh_delstat.already_done);
		printf("\t%8u first_notfree\n", mhp->mh_delstat.first_notfree);
		printf("\t%8u npplocked\n", mhp->mh_delstat.npplocked);
		printf("\t%8u nlockreloc\n", mhp->mh_delstat.nlockreloc);
		printf("\t%8u nnorepl\n", mhp->mh_delstat.nnorepl);
		printf("\t%8u nmodreloc\n", mhp->mh_delstat.nmodreloc);
		printf("\t%8u ndestroy\n", mhp->mh_delstat.ndestroy);
		printf("\t%8u nputpage\n", mhp->mh_delstat.nputpage);
		printf("\t%8u nnoreclaim\n", mhp->mh_delstat.nnoreclaim);
		printf("\t%8u ndelay\n", mhp->mh_delstat.ndelay);
		printf("\t%8u demotefail\n", mhp->mh_delstat.demotefail);
		printf("\t%8u retired\n", mhp->mh_delstat.retired);
		printf("\t%8u toxic\n", mhp->mh_delstat.toxic);
		printf("\t%8u failing\n", mhp->mh_delstat.failing);
		printf("\t%8u modtoxic\n", mhp->mh_delstat.modtoxic);
		printf("\t%8u npplkdtoxic\n", mhp->mh_delstat.npplkdtoxic);
		printf("\t%8u gptlmodfail\n", mhp->mh_delstat.gptlmodfail);
		printf("\t%8u gptllckfail\n", mhp->mh_delstat.gptllckfail);
		/* Convert tick counts to wall-clock time for display. */
		tmp = mhp->mh_delstat.nticks_total / hz; /* seconds */
		printf(
		    "\t%"PRIu64" nticks_total - %"PRIu64" min %"PRIu64" sec\n",
		    mhp->mh_delstat.nticks_total, tmp / 60, tmp % 60);

		tmp = mhp->mh_delstat.nticks_pgrp / hz; /* seconds */
		printf(
		    "\t%"PRIu64" nticks_pgrp - %"PRIu64" min %"PRIu64" sec\n",
		    mhp->mh_delstat.nticks_pgrp, tmp / 60, tmp % 60);
	}
}
#endif /* MEM_DEL_STATS */
29697c478bd9Sstevel@tonic-gate
/*
 * Registry of kphysm setup callbacks: each entry pairs a caller's
 * callback vector with the opaque argument handed back to it.
 * The fixed-size table is protected by mem_callback_rwlock.
 */
struct mem_callback {
	kphysm_setup_vector_t *vec;	/* caller-supplied callback vector */
	void *arg;			/* opaque argument for callbacks */
};

#define	NMEMCALLBACKS	100		/* max simultaneous registrations */

static struct mem_callback mem_callbacks[NMEMCALLBACKS];
static uint_t nmemcallbacks;		/* high-water mark of used slots */
static krwlock_t mem_callback_rwlock;
29807c478bd9Sstevel@tonic-gate
29817c478bd9Sstevel@tonic-gate int
kphysm_setup_func_register(kphysm_setup_vector_t * vec,void * arg)29827c478bd9Sstevel@tonic-gate kphysm_setup_func_register(kphysm_setup_vector_t *vec, void *arg)
29837c478bd9Sstevel@tonic-gate {
29847c478bd9Sstevel@tonic-gate uint_t i, found;
29857c478bd9Sstevel@tonic-gate
29867c478bd9Sstevel@tonic-gate /*
29877c478bd9Sstevel@tonic-gate * This test will become more complicated when the version must
29887c478bd9Sstevel@tonic-gate * change.
29897c478bd9Sstevel@tonic-gate */
29907c478bd9Sstevel@tonic-gate if (vec->version != KPHYSM_SETUP_VECTOR_VERSION)
29917c478bd9Sstevel@tonic-gate return (EINVAL);
29927c478bd9Sstevel@tonic-gate
29937c478bd9Sstevel@tonic-gate if (vec->post_add == NULL || vec->pre_del == NULL ||
29947c478bd9Sstevel@tonic-gate vec->post_del == NULL)
29957c478bd9Sstevel@tonic-gate return (EINVAL);
29967c478bd9Sstevel@tonic-gate
29977c478bd9Sstevel@tonic-gate rw_enter(&mem_callback_rwlock, RW_WRITER);
29987c478bd9Sstevel@tonic-gate for (i = 0, found = 0; i < nmemcallbacks; i++) {
29997c478bd9Sstevel@tonic-gate if (mem_callbacks[i].vec == NULL && found == 0)
30007c478bd9Sstevel@tonic-gate found = i + 1;
30017c478bd9Sstevel@tonic-gate if (mem_callbacks[i].vec == vec &&
30027c478bd9Sstevel@tonic-gate mem_callbacks[i].arg == arg) {
30037c478bd9Sstevel@tonic-gate #ifdef DEBUG
30047c478bd9Sstevel@tonic-gate /* Catch this in DEBUG kernels. */
30057c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "kphysm_setup_func_register"
30067c478bd9Sstevel@tonic-gate "(0x%p, 0x%p) duplicate registration from 0x%p",
30077c478bd9Sstevel@tonic-gate (void *)vec, arg, (void *)caller());
30087c478bd9Sstevel@tonic-gate #endif /* DEBUG */
30097c478bd9Sstevel@tonic-gate rw_exit(&mem_callback_rwlock);
30107c478bd9Sstevel@tonic-gate return (EEXIST);
30117c478bd9Sstevel@tonic-gate }
30127c478bd9Sstevel@tonic-gate }
30137c478bd9Sstevel@tonic-gate if (found != 0) {
30147c478bd9Sstevel@tonic-gate i = found - 1;
30157c478bd9Sstevel@tonic-gate } else {
30167c478bd9Sstevel@tonic-gate ASSERT(nmemcallbacks < NMEMCALLBACKS);
30177c478bd9Sstevel@tonic-gate if (nmemcallbacks == NMEMCALLBACKS) {
30187c478bd9Sstevel@tonic-gate rw_exit(&mem_callback_rwlock);
30197c478bd9Sstevel@tonic-gate return (ENOMEM);
30207c478bd9Sstevel@tonic-gate }
30217c478bd9Sstevel@tonic-gate i = nmemcallbacks++;
30227c478bd9Sstevel@tonic-gate }
30237c478bd9Sstevel@tonic-gate mem_callbacks[i].vec = vec;
30247c478bd9Sstevel@tonic-gate mem_callbacks[i].arg = arg;
30257c478bd9Sstevel@tonic-gate rw_exit(&mem_callback_rwlock);
30267c478bd9Sstevel@tonic-gate return (0);
30277c478bd9Sstevel@tonic-gate }
30287c478bd9Sstevel@tonic-gate
30297c478bd9Sstevel@tonic-gate void
kphysm_setup_func_unregister(kphysm_setup_vector_t * vec,void * arg)30307c478bd9Sstevel@tonic-gate kphysm_setup_func_unregister(kphysm_setup_vector_t *vec, void *arg)
30317c478bd9Sstevel@tonic-gate {
30327c478bd9Sstevel@tonic-gate uint_t i;
30337c478bd9Sstevel@tonic-gate
30347c478bd9Sstevel@tonic-gate rw_enter(&mem_callback_rwlock, RW_WRITER);
30357c478bd9Sstevel@tonic-gate for (i = 0; i < nmemcallbacks; i++) {
30367c478bd9Sstevel@tonic-gate if (mem_callbacks[i].vec == vec &&
30377c478bd9Sstevel@tonic-gate mem_callbacks[i].arg == arg) {
30387c478bd9Sstevel@tonic-gate mem_callbacks[i].vec = NULL;
30397c478bd9Sstevel@tonic-gate mem_callbacks[i].arg = NULL;
30407c478bd9Sstevel@tonic-gate if (i == (nmemcallbacks - 1))
30417c478bd9Sstevel@tonic-gate nmemcallbacks--;
30427c478bd9Sstevel@tonic-gate break;
30437c478bd9Sstevel@tonic-gate }
30447c478bd9Sstevel@tonic-gate }
30457c478bd9Sstevel@tonic-gate rw_exit(&mem_callback_rwlock);
30467c478bd9Sstevel@tonic-gate }
30477c478bd9Sstevel@tonic-gate
30487c478bd9Sstevel@tonic-gate static void
kphysm_setup_post_add(pgcnt_t delta_pages)30497c478bd9Sstevel@tonic-gate kphysm_setup_post_add(pgcnt_t delta_pages)
30507c478bd9Sstevel@tonic-gate {
30517c478bd9Sstevel@tonic-gate uint_t i;
30527c478bd9Sstevel@tonic-gate
30537c478bd9Sstevel@tonic-gate rw_enter(&mem_callback_rwlock, RW_READER);
30547c478bd9Sstevel@tonic-gate for (i = 0; i < nmemcallbacks; i++) {
30557c478bd9Sstevel@tonic-gate if (mem_callbacks[i].vec != NULL) {
30567c478bd9Sstevel@tonic-gate (*mem_callbacks[i].vec->post_add)
30577c478bd9Sstevel@tonic-gate (mem_callbacks[i].arg, delta_pages);
30587c478bd9Sstevel@tonic-gate }
30597c478bd9Sstevel@tonic-gate }
30607c478bd9Sstevel@tonic-gate rw_exit(&mem_callback_rwlock);
30617c478bd9Sstevel@tonic-gate }
30627c478bd9Sstevel@tonic-gate
30637c478bd9Sstevel@tonic-gate /*
30647c478bd9Sstevel@tonic-gate * Note the locking between pre_del and post_del: The reader lock is held
30657c478bd9Sstevel@tonic-gate * between the two calls to stop the set of functions from changing.
30667c478bd9Sstevel@tonic-gate */
30677c478bd9Sstevel@tonic-gate
30687c478bd9Sstevel@tonic-gate static int
kphysm_setup_pre_del(pgcnt_t delta_pages)30697c478bd9Sstevel@tonic-gate kphysm_setup_pre_del(pgcnt_t delta_pages)
30707c478bd9Sstevel@tonic-gate {
30717c478bd9Sstevel@tonic-gate uint_t i;
30727c478bd9Sstevel@tonic-gate int ret;
30737c478bd9Sstevel@tonic-gate int aret;
30747c478bd9Sstevel@tonic-gate
30757c478bd9Sstevel@tonic-gate ret = 0;
30767c478bd9Sstevel@tonic-gate rw_enter(&mem_callback_rwlock, RW_READER);
30777c478bd9Sstevel@tonic-gate for (i = 0; i < nmemcallbacks; i++) {
30787c478bd9Sstevel@tonic-gate if (mem_callbacks[i].vec != NULL) {
30797c478bd9Sstevel@tonic-gate aret = (*mem_callbacks[i].vec->pre_del)
30807c478bd9Sstevel@tonic-gate (mem_callbacks[i].arg, delta_pages);
30817c478bd9Sstevel@tonic-gate ret |= aret;
30827c478bd9Sstevel@tonic-gate }
30837c478bd9Sstevel@tonic-gate }
30847c478bd9Sstevel@tonic-gate
30857c478bd9Sstevel@tonic-gate return (ret);
30867c478bd9Sstevel@tonic-gate }
30877c478bd9Sstevel@tonic-gate
30887c478bd9Sstevel@tonic-gate static void
kphysm_setup_post_del(pgcnt_t delta_pages,int cancelled)30897c478bd9Sstevel@tonic-gate kphysm_setup_post_del(pgcnt_t delta_pages, int cancelled)
30907c478bd9Sstevel@tonic-gate {
30917c478bd9Sstevel@tonic-gate uint_t i;
30927c478bd9Sstevel@tonic-gate
30937c478bd9Sstevel@tonic-gate for (i = 0; i < nmemcallbacks; i++) {
30947c478bd9Sstevel@tonic-gate if (mem_callbacks[i].vec != NULL) {
30957c478bd9Sstevel@tonic-gate (*mem_callbacks[i].vec->post_del)
30967c478bd9Sstevel@tonic-gate (mem_callbacks[i].arg, delta_pages, cancelled);
30977c478bd9Sstevel@tonic-gate }
30987c478bd9Sstevel@tonic-gate }
30997c478bd9Sstevel@tonic-gate rw_exit(&mem_callback_rwlock);
31007c478bd9Sstevel@tonic-gate }
31017c478bd9Sstevel@tonic-gate
31027c478bd9Sstevel@tonic-gate static int
kphysm_split_memseg(pfn_t base,pgcnt_t npgs)31037c478bd9Sstevel@tonic-gate kphysm_split_memseg(
31047c478bd9Sstevel@tonic-gate pfn_t base,
31057c478bd9Sstevel@tonic-gate pgcnt_t npgs)
31067c478bd9Sstevel@tonic-gate {
31077c478bd9Sstevel@tonic-gate struct memseg *seg;
31087c478bd9Sstevel@tonic-gate struct memseg **segpp;
31097c478bd9Sstevel@tonic-gate pgcnt_t size_low, size_high;
31107c478bd9Sstevel@tonic-gate struct memseg *seg_low, *seg_mid, *seg_high;
31117c478bd9Sstevel@tonic-gate
31127c478bd9Sstevel@tonic-gate /*
31137c478bd9Sstevel@tonic-gate * Lock the memsegs list against other updates now
31147c478bd9Sstevel@tonic-gate */
31157c478bd9Sstevel@tonic-gate memsegs_lock(1);
31167c478bd9Sstevel@tonic-gate
31177c478bd9Sstevel@tonic-gate /*
31187c478bd9Sstevel@tonic-gate * Find boot time memseg that wholly covers this area.
31197c478bd9Sstevel@tonic-gate */
31207c478bd9Sstevel@tonic-gate
31217c478bd9Sstevel@tonic-gate /* First find the memseg with page 'base' in it. */
31227c478bd9Sstevel@tonic-gate for (segpp = &memsegs; (seg = *segpp) != NULL;
31237c478bd9Sstevel@tonic-gate segpp = &((*segpp)->next)) {
31247c478bd9Sstevel@tonic-gate if (base >= seg->pages_base && base < seg->pages_end)
31257c478bd9Sstevel@tonic-gate break;
31267c478bd9Sstevel@tonic-gate }
31277c478bd9Sstevel@tonic-gate if (seg == NULL) {
31287c478bd9Sstevel@tonic-gate memsegs_unlock(1);
31297c478bd9Sstevel@tonic-gate return (0);
31307c478bd9Sstevel@tonic-gate }
31319853d9e8SJason Beloro if (memseg_includes_meta(seg)) {
31327c478bd9Sstevel@tonic-gate memsegs_unlock(1);
31337c478bd9Sstevel@tonic-gate return (0);
31347c478bd9Sstevel@tonic-gate }
31357c478bd9Sstevel@tonic-gate if ((base + npgs) > seg->pages_end) {
31367c478bd9Sstevel@tonic-gate memsegs_unlock(1);
31377c478bd9Sstevel@tonic-gate return (0);
31387c478bd9Sstevel@tonic-gate }
31397c478bd9Sstevel@tonic-gate
31407c478bd9Sstevel@tonic-gate /*
31417c478bd9Sstevel@tonic-gate * Work out the size of the two segments that will
31427c478bd9Sstevel@tonic-gate * surround the new segment, one for low address
31437c478bd9Sstevel@tonic-gate * and one for high.
31447c478bd9Sstevel@tonic-gate */
31457c478bd9Sstevel@tonic-gate ASSERT(base >= seg->pages_base);
31467c478bd9Sstevel@tonic-gate size_low = base - seg->pages_base;
31477c478bd9Sstevel@tonic-gate ASSERT(seg->pages_end >= (base + npgs));
31487c478bd9Sstevel@tonic-gate size_high = seg->pages_end - (base + npgs);
31497c478bd9Sstevel@tonic-gate
31507c478bd9Sstevel@tonic-gate /*
31517c478bd9Sstevel@tonic-gate * Sanity check.
31527c478bd9Sstevel@tonic-gate */
31537c478bd9Sstevel@tonic-gate if ((size_low + size_high) == 0) {
31547c478bd9Sstevel@tonic-gate memsegs_unlock(1);
31557c478bd9Sstevel@tonic-gate return (0);
31567c478bd9Sstevel@tonic-gate }
31577c478bd9Sstevel@tonic-gate
31587c478bd9Sstevel@tonic-gate /*
31597c478bd9Sstevel@tonic-gate * Allocate the new structures. The old memseg will not be freed
31607c478bd9Sstevel@tonic-gate * as there may be a reference to it.
31617c478bd9Sstevel@tonic-gate */
31627c478bd9Sstevel@tonic-gate seg_low = NULL;
31637c478bd9Sstevel@tonic-gate seg_high = NULL;
31647c478bd9Sstevel@tonic-gate
31659853d9e8SJason Beloro if (size_low != 0)
31669853d9e8SJason Beloro seg_low = memseg_alloc();
31677c478bd9Sstevel@tonic-gate
31689853d9e8SJason Beloro seg_mid = memseg_alloc();
31697c478bd9Sstevel@tonic-gate
31709853d9e8SJason Beloro if (size_high != 0)
31719853d9e8SJason Beloro seg_high = memseg_alloc();
31727c478bd9Sstevel@tonic-gate
31737c478bd9Sstevel@tonic-gate /*
31747c478bd9Sstevel@tonic-gate * All allocation done now.
31757c478bd9Sstevel@tonic-gate */
31767c478bd9Sstevel@tonic-gate if (size_low != 0) {
31777c478bd9Sstevel@tonic-gate seg_low->pages = seg->pages;
31787c478bd9Sstevel@tonic-gate seg_low->epages = seg_low->pages + size_low;
31797c478bd9Sstevel@tonic-gate seg_low->pages_base = seg->pages_base;
31807c478bd9Sstevel@tonic-gate seg_low->pages_end = seg_low->pages_base + size_low;
31817c478bd9Sstevel@tonic-gate seg_low->next = seg_mid;
31829853d9e8SJason Beloro seg_low->msegflags = seg->msegflags;
31837c478bd9Sstevel@tonic-gate }
31847c478bd9Sstevel@tonic-gate if (size_high != 0) {
31857c478bd9Sstevel@tonic-gate seg_high->pages = seg->epages - size_high;
31867c478bd9Sstevel@tonic-gate seg_high->epages = seg_high->pages + size_high;
31877c478bd9Sstevel@tonic-gate seg_high->pages_base = seg->pages_end - size_high;
31887c478bd9Sstevel@tonic-gate seg_high->pages_end = seg_high->pages_base + size_high;
31897c478bd9Sstevel@tonic-gate seg_high->next = seg->next;
31909853d9e8SJason Beloro seg_high->msegflags = seg->msegflags;
31917c478bd9Sstevel@tonic-gate }
31927c478bd9Sstevel@tonic-gate
31937c478bd9Sstevel@tonic-gate seg_mid->pages = seg->pages + size_low;
31947c478bd9Sstevel@tonic-gate seg_mid->pages_base = seg->pages_base + size_low;
31957c478bd9Sstevel@tonic-gate seg_mid->epages = seg->epages - size_high;
31967c478bd9Sstevel@tonic-gate seg_mid->pages_end = seg->pages_end - size_high;
31977c478bd9Sstevel@tonic-gate seg_mid->next = (seg_high != NULL) ? seg_high : seg->next;
31989853d9e8SJason Beloro seg_mid->msegflags = seg->msegflags;
31997c478bd9Sstevel@tonic-gate
32007c478bd9Sstevel@tonic-gate /*
32017c478bd9Sstevel@tonic-gate * Update hat_kpm specific info of all involved memsegs and
32027c478bd9Sstevel@tonic-gate * allow hat_kpm specific global chain updates.
32037c478bd9Sstevel@tonic-gate */
32047c478bd9Sstevel@tonic-gate hat_kpm_split_mseg_update(seg, segpp, seg_low, seg_mid, seg_high);
32057c478bd9Sstevel@tonic-gate
32067c478bd9Sstevel@tonic-gate /*
32077c478bd9Sstevel@tonic-gate * At this point we have two equivalent memseg sub-chains,
32087c478bd9Sstevel@tonic-gate * seg and seg_low/seg_mid/seg_high, which both chain on to
32097c478bd9Sstevel@tonic-gate * the same place in the global chain. By re-writing the pointer
32107c478bd9Sstevel@tonic-gate * in the previous element we switch atomically from using the old
32117c478bd9Sstevel@tonic-gate * (seg) to the new.
32127c478bd9Sstevel@tonic-gate */
32137c478bd9Sstevel@tonic-gate *segpp = (seg_low != NULL) ? seg_low : seg_mid;
32147c478bd9Sstevel@tonic-gate
32157c478bd9Sstevel@tonic-gate membar_enter();
32167c478bd9Sstevel@tonic-gate
32177c478bd9Sstevel@tonic-gate build_pfn_hash();
32187c478bd9Sstevel@tonic-gate memsegs_unlock(1);
32197c478bd9Sstevel@tonic-gate
32207c478bd9Sstevel@tonic-gate /*
32217c478bd9Sstevel@tonic-gate * We leave the old segment, 'seg', intact as there may be
32227c478bd9Sstevel@tonic-gate * references to it. Also, as the value of total_pages has not
32237c478bd9Sstevel@tonic-gate * changed and the memsegs list is effectively the same when
32247c478bd9Sstevel@tonic-gate * accessed via the old or the new pointer, we do not have to
32257c478bd9Sstevel@tonic-gate * cause pageout_scanner() to re-evaluate its hand pointers.
32267c478bd9Sstevel@tonic-gate *
32277c478bd9Sstevel@tonic-gate * We currently do not re-use or reclaim the page_t memory.
32287c478bd9Sstevel@tonic-gate * If we do, then this may have to change.
32297c478bd9Sstevel@tonic-gate */
32307c478bd9Sstevel@tonic-gate
32317c478bd9Sstevel@tonic-gate mutex_enter(&memseg_lists_lock);
32327c478bd9Sstevel@tonic-gate seg->lnext = memseg_edit_junk;
32337c478bd9Sstevel@tonic-gate memseg_edit_junk = seg;
32347c478bd9Sstevel@tonic-gate mutex_exit(&memseg_lists_lock);
32357c478bd9Sstevel@tonic-gate
32367c478bd9Sstevel@tonic-gate return (1);
32377c478bd9Sstevel@tonic-gate }
32387c478bd9Sstevel@tonic-gate
32397c478bd9Sstevel@tonic-gate /*
32407c478bd9Sstevel@tonic-gate * The sfmmu hat layer (e.g.) accesses some parts of the memseg
32417c478bd9Sstevel@tonic-gate * structure using physical addresses. Therefore a kmem_cache is
32427c478bd9Sstevel@tonic-gate * used with KMC_NOHASH to avoid page crossings within a memseg
32437c478bd9Sstevel@tonic-gate * structure. KMC_NOHASH requires that no external (outside of
32447c478bd9Sstevel@tonic-gate * slab) information is allowed. This, in turn, implies that the
32457c478bd9Sstevel@tonic-gate * cache's slabsize must be exactly a single page, since per-slab
32467c478bd9Sstevel@tonic-gate * information (e.g. the freelist for the slab) is kept at the
32477c478bd9Sstevel@tonic-gate * end of the slab, where it is easy to locate. Should be changed
32487c478bd9Sstevel@tonic-gate * when a more obvious kmem_cache interface/flag will become
32497c478bd9Sstevel@tonic-gate * available.
32507c478bd9Sstevel@tonic-gate */
32517c478bd9Sstevel@tonic-gate void
mem_config_init()32527c478bd9Sstevel@tonic-gate mem_config_init()
32537c478bd9Sstevel@tonic-gate {
32547c478bd9Sstevel@tonic-gate memseg_cache = kmem_cache_create("memseg_cache", sizeof (struct memseg),
32557c478bd9Sstevel@tonic-gate 0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH);
32567c478bd9Sstevel@tonic-gate }
32579853d9e8SJason Beloro
32589853d9e8SJason Beloro struct memseg *
memseg_alloc()32599853d9e8SJason Beloro memseg_alloc()
32609853d9e8SJason Beloro {
32619853d9e8SJason Beloro struct memseg *seg;
32629853d9e8SJason Beloro
32639853d9e8SJason Beloro seg = kmem_cache_alloc(memseg_cache, KM_SLEEP);
32649853d9e8SJason Beloro bzero(seg, sizeof (struct memseg));
32659853d9e8SJason Beloro
32669853d9e8SJason Beloro return (seg);
32679853d9e8SJason Beloro }
32689853d9e8SJason Beloro
32699853d9e8SJason Beloro /*
32709853d9e8SJason Beloro * Return whether the page_t memory for this memseg
32719853d9e8SJason Beloro * is included in the memseg itself.
32729853d9e8SJason Beloro */
32739853d9e8SJason Beloro static int
memseg_includes_meta(struct memseg * seg)32749853d9e8SJason Beloro memseg_includes_meta(struct memseg *seg)
32759853d9e8SJason Beloro {
32769853d9e8SJason Beloro return (seg->msegflags & MEMSEG_META_INCL);
32779853d9e8SJason Beloro }
32789853d9e8SJason Beloro
32799853d9e8SJason Beloro pfn_t
memseg_get_start(struct memseg * seg)32809853d9e8SJason Beloro memseg_get_start(struct memseg *seg)
32819853d9e8SJason Beloro {
32829853d9e8SJason Beloro pfn_t pt_start;
32839853d9e8SJason Beloro
32849853d9e8SJason Beloro if (memseg_includes_meta(seg)) {
32859853d9e8SJason Beloro pt_start = hat_getpfnum(kas.a_hat, (caddr_t)seg->pages);
32869853d9e8SJason Beloro
32879853d9e8SJason Beloro /* Meta data is required to be at the beginning */
32889853d9e8SJason Beloro ASSERT(pt_start < seg->pages_base);
32899853d9e8SJason Beloro } else
32909853d9e8SJason Beloro pt_start = seg->pages_base;
32919853d9e8SJason Beloro
32929853d9e8SJason Beloro return (pt_start);
32939853d9e8SJason Beloro }
32949853d9e8SJason Beloro
32959853d9e8SJason Beloro /*
32969853d9e8SJason Beloro * Invalidate memseg pointers in cpu private vm data caches.
32979853d9e8SJason Beloro */
32989853d9e8SJason Beloro static void
memseg_cpu_vm_flush()32999853d9e8SJason Beloro memseg_cpu_vm_flush()
33009853d9e8SJason Beloro {
33019853d9e8SJason Beloro cpu_t *cp;
33029853d9e8SJason Beloro vm_cpu_data_t *vc;
33039853d9e8SJason Beloro
33049853d9e8SJason Beloro mutex_enter(&cpu_lock);
3305*0ed5c46eSJosef 'Jeff' Sipek pause_cpus(NULL, NULL);
33069853d9e8SJason Beloro
33079853d9e8SJason Beloro cp = cpu_list;
33089853d9e8SJason Beloro do {
33099853d9e8SJason Beloro vc = cp->cpu_vm_data;
33109853d9e8SJason Beloro vc->vc_pnum_memseg = NULL;
33119853d9e8SJason Beloro vc->vc_pnext_memseg = NULL;
33129853d9e8SJason Beloro
33139853d9e8SJason Beloro } while ((cp = cp->cpu_next) != cpu_list);
33149853d9e8SJason Beloro
33159853d9e8SJason Beloro start_cpus();
33169853d9e8SJason Beloro mutex_exit(&cpu_lock);
33179853d9e8SJason Beloro }
3318