xref: /titanic_41/usr/src/uts/common/vm/seg_map.c (revision 93fb2a5ff9019dc98ff5e9836d0c2c7b5c5ecd7f)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5a5652762Spraks  * Common Development and Distribution License (the "License").
6a5652762Spraks  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
22183971baSPrakash Sangappa  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
277c478bd9Sstevel@tonic-gate /*	  All Rights Reserved  	*/
287c478bd9Sstevel@tonic-gate 
297c478bd9Sstevel@tonic-gate /*
307c478bd9Sstevel@tonic-gate  * Portions of this source code were derived from Berkeley 4.3 BSD
317c478bd9Sstevel@tonic-gate  * under license from the Regents of the University of California.
327c478bd9Sstevel@tonic-gate  */
337c478bd9Sstevel@tonic-gate 
347c478bd9Sstevel@tonic-gate /*
357c478bd9Sstevel@tonic-gate  * VM - generic vnode mapping segment.
367c478bd9Sstevel@tonic-gate  *
377c478bd9Sstevel@tonic-gate  * The segmap driver is used only by the kernel to get faster (than seg_vn)
387c478bd9Sstevel@tonic-gate  * mappings [lower routine overhead; more persistent cache] to random
397c478bd9Sstevel@tonic-gate  * vnode/offsets.  Note than the kernel may (and does) use seg_vn as well.
407c478bd9Sstevel@tonic-gate  */
417c478bd9Sstevel@tonic-gate 
427c478bd9Sstevel@tonic-gate #include <sys/types.h>
437c478bd9Sstevel@tonic-gate #include <sys/t_lock.h>
447c478bd9Sstevel@tonic-gate #include <sys/param.h>
457c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
467c478bd9Sstevel@tonic-gate #include <sys/buf.h>
477c478bd9Sstevel@tonic-gate #include <sys/systm.h>
487c478bd9Sstevel@tonic-gate #include <sys/vnode.h>
497c478bd9Sstevel@tonic-gate #include <sys/mman.h>
507c478bd9Sstevel@tonic-gate #include <sys/errno.h>
517c478bd9Sstevel@tonic-gate #include <sys/cred.h>
527c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
537c478bd9Sstevel@tonic-gate #include <sys/vtrace.h>
547c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
557c478bd9Sstevel@tonic-gate #include <sys/debug.h>
567c478bd9Sstevel@tonic-gate #include <sys/thread.h>
577c478bd9Sstevel@tonic-gate #include <sys/dumphdr.h>
587c478bd9Sstevel@tonic-gate #include <sys/bitmap.h>
597c478bd9Sstevel@tonic-gate #include <sys/lgrp.h>
607c478bd9Sstevel@tonic-gate 
617c478bd9Sstevel@tonic-gate #include <vm/seg_kmem.h>
627c478bd9Sstevel@tonic-gate #include <vm/hat.h>
637c478bd9Sstevel@tonic-gate #include <vm/as.h>
647c478bd9Sstevel@tonic-gate #include <vm/seg.h>
657c478bd9Sstevel@tonic-gate #include <vm/seg_kpm.h>
667c478bd9Sstevel@tonic-gate #include <vm/seg_map.h>
677c478bd9Sstevel@tonic-gate #include <vm/page.h>
687c478bd9Sstevel@tonic-gate #include <vm/pvn.h>
697c478bd9Sstevel@tonic-gate #include <vm/rm.h>
707c478bd9Sstevel@tonic-gate 
717c478bd9Sstevel@tonic-gate /*
727c478bd9Sstevel@tonic-gate  * Private seg op routines.
737c478bd9Sstevel@tonic-gate  */
747c478bd9Sstevel@tonic-gate static void	segmap_free(struct seg *seg);
757c478bd9Sstevel@tonic-gate faultcode_t segmap_fault(struct hat *hat, struct seg *seg, caddr_t addr,
767c478bd9Sstevel@tonic-gate 			size_t len, enum fault_type type, enum seg_rw rw);
777c478bd9Sstevel@tonic-gate static faultcode_t segmap_faulta(struct seg *seg, caddr_t addr);
787c478bd9Sstevel@tonic-gate static int	segmap_checkprot(struct seg *seg, caddr_t addr, size_t len,
797c478bd9Sstevel@tonic-gate 			uint_t prot);
807c478bd9Sstevel@tonic-gate static int	segmap_kluster(struct seg *seg, caddr_t addr, ssize_t);
817c478bd9Sstevel@tonic-gate static int	segmap_getprot(struct seg *seg, caddr_t addr, size_t len,
827c478bd9Sstevel@tonic-gate 			uint_t *protv);
837c478bd9Sstevel@tonic-gate static u_offset_t	segmap_getoffset(struct seg *seg, caddr_t addr);
847c478bd9Sstevel@tonic-gate static int	segmap_gettype(struct seg *seg, caddr_t addr);
857c478bd9Sstevel@tonic-gate static int	segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
867c478bd9Sstevel@tonic-gate static void	segmap_dump(struct seg *seg);
877c478bd9Sstevel@tonic-gate static int	segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
887c478bd9Sstevel@tonic-gate 			struct page ***ppp, enum lock_type type,
897c478bd9Sstevel@tonic-gate 			enum seg_rw rw);
907c478bd9Sstevel@tonic-gate static void	segmap_badop(void);
917c478bd9Sstevel@tonic-gate static int	segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
927c478bd9Sstevel@tonic-gate static lgrp_mem_policy_info_t	*segmap_getpolicy(struct seg *seg,
937c478bd9Sstevel@tonic-gate     caddr_t addr);
941bd5c35fSelowe static int	segmap_capable(struct seg *seg, segcapability_t capability);
957c478bd9Sstevel@tonic-gate 
967c478bd9Sstevel@tonic-gate /* segkpm support */
977c478bd9Sstevel@tonic-gate static caddr_t	segmap_pagecreate_kpm(struct seg *, vnode_t *, u_offset_t,
987c478bd9Sstevel@tonic-gate 			struct smap *, enum seg_rw);
997c478bd9Sstevel@tonic-gate struct smap	*get_smap_kpm(caddr_t, page_t **);
1007c478bd9Sstevel@tonic-gate 
1017c478bd9Sstevel@tonic-gate #define	SEGMAP_BADOP(t)	(t(*)())segmap_badop
1027c478bd9Sstevel@tonic-gate 
1037c478bd9Sstevel@tonic-gate static struct seg_ops segmap_ops = {
1047c478bd9Sstevel@tonic-gate 	SEGMAP_BADOP(int),	/* dup */
1057c478bd9Sstevel@tonic-gate 	SEGMAP_BADOP(int),	/* unmap */
1067c478bd9Sstevel@tonic-gate 	segmap_free,
1077c478bd9Sstevel@tonic-gate 	segmap_fault,
1087c478bd9Sstevel@tonic-gate 	segmap_faulta,
1097c478bd9Sstevel@tonic-gate 	SEGMAP_BADOP(int),	/* setprot */
1107c478bd9Sstevel@tonic-gate 	segmap_checkprot,
1117c478bd9Sstevel@tonic-gate 	segmap_kluster,
1127c478bd9Sstevel@tonic-gate 	SEGMAP_BADOP(size_t),	/* swapout */
1137c478bd9Sstevel@tonic-gate 	SEGMAP_BADOP(int),	/* sync */
1147c478bd9Sstevel@tonic-gate 	SEGMAP_BADOP(size_t),	/* incore */
1157c478bd9Sstevel@tonic-gate 	SEGMAP_BADOP(int),	/* lockop */
1167c478bd9Sstevel@tonic-gate 	segmap_getprot,
1177c478bd9Sstevel@tonic-gate 	segmap_getoffset,
1187c478bd9Sstevel@tonic-gate 	segmap_gettype,
1197c478bd9Sstevel@tonic-gate 	segmap_getvp,
1207c478bd9Sstevel@tonic-gate 	SEGMAP_BADOP(int),	/* advise */
1217c478bd9Sstevel@tonic-gate 	segmap_dump,
1227c478bd9Sstevel@tonic-gate 	segmap_pagelock,	/* pagelock */
1237c478bd9Sstevel@tonic-gate 	SEGMAP_BADOP(int),	/* setpgsz */
1247c478bd9Sstevel@tonic-gate 	segmap_getmemid,	/* getmemid */
1257c478bd9Sstevel@tonic-gate 	segmap_getpolicy,	/* getpolicy */
1261bd5c35fSelowe 	segmap_capable,		/* capable */
12728f17cb2SRobert Mustacchi 	seg_inherit_notsup	/* inherit */
1287c478bd9Sstevel@tonic-gate };
1297c478bd9Sstevel@tonic-gate 
1307c478bd9Sstevel@tonic-gate /*
1317c478bd9Sstevel@tonic-gate  * Private segmap routines.
1327c478bd9Sstevel@tonic-gate  */
1337c478bd9Sstevel@tonic-gate static void	segmap_unlock(struct hat *hat, struct seg *seg, caddr_t addr,
1347c478bd9Sstevel@tonic-gate 			size_t len, enum seg_rw rw, struct smap *smp);
1357c478bd9Sstevel@tonic-gate static void	segmap_smapadd(struct smap *smp);
1367c478bd9Sstevel@tonic-gate static struct smap *segmap_hashin(struct smap *smp, struct vnode *vp,
1377c478bd9Sstevel@tonic-gate 			u_offset_t off, int hashid);
1387c478bd9Sstevel@tonic-gate static void	segmap_hashout(struct smap *smp);
1397c478bd9Sstevel@tonic-gate 
1407c478bd9Sstevel@tonic-gate 
1417c478bd9Sstevel@tonic-gate /*
1427c478bd9Sstevel@tonic-gate  * Statistics for segmap operations.
1437c478bd9Sstevel@tonic-gate  *
1447c478bd9Sstevel@tonic-gate  * No explicit locking to protect these stats.
1457c478bd9Sstevel@tonic-gate  */
1467c478bd9Sstevel@tonic-gate struct segmapcnt segmapcnt = {
1477c478bd9Sstevel@tonic-gate 	{ "fault",		KSTAT_DATA_ULONG },
1487c478bd9Sstevel@tonic-gate 	{ "faulta",		KSTAT_DATA_ULONG },
1497c478bd9Sstevel@tonic-gate 	{ "getmap",		KSTAT_DATA_ULONG },
1507c478bd9Sstevel@tonic-gate 	{ "get_use",		KSTAT_DATA_ULONG },
1517c478bd9Sstevel@tonic-gate 	{ "get_reclaim",	KSTAT_DATA_ULONG },
1527c478bd9Sstevel@tonic-gate 	{ "get_reuse",		KSTAT_DATA_ULONG },
1537c478bd9Sstevel@tonic-gate 	{ "get_unused",		KSTAT_DATA_ULONG },
1547c478bd9Sstevel@tonic-gate 	{ "get_nofree",		KSTAT_DATA_ULONG },
1557c478bd9Sstevel@tonic-gate 	{ "rel_async",		KSTAT_DATA_ULONG },
1567c478bd9Sstevel@tonic-gate 	{ "rel_write",		KSTAT_DATA_ULONG },
1577c478bd9Sstevel@tonic-gate 	{ "rel_free",		KSTAT_DATA_ULONG },
1587c478bd9Sstevel@tonic-gate 	{ "rel_abort",		KSTAT_DATA_ULONG },
1597c478bd9Sstevel@tonic-gate 	{ "rel_dontneed",	KSTAT_DATA_ULONG },
1607c478bd9Sstevel@tonic-gate 	{ "release",		KSTAT_DATA_ULONG },
1617c478bd9Sstevel@tonic-gate 	{ "pagecreate",		KSTAT_DATA_ULONG },
1627c478bd9Sstevel@tonic-gate 	{ "free_notfree",	KSTAT_DATA_ULONG },
1637c478bd9Sstevel@tonic-gate 	{ "free_dirty",		KSTAT_DATA_ULONG },
1647c478bd9Sstevel@tonic-gate 	{ "free",		KSTAT_DATA_ULONG },
1657c478bd9Sstevel@tonic-gate 	{ "stolen",		KSTAT_DATA_ULONG },
1667c478bd9Sstevel@tonic-gate 	{ "get_nomtx",		KSTAT_DATA_ULONG }
1677c478bd9Sstevel@tonic-gate };
1687c478bd9Sstevel@tonic-gate 
1697c478bd9Sstevel@tonic-gate kstat_named_t *segmapcnt_ptr = (kstat_named_t *)&segmapcnt;
1707c478bd9Sstevel@tonic-gate uint_t segmapcnt_ndata = sizeof (segmapcnt) / sizeof (kstat_named_t);
1717c478bd9Sstevel@tonic-gate 
1727c478bd9Sstevel@tonic-gate /*
1737c478bd9Sstevel@tonic-gate  * Return number of map pages in segment.
1747c478bd9Sstevel@tonic-gate  */
1757c478bd9Sstevel@tonic-gate #define	MAP_PAGES(seg)		((seg)->s_size >> MAXBSHIFT)
1767c478bd9Sstevel@tonic-gate 
1777c478bd9Sstevel@tonic-gate /*
1787c478bd9Sstevel@tonic-gate  * Translate addr into smap number within segment.
1797c478bd9Sstevel@tonic-gate  */
1807c478bd9Sstevel@tonic-gate #define	MAP_PAGE(seg, addr)  (((addr) - (seg)->s_base) >> MAXBSHIFT)
1817c478bd9Sstevel@tonic-gate 
1827c478bd9Sstevel@tonic-gate /*
1837c478bd9Sstevel@tonic-gate  * Translate addr in seg into struct smap pointer.
1847c478bd9Sstevel@tonic-gate  */
1857c478bd9Sstevel@tonic-gate #define	GET_SMAP(seg, addr)	\
1867c478bd9Sstevel@tonic-gate 	&(((struct segmap_data *)((seg)->s_data))->smd_sm[MAP_PAGE(seg, addr)])
1877c478bd9Sstevel@tonic-gate 
1887c478bd9Sstevel@tonic-gate /*
1897c478bd9Sstevel@tonic-gate  * Bit in map (16 bit bitmap).
1907c478bd9Sstevel@tonic-gate  */
1917c478bd9Sstevel@tonic-gate #define	SMAP_BIT_MASK(bitindex)	(1 << ((bitindex) & 0xf))
1927c478bd9Sstevel@tonic-gate 
1937c478bd9Sstevel@tonic-gate static int smd_colormsk = 0;
1947c478bd9Sstevel@tonic-gate static int smd_ncolor = 0;
1957c478bd9Sstevel@tonic-gate static int smd_nfree = 0;
1967c478bd9Sstevel@tonic-gate static int smd_freemsk = 0;
1977c478bd9Sstevel@tonic-gate #ifdef DEBUG
1987c478bd9Sstevel@tonic-gate static int *colors_used;
1997c478bd9Sstevel@tonic-gate #endif
2007c478bd9Sstevel@tonic-gate static struct smap *smd_smap;
2017c478bd9Sstevel@tonic-gate static struct smaphash *smd_hash;
2027c478bd9Sstevel@tonic-gate #ifdef SEGMAP_HASHSTATS
2037c478bd9Sstevel@tonic-gate static unsigned int *smd_hash_len;
2047c478bd9Sstevel@tonic-gate #endif
2057c478bd9Sstevel@tonic-gate static struct smfree *smd_free;
2067c478bd9Sstevel@tonic-gate static ulong_t smd_hashmsk = 0;
2077c478bd9Sstevel@tonic-gate 
2087c478bd9Sstevel@tonic-gate #define	SEGMAP_MAXCOLOR		2
2097c478bd9Sstevel@tonic-gate #define	SEGMAP_CACHE_PAD	64
2107c478bd9Sstevel@tonic-gate 
2117c478bd9Sstevel@tonic-gate union segmap_cpu {
2127c478bd9Sstevel@tonic-gate 	struct {
2137c478bd9Sstevel@tonic-gate 		uint32_t	scpu_free_ndx[SEGMAP_MAXCOLOR];
2147c478bd9Sstevel@tonic-gate 		struct smap	*scpu_last_smap;
2157c478bd9Sstevel@tonic-gate 		ulong_t		scpu_getmap;
2167c478bd9Sstevel@tonic-gate 		ulong_t		scpu_release;
2177c478bd9Sstevel@tonic-gate 		ulong_t		scpu_get_reclaim;
2187c478bd9Sstevel@tonic-gate 		ulong_t		scpu_fault;
2197c478bd9Sstevel@tonic-gate 		ulong_t		scpu_pagecreate;
2207c478bd9Sstevel@tonic-gate 		ulong_t		scpu_get_reuse;
2217c478bd9Sstevel@tonic-gate 	} scpu;
2227c478bd9Sstevel@tonic-gate 	char	scpu_pad[SEGMAP_CACHE_PAD];
2237c478bd9Sstevel@tonic-gate };
2247c478bd9Sstevel@tonic-gate static union segmap_cpu *smd_cpu;
2257c478bd9Sstevel@tonic-gate 
2267c478bd9Sstevel@tonic-gate /*
2277c478bd9Sstevel@tonic-gate  * There are three locks in seg_map:
2287c478bd9Sstevel@tonic-gate  *	- per freelist mutexes
2297c478bd9Sstevel@tonic-gate  *	- per hashchain mutexes
2307c478bd9Sstevel@tonic-gate  *	- per smap mutexes
2317c478bd9Sstevel@tonic-gate  *
2327c478bd9Sstevel@tonic-gate  * The lock ordering is to get the smap mutex to lock down the slot
2337c478bd9Sstevel@tonic-gate  * first then the hash lock (for hash in/out (vp, off) list) or the
2347c478bd9Sstevel@tonic-gate  * freelist lock to put the slot back on the free list.
2357c478bd9Sstevel@tonic-gate  *
2367c478bd9Sstevel@tonic-gate  * The hash search is done by only holding the hashchain lock, when a wanted
2377c478bd9Sstevel@tonic-gate  * slot is found, we drop the hashchain lock then lock the slot so there
2387c478bd9Sstevel@tonic-gate  * is no overlapping of hashchain and smap locks. After the slot is
2397c478bd9Sstevel@tonic-gate  * locked, we verify again if the slot is still what we are looking
2407c478bd9Sstevel@tonic-gate  * for.
2417c478bd9Sstevel@tonic-gate  *
2427c478bd9Sstevel@tonic-gate  * Allocation of a free slot is done by holding the freelist lock,
2437c478bd9Sstevel@tonic-gate  * then locking the smap slot at the head of the freelist. This is
2447c478bd9Sstevel@tonic-gate  * in reversed lock order so mutex_tryenter() is used.
2457c478bd9Sstevel@tonic-gate  *
2467c478bd9Sstevel@tonic-gate  * The smap lock protects all fields in smap structure except for
2477c478bd9Sstevel@tonic-gate  * the link fields for hash/free lists which are protected by
2487c478bd9Sstevel@tonic-gate  * hashchain and freelist locks.
2497c478bd9Sstevel@tonic-gate  */
2507c478bd9Sstevel@tonic-gate 
2517c478bd9Sstevel@tonic-gate #define	SHASHMTX(hashid)	(&smd_hash[hashid].sh_mtx)
2527c478bd9Sstevel@tonic-gate 
2537c478bd9Sstevel@tonic-gate #define	SMP2SMF(smp)		(&smd_free[(smp - smd_smap) & smd_freemsk])
2547c478bd9Sstevel@tonic-gate #define	SMP2SMF_NDX(smp)	(ushort_t)((smp - smd_smap) & smd_freemsk)
2557c478bd9Sstevel@tonic-gate 
2567c478bd9Sstevel@tonic-gate #define	SMAPMTX(smp) (&smp->sm_mtx)
2577c478bd9Sstevel@tonic-gate 
2587c478bd9Sstevel@tonic-gate #define	SMAP_HASHFUNC(vp, off, hashid) \
2597c478bd9Sstevel@tonic-gate 	{ \
2607c478bd9Sstevel@tonic-gate 	hashid = ((((uintptr_t)(vp) >> 6) + ((uintptr_t)(vp) >> 3) + \
2617c478bd9Sstevel@tonic-gate 		((off) >> MAXBSHIFT)) & smd_hashmsk); \
2627c478bd9Sstevel@tonic-gate 	}
2637c478bd9Sstevel@tonic-gate 
2647c478bd9Sstevel@tonic-gate /*
2657c478bd9Sstevel@tonic-gate  * The most frequently updated kstat counters are kept in the
2667c478bd9Sstevel@tonic-gate  * per cpu array to avoid hot cache blocks. The update function
2677c478bd9Sstevel@tonic-gate  * sums the cpu local counters to update the global counters.
2687c478bd9Sstevel@tonic-gate  */
2697c478bd9Sstevel@tonic-gate 
2707c478bd9Sstevel@tonic-gate /* ARGSUSED */
2717c478bd9Sstevel@tonic-gate int
segmap_kstat_update(kstat_t * ksp,int rw)2727c478bd9Sstevel@tonic-gate segmap_kstat_update(kstat_t *ksp, int rw)
2737c478bd9Sstevel@tonic-gate {
2747c478bd9Sstevel@tonic-gate 	int i;
2757c478bd9Sstevel@tonic-gate 	ulong_t	getmap, release, get_reclaim;
2767c478bd9Sstevel@tonic-gate 	ulong_t	fault, pagecreate, get_reuse;
2777c478bd9Sstevel@tonic-gate 
2787c478bd9Sstevel@tonic-gate 	if (rw == KSTAT_WRITE)
2797c478bd9Sstevel@tonic-gate 		return (EACCES);
2807c478bd9Sstevel@tonic-gate 	getmap = release = get_reclaim = (ulong_t)0;
2817c478bd9Sstevel@tonic-gate 	fault = pagecreate = get_reuse = (ulong_t)0;
2827c478bd9Sstevel@tonic-gate 	for (i = 0; i < max_ncpus; i++) {
2837c478bd9Sstevel@tonic-gate 		getmap += smd_cpu[i].scpu.scpu_getmap;
2847c478bd9Sstevel@tonic-gate 		release  += smd_cpu[i].scpu.scpu_release;
2857c478bd9Sstevel@tonic-gate 		get_reclaim += smd_cpu[i].scpu.scpu_get_reclaim;
2867c478bd9Sstevel@tonic-gate 		fault  += smd_cpu[i].scpu.scpu_fault;
2877c478bd9Sstevel@tonic-gate 		pagecreate  += smd_cpu[i].scpu.scpu_pagecreate;
2887c478bd9Sstevel@tonic-gate 		get_reuse += smd_cpu[i].scpu.scpu_get_reuse;
2897c478bd9Sstevel@tonic-gate 	}
2907c478bd9Sstevel@tonic-gate 	segmapcnt.smp_getmap.value.ul = getmap;
2917c478bd9Sstevel@tonic-gate 	segmapcnt.smp_release.value.ul = release;
2927c478bd9Sstevel@tonic-gate 	segmapcnt.smp_get_reclaim.value.ul = get_reclaim;
2937c478bd9Sstevel@tonic-gate 	segmapcnt.smp_fault.value.ul = fault;
2947c478bd9Sstevel@tonic-gate 	segmapcnt.smp_pagecreate.value.ul = pagecreate;
2957c478bd9Sstevel@tonic-gate 	segmapcnt.smp_get_reuse.value.ul = get_reuse;
2967c478bd9Sstevel@tonic-gate 	return (0);
2977c478bd9Sstevel@tonic-gate }
2987c478bd9Sstevel@tonic-gate 
2997c478bd9Sstevel@tonic-gate int
segmap_create(struct seg * seg,void * argsp)3007c478bd9Sstevel@tonic-gate segmap_create(struct seg *seg, void *argsp)
3017c478bd9Sstevel@tonic-gate {
3027c478bd9Sstevel@tonic-gate 	struct segmap_data *smd;
3037c478bd9Sstevel@tonic-gate 	struct smap *smp;
3047c478bd9Sstevel@tonic-gate 	struct smfree *sm;
3057c478bd9Sstevel@tonic-gate 	struct segmap_crargs *a = (struct segmap_crargs *)argsp;
3067c478bd9Sstevel@tonic-gate 	struct smaphash *shashp;
3077c478bd9Sstevel@tonic-gate 	union segmap_cpu *scpu;
3087c478bd9Sstevel@tonic-gate 	long i, npages;
3097c478bd9Sstevel@tonic-gate 	size_t hashsz;
3107c478bd9Sstevel@tonic-gate 	uint_t nfreelist;
3117c478bd9Sstevel@tonic-gate 	extern void prefetch_smap_w(void *);
3127c478bd9Sstevel@tonic-gate 	extern int max_ncpus;
3137c478bd9Sstevel@tonic-gate 
3147c478bd9Sstevel@tonic-gate 	ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));
3157c478bd9Sstevel@tonic-gate 
3167c478bd9Sstevel@tonic-gate 	if (((uintptr_t)seg->s_base | seg->s_size) & MAXBOFFSET) {
3177c478bd9Sstevel@tonic-gate 		panic("segkmap not MAXBSIZE aligned");
3187c478bd9Sstevel@tonic-gate 		/*NOTREACHED*/
3197c478bd9Sstevel@tonic-gate 	}
3207c478bd9Sstevel@tonic-gate 
3217c478bd9Sstevel@tonic-gate 	smd = kmem_zalloc(sizeof (struct segmap_data), KM_SLEEP);
3227c478bd9Sstevel@tonic-gate 
3237c478bd9Sstevel@tonic-gate 	seg->s_data = (void *)smd;
3247c478bd9Sstevel@tonic-gate 	seg->s_ops = &segmap_ops;
3257c478bd9Sstevel@tonic-gate 	smd->smd_prot = a->prot;
3267c478bd9Sstevel@tonic-gate 
3277c478bd9Sstevel@tonic-gate 	/*
3287c478bd9Sstevel@tonic-gate 	 * Scale the number of smap freelists to be
3297c478bd9Sstevel@tonic-gate 	 * proportional to max_ncpus * number of virtual colors.
3307c478bd9Sstevel@tonic-gate 	 * The caller can over-ride this scaling by providing
3317c478bd9Sstevel@tonic-gate 	 * a non-zero a->nfreelist argument.
3327c478bd9Sstevel@tonic-gate 	 */
3337c478bd9Sstevel@tonic-gate 	nfreelist = a->nfreelist;
3347c478bd9Sstevel@tonic-gate 	if (nfreelist == 0)
3357c478bd9Sstevel@tonic-gate 		nfreelist = max_ncpus;
3367c478bd9Sstevel@tonic-gate 	else if (nfreelist < 0 || nfreelist > 4 * max_ncpus) {
3377c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "segmap_create: nfreelist out of range "
3387c478bd9Sstevel@tonic-gate 		"%d, using %d", nfreelist, max_ncpus);
3397c478bd9Sstevel@tonic-gate 		nfreelist = max_ncpus;
3407c478bd9Sstevel@tonic-gate 	}
34179a77829SJosef 'Jeff' Sipek 	if (!ISP2(nfreelist)) {
3427c478bd9Sstevel@tonic-gate 		/* round up nfreelist to the next power of two. */
3437c478bd9Sstevel@tonic-gate 		nfreelist = 1 << (highbit(nfreelist));
3447c478bd9Sstevel@tonic-gate 	}
3457c478bd9Sstevel@tonic-gate 
3467c478bd9Sstevel@tonic-gate 	/*
3477c478bd9Sstevel@tonic-gate 	 * Get the number of virtual colors - must be a power of 2.
3487c478bd9Sstevel@tonic-gate 	 */
3497c478bd9Sstevel@tonic-gate 	if (a->shmsize)
3507c478bd9Sstevel@tonic-gate 		smd_ncolor = a->shmsize >> MAXBSHIFT;
3517c478bd9Sstevel@tonic-gate 	else
3527c478bd9Sstevel@tonic-gate 		smd_ncolor = 1;
3537c478bd9Sstevel@tonic-gate 	ASSERT((smd_ncolor & (smd_ncolor - 1)) == 0);
3547c478bd9Sstevel@tonic-gate 	ASSERT(smd_ncolor <= SEGMAP_MAXCOLOR);
3557c478bd9Sstevel@tonic-gate 	smd_colormsk = smd_ncolor - 1;
3567c478bd9Sstevel@tonic-gate 	smd->smd_nfree = smd_nfree = smd_ncolor * nfreelist;
3577c478bd9Sstevel@tonic-gate 	smd_freemsk = smd_nfree - 1;
3587c478bd9Sstevel@tonic-gate 
3597c478bd9Sstevel@tonic-gate 	/*
3607c478bd9Sstevel@tonic-gate 	 * Allocate and initialize the freelist headers.
3617c478bd9Sstevel@tonic-gate 	 * Note that sm_freeq[1] starts out as the release queue. This
3627c478bd9Sstevel@tonic-gate 	 * is known when the smap structures are initialized below.
3637c478bd9Sstevel@tonic-gate 	 */
3647c478bd9Sstevel@tonic-gate 	smd_free = smd->smd_free =
3657c478bd9Sstevel@tonic-gate 	    kmem_zalloc(smd_nfree * sizeof (struct smfree), KM_SLEEP);
3667c478bd9Sstevel@tonic-gate 	for (i = 0; i < smd_nfree; i++) {
3677c478bd9Sstevel@tonic-gate 		sm = &smd->smd_free[i];
3687c478bd9Sstevel@tonic-gate 		mutex_init(&sm->sm_freeq[0].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
3697c478bd9Sstevel@tonic-gate 		mutex_init(&sm->sm_freeq[1].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
3707c478bd9Sstevel@tonic-gate 		sm->sm_allocq = &sm->sm_freeq[0];
3717c478bd9Sstevel@tonic-gate 		sm->sm_releq = &sm->sm_freeq[1];
3727c478bd9Sstevel@tonic-gate 	}
3737c478bd9Sstevel@tonic-gate 
3747c478bd9Sstevel@tonic-gate 	/*
3757c478bd9Sstevel@tonic-gate 	 * Allocate and initialize the smap hash chain headers.
3767c478bd9Sstevel@tonic-gate 	 * Compute hash size rounding down to the next power of two.
3777c478bd9Sstevel@tonic-gate 	 */
3787c478bd9Sstevel@tonic-gate 	npages = MAP_PAGES(seg);
3797c478bd9Sstevel@tonic-gate 	smd->smd_npages = npages;
3807c478bd9Sstevel@tonic-gate 	hashsz = npages / SMAP_HASHAVELEN;
3817c478bd9Sstevel@tonic-gate 	hashsz = 1 << (highbit(hashsz)-1);
3827c478bd9Sstevel@tonic-gate 	smd_hashmsk = hashsz - 1;
3837c478bd9Sstevel@tonic-gate 	smd_hash = smd->smd_hash =
3847c478bd9Sstevel@tonic-gate 	    kmem_alloc(hashsz * sizeof (struct smaphash), KM_SLEEP);
3857c478bd9Sstevel@tonic-gate #ifdef SEGMAP_HASHSTATS
3867c478bd9Sstevel@tonic-gate 	smd_hash_len =
3877c478bd9Sstevel@tonic-gate 	    kmem_zalloc(hashsz * sizeof (unsigned int), KM_SLEEP);
3887c478bd9Sstevel@tonic-gate #endif
3897c478bd9Sstevel@tonic-gate 	for (i = 0, shashp = smd_hash; i < hashsz; i++, shashp++) {
3907c478bd9Sstevel@tonic-gate 		shashp->sh_hash_list = NULL;
3917c478bd9Sstevel@tonic-gate 		mutex_init(&shashp->sh_mtx, NULL, MUTEX_DEFAULT, NULL);
3927c478bd9Sstevel@tonic-gate 	}
3937c478bd9Sstevel@tonic-gate 
3947c478bd9Sstevel@tonic-gate 	/*
3957c478bd9Sstevel@tonic-gate 	 * Allocate and initialize the smap structures.
3967c478bd9Sstevel@tonic-gate 	 * Link all slots onto the appropriate freelist.
3977c478bd9Sstevel@tonic-gate 	 * The smap array is large enough to affect boot time
3987c478bd9Sstevel@tonic-gate 	 * on large systems, so use memory prefetching and only
3997c478bd9Sstevel@tonic-gate 	 * go through the array 1 time. Inline a optimized version
4007c478bd9Sstevel@tonic-gate 	 * of segmap_smapadd to add structures to freelists with
4017c478bd9Sstevel@tonic-gate 	 * knowledge that no locks are needed here.
4027c478bd9Sstevel@tonic-gate 	 */
4037c478bd9Sstevel@tonic-gate 	smd_smap = smd->smd_sm =
4047c478bd9Sstevel@tonic-gate 	    kmem_alloc(sizeof (struct smap) * npages, KM_SLEEP);
4057c478bd9Sstevel@tonic-gate 
4067c478bd9Sstevel@tonic-gate 	for (smp = &smd->smd_sm[MAP_PAGES(seg) - 1];
4077c478bd9Sstevel@tonic-gate 	    smp >= smd->smd_sm; smp--) {
4087c478bd9Sstevel@tonic-gate 		struct smap *smpfreelist;
4097c478bd9Sstevel@tonic-gate 		struct sm_freeq *releq;
4107c478bd9Sstevel@tonic-gate 
4117c478bd9Sstevel@tonic-gate 		prefetch_smap_w((char *)smp);
4127c478bd9Sstevel@tonic-gate 
4137c478bd9Sstevel@tonic-gate 		smp->sm_vp = NULL;
4147c478bd9Sstevel@tonic-gate 		smp->sm_hash = NULL;
4157c478bd9Sstevel@tonic-gate 		smp->sm_off = 0;
4167c478bd9Sstevel@tonic-gate 		smp->sm_bitmap = 0;
4177c478bd9Sstevel@tonic-gate 		smp->sm_refcnt = 0;
4187c478bd9Sstevel@tonic-gate 		mutex_init(&smp->sm_mtx, NULL, MUTEX_DEFAULT, NULL);
4197c478bd9Sstevel@tonic-gate 		smp->sm_free_ndx = SMP2SMF_NDX(smp);
4207c478bd9Sstevel@tonic-gate 
4217c478bd9Sstevel@tonic-gate 		sm = SMP2SMF(smp);
4227c478bd9Sstevel@tonic-gate 		releq = sm->sm_releq;
4237c478bd9Sstevel@tonic-gate 
4247c478bd9Sstevel@tonic-gate 		smpfreelist = releq->smq_free;
4257c478bd9Sstevel@tonic-gate 		if (smpfreelist == 0) {
4267c478bd9Sstevel@tonic-gate 			releq->smq_free = smp->sm_next = smp->sm_prev = smp;
4277c478bd9Sstevel@tonic-gate 		} else {
4287c478bd9Sstevel@tonic-gate 			smp->sm_next = smpfreelist;
4297c478bd9Sstevel@tonic-gate 			smp->sm_prev = smpfreelist->sm_prev;
4307c478bd9Sstevel@tonic-gate 			smpfreelist->sm_prev = smp;
4317c478bd9Sstevel@tonic-gate 			smp->sm_prev->sm_next = smp;
4327c478bd9Sstevel@tonic-gate 			releq->smq_free = smp->sm_next;
4337c478bd9Sstevel@tonic-gate 		}
4347c478bd9Sstevel@tonic-gate 
4357c478bd9Sstevel@tonic-gate 		/*
4367c478bd9Sstevel@tonic-gate 		 * sm_flag = 0 (no SM_QNDX_ZERO) implies smap on sm_freeq[1]
4377c478bd9Sstevel@tonic-gate 		 */
4387c478bd9Sstevel@tonic-gate 		smp->sm_flags = 0;
4397c478bd9Sstevel@tonic-gate 
4407c478bd9Sstevel@tonic-gate #ifdef	SEGKPM_SUPPORT
4417c478bd9Sstevel@tonic-gate 		/*
4427c478bd9Sstevel@tonic-gate 		 * Due to the fragile prefetch loop no
4437c478bd9Sstevel@tonic-gate 		 * separate function is used here.
4447c478bd9Sstevel@tonic-gate 		 */
4457c478bd9Sstevel@tonic-gate 		smp->sm_kpme_next = NULL;
4467c478bd9Sstevel@tonic-gate 		smp->sm_kpme_prev = NULL;
4477c478bd9Sstevel@tonic-gate 		smp->sm_kpme_page = NULL;
4487c478bd9Sstevel@tonic-gate #endif
4497c478bd9Sstevel@tonic-gate 	}
4507c478bd9Sstevel@tonic-gate 
4517c478bd9Sstevel@tonic-gate 	/*
4527c478bd9Sstevel@tonic-gate 	 * Allocate the per color indices that distribute allocation
4537c478bd9Sstevel@tonic-gate 	 * requests over the free lists. Each cpu will have a private
4547c478bd9Sstevel@tonic-gate 	 * rotor index to spread the allocations even across the available
4557c478bd9Sstevel@tonic-gate 	 * smap freelists. Init the scpu_last_smap field to the first
4567c478bd9Sstevel@tonic-gate 	 * smap element so there is no need to check for NULL.
4577c478bd9Sstevel@tonic-gate 	 */
4587c478bd9Sstevel@tonic-gate 	smd_cpu =
4597c478bd9Sstevel@tonic-gate 	    kmem_zalloc(sizeof (union segmap_cpu) * max_ncpus, KM_SLEEP);
4607c478bd9Sstevel@tonic-gate 	for (i = 0, scpu = smd_cpu; i < max_ncpus; i++, scpu++) {
4617c478bd9Sstevel@tonic-gate 		int j;
4627c478bd9Sstevel@tonic-gate 		for (j = 0; j < smd_ncolor; j++)
4637c478bd9Sstevel@tonic-gate 			scpu->scpu.scpu_free_ndx[j] = j;
4647c478bd9Sstevel@tonic-gate 		scpu->scpu.scpu_last_smap = smd_smap;
4657c478bd9Sstevel@tonic-gate 	}
4667c478bd9Sstevel@tonic-gate 
467a5652762Spraks 	vpm_init();
468a5652762Spraks 
4697c478bd9Sstevel@tonic-gate #ifdef DEBUG
4707c478bd9Sstevel@tonic-gate 	/*
4717c478bd9Sstevel@tonic-gate 	 * Keep track of which colors are used more often.
4727c478bd9Sstevel@tonic-gate 	 */
4737c478bd9Sstevel@tonic-gate 	colors_used = kmem_zalloc(smd_nfree * sizeof (int), KM_SLEEP);
4747c478bd9Sstevel@tonic-gate #endif /* DEBUG */
4757c478bd9Sstevel@tonic-gate 
4767c478bd9Sstevel@tonic-gate 	return (0);
4777c478bd9Sstevel@tonic-gate }
4787c478bd9Sstevel@tonic-gate 
4797c478bd9Sstevel@tonic-gate static void
segmap_free(seg)4807c478bd9Sstevel@tonic-gate segmap_free(seg)
4817c478bd9Sstevel@tonic-gate 	struct seg *seg;
4827c478bd9Sstevel@tonic-gate {
4837c478bd9Sstevel@tonic-gate 	ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));
4847c478bd9Sstevel@tonic-gate }
4857c478bd9Sstevel@tonic-gate 
4867c478bd9Sstevel@tonic-gate /*
4877c478bd9Sstevel@tonic-gate  * Do a F_SOFTUNLOCK call over the range requested.
4887c478bd9Sstevel@tonic-gate  * The range must have already been F_SOFTLOCK'ed.
4897c478bd9Sstevel@tonic-gate  */
4907c478bd9Sstevel@tonic-gate static void
segmap_unlock(struct hat * hat,struct seg * seg,caddr_t addr,size_t len,enum seg_rw rw,struct smap * smp)4917c478bd9Sstevel@tonic-gate segmap_unlock(
4927c478bd9Sstevel@tonic-gate 	struct hat *hat,
4937c478bd9Sstevel@tonic-gate 	struct seg *seg,
4947c478bd9Sstevel@tonic-gate 	caddr_t addr,
4957c478bd9Sstevel@tonic-gate 	size_t len,
4967c478bd9Sstevel@tonic-gate 	enum seg_rw rw,
4977c478bd9Sstevel@tonic-gate 	struct smap *smp)
4987c478bd9Sstevel@tonic-gate {
4997c478bd9Sstevel@tonic-gate 	page_t *pp;
5007c478bd9Sstevel@tonic-gate 	caddr_t adr;
5017c478bd9Sstevel@tonic-gate 	u_offset_t off;
5027c478bd9Sstevel@tonic-gate 	struct vnode *vp;
5037c478bd9Sstevel@tonic-gate 	kmutex_t *smtx;
5047c478bd9Sstevel@tonic-gate 
5057c478bd9Sstevel@tonic-gate 	ASSERT(smp->sm_refcnt > 0);
5067c478bd9Sstevel@tonic-gate 
5077c478bd9Sstevel@tonic-gate #ifdef lint
5087c478bd9Sstevel@tonic-gate 	seg = seg;
5097c478bd9Sstevel@tonic-gate #endif
5107c478bd9Sstevel@tonic-gate 
5117c478bd9Sstevel@tonic-gate 	if (segmap_kpm && IS_KPM_ADDR(addr)) {
5127c478bd9Sstevel@tonic-gate 
5137c478bd9Sstevel@tonic-gate 		/*
5147c478bd9Sstevel@tonic-gate 		 * We're called only from segmap_fault and this was a
5157c478bd9Sstevel@tonic-gate 		 * NOP in case of a kpm based smap, so dangerous things
5167c478bd9Sstevel@tonic-gate 		 * must have happened in the meantime. Pages are prefaulted
5177c478bd9Sstevel@tonic-gate 		 * and locked in segmap_getmapflt and they will not be
5187c478bd9Sstevel@tonic-gate 		 * unlocked until segmap_release.
5197c478bd9Sstevel@tonic-gate 		 */
5207c478bd9Sstevel@tonic-gate 		panic("segmap_unlock: called with kpm addr %p", (void *)addr);
5217c478bd9Sstevel@tonic-gate 		/*NOTREACHED*/
5227c478bd9Sstevel@tonic-gate 	}
5237c478bd9Sstevel@tonic-gate 
5247c478bd9Sstevel@tonic-gate 	vp = smp->sm_vp;
5257c478bd9Sstevel@tonic-gate 	off = smp->sm_off + (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
5267c478bd9Sstevel@tonic-gate 
5277c478bd9Sstevel@tonic-gate 	hat_unlock(hat, addr, P2ROUNDUP(len, PAGESIZE));
5287c478bd9Sstevel@tonic-gate 	for (adr = addr; adr < addr + len; adr += PAGESIZE, off += PAGESIZE) {
5297c478bd9Sstevel@tonic-gate 		ushort_t bitmask;
5307c478bd9Sstevel@tonic-gate 
5317c478bd9Sstevel@tonic-gate 		/*
5327c478bd9Sstevel@tonic-gate 		 * Use page_find() instead of page_lookup() to
5337c478bd9Sstevel@tonic-gate 		 * find the page since we know that it has
5347c478bd9Sstevel@tonic-gate 		 * "shared" lock.
5357c478bd9Sstevel@tonic-gate 		 */
5367c478bd9Sstevel@tonic-gate 		pp = page_find(vp, off);
5377c478bd9Sstevel@tonic-gate 		if (pp == NULL) {
5387c478bd9Sstevel@tonic-gate 			panic("segmap_unlock: page not found");
5397c478bd9Sstevel@tonic-gate 			/*NOTREACHED*/
5407c478bd9Sstevel@tonic-gate 		}
5417c478bd9Sstevel@tonic-gate 
5427c478bd9Sstevel@tonic-gate 		if (rw == S_WRITE) {
5437c478bd9Sstevel@tonic-gate 			hat_setrefmod(pp);
5447c478bd9Sstevel@tonic-gate 		} else if (rw != S_OTHER) {
5457c478bd9Sstevel@tonic-gate 			TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
546183971baSPrakash Sangappa 			"segmap_fault:pp %p vp %p offset %llx", pp, vp, off);
5477c478bd9Sstevel@tonic-gate 			hat_setref(pp);
5487c478bd9Sstevel@tonic-gate 		}
5497c478bd9Sstevel@tonic-gate 
5507c478bd9Sstevel@tonic-gate 		/*
5517c478bd9Sstevel@tonic-gate 		 * Clear bitmap, if the bit corresponding to "off" is set,
5527c478bd9Sstevel@tonic-gate 		 * since the page and translation are being unlocked.
5537c478bd9Sstevel@tonic-gate 		 */
5547c478bd9Sstevel@tonic-gate 		bitmask = SMAP_BIT_MASK((off - smp->sm_off) >> PAGESHIFT);
5557c478bd9Sstevel@tonic-gate 
5567c478bd9Sstevel@tonic-gate 		/*
5577c478bd9Sstevel@tonic-gate 		 * Large Files: Following assertion is to verify
5587c478bd9Sstevel@tonic-gate 		 * the correctness of the cast to (int) above.
5597c478bd9Sstevel@tonic-gate 		 */
5607c478bd9Sstevel@tonic-gate 		ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
5617c478bd9Sstevel@tonic-gate 		smtx = SMAPMTX(smp);
5627c478bd9Sstevel@tonic-gate 		mutex_enter(smtx);
5637c478bd9Sstevel@tonic-gate 		if (smp->sm_bitmap & bitmask) {
5647c478bd9Sstevel@tonic-gate 			smp->sm_bitmap &= ~bitmask;
5657c478bd9Sstevel@tonic-gate 		}
5667c478bd9Sstevel@tonic-gate 		mutex_exit(smtx);
5677c478bd9Sstevel@tonic-gate 
5687c478bd9Sstevel@tonic-gate 		page_unlock(pp);
5697c478bd9Sstevel@tonic-gate 	}
5707c478bd9Sstevel@tonic-gate }
5717c478bd9Sstevel@tonic-gate 
5727c478bd9Sstevel@tonic-gate #define	MAXPPB	(MAXBSIZE/4096)	/* assumes minimum page size of 4k */
5737c478bd9Sstevel@tonic-gate 
5747c478bd9Sstevel@tonic-gate /*
5757c478bd9Sstevel@tonic-gate  * This routine is called via a machine specific fault handling
5767c478bd9Sstevel@tonic-gate  * routine.  It is also called by software routines wishing to
5777c478bd9Sstevel@tonic-gate  * lock or unlock a range of addresses.
5787c478bd9Sstevel@tonic-gate  *
5797c478bd9Sstevel@tonic-gate  * Note that this routine expects a page-aligned "addr".
5807c478bd9Sstevel@tonic-gate  */
5817c478bd9Sstevel@tonic-gate faultcode_t
segmap_fault(struct hat * hat,struct seg * seg,caddr_t addr,size_t len,enum fault_type type,enum seg_rw rw)5827c478bd9Sstevel@tonic-gate segmap_fault(
5837c478bd9Sstevel@tonic-gate 	struct hat *hat,
5847c478bd9Sstevel@tonic-gate 	struct seg *seg,
5857c478bd9Sstevel@tonic-gate 	caddr_t addr,
5867c478bd9Sstevel@tonic-gate 	size_t len,
5877c478bd9Sstevel@tonic-gate 	enum fault_type type,
5887c478bd9Sstevel@tonic-gate 	enum seg_rw rw)
5897c478bd9Sstevel@tonic-gate {
5907c478bd9Sstevel@tonic-gate 	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
5917c478bd9Sstevel@tonic-gate 	struct smap *smp;
5927c478bd9Sstevel@tonic-gate 	page_t *pp, **ppp;
5937c478bd9Sstevel@tonic-gate 	struct vnode *vp;
5947c478bd9Sstevel@tonic-gate 	u_offset_t off;
5957c478bd9Sstevel@tonic-gate 	page_t *pl[MAXPPB + 1];
5967c478bd9Sstevel@tonic-gate 	uint_t prot;
5977c478bd9Sstevel@tonic-gate 	u_offset_t addroff;
5987c478bd9Sstevel@tonic-gate 	caddr_t adr;
5997c478bd9Sstevel@tonic-gate 	int err;
6007c478bd9Sstevel@tonic-gate 	u_offset_t sm_off;
6017c478bd9Sstevel@tonic-gate 	int hat_flag;
6027c478bd9Sstevel@tonic-gate 
6037c478bd9Sstevel@tonic-gate 	if (segmap_kpm && IS_KPM_ADDR(addr)) {
6047c478bd9Sstevel@tonic-gate 		int newpage;
6057c478bd9Sstevel@tonic-gate 		kmutex_t *smtx;
6067c478bd9Sstevel@tonic-gate 
6077c478bd9Sstevel@tonic-gate 		/*
6087c478bd9Sstevel@tonic-gate 		 * Pages are successfully prefaulted and locked in
6097c478bd9Sstevel@tonic-gate 		 * segmap_getmapflt and can't be unlocked until
6107c478bd9Sstevel@tonic-gate 		 * segmap_release. No hat mappings have to be locked
6117c478bd9Sstevel@tonic-gate 		 * and they also can't be unlocked as long as the
6127c478bd9Sstevel@tonic-gate 		 * caller owns an active kpm addr.
6137c478bd9Sstevel@tonic-gate 		 */
6147c478bd9Sstevel@tonic-gate #ifndef DEBUG
6157c478bd9Sstevel@tonic-gate 		if (type != F_SOFTUNLOCK)
6167c478bd9Sstevel@tonic-gate 			return (0);
6177c478bd9Sstevel@tonic-gate #endif
6187c478bd9Sstevel@tonic-gate 
6197c478bd9Sstevel@tonic-gate 		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
6207c478bd9Sstevel@tonic-gate 			panic("segmap_fault: smap not found "
6217c478bd9Sstevel@tonic-gate 			    "for addr %p", (void *)addr);
6227c478bd9Sstevel@tonic-gate 			/*NOTREACHED*/
6237c478bd9Sstevel@tonic-gate 		}
6247c478bd9Sstevel@tonic-gate 
6257c478bd9Sstevel@tonic-gate 		smtx = SMAPMTX(smp);
6267c478bd9Sstevel@tonic-gate #ifdef	DEBUG
6277c478bd9Sstevel@tonic-gate 		newpage = smp->sm_flags & SM_KPM_NEWPAGE;
6287c478bd9Sstevel@tonic-gate 		if (newpage) {
6297c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "segmap_fault: newpage? smp %p",
6307c478bd9Sstevel@tonic-gate 			    (void *)smp);
6317c478bd9Sstevel@tonic-gate 		}
6327c478bd9Sstevel@tonic-gate 
6337c478bd9Sstevel@tonic-gate 		if (type != F_SOFTUNLOCK) {
6347c478bd9Sstevel@tonic-gate 			mutex_exit(smtx);
6357c478bd9Sstevel@tonic-gate 			return (0);
6367c478bd9Sstevel@tonic-gate 		}
6377c478bd9Sstevel@tonic-gate #endif
6387c478bd9Sstevel@tonic-gate 		mutex_exit(smtx);
6397c478bd9Sstevel@tonic-gate 		vp = smp->sm_vp;
6407c478bd9Sstevel@tonic-gate 		sm_off = smp->sm_off;
6417c478bd9Sstevel@tonic-gate 
6427c478bd9Sstevel@tonic-gate 		if (vp == NULL)
6437c478bd9Sstevel@tonic-gate 			return (FC_MAKE_ERR(EIO));
6447c478bd9Sstevel@tonic-gate 
6457c478bd9Sstevel@tonic-gate 		ASSERT(smp->sm_refcnt > 0);
6467c478bd9Sstevel@tonic-gate 
6477c478bd9Sstevel@tonic-gate 		addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
6487c478bd9Sstevel@tonic-gate 		if (addroff + len > MAXBSIZE)
6497c478bd9Sstevel@tonic-gate 			panic("segmap_fault: endaddr %p exceeds MAXBSIZE chunk",
6507c478bd9Sstevel@tonic-gate 			    (void *)(addr + len));
6517c478bd9Sstevel@tonic-gate 
6527c478bd9Sstevel@tonic-gate 		off = sm_off + addroff;
6537c478bd9Sstevel@tonic-gate 
6547c478bd9Sstevel@tonic-gate 		pp = page_find(vp, off);
6557c478bd9Sstevel@tonic-gate 
6567c478bd9Sstevel@tonic-gate 		if (pp == NULL)
6577c478bd9Sstevel@tonic-gate 			panic("segmap_fault: softunlock page not found");
6587c478bd9Sstevel@tonic-gate 
6597c478bd9Sstevel@tonic-gate 		/*
6607c478bd9Sstevel@tonic-gate 		 * Set ref bit also here in case of S_OTHER to avoid the
6617c478bd9Sstevel@tonic-gate 		 * overhead of supporting other cases than F_SOFTUNLOCK
6627c478bd9Sstevel@tonic-gate 		 * with segkpm. We can do this because the underlying
6637c478bd9Sstevel@tonic-gate 		 * pages are locked anyway.
6647c478bd9Sstevel@tonic-gate 		 */
6657c478bd9Sstevel@tonic-gate 		if (rw == S_WRITE) {
6667c478bd9Sstevel@tonic-gate 			hat_setrefmod(pp);
6677c478bd9Sstevel@tonic-gate 		} else {
6687c478bd9Sstevel@tonic-gate 			TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
6697c478bd9Sstevel@tonic-gate 			    "segmap_fault:pp %p vp %p offset %llx",
6707c478bd9Sstevel@tonic-gate 			    pp, vp, off);
6717c478bd9Sstevel@tonic-gate 			hat_setref(pp);
6727c478bd9Sstevel@tonic-gate 		}
6737c478bd9Sstevel@tonic-gate 
6747c478bd9Sstevel@tonic-gate 		return (0);
6757c478bd9Sstevel@tonic-gate 	}
6767c478bd9Sstevel@tonic-gate 
6777c478bd9Sstevel@tonic-gate 	smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;
6787c478bd9Sstevel@tonic-gate 	smp = GET_SMAP(seg, addr);
6797c478bd9Sstevel@tonic-gate 	vp = smp->sm_vp;
6807c478bd9Sstevel@tonic-gate 	sm_off = smp->sm_off;
6817c478bd9Sstevel@tonic-gate 
6827c478bd9Sstevel@tonic-gate 	if (vp == NULL)
6837c478bd9Sstevel@tonic-gate 		return (FC_MAKE_ERR(EIO));
6847c478bd9Sstevel@tonic-gate 
6857c478bd9Sstevel@tonic-gate 	ASSERT(smp->sm_refcnt > 0);
6867c478bd9Sstevel@tonic-gate 
6877c478bd9Sstevel@tonic-gate 	addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
6887c478bd9Sstevel@tonic-gate 	if (addroff + len > MAXBSIZE) {
6897c478bd9Sstevel@tonic-gate 		panic("segmap_fault: endaddr %p "
6907c478bd9Sstevel@tonic-gate 		    "exceeds MAXBSIZE chunk", (void *)(addr + len));
6917c478bd9Sstevel@tonic-gate 		/*NOTREACHED*/
6927c478bd9Sstevel@tonic-gate 	}
6937c478bd9Sstevel@tonic-gate 	off = sm_off + addroff;
6947c478bd9Sstevel@tonic-gate 
6957c478bd9Sstevel@tonic-gate 	/*
6967c478bd9Sstevel@tonic-gate 	 * First handle the easy stuff
6977c478bd9Sstevel@tonic-gate 	 */
6987c478bd9Sstevel@tonic-gate 	if (type == F_SOFTUNLOCK) {
6997c478bd9Sstevel@tonic-gate 		segmap_unlock(hat, seg, addr, len, rw, smp);
7007c478bd9Sstevel@tonic-gate 		return (0);
7017c478bd9Sstevel@tonic-gate 	}
7027c478bd9Sstevel@tonic-gate 
7037c478bd9Sstevel@tonic-gate 	TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
7047c478bd9Sstevel@tonic-gate 	    "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);
7057c478bd9Sstevel@tonic-gate 	err = VOP_GETPAGE(vp, (offset_t)off, len, &prot, pl, MAXBSIZE,
706da6c28aaSamw 	    seg, addr, rw, CRED(), NULL);
7077c478bd9Sstevel@tonic-gate 
7087c478bd9Sstevel@tonic-gate 	if (err)
7097c478bd9Sstevel@tonic-gate 		return (FC_MAKE_ERR(err));
7107c478bd9Sstevel@tonic-gate 
7117c478bd9Sstevel@tonic-gate 	prot &= smd->smd_prot;
7127c478bd9Sstevel@tonic-gate 
7137c478bd9Sstevel@tonic-gate 	/*
7147c478bd9Sstevel@tonic-gate 	 * Handle all pages returned in the pl[] array.
7157c478bd9Sstevel@tonic-gate 	 * This loop is coded on the assumption that if
7167c478bd9Sstevel@tonic-gate 	 * there was no error from the VOP_GETPAGE routine,
7177c478bd9Sstevel@tonic-gate 	 * that the page list returned will contain all the
7187c478bd9Sstevel@tonic-gate 	 * needed pages for the vp from [off..off + len].
7197c478bd9Sstevel@tonic-gate 	 */
7207c478bd9Sstevel@tonic-gate 	ppp = pl;
7217c478bd9Sstevel@tonic-gate 	while ((pp = *ppp++) != NULL) {
7227c478bd9Sstevel@tonic-gate 		u_offset_t poff;
7237c478bd9Sstevel@tonic-gate 		ASSERT(pp->p_vnode == vp);
7247c478bd9Sstevel@tonic-gate 		hat_flag = HAT_LOAD;
7257c478bd9Sstevel@tonic-gate 
7267c478bd9Sstevel@tonic-gate 		/*
7277c478bd9Sstevel@tonic-gate 		 * Verify that the pages returned are within the range
7287c478bd9Sstevel@tonic-gate 		 * of this segmap region.  Note that it is theoretically
7297c478bd9Sstevel@tonic-gate 		 * possible for pages outside this range to be returned,
7307c478bd9Sstevel@tonic-gate 		 * but it is not very likely.  If we cannot use the
7317c478bd9Sstevel@tonic-gate 		 * page here, just release it and go on to the next one.
7327c478bd9Sstevel@tonic-gate 		 */
7337c478bd9Sstevel@tonic-gate 		if (pp->p_offset < sm_off ||
7347c478bd9Sstevel@tonic-gate 		    pp->p_offset >= sm_off + MAXBSIZE) {
7357c478bd9Sstevel@tonic-gate 			(void) page_release(pp, 1);
7367c478bd9Sstevel@tonic-gate 			continue;
7377c478bd9Sstevel@tonic-gate 		}
7387c478bd9Sstevel@tonic-gate 
7397c478bd9Sstevel@tonic-gate 		ASSERT(hat == kas.a_hat);
7407c478bd9Sstevel@tonic-gate 		poff = pp->p_offset;
7417c478bd9Sstevel@tonic-gate 		adr = addr + (poff - off);
7427c478bd9Sstevel@tonic-gate 		if (adr >= addr && adr < addr + len) {
7437c478bd9Sstevel@tonic-gate 			hat_setref(pp);
7447c478bd9Sstevel@tonic-gate 			TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
7457c478bd9Sstevel@tonic-gate 			    "segmap_fault:pp %p vp %p offset %llx",
7467c478bd9Sstevel@tonic-gate 			    pp, vp, poff);
7477c478bd9Sstevel@tonic-gate 			if (type == F_SOFTLOCK)
7487c478bd9Sstevel@tonic-gate 				hat_flag = HAT_LOAD_LOCK;
7497c478bd9Sstevel@tonic-gate 		}
7507c478bd9Sstevel@tonic-gate 
7517c478bd9Sstevel@tonic-gate 		/*
7527c478bd9Sstevel@tonic-gate 		 * Deal with VMODSORT pages here. If we know this is a write
7537c478bd9Sstevel@tonic-gate 		 * do the setmod now and allow write protection.
7547c478bd9Sstevel@tonic-gate 		 * As long as it's modified or not S_OTHER, remove write
7557c478bd9Sstevel@tonic-gate 		 * protection. With S_OTHER it's up to the FS to deal with this.
7567c478bd9Sstevel@tonic-gate 		 */
7577c478bd9Sstevel@tonic-gate 		if (IS_VMODSORT(vp)) {
7587c478bd9Sstevel@tonic-gate 			if (rw == S_WRITE)
7597c478bd9Sstevel@tonic-gate 				hat_setmod(pp);
7607c478bd9Sstevel@tonic-gate 			else if (rw != S_OTHER && !hat_ismod(pp))
7617c478bd9Sstevel@tonic-gate 				prot &= ~PROT_WRITE;
7627c478bd9Sstevel@tonic-gate 		}
7637c478bd9Sstevel@tonic-gate 
7647c478bd9Sstevel@tonic-gate 		hat_memload(hat, adr, pp, prot, hat_flag);
7657c478bd9Sstevel@tonic-gate 		if (hat_flag != HAT_LOAD_LOCK)
7667c478bd9Sstevel@tonic-gate 			page_unlock(pp);
7677c478bd9Sstevel@tonic-gate 	}
7687c478bd9Sstevel@tonic-gate 	return (0);
7697c478bd9Sstevel@tonic-gate }
7707c478bd9Sstevel@tonic-gate 
7717c478bd9Sstevel@tonic-gate /*
7727c478bd9Sstevel@tonic-gate  * This routine is used to start I/O on pages asynchronously.
7737c478bd9Sstevel@tonic-gate  */
7747c478bd9Sstevel@tonic-gate static faultcode_t
segmap_faulta(struct seg * seg,caddr_t addr)7757c478bd9Sstevel@tonic-gate segmap_faulta(struct seg *seg, caddr_t addr)
7767c478bd9Sstevel@tonic-gate {
7777c478bd9Sstevel@tonic-gate 	struct smap *smp;
7787c478bd9Sstevel@tonic-gate 	struct vnode *vp;
7797c478bd9Sstevel@tonic-gate 	u_offset_t off;
7807c478bd9Sstevel@tonic-gate 	int err;
7817c478bd9Sstevel@tonic-gate 
7827c478bd9Sstevel@tonic-gate 	if (segmap_kpm && IS_KPM_ADDR(addr)) {
7837c478bd9Sstevel@tonic-gate 		int	newpage;
7847c478bd9Sstevel@tonic-gate 		kmutex_t *smtx;
7857c478bd9Sstevel@tonic-gate 
7867c478bd9Sstevel@tonic-gate 		/*
7877c478bd9Sstevel@tonic-gate 		 * Pages are successfully prefaulted and locked in
7887c478bd9Sstevel@tonic-gate 		 * segmap_getmapflt and can't be unlocked until
7897c478bd9Sstevel@tonic-gate 		 * segmap_release. No hat mappings have to be locked
7907c478bd9Sstevel@tonic-gate 		 * and they also can't be unlocked as long as the
7917c478bd9Sstevel@tonic-gate 		 * caller owns an active kpm addr.
7927c478bd9Sstevel@tonic-gate 		 */
7937c478bd9Sstevel@tonic-gate #ifdef	DEBUG
7947c478bd9Sstevel@tonic-gate 		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
7957c478bd9Sstevel@tonic-gate 			panic("segmap_faulta: smap not found "
7967c478bd9Sstevel@tonic-gate 			    "for addr %p", (void *)addr);
7977c478bd9Sstevel@tonic-gate 			/*NOTREACHED*/
7987c478bd9Sstevel@tonic-gate 		}
7997c478bd9Sstevel@tonic-gate 
8007c478bd9Sstevel@tonic-gate 		smtx = SMAPMTX(smp);
8017c478bd9Sstevel@tonic-gate 		newpage = smp->sm_flags & SM_KPM_NEWPAGE;
8027c478bd9Sstevel@tonic-gate 		mutex_exit(smtx);
8037c478bd9Sstevel@tonic-gate 		if (newpage)
8047c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "segmap_faulta: newpage? smp %p",
8057c478bd9Sstevel@tonic-gate 			    (void *)smp);
8067c478bd9Sstevel@tonic-gate #endif
8077c478bd9Sstevel@tonic-gate 		return (0);
8087c478bd9Sstevel@tonic-gate 	}
8097c478bd9Sstevel@tonic-gate 
8107c478bd9Sstevel@tonic-gate 	segmapcnt.smp_faulta.value.ul++;
8117c478bd9Sstevel@tonic-gate 	smp = GET_SMAP(seg, addr);
8127c478bd9Sstevel@tonic-gate 
8137c478bd9Sstevel@tonic-gate 	ASSERT(smp->sm_refcnt > 0);
8147c478bd9Sstevel@tonic-gate 
8157c478bd9Sstevel@tonic-gate 	vp = smp->sm_vp;
8167c478bd9Sstevel@tonic-gate 	off = smp->sm_off;
8177c478bd9Sstevel@tonic-gate 
8187c478bd9Sstevel@tonic-gate 	if (vp == NULL) {
8197c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "segmap_faulta - no vp");
8207c478bd9Sstevel@tonic-gate 		return (FC_MAKE_ERR(EIO));
8217c478bd9Sstevel@tonic-gate 	}
8227c478bd9Sstevel@tonic-gate 
8237c478bd9Sstevel@tonic-gate 	TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
8247c478bd9Sstevel@tonic-gate 	    "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);
8257c478bd9Sstevel@tonic-gate 
8267c478bd9Sstevel@tonic-gate 	err = VOP_GETPAGE(vp, (offset_t)(off + ((offset_t)((uintptr_t)addr
8277c478bd9Sstevel@tonic-gate 	    & MAXBOFFSET))), PAGESIZE, (uint_t *)NULL, (page_t **)NULL, 0,
828da6c28aaSamw 	    seg, addr, S_READ, CRED(), NULL);
8297c478bd9Sstevel@tonic-gate 
8307c478bd9Sstevel@tonic-gate 	if (err)
8317c478bd9Sstevel@tonic-gate 		return (FC_MAKE_ERR(err));
8327c478bd9Sstevel@tonic-gate 	return (0);
8337c478bd9Sstevel@tonic-gate }
8347c478bd9Sstevel@tonic-gate 
8357c478bd9Sstevel@tonic-gate /*ARGSUSED*/
8367c478bd9Sstevel@tonic-gate static int
segmap_checkprot(struct seg * seg,caddr_t addr,size_t len,uint_t prot)8377c478bd9Sstevel@tonic-gate segmap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
8387c478bd9Sstevel@tonic-gate {
8397c478bd9Sstevel@tonic-gate 	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
8407c478bd9Sstevel@tonic-gate 
8417c478bd9Sstevel@tonic-gate 	ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock));
8427c478bd9Sstevel@tonic-gate 
8437c478bd9Sstevel@tonic-gate 	/*
8447c478bd9Sstevel@tonic-gate 	 * Need not acquire the segment lock since
8457c478bd9Sstevel@tonic-gate 	 * "smd_prot" is a read-only field.
8467c478bd9Sstevel@tonic-gate 	 */
8477c478bd9Sstevel@tonic-gate 	return (((smd->smd_prot & prot) != prot) ? EACCES : 0);
8487c478bd9Sstevel@tonic-gate }
8497c478bd9Sstevel@tonic-gate 
8507c478bd9Sstevel@tonic-gate static int
segmap_getprot(struct seg * seg,caddr_t addr,size_t len,uint_t * protv)8517c478bd9Sstevel@tonic-gate segmap_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
8527c478bd9Sstevel@tonic-gate {
8537c478bd9Sstevel@tonic-gate 	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
8547c478bd9Sstevel@tonic-gate 	size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
8557c478bd9Sstevel@tonic-gate 
856*93fb2a5fSJosef 'Jeff' Sipek 	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
8577c478bd9Sstevel@tonic-gate 
8587c478bd9Sstevel@tonic-gate 	if (pgno != 0) {
859183971baSPrakash Sangappa 		do {
8607c478bd9Sstevel@tonic-gate 			protv[--pgno] = smd->smd_prot;
861183971baSPrakash Sangappa 		} while (pgno != 0);
8627c478bd9Sstevel@tonic-gate 	}
8637c478bd9Sstevel@tonic-gate 	return (0);
8647c478bd9Sstevel@tonic-gate }
8657c478bd9Sstevel@tonic-gate 
8667c478bd9Sstevel@tonic-gate static u_offset_t
segmap_getoffset(struct seg * seg,caddr_t addr)8677c478bd9Sstevel@tonic-gate segmap_getoffset(struct seg *seg, caddr_t addr)
8687c478bd9Sstevel@tonic-gate {
8697c478bd9Sstevel@tonic-gate 	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
8707c478bd9Sstevel@tonic-gate 
8717c478bd9Sstevel@tonic-gate 	ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));
8727c478bd9Sstevel@tonic-gate 
8737c478bd9Sstevel@tonic-gate 	return ((u_offset_t)smd->smd_sm->sm_off + (addr - seg->s_base));
8747c478bd9Sstevel@tonic-gate }
8757c478bd9Sstevel@tonic-gate 
8767c478bd9Sstevel@tonic-gate /*ARGSUSED*/
8777c478bd9Sstevel@tonic-gate static int
segmap_gettype(struct seg * seg,caddr_t addr)8787c478bd9Sstevel@tonic-gate segmap_gettype(struct seg *seg, caddr_t addr)
8797c478bd9Sstevel@tonic-gate {
8807c478bd9Sstevel@tonic-gate 	ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));
8817c478bd9Sstevel@tonic-gate 
8827c478bd9Sstevel@tonic-gate 	return (MAP_SHARED);
8837c478bd9Sstevel@tonic-gate }
8847c478bd9Sstevel@tonic-gate 
8857c478bd9Sstevel@tonic-gate /*ARGSUSED*/
8867c478bd9Sstevel@tonic-gate static int
segmap_getvp(struct seg * seg,caddr_t addr,struct vnode ** vpp)8877c478bd9Sstevel@tonic-gate segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
8887c478bd9Sstevel@tonic-gate {
8897c478bd9Sstevel@tonic-gate 	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
8907c478bd9Sstevel@tonic-gate 
8917c478bd9Sstevel@tonic-gate 	ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));
8927c478bd9Sstevel@tonic-gate 
8937c478bd9Sstevel@tonic-gate 	/* XXX - This doesn't make any sense */
8947c478bd9Sstevel@tonic-gate 	*vpp = smd->smd_sm->sm_vp;
8957c478bd9Sstevel@tonic-gate 	return (0);
8967c478bd9Sstevel@tonic-gate }
8977c478bd9Sstevel@tonic-gate 
8987c478bd9Sstevel@tonic-gate /*
8997c478bd9Sstevel@tonic-gate  * Check to see if it makes sense to do kluster/read ahead to
9007c478bd9Sstevel@tonic-gate  * addr + delta relative to the mapping at addr.  We assume here
9017c478bd9Sstevel@tonic-gate  * that delta is a signed PAGESIZE'd multiple (which can be negative).
9027c478bd9Sstevel@tonic-gate  *
9037c478bd9Sstevel@tonic-gate  * For segmap we always "approve" of this action from our standpoint.
9047c478bd9Sstevel@tonic-gate  */
9057c478bd9Sstevel@tonic-gate /*ARGSUSED*/
9067c478bd9Sstevel@tonic-gate static int
segmap_kluster(struct seg * seg,caddr_t addr,ssize_t delta)9077c478bd9Sstevel@tonic-gate segmap_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
9087c478bd9Sstevel@tonic-gate {
9097c478bd9Sstevel@tonic-gate 	return (0);
9107c478bd9Sstevel@tonic-gate }
9117c478bd9Sstevel@tonic-gate 
9127c478bd9Sstevel@tonic-gate static void
segmap_badop()9137c478bd9Sstevel@tonic-gate segmap_badop()
9147c478bd9Sstevel@tonic-gate {
9157c478bd9Sstevel@tonic-gate 	panic("segmap_badop");
9167c478bd9Sstevel@tonic-gate 	/*NOTREACHED*/
9177c478bd9Sstevel@tonic-gate }
9187c478bd9Sstevel@tonic-gate 
9197c478bd9Sstevel@tonic-gate /*
9207c478bd9Sstevel@tonic-gate  * Special private segmap operations
9217c478bd9Sstevel@tonic-gate  */
9227c478bd9Sstevel@tonic-gate 
9237c478bd9Sstevel@tonic-gate /*
9247c478bd9Sstevel@tonic-gate  * Add smap to the appropriate free list.
9257c478bd9Sstevel@tonic-gate  */
9267c478bd9Sstevel@tonic-gate static void
segmap_smapadd(struct smap * smp)9277c478bd9Sstevel@tonic-gate segmap_smapadd(struct smap *smp)
9287c478bd9Sstevel@tonic-gate {
9297c478bd9Sstevel@tonic-gate 	struct smfree *sm;
9307c478bd9Sstevel@tonic-gate 	struct smap *smpfreelist;
9317c478bd9Sstevel@tonic-gate 	struct sm_freeq *releq;
9327c478bd9Sstevel@tonic-gate 
9337c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(SMAPMTX(smp)));
9347c478bd9Sstevel@tonic-gate 
9357c478bd9Sstevel@tonic-gate 	if (smp->sm_refcnt != 0) {
9367c478bd9Sstevel@tonic-gate 		panic("segmap_smapadd");
9377c478bd9Sstevel@tonic-gate 		/*NOTREACHED*/
9387c478bd9Sstevel@tonic-gate 	}
9397c478bd9Sstevel@tonic-gate 
9407c478bd9Sstevel@tonic-gate 	sm = &smd_free[smp->sm_free_ndx];
9417c478bd9Sstevel@tonic-gate 	/*
9427c478bd9Sstevel@tonic-gate 	 * Add to the tail of the release queue
9437c478bd9Sstevel@tonic-gate 	 * Note that sm_releq and sm_allocq could toggle
9447c478bd9Sstevel@tonic-gate 	 * before we get the lock. This does not affect
9457c478bd9Sstevel@tonic-gate 	 * correctness as the 2 queues are only maintained
9467c478bd9Sstevel@tonic-gate 	 * to reduce lock pressure.
9477c478bd9Sstevel@tonic-gate 	 */
9487c478bd9Sstevel@tonic-gate 	releq = sm->sm_releq;
9497c478bd9Sstevel@tonic-gate 	if (releq == &sm->sm_freeq[0])
9507c478bd9Sstevel@tonic-gate 		smp->sm_flags |= SM_QNDX_ZERO;
9517c478bd9Sstevel@tonic-gate 	else
9527c478bd9Sstevel@tonic-gate 		smp->sm_flags &= ~SM_QNDX_ZERO;
9537c478bd9Sstevel@tonic-gate 	mutex_enter(&releq->smq_mtx);
9547c478bd9Sstevel@tonic-gate 	smpfreelist = releq->smq_free;
9557c478bd9Sstevel@tonic-gate 	if (smpfreelist == 0) {
9567c478bd9Sstevel@tonic-gate 		int want;
9577c478bd9Sstevel@tonic-gate 
9587c478bd9Sstevel@tonic-gate 		releq->smq_free = smp->sm_next = smp->sm_prev = smp;
9597c478bd9Sstevel@tonic-gate 		/*
9607c478bd9Sstevel@tonic-gate 		 * Both queue mutexes held to set sm_want;
9617c478bd9Sstevel@tonic-gate 		 * snapshot the value before dropping releq mutex.
9627c478bd9Sstevel@tonic-gate 		 * If sm_want appears after the releq mutex is dropped,
9637c478bd9Sstevel@tonic-gate 		 * then the smap just freed is already gone.
9647c478bd9Sstevel@tonic-gate 		 */
9657c478bd9Sstevel@tonic-gate 		want = sm->sm_want;
9667c478bd9Sstevel@tonic-gate 		mutex_exit(&releq->smq_mtx);
9677c478bd9Sstevel@tonic-gate 		/*
9687c478bd9Sstevel@tonic-gate 		 * See if there was a waiter before dropping the releq mutex
9697c478bd9Sstevel@tonic-gate 		 * then recheck after obtaining sm_freeq[0] mutex as
9707c478bd9Sstevel@tonic-gate 		 * the another thread may have already signaled.
9717c478bd9Sstevel@tonic-gate 		 */
9727c478bd9Sstevel@tonic-gate 		if (want) {
9737c478bd9Sstevel@tonic-gate 			mutex_enter(&sm->sm_freeq[0].smq_mtx);
9747c478bd9Sstevel@tonic-gate 			if (sm->sm_want)
9757c478bd9Sstevel@tonic-gate 				cv_signal(&sm->sm_free_cv);
9767c478bd9Sstevel@tonic-gate 			mutex_exit(&sm->sm_freeq[0].smq_mtx);
9777c478bd9Sstevel@tonic-gate 		}
9787c478bd9Sstevel@tonic-gate 	} else {
9797c478bd9Sstevel@tonic-gate 		smp->sm_next = smpfreelist;
9807c478bd9Sstevel@tonic-gate 		smp->sm_prev = smpfreelist->sm_prev;
9817c478bd9Sstevel@tonic-gate 		smpfreelist->sm_prev = smp;
9827c478bd9Sstevel@tonic-gate 		smp->sm_prev->sm_next = smp;
9837c478bd9Sstevel@tonic-gate 		mutex_exit(&releq->smq_mtx);
9847c478bd9Sstevel@tonic-gate 	}
9857c478bd9Sstevel@tonic-gate }
9867c478bd9Sstevel@tonic-gate 
9877c478bd9Sstevel@tonic-gate 
9887c478bd9Sstevel@tonic-gate static struct smap *
segmap_hashin(struct smap * smp,struct vnode * vp,u_offset_t off,int hashid)9897c478bd9Sstevel@tonic-gate segmap_hashin(struct smap *smp, struct vnode *vp, u_offset_t off, int hashid)
9907c478bd9Sstevel@tonic-gate {
9917c478bd9Sstevel@tonic-gate 	struct smap **hpp;
9927c478bd9Sstevel@tonic-gate 	struct smap *tmp;
9937c478bd9Sstevel@tonic-gate 	kmutex_t *hmtx;
9947c478bd9Sstevel@tonic-gate 
9957c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(SMAPMTX(smp)));
9967c478bd9Sstevel@tonic-gate 	ASSERT(smp->sm_vp == NULL);
9977c478bd9Sstevel@tonic-gate 	ASSERT(smp->sm_hash == NULL);
9987c478bd9Sstevel@tonic-gate 	ASSERT(smp->sm_prev == NULL);
9997c478bd9Sstevel@tonic-gate 	ASSERT(smp->sm_next == NULL);
10007c478bd9Sstevel@tonic-gate 	ASSERT(hashid >= 0 && hashid <= smd_hashmsk);
10017c478bd9Sstevel@tonic-gate 
10027c478bd9Sstevel@tonic-gate 	hmtx = SHASHMTX(hashid);
10037c478bd9Sstevel@tonic-gate 
10047c478bd9Sstevel@tonic-gate 	mutex_enter(hmtx);
10057c478bd9Sstevel@tonic-gate 	/*
10067c478bd9Sstevel@tonic-gate 	 * First we need to verify that no one has created a smp
10077c478bd9Sstevel@tonic-gate 	 * with (vp,off) as its tag before we us.
10087c478bd9Sstevel@tonic-gate 	 */
10097c478bd9Sstevel@tonic-gate 	for (tmp = smd_hash[hashid].sh_hash_list;
10107c478bd9Sstevel@tonic-gate 	    tmp != NULL; tmp = tmp->sm_hash)
10117c478bd9Sstevel@tonic-gate 		if (tmp->sm_vp == vp && tmp->sm_off == off)
10127c478bd9Sstevel@tonic-gate 			break;
10137c478bd9Sstevel@tonic-gate 
10147c478bd9Sstevel@tonic-gate 	if (tmp == NULL) {
10157c478bd9Sstevel@tonic-gate 		/*
10167c478bd9Sstevel@tonic-gate 		 * No one created one yet.
10177c478bd9Sstevel@tonic-gate 		 *
10187c478bd9Sstevel@tonic-gate 		 * Funniness here - we don't increment the ref count on the
10197c478bd9Sstevel@tonic-gate 		 * vnode * even though we have another pointer to it here.
10207c478bd9Sstevel@tonic-gate 		 * The reason for this is that we don't want the fact that
10217c478bd9Sstevel@tonic-gate 		 * a seg_map entry somewhere refers to a vnode to prevent the
10227c478bd9Sstevel@tonic-gate 		 * vnode * itself from going away.  This is because this
10237c478bd9Sstevel@tonic-gate 		 * reference to the vnode is a "soft one".  In the case where
10247c478bd9Sstevel@tonic-gate 		 * a mapping is being used by a rdwr [or directory routine?]
10257c478bd9Sstevel@tonic-gate 		 * there already has to be a non-zero ref count on the vnode.
10267c478bd9Sstevel@tonic-gate 		 * In the case where the vp has been freed and the the smap
10277c478bd9Sstevel@tonic-gate 		 * structure is on the free list, there are no pages in memory
10287c478bd9Sstevel@tonic-gate 		 * that can refer to the vnode.  Thus even if we reuse the same
10297c478bd9Sstevel@tonic-gate 		 * vnode/smap structure for a vnode which has the same
10307c478bd9Sstevel@tonic-gate 		 * address but represents a different object, we are ok.
10317c478bd9Sstevel@tonic-gate 		 */
10327c478bd9Sstevel@tonic-gate 		smp->sm_vp = vp;
10337c478bd9Sstevel@tonic-gate 		smp->sm_off = off;
10347c478bd9Sstevel@tonic-gate 
10357c478bd9Sstevel@tonic-gate 		hpp = &smd_hash[hashid].sh_hash_list;
10367c478bd9Sstevel@tonic-gate 		smp->sm_hash = *hpp;
10377c478bd9Sstevel@tonic-gate 		*hpp = smp;
10387c478bd9Sstevel@tonic-gate #ifdef SEGMAP_HASHSTATS
10397c478bd9Sstevel@tonic-gate 		smd_hash_len[hashid]++;
10407c478bd9Sstevel@tonic-gate #endif
10417c478bd9Sstevel@tonic-gate 	}
10427c478bd9Sstevel@tonic-gate 	mutex_exit(hmtx);
10437c478bd9Sstevel@tonic-gate 
10447c478bd9Sstevel@tonic-gate 	return (tmp);
10457c478bd9Sstevel@tonic-gate }
10467c478bd9Sstevel@tonic-gate 
10477c478bd9Sstevel@tonic-gate static void
segmap_hashout(struct smap * smp)10487c478bd9Sstevel@tonic-gate segmap_hashout(struct smap *smp)
10497c478bd9Sstevel@tonic-gate {
10507c478bd9Sstevel@tonic-gate 	struct smap **hpp, *hp;
10517c478bd9Sstevel@tonic-gate 	struct vnode *vp;
10527c478bd9Sstevel@tonic-gate 	kmutex_t *mtx;
10537c478bd9Sstevel@tonic-gate 	int hashid;
10547c478bd9Sstevel@tonic-gate 	u_offset_t off;
10557c478bd9Sstevel@tonic-gate 
10567c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(SMAPMTX(smp)));
10577c478bd9Sstevel@tonic-gate 
10587c478bd9Sstevel@tonic-gate 	vp = smp->sm_vp;
10597c478bd9Sstevel@tonic-gate 	off = smp->sm_off;
10607c478bd9Sstevel@tonic-gate 
10617c478bd9Sstevel@tonic-gate 	SMAP_HASHFUNC(vp, off, hashid);	/* macro assigns hashid */
10627c478bd9Sstevel@tonic-gate 	mtx = SHASHMTX(hashid);
10637c478bd9Sstevel@tonic-gate 	mutex_enter(mtx);
10647c478bd9Sstevel@tonic-gate 
10657c478bd9Sstevel@tonic-gate 	hpp = &smd_hash[hashid].sh_hash_list;
10667c478bd9Sstevel@tonic-gate 	for (;;) {
10677c478bd9Sstevel@tonic-gate 		hp = *hpp;
10687c478bd9Sstevel@tonic-gate 		if (hp == NULL) {
10697c478bd9Sstevel@tonic-gate 			panic("segmap_hashout");
10707c478bd9Sstevel@tonic-gate 			/*NOTREACHED*/
10717c478bd9Sstevel@tonic-gate 		}
10727c478bd9Sstevel@tonic-gate 		if (hp == smp)
10737c478bd9Sstevel@tonic-gate 			break;
10747c478bd9Sstevel@tonic-gate 		hpp = &hp->sm_hash;
10757c478bd9Sstevel@tonic-gate 	}
10767c478bd9Sstevel@tonic-gate 
10777c478bd9Sstevel@tonic-gate 	*hpp = smp->sm_hash;
10787c478bd9Sstevel@tonic-gate 	smp->sm_hash = NULL;
10797c478bd9Sstevel@tonic-gate #ifdef SEGMAP_HASHSTATS
10807c478bd9Sstevel@tonic-gate 	smd_hash_len[hashid]--;
10817c478bd9Sstevel@tonic-gate #endif
10827c478bd9Sstevel@tonic-gate 	mutex_exit(mtx);
10837c478bd9Sstevel@tonic-gate 
10847c478bd9Sstevel@tonic-gate 	smp->sm_vp = NULL;
10857c478bd9Sstevel@tonic-gate 	smp->sm_off = (u_offset_t)0;
10867c478bd9Sstevel@tonic-gate 
10877c478bd9Sstevel@tonic-gate }
10887c478bd9Sstevel@tonic-gate 
10897c478bd9Sstevel@tonic-gate /*
10907c478bd9Sstevel@tonic-gate  * Attempt to free unmodified, unmapped, and non locked segmap
10917c478bd9Sstevel@tonic-gate  * pages.
10927c478bd9Sstevel@tonic-gate  */
10937c478bd9Sstevel@tonic-gate void
segmap_pagefree(struct vnode * vp,u_offset_t off)10947c478bd9Sstevel@tonic-gate segmap_pagefree(struct vnode *vp, u_offset_t off)
10957c478bd9Sstevel@tonic-gate {
10967c478bd9Sstevel@tonic-gate 	u_offset_t pgoff;
10977c478bd9Sstevel@tonic-gate 	page_t  *pp;
10987c478bd9Sstevel@tonic-gate 
10997c478bd9Sstevel@tonic-gate 	for (pgoff = off; pgoff < off + MAXBSIZE; pgoff += PAGESIZE) {
11007c478bd9Sstevel@tonic-gate 
11017c478bd9Sstevel@tonic-gate 		if ((pp = page_lookup_nowait(vp, pgoff, SE_EXCL)) == NULL)
11027c478bd9Sstevel@tonic-gate 			continue;
11037c478bd9Sstevel@tonic-gate 
11047c478bd9Sstevel@tonic-gate 		switch (page_release(pp, 1)) {
11057c478bd9Sstevel@tonic-gate 		case PGREL_NOTREL:
11067c478bd9Sstevel@tonic-gate 			segmapcnt.smp_free_notfree.value.ul++;
11077c478bd9Sstevel@tonic-gate 			break;
11087c478bd9Sstevel@tonic-gate 		case PGREL_MOD:
11097c478bd9Sstevel@tonic-gate 			segmapcnt.smp_free_dirty.value.ul++;
11107c478bd9Sstevel@tonic-gate 			break;
11117c478bd9Sstevel@tonic-gate 		case PGREL_CLEAN:
11127c478bd9Sstevel@tonic-gate 			segmapcnt.smp_free.value.ul++;
11137c478bd9Sstevel@tonic-gate 			break;
11147c478bd9Sstevel@tonic-gate 		}
11157c478bd9Sstevel@tonic-gate 	}
11167c478bd9Sstevel@tonic-gate }
11177c478bd9Sstevel@tonic-gate 
11187c478bd9Sstevel@tonic-gate /*
11197c478bd9Sstevel@tonic-gate  * Locks held on entry: smap lock
11207c478bd9Sstevel@tonic-gate  * Locks held on exit : smap lock.
11217c478bd9Sstevel@tonic-gate  */
11227c478bd9Sstevel@tonic-gate 
11237c478bd9Sstevel@tonic-gate static void
grab_smp(struct smap * smp,page_t * pp)11247c478bd9Sstevel@tonic-gate grab_smp(struct smap *smp, page_t *pp)
11257c478bd9Sstevel@tonic-gate {
11267c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(SMAPMTX(smp)));
11277c478bd9Sstevel@tonic-gate 	ASSERT(smp->sm_refcnt == 0);
11287c478bd9Sstevel@tonic-gate 
11297c478bd9Sstevel@tonic-gate 	if (smp->sm_vp != (struct vnode *)NULL) {
11307c478bd9Sstevel@tonic-gate 		struct vnode	*vp = smp->sm_vp;
11317c478bd9Sstevel@tonic-gate 		u_offset_t 	off = smp->sm_off;
11327c478bd9Sstevel@tonic-gate 		/*
11337c478bd9Sstevel@tonic-gate 		 * Destroy old vnode association and
11347c478bd9Sstevel@tonic-gate 		 * unload any hardware translations to
11357c478bd9Sstevel@tonic-gate 		 * the old object.
11367c478bd9Sstevel@tonic-gate 		 */
11377c478bd9Sstevel@tonic-gate 		smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reuse++;
11387c478bd9Sstevel@tonic-gate 		segmap_hashout(smp);
11397c478bd9Sstevel@tonic-gate 
11407c478bd9Sstevel@tonic-gate 		/*
11417c478bd9Sstevel@tonic-gate 		 * This node is off freelist and hashlist,
11427c478bd9Sstevel@tonic-gate 		 * so there is no reason to drop/reacquire sm_mtx
11437c478bd9Sstevel@tonic-gate 		 * across calls to hat_unload.
11447c478bd9Sstevel@tonic-gate 		 */
11457c478bd9Sstevel@tonic-gate 		if (segmap_kpm) {
11467c478bd9Sstevel@tonic-gate 			caddr_t vaddr;
11477c478bd9Sstevel@tonic-gate 			int hat_unload_needed = 0;
11487c478bd9Sstevel@tonic-gate 
11497c478bd9Sstevel@tonic-gate 			/*
11507c478bd9Sstevel@tonic-gate 			 * unload kpm mapping
11517c478bd9Sstevel@tonic-gate 			 */
11527c478bd9Sstevel@tonic-gate 			if (pp != NULL) {
11537c478bd9Sstevel@tonic-gate 				vaddr = hat_kpm_page2va(pp, 1);
11547c478bd9Sstevel@tonic-gate 				hat_kpm_mapout(pp, GET_KPME(smp), vaddr);
11557c478bd9Sstevel@tonic-gate 				page_unlock(pp);
11567c478bd9Sstevel@tonic-gate 			}
11577c478bd9Sstevel@tonic-gate 
11587c478bd9Sstevel@tonic-gate 			/*
11597c478bd9Sstevel@tonic-gate 			 * Check if we have (also) the rare case of a
11607c478bd9Sstevel@tonic-gate 			 * non kpm mapping.
11617c478bd9Sstevel@tonic-gate 			 */
11627c478bd9Sstevel@tonic-gate 			if (smp->sm_flags & SM_NOTKPM_RELEASED) {
11637c478bd9Sstevel@tonic-gate 				hat_unload_needed = 1;
11647c478bd9Sstevel@tonic-gate 				smp->sm_flags &= ~SM_NOTKPM_RELEASED;
11657c478bd9Sstevel@tonic-gate 			}
11667c478bd9Sstevel@tonic-gate 
11677c478bd9Sstevel@tonic-gate 			if (hat_unload_needed) {
11687c478bd9Sstevel@tonic-gate 				hat_unload(kas.a_hat, segkmap->s_base +
11697c478bd9Sstevel@tonic-gate 				    ((smp - smd_smap) * MAXBSIZE),
11707c478bd9Sstevel@tonic-gate 				    MAXBSIZE, HAT_UNLOAD);
11717c478bd9Sstevel@tonic-gate 			}
11727c478bd9Sstevel@tonic-gate 
11737c478bd9Sstevel@tonic-gate 		} else {
11747c478bd9Sstevel@tonic-gate 			ASSERT(smp->sm_flags & SM_NOTKPM_RELEASED);
11757c478bd9Sstevel@tonic-gate 			smp->sm_flags &= ~SM_NOTKPM_RELEASED;
11767c478bd9Sstevel@tonic-gate 			hat_unload(kas.a_hat, segkmap->s_base +
11777c478bd9Sstevel@tonic-gate 			    ((smp - smd_smap) * MAXBSIZE),
11787c478bd9Sstevel@tonic-gate 			    MAXBSIZE, HAT_UNLOAD);
11797c478bd9Sstevel@tonic-gate 		}
11807c478bd9Sstevel@tonic-gate 		segmap_pagefree(vp, off);
11817c478bd9Sstevel@tonic-gate 	}
11827c478bd9Sstevel@tonic-gate }
11837c478bd9Sstevel@tonic-gate 
11847c478bd9Sstevel@tonic-gate static struct smap *
get_free_smp(int free_ndx)11857c478bd9Sstevel@tonic-gate get_free_smp(int free_ndx)
11867c478bd9Sstevel@tonic-gate {
11877c478bd9Sstevel@tonic-gate 	struct smfree *sm;
11887c478bd9Sstevel@tonic-gate 	kmutex_t *smtx;
11897c478bd9Sstevel@tonic-gate 	struct smap *smp, *first;
11907c478bd9Sstevel@tonic-gate 	struct sm_freeq *allocq, *releq;
11917c478bd9Sstevel@tonic-gate 	struct kpme *kpme;
11927c478bd9Sstevel@tonic-gate 	page_t *pp = NULL;
11937c478bd9Sstevel@tonic-gate 	int end_ndx, page_locked = 0;
11947c478bd9Sstevel@tonic-gate 
11957c478bd9Sstevel@tonic-gate 	end_ndx = free_ndx;
11967c478bd9Sstevel@tonic-gate 	sm = &smd_free[free_ndx];
11977c478bd9Sstevel@tonic-gate 
11987c478bd9Sstevel@tonic-gate retry_queue:
11997c478bd9Sstevel@tonic-gate 	allocq = sm->sm_allocq;
12007c478bd9Sstevel@tonic-gate 	mutex_enter(&allocq->smq_mtx);
12017c478bd9Sstevel@tonic-gate 
12027c478bd9Sstevel@tonic-gate 	if ((smp = allocq->smq_free) == NULL) {
12037c478bd9Sstevel@tonic-gate 
12047c478bd9Sstevel@tonic-gate skip_queue:
12057c478bd9Sstevel@tonic-gate 		/*
12067c478bd9Sstevel@tonic-gate 		 * The alloc list is empty or this queue is being skipped;
12077c478bd9Sstevel@tonic-gate 		 * first see if the allocq toggled.
12087c478bd9Sstevel@tonic-gate 		 */
12097c478bd9Sstevel@tonic-gate 		if (sm->sm_allocq != allocq) {
12107c478bd9Sstevel@tonic-gate 			/* queue changed */
12117c478bd9Sstevel@tonic-gate 			mutex_exit(&allocq->smq_mtx);
12127c478bd9Sstevel@tonic-gate 			goto retry_queue;
12137c478bd9Sstevel@tonic-gate 		}
12147c478bd9Sstevel@tonic-gate 		releq = sm->sm_releq;
12157c478bd9Sstevel@tonic-gate 		if (!mutex_tryenter(&releq->smq_mtx)) {
12167c478bd9Sstevel@tonic-gate 			/* cannot get releq; a free smp may be there now */
12177c478bd9Sstevel@tonic-gate 			mutex_exit(&allocq->smq_mtx);
12187c478bd9Sstevel@tonic-gate 
12197c478bd9Sstevel@tonic-gate 			/*
12207c478bd9Sstevel@tonic-gate 			 * This loop could spin forever if this thread has
12217c478bd9Sstevel@tonic-gate 			 * higher priority than the thread that is holding
12227c478bd9Sstevel@tonic-gate 			 * releq->smq_mtx. In order to force the other thread
12237c478bd9Sstevel@tonic-gate 			 * to run, we'll lock/unlock the mutex which is safe
12247c478bd9Sstevel@tonic-gate 			 * since we just unlocked the allocq mutex.
12257c478bd9Sstevel@tonic-gate 			 */
12267c478bd9Sstevel@tonic-gate 			mutex_enter(&releq->smq_mtx);
12277c478bd9Sstevel@tonic-gate 			mutex_exit(&releq->smq_mtx);
12287c478bd9Sstevel@tonic-gate 			goto retry_queue;
12297c478bd9Sstevel@tonic-gate 		}
12307c478bd9Sstevel@tonic-gate 		if (releq->smq_free == NULL) {
12317c478bd9Sstevel@tonic-gate 			/*
12327c478bd9Sstevel@tonic-gate 			 * This freelist is empty.
12337c478bd9Sstevel@tonic-gate 			 * This should not happen unless clients
12347c478bd9Sstevel@tonic-gate 			 * are failing to release the segmap
12357c478bd9Sstevel@tonic-gate 			 * window after accessing the data.
12367c478bd9Sstevel@tonic-gate 			 * Before resorting to sleeping, try
12377c478bd9Sstevel@tonic-gate 			 * the next list of the same color.
12387c478bd9Sstevel@tonic-gate 			 */
12397c478bd9Sstevel@tonic-gate 			free_ndx = (free_ndx + smd_ncolor) & smd_freemsk;
12407c478bd9Sstevel@tonic-gate 			if (free_ndx != end_ndx) {
12417c478bd9Sstevel@tonic-gate 				mutex_exit(&releq->smq_mtx);
12427c478bd9Sstevel@tonic-gate 				mutex_exit(&allocq->smq_mtx);
12437c478bd9Sstevel@tonic-gate 				sm = &smd_free[free_ndx];
12447c478bd9Sstevel@tonic-gate 				goto retry_queue;
12457c478bd9Sstevel@tonic-gate 			}
12467c478bd9Sstevel@tonic-gate 			/*
12477c478bd9Sstevel@tonic-gate 			 * Tried all freelists of the same color once,
12487c478bd9Sstevel@tonic-gate 			 * wait on this list and hope something gets freed.
12497c478bd9Sstevel@tonic-gate 			 */
12507c478bd9Sstevel@tonic-gate 			segmapcnt.smp_get_nofree.value.ul++;
12517c478bd9Sstevel@tonic-gate 			sm->sm_want++;
12527c478bd9Sstevel@tonic-gate 			mutex_exit(&sm->sm_freeq[1].smq_mtx);
12537c478bd9Sstevel@tonic-gate 			cv_wait(&sm->sm_free_cv,
12547c478bd9Sstevel@tonic-gate 			    &sm->sm_freeq[0].smq_mtx);
12557c478bd9Sstevel@tonic-gate 			sm->sm_want--;
12567c478bd9Sstevel@tonic-gate 			mutex_exit(&sm->sm_freeq[0].smq_mtx);
12577c478bd9Sstevel@tonic-gate 			sm = &smd_free[free_ndx];
12587c478bd9Sstevel@tonic-gate 			goto retry_queue;
12597c478bd9Sstevel@tonic-gate 		} else {
12607c478bd9Sstevel@tonic-gate 			/*
12617c478bd9Sstevel@tonic-gate 			 * Something on the rele queue; flip the alloc
12627c478bd9Sstevel@tonic-gate 			 * and rele queues and retry.
12637c478bd9Sstevel@tonic-gate 			 */
12647c478bd9Sstevel@tonic-gate 			sm->sm_allocq = releq;
12657c478bd9Sstevel@tonic-gate 			sm->sm_releq = allocq;
12667c478bd9Sstevel@tonic-gate 			mutex_exit(&allocq->smq_mtx);
12677c478bd9Sstevel@tonic-gate 			mutex_exit(&releq->smq_mtx);
12687c478bd9Sstevel@tonic-gate 			if (page_locked) {
12697c478bd9Sstevel@tonic-gate 				delay(hz >> 2);
12707c478bd9Sstevel@tonic-gate 				page_locked = 0;
12717c478bd9Sstevel@tonic-gate 			}
12727c478bd9Sstevel@tonic-gate 			goto retry_queue;
12737c478bd9Sstevel@tonic-gate 		}
12747c478bd9Sstevel@tonic-gate 	} else {
12757c478bd9Sstevel@tonic-gate 		/*
12767c478bd9Sstevel@tonic-gate 		 * Fastpath the case we get the smap mutex
12777c478bd9Sstevel@tonic-gate 		 * on the first try.
12787c478bd9Sstevel@tonic-gate 		 */
12797c478bd9Sstevel@tonic-gate 		first = smp;
12807c478bd9Sstevel@tonic-gate next_smap:
12817c478bd9Sstevel@tonic-gate 		smtx = SMAPMTX(smp);
12827c478bd9Sstevel@tonic-gate 		if (!mutex_tryenter(smtx)) {
12837c478bd9Sstevel@tonic-gate 			/*
12847c478bd9Sstevel@tonic-gate 			 * Another thread is trying to reclaim this slot.
12857c478bd9Sstevel@tonic-gate 			 * Skip to the next queue or smap.
12867c478bd9Sstevel@tonic-gate 			 */
12877c478bd9Sstevel@tonic-gate 			if ((smp = smp->sm_next) == first) {
12887c478bd9Sstevel@tonic-gate 				goto skip_queue;
12897c478bd9Sstevel@tonic-gate 			} else {
12907c478bd9Sstevel@tonic-gate 				goto next_smap;
12917c478bd9Sstevel@tonic-gate 			}
12927c478bd9Sstevel@tonic-gate 		} else {
12937c478bd9Sstevel@tonic-gate 			/*
12947c478bd9Sstevel@tonic-gate 			 * if kpme exists, get shared lock on the page
12957c478bd9Sstevel@tonic-gate 			 */
12967c478bd9Sstevel@tonic-gate 			if (segmap_kpm && smp->sm_vp != NULL) {
12977c478bd9Sstevel@tonic-gate 
12987c478bd9Sstevel@tonic-gate 				kpme = GET_KPME(smp);
12997c478bd9Sstevel@tonic-gate 				pp = kpme->kpe_page;
13007c478bd9Sstevel@tonic-gate 
13017c478bd9Sstevel@tonic-gate 				if (pp != NULL) {
13027c478bd9Sstevel@tonic-gate 					if (!page_trylock(pp, SE_SHARED)) {
13037c478bd9Sstevel@tonic-gate 						smp = smp->sm_next;
13047c478bd9Sstevel@tonic-gate 						mutex_exit(smtx);
13057c478bd9Sstevel@tonic-gate 						page_locked = 1;
13067c478bd9Sstevel@tonic-gate 
13077c478bd9Sstevel@tonic-gate 						pp = NULL;
13087c478bd9Sstevel@tonic-gate 
13097c478bd9Sstevel@tonic-gate 						if (smp == first) {
13107c478bd9Sstevel@tonic-gate 							goto skip_queue;
13117c478bd9Sstevel@tonic-gate 						} else {
13127c478bd9Sstevel@tonic-gate 							goto next_smap;
13137c478bd9Sstevel@tonic-gate 						}
13147c478bd9Sstevel@tonic-gate 					} else {
13157c478bd9Sstevel@tonic-gate 						if (kpme->kpe_page == NULL) {
13167c478bd9Sstevel@tonic-gate 							page_unlock(pp);
13177c478bd9Sstevel@tonic-gate 							pp = NULL;
13187c478bd9Sstevel@tonic-gate 						}
13197c478bd9Sstevel@tonic-gate 					}
13207c478bd9Sstevel@tonic-gate 				}
13217c478bd9Sstevel@tonic-gate 			}
13227c478bd9Sstevel@tonic-gate 
13237c478bd9Sstevel@tonic-gate 			/*
13247c478bd9Sstevel@tonic-gate 			 * At this point, we've selected smp.  Remove smp
13257c478bd9Sstevel@tonic-gate 			 * from its freelist.  If smp is the first one in
13267c478bd9Sstevel@tonic-gate 			 * the freelist, update the head of the freelist.
13277c478bd9Sstevel@tonic-gate 			 */
13287c478bd9Sstevel@tonic-gate 			if (first == smp) {
13297c478bd9Sstevel@tonic-gate 				ASSERT(first == allocq->smq_free);
13307c478bd9Sstevel@tonic-gate 				allocq->smq_free = smp->sm_next;
13317c478bd9Sstevel@tonic-gate 			}
13327c478bd9Sstevel@tonic-gate 
13337c478bd9Sstevel@tonic-gate 			/*
13347c478bd9Sstevel@tonic-gate 			 * if the head of the freelist still points to smp,
13357c478bd9Sstevel@tonic-gate 			 * then there are no more free smaps in that list.
13367c478bd9Sstevel@tonic-gate 			 */
13377c478bd9Sstevel@tonic-gate 			if (allocq->smq_free == smp)
13387c478bd9Sstevel@tonic-gate 				/*
13397c478bd9Sstevel@tonic-gate 				 * Took the last one
13407c478bd9Sstevel@tonic-gate 				 */
13417c478bd9Sstevel@tonic-gate 				allocq->smq_free = NULL;
13427c478bd9Sstevel@tonic-gate 			else {
13437c478bd9Sstevel@tonic-gate 				smp->sm_prev->sm_next = smp->sm_next;
13447c478bd9Sstevel@tonic-gate 				smp->sm_next->sm_prev = smp->sm_prev;
13457c478bd9Sstevel@tonic-gate 			}
13467c478bd9Sstevel@tonic-gate 			mutex_exit(&allocq->smq_mtx);
13477c478bd9Sstevel@tonic-gate 			smp->sm_prev = smp->sm_next = NULL;
13487c478bd9Sstevel@tonic-gate 
13497c478bd9Sstevel@tonic-gate 			/*
13507c478bd9Sstevel@tonic-gate 			 * if pp != NULL, pp must have been locked;
13517c478bd9Sstevel@tonic-gate 			 * grab_smp() unlocks pp.
13527c478bd9Sstevel@tonic-gate 			 */
13537c478bd9Sstevel@tonic-gate 			ASSERT((pp == NULL) || PAGE_LOCKED(pp));
13547c478bd9Sstevel@tonic-gate 			grab_smp(smp, pp);
13557c478bd9Sstevel@tonic-gate 			/* return smp locked. */
13567c478bd9Sstevel@tonic-gate 			ASSERT(SMAPMTX(smp) == smtx);
13577c478bd9Sstevel@tonic-gate 			ASSERT(MUTEX_HELD(smtx));
13587c478bd9Sstevel@tonic-gate 			return (smp);
13597c478bd9Sstevel@tonic-gate 		}
13607c478bd9Sstevel@tonic-gate 	}
13617c478bd9Sstevel@tonic-gate }
13627c478bd9Sstevel@tonic-gate 
13637c478bd9Sstevel@tonic-gate /*
13647c478bd9Sstevel@tonic-gate  * Special public segmap operations
13657c478bd9Sstevel@tonic-gate  */
13667c478bd9Sstevel@tonic-gate 
13677c478bd9Sstevel@tonic-gate /*
1368da6c28aaSamw  * Create pages (without using VOP_GETPAGE) and load up translations to them.
13697c478bd9Sstevel@tonic-gate  * If softlock is TRUE, then set things up so that it looks like a call
13707c478bd9Sstevel@tonic-gate  * to segmap_fault with F_SOFTLOCK.
13717c478bd9Sstevel@tonic-gate  *
13727c478bd9Sstevel@tonic-gate  * Returns 1, if a page is created by calling page_create_va(), or 0 otherwise.
13737c478bd9Sstevel@tonic-gate  *
13747c478bd9Sstevel@tonic-gate  * All fields in the generic segment (struct seg) are considered to be
13757c478bd9Sstevel@tonic-gate  * read-only for "segmap" even though the kernel address space (kas) may
13767c478bd9Sstevel@tonic-gate  * not be locked, hence no lock is needed to access them.
13777c478bd9Sstevel@tonic-gate  */
13787c478bd9Sstevel@tonic-gate int
segmap_pagecreate(struct seg * seg,caddr_t addr,size_t len,int softlock)13797c478bd9Sstevel@tonic-gate segmap_pagecreate(struct seg *seg, caddr_t addr, size_t len, int softlock)
13807c478bd9Sstevel@tonic-gate {
13817c478bd9Sstevel@tonic-gate 	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
13827c478bd9Sstevel@tonic-gate 	page_t *pp;
13837c478bd9Sstevel@tonic-gate 	u_offset_t off;
13847c478bd9Sstevel@tonic-gate 	struct smap *smp;
13857c478bd9Sstevel@tonic-gate 	struct vnode *vp;
13867c478bd9Sstevel@tonic-gate 	caddr_t eaddr;
13877c478bd9Sstevel@tonic-gate 	int newpage = 0;
13887c478bd9Sstevel@tonic-gate 	uint_t prot;
13897c478bd9Sstevel@tonic-gate 	kmutex_t *smtx;
13907c478bd9Sstevel@tonic-gate 	int hat_flag;
13917c478bd9Sstevel@tonic-gate 
13927c478bd9Sstevel@tonic-gate 	ASSERT(seg->s_as == &kas);
13937c478bd9Sstevel@tonic-gate 
13947c478bd9Sstevel@tonic-gate 	if (segmap_kpm && IS_KPM_ADDR(addr)) {
13957c478bd9Sstevel@tonic-gate 		/*
13967c478bd9Sstevel@tonic-gate 		 * Pages are successfully prefaulted and locked in
13977c478bd9Sstevel@tonic-gate 		 * segmap_getmapflt and can't be unlocked until
13987c478bd9Sstevel@tonic-gate 		 * segmap_release. The SM_KPM_NEWPAGE flag is set
13997c478bd9Sstevel@tonic-gate 		 * in segmap_pagecreate_kpm when new pages are created.
14007c478bd9Sstevel@tonic-gate 		 * and it is returned as "newpage" indication here.
14017c478bd9Sstevel@tonic-gate 		 */
14027c478bd9Sstevel@tonic-gate 		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
14037c478bd9Sstevel@tonic-gate 			panic("segmap_pagecreate: smap not found "
14047c478bd9Sstevel@tonic-gate 			    "for addr %p", (void *)addr);
14057c478bd9Sstevel@tonic-gate 			/*NOTREACHED*/
14067c478bd9Sstevel@tonic-gate 		}
14077c478bd9Sstevel@tonic-gate 
14087c478bd9Sstevel@tonic-gate 		smtx = SMAPMTX(smp);
14097c478bd9Sstevel@tonic-gate 		newpage = smp->sm_flags & SM_KPM_NEWPAGE;
14107c478bd9Sstevel@tonic-gate 		smp->sm_flags &= ~SM_KPM_NEWPAGE;
14117c478bd9Sstevel@tonic-gate 		mutex_exit(smtx);
14127c478bd9Sstevel@tonic-gate 
14137c478bd9Sstevel@tonic-gate 		return (newpage);
14147c478bd9Sstevel@tonic-gate 	}
14157c478bd9Sstevel@tonic-gate 
14167c478bd9Sstevel@tonic-gate 	smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;
14177c478bd9Sstevel@tonic-gate 
14187c478bd9Sstevel@tonic-gate 	eaddr = addr + len;
14197c478bd9Sstevel@tonic-gate 	addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
14207c478bd9Sstevel@tonic-gate 
14217c478bd9Sstevel@tonic-gate 	smp = GET_SMAP(seg, addr);
14227c478bd9Sstevel@tonic-gate 
14237c478bd9Sstevel@tonic-gate 	/*
14247c478bd9Sstevel@tonic-gate 	 * We don't grab smp mutex here since we assume the smp
14257c478bd9Sstevel@tonic-gate 	 * has a refcnt set already which prevents the slot from
14267c478bd9Sstevel@tonic-gate 	 * changing its id.
14277c478bd9Sstevel@tonic-gate 	 */
14287c478bd9Sstevel@tonic-gate 	ASSERT(smp->sm_refcnt > 0);
14297c478bd9Sstevel@tonic-gate 
14307c478bd9Sstevel@tonic-gate 	vp = smp->sm_vp;
14317c478bd9Sstevel@tonic-gate 	off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET));
14327c478bd9Sstevel@tonic-gate 	prot = smd->smd_prot;
14337c478bd9Sstevel@tonic-gate 
14347c478bd9Sstevel@tonic-gate 	for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
14357c478bd9Sstevel@tonic-gate 		hat_flag = HAT_LOAD;
14367c478bd9Sstevel@tonic-gate 		pp = page_lookup(vp, off, SE_SHARED);
14377c478bd9Sstevel@tonic-gate 		if (pp == NULL) {
14387c478bd9Sstevel@tonic-gate 			ushort_t bitindex;
14397c478bd9Sstevel@tonic-gate 
14407c478bd9Sstevel@tonic-gate 			if ((pp = page_create_va(vp, off,
14417c478bd9Sstevel@tonic-gate 			    PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
14427c478bd9Sstevel@tonic-gate 				panic("segmap_pagecreate: page_create failed");
14437c478bd9Sstevel@tonic-gate 				/*NOTREACHED*/
14447c478bd9Sstevel@tonic-gate 			}
14457c478bd9Sstevel@tonic-gate 			newpage = 1;
14467c478bd9Sstevel@tonic-gate 			page_io_unlock(pp);
14477c478bd9Sstevel@tonic-gate 
14487c478bd9Sstevel@tonic-gate 			/*
14497c478bd9Sstevel@tonic-gate 			 * Since pages created here do not contain valid
14507c478bd9Sstevel@tonic-gate 			 * data until the caller writes into them, the
14517c478bd9Sstevel@tonic-gate 			 * "exclusive" lock will not be dropped to prevent
14527c478bd9Sstevel@tonic-gate 			 * other users from accessing the page.  We also
14537c478bd9Sstevel@tonic-gate 			 * have to lock the translation to prevent a fault
1454da6c28aaSamw 			 * from occurring when the virtual address mapped by
14557c478bd9Sstevel@tonic-gate 			 * this page is written into.  This is necessary to
14567c478bd9Sstevel@tonic-gate 			 * avoid a deadlock since we haven't dropped the
14577c478bd9Sstevel@tonic-gate 			 * "exclusive" lock.
14587c478bd9Sstevel@tonic-gate 			 */
14597c478bd9Sstevel@tonic-gate 			bitindex = (ushort_t)((off - smp->sm_off) >> PAGESHIFT);
14607c478bd9Sstevel@tonic-gate 
14617c478bd9Sstevel@tonic-gate 			/*
14627c478bd9Sstevel@tonic-gate 			 * Large Files: The following assertion is to
14637c478bd9Sstevel@tonic-gate 			 * verify the cast above.
14647c478bd9Sstevel@tonic-gate 			 */
14657c478bd9Sstevel@tonic-gate 			ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
14667c478bd9Sstevel@tonic-gate 			smtx = SMAPMTX(smp);
14677c478bd9Sstevel@tonic-gate 			mutex_enter(smtx);
14687c478bd9Sstevel@tonic-gate 			smp->sm_bitmap |= SMAP_BIT_MASK(bitindex);
14697c478bd9Sstevel@tonic-gate 			mutex_exit(smtx);
14707c478bd9Sstevel@tonic-gate 
14717c478bd9Sstevel@tonic-gate 			hat_flag = HAT_LOAD_LOCK;
14727c478bd9Sstevel@tonic-gate 		} else if (softlock) {
14737c478bd9Sstevel@tonic-gate 			hat_flag = HAT_LOAD_LOCK;
14747c478bd9Sstevel@tonic-gate 		}
14757c478bd9Sstevel@tonic-gate 
14767c478bd9Sstevel@tonic-gate 		if (IS_VMODSORT(pp->p_vnode) && (prot & PROT_WRITE))
14777c478bd9Sstevel@tonic-gate 			hat_setmod(pp);
14787c478bd9Sstevel@tonic-gate 
14797c478bd9Sstevel@tonic-gate 		hat_memload(kas.a_hat, addr, pp, prot, hat_flag);
14807c478bd9Sstevel@tonic-gate 
14817c478bd9Sstevel@tonic-gate 		if (hat_flag != HAT_LOAD_LOCK)
14827c478bd9Sstevel@tonic-gate 			page_unlock(pp);
14837c478bd9Sstevel@tonic-gate 
14847c478bd9Sstevel@tonic-gate 		TRACE_5(TR_FAC_VM, TR_SEGMAP_PAGECREATE,
14857c478bd9Sstevel@tonic-gate 		    "segmap_pagecreate:seg %p addr %p pp %p vp %p offset %llx",
14867c478bd9Sstevel@tonic-gate 		    seg, addr, pp, vp, off);
14877c478bd9Sstevel@tonic-gate 	}
14887c478bd9Sstevel@tonic-gate 
14897c478bd9Sstevel@tonic-gate 	return (newpage);
14907c478bd9Sstevel@tonic-gate }
14917c478bd9Sstevel@tonic-gate 
14927c478bd9Sstevel@tonic-gate void
segmap_pageunlock(struct seg * seg,caddr_t addr,size_t len,enum seg_rw rw)14937c478bd9Sstevel@tonic-gate segmap_pageunlock(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw)
14947c478bd9Sstevel@tonic-gate {
14957c478bd9Sstevel@tonic-gate 	struct smap	*smp;
14967c478bd9Sstevel@tonic-gate 	ushort_t	bitmask;
14977c478bd9Sstevel@tonic-gate 	page_t		*pp;
14987c478bd9Sstevel@tonic-gate 	struct	vnode	*vp;
14997c478bd9Sstevel@tonic-gate 	u_offset_t	off;
15007c478bd9Sstevel@tonic-gate 	caddr_t		eaddr;
15017c478bd9Sstevel@tonic-gate 	kmutex_t	*smtx;
15027c478bd9Sstevel@tonic-gate 
15037c478bd9Sstevel@tonic-gate 	ASSERT(seg->s_as == &kas);
15047c478bd9Sstevel@tonic-gate 
15057c478bd9Sstevel@tonic-gate 	eaddr = addr + len;
15067c478bd9Sstevel@tonic-gate 	addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
15077c478bd9Sstevel@tonic-gate 
15087c478bd9Sstevel@tonic-gate 	if (segmap_kpm && IS_KPM_ADDR(addr)) {
15097c478bd9Sstevel@tonic-gate 		/*
15107c478bd9Sstevel@tonic-gate 		 * Pages are successfully prefaulted and locked in
15117c478bd9Sstevel@tonic-gate 		 * segmap_getmapflt and can't be unlocked until
15127c478bd9Sstevel@tonic-gate 		 * segmap_release, so no pages or hat mappings have
15137c478bd9Sstevel@tonic-gate 		 * to be unlocked at this point.
15147c478bd9Sstevel@tonic-gate 		 */
15157c478bd9Sstevel@tonic-gate #ifdef DEBUG
15167c478bd9Sstevel@tonic-gate 		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
15177c478bd9Sstevel@tonic-gate 			panic("segmap_pageunlock: smap not found "
15187c478bd9Sstevel@tonic-gate 			    "for addr %p", (void *)addr);
15197c478bd9Sstevel@tonic-gate 			/*NOTREACHED*/
15207c478bd9Sstevel@tonic-gate 		}
15217c478bd9Sstevel@tonic-gate 
15227c478bd9Sstevel@tonic-gate 		ASSERT(smp->sm_refcnt > 0);
15237c478bd9Sstevel@tonic-gate 		mutex_exit(SMAPMTX(smp));
15247c478bd9Sstevel@tonic-gate #endif
15257c478bd9Sstevel@tonic-gate 		return;
15267c478bd9Sstevel@tonic-gate 	}
15277c478bd9Sstevel@tonic-gate 
15287c478bd9Sstevel@tonic-gate 	smp = GET_SMAP(seg, addr);
15297c478bd9Sstevel@tonic-gate 	smtx = SMAPMTX(smp);
15307c478bd9Sstevel@tonic-gate 
15317c478bd9Sstevel@tonic-gate 	ASSERT(smp->sm_refcnt > 0);
15327c478bd9Sstevel@tonic-gate 
15337c478bd9Sstevel@tonic-gate 	vp = smp->sm_vp;
15347c478bd9Sstevel@tonic-gate 	off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET));
15357c478bd9Sstevel@tonic-gate 
15367c478bd9Sstevel@tonic-gate 	for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
15377c478bd9Sstevel@tonic-gate 		bitmask = SMAP_BIT_MASK((int)(off - smp->sm_off) >> PAGESHIFT);
15387c478bd9Sstevel@tonic-gate 
15397c478bd9Sstevel@tonic-gate 		/*
15407c478bd9Sstevel@tonic-gate 		 * Large Files: Following assertion is to verify
15417c478bd9Sstevel@tonic-gate 		 * the correctness of the cast to (int) above.
15427c478bd9Sstevel@tonic-gate 		 */
15437c478bd9Sstevel@tonic-gate 		ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
15447c478bd9Sstevel@tonic-gate 
15457c478bd9Sstevel@tonic-gate 		/*
15467c478bd9Sstevel@tonic-gate 		 * If the bit corresponding to "off" is set,
15477c478bd9Sstevel@tonic-gate 		 * clear this bit in the bitmap, unlock translations,
15487c478bd9Sstevel@tonic-gate 		 * and release the "exclusive" lock on the page.
15497c478bd9Sstevel@tonic-gate 		 */
15507c478bd9Sstevel@tonic-gate 		if (smp->sm_bitmap & bitmask) {
15517c478bd9Sstevel@tonic-gate 			mutex_enter(smtx);
15527c478bd9Sstevel@tonic-gate 			smp->sm_bitmap &= ~bitmask;
15537c478bd9Sstevel@tonic-gate 			mutex_exit(smtx);
15547c478bd9Sstevel@tonic-gate 
15557c478bd9Sstevel@tonic-gate 			hat_unlock(kas.a_hat, addr, PAGESIZE);
15567c478bd9Sstevel@tonic-gate 
15577c478bd9Sstevel@tonic-gate 			/*
15587c478bd9Sstevel@tonic-gate 			 * Use page_find() instead of page_lookup() to
15597c478bd9Sstevel@tonic-gate 			 * find the page since we know that it has
15607c478bd9Sstevel@tonic-gate 			 * "exclusive" lock.
15617c478bd9Sstevel@tonic-gate 			 */
15627c478bd9Sstevel@tonic-gate 			pp = page_find(vp, off);
15637c478bd9Sstevel@tonic-gate 			if (pp == NULL) {
15647c478bd9Sstevel@tonic-gate 				panic("segmap_pageunlock: page not found");
15657c478bd9Sstevel@tonic-gate 				/*NOTREACHED*/
15667c478bd9Sstevel@tonic-gate 			}
15677c478bd9Sstevel@tonic-gate 			if (rw == S_WRITE) {
15687c478bd9Sstevel@tonic-gate 				hat_setrefmod(pp);
15697c478bd9Sstevel@tonic-gate 			} else if (rw != S_OTHER) {
15707c478bd9Sstevel@tonic-gate 				hat_setref(pp);
15717c478bd9Sstevel@tonic-gate 			}
15727c478bd9Sstevel@tonic-gate 
15737c478bd9Sstevel@tonic-gate 			page_unlock(pp);
15747c478bd9Sstevel@tonic-gate 		}
15757c478bd9Sstevel@tonic-gate 	}
15767c478bd9Sstevel@tonic-gate }
15777c478bd9Sstevel@tonic-gate 
15787c478bd9Sstevel@tonic-gate caddr_t
segmap_getmap(struct seg * seg,struct vnode * vp,u_offset_t off)15797c478bd9Sstevel@tonic-gate segmap_getmap(struct seg *seg, struct vnode *vp, u_offset_t off)
15807c478bd9Sstevel@tonic-gate {
15817c478bd9Sstevel@tonic-gate 	return (segmap_getmapflt(seg, vp, off, MAXBSIZE, 0, S_OTHER));
15827c478bd9Sstevel@tonic-gate }
15837c478bd9Sstevel@tonic-gate 
15847c478bd9Sstevel@tonic-gate /*
15857c478bd9Sstevel@tonic-gate  * This is the magic virtual address that offset 0 of an ELF
15867c478bd9Sstevel@tonic-gate  * file gets mapped to in user space. This is used to pick
15877c478bd9Sstevel@tonic-gate  * the vac color on the freelist.
15887c478bd9Sstevel@tonic-gate  */
15897c478bd9Sstevel@tonic-gate #define	ELF_OFFZERO_VA	(0x10000)
15907c478bd9Sstevel@tonic-gate /*
15917c478bd9Sstevel@tonic-gate  * segmap_getmap allocates a MAXBSIZE big slot to map the vnode vp
15927c478bd9Sstevel@tonic-gate  * in the range <off, off + len). off doesn't need to be MAXBSIZE aligned.
15937c478bd9Sstevel@tonic-gate  * The return address is  always MAXBSIZE aligned.
15947c478bd9Sstevel@tonic-gate  *
15957c478bd9Sstevel@tonic-gate  * If forcefault is nonzero and the MMU translations haven't yet been created,
15967c478bd9Sstevel@tonic-gate  * segmap_getmap will call segmap_fault(..., F_INVAL, rw) to create them.
15977c478bd9Sstevel@tonic-gate  */
15987c478bd9Sstevel@tonic-gate caddr_t
segmap_getmapflt(struct seg * seg,struct vnode * vp,u_offset_t off,size_t len,int forcefault,enum seg_rw rw)15997c478bd9Sstevel@tonic-gate segmap_getmapflt(
16007c478bd9Sstevel@tonic-gate 	struct seg *seg,
16017c478bd9Sstevel@tonic-gate 	struct vnode *vp,
16027c478bd9Sstevel@tonic-gate 	u_offset_t off,
16037c478bd9Sstevel@tonic-gate 	size_t len,
16047c478bd9Sstevel@tonic-gate 	int forcefault,
16057c478bd9Sstevel@tonic-gate 	enum seg_rw rw)
16067c478bd9Sstevel@tonic-gate {
16077c478bd9Sstevel@tonic-gate 	struct smap *smp, *nsmp;
16087c478bd9Sstevel@tonic-gate 	extern struct vnode *common_specvp();
16097c478bd9Sstevel@tonic-gate 	caddr_t baseaddr;			/* MAXBSIZE aligned */
16107c478bd9Sstevel@tonic-gate 	u_offset_t baseoff;
16117c478bd9Sstevel@tonic-gate 	int newslot;
16127c478bd9Sstevel@tonic-gate 	caddr_t vaddr;
16137c478bd9Sstevel@tonic-gate 	int color, hashid;
16147c478bd9Sstevel@tonic-gate 	kmutex_t *hashmtx, *smapmtx;
16157c478bd9Sstevel@tonic-gate 	struct smfree *sm;
16167c478bd9Sstevel@tonic-gate 	page_t	*pp;
16177c478bd9Sstevel@tonic-gate 	struct kpme *kpme;
16187c478bd9Sstevel@tonic-gate 	uint_t	prot;
16197c478bd9Sstevel@tonic-gate 	caddr_t base;
16207c478bd9Sstevel@tonic-gate 	page_t	*pl[MAXPPB + 1];
16217c478bd9Sstevel@tonic-gate 	int	error;
16227c478bd9Sstevel@tonic-gate 	int	is_kpm = 1;
16237c478bd9Sstevel@tonic-gate 
16247c478bd9Sstevel@tonic-gate 	ASSERT(seg->s_as == &kas);
16257c478bd9Sstevel@tonic-gate 	ASSERT(seg == segkmap);
16267c478bd9Sstevel@tonic-gate 
16277c478bd9Sstevel@tonic-gate 	baseoff = off & (offset_t)MAXBMASK;
16287c478bd9Sstevel@tonic-gate 	if (off + len > baseoff + MAXBSIZE) {
16297c478bd9Sstevel@tonic-gate 		panic("segmap_getmap bad len");
16307c478bd9Sstevel@tonic-gate 		/*NOTREACHED*/
16317c478bd9Sstevel@tonic-gate 	}
16327c478bd9Sstevel@tonic-gate 
16337c478bd9Sstevel@tonic-gate 	/*
16347c478bd9Sstevel@tonic-gate 	 * If this is a block device we have to be sure to use the
16357c478bd9Sstevel@tonic-gate 	 * "common" block device vnode for the mapping.
16367c478bd9Sstevel@tonic-gate 	 */
16377c478bd9Sstevel@tonic-gate 	if (vp->v_type == VBLK)
16387c478bd9Sstevel@tonic-gate 		vp = common_specvp(vp);
16397c478bd9Sstevel@tonic-gate 
16407c478bd9Sstevel@tonic-gate 	smd_cpu[CPU->cpu_seqid].scpu.scpu_getmap++;
16417c478bd9Sstevel@tonic-gate 
16427c478bd9Sstevel@tonic-gate 	if (segmap_kpm == 0 ||
16437c478bd9Sstevel@tonic-gate 	    (forcefault == SM_PAGECREATE && rw != S_WRITE)) {
16447c478bd9Sstevel@tonic-gate 		is_kpm = 0;
16457c478bd9Sstevel@tonic-gate 	}
16467c478bd9Sstevel@tonic-gate 
16477c478bd9Sstevel@tonic-gate 	SMAP_HASHFUNC(vp, off, hashid);	/* macro assigns hashid */
16487c478bd9Sstevel@tonic-gate 	hashmtx = SHASHMTX(hashid);
16497c478bd9Sstevel@tonic-gate 
16507c478bd9Sstevel@tonic-gate retry_hash:
16517c478bd9Sstevel@tonic-gate 	mutex_enter(hashmtx);
16527c478bd9Sstevel@tonic-gate 	for (smp = smd_hash[hashid].sh_hash_list;
16537c478bd9Sstevel@tonic-gate 	    smp != NULL; smp = smp->sm_hash)
16547c478bd9Sstevel@tonic-gate 		if (smp->sm_vp == vp && smp->sm_off == baseoff)
16557c478bd9Sstevel@tonic-gate 			break;
16567c478bd9Sstevel@tonic-gate 	mutex_exit(hashmtx);
16577c478bd9Sstevel@tonic-gate 
16587c478bd9Sstevel@tonic-gate vrfy_smp:
16597c478bd9Sstevel@tonic-gate 	if (smp != NULL) {
16607c478bd9Sstevel@tonic-gate 
16617c478bd9Sstevel@tonic-gate 		ASSERT(vp->v_count != 0);
16627c478bd9Sstevel@tonic-gate 
16637c478bd9Sstevel@tonic-gate 		/*
16647c478bd9Sstevel@tonic-gate 		 * Get smap lock and recheck its tag. The hash lock
16657c478bd9Sstevel@tonic-gate 		 * is dropped since the hash is based on (vp, off)
16667c478bd9Sstevel@tonic-gate 		 * and (vp, off) won't change when we have smap mtx.
16677c478bd9Sstevel@tonic-gate 		 */
16687c478bd9Sstevel@tonic-gate 		smapmtx = SMAPMTX(smp);
16697c478bd9Sstevel@tonic-gate 		mutex_enter(smapmtx);
16707c478bd9Sstevel@tonic-gate 		if (smp->sm_vp != vp || smp->sm_off != baseoff) {
16717c478bd9Sstevel@tonic-gate 			mutex_exit(smapmtx);
16727c478bd9Sstevel@tonic-gate 			goto retry_hash;
16737c478bd9Sstevel@tonic-gate 		}
16747c478bd9Sstevel@tonic-gate 
16757c478bd9Sstevel@tonic-gate 		if (smp->sm_refcnt == 0) {
16767c478bd9Sstevel@tonic-gate 
16777c478bd9Sstevel@tonic-gate 			smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reclaim++;
16787c478bd9Sstevel@tonic-gate 
16797c478bd9Sstevel@tonic-gate 			/*
16807c478bd9Sstevel@tonic-gate 			 * Could still be on the free list. However, this
16817c478bd9Sstevel@tonic-gate 			 * could also be an smp that is transitioning from
16827c478bd9Sstevel@tonic-gate 			 * the free list when we have too much contention
16837c478bd9Sstevel@tonic-gate 			 * for the smapmtx's. In this case, we have an
16847c478bd9Sstevel@tonic-gate 			 * unlocked smp that is not on the free list any
16857c478bd9Sstevel@tonic-gate 			 * longer, but still has a 0 refcnt.  The only way
16867c478bd9Sstevel@tonic-gate 			 * to be sure is to check the freelist pointers.
16877c478bd9Sstevel@tonic-gate 			 * Since we now have the smapmtx, we are guaranteed
16887c478bd9Sstevel@tonic-gate 			 * that the (vp, off) won't change, so we are safe
16897c478bd9Sstevel@tonic-gate 			 * to reclaim it.  get_free_smp() knows that this
16907c478bd9Sstevel@tonic-gate 			 * can happen, and it will check the refcnt.
16917c478bd9Sstevel@tonic-gate 			 */
16927c478bd9Sstevel@tonic-gate 
16937c478bd9Sstevel@tonic-gate 			if ((smp->sm_next != NULL)) {
16947c478bd9Sstevel@tonic-gate 				struct sm_freeq *freeq;
16957c478bd9Sstevel@tonic-gate 
16967c478bd9Sstevel@tonic-gate 				ASSERT(smp->sm_prev != NULL);
16977c478bd9Sstevel@tonic-gate 				sm = &smd_free[smp->sm_free_ndx];
16987c478bd9Sstevel@tonic-gate 
16997c478bd9Sstevel@tonic-gate 				if (smp->sm_flags & SM_QNDX_ZERO)
17007c478bd9Sstevel@tonic-gate 					freeq = &sm->sm_freeq[0];
17017c478bd9Sstevel@tonic-gate 				else
17027c478bd9Sstevel@tonic-gate 					freeq = &sm->sm_freeq[1];
17037c478bd9Sstevel@tonic-gate 
17047c478bd9Sstevel@tonic-gate 				mutex_enter(&freeq->smq_mtx);
17057c478bd9Sstevel@tonic-gate 				if (freeq->smq_free != smp) {
17067c478bd9Sstevel@tonic-gate 					/*
17077c478bd9Sstevel@tonic-gate 					 * fastpath normal case
17087c478bd9Sstevel@tonic-gate 					 */
17097c478bd9Sstevel@tonic-gate 					smp->sm_prev->sm_next = smp->sm_next;
17107c478bd9Sstevel@tonic-gate 					smp->sm_next->sm_prev = smp->sm_prev;
17117c478bd9Sstevel@tonic-gate 				} else if (smp == smp->sm_next) {
17127c478bd9Sstevel@tonic-gate 					/*
17137c478bd9Sstevel@tonic-gate 					 * Taking the last smap on freelist
17147c478bd9Sstevel@tonic-gate 					 */
17157c478bd9Sstevel@tonic-gate 					freeq->smq_free = NULL;
17167c478bd9Sstevel@tonic-gate 				} else {
17177c478bd9Sstevel@tonic-gate 					/*
17187c478bd9Sstevel@tonic-gate 					 * Reclaiming 1st smap on list
17197c478bd9Sstevel@tonic-gate 					 */
17207c478bd9Sstevel@tonic-gate 					freeq->smq_free = smp->sm_next;
17217c478bd9Sstevel@tonic-gate 					smp->sm_prev->sm_next = smp->sm_next;
17227c478bd9Sstevel@tonic-gate 					smp->sm_next->sm_prev = smp->sm_prev;
17237c478bd9Sstevel@tonic-gate 				}
17247c478bd9Sstevel@tonic-gate 				mutex_exit(&freeq->smq_mtx);
17257c478bd9Sstevel@tonic-gate 				smp->sm_prev = smp->sm_next = NULL;
17267c478bd9Sstevel@tonic-gate 			} else {
17277c478bd9Sstevel@tonic-gate 				ASSERT(smp->sm_prev == NULL);
17287c478bd9Sstevel@tonic-gate 				segmapcnt.smp_stolen.value.ul++;
17297c478bd9Sstevel@tonic-gate 			}
17307c478bd9Sstevel@tonic-gate 
17317c478bd9Sstevel@tonic-gate 		} else {
17327c478bd9Sstevel@tonic-gate 			segmapcnt.smp_get_use.value.ul++;
17337c478bd9Sstevel@tonic-gate 		}
17347c478bd9Sstevel@tonic-gate 		smp->sm_refcnt++;		/* another user */
17357c478bd9Sstevel@tonic-gate 
17367c478bd9Sstevel@tonic-gate 		/*
17377c478bd9Sstevel@tonic-gate 		 * We don't invoke segmap_fault via TLB miss, so we set ref
17387c478bd9Sstevel@tonic-gate 		 * and mod bits in advance. For S_OTHER  we set them in
17397c478bd9Sstevel@tonic-gate 		 * segmap_fault F_SOFTUNLOCK.
17407c478bd9Sstevel@tonic-gate 		 */
17417c478bd9Sstevel@tonic-gate 		if (is_kpm) {
17427c478bd9Sstevel@tonic-gate 			if (rw == S_WRITE) {
17437c478bd9Sstevel@tonic-gate 				smp->sm_flags |= SM_WRITE_DATA;
17447c478bd9Sstevel@tonic-gate 			} else if (rw == S_READ) {
17457c478bd9Sstevel@tonic-gate 				smp->sm_flags |= SM_READ_DATA;
17467c478bd9Sstevel@tonic-gate 			}
17477c478bd9Sstevel@tonic-gate 		}
17487c478bd9Sstevel@tonic-gate 		mutex_exit(smapmtx);
17497c478bd9Sstevel@tonic-gate 
17507c478bd9Sstevel@tonic-gate 		newslot = 0;
17517c478bd9Sstevel@tonic-gate 	} else {
17527c478bd9Sstevel@tonic-gate 
17537c478bd9Sstevel@tonic-gate 		uint32_t free_ndx, *free_ndxp;
17547c478bd9Sstevel@tonic-gate 		union segmap_cpu *scpu;
17557c478bd9Sstevel@tonic-gate 
17567c478bd9Sstevel@tonic-gate 		/*
17577c478bd9Sstevel@tonic-gate 		 * On a PAC machine or a machine with anti-alias
17587c478bd9Sstevel@tonic-gate 		 * hardware, smd_colormsk will be zero.
17597c478bd9Sstevel@tonic-gate 		 *
17607c478bd9Sstevel@tonic-gate 		 * On a VAC machine- pick color by offset in the file
17617c478bd9Sstevel@tonic-gate 		 * so we won't get VAC conflicts on elf files.
17627c478bd9Sstevel@tonic-gate 		 * On data files, color does not matter but we
17637c478bd9Sstevel@tonic-gate 		 * don't know what kind of file it is so we always
17647c478bd9Sstevel@tonic-gate 		 * pick color by offset. This causes color
17657c478bd9Sstevel@tonic-gate 		 * corresponding to file offset zero to be used more
17667c478bd9Sstevel@tonic-gate 		 * heavily.
17677c478bd9Sstevel@tonic-gate 		 */
17687c478bd9Sstevel@tonic-gate 		color = (baseoff >> MAXBSHIFT) & smd_colormsk;
17697c478bd9Sstevel@tonic-gate 		scpu = smd_cpu+CPU->cpu_seqid;
17707c478bd9Sstevel@tonic-gate 		free_ndxp = &scpu->scpu.scpu_free_ndx[color];
17717c478bd9Sstevel@tonic-gate 		free_ndx = (*free_ndxp += smd_ncolor) & smd_freemsk;
17727c478bd9Sstevel@tonic-gate #ifdef DEBUG
17737c478bd9Sstevel@tonic-gate 		colors_used[free_ndx]++;
17747c478bd9Sstevel@tonic-gate #endif /* DEBUG */
17757c478bd9Sstevel@tonic-gate 
17767c478bd9Sstevel@tonic-gate 		/*
17777c478bd9Sstevel@tonic-gate 		 * Get a locked smp slot from the free list.
17787c478bd9Sstevel@tonic-gate 		 */
17797c478bd9Sstevel@tonic-gate 		smp = get_free_smp(free_ndx);
17807c478bd9Sstevel@tonic-gate 		smapmtx = SMAPMTX(smp);
17817c478bd9Sstevel@tonic-gate 
17827c478bd9Sstevel@tonic-gate 		ASSERT(smp->sm_vp == NULL);
17837c478bd9Sstevel@tonic-gate 
17847c478bd9Sstevel@tonic-gate 		if ((nsmp = segmap_hashin(smp, vp, baseoff, hashid)) != NULL) {
17857c478bd9Sstevel@tonic-gate 			/*
17867c478bd9Sstevel@tonic-gate 			 * Failed to hashin, there exists one now.
17877c478bd9Sstevel@tonic-gate 			 * Return the smp we just allocated.
17887c478bd9Sstevel@tonic-gate 			 */
17897c478bd9Sstevel@tonic-gate 			segmap_smapadd(smp);
17907c478bd9Sstevel@tonic-gate 			mutex_exit(smapmtx);
17917c478bd9Sstevel@tonic-gate 
17927c478bd9Sstevel@tonic-gate 			smp = nsmp;
17937c478bd9Sstevel@tonic-gate 			goto vrfy_smp;
17947c478bd9Sstevel@tonic-gate 		}
17957c478bd9Sstevel@tonic-gate 		smp->sm_refcnt++;		/* another user */
17967c478bd9Sstevel@tonic-gate 
17977c478bd9Sstevel@tonic-gate 		/*
17987c478bd9Sstevel@tonic-gate 		 * We don't invoke segmap_fault via TLB miss, so we set ref
17997c478bd9Sstevel@tonic-gate 		 * and mod bits in advance. For S_OTHER  we set them in
18007c478bd9Sstevel@tonic-gate 		 * segmap_fault F_SOFTUNLOCK.
18017c478bd9Sstevel@tonic-gate 		 */
18027c478bd9Sstevel@tonic-gate 		if (is_kpm) {
18037c478bd9Sstevel@tonic-gate 			if (rw == S_WRITE) {
18047c478bd9Sstevel@tonic-gate 				smp->sm_flags |= SM_WRITE_DATA;
18057c478bd9Sstevel@tonic-gate 			} else if (rw == S_READ) {
18067c478bd9Sstevel@tonic-gate 				smp->sm_flags |= SM_READ_DATA;
18077c478bd9Sstevel@tonic-gate 			}
18087c478bd9Sstevel@tonic-gate 		}
18097c478bd9Sstevel@tonic-gate 		mutex_exit(smapmtx);
18107c478bd9Sstevel@tonic-gate 
18117c478bd9Sstevel@tonic-gate 		newslot = 1;
18127c478bd9Sstevel@tonic-gate 	}
18137c478bd9Sstevel@tonic-gate 
18147c478bd9Sstevel@tonic-gate 	if (!is_kpm)
18157c478bd9Sstevel@tonic-gate 		goto use_segmap_range;
18167c478bd9Sstevel@tonic-gate 
18177c478bd9Sstevel@tonic-gate 	/*
18187c478bd9Sstevel@tonic-gate 	 * Use segkpm
18197c478bd9Sstevel@tonic-gate 	 */
18208793b36bSNick Todd 	/* Lint directive required until 6746211 is fixed */
18218793b36bSNick Todd 	/*CONSTCOND*/
18227c478bd9Sstevel@tonic-gate 	ASSERT(PAGESIZE == MAXBSIZE);
18237c478bd9Sstevel@tonic-gate 
18247c478bd9Sstevel@tonic-gate 	/*
18257c478bd9Sstevel@tonic-gate 	 * remember the last smp faulted on this cpu.
18267c478bd9Sstevel@tonic-gate 	 */
18277c478bd9Sstevel@tonic-gate 	(smd_cpu+CPU->cpu_seqid)->scpu.scpu_last_smap = smp;
18287c478bd9Sstevel@tonic-gate 
18297c478bd9Sstevel@tonic-gate 	if (forcefault == SM_PAGECREATE) {
18307c478bd9Sstevel@tonic-gate 		baseaddr = segmap_pagecreate_kpm(seg, vp, baseoff, smp, rw);
18317c478bd9Sstevel@tonic-gate 		return (baseaddr);
18327c478bd9Sstevel@tonic-gate 	}
18337c478bd9Sstevel@tonic-gate 
18347c478bd9Sstevel@tonic-gate 	if (newslot == 0 &&
18357c478bd9Sstevel@tonic-gate 	    (pp = GET_KPME(smp)->kpe_page) != NULL) {
18367c478bd9Sstevel@tonic-gate 
18377c478bd9Sstevel@tonic-gate 		/* fastpath */
18387c478bd9Sstevel@tonic-gate 		switch (rw) {
18397c478bd9Sstevel@tonic-gate 		case S_READ:
18407c478bd9Sstevel@tonic-gate 		case S_WRITE:
18417c478bd9Sstevel@tonic-gate 			if (page_trylock(pp, SE_SHARED)) {
18427c478bd9Sstevel@tonic-gate 				if (PP_ISFREE(pp) ||
18437c478bd9Sstevel@tonic-gate 				    !(pp->p_vnode == vp &&
18447c478bd9Sstevel@tonic-gate 				    pp->p_offset == baseoff)) {
18457c478bd9Sstevel@tonic-gate 					page_unlock(pp);
18467c478bd9Sstevel@tonic-gate 					pp = page_lookup(vp, baseoff,
18477c478bd9Sstevel@tonic-gate 					    SE_SHARED);
18487c478bd9Sstevel@tonic-gate 				}
18497c478bd9Sstevel@tonic-gate 			} else {
18507c478bd9Sstevel@tonic-gate 				pp = page_lookup(vp, baseoff, SE_SHARED);
18517c478bd9Sstevel@tonic-gate 			}
18527c478bd9Sstevel@tonic-gate 
18537c478bd9Sstevel@tonic-gate 			if (pp == NULL) {
18547c478bd9Sstevel@tonic-gate 				ASSERT(GET_KPME(smp)->kpe_page == NULL);
18557c478bd9Sstevel@tonic-gate 				break;
18567c478bd9Sstevel@tonic-gate 			}
18577c478bd9Sstevel@tonic-gate 
18587c478bd9Sstevel@tonic-gate 			if (rw == S_WRITE &&
18597c478bd9Sstevel@tonic-gate 			    hat_page_getattr(pp, P_MOD | P_REF) !=
18607c478bd9Sstevel@tonic-gate 			    (P_MOD | P_REF)) {
18617c478bd9Sstevel@tonic-gate 				page_unlock(pp);
18627c478bd9Sstevel@tonic-gate 				break;
18637c478bd9Sstevel@tonic-gate 			}
18647c478bd9Sstevel@tonic-gate 
18657c478bd9Sstevel@tonic-gate 			/*
18667c478bd9Sstevel@tonic-gate 			 * We have the p_selock as reader, grab_smp
18677c478bd9Sstevel@tonic-gate 			 * can't hit us, we have bumped the smap
18687c478bd9Sstevel@tonic-gate 			 * refcnt and hat_pageunload needs the
18697c478bd9Sstevel@tonic-gate 			 * p_selock exclusive.
18707c478bd9Sstevel@tonic-gate 			 */
18717c478bd9Sstevel@tonic-gate 			kpme = GET_KPME(smp);
18727c478bd9Sstevel@tonic-gate 			if (kpme->kpe_page == pp) {
18737c478bd9Sstevel@tonic-gate 				baseaddr = hat_kpm_page2va(pp, 0);
18747c478bd9Sstevel@tonic-gate 			} else if (kpme->kpe_page == NULL) {
18757c478bd9Sstevel@tonic-gate 				baseaddr = hat_kpm_mapin(pp, kpme);
18767c478bd9Sstevel@tonic-gate 			} else {
18777c478bd9Sstevel@tonic-gate 				panic("segmap_getmapflt: stale "
18787c478bd9Sstevel@tonic-gate 				    "kpme page, kpme %p", (void *)kpme);
18797c478bd9Sstevel@tonic-gate 				/*NOTREACHED*/
18807c478bd9Sstevel@tonic-gate 			}
18817c478bd9Sstevel@tonic-gate 
18827c478bd9Sstevel@tonic-gate 			/*
18837c478bd9Sstevel@tonic-gate 			 * We don't invoke segmap_fault via TLB miss,
18847c478bd9Sstevel@tonic-gate 			 * so we set ref and mod bits in advance.
18857c478bd9Sstevel@tonic-gate 			 * For S_OTHER and we set them in segmap_fault
18867c478bd9Sstevel@tonic-gate 			 * F_SOFTUNLOCK.
18877c478bd9Sstevel@tonic-gate 			 */
18887c478bd9Sstevel@tonic-gate 			if (rw == S_READ && !hat_isref(pp))
18897c478bd9Sstevel@tonic-gate 				hat_setref(pp);
18907c478bd9Sstevel@tonic-gate 
18917c478bd9Sstevel@tonic-gate 			return (baseaddr);
18927c478bd9Sstevel@tonic-gate 		default:
18937c478bd9Sstevel@tonic-gate 			break;
18947c478bd9Sstevel@tonic-gate 		}
18957c478bd9Sstevel@tonic-gate 	}
18967c478bd9Sstevel@tonic-gate 
18977c478bd9Sstevel@tonic-gate 	base = segkpm_create_va(baseoff);
18987c478bd9Sstevel@tonic-gate 	error = VOP_GETPAGE(vp, (offset_t)baseoff, len, &prot, pl, MAXBSIZE,
1899da6c28aaSamw 	    seg, base, rw, CRED(), NULL);
19007c478bd9Sstevel@tonic-gate 
19017c478bd9Sstevel@tonic-gate 	pp = pl[0];
19027c478bd9Sstevel@tonic-gate 	if (error || pp == NULL) {
19037c478bd9Sstevel@tonic-gate 		/*
19047c478bd9Sstevel@tonic-gate 		 * Use segmap address slot and let segmap_fault deal
19057c478bd9Sstevel@tonic-gate 		 * with the error cases. There is no error return
19067c478bd9Sstevel@tonic-gate 		 * possible here.
19077c478bd9Sstevel@tonic-gate 		 */
19087c478bd9Sstevel@tonic-gate 		goto use_segmap_range;
19097c478bd9Sstevel@tonic-gate 	}
19107c478bd9Sstevel@tonic-gate 
19117c478bd9Sstevel@tonic-gate 	ASSERT(pl[1] == NULL);
19127c478bd9Sstevel@tonic-gate 
19137c478bd9Sstevel@tonic-gate 	/*
19147c478bd9Sstevel@tonic-gate 	 * When prot is not returned w/ PROT_ALL the returned pages
19157c478bd9Sstevel@tonic-gate 	 * are not backed by fs blocks. For most of the segmap users
19167c478bd9Sstevel@tonic-gate 	 * this is no problem, they don't write to the pages in the
19177c478bd9Sstevel@tonic-gate 	 * same request and therefore don't rely on a following
19187c478bd9Sstevel@tonic-gate 	 * trap driven segmap_fault. With SM_LOCKPROTO users it
19197c478bd9Sstevel@tonic-gate 	 * is more secure to use segkmap adresses to allow
19207c478bd9Sstevel@tonic-gate 	 * protection segmap_fault's.
19217c478bd9Sstevel@tonic-gate 	 */
19227c478bd9Sstevel@tonic-gate 	if (prot != PROT_ALL && forcefault == SM_LOCKPROTO) {
19237c478bd9Sstevel@tonic-gate 		/*
19247c478bd9Sstevel@tonic-gate 		 * Use segmap address slot and let segmap_fault
19257c478bd9Sstevel@tonic-gate 		 * do the error return.
19267c478bd9Sstevel@tonic-gate 		 */
19277c478bd9Sstevel@tonic-gate 		ASSERT(rw != S_WRITE);
19287c478bd9Sstevel@tonic-gate 		ASSERT(PAGE_LOCKED(pp));
19297c478bd9Sstevel@tonic-gate 		page_unlock(pp);
19307c478bd9Sstevel@tonic-gate 		forcefault = 0;
19317c478bd9Sstevel@tonic-gate 		goto use_segmap_range;
19327c478bd9Sstevel@tonic-gate 	}
19337c478bd9Sstevel@tonic-gate 
19347c478bd9Sstevel@tonic-gate 	/*
19357c478bd9Sstevel@tonic-gate 	 * We have the p_selock as reader, grab_smp can't hit us, we
19367c478bd9Sstevel@tonic-gate 	 * have bumped the smap refcnt and hat_pageunload needs the
19377c478bd9Sstevel@tonic-gate 	 * p_selock exclusive.
19387c478bd9Sstevel@tonic-gate 	 */
19397c478bd9Sstevel@tonic-gate 	kpme = GET_KPME(smp);
19407c478bd9Sstevel@tonic-gate 	if (kpme->kpe_page == pp) {
19417c478bd9Sstevel@tonic-gate 		baseaddr = hat_kpm_page2va(pp, 0);
19427c478bd9Sstevel@tonic-gate 	} else if (kpme->kpe_page == NULL) {
19437c478bd9Sstevel@tonic-gate 		baseaddr = hat_kpm_mapin(pp, kpme);
19447c478bd9Sstevel@tonic-gate 	} else {
19457c478bd9Sstevel@tonic-gate 		panic("segmap_getmapflt: stale kpme page after "
19467c478bd9Sstevel@tonic-gate 		    "VOP_GETPAGE, kpme %p", (void *)kpme);
19477c478bd9Sstevel@tonic-gate 		/*NOTREACHED*/
19487c478bd9Sstevel@tonic-gate 	}
19497c478bd9Sstevel@tonic-gate 
19507c478bd9Sstevel@tonic-gate 	smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;
19517c478bd9Sstevel@tonic-gate 
19527c478bd9Sstevel@tonic-gate 	return (baseaddr);
19537c478bd9Sstevel@tonic-gate 
19547c478bd9Sstevel@tonic-gate 
19557c478bd9Sstevel@tonic-gate use_segmap_range:
19567c478bd9Sstevel@tonic-gate 	baseaddr = seg->s_base + ((smp - smd_smap) * MAXBSIZE);
19577c478bd9Sstevel@tonic-gate 	TRACE_4(TR_FAC_VM, TR_SEGMAP_GETMAP,
19587c478bd9Sstevel@tonic-gate 	    "segmap_getmap:seg %p addr %p vp %p offset %llx",
19597c478bd9Sstevel@tonic-gate 	    seg, baseaddr, vp, baseoff);
19607c478bd9Sstevel@tonic-gate 
19617c478bd9Sstevel@tonic-gate 	/*
19627c478bd9Sstevel@tonic-gate 	 * Prefault the translations
19637c478bd9Sstevel@tonic-gate 	 */
19647c478bd9Sstevel@tonic-gate 	vaddr = baseaddr + (off - baseoff);
19657c478bd9Sstevel@tonic-gate 	if (forcefault && (newslot || !hat_probe(kas.a_hat, vaddr))) {
19667c478bd9Sstevel@tonic-gate 
19677c478bd9Sstevel@tonic-gate 		caddr_t pgaddr = (caddr_t)((uintptr_t)vaddr &
19687c478bd9Sstevel@tonic-gate 		    (uintptr_t)PAGEMASK);
19697c478bd9Sstevel@tonic-gate 
19707c478bd9Sstevel@tonic-gate 		(void) segmap_fault(kas.a_hat, seg, pgaddr,
19717c478bd9Sstevel@tonic-gate 		    (vaddr + len - pgaddr + PAGESIZE - 1) & (uintptr_t)PAGEMASK,
19727c478bd9Sstevel@tonic-gate 		    F_INVAL, rw);
19737c478bd9Sstevel@tonic-gate 	}
19747c478bd9Sstevel@tonic-gate 
19757c478bd9Sstevel@tonic-gate 	return (baseaddr);
19767c478bd9Sstevel@tonic-gate }
19777c478bd9Sstevel@tonic-gate 
19787c478bd9Sstevel@tonic-gate int
segmap_release(struct seg * seg,caddr_t addr,uint_t flags)19797c478bd9Sstevel@tonic-gate segmap_release(struct seg *seg, caddr_t addr, uint_t flags)
19807c478bd9Sstevel@tonic-gate {
19817c478bd9Sstevel@tonic-gate 	struct smap	*smp;
19827c478bd9Sstevel@tonic-gate 	int 		error;
19837c478bd9Sstevel@tonic-gate 	int		bflags = 0;
19847c478bd9Sstevel@tonic-gate 	struct vnode	*vp;
19857c478bd9Sstevel@tonic-gate 	u_offset_t	offset;
19867c478bd9Sstevel@tonic-gate 	kmutex_t	*smtx;
19877c478bd9Sstevel@tonic-gate 	int		is_kpm = 0;
19887c478bd9Sstevel@tonic-gate 	page_t		*pp;
19897c478bd9Sstevel@tonic-gate 
19907c478bd9Sstevel@tonic-gate 	if (segmap_kpm && IS_KPM_ADDR(addr)) {
19917c478bd9Sstevel@tonic-gate 
19927c478bd9Sstevel@tonic-gate 		if (((uintptr_t)addr & MAXBOFFSET) != 0) {
19937c478bd9Sstevel@tonic-gate 			panic("segmap_release: addr %p not "
19947c478bd9Sstevel@tonic-gate 			    "MAXBSIZE aligned", (void *)addr);
19957c478bd9Sstevel@tonic-gate 			/*NOTREACHED*/
19967c478bd9Sstevel@tonic-gate 		}
19977c478bd9Sstevel@tonic-gate 
19987c478bd9Sstevel@tonic-gate 		if ((smp = get_smap_kpm(addr, &pp)) == NULL) {
19997c478bd9Sstevel@tonic-gate 			panic("segmap_release: smap not found "
20007c478bd9Sstevel@tonic-gate 			    "for addr %p", (void *)addr);
20017c478bd9Sstevel@tonic-gate 			/*NOTREACHED*/
20027c478bd9Sstevel@tonic-gate 		}
20037c478bd9Sstevel@tonic-gate 
20047c478bd9Sstevel@tonic-gate 		TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP,
20057c478bd9Sstevel@tonic-gate 		    "segmap_relmap:seg %p addr %p smp %p",
20067c478bd9Sstevel@tonic-gate 		    seg, addr, smp);
20077c478bd9Sstevel@tonic-gate 
20087c478bd9Sstevel@tonic-gate 		smtx = SMAPMTX(smp);
20097c478bd9Sstevel@tonic-gate 
20107c478bd9Sstevel@tonic-gate 		/*
2011da6c28aaSamw 		 * For compatibility reasons segmap_pagecreate_kpm sets this
20127c478bd9Sstevel@tonic-gate 		 * flag to allow a following segmap_pagecreate to return
20137c478bd9Sstevel@tonic-gate 		 * this as "newpage" flag. When segmap_pagecreate is not
20147c478bd9Sstevel@tonic-gate 		 * called at all we clear it now.
20157c478bd9Sstevel@tonic-gate 		 */
20167c478bd9Sstevel@tonic-gate 		smp->sm_flags &= ~SM_KPM_NEWPAGE;
20177c478bd9Sstevel@tonic-gate 		is_kpm = 1;
20187c478bd9Sstevel@tonic-gate 		if (smp->sm_flags & SM_WRITE_DATA) {
20197c478bd9Sstevel@tonic-gate 			hat_setrefmod(pp);
20207c478bd9Sstevel@tonic-gate 		} else if (smp->sm_flags & SM_READ_DATA) {
20217c478bd9Sstevel@tonic-gate 			hat_setref(pp);
20227c478bd9Sstevel@tonic-gate 		}
20237c478bd9Sstevel@tonic-gate 	} else {
20247c478bd9Sstevel@tonic-gate 		if (addr < seg->s_base || addr >= seg->s_base + seg->s_size ||
20257c478bd9Sstevel@tonic-gate 		    ((uintptr_t)addr & MAXBOFFSET) != 0) {
20267c478bd9Sstevel@tonic-gate 			panic("segmap_release: bad addr %p", (void *)addr);
20277c478bd9Sstevel@tonic-gate 			/*NOTREACHED*/
20287c478bd9Sstevel@tonic-gate 		}
20297c478bd9Sstevel@tonic-gate 		smp = GET_SMAP(seg, addr);
20307c478bd9Sstevel@tonic-gate 
20317c478bd9Sstevel@tonic-gate 		TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP,
20327c478bd9Sstevel@tonic-gate 		    "segmap_relmap:seg %p addr %p smp %p",
20337c478bd9Sstevel@tonic-gate 		    seg, addr, smp);
20347c478bd9Sstevel@tonic-gate 
20357c478bd9Sstevel@tonic-gate 		smtx = SMAPMTX(smp);
20367c478bd9Sstevel@tonic-gate 		mutex_enter(smtx);
20377c478bd9Sstevel@tonic-gate 		smp->sm_flags |= SM_NOTKPM_RELEASED;
20387c478bd9Sstevel@tonic-gate 	}
20397c478bd9Sstevel@tonic-gate 
20407c478bd9Sstevel@tonic-gate 	ASSERT(smp->sm_refcnt > 0);
20417c478bd9Sstevel@tonic-gate 
20427c478bd9Sstevel@tonic-gate 	/*
20437c478bd9Sstevel@tonic-gate 	 * Need to call VOP_PUTPAGE() if any flags (except SM_DONTNEED)
20447c478bd9Sstevel@tonic-gate 	 * are set.
20457c478bd9Sstevel@tonic-gate 	 */
20467c478bd9Sstevel@tonic-gate 	if ((flags & ~SM_DONTNEED) != 0) {
20477c478bd9Sstevel@tonic-gate 		if (flags & SM_WRITE)
20487c478bd9Sstevel@tonic-gate 			segmapcnt.smp_rel_write.value.ul++;
20497c478bd9Sstevel@tonic-gate 		if (flags & SM_ASYNC) {
20507c478bd9Sstevel@tonic-gate 			bflags |= B_ASYNC;
20517c478bd9Sstevel@tonic-gate 			segmapcnt.smp_rel_async.value.ul++;
20527c478bd9Sstevel@tonic-gate 		}
20537c478bd9Sstevel@tonic-gate 		if (flags & SM_INVAL) {
20547c478bd9Sstevel@tonic-gate 			bflags |= B_INVAL;
20557c478bd9Sstevel@tonic-gate 			segmapcnt.smp_rel_abort.value.ul++;
20567c478bd9Sstevel@tonic-gate 		}
20577c478bd9Sstevel@tonic-gate 		if (flags & SM_DESTROY) {
20587c478bd9Sstevel@tonic-gate 			bflags |= (B_INVAL|B_TRUNC);
20597c478bd9Sstevel@tonic-gate 			segmapcnt.smp_rel_abort.value.ul++;
20607c478bd9Sstevel@tonic-gate 		}
20617c478bd9Sstevel@tonic-gate 		if (smp->sm_refcnt == 1) {
20627c478bd9Sstevel@tonic-gate 			/*
20637c478bd9Sstevel@tonic-gate 			 * We only bother doing the FREE and DONTNEED flags
20647c478bd9Sstevel@tonic-gate 			 * if no one else is still referencing this mapping.
20657c478bd9Sstevel@tonic-gate 			 */
20667c478bd9Sstevel@tonic-gate 			if (flags & SM_FREE) {
20677c478bd9Sstevel@tonic-gate 				bflags |= B_FREE;
20687c478bd9Sstevel@tonic-gate 				segmapcnt.smp_rel_free.value.ul++;
20697c478bd9Sstevel@tonic-gate 			}
20707c478bd9Sstevel@tonic-gate 			if (flags & SM_DONTNEED) {
20717c478bd9Sstevel@tonic-gate 				bflags |= B_DONTNEED;
20727c478bd9Sstevel@tonic-gate 				segmapcnt.smp_rel_dontneed.value.ul++;
20737c478bd9Sstevel@tonic-gate 			}
20747c478bd9Sstevel@tonic-gate 		}
20757c478bd9Sstevel@tonic-gate 	} else {
20767c478bd9Sstevel@tonic-gate 		smd_cpu[CPU->cpu_seqid].scpu.scpu_release++;
20777c478bd9Sstevel@tonic-gate 	}
20787c478bd9Sstevel@tonic-gate 
20797c478bd9Sstevel@tonic-gate 	vp = smp->sm_vp;
20807c478bd9Sstevel@tonic-gate 	offset = smp->sm_off;
20817c478bd9Sstevel@tonic-gate 
20827c478bd9Sstevel@tonic-gate 	if (--smp->sm_refcnt == 0) {
20837c478bd9Sstevel@tonic-gate 
20847c478bd9Sstevel@tonic-gate 		smp->sm_flags &= ~(SM_WRITE_DATA | SM_READ_DATA);
2085592a5a13Sstans 
20867c478bd9Sstevel@tonic-gate 		if (flags & (SM_INVAL|SM_DESTROY)) {
20877c478bd9Sstevel@tonic-gate 			segmap_hashout(smp);	/* remove map info */
20887c478bd9Sstevel@tonic-gate 			if (is_kpm) {
20897c478bd9Sstevel@tonic-gate 				hat_kpm_mapout(pp, GET_KPME(smp), addr);
20907c478bd9Sstevel@tonic-gate 				if (smp->sm_flags & SM_NOTKPM_RELEASED) {
20917c478bd9Sstevel@tonic-gate 					smp->sm_flags &= ~SM_NOTKPM_RELEASED;
2092b611ee0aSPrakash Sangappa 					hat_unload(kas.a_hat, segkmap->s_base +
2093b611ee0aSPrakash Sangappa 					    ((smp - smd_smap) * MAXBSIZE),
2094b611ee0aSPrakash Sangappa 					    MAXBSIZE, HAT_UNLOAD);
20957c478bd9Sstevel@tonic-gate 				}
20967c478bd9Sstevel@tonic-gate 
20977c478bd9Sstevel@tonic-gate 			} else {
20987c478bd9Sstevel@tonic-gate 				if (segmap_kpm)
20997c478bd9Sstevel@tonic-gate 					segkpm_mapout_validkpme(GET_KPME(smp));
21007c478bd9Sstevel@tonic-gate 
21017c478bd9Sstevel@tonic-gate 				smp->sm_flags &= ~SM_NOTKPM_RELEASED;
21027c478bd9Sstevel@tonic-gate 				hat_unload(kas.a_hat, addr, MAXBSIZE,
21037c478bd9Sstevel@tonic-gate 				    HAT_UNLOAD);
21047c478bd9Sstevel@tonic-gate 			}
21057c478bd9Sstevel@tonic-gate 		}
21067c478bd9Sstevel@tonic-gate 		segmap_smapadd(smp);	/* add to free list */
21077c478bd9Sstevel@tonic-gate 	}
21087c478bd9Sstevel@tonic-gate 
21097c478bd9Sstevel@tonic-gate 	mutex_exit(smtx);
21107c478bd9Sstevel@tonic-gate 
21117c478bd9Sstevel@tonic-gate 	if (is_kpm)
21127c478bd9Sstevel@tonic-gate 		page_unlock(pp);
21137c478bd9Sstevel@tonic-gate 	/*
21147c478bd9Sstevel@tonic-gate 	 * Now invoke VOP_PUTPAGE() if any flags (except SM_DONTNEED)
21157c478bd9Sstevel@tonic-gate 	 * are set.
21167c478bd9Sstevel@tonic-gate 	 */
21177c478bd9Sstevel@tonic-gate 	if ((flags & ~SM_DONTNEED) != 0) {
21187c478bd9Sstevel@tonic-gate 		error = VOP_PUTPAGE(vp, offset, MAXBSIZE,
2119da6c28aaSamw 		    bflags, CRED(), NULL);
21207c478bd9Sstevel@tonic-gate 	} else {
21217c478bd9Sstevel@tonic-gate 		error = 0;
21227c478bd9Sstevel@tonic-gate 	}
21237c478bd9Sstevel@tonic-gate 
21247c478bd9Sstevel@tonic-gate 	return (error);
21257c478bd9Sstevel@tonic-gate }
21267c478bd9Sstevel@tonic-gate 
21277c478bd9Sstevel@tonic-gate /*
21287c478bd9Sstevel@tonic-gate  * Dump the pages belonging to this segmap segment.
21297c478bd9Sstevel@tonic-gate  */
21307c478bd9Sstevel@tonic-gate static void
segmap_dump(struct seg * seg)21317c478bd9Sstevel@tonic-gate segmap_dump(struct seg *seg)
21327c478bd9Sstevel@tonic-gate {
21337c478bd9Sstevel@tonic-gate 	struct segmap_data *smd;
21347c478bd9Sstevel@tonic-gate 	struct smap *smp, *smp_end;
21357c478bd9Sstevel@tonic-gate 	page_t *pp;
21367c478bd9Sstevel@tonic-gate 	pfn_t pfn;
21377c478bd9Sstevel@tonic-gate 	u_offset_t off;
21387c478bd9Sstevel@tonic-gate 	caddr_t addr;
21397c478bd9Sstevel@tonic-gate 
21407c478bd9Sstevel@tonic-gate 	smd = (struct segmap_data *)seg->s_data;
21417c478bd9Sstevel@tonic-gate 	addr = seg->s_base;
21427c478bd9Sstevel@tonic-gate 	for (smp = smd->smd_sm, smp_end = smp + smd->smd_npages;
21437c478bd9Sstevel@tonic-gate 	    smp < smp_end; smp++) {
21447c478bd9Sstevel@tonic-gate 
21457c478bd9Sstevel@tonic-gate 		if (smp->sm_refcnt) {
21467c478bd9Sstevel@tonic-gate 			for (off = 0; off < MAXBSIZE; off += PAGESIZE) {
21477c478bd9Sstevel@tonic-gate 				int we_own_it = 0;
21487c478bd9Sstevel@tonic-gate 
21497c478bd9Sstevel@tonic-gate 				/*
21507c478bd9Sstevel@tonic-gate 				 * If pp == NULL, the page either does
21517c478bd9Sstevel@tonic-gate 				 * not exist or is exclusively locked.
21527c478bd9Sstevel@tonic-gate 				 * So determine if it exists before
21537c478bd9Sstevel@tonic-gate 				 * searching for it.
21547c478bd9Sstevel@tonic-gate 				 */
21557c478bd9Sstevel@tonic-gate 				if ((pp = page_lookup_nowait(smp->sm_vp,
21567c478bd9Sstevel@tonic-gate 				    smp->sm_off + off, SE_SHARED)))
21577c478bd9Sstevel@tonic-gate 					we_own_it = 1;
21587c478bd9Sstevel@tonic-gate 				else
21597c478bd9Sstevel@tonic-gate 					pp = page_exists(smp->sm_vp,
21607c478bd9Sstevel@tonic-gate 					    smp->sm_off + off);
21617c478bd9Sstevel@tonic-gate 
21627c478bd9Sstevel@tonic-gate 				if (pp) {
21637c478bd9Sstevel@tonic-gate 					pfn = page_pptonum(pp);
21647c478bd9Sstevel@tonic-gate 					dump_addpage(seg->s_as,
21657c478bd9Sstevel@tonic-gate 					    addr + off, pfn);
21667c478bd9Sstevel@tonic-gate 					if (we_own_it)
21677c478bd9Sstevel@tonic-gate 						page_unlock(pp);
21687c478bd9Sstevel@tonic-gate 				}
21697c478bd9Sstevel@tonic-gate 				dump_timeleft = dump_timeout;
21707c478bd9Sstevel@tonic-gate 			}
21717c478bd9Sstevel@tonic-gate 		}
21727c478bd9Sstevel@tonic-gate 		addr += MAXBSIZE;
21737c478bd9Sstevel@tonic-gate 	}
21747c478bd9Sstevel@tonic-gate }
21757c478bd9Sstevel@tonic-gate 
21767c478bd9Sstevel@tonic-gate /*ARGSUSED*/
21777c478bd9Sstevel@tonic-gate static int
segmap_pagelock(struct seg * seg,caddr_t addr,size_t len,struct page *** ppp,enum lock_type type,enum seg_rw rw)21787c478bd9Sstevel@tonic-gate segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
21797c478bd9Sstevel@tonic-gate     struct page ***ppp, enum lock_type type, enum seg_rw rw)
21807c478bd9Sstevel@tonic-gate {
21817c478bd9Sstevel@tonic-gate 	return (ENOTSUP);
21827c478bd9Sstevel@tonic-gate }
21837c478bd9Sstevel@tonic-gate 
21847c478bd9Sstevel@tonic-gate static int
segmap_getmemid(struct seg * seg,caddr_t addr,memid_t * memidp)21857c478bd9Sstevel@tonic-gate segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
21867c478bd9Sstevel@tonic-gate {
21877c478bd9Sstevel@tonic-gate 	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
21887c478bd9Sstevel@tonic-gate 
21897c478bd9Sstevel@tonic-gate 	memidp->val[0] = (uintptr_t)smd->smd_sm->sm_vp;
21907c478bd9Sstevel@tonic-gate 	memidp->val[1] = smd->smd_sm->sm_off + (uintptr_t)(addr - seg->s_base);
21917c478bd9Sstevel@tonic-gate 	return (0);
21927c478bd9Sstevel@tonic-gate }
21937c478bd9Sstevel@tonic-gate 
21947c478bd9Sstevel@tonic-gate /*ARGSUSED*/
21957c478bd9Sstevel@tonic-gate static lgrp_mem_policy_info_t *
segmap_getpolicy(struct seg * seg,caddr_t addr)21967c478bd9Sstevel@tonic-gate segmap_getpolicy(struct seg *seg, caddr_t addr)
21977c478bd9Sstevel@tonic-gate {
21987c478bd9Sstevel@tonic-gate 	return (NULL);
21997c478bd9Sstevel@tonic-gate }
22007c478bd9Sstevel@tonic-gate 
22011bd5c35fSelowe /*ARGSUSED*/
22021bd5c35fSelowe static int
segmap_capable(struct seg * seg,segcapability_t capability)22031bd5c35fSelowe segmap_capable(struct seg *seg, segcapability_t capability)
22041bd5c35fSelowe {
22051bd5c35fSelowe 	return (0);
22061bd5c35fSelowe }
22071bd5c35fSelowe 
22087c478bd9Sstevel@tonic-gate 
22097c478bd9Sstevel@tonic-gate #ifdef	SEGKPM_SUPPORT
22107c478bd9Sstevel@tonic-gate 
22117c478bd9Sstevel@tonic-gate /*
22127c478bd9Sstevel@tonic-gate  * segkpm support routines
22137c478bd9Sstevel@tonic-gate  */
22147c478bd9Sstevel@tonic-gate 
22157c478bd9Sstevel@tonic-gate static caddr_t
segmap_pagecreate_kpm(struct seg * seg,vnode_t * vp,u_offset_t off,struct smap * smp,enum seg_rw rw)22167c478bd9Sstevel@tonic-gate segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
22177c478bd9Sstevel@tonic-gate 	struct smap *smp, enum seg_rw rw)
22187c478bd9Sstevel@tonic-gate {
22197c478bd9Sstevel@tonic-gate 	caddr_t	base;
22207c478bd9Sstevel@tonic-gate 	page_t	*pp;
22217c478bd9Sstevel@tonic-gate 	int	newpage = 0;
22227c478bd9Sstevel@tonic-gate 	struct kpme	*kpme;
22237c478bd9Sstevel@tonic-gate 
22247c478bd9Sstevel@tonic-gate 	ASSERT(smp->sm_refcnt > 0);
22257c478bd9Sstevel@tonic-gate 
22267c478bd9Sstevel@tonic-gate 	if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
22277c478bd9Sstevel@tonic-gate 		kmutex_t *smtx;
22287c478bd9Sstevel@tonic-gate 
22297c478bd9Sstevel@tonic-gate 		base = segkpm_create_va(off);
22307c478bd9Sstevel@tonic-gate 
22317c478bd9Sstevel@tonic-gate 		if ((pp = page_create_va(vp, off, PAGESIZE, PG_WAIT,
22327c478bd9Sstevel@tonic-gate 		    seg, base)) == NULL) {
22337c478bd9Sstevel@tonic-gate 			panic("segmap_pagecreate_kpm: "
22347c478bd9Sstevel@tonic-gate 			    "page_create failed");
22357c478bd9Sstevel@tonic-gate 			/*NOTREACHED*/
22367c478bd9Sstevel@tonic-gate 		}
22377c478bd9Sstevel@tonic-gate 
22387c478bd9Sstevel@tonic-gate 		newpage = 1;
22397c478bd9Sstevel@tonic-gate 		page_io_unlock(pp);
22407c478bd9Sstevel@tonic-gate 		ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
22417c478bd9Sstevel@tonic-gate 
22427c478bd9Sstevel@tonic-gate 		/*
22437c478bd9Sstevel@tonic-gate 		 * Mark this here until the following segmap_pagecreate
22447c478bd9Sstevel@tonic-gate 		 * or segmap_release.
22457c478bd9Sstevel@tonic-gate 		 */
22467c478bd9Sstevel@tonic-gate 		smtx = SMAPMTX(smp);
22477c478bd9Sstevel@tonic-gate 		mutex_enter(smtx);
22487c478bd9Sstevel@tonic-gate 		smp->sm_flags |= SM_KPM_NEWPAGE;
22497c478bd9Sstevel@tonic-gate 		mutex_exit(smtx);
22507c478bd9Sstevel@tonic-gate 	}
22517c478bd9Sstevel@tonic-gate 
22527c478bd9Sstevel@tonic-gate 	kpme = GET_KPME(smp);
22537c478bd9Sstevel@tonic-gate 	if (!newpage && kpme->kpe_page == pp)
22547c478bd9Sstevel@tonic-gate 		base = hat_kpm_page2va(pp, 0);
22557c478bd9Sstevel@tonic-gate 	else
22567c478bd9Sstevel@tonic-gate 		base = hat_kpm_mapin(pp, kpme);
22577c478bd9Sstevel@tonic-gate 
22587c478bd9Sstevel@tonic-gate 	/*
22597c478bd9Sstevel@tonic-gate 	 * FS code may decide not to call segmap_pagecreate and we
22607c478bd9Sstevel@tonic-gate 	 * don't invoke segmap_fault via TLB miss, so we have to set
22617c478bd9Sstevel@tonic-gate 	 * ref and mod bits in advance.
22627c478bd9Sstevel@tonic-gate 	 */
22637c478bd9Sstevel@tonic-gate 	if (rw == S_WRITE) {
22647c478bd9Sstevel@tonic-gate 		hat_setrefmod(pp);
22657c478bd9Sstevel@tonic-gate 	} else {
22667c478bd9Sstevel@tonic-gate 		ASSERT(rw == S_READ);
22677c478bd9Sstevel@tonic-gate 		hat_setref(pp);
22687c478bd9Sstevel@tonic-gate 	}
22697c478bd9Sstevel@tonic-gate 
22707c478bd9Sstevel@tonic-gate 	smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;
22717c478bd9Sstevel@tonic-gate 
22727c478bd9Sstevel@tonic-gate 	return (base);
22737c478bd9Sstevel@tonic-gate }
22747c478bd9Sstevel@tonic-gate 
22757c478bd9Sstevel@tonic-gate /*
22767c478bd9Sstevel@tonic-gate  * Find the smap structure corresponding to the
22777c478bd9Sstevel@tonic-gate  * KPM addr and return it locked.
22787c478bd9Sstevel@tonic-gate  */
22797c478bd9Sstevel@tonic-gate struct smap *
get_smap_kpm(caddr_t addr,page_t ** ppp)22807c478bd9Sstevel@tonic-gate get_smap_kpm(caddr_t addr, page_t **ppp)
22817c478bd9Sstevel@tonic-gate {
22827c478bd9Sstevel@tonic-gate 	struct smap	*smp;
22837c478bd9Sstevel@tonic-gate 	struct vnode	*vp;
22847c478bd9Sstevel@tonic-gate 	u_offset_t	offset;
22857c478bd9Sstevel@tonic-gate 	caddr_t		baseaddr = (caddr_t)((uintptr_t)addr & MAXBMASK);
22867c478bd9Sstevel@tonic-gate 	int		hashid;
22877c478bd9Sstevel@tonic-gate 	kmutex_t	*hashmtx;
22887c478bd9Sstevel@tonic-gate 	page_t		*pp;
22897c478bd9Sstevel@tonic-gate 	union segmap_cpu *scpu;
22907c478bd9Sstevel@tonic-gate 
22917c478bd9Sstevel@tonic-gate 	pp = hat_kpm_vaddr2page(baseaddr);
22927c478bd9Sstevel@tonic-gate 
22937c478bd9Sstevel@tonic-gate 	ASSERT(pp && !PP_ISFREE(pp));
22947c478bd9Sstevel@tonic-gate 	ASSERT(PAGE_LOCKED(pp));
22957c478bd9Sstevel@tonic-gate 	ASSERT(((uintptr_t)pp->p_offset & MAXBOFFSET) == 0);
22967c478bd9Sstevel@tonic-gate 
22977c478bd9Sstevel@tonic-gate 	vp = pp->p_vnode;
22987c478bd9Sstevel@tonic-gate 	offset = pp->p_offset;
22997c478bd9Sstevel@tonic-gate 	ASSERT(vp != NULL);
23007c478bd9Sstevel@tonic-gate 
23017c478bd9Sstevel@tonic-gate 	/*
23027c478bd9Sstevel@tonic-gate 	 * Assume the last smap used on this cpu is the one needed.
23037c478bd9Sstevel@tonic-gate 	 */
23047c478bd9Sstevel@tonic-gate 	scpu = smd_cpu+CPU->cpu_seqid;
23057c478bd9Sstevel@tonic-gate 	smp = scpu->scpu.scpu_last_smap;
23067c478bd9Sstevel@tonic-gate 	mutex_enter(&smp->sm_mtx);
23077c478bd9Sstevel@tonic-gate 	if (smp->sm_vp == vp && smp->sm_off == offset) {
23087c478bd9Sstevel@tonic-gate 		ASSERT(smp->sm_refcnt > 0);
23097c478bd9Sstevel@tonic-gate 	} else {
23107c478bd9Sstevel@tonic-gate 		/*
23117c478bd9Sstevel@tonic-gate 		 * Assumption wrong, find the smap on the hash chain.
23127c478bd9Sstevel@tonic-gate 		 */
23137c478bd9Sstevel@tonic-gate 		mutex_exit(&smp->sm_mtx);
23147c478bd9Sstevel@tonic-gate 		SMAP_HASHFUNC(vp, offset, hashid); /* macro assigns hashid */
23157c478bd9Sstevel@tonic-gate 		hashmtx = SHASHMTX(hashid);
23167c478bd9Sstevel@tonic-gate 
23177c478bd9Sstevel@tonic-gate 		mutex_enter(hashmtx);
23187c478bd9Sstevel@tonic-gate 		smp = smd_hash[hashid].sh_hash_list;
23197c478bd9Sstevel@tonic-gate 		for (; smp != NULL; smp = smp->sm_hash) {
23207c478bd9Sstevel@tonic-gate 			if (smp->sm_vp == vp && smp->sm_off == offset)
23217c478bd9Sstevel@tonic-gate 				break;
23227c478bd9Sstevel@tonic-gate 		}
23237c478bd9Sstevel@tonic-gate 		mutex_exit(hashmtx);
23247c478bd9Sstevel@tonic-gate 		if (smp) {
23257c478bd9Sstevel@tonic-gate 			mutex_enter(&smp->sm_mtx);
23267c478bd9Sstevel@tonic-gate 			ASSERT(smp->sm_vp == vp && smp->sm_off == offset);
23277c478bd9Sstevel@tonic-gate 		}
23287c478bd9Sstevel@tonic-gate 	}
23297c478bd9Sstevel@tonic-gate 
23307c478bd9Sstevel@tonic-gate 	if (ppp)
23317c478bd9Sstevel@tonic-gate 		*ppp = smp ? pp : NULL;
23327c478bd9Sstevel@tonic-gate 
23337c478bd9Sstevel@tonic-gate 	return (smp);
23347c478bd9Sstevel@tonic-gate }
23357c478bd9Sstevel@tonic-gate 
23367c478bd9Sstevel@tonic-gate #else	/* SEGKPM_SUPPORT */
23377c478bd9Sstevel@tonic-gate 
23387c478bd9Sstevel@tonic-gate /* segkpm stubs */
23397c478bd9Sstevel@tonic-gate 
23407c478bd9Sstevel@tonic-gate /*ARGSUSED*/
23417c478bd9Sstevel@tonic-gate static caddr_t
segmap_pagecreate_kpm(struct seg * seg,vnode_t * vp,u_offset_t off,struct smap * smp,enum seg_rw rw)23427c478bd9Sstevel@tonic-gate segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
23437c478bd9Sstevel@tonic-gate 	struct smap *smp, enum seg_rw rw)
23447c478bd9Sstevel@tonic-gate {
23457c478bd9Sstevel@tonic-gate 	return (NULL);
23467c478bd9Sstevel@tonic-gate }
23477c478bd9Sstevel@tonic-gate 
23487c478bd9Sstevel@tonic-gate /*ARGSUSED*/
23497c478bd9Sstevel@tonic-gate struct smap *
get_smap_kpm(caddr_t addr,page_t ** ppp)23507c478bd9Sstevel@tonic-gate get_smap_kpm(caddr_t addr, page_t **ppp)
23517c478bd9Sstevel@tonic-gate {
23527c478bd9Sstevel@tonic-gate 	return (NULL);
23537c478bd9Sstevel@tonic-gate }
23547c478bd9Sstevel@tonic-gate 
23557c478bd9Sstevel@tonic-gate #endif	/* SEGKPM_SUPPORT */
2356