xref: /titanic_51/usr/src/uts/common/io/physmem.c (revision dc32d872cbeb56532bcea030255db9cd79bac7da)
18b464eb8Smec /*
28b464eb8Smec  * CDDL HEADER START
38b464eb8Smec  *
48b464eb8Smec  * The contents of this file are subject to the terms of the
58b464eb8Smec  * Common Development and Distribution License (the "License").
68b464eb8Smec  * You may not use this file except in compliance with the License.
78b464eb8Smec  *
88b464eb8Smec  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
98b464eb8Smec  * or http://www.opensolaris.org/os/licensing.
108b464eb8Smec  * See the License for the specific language governing permissions
118b464eb8Smec  * and limitations under the License.
128b464eb8Smec  *
138b464eb8Smec  * When distributing Covered Code, include this CDDL HEADER in each
148b464eb8Smec  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
158b464eb8Smec  * If applicable, add the following below this CDDL HEADER, with the
168b464eb8Smec  * fields enclosed by brackets "[]" replaced with your own identifying
178b464eb8Smec  * information: Portions Copyright [yyyy] [name of copyright owner]
188b464eb8Smec  *
198b464eb8Smec  * CDDL HEADER END
208b464eb8Smec  */
218b464eb8Smec /*
2219397407SSherry Moore  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
238b464eb8Smec  * Use is subject to license terms.
248b464eb8Smec  */
258b464eb8Smec 
268b464eb8Smec 
278b464eb8Smec #include <sys/types.h>
288b464eb8Smec #include <sys/modctl.h>
298b464eb8Smec #include <sys/conf.h>
308b464eb8Smec #include <sys/ddi.h>
318b464eb8Smec #include <sys/sunddi.h>
328b464eb8Smec #include <sys/devops.h>
338b464eb8Smec #include <sys/stat.h>
348b464eb8Smec #include <sys/file.h>
358b464eb8Smec #include <sys/cred.h>
368b464eb8Smec #include <sys/policy.h>
378b464eb8Smec #include <sys/errno.h>
388b464eb8Smec #include <vm/seg_dev.h>
398b464eb8Smec #include <vm/seg_vn.h>
408b464eb8Smec #include <vm/page.h>
418b464eb8Smec #include <sys/fs/swapnode.h>
428b464eb8Smec #include <sys/sysmacros.h>
438b464eb8Smec #include <sys/fcntl.h>
448b464eb8Smec #include <sys/vmsystm.h>
458b464eb8Smec #include <sys/physmem.h>
46aa59c4cbSrsb #include <sys/vfs_opreg.h>
478b464eb8Smec 
488b464eb8Smec static dev_info_t		*physmem_dip = NULL;
498b464eb8Smec 
508b464eb8Smec /*
518b464eb8Smec  * Linked list element hanging off physmem_proc_hash below, which holds all
528b464eb8Smec  * the information for a given segment which has been setup for this process.
538b464eb8Smec  * This is a simple linked list as we are assuming that for a given process
548b464eb8Smec  * the setup ioctl will only be called a handful of times.  If this assumption
558b464eb8Smec  * changes in the future, a quicker to traverse data structure should be used.
568b464eb8Smec  */
578b464eb8Smec struct physmem_hash {
588b464eb8Smec 	struct physmem_hash *ph_next;
598b464eb8Smec 	uint64_t ph_base_pa;
608b464eb8Smec 	caddr_t ph_base_va;
618b464eb8Smec 	size_t ph_seg_len;
628b464eb8Smec 	struct vnode *ph_vnode;
638b464eb8Smec };
648b464eb8Smec 
/*
 * Per-process bucket entry.  Every process that has set up at least one
 * mapping through the driver owns one of these; it anchors that process's
 * list of physmem_hash segments.
 */
struct physmem_proc_hash {
	struct proc *pph_proc;			/* owning process */
	struct physmem_hash *pph_hash;		/* head of its segment list */
	struct physmem_proc_hash *pph_next;	/* next entry in same bucket */
};
748b464eb8Smec 
758b464eb8Smec 
/*
 * Bucket array for the per-process entries, indexed by PHYSMEM_HASH().
 * PPH_SIZE has to stay a power of two because the hash simply masks with
 * (PPH_SIZE - 1).
 */
#define	PPH_SIZE	8
struct physmem_proc_hash *pph[PPH_SIZE];
798b464eb8Smec 
808b464eb8Smec /*
818b464eb8Smec  * Lock which protects the pph hash above.  To add an element (either a new
828b464eb8Smec  * process or a new segment) the WRITE lock must be held.  To traverse the
838b464eb8Smec  * list, only a READ lock is needed.
848b464eb8Smec  */
858b464eb8Smec krwlock_t pph_rwlock;
868b464eb8Smec 
/*
 * Map a proc pointer to a pph[] bucket: discard the low 8 bits of the
 * address and mask the rest down to the table size.  The argument is fully
 * parenthesised so that the cast applies to the whole expression passed in,
 * not just to its first operand.
 */
#define	PHYSMEM_HASH(procp) \
	((int)((((uintptr_t)(procp)) >> 8) & (PPH_SIZE - 1)))
888b464eb8Smec 
898b464eb8Smec /*
908b464eb8Smec  * Need to keep a reference count of how many processes have the driver
918b464eb8Smec  * open to prevent it from disappearing.
928b464eb8Smec  */
938b464eb8Smec uint64_t physmem_vnodecnt;
948b464eb8Smec kmutex_t physmem_mutex;		/* protects phsymem_vnodecnt */
958b464eb8Smec 
968b464eb8Smec static int physmem_getpage(struct vnode *vp, offset_t off, size_t len,
978b464eb8Smec     uint_t *protp, page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
98da6c28aaSamw     enum seg_rw rw, struct cred *cr, caller_context_t *ct);
998b464eb8Smec 
1008b464eb8Smec static int physmem_addmap(struct vnode *vp, offset_t off, struct as *as,
1018b464eb8Smec     caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
102da6c28aaSamw     struct cred *cred, caller_context_t *ct);
1038b464eb8Smec 
1048b464eb8Smec static int physmem_delmap(struct vnode *vp, offset_t off, struct as *as,
1058b464eb8Smec     caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags,
106da6c28aaSamw     struct cred *cred, caller_context_t *ct);
1078b464eb8Smec 
108da6c28aaSamw static void physmem_inactive(vnode_t *vp, cred_t *crp, caller_context_t *ct);
1098b464eb8Smec 
1108b464eb8Smec const fs_operation_def_t physmem_vnodeops_template[] = {
111aa59c4cbSrsb 	VOPNAME_GETPAGE,	{ .vop_getpage = physmem_getpage },
112aa59c4cbSrsb 	VOPNAME_ADDMAP,		{ .vop_addmap = physmem_addmap },
113aa59c4cbSrsb 	VOPNAME_DELMAP,		{ .vop_delmap = physmem_delmap },
114aa59c4cbSrsb 	VOPNAME_INACTIVE,	{ .vop_inactive = physmem_inactive },
1158b464eb8Smec 	NULL,			NULL
1168b464eb8Smec };
1178b464eb8Smec 
1188b464eb8Smec vnodeops_t *physmem_vnodeops = NULL;
1198b464eb8Smec 
1208b464eb8Smec /*
1218b464eb8Smec  * Removes the current process from the hash if the process has no more
1228b464eb8Smec  * physmem segments active.
1238b464eb8Smec  */
1248b464eb8Smec void
1258b464eb8Smec physmem_remove_hash_proc()
1268b464eb8Smec {
1278b464eb8Smec 	int index;
1288b464eb8Smec 	struct physmem_proc_hash **walker;
1298b464eb8Smec 	struct physmem_proc_hash *victim = NULL;
1308b464eb8Smec 
1318b464eb8Smec 	index = PHYSMEM_HASH(curproc);
1328b464eb8Smec 	rw_enter(&pph_rwlock, RW_WRITER);
1338b464eb8Smec 	walker = &pph[index];
1348b464eb8Smec 	while (*walker != NULL) {
1358b464eb8Smec 		if ((*walker)->pph_proc == curproc &&
1368b464eb8Smec 		    (*walker)->pph_hash == NULL) {
1378b464eb8Smec 			victim = *walker;
1388b464eb8Smec 			*walker = victim->pph_next;
1398b464eb8Smec 			break;
1408b464eb8Smec 		}
1418b464eb8Smec 		walker = &((*walker)->pph_next);
1428b464eb8Smec 	}
1438b464eb8Smec 	rw_exit(&pph_rwlock);
1448b464eb8Smec 	if (victim != NULL)
1458b464eb8Smec 		kmem_free(victim, sizeof (struct physmem_proc_hash));
1468b464eb8Smec }
1478b464eb8Smec 
1488b464eb8Smec /*
1498b464eb8Smec  * Add a new entry to the hash for the given process to cache the
1508b464eb8Smec  * address ranges that it is working on.  If this is the first hash
1518b464eb8Smec  * item to be added for this process, we will create the head pointer
1528b464eb8Smec  * for this process.
1538b464eb8Smec  * Returns 0 on success, ERANGE when the physical address is already in the
154cd64d6e9Smec  * hash.
1558b464eb8Smec  */
1568b464eb8Smec int
1578b464eb8Smec physmem_add_hash(struct physmem_hash *php)
1588b464eb8Smec {
1598b464eb8Smec 	int index;
1608b464eb8Smec 	struct physmem_proc_hash *iterator;
1618b464eb8Smec 	struct physmem_proc_hash *newp = NULL;
1628b464eb8Smec 	struct physmem_hash *temp;
1638b464eb8Smec 	int ret = 0;
1648b464eb8Smec 
1658b464eb8Smec 	index = PHYSMEM_HASH(curproc);
1668b464eb8Smec 
1678b464eb8Smec insert:
1688b464eb8Smec 	rw_enter(&pph_rwlock, RW_WRITER);
1698b464eb8Smec 	iterator = pph[index];
1708b464eb8Smec 	while (iterator != NULL) {
1718b464eb8Smec 		if (iterator->pph_proc == curproc) {
1728b464eb8Smec 			/*
1738b464eb8Smec 			 * check to make sure a single process does not try to
1748b464eb8Smec 			 * map the same region twice.
1758b464eb8Smec 			 */
1768b464eb8Smec 			for (temp = iterator->pph_hash; temp != NULL;
1778b464eb8Smec 			    temp = temp->ph_next) {
1788b464eb8Smec 				if ((php->ph_base_pa >= temp->ph_base_pa &&
1798b464eb8Smec 				    php->ph_base_pa < temp->ph_base_pa +
1808b464eb8Smec 				    temp->ph_seg_len) ||
1818b464eb8Smec 				    (temp->ph_base_pa >= php->ph_base_pa &&
1828b464eb8Smec 				    temp->ph_base_pa < php->ph_base_pa +
1838b464eb8Smec 				    php->ph_seg_len)) {
1848b464eb8Smec 					ret = ERANGE;
1858b464eb8Smec 					break;
1868b464eb8Smec 				}
1878b464eb8Smec 			}
1888b464eb8Smec 			if (ret == 0) {
1898b464eb8Smec 				php->ph_next = iterator->pph_hash;
1908b464eb8Smec 				iterator->pph_hash = php;
1918b464eb8Smec 			}
1928b464eb8Smec 			rw_exit(&pph_rwlock);
1938b464eb8Smec 			/* Need to check for two threads in sync */
1948b464eb8Smec 			if (newp != NULL)
1958b464eb8Smec 				kmem_free(newp, sizeof (*newp));
1968b464eb8Smec 			return (ret);
1978b464eb8Smec 		}
1988b464eb8Smec 		iterator = iterator->pph_next;
1998b464eb8Smec 	}
2008b464eb8Smec 
2018b464eb8Smec 	if (newp != NULL) {
2028b464eb8Smec 		newp->pph_proc = curproc;
2038b464eb8Smec 		newp->pph_next = pph[index];
2048b464eb8Smec 		newp->pph_hash = php;
2058b464eb8Smec 		php->ph_next = NULL;
2068b464eb8Smec 		pph[index] = newp;
2078b464eb8Smec 		rw_exit(&pph_rwlock);
2088b464eb8Smec 		return (0);
2098b464eb8Smec 	}
2108b464eb8Smec 
2118b464eb8Smec 	rw_exit(&pph_rwlock);
2128b464eb8Smec 	/* Dropped the lock so we could use KM_SLEEP */
2138b464eb8Smec 	newp = kmem_zalloc(sizeof (struct physmem_proc_hash), KM_SLEEP);
2148b464eb8Smec 	goto insert;
2158b464eb8Smec }
2168b464eb8Smec 
2178b464eb8Smec /*
2188b464eb8Smec  * Will return the pointer to the physmem_hash struct if the setup routine
2198b464eb8Smec  * has previously been called for this memory.
2208b464eb8Smec  * Returns NULL on failure.
2218b464eb8Smec  */
2228b464eb8Smec struct physmem_hash *
2238b464eb8Smec physmem_get_hash(uint64_t req_paddr, size_t len, proc_t *procp)
2248b464eb8Smec {
2258b464eb8Smec 	int index;
2268b464eb8Smec 	struct physmem_proc_hash *proc_hp;
2278b464eb8Smec 	struct physmem_hash *php;
2288b464eb8Smec 
2298b464eb8Smec 	ASSERT(rw_lock_held(&pph_rwlock));
2308b464eb8Smec 
2318b464eb8Smec 	index = PHYSMEM_HASH(procp);
2328b464eb8Smec 	proc_hp = pph[index];
2338b464eb8Smec 	while (proc_hp != NULL) {
2348b464eb8Smec 		if (proc_hp->pph_proc == procp) {
2358b464eb8Smec 			php = proc_hp->pph_hash;
2368b464eb8Smec 			while (php != NULL) {
2378b464eb8Smec 				if ((req_paddr >= php->ph_base_pa) &&
2388b464eb8Smec 				    (req_paddr + len <=
2398b464eb8Smec 				    php->ph_base_pa + php->ph_seg_len)) {
2408b464eb8Smec 					return (php);
2418b464eb8Smec 				}
2428b464eb8Smec 				php = php->ph_next;
2438b464eb8Smec 			}
2448b464eb8Smec 		}
2458b464eb8Smec 		proc_hp = proc_hp->pph_next;
2468b464eb8Smec 	}
2478b464eb8Smec 	return (NULL);
2488b464eb8Smec }
2498b464eb8Smec 
2508b464eb8Smec int
2518b464eb8Smec physmem_validate_cookie(uint64_t p_cookie)
2528b464eb8Smec {
2538b464eb8Smec 	int index;
2548b464eb8Smec 	struct physmem_proc_hash *proc_hp;
2558b464eb8Smec 	struct physmem_hash *php;
2568b464eb8Smec 
2578b464eb8Smec 	ASSERT(rw_lock_held(&pph_rwlock));
2588b464eb8Smec 
2598b464eb8Smec 	index = PHYSMEM_HASH(curproc);
2608b464eb8Smec 	proc_hp = pph[index];
2618b464eb8Smec 	while (proc_hp != NULL) {
2628b464eb8Smec 		if (proc_hp->pph_proc == curproc) {
2638b464eb8Smec 			php = proc_hp->pph_hash;
2648b464eb8Smec 			while (php != NULL) {
2658b464eb8Smec 				if ((uint64_t)(uintptr_t)php == p_cookie) {
2668b464eb8Smec 					return (1);
2678b464eb8Smec 				}
2688b464eb8Smec 				php = php->ph_next;
2698b464eb8Smec 			}
2708b464eb8Smec 		}
2718b464eb8Smec 		proc_hp = proc_hp->pph_next;
2728b464eb8Smec 	}
2738b464eb8Smec 	return (0);
2748b464eb8Smec }
2758b464eb8Smec 
2768b464eb8Smec /*
2778b464eb8Smec  * Remove the given vnode from the pph hash.  If it exists in the hash the
2788b464eb8Smec  * process still has to be around as the vnode is obviously still around and
2798b464eb8Smec  * since it's a physmem vnode, it must be in the hash.
2808b464eb8Smec  * If it is not in the hash that must mean that the setup ioctl failed.
2818b464eb8Smec  * Return 0 in this instance, 1 if it is in the hash.
2828b464eb8Smec  */
2838b464eb8Smec int
2848b464eb8Smec physmem_remove_vnode_hash(vnode_t *vp)
2858b464eb8Smec {
2868b464eb8Smec 	int index;
2878b464eb8Smec 	struct physmem_proc_hash *proc_hp;
2888b464eb8Smec 	struct physmem_hash **phpp;
2898b464eb8Smec 	struct physmem_hash *victim;
2908b464eb8Smec 
2918b464eb8Smec 	index = PHYSMEM_HASH(curproc);
2928b464eb8Smec 	/* synchronize with the map routine */
2938b464eb8Smec 	rw_enter(&pph_rwlock, RW_WRITER);
2948b464eb8Smec 	proc_hp = pph[index];
2958b464eb8Smec 	while (proc_hp != NULL) {
2968b464eb8Smec 		if (proc_hp->pph_proc == curproc) {
2978b464eb8Smec 			phpp = &proc_hp->pph_hash;
2988b464eb8Smec 			while (*phpp != NULL) {
2998b464eb8Smec 				if ((*phpp)->ph_vnode == vp) {
3008b464eb8Smec 					victim = *phpp;
3018b464eb8Smec 					*phpp = victim->ph_next;
3028b464eb8Smec 
3038b464eb8Smec 					rw_exit(&pph_rwlock);
3048b464eb8Smec 					kmem_free(victim, sizeof (*victim));
3058b464eb8Smec 					return (1);
3068b464eb8Smec 				}
3078b464eb8Smec 				phpp = &(*phpp)->ph_next;
3088b464eb8Smec 			}
3098b464eb8Smec 		}
3108b464eb8Smec 		proc_hp = proc_hp->pph_next;
3118b464eb8Smec 	}
3128b464eb8Smec 	rw_exit(&pph_rwlock);
3138b464eb8Smec 
3148b464eb8Smec 	/* not found */
3158b464eb8Smec 	return (0);
3168b464eb8Smec }
3178b464eb8Smec 
3188b464eb8Smec int
3198b464eb8Smec physmem_setup_vnops()
3208b464eb8Smec {
3218b464eb8Smec 	int error;
3228b464eb8Smec 	char *name = "physmem";
3238b464eb8Smec 	if (physmem_vnodeops != NULL)
3248b464eb8Smec 		cmn_err(CE_PANIC, "physmem vnodeops already set\n");
3258b464eb8Smec 	error = vn_make_ops(name, physmem_vnodeops_template, &physmem_vnodeops);
3268b464eb8Smec 	if (error != 0) {
3278b464eb8Smec 		cmn_err(CE_WARN, "physmem_setup_vnops: bad vnode ops template");
3288b464eb8Smec 	}
3298b464eb8Smec 	return (error);
3308b464eb8Smec }
3318b464eb8Smec 
3328b464eb8Smec /*
3338b464eb8Smec  * The guts of the PHYSMEM_SETUP ioctl.
3348b464eb8Smec  * Create a segment in the address space with the specified parameters.
3358b464eb8Smec  * If pspp->user_va is NULL, as_gap will be used to find an appropriate VA.
336da6c28aaSamw  * We do not do bounds checking on the requested physical addresses, if they
3378b464eb8Smec  * do not exist in the system, they will not be mappable.
3388b464eb8Smec  * Returns 0 on success with the following error codes on failure:
3398b464eb8Smec  *	ENOMEM - The VA range requested was already mapped if pspp->user_va is
3408b464eb8Smec  *		non-NULL or the system was unable to find enough VA space for
3418b464eb8Smec  *		the desired length if user_va was NULL>
3428b464eb8Smec  *	EINVAL - The requested PA, VA, or length was not PAGESIZE aligned.
3438b464eb8Smec  */
3448b464eb8Smec int
3458b464eb8Smec physmem_setup_addrs(struct physmem_setup_param *pspp)
3468b464eb8Smec {
3478b464eb8Smec 	struct as *as = curproc->p_as;
3488b464eb8Smec 	struct segvn_crargs vn_a;
3498b464eb8Smec 	int ret = 0;
3508b464eb8Smec 	uint64_t base_pa;
3518b464eb8Smec 	size_t len;
3528b464eb8Smec 	caddr_t uvaddr;
3538b464eb8Smec 	struct vnode *vp;
3548b464eb8Smec 	struct physmem_hash *php;
3558b464eb8Smec 
3568b464eb8Smec 	ASSERT(pspp != NULL);
3578b464eb8Smec 	base_pa = pspp->req_paddr;
3588b464eb8Smec 	len = pspp->len;
3598b464eb8Smec 	uvaddr = (caddr_t)(uintptr_t)pspp->user_va;
3608b464eb8Smec 
3618b464eb8Smec 	/* Sanity checking */
3628b464eb8Smec 	if (!IS_P2ALIGNED(base_pa, PAGESIZE))
3638b464eb8Smec 		return (EINVAL);
3648b464eb8Smec 	if (!IS_P2ALIGNED(len, PAGESIZE))
3658b464eb8Smec 		return (EINVAL);
3668b464eb8Smec 	if (uvaddr != NULL && !IS_P2ALIGNED(uvaddr, PAGESIZE))
3678b464eb8Smec 		return (EINVAL);
3688b464eb8Smec 
3698b464eb8Smec 	php = kmem_zalloc(sizeof (struct physmem_hash), KM_SLEEP);
3708b464eb8Smec 
3718b464eb8Smec 	/* Need to bump vnode count so that the driver can not be unloaded */
3728b464eb8Smec 	mutex_enter(&physmem_mutex);
3738b464eb8Smec 	physmem_vnodecnt++;
3748b464eb8Smec 	mutex_exit(&physmem_mutex);
3758b464eb8Smec 
3768b464eb8Smec 	vp = vn_alloc(KM_SLEEP);
3778b464eb8Smec 	ASSERT(vp != NULL);	/* SLEEP can't return NULL */
3788b464eb8Smec 	vn_setops(vp, physmem_vnodeops);
3798b464eb8Smec 
3808b464eb8Smec 	php->ph_vnode = vp;
3818b464eb8Smec 
3828b464eb8Smec 	vn_a.vp = vp;
3838b464eb8Smec 	vn_a.offset = (u_offset_t)base_pa;
3848b464eb8Smec 	vn_a.type = MAP_SHARED;
3858b464eb8Smec 	vn_a.prot = PROT_ALL;
3868b464eb8Smec 	vn_a.maxprot = PROT_ALL;
3878b464eb8Smec 	vn_a.flags = 0;
3888b464eb8Smec 	vn_a.cred = NULL;
3898b464eb8Smec 	vn_a.amp = NULL;
3908b464eb8Smec 	vn_a.szc = 0;
3918b464eb8Smec 	vn_a.lgrp_mem_policy_flags = 0;
3928b464eb8Smec 
3938b464eb8Smec 	as_rangelock(as);
3948b464eb8Smec 	if (uvaddr != NULL) {
3958b464eb8Smec 		if (as_gap(as, len, &uvaddr, &len, AH_LO, NULL) == -1) {
3968b464eb8Smec 			ret = ENOMEM;
3978b464eb8Smec fail:
3988b464eb8Smec 			as_rangeunlock(as);
3998b464eb8Smec 			vn_free(vp);
4008b464eb8Smec 			kmem_free(php, sizeof (*php));
4018b464eb8Smec 			mutex_enter(&physmem_mutex);
4028b464eb8Smec 			physmem_vnodecnt--;
4038b464eb8Smec 			mutex_exit(&physmem_mutex);
4048b464eb8Smec 			return (ret);
4058b464eb8Smec 		}
4068b464eb8Smec 	} else {
4078b464eb8Smec 		/* We pick the address for the user */
4088b464eb8Smec 		map_addr(&uvaddr, len, 0, 1, 0);
4098b464eb8Smec 		if (uvaddr == NULL) {
4108b464eb8Smec 			ret = ENOMEM;
4118b464eb8Smec 			goto fail;
4128b464eb8Smec 		}
4138b464eb8Smec 	}
4148b464eb8Smec 	ret = as_map(as, uvaddr, len, segvn_create, &vn_a);
4158b464eb8Smec 
4168b464eb8Smec 	if (ret == 0) {
417cd64d6e9Smec 		as_rangeunlock(as);
4188b464eb8Smec 		php->ph_base_pa = base_pa;
4198b464eb8Smec 		php->ph_base_va = uvaddr;
4208b464eb8Smec 		php->ph_seg_len = len;
4218b464eb8Smec 		pspp->user_va = (uint64_t)(uintptr_t)uvaddr;
4228b464eb8Smec 		pspp->cookie = (uint64_t)(uintptr_t)php;
4238b464eb8Smec 		ret = physmem_add_hash(php);
4248b464eb8Smec 		if (ret == 0)
4258b464eb8Smec 			return (0);
426cd64d6e9Smec 
427cd64d6e9Smec 		/* Note that the call to as_unmap will free the vnode */
4288b464eb8Smec 		(void) as_unmap(as, uvaddr, len);
429cd64d6e9Smec 		kmem_free(php, sizeof (*php));
4308b464eb8Smec 		return (ret);
4318b464eb8Smec 	}
4328b464eb8Smec 
4338b464eb8Smec 	goto fail;
4348b464eb8Smec 	/*NOTREACHED*/
4358b464eb8Smec }
4368b464eb8Smec 
4378b464eb8Smec /*
4388b464eb8Smec  * The guts of the PHYSMEM_MAP ioctl.
4398b464eb8Smec  * Map the given PA to the appropriate VA if PHYSMEM_SETUP ioctl has already
4408b464eb8Smec  * been called for this PA range.
4418b464eb8Smec  * Returns 0 on success with the following error codes on failure:
4428b464eb8Smec  *	EPERM - The requested page is long term locked, and thus repeated
4438b464eb8Smec  *		requests to allocate this page will likely fail.
4448b464eb8Smec  *	EAGAIN - The requested page could not be allocated, but it is believed
4458b464eb8Smec  *		that future attempts could succeed.
4468b464eb8Smec  *	ENOMEM - There was not enough free memory in the system to safely
4478b464eb8Smec  *		map the requested page.
4488b464eb8Smec  *	EINVAL - The requested paddr was not PAGESIZE aligned or the
4498b464eb8Smec  *		PHYSMEM_SETUP ioctl was not called for this page.
4508b464eb8Smec  *	ENOENT - The requested page was iniside the kernel cage, and the
4518b464eb8Smec  *		PHYSMEM_CAGE flag was not set.
4528b464eb8Smec  *	EBUSY - The requested page is retired and the PHYSMEM_RETIRE flag
4538b464eb8Smec  *		was not set.
4548b464eb8Smec  */
4558b464eb8Smec static int
4568b464eb8Smec physmem_map_addrs(struct physmem_map_param *pmpp)
4578b464eb8Smec {
4588b464eb8Smec 	caddr_t uvaddr;
4598b464eb8Smec 	page_t *pp;
4608b464eb8Smec 	uint64_t req_paddr;
4618b464eb8Smec 	struct vnode *vp;
4628b464eb8Smec 	int ret = 0;
4638b464eb8Smec 	struct physmem_hash *php;
4648b464eb8Smec 	uint_t flags = 0;
4658b464eb8Smec 
4668b464eb8Smec 	ASSERT(pmpp != NULL);
4678b464eb8Smec 	req_paddr = pmpp->req_paddr;
4688b464eb8Smec 
4698b464eb8Smec 	if (!IS_P2ALIGNED(req_paddr, PAGESIZE))
4708b464eb8Smec 		return (EINVAL);
4718b464eb8Smec 	/* Find the vnode for this map request */
4728b464eb8Smec 	rw_enter(&pph_rwlock, RW_READER);
4738b464eb8Smec 	php = physmem_get_hash(req_paddr, PAGESIZE, curproc);
4748b464eb8Smec 	if (php == NULL) {
4758b464eb8Smec 		rw_exit(&pph_rwlock);
4768b464eb8Smec 		return (EINVAL);
4778b464eb8Smec 	}
4788b464eb8Smec 	vp = php->ph_vnode;
4798b464eb8Smec 	uvaddr = php->ph_base_va + (req_paddr - php->ph_base_pa);
4808b464eb8Smec 	rw_exit(&pph_rwlock);
4818b464eb8Smec 
4828b464eb8Smec 	pp = page_numtopp_nolock(btop((size_t)req_paddr));
4838b464eb8Smec 	if (pp == NULL) {
4848b464eb8Smec 		pmpp->ret_va = NULL;
4858b464eb8Smec 		return (EPERM);
4868b464eb8Smec 	}
4878b464eb8Smec 
4888b464eb8Smec 	/*
4898b464eb8Smec 	 * Check to see if page already mapped correctly.  This can happen
4908b464eb8Smec 	 * when we failed to capture a page previously and it was captured
4918b464eb8Smec 	 * asynchronously for us.  Return success in this case.
4928b464eb8Smec 	 */
4938b464eb8Smec 	if (pp->p_vnode == vp) {
4948b464eb8Smec 		ASSERT(pp->p_offset == (u_offset_t)req_paddr);
4958b464eb8Smec 		pmpp->ret_va = (uint64_t)(uintptr_t)uvaddr;
4968b464eb8Smec 		return (0);
4978b464eb8Smec 	}
4988b464eb8Smec 
4998b464eb8Smec 	/*
5008b464eb8Smec 	 * physmem should be responsible for checking for cage
5018b464eb8Smec 	 * and prom pages.
5028b464eb8Smec 	 */
5038b464eb8Smec 	if (pmpp->flags & PHYSMEM_CAGE)
5048b464eb8Smec 		flags = CAPTURE_GET_CAGE;
5058b464eb8Smec 	if (pmpp->flags & PHYSMEM_RETIRED)
5068b464eb8Smec 		flags |= CAPTURE_GET_RETIRED;
5078b464eb8Smec 
5088b464eb8Smec 	ret = page_trycapture(pp, 0, flags | CAPTURE_PHYSMEM, curproc);
5098b464eb8Smec 
5108b464eb8Smec 	if (ret != 0) {
5118b464eb8Smec 		pmpp->ret_va = NULL;
5128b464eb8Smec 		return (ret);
5138b464eb8Smec 	} else {
5148b464eb8Smec 		pmpp->ret_va = (uint64_t)(uintptr_t)uvaddr;
5158b464eb8Smec 		return (0);
5168b464eb8Smec 	}
5178b464eb8Smec }
5188b464eb8Smec 
5198b464eb8Smec /*
5208b464eb8Smec  * Map the given page into the process's address space if possible.
5218b464eb8Smec  * We actually only hash the page in on the correct vnode as the page
5228b464eb8Smec  * will be mapped via segvn_pagefault.
5238b464eb8Smec  * returns 0 on success
5248b464eb8Smec  * returns 1 if there is no need to map this page anymore (process exited)
5258b464eb8Smec  * returns -1 if we failed to map the page.
5268b464eb8Smec  */
5278b464eb8Smec int
5288b464eb8Smec map_page_proc(page_t *pp, void *arg, uint_t flags)
5298b464eb8Smec {
5308b464eb8Smec 	struct vnode *vp;
5318b464eb8Smec 	proc_t *procp = (proc_t *)arg;
5328b464eb8Smec 	int ret;
5338b464eb8Smec 	u_offset_t paddr = (u_offset_t)ptob(pp->p_pagenum);
5348b464eb8Smec 	struct physmem_hash *php;
5358b464eb8Smec 
5368b464eb8Smec 	ASSERT(pp != NULL);
5378b464eb8Smec 
5388b464eb8Smec 	/*
5398b464eb8Smec 	 * Check against availrmem to make sure that we're not low on memory.
5408b464eb8Smec 	 * We check again here as ASYNC requests do not do this check elsewhere.
5418b464eb8Smec 	 * We return 1 as we don't want the page to have the PR_CAPTURE bit
5428b464eb8Smec 	 * set or be on the page capture hash.
5438b464eb8Smec 	 */
5448b464eb8Smec 	if (swapfs_minfree > availrmem + 1) {
5458b464eb8Smec 		page_free(pp, 1);
5468b464eb8Smec 		return (1);
5478b464eb8Smec 	}
5488b464eb8Smec 
5498b464eb8Smec 	/*
5508b464eb8Smec 	 * If this is an asynchronous request for the current process,
5518b464eb8Smec 	 * we can not map the page as it's possible that we are also in the
5528b464eb8Smec 	 * process of unmapping the page which could result in a deadlock
5538b464eb8Smec 	 * with the as lock.
5548b464eb8Smec 	 */
5558b464eb8Smec 	if ((flags & CAPTURE_ASYNC) && (curproc == procp)) {
5568b464eb8Smec 		page_free(pp, 1);
5578b464eb8Smec 		return (-1);
5588b464eb8Smec 	}
5598b464eb8Smec 
5608b464eb8Smec 	/* only return zeroed out pages */
5618b464eb8Smec 	pagezero(pp, 0, PAGESIZE);
5628b464eb8Smec 
5638b464eb8Smec 	rw_enter(&pph_rwlock, RW_READER);
5648b464eb8Smec 	php = physmem_get_hash(paddr, PAGESIZE, procp);
5658b464eb8Smec 	if (php == NULL) {
5668b464eb8Smec 		rw_exit(&pph_rwlock);
5678b464eb8Smec 		/*
5688b464eb8Smec 		 * Free the page as there is no longer a valid outstanding
5698b464eb8Smec 		 * request for this page.
5708b464eb8Smec 		 */
5718b464eb8Smec 		page_free(pp, 1);
5728b464eb8Smec 		return (1);
5738b464eb8Smec 	}
5748b464eb8Smec 
5758b464eb8Smec 	vp = php->ph_vnode;
5768b464eb8Smec 
5778b464eb8Smec 	/*
5788b464eb8Smec 	 * We need to protect against a possible deadlock here where we own
5798b464eb8Smec 	 * the vnode page hash mutex and want to acquire it again as there
5808b464eb8Smec 	 * are locations in the code, where we unlock a page while holding
5818b464eb8Smec 	 * the mutex which can lead to the page being captured and eventually
5828b464eb8Smec 	 * end up here.
5838b464eb8Smec 	 */
5848b464eb8Smec 	if (mutex_owned(page_vnode_mutex(vp))) {
5858b464eb8Smec 		rw_exit(&pph_rwlock);
5868b464eb8Smec 		page_free(pp, 1);
5878b464eb8Smec 		return (-1);
5888b464eb8Smec 	}
5898b464eb8Smec 
5908b464eb8Smec 	ret = page_hashin(pp, vp, paddr, NULL);
5918b464eb8Smec 	rw_exit(&pph_rwlock);
5928b464eb8Smec 	if (ret == 0) {
5938b464eb8Smec 		page_free(pp, 1);
5948b464eb8Smec 		return (-1);
5958b464eb8Smec 	}
5968b464eb8Smec 
5978b464eb8Smec 	page_downgrade(pp);
5988b464eb8Smec 
5998b464eb8Smec 	mutex_enter(&freemem_lock);
6008b464eb8Smec 	availrmem--;
6018b464eb8Smec 	mutex_exit(&freemem_lock);
6028b464eb8Smec 
6038b464eb8Smec 	return (0);
6048b464eb8Smec }
6058b464eb8Smec 
6068b464eb8Smec /*
6078b464eb8Smec  * The guts of the PHYSMEM_DESTROY ioctl.
6088b464eb8Smec  * The cookie passed in will provide all of the information needed to
6098b464eb8Smec  * free up the address space and physical memory associated with the
6108b464eb8Smec  * corresponding PHSYMEM_SETUP ioctl.
6118b464eb8Smec  * Returns 0 on success with the following error codes on failure:
6128b464eb8Smec  *	EINVAL - The cookie supplied is not valid.
6138b464eb8Smec  */
6148b464eb8Smec int
6158b464eb8Smec physmem_destroy_addrs(uint64_t p_cookie)
6168b464eb8Smec {
6178b464eb8Smec 	struct as *as = curproc->p_as;
6188b464eb8Smec 	size_t len;
6198b464eb8Smec 	caddr_t uvaddr;
6208b464eb8Smec 
6218b464eb8Smec 	rw_enter(&pph_rwlock, RW_READER);
6228b464eb8Smec 	if (physmem_validate_cookie(p_cookie) == 0) {
6238b464eb8Smec 		rw_exit(&pph_rwlock);
6248b464eb8Smec 		return (EINVAL);
6258b464eb8Smec 	}
6268b464eb8Smec 
6278b464eb8Smec 	len = ((struct physmem_hash *)(uintptr_t)p_cookie)->ph_seg_len;
6288b464eb8Smec 	uvaddr = ((struct physmem_hash *)(uintptr_t)p_cookie)->ph_base_va;
6298b464eb8Smec 	rw_exit(&pph_rwlock);
6308b464eb8Smec 
6318b464eb8Smec 	(void) as_unmap(as, uvaddr, len);
6328b464eb8Smec 
6338b464eb8Smec 	return (0);
6348b464eb8Smec }
6358b464eb8Smec 
6368b464eb8Smec /*
6378b464eb8Smec  * If the page has been hashed into the physmem vnode, then just look it up
6388b464eb8Smec  * and return it via pl, otherwise return ENOMEM as the map ioctl has not
6398b464eb8Smec  * succeeded on the given page.
6408b464eb8Smec  */
6418b464eb8Smec /*ARGSUSED*/
6428b464eb8Smec static int
6438b464eb8Smec physmem_getpage(struct vnode *vp, offset_t off, size_t len, uint_t *protp,
6448b464eb8Smec     page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, enum seg_rw rw,
645da6c28aaSamw     struct cred *cr, caller_context_t *ct)
6468b464eb8Smec {
6478b464eb8Smec 	page_t *pp;
6488b464eb8Smec 
6498b464eb8Smec 	ASSERT(len == PAGESIZE);
650*dc32d872SJosef 'Jeff' Sipek 	ASSERT(AS_READ_HELD(seg->s_as));
6518b464eb8Smec 
6528b464eb8Smec 	/*
6538b464eb8Smec 	 * If the page is in the hash, then we successfully claimed this
6548b464eb8Smec 	 * page earlier, so return it to the caller.
6558b464eb8Smec 	 */
6568b464eb8Smec 	pp = page_lookup(vp, off, SE_SHARED);
6578b464eb8Smec 	if (pp != NULL) {
6588b464eb8Smec 		pl[0] = pp;
6598b464eb8Smec 		pl[1] = NULL;
6608b464eb8Smec 		*protp = PROT_ALL;
6618b464eb8Smec 		return (0);
6628b464eb8Smec 	}
6638b464eb8Smec 	return (ENOMEM);
6648b464eb8Smec }
6658b464eb8Smec 
6668b464eb8Smec /*
6678b464eb8Smec  * We can not allow a process mapping /dev/physmem pages to fork as there can
6688b464eb8Smec  * only be a single mapping to a /dev/physmem page at a given time.  Thus, the
6698b464eb8Smec  * return of EINVAL when we are not working on our own address space.
6708b464eb8Smec  * Otherwise we return zero as this function is required for normal operation.
6718b464eb8Smec  */
6728b464eb8Smec /*ARGSUSED*/
6738b464eb8Smec static int
6748b464eb8Smec physmem_addmap(struct vnode *vp, offset_t off, struct as *as,
6758b464eb8Smec     caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
676da6c28aaSamw     struct cred *cred, caller_context_t *ct)
6778b464eb8Smec {
6788b464eb8Smec 	if (curproc->p_as != as) {
6798b464eb8Smec 		return (EINVAL);
6808b464eb8Smec 	}
6818b464eb8Smec 	return (0);
6828b464eb8Smec }
6838b464eb8Smec 
6848b464eb8Smec /* Will always get called for removing a whole segment. */
6858b464eb8Smec /*ARGSUSED*/
6868b464eb8Smec static int
6878b464eb8Smec physmem_delmap(struct vnode *vp, offset_t off, struct as *as,
6888b464eb8Smec     caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags,
689da6c28aaSamw     struct cred *cred, caller_context_t *ct)
6908b464eb8Smec {
6918b464eb8Smec 	/*
6928b464eb8Smec 	 * Release our hold on the vnode so that the final VN_RELE will
6938b464eb8Smec 	 * call physmem_inactive to clean things up.
6948b464eb8Smec 	 */
6958b464eb8Smec 	VN_RELE(vp);
6968b464eb8Smec 
6978b464eb8Smec 	return (0);
6988b464eb8Smec }
6998b464eb8Smec 
7008b464eb8Smec /*
7018b464eb8Smec  * Clean up all the pages belonging to this vnode and then free it.
7028b464eb8Smec  */
7038b464eb8Smec /*ARGSUSED*/
7048b464eb8Smec static void
705da6c28aaSamw physmem_inactive(vnode_t *vp, cred_t *crp, caller_context_t *ct)
7068b464eb8Smec {
7078b464eb8Smec 	page_t *pp;
7088b464eb8Smec 
7098b464eb8Smec 	/*
7108b464eb8Smec 	 * Remove the vnode from the hash now, to prevent asynchronous
7118b464eb8Smec 	 * attempts to map into this vnode.  This avoids a deadlock
7128b464eb8Smec 	 * where two threads try to get into this logic at the same
7138b464eb8Smec 	 * time and try to map the pages they are destroying into the
7148b464eb8Smec 	 * other's address space.
7158b464eb8Smec 	 * If it's not in the hash, just free it.
7168b464eb8Smec 	 */
7178b464eb8Smec 	if (physmem_remove_vnode_hash(vp) == 0) {
7188b464eb8Smec 		ASSERT(vp->v_pages == NULL);
7198b464eb8Smec 		vn_free(vp);
7208b464eb8Smec 		physmem_remove_hash_proc();
7218b464eb8Smec 		mutex_enter(&physmem_mutex);
7228b464eb8Smec 		physmem_vnodecnt--;
7238b464eb8Smec 		mutex_exit(&physmem_mutex);
7248b464eb8Smec 		return;
7258b464eb8Smec 	}
7268b464eb8Smec 
7278b464eb8Smec 	/*
7288b464eb8Smec 	 * At this point in time, no other logic can be adding or removing
7298b464eb8Smec 	 * pages from the vnode, otherwise the v_pages list could be inaccurate.
7308b464eb8Smec 	 */
7318b464eb8Smec 
7328b464eb8Smec 	while ((pp = vp->v_pages) != NULL) {
7338b464eb8Smec 		page_t *rpp;
7348b464eb8Smec 		if (page_tryupgrade(pp)) {
7358b464eb8Smec 			/*
7368b464eb8Smec 			 * set lckcnt for page_destroy to do availrmem
7378b464eb8Smec 			 * accounting
7388b464eb8Smec 			 */
7398b464eb8Smec 			pp->p_lckcnt = 1;
7408b464eb8Smec 			page_destroy(pp, 0);
7418b464eb8Smec 		} else {
7428b464eb8Smec 			/* failure to lock should be transient */
7438b464eb8Smec 			rpp = page_lookup(vp, ptob(pp->p_pagenum), SE_SHARED);
7448b464eb8Smec 			if (rpp != pp) {
7458b464eb8Smec 				page_unlock(rpp);
7468b464eb8Smec 				continue;
7478b464eb8Smec 			}
7488b464eb8Smec 			page_unlock(pp);
7498b464eb8Smec 		}
7508b464eb8Smec 	}
7518b464eb8Smec 	vn_free(vp);
7528b464eb8Smec 	physmem_remove_hash_proc();
7538b464eb8Smec 	mutex_enter(&physmem_mutex);
7548b464eb8Smec 	physmem_vnodecnt--;
7558b464eb8Smec 	mutex_exit(&physmem_mutex);
7568b464eb8Smec }
7578b464eb8Smec 
7588b464eb8Smec /*ARGSUSED*/
7598b464eb8Smec static int
7608b464eb8Smec physmem_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
7618b464eb8Smec     int *rvalp)
7628b464eb8Smec {
7638b464eb8Smec 	int ret;
7648b464eb8Smec 
7658b464eb8Smec 	switch (cmd) {
7668b464eb8Smec 	case PHYSMEM_SETUP:
7678b464eb8Smec 		{
7688b464eb8Smec 			struct physmem_setup_param psp;
7698b464eb8Smec 			if (ddi_copyin((void *)arg, &psp,
7708b464eb8Smec 			    sizeof (struct physmem_setup_param), 0))
7718b464eb8Smec 				return (EFAULT);
7728b464eb8Smec 			ret = physmem_setup_addrs(&psp);
7738b464eb8Smec 			if (ddi_copyout(&psp, (void *)arg, sizeof (psp), 0))
7748b464eb8Smec 				return (EFAULT);
7758b464eb8Smec 		}
7768b464eb8Smec 		break;
7778b464eb8Smec 	case PHYSMEM_MAP:
7788b464eb8Smec 		{
7798b464eb8Smec 			struct physmem_map_param pmp;
7808b464eb8Smec 			if (ddi_copyin((void *)arg, &pmp,
7818b464eb8Smec 			    sizeof (struct physmem_map_param), 0))
7828b464eb8Smec 				return (EFAULT);
7838b464eb8Smec 			ret = physmem_map_addrs(&pmp);
7848b464eb8Smec 			if (ddi_copyout(&pmp, (void *)arg, sizeof (pmp), 0))
7858b464eb8Smec 				return (EFAULT);
7868b464eb8Smec 		}
7878b464eb8Smec 		break;
7888b464eb8Smec 	case PHYSMEM_DESTROY:
7898b464eb8Smec 		{
7908b464eb8Smec 			uint64_t cookie;
7918b464eb8Smec 			if (ddi_copyin((void *)arg, &cookie,
7928b464eb8Smec 			    sizeof (uint64_t), 0))
7938b464eb8Smec 				return (EFAULT);
7948b464eb8Smec 			ret = physmem_destroy_addrs(cookie);
7958b464eb8Smec 		}
7968b464eb8Smec 		break;
7978b464eb8Smec 	default:
7988b464eb8Smec 		return (ENOTSUP);
7998b464eb8Smec 	}
8008b464eb8Smec 	return (ret);
8018b464eb8Smec }
8028b464eb8Smec 
8038b464eb8Smec /*ARGSUSED*/
8048b464eb8Smec static int
8058b464eb8Smec physmem_open(dev_t *devp, int flag, int otyp, cred_t *credp)
8068b464eb8Smec {
8078b464eb8Smec 	int ret;
8088b464eb8Smec 	static int msg_printed = 0;
8098b464eb8Smec 
8108b464eb8Smec 	if ((flag & (FWRITE | FREAD)) != (FWRITE | FREAD)) {
8118b464eb8Smec 		return (EINVAL);
8128b464eb8Smec 	}
8138b464eb8Smec 
8148b464eb8Smec 	/* need to make sure we have the right privileges */
8158b464eb8Smec 	if ((ret = secpolicy_resource(credp)) != 0)
8168b464eb8Smec 		return (ret);
8178b464eb8Smec 	if ((ret = secpolicy_lock_memory(credp)) != 0)
8188b464eb8Smec 		return (ret);
8198b464eb8Smec 
8208b464eb8Smec 	if (msg_printed == 0) {
8218b464eb8Smec 		cmn_err(CE_NOTE, "!driver has been opened. This driver may "
8228b464eb8Smec 		    "take out long term locks on pages which may impact "
8238b464eb8Smec 		    "dynamic reconfiguration events");
8248b464eb8Smec 		msg_printed = 1;
8258b464eb8Smec 	}
8268b464eb8Smec 
8278b464eb8Smec 	return (0);
8288b464eb8Smec }
8298b464eb8Smec 
8308b464eb8Smec /*ARGSUSED*/
8318b464eb8Smec static int
8328b464eb8Smec physmem_close(dev_t dev, int flag, int otyp, cred_t *credp)
8338b464eb8Smec {
8348b464eb8Smec 	return (0);
8358b464eb8Smec }
8368b464eb8Smec 
8378b464eb8Smec /*ARGSUSED*/
8388b464eb8Smec static int
8398b464eb8Smec physmem_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd,
8408b464eb8Smec     void *arg, void **resultp)
8418b464eb8Smec {
8428b464eb8Smec 	switch (infocmd) {
8438b464eb8Smec 	case DDI_INFO_DEVT2DEVINFO:
8448b464eb8Smec 		*resultp = physmem_dip;
8458b464eb8Smec 		return (DDI_SUCCESS);
8468b464eb8Smec 
8478b464eb8Smec 	case DDI_INFO_DEVT2INSTANCE:
8488b464eb8Smec 		*resultp = (void *)(ulong_t)getminor((dev_t)arg);
8498b464eb8Smec 		return (DDI_SUCCESS);
8508b464eb8Smec 
8518b464eb8Smec 	default:
8528b464eb8Smec 		return (DDI_FAILURE);
8538b464eb8Smec 	}
8548b464eb8Smec }
8558b464eb8Smec 
8568b464eb8Smec static int
8578b464eb8Smec physmem_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
8588b464eb8Smec {
8598b464eb8Smec 	int i;
8608b464eb8Smec 
8618b464eb8Smec 	if (cmd == DDI_RESUME) {
8628b464eb8Smec 		return (DDI_SUCCESS);
8638b464eb8Smec 	}
8648b464eb8Smec 
8658b464eb8Smec 	if (cmd != DDI_ATTACH)
8668b464eb8Smec 		return (DDI_FAILURE);
8678b464eb8Smec 
8688b464eb8Smec 	if (ddi_create_minor_node(dip, ddi_get_name(dip), S_IFCHR,
8698b464eb8Smec 	    ddi_get_instance(dip), DDI_PSEUDO, 0) != DDI_SUCCESS)
8708b464eb8Smec 		return (DDI_FAILURE);
8718b464eb8Smec 
8728b464eb8Smec 	physmem_dip = dip;
8738b464eb8Smec 
8748b464eb8Smec 	/* Initialize driver specific data */
8758b464eb8Smec 	if (physmem_setup_vnops()) {
8768b464eb8Smec 		ddi_remove_minor_node(dip, ddi_get_name(dip));
8778b464eb8Smec 		return (DDI_FAILURE);
8788b464eb8Smec 	}
8798b464eb8Smec 
8808b464eb8Smec 	for (i = 0; i < PPH_SIZE; i++)
8818b464eb8Smec 		pph[i] = NULL;
8828b464eb8Smec 
8838b464eb8Smec 	page_capture_register_callback(PC_PHYSMEM, 10000,
8848b464eb8Smec 	    map_page_proc);
8858b464eb8Smec 
8868b464eb8Smec 	return (DDI_SUCCESS);
8878b464eb8Smec }
8888b464eb8Smec 
8898b464eb8Smec static int
8908b464eb8Smec physmem_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
8918b464eb8Smec {
8928b464eb8Smec 	int ret = DDI_SUCCESS;
8938b464eb8Smec 
8948b464eb8Smec 	if (cmd == DDI_SUSPEND) {
8958b464eb8Smec 		return (DDI_SUCCESS);
8968b464eb8Smec 	}
8978b464eb8Smec 
8988b464eb8Smec 	if (cmd != DDI_DETACH)
8998b464eb8Smec 		return (DDI_FAILURE);
9008b464eb8Smec 
9018b464eb8Smec 	ASSERT(physmem_dip == dip);
9028b464eb8Smec 
9038b464eb8Smec 	mutex_enter(&physmem_mutex);
9048b464eb8Smec 	if (physmem_vnodecnt == 0) {
9058b464eb8Smec 		if (physmem_vnodeops != NULL) {
9068b464eb8Smec 			vn_freevnodeops(physmem_vnodeops);
9078b464eb8Smec 			physmem_vnodeops = NULL;
9088b464eb8Smec 			page_capture_unregister_callback(PC_PHYSMEM);
9098b464eb8Smec 		}
9108b464eb8Smec 	} else {
9118b464eb8Smec 		ret = EBUSY;
9128b464eb8Smec 	}
9138b464eb8Smec 	mutex_exit(&physmem_mutex);
9148b464eb8Smec 	if (ret == DDI_SUCCESS)
9158b464eb8Smec 		ddi_remove_minor_node(dip, ddi_get_name(dip));
9168b464eb8Smec 	return (ret);
9178b464eb8Smec }
9188b464eb8Smec 
9198b464eb8Smec static struct cb_ops physmem_cb_ops = {
9208b464eb8Smec 	physmem_open,	/* open */
9218b464eb8Smec 	physmem_close,	/* close */
9228b464eb8Smec 	nodev,		/* strategy */
9238b464eb8Smec 	nodev,		/* print */
9248b464eb8Smec 	nodev,		/* dump */
9258b464eb8Smec 	nodev,		/* read */
9268b464eb8Smec 	nodev,		/* write */
9278b464eb8Smec 	physmem_ioctl,	/* ioctl */
9288b464eb8Smec 	nodev,		/* devmap */
9298b464eb8Smec 	nodev,		/* mmap */
9308b464eb8Smec 	nodev,		/* segmap */
9318b464eb8Smec 	nochpoll,	/* chpoll */
9328b464eb8Smec 	ddi_prop_op,	/* prop_op */
9338b464eb8Smec 	NULL,		/* cb_str */
9348b464eb8Smec 	D_NEW | D_MP | D_DEVMAP,
9358b464eb8Smec 	CB_REV,
9368b464eb8Smec 	NULL,
9378b464eb8Smec 	NULL
9388b464eb8Smec };
9398b464eb8Smec 
9408b464eb8Smec static struct dev_ops physmem_ops = {
9418b464eb8Smec 	DEVO_REV,
9428b464eb8Smec 	0,
9438b464eb8Smec 	physmem_getinfo,
9448b464eb8Smec 	nulldev,
9458b464eb8Smec 	nulldev,
9468b464eb8Smec 	physmem_attach,
9478b464eb8Smec 	physmem_detach,
9488b464eb8Smec 	nodev,
9498b464eb8Smec 	&physmem_cb_ops,
9508b464eb8Smec 	NULL,
95119397407SSherry Moore 	NULL,
95219397407SSherry Moore 	ddi_quiesce_not_needed,		/* quiesce */
9538b464eb8Smec };
9548b464eb8Smec 
9558b464eb8Smec static struct modldrv modldrv = {
9568b464eb8Smec 	&mod_driverops,
95719397407SSherry Moore 	"physmem driver",
9588b464eb8Smec 	&physmem_ops
9598b464eb8Smec };
9608b464eb8Smec 
9618b464eb8Smec static struct modlinkage modlinkage = {
9628b464eb8Smec 	MODREV_1,
9638b464eb8Smec 	&modldrv,
9648b464eb8Smec 	NULL
9658b464eb8Smec };
9668b464eb8Smec 
9678b464eb8Smec int
9688b464eb8Smec _init(void)
9698b464eb8Smec {
9708b464eb8Smec 	return (mod_install(&modlinkage));
9718b464eb8Smec }
9728b464eb8Smec 
9738b464eb8Smec int
9748b464eb8Smec _info(struct modinfo *modinfop)
9758b464eb8Smec {
9768b464eb8Smec 	return (mod_info(&modlinkage, modinfop));
9778b464eb8Smec }
9788b464eb8Smec 
9798b464eb8Smec int
9808b464eb8Smec _fini(void)
9818b464eb8Smec {
9828b464eb8Smec 	return (mod_remove(&modlinkage));
9838b464eb8Smec }
984