/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/devops.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/cred.h>
#include <sys/policy.h>
#include <sys/errno.h>
#include <vm/seg_dev.h>
#include <vm/seg_vn.h>
#include <vm/page.h>
#include <sys/fs/swapnode.h>
#include <sys/sysmacros.h>
#include <sys/fcntl.h>
#include <sys/vmsystm.h>
#include <sys/physmem.h>

static dev_info_t		*physmem_dip = NULL;

/*
 * Linked list element hanging off physmem_proc_hash below, which holds all
 * the information for a given segment which has been set up for this process.
 * This is a simple linked list as we are assuming that for a given process
 * the setup ioctl will only be called a handful of times.  If this assumption
 * changes in the future, a data structure which is quicker to traverse
 * should be used.
 */
struct physmem_hash {
	struct physmem_hash *ph_next;
	uint64_t ph_base_pa;
	caddr_t ph_base_va;
	size_t ph_seg_len;
	struct vnode *ph_vnode;
};

/*
 * Hash of all of the processes which have set up mappings with the driver,
 * with pointers to per-process data.
 */
struct physmem_proc_hash {
	struct proc *pph_proc;
	struct physmem_hash *pph_hash;
	struct physmem_proc_hash *pph_next;
};


/* Needs to be a power of two for simple hash algorithm */
#define	PPH_SIZE	8
struct physmem_proc_hash *pph[PPH_SIZE];

/*
 * Lock which protects the pph hash above.  To add an element (either a new
 * process or a new segment) the WRITE lock must be held.  To traverse the
 * list, only a READ lock is needed.
 */
krwlock_t pph_rwlock;

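/*
 * Hash on the process pointer.  The low-order bits are shifted off first,
 * presumably because allocator alignment leaves them with little variation
 * across processes.  An illustrative (made up) value: procp == 0x30001234500
 * hashes to (0x30001234500 >> 8) & (PPH_SIZE - 1) == 0x300012345 & 7 == 5.
 */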
#define	PHYSMEM_HASH(procp) ((int)((((uintptr_t)procp) >> 8) & (PPH_SIZE - 1)))

/*
 * Need to keep a reference count of how many processes have the driver
 * open to prevent it from disappearing.
 */
uint64_t physmem_vnodecnt;
kmutex_t physmem_mutex;		/* protects physmem_vnodecnt */

static int physmem_getpage(struct vnode *vp, offset_t off, size_t len,
    uint_t *protp, page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
    enum seg_rw rw, struct cred *cr);

static int physmem_addmap(struct vnode *vp, offset_t off, struct as *as,
    caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
    struct cred *cred);

static int physmem_delmap(struct vnode *vp, offset_t off, struct as *as,
    caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags,
    struct cred *cred);

static void physmem_inactive(vnode_t *vp, cred_t *crp);

const fs_operation_def_t physmem_vnodeops_template[] = {
	VOPNAME_GETPAGE, physmem_getpage,
	VOPNAME_ADDMAP, (fs_generic_func_p) physmem_addmap,
	VOPNAME_DELMAP, physmem_delmap,
	VOPNAME_INACTIVE, (fs_generic_func_p) physmem_inactive,
	NULL, NULL
};

vnodeops_t *physmem_vnodeops = NULL;

/*
 * Removes the current process from the hash if the process has no more
 * physmem segments active.
 */
void
physmem_remove_hash_proc()
{
	int index;
	struct physmem_proc_hash **walker;
	struct physmem_proc_hash *victim = NULL;

	index = PHYSMEM_HASH(curproc);
	rw_enter(&pph_rwlock, RW_WRITER);
	walker = &pph[index];
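	/*
	 * Walk via a pointer to the link field rather than the element
	 * itself so that a match can be unlinked without tracking a
	 * separate trailing pointer.
	 */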
	while (*walker != NULL) {
		if ((*walker)->pph_proc == curproc &&
		    (*walker)->pph_hash == NULL) {
			victim = *walker;
			*walker = victim->pph_next;
			break;
		}
		walker = &((*walker)->pph_next);
	}
	rw_exit(&pph_rwlock);
	if (victim != NULL)
		kmem_free(victim, sizeof (struct physmem_proc_hash));
}

/*
 * Add a new entry to the hash for the given process to cache the
 * address ranges that it is working on.  If this is the first hash
 * item to be added for this process, we will create the head pointer
 * for this process.
 * Returns 0 on success, ERANGE when the physical address is already in the
 * hash.  Note that we add it to the hash as we have already called as_map
 * and thus the as_unmap call will try to free the vnode, which needs
 * to be found in the hash.
 */
int
physmem_add_hash(struct physmem_hash *php)
{
	int index;
	struct physmem_proc_hash *iterator;
	struct physmem_proc_hash *newp = NULL;
	struct physmem_hash *temp;
	int ret = 0;

	index = PHYSMEM_HASH(curproc);

insert:
	rw_enter(&pph_rwlock, RW_WRITER);
	iterator = pph[index];
	while (iterator != NULL) {
		if (iterator->pph_proc == curproc) {
			/*
			 * check to make sure a single process does not try to
			 * map the same region twice.
			 */
			for (temp = iterator->pph_hash; temp != NULL;
			    temp = temp->ph_next) {
				if ((php->ph_base_pa >= temp->ph_base_pa &&
				    php->ph_base_pa < temp->ph_base_pa +
				    temp->ph_seg_len) ||
				    (temp->ph_base_pa >= php->ph_base_pa &&
				    temp->ph_base_pa < php->ph_base_pa +
				    php->ph_seg_len)) {
					ret = ERANGE;
					break;
				}
			}
			if (ret == 0) {
				php->ph_next = iterator->pph_hash;
				iterator->pph_hash = php;
			}
			rw_exit(&pph_rwlock);
			/* Need to check for two threads in sync */
			if (newp != NULL)
				kmem_free(newp, sizeof (*newp));
			return (ret);
		}
		iterator = iterator->pph_next;
	}

	if (newp != NULL) {
		newp->pph_proc = curproc;
		newp->pph_next = pph[index];
		newp->pph_hash = php;
		php->ph_next = NULL;
		pph[index] = newp;
		rw_exit(&pph_rwlock);
		return (0);
	}

	rw_exit(&pph_rwlock);
	/* Dropped the lock so we could use KM_SLEEP */
	newp = kmem_zalloc(sizeof (struct physmem_proc_hash), KM_SLEEP);
	goto insert;
}
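
/*
 * Note on the allocate-and-retry pattern above: kmem_zalloc(KM_SLEEP) can
 * block, so it must not be called with pph_rwlock held.  The lock is
 * dropped, the allocation is made, and the insert is retried from scratch,
 * freeing the allocation if another thread won the race.  A minimal sketch
 * of the same idiom, with hypothetical lock/list/lookup/insert names:
 *
 *	retry:
 *		mutex_enter(&lock);
 *		if (lookup(&list, key) != NULL) {
 *			mutex_exit(&lock);
 *			if (newp != NULL)
 *				kmem_free(newp, sizeof (*newp));
 *			return (EEXIST);
 *		}
 *		if (newp == NULL) {
 *			mutex_exit(&lock);
 *			newp = kmem_zalloc(sizeof (*newp), KM_SLEEP);
 *			goto retry;
 *		}
 *		insert(&list, newp);
 *		mutex_exit(&lock);
 *		return (0);
 */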

/*
 * Will return the pointer to the physmem_hash struct if the setup routine
 * has previously been called for this memory.
 * Returns NULL on failure.
 */
struct physmem_hash *
physmem_get_hash(uint64_t req_paddr, size_t len, proc_t *procp)
{
	int index;
	struct physmem_proc_hash *proc_hp;
	struct physmem_hash *php;

	ASSERT(rw_lock_held(&pph_rwlock));

	index = PHYSMEM_HASH(procp);
	proc_hp = pph[index];
	while (proc_hp != NULL) {
		if (proc_hp->pph_proc == procp) {
			php = proc_hp->pph_hash;
			while (php != NULL) {
				if ((req_paddr >= php->ph_base_pa) &&
				    (req_paddr + len <=
				    php->ph_base_pa + php->ph_seg_len)) {
					return (php);
				}
				php = php->ph_next;
			}
		}
		proc_hp = proc_hp->pph_next;
	}
	return (NULL);
}

int
physmem_validate_cookie(uint64_t p_cookie)
{
	int index;
	struct physmem_proc_hash *proc_hp;
	struct physmem_hash *php;

	ASSERT(rw_lock_held(&pph_rwlock));

	index = PHYSMEM_HASH(curproc);
	proc_hp = pph[index];
	while (proc_hp != NULL) {
		if (proc_hp->pph_proc == curproc) {
			php = proc_hp->pph_hash;
			while (php != NULL) {
				if ((uint64_t)(uintptr_t)php == p_cookie) {
					return (1);
				}
				php = php->ph_next;
			}
		}
		proc_hp = proc_hp->pph_next;
	}
	return (0);
}

/*
 * Remove the given vnode from the pph hash.  If it exists in the hash the
 * process still has to be around as the vnode is obviously still around and
 * since it's a physmem vnode, it must be in the hash.
 * If it is not in the hash that must mean that the setup ioctl failed.
 * Return 0 in this instance, 1 if it is in the hash.
 */
int
physmem_remove_vnode_hash(vnode_t *vp)
{
	int index;
	struct physmem_proc_hash *proc_hp;
	struct physmem_hash **phpp;
	struct physmem_hash *victim;

	index = PHYSMEM_HASH(curproc);
	/* synchronize with the map routine */
	rw_enter(&pph_rwlock, RW_WRITER);
	proc_hp = pph[index];
	while (proc_hp != NULL) {
		if (proc_hp->pph_proc == curproc) {
			phpp = &proc_hp->pph_hash;
			while (*phpp != NULL) {
				if ((*phpp)->ph_vnode == vp) {
					victim = *phpp;
					*phpp = victim->ph_next;

					rw_exit(&pph_rwlock);
					kmem_free(victim, sizeof (*victim));
					return (1);
				}
				phpp = &(*phpp)->ph_next;
			}
		}
		proc_hp = proc_hp->pph_next;
	}
	rw_exit(&pph_rwlock);

	/* not found */
	return (0);
}

int
physmem_setup_vnops()
{
	int error;
	char *name = "physmem";
	if (physmem_vnodeops != NULL)
		cmn_err(CE_PANIC, "physmem vnodeops already set\n");
	error = vn_make_ops(name, physmem_vnodeops_template, &physmem_vnodeops);
	if (error != 0) {
		cmn_err(CE_WARN, "physmem_setup_vnops: bad vnode ops template");
	}
	return (error);
}

/*
 * The guts of the PHYSMEM_SETUP ioctl.
 * Create a segment in the address space with the specified parameters.
 * If pspp->user_va is NULL, as_gap will be used to find an appropriate VA.
 * We do not do bounds checking on the requested physical addresses; if they
 * do not exist in the system, they will not be mappable.
 * Returns 0 on success with the following error codes on failure:
 *	ENOMEM - The VA range requested was already mapped if pspp->user_va is
 *		non-NULL, or the system was unable to find enough VA space for
 *		the desired length if user_va was NULL.
 *	EINVAL - The requested PA, VA, or length was not PAGESIZE aligned.
 */
int
physmem_setup_addrs(struct physmem_setup_param *pspp)
{
	struct as *as = curproc->p_as;
	struct segvn_crargs vn_a;
	int ret = 0;
	uint64_t base_pa;
	size_t len;
	caddr_t uvaddr;
	struct vnode *vp;
	struct physmem_hash *php;

	ASSERT(pspp != NULL);
	base_pa = pspp->req_paddr;
	len = pspp->len;
	uvaddr = (caddr_t)(uintptr_t)pspp->user_va;

	/* Sanity checking */
	if (!IS_P2ALIGNED(base_pa, PAGESIZE))
		return (EINVAL);
	if (!IS_P2ALIGNED(len, PAGESIZE))
		return (EINVAL);
	if (uvaddr != NULL && !IS_P2ALIGNED(uvaddr, PAGESIZE))
		return (EINVAL);

	php = kmem_zalloc(sizeof (struct physmem_hash), KM_SLEEP);

	/* Need to bump vnode count so that the driver can not be unloaded */
	mutex_enter(&physmem_mutex);
	physmem_vnodecnt++;
	mutex_exit(&physmem_mutex);

	vp = vn_alloc(KM_SLEEP);
	ASSERT(vp != NULL);	/* SLEEP can't return NULL */
	vn_setops(vp, physmem_vnodeops);

	php->ph_vnode = vp;

	vn_a.vp = vp;
	vn_a.offset = (u_offset_t)base_pa;
	vn_a.type = MAP_SHARED;
	vn_a.prot = PROT_ALL;
	vn_a.maxprot = PROT_ALL;
	vn_a.flags = 0;
	vn_a.cred = NULL;
	vn_a.amp = NULL;
	vn_a.szc = 0;
	vn_a.lgrp_mem_policy_flags = 0;

	as_rangelock(as);
	if (uvaddr != NULL) {
		if (as_gap(as, len, &uvaddr, &len, AH_LO, NULL) == -1) {
			ret = ENOMEM;
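	/*
	 * Common failure path; also reached via goto when map_addr() or
	 * as_map() fails below.
	 */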
fail:
			as_rangeunlock(as);
			vn_free(vp);
			kmem_free(php, sizeof (*php));
			mutex_enter(&physmem_mutex);
			physmem_vnodecnt--;
			mutex_exit(&physmem_mutex);
			return (ret);
		}
	} else {
		/* We pick the address for the user */
		map_addr(&uvaddr, len, 0, 1, 0);
		if (uvaddr == NULL) {
			ret = ENOMEM;
			goto fail;
		}
	}
	ret = as_map(as, uvaddr, len, segvn_create, &vn_a);

	as_rangeunlock(as);
	if (ret == 0) {
		php->ph_base_pa = base_pa;
		php->ph_base_va = uvaddr;
		php->ph_seg_len = len;
		pspp->user_va = (uint64_t)(uintptr_t)uvaddr;
		pspp->cookie = (uint64_t)(uintptr_t)php;
		ret = physmem_add_hash(php);
		if (ret == 0)
			return (0);
		(void) as_unmap(as, uvaddr, len);
		return (ret);
	}

	goto fail;
	/*NOTREACHED*/
}

/*
 * The guts of the PHYSMEM_MAP ioctl.
 * Map the given PA to the appropriate VA if the PHYSMEM_SETUP ioctl has
 * already been called for this PA range.
 * Returns 0 on success with the following error codes on failure:
 *	EPERM - The requested page is long term locked, and thus repeated
 *		requests to allocate this page will likely fail.
 *	EAGAIN - The requested page could not be allocated, but it is believed
 *		that future attempts could succeed.
 *	ENOMEM - There was not enough free memory in the system to safely
 *		map the requested page.
 *	EINVAL - The requested paddr was not PAGESIZE aligned or the
 *		PHYSMEM_SETUP ioctl was not called for this page.
 *	ENOENT - The requested page was inside the kernel cage, and the
 *		PHYSMEM_CAGE flag was not set.
 *	EBUSY - The requested page is retired and the PHYSMEM_RETIRED flag
 *		was not set.
 */
static int
physmem_map_addrs(struct physmem_map_param *pmpp)
{
	caddr_t uvaddr;
	page_t *pp;
	uint64_t req_paddr;
	struct vnode *vp;
	int ret = 0;
	struct physmem_hash *php;
	uint_t flags = 0;

	ASSERT(pmpp != NULL);
	req_paddr = pmpp->req_paddr;

	if (!IS_P2ALIGNED(req_paddr, PAGESIZE))
		return (EINVAL);
	/* Find the vnode for this map request */
	rw_enter(&pph_rwlock, RW_READER);
	php = physmem_get_hash(req_paddr, PAGESIZE, curproc);
	if (php == NULL) {
		rw_exit(&pph_rwlock);
		return (EINVAL);
	}
	vp = php->ph_vnode;
	uvaddr = php->ph_base_va + (req_paddr - php->ph_base_pa);
	rw_exit(&pph_rwlock);

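	/* btop() truncates the byte address to its physical page number */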
	pp = page_numtopp_nolock(btop((size_t)req_paddr));
	if (pp == NULL) {
		pmpp->ret_va = NULL;
		return (EPERM);
	}

	/*
	 * Check to see if page already mapped correctly.  This can happen
	 * when we failed to capture a page previously and it was captured
	 * asynchronously for us.  Return success in this case.
	 */
	if (pp->p_vnode == vp) {
		ASSERT(pp->p_offset == (u_offset_t)req_paddr);
		pmpp->ret_va = (uint64_t)(uintptr_t)uvaddr;
		return (0);
	}

	/*
	 * physmem should be responsible for checking for cage
	 * and prom pages.
	 */
	if (pmpp->flags & PHYSMEM_CAGE)
		flags = CAPTURE_GET_CAGE;
	if (pmpp->flags & PHYSMEM_RETIRED)
		flags |= CAPTURE_GET_RETIRED;

	ret = page_trycapture(pp, 0, flags | CAPTURE_PHYSMEM, curproc);

	if (ret != 0) {
		pmpp->ret_va = NULL;
		return (ret);
	} else {
		pmpp->ret_va = (uint64_t)(uintptr_t)uvaddr;
		return (0);
	}
}

/*
 * Map the given page into the process's address space if possible.
 * We actually only hash the page in on the correct vnode as the page
 * will be mapped via segvn_pagefault.
 * returns 0 on success
 * returns 1 if there is no need to map this page anymore (process exited)
 * returns -1 if we failed to map the page.
 */
int
map_page_proc(page_t *pp, void *arg, uint_t flags)
{
	struct vnode *vp;
	proc_t *procp = (proc_t *)arg;
	int ret;
	u_offset_t paddr = (u_offset_t)ptob(pp->p_pagenum);
	struct physmem_hash *php;

	ASSERT(pp != NULL);

	/*
	 * Check against availrmem to make sure that we're not low on memory.
	 * We check again here as ASYNC requests do not do this check elsewhere.
	 * We return 1 as we don't want the page to have the PR_CAPTURE bit
	 * set or be on the page capture hash.
	 */
	if (swapfs_minfree > availrmem + 1) {
		page_free(pp, 1);
		return (1);
	}

	/*
	 * If this is an asynchronous request for the current process,
	 * we can not map the page as it's possible that we are also in the
	 * process of unmapping the page which could result in a deadlock
	 * with the as lock.
	 */
	if ((flags & CAPTURE_ASYNC) && (curproc == procp)) {
		page_free(pp, 1);
		return (-1);
	}

	/* only return zeroed out pages */
	pagezero(pp, 0, PAGESIZE);

	rw_enter(&pph_rwlock, RW_READER);
	php = physmem_get_hash(paddr, PAGESIZE, procp);
	if (php == NULL) {
		rw_exit(&pph_rwlock);
		/*
		 * Free the page as there is no longer a valid outstanding
		 * request for this page.
		 */
		page_free(pp, 1);
		return (1);
	}

	vp = php->ph_vnode;

	/*
	 * We need to protect against a possible deadlock here where we own
	 * the vnode page hash mutex and want to acquire it again as there
	 * are locations in the code, where we unlock a page while holding
	 * the mutex which can lead to the page being captured and eventually
	 * end up here.
	 */
	if (mutex_owned(page_vnode_mutex(vp))) {
		rw_exit(&pph_rwlock);
		page_free(pp, 1);
		return (-1);
	}

	ret = page_hashin(pp, vp, paddr, NULL);
	rw_exit(&pph_rwlock);
	if (ret == 0) {
		page_free(pp, 1);
		return (-1);
	}

	page_downgrade(pp);

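	/*
	 * Charge the page against availrmem; the charge is returned in
	 * physmem_inactive(), which sets p_lckcnt so that page_destroy()
	 * performs the matching availrmem accounting.
	 */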
	mutex_enter(&freemem_lock);
	availrmem--;
	mutex_exit(&freemem_lock);

	return (0);
}

/*
 * The guts of the PHYSMEM_DESTROY ioctl.
 * The cookie passed in will provide all of the information needed to
 * free up the address space and physical memory associated with the
 * corresponding PHYSMEM_SETUP ioctl.
 * Returns 0 on success with the following error codes on failure:
 *	EINVAL - The cookie supplied is not valid.
 */
int
physmem_destroy_addrs(uint64_t p_cookie)
{
	struct as *as = curproc->p_as;
	size_t len;
	caddr_t uvaddr;

	rw_enter(&pph_rwlock, RW_READER);
	if (physmem_validate_cookie(p_cookie) == 0) {
		rw_exit(&pph_rwlock);
		return (EINVAL);
	}

	len = ((struct physmem_hash *)(uintptr_t)p_cookie)->ph_seg_len;
	uvaddr = ((struct physmem_hash *)(uintptr_t)p_cookie)->ph_base_va;
	rw_exit(&pph_rwlock);

	(void) as_unmap(as, uvaddr, len);

	return (0);
}

/*
 * If the page has been hashed into the physmem vnode, then just look it up
 * and return it via pl, otherwise return ENOMEM as the map ioctl has not
 * succeeded on the given page.
 */
/*ARGSUSED*/
static int
physmem_getpage(struct vnode *vp, offset_t off, size_t len, uint_t *protp,
    page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, enum seg_rw rw,
    struct cred *cr)
{
	page_t *pp;

	ASSERT(len == PAGESIZE);
	ASSERT(AS_READ_HELD(seg->s_as, &seg->s_as->a_lock));

	/*
	 * If the page is in the hash, then we successfully claimed this
	 * page earlier, so return it to the caller.
	 */
	pp = page_lookup(vp, off, SE_SHARED);
	if (pp != NULL) {
		pl[0] = pp;
		pl[1] = NULL;
		*protp = PROT_ALL;
		return (0);
	}
	return (ENOMEM);
}

/*
 * We can not allow a process mapping /dev/physmem pages to fork as there can
 * only be a single mapping to a /dev/physmem page at a given time.  Thus we
 * return EINVAL when we are not working on our own address space.
 * Otherwise we return zero as this function is required for normal operation.
 */
/*ARGSUSED*/
static int
physmem_addmap(struct vnode *vp, offset_t off, struct as *as,
    caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
    struct cred *cred)
{
	if (curproc->p_as != as) {
		return (EINVAL);
	}
	return (0);
}

/* Will always get called for removing a whole segment. */
/*ARGSUSED*/
static int
physmem_delmap(struct vnode *vp, offset_t off, struct as *as,
    caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags,
    struct cred *cred)
{
	/*
	 * Release our hold on the vnode so that the final VN_RELE will
	 * call physmem_inactive to clean things up.
	 */
	VN_RELE(vp);

	return (0);
}

/*
 * Clean up all the pages belonging to this vnode and then free it.
 */
/*ARGSUSED*/
static void
physmem_inactive(vnode_t *vp, cred_t *crp)
{
	page_t *pp;

	/*
	 * Remove the vnode from the hash now, to prevent asynchronous
	 * attempts to map into this vnode.  This avoids a deadlock
	 * where two threads try to get into this logic at the same
	 * time and try to map the pages they are destroying into the
	 * other's address space.
	 * If it's not in the hash, just free it.
	 */
	if (physmem_remove_vnode_hash(vp) == 0) {
		ASSERT(vp->v_pages == NULL);
		vn_free(vp);
		physmem_remove_hash_proc();
		mutex_enter(&physmem_mutex);
		physmem_vnodecnt--;
		mutex_exit(&physmem_mutex);
		return;
	}

	/*
	 * At this point in time, no other logic can be adding or removing
	 * pages from the vnode, otherwise the v_pages list could be inaccurate.
	 */

	while ((pp = vp->v_pages) != NULL) {
		page_t *rpp;
		if (page_tryupgrade(pp)) {
			/*
			 * set lckcnt for page_destroy to do availrmem
			 * accounting
			 */
			pp->p_lckcnt = 1;
			page_destroy(pp, 0);
		} else {
			/* failure to lock should be transient */
			rpp = page_lookup(vp, ptob(pp->p_pagenum), SE_SHARED);
			if (rpp != pp) {
				page_unlock(rpp);
				continue;
			}
			page_unlock(pp);
		}
	}
	vn_free(vp);
	physmem_remove_hash_proc();
	mutex_enter(&physmem_mutex);
	physmem_vnodecnt--;
	mutex_exit(&physmem_mutex);
}

/*ARGSUSED*/
static int
physmem_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
    int *rvalp)
{
	int ret;

	switch (cmd) {
	case PHYSMEM_SETUP:
		{
			struct physmem_setup_param psp;
			if (ddi_copyin((void *)arg, &psp,
			    sizeof (struct physmem_setup_param), 0))
				return (EFAULT);
			ret = physmem_setup_addrs(&psp);
			if (ddi_copyout(&psp, (void *)arg, sizeof (psp), 0))
				return (EFAULT);
		}
		break;
	case PHYSMEM_MAP:
		{
			struct physmem_map_param pmp;
			if (ddi_copyin((void *)arg, &pmp,
			    sizeof (struct physmem_map_param), 0))
				return (EFAULT);
			ret = physmem_map_addrs(&pmp);
			if (ddi_copyout(&pmp, (void *)arg, sizeof (pmp), 0))
				return (EFAULT);
		}
		break;
	case PHYSMEM_DESTROY:
		{
			uint64_t cookie;
			if (ddi_copyin((void *)arg, &cookie,
			    sizeof (uint64_t), 0))
				return (EFAULT);
			ret = physmem_destroy_addrs(cookie);
		}
		break;
	default:
		return (ENOTSUP);
	}
	return (ret);
}
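
/*
 * Illustrative user-space flow (a sketch, not part of the driver; error
 * handling omitted, field and flag names taken from the parameter
 * structures used above, see <sys/physmem.h>):
 *
 *	int fd = open("/dev/physmem", O_RDWR);
 *
 *	struct physmem_setup_param psp;
 *	psp.req_paddr = pa;			(PAGESIZE aligned)
 *	psp.len = PAGESIZE;
 *	psp.user_va = 0;			(0 lets the driver pick the VA)
 *	ioctl(fd, PHYSMEM_SETUP, &psp);		(fills psp.user_va, psp.cookie)
 *
 *	struct physmem_map_param pmp;
 *	pmp.req_paddr = pa;
 *	pmp.flags = 0;				(or PHYSMEM_CAGE, PHYSMEM_RETIRED)
 *	ioctl(fd, PHYSMEM_MAP, &pmp);		(fills pmp.ret_va)
 *
 *	ioctl(fd, PHYSMEM_DESTROY, &psp.cookie);
 */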

/*ARGSUSED*/
static int
physmem_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	int ret;
	static int msg_printed = 0;

	if ((flag & (FWRITE | FREAD)) != (FWRITE | FREAD)) {
		return (EINVAL);
	}

	/* need to make sure we have the right privileges */
	if ((ret = secpolicy_resource(credp)) != 0)
		return (ret);
	if ((ret = secpolicy_lock_memory(credp)) != 0)
		return (ret);

	if (msg_printed == 0) {
		cmn_err(CE_NOTE, "!driver has been opened. This driver may "
		    "take out long term locks on pages which may impact "
		    "dynamic reconfiguration events");
		msg_printed = 1;
	}

	return (0);
}

/*ARGSUSED*/
static int
physmem_close(dev_t dev, int flag, int otyp, cred_t *credp)
{
	return (0);
}

/*ARGSUSED*/
static int
physmem_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd,
    void *arg, void **resultp)
{
	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*resultp = physmem_dip;
		return (DDI_SUCCESS);

	case DDI_INFO_DEVT2INSTANCE:
		*resultp = (void *)(ulong_t)getminor((dev_t)arg);
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}
}

static int
physmem_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int i;

	if (cmd == DDI_RESUME) {
		return (DDI_SUCCESS);
	}

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	if (ddi_create_minor_node(dip, ddi_get_name(dip), S_IFCHR,
	    ddi_get_instance(dip), DDI_PSEUDO, 0) != DDI_SUCCESS)
		return (DDI_FAILURE);

	physmem_dip = dip;

	/* Initialize driver specific data */
	if (physmem_setup_vnops()) {
		ddi_remove_minor_node(dip, ddi_get_name(dip));
		return (DDI_FAILURE);
	}

	for (i = 0; i < PPH_SIZE; i++)
		pph[i] = NULL;

	page_capture_register_callback(PC_PHYSMEM, 10000,
	    map_page_proc);

	return (DDI_SUCCESS);
}

static int
physmem_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int ret = DDI_SUCCESS;

	if (cmd == DDI_SUSPEND) {
		return (DDI_SUCCESS);
	}

	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	ASSERT(physmem_dip == dip);

	mutex_enter(&physmem_mutex);
	if (physmem_vnodecnt == 0) {
		if (physmem_vnodeops != NULL) {
			vn_freevnodeops(physmem_vnodeops);
			physmem_vnodeops = NULL;
			page_capture_unregister_callback(PC_PHYSMEM);
		}
	} else {
		ret = EBUSY;
	}
	mutex_exit(&physmem_mutex);
	if (ret == DDI_SUCCESS)
		ddi_remove_minor_node(dip, ddi_get_name(dip));
	return (ret);
}

static struct cb_ops physmem_cb_ops = {
	physmem_open,	/* open */
	physmem_close,	/* close */
	nodev,		/* strategy */
	nodev,		/* print */
	nodev,		/* dump */
	nodev,		/* read */
	nodev,		/* write */
	physmem_ioctl,	/* ioctl */
	nodev,		/* devmap */
	nodev,		/* mmap */
	nodev,		/* segmap */
	nochpoll,	/* chpoll */
	ddi_prop_op,	/* prop_op */
	NULL,		/* cb_str */
	D_NEW | D_MP | D_DEVMAP,
	CB_REV,
	NULL,
	NULL
};

static struct dev_ops physmem_ops = {
	DEVO_REV,
	0,
	physmem_getinfo,
	nulldev,
	nulldev,
	physmem_attach,
	physmem_detach,
	nodev,
	&physmem_cb_ops,
	NULL,
	NULL
};

static struct modldrv modldrv = {
	&mod_driverops,
	"physmem driver %I%",
	&physmem_ops
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};

int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}