xref: /titanic_50/usr/src/uts/common/io/physmem.c (revision c77a61a72b5ecdc507d6cf104142edd371a16c84)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/modctl.h>
30 #include <sys/conf.h>
31 #include <sys/ddi.h>
32 #include <sys/sunddi.h>
33 #include <sys/devops.h>
34 #include <sys/stat.h>
35 #include <sys/file.h>
36 #include <sys/cred.h>
37 #include <sys/policy.h>
38 #include <sys/errno.h>
39 #include <vm/seg_dev.h>
40 #include <vm/seg_vn.h>
41 #include <vm/page.h>
42 #include <sys/fs/swapnode.h>
43 #include <sys/sysmacros.h>
44 #include <sys/fcntl.h>
45 #include <sys/vmsystm.h>
46 #include <sys/physmem.h>
47 
48 static dev_info_t		*physmem_dip = NULL;
49 
50 /*
51  * Linked list element hanging off physmem_proc_hash below, which holds all
52  * the information for a given segment which has been setup for this process.
53  * This is a simple linked list as we are assuming that for a given process
54  * the setup ioctl will only be called a handful of times.  If this assumption
55  * changes in the future, a quicker to traverse data structure should be used.
56  */
57 struct physmem_hash {
58 	struct physmem_hash *ph_next;
59 	uint64_t ph_base_pa;
60 	caddr_t ph_base_va;
61 	size_t ph_seg_len;
62 	struct vnode *ph_vnode;
63 };
64 
65 /*
66  * Hash of all of the processes which have setup mappings with the driver with
67  * pointers to per process data.
68  */
69 struct physmem_proc_hash {
70 	struct proc *pph_proc;
71 	struct physmem_hash *pph_hash;
72 	struct physmem_proc_hash *pph_next;
73 };
74 
75 
/*
 * Number of buckets in the per-process table.  PHYSMEM_HASH masks with
 * (PPH_SIZE - 1), so this has to stay a power of two.
 */
#define	PPH_SIZE	8
struct physmem_proc_hash *pph[PPH_SIZE];
79 
80 /*
81  * Lock which protects the pph hash above.  To add an element (either a new
82  * process or a new segment) the WRITE lock must be held.  To traverse the
83  * list, only a READ lock is needed.
84  */
85 krwlock_t pph_rwlock;
86 
/*
 * Map a process pointer to a bucket index in pph[]: drop the low 8 bits of
 * the address and mask with (PPH_SIZE - 1).  The argument is parenthesized
 * so that an expression argument (e.g. "p + 1") is evaluated in full before
 * it is cast, instead of the cast binding to its first operand only.
 */
#define	PHYSMEM_HASH(procp) \
	((int)((((uintptr_t)(procp)) >> 8) & (PPH_SIZE - 1)))
88 
89 /*
90  * Need to keep a reference count of how many processes have the driver
91  * open to prevent it from disappearing.
92  */
93 uint64_t physmem_vnodecnt;
94 kmutex_t physmem_mutex;		/* protects phsymem_vnodecnt */
95 
96 static int physmem_getpage(struct vnode *vp, offset_t off, size_t len,
97     uint_t *protp, page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
98     enum seg_rw rw, struct cred *cr);
99 
100 static int physmem_addmap(struct vnode *vp, offset_t off, struct as *as,
101     caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
102     struct cred *cred);
103 
104 static int physmem_delmap(struct vnode *vp, offset_t off, struct as *as,
105     caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags,
106     struct cred *cred);
107 
108 static void physmem_inactive(vnode_t *vp, cred_t *crp);
109 
110 const fs_operation_def_t physmem_vnodeops_template[] = {
111 	VOPNAME_GETPAGE, physmem_getpage,
112 	VOPNAME_ADDMAP, (fs_generic_func_p) physmem_addmap,
113 	VOPNAME_DELMAP, physmem_delmap,
114 	VOPNAME_INACTIVE, (fs_generic_func_p) physmem_inactive,
115 	NULL, NULL
116 };
117 
118 vnodeops_t *physmem_vnodeops = NULL;
119 
120 /*
121  * Removes the current process from the hash if the process has no more
122  * physmem segments active.
123  */
124 void
125 physmem_remove_hash_proc()
126 {
127 	int index;
128 	struct physmem_proc_hash **walker;
129 	struct physmem_proc_hash *victim = NULL;
130 
131 	index = PHYSMEM_HASH(curproc);
132 	rw_enter(&pph_rwlock, RW_WRITER);
133 	walker = &pph[index];
134 	while (*walker != NULL) {
135 		if ((*walker)->pph_proc == curproc &&
136 		    (*walker)->pph_hash == NULL) {
137 			victim = *walker;
138 			*walker = victim->pph_next;
139 			break;
140 		}
141 		walker = &((*walker)->pph_next);
142 	}
143 	rw_exit(&pph_rwlock);
144 	if (victim != NULL)
145 		kmem_free(victim, sizeof (struct physmem_proc_hash));
146 }
147 
148 /*
149  * Add a new entry to the hash for the given process to cache the
150  * address ranges that it is working on.  If this is the first hash
151  * item to be added for this process, we will create the head pointer
152  * for this process.
153  * Returns 0 on success, ERANGE when the physical address is already in the
154  * hash.  Note that we add it to the hash as we have already called as_map
155  * and thus the as_unmap call will try to free the vnode, which needs
156  * to be found in the hash.
157  */
158 int
159 physmem_add_hash(struct physmem_hash *php)
160 {
161 	int index;
162 	struct physmem_proc_hash *iterator;
163 	struct physmem_proc_hash *newp = NULL;
164 	struct physmem_hash *temp;
165 	int ret = 0;
166 
167 	index = PHYSMEM_HASH(curproc);
168 
169 insert:
170 	rw_enter(&pph_rwlock, RW_WRITER);
171 	iterator = pph[index];
172 	while (iterator != NULL) {
173 		if (iterator->pph_proc == curproc) {
174 			/*
175 			 * check to make sure a single process does not try to
176 			 * map the same region twice.
177 			 */
178 			for (temp = iterator->pph_hash; temp != NULL;
179 			    temp = temp->ph_next) {
180 				if ((php->ph_base_pa >= temp->ph_base_pa &&
181 				    php->ph_base_pa < temp->ph_base_pa +
182 				    temp->ph_seg_len) ||
183 				    (temp->ph_base_pa >= php->ph_base_pa &&
184 				    temp->ph_base_pa < php->ph_base_pa +
185 				    php->ph_seg_len)) {
186 					ret = ERANGE;
187 					break;
188 				}
189 			}
190 			if (ret == 0) {
191 				php->ph_next = iterator->pph_hash;
192 				iterator->pph_hash = php;
193 			}
194 			rw_exit(&pph_rwlock);
195 			/* Need to check for two threads in sync */
196 			if (newp != NULL)
197 				kmem_free(newp, sizeof (*newp));
198 			return (ret);
199 		}
200 		iterator = iterator->pph_next;
201 	}
202 
203 	if (newp != NULL) {
204 		newp->pph_proc = curproc;
205 		newp->pph_next = pph[index];
206 		newp->pph_hash = php;
207 		php->ph_next = NULL;
208 		pph[index] = newp;
209 		rw_exit(&pph_rwlock);
210 		return (0);
211 	}
212 
213 	rw_exit(&pph_rwlock);
214 	/* Dropped the lock so we could use KM_SLEEP */
215 	newp = kmem_zalloc(sizeof (struct physmem_proc_hash), KM_SLEEP);
216 	goto insert;
217 }
218 
219 /*
220  * Will return the pointer to the physmem_hash struct if the setup routine
221  * has previously been called for this memory.
222  * Returns NULL on failure.
223  */
224 struct physmem_hash *
225 physmem_get_hash(uint64_t req_paddr, size_t len, proc_t *procp)
226 {
227 	int index;
228 	struct physmem_proc_hash *proc_hp;
229 	struct physmem_hash *php;
230 
231 	ASSERT(rw_lock_held(&pph_rwlock));
232 
233 	index = PHYSMEM_HASH(procp);
234 	proc_hp = pph[index];
235 	while (proc_hp != NULL) {
236 		if (proc_hp->pph_proc == procp) {
237 			php = proc_hp->pph_hash;
238 			while (php != NULL) {
239 				if ((req_paddr >= php->ph_base_pa) &&
240 				    (req_paddr + len <=
241 				    php->ph_base_pa + php->ph_seg_len)) {
242 					return (php);
243 				}
244 				php = php->ph_next;
245 			}
246 		}
247 		proc_hp = proc_hp->pph_next;
248 	}
249 	return (NULL);
250 }
251 
252 int
253 physmem_validate_cookie(uint64_t p_cookie)
254 {
255 	int index;
256 	struct physmem_proc_hash *proc_hp;
257 	struct physmem_hash *php;
258 
259 	ASSERT(rw_lock_held(&pph_rwlock));
260 
261 	index = PHYSMEM_HASH(curproc);
262 	proc_hp = pph[index];
263 	while (proc_hp != NULL) {
264 		if (proc_hp->pph_proc == curproc) {
265 			php = proc_hp->pph_hash;
266 			while (php != NULL) {
267 				if ((uint64_t)(uintptr_t)php == p_cookie) {
268 					return (1);
269 				}
270 				php = php->ph_next;
271 			}
272 		}
273 		proc_hp = proc_hp->pph_next;
274 	}
275 	return (0);
276 }
277 
278 /*
279  * Remove the given vnode from the pph hash.  If it exists in the hash the
280  * process still has to be around as the vnode is obviously still around and
281  * since it's a physmem vnode, it must be in the hash.
282  * If it is not in the hash that must mean that the setup ioctl failed.
283  * Return 0 in this instance, 1 if it is in the hash.
284  */
285 int
286 physmem_remove_vnode_hash(vnode_t *vp)
287 {
288 	int index;
289 	struct physmem_proc_hash *proc_hp;
290 	struct physmem_hash **phpp;
291 	struct physmem_hash *victim;
292 
293 	index = PHYSMEM_HASH(curproc);
294 	/* synchronize with the map routine */
295 	rw_enter(&pph_rwlock, RW_WRITER);
296 	proc_hp = pph[index];
297 	while (proc_hp != NULL) {
298 		if (proc_hp->pph_proc == curproc) {
299 			phpp = &proc_hp->pph_hash;
300 			while (*phpp != NULL) {
301 				if ((*phpp)->ph_vnode == vp) {
302 					victim = *phpp;
303 					*phpp = victim->ph_next;
304 
305 					rw_exit(&pph_rwlock);
306 					kmem_free(victim, sizeof (*victim));
307 					return (1);
308 				}
309 				phpp = &(*phpp)->ph_next;
310 			}
311 		}
312 		proc_hp = proc_hp->pph_next;
313 	}
314 	rw_exit(&pph_rwlock);
315 
316 	/* not found */
317 	return (0);
318 }
319 
320 int
321 physmem_setup_vnops()
322 {
323 	int error;
324 	char *name = "physmem";
325 	if (physmem_vnodeops != NULL)
326 		cmn_err(CE_PANIC, "physmem vnodeops already set\n");
327 	error = vn_make_ops(name, physmem_vnodeops_template, &physmem_vnodeops);
328 	if (error != 0) {
329 		cmn_err(CE_WARN, "physmem_setup_vnops: bad vnode ops template");
330 	}
331 	return (error);
332 }
333 
334 /*
335  * The guts of the PHYSMEM_SETUP ioctl.
336  * Create a segment in the address space with the specified parameters.
337  * If pspp->user_va is NULL, as_gap will be used to find an appropriate VA.
338  * We do not do bounds checking on the requested phsycial addresses, if they
339  * do not exist in the system, they will not be mappable.
340  * Returns 0 on success with the following error codes on failure:
341  *	ENOMEM - The VA range requested was already mapped if pspp->user_va is
342  *		non-NULL or the system was unable to find enough VA space for
343  *		the desired length if user_va was NULL>
344  *	EINVAL - The requested PA, VA, or length was not PAGESIZE aligned.
345  */
346 int
347 physmem_setup_addrs(struct physmem_setup_param *pspp)
348 {
349 	struct as *as = curproc->p_as;
350 	struct segvn_crargs vn_a;
351 	int ret = 0;
352 	uint64_t base_pa;
353 	size_t len;
354 	caddr_t uvaddr;
355 	struct vnode *vp;
356 	struct physmem_hash *php;
357 
358 	ASSERT(pspp != NULL);
359 	base_pa = pspp->req_paddr;
360 	len = pspp->len;
361 	uvaddr = (caddr_t)(uintptr_t)pspp->user_va;
362 
363 	/* Sanity checking */
364 	if (!IS_P2ALIGNED(base_pa, PAGESIZE))
365 		return (EINVAL);
366 	if (!IS_P2ALIGNED(len, PAGESIZE))
367 		return (EINVAL);
368 	if (uvaddr != NULL && !IS_P2ALIGNED(uvaddr, PAGESIZE))
369 		return (EINVAL);
370 
371 	php = kmem_zalloc(sizeof (struct physmem_hash), KM_SLEEP);
372 
373 	/* Need to bump vnode count so that the driver can not be unloaded */
374 	mutex_enter(&physmem_mutex);
375 	physmem_vnodecnt++;
376 	mutex_exit(&physmem_mutex);
377 
378 	vp = vn_alloc(KM_SLEEP);
379 	ASSERT(vp != NULL);	/* SLEEP can't return NULL */
380 	vn_setops(vp, physmem_vnodeops);
381 
382 	php->ph_vnode = vp;
383 
384 	vn_a.vp = vp;
385 	vn_a.offset = (u_offset_t)base_pa;
386 	vn_a.type = MAP_SHARED;
387 	vn_a.prot = PROT_ALL;
388 	vn_a.maxprot = PROT_ALL;
389 	vn_a.flags = 0;
390 	vn_a.cred = NULL;
391 	vn_a.amp = NULL;
392 	vn_a.szc = 0;
393 	vn_a.lgrp_mem_policy_flags = 0;
394 
395 	as_rangelock(as);
396 	if (uvaddr != NULL) {
397 		if (as_gap(as, len, &uvaddr, &len, AH_LO, NULL) == -1) {
398 			ret = ENOMEM;
399 fail:
400 			as_rangeunlock(as);
401 			vn_free(vp);
402 			kmem_free(php, sizeof (*php));
403 			mutex_enter(&physmem_mutex);
404 			physmem_vnodecnt--;
405 			mutex_exit(&physmem_mutex);
406 			return (ret);
407 		}
408 	} else {
409 		/* We pick the address for the user */
410 		map_addr(&uvaddr, len, 0, 1, 0);
411 		if (uvaddr == NULL) {
412 			ret = ENOMEM;
413 			goto fail;
414 		}
415 	}
416 	ret = as_map(as, uvaddr, len, segvn_create, &vn_a);
417 
418 	as_rangeunlock(as);
419 	if (ret == 0) {
420 		php->ph_base_pa = base_pa;
421 		php->ph_base_va = uvaddr;
422 		php->ph_seg_len = len;
423 		pspp->user_va = (uint64_t)(uintptr_t)uvaddr;
424 		pspp->cookie = (uint64_t)(uintptr_t)php;
425 		ret = physmem_add_hash(php);
426 		if (ret == 0)
427 			return (0);
428 		(void) as_unmap(as, uvaddr, len);
429 		return (ret);
430 	}
431 
432 	goto fail;
433 	/*NOTREACHED*/
434 }
435 
436 /*
437  * The guts of the PHYSMEM_MAP ioctl.
438  * Map the given PA to the appropriate VA if PHYSMEM_SETUP ioctl has already
439  * been called for this PA range.
440  * Returns 0 on success with the following error codes on failure:
441  *	EPERM - The requested page is long term locked, and thus repeated
442  *		requests to allocate this page will likely fail.
443  *	EAGAIN - The requested page could not be allocated, but it is believed
444  *		that future attempts could succeed.
445  *	ENOMEM - There was not enough free memory in the system to safely
446  *		map the requested page.
447  *	EINVAL - The requested paddr was not PAGESIZE aligned or the
448  *		PHYSMEM_SETUP ioctl was not called for this page.
449  *	ENOENT - The requested page was iniside the kernel cage, and the
450  *		PHYSMEM_CAGE flag was not set.
451  *	EBUSY - The requested page is retired and the PHYSMEM_RETIRE flag
452  *		was not set.
453  */
454 static int
455 physmem_map_addrs(struct physmem_map_param *pmpp)
456 {
457 	caddr_t uvaddr;
458 	page_t *pp;
459 	uint64_t req_paddr;
460 	struct vnode *vp;
461 	int ret = 0;
462 	struct physmem_hash *php;
463 	uint_t flags = 0;
464 
465 	ASSERT(pmpp != NULL);
466 	req_paddr = pmpp->req_paddr;
467 
468 	if (!IS_P2ALIGNED(req_paddr, PAGESIZE))
469 		return (EINVAL);
470 	/* Find the vnode for this map request */
471 	rw_enter(&pph_rwlock, RW_READER);
472 	php = physmem_get_hash(req_paddr, PAGESIZE, curproc);
473 	if (php == NULL) {
474 		rw_exit(&pph_rwlock);
475 		return (EINVAL);
476 	}
477 	vp = php->ph_vnode;
478 	uvaddr = php->ph_base_va + (req_paddr - php->ph_base_pa);
479 	rw_exit(&pph_rwlock);
480 
481 	pp = page_numtopp_nolock(btop((size_t)req_paddr));
482 	if (pp == NULL) {
483 		pmpp->ret_va = NULL;
484 		return (EPERM);
485 	}
486 
487 	/*
488 	 * Check to see if page already mapped correctly.  This can happen
489 	 * when we failed to capture a page previously and it was captured
490 	 * asynchronously for us.  Return success in this case.
491 	 */
492 	if (pp->p_vnode == vp) {
493 		ASSERT(pp->p_offset == (u_offset_t)req_paddr);
494 		pmpp->ret_va = (uint64_t)(uintptr_t)uvaddr;
495 		return (0);
496 	}
497 
498 	/*
499 	 * physmem should be responsible for checking for cage
500 	 * and prom pages.
501 	 */
502 	if (pmpp->flags & PHYSMEM_CAGE)
503 		flags = CAPTURE_GET_CAGE;
504 	if (pmpp->flags & PHYSMEM_RETIRED)
505 		flags |= CAPTURE_GET_RETIRED;
506 
507 	ret = page_trycapture(pp, 0, flags | CAPTURE_PHYSMEM, curproc);
508 
509 	if (ret != 0) {
510 		pmpp->ret_va = NULL;
511 		return (ret);
512 	} else {
513 		pmpp->ret_va = (uint64_t)(uintptr_t)uvaddr;
514 		return (0);
515 	}
516 }
517 
518 /*
519  * Map the given page into the process's address space if possible.
520  * We actually only hash the page in on the correct vnode as the page
521  * will be mapped via segvn_pagefault.
522  * returns 0 on success
523  * returns 1 if there is no need to map this page anymore (process exited)
524  * returns -1 if we failed to map the page.
525  */
526 int
527 map_page_proc(page_t *pp, void *arg, uint_t flags)
528 {
529 	struct vnode *vp;
530 	proc_t *procp = (proc_t *)arg;
531 	int ret;
532 	u_offset_t paddr = (u_offset_t)ptob(pp->p_pagenum);
533 	struct physmem_hash *php;
534 
535 	ASSERT(pp != NULL);
536 
537 	/*
538 	 * Check against availrmem to make sure that we're not low on memory.
539 	 * We check again here as ASYNC requests do not do this check elsewhere.
540 	 * We return 1 as we don't want the page to have the PR_CAPTURE bit
541 	 * set or be on the page capture hash.
542 	 */
543 	if (swapfs_minfree > availrmem + 1) {
544 		page_free(pp, 1);
545 		return (1);
546 	}
547 
548 	/*
549 	 * If this is an asynchronous request for the current process,
550 	 * we can not map the page as it's possible that we are also in the
551 	 * process of unmapping the page which could result in a deadlock
552 	 * with the as lock.
553 	 */
554 	if ((flags & CAPTURE_ASYNC) && (curproc == procp)) {
555 		page_free(pp, 1);
556 		return (-1);
557 	}
558 
559 	/* only return zeroed out pages */
560 	pagezero(pp, 0, PAGESIZE);
561 
562 	rw_enter(&pph_rwlock, RW_READER);
563 	php = physmem_get_hash(paddr, PAGESIZE, procp);
564 	if (php == NULL) {
565 		rw_exit(&pph_rwlock);
566 		/*
567 		 * Free the page as there is no longer a valid outstanding
568 		 * request for this page.
569 		 */
570 		page_free(pp, 1);
571 		return (1);
572 	}
573 
574 	vp = php->ph_vnode;
575 
576 	/*
577 	 * We need to protect against a possible deadlock here where we own
578 	 * the vnode page hash mutex and want to acquire it again as there
579 	 * are locations in the code, where we unlock a page while holding
580 	 * the mutex which can lead to the page being captured and eventually
581 	 * end up here.
582 	 */
583 	if (mutex_owned(page_vnode_mutex(vp))) {
584 		rw_exit(&pph_rwlock);
585 		page_free(pp, 1);
586 		return (-1);
587 	}
588 
589 	ret = page_hashin(pp, vp, paddr, NULL);
590 	rw_exit(&pph_rwlock);
591 	if (ret == 0) {
592 		page_free(pp, 1);
593 		return (-1);
594 	}
595 
596 	page_downgrade(pp);
597 
598 	mutex_enter(&freemem_lock);
599 	availrmem--;
600 	mutex_exit(&freemem_lock);
601 
602 	return (0);
603 }
604 
605 /*
606  * The guts of the PHYSMEM_DESTROY ioctl.
607  * The cookie passed in will provide all of the information needed to
608  * free up the address space and physical memory associated with the
609  * corresponding PHSYMEM_SETUP ioctl.
610  * Returns 0 on success with the following error codes on failure:
611  *	EINVAL - The cookie supplied is not valid.
612  */
613 int
614 physmem_destroy_addrs(uint64_t p_cookie)
615 {
616 	struct as *as = curproc->p_as;
617 	size_t len;
618 	caddr_t uvaddr;
619 
620 	rw_enter(&pph_rwlock, RW_READER);
621 	if (physmem_validate_cookie(p_cookie) == 0) {
622 		rw_exit(&pph_rwlock);
623 		return (EINVAL);
624 	}
625 
626 	len = ((struct physmem_hash *)(uintptr_t)p_cookie)->ph_seg_len;
627 	uvaddr = ((struct physmem_hash *)(uintptr_t)p_cookie)->ph_base_va;
628 	rw_exit(&pph_rwlock);
629 
630 	(void) as_unmap(as, uvaddr, len);
631 
632 	return (0);
633 }
634 
635 /*
636  * If the page has been hashed into the physmem vnode, then just look it up
637  * and return it via pl, otherwise return ENOMEM as the map ioctl has not
638  * succeeded on the given page.
639  */
640 /*ARGSUSED*/
641 static int
642 physmem_getpage(struct vnode *vp, offset_t off, size_t len, uint_t *protp,
643     page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, enum seg_rw rw,
644     struct cred *cr)
645 {
646 	page_t *pp;
647 
648 	ASSERT(len == PAGESIZE);
649 	ASSERT(AS_READ_HELD(seg->s_as, &seg->s_as->a_lock));
650 
651 	/*
652 	 * If the page is in the hash, then we successfully claimed this
653 	 * page earlier, so return it to the caller.
654 	 */
655 	pp = page_lookup(vp, off, SE_SHARED);
656 	if (pp != NULL) {
657 		pl[0] = pp;
658 		pl[1] = NULL;
659 		*protp = PROT_ALL;
660 		return (0);
661 	}
662 	return (ENOMEM);
663 }
664 
665 /*
666  * We can not allow a process mapping /dev/physmem pages to fork as there can
667  * only be a single mapping to a /dev/physmem page at a given time.  Thus, the
668  * return of EINVAL when we are not working on our own address space.
669  * Otherwise we return zero as this function is required for normal operation.
670  */
671 /*ARGSUSED*/
672 static int
673 physmem_addmap(struct vnode *vp, offset_t off, struct as *as,
674     caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
675     struct cred *cred)
676 {
677 	if (curproc->p_as != as) {
678 		return (EINVAL);
679 	}
680 	return (0);
681 }
682 
683 /* Will always get called for removing a whole segment. */
684 /*ARGSUSED*/
685 static int
686 physmem_delmap(struct vnode *vp, offset_t off, struct as *as,
687     caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags,
688     struct cred *cred)
689 {
690 	/*
691 	 * Release our hold on the vnode so that the final VN_RELE will
692 	 * call physmem_inactive to clean things up.
693 	 */
694 	VN_RELE(vp);
695 
696 	return (0);
697 }
698 
699 /*
700  * Clean up all the pages belonging to this vnode and then free it.
701  */
702 /*ARGSUSED*/
703 static void
704 physmem_inactive(vnode_t *vp, cred_t *crp)
705 {
706 	page_t *pp;
707 
708 	/*
709 	 * Remove the vnode from the hash now, to prevent asynchronous
710 	 * attempts to map into this vnode.  This avoids a deadlock
711 	 * where two threads try to get into this logic at the same
712 	 * time and try to map the pages they are destroying into the
713 	 * other's address space.
714 	 * If it's not in the hash, just free it.
715 	 */
716 	if (physmem_remove_vnode_hash(vp) == 0) {
717 		ASSERT(vp->v_pages == NULL);
718 		vn_free(vp);
719 		physmem_remove_hash_proc();
720 		mutex_enter(&physmem_mutex);
721 		physmem_vnodecnt--;
722 		mutex_exit(&physmem_mutex);
723 		return;
724 	}
725 
726 	/*
727 	 * At this point in time, no other logic can be adding or removing
728 	 * pages from the vnode, otherwise the v_pages list could be inaccurate.
729 	 */
730 
731 	while ((pp = vp->v_pages) != NULL) {
732 		page_t *rpp;
733 		if (page_tryupgrade(pp)) {
734 			/*
735 			 * set lckcnt for page_destroy to do availrmem
736 			 * accounting
737 			 */
738 			pp->p_lckcnt = 1;
739 			page_destroy(pp, 0);
740 		} else {
741 			/* failure to lock should be transient */
742 			rpp = page_lookup(vp, ptob(pp->p_pagenum), SE_SHARED);
743 			if (rpp != pp) {
744 				page_unlock(rpp);
745 				continue;
746 			}
747 			page_unlock(pp);
748 		}
749 	}
750 	vn_free(vp);
751 	physmem_remove_hash_proc();
752 	mutex_enter(&physmem_mutex);
753 	physmem_vnodecnt--;
754 	mutex_exit(&physmem_mutex);
755 }
756 
757 /*ARGSUSED*/
758 static int
759 physmem_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
760     int *rvalp)
761 {
762 	int ret;
763 
764 	switch (cmd) {
765 	case PHYSMEM_SETUP:
766 		{
767 			struct physmem_setup_param psp;
768 			if (ddi_copyin((void *)arg, &psp,
769 			    sizeof (struct physmem_setup_param), 0))
770 				return (EFAULT);
771 			ret = physmem_setup_addrs(&psp);
772 			if (ddi_copyout(&psp, (void *)arg, sizeof (psp), 0))
773 				return (EFAULT);
774 		}
775 		break;
776 	case PHYSMEM_MAP:
777 		{
778 			struct physmem_map_param pmp;
779 			if (ddi_copyin((void *)arg, &pmp,
780 			    sizeof (struct physmem_map_param), 0))
781 				return (EFAULT);
782 			ret = physmem_map_addrs(&pmp);
783 			if (ddi_copyout(&pmp, (void *)arg, sizeof (pmp), 0))
784 				return (EFAULT);
785 		}
786 		break;
787 	case PHYSMEM_DESTROY:
788 		{
789 			uint64_t cookie;
790 			if (ddi_copyin((void *)arg, &cookie,
791 			    sizeof (uint64_t), 0))
792 				return (EFAULT);
793 			ret = physmem_destroy_addrs(cookie);
794 		}
795 		break;
796 	default:
797 		return (ENOTSUP);
798 	}
799 	return (ret);
800 }
801 
802 /*ARGSUSED*/
803 static int
804 physmem_open(dev_t *devp, int flag, int otyp, cred_t *credp)
805 {
806 	int ret;
807 	static int msg_printed = 0;
808 
809 	if ((flag & (FWRITE | FREAD)) != (FWRITE | FREAD)) {
810 		return (EINVAL);
811 	}
812 
813 	/* need to make sure we have the right privileges */
814 	if ((ret = secpolicy_resource(credp)) != 0)
815 		return (ret);
816 	if ((ret = secpolicy_lock_memory(credp)) != 0)
817 		return (ret);
818 
819 	if (msg_printed == 0) {
820 		cmn_err(CE_NOTE, "!driver has been opened. This driver may "
821 		    "take out long term locks on pages which may impact "
822 		    "dynamic reconfiguration events");
823 		msg_printed = 1;
824 	}
825 
826 	return (0);
827 }
828 
829 /*ARGSUSED*/
830 static int
831 physmem_close(dev_t dev, int flag, int otyp, cred_t *credp)
832 {
833 	return (0);
834 }
835 
836 /*ARGSUSED*/
837 static int
838 physmem_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd,
839     void *arg, void **resultp)
840 {
841 	switch (infocmd) {
842 	case DDI_INFO_DEVT2DEVINFO:
843 		*resultp = physmem_dip;
844 		return (DDI_SUCCESS);
845 
846 	case DDI_INFO_DEVT2INSTANCE:
847 		*resultp = (void *)(ulong_t)getminor((dev_t)arg);
848 		return (DDI_SUCCESS);
849 
850 	default:
851 		return (DDI_FAILURE);
852 	}
853 }
854 
855 static int
856 physmem_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
857 {
858 	int i;
859 
860 	if (cmd == DDI_RESUME) {
861 		return (DDI_SUCCESS);
862 	}
863 
864 	if (cmd != DDI_ATTACH)
865 		return (DDI_FAILURE);
866 
867 	if (ddi_create_minor_node(dip, ddi_get_name(dip), S_IFCHR,
868 	    ddi_get_instance(dip), DDI_PSEUDO, 0) != DDI_SUCCESS)
869 		return (DDI_FAILURE);
870 
871 	physmem_dip = dip;
872 
873 	/* Initialize driver specific data */
874 	if (physmem_setup_vnops()) {
875 		ddi_remove_minor_node(dip, ddi_get_name(dip));
876 		return (DDI_FAILURE);
877 	}
878 
879 	for (i = 0; i < PPH_SIZE; i++)
880 		pph[i] = NULL;
881 
882 	page_capture_register_callback(PC_PHYSMEM, 10000,
883 	    map_page_proc);
884 
885 	return (DDI_SUCCESS);
886 }
887 
888 static int
889 physmem_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
890 {
891 	int ret = DDI_SUCCESS;
892 
893 	if (cmd == DDI_SUSPEND) {
894 		return (DDI_SUCCESS);
895 	}
896 
897 	if (cmd != DDI_DETACH)
898 		return (DDI_FAILURE);
899 
900 	ASSERT(physmem_dip == dip);
901 
902 	mutex_enter(&physmem_mutex);
903 	if (physmem_vnodecnt == 0) {
904 		if (physmem_vnodeops != NULL) {
905 			vn_freevnodeops(physmem_vnodeops);
906 			physmem_vnodeops = NULL;
907 			page_capture_unregister_callback(PC_PHYSMEM);
908 		}
909 	} else {
910 		ret = EBUSY;
911 	}
912 	mutex_exit(&physmem_mutex);
913 	if (ret == DDI_SUCCESS)
914 		ddi_remove_minor_node(dip, ddi_get_name(dip));
915 	return (ret);
916 }
917 
918 static struct cb_ops physmem_cb_ops = {
919 	physmem_open,	/* open */
920 	physmem_close,	/* close */
921 	nodev,		/* strategy */
922 	nodev,		/* print */
923 	nodev,		/* dump */
924 	nodev,		/* read */
925 	nodev,		/* write */
926 	physmem_ioctl,	/* ioctl */
927 	nodev,		/* devmap */
928 	nodev,		/* mmap */
929 	nodev,		/* segmap */
930 	nochpoll,	/* chpoll */
931 	ddi_prop_op,	/* prop_op */
932 	NULL,		/* cb_str */
933 	D_NEW | D_MP | D_DEVMAP,
934 	CB_REV,
935 	NULL,
936 	NULL
937 };
938 
939 static struct dev_ops physmem_ops = {
940 	DEVO_REV,
941 	0,
942 	physmem_getinfo,
943 	nulldev,
944 	nulldev,
945 	physmem_attach,
946 	physmem_detach,
947 	nodev,
948 	&physmem_cb_ops,
949 	NULL,
950 	NULL
951 };
952 
953 static struct modldrv modldrv = {
954 	&mod_driverops,
955 	"physmem driver %I%",
956 	&physmem_ops
957 };
958 
959 static struct modlinkage modlinkage = {
960 	MODREV_1,
961 	&modldrv,
962 	NULL
963 };
964 
965 int
966 _init(void)
967 {
968 	return (mod_install(&modlinkage));
969 }
970 
971 int
972 _info(struct modinfo *modinfop)
973 {
974 	return (mod_info(&modlinkage, modinfop));
975 }
976 
977 int
978 _fini(void)
979 {
980 	return (mod_remove(&modlinkage));
981 }
982