/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */


#include <sys/types.h>
#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/devops.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/cred.h>
#include <sys/policy.h>
#include <sys/errno.h>
#include <vm/seg_dev.h>
#include <vm/seg_vn.h>
#include <vm/page.h>
#include <sys/fs/swapnode.h>
#include <sys/sysmacros.h>
#include <sys/fcntl.h>
#include <sys/vmsystm.h>
#include <sys/physmem.h>
#include <sys/vfs_opreg.h>

static dev_info_t *physmem_dip = NULL;

/*
 * Linked list element hanging off physmem_proc_hash below, which holds all
 * the information for a given segment which has been set up for this process.
 * This is a simple linked list as we are assuming that for a given process
 * the setup ioctl will only be called a handful of times.  If this assumption
 * changes in the future, a data structure which is quicker to traverse
 * should be used.
 */
struct physmem_hash {
	struct physmem_hash *ph_next;	/* next segment for this process */
	uint64_t ph_base_pa;		/* base PA of the segment */
	caddr_t ph_base_va;		/* user VA the segment is mapped at */
	size_t ph_seg_len;		/* segment length in bytes */
	struct vnode *ph_vnode;		/* vnode backing the segment's pages */
};

/*
 * Hash of all of the processes which have set up mappings with the driver,
 * with pointers to per-process data.
 */
struct physmem_proc_hash {
	struct proc *pph_proc;			/* owning process */
	struct physmem_hash *pph_hash;		/* this process's segments */
	struct physmem_proc_hash *pph_next;	/* hash chain */
};


/* Needs to be a power of two for simple hash algorithm */
#define	PPH_SIZE	8
struct physmem_proc_hash *pph[PPH_SIZE];

/*
 * Lock which protects the pph hash above.  To add an element (either a new
 * process or a new segment) the WRITE lock must be held.  To traverse the
 * list, only a READ lock is needed.
 */
krwlock_t pph_rwlock;

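/*
 * Hash on the process pointer.  The low-order bits of a proc_t address
 * carry little entropy (proc structures come from a kmem cache), so the
 * shift by eight bits is presumably there to spread entries more evenly
 * across the PPH_SIZE buckets before masking.
 */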
#define	PHYSMEM_HASH(procp) ((int)((((uintptr_t)procp) >> 8) & (PPH_SIZE - 1)))

/*
 * Need to keep a reference count of how many processes have the driver
 * open to prevent it from disappearing.
 */
uint64_t physmem_vnodecnt;
kmutex_t physmem_mutex;		/* protects physmem_vnodecnt */

static int physmem_getpage(struct vnode *vp, offset_t off, size_t len,
    uint_t *protp, page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
    enum seg_rw rw, struct cred *cr, caller_context_t *ct);

static int physmem_addmap(struct vnode *vp, offset_t off, struct as *as,
    caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
    struct cred *cred, caller_context_t *ct);

static int physmem_delmap(struct vnode *vp, offset_t off, struct as *as,
    caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags,
    struct cred *cred, caller_context_t *ct);

static void physmem_inactive(vnode_t *vp, cred_t *crp, caller_context_t *ct);

const fs_operation_def_t physmem_vnodeops_template[] = {
	VOPNAME_GETPAGE, { .vop_getpage = physmem_getpage },
	VOPNAME_ADDMAP, { .vop_addmap = physmem_addmap },
	VOPNAME_DELMAP, { .vop_delmap = physmem_delmap },
	VOPNAME_INACTIVE, { .vop_inactive = physmem_inactive },
	NULL, NULL
};
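
/*
 * Operations not listed in the template above fall back to the defaults
 * supplied by vn_make_ops().  Only these four are needed because physmem
 * pages are hashed onto the vnode directly and faulted in through segvn
 * (see map_page_proc() and physmem_getpage() below).
 */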

vnodeops_t *physmem_vnodeops = NULL;

/*
 * Removes the current process from the hash if the process has no more
 * physmem segments active.
 */
void
physmem_remove_hash_proc()
{
	int index;
	struct physmem_proc_hash **walker;
	struct physmem_proc_hash *victim = NULL;

	index = PHYSMEM_HASH(curproc);
	rw_enter(&pph_rwlock, RW_WRITER);
	walker = &pph[index];
	while (*walker != NULL) {
		if ((*walker)->pph_proc == curproc &&
		    (*walker)->pph_hash == NULL) {
			victim = *walker;
			*walker = victim->pph_next;
			break;
		}
		walker = &((*walker)->pph_next);
	}
	rw_exit(&pph_rwlock);
	if (victim != NULL)
		kmem_free(victim, sizeof (struct physmem_proc_hash));
}

/*
 * Add a new entry to the hash for the given process to cache the
 * address ranges that it is working on.  If this is the first hash
 * item to be added for this process, we will create the head pointer
 * for this process.
 * Returns 0 on success, ERANGE when the requested physical address range
 * overlaps an entry already in the hash.
 */
int
physmem_add_hash(struct physmem_hash *php)
{
	int index;
	struct physmem_proc_hash *iterator;
	struct physmem_proc_hash *newp = NULL;
	struct physmem_hash *temp;
	int ret = 0;

	index = PHYSMEM_HASH(curproc);

insert:
	rw_enter(&pph_rwlock, RW_WRITER);
	iterator = pph[index];
	while (iterator != NULL) {
		if (iterator->pph_proc == curproc) {
			/*
			 * check to make sure a single process does not try to
			 * map the same region twice.
			 */
			for (temp = iterator->pph_hash; temp != NULL;
			    temp = temp->ph_next) {
				if ((php->ph_base_pa >= temp->ph_base_pa &&
				    php->ph_base_pa < temp->ph_base_pa +
				    temp->ph_seg_len) ||
				    (temp->ph_base_pa >= php->ph_base_pa &&
				    temp->ph_base_pa < php->ph_base_pa +
				    php->ph_seg_len)) {
					ret = ERANGE;
					break;
				}
			}
			if (ret == 0) {
				php->ph_next = iterator->pph_hash;
				iterator->pph_hash = php;
			}
			rw_exit(&pph_rwlock);
			/*
			 * Another thread of this process may have inserted
			 * the process entry while we dropped the lock below
			 * to allocate; free the now-unneeded allocation.
			 */
			if (newp != NULL)
				kmem_free(newp, sizeof (*newp));
			return (ret);
		}
		iterator = iterator->pph_next;
	}

	if (newp != NULL) {
		newp->pph_proc = curproc;
		newp->pph_next = pph[index];
		newp->pph_hash = php;
		php->ph_next = NULL;
		pph[index] = newp;
		rw_exit(&pph_rwlock);
		return (0);
	}

	rw_exit(&pph_rwlock);
	/* Dropped the lock so we could use KM_SLEEP */
	newp = kmem_zalloc(sizeof (struct physmem_proc_hash), KM_SLEEP);
	goto insert;
}

/*
 * Will return the pointer to the physmem_hash struct if the setup routine
 * has previously been called for this memory.
 * Returns NULL on failure.
 */
struct physmem_hash *
physmem_get_hash(uint64_t req_paddr, size_t len, proc_t *procp)
{
	int index;
	struct physmem_proc_hash *proc_hp;
	struct physmem_hash *php;

	ASSERT(rw_lock_held(&pph_rwlock));

	index = PHYSMEM_HASH(procp);
	proc_hp = pph[index];
	while (proc_hp != NULL) {
		if (proc_hp->pph_proc == procp) {
			php = proc_hp->pph_hash;
			while (php != NULL) {
				if ((req_paddr >= php->ph_base_pa) &&
				    (req_paddr + len <=
				    php->ph_base_pa + php->ph_seg_len)) {
					return (php);
				}
				php = php->ph_next;
			}
		}
		proc_hp = proc_hp->pph_next;
	}
	return (NULL);
}

int
physmem_validate_cookie(uint64_t p_cookie)
{
	int index;
	struct physmem_proc_hash *proc_hp;
	struct physmem_hash *php;

	ASSERT(rw_lock_held(&pph_rwlock));

	index = PHYSMEM_HASH(curproc);
	proc_hp = pph[index];
	while (proc_hp != NULL) {
		if (proc_hp->pph_proc == curproc) {
			php = proc_hp->pph_hash;
			while (php != NULL) {
				if ((uint64_t)(uintptr_t)php == p_cookie) {
					return (1);
				}
				php = php->ph_next;
			}
		}
		proc_hp = proc_hp->pph_next;
	}
	return (0);
}

/*
 * Remove the given vnode from the pph hash.  If the vnode is found in the
 * hash, the owning process must still be around, since the vnode itself is
 * still around and a physmem vnode is only reachable through the hash.
 * If it is not in the hash, the setup ioctl must have failed.
 * Return 1 if the vnode was found and removed, 0 otherwise.
 */
int
physmem_remove_vnode_hash(vnode_t *vp)
{
	int index;
	struct physmem_proc_hash *proc_hp;
	struct physmem_hash **phpp;
	struct physmem_hash *victim;

	index = PHYSMEM_HASH(curproc);
	/* synchronize with the map routine */
	rw_enter(&pph_rwlock, RW_WRITER);
	proc_hp = pph[index];
	while (proc_hp != NULL) {
		if (proc_hp->pph_proc == curproc) {
			phpp = &proc_hp->pph_hash;
			while (*phpp != NULL) {
				if ((*phpp)->ph_vnode == vp) {
					victim = *phpp;
					*phpp = victim->ph_next;

					rw_exit(&pph_rwlock);
					kmem_free(victim, sizeof (*victim));
					return (1);
				}
				phpp = &(*phpp)->ph_next;
			}
		}
		proc_hp = proc_hp->pph_next;
	}
	rw_exit(&pph_rwlock);

	/* not found */
	return (0);
}

int
physmem_setup_vnops()
{
	int error;
	char *name = "physmem";
	if (physmem_vnodeops != NULL)
		cmn_err(CE_PANIC, "physmem vnodeops already set\n");
	error = vn_make_ops(name, physmem_vnodeops_template, &physmem_vnodeops);
	if (error != 0) {
		cmn_err(CE_WARN, "physmem_setup_vnops: bad vnode ops template");
	}
	return (error);
}

/*
 * The guts of the PHYSMEM_SETUP ioctl.
 * Create a segment in the address space with the specified parameters.
 * If pspp->user_va is NULL, as_gap will be used to find an appropriate VA.
 * We do not do bounds checking on the requested physical addresses; if they
 * do not exist in the system, they will not be mappable.
 * Returns 0 on success with the following error codes on failure:
 *	ENOMEM - The VA range requested was already mapped if pspp->user_va is
 *		non-NULL, or the system was unable to find enough VA space for
 *		the desired length if user_va was NULL.
 *	EINVAL - The requested PA, VA, or length was not PAGESIZE aligned.
 */
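/*
 * For illustration only (not compiled): a minimal sketch of how a
 * privileged user-space client might drive the SETUP/MAP/DESTROY ioctls,
 * assuming the parameter structs and ioctl numbers from <sys/physmem.h>
 * and eliding all error handling:
 *
 *	int fd = open("/dev/physmem", O_RDWR);
 *	struct physmem_setup_param psp = { 0 };
 *	psp.req_paddr = pa;		(PAGESIZE-aligned PA of interest)
 *	psp.len = PAGESIZE;
 *	psp.user_va = 0;		(let the driver pick the VA)
 *	(void) ioctl(fd, PHYSMEM_SETUP, &psp);
 *
 *	struct physmem_map_param pmp = { 0 };
 *	pmp.req_paddr = pa;
 *	(void) ioctl(fd, PHYSMEM_MAP, &pmp);
 *	... the page is now accessible at pmp.ret_va ...
 *
 *	(void) ioctl(fd, PHYSMEM_DESTROY, &psp.cookie);
 */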
int
physmem_setup_addrs(struct physmem_setup_param *pspp)
{
	struct as *as = curproc->p_as;
	struct segvn_crargs vn_a;
	int ret = 0;
	uint64_t base_pa;
	size_t len;
	caddr_t uvaddr;
	struct vnode *vp;
	struct physmem_hash *php;

	ASSERT(pspp != NULL);
	base_pa = pspp->req_paddr;
	len = pspp->len;
	uvaddr = (caddr_t)(uintptr_t)pspp->user_va;

	/* Sanity checking */
	if (!IS_P2ALIGNED(base_pa, PAGESIZE))
		return (EINVAL);
	if (!IS_P2ALIGNED(len, PAGESIZE))
		return (EINVAL);
	if (uvaddr != NULL && !IS_P2ALIGNED(uvaddr, PAGESIZE))
		return (EINVAL);

	php = kmem_zalloc(sizeof (struct physmem_hash), KM_SLEEP);

	/* Need to bump vnode count so that the driver can not be unloaded */
	mutex_enter(&physmem_mutex);
	physmem_vnodecnt++;
	mutex_exit(&physmem_mutex);

	vp = vn_alloc(KM_SLEEP);
	ASSERT(vp != NULL);	/* SLEEP can't return NULL */
	vn_setops(vp, physmem_vnodeops);

	php->ph_vnode = vp;

	vn_a.vp = vp;
	vn_a.offset = (u_offset_t)base_pa;
	vn_a.type = MAP_SHARED;
	vn_a.prot = PROT_ALL;
	vn_a.maxprot = PROT_ALL;
	vn_a.flags = 0;
	vn_a.cred = NULL;
	vn_a.amp = NULL;
	vn_a.szc = 0;
	vn_a.lgrp_mem_policy_flags = 0;

	as_rangelock(as);
	if (uvaddr != NULL) {
		if (as_gap(as, len, &uvaddr, &len, AH_LO, NULL) == -1) {
			ret = ENOMEM;
fail:
			as_rangeunlock(as);
			vn_free(vp);
			kmem_free(php, sizeof (*php));
			mutex_enter(&physmem_mutex);
			physmem_vnodecnt--;
			mutex_exit(&physmem_mutex);
			return (ret);
		}
	} else {
		/* We pick the address for the user */
		map_addr(&uvaddr, len, 0, 1, 0);
		if (uvaddr == NULL) {
			ret = ENOMEM;
			goto fail;
		}
	}
	ret = as_map(as, uvaddr, len, segvn_create, &vn_a);

	if (ret == 0) {
		as_rangeunlock(as);
		php->ph_base_pa = base_pa;
		php->ph_base_va = uvaddr;
		php->ph_seg_len = len;
		pspp->user_va = (uint64_t)(uintptr_t)uvaddr;
		pspp->cookie = (uint64_t)(uintptr_t)php;
		ret = physmem_add_hash(php);
		if (ret == 0)
			return (0);

		/* Note that the call to as_unmap will free the vnode */
		(void) as_unmap(as, uvaddr, len);
		kmem_free(php, sizeof (*php));
		return (ret);
	}

	goto fail;
	/*NOTREACHED*/
}

/*
 * The guts of the PHYSMEM_MAP ioctl.
 * Map the given PA to the appropriate VA if the PHYSMEM_SETUP ioctl has
 * already been called for this PA range.
 * Returns 0 on success with the following error codes on failure:
 *	EPERM - The requested page is long term locked, and thus repeated
 *		requests to allocate this page will likely fail.
 *	EAGAIN - The requested page could not be allocated, but it is believed
 *		that future attempts could succeed.
 *	ENOMEM - There was not enough free memory in the system to safely
 *		map the requested page.
 *	EINVAL - The requested paddr was not PAGESIZE aligned or the
 *		PHYSMEM_SETUP ioctl was not called for this page.
 *	ENOENT - The requested page was inside the kernel cage, and the
 *		PHYSMEM_CAGE flag was not set.
 *	EBUSY - The requested page is retired and the PHYSMEM_RETIRED flag
 *		was not set.
 */
static int
physmem_map_addrs(struct physmem_map_param *pmpp)
{
	caddr_t uvaddr;
	page_t *pp;
	uint64_t req_paddr;
	struct vnode *vp;
	int ret = 0;
	struct physmem_hash *php;
	uint_t flags = 0;

	ASSERT(pmpp != NULL);
	req_paddr = pmpp->req_paddr;

	if (!IS_P2ALIGNED(req_paddr, PAGESIZE))
		return (EINVAL);
	/* Find the vnode for this map request */
	rw_enter(&pph_rwlock, RW_READER);
	php = physmem_get_hash(req_paddr, PAGESIZE, curproc);
	if (php == NULL) {
		rw_exit(&pph_rwlock);
		return (EINVAL);
	}
	vp = php->ph_vnode;
	uvaddr = php->ph_base_va + (req_paddr - php->ph_base_pa);
	rw_exit(&pph_rwlock);

	pp = page_numtopp_nolock(btop((size_t)req_paddr));
	if (pp == NULL) {
		pmpp->ret_va = 0;
		return (EPERM);
	}

	/*
	 * Check to see if page already mapped correctly.  This can happen
	 * when we failed to capture a page previously and it was captured
	 * asynchronously for us.  Return success in this case.
	 */
	if (pp->p_vnode == vp) {
		ASSERT(pp->p_offset == (u_offset_t)req_paddr);
		pmpp->ret_va = (uint64_t)(uintptr_t)uvaddr;
		return (0);
	}

	/*
	 * physmem should be responsible for checking for cage
	 * and prom pages.
	 */
	if (pmpp->flags & PHYSMEM_CAGE)
		flags = CAPTURE_GET_CAGE;
	if (pmpp->flags & PHYSMEM_RETIRED)
		flags |= CAPTURE_GET_RETIRED;

	ret = page_trycapture(pp, 0, flags | CAPTURE_PHYSMEM, curproc);

	if (ret != 0) {
		pmpp->ret_va = 0;
		return (ret);
	} else {
		pmpp->ret_va = (uint64_t)(uintptr_t)uvaddr;
		return (0);
	}
}

/*
 * Map the given page into the process's address space if possible.
 * We actually only hash the page in on the correct vnode as the page
 * will be mapped via segvn_pagefault.
 * returns 0 on success
 * returns 1 if there is no need to map this page anymore (process exited)
 * returns -1 if we failed to map the page.
 */
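/*
 * This function is the page capture callback: it is registered as the
 * PC_PHYSMEM callback in physmem_attach() below and is called back by the
 * page capture framework once a page has been captured for us.
 */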
int
map_page_proc(page_t *pp, void *arg, uint_t flags)
{
	struct vnode *vp;
	proc_t *procp = (proc_t *)arg;
	int ret;
	u_offset_t paddr = (u_offset_t)ptob(pp->p_pagenum);
	struct physmem_hash *php;

	ASSERT(pp != NULL);

	/*
	 * Check against availrmem to make sure that we're not low on memory.
	 * We check again here as ASYNC requests do not do this check elsewhere.
	 * We return 1 as we don't want the page to have the PR_CAPTURE bit
	 * set or be on the page capture hash.
	 */
	if (swapfs_minfree > availrmem + 1) {
		page_free(pp, 1);
		return (1);
	}

	/*
	 * If this is an asynchronous request for the current process,
	 * we can not map the page as it's possible that we are also in the
	 * process of unmapping the page which could result in a deadlock
	 * with the as lock.
	 */
	if ((flags & CAPTURE_ASYNC) && (curproc == procp)) {
		page_free(pp, 1);
		return (-1);
	}

	/* only return zeroed out pages */
	pagezero(pp, 0, PAGESIZE);

	rw_enter(&pph_rwlock, RW_READER);
	php = physmem_get_hash(paddr, PAGESIZE, procp);
	if (php == NULL) {
		rw_exit(&pph_rwlock);
		/*
		 * Free the page as there is no longer a valid outstanding
		 * request for this page.
		 */
		page_free(pp, 1);
		return (1);
	}

	vp = php->ph_vnode;

	/*
	 * We need to protect against a possible deadlock here where we own
	 * the vnode page hash mutex and want to acquire it again as there
	 * are locations in the code, where we unlock a page while holding
	 * the mutex which can lead to the page being captured and eventually
	 * end up here.
	 */
	if (mutex_owned(page_vnode_mutex(vp))) {
		rw_exit(&pph_rwlock);
		page_free(pp, 1);
		return (-1);
	}

	ret = page_hashin(pp, vp, paddr, NULL);
	rw_exit(&pph_rwlock);
	if (ret == 0) {
		page_free(pp, 1);
		return (-1);
	}

	page_downgrade(pp);

	mutex_enter(&freemem_lock);
	availrmem--;
	mutex_exit(&freemem_lock);

	return (0);
}

/*
 * The guts of the PHYSMEM_DESTROY ioctl.
 * The cookie passed in will provide all of the information needed to
 * free up the address space and physical memory associated with the
 * corresponding PHYSMEM_SETUP ioctl.
 * Returns 0 on success with the following error codes on failure:
 *	EINVAL - The cookie supplied is not valid.
 */
int
physmem_destroy_addrs(uint64_t p_cookie)
{
	struct as *as = curproc->p_as;
	size_t len;
	caddr_t uvaddr;

	rw_enter(&pph_rwlock, RW_READER);
	if (physmem_validate_cookie(p_cookie) == 0) {
		rw_exit(&pph_rwlock);
		return (EINVAL);
	}

	len = ((struct physmem_hash *)(uintptr_t)p_cookie)->ph_seg_len;
	uvaddr = ((struct physmem_hash *)(uintptr_t)p_cookie)->ph_base_va;
	rw_exit(&pph_rwlock);

	/*
	 * The unmap drives physmem_delmap and, via the final VN_RELE,
	 * physmem_inactive, which frees the pages and the hash entry.
	 */
	(void) as_unmap(as, uvaddr, len);

	return (0);
}

/*
 * If the page has been hashed into the physmem vnode, then just look it up
 * and return it via pl, otherwise return ENOMEM as the map ioctl has not
 * succeeded on the given page.
 */
/*ARGSUSED*/
static int
physmem_getpage(struct vnode *vp, offset_t off, size_t len, uint_t *protp,
    page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, enum seg_rw rw,
    struct cred *cr, caller_context_t *ct)
{
	page_t *pp;

	ASSERT(len == PAGESIZE);
	ASSERT(AS_READ_HELD(seg->s_as));

	/*
	 * If the page is in the hash, then we successfully claimed this
	 * page earlier, so return it to the caller.
	 */
	pp = page_lookup(vp, off, SE_SHARED);
	if (pp != NULL) {
		pl[0] = pp;
		pl[1] = NULL;
		*protp = PROT_ALL;
		return (0);
	}
	return (ENOMEM);
}

/*
 * We can not allow a process mapping /dev/physmem pages to fork as there can
 * only be a single mapping to a /dev/physmem page at a given time.  Thus we
 * return EINVAL when asked to set up a mapping in an address space other
 * than our own, as happens during fork.
 * Otherwise we return zero as this function is required for normal operation.
 */
/*ARGSUSED*/
static int
physmem_addmap(struct vnode *vp, offset_t off, struct as *as,
    caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
    struct cred *cred, caller_context_t *ct)
{
	if (curproc->p_as != as) {
		return (EINVAL);
	}
	return (0);
}

/* Will always get called for removing a whole segment. */
/*ARGSUSED*/
static int
physmem_delmap(struct vnode *vp, offset_t off, struct as *as,
    caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags,
    struct cred *cred, caller_context_t *ct)
{
	/*
	 * Release our hold on the vnode so that the final VN_RELE will
	 * call physmem_inactive to clean things up.
	 */
	VN_RELE(vp);

	return (0);
}

/*
 * Clean up all the pages belonging to this vnode and then free it.
 */
/*ARGSUSED*/
static void
physmem_inactive(vnode_t *vp, cred_t *crp, caller_context_t *ct)
{
	page_t *pp;

	/*
	 * Remove the vnode from the hash now, to prevent asynchronous
	 * attempts to map into this vnode.  This avoids a deadlock
	 * where two threads try to get into this logic at the same
	 * time and try to map the pages they are destroying into the
	 * other's address space.
	 * If it's not in the hash, just free it.
	 */
	if (physmem_remove_vnode_hash(vp) == 0) {
		ASSERT(vp->v_pages == NULL);
		vn_free(vp);
		physmem_remove_hash_proc();
		mutex_enter(&physmem_mutex);
		physmem_vnodecnt--;
		mutex_exit(&physmem_mutex);
		return;
	}

	/*
	 * At this point in time, no other logic can be adding or removing
	 * pages from the vnode, otherwise the v_pages list could be inaccurate.
	 */

	while ((pp = vp->v_pages) != NULL) {
		page_t *rpp;
		if (page_tryupgrade(pp)) {
			/*
			 * set lckcnt for page_destroy to do availrmem
			 * accounting
			 */
			pp->p_lckcnt = 1;
			page_destroy(pp, 0);
		} else {
			/* failure to lock should be transient */
			rpp = page_lookup(vp, ptob(pp->p_pagenum), SE_SHARED);
			if (rpp != pp) {
				page_unlock(rpp);
				continue;
			}
			page_unlock(pp);
		}
	}
	vn_free(vp);
	physmem_remove_hash_proc();
	mutex_enter(&physmem_mutex);
	physmem_vnodecnt--;
	mutex_exit(&physmem_mutex);
}

/*ARGSUSED*/
static int
physmem_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
    int *rvalp)
{
	int ret;

	switch (cmd) {
	case PHYSMEM_SETUP:
		{
			struct physmem_setup_param psp;
			if (ddi_copyin((void *)arg, &psp,
			    sizeof (struct physmem_setup_param), 0))
				return (EFAULT);
			ret = physmem_setup_addrs(&psp);
			if (ddi_copyout(&psp, (void *)arg, sizeof (psp), 0))
				return (EFAULT);
		}
		break;
	case PHYSMEM_MAP:
		{
			struct physmem_map_param pmp;
			if (ddi_copyin((void *)arg, &pmp,
			    sizeof (struct physmem_map_param), 0))
				return (EFAULT);
			ret = physmem_map_addrs(&pmp);
			if (ddi_copyout(&pmp, (void *)arg, sizeof (pmp), 0))
				return (EFAULT);
		}
		break;
	case PHYSMEM_DESTROY:
		{
			uint64_t cookie;
			if (ddi_copyin((void *)arg, &cookie,
			    sizeof (uint64_t), 0))
				return (EFAULT);
			ret = physmem_destroy_addrs(cookie);
		}
		break;
	default:
		return (ENOTSUP);
	}
	return (ret);
}

/*ARGSUSED*/
static int
physmem_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	int ret;
	static int msg_printed = 0;

	if ((flag & (FWRITE | FREAD)) != (FWRITE | FREAD)) {
		return (EINVAL);
	}

	/* need to make sure we have the right privileges */
	if ((ret = secpolicy_resource(credp)) != 0)
		return (ret);
	if ((ret = secpolicy_lock_memory(credp)) != 0)
		return (ret);

	if (msg_printed == 0) {
		cmn_err(CE_NOTE, "!driver has been opened. This driver may "
		    "take out long term locks on pages which may impact "
		    "dynamic reconfiguration events");
		msg_printed = 1;
	}

	return (0);
}

/*ARGSUSED*/
static int
physmem_close(dev_t dev, int flag, int otyp, cred_t *credp)
{
	return (0);
}

/*ARGSUSED*/
static int
physmem_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd,
    void *arg, void **resultp)
{
	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*resultp = physmem_dip;
		return (DDI_SUCCESS);

	case DDI_INFO_DEVT2INSTANCE:
		*resultp = (void *)(ulong_t)getminor((dev_t)arg);
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}
}

static int
physmem_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int i;

	if (cmd == DDI_RESUME) {
		return (DDI_SUCCESS);
	}

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	if (ddi_create_minor_node(dip, ddi_get_name(dip), S_IFCHR,
	    ddi_get_instance(dip), DDI_PSEUDO, 0) != DDI_SUCCESS)
		return (DDI_FAILURE);

	physmem_dip = dip;

	/* Initialize driver specific data */
	if (physmem_setup_vnops()) {
		ddi_remove_minor_node(dip, ddi_get_name(dip));
		return (DDI_FAILURE);
	}

	for (i = 0; i < PPH_SIZE; i++)
		pph[i] = NULL;

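	/*
	 * Register with the page capture framework so that map_page_proc
	 * is called back to hash captured pages in on the correct vnode.
	 */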
	page_capture_register_callback(PC_PHYSMEM, 10000,
	    map_page_proc);

	return (DDI_SUCCESS);
}

static int
physmem_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int ret = DDI_SUCCESS;

	if (cmd == DDI_SUSPEND) {
		return (DDI_SUCCESS);
	}

	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	ASSERT(physmem_dip == dip);

	mutex_enter(&physmem_mutex);
	if (physmem_vnodecnt == 0) {
		if (physmem_vnodeops != NULL) {
			vn_freevnodeops(physmem_vnodeops);
			physmem_vnodeops = NULL;
			page_capture_unregister_callback(PC_PHYSMEM);
		}
	} else {
		ret = EBUSY;
	}
	mutex_exit(&physmem_mutex);
	if (ret == DDI_SUCCESS)
		ddi_remove_minor_node(dip, ddi_get_name(dip));
	return (ret);
}

static struct cb_ops physmem_cb_ops = {
	physmem_open,	/* open */
	physmem_close,	/* close */
	nodev,		/* strategy */
	nodev,		/* print */
	nodev,		/* dump */
	nodev,		/* read */
	nodev,		/* write */
	physmem_ioctl,	/* ioctl */
	nodev,		/* devmap */
	nodev,		/* mmap */
	nodev,		/* segmap */
	nochpoll,	/* chpoll */
	ddi_prop_op,	/* prop_op */
	NULL,		/* cb_str */
	D_NEW | D_MP | D_DEVMAP,
	CB_REV,
	NULL,
	NULL
};

static struct dev_ops physmem_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	physmem_getinfo,	/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	physmem_attach,		/* devo_attach */
	physmem_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&physmem_cb_ops,	/* devo_cb_ops */
	NULL,			/* devo_bus_ops */
	NULL,			/* devo_power */
	ddi_quiesce_not_needed,	/* quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops,
	"physmem driver",
	&physmem_ops
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};

int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}