/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/atomic.h>
#include <sys/errno.h>
#include <sys/stat.h>
#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/systm.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/cpuvar.h>
#include <sys/kmem.h>
#include <sys/strsubr.h>
#include <sys/sysmacros.h>
#include <sys/frame.h>
#include <sys/stack.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/policy.h>
#include <sys/ontrap.h>
#include <sys/vmsystm.h>
#include <sys/prsystm.h>

#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_dev.h>
#include <vm/seg_vn.h>
#include <vm/seg_spt.h>
#include <vm/seg_kmem.h>

extern struct seg_ops segdev_ops;	/* needs a header file */
extern struct seg_ops segspt_shmops;	/* needs a header file */

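/*
 * Return nonzero if the page containing "addr" in segment "seg" is a
 * valid target for uread()/uwrite(); return zero if it is not backed
 * by anything we can safely touch (see the individual checks below).
 */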
static int
page_valid(struct seg *seg, caddr_t addr)
{
	struct segvn_data *svd;
	vnode_t *vp;
	vattr_t vattr;

	/*
	 * Fail if the page doesn't map to a page in the underlying
	 * mapped file, if an underlying mapped file exists.
	 */
	vattr.va_mask = AT_SIZE;
	if (seg->s_ops == &segvn_ops &&
	    SEGOP_GETVP(seg, addr, &vp) == 0 &&
	    vp != NULL && vp->v_type == VREG &&
	    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
		u_offset_t size = roundup(vattr.va_size, (u_offset_t)PAGESIZE);
		u_offset_t offset = SEGOP_GETOFFSET(seg, addr);

		if (offset >= size)
			return (0);
	}

	/*
	 * Fail if this is an ISM shared segment and the address is
	 * not within the real size of the spt segment that backs it.
	 */
	if (seg->s_ops == &segspt_shmops &&
	    addr >= seg->s_base + spt_realsize(seg))
		return (0);

	/*
	 * Fail if the segment is mapped from /dev/null.
	 * The key is that the mapping comes from segdev and the
	 * type is neither MAP_SHARED nor MAP_PRIVATE.
	 */
	if (seg->s_ops == &segdev_ops &&
	    ((SEGOP_GETTYPE(seg, addr) & (MAP_SHARED | MAP_PRIVATE)) == 0))
		return (0);

	/*
	 * Fail if the page is a MAP_NORESERVE page that has
	 * not actually materialized.
	 * We cheat by knowing that segvn is the only segment
	 * driver that supports MAP_NORESERVE.
	 */
	if (seg->s_ops == &segvn_ops &&
	    (svd = (struct segvn_data *)seg->s_data) != NULL &&
	    (svd->vp == NULL || svd->vp->v_type != VREG) &&
	    (svd->flags & MAP_NORESERVE)) {
		/*
		 * Guilty knowledge here.  We know that
		 * segvn_incore returns more than just the
		 * low-order bit that indicates the page is
		 * actually in memory.  If any bits are set,
		 * then there is backing store for the page.
		 */
		char incore = 0;
		(void) SEGOP_INCORE(seg, addr, PAGESIZE, &incore);
		if (incore == 0)
			return (0);
	}
	return (1);
}

/*
 * Map address "addr" in address space "as" into a kernel virtual address.
 * The memory is guaranteed to be resident and locked down.
 */
static caddr_t
mapin(struct as *as, caddr_t addr, int writing)
{
	page_t *pp;
	caddr_t kaddr;
	pfn_t pfnum;

	/*
	 * NB: Because of past mistakes, we have bits being returned
	 * by getpfnum that are actually the page type bits of the pte.
	 * When the object we are trying to map is a memory page with
	 * a page structure everything is ok and we can use the optimal
	 * method, ppmapin.  Otherwise, we have to do something special.
	 */
	pfnum = hat_getpfnum(as->a_hat, addr);
	if (pf_is_memory(pfnum)) {
		pp = page_numtopp_nolock(pfnum);
		if (pp != NULL) {
			ASSERT(PAGE_LOCKED(pp));
			kaddr = ppmapin(pp, writing ?
			    (PROT_READ | PROT_WRITE) : PROT_READ,
			    (caddr_t)-1);
			return (kaddr + ((uintptr_t)addr & PAGEOFFSET));
		}
	}

	/*
	 * Oh well, we didn't have a page struct for the object we were
	 * trying to map in; ppmapin doesn't handle devices, but allocating a
	 * heap address allows ppmapout to free virtual space when done.
	 */
	kaddr = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);

	hat_devload(kas.a_hat, kaddr, PAGESIZE, pfnum,
	    writing ? (PROT_READ | PROT_WRITE) : PROT_READ, HAT_LOAD_LOCK);

	return (kaddr + ((uintptr_t)addr & PAGEOFFSET));
}

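/*
 * Undo a mapping established by mapin(); per the comment at the end of
 * mapin(), ppmapout() also frees the heap_arena virtual space used for
 * the no-page_t case.
 */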
/*ARGSUSED*/
static void
mapout(struct as *as, caddr_t addr, caddr_t vaddr, int writing)
{
	vaddr = (caddr_t)(uintptr_t)((uintptr_t)vaddr & PAGEMASK);
	ppmapout(vaddr);
}

/*
 * Perform I/O to a given process.  This will return EIO if we detect
 * corrupt memory and ENXIO if there is no such mapped address in the
 * user process's address space.
 */
static int
urw(proc_t *p, int writing, void *buf, size_t len, uintptr_t a)
{
	caddr_t addr = (caddr_t)a;
	caddr_t page;
	caddr_t vaddr;
	struct seg *seg;
	int error = 0;
	int err = 0;
	uint_t prot;
	uint_t prot_rw = writing ? PROT_WRITE : PROT_READ;
	int protchanged;
	on_trap_data_t otd;
	int retrycnt;
	struct as *as = p->p_as;
	enum seg_rw rw;

	/*
	 * Locate segment containing address of interest.
	 */
	page = (caddr_t)(uintptr_t)((uintptr_t)addr & PAGEMASK);
	retrycnt = 0;
	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
retry:
	if ((seg = as_segat(as, page)) == NULL ||
	    !page_valid(seg, page)) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (ENXIO);
	}
	SEGOP_GETPROT(seg, page, 0, &prot);

	protchanged = 0;
	if ((prot & prot_rw) == 0) {
		protchanged = 1;
		err = SEGOP_SETPROT(seg, page, PAGESIZE, prot | prot_rw);

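		/*
		 * IE_RETRY asks us to redo the segment lookup and try
		 * again; the ASSERT records that this is expected to
		 * happen at most once.
		 */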
		if (err == IE_RETRY) {
			protchanged = 0;
			ASSERT(retrycnt == 0);
			retrycnt++;
			goto retry;
		}

		if (err != 0) {
			AS_LOCK_EXIT(as, &as->a_lock);
			return (ENXIO);
		}
	}

	/*
	 * segvn may do a copy-on-write for F_SOFTLOCK/S_READ case to break
	 * sharing to avoid a copy on write of a softlocked page by another
	 * thread.  But since we locked the address space as a writer no other
	 * thread can cause a copy on write.  S_READ_NOCOW is passed as the
	 * access type to tell segvn that it's ok not to do a copy-on-write
	 * for this SOFTLOCK fault.
	 */
	if (writing)
		rw = S_WRITE;
	else if (seg->s_ops == &segvn_ops)
		rw = S_READ_NOCOW;
	else
		rw = S_READ;

	if (SEGOP_FAULT(as->a_hat, seg, page, PAGESIZE, F_SOFTLOCK, rw)) {
		if (protchanged)
			(void) SEGOP_SETPROT(seg, page, PAGESIZE, prot);
		AS_LOCK_EXIT(as, &as->a_lock);
		return (ENXIO);
	}
	CPU_STATS_ADD_K(vm, softlock, 1);

	/*
	 * Make sure we're not trying to read or write off the end of the page.
	 */
	ASSERT(len <= page + PAGESIZE - addr);

	/*
	 * Map in the locked page, copy to our local buffer,
	 * then map the page out and unlock it.
	 */
	vaddr = mapin(as, addr, writing);

	/*
	 * Since we are copying memory on behalf of the user process,
	 * protect against memory error correction faults.
	 */
	if (!on_trap(&otd, OT_DATA_EC)) {
		if (seg->s_ops == &segdev_ops) {
			/*
			 * Device memory can behave strangely; invoke
			 * a segdev-specific copy operation instead.
			 */
			if (writing) {
				if (segdev_copyto(seg, addr, buf, vaddr, len))
					error = ENXIO;
			} else {
				if (segdev_copyfrom(seg, addr, vaddr, buf, len))
					error = ENXIO;
			}
		} else {
			if (writing)
				bcopy(buf, vaddr, len);
			else
				bcopy(vaddr, buf, len);
		}
	} else {
		error = EIO;
	}
	no_trap();

	/*
	 * If we're writing to an executable page, we may need to synchronize
	 * the I$ with the modifications we made through the D$.
	 */
	if (writing && (prot & PROT_EXEC))
		sync_icache(vaddr, (uint_t)len);

	mapout(as, addr, vaddr, writing);

	if (rw == S_READ_NOCOW)
		rw = S_READ;

	(void) SEGOP_FAULT(as->a_hat, seg, page, PAGESIZE, F_SOFTUNLOCK, rw);

	if (protchanged)
		(void) SEGOP_SETPROT(seg, page, PAGESIZE, prot);

	AS_LOCK_EXIT(as, &as->a_lock);

	return (error);
}

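/*
 * Exported entry points: copy "len" bytes between "buf" and address "a"
 * in process "p"'s address space.  uread() copies from the process,
 * uwrite() copies into it; both return 0 on success or an errno from
 * urw() on failure.
 */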
int
uread(proc_t *p, void *buf, size_t len, uintptr_t a)
{
	return (urw(p, 0, buf, len, a));
}

int
uwrite(proc_t *p, void *buf, size_t len, uintptr_t a)
{
	return (urw(p, 1, buf, len, a));
}