/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/atomic.h>
#include <sys/errno.h>
#include <sys/stat.h>
#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/systm.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/cpuvar.h>
#include <sys/kmem.h>
#include <sys/strsubr.h>
#include <sys/sysmacros.h>
#include <sys/frame.h>
#include <sys/stack.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/policy.h>
#include <sys/ontrap.h>
#include <sys/vmsystm.h>
#include <sys/prsystm.h>

#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_dev.h>
#include <vm/seg_vn.h>
#include <vm/seg_spt.h>
#include <vm/seg_kmem.h>

extern struct seg_ops segdev_ops;	/* needs a header file */
extern struct seg_ops segspt_shmops;	/* needs a header file */

static int
page_valid(struct seg *seg, caddr_t addr)
{
	struct segvn_data *svd;
	vnode_t *vp;
	vattr_t vattr;

	/*
	 * Fail if the page doesn't map to a page in the underlying
	 * mapped file, if an underlying mapped file exists.
	 */
	vattr.va_mask = AT_SIZE;
	if (seg->s_ops == &segvn_ops &&
	    SEGOP_GETVP(seg, addr, &vp) == 0 &&
	    vp != NULL && vp->v_type == VREG &&
	    VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) {
		u_offset_t size = roundup(vattr.va_size, (u_offset_t)PAGESIZE);
		u_offset_t offset = SEGOP_GETOFFSET(seg, addr);

		if (offset >= size)
			return (0);
	}

	/*
	 * Fail if this is an ISM shared segment and the address is
	 * not within the real size of the spt segment that backs it.
	 */
	if (seg->s_ops == &segspt_shmops &&
	    addr >= seg->s_base + spt_realsize(seg))
		return (0);

	/*
	 * Fail if the segment is mapped from /dev/null.
	 * The key is that the mapping comes from segdev and the
	 * type is neither MAP_SHARED nor MAP_PRIVATE.
	 */
	if (seg->s_ops == &segdev_ops &&
	    ((SEGOP_GETTYPE(seg, addr) & (MAP_SHARED | MAP_PRIVATE)) == 0))
		return (0);

	/*
	 * Fail if the page is a MAP_NORESERVE page that has
	 * not actually materialized.
	 * We cheat by knowing that segvn is the only segment
	 * driver that supports MAP_NORESERVE.
	 */
	if (seg->s_ops == &segvn_ops &&
	    (svd = (struct segvn_data *)seg->s_data) != NULL &&
	    (svd->vp == NULL || svd->vp->v_type != VREG) &&
	    (svd->flags & MAP_NORESERVE)) {
		/*
		 * Guilty knowledge here.  We know that
		 * segvn_incore returns more than just the
		 * low-order bit that indicates the page is
		 * actually in memory.  If any bits are set,
		 * then there is backing store for the page.
		 */
		char incore = 0;
		(void) SEGOP_INCORE(seg, addr, PAGESIZE, &incore);
		if (incore == 0)
			return (0);
	}
	return (1);
}
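
/*
 * Illustrative sketch, not part of the original file: page_valid() is
 * meant to be applied to a page-aligned address whose segment was looked
 * up under the address-space lock, exactly as urw() does below.  A reader
 * hold suffices for a lookup-only check.  The helper name and the
 * EXAMPLE_USAGE guard are hypothetical.
 */
#ifdef EXAMPLE_USAGE
static int
addr_is_valid(struct as *as, caddr_t page)
{
	struct seg *seg;
	int ok;

	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	seg = as_segat(as, page);
	ok = (seg != NULL && page_valid(seg, page));
	AS_LOCK_EXIT(as, &as->a_lock);
	return (ok);
}
#endif	/* EXAMPLE_USAGE */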

/*
 * Map address "addr" in address space "as" into a kernel virtual address.
 * The memory is guaranteed to be resident and locked down.
 */
static caddr_t
mapin(struct as *as, caddr_t addr, int writing)
{
	page_t *pp;
	caddr_t kaddr;
	pfn_t pfnum;

	/*
	 * NB: Because of past mistakes, we have bits being returned
	 * by getpfnum that are actually the page type bits of the pte.
	 * When the object we are trying to map is a memory page with
	 * a page structure everything is ok and we can use the optimal
	 * method, ppmapin.  Otherwise, we have to do something special.
	 */
	pfnum = hat_getpfnum(as->a_hat, addr);
	if (pf_is_memory(pfnum)) {
		pp = page_numtopp_nolock(pfnum);
		if (pp != NULL) {
			ASSERT(PAGE_LOCKED(pp));
			kaddr = ppmapin(pp, writing ?
			    (PROT_READ | PROT_WRITE) : PROT_READ,
			    (caddr_t)-1);
			return (kaddr + ((uintptr_t)addr & PAGEOFFSET));
		}
	}

	/*
	 * Oh well, we didn't have a page struct for the object we were
	 * trying to map in; ppmapin doesn't handle devices, but allocating a
	 * heap address allows ppmapout to free virtual space when done.
	 */
	kaddr = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);

	hat_devload(kas.a_hat, kaddr, PAGESIZE, pfnum,
	    writing ? (PROT_READ | PROT_WRITE) : PROT_READ, HAT_LOAD_LOCK);

	return (kaddr + ((uintptr_t)addr & PAGEOFFSET));
}

/*
 * Undo a translation established by mapin().  ppmapout() handles both
 * the ppmapin() case and the heap_arena case above.
 */
/*ARGSUSED*/
static void
mapout(struct as *as, caddr_t addr, caddr_t vaddr, int writing)
{
	vaddr = (caddr_t)(uintptr_t)((uintptr_t)vaddr & PAGEMASK);
	ppmapout(vaddr);
}
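
/*
 * Illustrative sketch, not part of the original file: the intended
 * mapin()/mapout() pairing for a page that has already been SOFTLOCKed,
 * as urw() arranges below.  mapin() preserves the intra-page offset of
 * "uaddr"; this sketch assumes "uaddr" is word-aligned so the access
 * stays within the mapped page.  The helper name and the EXAMPLE_USAGE
 * guard are hypothetical.
 */
#ifdef EXAMPLE_USAGE
static uint32_t
peek_word(struct as *as, caddr_t uaddr)
{
	caddr_t kva = mapin(as, uaddr, 0);	/* map for reading */
	uint32_t val = *(uint32_t *)kva;
	mapout(as, uaddr, kva, 0);		/* tear down the mapping */
	return (val);
}
#endif	/* EXAMPLE_USAGE */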

/*
 * Perform I/O to a given process.  This will return EIO if we detect
 * corrupt memory and ENXIO if there is no such mapped address in the
 * user process's address space.
 */
static int
urw(proc_t *p, int writing, void *buf, size_t len, uintptr_t a)
{
	caddr_t addr = (caddr_t)a;
	caddr_t page;
	caddr_t vaddr;
	struct seg *seg;
	int error = 0;
	int err = 0;
	uint_t prot;
	uint_t prot_rw = writing ? PROT_WRITE : PROT_READ;
	int protchanged;
	on_trap_data_t otd;
	int retrycnt;
	struct as *as = p->p_as;
	enum seg_rw rw;

	/*
	 * Locate segment containing address of interest.
	 */
	page = (caddr_t)(uintptr_t)((uintptr_t)addr & PAGEMASK);
	retrycnt = 0;
	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
retry:
	if ((seg = as_segat(as, page)) == NULL ||
	    !page_valid(seg, page)) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (ENXIO);
	}
	SEGOP_GETPROT(seg, page, 0, &prot);

	protchanged = 0;
	if ((prot & prot_rw) == 0) {
		protchanged = 1;
		err = SEGOP_SETPROT(seg, page, PAGESIZE, prot | prot_rw);

		if (err == IE_RETRY) {
			protchanged = 0;
			ASSERT(retrycnt == 0);
			retrycnt++;
			goto retry;
		}

		if (err != 0) {
			AS_LOCK_EXIT(as, &as->a_lock);
			return (ENXIO);
		}
	}

	/*
	 * segvn may do a copy-on-write for F_SOFTLOCK/S_READ case to break
	 * sharing to avoid a copy on write of a softlocked page by another
	 * thread.  But since we locked the address space as a writer no other
	 * thread can cause a copy on write.  S_READ_NOCOW is passed as the
	 * access type to tell segvn that it's ok not to do a copy-on-write
	 * for this SOFTLOCK fault.
	 */
	if (writing)
		rw = S_WRITE;
	else if (seg->s_ops == &segvn_ops)
		rw = S_READ_NOCOW;
	else
		rw = S_READ;

	if (SEGOP_FAULT(as->a_hat, seg, page, PAGESIZE, F_SOFTLOCK, rw)) {
		if (protchanged)
			(void) SEGOP_SETPROT(seg, page, PAGESIZE, prot);
		AS_LOCK_EXIT(as, &as->a_lock);
		return (ENXIO);
	}
	CPU_STATS_ADD_K(vm, softlock, 1);

	/*
	 * Make sure we're not trying to read or write off the end of the page.
	 */
	ASSERT(len <= page + PAGESIZE - addr);

	/*
	 * Map in the locked page, copy to our local buffer,
	 * then map the page out and unlock it.
	 */
	vaddr = mapin(as, addr, writing);

	/*
	 * Since we are copying memory on behalf of the user process,
	 * protect against memory error correction faults.
	 */
	if (!on_trap(&otd, OT_DATA_EC)) {
		if (seg->s_ops == &segdev_ops) {
			/*
			 * Device memory can behave strangely; invoke
			 * a segdev-specific copy operation instead.
			 */
			if (writing) {
				if (segdev_copyto(seg, addr, buf, vaddr, len))
					error = ENXIO;
			} else {
				if (segdev_copyfrom(seg, addr, vaddr, buf, len))
					error = ENXIO;
			}
		} else {
			if (writing)
				bcopy(buf, vaddr, len);
			else
				bcopy(vaddr, buf, len);
		}
	} else {
		error = EIO;
	}
	no_trap();

	/*
	 * If we're writing to an executable page, we may need to synchronize
	 * the I$ with the modifications we made through the D$.
	 */
	if (writing && (prot & PROT_EXEC))
		sync_icache(vaddr, (uint_t)len);

	mapout(as, addr, vaddr, writing);

	if (rw == S_READ_NOCOW)
		rw = S_READ;

	(void) SEGOP_FAULT(as->a_hat, seg, page, PAGESIZE, F_SOFTUNLOCK, rw);

	if (protchanged)
		(void) SEGOP_SETPROT(seg, page, PAGESIZE, prot);

	AS_LOCK_EXIT(as, &as->a_lock);

	return (error);
}

int
uread(proc_t *p, void *buf, size_t len, uintptr_t a)
{
	return (urw(p, 0, buf, len, a));
}

int
uwrite(proc_t *p, void *buf, size_t len, uintptr_t a)
{
	return (urw(p, 1, buf, len, a));
}
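
/*
 * Illustrative sketch, not part of the original file: a consumer such as
 * a tracing facility might pair uread() and uwrite() to save and then
 * patch an instruction byte in a traced process.  The function name and
 * the EXAMPLE_USAGE guard are hypothetical.
 */
#ifdef EXAMPLE_USAGE
static int
patch_instr(proc_t *p, uintptr_t pc, uint8_t instr, uint8_t *saved)
{
	int err;

	/* Save the original byte so the caller can restore it later. */
	if ((err = uread(p, saved, sizeof (*saved), pc)) != 0)
		return (err);

	/* Overwrite it with the new instruction byte. */
	return (uwrite(p, &instr, sizeof (instr), pc));
}
#endif	/* EXAMPLE_USAGE */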