/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

#pragma ident	"%Z%%M% %I% %E% SMI"

#include <sys/atomic.h>
#include <sys/errno.h>
#include <sys/stat.h>
#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/systm.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/cpuvar.h>
#include <sys/kmem.h>
#include <sys/strsubr.h>
#include <sys/sysmacros.h>
#include <sys/frame.h>
#include <sys/stack.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/policy.h>
#include <sys/ontrap.h>
#include <sys/vmsystm.h>
#include <sys/prsystm.h>

#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_dev.h>
#include <vm/seg_vn.h>
#include <vm/seg_spt.h>
#include <vm/seg_kmem.h>

extern struct seg_ops segdev_ops;	/* needs a header file */
extern struct seg_ops segspt_shmops;	/* needs a header file */

static int
page_valid(struct seg *seg, caddr_t addr)
{
	struct segvn_data *svd;
	vnode_t *vp;
	vattr_t vattr;

	/*
	 * Fail if the page doesn't map to a page in the underlying
	 * mapped file, if an underlying mapped file exists.
	 */
	vattr.va_mask = AT_SIZE;
	if (seg->s_ops == &segvn_ops &&
	    SEGOP_GETVP(seg, addr, &vp) == 0 &&
	    vp != NULL && vp->v_type == VREG &&
	    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
		u_offset_t size = roundup(vattr.va_size, (u_offset_t)PAGESIZE);
		u_offset_t offset = SEGOP_GETOFFSET(seg, addr);

		if (offset >= size)
			return (0);
	}

	/*
	 * Fail if this is an ISM shared segment and the address is
	 * not within the real size of the spt segment that backs it.
	 */
	if (seg->s_ops == &segspt_shmops &&
	    addr >= seg->s_base + spt_realsize(seg))
		return (0);

	/*
	 * Fail if the segment is mapped from /dev/null.
	 * The key is that the mapping comes from segdev and the
	 * type is neither MAP_SHARED nor MAP_PRIVATE.
	 */
	if (seg->s_ops == &segdev_ops &&
	    ((SEGOP_GETTYPE(seg, addr) & (MAP_SHARED | MAP_PRIVATE)) == 0))
		return (0);

	/*
	 * Fail if the page is a MAP_NORESERVE page that has
	 * not actually materialized.
	 * We cheat by knowing that segvn is the only segment
	 * driver that supports MAP_NORESERVE.
	 */
	if (seg->s_ops == &segvn_ops &&
	    (svd = (struct segvn_data *)seg->s_data) != NULL &&
	    (svd->vp == NULL || svd->vp->v_type != VREG) &&
	    (svd->flags & MAP_NORESERVE)) {
		/*
		 * Guilty knowledge here.  We know that
		 * segvn_incore returns more than just the
		 * low-order bit that indicates the page is
		 * actually in memory.  If any bits are set,
		 * then there is backing store for the page.
		 */
		char incore = 0;
		(void) SEGOP_INCORE(seg, addr, PAGESIZE, &incore);
		if (incore == 0)
			return (0);
	}
	return (1);
}
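
/*
 * A worked example of the roundup() size check in page_valid() above,
 * assuming 4K pages (the file sizes are illustrative only): a mapped
 * regular file's size is rounded up to a whole page, and any page
 * whose offset falls at or beyond that rounded size is rejected.
 *
 *	roundup(1000, 4096) == 4096	page offset 0 is valid, 4096 is not
 *	roundup(8192, 4096) == 8192	page offsets 0 and 4096 are valid
 *	roundup(0, 4096) == 0		an empty file has no valid offset
 */
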
/*
 * Map address "addr" in address space "as" into a kernel virtual address.
 * The memory is guaranteed to be resident and locked down.
 */
static caddr_t
mapin(struct as *as, caddr_t addr, int writing)
{
	page_t *pp;
	caddr_t kaddr;
	pfn_t pfnum;

	/*
	 * NB: Because of past mistakes, we have bits being returned
	 * by getpfnum that are actually the page type bits of the pte.
	 * When the object we are trying to map is a memory page with
	 * a page structure everything is ok and we can use the optimal
	 * method, ppmapin.  Otherwise, we have to do something special.
	 */
	pfnum = hat_getpfnum(as->a_hat, addr);
	if (pf_is_memory(pfnum)) {
		pp = page_numtopp_nolock(pfnum);
		if (pp != NULL) {
			ASSERT(PAGE_LOCKED(pp));
			kaddr = ppmapin(pp, writing ?
			    (PROT_READ | PROT_WRITE) : PROT_READ,
			    (caddr_t)-1);
			return (kaddr + ((uintptr_t)addr & PAGEOFFSET));
		}
	}

	/*
	 * Oh well, we didn't have a page struct for the object we were
	 * trying to map in; ppmapin doesn't handle devices, but allocating a
	 * heap address allows ppmapout to free virtual space when done.
	 */
	kaddr = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);

	hat_devload(kas.a_hat, kaddr, PAGESIZE, pfnum,
	    writing ? (PROT_READ | PROT_WRITE) : PROT_READ, HAT_LOAD_LOCK);

	return (kaddr + ((uintptr_t)addr & PAGEOFFSET));
}

/*ARGSUSED*/
static void
mapout(struct as *as, caddr_t addr, caddr_t vaddr, int writing)
{
	vaddr = (caddr_t)(uintptr_t)((uintptr_t)vaddr & PAGEMASK);
	ppmapout(vaddr);
}
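
/*
 * A minimal usage sketch for the mapin()/mapout() pair above (an
 * illustration, not part of the original interface; uaddr and
 * local_buf are hypothetical names):
 *
 *	caddr_t kva = mapin(as, uaddr, 0);	read-only mapping
 *	bcopy(kva, local_buf, len);		len must not cross the page
 *	mapout(as, uaddr, kva, 0);		kva is invalid afterward
 *
 * The underlying page must already be resident and SOFTLOCKed, as
 * urw() arranges below; mapin() itself does no locking.
 */
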
/*
 * Perform I/O to a given process.  This will return EIO if we detect
 * corrupt memory and ENXIO if there is no such mapped address in the
 * user process's address space.
 */
static int
urw(proc_t *p, int writing, void *buf, size_t len, uintptr_t a)
{
	caddr_t addr = (caddr_t)a;
	caddr_t page;
	caddr_t vaddr;
	struct seg *seg;
	int error = 0;
	int err = 0;
	uint_t prot;
	uint_t prot_rw = writing ? PROT_WRITE : PROT_READ;
	int protchanged;
	on_trap_data_t otd;
	int retrycnt;
	struct as *as = p->p_as;
	enum seg_rw rw;

	/*
	 * Locate segment containing address of interest.
	 */
	page = (caddr_t)(uintptr_t)((uintptr_t)addr & PAGEMASK);
	retrycnt = 0;
	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
retry:
	if ((seg = as_segat(as, page)) == NULL ||
	    !page_valid(seg, page)) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (ENXIO);
	}
	SEGOP_GETPROT(seg, page, 0, &prot);

	protchanged = 0;
	if ((prot & prot_rw) == 0) {
		protchanged = 1;
		err = SEGOP_SETPROT(seg, page, PAGESIZE, prot | prot_rw);

		if (err == IE_RETRY) {
			protchanged = 0;
			ASSERT(retrycnt == 0);
			retrycnt++;
			goto retry;
		}

		if (err != 0) {
			AS_LOCK_EXIT(as, &as->a_lock);
			return (ENXIO);
		}
	}

	/*
	 * segvn may do a copy-on-write for the F_SOFTLOCK/S_READ case to
	 * break sharing, so that a copy-on-write of the softlocked page
	 * by another thread is avoided.  But since we locked the address
	 * space as a writer, no other thread can cause a copy-on-write.
	 * S_READ_NOCOW is passed as the access type to tell segvn that
	 * it's ok not to do a copy-on-write for this SOFTLOCK fault.
	 */
	if (writing)
		rw = S_WRITE;
	else if (seg->s_ops == &segvn_ops)
		rw = S_READ_NOCOW;
	else
		rw = S_READ;

	if (SEGOP_FAULT(as->a_hat, seg, page, PAGESIZE, F_SOFTLOCK, rw)) {
		if (protchanged)
			(void) SEGOP_SETPROT(seg, page, PAGESIZE, prot);
		AS_LOCK_EXIT(as, &as->a_lock);
		return (ENXIO);
	}
	CPU_STATS_ADD_K(vm, softlock, 1);

	/*
	 * Make sure we're not trying to read or write off the end of the page.
	 */
	ASSERT(len <= page + PAGESIZE - addr);

	/*
	 * Map in the locked page, copy to our local buffer,
	 * then map the page out and unlock it.
	 */
	vaddr = mapin(as, addr, writing);

	/*
	 * Since we are copying memory on behalf of the user process,
	 * protect against memory error correction faults.
	 */
	if (!on_trap(&otd, OT_DATA_EC)) {
		if (seg->s_ops == &segdev_ops) {
			/*
			 * Device memory can behave strangely; invoke
			 * a segdev-specific copy operation instead.
			 */
			if (writing) {
				if (segdev_copyto(seg, addr, buf, vaddr, len))
					error = ENXIO;
			} else {
				if (segdev_copyfrom(seg, addr, vaddr, buf, len))
					error = ENXIO;
			}
		} else {
			if (writing)
				bcopy(buf, vaddr, len);
			else
				bcopy(vaddr, buf, len);
		}
	} else {
		error = EIO;
	}
	no_trap();

	/*
	 * If we're writing to an executable page, we may need to synchronize
	 * the I$ with the modifications we made through the D$.
	 */
	if (writing && (prot & PROT_EXEC))
		sync_icache(vaddr, (uint_t)len);

	mapout(as, addr, vaddr, writing);

	if (rw == S_READ_NOCOW)
		rw = S_READ;

	(void) SEGOP_FAULT(as->a_hat, seg, page, PAGESIZE, F_SOFTUNLOCK, rw);

	if (protchanged)
		(void) SEGOP_SETPROT(seg, page, PAGESIZE, prot);

	AS_LOCK_EXIT(as, &as->a_lock);

	return (error);
}

int
uread(proc_t *p, void *buf, size_t len, uintptr_t a)
{
	return (urw(p, 0, buf, len, a));
}

int
uwrite(proc_t *p, void *buf, size_t len, uintptr_t a)
{
	return (urw(p, 1, buf, len, a));
}
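
/*
 * A minimal usage sketch for uread()/uwrite() (an illustration, not
 * part of the original file): a debugger-style consumer such as the
 * DTrace fasttrap provider reads the original instruction and then
 * patches in a breakpoint.  'pc', 'saved', and 'BPT_INSTR' are
 * hypothetical names.  Note that urw() operates within a single
 * page, so a transfer that crosses a page boundary must be split
 * by the caller.
 *
 *	uint8_t saved, bpt = BPT_INSTR;
 *
 *	if (uread(p, &saved, sizeof (saved), pc) != 0)
 *		return (-1);	pc unmapped (ENXIO) or bad memory (EIO)
 *	if (uwrite(p, &bpt, sizeof (bpt), pc) != 0)
 *		return (-1);	could not patch the tracepoint
 */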