1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
28
29 #include <sys/atomic.h>
30 #include <sys/errno.h>
31 #include <sys/stat.h>
32 #include <sys/modctl.h>
33 #include <sys/conf.h>
34 #include <sys/systm.h>
35 #include <sys/ddi.h>
36 #include <sys/sunddi.h>
37 #include <sys/cpuvar.h>
38 #include <sys/kmem.h>
39 #include <sys/strsubr.h>
40 #include <sys/sysmacros.h>
41 #include <sys/frame.h>
42 #include <sys/stack.h>
43 #include <sys/proc.h>
44 #include <sys/priv.h>
45 #include <sys/policy.h>
46 #include <sys/ontrap.h>
47 #include <sys/vmsystm.h>
48 #include <sys/prsystm.h>
49
50 #include <vm/as.h>
51 #include <vm/seg.h>
52 #include <vm/seg_dev.h>
53 #include <vm/seg_vn.h>
54 #include <vm/seg_spt.h>
55 #include <vm/seg_kmem.h>
56
/*
 * Operation vectors of the segdev and ISM (spt) segment drivers.  They are
 * compared against seg->s_ops below to tell which driver owns a segment.
 * Declared by hand here; they still need a header file of their own.
 */
extern struct seg_ops segdev_ops;
extern struct seg_ops segspt_shmops;
59
60 static int
page_valid(struct seg * seg,caddr_t addr)61 page_valid(struct seg *seg, caddr_t addr)
62 {
63 struct segvn_data *svd;
64 vnode_t *vp;
65 vattr_t vattr;
66
67 /*
68 * Fail if the page doesn't map to a page in the underlying
69 * mapped file, if an underlying mapped file exists.
70 */
71 vattr.va_mask = AT_SIZE;
72 if (seg->s_ops == &segvn_ops &&
73 SEGOP_GETVP(seg, addr, &vp) == 0 &&
74 vp != NULL && vp->v_type == VREG &&
75 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
76 u_offset_t size = roundup(vattr.va_size, (u_offset_t)PAGESIZE);
77 u_offset_t offset = SEGOP_GETOFFSET(seg, addr);
78
79 if (offset >= size)
80 return (0);
81 }
82
83 /*
84 * Fail if this is an ISM shared segment and the address is
85 * not within the real size of the spt segment that backs it.
86 */
87 if (seg->s_ops == &segspt_shmops &&
88 addr >= seg->s_base + spt_realsize(seg))
89 return (0);
90
91 /*
92 * Fail if the segment is mapped from /dev/null.
93 * The key is that the mapping comes from segdev and the
94 * type is neither MAP_SHARED nor MAP_PRIVATE.
95 */
96 if (seg->s_ops == &segdev_ops &&
97 ((SEGOP_GETTYPE(seg, addr) & (MAP_SHARED | MAP_PRIVATE)) == 0))
98 return (0);
99
100 /*
101 * Fail if the page is a MAP_NORESERVE page that has
102 * not actually materialized.
103 * We cheat by knowing that segvn is the only segment
104 * driver that supports MAP_NORESERVE.
105 */
106 if (seg->s_ops == &segvn_ops &&
107 (svd = (struct segvn_data *)seg->s_data) != NULL &&
108 (svd->vp == NULL || svd->vp->v_type != VREG) &&
109 (svd->flags & MAP_NORESERVE)) {
110 /*
111 * Guilty knowledge here. We know that
112 * segvn_incore returns more than just the
113 * low-order bit that indicates the page is
114 * actually in memory. If any bits are set,
115 * then there is backing store for the page.
116 */
117 char incore = 0;
118 (void) SEGOP_INCORE(seg, addr, PAGESIZE, &incore);
119 if (incore == 0)
120 return (0);
121 }
122 return (1);
123 }
124
125 /*
126 * Map address "addr" in address space "as" into a kernel virtual address.
127 * The memory is guaranteed to be resident and locked down.
128 */
129 static caddr_t
mapin(struct as * as,caddr_t addr,int writing)130 mapin(struct as *as, caddr_t addr, int writing)
131 {
132 page_t *pp;
133 caddr_t kaddr;
134 pfn_t pfnum;
135
136 /*
137 * NB: Because of past mistakes, we have bits being returned
138 * by getpfnum that are actually the page type bits of the pte.
139 * When the object we are trying to map is a memory page with
140 * a page structure everything is ok and we can use the optimal
141 * method, ppmapin. Otherwise, we have to do something special.
142 */
143 pfnum = hat_getpfnum(as->a_hat, addr);
144 if (pf_is_memory(pfnum)) {
145 pp = page_numtopp_nolock(pfnum);
146 if (pp != NULL) {
147 ASSERT(PAGE_LOCKED(pp));
148 kaddr = ppmapin(pp, writing ?
149 (PROT_READ | PROT_WRITE) : PROT_READ, (caddr_t)-1);
150 return (kaddr + ((uintptr_t)addr & PAGEOFFSET));
151 }
152 }
153
154 /*
155 * Oh well, we didn't have a page struct for the object we were
156 * trying to map in; ppmapin doesn't handle devices, but allocating a
157 * heap address allows ppmapout to free virutal space when done.
158 */
159 kaddr = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
160
161 hat_devload(kas.a_hat, kaddr, PAGESIZE, pfnum,
162 writing ? (PROT_READ | PROT_WRITE) : PROT_READ, HAT_LOAD_LOCK);
163
164 return (kaddr + ((uintptr_t)addr & PAGEOFFSET));
165 }
166
167 /*ARGSUSED*/
168 static void
mapout(struct as * as,caddr_t addr,caddr_t vaddr,int writing)169 mapout(struct as *as, caddr_t addr, caddr_t vaddr, int writing)
170 {
171 vaddr = (caddr_t)(uintptr_t)((uintptr_t)vaddr & PAGEMASK);
172 ppmapout(vaddr);
173 }
174
175 /*
176 * Perform I/O to a given process. This will return EIO if we detect
177 * corrupt memory and ENXIO if there is no such mapped address in the
178 * user process's address space.
179 */
180 static int
urw(proc_t * p,int writing,void * buf,size_t len,uintptr_t a)181 urw(proc_t *p, int writing, void *buf, size_t len, uintptr_t a)
182 {
183 caddr_t addr = (caddr_t)a;
184 caddr_t page;
185 caddr_t vaddr;
186 struct seg *seg;
187 int error = 0;
188 int err = 0;
189 uint_t prot;
190 uint_t prot_rw = writing ? PROT_WRITE : PROT_READ;
191 int protchanged;
192 on_trap_data_t otd;
193 int retrycnt;
194 struct as *as = p->p_as;
195 enum seg_rw rw;
196
197 /*
198 * Locate segment containing address of interest.
199 */
200 page = (caddr_t)(uintptr_t)((uintptr_t)addr & PAGEMASK);
201 retrycnt = 0;
202 AS_LOCK_ENTER(as, RW_WRITER);
203 retry:
204 if ((seg = as_segat(as, page)) == NULL ||
205 !page_valid(seg, page)) {
206 AS_LOCK_EXIT(as);
207 return (ENXIO);
208 }
209 SEGOP_GETPROT(seg, page, 0, &prot);
210
211 protchanged = 0;
212 if ((prot & prot_rw) == 0) {
213 protchanged = 1;
214 err = SEGOP_SETPROT(seg, page, PAGESIZE, prot | prot_rw);
215
216 if (err == IE_RETRY) {
217 protchanged = 0;
218 ASSERT(retrycnt == 0);
219 retrycnt++;
220 goto retry;
221 }
222
223 if (err != 0) {
224 AS_LOCK_EXIT(as);
225 return (ENXIO);
226 }
227 }
228
229 /*
230 * segvn may do a copy-on-write for F_SOFTLOCK/S_READ case to break
231 * sharing to avoid a copy on write of a softlocked page by another
232 * thread. But since we locked the address space as a writer no other
233 * thread can cause a copy on write. S_READ_NOCOW is passed as the
234 * access type to tell segvn that it's ok not to do a copy-on-write
235 * for this SOFTLOCK fault.
236 */
237 if (writing)
238 rw = S_WRITE;
239 else if (seg->s_ops == &segvn_ops)
240 rw = S_READ_NOCOW;
241 else
242 rw = S_READ;
243
244 if (SEGOP_FAULT(as->a_hat, seg, page, PAGESIZE, F_SOFTLOCK, rw)) {
245 if (protchanged)
246 (void) SEGOP_SETPROT(seg, page, PAGESIZE, prot);
247 AS_LOCK_EXIT(as);
248 return (ENXIO);
249 }
250 CPU_STATS_ADD_K(vm, softlock, 1);
251
252 /*
253 * Make sure we're not trying to read or write off the end of the page.
254 */
255 ASSERT(len <= page + PAGESIZE - addr);
256
257 /*
258 * Map in the locked page, copy to our local buffer,
259 * then map the page out and unlock it.
260 */
261 vaddr = mapin(as, addr, writing);
262
263 /*
264 * Since we are copying memory on behalf of the user process,
265 * protect against memory error correction faults.
266 */
267 if (!on_trap(&otd, OT_DATA_EC)) {
268 if (seg->s_ops == &segdev_ops) {
269 /*
270 * Device memory can behave strangely; invoke
271 * a segdev-specific copy operation instead.
272 */
273 if (writing) {
274 if (segdev_copyto(seg, addr, buf, vaddr, len))
275 error = ENXIO;
276 } else {
277 if (segdev_copyfrom(seg, addr, vaddr, buf, len))
278 error = ENXIO;
279 }
280 } else {
281 if (writing)
282 bcopy(buf, vaddr, len);
283 else
284 bcopy(vaddr, buf, len);
285 }
286 } else {
287 error = EIO;
288 }
289 no_trap();
290
291 /*
292 * If we're writing to an executable page, we may need to sychronize
293 * the I$ with the modifications we made through the D$.
294 */
295 if (writing && (prot & PROT_EXEC))
296 sync_icache(vaddr, (uint_t)len);
297
298 mapout(as, addr, vaddr, writing);
299
300 if (rw == S_READ_NOCOW)
301 rw = S_READ;
302
303 (void) SEGOP_FAULT(as->a_hat, seg, page, PAGESIZE, F_SOFTUNLOCK, rw);
304
305 if (protchanged)
306 (void) SEGOP_SETPROT(seg, page, PAGESIZE, prot);
307
308 AS_LOCK_EXIT(as);
309
310 return (error);
311 }
312
313 int
uread(proc_t * p,void * buf,size_t len,uintptr_t a)314 uread(proc_t *p, void *buf, size_t len, uintptr_t a)
315 {
316 return (urw(p, 0, buf, len, a));
317 }
318
319 int
uwrite(proc_t * p,void * buf,size_t len,uintptr_t a)320 uwrite(proc_t *p, void *buf, size_t len, uintptr_t a)
321 {
322 return (urw(p, 1, buf, len, a));
323 }
324