xref: /titanic_50/usr/src/uts/common/os/urw.c (revision f6e214c7418f43af38bd8c3a557e3d0a1d311cfa)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /*	  All Rights Reserved   */
28 
29 #pragma ident	"%Z%%M%	%I%	%E% SMI"
30 
31 #include <sys/atomic.h>
32 #include <sys/errno.h>
33 #include <sys/stat.h>
34 #include <sys/modctl.h>
35 #include <sys/conf.h>
36 #include <sys/systm.h>
37 #include <sys/ddi.h>
38 #include <sys/sunddi.h>
39 #include <sys/cpuvar.h>
40 #include <sys/kmem.h>
41 #include <sys/strsubr.h>
42 #include <sys/sysmacros.h>
43 #include <sys/frame.h>
44 #include <sys/stack.h>
45 #include <sys/proc.h>
46 #include <sys/priv.h>
47 #include <sys/policy.h>
48 #include <sys/ontrap.h>
49 #include <sys/vmsystm.h>
50 #include <sys/prsystm.h>
51 
52 #include <vm/as.h>
53 #include <vm/seg.h>
54 #include <vm/seg_dev.h>
55 #include <vm/seg_vn.h>
56 #include <vm/seg_spt.h>
57 #include <vm/seg_kmem.h>
58 
59 extern struct seg_ops segdev_ops;	/* needs a header file */
60 extern struct seg_ops segspt_shmops;	/* needs a header file */
61 
62 static int
63 page_valid(struct seg *seg, caddr_t addr)
64 {
65 	struct segvn_data *svd;
66 	vnode_t *vp;
67 	vattr_t vattr;
68 
69 	/*
70 	 * Fail if the page doesn't map to a page in the underlying
71 	 * mapped file, if an underlying mapped file exists.
72 	 */
73 	vattr.va_mask = AT_SIZE;
74 	if (seg->s_ops == &segvn_ops &&
75 	    SEGOP_GETVP(seg, addr, &vp) == 0 &&
76 	    vp != NULL && vp->v_type == VREG &&
77 	    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
78 		u_offset_t size = roundup(vattr.va_size, (u_offset_t)PAGESIZE);
79 		u_offset_t offset = SEGOP_GETOFFSET(seg, addr);
80 
81 		if (offset >= size)
82 			return (0);
83 	}
84 
85 	/*
86 	 * Fail if this is an ISM shared segment and the address is
87 	 * not within the real size of the spt segment that backs it.
88 	 */
89 	if (seg->s_ops == &segspt_shmops &&
90 	    addr >= seg->s_base + spt_realsize(seg))
91 		return (0);
92 
93 	/*
94 	 * Fail if the segment is mapped from /dev/null.
95 	 * The key is that the mapping comes from segdev and the
96 	 * type is neither MAP_SHARED nor MAP_PRIVATE.
97 	 */
98 	if (seg->s_ops == &segdev_ops &&
99 	    ((SEGOP_GETTYPE(seg, addr) & (MAP_SHARED | MAP_PRIVATE)) == 0))
100 		return (0);
101 
102 	/*
103 	 * Fail if the page is a MAP_NORESERVE page that has
104 	 * not actually materialized.
105 	 * We cheat by knowing that segvn is the only segment
106 	 * driver that supports MAP_NORESERVE.
107 	 */
108 	if (seg->s_ops == &segvn_ops &&
109 	    (svd = (struct segvn_data *)seg->s_data) != NULL &&
110 	    (svd->vp == NULL || svd->vp->v_type != VREG) &&
111 	    (svd->flags & MAP_NORESERVE)) {
112 		/*
113 		 * Guilty knowledge here.  We know that
114 		 * segvn_incore returns more than just the
115 		 * low-order bit that indicates the page is
116 		 * actually in memory.  If any bits are set,
117 		 * then there is backing store for the page.
118 		 */
119 		char incore = 0;
120 		(void) SEGOP_INCORE(seg, addr, PAGESIZE, &incore);
121 		if (incore == 0)
122 			return (0);
123 	}
124 	return (1);
125 }
126 
127 /*
128  * Map address "addr" in address space "as" into a kernel virtual address.
129  * The memory is guaranteed to be resident and locked down.
130  */
131 static caddr_t
132 mapin(struct as *as, caddr_t addr, int writing)
133 {
134 	page_t *pp;
135 	caddr_t kaddr;
136 	pfn_t pfnum;
137 
138 	/*
139 	 * NB: Because of past mistakes, we have bits being returned
140 	 * by getpfnum that are actually the page type bits of the pte.
141 	 * When the object we are trying to map is a memory page with
142 	 * a page structure everything is ok and we can use the optimal
143 	 * method, ppmapin.  Otherwise, we have to do something special.
144 	 */
145 	pfnum = hat_getpfnum(as->a_hat, addr);
146 	if (pf_is_memory(pfnum)) {
147 		pp = page_numtopp_nolock(pfnum);
148 		if (pp != NULL) {
149 			ASSERT(PAGE_LOCKED(pp));
150 			kaddr = ppmapin(pp, writing ?
151 				(PROT_READ | PROT_WRITE) : PROT_READ,
152 				(caddr_t)-1);
153 			return (kaddr + ((uintptr_t)addr & PAGEOFFSET));
154 		}
155 	}
156 
157 	/*
158 	 * Oh well, we didn't have a page struct for the object we were
159 	 * trying to map in; ppmapin doesn't handle devices, but allocating a
160 	 * heap address allows ppmapout to free virutal space when done.
161 	 */
162 	kaddr = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
163 
164 	hat_devload(kas.a_hat, kaddr, PAGESIZE, pfnum,
165 		writing ? (PROT_READ | PROT_WRITE) : PROT_READ, HAT_LOAD_LOCK);
166 
167 	return (kaddr + ((uintptr_t)addr & PAGEOFFSET));
168 }
169 
170 /*ARGSUSED*/
171 static void
172 mapout(struct as *as, caddr_t addr, caddr_t vaddr, int writing)
173 {
174 	vaddr = (caddr_t)(uintptr_t)((uintptr_t)vaddr & PAGEMASK);
175 	ppmapout(vaddr);
176 }
177 
178 /*
179  * Perform I/O to a given process. This will return EIO if we detect
180  * corrupt memory and ENXIO if there is no such mapped address in the
181  * user process's address space.
182  */
183 static int
184 urw(proc_t *p, int writing, void *buf, size_t len, uintptr_t a)
185 {
186 	caddr_t addr = (caddr_t)a;
187 	caddr_t page;
188 	caddr_t vaddr;
189 	struct seg *seg;
190 	int error = 0;
191 	int err = 0;
192 	uint_t prot;
193 	uint_t prot_rw = writing ? PROT_WRITE : PROT_READ;
194 	int protchanged;
195 	on_trap_data_t otd;
196 	int retrycnt;
197 	struct as *as = p->p_as;
198 	enum seg_rw rw;
199 
200 	/*
201 	 * Locate segment containing address of interest.
202 	 */
203 	page = (caddr_t)(uintptr_t)((uintptr_t)addr & PAGEMASK);
204 	retrycnt = 0;
205 	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
206 retry:
207 	if ((seg = as_segat(as, page)) == NULL ||
208 	    !page_valid(seg, page)) {
209 		AS_LOCK_EXIT(as, &as->a_lock);
210 		return (ENXIO);
211 	}
212 	SEGOP_GETPROT(seg, page, 0, &prot);
213 
214 	protchanged = 0;
215 	if ((prot & prot_rw) == 0) {
216 		protchanged = 1;
217 		err = SEGOP_SETPROT(seg, page, PAGESIZE, prot | prot_rw);
218 
219 		if (err == IE_RETRY) {
220 			protchanged = 0;
221 			ASSERT(retrycnt == 0);
222 			retrycnt++;
223 			goto retry;
224 		}
225 
226 		if (err != 0) {
227 			AS_LOCK_EXIT(as, &as->a_lock);
228 			return (ENXIO);
229 		}
230 	}
231 
232 	/*
233 	 * segvn may do a copy-on-write for F_SOFTLOCK/S_READ case to break
234 	 * sharing to avoid a copy on write of a softlocked page by another
235 	 * thread. But since we locked the address space as a writer no other
236 	 * thread can cause a copy on write. S_READ_NOCOW is passed as the
237 	 * access type to tell segvn that it's ok not to do a copy-on-write
238 	 * for this SOFTLOCK fault.
239 	 */
240 	if (writing)
241 		rw = S_WRITE;
242 	else if (seg->s_ops == &segvn_ops)
243 		rw = S_READ_NOCOW;
244 	else
245 		rw = S_READ;
246 
247 	if (SEGOP_FAULT(as->a_hat, seg, page, PAGESIZE, F_SOFTLOCK, rw)) {
248 		if (protchanged)
249 			(void) SEGOP_SETPROT(seg, page, PAGESIZE, prot);
250 		AS_LOCK_EXIT(as, &as->a_lock);
251 		return (ENXIO);
252 	}
253 	CPU_STATS_ADD_K(vm, softlock, 1);
254 
255 	/*
256 	 * Make sure we're not trying to read or write off the end of the page.
257 	 */
258 	ASSERT(len <= page + PAGESIZE - addr);
259 
260 	/*
261 	 * Map in the locked page, copy to our local buffer,
262 	 * then map the page out and unlock it.
263 	 */
264 	vaddr = mapin(as, addr, writing);
265 
266 	/*
267 	 * Since we are copying memory on behalf of the user process,
268 	 * protect against memory error correction faults.
269 	 */
270 	if (!on_trap(&otd, OT_DATA_EC)) {
271 		if (seg->s_ops == &segdev_ops) {
272 			/*
273 			 * Device memory can behave strangely; invoke
274 			 * a segdev-specific copy operation instead.
275 			 */
276 			if (writing) {
277 				if (segdev_copyto(seg, addr, buf, vaddr, len))
278 					error = ENXIO;
279 			} else {
280 				if (segdev_copyfrom(seg, addr, vaddr, buf, len))
281 					error = ENXIO;
282 			}
283 		} else {
284 			if (writing)
285 				bcopy(buf, vaddr, len);
286 			else
287 				bcopy(vaddr, buf, len);
288 		}
289 	} else {
290 		error = EIO;
291 	}
292 	no_trap();
293 
294 	/*
295 	 * If we're writing to an executable page, we may need to sychronize
296 	 * the I$ with the modifications we made through the D$.
297 	 */
298 	if (writing && (prot & PROT_EXEC))
299 		sync_icache(vaddr, (uint_t)len);
300 
301 	mapout(as, addr, vaddr, writing);
302 
303 	if (rw == S_READ_NOCOW)
304 		rw = S_READ;
305 
306 	(void) SEGOP_FAULT(as->a_hat, seg, page, PAGESIZE, F_SOFTUNLOCK, rw);
307 
308 	if (protchanged)
309 		(void) SEGOP_SETPROT(seg, page, PAGESIZE, prot);
310 
311 	AS_LOCK_EXIT(as, &as->a_lock);
312 
313 	return (error);
314 }
315 
316 int
317 uread(proc_t *p, void *buf, size_t len, uintptr_t a)
318 {
319 	return (urw(p, 0, buf, len, a));
320 }
321 
322 int
323 uwrite(proc_t *p, void *buf, size_t len, uintptr_t a)
324 {
325 	return (urw(p, 1, buf, len, a));
326 }
327