xref: /titanic_41/usr/src/uts/common/vm/seg_umap.c (revision dd7afb26c5036958cddc0c2c1e499571664a6ed0)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2016 Joyent, Inc.
14  */
15 
16 /*
17  * VM - Kernel-to-user mapping segment
18  *
19  * The umap segment driver was primarily designed to facilitate the comm page:
20  * a portion of kernel memory shared with userspace so that certain (namely
21  * clock-related) actions could operate without making an expensive trip into
22  * the kernel.
23  *
24  * Since the initial requirements for the comm page are slim, advanced features
25  * of the segment driver such as per-page protection have been left
26  * unimplemented at this time.
27  */
28 
29 
30 #include <sys/types.h>
31 #include <sys/param.h>
32 #include <sys/errno.h>
33 #include <sys/cred.h>
34 #include <sys/kmem.h>
35 #include <sys/lgrp.h>
36 #include <sys/mman.h>
37 
38 #include <vm/hat.h>
39 #include <vm/as.h>
40 #include <vm/seg.h>
41 #include <vm/seg_kmem.h>
42 #include <vm/seg_umap.h>
43 
44 
45 static boolean_t segumap_verify_safe(caddr_t, size_t);
46 static int segumap_dup(struct seg *, struct seg *);
47 static int segumap_unmap(struct seg *, caddr_t, size_t);
48 static void segumap_free(struct seg *);
49 static faultcode_t segumap_fault(struct hat *, struct seg *, caddr_t, size_t,
50     enum fault_type, enum seg_rw);
51 static faultcode_t segumap_faulta(struct seg *, caddr_t);
52 static int segumap_setprot(struct seg *, caddr_t, size_t, uint_t);
53 static int segumap_checkprot(struct seg *, caddr_t, size_t, uint_t);
54 static int segumap_sync(struct seg *, caddr_t, size_t, int, uint_t);
55 static size_t segumap_incore(struct seg *, caddr_t, size_t, char *);
56 static int segumap_lockop(struct seg *, caddr_t, size_t, int, int, ulong_t *,
57     size_t);
58 static int segumap_getprot(struct seg *, caddr_t, size_t, uint_t *);
59 static u_offset_t segumap_getoffset(struct seg *, caddr_t);
60 static int segumap_gettype(struct seg *, caddr_t);
61 static int segumap_getvp(struct seg *, caddr_t, struct vnode **);
62 static int segumap_advise(struct seg *, caddr_t, size_t, uint_t);
63 static void segumap_dump(struct seg *);
64 static int segumap_pagelock(struct seg *, caddr_t, size_t, struct page ***,
65     enum lock_type, enum seg_rw);
66 static int segumap_setpagesize(struct seg *, caddr_t, size_t, uint_t);
67 static int segumap_getmemid(struct seg *, caddr_t, memid_t *);
68 static int segumap_capable(struct seg *, segcapability_t);
69 
70 static struct seg_ops segumap_ops = {
71 	segumap_dup,
72 	segumap_unmap,
73 	segumap_free,
74 	segumap_fault,
75 	segumap_faulta,
76 	segumap_setprot,
77 	segumap_checkprot,
78 	NULL,			/* kluster: disabled */
79 	NULL,			/* swapout: disabled */
80 	segumap_sync,
81 	segumap_incore,
82 	segumap_lockop,
83 	segumap_getprot,
84 	segumap_getoffset,
85 	segumap_gettype,
86 	segumap_getvp,
87 	segumap_advise,
88 	segumap_dump,
89 	segumap_pagelock,
90 	segumap_setpagesize,
91 	segumap_getmemid,
92 	NULL,			/* getpolicy: disabled */
93 	segumap_capable,
94 	seg_inherit_notsup
95 };
96 
97 
98 /*
99  * Create a kernel/user-mapped segment.
100  */
101 int
segumap_create(struct seg * seg,void * argsp)102 segumap_create(struct seg *seg, void *argsp)
103 {
104 	segumap_crargs_t *a = (struct segumap_crargs *)argsp;
105 	segumap_data_t *data;
106 
107 	ASSERT((uintptr_t)a->kaddr > _userlimit);
108 
109 	/*
110 	 * Check several aspects of the mapping request to ensure validity:
111 	 * - kernel pages must reside entirely in kernel space
112 	 * - target protection must be user-accessible
113 	 * - kernel address must be page-aligned
114 	 * - kernel address must reside inside a "safe" segment
115 	 */
116 	if ((uintptr_t)a->kaddr <= _userlimit ||
117 	    ((uintptr_t)a->kaddr + seg->s_size) < (uintptr_t)a->kaddr ||
118 	    (a->prot & PROT_USER) == 0 ||
119 	    ((uintptr_t)a->kaddr & PAGEOFFSET) != 0 ||
120 	    !segumap_verify_safe(a->kaddr, seg->s_size)) {
121 		return (EINVAL);
122 	}
123 
124 	data = kmem_zalloc(sizeof (*data), KM_SLEEP);
125 	rw_init(&data->sud_lock, NULL, RW_DEFAULT, NULL);
126 	data->sud_kaddr = a->kaddr;
127 	data->sud_prot = a->prot;
128 
129 	seg->s_ops = &segumap_ops;
130 	seg->s_data = data;
131 	return (0);
132 }
133 
134 static boolean_t
segumap_verify_safe(caddr_t kaddr,size_t len)135 segumap_verify_safe(caddr_t kaddr, size_t len)
136 {
137 	struct seg *seg;
138 
139 	/*
140 	 * Presently, only pages which are backed by segkmem are allowed to be
141 	 * shared with userspace.  This prevents nasty paging behavior with
142 	 * other drivers such as seg_kp.  Furthermore, the backing kernel
143 	 * segment must completely contain the region to be mapped.
144 	 *
145 	 * Failing these checks is fatal for now since such mappings are done
146 	 * in a very limited context from the kernel.
147 	 */
148 	AS_LOCK_ENTER(&kas, RW_READER);
149 	seg = as_segat(&kas, kaddr);
150 	VERIFY(seg != NULL);
151 	VERIFY(seg->s_base + seg->s_size >= kaddr + len);
152 	VERIFY(seg->s_ops == &segkmem_ops);
153 	AS_LOCK_EXIT(&kas);
154 
155 	return (B_TRUE);
156 }
157 
158 static int
segumap_dup(struct seg * seg,struct seg * newseg)159 segumap_dup(struct seg *seg, struct seg *newseg)
160 {
161 	segumap_data_t *sud = (segumap_data_t *)seg->s_data;
162 	segumap_data_t *newsud;
163 
164 	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
165 
166 	newsud = kmem_zalloc(sizeof (segumap_data_t), KM_SLEEP);
167 	rw_init(&newsud->sud_lock, NULL, RW_DEFAULT, NULL);
168 	newsud->sud_kaddr = sud->sud_kaddr;
169 	newsud->sud_prot = sud->sud_prot;
170 
171 	newseg->s_ops = seg->s_ops;
172 	newseg->s_data = newsud;
173 	return (0);
174 }
175 
176 static int
segumap_unmap(struct seg * seg,caddr_t addr,size_t len)177 segumap_unmap(struct seg *seg, caddr_t addr, size_t len)
178 {
179 	segumap_data_t *sud = (segumap_data_t *)seg->s_data;
180 
181 	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
182 
183 	/* Only allow unmap of entire segment */
184 	if (addr != seg->s_base || len != seg->s_size) {
185 		return (EINVAL);
186 	}
187 	if (sud->sud_softlockcnt != 0) {
188 		return (EAGAIN);
189 	}
190 
191 	/*
192 	 * Unconditionally unload the entire segment range.
193 	 */
194 	hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD_UNMAP);
195 
196 	seg_free(seg);
197 	return (0);
198 }
199 
200 static void
segumap_free(struct seg * seg)201 segumap_free(struct seg *seg)
202 {
203 	segumap_data_t *data = (segumap_data_t *)seg->s_data;
204 
205 	ASSERT(data != NULL);
206 
207 	rw_destroy(&data->sud_lock);
208 	VERIFY(data->sud_softlockcnt == 0);
209 	kmem_free(data, sizeof (*data));
210 	seg->s_data = NULL;
211 }
212 
213 /* ARGSUSED */
214 static faultcode_t
segumap_fault(struct hat * hat,struct seg * seg,caddr_t addr,size_t len,enum fault_type type,enum seg_rw tw)215 segumap_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
216     enum fault_type type, enum seg_rw tw)
217 {
218 	segumap_data_t *sud = (segumap_data_t *)seg->s_data;
219 
220 	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
221 
222 	if (type == F_PROT) {
223 		/*
224 		 * Since protection on the segment is fixed, there is nothing
225 		 * to do but report an error for protection faults.
226 		 */
227 		return (FC_PROT);
228 	} else if (type == F_SOFTUNLOCK) {
229 		size_t plen = btop(len);
230 
231 		rw_enter(&sud->sud_lock, RW_WRITER);
232 		VERIFY(sud->sud_softlockcnt >= plen);
233 		sud->sud_softlockcnt -= plen;
234 		rw_exit(&sud->sud_lock);
235 		return (0);
236 	}
237 
238 	ASSERT(type == F_INVAL || type == F_SOFTLOCK);
239 	rw_enter(&sud->sud_lock, RW_WRITER);
240 
241 	if (type == F_INVAL ||
242 	    (type == F_SOFTLOCK && sud->sud_softlockcnt == 0)) {
243 		/*
244 		 * Load the (entire) segment into the HAT.
245 		 *
246 		 * It's possible that threads racing into as_fault will cause
247 		 * seg_umap to load the same range multiple times in quick
248 		 * succession.  Redundant hat_devload operations are safe.
249 		 */
250 		for (uintptr_t i = 0; i < seg->s_size; i += PAGESIZE) {
251 			pfn_t pfn;
252 
253 			pfn = hat_getpfnum(kas.a_hat, sud->sud_kaddr + i);
254 			VERIFY(pfn != PFN_INVALID);
255 			hat_devload(seg->s_as->a_hat, seg->s_base + i,
256 			    PAGESIZE, pfn, sud->sud_prot, HAT_LOAD);
257 		}
258 	}
259 	if (type == F_SOFTLOCK) {
260 		size_t nval = sud->sud_softlockcnt + btop(len);
261 
262 		if (sud->sud_softlockcnt >= nval) {
263 			rw_exit(&sud->sud_lock);
264 			return (FC_MAKE_ERR(EOVERFLOW));
265 		}
266 		sud->sud_softlockcnt = nval;
267 	}
268 
269 	rw_exit(&sud->sud_lock);
270 	return (0);
271 }
272 
273 /* ARGSUSED */
274 static faultcode_t
segumap_faulta(struct seg * seg,caddr_t addr)275 segumap_faulta(struct seg *seg, caddr_t addr)
276 {
277 	/* Do nothing since asynch pagefault should not load translation. */
278 	return (0);
279 }
280 
281 /* ARGSUSED */
282 static int
segumap_setprot(struct seg * seg,caddr_t addr,size_t len,uint_t prot)283 segumap_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
284 {
285 	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
286 
287 	/*
288 	 * The seg_umap driver does not yet allow protection to be changed.
289 	 */
290 	return (EACCES);
291 }
292 
293 /* ARGSUSED */
294 static int
segumap_checkprot(struct seg * seg,caddr_t addr,size_t len,uint_t prot)295 segumap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
296 {
297 	segumap_data_t *sud = (segumap_data_t *)seg->s_data;
298 	int error = 0;
299 
300 	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
301 
302 	rw_enter(&sud->sud_lock, RW_READER);
303 	if ((sud->sud_prot & prot) != prot) {
304 		error = EACCES;
305 	}
306 	rw_exit(&sud->sud_lock);
307 	return (error);
308 }
309 
310 /* ARGSUSED */
311 static int
segumap_sync(struct seg * seg,caddr_t addr,size_t len,int attr,uint_t flags)312 segumap_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
313 {
314 	/* Always succeed since there are no backing store to sync */
315 	return (0);
316 }
317 
318 /* ARGSUSED */
319 static size_t
segumap_incore(struct seg * seg,caddr_t addr,size_t len,char * vec)320 segumap_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
321 {
322 	size_t sz = 0;
323 
324 	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
325 
326 	len = (len + PAGEOFFSET) & PAGEMASK;
327 	while (len > 0) {
328 		*vec = 1;
329 		sz += PAGESIZE;
330 		vec++;
331 		len -= PAGESIZE;
332 	}
333 	return (sz);
334 }
335 
336 /* ARGSUSED */
337 static int
segumap_lockop(struct seg * seg,caddr_t addr,size_t len,int attr,int op,ulong_t * lockmap,size_t pos)338 segumap_lockop(struct seg *seg, caddr_t addr, size_t len, int attr, int op,
339     ulong_t *lockmap, size_t pos)
340 {
341 	/* Report success since kernel pages are always in memory. */
342 	return (0);
343 }
344 
345 static int
segumap_getprot(struct seg * seg,caddr_t addr,size_t len,uint_t * protv)346 segumap_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
347 {
348 	segumap_data_t *sud = (segumap_data_t *)seg->s_data;
349 	size_t pgno;
350 	uint_t prot;
351 
352 	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
353 
354 	rw_enter(&sud->sud_lock, RW_READER);
355 	prot = sud->sud_prot;
356 	rw_exit(&sud->sud_lock);
357 
358 	/*
359 	 * Reporting protection is simple since it is not tracked per-page.
360 	 */
361 	pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
362 	while (pgno > 0) {
363 		protv[--pgno] = prot;
364 	}
365 	return (0);
366 }
367 
368 /* ARGSUSED */
369 static u_offset_t
segumap_getoffset(struct seg * seg,caddr_t addr)370 segumap_getoffset(struct seg *seg, caddr_t addr)
371 {
372 	/*
373 	 * To avoid leaking information about the layout of the kernel address
374 	 * space, always report '0' as the offset.
375 	 */
376 	return (0);
377 }
378 
379 /* ARGSUSED */
380 static int
segumap_gettype(struct seg * seg,caddr_t addr)381 segumap_gettype(struct seg *seg, caddr_t addr)
382 {
383 	/*
384 	 * Since already-existing kernel pages are being mapped into userspace,
385 	 * always report the segment type as shared.
386 	 */
387 	return (MAP_SHARED);
388 }
389 
390 /* ARGSUSED */
391 static int
segumap_getvp(struct seg * seg,caddr_t addr,struct vnode ** vpp)392 segumap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
393 {
394 	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
395 
396 	*vpp = NULL;
397 	return (0);
398 }
399 
400 /* ARGSUSED */
401 static int
segumap_advise(struct seg * seg,caddr_t addr,size_t len,uint_t behav)402 segumap_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
403 {
404 	if (behav == MADV_PURGE) {
405 		/* Purge does not make sense for this mapping */
406 		return (EINVAL);
407 	}
408 	/* Indicate success for everything else. */
409 	return (0);
410 }
411 
/*
 * Dump hook for the segment.  Intentionally a no-op.
 */
/* ARGSUSED */
static void
segumap_dump(struct seg *seg)
{
	/*
	 * This mapping exists to share kernel data with userspace, so there
	 * is nothing additional which should be dumped on its behalf.
	 */
}
421 
422 /* ARGSUSED */
423 static int
segumap_pagelock(struct seg * seg,caddr_t addr,size_t len,struct page *** ppp,enum lock_type type,enum seg_rw rw)424 segumap_pagelock(struct seg *seg, caddr_t addr, size_t len, struct page ***ppp,
425     enum lock_type type, enum seg_rw rw)
426 {
427 	return (ENOTSUP);
428 }
429 
430 /* ARGSUSED */
431 static int
segumap_setpagesize(struct seg * seg,caddr_t addr,size_t len,uint_t szc)432 segumap_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
433 {
434 	return (ENOTSUP);
435 }
436 
437 static int
segumap_getmemid(struct seg * seg,caddr_t addr,memid_t * memidp)438 segumap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
439 {
440 	segumap_data_t *sud = (segumap_data_t *)seg->s_data;
441 
442 	memidp->val[0] = (uintptr_t)sud->sud_kaddr;
443 	memidp->val[1] = (uintptr_t)(addr - seg->s_base);
444 	return (0);
445 }
446 
447 /* ARGSUSED */
448 static int
segumap_capable(struct seg * seg,segcapability_t capability)449 segumap_capable(struct seg *seg, segcapability_t capability)
450 {
451 	/* no special capablities */
452 	return (0);
453 }
454