/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2018 Joyent, Inc.
 */

/*
 * VM - Kernel-to-user mapping segment
 *
 * The umap segment driver was primarily designed to facilitate the comm page:
 * a portion of kernel memory shared with userspace so that certain (namely
 * clock-related) actions could operate without making an expensive trip into
 * the kernel.
 *
 * Since the initial requirements for the comm page are slim, advanced features
 * of the segment driver such as per-page protection have been left
 * unimplemented at this time.
 */
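
/*
 * A minimal usage sketch (hypothetical names -- kbuf, uaddr, and len are
 * illustrative; the comm page code is the real consumer): a page-aligned,
 * segkmem-backed kernel buffer is mapped into a process address space by
 * handing segumap_create to as_map():
 *
 *	segumap_crargs_t args;
 *
 *	args.kaddr = kbuf;	(page-aligned kernel VA backed by segkmem)
 *	args.prot = PROT_READ | PROT_USER;
 *	error = as_map(p->p_as, uaddr, len, segumap_create, &args);
 *
 * On success, as_map() allocates the seg and segumap_create() wires it to
 * the segumap_ops table below.
 */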


#include <sys/types.h>
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/lgrp.h>
#include <sys/mman.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>
#include <vm/seg_umap.h>


static boolean_t segumap_verify_safe(caddr_t, size_t);
static int segumap_dup(struct seg *, struct seg *);
static int segumap_unmap(struct seg *, caddr_t, size_t);
static void segumap_free(struct seg *);
static faultcode_t segumap_fault(struct hat *, struct seg *, caddr_t, size_t,
    enum fault_type, enum seg_rw);
static faultcode_t segumap_faulta(struct seg *, caddr_t);
static int segumap_setprot(struct seg *, caddr_t, size_t, uint_t);
static int segumap_checkprot(struct seg *, caddr_t, size_t, uint_t);
static int segumap_sync(struct seg *, caddr_t, size_t, int, uint_t);
static size_t segumap_incore(struct seg *, caddr_t, size_t, char *);
static int segumap_lockop(struct seg *, caddr_t, size_t, int, int, ulong_t *,
    size_t);
static int segumap_getprot(struct seg *, caddr_t, size_t, uint_t *);
static u_offset_t segumap_getoffset(struct seg *, caddr_t);
static int segumap_gettype(struct seg *, caddr_t);
static int segumap_getvp(struct seg *, caddr_t, struct vnode **);
static int segumap_advise(struct seg *, caddr_t, size_t, uint_t);
static void segumap_dump(struct seg *);
static int segumap_pagelock(struct seg *, caddr_t, size_t, struct page ***,
    enum lock_type, enum seg_rw);
static int segumap_setpagesize(struct seg *, caddr_t, size_t, uint_t);
static int segumap_getmemid(struct seg *, caddr_t, memid_t *);
static int segumap_capable(struct seg *, segcapability_t);

static struct seg_ops segumap_ops = {
	segumap_dup,
	segumap_unmap,
	segumap_free,
	segumap_fault,
	segumap_faulta,
	segumap_setprot,
	segumap_checkprot,
	NULL,			/* kluster: disabled */
	NULL,			/* swapout: disabled */
	segumap_sync,
	segumap_incore,
	segumap_lockop,
	segumap_getprot,
	segumap_getoffset,
	segumap_gettype,
	segumap_getvp,
	segumap_advise,
	segumap_dump,
	segumap_pagelock,
	segumap_setpagesize,
	segumap_getmemid,
	NULL,			/* getpolicy: disabled */
	segumap_capable,
	seg_inherit_notsup
};


/*
 * Create a kernel/user-mapped segment.
 */
int
segumap_create(struct seg **segpp, void *argsp)
{
	struct seg *seg = *segpp;
	segumap_crargs_t *a = (struct segumap_crargs *)argsp;
	segumap_data_t *data;

	ASSERT((uintptr_t)a->kaddr > _userlimit);

	/*
	 * Check several aspects of the mapping request to ensure validity:
	 * - kernel pages must reside entirely in kernel space
	 * - target protection must be user-accessible
	 * - kernel address must be page-aligned
	 * - kernel address must reside inside a "safe" segment
	 */
	if ((uintptr_t)a->kaddr <= _userlimit ||
	    ((uintptr_t)a->kaddr + seg->s_size) < (uintptr_t)a->kaddr ||
	    (a->prot & PROT_USER) == 0 ||
	    ((uintptr_t)a->kaddr & PAGEOFFSET) != 0 ||
	    !segumap_verify_safe(a->kaddr, seg->s_size)) {
		return (EINVAL);
	}

	data = kmem_zalloc(sizeof (*data), KM_SLEEP);
	rw_init(&data->sud_lock, NULL, RW_DEFAULT, NULL);
	data->sud_kaddr = a->kaddr;
	data->sud_prot = a->prot;

	seg->s_ops = &segumap_ops;
	seg->s_data = data;
	return (0);
}

static boolean_t
segumap_verify_safe(caddr_t kaddr, size_t len)
{
	struct seg *seg;

	/*
	 * Presently, only pages which are backed by segkmem are allowed to be
	 * shared with userspace. This prevents nasty paging behavior with
	 * other drivers such as seg_kp. Furthermore, the backing kernel
	 * segment must completely contain the region to be mapped.
	 *
	 * Failing these checks is fatal for now since such mappings are done
	 * in a very limited context from the kernel.
	 */
	AS_LOCK_ENTER(&kas, RW_READER);
	seg = as_segat(&kas, kaddr);
	VERIFY(seg != NULL);
	VERIFY(seg->s_base + seg->s_size >= kaddr + len);
	VERIFY(seg->s_ops == &segkmem_ops);
	AS_LOCK_EXIT(&kas);

	return (B_TRUE);
}

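/*
 * Duplicate the segment, as happens when an address space is forked. The
 * child's mapping targets the same kernel address with the same protection.
 */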
static int
segumap_dup(struct seg *seg, struct seg *newseg)
{
	segumap_data_t *sud = (segumap_data_t *)seg->s_data;
	segumap_data_t *newsud;

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));

	newsud = kmem_zalloc(sizeof (segumap_data_t), KM_SLEEP);
	rw_init(&newsud->sud_lock, NULL, RW_DEFAULT, NULL);
	newsud->sud_kaddr = sud->sud_kaddr;
	newsud->sud_prot = sud->sud_prot;

	newseg->s_ops = seg->s_ops;
	newseg->s_data = newsud;
	return (0);
}

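/*
 * Unmap the segment. Partial unmaps are refused, as is unmapping while
 * pages remain softlocked.
 */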
static int
segumap_unmap(struct seg *seg, caddr_t addr, size_t len)
{
	segumap_data_t *sud = (segumap_data_t *)seg->s_data;

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));

	/* Only allow unmap of entire segment */
	if (addr != seg->s_base || len != seg->s_size) {
		return (EINVAL);
	}
	if (sud->sud_softlockcnt != 0) {
		return (EAGAIN);
	}

	/*
	 * Unconditionally unload the entire segment range.
	 */
	hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD_UNMAP);

	seg_free(seg);
	return (0);
}

static void
segumap_free(struct seg *seg)
{
	segumap_data_t *data = (segumap_data_t *)seg->s_data;

	ASSERT(data != NULL);

	rw_destroy(&data->sud_lock);
	VERIFY(data->sud_softlockcnt == 0);
	kmem_free(data, sizeof (*data));
	seg->s_data = NULL;
}

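/*
 * Handle a fault on the segment. F_PROT faults always fail since the
 * protection is fixed at creation. F_INVAL (and the first F_SOFTLOCK)
 * loads translations for the entire segment; F_SOFTLOCK and F_SOFTUNLOCK
 * otherwise just maintain the softlock count.
 */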
/* ARGSUSED */
static faultcode_t
segumap_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
    enum fault_type type, enum seg_rw tw)
{
	segumap_data_t *sud = (segumap_data_t *)seg->s_data;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	if (type == F_PROT) {
		/*
		 * Since protection on the segment is fixed, there is nothing
		 * to do but report an error for protection faults.
		 */
		return (FC_PROT);
	} else if (type == F_SOFTUNLOCK) {
		size_t plen = btop(len);

		rw_enter(&sud->sud_lock, RW_WRITER);
		VERIFY(sud->sud_softlockcnt >= plen);
		sud->sud_softlockcnt -= plen;
		rw_exit(&sud->sud_lock);
		return (0);
	}

	ASSERT(type == F_INVAL || type == F_SOFTLOCK);
	rw_enter(&sud->sud_lock, RW_WRITER);

	if (type == F_INVAL ||
	    (type == F_SOFTLOCK && sud->sud_softlockcnt == 0)) {
		/*
		 * Load the (entire) segment into the HAT.
		 *
		 * It's possible that threads racing into as_fault will cause
		 * seg_umap to load the same range multiple times in quick
		 * succession. Redundant hat_devload operations are safe.
		 */
		for (uintptr_t i = 0; i < seg->s_size; i += PAGESIZE) {
			pfn_t pfn;

			pfn = hat_getpfnum(kas.a_hat, sud->sud_kaddr + i);
			VERIFY(pfn != PFN_INVALID);
			hat_devload(seg->s_as->a_hat, seg->s_base + i,
			    PAGESIZE, pfn, sud->sud_prot, HAT_LOAD);
		}
	}
	if (type == F_SOFTLOCK) {
		size_t nval = sud->sud_softlockcnt + btop(len);

		if (sud->sud_softlockcnt >= nval) {
			rw_exit(&sud->sud_lock);
			return (FC_MAKE_ERR(EOVERFLOW));
		}
		sud->sud_softlockcnt = nval;
	}

	rw_exit(&sud->sud_lock);
	return (0);
}

/* ARGSUSED */
static faultcode_t
segumap_faulta(struct seg *seg, caddr_t addr)
{
	/* Do nothing, since an async pagefault should not load translations. */
	return (0);
}

/* ARGSUSED */
static int
segumap_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	/*
	 * The seg_umap driver does not yet allow protection to be changed.
	 */
	return (EACCES);
}

/* ARGSUSED */
static int
segumap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
	segumap_data_t *sud = (segumap_data_t *)seg->s_data;
	int error = 0;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	rw_enter(&sud->sud_lock, RW_READER);
	if ((sud->sud_prot & prot) != prot) {
		error = EACCES;
	}
	rw_exit(&sud->sud_lock);
	return (error);
}

/* ARGSUSED */
static int
segumap_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
{
	/* Always succeed, since there is no backing store to sync. */
	return (0);
}

/* ARGSUSED */
static size_t
segumap_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
{
	size_t sz = 0;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	len = (len + PAGEOFFSET) & PAGEMASK;
	while (len > 0) {
		*vec = 1;
		sz += PAGESIZE;
		vec++;
		len -= PAGESIZE;
	}
	return (sz);
}

/* ARGSUSED */
static int
segumap_lockop(struct seg *seg, caddr_t addr, size_t len, int attr, int op,
    ulong_t *lockmap, size_t pos)
{
	/* Report success since kernel pages are always in memory. */
	return (0);
}

static int
segumap_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
{
	segumap_data_t *sud = (segumap_data_t *)seg->s_data;
	size_t pgno;
	uint_t prot;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	rw_enter(&sud->sud_lock, RW_READER);
	prot = sud->sud_prot;
	rw_exit(&sud->sud_lock);

	/*
	 * Reporting protection is simple since it is not tracked per-page.
	 */
	pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
	while (pgno > 0) {
		protv[--pgno] = prot;
	}
	return (0);
}

/* ARGSUSED */
static u_offset_t
segumap_getoffset(struct seg *seg, caddr_t addr)
{
	/*
	 * To avoid leaking information about the layout of the kernel address
	 * space, always report '0' as the offset.
	 */
	return (0);
}

/* ARGSUSED */
static int
segumap_gettype(struct seg *seg, caddr_t addr)
{
	/*
	 * Since already-existing kernel pages are being mapped into userspace,
	 * always report the segment type as shared.
	 */
	return (MAP_SHARED);
}

/* ARGSUSED */
static int
segumap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
{
	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	*vpp = NULL;
	return (0);
}

/* ARGSUSED */
static int
segumap_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
{
	if (behav == MADV_PURGE) {
		/* Purge does not make sense for this mapping */
		return (EINVAL);
	}
	/* Indicate success for everything else. */
	return (0);
}

/* ARGSUSED */
static void
segumap_dump(struct seg *seg)
{
	/*
	 * Since this is a mapping to share kernel data with userspace, nothing
	 * additional should be dumped.
	 */
}

/* ARGSUSED */
static int
segumap_pagelock(struct seg *seg, caddr_t addr, size_t len, struct page ***ppp,
    enum lock_type type, enum seg_rw rw)
{
	return (ENOTSUP);
}

/* ARGSUSED */
static int
segumap_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
{
	return (ENOTSUP);
}

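/*
 * Compose a memid for the mapping from the backing kernel address and the
 * offset of 'addr' within the segment.
 */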
static int
segumap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
{
	segumap_data_t *sud = (segumap_data_t *)seg->s_data;

	memidp->val[0] = (uintptr_t)sud->sud_kaddr;
	memidp->val[1] = (uintptr_t)(addr - seg->s_base);
	return (0);
}

/* ARGSUSED */
static int
segumap_capable(struct seg *seg, segcapability_t capability)
{
	/* no special capabilities */
	return (0);
}