xref: /titanic_50/usr/src/uts/i86xpv/vm/seg_mf.c (revision ec77975f4066916892ac3a662a2045cca3926268)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Machine frame segment driver.  This segment driver allows dom0 processes to
29  * map pages of other domains or Xen (e.g. during save/restore).  ioctl()s on
30  * the privcmd driver provide the MFN values backing each mapping, and we map
31  * them into the process's address space at this time.  Demand-faulting is not
32  * supported by this driver due to the requirements upon some of the ioctl()s.
33  */
34 
35 #pragma ident	"%Z%%M%	%I%	%E% SMI"
36 
37 #include <sys/types.h>
38 #include <sys/systm.h>
39 #include <sys/vmsystm.h>
40 #include <sys/mman.h>
41 #include <sys/errno.h>
42 #include <sys/kmem.h>
43 #include <sys/cmn_err.h>
44 #include <sys/vnode.h>
45 #include <sys/conf.h>
46 #include <sys/debug.h>
47 #include <sys/lgrp.h>
48 #include <sys/hypervisor.h>
49 
50 #include <vm/page.h>
51 #include <vm/hat.h>
52 #include <vm/as.h>
53 #include <vm/seg.h>
54 
55 #include <vm/hat_pte.h>
56 #include <vm/seg_mf.h>
57 
58 #include <sys/fs/snode.h>
59 
/* Yield the s_commonvp of the snode that VTOS() returns for devvp. */
#define	VTOCVP(devvp)	(VTOS(devvp)->s_commonvp)

/* Size in bytes of an array holding cnt mfn_t entries. */
#define	mfatob(cnt)	(sizeof (mfn_t) * (cnt))
63 
64 struct segmf_data {
65 	kmutex_t	lock;
66 	struct vnode	*vp;
67 	uchar_t		prot;
68 	uchar_t		maxprot;
69 	size_t		softlockcnt;
70 	domid_t		domid;
71 	mfn_t		*mfns;
72 };
73 
74 static struct seg_ops segmf_ops;
75 
76 static struct segmf_data *
77 segmf_data_zalloc(struct seg *seg)
78 {
79 	struct segmf_data *data = kmem_zalloc(sizeof (*data), KM_SLEEP);
80 
81 	mutex_init(&data->lock, "segmf.lock", MUTEX_DEFAULT, NULL);
82 	seg->s_ops = &segmf_ops;
83 	seg->s_data = data;
84 	return (data);
85 }
86 
87 int
88 segmf_create(struct seg *seg, void *args)
89 {
90 	struct segmf_crargs *a = args;
91 	struct segmf_data *data;
92 	struct as *as = seg->s_as;
93 	pgcnt_t i, npages = seg_pages(seg);
94 	int error;
95 
96 	hat_map(as->a_hat, seg->s_base, seg->s_size, HAT_MAP);
97 
98 	data = segmf_data_zalloc(seg);
99 	data->vp = specfind(a->dev, VCHR);
100 	data->prot = a->prot;
101 	data->maxprot = a->maxprot;
102 
103 	data->mfns = kmem_alloc(mfatob(npages), KM_SLEEP);
104 	for (i = 0; i < npages; i++)
105 		data->mfns[i] = MFN_INVALID;
106 
107 	error = VOP_ADDMAP(VTOCVP(data->vp), 0, as, seg->s_base, seg->s_size,
108 	    data->prot, data->maxprot, MAP_SHARED, CRED());
109 
110 	if (error != 0)
111 		hat_unload(as->a_hat,
112 		    seg->s_base, seg->s_size, HAT_UNLOAD_UNMAP);
113 	return (error);
114 }
115 
116 /*
117  * Duplicate a seg and return new segment in newseg.
118  */
119 static int
120 segmf_dup(struct seg *seg, struct seg *newseg)
121 {
122 	struct segmf_data *data = seg->s_data;
123 	struct segmf_data *ndata;
124 	pgcnt_t npages = seg_pages(newseg);
125 
126 	ndata = segmf_data_zalloc(newseg);
127 
128 	VN_HOLD(data->vp);
129 	ndata->vp = data->vp;
130 	ndata->prot = data->prot;
131 	ndata->maxprot = data->maxprot;
132 	ndata->domid = data->domid;
133 
134 	ndata->mfns = kmem_alloc(mfatob(npages), KM_SLEEP);
135 	bcopy(data->mfns, ndata->mfns, mfatob(npages));
136 
137 	return (VOP_ADDMAP(VTOCVP(ndata->vp), 0, newseg->s_as,
138 	    newseg->s_base, newseg->s_size, ndata->prot, ndata->maxprot,
139 	    MAP_SHARED, CRED()));
140 }
141 
142 /*
143  * We only support unmapping the whole segment, and we automatically unlock
144  * what we previously soft-locked.
145  */
146 static int
147 segmf_unmap(struct seg *seg, caddr_t addr, size_t len)
148 {
149 	struct segmf_data *data = seg->s_data;
150 	offset_t off;
151 
152 	if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size ||
153 	    (len & PAGEOFFSET) || ((uintptr_t)addr & PAGEOFFSET))
154 		panic("segmf_unmap");
155 
156 	if (addr != seg->s_base || len != seg->s_size)
157 		return (ENOTSUP);
158 
159 	hat_unload(seg->s_as->a_hat, addr, len,
160 	    HAT_UNLOAD_UNMAP | HAT_UNLOAD_UNLOCK);
161 
162 	off = (offset_t)seg_page(seg, addr);
163 
164 	ASSERT(data->vp != NULL);
165 
166 	(void) VOP_DELMAP(VTOCVP(data->vp), off, seg->s_as, addr, len,
167 	    data->prot, data->maxprot, MAP_SHARED, CRED());
168 
169 	seg_free(seg);
170 	return (0);
171 }
172 
173 static void
174 segmf_free(struct seg *seg)
175 {
176 	struct segmf_data *data = seg->s_data;
177 	pgcnt_t npages = seg_pages(seg);
178 
179 	kmem_free(data->mfns, mfatob(npages));
180 	VN_RELE(data->vp);
181 	mutex_destroy(&data->lock);
182 	kmem_free(data, sizeof (*data));
183 }
184 
185 static int segmf_faultpage_debug = 0;
186 
187 /*ARGSUSED*/
188 static int
189 segmf_faultpage(struct hat *hat, struct seg *seg, caddr_t addr,
190     enum fault_type type, uint_t prot)
191 {
192 	struct segmf_data *data = seg->s_data;
193 	uint_t hat_flags = HAT_LOAD_NOCONSIST;
194 	mfn_t mfn;
195 	x86pte_t pte;
196 
197 	mfn = data->mfns[seg_page(seg, addr)];
198 
199 	ASSERT(mfn != MFN_INVALID);
200 
201 	if (type == F_SOFTLOCK) {
202 		mutex_enter(&freemem_lock);
203 		data->softlockcnt++;
204 		mutex_exit(&freemem_lock);
205 		hat_flags |= HAT_LOAD_LOCK;
206 	} else
207 		hat_flags |= HAT_LOAD;
208 
209 	if (segmf_faultpage_debug > 0) {
210 		uprintf("segmf_faultpage: addr %p domid %x mfn %lx prot %x\n",
211 		    (void *)addr, data->domid, mfn, prot);
212 		segmf_faultpage_debug--;
213 	}
214 
215 	/*
216 	 * Ask the HAT to load a throwaway mapping to page zero, then
217 	 * overwrite it with our foreign domain mapping. It gets removed
218 	 * later via hat_unload()
219 	 */
220 	hat_devload(hat, addr, MMU_PAGESIZE, (pfn_t)0,
221 	    PROT_READ | HAT_UNORDERED_OK, hat_flags);
222 
223 	pte = mmu_ptob((x86pte_t)mfn) | PT_VALID | PT_USER | PT_FOREIGN;
224 	if (prot & PROT_WRITE)
225 		pte |= PT_WRITABLE;
226 
227 	if (HYPERVISOR_update_va_mapping_otherdomain((uintptr_t)addr, pte,
228 	    UVMF_INVLPG | UVMF_ALL, data->domid) != 0) {
229 		hat_flags = HAT_UNLOAD_UNMAP;
230 
231 		if (type == F_SOFTLOCK) {
232 			hat_flags |= HAT_UNLOAD_UNLOCK;
233 			mutex_enter(&freemem_lock);
234 			data->softlockcnt--;
235 			mutex_exit(&freemem_lock);
236 		}
237 
238 		hat_unload(hat, addr, MMU_PAGESIZE, hat_flags);
239 		return (FC_MAKE_ERR(EFAULT));
240 	}
241 
242 	return (0);
243 }
244 
245 static int
246 seg_rw_to_prot(enum seg_rw rw)
247 {
248 	switch (rw) {
249 	case S_READ:
250 		return (PROT_READ);
251 	case S_WRITE:
252 		return (PROT_WRITE);
253 	case S_EXEC:
254 		return (PROT_EXEC);
255 	case S_OTHER:
256 	default:
257 		break;
258 	}
259 	return (PROT_READ | PROT_WRITE | PROT_EXEC);
260 }
261 
262 static void
263 segmf_softunlock(struct hat *hat, struct seg *seg, caddr_t addr, size_t len)
264 {
265 	struct segmf_data *data = seg->s_data;
266 
267 	hat_unlock(hat, addr, len);
268 
269 	mutex_enter(&freemem_lock);
270 	ASSERT(data->softlockcnt >= btopr(len));
271 	data->softlockcnt -= btopr(len);
272 	mutex_exit(&freemem_lock);
273 
274 	if (data->softlockcnt == 0) {
275 		struct as *as = seg->s_as;
276 
277 		if (AS_ISUNMAPWAIT(as)) {
278 			mutex_enter(&as->a_contents);
279 			if (AS_ISUNMAPWAIT(as)) {
280 				AS_CLRUNMAPWAIT(as);
281 				cv_broadcast(&as->a_cv);
282 			}
283 			mutex_exit(&as->a_contents);
284 		}
285 	}
286 }
287 
288 static int
289 segmf_fault_range(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
290     enum fault_type type, enum seg_rw rw)
291 {
292 	struct segmf_data *data = seg->s_data;
293 	int error = 0;
294 	caddr_t a;
295 
296 	if ((data->prot & seg_rw_to_prot(rw)) == 0)
297 		return (FC_PROT);
298 
299 	/* loop over the address range handling each fault */
300 
301 	for (a = addr; a < addr + len; a += PAGESIZE) {
302 		error = segmf_faultpage(hat, seg, a, type, data->prot);
303 		if (error != 0)
304 			break;
305 	}
306 
307 	if (error != 0 && type == F_SOFTLOCK) {
308 		size_t done = (size_t)(a - addr);
309 
310 		/*
311 		 * Undo what's been done so far.
312 		 */
313 		if (done > 0)
314 			segmf_softunlock(hat, seg, addr, done);
315 	}
316 
317 	return (error);
318 }
319 
320 /*
321  * We never demand-fault for seg_mf.
322  */
323 /*ARGSUSED*/
324 static int
325 segmf_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
326     enum fault_type type, enum seg_rw rw)
327 {
328 	return (FC_MAKE_ERR(EFAULT));
329 }
330 
331 /*ARGSUSED*/
332 static int
333 segmf_faulta(struct seg *seg, caddr_t addr)
334 {
335 	return (0);
336 }
337 
338 /*ARGSUSED*/
339 static int
340 segmf_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
341 {
342 	return (EINVAL);
343 }
344 
345 /*ARGSUSED*/
346 static int
347 segmf_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
348 {
349 	return (EINVAL);
350 }
351 
352 /*ARGSUSED*/
353 static int
354 segmf_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
355 {
356 	return (-1);
357 }
358 
359 /*ARGSUSED*/
360 static int
361 segmf_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
362 {
363 	return (0);
364 }
365 
366 /*
367  * XXPV	Hmm.  Should we say that mf mapping are "in core?"
368  */
369 
370 /*ARGSUSED*/
371 static size_t
372 segmf_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
373 {
374 	size_t v;
375 
376 	for (v = 0, len = (len + PAGEOFFSET) & PAGEMASK; len;
377 	    len -= PAGESIZE, v += PAGESIZE)
378 		*vec++ = 1;
379 	return (v);
380 }
381 
382 /*ARGSUSED*/
383 static int
384 segmf_lockop(struct seg *seg, caddr_t addr,
385     size_t len, int attr, int op, ulong_t *lockmap, size_t pos)
386 {
387 	return (0);
388 }
389 
390 static int
391 segmf_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
392 {
393 	struct segmf_data *data = seg->s_data;
394 	pgcnt_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
395 
396 	if (pgno != 0) {
397 		do
398 			protv[--pgno] = data->prot;
399 		while (pgno != 0)
400 			;
401 	}
402 	return (0);
403 }
404 
405 static u_offset_t
406 segmf_getoffset(struct seg *seg, caddr_t addr)
407 {
408 	return (addr - seg->s_base);
409 }
410 
411 /*ARGSUSED*/
412 static int
413 segmf_gettype(struct seg *seg, caddr_t addr)
414 {
415 	return (MAP_SHARED);
416 }
417 
418 /*ARGSUSED1*/
419 static int
420 segmf_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
421 {
422 	struct segmf_data *data = seg->s_data;
423 
424 	*vpp = VTOCVP(data->vp);
425 	return (0);
426 }
427 
428 /*ARGSUSED*/
429 static int
430 segmf_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
431 {
432 	return (0);
433 }
434 
/*
 * Dump entry point: intentionally empty.
 */
/*ARGSUSED*/
static void
segmf_dump(struct seg *seg)
{
}
439 
440 /*ARGSUSED*/
441 static int
442 segmf_pagelock(struct seg *seg, caddr_t addr, size_t len,
443     struct page ***ppp, enum lock_type type, enum seg_rw rw)
444 {
445 	return (ENOTSUP);
446 }
447 
448 /*ARGSUSED*/
449 static int
450 segmf_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
451 {
452 	return (ENOTSUP);
453 }
454 
455 static int
456 segmf_getmemid(struct seg *seg, caddr_t addr, memid_t *memid)
457 {
458 	struct segmf_data *data = seg->s_data;
459 
460 	memid->val[0] = (uintptr_t)VTOCVP(data->vp);
461 	memid->val[1] = (uintptr_t)seg_page(seg, addr);
462 	return (0);
463 }
464 
465 /*ARGSUSED*/
466 static lgrp_mem_policy_info_t *
467 segmf_getpolicy(struct seg *seg, caddr_t addr)
468 {
469 	return (NULL);
470 }
471 
472 /*ARGSUSED*/
473 static int
474 segmf_capable(struct seg *seg, segcapability_t capability)
475 {
476 	return (0);
477 }
478 
479 /*
480  * Add a set of contiguous foreign MFNs to the segment. soft-locking them.  The
481  * pre-faulting is necessary due to live migration; in particular we must
482  * return an error in response to IOCTL_PRIVCMD_MMAPBATCH rather than faulting
483  * later on a bad MFN.  Whilst this isn't necessary for the other MMAP
484  * ioctl()s, we lock them too, as they should be transitory.
485  */
486 int
487 segmf_add_mfns(struct seg *seg, caddr_t addr, mfn_t mfn,
488     pgcnt_t pgcnt, domid_t domid)
489 {
490 	struct segmf_data *data = seg->s_data;
491 	pgcnt_t base = seg_page(seg, addr);
492 	faultcode_t fc;
493 	pgcnt_t i;
494 	int error = 0;
495 
496 	if (seg->s_ops != &segmf_ops)
497 		return (EINVAL);
498 
499 	/*
500 	 * Don't mess with dom0.
501 	 *
502 	 * Only allow the domid to be set once for the segment.
503 	 * After that attempts to add mappings to this segment for
504 	 * other domains explicitly fails.
505 	 */
506 
507 	if (domid == 0 || domid == DOMID_SELF)
508 		return (EACCES);
509 
510 	mutex_enter(&data->lock);
511 
512 	if (data->domid == 0)
513 		data->domid = domid;
514 
515 	if (data->domid != domid) {
516 		error = EINVAL;
517 		goto out;
518 	}
519 
520 	base = seg_page(seg, addr);
521 
522 	for (i = 0; i < pgcnt; i++)
523 		data->mfns[base + i] = mfn++;
524 
525 	fc = segmf_fault_range(seg->s_as->a_hat, seg, addr,
526 	    pgcnt * MMU_PAGESIZE, F_SOFTLOCK, S_OTHER);
527 
528 	if (fc != 0) {
529 		error = fc_decode(fc);
530 		for (i = 0; i < pgcnt; i++)
531 			data->mfns[base + i] = MFN_INVALID;
532 	}
533 
534 out:
535 	mutex_exit(&data->lock);
536 	return (error);
537 }
538 
539 static struct seg_ops segmf_ops = {
540 	segmf_dup,
541 	segmf_unmap,
542 	segmf_free,
543 	segmf_fault,
544 	segmf_faulta,
545 	segmf_setprot,
546 	segmf_checkprot,
547 	(int (*)())segmf_kluster,
548 	(size_t (*)(struct seg *))NULL,	/* swapout */
549 	segmf_sync,
550 	segmf_incore,
551 	segmf_lockop,
552 	segmf_getprot,
553 	segmf_getoffset,
554 	segmf_gettype,
555 	segmf_getvp,
556 	segmf_advise,
557 	segmf_dump,
558 	segmf_pagelock,
559 	segmf_setpagesize,
560 	segmf_getmemid,
561 	segmf_getpolicy,
562 	segmf_capable
563 };
564