1843e1988Sjohnlev /*
2843e1988Sjohnlev * CDDL HEADER START
3843e1988Sjohnlev *
4843e1988Sjohnlev * The contents of this file are subject to the terms of the
5843e1988Sjohnlev * Common Development and Distribution License (the "License").
6843e1988Sjohnlev * You may not use this file except in compliance with the License.
7843e1988Sjohnlev *
8843e1988Sjohnlev * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9843e1988Sjohnlev * or http://www.opensolaris.org/os/licensing.
10843e1988Sjohnlev * See the License for the specific language governing permissions
11843e1988Sjohnlev * and limitations under the License.
12843e1988Sjohnlev *
13843e1988Sjohnlev * When distributing Covered Code, include this CDDL HEADER in each
14843e1988Sjohnlev * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15843e1988Sjohnlev * If applicable, add the following below this CDDL HEADER, with the
16843e1988Sjohnlev * fields enclosed by brackets "[]" replaced with your own identifying
17843e1988Sjohnlev * information: Portions Copyright [yyyy] [name of copyright owner]
18843e1988Sjohnlev *
19843e1988Sjohnlev * CDDL HEADER END
20843e1988Sjohnlev */
21843e1988Sjohnlev
22843e1988Sjohnlev /*
237eea693dSMark Johnson * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24843e1988Sjohnlev * Use is subject to license terms.
25843e1988Sjohnlev */
26843e1988Sjohnlev
27843e1988Sjohnlev /*
28843e1988Sjohnlev * Machine frame segment driver. This segment driver allows dom0 processes to
29843e1988Sjohnlev * map pages of other domains or Xen (e.g. during save/restore). ioctl()s on
30843e1988Sjohnlev * the privcmd driver provide the MFN values backing each mapping, and we map
31843e1988Sjohnlev * them into the process's address space at this time. Demand-faulting is not
32843e1988Sjohnlev * supported by this driver due to the requirements upon some of the ioctl()s.
33843e1988Sjohnlev */
34843e1988Sjohnlev
35843e1988Sjohnlev
36843e1988Sjohnlev #include <sys/types.h>
37843e1988Sjohnlev #include <sys/systm.h>
38843e1988Sjohnlev #include <sys/vmsystm.h>
39843e1988Sjohnlev #include <sys/mman.h>
40843e1988Sjohnlev #include <sys/errno.h>
41843e1988Sjohnlev #include <sys/kmem.h>
42843e1988Sjohnlev #include <sys/cmn_err.h>
43843e1988Sjohnlev #include <sys/vnode.h>
44843e1988Sjohnlev #include <sys/conf.h>
45843e1988Sjohnlev #include <sys/debug.h>
46843e1988Sjohnlev #include <sys/lgrp.h>
47843e1988Sjohnlev #include <sys/hypervisor.h>
48843e1988Sjohnlev
49843e1988Sjohnlev #include <vm/page.h>
50843e1988Sjohnlev #include <vm/hat.h>
51843e1988Sjohnlev #include <vm/as.h>
52843e1988Sjohnlev #include <vm/seg.h>
53843e1988Sjohnlev
54843e1988Sjohnlev #include <vm/hat_pte.h>
557eea693dSMark Johnson #include <vm/hat_i86.h>
56843e1988Sjohnlev #include <vm/seg_mf.h>
57843e1988Sjohnlev
58843e1988Sjohnlev #include <sys/fs/snode.h>
59843e1988Sjohnlev
60843e1988Sjohnlev #define VTOCVP(vp) (VTOS(vp)->s_commonvp)
61843e1988Sjohnlev
627eea693dSMark Johnson typedef struct segmf_mfn_s {
637eea693dSMark Johnson mfn_t m_mfn;
647eea693dSMark Johnson } segmf_mfn_t;
657eea693dSMark Johnson
667eea693dSMark Johnson /* g_flags */
677eea693dSMark Johnson #define SEGMF_GFLAGS_WR 0x1
687eea693dSMark Johnson #define SEGMF_GFLAGS_MAPPED 0x2
697eea693dSMark Johnson typedef struct segmf_gref_s {
707eea693dSMark Johnson uint64_t g_ptep;
717eea693dSMark Johnson grant_ref_t g_gref;
727eea693dSMark Johnson uint32_t g_flags;
737eea693dSMark Johnson grant_handle_t g_handle;
747eea693dSMark Johnson } segmf_gref_t;
757eea693dSMark Johnson
767eea693dSMark Johnson typedef union segmf_mu_u {
777eea693dSMark Johnson segmf_mfn_t m;
787eea693dSMark Johnson segmf_gref_t g;
797eea693dSMark Johnson } segmf_mu_t;
807eea693dSMark Johnson
817eea693dSMark Johnson typedef enum {
827eea693dSMark Johnson SEGMF_MAP_EMPTY = 0,
837eea693dSMark Johnson SEGMF_MAP_MFN,
847eea693dSMark Johnson SEGMF_MAP_GREF
857eea693dSMark Johnson } segmf_map_type_t;
867eea693dSMark Johnson
877eea693dSMark Johnson typedef struct segmf_map_s {
887eea693dSMark Johnson segmf_map_type_t t_type;
897eea693dSMark Johnson segmf_mu_t u;
907eea693dSMark Johnson } segmf_map_t;
91843e1988Sjohnlev
92843e1988Sjohnlev struct segmf_data {
93843e1988Sjohnlev kmutex_t lock;
94843e1988Sjohnlev struct vnode *vp;
95843e1988Sjohnlev uchar_t prot;
96843e1988Sjohnlev uchar_t maxprot;
97843e1988Sjohnlev size_t softlockcnt;
98843e1988Sjohnlev domid_t domid;
997eea693dSMark Johnson segmf_map_t *map;
100843e1988Sjohnlev };
101843e1988Sjohnlev
102843e1988Sjohnlev static struct seg_ops segmf_ops;
103843e1988Sjohnlev
1047eea693dSMark Johnson static int segmf_fault_gref_range(struct seg *seg, caddr_t addr, size_t len);
1057eea693dSMark Johnson
106843e1988Sjohnlev static struct segmf_data *
segmf_data_zalloc(struct seg * seg)107843e1988Sjohnlev segmf_data_zalloc(struct seg *seg)
108843e1988Sjohnlev {
109843e1988Sjohnlev struct segmf_data *data = kmem_zalloc(sizeof (*data), KM_SLEEP);
110843e1988Sjohnlev
111843e1988Sjohnlev mutex_init(&data->lock, "segmf.lock", MUTEX_DEFAULT, NULL);
112843e1988Sjohnlev seg->s_ops = &segmf_ops;
113843e1988Sjohnlev seg->s_data = data;
114843e1988Sjohnlev return (data);
115843e1988Sjohnlev }
116843e1988Sjohnlev
117843e1988Sjohnlev int
segmf_create(struct seg * seg,void * args)118843e1988Sjohnlev segmf_create(struct seg *seg, void *args)
119843e1988Sjohnlev {
120843e1988Sjohnlev struct segmf_crargs *a = args;
121843e1988Sjohnlev struct segmf_data *data;
122843e1988Sjohnlev struct as *as = seg->s_as;
123843e1988Sjohnlev pgcnt_t i, npages = seg_pages(seg);
124843e1988Sjohnlev int error;
125843e1988Sjohnlev
126843e1988Sjohnlev hat_map(as->a_hat, seg->s_base, seg->s_size, HAT_MAP);
127843e1988Sjohnlev
128843e1988Sjohnlev data = segmf_data_zalloc(seg);
129843e1988Sjohnlev data->vp = specfind(a->dev, VCHR);
130843e1988Sjohnlev data->prot = a->prot;
131843e1988Sjohnlev data->maxprot = a->maxprot;
132843e1988Sjohnlev
1337eea693dSMark Johnson data->map = kmem_alloc(npages * sizeof (segmf_map_t), KM_SLEEP);
1347eea693dSMark Johnson for (i = 0; i < npages; i++) {
1357eea693dSMark Johnson data->map[i].t_type = SEGMF_MAP_EMPTY;
1367eea693dSMark Johnson }
137843e1988Sjohnlev
138843e1988Sjohnlev error = VOP_ADDMAP(VTOCVP(data->vp), 0, as, seg->s_base, seg->s_size,
139da6c28aaSamw data->prot, data->maxprot, MAP_SHARED, CRED(), NULL);
140843e1988Sjohnlev
141843e1988Sjohnlev if (error != 0)
142843e1988Sjohnlev hat_unload(as->a_hat,
143843e1988Sjohnlev seg->s_base, seg->s_size, HAT_UNLOAD_UNMAP);
144843e1988Sjohnlev return (error);
145843e1988Sjohnlev }
146843e1988Sjohnlev
147843e1988Sjohnlev /*
148843e1988Sjohnlev * Duplicate a seg and return new segment in newseg.
149843e1988Sjohnlev */
150843e1988Sjohnlev static int
segmf_dup(struct seg * seg,struct seg * newseg)151843e1988Sjohnlev segmf_dup(struct seg *seg, struct seg *newseg)
152843e1988Sjohnlev {
153843e1988Sjohnlev struct segmf_data *data = seg->s_data;
154843e1988Sjohnlev struct segmf_data *ndata;
155843e1988Sjohnlev pgcnt_t npages = seg_pages(newseg);
1567eea693dSMark Johnson size_t sz;
157843e1988Sjohnlev
158843e1988Sjohnlev ndata = segmf_data_zalloc(newseg);
159843e1988Sjohnlev
160843e1988Sjohnlev VN_HOLD(data->vp);
161843e1988Sjohnlev ndata->vp = data->vp;
162843e1988Sjohnlev ndata->prot = data->prot;
163843e1988Sjohnlev ndata->maxprot = data->maxprot;
164843e1988Sjohnlev ndata->domid = data->domid;
165843e1988Sjohnlev
1667eea693dSMark Johnson sz = npages * sizeof (segmf_map_t);
1677eea693dSMark Johnson ndata->map = kmem_alloc(sz, KM_SLEEP);
1687eea693dSMark Johnson bcopy(data->map, ndata->map, sz);
169843e1988Sjohnlev
170843e1988Sjohnlev return (VOP_ADDMAP(VTOCVP(ndata->vp), 0, newseg->s_as,
171843e1988Sjohnlev newseg->s_base, newseg->s_size, ndata->prot, ndata->maxprot,
172da6c28aaSamw MAP_SHARED, CRED(), NULL));
173843e1988Sjohnlev }
174843e1988Sjohnlev
175843e1988Sjohnlev /*
176843e1988Sjohnlev * We only support unmapping the whole segment, and we automatically unlock
177843e1988Sjohnlev * what we previously soft-locked.
178843e1988Sjohnlev */
179843e1988Sjohnlev static int
segmf_unmap(struct seg * seg,caddr_t addr,size_t len)180843e1988Sjohnlev segmf_unmap(struct seg *seg, caddr_t addr, size_t len)
181843e1988Sjohnlev {
182843e1988Sjohnlev struct segmf_data *data = seg->s_data;
183843e1988Sjohnlev offset_t off;
184843e1988Sjohnlev
185843e1988Sjohnlev if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size ||
186843e1988Sjohnlev (len & PAGEOFFSET) || ((uintptr_t)addr & PAGEOFFSET))
187843e1988Sjohnlev panic("segmf_unmap");
188843e1988Sjohnlev
189843e1988Sjohnlev if (addr != seg->s_base || len != seg->s_size)
190843e1988Sjohnlev return (ENOTSUP);
191843e1988Sjohnlev
192843e1988Sjohnlev hat_unload(seg->s_as->a_hat, addr, len,
193843e1988Sjohnlev HAT_UNLOAD_UNMAP | HAT_UNLOAD_UNLOCK);
194843e1988Sjohnlev
195843e1988Sjohnlev off = (offset_t)seg_page(seg, addr);
196843e1988Sjohnlev
197843e1988Sjohnlev ASSERT(data->vp != NULL);
198843e1988Sjohnlev
199843e1988Sjohnlev (void) VOP_DELMAP(VTOCVP(data->vp), off, seg->s_as, addr, len,
200da6c28aaSamw data->prot, data->maxprot, MAP_SHARED, CRED(), NULL);
201843e1988Sjohnlev
202843e1988Sjohnlev seg_free(seg);
203843e1988Sjohnlev return (0);
204843e1988Sjohnlev }
205843e1988Sjohnlev
206843e1988Sjohnlev static void
segmf_free(struct seg * seg)207843e1988Sjohnlev segmf_free(struct seg *seg)
208843e1988Sjohnlev {
209843e1988Sjohnlev struct segmf_data *data = seg->s_data;
210843e1988Sjohnlev pgcnt_t npages = seg_pages(seg);
211843e1988Sjohnlev
2127eea693dSMark Johnson kmem_free(data->map, npages * sizeof (segmf_map_t));
213843e1988Sjohnlev VN_RELE(data->vp);
214843e1988Sjohnlev mutex_destroy(&data->lock);
215843e1988Sjohnlev kmem_free(data, sizeof (*data));
216843e1988Sjohnlev }
217843e1988Sjohnlev
218843e1988Sjohnlev static int segmf_faultpage_debug = 0;
219843e1988Sjohnlev /*ARGSUSED*/
220843e1988Sjohnlev static int
segmf_faultpage(struct hat * hat,struct seg * seg,caddr_t addr,enum fault_type type,uint_t prot)221843e1988Sjohnlev segmf_faultpage(struct hat *hat, struct seg *seg, caddr_t addr,
222843e1988Sjohnlev enum fault_type type, uint_t prot)
223843e1988Sjohnlev {
224843e1988Sjohnlev struct segmf_data *data = seg->s_data;
225843e1988Sjohnlev uint_t hat_flags = HAT_LOAD_NOCONSIST;
226843e1988Sjohnlev mfn_t mfn;
227843e1988Sjohnlev x86pte_t pte;
2287eea693dSMark Johnson segmf_map_t *map;
2297eea693dSMark Johnson uint_t idx;
230843e1988Sjohnlev
231843e1988Sjohnlev
2327eea693dSMark Johnson idx = seg_page(seg, addr);
2337eea693dSMark Johnson map = &data->map[idx];
2347eea693dSMark Johnson ASSERT(map->t_type == SEGMF_MAP_MFN);
2357eea693dSMark Johnson
2367eea693dSMark Johnson mfn = map->u.m.m_mfn;
237843e1988Sjohnlev
238843e1988Sjohnlev if (type == F_SOFTLOCK) {
239843e1988Sjohnlev mutex_enter(&freemem_lock);
240843e1988Sjohnlev data->softlockcnt++;
241843e1988Sjohnlev mutex_exit(&freemem_lock);
242843e1988Sjohnlev hat_flags |= HAT_LOAD_LOCK;
243843e1988Sjohnlev } else
244843e1988Sjohnlev hat_flags |= HAT_LOAD;
245843e1988Sjohnlev
246843e1988Sjohnlev if (segmf_faultpage_debug > 0) {
247843e1988Sjohnlev uprintf("segmf_faultpage: addr %p domid %x mfn %lx prot %x\n",
248843e1988Sjohnlev (void *)addr, data->domid, mfn, prot);
249843e1988Sjohnlev segmf_faultpage_debug--;
250843e1988Sjohnlev }
251843e1988Sjohnlev
252843e1988Sjohnlev /*
253843e1988Sjohnlev * Ask the HAT to load a throwaway mapping to page zero, then
254843e1988Sjohnlev * overwrite it with our foreign domain mapping. It gets removed
255843e1988Sjohnlev * later via hat_unload()
256843e1988Sjohnlev */
257843e1988Sjohnlev hat_devload(hat, addr, MMU_PAGESIZE, (pfn_t)0,
258843e1988Sjohnlev PROT_READ | HAT_UNORDERED_OK, hat_flags);
259843e1988Sjohnlev
260843e1988Sjohnlev pte = mmu_ptob((x86pte_t)mfn) | PT_VALID | PT_USER | PT_FOREIGN;
261843e1988Sjohnlev if (prot & PROT_WRITE)
262843e1988Sjohnlev pte |= PT_WRITABLE;
263843e1988Sjohnlev
264843e1988Sjohnlev if (HYPERVISOR_update_va_mapping_otherdomain((uintptr_t)addr, pte,
265843e1988Sjohnlev UVMF_INVLPG | UVMF_ALL, data->domid) != 0) {
266843e1988Sjohnlev hat_flags = HAT_UNLOAD_UNMAP;
267843e1988Sjohnlev
268843e1988Sjohnlev if (type == F_SOFTLOCK) {
269843e1988Sjohnlev hat_flags |= HAT_UNLOAD_UNLOCK;
270843e1988Sjohnlev mutex_enter(&freemem_lock);
271843e1988Sjohnlev data->softlockcnt--;
272843e1988Sjohnlev mutex_exit(&freemem_lock);
273843e1988Sjohnlev }
274843e1988Sjohnlev
275843e1988Sjohnlev hat_unload(hat, addr, MMU_PAGESIZE, hat_flags);
276843e1988Sjohnlev return (FC_MAKE_ERR(EFAULT));
277843e1988Sjohnlev }
278843e1988Sjohnlev
279843e1988Sjohnlev return (0);
280843e1988Sjohnlev }
281843e1988Sjohnlev
282843e1988Sjohnlev static int
seg_rw_to_prot(enum seg_rw rw)283843e1988Sjohnlev seg_rw_to_prot(enum seg_rw rw)
284843e1988Sjohnlev {
285843e1988Sjohnlev switch (rw) {
286843e1988Sjohnlev case S_READ:
287843e1988Sjohnlev return (PROT_READ);
288843e1988Sjohnlev case S_WRITE:
289843e1988Sjohnlev return (PROT_WRITE);
290843e1988Sjohnlev case S_EXEC:
291843e1988Sjohnlev return (PROT_EXEC);
292843e1988Sjohnlev case S_OTHER:
293843e1988Sjohnlev default:
294843e1988Sjohnlev break;
295843e1988Sjohnlev }
296843e1988Sjohnlev return (PROT_READ | PROT_WRITE | PROT_EXEC);
297843e1988Sjohnlev }
298843e1988Sjohnlev
299843e1988Sjohnlev static void
segmf_softunlock(struct hat * hat,struct seg * seg,caddr_t addr,size_t len)300843e1988Sjohnlev segmf_softunlock(struct hat *hat, struct seg *seg, caddr_t addr, size_t len)
301843e1988Sjohnlev {
302843e1988Sjohnlev struct segmf_data *data = seg->s_data;
303843e1988Sjohnlev
304843e1988Sjohnlev hat_unlock(hat, addr, len);
305843e1988Sjohnlev
306843e1988Sjohnlev mutex_enter(&freemem_lock);
307843e1988Sjohnlev ASSERT(data->softlockcnt >= btopr(len));
308843e1988Sjohnlev data->softlockcnt -= btopr(len);
309843e1988Sjohnlev mutex_exit(&freemem_lock);
310843e1988Sjohnlev
311843e1988Sjohnlev if (data->softlockcnt == 0) {
312843e1988Sjohnlev struct as *as = seg->s_as;
313843e1988Sjohnlev
314843e1988Sjohnlev if (AS_ISUNMAPWAIT(as)) {
315843e1988Sjohnlev mutex_enter(&as->a_contents);
316843e1988Sjohnlev if (AS_ISUNMAPWAIT(as)) {
317843e1988Sjohnlev AS_CLRUNMAPWAIT(as);
318843e1988Sjohnlev cv_broadcast(&as->a_cv);
319843e1988Sjohnlev }
320843e1988Sjohnlev mutex_exit(&as->a_contents);
321843e1988Sjohnlev }
322843e1988Sjohnlev }
323843e1988Sjohnlev }
324843e1988Sjohnlev
325843e1988Sjohnlev static int
segmf_fault_range(struct hat * hat,struct seg * seg,caddr_t addr,size_t len,enum fault_type type,enum seg_rw rw)326843e1988Sjohnlev segmf_fault_range(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
327843e1988Sjohnlev enum fault_type type, enum seg_rw rw)
328843e1988Sjohnlev {
329843e1988Sjohnlev struct segmf_data *data = seg->s_data;
330843e1988Sjohnlev int error = 0;
331843e1988Sjohnlev caddr_t a;
332843e1988Sjohnlev
333843e1988Sjohnlev if ((data->prot & seg_rw_to_prot(rw)) == 0)
334843e1988Sjohnlev return (FC_PROT);
335843e1988Sjohnlev
336843e1988Sjohnlev /* loop over the address range handling each fault */
337843e1988Sjohnlev
338843e1988Sjohnlev for (a = addr; a < addr + len; a += PAGESIZE) {
339843e1988Sjohnlev error = segmf_faultpage(hat, seg, a, type, data->prot);
340843e1988Sjohnlev if (error != 0)
341843e1988Sjohnlev break;
342843e1988Sjohnlev }
343843e1988Sjohnlev
344843e1988Sjohnlev if (error != 0 && type == F_SOFTLOCK) {
345843e1988Sjohnlev size_t done = (size_t)(a - addr);
346843e1988Sjohnlev
347843e1988Sjohnlev /*
348843e1988Sjohnlev * Undo what's been done so far.
349843e1988Sjohnlev */
350843e1988Sjohnlev if (done > 0)
351843e1988Sjohnlev segmf_softunlock(hat, seg, addr, done);
352843e1988Sjohnlev }
353843e1988Sjohnlev
354843e1988Sjohnlev return (error);
355843e1988Sjohnlev }
356843e1988Sjohnlev
357843e1988Sjohnlev /*
358843e1988Sjohnlev * We never demand-fault for seg_mf.
359843e1988Sjohnlev */
360843e1988Sjohnlev /*ARGSUSED*/
361843e1988Sjohnlev static int
segmf_fault(struct hat * hat,struct seg * seg,caddr_t addr,size_t len,enum fault_type type,enum seg_rw rw)362843e1988Sjohnlev segmf_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
363843e1988Sjohnlev enum fault_type type, enum seg_rw rw)
364843e1988Sjohnlev {
365843e1988Sjohnlev return (FC_MAKE_ERR(EFAULT));
366843e1988Sjohnlev }
367843e1988Sjohnlev
368843e1988Sjohnlev /*ARGSUSED*/
369843e1988Sjohnlev static int
segmf_faulta(struct seg * seg,caddr_t addr)370843e1988Sjohnlev segmf_faulta(struct seg *seg, caddr_t addr)
371843e1988Sjohnlev {
372843e1988Sjohnlev return (0);
373843e1988Sjohnlev }
374843e1988Sjohnlev
375843e1988Sjohnlev /*ARGSUSED*/
376843e1988Sjohnlev static int
segmf_setprot(struct seg * seg,caddr_t addr,size_t len,uint_t prot)377843e1988Sjohnlev segmf_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
378843e1988Sjohnlev {
379843e1988Sjohnlev return (EINVAL);
380843e1988Sjohnlev }
381843e1988Sjohnlev
382843e1988Sjohnlev /*ARGSUSED*/
383843e1988Sjohnlev static int
segmf_checkprot(struct seg * seg,caddr_t addr,size_t len,uint_t prot)384843e1988Sjohnlev segmf_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
385843e1988Sjohnlev {
386843e1988Sjohnlev return (EINVAL);
387843e1988Sjohnlev }
388843e1988Sjohnlev
389843e1988Sjohnlev /*ARGSUSED*/
390843e1988Sjohnlev static int
segmf_kluster(struct seg * seg,caddr_t addr,ssize_t delta)391843e1988Sjohnlev segmf_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
392843e1988Sjohnlev {
393843e1988Sjohnlev return (-1);
394843e1988Sjohnlev }
395843e1988Sjohnlev
396843e1988Sjohnlev /*ARGSUSED*/
397843e1988Sjohnlev static int
segmf_sync(struct seg * seg,caddr_t addr,size_t len,int attr,uint_t flags)398843e1988Sjohnlev segmf_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
399843e1988Sjohnlev {
400843e1988Sjohnlev return (0);
401843e1988Sjohnlev }
402843e1988Sjohnlev
403843e1988Sjohnlev /*
404843e1988Sjohnlev * XXPV Hmm. Should we say that mf mapping are "in core?"
405843e1988Sjohnlev */
406843e1988Sjohnlev
407843e1988Sjohnlev /*ARGSUSED*/
408843e1988Sjohnlev static size_t
segmf_incore(struct seg * seg,caddr_t addr,size_t len,char * vec)409843e1988Sjohnlev segmf_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
410843e1988Sjohnlev {
411843e1988Sjohnlev size_t v;
412843e1988Sjohnlev
413843e1988Sjohnlev for (v = 0, len = (len + PAGEOFFSET) & PAGEMASK; len;
414843e1988Sjohnlev len -= PAGESIZE, v += PAGESIZE)
415843e1988Sjohnlev *vec++ = 1;
416843e1988Sjohnlev return (v);
417843e1988Sjohnlev }
418843e1988Sjohnlev
419843e1988Sjohnlev /*ARGSUSED*/
420843e1988Sjohnlev static int
segmf_lockop(struct seg * seg,caddr_t addr,size_t len,int attr,int op,ulong_t * lockmap,size_t pos)421843e1988Sjohnlev segmf_lockop(struct seg *seg, caddr_t addr,
422843e1988Sjohnlev size_t len, int attr, int op, ulong_t *lockmap, size_t pos)
423843e1988Sjohnlev {
424843e1988Sjohnlev return (0);
425843e1988Sjohnlev }
426843e1988Sjohnlev
427843e1988Sjohnlev static int
segmf_getprot(struct seg * seg,caddr_t addr,size_t len,uint_t * protv)428843e1988Sjohnlev segmf_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
429843e1988Sjohnlev {
430843e1988Sjohnlev struct segmf_data *data = seg->s_data;
431843e1988Sjohnlev pgcnt_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
432843e1988Sjohnlev
433843e1988Sjohnlev if (pgno != 0) {
434843e1988Sjohnlev do
435843e1988Sjohnlev protv[--pgno] = data->prot;
436843e1988Sjohnlev while (pgno != 0)
437843e1988Sjohnlev ;
438843e1988Sjohnlev }
439843e1988Sjohnlev return (0);
440843e1988Sjohnlev }
441843e1988Sjohnlev
442843e1988Sjohnlev static u_offset_t
segmf_getoffset(struct seg * seg,caddr_t addr)443843e1988Sjohnlev segmf_getoffset(struct seg *seg, caddr_t addr)
444843e1988Sjohnlev {
445843e1988Sjohnlev return (addr - seg->s_base);
446843e1988Sjohnlev }
447843e1988Sjohnlev
448843e1988Sjohnlev /*ARGSUSED*/
449843e1988Sjohnlev static int
segmf_gettype(struct seg * seg,caddr_t addr)450843e1988Sjohnlev segmf_gettype(struct seg *seg, caddr_t addr)
451843e1988Sjohnlev {
452843e1988Sjohnlev return (MAP_SHARED);
453843e1988Sjohnlev }
454843e1988Sjohnlev
455843e1988Sjohnlev /*ARGSUSED1*/
456843e1988Sjohnlev static int
segmf_getvp(struct seg * seg,caddr_t addr,struct vnode ** vpp)457843e1988Sjohnlev segmf_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
458843e1988Sjohnlev {
459843e1988Sjohnlev struct segmf_data *data = seg->s_data;
460843e1988Sjohnlev
461843e1988Sjohnlev *vpp = VTOCVP(data->vp);
462843e1988Sjohnlev return (0);
463843e1988Sjohnlev }
464843e1988Sjohnlev
465843e1988Sjohnlev /*ARGSUSED*/
466843e1988Sjohnlev static int
segmf_advise(struct seg * seg,caddr_t addr,size_t len,uint_t behav)467843e1988Sjohnlev segmf_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
468843e1988Sjohnlev {
469843e1988Sjohnlev return (0);
470843e1988Sjohnlev }
471843e1988Sjohnlev
/*
 * dump entry point: intentionally empty.
 */
/*ARGSUSED*/
static void
segmf_dump(struct seg *seg)
{
}
476843e1988Sjohnlev
477843e1988Sjohnlev /*ARGSUSED*/
478843e1988Sjohnlev static int
segmf_pagelock(struct seg * seg,caddr_t addr,size_t len,struct page *** ppp,enum lock_type type,enum seg_rw rw)479843e1988Sjohnlev segmf_pagelock(struct seg *seg, caddr_t addr, size_t len,
480843e1988Sjohnlev struct page ***ppp, enum lock_type type, enum seg_rw rw)
481843e1988Sjohnlev {
482843e1988Sjohnlev return (ENOTSUP);
483843e1988Sjohnlev }
484843e1988Sjohnlev
485843e1988Sjohnlev /*ARGSUSED*/
486843e1988Sjohnlev static int
segmf_setpagesize(struct seg * seg,caddr_t addr,size_t len,uint_t szc)487843e1988Sjohnlev segmf_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
488843e1988Sjohnlev {
489843e1988Sjohnlev return (ENOTSUP);
490843e1988Sjohnlev }
491843e1988Sjohnlev
492843e1988Sjohnlev static int
segmf_getmemid(struct seg * seg,caddr_t addr,memid_t * memid)493843e1988Sjohnlev segmf_getmemid(struct seg *seg, caddr_t addr, memid_t *memid)
494843e1988Sjohnlev {
495843e1988Sjohnlev struct segmf_data *data = seg->s_data;
496843e1988Sjohnlev
497843e1988Sjohnlev memid->val[0] = (uintptr_t)VTOCVP(data->vp);
498843e1988Sjohnlev memid->val[1] = (uintptr_t)seg_page(seg, addr);
499843e1988Sjohnlev return (0);
500843e1988Sjohnlev }
501843e1988Sjohnlev
502843e1988Sjohnlev /*ARGSUSED*/
503843e1988Sjohnlev static lgrp_mem_policy_info_t *
segmf_getpolicy(struct seg * seg,caddr_t addr)504843e1988Sjohnlev segmf_getpolicy(struct seg *seg, caddr_t addr)
505843e1988Sjohnlev {
506843e1988Sjohnlev return (NULL);
507843e1988Sjohnlev }
508843e1988Sjohnlev
509843e1988Sjohnlev /*ARGSUSED*/
510843e1988Sjohnlev static int
segmf_capable(struct seg * seg,segcapability_t capability)511843e1988Sjohnlev segmf_capable(struct seg *seg, segcapability_t capability)
512843e1988Sjohnlev {
513843e1988Sjohnlev return (0);
514843e1988Sjohnlev }
515843e1988Sjohnlev
516843e1988Sjohnlev /*
517843e1988Sjohnlev * Add a set of contiguous foreign MFNs to the segment. soft-locking them. The
518843e1988Sjohnlev * pre-faulting is necessary due to live migration; in particular we must
519843e1988Sjohnlev * return an error in response to IOCTL_PRIVCMD_MMAPBATCH rather than faulting
520843e1988Sjohnlev * later on a bad MFN. Whilst this isn't necessary for the other MMAP
521843e1988Sjohnlev * ioctl()s, we lock them too, as they should be transitory.
522843e1988Sjohnlev */
523843e1988Sjohnlev int
segmf_add_mfns(struct seg * seg,caddr_t addr,mfn_t mfn,pgcnt_t pgcnt,domid_t domid)524843e1988Sjohnlev segmf_add_mfns(struct seg *seg, caddr_t addr, mfn_t mfn,
525843e1988Sjohnlev pgcnt_t pgcnt, domid_t domid)
526843e1988Sjohnlev {
527843e1988Sjohnlev struct segmf_data *data = seg->s_data;
5287eea693dSMark Johnson pgcnt_t base;
529843e1988Sjohnlev faultcode_t fc;
530843e1988Sjohnlev pgcnt_t i;
531843e1988Sjohnlev int error = 0;
532843e1988Sjohnlev
533843e1988Sjohnlev if (seg->s_ops != &segmf_ops)
534843e1988Sjohnlev return (EINVAL);
535843e1988Sjohnlev
536843e1988Sjohnlev /*
537843e1988Sjohnlev * Don't mess with dom0.
538843e1988Sjohnlev *
539843e1988Sjohnlev * Only allow the domid to be set once for the segment.
540843e1988Sjohnlev * After that attempts to add mappings to this segment for
541843e1988Sjohnlev * other domains explicitly fails.
542843e1988Sjohnlev */
543843e1988Sjohnlev
544843e1988Sjohnlev if (domid == 0 || domid == DOMID_SELF)
545843e1988Sjohnlev return (EACCES);
546843e1988Sjohnlev
547843e1988Sjohnlev mutex_enter(&data->lock);
548843e1988Sjohnlev
549843e1988Sjohnlev if (data->domid == 0)
550843e1988Sjohnlev data->domid = domid;
551843e1988Sjohnlev
552843e1988Sjohnlev if (data->domid != domid) {
553843e1988Sjohnlev error = EINVAL;
554843e1988Sjohnlev goto out;
555843e1988Sjohnlev }
556843e1988Sjohnlev
557843e1988Sjohnlev base = seg_page(seg, addr);
558843e1988Sjohnlev
5597eea693dSMark Johnson for (i = 0; i < pgcnt; i++) {
5607eea693dSMark Johnson data->map[base + i].t_type = SEGMF_MAP_MFN;
5617eea693dSMark Johnson data->map[base + i].u.m.m_mfn = mfn++;
5627eea693dSMark Johnson }
563843e1988Sjohnlev
564843e1988Sjohnlev fc = segmf_fault_range(seg->s_as->a_hat, seg, addr,
565843e1988Sjohnlev pgcnt * MMU_PAGESIZE, F_SOFTLOCK, S_OTHER);
566843e1988Sjohnlev
567843e1988Sjohnlev if (fc != 0) {
568843e1988Sjohnlev error = fc_decode(fc);
5697eea693dSMark Johnson for (i = 0; i < pgcnt; i++) {
5707eea693dSMark Johnson data->map[base + i].t_type = SEGMF_MAP_EMPTY;
5717eea693dSMark Johnson }
572843e1988Sjohnlev }
573843e1988Sjohnlev
574843e1988Sjohnlev out:
575843e1988Sjohnlev mutex_exit(&data->lock);
576843e1988Sjohnlev return (error);
577843e1988Sjohnlev }
578843e1988Sjohnlev
5797eea693dSMark Johnson int
segmf_add_grefs(struct seg * seg,caddr_t addr,uint_t flags,grant_ref_t * grefs,uint_t cnt,domid_t domid)5807eea693dSMark Johnson segmf_add_grefs(struct seg *seg, caddr_t addr, uint_t flags,
5817eea693dSMark Johnson grant_ref_t *grefs, uint_t cnt, domid_t domid)
5827eea693dSMark Johnson {
5837eea693dSMark Johnson struct segmf_data *data;
5847eea693dSMark Johnson segmf_map_t *map;
5857eea693dSMark Johnson faultcode_t fc;
5867eea693dSMark Johnson uint_t idx;
5877eea693dSMark Johnson uint_t i;
5887eea693dSMark Johnson int e;
5897eea693dSMark Johnson
5907eea693dSMark Johnson if (seg->s_ops != &segmf_ops)
5917eea693dSMark Johnson return (EINVAL);
5927eea693dSMark Johnson
5937eea693dSMark Johnson /*
5947eea693dSMark Johnson * Don't mess with dom0.
5957eea693dSMark Johnson *
5967eea693dSMark Johnson * Only allow the domid to be set once for the segment.
5977eea693dSMark Johnson * After that attempts to add mappings to this segment for
5987eea693dSMark Johnson * other domains explicitly fails.
5997eea693dSMark Johnson */
6007eea693dSMark Johnson
6017eea693dSMark Johnson if (domid == 0 || domid == DOMID_SELF)
6027eea693dSMark Johnson return (EACCES);
6037eea693dSMark Johnson
6047eea693dSMark Johnson data = seg->s_data;
6057eea693dSMark Johnson idx = seg_page(seg, addr);
6067eea693dSMark Johnson map = &data->map[idx];
6077eea693dSMark Johnson e = 0;
6087eea693dSMark Johnson
6097eea693dSMark Johnson mutex_enter(&data->lock);
6107eea693dSMark Johnson
6117eea693dSMark Johnson if (data->domid == 0)
6127eea693dSMark Johnson data->domid = domid;
6137eea693dSMark Johnson
6147eea693dSMark Johnson if (data->domid != domid) {
6157eea693dSMark Johnson e = EINVAL;
6167eea693dSMark Johnson goto out;
6177eea693dSMark Johnson }
6187eea693dSMark Johnson
6197eea693dSMark Johnson /* store away the grefs passed in then fault in the pages */
6207eea693dSMark Johnson for (i = 0; i < cnt; i++) {
6217eea693dSMark Johnson map[i].t_type = SEGMF_MAP_GREF;
6227eea693dSMark Johnson map[i].u.g.g_gref = grefs[i];
6237eea693dSMark Johnson map[i].u.g.g_handle = 0;
6247eea693dSMark Johnson map[i].u.g.g_flags = 0;
6257eea693dSMark Johnson if (flags & SEGMF_GREF_WR) {
6267eea693dSMark Johnson map[i].u.g.g_flags |= SEGMF_GFLAGS_WR;
6277eea693dSMark Johnson }
6287eea693dSMark Johnson }
6297eea693dSMark Johnson fc = segmf_fault_gref_range(seg, addr, cnt);
6307eea693dSMark Johnson if (fc != 0) {
6317eea693dSMark Johnson e = fc_decode(fc);
6327eea693dSMark Johnson for (i = 0; i < cnt; i++) {
6337eea693dSMark Johnson data->map[i].t_type = SEGMF_MAP_EMPTY;
6347eea693dSMark Johnson }
6357eea693dSMark Johnson }
6367eea693dSMark Johnson
6377eea693dSMark Johnson out:
6387eea693dSMark Johnson mutex_exit(&data->lock);
6397eea693dSMark Johnson return (e);
6407eea693dSMark Johnson }
6417eea693dSMark Johnson
6427eea693dSMark Johnson int
segmf_release_grefs(struct seg * seg,caddr_t addr,uint_t cnt)6437eea693dSMark Johnson segmf_release_grefs(struct seg *seg, caddr_t addr, uint_t cnt)
6447eea693dSMark Johnson {
6457eea693dSMark Johnson gnttab_unmap_grant_ref_t mapop[SEGMF_MAX_GREFS];
6467eea693dSMark Johnson struct segmf_data *data;
6477eea693dSMark Johnson segmf_map_t *map;
6487eea693dSMark Johnson uint_t idx;
6497eea693dSMark Johnson long e;
6507eea693dSMark Johnson int i;
6517eea693dSMark Johnson int n;
6527eea693dSMark Johnson
6537eea693dSMark Johnson
6547eea693dSMark Johnson if (cnt > SEGMF_MAX_GREFS) {
6557eea693dSMark Johnson return (-1);
6567eea693dSMark Johnson }
6577eea693dSMark Johnson
6587eea693dSMark Johnson idx = seg_page(seg, addr);
6597eea693dSMark Johnson data = seg->s_data;
6607eea693dSMark Johnson map = &data->map[idx];
6617eea693dSMark Johnson
6627eea693dSMark Johnson bzero(mapop, sizeof (gnttab_unmap_grant_ref_t) * cnt);
6637eea693dSMark Johnson
6647eea693dSMark Johnson /*
6657eea693dSMark Johnson * for each entry which isn't empty and is currently mapped,
6667eea693dSMark Johnson * set it up for an unmap then mark them empty.
6677eea693dSMark Johnson */
6687eea693dSMark Johnson n = 0;
6697eea693dSMark Johnson for (i = 0; i < cnt; i++) {
6707eea693dSMark Johnson ASSERT(map[i].t_type != SEGMF_MAP_MFN);
6717eea693dSMark Johnson if ((map[i].t_type == SEGMF_MAP_GREF) &&
6727eea693dSMark Johnson (map[i].u.g.g_flags & SEGMF_GFLAGS_MAPPED)) {
6737eea693dSMark Johnson mapop[n].handle = map[i].u.g.g_handle;
6747eea693dSMark Johnson mapop[n].host_addr = map[i].u.g.g_ptep;
6757eea693dSMark Johnson mapop[n].dev_bus_addr = 0;
6767eea693dSMark Johnson n++;
6777eea693dSMark Johnson }
6787eea693dSMark Johnson map[i].t_type = SEGMF_MAP_EMPTY;
6797eea693dSMark Johnson }
6807eea693dSMark Johnson
6817eea693dSMark Johnson /* if there's nothing to unmap, just return */
6827eea693dSMark Johnson if (n == 0) {
6837eea693dSMark Johnson return (0);
6847eea693dSMark Johnson }
6857eea693dSMark Johnson
6867eea693dSMark Johnson e = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &mapop, n);
6877eea693dSMark Johnson if (e != 0) {
6887eea693dSMark Johnson return (-1);
6897eea693dSMark Johnson }
6907eea693dSMark Johnson
6917eea693dSMark Johnson return (0);
6927eea693dSMark Johnson }
6937eea693dSMark Johnson
6947eea693dSMark Johnson
6957eea693dSMark Johnson void
segmf_add_gref_pte(struct seg * seg,caddr_t addr,uint64_t pte_ma)6967eea693dSMark Johnson segmf_add_gref_pte(struct seg *seg, caddr_t addr, uint64_t pte_ma)
6977eea693dSMark Johnson {
6987eea693dSMark Johnson struct segmf_data *data;
6997eea693dSMark Johnson uint_t idx;
7007eea693dSMark Johnson
7017eea693dSMark Johnson idx = seg_page(seg, addr);
7027eea693dSMark Johnson data = seg->s_data;
7037eea693dSMark Johnson
7047eea693dSMark Johnson data->map[idx].u.g.g_ptep = pte_ma;
7057eea693dSMark Johnson }
7067eea693dSMark Johnson
7077eea693dSMark Johnson
7087eea693dSMark Johnson static int
segmf_fault_gref_range(struct seg * seg,caddr_t addr,size_t cnt)7097eea693dSMark Johnson segmf_fault_gref_range(struct seg *seg, caddr_t addr, size_t cnt)
7107eea693dSMark Johnson {
7117eea693dSMark Johnson gnttab_map_grant_ref_t mapop[SEGMF_MAX_GREFS];
7127eea693dSMark Johnson struct segmf_data *data;
7137eea693dSMark Johnson segmf_map_t *map;
7147eea693dSMark Johnson uint_t idx;
7157eea693dSMark Johnson int e;
7167eea693dSMark Johnson int i;
7177eea693dSMark Johnson
7187eea693dSMark Johnson
7197eea693dSMark Johnson if (cnt > SEGMF_MAX_GREFS) {
7207eea693dSMark Johnson return (-1);
7217eea693dSMark Johnson }
7227eea693dSMark Johnson
7237eea693dSMark Johnson data = seg->s_data;
7247eea693dSMark Johnson idx = seg_page(seg, addr);
7257eea693dSMark Johnson map = &data->map[idx];
7267eea693dSMark Johnson
7277eea693dSMark Johnson bzero(mapop, sizeof (gnttab_map_grant_ref_t) * cnt);
7287eea693dSMark Johnson
7297eea693dSMark Johnson ASSERT(map->t_type == SEGMF_MAP_GREF);
7307eea693dSMark Johnson
7317eea693dSMark Johnson /*
7327eea693dSMark Johnson * map in each page passed in into the user apps AS. We do this by
7337eea693dSMark Johnson * passing the MA of the actual pte of the mapping to the hypervisor.
7347eea693dSMark Johnson */
7357eea693dSMark Johnson for (i = 0; i < cnt; i++) {
7367eea693dSMark Johnson mapop[i].host_addr = map[i].u.g.g_ptep;
7377eea693dSMark Johnson mapop[i].dom = data->domid;
7387eea693dSMark Johnson mapop[i].ref = map[i].u.g.g_gref;
7397eea693dSMark Johnson mapop[i].flags = GNTMAP_host_map | GNTMAP_application_map |
7407eea693dSMark Johnson GNTMAP_contains_pte;
7417eea693dSMark Johnson if (!(map[i].u.g.g_flags & SEGMF_GFLAGS_WR)) {
7427eea693dSMark Johnson mapop[i].flags |= GNTMAP_readonly;
7437eea693dSMark Johnson }
7447eea693dSMark Johnson }
7457eea693dSMark Johnson e = xen_map_gref(GNTTABOP_map_grant_ref, mapop, cnt, B_TRUE);
7467eea693dSMark Johnson if ((e != 0) || (mapop[0].status != GNTST_okay)) {
7477eea693dSMark Johnson return (FC_MAKE_ERR(EFAULT));
7487eea693dSMark Johnson }
7497eea693dSMark Johnson
7507eea693dSMark Johnson /* save handle for segmf_release_grefs() and mark it as mapped */
7517eea693dSMark Johnson for (i = 0; i < cnt; i++) {
7527eea693dSMark Johnson ASSERT(mapop[i].status == GNTST_okay);
7537eea693dSMark Johnson map[i].u.g.g_handle = mapop[i].handle;
7547eea693dSMark Johnson map[i].u.g.g_flags |= SEGMF_GFLAGS_MAPPED;
7557eea693dSMark Johnson }
7567eea693dSMark Johnson
7577eea693dSMark Johnson return (0);
7587eea693dSMark Johnson }
7597eea693dSMark Johnson
760843e1988Sjohnlev static struct seg_ops segmf_ops = {
761843e1988Sjohnlev segmf_dup,
762843e1988Sjohnlev segmf_unmap,
763843e1988Sjohnlev segmf_free,
764843e1988Sjohnlev segmf_fault,
765843e1988Sjohnlev segmf_faulta,
766843e1988Sjohnlev segmf_setprot,
767843e1988Sjohnlev segmf_checkprot,
768843e1988Sjohnlev (int (*)())segmf_kluster,
769843e1988Sjohnlev (size_t (*)(struct seg *))NULL, /* swapout */
770843e1988Sjohnlev segmf_sync,
771843e1988Sjohnlev segmf_incore,
772843e1988Sjohnlev segmf_lockop,
773843e1988Sjohnlev segmf_getprot,
774843e1988Sjohnlev segmf_getoffset,
775843e1988Sjohnlev segmf_gettype,
776843e1988Sjohnlev segmf_getvp,
777843e1988Sjohnlev segmf_advise,
778843e1988Sjohnlev segmf_dump,
779843e1988Sjohnlev segmf_pagelock,
780843e1988Sjohnlev segmf_setpagesize,
781843e1988Sjohnlev segmf_getmemid,
782843e1988Sjohnlev segmf_getpolicy,
783*9d12795fSRobert Mustacchi segmf_capable,
784*9d12795fSRobert Mustacchi seg_inherit_notsup
785843e1988Sjohnlev };
786