/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 */

#include <sys/types.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/sx.h>
#include <sys/systm.h>

#include <machine/vmm.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_mem.h>

static void vm_free_memmap(struct vm *vm, int ident);

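/*
 * Initialize the memory state of a new VM: allocate the vmspace that
 * backs the guest physical address space and set up the sx lock that
 * serializes access to the memory segment array.
 */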
int
vm_mem_init(struct vm_mem *mem, vm_offset_t lo, vm_offset_t hi)
{
	mem->mem_vmspace = vmmops_vmspace_alloc(lo, hi);
	if (mem->mem_vmspace == NULL)
		return (ENOMEM);
	sx_init(&mem->mem_segs_lock, "vm_mem_segs");
	return (0);
}

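/*
 * Return true if memory map slot 'idx' is in use and backed by a system
 * memory segment (as opposed to device memory).
 */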
static bool
sysmem_mapping(struct vm_mem *mem, int idx)
{
	if (mem->mem_maps[idx].len != 0 &&
	    mem->mem_segs[mem->mem_maps[idx].segid].sysmem)
		return (true);
	else
		return (false);
}

bool
vm_memseg_sysmem(struct vm *vm, int ident)
{
	struct vm_mem *mem;

	mem = vm_mem(vm);
	vm_assert_memseg_locked(vm);

	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
		return (false);

	return (mem->mem_segs[ident].sysmem);
}

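/*
 * Tear down the non-system (device) memory mappings on VM reset; the
 * rationale for keeping system memory mapped is described below.
 */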
void
vm_mem_cleanup(struct vm *vm)
{
	struct vm_mem *mem;

	mem = vm_mem(vm);

	/*
	 * System memory is removed from the guest address space only when
	 * the VM is destroyed. This is because the mapping remains the same
	 * across VM reset.
	 *
	 * Device memory can be relocated by the guest (e.g. using PCI BARs)
	 * so those mappings are removed on a VM reset.
	 */
	for (int i = 0; i < VM_MAX_MEMMAPS; i++) {
		if (!sysmem_mapping(mem, i))
			vm_free_memmap(vm, i);
	}
}

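/*
 * Final teardown at VM destruction: drop the remaining system memory
 * mappings, release all memory segments, free the vmspace and destroy
 * the segment lock, which the caller must hold exclusively on entry.
 */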
void
vm_mem_destroy(struct vm *vm)
{
	struct vm_mem *mem;

	mem = vm_mem(vm);
	vm_assert_memseg_xlocked(vm);

	for (int i = 0; i < VM_MAX_MEMMAPS; i++) {
		if (sysmem_mapping(mem, i))
			vm_free_memmap(vm, i);
	}

	for (int i = 0; i < VM_MAX_MEMSEGS; i++)
		vm_free_memseg(vm, i);

	vmmops_vmspace_free(mem->mem_vmspace);

	sx_xunlock(&mem->mem_segs_lock);
	sx_destroy(&mem->mem_segs_lock);
}

struct vmspace *
vm_vmspace(struct vm *vm)
{
	struct vm_mem *mem;

	mem = vm_mem(vm);
	return (mem->mem_vmspace);
}

void
vm_slock_memsegs(struct vm *vm)
{
	sx_slock(&vm_mem(vm)->mem_segs_lock);
}

void
vm_xlock_memsegs(struct vm *vm)
{
	sx_xlock(&vm_mem(vm)->mem_segs_lock);
}

void
vm_unlock_memsegs(struct vm *vm)
{
	sx_unlock(&vm_mem(vm)->mem_segs_lock);
}

void
vm_assert_memseg_locked(struct vm *vm)
{
	sx_assert(&vm_mem(vm)->mem_segs_lock, SX_LOCKED);
}

void
vm_assert_memseg_xlocked(struct vm *vm)
{
	sx_assert(&vm_mem(vm)->mem_segs_lock, SX_XLOCKED);
}

/*
 * Return 'true' if 'gpa' is allocated in the guest address space.
 *
 * This function is called in the context of a running vcpu which acts as
 * an implicit lock on 'vm->mem_maps[]'.
 */
bool
vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa)
{
	struct vm *vm = vcpu_vm(vcpu);
	struct vm_mem_map *mm;
	int i;

#ifdef INVARIANTS
	int hostcpu, state;
	state = vcpu_get_state(vcpu, &hostcpu);
	KASSERT(state == VCPU_RUNNING && hostcpu == curcpu,
	    ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu));
#endif

	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm_mem(vm)->mem_maps[i];
		if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len)
			return (true);		/* 'gpa' is sysmem or devmem */
	}

	return (false);
}

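/*
 * Allocate memory segment 'ident' of size 'len', backed by an anonymous
 * swap-backed VM object.  An optional NUMA domainset policy may be applied
 * to the object.  The caller must hold the segment lock exclusively; in
 * practice this is typically reached from the vmm device ioctl path, though
 * that is outside the scope of this file.
 */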
int
vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem,
    struct domainset *obj_domainset)
{
	struct vm_mem_seg *seg;
	struct vm_mem *mem;
	vm_object_t obj;

	mem = vm_mem(vm);
	vm_assert_memseg_xlocked(vm);

	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
		return (EINVAL);

	if (len == 0 || (len & PAGE_MASK))
		return (EINVAL);

	seg = &mem->mem_segs[ident];
	if (seg->object != NULL) {
		if (seg->len == len && seg->sysmem == sysmem)
			return (EEXIST);
		else
			return (EINVAL);
	}

	/*
	 * When given an impossible policy, signal an
	 * error to the user.
	 */
	if (obj_domainset != NULL && domainset_empty_vm(obj_domainset))
		return (EINVAL);
	obj = vm_object_allocate(OBJT_SWAP, len >> PAGE_SHIFT);
	if (obj == NULL)
		return (ENOMEM);

	seg->len = len;
	seg->object = obj;
	if (obj_domainset != NULL)
		seg->object->domain.dr_policy = obj_domainset;
	seg->sysmem = sysmem;

	return (0);
}

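/*
 * Look up memory segment 'ident' and report its length, whether it is
 * system memory, and its backing object.  Any of the output pointers may
 * be NULL if the caller is not interested in that attribute.
 */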
int
vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
    vm_object_t *objptr)
{
	struct vm_mem *mem;
	struct vm_mem_seg *seg;

	mem = vm_mem(vm);

	vm_assert_memseg_locked(vm);

	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
		return (EINVAL);

	seg = &mem->mem_segs[ident];
	if (len)
		*len = seg->len;
	if (sysmem)
		*sysmem = seg->sysmem;
	if (objptr)
		*objptr = seg->object;
	return (0);
}

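/*
 * Release the backing object of memory segment 'ident' (if any) and clear
 * the segment slot.  The object itself is destroyed only once its last
 * reference, e.g. from an active guest mapping, goes away.
 */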
void
vm_free_memseg(struct vm *vm, int ident)
{
	struct vm_mem_seg *seg;

	KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS,
	    ("%s: invalid memseg ident %d", __func__, ident));

	seg = &vm_mem(vm)->mem_segs[ident];
	if (seg->object != NULL) {
		vm_object_deallocate(seg->object);
		bzero(seg, sizeof(struct vm_mem_seg));
	}
}

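/*
 * Map the range [first, first + len) of memory segment 'segid' into the
 * guest physical address space at 'gpa'.  The range must be page aligned
 * and fall within the segment.  The mapping is recorded in the first free
 * slot of 'mem_maps[]' and may optionally be wired with VM_MEMMAP_F_WIRED.
 */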
int
vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first,
    size_t len, int prot, int flags)
{
	struct vm_mem *mem;
	struct vm_mem_seg *seg;
	struct vm_mem_map *m, *map;
	struct vm_map *vmmap;
	vm_ooffset_t last;
	int i, error;

	if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0)
		return (EINVAL);

	if (flags & ~VM_MEMMAP_F_WIRED)
		return (EINVAL);

	if (segid < 0 || segid >= VM_MAX_MEMSEGS)
		return (EINVAL);

	mem = vm_mem(vm);
	seg = &mem->mem_segs[segid];
	if (seg->object == NULL)
		return (EINVAL);

	last = first + len;
	if (first < 0 || first >= last || last > seg->len)
		return (EINVAL);

	if ((gpa | first | last) & PAGE_MASK)
		return (EINVAL);

	map = NULL;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		m = &mem->mem_maps[i];
		if (m->len == 0) {
			map = m;
			break;
		}
	}
	if (map == NULL)
		return (ENOSPC);

	vmmap = &mem->mem_vmspace->vm_map;
	error = vm_map_find(vmmap, seg->object, first, &gpa, len, 0,
	    VMFS_NO_SPACE, prot, prot, 0);
	if (error != KERN_SUCCESS)
		return (EFAULT);

	vm_object_reference(seg->object);

	if (flags & VM_MEMMAP_F_WIRED) {
		error = vm_map_wire(vmmap, gpa, gpa + len,
		    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
		if (error != KERN_SUCCESS) {
			vm_map_remove(vmmap, gpa, gpa + len);
			return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM :
			    EFAULT);
		}
	}

	map->gpa = gpa;
	map->len = len;
	map->segoff = first;
	map->segid = segid;
	map->prot = prot;
	map->flags = flags;
	return (0);
}

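/*
 * Remove an existing guest mapping.  The (gpa, len) pair must exactly
 * match one of the current memory maps; on platforms that define
 * VM_MEMMAP_F_IOMMU, mappings handed to the IOMMU are skipped.
 */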
int
vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len)
{
	struct vm_mem *mem;
	struct vm_mem_map *m;
	int i;

	mem = vm_mem(vm);
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		m = &mem->mem_maps[i];
#ifdef VM_MEMMAP_F_IOMMU
		if ((m->flags & VM_MEMMAP_F_IOMMU) != 0)
			continue;
#endif
		if (m->gpa == gpa && m->len == len) {
			vm_free_memmap(vm, i);
			return (0);
		}
	}

	return (EINVAL);
}

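/*
 * Iterator over the memory maps: find the mapping with the lowest guest
 * physical address greater than or equal to *gpa and return its attributes,
 * or ENOENT if no such mapping exists.  Output pointers other than 'gpa'
 * may be NULL.
 */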
int
vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
    vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
{
	struct vm_mem *mem;
	struct vm_mem_map *mm, *mmnext;
	int i;

	mem = vm_mem(vm);

	mmnext = NULL;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &mem->mem_maps[i];
		if (mm->len == 0 || mm->gpa < *gpa)
			continue;
		if (mmnext == NULL || mm->gpa < mmnext->gpa)
			mmnext = mm;
	}

	if (mmnext != NULL) {
		*gpa = mmnext->gpa;
		if (segid)
			*segid = mmnext->segid;
		if (segoff)
			*segoff = mmnext->segoff;
		if (len)
			*len = mmnext->len;
		if (prot)
			*prot = mmnext->prot;
		if (flags)
			*flags = mmnext->flags;
		return (0);
	} else {
		return (ENOENT);
	}
}

static void
vm_free_memmap(struct vm *vm, int ident)
{
	struct vm_mem_map *mm;
	int error __diagused;

	mm = &vm_mem(vm)->mem_maps[ident];
	if (mm->len) {
		error = vm_map_remove(&vm_vmspace(vm)->vm_map, mm->gpa,
		    mm->gpa + mm->len);
		KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d",
		    __func__, error));
		bzero(mm, sizeof(struct vm_mem_map));
	}
}

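/*
 * Return the highest guest physical address covered by a system memory
 * mapping, or 0 if no system memory is mapped.
 */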
vm_paddr_t
vmm_sysmem_maxaddr(struct vm *vm)
{
	struct vm_mem *mem;
	struct vm_mem_map *mm;
	vm_paddr_t maxaddr;
	int i;

	mem = vm_mem(vm);
	maxaddr = 0;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &mem->mem_maps[i];
		if (sysmem_mapping(mem, i)) {
			if (maxaddr < mm->gpa + mm->len)
				maxaddr = mm->gpa + mm->len;
		}
	}
	return (maxaddr);
}

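/*
 * Common helper for vm_gpa_hold() and vm_gpa_hold_global(): fault in and
 * wire the single page backing 'gpa' (the requested range must not cross a
 * page boundary) and return a host virtual address for it via the direct
 * map.  On success '*cookie' holds the vm_page_t for a later
 * vm_gpa_release(); on failure NULL is returned and '*cookie' is cleared.
 */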
static void *
_vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
	struct vm_mem_map *mm;
	vm_page_t m;
	int i, count, pageoff;

	pageoff = gpa & PAGE_MASK;
	if (len > PAGE_SIZE - pageoff)
		panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);

	count = 0;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm_mem(vm)->mem_maps[i];
		if (gpa >= mm->gpa && gpa < mm->gpa + mm->len) {
			count = vm_fault_quick_hold_pages(
			    &vm_vmspace(vm)->vm_map, trunc_page(gpa),
			    PAGE_SIZE, reqprot, &m, 1);
			break;
		}
	}

	if (count == 1) {
		*cookie = m;
		return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
	} else {
		*cookie = NULL;
		return (NULL);
	}
}

void *
vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
#ifdef INVARIANTS
	/*
	 * The current vcpu should be frozen to ensure 'vm_memmap[]'
	 * stability.
	 */
	int state = vcpu_get_state(vcpu, NULL);
	KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d",
	    __func__, state));
#endif
	return (_vm_gpa_hold(vcpu_vm(vcpu), gpa, len, reqprot, cookie));
}

void *
vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
	vm_assert_memseg_locked(vm);
	return (_vm_gpa_hold(vm, gpa, len, reqprot, cookie));
}

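/*
 * Drop the page wiring taken by a previous vm_gpa_hold() or
 * vm_gpa_hold_global(); 'cookie' is the value returned through the
 * 'cookie' argument of those functions.
 */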
void
vm_gpa_release(void *cookie)
{
	vm_page_t m = cookie;

	vm_page_unwire(m, PQ_ACTIVE);
}