/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 */

#include <sys/types.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/sx.h>
#include <sys/systm.h>

#include <machine/vmm.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_mem.h>

static void vm_free_memmap(struct vm *vm, int ident);

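/*
 * Allocate the vmspace backing the guest physical address range [lo, hi)
 * and initialize the lock protecting the memory segment array.
 */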
int
vm_mem_init(struct vm_mem *mem, vm_offset_t lo, vm_offset_t hi)
{
	mem->mem_vmspace = vmmops_vmspace_alloc(lo, hi);
	if (mem->mem_vmspace == NULL)
		return (ENOMEM);
	sx_init(&mem->mem_segs_lock, "vm_mem_segs");
	return (0);
}

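/*
 * Return true if memory map slot 'idx' is in use and backed by a system
 * memory segment.
 */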
static bool
sysmem_mapping(struct vm_mem *mem, int idx)
{
	if (mem->mem_maps[idx].len != 0 &&
	    mem->mem_segs[mem->mem_maps[idx].segid].sysmem)
		return (true);
	else
		return (false);
}

bool
vm_memseg_sysmem(struct vm *vm, int ident)
{
	struct vm_mem *mem;

	mem = vm_mem(vm);
	vm_assert_memseg_locked(vm);

	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
		return (false);

	return (mem->mem_segs[ident].sysmem);
}

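/*
 * Remove every memory mapping that is not backed by system memory.
 */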
void
vm_mem_cleanup(struct vm *vm)
{
	struct vm_mem *mem;

	mem = vm_mem(vm);

	/*
	 * System memory is removed from the guest address space only when
	 * the VM is destroyed. This is because the mapping remains the same
	 * across VM reset.
	 *
	 * Device memory can be relocated by the guest (e.g. using PCI BARs)
	 * so those mappings are removed on a VM reset.
	 */
	for (int i = 0; i < VM_MAX_MEMMAPS; i++) {
		if (!sysmem_mapping(mem, i))
			vm_free_memmap(vm, i);
	}
}

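/*
 * Tear down all memory state for a VM that is being destroyed: remove the
 * remaining (system memory) mappings, free every memory segment and release
 * the vmspace.  The memory segment lock must be held exclusively; it is
 * released and destroyed here.
 */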
void
vm_mem_destroy(struct vm *vm)
{
	struct vm_mem *mem;

	mem = vm_mem(vm);
	vm_assert_memseg_xlocked(vm);

	for (int i = 0; i < VM_MAX_MEMMAPS; i++) {
		if (sysmem_mapping(mem, i))
			vm_free_memmap(vm, i);
	}

	for (int i = 0; i < VM_MAX_MEMSEGS; i++)
		vm_free_memseg(vm, i);

	vmmops_vmspace_free(mem->mem_vmspace);

	sx_xunlock(&mem->mem_segs_lock);
	sx_destroy(&mem->mem_segs_lock);
}

struct vmspace *
vm_vmspace(struct vm *vm)
{
	struct vm_mem *mem;

	mem = vm_mem(vm);
	return (mem->mem_vmspace);
}

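/*
 * Wrappers around the lock protecting the memory segment and memory map
 * arrays, for use by callers outside this file.
 */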
void
vm_slock_memsegs(struct vm *vm)
{
	sx_slock(&vm_mem(vm)->mem_segs_lock);
}

void
vm_xlock_memsegs(struct vm *vm)
{
	sx_xlock(&vm_mem(vm)->mem_segs_lock);
}

void
vm_unlock_memsegs(struct vm *vm)
{
	sx_unlock(&vm_mem(vm)->mem_segs_lock);
}

void
vm_assert_memseg_locked(struct vm *vm)
{
	sx_assert(&vm_mem(vm)->mem_segs_lock, SX_LOCKED);
}

void
vm_assert_memseg_xlocked(struct vm *vm)
{
	sx_assert(&vm_mem(vm)->mem_segs_lock, SX_XLOCKED);
}

/*
 * Return 'true' if 'gpa' is allocated in the guest address space.
 *
 * This function is called in the context of a running vcpu which acts as
 * an implicit lock on 'vm->mem_maps[]'.
 */
bool
vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa)
{
	struct vm *vm = vcpu_vm(vcpu);
	struct vm_mem_map *mm;
	int i;

#ifdef INVARIANTS
	int hostcpu, state;
	state = vcpu_get_state(vcpu, &hostcpu);
	KASSERT(state == VCPU_RUNNING && hostcpu == curcpu,
	    ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu));
#endif

	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm_mem(vm)->mem_maps[i];
		if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len)
			return (true);		/* 'gpa' is sysmem or devmem */
	}

	return (false);
}

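/*
 * Allocate memory segment 'ident' of 'len' bytes, backed by an anonymous
 * (swap-backed) VM object.  'obj_domainset', if not NULL, specifies the NUMA
 * allocation policy for the object's pages.  Returns EEXIST if an identical
 * segment already exists and EINVAL if a conflicting one does.
 */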
int
vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem,
    struct domainset *obj_domainset)
{
	struct vm_mem_seg *seg;
	struct vm_mem *mem;
	vm_object_t obj;

	mem = vm_mem(vm);
	vm_assert_memseg_xlocked(vm);

	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
		return (EINVAL);

	if (len == 0 || (len & PAGE_MASK))
		return (EINVAL);

	seg = &mem->mem_segs[ident];
	if (seg->object != NULL) {
		if (seg->len == len && seg->sysmem == sysmem)
			return (EEXIST);
		else
			return (EINVAL);
	}

	/*
	 * When given an impossible policy, signal an
	 * error to the user.
	 */
	if (obj_domainset != NULL && domainset_empty_vm(obj_domainset))
		return (EINVAL);
	obj = vm_object_allocate(OBJT_SWAP, len >> PAGE_SHIFT);
	if (obj == NULL)
		return (ENOMEM);

	seg->len = len;
	seg->object = obj;
	if (obj_domainset != NULL)
		seg->object->domain.dr_policy = obj_domainset;
	seg->sysmem = sysmem;

	return (0);
}

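/*
 * Report the length, type and backing object of memory segment 'ident'.
 * Any of the output pointers may be NULL if the caller is not interested.
 */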
int
vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
    vm_object_t *objptr)
{
	struct vm_mem *mem;
	struct vm_mem_seg *seg;

	mem = vm_mem(vm);

	vm_assert_memseg_locked(vm);

	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
		return (EINVAL);

	seg = &mem->mem_segs[ident];
	if (len)
		*len = seg->len;
	if (sysmem)
		*sysmem = seg->sysmem;
	if (objptr)
		*objptr = seg->object;
	return (0);
}

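/*
 * Drop the reference on the backing object of memory segment 'ident' and
 * clear the segment slot.
 */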
void
vm_free_memseg(struct vm *vm, int ident)
{
	struct vm_mem_seg *seg;

	KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS,
	    ("%s: invalid memseg ident %d", __func__, ident));

	seg = &vm_mem(vm)->mem_segs[ident];
	if (seg->object != NULL) {
		vm_object_deallocate(seg->object);
		bzero(seg, sizeof(struct vm_mem_seg));
	}
}

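/*
 * Map 'len' bytes of memory segment 'segid', starting at offset 'first',
 * into the guest address space at 'gpa' with protection 'prot'.  The mapping
 * is optionally wired when VM_MEMMAP_F_WIRED is set in 'flags'.
 */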
int
vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first,
    size_t len, int prot, int flags)
{
	struct vm_mem *mem;
	struct vm_mem_seg *seg;
	struct vm_mem_map *m, *map;
	struct vm_map *vmmap;
	vm_ooffset_t last;
	int i, error;

	if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0)
		return (EINVAL);

	if (flags & ~VM_MEMMAP_F_WIRED)
		return (EINVAL);

	if (segid < 0 || segid >= VM_MAX_MEMSEGS)
		return (EINVAL);

	mem = vm_mem(vm);
	seg = &mem->mem_segs[segid];
	if (seg->object == NULL)
		return (EINVAL);

	if (first + len < first || gpa + len < gpa)
		return (EINVAL);
	last = first + len;
	if (first >= last || last > seg->len)
		return (EINVAL);

	if ((gpa | first | last) & PAGE_MASK)
		return (EINVAL);

	map = NULL;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		m = &mem->mem_maps[i];
		if (m->len == 0) {
			map = m;
			break;
		}
	}
	if (map == NULL)
		return (ENOSPC);

	vmmap = &mem->mem_vmspace->vm_map;
	vm_map_lock(vmmap);
	error = vm_map_insert(vmmap, seg->object, first, gpa, gpa + len,
	    prot, prot, 0);
	vm_map_unlock(vmmap);
	if (error != KERN_SUCCESS)
		return (vm_mmap_to_errno(error));
	vm_object_reference(seg->object);

	if (flags & VM_MEMMAP_F_WIRED) {
		error = vm_map_wire(vmmap, gpa, gpa + len,
		    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
		if (error != KERN_SUCCESS) {
			vm_map_remove(vmmap, gpa, gpa + len);
			return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM :
			    EFAULT);
		}
	}

	map->gpa = gpa;
	map->len = len;
	map->segoff = first;
	map->segid = segid;
	map->prot = prot;
	map->flags = flags;
	return (0);
}

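/*
 * Remove the memory mapping that exactly matches [gpa, gpa + len) from the
 * guest address space.  Mappings currently installed in the IOMMU are left
 * untouched.
 */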
int
vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len)
{
	struct vm_mem *mem;
	struct vm_mem_map *m;
	int i;

	mem = vm_mem(vm);
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		m = &mem->mem_maps[i];
#ifdef VM_MEMMAP_F_IOMMU
		if ((m->flags & VM_MEMMAP_F_IOMMU) != 0)
			continue;
#endif
		if (m->gpa == gpa && m->len == len) {
			vm_free_memmap(vm, i);
			return (0);
		}
	}

	return (EINVAL);
}

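/*
 * Find the memory mapping with the lowest guest physical address that is
 * greater than or equal to *gpa and return its attributes.  Callers use this
 * to iterate over all mappings; ENOENT is returned when no further mapping
 * exists.
 */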
int
vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
    vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
{
	struct vm_mem *mem;
	struct vm_mem_map *mm, *mmnext;
	int i;

	mem = vm_mem(vm);

	mmnext = NULL;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &mem->mem_maps[i];
		if (mm->len == 0 || mm->gpa < *gpa)
			continue;
		if (mmnext == NULL || mm->gpa < mmnext->gpa)
			mmnext = mm;
	}

	if (mmnext != NULL) {
		*gpa = mmnext->gpa;
		if (segid)
			*segid = mmnext->segid;
		if (segoff)
			*segoff = mmnext->segoff;
		if (len)
			*len = mmnext->len;
		if (prot)
			*prot = mmnext->prot;
		if (flags)
			*flags = mmnext->flags;
		return (0);
	} else {
		return (ENOENT);
	}
}

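/*
 * Remove mapping 'ident' from the guest address space and clear its slot in
 * the memory map array.
 */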
static void
vm_free_memmap(struct vm *vm, int ident)
{
	struct vm_mem_map *mm;
	int error __diagused;

	mm = &vm_mem(vm)->mem_maps[ident];
	if (mm->len) {
		error = vm_map_remove(&vm_vmspace(vm)->vm_map, mm->gpa,
		    mm->gpa + mm->len);
		KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d",
		    __func__, error));
		bzero(mm, sizeof(struct vm_mem_map));
	}
}

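/*
 * Return the exclusive upper bound of the highest system memory mapping in
 * the guest address space, or 0 if there is none.
 */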
vm_paddr_t
vmm_sysmem_maxaddr(struct vm *vm)
{
	struct vm_mem *mem;
	struct vm_mem_map *mm;
	vm_paddr_t maxaddr;
	int i;

	mem = vm_mem(vm);
	maxaddr = 0;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &mem->mem_maps[i];
		if (sysmem_mapping(mem, i)) {
			if (maxaddr < mm->gpa + mm->len)
				maxaddr = mm->gpa + mm->len;
		}
	}
	return (maxaddr);
}

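/*
 * Wire the host page backing guest physical address 'gpa' and return a
 * direct map pointer to it.  The requested range must not cross a page
 * boundary.  On success '*cookie' holds the page reference, which the caller
 * releases with vm_gpa_release(); on failure '*cookie' is NULL.
 */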
static void *
_vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
	struct vm_mem_map *mm;
	vm_page_t m;
	int i, count, pageoff;

	pageoff = gpa & PAGE_MASK;
	if (len > PAGE_SIZE - pageoff)
		panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);

	count = 0;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm_mem(vm)->mem_maps[i];
		if (gpa >= mm->gpa && gpa < mm->gpa + mm->len) {
			count = vm_fault_quick_hold_pages(
			    &vm_vmspace(vm)->vm_map, trunc_page(gpa),
			    PAGE_SIZE, reqprot, &m, 1);
			break;
		}
	}

	if (count == 1) {
		*cookie = m;
		return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
	} else {
		*cookie = NULL;
		return (NULL);
	}
}

void *
vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
#ifdef INVARIANTS
	/*
	 * The current vcpu should be frozen to ensure 'vm_memmap[]'
	 * stability.
	 */
	int state = vcpu_get_state(vcpu, NULL);
	KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d",
	    __func__, state));
#endif
	return (_vm_gpa_hold(vcpu_vm(vcpu), gpa, len, reqprot, cookie));
}

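/*
 * Variant of vm_gpa_hold() for callers without a vcpu context; the memory
 * segment lock is relied upon to keep the memory maps stable instead of a
 * frozen vcpu.
 */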
void *
vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
	vm_assert_memseg_locked(vm);
	return (_vm_gpa_hold(vm, gpa, len, reqprot, cookie));
}

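/*
 * Unwire a page previously held by vm_gpa_hold() or vm_gpa_hold_global().
 */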
void
vm_gpa_release(void *cookie)
{
	vm_page_t m = cookie;

	vm_page_unwire(m, PQ_ACTIVE);
}