/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 */

#include <sys/types.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/sx.h>
#include <sys/systm.h>

#include <machine/vmm.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_mem.h>

static void vm_free_memmap(struct vm *vm, int ident);

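/*
 * Initialize the memory state of a VM: allocate the guest physical address
 * space covering [lo, hi) and initialize the lock that protects the memory
 * segment array.
 */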
int
vm_mem_init(struct vm_mem *mem, vm_offset_t lo, vm_offset_t hi)
{
	mem->mem_vmspace = vmmops_vmspace_alloc(lo, hi);
	if (mem->mem_vmspace == NULL)
		return (ENOMEM);
	sx_init(&mem->mem_segs_lock, "vm_mem_segs");
	return (0);
}

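/*
 * Return true if memory map 'idx' is in use and is backed by a system
 * memory segment (as opposed to device memory).
 */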
static bool
sysmem_mapping(struct vm_mem *mem, int idx)
{
	if (mem->mem_maps[idx].len != 0 &&
	    mem->mem_segs[mem->mem_maps[idx].segid].sysmem)
		return (true);
	else
		return (false);
}

bool
vm_memseg_sysmem(struct vm *vm, int ident)
{
	struct vm_mem *mem;

	mem = vm_mem(vm);
	vm_assert_memseg_locked(vm);

	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
		return (false);

	return (mem->mem_segs[ident].sysmem);
}

void
vm_mem_cleanup(struct vm *vm)
{
	struct vm_mem *mem;

	mem = vm_mem(vm);

	/*
	 * System memory is removed from the guest address space only when
	 * the VM is destroyed. This is because the mapping remains the same
	 * across VM reset.
	 *
	 * Device memory can be relocated by the guest (e.g. using PCI BARs)
	 * so those mappings are removed on a VM reset.
	 */
	for (int i = 0; i < VM_MAX_MEMMAPS; i++) {
		if (!sysmem_mapping(mem, i))
			vm_free_memmap(vm, i);
	}
}

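/*
 * Tear down all guest memory: remove the remaining (system memory) mappings,
 * free every memory segment and release the guest vmspace.  The memory
 * segment lock must be held exclusively and is destroyed on return.
 */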
void
vm_mem_destroy(struct vm *vm)
{
	struct vm_mem *mem;

	mem = vm_mem(vm);
	vm_assert_memseg_xlocked(vm);

	for (int i = 0; i < VM_MAX_MEMMAPS; i++) {
		if (sysmem_mapping(mem, i))
			vm_free_memmap(vm, i);
	}

	for (int i = 0; i < VM_MAX_MEMSEGS; i++)
		vm_free_memseg(vm, i);

	vmmops_vmspace_free(mem->mem_vmspace);

	sx_xunlock(&mem->mem_segs_lock);
	sx_destroy(&mem->mem_segs_lock);
}

struct vmspace *
vm_vmspace(struct vm *vm)
{
	struct vm_mem *mem;

	mem = vm_mem(vm);
	return (mem->mem_vmspace);
}

void
vm_slock_memsegs(struct vm *vm)
{
	sx_slock(&vm_mem(vm)->mem_segs_lock);
}

void
vm_xlock_memsegs(struct vm *vm)
{
	sx_xlock(&vm_mem(vm)->mem_segs_lock);
}

void
vm_unlock_memsegs(struct vm *vm)
{
	sx_unlock(&vm_mem(vm)->mem_segs_lock);
}

void
vm_assert_memseg_locked(struct vm *vm)
{
	sx_assert(&vm_mem(vm)->mem_segs_lock, SX_LOCKED);
}

void
vm_assert_memseg_xlocked(struct vm *vm)
{
	sx_assert(&vm_mem(vm)->mem_segs_lock, SX_XLOCKED);
}

/*
 * Return 'true' if 'gpa' is allocated in the guest address space.
 *
 * This function is called in the context of a running vcpu which acts as
 * an implicit lock on 'vm->mem_maps[]'.
 */
bool
vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa)
{
	struct vm *vm = vcpu_vm(vcpu);
	struct vm_mem_map *mm;
	int i;

#ifdef INVARIANTS
	int hostcpu, state;
	state = vcpu_get_state(vcpu, &hostcpu);
	KASSERT(state == VCPU_RUNNING && hostcpu == curcpu,
	    ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu));
#endif

	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm_mem(vm)->mem_maps[i];
		if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len)
			return (true);		/* 'gpa' is sysmem or devmem */
	}

	return (false);
}

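/*
 * Allocate memory segment 'ident' of 'len' bytes, backed by an anonymous
 * (swap) VM object.  'obj_domainset', if not NULL, specifies the NUMA
 * allocation policy for the object's pages.  Returns EEXIST if an identical
 * segment already exists and EINVAL for a conflicting or malformed request.
 */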
int
vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem,
    struct domainset *obj_domainset)
{
	struct vm_mem_seg *seg;
	struct vm_mem *mem;
	vm_object_t obj;

	mem = vm_mem(vm);
	vm_assert_memseg_xlocked(vm);

	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
		return (EINVAL);

	if (len == 0 || (len & PAGE_MASK))
		return (EINVAL);

	seg = &mem->mem_segs[ident];
	if (seg->object != NULL) {
		if (seg->len == len && seg->sysmem == sysmem)
			return (EEXIST);
		else
			return (EINVAL);
	}

	/*
	 * When given an impossible policy, signal an
	 * error to the user.
	 */
	if (obj_domainset != NULL && domainset_empty_vm(obj_domainset))
		return (EINVAL);
	obj = vm_object_allocate(OBJT_SWAP, len >> PAGE_SHIFT);
	if (obj == NULL)
		return (ENOMEM);

	seg->len = len;
	seg->object = obj;
	if (obj_domainset != NULL)
		seg->object->domain.dr_policy = obj_domainset;
	seg->sysmem = sysmem;

	return (0);
}

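/*
 * Look up memory segment 'ident' and return its length, type and backing
 * VM object via the optional output pointers.
 */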
int
vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
    vm_object_t *objptr)
{
	struct vm_mem *mem;
	struct vm_mem_seg *seg;

	mem = vm_mem(vm);

	vm_assert_memseg_locked(vm);

	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
		return (EINVAL);

	seg = &mem->mem_segs[ident];
	if (len)
		*len = seg->len;
	if (sysmem)
		*sysmem = seg->sysmem;
	if (objptr)
		*objptr = seg->object;
	return (0);
}

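/*
 * Release the VM object backing memory segment 'ident' and clear the
 * segment.
 */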
void
vm_free_memseg(struct vm *vm, int ident)
{
	struct vm_mem_seg *seg;

	KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS,
	    ("%s: invalid memseg ident %d", __func__, ident));

	seg = &vm_mem(vm)->mem_segs[ident];
	if (seg->object != NULL) {
		vm_object_deallocate(seg->object);
		bzero(seg, sizeof(struct vm_mem_seg));
	}
}

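/*
 * Map 'len' bytes of memory segment 'segid', starting at offset 'first'
 * within the segment, into the guest address space at 'gpa' with protection
 * 'prot'.  The mapping is wired if VM_MEMMAP_F_WIRED is set in 'flags'.
 * Returns ENOSPC if no free memory map slot is available.
 */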
int
vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first,
    size_t len, int prot, int flags)
{
	struct vm_mem *mem;
	struct vm_mem_seg *seg;
	struct vm_mem_map *m, *map;
	struct vm_map *vmmap;
	vm_ooffset_t last;
	int i, error;

	if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0)
		return (EINVAL);

	if (flags & ~VM_MEMMAP_F_WIRED)
		return (EINVAL);

	if (segid < 0 || segid >= VM_MAX_MEMSEGS)
		return (EINVAL);

	mem = vm_mem(vm);
	seg = &mem->mem_segs[segid];
	if (seg->object == NULL)
		return (EINVAL);

	if (first + len < first || gpa + len < gpa)
		return (EINVAL);
	last = first + len;
	if (first >= last || last > seg->len)
		return (EINVAL);

	if ((gpa | first | last) & PAGE_MASK)
		return (EINVAL);

	map = NULL;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		m = &mem->mem_maps[i];
		if (m->len == 0) {
			map = m;
			break;
		}
	}
	if (map == NULL)
		return (ENOSPC);

	vmmap = &mem->mem_vmspace->vm_map;
	vm_map_lock(vmmap);
	error = vm_map_insert(vmmap, seg->object, first, gpa, gpa + len,
	    prot, prot, 0);
	vm_map_unlock(vmmap);
	if (error != KERN_SUCCESS)
		return (vm_mmap_to_errno(error));
	vm_object_reference(seg->object);

	if (flags & VM_MEMMAP_F_WIRED) {
		error = vm_map_wire(vmmap, gpa, gpa + len,
		    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
		if (error != KERN_SUCCESS) {
			vm_map_remove(vmmap, gpa, gpa + len);
			return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM :
			    EFAULT);
		}
	}

	map->gpa = gpa;
	map->len = len;
	map->segoff = first;
	map->segid = segid;
	map->prot = prot;
	map->flags = flags;
	return (0);
}

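/*
 * Remove the guest mapping that exactly matches [gpa, gpa + len).  Mappings
 * flagged as in use by the IOMMU are skipped.
 */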
int
vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len)
{
	struct vm_mem *mem;
	struct vm_mem_map *m;
	int i;

	mem = vm_mem(vm);
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		m = &mem->mem_maps[i];
#ifdef VM_MEMMAP_F_IOMMU
		if ((m->flags & VM_MEMMAP_F_IOMMU) != 0)
			continue;
#endif
		if (m->gpa == gpa && m->len == len) {
			vm_free_memmap(vm, i);
			return (0);
		}
	}

	return (EINVAL);
}

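/*
 * Find the memory map with the lowest guest physical address that is greater
 * than or equal to '*gpa' and return its attributes via the optional output
 * pointers.  Returns ENOENT if no such mapping exists.
 */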
int
vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
    vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
{
	struct vm_mem *mem;
	struct vm_mem_map *mm, *mmnext;
	int i;

	mem = vm_mem(vm);

	mmnext = NULL;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &mem->mem_maps[i];
		if (mm->len == 0 || mm->gpa < *gpa)
			continue;
		if (mmnext == NULL || mm->gpa < mmnext->gpa)
			mmnext = mm;
	}

	if (mmnext != NULL) {
		*gpa = mmnext->gpa;
		if (segid)
			*segid = mmnext->segid;
		if (segoff)
			*segoff = mmnext->segoff;
		if (len)
			*len = mmnext->len;
		if (prot)
			*prot = mmnext->prot;
		if (flags)
			*flags = mmnext->flags;
		return (0);
	} else {
		return (ENOENT);
	}
}

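/*
 * Remove memory map 'ident' from the guest address space and clear the slot.
 */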
static void
vm_free_memmap(struct vm *vm, int ident)
{
	struct vm_mem_map *mm;
	int error __diagused;

	mm = &vm_mem(vm)->mem_maps[ident];
	if (mm->len) {
		error = vm_map_remove(&vm_vmspace(vm)->vm_map, mm->gpa,
		    mm->gpa + mm->len);
		KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d",
		    __func__, error));
		bzero(mm, sizeof(struct vm_mem_map));
	}
}

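/*
 * Return the greatest guest physical address covered by a system memory
 * mapping, i.e. one past the end of the highest such mapping.
 */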
vm_paddr_t
vmm_sysmem_maxaddr(struct vm *vm)
{
	struct vm_mem *mem;
	struct vm_mem_map *mm;
	vm_paddr_t maxaddr;
	int i;

	mem = vm_mem(vm);
	maxaddr = 0;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &mem->mem_maps[i];
		if (sysmem_mapping(mem, i)) {
			if (maxaddr < mm->gpa + mm->len)
				maxaddr = mm->gpa + mm->len;
		}
	}
	return (maxaddr);
}

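/*
 * Wire the host page backing guest physical address 'gpa' and return a
 * pointer to it through the direct map, along with a cookie for a later
 * vm_gpa_release().  The requested range must not cross a page boundary.
 * Returns NULL if 'gpa' is not covered by any memory map or the page could
 * not be held with protection 'reqprot'.
 */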
static void *
_vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
	struct vm_mem_map *mm;
	vm_page_t m;
	int i, count, pageoff;

	pageoff = gpa & PAGE_MASK;
	if (len > PAGE_SIZE - pageoff)
		panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);

	count = 0;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm_mem(vm)->mem_maps[i];
		if (gpa >= mm->gpa && gpa < mm->gpa + mm->len) {
			count = vm_fault_quick_hold_pages(
			    &vm_vmspace(vm)->vm_map, trunc_page(gpa),
			    PAGE_SIZE, reqprot, &m, 1);
			break;
		}
	}

	if (count == 1) {
		*cookie = m;
		return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
	} else {
		*cookie = NULL;
		return (NULL);
	}
}

void *
vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
#ifdef INVARIANTS
	/*
	 * The current vcpu should be frozen to ensure 'mem_maps[]'
	 * stability.
	 */
	int state = vcpu_get_state(vcpu, NULL);
	KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d",
	    __func__, state));
#endif
	return (_vm_gpa_hold(vcpu_vm(vcpu), gpa, len, reqprot, cookie));
}

void *
vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
	vm_assert_memseg_locked(vm);
	return (_vm_gpa_hold(vm, gpa, len, reqprot, cookie));
}

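/*
 * Release a page wired by vm_gpa_hold() or vm_gpa_hold_global().
 */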
void
vm_gpa_release(void *cookie)
{
	vm_page_t m = cookie;

	vm_page_unwire(m, PQ_ACTIVE);
}
489