xref: /freebsd/sys/dev/vmm/vmm_mem.c (revision 059b0b7046639121f3dca48f5de051e019f9d57c)
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 */

#include <sys/types.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/sx.h>
#include <sys/systm.h>

#include <machine/vmm.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_mem.h>

static void vm_free_memmap(struct vm *vm, int ident);

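/*
 * Initialize the memory state of a virtual machine: allocate the vmspace
 * that backs the guest physical address space and set up the lock that
 * protects the memory segment array.
 */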
int
vm_mem_init(struct vm_mem *mem, vm_offset_t lo, vm_offset_t hi)
{
	mem->mem_vmspace = vmmops_vmspace_alloc(lo, hi);
	if (mem->mem_vmspace == NULL)
		return (ENOMEM);
	sx_init(&mem->mem_segs_lock, "vm_mem_segs");
	return (0);
}

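/*
 * Return true if the memory map at index 'idx' is in use and backed by a
 * system memory segment (as opposed to device memory).
 */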
static bool
sysmem_mapping(struct vm_mem *mem, int idx)
{
	if (mem->mem_maps[idx].len != 0 &&
	    mem->mem_segs[mem->mem_maps[idx].segid].sysmem)
		return (true);
	else
		return (false);
}

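/*
 * Return true if the memory segment identified by 'ident' is system memory.
 * The caller must hold the memory segment lock.
 */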
bool
vm_memseg_sysmem(struct vm *vm, int ident)
{
	struct vm_mem *mem;

	mem = vm_mem(vm);
	vm_assert_memseg_locked(vm);

	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
		return (false);

	return (mem->mem_segs[ident].sysmem);
}

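/*
 * Tear down the mappings that do not survive a VM reset; see the comment
 * below for why only device memory mappings are removed here.
 */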
void
vm_mem_cleanup(struct vm *vm)
{
	struct vm_mem *mem;

	mem = vm_mem(vm);

	/*
	 * System memory is removed from the guest address space only when
	 * the VM is destroyed. This is because the mapping remains the same
	 * across VM reset.
	 *
	 * Device memory can be relocated by the guest (e.g. using PCI BARs)
	 * so those mappings are removed on a VM reset.
	 */
	for (int i = 0; i < VM_MAX_MEMMAPS; i++) {
		if (!sysmem_mapping(mem, i))
			vm_free_memmap(vm, i);
	}
}

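/*
 * Release all memory state when a VM is destroyed: remove the remaining
 * (system memory) mappings, free all memory segments, free the guest
 * vmspace and destroy the memory segment lock, which the caller must hold
 * exclusively on entry.
 */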
void
vm_mem_destroy(struct vm *vm)
{
	struct vm_mem *mem;

	mem = vm_mem(vm);
	vm_assert_memseg_xlocked(vm);

	for (int i = 0; i < VM_MAX_MEMMAPS; i++) {
		if (sysmem_mapping(mem, i))
			vm_free_memmap(vm, i);
	}

	for (int i = 0; i < VM_MAX_MEMSEGS; i++)
		vm_free_memseg(vm, i);

	vmmops_vmspace_free(mem->mem_vmspace);

	sx_xunlock(&mem->mem_segs_lock);
	sx_destroy(&mem->mem_segs_lock);
}

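/*
 * Return the vmspace that backs the guest physical address space.
 */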
struct vmspace *
vm_vmspace(struct vm *vm)
{
	struct vm_mem *mem;

	mem = vm_mem(vm);
	return (mem->mem_vmspace);
}

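/*
 * Wrappers around the sx lock that serializes updates to the memory
 * segment array.
 */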
void
vm_slock_memsegs(struct vm *vm)
{
	sx_slock(&vm_mem(vm)->mem_segs_lock);
}

void
vm_xlock_memsegs(struct vm *vm)
{
	sx_xlock(&vm_mem(vm)->mem_segs_lock);
}

void
vm_unlock_memsegs(struct vm *vm)
{
	sx_unlock(&vm_mem(vm)->mem_segs_lock);
}

void
vm_assert_memseg_locked(struct vm *vm)
{
	sx_assert(&vm_mem(vm)->mem_segs_lock, SX_LOCKED);
}

void
vm_assert_memseg_xlocked(struct vm *vm)
{
	sx_assert(&vm_mem(vm)->mem_segs_lock, SX_XLOCKED);
}

/*
 * Return 'true' if 'gpa' is allocated in the guest address space.
 *
 * This function is called in the context of a running vcpu which acts as
 * an implicit lock on 'vm->mem_maps[]'.
 */
bool
vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa)
{
	struct vm *vm = vcpu_vm(vcpu);
	struct vm_mem_map *mm;
	int i;

#ifdef INVARIANTS
	int hostcpu, state;
	state = vcpu_get_state(vcpu, &hostcpu);
	KASSERT(state == VCPU_RUNNING && hostcpu == curcpu,
	    ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu));
#endif

	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm_mem(vm)->mem_maps[i];
		if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len)
			return (true);		/* 'gpa' is sysmem or devmem */
	}

	return (false);
}

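/*
 * Allocate a memory segment of 'len' bytes backed by an anonymous (swap)
 * VM object. An optional NUMA domain policy may be supplied for the
 * backing object. Returns EEXIST if the segment was already created with
 * identical parameters. The caller must hold the memory segment lock
 * exclusively.
 */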
int
vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem,
    struct domainset *obj_domainset)
{
	struct vm_mem_seg *seg;
	struct vm_mem *mem;
	vm_object_t obj;

	mem = vm_mem(vm);
	vm_assert_memseg_xlocked(vm);

	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
		return (EINVAL);

	if (len == 0 || (len & PAGE_MASK))
		return (EINVAL);

	seg = &mem->mem_segs[ident];
	if (seg->object != NULL) {
		if (seg->len == len && seg->sysmem == sysmem)
			return (EEXIST);
		else
			return (EINVAL);
	}

	/*
	 * When given an impossible policy, signal an
	 * error to the user.
	 */
	if (obj_domainset != NULL && domainset_empty_vm(obj_domainset))
		return (EINVAL);
	obj = vm_object_allocate(OBJT_SWAP, len >> PAGE_SHIFT);
	if (obj == NULL)
		return (ENOMEM);

	seg->len = len;
	seg->object = obj;
	if (obj_domainset != NULL)
		seg->object->domain.dr_policy = obj_domainset;
	seg->sysmem = sysmem;

	return (0);
}

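/*
 * Look up the memory segment identified by 'ident' and return its length,
 * type and backing object through the optional output pointers. The caller
 * must hold the memory segment lock.
 */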
int
vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
    vm_object_t *objptr)
{
	struct vm_mem *mem;
	struct vm_mem_seg *seg;

	mem = vm_mem(vm);

	vm_assert_memseg_locked(vm);

	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
		return (EINVAL);

	seg = &mem->mem_segs[ident];
	if (len)
		*len = seg->len;
	if (sysmem)
		*sysmem = seg->sysmem;
	if (objptr)
		*objptr = seg->object;
	return (0);
}

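/*
 * Free the memory segment identified by 'ident' by dropping the reference
 * on its backing object and clearing the segment descriptor.
 */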
void
vm_free_memseg(struct vm *vm, int ident)
{
	struct vm_mem_seg *seg;

	KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS,
	    ("%s: invalid memseg ident %d", __func__, ident));

	seg = &vm_mem(vm)->mem_segs[ident];
	if (seg->object != NULL) {
		vm_object_deallocate(seg->object);
		bzero(seg, sizeof(struct vm_mem_seg));
	}
}

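/*
 * Map the range [first, first + len) of memory segment 'segid' into the
 * guest physical address space at 'gpa'. The mapping is recorded in a free
 * slot of the memory map array and is optionally wired.
 */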
int
vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first,
    size_t len, int prot, int flags)
{
	struct vm_mem *mem;
	struct vm_mem_seg *seg;
	struct vm_mem_map *m, *map;
	struct vm_map *vmmap;
	vm_ooffset_t last;
	int i, error;

	if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0)
		return (EINVAL);

	if (flags & ~VM_MEMMAP_F_WIRED)
		return (EINVAL);

	if (segid < 0 || segid >= VM_MAX_MEMSEGS)
		return (EINVAL);

	mem = vm_mem(vm);
	seg = &mem->mem_segs[segid];
	if (seg->object == NULL)
		return (EINVAL);

	last = first + len;
	if (first < 0 || first >= last || last > seg->len)
		return (EINVAL);

	if ((gpa | first | last) & PAGE_MASK)
		return (EINVAL);

	map = NULL;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		m = &mem->mem_maps[i];
		if (m->len == 0) {
			map = m;
			break;
		}
	}
	if (map == NULL)
		return (ENOSPC);

	vmmap = &mem->mem_vmspace->vm_map;
	error = vm_map_find(vmmap, seg->object, first, &gpa, len, 0,
	    VMFS_NO_SPACE, prot, prot, 0);
	if (error != KERN_SUCCESS)
		return (EFAULT);

	vm_object_reference(seg->object);

	if (flags & VM_MEMMAP_F_WIRED) {
		error = vm_map_wire(vmmap, gpa, gpa + len,
		    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
		if (error != KERN_SUCCESS) {
			vm_map_remove(vmmap, gpa, gpa + len);
			return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM :
			    EFAULT);
		}
	}

	map->gpa = gpa;
	map->len = len;
	map->segoff = first;
	map->segid = segid;
	map->prot = prot;
	map->flags = flags;
	return (0);
}

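/*
 * Remove the memory map that starts at 'gpa' and is exactly 'len' bytes
 * long. Mappings that are under IOMMU control are skipped.
 */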
int
vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len)
{
	struct vm_mem *mem;
	struct vm_mem_map *m;
	int i;

	mem = vm_mem(vm);
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		m = &mem->mem_maps[i];
#ifdef VM_MEMMAP_F_IOMMU
		if ((m->flags & VM_MEMMAP_F_IOMMU) != 0)
			continue;
#endif
		if (m->gpa == gpa && m->len == len) {
			vm_free_memmap(vm, i);
			return (0);
		}
	}

	return (EINVAL);
}

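/*
 * Find the memory map with the lowest guest physical address that is
 * greater than or equal to '*gpa' and return its attributes through the
 * optional output pointers. Returns ENOENT if no such mapping exists.
 */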
int
vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
    vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
{
	struct vm_mem *mem;
	struct vm_mem_map *mm, *mmnext;
	int i;

	mem = vm_mem(vm);

	mmnext = NULL;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &mem->mem_maps[i];
		if (mm->len == 0 || mm->gpa < *gpa)
			continue;
		if (mmnext == NULL || mm->gpa < mmnext->gpa)
			mmnext = mm;
	}

	if (mmnext != NULL) {
		*gpa = mmnext->gpa;
		if (segid)
			*segid = mmnext->segid;
		if (segoff)
			*segoff = mmnext->segoff;
		if (len)
			*len = mmnext->len;
		if (prot)
			*prot = mmnext->prot;
		if (flags)
			*flags = mmnext->flags;
		return (0);
	} else {
		return (ENOENT);
	}
}

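/*
 * Remove the memory map at index 'ident' from the guest vmspace and clear
 * its descriptor.
 */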
static void
vm_free_memmap(struct vm *vm, int ident)
{
	struct vm_mem_map *mm;
	int error __diagused;

	mm = &vm_mem(vm)->mem_maps[ident];
	if (mm->len) {
		error = vm_map_remove(&vm_vmspace(vm)->vm_map, mm->gpa,
		    mm->gpa + mm->len);
		KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d",
		    __func__, error));
		bzero(mm, sizeof(struct vm_mem_map));
	}
}

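/*
 * Return the highest guest physical address covered by a system memory
 * mapping, or 0 if there are none.
 */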
vm_paddr_t
vmm_sysmem_maxaddr(struct vm *vm)
{
	struct vm_mem *mem;
	struct vm_mem_map *mm;
	vm_paddr_t maxaddr;
	int i;

	mem = vm_mem(vm);
	maxaddr = 0;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &mem->mem_maps[i];
		if (sysmem_mapping(mem, i)) {
			if (maxaddr < mm->gpa + mm->len)
				maxaddr = mm->gpa + mm->len;
		}
	}
	return (maxaddr);
}

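/*
 * Hold the page backing guest physical address 'gpa' and return a pointer
 * to it in the kernel's direct map. The held page is returned through
 * 'cookie' and must be released with vm_gpa_release(). The requested range
 * must not cross a page boundary.
 */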
static void *
_vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
	struct vm_mem_map *mm;
	vm_page_t m;
	int i, count, pageoff;

	pageoff = gpa & PAGE_MASK;
	if (len > PAGE_SIZE - pageoff)
		panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);

	count = 0;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm_mem(vm)->mem_maps[i];
		if (gpa >= mm->gpa && gpa < mm->gpa + mm->len) {
			count = vm_fault_quick_hold_pages(
			    &vm_vmspace(vm)->vm_map, trunc_page(gpa),
			    PAGE_SIZE, reqprot, &m, 1);
			break;
		}
	}

	if (count == 1) {
		*cookie = m;
		return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
	} else {
		*cookie = NULL;
		return (NULL);
	}
}

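/*
 * Hold a guest physical page on behalf of a vcpu. The vcpu must be frozen
 * so that the memory map array cannot change underneath the caller.
 */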
void *
vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
#ifdef INVARIANTS
	/*
	 * The current vcpu should be frozen to ensure 'vm_memmap[]'
	 * stability.
	 */
	int state = vcpu_get_state(vcpu, NULL);
	KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d",
	    __func__, state));
#endif
	return (_vm_gpa_hold(vcpu_vm(vcpu), gpa, len, reqprot, cookie));
}

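/*
 * Hold a guest physical page without a vcpu context; the memory segment
 * lock provides the required stability instead.
 */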
void *
vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
	vm_assert_memseg_locked(vm);
	return (_vm_gpa_hold(vm, gpa, len, reqprot, cookie));
}

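/*
 * Release a page held by vm_gpa_hold() or vm_gpa_hold_global().
 */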
void
vm_gpa_release(void *cookie)
{
	vm_page_t m = cookie;

	vm_page_unwire(m, PQ_ACTIVE);
}
486