/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2012 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * Copyright 2020 Oxide Computer Company
 */

/*
 * Memory ranges are represented with an RB tree.  On insertion, the range
 * is checked for overlaps.  On lookup, the key is a degenerate range with
 * the same base and limit, so any registered range containing the address
 * compares as equal.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/tree.h>
#include <machine/vmm.h>

#include <assert.h>
#include <err.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#include "mem.h"

struct mmio_rb_range {
	RB_ENTRY(mmio_rb_range)	mr_link;	/* RB tree links */
	struct mem_range	mr_param;
	uint64_t		mr_base;
	uint64_t		mr_end;
};

struct mmio_rb_tree;
RB_PROTOTYPE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare);

static RB_HEAD(mmio_rb_tree, mmio_rb_range) mmio_rb_root, mmio_rb_fallback;

/*
 * Per-vCPU cache.  Since most accesses from a vCPU will be to
 * consecutive addresses in a range, it makes sense to cache the
 * result of a lookup.
 */
static struct mmio_rb_range	**mmio_hint;
static int mmio_ncpu;

static pthread_rwlock_t mmio_rwlock;
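
/*
 * The comparator below considers two ranges equal whenever they overlap.
 * Lookups exploit this by using a degenerate key whose base and end are
 * both the address being searched for: e.g. (hypothetical values) the key
 * {0x1005, 0x1005} compares equal to a registered range {0x1000, 0x1fff},
 * so RB_FIND() lands on the containing range.
 */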

static int
mmio_rb_range_compare(struct mmio_rb_range *a, struct mmio_rb_range *b)
{
	if (a->mr_end < b->mr_base)
		return (-1);
	else if (a->mr_base > b->mr_end)
		return (1);
	return (0);
}

static int
mmio_rb_lookup(struct mmio_rb_tree *rbt, uint64_t addr,
    struct mmio_rb_range **entry)
{
	struct mmio_rb_range find, *res;

	find.mr_base = find.mr_end = addr;

	res = RB_FIND(mmio_rb_tree, rbt, &find);

	if (res != NULL) {
		*entry = res;
		return (0);
	}

	return (ENOENT);
}

static int
mmio_rb_add(struct mmio_rb_tree *rbt, struct mmio_rb_range *new)
{
	struct mmio_rb_range *overlap;

	overlap = RB_INSERT(mmio_rb_tree, rbt, new);

	if (overlap != NULL) {
#ifdef RB_DEBUG
		printf("overlap detected: new %lx:%lx, tree %lx:%lx, '%s' "
		    "claims region already claimed for '%s'\n",
		    new->mr_base, new->mr_end,
		    overlap->mr_base, overlap->mr_end,
		    new->mr_param.name, overlap->mr_param.name);
#endif

		return (EEXIST);
	}

	return (0);
}

#if 0
static void
mmio_rb_dump(struct mmio_rb_tree *rbt)
{
	int perror;
	struct mmio_rb_range *np;

	pthread_rwlock_rdlock(&mmio_rwlock);
	RB_FOREACH(np, mmio_rb_tree, rbt) {
		printf(" %lx:%lx, %s\n", np->mr_base, np->mr_end,
		    np->mr_param.name);
	}
	perror = pthread_rwlock_unlock(&mmio_rwlock);
	assert(perror == 0);
}
#endif

RB_GENERATE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare);

typedef int (mem_cb_t)(struct vmctx *ctx, int vcpu, uint64_t gpa,
    struct mem_range *mr, void *arg);

static int
mem_read(void *ctx, int vcpu, uint64_t gpa, uint64_t *rval, int size, void *arg)
{
	int error;
	struct mem_range *mr = arg;

	error = (*mr->handler)(ctx, vcpu, MEM_F_READ, gpa, size,
	    rval, mr->arg1, mr->arg2);
	return (error);
}

static int
mem_write(void *ctx, int vcpu, uint64_t gpa, uint64_t wval, int size, void *arg)
{
	int error;
	struct mem_range *mr = arg;

	error = (*mr->handler)(ctx, vcpu, MEM_F_WRITE, gpa, size,
	    &wval, mr->arg1, mr->arg2);
	return (error);
}
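
/*
 * access_memory() below locates the range containing 'paddr' and invokes
 * 'cb' on it: the per-vCPU hint is consulted first, then the main tree,
 * then the fallback tree, returning ESRCH if no range matches.
 * 'mmio_rwlock' is held across the callback except for immutable ranges,
 * as explained in the comment inside the function.
 */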

static int
access_memory(struct vmctx *ctx, int vcpu, uint64_t paddr, mem_cb_t *cb,
    void *arg)
{
	struct mmio_rb_range *entry;
	int err, perror, immutable;

	pthread_rwlock_rdlock(&mmio_rwlock);
	/*
	 * First check the per-vCPU cache
	 */
	if (mmio_hint[vcpu] &&
	    paddr >= mmio_hint[vcpu]->mr_base &&
	    paddr <= mmio_hint[vcpu]->mr_end) {
		entry = mmio_hint[vcpu];
	} else
		entry = NULL;

	if (entry == NULL) {
		if (mmio_rb_lookup(&mmio_rb_root, paddr, &entry) == 0) {
			/* Update the per-vCPU cache */
			mmio_hint[vcpu] = entry;
		} else if (mmio_rb_lookup(&mmio_rb_fallback, paddr, &entry)) {
			perror = pthread_rwlock_unlock(&mmio_rwlock);
			assert(perror == 0);
			return (ESRCH);
		}
	}

	assert(entry != NULL);

	/*
	 * An 'immutable' memory range is guaranteed to be never removed
	 * so there is no need to hold 'mmio_rwlock' while calling the
	 * handler.
	 *
	 * XXX writes to the PCIR_COMMAND register can cause register_mem()
	 * to be called.  If the guest is using PCI extended config space
	 * to modify the PCIR_COMMAND register then register_mem() can
	 * deadlock on 'mmio_rwlock'.  However, by registering the extended
	 * config space window as 'immutable' the deadlock can be avoided.
	 */
	immutable = (entry->mr_param.flags & MEM_F_IMMUTABLE);
	if (immutable) {
		perror = pthread_rwlock_unlock(&mmio_rwlock);
		assert(perror == 0);
	}

	err = cb(ctx, vcpu, paddr, &entry->mr_param, arg);

	if (!immutable) {
		perror = pthread_rwlock_unlock(&mmio_rwlock);
		assert(perror == 0);
	}

	return (err);
}

static int
emulate_mem_cb(struct vmctx *ctx, int vcpu, uint64_t paddr,
    struct mem_range *mr, void *arg)
{
	struct vm_mmio *mmio;
	int err = 0;

	mmio = arg;

	if (mmio->read != 0) {
		err = mem_read(ctx, vcpu, paddr, &mmio->data, mmio->bytes, mr);
	} else {
		err = mem_write(ctx, vcpu, paddr, mmio->data, mmio->bytes, mr);
	}

	return (err);
}

int
emulate_mem(struct vmctx *ctx, int vcpu, struct vm_mmio *mmio)
{
	return (access_memory(ctx, vcpu, mmio->gpa, emulate_mem_cb, mmio));
}

struct rw_mem_args {
	uint64_t *val;
	int size;
	int operation;
};

static int
rw_mem_cb(struct vmctx *ctx, int vcpu, uint64_t paddr, struct mem_range *mr,
    void *arg)
{
	struct rw_mem_args *rma;

	rma = arg;
	return (mr->handler(ctx, vcpu, rma->operation, paddr, rma->size,
	    rma->val, mr->arg1, mr->arg2));
}

int
read_mem(struct vmctx *ctx, int vcpu, uint64_t gpa, uint64_t *rval, int size)
{
	struct rw_mem_args rma;

	rma.val = rval;
	rma.size = size;
	rma.operation = MEM_F_READ;
	return (access_memory(ctx, vcpu, gpa, rw_mem_cb, &rma));
}

int
write_mem(struct vmctx *ctx, int vcpu, uint64_t gpa, uint64_t wval, int size)
{
	struct rw_mem_args rma;

	rma.val = &wval;
	rma.size = size;
	rma.operation = MEM_F_WRITE;
	return (access_memory(ctx, vcpu, gpa, rw_mem_cb, &rma));
}
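
/*
 * register_mem_int() below allocates an mmio_rb_range describing 'memp'
 * and inserts it into the given tree under the write lock.  Insertion is
 * skipped when a range already covers 'memp->base', and an overlapping
 * insert fails with EEXIST.
 */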

static int
register_mem_int(struct mmio_rb_tree *rbt, struct mem_range *memp)
{
	struct mmio_rb_range *entry, *mrp;
	int err, perror;

	err = 0;

	mrp = malloc(sizeof(struct mmio_rb_range));
	if (mrp == NULL) {
		warn("%s: couldn't allocate memory for mrp", __func__);
		err = ENOMEM;
	} else {
		mrp->mr_param = *memp;
		mrp->mr_base = memp->base;
		mrp->mr_end = memp->base + memp->size - 1;
		pthread_rwlock_wrlock(&mmio_rwlock);
		if (mmio_rb_lookup(rbt, memp->base, &entry) != 0)
			err = mmio_rb_add(rbt, mrp);
#ifndef __FreeBSD__
		else /* smatch warn: possible memory leak of 'mrp' */
			free(mrp);
#endif
		perror = pthread_rwlock_unlock(&mmio_rwlock);
		assert(perror == 0);
		if (err)
			free(mrp);
	}

	return (err);
}

int
register_mem(struct mem_range *memp)
{

	return (register_mem_int(&mmio_rb_root, memp));
}

int
register_mem_fallback(struct mem_range *memp)
{

	return (register_mem_int(&mmio_rb_fallback, memp));
}

int
unregister_mem(struct mem_range *memp)
{
	struct mem_range *mr;
	struct mmio_rb_range *entry = NULL;
	int err, perror, i;

	pthread_rwlock_wrlock(&mmio_rwlock);
	err = mmio_rb_lookup(&mmio_rb_root, memp->base, &entry);
	if (err == 0) {
		mr = &entry->mr_param;
		assert(mr->name == memp->name);
		assert(mr->base == memp->base && mr->size == memp->size);
		assert((mr->flags & MEM_F_IMMUTABLE) == 0);
		RB_REMOVE(mmio_rb_tree, &mmio_rb_root, entry);

		/* flush the per-vCPU cache */
		for (i = 0; i < mmio_ncpu; i++) {
			if (mmio_hint[i] == entry)
				mmio_hint[i] = NULL;
		}
	}
	perror = pthread_rwlock_unlock(&mmio_rwlock);
	assert(perror == 0);

	if (entry)
		free(entry);

	return (err);
}

void
init_mem(int ncpu)
{

	mmio_ncpu = ncpu;
	mmio_hint = calloc(ncpu, sizeof(*mmio_hint));
	RB_INIT(&mmio_rb_root);
	RB_INIT(&mmio_rb_fallback);
	pthread_rwlock_init(&mmio_rwlock, NULL);
}
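
#if 0
/*
 * Usage sketch, not compiled (following the mmio_rb_dump() convention
 * above): a hypothetical device registers a 4KB region and a guest read
 * is serviced through it.  'example_handler', 'example_usage', and the
 * addresses are illustrative only; the handler signature is inferred
 * from the calls made by mem_read()/mem_write()/rw_mem_cb() above.
 */
static int
example_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
    int size, uint64_t *val, void *arg1, long arg2)
{
	if (dir == MEM_F_READ)
		*val = 0;	/* device-specific read value */
	return (0);
}

static void
example_usage(struct vmctx *ctx)
{
	struct mem_range mr;
	uint64_t val;

	init_mem(1);

	mr.name = "exampledev";
	mr.base = 0xc0000000;
	mr.size = 0x1000;
	mr.flags = MEM_F_RW;
	mr.handler = example_handler;
	mr.arg1 = NULL;
	mr.arg2 = 0;
	assert(register_mem(&mr) == 0);

	/* Routed through access_memory() -> rw_mem_cb() -> example_handler */
	assert(read_mem(ctx, 0, 0xc0000800, &val, 4) == 0);

	assert(unregister_mem(&mr) == 0);
}
#endif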