1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2025 Ant Group 4 * Author: Tiwei Bie <tiwei.btw@antgroup.com> 5 */ 6 #include <errno.h> 7 #include <fcntl.h> 8 #include <unistd.h> 9 #include <stdio.h> 10 #include <stdint.h> 11 #include <stdlib.h> 12 #include <string.h> 13 #include <sys/ioctl.h> 14 #include <sys/eventfd.h> 15 #include <linux/limits.h> 16 #include <linux/vfio.h> 17 #include <linux/pci_regs.h> 18 #include <as-layout.h> 19 #include <um_malloc.h> 20 21 #include "vfio_user.h" 22 23 int uml_vfio_user_open_container(void) 24 { 25 int r, fd; 26 27 fd = open("/dev/vfio/vfio", O_RDWR); 28 if (fd < 0) 29 return -errno; 30 31 r = ioctl(fd, VFIO_GET_API_VERSION); 32 if (r != VFIO_API_VERSION) { 33 r = r < 0 ? -errno : -EINVAL; 34 goto error; 35 } 36 37 r = ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU); 38 if (r <= 0) { 39 r = r < 0 ? -errno : -EINVAL; 40 goto error; 41 } 42 43 return fd; 44 45 error: 46 close(fd); 47 return r; 48 } 49 50 int uml_vfio_user_setup_iommu(int container) 51 { 52 /* 53 * This is a bit tricky. See the big comment in 54 * vhost_user_set_mem_table() in virtio_uml.c. 55 */ 56 unsigned long reserved = uml_reserved - uml_physmem; 57 struct vfio_iommu_type1_dma_map dma_map = { 58 .argsz = sizeof(dma_map), 59 .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE, 60 .vaddr = uml_reserved, 61 .iova = reserved, 62 .size = physmem_size - reserved, 63 }; 64 65 if (ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU) < 0) 66 return -errno; 67 68 if (ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map) < 0) 69 return -errno; 70 71 return 0; 72 } 73 74 int uml_vfio_user_get_group_id(const char *device) 75 { 76 char *path, *buf, *end; 77 const char *name; 78 int r; 79 80 path = uml_kmalloc(PATH_MAX, UM_GFP_KERNEL); 81 if (!path) 82 return -ENOMEM; 83 84 sprintf(path, "/sys/bus/pci/devices/%s/iommu_group", device); 85 86 buf = uml_kmalloc(PATH_MAX + 1, UM_GFP_KERNEL); 87 if (!buf) { 88 r = -ENOMEM; 89 goto free_path; 90 } 91 92 r = readlink(path, buf, PATH_MAX); 93 if (r < 0) { 94 r = -errno; 95 goto free_buf; 96 } 97 buf[r] = '\0'; 98 99 name = basename(buf); 100 101 r = strtoul(name, &end, 10); 102 if (*end != '\0' || end == name) { 103 r = -EINVAL; 104 goto free_buf; 105 } 106 107 free_buf: 108 kfree(buf); 109 free_path: 110 kfree(path); 111 return r; 112 } 113 114 int uml_vfio_user_open_group(int group_id) 115 { 116 char *path; 117 int fd; 118 119 path = uml_kmalloc(PATH_MAX, UM_GFP_KERNEL); 120 if (!path) 121 return -ENOMEM; 122 123 sprintf(path, "/dev/vfio/%d", group_id); 124 125 fd = open(path, O_RDWR); 126 if (fd < 0) { 127 fd = -errno; 128 goto out; 129 } 130 131 out: 132 kfree(path); 133 return fd; 134 } 135 136 int uml_vfio_user_set_container(int container, int group) 137 { 138 if (ioctl(group, VFIO_GROUP_SET_CONTAINER, &container) < 0) 139 return -errno; 140 return 0; 141 } 142 143 int uml_vfio_user_unset_container(int container, int group) 144 { 145 if (ioctl(group, VFIO_GROUP_UNSET_CONTAINER, &container) < 0) 146 return -errno; 147 return 0; 148 } 149 150 static int vfio_set_irqs(int device, int start, int count, int *irqfd) 151 { 152 struct vfio_irq_set *irq_set; 153 int argsz = sizeof(*irq_set) + sizeof(*irqfd) * count; 154 int err = 0; 155 156 irq_set = uml_kmalloc(argsz, UM_GFP_KERNEL); 157 if (!irq_set) 158 return -ENOMEM; 159 160 irq_set->argsz = argsz; 161 irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; 162 irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; 163 irq_set->start = start; 164 irq_set->count = count; 165 memcpy(irq_set->data, irqfd, sizeof(*irqfd) * count); 166 167 if (ioctl(device, VFIO_DEVICE_SET_IRQS, irq_set) < 0) { 168 err = -errno; 169 goto out; 170 } 171 172 out: 173 kfree(irq_set); 174 return err; 175 } 176 177 int uml_vfio_user_setup_device(struct uml_vfio_user_device *dev, 178 int group, const char *device) 179 { 180 struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; 181 struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) }; 182 int err, i; 183 184 dev->device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, device); 185 if (dev->device < 0) 186 return -errno; 187 188 if (ioctl(dev->device, VFIO_DEVICE_GET_INFO, &device_info) < 0) { 189 err = -errno; 190 goto close_device; 191 } 192 193 dev->num_regions = device_info.num_regions; 194 if (dev->num_regions > VFIO_PCI_CONFIG_REGION_INDEX + 1) 195 dev->num_regions = VFIO_PCI_CONFIG_REGION_INDEX + 1; 196 197 dev->region = uml_kmalloc(sizeof(*dev->region) * dev->num_regions, 198 UM_GFP_KERNEL); 199 if (!dev->region) { 200 err = -ENOMEM; 201 goto close_device; 202 } 203 204 for (i = 0; i < dev->num_regions; i++) { 205 struct vfio_region_info region = { 206 .argsz = sizeof(region), 207 .index = i, 208 }; 209 if (ioctl(dev->device, VFIO_DEVICE_GET_REGION_INFO, ®ion) < 0) { 210 err = -errno; 211 goto free_region; 212 } 213 dev->region[i].size = region.size; 214 dev->region[i].offset = region.offset; 215 } 216 217 /* Only MSI-X is supported currently. */ 218 irq_info.index = VFIO_PCI_MSIX_IRQ_INDEX; 219 if (ioctl(dev->device, VFIO_DEVICE_GET_IRQ_INFO, &irq_info) < 0) { 220 err = -errno; 221 goto free_region; 222 } 223 224 dev->irq_count = irq_info.count; 225 226 dev->irqfd = uml_kmalloc(sizeof(int) * dev->irq_count, UM_GFP_KERNEL); 227 if (!dev->irqfd) { 228 err = -ENOMEM; 229 goto free_region; 230 } 231 232 memset(dev->irqfd, -1, sizeof(int) * dev->irq_count); 233 234 err = vfio_set_irqs(dev->device, 0, dev->irq_count, dev->irqfd); 235 if (err) 236 goto free_irqfd; 237 238 return 0; 239 240 free_irqfd: 241 kfree(dev->irqfd); 242 free_region: 243 kfree(dev->region); 244 close_device: 245 close(dev->device); 246 return err; 247 } 248 249 void uml_vfio_user_teardown_device(struct uml_vfio_user_device *dev) 250 { 251 kfree(dev->irqfd); 252 kfree(dev->region); 253 close(dev->device); 254 } 255 256 int uml_vfio_user_activate_irq(struct uml_vfio_user_device *dev, int index) 257 { 258 int irqfd; 259 260 irqfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); 261 if (irqfd < 0) 262 return -errno; 263 264 dev->irqfd[index] = irqfd; 265 return irqfd; 266 } 267 268 void uml_vfio_user_deactivate_irq(struct uml_vfio_user_device *dev, int index) 269 { 270 close(dev->irqfd[index]); 271 dev->irqfd[index] = -1; 272 } 273 274 int uml_vfio_user_update_irqs(struct uml_vfio_user_device *dev) 275 { 276 return vfio_set_irqs(dev->device, 0, dev->irq_count, dev->irqfd); 277 } 278 279 static int vfio_region_read(struct uml_vfio_user_device *dev, unsigned int index, 280 uint64_t offset, void *buf, uint64_t size) 281 { 282 if (index >= dev->num_regions || offset + size > dev->region[index].size) 283 return -EINVAL; 284 285 if (pread(dev->device, buf, size, dev->region[index].offset + offset) < 0) 286 return -errno; 287 288 return 0; 289 } 290 291 static int vfio_region_write(struct uml_vfio_user_device *dev, unsigned int index, 292 uint64_t offset, const void *buf, uint64_t size) 293 { 294 if (index >= dev->num_regions || offset + size > dev->region[index].size) 295 return -EINVAL; 296 297 if (pwrite(dev->device, buf, size, dev->region[index].offset + offset) < 0) 298 return -errno; 299 300 return 0; 301 } 302 303 int uml_vfio_user_cfgspace_read(struct uml_vfio_user_device *dev, 304 unsigned int offset, void *buf, int size) 305 { 306 return vfio_region_read(dev, VFIO_PCI_CONFIG_REGION_INDEX, 307 offset, buf, size); 308 } 309 310 int uml_vfio_user_cfgspace_write(struct uml_vfio_user_device *dev, 311 unsigned int offset, const void *buf, int size) 312 { 313 return vfio_region_write(dev, VFIO_PCI_CONFIG_REGION_INDEX, 314 offset, buf, size); 315 } 316 317 int uml_vfio_user_bar_read(struct uml_vfio_user_device *dev, int bar, 318 unsigned int offset, void *buf, int size) 319 { 320 return vfio_region_read(dev, bar, offset, buf, size); 321 } 322 323 int uml_vfio_user_bar_write(struct uml_vfio_user_device *dev, int bar, 324 unsigned int offset, const void *buf, int size) 325 { 326 return vfio_region_write(dev, bar, offset, buf, size); 327 } 328