vfio_pci.c: 11c4cd07ba111a09f49625f9e4c851d83daf0a22 (old) vs. abafbc551fddede3e0a08dee1dcde08fc0eb8476 (new)
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
4 * Author: Alex Williamson <alex.williamson@redhat.com>
5 *
6 * Derived from original vfio:
7 * Copyright 2010 Cisco Systems, Inc. All rights reserved.
8 * Author: Tom Lyon, pugs@cisco.com

--- 12 unchanged lines hidden ---

21#include <linux/pci.h>
22#include <linux/pm_runtime.h>
23#include <linux/slab.h>
24#include <linux/types.h>
25#include <linux/uaccess.h>
26#include <linux/vfio.h>
27#include <linux/vgaarb.h>
28#include <linux/nospec.h>
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
4 * Author: Alex Williamson <alex.williamson@redhat.com>
5 *
6 * Derived from original vfio:
7 * Copyright 2010 Cisco Systems, Inc. All rights reserved.
8 * Author: Tom Lyon, pugs@cisco.com

--- 12 unchanged lines hidden ---

21#include <linux/pci.h>
22#include <linux/pm_runtime.h>
23#include <linux/slab.h>
24#include <linux/types.h>
25#include <linux/uaccess.h>
26#include <linux/vfio.h>
27#include <linux/vgaarb.h>
28#include <linux/nospec.h>
29#include <linux/sched/mm.h>
29
30#include "vfio_pci_private.h"
31
32#define DRIVER_VERSION "0.2"
33#define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>"
34#define DRIVER_DESC "VFIO PCI - User Level meta-driver"
35
36static char ids[1024] __initdata;

--- 142 unchanged lines hidden ---

179 */
180no_mmap:
181 vdev->bar_mmap_supported[bar] = false;
182 }
183}
184
185static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev);
186static void vfio_pci_disable(struct vfio_pci_device *vdev);
30
31#include "vfio_pci_private.h"
32
33#define DRIVER_VERSION "0.2"
34#define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>"
35#define DRIVER_DESC "VFIO PCI - User Level meta-driver"
36
37static char ids[1024] __initdata;

--- 142 unchanged lines hidden ---

180 */
181no_mmap:
182 vdev->bar_mmap_supported[bar] = false;
183 }
184}
185
186static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev);
187static void vfio_pci_disable(struct vfio_pci_device *vdev);
188static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data);
187
188/*
189 * INTx masking requires the ability to disable INTx signaling via PCI_COMMAND
190 * _and_ the ability to detect when the device is asserting INTx via PCI_STATUS.
191 * If a device implements the former but not the latter, we would typically
192 * expect broken_intx_masking to be set and require an exclusive interrupt.
193 * However, since we do have control of the device's ability to assert INTx,
194 * we can instead pretend that the device does not implement INTx, virtualizing

--- 536 unchanged lines hidden ---

731 vdev->region[vdev->num_regions].flags = flags;
732 vdev->region[vdev->num_regions].data = data;
733
734 vdev->num_regions++;
735
736 return 0;
737}
738
189
190/*
191 * INTx masking requires the ability to disable INTx signaling via PCI_COMMAND
192 * _and_ the ability to detect when the device is asserting INTx via PCI_STATUS.
193 * If a device implements the former but not the latter, we would typically
194 * expect broken_intx_masking to be set and require an exclusive interrupt.
195 * However, since we do have control of the device's ability to assert INTx,
196 * we can instead pretend that the device does not implement INTx, virtualizing

--- 536 unchanged lines hidden ---

733 vdev->region[vdev->num_regions].flags = flags;
734 vdev->region[vdev->num_regions].data = data;
735
736 vdev->num_regions++;
737
738 return 0;
739}
740
741struct vfio_devices {
742 struct vfio_device **devices;
743 int cur_index;
744 int max_index;
745};
746
739static long vfio_pci_ioctl(void *device_data,
740 unsigned int cmd, unsigned long arg)
741{
742 struct vfio_pci_device *vdev = device_data;
743 unsigned long minsz;
744
745 if (cmd == VFIO_DEVICE_GET_INFO) {
746 struct vfio_device_info info;

--- 57 unchanged lines hidden ---

804 }
805 }
806
807 break;
808 case VFIO_PCI_ROM_REGION_INDEX:
809 {
810 void __iomem *io;
811 size_t size;
747static long vfio_pci_ioctl(void *device_data,
748 unsigned int cmd, unsigned long arg)
749{
750 struct vfio_pci_device *vdev = device_data;
751 unsigned long minsz;
752
753 if (cmd == VFIO_DEVICE_GET_INFO) {
754 struct vfio_device_info info;

--- 57 unchanged lines hidden ---

812 }
813 }
814
815 break;
816 case VFIO_PCI_ROM_REGION_INDEX:
817 {
818 void __iomem *io;
819 size_t size;
812 u16 orig_cmd;
820 u16 cmd;
813
814 info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
815 info.flags = 0;
816
817 /* Report the BAR size, not the ROM size */
818 info.size = pci_resource_len(pdev, info.index);
819 if (!info.size) {
820 /* Shadow ROMs appear as PCI option ROMs */
821 if (pdev->resource[PCI_ROM_RESOURCE].flags &
822 IORESOURCE_ROM_SHADOW)
823 info.size = 0x20000;
824 else
825 break;
826 }
827
828 /*
829 * Is it really there? Enable memory decode for
830 * implicit access in pci_map_rom().
831 */
821
822 info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
823 info.flags = 0;
824
825 /* Report the BAR size, not the ROM size */
826 info.size = pci_resource_len(pdev, info.index);
827 if (!info.size) {
828 /* Shadow ROMs appear as PCI option ROMs */
829 if (pdev->resource[PCI_ROM_RESOURCE].flags &
830 IORESOURCE_ROM_SHADOW)
831 info.size = 0x20000;
832 else
833 break;
834 }
835
836 /*
837 * Is it really there? Enable memory decode for
838 * implicit access in pci_map_rom().
839 */
832 pci_read_config_word(pdev, PCI_COMMAND, &orig_cmd);
833 pci_write_config_word(pdev, PCI_COMMAND,
834 orig_cmd | PCI_COMMAND_MEMORY);
835
840 cmd = vfio_pci_memory_lock_and_enable(vdev);
836 io = pci_map_rom(pdev, &size);
837 if (io) {
838 info.flags = VFIO_REGION_INFO_FLAG_READ;
839 pci_unmap_rom(pdev, io);
840 } else {
841 info.size = 0;
842 }
841 io = pci_map_rom(pdev, &size);
842 if (io) {
843 info.flags = VFIO_REGION_INFO_FLAG_READ;
844 pci_unmap_rom(pdev, io);
845 } else {
846 info.size = 0;
847 }
848 vfio_pci_memory_unlock_and_restore(vdev, cmd);
843
849
844 pci_write_config_word(pdev, PCI_COMMAND, orig_cmd);
845 break;
846 }
847 case VFIO_PCI_VGA_REGION_INDEX:
848 if (!vdev->has_vga)
849 return -EINVAL;
850
851 info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
852 info.size = 0xc0000;

--- 126 unchanged lines hidden ---

979 hdr.start, hdr.count, data);
980
981 mutex_unlock(&vdev->igate);
982 kfree(data);
983
984 return ret;
985
986 } else if (cmd == VFIO_DEVICE_RESET) {
850 break;
851 }
852 case VFIO_PCI_VGA_REGION_INDEX:
853 if (!vdev->has_vga)
854 return -EINVAL;
855
856 info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
857 info.size = 0xc0000;

--- 126 unchanged lines hidden ---

984 hdr.start, hdr.count, data);
985
986 mutex_unlock(&vdev->igate);
987 kfree(data);
988
989 return ret;
990
991 } else if (cmd == VFIO_DEVICE_RESET) {
987 return vdev->reset_works ?
988 pci_try_reset_function(vdev->pdev) : -EINVAL;
992 int ret;
989
993
994 if (!vdev->reset_works)
995 return -EINVAL;
996
997 vfio_pci_zap_and_down_write_memory_lock(vdev);
998 ret = pci_try_reset_function(vdev->pdev);
999 up_write(&vdev->memory_lock);
1000
1001 return ret;
1002
990 } else if (cmd == VFIO_DEVICE_GET_PCI_HOT_RESET_INFO) {
991 struct vfio_pci_hot_reset_info hdr;
992 struct vfio_pci_fill_info fill = { 0 };
993 struct vfio_pci_dependent_device *devices = NULL;
994 bool slot = false;
995 int ret = 0;
996
997 minsz = offsetofend(struct vfio_pci_hot_reset_info, count);

--- 62 unchanged lines hidden ---

1060 kfree(devices);
1061 return ret;
1062
1063 } else if (cmd == VFIO_DEVICE_PCI_HOT_RESET) {
1064 struct vfio_pci_hot_reset hdr;
1065 int32_t *group_fds;
1066 struct vfio_pci_group_entry *groups;
1067 struct vfio_pci_group_info info;
1003 } else if (cmd == VFIO_DEVICE_GET_PCI_HOT_RESET_INFO) {
1004 struct vfio_pci_hot_reset_info hdr;
1005 struct vfio_pci_fill_info fill = { 0 };
1006 struct vfio_pci_dependent_device *devices = NULL;
1007 bool slot = false;
1008 int ret = 0;
1009
1010 minsz = offsetofend(struct vfio_pci_hot_reset_info, count);

--- 62 unchanged lines hidden ---

1073 kfree(devices);
1074 return ret;
1075
1076 } else if (cmd == VFIO_DEVICE_PCI_HOT_RESET) {
1077 struct vfio_pci_hot_reset hdr;
1078 int32_t *group_fds;
1079 struct vfio_pci_group_entry *groups;
1080 struct vfio_pci_group_info info;
1081 struct vfio_devices devs = { .cur_index = 0 };
1068 bool slot = false;
1082 bool slot = false;
1069 int i, count = 0, ret = 0;
1083 int i, group_idx, mem_idx = 0, count = 0, ret = 0;
1070
1071 minsz = offsetofend(struct vfio_pci_hot_reset, count);
1072
1073 if (copy_from_user(&hdr, (void __user *)arg, minsz))
1074 return -EFAULT;
1075
1076 if (hdr.argsz < minsz || hdr.flags)
1077 return -EINVAL;

--- 35 unchanged lines hidden ---

1113 return -EFAULT;
1114 }
1115
1116 /*
1117 * For each group_fd, get the group through the vfio external
1118 * user interface and store the group and iommu ID. This
1119 * ensures the group is held across the reset.
1120 */
1084
1085 minsz = offsetofend(struct vfio_pci_hot_reset, count);
1086
1087 if (copy_from_user(&hdr, (void __user *)arg, minsz))
1088 return -EFAULT;
1089
1090 if (hdr.argsz < minsz || hdr.flags)
1091 return -EINVAL;

--- 35 unchanged lines hidden ---

1127 return -EFAULT;
1128 }
1129
1130 /*
1131 * For each group_fd, get the group through the vfio external
1132 * user interface and store the group and iommu ID. This
1133 * ensures the group is held across the reset.
1134 */
1121 for (i = 0; i < hdr.count; i++) {
1135 for (group_idx = 0; group_idx < hdr.count; group_idx++) {
1122 struct vfio_group *group;
1136 struct vfio_group *group;
1123 struct fd f = fdget(group_fds[i]);
1137 struct fd f = fdget(group_fds[group_idx]);
1124 if (!f.file) {
1125 ret = -EBADF;
1126 break;
1127 }
1128
1129 group = vfio_group_get_external_user(f.file);
1130 fdput(f);
1131 if (IS_ERR(group)) {
1132 ret = PTR_ERR(group);
1133 break;
1134 }
1135
1138 if (!f.file) {
1139 ret = -EBADF;
1140 break;
1141 }
1142
1143 group = vfio_group_get_external_user(f.file);
1144 fdput(f);
1145 if (IS_ERR(group)) {
1146 ret = PTR_ERR(group);
1147 break;
1148 }
1149
1136 groups[i].group = group;
1137 groups[i].id = vfio_external_user_iommu_id(group);
1150 groups[group_idx].group = group;
1151 groups[group_idx].id =
1152 vfio_external_user_iommu_id(group);
1138 }
1139
1140 kfree(group_fds);
1141
1142 /* release reference to groups on error */
1143 if (ret)
1144 goto hot_reset_release;
1145
1146 info.count = hdr.count;
1147 info.groups = groups;
1148
1149 /*
1150 * Test whether all the affected devices are contained
1151 * by the set of groups provided by the user.
1152 */
1153 ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
1154 vfio_pci_validate_devs,
1155 &info, slot);
1153 }
1154
1155 kfree(group_fds);
1156
1157 /* release reference to groups on error */
1158 if (ret)
1159 goto hot_reset_release;
1160
1161 info.count = hdr.count;
1162 info.groups = groups;
1163
1164 /*
1165 * Test whether all the affected devices are contained
1166 * by the set of groups provided by the user.
1167 */
1168 ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
1169 vfio_pci_validate_devs,
1170 &info, slot);
1156 if (!ret)
1157 /* User has access, do the reset */
1158 ret = pci_reset_bus(vdev->pdev);
1171 if (ret)
1172 goto hot_reset_release;
1159
1173
1174 devs.max_index = count;
1175 devs.devices = kcalloc(count, sizeof(struct vfio_device *),
1176 GFP_KERNEL);
1177 if (!devs.devices) {
1178 ret = -ENOMEM;
1179 goto hot_reset_release;
1180 }
1181
1182 /*
1183 * We need to get memory_lock for each device, but devices
1184 * can share mmap_sem, therefore we need to zap and hold
1185 * the vma_lock for each device, and only then get each
1186 * memory_lock.
1187 */
1188 ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
1189 vfio_pci_try_zap_and_vma_lock_cb,
1190 &devs, slot);
1191 if (ret)
1192 goto hot_reset_release;
1193
1194 for (; mem_idx < devs.cur_index; mem_idx++) {
1195 struct vfio_pci_device *tmp;
1196
1197 tmp = vfio_device_data(devs.devices[mem_idx]);
1198
1199 ret = down_write_trylock(&tmp->memory_lock);
1200 if (!ret) {
1201 ret = -EBUSY;
1202 goto hot_reset_release;
1203 }
1204 mutex_unlock(&tmp->vma_lock);
1205 }
1206
1207 /* User has access, do the reset */
1208 ret = pci_reset_bus(vdev->pdev);
1209
1160hot_reset_release:
1210hot_reset_release:
1161 for (i--; i >= 0; i--)
1162 vfio_group_put_external_user(groups[i].group);
1211 for (i = 0; i < devs.cur_index; i++) {
1212 struct vfio_device *device;
1213 struct vfio_pci_device *tmp;
1163
1214
1215 device = devs.devices[i];
1216 tmp = vfio_device_data(device);
1217
1218 if (i < mem_idx)
1219 up_write(&tmp->memory_lock);
1220 else
1221 mutex_unlock(&tmp->vma_lock);
1222 vfio_device_put(device);
1223 }
1224 kfree(devs.devices);
1225
1226 for (group_idx--; group_idx >= 0; group_idx--)
1227 vfio_group_put_external_user(groups[group_idx].group);
1228
1164 kfree(groups);
1165 return ret;
1166 } else if (cmd == VFIO_DEVICE_IOEVENTFD) {
1167 struct vfio_device_ioeventfd ioeventfd;
1168 int count;
1169
1170 minsz = offsetofend(struct vfio_device_ioeventfd, fd);
1171

--- 122 unchanged lines hidden ---

1294 size_t count, loff_t *ppos)
1295{
1296 if (!count)
1297 return 0;
1298
1299 return vfio_pci_rw(device_data, (char __user *)buf, count, ppos, true);
1300}
1301
1229 kfree(groups);
1230 return ret;
1231 } else if (cmd == VFIO_DEVICE_IOEVENTFD) {
1232 struct vfio_device_ioeventfd ioeventfd;
1233 int count;
1234
1235 minsz = offsetofend(struct vfio_device_ioeventfd, fd);
1236

--- 122 unchanged lines hidden ---

1359 size_t count, loff_t *ppos)
1360{
1361 if (!count)
1362 return 0;
1363
1364 return vfio_pci_rw(device_data, (char __user *)buf, count, ppos, true);
1365}
1366
1302static int vfio_pci_add_vma(struct vfio_pci_device *vdev,
1303 struct vm_area_struct *vma)
1367/* Return 1 on zap and vma_lock acquired, 0 on contention (only with @try) */
1368static int vfio_pci_zap_and_vma_lock(struct vfio_pci_device *vdev, bool try)
1304{
1369{
1370 struct vfio_pci_mmap_vma *mmap_vma, *tmp;
1371
1372 /*
1373 * Lock ordering:
1374 * vma_lock is nested under mmap_sem for vm_ops callback paths.
1375 * The memory_lock semaphore is used by both code paths calling
1376 * into this function to zap vmas and the vm_ops.fault callback
1377 * to protect the memory enable state of the device.
1378 *
1379 * When zapping vmas we need to maintain the mmap_sem => vma_lock
1380 * ordering, which requires using vma_lock to walk vma_list to
1381 * acquire an mm, then dropping vma_lock to get the mmap_sem and
1382 * reacquiring vma_lock. This logic is derived from similar
1383 * requirements in uverbs_user_mmap_disassociate().
1384 *
1385 * mmap_sem must always be the top-level lock when it is taken.
1386 * Therefore we can only hold the memory_lock write lock when
1387 * vma_list is empty, as we'd need to take mmap_sem to clear
1388 * entries. vma_list can only be guaranteed empty when holding
1389 * vma_lock, thus memory_lock is nested under vma_lock.
1390 *
1391 * This enables the vm_ops.fault callback to acquire vma_lock,
1392 * followed by memory_lock read lock, while already holding
1393 * mmap_sem without risk of deadlock.
1394 */
1395 while (1) {
1396 struct mm_struct *mm = NULL;
1397
1398 if (try) {
1399 if (!mutex_trylock(&vdev->vma_lock))
1400 return 0;
1401 } else {
1402 mutex_lock(&vdev->vma_lock);
1403 }
1404 while (!list_empty(&vdev->vma_list)) {
1405 mmap_vma = list_first_entry(&vdev->vma_list,
1406 struct vfio_pci_mmap_vma,
1407 vma_next);
1408 mm = mmap_vma->vma->vm_mm;
1409 if (mmget_not_zero(mm))
1410 break;
1411
1412 list_del(&mmap_vma->vma_next);
1413 kfree(mmap_vma);
1414 mm = NULL;
1415 }
1416 if (!mm)
1417 return 1;
1418 mutex_unlock(&vdev->vma_lock);
1419
1420 if (try) {
1421 if (!down_read_trylock(&mm->mmap_sem)) {
1422 mmput(mm);
1423 return 0;
1424 }
1425 } else {
1426 down_read(&mm->mmap_sem);
1427 }
1428 if (mmget_still_valid(mm)) {
1429 if (try) {
1430 if (!mutex_trylock(&vdev->vma_lock)) {
1431 up_read(&mm->mmap_sem);
1432 mmput(mm);
1433 return 0;
1434 }
1435 } else {
1436 mutex_lock(&vdev->vma_lock);
1437 }
1438 list_for_each_entry_safe(mmap_vma, tmp,
1439 &vdev->vma_list, vma_next) {
1440 struct vm_area_struct *vma = mmap_vma->vma;
1441
1442 if (vma->vm_mm != mm)
1443 continue;
1444
1445 list_del(&mmap_vma->vma_next);
1446 kfree(mmap_vma);
1447
1448 zap_vma_ptes(vma, vma->vm_start,
1449 vma->vm_end - vma->vm_start);
1450 }
1451 mutex_unlock(&vdev->vma_lock);
1452 }
1453 up_read(&mm->mmap_sem);
1454 mmput(mm);
1455 }
1456}
1457
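/*
 * Zap all user mappings of the device and take memory_lock for write.
 * vma_lock is held from the zap until memory_lock is acquired so that no
 * new mapping can be faulted back in; the caller drops memory_lock with
 * up_write() once the device is safe to access again.
 */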
1458void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_device *vdev)
1459{
1460 vfio_pci_zap_and_vma_lock(vdev, false);
1461 down_write(&vdev->memory_lock);
1462 mutex_unlock(&vdev->vma_lock);
1463}
1464
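/*
 * Bracket an access that needs memory decode enabled (e.g. pci_map_rom()
 * in the ROM region-info path above): take memory_lock for write, turn on
 * PCI_COMMAND_MEMORY if the user left it off, and return the original
 * command word for vfio_pci_memory_unlock_and_restore() to write back.
 */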
1465u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_device *vdev)
1466{
1467 u16 cmd;
1468
1469 down_write(&vdev->memory_lock);
1470 pci_read_config_word(vdev->pdev, PCI_COMMAND, &cmd);
1471 if (!(cmd & PCI_COMMAND_MEMORY))
1472 pci_write_config_word(vdev->pdev, PCI_COMMAND,
1473 cmd | PCI_COMMAND_MEMORY);
1474
1475 return cmd;
1476}
1477
1478void vfio_pci_memory_unlock_and_restore(struct vfio_pci_device *vdev, u16 cmd)
1479{
1480 pci_write_config_word(vdev->pdev, PCI_COMMAND, cmd);
1481 up_write(&vdev->memory_lock);
1482}
1483
1484/* Caller holds vma_lock */
1485static int __vfio_pci_add_vma(struct vfio_pci_device *vdev,
1486 struct vm_area_struct *vma)
1487{
1305 struct vfio_pci_mmap_vma *mmap_vma;
1306
1307 mmap_vma = kmalloc(sizeof(*mmap_vma), GFP_KERNEL);
1308 if (!mmap_vma)
1309 return -ENOMEM;
1310
1311 mmap_vma->vma = vma;
1488 struct vfio_pci_mmap_vma *mmap_vma;
1489
1490 mmap_vma = kmalloc(sizeof(*mmap_vma), GFP_KERNEL);
1491 if (!mmap_vma)
1492 return -ENOMEM;
1493
1494 mmap_vma->vma = vma;
1312
1313 mutex_lock(&vdev->vma_lock);
1314 list_add(&mmap_vma->vma_next, &vdev->vma_list);
1495 list_add(&mmap_vma->vma_next, &vdev->vma_list);
1315 mutex_unlock(&vdev->vma_lock);
1316
1317 return 0;
1318}
1319
1320/*
1321 * Zap mmaps on open so that we can fault them in on access and therefore
1322 * our vma_list only tracks mappings accessed since last zap.
1323 */

--- 17 unchanged lines hidden ---

1341 }
1342 mutex_unlock(&vdev->vma_lock);
1343}
1344
1345static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf)
1346{
1347 struct vm_area_struct *vma = vmf->vma;
1348 struct vfio_pci_device *vdev = vma->vm_private_data;
1496
1497 return 0;
1498}
1499
1500/*
1501 * Zap mmaps on open so that we can fault them in on access and therefore
1502 * our vma_list only tracks mappings accessed since last zap.
1503 */

--- 17 unchanged lines hidden ---

1521 }
1522 mutex_unlock(&vdev->vma_lock);
1523}
1524
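/*
 * BAR mmap fault handler: with vma_lock and a read hold on memory_lock,
 * refuse the fault (SIGBUS) while memory decode is disabled; otherwise
 * record the vma for later zapping and map the whole range with
 * remap_pfn_range().
 */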
1525static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf)
1526{
1527 struct vm_area_struct *vma = vmf->vma;
1528 struct vfio_pci_device *vdev = vma->vm_private_data;
1529 vm_fault_t ret = VM_FAULT_NOPAGE;
1349
1530
1350 if (vfio_pci_add_vma(vdev, vma))
1351 return VM_FAULT_OOM;
1531 mutex_lock(&vdev->vma_lock);
1532 down_read(&vdev->memory_lock);
1352
1533
1534 if (!__vfio_pci_memory_enabled(vdev)) {
1535 ret = VM_FAULT_SIGBUS;
1536 mutex_unlock(&vdev->vma_lock);
1537 goto up_out;
1538 }
1539
1540 if (__vfio_pci_add_vma(vdev, vma)) {
1541 ret = VM_FAULT_OOM;
1542 mutex_unlock(&vdev->vma_lock);
1543 goto up_out;
1544 }
1545
1546 mutex_unlock(&vdev->vma_lock);
1547
1353 if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
1354 vma->vm_end - vma->vm_start, vma->vm_page_prot))
1548 if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
1549 vma->vm_end - vma->vm_start, vma->vm_page_prot))
1355 return VM_FAULT_SIGBUS;
1550 ret = VM_FAULT_SIGBUS;
1356
1551
1357 return VM_FAULT_NOPAGE;
1552up_out:
1553 up_read(&vdev->memory_lock);
1554 return ret;
1358}
1359
1360static const struct vm_operations_struct vfio_pci_mmap_ops = {
1361 .open = vfio_pci_mmap_open,
1362 .close = vfio_pci_mmap_close,
1363 .fault = vfio_pci_mmap_fault,
1364};
1365

--- 309 unchanged lines hidden ---

1675 vdev->pdev = pdev;
1676 vdev->irq_type = VFIO_PCI_NUM_IRQS;
1677 mutex_init(&vdev->igate);
1678 spin_lock_init(&vdev->irqlock);
1679 mutex_init(&vdev->ioeventfds_lock);
1680 INIT_LIST_HEAD(&vdev->ioeventfds_list);
1681 mutex_init(&vdev->vma_lock);
1682 INIT_LIST_HEAD(&vdev->vma_list);
1555}
1556
1557static const struct vm_operations_struct vfio_pci_mmap_ops = {
1558 .open = vfio_pci_mmap_open,
1559 .close = vfio_pci_mmap_close,
1560 .fault = vfio_pci_mmap_fault,
1561};
1562

--- 309 unchanged lines hidden ---

1872 vdev->pdev = pdev;
1873 vdev->irq_type = VFIO_PCI_NUM_IRQS;
1874 mutex_init(&vdev->igate);
1875 spin_lock_init(&vdev->irqlock);
1876 mutex_init(&vdev->ioeventfds_lock);
1877 INIT_LIST_HEAD(&vdev->ioeventfds_list);
1878 mutex_init(&vdev->vma_lock);
1879 INIT_LIST_HEAD(&vdev->vma_list);
1880 init_rwsem(&vdev->memory_lock);
1683
1684 ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
1685 if (ret)
1686 goto out_free;
1687
1688 ret = vfio_pci_reflck_attach(vdev);
1689 if (ret)
1690 goto out_del_group_dev;

--- 237 unchanged lines hidden ---

1928 mutex_unlock(&reflck_lock);
1929}
1930
1931static void vfio_pci_reflck_put(struct vfio_pci_reflck *reflck)
1932{
1933 kref_put_mutex(&reflck->kref, vfio_pci_reflck_release, &reflck_lock);
1934}
1935
1881
1882 ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
1883 if (ret)
1884 goto out_free;
1885
1886 ret = vfio_pci_reflck_attach(vdev);
1887 if (ret)
1888 goto out_del_group_dev;

--- 237 unchanged lines hidden ---

2126 mutex_unlock(&reflck_lock);
2127}
2128
2129static void vfio_pci_reflck_put(struct vfio_pci_reflck *reflck)
2130{
2131 kref_put_mutex(&reflck->kref, vfio_pci_reflck_release, &reflck_lock);
2132}
2133
1936struct vfio_devices {
1937 struct vfio_device **devices;
1938 int cur_index;
1939 int max_index;
1940};
1941
1942static int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data)
1943{
1944 struct vfio_devices *devs = data;
1945 struct vfio_device *device;
1946 struct vfio_pci_device *vdev;
1947
1948 if (devs->cur_index == devs->max_index)
1949 return -ENOSPC;

--- 14 unchanged lines hidden ---

1964 vfio_device_put(device);
1965 return -EBUSY;
1966 }
1967
1968 devs->devices[devs->cur_index++] = device;
1969 return 0;
1970}
1971
2134static int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data)
2135{
2136 struct vfio_devices *devs = data;
2137 struct vfio_device *device;
2138 struct vfio_pci_device *vdev;
2139
2140 if (devs->cur_index == devs->max_index)
2141 return -ENOSPC;

--- 14 unchanged lines hidden ---

2156 vfio_device_put(device);
2157 return -EBUSY;
2158 }
2159
2160 devs->devices[devs->cur_index++] = device;
2161 return 0;
2162}
2163
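/*
 * Callback for the hot-reset path: take a reference on each affected vfio
 * device and use the trylock variant of the zap/vma_lock helper; returning
 * -EBUSY on contention unwinds the whole reset rather than risking deadlock.
 */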
2164static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data)
2165{
2166 struct vfio_devices *devs = data;
2167 struct vfio_device *device;
2168 struct vfio_pci_device *vdev;
2169
2170 if (devs->cur_index == devs->max_index)
2171 return -ENOSPC;
2172
2173 device = vfio_device_get_from_dev(&pdev->dev);
2174 if (!device)
2175 return -EINVAL;
2176
2177 if (pci_dev_driver(pdev) != &vfio_pci_driver) {
2178 vfio_device_put(device);
2179 return -EBUSY;
2180 }
2181
2182 vdev = vfio_device_data(device);
2183
2184 /*
2185 * Locking multiple devices is prone to deadlock; run away and
2186 * unwind if we hit contention.
2187 */
2188 if (!vfio_pci_zap_and_vma_lock(vdev, true)) {
2189 vfio_device_put(device);
2190 return -EBUSY;
2191 }
2192
2193 devs->devices[devs->cur_index++] = device;
2194 return 0;
2195}
2196
1972/*
1973 * If a bus or slot reset is available for the provided device and:
1974 * - All of the devices affected by that bus or slot reset are unused
1975 * (!refcnt)
1976 * - At least one of the affected devices is marked dirty via
1977 * needs_reset (such as by lack of FLR support)
1978 * Then attempt to perform that bus or slot reset. Callers are required
1979 * to hold vdev->reflck->lock, protecting the bus/slot reset group from

--- 144 unchanged lines hidden ---
2197/*
2198 * If a bus or slot reset is available for the provided device and:
2199 * - All of the devices affected by that bus or slot reset are unused
2200 * (!refcnt)
2201 * - At least one of the affected devices is marked dirty via
2202 * needs_reset (such as by lack of FLR support)
2203 * Then attempt to perform that bus or slot reset. Callers are required
2204 * to hold vdev->reflck->lock, protecting the bus/slot reset group from

--- 144 unchanged lines hidden ---