// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
 */
#include "iommufd_private.h"

void iommufd_viommu_destroy(struct iommufd_object *obj)
{
	struct iommufd_viommu *viommu =
		container_of(obj, struct iommufd_viommu, obj);

	if (viommu->ops && viommu->ops->destroy)
		viommu->ops->destroy(viommu);
	refcount_dec(&viommu->hwpt->common.obj.users);
	xa_destroy(&viommu->vdevs);
}

int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
{
	struct iommu_viommu_alloc *cmd = ucmd->cmd;
	const struct iommu_user_data user_data = {
		.type = cmd->type,
		.uptr = u64_to_user_ptr(cmd->data_uptr),
		.len = cmd->data_len,
	};
	struct iommufd_hwpt_paging *hwpt_paging;
	struct iommufd_viommu *viommu;
	struct iommufd_device *idev;
	const struct iommu_ops *ops;
	size_t viommu_size;
	int rc;

	if (cmd->flags || cmd->type == IOMMU_VIOMMU_TYPE_DEFAULT)
		return -EOPNOTSUPP;

	idev = iommufd_get_device(ucmd, cmd->dev_id);
	if (IS_ERR(idev))
		return PTR_ERR(idev);

	ops = dev_iommu_ops(idev->dev);
	if (!ops->get_viommu_size || !ops->viommu_init) {
		rc = -EOPNOTSUPP;
		goto out_put_idev;
	}

	viommu_size = ops->get_viommu_size(idev->dev, cmd->type);
	if (!viommu_size) {
		rc = -EOPNOTSUPP;
		goto out_put_idev;
	}

	/*
	 * It is a driver bug to provide a viommu_size smaller than the core
	 * vIOMMU structure size
	 */
	if (WARN_ON_ONCE(viommu_size < sizeof(*viommu))) {
		rc = -EOPNOTSUPP;
		goto out_put_idev;
	}

	hwpt_paging = iommufd_get_hwpt_paging(ucmd, cmd->hwpt_id);
	if (IS_ERR(hwpt_paging)) {
		rc = PTR_ERR(hwpt_paging);
		goto out_put_idev;
	}

	if (!hwpt_paging->nest_parent) {
		rc = -EINVAL;
		goto out_put_hwpt;
	}

	viommu = (struct iommufd_viommu *)_iommufd_object_alloc_ucmd(
		ucmd, viommu_size, IOMMUFD_OBJ_VIOMMU);
	if (IS_ERR(viommu)) {
		rc = PTR_ERR(viommu);
		goto out_put_hwpt;
	}

	xa_init(&viommu->vdevs);
	viommu->type = cmd->type;
	viommu->ictx = ucmd->ictx;
	viommu->hwpt = hwpt_paging;
	refcount_inc(&viommu->hwpt->common.obj.users);
	INIT_LIST_HEAD(&viommu->veventqs);
	init_rwsem(&viommu->veventqs_rwsem);
	/*
	 * A physical IOMMU is most likely unpluggable. A pluggable IOMMU
	 * instance (if one exists) is responsible for its own refcounting.
	 */
	viommu->iommu_dev = __iommu_get_iommu_dev(idev->dev);

	rc = ops->viommu_init(viommu, hwpt_paging->common.domain,
			      user_data.len ? &user_data : NULL);
	if (rc)
		goto out_put_hwpt;

	/* It is a driver bug if viommu->ops was left unset */
	if (WARN_ON_ONCE(!viommu->ops)) {
		rc = -EOPNOTSUPP;
		goto out_put_hwpt;
	}

	cmd->out_viommu_id = viommu->obj.id;
	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));

out_put_hwpt:
	iommufd_put_object(ucmd->ictx, &hwpt_paging->common.obj);
out_put_idev:
	iommufd_put_object(ucmd->ictx, &idev->obj);
	return rc;
}

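/*
 * Tear down a vdevice: invoke its destroy callback if one was set, remove it
 * from the vIOMMU's vdevs xarray, and clear idev->vdev. This runs both from
 * the abort path of iommufd_vdevice_alloc_ioctl() (with idev->igroup->lock
 * already held) and from iommufd_vdevice_destroy() below, which takes the
 * lock first.
 */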
void iommufd_vdevice_abort(struct iommufd_object *obj)
{
	struct iommufd_vdevice *vdev =
		container_of(obj, struct iommufd_vdevice, obj);
	struct iommufd_viommu *viommu = vdev->viommu;
	struct iommufd_device *idev = vdev->idev;

	lockdep_assert_held(&idev->igroup->lock);

	if (vdev->destroy)
		vdev->destroy(vdev);
	/*
	 * This xa_cmpxchg() is allowed to fail, if the alloc path's own
	 * xa_cmpxchg() failed earlier and vdev was never installed.
	 */
	xa_cmpxchg(&viommu->vdevs, vdev->virt_id, vdev, NULL, GFP_KERNEL);
	refcount_dec(&viommu->obj.users);
	idev->vdev = NULL;
}

void iommufd_vdevice_destroy(struct iommufd_object *obj)
{
	struct iommufd_vdevice *vdev =
		container_of(obj, struct iommufd_vdevice, obj);
	struct iommufd_device *idev = vdev->idev;
	struct iommufd_ctx *ictx = idev->ictx;

	mutex_lock(&idev->igroup->lock);
	iommufd_vdevice_abort(obj);
	mutex_unlock(&idev->igroup->lock);
	iommufd_put_object(ictx, &idev->obj);
}

int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd)
{
	struct iommu_vdevice_alloc *cmd = ucmd->cmd;
	struct iommufd_vdevice *vdev, *curr;
	size_t vdev_size = sizeof(*vdev);
	struct iommufd_viommu *viommu;
	struct iommufd_device *idev;
	u64 virt_id = cmd->virt_id;
	int rc = 0;

	/* virt_id indexes an xarray */
	if (virt_id > ULONG_MAX)
		return -EINVAL;

	viommu = iommufd_get_viommu(ucmd, cmd->viommu_id);
	if (IS_ERR(viommu))
		return PTR_ERR(viommu);

	idev = iommufd_get_device(ucmd, cmd->dev_id);
	if (IS_ERR(idev)) {
		rc = PTR_ERR(idev);
		goto out_put_viommu;
	}

	if (viommu->iommu_dev != __iommu_get_iommu_dev(idev->dev)) {
		rc = -EINVAL;
		goto out_put_idev;
	}

	mutex_lock(&idev->igroup->lock);
	if (idev->destroying) {
		rc = -ENOENT;
		goto out_unlock_igroup;
	}

	if (idev->vdev) {
		rc = -EEXIST;
		goto out_unlock_igroup;
	}

	if (viommu->ops && viommu->ops->vdevice_size) {
		/*
		 * It is a driver bug to:
		 * - provide an ops->vdevice_size smaller than the core
		 *   structure size
		 * - not implement the pairing ops->vdevice_init op
		 */
		if (WARN_ON_ONCE(viommu->ops->vdevice_size < vdev_size ||
				 !viommu->ops->vdevice_init)) {
			rc = -EOPNOTSUPP;
			goto out_unlock_igroup;
		}
		vdev_size = viommu->ops->vdevice_size;
	}

	vdev = (struct iommufd_vdevice *)_iommufd_object_alloc(
		ucmd->ictx, vdev_size, IOMMUFD_OBJ_VDEVICE);
	if (IS_ERR(vdev)) {
		rc = PTR_ERR(vdev);
		goto out_unlock_igroup;
	}

	vdev->virt_id = virt_id;
	vdev->viommu = viommu;
	refcount_inc(&viommu->obj.users);
	/*
	 * A wait_cnt reference is held on the idev for as long as we have the
	 * pointer. iommufd_device_pre_destroy() will revoke it before the
	 * idev is really destroyed.
	 */
	vdev->idev = idev;

	/*
	 * iommufd_device_destroy() delays until idev->vdev is NULL before
	 * freeing the idev, which only happens once destruction of the vdev
	 * has finished.
	 */
	idev->vdev = vdev;

	curr = xa_cmpxchg(&viommu->vdevs, virt_id, NULL, vdev, GFP_KERNEL);
	if (curr) {
		rc = xa_err(curr) ?: -EEXIST;
		goto out_abort;
	}

	if (viommu->ops && viommu->ops->vdevice_init) {
		rc = viommu->ops->vdevice_init(vdev);
		if (rc)
			goto out_abort;
	}

	cmd->out_vdevice_id = vdev->obj.id;
	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
	if (rc)
		goto out_abort;
	iommufd_object_finalize(ucmd->ictx, &vdev->obj);
	goto out_unlock_igroup;

out_abort:
	iommufd_object_abort_and_destroy(ucmd->ictx, &vdev->obj);
out_unlock_igroup:
	mutex_unlock(&idev->igroup->lock);
out_put_idev:
	if (rc)
		iommufd_put_object(ucmd->ictx, &idev->obj);
out_put_viommu:
	iommufd_put_object(ucmd->ictx, &viommu->obj);
	return rc;
}

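/*
 * Undo iommufd_hw_queue_alloc_phys(): unpin the page-aligned range that was
 * pinned for the guest queue, then detach and destroy the internal access.
 */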
static void iommufd_hw_queue_destroy_access(struct iommufd_ctx *ictx,
					    struct iommufd_access *access,
					    u64 base_iova, size_t length)
{
	u64 aligned_iova = PAGE_ALIGN_DOWN(base_iova);
	u64 offset = base_iova - aligned_iova;

	iommufd_access_unpin_pages(access, aligned_iova,
				   PAGE_ALIGN(length + offset));
	iommufd_access_detach_internal(access);
	iommufd_access_destroy_internal(ictx, access);
}

void iommufd_hw_queue_destroy(struct iommufd_object *obj)
{
	struct iommufd_hw_queue *hw_queue =
		container_of(obj, struct iommufd_hw_queue, obj);

	if (hw_queue->destroy)
		hw_queue->destroy(hw_queue);
	if (hw_queue->access)
		iommufd_hw_queue_destroy_access(hw_queue->viommu->ictx,
						hw_queue->access,
						hw_queue->base_addr,
						hw_queue->length);
	if (hw_queue->viommu)
		refcount_dec(&hw_queue->viommu->obj.users);
}

/*
 * When the HW accesses the guest queue via physical addresses, the underlying
 * physical pages of the guest queue must be contiguous. Also, to address the
 * security concern that IOMMUFD_CMD_IOAS_UNMAP could remove the mappings of
 * the guest queue from the nesting parent iopt while the HW is still
 * physically accessing the guest queue memory, such a HW queue must hold an
 * access that pins the underlying pages and prevents that from happening.
 */
static struct iommufd_access *
iommufd_hw_queue_alloc_phys(struct iommu_hw_queue_alloc *cmd,
			    struct iommufd_viommu *viommu, phys_addr_t *base_pa)
{
	u64 aligned_iova = PAGE_ALIGN_DOWN(cmd->nesting_parent_iova);
	u64 offset = cmd->nesting_parent_iova - aligned_iova;
	struct iommufd_access *access;
	struct page **pages;
	size_t max_npages;
	size_t length;
	size_t i;
	int rc;

	/* max_npages = DIV_ROUND_UP(offset + cmd->length, PAGE_SIZE) */
	if (check_add_overflow(offset, cmd->length, &length))
		return ERR_PTR(-ERANGE);
	if (check_add_overflow(length, PAGE_SIZE - 1, &length))
		return ERR_PTR(-ERANGE);
	max_npages = length / PAGE_SIZE;
	/* length needs to be page-aligned too */
	length = max_npages * PAGE_SIZE;

	/*
	 * Use kvcalloc() to avoid memory fragmentation for a large page array.
	 * Set __GFP_NOWARN to avoid syzkaller blowups
	 */
	pages = kvcalloc(max_npages, sizeof(*pages), GFP_KERNEL | __GFP_NOWARN);
	if (!pages)
		return ERR_PTR(-ENOMEM);

	access = iommufd_access_create_internal(viommu->ictx);
	if (IS_ERR(access)) {
		rc = PTR_ERR(access);
		goto out_free;
	}

	rc = iommufd_access_attach_internal(access, viommu->hwpt->ioas);
	if (rc)
		goto out_destroy;

	rc = iommufd_access_pin_pages(access, aligned_iova, length, pages, 0);
	if (rc)
		goto out_detach;

	/* Validate that the underlying physical pages are contiguous */
	for (i = 1; i < max_npages; i++) {
		if (page_to_pfn(pages[i]) == page_to_pfn(pages[i - 1]) + 1)
			continue;
		rc = -EFAULT;
		goto out_unpin;
	}

	*base_pa = (page_to_pfn(pages[0]) << PAGE_SHIFT) + offset;
	kvfree(pages);
	return access;

out_unpin:
	iommufd_access_unpin_pages(access, aligned_iova, length);
out_detach:
	iommufd_access_detach_internal(access);
out_destroy:
	iommufd_access_destroy_internal(viommu->ictx, access);
out_free:
	kvfree(pages);
	return ERR_PTR(rc);
}

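/*
 * IOMMU_HW_QUEUE_ALLOC handler for the hw_queue_init_phys flow: the guest
 * queue is described by an IOVA range in the nesting parent IOAS, which
 * iommufd_hw_queue_alloc_phys() above pins and validates as physically
 * contiguous before the driver's hw_queue_init_phys op is called with the
 * resulting base physical address.
 */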
int iommufd_hw_queue_alloc_ioctl(struct iommufd_ucmd *ucmd)
{
	struct iommu_hw_queue_alloc *cmd = ucmd->cmd;
	struct iommufd_hw_queue *hw_queue;
	struct iommufd_viommu *viommu;
	struct iommufd_access *access;
	size_t hw_queue_size;
	phys_addr_t base_pa;
	u64 last;
	int rc;

	if (cmd->flags || cmd->type == IOMMU_HW_QUEUE_TYPE_DEFAULT)
		return -EOPNOTSUPP;
	if (!cmd->length)
		return -EINVAL;
	if (check_add_overflow(cmd->nesting_parent_iova, cmd->length - 1,
			       &last))
		return -EOVERFLOW;

	viommu = iommufd_get_viommu(ucmd, cmd->viommu_id);
	if (IS_ERR(viommu))
		return PTR_ERR(viommu);

	if (!viommu->ops || !viommu->ops->get_hw_queue_size ||
	    !viommu->ops->hw_queue_init_phys) {
		rc = -EOPNOTSUPP;
		goto out_put_viommu;
	}

	hw_queue_size = viommu->ops->get_hw_queue_size(viommu, cmd->type);
	if (!hw_queue_size) {
		rc = -EOPNOTSUPP;
		goto out_put_viommu;
	}

	/*
	 * It is a driver bug to provide a hw_queue_size smaller than the
	 * core HW queue structure size
	 */
	if (WARN_ON_ONCE(hw_queue_size < sizeof(*hw_queue))) {
		rc = -EOPNOTSUPP;
		goto out_put_viommu;
	}

	hw_queue = (struct iommufd_hw_queue *)_iommufd_object_alloc_ucmd(
		ucmd, hw_queue_size, IOMMUFD_OBJ_HW_QUEUE);
	if (IS_ERR(hw_queue)) {
		rc = PTR_ERR(hw_queue);
		goto out_put_viommu;
	}

	access = iommufd_hw_queue_alloc_phys(cmd, viommu, &base_pa);
	if (IS_ERR(access)) {
		rc = PTR_ERR(access);
		goto out_put_viommu;
	}

	hw_queue->viommu = viommu;
	refcount_inc(&viommu->obj.users);
	hw_queue->access = access;
	hw_queue->type = cmd->type;
	hw_queue->length = cmd->length;
	hw_queue->base_addr = cmd->nesting_parent_iova;

	rc = viommu->ops->hw_queue_init_phys(hw_queue, cmd->index, base_pa);
	if (rc)
		goto out_put_viommu;

	cmd->out_hw_queue_id = hw_queue->obj.id;
	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));

out_put_viommu:
	iommufd_put_object(ucmd->ictx, &viommu->obj);
	return rc;
}