1 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ 2 /* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. 3 */ 4 #ifndef _UAPI_IOMMUFD_H 5 #define _UAPI_IOMMUFD_H 6 7 #include <linux/ioctl.h> 8 #include <linux/types.h> 9 /* ioctl type character; the IOMMU_* request macros below pass it as the first _IO() argument */ 10 #define IOMMUFD_TYPE (';') 11 12 /** 13 * DOC: General ioctl format 14 * 15 * The ioctl interface follows a general format to allow for extensibility. Each 16 * ioctl is passed in a structure pointer as the argument providing the size of 17 * the structure in the first u32. The kernel checks that any structure space 18 * beyond what it understands is 0. This allows userspace to use the backward 19 * compatible portion while consistently using the newer, larger, structures. 20 * 21 * ioctls use a standard meaning for common errnos: 22 * 23 * - ENOTTY: The IOCTL number itself is not supported at all 24 * - E2BIG: The IOCTL number is supported, but the provided structure has 25 * non-zero in a part the kernel does not understand. 26 * - EOPNOTSUPP: The IOCTL number is supported, and the structure is 27 * understood, however a known field has a value the kernel does not 28 * understand or support. 29 * - EINVAL: Everything about the IOCTL was understood, but a field is not 30 * correct. 31 * - ENOENT: An ID or IOVA provided does not exist. 32 * - ENOMEM: Out of memory. 33 * - EOVERFLOW: Mathematics overflowed. 34 * 35 * As well as additional errnos, within specific ioctls. 
36 */ /* ioctl command numbers, assigned consecutively from IOMMUFD_CMD_BASE; each IOMMU_* request macro passes one as the second _IO() argument */ 37 enum { 38 IOMMUFD_CMD_BASE = 0x80, 39 IOMMUFD_CMD_DESTROY = IOMMUFD_CMD_BASE, 40 IOMMUFD_CMD_IOAS_ALLOC = 0x81, 41 IOMMUFD_CMD_IOAS_ALLOW_IOVAS = 0x82, 42 IOMMUFD_CMD_IOAS_COPY = 0x83, 43 IOMMUFD_CMD_IOAS_IOVA_RANGES = 0x84, 44 IOMMUFD_CMD_IOAS_MAP = 0x85, 45 IOMMUFD_CMD_IOAS_UNMAP = 0x86, 46 IOMMUFD_CMD_OPTION = 0x87, 47 IOMMUFD_CMD_VFIO_IOAS = 0x88, 48 IOMMUFD_CMD_HWPT_ALLOC = 0x89, 49 IOMMUFD_CMD_GET_HW_INFO = 0x8a, 50 IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING = 0x8b, 51 IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP = 0x8c, 52 IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d, 53 IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e, 54 IOMMUFD_CMD_IOAS_MAP_FILE = 0x8f, 55 IOMMUFD_CMD_VIOMMU_ALLOC = 0x90, 56 IOMMUFD_CMD_VDEVICE_ALLOC = 0x91, 57 IOMMUFD_CMD_IOAS_CHANGE_PROCESS = 0x92, 58 IOMMUFD_CMD_VEVENTQ_ALLOC = 0x93, 59 IOMMUFD_CMD_HW_QUEUE_ALLOC = 0x94, 60 }; 61 62 /** 63 * struct iommu_destroy - ioctl(IOMMU_DESTROY) 64 * @size: sizeof(struct iommu_destroy) 65 * @id: iommufd object ID to destroy. Can be any destroyable object type. 66 * 67 * Destroy any object held within iommufd. 68 */ 69 struct iommu_destroy { 70 __u32 size; 71 __u32 id; 72 }; 73 #define IOMMU_DESTROY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_DESTROY) 74 75 /** 76 * struct iommu_ioas_alloc - ioctl(IOMMU_IOAS_ALLOC) 77 * @size: sizeof(struct iommu_ioas_alloc) 78 * @flags: Must be 0 79 * @out_ioas_id: Output IOAS ID for the allocated object 80 * 81 * Allocate an IO Address Space (IOAS) which holds an IO Virtual Address (IOVA) 82 * to memory mapping. 83 */ 84 struct iommu_ioas_alloc { 85 __u32 size; 86 __u32 flags; 87 __u32 out_ioas_id; 88 }; 89 #define IOMMU_IOAS_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOC) 90 91 /** 92 * struct iommu_iova_range - IOVA interval used in the allowed_iovas arrays 93 * @start: First IOVA 94 * @last: Inclusive last IOVA 95 * 96 * An interval in IOVA space. 
97 */ 98 struct iommu_iova_range { 99 __aligned_u64 start; 100 __aligned_u64 last; 101 }; 102 103 /** 104 * struct iommu_ioas_iova_ranges - ioctl(IOMMU_IOAS_IOVA_RANGES) 105 * @size: sizeof(struct iommu_ioas_iova_ranges) 106 * @ioas_id: IOAS ID to read ranges from 107 * @num_iovas: Input/Output total number of ranges in the IOAS 108 * @__reserved: Must be 0 109 * @allowed_iovas: Pointer to the output array of struct iommu_iova_range 110 * @out_iova_alignment: Minimum alignment required for mapping IOVA 111 * 112 * Query an IOAS for ranges of allowed IOVAs. Mapping IOVA outside these ranges 113 * is not allowed. num_iovas will be set to the total number of iovas and 114 * the allowed_iovas[] will be filled in as space permits. 115 * 116 * The allowed ranges are dependent on the HW path the DMA operation takes, and 117 * can change during the lifetime of the IOAS. A fresh empty IOAS will have a 118 * full range, and each attached device will narrow the ranges based on that 119 * device's HW restrictions. Detaching a device can widen the ranges. Userspace 120 * should query ranges after every attach/detach to know what IOVAs are valid 121 * for mapping. 122 * 123 * On input num_iovas is the length of the allowed_iovas array. On output it is 124 * the total number of iovas filled in. The ioctl will return -EMSGSIZE and set 125 * num_iovas to the required value if num_iovas is too small. In this case the 126 * caller should allocate a larger output array and re-issue the ioctl. 127 * 128 * out_iova_alignment returns the minimum IOVA alignment that can be given 129 * to IOMMU_IOAS_MAP/COPY. IOVAs must satisfy:: 130 * 131 * starting_iova % out_iova_alignment == 0 132 * (starting_iova + length) % out_iova_alignment == 0 133 * 134 * out_iova_alignment can be 1 indicating any IOVA is allowed. It cannot 135 * be higher than the system PAGE_SIZE. 
136 */ 137 struct iommu_ioas_iova_ranges { 138 __u32 size; 139 __u32 ioas_id; 140 __u32 num_iovas; 141 __u32 __reserved; 142 __aligned_u64 allowed_iovas; 143 __aligned_u64 out_iova_alignment; 144 }; 145 #define IOMMU_IOAS_IOVA_RANGES _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_IOVA_RANGES) 146 147 /** 148 * struct iommu_ioas_allow_iovas - ioctl(IOMMU_IOAS_ALLOW_IOVAS) 149 * @size: sizeof(struct iommu_ioas_allow_iovas) 150 * @ioas_id: IOAS ID to allow IOVAs from 151 * @num_iovas: Input/Output total number of ranges in the IOAS 152 * @__reserved: Must be 0 153 * @allowed_iovas: Pointer to array of struct iommu_iova_range 154 * 155 * Ensure a range of IOVAs are always available for allocation. If this call 156 * succeeds then IOMMU_IOAS_IOVA_RANGES will never return a list of IOVA ranges 157 * that are narrower than the ranges provided here. This call will fail if 158 * IOMMU_IOAS_IOVA_RANGES is currently narrower than the given ranges. 159 * 160 * When an IOAS is first created the IOVA_RANGES will be maximally sized, and as 161 * devices are attached the IOVA will narrow based on the device restrictions. 162 * When an allowed range is specified any narrowing will be refused, ie device 163 * attachment can fail if the device requires limiting within the allowed range. 164 * 165 * Automatic IOVA allocation is also impacted by this call. MAP will only 166 * allocate within the allowed IOVAs if they are present. 167 * 168 * This call replaces the entire allowed list with the given list. 
169 */ 170 struct iommu_ioas_allow_iovas { 171 __u32 size; 172 __u32 ioas_id; 173 __u32 num_iovas; 174 __u32 __reserved; 175 __aligned_u64 allowed_iovas; 176 }; 177 #define IOMMU_IOAS_ALLOW_IOVAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOW_IOVAS) 178 179 /** 180 * enum iommufd_ioas_map_flags - Flags for map and copy 181 * @IOMMU_IOAS_MAP_FIXED_IOVA: If set the caller supplies the IOVA. If clear the 182 * kernel will compute an appropriate IOVA to place the mapping at 183 * @IOMMU_IOAS_MAP_WRITEABLE: DMA is allowed to write to this mapping 184 * @IOMMU_IOAS_MAP_READABLE: DMA is allowed to read from this mapping 185 */ 186 enum iommufd_ioas_map_flags { 187 IOMMU_IOAS_MAP_FIXED_IOVA = 1 << 0, 188 IOMMU_IOAS_MAP_WRITEABLE = 1 << 1, 189 IOMMU_IOAS_MAP_READABLE = 1 << 2, 190 }; 191 192 /** 193 * struct iommu_ioas_map - ioctl(IOMMU_IOAS_MAP) 194 * @size: sizeof(struct iommu_ioas_map) 195 * @flags: Combination of enum iommufd_ioas_map_flags 196 * @ioas_id: IOAS ID to change the mapping of 197 * @__reserved: Must be 0 198 * @user_va: Userspace pointer to start mapping from 199 * @length: Number of bytes to map 200 * @iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is set 201 * then this must be provided as input. 202 * 203 * Set an IOVA mapping from a user pointer. If FIXED_IOVA is specified then the 204 * mapping will be established at iova, otherwise a suitable location based on 205 * the reserved and allowed lists will be automatically selected and returned in 206 * iova. 207 * 208 * If IOMMU_IOAS_MAP_FIXED_IOVA is specified then the iova range must currently 209 * be unused, existing IOVA cannot be replaced. 
210 */ 211 struct iommu_ioas_map { 212 __u32 size; 213 __u32 flags; 214 __u32 ioas_id; 215 __u32 __reserved; 216 __aligned_u64 user_va; 217 __aligned_u64 length; 218 __aligned_u64 iova; 219 }; 220 #define IOMMU_IOAS_MAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP) 221 222 /** 223 * struct iommu_ioas_map_file - ioctl(IOMMU_IOAS_MAP_FILE) 224 * @size: sizeof(struct iommu_ioas_map_file) 225 * @flags: Same as for iommu_ioas_map 226 * @ioas_id: Same as for iommu_ioas_map 227 * @fd: The memfd to map 228 * @start: Byte offset from start of file to map from 229 * @length: Same as for iommu_ioas_map 230 * @iova: Same as for iommu_ioas_map 231 * 232 * Set an IOVA mapping from a memfd file. All other arguments and semantics 233 * match those of IOMMU_IOAS_MAP. 234 */ 235 struct iommu_ioas_map_file { 236 __u32 size; 237 __u32 flags; 238 __u32 ioas_id; 239 __s32 fd; 240 __aligned_u64 start; 241 __aligned_u64 length; 242 __aligned_u64 iova; 243 }; 244 #define IOMMU_IOAS_MAP_FILE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP_FILE) 245 246 /** 247 * struct iommu_ioas_copy - ioctl(IOMMU_IOAS_COPY) 248 * @size: sizeof(struct iommu_ioas_copy) 249 * @flags: Combination of enum iommufd_ioas_map_flags 250 * @dst_ioas_id: IOAS ID to change the mapping of 251 * @src_ioas_id: IOAS ID to copy from 252 * @length: Number of bytes to copy and map 253 * @dst_iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is 254 * set then this must be provided as input. 255 * @src_iova: IOVA to start the copy 256 * 257 * Copy an already existing mapping from src_ioas_id and establish it in 258 * dst_ioas_id. The src iova/length must exactly match a range used with 259 * IOMMU_IOAS_MAP. 260 * 261 * This may be used to efficiently clone a subset of an IOAS to another, or as a 262 * kind of 'cache' to speed up mapping. Copy has an efficiency advantage over 263 * establishing equivalent new mappings, as internal resources are shared, and 264 * the kernel will pin the user memory only once. 
265 */ 266 struct iommu_ioas_copy { 267 __u32 size; 268 __u32 flags; 269 __u32 dst_ioas_id; 270 __u32 src_ioas_id; 271 __aligned_u64 length; 272 __aligned_u64 dst_iova; 273 __aligned_u64 src_iova; 274 }; 275 #define IOMMU_IOAS_COPY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_COPY) 276 277 /** 278 * struct iommu_ioas_unmap - ioctl(IOMMU_IOAS_UNMAP) 279 * @size: sizeof(struct iommu_ioas_unmap) 280 * @ioas_id: IOAS ID to change the mapping of 281 * @iova: IOVA to start the unmapping at 282 * @length: Number of bytes to unmap on input; number of bytes unmapped on output 283 * 284 * Unmap an IOVA range. The iova/length must be a superset of a previously 285 * mapped range used with IOMMU_IOAS_MAP or IOMMU_IOAS_COPY. Splitting or 286 * truncating ranges is not allowed. The values 0 to U64_MAX will unmap 287 * everything. 288 */ 289 struct iommu_ioas_unmap { 290 __u32 size; 291 __u32 ioas_id; 292 __aligned_u64 iova; 293 __aligned_u64 length; 294 }; 295 #define IOMMU_IOAS_UNMAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_UNMAP) 296 297 /** 298 * enum iommufd_option - ioctl(IOMMU_OPTION_RLIMIT_MODE) and 299 * ioctl(IOMMU_OPTION_HUGE_PAGES) 300 * @IOMMU_OPTION_RLIMIT_MODE: 301 * Change how RLIMIT_MEMLOCK accounting works. The caller must have privilege 302 * to invoke this. Value 0 (default) is user based accounting, 1 uses process 303 * based accounting. Global option, object_id must be 0 304 * @IOMMU_OPTION_HUGE_PAGES: 305 * Value 1 (default) allows contiguous pages to be combined when generating 306 * iommu mappings. Value 0 disables combining, everything is mapped to 307 * PAGE_SIZE. This can be useful for benchmarking. This is a per-IOAS 308 * option, the object_id must be the IOAS ID. 
309 */ 310 enum iommufd_option { 311 IOMMU_OPTION_RLIMIT_MODE = 0, 312 IOMMU_OPTION_HUGE_PAGES = 1, 313 }; 314 315 /** 316 * enum iommufd_option_ops - ioctl(IOMMU_OPTION_OP_SET) and 317 * ioctl(IOMMU_OPTION_OP_GET) 318 * @IOMMU_OPTION_OP_SET: Set the option's value 319 * @IOMMU_OPTION_OP_GET: Get the option's value 320 */ 321 enum iommufd_option_ops { 322 IOMMU_OPTION_OP_SET = 0, 323 IOMMU_OPTION_OP_GET = 1, 324 }; 325 326 /** 327 * struct iommu_option - iommu option multiplexer 328 * @size: sizeof(struct iommu_option) 329 * @option_id: One of enum iommufd_option 330 * @op: One of enum iommufd_option_ops 331 * @__reserved: Must be 0 332 * @object_id: ID of the object if required 333 * @val64: Option value to set or value returned on get 334 * 335 * Change a simple option value. This multiplexer allows controlling options 336 * on objects. IOMMU_OPTION_OP_SET will load an option and IOMMU_OPTION_OP_GET 337 * will return the current value. 338 */ 339 struct iommu_option { 340 __u32 size; 341 __u32 option_id; 342 __u16 op; 343 __u16 __reserved; 344 __u32 object_id; 345 __aligned_u64 val64; 346 }; 347 #define IOMMU_OPTION _IO(IOMMUFD_TYPE, IOMMUFD_CMD_OPTION) 348 349 /** 350 * enum iommufd_vfio_ioas_op - IOMMU_VFIO_IOAS_* ioctls 351 * @IOMMU_VFIO_IOAS_GET: Get the current compatibility IOAS 352 * @IOMMU_VFIO_IOAS_SET: Change the current compatibility IOAS 353 * @IOMMU_VFIO_IOAS_CLEAR: Disable VFIO compatibility 354 */ 355 enum iommufd_vfio_ioas_op { 356 IOMMU_VFIO_IOAS_GET = 0, 357 IOMMU_VFIO_IOAS_SET = 1, 358 IOMMU_VFIO_IOAS_CLEAR = 2, 359 }; 360 361 /** 362 * struct iommu_vfio_ioas - ioctl(IOMMU_VFIO_IOAS) 363 * @size: sizeof(struct iommu_vfio_ioas) 364 * @ioas_id: For IOMMU_VFIO_IOAS_SET the input IOAS ID to set 365 * For IOMMU_VFIO_IOAS_GET will output the IOAS ID 366 * @op: One of enum iommufd_vfio_ioas_op 367 * @__reserved: Must be 0 368 * 369 * The VFIO compatibility support uses a single ioas because VFIO APIs do not 370 * support the ID field. 
Set or Get the IOAS that VFIO compatibility will use. 371 * When VFIO_GROUP_SET_CONTAINER is used on an iommufd it will get the 372 * compatibility ioas, either by taking what is already set, or auto creating 373 * one. From then on VFIO will continue to use that ioas and is not affected by 374 * this ioctl. SET or CLEAR does not destroy any auto-created IOAS. 375 */ 376 struct iommu_vfio_ioas { 377 __u32 size; 378 __u32 ioas_id; 379 __u16 op; 380 __u16 __reserved; 381 }; 382 #define IOMMU_VFIO_IOAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VFIO_IOAS) 383 384 /** 385 * enum iommufd_hwpt_alloc_flags - Flags for HWPT allocation 386 * @IOMMU_HWPT_ALLOC_NEST_PARENT: If set, allocate a HWPT that can serve as 387 * the parent HWPT in a nesting configuration. 388 * @IOMMU_HWPT_ALLOC_DIRTY_TRACKING: Dirty tracking support for device IOMMU is 389 * enforced on device attachment 390 * @IOMMU_HWPT_FAULT_ID_VALID: The fault_id field of hwpt allocation data is 391 * valid. 392 * @IOMMU_HWPT_ALLOC_PASID: Requests a domain that can be used with PASID. The 393 * domain can be attached to any PASID on the device. 394 * Any domain attached to the non-PASID part of the 395 * device must also be flagged, otherwise attaching a 396 * PASID will be blocked. 397 * For the user that wants to attach PASID, ioas is 398 * not recommended for both the non-PASID part 399 * and PASID part of the device. 400 * If IOMMU does not support PASID it will return 401 * error (-EOPNOTSUPP). 
402 */ 403 enum iommufd_hwpt_alloc_flags { 404 IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0, 405 IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1, 406 IOMMU_HWPT_FAULT_ID_VALID = 1 << 2, 407 IOMMU_HWPT_ALLOC_PASID = 1 << 3, 408 }; 409 410 /** 411 * enum iommu_hwpt_vtd_s1_flags - Intel VT-d stage-1 page table 412 * entry attributes 413 * @IOMMU_VTD_S1_SRE: Supervisor request 414 * @IOMMU_VTD_S1_EAFE: Extended access enable 415 * @IOMMU_VTD_S1_WPE: Write protect enable 416 */ 417 enum iommu_hwpt_vtd_s1_flags { 418 IOMMU_VTD_S1_SRE = 1 << 0, 419 IOMMU_VTD_S1_EAFE = 1 << 1, 420 IOMMU_VTD_S1_WPE = 1 << 2, 421 }; 422 423 /** 424 * struct iommu_hwpt_vtd_s1 - Intel VT-d stage-1 page table 425 * info (IOMMU_HWPT_DATA_VTD_S1) 426 * @flags: Combination of enum iommu_hwpt_vtd_s1_flags 427 * @pgtbl_addr: The base address of the stage-1 page table. 428 * @addr_width: The address width of the stage-1 page table 429 * @__reserved: Must be 0 430 */ 431 struct iommu_hwpt_vtd_s1 { 432 __aligned_u64 flags; 433 __aligned_u64 pgtbl_addr; 434 __u32 addr_width; 435 __u32 __reserved; 436 }; 437 438 /** 439 * struct iommu_hwpt_arm_smmuv3 - ARM SMMUv3 nested STE 440 * (IOMMU_HWPT_DATA_ARM_SMMUV3) 441 * 442 * @ste: The first two double words of the user space Stream Table Entry for 443 * the translation. Must be little-endian. 444 * Allowed fields: (Refer to "5.2 Stream Table Entry" in SMMUv3 HW Spec) 445 * - word-0: V, Cfg, S1Fmt, S1ContextPtr, S1CDMax 446 * - word-1: EATS, S1DSS, S1CIR, S1COR, S1CSH, S1STALLD 447 * 448 * -EIO will be returned if @ste is not legal or contains any non-allowed field. 449 * Cfg can be used to select a S1, Bypass or Abort configuration. A Bypass 450 * nested domain will translate the same as the nesting parent. The S1 will 451 * install a Context Descriptor Table pointing at userspace memory translated 452 * by the nesting parent. 
453 * 454 * It's suggested to allocate a vDEVICE object carrying vSID and then re-attach 455 * the nested domain, as soon as the vSID is available in the VMM level: 456 * 457 * - when Cfg=translate, a vDEVICE must be allocated prior to attaching to the 458 * allocated nested domain, as CD/ATS invalidations and vevents need a vSID. 459 * - when Cfg=bypass/abort, a vDEVICE is not enforced during the nested domain 460 * attachment, to support a GBPA case where VM sets CR0.SMMUEN=0. However, if 461 * VM sets CR0.SMMUEN=1 while missing a vDEVICE object, kernel would fail to 462 * report events to the VM. E.g. F_TRANSLATION when guest STE.Cfg=abort. 463 */ 464 struct iommu_hwpt_arm_smmuv3 { 465 __aligned_le64 ste[2]; 466 }; 467 468 /** 469 * enum iommu_hwpt_data_type - IOMMU HWPT Data Type 470 * @IOMMU_HWPT_DATA_NONE: No data 471 * @IOMMU_HWPT_DATA_VTD_S1: Intel VT-d stage-1 page table 472 * @IOMMU_HWPT_DATA_ARM_SMMUV3: ARM SMMUv3 Context Descriptor Table 473 */ 474 enum iommu_hwpt_data_type { 475 IOMMU_HWPT_DATA_NONE = 0, 476 IOMMU_HWPT_DATA_VTD_S1 = 1, 477 IOMMU_HWPT_DATA_ARM_SMMUV3 = 2, 478 }; 479 480 /** 481 * struct iommu_hwpt_alloc - ioctl(IOMMU_HWPT_ALLOC) 482 * @size: sizeof(struct iommu_hwpt_alloc) 483 * @flags: Combination of enum iommufd_hwpt_alloc_flags 484 * @dev_id: The device to allocate this HWPT for 485 * @pt_id: The IOAS or HWPT or vIOMMU to connect this HWPT to 486 * @out_hwpt_id: The ID of the new HWPT 487 * @__reserved: Must be 0 488 * @data_type: One of enum iommu_hwpt_data_type 489 * @data_len: Length of the type specific data 490 * @data_uptr: User pointer to the type specific data 491 * @fault_id: The ID of IOMMUFD_FAULT object. Valid only if 492 * IOMMU_HWPT_FAULT_ID_VALID is set in @flags. 493 * @__reserved2: Padding to 64-bit alignment. Must be 0. 494 * 495 * Explicitly allocate a hardware page table object. 
This is the same object 496 * type that is returned by iommufd_device_attach() and represents the 497 * underlying iommu driver's iommu_domain kernel object. 498 * 499 * A kernel-managed HWPT will be created with the mappings from the given 500 * IOAS via the @pt_id. The @data_type for this allocation must be set to 501 * IOMMU_HWPT_DATA_NONE. The HWPT can be allocated as a parent HWPT for a 502 * nesting configuration by passing IOMMU_HWPT_ALLOC_NEST_PARENT via @flags. 503 * 504 * A user-managed nested HWPT will be created from a given vIOMMU (wrapping a 505 * parent HWPT) or a parent HWPT via @pt_id, in which the parent HWPT must be 506 * allocated previously via the same ioctl from a given IOAS (@pt_id). In this 507 * case, the @data_type must be set to a pre-defined type corresponding to an 508 * I/O page table type supported by the underlying IOMMU hardware. The device 509 * via @dev_id and the vIOMMU via @pt_id must be associated to the same IOMMU 510 * instance. 511 * 512 * If the @data_type is set to IOMMU_HWPT_DATA_NONE, @data_len and 513 * @data_uptr should be zero. Otherwise, both @data_len and @data_uptr 514 * must be given. 515 */ 516 struct iommu_hwpt_alloc { 517 __u32 size; 518 __u32 flags; 519 __u32 dev_id; 520 __u32 pt_id; 521 __u32 out_hwpt_id; 522 __u32 __reserved; 523 __u32 data_type; 524 __u32 data_len; 525 __aligned_u64 data_uptr; 526 __u32 fault_id; 527 __u32 __reserved2; 528 }; 529 #define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC) 530 531 /** 532 * enum iommu_hw_info_vtd_flags - Flags for VT-d hw_info 533 * @IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17: If set, disallow read-only mappings 534 * on a nested_parent domain. 
535 * https://www.intel.com/content/www/us/en/content-details/772415/content-details.html 536 */ 537 enum iommu_hw_info_vtd_flags { 538 IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17 = 1 << 0, 539 }; 540 541 /** 542 * struct iommu_hw_info_vtd - Intel VT-d hardware information 543 * 544 * @flags: Combination of enum iommu_hw_info_vtd_flags 545 * @__reserved: Must be 0 546 * 547 * @cap_reg: Value of Intel VT-d capability register defined in VT-d spec 548 * section 11.4.2 Capability Register. 549 * @ecap_reg: Value of Intel VT-d extended capability register defined in VT-d 550 * spec section 11.4.3 Extended Capability Register. 551 * 552 * User needs to understand the Intel VT-d specification to decode the 553 * register value. 554 */ 555 struct iommu_hw_info_vtd { 556 __u32 flags; 557 __u32 __reserved; 558 __aligned_u64 cap_reg; 559 __aligned_u64 ecap_reg; 560 }; 561 562 /** 563 * struct iommu_hw_info_arm_smmuv3 - ARM SMMUv3 hardware information 564 * (IOMMU_HW_INFO_TYPE_ARM_SMMUV3) 565 * 566 * @flags: Must be set to 0 567 * @__reserved: Must be 0 568 * @idr: Implemented features for ARM SMMU Non-secure programming interface 569 * @iidr: Information about the implementation and implementer of ARM SMMU, 570 * and architecture version supported 571 * @aidr: ARM SMMU architecture version 572 * 573 * For the details of @idr, @iidr and @aidr, please refer to the chapters 574 * from 6.3.1 to 6.3.6 in the SMMUv3 Spec. 575 * 576 * This reports the raw HW capability, and not all bits are meaningful to be 577 * read by userspace. Only the following fields should be used: 578 * 579 * idr[0]: ST_LEVEL, TERM_MODEL, STALL_MODEL, TTENDIAN, CD2L, ASID16, TTF 580 * idr[1]: SIDSIZE, SSIDSIZE 581 * idr[3]: BBML, RIL 582 * idr[5]: VAX, GRAN64K, GRAN16K, GRAN4K 583 * 584 * - S1P should be assumed to be true if a NESTED HWPT can be created 585 * - VFIO/iommufd only support platforms with COHACC, it should be assumed to be 586 * true. 587 * - ATS is a per-device property. 
If the VMM describes any devices as ATS 588 * capable in ACPI/DT it should set the corresponding idr. 589 * 590 * This list may expand in future (eg E0PD, AIE, PBHA, D128, DS etc). It is 591 * important that VMMs do not read bits outside the list to allow for 592 * compatibility with future kernels. Several features in the SMMUv3 593 * architecture are not currently supported by the kernel for nesting: HTTU, 594 * BTM, MPAM and others. 595 */ 596 struct iommu_hw_info_arm_smmuv3 { 597 __u32 flags; 598 __u32 __reserved; 599 __u32 idr[6]; 600 __u32 iidr; 601 __u32 aidr; 602 }; 603 604 /** 605 * struct iommu_hw_info_tegra241_cmdqv - NVIDIA Tegra241 CMDQV Hardware 606 * Information (IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV) 607 * 608 * @flags: Must be 0 609 * @version: Version number for the CMDQ-V HW for PARAM bits[03:00] 610 * @log2vcmdqs: Log2 of the total number of VCMDQs for PARAM bits[07:04] 611 * @log2vsids: Log2 of the total number of SID replacements for PARAM bits[15:12] 612 * @__reserved: Must be 0 613 * 614 * VMM can use these fields directly in its emulated global PARAM register. Note 615 * that only one Virtual Interface (VINTF) should be exposed to a VM, i.e. PARAM 616 * bits[11:08] should be set to 0 for log2 of the total number of VINTFs. 
617 */ 618 struct iommu_hw_info_tegra241_cmdqv { 619 __u32 flags; 620 __u8 version; 621 __u8 log2vcmdqs; 622 __u8 log2vsids; 623 __u8 __reserved; 624 }; 625 626 /** 627 * enum iommu_hw_info_type - IOMMU Hardware Info Types 628 * @IOMMU_HW_INFO_TYPE_NONE: Output by the drivers that do not report hardware 629 * info 630 * @IOMMU_HW_INFO_TYPE_DEFAULT: Input to request the default type. Has the same value as IOMMU_HW_INFO_TYPE_NONE 631 * @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type 632 * @IOMMU_HW_INFO_TYPE_ARM_SMMUV3: ARM SMMUv3 iommu info type 633 * @IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV (extension for ARM 634 * SMMUv3) info type 635 */ 636 enum iommu_hw_info_type { 637 IOMMU_HW_INFO_TYPE_NONE = 0, 638 IOMMU_HW_INFO_TYPE_DEFAULT = 0, 639 IOMMU_HW_INFO_TYPE_INTEL_VTD = 1, 640 IOMMU_HW_INFO_TYPE_ARM_SMMUV3 = 2, 641 IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV = 3, 642 }; 643 644 /** 645 * enum iommufd_hw_capabilities 646 * @IOMMU_HW_CAP_DIRTY_TRACKING: IOMMU hardware support for dirty tracking 647 * If available, it means the following APIs 648 * are supported: 649 * 650 * IOMMU_HWPT_GET_DIRTY_BITMAP 651 * IOMMU_HWPT_SET_DIRTY_TRACKING 652 * 653 * @IOMMU_HW_CAP_PCI_PASID_EXEC: Execute Permission Supported, user ignores it 654 * when the struct 655 * iommu_hw_info::out_max_pasid_log2 is zero. 656 * @IOMMU_HW_CAP_PCI_PASID_PRIV: Privileged Mode Supported, user ignores it 657 * when the struct 658 * iommu_hw_info::out_max_pasid_log2 is zero. 
659 */ 660 enum iommufd_hw_capabilities { 661 IOMMU_HW_CAP_DIRTY_TRACKING = 1 << 0, 662 IOMMU_HW_CAP_PCI_PASID_EXEC = 1 << 1, 663 IOMMU_HW_CAP_PCI_PASID_PRIV = 1 << 2, 664 }; 665 666 /** 667 * enum iommufd_hw_info_flags - Flags for iommu_hw_info 668 * @IOMMU_HW_INFO_FLAG_INPUT_TYPE: If set, @in_data_type carries an input type 669 * for user space to request for a specific info 670 */ 671 enum iommufd_hw_info_flags { 672 IOMMU_HW_INFO_FLAG_INPUT_TYPE = 1 << 0, 673 }; 674 675 /** 676 * struct iommu_hw_info - ioctl(IOMMU_GET_HW_INFO) 677 * @size: sizeof(struct iommu_hw_info) 678 * @flags: Combination of enum iommufd_hw_info_flags 679 * @dev_id: The device bound to the iommufd 680 * @data_len: Input the length of a user buffer in bytes. Output the length of 681 * data that kernel supports 682 * @data_uptr: User pointer to a user-space buffer used by the kernel to fill 683 * the iommu type specific hardware information data 684 * @in_data_type: This shares the same field with @out_data_type, making it be 685 * a bidirectional field. When IOMMU_HW_INFO_FLAG_INPUT_TYPE is 686 * set, an input type carried via this @in_data_type field will 687 * be valid, requesting the info data of the given type. If 688 * IOMMU_HW_INFO_FLAG_INPUT_TYPE is unset, any input value will 689 * be seen as IOMMU_HW_INFO_TYPE_DEFAULT 690 * @out_data_type: Output the iommu hardware info type as defined in the enum 691 * iommu_hw_info_type. 692 * @out_capabilities: Output the generic iommu capability info type as defined 693 * in the enum iommufd_hw_capabilities. 694 * @out_max_pasid_log2: Output the width of PASIDs. 0 means no PASID support. 695 * PCI devices turn to out_capabilities to check if the 696 * specific capabilities is supported or not. 697 * @__reserved: Must be 0 698 * 699 * Query an iommu type specific hardware information data from an iommu behind 700 * a given device that has been bound to iommufd. 
This hardware info data will 701 * be used to sync capabilities between the virtual iommu and the physical 702 * iommu, e.g. a nested translation setup needs to check the hardware info, so 703 * a guest stage-1 page table can be compatible with the physical iommu. 704 * 705 * To capture an iommu type specific hardware information data, @data_uptr and 706 * its length @data_len must be provided. Trailing bytes will be zeroed if the 707 * user buffer is larger than the data that kernel has. Otherwise, kernel only 708 * fills the buffer using the given length in @data_len. If the ioctl succeeds, 709 * @data_len will be updated to the length that kernel actually supports, 710 * @out_data_type will be filled to decode the data filled in the buffer 711 * pointed by @data_uptr. Input @data_len == zero is allowed. 712 */ 713 struct iommu_hw_info { 714 __u32 size; 715 __u32 flags; 716 __u32 dev_id; 717 __u32 data_len; 718 __aligned_u64 data_uptr; 719 union { 720 __u32 in_data_type; 721 __u32 out_data_type; 722 }; 723 __u8 out_max_pasid_log2; 724 __u8 __reserved[3]; 725 __aligned_u64 out_capabilities; 726 }; 727 #define IOMMU_GET_HW_INFO _IO(IOMMUFD_TYPE, IOMMUFD_CMD_GET_HW_INFO) 728 729 /** 730 * enum iommufd_hwpt_set_dirty_tracking_flags - Flags for steering dirty 731 * tracking 732 * @IOMMU_HWPT_DIRTY_TRACKING_ENABLE: Enable dirty tracking 733 */ 734 enum iommufd_hwpt_set_dirty_tracking_flags { 735 IOMMU_HWPT_DIRTY_TRACKING_ENABLE = 1, 736 }; 737 738 /** 739 * struct iommu_hwpt_set_dirty_tracking - ioctl(IOMMU_HWPT_SET_DIRTY_TRACKING) 740 * @size: sizeof(struct iommu_hwpt_set_dirty_tracking) 741 * @flags: Combination of enum iommufd_hwpt_set_dirty_tracking_flags 742 * @hwpt_id: HW pagetable ID that represents the IOMMU domain 743 * @__reserved: Must be 0 744 * 745 * Toggle dirty tracking on an HW pagetable. 
746 */ 747 struct iommu_hwpt_set_dirty_tracking { 748 __u32 size; 749 __u32 flags; 750 __u32 hwpt_id; 751 __u32 __reserved; 752 }; 753 #define IOMMU_HWPT_SET_DIRTY_TRACKING _IO(IOMMUFD_TYPE, \ 754 IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING) 755 756 /** 757 * enum iommufd_hwpt_get_dirty_bitmap_flags - Flags for getting dirty bits 758 * @IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR: Just read the PTEs without clearing 759 * any dirty bits metadata. This flag 760 * can be passed when the next 761 * operation is expected to be an unmap 762 * of the same IOVA range. 763 * 764 */ 765 enum iommufd_hwpt_get_dirty_bitmap_flags { 766 IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR = 1, 767 }; 768 769 /** 770 * struct iommu_hwpt_get_dirty_bitmap - ioctl(IOMMU_HWPT_GET_DIRTY_BITMAP) 771 * @size: sizeof(struct iommu_hwpt_get_dirty_bitmap) 772 * @hwpt_id: HW pagetable ID that represents the IOMMU domain 773 * @flags: Combination of enum iommufd_hwpt_get_dirty_bitmap_flags 774 * @__reserved: Must be 0 775 * @iova: base IOVA of the bitmap first bit 776 * @length: IOVA range size 777 * @page_size: page size granularity of each bit in the bitmap 778 * @data: bitmap where to set the dirty bits. The bitmap bits each 779 * represent a page_size which you deviate from an arbitrary iova. 780 * 781 * Checking a given IOVA is dirty: 782 * 783 * data[(iova / page_size) / 64] & (1ULL << ((iova / page_size) % 64)) 784 * 785 * Walk the IOMMU pagetables for a given IOVA range to return a bitmap 786 * with the dirty IOVAs. In doing so it will also by default clear any 787 * dirty bit metadata set in the IOPTE. 
788 */ 789 struct iommu_hwpt_get_dirty_bitmap { 790 __u32 size; 791 __u32 hwpt_id; 792 __u32 flags; 793 __u32 __reserved; 794 __aligned_u64 iova; 795 __aligned_u64 length; 796 __aligned_u64 page_size; 797 __aligned_u64 data; 798 }; 799 #define IOMMU_HWPT_GET_DIRTY_BITMAP _IO(IOMMUFD_TYPE, \ 800 IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP) 801 802 /** 803 * enum iommu_hwpt_invalidate_data_type - IOMMU HWPT Cache Invalidation 804 * Data Type 805 * @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for VTD_S1 806 * @IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3: Invalidation data for ARM SMMUv3 807 */ 808 enum iommu_hwpt_invalidate_data_type { 809 IOMMU_HWPT_INVALIDATE_DATA_VTD_S1 = 0, 810 IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3 = 1, 811 }; 812 813 /** 814 * enum iommu_hwpt_vtd_s1_invalidate_flags - Flags for Intel VT-d 815 * stage-1 cache invalidation 816 * @IOMMU_VTD_INV_FLAGS_LEAF: Indicates whether the invalidation applies 817 * to all-levels page structure cache or just 818 * the leaf PTE cache. 819 */ 820 enum iommu_hwpt_vtd_s1_invalidate_flags { 821 IOMMU_VTD_INV_FLAGS_LEAF = 1 << 0, 822 }; 823 824 /** 825 * struct iommu_hwpt_vtd_s1_invalidate - Intel VT-d cache invalidation 826 * (IOMMU_HWPT_INVALIDATE_DATA_VTD_S1) 827 * @addr: The start address of the range to be invalidated. It needs to 828 * be 4KB aligned. 829 * @npages: Number of contiguous 4K pages to be invalidated. 830 * @flags: Combination of enum iommu_hwpt_vtd_s1_invalidate_flags 831 * @__reserved: Must be 0 832 * 833 * The Intel VT-d specific invalidation data for user-managed stage-1 cache 834 * invalidation in nested translation. Userspace uses this structure to 835 * tell the impacted cache scope after modifying the stage-1 page table. 836 * 837 * Invalidating all the caches related to the page table by setting @addr 838 * to be 0 and @npages to be U64_MAX. 839 * 840 * The device TLB will be invalidated automatically if ATS is enabled. 
841 */ 842 struct iommu_hwpt_vtd_s1_invalidate { 843 __aligned_u64 addr; 844 __aligned_u64 npages; 845 __u32 flags; 846 __u32 __reserved; 847 }; 848 849 /** 850 * struct iommu_viommu_arm_smmuv3_invalidate - ARM SMMUv3 cache invalidation 851 * (IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3) 852 * @cmd: 128-bit cache invalidation command that runs in the SMMU CMDQ. 853 * Must be little-endian. 854 * 855 * List of commands supported only when a vIOMMU is passed in via @hwpt_id: 856 * CMDQ_OP_TLBI_NSNH_ALL 857 * CMDQ_OP_TLBI_NH_VA 858 * CMDQ_OP_TLBI_NH_VAA 859 * CMDQ_OP_TLBI_NH_ALL 860 * CMDQ_OP_TLBI_NH_ASID 861 * CMDQ_OP_ATC_INV 862 * CMDQ_OP_CFGI_CD 863 * CMDQ_OP_CFGI_CD_ALL 864 * 865 * -EIO will be returned if the command is not supported. 866 */ 867 struct iommu_viommu_arm_smmuv3_invalidate { 868 __aligned_le64 cmd[2]; 869 }; 870 871 /** 872 * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE) 873 * @size: sizeof(struct iommu_hwpt_invalidate) 874 * @hwpt_id: ID of a nested HWPT or a vIOMMU, for cache invalidation 875 * @data_uptr: User pointer to an array of driver-specific cache invalidation 876 * data. 877 * @data_type: One of enum iommu_hwpt_invalidate_data_type, defining the data 878 * type of all the entries in the invalidation request array. It 879 * should be a type supported by the hwpt pointed to by @hwpt_id. 880 * @entry_len: Length (in bytes) of a request entry in the request array 881 * @entry_num: Input the number of cache invalidation requests in the array. 882 * Output the number of requests successfully handled by the kernel. 883 * @__reserved: Must be 0. 884 * 885 * Invalidate the IOMMU cache for a user-managed page table or a vIOMMU. Modifications 886 * on a user-managed page table should be followed by this operation, if a HWPT 887 * is passed in via @hwpt_id. Other caches, such as device cache or descriptor 888 * cache can be flushed if a vIOMMU is passed in via the @hwpt_id field.
889 * 890 * Each ioctl can support one or more cache invalidation requests in the array 891 * that has a total size of @entry_len * @entry_num. 892 * 893 * An empty invalidation request array, set up with @entry_num==0, is allowed, and 894 * @entry_len and @data_uptr are ignored in this case. This can be used to 895 * check whether the given @data_type is supported by the kernel. 896 */ 897 struct iommu_hwpt_invalidate { 898 __u32 size; 899 __u32 hwpt_id; 900 __aligned_u64 data_uptr; 901 __u32 data_type; 902 __u32 entry_len; 903 __u32 entry_num; 904 __u32 __reserved; 905 }; 906 #define IOMMU_HWPT_INVALIDATE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_INVALIDATE) 907 908 /** 909 * enum iommu_hwpt_pgfault_flags - flags for struct iommu_hwpt_pgfault 910 * @IOMMU_PGFAULT_FLAGS_PASID_VALID: The pasid field of the fault data is 911 * valid. 912 * @IOMMU_PGFAULT_FLAGS_LAST_PAGE: This is the last fault of a fault group. 913 */ 914 enum iommu_hwpt_pgfault_flags { 915 IOMMU_PGFAULT_FLAGS_PASID_VALID = (1 << 0), 916 IOMMU_PGFAULT_FLAGS_LAST_PAGE = (1 << 1), 917 }; 918 919 /** 920 * enum iommu_hwpt_pgfault_perm - perm bits for struct iommu_hwpt_pgfault 921 * @IOMMU_PGFAULT_PERM_READ: request for read permission 922 * @IOMMU_PGFAULT_PERM_WRITE: request for write permission 923 * @IOMMU_PGFAULT_PERM_EXEC: (PCIE 10.4.1) request with a PASID that has the 924 * Execute Requested bit set in the PASID TLP Prefix. 925 * @IOMMU_PGFAULT_PERM_PRIV: (PCIE 10.4.1) request with a PASID that has the 926 * Privileged Mode Requested bit set in the PASID TLP 927 * Prefix.
928 */ 929 enum iommu_hwpt_pgfault_perm { 930 IOMMU_PGFAULT_PERM_READ = (1 << 0), 931 IOMMU_PGFAULT_PERM_WRITE = (1 << 1), 932 IOMMU_PGFAULT_PERM_EXEC = (1 << 2), 933 IOMMU_PGFAULT_PERM_PRIV = (1 << 3), 934 }; 935 936 /** 937 * struct iommu_hwpt_pgfault - iommu page fault data 938 * @flags: Combination of enum iommu_hwpt_pgfault_flags 939 * @dev_id: id of the originating device 940 * @pasid: Process Address Space ID 941 * @grpid: Page Request Group Index 942 * @perm: Combination of enum iommu_hwpt_pgfault_perm 943 * @__reserved: Must be 0. 944 * @addr: Fault address 945 * @length: a hint of how much data the requestor is expecting to fetch. For 946 * example, if the PRI initiator knows it is going to do a 10MB 947 * transfer, it could fill in 10MB and the OS could pre-fault in 948 * 10MB of IOVA. It defaults to 0 if there is no such hint. 949 * @cookie: kernel-managed cookie identifying a group of fault messages. The 950 * cookie number encoded in the last page fault of the group should 951 * be echoed back in the response message. 952 */ 953 struct iommu_hwpt_pgfault { 954 __u32 flags; 955 __u32 dev_id; 956 __u32 pasid; 957 __u32 grpid; 958 __u32 perm; 959 __u32 __reserved; 960 __aligned_u64 addr; 961 __u32 length; 962 __u32 cookie; 963 }; 964 965 /** 966 * enum iommufd_page_response_code - Return status of fault handlers 967 * @IOMMUFD_PAGE_RESP_SUCCESS: Fault has been handled and the page tables 968 * populated, retry the access. This is the 969 * "Success" defined in PCI 10.4.2.1. 970 * @IOMMUFD_PAGE_RESP_INVALID: Could not handle this fault, don't retry the 971 * access. This is the "Invalid Request" in PCI 972 * 10.4.2.1. 973 */ 974 enum iommufd_page_response_code { 975 IOMMUFD_PAGE_RESP_SUCCESS = 0, 976 IOMMUFD_PAGE_RESP_INVALID = 1, 977 }; 978 979 /** 980 * struct iommu_hwpt_page_response - IOMMU page fault response 981 * @cookie: The kernel-managed cookie reported in the fault message. 982 * @code: One of the response codes in enum iommufd_page_response_code.
983 */ 984 struct iommu_hwpt_page_response { 985 __u32 cookie; 986 __u32 code; 987 }; 988 989 /** 990 * struct iommu_fault_alloc - ioctl(IOMMU_FAULT_QUEUE_ALLOC) 991 * @size: sizeof(struct iommu_fault_alloc) 992 * @flags: Must be 0 993 * @out_fault_id: The ID of the new FAULT 994 * @out_fault_fd: The fd of the new FAULT 995 * 996 * Explicitly allocate a fault handling object. 997 */ 998 struct iommu_fault_alloc { 999 __u32 size; 1000 __u32 flags; 1001 __u32 out_fault_id; 1002 __u32 out_fault_fd; 1003 }; 1004 #define IOMMU_FAULT_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_FAULT_QUEUE_ALLOC) 1005 1006 /** 1007 * enum iommu_viommu_type - Virtual IOMMU Type 1008 * @IOMMU_VIOMMU_TYPE_DEFAULT: Reserved for future use 1009 * @IOMMU_VIOMMU_TYPE_ARM_SMMUV3: ARM SMMUv3 driver-specific type 1010 * @IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV (extension for ARM 1011 * SMMUv3) enabled ARM SMMUv3 type 1012 */ 1013 enum iommu_viommu_type { 1014 IOMMU_VIOMMU_TYPE_DEFAULT = 0, 1015 IOMMU_VIOMMU_TYPE_ARM_SMMUV3 = 1, 1016 IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV = 2, 1017 }; 1018 1019 /** 1020 * struct iommu_viommu_tegra241_cmdqv - NVIDIA Tegra241 CMDQV Virtual Interface 1021 * (IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV) 1022 * @out_vintf_mmap_offset: mmap offset argument for VINTF's page0 1023 * @out_vintf_mmap_length: mmap length argument for VINTF's page0 1024 * 1025 * Both @out_vintf_mmap_offset and @out_vintf_mmap_length are reported by the kernel 1026 * for user space to mmap the VINTF page0 from the host physical address space 1027 * to the guest physical address space so that a guest kernel can directly R/W 1028 * access the VINTF page0 in order to control its virtual command queues.
1029 */ 1030 struct iommu_viommu_tegra241_cmdqv { 1031 __aligned_u64 out_vintf_mmap_offset; 1032 __aligned_u64 out_vintf_mmap_length; 1033 }; 1034 1035 /** 1036 * struct iommu_viommu_alloc - ioctl(IOMMU_VIOMMU_ALLOC) 1037 * @size: sizeof(struct iommu_viommu_alloc) 1038 * @flags: Must be 0 1039 * @type: Type of the virtual IOMMU. Must be defined in enum iommu_viommu_type 1040 * @dev_id: The device's physical IOMMU will be used to back the virtual IOMMU 1041 * @hwpt_id: ID of a nesting parent HWPT to associate with 1042 * @out_viommu_id: Output virtual IOMMU ID for the allocated object 1043 * @data_len: Length of the type-specific data 1044 * @__reserved: Must be 0 1045 * @data_uptr: User pointer to driver-specific virtual IOMMU data 1046 * 1047 * Allocate a virtual IOMMU object, representing the underlying physical IOMMU's 1048 * virtualization support that is a security-isolated slice of the real IOMMU HW 1049 * that is unique to a specific VM. Operations global to the IOMMU are connected 1050 * to the vIOMMU, such as: 1051 * - Security namespace for guest-owned IDs, e.g. guest-controlled cache tags 1052 * - Non-device-affiliated event reporting, e.g. invalidation queue errors 1053 * - Access to a sharable nesting parent pagetable across physical IOMMUs 1054 * - Virtualization of various platform IDs, e.g.
RIDs and others 1055 * - Delivery of paravirtualized invalidation 1056 * - Direct assigned invalidation queues 1057 * - Direct assigned interrupts 1058 */ 1059 struct iommu_viommu_alloc { 1060 __u32 size; 1061 __u32 flags; 1062 __u32 type; 1063 __u32 dev_id; 1064 __u32 hwpt_id; 1065 __u32 out_viommu_id; 1066 __u32 data_len; 1067 __u32 __reserved; 1068 __aligned_u64 data_uptr; 1069 }; 1070 #define IOMMU_VIOMMU_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_ALLOC) 1071 1072 /** 1073 * struct iommu_vdevice_alloc - ioctl(IOMMU_VDEVICE_ALLOC) 1074 * @size: sizeof(struct iommu_vdevice_alloc) 1075 * @viommu_id: vIOMMU ID to associate with the virtual device 1076 * @dev_id: The physical device to allocate a virtual instance on the vIOMMU 1077 * @out_vdevice_id: Object handle for the vDevice. Pass to IOMMU_DESTROY 1078 * @virt_id: Virtual device ID per vIOMMU, e.g. vSID of ARM SMMUv3, vDeviceID 1079 * of AMD IOMMU, and vRID of Intel VT-d 1080 * 1081 * Allocate a virtual device instance (for a physical device) against a vIOMMU. 1082 * This instance holds the device's information (related to its vIOMMU) in a VM. 1083 * User should use IOMMU_DESTROY to destroy the virtual device before 1084 * destroying the physical device (by closing vfio_cdev fd). Otherwise the 1085 * virtual device would be forcibly destroyed on physical device destruction, 1086 * and its vdevice_id would be permanently leaked (unremovable & unreusable) until 1087 * the iommu fd is closed.
1088 */ 1089 struct iommu_vdevice_alloc { 1090 __u32 size; 1091 __u32 viommu_id; 1092 __u32 dev_id; 1093 __u32 out_vdevice_id; 1094 __aligned_u64 virt_id; 1095 }; 1096 #define IOMMU_VDEVICE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VDEVICE_ALLOC) 1097 1098 /** 1099 * struct iommu_ioas_change_process - ioctl(IOMMU_IOAS_CHANGE_PROCESS) 1100 * @size: sizeof(struct iommu_ioas_change_process) 1101 * @__reserved: Must be 0 1102 * 1103 * This transfers pinned memory counts for every memory map in every IOAS 1104 * in the context to the current process. This only supports maps created 1105 * with IOMMU_IOAS_MAP_FILE, and returns EINVAL if other maps are present. 1106 * If the ioctl returns a failure status, then nothing is changed. 1107 * 1108 * This API is useful for transferring operation of a device from one process 1109 * to another, such as during userland live update. 1110 */ 1111 struct iommu_ioas_change_process { 1112 __u32 size; 1113 __u32 __reserved; 1114 }; 1115 1116 #define IOMMU_IOAS_CHANGE_PROCESS \ 1117 _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_CHANGE_PROCESS) 1118 1119 /** 1120 * enum iommu_veventq_flag - flag for struct iommufd_vevent_header 1121 * @IOMMU_VEVENTQ_FLAG_LOST_EVENTS: vEVENTQ has lost vEVENTs 1122 */ 1123 enum iommu_veventq_flag { 1124 IOMMU_VEVENTQ_FLAG_LOST_EVENTS = (1U << 0), 1125 }; 1126 1127 /** 1128 * struct iommufd_vevent_header - Virtual Event Header for a vEVENTQ Status 1129 * @flags: Combination of enum iommu_veventq_flag 1130 * @sequence: The sequence index of a vEVENT in the vEVENTQ, with a range of 1131 * [0, INT_MAX] where the index following INT_MAX is 0 1132 * 1133 * Each iommufd_vevent_header reports the sequence index of the following vEVENT: 1134 * 1135 * +----------------------+-------+----------------------+-------+---+-------+ 1136 * | header0 {sequence=0} | data0 | header1 {sequence=1} | data1 |...| dataN | 1137 * +----------------------+-------+----------------------+-------+---+-------+ 1138 * 1139 * And this sequence index is
expected to be monotonic relative to the sequence index of 1140 * the previous vEVENT. If two adjacent sequence indexes have a delta larger than 1141 * 1, it means that delta - 1 vEVENTs have been lost, e.g. two lost vEVENTs: 1142 * 1143 * +-----+----------------------+-------+----------------------+-------+-----+ 1144 * | ... | header3 {sequence=3} | data3 | header6 {sequence=6} | data6 | ... | 1145 * +-----+----------------------+-------+----------------------+-------+-----+ 1146 * 1147 * If a vEVENT is lost at the tail of the vEVENTQ and there is no following vEVENT 1148 * providing the next sequence index, an IOMMU_VEVENTQ_FLAG_LOST_EVENTS header 1149 * would be added to the tail, and no data would follow this header: 1150 * 1151 * +--+----------------------+-------+-----------------------------------------+ 1152 * |..| header3 {sequence=3} | data3 | header4 {flags=LOST_EVENTS, sequence=4} | 1153 * +--+----------------------+-------+-----------------------------------------+ 1154 */ 1155 struct iommufd_vevent_header { 1156 __u32 flags; 1157 __u32 sequence; 1158 }; 1159 1160 /** 1161 * enum iommu_veventq_type - Virtual Event Queue Type 1162 * @IOMMU_VEVENTQ_TYPE_DEFAULT: Reserved for future use 1163 * @IOMMU_VEVENTQ_TYPE_ARM_SMMUV3: ARM SMMUv3 Virtual Event Queue 1164 * @IOMMU_VEVENTQ_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV Extension IRQ 1165 */ 1166 enum iommu_veventq_type { 1167 IOMMU_VEVENTQ_TYPE_DEFAULT = 0, 1168 IOMMU_VEVENTQ_TYPE_ARM_SMMUV3 = 1, 1169 IOMMU_VEVENTQ_TYPE_TEGRA241_CMDQV = 2, 1170 }; 1171 1172 /** 1173 * struct iommu_vevent_arm_smmuv3 - ARM SMMUv3 Virtual Event 1174 * (IOMMU_VEVENTQ_TYPE_ARM_SMMUV3) 1175 * @evt: 256-bit ARM SMMUv3 Event record, little-endian.
1176 * Reported event records: (Refer to "7.3 Event records" in the SMMUv3 HW Spec) 1177 * - 0x04 C_BAD_STE 1178 * - 0x06 F_STREAM_DISABLED 1179 * - 0x08 C_BAD_SUBSTREAMID 1180 * - 0x0a C_BAD_CD 1181 * - 0x10 F_TRANSLATION 1182 * - 0x11 F_ADDR_SIZE 1183 * - 0x12 F_ACCESS 1184 * - 0x13 F_PERMISSION 1185 * 1186 * The StreamID field reports a virtual device ID. To receive a virtual event for a 1187 * device, a vDEVICE must be allocated via IOMMU_VDEVICE_ALLOC. 1188 */ 1189 struct iommu_vevent_arm_smmuv3 { 1190 __aligned_le64 evt[4]; 1191 }; 1192 1193 /** 1194 * struct iommu_vevent_tegra241_cmdqv - Tegra241 CMDQV IRQ 1195 * (IOMMU_VEVENTQ_TYPE_TEGRA241_CMDQV) 1196 * @lvcmdq_err_map: 128-bit logical vcmdq error map, little-endian. 1197 * (Refer to register LVCMDQ_ERR_MAPs per VINTF) 1198 * 1199 * The 128-bit register value from HW exclusively reflects the error bits for a 1200 * Virtual Interface represented by a vIOMMU object. Read and report directly. 1201 */ 1202 struct iommu_vevent_tegra241_cmdqv { 1203 __aligned_le64 lvcmdq_err_map[2]; 1204 }; 1205 1206 /** 1207 * struct iommu_veventq_alloc - ioctl(IOMMU_VEVENTQ_ALLOC) 1208 * @size: sizeof(struct iommu_veventq_alloc) 1209 * @flags: Must be 0 1210 * @viommu_id: virtual IOMMU ID to associate the vEVENTQ with 1211 * @type: Type of the vEVENTQ. Must be defined in enum iommu_veventq_type 1212 * @veventq_depth: Maximum number of events in the vEVENTQ 1213 * @out_veventq_id: The ID of the new vEVENTQ 1214 * @out_veventq_fd: The fd of the new vEVENTQ. User space must close the 1215 * successfully returned fd after using it 1216 * @__reserved: Must be 0 1217 * 1218 * Explicitly allocate a virtual event queue interface for a vIOMMU. A vIOMMU 1219 * can have multiple FDs for different types, but is confined to one per @type. 1220 * User space should open the @out_veventq_fd to read vEVENTs out of a vEVENTQ, 1221 * if there are vEVENTs available.
A vEVENTQ will lose events due to overflow, 1222 * if the number of the vEVENTs hits @veventq_depth. 1223 * 1224 * Each vEVENT in a vEVENTQ encloses a struct iommufd_vevent_header followed by 1225 * a type-specific data structure, in a normal case: 1226 * 1227 * +-+---------+-------+---------+-------+-----+---------+-------+-+ 1228 * | | header0 | data0 | header1 | data1 | ... | headerN | dataN | | 1229 * +-+---------+-------+---------+-------+-----+---------+-------+-+ 1230 * 1231 * unless a trailing IOMMU_VEVENTQ_FLAG_LOST_EVENTS header is logged (refer to 1232 * struct iommufd_vevent_header). 1233 */ 1234 struct iommu_veventq_alloc { 1235 __u32 size; 1236 __u32 flags; 1237 __u32 viommu_id; 1238 __u32 type; 1239 __u32 veventq_depth; 1240 __u32 out_veventq_id; 1241 __u32 out_veventq_fd; 1242 __u32 __reserved; 1243 }; 1244 #define IOMMU_VEVENTQ_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VEVENTQ_ALLOC) 1245 1246 /** 1247 * enum iommu_hw_queue_type - HW Queue Type 1248 * @IOMMU_HW_QUEUE_TYPE_DEFAULT: Reserved for future use 1249 * @IOMMU_HW_QUEUE_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV (extension for ARM 1250 * SMMUv3) Virtual Command Queue (VCMDQ) 1251 */ 1252 enum iommu_hw_queue_type { 1253 IOMMU_HW_QUEUE_TYPE_DEFAULT = 0, 1254 /* 1255 * TEGRA241_CMDQV requirements (otherwise, allocation will fail) 1256 * - alloc starts from the lowest @index=0 in ascending order 1257 * - destroy starts from the last allocated @index in descending order 1258 * - @nesting_parent_iova must be aligned to @length in bytes and mapped in the IOAS 1259 * - @length must be a power of 2, with a minimum of 32 bytes and a maximum of 1260 * 2 ^ idr[1].CMDQS * 16 bytes (use GET_HW_INFO call to read idr[1] 1261 * from struct iommu_hw_info_arm_smmuv3) 1262 * - it is suggested to back the queue memory with contiguous physical pages or 1263 * a single huge page with alignment of the queue size, and limit the 1264 * emulated vSMMU's IDR1.CMDQS to log2(huge page size / 16 bytes) 1265 */ 1266 IOMMU_HW_QUEUE_TYPE_TEGRA241_CMDQV = 1,
1267 }; 1268 1269 /** 1270 * struct iommu_hw_queue_alloc - ioctl(IOMMU_HW_QUEUE_ALLOC) 1271 * @size: sizeof(struct iommu_hw_queue_alloc) 1272 * @flags: Must be 0 1273 * @viommu_id: Virtual IOMMU ID to associate the HW queue with 1274 * @type: One of enum iommu_hw_queue_type 1275 * @index: The logical index to the HW queue per virtual IOMMU for a multi-queue 1276 * model 1277 * @out_hw_queue_id: The ID of the new HW queue 1278 * @nesting_parent_iova: Base address of the queue memory in the guest physical 1279 * address space 1280 * @length: Length of the queue memory 1281 * 1282 * Allocate a HW queue object for a vIOMMU-specific HW-accelerated queue, which 1283 * allows HW to access guest queue memory described using @nesting_parent_iova 1284 * and @length. 1285 * 1286 * A vIOMMU can allocate multiple queues, but it must use a different @index per 1287 * type to separate each allocation, e.g.:: 1288 * 1289 * Type1 HW queue0, Type1 HW queue1, Type2 HW queue0, ... 1290 */ 1291 struct iommu_hw_queue_alloc { 1292 __u32 size; 1293 __u32 flags; 1294 __u32 viommu_id; 1295 __u32 type; 1296 __u32 index; 1297 __u32 out_hw_queue_id; 1298 __aligned_u64 nesting_parent_iova; 1299 __aligned_u64 length; 1300 }; 1301 #define IOMMU_HW_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HW_QUEUE_ALLOC) 1302 #endif 1303