// SPDX-License-Identifier: GPL-2.0-only
#include <stdint.h>
#include <unistd.h>

#include <linux/bits.h>
#include <linux/errno.h>
#include <linux/idxd.h>
#include <linux/io.h>
#include <linux/pci_ids.h>
#include <linux/sizes.h>

#include <vfio_util.h>

#include "registers.h"

/* Vectors 1+ are available for work queue completion interrupts. */
#define MSIX_VECTOR 1

struct dsa_state {
	/* Descriptors for copy and batch operations. */
	struct dsa_hw_desc batch[32];
	struct dsa_hw_desc copy[1024];

	/* Completion records for copy and batch operations. */
	struct dsa_completion_record copy_completion;
	struct dsa_completion_record batch_completion;

	/* Cached device registers (and derived data) for easy access */
	union gen_cap_reg gen_cap;
	union wq_cap_reg wq_cap;
	union group_cap_reg group_cap;
	union engine_cap_reg engine_cap;
	union offsets_reg table_offsets;
	void *wqcfg_table;
	void *grpcfg_table;
	u64 max_batches;
	u64 max_copies_per_batch;

	/* The number of ongoing memcpy operations. */
	u64 memcpy_count;

	/* Buffers used by dsa_send_msi() to generate an interrupt */
	u64 send_msi_src;
	u64 send_msi_dst;
};
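
/*
 * The driver's scratch region (device->driver.region) backs struct dsa_state
 * and is assumed to be DMA-mapped, so the descriptors and completion records
 * above can be handed to the device by IOVA via to_iova().
 */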

static inline struct dsa_state *to_dsa_state(struct vfio_pci_device *device)
{
	return device->driver.region.vaddr;
}

static bool dsa_int_handle_request_required(struct vfio_pci_device *device)
{
	void *bar0 = device->bars[0].vaddr;
	union gen_cap_reg gen_cap;
	u32 cmd_cap;

	gen_cap.bits = readq(bar0 + IDXD_GENCAP_OFFSET);
	if (!gen_cap.cmd_cap)
		return false;

	cmd_cap = readl(bar0 + IDXD_CMDCAP_OFFSET);
	return (cmd_cap >> IDXD_CMD_REQUEST_INT_HANDLE) & 1;
}

static int dsa_probe(struct vfio_pci_device *device)
{
	if (!vfio_pci_device_match(device, PCI_VENDOR_ID_INTEL,
				   PCI_DEVICE_ID_INTEL_DSA_SPR0))
		return -EINVAL;

	if (dsa_int_handle_request_required(device)) {
		printf("Device requires requesting interrupt handles\n");
		return -EINVAL;
	}

	return 0;
}
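
/*
 * Read the Software Error register and, if it holds a valid error, dump all
 * of its fields to stderr and fail the test.
 */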
static void dsa_check_sw_err(struct vfio_pci_device *device)
{
	void *reg = device->bars[0].vaddr + IDXD_SWERR_OFFSET;
	union sw_err_reg err = {};
	int i;

	for (i = 0; i < ARRAY_SIZE(err.bits); i++) {
		err.bits[i] = readq(reg + offsetof(union sw_err_reg, bits[i]));

		/* No errors */
		if (i == 0 && !err.valid)
			return;
	}

	fprintf(stderr, "SWERR: 0x%016lx 0x%016lx 0x%016lx 0x%016lx\n",
		err.bits[0], err.bits[1], err.bits[2], err.bits[3]);

	fprintf(stderr, " valid: 0x%x\n", err.valid);
	fprintf(stderr, " overflow: 0x%x\n", err.overflow);
	fprintf(stderr, " desc_valid: 0x%x\n", err.desc_valid);
	fprintf(stderr, " wq_idx_valid: 0x%x\n", err.wq_idx_valid);
	fprintf(stderr, " batch: 0x%x\n", err.batch);
	fprintf(stderr, " fault_rw: 0x%x\n", err.fault_rw);
	fprintf(stderr, " priv: 0x%x\n", err.priv);
	fprintf(stderr, " error: 0x%x\n", err.error);
	fprintf(stderr, " wq_idx: 0x%x\n", err.wq_idx);
	fprintf(stderr, " operation: 0x%x\n", err.operation);
	fprintf(stderr, " pasid: 0x%x\n", err.pasid);
	fprintf(stderr, " batch_idx: 0x%x\n", err.batch_idx);
	fprintf(stderr, " invalid_flags: 0x%x\n", err.invalid_flags);
	fprintf(stderr, " fault_addr: 0x%lx\n", err.fault_addr);

	VFIO_FAIL("Software Error Detected!\n");
}

static void dsa_command(struct vfio_pci_device *device, u32 cmd)
{
	union idxd_command_reg cmd_reg = { .cmd = cmd };
	u32 sleep_ms = 1, attempts = 5000 / sleep_ms;
	void *bar0 = device->bars[0].vaddr;
	u32 status;
	u8 err;

	writel(cmd_reg.bits, bar0 + IDXD_CMD_OFFSET);

	for (;;) {
		dsa_check_sw_err(device);

		status = readl(bar0 + IDXD_CMDSTS_OFFSET);
		if (!(status & IDXD_CMDSTS_ACTIVE))
			break;

		VFIO_ASSERT_GT(--attempts, 0);
		usleep(sleep_ms * 1000);
	}

	err = status & IDXD_CMDSTS_ERR_MASK;
	VFIO_ASSERT_EQ(err, 0, "Error issuing command 0x%x: 0x%x\n", cmd, err);
}
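
/*
 * Configure work queue 0 to span the device's entire WQ capacity. Mode 1
 * selects a dedicated (not shared) work queue, which matches the
 * MOVDIR64B-based descriptor submission in dsa_desc_write().
 */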
static void dsa_wq_init(struct vfio_pci_device *device)
{
	struct dsa_state *dsa = to_dsa_state(device);
	union wq_cap_reg wq_cap = dsa->wq_cap;
	union wqcfg wqcfg;
	u64 wqcfg_size;
	int i;

	VFIO_ASSERT_GT((u32)wq_cap.num_wqs, 0);

	wqcfg = (union wqcfg) {
		.wq_size = wq_cap.total_wq_size,
		.mode = 1,
		.priority = 1,
		/*
		 * Disable Address Translation Service (if enabled) so that VFIO
		 * selftests using this driver can generate I/O page faults.
		 */
		.wq_ats_disable = wq_cap.wq_ats_support,
		.max_xfer_shift = dsa->gen_cap.max_xfer_shift,
		.max_batch_shift = dsa->gen_cap.max_batch_shift,
		.op_config[0] = BIT(DSA_OPCODE_MEMMOVE) | BIT(DSA_OPCODE_BATCH),
	};

	wqcfg_size = 1UL << (wq_cap.wqcfg_size + IDXD_WQCFG_MIN);

	for (i = 0; i < wqcfg_size / sizeof(wqcfg.bits[0]); i++)
		writel(wqcfg.bits[i], dsa->wqcfg_table + offsetof(union wqcfg, bits[i]));
}

static void dsa_group_init(struct vfio_pci_device *device)
{
	struct dsa_state *dsa = to_dsa_state(device);
	union group_cap_reg group_cap = dsa->group_cap;
	union engine_cap_reg engine_cap = dsa->engine_cap;

	VFIO_ASSERT_GT((u32)group_cap.num_groups, 0);
	VFIO_ASSERT_GT((u32)engine_cap.num_engines, 0);

	/* Assign work queue 0 and engine 0 to group 0 */
	writeq(1, dsa->grpcfg_table + offsetof(struct grpcfg, wqs[0]));
	writeq(1, dsa->grpcfg_table + offsetof(struct grpcfg, engines));
}

static void dsa_register_cache_init(struct vfio_pci_device *device)
{
	struct dsa_state *dsa = to_dsa_state(device);
	void *bar0 = device->bars[0].vaddr;

	dsa->gen_cap.bits = readq(bar0 + IDXD_GENCAP_OFFSET);
	dsa->wq_cap.bits = readq(bar0 + IDXD_WQCAP_OFFSET);
	dsa->group_cap.bits = readq(bar0 + IDXD_GRPCAP_OFFSET);
	dsa->engine_cap.bits = readq(bar0 + IDXD_ENGCAP_OFFSET);

	dsa->table_offsets.bits[0] = readq(bar0 + IDXD_TABLE_OFFSET);
	dsa->table_offsets.bits[1] = readq(bar0 + IDXD_TABLE_OFFSET + 8);

	dsa->wqcfg_table = bar0 + dsa->table_offsets.wqcfg * IDXD_TABLE_MULT;
	dsa->grpcfg_table = bar0 + dsa->table_offsets.grpcfg * IDXD_TABLE_MULT;

	dsa->max_batches = 1U << (dsa->wq_cap.total_wq_size + IDXD_WQCFG_MIN);
	dsa->max_batches = min(dsa->max_batches, ARRAY_SIZE(dsa->batch));

	dsa->max_copies_per_batch = 1UL << dsa->gen_cap.max_batch_shift;
	dsa->max_copies_per_batch = min(dsa->max_copies_per_batch, ARRAY_SIZE(dsa->copy));
}
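
/*
 * Bring-up sequence: enable PCI memory decode and bus mastering, reset the
 * device, configure one work queue and one group, then enable the device,
 * the work queue, and the MSI-X vector used for completion interrupts.
 */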
static void dsa_init(struct vfio_pci_device *device)
{
	struct dsa_state *dsa = to_dsa_state(device);

	VFIO_ASSERT_GE(device->driver.region.size, sizeof(*dsa));

	vfio_pci_config_writew(device, PCI_COMMAND,
			       PCI_COMMAND_MEMORY |
			       PCI_COMMAND_MASTER |
			       PCI_COMMAND_INTX_DISABLE);

	dsa_command(device, IDXD_CMD_RESET_DEVICE);

	dsa_register_cache_init(device);
	dsa_wq_init(device);
	dsa_group_init(device);

	dsa_command(device, IDXD_CMD_ENABLE_DEVICE);
	dsa_command(device, IDXD_CMD_ENABLE_WQ);

	vfio_pci_msix_enable(device, MSIX_VECTOR, 1);

	device->driver.max_memcpy_count =
		dsa->max_batches * dsa->max_copies_per_batch;
	device->driver.max_memcpy_size = 1UL << dsa->gen_cap.max_xfer_shift;
	device->driver.msi = MSIX_VECTOR;
}

static void dsa_remove(struct vfio_pci_device *device)
{
	dsa_command(device, IDXD_CMD_RESET_DEVICE);
	vfio_pci_msix_disable(device);
}

static int dsa_completion_wait(struct vfio_pci_device *device,
			       struct dsa_completion_record *completion)
{
	u8 status;

	for (;;) {
		dsa_check_sw_err(device);

		status = READ_ONCE(completion->status);
		if (status)
			break;

		usleep(1000);
	}

	if (status == DSA_COMP_SUCCESS)
		return 0;

	printf("Error detected during memcpy operation: 0x%x\n", status);
	return -1;
}

static void dsa_copy_desc_init(struct vfio_pci_device *device,
			       struct dsa_hw_desc *desc,
			       iova_t src, iova_t dst, u64 size,
			       bool interrupt)
{
	struct dsa_state *dsa = to_dsa_state(device);
	u16 flags;

	flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR;

	if (interrupt)
		flags |= IDXD_OP_FLAG_RCI;

	*desc = (struct dsa_hw_desc) {
		.opcode = DSA_OPCODE_MEMMOVE,
		.flags = flags,
		.priv = 1,
		.src_addr = src,
		.dst_addr = dst,
		.xfer_size = size,
		.completion_addr = to_iova(device, &dsa->copy_completion),
		.int_handle = interrupt ? MSIX_VECTOR : 0,
	};
}
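
/*
 * Initialize a batch descriptor whose descriptor list points at the start of
 * the copy descriptor array; the device executes the first 'count' copy
 * descriptors as a single batch.
 */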
static void dsa_batch_desc_init(struct vfio_pci_device *device,
				struct dsa_hw_desc *desc,
				u64 count)
{
	struct dsa_state *dsa = to_dsa_state(device);

	*desc = (struct dsa_hw_desc) {
		.opcode = DSA_OPCODE_BATCH,
		.flags = IDXD_OP_FLAG_CRAV,
		.priv = 1,
		.completion_addr = to_iova(device, &dsa->batch_completion),
		.desc_list_addr = to_iova(device, &dsa->copy[0]),
		.desc_count = count,
	};
}

static void dsa_desc_write(struct vfio_pci_device *device, struct dsa_hw_desc *desc)
{
	/* Write the contents (not address) of the 64-byte descriptor to the device. */
	iosubmit_cmds512(device->bars[2].vaddr, desc, 1);
}

static void dsa_memcpy_one(struct vfio_pci_device *device,
			   iova_t src, iova_t dst, u64 size, bool interrupt)
{
	struct dsa_state *dsa = to_dsa_state(device);

	memset(&dsa->copy_completion, 0, sizeof(dsa->copy_completion));

	dsa_copy_desc_init(device, &dsa->copy[0], src, dst, size, interrupt);
	dsa_desc_write(device, &dsa->copy[0]);
}

static void dsa_memcpy_batch(struct vfio_pci_device *device,
			     iova_t src, iova_t dst, u64 size, u64 count)
{
	struct dsa_state *dsa = to_dsa_state(device);
	int i;

	memset(&dsa->batch_completion, 0, sizeof(dsa->batch_completion));

	for (i = 0; i < ARRAY_SIZE(dsa->copy); i++) {
		struct dsa_hw_desc *copy_desc = &dsa->copy[i];

		dsa_copy_desc_init(device, copy_desc, src, dst, size, false);

		/* Don't request completions for individual copies. */
		copy_desc->flags &= ~IDXD_OP_FLAG_RCR;
	}

	for (i = 0; i < ARRAY_SIZE(dsa->batch) && count; i++) {
		struct dsa_hw_desc *batch_desc = &dsa->batch[i];
		int nr_copies;

		nr_copies = min(count, dsa->max_copies_per_batch);
		count -= nr_copies;

		/*
		 * Batches must have at least 2 copies, so handle the case where
		 * there is exactly 1 copy left by doing one less copy in this
		 * batch and then 2 in the next.
		 */
		if (count == 1) {
			nr_copies--;
			count++;
		}

		dsa_batch_desc_init(device, batch_desc, nr_copies);

		/* Request a completion for the last batch. */
		if (!count)
			batch_desc->flags |= IDXD_OP_FLAG_RCR;

		dsa_desc_write(device, batch_desc);
	}

	VFIO_ASSERT_EQ(count, 0, "Failed to start %lu copies.\n", count);
}
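
/*
 * Illustration of the batch-splitting rule above (hypothetical sizes): with
 * max_copies_per_batch = 8, count = 9 is submitted as batches of 7 and 2
 * rather than 8 and 1, since a batch must contain at least 2 copies.
 */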

static void dsa_memcpy_start(struct vfio_pci_device *device,
			     iova_t src, iova_t dst, u64 size, u64 count)
{
	struct dsa_state *dsa = to_dsa_state(device);

	/* DSA devices require at least 2 copies per batch. */
	if (count == 1)
		dsa_memcpy_one(device, src, dst, size, false);
	else
		dsa_memcpy_batch(device, src, dst, size, count);

	dsa->memcpy_count = count;
}

static int dsa_memcpy_wait(struct vfio_pci_device *device)
{
	struct dsa_state *dsa = to_dsa_state(device);
	int r;

	if (dsa->memcpy_count == 1)
		r = dsa_completion_wait(device, &dsa->copy_completion);
	else
		r = dsa_completion_wait(device, &dsa->batch_completion);

	dsa->memcpy_count = 0;

	return r;
}

static void dsa_send_msi(struct vfio_pci_device *device)
{
	struct dsa_state *dsa = to_dsa_state(device);

	dsa_memcpy_one(device,
		       to_iova(device, &dsa->send_msi_src),
		       to_iova(device, &dsa->send_msi_dst),
		       sizeof(dsa->send_msi_src), true);

	VFIO_ASSERT_EQ(dsa_completion_wait(device, &dsa->copy_completion), 0);
}

const struct vfio_pci_driver_ops dsa_ops = {
	.name = "dsa",
	.probe = dsa_probe,
	.init = dsa_init,
	.remove = dsa_remove,
	.memcpy_start = dsa_memcpy_start,
	.memcpy_wait = dsa_memcpy_wait,
	.send_msi = dsa_send_msi,
};