// SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2019 Intel Corporation. All rights rsvd. */ #include #include #include #include #include #include "idxd.h" #include "registers.h" static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu) { struct idxd_desc *desc; struct idxd_device *idxd = wq->idxd; desc = wq->descs[idx]; memset(desc->hw, 0, sizeof(struct dsa_hw_desc)); memset(desc->completion, 0, idxd->data->compl_size); desc->cpu = cpu; if (device_pasid_enabled(idxd)) desc->hw->pasid = idxd->pasid; return desc; } struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype) { int cpu, idx; struct idxd_device *idxd = wq->idxd; DEFINE_SBQ_WAIT(wait); struct sbq_wait_state *ws; struct sbitmap_queue *sbq; if (idxd->state != IDXD_DEV_ENABLED) return ERR_PTR(-EIO); sbq = &wq->sbq; idx = sbitmap_queue_get(sbq, &cpu); if (idx < 0) { if (optype == IDXD_OP_NONBLOCK) return ERR_PTR(-EAGAIN); } else { return __get_desc(wq, idx, cpu); } ws = &sbq->ws[0]; for (;;) { sbitmap_prepare_to_wait(sbq, ws, &wait, TASK_INTERRUPTIBLE); if (signal_pending_state(TASK_INTERRUPTIBLE, current)) break; idx = sbitmap_queue_get(sbq, &cpu); if (idx >= 0) break; schedule(); } sbitmap_finish_wait(sbq, ws, &wait); if (idx < 0) return ERR_PTR(-EAGAIN); return __get_desc(wq, idx, cpu); } EXPORT_SYMBOL_NS_GPL(idxd_alloc_desc, IDXD); void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc) { int cpu = desc->cpu; desc->cpu = -1; sbitmap_queue_clear(&wq->sbq, desc->id, cpu); } EXPORT_SYMBOL_NS_GPL(idxd_free_desc, IDXD); static struct idxd_desc *list_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, struct idxd_desc *desc) { struct idxd_desc *d, *n; lockdep_assert_held(&ie->list_lock); list_for_each_entry_safe(d, n, &ie->work_list, list) { if (d == desc) { list_del(&d->list); return d; } } /* * At this point, the desc needs to be aborted is held by the completion * handler where it has taken it off the pending list but has not added to the * work list. It will be cleaned up by the interrupt handler when it sees the * IDXD_COMP_DESC_ABORT for completion status. */ return NULL; } static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, struct idxd_desc *desc) { struct idxd_desc *d, *t, *found = NULL; struct llist_node *head; LIST_HEAD(flist); desc->completion->status = IDXD_COMP_DESC_ABORT; /* * Grab the list lock so it will block the irq thread handler. This allows the * abort code to locate the descriptor need to be aborted. */ spin_lock(&ie->list_lock); head = llist_del_all(&ie->pending_llist); if (head) { llist_for_each_entry_safe(d, t, head, llnode) { if (d == desc) { found = desc; continue; } if (d->completion->status) list_add_tail(&d->list, &flist); else list_add_tail(&d->list, &ie->work_list); } } if (!found) found = list_abort_desc(wq, ie, desc); spin_unlock(&ie->list_lock); if (found) idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, false, NULL, NULL); /* * completing the descriptor will return desc to allocator and * the desc can be acquired by a different process and the * desc->list can be modified. Delete desc from list so the * list traversing does not get corrupted by the other process. */ list_for_each_entry_safe(d, t, &flist, list) { list_del_init(&d->list); idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, true, NULL, NULL); } } /* * ENQCMDS typically fail when the WQ is inactive or busy. On host submission, the driver * has better control of number of descriptors being submitted to a shared wq by limiting * the number of driver allocated descriptors to the wq size. However, when the swq is * exported to a guest kernel, it may be shared with multiple guest kernels. This means * the likelihood of getting busy returned on the swq when submitting goes significantly up. * Having a tunable retry mechanism allows the driver to keep trying for a bit before giving * up. The sysfs knob can be tuned by the system administrator. */ int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc) { unsigned int retries = wq->enqcmds_retries; int rc; do { rc = enqcmds(portal, desc); if (rc == 0) break; cpu_relax(); } while (retries--); return rc; } int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) { struct idxd_device *idxd = wq->idxd; struct idxd_irq_entry *ie = NULL; u32 desc_flags = desc->hw->flags; void __iomem *portal; int rc; if (idxd->state != IDXD_DEV_ENABLED) return -EIO; if (!percpu_ref_tryget_live(&wq->wq_active)) { wait_for_completion(&wq->wq_resurrect); if (!percpu_ref_tryget_live(&wq->wq_active)) return -ENXIO; } portal = idxd_wq_portal_addr(wq); /* * Pending the descriptor to the lockless list for the irq_entry * that we designated the descriptor to. */ if (desc_flags & IDXD_OP_FLAG_RCI) { ie = &wq->ie; desc->hw->int_handle = ie->int_handle; llist_add(&desc->llnode, &ie->pending_llist); } /* * The wmb() flushes writes to coherent DMA data before * possibly triggering a DMA read. The wmb() is necessary * even on UP because the recipient is a device. */ wmb(); if (wq_dedicated(wq)) { iosubmit_cmds512(portal, desc->hw, 1); } else { rc = idxd_enqcmds(wq, portal, desc->hw); if (rc < 0) { percpu_ref_put(&wq->wq_active); /* abort operation frees the descriptor */ if (ie) llist_abort_desc(wq, ie, desc); return rc; } } percpu_ref_put(&wq->wq_active); return 0; } EXPORT_SYMBOL_NS_GPL(idxd_submit_desc, IDXD);