/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "opt_acpi.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/memdesc.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/taskqueue.h>
#include <sys/time.h>
#include <sys/tree.h>
#include <sys/vmem.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <contrib/dev/acpica/include/acpi.h>
#include <contrib/dev/acpica/include/accommon.h>
#include <dev/acpica/acpivar.h>
#include <dev/pci/pcireg.h>
#include <machine/bus.h>
#include <machine/cpu.h>
#include <x86/include/busdma_impl.h>
#include <dev/iommu/busdma_iommu.h>
#include <x86/iommu/intel_reg.h>
#include <x86/iommu/x86_iommu.h>
#include <x86/iommu/intel_dmar.h>

/*
 * Turn on queued invalidation for the unit.
 *
 * Sets DMAR_GCMD_QIE in the cached global command value, writes that
 * value to DMAR_GCMD_REG, and then polls DMAR_GSTS_REG until
 * DMAR_GSTS_QIES reads back as set.  Asserts that the unit lock is held.
 *
 * Returns the value of "error" after DMAR_WAIT_UNTIL().
 * NOTE(review): "error" is never assigned directly in this function; it
 * is presumably set by the DMAR_WAIT_UNTIL() macro, which is defined
 * elsewhere -- confirm.  The local must keep this exact name.
 */
static int
dmar_enable_qi(struct dmar_unit *unit)
{
	int error;

	DMAR_ASSERT_LOCKED(unit);
	unit->hw_gcmd |= DMAR_GCMD_QIE;
	dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd);
	/* Wait for the status register to reflect the enable request. */
	DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_QIES)
	    != 0));
	return (error);
}

/*
 * Turn off queued invalidation for the unit.
 *
 * Clears DMAR_GCMD_QIE in the cached global command value, writes that
 * value to DMAR_GCMD_REG, and then polls DMAR_GSTS_REG until
 * DMAR_GSTS_QIES reads back as clear.  Asserts that the unit lock is
 * held.
 *
 * Returns the value of "error" after DMAR_WAIT_UNTIL().
 * NOTE(review): "error" is never assigned directly in this function; it
 * is presumably set by the DMAR_WAIT_UNTIL() macro, which is defined
 * elsewhere -- confirm.  The local must keep this exact name.
 */
static int
dmar_disable_qi(struct dmar_unit *unit)
{
	int error;

	DMAR_ASSERT_LOCKED(unit);
	unit->hw_gcmd &= ~DMAR_GCMD_QIE;
	dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd);
	/* Wait for the status register to reflect the disable request. */
	DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_QIES)
	    == 0));
	return (error);
}

/*
 * Publish the software copy of the invalidation queue tail to the
 * hardware by writing it to DMAR_IQT_REG.  Asserts that the unit lock
 * is held.
 */
static void
dmar_qi_advance_tail(struct iommu_unit *iommu)
{
	struct dmar_unit *dmar = IOMMU2DMAR(iommu);

	DMAR_ASSERT_LOCKED(dmar);
	dmar_write4(dmar, DMAR_IQT_REG, dmar->x86c.inv_queue_tail);
}

/*
 * Reserve room for descr_count descriptors in the invalidation queue.
 *
 * The cached count of available bytes is used when it is large enough.
 * Otherwise it is recomputed from the hardware head register, and the
 * function spins until enough space is available.  On return the
 * requested number of bytes has been subtracted from the cached count.
 * Asserts that the unit lock is held.
 */
static void
dmar_qi_ensure(struct iommu_unit *iommu, int descr_count)
{
	struct dmar_unit *unit;
	uint32_t head;
	int bytes;

	unit = IOMMU2DMAR(iommu);
	DMAR_ASSERT_LOCKED(unit);
	bytes = descr_count << DMAR_IQ_DESCR_SZ_SHIFT;
	while (bytes > unit->x86c.inv_queue_avail) {
		/*
		 * Refill the cached count from the hardware head.  One
		 * descriptor's worth of space is always held back.  When
		 * the head is not ahead of the tail, the free region
		 * wraps around the end of the queue, so the queue size
		 * is added in.
		 */
		head = dmar_read4(unit, DMAR_IQH_REG) & DMAR_IQH_MASK;
		unit->x86c.inv_queue_avail = head - unit->x86c.inv_queue_tail -
		    DMAR_IQ_DESCR_SZ;
		if (head <= unit->x86c.inv_queue_tail)
			unit->x86c.inv_queue_avail += unit->x86c.inv_queue_size;
		if (bytes <= unit->x86c.inv_queue_avail)
			break;

		/*
		 * Still no space in the queue, so busy wait; the hardware
		 * must make progress.  Advance the tail first, to inform
		 * the descriptor streamer about entries that may already
		 * have been filled.  Otherwise they could clog the whole
		 * queue.
		 *
		 * See dmar_qi_invalidate_locked() for a discussion
		 * about data race prevention.
		 */
		dmar_qi_advance_tail(DMAR2IOMMU(unit));
		unit->x86c.inv_queue_full++;
		cpu_spinwait();
	}
	unit->x86c.inv_queue_avail -= bytes;
}

/*
 * Store one 64-bit half of a descriptor at the current software tail,
 * then step the tail forward by half a descriptor.  The tail is masked
 * with inv_queue_size - 1 afterwards, which wraps it back to the start
 * of the queue (this assumes the queue size is a power of two).
 */
static void
dmar_qi_emit_qword(struct dmar_unit *unit, uint64_t data)
{

#ifdef __LP64__
	atomic_store_64((uint64_t *)(unit->x86c.inv_queue +
	    unit->x86c.inv_queue_tail), data);
#else
	*(volatile uint64_t *)(unit->x86c.inv_queue +
	    unit->x86c.inv_queue_tail) = data;
#endif
	unit->x86c.inv_queue_tail += DMAR_IQ_DESCR_SZ / 2;
	KASSERT(unit->x86c.inv_queue_tail <= unit->x86c.inv_queue_size,
	    ("tail overflow 0x%x 0x%jx", unit->x86c.inv_queue_tail,
	    (uintmax_t)unit->x86c.inv_queue_size));
	unit->x86c.inv_queue_tail &= unit->x86c.inv_queue_size - 1;
}

/*
 * Write a complete descriptor, made of the two 64-bit words data1 and
 * data2, into the invalidation queue at the software tail.  Only the
 * software tail is moved here; the hardware tail register is not
 * touched.  Asserts that the unit lock is held.
 */
static void
dmar_qi_emit(struct dmar_unit *unit, uint64_t data1, uint64_t data2)
{

	DMAR_ASSERT_LOCKED(unit);
	dmar_qi_emit_qword(unit, data1);
	dmar_qi_emit_qword(unit, data2);
}

/*
 * Queue an invalidation wait descriptor.
 *
 * intr, memw and fence select the DMAR_IQ_DESCR_WAIT_IF, _SW and _FN
 * flags respectively.  When memw is set, seq is encoded into the
 * descriptor with DMAR_IQ_DESCR_WAIT_SD() and the second word carries
 * inv_waitd_seq_hw_phys; otherwise both are left as zero.  Asserts that
 * the unit lock is held.
 */
static void
dmar_qi_emit_wait_descr(struct iommu_unit *iommu, uint32_t seq, bool intr,
    bool memw, bool fence)
{
	struct dmar_unit *unit;
	uint64_t word_lo, word_hi;

	unit = IOMMU2DMAR(iommu);
	DMAR_ASSERT_LOCKED(unit);

	/* First word: descriptor type, flags and the status data. */
	word_lo = DMAR_IQ_DESCR_WAIT_ID |
	    (intr ? DMAR_IQ_DESCR_WAIT_IF : 0) |
	    (memw ? DMAR_IQ_DESCR_WAIT_SW : 0) |
	    (fence ? DMAR_IQ_DESCR_WAIT_FN : 0) |
	    (memw ? DMAR_IQ_DESCR_WAIT_SD(seq) : 0);

	/* Second word: only meaningful when a status write is requested. */
	word_hi = memw ? unit->x86c.inv_waitd_seq_hw_phys : 0;

	dmar_qi_emit(unit, word_lo, word_hi);
}

/*
 * Queue page-selective IOTLB invalidation descriptors covering the range
 * [base, base + size) of the domain, followed by a call to
 * iommu_qi_emit_wait_seq() with pseq and emit_wait.
 *
 * The range is consumed in pieces: calc_am() returns the address mask
 * for the next piece and reports its length, and one descriptor is
 * queued per piece.  Asserts that the unit lock is held.
 */
static void
dmar_qi_invalidate_emit(struct iommu_domain *idomain, iommu_gaddr_t base,
    iommu_gaddr_t size, struct iommu_qi_genseq *pseq, bool emit_wait)
{
	struct dmar_domain *domain;
	struct dmar_unit *unit;
	struct iommu_unit *iommu;
	iommu_gaddr_t chunk;
	int am;

	domain = __containerof(idomain, struct dmar_domain, iodom);
	unit = domain->dmar;
	iommu = DMAR2IOMMU(unit);
	DMAR_ASSERT_LOCKED(unit);

	while (size > 0) {
		am = calc_am(unit, base, size, &chunk);
		dmar_qi_ensure(iommu, 1);
		dmar_qi_emit(unit, DMAR_IQ_DESCR_IOTLB_INV |
		    DMAR_IQ_DESCR_IOTLB_PAGE | DMAR_IQ_DESCR_IOTLB_DW |
		    DMAR_IQ_DESCR_IOTLB_DR |
		    DMAR_IQ_DESCR_IOTLB_DID(domain->domain),
		    base | am);
		base += chunk;
		size -= chunk;
	}
	iommu_qi_emit_wait_seq(iommu, pseq, emit_wait);
}

/*
 * Common body of the global invalidation requests.
 *
 * Queues a single descriptor whose first word is data1 and whose second
 * word is zero, follows it with a wait sequence, advances the hardware
 * tail, and then waits for the sequence to complete.  Asserts that the
 * unit lock is held.
 */
static void
dmar_qi_invalidate_glob_impl(struct dmar_unit *unit, uint64_t data1)
{
	struct iommu_qi_genseq gseq;

	DMAR_ASSERT_LOCKED(unit);
	/*
	 * Reserve two descriptors: one for the invalidation itself and,
	 * presumably, one for the wait descriptor queued by
	 * iommu_qi_emit_wait_seq().
	 */
	dmar_qi_ensure(DMAR2IOMMU(unit), 2);
	dmar_qi_emit(unit, data1, 0);
	iommu_qi_emit_wait_seq(DMAR2IOMMU(unit), &gseq, true);
	/* See dmar_qi_invalidate_sync(). */
	unit->x86c.inv_seq_waiters++;
	dmar_qi_advance_tail(DMAR2IOMMU(unit));
	iommu_qi_wait_for_seq(DMAR2IOMMU(unit), &gseq, false);
}

void
dmar_qi_invalidate_ctx_glob_locked(struct dmar_unit *unit)
{
	dmar_qi_invalidate_glob_impl(unit, DMAR_IQ_DESCR_CTX_INV |
	    DMAR_IQ_DESCR_CTX_GLOB);
}

void
dmar_qi_invalidate_iotlb_glob_locked(struct dmar_unit *unit)
{
	dmar_qi_invalidate_glob_impl(unit, DMAR_IQ_DESCR_IOTLB_INV |
	    DMAR_IQ_DESCR_IOTLB_GLOB | DMAR_IQ_DESCR_IOTLB_DW |
	    DMAR_IQ_DESCR_IOTLB_DR);
}

void
dmar_qi_invalidate_iec_glob(struct dmar_unit *unit)
{
	dmar_qi_invalidate_glob_impl(unit, DMAR_IQ_DESCR_IEC_INV);
}

/*
 * Invalidate the interrupt entry cache for the cnt entries starting at
 * index start, then busy-wait until the hardware has processed the
 * request.
 *
 * The range is split into power-of-two sized chunks, each aligned to
 * its own size, and one index-selective IEC invalidation descriptor is
 * queued per chunk.  Asserts that the unit lock is held and that the
 * range is non-empty and lies within unit->irte_cnt.
 */
void
dmar_qi_invalidate_iec(struct dmar_unit *unit, u_int start, u_int cnt)
{
	struct iommu_qi_genseq gseq;
	u_int c, l;

	DMAR_ASSERT_LOCKED(unit);
	KASSERT(start < unit->irte_cnt && start < start + cnt &&
	    start + cnt <= unit->irte_cnt,
	    ("inv iec overflow %u %u %u", unit->irte_cnt, start, cnt));
	for (; cnt > 0; cnt -= c, start += c) {
		/*
		 * The lowest set bit of (start | cnt) gives the largest
		 * power of two that both divides start and does not
		 * exceed cnt.  Use an unsigned shift since the result is
		 * stored in a u_int.
		 */
		l = ffs(start | cnt) - 1;
		c = 1U << l;
		dmar_qi_ensure(DMAR2IOMMU(unit), 1);
		dmar_qi_emit(unit, DMAR_IQ_DESCR_IEC_INV |
		    DMAR_IQ_DESCR_IEC_IDX | DMAR_IQ_DESCR_IEC_IIDX(start) |
		    DMAR_IQ_DESCR_IEC_IM(l), 0);
	}
	/* Reserve one more slot before queueing the wait sequence. */
	dmar_qi_ensure(DMAR2IOMMU(unit), 1);
	iommu_qi_emit_wait_seq(DMAR2IOMMU(unit), &gseq, true);

	/*
	 * Since iommu_qi_wait_for_seq() will not sleep, this increment's
	 * placement relative to advancing the tail doesn't matter.
	 */
	unit->x86c.inv_seq_waiters++;

	dmar_qi_advance_tail(DMAR2IOMMU(unit));

	/*
	 * The caller of the function, in particular,
	 * dmar_ir_program_irte(), may be called from the context
	 * where the sleeping is forbidden (in fact, the
	 * intr_table_lock mutex may be held, locked from
	 * intr_shuffle_irqs()).  Wait for the invalidation completion
	 * using the busy wait.
	 *
	 * The impact on the interrupt input setup code is small, the
	 * expected overhead is comparable with the chipset register
	 * read.  It is more harmful for the parallel DMA operations,
	 * since we own the dmar unit lock until whole invalidation
	 * queue is processed, which includes requests possibly issued
	 * before our request.
	 */
	iommu_qi_wait_for_seq(DMAR2IOMMU(unit), &gseq, true);
}

/*
 * Interrupt filter for the queued invalidation interrupt.
 *
 * arg is the struct iommu_unit of the DMAR unit.  No work is done here
 * beyond enqueueing the unit's QI task on its QI taskqueue.  Always
 * returns FILTER_HANDLED.
 */
int
dmar_qi_intr(void *arg)
{
	struct iommu_unit *iommu;
	struct dmar_unit *dmar;

	iommu = arg;
	dmar = IOMMU2DMAR(iommu);
	KASSERT(dmar->qi_enabled, ("dmar%d: QI is not enabled",
	    dmar->iommu.unit));
	taskqueue_enqueue(dmar->x86c.qi_taskqueue, &dmar->x86c.qi_task);
	return (FILTER_HANDLED);
}

/*
 * Task handler for queued invalidation completions.
 *
 * This function is passed to iommu_qi_common_init() by dmar_init_qi().
 * It drains the pending TLB flush entries, acknowledges the
 * invalidation wait completion status in DMAR_ICS_REG if it is set
 * (draining once more afterwards), and wakes up any threads recorded in
 * inv_seq_waiters.
 *
 * arg is converted to the DMAR unit with IOMMU2DMAR(); pending is
 * unused.
 */
static void
dmar_qi_task(void *arg, int pending __unused)
{
	struct dmar_unit *unit;
	uint32_t ics;

	unit = IOMMU2DMAR(arg);
	iommu_qi_drain_tlb_flush(DMAR2IOMMU(unit));

	/*
	 * Request an interrupt on the completion of the next invalidation
	 * wait descriptor with the IF field set.
	 */
	ics = dmar_read4(unit, DMAR_ICS_REG);
	if ((ics & DMAR_ICS_IWC) != 0) {
		/* Write back only the IWC bit to clear it. */
		ics = DMAR_ICS_IWC;
		dmar_write4(unit, DMAR_ICS_REG, ics);

		/*
		 * Drain a second time in case the DMAR processes an entry
		 * after the first call and before clearing DMAR_ICS_IWC.
		 * Otherwise, such entries will linger until a later entry
		 * that requests an interrupt is processed.
		 */
		iommu_qi_drain_tlb_flush(DMAR2IOMMU(unit));
	}

	/* The waiter count is checked here without holding the lock. */
	if (unit->x86c.inv_seq_waiters > 0) {
		/*
		 * Acquire the DMAR lock so that wakeup() is called only after
		 * the waiter is sleeping.
		 */
		DMAR_LOCK(unit);
		wakeup(&unit->x86c.inv_seq_waiters);
		DMAR_UNLOCK(unit);
	}
}

/*
 * Set up and enable queued invalidation for the unit.
 *
 * Nothing is done when the unit lacks QI support, when DMAR_CAP_CM is
 * set in the hardware capabilities, or when QI has been turned off with
 * the "hw.dmar.qi" tunable (the default is enabled).  Otherwise the
 * common QI state is initialized, the DMAR implementations of the QI
 * callbacks are installed, the queue address and size are programmed
 * into the hardware, and QI and its interrupt are enabled.
 *
 * Always returns 0.
 * NOTE(review): the return value of dmar_enable_qi() is ignored, so a
 * failure to enable QI there is not reported to the caller -- confirm
 * that this is intended.
 */
int
dmar_init_qi(struct dmar_unit *unit)
{
	uint64_t iqa;
	uint32_t ics;
	u_int qi_sz;

	if (!DMAR_HAS_QI(unit) || (unit->hw_cap & DMAR_CAP_CM) != 0)
		return (0);
	unit->qi_enabled = 1;
	TUNABLE_INT_FETCH("hw.dmar.qi", &unit->qi_enabled);
	if (!unit->qi_enabled)
		return (0);

	/* Parameters consumed by the common QI initialization below. */
	unit->x86c.qi_buf_maxsz = DMAR_IQA_QS_MAX;
	unit->x86c.qi_cmd_sz = DMAR_IQ_DESCR_SZ;
	iommu_qi_common_init(DMAR2IOMMU(unit), dmar_qi_task);
	/* Install the DMAR-specific QI operations. */
	get_x86_iommu()->qi_ensure = dmar_qi_ensure;
	get_x86_iommu()->qi_emit_wait_descr = dmar_qi_emit_wait_descr;
	get_x86_iommu()->qi_advance_tail = dmar_qi_advance_tail;
	get_x86_iommu()->qi_invalidate_emit = dmar_qi_invalidate_emit;

	/* Queue size field: log2 of the queue length in pages. */
	qi_sz = ilog2(unit->x86c.inv_queue_size / PAGE_SIZE);

	DMAR_LOCK(unit);
	/* Start with the tail at zero. */
	dmar_write8(unit, DMAR_IQT_REG, 0);
	/* Physical address of the queue combined with the size field. */
	iqa = pmap_kextract((uintptr_t)unit->x86c.inv_queue);
	iqa |= qi_sz;
	dmar_write8(unit, DMAR_IQA_REG, iqa);
	dmar_enable_qi(unit);
	/* Clear a stale wait completion status, if any, before unmasking. */
	ics = dmar_read4(unit, DMAR_ICS_REG);
	if ((ics & DMAR_ICS_IWC) != 0) {
		ics = DMAR_ICS_IWC;
		dmar_write4(unit, DMAR_ICS_REG, ics);
	}
	dmar_enable_qi_intr(DMAR2IOMMU(unit));
	DMAR_UNLOCK(unit);

	return (0);
}

/*
 * Callback handed to iommu_qi_common_fini(): disable the QI interrupt
 * first and then queued invalidation itself.  The status returned by
 * dmar_disable_qi() is discarded.
 */
static void
dmar_fini_qi_helper(struct iommu_unit *iommu)
{
	struct dmar_unit *unit;

	unit = IOMMU2DMAR(iommu);
	dmar_disable_qi_intr(iommu);
	(void)dmar_disable_qi(unit);
}

/*
 * Tear down queued invalidation for the unit.  Does nothing when QI is
 * not enabled; otherwise runs the common teardown with
 * dmar_fini_qi_helper() and marks QI as disabled.
 */
void
dmar_fini_qi(struct dmar_unit *unit)
{
	if (unit->qi_enabled) {
		iommu_qi_common_fini(DMAR2IOMMU(unit), dmar_fini_qi_helper);
		unit->qi_enabled = 0;
	}
}

/*
 * Enable the invalidation completion interrupt by clearing the
 * DMAR_IECTL_IM bit in DMAR_IECTL_REG.  Asserts that the unit lock is
 * held and that the unit supports QI.
 */
void
dmar_enable_qi_intr(struct iommu_unit *iommu)
{
	struct dmar_unit *dmar;
	uint32_t iectl;

	dmar = IOMMU2DMAR(iommu);
	DMAR_ASSERT_LOCKED(dmar);
	KASSERT(DMAR_HAS_QI(dmar), ("dmar%d: QI is not supported",
	    dmar->iommu.unit));
	iectl = dmar_read4(dmar, DMAR_IECTL_REG) & ~DMAR_IECTL_IM;
	dmar_write4(dmar, DMAR_IECTL_REG, iectl);
}

/*
 * Disable the invalidation completion interrupt by setting the
 * DMAR_IECTL_IM bit in DMAR_IECTL_REG.  Asserts that the unit lock is
 * held and that the unit supports QI.
 */
void
dmar_disable_qi_intr(struct iommu_unit *iommu)
{
	struct dmar_unit *dmar;
	uint32_t iectl;

	dmar = IOMMU2DMAR(iommu);
	DMAR_ASSERT_LOCKED(dmar);
	KASSERT(DMAR_HAS_QI(dmar), ("dmar%d: QI is not supported",
	    dmar->iommu.unit));
	iectl = dmar_read4(dmar, DMAR_IECTL_REG);
	iectl |= DMAR_IECTL_IM;
	dmar_write4(dmar, DMAR_IECTL_REG, iectl);
}