/* SPDX-License-Identifier: BSD-2-Clause AND BSD-3-Clause */
/*	$NetBSD: qat_ae.c,v 1.1 2019/11/20 09:37:46 hikaru Exp $	*/

/*
 * Copyright (c) 2019 Internet Initiative Japan, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 *   Copyright(c) 2007-2019 Intel Corporation. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#if 0
__KERNEL_RCSID(0, "$NetBSD: qat_ae.c,v 1.1 2019/11/20 09:37:46 hikaru Exp $");
#endif

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/firmware.h>
#include <sys/limits.h>
#include <sys/systm.h>

#include <machine/bus.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include "qatreg.h"
#include "qatvar.h"
#include "qat_aevar.h"

static int	qat_ae_write_4(struct qat_softc *, u_char, bus_size_t,
		    uint32_t);
static int	qat_ae_read_4(struct qat_softc *, u_char, bus_size_t,
		    uint32_t *);
static void	qat_ae_ctx_indr_write(struct qat_softc *, u_char, uint32_t,
		    bus_size_t, uint32_t);
static int	qat_ae_ctx_indr_read(struct qat_softc *, u_char, uint32_t,
		    bus_size_t, uint32_t *);

static u_short	qat_aereg_get_10bit_addr(enum aereg_type, u_short);
static int	qat_aereg_rel_data_write(struct qat_softc *, u_char, u_char,
		    enum aereg_type, u_short, uint32_t);
static int	qat_aereg_rel_data_read(struct qat_softc *, u_char, u_char,
		    enum aereg_type, u_short, uint32_t *);
static int	qat_aereg_rel_rdxfer_write(struct qat_softc *, u_char, u_char,
		    enum aereg_type, u_short, uint32_t);
static int	qat_aereg_rel_wrxfer_write(struct qat_softc *, u_char, u_char,
		    enum aereg_type, u_short, uint32_t);
static int	qat_aereg_rel_nn_write(struct qat_softc *, u_char, u_char,
		    enum aereg_type, u_short, uint32_t);
static int	qat_aereg_abs_to_rel(struct qat_softc *, u_char, u_short,
		    u_short *, u_char *);
static int	qat_aereg_abs_data_write(struct qat_softc *, u_char,
		    enum aereg_type, u_short, uint32_t);

static void	qat_ae_enable_ctx(struct qat_softc *, u_char, u_int);
static void	qat_ae_disable_ctx(struct qat_softc *, u_char, u_int);
static void	qat_ae_write_ctx_mode(struct qat_softc *, u_char, u_char);
static void	qat_ae_write_nn_mode(struct qat_softc *, u_char, u_char);
static void	qat_ae_write_lm_mode(struct qat_softc *, u_char,
		    enum aereg_type, u_char);
static void	qat_ae_write_shared_cs_mode0(struct qat_softc *, u_char,
		    u_char);
static void	qat_ae_write_shared_cs_mode(struct qat_softc *, u_char, u_char);
static int	qat_ae_set_reload_ustore(struct qat_softc *, u_char, u_int, int,
		    u_int);

static enum qat_ae_status qat_ae_get_status(struct qat_softc *, u_char);
static int	qat_ae_is_active(struct qat_softc *, u_char);
static int	qat_ae_wait_num_cycles(struct qat_softc *, u_char, int, int);

static int	qat_ae_clear_reset(struct qat_softc *);
static int	qat_ae_check(struct qat_softc *);
static int	qat_ae_reset_timestamp(struct qat_softc *);
static void	qat_ae_clear_xfer(struct qat_softc *);
static int	qat_ae_clear_gprs(struct qat_softc *);

static void	qat_ae_get_shared_ustore_ae(u_char, u_char *);
static u_int	qat_ae_ucode_parity64(uint64_t);
static uint64_t	qat_ae_ucode_set_ecc(uint64_t);
static int	qat_ae_ucode_write(struct qat_softc *, u_char, u_int, u_int,
		    const uint64_t *);
static int	qat_ae_ucode_read(struct qat_softc *, u_char, u_int, u_int,
		    uint64_t *);
static u_int	qat_ae_concat_ucode(uint64_t *, u_int, u_int, u_int, u_int *);
static int	qat_ae_exec_ucode(struct qat_softc *, u_char, u_char,
		    uint64_t *, u_int, int, u_int, u_int *);
static int	qat_ae_exec_ucode_init_lm(struct qat_softc *, u_char, u_char,
		    int *, uint64_t *, u_int,
		    u_int *, u_int *, u_int *, u_int *, u_int *);
static int	qat_ae_restore_init_lm_gprs(struct qat_softc *, u_char, u_char,
		    u_int, u_int, u_int, u_int, u_int);
static int	qat_ae_get_inst_num(int);
static int	qat_ae_batch_put_lm(struct qat_softc *, u_char,
		    struct qat_ae_batch_init_list *, size_t);
static int	qat_ae_write_pc(struct qat_softc *, u_char, u_int, u_int);

static u_int	qat_aefw_csum(char *, int);
static const char *qat_aefw_uof_string(struct qat_softc *, size_t);
static struct uof_chunk_hdr *qat_aefw_uof_find_chunk(struct qat_softc *,
		    const char *, struct uof_chunk_hdr *);

static int	qat_aefw_load_mof(struct qat_softc *);
static void	qat_aefw_unload_mof(struct qat_softc *);
static int	qat_aefw_load_mmp(struct qat_softc *);
static void	qat_aefw_unload_mmp(struct qat_softc *);

static int	qat_aefw_mof_find_uof0(struct qat_softc *,
		    struct mof_uof_hdr *, struct mof_uof_chunk_hdr *,
		    u_int, size_t, const char *,
		    size_t *, void **);
static int	qat_aefw_mof_find_uof(struct qat_softc *);
static int	qat_aefw_mof_parse(struct qat_softc *);

static int	qat_aefw_uof_parse_image(struct qat_softc *,
		    struct qat_uof_image *, struct uof_chunk_hdr *uch);
static int	qat_aefw_uof_parse_images(struct qat_softc *);
static int	qat_aefw_uof_parse(struct qat_softc *);

static int	qat_aefw_alloc_auth_dmamem(struct qat_softc *, char *, size_t,
		    struct qat_dmamem *);
static int	qat_aefw_auth(struct qat_softc *, struct qat_dmamem *);
static int	qat_aefw_suof_load(struct qat_softc *sc,
		    struct qat_dmamem *dma);
static int	qat_aefw_suof_parse_image(struct qat_softc *,
		    struct qat_suof_image *, struct suof_chunk_hdr *);
static int	qat_aefw_suof_parse(struct qat_softc *);
static int	qat_aefw_suof_write(struct qat_softc *);

static int	qat_aefw_uof_assign_image(struct qat_softc *, struct qat_ae *,
		    struct qat_uof_image *);
static int	qat_aefw_uof_init_ae(struct qat_softc *, u_char);
static int	qat_aefw_uof_init(struct qat_softc *);

static int	qat_aefw_init_memory_one(struct qat_softc *,
		    struct uof_init_mem *);
static void	qat_aefw_free_lm_init(struct qat_softc *, u_char);
static int	qat_aefw_init_ustore(struct qat_softc *);
static int	qat_aefw_init_reg(struct qat_softc *, u_char, u_char,
		    enum aereg_type, u_short, u_int);
static int	qat_aefw_init_reg_sym_expr(struct qat_softc *, u_char,
		    struct qat_uof_image *);
static int	qat_aefw_init_memory(struct qat_softc *);
static int	qat_aefw_init_globals(struct qat_softc *);
static uint64_t	qat_aefw_get_uof_inst(struct qat_softc *,
		    struct qat_uof_page *, u_int);
static int	qat_aefw_do_pagein(struct qat_softc *, u_char,
		    struct qat_uof_page *);
static int	qat_aefw_uof_write_one(struct qat_softc *,
		    struct qat_uof_image *);
static int	qat_aefw_uof_write(struct qat_softc *);

/*
 * Write `value' to the AE-local CSR at `offset' of engine `ae'.
 *
 * The write is issued and LOCAL_CSR_STATUS is then sampled; while
 * LOCAL_CSR_STATUS_STATUS is still set the write is reissued, using a retry
 * budget of TIMEOUT_AE_CSR.  Returns 0 once the status bit reads back clear,
 * or EFAULT (after logging) when the budget is used up.
 */
static int
qat_ae_write_4(struct qat_softc *sc, u_char ae, bus_size_t offset,
	uint32_t value)
{
	uint32_t status;
	int retries;

	retries = TIMEOUT_AE_CSR;
	do {
		qat_ae_local_write_4(sc, ae, offset, value);
		status = qat_ae_local_read_4(sc, ae, LOCAL_CSR_STATUS);
		if ((status & LOCAL_CSR_STATUS_STATUS) == 0)
			return (0);
	} while (retries-- != 0);

	device_printf(sc->sc_dev,
	    "couldn't write AE CSR: ae 0x%hhx offset 0x%lx\n", ae, (long)offset);
	return (EFAULT);
}

/*
 * Read the AE-local CSR at `offset' of engine `ae' into `*value'.
 *
 * After each read LOCAL_CSR_STATUS is sampled; the data is only accepted
 * when LOCAL_CSR_STATUS_STATUS is clear, otherwise the read is retried
 * using a budget of TIMEOUT_AE_CSR.  Returns 0 with `*value' filled in on
 * success; on failure logs, returns EFAULT and leaves `*value' untouched.
 */
static int
qat_ae_read_4(struct qat_softc *sc, u_char ae, bus_size_t offset,
	uint32_t *value)
{
	uint32_t data, status;
	int retries;

	retries = TIMEOUT_AE_CSR;
	do {
		data = qat_ae_local_read_4(sc, ae, offset);
		status = qat_ae_local_read_4(sc, ae, LOCAL_CSR_STATUS);
		if ((status & LOCAL_CSR_STATUS_STATUS) == 0) {
			*value = data;
			return (0);
		}
	} while (retries-- != 0);

	device_printf(sc->sc_dev,
	    "couldn't read AE CSR: ae 0x%hhx offset 0x%lx\n", ae, (long)offset);
	return (EFAULT);
}

/*
 * Write `value' to the per-context (indirect) CSR `offset' of engine `ae'
 * for every context whose bit is set in `ctx_mask'.
 *
 * Indirect CSRs are selected through CSR_CTX_POINTER, so the current
 * pointer is saved first, pointed at each selected context in turn, and
 * restored when done.  If the pointer cannot be saved nothing is written:
 * there would be no valid value to restore afterwards (qat_ae_read_4()
 * has already logged the failure).
 */
static void
qat_ae_ctx_indr_write(struct qat_softc *sc, u_char ae, uint32_t ctx_mask,
    bus_size_t offset, uint32_t value)
{
	int ctx;
	uint32_t ctxptr;

	MPASS(offset == CTX_FUTURE_COUNT_INDIRECT ||
	    offset == FUTURE_COUNT_SIGNAL_INDIRECT ||
	    offset == CTX_STS_INDIRECT ||
	    offset == CTX_WAKEUP_EVENTS_INDIRECT ||
	    offset == CTX_SIG_EVENTS_INDIRECT ||
	    offset == LM_ADDR_0_INDIRECT ||
	    offset == LM_ADDR_1_INDIRECT ||
	    offset == INDIRECT_LM_ADDR_0_BYTE_INDEX ||
	    offset == INDIRECT_LM_ADDR_1_BYTE_INDEX);

	/* save the ctx ptr; never write back a value that was not read */
	if (qat_ae_read_4(sc, ae, CSR_CTX_POINTER, &ctxptr) != 0)
		return;

	for (ctx = 0; ctx < MAX_AE_CTX; ctx++) {
		if ((ctx_mask & (1u << ctx)) == 0)
			continue;
		qat_ae_write_4(sc, ae, CSR_CTX_POINTER, ctx);
		qat_ae_write_4(sc, ae, offset, value);
	}

	/* restore the ctx ptr */
	qat_ae_write_4(sc, ae, CSR_CTX_POINTER, ctxptr);
}

/*
 * Read the per-context (indirect) CSR `offset' of context `ctx' on engine
 * `ae' into `*value'.
 *
 * CSR_CTX_POINTER is saved, switched to `ctx' if it points elsewhere, and
 * restored after the read.  Returns 0 on success or the error from
 * qat_ae_read_4().  If the context pointer itself cannot be read the
 * function fails immediately instead of comparing and restoring an
 * uninitialized value.
 */
static int
qat_ae_ctx_indr_read(struct qat_softc *sc, u_char ae, uint32_t ctx,
    bus_size_t offset, uint32_t *value)
{
	int error, switched;
	uint32_t ctxptr;

	MPASS(offset == CTX_FUTURE_COUNT_INDIRECT ||
	    offset == FUTURE_COUNT_SIGNAL_INDIRECT ||
	    offset == CTX_STS_INDIRECT ||
	    offset == CTX_WAKEUP_EVENTS_INDIRECT ||
	    offset == CTX_SIG_EVENTS_INDIRECT ||
	    offset == LM_ADDR_0_INDIRECT ||
	    offset == LM_ADDR_1_INDIRECT ||
	    offset == INDIRECT_LM_ADDR_0_BYTE_INDEX ||
	    offset == INDIRECT_LM_ADDR_1_BYTE_INDEX);

	/* save the ctx ptr */
	error = qat_ae_read_4(sc, ae, CSR_CTX_POINTER, &ctxptr);
	if (error != 0)
		return (error);

	/* only touch the pointer when it selects a different context */
	switched = (ctxptr & CSR_CTX_POINTER_CONTEXT) !=
	    (ctx & CSR_CTX_POINTER_CONTEXT);
	if (switched)
		qat_ae_write_4(sc, ae, CSR_CTX_POINTER, ctx);

	error = qat_ae_read_4(sc, ae, offset, value);

	/* restore ctx ptr */
	if (switched)
		qat_ae_write_4(sc, ae, CSR_CTX_POINTER, ctxptr);

	return (error);
}

/*
 * Translate a register type plus register number into the address encoding
 * used in microcode instruction operands.
 *
 * Each register class has a fixed base pattern; the register number is
 * masked to the width valid for that class and merged into it.  The two
 * local-memory types ignore `reg'.  Unknown register types yield
 * AEREG_BAD_REGADDR.
 */
static u_short
qat_aereg_get_10bit_addr(enum aereg_type regtype, u_short reg)
{
	switch (regtype) {
	case AEREG_GPA_ABS:
	case AEREG_GPB_ABS:
		return ((reg & 0x7f) | 0x80);
	case AEREG_GPA_REL:
	case AEREG_GPB_REL:
		return (reg & 0x1f);
	case AEREG_SR_RD_REL:
	case AEREG_SR_WR_REL:
	case AEREG_SR_REL:
		return (0x180 | (reg & 0x1f));
	case AEREG_SR_INDX:
		return (0x140 | ((reg & 0x3) << 1));
	case AEREG_DR_RD_REL:
	case AEREG_DR_WR_REL:
	case AEREG_DR_REL:
		return (0x1c0 | (reg & 0x1f));
	case AEREG_DR_INDX:
		return (0x100 | ((reg & 0x3) << 1));
	case AEREG_NEIGH_INDX:
		return (0x241 | ((reg & 0x3) << 1));
	case AEREG_NEIGH_REL:
		return (0x280 | (reg & 0x1f));
	case AEREG_LMEM0:
		return (0x200);
	case AEREG_LMEM1:
		return (0x220);
	case AEREG_NO_DEST:
		return (0x300 | (reg & 0xff));
	default:
		return (AEREG_BAD_REGADDR);
	}
}

/*
 * Load `value' into a context-relative GPR or local-memory index register
 * of context `ctx' on engine `ae' by running a four-instruction microcode
 * snippet (immed_w1, immed_w0, nop, ctx_arb[kill]).
 *
 * The 32-bit value is split into two 16-bit halves; each half is patched
 * into one of the immed instructions together with the destination
 * register address.  For GPA/GPB registers the context mode is read from
 * CTX_ENABLES to validate `ctx' and `relreg'.
 *
 * Returns EINVAL for an odd context in 4-context mode, an out-of-range
 * register number or an untranslatable register type; otherwise the result
 * of qat_ae_exec_ucode().
 */
static int
qat_aereg_rel_data_write(struct qat_softc *sc, u_char ae, u_char ctx,
    enum aereg_type regtype, u_short relreg, uint32_t value)
{
	uint64_t ucode[] = {
		0x0F440000000ull,	/* immed_w1[reg, val_hi16] */
		0x0F040000000ull,	/* immed_w0[reg, val_lo16] */
		0x0F0000C0300ull,	/* nop */
		0x0E000010000ull	/* ctx_arb[kill] */
	};
	const int nucode = nitems(ucode);
	const int w1_slot = 0, w0_slot = 1;
	uint16_t dest, hi16, lo16, srchi, srclo, relmask;
	uint32_t ctxen, hi_operands, lo_operands;

	/* This logic only works for GPRs and LM index registers,
	   not NN or XFER registers! */
	MPASS(regtype == AEREG_GPA_REL || regtype == AEREG_GPB_REL ||
	    regtype == AEREG_LMEM0 || regtype == AEREG_LMEM1);

	if (regtype == AEREG_GPA_REL || regtype == AEREG_GPB_REL) {
		/* the context mode decides how many relative GPRs exist */
		qat_ae_read_4(sc, ae, CTX_ENABLES, &ctxen);
		if ((ctxen & CTX_ENABLES_INUSE_CONTEXTS) == 0) {
			/* 8-ctx mode */
			relmask = 0x0f;
		} else {
			/* 4-ctx mode: only even contexts are valid */
			if ((ctx & 0x1) != 0)
				return (EINVAL);
			relmask = 0x1f;
		}
		if ((relreg & ~relmask) != 0)
			return (EINVAL);
	}

	dest = qat_aereg_get_10bit_addr(regtype, relreg);
	if (dest == AEREG_BAD_REGADDR)
		return (EINVAL);

	/* the low byte of each half travels as a "no destination" operand */
	lo16 = 0xffff & value;
	hi16 = 0xffff & (value >> 16);
	srchi = qat_aereg_get_10bit_addr(AEREG_NO_DEST,
	    (uint16_t)(0xff & hi16));
	srclo = qat_aereg_get_10bit_addr(AEREG_NO_DEST,
	    (uint16_t)(0xff & lo16));

	if (regtype == AEREG_GPA_REL) {
		/* A rel source */
		hi_operands = ((srchi & 0x3ff) << 10) | (dest & 0x3ff);
		lo_operands = ((srclo & 0x3ff) << 10) | (dest & 0x3ff);
	} else {
		hi_operands = ((dest & 0x3ff) << 10) | (srchi & 0x3ff);
		lo_operands = ((dest & 0x3ff) << 10) | (srclo & 0x3ff);
	}

	/* the high byte of each half goes into bits 20 and up */
	ucode[w1_slot] |= ((hi16 >> 8) << 20) | hi_operands;
	ucode[w0_slot] |= ((lo16 >> 8) << 20) | lo_operands;

	return (qat_ae_exec_ucode(sc, ae, ctx, ucode, nucode, 1, nucode * 5,
	    NULL));
}

/*
 * Read a context-relative register of context `ctx' on engine `ae' into
 * `*value'.
 *
 * The register is read indirectly: a single ALU instruction that uses the
 * register as an operand is written to microstore address 0 and the result
 * is then sampled from the ALU_OUT CSR.  Every piece of state touched on
 * the way (the ustore word at address 0, USTORE_ADDRESS, the active
 * context, CTX_ARB_CNTL, AE_MISC_CONTROL and CTX_ENABLES) is saved first
 * and written back afterwards.
 *
 * Returns:
 *   EINVAL  - odd `ctx' in 4-context mode, `relreg' out of range, or a
 *             register type that cannot be translated to an address;
 *   EBUSY   - the AE is in shared-control-store mode and the AE it shares
 *             with is present in sc_ae_mask and active;
 *   other   - error from saving (qat_ae_ucode_read) or restoring
 *             (qat_ae_ucode_write) the ustore word, or 0 on success.
 */
static int
qat_aereg_rel_data_read(struct qat_softc *sc, u_char ae, u_char ctx,
    enum aereg_type regtype, u_short relreg, uint32_t *value)
{
	uint64_t inst, savucode;
	uint32_t ctxen, misc, nmisc, savctx, ctxarbctl, ulo, uhi;
	u_int uaddr, ustore_addr;
	int error;
	u_short mask, regaddr;
	u_char nae;

	MPASS(regtype == AEREG_GPA_REL || regtype == AEREG_GPB_REL ||
	    regtype == AEREG_SR_REL || regtype == AEREG_SR_RD_REL ||
	    regtype == AEREG_DR_REL || regtype == AEREG_DR_RD_REL ||
	    regtype == AEREG_LMEM0 || regtype == AEREG_LMEM1);

	/* LMEM0/LMEM1 skip the context-mode dependent range check */
	if ((regtype == AEREG_GPA_REL) || (regtype == AEREG_GPB_REL) ||
	    (regtype == AEREG_SR_REL) || (regtype == AEREG_SR_RD_REL) ||
	    (regtype == AEREG_DR_REL) || (regtype == AEREG_DR_RD_REL))
	{
		/* determine the context mode */
		qat_ae_read_4(sc, ae, CTX_ENABLES, &ctxen);
		if (ctxen & CTX_ENABLES_INUSE_CONTEXTS) {
			/* 4-ctx mode: odd context numbers are rejected */
			if (ctx & 0x1)
				return EINVAL;
			mask = 0x1f;
		} else {
			/* 8-ctx mode */
			mask = 0x0f;
		}
		if (relreg & ~mask)
			return EINVAL;
	}
	if ((regaddr = qat_aereg_get_10bit_addr(regtype, relreg)) ==
	    AEREG_BAD_REGADDR) {
		return EINVAL;
	}

	/* instruction -- alu[--, --, B, reg] */
	switch (regtype) {
	case AEREG_GPA_REL:
		/* A rel source: register address goes in the low operand field */
		inst = 0xA070000000ull | (regaddr & 0x3ff);
		break;
	default:
		/* all other types: register address goes in the field at bit 10 */
		inst = (0xA030000000ull | ((regaddr & 0x3ff) << 10));
		break;
	}

	/* back up the shared control store bit, and force the AE into
	 * non-shared mode before executing the ucode snippet; refuse to do
	 * so while the AE sharing the store is present and active */
	qat_ae_read_4(sc, ae, AE_MISC_CONTROL, &misc);
	if (misc & AE_MISC_CONTROL_SHARE_CS) {
		qat_ae_get_shared_ustore_ae(ae, &nae);
		if ((1 << nae) & sc->sc_ae_mask && qat_ae_is_active(sc, nae))
			return EBUSY;
	}

	nmisc = misc & ~AE_MISC_CONTROL_SHARE_CS;
	qat_ae_write_4(sc, ae, AE_MISC_CONTROL, nmisc);

	/* read current context and arbiter control so they can be restored */
	qat_ae_read_4(sc, ae, ACTIVE_CTX_STATUS, &savctx);
	qat_ae_read_4(sc, ae, CTX_ARB_CNTL, &ctxarbctl);

	qat_ae_read_4(sc, ae, CTX_ENABLES, &ctxen);
	/* prevent clearing the W1C bits: the breakpoint bit,
	ECC error bit, and Parity error bit */
	ctxen &= CTX_ENABLES_IGNORE_W1C_MASK;

	/* change the context */
	if (ctx != (savctx & ACTIVE_CTX_STATUS_ACNO))
		qat_ae_write_4(sc, ae, ACTIVE_CTX_STATUS,
		    ctx & ACTIVE_CTX_STATUS_ACNO);
	/* save a ustore location (address 0, one word) */
	if ((error = qat_ae_ucode_read(sc, ae, 0, 1, &savucode)) != 0) {
		/* nothing was overwritten yet: undo the changes made so far */

		/* restore AE_MISC_CONTROL csr */
		qat_ae_write_4(sc, ae, AE_MISC_CONTROL, misc);

		/* restore the context */
		if (ctx != (savctx & ACTIVE_CTX_STATUS_ACNO)) {
			qat_ae_write_4(sc, ae, ACTIVE_CTX_STATUS,
			    savctx & ACTIVE_CTX_STATUS_ACNO);
		}
		qat_ae_write_4(sc, ae, CTX_ARB_CNTL, ctxarbctl);

		return (error);
	}

	/* turn off ustore parity */
	qat_ae_write_4(sc, ae, CTX_ENABLES,
	    ctxen & (~CTX_ENABLES_CNTL_STORE_PARITY_ENABLE));

	/* save ustore-addr csr */
	qat_ae_read_4(sc, ae, USTORE_ADDRESS, &ustore_addr);

	/* write the ALU instruction to ustore, enable ecs bit */
	uaddr = 0 | USTORE_ADDRESS_ECS;

	/* set the uaddress */
	qat_ae_write_4(sc, ae, USTORE_ADDRESS, uaddr);
	inst = qat_ae_ucode_set_ecc(inst);

	/* the 64-bit instruction word is written as two 32-bit halves */
	ulo = (uint32_t)(inst & 0xffffffff);
	uhi = (uint32_t)(inst >> 32);

	qat_ae_write_4(sc, ae, USTORE_DATA_LOWER, ulo);

	/* this will auto increment the address */
	qat_ae_write_4(sc, ae, USTORE_DATA_UPPER, uhi);

	/* set the uaddress again, undoing the auto increment */
	qat_ae_write_4(sc, ae, USTORE_ADDRESS, uaddr);

	/* delay for at least 8 cycles */
	qat_ae_wait_num_cycles(sc, ae, 0x8, 0);

	/* read ALU output -- the instruction should have been executed
	prior to clearing the ECS in putUwords */
	qat_ae_read_4(sc, ae, ALU_OUT, value);

	/* restore ustore-addr csr */
	qat_ae_write_4(sc, ae, USTORE_ADDRESS, ustore_addr);

	/* restore the ustore; its result becomes the return value */
	error = qat_ae_ucode_write(sc, ae, 0, 1, &savucode);

	/* restore the context */
	if (ctx != (savctx & ACTIVE_CTX_STATUS_ACNO)) {
		qat_ae_write_4(sc, ae, ACTIVE_CTX_STATUS,
		    savctx & ACTIVE_CTX_STATUS_ACNO);
	}

	qat_ae_write_4(sc, ae, CTX_ARB_CNTL, ctxarbctl);

	/* restore AE_MISC_CONTROL csr */
	qat_ae_write_4(sc, ae, AE_MISC_CONTROL, misc);

	/* restore CTX_ENABLES (W1C bits masked), re-enabling ustore parity
	 * if it was enabled before */
	qat_ae_write_4(sc, ae, CTX_ENABLES, ctxen);

	return error;
}

/*
 * Write `value' to a context-relative read-transfer register (SR or DR) of
 * context `ctx' on engine `ae'.
 *
 * The context mode is read from CTX_ENABLES: in 4-context mode only even
 * contexts are valid, 32 relative registers exist per context and the DR
 * bank sits 0x20 above the SR bank; in 8-context mode there are 16 relative
 * registers and the DR bank sits 0x10 above.
 *
 * Returns 0 on success, EINVAL for a bad context, register number or
 * register type, or the error from reading CTX_ENABLES.  A failed
 * CTX_ENABLES read is reported before any register is written, so the
 * decision is never based on an uninitialized mode value.
 */
static int
qat_aereg_rel_rdxfer_write(struct qat_softc *sc, u_char ae, u_char ctx,
    enum aereg_type regtype, u_short relreg, uint32_t value)
{
	bus_size_t addr;
	int error;
	uint32_t ctxen;
	u_short mask;
	u_short dr_offset;

	MPASS(regtype == AEREG_SR_REL || regtype == AEREG_DR_REL ||
	    regtype == AEREG_SR_RD_REL || regtype == AEREG_DR_RD_REL);

	error = qat_ae_read_4(sc, ae, CTX_ENABLES, &ctxen);
	if (error != 0)
		return (error);

	if (ctxen & CTX_ENABLES_INUSE_CONTEXTS) {
		/* 4-ctx mode */
		if (ctx & 0x1) {
			device_printf(sc->sc_dev,
			    "bad ctx argument in 4-ctx mode,ctx=0x%x\n", ctx);
			return (EINVAL);
		}
		mask = 0x1f;
		dr_offset = 0x20;
	} else {
		/* 8-ctx mode */
		mask = 0x0f;
		dr_offset = 0x10;
	}

	if (relreg & ~mask)
		return (EINVAL);

	/* each context owns a 32-entry window of the transfer register file */
	addr = relreg + (ctx << 0x5);

	switch (regtype) {
	case AEREG_SR_REL:
	case AEREG_SR_RD_REL:
		qat_ae_xfer_write_4(sc, ae, addr, value);
		break;
	case AEREG_DR_REL:
	case AEREG_DR_RD_REL:
		qat_ae_xfer_write_4(sc, ae, addr + dr_offset, value);
		break;
	default:
		return (EINVAL);
	}

	return (0);
}

/*
 * Placeholder for writing a context-relative write-transfer register.
 * Not implemented: any call panics with "notyet".
 */
static int
qat_aereg_rel_wrxfer_write(struct qat_softc *sc, u_char ae, u_char ctx,
    enum aereg_type regtype, u_short relreg, uint32_t value)
{
	panic("notyet");
	return (0);
}

/*
 * Placeholder for writing a context-relative next-neighbour register.
 * Not implemented: any call panics with "notyet".
 */
static int
qat_aereg_rel_nn_write(struct qat_softc *sc, u_char ae, u_char ctx,
    enum aereg_type regtype, u_short relreg, uint32_t value)
{
	panic("notyet");
	return (0);
}

/*
 * Split an absolute register number into a context number and a
 * context-relative register number, according to the context mode that
 * CTX_ENABLES currently reports for engine `ae'.
 *
 * 4-context mode: 5-bit relative register, context taken from
 * (absreg >> 4) & 0x6, i.e. always even.
 * 8-context mode: 4-bit relative register, context from (absreg >> 4) & 0x7.
 *
 * Always returns 0.
 */
static int
qat_aereg_abs_to_rel(struct qat_softc *sc, u_char ae,
	u_short absreg, u_short *relreg, u_char *ctx)
{
	uint32_t ctxen;
	int four_ctx;

	qat_ae_read_4(sc, ae, CTX_ENABLES, &ctxen);
	four_ctx = (ctxen & CTX_ENABLES_INUSE_CONTEXTS) != 0;

	*relreg = absreg & (four_ctx ? 0x1f : 0x0f);
	*ctx = (absreg >> 0x4) & (four_ctx ? 0x6 : 0x7);

	return (0);
}

/*
 * Write `value' to the register with absolute number `absreg' of type
 * `regtype' on engine `ae'.
 *
 * The absolute number is first converted to a (context, relative register)
 * pair, then the write is delegated to the helper for the matching
 * relative register type.  Returns the helper's result, EINVAL for an
 * out-of-range next-neighbour register, and panics on a register type that
 * has no absolute form handled here.
 */
static int
qat_aereg_abs_data_write(struct qat_softc *sc, u_char ae,
	enum aereg_type regtype, u_short absreg, uint32_t value)
{
	u_short relreg;
	u_char ctx;

	qat_aereg_abs_to_rel(sc, ae, absreg, &relreg, &ctx);

	switch (regtype) {
	case AEREG_GPA_ABS:
	case AEREG_GPB_ABS:
		MPASS(absreg < MAX_GPR_REG);
		return (qat_aereg_rel_data_write(sc, ae, ctx,
		    regtype == AEREG_GPA_ABS ? AEREG_GPA_REL : AEREG_GPB_REL,
		    relreg, value));
	case AEREG_DR_RD_ABS:
	case AEREG_SR_RD_ABS:
		MPASS(absreg < MAX_XFER_REG);
		return (qat_aereg_rel_rdxfer_write(sc, ae, ctx,
		    regtype == AEREG_DR_RD_ABS ?
		    AEREG_DR_RD_REL : AEREG_SR_RD_REL,
		    relreg, value));
	case AEREG_DR_WR_ABS:
	case AEREG_SR_WR_ABS:
		MPASS(absreg < MAX_XFER_REG);
		return (qat_aereg_rel_wrxfer_write(sc, ae, ctx,
		    regtype == AEREG_DR_WR_ABS ?
		    AEREG_DR_WR_REL : AEREG_SR_WR_REL,
		    relreg, value));
	case AEREG_NEIGH_ABS:
		MPASS(absreg < MAX_NN_REG);
		if (absreg >= MAX_NN_REG)
			return (EINVAL);
		return (qat_aereg_rel_nn_write(sc, ae, ctx, AEREG_NEIGH_REL,
		    relreg, value));
	default:
		panic("Invalid Register Type");
	}

	/* only reached if panic() were to return */
	return (EINVAL);
}

/*
 * Set the enable bits of the contexts selected by `ctx_mask' in the
 * CTX_ENABLES CSR of engine `ae'.
 *
 * The value written back is masked with CTX_ENABLES_IGNORE_W1C_MASK first.
 * When CTX_ENABLES_INUSE_CONTEXTS is set (4-context mode) only the even
 * contexts (0x55) can be enabled; otherwise all eight (0xff).
 */
static void
qat_ae_enable_ctx(struct qat_softc *sc, u_char ae, u_int ctx_mask)
{
	uint32_t csr;

	qat_ae_read_4(sc, ae, CTX_ENABLES, &csr);
	csr &= CTX_ENABLES_IGNORE_W1C_MASK;

	ctx_mask &= (csr & CTX_ENABLES_INUSE_CONTEXTS) ? 0x55 : 0xff;

	csr |= __SHIFTIN(ctx_mask, CTX_ENABLES_ENABLE);
	qat_ae_write_4(sc, ae, CTX_ENABLES, csr);
}

/*
 * Clear the enable bits of the contexts selected by `ctx_mask' (limited to
 * AE_ALL_CTX) in the CTX_ENABLES CSR of engine `ae'.  The value written
 * back is masked with CTX_ENABLES_IGNORE_W1C_MASK.
 */
static void
qat_ae_disable_ctx(struct qat_softc *sc, u_char ae, u_int ctx_mask)
{
	uint32_t csr, off_bits;

	qat_ae_read_4(sc, ae, CTX_ENABLES, &csr);

	off_bits = __SHIFTIN(ctx_mask & AE_ALL_CTX, CTX_ENABLES_ENABLE);
	csr = (csr & CTX_ENABLES_IGNORE_W1C_MASK) & ~off_bits;

	qat_ae_write_4(sc, ae, CTX_ENABLES, csr);
}

/*
 * Select the context mode of engine `ae': `mode' 4 sets
 * CTX_ENABLES_INUSE_CONTEXTS, any other value clears it.  The CSR is only
 * written when the (W1C-masked) value actually changes.
 */
static void
qat_ae_write_ctx_mode(struct qat_softc *sc, u_char ae, u_char mode)
{
	uint32_t cur, want;

	qat_ae_read_4(sc, ae, CTX_ENABLES, &cur);
	cur &= CTX_ENABLES_IGNORE_W1C_MASK;

	want = (mode == 4) ?
	    (cur | CTX_ENABLES_INUSE_CONTEXTS) :
	    (cur & ~CTX_ENABLES_INUSE_CONTEXTS);

	if (want != cur)
		qat_ae_write_4(sc, ae, CTX_ENABLES, want);
}

/*
 * Set (non-zero `mode') or clear (zero `mode') CTX_ENABLES_NN_MODE in the
 * CTX_ENABLES CSR of engine `ae'.  The CSR is only written when the
 * (W1C-masked) value actually changes.
 */
static void
qat_ae_write_nn_mode(struct qat_softc *sc, u_char ae, u_char mode)
{
	uint32_t cur, want;

	qat_ae_read_4(sc, ae, CTX_ENABLES, &cur);
	cur &= CTX_ENABLES_IGNORE_W1C_MASK;

	want = (mode != 0) ?
	    (cur | CTX_ENABLES_NN_MODE) :
	    (cur & ~CTX_ENABLES_NN_MODE);

	if (want != cur)
		qat_ae_write_4(sc, ae, CTX_ENABLES, want);
}

/*
 * Set (non-zero `mode') or clear (zero `mode') the "global" flag of one of
 * the two local-memory address registers in the CTX_ENABLES CSR of engine
 * `ae'.  `lm' must be AEREG_LMEM0 or AEREG_LMEM1; anything else panics.
 * The CSR is only written when the (W1C-masked) value actually changes.
 */
static void
qat_ae_write_lm_mode(struct qat_softc *sc, u_char ae,
	enum aereg_type lm, u_char mode)
{
	uint32_t cur, want, flag;

	qat_ae_read_4(sc, ae, CTX_ENABLES, &cur);
	cur &= CTX_ENABLES_IGNORE_W1C_MASK;

	if (lm == AEREG_LMEM0)
		flag = CTX_ENABLES_LMADDR_0_GLOBAL;
	else if (lm == AEREG_LMEM1)
		flag = CTX_ENABLES_LMADDR_1_GLOBAL;
	else
		panic("invalid lmem reg type");

	want = (mode != 0) ? (cur | flag) : (cur & ~flag);

	if (want != cur)
		qat_ae_write_4(sc, ae, CTX_ENABLES, want);
}

/*
 * Set (`mode' == 1) or clear (any other value) AE_MISC_CONTROL_SHARE_CS in
 * the AE_MISC_CONTROL CSR of the single engine `ae'.  The CSR is only
 * written when the value actually changes.
 */
static void
qat_ae_write_shared_cs_mode0(struct qat_softc *sc, u_char ae, u_char mode)
{
	uint32_t cur, want;

	qat_ae_read_4(sc, ae, AE_MISC_CONTROL, &cur);

	want = (mode == 1) ?
	    (cur | AE_MISC_CONTROL_SHARE_CS) :
	    (cur & ~AE_MISC_CONTROL_SHARE_CS);

	if (want != cur)
		qat_ae_write_4(sc, ae, AE_MISC_CONTROL, want);
}

/*
 * Apply the shared-control-store mode `mode' to engine `ae' and, when it is
 * present in sc_ae_mask, to the engine that
 * qat_ae_get_shared_ustore_ae() reports as its partner.
 */
static void
qat_ae_write_shared_cs_mode(struct qat_softc *sc, u_char ae, u_char mode)
{
	u_char partner;

	qat_ae_get_shared_ustore_ae(ae, &partner);

	qat_ae_write_shared_cs_mode0(sc, ae, mode);
	if ((sc->sc_ae_mask & (1 << partner)) != 0)
		qat_ae_write_shared_cs_mode0(sc, partner, mode);
}

/*
 * Configure control-store reload for engine `ae'.
 *
 * `reload_size' must be 0, QAT_2K, QAT_4K or QAT_8K and is encoded as
 * 0..3 in the AE_MISC_CONTROL_CS_RELOAD field; any other size returns
 * EINVAL without touching anything.  The size is recorded in the per-AE
 * software state, and `ustore_dram_addr' is recorded as well when reload
 * is enabled (non-zero encoding).  `shared_mode' is placed in the
 * AE_MISC_CONTROL_ONE_CTX_RELOAD field and AE_MISC_CONTROL_SHARE_CS is
 * cleared.  Returns 0 on success.
 */
static int
qat_ae_set_reload_ustore(struct qat_softc *sc, u_char ae,
	u_int reload_size, int shared_mode, u_int ustore_dram_addr)
{
	uint32_t misc, encoding;

	if (reload_size == 0)
		encoding = 0x0;
	else if (reload_size == QAT_2K)
		encoding = 0x1;
	else if (reload_size == QAT_4K)
		encoding = 0x2;
	else if (reload_size == QAT_8K)
		encoding = 0x3;
	else
		return (EINVAL);

	if (encoding != 0)
		QAT_AE(sc, ae).qae_ustore_dram_addr = ustore_dram_addr;
	QAT_AE(sc, ae).qae_reload_size = reload_size;

	qat_ae_read_4(sc, ae, AE_MISC_CONTROL, &misc);
	misc &= ~(AE_MISC_CONTROL_ONE_CTX_RELOAD |
	    AE_MISC_CONTROL_CS_RELOAD | AE_MISC_CONTROL_SHARE_CS);
	misc |= __SHIFTIN(encoding, AE_MISC_CONTROL_CS_RELOAD) |
	    __SHIFTIN(shared_mode, AE_MISC_CONTROL_ONE_CTX_RELOAD);
	qat_ae_write_4(sc, ae, AE_MISC_CONTROL, misc);

	return (0);
}

/*
 * Classify the state of engine `ae'.
 *
 * QAT_AE_ENABLED  - any context enable bit is set in CTX_ENABLES, or
 *                   CTX_ENABLES could not be read;
 * QAT_AE_ACTIVE   - no context is enabled but ACTIVE_CTX_STATUS_ABO is set;
 * QAT_AE_DISABLED - neither of the above.
 */
static enum qat_ae_status
qat_ae_get_status(struct qat_softc *sc, u_char ae)
{
	uint32_t csr = 0;

	if (qat_ae_read_4(sc, ae, CTX_ENABLES, &csr) != 0 ||
	    (csr & CTX_ENABLES_ENABLE) != 0)
		return (QAT_AE_ENABLED);

	qat_ae_read_4(sc, ae, ACTIVE_CTX_STATUS, &csr);
	if ((csr & ACTIVE_CTX_STATUS_ABO) != 0)
		return (QAT_AE_ACTIVE);

	return (QAT_AE_DISABLED);
}


/*
 * Return 1 when engine `ae' is doing something, 0 otherwise: either
 * qat_ae_get_status() reports anything but QAT_AE_DISABLED, or a fresh read
 * of ACTIVE_CTX_STATUS shows ACTIVE_CTX_STATUS_ABO set.
 */
static int
qat_ae_is_active(struct qat_softc *sc, u_char ae)
{
	uint32_t actx;

	if (qat_ae_get_status(sc, ae) != QAT_AE_DISABLED)
		return (1);

	qat_ae_read_4(sc, ae, ACTIVE_CTX_STATUS, &actx);
	return ((actx & ACTIVE_CTX_STATUS_ABO) != 0);
}

/*
 * Busy-wait until the low 16 bits of the PROFILE_COUNT CSR of engine `ae'
 * have advanced by at least `cycles' since entry.
 *
 * Return value:
 *    1 - actually waited for the specified number of cycles (and, when
 *        `check' is set, ACTIVE_CTX_STATUS_ABO was still set or could not
 *        be read);
 *    0 - `check' is set and ACTIVE_CTX_STATUS_ABO was found clear, either
 *        after at least CYCLES_FROM_READY2EXE cycles or at the end of the
 *        wait;
 *   -1 - timeout: the counter was sampled TIMEOUT_AE_CHECK times at the
 *        same value it had on entry.
 */
static int
qat_ae_wait_num_cycles(struct qat_softc *sc, u_char ae, int cycles, int check)
{
	uint32_t cnt, actx;
	int pcnt, ccnt, elapsed, times;

	/* reference sample; only the low 16 bits are used */
	qat_ae_read_4(sc, ae, PROFILE_COUNT, &cnt);
	pcnt = cnt & 0xffff;

	times = TIMEOUT_AE_CHECK;
	do {
		qat_ae_read_4(sc, ae, PROFILE_COUNT, &cnt);
		ccnt = cnt & 0xffff;

		elapsed = ccnt - pcnt;
		/* a sample equal to the reference consumes the retry budget */
		if (elapsed == 0) {
			times--;
		}
		if (times <= 0) {
			device_printf(sc->sc_dev,
			    "qat_ae_wait_num_cycles timeout\n");
			return -1;
		}

		/* the 16-bit counter wrapped since the reference sample */
		if (elapsed < 0)
			elapsed += 0x10000;

		/* with `check', stop early once the ABO bit reads clear */
		if (elapsed >= CYCLES_FROM_READY2EXE && check) {
			if (qat_ae_read_4(sc, ae, ACTIVE_CTX_STATUS,
			    &actx) == 0) {
				if ((actx & ACTIVE_CTX_STATUS_ABO) == 0)
					return 0;
			}
		}
	} while (cycles > elapsed);

	/* full wait done; with `check', report a clear ABO bit as 0 */
	if (check && qat_ae_read_4(sc, ae, ACTIVE_CTX_STATUS, &actx) == 0) {
		if ((actx & ACTIVE_CTX_STATUS_ABO) == 0)
			return 0;
	}

	return 1;
}

/*
 * Bring all acceleration engines listed in sc_ae_mask into a known state.
 *
 * Steps, in order: reset the per-AE software bookkeeping, take the engines
 * out of reset, clear the transfer registers, clear the GPRs (skipped when
 * the hardware uses authenticated firmware), set bit 0 of SIGNATURE_ENABLE
 * on every engine, and finally run the reset-clear sequence once more.
 * Returns 0 on success or the first error reported by a step.
 */
int
qat_ae_init(struct qat_softc *sc)
{
	struct qat_ae *qae;
	uint32_t aemask, sigen = 0;
	int error;
	u_char ae;

	/* XXX adf_initSysMemInfo */

	/* XXX Disable clock gating for some chip if debug mode */

	for (ae = 0, aemask = sc->sc_ae_mask; aemask != 0;
	    ae++, aemask >>= 1) {
		if ((aemask & 1) == 0)
			continue;

		qae = &sc->sc_ae[ae];
		qae->qae_ustore_size = USTORE_SIZE;
		qae->qae_free_addr = 0;
		qae->qae_free_size = USTORE_SIZE;
		qae->qae_live_ctx_mask = AE_ALL_CTX;
		qae->qae_ustore_dram_addr = 0;
		qae->qae_reload_size = 0;
	}

	/* XXX Enable attention interrupt */

	error = qat_ae_clear_reset(sc);
	if (error != 0)
		return (error);

	qat_ae_clear_xfer(sc);

	if (!sc->sc_hw.qhw_fw_auth) {
		error = qat_ae_clear_gprs(sc);
		if (error != 0)
			return (error);
	}

	/* Set SIGNATURE_ENABLE[0] to 0x1 in order to enable ALU_OUT csr */
	for (ae = 0, aemask = sc->sc_ae_mask; aemask != 0;
	    ae++, aemask >>= 1) {
		if ((aemask & 1) == 0)
			continue;

		qat_ae_read_4(sc, ae, SIGNATURE_ENABLE, &sigen);
		sigen |= 0x1;
		qat_ae_write_4(sc, ae, SIGNATURE_ENABLE, sigen);
	}

	/* XXX XXX XXX Clean MMP memory if mem scrub is supported */
	/* halMem_ScrubMMPMemory */

	return (qat_ae_clear_reset(sc));
}

/*
 * Start the firmware on every engine below sc_ae_num that is present in
 * sc_ae_mask, passing context mask 0xff to qat_aefw_start().  Stops at and
 * returns the first error; returns 0 when every engine was started.
 */
int
qat_ae_start(struct qat_softc *sc)
{
	int error;
	u_char ae;

	for (ae = 0; ae < sc->sc_ae_num; ae++) {
		if ((sc->sc_ae_mask & (1 << ae)) != 0) {
			error = qat_aefw_start(sc, ae, 0xff);
			if (error != 0)
				return (error);
		}
	}

	return (0);
}

/*
 * AE cluster interrupt handler.  There is nothing to handle until SRIOV is
 * supported, so the handler only prints its own name; `arg' is unused.
 */
void
qat_ae_cluster_intr(void *arg)
{
	(void)arg;

	printf("qat_ae_cluster_intr\n");
}

/*
 * Take the acceleration engines and accelerators named by sc_ae_mask and
 * sc_accel_mask out of reset and program default values into the per-AE
 * CSRs.
 *
 * Sequence: clear the reset bits in CAP_GLOBAL_CTL_RESET (rewriting until
 * they read back clear), enable the clocks in CAP_GLOBAL_CTL_CLK_EN, verify
 * with qat_ae_check() that every engine responds, initialise CTX_ENABLES,
 * the per-context PCs, CTX_ARB_CNTL, CC_ENABLE and the per-context
 * wakeup/signal event CSRs, and finally reset the timestamp counters.
 *
 * Returns 0 on success, EBUSY if the reset bits never clear, or the error
 * from qat_ae_check().
 */
static int
qat_ae_clear_reset(struct qat_softc *sc)
{
	int error;
	uint32_t times, reset, clock, reg, mask;
	u_char ae;

	/* value to write: current register with our AE/accel reset bits off */
	reset = qat_cap_global_read_4(sc, CAP_GLOBAL_CTL_RESET);
	reset &= ~(__SHIFTIN(sc->sc_ae_mask, CAP_GLOBAL_CTL_RESET_AE_MASK));
	reset &= ~(__SHIFTIN(sc->sc_accel_mask, CAP_GLOBAL_CTL_RESET_ACCEL_MASK));
	times = TIMEOUT_AE_RESET;
	/* rewrite until none of our reset bits reads back as set */
	do {
		qat_cap_global_write_4(sc, CAP_GLOBAL_CTL_RESET, reset);
		if ((times--) == 0) {
			device_printf(sc->sc_dev, "couldn't reset AEs\n");
			return EBUSY;
		}
		reg = qat_cap_global_read_4(sc, CAP_GLOBAL_CTL_RESET);
	} while ((__SHIFTIN(sc->sc_ae_mask, CAP_GLOBAL_CTL_RESET_AE_MASK) |
	    __SHIFTIN(sc->sc_accel_mask, CAP_GLOBAL_CTL_RESET_ACCEL_MASK))
	    & reg);

	/* Enable clock for AE and QAT */
	clock = qat_cap_global_read_4(sc, CAP_GLOBAL_CTL_CLK_EN);
	clock |= __SHIFTIN(sc->sc_ae_mask, CAP_GLOBAL_CTL_CLK_EN_AE_MASK);
	clock |= __SHIFTIN(sc->sc_accel_mask, CAP_GLOBAL_CTL_CLK_EN_ACCEL_MASK);
	qat_cap_global_write_4(sc, CAP_GLOBAL_CTL_CLK_EN, clock);

	/* make sure every engine's CSRs are reachable and its counter runs */
	error = qat_ae_check(sc);
	if (error)
		return error;

	/*
	 * Set undefined power-up/reset states to reasonable default values...
	 * just to make sure we're starting from a known point
	 */
	for (ae = 0, mask = sc->sc_ae_mask; mask; ae++, mask >>= 1) {
		if (!(mask & 1))
			continue;

		/* init the ctx_enable */
		qat_ae_write_4(sc, ae, CTX_ENABLES,
		    CTX_ENABLES_INIT);

		/* initialize the PCs */
		qat_ae_ctx_indr_write(sc, ae, AE_ALL_CTX,
		    CTX_STS_INDIRECT,
		    UPC_MASK & CTX_STS_INDIRECT_UPC_INIT);

		/* init the ctx_arb */
		qat_ae_write_4(sc, ae, CTX_ARB_CNTL,
		    CTX_ARB_CNTL_INIT);

		/* enable cc */
		qat_ae_write_4(sc, ae, CC_ENABLE,
		    CC_ENABLE_INIT);
		/* init the wakeup and signal events of every context */
		qat_ae_ctx_indr_write(sc, ae, AE_ALL_CTX,
		    CTX_WAKEUP_EVENTS_INDIRECT,
		    CTX_WAKEUP_EVENTS_INDIRECT_INIT);
		qat_ae_ctx_indr_write(sc, ae, AE_ALL_CTX,
		    CTX_SIG_EVENTS_INDIRECT,
		    CTX_SIG_EVENTS_INDIRECT_INIT);
	}

	/* placeholder: eSRAM auto-init is not implemented yet */
	if ((sc->sc_ae_mask != 0) &&
	    sc->sc_flags & QAT_FLAG_ESRAM_ENABLE_AUTO_INIT) {
		/* XXX XXX XXX init eSram only when this is boot time */
	}

	/* placeholder: waiting for shared RAM is not implemented yet */
	if ((sc->sc_ae_mask != 0) &&
	    sc->sc_flags & QAT_FLAG_SHRAM_WAIT_READY) {
		/* XXX XXX XXX wait shram to complete initialization */
	}

	qat_ae_reset_timestamp(sc);

	return 0;
}

static int
qat_ae_check(struct qat_softc *sc)
{
	int error, times, ae;
	uint32_t cnt, pcnt, mask;

	for (ae = 0, mask = sc->sc_ae_mask; mask; ae++, mask >>= 1) {
		if (!(mask & 1))
			continue;

		times = TIMEOUT_AE_CHECK;
		error = qat_ae_read_4(sc, ae, PROFILE_COUNT, &cnt);
		if (error) {
			device_printf(sc->sc_dev,
			    "couldn't access AE %d CSR\n", ae);
			return error;
		}
		pcnt = cnt & 0xffff;

		while (1) {
			error = qat_ae_read_4(sc, ae,
			    PROFILE_COUNT, &cnt);
			if (error) {
				device_printf(sc->sc_dev,
				    "couldn't access AE %d CSR\n", ae);
				return error;
			}
			cnt &= 0xffff;
			if (cnt == pcnt)
				times--;
			else
				break;
			if (times <= 0) {
				device_printf(sc->sc_dev,
				    "AE %d CSR is useless\n", ae);
				return EFAULT;
			}
		}
	}

	return 0;
}

/*
 * Zero the TIMESTAMP_LOW/TIMESTAMP_HIGH CSRs of every enabled AE.  The
 * global timestamp enable bit is cleared first (if it was set) and is set
 * unconditionally afterwards.  Always returns 0.
 */
static int
qat_ae_reset_timestamp(struct qat_softc *sc)
{
	uint32_t ctl_misc, remaining;
	u_char ae = 0;

	/* stop the timestamp timers if they are running */
	ctl_misc = qat_cap_global_read_4(sc, CAP_GLOBAL_CTL_MISC);
	if ((ctl_misc & CAP_GLOBAL_CTL_MISC_TIMESTAMP_EN) != 0) {
		qat_cap_global_write_4(sc, CAP_GLOBAL_CTL_MISC,
		    ctl_misc & ~CAP_GLOBAL_CTL_MISC_TIMESTAMP_EN);
	}

	/* clear both halves of the timestamp on each enabled AE */
	for (remaining = sc->sc_ae_mask; remaining != 0;
	    remaining >>= 1, ae++) {
		if ((remaining & 1) != 0) {
			qat_ae_write_4(sc, ae, TIMESTAMP_LOW, 0);
			qat_ae_write_4(sc, ae, TIMESTAMP_HIGH, 0);
		}
	}

	/* (re)start the timestamp timers */
	qat_cap_global_write_4(sc, CAP_GLOBAL_CTL_MISC,
	    ctl_misc | CAP_GLOBAL_CTL_MISC_TIMESTAMP_EN);

	return 0;
}

/*
 * Write zero to the AEREG_SR_RD_ABS and AEREG_DR_RD_ABS registers
 * 0 .. MAX_GPR_REG - 1 of every enabled AE.
 */
static void
qat_ae_clear_xfer(struct qat_softc *sc)
{
	u_int remaining, regno;
	u_char ae = 0;

	for (remaining = sc->sc_ae_mask; remaining != 0;
	    remaining >>= 1, ae++) {
		if ((remaining & 1) == 0)
			continue;

		regno = 0;
		while (regno < MAX_GPR_REG) {
			qat_aereg_abs_data_write(sc, ae, AEREG_SR_RD_ABS,
			    regno, 0);
			qat_aereg_abs_data_write(sc, ae, AEREG_DR_RD_ABS,
			    regno, 0);
			regno++;
		}
	}
}

static int
qat_ae_clear_gprs(struct qat_softc *sc)
{
	uint32_t val;
	uint32_t saved_ctx = 0;
	int times = TIMEOUT_AE_CHECK, rv;
	u_char ae;
	u_int mask;

	for (ae = 0, mask = sc->sc_ae_mask; mask; ae++, mask >>= 1) {
		if (!(mask & 1))
			continue;

		/* turn off share control store bit */
		val = qat_ae_read_4(sc, ae, AE_MISC_CONTROL, &val);
		val &= ~AE_MISC_CONTROL_SHARE_CS;
		qat_ae_write_4(sc, ae, AE_MISC_CONTROL, val);

		/* turn off ucode parity */
		/* make sure nn_mode is set to self */
		qat_ae_read_4(sc, ae, CTX_ENABLES, &val);
		val &= CTX_ENABLES_IGNORE_W1C_MASK;
		val |= CTX_ENABLES_NN_MODE;
		val &= ~CTX_ENABLES_CNTL_STORE_PARITY_ENABLE;
		qat_ae_write_4(sc, ae, CTX_ENABLES, val);

		/* copy instructions to ustore */
		qat_ae_ucode_write(sc, ae, 0, nitems(ae_clear_gprs_inst),
		    ae_clear_gprs_inst);

		/* set PC */
		qat_ae_ctx_indr_write(sc, ae, AE_ALL_CTX, CTX_STS_INDIRECT,
		    UPC_MASK & CTX_STS_INDIRECT_UPC_INIT);

		/* save current context */
		qat_ae_read_4(sc, ae, ACTIVE_CTX_STATUS, &saved_ctx);
		/* change the active context */
		/* start the context from ctx 0 */
		qat_ae_write_4(sc, ae, ACTIVE_CTX_STATUS, 0);

		/* wakeup-event voluntary */
		qat_ae_ctx_indr_write(sc, ae, AE_ALL_CTX,
		    CTX_WAKEUP_EVENTS_INDIRECT,
		    CTX_WAKEUP_EVENTS_INDIRECT_VOLUNTARY);
		/* clean signals */
		qat_ae_ctx_indr_write(sc, ae, AE_ALL_CTX,
		    CTX_SIG_EVENTS_INDIRECT, 0);
		qat_ae_write_4(sc, ae, CTX_SIG_EVENTS_ACTIVE, 0);

		qat_ae_enable_ctx(sc, ae, AE_ALL_CTX);
	}

	for (ae = 0, mask = sc->sc_ae_mask; mask; ae++, mask >>= 1) {
		if (!(mask & 1))
			continue;
		/* wait for AE to finish */
		do {
			rv = qat_ae_wait_num_cycles(sc, ae, AE_EXEC_CYCLE, 1);
		} while (rv && times--);
		if (times <= 0) {
			device_printf(sc->sc_dev,
			    "qat_ae_clear_gprs timeout");
			return ETIMEDOUT;
		}
		qat_ae_disable_ctx(sc, ae, AE_ALL_CTX);
		/* change the active context */
		qat_ae_write_4(sc, ae, ACTIVE_CTX_STATUS,
		    saved_ctx & ACTIVE_CTX_STATUS_ACNO);
		/* init the ctx_enable */
		qat_ae_write_4(sc, ae, CTX_ENABLES, CTX_ENABLES_INIT);
		/* initialize the PCs */
		qat_ae_ctx_indr_write(sc, ae, AE_ALL_CTX,
		    CTX_STS_INDIRECT, UPC_MASK & CTX_STS_INDIRECT_UPC_INIT);
		/* init the ctx_arb */
		qat_ae_write_4(sc, ae, CTX_ARB_CNTL, CTX_ARB_CNTL_INIT);
		/* enable cc */
		qat_ae_write_4(sc, ae, CC_ENABLE, CC_ENABLE_INIT);
		qat_ae_ctx_indr_write(sc, ae, AE_ALL_CTX,
		    CTX_WAKEUP_EVENTS_INDIRECT, CTX_WAKEUP_EVENTS_INDIRECT_INIT);
		qat_ae_ctx_indr_write(sc, ae, AE_ALL_CTX, CTX_SIG_EVENTS_INDIRECT,
		    CTX_SIG_EVENTS_INDIRECT_INIT);
	}

	return 0;
}

/*
 * Store in *nae the number of the AE paired with 'ae': odd AEs map to the
 * even AE just below them, even AEs to the odd AE just above.
 */
static void
qat_ae_get_shared_ustore_ae(u_char ae, u_char *nae)
{
	/* flipping bit 0 yields ae - 1 for odd and ae + 1 for even values */
	*nae = ae ^ 0x1;
}

/*
 * Return the parity of a 64-bit word: 1 when an odd number of bits is set,
 * 0 otherwise.
 */
static u_int
qat_ae_ucode_parity64(uint64_t ucode)
{
	u_int shift;

	/* fold the word onto itself: shifts of 1, 2, 4, 8, 16 and 32 */
	for (shift = 1; shift <= 32; shift <<= 1)
		ucode ^= ucode >> shift;

	return ((u_int)(ucode & 1));
}

static uint64_t
qat_ae_ucode_set_ecc(uint64_t ucode)
{
	static const uint64_t
		bit0mask=0xff800007fffULL, bit1mask=0x1f801ff801fULL,
		bit2mask=0xe387e0781e1ULL, bit3mask=0x7cb8e388e22ULL,
		bit4mask=0xaf5b2c93244ULL, bit5mask=0xf56d5525488ULL,
		bit6mask=0xdaf69a46910ULL;

	/* clear the ecc bits */
	ucode &= ~(0x7fULL << USTORE_ECC_BIT_0);

	ucode |= (uint64_t)qat_ae_ucode_parity64(bit0mask & ucode) <<
	    USTORE_ECC_BIT_0;
	ucode |= (uint64_t)qat_ae_ucode_parity64(bit1mask & ucode) <<
	    USTORE_ECC_BIT_1;
	ucode |= (uint64_t)qat_ae_ucode_parity64(bit2mask & ucode) <<
	    USTORE_ECC_BIT_2;
	ucode |= (uint64_t)qat_ae_ucode_parity64(bit3mask & ucode) <<
	    USTORE_ECC_BIT_3;
	ucode |= (uint64_t)qat_ae_ucode_parity64(bit4mask & ucode) <<
	    USTORE_ECC_BIT_4;
	ucode |= (uint64_t)qat_ae_ucode_parity64(bit5mask & ucode) <<
	    USTORE_ECC_BIT_5;
	ucode |= (uint64_t)qat_ae_ucode_parity64(bit6mask & ucode) <<
	    USTORE_ECC_BIT_6;

	return (ucode);
}

/*
 * Write 'ninst' microwords from 'ucode' into the control store of 'ae',
 * starting at ustore address 'uaddr'.  ECC bits are recomputed for every
 * word before it is written.  The USTORE_ADDRESS CSR is saved on entry and
 * restored on exit.  Always returns 0.
 */
static int
qat_ae_ucode_write(struct qat_softc *sc, u_char ae, u_int uaddr, u_int ninst,
	const uint64_t *ucode)
{
	uint64_t word;
	uint32_t saved_addr;
	u_int idx;

	/* remember the current ustore address so it can be put back */
	qat_ae_read_4(sc, ae, USTORE_ADDRESS, &saved_addr);

	/* select the start address with the ECS bit set */
	qat_ae_write_4(sc, ae, USTORE_ADDRESS, uaddr | USTORE_ADDRESS_ECS);

	for (idx = 0; idx < ninst; idx++) {
		word = qat_ae_ucode_set_ecc(ucode[idx]);

		qat_ae_write_4(sc, ae, USTORE_DATA_LOWER,
		    (uint32_t)(word & 0xffffffff));
		/* writing the upper half auto increments the address */
		qat_ae_write_4(sc, ae, USTORE_DATA_UPPER,
		    (uint32_t)(word >> 32));
	}

	qat_ae_write_4(sc, ae, USTORE_ADDRESS, saved_addr);

	return 0;
}

/*
 * Read 'ninst' microwords from the control store of 'ae', starting at
 * ustore address 'uaddr', into 'ucode'.
 *
 * Returns EBUSY when the AE is not in the QAT_AE_DISABLED state, or when it
 * shares its control store with an enabled neighbour that is active;
 * otherwise 0.  Panics if the CS_RELOAD field of AE_MISC_CONTROL is set
 * (not implemented).
 */
static int
qat_ae_ucode_read(struct qat_softc *sc, u_char ae, u_int uaddr, u_int ninst,
    uint64_t *ucode)
{
	uint32_t misc_ctl, saved_addr, lower, upper;
	u_int n;
	u_char neighbour;

	if (qat_ae_get_status(sc, ae) != QAT_AE_DISABLED)
		return EBUSY;

	/*
	 * When the control store is shared, the neighbouring AE must not be
	 * running either.
	 */
	qat_ae_read_4(sc, ae, AE_MISC_CONTROL, &misc_ctl);
	if ((misc_ctl & AE_MISC_CONTROL_SHARE_CS) != 0) {
		qat_ae_get_shared_ustore_ae(ae, &neighbour);
		if ((sc->sc_ae_mask & (1 << neighbour)) != 0 &&
		    qat_ae_is_active(sc, neighbour))
			return EBUSY;
	}

	/* if reloadable, then get it all from dram-ustore */
	if (__SHIFTOUT(misc_ctl, AE_MISC_CONTROL_CS_RELOAD))
		panic("notyet"); /* XXX getReloadUwords */

	/* disable SHARE_CS bit to workaround silicon bug */
	qat_ae_write_4(sc, ae, AE_MISC_CONTROL, misc_ctl & 0xfffffffb);

	MPASS(uaddr + ninst <= USTORE_SIZE);

	/* save ustore-addr csr */
	qat_ae_read_4(sc, ae, USTORE_ADDRESS, &saved_addr);

	/* enable ecs bit, then fetch one word per explicitly set address */
	uaddr |= USTORE_ADDRESS_ECS;
	for (n = 0; n < ninst; n++, uaddr++) {
		qat_ae_write_4(sc, ae, USTORE_ADDRESS, uaddr);

		qat_ae_read_4(sc, ae, USTORE_DATA_LOWER, &lower);
		qat_ae_read_4(sc, ae, USTORE_DATA_UPPER, &upper);
		ucode[n] = ((uint64_t)upper << 32) | lower;
	}

	/* restore SHARE_CS bit to workaround silicon bug */
	qat_ae_write_4(sc, ae, AE_MISC_CONTROL, misc_ctl);
	qat_ae_write_4(sc, ae, USTORE_ADDRESS, saved_addr);

	return 0;
}

/*
 * Append to ucode[], starting at index 'ninst', the microwords needed to
 * store 'size' bytes taken from value[] at address 'addr'.
 *
 * The first (up to) four bytes are emitted from one of the ae_inst_1b /
 * ae_inst_2b / ae_inst_3b / ae_inst_4b templates, whose leading words are
 * patched with 'addr' and the first value.  Every further full u_int adds
 * three words taken from ae_inst_4b, and a trailing remainder of fewer than
 * four bytes is handled by a recursive call.
 *
 * Returns the number of microwords appended (0 when size is 0).  The caller
 * must make sure ucode[] is large enough; no bound is checked here.
 */
static u_int
qat_ae_concat_ucode(uint64_t *ucode, u_int ninst, u_int size, u_int addr,
    u_int *value)
{
	const uint64_t *inst_arr;
	u_int ninst0, curvalue;
	int ii, vali, fixup, usize = 0;

	if (size == 0)
		return 0;

	/* remember the start index so the appended count can be returned */
	ninst0 = ninst;
	vali = 0;
	curvalue = value[vali++];

	/* pick the template by size; anything above 3 starts with ae_inst_4b */
	switch (size) {
	case 0x1:
		inst_arr = ae_inst_1b;
		usize = nitems(ae_inst_1b);
		break;
	case 0x2:
		inst_arr = ae_inst_2b;
		usize = nitems(ae_inst_2b);
		break;
	case 0x3:
		inst_arr = ae_inst_3b;
		usize = nitems(ae_inst_3b);
		break;
	default:
		inst_arr = ae_inst_4b;
		usize = nitems(ae_inst_4b);
		break;
	}

	/* copy the template, then patch its first four words in place */
	fixup = ninst;
	for (ii = 0; ii < usize; ii++)
		ucode[ninst++] = inst_arr[ii];

	INSERT_IMMED_GPRA_CONST(ucode[fixup], (addr));
	fixup++;
	INSERT_IMMED_GPRA_CONST(ucode[fixup], 0);
	fixup++;
	INSERT_IMMED_GPRB_CONST(ucode[fixup], (curvalue >> 0));
	fixup++;
	INSERT_IMMED_GPRB_CONST(ucode[fixup], (curvalue >> 16));
	/* XXX fixup++ ? */

	if (size <= 0x4)
		return (ninst - ninst0);

	/* the template consumed the first u_int; emit the remaining ones */
	size -= sizeof(u_int);
	while (size >= sizeof(u_int)) {
		curvalue = value[vali++];
		fixup = ninst;
		ucode[ninst++] = ae_inst_4b[0x2];
		ucode[ninst++] = ae_inst_4b[0x3];
		ucode[ninst++] = ae_inst_4b[0x8];
		INSERT_IMMED_GPRB_CONST(ucode[fixup], (curvalue >> 16));
		fixup++;
		INSERT_IMMED_GPRB_CONST(ucode[fixup], (curvalue >> 0));
		/* XXX fixup++ ? */

		addr += sizeof(u_int);
		size -= sizeof(u_int);
	}
	/* call this function recursively when fewer than 4 bytes are left */
	ninst +=
	    qat_ae_concat_ucode(ucode, ninst, size, addr, value + vali);

	return (ninst - ninst0);
}

/*
 * Temporarily run a microcode snippet on one context of an AE.
 *
 * The 'ninst' words in 'ucode' are written to ustore address 0, context
 * 'ctx' is started at PC 0 and the function waits for up to 'max_cycles'
 * cycles.  All CSR state touched along the way is saved beforehand and
 * restored afterwards.  The ustore words that get overwritten are saved and
 * restored only when ninst <= MAX_EXEC_INST.
 *
 * sc            - device softc
 * ae            - AE number
 * ctx           - context number to run on
 * ucode, ninst  - microwords to execute and their count
 * cond_code_off - when non-zero, CC_ENABLE is written with bit 0x2000
 *                 cleared for the duration of the run
 * max_cycles    - cycle budget passed to qat_ae_wait_num_cycles()
 * endpc         - if not NULL, receives the context PC after the run
 *
 * Returns 0 on success, EBUSY if the AE (or an enabled neighbour sharing
 * its control store) is active, the error from qat_ae_ucode_read() if the
 * ustore could not be saved, or ETIMEDOUT if the wait failed.  State is
 * restored on the ETIMEDOUT path as well.
 */
static int
qat_ae_exec_ucode(struct qat_softc *sc, u_char ae, u_char ctx,
    uint64_t *ucode, u_int ninst, int cond_code_off, u_int max_cycles,
    u_int *endpc)
{
	int error = 0, share_cs = 0;
	uint64_t savucode[MAX_EXEC_INST];
	uint32_t indr_lm_addr_0, indr_lm_addr_1;
	uint32_t indr_lm_addr_byte_0, indr_lm_addr_byte_1;
	uint32_t indr_future_cnt_sig;
	uint32_t indr_sig, active_sig;
	uint32_t wakeup_ev, savpc, savcc, savctx, ctxarbctl;
	uint32_t misc, nmisc, ctxen;
	u_char nae;

	MPASS(ninst <= USTORE_SIZE);

	if (qat_ae_is_active(sc, ae))
		return EBUSY;

	/* save current LM addr */
	qat_ae_ctx_indr_read(sc, ae, ctx, LM_ADDR_0_INDIRECT, &indr_lm_addr_0);
	qat_ae_ctx_indr_read(sc, ae, ctx, LM_ADDR_1_INDIRECT, &indr_lm_addr_1);
	qat_ae_ctx_indr_read(sc, ae, ctx, INDIRECT_LM_ADDR_0_BYTE_INDEX,
	    &indr_lm_addr_byte_0);
	qat_ae_ctx_indr_read(sc, ae, ctx, INDIRECT_LM_ADDR_1_BYTE_INDEX,
	    &indr_lm_addr_byte_1);

	/* backup shared control store bit, and force AE to
	   none-shared mode before executing ucode snippet */
	qat_ae_read_4(sc, ae, AE_MISC_CONTROL, &misc);
	if (misc & AE_MISC_CONTROL_SHARE_CS) {
		share_cs = 1;
		qat_ae_get_shared_ustore_ae(ae, &nae);
		if ((sc->sc_ae_mask & (1 << nae)) && qat_ae_is_active(sc, nae))
			return EBUSY;
	}
	nmisc = misc & ~AE_MISC_CONTROL_SHARE_CS;
	qat_ae_write_4(sc, ae, AE_MISC_CONTROL, nmisc);

	/* save current states: */
	/* the ustore contents only fit in savucode[] for short snippets */
	if (ninst <= MAX_EXEC_INST) {
		error = qat_ae_ucode_read(sc, ae, 0, ninst, savucode);
		if (error) {
			/* undo the SHARE_CS change made above before bailing */
			qat_ae_write_4(sc, ae, AE_MISC_CONTROL, misc);
			return error;
		}
	}

	/* save wakeup-events */
	qat_ae_ctx_indr_read(sc, ae, ctx, CTX_WAKEUP_EVENTS_INDIRECT,
	    &wakeup_ev);
	/* save PC */
	qat_ae_ctx_indr_read(sc, ae, ctx, CTX_STS_INDIRECT, &savpc);
	savpc &= UPC_MASK;

	/* save ctx enables */
	qat_ae_read_4(sc, ae, CTX_ENABLES, &ctxen);
	ctxen &= CTX_ENABLES_IGNORE_W1C_MASK;
	/* save conditional-code */
	qat_ae_read_4(sc, ae, CC_ENABLE, &savcc);
	/* save current context */
	qat_ae_read_4(sc, ae, ACTIVE_CTX_STATUS, &savctx);
	qat_ae_read_4(sc, ae, CTX_ARB_CNTL, &ctxarbctl);

	/* save indirect csrs */
	qat_ae_ctx_indr_read(sc, ae, ctx, FUTURE_COUNT_SIGNAL_INDIRECT,
	    &indr_future_cnt_sig);
	qat_ae_ctx_indr_read(sc, ae, ctx, CTX_SIG_EVENTS_INDIRECT, &indr_sig);
	qat_ae_read_4(sc, ae, CTX_SIG_EVENTS_ACTIVE, &active_sig);

	/* turn off ucode parity */
	qat_ae_write_4(sc, ae, CTX_ENABLES,
	    ctxen & ~CTX_ENABLES_CNTL_STORE_PARITY_ENABLE);

	/* copy instructions to ustore */
	qat_ae_ucode_write(sc, ae, 0, ninst, ucode);
	/* set PC */
	qat_ae_ctx_indr_write(sc, ae, 1 << ctx, CTX_STS_INDIRECT, 0);
	/* change the active context */
	qat_ae_write_4(sc, ae, ACTIVE_CTX_STATUS,
	    ctx & ACTIVE_CTX_STATUS_ACNO);

	if (cond_code_off) {
		/* disable conditional-code*/
		qat_ae_write_4(sc, ae, CC_ENABLE, savcc & 0xffffdfff);
	}

	/* wakeup-event voluntary */
	qat_ae_ctx_indr_write(sc, ae, 1 << ctx,
	    CTX_WAKEUP_EVENTS_INDIRECT, CTX_WAKEUP_EVENTS_INDIRECT_VOLUNTARY);

	/* clean signals */
	qat_ae_ctx_indr_write(sc, ae, 1 << ctx, CTX_SIG_EVENTS_INDIRECT, 0);
	qat_ae_write_4(sc, ae, CTX_SIG_EVENTS_ACTIVE, 0);

	/* enable context */
	qat_ae_enable_ctx(sc, ae, 1 << ctx);

	/* wait for it to finish */
	/* a timeout is only recorded; the restore sequence below still runs */
	if (qat_ae_wait_num_cycles(sc, ae, max_cycles, 1) != 0)
		error = ETIMEDOUT;

	/* see if we need to get the current PC */
	if (endpc != NULL) {
		uint32_t ctx_status;

		qat_ae_ctx_indr_read(sc, ae, ctx, CTX_STS_INDIRECT,
		    &ctx_status);
		*endpc = ctx_status & UPC_MASK;
	}
#if 0
	{
		uint32_t ctx_status;

		qat_ae_ctx_indr_read(sc, ae, ctx, CTX_STS_INDIRECT,
		    &ctx_status);
		printf("%s: endpc 0x%08x\n", __func__,
		    ctx_status & UPC_MASK);
	}
#endif

	/* restore to previous states: */
	/* disable context */
	qat_ae_disable_ctx(sc, ae, 1 << ctx);
	if (ninst <= MAX_EXEC_INST) {
		/* instructions */
		qat_ae_ucode_write(sc, ae, 0, ninst, savucode);
	}
	/* wakeup-events */
	qat_ae_ctx_indr_write(sc, ae, 1 << ctx, CTX_WAKEUP_EVENTS_INDIRECT,
	    wakeup_ev);
	qat_ae_ctx_indr_write(sc, ae, 1 << ctx, CTX_STS_INDIRECT, savpc);

	/* only restore shared control store bit,
	   other bit might be changed by AE code snippet */
	qat_ae_read_4(sc, ae, AE_MISC_CONTROL, &misc);
	if (share_cs)
		nmisc = misc | AE_MISC_CONTROL_SHARE_CS;
	else
		nmisc = misc & ~AE_MISC_CONTROL_SHARE_CS;
	qat_ae_write_4(sc, ae, AE_MISC_CONTROL, nmisc);
	/* conditional-code */
	qat_ae_write_4(sc, ae, CC_ENABLE, savcc);
	/* change the active context */
	qat_ae_write_4(sc, ae, ACTIVE_CTX_STATUS,
	    savctx & ACTIVE_CTX_STATUS_ACNO);
	/* restore the nxt ctx to run */
	qat_ae_write_4(sc, ae, CTX_ARB_CNTL, ctxarbctl);
	/* restore current LM addr */
	qat_ae_ctx_indr_write(sc, ae, 1 << ctx, LM_ADDR_0_INDIRECT,
	    indr_lm_addr_0);
	qat_ae_ctx_indr_write(sc, ae, 1 << ctx, LM_ADDR_1_INDIRECT,
	    indr_lm_addr_1);
	qat_ae_ctx_indr_write(sc, ae, 1 << ctx, INDIRECT_LM_ADDR_0_BYTE_INDEX,
	    indr_lm_addr_byte_0);
	qat_ae_ctx_indr_write(sc, ae, 1 << ctx, INDIRECT_LM_ADDR_1_BYTE_INDEX,
	    indr_lm_addr_byte_1);

	/* restore indirect csrs */
	qat_ae_ctx_indr_write(sc, ae, 1 << ctx, FUTURE_COUNT_SIGNAL_INDIRECT,
	    indr_future_cnt_sig);
	qat_ae_ctx_indr_write(sc, ae, 1 << ctx, CTX_SIG_EVENTS_INDIRECT,
	    indr_sig);
	qat_ae_write_4(sc, ae, CTX_SIG_EVENTS_ACTIVE, active_sig);

	/* ctx-enables */
	qat_ae_write_4(sc, ae, CTX_ENABLES, ctxen);

	return error;
}

/*
 * Execute a batch-init microcode snippet on (ae, ctx).
 *
 * On the first call (*first_exec non-zero) the relative GPRs A0-A2 and
 * B0-B1 are read into the caller's variables, so they can be restored
 * later, and *first_exec is cleared.  The snippet is then run with
 * conditional codes off and a budget of ninst * 5 cycles.
 *
 * Returns the result of qat_ae_exec_ucode().
 */
static int
qat_ae_exec_ucode_init_lm(struct qat_softc *sc, u_char ae, u_char ctx,
    int *first_exec, uint64_t *ucode, u_int ninst,
    u_int *gpr_a0, u_int *gpr_a1, u_int *gpr_a2, u_int *gpr_b0, u_int *gpr_b1)
{
	u_int *const bank_a[] = { gpr_a0, gpr_a1, gpr_a2 };
	u_int *const bank_b[] = { gpr_b0, gpr_b1 };
	u_int regno;

	if (*first_exec != 0) {
		/* snapshot the A bank first, then the B bank */
		for (regno = 0; regno < nitems(bank_a); regno++) {
			qat_aereg_rel_data_read(sc, ae, ctx, AEREG_GPA_REL,
			    regno, bank_a[regno]);
		}
		for (regno = 0; regno < nitems(bank_b); regno++) {
			qat_aereg_rel_data_read(sc, ae, ctx, AEREG_GPB_REL,
			    regno, bank_b[regno]);
		}
		*first_exec = 0;
	}

	return qat_ae_exec_ucode(sc, ae, ctx, ucode, ninst, 1, ninst * 5, NULL);
}

/*
 * Write the given values back to the relative GPRs A0-A2 and B0-B1 of
 * (ae, ctx).  Always returns 0.
 */
static int
qat_ae_restore_init_lm_gprs(struct qat_softc *sc, u_char ae, u_char ctx,
    u_int gpr_a0, u_int gpr_a1, u_int gpr_a2, u_int gpr_b0, u_int gpr_b1)
{
	const u_int bank_a[] = { gpr_a0, gpr_a1, gpr_a2 };
	const u_int bank_b[] = { gpr_b0, gpr_b1 };
	u_int regno;

	/* A bank first, then B bank */
	for (regno = 0; regno < nitems(bank_a); regno++) {
		qat_aereg_rel_data_write(sc, ae, ctx, AEREG_GPA_REL,
		    regno, bank_a[regno]);
	}
	for (regno = 0; regno < nitems(bank_b); regno++) {
		qat_aereg_rel_data_write(sc, ae, ctx, AEREG_GPB_REL,
		    regno, bank_b[regno]);
	}

	return 0;
}

/*
 * Return the number of microwords needed for an init block of 'lmsize'
 * bytes: the ae_inst_4b template plus three words for every u_int beyond
 * the first, plus the ae_inst_1b template when lmsize is not a multiple of
 * sizeof(u_int).  A size of 0 needs no instructions.
 */
static int
qat_ae_get_inst_num(int lmsize)
{
	int remainder;

	if (lmsize == 0)
		return 0;

	remainder = lmsize % sizeof(u_int);
	if (remainder == 0) {
		/* 3 instruction is needed for further code */
		return ((lmsize - sizeof(u_int)) * 3 / 4 +
		    nitems(ae_inst_4b));
	}

	/* account for the odd tail, then count the u_int aligned part */
	return (nitems(ae_inst_1b) + qat_ae_get_inst_num(lmsize - remainder));
}

/*
 * Execute the batched init list 'qabi_list' on AE 'ae'.
 *
 * The microcode for the list entries is accumulated in a temporary buffer;
 * whenever the next entry would not fit, the buffered words are terminated
 * with ctx_arb[kill] and executed on context 0, and the buffer is reused.
 * The GPRs captured before the first execution are written back at the
 * end.
 *
 * 'nqabi' is the caller's estimate of the number of microwords needed.  The
 * buffer holds min(USTORE_SIZE, nqabi + 1) words, and one slot is always
 * kept free for the terminating word.
 *
 * Returns 0 on success (including an empty list), EINVAL if a single entry
 * cannot fit in the buffer together with the terminator, or the error from
 * qat_ae_exec_ucode_init_lm().
 */
static int
qat_ae_batch_put_lm(struct qat_softc *sc, u_char ae,
    struct qat_ae_batch_init_list *qabi_list, size_t nqabi)
{
	struct qat_ae_batch_init *qabi;
	size_t alloc_ninst, ninst;
	uint64_t *ucode;
	u_int gpr_a0, gpr_a1, gpr_a2, gpr_b0, gpr_b1;
	int insnsz, error = 0, execed = 0, first_exec = 1;

	if (STAILQ_FIRST(qabi_list) == NULL)
		return 0;

	/* + 1: room for the ctx_arb[kill] terminator appended before a run */
	alloc_ninst = min(USTORE_SIZE, nqabi + 1);
	ucode = qat_alloc_mem(sizeof(uint64_t) * alloc_ninst);

	ninst = 0;
	STAILQ_FOREACH(qabi, qabi_list, qabi_next) {
		insnsz = qat_ae_get_inst_num(qabi->qabi_size);
		/* would this entry plus the terminator overflow the buffer? */
		if (insnsz + ninst + 1 > alloc_ninst) {
			if (ninst > 0) {
				/* add ctx_arb[kill] */
				ucode[ninst++] = 0x0E000010000ull;
				execed = 1;

				/* run microExec to execute the microcode */
				error = qat_ae_exec_ucode_init_lm(sc, ae, 0,
				    &first_exec, ucode, ninst,
				    &gpr_a0, &gpr_a1, &gpr_a2, &gpr_b0,
				    &gpr_b1);
				if (error)
					break;
				ninst = 0;
			}
			/* an entry that does not fit an empty buffer never will */
			if ((size_t)insnsz + 1 > alloc_ninst) {
				error = EINVAL;
				break;
			}
		}
		ninst += qat_ae_concat_ucode(ucode, ninst,
		    qabi->qabi_size, qabi->qabi_addr, qabi->qabi_value);
	}

	/* execute whatever is still buffered */
	if (error == 0 && ninst > 0) {
		ucode[ninst++] = 0x0E000010000ull;
		execed = 1;

		error = qat_ae_exec_ucode_init_lm(sc, ae, 0,
		    &first_exec, ucode, ninst,
		    &gpr_a0, &gpr_a1, &gpr_a2, &gpr_b0, &gpr_b1);
	}
	/* the GPR copies are only valid once an execution was attempted */
	if (execed) {
		qat_ae_restore_init_lm_gprs(sc, ae, 0,
		    gpr_a0, gpr_a1, gpr_a2, gpr_b0, gpr_b1);
	}

	qat_free_mem(ucode);

	return error;
}

/*
 * Set the program counter of the contexts selected by 'ctx_mask' on 'ae'
 * to 'upc' (masked with UPC_MASK).  Returns EBUSY without touching the
 * hardware when the AE is active, 0 otherwise.
 */
static int
qat_ae_write_pc(struct qat_softc *sc, u_char ae, u_int ctx_mask, u_int upc)
{

	if (!qat_ae_is_active(sc, ae)) {
		qat_ae_ctx_indr_write(sc, ae, ctx_mask, CTX_STS_INDIRECT,
		    upc & UPC_MASK);
		return 0;
	}

	return EBUSY;
}

/*
 * Fold one input byte 'ch' into the running checksum register 'reg' and
 * return the updated register, truncated to CRC_WIDTH bits.  The update is
 * a bitwise shift/xor loop using the polynomial CRC_POLY.
 */
static inline u_int
qat_aefw_csum_calc(u_int reg, int ch)
{
	const u_int topbit = CRC_BITMASK(CRC_WIDTH - 1);
	u_int inbyte = (u_int)((reg >> 0x18) ^ ch);
	u_int carry;
	int rounds = 0x8;

	reg ^= inbyte << (CRC_WIDTH - 0x8);
	while (rounds-- > 0) {
		/* shift left; apply the polynomial when the top bit fell out */
		carry = reg & topbit;
		reg <<= 1;
		if (carry)
			reg ^= CRC_POLY;
	}

	return (reg & CRC_WIDTHMASK(CRC_WIDTH));
}

/*
 * Compute the firmware checksum over the 'size' bytes at 'buf', starting
 * from an initial register value of 0.
 */
static u_int
qat_aefw_csum(char *buf, int size)
{
	u_int sum;

	for (sum = 0; size != 0; size--, buf++)
		sum = qat_aefw_csum_calc(sum, *buf);

	return sum;
}

/*
 * Return a pointer to the string at byte 'offset' in the UOF string table,
 * or NULL when there is no string table or the offset lies outside it.
 */
static const char *
qat_aefw_uof_string(struct qat_softc *sc, size_t offset)
{
	if (offset >= sc->sc_aefw_uof.qafu_str_tab_size ||
	    sc->sc_aefw_uof.qafu_str_tab == NULL)
		return NULL;

	return (const char *)((uintptr_t)sc->sc_aefw_uof.qafu_str_tab + offset);
}

/*
 * Find the next chunk header with identifier 'id' in the UOF object.
 *
 * The chunk headers follow the object header directly.  Only headers
 * located after 'cur' are considered, so passing NULL starts from the
 * beginning and passing a previous result continues the search.
 *
 * Returns the matching header, or NULL when there is none or when a chunk
 * extending past the end of the object is encountered first.
 */
static struct uof_chunk_hdr *
qat_aefw_uof_find_chunk(struct qat_softc *sc,
	const char *id, struct uof_chunk_hdr *cur)
{
	struct uof_obj_hdr *uoh = sc->sc_aefw_uof.qafu_obj_hdr;
	struct uof_chunk_hdr *chunks = (struct uof_chunk_hdr *)(uoh + 1);
	struct uof_chunk_hdr *uch;
	int i;

	for (i = 0; i < uoh->uoh_num_chunks; i++) {
		uch = &chunks[i];

		/* refuse to look past a chunk that overruns the object */
		if (uch->uch_offset + uch->uch_size > sc->sc_aefw_uof.qafu_size)
			return NULL;

		/* skip everything up to and including the previous hit */
		if (cur >= uch)
			continue;

		if (strncmp(uch->uch_id, id, UOF_OBJ_ID_LEN) == 0)
			return uch;
	}

	return NULL;
}

/*
 * Fetch the MOF firmware image named by sc_hw.qhw_mof_fwname and keep a
 * private copy in sc_fw_mof / sc_fw_mof_size.  The firmware reference is
 * released again once the copy has been made.
 *
 * Returns 0 on success or ENXIO when the image cannot be obtained.
 */
static int
qat_aefw_load_mof(struct qat_softc *sc)
{
	const struct firmware *fw = firmware_get(sc->sc_hw.qhw_mof_fwname);

	if (fw != NULL) {
		sc->sc_fw_mof_size = fw->datasize;
		sc->sc_fw_mof = qat_alloc_mem(fw->datasize);
		memcpy(sc->sc_fw_mof, fw->data, fw->datasize);
		firmware_put(fw, FIRMWARE_UNLOAD);
		return 0;
	}

	device_printf(sc->sc_dev, "couldn't load MOF firmware %s\n",
	    sc->sc_hw.qhw_mof_fwname);
	return ENXIO;
}

/*
 * Release the private copy of the MOF image, if one is held, and clear the
 * pointer.
 */
static void
qat_aefw_unload_mof(struct qat_softc *sc)
{
	if (sc->sc_fw_mof == NULL)
		return;

	qat_free_mem(sc->sc_fw_mof);
	sc->sc_fw_mof = NULL;
}

/*
 * Fetch the MMP firmware image named by sc_hw.qhw_mmp_fwname and keep a
 * private copy in sc_fw_mmp / sc_fw_mmp_size.  The firmware reference is
 * released again once the copy has been made.
 *
 * Returns 0 on success or ENXIO when the image cannot be obtained.
 */
static int
qat_aefw_load_mmp(struct qat_softc *sc)
{
	const struct firmware *fw;

	fw = firmware_get(sc->sc_hw.qhw_mmp_fwname);
	if (fw == NULL) {
		/* name the image actually requested: MMP, not MOF */
		device_printf(sc->sc_dev, "couldn't load MMP firmware %s\n",
		    sc->sc_hw.qhw_mmp_fwname);
		return ENXIO;
	}

	sc->sc_fw_mmp = qat_alloc_mem(fw->datasize);
	sc->sc_fw_mmp_size = fw->datasize;
	memcpy(sc->sc_fw_mmp, fw->data, fw->datasize);
	firmware_put(fw, FIRMWARE_UNLOAD);
	return 0;
}

/*
 * Release the private copy of the MMP image, if one is held, and clear the
 * pointer.
 */
static void
qat_aefw_unload_mmp(struct qat_softc *sc)
{
	if (sc->sc_fw_mmp == NULL)
		return;

	qat_free_mem(sc->sc_fw_mmp);
	sc->sc_fw_mmp = NULL;
}

/*
 * Search the 'nchunk' chunk headers at 'head' for the object whose name in
 * the MOF symbol table equals sc_fw_uof_name.
 *
 * muh     - header of the object container; chunk offsets are relative to it
 * size    - size of the container, used to bound every chunk
 * id      - identifier every chunk is required to carry
 * fwsize, fwptr - receive the size and address of the matching object
 *
 * Returns 0 when found, ENOENT when no chunk matches, or EINVAL when a
 * chunk has the wrong id, lies outside the container, or refers to a name
 * beyond the symbol table.
 */
static int
qat_aefw_mof_find_uof0(struct qat_softc *sc,
	struct mof_uof_hdr *muh, struct mof_uof_chunk_hdr *head,
	u_int nchunk, size_t size, const char *id,
	size_t *fwsize, void **fwptr)
{
	struct mof_uof_chunk_hdr *much;
	const char *name;
	u_int idx;

	for (idx = 0; idx < nchunk; idx++) {
		much = head + idx;

		/* validate the header before trusting any of its fields */
		if (strncmp(much->much_id, id, MOF_OBJ_ID_LEN) != 0)
			return EINVAL;
		if (much->much_offset + much->much_size > size)
			return EINVAL;
		if (much->much_name >= sc->sc_mof.qmf_sym_size)
			return EINVAL;

		name = (char *)((uintptr_t)sc->sc_mof.qmf_sym +
		    much->much_name);
		if (strcmp(name, sc->sc_fw_uof_name) != 0)
			continue;

		*fwptr = (void *)((uintptr_t)muh +
		    (uintptr_t)much->much_offset);
		*fwsize = (size_t)much->much_size;
		return 0;
	}

	return ENOENT;
}

/*
 * Locate the UOF and/or SUOF object named sc_fw_uof_name inside the parsed
 * MOF image and record it in sc_fw_uof / sc_fw_suof (plus sizes).
 *
 * Returns 0 when at least one of the two was found, EINVAL when a container
 * header is inconsistent or both containers are empty, ENOENT when neither
 * container holds the object, or any other error reported by
 * qat_aefw_mof_find_uof0().
 */
static int
qat_aefw_mof_find_uof(struct qat_softc *sc)
{
	struct mof_uof_hdr *uof = sc->sc_mof.qmf_uof_objs;
	struct mof_uof_hdr *suof = sc->sc_mof.qmf_suof_objs;
	u_int uof_cnt = 0, suof_cnt = 0;
	int rc;

	/* sanity check the chunk counts of whichever containers exist */
	if (uof != NULL) {
		if (uof->muh_num_chunks > uof->muh_max_chunks)
			return EINVAL;
		uof_cnt = uof->muh_num_chunks;
	}
	if (suof != NULL) {
		if (suof->muh_num_chunks > suof->muh_max_chunks)
			return EINVAL;
		suof_cnt = suof->muh_num_chunks;
	}

	if (uof_cnt + suof_cnt == 0)
		return EINVAL;

	/* a miss (ENOENT) in one container is fine; the other may hit */
	if (uof != NULL) {
		rc = qat_aefw_mof_find_uof0(sc, uof,
		    (struct mof_uof_chunk_hdr *)(uof + 1), uof_cnt,
		    sc->sc_mof.qmf_uof_objs_size, UOF_IMAG,
		    &sc->sc_fw_uof_size, &sc->sc_fw_uof);
		if (rc != 0 && rc != ENOENT)
			return rc;
	}

	if (suof != NULL) {
		rc = qat_aefw_mof_find_uof0(sc, suof,
		    (struct mof_uof_chunk_hdr *)(suof + 1), suof_cnt,
		    sc->sc_mof.qmf_suof_objs_size, SUOF_IMAG,
		    &sc->sc_fw_suof_size, &sc->sc_fw_suof);
		if (rc != 0 && rc != ENOENT)
			return rc;
	}

	if (sc->sc_fw_uof == NULL && sc->sc_fw_suof == NULL)
		return ENOENT;

	return 0;
}

/*
 * Validate the MOF image held in sc_fw_mof and index its top-level chunks.
 *
 * The file header is checked (file id, checksum, version, chunk counts),
 * then each chunk header is examined and the symbol table (SYM_OBJS), UOF
 * container (UOF_OBJS) and SUOF container (SUOF_OBJS) are recorded in
 * sc_mof.  A symbol table and at least one of the two containers must be
 * present.  Finally qat_aefw_mof_find_uof() is called to locate the object
 * to load.
 *
 * Returns 0 on success, EINVAL for a malformed image, or the error from
 * qat_aefw_mof_find_uof().
 */
static int
qat_aefw_mof_parse(struct qat_softc *sc)
{
	const struct mof_file_hdr *mfh;
	const struct mof_file_chunk_hdr *mfch;
	size_t size;
	u_int csum;
	int error, i;

	size = sc->sc_fw_mof_size;

	/* from here on 'size' is what remains after the file header */
	if (size < sizeof(struct mof_file_hdr))
		return EINVAL;
	size -= sizeof(struct mof_file_hdr);

	mfh = sc->sc_fw_mof;

	if (mfh->mfh_fid != MOF_FID)
		return EINVAL;

	/* the checksum covers everything from mfh_min_ver to end of file */
	csum = qat_aefw_csum((char *)((uintptr_t)sc->sc_fw_mof +
	    offsetof(struct mof_file_hdr, mfh_min_ver)),
	    sc->sc_fw_mof_size -
	    offsetof(struct mof_file_hdr, mfh_min_ver));
	if (mfh->mfh_csum != csum)
		return EINVAL;

	if (mfh->mfh_min_ver != MOF_MIN_VER ||
	    mfh->mfh_maj_ver != MOF_MAJ_VER)
		return EINVAL;

	if (mfh->mfh_max_chunks < mfh->mfh_num_chunks)
		return EINVAL;

	/* all chunk headers must fit in what follows the file header */
	if (size < sizeof(struct mof_file_chunk_hdr) * mfh->mfh_num_chunks)
		return EINVAL;
	mfch = (const struct mof_file_chunk_hdr *)(mfh + 1);

	for (i = 0; i < mfh->mfh_num_chunks; i++, mfch++) {
		if (mfch->mfch_offset + mfch->mfch_size > sc->sc_fw_mof_size)
			return EINVAL;

		if (!strncmp(mfch->mfch_id, SYM_OBJS, MOF_OBJ_ID_LEN)) {
			/* only one symbol table is allowed */
			if (sc->sc_mof.qmf_sym != NULL)
				return EINVAL;

			/*
			 * The chunk starts with a u_int length followed by
			 * the string data.
			 */
			sc->sc_mof.qmf_sym =
			    (void *)((uintptr_t)sc->sc_fw_mof +
			    (uintptr_t)mfch->mfch_offset + sizeof(u_int));
			sc->sc_mof.qmf_sym_size =
			    *(u_int *)((uintptr_t)sc->sc_fw_mof +
			    (uintptr_t)mfch->mfch_offset);

			if (sc->sc_mof.qmf_sym_size % sizeof(u_int) != 0)
				return EINVAL;
			if (mfch->mfch_size != sc->sc_mof.qmf_sym_size +
			    sizeof(u_int) || mfch->mfch_size == 0)
				return EINVAL;
			/* the table must end in a NUL terminator */
			if (*(char *)((uintptr_t)sc->sc_mof.qmf_sym +
			    sc->sc_mof.qmf_sym_size - 1) != '\0')
				return EINVAL;

		} else if (!strncmp(mfch->mfch_id, UOF_OBJS, MOF_OBJ_ID_LEN)) {
			/* only one UOF container is allowed */
			if (sc->sc_mof.qmf_uof_objs != NULL)
				return EINVAL;

			sc->sc_mof.qmf_uof_objs =
			    (void *)((uintptr_t)sc->sc_fw_mof +
			    (uintptr_t)mfch->mfch_offset);
			sc->sc_mof.qmf_uof_objs_size = mfch->mfch_size;

		} else if (!strncmp(mfch->mfch_id, SUOF_OBJS, MOF_OBJ_ID_LEN)) {
			/* only one SUOF container is allowed */
			if (sc->sc_mof.qmf_suof_objs != NULL)
				return EINVAL;

			sc->sc_mof.qmf_suof_objs =
			    (void *)((uintptr_t)sc->sc_fw_mof +
			    (uintptr_t)mfch->mfch_offset);
			sc->sc_mof.qmf_suof_objs_size = mfch->mfch_size;
		}
	}

	/* need the symbol table plus at least one object container */
	if (sc->sc_mof.qmf_sym == NULL ||
	    (sc->sc_mof.qmf_uof_objs == NULL &&
	    sc->sc_mof.qmf_suof_objs == NULL))
		return EINVAL;

	error = qat_aefw_mof_find_uof(sc);
	if (error)
		return error;
	return 0;
}

/*
 * Parse the UOF image chunk described by 'uch' and fill in 'qui'.
 *
 * The image header gives offsets (relative to the UOF object header) of
 * several object tables; each is bounds-checked against the end of the
 * chunk and recorded as a (count, pointer) pair.  The code pages that
 * follow the image header are then walked, and for each page the fixup
 * tables, the code area and the micro-word blocks are recorded.  The
 * micro-word offset stored in each block is replaced in place by an
 * absolute address.
 *
 * Returns 0 on success or EINVAL when the chunk is too small, holds more
 * pages than qui->qui_pages can take, or a table/offset exceeds the chunk
 * limit.
 */
static int
qat_aefw_uof_parse_image(struct qat_softc *sc,
	struct qat_uof_image *qui, struct uof_chunk_hdr *uch)
{
	struct uof_image *image;
	struct uof_code_page *page;
	uintptr_t base = (uintptr_t)sc->sc_aefw_uof.qafu_obj_hdr;
	/* 'lim' is the end offset of this chunk, relative to 'base' */
	size_t lim = uch->uch_offset + uch->uch_size, size;
	int i, p;

	/* 'size' becomes what is left of the chunk after the image header */
	size = uch->uch_size;
	if (size < sizeof(struct uof_image))
		return EINVAL;
	size -= sizeof(struct uof_image);

	qui->qui_image = image =
	    (struct uof_image *)(base + uch->uch_offset);

/*
 * ASSIGN_OBJ_TAB: read the entry count of the uof_obj_table at base + off,
 * return EINVAL from the enclosing function if the table would extend past
 * 'lim', then store the count in *np and a pointer to the first entry (or
 * NULL for an empty table) in *typep.
 */
#define ASSIGN_OBJ_TAB(np, typep, type, base, off, lim)			\
do {									\
	u_int nent;							\
	nent = ((struct uof_obj_table *)((base) + (off)))->uot_nentries;\
	if ((lim) < off + sizeof(struct uof_obj_table) +		\
	    sizeof(type) * nent)					\
		return EINVAL;						\
	*(np) = nent;							\
	if (nent > 0)							\
		*(typep) = (type)((struct uof_obj_table *)		\
		    ((base) + (off)) + 1);				\
	else								\
		*(typep) = NULL;					\
} while (0)

	/* per-image tables */
	ASSIGN_OBJ_TAB(&qui->qui_num_ae_reg, &qui->qui_ae_reg,
	    struct uof_ae_reg *, base, image->ui_reg_tab, lim);
	ASSIGN_OBJ_TAB(&qui->qui_num_init_reg_sym, &qui->qui_init_reg_sym,
	    struct uof_init_reg_sym *, base, image->ui_init_reg_sym_tab, lim);
	ASSIGN_OBJ_TAB(&qui->qui_num_sbreak, &qui->qui_sbreak,
	    struct qui_sbreak *, base, image->ui_sbreak_tab, lim);

	/* the page headers must fit in the chunk and in qui->qui_pages */
	if (size < sizeof(struct uof_code_page) * image->ui_num_pages)
		return EINVAL;
	if (nitems(qui->qui_pages) < image->ui_num_pages)
		return EINVAL;

	/* code page headers follow the image header directly */
	page = (struct uof_code_page *)(image + 1);

	for (p = 0; p < image->ui_num_pages; p++, page++) {
		struct qat_uof_page *qup = &qui->qui_pages[p];
		struct uof_code_area *uca;

		qup->qup_page_num = page->ucp_page_num;
		qup->qup_def_page = page->ucp_def_page;
		qup->qup_page_region = page->ucp_page_region;
		qup->qup_beg_vaddr = page->ucp_beg_vaddr;
		qup->qup_beg_paddr = page->ucp_beg_paddr;

		/* per-page fixup tables */
		ASSIGN_OBJ_TAB(&qup->qup_num_uc_var, &qup->qup_uc_var,
		    struct uof_uword_fixup *, base,
		    page->ucp_uc_var_tab, lim);
		ASSIGN_OBJ_TAB(&qup->qup_num_imp_var, &qup->qup_imp_var,
		    struct uof_import_var *, base,
		    page->ucp_imp_var_tab, lim);
		ASSIGN_OBJ_TAB(&qup->qup_num_imp_expr, &qup->qup_imp_expr,
		    struct uof_uword_fixup *, base,
		    page->ucp_imp_expr_tab, lim);
		ASSIGN_OBJ_TAB(&qup->qup_num_neigh_reg, &qup->qup_neigh_reg,
		    struct uof_uword_fixup *, base,
		    page->ucp_neigh_reg_tab, lim);

		if (lim < page->ucp_code_area + sizeof(struct uof_code_area))
			return EINVAL;

		uca = (struct uof_code_area *)(base + page->ucp_code_area);
		qup->qup_num_micro_words = uca->uca_num_micro_words;

		ASSIGN_OBJ_TAB(&qup->qup_num_uw_blocks, &qup->qup_uw_blocks,
		    struct qat_uof_uword_block *, base,
		    uca->uca_uword_block_tab, lim);

		for (i = 0; i < qup->qup_num_uw_blocks; i++) {
			/*
			 * The table entries are read through the on-disk
			 * layout (struct uof_uword_block) to fetch the
			 * offset before the entry is rewritten below.
			 */
			u_int uwordoff = ((struct uof_uword_block *)(
			    &qup->qup_uw_blocks[i]))->uub_uword_offset;

			if (lim < uwordoff)
				return EINVAL;

			/* turn the file offset into an absolute address */
			qup->qup_uw_blocks[i].quub_micro_words =
			    (base + uwordoff);
		}
	}

#undef ASSIGN_OBJ_TAB

	return 0;
}

/*
 * Parse every UOF_IMAG chunk of the UOF object into
 * sc_aefw_uof.qafu_imgs[], counting them in qafu_num_imgs.  At most
 * MAX_NUM_AE * MAX_AE_CTX chunks are looked at.
 *
 * Returns 0 on success, ENOENT when there are more image chunks than
 * qafu_imgs[] can hold, or the error from qat_aefw_uof_parse_image().
 */
static int
qat_aefw_uof_parse_images(struct qat_softc *sc)
{
	struct uof_chunk_hdr *chunk = NULL;
	int idx = 0, rc;

	while (idx < MAX_NUM_AE * MAX_AE_CTX) {
		/* continue the search after the previously found chunk */
		chunk = qat_aefw_uof_find_chunk(sc, UOF_IMAG, chunk);
		if (chunk == NULL)
			break;

		if (idx >= nitems(sc->sc_aefw_uof.qafu_imgs))
			return ENOENT;

		rc = qat_aefw_uof_parse_image(sc,
		    &sc->sc_aefw_uof.qafu_imgs[idx], chunk);
		if (rc != 0)
			return rc;

		sc->sc_aefw_uof.qafu_num_imgs++;
		idx++;
	}

	return 0;
}

/*
 * Validate the UOF file held in sc_fw_uof and index its contents.
 *
 * The file header is checked (id, version, chunk counts) and the single
 * UOF_OBJS file chunk is located and checksummed.  Inside that object the
 * header is checked for compatibility with this device (product type and
 * revision range), and the string table (UOF_STRT), memory-init table
 * (UOF_IMEM) and variable memory segment (UOF_MSEG) chunks are recorded in
 * sc_aefw_uof when present.  Finally the image chunks are parsed.
 *
 * Returns 0 on success, EINVAL for a malformed file, ENOENT when there is
 * no UOF_OBJS chunk, ENOTSUP when the object does not match the device, or
 * the error from qat_aefw_uof_parse_images().
 */
static int
qat_aefw_uof_parse(struct qat_softc *sc)
{
	struct uof_file_hdr *ufh;
	struct uof_file_chunk_hdr *ufch;
	struct uof_obj_hdr *uoh;
	struct uof_chunk_hdr *uch;
	void *uof = NULL;
	size_t size, uof_size, hdr_size;
	uintptr_t base;
	u_int csum;
	int i;

	/* from here on 'size' is what remains after the file header */
	size = sc->sc_fw_uof_size;
	if (size < MIN_UOF_SIZE)
		return EINVAL;
	size -= sizeof(struct uof_file_hdr);

	ufh = sc->sc_fw_uof;

	if (ufh->ufh_id != UOF_FID)
		return EINVAL;
	if (ufh->ufh_min_ver != UOF_MIN_VER || ufh->ufh_maj_ver != UOF_MAJ_VER)
		return EINVAL;

	if (ufh->ufh_max_chunks < ufh->ufh_num_chunks)
		return EINVAL;
	if (size < sizeof(struct uof_file_chunk_hdr) * ufh->ufh_num_chunks)
		return EINVAL;
	ufch = (struct uof_file_chunk_hdr *)(ufh + 1);

	uof_size = 0;
	for (i = 0; i < ufh->ufh_num_chunks; i++, ufch++) {
		if (ufch->ufch_offset + ufch->ufch_size > sc->sc_fw_uof_size)
			return EINVAL;

		if (!strncmp(ufch->ufch_id, UOF_OBJS, UOF_OBJ_ID_LEN)) {
			/* only one UOF_OBJS chunk is allowed */
			if (uof != NULL)
				return EINVAL;

			uof =
			    (void *)((uintptr_t)sc->sc_fw_uof +
			    ufch->ufch_offset);
			uof_size = ufch->ufch_size;

			csum = qat_aefw_csum(uof, uof_size);
			if (csum != ufch->ufch_csum)
				return EINVAL;
		}
	}

	if (uof == NULL)
		return ENOENT;

	/* now 'size' is what remains of the object after its header */
	size = uof_size;
	if (size < sizeof(struct uof_obj_hdr))
		return EINVAL;
	size -= sizeof(struct uof_obj_hdr);

	uoh = uof;

	if (size < sizeof(struct uof_chunk_hdr) * uoh->uoh_num_chunks)
		return EINVAL;

	/* Check if the UOF objects are compatible with the chip */
	if ((uoh->uoh_cpu_type & sc->sc_hw.qhw_prod_type) == 0)
		return ENOTSUP;

	if (uoh->uoh_min_cpu_ver > sc->sc_rev ||
	    uoh->uoh_max_cpu_ver < sc->sc_rev)
		return ENOTSUP;

	sc->sc_aefw_uof.qafu_size = uof_size;
	sc->sc_aefw_uof.qafu_obj_hdr = uoh;

	base = (uintptr_t)sc->sc_aefw_uof.qafu_obj_hdr;

	/* map uof string-table */
	uch = qat_aefw_uof_find_chunk(sc, UOF_STRT, NULL);
	if (uch != NULL) {
		hdr_size = offsetof(struct uof_str_tab, ust_strings);
		/*
		 * A chunk shorter than the table header would make the
		 * size computed below wrap around.
		 */
		if (uch->uch_size < hdr_size)
			return EINVAL;
		sc->sc_aefw_uof.qafu_str_tab =
		    (void *)(base + uch->uch_offset + hdr_size);
		sc->sc_aefw_uof.qafu_str_tab_size = uch->uch_size - hdr_size;
	}

	/* get ustore mem inits table -- should be only one */
	uch = qat_aefw_uof_find_chunk(sc, UOF_IMEM, NULL);
	if (uch != NULL) {
		if (uch->uch_size < sizeof(struct uof_obj_table))
			return EINVAL;
		sc->sc_aefw_uof.qafu_num_init_mem = ((struct uof_obj_table *)(base +
		    uch->uch_offset))->uot_nentries;
		if (sc->sc_aefw_uof.qafu_num_init_mem) {
			sc->sc_aefw_uof.qafu_init_mem =
			    (struct uof_init_mem *)(base + uch->uch_offset +
			    sizeof(struct uof_obj_table));
			sc->sc_aefw_uof.qafu_init_mem_size =
			    uch->uch_size - sizeof(struct uof_obj_table);
		}
	}

	/* the variable memory segment follows an object table header */
	uch = qat_aefw_uof_find_chunk(sc, UOF_MSEG, NULL);
	if (uch != NULL) {
		if (uch->uch_size < sizeof(struct uof_obj_table) +
		    sizeof(struct uof_var_mem_seg))
			return EINVAL;
		sc->sc_aefw_uof.qafu_var_mem_seg =
		    (struct uof_var_mem_seg *)(base + uch->uch_offset +
		    sizeof(struct uof_obj_table));
	}

	return qat_aefw_uof_parse_images(sc);
}

/*
 * Fill in 'qsi' from the SUOF image chunk described by 'sch' and check that
 * the image is meant for this device.
 *
 * The chunk starts with a struct suof_obj_hdr (which carries the image
 * length) followed by the signed image: CSS header, key (modulus and
 * exponent), signature, and then the image proper, which begins with a
 * struct simg_ae_mode.
 *
 * Returns 0 on success or EINVAL when the image's device type or supported
 * device version range does not match.
 *
 * NOTE(review): no offset or length from the chunk is bounds-checked
 * against the SUOF buffer size here — confirm the caller validates them.
 */
static int
qat_aefw_suof_parse_image(struct qat_softc *sc, struct qat_suof_image *qsi,
    struct suof_chunk_hdr *sch)
{
	struct qat_aefw_suof *qafs = &sc->sc_aefw_suof;
	struct simg_ae_mode *ae_mode;
	u_int maj_ver;

	/* the signed image follows the object header of the chunk */
	qsi->qsi_simg_buf = qafs->qafs_suof_buf + sch->sch_offset +
	    sizeof(struct suof_obj_hdr);
	qsi->qsi_simg_len =
	    ((struct suof_obj_hdr *)
	    (qafs->qafs_suof_buf + sch->sch_offset))->soh_img_length;

	/* lay out the fixed-size sections of the signed image in order */
	qsi->qsi_css_header = qsi->qsi_simg_buf;
	qsi->qsi_css_key = qsi->qsi_css_header + sizeof(struct css_hdr);
	qsi->qsi_css_signature = qsi->qsi_css_key +
	    CSS_FWSK_MODULUS_LEN + CSS_FWSK_EXPONENT_LEN;
	qsi->qsi_css_simg = qsi->qsi_css_signature + CSS_SIGNATURE_LEN;

	/* the image proper starts with its AE mode descriptor */
	ae_mode = (struct simg_ae_mode *)qsi->qsi_css_simg;
	qsi->qsi_ae_mask = ae_mode->sam_ae_mask;
	qsi->qsi_simg_name = (u_long)&ae_mode->sam_simg_name;
	qsi->qsi_appmeta_data = (u_long)&ae_mode->sam_appmeta_data;
	qsi->qsi_fw_type = ae_mode->sam_fw_type;

	if (ae_mode->sam_dev_type != sc->sc_hw.qhw_prod_type)
		return EINVAL;

	/* the device version must lie within the range the image supports */
	maj_ver = (QAT_PID_MAJOR_REV | (sc->sc_rev & QAT_PID_MINOR_REV)) & 0xff;
	if ((maj_ver > ae_mode->sam_devmax_ver) ||
	    (maj_ver < ae_mode->sam_devmin_ver)) {
		return EINVAL;
	}

	return 0;
}

static int
qat_aefw_suof_parse(struct qat_softc *sc)
{
	struct suof_file_hdr *sfh;
	struct suof_chunk_hdr *sch;
	struct qat_aefw_suof *qafs = &sc->sc_aefw_suof;
	struct qat_suof_image *qsi;
	size_t size;
	u_int csum;
	int ae0_img = MAX_AE;
	int i, error;

	size = sc->sc_fw_suof_size;
	if (size < sizeof(struct suof_file_hdr))
		return EINVAL;

	sfh = sc->sc_fw_suof;

	if (sfh->sfh_file_id != SUOF_FID)
		return EINVAL;
	if (sfh->sfh_fw_type != 0)
		return EINVAL;
	if (sfh->sfh_num_chunks <= 1)
		return EINVAL;
	if (sfh->sfh_min_ver != SUOF_MIN_VER ||
	    sfh->sfh_maj_ver != SUOF_MAJ_VER)
		return EINVAL;

	csum = qat_aefw_csum((char *)&sfh->sfh_min_ver,
	    size - offsetof(struct suof_file_hdr, sfh_min_ver));
	if (csum != sfh->sfh_check_sum)
		return EINVAL;

	size -= sizeof(struct suof_file_hdr);

	qafs->qafs_file_id = SUOF_FID;
	qafs->qafs_suof_buf = sc->sc_fw_suof;
	qafs->qafs_suof_size = sc->sc_fw_suof_size;
	qafs->qafs_check_sum = sfh->sfh_check_sum;
	qafs->qafs_min_ver = sfh->sfh_min_ver;
	qafs->qafs_maj_ver = sfh->sfh_maj_ver;
	qafs->qafs_fw_type = sfh->sfh_fw_type;

	if (size < sizeof(struct suof_chunk_hdr))
		return EINVAL;
	sch = (struct suof_chunk_hdr *)(sfh + 1);
	size -= sizeof(struct suof_chunk_hdr);

	if (size < sizeof(struct suof_str_tab))
		return EINVAL;
	size -= offsetof(struct suof_str_tab, sst_strings);

	qafs->qafs_sym_size = ((struct suof_str_tab *)
	    (qafs->qafs_suof_buf + sch->sch_offset))->sst_tab_length;
	if (size < qafs->qafs_sym_size)
		return EINVAL;
	qafs->qafs_sym_str = qafs->qafs_suof_buf + sch->sch_offset +
	    offsetof(struct suof_str_tab, sst_strings);

	qafs->qafs_num_simgs = sfh->sfh_num_chunks - 1;
	if (qafs->qafs_num_simgs == 0)
		return EINVAL;

	qsi = qat_alloc_mem(
	    sizeof(struct qat_suof_image) * qafs->qafs_num_simgs);
	qafs->qafs_simg = qsi;

	for (i = 0; i < qafs->qafs_num_simgs; i++) {
		error = qat_aefw_suof_parse_image(sc, &qsi[i], &sch[i + 1]);
		if (error)
			return error;
		if ((qsi[i].qsi_ae_mask & 0x1) != 0)
			ae0_img = i;
	}

	if (ae0_img != qafs->qafs_num_simgs - 1) {
		struct qat_suof_image last_qsi;

		memcpy(&last_qsi, &qsi[qafs->qafs_num_simgs - 1],
		    sizeof(struct qat_suof_image));
		memcpy(&qsi[qafs->qafs_num_simgs - 1], &qsi[ae0_img],
		    sizeof(struct qat_suof_image));
		memcpy(&qsi[ae0_img], &last_qsi,
		    sizeof(struct qat_suof_image));
	}

	return 0;
}

/*
 * Allocate DMA memory for one signed image and lay it out for firmware
 * authentication.
 *
 * The buffer starts with a struct auth_chunk whose descriptor receives the
 * bus addresses of the pieces copied behind it: CSS header, public key
 * (modulus, zero padding, exponent), signature and the image itself.
 *
 *   sc     driver softc
 *   image  signed image as found in the SUOF (starts with the CSS header)
 *   size   length of 'image' in bytes
 *   dma    filled in by qat_alloc_dmamem(); on success the caller owns it
 *
 * Returns 0 on success, EINVAL when 'size' is out of range, or the error
 * from qat_alloc_dmamem().
 */
static int
qat_aefw_alloc_auth_dmamem(struct qat_softc *sc, char *image, size_t size,
    struct qat_dmamem *dma)
{
	struct css_hdr *css = (struct css_hdr *)image;
	struct auth_chunk *auth_chunk;
	struct fw_auth_desc *auth_desc;
	size_t mapsize, simg_offset = sizeof(struct auth_chunk);
	bus_size_t bus_addr;
	uintptr_t virt_addr;
	int error;

	/*
	 * The image payload length is computed as size - AE_IMG_OFFSET
	 * below; a shorter image would make that subtraction wrap around
	 * and turn the final memcpy() into a huge copy.
	 */
	if (size < AE_IMG_OFFSET)
		return EINVAL;
	if (size > AE_IMG_OFFSET + CSS_MAX_IMAGE_LEN)
		return EINVAL;

	/* AE firmware uses a fixed size, everything else is sized to fit. */
	mapsize = (css->css_fw_type == CSS_AE_FIRMWARE) ?
	    CSS_AE_SIMG_LEN + simg_offset :
	    size + CSS_FWSK_PAD_LEN + simg_offset;
	error = qat_alloc_dmamem(sc, dma, 1, mapsize, PAGE_SIZE);
	if (error)
		return error;

	memset(dma->qdm_dma_vaddr, 0, mapsize);

	auth_chunk = dma->qdm_dma_vaddr;
	auth_chunk->ac_chunk_size = mapsize;
	auth_chunk->ac_chunk_bus_addr = dma->qdm_dma_seg.ds_addr;

	/* virt_addr and bus_addr walk the buffer in lock step. */
	virt_addr = (uintptr_t)dma->qdm_dma_vaddr;
	virt_addr += simg_offset;
	bus_addr = auth_chunk->ac_chunk_bus_addr;
	bus_addr += simg_offset;

	/* CSS header */
	auth_desc = &auth_chunk->ac_fw_auth_desc;
	auth_desc->fad_css_hdr_high = (uint64_t)bus_addr >> 32;
	auth_desc->fad_css_hdr_low = bus_addr;

	memcpy((void *)virt_addr, image, sizeof(struct css_hdr));
	/* pub key */
	virt_addr += sizeof(struct css_hdr);
	bus_addr += sizeof(struct css_hdr);
	image += sizeof(struct css_hdr);

	auth_desc->fad_fwsk_pub_high = (uint64_t)bus_addr >> 32;
	auth_desc->fad_fwsk_pub_low = bus_addr;

	/* modulus, zero padding, then the first 32 bits of the exponent */
	memcpy((void *)virt_addr, image, CSS_FWSK_MODULUS_LEN);
	memset((void *)(virt_addr + CSS_FWSK_MODULUS_LEN), 0, CSS_FWSK_PAD_LEN);
	memcpy((void *)(virt_addr + CSS_FWSK_MODULUS_LEN + CSS_FWSK_PAD_LEN),
	    image + CSS_FWSK_MODULUS_LEN, sizeof(uint32_t));

	/* signature */
	virt_addr += CSS_FWSK_PUB_LEN;
	bus_addr += CSS_FWSK_PUB_LEN;
	image += CSS_FWSK_MODULUS_LEN + CSS_FWSK_EXPONENT_LEN;

	auth_desc->fad_signature_high = (uint64_t)bus_addr >> 32;
	auth_desc->fad_signature_low = bus_addr;

	memcpy((void *)virt_addr, image, CSS_SIGNATURE_LEN);

	/* image payload */
	virt_addr += CSS_SIGNATURE_LEN;
	bus_addr += CSS_SIGNATURE_LEN;
	image += CSS_SIGNATURE_LEN;

	auth_desc->fad_img_high = (uint64_t)bus_addr >> 32;
	auth_desc->fad_img_low = bus_addr;
	auth_desc->fad_img_len = size - AE_IMG_OFFSET;

	memcpy((void *)virt_addr, image, auth_desc->fad_img_len);

	if (css->css_fw_type == CSS_AE_FIRMWARE) {
		/*
		 * AE firmware: the payload is addressed as mode data,
		 * followed by the init sequence and the instructions.
		 */
		auth_desc->fad_img_ae_mode_data_high = auth_desc->fad_img_high;
		auth_desc->fad_img_ae_mode_data_low = auth_desc->fad_img_low;

		bus_addr += sizeof(struct simg_ae_mode);

		auth_desc->fad_img_ae_init_data_high = (uint64_t)bus_addr >> 32;
		auth_desc->fad_img_ae_init_data_low = bus_addr;

		bus_addr += SIMG_AE_INIT_SEQ_LEN;

		auth_desc->fad_img_ae_insts_high = (uint64_t)bus_addr >> 32;
		auth_desc->fad_img_ae_insts_low = bus_addr;
	} else {
		/* Otherwise the instructions start at the payload. */
		auth_desc->fad_img_ae_insts_high = auth_desc->fad_img_high;
		auth_desc->fad_img_ae_insts_low = auth_desc->fad_img_low;
	}

	bus_dmamap_sync(dma->qdm_dma_tag, dma->qdm_dma_map,
	    BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD);

	return 0;
}

static int
qat_aefw_auth(struct qat_softc *sc, struct qat_dmamem *dma)
{
	bus_addr_t addr;
	uint32_t fcu, sts;
	int retry = 0;

	addr = dma->qdm_dma_seg.ds_addr;
	qat_cap_global_write_4(sc, FCU_DRAM_ADDR_HI, (uint64_t)addr >> 32);
	qat_cap_global_write_4(sc, FCU_DRAM_ADDR_LO, addr);
	qat_cap_global_write_4(sc, FCU_CTRL, FCU_CTRL_CMD_AUTH);

	do {
		DELAY(FW_AUTH_WAIT_PERIOD * 1000);
		fcu = qat_cap_global_read_4(sc, FCU_STATUS);
		sts = __SHIFTOUT(fcu, FCU_STATUS_STS);
		if (sts == FCU_STATUS_STS_VERI_FAIL)
			goto fail;
		if (fcu & FCU_STATUS_AUTHFWLD &&
		    sts == FCU_STATUS_STS_VERI_DONE) {
			return 0;
		}
	} while (retry++ < FW_AUTH_MAX_RETRY);

fail:
	device_printf(sc->sc_dev,
	   "firmware authentication error: status 0x%08x retry %d\n",
	   fcu, retry);
	return EINVAL;
}

/*
 * Load the image held in 'dma' into every AE that is both present in
 * sc->sc_ae_mask and selected by the image's own AE mask.
 *
 * For each such AE a LOAD command is written to FCU_CTRL and FCU_STATUS is
 * polled until it reports LOAD_DONE with the AE's bit set in the loaded
 * mask.  Each AE gets its own full poll budget of FW_AUTH_MAX_RETRY.
 *
 * Returns 0 on success, EINVAL if an AE is still active or a load times
 * out.
 */
static int
qat_aefw_suof_load(struct qat_softc *sc, struct qat_dmamem *dma)
{
	struct simg_ae_mode *ae_mode;
	uint32_t fcu, sts, loaded;
	u_int mask;
	u_char ae;
	int retry;

	/* The mode record follows the chunk, CSS header, key and signature. */
	ae_mode = (struct simg_ae_mode *)((uintptr_t)dma->qdm_dma_vaddr +
	    sizeof(struct auth_chunk) + sizeof(struct css_hdr) +
	    CSS_FWSK_PUB_LEN + CSS_SIGNATURE_LEN);

	for (ae = 0, mask = sc->sc_ae_mask; mask; ae++, mask >>= 1) {
		if (!(mask & 1))
			continue;
		if (!((ae_mode->sam_ae_mask >> ae) & 0x1))
			continue;
		if (qat_ae_is_active(sc, ae)) {
			device_printf(sc->sc_dev, "AE %d is active\n", ae);
			return EINVAL;
		}
		qat_cap_global_write_4(sc, FCU_CTRL,
		    FCU_CTRL_CMD_LOAD | __SHIFTIN(ae, FCU_CTRL_AE));

		/*
		 * Restart the count for every AE; a shared counter would
		 * leave later AEs with whatever earlier ones did not use.
		 */
		retry = 0;
		do {
			DELAY(FW_AUTH_WAIT_PERIOD * 1000);
			fcu = qat_cap_global_read_4(sc, FCU_STATUS);
			sts = __SHIFTOUT(fcu, FCU_STATUS_STS);
			loaded = __SHIFTOUT(fcu, FCU_STATUS_LOADED_AE);
			if (sts == FCU_STATUS_STS_LOAD_DONE &&
			    (loaded & (1 << ae))) {
				break;
			}
		} while (retry++ < FW_AUTH_MAX_RETRY);

		/* Only a loop that ran out of retries ends above the limit. */
		if (retry > FW_AUTH_MAX_RETRY) {
			device_printf(sc->sc_dev,
			    "firmware load timeout: status %08x\n", fcu);
			return EINVAL;
		}
	}

	return 0;
}

static int
qat_aefw_suof_write(struct qat_softc *sc)
{
	struct qat_suof_image *qsi;
	int i, error = 0;

	for (i = 0; i < sc->sc_aefw_suof.qafs_num_simgs; i++) {
		qsi = &sc->sc_aefw_suof.qafs_simg[i];
		error = qat_aefw_alloc_auth_dmamem(sc, qsi->qsi_simg_buf,
		    qsi->qsi_simg_len, &qsi->qsi_dma);
		if (error)
			return error;
		error = qat_aefw_auth(sc, &qsi->qsi_dma);
		if (error) {
			qat_free_dmamem(sc, &qsi->qsi_dma);
			return error;
		}
		error = qat_aefw_suof_load(sc, &qsi->qsi_dma);
		if (error) {
			qat_free_dmamem(sc, &qsi->qsi_dma);
			return error;
		}
		qat_free_dmamem(sc, &qsi->qsi_dma);
	}
	qat_free_mem(sc->sc_aefw_suof.qafs_simg);

	return 0;
}

static int
qat_aefw_uof_assign_image(struct qat_softc *sc, struct qat_ae *qae,
	struct qat_uof_image *qui)
{
	struct qat_ae_slice *slice;
	int i, npages, nregions;

	if (qae->qae_num_slices >= nitems(qae->qae_slices))
		return ENOENT;

	if (qui->qui_image->ui_ae_mode &
	    (AE_MODE_RELOAD_CTX_SHARED | AE_MODE_SHARED_USTORE)) {
		/* XXX */
		device_printf(sc->sc_dev,
		    "shared ae mode is not supported yet\n");
		return ENOTSUP;
	}

	qae->qae_shareable_ustore = 0; /* XXX */
	qae->qae_effect_ustore_size = USTORE_SIZE;

	slice = &qae->qae_slices[qae->qae_num_slices];

	slice->qas_image = qui;
	slice->qas_assigned_ctx_mask = qui->qui_image->ui_ctx_assigned;

	nregions = qui->qui_image->ui_num_page_regions;
	npages = qui->qui_image->ui_num_pages;

	if (nregions > nitems(slice->qas_regions))
		return ENOENT;
	if (npages > nitems(slice->qas_pages))
		return ENOENT;

	for (i = 0; i < nregions; i++) {
		STAILQ_INIT(&slice->qas_regions[i].qar_waiting_pages);
	}
	for (i = 0; i < npages; i++) {
		struct qat_ae_page *page = &slice->qas_pages[i];
		int region;

		page->qap_page = &qui->qui_pages[i];
		region = page->qap_page->qup_page_region;
		if (region >= nregions)
			return EINVAL;

		page->qap_region = &slice->qas_regions[region];
	}

	qae->qae_num_slices++;

	return 0;
}

/*
 * Program the mode settings of AE 'ae' from the images assigned to its
 * slices: context mode, next-neighbour mode (unless "don't care"), the two
 * local-memory modes, shared control store mode and the reloadable ustore
 * setup.
 *
 * Slices without an image are skipped.  Always returns 0.
 */
static int
qat_aefw_uof_init_ae(struct qat_softc *sc, u_char ae)
{
	struct qat_ae *qae = &(QAT_AE(sc, ae));
	struct uof_image *image;
	u_char nn_mode;
	int s;

	for (s = 0; s < qae->qae_num_slices; s++) {
		struct qat_uof_image *qui = qae->qae_slices[s].qas_image;

		if (qui == NULL)
			continue;
		image = qui->qui_image;

		qat_ae_write_ctx_mode(sc, ae,
		    __SHIFTOUT(image->ui_ae_mode, AE_MODE_CTX_MODE));

		/* Leave the NN mode alone when the image does not care. */
		nn_mode = __SHIFTOUT(image->ui_ae_mode, AE_MODE_NN_MODE);
		if (nn_mode != AE_MODE_NN_MODE_DONTCARE)
			qat_ae_write_nn_mode(sc, ae, nn_mode);

		qat_ae_write_lm_mode(sc, ae, AEREG_LMEM0,
		    __SHIFTOUT(image->ui_ae_mode, AE_MODE_LMEM0));
		qat_ae_write_lm_mode(sc, ae, AEREG_LMEM1,
		    __SHIFTOUT(image->ui_ae_mode, AE_MODE_LMEM1));

		qat_ae_write_shared_cs_mode(sc, ae,
		    __SHIFTOUT(image->ui_ae_mode, AE_MODE_SHARED_USTORE));
		qat_ae_set_reload_ustore(sc, ae, image->ui_reloadable_size,
		    __SHIFTOUT(image->ui_ae_mode, AE_MODE_RELOAD_CTX_SHARED),
		    qae->qae_reloc_ustore_dram);
	}

	return 0;
}

static int
qat_aefw_uof_init(struct qat_softc *sc)
{
	int ae, i, error;
	uint32_t mask;

	for (ae = 0, mask = sc->sc_ae_mask; mask; ae++, mask >>= 1) {
		struct qat_ae *qae;

		if (!(mask & 1))
			continue;

		qae = &(QAT_AE(sc, ae));

		for (i = 0; i < sc->sc_aefw_uof.qafu_num_imgs; i++) {
			if ((sc->sc_aefw_uof.qafu_imgs[i].qui_image->ui_ae_assigned &
			    (1 << ae)) == 0)
				continue;

			error = qat_aefw_uof_assign_image(sc, qae,
			    &sc->sc_aefw_uof.qafu_imgs[i]);
			if (error)
				return error;
		}

		/* XXX UcLo_initNumUwordUsed */

		qae->qae_reloc_ustore_dram = UINT_MAX; /* XXX */

		error = qat_aefw_uof_init_ae(sc, ae);
		if (error)
			return error;
	}

	return 0;
}

/*
 * Load the AE firmware onto the device.
 *
 * The MOF and MMP firmware are loaded and the MOF is parsed.  Devices that
 * require firmware authentication (sc_hw.qhw_fw_auth) then go through the
 * SUOF parse/write path; all others use the UOF parse/init/write path.
 *
 * Returns 0 on success or the error of the first step that failed.
 */
int
qat_aefw_load(struct qat_softc *sc)
{
	int error;

	error = qat_aefw_load_mof(sc);
	if (error)
		return error;

	error = qat_aefw_load_mmp(sc);
	if (error)
		return error;

	error = qat_aefw_mof_parse(sc);
	if (error) {
		device_printf(sc->sc_dev, "couldn't parse mof: %d\n", error);
		return error;
	}

	if (!sc->sc_hw.qhw_fw_auth) {
		/* Unsigned firmware: plain UOF. */
		error = qat_aefw_uof_parse(sc);
		if (error) {
			device_printf(sc->sc_dev, "couldn't parse uof: %d\n",
			    error);
			return error;
		}

		error = qat_aefw_uof_init(sc);
		if (error) {
			device_printf(sc->sc_dev,
			    "couldn't init for aefw: %d\n", error);
			return error;
		}

		error = qat_aefw_uof_write(sc);
		if (error) {
			device_printf(sc->sc_dev,
			    "Could not write firmware: %d\n", error);
			return error;
		}

		return 0;
	}

	/* Signed firmware: SUOF with authentication. */
	error = qat_aefw_suof_parse(sc);
	if (error) {
		device_printf(sc->sc_dev, "couldn't parse suof: %d\n",
		    error);
		return error;
	}

	error = qat_aefw_suof_write(sc);
	if (error) {
		device_printf(sc->sc_dev,
		    "could not write firmware: %d\n", error);
		return error;
	}

	return 0;
}

/*
 * Counterpart of qat_aefw_load(): unload the MMP and MOF firmware, in the
 * reverse of the order in which qat_aefw_load() loaded them.
 * NOTE(review): what the two helpers release is not visible here.
 */
void
qat_aefw_unload(struct qat_softc *sc)
{
	qat_aefw_unload_mmp(sc);
	qat_aefw_unload_mof(sc);
}

/*
 * Start the loaded firmware.
 *
 * On devices using firmware authentication a START command is written to
 * FCU_CTRL and FCU_STATUS is polled for the DONE bit; 'ae' and 'ctx_mask'
 * are not used in that case.  On other devices the wakeup events of the
 * contexts of 'ae' that are NOT in 'ctx_mask' are set to the SLEEP value,
 * and the contexts in 'ctx_mask' are enabled.
 *
 * Returns 0 on success, EINVAL when the FCU does not report DONE in time.
 */
int
qat_aefw_start(struct qat_softc *sc, u_char ae, u_int ctx_mask)
{
	uint32_t status;
	int retry = 0;

	if (!sc->sc_hw.qhw_fw_auth) {
		qat_ae_ctx_indr_write(sc, ae, (~ctx_mask) & AE_ALL_CTX,
		    CTX_WAKEUP_EVENTS_INDIRECT,
		    CTX_WAKEUP_EVENTS_INDIRECT_SLEEP);
		qat_ae_enable_ctx(sc, ae, ctx_mask);
		return 0;
	}

	qat_cap_global_write_4(sc, FCU_CTRL, FCU_CTRL_CMD_START);
	for (;;) {
		DELAY(FW_AUTH_WAIT_PERIOD * 1000);
		status = qat_cap_global_read_4(sc, FCU_STATUS);
		if ((status & FCU_STATUS_DONE) != 0)
			return 0;
		if (retry++ >= FW_AUTH_MAX_RETRY)
			break;
	}

	device_printf(sc->sc_dev,
	    "firmware start timeout: status %08x\n", status);
	return EINVAL;
}

static int
qat_aefw_init_memory_one(struct qat_softc *sc, struct uof_init_mem *uim)
{
	struct qat_aefw_uof *qafu = &sc->sc_aefw_uof;
	struct qat_ae_batch_init_list *qabi_list;
	struct uof_mem_val_attr *memattr;
	size_t *curinit;
	u_long ael;
	int i;
	const char *sym;
	char *ep;

	memattr = (struct uof_mem_val_attr *)(uim + 1);

	switch (uim->uim_region) {
	case LMEM_REGION:
		if ((uim->uim_addr + uim->uim_num_bytes) > MAX_LMEM_REG * 4) {
			device_printf(sc->sc_dev,
			    "Invalid lmem addr or bytes\n");
			return ENOBUFS;
		}
		if (uim->uim_scope != UOF_SCOPE_LOCAL)
			return EINVAL;
		sym = qat_aefw_uof_string(sc, uim->uim_sym_name);
		ael = strtoul(sym, &ep, 10);
		if (ep == sym || ael > MAX_AE)
			return EINVAL;
		if ((sc->sc_ae_mask & (1 << ael)) == 0)
			return 0; /* ae is fused out */

		curinit = &qafu->qafu_num_lm_init[ael];
		qabi_list = &qafu->qafu_lm_init[ael];

		for (i = 0; i < uim->uim_num_val_attr; i++, memattr++) {
			struct qat_ae_batch_init *qabi;

			qabi = qat_alloc_mem(sizeof(struct qat_ae_batch_init));
			if (*curinit == 0)
				STAILQ_INIT(qabi_list);
			STAILQ_INSERT_TAIL(qabi_list, qabi, qabi_next);

			qabi->qabi_ae = (u_int)ael;
			qabi->qabi_addr =
			    uim->uim_addr + memattr->umva_byte_offset;
			qabi->qabi_value = &memattr->umva_value;
			qabi->qabi_size = 4;
			qafu->qafu_num_lm_init_inst[ael] +=
			    qat_ae_get_inst_num(qabi->qabi_size);
			(*curinit)++;
			if (*curinit >= MAX_LMEM_REG) {
				device_printf(sc->sc_dev,
				    "Invalid lmem val attr\n");
				return ENOBUFS;
			}
		}
		break;
	case SRAM_REGION:
	case DRAM_REGION:
	case DRAM1_REGION:
	case SCRATCH_REGION:
	case UMEM_REGION:
		/* XXX */
		/* fallthrough */
	default:
		device_printf(sc->sc_dev,
		    "unsupported memory region to init: %d\n",
		    uim->uim_region);
		return ENOTSUP;
	}

	return 0;
}

/*
 * Drop every queued local-memory batch-init entry of AE 'ae' and reset the
 * AE's entry and instruction counters to zero.
 */
static void
qat_aefw_free_lm_init(struct qat_softc *sc, u_char ae)
{
	struct qat_aefw_uof *qafu = &sc->sc_aefw_uof;
	struct qat_ae_batch_init_list *list = &qafu->qafu_lm_init[ae];
	struct qat_ae_batch_init *entry;

	/* Pop and free from the head until the list is empty. */
	for (;;) {
		entry = STAILQ_FIRST(list);
		if (entry == NULL)
			break;
		STAILQ_REMOVE_HEAD(list, qabi_next);
		qat_free_mem(entry);
	}

	qafu->qafu_num_lm_init_inst[ae] = 0;
	qafu->qafu_num_lm_init[ae] = 0;
}

static int
qat_aefw_init_ustore(struct qat_softc *sc)
{
	uint64_t *fill;
	uint32_t dont_init;
	int a, i, p;
	int error = 0;
	int usz, end, start;
	u_char ae, nae;

	fill = qat_alloc_mem(MAX_USTORE * sizeof(uint64_t));

	for (a = 0; a < sc->sc_aefw_uof.qafu_num_imgs; a++) {
		struct qat_uof_image *qui = &sc->sc_aefw_uof.qafu_imgs[a];
		struct uof_image *ui = qui->qui_image;

		for (i = 0; i < MAX_USTORE; i++)
			memcpy(&fill[i], ui->ui_fill_pattern, sizeof(uint64_t));
		/*
		 * Compute do_not_init value as a value that will not be equal
		 * to fill data when cast to an int
		 */
		dont_init = 0;
		if (dont_init == (uint32_t)fill[0])
			dont_init = 0xffffffff;

		for (p = 0; p < ui->ui_num_pages; p++) {
			struct qat_uof_page *qup = &qui->qui_pages[p];
			if (!qup->qup_def_page)
				continue;

			for (i = qup->qup_beg_paddr;
			    i < qup->qup_beg_paddr + qup->qup_num_micro_words;
			    i++ ) {
				fill[i] = (uint64_t)dont_init;
			}
		}

		for (ae = 0; ae < sc->sc_ae_num; ae++) {
			MPASS(ae < UOF_MAX_NUM_OF_AE);
			if ((ui->ui_ae_assigned & (1 << ae)) == 0)
				continue;

			if (QAT_AE(sc, ae).qae_shareable_ustore && (ae & 1)) {
				qat_ae_get_shared_ustore_ae(ae, &nae);
				if (ui->ui_ae_assigned & (1 << ae))
					continue;
			}
			usz = QAT_AE(sc, ae).qae_effect_ustore_size;

			/* initialize the areas not going to be overwritten */
			end = -1;
			do {
				/* find next uword that needs to be initialized */
				for (start = end + 1; start < usz; start++) {
					if ((uint32_t)fill[start] != dont_init)
						break;
				}
				/* see if there are no more such uwords */
				if (start >= usz)
					break;
				for (end = start + 1; end < usz; end++) {
					if ((uint32_t)fill[end] == dont_init)
						break;
				}
				if (QAT_AE(sc, ae).qae_shareable_ustore) {
					error = ENOTSUP; /* XXX */
					goto out;
				} else {
					error = qat_ae_ucode_write(sc, ae,
					    start, end - start, &fill[start]);
					if (error) {
						goto out;
					}
				}

			} while (end < usz);
		}
	}

out:
	qat_free_mem(fill);
	return error;
}

/*
 * Write 'value' to register 'regaddr' of type 'regtype' on AE 'ae'.
 *
 * Context-relative register types are written once for every context set
 * in 'ctx_mask'; absolute register types are written once and 'ctx_mask'
 * is ignored.
 *
 * Returns 0 on success and EINVAL for an unknown register type.  For
 * relative registers all selected contexts are attempted and the first
 * write error, if any, is returned.
 */
static int
qat_aefw_init_reg(struct qat_softc *sc, u_char ae, u_char ctx_mask,
    enum aereg_type regtype, u_short regaddr, u_int value)
{
	int error = 0;
	u_char ctx;

	switch (regtype) {
	case AEREG_GPA_REL:
	case AEREG_GPB_REL:
	case AEREG_SR_REL:
	case AEREG_SR_RD_REL:
	case AEREG_SR_WR_REL:
	case AEREG_DR_REL:
	case AEREG_DR_RD_REL:
	case AEREG_DR_WR_REL:
	case AEREG_NEIGH_REL:
		/* init for all valid ctx */
		for (ctx = 0; ctx < MAX_AE_CTX; ctx++) {
			int werror;

			if ((ctx_mask & (1 << ctx)) == 0)
				continue;
			werror = qat_aereg_rel_data_write(sc, ae, ctx,
			    regtype, regaddr, value);
			/*
			 * Keep the first failure; a later successful write
			 * must not hide it.
			 */
			if (werror != 0 && error == 0)
				error = werror;
		}
		break;
	case AEREG_GPA_ABS:
	case AEREG_GPB_ABS:
	case AEREG_SR_ABS:
	case AEREG_SR_RD_ABS:
	case AEREG_SR_WR_ABS:
	case AEREG_DR_ABS:
	case AEREG_DR_RD_ABS:
	case AEREG_DR_WR_ABS:
		error = qat_aereg_abs_data_write(sc, ae, regtype,
		    regaddr, value);
		break;
	default:
		error = EINVAL;
		break;
	}

	return error;
}

/*
 * Apply the register initializations listed in image 'qui' to AE 'ae'.
 *
 * INIT_REG entries are written for all contexts of the image's context
 * mode (0xff in 8-context mode, 0x55 otherwise); INIT_REG_CTX entries are
 * written for the single context they name, which must be valid for that
 * mode.  The result of the register write itself is not checked.
 *
 * Returns 0 on success, ENOTSUP for expression values or expression init
 * types, and EINVAL when an INIT_REG_CTX entry names an invalid context.
 */
static int
qat_aefw_init_reg_sym_expr(struct qat_softc *sc, u_char ae,
    struct qat_uof_image *qui)
{
	u_int idx, val;
	u_char all_ctx, target;

	for (idx = 0; idx < qui->qui_num_init_reg_sym; idx++) {
		struct uof_init_reg_sym *uirs = &qui->qui_init_reg_sym[idx];

		if (uirs->uirs_value_type == EXPR_VAL) {
			/* XXX */
			device_printf(sc->sc_dev,
			    "does not support initializing EXPR_VAL\n");
			return ENOTSUP;
		}
		val = uirs->uirs_value;

		switch (uirs->uirs_init_type) {
		case INIT_REG:
		case INIT_REG_CTX:
			/* Contexts that exist in the image's context mode. */
			if (__SHIFTOUT(qui->qui_image->ui_ae_mode,
			    AE_MODE_CTX_MODE) == MAX_AE_CTX)
				all_ctx = 0xff; /* 8-ctx mode */
			else
				all_ctx = 0x55; /* 4-ctx mode */

			if (uirs->uirs_init_type == INIT_REG) {
				target = all_ctx;
			} else {
				if (((1 << uirs->uirs_ctx) & all_ctx) == 0)
					return EINVAL;
				target = 1 << uirs->uirs_ctx;
			}
			qat_aefw_init_reg(sc, ae, target,
			    (enum aereg_type)uirs->uirs_reg_type,
			    (u_short)uirs->uirs_addr_offset, val);
			break;
		case INIT_EXPR:
		case INIT_EXPR_ENDIAN_SWAP:
		default:
			device_printf(sc->sc_dev,
			    "does not support initializing init_type %d\n",
			    uirs->uirs_init_type);
			return ENOTSUP;
		}
	}

	return 0;
}

static int
qat_aefw_init_memory(struct qat_softc *sc)
{
	struct qat_aefw_uof *qafu = &sc->sc_aefw_uof;
	size_t uimsz, initmemsz = qafu->qafu_init_mem_size;
	struct uof_init_mem *uim;
	int error, i;
	u_char ae;

	uim = qafu->qafu_init_mem;
	for (i = 0; i < qafu->qafu_num_init_mem; i++) {
		uimsz = sizeof(struct uof_init_mem) +
		    sizeof(struct uof_mem_val_attr) * uim->uim_num_val_attr;
		if (uimsz > initmemsz) {
			device_printf(sc->sc_dev,
			    "invalid uof_init_mem or uof_mem_val_attr size\n");
			return EINVAL;
		}

		if (uim->uim_num_bytes > 0) {
			error = qat_aefw_init_memory_one(sc, uim);
			if (error) {
				device_printf(sc->sc_dev,
				    "Could not init ae memory: %d\n", error);
				return error;
			}
		}
		uim = (struct uof_init_mem *)((uintptr_t)uim + uimsz);
		initmemsz -= uimsz;
	}

	/* run Batch put LM API */
	for (ae = 0; ae < MAX_AE; ae++) {
		error = qat_ae_batch_put_lm(sc, ae, &qafu->qafu_lm_init[ae],
		    qafu->qafu_num_lm_init_inst[ae]);
		if (error)
			device_printf(sc->sc_dev, "Could not put lm\n");

		qat_aefw_free_lm_init(sc, ae);
	}

	error = qat_aefw_init_ustore(sc);

	/* XXX run Batch put LM API */

	return error;
}

static int
qat_aefw_init_globals(struct qat_softc *sc)
{
	struct qat_aefw_uof *qafu = &sc->sc_aefw_uof;
	int error, i, p, s;
	u_char ae;

	/* initialize the memory segments */
	if (qafu->qafu_num_init_mem > 0) {
		error = qat_aefw_init_memory(sc);
		if (error)
			return error;
	} else {
		error = qat_aefw_init_ustore(sc);
		if (error)
			return error;
	}

	/* XXX bind import variables with ivd values */

	/* XXX bind the uC global variables
	 * local variables will done on-the-fly */
	for (i = 0; i < sc->sc_aefw_uof.qafu_num_imgs; i++) {
		for (p = 0; p < sc->sc_aefw_uof.qafu_imgs[i].qui_image->ui_num_pages; p++) {
			struct qat_uof_page *qup =
			    &sc->sc_aefw_uof.qafu_imgs[i].qui_pages[p];
			if (qup->qup_num_uw_blocks &&
			    (qup->qup_num_uc_var || qup->qup_num_imp_var)) {
				device_printf(sc->sc_dev,
				    "not support uC global variables\n");
				return ENOTSUP;
			}
		}
	}

	for (ae = 0; ae < sc->sc_ae_num; ae++) {
		struct qat_ae *qae = &(QAT_AE(sc, ae));

		for (s = 0; s < qae->qae_num_slices; s++) {
			struct qat_ae_slice *qas = &qae->qae_slices[s];

			if (qas->qas_image == NULL)
				continue;

			error =
			    qat_aefw_init_reg_sym_expr(sc, ae, qas->qas_image);
			if (error)
				return error;
		}
	}

	return 0;
}

/*
 * Look up the microword at address 'addr' in page 'qup'.
 *
 * The page's uword blocks are searched for the one whose address range
 * contains 'addr'.  Microwords are stored packed, AEV2_PACKED_UWORD_BYTES
 * bytes each; the selected one is copied into the low bytes of a 64-bit
 * value and masked with UWORD_MASK.
 *
 * Returns the microword, or INVLD_UWORD when no block covers 'addr'.
 */
static uint64_t
qat_aefw_get_uof_inst(struct qat_softc *sc, struct qat_uof_page *qup,
    u_int addr)
{
	uint64_t uinst = 0;
	u_int i;

	/* find the block */
	for (i = 0; i < qup->qup_num_uw_blocks; i++) {
		struct qat_uof_uword_block *quub = &qup->qup_uw_blocks[i];

		/*
		 * Compare the offset into the block against its length.
		 * This also rejects an empty block; computing
		 * "start + (num_words - 1)" would wrap around for it.
		 */
		if (addr < quub->quub_start_addr ||
		    addr - quub->quub_start_addr >= quub->quub_num_words)
			continue;

		/*
		 * unpack n bytes and assigned to the 64-bit uword value.
		 * note: the microwords are stored as packed bytes.
		 */
		addr -= quub->quub_start_addr;
		addr *= AEV2_PACKED_UWORD_BYTES;
		memcpy(&uinst,
		    (void *)((uintptr_t)quub->quub_micro_words + addr),
		    AEV2_PACKED_UWORD_BYTES);
		uinst = uinst & UWORD_MASK;

		return uinst;
	}

	return INVLD_UWORD;
}

/*
 * Write the microwords of page 'qup' into the ustore of AE 'ae'.
 *
 * The page is copied in pieces of at most UWORD_CPYBUF_SIZE microwords.
 * Addresses not covered by any uword block of the page are written with
 * the fill pattern of the first image.
 *
 * Returns 0 on success, ENOTSUP for pages that need local fixups, pages
 * outside region 0 or AEs with shareable ustore, or the error from
 * qat_ae_ucode_write().  The copy buffer is released on every path.
 */
static int
qat_aefw_do_pagein(struct qat_softc *sc, u_char ae, struct qat_uof_page *qup)
{
	struct qat_ae *qae = &(QAT_AE(sc, ae));
	uint64_t fill, *ucode_cpybuf;
	u_int i, upaddr, ninst, cpylen;
	int error = 0;

	if (qup->qup_num_uc_var || qup->qup_num_neigh_reg ||
	    qup->qup_num_imp_var || qup->qup_num_imp_expr) {
		device_printf(sc->sc_dev,
		    "does not support fixup locals\n");
		return ENOTSUP;
	}

	ucode_cpybuf = qat_alloc_mem(UWORD_CPYBUF_SIZE * sizeof(uint64_t));

	/* XXX get fill-pattern from an image -- they are all the same */
	memcpy(&fill, sc->sc_aefw_uof.qafu_imgs[0].qui_image->ui_fill_pattern,
	    sizeof(uint64_t));

	upaddr = qup->qup_beg_paddr;
	ninst = qup->qup_num_micro_words;
	while (ninst > 0) {
		cpylen = min(ninst, UWORD_CPYBUF_SIZE);

		/* load the buffer */
		for (i = 0; i < cpylen; i++) {
			/* keep below code structure in case there are
			 * different handling for shared scenarios */
			if (qae->qae_shareable_ustore) {
				/* XXX */
				error = ENOTSUP;
				goto out;
			}
			if (qup->qup_page_region != 0) {
				/* XXX */
				device_printf(sc->sc_dev,
				    "region != 0 is not supported\n");
				error = ENOTSUP;
				goto out;
			}

			/* The lookup address is counted from qup_beg_paddr. */
			ucode_cpybuf[i] = qat_aefw_get_uof_inst(sc, qup,
			    upaddr + i);
			if (ucode_cpybuf[i] == INVLD_UWORD) {
				/* fill hole in the uof */
				ucode_cpybuf[i] = fill;
			}
		}

		/* copy the buffer to ustore */
		if (qae->qae_shareable_ustore) {
			/* XXX */
			error = ENOTSUP;
			goto out;
		}
		error = qat_ae_ucode_write(sc, ae, upaddr, cpylen,
		    ucode_cpybuf);
		if (error)
			goto out;

		upaddr += cpylen;
		ninst -= cpylen;
	}

out:
	qat_free_mem(ucode_cpybuf);

	return error;
}

static int
qat_aefw_uof_write_one(struct qat_softc *sc, struct qat_uof_image *qui)
{
	struct uof_image *ui = qui->qui_image;
	struct qat_ae_page *qap;
	u_int s, p, c;
	int error;
	u_char ae, ctx_mask;

	if (__SHIFTOUT(ui->ui_ae_mode, AE_MODE_CTX_MODE) == MAX_AE_CTX)
		ctx_mask = 0xff; /* 8-ctx mode */
	else
		ctx_mask = 0x55; /* 4-ctx mode */

	/* load the default page and set assigned CTX PC
	 * to the entrypoint address */
	for (ae = 0; ae < sc->sc_ae_num; ae++) {
		struct qat_ae *qae = &(QAT_AE(sc, ae));
		struct qat_ae_slice *qas;
		u_int metadata;

		MPASS(ae < UOF_MAX_NUM_OF_AE);

		if ((ui->ui_ae_assigned & (1 << ae)) == 0)
			continue;

		/* find the slice to which this image is assigned */
		for (s = 0; s < qae->qae_num_slices; s++) {
			qas = &qae->qae_slices[s];
			if (ui->ui_ctx_assigned & qas->qas_assigned_ctx_mask)
				break;
		}
		if (s >= qae->qae_num_slices)
			continue;

		qas = &qae->qae_slices[s];

		for (p = 0; p < ui->ui_num_pages; p++) {
			qap = &qas->qas_pages[p];

			/* Only load pages loaded by default */
			if (!qap->qap_page->qup_def_page)
				continue;

			error = qat_aefw_do_pagein(sc, ae, qap->qap_page);
			if (error)
				return error;
		}

		metadata = qas->qas_image->qui_image->ui_app_metadata;
		if (metadata != 0xffffffff && bootverbose) {
			device_printf(sc->sc_dev,
			    "loaded firmware: %s\n",
			    qat_aefw_uof_string(sc, metadata));
		}

		/* Assume starting page is page 0 */
		qap = &qas->qas_pages[0];
		for (c = 0; c < MAX_AE_CTX; c++) {
			if (ctx_mask & (1 << c))
				qas->qas_cur_pages[c] = qap;
			else
				qas->qas_cur_pages[c] = NULL;
		}

		/* set the live context */
		qae->qae_live_ctx_mask = ui->ui_ctx_assigned;

		/* set context PC to the image entrypoint address */
		error = qat_ae_write_pc(sc, ae, ui->ui_ctx_assigned,
		    ui->ui_entry_address);
		if (error)
			return error;
	}

	/* XXX store the checksum for convenience */

	return 0;
}

static int
qat_aefw_uof_write(struct qat_softc *sc)
{
	int error = 0;
	int i;

	error = qat_aefw_init_globals(sc);
	if (error) {
		device_printf(sc->sc_dev,
		    "Could not initialize globals\n");
		return error;
	}

	for (i = 0; i < sc->sc_aefw_uof.qafu_num_imgs; i++) {
		error = qat_aefw_uof_write_one(sc,
		    &sc->sc_aefw_uof.qafu_imgs[i]);
		if (error)
			break;
	}

	/* XXX UcLo_computeFreeUstore */

	return error;
}