/*-
 * Copyright (c) 2016-2017 Alexander Motin <mav@FreeBSD.org>
 * Copyright (C) 2013 Intel Corporation
 * Copyright (C) 2015 EMC Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * The Non-Transparent Bridge (NTB) is a device that allows you to connect
 * two or more systems using PCI-e links, providing remote memory access.
 *
 * This module contains a driver for NTB hardware in Intel Xeon/Atom CPUs.
 *
 * NOTE: Much of the code in this module is shared with Linux. Any patches may
 * be picked up and redistributed in Linux with a dual GPL/BSD license.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/interrupt.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/pciio.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/queue.h>
#include <sys/rman.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <machine/bus.h>
#include <machine/intr_machdep.h>
#include <machine/resource.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/iommu/iommu.h>

#include "ntb_hw_intel.h"
#include "../ntb.h"

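/* The largest doorbell count of any supported hardware generation. */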
#define MAX_MSIX_INTERRUPTS	\
	MAX(MAX(XEON_DB_COUNT, ATOM_DB_COUNT), XEON_GEN3_DB_COUNT)

#define NTB_HB_TIMEOUT		1 /* second */
#define ATOM_LINK_RECOVERY_TIME	500 /* ms */
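/* Strip the low 12 bits of a BAR address (BARs are at least 4KB). */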
#define BAR_HIGH_MASK		(~((1ull << 12) - 1))

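/*
 * Magic values exchanged through scratchpad registers during the MSI-X
 * workaround handshake.
 */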
#define	NTB_MSIX_VER_GUARD	0xaabbccdd
#define	NTB_MSIX_RECEIVED	0xe0f0e0f0

/*
 * PCI constants could be somewhere more generic, but aren't defined/used in
 * pci.c.
 */
#define	PCI_MSIX_ENTRY_SIZE		16
#define	PCI_MSIX_ENTRY_LOWER_ADDR	0
#define	PCI_MSIX_ENTRY_UPPER_ADDR	4
#define	PCI_MSIX_ENTRY_DATA		8

enum ntb_device_type {
	NTB_XEON_GEN1,
	NTB_XEON_GEN3,
	NTB_ATOM
};

/* ntb_conn_type are hardware numbers, cannot change. */
enum ntb_conn_type {
	NTB_CONN_TRANSPARENT = 0,
	NTB_CONN_B2B = 1,
	NTB_CONN_RP = 2,
};

enum ntb_b2b_direction {
	NTB_DEV_USD = 0,
	NTB_DEV_DSD = 1,
};

enum ntb_bar {
	NTB_CONFIG_BAR = 0,
	NTB_B2B_BAR_1,
	NTB_B2B_BAR_2,
	NTB_B2B_BAR_3,
	NTB_MAX_BARS
};

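/* Scratchpad indices used to exchange MSI-X data with the peer. */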
enum {
	NTB_MSIX_GUARD = 0,
	NTB_MSIX_DATA0,
	NTB_MSIX_DATA1,
	NTB_MSIX_DATA2,
	NTB_MSIX_OFS0,
	NTB_MSIX_OFS1,
	NTB_MSIX_OFS2,
	NTB_MSIX_DONE,
	NTB_MAX_MSIX_SPAD
};

/* Device features and workarounds */
#define HAS_FEATURE(ntb, feature)	\
	(((ntb)->features & (feature)) != 0)

struct ntb_hw_info {
	uint32_t		device_id;
	const char		*desc;
	enum ntb_device_type	type;
	uint32_t		features;
};

struct ntb_pci_bar_info {
	bus_space_tag_t		pci_bus_tag;
	bus_space_handle_t	pci_bus_handle;
	int			pci_resource_id;
	struct resource		*pci_resource;
	vm_paddr_t		pbase;
	caddr_t			vbase;
	vm_size_t		size;
	vm_memattr_t		map_mode;

	/* Configuration register offsets */
	uint32_t		psz_off;
	uint32_t		ssz_off;
	uint32_t		pbarxlat_off;
};

struct ntb_int_info {
	struct resource	*res;
	int		rid;
	void		*tag;
};

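/* Per-vector interrupt context, passed to ndev_vec_isr(). */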
struct ntb_vec {
	struct ntb_softc	*ntb;
	uint32_t		num;
	unsigned		masked;
};

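/* Register offsets and doorbell geometry that vary by hardware generation. */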
struct ntb_reg {
	uint32_t	ntb_ctl;
	uint32_t	lnk_sta;
	uint8_t		db_size;
	unsigned	mw_bar[NTB_MAX_BARS];
};

struct ntb_alt_reg {
	uint32_t	db_bell;
	uint32_t	db_mask;
	uint32_t	spad;
};

struct ntb_xlat_reg {
	uint32_t	bar0_base;
	uint32_t	bar2_base;
	uint32_t	bar4_base;
	uint32_t	bar5_base;

	uint32_t	bar2_xlat;
	uint32_t	bar4_xlat;
	uint32_t	bar5_xlat;

	uint32_t	bar2_limit;
	uint32_t	bar4_limit;
	uint32_t	bar5_limit;
};

struct ntb_b2b_addr {
	uint64_t	bar0_addr;
	uint64_t	bar2_addr64;
	uint64_t	bar4_addr64;
	uint64_t	bar4_addr32;
	uint64_t	bar5_addr32;
};

struct ntb_msix_data {
	uint32_t	nmd_ofs;
	uint32_t	nmd_data;
};

struct ntb_softc {
	/* ntb.c context. Do not move! Must go first! */
	void			*ntb_store;

	device_t		device;
	enum ntb_device_type	type;
	uint32_t		features;

	struct ntb_pci_bar_info	bar_info[NTB_MAX_BARS];
	struct ntb_int_info	int_info[MAX_MSIX_INTERRUPTS];
	uint32_t		allocated_interrupts;

	struct ntb_msix_data	peer_msix_data[XEON_NONLINK_DB_MSIX_BITS];
	struct ntb_msix_data	msix_data[XEON_NONLINK_DB_MSIX_BITS];
	bool			peer_msix_good;
	bool			peer_msix_done;
	struct ntb_pci_bar_info	*peer_lapic_bar;
	struct callout		peer_msix_work;

	bus_dma_tag_t		bar0_dma_tag;
	bus_dmamap_t		bar0_dma_map;

	struct callout		heartbeat_timer;
	struct callout		lr_timer;

	struct ntb_vec		*msix_vec;

	uint32_t		ppd;
	enum ntb_conn_type	conn_type;
	enum ntb_b2b_direction	dev_type;

	/* Offset of peer bar0 in B2B BAR */
	uint64_t			b2b_off;
	/* Memory window used to access peer bar0 */
#define B2B_MW_DISABLED			UINT8_MAX
	uint8_t				b2b_mw_idx;
	uint32_t			msix_xlat;
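	/* Memory window used to reach the peer LAPIC for the MSI-X workaround */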
	uint8_t				msix_mw_idx;

	uint8_t				mw_count;
	uint8_t				spad_count;
	uint8_t				db_count;
	uint8_t				db_vec_count;
	uint8_t				db_vec_shift;

	/* Protects local db_mask. */
#define DB_MASK_LOCK(sc)	mtx_lock_spin(&(sc)->db_mask_lock)
#define DB_MASK_UNLOCK(sc)	mtx_unlock_spin(&(sc)->db_mask_lock)
#define DB_MASK_ASSERT(sc,f)	mtx_assert(&(sc)->db_mask_lock, (f))
	struct mtx			db_mask_lock;

	volatile uint32_t		ntb_ctl;
	volatile uint32_t		lnk_sta;

	uint64_t			db_valid_mask;
	uint64_t			db_link_mask;
	uint64_t			db_mask;
	uint64_t			fake_db;	/* NTB_SB01BASE_LOCKUP*/
	uint64_t			force_db;	/* NTB_SB01BASE_LOCKUP*/

	int				last_ts;	/* ticks @ last irq */

	const struct ntb_reg		*reg;
	const struct ntb_alt_reg	*self_reg;
	const struct ntb_alt_reg	*peer_reg;
	const struct ntb_xlat_reg	*xlat_reg;
};

#ifdef __i386__
static __inline uint64_t
bus_space_read_8(bus_space_tag_t tag, bus_space_handle_t handle,
    bus_size_t offset)
{

	return (bus_space_read_4(tag, handle, offset) |
	    ((uint64_t)bus_space_read_4(tag, handle, offset + 4)) << 32);
}

static __inline void
bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t handle,
    bus_size_t offset, uint64_t val)
{

	bus_space_write_4(tag, handle, offset, val);
	bus_space_write_4(tag, handle, offset + 4, val >> 32);
}
#endif

#define intel_ntb_bar_read(SIZE, bar, offset) \
	    bus_space_read_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
	    ntb->bar_info[(bar)].pci_bus_handle, (offset))
#define intel_ntb_bar_write(SIZE, bar, offset, val) \
	    bus_space_write_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
	    ntb->bar_info[(bar)].pci_bus_handle, (offset), (val))
#define intel_ntb_reg_read(SIZE, offset) \
	    intel_ntb_bar_read(SIZE, NTB_CONFIG_BAR, offset)
#define intel_ntb_reg_write(SIZE, offset, val) \
	    intel_ntb_bar_write(SIZE, NTB_CONFIG_BAR, offset, val)
#define intel_ntb_mw_read(SIZE, offset) \
	    intel_ntb_bar_read(SIZE, intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \
		offset)
#define intel_ntb_mw_write(SIZE, offset, val) \
	    intel_ntb_bar_write(SIZE, intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \
		offset, val)
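
/*
 * Note: these macros implicitly use a local 'ntb' softc pointer.  For
 * example (illustrative only), reading the 64-bit interrupt status register
 * on Gen3 hardware:
 *
 *	uint64_t sts = intel_ntb_reg_read(8, XEON_GEN3_REG_IMINT_STATUS);
 */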

static int intel_ntb_probe(device_t device);
static int intel_ntb_attach(device_t device);
static int intel_ntb_detach(device_t device);
static uint64_t intel_ntb_db_valid_mask(device_t dev);
static void intel_ntb_spad_clear(device_t dev);
static uint64_t intel_ntb_db_vector_mask(device_t dev, uint32_t vector);
static bool intel_ntb_link_is_up(device_t dev, enum ntb_speed *speed,
    enum ntb_width *width);
static int intel_ntb_link_enable(device_t dev, enum ntb_speed speed,
    enum ntb_width width);
static int intel_ntb_link_disable(device_t dev);
static int intel_ntb_spad_read(device_t dev, unsigned int idx, uint32_t *val);
static int intel_ntb_peer_spad_write(device_t dev, unsigned int idx, uint32_t val);

static unsigned intel_ntb_user_mw_to_idx(struct ntb_softc *, unsigned uidx);
static inline enum ntb_bar intel_ntb_mw_to_bar(struct ntb_softc *, unsigned mw);
static inline bool bar_is_64bit(struct ntb_softc *, enum ntb_bar);
static inline void bar_get_xlat_params(struct ntb_softc *, enum ntb_bar,
    uint32_t *base, uint32_t *xlat, uint32_t *lmt);
static int intel_ntb_map_pci_bars(struct ntb_softc *ntb);
static int intel_ntb_mw_set_wc_internal(struct ntb_softc *, unsigned idx,
    vm_memattr_t);
static void print_map_success(struct ntb_softc *, struct ntb_pci_bar_info *,
    const char *);
static int map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar);
static int map_memory_window_bar(struct ntb_softc *ntb,
    struct ntb_pci_bar_info *bar);
static void intel_ntb_unmap_pci_bar(struct ntb_softc *ntb);
static int intel_ntb_remap_msix(device_t, uint32_t desired, uint32_t avail);
static int intel_ntb_init_isr(struct ntb_softc *ntb);
static int intel_ntb_xeon_gen3_init_isr(struct ntb_softc *ntb);
static int intel_ntb_setup_legacy_interrupt(struct ntb_softc *ntb);
static int intel_ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors);
static void intel_ntb_teardown_interrupts(struct ntb_softc *ntb);
static inline uint64_t intel_ntb_vec_mask(struct ntb_softc *, uint64_t db_vector);
static void intel_ntb_interrupt(struct ntb_softc *, uint32_t vec);
static void ndev_vec_isr(void *arg);
static void ndev_irq_isr(void *arg);
static inline uint64_t db_ioread(struct ntb_softc *, uint64_t regoff);
static inline void db_iowrite(struct ntb_softc *, uint64_t regoff, uint64_t);
static inline void db_iowrite_raw(struct ntb_softc *, uint64_t regoff, uint64_t);
static int intel_ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors);
static void intel_ntb_free_msix_vec(struct ntb_softc *ntb);
static void intel_ntb_get_msix_info(struct ntb_softc *ntb);
static void intel_ntb_exchange_msix(void *);
static struct ntb_hw_info *intel_ntb_get_device_info(uint32_t device_id);
static void intel_ntb_detect_max_mw(struct ntb_softc *ntb);
static int intel_ntb_detect_xeon(struct ntb_softc *ntb);
static int intel_ntb_detect_xeon_gen3(struct ntb_softc *ntb);
static int intel_ntb_detect_atom(struct ntb_softc *ntb);
static int intel_ntb_xeon_init_dev(struct ntb_softc *ntb);
static int intel_ntb_xeon_gen3_init_dev(struct ntb_softc *ntb);
static int intel_ntb_atom_init_dev(struct ntb_softc *ntb);
static void intel_ntb_teardown_xeon(struct ntb_softc *ntb);
static void configure_atom_secondary_side_bars(struct ntb_softc *ntb);
static void xeon_reset_sbar_size(struct ntb_softc *, enum ntb_bar idx,
    enum ntb_bar regbar);
static void xeon_set_sbar_base_and_limit(struct ntb_softc *,
    uint64_t base_addr, enum ntb_bar idx, enum ntb_bar regbar);
static void xeon_set_pbar_xlat(struct ntb_softc *, uint64_t base_addr,
    enum ntb_bar idx);
static int xeon_setup_b2b_mw(struct ntb_softc *,
    const struct ntb_b2b_addr *addr, const struct ntb_b2b_addr *peer_addr);
static int xeon_gen3_setup_b2b_mw(struct ntb_softc *);
static int intel_ntb_mw_set_trans(device_t dev, unsigned idx, bus_addr_t addr,
    size_t size);
static inline bool link_is_up(struct ntb_softc *ntb);
static inline bool _xeon_link_is_up(struct ntb_softc *ntb);
static inline bool atom_link_is_err(struct ntb_softc *ntb);
static inline enum ntb_speed intel_ntb_link_sta_speed(struct ntb_softc *);
static inline enum ntb_width intel_ntb_link_sta_width(struct ntb_softc *);
static void atom_link_hb(void *arg);
static void recover_atom_link(void *arg);
static bool intel_ntb_poll_link(struct ntb_softc *ntb);
static void save_bar_parameters(struct ntb_pci_bar_info *bar);
static void intel_ntb_sysctl_init(struct ntb_softc *);
static int sysctl_handle_features(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_link_status(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_register(SYSCTL_HANDLER_ARGS);

static unsigned g_ntb_hw_debug_level;
SYSCTL_UINT(_hw_ntb, OID_AUTO, debug_level, CTLFLAG_RWTUN,
    &g_ntb_hw_debug_level, 0, "ntb_hw log level -- higher is more verbose");
#define intel_ntb_printf(lvl, ...) do {				\
	if ((lvl) <= g_ntb_hw_debug_level) {			\
		device_printf(ntb->device, __VA_ARGS__);	\
	}							\
} while (0)

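/* PAT encodings accepted by the hw.ntb.default_mw_pat tunable below. */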
#define	_NTB_PAT_UC	0
#define	_NTB_PAT_WC	1
#define	_NTB_PAT_WT	4
#define	_NTB_PAT_WP	5
#define	_NTB_PAT_WB	6
#define	_NTB_PAT_UCM	7
static unsigned g_ntb_mw_pat = _NTB_PAT_UC;
SYSCTL_UINT(_hw_ntb, OID_AUTO, default_mw_pat, CTLFLAG_RDTUN,
    &g_ntb_mw_pat, 0, "Configure the default memory window cache flags (PAT): "
    "UC: "  __XSTRING(_NTB_PAT_UC) ", "
    "WC: "  __XSTRING(_NTB_PAT_WC) ", "
    "WT: "  __XSTRING(_NTB_PAT_WT) ", "
    "WP: "  __XSTRING(_NTB_PAT_WP) ", "
    "WB: "  __XSTRING(_NTB_PAT_WB) ", "
    "UC-: " __XSTRING(_NTB_PAT_UCM));

static inline vm_memattr_t
intel_ntb_pat_flags(void)
{

	switch (g_ntb_mw_pat) {
	case _NTB_PAT_WC:
		return (VM_MEMATTR_WRITE_COMBINING);
	case _NTB_PAT_WT:
		return (VM_MEMATTR_WRITE_THROUGH);
	case _NTB_PAT_WP:
		return (VM_MEMATTR_WRITE_PROTECTED);
	case _NTB_PAT_WB:
		return (VM_MEMATTR_WRITE_BACK);
	case _NTB_PAT_UCM:
		return (VM_MEMATTR_WEAK_UNCACHEABLE);
	case _NTB_PAT_UC:
		/* FALLTHROUGH */
	default:
		return (VM_MEMATTR_UNCACHEABLE);
	}
}

/*
 * Well, this obviously doesn't belong here, but it doesn't seem to exist
 * anywhere better yet.
 */
static inline const char *
intel_ntb_vm_memattr_to_str(vm_memattr_t pat)
{

	switch (pat) {
	case VM_MEMATTR_WRITE_COMBINING:
		return ("WRITE_COMBINING");
	case VM_MEMATTR_WRITE_THROUGH:
		return ("WRITE_THROUGH");
	case VM_MEMATTR_WRITE_PROTECTED:
		return ("WRITE_PROTECTED");
	case VM_MEMATTR_WRITE_BACK:
		return ("WRITE_BACK");
	case VM_MEMATTR_WEAK_UNCACHEABLE:
		return ("UNCACHED");
	case VM_MEMATTR_UNCACHEABLE:
		return ("UNCACHEABLE");
	default:
		return ("UNKNOWN");
	}
}

static int g_ntb_msix_idx = 1;
SYSCTL_INT(_hw_ntb, OID_AUTO, msix_mw_idx, CTLFLAG_RDTUN, &g_ntb_msix_idx,
    0, "Use this memory window to access the peer MSIX message complex on "
    "certain Xeon-based NTB systems, as a workaround for a hardware errata.  "
    "Like b2b_mw_idx, negative values index from the last available memory "
    "window.  (Applies on Xeon platforms with SB01BASE_LOCKUP errata.)");

static int g_ntb_mw_idx = -1;
SYSCTL_INT(_hw_ntb, OID_AUTO, b2b_mw_idx, CTLFLAG_RDTUN, &g_ntb_mw_idx,
    0, "Use this memory window to access the peer NTB registers.  A "
    "non-negative value starts from the first MW index; a negative value "
    "starts from the last MW index.  The default is -1, i.e., the last "
    "available memory window.  Both sides of the NTB MUST set the same "
    "value here!  (Applies on Xeon platforms with SDOORBELL_LOCKUP errata.)");

/* Hardware owns the low 16 bits of features. */
#define NTB_BAR_SIZE_4K		(1 << 0)
#define NTB_SDOORBELL_LOCKUP	(1 << 1)
#define NTB_SB01BASE_LOCKUP	(1 << 2)
#define NTB_B2BDOORBELL_BIT14	(1 << 3)
/* Software/configuration owns the top 16 bits. */
#define NTB_SPLIT_BAR		(1ull << 16)
#define NTB_ONE_MSIX		(1ull << 17)

#define NTB_FEATURES_STR \
    "\20\21SPLIT_BAR4\04B2B_DOORBELL_BIT14\03SB01BASE_LOCKUP" \
    "\02SDOORBELL_LOCKUP\01BAR_SIZE_4K"

static struct ntb_hw_info pci_ids[] = {
	/* XXX: PS/SS IDs left out until they are supported. */
	{ 0x0C4E8086, "BWD Atom Processor S1200 Non-Transparent Bridge B2B",
		NTB_ATOM, 0 },

	{ 0x37258086, "JSF Xeon C35xx/C55xx Non-Transparent Bridge B2B",
		NTB_XEON_GEN1, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
	{ 0x3C0D8086, "SNB Xeon E5/Core i7 Non-Transparent Bridge B2B",
		NTB_XEON_GEN1, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
	{ 0x0E0D8086, "IVT Xeon E5 V2 Non-Transparent Bridge B2B",
		NTB_XEON_GEN1, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
		    NTB_SB01BASE_LOCKUP | NTB_BAR_SIZE_4K },
	{ 0x2F0D8086, "HSX Xeon E5 V3 Non-Transparent Bridge B2B",
		NTB_XEON_GEN1, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
		    NTB_SB01BASE_LOCKUP },
	{ 0x6F0D8086, "BDX Xeon E5 V4 Non-Transparent Bridge B2B",
		NTB_XEON_GEN1, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
		    NTB_SB01BASE_LOCKUP },

	{ 0x201C8086, "SKL Xeon E5 V5 Non-Transparent Bridge B2B",
		NTB_XEON_GEN3, 0 },
};

static const struct ntb_reg atom_reg = {
	.ntb_ctl = ATOM_NTBCNTL_OFFSET,
	.lnk_sta = ATOM_LINK_STATUS_OFFSET,
	.db_size = sizeof(uint64_t),
	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2 },
};

static const struct ntb_alt_reg atom_pri_reg = {
	.db_bell = ATOM_PDOORBELL_OFFSET,
	.db_mask = ATOM_PDBMSK_OFFSET,
	.spad = ATOM_SPAD_OFFSET,
};

static const struct ntb_alt_reg atom_b2b_reg = {
	.db_bell = ATOM_B2B_DOORBELL_OFFSET,
	.spad = ATOM_B2B_SPAD_OFFSET,
};

static const struct ntb_xlat_reg atom_sec_xlat = {
#if 0
	/* "FIXME" says the Linux driver. */
	.bar0_base = ATOM_SBAR0BASE_OFFSET,
	.bar2_base = ATOM_SBAR2BASE_OFFSET,
	.bar4_base = ATOM_SBAR4BASE_OFFSET,

	.bar2_limit = ATOM_SBAR2LMT_OFFSET,
	.bar4_limit = ATOM_SBAR4LMT_OFFSET,
#endif

	.bar2_xlat = ATOM_SBAR2XLAT_OFFSET,
	.bar4_xlat = ATOM_SBAR4XLAT_OFFSET,
};

static const struct ntb_reg xeon_reg = {
	.ntb_ctl = XEON_NTBCNTL_OFFSET,
	.lnk_sta = XEON_LINK_STATUS_OFFSET,
	.db_size = sizeof(uint16_t),
	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2, NTB_B2B_BAR_3 },
};

static const struct ntb_alt_reg xeon_pri_reg = {
	.db_bell = XEON_PDOORBELL_OFFSET,
	.db_mask = XEON_PDBMSK_OFFSET,
	.spad = XEON_SPAD_OFFSET,
};

static const struct ntb_alt_reg xeon_b2b_reg = {
	.db_bell = XEON_B2B_DOORBELL_OFFSET,
	.spad = XEON_B2B_SPAD_OFFSET,
};

static const struct ntb_xlat_reg xeon_sec_xlat = {
	.bar0_base = XEON_SBAR0BASE_OFFSET,
	.bar2_base = XEON_SBAR2BASE_OFFSET,
	.bar4_base = XEON_SBAR4BASE_OFFSET,
	.bar5_base = XEON_SBAR5BASE_OFFSET,

	.bar2_limit = XEON_SBAR2LMT_OFFSET,
	.bar4_limit = XEON_SBAR4LMT_OFFSET,
	.bar5_limit = XEON_SBAR5LMT_OFFSET,

	.bar2_xlat = XEON_SBAR2XLAT_OFFSET,
	.bar4_xlat = XEON_SBAR4XLAT_OFFSET,
	.bar5_xlat = XEON_SBAR5XLAT_OFFSET,
};

static struct ntb_b2b_addr xeon_b2b_usd_addr = {
	.bar0_addr = XEON_B2B_BAR0_ADDR,
	.bar2_addr64 = XEON_B2B_BAR2_ADDR64,
	.bar4_addr64 = XEON_B2B_BAR4_ADDR64,
	.bar4_addr32 = XEON_B2B_BAR4_ADDR32,
	.bar5_addr32 = XEON_B2B_BAR5_ADDR32,
};

static struct ntb_b2b_addr xeon_b2b_dsd_addr = {
	.bar0_addr = XEON_B2B_BAR0_ADDR,
	.bar2_addr64 = XEON_B2B_BAR2_ADDR64,
	.bar4_addr64 = XEON_B2B_BAR4_ADDR64,
	.bar4_addr32 = XEON_B2B_BAR4_ADDR32,
	.bar5_addr32 = XEON_B2B_BAR5_ADDR32,
};

static const struct ntb_reg xeon_gen3_reg = {
	.ntb_ctl = XEON_GEN3_REG_IMNTB_CTRL,
	.lnk_sta = XEON_GEN3_INT_LNK_STS_OFFSET,
	.db_size = sizeof(uint32_t),
	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2 },
};

static const struct ntb_alt_reg xeon_gen3_pri_reg = {
	.db_bell = XEON_GEN3_REG_EMDOORBELL,
	.db_mask = XEON_GEN3_REG_IMINT_DISABLE,
	.spad = XEON_GEN3_REG_IMSPAD,
};

static const struct ntb_alt_reg xeon_gen3_b2b_reg = {
	.db_bell = XEON_GEN3_REG_IMDOORBELL,
	.db_mask = XEON_GEN3_REG_EMINT_DISABLE,
	.spad = XEON_GEN3_REG_IMB2B_SSPAD,
};

static const struct ntb_xlat_reg xeon_gen3_sec_xlat = {
	.bar0_base = XEON_GEN3_EXT_REG_BAR0BASE,
	.bar2_base = XEON_GEN3_EXT_REG_BAR1BASE,
	.bar4_base = XEON_GEN3_EXT_REG_BAR2BASE,

	.bar2_limit = XEON_GEN3_REG_IMBAR1XLIMIT,
	.bar4_limit = XEON_GEN3_REG_IMBAR2XLIMIT,

	.bar2_xlat = XEON_GEN3_REG_IMBAR1XBASE,
	.bar4_xlat = XEON_GEN3_REG_IMBAR2XBASE,
};

SYSCTL_NODE(_hw_ntb, OID_AUTO, xeon_b2b, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "B2B MW segment overrides -- MUST be the same on both sides");

SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar2_addr64, CTLFLAG_RDTUN,
    &xeon_b2b_usd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
    "hardware, use this 64-bit address on the bus between the NTB devices for "
    "the window at BAR2, on the upstream side of the link.  MUST be the same "
    "address on both sides.");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr64, CTLFLAG_RDTUN,
    &xeon_b2b_usd_addr.bar4_addr64, 0, "See usd_bar2_addr64, but BAR4.");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr32, CTLFLAG_RDTUN,
    &xeon_b2b_usd_addr.bar4_addr32, 0, "See usd_bar2_addr64, but BAR4 "
    "(split-BAR mode).");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar5_addr32, CTLFLAG_RDTUN,
    &xeon_b2b_usd_addr.bar5_addr32, 0, "See usd_bar2_addr64, but BAR5 "
    "(split-BAR mode).");

SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar2_addr64, CTLFLAG_RDTUN,
    &xeon_b2b_dsd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
    "hardware, use this 64-bit address on the bus between the NTB devices for "
    "the window at BAR2, on the downstream side of the link.  MUST be the same"
    " address on both sides.");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr64, CTLFLAG_RDTUN,
    &xeon_b2b_dsd_addr.bar4_addr64, 0, "See dsd_bar2_addr64, but BAR4.");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr32, CTLFLAG_RDTUN,
    &xeon_b2b_dsd_addr.bar4_addr32, 0, "See dsd_bar2_addr64, but BAR4 "
    "(split-BAR mode).");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar5_addr32, CTLFLAG_RDTUN,
    &xeon_b2b_dsd_addr.bar5_addr32, 0, "See dsd_bar2_addr64, but BAR5 "
    "(split-BAR mode).");

/*
 * OS <-> Driver interface structures
 */
MALLOC_DEFINE(M_NTB, "ntb_hw", "ntb_hw driver memory allocations");

/*
 * OS <-> Driver linkage functions
 */
static int
intel_ntb_probe(device_t device)
{
	struct ntb_hw_info *p;

	p = intel_ntb_get_device_info(pci_get_devid(device));
	if (p == NULL)
		return (ENXIO);

	device_set_desc(device, p->desc);
	return (0);
}

static int
intel_ntb_attach(device_t device)
{
	struct ntb_softc *ntb;
	struct ntb_hw_info *p;
	int error;

	ntb = device_get_softc(device);
	p = intel_ntb_get_device_info(pci_get_devid(device));

	ntb->device = device;
	ntb->type = p->type;
	ntb->features = p->features;
	ntb->b2b_mw_idx = B2B_MW_DISABLED;
	ntb->msix_mw_idx = B2B_MW_DISABLED;

	/* Heartbeat timer for NTB_ATOM since there is no link interrupt */
	callout_init(&ntb->heartbeat_timer, 1);
	callout_init(&ntb->lr_timer, 1);
	callout_init(&ntb->peer_msix_work, 1);
	mtx_init(&ntb->db_mask_lock, "ntb hw bits", NULL, MTX_SPIN);

	if (ntb->type == NTB_ATOM)
		error = intel_ntb_detect_atom(ntb);
	else if (ntb->type == NTB_XEON_GEN3)
		error = intel_ntb_detect_xeon_gen3(ntb);
	else
		error = intel_ntb_detect_xeon(ntb);
	if (error != 0)
		goto out;

	intel_ntb_detect_max_mw(ntb);

	pci_enable_busmaster(ntb->device);

	error = intel_ntb_map_pci_bars(ntb);
	if (error != 0)
		goto out;
	if (ntb->type == NTB_ATOM)
		error = intel_ntb_atom_init_dev(ntb);
	else if (ntb->type == NTB_XEON_GEN3)
		error = intel_ntb_xeon_gen3_init_dev(ntb);
	else
		error = intel_ntb_xeon_init_dev(ntb);
	if (error != 0)
		goto out;

	intel_ntb_spad_clear(device);

	intel_ntb_poll_link(ntb);

	intel_ntb_sysctl_init(ntb);

	/* Attach children to this controller */
	error = ntb_register_device(device);

out:
	if (error != 0)
		intel_ntb_detach(device);
	return (error);
}

static int
intel_ntb_detach(device_t device)
{
	struct ntb_softc *ntb;

	ntb = device_get_softc(device);

	/* Detach & delete all children */
	ntb_unregister_device(device);

	if (ntb->self_reg != NULL) {
		DB_MASK_LOCK(ntb);
		db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_valid_mask);
		DB_MASK_UNLOCK(ntb);
	}
	callout_drain(&ntb->heartbeat_timer);
	callout_drain(&ntb->lr_timer);
	callout_drain(&ntb->peer_msix_work);
	pci_disable_busmaster(ntb->device);
	if (ntb->type == NTB_XEON_GEN1)
		intel_ntb_teardown_xeon(ntb);
	intel_ntb_teardown_interrupts(ntb);

	mtx_destroy(&ntb->db_mask_lock);

	intel_ntb_unmap_pci_bar(ntb);

	return (0);
}

/*
 * Driver internal routines
 */
static inline enum ntb_bar
intel_ntb_mw_to_bar(struct ntb_softc *ntb, unsigned mw)
{

	KASSERT(mw < ntb->mw_count,
	    ("%s: mw:%u > count:%u", __func__, mw, (unsigned)ntb->mw_count));
	KASSERT(ntb->reg->mw_bar[mw] != 0, ("invalid mw"));

	return (ntb->reg->mw_bar[mw]);
}

static inline bool
bar_is_64bit(struct ntb_softc *ntb, enum ntb_bar bar)
{
	/* XXX This assertion could be stronger. */
	KASSERT(bar < NTB_MAX_BARS, ("bogus bar"));
	return (bar < NTB_B2B_BAR_2 || !HAS_FEATURE(ntb, NTB_SPLIT_BAR));
}

static inline void
bar_get_xlat_params(struct ntb_softc *ntb, enum ntb_bar bar, uint32_t *base,
    uint32_t *xlat, uint32_t *lmt)
{
	uint32_t basev, lmtv, xlatv;

	switch (bar) {
	case NTB_B2B_BAR_1:
		basev = ntb->xlat_reg->bar2_base;
		lmtv = ntb->xlat_reg->bar2_limit;
		xlatv = ntb->xlat_reg->bar2_xlat;
		break;
	case NTB_B2B_BAR_2:
		basev = ntb->xlat_reg->bar4_base;
		lmtv = ntb->xlat_reg->bar4_limit;
		xlatv = ntb->xlat_reg->bar4_xlat;
		break;
	case NTB_B2B_BAR_3:
		basev = ntb->xlat_reg->bar5_base;
		lmtv = ntb->xlat_reg->bar5_limit;
		xlatv = ntb->xlat_reg->bar5_xlat;
		break;
	default:
		KASSERT(bar >= NTB_B2B_BAR_1 && bar < NTB_MAX_BARS,
		    ("bad bar"));
		basev = lmtv = xlatv = 0;
		break;
	}

	if (base != NULL)
		*base = basev;
	if (xlat != NULL)
		*xlat = xlatv;
	if (lmt != NULL)
		*lmt = lmtv;
}

static int
intel_ntb_map_pci_bars(struct ntb_softc *ntb)
{
	struct ntb_pci_bar_info *bar;
	int rc;

	bar = &ntb->bar_info[NTB_CONFIG_BAR];
	bar->pci_resource_id = PCIR_BAR(0);
	rc = map_mmr_bar(ntb, bar);
	if (rc != 0)
		goto out;

	/*
	 * At least on Xeon v4, the NTB device leaks to the host some remote
	 * side BAR0 writes that are supposed to update scratchpad registers.
	 * I am not sure why this happens, but it may be related to the fact
	 * that BAR0 is 32KB on the link side, while it is 64KB on the host
	 * side.  Without this hack, DMAR blocks those accesses as not
	 * allowed.
	 */
	if (bus_dma_tag_create(bus_get_dma_tag(ntb->device), 1, 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
	    bar->size, 1, bar->size, 0, NULL, NULL, &ntb->bar0_dma_tag)) {
		device_printf(ntb->device, "Unable to create BAR0 tag\n");
		return (ENOMEM);
	}
	if (bus_dmamap_create(ntb->bar0_dma_tag, 0, &ntb->bar0_dma_map)) {
		device_printf(ntb->device, "Unable to create BAR0 map\n");
		return (ENOMEM);
	}
	if (bus_dma_iommu_load_ident(ntb->bar0_dma_tag, ntb->bar0_dma_map,
	    bar->pbase, bar->size, 0)) {
		device_printf(ntb->device, "Unable to load BAR0 map\n");
		return (ENOMEM);
	}

	bar = &ntb->bar_info[NTB_B2B_BAR_1];
	bar->pci_resource_id = PCIR_BAR(2);
	rc = map_memory_window_bar(ntb, bar);
	if (rc != 0)
		goto out;
	if (ntb->type == NTB_XEON_GEN3) {
		bar->psz_off = XEON_GEN3_INT_REG_IMBAR1SZ;
		bar->ssz_off = XEON_GEN3_INT_REG_EMBAR1SZ;
		bar->pbarxlat_off = XEON_GEN3_REG_EMBAR1XBASE;
	} else {
		bar->psz_off = XEON_PBAR23SZ_OFFSET;
		bar->ssz_off = XEON_SBAR23SZ_OFFSET;
		bar->pbarxlat_off = XEON_PBAR2XLAT_OFFSET;
	}

	bar = &ntb->bar_info[NTB_B2B_BAR_2];
	bar->pci_resource_id = PCIR_BAR(4);
	rc = map_memory_window_bar(ntb, bar);
	if (rc != 0)
		goto out;
	if (ntb->type == NTB_XEON_GEN3) {
		bar->psz_off = XEON_GEN3_INT_REG_IMBAR2SZ;
		bar->ssz_off = XEON_GEN3_INT_REG_EMBAR2SZ;
		bar->pbarxlat_off = XEON_GEN3_REG_EMBAR2XBASE;
	} else {
		bar->psz_off = XEON_PBAR4SZ_OFFSET;
		bar->ssz_off = XEON_SBAR4SZ_OFFSET;
		bar->pbarxlat_off = XEON_PBAR4XLAT_OFFSET;
	}

	if (!HAS_FEATURE(ntb, NTB_SPLIT_BAR))
		goto out;

	if (ntb->type == NTB_XEON_GEN3) {
		device_printf(ntb->device, "no split bar support\n");
		return (ENXIO);
	}

	bar = &ntb->bar_info[NTB_B2B_BAR_3];
	bar->pci_resource_id = PCIR_BAR(5);
	rc = map_memory_window_bar(ntb, bar);
	bar->psz_off = XEON_PBAR5SZ_OFFSET;
	bar->ssz_off = XEON_SBAR5SZ_OFFSET;
	bar->pbarxlat_off = XEON_PBAR5XLAT_OFFSET;

out:
	if (rc != 0)
		device_printf(ntb->device,
		    "unable to allocate pci resource\n");
	return (rc);
}

static void
print_map_success(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar,
    const char *kind)
{

	device_printf(ntb->device,
	    "Mapped BAR%d v:[%p-%p] p:[%p-%p] (0x%jx bytes) (%s)\n",
	    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
	    (char *)bar->vbase + bar->size - 1,
	    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
	    (uintmax_t)bar->size, kind);
}

static int
map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
{

	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
	    &bar->pci_resource_id, RF_ACTIVE);
	if (bar->pci_resource == NULL)
		return (ENXIO);

	save_bar_parameters(bar);
	bar->map_mode = VM_MEMATTR_UNCACHEABLE;
	print_map_success(ntb, bar, "mmr");
	return (0);
}

static int
map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
{
	int rc;
	vm_memattr_t mapmode;
	uint8_t bar_size_bits = 0;

	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
	    &bar->pci_resource_id, RF_ACTIVE);

	if (bar->pci_resource == NULL)
		return (ENXIO);

	save_bar_parameters(bar);
	/*
	 * Ivytown NTB BAR sizes are misreported by the hardware due to a
	 * hardware issue. To work around this, query the size it should be
	 * configured to by the device and modify the resource to correspond to
	 * this new size. The BIOS on systems with this problem is required to
	 * provide enough address space to allow the driver to make this change
	 * safely.
	 *
	 * Ideally I could have just specified the size when I allocated the
	 * resource like:
	 *  bus_alloc_resource(ntb->device,
	 *	SYS_RES_MEMORY, &bar->pci_resource_id, 0ul, ~0ul,
	 *	1ul << bar_size_bits, RF_ACTIVE);
	 * but the PCI driver does not honor the size in this call, so we have
	 * to modify it after the fact.
	 */
	if (HAS_FEATURE(ntb, NTB_BAR_SIZE_4K)) {
		if (bar->pci_resource_id == PCIR_BAR(2))
			bar_size_bits = pci_read_config(ntb->device,
			    XEON_PBAR23SZ_OFFSET, 1);
		else
			bar_size_bits = pci_read_config(ntb->device,
			    XEON_PBAR45SZ_OFFSET, 1);

		rc = bus_adjust_resource(ntb->device, SYS_RES_MEMORY,
		    bar->pci_resource, bar->pbase,
		    bar->pbase + (1ul << bar_size_bits) - 1);
		if (rc != 0) {
			device_printf(ntb->device,
			    "unable to resize bar\n");
			return (rc);
		}

		save_bar_parameters(bar);
	}

	bar->map_mode = VM_MEMATTR_UNCACHEABLE;
	print_map_success(ntb, bar, "mw");

	/*
	 * Optionally, mark MW BARs as anything other than UC to improve
	 * performance.
	 */
	mapmode = intel_ntb_pat_flags();
	if (mapmode == bar->map_mode)
		return (0);

	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size, mapmode);
	if (rc == 0) {
		bar->map_mode = mapmode;
		device_printf(ntb->device,
		    "Marked BAR%d v:[%p-%p] p:[%p-%p] as "
		    "%s.\n",
		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
		    (char *)bar->vbase + bar->size - 1,
		    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
		    intel_ntb_vm_memattr_to_str(mapmode));
	} else {
		device_printf(ntb->device,
		    "Unable to mark BAR%d v:[%p-%p] p:[%p-%p] as "
		    "%s: %d\n",
		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
		    (char *)bar->vbase + bar->size - 1,
		    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
		    intel_ntb_vm_memattr_to_str(mapmode), rc);
		/* Proceed anyway */
	}
	return (0);
}

static void
intel_ntb_unmap_pci_bar(struct ntb_softc *ntb)
{
	struct ntb_pci_bar_info *bar;
	int i;

	if (ntb->bar0_dma_map != NULL) {
		bus_dmamap_unload(ntb->bar0_dma_tag, ntb->bar0_dma_map);
		bus_dmamap_destroy(ntb->bar0_dma_tag, ntb->bar0_dma_map);
	}
	if (ntb->bar0_dma_tag != NULL)
		bus_dma_tag_destroy(ntb->bar0_dma_tag);
	for (i = 0; i < NTB_MAX_BARS; i++) {
		bar = &ntb->bar_info[i];
		if (bar->pci_resource != NULL)
			bus_release_resource(ntb->device, SYS_RES_MEMORY,
			    bar->pci_resource_id, bar->pci_resource);
	}
}

static int
intel_ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors)
{
	uint32_t i;
	int rc;

	for (i = 0; i < num_vectors; i++) {
		ntb->int_info[i].rid = i + 1;
		ntb->int_info[i].res = bus_alloc_resource_any(ntb->device,
		    SYS_RES_IRQ, &ntb->int_info[i].rid, RF_ACTIVE);
		if (ntb->int_info[i].res == NULL) {
			device_printf(ntb->device,
			    "bus_alloc_resource failed\n");
			return (ENOMEM);
		}
		ntb->int_info[i].tag = NULL;
		ntb->allocated_interrupts++;
		rc = bus_setup_intr(ntb->device, ntb->int_info[i].res,
		    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_vec_isr,
		    &ntb->msix_vec[i], &ntb->int_info[i].tag);
		if (rc != 0) {
			device_printf(ntb->device, "bus_setup_intr failed\n");
			return (ENXIO);
		}
	}
	return (0);
}

/*
 * The Linux NTB driver drops from MSI-X to legacy INTx if a unique vector
 * cannot be allocated for each MSI-X message.  JHB seems to think remapping
 * should be okay.  This tunable should enable us to test that hypothesis
 * when someone gets their hands on some Xeon hardware.
 */
static int ntb_force_remap_mode;
SYSCTL_INT(_hw_ntb, OID_AUTO, force_remap_mode, CTLFLAG_RDTUN,
    &ntb_force_remap_mode, 0, "If enabled, force MSI-X messages to be remapped"
    " to a smaller number of ithreads, even if the desired number are "
    "available");

/*
 * In case it is NOT ok, give consumers an abort button.
 */
static int ntb_prefer_intx;
SYSCTL_INT(_hw_ntb, OID_AUTO, prefer_intx_to_remap, CTLFLAG_RDTUN,
    &ntb_prefer_intx, 0, "If enabled, prefer to use legacy INTx mode rather "
    "than remapping MSI-X messages over available slots (match Linux driver "
    "behavior)");

/*
 * Remap the desired number of MSI-X messages to available ithreads in a simple
 * round-robin fashion.
 */
static int
intel_ntb_remap_msix(device_t dev, uint32_t desired, uint32_t avail)
{
	u_int *vectors;
	uint32_t i;
	int rc;

	if (ntb_prefer_intx != 0)
		return (ENXIO);

	vectors = malloc(desired * sizeof(*vectors), M_NTB, M_ZERO | M_WAITOK);

	for (i = 0; i < desired; i++)
		vectors[i] = (i % avail) + 1;

	rc = pci_remap_msix(dev, desired, vectors);
	free(vectors, M_NTB);
	return (rc);
}

static int
intel_ntb_xeon_gen3_init_isr(struct ntb_softc *ntb)
{
	uint64_t i, reg;
	uint32_t desired_vectors, num_vectors;
	int rc;

	ntb->allocated_interrupts = 0;
	ntb->last_ts = ticks;

	/* Mask all the interrupts, including hardware interrupt */
	intel_ntb_reg_write(8, XEON_GEN3_REG_IMINT_DISABLE, ~0ULL);

	/* Clear Interrupt Status */
	reg = intel_ntb_reg_read(8, XEON_GEN3_REG_IMINT_STATUS);
	intel_ntb_reg_write(8, XEON_GEN3_REG_IMINT_STATUS, reg);

	num_vectors = desired_vectors = MIN(pci_msix_count(ntb->device),
	    XEON_GEN3_DB_MSIX_VECTOR_COUNT);

	rc = pci_alloc_msix(ntb->device, &num_vectors);
	if (rc != 0) {
		device_printf(ntb->device,
		    "Interrupt allocation failed %d\n", rc);
		return (rc);
	}
	if (desired_vectors != num_vectors) {
		device_printf(ntb->device, "Couldn't get %d vectors\n",
		    XEON_GEN3_DB_MSIX_VECTOR_COUNT);
		return (ENXIO);
	}
	/* 32 db + 1 hardware */
	if (num_vectors == XEON_GEN3_DB_MSIX_VECTOR_COUNT) {
		/* Program INTVECXX source register */
		for (i = 0; i < XEON_GEN3_DB_MSIX_VECTOR_COUNT; i++) {
			/* interrupt source i for vector i */
			intel_ntb_reg_write(1, XEON_GEN3_REG_IMINTVEC00 + i, i);
			if (i == (XEON_GEN3_DB_MSIX_VECTOR_COUNT - 1)) {
				intel_ntb_reg_write(1,
				    XEON_GEN3_REG_IMINTVEC00 + i,
				    XEON_GEN3_LINK_VECTOR_INDEX);
			}
		}

		intel_ntb_create_msix_vec(ntb, num_vectors);
		rc = intel_ntb_setup_msix(ntb, num_vectors);

		/* enable all interrupts */
		intel_ntb_reg_write(8, XEON_GEN3_REG_IMINT_DISABLE, 0ULL);
	} else {
		device_printf(ntb->device, "need to remap interrupts, giving up.\n");
		return (ENXIO);
	}

	return (0);
}

static int
intel_ntb_init_isr(struct ntb_softc *ntb)
{
	uint32_t desired_vectors, num_vectors;
	int rc;

	ntb->allocated_interrupts = 0;
	ntb->last_ts = ticks;

	/*
	 * Mask all doorbell interrupts.  (Except link events!)
	 */
	DB_MASK_LOCK(ntb);
	ntb->db_mask = ntb->db_valid_mask;
	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
	DB_MASK_UNLOCK(ntb);

	num_vectors = desired_vectors = MIN(pci_msix_count(ntb->device),
	    ntb->db_count);
	if (desired_vectors >= 1) {
		rc = pci_alloc_msix(ntb->device, &num_vectors);

		if (ntb_force_remap_mode != 0 && rc == 0 &&
		    num_vectors == desired_vectors)
			num_vectors--;

		if (rc == 0 && num_vectors < desired_vectors) {
			rc = intel_ntb_remap_msix(ntb->device, desired_vectors,
			    num_vectors);
			if (rc == 0)
				num_vectors = desired_vectors;
			else
				pci_release_msi(ntb->device);
		}
		if (rc != 0)
			num_vectors = 1;
	} else
		num_vectors = 1;

	if (ntb->type == NTB_XEON_GEN1 && num_vectors < ntb->db_vec_count) {
		if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
			device_printf(ntb->device,
			    "Errata workaround does not support MSI or INTX\n");
			return (EINVAL);
		}

		ntb->db_vec_count = 1;
		ntb->db_vec_shift = XEON_DB_TOTAL_SHIFT;
		rc = intel_ntb_setup_legacy_interrupt(ntb);
	} else {
		if (num_vectors - 1 != XEON_NONLINK_DB_MSIX_BITS &&
		    HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
			device_printf(ntb->device,
			    "Errata workaround expects %d doorbell bits\n",
			    XEON_NONLINK_DB_MSIX_BITS);
			return (EINVAL);
		}

		intel_ntb_create_msix_vec(ntb, num_vectors);
		rc = intel_ntb_setup_msix(ntb, num_vectors);
	}
	if (rc != 0) {
		device_printf(ntb->device,
		    "Error allocating interrupts: %d\n", rc);
		intel_ntb_free_msix_vec(ntb);
	}

	return (rc);
}

static int
intel_ntb_setup_legacy_interrupt(struct ntb_softc *ntb)
{
	int rc;

	ntb->int_info[0].rid = 0;
	ntb->int_info[0].res = bus_alloc_resource_any(ntb->device, SYS_RES_IRQ,
	    &ntb->int_info[0].rid, RF_SHAREABLE|RF_ACTIVE);
	if (ntb->int_info[0].res == NULL) {
		device_printf(ntb->device, "bus_alloc_resource failed\n");
		return (ENOMEM);
	}

	ntb->int_info[0].tag = NULL;
	ntb->allocated_interrupts = 1;

	rc = bus_setup_intr(ntb->device, ntb->int_info[0].res,
	    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_irq_isr,
	    ntb, &ntb->int_info[0].tag);
	if (rc != 0) {
		device_printf(ntb->device, "bus_setup_intr failed\n");
		return (ENXIO);
	}

	return (0);
}

static void
intel_ntb_teardown_interrupts(struct ntb_softc *ntb)
{
	struct ntb_int_info *current_int;
	int i;

	for (i = 0; i < ntb->allocated_interrupts; i++) {
		current_int = &ntb->int_info[i];
		if (current_int->tag != NULL)
			bus_teardown_intr(ntb->device, current_int->res,
			    current_int->tag);

		if (current_int->res != NULL)
			bus_release_resource(ntb->device, SYS_RES_IRQ,
			    rman_get_rid(current_int->res), current_int->res);
	}

	intel_ntb_free_msix_vec(ntb);
	pci_release_msi(ntb->device);
}

static inline uint64_t
db_ioread(struct ntb_softc *ntb, uint64_t regoff)
{

	switch (ntb->type) {
	case NTB_ATOM:
	case NTB_XEON_GEN3:
		return (intel_ntb_reg_read(8, regoff));
	case NTB_XEON_GEN1:
		return (intel_ntb_reg_read(2, regoff));
	}
	__assert_unreachable();
}

static inline void
db_iowrite(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
{

	KASSERT((val & ~ntb->db_valid_mask) == 0,
	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
	     (uintmax_t)(val & ~ntb->db_valid_mask),
	     (uintmax_t)ntb->db_valid_mask));

	if (regoff == ntb->self_reg->db_mask)
		DB_MASK_ASSERT(ntb, MA_OWNED);
	db_iowrite_raw(ntb, regoff, val);
}

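/* As db_iowrite(), but without the validity check and mask-lock assertion. */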
static inline void
db_iowrite_raw(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
{

	switch (ntb->type) {
	case NTB_ATOM:
	case NTB_XEON_GEN3:
		intel_ntb_reg_write(8, regoff, val);
		break;
	case NTB_XEON_GEN1:
		intel_ntb_reg_write(2, regoff, (uint16_t)val);
		break;
	}
}

static void
intel_ntb_db_set_mask(device_t dev, uint64_t bits)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	DB_MASK_LOCK(ntb);
	ntb->db_mask |= bits;
	if (!HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
		db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
	DB_MASK_UNLOCK(ntb);
}

static void
intel_ntb_db_clear_mask(device_t dev, uint64_t bits)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	uint64_t ibits;
	int i;

	KASSERT((bits & ~ntb->db_valid_mask) == 0,
	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
	     (uintmax_t)(bits & ~ntb->db_valid_mask),
	     (uintmax_t)ntb->db_valid_mask));

	DB_MASK_LOCK(ntb);
	ibits = ntb->fake_db & ntb->db_mask & bits;
	ntb->db_mask &= ~bits;
	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
		/* Simulate fake interrupts if unmasked DB bits are set. */
		ntb->force_db |= ibits;
		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
			if ((ibits & intel_ntb_db_vector_mask(dev, i)) != 0)
				swi_sched(ntb->int_info[i].tag, 0);
		}
	} else {
		db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
	}
	DB_MASK_UNLOCK(ntb);
}

static uint64_t
intel_ntb_db_read(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
		return (ntb->fake_db);
	if (ntb->type == NTB_XEON_GEN3)
		return (intel_ntb_reg_read(8, XEON_GEN3_REG_IMINT_STATUS));
	else
		return (db_ioread(ntb, ntb->self_reg->db_bell));
}

static void
intel_ntb_db_clear(device_t dev, uint64_t bits)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	KASSERT((bits & ~ntb->db_valid_mask) == 0,
	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
	     (uintmax_t)(bits & ~ntb->db_valid_mask),
	     (uintmax_t)ntb->db_valid_mask));

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
		DB_MASK_LOCK(ntb);
		ntb->fake_db &= ~bits;
		DB_MASK_UNLOCK(ntb);
		return;
	}

	if (ntb->type == NTB_XEON_GEN3)
		intel_ntb_reg_write(4, XEON_GEN3_REG_IMINT_STATUS,
		    (uint32_t)bits);
	else
		db_iowrite(ntb, ntb->self_reg->db_bell, bits);
}

static inline uint64_t
intel_ntb_vec_mask(struct ntb_softc *ntb, uint64_t db_vector)
{
	uint64_t shift, mask;

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
		/*
		 * Remap vectors in a custom way so that at least the first
		 * three doorbells do not generate stray events.  This breaks
		 * Linux compatibility (if any existed) when more than one DB
		 * is used (not the case for if_ntb).
		 */
		if (db_vector < XEON_NONLINK_DB_MSIX_BITS - 1)
			return (1 << db_vector);
		if (db_vector == XEON_NONLINK_DB_MSIX_BITS - 1)
			return (0x7ffc);
	}

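	/* Each vector covers a contiguous group of db_vec_shift doorbell bits. */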
	shift = ntb->db_vec_shift;
	mask = (1ull << shift) - 1;
	return (mask << (shift * db_vector));
}

static void
intel_ntb_interrupt(struct ntb_softc *ntb, uint32_t vec)
{
	uint64_t vec_mask;

	ntb->last_ts = ticks;
	vec_mask = intel_ntb_vec_mask(ntb, vec);

	if (ntb->type == NTB_XEON_GEN3 && vec == XEON_GEN3_LINK_VECTOR_INDEX)
		vec_mask |= ntb->db_link_mask;
	if ((vec_mask & ntb->db_link_mask) != 0) {
		if (intel_ntb_poll_link(ntb))
			ntb_link_event(ntb->device);
		if (ntb->type == NTB_XEON_GEN3)
			intel_ntb_reg_write(8, XEON_GEN3_REG_IMINT_STATUS,
			    intel_ntb_reg_read(8, XEON_GEN3_REG_IMINT_STATUS));
	}

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP) &&
	    (vec_mask & ntb->db_link_mask) == 0) {
		DB_MASK_LOCK(ntb);

		/*
		 * Do not report the same DB events again if they have not
		 * been cleared yet, unless the mask was just cleared for
		 * them and this interrupt handler call may be a consequence
		 * of that.
		 */
		vec_mask &= ~ntb->fake_db | ntb->force_db;
		ntb->force_db &= ~vec_mask;

		/* Update our internal doorbell register. */
		ntb->fake_db |= vec_mask;

		/* Do not report masked DB events. */
		vec_mask &= ~ntb->db_mask;

		DB_MASK_UNLOCK(ntb);
	}

	if ((vec_mask & ntb->db_valid_mask) != 0)
		ntb_db_event(ntb->device, vec);
}

static void
ndev_vec_isr(void *arg)
{
	struct ntb_vec *nvec = arg;

	intel_ntb_interrupt(nvec->ntb, nvec->num);
}

static void
ndev_irq_isr(void *arg)
{
	/* If we couldn't set up MSI-X, we only have the one vector. */
	intel_ntb_interrupt(arg, 0);
}

static int
intel_ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors)
{
	uint32_t i;

	ntb->msix_vec = malloc(num_vectors * sizeof(*ntb->msix_vec), M_NTB,
	    M_ZERO | M_WAITOK);
	for (i = 0; i < num_vectors; i++) {
		ntb->msix_vec[i].num = i;
		ntb->msix_vec[i].ntb = ntb;
	}

	return (0);
}

static void
intel_ntb_free_msix_vec(struct ntb_softc *ntb)
{

	if (ntb->msix_vec == NULL)
		return;

	free(ntb->msix_vec, M_NTB);
	ntb->msix_vec = NULL;
}

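/*
 * Read back our local MSI-X table so the address/data pairs can be shared
 * with the peer for the SB01BASE_LOCKUP workaround.
 */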
static void
intel_ntb_get_msix_info(struct ntb_softc *ntb)
{
	struct pci_devinfo *dinfo;
	struct pcicfg_msix *msix;
	uint32_t laddr, data, i, offset;

	dinfo = device_get_ivars(ntb->device);
	msix = &dinfo->cfg.msix;

	CTASSERT(XEON_NONLINK_DB_MSIX_BITS == nitems(ntb->msix_data));

	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
		offset = msix->msix_table_offset + i * PCI_MSIX_ENTRY_SIZE;

		laddr = bus_read_4(msix->msix_table_res, offset +
		    PCI_MSIX_ENTRY_LOWER_ADDR);
		intel_ntb_printf(2, "local MSIX addr(%u): 0x%x\n", i, laddr);

		KASSERT((laddr & MSI_INTEL_ADDR_BASE) == MSI_INTEL_ADDR_BASE,
		    ("local MSIX addr 0x%x not in MSI base 0x%x", laddr,
		     MSI_INTEL_ADDR_BASE));
		ntb->msix_data[i].nmd_ofs = laddr;

		data = bus_read_4(msix->msix_table_res, offset +
		    PCI_MSIX_ENTRY_DATA);
		intel_ntb_printf(2, "local MSIX data(%u): 0x%x\n", i, data);

		ntb->msix_data[i].nmd_data = data;
	}
}

static struct ntb_hw_info *
intel_ntb_get_device_info(uint32_t device_id)
{
	struct ntb_hw_info *ep;

	for (ep = pci_ids; ep < &pci_ids[nitems(pci_ids)]; ep++) {
		if (ep->device_id == device_id)
			return (ep);
	}
	return (NULL);
}

static void
intel_ntb_teardown_xeon(struct ntb_softc *ntb)
{

	if (ntb->reg != NULL)
		intel_ntb_link_disable(ntb->device);
}

static void
intel_ntb_detect_max_mw(struct ntb_softc *ntb)
{

	switch (ntb->type) {
	case NTB_ATOM:
		ntb->mw_count = ATOM_MW_COUNT;
		break;
	case NTB_XEON_GEN1:
		if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
			ntb->mw_count = XEON_HSX_SPLIT_MW_COUNT;
		else
			ntb->mw_count = XEON_SNB_MW_COUNT;
		break;
	case NTB_XEON_GEN3:
		if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
			ntb->mw_count = XEON_GEN3_SPLIT_MW_COUNT;
		else
			ntb->mw_count = XEON_GEN3_MW_COUNT;
		break;
	}
}

static int
intel_ntb_detect_xeon(struct ntb_softc *ntb)
{
	uint8_t ppd, conn_type;

	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 1);
	ntb->ppd = ppd;

	if ((ppd & XEON_PPD_DEV_TYPE) != 0)
		ntb->dev_type = NTB_DEV_DSD;
	else
		ntb->dev_type = NTB_DEV_USD;

	if ((ppd & XEON_PPD_SPLIT_BAR) != 0)
		ntb->features |= NTB_SPLIT_BAR;

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP) &&
	    !HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		device_printf(ntb->device,
		    "Cannot apply SB01BASE_LOCKUP workaround "
		    "with split BARs disabled!\n");
		device_printf(ntb->device,
		    "Expect system hangs under heavy NTB traffic!\n");
		ntb->features &= ~NTB_SB01BASE_LOCKUP;
	}

	/*
	 * SDOORBELL errata workaround gets in the way of SB01BASE_LOCKUP
	 * errata workaround; only do one at a time.
	 */
	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
		ntb->features &= ~NTB_SDOORBELL_LOCKUP;

	conn_type = ppd & XEON_PPD_CONN_TYPE;
	switch (conn_type) {
	case NTB_CONN_B2B:
		ntb->conn_type = conn_type;
		break;
	case NTB_CONN_RP:
	case NTB_CONN_TRANSPARENT:
	default:
		device_printf(ntb->device, "Unsupported connection type: %u\n",
		    (unsigned)conn_type);
		return (ENXIO);
	}
	return (0);
}

static int
intel_ntb_detect_atom(struct ntb_softc *ntb)
{
	uint32_t ppd, conn_type;

	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4);
	ntb->ppd = ppd;

	if ((ppd & ATOM_PPD_DEV_TYPE) != 0)
		ntb->dev_type = NTB_DEV_DSD;
	else
		ntb->dev_type = NTB_DEV_USD;

	conn_type = (ppd & ATOM_PPD_CONN_TYPE) >> 8;
	switch (conn_type) {
	case NTB_CONN_B2B:
		ntb->conn_type = conn_type;
		break;
	default:
		device_printf(ntb->device, "Unsupported NTB configuration\n");
		return (ENXIO);
	}
	return (0);
}

static int
intel_ntb_detect_xeon_gen3(struct ntb_softc *ntb)
{
	uint8_t ppd, conn_type;

	ppd = pci_read_config(ntb->device, XEON_GEN3_INT_REG_PPD, 1);
	ntb->ppd = ppd;

	/* check port definition */
	conn_type = XEON_GEN3_REG_PPD_PORT_DEF_F(ppd);
	switch (conn_type) {
	case NTB_CONN_B2B:
		ntb->conn_type = conn_type;
		break;
	default:
		device_printf(ntb->device, "Unsupported connection type: %u\n",
		    conn_type);
		return (ENXIO);
	}

	/* check cross link configuration status */
	if (XEON_GEN3_REG_PPD_CONF_STS_F(ppd)) {
		/* NTB Port is configured as DSD/USP */
		ntb->dev_type = NTB_DEV_DSD;
	} else {
		/* NTB Port is configured as USD/DSP */
		ntb->dev_type = NTB_DEV_USD;
	}

	if (XEON_GEN3_REG_PPD_ONE_MSIX_F(ppd)) {
		/*
		 * This bit when set, causes only a single MSI-X message to be
		 * generated if MSI-X is enabled.
		 */
		ntb->features |= NTB_ONE_MSIX;
	}

	if (XEON_GEN3_REG_PPD_BAR45_SPL_F(ppd)) {
		/* BARs 4 and 5 are presented as two 32b non-prefetchable BARs */
		ntb->features |= NTB_SPLIT_BAR;
	}

	device_printf(ntb->device, "conn type 0x%02x, dev type 0x%02x,"
	    "features 0x%02x\n", ntb->conn_type, ntb->dev_type, ntb->features);

	return (0);
}

static int
intel_ntb_xeon_init_dev(struct ntb_softc *ntb)
{
	int rc;

	ntb->spad_count		= XEON_SPAD_COUNT;
	ntb->db_count		= XEON_DB_COUNT;
	ntb->db_link_mask	= XEON_DB_LINK_BIT;
	ntb->db_vec_count	= XEON_DB_MSIX_VECTOR_COUNT;
	ntb->db_vec_shift	= XEON_DB_MSIX_VECTOR_SHIFT;

	if (ntb->conn_type != NTB_CONN_B2B) {
		device_printf(ntb->device, "Connection type %d not supported\n",
		    ntb->conn_type);
		return (ENXIO);
	}

	ntb->reg = &xeon_reg;
	ntb->self_reg = &xeon_pri_reg;
	ntb->peer_reg = &xeon_b2b_reg;
	ntb->xlat_reg = &xeon_sec_xlat;

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
		ntb->force_db = ntb->fake_db = 0;
		ntb->msix_mw_idx = (ntb->mw_count + g_ntb_msix_idx) %
		    ntb->mw_count;
		intel_ntb_printf(2, "Setting up MSIX mw idx %d means %u\n",
		    g_ntb_msix_idx, ntb->msix_mw_idx);
		rc = intel_ntb_mw_set_wc_internal(ntb, ntb->msix_mw_idx,
		    VM_MEMATTR_UNCACHEABLE);
		KASSERT(rc == 0, ("shouldn't fail"));
	} else if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
		/*
		 * There is a Xeon hardware errata related to writes to
		 * SDOORBELL or B2BDOORBELL in conjunction with inbound access
		 * to NTB MMIO space, which may hang the system.  To work
		 * around this, use a memory window to access the interrupt
		 * and scratch pad registers on the remote system.
		 */
		ntb->b2b_mw_idx = (ntb->mw_count + g_ntb_mw_idx) %
		    ntb->mw_count;
		intel_ntb_printf(2, "Setting up b2b mw idx %d means %u\n",
		    g_ntb_mw_idx, ntb->b2b_mw_idx);
		rc = intel_ntb_mw_set_wc_internal(ntb, ntb->b2b_mw_idx,
		    VM_MEMATTR_UNCACHEABLE);
		KASSERT(rc == 0, ("shouldn't fail"));
	} else if (HAS_FEATURE(ntb, NTB_B2BDOORBELL_BIT14))
		/*
		 * HW errata on bit 14 of the b2bdoorbell register.  Writes
		 * will not be mirrored to the remote system.  Shrink the
		 * number of bits by one, since bit 14 is the last bit.
		 *
		 * In REGS_THRU_MW errata mode, we don't use the b2bdoorbell
		 * register anyway.  Nor for non-B2B connection types.
		 */
		ntb->db_count = XEON_DB_COUNT - 1;

	ntb->db_valid_mask = (1ull << ntb->db_count) - 1;

	if (ntb->dev_type == NTB_DEV_USD)
		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_dsd_addr,
		    &xeon_b2b_usd_addr);
	else
		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_usd_addr,
		    &xeon_b2b_dsd_addr);
	if (rc != 0)
		return (rc);

	/* Enable Bus Master and Memory Space on the secondary side */
	intel_ntb_reg_write(2, XEON_SPCICMD_OFFSET,
	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);

	/*
	 * Mask all doorbell interrupts.
	 */
	DB_MASK_LOCK(ntb);
	ntb->db_mask = ntb->db_valid_mask;
	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
	DB_MASK_UNLOCK(ntb);

	rc = intel_ntb_init_isr(ntb);
	return (rc);
}

static int
intel_ntb_xeon_gen3_init_dev(struct ntb_softc *ntb)
{
	int rc;

	ntb->spad_count = XEON_GEN3_SPAD_COUNT;
	ntb->db_count = XEON_GEN3_DB_COUNT;
	ntb->db_link_mask = XEON_GEN3_DB_LINK_BIT;
	ntb->db_vec_count = XEON_GEN3_DB_MSIX_VECTOR_COUNT;
	ntb->db_vec_shift = XEON_GEN3_DB_MSIX_VECTOR_SHIFT;

	if (ntb->conn_type != NTB_CONN_B2B) {
		device_printf(ntb->device, "Connection type %d not supported\n",
		    ntb->conn_type);
		return (ENXIO);
	}

	ntb->reg = &xeon_gen3_reg;
	ntb->self_reg = &xeon_gen3_pri_reg;
	ntb->peer_reg = &xeon_gen3_b2b_reg;
	ntb->xlat_reg = &xeon_gen3_sec_xlat;

	ntb->db_valid_mask = (1ULL << ntb->db_count) - 1;

	xeon_gen3_setup_b2b_mw(ntb);

	/* Enable Bus Master and Memory Space on the External Side */
	intel_ntb_reg_write(2, XEON_GEN3_EXT_REG_PCI_CMD,
	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);

	/* Setup Interrupt */
	rc = intel_ntb_xeon_gen3_init_isr(ntb);

	return (rc);
}

static int
intel_ntb_atom_init_dev(struct ntb_softc *ntb)
{
	int error;

	KASSERT(ntb->conn_type == NTB_CONN_B2B,
	    ("Unsupported NTB configuration (%d)\n", ntb->conn_type));

	ntb->spad_count		 = ATOM_SPAD_COUNT;
	ntb->db_count		 = ATOM_DB_COUNT;
	ntb->db_vec_count	 = ATOM_DB_MSIX_VECTOR_COUNT;
	ntb->db_vec_shift	 = ATOM_DB_MSIX_VECTOR_SHIFT;
	ntb->db_valid_mask	 = (1ull << ntb->db_count) - 1;

	ntb->reg = &atom_reg;
	ntb->self_reg = &atom_pri_reg;
	ntb->peer_reg = &atom_b2b_reg;
	ntb->xlat_reg = &atom_sec_xlat;

	/*
	 * FIXME - MSI-X bug on early Atom HW, remove once internal issue is
	 * resolved.  Mask transaction layer internal parity errors.
	 */
	pci_write_config(ntb->device, 0xFC, 0x4, 4);

	configure_atom_secondary_side_bars(ntb);

	/* Enable Bus Master and Memory Space on the secondary side */
	intel_ntb_reg_write(2, ATOM_SPCICMD_OFFSET,
	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);

	error = intel_ntb_init_isr(ntb);
	if (error != 0)
		return (error);

	/* Initiate PCI-E link training */
	intel_ntb_link_enable(ntb->device, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);

	callout_reset(&ntb->heartbeat_timer, 0, atom_link_hb, ntb);

	return (0);
}

/* XXX: Linux driver doesn't seem to do any of this for Atom. */
static void
configure_atom_secondary_side_bars(struct ntb_softc *ntb)
{

	if (ntb->dev_type == NTB_DEV_USD) {
		intel_ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
		    XEON_B2B_BAR2_ADDR64);
		intel_ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
		    XEON_B2B_BAR4_ADDR64);
		intel_ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
		intel_ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
	} else {
		intel_ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
		    XEON_B2B_BAR2_ADDR64);
		intel_ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
		    XEON_B2B_BAR4_ADDR64);
		intel_ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
		intel_ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
	}
}

/*
 * When working around Xeon SDOORBELL errata by remapping remote registers in a
 * MW, limit the B2B MW to half a MW.  By sharing a MW, half the shared MW
 * remains for use by a higher layer.
 *
 * Will only be used if working around SDOORBELL errata and the BIOS-configured
 * MW size is sufficiently large.
 */
static unsigned int ntb_b2b_mw_share;
SYSCTL_UINT(_hw_ntb, OID_AUTO, b2b_mw_share, CTLFLAG_RDTUN, &ntb_b2b_mw_share,
    0, "If enabled (non-zero), prefer to share half of the B2B peer register "
    "MW with higher level consumers.  Both sides of the NTB MUST set the same "
    "value here.");

static void
xeon_reset_sbar_size(struct ntb_softc *ntb, enum ntb_bar idx,
    enum ntb_bar regbar)
{
	struct ntb_pci_bar_info *bar;
	uint8_t bar_sz;

	if (!HAS_FEATURE(ntb, NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_3)
		return;

	bar = &ntb->bar_info[idx];
	bar_sz = pci_read_config(ntb->device, bar->psz_off, 1);
	if (idx == regbar) {
		if (ntb->b2b_off != 0)
			bar_sz--;
		else
			bar_sz = 0;
	}
	pci_write_config(ntb->device, bar->ssz_off, bar_sz, 1);
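	/* Read the size back; the result is intentionally unused. */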
	bar_sz = pci_read_config(ntb->device, bar->ssz_off, 1);
	(void)bar_sz;
}

static void
xeon_set_sbar_base_and_limit(struct ntb_softc *ntb, uint64_t bar_addr,
    enum ntb_bar idx, enum ntb_bar regbar)
{
	uint64_t reg_val;
	uint32_t base_reg, lmt_reg;

	bar_get_xlat_params(ntb, idx, &base_reg, NULL, &lmt_reg);
	if (idx == regbar) {
		if (ntb->b2b_off)
			bar_addr += ntb->b2b_off;
		else
			bar_addr = 0;
	}

	if (!bar_is_64bit(ntb, idx)) {
		intel_ntb_reg_write(4, base_reg, bar_addr);
		reg_val = intel_ntb_reg_read(4, base_reg);
		(void)reg_val;

		intel_ntb_reg_write(4, lmt_reg, bar_addr);
		reg_val = intel_ntb_reg_read(4, lmt_reg);
		(void)reg_val;
	} else {
		intel_ntb_reg_write(8, base_reg, bar_addr);
		reg_val = intel_ntb_reg_read(8, base_reg);
		(void)reg_val;

		intel_ntb_reg_write(8, lmt_reg, bar_addr);
		reg_val = intel_ntb_reg_read(8, lmt_reg);
		(void)reg_val;
	}
}

static void
xeon_set_pbar_xlat(struct ntb_softc *ntb, uint64_t base_addr, enum ntb_bar idx)
{
	struct ntb_pci_bar_info *bar;

	bar = &ntb->bar_info[idx];
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_2) {
		intel_ntb_reg_write(4, bar->pbarxlat_off, base_addr);
		base_addr = intel_ntb_reg_read(4, bar->pbarxlat_off);
	} else {
		intel_ntb_reg_write(8, bar->pbarxlat_off, base_addr);
		base_addr = intel_ntb_reg_read(8, bar->pbarxlat_off);
	}
	(void)base_addr;
}

static int
xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
    const struct ntb_b2b_addr *peer_addr)
{
	struct ntb_pci_bar_info *b2b_bar;
	vm_size_t bar_size;
	uint64_t bar_addr;
	enum ntb_bar b2b_bar_num, i;

	if (ntb->b2b_mw_idx == B2B_MW_DISABLED) {
		b2b_bar = NULL;
		b2b_bar_num = NTB_CONFIG_BAR;
		ntb->b2b_off = 0;
	} else {
		b2b_bar_num = intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx);
		KASSERT(b2b_bar_num > 0 && b2b_bar_num < NTB_MAX_BARS,
		    ("invalid b2b mw bar"));

		b2b_bar = &ntb->bar_info[b2b_bar_num];
		bar_size = b2b_bar->size;

		if (ntb_b2b_mw_share != 0 &&
		    (bar_size >> 1) >= XEON_B2B_MIN_SIZE) {
			ntb->b2b_off = bar_size >> 1;
		} else if (bar_size >= XEON_B2B_MIN_SIZE) {
			ntb->b2b_off = 0;
		} else {
			device_printf(ntb->device,
			    "B2B bar size is too small!\n");
			return (EIO);
		}
	}

	/*
	 * Reset the secondary bar sizes to match the primary bar sizes.
	 * (Except, disable or halve the size of the B2B secondary bar.)
	 */
	for (i = NTB_B2B_BAR_1; i < NTB_MAX_BARS; i++)
		xeon_reset_sbar_size(ntb, i, b2b_bar_num);

	bar_addr = 0;
	if (b2b_bar_num == NTB_CONFIG_BAR)
		bar_addr = addr->bar0_addr;
	else if (b2b_bar_num == NTB_B2B_BAR_1)
		bar_addr = addr->bar2_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2 &&
	    !HAS_FEATURE(ntb, NTB_SPLIT_BAR))
		bar_addr = addr->bar4_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2)
		bar_addr = addr->bar4_addr32;
	else if (b2b_bar_num == NTB_B2B_BAR_3)
		bar_addr = addr->bar5_addr32;
	else
		KASSERT(false, ("invalid bar"));

	intel_ntb_reg_write(8, XEON_SBAR0BASE_OFFSET, bar_addr);

	/*
	 * Other SBARs are normally hit by the PBAR xlat, except for the b2b
	 * register BAR.  The B2B BAR is either disabled above or configured
	 * half-size.  It starts at PBAR xlat + offset.
	 *
	 * Also set up incoming BAR limits == base (zero-length window).
	 */
	xeon_set_sbar_base_and_limit(ntb, addr->bar2_addr64, NTB_B2B_BAR_1,
	    b2b_bar_num);
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr32,
		    NTB_B2B_BAR_2, b2b_bar_num);
		xeon_set_sbar_base_and_limit(ntb, addr->bar5_addr32,
		    NTB_B2B_BAR_3, b2b_bar_num);
	} else
		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr64,
		    NTB_B2B_BAR_2, b2b_bar_num);

	/* Zero incoming translation addrs */
	intel_ntb_reg_write(8, XEON_SBAR2XLAT_OFFSET, 0);
	intel_ntb_reg_write(8, XEON_SBAR4XLAT_OFFSET, 0);

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
		uint32_t xlat_reg, lmt_reg;
		enum ntb_bar bar_num;

		/*
		 * Point the chosen MSI-X MW BAR xlat at the remote LAPIC as
		 * part of the SB01BASE_LOCKUP workaround.
		 */
		bar_num = intel_ntb_mw_to_bar(ntb, ntb->msix_mw_idx);
		bar_get_xlat_params(ntb, bar_num, NULL, &xlat_reg, &lmt_reg);
		if (bar_is_64bit(ntb, bar_num)) {
			intel_ntb_reg_write(8, xlat_reg, MSI_INTEL_ADDR_BASE);
			ntb->msix_xlat = intel_ntb_reg_read(8, xlat_reg);
			intel_ntb_reg_write(8, lmt_reg, 0);
		} else {
			intel_ntb_reg_write(4, xlat_reg, MSI_INTEL_ADDR_BASE);
			ntb->msix_xlat = intel_ntb_reg_read(4, xlat_reg);
			intel_ntb_reg_write(4, lmt_reg, 0);
		}

		ntb->peer_lapic_bar = &ntb->bar_info[bar_num];
	}
	(void)intel_ntb_reg_read(8, XEON_SBAR2XLAT_OFFSET);
	(void)intel_ntb_reg_read(8, XEON_SBAR4XLAT_OFFSET);

	/* Zero outgoing translation limits (whole bar size windows) */
	intel_ntb_reg_write(8, XEON_PBAR2LMT_OFFSET, 0);
	intel_ntb_reg_write(8, XEON_PBAR4LMT_OFFSET, 0);

	/* Set outgoing translation offsets */
	xeon_set_pbar_xlat(ntb, peer_addr->bar2_addr64, NTB_B2B_BAR_1);
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr32, NTB_B2B_BAR_2);
		xeon_set_pbar_xlat(ntb, peer_addr->bar5_addr32, NTB_B2B_BAR_3);
	} else
		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr64, NTB_B2B_BAR_2);

	/* Set the translation offset for B2B registers */
	bar_addr = 0;
	if (b2b_bar_num == NTB_CONFIG_BAR)
		bar_addr = peer_addr->bar0_addr;
	else if (b2b_bar_num == NTB_B2B_BAR_1)
		bar_addr = peer_addr->bar2_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2 &&
	    !HAS_FEATURE(ntb, NTB_SPLIT_BAR))
		bar_addr = peer_addr->bar4_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2)
		bar_addr = peer_addr->bar4_addr32;
	else if (b2b_bar_num == NTB_B2B_BAR_3)
		bar_addr = peer_addr->bar5_addr32;
	else
		KASSERT(false, ("invalid bar"));

	/*
	 * B2B_XLAT_OFFSET is a 64-bit register but can only be written 32 bits
	 * at a time.
	 */
	intel_ntb_reg_write(4, XEON_B2B_XLAT_OFFSETL, bar_addr & 0xffffffff);
	intel_ntb_reg_write(4, XEON_B2B_XLAT_OFFSETU, bar_addr >> 32);
	return (0);
}

static int
xeon_gen3_setup_b2b_mw(struct ntb_softc *ntb)
{
	uint64_t reg;
	uint32_t embarsz, imbarsz;

	/* IMBAR1SZ should be equal to EMBAR1SZ */
	embarsz = pci_read_config(ntb->device, XEON_GEN3_INT_REG_EMBAR1SZ, 1);
	imbarsz = pci_read_config(ntb->device, XEON_GEN3_INT_REG_IMBAR1SZ, 1);
	if (embarsz != imbarsz) {
		device_printf(ntb->device,
		    "IMBAR1SZ (%u) should be equal to EMBAR1SZ (%u)\n",
		    imbarsz, embarsz);
		return (EIO);
	}

	/* IMBAR2SZ should be equal to EMBAR2SZ */
	embarsz = pci_read_config(ntb->device, XEON_GEN3_INT_REG_EMBAR2SZ, 1);
	imbarsz = pci_read_config(ntb->device, XEON_GEN3_INT_REG_IMBAR2SZ, 1);
	if (embarsz != imbarsz) {
		device_printf(ntb->device,
		    "IMBAR2SZ (%u) should be equal to EMBAR2SZ (%u)\n",
		    imbarsz, embarsz);
		return (EIO);
	}

	/* Client will provide the incoming IMBAR1/2XBASE; zero them for now */
	intel_ntb_reg_write(8, XEON_GEN3_REG_IMBAR1XBASE, 0);
	intel_ntb_reg_write(8, XEON_GEN3_REG_IMBAR2XBASE, 0);

	/*
	 * If the value in EMBAR1LIMIT is set equal to the value in EMBAR1,
	 * the memory window for EMBAR1 is disabled.
	 * Note: this is needed to prevent malicious access.
	 */
	reg = pci_read_config(ntb->device, XEON_GEN3_EXT_REG_BAR1BASE, 8);
	intel_ntb_reg_write(8, XEON_GEN3_REG_IMBAR1XLIMIT, reg);

	reg = pci_read_config(ntb->device, XEON_GEN3_EXT_REG_BAR2BASE, 8);
	intel_ntb_reg_write(8, XEON_GEN3_REG_IMBAR2XLIMIT, reg);

	return (0);
}

static inline bool
_xeon_link_is_up(struct ntb_softc *ntb)
{

	if (ntb->conn_type == NTB_CONN_TRANSPARENT)
		return (true);
	return ((ntb->lnk_sta & NTB_LINK_STATUS_ACTIVE) != 0);
}

static inline bool
link_is_up(struct ntb_softc *ntb)
{

	if (ntb->type == NTB_XEON_GEN1 || ntb->type == NTB_XEON_GEN3)
		return (_xeon_link_is_up(ntb) && (ntb->peer_msix_good ||
		    !HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)));

	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
	return ((ntb->ntb_ctl & ATOM_CNTL_LINK_DOWN) == 0);
}

static inline bool
atom_link_is_err(struct ntb_softc *ntb)
{
	uint32_t status;

	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));

	status = intel_ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
	if ((status & ATOM_LTSSMSTATEJMP_FORCEDETECT) != 0)
		return (true);

	status = intel_ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
	return ((status & ATOM_IBIST_ERR_OFLOW) != 0);
}

/* Atom has no link status interrupt; poll the link on that platform. */
static void
atom_link_hb(void *arg)
{
	struct ntb_softc *ntb = arg;
	sbintime_t timo, poll_ts;

	timo = NTB_HB_TIMEOUT * hz;
	poll_ts = ntb->last_ts + timo;

	/*
	 * Delay polling the link status if an interrupt was received, unless
	 * the cached link status says the link is down.
	 */
	if ((sbintime_t)ticks - poll_ts < 0 && link_is_up(ntb)) {
		timo = poll_ts - ticks;
		goto out;
	}

	if (intel_ntb_poll_link(ntb))
		ntb_link_event(ntb->device);

	if (!link_is_up(ntb) && atom_link_is_err(ntb)) {
		/* Link is down with error, proceed with recovery */
		callout_reset(&ntb->lr_timer, 0, recover_atom_link, ntb);
		return;
	}

out:
	callout_reset(&ntb->heartbeat_timer, timo, atom_link_hb, ntb);
}

static void
atom_perform_link_restart(struct ntb_softc *ntb)
{
	uint32_t status;

	/* Driver resets the NTB ModPhy lanes - magic! */
	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0xe0);
	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x40);
	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x60);
	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0x60);

	/* Driver waits 100ms to allow the NTB ModPhy to settle */
	pause("ModPhy", hz / 10);

	/* Clear AER Errors, write to clear */
	status = intel_ntb_reg_read(4, ATOM_ERRCORSTS_OFFSET);
	status &= PCIM_AER_COR_REPLAY_ROLLOVER;
	intel_ntb_reg_write(4, ATOM_ERRCORSTS_OFFSET, status);

	/* Clear unexpected electrical idle event in LTSSM, write to clear */
	status = intel_ntb_reg_read(4, ATOM_LTSSMERRSTS0_OFFSET);
	status |= ATOM_LTSSMERRSTS0_UNEXPECTEDEI;
	intel_ntb_reg_write(4, ATOM_LTSSMERRSTS0_OFFSET, status);

	/* Clear DeSkew Buffer error, write to clear */
	status = intel_ntb_reg_read(4, ATOM_DESKEWSTS_OFFSET);
	status |= ATOM_DESKEWSTS_DBERR;
	intel_ntb_reg_write(4, ATOM_DESKEWSTS_OFFSET, status);

	status = intel_ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
	status &= ATOM_IBIST_ERR_OFLOW;
	intel_ntb_reg_write(4, ATOM_IBSTERRRCRVSTS0_OFFSET, status);

	/* Releases the NTB state machine to allow the link to retrain */
	status = intel_ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
	status &= ~ATOM_LTSSMSTATEJMP_FORCEDETECT;
	intel_ntb_reg_write(4, ATOM_LTSSMSTATEJMP_OFFSET, status);
}

static int
intel_ntb_port_number(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	return (ntb->dev_type == NTB_DEV_USD ? 0 : 1);
}

static int
intel_ntb_peer_port_count(device_t dev)
{

	return (1);
}

static int
intel_ntb_peer_port_number(device_t dev, int pidx)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (pidx != 0)
		return (-EINVAL);

	return (ntb->dev_type == NTB_DEV_USD ? 1 : 0);
}

static int
intel_ntb_peer_port_idx(device_t dev, int port)
{
	int peer_port;

	peer_port = intel_ntb_peer_port_number(dev, 0);
	if (peer_port == -EINVAL || port != peer_port)
		return (-EINVAL);

	return (0);
}

static int
intel_ntb_link_enable(device_t dev, enum ntb_speed speed __unused,
    enum ntb_width width __unused)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	uint32_t cntl;

	intel_ntb_printf(2, "%s\n", __func__);

	if (ntb->type == NTB_ATOM) {
		pci_write_config(ntb->device, NTB_PPD_OFFSET,
		    ntb->ppd | ATOM_PPD_INIT_LINK, 4);
		return (0);
	}

	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
		ntb_link_event(dev);
		return (0);
	}

	cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
	cntl &= ~(NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK);
	cntl |= NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP;
	cntl |= NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP;
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
		cntl |= NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP;
	intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
	return (0);
}

static int
intel_ntb_link_disable(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	uint32_t cntl;

	intel_ntb_printf(2, "%s\n", __func__);

	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
		ntb_link_event(dev);
		return (0);
	}

	cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
	cntl &= ~(NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP);
	cntl &= ~(NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP);
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
		cntl &= ~(NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP);
	cntl |= NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK;
	intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
	return (0);
}

static bool
intel_ntb_link_enabled(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	uint32_t cntl;

	if (ntb->type == NTB_ATOM) {
		cntl = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4);
		return ((cntl & ATOM_PPD_INIT_LINK) != 0);
	}

	if (ntb->conn_type == NTB_CONN_TRANSPARENT)
		return (true);

	cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
	return ((cntl & NTB_CNTL_LINK_DISABLE) == 0);
}

static void
recover_atom_link(void *arg)
{
	struct ntb_softc *ntb = arg;
	unsigned speed, width, oldspeed, oldwidth;
	uint32_t status32;

	atom_perform_link_restart(ntb);

	/*
	 * There is a potential race between the two NTB devices recovering at
	 * the same time.  If the recovery times coincide, the link will not
	 * recover and the driver will be stuck in this loop forever.  Add a
	 * random interval to the recovery time to prevent this race.
	 */
	status32 = arc4random() % ATOM_LINK_RECOVERY_TIME;
	pause("Link", (ATOM_LINK_RECOVERY_TIME + status32) * hz / 1000);

	if (atom_link_is_err(ntb))
		goto retry;

	status32 = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
	if ((status32 & ATOM_CNTL_LINK_DOWN) != 0)
		goto out;

	status32 = intel_ntb_reg_read(4, ntb->reg->lnk_sta);
	width = NTB_LNK_STA_WIDTH(status32);
	speed = status32 & NTB_LINK_SPEED_MASK;

	oldwidth = NTB_LNK_STA_WIDTH(ntb->lnk_sta);
	oldspeed = ntb->lnk_sta & NTB_LINK_SPEED_MASK;
	if (oldwidth != width || oldspeed != speed)
		goto retry;

out:
	callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz, atom_link_hb,
	    ntb);
	return;

retry:
	callout_reset(&ntb->lr_timer, NTB_HB_TIMEOUT * hz, recover_atom_link,
	    ntb);
}

/*
 * Polls the HW link status register(s); returns true if something has changed.
 */
static bool
intel_ntb_poll_link(struct ntb_softc *ntb)
{
	uint32_t ntb_cntl;
	uint16_t reg_val;

	if (ntb->type == NTB_ATOM) {
		ntb_cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
		if (ntb_cntl == ntb->ntb_ctl)
			return (false);

		ntb->ntb_ctl = ntb_cntl;
		ntb->lnk_sta = intel_ntb_reg_read(4, ntb->reg->lnk_sta);
	} else {
		if (ntb->type == NTB_XEON_GEN1)
			db_iowrite_raw(ntb, ntb->self_reg->db_bell,
			    ntb->db_link_mask);

		reg_val = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2);
		if (reg_val == ntb->lnk_sta)
			return (false);

		ntb->lnk_sta = reg_val;

		if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
			if (_xeon_link_is_up(ntb)) {
				if (!ntb->peer_msix_good) {
					callout_reset(&ntb->peer_msix_work, 0,
					    intel_ntb_exchange_msix, ntb);
					return (false);
				}
			} else {
				ntb->peer_msix_good = false;
				ntb->peer_msix_done = false;
			}
		}
	}
	return (true);
}

static inline enum ntb_speed
intel_ntb_link_sta_speed(struct ntb_softc *ntb)
{

	if (!link_is_up(ntb))
		return (NTB_SPEED_NONE);
	return (ntb->lnk_sta & NTB_LINK_SPEED_MASK);
}

static inline enum ntb_width
intel_ntb_link_sta_width(struct ntb_softc *ntb)
{

	if (!link_is_up(ntb))
		return (NTB_WIDTH_NONE);
	return (NTB_LNK_STA_WIDTH(ntb->lnk_sta));
}

SYSCTL_NODE(_hw_ntb, OID_AUTO, debug_info, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Driver state, statistics, and HW registers");

#define NTB_REGSZ_MASK	(3ul << 30)
#define NTB_REG_64	(1ul << 30)
#define NTB_REG_32	(2ul << 30)
#define NTB_REG_16	(3ul << 30)
#define NTB_REG_8	(0ul << 30)

#define NTB_DB_READ	(1ul << 29)
#define NTB_PCI_REG	(1ul << 28)
#define NTB_REGFLAGS_MASK	(NTB_REGSZ_MASK | NTB_DB_READ | NTB_PCI_REG)
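
/*
 * sysctl_handle_register() below decodes its sysctl arg2 as a register
 * specification: the low 28 bits hold the register offset and the high bits
 * select the access width and address space.  For example, a 32-bit read
 * from PCI config space at XEON_UNCERRSTS_OFFSET is registered as:
 *
 *	NTB_REG_32 | NTB_PCI_REG | XEON_UNCERRSTS_OFFSET
 */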

static void
intel_ntb_sysctl_init(struct ntb_softc *ntb)
{
	struct sysctl_oid_list *globals, *tree_par, *regpar, *statpar, *errpar;
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid *tree, *tmptree;

	ctx = device_get_sysctl_ctx(ntb->device);
	globals = SYSCTL_CHILDREN(device_get_sysctl_tree(ntb->device));

	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "link_status",
	    CTLFLAG_RD | CTLTYPE_STRING | CTLFLAG_NEEDGIANT, ntb, 0,
	    sysctl_handle_link_status_human, "A",
	    "Link status (human readable)");
	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "active",
	    CTLFLAG_RD | CTLTYPE_UINT | CTLFLAG_NEEDGIANT, ntb, 0,
	    sysctl_handle_link_status, "IU",
	    "Link status (1=active, 0=inactive)");
	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "admin_up",
	    CTLFLAG_RW | CTLTYPE_UINT | CTLFLAG_NEEDGIANT, ntb, 0,
	    sysctl_handle_link_admin, "IU",
	    "Set/get interface status (1=UP, 0=DOWN)");

	tree = SYSCTL_ADD_NODE(ctx, globals, OID_AUTO, "debug_info",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
	    "Driver state, statistics, and HW registers");
	tree_par = SYSCTL_CHILDREN(tree);

	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "conn_type", CTLFLAG_RD,
	    &ntb->conn_type, 0, "0 - Transparent; 1 - B2B; 2 - Root Port");
	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "dev_type", CTLFLAG_RD,
	    &ntb->dev_type, 0, "0 - USD; 1 - DSD");
	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ppd", CTLFLAG_RD,
	    &ntb->ppd, 0, "Raw PPD register (cached)");

	if (ntb->b2b_mw_idx != B2B_MW_DISABLED) {
		SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "b2b_idx", CTLFLAG_RD,
		    &ntb->b2b_mw_idx, 0,
		    "Index of the MW used for B2B remote register access");
		SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "b2b_off",
		    CTLFLAG_RD, &ntb->b2b_off,
		    "If non-zero, offset of B2B register region in shared MW");
	}

	SYSCTL_ADD_PROC(ctx, tree_par, OID_AUTO, "features",
	    CTLFLAG_RD | CTLTYPE_STRING | CTLFLAG_NEEDGIANT, ntb, 0,
	    sysctl_handle_features, "A", "Features/errata of this NTB device");

	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ntb_ctl", CTLFLAG_RD,
	    __DEVOLATILE(uint32_t *, &ntb->ntb_ctl), 0,
	    "NTB CTL register (cached)");
	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "lnk_sta", CTLFLAG_RD,
	    __DEVOLATILE(uint32_t *, &ntb->lnk_sta), 0,
	    "LNK STA register (cached)");

	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "mw_count", CTLFLAG_RD,
	    &ntb->mw_count, 0, "MW count");
	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "spad_count", CTLFLAG_RD,
	    &ntb->spad_count, 0, "Scratchpad count");
	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_count", CTLFLAG_RD,
	    &ntb->db_count, 0, "Doorbell count");
	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_count", CTLFLAG_RD,
	    &ntb->db_vec_count, 0, "Doorbell vector count");
	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_shift", CTLFLAG_RD,
	    &ntb->db_vec_shift, 0, "Doorbell vector shift");

	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_valid_mask", CTLFLAG_RD,
	    &ntb->db_valid_mask, "Doorbell valid mask");
	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_link_mask", CTLFLAG_RD,
	    &ntb->db_link_mask, "Doorbell link mask");
	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_mask", CTLFLAG_RD,
	    &ntb->db_mask, "Doorbell mask (cached)");

	tmptree = SYSCTL_ADD_NODE(ctx, tree_par, OID_AUTO, "registers",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
	    "Raw HW registers (big-endian)");
	regpar = SYSCTL_CHILDREN(tmptree);

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ntbcntl",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_32 | ntb->reg->ntb_ctl, sysctl_handle_register, "IU",
	    "NTB Control register");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcap",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_32 | 0x19c, sysctl_handle_register, "IU",
	    "NTB Link Capabilities");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcon",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_32 | 0x1a0, sysctl_handle_register, "IU",
	    "NTB Link Control register");

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_mask",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_mask,
	    sysctl_handle_register, "QU", "Doorbell mask register");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_bell",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_bell,
	    sysctl_handle_register, "QU", "Doorbell register");

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat23",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_64 | ntb->xlat_reg->bar2_xlat,
	    sysctl_handle_register, "QU", "Incoming XLAT23 register");
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat4",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar4_xlat,
		    sysctl_handle_register, "IU", "Incoming XLAT4 register");
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat5",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar5_xlat,
		    sysctl_handle_register, "IU", "Incoming XLAT5 register");
	} else {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat45",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
		    NTB_REG_64 | ntb->xlat_reg->bar4_xlat,
		    sysctl_handle_register, "QU", "Incoming XLAT45 register");
	}

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt23",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_64 | ntb->xlat_reg->bar2_limit,
	    sysctl_handle_register, "QU", "Incoming LMT23 register");
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt4",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar4_limit,
		    sysctl_handle_register, "IU", "Incoming LMT4 register");
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt5",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar5_limit,
		    sysctl_handle_register, "IU", "Incoming LMT5 register");
	} else {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt45",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
		    NTB_REG_64 | ntb->xlat_reg->bar4_limit,
		    sysctl_handle_register, "QU", "Incoming LMT45 register");
	}

	if (ntb->type == NTB_ATOM)
		return;

	tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_stats",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Xeon HW statistics");
	statpar = SYSCTL_CHILDREN(tmptree);
	SYSCTL_ADD_PROC(ctx, statpar, OID_AUTO, "upstream_mem_miss",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_16 | XEON_USMEMMISS_OFFSET,
	    sysctl_handle_register, "SU", "Upstream Memory Miss");

	tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_hw_err",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Xeon HW errors");
	errpar = SYSCTL_CHILDREN(tmptree);

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ppd",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_8 | NTB_PCI_REG | NTB_PPD_OFFSET,
	    sysctl_handle_register, "CU", "PPD");

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar23_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR23SZ_OFFSET,
	    sysctl_handle_register, "CU", "PBAR23 SZ (log2)");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar4_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR4SZ_OFFSET,
	    sysctl_handle_register, "CU", "PBAR4 SZ (log2)");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar5_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR5SZ_OFFSET,
	    sysctl_handle_register, "CU", "PBAR5 SZ (log2)");

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR23SZ_OFFSET,
	    sysctl_handle_register, "CU", "SBAR23 SZ (log2)");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR4SZ_OFFSET,
	    sysctl_handle_register, "CU", "SBAR4 SZ (log2)");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR5SZ_OFFSET,
	    sysctl_handle_register, "CU", "SBAR5 SZ (log2)");

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "devsts",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_16 | NTB_PCI_REG | XEON_DEVSTS_OFFSET,
	    sysctl_handle_register, "SU", "DEVSTS");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnksts",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_16 | NTB_PCI_REG | XEON_LINK_STATUS_OFFSET,
	    sysctl_handle_register, "SU", "LNKSTS");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "slnksts",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_16 | NTB_PCI_REG | XEON_SLINK_STATUS_OFFSET,
	    sysctl_handle_register, "SU", "SLNKSTS");

	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "uncerrsts",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_32 | NTB_PCI_REG | XEON_UNCERRSTS_OFFSET,
	    sysctl_handle_register, "IU", "UNCERRSTS");
	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "corerrsts",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_32 | NTB_PCI_REG | XEON_CORERRSTS_OFFSET,
	    sysctl_handle_register, "IU", "CORERRSTS");

	if (ntb->conn_type != NTB_CONN_B2B)
		return;

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat01l",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_32 | XEON_B2B_XLAT_OFFSETL,
	    sysctl_handle_register, "IU", "Outgoing XLAT0L register");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat01u",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_32 | XEON_B2B_XLAT_OFFSETU,
	    sysctl_handle_register, "IU", "Outgoing XLAT0U register");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat23",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off,
	    sysctl_handle_register, "QU", "Outgoing XLAT23 register");
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat4",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
		    sysctl_handle_register, "IU", "Outgoing XLAT4 register");
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat5",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off,
		    sysctl_handle_register, "IU", "Outgoing XLAT5 register");
	} else {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat45",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
		    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
		    sysctl_handle_register, "QU", "Outgoing XLAT45 register");
	}

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt23",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_64 | XEON_PBAR2LMT_OFFSET,
	    sysctl_handle_register, "QU", "Outgoing LMT23 register");
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt4",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
		    NTB_REG_32 | XEON_PBAR4LMT_OFFSET,
		    sysctl_handle_register, "IU", "Outgoing LMT4 register");
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt5",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
		    NTB_REG_32 | XEON_PBAR5LMT_OFFSET,
		    sysctl_handle_register, "IU", "Outgoing LMT5 register");
	} else {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt45",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
		    NTB_REG_64 | XEON_PBAR4LMT_OFFSET,
		    sysctl_handle_register, "QU", "Outgoing LMT45 register");
	}

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar01_base",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_64 | ntb->xlat_reg->bar0_base,
	    sysctl_handle_register, "QU", "Secondary BAR01 base register");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_base",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_64 | ntb->xlat_reg->bar2_base,
	    sysctl_handle_register, "QU", "Secondary BAR23 base register");
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_base",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar4_base,
		    sysctl_handle_register, "IU",
		    "Secondary BAR4 base register");
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_base",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar5_base,
		    sysctl_handle_register, "IU",
		    "Secondary BAR5 base register");
	} else {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar45_base",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
		    NTB_REG_64 | ntb->xlat_reg->bar4_base,
		    sysctl_handle_register, "QU",
		    "Secondary BAR45 base register");
	}
}

static int
sysctl_handle_features(SYSCTL_HANDLER_ARGS)
{
	struct ntb_softc *ntb = arg1;
	struct sbuf sb;
	int error;

	sbuf_new_for_sysctl(&sb, NULL, 256, req);

	sbuf_printf(&sb, "%b", ntb->features, NTB_FEATURES_STR);
	error = sbuf_finish(&sb);
	sbuf_delete(&sb);

	if (error || !req->newptr)
		return (error);
	return (EINVAL);
}

static int
sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS)
{
	struct ntb_softc *ntb = arg1;
	unsigned old, new;
	int error;

	old = intel_ntb_link_enabled(ntb->device);

	error = SYSCTL_OUT(req, &old, sizeof(old));
	if (error != 0 || req->newptr == NULL)
		return (error);

	error = SYSCTL_IN(req, &new, sizeof(new));
	if (error != 0)
		return (error);

	intel_ntb_printf(0, "Admin set interface state to '%sabled'\n",
	    (new != 0) ? "en" : "dis");

	if (new != 0)
		error = intel_ntb_link_enable(ntb->device, NTB_SPEED_AUTO,
		    NTB_WIDTH_AUTO);
	else
		error = intel_ntb_link_disable(ntb->device);
	return (error);
}
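
/*
 * Example usage of the admin_up handler above; the "dev.ntb_hw.0" node name
 * assumes the device attached as unit 0:
 *
 *	# sysctl dev.ntb_hw.0.admin_up=0	<- force the link down
 *	# sysctl dev.ntb_hw.0.admin_up=1	<- re-enable link training
 */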

static int
sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS)
{
	struct ntb_softc *ntb = arg1;
	struct sbuf sb;
	enum ntb_speed speed;
	enum ntb_width width;
	int error;

	sbuf_new_for_sysctl(&sb, NULL, 32, req);

	if (intel_ntb_link_is_up(ntb->device, &speed, &width))
		sbuf_printf(&sb, "up / PCIe Gen %u / Width x%u",
		    (unsigned)speed, (unsigned)width);
	else
		sbuf_printf(&sb, "down");

	error = sbuf_finish(&sb);
	sbuf_delete(&sb);

	if (error || !req->newptr)
		return (error);
	return (EINVAL);
}

static int
sysctl_handle_link_status(SYSCTL_HANDLER_ARGS)
{
	struct ntb_softc *ntb = arg1;
	unsigned res;
	int error;

	res = intel_ntb_link_is_up(ntb->device, NULL, NULL);

	error = SYSCTL_OUT(req, &res, sizeof(res));
	if (error || !req->newptr)
		return (error);
	return (EINVAL);
}

static int
sysctl_handle_register(SYSCTL_HANDLER_ARGS)
{
	struct ntb_softc *ntb;
	const void *outp;
	uintptr_t sz;
	uint64_t umv;
	char be[sizeof(umv)];
	size_t outsz;
	uint32_t reg;
	bool db, pci;
	int error;

	ntb = arg1;
	reg = arg2 & ~NTB_REGFLAGS_MASK;
	sz = arg2 & NTB_REGSZ_MASK;
	db = (arg2 & NTB_DB_READ) != 0;
	pci = (arg2 & NTB_PCI_REG) != 0;

	KASSERT(!(db && pci), ("bogus"));

	if (db) {
		KASSERT(sz == NTB_REG_64, ("bogus"));
		umv = db_ioread(ntb, reg);
		outsz = sizeof(uint64_t);
	} else {
		switch (sz) {
		case NTB_REG_64:
			if (pci)
				umv = pci_read_config(ntb->device, reg, 8);
			else
				umv = intel_ntb_reg_read(8, reg);
			outsz = sizeof(uint64_t);
			break;
		case NTB_REG_32:
			if (pci)
				umv = pci_read_config(ntb->device, reg, 4);
			else
				umv = intel_ntb_reg_read(4, reg);
			outsz = sizeof(uint32_t);
			break;
		case NTB_REG_16:
			if (pci)
				umv = pci_read_config(ntb->device, reg, 2);
			else
				umv = intel_ntb_reg_read(2, reg);
			outsz = sizeof(uint16_t);
			break;
		case NTB_REG_8:
			if (pci)
				umv = pci_read_config(ntb->device, reg, 1);
			else
				umv = intel_ntb_reg_read(1, reg);
			outsz = sizeof(uint8_t);
			break;
		default:
			panic("bogus");
			break;
		}
	}

	/* Encode big-endian so that sysctl -x output is legible. */
	be64enc(be, umv);
	outp = ((char *)be) + sizeof(umv) - outsz;

	error = SYSCTL_OUT(req, outp, outsz);
	if (error || !req->newptr)
		return (error);
	return (EINVAL);
}

static unsigned
intel_ntb_user_mw_to_idx(struct ntb_softc *ntb, unsigned uidx)
{

	if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 &&
	    uidx >= ntb->b2b_mw_idx) ||
	    (ntb->msix_mw_idx != B2B_MW_DISABLED && uidx >= ntb->msix_mw_idx))
		uidx++;
	if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 &&
	    uidx >= ntb->b2b_mw_idx) &&
	    (ntb->msix_mw_idx != B2B_MW_DISABLED && uidx >= ntb->msix_mw_idx))
		uidx++;
	return (uidx);
}
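
/*
 * Worked example for intel_ntb_user_mw_to_idx() above, with hypothetical
 * values b2b_mw_idx = 1 (and b2b_off == 0, i.e. the whole MW is consumed)
 * and msix_mw_idx = 2: user index 0 maps to internal MW 0, while user index
 * 1 is first bumped past the B2B MW (1 -> 2) and then past the MSI-X MW
 * (2 -> 3).  Consumers thus see a dense index space with both reserved MWs
 * hidden.
 */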

#ifndef EARLY_AP_STARTUP
static int msix_ready;

static void
intel_ntb_msix_ready(void *arg __unused)
{

	msix_ready = 1;
}
SYSINIT(intel_ntb_msix_ready, SI_SUB_SMP, SI_ORDER_ANY,
    intel_ntb_msix_ready, NULL);
#endif

static void
intel_ntb_exchange_msix(void *ctx)
{
	struct ntb_softc *ntb;
	uint32_t val;
	unsigned i;

	ntb = ctx;

	if (ntb->peer_msix_good)
		goto msix_good;
	if (ntb->peer_msix_done)
		goto msix_done;

#ifndef EARLY_AP_STARTUP
	/*
	 * Block MSI-X negotiation until SMP has started and the IRQs have
	 * been reshuffled.
	 */
	if (!msix_ready)
		goto reschedule;
#endif

	intel_ntb_get_msix_info(ntb);
	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
		intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_DATA0 + i,
		    ntb->msix_data[i].nmd_data);
		intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_OFS0 + i,
		    ntb->msix_data[i].nmd_ofs - ntb->msix_xlat);
	}
	intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_GUARD,
	    NTB_MSIX_VER_GUARD);

	intel_ntb_spad_read(ntb->device, NTB_MSIX_GUARD, &val);
	if (val != NTB_MSIX_VER_GUARD)
		goto reschedule;

	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
		intel_ntb_spad_read(ntb->device, NTB_MSIX_DATA0 + i, &val);
		intel_ntb_printf(2, "remote MSIX data(%u): 0x%x\n", i, val);
		ntb->peer_msix_data[i].nmd_data = val;
		intel_ntb_spad_read(ntb->device, NTB_MSIX_OFS0 + i, &val);
		intel_ntb_printf(2, "remote MSIX addr(%u): 0x%x\n", i, val);
		ntb->peer_msix_data[i].nmd_ofs = val;
	}

	ntb->peer_msix_done = true;

msix_done:
	intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_DONE,
	    NTB_MSIX_RECEIVED);
	intel_ntb_spad_read(ntb->device, NTB_MSIX_DONE, &val);
	if (val != NTB_MSIX_RECEIVED)
		goto reschedule;

	intel_ntb_spad_clear(ntb->device);
	ntb->peer_msix_good = true;
	/* Give peer time to see our NTB_MSIX_RECEIVED. */
	goto reschedule;

msix_good:
	intel_ntb_poll_link(ntb);
	ntb_link_event(ntb->device);
	return;

reschedule:
	ntb->lnk_sta = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2);
	if (_xeon_link_is_up(ntb)) {
		callout_reset(&ntb->peer_msix_work,
		    hz * (ntb->peer_msix_good ? 2 : 1) / 10,
		    intel_ntb_exchange_msix, ntb);
	} else
		intel_ntb_spad_clear(ntb->device);
}

/*
 * Public API to the rest of the OS
 */

static uint8_t
intel_ntb_spad_count(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	return (ntb->spad_count);
}

static uint8_t
intel_ntb_mw_count(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	uint8_t res;

	res = ntb->mw_count;
	if (ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0)
		res--;
	if (ntb->msix_mw_idx != B2B_MW_DISABLED)
		res--;
	return (res);
}

static int
intel_ntb_spad_write(device_t dev, unsigned int idx, uint32_t val)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (idx >= ntb->spad_count)
		return (EINVAL);

	intel_ntb_reg_write(4, ntb->self_reg->spad + idx * 4, val);

	return (0);
}

/*
 * Zeros the local scratchpad.
 */
static void
intel_ntb_spad_clear(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	unsigned i;

	for (i = 0; i < ntb->spad_count; i++)
		intel_ntb_spad_write(dev, i, 0);
}

static int
intel_ntb_spad_read(device_t dev, unsigned int idx, uint32_t *val)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (idx >= ntb->spad_count)
		return (EINVAL);

	*val = intel_ntb_reg_read(4, ntb->self_reg->spad + idx * 4);

	return (0);
}

static int
intel_ntb_peer_spad_write(device_t dev, unsigned int idx, uint32_t val)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (idx >= ntb->spad_count)
		return (EINVAL);

	if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP))
		intel_ntb_mw_write(4, XEON_SPAD_OFFSET + idx * 4, val);
	else
		intel_ntb_reg_write(4, ntb->peer_reg->spad + idx * 4, val);

	return (0);
}

static int
intel_ntb_peer_spad_read(device_t dev, unsigned int idx, uint32_t *val)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (idx >= ntb->spad_count)
		return (EINVAL);

	if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP))
		*val = intel_ntb_mw_read(4, XEON_SPAD_OFFSET + idx * 4);
	else
		*val = intel_ntb_reg_read(4, ntb->peer_reg->spad + idx * 4);

	return (0);
}
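
/*
 * Example: the scratchpad KPI above is typically used for small handshakes.
 * A minimal sketch, assuming the ntb_spad_read()/ntb_peer_spad_write()
 * wrappers from ntb.h and a hypothetical NTB device handle "child":
 *
 *	uint32_t val;
 *
 *	ntb_peer_spad_write(child, 0, 0xdeadbeef);	<- post to the peer
 *	ntb_spad_read(child, 0, &val);		<- read what the peer posted
 */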

static int
intel_ntb_mw_get_range(device_t dev, unsigned mw_idx, vm_paddr_t *base,
    caddr_t *vbase, size_t *size, size_t *align, size_t *align_size,
    bus_addr_t *plimit)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	struct ntb_pci_bar_info *bar;
	bus_addr_t limit;
	size_t bar_b2b_off;
	enum ntb_bar bar_num;

	if (mw_idx >= intel_ntb_mw_count(dev))
		return (EINVAL);
	mw_idx = intel_ntb_user_mw_to_idx(ntb, mw_idx);

	bar_num = intel_ntb_mw_to_bar(ntb, mw_idx);
	bar = &ntb->bar_info[bar_num];
	bar_b2b_off = 0;
	if (mw_idx == ntb->b2b_mw_idx) {
		KASSERT(ntb->b2b_off != 0,
		    ("user shouldn't get non-shared b2b mw"));
		bar_b2b_off = ntb->b2b_off;
	}

	if (bar_is_64bit(ntb, bar_num))
		limit = BUS_SPACE_MAXADDR;
	else
		limit = BUS_SPACE_MAXADDR_32BIT;

	if (base != NULL)
		*base = bar->pbase + bar_b2b_off;
	if (vbase != NULL)
		*vbase = bar->vbase + bar_b2b_off;
	if (size != NULL)
		*size = bar->size - bar_b2b_off;
	if (align != NULL)
		*align = bar->size;
	if (align_size != NULL)
		*align_size = 1;
	if (plimit != NULL)
		*plimit = limit;
	return (0);
}
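
/*
 * A minimal consumer sketch for the MW methods above, assuming the
 * ntb_mw_get_range()/ntb_mw_set_trans() wrappers from ntb.h, a hypothetical
 * NTB device handle "child", and a caller-allocated bus address "dma_addr":
 *
 *	vm_paddr_t base;
 *	caddr_t vbase;
 *	size_t size, align, align_size;
 *	bus_addr_t plimit;
 *
 *	if (ntb_mw_get_range(child, 0, &base, &vbase, &size, &align,
 *	    &align_size, &plimit) == 0 &&
 *	    ntb_mw_set_trans(child, 0, dma_addr, size) == 0) {
 *		...
 *	}
 *
 * After both calls succeed, writes arriving from the peer through MW 0 are
 * translated to dma_addr on this side.
 */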

static int
intel_ntb_mw_set_trans(device_t dev, unsigned idx, bus_addr_t addr, size_t size)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	struct ntb_pci_bar_info *bar;
	uint64_t base, limit, reg_val;
	size_t bar_size, mw_size;
	uint32_t base_reg, xlat_reg, limit_reg;
	enum ntb_bar bar_num;

	if (idx >= intel_ntb_mw_count(dev))
		return (EINVAL);
	idx = intel_ntb_user_mw_to_idx(ntb, idx);

	bar_num = intel_ntb_mw_to_bar(ntb, idx);
	bar = &ntb->bar_info[bar_num];

	bar_size = bar->size;
	if (idx == ntb->b2b_mw_idx)
		mw_size = bar_size - ntb->b2b_off;
	else
		mw_size = bar_size;

	/*
	 * The hardware requires that addr be aligned to the BAR size;
	 * e.g., a 64MB BAR accepts only 64MB-aligned translation addresses.
	 */
	if ((addr & (bar_size - 1)) != 0)
		return (EINVAL);

	if (size > mw_size)
		return (EINVAL);

	bar_get_xlat_params(ntb, bar_num, &base_reg, &xlat_reg, &limit_reg);

	limit = 0;
	if (bar_is_64bit(ntb, bar_num)) {
		base = intel_ntb_reg_read(8, base_reg) & BAR_HIGH_MASK;

		if (limit_reg != 0 && size != mw_size)
			limit = base + size;
		else
			limit = base + mw_size;

		/* Set and verify translation address */
		intel_ntb_reg_write(8, xlat_reg, addr);
		reg_val = intel_ntb_reg_read(8, xlat_reg) & BAR_HIGH_MASK;
		if (reg_val != addr) {
			intel_ntb_reg_write(8, xlat_reg, 0);
			return (EIO);
		}

		/* Set and verify the limit */
		intel_ntb_reg_write(8, limit_reg, limit);
		reg_val = intel_ntb_reg_read(8, limit_reg) & BAR_HIGH_MASK;
		if (reg_val != limit) {
			intel_ntb_reg_write(8, limit_reg, base);
			intel_ntb_reg_write(8, xlat_reg, 0);
			return (EIO);
		}

		if (ntb->type == NTB_XEON_GEN3) {
			limit = base + size;

			/* set EMBAR1/2XLIMIT */
			if (!idx)
				intel_ntb_reg_write(8,
				    XEON_GEN3_REG_EMBAR1XLIMIT, limit);
			else
				intel_ntb_reg_write(8,
				    XEON_GEN3_REG_EMBAR2XLIMIT, limit);
		}
	} else {
		/* Configure 32-bit (split) BAR MW */
		if (ntb->type == NTB_XEON_GEN3)
			return (EIO);

		if ((addr & UINT32_MAX) != addr)
			return (ERANGE);
		if (((addr + size) & UINT32_MAX) != (addr + size))
			return (ERANGE);

		base = intel_ntb_reg_read(4, base_reg) & BAR_HIGH_MASK;

		if (limit_reg != 0 && size != mw_size)
			limit = base + size;

		/* Set and verify translation address */
		intel_ntb_reg_write(4, xlat_reg, addr);
		reg_val = intel_ntb_reg_read(4, xlat_reg) & BAR_HIGH_MASK;
		if (reg_val != addr) {
			intel_ntb_reg_write(4, xlat_reg, 0);
			return (EIO);
		}

		/* Set and verify the limit */
		intel_ntb_reg_write(4, limit_reg, limit);
		reg_val = intel_ntb_reg_read(4, limit_reg) & BAR_HIGH_MASK;
		if (reg_val != limit) {
			intel_ntb_reg_write(4, limit_reg, base);
			intel_ntb_reg_write(4, xlat_reg, 0);
			return (EIO);
		}
	}
	return (0);
}

static int
intel_ntb_mw_clear_trans(device_t dev, unsigned mw_idx)
{

	return (intel_ntb_mw_set_trans(dev, mw_idx, 0, 0));
}

static int
intel_ntb_mw_get_wc(device_t dev, unsigned idx, vm_memattr_t *mode)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	struct ntb_pci_bar_info *bar;

	if (idx >= intel_ntb_mw_count(dev))
		return (EINVAL);
	idx = intel_ntb_user_mw_to_idx(ntb, idx);

	bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, idx)];
	*mode = bar->map_mode;
	return (0);
}

static int
intel_ntb_mw_set_wc(device_t dev, unsigned idx, vm_memattr_t mode)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (idx >= intel_ntb_mw_count(dev))
		return (EINVAL);

	idx = intel_ntb_user_mw_to_idx(ntb, idx);
	return (intel_ntb_mw_set_wc_internal(ntb, idx, mode));
}

static int
intel_ntb_mw_set_wc_internal(struct ntb_softc *ntb, unsigned idx,
    vm_memattr_t mode)
{
	struct ntb_pci_bar_info *bar;
	int rc;

	bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, idx)];
	if (bar->map_mode == mode)
		return (0);

	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size, mode);
	if (rc == 0)
		bar->map_mode = mode;

	return (rc);
}

static void
intel_ntb_peer_db_set(device_t dev, uint64_t bits)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	uint64_t db;

	if ((bits & ~ntb->db_valid_mask) != 0) {
		device_printf(ntb->device, "Invalid doorbell bits %#jx\n",
		    (uintmax_t)bits);
		return;
	}

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
		struct ntb_pci_bar_info *lapic;
		unsigned i;

		lapic = ntb->peer_lapic_bar;

		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
			if ((bits & intel_ntb_db_vector_mask(dev, i)) != 0)
				bus_space_write_4(lapic->pci_bus_tag,
				    lapic->pci_bus_handle,
				    ntb->peer_msix_data[i].nmd_ofs,
				    ntb->peer_msix_data[i].nmd_data);
		}
		return;
	}

	if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
		intel_ntb_mw_write(2, XEON_PDOORBELL_OFFSET, bits);
		return;
	}

	if (ntb->type == NTB_XEON_GEN3) {
		/*
		 * Gen3 exposes an individual doorbell register per bit, at
		 * a 4-byte stride; ring each requested doorbell in turn.
		 */
		while (bits != 0) {
			/* ffsll() returns the lowest set bit, 1-based. */
			db = ffsll(bits);

			intel_ntb_reg_write(1,
			    ntb->peer_reg->db_bell + (db - 1) * 4, 0x1);

			/* Clear the lowest set bit. */
			bits = bits & (bits - 1);
		}
	} else {
		db_iowrite(ntb, ntb->peer_reg->db_bell, bits);
	}
}
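
/*
 * Example: a consumer rings the peer's doorbell 0 through the NTB KPI,
 * assuming the ntb_peer_db_set() wrapper from ntb.h and a hypothetical NTB
 * device handle "child":
 *
 *	ntb_peer_db_set(child, 1ull << 0);
 */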

static int
intel_ntb_peer_db_addr(device_t dev, bus_addr_t *db_addr, vm_size_t *db_size)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	struct ntb_pci_bar_info *bar;
	uint64_t regoff;

	KASSERT((db_addr != NULL && db_size != NULL), ("must be non-NULL"));

	if (!HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
		bar = &ntb->bar_info[NTB_CONFIG_BAR];
		regoff = ntb->peer_reg->db_bell;
	} else {
		KASSERT(ntb->b2b_mw_idx != B2B_MW_DISABLED,
		    ("invalid b2b idx"));

		bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx)];
		regoff = XEON_PDOORBELL_OFFSET;
	}
	KASSERT(bar->pci_bus_tag != X86_BUS_SPACE_IO, ("uh oh"));

	/* HACK: Specific to current x86 bus implementation. */
	*db_addr = ((uint64_t)bar->pci_bus_handle + regoff);
	*db_size = ntb->reg->db_size;
	return (0);
}

static uint64_t
intel_ntb_db_valid_mask(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	return (ntb->db_valid_mask);
}

static int
intel_ntb_db_vector_count(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	return (ntb->db_vec_count);
}

static uint64_t
intel_ntb_db_vector_mask(device_t dev, uint32_t vector)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (vector >= ntb->db_vec_count)
		return (0);
	return (ntb->db_valid_mask & intel_ntb_vec_mask(ntb, vector));
}

static bool
intel_ntb_link_is_up(device_t dev, enum ntb_speed *speed, enum ntb_width *width)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (speed != NULL)
		*speed = intel_ntb_link_sta_speed(ntb);
	if (width != NULL)
		*width = intel_ntb_link_sta_width(ntb);
	return (link_is_up(ntb));
}

static void
save_bar_parameters(struct ntb_pci_bar_info *bar)
{

	bar->pci_bus_tag = rman_get_bustag(bar->pci_resource);
	bar->pci_bus_handle = rman_get_bushandle(bar->pci_resource);
	bar->pbase = rman_get_start(bar->pci_resource);
	bar->size = rman_get_size(bar->pci_resource);
	bar->vbase = rman_get_virtual(bar->pci_resource);
}

static device_method_t ntb_intel_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		intel_ntb_probe),
	DEVMETHOD(device_attach,	intel_ntb_attach),
	DEVMETHOD(device_detach,	intel_ntb_detach),
	/* Bus interface */
	DEVMETHOD(bus_child_location_str, ntb_child_location_str),
	DEVMETHOD(bus_print_child,	ntb_print_child),
	DEVMETHOD(bus_get_dma_tag,	ntb_get_dma_tag),
	/* NTB interface */
	DEVMETHOD(ntb_port_number,	intel_ntb_port_number),
	DEVMETHOD(ntb_peer_port_count,	intel_ntb_peer_port_count),
	DEVMETHOD(ntb_peer_port_number,	intel_ntb_peer_port_number),
	DEVMETHOD(ntb_peer_port_idx, 	intel_ntb_peer_port_idx),
	DEVMETHOD(ntb_link_is_up,	intel_ntb_link_is_up),
	DEVMETHOD(ntb_link_enable,	intel_ntb_link_enable),
	DEVMETHOD(ntb_link_disable,	intel_ntb_link_disable),
	DEVMETHOD(ntb_link_enabled,	intel_ntb_link_enabled),
	DEVMETHOD(ntb_mw_count,		intel_ntb_mw_count),
	DEVMETHOD(ntb_mw_get_range,	intel_ntb_mw_get_range),
	DEVMETHOD(ntb_mw_set_trans,	intel_ntb_mw_set_trans),
	DEVMETHOD(ntb_mw_clear_trans,	intel_ntb_mw_clear_trans),
	DEVMETHOD(ntb_mw_get_wc,	intel_ntb_mw_get_wc),
	DEVMETHOD(ntb_mw_set_wc,	intel_ntb_mw_set_wc),
	DEVMETHOD(ntb_spad_count,	intel_ntb_spad_count),
	DEVMETHOD(ntb_spad_clear,	intel_ntb_spad_clear),
	DEVMETHOD(ntb_spad_write,	intel_ntb_spad_write),
	DEVMETHOD(ntb_spad_read,	intel_ntb_spad_read),
	DEVMETHOD(ntb_peer_spad_write,	intel_ntb_peer_spad_write),
	DEVMETHOD(ntb_peer_spad_read,	intel_ntb_peer_spad_read),
	DEVMETHOD(ntb_db_valid_mask,	intel_ntb_db_valid_mask),
	DEVMETHOD(ntb_db_vector_count,	intel_ntb_db_vector_count),
	DEVMETHOD(ntb_db_vector_mask,	intel_ntb_db_vector_mask),
	DEVMETHOD(ntb_db_clear,		intel_ntb_db_clear),
	DEVMETHOD(ntb_db_clear_mask,	intel_ntb_db_clear_mask),
	DEVMETHOD(ntb_db_read,		intel_ntb_db_read),
	DEVMETHOD(ntb_db_set_mask,	intel_ntb_db_set_mask),
	DEVMETHOD(ntb_peer_db_addr,	intel_ntb_peer_db_addr),
	DEVMETHOD(ntb_peer_db_set,	intel_ntb_peer_db_set),
	DEVMETHOD_END
};

static DEFINE_CLASS_0(ntb_hw, ntb_intel_driver, ntb_intel_methods,
    sizeof(struct ntb_softc));
DRIVER_MODULE(ntb_hw_intel, pci, ntb_intel_driver, ntb_hw_devclass, NULL, NULL);
MODULE_DEPEND(ntb_hw_intel, ntb, 1, 1, 1);
MODULE_VERSION(ntb_hw_intel, 1);
MODULE_PNP_INFO("W32:vendor/device;D:#", pci, ntb_hw_intel, pci_ids,
    nitems(pci_ids));