xref: /freebsd/sys/dev/ntb/ntb_hw/ntb_hw_intel.c (revision a90b9d0159070121c221b966469c3e36d912bf82)
1 /*-
2  * Copyright (c) 2016-2017 Alexander Motin <mav@FreeBSD.org>
3  * Copyright (C) 2013 Intel Corporation
4  * Copyright (C) 2015 EMC Corporation
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*
30  * The Non-Transparent Bridge (NTB) is a device that allows you to connect
 * two or more systems using PCI-e links, providing remote memory access.
32  *
33  * This module contains a driver for NTB hardware in Intel Xeon/Atom CPUs.
34  *
35  * NOTE: Much of the code in this module is shared with Linux. Any patches may
36  * be picked up and redistributed in Linux with a dual GPL/BSD license.
37  */
38 
39 #include <sys/param.h>
40 #include <sys/kernel.h>
41 #include <sys/systm.h>
42 #include <sys/bus.h>
43 #include <sys/endian.h>
44 #include <sys/interrupt.h>
45 #include <sys/lock.h>
46 #include <sys/malloc.h>
47 #include <sys/module.h>
48 #include <sys/mutex.h>
49 #include <sys/pciio.h>
50 #include <sys/taskqueue.h>
51 #include <sys/tree.h>
52 #include <sys/queue.h>
53 #include <sys/rman.h>
54 #include <sys/sbuf.h>
55 #include <sys/sysctl.h>
56 #include <vm/vm.h>
57 #include <vm/pmap.h>
58 #include <machine/bus.h>
59 #include <machine/intr_machdep.h>
60 #include <machine/resource.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 #include <dev/iommu/iommu.h>
64 
65 #include "ntb_hw_intel.h"
66 #include "../ntb.h"
67 
68 #define MAX_MSIX_INTERRUPTS	\
69 	MAX(MAX(XEON_DB_COUNT, ATOM_DB_COUNT), XEON_GEN3_DB_COUNT)
70 
71 #define NTB_HB_TIMEOUT		1 /* second */
72 #define ATOM_LINK_RECOVERY_TIME	500 /* ms */
73 #define BAR_HIGH_MASK		(~((1ull << 12) - 1))
74 
75 #define	NTB_MSIX_VER_GUARD	0xaabbccdd
76 #define	NTB_MSIX_RECEIVED	0xe0f0e0f0
77 
78 /*
79  * PCI constants could be somewhere more generic, but aren't defined/used in
80  * pci.c.
81  */
82 #define	PCI_MSIX_ENTRY_SIZE		16
83 #define	PCI_MSIX_ENTRY_LOWER_ADDR	0
84 #define	PCI_MSIX_ENTRY_UPPER_ADDR	4
85 #define	PCI_MSIX_ENTRY_DATA		8
86 
87 enum ntb_device_type {
88 	NTB_XEON_GEN1,
89 	NTB_XEON_GEN3,
90 	NTB_XEON_GEN4,
91 	NTB_ATOM
92 };
93 
94 /* ntb_conn_type are hardware numbers, cannot change. */
95 enum ntb_conn_type {
96 	NTB_CONN_TRANSPARENT = 0,
97 	NTB_CONN_B2B = 1,
98 	NTB_CONN_RP = 2,
99 };
100 
101 enum ntb_b2b_direction {
102 	NTB_DEV_USD = 0,
103 	NTB_DEV_DSD = 1,
104 };
105 
106 enum ntb_bar {
107 	NTB_CONFIG_BAR = 0,
108 	NTB_B2B_BAR_1,
109 	NTB_B2B_BAR_2,
110 	NTB_B2B_BAR_3,
111 	NTB_MAX_BARS
112 };
113 
114 enum {
115 	NTB_MSIX_GUARD = 0,
116 	NTB_MSIX_DATA0,
117 	NTB_MSIX_DATA1,
118 	NTB_MSIX_DATA2,
119 	NTB_MSIX_OFS0,
120 	NTB_MSIX_OFS1,
121 	NTB_MSIX_OFS2,
122 	NTB_MSIX_DONE,
123 	NTB_MAX_MSIX_SPAD
124 };
125 
126 /* Device features and workarounds */
127 #define HAS_FEATURE(ntb, feature)	\
128 	(((ntb)->features & (feature)) != 0)
129 
130 struct ntb_hw_info {
131 	uint32_t		device_id;
132 	const char		*desc;
133 	enum ntb_device_type	type;
134 	uint32_t		features;
135 };
136 
137 struct ntb_pci_bar_info {
138 	bus_space_tag_t		pci_bus_tag;
139 	bus_space_handle_t	pci_bus_handle;
140 	int			pci_resource_id;
141 	struct resource		*pci_resource;
142 	vm_paddr_t		pbase;
143 	caddr_t			vbase;
144 	vm_size_t		size;
145 	vm_memattr_t		map_mode;
146 
147 	/* Configuration register offsets */
148 	uint32_t		psz_off;
149 	uint32_t		ssz_off;
150 	uint32_t		pbarxlat_off;
151 };
152 
153 struct ntb_int_info {
154 	struct resource	*res;
155 	int		rid;
156 	void		*tag;
157 };
158 
159 struct ntb_vec {
160 	struct ntb_softc	*ntb;
161 	uint32_t		num;
162 	unsigned		masked;
163 };
164 
165 struct ntb_reg {
166 	uint32_t	ntb_ctl;
167 	uint32_t	lnk_sta;
168 	uint8_t		db_size;
169 	unsigned	mw_bar[NTB_MAX_BARS];
170 };
171 
172 struct ntb_alt_reg {
173 	uint32_t	db_bell;
174 	uint32_t	db_mask;
175 	uint32_t	db_clear;
176 	uint32_t	spad;
177 };
178 
179 struct ntb_xlat_reg {
180 	uint32_t	bar0_base;
181 	uint32_t	bar2_base;
182 	uint32_t	bar4_base;
183 	uint32_t	bar5_base;
184 
185 	uint32_t	bar2_xlat;
186 	uint32_t	bar4_xlat;
187 	uint32_t	bar5_xlat;
188 
189 	uint32_t	bar2_limit;
190 	uint32_t	bar4_limit;
191 	uint32_t	bar5_limit;
192 };
193 
194 struct ntb_b2b_addr {
195 	uint64_t	bar0_addr;
196 	uint64_t	bar2_addr64;
197 	uint64_t	bar4_addr64;
198 	uint64_t	bar4_addr32;
199 	uint64_t	bar5_addr32;
200 };
201 
202 struct ntb_msix_data {
203 	uint32_t	nmd_ofs;
204 	uint32_t	nmd_data;
205 };
206 
207 struct ntb_softc {
208 	/* ntb.c context. Do not move! Must go first! */
209 	void			*ntb_store;
210 
211 	device_t		device;
212 	enum ntb_device_type	type;
213 	uint32_t		features;
214 
215 	struct ntb_pci_bar_info	bar_info[NTB_MAX_BARS];
216 	struct ntb_int_info	int_info[MAX_MSIX_INTERRUPTS];
217 	uint32_t		allocated_interrupts;
218 
219 	struct ntb_msix_data	peer_msix_data[XEON_NONLINK_DB_MSIX_BITS];
220 	struct ntb_msix_data	msix_data[XEON_NONLINK_DB_MSIX_BITS];
221 	bool			peer_msix_good;
222 	bool			peer_msix_done;
223 	struct ntb_pci_bar_info	*peer_lapic_bar;
224 	struct callout		peer_msix_work;
225 
226 	bus_dma_tag_t		bar0_dma_tag;
227 	bus_dmamap_t		bar0_dma_map;
228 
229 	struct callout		heartbeat_timer;
230 	struct callout		lr_timer;
231 
232 	struct ntb_vec		*msix_vec;
233 
234 	uint32_t		ppd;
235 	enum ntb_conn_type	conn_type;
236 	enum ntb_b2b_direction	dev_type;
237 
238 	/* Offset of peer bar0 in B2B BAR */
239 	uint64_t			b2b_off;
240 	/* Memory window used to access peer bar0 */
241 #define B2B_MW_DISABLED			UINT8_MAX
242 	uint8_t				b2b_mw_idx;
243 	uint32_t			msix_xlat;
244 	uint8_t				msix_mw_idx;
245 
246 	uint8_t				mw_count;
247 	uint8_t				spad_count;
248 	uint8_t				db_count;
249 	uint8_t				db_vec_count;
250 	uint8_t				db_vec_shift;
251 
252 	/* Protects local db_mask. */
253 #define DB_MASK_LOCK(sc)	mtx_lock_spin(&(sc)->db_mask_lock)
254 #define DB_MASK_UNLOCK(sc)	mtx_unlock_spin(&(sc)->db_mask_lock)
255 #define DB_MASK_ASSERT(sc,f)	mtx_assert(&(sc)->db_mask_lock, (f))
256 	struct mtx			db_mask_lock;
257 
258 	volatile uint32_t		ntb_ctl;
259 	volatile uint32_t		lnk_sta;
260 
261 	uint64_t			db_valid_mask;
262 	uint64_t			db_link_mask;
263 	uint64_t			db_mask;
264 	uint64_t			fake_db;	/* NTB_SB01BASE_LOCKUP*/
265 	uint64_t			force_db;	/* NTB_SB01BASE_LOCKUP*/
266 
267 	int				last_ts;	/* ticks @ last irq */
268 
269 	const struct ntb_reg		*reg;
270 	const struct ntb_alt_reg	*self_reg;
271 	const struct ntb_alt_reg	*peer_reg;
272 	const struct ntb_xlat_reg	*xlat_reg;
273 };
274 
275 #ifdef __i386__
276 static __inline uint64_t
277 bus_space_read_8(bus_space_tag_t tag, bus_space_handle_t handle,
278     bus_size_t offset)
279 {
280 
281 	return (bus_space_read_4(tag, handle, offset) |
282 	    ((uint64_t)bus_space_read_4(tag, handle, offset + 4)) << 32);
283 }
284 
285 static __inline void
286 bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t handle,
287     bus_size_t offset, uint64_t val)
288 {
289 
290 	bus_space_write_4(tag, handle, offset, val);
291 	bus_space_write_4(tag, handle, offset + 4, val >> 32);
292 }
293 #endif
294 
295 #define intel_ntb_bar_read(SIZE, bar, offset) \
296 	    bus_space_read_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
297 	    ntb->bar_info[(bar)].pci_bus_handle, (offset))
298 #define intel_ntb_bar_write(SIZE, bar, offset, val) \
299 	    bus_space_write_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
300 	    ntb->bar_info[(bar)].pci_bus_handle, (offset), (val))
301 #define intel_ntb_reg_read(SIZE, offset) \
302 	    intel_ntb_bar_read(SIZE, NTB_CONFIG_BAR, offset)
303 #define intel_ntb_reg_write(SIZE, offset, val) \
304 	    intel_ntb_bar_write(SIZE, NTB_CONFIG_BAR, offset, val)
305 #define intel_ntb_mw_read(SIZE, offset) \
306 	    intel_ntb_bar_read(SIZE, intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \
307 		offset)
308 #define intel_ntb_mw_write(SIZE, offset, val) \
309 	    intel_ntb_bar_write(SIZE, intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \
310 		offset, val)
311 
312 static int intel_ntb_probe(device_t device);
313 static int intel_ntb_attach(device_t device);
314 static int intel_ntb_detach(device_t device);
315 static uint64_t intel_ntb_db_valid_mask(device_t dev);
316 static void intel_ntb_spad_clear(device_t dev);
317 static uint64_t intel_ntb_db_vector_mask(device_t dev, uint32_t vector);
318 static bool intel_ntb_link_is_up(device_t dev, enum ntb_speed *speed,
319     enum ntb_width *width);
320 static int intel_ntb_link_enable(device_t dev, enum ntb_speed speed,
321     enum ntb_width width);
322 static int intel_ntb_link_disable(device_t dev);
323 static int intel_ntb_spad_read(device_t dev, unsigned int idx, uint32_t *val);
324 static int intel_ntb_peer_spad_write(device_t dev, unsigned int idx, uint32_t val);
325 
326 static unsigned intel_ntb_user_mw_to_idx(struct ntb_softc *, unsigned uidx);
327 static inline enum ntb_bar intel_ntb_mw_to_bar(struct ntb_softc *, unsigned mw);
328 static inline bool bar_is_64bit(struct ntb_softc *, enum ntb_bar);
329 static inline void bar_get_xlat_params(struct ntb_softc *, enum ntb_bar,
330     uint32_t *base, uint32_t *xlat, uint32_t *lmt);
331 static int intel_ntb_map_pci_bars(struct ntb_softc *ntb);
332 static int intel_ntb_mw_set_wc_internal(struct ntb_softc *, unsigned idx,
333     vm_memattr_t);
334 static void print_map_success(struct ntb_softc *, struct ntb_pci_bar_info *,
335     const char *);
336 static int map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar);
337 static int map_memory_window_bar(struct ntb_softc *ntb,
338     struct ntb_pci_bar_info *bar);
339 static void intel_ntb_unmap_pci_bar(struct ntb_softc *ntb);
340 static int intel_ntb_remap_msix(device_t, uint32_t desired, uint32_t avail);
341 static int intel_ntb_init_isr(struct ntb_softc *ntb);
342 static int intel_ntb_xeon_gen3_init_isr(struct ntb_softc *ntb);
343 static int intel_ntb_setup_legacy_interrupt(struct ntb_softc *ntb);
344 static int intel_ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors);
345 static void intel_ntb_teardown_interrupts(struct ntb_softc *ntb);
346 static inline uint64_t intel_ntb_vec_mask(struct ntb_softc *, uint64_t db_vector);
347 static void intel_ntb_interrupt(struct ntb_softc *, uint32_t vec);
348 static void ndev_vec_isr(void *arg);
349 static void ndev_irq_isr(void *arg);
350 static inline uint64_t db_ioread(struct ntb_softc *, uint64_t regoff);
351 static inline void db_iowrite(struct ntb_softc *, uint64_t regoff, uint64_t);
352 static inline void db_iowrite_raw(struct ntb_softc *, uint64_t regoff, uint64_t);
353 static int intel_ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors);
354 static void intel_ntb_free_msix_vec(struct ntb_softc *ntb);
355 static void intel_ntb_get_msix_info(struct ntb_softc *ntb);
356 static void intel_ntb_exchange_msix(void *);
357 static struct ntb_hw_info *intel_ntb_get_device_info(uint32_t device_id);
358 static void intel_ntb_detect_max_mw(struct ntb_softc *ntb);
359 static int intel_ntb_detect_xeon(struct ntb_softc *ntb);
360 static int intel_ntb_detect_xeon_gen3(struct ntb_softc *ntb);
361 static int intel_ntb_detect_xeon_gen4(struct ntb_softc *ntb);
362 static int intel_ntb_detect_xeon_gen4_cfg(struct ntb_softc *ntb);
363 static int intel_ntb_detect_atom(struct ntb_softc *ntb);
364 static int intel_ntb_xeon_init_dev(struct ntb_softc *ntb);
365 static int intel_ntb_xeon_gen3_init_dev(struct ntb_softc *ntb);
366 static int intel_ntb_xeon_gen4_init_dev(struct ntb_softc *ntb);
367 static int intel_ntb_atom_init_dev(struct ntb_softc *ntb);
368 static void intel_ntb_teardown_xeon(struct ntb_softc *ntb);
369 static void configure_atom_secondary_side_bars(struct ntb_softc *ntb);
370 static void xeon_reset_sbar_size(struct ntb_softc *, enum ntb_bar idx,
371     enum ntb_bar regbar);
372 static void xeon_set_sbar_base_and_limit(struct ntb_softc *,
373     uint64_t base_addr, enum ntb_bar idx, enum ntb_bar regbar);
374 static void xeon_set_pbar_xlat(struct ntb_softc *, uint64_t base_addr,
375     enum ntb_bar idx);
376 static int xeon_setup_b2b_mw(struct ntb_softc *,
377     const struct ntb_b2b_addr *addr, const struct ntb_b2b_addr *peer_addr);
378 static int xeon_gen3_setup_b2b_mw(struct ntb_softc *);
379 static int xeon_gen4_setup_b2b_mw(struct ntb_softc *);
380 static int intel_ntb_mw_set_trans(device_t dev, unsigned idx, bus_addr_t addr,
381     size_t size);
382 static inline bool link_is_up(struct ntb_softc *ntb);
383 static inline bool _xeon_link_is_up(struct ntb_softc *ntb);
384 static inline bool atom_link_is_err(struct ntb_softc *ntb);
385 static inline enum ntb_speed intel_ntb_link_sta_speed(struct ntb_softc *);
386 static inline enum ntb_width intel_ntb_link_sta_width(struct ntb_softc *);
387 static void atom_link_hb(void *arg);
388 static void recover_atom_link(void *arg);
389 static bool intel_ntb_poll_link(struct ntb_softc *ntb);
390 static void save_bar_parameters(struct ntb_pci_bar_info *bar);
391 static void intel_ntb_sysctl_init(struct ntb_softc *);
392 static int sysctl_handle_features(SYSCTL_HANDLER_ARGS);
393 static int sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS);
394 static int sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS);
395 static int sysctl_handle_link_status(SYSCTL_HANDLER_ARGS);
396 static int sysctl_handle_register(SYSCTL_HANDLER_ARGS);
397 
398 static unsigned g_ntb_hw_debug_level;
399 SYSCTL_UINT(_hw_ntb, OID_AUTO, debug_level, CTLFLAG_RWTUN,
400     &g_ntb_hw_debug_level, 0, "ntb_hw log level -- higher is more verbose");
401 #define intel_ntb_printf(lvl, ...) do {				\
402 	if ((lvl) <= g_ntb_hw_debug_level) {			\
403 		device_printf(ntb->device, __VA_ARGS__);	\
404 	}							\
405 } while (0)
406 
407 #define	_NTB_PAT_UC	0
408 #define	_NTB_PAT_WC	1
409 #define	_NTB_PAT_WT	4
410 #define	_NTB_PAT_WP	5
411 #define	_NTB_PAT_WB	6
412 #define	_NTB_PAT_UCM	7
413 static unsigned g_ntb_mw_pat = _NTB_PAT_UC;
414 SYSCTL_UINT(_hw_ntb, OID_AUTO, default_mw_pat, CTLFLAG_RDTUN,
415     &g_ntb_mw_pat, 0, "Configure the default memory window cache flags (PAT): "
416     "UC: "  __XSTRING(_NTB_PAT_UC) ", "
417     "WC: "  __XSTRING(_NTB_PAT_WC) ", "
418     "WT: "  __XSTRING(_NTB_PAT_WT) ", "
419     "WP: "  __XSTRING(_NTB_PAT_WP) ", "
420     "WB: "  __XSTRING(_NTB_PAT_WB) ", "
421     "UC-: " __XSTRING(_NTB_PAT_UCM));
422 
423 static inline vm_memattr_t
424 intel_ntb_pat_flags(void)
425 {
426 
427 	switch (g_ntb_mw_pat) {
428 	case _NTB_PAT_WC:
429 		return (VM_MEMATTR_WRITE_COMBINING);
430 	case _NTB_PAT_WT:
431 		return (VM_MEMATTR_WRITE_THROUGH);
432 	case _NTB_PAT_WP:
433 		return (VM_MEMATTR_WRITE_PROTECTED);
434 	case _NTB_PAT_WB:
435 		return (VM_MEMATTR_WRITE_BACK);
436 	case _NTB_PAT_UCM:
437 		return (VM_MEMATTR_WEAK_UNCACHEABLE);
438 	case _NTB_PAT_UC:
439 		/* FALLTHROUGH */
440 	default:
441 		return (VM_MEMATTR_UNCACHEABLE);
442 	}
443 }
444 
445 /*
446  * Well, this obviously doesn't belong here, but it doesn't seem to exist
447  * anywhere better yet.
448  */
449 static inline const char *
450 intel_ntb_vm_memattr_to_str(vm_memattr_t pat)
451 {
452 
453 	switch (pat) {
454 	case VM_MEMATTR_WRITE_COMBINING:
455 		return ("WRITE_COMBINING");
456 	case VM_MEMATTR_WRITE_THROUGH:
457 		return ("WRITE_THROUGH");
458 	case VM_MEMATTR_WRITE_PROTECTED:
459 		return ("WRITE_PROTECTED");
460 	case VM_MEMATTR_WRITE_BACK:
461 		return ("WRITE_BACK");
462 	case VM_MEMATTR_WEAK_UNCACHEABLE:
463 		return ("UNCACHED");
464 	case VM_MEMATTR_UNCACHEABLE:
465 		return ("UNCACHEABLE");
466 	default:
467 		return ("UNKNOWN");
468 	}
469 }
470 
471 static int g_ntb_msix_idx = 1;
472 SYSCTL_INT(_hw_ntb, OID_AUTO, msix_mw_idx, CTLFLAG_RDTUN, &g_ntb_msix_idx,
473     0, "Use this memory window to access the peer MSIX message complex on "
474     "certain Xeon-based NTB systems, as a workaround for a hardware errata.  "
475     "Like b2b_mw_idx, negative values index from the last available memory "
476     "window.  (Applies on Xeon platforms with SB01BASE_LOCKUP errata.)");
477 
478 static int g_ntb_mw_idx = -1;
479 SYSCTL_INT(_hw_ntb, OID_AUTO, b2b_mw_idx, CTLFLAG_RDTUN, &g_ntb_mw_idx,
480     0, "Use this memory window to access the peer NTB registers.  A "
481     "non-negative value starts from the first MW index; a negative value "
482     "starts from the last MW index.  The default is -1, i.e., the last "
483     "available memory window.  Both sides of the NTB MUST set the same "
484     "value here!  (Applies on Xeon platforms with SDOORBELL_LOCKUP errata.)");
485 
/*
 * Feature and workaround flags, tested with HAS_FEATURE().
 *
 * Hardware owns the low 16 bits of features.
 */
#define NTB_BAR_SIZE_4K		(1 << 0)
#define NTB_SDOORBELL_LOCKUP	(1 << 1)
#define NTB_SB01BASE_LOCKUP	(1 << 2)
#define NTB_B2BDOORBELL_BIT14	(1 << 3)
#define NTB_BAR_ALIGN		(1 << 4)
#define NTB_LTR_BAD		(1 << 5)
/* Software/configuration owns the top 16 bits. */
#define NTB_SPLIT_BAR		(1ull << 16)
#define NTB_ONE_MSIX		(1ull << 17)

/*
 * Bit-name table in printf(9) "%b" layout: the first character is the
 * numeric base (\20 == 16, i.e. hex), and each following entry is a
 * 1-based bit number character followed by that bit's name.  A flag at
 * (1 << n) therefore uses bit number n + 1, so NTB_SPLIT_BAR is \21 (17)
 * and NTB_ONE_MSIX is \22 (18).  Every flag defined above must have an
 * entry here, otherwise it is silently left out of the decoded output.
 */
#define NTB_FEATURES_STR \
    "\20\22ONE_MSIX\21SPLIT_BAR4\06LTR_BAD\05BAR_ALIGN"  \
    "\04B2B_DOORBELL_BIT14\03SB01BASE_LOCKUP" \
    "\02SDOORBELL_LOCKUP\01BAR_SIZE_4K"
501 
502 static struct ntb_hw_info pci_ids[] = {
503 	/* XXX: PS/SS IDs left out until they are supported. */
504 	{ 0x0C4E8086, "BWD Atom Processor S1200 Non-Transparent Bridge B2B",
505 		NTB_ATOM, 0 },
506 
507 	{ 0x37258086, "JSF Xeon C35xx/C55xx Non-Transparent Bridge B2B",
508 		NTB_XEON_GEN1, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
509 	{ 0x3C0D8086, "SNB Xeon E5/Core i7 Non-Transparent Bridge B2B",
510 		NTB_XEON_GEN1, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
511 	{ 0x0E0D8086, "IVT Xeon E5 V2 Non-Transparent Bridge B2B",
512 		NTB_XEON_GEN1, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
513 		    NTB_SB01BASE_LOCKUP | NTB_BAR_SIZE_4K },
514 	{ 0x2F0D8086, "HSX Xeon E5 V3 Non-Transparent Bridge B2B",
515 		NTB_XEON_GEN1, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
516 		    NTB_SB01BASE_LOCKUP },
517 	{ 0x6F0D8086, "BDX Xeon E5 V4 Non-Transparent Bridge B2B",
518 		NTB_XEON_GEN1, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
519 		    NTB_SB01BASE_LOCKUP },
520 
521 	{ 0x201C8086, "SKL Xeon E5 V5 Non-Transparent Bridge B2B",
522 		NTB_XEON_GEN3, 0 },
523 
524 	{ 0x347e8086, "ICX/SPR Xeon Non-Transparent Bridge B2B",
525 	    NTB_XEON_GEN4, 0 },
526 };
527 
528 static const struct ntb_reg atom_reg = {
529 	.ntb_ctl = ATOM_NTBCNTL_OFFSET,
530 	.lnk_sta = ATOM_LINK_STATUS_OFFSET,
531 	.db_size = sizeof(uint64_t),
532 	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2 },
533 };
534 
535 static const struct ntb_alt_reg atom_pri_reg = {
536 	.db_bell = ATOM_PDOORBELL_OFFSET,
537 	.db_mask = ATOM_PDBMSK_OFFSET,
538 	.spad = ATOM_SPAD_OFFSET,
539 };
540 
541 static const struct ntb_alt_reg atom_b2b_reg = {
542 	.db_bell = ATOM_B2B_DOORBELL_OFFSET,
543 	.spad = ATOM_B2B_SPAD_OFFSET,
544 };
545 
546 static const struct ntb_xlat_reg atom_sec_xlat = {
547 #if 0
548 	/* "FIXME" says the Linux driver. */
549 	.bar0_base = ATOM_SBAR0BASE_OFFSET,
550 	.bar2_base = ATOM_SBAR2BASE_OFFSET,
551 	.bar4_base = ATOM_SBAR4BASE_OFFSET,
552 
553 	.bar2_limit = ATOM_SBAR2LMT_OFFSET,
554 	.bar4_limit = ATOM_SBAR4LMT_OFFSET,
555 #endif
556 
557 	.bar2_xlat = ATOM_SBAR2XLAT_OFFSET,
558 	.bar4_xlat = ATOM_SBAR4XLAT_OFFSET,
559 };
560 
561 static const struct ntb_reg xeon_reg = {
562 	.ntb_ctl = XEON_NTBCNTL_OFFSET,
563 	.lnk_sta = XEON_LINK_STATUS_OFFSET,
564 	.db_size = sizeof(uint16_t),
565 	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2, NTB_B2B_BAR_3 },
566 };
567 
568 static const struct ntb_alt_reg xeon_pri_reg = {
569 	.db_bell = XEON_PDOORBELL_OFFSET,
570 	.db_mask = XEON_PDBMSK_OFFSET,
571 	.spad = XEON_SPAD_OFFSET,
572 };
573 
574 static const struct ntb_alt_reg xeon_b2b_reg = {
575 	.db_bell = XEON_B2B_DOORBELL_OFFSET,
576 	.spad = XEON_B2B_SPAD_OFFSET,
577 };
578 
579 static const struct ntb_xlat_reg xeon_sec_xlat = {
580 	.bar0_base = XEON_SBAR0BASE_OFFSET,
581 	.bar2_base = XEON_SBAR2BASE_OFFSET,
582 	.bar4_base = XEON_SBAR4BASE_OFFSET,
583 	.bar5_base = XEON_SBAR5BASE_OFFSET,
584 
585 	.bar2_limit = XEON_SBAR2LMT_OFFSET,
586 	.bar4_limit = XEON_SBAR4LMT_OFFSET,
587 	.bar5_limit = XEON_SBAR5LMT_OFFSET,
588 
589 	.bar2_xlat = XEON_SBAR2XLAT_OFFSET,
590 	.bar4_xlat = XEON_SBAR4XLAT_OFFSET,
591 	.bar5_xlat = XEON_SBAR5XLAT_OFFSET,
592 };
593 
594 static struct ntb_b2b_addr xeon_b2b_usd_addr = {
595 	.bar0_addr = XEON_B2B_BAR0_ADDR,
596 	.bar2_addr64 = XEON_B2B_BAR2_ADDR64,
597 	.bar4_addr64 = XEON_B2B_BAR4_ADDR64,
598 	.bar4_addr32 = XEON_B2B_BAR4_ADDR32,
599 	.bar5_addr32 = XEON_B2B_BAR5_ADDR32,
600 };
601 
602 static struct ntb_b2b_addr xeon_b2b_dsd_addr = {
603 	.bar0_addr = XEON_B2B_BAR0_ADDR,
604 	.bar2_addr64 = XEON_B2B_BAR2_ADDR64,
605 	.bar4_addr64 = XEON_B2B_BAR4_ADDR64,
606 	.bar4_addr32 = XEON_B2B_BAR4_ADDR32,
607 	.bar5_addr32 = XEON_B2B_BAR5_ADDR32,
608 };
609 
610 static const struct ntb_reg xeon_gen3_reg = {
611 	.ntb_ctl = XEON_GEN3_REG_IMNTB_CTRL,
612 	.lnk_sta = XEON_GEN3_INT_LNK_STS_OFFSET,
613 	.db_size = sizeof(uint32_t),
614 	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2 },
615 };
616 
617 static const struct ntb_alt_reg xeon_gen3_pri_reg = {
618 	.db_bell = XEON_GEN3_REG_EMDOORBELL,
619 	.db_mask = XEON_GEN3_REG_IMINT_DISABLE,
620 	.spad = XEON_GEN3_REG_IMSPAD,
621 };
622 
623 static const struct ntb_alt_reg xeon_gen3_b2b_reg = {
624 	.db_bell = XEON_GEN3_REG_IMDOORBELL,
625 	.db_mask = XEON_GEN3_REG_EMINT_DISABLE,
626 	.spad = XEON_GEN3_REG_IMB2B_SSPAD,
627 };
628 
629 static const struct ntb_xlat_reg xeon_gen3_sec_xlat = {
630 	.bar0_base = XEON_GEN3_EXT_REG_BAR0BASE,
631 	.bar2_base = XEON_GEN3_EXT_REG_BAR1BASE,
632 	.bar4_base = XEON_GEN3_EXT_REG_BAR2BASE,
633 
634 	.bar2_limit = XEON_GEN3_REG_IMBAR1XLIMIT,
635 	.bar4_limit = XEON_GEN3_REG_IMBAR2XLIMIT,
636 
637 	.bar2_xlat = XEON_GEN3_REG_IMBAR1XBASE,
638 	.bar4_xlat = XEON_GEN3_REG_IMBAR2XBASE,
639 };
640 
641 static const struct ntb_reg xeon_gen4_reg = {
642 	.ntb_ctl = XEON_GEN4_REG_IMNTB_CTL,
643 	.lnk_sta = XEON_GEN4_REG_LINK_STATUS, /* mmio */
644 	.db_size = sizeof(uint32_t),
645 	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2 },
646 };
647 
648 static const struct ntb_alt_reg xeon_gen4_pri_reg = {
649 	.db_clear = XEON_GEN4_REG_IMINT_STATUS,
650 	.db_mask = XEON_GEN4_REG_IMINT_DISABLE,
651 	.spad = XEON_GEN4_REG_IMSPAD,
652 };
653 
654 static const struct ntb_alt_reg xeon_gen4_b2b_reg = {
655 	.db_bell = XEON_GEN4_REG_IMDOORBELL,
656 	.spad = XEON_GEN4_REG_EMSPAD,
657 };
658 
659 static const struct ntb_xlat_reg xeon_gen4_sec_xlat = {
660 	.bar2_limit = XEON_GEN4_REG_IMBAR1XLIMIT,
661 	.bar2_xlat = XEON_GEN4_REG_IMBAR1XBASE,
662 
663 	.bar4_limit = XEON_GEN4_REG_IMBAR1XLIMIT,
664 	.bar4_xlat = XEON_GEN4_REG_IMBAR2XBASE,
665 };
666 
667 SYSCTL_NODE(_hw_ntb, OID_AUTO, xeon_b2b, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
668     "B2B MW segment overrides -- MUST be the same on both sides");
669 
670 SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar2_addr64, CTLFLAG_RDTUN,
671     &xeon_b2b_usd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
672     "hardware, use this 64-bit address on the bus between the NTB devices for "
673     "the window at BAR2, on the upstream side of the link.  MUST be the same "
674     "address on both sides.");
675 SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr64, CTLFLAG_RDTUN,
676     &xeon_b2b_usd_addr.bar4_addr64, 0, "See usd_bar2_addr64, but BAR4.");
677 SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr32, CTLFLAG_RDTUN,
678     &xeon_b2b_usd_addr.bar4_addr32, 0, "See usd_bar2_addr64, but BAR4 "
679     "(split-BAR mode).");
680 SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar5_addr32, CTLFLAG_RDTUN,
681     &xeon_b2b_usd_addr.bar5_addr32, 0, "See usd_bar2_addr64, but BAR5 "
682     "(split-BAR mode).");
683 
684 SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar2_addr64, CTLFLAG_RDTUN,
685     &xeon_b2b_dsd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
686     "hardware, use this 64-bit address on the bus between the NTB devices for "
687     "the window at BAR2, on the downstream side of the link.  MUST be the same"
688     " address on both sides.");
689 SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr64, CTLFLAG_RDTUN,
690     &xeon_b2b_dsd_addr.bar4_addr64, 0, "See dsd_bar2_addr64, but BAR4.");
691 SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr32, CTLFLAG_RDTUN,
692     &xeon_b2b_dsd_addr.bar4_addr32, 0, "See dsd_bar2_addr64, but BAR4 "
693     "(split-BAR mode).");
694 SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar5_addr32, CTLFLAG_RDTUN,
695     &xeon_b2b_dsd_addr.bar5_addr32, 0, "See dsd_bar2_addr64, but BAR5 "
696     "(split-BAR mode).");
697 
698 /*
699  * OS <-> Driver interface structures
700  */
701 MALLOC_DEFINE(M_NTB, "ntb_hw", "ntb_hw driver memory allocations");
702 
703 /*
704  * OS <-> Driver linkage functions
705  */
706 static int
707 intel_ntb_probe(device_t device)
708 {
709 	struct ntb_hw_info *p;
710 
711 	p = intel_ntb_get_device_info(pci_get_devid(device));
712 	if (p == NULL)
713 		return (ENXIO);
714 
715 	device_set_desc(device, p->desc);
716 	return (0);
717 }
718 
719 static int
720 intel_ntb_attach(device_t device)
721 {
722 	struct ntb_softc *ntb;
723 	struct ntb_hw_info *p;
724 	int error;
725 
726 	ntb = device_get_softc(device);
727 	p = intel_ntb_get_device_info(pci_get_devid(device));
728 
729 	ntb->device = device;
730 	ntb->type = p->type;
731 	ntb->features = p->features;
732 	ntb->b2b_mw_idx = B2B_MW_DISABLED;
733 	ntb->msix_mw_idx = B2B_MW_DISABLED;
734 
735 	/* Heartbeat timer for NTB_ATOM since there is no link interrupt */
736 	callout_init(&ntb->heartbeat_timer, 1);
737 	callout_init(&ntb->lr_timer, 1);
738 	callout_init(&ntb->peer_msix_work, 1);
739 	mtx_init(&ntb->db_mask_lock, "ntb hw bits", NULL, MTX_SPIN);
740 
741 	if (ntb->type == NTB_ATOM)
742 		error = intel_ntb_detect_atom(ntb);
743 	else if (ntb->type == NTB_XEON_GEN3)
744 		error = intel_ntb_detect_xeon_gen3(ntb);
745 	else if (ntb->type == NTB_XEON_GEN4)
746 		error = intel_ntb_detect_xeon_gen4(ntb);
747 	else
748 		error = intel_ntb_detect_xeon(ntb);
749 	if (error != 0)
750 		goto out;
751 
752 	intel_ntb_detect_max_mw(ntb);
753 
754 	pci_enable_busmaster(ntb->device);
755 
756 	error = intel_ntb_map_pci_bars(ntb);
757 	if (error != 0)
758 		goto out;
759 	if (ntb->type == NTB_ATOM)
760 		error = intel_ntb_atom_init_dev(ntb);
761 	else if (ntb->type == NTB_XEON_GEN3)
762 		error = intel_ntb_xeon_gen3_init_dev(ntb);
763 	else if (ntb->type == NTB_XEON_GEN4)
764 		error = intel_ntb_xeon_gen4_init_dev(ntb);
765 	else
766 		error = intel_ntb_xeon_init_dev(ntb);
767 	if (error != 0)
768 		goto out;
769 
770 	intel_ntb_spad_clear(device);
771 
772 	intel_ntb_poll_link(ntb);
773 
774 	intel_ntb_sysctl_init(ntb);
775 
776 	/* Attach children to this controller */
777 	error = ntb_register_device(device);
778 
779 out:
780 	if (error != 0)
781 		intel_ntb_detach(device);
782 	return (error);
783 }
784 
785 static int
786 intel_ntb_detach(device_t device)
787 {
788 	struct ntb_softc *ntb;
789 
790 	ntb = device_get_softc(device);
791 
792 	/* Detach & delete all children */
793 	ntb_unregister_device(device);
794 
795 	if (ntb->self_reg != NULL) {
796 		DB_MASK_LOCK(ntb);
797 		db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_valid_mask);
798 		DB_MASK_UNLOCK(ntb);
799 	}
800 	callout_drain(&ntb->heartbeat_timer);
801 	callout_drain(&ntb->lr_timer);
802 	callout_drain(&ntb->peer_msix_work);
803 	pci_disable_busmaster(ntb->device);
804 	if (ntb->type == NTB_XEON_GEN1)
805 		intel_ntb_teardown_xeon(ntb);
806 	intel_ntb_teardown_interrupts(ntb);
807 
808 	mtx_destroy(&ntb->db_mask_lock);
809 
810 	intel_ntb_unmap_pci_bar(ntb);
811 
812 	return (0);
813 }
814 
815 /*
816  * Driver internal routines
817  */
818 static inline enum ntb_bar
819 intel_ntb_mw_to_bar(struct ntb_softc *ntb, unsigned mw)
820 {
821 
822 	KASSERT(mw < ntb->mw_count,
823 	    ("%s: mw:%u > count:%u", __func__, mw, (unsigned)ntb->mw_count));
824 	KASSERT(ntb->reg->mw_bar[mw] != 0, ("invalid mw"));
825 
826 	return (ntb->reg->mw_bar[mw]);
827 }
828 
829 static inline bool
830 bar_is_64bit(struct ntb_softc *ntb, enum ntb_bar bar)
831 {
832 	/* XXX This assertion could be stronger. */
833 	KASSERT(bar < NTB_MAX_BARS, ("bogus bar"));
834 	return (bar < NTB_B2B_BAR_2 || !HAS_FEATURE(ntb, NTB_SPLIT_BAR));
835 }
836 
837 static inline void
838 bar_get_xlat_params(struct ntb_softc *ntb, enum ntb_bar bar, uint32_t *base,
839     uint32_t *xlat, uint32_t *lmt)
840 {
841 	uint32_t basev, lmtv, xlatv;
842 
843 	switch (bar) {
844 	case NTB_B2B_BAR_1:
845 		basev = ntb->xlat_reg->bar2_base;
846 		lmtv = ntb->xlat_reg->bar2_limit;
847 		xlatv = ntb->xlat_reg->bar2_xlat;
848 		break;
849 	case NTB_B2B_BAR_2:
850 		basev = ntb->xlat_reg->bar4_base;
851 		lmtv = ntb->xlat_reg->bar4_limit;
852 		xlatv = ntb->xlat_reg->bar4_xlat;
853 		break;
854 	case NTB_B2B_BAR_3:
855 		basev = ntb->xlat_reg->bar5_base;
856 		lmtv = ntb->xlat_reg->bar5_limit;
857 		xlatv = ntb->xlat_reg->bar5_xlat;
858 		break;
859 	default:
860 		KASSERT(bar >= NTB_B2B_BAR_1 && bar < NTB_MAX_BARS,
861 		    ("bad bar"));
862 		basev = lmtv = xlatv = 0;
863 		break;
864 	}
865 
866 	if (base != NULL)
867 		*base = basev;
868 	if (xlat != NULL)
869 		*xlat = xlatv;
870 	if (lmt != NULL)
871 		*lmt = lmtv;
872 }
873 
874 static int
875 intel_ntb_map_pci_bars(struct ntb_softc *ntb)
876 {
877 	struct ntb_pci_bar_info *bar;
878 	int rc;
879 
880 	bar = &ntb->bar_info[NTB_CONFIG_BAR];
881 	bar->pci_resource_id = PCIR_BAR(0);
882 	rc = map_mmr_bar(ntb, bar);
883 	if (rc != 0)
884 		goto out;
885 
886 	/*
887 	 * At least on Xeon v4 NTB device leaks to host some remote side
888 	 * BAR0 writes supposed to update scratchpad registers.  I am not
889 	 * sure why it happens, but it may be related to the fact that
890 	 * on a link side BAR0 is 32KB, while on a host side it is 64KB.
891 	 * Without this hack DMAR blocks those accesses as not allowed.
892 	 */
893 	if (bus_dma_tag_create(bus_get_dma_tag(ntb->device), 1, 0,
894 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
895 	    bar->size, 1, bar->size, 0, NULL, NULL, &ntb->bar0_dma_tag)) {
896 		device_printf(ntb->device, "Unable to create BAR0 tag\n");
897 		return (ENOMEM);
898 	}
899 	if (bus_dmamap_create(ntb->bar0_dma_tag, 0, &ntb->bar0_dma_map)) {
900 		device_printf(ntb->device, "Unable to create BAR0 map\n");
901 		return (ENOMEM);
902 	}
903 	if (bus_dma_iommu_load_ident(ntb->bar0_dma_tag, ntb->bar0_dma_map,
904 	    bar->pbase, bar->size, 0)) {
905 		device_printf(ntb->device, "Unable to load BAR0 map\n");
906 		return (ENOMEM);
907 	}
908 
909 	bar = &ntb->bar_info[NTB_B2B_BAR_1];
910 	bar->pci_resource_id = PCIR_BAR(2);
911 	rc = map_memory_window_bar(ntb, bar);
912 	if (rc != 0)
913 		goto out;
914 	if (ntb->type == NTB_XEON_GEN3) {
915 		bar->psz_off = XEON_GEN3_INT_REG_IMBAR1SZ;
916 		bar->ssz_off = XEON_GEN3_INT_REG_EMBAR1SZ;
917 		bar->pbarxlat_off = XEON_GEN3_REG_EMBAR1XBASE;
918 	} else if (ntb->type == NTB_XEON_GEN4) {
919 		bar->psz_off = XEON_GEN4_CFG_REG_IMBAR1SZ;
920 		bar->ssz_off = XEON_GEN4_CFG_REG_EMBAR1SZ;
921 		bar->pbarxlat_off = XEON_GEN4_REG_EXT_BAR1BASE;
922 	} else {
923 		bar->psz_off = XEON_PBAR23SZ_OFFSET;
924 		bar->ssz_off = XEON_SBAR23SZ_OFFSET;
925 		bar->pbarxlat_off = XEON_PBAR2XLAT_OFFSET;
926 	}
927 
928 	bar = &ntb->bar_info[NTB_B2B_BAR_2];
929 	bar->pci_resource_id = PCIR_BAR(4);
930 	rc = map_memory_window_bar(ntb, bar);
931 	if (rc != 0)
932 		goto out;
933 	if (ntb->type == NTB_XEON_GEN3) {
934 		bar->psz_off = XEON_GEN3_INT_REG_IMBAR2SZ;
935 		bar->ssz_off = XEON_GEN3_INT_REG_EMBAR2SZ;
936 		bar->pbarxlat_off = XEON_GEN3_REG_EMBAR2XBASE;
937 	} else if (ntb->type == NTB_XEON_GEN4) {
938 		bar->psz_off = XEON_GEN4_CFG_REG_IMBAR2SZ;
939 		bar->ssz_off = XEON_GEN4_CFG_REG_EMBAR2SZ;
940 		bar->pbarxlat_off = XEON_GEN4_REG_EXT_BAR2BASE;
941 	} else {
942 		bar->psz_off = XEON_PBAR4SZ_OFFSET;
943 		bar->ssz_off = XEON_SBAR4SZ_OFFSET;
944 		bar->pbarxlat_off = XEON_PBAR4XLAT_OFFSET;
945 	}
946 
947 	if (!HAS_FEATURE(ntb, NTB_SPLIT_BAR))
948 		goto out;
949 
950 	if (ntb->type == NTB_XEON_GEN3 ||
951 	    ntb->type == NTB_XEON_GEN4) {
952 		device_printf(ntb->device, "no split bar support\n");
953 		return (ENXIO);
954 	}
955 
956 	bar = &ntb->bar_info[NTB_B2B_BAR_3];
957 	bar->pci_resource_id = PCIR_BAR(5);
958 	rc = map_memory_window_bar(ntb, bar);
959 	bar->psz_off = XEON_PBAR5SZ_OFFSET;
960 	bar->ssz_off = XEON_SBAR5SZ_OFFSET;
961 	bar->pbarxlat_off = XEON_PBAR5XLAT_OFFSET;
962 
963 out:
964 	if (rc != 0)
965 		device_printf(ntb->device,
966 		    "unable to allocate pci resource\n");
967 	return (rc);
968 }
969 
970 static void
971 print_map_success(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar,
972     const char *kind)
973 {
974 
975 	device_printf(ntb->device,
976 	    "Mapped BAR%d v:[%p-%p] p:[0x%jx-0x%jx] (0x%jx bytes) (%s)\n",
977 	    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
978 	    (char *)bar->vbase + bar->size - 1,
979 	    (uintmax_t)bar->pbase, (uintmax_t)(bar->pbase + bar->size - 1),
980 	    (uintmax_t)bar->size, kind);
981 }
982 
983 static int
984 map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
985 {
986 
987 	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
988 	    &bar->pci_resource_id, RF_ACTIVE);
989 	if (bar->pci_resource == NULL)
990 		return (ENXIO);
991 
992 	save_bar_parameters(bar);
993 	bar->map_mode = VM_MEMATTR_UNCACHEABLE;
994 	print_map_success(ntb, bar, "mmr");
995 	return (0);
996 }
997 
998 static int
999 map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
1000 {
1001 	int rc;
1002 	vm_memattr_t mapmode;
1003 	uint8_t bar_size_bits = 0;
1004 
1005 	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
1006 	    &bar->pci_resource_id, RF_ACTIVE);
1007 
1008 	if (bar->pci_resource == NULL)
1009 		return (ENXIO);
1010 
1011 	save_bar_parameters(bar);
1012 	/*
1013 	 * Ivytown NTB BAR sizes are misreported by the hardware due to a
1014 	 * hardware issue. To work around this, query the size it should be
1015 	 * configured to by the device and modify the resource to correspond to
1016 	 * this new size. The BIOS on systems with this problem is required to
1017 	 * provide enough address space to allow the driver to make this change
1018 	 * safely.
1019 	 *
1020 	 * Ideally I could have just specified the size when I allocated the
1021 	 * resource like:
1022 	 *  bus_alloc_resource(ntb->device,
1023 	 *	SYS_RES_MEMORY, &bar->pci_resource_id, 0ul, ~0ul,
1024 	 *	1ul << bar_size_bits, RF_ACTIVE);
1025 	 * but the PCI driver does not honor the size in this call, so we have
1026 	 * to modify it after the fact.
1027 	 */
1028 	if (HAS_FEATURE(ntb, NTB_BAR_SIZE_4K)) {
1029 		if (bar->pci_resource_id == PCIR_BAR(2))
1030 			bar_size_bits = pci_read_config(ntb->device,
1031 			    XEON_PBAR23SZ_OFFSET, 1);
1032 		else
1033 			bar_size_bits = pci_read_config(ntb->device,
1034 			    XEON_PBAR45SZ_OFFSET, 1);
1035 
1036 		rc = bus_adjust_resource(ntb->device, SYS_RES_MEMORY,
1037 		    bar->pci_resource, bar->pbase,
1038 		    bar->pbase + (1ul << bar_size_bits) - 1);
1039 		if (rc != 0) {
1040 			device_printf(ntb->device,
1041 			    "unable to resize bar\n");
1042 			return (rc);
1043 		}
1044 
1045 		save_bar_parameters(bar);
1046 	}
1047 
1048 	bar->map_mode = VM_MEMATTR_UNCACHEABLE;
1049 	print_map_success(ntb, bar, "mw");
1050 
1051 	/*
1052 	 * Optionally, mark MW BARs as anything other than UC to improve
1053 	 * performance.
1054 	 */
1055 	mapmode = intel_ntb_pat_flags();
1056 	if (mapmode == bar->map_mode)
1057 		return (0);
1058 
1059 	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size, mapmode);
1060 	if (rc == 0) {
1061 		bar->map_mode = mapmode;
1062 		device_printf(ntb->device,
1063 		    "Marked BAR%d v:[%p-%p] p:[0x%jx-0x%jx] as "
1064 		    "%s.\n",
1065 		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
1066 		    (char *)bar->vbase + bar->size - 1,
1067 		    (uintmax_t)bar->pbase,
1068 		    (uintmax_t)(bar->pbase + bar->size - 1),
1069 		    intel_ntb_vm_memattr_to_str(mapmode));
1070 	} else
1071 		device_printf(ntb->device,
1072 		    "Unable to mark BAR%d v:[%p-%p] p:[0x%jx-0x%jx] as "
1073 		    "%s: %d\n",
1074 		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
1075 		    (char *)bar->vbase + bar->size - 1,
1076 		    (uintmax_t)bar->pbase,
1077 		    (uintmax_t)(bar->pbase + bar->size - 1),
1078 		    intel_ntb_vm_memattr_to_str(mapmode), rc);
1079 		/* Proceed anyway */
1080 	return (0);
1081 }
1082 
1083 static void
1084 intel_ntb_unmap_pci_bar(struct ntb_softc *ntb)
1085 {
1086 	struct ntb_pci_bar_info *bar;
1087 	int i;
1088 
1089 	if (ntb->bar0_dma_map != NULL) {
1090 		bus_dmamap_unload(ntb->bar0_dma_tag, ntb->bar0_dma_map);
1091 		bus_dmamap_destroy(ntb->bar0_dma_tag, ntb->bar0_dma_map);
1092 	}
1093 	if (ntb->bar0_dma_tag != NULL)
1094 		bus_dma_tag_destroy(ntb->bar0_dma_tag);
1095 	for (i = 0; i < NTB_MAX_BARS; i++) {
1096 		bar = &ntb->bar_info[i];
1097 		if (bar->pci_resource != NULL)
1098 			bus_release_resource(ntb->device, SYS_RES_MEMORY,
1099 			    bar->pci_resource_id, bar->pci_resource);
1100 	}
1101 }
1102 
1103 static int
1104 intel_ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors)
1105 {
1106 	uint32_t i;
1107 	int rc;
1108 
1109 	for (i = 0; i < num_vectors; i++) {
1110 		ntb->int_info[i].rid = i + 1;
1111 		ntb->int_info[i].res = bus_alloc_resource_any(ntb->device,
1112 		    SYS_RES_IRQ, &ntb->int_info[i].rid, RF_ACTIVE);
1113 		if (ntb->int_info[i].res == NULL) {
1114 			device_printf(ntb->device,
1115 			    "bus_alloc_resource failed\n");
1116 			return (ENOMEM);
1117 		}
1118 		ntb->int_info[i].tag = NULL;
1119 		ntb->allocated_interrupts++;
1120 		rc = bus_setup_intr(ntb->device, ntb->int_info[i].res,
1121 		    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_vec_isr,
1122 		    &ntb->msix_vec[i], &ntb->int_info[i].tag);
1123 		if (rc != 0) {
1124 			device_printf(ntb->device, "bus_setup_intr failed\n");
1125 			return (ENXIO);
1126 		}
1127 	}
1128 	return (0);
1129 }
1130 
/*
 * The Linux NTB driver drops from MSI-X to legacy INTx if a unique vector
 * cannot be allocated for each MSI-X message.  JHB seems to think remapping
 * should be okay.  This tunable should enable us to test that hypothesis
 * when someone gets their hands on some Xeon hardware.
 *
 * When non-zero, intel_ntb_init_isr() pretends that one vector fewer than
 * requested was allocated, which forces the remapping path.
 */
static int ntb_force_remap_mode;
SYSCTL_INT(_hw_ntb, OID_AUTO, force_remap_mode, CTLFLAG_RDTUN,
    &ntb_force_remap_mode, 0, "If enabled, force MSI-X messages to be remapped"
    " to a smaller number of ithreads, even if the desired number are "
    "available");

/*
 * In case it is NOT ok, give consumers an abort button: when non-zero,
 * intel_ntb_remap_msix() refuses to remap and returns ENXIO.
 */
static int ntb_prefer_intx;
SYSCTL_INT(_hw_ntb, OID_AUTO, prefer_intx_to_remap, CTLFLAG_RDTUN,
    &ntb_prefer_intx, 0, "If enabled, prefer to use legacy INTx mode rather "
    "than remapping MSI-X messages over available slots (match Linux driver "
    "behavior)");
1151 
1152 /*
1153  * Remap the desired number of MSI-X messages to available ithreads in a simple
1154  * round-robin fashion.
1155  */
1156 static int
1157 intel_ntb_remap_msix(device_t dev, uint32_t desired, uint32_t avail)
1158 {
1159 	u_int *vectors;
1160 	uint32_t i;
1161 	int rc;
1162 
1163 	if (ntb_prefer_intx != 0)
1164 		return (ENXIO);
1165 
1166 	vectors = malloc(desired * sizeof(*vectors), M_NTB, M_ZERO | M_WAITOK);
1167 
1168 	for (i = 0; i < desired; i++)
1169 		vectors[i] = (i % avail) + 1;
1170 
1171 	rc = pci_remap_msix(dev, desired, vectors);
1172 	free(vectors, M_NTB);
1173 	return (rc);
1174 }
1175 
1176 static int
1177 intel_ntb_xeon_gen3_init_isr(struct ntb_softc *ntb)
1178 {
1179 	uint64_t i, reg;
1180 	uint32_t desired_vectors, num_vectors;
1181 	int rc;
1182 
1183 	ntb->allocated_interrupts = 0;
1184 	ntb->last_ts = ticks;
1185 
1186 	/* Mask all the interrupts, including hardware interrupt */
1187 	intel_ntb_reg_write(8, XEON_GEN3_REG_IMINT_DISABLE, ~0ULL);
1188 
1189 	/* Clear Interrupt Status */
1190 	reg = intel_ntb_reg_read(8, XEON_GEN3_REG_IMINT_STATUS);
1191 	intel_ntb_reg_write(8, XEON_GEN3_REG_IMINT_STATUS, reg);
1192 
1193 	num_vectors = desired_vectors = MIN(pci_msix_count(ntb->device),
1194 	    XEON_GEN3_DB_MSIX_VECTOR_COUNT);
1195 
1196 	rc = pci_alloc_msix(ntb->device, &num_vectors);
1197 	if (rc != 0) {
1198 		device_printf(ntb->device,
1199 		    "Interrupt allocation failed %d\n", rc);
1200 		return (rc);
1201 	}
1202 	if (desired_vectors != num_vectors) {
1203 		device_printf(ntb->device, "Couldn't get %d vectors\n",
1204 		    XEON_GEN3_DB_MSIX_VECTOR_COUNT);
1205 		return (ENXIO);
1206 	}
1207 	/* 32 db + 1 hardware */
1208 	if (num_vectors == XEON_GEN3_DB_MSIX_VECTOR_COUNT) {
1209 		/* Program INTVECXX source register */
1210 		for (i = 0; i < XEON_GEN3_DB_MSIX_VECTOR_COUNT; i++) {
1211 			/* interrupt source i for vector i */
1212 			intel_ntb_reg_write(1, XEON_GEN3_REG_IMINTVEC00 + i, i);
1213 			if (i == (XEON_GEN3_DB_MSIX_VECTOR_COUNT - 1)) {
1214 				intel_ntb_reg_write(1,
1215 				    XEON_GEN3_REG_IMINTVEC00 + i,
1216 				    XEON_GEN3_LINK_VECTOR_INDEX);
1217 			}
1218 		}
1219 
1220 		intel_ntb_create_msix_vec(ntb, num_vectors);
1221 		rc = intel_ntb_setup_msix(ntb, num_vectors);
1222 
1223 		/* enable all interrupts */
1224 		intel_ntb_reg_write(8, XEON_GEN3_REG_IMINT_DISABLE, 0ULL);
1225 	} else {
1226 		device_printf(ntb->device, "need to remap interrupts, giving up.\n");
1227 		return (ENXIO);
1228 	}
1229 
1230 	return (rc);
1231 }
1232 
1233 static int
1234 intel_ntb_xeon_gen4_init_isr(struct ntb_softc *ntb)
1235 {
1236 	uint64_t i, reg;
1237 	uint32_t desired_vectors, num_vectors;
1238 	int rc;
1239 
1240 	ntb->allocated_interrupts = 0;
1241 	ntb->last_ts = ticks;
1242 
1243 	/* Mask all the interrupts, including hardware interrupt */
1244 	intel_ntb_reg_write(8, XEON_GEN4_REG_IMINT_DISABLE, ~0ULL);
1245 
1246 	/* Clear Interrupt Status */
1247 	reg = intel_ntb_reg_read(8, XEON_GEN4_REG_IMINT_STATUS);
1248 	intel_ntb_reg_write(8, XEON_GEN4_REG_IMINT_STATUS, reg);
1249 
1250 	num_vectors = desired_vectors = MIN(pci_msix_count(ntb->device),
1251 	    XEON_GEN4_DB_MSIX_VECTOR_COUNT);
1252 
1253 	rc = pci_alloc_msix(ntb->device, &num_vectors);
1254 	if (rc != 0) {
1255 		device_printf(ntb->device,
1256 		    "Interrupt allocation failed %d\n", rc);
1257 		return (rc);
1258 	}
1259 	if (desired_vectors != num_vectors) {
1260 		device_printf(ntb->device, "Couldn't get %d vectors\n",
1261 		    XEON_GEN4_DB_MSIX_VECTOR_COUNT);
1262 		return (ENXIO);
1263 	}
1264 	if (num_vectors != XEON_GEN4_DB_MSIX_VECTOR_COUNT) {
1265 		device_printf(ntb->device,
1266 		    "Need to remap interrupts, giving up\n");
1267 		return (ENXIO);
1268 	}
1269 
1270 	/*
1271 	 * The MSIX vectors and the interrupt status bits are not lined up
1272 	 * on Gen3 (Skylake) and Gen4. By default the link status bit is bit
1273 	 * 32, however it is by default MSIX vector0. We need to fixup to
1274 	 * line them up. The vectors at reset is 1-32,0. We need to reprogram
1275 	 * to 0-32.
1276 	 */
1277 	for (i = 0; i < XEON_GEN4_DB_MSIX_VECTOR_COUNT; i++)
1278 		intel_ntb_reg_write(1, XEON_GEN4_REG_INTVEC + i, i);
1279 
1280 	intel_ntb_create_msix_vec(ntb, num_vectors);
1281 	rc = intel_ntb_setup_msix(ntb, num_vectors);
1282 
1283 	/* enable all interrupts */
1284 	intel_ntb_reg_write(8, XEON_GEN4_REG_IMINT_DISABLE, 0ULL);
1285 
1286 	return (rc);
1287 }
1288 
1289 static int
1290 intel_ntb_init_isr(struct ntb_softc *ntb)
1291 {
1292 	uint32_t desired_vectors, num_vectors;
1293 	int rc;
1294 
1295 	ntb->allocated_interrupts = 0;
1296 	ntb->last_ts = ticks;
1297 
1298 	/*
1299 	 * Mask all doorbell interrupts.  (Except link events!)
1300 	 */
1301 	DB_MASK_LOCK(ntb);
1302 	ntb->db_mask = ntb->db_valid_mask;
1303 	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
1304 	DB_MASK_UNLOCK(ntb);
1305 
1306 	num_vectors = desired_vectors = MIN(pci_msix_count(ntb->device),
1307 	    ntb->db_count);
1308 	if (desired_vectors >= 1) {
1309 		rc = pci_alloc_msix(ntb->device, &num_vectors);
1310 
1311 		if (ntb_force_remap_mode != 0 && rc == 0 &&
1312 		    num_vectors == desired_vectors)
1313 			num_vectors--;
1314 
1315 		if (rc == 0 && num_vectors < desired_vectors) {
1316 			rc = intel_ntb_remap_msix(ntb->device, desired_vectors,
1317 			    num_vectors);
1318 			if (rc == 0)
1319 				num_vectors = desired_vectors;
1320 			else
1321 				pci_release_msi(ntb->device);
1322 		}
1323 		if (rc != 0)
1324 			num_vectors = 1;
1325 	} else
1326 		num_vectors = 1;
1327 
1328 	if (ntb->type == NTB_XEON_GEN1 && num_vectors < ntb->db_vec_count) {
1329 		if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
1330 			device_printf(ntb->device,
1331 			    "Errata workaround does not support MSI or INTX\n");
1332 			return (EINVAL);
1333 		}
1334 
1335 		ntb->db_vec_count = 1;
1336 		ntb->db_vec_shift = XEON_DB_TOTAL_SHIFT;
1337 		rc = intel_ntb_setup_legacy_interrupt(ntb);
1338 	} else {
1339 		if (num_vectors - 1 != XEON_NONLINK_DB_MSIX_BITS &&
1340 		    HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
1341 			device_printf(ntb->device,
1342 			    "Errata workaround expects %d doorbell bits\n",
1343 			    XEON_NONLINK_DB_MSIX_BITS);
1344 			return (EINVAL);
1345 		}
1346 
1347 		intel_ntb_create_msix_vec(ntb, num_vectors);
1348 		rc = intel_ntb_setup_msix(ntb, num_vectors);
1349 	}
1350 	if (rc != 0) {
1351 		device_printf(ntb->device,
1352 		    "Error allocating interrupts: %d\n", rc);
1353 		intel_ntb_free_msix_vec(ntb);
1354 	}
1355 
1356 	return (rc);
1357 }
1358 
1359 static int
1360 intel_ntb_setup_legacy_interrupt(struct ntb_softc *ntb)
1361 {
1362 	int rc;
1363 
1364 	ntb->int_info[0].rid = 0;
1365 	ntb->int_info[0].res = bus_alloc_resource_any(ntb->device, SYS_RES_IRQ,
1366 	    &ntb->int_info[0].rid, RF_SHAREABLE|RF_ACTIVE);
1367 	if (ntb->int_info[0].res == NULL) {
1368 		device_printf(ntb->device, "bus_alloc_resource failed\n");
1369 		return (ENOMEM);
1370 	}
1371 
1372 	ntb->int_info[0].tag = NULL;
1373 	ntb->allocated_interrupts = 1;
1374 
1375 	rc = bus_setup_intr(ntb->device, ntb->int_info[0].res,
1376 	    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_irq_isr,
1377 	    ntb, &ntb->int_info[0].tag);
1378 	if (rc != 0) {
1379 		device_printf(ntb->device, "bus_setup_intr failed\n");
1380 		return (ENXIO);
1381 	}
1382 
1383 	return (0);
1384 }
1385 
1386 static void
1387 intel_ntb_teardown_interrupts(struct ntb_softc *ntb)
1388 {
1389 	struct ntb_int_info *current_int;
1390 	int i;
1391 
1392 	for (i = 0; i < ntb->allocated_interrupts; i++) {
1393 		current_int = &ntb->int_info[i];
1394 		if (current_int->tag != NULL)
1395 			bus_teardown_intr(ntb->device, current_int->res,
1396 			    current_int->tag);
1397 
1398 		if (current_int->res != NULL)
1399 			bus_release_resource(ntb->device, SYS_RES_IRQ,
1400 			    rman_get_rid(current_int->res), current_int->res);
1401 	}
1402 
1403 	intel_ntb_free_msix_vec(ntb);
1404 	pci_release_msi(ntb->device);
1405 }
1406 
1407 static inline uint64_t
1408 db_ioread(struct ntb_softc *ntb, uint64_t regoff)
1409 {
1410 
1411 	switch (ntb->type) {
1412 	case NTB_ATOM:
1413 	case NTB_XEON_GEN3:
1414 	case NTB_XEON_GEN4:
1415 		return (intel_ntb_reg_read(8, regoff));
1416 	case NTB_XEON_GEN1:
1417 		return (intel_ntb_reg_read(2, regoff));
1418 	}
1419 	__assert_unreachable();
1420 }
1421 
1422 static inline void
1423 db_iowrite(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
1424 {
1425 
1426 	KASSERT((val & ~ntb->db_valid_mask) == 0,
1427 	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
1428 	     (uintmax_t)(val & ~ntb->db_valid_mask),
1429 	     (uintmax_t)ntb->db_valid_mask));
1430 
1431 	if (regoff == ntb->self_reg->db_mask)
1432 		DB_MASK_ASSERT(ntb, MA_OWNED);
1433 	db_iowrite_raw(ntb, regoff, val);
1434 }
1435 
1436 static inline void
1437 db_iowrite_raw(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
1438 {
1439 
1440 	switch (ntb->type) {
1441 	case NTB_ATOM:
1442 	case NTB_XEON_GEN3:
1443 	case NTB_XEON_GEN4:
1444 		intel_ntb_reg_write(8, regoff, val);
1445 		break;
1446 	case NTB_XEON_GEN1:
1447 		intel_ntb_reg_write(2, regoff, (uint16_t)val);
1448 		break;
1449 	}
1450 }
1451 
1452 static void
1453 intel_ntb_db_set_mask(device_t dev, uint64_t bits)
1454 {
1455 	struct ntb_softc *ntb = device_get_softc(dev);
1456 
1457 	DB_MASK_LOCK(ntb);
1458 	ntb->db_mask |= bits;
1459 	if (!HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
1460 		db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
1461 	DB_MASK_UNLOCK(ntb);
1462 }
1463 
1464 static void
1465 intel_ntb_db_clear_mask(device_t dev, uint64_t bits)
1466 {
1467 	struct ntb_softc *ntb = device_get_softc(dev);
1468 	uint64_t ibits;
1469 	int i;
1470 
1471 	KASSERT((bits & ~ntb->db_valid_mask) == 0,
1472 	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
1473 	     (uintmax_t)(bits & ~ntb->db_valid_mask),
1474 	     (uintmax_t)ntb->db_valid_mask));
1475 
1476 	DB_MASK_LOCK(ntb);
1477 	ibits = ntb->fake_db & ntb->db_mask & bits;
1478 	ntb->db_mask &= ~bits;
1479 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
1480 		/* Simulate fake interrupts if unmasked DB bits are set. */
1481 		ntb->force_db |= ibits;
1482 		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
1483 			if ((ibits & intel_ntb_db_vector_mask(dev, i)) != 0)
1484 				swi_sched(ntb->int_info[i].tag, 0);
1485 		}
1486 	} else {
1487 		db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
1488 	}
1489 	DB_MASK_UNLOCK(ntb);
1490 }
1491 
1492 static uint64_t
1493 intel_ntb_db_read(device_t dev)
1494 {
1495 	struct ntb_softc *ntb = device_get_softc(dev);
1496 
1497 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
1498 		return (ntb->fake_db);
1499 	if (ntb->type == NTB_XEON_GEN3)
1500 		return (intel_ntb_reg_read(8, XEON_GEN3_REG_IMINT_STATUS));
1501 	else
1502 		return (db_ioread(ntb, ntb->self_reg->db_bell));
1503 }
1504 
1505 static void
1506 intel_ntb_db_clear(device_t dev, uint64_t bits)
1507 {
1508 	struct ntb_softc *ntb = device_get_softc(dev);
1509 
1510 	KASSERT((bits & ~ntb->db_valid_mask) == 0,
1511 	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
1512 	     (uintmax_t)(bits & ~ntb->db_valid_mask),
1513 	     (uintmax_t)ntb->db_valid_mask));
1514 
1515 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
1516 		DB_MASK_LOCK(ntb);
1517 		ntb->fake_db &= ~bits;
1518 		DB_MASK_UNLOCK(ntb);
1519 		return;
1520 	}
1521 
1522 	if (ntb->type == NTB_XEON_GEN3)
1523 		intel_ntb_reg_write(4, XEON_GEN3_REG_IMINT_STATUS,
1524 		    (uint32_t)bits);
1525 	else
1526 		db_iowrite(ntb, ntb->self_reg->db_bell, bits);
1527 }
1528 
1529 static inline uint64_t
1530 intel_ntb_vec_mask(struct ntb_softc *ntb, uint64_t db_vector)
1531 {
1532 	uint64_t shift, mask;
1533 
1534 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
1535 		/*
1536 		 * Remap vectors in custom way to make at least first
1537 		 * three doorbells to not generate stray events.
1538 		 * This breaks Linux compatibility (if one existed)
1539 		 * when more then one DB is used (not by if_ntb).
1540 		 */
1541 		if (db_vector < XEON_NONLINK_DB_MSIX_BITS - 1)
1542 			return (1 << db_vector);
1543 		if (db_vector == XEON_NONLINK_DB_MSIX_BITS - 1)
1544 			return (0x7ffc);
1545 	}
1546 
1547 	shift = ntb->db_vec_shift;
1548 	mask = (1ull << shift) - 1;
1549 	return (mask << (shift * db_vector));
1550 }
1551 
1552 static void
1553 intel_ntb_interrupt(struct ntb_softc *ntb, uint32_t vec)
1554 {
1555 	uint64_t vec_mask;
1556 
1557 	ntb->last_ts = ticks;
1558 	vec_mask = intel_ntb_vec_mask(ntb, vec);
1559 
1560 	if ((ntb->type == NTB_XEON_GEN3 || ntb->type == NTB_XEON_GEN4) &&
1561 	    vec == XEON_GEN3_LINK_VECTOR_INDEX)
1562 		vec_mask |= ntb->db_link_mask;
1563 	if ((vec_mask & ntb->db_link_mask) != 0) {
1564 		if (intel_ntb_poll_link(ntb))
1565 			ntb_link_event(ntb->device);
1566 		if (ntb->type == NTB_XEON_GEN3)
1567 			intel_ntb_reg_write(8, XEON_GEN3_REG_IMINT_STATUS,
1568 			    intel_ntb_reg_read(8, XEON_GEN3_REG_IMINT_STATUS));
1569 		if (ntb->type == NTB_XEON_GEN4)
1570 			intel_ntb_reg_write(8, XEON_GEN4_REG_IMINT_STATUS,
1571 			    intel_ntb_reg_read(8, XEON_GEN4_REG_IMINT_STATUS));
1572 	}
1573 
1574 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP) &&
1575 	    (vec_mask & ntb->db_link_mask) == 0) {
1576 		DB_MASK_LOCK(ntb);
1577 
1578 		/*
1579 		 * Do not report same DB events again if not cleared yet,
1580 		 * unless the mask was just cleared for them and this
1581 		 * interrupt handler call can be the consequence of it.
1582 		 */
1583 		vec_mask &= ~ntb->fake_db | ntb->force_db;
1584 		ntb->force_db &= ~vec_mask;
1585 
1586 		/* Update our internal doorbell register. */
1587 		ntb->fake_db |= vec_mask;
1588 
1589 		/* Do not report masked DB events. */
1590 		vec_mask &= ~ntb->db_mask;
1591 
1592 		DB_MASK_UNLOCK(ntb);
1593 	}
1594 
1595 	if ((vec_mask & ntb->db_valid_mask) != 0)
1596 		ntb_db_event(ntb->device, vec);
1597 }
1598 
1599 static void
1600 ndev_vec_isr(void *arg)
1601 {
1602 	struct ntb_vec *nvec = arg;
1603 
1604 	intel_ntb_interrupt(nvec->ntb, nvec->num);
1605 }
1606 
/*
 * Legacy interrupt handler; 'arg' is the softc registered in
 * intel_ntb_setup_legacy_interrupt().
 */
static void
ndev_irq_isr(void *arg)
{
	struct ntb_softc *ntb;

	ntb = arg;
	/* If we couldn't set up MSI-X, we only have the one vector. */
	intel_ntb_interrupt(ntb, 0);
}
1613 
1614 static int
1615 intel_ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors)
1616 {
1617 	uint32_t i;
1618 
1619 	ntb->msix_vec = malloc(num_vectors * sizeof(*ntb->msix_vec), M_NTB,
1620 	    M_ZERO | M_WAITOK);
1621 	for (i = 0; i < num_vectors; i++) {
1622 		ntb->msix_vec[i].num = i;
1623 		ntb->msix_vec[i].ntb = ntb;
1624 	}
1625 
1626 	return (0);
1627 }
1628 
1629 static void
1630 intel_ntb_free_msix_vec(struct ntb_softc *ntb)
1631 {
1632 
1633 	if (ntb->msix_vec == NULL)
1634 		return;
1635 
1636 	free(ntb->msix_vec, M_NTB);
1637 	ntb->msix_vec = NULL;
1638 }
1639 
1640 static void
1641 intel_ntb_get_msix_info(struct ntb_softc *ntb)
1642 {
1643 	struct pci_devinfo *dinfo;
1644 	struct pcicfg_msix *msix;
1645 	uint32_t laddr, data, i, offset;
1646 
1647 	dinfo = device_get_ivars(ntb->device);
1648 	msix = &dinfo->cfg.msix;
1649 
1650 	CTASSERT(XEON_NONLINK_DB_MSIX_BITS == nitems(ntb->msix_data));
1651 
1652 	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
1653 		offset = msix->msix_table_offset + i * PCI_MSIX_ENTRY_SIZE;
1654 
1655 		laddr = bus_read_4(msix->msix_table_res, offset +
1656 		    PCI_MSIX_ENTRY_LOWER_ADDR);
1657 		intel_ntb_printf(2, "local MSIX addr(%u): 0x%x\n", i, laddr);
1658 
1659 		KASSERT((laddr & MSI_INTEL_ADDR_BASE) == MSI_INTEL_ADDR_BASE,
1660 		    ("local MSIX addr 0x%x not in MSI base 0x%x", laddr,
1661 		     MSI_INTEL_ADDR_BASE));
1662 		ntb->msix_data[i].nmd_ofs = laddr;
1663 
1664 		data = bus_read_4(msix->msix_table_res, offset +
1665 		    PCI_MSIX_ENTRY_DATA);
1666 		intel_ntb_printf(2, "local MSIX data(%u): 0x%x\n", i, data);
1667 
1668 		ntb->msix_data[i].nmd_data = data;
1669 	}
1670 }
1671 
1672 static struct ntb_hw_info *
1673 intel_ntb_get_device_info(uint32_t device_id)
1674 {
1675 	struct ntb_hw_info *ep;
1676 
1677 	for (ep = pci_ids; ep < &pci_ids[nitems(pci_ids)]; ep++) {
1678 		if (ep->device_id == device_id)
1679 			return (ep);
1680 	}
1681 	return (NULL);
1682 }
1683 
1684 static void
1685 intel_ntb_teardown_xeon(struct ntb_softc *ntb)
1686 {
1687 
1688 	if (ntb->reg != NULL)
1689 		intel_ntb_link_disable(ntb->device);
1690 }
1691 
1692 static void
1693 intel_ntb_detect_max_mw(struct ntb_softc *ntb)
1694 {
1695 
1696 	switch (ntb->type) {
1697 	case NTB_ATOM:
1698 		ntb->mw_count = ATOM_MW_COUNT;
1699 		break;
1700 	case NTB_XEON_GEN1:
1701 		if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
1702 			ntb->mw_count = XEON_HSX_SPLIT_MW_COUNT;
1703 		else
1704 			ntb->mw_count = XEON_SNB_MW_COUNT;
1705 		break;
1706 	case NTB_XEON_GEN3:
1707 	case NTB_XEON_GEN4:
1708 		if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
1709 			ntb->mw_count = XEON_GEN3_SPLIT_MW_COUNT;
1710 		else
1711 			ntb->mw_count = XEON_GEN3_MW_COUNT;
1712 		break;
1713 	}
1714 }
1715 
1716 static int
1717 intel_ntb_detect_xeon(struct ntb_softc *ntb)
1718 {
1719 	uint8_t ppd, conn_type;
1720 
1721 	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 1);
1722 	ntb->ppd = ppd;
1723 
1724 	if ((ppd & XEON_PPD_DEV_TYPE) != 0)
1725 		ntb->dev_type = NTB_DEV_DSD;
1726 	else
1727 		ntb->dev_type = NTB_DEV_USD;
1728 
1729 	if ((ppd & XEON_PPD_SPLIT_BAR) != 0)
1730 		ntb->features |= NTB_SPLIT_BAR;
1731 
1732 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP) &&
1733 	    !HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
1734 		device_printf(ntb->device,
1735 		    "Can not apply SB01BASE_LOCKUP workaround "
1736 		    "with split BARs disabled!\n");
1737 		device_printf(ntb->device,
1738 		    "Expect system hangs under heavy NTB traffic!\n");
1739 		ntb->features &= ~NTB_SB01BASE_LOCKUP;
1740 	}
1741 
1742 	/*
1743 	 * SDOORBELL errata workaround gets in the way of SB01BASE_LOCKUP
1744 	 * errata workaround; only do one at a time.
1745 	 */
1746 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
1747 		ntb->features &= ~NTB_SDOORBELL_LOCKUP;
1748 
1749 	conn_type = ppd & XEON_PPD_CONN_TYPE;
1750 	switch (conn_type) {
1751 	case NTB_CONN_B2B:
1752 		ntb->conn_type = conn_type;
1753 		break;
1754 	case NTB_CONN_RP:
1755 	case NTB_CONN_TRANSPARENT:
1756 	default:
1757 		device_printf(ntb->device, "Unsupported connection type: %u\n",
1758 		    (unsigned)conn_type);
1759 		return (ENXIO);
1760 	}
1761 	return (0);
1762 }
1763 
1764 static int
1765 intel_ntb_detect_atom(struct ntb_softc *ntb)
1766 {
1767 	uint32_t ppd, conn_type;
1768 
1769 	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4);
1770 	ntb->ppd = ppd;
1771 
1772 	if ((ppd & ATOM_PPD_DEV_TYPE) != 0)
1773 		ntb->dev_type = NTB_DEV_DSD;
1774 	else
1775 		ntb->dev_type = NTB_DEV_USD;
1776 
1777 	conn_type = (ppd & ATOM_PPD_CONN_TYPE) >> 8;
1778 	switch (conn_type) {
1779 	case NTB_CONN_B2B:
1780 		ntb->conn_type = conn_type;
1781 		break;
1782 	default:
1783 		device_printf(ntb->device, "Unsupported NTB configuration\n");
1784 		return (ENXIO);
1785 	}
1786 	return (0);
1787 }
1788 
1789 static int
1790 intel_ntb_detect_xeon_gen3(struct ntb_softc *ntb)
1791 {
1792 	uint8_t ppd, conn_type;
1793 
1794 	ppd = pci_read_config(ntb->device, XEON_GEN3_INT_REG_PPD, 1);
1795 	ntb->ppd = ppd;
1796 
1797 	/* check port definition */
1798 	conn_type = XEON_GEN3_REG_PPD_PORT_DEF_F(ppd);
1799 	switch (conn_type) {
1800 	case NTB_CONN_B2B:
1801 		ntb->conn_type = conn_type;
1802 		break;
1803 	default:
1804 		device_printf(ntb->device, "Unsupported connection type: %u\n",
1805 		    conn_type);
1806 		return (ENXIO);
1807 	}
1808 
1809 	/* check cross link configuration status */
1810 	if (XEON_GEN3_REG_PPD_CONF_STS_F(ppd)) {
1811 		/* NTB Port is configured as DSD/USP */
1812 		ntb->dev_type = NTB_DEV_DSD;
1813 	} else {
1814 		/* NTB Port is configured as USD/DSP */
1815 		ntb->dev_type = NTB_DEV_USD;
1816 	}
1817 
1818 	if (XEON_GEN3_REG_PPD_ONE_MSIX_F(ppd)) {
1819 		/*
1820 		 * This bit when set, causes only a single MSI-X message to be
1821 		 * generated if MSI-X is enabled.
1822 		 */
1823 		ntb->features |= NTB_ONE_MSIX;
1824 	}
1825 
1826 	if (XEON_GEN3_REG_PPD_BAR45_SPL_F(ppd)) {
1827 		/* BARs 4 and 5 are presented as two 32b non-prefetchable BARs */
1828 		ntb->features |= NTB_SPLIT_BAR;
1829 	}
1830 
1831 	device_printf(ntb->device, "conn type 0x%02x, dev type 0x%02x,"
1832 	    "features 0x%02x\n", ntb->conn_type, ntb->dev_type, ntb->features);
1833 
1834 	return (0);
1835 }
1836 
1837 static int
1838 intel_ntb_is_ICX(struct ntb_softc *ntb)
1839 {
1840 	uint8_t revision;
1841 
1842 	revision = pci_get_revid(ntb->device);
1843 	if (ntb->type == NTB_XEON_GEN4 &&
1844 	    revision >= PCI_DEV_REV_ICX_MIN &&
1845 	    revision <= PCI_DEV_REV_ICX_MAX)
1846 		return (1);
1847 
1848 	return (0);
1849 }
1850 
1851 static int
1852 intel_ntb_is_SPR(struct ntb_softc *ntb)
1853 {
1854 	uint8_t revision;
1855 
1856 	revision = pci_get_revid(ntb->device);
1857 	if (ntb->type == NTB_XEON_GEN4 &&
1858 	    revision > PCI_DEV_REV_ICX_MAX)
1859 		return (1);
1860 
1861 	return (0);
1862 }
1863 
1864 static int
1865 intel_ntb_detect_xeon_gen4(struct ntb_softc *ntb)
1866 {
1867 	if (intel_ntb_is_ICX(ntb)) {
1868 		ntb->features |= NTB_BAR_ALIGN;
1869 		ntb->features |= NTB_LTR_BAD;
1870 	}
1871 	return (0);
1872 }
1873 
1874 static int
1875 intel_ntb_detect_xeon_gen4_cfg(struct ntb_softc *ntb)
1876 {
1877 	uint32_t ppd1;
1878 
1879 	ppd1 = intel_ntb_reg_read(4, XEON_GEN4_REG_PPD1);
1880 	ntb->ppd = ppd1;
1881 	if (intel_ntb_is_ICX(ntb)) {
1882 		if ((ppd1 & GEN4_PPD_TOPO_MASK) == GEN4_PPD_TOPO_B2B_USD) {
1883 			/* NTB Port is configured as USD/DSP */
1884 			ntb->conn_type = NTB_CONN_B2B;
1885 			ntb->dev_type = NTB_DEV_USD;
1886 		} else if ((ppd1 & GEN4_PPD_TOPO_MASK) == GEN4_PPD_TOPO_B2B_DSD) {
1887 			/* NTB Port is configured as DSD/USP */
1888 			ntb->conn_type = NTB_CONN_B2B;
1889 			ntb->dev_type = NTB_DEV_DSD;
1890 		} else {
1891 			device_printf(ntb->device, "Unsupported connection type: %u\n",
1892 			    (ppd1 & GEN4_PPD_CONN_MASK));
1893 			return (ENXIO);
1894 		}
1895 	} else if (intel_ntb_is_SPR(ntb)) {
1896 		if ((ppd1 & SPR_PPD_TOPO_MASK) == SPR_PPD_TOPO_B2B_USD) {
1897 			/* NTB Port is configured as USD/DSP */
1898 			ntb->conn_type = NTB_CONN_B2B;
1899 			ntb->dev_type = NTB_DEV_USD;
1900 		} else if ((ppd1 & SPR_PPD_TOPO_MASK) == SPR_PPD_TOPO_B2B_DSD) {
1901 			/* NTB Port is configured as DSD/USP */
1902 			ntb->conn_type = NTB_CONN_B2B;
1903 			ntb->dev_type = NTB_DEV_DSD;
1904 		} else {
1905 			device_printf(ntb->device, "Unsupported connection type: %u\n",
1906 			    (ppd1 & SPR_PPD_CONN_MASK));
1907 			return (ENXIO);
1908 		}
1909 	}
1910 
1911 	device_printf(ntb->device, "conn type 0x%02x, dev type 0x%02x,"
1912 	    "features 0x%02x\n", ntb->conn_type, ntb->dev_type, ntb->features);
1913 
1914 	return (0);
1915 }
1916 
1917 static int
1918 intel_ntb_xeon_init_dev(struct ntb_softc *ntb)
1919 {
1920 	int rc;
1921 
1922 	ntb->spad_count		= XEON_SPAD_COUNT;
1923 	ntb->db_count		= XEON_DB_COUNT;
1924 	ntb->db_link_mask	= XEON_DB_LINK_BIT;
1925 	ntb->db_vec_count	= XEON_DB_MSIX_VECTOR_COUNT;
1926 	ntb->db_vec_shift	= XEON_DB_MSIX_VECTOR_SHIFT;
1927 
1928 	if (ntb->conn_type != NTB_CONN_B2B) {
1929 		device_printf(ntb->device, "Connection type %d not supported\n",
1930 		    ntb->conn_type);
1931 		return (ENXIO);
1932 	}
1933 
1934 	ntb->reg = &xeon_reg;
1935 	ntb->self_reg = &xeon_pri_reg;
1936 	ntb->peer_reg = &xeon_b2b_reg;
1937 	ntb->xlat_reg = &xeon_sec_xlat;
1938 
1939 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
1940 		ntb->force_db = ntb->fake_db = 0;
1941 		ntb->msix_mw_idx = (ntb->mw_count + g_ntb_msix_idx) %
1942 		    ntb->mw_count;
1943 		intel_ntb_printf(2, "Setting up MSIX mw idx %d means %u\n",
1944 		    g_ntb_msix_idx, ntb->msix_mw_idx);
1945 		rc = intel_ntb_mw_set_wc_internal(ntb, ntb->msix_mw_idx,
1946 		    VM_MEMATTR_UNCACHEABLE);
1947 		KASSERT(rc == 0, ("shouldn't fail"));
1948 	} else if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
1949 		/*
1950 		 * There is a Xeon hardware errata related to writes to SDOORBELL or
1951 		 * B2BDOORBELL in conjunction with inbound access to NTB MMIO space,
1952 		 * which may hang the system.  To workaround this, use a memory
1953 		 * window to access the interrupt and scratch pad registers on the
1954 		 * remote system.
1955 		 */
1956 		ntb->b2b_mw_idx = (ntb->mw_count + g_ntb_mw_idx) %
1957 		    ntb->mw_count;
1958 		intel_ntb_printf(2, "Setting up b2b mw idx %d means %u\n",
1959 		    g_ntb_mw_idx, ntb->b2b_mw_idx);
1960 		rc = intel_ntb_mw_set_wc_internal(ntb, ntb->b2b_mw_idx,
1961 		    VM_MEMATTR_UNCACHEABLE);
1962 		KASSERT(rc == 0, ("shouldn't fail"));
1963 	} else if (HAS_FEATURE(ntb, NTB_B2BDOORBELL_BIT14))
1964 		/*
1965 		 * HW Errata on bit 14 of b2bdoorbell register.  Writes will not be
1966 		 * mirrored to the remote system.  Shrink the number of bits by one,
1967 		 * since bit 14 is the last bit.
1968 		 *
1969 		 * On REGS_THRU_MW errata mode, we don't use the b2bdoorbell register
1970 		 * anyway.  Nor for non-B2B connection types.
1971 		 */
1972 		ntb->db_count = XEON_DB_COUNT - 1;
1973 
1974 	ntb->db_valid_mask = (1ull << ntb->db_count) - 1;
1975 
1976 	if (ntb->dev_type == NTB_DEV_USD)
1977 		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_dsd_addr,
1978 		    &xeon_b2b_usd_addr);
1979 	else
1980 		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_usd_addr,
1981 		    &xeon_b2b_dsd_addr);
1982 	if (rc != 0)
1983 		return (rc);
1984 
1985 	/* Enable Bus Master and Memory Space on the secondary side */
1986 	intel_ntb_reg_write(2, XEON_SPCICMD_OFFSET,
1987 	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
1988 
1989 	/*
1990 	 * Mask all doorbell interrupts.
1991 	 */
1992 	DB_MASK_LOCK(ntb);
1993 	ntb->db_mask = ntb->db_valid_mask;
1994 	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
1995 	DB_MASK_UNLOCK(ntb);
1996 
1997 	rc = intel_ntb_init_isr(ntb);
1998 	return (rc);
1999 }
2000 
2001 static int
2002 intel_ntb_xeon_gen3_init_dev(struct ntb_softc *ntb)
2003 {
2004 	int rc;
2005 
2006 	ntb->spad_count = XEON_GEN3_SPAD_COUNT;
2007 	ntb->db_count = XEON_GEN3_DB_COUNT;
2008 	ntb->db_link_mask = XEON_GEN3_DB_LINK_BIT;
2009 	ntb->db_vec_count = XEON_GEN3_DB_MSIX_VECTOR_COUNT;
2010 	ntb->db_vec_shift = XEON_GEN3_DB_MSIX_VECTOR_SHIFT;
2011 
2012 	if (ntb->conn_type != NTB_CONN_B2B) {
2013 		device_printf(ntb->device, "Connection type %d not supported\n",
2014 		    ntb->conn_type);
2015 		return (ENXIO);
2016 	}
2017 
2018 	ntb->reg = &xeon_gen3_reg;
2019 	ntb->self_reg = &xeon_gen3_pri_reg;
2020 	ntb->peer_reg = &xeon_gen3_b2b_reg;
2021 	ntb->xlat_reg = &xeon_gen3_sec_xlat;
2022 
2023 	ntb->db_valid_mask = (1ULL << ntb->db_count) - 1;
2024 
2025 	xeon_gen3_setup_b2b_mw(ntb);
2026 
2027 	/* Enable Bus Master and Memory Space on the External Side */
2028 	intel_ntb_reg_write(2, XEON_GEN3_EXT_REG_PCI_CMD,
2029 	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
2030 
2031 	/* Setup Interrupt */
2032 	rc = intel_ntb_xeon_gen3_init_isr(ntb);
2033 
2034 	return (rc);
2035 }
2036 
2037 static int
2038 intel_ntb_xeon_gen4_init_dev(struct ntb_softc *ntb)
2039 {
2040 	int rc;
2041 	uint16_t lnkctl;
2042 
2043 	ntb->spad_count = XEON_GEN4_SPAD_COUNT;
2044 	ntb->db_count = XEON_GEN4_DB_COUNT;
2045 	ntb->db_link_mask = XEON_GEN4_DB_LINK_BIT;
2046 	ntb->db_vec_count = XEON_GEN4_DB_MSIX_VECTOR_COUNT;
2047 	ntb->db_vec_shift = XEON_GEN4_DB_MSIX_VECTOR_SHIFT;
2048 
2049 	if (intel_ntb_detect_xeon_gen4_cfg(ntb) != 0)
2050 		return (ENXIO);
2051 
2052 	ntb->reg = &xeon_gen4_reg;
2053 	ntb->self_reg = &xeon_gen4_pri_reg;
2054 	ntb->peer_reg = &xeon_gen4_b2b_reg;
2055 	ntb->xlat_reg = &xeon_gen4_sec_xlat;
2056 
2057 	ntb->db_valid_mask = (1ULL << ntb->db_count) - 1;
2058 	xeon_gen4_setup_b2b_mw(ntb);
2059 
2060 	/* init link setup */
2061 	lnkctl = intel_ntb_reg_read(2, XEON_GEN4_REG_LINK_CTRL);
2062 	lnkctl |= GEN4_LINK_CTRL_LINK_DISABLE;
2063 	intel_ntb_reg_write(2, XEON_GEN4_REG_LINK_CTRL, lnkctl);
2064 
2065 	/* Setup Interrupt */
2066 	rc = intel_ntb_xeon_gen4_init_isr(ntb);
2067 	return (rc);
2068 }
2069 
2070 static int
2071 intel_ntb_atom_init_dev(struct ntb_softc *ntb)
2072 {
2073 	int error;
2074 
2075 	KASSERT(ntb->conn_type == NTB_CONN_B2B,
2076 	    ("Unsupported NTB configuration (%d)\n", ntb->conn_type));
2077 
2078 	ntb->spad_count		 = ATOM_SPAD_COUNT;
2079 	ntb->db_count		 = ATOM_DB_COUNT;
2080 	ntb->db_vec_count	 = ATOM_DB_MSIX_VECTOR_COUNT;
2081 	ntb->db_vec_shift	 = ATOM_DB_MSIX_VECTOR_SHIFT;
2082 	ntb->db_valid_mask	 = (1ull << ntb->db_count) - 1;
2083 
2084 	ntb->reg = &atom_reg;
2085 	ntb->self_reg = &atom_pri_reg;
2086 	ntb->peer_reg = &atom_b2b_reg;
2087 	ntb->xlat_reg = &atom_sec_xlat;
2088 
2089 	/*
2090 	 * FIXME - MSI-X bug on early Atom HW, remove once internal issue is
2091 	 * resolved.  Mask transaction layer internal parity errors.
2092 	 */
2093 	pci_write_config(ntb->device, 0xFC, 0x4, 4);
2094 
2095 	configure_atom_secondary_side_bars(ntb);
2096 
2097 	/* Enable Bus Master and Memory Space on the secondary side */
2098 	intel_ntb_reg_write(2, ATOM_SPCICMD_OFFSET,
2099 	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
2100 
2101 	error = intel_ntb_init_isr(ntb);
2102 	if (error != 0)
2103 		return (error);
2104 
2105 	/* Initiate PCI-E link training */
2106 	intel_ntb_link_enable(ntb->device, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
2107 
2108 	callout_reset(&ntb->heartbeat_timer, 0, atom_link_hb, ntb);
2109 
2110 	return (0);
2111 }
2112 
2113 /* XXX: Linux driver doesn't seem to do any of this for Atom. */
2114 static void
2115 configure_atom_secondary_side_bars(struct ntb_softc *ntb)
2116 {
2117 
2118 	if (ntb->dev_type == NTB_DEV_USD) {
2119 		intel_ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
2120 		    XEON_B2B_BAR2_ADDR64);
2121 		intel_ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
2122 		    XEON_B2B_BAR4_ADDR64);
2123 		intel_ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
2124 		intel_ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
2125 	} else {
2126 		intel_ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
2127 		    XEON_B2B_BAR2_ADDR64);
2128 		intel_ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
2129 		    XEON_B2B_BAR4_ADDR64);
2130 		intel_ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
2131 		intel_ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
2132 	}
2133 }
2134 
/*
 * When working around Xeon SDOORBELL errata by remapping remote registers in a
 * MW, limit the B2B MW to half a MW.  By sharing a MW, half the shared MW
 * remains for use by a higher layer.
 *
 * Will only be used if working around SDOORBELL errata and the BIOS-configured
 * MW size is sufficiently large.
 *
 * Exposed as the hw.ntb.b2b_mw_share sysctl.  It is declared CTLFLAG_RDTUN
 * and defaults to 0 (sharing disabled).
 */
static unsigned int ntb_b2b_mw_share;
SYSCTL_UINT(_hw_ntb, OID_AUTO, b2b_mw_share, CTLFLAG_RDTUN, &ntb_b2b_mw_share,
    0, "If enabled (non-zero), prefer to share half of the B2B peer register "
    "MW with higher level consumers.  Both sides of the NTB MUST set the same "
    "value here.");
2148 
2149 static void
2150 xeon_reset_sbar_size(struct ntb_softc *ntb, enum ntb_bar idx,
2151     enum ntb_bar regbar)
2152 {
2153 	struct ntb_pci_bar_info *bar;
2154 	uint8_t bar_sz;
2155 
2156 	if (!HAS_FEATURE(ntb, NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_3)
2157 		return;
2158 
2159 	bar = &ntb->bar_info[idx];
2160 	bar_sz = pci_read_config(ntb->device, bar->psz_off, 1);
2161 	if (idx == regbar) {
2162 		if (ntb->b2b_off != 0)
2163 			bar_sz--;
2164 		else
2165 			bar_sz = 0;
2166 	}
2167 	pci_write_config(ntb->device, bar->ssz_off, bar_sz, 1);
2168 	bar_sz = pci_read_config(ntb->device, bar->ssz_off, 1);
2169 	(void)bar_sz;
2170 }
2171 
2172 static void
2173 xeon_set_sbar_base_and_limit(struct ntb_softc *ntb, uint64_t bar_addr,
2174     enum ntb_bar idx, enum ntb_bar regbar)
2175 {
2176 	uint64_t reg_val;
2177 	uint32_t base_reg, lmt_reg;
2178 
2179 	bar_get_xlat_params(ntb, idx, &base_reg, NULL, &lmt_reg);
2180 	if (idx == regbar) {
2181 		if (ntb->b2b_off)
2182 			bar_addr += ntb->b2b_off;
2183 		else
2184 			bar_addr = 0;
2185 	}
2186 
2187 	if (!bar_is_64bit(ntb, idx)) {
2188 		intel_ntb_reg_write(4, base_reg, bar_addr);
2189 		reg_val = intel_ntb_reg_read(4, base_reg);
2190 		(void)reg_val;
2191 
2192 		intel_ntb_reg_write(4, lmt_reg, bar_addr);
2193 		reg_val = intel_ntb_reg_read(4, lmt_reg);
2194 		(void)reg_val;
2195 	} else {
2196 		intel_ntb_reg_write(8, base_reg, bar_addr);
2197 		reg_val = intel_ntb_reg_read(8, base_reg);
2198 		(void)reg_val;
2199 
2200 		intel_ntb_reg_write(8, lmt_reg, bar_addr);
2201 		reg_val = intel_ntb_reg_read(8, lmt_reg);
2202 		(void)reg_val;
2203 	}
2204 }
2205 
2206 static void
2207 xeon_set_pbar_xlat(struct ntb_softc *ntb, uint64_t base_addr, enum ntb_bar idx)
2208 {
2209 	struct ntb_pci_bar_info *bar;
2210 
2211 	bar = &ntb->bar_info[idx];
2212 	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_2) {
2213 		intel_ntb_reg_write(4, bar->pbarxlat_off, base_addr);
2214 		base_addr = intel_ntb_reg_read(4, bar->pbarxlat_off);
2215 	} else {
2216 		intel_ntb_reg_write(8, bar->pbarxlat_off, base_addr);
2217 		base_addr = intel_ntb_reg_read(8, bar->pbarxlat_off);
2218 	}
2219 	(void)base_addr;
2220 }
2221 
2222 static int
2223 xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
2224     const struct ntb_b2b_addr *peer_addr)
2225 {
2226 	struct ntb_pci_bar_info *b2b_bar;
2227 	vm_size_t bar_size;
2228 	uint64_t bar_addr;
2229 	enum ntb_bar b2b_bar_num, i;
2230 
2231 	if (ntb->b2b_mw_idx == B2B_MW_DISABLED) {
2232 		b2b_bar = NULL;
2233 		b2b_bar_num = NTB_CONFIG_BAR;
2234 		ntb->b2b_off = 0;
2235 	} else {
2236 		b2b_bar_num = intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx);
2237 		KASSERT(b2b_bar_num > 0 && b2b_bar_num < NTB_MAX_BARS,
2238 		    ("invalid b2b mw bar"));
2239 
2240 		b2b_bar = &ntb->bar_info[b2b_bar_num];
2241 		bar_size = b2b_bar->size;
2242 
2243 		if (ntb_b2b_mw_share != 0 &&
2244 		    (bar_size >> 1) >= XEON_B2B_MIN_SIZE)
2245 			ntb->b2b_off = bar_size >> 1;
2246 		else if (bar_size >= XEON_B2B_MIN_SIZE) {
2247 			ntb->b2b_off = 0;
2248 		} else {
2249 			device_printf(ntb->device,
2250 			    "B2B bar size is too small!\n");
2251 			return (EIO);
2252 		}
2253 	}
2254 
2255 	/*
2256 	 * Reset the secondary bar sizes to match the primary bar sizes.
2257 	 * (Except, disable or halve the size of the B2B secondary bar.)
2258 	 */
2259 	for (i = NTB_B2B_BAR_1; i < NTB_MAX_BARS; i++)
2260 		xeon_reset_sbar_size(ntb, i, b2b_bar_num);
2261 
2262 	bar_addr = 0;
2263 	if (b2b_bar_num == NTB_CONFIG_BAR)
2264 		bar_addr = addr->bar0_addr;
2265 	else if (b2b_bar_num == NTB_B2B_BAR_1)
2266 		bar_addr = addr->bar2_addr64;
2267 	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(ntb, NTB_SPLIT_BAR))
2268 		bar_addr = addr->bar4_addr64;
2269 	else if (b2b_bar_num == NTB_B2B_BAR_2)
2270 		bar_addr = addr->bar4_addr32;
2271 	else if (b2b_bar_num == NTB_B2B_BAR_3)
2272 		bar_addr = addr->bar5_addr32;
2273 	else
2274 		KASSERT(false, ("invalid bar"));
2275 
2276 	intel_ntb_reg_write(8, XEON_SBAR0BASE_OFFSET, bar_addr);
2277 
2278 	/*
2279 	 * Other SBARs are normally hit by the PBAR xlat, except for the b2b
2280 	 * register BAR.  The B2B BAR is either disabled above or configured
2281 	 * half-size.  It starts at PBAR xlat + offset.
2282 	 *
2283 	 * Also set up incoming BAR limits == base (zero length window).
2284 	 */
2285 	xeon_set_sbar_base_and_limit(ntb, addr->bar2_addr64, NTB_B2B_BAR_1,
2286 	    b2b_bar_num);
2287 	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
2288 		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr32,
2289 		    NTB_B2B_BAR_2, b2b_bar_num);
2290 		xeon_set_sbar_base_and_limit(ntb, addr->bar5_addr32,
2291 		    NTB_B2B_BAR_3, b2b_bar_num);
2292 	} else
2293 		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr64,
2294 		    NTB_B2B_BAR_2, b2b_bar_num);
2295 
2296 	/* Zero incoming translation addrs */
2297 	intel_ntb_reg_write(8, XEON_SBAR2XLAT_OFFSET, 0);
2298 	intel_ntb_reg_write(8, XEON_SBAR4XLAT_OFFSET, 0);
2299 
2300 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
2301 		uint32_t xlat_reg, lmt_reg;
2302 		enum ntb_bar bar_num;
2303 
2304 		/*
2305 		 * We point the chosen MSIX MW BAR xlat to remote LAPIC for
2306 		 * workaround
2307 		 */
2308 		bar_num = intel_ntb_mw_to_bar(ntb, ntb->msix_mw_idx);
2309 		bar_get_xlat_params(ntb, bar_num, NULL, &xlat_reg, &lmt_reg);
2310 		if (bar_is_64bit(ntb, bar_num)) {
2311 			intel_ntb_reg_write(8, xlat_reg, MSI_INTEL_ADDR_BASE);
2312 			ntb->msix_xlat = intel_ntb_reg_read(8, xlat_reg);
2313 			intel_ntb_reg_write(8, lmt_reg, 0);
2314 		} else {
2315 			intel_ntb_reg_write(4, xlat_reg, MSI_INTEL_ADDR_BASE);
2316 			ntb->msix_xlat = intel_ntb_reg_read(4, xlat_reg);
2317 			intel_ntb_reg_write(4, lmt_reg, 0);
2318 		}
2319 
2320 		ntb->peer_lapic_bar =  &ntb->bar_info[bar_num];
2321 	}
2322 	(void)intel_ntb_reg_read(8, XEON_SBAR2XLAT_OFFSET);
2323 	(void)intel_ntb_reg_read(8, XEON_SBAR4XLAT_OFFSET);
2324 
2325 	/* Zero outgoing translation limits (whole bar size windows) */
2326 	intel_ntb_reg_write(8, XEON_PBAR2LMT_OFFSET, 0);
2327 	intel_ntb_reg_write(8, XEON_PBAR4LMT_OFFSET, 0);
2328 
2329 	/* Set outgoing translation offsets */
2330 	xeon_set_pbar_xlat(ntb, peer_addr->bar2_addr64, NTB_B2B_BAR_1);
2331 	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
2332 		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr32, NTB_B2B_BAR_2);
2333 		xeon_set_pbar_xlat(ntb, peer_addr->bar5_addr32, NTB_B2B_BAR_3);
2334 	} else
2335 		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr64, NTB_B2B_BAR_2);
2336 
2337 	/* Set the translation offset for B2B registers */
2338 	bar_addr = 0;
2339 	if (b2b_bar_num == NTB_CONFIG_BAR)
2340 		bar_addr = peer_addr->bar0_addr;
2341 	else if (b2b_bar_num == NTB_B2B_BAR_1)
2342 		bar_addr = peer_addr->bar2_addr64;
2343 	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(ntb, NTB_SPLIT_BAR))
2344 		bar_addr = peer_addr->bar4_addr64;
2345 	else if (b2b_bar_num == NTB_B2B_BAR_2)
2346 		bar_addr = peer_addr->bar4_addr32;
2347 	else if (b2b_bar_num == NTB_B2B_BAR_3)
2348 		bar_addr = peer_addr->bar5_addr32;
2349 	else
2350 		KASSERT(false, ("invalid bar"));
2351 
2352 	/*
2353 	 * B2B_XLAT_OFFSET is a 64-bit register but can only be written 32 bits
2354 	 * at a time.
2355 	 */
2356 	intel_ntb_reg_write(4, XEON_B2B_XLAT_OFFSETL, bar_addr & 0xffffffff);
2357 	intel_ntb_reg_write(4, XEON_B2B_XLAT_OFFSETU, bar_addr >> 32);
2358 	return (0);
2359 }
2360 
2361 static int
2362 xeon_gen3_setup_b2b_mw(struct ntb_softc *ntb)
2363 {
2364 	uint64_t reg;
2365 	uint32_t embarsz, imbarsz;
2366 
2367 	/* IMBAR1SZ should be equal to EMBAR1SZ */
2368 	embarsz = pci_read_config(ntb->device, XEON_GEN3_INT_REG_EMBAR1SZ, 1);
2369 	imbarsz = pci_read_config(ntb->device, XEON_GEN3_INT_REG_IMBAR1SZ, 1);
2370 	if (embarsz != imbarsz) {
2371 		device_printf(ntb->device,
2372 		    "IMBAR1SZ (%u) should be equal to EMBAR1SZ (%u)\n",
2373 		    imbarsz, embarsz);
2374 		return (EIO);
2375 	}
2376 
2377 	/* IMBAR2SZ should be equal to EMBAR2SZ */
2378 	embarsz = pci_read_config(ntb->device, XEON_GEN3_INT_REG_EMBAR2SZ, 1);
2379 	imbarsz = pci_read_config(ntb->device, XEON_GEN3_INT_REG_IMBAR2SZ, 1);
2380 	if (embarsz != imbarsz) {
2381 		device_printf(ntb->device,
2382 		    "IMBAR2SZ (%u) should be equal to EMBAR2SZ (%u)\n",
2383 		    imbarsz, embarsz);
2384 		return (EIO);
2385 	}
2386 
2387 	/* Client will provide the incoming IMBAR1/2XBASE, zero it for now */
2388 	intel_ntb_reg_write(8, XEON_GEN3_REG_IMBAR1XBASE, 0);
2389 	intel_ntb_reg_write(8, XEON_GEN3_REG_IMBAR2XBASE, 0);
2390 
2391 	/*
2392 	 * If the value in IMBAR1XLIMIT is set equal to the value in IMBAR1XBASE,
2393 	 * the local memory window exposure from EMBAR1 is disabled.
2394 	 * Note: It is needed to avoid malicious access.
2395 	 */
2396 	intel_ntb_reg_write(8, XEON_GEN3_REG_IMBAR1XLIMIT, 0);
2397 	intel_ntb_reg_write(8, XEON_GEN3_REG_IMBAR2XLIMIT, 0);
2398 
2399 	/* Config outgoing translation limits (whole bar size windows) */
2400 	reg = intel_ntb_reg_read(8, XEON_GEN3_REG_EMBAR1XBASE);
2401 	reg += ntb->bar_info[NTB_B2B_BAR_1].size;
2402 	intel_ntb_reg_write(8, XEON_GEN3_REG_EMBAR1XLIMIT, reg);
2403 
2404 	reg = intel_ntb_reg_read(8, XEON_GEN3_REG_EMBAR2XBASE);
2405 	reg += ntb->bar_info[NTB_B2B_BAR_2].size;
2406 	intel_ntb_reg_write(8, XEON_GEN3_REG_EMBAR2XLIMIT, reg);
2407 
2408 	return (0);
2409 }
2410 
2411 static int
2412 xeon_gen4_setup_b2b_mw(struct ntb_softc *ntb)
2413 {
2414 	uint32_t embarsz, imbarsz;
2415 
2416 	/* IMBAR23SZ should be equal to EMBAR23SZ */
2417 	imbarsz = pci_read_config(ntb->device, XEON_GEN4_CFG_REG_IMBAR1SZ, 1);
2418 	embarsz = pci_read_config(ntb->device, XEON_GEN4_CFG_REG_EMBAR1SZ, 1);
2419 	if (embarsz != imbarsz) {
2420 		device_printf(ntb->device,
2421 		    "IMBAR23SZ (%u) should be equal to EMBAR23SZ (%u)\n",
2422 		    imbarsz, embarsz);
2423 		return (EIO);
2424 	}
2425 	/* IMBAR45SZ should be equal to EMBAR45SZ */
2426 	imbarsz = pci_read_config(ntb->device, XEON_GEN4_CFG_REG_IMBAR2SZ, 1);
2427 	embarsz = pci_read_config(ntb->device, XEON_GEN4_CFG_REG_EMBAR2SZ, 1);
2428 	if (embarsz != imbarsz) {
2429 		device_printf(ntb->device,
2430 		    "IMBAR45SZ (%u) should be equal to EMBAR45SZ (%u)\n",
2431 		    imbarsz, embarsz);
2432 		return (EIO);
2433 	}
2434 
2435 	/* Client will provide the incoming IMBARXBASE, zero it for now */
2436 	intel_ntb_reg_write(8, XEON_GEN4_REG_IMBAR1XBASE, 0);
2437 	intel_ntb_reg_write(8, XEON_GEN4_REG_IMBAR2XBASE, 0);
2438 
2439 	/*
2440 	 * If the value in IMBARXLIMIT is set equal to the value in IMBARXBASE,
2441 	 * the local memory window exposure from EMBAR is disabled.
2442 	 * Note: It is needed to avoid malicious access.
2443 	 */
2444 	intel_ntb_reg_write(8, XEON_GEN4_REG_IMBAR1XLIMIT, 0);
2445 	intel_ntb_reg_write(8, XEON_GEN4_REG_IMBAR2XLIMIT, 0);
2446 
2447 	/* EMBARXLIMIT & EMBARXBASE are gone for gen4, noop here */
2448 
2449 	return (0);
2450 }
2451 
2452 static inline bool
2453 _xeon_link_is_up(struct ntb_softc *ntb)
2454 {
2455 
2456 	if (ntb->conn_type == NTB_CONN_TRANSPARENT)
2457 		return (true);
2458 	return ((ntb->lnk_sta & NTB_LINK_STATUS_ACTIVE) != 0);
2459 }
2460 
2461 static inline bool
2462 link_is_up(struct ntb_softc *ntb)
2463 {
2464 
2465 	if (ntb->type == NTB_XEON_GEN1 ||
2466 	    ntb->type == NTB_XEON_GEN3 ||
2467 	    ntb->type == NTB_XEON_GEN4)
2468 		return (_xeon_link_is_up(ntb) && (ntb->peer_msix_good ||
2469 		    !HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)));
2470 
2471 	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
2472 	return ((ntb->ntb_ctl & ATOM_CNTL_LINK_DOWN) == 0);
2473 }
2474 
2475 static inline bool
2476 atom_link_is_err(struct ntb_softc *ntb)
2477 {
2478 	uint32_t status;
2479 
2480 	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
2481 
2482 	status = intel_ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
2483 	if ((status & ATOM_LTSSMSTATEJMP_FORCEDETECT) != 0)
2484 		return (true);
2485 
2486 	status = intel_ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
2487 	return ((status & ATOM_IBIST_ERR_OFLOW) != 0);
2488 }
2489 
2490 /* Atom does not have link status interrupt, poll on that platform */
2491 static void
2492 atom_link_hb(void *arg)
2493 {
2494 	struct ntb_softc *ntb = arg;
2495 	sbintime_t timo, poll_ts;
2496 
2497 	timo = NTB_HB_TIMEOUT * hz;
2498 	poll_ts = ntb->last_ts + timo;
2499 
2500 	/*
2501 	 * Delay polling the link status if an interrupt was received, unless
2502 	 * the cached link status says the link is down.
2503 	 */
2504 	if ((sbintime_t)ticks - poll_ts < 0 && link_is_up(ntb)) {
2505 		timo = poll_ts - ticks;
2506 		goto out;
2507 	}
2508 
2509 	if (intel_ntb_poll_link(ntb))
2510 		ntb_link_event(ntb->device);
2511 
2512 	if (!link_is_up(ntb) && atom_link_is_err(ntb)) {
2513 		/* Link is down with error, proceed with recovery */
2514 		callout_reset(&ntb->lr_timer, 0, recover_atom_link, ntb);
2515 		return;
2516 	}
2517 
2518 out:
2519 	callout_reset(&ntb->heartbeat_timer, timo, atom_link_hb, ntb);
2520 }
2521 
2522 static void
2523 atom_perform_link_restart(struct ntb_softc *ntb)
2524 {
2525 	uint32_t status;
2526 
2527 	/* Driver resets the NTB ModPhy lanes - magic! */
2528 	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0xe0);
2529 	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x40);
2530 	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x60);
2531 	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0x60);
2532 
2533 	/* Driver waits 100ms to allow the NTB ModPhy to settle */
2534 	pause("ModPhy", hz / 10);
2535 
2536 	/* Clear AER Errors, write to clear */
2537 	status = intel_ntb_reg_read(4, ATOM_ERRCORSTS_OFFSET);
2538 	status &= PCIM_AER_COR_REPLAY_ROLLOVER;
2539 	intel_ntb_reg_write(4, ATOM_ERRCORSTS_OFFSET, status);
2540 
2541 	/* Clear unexpected electrical idle event in LTSSM, write to clear */
2542 	status = intel_ntb_reg_read(4, ATOM_LTSSMERRSTS0_OFFSET);
2543 	status |= ATOM_LTSSMERRSTS0_UNEXPECTEDEI;
2544 	intel_ntb_reg_write(4, ATOM_LTSSMERRSTS0_OFFSET, status);
2545 
2546 	/* Clear DeSkew Buffer error, write to clear */
2547 	status = intel_ntb_reg_read(4, ATOM_DESKEWSTS_OFFSET);
2548 	status |= ATOM_DESKEWSTS_DBERR;
2549 	intel_ntb_reg_write(4, ATOM_DESKEWSTS_OFFSET, status);
2550 
2551 	status = intel_ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
2552 	status &= ATOM_IBIST_ERR_OFLOW;
2553 	intel_ntb_reg_write(4, ATOM_IBSTERRRCRVSTS0_OFFSET, status);
2554 
2555 	/* Releases the NTB state machine to allow the link to retrain */
2556 	status = intel_ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
2557 	status &= ~ATOM_LTSSMSTATEJMP_FORCEDETECT;
2558 	intel_ntb_reg_write(4, ATOM_LTSSMSTATEJMP_OFFSET, status);
2559 }
2560 
2561 static int
2562 intel_ntb_port_number(device_t dev)
2563 {
2564 	struct ntb_softc *ntb = device_get_softc(dev);
2565 
2566 	return (ntb->dev_type == NTB_DEV_USD ? 0 : 1);
2567 }
2568 
/*
 * Number of peer ports.  This driver always reports exactly one peer;
 * `dev' is not consulted.
 */
static int
intel_ntb_peer_port_count(device_t dev)
{

	return (1);
}
2575 
2576 static int
2577 intel_ntb_peer_port_number(device_t dev, int pidx)
2578 {
2579 	struct ntb_softc *ntb = device_get_softc(dev);
2580 
2581 	if (pidx != 0)
2582 		return (-EINVAL);
2583 
2584 	return (ntb->dev_type == NTB_DEV_USD ? 1 : 0);
2585 }
2586 
2587 static int
2588 intel_ntb_peer_port_idx(device_t dev, int port)
2589 {
2590 	int peer_port;
2591 
2592 	peer_port = intel_ntb_peer_port_number(dev, 0);
2593 	if (peer_port == -EINVAL || port != peer_port)
2594 		return (-EINVAL);
2595 
2596 	return (0);
2597 }
2598 
2599 static int
2600 intel_ntb4_link_enable(device_t dev, enum ntb_speed speed __unused,
2601     enum ntb_width width __unused)
2602 {
2603 	struct ntb_softc *ntb = device_get_softc(dev);
2604 	uint32_t cntl, ppd0, ltr;
2605 	uint16_t lnkctl;
2606 
2607 	if (!HAS_FEATURE(ntb, NTB_LTR_BAD)) {
2608 		/* Setup active snoop LTR values */
2609 		ltr = NTB_LTR_ACTIVE_REQMNT | NTB_LTR_ACTIVE_VAL | NTB_LTR_ACTIVE_LATSCALE;
2610 		/* Setup active non-snoop values */
2611 		ltr = (ltr << NTB_LTR_NS_SHIFT) | ltr;
2612 		intel_ntb_reg_write(4, XEON_GEN4_REG_EXT_LTR_ACTIVE, ltr);
2613 
2614 		/* Setup idle snoop LTR values */
2615 		ltr = NTB_LTR_IDLE_VAL | NTB_LTR_IDLE_LATSCALE | NTB_LTR_IDLE_REQMNT;
2616 		/* Setup idle non-snoop values */
2617 		ltr = (ltr << NTB_LTR_NS_SHIFT) | ltr;
2618 		intel_ntb_reg_write(4, XEON_GEN4_REG_EXT_LTR_IDLE, ltr);
2619 
2620 		/* setup PCIe LTR to active */
2621 		intel_ntb_reg_write(4, XEON_GEN4_REG_EXT_LTR_SWSEL, NTB_LTR_SWSEL_ACTIVE);
2622 	}
2623 
2624 	cntl = NTB_CTL_E2I_BAR23_SNOOP | NTB_CTL_I2E_BAR23_SNOOP;
2625 	cntl |= NTB_CTL_E2I_BAR45_SNOOP | NTB_CTL_I2E_BAR45_SNOOP;
2626 	intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
2627 
2628 	lnkctl = intel_ntb_reg_read(2, XEON_GEN4_REG_LINK_CTRL);
2629 	lnkctl &= ~GEN4_LINK_CTRL_LINK_DISABLE;
2630 	intel_ntb_reg_write(2, XEON_GEN4_REG_LINK_CTRL, lnkctl);
2631 
2632 	/* start link training in PPD0 */
2633 	ppd0 = intel_ntb_reg_read(4, XEON_GEN4_REG_PPD0);
2634 	ppd0 |= GEN4_PPD_LINKTRN;
2635 	intel_ntb_reg_write(4, XEON_GEN4_REG_PPD0, ppd0);
2636 
2637 	/* make sure link training has started */
2638 	ppd0 = intel_ntb_reg_read(4, XEON_GEN4_REG_PPD0);
2639 	if (!(ppd0 & GEN4_PPD_LINKTRN))
2640 		intel_ntb_printf(2, "Link is not training\n");
2641 
2642 	return (0);
2643 }
2644 
2645 static int
2646 intel_ntb_link_enable(device_t dev, enum ntb_speed speed __unused,
2647     enum ntb_width width __unused)
2648 {
2649 	struct ntb_softc *ntb = device_get_softc(dev);
2650 	uint32_t cntl;
2651 
2652 	intel_ntb_printf(2, "%s\n", __func__);
2653 
2654 	if (ntb->type == NTB_XEON_GEN4)
2655 		return (intel_ntb4_link_enable(dev, speed, width));
2656 
2657 	if (ntb->type == NTB_ATOM) {
2658 		pci_write_config(ntb->device, NTB_PPD_OFFSET,
2659 		    ntb->ppd | ATOM_PPD_INIT_LINK, 4);
2660 		return (0);
2661 	}
2662 
2663 	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
2664 		ntb_link_event(dev);
2665 		return (0);
2666 	}
2667 
2668 	cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
2669 	cntl &= ~(NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK);
2670 	cntl |= NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP;
2671 	cntl |= NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP;
2672 	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
2673 		cntl |= NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP;
2674 	intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
2675 	return (0);
2676 }
2677 
2678 static int
2679 intel_ntb4_link_disable(device_t dev)
2680 {
2681 	struct ntb_softc *ntb = device_get_softc(dev);
2682 	uint32_t cntl;
2683 	uint16_t lnkctl;
2684 
2685 	/* clear the snoop bits */
2686 	cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
2687 	cntl &= ~(NTB_CTL_E2I_BAR23_SNOOP | NTB_CTL_I2E_BAR23_SNOOP);
2688 	cntl &= ~(NTB_CTL_E2I_BAR45_SNOOP | NTB_CTL_I2E_BAR45_SNOOP);
2689 	intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
2690 
2691 	lnkctl = intel_ntb_reg_read(2, XEON_GEN4_REG_LINK_CTRL);
2692 	lnkctl |= GEN4_LINK_CTRL_LINK_DISABLE;
2693 	intel_ntb_reg_write(2, XEON_GEN4_REG_LINK_CTRL, lnkctl);
2694 
2695 	/* set LTR to idle */
2696 	if (!HAS_FEATURE(ntb, NTB_LTR_BAD))
2697 		intel_ntb_reg_write(4, XEON_GEN4_REG_EXT_LTR_SWSEL, NTB_LTR_SWSEL_IDLE);
2698 
2699 	return (0);
2700 }
2701 
2702 static int
2703 intel_ntb_link_disable(device_t dev)
2704 {
2705 	struct ntb_softc *ntb = device_get_softc(dev);
2706 	uint32_t cntl;
2707 
2708 	intel_ntb_printf(2, "%s\n", __func__);
2709 
2710 	if (ntb->type == NTB_XEON_GEN4)
2711 		return (intel_ntb4_link_disable(dev));
2712 
2713 	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
2714 		ntb_link_event(dev);
2715 		return (0);
2716 	}
2717 
2718 	cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
2719 	cntl &= ~(NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP);
2720 	cntl &= ~(NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP);
2721 	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
2722 		cntl &= ~(NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP);
2723 	cntl |= NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK;
2724 	intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
2725 	return (0);
2726 }
2727 
2728 static bool
2729 intel_ntb_link_enabled(device_t dev)
2730 {
2731 	struct ntb_softc *ntb = device_get_softc(dev);
2732 	uint32_t cntl;
2733 
2734 	if (ntb->type == NTB_ATOM) {
2735 		cntl = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4);
2736 		return ((cntl & ATOM_PPD_INIT_LINK) != 0);
2737 	}
2738 
2739 	if (ntb->conn_type == NTB_CONN_TRANSPARENT)
2740 		return (true);
2741 
2742 	if (ntb->type == NTB_XEON_GEN4) {
2743 		cntl = intel_ntb_reg_read(2, XEON_GEN4_REG_LINK_CTRL);
2744 		return ((cntl & GEN4_LINK_CTRL_LINK_DISABLE) == 0);
2745 	}
2746 
2747 	cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
2748 	return ((cntl & NTB_CNTL_LINK_DISABLE) == 0);
2749 }
2750 
2751 static void
2752 recover_atom_link(void *arg)
2753 {
2754 	struct ntb_softc *ntb = arg;
2755 	unsigned speed, width, oldspeed, oldwidth;
2756 	uint32_t status32;
2757 
2758 	atom_perform_link_restart(ntb);
2759 
2760 	/*
2761 	 * There is a potential race between the 2 NTB devices recovering at
2762 	 * the same time.  If the times are the same, the link will not recover
2763 	 * and the driver will be stuck in this loop forever.  Add a random
2764 	 * interval to the recovery time to prevent this race.
2765 	 */
2766 	status32 = arc4random() % ATOM_LINK_RECOVERY_TIME;
2767 	pause("Link", (ATOM_LINK_RECOVERY_TIME + status32) * hz / 1000);
2768 
2769 	if (atom_link_is_err(ntb))
2770 		goto retry;
2771 
2772 	status32 = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
2773 	if ((status32 & ATOM_CNTL_LINK_DOWN) != 0)
2774 		goto out;
2775 
2776 	status32 = intel_ntb_reg_read(4, ntb->reg->lnk_sta);
2777 	width = NTB_LNK_STA_WIDTH(status32);
2778 	speed = status32 & NTB_LINK_SPEED_MASK;
2779 
2780 	oldwidth = NTB_LNK_STA_WIDTH(ntb->lnk_sta);
2781 	oldspeed = ntb->lnk_sta & NTB_LINK_SPEED_MASK;
2782 	if (oldwidth != width || oldspeed != speed)
2783 		goto retry;
2784 
2785 out:
2786 	callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz, atom_link_hb,
2787 	    ntb);
2788 	return;
2789 
2790 retry:
2791 	callout_reset(&ntb->lr_timer, NTB_HB_TIMEOUT * hz, recover_atom_link,
2792 	    ntb);
2793 }
2794 
2795 /*
2796  * Polls the HW link status register(s); returns true if something has changed.
2797  */
2798 static bool
2799 intel_ntb_atom_poll_link(struct ntb_softc *ntb)
2800 {
2801 	uint32_t ntb_cntl;
2802 
2803 	ntb_cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
2804 	if (ntb_cntl == ntb->ntb_ctl)
2805 		return (false);
2806 
2807 	ntb->ntb_ctl = ntb_cntl;
2808 	ntb->lnk_sta = intel_ntb_reg_read(4, ntb->reg->lnk_sta);
2809 	return (true);
2810 }
2811 
2812 static bool
2813 intel_ntb_xeon_gen1_poll_link(struct ntb_softc *ntb)
2814 {
2815 	uint16_t reg_val;
2816 
2817 	if (ntb->type == NTB_XEON_GEN1)
2818 		db_iowrite_raw(ntb, ntb->self_reg->db_bell,
2819 			ntb->db_link_mask);
2820 
2821 	reg_val = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2);
2822 	if (reg_val == ntb->lnk_sta)
2823 		return (false);
2824 
2825 	ntb->lnk_sta = reg_val;
2826 
2827 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
2828 		if (_xeon_link_is_up(ntb)) {
2829 			if (!ntb->peer_msix_good) {
2830 				callout_reset(&ntb->peer_msix_work, 0,
2831 				    intel_ntb_exchange_msix, ntb);
2832 				return (false);
2833 			}
2834 		} else {
2835 			ntb->peer_msix_good = false;
2836 			ntb->peer_msix_done = false;
2837 		}
2838 	}
2839 	return (true);
2840 }
2841 
2842 static bool
2843 intel_ntb_xeon_gen4_poll_link(struct ntb_softc *ntb)
2844 {
2845 	uint16_t reg_val;
2846 
2847 	/*
2848 	* We need to write to DLLSCS bit in the SLOTSTS before we
2849 	* can clear the hardware link interrupt on ICX NTB.
2850 	*/
2851 	intel_ntb_reg_write(2, XEON_GEN4_REG_SLOTSTS, GEN4_SLOTSTS_DLLSCS);
2852 	db_iowrite_raw(ntb, ntb->self_reg->db_clear, ntb->db_link_mask);
2853 
2854 	reg_val = intel_ntb_reg_read(2, ntb->reg->lnk_sta);
2855 	if (reg_val == ntb->lnk_sta)
2856 		return (false);
2857 
2858 	ntb->lnk_sta = reg_val;
2859 	return (true);
2860 }
2861 
2862 static bool
2863 intel_ntb_poll_link(struct ntb_softc *ntb)
2864 {
2865 	bool val;
2866 
2867 	switch(ntb->type) {
2868 	case NTB_ATOM:
2869 		val = intel_ntb_atom_poll_link(ntb);
2870 		break;
2871 	case NTB_XEON_GEN4:
2872 		val = intel_ntb_xeon_gen4_poll_link(ntb);
2873 		break;
2874 	default:
2875 		val = intel_ntb_xeon_gen1_poll_link(ntb);
2876 		break;
2877 	}
2878 	return (val);
2879 }
2880 
2881 static inline enum ntb_speed
2882 intel_ntb_link_sta_speed(struct ntb_softc *ntb)
2883 {
2884 
2885 	if (!link_is_up(ntb))
2886 		return (NTB_SPEED_NONE);
2887 	return (ntb->lnk_sta & NTB_LINK_SPEED_MASK);
2888 }
2889 
2890 static inline enum ntb_width
2891 intel_ntb_link_sta_width(struct ntb_softc *ntb)
2892 {
2893 
2894 	if (!link_is_up(ntb))
2895 		return (NTB_WIDTH_NONE);
2896 	return (NTB_LNK_STA_WIDTH(ntb->lnk_sta));
2897 }
2898 
/* Static hw.ntb.debug_info sysctl node. */
SYSCTL_NODE(_hw_ntb, OID_AUTO, debug_info, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Driver state, statistics, and HW registers");

/*
 * Flags OR-ed into the register offset that is passed as arg2 to
 * sysctl_handle_register().  Bits 31:30 select the access width; note that
 * the 8-bit width is encoded as zero.
 */
#define NTB_REGSZ_MASK	(3ul << 30)
#define NTB_REG_64	(1ul << 30)
#define NTB_REG_32	(2ul << 30)
#define NTB_REG_16	(3ul << 30)
#define NTB_REG_8	(0ul << 30)

/*
 * NTB_DB_READ: read through db_ioread(); NTB_PCI_REG: read through
 * pci_read_config() instead of the memory-mapped register window.  The
 * handler asserts that the two are never combined.
 */
#define NTB_DB_READ	(1ul << 29)
#define NTB_PCI_REG	(1ul << 28)
/* All flag bits; the handler masks these off to recover the offset. */
#define NTB_REGFLAGS_MASK	(NTB_REGSZ_MASK | NTB_DB_READ | NTB_PCI_REG)
2911 
/*
 * Populate the device's sysctl tree.
 *
 * Adds the link status/control knobs directly under the device node and a
 * "debug_info" subtree holding cached driver state plus a "registers" node
 * whose leaves read hardware registers through sysctl_handle_register().
 * Register leaves encode the access width and access method in arg2 using
 * the NTB_REG_* / NTB_DB_READ / NTB_PCI_REG flags.
 *
 * Atom devices only get the common registers; the Xeon-specific and
 * B2B-specific registers are added only when applicable.
 */
static void
intel_ntb_sysctl_init(struct ntb_softc *ntb)
{
	struct sysctl_oid_list *globals, *tree_par, *regpar, *statpar, *errpar;
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid *tree, *tmptree;

	ctx = device_get_sysctl_ctx(ntb->device);
	globals = SYSCTL_CHILDREN(device_get_sysctl_tree(ntb->device));

	/* Link state knobs directly under the device node. */
	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "link_status",
	    CTLFLAG_RD | CTLTYPE_STRING | CTLFLAG_MPSAFE, ntb, 0,
	    sysctl_handle_link_status_human, "A",
	    "Link status (human readable)");
	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "active",
	    CTLFLAG_RD | CTLTYPE_UINT | CTLFLAG_MPSAFE, ntb, 0,
	    sysctl_handle_link_status, "IU",
	    "Link status (1=active, 0=inactive)");
	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "admin_up",
	    CTLFLAG_RW | CTLTYPE_UINT | CTLFLAG_MPSAFE, ntb, 0,
	    sysctl_handle_link_admin, "IU",
	    "Set/get interface status (1=UP, 0=DOWN)");

	/* Per-device debug_info subtree: cached softc state. */
	tree = SYSCTL_ADD_NODE(ctx, globals, OID_AUTO, "debug_info",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
	    "Driver state, statistics, and HW registers");
	tree_par = SYSCTL_CHILDREN(tree);

	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "conn_type", CTLFLAG_RD,
	    &ntb->conn_type, 0, "0 - Transparent; 1 - B2B; 2 - Root Port");
	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "dev_type", CTLFLAG_RD,
	    &ntb->dev_type, 0, "0 - USD; 1 - DSD");
	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ppd", CTLFLAG_RD,
	    &ntb->ppd, 0, "Raw PPD register (cached)");

	/* B2B details are only exported when a B2B memory window is in use. */
	if (ntb->b2b_mw_idx != B2B_MW_DISABLED) {
		SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "b2b_idx", CTLFLAG_RD,
		    &ntb->b2b_mw_idx, 0,
		    "Index of the MW used for B2B remote register access");
		SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "b2b_off",
		    CTLFLAG_RD, &ntb->b2b_off,
		    "If non-zero, offset of B2B register region in shared MW");
	}

	SYSCTL_ADD_PROC(ctx, tree_par, OID_AUTO, "features",
	    CTLFLAG_RD | CTLTYPE_STRING | CTLFLAG_MPSAFE, ntb, 0,
	    sysctl_handle_features, "A", "Features/errata of this NTB device");

	/* Cached register copies; the qualifier is cast away for sysctl. */
	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ntb_ctl", CTLFLAG_RD,
	    __DEVOLATILE(uint32_t *, &ntb->ntb_ctl), 0,
	    "NTB CTL register (cached)");
	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "lnk_sta", CTLFLAG_RD,
	    __DEVOLATILE(uint32_t *, &ntb->lnk_sta), 0,
	    "LNK STA register (cached)");

	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "mw_count", CTLFLAG_RD,
	    &ntb->mw_count, 0, "MW count");
	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "spad_count", CTLFLAG_RD,
	    &ntb->spad_count, 0, "Scratchpad count");
	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_count", CTLFLAG_RD,
	    &ntb->db_count, 0, "Doorbell count");
	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_count", CTLFLAG_RD,
	    &ntb->db_vec_count, 0, "Doorbell vector count");
	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_shift", CTLFLAG_RD,
	    &ntb->db_vec_shift, 0, "Doorbell vector shift");

	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_valid_mask", CTLFLAG_RD,
	    &ntb->db_valid_mask, "Doorbell valid mask");
	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_link_mask", CTLFLAG_RD,
	    &ntb->db_link_mask, "Doorbell link mask");
	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_mask", CTLFLAG_RD,
	    &ntb->db_mask, "Doorbell mask (cached)");

	/* debug_info.registers: live register reads, common to all types. */
	tmptree = SYSCTL_ADD_NODE(ctx, tree_par, OID_AUTO, "registers",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
	    "Raw HW registers (big-endian)");
	regpar = SYSCTL_CHILDREN(tmptree);

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ntbcntl",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_32 | ntb->reg->ntb_ctl, sysctl_handle_register, "IU",
	    "NTB Control register");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcap",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_32 | 0x19c, sysctl_handle_register, "IU",
	    "NTB Link Capabilities");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcon",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_32 | 0x1a0, sysctl_handle_register, "IU",
	    "NTB Link Control register");

	/* Doorbell registers are read via the doorbell accessor. */
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_mask",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_mask,
	    sysctl_handle_register, "QU", "Doorbell mask register");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_bell",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_bell,
	    sysctl_handle_register, "QU", "Doorbell register");

	/*
	 * Incoming translation registers.  With NTB_SPLIT_BAR, BAR4 and BAR5
	 * are exported as two 32-bit registers instead of one 64-bit one.
	 */
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat23",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_64 | ntb->xlat_reg->bar2_xlat,
	    sysctl_handle_register, "QU", "Incoming XLAT23 register");
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat4",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar4_xlat,
		    sysctl_handle_register, "IU", "Incoming XLAT4 register");
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat5",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar5_xlat,
		    sysctl_handle_register, "IU", "Incoming XLAT5 register");
	} else {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat45",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
		    NTB_REG_64 | ntb->xlat_reg->bar4_xlat,
		    sysctl_handle_register, "QU", "Incoming XLAT45 register");
	}

	/* Incoming limit registers, same split/unsplit layout as above. */
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt23",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_64 | ntb->xlat_reg->bar2_limit,
	    sysctl_handle_register, "QU", "Incoming LMT23 register");
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt4",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar4_limit,
		    sysctl_handle_register, "IU", "Incoming LMT4 register");
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt5",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar5_limit,
		    sysctl_handle_register, "IU", "Incoming LMT5 register");
	} else {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt45",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
		    NTB_REG_64 | ntb->xlat_reg->bar4_limit,
		    sysctl_handle_register, "QU", "Incoming LMT45 register");
	}

	/* Everything below uses Xeon-specific register offsets. */
	if (ntb->type == NTB_ATOM)
		return;

	tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_stats",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Xeon HW statistics");
	statpar = SYSCTL_CHILDREN(tmptree);
	SYSCTL_ADD_PROC(ctx, statpar, OID_AUTO, "upstream_mem_miss",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_16 | XEON_USMEMMISS_OFFSET,
	    sysctl_handle_register, "SU", "Upstream Memory Miss");

	tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_hw_err",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Xeon HW errors");
	errpar = SYSCTL_CHILDREN(tmptree);

	/* The following are read from PCI configuration space. */
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ppd",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_8 | NTB_PCI_REG | NTB_PPD_OFFSET,
	    sysctl_handle_register, "CU", "PPD");

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar23_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR23SZ_OFFSET,
	    sysctl_handle_register, "CU", "PBAR23 SZ (log2)");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar4_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR4SZ_OFFSET,
	    sysctl_handle_register, "CU", "PBAR4 SZ (log2)");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar5_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR5SZ_OFFSET,
	    sysctl_handle_register, "CU", "PBAR5 SZ (log2)");

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR23SZ_OFFSET,
	    sysctl_handle_register, "CU", "SBAR23 SZ (log2)");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR4SZ_OFFSET,
	    sysctl_handle_register, "CU", "SBAR4 SZ (log2)");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR5SZ_OFFSET,
	    sysctl_handle_register, "CU", "SBAR5 SZ (log2)");

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "devsts",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_16 | NTB_PCI_REG | XEON_DEVSTS_OFFSET,
	    sysctl_handle_register, "SU", "DEVSTS");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnksts",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_16 | NTB_PCI_REG | XEON_LINK_STATUS_OFFSET,
	    sysctl_handle_register, "SU", "LNKSTS");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "slnksts",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_16 | NTB_PCI_REG | XEON_SLINK_STATUS_OFFSET,
	    sysctl_handle_register, "SU", "SLNKSTS");

	/* Error status registers go under the xeon_hw_err node. */
	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "uncerrsts",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_32 | NTB_PCI_REG | XEON_UNCERRSTS_OFFSET,
	    sysctl_handle_register, "IU", "UNCERRSTS");
	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "corerrsts",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_32 | NTB_PCI_REG | XEON_CORERRSTS_OFFSET,
	    sysctl_handle_register, "IU", "CORERRSTS");

	/* The remaining registers are only exported for B2B connections. */
	if (ntb->conn_type != NTB_CONN_B2B)
		return;

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat01l",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_32 | XEON_B2B_XLAT_OFFSETL,
	    sysctl_handle_register, "IU", "Outgoing XLAT0L register");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat01u",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_32 | XEON_B2B_XLAT_OFFSETU,
	    sysctl_handle_register, "IU", "Outgoing XLAT0U register");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat23",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off,
	    sysctl_handle_register, "QU", "Outgoing XLAT23 register");
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat4",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
		    sysctl_handle_register, "IU", "Outgoing XLAT4 register");
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat5",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off,
		    sysctl_handle_register, "IU", "Outgoing XLAT5 register");
	} else {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat45",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
		    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
		    sysctl_handle_register, "QU", "Outgoing XLAT45 register");
	}

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt23",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_64 | XEON_PBAR2LMT_OFFSET,
	    sysctl_handle_register, "QU", "Outgoing LMT23 register");
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt4",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
		    NTB_REG_32 | XEON_PBAR4LMT_OFFSET,
		    sysctl_handle_register, "IU", "Outgoing LMT4 register");
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt5",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
		    NTB_REG_32 | XEON_PBAR5LMT_OFFSET,
		    sysctl_handle_register, "IU", "Outgoing LMT5 register");
	} else {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt45",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
		    NTB_REG_64 | XEON_PBAR4LMT_OFFSET,
		    sysctl_handle_register, "QU", "Outgoing LMT45 register");
	}

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar01_base",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_64 | ntb->xlat_reg->bar0_base,
	    sysctl_handle_register, "QU", "Secondary BAR01 base register");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_base",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_64 | ntb->xlat_reg->bar2_base,
	    sysctl_handle_register, "QU", "Secondary BAR23 base register");
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_base",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar4_base,
		    sysctl_handle_register, "IU",
		    "Secondary BAR4 base register");
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_base",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar5_base,
		    sysctl_handle_register, "IU",
		    "Secondary BAR5 base register");
	} else {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar45_base",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
		    NTB_REG_64 | ntb->xlat_reg->bar4_base,
		    sysctl_handle_register, "QU",
		    "Secondary BAR45 base register");
	}
}
3198 
3199 static int
3200 sysctl_handle_features(SYSCTL_HANDLER_ARGS)
3201 {
3202 	struct ntb_softc *ntb = arg1;
3203 	struct sbuf sb;
3204 	int error;
3205 
3206 	sbuf_new_for_sysctl(&sb, NULL, 256, req);
3207 
3208 	sbuf_printf(&sb, "%b", ntb->features, NTB_FEATURES_STR);
3209 	error = sbuf_finish(&sb);
3210 	sbuf_delete(&sb);
3211 
3212 	if (error || !req->newptr)
3213 		return (error);
3214 	return (EINVAL);
3215 }
3216 
3217 static int
3218 sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS)
3219 {
3220 	struct ntb_softc *ntb = arg1;
3221 	unsigned old, new;
3222 	int error;
3223 
3224 	old = intel_ntb_link_enabled(ntb->device);
3225 
3226 	error = SYSCTL_OUT(req, &old, sizeof(old));
3227 	if (error != 0 || req->newptr == NULL)
3228 		return (error);
3229 
3230 	error = SYSCTL_IN(req, &new, sizeof(new));
3231 	if (error != 0)
3232 		return (error);
3233 
3234 	intel_ntb_printf(0, "Admin set interface state to '%sabled'\n",
3235 	    (new != 0)? "en" : "dis");
3236 
3237 	if (new != 0)
3238 		error = intel_ntb_link_enable(ntb->device, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
3239 	else
3240 		error = intel_ntb_link_disable(ntb->device);
3241 	return (error);
3242 }
3243 
3244 static int
3245 sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS)
3246 {
3247 	struct ntb_softc *ntb = arg1;
3248 	struct sbuf sb;
3249 	enum ntb_speed speed;
3250 	enum ntb_width width;
3251 	int error;
3252 
3253 	sbuf_new_for_sysctl(&sb, NULL, 32, req);
3254 
3255 	if (intel_ntb_link_is_up(ntb->device, &speed, &width))
3256 		sbuf_printf(&sb, "up / PCIe Gen %u / Width x%u",
3257 		    (unsigned)speed, (unsigned)width);
3258 	else
3259 		sbuf_printf(&sb, "down");
3260 
3261 	error = sbuf_finish(&sb);
3262 	sbuf_delete(&sb);
3263 
3264 	if (error || !req->newptr)
3265 		return (error);
3266 	return (EINVAL);
3267 }
3268 
3269 static int
3270 sysctl_handle_link_status(SYSCTL_HANDLER_ARGS)
3271 {
3272 	struct ntb_softc *ntb = arg1;
3273 	unsigned res;
3274 	int error;
3275 
3276 	res = intel_ntb_link_is_up(ntb->device, NULL, NULL);
3277 
3278 	error = SYSCTL_OUT(req, &res, sizeof(res));
3279 	if (error || !req->newptr)
3280 		return (error);
3281 	return (EINVAL);
3282 }
3283 
3284 static int
3285 sysctl_handle_register(SYSCTL_HANDLER_ARGS)
3286 {
3287 	struct ntb_softc *ntb;
3288 	const void *outp;
3289 	uintptr_t sz;
3290 	uint64_t umv;
3291 	char be[sizeof(umv)];
3292 	size_t outsz;
3293 	uint32_t reg;
3294 	bool db, pci;
3295 	int error;
3296 
3297 	ntb = arg1;
3298 	reg = arg2 & ~NTB_REGFLAGS_MASK;
3299 	sz = arg2 & NTB_REGSZ_MASK;
3300 	db = (arg2 & NTB_DB_READ) != 0;
3301 	pci = (arg2 & NTB_PCI_REG) != 0;
3302 
3303 	KASSERT(!(db && pci), ("bogus"));
3304 
3305 	if (db) {
3306 		KASSERT(sz == NTB_REG_64, ("bogus"));
3307 		umv = db_ioread(ntb, reg);
3308 		outsz = sizeof(uint64_t);
3309 	} else {
3310 		switch (sz) {
3311 		case NTB_REG_64:
3312 			if (pci)
3313 				umv = pci_read_config(ntb->device, reg, 8);
3314 			else
3315 				umv = intel_ntb_reg_read(8, reg);
3316 			outsz = sizeof(uint64_t);
3317 			break;
3318 		case NTB_REG_32:
3319 			if (pci)
3320 				umv = pci_read_config(ntb->device, reg, 4);
3321 			else
3322 				umv = intel_ntb_reg_read(4, reg);
3323 			outsz = sizeof(uint32_t);
3324 			break;
3325 		case NTB_REG_16:
3326 			if (pci)
3327 				umv = pci_read_config(ntb->device, reg, 2);
3328 			else
3329 				umv = intel_ntb_reg_read(2, reg);
3330 			outsz = sizeof(uint16_t);
3331 			break;
3332 		case NTB_REG_8:
3333 			if (pci)
3334 				umv = pci_read_config(ntb->device, reg, 1);
3335 			else
3336 				umv = intel_ntb_reg_read(1, reg);
3337 			outsz = sizeof(uint8_t);
3338 			break;
3339 		default:
3340 			panic("bogus");
3341 			break;
3342 		}
3343 	}
3344 
3345 	/* Encode bigendian so that sysctl -x is legible. */
3346 	be64enc(be, umv);
3347 	outp = ((char *)be) + sizeof(umv) - outsz;
3348 
3349 	error = SYSCTL_OUT(req, outp, outsz);
3350 	if (error || !req->newptr)
3351 		return (error);
3352 	return (EINVAL);
3353 }
3354 
3355 static unsigned
3356 intel_ntb_user_mw_to_idx(struct ntb_softc *ntb, unsigned uidx)
3357 {
3358 
3359 	if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 &&
3360 	    uidx >= ntb->b2b_mw_idx) ||
3361 	    (ntb->msix_mw_idx != B2B_MW_DISABLED && uidx >= ntb->msix_mw_idx))
3362 		uidx++;
3363 	if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 &&
3364 	    uidx >= ntb->b2b_mw_idx) &&
3365 	    (ntb->msix_mw_idx != B2B_MW_DISABLED && uidx >= ntb->msix_mw_idx))
3366 		uidx++;
3367 	return (uidx);
3368 }
3369 
#ifndef EARLY_AP_STARTUP
/*
 * Set to 1 by the SYSINIT below; intel_ntb_exchange_msix() postpones the
 * MSI-X exchange while this is still zero.
 */
static int msix_ready;

/* SYSINIT hook: flag that the MSI-X exchange may proceed. */
static void
intel_ntb_msix_ready(void *arg __unused)
{

	msix_ready = 1;
}
/* Run the hook at the SI_SUB_SMP stage of boot. */
SYSINIT(intel_ntb_msix_ready, SI_SUB_SMP, SI_ORDER_ANY,
    intel_ntb_msix_ready, NULL);
#endif
3382 
/*
 * Callout handler that exchanges MSI-X data/offset pairs with the peer
 * through the scratchpad registers.
 *
 * The handler is a small state machine driven by the peer_msix_done and
 * peer_msix_good flags:
 *  - neither set: publish our MSI-X info to the peer's scratchpads, then
 *    read the peer's info from our own once its guard value is present;
 *  - peer_msix_done: acknowledge with NTB_MSIX_RECEIVED and wait for the
 *    peer's acknowledgement;
 *  - peer_msix_good: poll the link and deliver the link event.
 * Whenever a step cannot complete yet, the callout re-arms itself for as
 * long as the link stays up.
 *
 * ctx is the softc pointer.
 */
static void
intel_ntb_exchange_msix(void *ctx)
{
	struct ntb_softc *ntb;
	uint32_t val;
	unsigned i;

	ntb = ctx;

	/* Resume at the stage recorded by the flags. */
	if (ntb->peer_msix_good)
		goto msix_good;
	if (ntb->peer_msix_done)
		goto msix_done;

#ifndef EARLY_AP_STARTUP
	/* Block MSIX negotiation until SMP started and IRQ reshuffled. */
	if (!msix_ready)
		goto reschedule;
#endif

	/* Publish our MSI-X data and (translated) offsets to the peer. */
	intel_ntb_get_msix_info(ntb);
	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
		intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_DATA0 + i,
		    ntb->msix_data[i].nmd_data);
		intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_OFS0 + i,
		    ntb->msix_data[i].nmd_ofs - ntb->msix_xlat);
	}
	/* The guard is written last, after all data/offset pairs. */
	intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_GUARD, NTB_MSIX_VER_GUARD);

	/* Retry later unless the peer's guard has shown up on our side. */
	intel_ntb_spad_read(ntb->device, NTB_MSIX_GUARD, &val);
	if (val != NTB_MSIX_VER_GUARD)
		goto reschedule;

	/* Collect the peer's MSI-X data and offsets from our scratchpads. */
	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
		intel_ntb_spad_read(ntb->device, NTB_MSIX_DATA0 + i, &val);
		intel_ntb_printf(2, "remote MSIX data(%u): 0x%x\n", i, val);
		ntb->peer_msix_data[i].nmd_data = val;
		intel_ntb_spad_read(ntb->device, NTB_MSIX_OFS0 + i, &val);
		intel_ntb_printf(2, "remote MSIX addr(%u): 0x%x\n", i, val);
		ntb->peer_msix_data[i].nmd_ofs = val;
	}

	ntb->peer_msix_done = true;

msix_done:
	/* Acknowledge receipt and wait for the peer to do the same. */
	intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_DONE, NTB_MSIX_RECEIVED);
	intel_ntb_spad_read(ntb->device, NTB_MSIX_DONE, &val);
	if (val != NTB_MSIX_RECEIVED)
		goto reschedule;

	intel_ntb_spad_clear(ntb->device);
	ntb->peer_msix_good = true;
	/* Give peer time to see our NTB_MSIX_RECEIVED. */
	goto reschedule;

msix_good:
	/* Exchange complete: refresh link state and deliver the event. */
	intel_ntb_poll_link(ntb);
	ntb_link_event(ntb->device);
	return;

reschedule:
	/*
	 * Re-read the link status.  While the link is up, run again after
	 * hz/10 ticks (twice that once peer_msix_good is set); otherwise
	 * clear the local scratchpads and do not re-arm.
	 */
	ntb->lnk_sta = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2);
	if (_xeon_link_is_up(ntb)) {
		callout_reset(&ntb->peer_msix_work,
		    hz * (ntb->peer_msix_good ? 2 : 1) / 10,
		    intel_ntb_exchange_msix, ntb);
	} else
		intel_ntb_spad_clear(ntb->device);
}
3452 
3453 /*
3454  * Public API to the rest of the OS
3455  */
3456 
3457 static uint8_t
3458 intel_ntb_spad_count(device_t dev)
3459 {
3460 	struct ntb_softc *ntb = device_get_softc(dev);
3461 
3462 	return (ntb->spad_count);
3463 }
3464 
3465 static uint8_t
3466 intel_ntb_mw_count(device_t dev)
3467 {
3468 	struct ntb_softc *ntb = device_get_softc(dev);
3469 	uint8_t res;
3470 
3471 	res = ntb->mw_count;
3472 	if (ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0)
3473 		res--;
3474 	if (ntb->msix_mw_idx != B2B_MW_DISABLED)
3475 		res--;
3476 	return (res);
3477 }
3478 
3479 static int
3480 intel_ntb_spad_write(device_t dev, unsigned int idx, uint32_t val)
3481 {
3482 	struct ntb_softc *ntb = device_get_softc(dev);
3483 
3484 	if (idx >= ntb->spad_count)
3485 		return (EINVAL);
3486 
3487 	intel_ntb_reg_write(4, ntb->self_reg->spad + idx * 4, val);
3488 
3489 	return (0);
3490 }
3491 
3492 /*
3493  * Zeros the local scratchpad.
3494  */
3495 static void
3496 intel_ntb_spad_clear(device_t dev)
3497 {
3498 	struct ntb_softc *ntb = device_get_softc(dev);
3499 	unsigned i;
3500 
3501 	for (i = 0; i < ntb->spad_count; i++)
3502 		intel_ntb_spad_write(dev, i, 0);
3503 }
3504 
3505 static int
3506 intel_ntb_spad_read(device_t dev, unsigned int idx, uint32_t *val)
3507 {
3508 	struct ntb_softc *ntb = device_get_softc(dev);
3509 
3510 	if (idx >= ntb->spad_count)
3511 		return (EINVAL);
3512 
3513 	*val = intel_ntb_reg_read(4, ntb->self_reg->spad + idx * 4);
3514 
3515 	return (0);
3516 }
3517 
3518 static int
3519 intel_ntb_peer_spad_write(device_t dev, unsigned int idx, uint32_t val)
3520 {
3521 	struct ntb_softc *ntb = device_get_softc(dev);
3522 
3523 	if (idx >= ntb->spad_count)
3524 		return (EINVAL);
3525 
3526 	if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP))
3527 		intel_ntb_mw_write(4, XEON_SPAD_OFFSET + idx * 4, val);
3528 	else
3529 		intel_ntb_reg_write(4, ntb->peer_reg->spad + idx * 4, val);
3530 
3531 	return (0);
3532 }
3533 
3534 static int
3535 intel_ntb_peer_spad_read(device_t dev, unsigned int idx, uint32_t *val)
3536 {
3537 	struct ntb_softc *ntb = device_get_softc(dev);
3538 
3539 	if (idx >= ntb->spad_count)
3540 		return (EINVAL);
3541 
3542 	if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP))
3543 		*val = intel_ntb_mw_read(4, XEON_SPAD_OFFSET + idx * 4);
3544 	else
3545 		*val = intel_ntb_reg_read(4, ntb->peer_reg->spad + idx * 4);
3546 
3547 	return (0);
3548 }
3549 
3550 static int
3551 intel_ntb_mw_get_range(device_t dev, unsigned mw_idx, vm_paddr_t *base,
3552     caddr_t *vbase, size_t *size, size_t *align, size_t *align_size,
3553     bus_addr_t *plimit)
3554 {
3555 	struct ntb_softc *ntb = device_get_softc(dev);
3556 	struct ntb_pci_bar_info *bar;
3557 	bus_addr_t limit;
3558 	size_t bar_b2b_off;
3559 	enum ntb_bar bar_num;
3560 
3561 	if (mw_idx >= intel_ntb_mw_count(dev))
3562 		return (EINVAL);
3563 	mw_idx = intel_ntb_user_mw_to_idx(ntb, mw_idx);
3564 
3565 	bar_num = intel_ntb_mw_to_bar(ntb, mw_idx);
3566 	bar = &ntb->bar_info[bar_num];
3567 	bar_b2b_off = 0;
3568 	if (mw_idx == ntb->b2b_mw_idx) {
3569 		KASSERT(ntb->b2b_off != 0,
3570 		    ("user shouldn't get non-shared b2b mw"));
3571 		bar_b2b_off = ntb->b2b_off;
3572 	}
3573 
3574 	if (bar_is_64bit(ntb, bar_num))
3575 		limit = BUS_SPACE_MAXADDR;
3576 	else
3577 		limit = BUS_SPACE_MAXADDR_32BIT;
3578 
3579 	if (base != NULL)
3580 		*base = bar->pbase + bar_b2b_off;
3581 	if (vbase != NULL)
3582 		*vbase = bar->vbase + bar_b2b_off;
3583 	if (size != NULL)
3584 		*size = bar->size - bar_b2b_off;
3585 	if (align != NULL)
3586 		*align = bar->size;
3587 	if (align_size != NULL)
3588 		*align_size = 1;
3589 	if (plimit != NULL)
3590 		*plimit = limit;
3591 	return (0);
3592 }
3593 
/*
 * Program the translation address and limit for consumer memory window idx.
 *
 * addr is the address the window should translate to and must be aligned
 * to the BAR size; size must not exceed the usable window size (the BAR
 * size, less b2b_off for the window that holds the B2B region).
 *
 * Each register write is read back and verified; on a mismatch the
 * registers are rolled back and EIO is returned.
 *
 * Returns 0 on success; EINVAL for a bad index, misaligned addr or
 * oversized request; ERANGE when a 32-bit BAR is given an address range
 * that does not fit in 32 bits; EIO on verification failure or when a
 * 32-bit BAR is requested on Gen3/Gen4 hardware.
 */
static int
intel_ntb_mw_set_trans(device_t dev, unsigned idx, bus_addr_t addr, size_t size)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	struct ntb_pci_bar_info *bar;
	uint64_t base, limit, reg_val;
	size_t bar_size, mw_size;
	uint32_t base_reg, xlat_reg, limit_reg;
	enum ntb_bar bar_num;

	if (idx >= intel_ntb_mw_count(dev))
		return (EINVAL);
	/* Convert the consumer index to the driver's internal index. */
	idx = intel_ntb_user_mw_to_idx(ntb, idx);

	bar_num = intel_ntb_mw_to_bar(ntb, idx);
	bar = &ntb->bar_info[bar_num];

	/* The B2B window loses its first b2b_off bytes. */
	bar_size = bar->size;
	if (idx == ntb->b2b_mw_idx)
		mw_size = bar_size - ntb->b2b_off;
	else
		mw_size = bar_size;

	/* Hardware requires that addr is aligned to bar size */
	if ((addr & (bar_size - 1)) != 0)
		return (EINVAL);

	if (size > mw_size)
		return (EINVAL);

	bar_get_xlat_params(ntb, bar_num, &base_reg, &xlat_reg, &limit_reg);

	limit = 0;
	if (bar_is_64bit(ntb, bar_num)) {
		/* Gen3/Gen4 use addr as the base; others read the base register. */
		if (ntb->type == NTB_XEON_GEN3 || ntb->type == NTB_XEON_GEN4)
			base = addr;
		else
			base = intel_ntb_reg_read(8, base_reg) & BAR_HIGH_MASK;

		/* Limit to the requested size only for a partial window. */
		if (limit_reg != 0 && size != mw_size)
			limit = base + size;
		else
			limit = base + mw_size;

		/* Set and verify translation address */
		intel_ntb_reg_write(8, xlat_reg, addr);
		reg_val = intel_ntb_reg_read(8, xlat_reg) & BAR_HIGH_MASK;
		if (reg_val != addr) {
			intel_ntb_reg_write(8, xlat_reg, 0);
			return (EIO);
		}

		/* Set and verify the limit */
		intel_ntb_reg_write(8, limit_reg, limit);
		reg_val = intel_ntb_reg_read(8, limit_reg) & BAR_HIGH_MASK;
		if (reg_val != limit) {
			/* Roll back both the limit and the translation. */
			intel_ntb_reg_write(8, limit_reg, base);
			intel_ntb_reg_write(8, xlat_reg, 0);
			return (EIO);
		}
	} else {
		/* Configure 32-bit (split) BAR MW */
		if (ntb->type == NTB_XEON_GEN3 || ntb->type == NTB_XEON_GEN4)
			return (EIO);

		/* Both ends of the range must be representable in 32 bits. */
		if ((addr & UINT32_MAX) != addr)
			return (ERANGE);
		if (((addr + size) & UINT32_MAX) != (addr + size))
			return (ERANGE);

		base = intel_ntb_reg_read(4, base_reg) & BAR_HIGH_MASK;

		/* Unlike the 64-bit path, limit stays 0 for a full window. */
		if (limit_reg != 0 && size != mw_size)
			limit = base + size;

		/* Set and verify translation address */
		intel_ntb_reg_write(4, xlat_reg, addr);
		reg_val = intel_ntb_reg_read(4, xlat_reg) & BAR_HIGH_MASK;
		if (reg_val != addr) {
			intel_ntb_reg_write(4, xlat_reg, 0);
			return (EIO);
		}

		/* Set and verify the limit */
		intel_ntb_reg_write(4, limit_reg, limit);
		reg_val = intel_ntb_reg_read(4, limit_reg) & BAR_HIGH_MASK;
		if (reg_val != limit) {
			/* Roll back both the limit and the translation. */
			intel_ntb_reg_write(4, limit_reg, base);
			intel_ntb_reg_write(4, xlat_reg, 0);
			return (EIO);
		}
	}
	return (0);
}
3688 
3689 static int
3690 intel_ntb_mw_clear_trans(device_t dev, unsigned mw_idx)
3691 {
3692 
3693 	return (intel_ntb_mw_set_trans(dev, mw_idx, 0, 0));
3694 }
3695 
3696 static int
3697 intel_ntb_mw_get_wc(device_t dev, unsigned idx, vm_memattr_t *mode)
3698 {
3699 	struct ntb_softc *ntb = device_get_softc(dev);
3700 	struct ntb_pci_bar_info *bar;
3701 
3702 	if (idx >= intel_ntb_mw_count(dev))
3703 		return (EINVAL);
3704 	idx = intel_ntb_user_mw_to_idx(ntb, idx);
3705 
3706 	bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, idx)];
3707 	*mode = bar->map_mode;
3708 	return (0);
3709 }
3710 
3711 static int
3712 intel_ntb_mw_set_wc(device_t dev, unsigned idx, vm_memattr_t mode)
3713 {
3714 	struct ntb_softc *ntb = device_get_softc(dev);
3715 
3716 	if (idx >= intel_ntb_mw_count(dev))
3717 		return (EINVAL);
3718 
3719 	idx = intel_ntb_user_mw_to_idx(ntb, idx);
3720 	return (intel_ntb_mw_set_wc_internal(ntb, idx, mode));
3721 }
3722 
3723 static int
3724 intel_ntb_mw_set_wc_internal(struct ntb_softc *ntb, unsigned idx, vm_memattr_t mode)
3725 {
3726 	struct ntb_pci_bar_info *bar;
3727 	int rc;
3728 
3729 	bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, idx)];
3730 	if (bar->map_mode == mode)
3731 		return (0);
3732 
3733 	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size, mode);
3734 	if (rc == 0)
3735 		bar->map_mode = mode;
3736 
3737 	return (rc);
3738 }
3739 
3740 static void
3741 intel_ntb_peer_db_set(device_t dev, uint64_t bits)
3742 {
3743 	struct ntb_softc *ntb = device_get_softc(dev);
3744 	uint64_t db;
3745 
3746 	if ((bits & ~ntb->db_valid_mask) != 0) {
3747 		device_printf(ntb->device, "Invalid doorbell bits %#jx\n",
3748 		    (uintmax_t)bits);
3749 		return;
3750 	}
3751 
3752 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
3753 		struct ntb_pci_bar_info *lapic;
3754 		unsigned i;
3755 
3756 		lapic = ntb->peer_lapic_bar;
3757 
3758 		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
3759 			if ((bits & intel_ntb_db_vector_mask(dev, i)) != 0)
3760 				bus_space_write_4(lapic->pci_bus_tag,
3761 				    lapic->pci_bus_handle,
3762 				    ntb->peer_msix_data[i].nmd_ofs,
3763 				    ntb->peer_msix_data[i].nmd_data);
3764 		}
3765 		return;
3766 	}
3767 
3768 	if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
3769 		intel_ntb_mw_write(2, XEON_PDOORBELL_OFFSET, bits);
3770 		return;
3771 	}
3772 
3773 	if (ntb->type == NTB_XEON_GEN3 || ntb->type == NTB_XEON_GEN4) {
3774 		while (bits != 0) {
3775 			db = ffsll(bits);
3776 
3777 			intel_ntb_reg_write(1,
3778 			    ntb->peer_reg->db_bell + (db - 1) * 4, 0x1);
3779 
3780 			bits = bits & (bits - 1);
3781 		}
3782 	} else {
3783 		db_iowrite(ntb, ntb->peer_reg->db_bell, bits);
3784 	}
3785 }
3786 
3787 static int
3788 intel_ntb_peer_db_addr(device_t dev, bus_addr_t *db_addr, vm_size_t *db_size)
3789 {
3790 	struct ntb_softc *ntb = device_get_softc(dev);
3791 	struct ntb_pci_bar_info *bar;
3792 	uint64_t regoff;
3793 
3794 	KASSERT((db_addr != NULL && db_size != NULL), ("must be non-NULL"));
3795 
3796 	if (!HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
3797 		bar = &ntb->bar_info[NTB_CONFIG_BAR];
3798 		regoff = ntb->peer_reg->db_bell;
3799 	} else {
3800 		KASSERT(ntb->b2b_mw_idx != B2B_MW_DISABLED,
3801 		    ("invalid b2b idx"));
3802 
3803 		bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx)];
3804 		regoff = XEON_PDOORBELL_OFFSET;
3805 	}
3806 	KASSERT(bar->pci_bus_tag != X86_BUS_SPACE_IO, ("uh oh"));
3807 
3808 	/* HACK: Specific to current x86 bus implementation. */
3809 	*db_addr = ((uint64_t)bar->pci_bus_handle + regoff);
3810 	*db_size = ntb->reg->db_size;
3811 	return (0);
3812 }
3813 
3814 static uint64_t
3815 intel_ntb_db_valid_mask(device_t dev)
3816 {
3817 	struct ntb_softc *ntb = device_get_softc(dev);
3818 
3819 	return (ntb->db_valid_mask);
3820 }
3821 
3822 static int
3823 intel_ntb_db_vector_count(device_t dev)
3824 {
3825 	struct ntb_softc *ntb = device_get_softc(dev);
3826 
3827 	return (ntb->db_vec_count);
3828 }
3829 
3830 static uint64_t
3831 intel_ntb_db_vector_mask(device_t dev, uint32_t vector)
3832 {
3833 	struct ntb_softc *ntb = device_get_softc(dev);
3834 
3835 	if (vector > ntb->db_vec_count)
3836 		return (0);
3837 	return (ntb->db_valid_mask & intel_ntb_vec_mask(ntb, vector));
3838 }
3839 
3840 static bool
3841 intel_ntb_link_is_up(device_t dev, enum ntb_speed *speed, enum ntb_width *width)
3842 {
3843 	struct ntb_softc *ntb = device_get_softc(dev);
3844 
3845 	if (speed != NULL)
3846 		*speed = intel_ntb_link_sta_speed(ntb);
3847 	if (width != NULL)
3848 		*width = intel_ntb_link_sta_width(ntb);
3849 	return (link_is_up(ntb));
3850 }
3851 
3852 static void
3853 save_bar_parameters(struct ntb_pci_bar_info *bar)
3854 {
3855 
3856 	bar->pci_bus_tag = rman_get_bustag(bar->pci_resource);
3857 	bar->pci_bus_handle = rman_get_bushandle(bar->pci_resource);
3858 	bar->pbase = rman_get_start(bar->pci_resource);
3859 	bar->size = rman_get_size(bar->pci_resource);
3860 	bar->vbase = rman_get_virtual(bar->pci_resource);
3861 }
3862 
/*
 * Method table for the Intel NTB hardware driver.  Each DEVMETHOD() entry
 * binds an interface method (left) to the function implementing it (right).
 * The table is terminated by DEVMETHOD_END and is referenced by the
 * ntb_intel_driver class definition.
 */
static device_method_t ntb_intel_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		intel_ntb_probe),
	DEVMETHOD(device_attach,	intel_ntb_attach),
	DEVMETHOD(device_detach,	intel_ntb_detach),
	/* Bus interface (handlers are the generic ntb_* routines) */
	DEVMETHOD(bus_child_location,	ntb_child_location),
	DEVMETHOD(bus_print_child,	ntb_print_child),
	DEVMETHOD(bus_get_dma_tag,	ntb_get_dma_tag),
	/* NTB interface: ports and link state */
	DEVMETHOD(ntb_port_number,	intel_ntb_port_number),
	DEVMETHOD(ntb_peer_port_count,	intel_ntb_peer_port_count),
	DEVMETHOD(ntb_peer_port_number,	intel_ntb_peer_port_number),
	DEVMETHOD(ntb_peer_port_idx, 	intel_ntb_peer_port_idx),
	DEVMETHOD(ntb_link_is_up,	intel_ntb_link_is_up),
	DEVMETHOD(ntb_link_enable,	intel_ntb_link_enable),
	DEVMETHOD(ntb_link_disable,	intel_ntb_link_disable),
	DEVMETHOD(ntb_link_enabled,	intel_ntb_link_enabled),
	/* NTB interface: memory windows */
	DEVMETHOD(ntb_mw_count,		intel_ntb_mw_count),
	DEVMETHOD(ntb_mw_get_range,	intel_ntb_mw_get_range),
	DEVMETHOD(ntb_mw_set_trans,	intel_ntb_mw_set_trans),
	DEVMETHOD(ntb_mw_clear_trans,	intel_ntb_mw_clear_trans),
	DEVMETHOD(ntb_mw_get_wc,	intel_ntb_mw_get_wc),
	DEVMETHOD(ntb_mw_set_wc,	intel_ntb_mw_set_wc),
	/* NTB interface: scratchpads */
	DEVMETHOD(ntb_spad_count,	intel_ntb_spad_count),
	DEVMETHOD(ntb_spad_clear,	intel_ntb_spad_clear),
	DEVMETHOD(ntb_spad_write,	intel_ntb_spad_write),
	DEVMETHOD(ntb_spad_read,	intel_ntb_spad_read),
	DEVMETHOD(ntb_peer_spad_write,	intel_ntb_peer_spad_write),
	DEVMETHOD(ntb_peer_spad_read,	intel_ntb_peer_spad_read),
	/* NTB interface: doorbells */
	DEVMETHOD(ntb_db_valid_mask,	intel_ntb_db_valid_mask),
	DEVMETHOD(ntb_db_vector_count,	intel_ntb_db_vector_count),
	DEVMETHOD(ntb_db_vector_mask,	intel_ntb_db_vector_mask),
	DEVMETHOD(ntb_db_clear,		intel_ntb_db_clear),
	DEVMETHOD(ntb_db_clear_mask,	intel_ntb_db_clear_mask),
	DEVMETHOD(ntb_db_read,		intel_ntb_db_read),
	DEVMETHOD(ntb_db_set_mask,	intel_ntb_db_set_mask),
	DEVMETHOD(ntb_peer_db_addr,	intel_ntb_peer_db_addr),
	DEVMETHOD(ntb_peer_db_set,	intel_ntb_peer_db_set),
	DEVMETHOD_END
};
3904 
/*
 * Driver class "ntb_hw": uses the ntb_intel_methods table and a per-device
 * softc of type struct ntb_softc.
 */
static DEFINE_CLASS_0(ntb_hw, ntb_intel_driver, ntb_intel_methods,
    sizeof(struct ntb_softc));
/* Register the driver as module ntb_hw_intel on the pci bus. */
DRIVER_MODULE(ntb_hw_intel, pci, ntb_intel_driver, NULL, NULL);
/* Requires the "ntb" module; minimum, preferred and maximum version are 1. */
MODULE_DEPEND(ntb_hw_intel, ntb, 1, 1, 1);
MODULE_VERSION(ntb_hw_intel, 1);
/*
 * Export the pci_ids table as plug-and-play match data for this module.
 * NOTE(review): the descriptor string is expected to mirror the layout of
 * the pci_ids entries (a 32-bit vendor/device word followed by a
 * description) -- confirm against the pci_ids definition.
 */
MODULE_PNP_INFO("W32:vendor/device;D:#", pci, ntb_hw_intel, pci_ids,
    nitems(pci_ids));
3912