xref: /freebsd/sys/dev/ntb/ntb_hw/ntb_hw_intel.c (revision 1799696096df87b52968b8996d00c91e0a5de8d9)
1 /*-
2  * Copyright (c) 2016-2017 Alexander Motin <mav@FreeBSD.org>
3  * Copyright (C) 2013 Intel Corporation
4  * Copyright (C) 2015 EMC Corporation
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*
30  * The Non-Transparent Bridge (NTB) is a device that allows you to connect
31  * two or more systems using PCI-e links, providing remote memory access.
32  *
33  * This module contains a driver for NTB hardware in Intel Xeon/Atom CPUs.
34  *
35  * NOTE: Much of the code in this module is shared with Linux. Any patches may
36  * be picked up and redistributed in Linux with a dual GPL/BSD license.
37  */
38 
39 #include <sys/cdefs.h>
40 __FBSDID("$FreeBSD$");
41 
42 #include <sys/param.h>
43 #include <sys/kernel.h>
44 #include <sys/systm.h>
45 #include <sys/bus.h>
46 #include <sys/endian.h>
47 #include <sys/interrupt.h>
48 #include <sys/lock.h>
49 #include <sys/malloc.h>
50 #include <sys/module.h>
51 #include <sys/mutex.h>
52 #include <sys/pciio.h>
53 #include <sys/taskqueue.h>
54 #include <sys/tree.h>
55 #include <sys/queue.h>
56 #include <sys/rman.h>
57 #include <sys/sbuf.h>
58 #include <sys/sysctl.h>
59 #include <vm/vm.h>
60 #include <vm/pmap.h>
61 #include <machine/bus.h>
62 #include <machine/intr_machdep.h>
63 #include <machine/resource.h>
64 #include <dev/pci/pcireg.h>
65 #include <dev/pci/pcivar.h>
66 #include <dev/iommu/iommu.h>
67 
68 #include "ntb_hw_intel.h"
69 #include "../ntb.h"
70 
/*
 * Larger of the Xeon and Atom doorbell counts; used to size the int_info[]
 * array in struct ntb_softc.
 */
71 #define MAX_MSIX_INTERRUPTS MAX(XEON_DB_COUNT, ATOM_DB_COUNT)
72 
73 #define NTB_HB_TIMEOUT		1 /* second */
74 #define ATOM_LINK_RECOVERY_TIME	500 /* ms */
/* Mask that clears the low 12 bits of a 64-bit value (4 KiB granularity). */
75 #define BAR_HIGH_MASK		(~((1ull << 12) - 1))
76 
/*
 * NOTE(review): presumably magic values used by the peer MSI-X exchange
 * handshake -- their users are not visible here; confirm against
 * intel_ntb_exchange_msix().
 */
77 #define	NTB_MSIX_VER_GUARD	0xaabbccdd
78 #define	NTB_MSIX_RECEIVED	0xe0f0e0f0
79 
80 /*
81  * PCI constants could be somewhere more generic, but aren't defined/used in
82  * pci.c.
 *
 * These describe one MSI-X table entry: its total size in bytes and the
 * byte offsets of the fields within it.
83  */
84 #define	PCI_MSIX_ENTRY_SIZE		16
85 #define	PCI_MSIX_ENTRY_LOWER_ADDR	0
86 #define	PCI_MSIX_ENTRY_UPPER_ADDR	4
87 #define	PCI_MSIX_ENTRY_DATA		8
88 
/*
 * Hardware family of the device.  Attach uses it to choose between the Atom
 * and Xeon detect/init routines.
 */
89 enum ntb_device_type {
90 	NTB_XEON,
91 	NTB_ATOM
92 };
93 
94 /* NTB connection topology.  ntb_conn_type are hardware numbers, cannot change. */
95 enum ntb_conn_type {
96 	NTB_CONN_TRANSPARENT = 0,
97 	NTB_CONN_B2B = 1,
98 	NTB_CONN_RP = 2,
99 };
100 
/*
 * Which side of a back-to-back link this device sits on: USD is the
 * upstream side, DSD the downstream side (see the xeon_b2b sysctl help text).
 */
101 enum ntb_b2b_direction {
102 	NTB_DEV_USD = 0,
103 	NTB_DEV_DSD = 1,
104 };
105 
/*
 * Index into ntb_softc.bar_info[].  intel_ntb_map_pci_bars() binds these to
 * PCI BARs 0, 2, 4 and (split-BAR mode only) 5, in that order.
 */
106 enum ntb_bar {
107 	NTB_CONFIG_BAR = 0,	/* configuration (MMR) registers */
108 	NTB_B2B_BAR_1,
109 	NTB_B2B_BAR_2,
110 	NTB_B2B_BAR_3,
111 	NTB_MAX_BARS		/* count, not a valid index */
112 };
113 
/*
 * NOTE(review): presumably scratchpad register indices used while exchanging
 * MSI-X data/offset pairs with the peer -- the users are not visible here;
 * confirm against intel_ntb_exchange_msix().  NTB_MAX_MSIX_SPAD is the number
 * of entries.
 */
114 enum {
115 	NTB_MSIX_GUARD = 0,
116 	NTB_MSIX_DATA0,
117 	NTB_MSIX_DATA1,
118 	NTB_MSIX_DATA2,
119 	NTB_MSIX_OFS0,
120 	NTB_MSIX_OFS1,
121 	NTB_MSIX_OFS2,
122 	NTB_MSIX_DONE,
123 	NTB_MAX_MSIX_SPAD
124 };
125 
126 /*
 * Device features and workarounds.  Evaluates to true when any bit of
 * `feature` (an NTB_* flag) is set in (ntb)->features.
 */
127 #define HAS_FEATURE(ntb, feature)	\
128 	(((ntb)->features & (feature)) != 0)
129 
/* One entry of the supported-device table (pci_ids[]). */
130 struct ntb_hw_info {
131 	uint32_t		device_id;	/* compared with pci_get_devid() */
132 	const char		*desc;		/* passed to device_set_desc() */
133 	enum ntb_device_type	type;
134 	uint32_t		features;	/* NTB_* feature/workaround bits */
135 };
136 
/*
 * Per-BAR state: the allocated bus resource, its cached mapping parameters
 * and the configuration register offsets that belong to this BAR.
 */
137 struct ntb_pci_bar_info {
138 	bus_space_tag_t		pci_bus_tag;
139 	bus_space_handle_t	pci_bus_handle;
140 	int			pci_resource_id;	/* PCIR_BAR(n) rid */
141 	struct resource		*pci_resource;		/* NULL if not allocated */
142 	vm_paddr_t		pbase;			/* physical base address */
143 	caddr_t			vbase;			/* kernel virtual base */
144 	vm_size_t		size;			/* length in bytes */
145 	vm_memattr_t		map_mode;		/* current cache attribute */
146 
147 	/* Configuration register offsets */
148 	uint32_t		psz_off;
149 	uint32_t		ssz_off;
150 	uint32_t		pbarxlat_off;
151 };
152 
/*
 * Bookkeeping for one interrupt; ntb_softc holds MAX_MSIX_INTERRUPTS of these.
 * NOTE(review): presumably the IRQ resource, its rid and the handler cookie
 * from bus_setup_intr() -- the setup code is not visible here.
 */
153 struct ntb_int_info {
154 	struct resource	*res;
155 	int		rid;
156 	void		*tag;
157 };
158 
/*
 * Per-vector context; ntb_softc.msix_vec points at these.
 * NOTE(review): presumably passed as the argument to ndev_vec_isr() -- confirm.
 */
159 struct ntb_vec {
160 	struct ntb_softc	*ntb;		/* owning device */
161 	uint32_t		num;		/* vector number */
162 	unsigned		masked;
163 };
164 
/*
 * Family-specific register layout (see atom_reg / xeon_reg): register offsets
 * plus the mapping from memory-window index to BAR.
 */
165 struct ntb_reg {
166 	uint32_t	ntb_ctl;	/* offset of the NTB control register */
167 	uint32_t	lnk_sta;	/* offset of the link status register */
168 	uint8_t		db_size;	/* doorbell register width, in bytes */
169 	unsigned	mw_bar[NTB_MAX_BARS];	/* mw index -> enum ntb_bar */
170 };
171 
/*
 * Offsets of the doorbell, doorbell-mask and scratchpad registers for one
 * side of the bridge (used for both self_reg and peer_reg in the softc).
 */
172 struct ntb_alt_reg {
173 	uint32_t	db_bell;
174 	uint32_t	db_mask;
175 	uint32_t	spad;
176 };
177 
/*
 * Offsets of the per-BAR base, translation and limit registers.
 * bar_get_xlat_params() selects the bar2/bar4/bar5 triple for
 * NTB_B2B_BAR_1/2/3 respectively.
 */
178 struct ntb_xlat_reg {
179 	uint32_t	bar0_base;
180 	uint32_t	bar2_base;
181 	uint32_t	bar4_base;
182 	uint32_t	bar5_base;
183 
184 	uint32_t	bar2_xlat;
185 	uint32_t	bar4_xlat;
186 	uint32_t	bar5_xlat;
187 
188 	uint32_t	bar2_limit;
189 	uint32_t	bar4_limit;
190 	uint32_t	bar5_limit;
191 };
192 
/*
 * Bus addresses used between the two NTB devices in a B2B topology.  The
 * *_addr64 fields are for 64-bit BARs and the *_addr32 fields for split-BAR
 * mode (see the xeon_b2b sysctl descriptions).
 */
193 struct ntb_b2b_addr {
194 	uint64_t	bar0_addr;
195 	uint64_t	bar2_addr64;
196 	uint64_t	bar4_addr64;
197 	uint64_t	bar4_addr32;
198 	uint64_t	bar5_addr32;
199 };
200 
/*
 * One MSI-X message description.
 * NOTE(review): presumably an offset plus the data word of an MSI-X table
 * entry -- confirm against intel_ntb_get_msix_info().
 */
201 struct ntb_msix_data {
202 	uint32_t	nmd_ofs;
203 	uint32_t	nmd_data;
204 };
205 
/*
 * Per-device driver state, obtained with device_get_softc().
 */
206 struct ntb_softc {
207 	/* ntb.c context. Do not move! Must go first! */
208 	void			*ntb_store;
209 
210 	device_t		device;
211 	enum ntb_device_type	type;		/* copied from pci_ids[] at attach */
212 	uint32_t		features;	/* NTB_* bits, see HAS_FEATURE() */
213 
214 	struct ntb_pci_bar_info	bar_info[NTB_MAX_BARS];	/* indexed by enum ntb_bar */
215 	struct ntb_int_info	int_info[MAX_MSIX_INTERRUPTS];
216 	uint32_t		allocated_interrupts;
217 
218 	struct ntb_msix_data	peer_msix_data[XEON_NONLINK_DB_MSIX_BITS];
219 	struct ntb_msix_data	msix_data[XEON_NONLINK_DB_MSIX_BITS];
220 	bool			peer_msix_good;
221 	bool			peer_msix_done;
222 	struct ntb_pci_bar_info	*peer_lapic_bar;
223 	struct callout		peer_msix_work;
224 
	/* DMA tag/map created over BAR0 in intel_ntb_map_pci_bars(). */
225 	bus_dma_tag_t		bar0_dma_tag;
226 	bus_dmamap_t		bar0_dma_map;
227 
228 	struct callout		heartbeat_timer;
229 	struct callout		lr_timer;
230 
231 	struct ntb_vec		*msix_vec;
232 
233 	uint32_t		ppd;
234 	enum ntb_conn_type	conn_type;
235 	enum ntb_b2b_direction	dev_type;
236 
237 	/* Offset of peer bar0 in B2B BAR */
238 	uint64_t			b2b_off;
239 	/* Memory window used to access peer bar0 */
240 #define B2B_MW_DISABLED			UINT8_MAX
241 	uint8_t				b2b_mw_idx;
242 	uint32_t			msix_xlat;
	/* Initialized to B2B_MW_DISABLED at attach, like b2b_mw_idx. */
243 	uint8_t				msix_mw_idx;
244 
245 	uint8_t				mw_count;
246 	uint8_t				spad_count;
247 	uint8_t				db_count;
248 	uint8_t				db_vec_count;
249 	uint8_t				db_vec_shift;
250 
251 	/* Protects local db_mask. */
252 #define DB_MASK_LOCK(sc)	mtx_lock_spin(&(sc)->db_mask_lock)
253 #define DB_MASK_UNLOCK(sc)	mtx_unlock_spin(&(sc)->db_mask_lock)
254 #define DB_MASK_ASSERT(sc,f)	mtx_assert(&(sc)->db_mask_lock, (f))
255 	struct mtx			db_mask_lock;	/* spin mutex (MTX_SPIN) */
256 
257 	volatile uint32_t		ntb_ctl;
258 	volatile uint32_t		lnk_sta;
259 
260 	uint64_t			db_valid_mask;
261 	uint64_t			db_link_mask;
262 	uint64_t			db_mask;
263 	uint64_t			fake_db;	/* NTB_SB01BASE_LOCKUP*/
264 	uint64_t			force_db;	/* NTB_SB01BASE_LOCKUP*/
265 
266 	int				last_ts;	/* ticks @ last irq */
267 
	/* Register layout tables chosen for this device's family. */
268 	const struct ntb_reg		*reg;
269 	const struct ntb_alt_reg	*self_reg;
270 	const struct ntb_alt_reg	*peer_reg;
271 	const struct ntb_xlat_reg	*xlat_reg;
272 };
273 
274 #ifdef __i386__
275 static __inline uint64_t
276 bus_space_read_8(bus_space_tag_t tag, bus_space_handle_t handle,
277     bus_size_t offset)
278 {
279 
280 	return (bus_space_read_4(tag, handle, offset) |
281 	    ((uint64_t)bus_space_read_4(tag, handle, offset + 4)) << 32);
282 }
283 
284 static __inline void
285 bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t handle,
286     bus_size_t offset, uint64_t val)
287 {
288 
289 	bus_space_write_4(tag, handle, offset, val);
290 	bus_space_write_4(tag, handle, offset + 4, val >> 32);
291 }
292 #endif
293 
/*
 * Register access helpers.  SIZE is the access width suffix pasted onto
 * bus_space_read_/bus_space_write_ (e.g. 4 or 8).  All of these expand to
 * expressions that reference a variable named `ntb` (struct ntb_softc *) in
 * the caller's scope.
 *
 *  - intel_ntb_bar_*: access an arbitrary BAR from bar_info[].
 *  - intel_ntb_reg_*: access the configuration BAR (NTB_CONFIG_BAR).
 *  - intel_ntb_mw_*:  access the BAR behind memory window ntb->b2b_mw_idx.
 */
294 #define intel_ntb_bar_read(SIZE, bar, offset) \
295 	    bus_space_read_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
296 	    ntb->bar_info[(bar)].pci_bus_handle, (offset))
297 #define intel_ntb_bar_write(SIZE, bar, offset, val) \
298 	    bus_space_write_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
299 	    ntb->bar_info[(bar)].pci_bus_handle, (offset), (val))
300 #define intel_ntb_reg_read(SIZE, offset) \
301 	    intel_ntb_bar_read(SIZE, NTB_CONFIG_BAR, offset)
302 #define intel_ntb_reg_write(SIZE, offset, val) \
303 	    intel_ntb_bar_write(SIZE, NTB_CONFIG_BAR, offset, val)
304 #define intel_ntb_mw_read(SIZE, offset) \
305 	    intel_ntb_bar_read(SIZE, intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \
306 		offset)
307 #define intel_ntb_mw_write(SIZE, offset, val) \
308 	    intel_ntb_bar_write(SIZE, intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \
309 		offset, val)
310 
/*
 * Forward declarations for the file-local functions.
 */

/* Device methods that take a device_t. */
311 static int intel_ntb_probe(device_t device);
312 static int intel_ntb_attach(device_t device);
313 static int intel_ntb_detach(device_t device);
314 static uint64_t intel_ntb_db_valid_mask(device_t dev);
315 static void intel_ntb_spad_clear(device_t dev);
316 static uint64_t intel_ntb_db_vector_mask(device_t dev, uint32_t vector);
317 static bool intel_ntb_link_is_up(device_t dev, enum ntb_speed *speed,
318     enum ntb_width *width);
319 static int intel_ntb_link_enable(device_t dev, enum ntb_speed speed,
320     enum ntb_width width);
321 static int intel_ntb_link_disable(device_t dev);
322 static int intel_ntb_spad_read(device_t dev, unsigned int idx, uint32_t *val);
323 static int intel_ntb_peer_spad_write(device_t dev, unsigned int idx, uint32_t val);
324 
/* Driver-internal helpers operating on the softc. */
325 static unsigned intel_ntb_user_mw_to_idx(struct ntb_softc *, unsigned uidx);
326 static inline enum ntb_bar intel_ntb_mw_to_bar(struct ntb_softc *, unsigned mw);
327 static inline bool bar_is_64bit(struct ntb_softc *, enum ntb_bar);
328 static inline void bar_get_xlat_params(struct ntb_softc *, enum ntb_bar,
329     uint32_t *base, uint32_t *xlat, uint32_t *lmt);
330 static int intel_ntb_map_pci_bars(struct ntb_softc *ntb);
331 static int intel_ntb_mw_set_wc_internal(struct ntb_softc *, unsigned idx,
332     vm_memattr_t);
333 static void print_map_success(struct ntb_softc *, struct ntb_pci_bar_info *,
334     const char *);
335 static int map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar);
336 static int map_memory_window_bar(struct ntb_softc *ntb,
337     struct ntb_pci_bar_info *bar);
338 static void intel_ntb_unmap_pci_bar(struct ntb_softc *ntb);
339 static int intel_ntb_remap_msix(device_t, uint32_t desired, uint32_t avail);
340 static int intel_ntb_init_isr(struct ntb_softc *ntb);
341 static int intel_ntb_setup_legacy_interrupt(struct ntb_softc *ntb);
342 static int intel_ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors);
343 static void intel_ntb_teardown_interrupts(struct ntb_softc *ntb);
344 static inline uint64_t intel_ntb_vec_mask(struct ntb_softc *, uint64_t db_vector);
345 static void intel_ntb_interrupt(struct ntb_softc *, uint32_t vec);
346 static void ndev_vec_isr(void *arg);
347 static void ndev_irq_isr(void *arg);
348 static inline uint64_t db_ioread(struct ntb_softc *, uint64_t regoff);
349 static inline void db_iowrite(struct ntb_softc *, uint64_t regoff, uint64_t);
350 static inline void db_iowrite_raw(struct ntb_softc *, uint64_t regoff, uint64_t);
351 static int intel_ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors);
352 static void intel_ntb_free_msix_vec(struct ntb_softc *ntb);
353 static void intel_ntb_get_msix_info(struct ntb_softc *ntb);
354 static void intel_ntb_exchange_msix(void *);
355 static struct ntb_hw_info *intel_ntb_get_device_info(uint32_t device_id);
356 static void intel_ntb_detect_max_mw(struct ntb_softc *ntb);
357 static int intel_ntb_detect_xeon(struct ntb_softc *ntb);
358 static int intel_ntb_detect_atom(struct ntb_softc *ntb);
359 static int intel_ntb_xeon_init_dev(struct ntb_softc *ntb);
360 static int intel_ntb_atom_init_dev(struct ntb_softc *ntb);
361 static void intel_ntb_teardown_xeon(struct ntb_softc *ntb);
362 static void configure_atom_secondary_side_bars(struct ntb_softc *ntb);
363 static void xeon_reset_sbar_size(struct ntb_softc *, enum ntb_bar idx,
364     enum ntb_bar regbar);
365 static void xeon_set_sbar_base_and_limit(struct ntb_softc *,
366     uint64_t base_addr, enum ntb_bar idx, enum ntb_bar regbar);
367 static void xeon_set_pbar_xlat(struct ntb_softc *, uint64_t base_addr,
368     enum ntb_bar idx);
369 static int xeon_setup_b2b_mw(struct ntb_softc *,
370     const struct ntb_b2b_addr *addr, const struct ntb_b2b_addr *peer_addr);
371 static inline bool link_is_up(struct ntb_softc *ntb);
372 static inline bool _xeon_link_is_up(struct ntb_softc *ntb);
373 static inline bool atom_link_is_err(struct ntb_softc *ntb);
374 static inline enum ntb_speed intel_ntb_link_sta_speed(struct ntb_softc *);
375 static inline enum ntb_width intel_ntb_link_sta_width(struct ntb_softc *);
376 static void atom_link_hb(void *arg);
377 static void recover_atom_link(void *arg);
378 static bool intel_ntb_poll_link(struct ntb_softc *ntb);
379 static void save_bar_parameters(struct ntb_pci_bar_info *bar);
/* sysctl setup and handlers. */
380 static void intel_ntb_sysctl_init(struct ntb_softc *);
381 static int sysctl_handle_features(SYSCTL_HANDLER_ARGS);
382 static int sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS);
383 static int sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS);
384 static int sysctl_handle_link_status(SYSCTL_HANDLER_ARGS);
385 static int sysctl_handle_register(SYSCTL_HANDLER_ARGS);
386 
/* Log verbosity threshold, exported as the hw.ntb.debug_level tunable/sysctl. */
387 static unsigned g_ntb_hw_debug_level;
388 SYSCTL_UINT(_hw_ntb, OID_AUTO, debug_level, CTLFLAG_RWTUN,
389     &g_ntb_hw_debug_level, 0, "ntb_hw log level -- higher is more verbose");
/*
 * device_printf() the message only when `lvl` does not exceed the current
 * debug level.  Requires a variable named `ntb` in the caller's scope.
 */
390 #define intel_ntb_printf(lvl, ...) do {				\
391 	if ((lvl) <= g_ntb_hw_debug_level) {			\
392 		device_printf(ntb->device, __VA_ARGS__);	\
393 	}							\
394 } while (0)
395 
/*
 * Values accepted by the hw.ntb.default_mw_pat tunable; they are translated
 * to VM_MEMATTR_* by intel_ntb_pat_flags().  The default is uncacheable.
 */
396 #define	_NTB_PAT_UC	0
397 #define	_NTB_PAT_WC	1
398 #define	_NTB_PAT_WT	4
399 #define	_NTB_PAT_WP	5
400 #define	_NTB_PAT_WB	6
401 #define	_NTB_PAT_UCM	7
402 static unsigned g_ntb_mw_pat = _NTB_PAT_UC;
403 SYSCTL_UINT(_hw_ntb, OID_AUTO, default_mw_pat, CTLFLAG_RDTUN,
404     &g_ntb_mw_pat, 0, "Configure the default memory window cache flags (PAT): "
405     "UC: "  __XSTRING(_NTB_PAT_UC) ", "
406     "WC: "  __XSTRING(_NTB_PAT_WC) ", "
407     "WT: "  __XSTRING(_NTB_PAT_WT) ", "
408     "WP: "  __XSTRING(_NTB_PAT_WP) ", "
409     "WB: "  __XSTRING(_NTB_PAT_WB) ", "
410     "UC-: " __XSTRING(_NTB_PAT_UCM));
411 
412 static inline vm_memattr_t
413 intel_ntb_pat_flags(void)
414 {
415 
416 	switch (g_ntb_mw_pat) {
417 	case _NTB_PAT_WC:
418 		return (VM_MEMATTR_WRITE_COMBINING);
419 	case _NTB_PAT_WT:
420 		return (VM_MEMATTR_WRITE_THROUGH);
421 	case _NTB_PAT_WP:
422 		return (VM_MEMATTR_WRITE_PROTECTED);
423 	case _NTB_PAT_WB:
424 		return (VM_MEMATTR_WRITE_BACK);
425 	case _NTB_PAT_UCM:
426 		return (VM_MEMATTR_WEAK_UNCACHEABLE);
427 	case _NTB_PAT_UC:
428 		/* FALLTHROUGH */
429 	default:
430 		return (VM_MEMATTR_UNCACHEABLE);
431 	}
432 }
433 
434 /*
435  * Well, this obviously doesn't belong here, but it doesn't seem to exist
436  * anywhere better yet.
437  */
438 static inline const char *
439 intel_ntb_vm_memattr_to_str(vm_memattr_t pat)
440 {
441 
442 	switch (pat) {
443 	case VM_MEMATTR_WRITE_COMBINING:
444 		return ("WRITE_COMBINING");
445 	case VM_MEMATTR_WRITE_THROUGH:
446 		return ("WRITE_THROUGH");
447 	case VM_MEMATTR_WRITE_PROTECTED:
448 		return ("WRITE_PROTECTED");
449 	case VM_MEMATTR_WRITE_BACK:
450 		return ("WRITE_BACK");
451 	case VM_MEMATTR_WEAK_UNCACHEABLE:
452 		return ("UNCACHED");
453 	case VM_MEMATTR_UNCACHEABLE:
454 		return ("UNCACHEABLE");
455 	default:
456 		return ("UNKNOWN");
457 	}
458 }
459 
/*
 * Boot-time tunables (CTLFLAG_RDTUN) selecting which memory windows are
 * reserved for the errata workarounds.  Negative values count back from the
 * last available window, as the help strings explain.
 */

/* hw.ntb.msix_mw_idx: window used to reach the peer MSI-X message area. */
460 static int g_ntb_msix_idx = 1;
461 SYSCTL_INT(_hw_ntb, OID_AUTO, msix_mw_idx, CTLFLAG_RDTUN, &g_ntb_msix_idx,
462     0, "Use this memory window to access the peer MSIX message complex on "
463     "certain Xeon-based NTB systems, as a workaround for a hardware errata.  "
464     "Like b2b_mw_idx, negative values index from the last available memory "
465     "window.  (Applies on Xeon platforms with SB01BASE_LOCKUP errata.)");
466 
/* hw.ntb.b2b_mw_idx: window used to reach the peer NTB registers. */
467 static int g_ntb_mw_idx = -1;
468 SYSCTL_INT(_hw_ntb, OID_AUTO, b2b_mw_idx, CTLFLAG_RDTUN, &g_ntb_mw_idx,
469     0, "Use this memory window to access the peer NTB registers.  A "
470     "non-negative value starts from the first MW index; a negative value "
471     "starts from the last MW index.  The default is -1, i.e., the last "
472     "available memory window.  Both sides of the NTB MUST set the same "
473     "value here!  (Applies on Xeon platforms with SDOORBELL_LOCKUP errata.)");
474 
/*
 * Feature and workaround bits kept in ntb_softc.features and in the
 * `features` column of pci_ids[]; test them with HAS_FEATURE().
 */
475 /* Hardware owns the low 16 bits of features. */
476 #define NTB_BAR_SIZE_4K		(1 << 0)
477 #define NTB_SDOORBELL_LOCKUP	(1 << 1)
478 #define NTB_SB01BASE_LOCKUP	(1 << 2)
479 #define NTB_B2BDOORBELL_BIT14	(1 << 3)
480 /* Software/configuration owns the top 16 bits. */
481 #define NTB_SPLIT_BAR		(1ull << 16)
482 
/*
 * Bit-name description of the feature word.
 * NOTE(review): the layout looks like a kernel printf "%b" bit-description
 * string -- confirm against its user (sysctl_handle_features()).
 */
483 #define NTB_FEATURES_STR \
484     "\20\21SPLIT_BAR4\04B2B_DOORBELL_BIT14\03SB01BASE_LOCKUP" \
485     "\02SDOORBELL_LOCKUP\01BAR_SIZE_4K"
486 
/*
 * Supported devices.  Each entry gives the 32-bit PCI device/vendor ID (as
 * returned by pci_get_devid()), the description set at probe time, the
 * hardware family, and the errata/feature bits that apply to that part.
 */
487 static struct ntb_hw_info pci_ids[] = {
488 	/* XXX: PS/SS IDs left out until they are supported. */
489 	{ 0x0C4E8086, "BWD Atom Processor S1200 Non-Transparent Bridge B2B",
490 		NTB_ATOM, 0 },
491 
492 	{ 0x37258086, "JSF Xeon C35xx/C55xx Non-Transparent Bridge B2B",
493 		NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
494 	{ 0x3C0D8086, "SNB Xeon E5/Core i7 Non-Transparent Bridge B2B",
495 		NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
496 	{ 0x0E0D8086, "IVT Xeon E5 V2 Non-Transparent Bridge B2B", NTB_XEON,
497 		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
498 		    NTB_SB01BASE_LOCKUP | NTB_BAR_SIZE_4K },
499 	{ 0x2F0D8086, "HSX Xeon E5 V3 Non-Transparent Bridge B2B", NTB_XEON,
500 		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
501 		    NTB_SB01BASE_LOCKUP },
502 	{ 0x6F0D8086, "BDX Xeon E5 V4 Non-Transparent Bridge B2B", NTB_XEON,
503 		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
504 		    NTB_SB01BASE_LOCKUP },
505 };
506 
/*
 * Register layout tables for Atom hardware.
 */

/* Control/status offsets; 64-bit doorbells; two memory windows. */
506 static const struct ntb_reg atom_reg = {
507 	.ntb_ctl = ATOM_NTBCNTL_OFFSET,
508 	.lnk_sta = ATOM_LINK_STATUS_OFFSET,
509 	.db_size = sizeof(uint64_t),
510 	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2 },
511 };
512 
/* Primary-side doorbell, doorbell mask and scratchpad offsets. */
513 static const struct ntb_alt_reg atom_pri_reg = {
514 	.db_bell = ATOM_PDOORBELL_OFFSET,
515 	.db_mask = ATOM_PDBMSK_OFFSET,
516 	.spad = ATOM_SPAD_OFFSET,
517 };
518 
/* B2B doorbell and scratchpad offsets; db_mask is left zero. */
519 static const struct ntb_alt_reg atom_b2b_reg = {
520 	.db_bell = ATOM_B2B_DOORBELL_OFFSET,
521 	.spad = ATOM_B2B_SPAD_OFFSET,
522 };
523 
/*
 * Secondary-side translation registers.  Only the BAR2/BAR4 xlat offsets are
 * populated; the base/limit entries are compiled out and therefore zero.
 */
524 static const struct ntb_xlat_reg atom_sec_xlat = {
525 #if 0
526 	/* "FIXME" says the Linux driver. */
527 	.bar0_base = ATOM_SBAR0BASE_OFFSET,
528 	.bar2_base = ATOM_SBAR2BASE_OFFSET,
529 	.bar4_base = ATOM_SBAR4BASE_OFFSET,
530 
531 	.bar2_limit = ATOM_SBAR2LMT_OFFSET,
532 	.bar4_limit = ATOM_SBAR4LMT_OFFSET,
533 #endif
534 
535 	.bar2_xlat = ATOM_SBAR2XLAT_OFFSET,
536 	.bar4_xlat = ATOM_SBAR4XLAT_OFFSET,
537 };
539 
/*
 * Register layout tables for Xeon hardware.
 */

/* Control/status offsets; 16-bit doorbells; up to three memory windows. */
539 static const struct ntb_reg xeon_reg = {
540 	.ntb_ctl = XEON_NTBCNTL_OFFSET,
541 	.lnk_sta = XEON_LINK_STATUS_OFFSET,
542 	.db_size = sizeof(uint16_t),
543 	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2, NTB_B2B_BAR_3 },
544 };
545 
/* Primary-side doorbell, doorbell mask and scratchpad offsets. */
546 static const struct ntb_alt_reg xeon_pri_reg = {
547 	.db_bell = XEON_PDOORBELL_OFFSET,
548 	.db_mask = XEON_PDBMSK_OFFSET,
549 	.spad = XEON_SPAD_OFFSET,
550 };
551 
/* B2B doorbell and scratchpad offsets; db_mask is left zero. */
552 static const struct ntb_alt_reg xeon_b2b_reg = {
553 	.db_bell = XEON_B2B_DOORBELL_OFFSET,
554 	.spad = XEON_B2B_SPAD_OFFSET,
555 };
556 
/* Secondary-side base, limit and translation register offsets. */
557 static const struct ntb_xlat_reg xeon_sec_xlat = {
558 	.bar0_base = XEON_SBAR0BASE_OFFSET,
559 	.bar2_base = XEON_SBAR2BASE_OFFSET,
560 	.bar4_base = XEON_SBAR4BASE_OFFSET,
561 	.bar5_base = XEON_SBAR5BASE_OFFSET,
562 
563 	.bar2_limit = XEON_SBAR2LMT_OFFSET,
564 	.bar4_limit = XEON_SBAR4LMT_OFFSET,
565 	.bar5_limit = XEON_SBAR5LMT_OFFSET,
566 
567 	.bar2_xlat = XEON_SBAR2XLAT_OFFSET,
568 	.bar4_xlat = XEON_SBAR4XLAT_OFFSET,
569 	.bar5_xlat = XEON_SBAR5XLAT_OFFSET,
570 };
572 
/*
 * Default B2B bus addresses for the upstream (USD) and downstream (DSD)
 * sides.  Both start from the same XEON_B2B_* defaults and are non-const
 * because the hw.ntb.xeon_b2b.* tunables below can override the BAR2/4/5
 * fields at boot.
 */
572 static struct ntb_b2b_addr xeon_b2b_usd_addr = {
573 	.bar0_addr = XEON_B2B_BAR0_ADDR,
574 	.bar2_addr64 = XEON_B2B_BAR2_ADDR64,
575 	.bar4_addr64 = XEON_B2B_BAR4_ADDR64,
576 	.bar4_addr32 = XEON_B2B_BAR4_ADDR32,
577 	.bar5_addr32 = XEON_B2B_BAR5_ADDR32,
578 };
579 
580 static struct ntb_b2b_addr xeon_b2b_dsd_addr = {
581 	.bar0_addr = XEON_B2B_BAR0_ADDR,
582 	.bar2_addr64 = XEON_B2B_BAR2_ADDR64,
583 	.bar4_addr64 = XEON_B2B_BAR4_ADDR64,
584 	.bar4_addr32 = XEON_B2B_BAR4_ADDR32,
585 	.bar5_addr32 = XEON_B2B_BAR5_ADDR32,
586 };
588 
/*
 * hw.ntb.xeon_b2b.*: boot-time tunables (CTLFLAG_RDTUN) that override the
 * BAR2/BAR4/BAR5 fields of xeon_b2b_usd_addr and xeon_b2b_dsd_addr.
 */
588 SYSCTL_NODE(_hw_ntb, OID_AUTO, xeon_b2b, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
589     "B2B MW segment overrides -- MUST be the same on both sides");
590 
/* Upstream-side addresses. */
591 SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar2_addr64, CTLFLAG_RDTUN,
592     &xeon_b2b_usd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
593     "hardware, use this 64-bit address on the bus between the NTB devices for "
594     "the window at BAR2, on the upstream side of the link.  MUST be the same "
595     "address on both sides.");
596 SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr64, CTLFLAG_RDTUN,
597     &xeon_b2b_usd_addr.bar4_addr64, 0, "See usd_bar2_addr64, but BAR4.");
598 SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr32, CTLFLAG_RDTUN,
599     &xeon_b2b_usd_addr.bar4_addr32, 0, "See usd_bar2_addr64, but BAR4 "
600     "(split-BAR mode).");
601 SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar5_addr32, CTLFLAG_RDTUN,
602     &xeon_b2b_usd_addr.bar5_addr32, 0, "See usd_bar2_addr64, but BAR5 "
603     "(split-BAR mode).");
604 
/* Downstream-side addresses. */
605 SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar2_addr64, CTLFLAG_RDTUN,
606     &xeon_b2b_dsd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
607     "hardware, use this 64-bit address on the bus between the NTB devices for "
608     "the window at BAR2, on the downstream side of the link.  MUST be the same"
609     " address on both sides.");
610 SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr64, CTLFLAG_RDTUN,
611     &xeon_b2b_dsd_addr.bar4_addr64, 0, "See dsd_bar2_addr64, but BAR4.");
612 SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr32, CTLFLAG_RDTUN,
613     &xeon_b2b_dsd_addr.bar4_addr32, 0, "See dsd_bar2_addr64, but BAR4 "
614     "(split-BAR mode).");
615 SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar5_addr32, CTLFLAG_RDTUN,
616     &xeon_b2b_dsd_addr.bar5_addr32, 0, "See dsd_bar2_addr64, but BAR5 "
617     "(split-BAR mode).");
619 
620 /*
621  * OS <-> Driver interface structures
622  */
623 MALLOC_DEFINE(M_NTB, "ntb_hw", "ntb_hw driver memory allocations");
624 
625 /*
626  * OS <-> Driver linkage functions
627  */
628 static int
629 intel_ntb_probe(device_t device)
630 {
631 	struct ntb_hw_info *p;
632 
633 	p = intel_ntb_get_device_info(pci_get_devid(device));
634 	if (p == NULL)
635 		return (ENXIO);
636 
637 	device_set_desc(device, p->desc);
638 	return (0);
639 }
640 
641 static int
642 intel_ntb_attach(device_t device)
643 {
644 	struct ntb_softc *ntb;
645 	struct ntb_hw_info *p;
646 	int error;
647 
648 	ntb = device_get_softc(device);
649 	p = intel_ntb_get_device_info(pci_get_devid(device));
650 
651 	ntb->device = device;
652 	ntb->type = p->type;
653 	ntb->features = p->features;
654 	ntb->b2b_mw_idx = B2B_MW_DISABLED;
655 	ntb->msix_mw_idx = B2B_MW_DISABLED;
656 
657 	/* Heartbeat timer for NTB_ATOM since there is no link interrupt */
658 	callout_init(&ntb->heartbeat_timer, 1);
659 	callout_init(&ntb->lr_timer, 1);
660 	callout_init(&ntb->peer_msix_work, 1);
661 	mtx_init(&ntb->db_mask_lock, "ntb hw bits", NULL, MTX_SPIN);
662 
663 	if (ntb->type == NTB_ATOM)
664 		error = intel_ntb_detect_atom(ntb);
665 	else
666 		error = intel_ntb_detect_xeon(ntb);
667 	if (error != 0)
668 		goto out;
669 
670 	intel_ntb_detect_max_mw(ntb);
671 
672 	pci_enable_busmaster(ntb->device);
673 
674 	error = intel_ntb_map_pci_bars(ntb);
675 	if (error != 0)
676 		goto out;
677 	if (ntb->type == NTB_ATOM)
678 		error = intel_ntb_atom_init_dev(ntb);
679 	else
680 		error = intel_ntb_xeon_init_dev(ntb);
681 	if (error != 0)
682 		goto out;
683 
684 	intel_ntb_spad_clear(device);
685 
686 	intel_ntb_poll_link(ntb);
687 
688 	intel_ntb_sysctl_init(ntb);
689 
690 	/* Attach children to this controller */
691 	error = ntb_register_device(device);
692 
693 out:
694 	if (error != 0)
695 		intel_ntb_detach(device);
696 	return (error);
697 }
698 
699 static int
700 intel_ntb_detach(device_t device)
701 {
702 	struct ntb_softc *ntb;
703 
704 	ntb = device_get_softc(device);
705 
706 	/* Detach & delete all children */
707 	ntb_unregister_device(device);
708 
709 	if (ntb->self_reg != NULL) {
710 		DB_MASK_LOCK(ntb);
711 		db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_valid_mask);
712 		DB_MASK_UNLOCK(ntb);
713 	}
714 	callout_drain(&ntb->heartbeat_timer);
715 	callout_drain(&ntb->lr_timer);
716 	callout_drain(&ntb->peer_msix_work);
717 	pci_disable_busmaster(ntb->device);
718 	if (ntb->type == NTB_XEON)
719 		intel_ntb_teardown_xeon(ntb);
720 	intel_ntb_teardown_interrupts(ntb);
721 
722 	mtx_destroy(&ntb->db_mask_lock);
723 
724 	intel_ntb_unmap_pci_bar(ntb);
725 
726 	return (0);
727 }
728 
729 /*
730  * Driver internal routines
731  */
732 static inline enum ntb_bar
733 intel_ntb_mw_to_bar(struct ntb_softc *ntb, unsigned mw)
734 {
735 
736 	KASSERT(mw < ntb->mw_count,
737 	    ("%s: mw:%u > count:%u", __func__, mw, (unsigned)ntb->mw_count));
738 	KASSERT(ntb->reg->mw_bar[mw] != 0, ("invalid mw"));
739 
740 	return (ntb->reg->mw_bar[mw]);
741 }
742 
743 static inline bool
744 bar_is_64bit(struct ntb_softc *ntb, enum ntb_bar bar)
745 {
746 	/* XXX This assertion could be stronger. */
747 	KASSERT(bar < NTB_MAX_BARS, ("bogus bar"));
748 	return (bar < NTB_B2B_BAR_2 || !HAS_FEATURE(ntb, NTB_SPLIT_BAR));
749 }
750 
751 static inline void
752 bar_get_xlat_params(struct ntb_softc *ntb, enum ntb_bar bar, uint32_t *base,
753     uint32_t *xlat, uint32_t *lmt)
754 {
755 	uint32_t basev, lmtv, xlatv;
756 
757 	switch (bar) {
758 	case NTB_B2B_BAR_1:
759 		basev = ntb->xlat_reg->bar2_base;
760 		lmtv = ntb->xlat_reg->bar2_limit;
761 		xlatv = ntb->xlat_reg->bar2_xlat;
762 		break;
763 	case NTB_B2B_BAR_2:
764 		basev = ntb->xlat_reg->bar4_base;
765 		lmtv = ntb->xlat_reg->bar4_limit;
766 		xlatv = ntb->xlat_reg->bar4_xlat;
767 		break;
768 	case NTB_B2B_BAR_3:
769 		basev = ntb->xlat_reg->bar5_base;
770 		lmtv = ntb->xlat_reg->bar5_limit;
771 		xlatv = ntb->xlat_reg->bar5_xlat;
772 		break;
773 	default:
774 		KASSERT(bar >= NTB_B2B_BAR_1 && bar < NTB_MAX_BARS,
775 		    ("bad bar"));
776 		basev = lmtv = xlatv = 0;
777 		break;
778 	}
779 
780 	if (base != NULL)
781 		*base = basev;
782 	if (xlat != NULL)
783 		*xlat = xlatv;
784 	if (lmt != NULL)
785 		*lmt = lmtv;
786 }
787 
788 static int
789 intel_ntb_map_pci_bars(struct ntb_softc *ntb)
790 {
791 	struct ntb_pci_bar_info *bar;
792 	int rc;
793 
794 	bar = &ntb->bar_info[NTB_CONFIG_BAR];
795 	bar->pci_resource_id = PCIR_BAR(0);
796 	rc = map_mmr_bar(ntb, bar);
797 	if (rc != 0)
798 		goto out;
799 
800 	/*
801 	 * At least on Xeon v4 NTB device leaks to host some remote side
802 	 * BAR0 writes supposed to update scratchpad registers.  I am not
803 	 * sure why it happens, but it may be related to the fact that
804 	 * on a link side BAR0 is 32KB, while on a host side it is 64KB.
805 	 * Without this hack DMAR blocks those accesses as not allowed.
806 	 */
807 	if (bus_dma_tag_create(bus_get_dma_tag(ntb->device), 1, 0,
808 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
809 	    bar->size, 1, bar->size, 0, NULL, NULL, &ntb->bar0_dma_tag)) {
810 		device_printf(ntb->device, "Unable to create BAR0 tag\n");
811 		return (ENOMEM);
812 	}
813 	if (bus_dmamap_create(ntb->bar0_dma_tag, 0, &ntb->bar0_dma_map)) {
814 		device_printf(ntb->device, "Unable to create BAR0 map\n");
815 		return (ENOMEM);
816 	}
817 	if (bus_dma_iommu_load_ident(ntb->bar0_dma_tag, ntb->bar0_dma_map,
818 	    bar->pbase, bar->size, 0)) {
819 		device_printf(ntb->device, "Unable to load BAR0 map\n");
820 		return (ENOMEM);
821 	}
822 
823 	bar = &ntb->bar_info[NTB_B2B_BAR_1];
824 	bar->pci_resource_id = PCIR_BAR(2);
825 	rc = map_memory_window_bar(ntb, bar);
826 	if (rc != 0)
827 		goto out;
828 	bar->psz_off = XEON_PBAR23SZ_OFFSET;
829 	bar->ssz_off = XEON_SBAR23SZ_OFFSET;
830 	bar->pbarxlat_off = XEON_PBAR2XLAT_OFFSET;
831 
832 	bar = &ntb->bar_info[NTB_B2B_BAR_2];
833 	bar->pci_resource_id = PCIR_BAR(4);
834 	rc = map_memory_window_bar(ntb, bar);
835 	if (rc != 0)
836 		goto out;
837 	bar->psz_off = XEON_PBAR4SZ_OFFSET;
838 	bar->ssz_off = XEON_SBAR4SZ_OFFSET;
839 	bar->pbarxlat_off = XEON_PBAR4XLAT_OFFSET;
840 
841 	if (!HAS_FEATURE(ntb, NTB_SPLIT_BAR))
842 		goto out;
843 
844 	bar = &ntb->bar_info[NTB_B2B_BAR_3];
845 	bar->pci_resource_id = PCIR_BAR(5);
846 	rc = map_memory_window_bar(ntb, bar);
847 	bar->psz_off = XEON_PBAR5SZ_OFFSET;
848 	bar->ssz_off = XEON_SBAR5SZ_OFFSET;
849 	bar->pbarxlat_off = XEON_PBAR5XLAT_OFFSET;
850 
851 out:
852 	if (rc != 0)
853 		device_printf(ntb->device,
854 		    "unable to allocate pci resource\n");
855 	return (rc);
856 }
857 
858 static void
859 print_map_success(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar,
860     const char *kind)
861 {
862 
863 	device_printf(ntb->device,
864 	    "Mapped BAR%d v:[%p-%p] p:[%p-%p] (0x%jx bytes) (%s)\n",
865 	    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
866 	    (char *)bar->vbase + bar->size - 1,
867 	    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
868 	    (uintmax_t)bar->size, kind);
869 }
870 
871 static int
872 map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
873 {
874 
875 	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
876 	    &bar->pci_resource_id, RF_ACTIVE);
877 	if (bar->pci_resource == NULL)
878 		return (ENXIO);
879 
880 	save_bar_parameters(bar);
881 	bar->map_mode = VM_MEMATTR_UNCACHEABLE;
882 	print_map_success(ntb, bar, "mmr");
883 	return (0);
884 }
885 
886 static int
887 map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
888 {
889 	int rc;
890 	vm_memattr_t mapmode;
891 	uint8_t bar_size_bits = 0;
892 
893 	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
894 	    &bar->pci_resource_id, RF_ACTIVE);
895 
896 	if (bar->pci_resource == NULL)
897 		return (ENXIO);
898 
899 	save_bar_parameters(bar);
900 	/*
901 	 * Ivytown NTB BAR sizes are misreported by the hardware due to a
902 	 * hardware issue. To work around this, query the size it should be
903 	 * configured to by the device and modify the resource to correspond to
904 	 * this new size. The BIOS on systems with this problem is required to
905 	 * provide enough address space to allow the driver to make this change
906 	 * safely.
907 	 *
908 	 * Ideally I could have just specified the size when I allocated the
909 	 * resource like:
910 	 *  bus_alloc_resource(ntb->device,
911 	 *	SYS_RES_MEMORY, &bar->pci_resource_id, 0ul, ~0ul,
912 	 *	1ul << bar_size_bits, RF_ACTIVE);
913 	 * but the PCI driver does not honor the size in this call, so we have
914 	 * to modify it after the fact.
915 	 */
916 	if (HAS_FEATURE(ntb, NTB_BAR_SIZE_4K)) {
917 		if (bar->pci_resource_id == PCIR_BAR(2))
918 			bar_size_bits = pci_read_config(ntb->device,
919 			    XEON_PBAR23SZ_OFFSET, 1);
920 		else
921 			bar_size_bits = pci_read_config(ntb->device,
922 			    XEON_PBAR45SZ_OFFSET, 1);
923 
924 		rc = bus_adjust_resource(ntb->device, SYS_RES_MEMORY,
925 		    bar->pci_resource, bar->pbase,
926 		    bar->pbase + (1ul << bar_size_bits) - 1);
927 		if (rc != 0) {
928 			device_printf(ntb->device,
929 			    "unable to resize bar\n");
930 			return (rc);
931 		}
932 
933 		save_bar_parameters(bar);
934 	}
935 
936 	bar->map_mode = VM_MEMATTR_UNCACHEABLE;
937 	print_map_success(ntb, bar, "mw");
938 
939 	/*
940 	 * Optionally, mark MW BARs as anything other than UC to improve
941 	 * performance.
942 	 */
943 	mapmode = intel_ntb_pat_flags();
944 	if (mapmode == bar->map_mode)
945 		return (0);
946 
947 	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size, mapmode);
948 	if (rc == 0) {
949 		bar->map_mode = mapmode;
950 		device_printf(ntb->device,
951 		    "Marked BAR%d v:[%p-%p] p:[%p-%p] as "
952 		    "%s.\n",
953 		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
954 		    (char *)bar->vbase + bar->size - 1,
955 		    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
956 		    intel_ntb_vm_memattr_to_str(mapmode));
957 	} else
958 		device_printf(ntb->device,
959 		    "Unable to mark BAR%d v:[%p-%p] p:[%p-%p] as "
960 		    "%s: %d\n",
961 		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
962 		    (char *)bar->vbase + bar->size - 1,
963 		    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
964 		    intel_ntb_vm_memattr_to_str(mapmode), rc);
965 		/* Proceed anyway */
966 	return (0);
967 }
968 
969 static void
970 intel_ntb_unmap_pci_bar(struct ntb_softc *ntb)
971 {
972 	struct ntb_pci_bar_info *bar;
973 	int i;
974 
975 	if (ntb->bar0_dma_map != NULL) {
976 		bus_dmamap_unload(ntb->bar0_dma_tag, ntb->bar0_dma_map);
977 		bus_dmamap_destroy(ntb->bar0_dma_tag, ntb->bar0_dma_map);
978 	}
979 	if (ntb->bar0_dma_tag != NULL)
980 		bus_dma_tag_destroy(ntb->bar0_dma_tag);
981 	for (i = 0; i < NTB_MAX_BARS; i++) {
982 		bar = &ntb->bar_info[i];
983 		if (bar->pci_resource != NULL)
984 			bus_release_resource(ntb->device, SYS_RES_MEMORY,
985 			    bar->pci_resource_id, bar->pci_resource);
986 	}
987 }
988 
989 static int
990 intel_ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors)
991 {
992 	uint32_t i;
993 	int rc;
994 
995 	for (i = 0; i < num_vectors; i++) {
996 		ntb->int_info[i].rid = i + 1;
997 		ntb->int_info[i].res = bus_alloc_resource_any(ntb->device,
998 		    SYS_RES_IRQ, &ntb->int_info[i].rid, RF_ACTIVE);
999 		if (ntb->int_info[i].res == NULL) {
1000 			device_printf(ntb->device,
1001 			    "bus_alloc_resource failed\n");
1002 			return (ENOMEM);
1003 		}
1004 		ntb->int_info[i].tag = NULL;
1005 		ntb->allocated_interrupts++;
1006 		rc = bus_setup_intr(ntb->device, ntb->int_info[i].res,
1007 		    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_vec_isr,
1008 		    &ntb->msix_vec[i], &ntb->int_info[i].tag);
1009 		if (rc != 0) {
1010 			device_printf(ntb->device, "bus_setup_intr failed\n");
1011 			return (ENXIO);
1012 		}
1013 	}
1014 	return (0);
1015 }
1016 
/*
 * The Linux NTB driver drops from MSI-X to legacy INTx if a unique vector
 * cannot be allocated for each MSI-X message.  JHB seems to think remapping
 * should be okay.  This tunable should enable us to test that hypothesis
 * when someone gets their hands on some Xeon hardware.
 *
 * When non-zero, intel_ntb_init_isr() acts as if one fewer MSI-X vector was
 * allocated than requested, even though the full count was granted, so that
 * the remapping path is exercised.
 */
static int ntb_force_remap_mode;
SYSCTL_INT(_hw_ntb, OID_AUTO, force_remap_mode, CTLFLAG_RDTUN,
    &ntb_force_remap_mode, 0, "If enabled, force MSI-X messages to be remapped"
    " to a smaller number of ithreads, even if the desired number are "
    "available");
1028 
/*
 * In case it is NOT ok, give consumers an abort button.
 *
 * When non-zero, intel_ntb_remap_msix() refuses to remap and returns ENXIO.
 */
static int ntb_prefer_intx;
SYSCTL_INT(_hw_ntb, OID_AUTO, prefer_intx_to_remap, CTLFLAG_RDTUN,
    &ntb_prefer_intx, 0, "If enabled, prefer to use legacy INTx mode rather "
    "than remapping MSI-X messages over available slots (match Linux driver "
    "behavior)");
1037 
1038 /*
1039  * Remap the desired number of MSI-X messages to available ithreads in a simple
1040  * round-robin fashion.
1041  */
1042 static int
1043 intel_ntb_remap_msix(device_t dev, uint32_t desired, uint32_t avail)
1044 {
1045 	u_int *vectors;
1046 	uint32_t i;
1047 	int rc;
1048 
1049 	if (ntb_prefer_intx != 0)
1050 		return (ENXIO);
1051 
1052 	vectors = malloc(desired * sizeof(*vectors), M_NTB, M_ZERO | M_WAITOK);
1053 
1054 	for (i = 0; i < desired; i++)
1055 		vectors[i] = (i % avail) + 1;
1056 
1057 	rc = pci_remap_msix(dev, desired, vectors);
1058 	free(vectors, M_NTB);
1059 	return (rc);
1060 }
1061 
1062 static int
1063 intel_ntb_init_isr(struct ntb_softc *ntb)
1064 {
1065 	uint32_t desired_vectors, num_vectors;
1066 	int rc;
1067 
1068 	ntb->allocated_interrupts = 0;
1069 	ntb->last_ts = ticks;
1070 
1071 	/*
1072 	 * Mask all doorbell interrupts.  (Except link events!)
1073 	 */
1074 	DB_MASK_LOCK(ntb);
1075 	ntb->db_mask = ntb->db_valid_mask;
1076 	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
1077 	DB_MASK_UNLOCK(ntb);
1078 
1079 	num_vectors = desired_vectors = MIN(pci_msix_count(ntb->device),
1080 	    ntb->db_count);
1081 	if (desired_vectors >= 1) {
1082 		rc = pci_alloc_msix(ntb->device, &num_vectors);
1083 
1084 		if (ntb_force_remap_mode != 0 && rc == 0 &&
1085 		    num_vectors == desired_vectors)
1086 			num_vectors--;
1087 
1088 		if (rc == 0 && num_vectors < desired_vectors) {
1089 			rc = intel_ntb_remap_msix(ntb->device, desired_vectors,
1090 			    num_vectors);
1091 			if (rc == 0)
1092 				num_vectors = desired_vectors;
1093 			else
1094 				pci_release_msi(ntb->device);
1095 		}
1096 		if (rc != 0)
1097 			num_vectors = 1;
1098 	} else
1099 		num_vectors = 1;
1100 
1101 	if (ntb->type == NTB_XEON && num_vectors < ntb->db_vec_count) {
1102 		if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
1103 			device_printf(ntb->device,
1104 			    "Errata workaround does not support MSI or INTX\n");
1105 			return (EINVAL);
1106 		}
1107 
1108 		ntb->db_vec_count = 1;
1109 		ntb->db_vec_shift = XEON_DB_TOTAL_SHIFT;
1110 		rc = intel_ntb_setup_legacy_interrupt(ntb);
1111 	} else {
1112 		if (num_vectors - 1 != XEON_NONLINK_DB_MSIX_BITS &&
1113 		    HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
1114 			device_printf(ntb->device,
1115 			    "Errata workaround expects %d doorbell bits\n",
1116 			    XEON_NONLINK_DB_MSIX_BITS);
1117 			return (EINVAL);
1118 		}
1119 
1120 		intel_ntb_create_msix_vec(ntb, num_vectors);
1121 		rc = intel_ntb_setup_msix(ntb, num_vectors);
1122 	}
1123 	if (rc != 0) {
1124 		device_printf(ntb->device,
1125 		    "Error allocating interrupts: %d\n", rc);
1126 		intel_ntb_free_msix_vec(ntb);
1127 	}
1128 
1129 	return (rc);
1130 }
1131 
1132 static int
1133 intel_ntb_setup_legacy_interrupt(struct ntb_softc *ntb)
1134 {
1135 	int rc;
1136 
1137 	ntb->int_info[0].rid = 0;
1138 	ntb->int_info[0].res = bus_alloc_resource_any(ntb->device, SYS_RES_IRQ,
1139 	    &ntb->int_info[0].rid, RF_SHAREABLE|RF_ACTIVE);
1140 	if (ntb->int_info[0].res == NULL) {
1141 		device_printf(ntb->device, "bus_alloc_resource failed\n");
1142 		return (ENOMEM);
1143 	}
1144 
1145 	ntb->int_info[0].tag = NULL;
1146 	ntb->allocated_interrupts = 1;
1147 
1148 	rc = bus_setup_intr(ntb->device, ntb->int_info[0].res,
1149 	    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_irq_isr,
1150 	    ntb, &ntb->int_info[0].tag);
1151 	if (rc != 0) {
1152 		device_printf(ntb->device, "bus_setup_intr failed\n");
1153 		return (ENXIO);
1154 	}
1155 
1156 	return (0);
1157 }
1158 
1159 static void
1160 intel_ntb_teardown_interrupts(struct ntb_softc *ntb)
1161 {
1162 	struct ntb_int_info *current_int;
1163 	int i;
1164 
1165 	for (i = 0; i < ntb->allocated_interrupts; i++) {
1166 		current_int = &ntb->int_info[i];
1167 		if (current_int->tag != NULL)
1168 			bus_teardown_intr(ntb->device, current_int->res,
1169 			    current_int->tag);
1170 
1171 		if (current_int->res != NULL)
1172 			bus_release_resource(ntb->device, SYS_RES_IRQ,
1173 			    rman_get_rid(current_int->res), current_int->res);
1174 	}
1175 
1176 	intel_ntb_free_msix_vec(ntb);
1177 	pci_release_msi(ntb->device);
1178 }
1179 
1180 /*
1181  * Doorbell register and mask are 64-bit on Atom, 16-bit on Xeon.  Abstract it
1182  * out to make code clearer.
1183  */
1184 static inline uint64_t
1185 db_ioread(struct ntb_softc *ntb, uint64_t regoff)
1186 {
1187 
1188 	if (ntb->type == NTB_ATOM)
1189 		return (intel_ntb_reg_read(8, regoff));
1190 
1191 	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
1192 
1193 	return (intel_ntb_reg_read(2, regoff));
1194 }
1195 
1196 static inline void
1197 db_iowrite(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
1198 {
1199 
1200 	KASSERT((val & ~ntb->db_valid_mask) == 0,
1201 	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
1202 	     (uintmax_t)(val & ~ntb->db_valid_mask),
1203 	     (uintmax_t)ntb->db_valid_mask));
1204 
1205 	if (regoff == ntb->self_reg->db_mask)
1206 		DB_MASK_ASSERT(ntb, MA_OWNED);
1207 	db_iowrite_raw(ntb, regoff, val);
1208 }
1209 
1210 static inline void
1211 db_iowrite_raw(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
1212 {
1213 
1214 	if (ntb->type == NTB_ATOM) {
1215 		intel_ntb_reg_write(8, regoff, val);
1216 		return;
1217 	}
1218 
1219 	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
1220 	intel_ntb_reg_write(2, regoff, (uint16_t)val);
1221 }
1222 
1223 static void
1224 intel_ntb_db_set_mask(device_t dev, uint64_t bits)
1225 {
1226 	struct ntb_softc *ntb = device_get_softc(dev);
1227 
1228 	DB_MASK_LOCK(ntb);
1229 	ntb->db_mask |= bits;
1230 	if (!HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
1231 		db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
1232 	DB_MASK_UNLOCK(ntb);
1233 }
1234 
1235 static void
1236 intel_ntb_db_clear_mask(device_t dev, uint64_t bits)
1237 {
1238 	struct ntb_softc *ntb = device_get_softc(dev);
1239 	uint64_t ibits;
1240 	int i;
1241 
1242 	KASSERT((bits & ~ntb->db_valid_mask) == 0,
1243 	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
1244 	     (uintmax_t)(bits & ~ntb->db_valid_mask),
1245 	     (uintmax_t)ntb->db_valid_mask));
1246 
1247 	DB_MASK_LOCK(ntb);
1248 	ibits = ntb->fake_db & ntb->db_mask & bits;
1249 	ntb->db_mask &= ~bits;
1250 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
1251 		/* Simulate fake interrupts if unmasked DB bits are set. */
1252 		ntb->force_db |= ibits;
1253 		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
1254 			if ((ibits & intel_ntb_db_vector_mask(dev, i)) != 0)
1255 				swi_sched(ntb->int_info[i].tag, 0);
1256 		}
1257 	} else {
1258 		db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
1259 	}
1260 	DB_MASK_UNLOCK(ntb);
1261 }
1262 
1263 static uint64_t
1264 intel_ntb_db_read(device_t dev)
1265 {
1266 	struct ntb_softc *ntb = device_get_softc(dev);
1267 
1268 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
1269 		return (ntb->fake_db);
1270 
1271 	return (db_ioread(ntb, ntb->self_reg->db_bell));
1272 }
1273 
1274 static void
1275 intel_ntb_db_clear(device_t dev, uint64_t bits)
1276 {
1277 	struct ntb_softc *ntb = device_get_softc(dev);
1278 
1279 	KASSERT((bits & ~ntb->db_valid_mask) == 0,
1280 	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
1281 	     (uintmax_t)(bits & ~ntb->db_valid_mask),
1282 	     (uintmax_t)ntb->db_valid_mask));
1283 
1284 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
1285 		DB_MASK_LOCK(ntb);
1286 		ntb->fake_db &= ~bits;
1287 		DB_MASK_UNLOCK(ntb);
1288 		return;
1289 	}
1290 
1291 	db_iowrite(ntb, ntb->self_reg->db_bell, bits);
1292 }
1293 
1294 static inline uint64_t
1295 intel_ntb_vec_mask(struct ntb_softc *ntb, uint64_t db_vector)
1296 {
1297 	uint64_t shift, mask;
1298 
1299 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
1300 		/*
1301 		 * Remap vectors in custom way to make at least first
1302 		 * three doorbells to not generate stray events.
1303 		 * This breaks Linux compatibility (if one existed)
1304 		 * when more then one DB is used (not by if_ntb).
1305 		 */
1306 		if (db_vector < XEON_NONLINK_DB_MSIX_BITS - 1)
1307 			return (1 << db_vector);
1308 		if (db_vector == XEON_NONLINK_DB_MSIX_BITS - 1)
1309 			return (0x7ffc);
1310 	}
1311 
1312 	shift = ntb->db_vec_shift;
1313 	mask = (1ull << shift) - 1;
1314 	return (mask << (shift * db_vector));
1315 }
1316 
1317 static void
1318 intel_ntb_interrupt(struct ntb_softc *ntb, uint32_t vec)
1319 {
1320 	uint64_t vec_mask;
1321 
1322 	ntb->last_ts = ticks;
1323 	vec_mask = intel_ntb_vec_mask(ntb, vec);
1324 
1325 	if ((vec_mask & ntb->db_link_mask) != 0) {
1326 		if (intel_ntb_poll_link(ntb))
1327 			ntb_link_event(ntb->device);
1328 	}
1329 
1330 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP) &&
1331 	    (vec_mask & ntb->db_link_mask) == 0) {
1332 		DB_MASK_LOCK(ntb);
1333 
1334 		/*
1335 		 * Do not report same DB events again if not cleared yet,
1336 		 * unless the mask was just cleared for them and this
1337 		 * interrupt handler call can be the consequence of it.
1338 		 */
1339 		vec_mask &= ~ntb->fake_db | ntb->force_db;
1340 		ntb->force_db &= ~vec_mask;
1341 
1342 		/* Update our internal doorbell register. */
1343 		ntb->fake_db |= vec_mask;
1344 
1345 		/* Do not report masked DB events. */
1346 		vec_mask &= ~ntb->db_mask;
1347 
1348 		DB_MASK_UNLOCK(ntb);
1349 	}
1350 
1351 	if ((vec_mask & ntb->db_valid_mask) != 0)
1352 		ntb_db_event(ntb->device, vec);
1353 }
1354 
1355 static void
1356 ndev_vec_isr(void *arg)
1357 {
1358 	struct ntb_vec *nvec = arg;
1359 
1360 	intel_ntb_interrupt(nvec->ntb, nvec->num);
1361 }
1362 
/*
 * Legacy interrupt handler; 'arg' is the softc.
 */
static void
ndev_irq_isr(void *arg)
{
	struct ntb_softc *ntb;

	ntb = arg;

	/* If we couldn't set up MSI-X, we only have the one vector. */
	intel_ntb_interrupt(ntb, 0);
}
1369 
1370 static int
1371 intel_ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors)
1372 {
1373 	uint32_t i;
1374 
1375 	ntb->msix_vec = malloc(num_vectors * sizeof(*ntb->msix_vec), M_NTB,
1376 	    M_ZERO | M_WAITOK);
1377 	for (i = 0; i < num_vectors; i++) {
1378 		ntb->msix_vec[i].num = i;
1379 		ntb->msix_vec[i].ntb = ntb;
1380 	}
1381 
1382 	return (0);
1383 }
1384 
1385 static void
1386 intel_ntb_free_msix_vec(struct ntb_softc *ntb)
1387 {
1388 
1389 	if (ntb->msix_vec == NULL)
1390 		return;
1391 
1392 	free(ntb->msix_vec, M_NTB);
1393 	ntb->msix_vec = NULL;
1394 }
1395 
1396 static void
1397 intel_ntb_get_msix_info(struct ntb_softc *ntb)
1398 {
1399 	struct pci_devinfo *dinfo;
1400 	struct pcicfg_msix *msix;
1401 	uint32_t laddr, data, i, offset;
1402 
1403 	dinfo = device_get_ivars(ntb->device);
1404 	msix = &dinfo->cfg.msix;
1405 
1406 	CTASSERT(XEON_NONLINK_DB_MSIX_BITS == nitems(ntb->msix_data));
1407 
1408 	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
1409 		offset = msix->msix_table_offset + i * PCI_MSIX_ENTRY_SIZE;
1410 
1411 		laddr = bus_read_4(msix->msix_table_res, offset +
1412 		    PCI_MSIX_ENTRY_LOWER_ADDR);
1413 		intel_ntb_printf(2, "local MSIX addr(%u): 0x%x\n", i, laddr);
1414 
1415 		KASSERT((laddr & MSI_INTEL_ADDR_BASE) == MSI_INTEL_ADDR_BASE,
1416 		    ("local MSIX addr 0x%x not in MSI base 0x%x", laddr,
1417 		     MSI_INTEL_ADDR_BASE));
1418 		ntb->msix_data[i].nmd_ofs = laddr;
1419 
1420 		data = bus_read_4(msix->msix_table_res, offset +
1421 		    PCI_MSIX_ENTRY_DATA);
1422 		intel_ntb_printf(2, "local MSIX data(%u): 0x%x\n", i, data);
1423 
1424 		ntb->msix_data[i].nmd_data = data;
1425 	}
1426 }
1427 
1428 static struct ntb_hw_info *
1429 intel_ntb_get_device_info(uint32_t device_id)
1430 {
1431 	struct ntb_hw_info *ep;
1432 
1433 	for (ep = pci_ids; ep < &pci_ids[nitems(pci_ids)]; ep++) {
1434 		if (ep->device_id == device_id)
1435 			return (ep);
1436 	}
1437 	return (NULL);
1438 }
1439 
1440 static void
1441 intel_ntb_teardown_xeon(struct ntb_softc *ntb)
1442 {
1443 
1444 	if (ntb->reg != NULL)
1445 		intel_ntb_link_disable(ntb->device);
1446 }
1447 
1448 static void
1449 intel_ntb_detect_max_mw(struct ntb_softc *ntb)
1450 {
1451 
1452 	if (ntb->type == NTB_ATOM) {
1453 		ntb->mw_count = ATOM_MW_COUNT;
1454 		return;
1455 	}
1456 
1457 	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
1458 		ntb->mw_count = XEON_HSX_SPLIT_MW_COUNT;
1459 	else
1460 		ntb->mw_count = XEON_SNB_MW_COUNT;
1461 }
1462 
1463 static int
1464 intel_ntb_detect_xeon(struct ntb_softc *ntb)
1465 {
1466 	uint8_t ppd, conn_type;
1467 
1468 	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 1);
1469 	ntb->ppd = ppd;
1470 
1471 	if ((ppd & XEON_PPD_DEV_TYPE) != 0)
1472 		ntb->dev_type = NTB_DEV_DSD;
1473 	else
1474 		ntb->dev_type = NTB_DEV_USD;
1475 
1476 	if ((ppd & XEON_PPD_SPLIT_BAR) != 0)
1477 		ntb->features |= NTB_SPLIT_BAR;
1478 
1479 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP) &&
1480 	    !HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
1481 		device_printf(ntb->device,
1482 		    "Can not apply SB01BASE_LOCKUP workaround "
1483 		    "with split BARs disabled!\n");
1484 		device_printf(ntb->device,
1485 		    "Expect system hangs under heavy NTB traffic!\n");
1486 		ntb->features &= ~NTB_SB01BASE_LOCKUP;
1487 	}
1488 
1489 	/*
1490 	 * SDOORBELL errata workaround gets in the way of SB01BASE_LOCKUP
1491 	 * errata workaround; only do one at a time.
1492 	 */
1493 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
1494 		ntb->features &= ~NTB_SDOORBELL_LOCKUP;
1495 
1496 	conn_type = ppd & XEON_PPD_CONN_TYPE;
1497 	switch (conn_type) {
1498 	case NTB_CONN_B2B:
1499 		ntb->conn_type = conn_type;
1500 		break;
1501 	case NTB_CONN_RP:
1502 	case NTB_CONN_TRANSPARENT:
1503 	default:
1504 		device_printf(ntb->device, "Unsupported connection type: %u\n",
1505 		    (unsigned)conn_type);
1506 		return (ENXIO);
1507 	}
1508 	return (0);
1509 }
1510 
1511 static int
1512 intel_ntb_detect_atom(struct ntb_softc *ntb)
1513 {
1514 	uint32_t ppd, conn_type;
1515 
1516 	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4);
1517 	ntb->ppd = ppd;
1518 
1519 	if ((ppd & ATOM_PPD_DEV_TYPE) != 0)
1520 		ntb->dev_type = NTB_DEV_DSD;
1521 	else
1522 		ntb->dev_type = NTB_DEV_USD;
1523 
1524 	conn_type = (ppd & ATOM_PPD_CONN_TYPE) >> 8;
1525 	switch (conn_type) {
1526 	case NTB_CONN_B2B:
1527 		ntb->conn_type = conn_type;
1528 		break;
1529 	default:
1530 		device_printf(ntb->device, "Unsupported NTB configuration\n");
1531 		return (ENXIO);
1532 	}
1533 	return (0);
1534 }
1535 
/*
 * Xeon device initialization: set the scratchpad/doorbell parameters and
 * register descriptions, select the errata workaround (if any), program the
 * B2B memory window via xeon_setup_b2b_mw(), enable bus mastering and memory
 * decoding on the secondary side, mask all doorbells, and set up interrupts.
 *
 * Returns 0 on success, ENXIO if the connection type is not B2B, or the error
 * from xeon_setup_b2b_mw() or intel_ntb_init_isr().
 */
static int
intel_ntb_xeon_init_dev(struct ntb_softc *ntb)
{
	int rc;

	ntb->spad_count		= XEON_SPAD_COUNT;
	ntb->db_count		= XEON_DB_COUNT;
	ntb->db_link_mask	= XEON_DB_LINK_BIT;
	ntb->db_vec_count	= XEON_DB_MSIX_VECTOR_COUNT;
	ntb->db_vec_shift	= XEON_DB_MSIX_VECTOR_SHIFT;

	if (ntb->conn_type != NTB_CONN_B2B) {
		device_printf(ntb->device, "Connection type %d not supported\n",
		    ntb->conn_type);
		return (ENXIO);
	}

	ntb->reg = &xeon_reg;
	ntb->self_reg = &xeon_pri_reg;
	ntb->peer_reg = &xeon_b2b_reg;
	ntb->xlat_reg = &xeon_sec_xlat;

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
		/* Start with empty software doorbell state. */
		ntb->force_db = ntb->fake_db = 0;
		/*
		 * Adding mw_count before the modulo lets a negative
		 * g_ntb_msix_idx select a window counting from the end.
		 */
		ntb->msix_mw_idx = (ntb->mw_count + g_ntb_msix_idx) %
		    ntb->mw_count;
		intel_ntb_printf(2, "Setting up MSIX mw idx %d means %u\n",
		    g_ntb_msix_idx, ntb->msix_mw_idx);
		rc = intel_ntb_mw_set_wc_internal(ntb, ntb->msix_mw_idx,
		    VM_MEMATTR_UNCACHEABLE);
		KASSERT(rc == 0, ("shouldn't fail"));
	} else if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
		/*
		 * There is a Xeon hardware errata related to writes to SDOORBELL or
		 * B2BDOORBELL in conjunction with inbound access to NTB MMIO space,
		 * which may hang the system.  To workaround this, use a memory
		 * window to access the interrupt and scratch pad registers on the
		 * remote system.
		 */
		ntb->b2b_mw_idx = (ntb->mw_count + g_ntb_mw_idx) %
		    ntb->mw_count;
		intel_ntb_printf(2, "Setting up b2b mw idx %d means %u\n",
		    g_ntb_mw_idx, ntb->b2b_mw_idx);
		rc = intel_ntb_mw_set_wc_internal(ntb, ntb->b2b_mw_idx,
		    VM_MEMATTR_UNCACHEABLE);
		KASSERT(rc == 0, ("shouldn't fail"));
	} else if (HAS_FEATURE(ntb, NTB_B2BDOORBELL_BIT14))
		/*
		 * HW Errata on bit 14 of b2bdoorbell register.  Writes will not be
		 * mirrored to the remote system.  Shrink the number of bits by one,
		 * since bit 14 is the last bit.
		 *
		 * On REGS_THRU_MW errata mode, we don't use the b2bdoorbell register
		 * anyway.  Nor for non-B2B connection types.
		 */
		ntb->db_count = XEON_DB_COUNT - 1;

	/* One valid bit per doorbell, after any errata adjustment above. */
	ntb->db_valid_mask = (1ull << ntb->db_count) - 1;

	/*
	 * The local and peer address sets are swapped depending on whether
	 * this side is the upstream (USD) or downstream (DSD) device.
	 */
	if (ntb->dev_type == NTB_DEV_USD)
		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_dsd_addr,
		    &xeon_b2b_usd_addr);
	else
		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_usd_addr,
		    &xeon_b2b_dsd_addr);
	if (rc != 0)
		return (rc);

	/* Enable Bus Master and Memory Space on the secondary side */
	intel_ntb_reg_write(2, XEON_SPCICMD_OFFSET,
	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);

	/*
	 * Mask all doorbell interrupts.
	 */
	DB_MASK_LOCK(ntb);
	ntb->db_mask = ntb->db_valid_mask;
	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
	DB_MASK_UNLOCK(ntb);

	rc = intel_ntb_init_isr(ntb);
	return (rc);
}
1619 
1620 static int
1621 intel_ntb_atom_init_dev(struct ntb_softc *ntb)
1622 {
1623 	int error;
1624 
1625 	KASSERT(ntb->conn_type == NTB_CONN_B2B,
1626 	    ("Unsupported NTB configuration (%d)\n", ntb->conn_type));
1627 
1628 	ntb->spad_count		 = ATOM_SPAD_COUNT;
1629 	ntb->db_count		 = ATOM_DB_COUNT;
1630 	ntb->db_vec_count	 = ATOM_DB_MSIX_VECTOR_COUNT;
1631 	ntb->db_vec_shift	 = ATOM_DB_MSIX_VECTOR_SHIFT;
1632 	ntb->db_valid_mask	 = (1ull << ntb->db_count) - 1;
1633 
1634 	ntb->reg = &atom_reg;
1635 	ntb->self_reg = &atom_pri_reg;
1636 	ntb->peer_reg = &atom_b2b_reg;
1637 	ntb->xlat_reg = &atom_sec_xlat;
1638 
1639 	/*
1640 	 * FIXME - MSI-X bug on early Atom HW, remove once internal issue is
1641 	 * resolved.  Mask transaction layer internal parity errors.
1642 	 */
1643 	pci_write_config(ntb->device, 0xFC, 0x4, 4);
1644 
1645 	configure_atom_secondary_side_bars(ntb);
1646 
1647 	/* Enable Bus Master and Memory Space on the secondary side */
1648 	intel_ntb_reg_write(2, ATOM_SPCICMD_OFFSET,
1649 	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
1650 
1651 	error = intel_ntb_init_isr(ntb);
1652 	if (error != 0)
1653 		return (error);
1654 
1655 	/* Initiate PCI-E link training */
1656 	intel_ntb_link_enable(ntb->device, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
1657 
1658 	callout_reset(&ntb->heartbeat_timer, 0, atom_link_hb, ntb);
1659 
1660 	return (0);
1661 }
1662 
1663 /* XXX: Linux driver doesn't seem to do any of this for Atom. */
1664 static void
1665 configure_atom_secondary_side_bars(struct ntb_softc *ntb)
1666 {
1667 
1668 	if (ntb->dev_type == NTB_DEV_USD) {
1669 		intel_ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
1670 		    XEON_B2B_BAR2_ADDR64);
1671 		intel_ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
1672 		    XEON_B2B_BAR4_ADDR64);
1673 		intel_ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
1674 		intel_ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
1675 	} else {
1676 		intel_ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
1677 		    XEON_B2B_BAR2_ADDR64);
1678 		intel_ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
1679 		    XEON_B2B_BAR4_ADDR64);
1680 		intel_ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
1681 		intel_ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
1682 	}
1683 }
1684 
1685 
/*
 * When working around Xeon SDOORBELL errata by remapping remote registers in a
 * MW, limit the B2B MW to half a MW.  By sharing a MW, half the shared MW
 * remains for use by a higher layer.
 *
 * Will only be used if working around SDOORBELL errata and the BIOS-configured
 * MW size is sufficiently large.
 *
 * Consulted by xeon_setup_b2b_mw(): when non-zero and half of the B2B BAR is
 * at least XEON_B2B_MIN_SIZE, the B2B offset is set to half the BAR size.
 */
static unsigned int ntb_b2b_mw_share;
SYSCTL_UINT(_hw_ntb, OID_AUTO, b2b_mw_share, CTLFLAG_RDTUN, &ntb_b2b_mw_share,
    0, "If enabled (non-zero), prefer to share half of the B2B peer register "
    "MW with higher level consumers.  Both sides of the NTB MUST set the same "
    "value here.");
1699 
1700 static void
1701 xeon_reset_sbar_size(struct ntb_softc *ntb, enum ntb_bar idx,
1702     enum ntb_bar regbar)
1703 {
1704 	struct ntb_pci_bar_info *bar;
1705 	uint8_t bar_sz;
1706 
1707 	if (!HAS_FEATURE(ntb, NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_3)
1708 		return;
1709 
1710 	bar = &ntb->bar_info[idx];
1711 	bar_sz = pci_read_config(ntb->device, bar->psz_off, 1);
1712 	if (idx == regbar) {
1713 		if (ntb->b2b_off != 0)
1714 			bar_sz--;
1715 		else
1716 			bar_sz = 0;
1717 	}
1718 	pci_write_config(ntb->device, bar->ssz_off, bar_sz, 1);
1719 	bar_sz = pci_read_config(ntb->device, bar->ssz_off, 1);
1720 	(void)bar_sz;
1721 }
1722 
1723 static void
1724 xeon_set_sbar_base_and_limit(struct ntb_softc *ntb, uint64_t bar_addr,
1725     enum ntb_bar idx, enum ntb_bar regbar)
1726 {
1727 	uint64_t reg_val;
1728 	uint32_t base_reg, lmt_reg;
1729 
1730 	bar_get_xlat_params(ntb, idx, &base_reg, NULL, &lmt_reg);
1731 	if (idx == regbar) {
1732 		if (ntb->b2b_off)
1733 			bar_addr += ntb->b2b_off;
1734 		else
1735 			bar_addr = 0;
1736 	}
1737 
1738 	if (!bar_is_64bit(ntb, idx)) {
1739 		intel_ntb_reg_write(4, base_reg, bar_addr);
1740 		reg_val = intel_ntb_reg_read(4, base_reg);
1741 		(void)reg_val;
1742 
1743 		intel_ntb_reg_write(4, lmt_reg, bar_addr);
1744 		reg_val = intel_ntb_reg_read(4, lmt_reg);
1745 		(void)reg_val;
1746 	} else {
1747 		intel_ntb_reg_write(8, base_reg, bar_addr);
1748 		reg_val = intel_ntb_reg_read(8, base_reg);
1749 		(void)reg_val;
1750 
1751 		intel_ntb_reg_write(8, lmt_reg, bar_addr);
1752 		reg_val = intel_ntb_reg_read(8, lmt_reg);
1753 		(void)reg_val;
1754 	}
1755 }
1756 
1757 static void
1758 xeon_set_pbar_xlat(struct ntb_softc *ntb, uint64_t base_addr, enum ntb_bar idx)
1759 {
1760 	struct ntb_pci_bar_info *bar;
1761 
1762 	bar = &ntb->bar_info[idx];
1763 	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_2) {
1764 		intel_ntb_reg_write(4, bar->pbarxlat_off, base_addr);
1765 		base_addr = intel_ntb_reg_read(4, bar->pbarxlat_off);
1766 	} else {
1767 		intel_ntb_reg_write(8, bar->pbarxlat_off, base_addr);
1768 		base_addr = intel_ntb_reg_read(8, bar->pbarxlat_off);
1769 	}
1770 	(void)base_addr;
1771 }
1772 
/*
 * Program the Xeon back-to-back (B2B) BAR and translation registers.
 *
 * 'addr' supplies the addresses written to the secondary BAR base/limit
 * registers (and SBAR0BASE); 'peer_addr' supplies the addresses written to
 * the outgoing (PBAR) translation registers and to the B2B translation
 * offset register.
 *
 * Sets ntb->b2b_off, and with the SB01BASE_LOCKUP workaround also
 * ntb->msix_xlat and ntb->peer_lapic_bar.
 *
 * Returns 0 on success, or EIO if the BAR chosen for B2B register access is
 * smaller than XEON_B2B_MIN_SIZE.
 */
static int
xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
    const struct ntb_b2b_addr *peer_addr)
{
	struct ntb_pci_bar_info *b2b_bar;
	vm_size_t bar_size;
	uint64_t bar_addr;
	enum ntb_bar b2b_bar_num, i;

	if (ntb->b2b_mw_idx == B2B_MW_DISABLED) {
		/* No memory window is set aside for B2B register access. */
		b2b_bar = NULL;
		b2b_bar_num = NTB_CONFIG_BAR;
		ntb->b2b_off = 0;
	} else {
		b2b_bar_num = intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx);
		KASSERT(b2b_bar_num > 0 && b2b_bar_num < NTB_MAX_BARS,
		    ("invalid b2b mw bar"));

		b2b_bar = &ntb->bar_info[b2b_bar_num];
		bar_size = b2b_bar->size;

		/*
		 * Share the BAR (B2B registers in the upper half) when asked
		 * to and half the BAR is big enough; otherwise use the whole
		 * BAR if it is big enough; otherwise fail.
		 */
		if (ntb_b2b_mw_share != 0 &&
		    (bar_size >> 1) >= XEON_B2B_MIN_SIZE)
			ntb->b2b_off = bar_size >> 1;
		else if (bar_size >= XEON_B2B_MIN_SIZE) {
			ntb->b2b_off = 0;
		} else {
			device_printf(ntb->device,
			    "B2B bar size is too small!\n");
			return (EIO);
		}
	}

	/*
	 * Reset the secondary bar sizes to match the primary bar sizes.
	 * (Except, disable or halve the size of the B2B secondary bar.)
	 */
	for (i = NTB_B2B_BAR_1; i < NTB_MAX_BARS; i++)
		xeon_reset_sbar_size(ntb, i, b2b_bar_num);

	/* Pick the local address that corresponds to the B2B register BAR. */
	bar_addr = 0;
	if (b2b_bar_num == NTB_CONFIG_BAR)
		bar_addr = addr->bar0_addr;
	else if (b2b_bar_num == NTB_B2B_BAR_1)
		bar_addr = addr->bar2_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(ntb, NTB_SPLIT_BAR))
		bar_addr = addr->bar4_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2)
		bar_addr = addr->bar4_addr32;
	else if (b2b_bar_num == NTB_B2B_BAR_3)
		bar_addr = addr->bar5_addr32;
	else
		KASSERT(false, ("invalid bar"));

	intel_ntb_reg_write(8, XEON_SBAR0BASE_OFFSET, bar_addr);

	/*
	 * Other SBARs are normally hit by the PBAR xlat, except for the b2b
	 * register BAR.  The B2B BAR is either disabled above or configured
	 * half-size.  It starts at PBAR xlat + offset.
	 *
	 * Also set up incoming BAR limits == base (zero length window).
	 */
	xeon_set_sbar_base_and_limit(ntb, addr->bar2_addr64, NTB_B2B_BAR_1,
	    b2b_bar_num);
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr32,
		    NTB_B2B_BAR_2, b2b_bar_num);
		xeon_set_sbar_base_and_limit(ntb, addr->bar5_addr32,
		    NTB_B2B_BAR_3, b2b_bar_num);
	} else
		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr64,
		    NTB_B2B_BAR_2, b2b_bar_num);

	/* Zero incoming translation addrs */
	intel_ntb_reg_write(8, XEON_SBAR2XLAT_OFFSET, 0);
	intel_ntb_reg_write(8, XEON_SBAR4XLAT_OFFSET, 0);

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
		uint32_t xlat_reg, lmt_reg;
		enum ntb_bar bar_num;

		/*
		 * We point the chosen MSIX MW BAR xlat to remote LAPIC for
		 * workaround
		 */
		bar_num = intel_ntb_mw_to_bar(ntb, ntb->msix_mw_idx);
		bar_get_xlat_params(ntb, bar_num, NULL, &xlat_reg, &lmt_reg);
		if (bar_is_64bit(ntb, bar_num)) {
			intel_ntb_reg_write(8, xlat_reg, MSI_INTEL_ADDR_BASE);
			/* Remember what the register actually holds. */
			ntb->msix_xlat = intel_ntb_reg_read(8, xlat_reg);
			intel_ntb_reg_write(8, lmt_reg, 0);
		} else {
			intel_ntb_reg_write(4, xlat_reg, MSI_INTEL_ADDR_BASE);
			/* Remember what the register actually holds. */
			ntb->msix_xlat = intel_ntb_reg_read(4, xlat_reg);
			intel_ntb_reg_write(4, lmt_reg, 0);
		}

		ntb->peer_lapic_bar =  &ntb->bar_info[bar_num];
	}
	/* Read back the incoming translation registers; values are unused. */
	(void)intel_ntb_reg_read(8, XEON_SBAR2XLAT_OFFSET);
	(void)intel_ntb_reg_read(8, XEON_SBAR4XLAT_OFFSET);

	/* Zero outgoing translation limits (whole bar size windows) */
	intel_ntb_reg_write(8, XEON_PBAR2LMT_OFFSET, 0);
	intel_ntb_reg_write(8, XEON_PBAR4LMT_OFFSET, 0);

	/* Set outgoing translation offsets */
	xeon_set_pbar_xlat(ntb, peer_addr->bar2_addr64, NTB_B2B_BAR_1);
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr32, NTB_B2B_BAR_2);
		xeon_set_pbar_xlat(ntb, peer_addr->bar5_addr32, NTB_B2B_BAR_3);
	} else
		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr64, NTB_B2B_BAR_2);

	/* Set the translation offset for B2B registers */
	bar_addr = 0;
	if (b2b_bar_num == NTB_CONFIG_BAR)
		bar_addr = peer_addr->bar0_addr;
	else if (b2b_bar_num == NTB_B2B_BAR_1)
		bar_addr = peer_addr->bar2_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(ntb, NTB_SPLIT_BAR))
		bar_addr = peer_addr->bar4_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2)
		bar_addr = peer_addr->bar4_addr32;
	else if (b2b_bar_num == NTB_B2B_BAR_3)
		bar_addr = peer_addr->bar5_addr32;
	else
		KASSERT(false, ("invalid bar"));

	/*
	 * B2B_XLAT_OFFSET is a 64-bit register but can only be written 32 bits
	 * at a time.
	 */
	intel_ntb_reg_write(4, XEON_B2B_XLAT_OFFSETL, bar_addr & 0xffffffff);
	intel_ntb_reg_write(4, XEON_B2B_XLAT_OFFSETU, bar_addr >> 32);
	return (0);
}
1911 
1912 static inline bool
1913 _xeon_link_is_up(struct ntb_softc *ntb)
1914 {
1915 
1916 	if (ntb->conn_type == NTB_CONN_TRANSPARENT)
1917 		return (true);
1918 	return ((ntb->lnk_sta & NTB_LINK_STATUS_ACTIVE) != 0);
1919 }
1920 
1921 static inline bool
1922 link_is_up(struct ntb_softc *ntb)
1923 {
1924 
1925 	if (ntb->type == NTB_XEON)
1926 		return (_xeon_link_is_up(ntb) && (ntb->peer_msix_good ||
1927 		    !HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)));
1928 
1929 	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
1930 	return ((ntb->ntb_ctl & ATOM_CNTL_LINK_DOWN) == 0);
1931 }
1932 
1933 static inline bool
1934 atom_link_is_err(struct ntb_softc *ntb)
1935 {
1936 	uint32_t status;
1937 
1938 	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
1939 
1940 	status = intel_ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
1941 	if ((status & ATOM_LTSSMSTATEJMP_FORCEDETECT) != 0)
1942 		return (true);
1943 
1944 	status = intel_ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
1945 	return ((status & ATOM_IBIST_ERR_OFLOW) != 0);
1946 }
1947 
1948 /* Atom does not have link status interrupt, poll on that platform */
1949 static void
1950 atom_link_hb(void *arg)
1951 {
1952 	struct ntb_softc *ntb = arg;
1953 	sbintime_t timo, poll_ts;
1954 
1955 	timo = NTB_HB_TIMEOUT * hz;
1956 	poll_ts = ntb->last_ts + timo;
1957 
1958 	/*
1959 	 * Delay polling the link status if an interrupt was received, unless
1960 	 * the cached link status says the link is down.
1961 	 */
1962 	if ((sbintime_t)ticks - poll_ts < 0 && link_is_up(ntb)) {
1963 		timo = poll_ts - ticks;
1964 		goto out;
1965 	}
1966 
1967 	if (intel_ntb_poll_link(ntb))
1968 		ntb_link_event(ntb->device);
1969 
1970 	if (!link_is_up(ntb) && atom_link_is_err(ntb)) {
1971 		/* Link is down with error, proceed with recovery */
1972 		callout_reset(&ntb->lr_timer, 0, recover_atom_link, ntb);
1973 		return;
1974 	}
1975 
1976 out:
1977 	callout_reset(&ntb->heartbeat_timer, timo, atom_link_hb, ntb);
1978 }
1979 
/*
 * Runs the Atom link restart sequence: pokes the ModPhy PCS registers,
 * sleeps for hz / 10 ticks, writes back a handful of error/status registers
 * and finally clears the FORCEDETECT bit in the LTSSM state-jump register.
 *
 * Calls pause(), so it sleeps for part of its execution.
 */
static void
atom_perform_link_restart(struct ntb_softc *ntb)
{
	uint32_t status;

	/* Driver resets the NTB ModPhy lanes - magic! */
	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0xe0);
	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x40);
	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x60);
	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0x60);

	/* Driver waits 100ms to allow the NTB ModPhy to settle */
	pause("ModPhy", hz / 10);

	/*
	 * Clear AER Errors, write to clear.  Only the replay-rollover bit
	 * that was read back as set is written.
	 */
	status = intel_ntb_reg_read(4, ATOM_ERRCORSTS_OFFSET);
	status &= PCIM_AER_COR_REPLAY_ROLLOVER;
	intel_ntb_reg_write(4, ATOM_ERRCORSTS_OFFSET, status);

	/* Clear unexpected electrical idle event in LTSSM, write to clear */
	status = intel_ntb_reg_read(4, ATOM_LTSSMERRSTS0_OFFSET);
	status |= ATOM_LTSSMERRSTS0_UNEXPECTEDEI;
	intel_ntb_reg_write(4, ATOM_LTSSMERRSTS0_OFFSET, status);

	/* Clear DeSkew Buffer error, write to clear */
	status = intel_ntb_reg_read(4, ATOM_DESKEWSTS_OFFSET);
	status |= ATOM_DESKEWSTS_DBERR;
	intel_ntb_reg_write(4, ATOM_DESKEWSTS_OFFSET, status);

	/*
	 * Write back only the IBIST overflow bit if it was read as set.
	 * NOTE(review): presumably write-to-clear like the registers above;
	 * confirm against the hardware documentation.
	 */
	status = intel_ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
	status &= ATOM_IBIST_ERR_OFLOW;
	intel_ntb_reg_write(4, ATOM_IBSTERRRCRVSTS0_OFFSET, status);

	/* Releases the NTB state machine to allow the link to retrain */
	status = intel_ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
	status &= ~ATOM_LTSSMSTATEJMP_FORCEDETECT;
	intel_ntb_reg_write(4, ATOM_LTSSMSTATEJMP_OFFSET, status);
}
2018 
2019 static int
2020 intel_ntb_port_number(device_t dev)
2021 {
2022 	struct ntb_softc *ntb = device_get_softc(dev);
2023 
2024 	return (ntb->dev_type == NTB_DEV_USD ? 0 : 1);
2025 }
2026 
2027 static int
2028 intel_ntb_peer_port_count(device_t dev)
2029 {
2030 
2031 	return (1);
2032 }
2033 
2034 static int
2035 intel_ntb_peer_port_number(device_t dev, int pidx)
2036 {
2037 	struct ntb_softc *ntb = device_get_softc(dev);
2038 
2039 	if (pidx != 0)
2040 		return (-EINVAL);
2041 
2042 	return (ntb->dev_type == NTB_DEV_USD ? 1 : 0);
2043 }
2044 
2045 static int
2046 intel_ntb_peer_port_idx(device_t dev, int port)
2047 {
2048 	int peer_port;
2049 
2050 	peer_port = intel_ntb_peer_port_number(dev, 0);
2051 	if (peer_port == -EINVAL || port != peer_port)
2052 		return (-EINVAL);
2053 
2054 	return (0);
2055 }
2056 
2057 static int
2058 intel_ntb_link_enable(device_t dev, enum ntb_speed speed __unused,
2059     enum ntb_width width __unused)
2060 {
2061 	struct ntb_softc *ntb = device_get_softc(dev);
2062 	uint32_t cntl;
2063 
2064 	intel_ntb_printf(2, "%s\n", __func__);
2065 
2066 	if (ntb->type == NTB_ATOM) {
2067 		pci_write_config(ntb->device, NTB_PPD_OFFSET,
2068 		    ntb->ppd | ATOM_PPD_INIT_LINK, 4);
2069 		return (0);
2070 	}
2071 
2072 	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
2073 		ntb_link_event(dev);
2074 		return (0);
2075 	}
2076 
2077 	cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
2078 	cntl &= ~(NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK);
2079 	cntl |= NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP;
2080 	cntl |= NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP;
2081 	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
2082 		cntl |= NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP;
2083 	intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
2084 	return (0);
2085 }
2086 
2087 static int
2088 intel_ntb_link_disable(device_t dev)
2089 {
2090 	struct ntb_softc *ntb = device_get_softc(dev);
2091 	uint32_t cntl;
2092 
2093 	intel_ntb_printf(2, "%s\n", __func__);
2094 
2095 	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
2096 		ntb_link_event(dev);
2097 		return (0);
2098 	}
2099 
2100 	cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
2101 	cntl &= ~(NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP);
2102 	cntl &= ~(NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP);
2103 	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
2104 		cntl &= ~(NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP);
2105 	cntl |= NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK;
2106 	intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
2107 	return (0);
2108 }
2109 
2110 static bool
2111 intel_ntb_link_enabled(device_t dev)
2112 {
2113 	struct ntb_softc *ntb = device_get_softc(dev);
2114 	uint32_t cntl;
2115 
2116 	if (ntb->type == NTB_ATOM) {
2117 		cntl = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4);
2118 		return ((cntl & ATOM_PPD_INIT_LINK) != 0);
2119 	}
2120 
2121 	if (ntb->conn_type == NTB_CONN_TRANSPARENT)
2122 		return (true);
2123 
2124 	cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
2125 	return ((cntl & NTB_CNTL_LINK_DISABLE) == 0);
2126 }
2127 
2128 static void
2129 recover_atom_link(void *arg)
2130 {
2131 	struct ntb_softc *ntb = arg;
2132 	unsigned speed, width, oldspeed, oldwidth;
2133 	uint32_t status32;
2134 
2135 	atom_perform_link_restart(ntb);
2136 
2137 	/*
2138 	 * There is a potential race between the 2 NTB devices recovering at
2139 	 * the same time.  If the times are the same, the link will not recover
2140 	 * and the driver will be stuck in this loop forever.  Add a random
2141 	 * interval to the recovery time to prevent this race.
2142 	 */
2143 	status32 = arc4random() % ATOM_LINK_RECOVERY_TIME;
2144 	pause("Link", (ATOM_LINK_RECOVERY_TIME + status32) * hz / 1000);
2145 
2146 	if (atom_link_is_err(ntb))
2147 		goto retry;
2148 
2149 	status32 = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
2150 	if ((status32 & ATOM_CNTL_LINK_DOWN) != 0)
2151 		goto out;
2152 
2153 	status32 = intel_ntb_reg_read(4, ntb->reg->lnk_sta);
2154 	width = NTB_LNK_STA_WIDTH(status32);
2155 	speed = status32 & NTB_LINK_SPEED_MASK;
2156 
2157 	oldwidth = NTB_LNK_STA_WIDTH(ntb->lnk_sta);
2158 	oldspeed = ntb->lnk_sta & NTB_LINK_SPEED_MASK;
2159 	if (oldwidth != width || oldspeed != speed)
2160 		goto retry;
2161 
2162 out:
2163 	callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz, atom_link_hb,
2164 	    ntb);
2165 	return;
2166 
2167 retry:
2168 	callout_reset(&ntb->lr_timer, NTB_HB_TIMEOUT * hz, recover_atom_link,
2169 	    ntb);
2170 }
2171 
2172 /*
2173  * Polls the HW link status register(s); returns true if something has changed.
2174  */
2175 static bool
2176 intel_ntb_poll_link(struct ntb_softc *ntb)
2177 {
2178 	uint32_t ntb_cntl;
2179 	uint16_t reg_val;
2180 
2181 	if (ntb->type == NTB_ATOM) {
2182 		ntb_cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
2183 		if (ntb_cntl == ntb->ntb_ctl)
2184 			return (false);
2185 
2186 		ntb->ntb_ctl = ntb_cntl;
2187 		ntb->lnk_sta = intel_ntb_reg_read(4, ntb->reg->lnk_sta);
2188 	} else {
2189 		db_iowrite_raw(ntb, ntb->self_reg->db_bell, ntb->db_link_mask);
2190 
2191 		reg_val = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2);
2192 		if (reg_val == ntb->lnk_sta)
2193 			return (false);
2194 
2195 		ntb->lnk_sta = reg_val;
2196 
2197 		if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
2198 			if (_xeon_link_is_up(ntb)) {
2199 				if (!ntb->peer_msix_good) {
2200 					callout_reset(&ntb->peer_msix_work, 0,
2201 					    intel_ntb_exchange_msix, ntb);
2202 					return (false);
2203 				}
2204 			} else {
2205 				ntb->peer_msix_good = false;
2206 				ntb->peer_msix_done = false;
2207 			}
2208 		}
2209 	}
2210 	return (true);
2211 }
2212 
2213 static inline enum ntb_speed
2214 intel_ntb_link_sta_speed(struct ntb_softc *ntb)
2215 {
2216 
2217 	if (!link_is_up(ntb))
2218 		return (NTB_SPEED_NONE);
2219 	return (ntb->lnk_sta & NTB_LINK_SPEED_MASK);
2220 }
2221 
2222 static inline enum ntb_width
2223 intel_ntb_link_sta_width(struct ntb_softc *ntb)
2224 {
2225 
2226 	if (!link_is_up(ntb))
2227 		return (NTB_WIDTH_NONE);
2228 	return (NTB_LNK_STA_WIDTH(ntb->lnk_sta));
2229 }
2230 
/* Static "debug_info" sysctl node, declared as a child of _hw_ntb. */
SYSCTL_NODE(_hw_ntb, OID_AUTO, debug_info, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Driver state, statistics, and HW registers");

/*
 * Flag bits OR'd into the arg2 value handed to sysctl_handle_register();
 * whatever is left after masking with NTB_REGFLAGS_MASK is the register
 * offset.
 *
 * Bits 31:30 select the access width.
 */
#define NTB_REGSZ_MASK	(3ul << 30)
#define NTB_REG_64	(1ul << 30)
#define NTB_REG_32	(2ul << 30)
#define NTB_REG_16	(3ul << 30)
#define NTB_REG_8	(0ul << 30)

/* Bit 29: read through db_ioread(); bit 28: read PCI config space. */
#define NTB_DB_READ	(1ul << 29)
#define NTB_PCI_REG	(1ul << 28)
#define NTB_REGFLAGS_MASK	(NTB_REGSZ_MASK | NTB_DB_READ | NTB_PCI_REG)
2243 
/*
 * Populates the per-device sysctl tree.
 *
 * Directly under the device node: link_status, active and admin_up.
 * Under "debug_info": cached driver state and a "registers" subtree of raw
 * register dumps served by sysctl_handle_register(), whose arg2 is the
 * register offset OR'd with the NTB_REG_* / NTB_DB_READ / NTB_PCI_REG flags.
 *
 * Atom devices stop after the common registers; Xeon devices additionally
 * get statistics, error and BAR size registers, and B2B connections get the
 * outgoing translation/limit and secondary BAR base registers on top.
 */
static void
intel_ntb_sysctl_init(struct ntb_softc *ntb)
{
	struct sysctl_oid_list *globals, *tree_par, *regpar, *statpar, *errpar;
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid *tree, *tmptree;

	ctx = device_get_sysctl_ctx(ntb->device);
	globals = SYSCTL_CHILDREN(device_get_sysctl_tree(ntb->device));

	/* Link state and administrative control, directly under the device. */
	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "link_status",
	    CTLFLAG_RD | CTLTYPE_STRING | CTLFLAG_NEEDGIANT, ntb, 0,
	    sysctl_handle_link_status_human, "A",
	    "Link status (human readable)");
	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "active",
	    CTLFLAG_RD | CTLTYPE_UINT | CTLFLAG_NEEDGIANT, ntb, 0,
	    sysctl_handle_link_status, "IU",
	    "Link status (1=active, 0=inactive)");
	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "admin_up",
	    CTLFLAG_RW | CTLTYPE_UINT | CTLFLAG_NEEDGIANT, ntb, 0,
	    sysctl_handle_link_admin, "IU",
	    "Set/get interface status (1=UP, 0=DOWN)");

	/* Everything else lives under the per-device "debug_info" node. */
	tree = SYSCTL_ADD_NODE(ctx, globals, OID_AUTO, "debug_info",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
	    "Driver state, statistics, and HW registers");
	tree_par = SYSCTL_CHILDREN(tree);

	/* Cached softc fields, exported by address. */
	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "conn_type", CTLFLAG_RD,
	    &ntb->conn_type, 0, "0 - Transparent; 1 - B2B; 2 - Root Port");
	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "dev_type", CTLFLAG_RD,
	    &ntb->dev_type, 0, "0 - USD; 1 - DSD");
	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ppd", CTLFLAG_RD,
	    &ntb->ppd, 0, "Raw PPD register (cached)");

	/* B2B window details are only exported when a B2B MW is in use. */
	if (ntb->b2b_mw_idx != B2B_MW_DISABLED) {
		SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "b2b_idx", CTLFLAG_RD,
		    &ntb->b2b_mw_idx, 0,
		    "Index of the MW used for B2B remote register access");
		SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "b2b_off",
		    CTLFLAG_RD, &ntb->b2b_off,
		    "If non-zero, offset of B2B register region in shared MW");
	}

	SYSCTL_ADD_PROC(ctx, tree_par, OID_AUTO, "features",
	    CTLFLAG_RD | CTLTYPE_STRING | CTLFLAG_NEEDGIANT, ntb, 0,
	    sysctl_handle_features, "A", "Features/errata of this NTB device");

	/* The cached register copies are volatile; strip that for sysctl. */
	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ntb_ctl", CTLFLAG_RD,
	    __DEVOLATILE(uint32_t *, &ntb->ntb_ctl), 0,
	    "NTB CTL register (cached)");
	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "lnk_sta", CTLFLAG_RD,
	    __DEVOLATILE(uint32_t *, &ntb->lnk_sta), 0,
	    "LNK STA register (cached)");

	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "mw_count", CTLFLAG_RD,
	    &ntb->mw_count, 0, "MW count");
	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "spad_count", CTLFLAG_RD,
	    &ntb->spad_count, 0, "Scratchpad count");
	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_count", CTLFLAG_RD,
	    &ntb->db_count, 0, "Doorbell count");
	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_count", CTLFLAG_RD,
	    &ntb->db_vec_count, 0, "Doorbell vector count");
	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_shift", CTLFLAG_RD,
	    &ntb->db_vec_shift, 0, "Doorbell vector shift");

	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_valid_mask", CTLFLAG_RD,
	    &ntb->db_valid_mask, "Doorbell valid mask");
	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_link_mask", CTLFLAG_RD,
	    &ntb->db_link_mask, "Doorbell link mask");
	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_mask", CTLFLAG_RD,
	    &ntb->db_mask, "Doorbell mask (cached)");

	/* Raw register dumps, common to Atom and Xeon. */
	tmptree = SYSCTL_ADD_NODE(ctx, tree_par, OID_AUTO, "registers",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
	    "Raw HW registers (big-endian)");
	regpar = SYSCTL_CHILDREN(tmptree);

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ntbcntl",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_32 | ntb->reg->ntb_ctl, sysctl_handle_register, "IU",
	    "NTB Control register");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcap",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_32 | 0x19c, sysctl_handle_register, "IU",
	    "NTB Link Capabilities");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcon",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_32 | 0x1a0, sysctl_handle_register, "IU",
	    "NTB Link Control register");

	/* Doorbell registers go through the doorbell accessor (NTB_DB_READ). */
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_mask",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_mask,
	    sysctl_handle_register, "QU", "Doorbell mask register");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_bell",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_bell,
	    sysctl_handle_register, "QU", "Doorbell register");

	/*
	 * Incoming translation registers.  With split BARs, BAR4 and BAR5 are
	 * exported as two 32-bit registers instead of one 64-bit register.
	 */
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat23",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_64 | ntb->xlat_reg->bar2_xlat,
	    sysctl_handle_register, "QU", "Incoming XLAT23 register");
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat4",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar4_xlat,
		    sysctl_handle_register, "IU", "Incoming XLAT4 register");
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat5",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar5_xlat,
		    sysctl_handle_register, "IU", "Incoming XLAT5 register");
	} else {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat45",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
		    NTB_REG_64 | ntb->xlat_reg->bar4_xlat,
		    sysctl_handle_register, "QU", "Incoming XLAT45 register");
	}

	/* Incoming limit registers, same split-BAR layout as above. */
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt23",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_64 | ntb->xlat_reg->bar2_limit,
	    sysctl_handle_register, "QU", "Incoming LMT23 register");
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt4",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar4_limit,
		    sysctl_handle_register, "IU", "Incoming LMT4 register");
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt5",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar5_limit,
		    sysctl_handle_register, "IU", "Incoming LMT5 register");
	} else {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt45",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
		    NTB_REG_64 | ntb->xlat_reg->bar4_limit,
		    sysctl_handle_register, "QU", "Incoming LMT45 register");
	}

	/* Nothing below is registered for Atom. */
	if (ntb->type == NTB_ATOM)
		return;

	tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_stats",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Xeon HW statistics");
	statpar = SYSCTL_CHILDREN(tmptree);
	SYSCTL_ADD_PROC(ctx, statpar, OID_AUTO, "upstream_mem_miss",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_16 | XEON_USMEMMISS_OFFSET,
	    sysctl_handle_register, "SU", "Upstream Memory Miss");

	/* errpar is filled in further down (uncerrsts / corerrsts). */
	tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_hw_err",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Xeon HW errors");
	errpar = SYSCTL_CHILDREN(tmptree);

	/* The following registers are read from PCI config space. */
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ppd",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_8 | NTB_PCI_REG | NTB_PPD_OFFSET,
	    sysctl_handle_register, "CU", "PPD");

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar23_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR23SZ_OFFSET,
	    sysctl_handle_register, "CU", "PBAR23 SZ (log2)");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar4_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR4SZ_OFFSET,
	    sysctl_handle_register, "CU", "PBAR4 SZ (log2)");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar5_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR5SZ_OFFSET,
	    sysctl_handle_register, "CU", "PBAR5 SZ (log2)");

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR23SZ_OFFSET,
	    sysctl_handle_register, "CU", "SBAR23 SZ (log2)");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR4SZ_OFFSET,
	    sysctl_handle_register, "CU", "SBAR4 SZ (log2)");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR5SZ_OFFSET,
	    sysctl_handle_register, "CU", "SBAR5 SZ (log2)");

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "devsts",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_16 | NTB_PCI_REG | XEON_DEVSTS_OFFSET,
	    sysctl_handle_register, "SU", "DEVSTS");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnksts",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_16 | NTB_PCI_REG | XEON_LINK_STATUS_OFFSET,
	    sysctl_handle_register, "SU", "LNKSTS");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "slnksts",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_16 | NTB_PCI_REG | XEON_SLINK_STATUS_OFFSET,
	    sysctl_handle_register, "SU", "SLNKSTS");

	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "uncerrsts",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_32 | NTB_PCI_REG | XEON_UNCERRSTS_OFFSET,
	    sysctl_handle_register, "IU", "UNCERRSTS");
	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "corerrsts",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_32 | NTB_PCI_REG | XEON_CORERRSTS_OFFSET,
	    sysctl_handle_register, "IU", "CORERRSTS");

	/* The remaining registers are only exported for B2B connections. */
	if (ntb->conn_type != NTB_CONN_B2B)
		return;

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat01l",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_32 | XEON_B2B_XLAT_OFFSETL,
	    sysctl_handle_register, "IU", "Outgoing XLAT0L register");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat01u",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_32 | XEON_B2B_XLAT_OFFSETU,
	    sysctl_handle_register, "IU", "Outgoing XLAT0U register");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat23",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off,
	    sysctl_handle_register, "QU", "Outgoing XLAT23 register");
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat4",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
		    sysctl_handle_register, "IU", "Outgoing XLAT4 register");
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat5",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off,
		    sysctl_handle_register, "IU", "Outgoing XLAT5 register");
	} else {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat45",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
		    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
		    sysctl_handle_register, "QU", "Outgoing XLAT45 register");
	}

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt23",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_64 | XEON_PBAR2LMT_OFFSET,
	    sysctl_handle_register, "QU", "Outgoing LMT23 register");
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt4",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
		    NTB_REG_32 | XEON_PBAR4LMT_OFFSET,
		    sysctl_handle_register, "IU", "Outgoing LMT4 register");
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt5",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
		    NTB_REG_32 | XEON_PBAR5LMT_OFFSET,
		    sysctl_handle_register, "IU", "Outgoing LMT5 register");
	} else {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt45",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
		    NTB_REG_64 | XEON_PBAR4LMT_OFFSET,
		    sysctl_handle_register, "QU", "Outgoing LMT45 register");
	}

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar01_base",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_64 | ntb->xlat_reg->bar0_base,
	    sysctl_handle_register, "QU", "Secondary BAR01 base register");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_base",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
	    NTB_REG_64 | ntb->xlat_reg->bar2_base,
	    sysctl_handle_register, "QU", "Secondary BAR23 base register");
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_base",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar4_base,
		    sysctl_handle_register, "IU",
		    "Secondary BAR4 base register");
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_base",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar5_base,
		    sysctl_handle_register, "IU",
		    "Secondary BAR5 base register");
	} else {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar45_base",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
		    NTB_REG_64 | ntb->xlat_reg->bar4_base,
		    sysctl_handle_register, "QU",
		    "Secondary BAR45 base register");
	}
}
2530 
2531 static int
2532 sysctl_handle_features(SYSCTL_HANDLER_ARGS)
2533 {
2534 	struct ntb_softc *ntb = arg1;
2535 	struct sbuf sb;
2536 	int error;
2537 
2538 	sbuf_new_for_sysctl(&sb, NULL, 256, req);
2539 
2540 	sbuf_printf(&sb, "%b", ntb->features, NTB_FEATURES_STR);
2541 	error = sbuf_finish(&sb);
2542 	sbuf_delete(&sb);
2543 
2544 	if (error || !req->newptr)
2545 		return (error);
2546 	return (EINVAL);
2547 }
2548 
2549 static int
2550 sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS)
2551 {
2552 	struct ntb_softc *ntb = arg1;
2553 	unsigned old, new;
2554 	int error;
2555 
2556 	old = intel_ntb_link_enabled(ntb->device);
2557 
2558 	error = SYSCTL_OUT(req, &old, sizeof(old));
2559 	if (error != 0 || req->newptr == NULL)
2560 		return (error);
2561 
2562 	error = SYSCTL_IN(req, &new, sizeof(new));
2563 	if (error != 0)
2564 		return (error);
2565 
2566 	intel_ntb_printf(0, "Admin set interface state to '%sabled'\n",
2567 	    (new != 0)? "en" : "dis");
2568 
2569 	if (new != 0)
2570 		error = intel_ntb_link_enable(ntb->device, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
2571 	else
2572 		error = intel_ntb_link_disable(ntb->device);
2573 	return (error);
2574 }
2575 
2576 static int
2577 sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS)
2578 {
2579 	struct ntb_softc *ntb = arg1;
2580 	struct sbuf sb;
2581 	enum ntb_speed speed;
2582 	enum ntb_width width;
2583 	int error;
2584 
2585 	sbuf_new_for_sysctl(&sb, NULL, 32, req);
2586 
2587 	if (intel_ntb_link_is_up(ntb->device, &speed, &width))
2588 		sbuf_printf(&sb, "up / PCIe Gen %u / Width x%u",
2589 		    (unsigned)speed, (unsigned)width);
2590 	else
2591 		sbuf_printf(&sb, "down");
2592 
2593 	error = sbuf_finish(&sb);
2594 	sbuf_delete(&sb);
2595 
2596 	if (error || !req->newptr)
2597 		return (error);
2598 	return (EINVAL);
2599 }
2600 
2601 static int
2602 sysctl_handle_link_status(SYSCTL_HANDLER_ARGS)
2603 {
2604 	struct ntb_softc *ntb = arg1;
2605 	unsigned res;
2606 	int error;
2607 
2608 	res = intel_ntb_link_is_up(ntb->device, NULL, NULL);
2609 
2610 	error = SYSCTL_OUT(req, &res, sizeof(res));
2611 	if (error || !req->newptr)
2612 		return (error);
2613 	return (EINVAL);
2614 }
2615 
2616 static int
2617 sysctl_handle_register(SYSCTL_HANDLER_ARGS)
2618 {
2619 	struct ntb_softc *ntb;
2620 	const void *outp;
2621 	uintptr_t sz;
2622 	uint64_t umv;
2623 	char be[sizeof(umv)];
2624 	size_t outsz;
2625 	uint32_t reg;
2626 	bool db, pci;
2627 	int error;
2628 
2629 	ntb = arg1;
2630 	reg = arg2 & ~NTB_REGFLAGS_MASK;
2631 	sz = arg2 & NTB_REGSZ_MASK;
2632 	db = (arg2 & NTB_DB_READ) != 0;
2633 	pci = (arg2 & NTB_PCI_REG) != 0;
2634 
2635 	KASSERT(!(db && pci), ("bogus"));
2636 
2637 	if (db) {
2638 		KASSERT(sz == NTB_REG_64, ("bogus"));
2639 		umv = db_ioread(ntb, reg);
2640 		outsz = sizeof(uint64_t);
2641 	} else {
2642 		switch (sz) {
2643 		case NTB_REG_64:
2644 			if (pci)
2645 				umv = pci_read_config(ntb->device, reg, 8);
2646 			else
2647 				umv = intel_ntb_reg_read(8, reg);
2648 			outsz = sizeof(uint64_t);
2649 			break;
2650 		case NTB_REG_32:
2651 			if (pci)
2652 				umv = pci_read_config(ntb->device, reg, 4);
2653 			else
2654 				umv = intel_ntb_reg_read(4, reg);
2655 			outsz = sizeof(uint32_t);
2656 			break;
2657 		case NTB_REG_16:
2658 			if (pci)
2659 				umv = pci_read_config(ntb->device, reg, 2);
2660 			else
2661 				umv = intel_ntb_reg_read(2, reg);
2662 			outsz = sizeof(uint16_t);
2663 			break;
2664 		case NTB_REG_8:
2665 			if (pci)
2666 				umv = pci_read_config(ntb->device, reg, 1);
2667 			else
2668 				umv = intel_ntb_reg_read(1, reg);
2669 			outsz = sizeof(uint8_t);
2670 			break;
2671 		default:
2672 			panic("bogus");
2673 			break;
2674 		}
2675 	}
2676 
2677 	/* Encode bigendian so that sysctl -x is legible. */
2678 	be64enc(be, umv);
2679 	outp = ((char *)be) + sizeof(umv) - outsz;
2680 
2681 	error = SYSCTL_OUT(req, outp, outsz);
2682 	if (error || !req->newptr)
2683 		return (error);
2684 	return (EINVAL);
2685 }
2686 
2687 static unsigned
2688 intel_ntb_user_mw_to_idx(struct ntb_softc *ntb, unsigned uidx)
2689 {
2690 
2691 	if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 &&
2692 	    uidx >= ntb->b2b_mw_idx) ||
2693 	    (ntb->msix_mw_idx != B2B_MW_DISABLED && uidx >= ntb->msix_mw_idx))
2694 		uidx++;
2695 	if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 &&
2696 	    uidx >= ntb->b2b_mw_idx) &&
2697 	    (ntb->msix_mw_idx != B2B_MW_DISABLED && uidx >= ntb->msix_mw_idx))
2698 		uidx++;
2699 	return (uidx);
2700 }
2701 
#ifndef EARLY_AP_STARTUP
/*
 * Set to 1 by the SYSINIT below; intel_ntb_exchange_msix() refuses to start
 * the MSI-X negotiation while it is still 0.
 */
static int msix_ready;

/* SYSINIT hook (SI_SUB_SMP, SI_ORDER_ANY) that flips msix_ready. */
static void
intel_ntb_msix_ready(void *arg __unused)
{

	msix_ready = 1;
}
SYSINIT(intel_ntb_msix_ready, SI_SUB_SMP, SI_ORDER_ANY,
    intel_ntb_msix_ready, NULL);
#endif
2714 
/*
 * Callout handler that exchanges MSI-X data/offset pairs with the peer
 * through the scratchpad registers.
 *
 * Progress is tracked in two softc flags:
 *   peer_msix_done - the peer's MSI-X values have been read and stored;
 *   peer_msix_good - the NTB_MSIX_RECEIVED acknowledgement was also seen.
 *
 * Each invocation resumes at the step implied by those flags.  Whenever a
 * step cannot complete yet, the handler re-reads the link status and, if the
 * link is still up, re-arms itself on peer_msix_work; otherwise it clears
 * the local scratchpads and stops.
 */
static void
intel_ntb_exchange_msix(void *ctx)
{
	struct ntb_softc *ntb;
	uint32_t val;
	unsigned i;

	ntb = ctx;

	/* Resume at the step recorded by the progress flags. */
	if (ntb->peer_msix_good)
		goto msix_good;
	if (ntb->peer_msix_done)
		goto msix_done;

#ifndef EARLY_AP_STARTUP
	/* Block MSIX negotiation until SMP started and IRQ reshuffled. */
	if (!msix_ready)
		goto reschedule;
#endif

	/*
	 * Publish our MSI-X data and (translated) offsets in the peer's
	 * scratchpads, followed by the guard value.
	 */
	intel_ntb_get_msix_info(ntb);
	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
		intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_DATA0 + i,
		    ntb->msix_data[i].nmd_data);
		intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_OFS0 + i,
		    ntb->msix_data[i].nmd_ofs - ntb->msix_xlat);
	}
	intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_GUARD, NTB_MSIX_VER_GUARD);

	/*
	 * Check our own scratchpad for the guard value; presumably the peer
	 * writes it there by running this same sequence.  Retry later if it
	 * is not present yet.
	 */
	intel_ntb_spad_read(ntb->device, NTB_MSIX_GUARD, &val);
	if (val != NTB_MSIX_VER_GUARD)
		goto reschedule;

	/* Guard seen: store the values found in the local scratchpads. */
	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
		intel_ntb_spad_read(ntb->device, NTB_MSIX_DATA0 + i, &val);
		intel_ntb_printf(2, "remote MSIX data(%u): 0x%x\n", i, val);
		ntb->peer_msix_data[i].nmd_data = val;
		intel_ntb_spad_read(ntb->device, NTB_MSIX_OFS0 + i, &val);
		intel_ntb_printf(2, "remote MSIX addr(%u): 0x%x\n", i, val);
		ntb->peer_msix_data[i].nmd_ofs = val;
	}

	ntb->peer_msix_done = true;

msix_done:
	/*
	 * Tell the peer we have its values, then wait until the same
	 * acknowledgement shows up in our own scratchpad.
	 */
	intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_DONE, NTB_MSIX_RECEIVED);
	intel_ntb_spad_read(ntb->device, NTB_MSIX_DONE, &val);
	if (val != NTB_MSIX_RECEIVED)
		goto reschedule;

	intel_ntb_spad_clear(ntb->device);
	ntb->peer_msix_good = true;
	/* Give peer time to see our NTB_MSIX_RECEIVED. */
	goto reschedule;

msix_good:
	/* Exchange complete: refresh the link state and announce it. */
	intel_ntb_poll_link(ntb);
	ntb_link_event(ntb->device);
	return;

reschedule:
	/*
	 * Re-arm only while the link is still up.  The delay is hz / 10
	 * ticks, doubled once peer_msix_good is set.
	 */
	ntb->lnk_sta = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2);
	if (_xeon_link_is_up(ntb)) {
		callout_reset(&ntb->peer_msix_work,
		    hz * (ntb->peer_msix_good ? 2 : 1) / 10,
		    intel_ntb_exchange_msix, ntb);
	} else
		intel_ntb_spad_clear(ntb->device);
}
2784 
2785 /*
2786  * Public API to the rest of the OS
2787  */
2788 
2789 static uint8_t
2790 intel_ntb_spad_count(device_t dev)
2791 {
2792 	struct ntb_softc *ntb = device_get_softc(dev);
2793 
2794 	return (ntb->spad_count);
2795 }
2796 
2797 static uint8_t
2798 intel_ntb_mw_count(device_t dev)
2799 {
2800 	struct ntb_softc *ntb = device_get_softc(dev);
2801 	uint8_t res;
2802 
2803 	res = ntb->mw_count;
2804 	if (ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0)
2805 		res--;
2806 	if (ntb->msix_mw_idx != B2B_MW_DISABLED)
2807 		res--;
2808 	return (res);
2809 }
2810 
2811 static int
2812 intel_ntb_spad_write(device_t dev, unsigned int idx, uint32_t val)
2813 {
2814 	struct ntb_softc *ntb = device_get_softc(dev);
2815 
2816 	if (idx >= ntb->spad_count)
2817 		return (EINVAL);
2818 
2819 	intel_ntb_reg_write(4, ntb->self_reg->spad + idx * 4, val);
2820 
2821 	return (0);
2822 }
2823 
2824 /*
2825  * Zeros the local scratchpad.
2826  */
2827 static void
2828 intel_ntb_spad_clear(device_t dev)
2829 {
2830 	struct ntb_softc *ntb = device_get_softc(dev);
2831 	unsigned i;
2832 
2833 	for (i = 0; i < ntb->spad_count; i++)
2834 		intel_ntb_spad_write(dev, i, 0);
2835 }
2836 
2837 static int
2838 intel_ntb_spad_read(device_t dev, unsigned int idx, uint32_t *val)
2839 {
2840 	struct ntb_softc *ntb = device_get_softc(dev);
2841 
2842 	if (idx >= ntb->spad_count)
2843 		return (EINVAL);
2844 
2845 	*val = intel_ntb_reg_read(4, ntb->self_reg->spad + idx * 4);
2846 
2847 	return (0);
2848 }
2849 
2850 static int
2851 intel_ntb_peer_spad_write(device_t dev, unsigned int idx, uint32_t val)
2852 {
2853 	struct ntb_softc *ntb = device_get_softc(dev);
2854 
2855 	if (idx >= ntb->spad_count)
2856 		return (EINVAL);
2857 
2858 	if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP))
2859 		intel_ntb_mw_write(4, XEON_SPAD_OFFSET + idx * 4, val);
2860 	else
2861 		intel_ntb_reg_write(4, ntb->peer_reg->spad + idx * 4, val);
2862 
2863 	return (0);
2864 }
2865 
2866 static int
2867 intel_ntb_peer_spad_read(device_t dev, unsigned int idx, uint32_t *val)
2868 {
2869 	struct ntb_softc *ntb = device_get_softc(dev);
2870 
2871 	if (idx >= ntb->spad_count)
2872 		return (EINVAL);
2873 
2874 	if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP))
2875 		*val = intel_ntb_mw_read(4, XEON_SPAD_OFFSET + idx * 4);
2876 	else
2877 		*val = intel_ntb_reg_read(4, ntb->peer_reg->spad + idx * 4);
2878 
2879 	return (0);
2880 }
2881 
2882 static int
2883 intel_ntb_mw_get_range(device_t dev, unsigned mw_idx, vm_paddr_t *base,
2884     caddr_t *vbase, size_t *size, size_t *align, size_t *align_size,
2885     bus_addr_t *plimit)
2886 {
2887 	struct ntb_softc *ntb = device_get_softc(dev);
2888 	struct ntb_pci_bar_info *bar;
2889 	bus_addr_t limit;
2890 	size_t bar_b2b_off;
2891 	enum ntb_bar bar_num;
2892 
2893 	if (mw_idx >= intel_ntb_mw_count(dev))
2894 		return (EINVAL);
2895 	mw_idx = intel_ntb_user_mw_to_idx(ntb, mw_idx);
2896 
2897 	bar_num = intel_ntb_mw_to_bar(ntb, mw_idx);
2898 	bar = &ntb->bar_info[bar_num];
2899 	bar_b2b_off = 0;
2900 	if (mw_idx == ntb->b2b_mw_idx) {
2901 		KASSERT(ntb->b2b_off != 0,
2902 		    ("user shouldn't get non-shared b2b mw"));
2903 		bar_b2b_off = ntb->b2b_off;
2904 	}
2905 
2906 	if (bar_is_64bit(ntb, bar_num))
2907 		limit = BUS_SPACE_MAXADDR;
2908 	else
2909 		limit = BUS_SPACE_MAXADDR_32BIT;
2910 
2911 	if (base != NULL)
2912 		*base = bar->pbase + bar_b2b_off;
2913 	if (vbase != NULL)
2914 		*vbase = bar->vbase + bar_b2b_off;
2915 	if (size != NULL)
2916 		*size = bar->size - bar_b2b_off;
2917 	if (align != NULL)
2918 		*align = bar->size;
2919 	if (align_size != NULL)
2920 		*align_size = 1;
2921 	if (plimit != NULL)
2922 		*plimit = limit;
2923 	return (0);
2924 }
2925 
2926 static int
2927 intel_ntb_mw_set_trans(device_t dev, unsigned idx, bus_addr_t addr, size_t size)
2928 {
2929 	struct ntb_softc *ntb = device_get_softc(dev);
2930 	struct ntb_pci_bar_info *bar;
2931 	uint64_t base, limit, reg_val;
2932 	size_t bar_size, mw_size;
2933 	uint32_t base_reg, xlat_reg, limit_reg;
2934 	enum ntb_bar bar_num;
2935 
2936 	if (idx >= intel_ntb_mw_count(dev))
2937 		return (EINVAL);
2938 	idx = intel_ntb_user_mw_to_idx(ntb, idx);
2939 
2940 	bar_num = intel_ntb_mw_to_bar(ntb, idx);
2941 	bar = &ntb->bar_info[bar_num];
2942 
2943 	bar_size = bar->size;
2944 	if (idx == ntb->b2b_mw_idx)
2945 		mw_size = bar_size - ntb->b2b_off;
2946 	else
2947 		mw_size = bar_size;
2948 
2949 	/* Hardware requires that addr is aligned to bar size */
2950 	if ((addr & (bar_size - 1)) != 0)
2951 		return (EINVAL);
2952 
2953 	if (size > mw_size)
2954 		return (EINVAL);
2955 
2956 	bar_get_xlat_params(ntb, bar_num, &base_reg, &xlat_reg, &limit_reg);
2957 
2958 	limit = 0;
2959 	if (bar_is_64bit(ntb, bar_num)) {
2960 		base = intel_ntb_reg_read(8, base_reg) & BAR_HIGH_MASK;
2961 
2962 		if (limit_reg != 0 && size != mw_size)
2963 			limit = base + size;
2964 
2965 		/* Set and verify translation address */
2966 		intel_ntb_reg_write(8, xlat_reg, addr);
2967 		reg_val = intel_ntb_reg_read(8, xlat_reg) & BAR_HIGH_MASK;
2968 		if (reg_val != addr) {
2969 			intel_ntb_reg_write(8, xlat_reg, 0);
2970 			return (EIO);
2971 		}
2972 
2973 		/* Set and verify the limit */
2974 		intel_ntb_reg_write(8, limit_reg, limit);
2975 		reg_val = intel_ntb_reg_read(8, limit_reg) & BAR_HIGH_MASK;
2976 		if (reg_val != limit) {
2977 			intel_ntb_reg_write(8, limit_reg, base);
2978 			intel_ntb_reg_write(8, xlat_reg, 0);
2979 			return (EIO);
2980 		}
2981 	} else {
2982 		/* Configure 32-bit (split) BAR MW */
2983 
2984 		if ((addr & UINT32_MAX) != addr)
2985 			return (ERANGE);
2986 		if (((addr + size) & UINT32_MAX) != (addr + size))
2987 			return (ERANGE);
2988 
2989 		base = intel_ntb_reg_read(4, base_reg) & BAR_HIGH_MASK;
2990 
2991 		if (limit_reg != 0 && size != mw_size)
2992 			limit = base + size;
2993 
2994 		/* Set and verify translation address */
2995 		intel_ntb_reg_write(4, xlat_reg, addr);
2996 		reg_val = intel_ntb_reg_read(4, xlat_reg) & BAR_HIGH_MASK;
2997 		if (reg_val != addr) {
2998 			intel_ntb_reg_write(4, xlat_reg, 0);
2999 			return (EIO);
3000 		}
3001 
3002 		/* Set and verify the limit */
3003 		intel_ntb_reg_write(4, limit_reg, limit);
3004 		reg_val = intel_ntb_reg_read(4, limit_reg) & BAR_HIGH_MASK;
3005 		if (reg_val != limit) {
3006 			intel_ntb_reg_write(4, limit_reg, base);
3007 			intel_ntb_reg_write(4, xlat_reg, 0);
3008 			return (EIO);
3009 		}
3010 	}
3011 	return (0);
3012 }
3013 
3014 static int
3015 intel_ntb_mw_clear_trans(device_t dev, unsigned mw_idx)
3016 {
3017 
3018 	return (intel_ntb_mw_set_trans(dev, mw_idx, 0, 0));
3019 }
3020 
3021 static int
3022 intel_ntb_mw_get_wc(device_t dev, unsigned idx, vm_memattr_t *mode)
3023 {
3024 	struct ntb_softc *ntb = device_get_softc(dev);
3025 	struct ntb_pci_bar_info *bar;
3026 
3027 	if (idx >= intel_ntb_mw_count(dev))
3028 		return (EINVAL);
3029 	idx = intel_ntb_user_mw_to_idx(ntb, idx);
3030 
3031 	bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, idx)];
3032 	*mode = bar->map_mode;
3033 	return (0);
3034 }
3035 
3036 static int
3037 intel_ntb_mw_set_wc(device_t dev, unsigned idx, vm_memattr_t mode)
3038 {
3039 	struct ntb_softc *ntb = device_get_softc(dev);
3040 
3041 	if (idx >= intel_ntb_mw_count(dev))
3042 		return (EINVAL);
3043 
3044 	idx = intel_ntb_user_mw_to_idx(ntb, idx);
3045 	return (intel_ntb_mw_set_wc_internal(ntb, idx, mode));
3046 }
3047 
3048 static int
3049 intel_ntb_mw_set_wc_internal(struct ntb_softc *ntb, unsigned idx, vm_memattr_t mode)
3050 {
3051 	struct ntb_pci_bar_info *bar;
3052 	int rc;
3053 
3054 	bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, idx)];
3055 	if (bar->map_mode == mode)
3056 		return (0);
3057 
3058 	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size, mode);
3059 	if (rc == 0)
3060 		bar->map_mode = mode;
3061 
3062 	return (rc);
3063 }
3064 
3065 static void
3066 intel_ntb_peer_db_set(device_t dev, uint64_t bit)
3067 {
3068 	struct ntb_softc *ntb = device_get_softc(dev);
3069 
3070 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
3071 		struct ntb_pci_bar_info *lapic;
3072 		unsigned i;
3073 
3074 		lapic = ntb->peer_lapic_bar;
3075 
3076 		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
3077 			if ((bit & intel_ntb_db_vector_mask(dev, i)) != 0)
3078 				bus_space_write_4(lapic->pci_bus_tag,
3079 				    lapic->pci_bus_handle,
3080 				    ntb->peer_msix_data[i].nmd_ofs,
3081 				    ntb->peer_msix_data[i].nmd_data);
3082 		}
3083 		return;
3084 	}
3085 
3086 	if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
3087 		intel_ntb_mw_write(2, XEON_PDOORBELL_OFFSET, bit);
3088 		return;
3089 	}
3090 
3091 	db_iowrite(ntb, ntb->peer_reg->db_bell, bit);
3092 }
3093 
3094 static int
3095 intel_ntb_peer_db_addr(device_t dev, bus_addr_t *db_addr, vm_size_t *db_size)
3096 {
3097 	struct ntb_softc *ntb = device_get_softc(dev);
3098 	struct ntb_pci_bar_info *bar;
3099 	uint64_t regoff;
3100 
3101 	KASSERT((db_addr != NULL && db_size != NULL), ("must be non-NULL"));
3102 
3103 	if (!HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
3104 		bar = &ntb->bar_info[NTB_CONFIG_BAR];
3105 		regoff = ntb->peer_reg->db_bell;
3106 	} else {
3107 		KASSERT(ntb->b2b_mw_idx != B2B_MW_DISABLED,
3108 		    ("invalid b2b idx"));
3109 
3110 		bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx)];
3111 		regoff = XEON_PDOORBELL_OFFSET;
3112 	}
3113 	KASSERT(bar->pci_bus_tag != X86_BUS_SPACE_IO, ("uh oh"));
3114 
3115 	/* HACK: Specific to current x86 bus implementation. */
3116 	*db_addr = ((uint64_t)bar->pci_bus_handle + regoff);
3117 	*db_size = ntb->reg->db_size;
3118 	return (0);
3119 }
3120 
3121 static uint64_t
3122 intel_ntb_db_valid_mask(device_t dev)
3123 {
3124 	struct ntb_softc *ntb = device_get_softc(dev);
3125 
3126 	return (ntb->db_valid_mask);
3127 }
3128 
3129 static int
3130 intel_ntb_db_vector_count(device_t dev)
3131 {
3132 	struct ntb_softc *ntb = device_get_softc(dev);
3133 
3134 	return (ntb->db_vec_count);
3135 }
3136 
3137 static uint64_t
3138 intel_ntb_db_vector_mask(device_t dev, uint32_t vector)
3139 {
3140 	struct ntb_softc *ntb = device_get_softc(dev);
3141 
3142 	if (vector > ntb->db_vec_count)
3143 		return (0);
3144 	return (ntb->db_valid_mask & intel_ntb_vec_mask(ntb, vector));
3145 }
3146 
3147 static bool
3148 intel_ntb_link_is_up(device_t dev, enum ntb_speed *speed, enum ntb_width *width)
3149 {
3150 	struct ntb_softc *ntb = device_get_softc(dev);
3151 
3152 	if (speed != NULL)
3153 		*speed = intel_ntb_link_sta_speed(ntb);
3154 	if (width != NULL)
3155 		*width = intel_ntb_link_sta_width(ntb);
3156 	return (link_is_up(ntb));
3157 }
3158 
3159 static void
3160 save_bar_parameters(struct ntb_pci_bar_info *bar)
3161 {
3162 
3163 	bar->pci_bus_tag = rman_get_bustag(bar->pci_resource);
3164 	bar->pci_bus_handle = rman_get_bushandle(bar->pci_resource);
3165 	bar->pbase = rman_get_start(bar->pci_resource);
3166 	bar->size = rman_get_size(bar->pci_resource);
3167 	bar->vbase = rman_get_virtual(bar->pci_resource);
3168 }
3169 
3170 static device_method_t ntb_intel_methods[] = {
3171 	/* Device interface */
3172 	DEVMETHOD(device_probe,		intel_ntb_probe),
3173 	DEVMETHOD(device_attach,	intel_ntb_attach),
3174 	DEVMETHOD(device_detach,	intel_ntb_detach),
3175 	/* Bus interface */
3176 	DEVMETHOD(bus_child_location_str, ntb_child_location_str),
3177 	DEVMETHOD(bus_print_child,	ntb_print_child),
3178 	DEVMETHOD(bus_get_dma_tag,	ntb_get_dma_tag),
3179 	/* NTB interface */
3180 	DEVMETHOD(ntb_port_number,	intel_ntb_port_number),
3181 	DEVMETHOD(ntb_peer_port_count,	intel_ntb_peer_port_count),
3182 	DEVMETHOD(ntb_peer_port_number,	intel_ntb_peer_port_number),
3183 	DEVMETHOD(ntb_peer_port_idx, 	intel_ntb_peer_port_idx),
3184 	DEVMETHOD(ntb_link_is_up,	intel_ntb_link_is_up),
3185 	DEVMETHOD(ntb_link_enable,	intel_ntb_link_enable),
3186 	DEVMETHOD(ntb_link_disable,	intel_ntb_link_disable),
3187 	DEVMETHOD(ntb_link_enabled,	intel_ntb_link_enabled),
3188 	DEVMETHOD(ntb_mw_count,		intel_ntb_mw_count),
3189 	DEVMETHOD(ntb_mw_get_range,	intel_ntb_mw_get_range),
3190 	DEVMETHOD(ntb_mw_set_trans,	intel_ntb_mw_set_trans),
3191 	DEVMETHOD(ntb_mw_clear_trans,	intel_ntb_mw_clear_trans),
3192 	DEVMETHOD(ntb_mw_get_wc,	intel_ntb_mw_get_wc),
3193 	DEVMETHOD(ntb_mw_set_wc,	intel_ntb_mw_set_wc),
3194 	DEVMETHOD(ntb_spad_count,	intel_ntb_spad_count),
3195 	DEVMETHOD(ntb_spad_clear,	intel_ntb_spad_clear),
3196 	DEVMETHOD(ntb_spad_write,	intel_ntb_spad_write),
3197 	DEVMETHOD(ntb_spad_read,	intel_ntb_spad_read),
3198 	DEVMETHOD(ntb_peer_spad_write,	intel_ntb_peer_spad_write),
3199 	DEVMETHOD(ntb_peer_spad_read,	intel_ntb_peer_spad_read),
3200 	DEVMETHOD(ntb_db_valid_mask,	intel_ntb_db_valid_mask),
3201 	DEVMETHOD(ntb_db_vector_count,	intel_ntb_db_vector_count),
3202 	DEVMETHOD(ntb_db_vector_mask,	intel_ntb_db_vector_mask),
3203 	DEVMETHOD(ntb_db_clear,		intel_ntb_db_clear),
3204 	DEVMETHOD(ntb_db_clear_mask,	intel_ntb_db_clear_mask),
3205 	DEVMETHOD(ntb_db_read,		intel_ntb_db_read),
3206 	DEVMETHOD(ntb_db_set_mask,	intel_ntb_db_set_mask),
3207 	DEVMETHOD(ntb_peer_db_addr,	intel_ntb_peer_db_addr),
3208 	DEVMETHOD(ntb_peer_db_set,	intel_ntb_peer_db_set),
3209 	DEVMETHOD_END
3210 };
3211 
3212 static DEFINE_CLASS_0(ntb_hw, ntb_intel_driver, ntb_intel_methods,
3213     sizeof(struct ntb_softc));
3214 DRIVER_MODULE(ntb_hw_intel, pci, ntb_intel_driver, ntb_hw_devclass, NULL, NULL);
3215 MODULE_DEPEND(ntb_hw_intel, ntb, 1, 1, 1);
3216 MODULE_VERSION(ntb_hw_intel, 1);
3217 MODULE_PNP_INFO("W32:vendor/device;D:#", pci, ntb_hw_intel, pci_ids,
3218     nitems(pci_ids));
3219