xref: /freebsd/sys/dev/ntb/ntb_hw/ntb_hw_intel.c (revision 6f63e88c0166ed3e5f2805a9e667c7d24d304cf1)
1 /*-
2  * Copyright (c) 2016-2017 Alexander Motin <mav@FreeBSD.org>
3  * Copyright (C) 2013 Intel Corporation
4  * Copyright (C) 2015 EMC Corporation
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*
 * The Non-Transparent Bridge (NTB) is a device that allows you to connect
 * two or more systems using PCI-e links, providing remote memory access.
32  *
33  * This module contains a driver for NTB hardware in Intel Xeon/Atom CPUs.
34  *
35  * NOTE: Much of the code in this module is shared with Linux. Any patches may
36  * be picked up and redistributed in Linux with a dual GPL/BSD license.
37  */
38 
39 #include <sys/cdefs.h>
40 __FBSDID("$FreeBSD$");
41 
42 #include <sys/param.h>
43 #include <sys/kernel.h>
44 #include <sys/systm.h>
45 #include <sys/bus.h>
46 #include <sys/endian.h>
47 #include <sys/interrupt.h>
48 #include <sys/lock.h>
49 #include <sys/malloc.h>
50 #include <sys/module.h>
51 #include <sys/mutex.h>
52 #include <sys/pciio.h>
53 #include <sys/queue.h>
54 #include <sys/rman.h>
55 #include <sys/sbuf.h>
56 #include <sys/sysctl.h>
57 #include <vm/vm.h>
58 #include <vm/pmap.h>
59 #include <machine/bus.h>
60 #include <machine/intr_machdep.h>
61 #include <machine/resource.h>
62 #include <dev/pci/pcireg.h>
63 #include <dev/pci/pcivar.h>
64 
65 #include "ntb_hw_intel.h"
66 #include "../ntb.h"
67 
/* Upper bound on interrupt vectors; sizes ntb_softc.int_info[]. */
#define MAX_MSIX_INTERRUPTS MAX(XEON_DB_COUNT, ATOM_DB_COUNT)

#define NTB_HB_TIMEOUT		1 /* second */
#define ATOM_LINK_RECOVERY_TIME	500 /* ms */
/* Mask that clears the low 12 bits of a 64-bit value. */
#define BAR_HIGH_MASK		(~((1ull << 12) - 1))

/*
 * Magic values; NOTE(review): presumably used by the MSI-X scratchpad
 * exchange (see the NTB_MSIX_* indices) -- the users are outside this view.
 */
#define	NTB_MSIX_VER_GUARD	0xaabbccdd
#define	NTB_MSIX_RECEIVED	0xe0f0e0f0

/*
 * PCI constants could be somewhere more generic, but aren't defined/used in
 * pci.c.
 */
#define	PCI_MSIX_ENTRY_SIZE		16
#define	PCI_MSIX_ENTRY_LOWER_ADDR	0
#define	PCI_MSIX_ENTRY_UPPER_ADDR	4
#define	PCI_MSIX_ENTRY_DATA		8
85 
/*
 * Hardware family of the bridge.  Taken from the matching pci_ids[] entry and
 * used by intel_ntb_attach()/intel_ntb_detach() to pick the Atom or Xeon
 * detect/init/teardown routines.
 */
enum ntb_device_type {
	NTB_XEON,
	NTB_ATOM
};
90 
/*
 * ntb_conn_type are hardware numbers, cannot change.
 * Stored in ntb_softc.conn_type.
 */
enum ntb_conn_type {
	NTB_CONN_TRANSPARENT = 0,
	NTB_CONN_B2B = 1,
	NTB_CONN_RP = 2,
};
97 
/*
 * Which side of a B2B link this device is; stored in ntb_softc.dev_type.
 * NOTE(review): USD/DSD presumably mean upstream/downstream side device, as
 * suggested by the usd_* and dsd_* sysctl descriptions -- confirm.
 */
enum ntb_b2b_direction {
	NTB_DEV_USD = 0,
	NTB_DEV_DSD = 1,
};
102 
/*
 * Index into ntb_softc.bar_info[].  intel_ntb_map_pci_bars() binds
 * NTB_CONFIG_BAR to PCIR_BAR(0), NTB_B2B_BAR_1 to PCIR_BAR(2),
 * NTB_B2B_BAR_2 to PCIR_BAR(4) and, with NTB_SPLIT_BAR only,
 * NTB_B2B_BAR_3 to PCIR_BAR(5).  NTB_MAX_BARS is the array size.
 */
enum ntb_bar {
	NTB_CONFIG_BAR = 0,
	NTB_B2B_BAR_1,
	NTB_B2B_BAR_2,
	NTB_B2B_BAR_3,
	NTB_MAX_BARS
};
110 
/*
 * Slot indices for the MSI-X information exchange; NTB_MAX_MSIX_SPAD is the
 * number of slots.
 * NOTE(review): judging by the name these index scratchpad registers -- the
 * code using them is outside this view; confirm.
 */
enum {
	NTB_MSIX_GUARD = 0,
	NTB_MSIX_DATA0,
	NTB_MSIX_DATA1,
	NTB_MSIX_DATA2,
	NTB_MSIX_OFS0,
	NTB_MSIX_OFS1,
	NTB_MSIX_OFS2,
	NTB_MSIX_DONE,
	NTB_MAX_MSIX_SPAD
};
122 
/*
 * Device features and workarounds.
 * True if any bit of 'feature' (an NTB_* feature flag) is set in
 * (ntb)->features.
 */
#define HAS_FEATURE(ntb, feature)	\
	(((ntb)->features & (feature)) != 0)
126 
/* One supported device; see the pci_ids[] table. */
struct ntb_hw_info {
	/* Looked up with the value of pci_get_devid(). */
	uint32_t		device_id;
	/* Passed to device_set_desc() by intel_ntb_probe(). */
	const char		*desc;
	/* Copied into ntb_softc.type by intel_ntb_attach(). */
	enum ntb_device_type	type;
	/* Initial NTB_* feature flags, copied into ntb_softc.features. */
	uint32_t		features;
};
133 
/* Per-BAR state; one entry per enum ntb_bar in ntb_softc.bar_info[]. */
struct ntb_pci_bar_info {
	/* Tag/handle pair used by the intel_ntb_bar_read/write macros. */
	bus_space_tag_t		pci_bus_tag;
	bus_space_handle_t	pci_bus_handle;
	/* Resource ID (PCIR_BAR(n)) and the allocated memory resource. */
	int			pci_resource_id;
	struct resource		*pci_resource;
	/*
	 * Physical base, kernel virtual base and size of the BAR.
	 * NOTE(review): presumably filled in by save_bar_parameters(), which
	 * is outside this view.
	 */
	vm_paddr_t		pbase;
	caddr_t			vbase;
	vm_size_t		size;
	/* Memory attribute currently applied to the mapping. */
	vm_memattr_t		map_mode;

	/* Configuration register offsets */
	uint32_t		psz_off;
	uint32_t		ssz_off;
	uint32_t		pbarxlat_off;
};
149 
/*
 * Per-interrupt bookkeeping (ntb_softc.int_info[]).
 * NOTE(review): presumably the IRQ resource, its rid, and the handler cookie;
 * the setup/teardown code is outside this view.
 */
struct ntb_int_info {
	struct resource	*res;
	int		rid;
	void		*tag;
};
155 
/*
 * Per-vector context (ntb_softc.msix_vec).
 * NOTE(review): presumably handed to the vector ISR as its argument; the
 * users are outside this view.
 */
struct ntb_vec {
	struct ntb_softc	*ntb;	/* owning device */
	uint32_t		num;	/* vector number */
	unsigned		masked;
};
161 
/* Family-specific register layout; see atom_reg and xeon_reg. */
struct ntb_reg {
	uint32_t	ntb_ctl;	/* offset of the NTB control register */
	uint32_t	lnk_sta;	/* offset of the link status register */
	uint8_t		db_size;	/* doorbell register width in bytes */
	/*
	 * Memory window index -> enum ntb_bar.  Unused trailing entries are
	 * zero, which intel_ntb_mw_to_bar() asserts against.
	 */
	unsigned	mw_bar[NTB_MAX_BARS];
};
168 
/*
 * Register offsets for one register set (e.g. xeon_pri_reg, xeon_b2b_reg):
 * doorbell, doorbell mask and scratchpad.
 */
struct ntb_alt_reg {
	uint32_t	db_bell;
	uint32_t	db_mask;
	uint32_t	spad;
};
174 
/*
 * Offsets of the BAR base, translation and limit registers (see
 * xeon_sec_xlat and atom_sec_xlat).  bar_get_xlat_params() returns the
 * bar2/bar4/bar5 triples for NTB_B2B_BAR_1/2/3 respectively.
 */
struct ntb_xlat_reg {
	uint32_t	bar0_base;
	uint32_t	bar2_base;
	uint32_t	bar4_base;
	uint32_t	bar5_base;

	uint32_t	bar2_xlat;
	uint32_t	bar4_xlat;
	uint32_t	bar5_xlat;

	uint32_t	bar2_limit;
	uint32_t	bar4_limit;
	uint32_t	bar5_limit;
};
189 
/*
 * Addresses used for the B2B windows on the bus between the two NTB devices
 * (see the hw.ntb.xeon_b2b sysctl descriptions).  The *_addr32 members apply
 * to split-BAR mode.
 */
struct ntb_b2b_addr {
	uint64_t	bar0_addr;
	uint64_t	bar2_addr64;
	uint64_t	bar4_addr64;
	uint64_t	bar4_addr32;
	uint64_t	bar5_addr32;
};
197 
/*
 * One MSI-X message description (ntb_softc.msix_data[] and
 * peer_msix_data[]).
 * NOTE(review): presumably an address offset plus the message data word;
 * the code filling these in is outside this view.
 */
struct ntb_msix_data {
	uint32_t	nmd_ofs;
	uint32_t	nmd_data;
};
202 
/* Per-device driver state, obtained with device_get_softc(). */
struct ntb_softc {
	/* ntb.c context. Do not move! Must go first! */
	void			*ntb_store;

	/* Set by intel_ntb_attach() from the device and its pci_ids[] entry. */
	device_t		device;
	enum ntb_device_type	type;
	uint32_t		features;

	/* Indexed by enum ntb_bar. */
	struct ntb_pci_bar_info	bar_info[NTB_MAX_BARS];
	struct ntb_int_info	int_info[MAX_MSIX_INTERRUPTS];
	uint32_t		allocated_interrupts;

	struct ntb_msix_data	peer_msix_data[XEON_NONLINK_DB_MSIX_BITS];
	struct ntb_msix_data	msix_data[XEON_NONLINK_DB_MSIX_BITS];
	bool			peer_msix_good;
	bool			peer_msix_done;
	struct ntb_pci_bar_info	*peer_lapic_bar;
	struct callout		peer_msix_work;

	/*
	 * DMA tag/map covering BAR0; created in intel_ntb_map_pci_bars() and
	 * released in intel_ntb_unmap_pci_bar().
	 */
	bus_dma_tag_t		bar0_dma_tag;
	bus_dmamap_t		bar0_dma_map;

	/* Heartbeat timer for NTB_ATOM since there is no link interrupt. */
	struct callout		heartbeat_timer;
	struct callout		lr_timer;

	struct ntb_vec		*msix_vec;

	uint32_t		ppd;
	enum ntb_conn_type	conn_type;
	enum ntb_b2b_direction	dev_type;

	/* Offset of peer bar0 in B2B BAR */
	uint64_t			b2b_off;
	/* Memory window used to access peer bar0 */
#define B2B_MW_DISABLED			UINT8_MAX
	uint8_t				b2b_mw_idx;
	uint32_t			msix_xlat;
	/* Also initialized to B2B_MW_DISABLED by intel_ntb_attach(). */
	uint8_t				msix_mw_idx;

	uint8_t				mw_count;
	uint8_t				spad_count;
	uint8_t				db_count;
	uint8_t				db_vec_count;
	uint8_t				db_vec_shift;

	/* Protects local db_mask. */
#define DB_MASK_LOCK(sc)	mtx_lock_spin(&(sc)->db_mask_lock)
#define DB_MASK_UNLOCK(sc)	mtx_unlock_spin(&(sc)->db_mask_lock)
#define DB_MASK_ASSERT(sc,f)	mtx_assert(&(sc)->db_mask_lock, (f))
	struct mtx			db_mask_lock;

	volatile uint32_t		ntb_ctl;
	volatile uint32_t		lnk_sta;

	uint64_t			db_valid_mask;
	uint64_t			db_link_mask;
	uint64_t			db_mask;
	uint64_t			fake_db;	/* NTB_SB01BASE_LOCKUP*/
	uint64_t			force_db;	/* NTB_SB01BASE_LOCKUP*/

	int				last_ts;	/* ticks @ last irq */

	/*
	 * Register layout tables for this device.  self_reg may still be NULL
	 * if attach failed early; intel_ntb_detach() checks for that.
	 */
	const struct ntb_reg		*reg;
	const struct ntb_alt_reg	*self_reg;
	const struct ntb_alt_reg	*peer_reg;
	const struct ntb_xlat_reg	*xlat_reg;
};
270 
271 #ifdef __i386__
272 static __inline uint64_t
273 bus_space_read_8(bus_space_tag_t tag, bus_space_handle_t handle,
274     bus_size_t offset)
275 {
276 
277 	return (bus_space_read_4(tag, handle, offset) |
278 	    ((uint64_t)bus_space_read_4(tag, handle, offset + 4)) << 32);
279 }
280 
281 static __inline void
282 bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t handle,
283     bus_size_t offset, uint64_t val)
284 {
285 
286 	bus_space_write_4(tag, handle, offset, val);
287 	bus_space_write_4(tag, handle, offset + 4, val >> 32);
288 }
289 #endif
290 
/*
 * Register access helpers.  All of them expect a 'struct ntb_softc *ntb' to
 * be in scope at the point of use.  SIZE is the access width suffix that is
 * pasted onto bus_space_read_/bus_space_write_ (e.g. 4 or 8).
 *
 *  intel_ntb_bar_*: access 'offset' within bar_info[bar].
 *  intel_ntb_reg_*: access 'offset' within the configuration BAR.
 *  intel_ntb_mw_*:  access 'offset' within the BAR backing memory window
 *		     ntb->b2b_mw_idx.
 */
#define intel_ntb_bar_read(SIZE, bar, offset) \
	    bus_space_read_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
	    ntb->bar_info[(bar)].pci_bus_handle, (offset))
#define intel_ntb_bar_write(SIZE, bar, offset, val) \
	    bus_space_write_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
	    ntb->bar_info[(bar)].pci_bus_handle, (offset), (val))
#define intel_ntb_reg_read(SIZE, offset) \
	    intel_ntb_bar_read(SIZE, NTB_CONFIG_BAR, offset)
#define intel_ntb_reg_write(SIZE, offset, val) \
	    intel_ntb_bar_write(SIZE, NTB_CONFIG_BAR, offset, val)
#define intel_ntb_mw_read(SIZE, offset) \
	    intel_ntb_bar_read(SIZE, intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \
		offset)
#define intel_ntb_mw_write(SIZE, offset, val) \
	    intel_ntb_bar_write(SIZE, intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \
		offset, val)
307 
/*
 * Forward declarations for the file-local functions.
 */

/* Device entry points and functions taking a device_t. */
static int intel_ntb_probe(device_t device);
static int intel_ntb_attach(device_t device);
static int intel_ntb_detach(device_t device);
static uint64_t intel_ntb_db_valid_mask(device_t dev);
static void intel_ntb_spad_clear(device_t dev);
static uint64_t intel_ntb_db_vector_mask(device_t dev, uint32_t vector);
static bool intel_ntb_link_is_up(device_t dev, enum ntb_speed *speed,
    enum ntb_width *width);
static int intel_ntb_link_enable(device_t dev, enum ntb_speed speed,
    enum ntb_width width);
static int intel_ntb_link_disable(device_t dev);
static int intel_ntb_spad_read(device_t dev, unsigned int idx, uint32_t *val);
static int intel_ntb_peer_spad_write(device_t dev, unsigned int idx, uint32_t val);

/* Driver internal routines operating on the softc. */
static unsigned intel_ntb_user_mw_to_idx(struct ntb_softc *, unsigned uidx);
static inline enum ntb_bar intel_ntb_mw_to_bar(struct ntb_softc *, unsigned mw);
static inline bool bar_is_64bit(struct ntb_softc *, enum ntb_bar);
static inline void bar_get_xlat_params(struct ntb_softc *, enum ntb_bar,
    uint32_t *base, uint32_t *xlat, uint32_t *lmt);
static int intel_ntb_map_pci_bars(struct ntb_softc *ntb);
static int intel_ntb_mw_set_wc_internal(struct ntb_softc *, unsigned idx,
    vm_memattr_t);
static void print_map_success(struct ntb_softc *, struct ntb_pci_bar_info *,
    const char *);
static int map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar);
static int map_memory_window_bar(struct ntb_softc *ntb,
    struct ntb_pci_bar_info *bar);
static void intel_ntb_unmap_pci_bar(struct ntb_softc *ntb);
static int intel_ntb_remap_msix(device_t, uint32_t desired, uint32_t avail);
static int intel_ntb_init_isr(struct ntb_softc *ntb);
static int intel_ntb_setup_legacy_interrupt(struct ntb_softc *ntb);
static int intel_ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors);
static void intel_ntb_teardown_interrupts(struct ntb_softc *ntb);
static inline uint64_t intel_ntb_vec_mask(struct ntb_softc *, uint64_t db_vector);
static void intel_ntb_interrupt(struct ntb_softc *, uint32_t vec);
static void ndev_vec_isr(void *arg);
static void ndev_irq_isr(void *arg);
static inline uint64_t db_ioread(struct ntb_softc *, uint64_t regoff);
static inline void db_iowrite(struct ntb_softc *, uint64_t regoff, uint64_t);
static inline void db_iowrite_raw(struct ntb_softc *, uint64_t regoff, uint64_t);
static int intel_ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors);
static void intel_ntb_free_msix_vec(struct ntb_softc *ntb);
static void intel_ntb_get_msix_info(struct ntb_softc *ntb);
static void intel_ntb_exchange_msix(void *);
static struct ntb_hw_info *intel_ntb_get_device_info(uint32_t device_id);
static void intel_ntb_detect_max_mw(struct ntb_softc *ntb);
static int intel_ntb_detect_xeon(struct ntb_softc *ntb);
static int intel_ntb_detect_atom(struct ntb_softc *ntb);
static int intel_ntb_xeon_init_dev(struct ntb_softc *ntb);
static int intel_ntb_atom_init_dev(struct ntb_softc *ntb);
static void intel_ntb_teardown_xeon(struct ntb_softc *ntb);
static void configure_atom_secondary_side_bars(struct ntb_softc *ntb);
static void xeon_reset_sbar_size(struct ntb_softc *, enum ntb_bar idx,
    enum ntb_bar regbar);
static void xeon_set_sbar_base_and_limit(struct ntb_softc *,
    uint64_t base_addr, enum ntb_bar idx, enum ntb_bar regbar);
static void xeon_set_pbar_xlat(struct ntb_softc *, uint64_t base_addr,
    enum ntb_bar idx);
static int xeon_setup_b2b_mw(struct ntb_softc *,
    const struct ntb_b2b_addr *addr, const struct ntb_b2b_addr *peer_addr);
static inline bool link_is_up(struct ntb_softc *ntb);
static inline bool _xeon_link_is_up(struct ntb_softc *ntb);
static inline bool atom_link_is_err(struct ntb_softc *ntb);
static inline enum ntb_speed intel_ntb_link_sta_speed(struct ntb_softc *);
static inline enum ntb_width intel_ntb_link_sta_width(struct ntb_softc *);
static void atom_link_hb(void *arg);
static void recover_atom_link(void *arg);
static bool intel_ntb_poll_link(struct ntb_softc *ntb);
static void save_bar_parameters(struct ntb_pci_bar_info *bar);

/* sysctl setup and handlers. */
static void intel_ntb_sysctl_init(struct ntb_softc *);
static int sysctl_handle_features(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_link_status(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_register(SYSCTL_HANDLER_ARGS);
383 
/* Log verbosity threshold, exported as the hw.ntb.debug_level tunable. */
static unsigned g_ntb_hw_debug_level;
SYSCTL_UINT(_hw_ntb, OID_AUTO, debug_level, CTLFLAG_RWTUN,
    &g_ntb_hw_debug_level, 0, "ntb_hw log level -- higher is more verbose");
/*
 * device_printf() wrapper that only prints when 'lvl' does not exceed the
 * configured debug level.  Expects a 'struct ntb_softc *ntb' in scope.
 */
#define intel_ntb_printf(lvl, ...) do {				\
	if ((lvl) <= g_ntb_hw_debug_level) {			\
		device_printf(ntb->device, __VA_ARGS__);	\
	}							\
} while (0)
392 
/*
 * Accepted values of the hw.ntb.default_mw_pat tunable.  They are translated
 * to VM_MEMATTR_* constants by intel_ntb_pat_flags(); the default is UC.
 */
#define	_NTB_PAT_UC	0
#define	_NTB_PAT_WC	1
#define	_NTB_PAT_WT	4
#define	_NTB_PAT_WP	5
#define	_NTB_PAT_WB	6
#define	_NTB_PAT_UCM	7
static unsigned g_ntb_mw_pat = _NTB_PAT_UC;
SYSCTL_UINT(_hw_ntb, OID_AUTO, default_mw_pat, CTLFLAG_RDTUN,
    &g_ntb_mw_pat, 0, "Configure the default memory window cache flags (PAT): "
    "UC: "  __XSTRING(_NTB_PAT_UC) ", "
    "WC: "  __XSTRING(_NTB_PAT_WC) ", "
    "WT: "  __XSTRING(_NTB_PAT_WT) ", "
    "WP: "  __XSTRING(_NTB_PAT_WP) ", "
    "WB: "  __XSTRING(_NTB_PAT_WB) ", "
    "UC-: " __XSTRING(_NTB_PAT_UCM));
408 
409 static inline vm_memattr_t
410 intel_ntb_pat_flags(void)
411 {
412 
413 	switch (g_ntb_mw_pat) {
414 	case _NTB_PAT_WC:
415 		return (VM_MEMATTR_WRITE_COMBINING);
416 	case _NTB_PAT_WT:
417 		return (VM_MEMATTR_WRITE_THROUGH);
418 	case _NTB_PAT_WP:
419 		return (VM_MEMATTR_WRITE_PROTECTED);
420 	case _NTB_PAT_WB:
421 		return (VM_MEMATTR_WRITE_BACK);
422 	case _NTB_PAT_UCM:
423 		return (VM_MEMATTR_WEAK_UNCACHEABLE);
424 	case _NTB_PAT_UC:
425 		/* FALLTHROUGH */
426 	default:
427 		return (VM_MEMATTR_UNCACHEABLE);
428 	}
429 }
430 
431 /*
432  * Well, this obviously doesn't belong here, but it doesn't seem to exist
433  * anywhere better yet.
434  */
435 static inline const char *
436 intel_ntb_vm_memattr_to_str(vm_memattr_t pat)
437 {
438 
439 	switch (pat) {
440 	case VM_MEMATTR_WRITE_COMBINING:
441 		return ("WRITE_COMBINING");
442 	case VM_MEMATTR_WRITE_THROUGH:
443 		return ("WRITE_THROUGH");
444 	case VM_MEMATTR_WRITE_PROTECTED:
445 		return ("WRITE_PROTECTED");
446 	case VM_MEMATTR_WRITE_BACK:
447 		return ("WRITE_BACK");
448 	case VM_MEMATTR_WEAK_UNCACHEABLE:
449 		return ("UNCACHED");
450 	case VM_MEMATTR_UNCACHEABLE:
451 		return ("UNCACHEABLE");
452 	default:
453 		return ("UNKNOWN");
454 	}
455 }
456 
/*
 * Boot-time tunables selecting which memory windows the errata workarounds
 * use.  Negative values count from the last memory window.
 */

/* hw.ntb.msix_mw_idx: window used to reach the peer MSI-X messages. */
static int g_ntb_msix_idx = 1;
SYSCTL_INT(_hw_ntb, OID_AUTO, msix_mw_idx, CTLFLAG_RDTUN, &g_ntb_msix_idx,
    0, "Use this memory window to access the peer MSIX message complex on "
    "certain Xeon-based NTB systems, as a workaround for a hardware errata.  "
    "Like b2b_mw_idx, negative values index from the last available memory "
    "window.  (Applies on Xeon platforms with SB01BASE_LOCKUP errata.)");

/* hw.ntb.b2b_mw_idx: window used to reach the peer NTB registers. */
static int g_ntb_mw_idx = -1;
SYSCTL_INT(_hw_ntb, OID_AUTO, b2b_mw_idx, CTLFLAG_RDTUN, &g_ntb_mw_idx,
    0, "Use this memory window to access the peer NTB registers.  A "
    "non-negative value starts from the first MW index; a negative value "
    "starts from the last MW index.  The default is -1, i.e., the last "
    "available memory window.  Both sides of the NTB MUST set the same "
    "value here!  (Applies on Xeon platforms with SDOORBELL_LOCKUP errata.)");
471 
/*
 * Feature/workaround flags kept in ntb_softc.features and tested with
 * HAS_FEATURE().
 */
/* Hardware owns the low 16 bits of features. */
#define NTB_BAR_SIZE_4K		(1 << 0)
#define NTB_SDOORBELL_LOCKUP	(1 << 1)
#define NTB_SB01BASE_LOCKUP	(1 << 2)
#define NTB_B2BDOORBELL_BIT14	(1 << 3)
/* Software/configuration owns the top 16 bits. */
#define NTB_SPLIT_BAR		(1ull << 16)

/*
 * Names for the feature bits above.
 * NOTE(review): this looks like a kernel printf(9) "%b" bit-description
 * string (base 16, 1-based bit numbers in octal) -- confirm at its user,
 * which is outside this view.
 */
#define NTB_FEATURES_STR \
    "\20\21SPLIT_BAR4\04B2B_DOORBELL_BIT14\03SB01BASE_LOCKUP" \
    "\02SDOORBELL_LOCKUP\01BAR_SIZE_4K"
483 
/*
 * Supported devices.  Each entry gives the PCI ID (device ID in the upper
 * 16 bits, Intel vendor ID 0x8086 in the lower 16), the probe description,
 * the hardware family and the errata/feature flags that apply.
 */
static struct ntb_hw_info pci_ids[] = {
	/* XXX: PS/SS IDs left out until they are supported. */
	{ 0x0C4E8086, "BWD Atom Processor S1200 Non-Transparent Bridge B2B",
		NTB_ATOM, 0 },

	{ 0x37258086, "JSF Xeon C35xx/C55xx Non-Transparent Bridge B2B",
		NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
	{ 0x3C0D8086, "SNB Xeon E5/Core i7 Non-Transparent Bridge B2B",
		NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
	{ 0x0E0D8086, "IVT Xeon E5 V2 Non-Transparent Bridge B2B", NTB_XEON,
		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
		    NTB_SB01BASE_LOCKUP | NTB_BAR_SIZE_4K },
	{ 0x2F0D8086, "HSX Xeon E5 V3 Non-Transparent Bridge B2B", NTB_XEON,
		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
		    NTB_SB01BASE_LOCKUP },
	{ 0x6F0D8086, "BDX Xeon E5 V4 Non-Transparent Bridge B2B", NTB_XEON,
		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
		    NTB_SB01BASE_LOCKUP },
};
503 
/*
 * Register layout tables for Atom hardware.
 */

/* Control/status offsets; 64-bit doorbells; two memory window BARs. */
static const struct ntb_reg atom_reg = {
	.ntb_ctl = ATOM_NTBCNTL_OFFSET,
	.lnk_sta = ATOM_LINK_STATUS_OFFSET,
	.db_size = sizeof(uint64_t),
	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2 },
};

/* Primary side doorbell, doorbell mask and scratchpad offsets. */
static const struct ntb_alt_reg atom_pri_reg = {
	.db_bell = ATOM_PDOORBELL_OFFSET,
	.db_mask = ATOM_PDBMSK_OFFSET,
	.spad = ATOM_SPAD_OFFSET,
};

/* B2B doorbell and scratchpad offsets; db_mask is left zero. */
static const struct ntb_alt_reg atom_b2b_reg = {
	.db_bell = ATOM_B2B_DOORBELL_OFFSET,
	.spad = ATOM_B2B_SPAD_OFFSET,
};

/* Secondary side translation offsets; base/limit entries are disabled. */
static const struct ntb_xlat_reg atom_sec_xlat = {
#if 0
	/* "FIXME" says the Linux driver. */
	.bar0_base = ATOM_SBAR0BASE_OFFSET,
	.bar2_base = ATOM_SBAR2BASE_OFFSET,
	.bar4_base = ATOM_SBAR4BASE_OFFSET,

	.bar2_limit = ATOM_SBAR2LMT_OFFSET,
	.bar4_limit = ATOM_SBAR4LMT_OFFSET,
#endif

	.bar2_xlat = ATOM_SBAR2XLAT_OFFSET,
	.bar4_xlat = ATOM_SBAR4XLAT_OFFSET,
};
536 
/*
 * Register layout tables for Xeon hardware.
 */

/* Control/status offsets; 16-bit doorbells; up to three memory window BARs. */
static const struct ntb_reg xeon_reg = {
	.ntb_ctl = XEON_NTBCNTL_OFFSET,
	.lnk_sta = XEON_LINK_STATUS_OFFSET,
	.db_size = sizeof(uint16_t),
	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2, NTB_B2B_BAR_3 },
};

/* Primary side doorbell, doorbell mask and scratchpad offsets. */
static const struct ntb_alt_reg xeon_pri_reg = {
	.db_bell = XEON_PDOORBELL_OFFSET,
	.db_mask = XEON_PDBMSK_OFFSET,
	.spad = XEON_SPAD_OFFSET,
};

/* B2B doorbell and scratchpad offsets; db_mask is left zero. */
static const struct ntb_alt_reg xeon_b2b_reg = {
	.db_bell = XEON_B2B_DOORBELL_OFFSET,
	.spad = XEON_B2B_SPAD_OFFSET,
};

/* Secondary side BAR base, limit and translation register offsets. */
static const struct ntb_xlat_reg xeon_sec_xlat = {
	.bar0_base = XEON_SBAR0BASE_OFFSET,
	.bar2_base = XEON_SBAR2BASE_OFFSET,
	.bar4_base = XEON_SBAR4BASE_OFFSET,
	.bar5_base = XEON_SBAR5BASE_OFFSET,

	.bar2_limit = XEON_SBAR2LMT_OFFSET,
	.bar4_limit = XEON_SBAR4LMT_OFFSET,
	.bar5_limit = XEON_SBAR5LMT_OFFSET,

	.bar2_xlat = XEON_SBAR2XLAT_OFFSET,
	.bar4_xlat = XEON_SBAR4XLAT_OFFSET,
	.bar5_xlat = XEON_SBAR5XLAT_OFFSET,
};
569 
/*
 * Default B2B window addresses for the upstream (usd) and downstream (dsd)
 * side.  Both start from the same XEON_B2B_* defaults; they are not const
 * because the hw.ntb.xeon_b2b.* tunables may override individual members.
 */
static struct ntb_b2b_addr xeon_b2b_usd_addr = {
	.bar0_addr = XEON_B2B_BAR0_ADDR,
	.bar2_addr64 = XEON_B2B_BAR2_ADDR64,
	.bar4_addr64 = XEON_B2B_BAR4_ADDR64,
	.bar4_addr32 = XEON_B2B_BAR4_ADDR32,
	.bar5_addr32 = XEON_B2B_BAR5_ADDR32,
};

static struct ntb_b2b_addr xeon_b2b_dsd_addr = {
	.bar0_addr = XEON_B2B_BAR0_ADDR,
	.bar2_addr64 = XEON_B2B_BAR2_ADDR64,
	.bar4_addr64 = XEON_B2B_BAR4_ADDR64,
	.bar4_addr32 = XEON_B2B_BAR4_ADDR32,
	.bar5_addr32 = XEON_B2B_BAR5_ADDR32,
};
585 
/*
 * hw.ntb.xeon_b2b.*: boot-time (CTLFLAG_RDTUN) overrides for the members of
 * xeon_b2b_usd_addr and xeon_b2b_dsd_addr.
 */
SYSCTL_NODE(_hw_ntb, OID_AUTO, xeon_b2b, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "B2B MW segment overrides -- MUST be the same on both sides");

/* Upstream side. */
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar2_addr64, CTLFLAG_RDTUN,
    &xeon_b2b_usd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
    "hardware, use this 64-bit address on the bus between the NTB devices for "
    "the window at BAR2, on the upstream side of the link.  MUST be the same "
    "address on both sides.");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr64, CTLFLAG_RDTUN,
    &xeon_b2b_usd_addr.bar4_addr64, 0, "See usd_bar2_addr64, but BAR4.");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr32, CTLFLAG_RDTUN,
    &xeon_b2b_usd_addr.bar4_addr32, 0, "See usd_bar2_addr64, but BAR4 "
    "(split-BAR mode).");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar5_addr32, CTLFLAG_RDTUN,
    &xeon_b2b_usd_addr.bar5_addr32, 0, "See usd_bar2_addr64, but BAR5 "
    "(split-BAR mode).");

/* Downstream side. */
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar2_addr64, CTLFLAG_RDTUN,
    &xeon_b2b_dsd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
    "hardware, use this 64-bit address on the bus between the NTB devices for "
    "the window at BAR2, on the downstream side of the link.  MUST be the same"
    " address on both sides.");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr64, CTLFLAG_RDTUN,
    &xeon_b2b_dsd_addr.bar4_addr64, 0, "See dsd_bar2_addr64, but BAR4.");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr32, CTLFLAG_RDTUN,
    &xeon_b2b_dsd_addr.bar4_addr32, 0, "See dsd_bar2_addr64, but BAR4 "
    "(split-BAR mode).");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar5_addr32, CTLFLAG_RDTUN,
    &xeon_b2b_dsd_addr.bar5_addr32, 0, "See dsd_bar2_addr64, but BAR5 "
    "(split-BAR mode).");
616 
/*
 * OS <-> Driver interface structures
 */
/* malloc(9) type under which this driver's allocations are accounted. */
MALLOC_DEFINE(M_NTB, "ntb_hw", "ntb_hw driver memory allocations");
621 
622 /*
623  * OS <-> Driver linkage functions
624  */
625 static int
626 intel_ntb_probe(device_t device)
627 {
628 	struct ntb_hw_info *p;
629 
630 	p = intel_ntb_get_device_info(pci_get_devid(device));
631 	if (p == NULL)
632 		return (ENXIO);
633 
634 	device_set_desc(device, p->desc);
635 	return (0);
636 }
637 
638 static int
639 intel_ntb_attach(device_t device)
640 {
641 	struct ntb_softc *ntb;
642 	struct ntb_hw_info *p;
643 	int error;
644 
645 	ntb = device_get_softc(device);
646 	p = intel_ntb_get_device_info(pci_get_devid(device));
647 
648 	ntb->device = device;
649 	ntb->type = p->type;
650 	ntb->features = p->features;
651 	ntb->b2b_mw_idx = B2B_MW_DISABLED;
652 	ntb->msix_mw_idx = B2B_MW_DISABLED;
653 
654 	/* Heartbeat timer for NTB_ATOM since there is no link interrupt */
655 	callout_init(&ntb->heartbeat_timer, 1);
656 	callout_init(&ntb->lr_timer, 1);
657 	callout_init(&ntb->peer_msix_work, 1);
658 	mtx_init(&ntb->db_mask_lock, "ntb hw bits", NULL, MTX_SPIN);
659 
660 	if (ntb->type == NTB_ATOM)
661 		error = intel_ntb_detect_atom(ntb);
662 	else
663 		error = intel_ntb_detect_xeon(ntb);
664 	if (error != 0)
665 		goto out;
666 
667 	intel_ntb_detect_max_mw(ntb);
668 
669 	pci_enable_busmaster(ntb->device);
670 
671 	error = intel_ntb_map_pci_bars(ntb);
672 	if (error != 0)
673 		goto out;
674 	if (ntb->type == NTB_ATOM)
675 		error = intel_ntb_atom_init_dev(ntb);
676 	else
677 		error = intel_ntb_xeon_init_dev(ntb);
678 	if (error != 0)
679 		goto out;
680 
681 	intel_ntb_spad_clear(device);
682 
683 	intel_ntb_poll_link(ntb);
684 
685 	intel_ntb_sysctl_init(ntb);
686 
687 	/* Attach children to this controller */
688 	error = ntb_register_device(device);
689 
690 out:
691 	if (error != 0)
692 		intel_ntb_detach(device);
693 	return (error);
694 }
695 
696 static int
697 intel_ntb_detach(device_t device)
698 {
699 	struct ntb_softc *ntb;
700 
701 	ntb = device_get_softc(device);
702 
703 	/* Detach & delete all children */
704 	ntb_unregister_device(device);
705 
706 	if (ntb->self_reg != NULL) {
707 		DB_MASK_LOCK(ntb);
708 		db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_valid_mask);
709 		DB_MASK_UNLOCK(ntb);
710 	}
711 	callout_drain(&ntb->heartbeat_timer);
712 	callout_drain(&ntb->lr_timer);
713 	callout_drain(&ntb->peer_msix_work);
714 	pci_disable_busmaster(ntb->device);
715 	if (ntb->type == NTB_XEON)
716 		intel_ntb_teardown_xeon(ntb);
717 	intel_ntb_teardown_interrupts(ntb);
718 
719 	mtx_destroy(&ntb->db_mask_lock);
720 
721 	intel_ntb_unmap_pci_bar(ntb);
722 
723 	return (0);
724 }
725 
726 /*
727  * Driver internal routines
728  */
729 static inline enum ntb_bar
730 intel_ntb_mw_to_bar(struct ntb_softc *ntb, unsigned mw)
731 {
732 
733 	KASSERT(mw < ntb->mw_count,
734 	    ("%s: mw:%u > count:%u", __func__, mw, (unsigned)ntb->mw_count));
735 	KASSERT(ntb->reg->mw_bar[mw] != 0, ("invalid mw"));
736 
737 	return (ntb->reg->mw_bar[mw]);
738 }
739 
740 static inline bool
741 bar_is_64bit(struct ntb_softc *ntb, enum ntb_bar bar)
742 {
743 	/* XXX This assertion could be stronger. */
744 	KASSERT(bar < NTB_MAX_BARS, ("bogus bar"));
745 	return (bar < NTB_B2B_BAR_2 || !HAS_FEATURE(ntb, NTB_SPLIT_BAR));
746 }
747 
748 static inline void
749 bar_get_xlat_params(struct ntb_softc *ntb, enum ntb_bar bar, uint32_t *base,
750     uint32_t *xlat, uint32_t *lmt)
751 {
752 	uint32_t basev, lmtv, xlatv;
753 
754 	switch (bar) {
755 	case NTB_B2B_BAR_1:
756 		basev = ntb->xlat_reg->bar2_base;
757 		lmtv = ntb->xlat_reg->bar2_limit;
758 		xlatv = ntb->xlat_reg->bar2_xlat;
759 		break;
760 	case NTB_B2B_BAR_2:
761 		basev = ntb->xlat_reg->bar4_base;
762 		lmtv = ntb->xlat_reg->bar4_limit;
763 		xlatv = ntb->xlat_reg->bar4_xlat;
764 		break;
765 	case NTB_B2B_BAR_3:
766 		basev = ntb->xlat_reg->bar5_base;
767 		lmtv = ntb->xlat_reg->bar5_limit;
768 		xlatv = ntb->xlat_reg->bar5_xlat;
769 		break;
770 	default:
771 		KASSERT(bar >= NTB_B2B_BAR_1 && bar < NTB_MAX_BARS,
772 		    ("bad bar"));
773 		basev = lmtv = xlatv = 0;
774 		break;
775 	}
776 
777 	if (base != NULL)
778 		*base = basev;
779 	if (xlat != NULL)
780 		*xlat = xlatv;
781 	if (lmt != NULL)
782 		*lmt = lmtv;
783 }
784 
785 static int
786 intel_ntb_map_pci_bars(struct ntb_softc *ntb)
787 {
788 	struct ntb_pci_bar_info *bar;
789 	int rc;
790 
791 	bar = &ntb->bar_info[NTB_CONFIG_BAR];
792 	bar->pci_resource_id = PCIR_BAR(0);
793 	rc = map_mmr_bar(ntb, bar);
794 	if (rc != 0)
795 		goto out;
796 
797 	/*
798 	 * At least on Xeon v4 NTB device leaks to host some remote side
799 	 * BAR0 writes supposed to update scratchpad registers.  I am not
800 	 * sure why it happens, but it may be related to the fact that
801 	 * on a link side BAR0 is 32KB, while on a host side it is 64KB.
802 	 * Without this hack DMAR blocks those accesses as not allowed.
803 	 */
804 	if (bus_dma_tag_create(bus_get_dma_tag(ntb->device), 1, 0,
805 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
806 	    bar->size, 1, bar->size, 0, NULL, NULL, &ntb->bar0_dma_tag)) {
807 		device_printf(ntb->device, "Unable to create BAR0 tag\n");
808 		return (ENOMEM);
809 	}
810 	if (bus_dmamap_create(ntb->bar0_dma_tag, 0, &ntb->bar0_dma_map)) {
811 		device_printf(ntb->device, "Unable to create BAR0 map\n");
812 		return (ENOMEM);
813 	}
814 	if (bus_dma_dmar_load_ident(ntb->bar0_dma_tag, ntb->bar0_dma_map,
815 	    bar->pbase, bar->size, 0)) {
816 		device_printf(ntb->device, "Unable to load BAR0 map\n");
817 		return (ENOMEM);
818 	}
819 
820 	bar = &ntb->bar_info[NTB_B2B_BAR_1];
821 	bar->pci_resource_id = PCIR_BAR(2);
822 	rc = map_memory_window_bar(ntb, bar);
823 	if (rc != 0)
824 		goto out;
825 	bar->psz_off = XEON_PBAR23SZ_OFFSET;
826 	bar->ssz_off = XEON_SBAR23SZ_OFFSET;
827 	bar->pbarxlat_off = XEON_PBAR2XLAT_OFFSET;
828 
829 	bar = &ntb->bar_info[NTB_B2B_BAR_2];
830 	bar->pci_resource_id = PCIR_BAR(4);
831 	rc = map_memory_window_bar(ntb, bar);
832 	if (rc != 0)
833 		goto out;
834 	bar->psz_off = XEON_PBAR4SZ_OFFSET;
835 	bar->ssz_off = XEON_SBAR4SZ_OFFSET;
836 	bar->pbarxlat_off = XEON_PBAR4XLAT_OFFSET;
837 
838 	if (!HAS_FEATURE(ntb, NTB_SPLIT_BAR))
839 		goto out;
840 
841 	bar = &ntb->bar_info[NTB_B2B_BAR_3];
842 	bar->pci_resource_id = PCIR_BAR(5);
843 	rc = map_memory_window_bar(ntb, bar);
844 	bar->psz_off = XEON_PBAR5SZ_OFFSET;
845 	bar->ssz_off = XEON_SBAR5SZ_OFFSET;
846 	bar->pbarxlat_off = XEON_PBAR5XLAT_OFFSET;
847 
848 out:
849 	if (rc != 0)
850 		device_printf(ntb->device,
851 		    "unable to allocate pci resource\n");
852 	return (rc);
853 }
854 
855 static void
856 print_map_success(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar,
857     const char *kind)
858 {
859 
860 	device_printf(ntb->device,
861 	    "Mapped BAR%d v:[%p-%p] p:[%p-%p] (0x%jx bytes) (%s)\n",
862 	    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
863 	    (char *)bar->vbase + bar->size - 1,
864 	    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
865 	    (uintmax_t)bar->size, kind);
866 }
867 
868 static int
869 map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
870 {
871 
872 	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
873 	    &bar->pci_resource_id, RF_ACTIVE);
874 	if (bar->pci_resource == NULL)
875 		return (ENXIO);
876 
877 	save_bar_parameters(bar);
878 	bar->map_mode = VM_MEMATTR_UNCACHEABLE;
879 	print_map_success(ntb, bar, "mmr");
880 	return (0);
881 }
882 
883 static int
884 map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
885 {
886 	int rc;
887 	vm_memattr_t mapmode;
888 	uint8_t bar_size_bits = 0;
889 
890 	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
891 	    &bar->pci_resource_id, RF_ACTIVE);
892 
893 	if (bar->pci_resource == NULL)
894 		return (ENXIO);
895 
896 	save_bar_parameters(bar);
897 	/*
898 	 * Ivytown NTB BAR sizes are misreported by the hardware due to a
899 	 * hardware issue. To work around this, query the size it should be
900 	 * configured to by the device and modify the resource to correspond to
901 	 * this new size. The BIOS on systems with this problem is required to
902 	 * provide enough address space to allow the driver to make this change
903 	 * safely.
904 	 *
905 	 * Ideally I could have just specified the size when I allocated the
906 	 * resource like:
907 	 *  bus_alloc_resource(ntb->device,
908 	 *	SYS_RES_MEMORY, &bar->pci_resource_id, 0ul, ~0ul,
909 	 *	1ul << bar_size_bits, RF_ACTIVE);
910 	 * but the PCI driver does not honor the size in this call, so we have
911 	 * to modify it after the fact.
912 	 */
913 	if (HAS_FEATURE(ntb, NTB_BAR_SIZE_4K)) {
914 		if (bar->pci_resource_id == PCIR_BAR(2))
915 			bar_size_bits = pci_read_config(ntb->device,
916 			    XEON_PBAR23SZ_OFFSET, 1);
917 		else
918 			bar_size_bits = pci_read_config(ntb->device,
919 			    XEON_PBAR45SZ_OFFSET, 1);
920 
921 		rc = bus_adjust_resource(ntb->device, SYS_RES_MEMORY,
922 		    bar->pci_resource, bar->pbase,
923 		    bar->pbase + (1ul << bar_size_bits) - 1);
924 		if (rc != 0) {
925 			device_printf(ntb->device,
926 			    "unable to resize bar\n");
927 			return (rc);
928 		}
929 
930 		save_bar_parameters(bar);
931 	}
932 
933 	bar->map_mode = VM_MEMATTR_UNCACHEABLE;
934 	print_map_success(ntb, bar, "mw");
935 
936 	/*
937 	 * Optionally, mark MW BARs as anything other than UC to improve
938 	 * performance.
939 	 */
940 	mapmode = intel_ntb_pat_flags();
941 	if (mapmode == bar->map_mode)
942 		return (0);
943 
944 	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size, mapmode);
945 	if (rc == 0) {
946 		bar->map_mode = mapmode;
947 		device_printf(ntb->device,
948 		    "Marked BAR%d v:[%p-%p] p:[%p-%p] as "
949 		    "%s.\n",
950 		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
951 		    (char *)bar->vbase + bar->size - 1,
952 		    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
953 		    intel_ntb_vm_memattr_to_str(mapmode));
954 	} else
955 		device_printf(ntb->device,
956 		    "Unable to mark BAR%d v:[%p-%p] p:[%p-%p] as "
957 		    "%s: %d\n",
958 		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
959 		    (char *)bar->vbase + bar->size - 1,
960 		    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
961 		    intel_ntb_vm_memattr_to_str(mapmode), rc);
962 		/* Proceed anyway */
963 	return (0);
964 }
965 
966 static void
967 intel_ntb_unmap_pci_bar(struct ntb_softc *ntb)
968 {
969 	struct ntb_pci_bar_info *bar;
970 	int i;
971 
972 	if (ntb->bar0_dma_map != NULL) {
973 		bus_dmamap_unload(ntb->bar0_dma_tag, ntb->bar0_dma_map);
974 		bus_dmamap_destroy(ntb->bar0_dma_tag, ntb->bar0_dma_map);
975 	}
976 	if (ntb->bar0_dma_tag != NULL)
977 		bus_dma_tag_destroy(ntb->bar0_dma_tag);
978 	for (i = 0; i < NTB_MAX_BARS; i++) {
979 		bar = &ntb->bar_info[i];
980 		if (bar->pci_resource != NULL)
981 			bus_release_resource(ntb->device, SYS_RES_MEMORY,
982 			    bar->pci_resource_id, bar->pci_resource);
983 	}
984 }
985 
986 static int
987 intel_ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors)
988 {
989 	uint32_t i;
990 	int rc;
991 
992 	for (i = 0; i < num_vectors; i++) {
993 		ntb->int_info[i].rid = i + 1;
994 		ntb->int_info[i].res = bus_alloc_resource_any(ntb->device,
995 		    SYS_RES_IRQ, &ntb->int_info[i].rid, RF_ACTIVE);
996 		if (ntb->int_info[i].res == NULL) {
997 			device_printf(ntb->device,
998 			    "bus_alloc_resource failed\n");
999 			return (ENOMEM);
1000 		}
1001 		ntb->int_info[i].tag = NULL;
1002 		ntb->allocated_interrupts++;
1003 		rc = bus_setup_intr(ntb->device, ntb->int_info[i].res,
1004 		    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_vec_isr,
1005 		    &ntb->msix_vec[i], &ntb->int_info[i].tag);
1006 		if (rc != 0) {
1007 			device_printf(ntb->device, "bus_setup_intr failed\n");
1008 			return (ENXIO);
1009 		}
1010 	}
1011 	return (0);
1012 }
1013 
1014 /*
1015  * The Linux NTB driver drops from MSI-X to legacy INTx if a unique vector
1016  * cannot be allocated for each MSI-X message.  JHB seems to think remapping
1017  * should be okay.  This tunable should enable us to test that hypothesis
1018  * when someone gets their hands on some Xeon hardware.
1019  */
1020 static int ntb_force_remap_mode;
1021 SYSCTL_INT(_hw_ntb, OID_AUTO, force_remap_mode, CTLFLAG_RDTUN,
1022     &ntb_force_remap_mode, 0, "If enabled, force MSI-X messages to be remapped"
1023     " to a smaller number of ithreads, even if the desired number are "
1024     "available");
1025 
1026 /*
1027  * In case it is NOT ok, give consumers an abort button.
1028  */
1029 static int ntb_prefer_intx;
1030 SYSCTL_INT(_hw_ntb, OID_AUTO, prefer_intx_to_remap, CTLFLAG_RDTUN,
1031     &ntb_prefer_intx, 0, "If enabled, prefer to use legacy INTx mode rather "
1032     "than remapping MSI-X messages over available slots (match Linux driver "
1033     "behavior)");
1034 
1035 /*
1036  * Remap the desired number of MSI-X messages to available ithreads in a simple
1037  * round-robin fashion.
1038  */
1039 static int
1040 intel_ntb_remap_msix(device_t dev, uint32_t desired, uint32_t avail)
1041 {
1042 	u_int *vectors;
1043 	uint32_t i;
1044 	int rc;
1045 
1046 	if (ntb_prefer_intx != 0)
1047 		return (ENXIO);
1048 
1049 	vectors = malloc(desired * sizeof(*vectors), M_NTB, M_ZERO | M_WAITOK);
1050 
1051 	for (i = 0; i < desired; i++)
1052 		vectors[i] = (i % avail) + 1;
1053 
1054 	rc = pci_remap_msix(dev, desired, vectors);
1055 	free(vectors, M_NTB);
1056 	return (rc);
1057 }
1058 
1059 static int
1060 intel_ntb_init_isr(struct ntb_softc *ntb)
1061 {
1062 	uint32_t desired_vectors, num_vectors;
1063 	int rc;
1064 
1065 	ntb->allocated_interrupts = 0;
1066 	ntb->last_ts = ticks;
1067 
1068 	/*
1069 	 * Mask all doorbell interrupts.  (Except link events!)
1070 	 */
1071 	DB_MASK_LOCK(ntb);
1072 	ntb->db_mask = ntb->db_valid_mask;
1073 	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
1074 	DB_MASK_UNLOCK(ntb);
1075 
1076 	num_vectors = desired_vectors = MIN(pci_msix_count(ntb->device),
1077 	    ntb->db_count);
1078 	if (desired_vectors >= 1) {
1079 		rc = pci_alloc_msix(ntb->device, &num_vectors);
1080 
1081 		if (ntb_force_remap_mode != 0 && rc == 0 &&
1082 		    num_vectors == desired_vectors)
1083 			num_vectors--;
1084 
1085 		if (rc == 0 && num_vectors < desired_vectors) {
1086 			rc = intel_ntb_remap_msix(ntb->device, desired_vectors,
1087 			    num_vectors);
1088 			if (rc == 0)
1089 				num_vectors = desired_vectors;
1090 			else
1091 				pci_release_msi(ntb->device);
1092 		}
1093 		if (rc != 0)
1094 			num_vectors = 1;
1095 	} else
1096 		num_vectors = 1;
1097 
1098 	if (ntb->type == NTB_XEON && num_vectors < ntb->db_vec_count) {
1099 		if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
1100 			device_printf(ntb->device,
1101 			    "Errata workaround does not support MSI or INTX\n");
1102 			return (EINVAL);
1103 		}
1104 
1105 		ntb->db_vec_count = 1;
1106 		ntb->db_vec_shift = XEON_DB_TOTAL_SHIFT;
1107 		rc = intel_ntb_setup_legacy_interrupt(ntb);
1108 	} else {
1109 		if (num_vectors - 1 != XEON_NONLINK_DB_MSIX_BITS &&
1110 		    HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
1111 			device_printf(ntb->device,
1112 			    "Errata workaround expects %d doorbell bits\n",
1113 			    XEON_NONLINK_DB_MSIX_BITS);
1114 			return (EINVAL);
1115 		}
1116 
1117 		intel_ntb_create_msix_vec(ntb, num_vectors);
1118 		rc = intel_ntb_setup_msix(ntb, num_vectors);
1119 	}
1120 	if (rc != 0) {
1121 		device_printf(ntb->device,
1122 		    "Error allocating interrupts: %d\n", rc);
1123 		intel_ntb_free_msix_vec(ntb);
1124 	}
1125 
1126 	return (rc);
1127 }
1128 
1129 static int
1130 intel_ntb_setup_legacy_interrupt(struct ntb_softc *ntb)
1131 {
1132 	int rc;
1133 
1134 	ntb->int_info[0].rid = 0;
1135 	ntb->int_info[0].res = bus_alloc_resource_any(ntb->device, SYS_RES_IRQ,
1136 	    &ntb->int_info[0].rid, RF_SHAREABLE|RF_ACTIVE);
1137 	if (ntb->int_info[0].res == NULL) {
1138 		device_printf(ntb->device, "bus_alloc_resource failed\n");
1139 		return (ENOMEM);
1140 	}
1141 
1142 	ntb->int_info[0].tag = NULL;
1143 	ntb->allocated_interrupts = 1;
1144 
1145 	rc = bus_setup_intr(ntb->device, ntb->int_info[0].res,
1146 	    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_irq_isr,
1147 	    ntb, &ntb->int_info[0].tag);
1148 	if (rc != 0) {
1149 		device_printf(ntb->device, "bus_setup_intr failed\n");
1150 		return (ENXIO);
1151 	}
1152 
1153 	return (0);
1154 }
1155 
1156 static void
1157 intel_ntb_teardown_interrupts(struct ntb_softc *ntb)
1158 {
1159 	struct ntb_int_info *current_int;
1160 	int i;
1161 
1162 	for (i = 0; i < ntb->allocated_interrupts; i++) {
1163 		current_int = &ntb->int_info[i];
1164 		if (current_int->tag != NULL)
1165 			bus_teardown_intr(ntb->device, current_int->res,
1166 			    current_int->tag);
1167 
1168 		if (current_int->res != NULL)
1169 			bus_release_resource(ntb->device, SYS_RES_IRQ,
1170 			    rman_get_rid(current_int->res), current_int->res);
1171 	}
1172 
1173 	intel_ntb_free_msix_vec(ntb);
1174 	pci_release_msi(ntb->device);
1175 }
1176 
1177 /*
1178  * Doorbell register and mask are 64-bit on Atom, 16-bit on Xeon.  Abstract it
1179  * out to make code clearer.
1180  */
1181 static inline uint64_t
1182 db_ioread(struct ntb_softc *ntb, uint64_t regoff)
1183 {
1184 
1185 	if (ntb->type == NTB_ATOM)
1186 		return (intel_ntb_reg_read(8, regoff));
1187 
1188 	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
1189 
1190 	return (intel_ntb_reg_read(2, regoff));
1191 }
1192 
1193 static inline void
1194 db_iowrite(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
1195 {
1196 
1197 	KASSERT((val & ~ntb->db_valid_mask) == 0,
1198 	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
1199 	     (uintmax_t)(val & ~ntb->db_valid_mask),
1200 	     (uintmax_t)ntb->db_valid_mask));
1201 
1202 	if (regoff == ntb->self_reg->db_mask)
1203 		DB_MASK_ASSERT(ntb, MA_OWNED);
1204 	db_iowrite_raw(ntb, regoff, val);
1205 }
1206 
1207 static inline void
1208 db_iowrite_raw(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
1209 {
1210 
1211 	if (ntb->type == NTB_ATOM) {
1212 		intel_ntb_reg_write(8, regoff, val);
1213 		return;
1214 	}
1215 
1216 	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
1217 	intel_ntb_reg_write(2, regoff, (uint16_t)val);
1218 }
1219 
1220 static void
1221 intel_ntb_db_set_mask(device_t dev, uint64_t bits)
1222 {
1223 	struct ntb_softc *ntb = device_get_softc(dev);
1224 
1225 	DB_MASK_LOCK(ntb);
1226 	ntb->db_mask |= bits;
1227 	if (!HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
1228 		db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
1229 	DB_MASK_UNLOCK(ntb);
1230 }
1231 
1232 static void
1233 intel_ntb_db_clear_mask(device_t dev, uint64_t bits)
1234 {
1235 	struct ntb_softc *ntb = device_get_softc(dev);
1236 	uint64_t ibits;
1237 	int i;
1238 
1239 	KASSERT((bits & ~ntb->db_valid_mask) == 0,
1240 	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
1241 	     (uintmax_t)(bits & ~ntb->db_valid_mask),
1242 	     (uintmax_t)ntb->db_valid_mask));
1243 
1244 	DB_MASK_LOCK(ntb);
1245 	ibits = ntb->fake_db & ntb->db_mask & bits;
1246 	ntb->db_mask &= ~bits;
1247 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
1248 		/* Simulate fake interrupts if unmasked DB bits are set. */
1249 		ntb->force_db |= ibits;
1250 		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
1251 			if ((ibits & intel_ntb_db_vector_mask(dev, i)) != 0)
1252 				swi_sched(ntb->int_info[i].tag, 0);
1253 		}
1254 	} else {
1255 		db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
1256 	}
1257 	DB_MASK_UNLOCK(ntb);
1258 }
1259 
1260 static uint64_t
1261 intel_ntb_db_read(device_t dev)
1262 {
1263 	struct ntb_softc *ntb = device_get_softc(dev);
1264 
1265 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
1266 		return (ntb->fake_db);
1267 
1268 	return (db_ioread(ntb, ntb->self_reg->db_bell));
1269 }
1270 
1271 static void
1272 intel_ntb_db_clear(device_t dev, uint64_t bits)
1273 {
1274 	struct ntb_softc *ntb = device_get_softc(dev);
1275 
1276 	KASSERT((bits & ~ntb->db_valid_mask) == 0,
1277 	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
1278 	     (uintmax_t)(bits & ~ntb->db_valid_mask),
1279 	     (uintmax_t)ntb->db_valid_mask));
1280 
1281 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
1282 		DB_MASK_LOCK(ntb);
1283 		ntb->fake_db &= ~bits;
1284 		DB_MASK_UNLOCK(ntb);
1285 		return;
1286 	}
1287 
1288 	db_iowrite(ntb, ntb->self_reg->db_bell, bits);
1289 }
1290 
1291 static inline uint64_t
1292 intel_ntb_vec_mask(struct ntb_softc *ntb, uint64_t db_vector)
1293 {
1294 	uint64_t shift, mask;
1295 
1296 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
1297 		/*
1298 		 * Remap vectors in custom way to make at least first
1299 		 * three doorbells to not generate stray events.
1300 		 * This breaks Linux compatibility (if one existed)
1301 		 * when more then one DB is used (not by if_ntb).
1302 		 */
1303 		if (db_vector < XEON_NONLINK_DB_MSIX_BITS - 1)
1304 			return (1 << db_vector);
1305 		if (db_vector == XEON_NONLINK_DB_MSIX_BITS - 1)
1306 			return (0x7ffc);
1307 	}
1308 
1309 	shift = ntb->db_vec_shift;
1310 	mask = (1ull << shift) - 1;
1311 	return (mask << (shift * db_vector));
1312 }
1313 
1314 static void
1315 intel_ntb_interrupt(struct ntb_softc *ntb, uint32_t vec)
1316 {
1317 	uint64_t vec_mask;
1318 
1319 	ntb->last_ts = ticks;
1320 	vec_mask = intel_ntb_vec_mask(ntb, vec);
1321 
1322 	if ((vec_mask & ntb->db_link_mask) != 0) {
1323 		if (intel_ntb_poll_link(ntb))
1324 			ntb_link_event(ntb->device);
1325 	}
1326 
1327 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP) &&
1328 	    (vec_mask & ntb->db_link_mask) == 0) {
1329 		DB_MASK_LOCK(ntb);
1330 
1331 		/*
1332 		 * Do not report same DB events again if not cleared yet,
1333 		 * unless the mask was just cleared for them and this
1334 		 * interrupt handler call can be the consequence of it.
1335 		 */
1336 		vec_mask &= ~ntb->fake_db | ntb->force_db;
1337 		ntb->force_db &= ~vec_mask;
1338 
1339 		/* Update our internal doorbell register. */
1340 		ntb->fake_db |= vec_mask;
1341 
1342 		/* Do not report masked DB events. */
1343 		vec_mask &= ~ntb->db_mask;
1344 
1345 		DB_MASK_UNLOCK(ntb);
1346 	}
1347 
1348 	if ((vec_mask & ntb->db_valid_mask) != 0)
1349 		ntb_db_event(ntb->device, vec);
1350 }
1351 
1352 static void
1353 ndev_vec_isr(void *arg)
1354 {
1355 	struct ntb_vec *nvec = arg;
1356 
1357 	intel_ntb_interrupt(nvec->ntb, nvec->num);
1358 }
1359 
/*
 * Single-interrupt handler; 'arg' is the softc.  If we couldn't set up
 * MSI-X, we only have the one vector, so everything is reported as vector 0.
 */
static void
ndev_irq_isr(void *arg)
{
	struct ntb_softc *ntb = arg;

	intel_ntb_interrupt(ntb, 0);
}
1366 
1367 static int
1368 intel_ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors)
1369 {
1370 	uint32_t i;
1371 
1372 	ntb->msix_vec = malloc(num_vectors * sizeof(*ntb->msix_vec), M_NTB,
1373 	    M_ZERO | M_WAITOK);
1374 	for (i = 0; i < num_vectors; i++) {
1375 		ntb->msix_vec[i].num = i;
1376 		ntb->msix_vec[i].ntb = ntb;
1377 	}
1378 
1379 	return (0);
1380 }
1381 
1382 static void
1383 intel_ntb_free_msix_vec(struct ntb_softc *ntb)
1384 {
1385 
1386 	if (ntb->msix_vec == NULL)
1387 		return;
1388 
1389 	free(ntb->msix_vec, M_NTB);
1390 	ntb->msix_vec = NULL;
1391 }
1392 
1393 static void
1394 intel_ntb_get_msix_info(struct ntb_softc *ntb)
1395 {
1396 	struct pci_devinfo *dinfo;
1397 	struct pcicfg_msix *msix;
1398 	uint32_t laddr, data, i, offset;
1399 
1400 	dinfo = device_get_ivars(ntb->device);
1401 	msix = &dinfo->cfg.msix;
1402 
1403 	CTASSERT(XEON_NONLINK_DB_MSIX_BITS == nitems(ntb->msix_data));
1404 
1405 	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
1406 		offset = msix->msix_table_offset + i * PCI_MSIX_ENTRY_SIZE;
1407 
1408 		laddr = bus_read_4(msix->msix_table_res, offset +
1409 		    PCI_MSIX_ENTRY_LOWER_ADDR);
1410 		intel_ntb_printf(2, "local MSIX addr(%u): 0x%x\n", i, laddr);
1411 
1412 		KASSERT((laddr & MSI_INTEL_ADDR_BASE) == MSI_INTEL_ADDR_BASE,
1413 		    ("local MSIX addr 0x%x not in MSI base 0x%x", laddr,
1414 		     MSI_INTEL_ADDR_BASE));
1415 		ntb->msix_data[i].nmd_ofs = laddr;
1416 
1417 		data = bus_read_4(msix->msix_table_res, offset +
1418 		    PCI_MSIX_ENTRY_DATA);
1419 		intel_ntb_printf(2, "local MSIX data(%u): 0x%x\n", i, data);
1420 
1421 		ntb->msix_data[i].nmd_data = data;
1422 	}
1423 }
1424 
1425 static struct ntb_hw_info *
1426 intel_ntb_get_device_info(uint32_t device_id)
1427 {
1428 	struct ntb_hw_info *ep;
1429 
1430 	for (ep = pci_ids; ep < &pci_ids[nitems(pci_ids)]; ep++) {
1431 		if (ep->device_id == device_id)
1432 			return (ep);
1433 	}
1434 	return (NULL);
1435 }
1436 
1437 static void
1438 intel_ntb_teardown_xeon(struct ntb_softc *ntb)
1439 {
1440 
1441 	if (ntb->reg != NULL)
1442 		intel_ntb_link_disable(ntb->device);
1443 }
1444 
1445 static void
1446 intel_ntb_detect_max_mw(struct ntb_softc *ntb)
1447 {
1448 
1449 	if (ntb->type == NTB_ATOM) {
1450 		ntb->mw_count = ATOM_MW_COUNT;
1451 		return;
1452 	}
1453 
1454 	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
1455 		ntb->mw_count = XEON_HSX_SPLIT_MW_COUNT;
1456 	else
1457 		ntb->mw_count = XEON_SNB_MW_COUNT;
1458 }
1459 
1460 static int
1461 intel_ntb_detect_xeon(struct ntb_softc *ntb)
1462 {
1463 	uint8_t ppd, conn_type;
1464 
1465 	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 1);
1466 	ntb->ppd = ppd;
1467 
1468 	if ((ppd & XEON_PPD_DEV_TYPE) != 0)
1469 		ntb->dev_type = NTB_DEV_DSD;
1470 	else
1471 		ntb->dev_type = NTB_DEV_USD;
1472 
1473 	if ((ppd & XEON_PPD_SPLIT_BAR) != 0)
1474 		ntb->features |= NTB_SPLIT_BAR;
1475 
1476 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP) &&
1477 	    !HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
1478 		device_printf(ntb->device,
1479 		    "Can not apply SB01BASE_LOCKUP workaround "
1480 		    "with split BARs disabled!\n");
1481 		device_printf(ntb->device,
1482 		    "Expect system hangs under heavy NTB traffic!\n");
1483 		ntb->features &= ~NTB_SB01BASE_LOCKUP;
1484 	}
1485 
1486 	/*
1487 	 * SDOORBELL errata workaround gets in the way of SB01BASE_LOCKUP
1488 	 * errata workaround; only do one at a time.
1489 	 */
1490 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
1491 		ntb->features &= ~NTB_SDOORBELL_LOCKUP;
1492 
1493 	conn_type = ppd & XEON_PPD_CONN_TYPE;
1494 	switch (conn_type) {
1495 	case NTB_CONN_B2B:
1496 		ntb->conn_type = conn_type;
1497 		break;
1498 	case NTB_CONN_RP:
1499 	case NTB_CONN_TRANSPARENT:
1500 	default:
1501 		device_printf(ntb->device, "Unsupported connection type: %u\n",
1502 		    (unsigned)conn_type);
1503 		return (ENXIO);
1504 	}
1505 	return (0);
1506 }
1507 
1508 static int
1509 intel_ntb_detect_atom(struct ntb_softc *ntb)
1510 {
1511 	uint32_t ppd, conn_type;
1512 
1513 	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4);
1514 	ntb->ppd = ppd;
1515 
1516 	if ((ppd & ATOM_PPD_DEV_TYPE) != 0)
1517 		ntb->dev_type = NTB_DEV_DSD;
1518 	else
1519 		ntb->dev_type = NTB_DEV_USD;
1520 
1521 	conn_type = (ppd & ATOM_PPD_CONN_TYPE) >> 8;
1522 	switch (conn_type) {
1523 	case NTB_CONN_B2B:
1524 		ntb->conn_type = conn_type;
1525 		break;
1526 	default:
1527 		device_printf(ntb->device, "Unsupported NTB configuration\n");
1528 		return (ENXIO);
1529 	}
1530 	return (0);
1531 }
1532 
1533 static int
1534 intel_ntb_xeon_init_dev(struct ntb_softc *ntb)
1535 {
1536 	int rc;
1537 
1538 	ntb->spad_count		= XEON_SPAD_COUNT;
1539 	ntb->db_count		= XEON_DB_COUNT;
1540 	ntb->db_link_mask	= XEON_DB_LINK_BIT;
1541 	ntb->db_vec_count	= XEON_DB_MSIX_VECTOR_COUNT;
1542 	ntb->db_vec_shift	= XEON_DB_MSIX_VECTOR_SHIFT;
1543 
1544 	if (ntb->conn_type != NTB_CONN_B2B) {
1545 		device_printf(ntb->device, "Connection type %d not supported\n",
1546 		    ntb->conn_type);
1547 		return (ENXIO);
1548 	}
1549 
1550 	ntb->reg = &xeon_reg;
1551 	ntb->self_reg = &xeon_pri_reg;
1552 	ntb->peer_reg = &xeon_b2b_reg;
1553 	ntb->xlat_reg = &xeon_sec_xlat;
1554 
1555 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
1556 		ntb->force_db = ntb->fake_db = 0;
1557 		ntb->msix_mw_idx = (ntb->mw_count + g_ntb_msix_idx) %
1558 		    ntb->mw_count;
1559 		intel_ntb_printf(2, "Setting up MSIX mw idx %d means %u\n",
1560 		    g_ntb_msix_idx, ntb->msix_mw_idx);
1561 		rc = intel_ntb_mw_set_wc_internal(ntb, ntb->msix_mw_idx,
1562 		    VM_MEMATTR_UNCACHEABLE);
1563 		KASSERT(rc == 0, ("shouldn't fail"));
1564 	} else if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
1565 		/*
1566 		 * There is a Xeon hardware errata related to writes to SDOORBELL or
1567 		 * B2BDOORBELL in conjunction with inbound access to NTB MMIO space,
1568 		 * which may hang the system.  To workaround this, use a memory
1569 		 * window to access the interrupt and scratch pad registers on the
1570 		 * remote system.
1571 		 */
1572 		ntb->b2b_mw_idx = (ntb->mw_count + g_ntb_mw_idx) %
1573 		    ntb->mw_count;
1574 		intel_ntb_printf(2, "Setting up b2b mw idx %d means %u\n",
1575 		    g_ntb_mw_idx, ntb->b2b_mw_idx);
1576 		rc = intel_ntb_mw_set_wc_internal(ntb, ntb->b2b_mw_idx,
1577 		    VM_MEMATTR_UNCACHEABLE);
1578 		KASSERT(rc == 0, ("shouldn't fail"));
1579 	} else if (HAS_FEATURE(ntb, NTB_B2BDOORBELL_BIT14))
1580 		/*
1581 		 * HW Errata on bit 14 of b2bdoorbell register.  Writes will not be
1582 		 * mirrored to the remote system.  Shrink the number of bits by one,
1583 		 * since bit 14 is the last bit.
1584 		 *
1585 		 * On REGS_THRU_MW errata mode, we don't use the b2bdoorbell register
1586 		 * anyway.  Nor for non-B2B connection types.
1587 		 */
1588 		ntb->db_count = XEON_DB_COUNT - 1;
1589 
1590 	ntb->db_valid_mask = (1ull << ntb->db_count) - 1;
1591 
1592 	if (ntb->dev_type == NTB_DEV_USD)
1593 		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_dsd_addr,
1594 		    &xeon_b2b_usd_addr);
1595 	else
1596 		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_usd_addr,
1597 		    &xeon_b2b_dsd_addr);
1598 	if (rc != 0)
1599 		return (rc);
1600 
1601 	/* Enable Bus Master and Memory Space on the secondary side */
1602 	intel_ntb_reg_write(2, XEON_SPCICMD_OFFSET,
1603 	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
1604 
1605 	/*
1606 	 * Mask all doorbell interrupts.
1607 	 */
1608 	DB_MASK_LOCK(ntb);
1609 	ntb->db_mask = ntb->db_valid_mask;
1610 	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
1611 	DB_MASK_UNLOCK(ntb);
1612 
1613 	rc = intel_ntb_init_isr(ntb);
1614 	return (rc);
1615 }
1616 
1617 static int
1618 intel_ntb_atom_init_dev(struct ntb_softc *ntb)
1619 {
1620 	int error;
1621 
1622 	KASSERT(ntb->conn_type == NTB_CONN_B2B,
1623 	    ("Unsupported NTB configuration (%d)\n", ntb->conn_type));
1624 
1625 	ntb->spad_count		 = ATOM_SPAD_COUNT;
1626 	ntb->db_count		 = ATOM_DB_COUNT;
1627 	ntb->db_vec_count	 = ATOM_DB_MSIX_VECTOR_COUNT;
1628 	ntb->db_vec_shift	 = ATOM_DB_MSIX_VECTOR_SHIFT;
1629 	ntb->db_valid_mask	 = (1ull << ntb->db_count) - 1;
1630 
1631 	ntb->reg = &atom_reg;
1632 	ntb->self_reg = &atom_pri_reg;
1633 	ntb->peer_reg = &atom_b2b_reg;
1634 	ntb->xlat_reg = &atom_sec_xlat;
1635 
1636 	/*
1637 	 * FIXME - MSI-X bug on early Atom HW, remove once internal issue is
1638 	 * resolved.  Mask transaction layer internal parity errors.
1639 	 */
1640 	pci_write_config(ntb->device, 0xFC, 0x4, 4);
1641 
1642 	configure_atom_secondary_side_bars(ntb);
1643 
1644 	/* Enable Bus Master and Memory Space on the secondary side */
1645 	intel_ntb_reg_write(2, ATOM_SPCICMD_OFFSET,
1646 	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
1647 
1648 	error = intel_ntb_init_isr(ntb);
1649 	if (error != 0)
1650 		return (error);
1651 
1652 	/* Initiate PCI-E link training */
1653 	intel_ntb_link_enable(ntb->device, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
1654 
1655 	callout_reset(&ntb->heartbeat_timer, 0, atom_link_hb, ntb);
1656 
1657 	return (0);
1658 }
1659 
1660 /* XXX: Linux driver doesn't seem to do any of this for Atom. */
1661 static void
1662 configure_atom_secondary_side_bars(struct ntb_softc *ntb)
1663 {
1664 
1665 	if (ntb->dev_type == NTB_DEV_USD) {
1666 		intel_ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
1667 		    XEON_B2B_BAR2_ADDR64);
1668 		intel_ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
1669 		    XEON_B2B_BAR4_ADDR64);
1670 		intel_ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
1671 		intel_ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
1672 	} else {
1673 		intel_ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
1674 		    XEON_B2B_BAR2_ADDR64);
1675 		intel_ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
1676 		    XEON_B2B_BAR4_ADDR64);
1677 		intel_ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
1678 		intel_ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
1679 	}
1680 }
1681 
1682 
1683 /*
1684  * When working around Xeon SDOORBELL errata by remapping remote registers in a
1685  * MW, limit the B2B MW to half a MW.  By sharing a MW, half the shared MW
1686  * remains for use by a higher layer.
1687  *
1688  * Will only be used if working around SDOORBELL errata and the BIOS-configured
1689  * MW size is sufficiently large.
1690  */
1691 static unsigned int ntb_b2b_mw_share;
1692 SYSCTL_UINT(_hw_ntb, OID_AUTO, b2b_mw_share, CTLFLAG_RDTUN, &ntb_b2b_mw_share,
1693     0, "If enabled (non-zero), prefer to share half of the B2B peer register "
1694     "MW with higher level consumers.  Both sides of the NTB MUST set the same "
1695     "value here.");
1696 
1697 static void
1698 xeon_reset_sbar_size(struct ntb_softc *ntb, enum ntb_bar idx,
1699     enum ntb_bar regbar)
1700 {
1701 	struct ntb_pci_bar_info *bar;
1702 	uint8_t bar_sz;
1703 
1704 	if (!HAS_FEATURE(ntb, NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_3)
1705 		return;
1706 
1707 	bar = &ntb->bar_info[idx];
1708 	bar_sz = pci_read_config(ntb->device, bar->psz_off, 1);
1709 	if (idx == regbar) {
1710 		if (ntb->b2b_off != 0)
1711 			bar_sz--;
1712 		else
1713 			bar_sz = 0;
1714 	}
1715 	pci_write_config(ntb->device, bar->ssz_off, bar_sz, 1);
1716 	bar_sz = pci_read_config(ntb->device, bar->ssz_off, 1);
1717 	(void)bar_sz;
1718 }
1719 
1720 static void
1721 xeon_set_sbar_base_and_limit(struct ntb_softc *ntb, uint64_t bar_addr,
1722     enum ntb_bar idx, enum ntb_bar regbar)
1723 {
1724 	uint64_t reg_val;
1725 	uint32_t base_reg, lmt_reg;
1726 
1727 	bar_get_xlat_params(ntb, idx, &base_reg, NULL, &lmt_reg);
1728 	if (idx == regbar) {
1729 		if (ntb->b2b_off)
1730 			bar_addr += ntb->b2b_off;
1731 		else
1732 			bar_addr = 0;
1733 	}
1734 
1735 	if (!bar_is_64bit(ntb, idx)) {
1736 		intel_ntb_reg_write(4, base_reg, bar_addr);
1737 		reg_val = intel_ntb_reg_read(4, base_reg);
1738 		(void)reg_val;
1739 
1740 		intel_ntb_reg_write(4, lmt_reg, bar_addr);
1741 		reg_val = intel_ntb_reg_read(4, lmt_reg);
1742 		(void)reg_val;
1743 	} else {
1744 		intel_ntb_reg_write(8, base_reg, bar_addr);
1745 		reg_val = intel_ntb_reg_read(8, base_reg);
1746 		(void)reg_val;
1747 
1748 		intel_ntb_reg_write(8, lmt_reg, bar_addr);
1749 		reg_val = intel_ntb_reg_read(8, lmt_reg);
1750 		(void)reg_val;
1751 	}
1752 }
1753 
1754 static void
1755 xeon_set_pbar_xlat(struct ntb_softc *ntb, uint64_t base_addr, enum ntb_bar idx)
1756 {
1757 	struct ntb_pci_bar_info *bar;
1758 
1759 	bar = &ntb->bar_info[idx];
1760 	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_2) {
1761 		intel_ntb_reg_write(4, bar->pbarxlat_off, base_addr);
1762 		base_addr = intel_ntb_reg_read(4, bar->pbarxlat_off);
1763 	} else {
1764 		intel_ntb_reg_write(8, bar->pbarxlat_off, base_addr);
1765 		base_addr = intel_ntb_reg_read(8, bar->pbarxlat_off);
1766 	}
1767 	(void)base_addr;
1768 }
1769 
1770 static int
1771 xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
1772     const struct ntb_b2b_addr *peer_addr)
1773 {
1774 	struct ntb_pci_bar_info *b2b_bar;
1775 	vm_size_t bar_size;
1776 	uint64_t bar_addr;
1777 	enum ntb_bar b2b_bar_num, i;
1778 
1779 	if (ntb->b2b_mw_idx == B2B_MW_DISABLED) {
1780 		b2b_bar = NULL;
1781 		b2b_bar_num = NTB_CONFIG_BAR;
1782 		ntb->b2b_off = 0;
1783 	} else {
1784 		b2b_bar_num = intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx);
1785 		KASSERT(b2b_bar_num > 0 && b2b_bar_num < NTB_MAX_BARS,
1786 		    ("invalid b2b mw bar"));
1787 
1788 		b2b_bar = &ntb->bar_info[b2b_bar_num];
1789 		bar_size = b2b_bar->size;
1790 
1791 		if (ntb_b2b_mw_share != 0 &&
1792 		    (bar_size >> 1) >= XEON_B2B_MIN_SIZE)
1793 			ntb->b2b_off = bar_size >> 1;
1794 		else if (bar_size >= XEON_B2B_MIN_SIZE) {
1795 			ntb->b2b_off = 0;
1796 		} else {
1797 			device_printf(ntb->device,
1798 			    "B2B bar size is too small!\n");
1799 			return (EIO);
1800 		}
1801 	}
1802 
1803 	/*
1804 	 * Reset the secondary bar sizes to match the primary bar sizes.
1805 	 * (Except, disable or halve the size of the B2B secondary bar.)
1806 	 */
1807 	for (i = NTB_B2B_BAR_1; i < NTB_MAX_BARS; i++)
1808 		xeon_reset_sbar_size(ntb, i, b2b_bar_num);
1809 
1810 	bar_addr = 0;
1811 	if (b2b_bar_num == NTB_CONFIG_BAR)
1812 		bar_addr = addr->bar0_addr;
1813 	else if (b2b_bar_num == NTB_B2B_BAR_1)
1814 		bar_addr = addr->bar2_addr64;
1815 	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(ntb, NTB_SPLIT_BAR))
1816 		bar_addr = addr->bar4_addr64;
1817 	else if (b2b_bar_num == NTB_B2B_BAR_2)
1818 		bar_addr = addr->bar4_addr32;
1819 	else if (b2b_bar_num == NTB_B2B_BAR_3)
1820 		bar_addr = addr->bar5_addr32;
1821 	else
1822 		KASSERT(false, ("invalid bar"));
1823 
1824 	intel_ntb_reg_write(8, XEON_SBAR0BASE_OFFSET, bar_addr);
1825 
1826 	/*
1827 	 * Other SBARs are normally hit by the PBAR xlat, except for the b2b
1828 	 * register BAR.  The B2B BAR is either disabled above or configured
1829 	 * half-size.  It starts at PBAR xlat + offset.
1830 	 *
1831 	 * Also set up incoming BAR limits == base (zero length window).
1832 	 */
1833 	xeon_set_sbar_base_and_limit(ntb, addr->bar2_addr64, NTB_B2B_BAR_1,
1834 	    b2b_bar_num);
1835 	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
1836 		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr32,
1837 		    NTB_B2B_BAR_2, b2b_bar_num);
1838 		xeon_set_sbar_base_and_limit(ntb, addr->bar5_addr32,
1839 		    NTB_B2B_BAR_3, b2b_bar_num);
1840 	} else
1841 		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr64,
1842 		    NTB_B2B_BAR_2, b2b_bar_num);
1843 
1844 	/* Zero incoming translation addrs */
1845 	intel_ntb_reg_write(8, XEON_SBAR2XLAT_OFFSET, 0);
1846 	intel_ntb_reg_write(8, XEON_SBAR4XLAT_OFFSET, 0);
1847 
1848 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
1849 		uint32_t xlat_reg, lmt_reg;
1850 		enum ntb_bar bar_num;
1851 
1852 		/*
1853 		 * We point the chosen MSIX MW BAR xlat to remote LAPIC for
1854 		 * workaround
1855 		 */
1856 		bar_num = intel_ntb_mw_to_bar(ntb, ntb->msix_mw_idx);
1857 		bar_get_xlat_params(ntb, bar_num, NULL, &xlat_reg, &lmt_reg);
1858 		if (bar_is_64bit(ntb, bar_num)) {
1859 			intel_ntb_reg_write(8, xlat_reg, MSI_INTEL_ADDR_BASE);
1860 			ntb->msix_xlat = intel_ntb_reg_read(8, xlat_reg);
1861 			intel_ntb_reg_write(8, lmt_reg, 0);
1862 		} else {
1863 			intel_ntb_reg_write(4, xlat_reg, MSI_INTEL_ADDR_BASE);
1864 			ntb->msix_xlat = intel_ntb_reg_read(4, xlat_reg);
1865 			intel_ntb_reg_write(4, lmt_reg, 0);
1866 		}
1867 
1868 		ntb->peer_lapic_bar =  &ntb->bar_info[bar_num];
1869 	}
1870 	(void)intel_ntb_reg_read(8, XEON_SBAR2XLAT_OFFSET);
1871 	(void)intel_ntb_reg_read(8, XEON_SBAR4XLAT_OFFSET);
1872 
1873 	/* Zero outgoing translation limits (whole bar size windows) */
1874 	intel_ntb_reg_write(8, XEON_PBAR2LMT_OFFSET, 0);
1875 	intel_ntb_reg_write(8, XEON_PBAR4LMT_OFFSET, 0);
1876 
1877 	/* Set outgoing translation offsets */
1878 	xeon_set_pbar_xlat(ntb, peer_addr->bar2_addr64, NTB_B2B_BAR_1);
1879 	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
1880 		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr32, NTB_B2B_BAR_2);
1881 		xeon_set_pbar_xlat(ntb, peer_addr->bar5_addr32, NTB_B2B_BAR_3);
1882 	} else
1883 		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr64, NTB_B2B_BAR_2);
1884 
1885 	/* Set the translation offset for B2B registers */
1886 	bar_addr = 0;
1887 	if (b2b_bar_num == NTB_CONFIG_BAR)
1888 		bar_addr = peer_addr->bar0_addr;
1889 	else if (b2b_bar_num == NTB_B2B_BAR_1)
1890 		bar_addr = peer_addr->bar2_addr64;
1891 	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(ntb, NTB_SPLIT_BAR))
1892 		bar_addr = peer_addr->bar4_addr64;
1893 	else if (b2b_bar_num == NTB_B2B_BAR_2)
1894 		bar_addr = peer_addr->bar4_addr32;
1895 	else if (b2b_bar_num == NTB_B2B_BAR_3)
1896 		bar_addr = peer_addr->bar5_addr32;
1897 	else
1898 		KASSERT(false, ("invalid bar"));
1899 
1900 	/*
1901 	 * B2B_XLAT_OFFSET is a 64-bit register but can only be written 32 bits
1902 	 * at a time.
1903 	 */
1904 	intel_ntb_reg_write(4, XEON_B2B_XLAT_OFFSETL, bar_addr & 0xffffffff);
1905 	intel_ntb_reg_write(4, XEON_B2B_XLAT_OFFSETU, bar_addr >> 32);
1906 	return (0);
1907 }
1908 
1909 static inline bool
1910 _xeon_link_is_up(struct ntb_softc *ntb)
1911 {
1912 
1913 	if (ntb->conn_type == NTB_CONN_TRANSPARENT)
1914 		return (true);
1915 	return ((ntb->lnk_sta & NTB_LINK_STATUS_ACTIVE) != 0);
1916 }
1917 
1918 static inline bool
1919 link_is_up(struct ntb_softc *ntb)
1920 {
1921 
1922 	if (ntb->type == NTB_XEON)
1923 		return (_xeon_link_is_up(ntb) && (ntb->peer_msix_good ||
1924 		    !HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)));
1925 
1926 	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
1927 	return ((ntb->ntb_ctl & ATOM_CNTL_LINK_DOWN) == 0);
1928 }
1929 
1930 static inline bool
1931 atom_link_is_err(struct ntb_softc *ntb)
1932 {
1933 	uint32_t status;
1934 
1935 	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
1936 
1937 	status = intel_ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
1938 	if ((status & ATOM_LTSSMSTATEJMP_FORCEDETECT) != 0)
1939 		return (true);
1940 
1941 	status = intel_ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
1942 	return ((status & ATOM_IBIST_ERR_OFLOW) != 0);
1943 }
1944 
1945 /* Atom does not have link status interrupt, poll on that platform */
1946 static void
1947 atom_link_hb(void *arg)
1948 {
1949 	struct ntb_softc *ntb = arg;
1950 	sbintime_t timo, poll_ts;
1951 
1952 	timo = NTB_HB_TIMEOUT * hz;
1953 	poll_ts = ntb->last_ts + timo;
1954 
1955 	/*
1956 	 * Delay polling the link status if an interrupt was received, unless
1957 	 * the cached link status says the link is down.
1958 	 */
1959 	if ((sbintime_t)ticks - poll_ts < 0 && link_is_up(ntb)) {
1960 		timo = poll_ts - ticks;
1961 		goto out;
1962 	}
1963 
1964 	if (intel_ntb_poll_link(ntb))
1965 		ntb_link_event(ntb->device);
1966 
1967 	if (!link_is_up(ntb) && atom_link_is_err(ntb)) {
1968 		/* Link is down with error, proceed with recovery */
1969 		callout_reset(&ntb->lr_timer, 0, recover_atom_link, ntb);
1970 		return;
1971 	}
1972 
1973 out:
1974 	callout_reset(&ntb->heartbeat_timer, timo, atom_link_hb, ntb);
1975 }
1976 
1977 static void
1978 atom_perform_link_restart(struct ntb_softc *ntb)
1979 {
1980 	uint32_t status;
1981 
1982 	/* Driver resets the NTB ModPhy lanes - magic! */
1983 	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0xe0);
1984 	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x40);
1985 	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x60);
1986 	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0x60);
1987 
1988 	/* Driver waits 100ms to allow the NTB ModPhy to settle */
1989 	pause("ModPhy", hz / 10);
1990 
1991 	/* Clear AER Errors, write to clear */
1992 	status = intel_ntb_reg_read(4, ATOM_ERRCORSTS_OFFSET);
1993 	status &= PCIM_AER_COR_REPLAY_ROLLOVER;
1994 	intel_ntb_reg_write(4, ATOM_ERRCORSTS_OFFSET, status);
1995 
1996 	/* Clear unexpected electrical idle event in LTSSM, write to clear */
1997 	status = intel_ntb_reg_read(4, ATOM_LTSSMERRSTS0_OFFSET);
1998 	status |= ATOM_LTSSMERRSTS0_UNEXPECTEDEI;
1999 	intel_ntb_reg_write(4, ATOM_LTSSMERRSTS0_OFFSET, status);
2000 
2001 	/* Clear DeSkew Buffer error, write to clear */
2002 	status = intel_ntb_reg_read(4, ATOM_DESKEWSTS_OFFSET);
2003 	status |= ATOM_DESKEWSTS_DBERR;
2004 	intel_ntb_reg_write(4, ATOM_DESKEWSTS_OFFSET, status);
2005 
2006 	status = intel_ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
2007 	status &= ATOM_IBIST_ERR_OFLOW;
2008 	intel_ntb_reg_write(4, ATOM_IBSTERRRCRVSTS0_OFFSET, status);
2009 
2010 	/* Releases the NTB state machine to allow the link to retrain */
2011 	status = intel_ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
2012 	status &= ~ATOM_LTSSMSTATEJMP_FORCEDETECT;
2013 	intel_ntb_reg_write(4, ATOM_LTSSMSTATEJMP_OFFSET, status);
2014 }
2015 
2016 static int
2017 intel_ntb_port_number(device_t dev)
2018 {
2019 	struct ntb_softc *ntb = device_get_softc(dev);
2020 
2021 	return (ntb->dev_type == NTB_DEV_USD ? 0 : 1);
2022 }
2023 
2024 static int
2025 intel_ntb_peer_port_count(device_t dev)
2026 {
2027 
2028 	return (1);
2029 }
2030 
2031 static int
2032 intel_ntb_peer_port_number(device_t dev, int pidx)
2033 {
2034 	struct ntb_softc *ntb = device_get_softc(dev);
2035 
2036 	if (pidx != 0)
2037 		return (-EINVAL);
2038 
2039 	return (ntb->dev_type == NTB_DEV_USD ? 1 : 0);
2040 }
2041 
2042 static int
2043 intel_ntb_peer_port_idx(device_t dev, int port)
2044 {
2045 	int peer_port;
2046 
2047 	peer_port = intel_ntb_peer_port_number(dev, 0);
2048 	if (peer_port == -EINVAL || port != peer_port)
2049 		return (-EINVAL);
2050 
2051 	return (0);
2052 }
2053 
2054 static int
2055 intel_ntb_link_enable(device_t dev, enum ntb_speed speed __unused,
2056     enum ntb_width width __unused)
2057 {
2058 	struct ntb_softc *ntb = device_get_softc(dev);
2059 	uint32_t cntl;
2060 
2061 	intel_ntb_printf(2, "%s\n", __func__);
2062 
2063 	if (ntb->type == NTB_ATOM) {
2064 		pci_write_config(ntb->device, NTB_PPD_OFFSET,
2065 		    ntb->ppd | ATOM_PPD_INIT_LINK, 4);
2066 		return (0);
2067 	}
2068 
2069 	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
2070 		ntb_link_event(dev);
2071 		return (0);
2072 	}
2073 
2074 	cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
2075 	cntl &= ~(NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK);
2076 	cntl |= NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP;
2077 	cntl |= NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP;
2078 	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
2079 		cntl |= NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP;
2080 	intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
2081 	return (0);
2082 }
2083 
2084 static int
2085 intel_ntb_link_disable(device_t dev)
2086 {
2087 	struct ntb_softc *ntb = device_get_softc(dev);
2088 	uint32_t cntl;
2089 
2090 	intel_ntb_printf(2, "%s\n", __func__);
2091 
2092 	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
2093 		ntb_link_event(dev);
2094 		return (0);
2095 	}
2096 
2097 	cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
2098 	cntl &= ~(NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP);
2099 	cntl &= ~(NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP);
2100 	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
2101 		cntl &= ~(NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP);
2102 	cntl |= NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK;
2103 	intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
2104 	return (0);
2105 }
2106 
2107 static bool
2108 intel_ntb_link_enabled(device_t dev)
2109 {
2110 	struct ntb_softc *ntb = device_get_softc(dev);
2111 	uint32_t cntl;
2112 
2113 	if (ntb->type == NTB_ATOM) {
2114 		cntl = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4);
2115 		return ((cntl & ATOM_PPD_INIT_LINK) != 0);
2116 	}
2117 
2118 	if (ntb->conn_type == NTB_CONN_TRANSPARENT)
2119 		return (true);
2120 
2121 	cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
2122 	return ((cntl & NTB_CNTL_LINK_DISABLE) == 0);
2123 }
2124 
2125 static void
2126 recover_atom_link(void *arg)
2127 {
2128 	struct ntb_softc *ntb = arg;
2129 	unsigned speed, width, oldspeed, oldwidth;
2130 	uint32_t status32;
2131 
2132 	atom_perform_link_restart(ntb);
2133 
2134 	/*
2135 	 * There is a potential race between the 2 NTB devices recovering at
2136 	 * the same time.  If the times are the same, the link will not recover
2137 	 * and the driver will be stuck in this loop forever.  Add a random
2138 	 * interval to the recovery time to prevent this race.
2139 	 */
2140 	status32 = arc4random() % ATOM_LINK_RECOVERY_TIME;
2141 	pause("Link", (ATOM_LINK_RECOVERY_TIME + status32) * hz / 1000);
2142 
2143 	if (atom_link_is_err(ntb))
2144 		goto retry;
2145 
2146 	status32 = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
2147 	if ((status32 & ATOM_CNTL_LINK_DOWN) != 0)
2148 		goto out;
2149 
2150 	status32 = intel_ntb_reg_read(4, ntb->reg->lnk_sta);
2151 	width = NTB_LNK_STA_WIDTH(status32);
2152 	speed = status32 & NTB_LINK_SPEED_MASK;
2153 
2154 	oldwidth = NTB_LNK_STA_WIDTH(ntb->lnk_sta);
2155 	oldspeed = ntb->lnk_sta & NTB_LINK_SPEED_MASK;
2156 	if (oldwidth != width || oldspeed != speed)
2157 		goto retry;
2158 
2159 out:
2160 	callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz, atom_link_hb,
2161 	    ntb);
2162 	return;
2163 
2164 retry:
2165 	callout_reset(&ntb->lr_timer, NTB_HB_TIMEOUT * hz, recover_atom_link,
2166 	    ntb);
2167 }
2168 
2169 /*
2170  * Polls the HW link status register(s); returns true if something has changed.
2171  */
2172 static bool
2173 intel_ntb_poll_link(struct ntb_softc *ntb)
2174 {
2175 	uint32_t ntb_cntl;
2176 	uint16_t reg_val;
2177 
2178 	if (ntb->type == NTB_ATOM) {
2179 		ntb_cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
2180 		if (ntb_cntl == ntb->ntb_ctl)
2181 			return (false);
2182 
2183 		ntb->ntb_ctl = ntb_cntl;
2184 		ntb->lnk_sta = intel_ntb_reg_read(4, ntb->reg->lnk_sta);
2185 	} else {
2186 		db_iowrite_raw(ntb, ntb->self_reg->db_bell, ntb->db_link_mask);
2187 
2188 		reg_val = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2);
2189 		if (reg_val == ntb->lnk_sta)
2190 			return (false);
2191 
2192 		ntb->lnk_sta = reg_val;
2193 
2194 		if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
2195 			if (_xeon_link_is_up(ntb)) {
2196 				if (!ntb->peer_msix_good) {
2197 					callout_reset(&ntb->peer_msix_work, 0,
2198 					    intel_ntb_exchange_msix, ntb);
2199 					return (false);
2200 				}
2201 			} else {
2202 				ntb->peer_msix_good = false;
2203 				ntb->peer_msix_done = false;
2204 			}
2205 		}
2206 	}
2207 	return (true);
2208 }
2209 
2210 static inline enum ntb_speed
2211 intel_ntb_link_sta_speed(struct ntb_softc *ntb)
2212 {
2213 
2214 	if (!link_is_up(ntb))
2215 		return (NTB_SPEED_NONE);
2216 	return (ntb->lnk_sta & NTB_LINK_SPEED_MASK);
2217 }
2218 
2219 static inline enum ntb_width
2220 intel_ntb_link_sta_width(struct ntb_softc *ntb)
2221 {
2222 
2223 	if (!link_is_up(ntb))
2224 		return (NTB_WIDTH_NONE);
2225 	return (NTB_LNK_STA_WIDTH(ntb->lnk_sta));
2226 }
2227 
/* Static "debug_info" sysctl node declared under the _hw_ntb parent. */
SYSCTL_NODE(_hw_ntb, OID_AUTO, debug_info, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Driver state, statistics, and HW registers");

/*
 * Flags OR-ed into the arg2 value handed to sysctl_handle_register().  The
 * handler strips NTB_REGFLAGS_MASK to obtain the register offset and uses
 * the flag bits to choose the access width and access method.
 */

/* Bits 31:30 select the access width. */
#define NTB_REGSZ_MASK	(3ul << 30)
#define NTB_REG_64	(1ul << 30)
#define NTB_REG_32	(2ul << 30)
#define NTB_REG_16	(3ul << 30)
#define NTB_REG_8	(0ul << 30)

/* NTB_DB_READ: read via db_ioread(); NTB_PCI_REG: read via PCI config space. */
#define NTB_DB_READ	(1ul << 29)
#define NTB_PCI_REG	(1ul << 28)
#define NTB_REGFLAGS_MASK	(NTB_REGSZ_MASK | NTB_DB_READ | NTB_PCI_REG)
2240 
2241 static void
2242 intel_ntb_sysctl_init(struct ntb_softc *ntb)
2243 {
2244 	struct sysctl_oid_list *globals, *tree_par, *regpar, *statpar, *errpar;
2245 	struct sysctl_ctx_list *ctx;
2246 	struct sysctl_oid *tree, *tmptree;
2247 
2248 	ctx = device_get_sysctl_ctx(ntb->device);
2249 	globals = SYSCTL_CHILDREN(device_get_sysctl_tree(ntb->device));
2250 
2251 	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "link_status",
2252 	    CTLFLAG_RD | CTLTYPE_STRING | CTLFLAG_NEEDGIANT, ntb, 0,
2253 	    sysctl_handle_link_status_human, "A",
2254 	    "Link status (human readable)");
2255 	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "active",
2256 	    CTLFLAG_RD | CTLTYPE_UINT | CTLFLAG_NEEDGIANT, ntb, 0,
2257 	    sysctl_handle_link_status, "IU",
2258 	    "Link status (1=active, 0=inactive)");
2259 	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "admin_up",
2260 	    CTLFLAG_RW | CTLTYPE_UINT | CTLFLAG_NEEDGIANT, ntb, 0,
2261 	    sysctl_handle_link_admin, "IU",
2262 	    "Set/get interface status (1=UP, 0=DOWN)");
2263 
2264 	tree = SYSCTL_ADD_NODE(ctx, globals, OID_AUTO, "debug_info",
2265 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
2266 	    "Driver state, statistics, and HW registers");
2267 	tree_par = SYSCTL_CHILDREN(tree);
2268 
2269 	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "conn_type", CTLFLAG_RD,
2270 	    &ntb->conn_type, 0, "0 - Transparent; 1 - B2B; 2 - Root Port");
2271 	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "dev_type", CTLFLAG_RD,
2272 	    &ntb->dev_type, 0, "0 - USD; 1 - DSD");
2273 	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ppd", CTLFLAG_RD,
2274 	    &ntb->ppd, 0, "Raw PPD register (cached)");
2275 
2276 	if (ntb->b2b_mw_idx != B2B_MW_DISABLED) {
2277 		SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "b2b_idx", CTLFLAG_RD,
2278 		    &ntb->b2b_mw_idx, 0,
2279 		    "Index of the MW used for B2B remote register access");
2280 		SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "b2b_off",
2281 		    CTLFLAG_RD, &ntb->b2b_off,
2282 		    "If non-zero, offset of B2B register region in shared MW");
2283 	}
2284 
2285 	SYSCTL_ADD_PROC(ctx, tree_par, OID_AUTO, "features",
2286 	    CTLFLAG_RD | CTLTYPE_STRING | CTLFLAG_NEEDGIANT, ntb, 0,
2287 	    sysctl_handle_features, "A", "Features/errata of this NTB device");
2288 
2289 	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ntb_ctl", CTLFLAG_RD,
2290 	    __DEVOLATILE(uint32_t *, &ntb->ntb_ctl), 0,
2291 	    "NTB CTL register (cached)");
2292 	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "lnk_sta", CTLFLAG_RD,
2293 	    __DEVOLATILE(uint32_t *, &ntb->lnk_sta), 0,
2294 	    "LNK STA register (cached)");
2295 
2296 	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "mw_count", CTLFLAG_RD,
2297 	    &ntb->mw_count, 0, "MW count");
2298 	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "spad_count", CTLFLAG_RD,
2299 	    &ntb->spad_count, 0, "Scratchpad count");
2300 	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_count", CTLFLAG_RD,
2301 	    &ntb->db_count, 0, "Doorbell count");
2302 	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_count", CTLFLAG_RD,
2303 	    &ntb->db_vec_count, 0, "Doorbell vector count");
2304 	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_shift", CTLFLAG_RD,
2305 	    &ntb->db_vec_shift, 0, "Doorbell vector shift");
2306 
2307 	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_valid_mask", CTLFLAG_RD,
2308 	    &ntb->db_valid_mask, "Doorbell valid mask");
2309 	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_link_mask", CTLFLAG_RD,
2310 	    &ntb->db_link_mask, "Doorbell link mask");
2311 	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_mask", CTLFLAG_RD,
2312 	    &ntb->db_mask, "Doorbell mask (cached)");
2313 
2314 	tmptree = SYSCTL_ADD_NODE(ctx, tree_par, OID_AUTO, "registers",
2315 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
2316 	    "Raw HW registers (big-endian)");
2317 	regpar = SYSCTL_CHILDREN(tmptree);
2318 
2319 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ntbcntl",
2320 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2321 	    NTB_REG_32 | ntb->reg->ntb_ctl, sysctl_handle_register, "IU",
2322 	    "NTB Control register");
2323 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcap",
2324 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2325 	    NTB_REG_32 | 0x19c, sysctl_handle_register, "IU",
2326 	    "NTB Link Capabilities");
2327 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcon",
2328 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2329 	    NTB_REG_32 | 0x1a0, sysctl_handle_register, "IU",
2330 	    "NTB Link Control register");
2331 
2332 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_mask",
2333 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2334 	    NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_mask,
2335 	    sysctl_handle_register, "QU", "Doorbell mask register");
2336 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_bell",
2337 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2338 	    NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_bell,
2339 	    sysctl_handle_register, "QU", "Doorbell register");
2340 
2341 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat23",
2342 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2343 	    NTB_REG_64 | ntb->xlat_reg->bar2_xlat,
2344 	    sysctl_handle_register, "QU", "Incoming XLAT23 register");
2345 	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
2346 		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat4",
2347 		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2348 		    NTB_REG_32 | ntb->xlat_reg->bar4_xlat,
2349 		    sysctl_handle_register, "IU", "Incoming XLAT4 register");
2350 		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat5",
2351 		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2352 		    NTB_REG_32 | ntb->xlat_reg->bar5_xlat,
2353 		    sysctl_handle_register, "IU", "Incoming XLAT5 register");
2354 	} else {
2355 		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat45",
2356 		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2357 		    NTB_REG_64 | ntb->xlat_reg->bar4_xlat,
2358 		    sysctl_handle_register, "QU", "Incoming XLAT45 register");
2359 	}
2360 
2361 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt23",
2362 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2363 	    NTB_REG_64 | ntb->xlat_reg->bar2_limit,
2364 	    sysctl_handle_register, "QU", "Incoming LMT23 register");
2365 	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
2366 		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt4",
2367 		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2368 		    NTB_REG_32 | ntb->xlat_reg->bar4_limit,
2369 		    sysctl_handle_register, "IU", "Incoming LMT4 register");
2370 		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt5",
2371 		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2372 		    NTB_REG_32 | ntb->xlat_reg->bar5_limit,
2373 		    sysctl_handle_register, "IU", "Incoming LMT5 register");
2374 	} else {
2375 		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt45",
2376 		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2377 		    NTB_REG_64 | ntb->xlat_reg->bar4_limit,
2378 		    sysctl_handle_register, "QU", "Incoming LMT45 register");
2379 	}
2380 
2381 	if (ntb->type == NTB_ATOM)
2382 		return;
2383 
2384 	tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_stats",
2385 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Xeon HW statistics");
2386 	statpar = SYSCTL_CHILDREN(tmptree);
2387 	SYSCTL_ADD_PROC(ctx, statpar, OID_AUTO, "upstream_mem_miss",
2388 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2389 	    NTB_REG_16 | XEON_USMEMMISS_OFFSET,
2390 	    sysctl_handle_register, "SU", "Upstream Memory Miss");
2391 
2392 	tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_hw_err",
2393 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Xeon HW errors");
2394 	errpar = SYSCTL_CHILDREN(tmptree);
2395 
2396 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ppd",
2397 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2398 	    NTB_REG_8 | NTB_PCI_REG | NTB_PPD_OFFSET,
2399 	    sysctl_handle_register, "CU", "PPD");
2400 
2401 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar23_sz",
2402 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2403 	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR23SZ_OFFSET,
2404 	    sysctl_handle_register, "CU", "PBAR23 SZ (log2)");
2405 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar4_sz",
2406 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2407 	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR4SZ_OFFSET,
2408 	    sysctl_handle_register, "CU", "PBAR4 SZ (log2)");
2409 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar5_sz",
2410 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2411 	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR5SZ_OFFSET,
2412 	    sysctl_handle_register, "CU", "PBAR5 SZ (log2)");
2413 
2414 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_sz",
2415 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2416 	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR23SZ_OFFSET,
2417 	    sysctl_handle_register, "CU", "SBAR23 SZ (log2)");
2418 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_sz",
2419 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2420 	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR4SZ_OFFSET,
2421 	    sysctl_handle_register, "CU", "SBAR4 SZ (log2)");
2422 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_sz",
2423 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2424 	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR5SZ_OFFSET,
2425 	    sysctl_handle_register, "CU", "SBAR5 SZ (log2)");
2426 
2427 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "devsts",
2428 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2429 	    NTB_REG_16 | NTB_PCI_REG | XEON_DEVSTS_OFFSET,
2430 	    sysctl_handle_register, "SU", "DEVSTS");
2431 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnksts",
2432 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2433 	    NTB_REG_16 | NTB_PCI_REG | XEON_LINK_STATUS_OFFSET,
2434 	    sysctl_handle_register, "SU", "LNKSTS");
2435 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "slnksts",
2436 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2437 	    NTB_REG_16 | NTB_PCI_REG | XEON_SLINK_STATUS_OFFSET,
2438 	    sysctl_handle_register, "SU", "SLNKSTS");
2439 
2440 	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "uncerrsts",
2441 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2442 	    NTB_REG_32 | NTB_PCI_REG | XEON_UNCERRSTS_OFFSET,
2443 	    sysctl_handle_register, "IU", "UNCERRSTS");
2444 	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "corerrsts",
2445 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2446 	    NTB_REG_32 | NTB_PCI_REG | XEON_CORERRSTS_OFFSET,
2447 	    sysctl_handle_register, "IU", "CORERRSTS");
2448 
2449 	if (ntb->conn_type != NTB_CONN_B2B)
2450 		return;
2451 
2452 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat01l",
2453 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2454 	    NTB_REG_32 | XEON_B2B_XLAT_OFFSETL,
2455 	    sysctl_handle_register, "IU", "Outgoing XLAT0L register");
2456 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat01u",
2457 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2458 	    NTB_REG_32 | XEON_B2B_XLAT_OFFSETU,
2459 	    sysctl_handle_register, "IU", "Outgoing XLAT0U register");
2460 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat23",
2461 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2462 	    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off,
2463 	    sysctl_handle_register, "QU", "Outgoing XLAT23 register");
2464 	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
2465 		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat4",
2466 		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2467 		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
2468 		    sysctl_handle_register, "IU", "Outgoing XLAT4 register");
2469 		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat5",
2470 		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2471 		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off,
2472 		    sysctl_handle_register, "IU", "Outgoing XLAT5 register");
2473 	} else {
2474 		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat45",
2475 		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2476 		    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
2477 		    sysctl_handle_register, "QU", "Outgoing XLAT45 register");
2478 	}
2479 
2480 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt23",
2481 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2482 	    NTB_REG_64 | XEON_PBAR2LMT_OFFSET,
2483 	    sysctl_handle_register, "QU", "Outgoing LMT23 register");
2484 	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
2485 		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt4",
2486 		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2487 		    NTB_REG_32 | XEON_PBAR4LMT_OFFSET,
2488 		    sysctl_handle_register, "IU", "Outgoing LMT4 register");
2489 		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt5",
2490 		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2491 		    NTB_REG_32 | XEON_PBAR5LMT_OFFSET,
2492 		    sysctl_handle_register, "IU", "Outgoing LMT5 register");
2493 	} else {
2494 		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt45",
2495 		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2496 		    NTB_REG_64 | XEON_PBAR4LMT_OFFSET,
2497 		    sysctl_handle_register, "QU", "Outgoing LMT45 register");
2498 	}
2499 
2500 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar01_base",
2501 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2502 	    NTB_REG_64 | ntb->xlat_reg->bar0_base,
2503 	    sysctl_handle_register, "QU", "Secondary BAR01 base register");
2504 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_base",
2505 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2506 	    NTB_REG_64 | ntb->xlat_reg->bar2_base,
2507 	    sysctl_handle_register, "QU", "Secondary BAR23 base register");
2508 	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
2509 		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_base",
2510 		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2511 		    NTB_REG_32 | ntb->xlat_reg->bar4_base,
2512 		    sysctl_handle_register, "IU",
2513 		    "Secondary BAR4 base register");
2514 		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_base",
2515 		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2516 		    NTB_REG_32 | ntb->xlat_reg->bar5_base,
2517 		    sysctl_handle_register, "IU",
2518 		    "Secondary BAR5 base register");
2519 	} else {
2520 		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar45_base",
2521 		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2522 		    NTB_REG_64 | ntb->xlat_reg->bar4_base,
2523 		    sysctl_handle_register, "QU",
2524 		    "Secondary BAR45 base register");
2525 	}
2526 }
2527 
2528 static int
2529 sysctl_handle_features(SYSCTL_HANDLER_ARGS)
2530 {
2531 	struct ntb_softc *ntb = arg1;
2532 	struct sbuf sb;
2533 	int error;
2534 
2535 	sbuf_new_for_sysctl(&sb, NULL, 256, req);
2536 
2537 	sbuf_printf(&sb, "%b", ntb->features, NTB_FEATURES_STR);
2538 	error = sbuf_finish(&sb);
2539 	sbuf_delete(&sb);
2540 
2541 	if (error || !req->newptr)
2542 		return (error);
2543 	return (EINVAL);
2544 }
2545 
2546 static int
2547 sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS)
2548 {
2549 	struct ntb_softc *ntb = arg1;
2550 	unsigned old, new;
2551 	int error;
2552 
2553 	old = intel_ntb_link_enabled(ntb->device);
2554 
2555 	error = SYSCTL_OUT(req, &old, sizeof(old));
2556 	if (error != 0 || req->newptr == NULL)
2557 		return (error);
2558 
2559 	error = SYSCTL_IN(req, &new, sizeof(new));
2560 	if (error != 0)
2561 		return (error);
2562 
2563 	intel_ntb_printf(0, "Admin set interface state to '%sabled'\n",
2564 	    (new != 0)? "en" : "dis");
2565 
2566 	if (new != 0)
2567 		error = intel_ntb_link_enable(ntb->device, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
2568 	else
2569 		error = intel_ntb_link_disable(ntb->device);
2570 	return (error);
2571 }
2572 
2573 static int
2574 sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS)
2575 {
2576 	struct ntb_softc *ntb = arg1;
2577 	struct sbuf sb;
2578 	enum ntb_speed speed;
2579 	enum ntb_width width;
2580 	int error;
2581 
2582 	sbuf_new_for_sysctl(&sb, NULL, 32, req);
2583 
2584 	if (intel_ntb_link_is_up(ntb->device, &speed, &width))
2585 		sbuf_printf(&sb, "up / PCIe Gen %u / Width x%u",
2586 		    (unsigned)speed, (unsigned)width);
2587 	else
2588 		sbuf_printf(&sb, "down");
2589 
2590 	error = sbuf_finish(&sb);
2591 	sbuf_delete(&sb);
2592 
2593 	if (error || !req->newptr)
2594 		return (error);
2595 	return (EINVAL);
2596 }
2597 
2598 static int
2599 sysctl_handle_link_status(SYSCTL_HANDLER_ARGS)
2600 {
2601 	struct ntb_softc *ntb = arg1;
2602 	unsigned res;
2603 	int error;
2604 
2605 	res = intel_ntb_link_is_up(ntb->device, NULL, NULL);
2606 
2607 	error = SYSCTL_OUT(req, &res, sizeof(res));
2608 	if (error || !req->newptr)
2609 		return (error);
2610 	return (EINVAL);
2611 }
2612 
2613 static int
2614 sysctl_handle_register(SYSCTL_HANDLER_ARGS)
2615 {
2616 	struct ntb_softc *ntb;
2617 	const void *outp;
2618 	uintptr_t sz;
2619 	uint64_t umv;
2620 	char be[sizeof(umv)];
2621 	size_t outsz;
2622 	uint32_t reg;
2623 	bool db, pci;
2624 	int error;
2625 
2626 	ntb = arg1;
2627 	reg = arg2 & ~NTB_REGFLAGS_MASK;
2628 	sz = arg2 & NTB_REGSZ_MASK;
2629 	db = (arg2 & NTB_DB_READ) != 0;
2630 	pci = (arg2 & NTB_PCI_REG) != 0;
2631 
2632 	KASSERT(!(db && pci), ("bogus"));
2633 
2634 	if (db) {
2635 		KASSERT(sz == NTB_REG_64, ("bogus"));
2636 		umv = db_ioread(ntb, reg);
2637 		outsz = sizeof(uint64_t);
2638 	} else {
2639 		switch (sz) {
2640 		case NTB_REG_64:
2641 			if (pci)
2642 				umv = pci_read_config(ntb->device, reg, 8);
2643 			else
2644 				umv = intel_ntb_reg_read(8, reg);
2645 			outsz = sizeof(uint64_t);
2646 			break;
2647 		case NTB_REG_32:
2648 			if (pci)
2649 				umv = pci_read_config(ntb->device, reg, 4);
2650 			else
2651 				umv = intel_ntb_reg_read(4, reg);
2652 			outsz = sizeof(uint32_t);
2653 			break;
2654 		case NTB_REG_16:
2655 			if (pci)
2656 				umv = pci_read_config(ntb->device, reg, 2);
2657 			else
2658 				umv = intel_ntb_reg_read(2, reg);
2659 			outsz = sizeof(uint16_t);
2660 			break;
2661 		case NTB_REG_8:
2662 			if (pci)
2663 				umv = pci_read_config(ntb->device, reg, 1);
2664 			else
2665 				umv = intel_ntb_reg_read(1, reg);
2666 			outsz = sizeof(uint8_t);
2667 			break;
2668 		default:
2669 			panic("bogus");
2670 			break;
2671 		}
2672 	}
2673 
2674 	/* Encode bigendian so that sysctl -x is legible. */
2675 	be64enc(be, umv);
2676 	outp = ((char *)be) + sizeof(umv) - outsz;
2677 
2678 	error = SYSCTL_OUT(req, outp, outsz);
2679 	if (error || !req->newptr)
2680 		return (error);
2681 	return (EINVAL);
2682 }
2683 
2684 static unsigned
2685 intel_ntb_user_mw_to_idx(struct ntb_softc *ntb, unsigned uidx)
2686 {
2687 
2688 	if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 &&
2689 	    uidx >= ntb->b2b_mw_idx) ||
2690 	    (ntb->msix_mw_idx != B2B_MW_DISABLED && uidx >= ntb->msix_mw_idx))
2691 		uidx++;
2692 	if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 &&
2693 	    uidx >= ntb->b2b_mw_idx) &&
2694 	    (ntb->msix_mw_idx != B2B_MW_DISABLED && uidx >= ntb->msix_mw_idx))
2695 		uidx++;
2696 	return (uidx);
2697 }
2698 
#ifndef EARLY_AP_STARTUP
/*
 * Set to 1 by the SYSINIT below; intel_ntb_exchange_msix() postpones the
 * MSI-X exchange with the peer while this is still zero.
 */
static int msix_ready;

/* SYSINIT hook: mark MSI-X negotiation as allowed. */
static void
intel_ntb_msix_ready(void *arg __unused)
{

	msix_ready = 1;
}
/* Runs at SI_SUB_SMP with SI_ORDER_ANY. */
SYSINIT(intel_ntb_msix_ready, SI_SUB_SMP, SI_ORDER_ANY,
    intel_ntb_msix_ready, NULL);
#endif
2711 
2712 static void
2713 intel_ntb_exchange_msix(void *ctx)
2714 {
2715 	struct ntb_softc *ntb;
2716 	uint32_t val;
2717 	unsigned i;
2718 
2719 	ntb = ctx;
2720 
2721 	if (ntb->peer_msix_good)
2722 		goto msix_good;
2723 	if (ntb->peer_msix_done)
2724 		goto msix_done;
2725 
2726 #ifndef EARLY_AP_STARTUP
2727 	/* Block MSIX negotiation until SMP started and IRQ reshuffled. */
2728 	if (!msix_ready)
2729 		goto reschedule;
2730 #endif
2731 
2732 	intel_ntb_get_msix_info(ntb);
2733 	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
2734 		intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_DATA0 + i,
2735 		    ntb->msix_data[i].nmd_data);
2736 		intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_OFS0 + i,
2737 		    ntb->msix_data[i].nmd_ofs - ntb->msix_xlat);
2738 	}
2739 	intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_GUARD, NTB_MSIX_VER_GUARD);
2740 
2741 	intel_ntb_spad_read(ntb->device, NTB_MSIX_GUARD, &val);
2742 	if (val != NTB_MSIX_VER_GUARD)
2743 		goto reschedule;
2744 
2745 	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
2746 		intel_ntb_spad_read(ntb->device, NTB_MSIX_DATA0 + i, &val);
2747 		intel_ntb_printf(2, "remote MSIX data(%u): 0x%x\n", i, val);
2748 		ntb->peer_msix_data[i].nmd_data = val;
2749 		intel_ntb_spad_read(ntb->device, NTB_MSIX_OFS0 + i, &val);
2750 		intel_ntb_printf(2, "remote MSIX addr(%u): 0x%x\n", i, val);
2751 		ntb->peer_msix_data[i].nmd_ofs = val;
2752 	}
2753 
2754 	ntb->peer_msix_done = true;
2755 
2756 msix_done:
2757 	intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_DONE, NTB_MSIX_RECEIVED);
2758 	intel_ntb_spad_read(ntb->device, NTB_MSIX_DONE, &val);
2759 	if (val != NTB_MSIX_RECEIVED)
2760 		goto reschedule;
2761 
2762 	intel_ntb_spad_clear(ntb->device);
2763 	ntb->peer_msix_good = true;
2764 	/* Give peer time to see our NTB_MSIX_RECEIVED. */
2765 	goto reschedule;
2766 
2767 msix_good:
2768 	intel_ntb_poll_link(ntb);
2769 	ntb_link_event(ntb->device);
2770 	return;
2771 
2772 reschedule:
2773 	ntb->lnk_sta = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2);
2774 	if (_xeon_link_is_up(ntb)) {
2775 		callout_reset(&ntb->peer_msix_work,
2776 		    hz * (ntb->peer_msix_good ? 2 : 1) / 10,
2777 		    intel_ntb_exchange_msix, ntb);
2778 	} else
2779 		intel_ntb_spad_clear(ntb->device);
2780 }
2781 
2782 /*
2783  * Public API to the rest of the OS
2784  */
2785 
2786 static uint8_t
2787 intel_ntb_spad_count(device_t dev)
2788 {
2789 	struct ntb_softc *ntb = device_get_softc(dev);
2790 
2791 	return (ntb->spad_count);
2792 }
2793 
2794 static uint8_t
2795 intel_ntb_mw_count(device_t dev)
2796 {
2797 	struct ntb_softc *ntb = device_get_softc(dev);
2798 	uint8_t res;
2799 
2800 	res = ntb->mw_count;
2801 	if (ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0)
2802 		res--;
2803 	if (ntb->msix_mw_idx != B2B_MW_DISABLED)
2804 		res--;
2805 	return (res);
2806 }
2807 
2808 static int
2809 intel_ntb_spad_write(device_t dev, unsigned int idx, uint32_t val)
2810 {
2811 	struct ntb_softc *ntb = device_get_softc(dev);
2812 
2813 	if (idx >= ntb->spad_count)
2814 		return (EINVAL);
2815 
2816 	intel_ntb_reg_write(4, ntb->self_reg->spad + idx * 4, val);
2817 
2818 	return (0);
2819 }
2820 
2821 /*
2822  * Zeros the local scratchpad.
2823  */
2824 static void
2825 intel_ntb_spad_clear(device_t dev)
2826 {
2827 	struct ntb_softc *ntb = device_get_softc(dev);
2828 	unsigned i;
2829 
2830 	for (i = 0; i < ntb->spad_count; i++)
2831 		intel_ntb_spad_write(dev, i, 0);
2832 }
2833 
2834 static int
2835 intel_ntb_spad_read(device_t dev, unsigned int idx, uint32_t *val)
2836 {
2837 	struct ntb_softc *ntb = device_get_softc(dev);
2838 
2839 	if (idx >= ntb->spad_count)
2840 		return (EINVAL);
2841 
2842 	*val = intel_ntb_reg_read(4, ntb->self_reg->spad + idx * 4);
2843 
2844 	return (0);
2845 }
2846 
2847 static int
2848 intel_ntb_peer_spad_write(device_t dev, unsigned int idx, uint32_t val)
2849 {
2850 	struct ntb_softc *ntb = device_get_softc(dev);
2851 
2852 	if (idx >= ntb->spad_count)
2853 		return (EINVAL);
2854 
2855 	if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP))
2856 		intel_ntb_mw_write(4, XEON_SPAD_OFFSET + idx * 4, val);
2857 	else
2858 		intel_ntb_reg_write(4, ntb->peer_reg->spad + idx * 4, val);
2859 
2860 	return (0);
2861 }
2862 
2863 static int
2864 intel_ntb_peer_spad_read(device_t dev, unsigned int idx, uint32_t *val)
2865 {
2866 	struct ntb_softc *ntb = device_get_softc(dev);
2867 
2868 	if (idx >= ntb->spad_count)
2869 		return (EINVAL);
2870 
2871 	if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP))
2872 		*val = intel_ntb_mw_read(4, XEON_SPAD_OFFSET + idx * 4);
2873 	else
2874 		*val = intel_ntb_reg_read(4, ntb->peer_reg->spad + idx * 4);
2875 
2876 	return (0);
2877 }
2878 
2879 static int
2880 intel_ntb_mw_get_range(device_t dev, unsigned mw_idx, vm_paddr_t *base,
2881     caddr_t *vbase, size_t *size, size_t *align, size_t *align_size,
2882     bus_addr_t *plimit)
2883 {
2884 	struct ntb_softc *ntb = device_get_softc(dev);
2885 	struct ntb_pci_bar_info *bar;
2886 	bus_addr_t limit;
2887 	size_t bar_b2b_off;
2888 	enum ntb_bar bar_num;
2889 
2890 	if (mw_idx >= intel_ntb_mw_count(dev))
2891 		return (EINVAL);
2892 	mw_idx = intel_ntb_user_mw_to_idx(ntb, mw_idx);
2893 
2894 	bar_num = intel_ntb_mw_to_bar(ntb, mw_idx);
2895 	bar = &ntb->bar_info[bar_num];
2896 	bar_b2b_off = 0;
2897 	if (mw_idx == ntb->b2b_mw_idx) {
2898 		KASSERT(ntb->b2b_off != 0,
2899 		    ("user shouldn't get non-shared b2b mw"));
2900 		bar_b2b_off = ntb->b2b_off;
2901 	}
2902 
2903 	if (bar_is_64bit(ntb, bar_num))
2904 		limit = BUS_SPACE_MAXADDR;
2905 	else
2906 		limit = BUS_SPACE_MAXADDR_32BIT;
2907 
2908 	if (base != NULL)
2909 		*base = bar->pbase + bar_b2b_off;
2910 	if (vbase != NULL)
2911 		*vbase = bar->vbase + bar_b2b_off;
2912 	if (size != NULL)
2913 		*size = bar->size - bar_b2b_off;
2914 	if (align != NULL)
2915 		*align = bar->size;
2916 	if (align_size != NULL)
2917 		*align_size = 1;
2918 	if (plimit != NULL)
2919 		*plimit = limit;
2920 	return (0);
2921 }
2922 
2923 static int
2924 intel_ntb_mw_set_trans(device_t dev, unsigned idx, bus_addr_t addr, size_t size)
2925 {
2926 	struct ntb_softc *ntb = device_get_softc(dev);
2927 	struct ntb_pci_bar_info *bar;
2928 	uint64_t base, limit, reg_val;
2929 	size_t bar_size, mw_size;
2930 	uint32_t base_reg, xlat_reg, limit_reg;
2931 	enum ntb_bar bar_num;
2932 
2933 	if (idx >= intel_ntb_mw_count(dev))
2934 		return (EINVAL);
2935 	idx = intel_ntb_user_mw_to_idx(ntb, idx);
2936 
2937 	bar_num = intel_ntb_mw_to_bar(ntb, idx);
2938 	bar = &ntb->bar_info[bar_num];
2939 
2940 	bar_size = bar->size;
2941 	if (idx == ntb->b2b_mw_idx)
2942 		mw_size = bar_size - ntb->b2b_off;
2943 	else
2944 		mw_size = bar_size;
2945 
2946 	/* Hardware requires that addr is aligned to bar size */
2947 	if ((addr & (bar_size - 1)) != 0)
2948 		return (EINVAL);
2949 
2950 	if (size > mw_size)
2951 		return (EINVAL);
2952 
2953 	bar_get_xlat_params(ntb, bar_num, &base_reg, &xlat_reg, &limit_reg);
2954 
2955 	limit = 0;
2956 	if (bar_is_64bit(ntb, bar_num)) {
2957 		base = intel_ntb_reg_read(8, base_reg) & BAR_HIGH_MASK;
2958 
2959 		if (limit_reg != 0 && size != mw_size)
2960 			limit = base + size;
2961 
2962 		/* Set and verify translation address */
2963 		intel_ntb_reg_write(8, xlat_reg, addr);
2964 		reg_val = intel_ntb_reg_read(8, xlat_reg) & BAR_HIGH_MASK;
2965 		if (reg_val != addr) {
2966 			intel_ntb_reg_write(8, xlat_reg, 0);
2967 			return (EIO);
2968 		}
2969 
2970 		/* Set and verify the limit */
2971 		intel_ntb_reg_write(8, limit_reg, limit);
2972 		reg_val = intel_ntb_reg_read(8, limit_reg) & BAR_HIGH_MASK;
2973 		if (reg_val != limit) {
2974 			intel_ntb_reg_write(8, limit_reg, base);
2975 			intel_ntb_reg_write(8, xlat_reg, 0);
2976 			return (EIO);
2977 		}
2978 	} else {
2979 		/* Configure 32-bit (split) BAR MW */
2980 
2981 		if ((addr & UINT32_MAX) != addr)
2982 			return (ERANGE);
2983 		if (((addr + size) & UINT32_MAX) != (addr + size))
2984 			return (ERANGE);
2985 
2986 		base = intel_ntb_reg_read(4, base_reg) & BAR_HIGH_MASK;
2987 
2988 		if (limit_reg != 0 && size != mw_size)
2989 			limit = base + size;
2990 
2991 		/* Set and verify translation address */
2992 		intel_ntb_reg_write(4, xlat_reg, addr);
2993 		reg_val = intel_ntb_reg_read(4, xlat_reg) & BAR_HIGH_MASK;
2994 		if (reg_val != addr) {
2995 			intel_ntb_reg_write(4, xlat_reg, 0);
2996 			return (EIO);
2997 		}
2998 
2999 		/* Set and verify the limit */
3000 		intel_ntb_reg_write(4, limit_reg, limit);
3001 		reg_val = intel_ntb_reg_read(4, limit_reg) & BAR_HIGH_MASK;
3002 		if (reg_val != limit) {
3003 			intel_ntb_reg_write(4, limit_reg, base);
3004 			intel_ntb_reg_write(4, xlat_reg, 0);
3005 			return (EIO);
3006 		}
3007 	}
3008 	return (0);
3009 }
3010 
3011 static int
3012 intel_ntb_mw_clear_trans(device_t dev, unsigned mw_idx)
3013 {
3014 
3015 	return (intel_ntb_mw_set_trans(dev, mw_idx, 0, 0));
3016 }
3017 
3018 static int
3019 intel_ntb_mw_get_wc(device_t dev, unsigned idx, vm_memattr_t *mode)
3020 {
3021 	struct ntb_softc *ntb = device_get_softc(dev);
3022 	struct ntb_pci_bar_info *bar;
3023 
3024 	if (idx >= intel_ntb_mw_count(dev))
3025 		return (EINVAL);
3026 	idx = intel_ntb_user_mw_to_idx(ntb, idx);
3027 
3028 	bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, idx)];
3029 	*mode = bar->map_mode;
3030 	return (0);
3031 }
3032 
3033 static int
3034 intel_ntb_mw_set_wc(device_t dev, unsigned idx, vm_memattr_t mode)
3035 {
3036 	struct ntb_softc *ntb = device_get_softc(dev);
3037 
3038 	if (idx >= intel_ntb_mw_count(dev))
3039 		return (EINVAL);
3040 
3041 	idx = intel_ntb_user_mw_to_idx(ntb, idx);
3042 	return (intel_ntb_mw_set_wc_internal(ntb, idx, mode));
3043 }
3044 
3045 static int
3046 intel_ntb_mw_set_wc_internal(struct ntb_softc *ntb, unsigned idx, vm_memattr_t mode)
3047 {
3048 	struct ntb_pci_bar_info *bar;
3049 	int rc;
3050 
3051 	bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, idx)];
3052 	if (bar->map_mode == mode)
3053 		return (0);
3054 
3055 	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size, mode);
3056 	if (rc == 0)
3057 		bar->map_mode = mode;
3058 
3059 	return (rc);
3060 }
3061 
3062 static void
3063 intel_ntb_peer_db_set(device_t dev, uint64_t bit)
3064 {
3065 	struct ntb_softc *ntb = device_get_softc(dev);
3066 
3067 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
3068 		struct ntb_pci_bar_info *lapic;
3069 		unsigned i;
3070 
3071 		lapic = ntb->peer_lapic_bar;
3072 
3073 		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
3074 			if ((bit & intel_ntb_db_vector_mask(dev, i)) != 0)
3075 				bus_space_write_4(lapic->pci_bus_tag,
3076 				    lapic->pci_bus_handle,
3077 				    ntb->peer_msix_data[i].nmd_ofs,
3078 				    ntb->peer_msix_data[i].nmd_data);
3079 		}
3080 		return;
3081 	}
3082 
3083 	if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
3084 		intel_ntb_mw_write(2, XEON_PDOORBELL_OFFSET, bit);
3085 		return;
3086 	}
3087 
3088 	db_iowrite(ntb, ntb->peer_reg->db_bell, bit);
3089 }
3090 
3091 static int
3092 intel_ntb_peer_db_addr(device_t dev, bus_addr_t *db_addr, vm_size_t *db_size)
3093 {
3094 	struct ntb_softc *ntb = device_get_softc(dev);
3095 	struct ntb_pci_bar_info *bar;
3096 	uint64_t regoff;
3097 
3098 	KASSERT((db_addr != NULL && db_size != NULL), ("must be non-NULL"));
3099 
3100 	if (!HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
3101 		bar = &ntb->bar_info[NTB_CONFIG_BAR];
3102 		regoff = ntb->peer_reg->db_bell;
3103 	} else {
3104 		KASSERT(ntb->b2b_mw_idx != B2B_MW_DISABLED,
3105 		    ("invalid b2b idx"));
3106 
3107 		bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx)];
3108 		regoff = XEON_PDOORBELL_OFFSET;
3109 	}
3110 	KASSERT(bar->pci_bus_tag != X86_BUS_SPACE_IO, ("uh oh"));
3111 
3112 	/* HACK: Specific to current x86 bus implementation. */
3113 	*db_addr = ((uint64_t)bar->pci_bus_handle + regoff);
3114 	*db_size = ntb->reg->db_size;
3115 	return (0);
3116 }
3117 
3118 static uint64_t
3119 intel_ntb_db_valid_mask(device_t dev)
3120 {
3121 	struct ntb_softc *ntb = device_get_softc(dev);
3122 
3123 	return (ntb->db_valid_mask);
3124 }
3125 
3126 static int
3127 intel_ntb_db_vector_count(device_t dev)
3128 {
3129 	struct ntb_softc *ntb = device_get_softc(dev);
3130 
3131 	return (ntb->db_vec_count);
3132 }
3133 
3134 static uint64_t
3135 intel_ntb_db_vector_mask(device_t dev, uint32_t vector)
3136 {
3137 	struct ntb_softc *ntb = device_get_softc(dev);
3138 
3139 	if (vector > ntb->db_vec_count)
3140 		return (0);
3141 	return (ntb->db_valid_mask & intel_ntb_vec_mask(ntb, vector));
3142 }
3143 
3144 static bool
3145 intel_ntb_link_is_up(device_t dev, enum ntb_speed *speed, enum ntb_width *width)
3146 {
3147 	struct ntb_softc *ntb = device_get_softc(dev);
3148 
3149 	if (speed != NULL)
3150 		*speed = intel_ntb_link_sta_speed(ntb);
3151 	if (width != NULL)
3152 		*width = intel_ntb_link_sta_width(ntb);
3153 	return (link_is_up(ntb));
3154 }
3155 
3156 static void
3157 save_bar_parameters(struct ntb_pci_bar_info *bar)
3158 {
3159 
3160 	bar->pci_bus_tag = rman_get_bustag(bar->pci_resource);
3161 	bar->pci_bus_handle = rman_get_bushandle(bar->pci_resource);
3162 	bar->pbase = rman_get_start(bar->pci_resource);
3163 	bar->size = rman_get_size(bar->pci_resource);
3164 	bar->vbase = rman_get_virtual(bar->pci_resource);
3165 }
3166 
/*
 * Method table for the Intel NTB hardware driver.  Each DEVMETHOD entry
 * binds an interface method (device, bus or NTB) to the function in this
 * file (or, for the bus methods, an ntb_* helper) that implements it.
 * The table is terminated by DEVMETHOD_END.
 */
3167 static device_method_t ntb_intel_methods[] = {
3168 	/* Device interface */
3169 	DEVMETHOD(device_probe,		intel_ntb_probe),
3170 	DEVMETHOD(device_attach,	intel_ntb_attach),
3171 	DEVMETHOD(device_detach,	intel_ntb_detach),
3172 	/* Bus interface */
3173 	DEVMETHOD(bus_child_location_str, ntb_child_location_str),
3174 	DEVMETHOD(bus_print_child,	ntb_print_child),
3175 	DEVMETHOD(bus_get_dma_tag,	ntb_get_dma_tag),
3176 	/* NTB interface */
	/* Ports and link state */
3177 	DEVMETHOD(ntb_port_number,	intel_ntb_port_number),
3178 	DEVMETHOD(ntb_peer_port_count,	intel_ntb_peer_port_count),
3179 	DEVMETHOD(ntb_peer_port_number,	intel_ntb_peer_port_number),
3180 	DEVMETHOD(ntb_peer_port_idx, 	intel_ntb_peer_port_idx),
3181 	DEVMETHOD(ntb_link_is_up,	intel_ntb_link_is_up),
3182 	DEVMETHOD(ntb_link_enable,	intel_ntb_link_enable),
3183 	DEVMETHOD(ntb_link_disable,	intel_ntb_link_disable),
3184 	DEVMETHOD(ntb_link_enabled,	intel_ntb_link_enabled),
	/* Memory windows */
3185 	DEVMETHOD(ntb_mw_count,		intel_ntb_mw_count),
3186 	DEVMETHOD(ntb_mw_get_range,	intel_ntb_mw_get_range),
3187 	DEVMETHOD(ntb_mw_set_trans,	intel_ntb_mw_set_trans),
3188 	DEVMETHOD(ntb_mw_clear_trans,	intel_ntb_mw_clear_trans),
3189 	DEVMETHOD(ntb_mw_get_wc,	intel_ntb_mw_get_wc),
3190 	DEVMETHOD(ntb_mw_set_wc,	intel_ntb_mw_set_wc),
	/* Scratchpad registers, local and peer */
3191 	DEVMETHOD(ntb_spad_count,	intel_ntb_spad_count),
3192 	DEVMETHOD(ntb_spad_clear,	intel_ntb_spad_clear),
3193 	DEVMETHOD(ntb_spad_write,	intel_ntb_spad_write),
3194 	DEVMETHOD(ntb_spad_read,	intel_ntb_spad_read),
3195 	DEVMETHOD(ntb_peer_spad_write,	intel_ntb_peer_spad_write),
3196 	DEVMETHOD(ntb_peer_spad_read,	intel_ntb_peer_spad_read),
	/* Doorbells, local and peer */
3197 	DEVMETHOD(ntb_db_valid_mask,	intel_ntb_db_valid_mask),
3198 	DEVMETHOD(ntb_db_vector_count,	intel_ntb_db_vector_count),
3199 	DEVMETHOD(ntb_db_vector_mask,	intel_ntb_db_vector_mask),
3200 	DEVMETHOD(ntb_db_clear,		intel_ntb_db_clear),
3201 	DEVMETHOD(ntb_db_clear_mask,	intel_ntb_db_clear_mask),
3202 	DEVMETHOD(ntb_db_read,		intel_ntb_db_read),
3203 	DEVMETHOD(ntb_db_set_mask,	intel_ntb_db_set_mask),
3204 	DEVMETHOD(ntb_peer_db_addr,	intel_ntb_peer_db_addr),
3205 	DEVMETHOD(ntb_peer_db_set,	intel_ntb_peer_db_set),
3206 	DEVMETHOD_END
3207 };
3208 
/*
 * Driver class "ntb_hw": uses the method table above and a per-device
 * softc of type struct ntb_softc.
 */
3209 static DEFINE_CLASS_0(ntb_hw, ntb_intel_driver, ntb_intel_methods,
3210     sizeof(struct ntb_softc));
/* Register the driver as module ntb_hw_intel on the pci bus. */
3211 DRIVER_MODULE(ntb_hw_intel, pci, ntb_intel_driver, ntb_hw_devclass, NULL, NULL);
/* This module depends on module "ntb", version 1. */
3212 MODULE_DEPEND(ntb_hw_intel, ntb, 1, 1, 1);
3213 MODULE_VERSION(ntb_hw_intel, 1);
/*
 * Export the pci_ids table as plug-and-play match data.
 * NOTE(review): the descriptor string presumably describes the layout of
 * a pci_ids entry (32-bit vendor/device ID plus description) - confirm
 * against the pci_ids definition.
 */
3214 MODULE_PNP_INFO("W32:vendor/device;D:#", pci, ntb_hw_intel, pci_ids,
3215     nitems(pci_ids));
3216