xref: /freebsd/sys/dev/ntb/ntb_hw/ntb_hw_intel.c (revision 9abb92653894a0f70d372758c2c965fc99b69866)
1 /*-
2  * Copyright (c) 2016-2017 Alexander Motin <mav@FreeBSD.org>
3  * Copyright (C) 2013 Intel Corporation
4  * Copyright (C) 2015 EMC Corporation
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*
 * The Non-Transparent Bridge (NTB) is a device that allows you to connect
 * two or more systems using PCI-e links, providing remote memory access.
32  *
33  * This module contains a driver for NTB hardware in Intel Xeon/Atom CPUs.
34  *
35  * NOTE: Much of the code in this module is shared with Linux. Any patches may
36  * be picked up and redistributed in Linux with a dual GPL/BSD license.
37  */
38 
39 #include <sys/cdefs.h>
40 __FBSDID("$FreeBSD$");
41 
42 #include <sys/param.h>
43 #include <sys/kernel.h>
44 #include <sys/systm.h>
45 #include <sys/bus.h>
46 #include <sys/endian.h>
47 #include <sys/interrupt.h>
48 #include <sys/lock.h>
49 #include <sys/malloc.h>
50 #include <sys/module.h>
51 #include <sys/mutex.h>
52 #include <sys/pciio.h>
53 #include <sys/queue.h>
54 #include <sys/rman.h>
55 #include <sys/sbuf.h>
56 #include <sys/sysctl.h>
57 #include <vm/vm.h>
58 #include <vm/pmap.h>
59 #include <machine/bus.h>
60 #include <machine/intr_machdep.h>
61 #include <machine/resource.h>
62 #include <dev/pci/pcireg.h>
63 #include <dev/pci/pcivar.h>
64 #include <dev/iommu/iommu.h>
65 
66 #include "ntb_hw_intel.h"
67 #include "../ntb.h"
68 
/*
 * The larger of the Xeon and Atom doorbell counts; sizes the int_info[]
 * array in struct ntb_softc.
 */
#define MAX_MSIX_INTERRUPTS MAX(XEON_DB_COUNT, ATOM_DB_COUNT)

/* Timing constants; the unit of each is given in its trailing comment. */
#define NTB_HB_TIMEOUT		1 /* second */
#define ATOM_LINK_RECOVERY_TIME	500 /* ms */
/* Mask that clears the low 12 bits and keeps bit 12 and above. */
#define BAR_HIGH_MASK		(~((1ull << 12) - 1))

/*
 * Magic values for the MSI-X exchange.
 * NOTE(review): their users are outside this part of the file; presumably
 * they are written to scratchpads by intel_ntb_exchange_msix() -- confirm.
 */
#define	NTB_MSIX_VER_GUARD	0xaabbccdd
#define	NTB_MSIX_RECEIVED	0xe0f0e0f0
/*
 * PCI constants could be somewhere more generic, but aren't defined/used in
 * pci.c.
 *
 * Size of one MSI-X table entry and the byte offsets of its lower address,
 * upper address and data fields within that entry.
 */
#define	PCI_MSIX_ENTRY_SIZE		16
#define	PCI_MSIX_ENTRY_LOWER_ADDR	0
#define	PCI_MSIX_ENTRY_UPPER_ADDR	4
#define	PCI_MSIX_ENTRY_DATA		8
86 
/*
 * Hardware family of the bridge.  Taken from the pci_ids[] entry at attach
 * time and used to choose between the Xeon and Atom detect/init routines.
 */
enum ntb_device_type {
	NTB_XEON,
	NTB_ATOM
};
91 
/*
 * ntb_conn_type are hardware numbers, cannot change.
 *
 * NOTE(review): B2B matches the "B2B topology" named in the xeon_b2b sysctl
 * descriptions; RP is presumably "root port" -- confirm.
 */
enum ntb_conn_type {
	NTB_CONN_TRANSPARENT = 0,
	NTB_CONN_B2B = 1,
	NTB_CONN_RP = 2,
};
98 
/*
 * Which side of the B2B link this device is on.  The xeon_b2b sysctl
 * descriptions call "usd" the upstream side and "dsd" the downstream side.
 */
enum ntb_b2b_direction {
	NTB_DEV_USD = 0,
	NTB_DEV_DSD = 1,
};
103 
/*
 * Index into ntb_softc.bar_info[].  intel_ntb_map_pci_bars() maps these to
 * PCI BARs as follows:
 *   NTB_CONFIG_BAR -> PCIR_BAR(0)  (register BAR)
 *   NTB_B2B_BAR_1  -> PCIR_BAR(2)
 *   NTB_B2B_BAR_2  -> PCIR_BAR(4)
 *   NTB_B2B_BAR_3  -> PCIR_BAR(5)  (only with NTB_SPLIT_BAR)
 * NTB_MAX_BARS is the number of entries, not a BAR.
 */
enum ntb_bar {
	NTB_CONFIG_BAR = 0,
	NTB_B2B_BAR_1,
	NTB_B2B_BAR_2,
	NTB_B2B_BAR_3,
	NTB_MAX_BARS
};
111 
/*
 * Slot numbers used by the MSI-X exchange; NTB_MAX_MSIX_SPAD is the number
 * of slots.
 * NOTE(review): the code that uses these is outside this part of the file;
 * presumably they index scratchpad registers (guard word, three data words,
 * three offset words, completion flag) -- confirm against
 * intel_ntb_exchange_msix().
 */
enum {
	NTB_MSIX_GUARD = 0,
	NTB_MSIX_DATA0,
	NTB_MSIX_DATA1,
	NTB_MSIX_DATA2,
	NTB_MSIX_OFS0,
	NTB_MSIX_OFS1,
	NTB_MSIX_OFS2,
	NTB_MSIX_DONE,
	NTB_MAX_MSIX_SPAD
};
123 
/*
 * Device features and workarounds
 *
 * Evaluates to true when any bit of `feature' (an NTB_* feature flag) is set
 * in (ntb)->features.
 */
#define HAS_FEATURE(ntb, feature)	\
	(((ntb)->features & (feature)) != 0)
127 
/* One supported device: an entry of the pci_ids[] table. */
struct ntb_hw_info {
	/* Looked up with the value of pci_get_devid(), e.g. 0x0C4E8086. */
	uint32_t		device_id;
	/* Description handed to device_set_desc() in probe. */
	const char		*desc;
	/* Copied into the softc in attach. */
	enum ntb_device_type	type;
	/* NTB_* feature/errata bits, copied into the softc in attach. */
	uint32_t		features;
};
134 
/*
 * State for one mapped PCI BAR; ntb_softc holds one per enum ntb_bar.
 * NOTE(review): the tag/handle/pbase/vbase/size fields are presumably filled
 * in by save_bar_parameters(), which is outside this part of the file.
 */
struct ntb_pci_bar_info {
	/* Tag and handle used by the intel_ntb_bar_read/write macros. */
	bus_space_tag_t		pci_bus_tag;
	bus_space_handle_t	pci_bus_handle;
	/* PCIR_BAR(n) rid, set before the resource is allocated. */
	int			pci_resource_id;
	/* Result of bus_alloc_resource_any(); NULL if not allocated. */
	struct resource		*pci_resource;
	/* Physical base, virtual base and length of the mapping. */
	vm_paddr_t		pbase;
	caddr_t			vbase;
	vm_size_t		size;
	/* Memory attribute the mapping currently uses. */
	vm_memattr_t		map_mode;

	/* Configuration register offsets */
	uint32_t		psz_off;
	uint32_t		ssz_off;
	uint32_t		pbarxlat_off;
};
150 
/*
 * Bookkeeping for one interrupt; ntb_softc holds MAX_MSIX_INTERRUPTS of them.
 * NOTE(review): the setup code is outside this part of the file; presumably
 * res/rid are the IRQ resource and its rid, and tag is the handler cookie --
 * confirm.
 */
struct ntb_int_info {
	struct resource	*res;
	int		rid;
	void		*tag;
};
156 
/*
 * Per-vector state referenced from ntb_softc.msix_vec.
 * NOTE(review): presumably the argument handed to ndev_vec_isr(); num looks
 * like the vector index -- confirm against intel_ntb_create_msix_vec().
 */
struct ntb_vec {
	struct ntb_softc	*ntb;	/* owning device */
	uint32_t		num;
	unsigned		masked;
};
162 
/*
 * Per-chip register layout (see atom_reg and xeon_reg).
 */
struct ntb_reg {
	/* Offsets of the NTB control and link status registers. */
	uint32_t	ntb_ctl;
	uint32_t	lnk_sta;
	/* Doorbell register width in bytes (the tables use sizeof()). */
	uint8_t		db_size;
	/* Memory-window index -> enum ntb_bar; unused entries are 0. */
	unsigned	mw_bar[NTB_MAX_BARS];
};
169 
/*
 * Register offsets of the doorbell, doorbell mask and scratchpad registers
 * for one side (see the *_pri_reg and *_b2b_reg tables).
 */
struct ntb_alt_reg {
	uint32_t	db_bell;
	uint32_t	db_mask;
	uint32_t	spad;
};
175 
/*
 * Register offsets of the secondary-side BAR base, translate and limit
 * registers (see atom_sec_xlat and xeon_sec_xlat).  bar_get_xlat_params()
 * picks the bar2, bar4 or bar5 triple for a given enum ntb_bar.
 */
struct ntb_xlat_reg {
	uint32_t	bar0_base;
	uint32_t	bar2_base;
	uint32_t	bar4_base;
	uint32_t	bar5_base;

	uint32_t	bar2_xlat;
	uint32_t	bar4_xlat;
	uint32_t	bar5_xlat;

	uint32_t	bar2_limit;
	uint32_t	bar4_limit;
	uint32_t	bar5_limit;
};
190 
/*
 * Addresses used on the bus between two back-to-back NTB devices.  The
 * sysctl descriptions say the *_addr32 members apply in split-BAR mode.
 */
struct ntb_b2b_addr {
	uint64_t	bar0_addr;
	uint64_t	bar2_addr64;
	uint64_t	bar4_addr64;
	uint64_t	bar4_addr32;
	uint64_t	bar5_addr32;
};
198 
/*
 * One MSI-X message description, kept per doorbell vector in ntb_softc
 * (msix_data[] and peer_msix_data[]).
 * NOTE(review): presumably an offset plus the MSI-X data word -- confirm
 * against intel_ntb_get_msix_info().
 */
struct ntb_msix_data {
	uint32_t	nmd_ofs;
	uint32_t	nmd_data;
};
203 
/*
 * Per-device driver state, obtained with device_get_softc().
 */
struct ntb_softc {
	/* ntb.c context. Do not move! Must go first! */
	void			*ntb_store;

	/* Set in attach from the device and its pci_ids[] entry. */
	device_t		device;
	enum ntb_device_type	type;
	uint32_t		features;

	/* BAR state indexed by enum ntb_bar. */
	struct ntb_pci_bar_info	bar_info[NTB_MAX_BARS];
	struct ntb_int_info	int_info[MAX_MSIX_INTERRUPTS];
	uint32_t		allocated_interrupts;

	struct ntb_msix_data	peer_msix_data[XEON_NONLINK_DB_MSIX_BITS];
	struct ntb_msix_data	msix_data[XEON_NONLINK_DB_MSIX_BITS];
	bool			peer_msix_good;
	bool			peer_msix_done;
	struct ntb_pci_bar_info	*peer_lapic_bar;
	/* Callout initialized in attach and drained in detach. */
	struct callout		peer_msix_work;

	/* DMA tag/map created for BAR0 in intel_ntb_map_pci_bars(). */
	bus_dma_tag_t		bar0_dma_tag;
	bus_dmamap_t		bar0_dma_map;

	/* Callouts initialized in attach and drained in detach. */
	struct callout		heartbeat_timer;
	struct callout		lr_timer;

	struct ntb_vec		*msix_vec;

	uint32_t		ppd;
	enum ntb_conn_type	conn_type;
	enum ntb_b2b_direction	dev_type;

	/* Offset of peer bar0 in B2B BAR */
	uint64_t			b2b_off;
	/* Memory window used to access peer bar0 */
#define B2B_MW_DISABLED			UINT8_MAX
	uint8_t				b2b_mw_idx;
	uint32_t			msix_xlat;
	/* Attach initializes this to B2B_MW_DISABLED, like b2b_mw_idx. */
	uint8_t				msix_mw_idx;

	uint8_t				mw_count;
	uint8_t				spad_count;
	uint8_t				db_count;
	uint8_t				db_vec_count;
	uint8_t				db_vec_shift;

	/* Protects local db_mask. */
#define DB_MASK_LOCK(sc)	mtx_lock_spin(&(sc)->db_mask_lock)
#define DB_MASK_UNLOCK(sc)	mtx_unlock_spin(&(sc)->db_mask_lock)
#define DB_MASK_ASSERT(sc,f)	mtx_assert(&(sc)->db_mask_lock, (f))
	struct mtx			db_mask_lock;

	volatile uint32_t		ntb_ctl;
	volatile uint32_t		lnk_sta;

	uint64_t			db_valid_mask;
	uint64_t			db_link_mask;
	uint64_t			db_mask;
	uint64_t			fake_db;	/* NTB_SB01BASE_LOCKUP*/
	uint64_t			force_db;	/* NTB_SB01BASE_LOCKUP*/

	int				last_ts;	/* ticks @ last irq */

	/*
	 * Register layout tables.
	 * NOTE(review): presumably these point at the static atom_/xeon_
	 * tables; the assignments are outside this part of the file.
	 * Detach treats a NULL self_reg as "not set up".
	 */
	const struct ntb_reg		*reg;
	const struct ntb_alt_reg	*self_reg;
	const struct ntb_alt_reg	*peer_reg;
	const struct ntb_xlat_reg	*xlat_reg;
};
271 
#ifdef __i386__
/*
 * 64-bit register accessors for i386 builds, composed from two 32-bit
 * accesses: the low dword at `offset' and the high dword at `offset + 4'.
 * The two halves are separate accesses, so the 64-bit access is not atomic.
 */

/*
 * Read a 64-bit value as two 32-bit reads.
 *
 * Returns the low dword in bits 0-31 and the high dword in bits 32-63.
 */
static __inline uint64_t
bus_space_read_8(bus_space_tag_t tag, bus_space_handle_t handle,
    bus_size_t offset)
{
	uint64_t lo, hi;

	/*
	 * Issue the reads as separate statements so the low half is always
	 * read before the high half, matching bus_space_write_8() below.
	 * As operands of a single expression their order is unspecified.
	 */
	lo = bus_space_read_4(tag, handle, offset);
	hi = bus_space_read_4(tag, handle, offset + 4);
	return (lo | (hi << 32));
}

/*
 * Write a 64-bit value as two 32-bit writes, low dword first.
 */
static __inline void
bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t handle,
    bus_size_t offset, uint64_t val)
{

	bus_space_write_4(tag, handle, offset, val);
	bus_space_write_4(tag, handle, offset + 4, val >> 32);
}
#endif
291 
/*
 * Register access helpers.  All of them expand to a reference to a variable
 * named `ntb' (struct ntb_softc *) that must be in scope at the call site.
 * SIZE is the access width suffix pasted onto bus_space_read_/write_
 * (for example 4 or 8).
 *
 *   intel_ntb_bar_*: access `offset' within the BAR given as an enum ntb_bar.
 *   intel_ntb_reg_*: the same, fixed to NTB_CONFIG_BAR.
 *   intel_ntb_mw_*:  the same, through the BAR behind ntb->b2b_mw_idx.
 */
#define intel_ntb_bar_read(SIZE, bar, offset) \
	    bus_space_read_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
	    ntb->bar_info[(bar)].pci_bus_handle, (offset))
#define intel_ntb_bar_write(SIZE, bar, offset, val) \
	    bus_space_write_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
	    ntb->bar_info[(bar)].pci_bus_handle, (offset), (val))
#define intel_ntb_reg_read(SIZE, offset) \
	    intel_ntb_bar_read(SIZE, NTB_CONFIG_BAR, offset)
#define intel_ntb_reg_write(SIZE, offset, val) \
	    intel_ntb_bar_write(SIZE, NTB_CONFIG_BAR, offset, val)
#define intel_ntb_mw_read(SIZE, offset) \
	    intel_ntb_bar_read(SIZE, intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \
		offset)
#define intel_ntb_mw_write(SIZE, offset, val) \
	    intel_ntb_bar_write(SIZE, intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \
		offset, val)
308 
/* Forward declarations: entry points that take a device_t. */
static int intel_ntb_probe(device_t device);
static int intel_ntb_attach(device_t device);
static int intel_ntb_detach(device_t device);
static uint64_t intel_ntb_db_valid_mask(device_t dev);
static void intel_ntb_spad_clear(device_t dev);
static uint64_t intel_ntb_db_vector_mask(device_t dev, uint32_t vector);
static bool intel_ntb_link_is_up(device_t dev, enum ntb_speed *speed,
    enum ntb_width *width);
static int intel_ntb_link_enable(device_t dev, enum ntb_speed speed,
    enum ntb_width width);
static int intel_ntb_link_disable(device_t dev);
static int intel_ntb_spad_read(device_t dev, unsigned int idx, uint32_t *val);
static int intel_ntb_peer_spad_write(device_t dev, unsigned int idx, uint32_t val);

/* Forward declarations: driver-internal helpers and sysctl handlers. */
static unsigned intel_ntb_user_mw_to_idx(struct ntb_softc *, unsigned uidx);
static inline enum ntb_bar intel_ntb_mw_to_bar(struct ntb_softc *, unsigned mw);
static inline bool bar_is_64bit(struct ntb_softc *, enum ntb_bar);
static inline void bar_get_xlat_params(struct ntb_softc *, enum ntb_bar,
    uint32_t *base, uint32_t *xlat, uint32_t *lmt);
static int intel_ntb_map_pci_bars(struct ntb_softc *ntb);
static int intel_ntb_mw_set_wc_internal(struct ntb_softc *, unsigned idx,
    vm_memattr_t);
static void print_map_success(struct ntb_softc *, struct ntb_pci_bar_info *,
    const char *);
static int map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar);
static int map_memory_window_bar(struct ntb_softc *ntb,
    struct ntb_pci_bar_info *bar);
static void intel_ntb_unmap_pci_bar(struct ntb_softc *ntb);
static int intel_ntb_remap_msix(device_t, uint32_t desired, uint32_t avail);
static int intel_ntb_init_isr(struct ntb_softc *ntb);
static int intel_ntb_setup_legacy_interrupt(struct ntb_softc *ntb);
static int intel_ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors);
static void intel_ntb_teardown_interrupts(struct ntb_softc *ntb);
static inline uint64_t intel_ntb_vec_mask(struct ntb_softc *, uint64_t db_vector);
static void intel_ntb_interrupt(struct ntb_softc *, uint32_t vec);
static void ndev_vec_isr(void *arg);
static void ndev_irq_isr(void *arg);
static inline uint64_t db_ioread(struct ntb_softc *, uint64_t regoff);
static inline void db_iowrite(struct ntb_softc *, uint64_t regoff, uint64_t);
static inline void db_iowrite_raw(struct ntb_softc *, uint64_t regoff, uint64_t);
static int intel_ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors);
static void intel_ntb_free_msix_vec(struct ntb_softc *ntb);
static void intel_ntb_get_msix_info(struct ntb_softc *ntb);
static void intel_ntb_exchange_msix(void *);
static struct ntb_hw_info *intel_ntb_get_device_info(uint32_t device_id);
static void intel_ntb_detect_max_mw(struct ntb_softc *ntb);
static int intel_ntb_detect_xeon(struct ntb_softc *ntb);
static int intel_ntb_detect_atom(struct ntb_softc *ntb);
static int intel_ntb_xeon_init_dev(struct ntb_softc *ntb);
static int intel_ntb_atom_init_dev(struct ntb_softc *ntb);
static void intel_ntb_teardown_xeon(struct ntb_softc *ntb);
static void configure_atom_secondary_side_bars(struct ntb_softc *ntb);
static void xeon_reset_sbar_size(struct ntb_softc *, enum ntb_bar idx,
    enum ntb_bar regbar);
static void xeon_set_sbar_base_and_limit(struct ntb_softc *,
    uint64_t base_addr, enum ntb_bar idx, enum ntb_bar regbar);
static void xeon_set_pbar_xlat(struct ntb_softc *, uint64_t base_addr,
    enum ntb_bar idx);
static int xeon_setup_b2b_mw(struct ntb_softc *,
    const struct ntb_b2b_addr *addr, const struct ntb_b2b_addr *peer_addr);
static inline bool link_is_up(struct ntb_softc *ntb);
static inline bool _xeon_link_is_up(struct ntb_softc *ntb);
static inline bool atom_link_is_err(struct ntb_softc *ntb);
static inline enum ntb_speed intel_ntb_link_sta_speed(struct ntb_softc *);
static inline enum ntb_width intel_ntb_link_sta_width(struct ntb_softc *);
static void atom_link_hb(void *arg);
static void recover_atom_link(void *arg);
static bool intel_ntb_poll_link(struct ntb_softc *ntb);
static void save_bar_parameters(struct ntb_pci_bar_info *bar);
static void intel_ntb_sysctl_init(struct ntb_softc *);
static int sysctl_handle_features(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_link_status(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_register(SYSCTL_HANDLER_ARGS);
384 
/* Log verbosity, exposed as the read-write tunable hw.ntb.debug_level. */
static unsigned g_ntb_hw_debug_level;
SYSCTL_UINT(_hw_ntb, OID_AUTO, debug_level, CTLFLAG_RWTUN,
    &g_ntb_hw_debug_level, 0, "ntb_hw log level -- higher is more verbose");
/*
 * device_printf() the message only when `lvl' does not exceed the current
 * debug level.  Expands to a reference to a variable named `ntb' that must
 * be in scope at the call site.
 */
#define intel_ntb_printf(lvl, ...) do {				\
	if ((lvl) <= g_ntb_hw_debug_level) {			\
		device_printf(ntb->device, __VA_ARGS__);	\
	}							\
} while (0)
393 
/*
 * Values accepted by the hw.ntb.default_mw_pat tunable.  They are translated
 * to VM_MEMATTR_* values by intel_ntb_pat_flags(); the numbers also appear in
 * the sysctl description via __XSTRING().
 */
#define	_NTB_PAT_UC	0
#define	_NTB_PAT_WC	1
#define	_NTB_PAT_WT	4
#define	_NTB_PAT_WP	5
#define	_NTB_PAT_WB	6
#define	_NTB_PAT_UCM	7
/* Boot-time (read-only at runtime) selection; defaults to UC. */
static unsigned g_ntb_mw_pat = _NTB_PAT_UC;
SYSCTL_UINT(_hw_ntb, OID_AUTO, default_mw_pat, CTLFLAG_RDTUN,
    &g_ntb_mw_pat, 0, "Configure the default memory window cache flags (PAT): "
    "UC: "  __XSTRING(_NTB_PAT_UC) ", "
    "WC: "  __XSTRING(_NTB_PAT_WC) ", "
    "WT: "  __XSTRING(_NTB_PAT_WT) ", "
    "WP: "  __XSTRING(_NTB_PAT_WP) ", "
    "WB: "  __XSTRING(_NTB_PAT_WB) ", "
    "UC-: " __XSTRING(_NTB_PAT_UCM));
409 
410 static inline vm_memattr_t
411 intel_ntb_pat_flags(void)
412 {
413 
414 	switch (g_ntb_mw_pat) {
415 	case _NTB_PAT_WC:
416 		return (VM_MEMATTR_WRITE_COMBINING);
417 	case _NTB_PAT_WT:
418 		return (VM_MEMATTR_WRITE_THROUGH);
419 	case _NTB_PAT_WP:
420 		return (VM_MEMATTR_WRITE_PROTECTED);
421 	case _NTB_PAT_WB:
422 		return (VM_MEMATTR_WRITE_BACK);
423 	case _NTB_PAT_UCM:
424 		return (VM_MEMATTR_WEAK_UNCACHEABLE);
425 	case _NTB_PAT_UC:
426 		/* FALLTHROUGH */
427 	default:
428 		return (VM_MEMATTR_UNCACHEABLE);
429 	}
430 }
431 
432 /*
433  * Well, this obviously doesn't belong here, but it doesn't seem to exist
434  * anywhere better yet.
435  */
436 static inline const char *
437 intel_ntb_vm_memattr_to_str(vm_memattr_t pat)
438 {
439 
440 	switch (pat) {
441 	case VM_MEMATTR_WRITE_COMBINING:
442 		return ("WRITE_COMBINING");
443 	case VM_MEMATTR_WRITE_THROUGH:
444 		return ("WRITE_THROUGH");
445 	case VM_MEMATTR_WRITE_PROTECTED:
446 		return ("WRITE_PROTECTED");
447 	case VM_MEMATTR_WRITE_BACK:
448 		return ("WRITE_BACK");
449 	case VM_MEMATTR_WEAK_UNCACHEABLE:
450 		return ("UNCACHED");
451 	case VM_MEMATTR_UNCACHEABLE:
452 		return ("UNCACHEABLE");
453 	default:
454 		return ("UNKNOWN");
455 	}
456 }
457 
/*
 * Boot-time tunable hw.ntb.msix_mw_idx: memory window used to reach the
 * peer's MSI-X messages (default 1).
 */
static int g_ntb_msix_idx = 1;
SYSCTL_INT(_hw_ntb, OID_AUTO, msix_mw_idx, CTLFLAG_RDTUN, &g_ntb_msix_idx,
    0, "Use this memory window to access the peer MSIX message complex on "
    "certain Xeon-based NTB systems, as a workaround for a hardware errata.  "
    "Like b2b_mw_idx, negative values index from the last available memory "
    "window.  (Applies on Xeon platforms with SB01BASE_LOCKUP errata.)");

/*
 * Boot-time tunable hw.ntb.b2b_mw_idx: memory window used to reach the
 * peer's NTB registers (default -1, the last window).
 */
static int g_ntb_mw_idx = -1;
SYSCTL_INT(_hw_ntb, OID_AUTO, b2b_mw_idx, CTLFLAG_RDTUN, &g_ntb_mw_idx,
    0, "Use this memory window to access the peer NTB registers.  A "
    "non-negative value starts from the first MW index; a negative value "
    "starts from the last MW index.  The default is -1, i.e., the last "
    "available memory window.  Both sides of the NTB MUST set the same "
    "value here!  (Applies on Xeon platforms with SDOORBELL_LOCKUP errata.)");
472 
/*
 * Bits of ntb_softc.features, tested with HAS_FEATURE().
 */
/* Hardware owns the low 16 bits of features. */
#define NTB_BAR_SIZE_4K		(1 << 0)
#define NTB_SDOORBELL_LOCKUP	(1 << 1)
#define NTB_SB01BASE_LOCKUP	(1 << 2)
#define NTB_B2BDOORBELL_BIT14	(1 << 3)
/* Software/configuration owns the top 16 bits. */
#define NTB_SPLIT_BAR		(1ull << 16)

/*
 * Names for the bits above: a leading \20 followed by pairs of an octal,
 * 1-based bit number and the bit's name (\21 is bit 17, i.e. 1 << 16).
 * NOTE(review): this looks like a kernel printf(9) "%b" description string;
 * its user is outside this part of the file -- confirm.
 */
#define NTB_FEATURES_STR \
    "\20\21SPLIT_BAR4\04B2B_DOORBELL_BIT14\03SB01BASE_LOCKUP" \
    "\02SDOORBELL_LOCKUP\01BAR_SIZE_4K"
484 
/*
 * Supported devices.  Each entry gives the device ID (vendor 0x8086 in the
 * low 16 bits), the description set on the device in probe, the hardware
 * family, and the feature/errata bits copied into the softc in attach.
 */
static struct ntb_hw_info pci_ids[] = {
	/* XXX: PS/SS IDs left out until they are supported. */
	{ 0x0C4E8086, "BWD Atom Processor S1200 Non-Transparent Bridge B2B",
		NTB_ATOM, 0 },

	{ 0x37258086, "JSF Xeon C35xx/C55xx Non-Transparent Bridge B2B",
		NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
	{ 0x3C0D8086, "SNB Xeon E5/Core i7 Non-Transparent Bridge B2B",
		NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
	{ 0x0E0D8086, "IVT Xeon E5 V2 Non-Transparent Bridge B2B", NTB_XEON,
		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
		    NTB_SB01BASE_LOCKUP | NTB_BAR_SIZE_4K },
	{ 0x2F0D8086, "HSX Xeon E5 V3 Non-Transparent Bridge B2B", NTB_XEON,
		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
		    NTB_SB01BASE_LOCKUP },
	{ 0x6F0D8086, "BDX Xeon E5 V4 Non-Transparent Bridge B2B", NTB_XEON,
		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
		    NTB_SB01BASE_LOCKUP },
};
504 
/*
 * Atom register layout: 64-bit doorbell registers and two memory windows
 * (the third mw_bar[] slot is left zero).
 */
static const struct ntb_reg atom_reg = {
	.ntb_ctl = ATOM_NTBCNTL_OFFSET,
	.lnk_sta = ATOM_LINK_STATUS_OFFSET,
	.db_size = sizeof(uint64_t),
	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2 },
};

/* Atom primary-side doorbell, doorbell mask and scratchpad offsets. */
static const struct ntb_alt_reg atom_pri_reg = {
	.db_bell = ATOM_PDOORBELL_OFFSET,
	.db_mask = ATOM_PDBMSK_OFFSET,
	.spad = ATOM_SPAD_OFFSET,
};

/* Atom B2B doorbell and scratchpad offsets; no doorbell mask is given. */
static const struct ntb_alt_reg atom_b2b_reg = {
	.db_bell = ATOM_B2B_DOORBELL_OFFSET,
	.spad = ATOM_B2B_SPAD_OFFSET,
};

/*
 * Atom secondary-side translate registers.  Only the BAR2/BAR4 translate
 * offsets are populated; the base and limit offsets are compiled out below
 * and therefore stay zero.
 */
static const struct ntb_xlat_reg atom_sec_xlat = {
#if 0
	/* "FIXME" says the Linux driver. */
	.bar0_base = ATOM_SBAR0BASE_OFFSET,
	.bar2_base = ATOM_SBAR2BASE_OFFSET,
	.bar4_base = ATOM_SBAR4BASE_OFFSET,

	.bar2_limit = ATOM_SBAR2LMT_OFFSET,
	.bar4_limit = ATOM_SBAR4LMT_OFFSET,
#endif

	.bar2_xlat = ATOM_SBAR2XLAT_OFFSET,
	.bar4_xlat = ATOM_SBAR4XLAT_OFFSET,
};
537 
/* Xeon register layout: 16-bit doorbell registers and three memory windows. */
static const struct ntb_reg xeon_reg = {
	.ntb_ctl = XEON_NTBCNTL_OFFSET,
	.lnk_sta = XEON_LINK_STATUS_OFFSET,
	.db_size = sizeof(uint16_t),
	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2, NTB_B2B_BAR_3 },
};

/* Xeon primary-side doorbell, doorbell mask and scratchpad offsets. */
static const struct ntb_alt_reg xeon_pri_reg = {
	.db_bell = XEON_PDOORBELL_OFFSET,
	.db_mask = XEON_PDBMSK_OFFSET,
	.spad = XEON_SPAD_OFFSET,
};

/* Xeon B2B doorbell and scratchpad offsets; no doorbell mask is given. */
static const struct ntb_alt_reg xeon_b2b_reg = {
	.db_bell = XEON_B2B_DOORBELL_OFFSET,
	.spad = XEON_B2B_SPAD_OFFSET,
};

/* Xeon secondary-side BAR base, limit and translate register offsets. */
static const struct ntb_xlat_reg xeon_sec_xlat = {
	.bar0_base = XEON_SBAR0BASE_OFFSET,
	.bar2_base = XEON_SBAR2BASE_OFFSET,
	.bar4_base = XEON_SBAR4BASE_OFFSET,
	.bar5_base = XEON_SBAR5BASE_OFFSET,

	.bar2_limit = XEON_SBAR2LMT_OFFSET,
	.bar4_limit = XEON_SBAR4LMT_OFFSET,
	.bar5_limit = XEON_SBAR5LMT_OFFSET,

	.bar2_xlat = XEON_SBAR2XLAT_OFFSET,
	.bar4_xlat = XEON_SBAR4XLAT_OFFSET,
	.bar5_xlat = XEON_SBAR5XLAT_OFFSET,
};
570 
/*
 * Default B2B bus addresses for the upstream (usd) and downstream (dsd)
 * sides.  Both start from the same XEON_B2B_* defaults.  They are not const
 * because the hw.ntb.xeon_b2b.* tunables below can override the BAR2/4/5
 * members at boot.
 */
static struct ntb_b2b_addr xeon_b2b_usd_addr = {
	.bar0_addr = XEON_B2B_BAR0_ADDR,
	.bar2_addr64 = XEON_B2B_BAR2_ADDR64,
	.bar4_addr64 = XEON_B2B_BAR4_ADDR64,
	.bar4_addr32 = XEON_B2B_BAR4_ADDR32,
	.bar5_addr32 = XEON_B2B_BAR5_ADDR32,
};

static struct ntb_b2b_addr xeon_b2b_dsd_addr = {
	.bar0_addr = XEON_B2B_BAR0_ADDR,
	.bar2_addr64 = XEON_B2B_BAR2_ADDR64,
	.bar4_addr64 = XEON_B2B_BAR4_ADDR64,
	.bar4_addr32 = XEON_B2B_BAR4_ADDR32,
	.bar5_addr32 = XEON_B2B_BAR5_ADDR32,
};
586 
/*
 * hw.ntb.xeon_b2b.*: boot-time tunables that override the BAR2/BAR4/BAR5
 * members of xeon_b2b_usd_addr and xeon_b2b_dsd_addr.  bar0_addr has no
 * tunable.
 */
SYSCTL_NODE(_hw_ntb, OID_AUTO, xeon_b2b, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "B2B MW segment overrides -- MUST be the same on both sides");

/* Upstream-side overrides. */
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar2_addr64, CTLFLAG_RDTUN,
    &xeon_b2b_usd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
    "hardware, use this 64-bit address on the bus between the NTB devices for "
    "the window at BAR2, on the upstream side of the link.  MUST be the same "
    "address on both sides.");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr64, CTLFLAG_RDTUN,
    &xeon_b2b_usd_addr.bar4_addr64, 0, "See usd_bar2_addr64, but BAR4.");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr32, CTLFLAG_RDTUN,
    &xeon_b2b_usd_addr.bar4_addr32, 0, "See usd_bar2_addr64, but BAR4 "
    "(split-BAR mode).");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar5_addr32, CTLFLAG_RDTUN,
    &xeon_b2b_usd_addr.bar5_addr32, 0, "See usd_bar2_addr64, but BAR5 "
    "(split-BAR mode).");

/* Downstream-side overrides. */
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar2_addr64, CTLFLAG_RDTUN,
    &xeon_b2b_dsd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
    "hardware, use this 64-bit address on the bus between the NTB devices for "
    "the window at BAR2, on the downstream side of the link.  MUST be the same"
    " address on both sides.");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr64, CTLFLAG_RDTUN,
    &xeon_b2b_dsd_addr.bar4_addr64, 0, "See dsd_bar2_addr64, but BAR4.");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr32, CTLFLAG_RDTUN,
    &xeon_b2b_dsd_addr.bar4_addr32, 0, "See dsd_bar2_addr64, but BAR4 "
    "(split-BAR mode).");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar5_addr32, CTLFLAG_RDTUN,
    &xeon_b2b_dsd_addr.bar5_addr32, 0, "See dsd_bar2_addr64, but BAR5 "
    "(split-BAR mode).");
617 
618 /*
619  * OS <-> Driver interface structures
620  */
/* Defines the M_NTB malloc type, named "ntb_hw", for this driver. */
MALLOC_DEFINE(M_NTB, "ntb_hw", "ntb_hw driver memory allocations");
622 
623 /*
624  * OS <-> Driver linkage functions
625  */
626 static int
627 intel_ntb_probe(device_t device)
628 {
629 	struct ntb_hw_info *p;
630 
631 	p = intel_ntb_get_device_info(pci_get_devid(device));
632 	if (p == NULL)
633 		return (ENXIO);
634 
635 	device_set_desc(device, p->desc);
636 	return (0);
637 }
638 
639 static int
640 intel_ntb_attach(device_t device)
641 {
642 	struct ntb_softc *ntb;
643 	struct ntb_hw_info *p;
644 	int error;
645 
646 	ntb = device_get_softc(device);
647 	p = intel_ntb_get_device_info(pci_get_devid(device));
648 
649 	ntb->device = device;
650 	ntb->type = p->type;
651 	ntb->features = p->features;
652 	ntb->b2b_mw_idx = B2B_MW_DISABLED;
653 	ntb->msix_mw_idx = B2B_MW_DISABLED;
654 
655 	/* Heartbeat timer for NTB_ATOM since there is no link interrupt */
656 	callout_init(&ntb->heartbeat_timer, 1);
657 	callout_init(&ntb->lr_timer, 1);
658 	callout_init(&ntb->peer_msix_work, 1);
659 	mtx_init(&ntb->db_mask_lock, "ntb hw bits", NULL, MTX_SPIN);
660 
661 	if (ntb->type == NTB_ATOM)
662 		error = intel_ntb_detect_atom(ntb);
663 	else
664 		error = intel_ntb_detect_xeon(ntb);
665 	if (error != 0)
666 		goto out;
667 
668 	intel_ntb_detect_max_mw(ntb);
669 
670 	pci_enable_busmaster(ntb->device);
671 
672 	error = intel_ntb_map_pci_bars(ntb);
673 	if (error != 0)
674 		goto out;
675 	if (ntb->type == NTB_ATOM)
676 		error = intel_ntb_atom_init_dev(ntb);
677 	else
678 		error = intel_ntb_xeon_init_dev(ntb);
679 	if (error != 0)
680 		goto out;
681 
682 	intel_ntb_spad_clear(device);
683 
684 	intel_ntb_poll_link(ntb);
685 
686 	intel_ntb_sysctl_init(ntb);
687 
688 	/* Attach children to this controller */
689 	error = ntb_register_device(device);
690 
691 out:
692 	if (error != 0)
693 		intel_ntb_detach(device);
694 	return (error);
695 }
696 
/*
 * Detach method: unregister the children and release what attach set up.
 *
 * intel_ntb_attach() also calls this on its failure path, so it may run
 * against a partially initialized softc.
 *
 * Always returns 0.
 */
static int
intel_ntb_detach(device_t device)
{
	struct ntb_softc *ntb;

	ntb = device_get_softc(device);

	/* Detach & delete all children */
	ntb_unregister_device(device);

	/*
	 * Write the full valid-doorbell mask to the local doorbell mask
	 * register.  Skipped when self_reg is NULL.
	 */
	if (ntb->self_reg != NULL) {
		DB_MASK_LOCK(ntb);
		db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_valid_mask);
		DB_MASK_UNLOCK(ntb);
	}
	/* Stop all three callouts before tearing down what they use. */
	callout_drain(&ntb->heartbeat_timer);
	callout_drain(&ntb->lr_timer);
	callout_drain(&ntb->peer_msix_work);
	pci_disable_busmaster(ntb->device);
	if (ntb->type == NTB_XEON)
		intel_ntb_teardown_xeon(ntb);
	intel_ntb_teardown_interrupts(ntb);

	mtx_destroy(&ntb->db_mask_lock);

	/* Release the BAR0 DMA mapping and the BAR resources last. */
	intel_ntb_unmap_pci_bar(ntb);

	return (0);
}
726 
727 /*
728  * Driver internal routines
729  */
730 static inline enum ntb_bar
731 intel_ntb_mw_to_bar(struct ntb_softc *ntb, unsigned mw)
732 {
733 
734 	KASSERT(mw < ntb->mw_count,
735 	    ("%s: mw:%u > count:%u", __func__, mw, (unsigned)ntb->mw_count));
736 	KASSERT(ntb->reg->mw_bar[mw] != 0, ("invalid mw"));
737 
738 	return (ntb->reg->mw_bar[mw]);
739 }
740 
741 static inline bool
742 bar_is_64bit(struct ntb_softc *ntb, enum ntb_bar bar)
743 {
744 	/* XXX This assertion could be stronger. */
745 	KASSERT(bar < NTB_MAX_BARS, ("bogus bar"));
746 	return (bar < NTB_B2B_BAR_2 || !HAS_FEATURE(ntb, NTB_SPLIT_BAR));
747 }
748 
749 static inline void
750 bar_get_xlat_params(struct ntb_softc *ntb, enum ntb_bar bar, uint32_t *base,
751     uint32_t *xlat, uint32_t *lmt)
752 {
753 	uint32_t basev, lmtv, xlatv;
754 
755 	switch (bar) {
756 	case NTB_B2B_BAR_1:
757 		basev = ntb->xlat_reg->bar2_base;
758 		lmtv = ntb->xlat_reg->bar2_limit;
759 		xlatv = ntb->xlat_reg->bar2_xlat;
760 		break;
761 	case NTB_B2B_BAR_2:
762 		basev = ntb->xlat_reg->bar4_base;
763 		lmtv = ntb->xlat_reg->bar4_limit;
764 		xlatv = ntb->xlat_reg->bar4_xlat;
765 		break;
766 	case NTB_B2B_BAR_3:
767 		basev = ntb->xlat_reg->bar5_base;
768 		lmtv = ntb->xlat_reg->bar5_limit;
769 		xlatv = ntb->xlat_reg->bar5_xlat;
770 		break;
771 	default:
772 		KASSERT(bar >= NTB_B2B_BAR_1 && bar < NTB_MAX_BARS,
773 		    ("bad bar"));
774 		basev = lmtv = xlatv = 0;
775 		break;
776 	}
777 
778 	if (base != NULL)
779 		*base = basev;
780 	if (xlat != NULL)
781 		*xlat = xlatv;
782 	if (lmt != NULL)
783 		*lmt = lmtv;
784 }
785 
786 static int
787 intel_ntb_map_pci_bars(struct ntb_softc *ntb)
788 {
789 	struct ntb_pci_bar_info *bar;
790 	int rc;
791 
792 	bar = &ntb->bar_info[NTB_CONFIG_BAR];
793 	bar->pci_resource_id = PCIR_BAR(0);
794 	rc = map_mmr_bar(ntb, bar);
795 	if (rc != 0)
796 		goto out;
797 
798 	/*
799 	 * At least on Xeon v4 NTB device leaks to host some remote side
800 	 * BAR0 writes supposed to update scratchpad registers.  I am not
801 	 * sure why it happens, but it may be related to the fact that
802 	 * on a link side BAR0 is 32KB, while on a host side it is 64KB.
803 	 * Without this hack DMAR blocks those accesses as not allowed.
804 	 */
805 	if (bus_dma_tag_create(bus_get_dma_tag(ntb->device), 1, 0,
806 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
807 	    bar->size, 1, bar->size, 0, NULL, NULL, &ntb->bar0_dma_tag)) {
808 		device_printf(ntb->device, "Unable to create BAR0 tag\n");
809 		return (ENOMEM);
810 	}
811 	if (bus_dmamap_create(ntb->bar0_dma_tag, 0, &ntb->bar0_dma_map)) {
812 		device_printf(ntb->device, "Unable to create BAR0 map\n");
813 		return (ENOMEM);
814 	}
815 	if (bus_dma_iommu_load_ident(ntb->bar0_dma_tag, ntb->bar0_dma_map,
816 	    bar->pbase, bar->size, 0)) {
817 		device_printf(ntb->device, "Unable to load BAR0 map\n");
818 		return (ENOMEM);
819 	}
820 
821 	bar = &ntb->bar_info[NTB_B2B_BAR_1];
822 	bar->pci_resource_id = PCIR_BAR(2);
823 	rc = map_memory_window_bar(ntb, bar);
824 	if (rc != 0)
825 		goto out;
826 	bar->psz_off = XEON_PBAR23SZ_OFFSET;
827 	bar->ssz_off = XEON_SBAR23SZ_OFFSET;
828 	bar->pbarxlat_off = XEON_PBAR2XLAT_OFFSET;
829 
830 	bar = &ntb->bar_info[NTB_B2B_BAR_2];
831 	bar->pci_resource_id = PCIR_BAR(4);
832 	rc = map_memory_window_bar(ntb, bar);
833 	if (rc != 0)
834 		goto out;
835 	bar->psz_off = XEON_PBAR4SZ_OFFSET;
836 	bar->ssz_off = XEON_SBAR4SZ_OFFSET;
837 	bar->pbarxlat_off = XEON_PBAR4XLAT_OFFSET;
838 
839 	if (!HAS_FEATURE(ntb, NTB_SPLIT_BAR))
840 		goto out;
841 
842 	bar = &ntb->bar_info[NTB_B2B_BAR_3];
843 	bar->pci_resource_id = PCIR_BAR(5);
844 	rc = map_memory_window_bar(ntb, bar);
845 	bar->psz_off = XEON_PBAR5SZ_OFFSET;
846 	bar->ssz_off = XEON_SBAR5SZ_OFFSET;
847 	bar->pbarxlat_off = XEON_PBAR5XLAT_OFFSET;
848 
849 out:
850 	if (rc != 0)
851 		device_printf(ntb->device,
852 		    "unable to allocate pci resource\n");
853 	return (rc);
854 }
855 
856 static void
857 print_map_success(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar,
858     const char *kind)
859 {
860 
861 	device_printf(ntb->device,
862 	    "Mapped BAR%d v:[%p-%p] p:[%p-%p] (0x%jx bytes) (%s)\n",
863 	    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
864 	    (char *)bar->vbase + bar->size - 1,
865 	    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
866 	    (uintmax_t)bar->size, kind);
867 }
868 
869 static int
870 map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
871 {
872 
873 	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
874 	    &bar->pci_resource_id, RF_ACTIVE);
875 	if (bar->pci_resource == NULL)
876 		return (ENXIO);
877 
878 	save_bar_parameters(bar);
879 	bar->map_mode = VM_MEMATTR_UNCACHEABLE;
880 	print_map_success(ntb, bar, "mmr");
881 	return (0);
882 }
883 
884 static int
885 map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
886 {
887 	int rc;
888 	vm_memattr_t mapmode;
889 	uint8_t bar_size_bits = 0;
890 
891 	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
892 	    &bar->pci_resource_id, RF_ACTIVE);
893 
894 	if (bar->pci_resource == NULL)
895 		return (ENXIO);
896 
897 	save_bar_parameters(bar);
898 	/*
899 	 * Ivytown NTB BAR sizes are misreported by the hardware due to a
900 	 * hardware issue. To work around this, query the size it should be
901 	 * configured to by the device and modify the resource to correspond to
902 	 * this new size. The BIOS on systems with this problem is required to
903 	 * provide enough address space to allow the driver to make this change
904 	 * safely.
905 	 *
906 	 * Ideally I could have just specified the size when I allocated the
907 	 * resource like:
908 	 *  bus_alloc_resource(ntb->device,
909 	 *	SYS_RES_MEMORY, &bar->pci_resource_id, 0ul, ~0ul,
910 	 *	1ul << bar_size_bits, RF_ACTIVE);
911 	 * but the PCI driver does not honor the size in this call, so we have
912 	 * to modify it after the fact.
913 	 */
914 	if (HAS_FEATURE(ntb, NTB_BAR_SIZE_4K)) {
915 		if (bar->pci_resource_id == PCIR_BAR(2))
916 			bar_size_bits = pci_read_config(ntb->device,
917 			    XEON_PBAR23SZ_OFFSET, 1);
918 		else
919 			bar_size_bits = pci_read_config(ntb->device,
920 			    XEON_PBAR45SZ_OFFSET, 1);
921 
922 		rc = bus_adjust_resource(ntb->device, SYS_RES_MEMORY,
923 		    bar->pci_resource, bar->pbase,
924 		    bar->pbase + (1ul << bar_size_bits) - 1);
925 		if (rc != 0) {
926 			device_printf(ntb->device,
927 			    "unable to resize bar\n");
928 			return (rc);
929 		}
930 
931 		save_bar_parameters(bar);
932 	}
933 
934 	bar->map_mode = VM_MEMATTR_UNCACHEABLE;
935 	print_map_success(ntb, bar, "mw");
936 
937 	/*
938 	 * Optionally, mark MW BARs as anything other than UC to improve
939 	 * performance.
940 	 */
941 	mapmode = intel_ntb_pat_flags();
942 	if (mapmode == bar->map_mode)
943 		return (0);
944 
945 	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size, mapmode);
946 	if (rc == 0) {
947 		bar->map_mode = mapmode;
948 		device_printf(ntb->device,
949 		    "Marked BAR%d v:[%p-%p] p:[%p-%p] as "
950 		    "%s.\n",
951 		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
952 		    (char *)bar->vbase + bar->size - 1,
953 		    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
954 		    intel_ntb_vm_memattr_to_str(mapmode));
955 	} else
956 		device_printf(ntb->device,
957 		    "Unable to mark BAR%d v:[%p-%p] p:[%p-%p] as "
958 		    "%s: %d\n",
959 		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
960 		    (char *)bar->vbase + bar->size - 1,
961 		    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
962 		    intel_ntb_vm_memattr_to_str(mapmode), rc);
963 		/* Proceed anyway */
964 	return (0);
965 }
966 
967 static void
968 intel_ntb_unmap_pci_bar(struct ntb_softc *ntb)
969 {
970 	struct ntb_pci_bar_info *bar;
971 	int i;
972 
973 	if (ntb->bar0_dma_map != NULL) {
974 		bus_dmamap_unload(ntb->bar0_dma_tag, ntb->bar0_dma_map);
975 		bus_dmamap_destroy(ntb->bar0_dma_tag, ntb->bar0_dma_map);
976 	}
977 	if (ntb->bar0_dma_tag != NULL)
978 		bus_dma_tag_destroy(ntb->bar0_dma_tag);
979 	for (i = 0; i < NTB_MAX_BARS; i++) {
980 		bar = &ntb->bar_info[i];
981 		if (bar->pci_resource != NULL)
982 			bus_release_resource(ntb->device, SYS_RES_MEMORY,
983 			    bar->pci_resource_id, bar->pci_resource);
984 	}
985 }
986 
987 static int
988 intel_ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors)
989 {
990 	uint32_t i;
991 	int rc;
992 
993 	for (i = 0; i < num_vectors; i++) {
994 		ntb->int_info[i].rid = i + 1;
995 		ntb->int_info[i].res = bus_alloc_resource_any(ntb->device,
996 		    SYS_RES_IRQ, &ntb->int_info[i].rid, RF_ACTIVE);
997 		if (ntb->int_info[i].res == NULL) {
998 			device_printf(ntb->device,
999 			    "bus_alloc_resource failed\n");
1000 			return (ENOMEM);
1001 		}
1002 		ntb->int_info[i].tag = NULL;
1003 		ntb->allocated_interrupts++;
1004 		rc = bus_setup_intr(ntb->device, ntb->int_info[i].res,
1005 		    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_vec_isr,
1006 		    &ntb->msix_vec[i], &ntb->int_info[i].tag);
1007 		if (rc != 0) {
1008 			device_printf(ntb->device, "bus_setup_intr failed\n");
1009 			return (ENXIO);
1010 		}
1011 	}
1012 	return (0);
1013 }
1014 
1015 /*
1016  * The Linux NTB driver drops from MSI-X to legacy INTx if a unique vector
1017  * cannot be allocated for each MSI-X message.  JHB seems to think remapping
1018  * should be okay.  This tunable should enable us to test that hypothesis
1019  * when someone gets their hands on some Xeon hardware.
1020  */
1021 static int ntb_force_remap_mode;
1022 SYSCTL_INT(_hw_ntb, OID_AUTO, force_remap_mode, CTLFLAG_RDTUN,
1023     &ntb_force_remap_mode, 0, "If enabled, force MSI-X messages to be remapped"
1024     " to a smaller number of ithreads, even if the desired number are "
1025     "available");
1026 
1027 /*
1028  * In case it is NOT ok, give consumers an abort button.
1029  */
1030 static int ntb_prefer_intx;
1031 SYSCTL_INT(_hw_ntb, OID_AUTO, prefer_intx_to_remap, CTLFLAG_RDTUN,
1032     &ntb_prefer_intx, 0, "If enabled, prefer to use legacy INTx mode rather "
1033     "than remapping MSI-X messages over available slots (match Linux driver "
1034     "behavior)");
1035 
1036 /*
1037  * Remap the desired number of MSI-X messages to available ithreads in a simple
1038  * round-robin fashion.
1039  */
1040 static int
1041 intel_ntb_remap_msix(device_t dev, uint32_t desired, uint32_t avail)
1042 {
1043 	u_int *vectors;
1044 	uint32_t i;
1045 	int rc;
1046 
1047 	if (ntb_prefer_intx != 0)
1048 		return (ENXIO);
1049 
1050 	vectors = malloc(desired * sizeof(*vectors), M_NTB, M_ZERO | M_WAITOK);
1051 
1052 	for (i = 0; i < desired; i++)
1053 		vectors[i] = (i % avail) + 1;
1054 
1055 	rc = pci_remap_msix(dev, desired, vectors);
1056 	free(vectors, M_NTB);
1057 	return (rc);
1058 }
1059 
1060 static int
1061 intel_ntb_init_isr(struct ntb_softc *ntb)
1062 {
1063 	uint32_t desired_vectors, num_vectors;
1064 	int rc;
1065 
1066 	ntb->allocated_interrupts = 0;
1067 	ntb->last_ts = ticks;
1068 
1069 	/*
1070 	 * Mask all doorbell interrupts.  (Except link events!)
1071 	 */
1072 	DB_MASK_LOCK(ntb);
1073 	ntb->db_mask = ntb->db_valid_mask;
1074 	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
1075 	DB_MASK_UNLOCK(ntb);
1076 
1077 	num_vectors = desired_vectors = MIN(pci_msix_count(ntb->device),
1078 	    ntb->db_count);
1079 	if (desired_vectors >= 1) {
1080 		rc = pci_alloc_msix(ntb->device, &num_vectors);
1081 
1082 		if (ntb_force_remap_mode != 0 && rc == 0 &&
1083 		    num_vectors == desired_vectors)
1084 			num_vectors--;
1085 
1086 		if (rc == 0 && num_vectors < desired_vectors) {
1087 			rc = intel_ntb_remap_msix(ntb->device, desired_vectors,
1088 			    num_vectors);
1089 			if (rc == 0)
1090 				num_vectors = desired_vectors;
1091 			else
1092 				pci_release_msi(ntb->device);
1093 		}
1094 		if (rc != 0)
1095 			num_vectors = 1;
1096 	} else
1097 		num_vectors = 1;
1098 
1099 	if (ntb->type == NTB_XEON && num_vectors < ntb->db_vec_count) {
1100 		if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
1101 			device_printf(ntb->device,
1102 			    "Errata workaround does not support MSI or INTX\n");
1103 			return (EINVAL);
1104 		}
1105 
1106 		ntb->db_vec_count = 1;
1107 		ntb->db_vec_shift = XEON_DB_TOTAL_SHIFT;
1108 		rc = intel_ntb_setup_legacy_interrupt(ntb);
1109 	} else {
1110 		if (num_vectors - 1 != XEON_NONLINK_DB_MSIX_BITS &&
1111 		    HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
1112 			device_printf(ntb->device,
1113 			    "Errata workaround expects %d doorbell bits\n",
1114 			    XEON_NONLINK_DB_MSIX_BITS);
1115 			return (EINVAL);
1116 		}
1117 
1118 		intel_ntb_create_msix_vec(ntb, num_vectors);
1119 		rc = intel_ntb_setup_msix(ntb, num_vectors);
1120 	}
1121 	if (rc != 0) {
1122 		device_printf(ntb->device,
1123 		    "Error allocating interrupts: %d\n", rc);
1124 		intel_ntb_free_msix_vec(ntb);
1125 	}
1126 
1127 	return (rc);
1128 }
1129 
1130 static int
1131 intel_ntb_setup_legacy_interrupt(struct ntb_softc *ntb)
1132 {
1133 	int rc;
1134 
1135 	ntb->int_info[0].rid = 0;
1136 	ntb->int_info[0].res = bus_alloc_resource_any(ntb->device, SYS_RES_IRQ,
1137 	    &ntb->int_info[0].rid, RF_SHAREABLE|RF_ACTIVE);
1138 	if (ntb->int_info[0].res == NULL) {
1139 		device_printf(ntb->device, "bus_alloc_resource failed\n");
1140 		return (ENOMEM);
1141 	}
1142 
1143 	ntb->int_info[0].tag = NULL;
1144 	ntb->allocated_interrupts = 1;
1145 
1146 	rc = bus_setup_intr(ntb->device, ntb->int_info[0].res,
1147 	    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_irq_isr,
1148 	    ntb, &ntb->int_info[0].tag);
1149 	if (rc != 0) {
1150 		device_printf(ntb->device, "bus_setup_intr failed\n");
1151 		return (ENXIO);
1152 	}
1153 
1154 	return (0);
1155 }
1156 
1157 static void
1158 intel_ntb_teardown_interrupts(struct ntb_softc *ntb)
1159 {
1160 	struct ntb_int_info *current_int;
1161 	int i;
1162 
1163 	for (i = 0; i < ntb->allocated_interrupts; i++) {
1164 		current_int = &ntb->int_info[i];
1165 		if (current_int->tag != NULL)
1166 			bus_teardown_intr(ntb->device, current_int->res,
1167 			    current_int->tag);
1168 
1169 		if (current_int->res != NULL)
1170 			bus_release_resource(ntb->device, SYS_RES_IRQ,
1171 			    rman_get_rid(current_int->res), current_int->res);
1172 	}
1173 
1174 	intel_ntb_free_msix_vec(ntb);
1175 	pci_release_msi(ntb->device);
1176 }
1177 
1178 /*
1179  * Doorbell register and mask are 64-bit on Atom, 16-bit on Xeon.  Abstract it
1180  * out to make code clearer.
1181  */
1182 static inline uint64_t
1183 db_ioread(struct ntb_softc *ntb, uint64_t regoff)
1184 {
1185 
1186 	if (ntb->type == NTB_ATOM)
1187 		return (intel_ntb_reg_read(8, regoff));
1188 
1189 	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
1190 
1191 	return (intel_ntb_reg_read(2, regoff));
1192 }
1193 
1194 static inline void
1195 db_iowrite(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
1196 {
1197 
1198 	KASSERT((val & ~ntb->db_valid_mask) == 0,
1199 	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
1200 	     (uintmax_t)(val & ~ntb->db_valid_mask),
1201 	     (uintmax_t)ntb->db_valid_mask));
1202 
1203 	if (regoff == ntb->self_reg->db_mask)
1204 		DB_MASK_ASSERT(ntb, MA_OWNED);
1205 	db_iowrite_raw(ntb, regoff, val);
1206 }
1207 
1208 static inline void
1209 db_iowrite_raw(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
1210 {
1211 
1212 	if (ntb->type == NTB_ATOM) {
1213 		intel_ntb_reg_write(8, regoff, val);
1214 		return;
1215 	}
1216 
1217 	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
1218 	intel_ntb_reg_write(2, regoff, (uint16_t)val);
1219 }
1220 
1221 static void
1222 intel_ntb_db_set_mask(device_t dev, uint64_t bits)
1223 {
1224 	struct ntb_softc *ntb = device_get_softc(dev);
1225 
1226 	DB_MASK_LOCK(ntb);
1227 	ntb->db_mask |= bits;
1228 	if (!HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
1229 		db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
1230 	DB_MASK_UNLOCK(ntb);
1231 }
1232 
1233 static void
1234 intel_ntb_db_clear_mask(device_t dev, uint64_t bits)
1235 {
1236 	struct ntb_softc *ntb = device_get_softc(dev);
1237 	uint64_t ibits;
1238 	int i;
1239 
1240 	KASSERT((bits & ~ntb->db_valid_mask) == 0,
1241 	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
1242 	     (uintmax_t)(bits & ~ntb->db_valid_mask),
1243 	     (uintmax_t)ntb->db_valid_mask));
1244 
1245 	DB_MASK_LOCK(ntb);
1246 	ibits = ntb->fake_db & ntb->db_mask & bits;
1247 	ntb->db_mask &= ~bits;
1248 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
1249 		/* Simulate fake interrupts if unmasked DB bits are set. */
1250 		ntb->force_db |= ibits;
1251 		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
1252 			if ((ibits & intel_ntb_db_vector_mask(dev, i)) != 0)
1253 				swi_sched(ntb->int_info[i].tag, 0);
1254 		}
1255 	} else {
1256 		db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
1257 	}
1258 	DB_MASK_UNLOCK(ntb);
1259 }
1260 
1261 static uint64_t
1262 intel_ntb_db_read(device_t dev)
1263 {
1264 	struct ntb_softc *ntb = device_get_softc(dev);
1265 
1266 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
1267 		return (ntb->fake_db);
1268 
1269 	return (db_ioread(ntb, ntb->self_reg->db_bell));
1270 }
1271 
1272 static void
1273 intel_ntb_db_clear(device_t dev, uint64_t bits)
1274 {
1275 	struct ntb_softc *ntb = device_get_softc(dev);
1276 
1277 	KASSERT((bits & ~ntb->db_valid_mask) == 0,
1278 	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
1279 	     (uintmax_t)(bits & ~ntb->db_valid_mask),
1280 	     (uintmax_t)ntb->db_valid_mask));
1281 
1282 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
1283 		DB_MASK_LOCK(ntb);
1284 		ntb->fake_db &= ~bits;
1285 		DB_MASK_UNLOCK(ntb);
1286 		return;
1287 	}
1288 
1289 	db_iowrite(ntb, ntb->self_reg->db_bell, bits);
1290 }
1291 
1292 static inline uint64_t
1293 intel_ntb_vec_mask(struct ntb_softc *ntb, uint64_t db_vector)
1294 {
1295 	uint64_t shift, mask;
1296 
1297 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
1298 		/*
1299 		 * Remap vectors in custom way to make at least first
1300 		 * three doorbells to not generate stray events.
1301 		 * This breaks Linux compatibility (if one existed)
1302 		 * when more then one DB is used (not by if_ntb).
1303 		 */
1304 		if (db_vector < XEON_NONLINK_DB_MSIX_BITS - 1)
1305 			return (1 << db_vector);
1306 		if (db_vector == XEON_NONLINK_DB_MSIX_BITS - 1)
1307 			return (0x7ffc);
1308 	}
1309 
1310 	shift = ntb->db_vec_shift;
1311 	mask = (1ull << shift) - 1;
1312 	return (mask << (shift * db_vector));
1313 }
1314 
1315 static void
1316 intel_ntb_interrupt(struct ntb_softc *ntb, uint32_t vec)
1317 {
1318 	uint64_t vec_mask;
1319 
1320 	ntb->last_ts = ticks;
1321 	vec_mask = intel_ntb_vec_mask(ntb, vec);
1322 
1323 	if ((vec_mask & ntb->db_link_mask) != 0) {
1324 		if (intel_ntb_poll_link(ntb))
1325 			ntb_link_event(ntb->device);
1326 	}
1327 
1328 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP) &&
1329 	    (vec_mask & ntb->db_link_mask) == 0) {
1330 		DB_MASK_LOCK(ntb);
1331 
1332 		/*
1333 		 * Do not report same DB events again if not cleared yet,
1334 		 * unless the mask was just cleared for them and this
1335 		 * interrupt handler call can be the consequence of it.
1336 		 */
1337 		vec_mask &= ~ntb->fake_db | ntb->force_db;
1338 		ntb->force_db &= ~vec_mask;
1339 
1340 		/* Update our internal doorbell register. */
1341 		ntb->fake_db |= vec_mask;
1342 
1343 		/* Do not report masked DB events. */
1344 		vec_mask &= ~ntb->db_mask;
1345 
1346 		DB_MASK_UNLOCK(ntb);
1347 	}
1348 
1349 	if ((vec_mask & ntb->db_valid_mask) != 0)
1350 		ntb_db_event(ntb->device, vec);
1351 }
1352 
1353 static void
1354 ndev_vec_isr(void *arg)
1355 {
1356 	struct ntb_vec *nvec = arg;
1357 
1358 	intel_ntb_interrupt(nvec->ntb, nvec->num);
1359 }
1360 
/*
 * Legacy interrupt handler; 'arg' is the softc.  If we couldn't set up
 * MSI-X, we only have the one vector, so everything is reported as vector 0.
 */
static void
ndev_irq_isr(void *arg)
{
	struct ntb_softc *ntb;

	ntb = arg;
	intel_ntb_interrupt(ntb, 0);
}
1367 
1368 static int
1369 intel_ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors)
1370 {
1371 	uint32_t i;
1372 
1373 	ntb->msix_vec = malloc(num_vectors * sizeof(*ntb->msix_vec), M_NTB,
1374 	    M_ZERO | M_WAITOK);
1375 	for (i = 0; i < num_vectors; i++) {
1376 		ntb->msix_vec[i].num = i;
1377 		ntb->msix_vec[i].ntb = ntb;
1378 	}
1379 
1380 	return (0);
1381 }
1382 
1383 static void
1384 intel_ntb_free_msix_vec(struct ntb_softc *ntb)
1385 {
1386 
1387 	if (ntb->msix_vec == NULL)
1388 		return;
1389 
1390 	free(ntb->msix_vec, M_NTB);
1391 	ntb->msix_vec = NULL;
1392 }
1393 
1394 static void
1395 intel_ntb_get_msix_info(struct ntb_softc *ntb)
1396 {
1397 	struct pci_devinfo *dinfo;
1398 	struct pcicfg_msix *msix;
1399 	uint32_t laddr, data, i, offset;
1400 
1401 	dinfo = device_get_ivars(ntb->device);
1402 	msix = &dinfo->cfg.msix;
1403 
1404 	CTASSERT(XEON_NONLINK_DB_MSIX_BITS == nitems(ntb->msix_data));
1405 
1406 	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
1407 		offset = msix->msix_table_offset + i * PCI_MSIX_ENTRY_SIZE;
1408 
1409 		laddr = bus_read_4(msix->msix_table_res, offset +
1410 		    PCI_MSIX_ENTRY_LOWER_ADDR);
1411 		intel_ntb_printf(2, "local MSIX addr(%u): 0x%x\n", i, laddr);
1412 
1413 		KASSERT((laddr & MSI_INTEL_ADDR_BASE) == MSI_INTEL_ADDR_BASE,
1414 		    ("local MSIX addr 0x%x not in MSI base 0x%x", laddr,
1415 		     MSI_INTEL_ADDR_BASE));
1416 		ntb->msix_data[i].nmd_ofs = laddr;
1417 
1418 		data = bus_read_4(msix->msix_table_res, offset +
1419 		    PCI_MSIX_ENTRY_DATA);
1420 		intel_ntb_printf(2, "local MSIX data(%u): 0x%x\n", i, data);
1421 
1422 		ntb->msix_data[i].nmd_data = data;
1423 	}
1424 }
1425 
1426 static struct ntb_hw_info *
1427 intel_ntb_get_device_info(uint32_t device_id)
1428 {
1429 	struct ntb_hw_info *ep;
1430 
1431 	for (ep = pci_ids; ep < &pci_ids[nitems(pci_ids)]; ep++) {
1432 		if (ep->device_id == device_id)
1433 			return (ep);
1434 	}
1435 	return (NULL);
1436 }
1437 
1438 static void
1439 intel_ntb_teardown_xeon(struct ntb_softc *ntb)
1440 {
1441 
1442 	if (ntb->reg != NULL)
1443 		intel_ntb_link_disable(ntb->device);
1444 }
1445 
1446 static void
1447 intel_ntb_detect_max_mw(struct ntb_softc *ntb)
1448 {
1449 
1450 	if (ntb->type == NTB_ATOM) {
1451 		ntb->mw_count = ATOM_MW_COUNT;
1452 		return;
1453 	}
1454 
1455 	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
1456 		ntb->mw_count = XEON_HSX_SPLIT_MW_COUNT;
1457 	else
1458 		ntb->mw_count = XEON_SNB_MW_COUNT;
1459 }
1460 
1461 static int
1462 intel_ntb_detect_xeon(struct ntb_softc *ntb)
1463 {
1464 	uint8_t ppd, conn_type;
1465 
1466 	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 1);
1467 	ntb->ppd = ppd;
1468 
1469 	if ((ppd & XEON_PPD_DEV_TYPE) != 0)
1470 		ntb->dev_type = NTB_DEV_DSD;
1471 	else
1472 		ntb->dev_type = NTB_DEV_USD;
1473 
1474 	if ((ppd & XEON_PPD_SPLIT_BAR) != 0)
1475 		ntb->features |= NTB_SPLIT_BAR;
1476 
1477 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP) &&
1478 	    !HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
1479 		device_printf(ntb->device,
1480 		    "Can not apply SB01BASE_LOCKUP workaround "
1481 		    "with split BARs disabled!\n");
1482 		device_printf(ntb->device,
1483 		    "Expect system hangs under heavy NTB traffic!\n");
1484 		ntb->features &= ~NTB_SB01BASE_LOCKUP;
1485 	}
1486 
1487 	/*
1488 	 * SDOORBELL errata workaround gets in the way of SB01BASE_LOCKUP
1489 	 * errata workaround; only do one at a time.
1490 	 */
1491 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
1492 		ntb->features &= ~NTB_SDOORBELL_LOCKUP;
1493 
1494 	conn_type = ppd & XEON_PPD_CONN_TYPE;
1495 	switch (conn_type) {
1496 	case NTB_CONN_B2B:
1497 		ntb->conn_type = conn_type;
1498 		break;
1499 	case NTB_CONN_RP:
1500 	case NTB_CONN_TRANSPARENT:
1501 	default:
1502 		device_printf(ntb->device, "Unsupported connection type: %u\n",
1503 		    (unsigned)conn_type);
1504 		return (ENXIO);
1505 	}
1506 	return (0);
1507 }
1508 
1509 static int
1510 intel_ntb_detect_atom(struct ntb_softc *ntb)
1511 {
1512 	uint32_t ppd, conn_type;
1513 
1514 	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4);
1515 	ntb->ppd = ppd;
1516 
1517 	if ((ppd & ATOM_PPD_DEV_TYPE) != 0)
1518 		ntb->dev_type = NTB_DEV_DSD;
1519 	else
1520 		ntb->dev_type = NTB_DEV_USD;
1521 
1522 	conn_type = (ppd & ATOM_PPD_CONN_TYPE) >> 8;
1523 	switch (conn_type) {
1524 	case NTB_CONN_B2B:
1525 		ntb->conn_type = conn_type;
1526 		break;
1527 	default:
1528 		device_printf(ntb->device, "Unsupported NTB configuration\n");
1529 		return (ENXIO);
1530 	}
1531 	return (0);
1532 }
1533 
1534 static int
1535 intel_ntb_xeon_init_dev(struct ntb_softc *ntb)
1536 {
1537 	int rc;
1538 
1539 	ntb->spad_count		= XEON_SPAD_COUNT;
1540 	ntb->db_count		= XEON_DB_COUNT;
1541 	ntb->db_link_mask	= XEON_DB_LINK_BIT;
1542 	ntb->db_vec_count	= XEON_DB_MSIX_VECTOR_COUNT;
1543 	ntb->db_vec_shift	= XEON_DB_MSIX_VECTOR_SHIFT;
1544 
1545 	if (ntb->conn_type != NTB_CONN_B2B) {
1546 		device_printf(ntb->device, "Connection type %d not supported\n",
1547 		    ntb->conn_type);
1548 		return (ENXIO);
1549 	}
1550 
1551 	ntb->reg = &xeon_reg;
1552 	ntb->self_reg = &xeon_pri_reg;
1553 	ntb->peer_reg = &xeon_b2b_reg;
1554 	ntb->xlat_reg = &xeon_sec_xlat;
1555 
1556 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
1557 		ntb->force_db = ntb->fake_db = 0;
1558 		ntb->msix_mw_idx = (ntb->mw_count + g_ntb_msix_idx) %
1559 		    ntb->mw_count;
1560 		intel_ntb_printf(2, "Setting up MSIX mw idx %d means %u\n",
1561 		    g_ntb_msix_idx, ntb->msix_mw_idx);
1562 		rc = intel_ntb_mw_set_wc_internal(ntb, ntb->msix_mw_idx,
1563 		    VM_MEMATTR_UNCACHEABLE);
1564 		KASSERT(rc == 0, ("shouldn't fail"));
1565 	} else if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
1566 		/*
1567 		 * There is a Xeon hardware errata related to writes to SDOORBELL or
1568 		 * B2BDOORBELL in conjunction with inbound access to NTB MMIO space,
1569 		 * which may hang the system.  To workaround this, use a memory
1570 		 * window to access the interrupt and scratch pad registers on the
1571 		 * remote system.
1572 		 */
1573 		ntb->b2b_mw_idx = (ntb->mw_count + g_ntb_mw_idx) %
1574 		    ntb->mw_count;
1575 		intel_ntb_printf(2, "Setting up b2b mw idx %d means %u\n",
1576 		    g_ntb_mw_idx, ntb->b2b_mw_idx);
1577 		rc = intel_ntb_mw_set_wc_internal(ntb, ntb->b2b_mw_idx,
1578 		    VM_MEMATTR_UNCACHEABLE);
1579 		KASSERT(rc == 0, ("shouldn't fail"));
1580 	} else if (HAS_FEATURE(ntb, NTB_B2BDOORBELL_BIT14))
1581 		/*
1582 		 * HW Errata on bit 14 of b2bdoorbell register.  Writes will not be
1583 		 * mirrored to the remote system.  Shrink the number of bits by one,
1584 		 * since bit 14 is the last bit.
1585 		 *
1586 		 * On REGS_THRU_MW errata mode, we don't use the b2bdoorbell register
1587 		 * anyway.  Nor for non-B2B connection types.
1588 		 */
1589 		ntb->db_count = XEON_DB_COUNT - 1;
1590 
1591 	ntb->db_valid_mask = (1ull << ntb->db_count) - 1;
1592 
1593 	if (ntb->dev_type == NTB_DEV_USD)
1594 		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_dsd_addr,
1595 		    &xeon_b2b_usd_addr);
1596 	else
1597 		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_usd_addr,
1598 		    &xeon_b2b_dsd_addr);
1599 	if (rc != 0)
1600 		return (rc);
1601 
1602 	/* Enable Bus Master and Memory Space on the secondary side */
1603 	intel_ntb_reg_write(2, XEON_SPCICMD_OFFSET,
1604 	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
1605 
1606 	/*
1607 	 * Mask all doorbell interrupts.
1608 	 */
1609 	DB_MASK_LOCK(ntb);
1610 	ntb->db_mask = ntb->db_valid_mask;
1611 	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
1612 	DB_MASK_UNLOCK(ntb);
1613 
1614 	rc = intel_ntb_init_isr(ntb);
1615 	return (rc);
1616 }
1617 
1618 static int
1619 intel_ntb_atom_init_dev(struct ntb_softc *ntb)
1620 {
1621 	int error;
1622 
1623 	KASSERT(ntb->conn_type == NTB_CONN_B2B,
1624 	    ("Unsupported NTB configuration (%d)\n", ntb->conn_type));
1625 
1626 	ntb->spad_count		 = ATOM_SPAD_COUNT;
1627 	ntb->db_count		 = ATOM_DB_COUNT;
1628 	ntb->db_vec_count	 = ATOM_DB_MSIX_VECTOR_COUNT;
1629 	ntb->db_vec_shift	 = ATOM_DB_MSIX_VECTOR_SHIFT;
1630 	ntb->db_valid_mask	 = (1ull << ntb->db_count) - 1;
1631 
1632 	ntb->reg = &atom_reg;
1633 	ntb->self_reg = &atom_pri_reg;
1634 	ntb->peer_reg = &atom_b2b_reg;
1635 	ntb->xlat_reg = &atom_sec_xlat;
1636 
1637 	/*
1638 	 * FIXME - MSI-X bug on early Atom HW, remove once internal issue is
1639 	 * resolved.  Mask transaction layer internal parity errors.
1640 	 */
1641 	pci_write_config(ntb->device, 0xFC, 0x4, 4);
1642 
1643 	configure_atom_secondary_side_bars(ntb);
1644 
1645 	/* Enable Bus Master and Memory Space on the secondary side */
1646 	intel_ntb_reg_write(2, ATOM_SPCICMD_OFFSET,
1647 	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
1648 
1649 	error = intel_ntb_init_isr(ntb);
1650 	if (error != 0)
1651 		return (error);
1652 
1653 	/* Initiate PCI-E link training */
1654 	intel_ntb_link_enable(ntb->device, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
1655 
1656 	callout_reset(&ntb->heartbeat_timer, 0, atom_link_hb, ntb);
1657 
1658 	return (0);
1659 }
1660 
1661 /* XXX: Linux driver doesn't seem to do any of this for Atom. */
1662 static void
1663 configure_atom_secondary_side_bars(struct ntb_softc *ntb)
1664 {
1665 
1666 	if (ntb->dev_type == NTB_DEV_USD) {
1667 		intel_ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
1668 		    XEON_B2B_BAR2_ADDR64);
1669 		intel_ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
1670 		    XEON_B2B_BAR4_ADDR64);
1671 		intel_ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
1672 		intel_ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
1673 	} else {
1674 		intel_ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
1675 		    XEON_B2B_BAR2_ADDR64);
1676 		intel_ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
1677 		    XEON_B2B_BAR4_ADDR64);
1678 		intel_ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
1679 		intel_ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
1680 	}
1681 }
1682 
1683 
1684 /*
1685  * When working around Xeon SDOORBELL errata by remapping remote registers in a
1686  * MW, limit the B2B MW to half a MW.  By sharing a MW, half the shared MW
1687  * remains for use by a higher layer.
1688  *
1689  * Will only be used if working around SDOORBELL errata and the BIOS-configured
1690  * MW size is sufficiently large.
1691  */
1692 static unsigned int ntb_b2b_mw_share;
1693 SYSCTL_UINT(_hw_ntb, OID_AUTO, b2b_mw_share, CTLFLAG_RDTUN, &ntb_b2b_mw_share,
1694     0, "If enabled (non-zero), prefer to share half of the B2B peer register "
1695     "MW with higher level consumers.  Both sides of the NTB MUST set the same "
1696     "value here.");
1697 
1698 static void
1699 xeon_reset_sbar_size(struct ntb_softc *ntb, enum ntb_bar idx,
1700     enum ntb_bar regbar)
1701 {
1702 	struct ntb_pci_bar_info *bar;
1703 	uint8_t bar_sz;
1704 
1705 	if (!HAS_FEATURE(ntb, NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_3)
1706 		return;
1707 
1708 	bar = &ntb->bar_info[idx];
1709 	bar_sz = pci_read_config(ntb->device, bar->psz_off, 1);
1710 	if (idx == regbar) {
1711 		if (ntb->b2b_off != 0)
1712 			bar_sz--;
1713 		else
1714 			bar_sz = 0;
1715 	}
1716 	pci_write_config(ntb->device, bar->ssz_off, bar_sz, 1);
1717 	bar_sz = pci_read_config(ntb->device, bar->ssz_off, 1);
1718 	(void)bar_sz;
1719 }
1720 
1721 static void
1722 xeon_set_sbar_base_and_limit(struct ntb_softc *ntb, uint64_t bar_addr,
1723     enum ntb_bar idx, enum ntb_bar regbar)
1724 {
1725 	uint64_t reg_val;
1726 	uint32_t base_reg, lmt_reg;
1727 
1728 	bar_get_xlat_params(ntb, idx, &base_reg, NULL, &lmt_reg);
1729 	if (idx == regbar) {
1730 		if (ntb->b2b_off)
1731 			bar_addr += ntb->b2b_off;
1732 		else
1733 			bar_addr = 0;
1734 	}
1735 
1736 	if (!bar_is_64bit(ntb, idx)) {
1737 		intel_ntb_reg_write(4, base_reg, bar_addr);
1738 		reg_val = intel_ntb_reg_read(4, base_reg);
1739 		(void)reg_val;
1740 
1741 		intel_ntb_reg_write(4, lmt_reg, bar_addr);
1742 		reg_val = intel_ntb_reg_read(4, lmt_reg);
1743 		(void)reg_val;
1744 	} else {
1745 		intel_ntb_reg_write(8, base_reg, bar_addr);
1746 		reg_val = intel_ntb_reg_read(8, base_reg);
1747 		(void)reg_val;
1748 
1749 		intel_ntb_reg_write(8, lmt_reg, bar_addr);
1750 		reg_val = intel_ntb_reg_read(8, lmt_reg);
1751 		(void)reg_val;
1752 	}
1753 }
1754 
1755 static void
1756 xeon_set_pbar_xlat(struct ntb_softc *ntb, uint64_t base_addr, enum ntb_bar idx)
1757 {
1758 	struct ntb_pci_bar_info *bar;
1759 
1760 	bar = &ntb->bar_info[idx];
1761 	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_2) {
1762 		intel_ntb_reg_write(4, bar->pbarxlat_off, base_addr);
1763 		base_addr = intel_ntb_reg_read(4, bar->pbarxlat_off);
1764 	} else {
1765 		intel_ntb_reg_write(8, bar->pbarxlat_off, base_addr);
1766 		base_addr = intel_ntb_reg_read(8, bar->pbarxlat_off);
1767 	}
1768 	(void)base_addr;
1769 }
1770 
1771 static int
1772 xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
1773     const struct ntb_b2b_addr *peer_addr)
1774 {
1775 	struct ntb_pci_bar_info *b2b_bar;
1776 	vm_size_t bar_size;
1777 	uint64_t bar_addr;
1778 	enum ntb_bar b2b_bar_num, i;
1779 
1780 	if (ntb->b2b_mw_idx == B2B_MW_DISABLED) {
1781 		b2b_bar = NULL;
1782 		b2b_bar_num = NTB_CONFIG_BAR;
1783 		ntb->b2b_off = 0;
1784 	} else {
1785 		b2b_bar_num = intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx);
1786 		KASSERT(b2b_bar_num > 0 && b2b_bar_num < NTB_MAX_BARS,
1787 		    ("invalid b2b mw bar"));
1788 
1789 		b2b_bar = &ntb->bar_info[b2b_bar_num];
1790 		bar_size = b2b_bar->size;
1791 
1792 		if (ntb_b2b_mw_share != 0 &&
1793 		    (bar_size >> 1) >= XEON_B2B_MIN_SIZE)
1794 			ntb->b2b_off = bar_size >> 1;
1795 		else if (bar_size >= XEON_B2B_MIN_SIZE) {
1796 			ntb->b2b_off = 0;
1797 		} else {
1798 			device_printf(ntb->device,
1799 			    "B2B bar size is too small!\n");
1800 			return (EIO);
1801 		}
1802 	}
1803 
1804 	/*
1805 	 * Reset the secondary bar sizes to match the primary bar sizes.
1806 	 * (Except, disable or halve the size of the B2B secondary bar.)
1807 	 */
1808 	for (i = NTB_B2B_BAR_1; i < NTB_MAX_BARS; i++)
1809 		xeon_reset_sbar_size(ntb, i, b2b_bar_num);
1810 
1811 	bar_addr = 0;
1812 	if (b2b_bar_num == NTB_CONFIG_BAR)
1813 		bar_addr = addr->bar0_addr;
1814 	else if (b2b_bar_num == NTB_B2B_BAR_1)
1815 		bar_addr = addr->bar2_addr64;
1816 	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(ntb, NTB_SPLIT_BAR))
1817 		bar_addr = addr->bar4_addr64;
1818 	else if (b2b_bar_num == NTB_B2B_BAR_2)
1819 		bar_addr = addr->bar4_addr32;
1820 	else if (b2b_bar_num == NTB_B2B_BAR_3)
1821 		bar_addr = addr->bar5_addr32;
1822 	else
1823 		KASSERT(false, ("invalid bar"));
1824 
1825 	intel_ntb_reg_write(8, XEON_SBAR0BASE_OFFSET, bar_addr);
1826 
1827 	/*
1828 	 * Other SBARs are normally hit by the PBAR xlat, except for the b2b
1829 	 * register BAR.  The B2B BAR is either disabled above or configured
1830 	 * half-size.  It starts at PBAR xlat + offset.
1831 	 *
1832 	 * Also set up incoming BAR limits == base (zero length window).
1833 	 */
1834 	xeon_set_sbar_base_and_limit(ntb, addr->bar2_addr64, NTB_B2B_BAR_1,
1835 	    b2b_bar_num);
1836 	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
1837 		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr32,
1838 		    NTB_B2B_BAR_2, b2b_bar_num);
1839 		xeon_set_sbar_base_and_limit(ntb, addr->bar5_addr32,
1840 		    NTB_B2B_BAR_3, b2b_bar_num);
1841 	} else
1842 		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr64,
1843 		    NTB_B2B_BAR_2, b2b_bar_num);
1844 
1845 	/* Zero incoming translation addrs */
1846 	intel_ntb_reg_write(8, XEON_SBAR2XLAT_OFFSET, 0);
1847 	intel_ntb_reg_write(8, XEON_SBAR4XLAT_OFFSET, 0);
1848 
1849 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
1850 		uint32_t xlat_reg, lmt_reg;
1851 		enum ntb_bar bar_num;
1852 
1853 		/*
1854 		 * We point the chosen MSIX MW BAR xlat to remote LAPIC for
1855 		 * workaround
1856 		 */
1857 		bar_num = intel_ntb_mw_to_bar(ntb, ntb->msix_mw_idx);
1858 		bar_get_xlat_params(ntb, bar_num, NULL, &xlat_reg, &lmt_reg);
1859 		if (bar_is_64bit(ntb, bar_num)) {
1860 			intel_ntb_reg_write(8, xlat_reg, MSI_INTEL_ADDR_BASE);
1861 			ntb->msix_xlat = intel_ntb_reg_read(8, xlat_reg);
1862 			intel_ntb_reg_write(8, lmt_reg, 0);
1863 		} else {
1864 			intel_ntb_reg_write(4, xlat_reg, MSI_INTEL_ADDR_BASE);
1865 			ntb->msix_xlat = intel_ntb_reg_read(4, xlat_reg);
1866 			intel_ntb_reg_write(4, lmt_reg, 0);
1867 		}
1868 
1869 		ntb->peer_lapic_bar =  &ntb->bar_info[bar_num];
1870 	}
1871 	(void)intel_ntb_reg_read(8, XEON_SBAR2XLAT_OFFSET);
1872 	(void)intel_ntb_reg_read(8, XEON_SBAR4XLAT_OFFSET);
1873 
1874 	/* Zero outgoing translation limits (whole bar size windows) */
1875 	intel_ntb_reg_write(8, XEON_PBAR2LMT_OFFSET, 0);
1876 	intel_ntb_reg_write(8, XEON_PBAR4LMT_OFFSET, 0);
1877 
1878 	/* Set outgoing translation offsets */
1879 	xeon_set_pbar_xlat(ntb, peer_addr->bar2_addr64, NTB_B2B_BAR_1);
1880 	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
1881 		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr32, NTB_B2B_BAR_2);
1882 		xeon_set_pbar_xlat(ntb, peer_addr->bar5_addr32, NTB_B2B_BAR_3);
1883 	} else
1884 		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr64, NTB_B2B_BAR_2);
1885 
1886 	/* Set the translation offset for B2B registers */
1887 	bar_addr = 0;
1888 	if (b2b_bar_num == NTB_CONFIG_BAR)
1889 		bar_addr = peer_addr->bar0_addr;
1890 	else if (b2b_bar_num == NTB_B2B_BAR_1)
1891 		bar_addr = peer_addr->bar2_addr64;
1892 	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(ntb, NTB_SPLIT_BAR))
1893 		bar_addr = peer_addr->bar4_addr64;
1894 	else if (b2b_bar_num == NTB_B2B_BAR_2)
1895 		bar_addr = peer_addr->bar4_addr32;
1896 	else if (b2b_bar_num == NTB_B2B_BAR_3)
1897 		bar_addr = peer_addr->bar5_addr32;
1898 	else
1899 		KASSERT(false, ("invalid bar"));
1900 
1901 	/*
1902 	 * B2B_XLAT_OFFSET is a 64-bit register but can only be written 32 bits
1903 	 * at a time.
1904 	 */
1905 	intel_ntb_reg_write(4, XEON_B2B_XLAT_OFFSETL, bar_addr & 0xffffffff);
1906 	intel_ntb_reg_write(4, XEON_B2B_XLAT_OFFSETU, bar_addr >> 32);
1907 	return (0);
1908 }
1909 
1910 static inline bool
1911 _xeon_link_is_up(struct ntb_softc *ntb)
1912 {
1913 
1914 	if (ntb->conn_type == NTB_CONN_TRANSPARENT)
1915 		return (true);
1916 	return ((ntb->lnk_sta & NTB_LINK_STATUS_ACTIVE) != 0);
1917 }
1918 
1919 static inline bool
1920 link_is_up(struct ntb_softc *ntb)
1921 {
1922 
1923 	if (ntb->type == NTB_XEON)
1924 		return (_xeon_link_is_up(ntb) && (ntb->peer_msix_good ||
1925 		    !HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)));
1926 
1927 	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
1928 	return ((ntb->ntb_ctl & ATOM_CNTL_LINK_DOWN) == 0);
1929 }
1930 
1931 static inline bool
1932 atom_link_is_err(struct ntb_softc *ntb)
1933 {
1934 	uint32_t status;
1935 
1936 	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
1937 
1938 	status = intel_ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
1939 	if ((status & ATOM_LTSSMSTATEJMP_FORCEDETECT) != 0)
1940 		return (true);
1941 
1942 	status = intel_ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
1943 	return ((status & ATOM_IBIST_ERR_OFLOW) != 0);
1944 }
1945 
1946 /* Atom does not have link status interrupt, poll on that platform */
1947 static void
1948 atom_link_hb(void *arg)
1949 {
1950 	struct ntb_softc *ntb = arg;
1951 	sbintime_t timo, poll_ts;
1952 
1953 	timo = NTB_HB_TIMEOUT * hz;
1954 	poll_ts = ntb->last_ts + timo;
1955 
1956 	/*
1957 	 * Delay polling the link status if an interrupt was received, unless
1958 	 * the cached link status says the link is down.
1959 	 */
1960 	if ((sbintime_t)ticks - poll_ts < 0 && link_is_up(ntb)) {
1961 		timo = poll_ts - ticks;
1962 		goto out;
1963 	}
1964 
1965 	if (intel_ntb_poll_link(ntb))
1966 		ntb_link_event(ntb->device);
1967 
1968 	if (!link_is_up(ntb) && atom_link_is_err(ntb)) {
1969 		/* Link is down with error, proceed with recovery */
1970 		callout_reset(&ntb->lr_timer, 0, recover_atom_link, ntb);
1971 		return;
1972 	}
1973 
1974 out:
1975 	callout_reset(&ntb->heartbeat_timer, timo, atom_link_hb, ntb);
1976 }
1977 
1978 static void
1979 atom_perform_link_restart(struct ntb_softc *ntb)
1980 {
1981 	uint32_t status;
1982 
1983 	/* Driver resets the NTB ModPhy lanes - magic! */
1984 	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0xe0);
1985 	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x40);
1986 	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x60);
1987 	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0x60);
1988 
1989 	/* Driver waits 100ms to allow the NTB ModPhy to settle */
1990 	pause("ModPhy", hz / 10);
1991 
1992 	/* Clear AER Errors, write to clear */
1993 	status = intel_ntb_reg_read(4, ATOM_ERRCORSTS_OFFSET);
1994 	status &= PCIM_AER_COR_REPLAY_ROLLOVER;
1995 	intel_ntb_reg_write(4, ATOM_ERRCORSTS_OFFSET, status);
1996 
1997 	/* Clear unexpected electrical idle event in LTSSM, write to clear */
1998 	status = intel_ntb_reg_read(4, ATOM_LTSSMERRSTS0_OFFSET);
1999 	status |= ATOM_LTSSMERRSTS0_UNEXPECTEDEI;
2000 	intel_ntb_reg_write(4, ATOM_LTSSMERRSTS0_OFFSET, status);
2001 
2002 	/* Clear DeSkew Buffer error, write to clear */
2003 	status = intel_ntb_reg_read(4, ATOM_DESKEWSTS_OFFSET);
2004 	status |= ATOM_DESKEWSTS_DBERR;
2005 	intel_ntb_reg_write(4, ATOM_DESKEWSTS_OFFSET, status);
2006 
2007 	status = intel_ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
2008 	status &= ATOM_IBIST_ERR_OFLOW;
2009 	intel_ntb_reg_write(4, ATOM_IBSTERRRCRVSTS0_OFFSET, status);
2010 
2011 	/* Releases the NTB state machine to allow the link to retrain */
2012 	status = intel_ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
2013 	status &= ~ATOM_LTSSMSTATEJMP_FORCEDETECT;
2014 	intel_ntb_reg_write(4, ATOM_LTSSMSTATEJMP_OFFSET, status);
2015 }
2016 
2017 static int
2018 intel_ntb_port_number(device_t dev)
2019 {
2020 	struct ntb_softc *ntb = device_get_softc(dev);
2021 
2022 	return (ntb->dev_type == NTB_DEV_USD ? 0 : 1);
2023 }
2024 
2025 static int
2026 intel_ntb_peer_port_count(device_t dev)
2027 {
2028 
2029 	return (1);
2030 }
2031 
2032 static int
2033 intel_ntb_peer_port_number(device_t dev, int pidx)
2034 {
2035 	struct ntb_softc *ntb = device_get_softc(dev);
2036 
2037 	if (pidx != 0)
2038 		return (-EINVAL);
2039 
2040 	return (ntb->dev_type == NTB_DEV_USD ? 1 : 0);
2041 }
2042 
2043 static int
2044 intel_ntb_peer_port_idx(device_t dev, int port)
2045 {
2046 	int peer_port;
2047 
2048 	peer_port = intel_ntb_peer_port_number(dev, 0);
2049 	if (peer_port == -EINVAL || port != peer_port)
2050 		return (-EINVAL);
2051 
2052 	return (0);
2053 }
2054 
2055 static int
2056 intel_ntb_link_enable(device_t dev, enum ntb_speed speed __unused,
2057     enum ntb_width width __unused)
2058 {
2059 	struct ntb_softc *ntb = device_get_softc(dev);
2060 	uint32_t cntl;
2061 
2062 	intel_ntb_printf(2, "%s\n", __func__);
2063 
2064 	if (ntb->type == NTB_ATOM) {
2065 		pci_write_config(ntb->device, NTB_PPD_OFFSET,
2066 		    ntb->ppd | ATOM_PPD_INIT_LINK, 4);
2067 		return (0);
2068 	}
2069 
2070 	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
2071 		ntb_link_event(dev);
2072 		return (0);
2073 	}
2074 
2075 	cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
2076 	cntl &= ~(NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK);
2077 	cntl |= NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP;
2078 	cntl |= NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP;
2079 	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
2080 		cntl |= NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP;
2081 	intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
2082 	return (0);
2083 }
2084 
2085 static int
2086 intel_ntb_link_disable(device_t dev)
2087 {
2088 	struct ntb_softc *ntb = device_get_softc(dev);
2089 	uint32_t cntl;
2090 
2091 	intel_ntb_printf(2, "%s\n", __func__);
2092 
2093 	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
2094 		ntb_link_event(dev);
2095 		return (0);
2096 	}
2097 
2098 	cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
2099 	cntl &= ~(NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP);
2100 	cntl &= ~(NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP);
2101 	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
2102 		cntl &= ~(NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP);
2103 	cntl |= NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK;
2104 	intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
2105 	return (0);
2106 }
2107 
2108 static bool
2109 intel_ntb_link_enabled(device_t dev)
2110 {
2111 	struct ntb_softc *ntb = device_get_softc(dev);
2112 	uint32_t cntl;
2113 
2114 	if (ntb->type == NTB_ATOM) {
2115 		cntl = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4);
2116 		return ((cntl & ATOM_PPD_INIT_LINK) != 0);
2117 	}
2118 
2119 	if (ntb->conn_type == NTB_CONN_TRANSPARENT)
2120 		return (true);
2121 
2122 	cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
2123 	return ((cntl & NTB_CNTL_LINK_DISABLE) == 0);
2124 }
2125 
2126 static void
2127 recover_atom_link(void *arg)
2128 {
2129 	struct ntb_softc *ntb = arg;
2130 	unsigned speed, width, oldspeed, oldwidth;
2131 	uint32_t status32;
2132 
2133 	atom_perform_link_restart(ntb);
2134 
2135 	/*
2136 	 * There is a potential race between the 2 NTB devices recovering at
2137 	 * the same time.  If the times are the same, the link will not recover
2138 	 * and the driver will be stuck in this loop forever.  Add a random
2139 	 * interval to the recovery time to prevent this race.
2140 	 */
2141 	status32 = arc4random() % ATOM_LINK_RECOVERY_TIME;
2142 	pause("Link", (ATOM_LINK_RECOVERY_TIME + status32) * hz / 1000);
2143 
2144 	if (atom_link_is_err(ntb))
2145 		goto retry;
2146 
2147 	status32 = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
2148 	if ((status32 & ATOM_CNTL_LINK_DOWN) != 0)
2149 		goto out;
2150 
2151 	status32 = intel_ntb_reg_read(4, ntb->reg->lnk_sta);
2152 	width = NTB_LNK_STA_WIDTH(status32);
2153 	speed = status32 & NTB_LINK_SPEED_MASK;
2154 
2155 	oldwidth = NTB_LNK_STA_WIDTH(ntb->lnk_sta);
2156 	oldspeed = ntb->lnk_sta & NTB_LINK_SPEED_MASK;
2157 	if (oldwidth != width || oldspeed != speed)
2158 		goto retry;
2159 
2160 out:
2161 	callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz, atom_link_hb,
2162 	    ntb);
2163 	return;
2164 
2165 retry:
2166 	callout_reset(&ntb->lr_timer, NTB_HB_TIMEOUT * hz, recover_atom_link,
2167 	    ntb);
2168 }
2169 
2170 /*
2171  * Polls the HW link status register(s); returns true if something has changed.
2172  */
2173 static bool
2174 intel_ntb_poll_link(struct ntb_softc *ntb)
2175 {
2176 	uint32_t ntb_cntl;
2177 	uint16_t reg_val;
2178 
2179 	if (ntb->type == NTB_ATOM) {
2180 		ntb_cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
2181 		if (ntb_cntl == ntb->ntb_ctl)
2182 			return (false);
2183 
2184 		ntb->ntb_ctl = ntb_cntl;
2185 		ntb->lnk_sta = intel_ntb_reg_read(4, ntb->reg->lnk_sta);
2186 	} else {
2187 		db_iowrite_raw(ntb, ntb->self_reg->db_bell, ntb->db_link_mask);
2188 
2189 		reg_val = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2);
2190 		if (reg_val == ntb->lnk_sta)
2191 			return (false);
2192 
2193 		ntb->lnk_sta = reg_val;
2194 
2195 		if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
2196 			if (_xeon_link_is_up(ntb)) {
2197 				if (!ntb->peer_msix_good) {
2198 					callout_reset(&ntb->peer_msix_work, 0,
2199 					    intel_ntb_exchange_msix, ntb);
2200 					return (false);
2201 				}
2202 			} else {
2203 				ntb->peer_msix_good = false;
2204 				ntb->peer_msix_done = false;
2205 			}
2206 		}
2207 	}
2208 	return (true);
2209 }
2210 
2211 static inline enum ntb_speed
2212 intel_ntb_link_sta_speed(struct ntb_softc *ntb)
2213 {
2214 
2215 	if (!link_is_up(ntb))
2216 		return (NTB_SPEED_NONE);
2217 	return (ntb->lnk_sta & NTB_LINK_SPEED_MASK);
2218 }
2219 
2220 static inline enum ntb_width
2221 intel_ntb_link_sta_width(struct ntb_softc *ntb)
2222 {
2223 
2224 	if (!link_is_up(ntb))
2225 		return (NTB_WIDTH_NONE);
2226 	return (NTB_LNK_STA_WIDTH(ntb->lnk_sta));
2227 }
2228 
/* Static "debug_info" sysctl node declared under the _hw_ntb parent. */
SYSCTL_NODE(_hw_ntb, OID_AUTO, debug_info, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Driver state, statistics, and HW registers");

/*
 * Flags packed into the arg2 value handed to sysctl_handle_register().
 * Bits 31:30 select the register width; the bits left after masking with
 * NTB_REGFLAGS_MASK are the register offset.
 */
#define NTB_REGSZ_MASK	(3ul << 30)
#define NTB_REG_64	(1ul << 30)
#define NTB_REG_32	(2ul << 30)
#define NTB_REG_16	(3ul << 30)
#define NTB_REG_8	(0ul << 30)

/* Read the register through db_ioread() (doorbell access path). */
#define NTB_DB_READ	(1ul << 29)
/* Read the register from PCI config space via pci_read_config(). */
#define NTB_PCI_REG	(1ul << 28)
/* All flag bits; everything outside this mask is the register offset. */
#define NTB_REGFLAGS_MASK	(NTB_REGSZ_MASK | NTB_DB_READ | NTB_PCI_REG)
2241 
2242 static void
2243 intel_ntb_sysctl_init(struct ntb_softc *ntb)
2244 {
2245 	struct sysctl_oid_list *globals, *tree_par, *regpar, *statpar, *errpar;
2246 	struct sysctl_ctx_list *ctx;
2247 	struct sysctl_oid *tree, *tmptree;
2248 
2249 	ctx = device_get_sysctl_ctx(ntb->device);
2250 	globals = SYSCTL_CHILDREN(device_get_sysctl_tree(ntb->device));
2251 
2252 	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "link_status",
2253 	    CTLFLAG_RD | CTLTYPE_STRING | CTLFLAG_NEEDGIANT, ntb, 0,
2254 	    sysctl_handle_link_status_human, "A",
2255 	    "Link status (human readable)");
2256 	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "active",
2257 	    CTLFLAG_RD | CTLTYPE_UINT | CTLFLAG_NEEDGIANT, ntb, 0,
2258 	    sysctl_handle_link_status, "IU",
2259 	    "Link status (1=active, 0=inactive)");
2260 	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "admin_up",
2261 	    CTLFLAG_RW | CTLTYPE_UINT | CTLFLAG_NEEDGIANT, ntb, 0,
2262 	    sysctl_handle_link_admin, "IU",
2263 	    "Set/get interface status (1=UP, 0=DOWN)");
2264 
2265 	tree = SYSCTL_ADD_NODE(ctx, globals, OID_AUTO, "debug_info",
2266 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
2267 	    "Driver state, statistics, and HW registers");
2268 	tree_par = SYSCTL_CHILDREN(tree);
2269 
2270 	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "conn_type", CTLFLAG_RD,
2271 	    &ntb->conn_type, 0, "0 - Transparent; 1 - B2B; 2 - Root Port");
2272 	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "dev_type", CTLFLAG_RD,
2273 	    &ntb->dev_type, 0, "0 - USD; 1 - DSD");
2274 	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ppd", CTLFLAG_RD,
2275 	    &ntb->ppd, 0, "Raw PPD register (cached)");
2276 
2277 	if (ntb->b2b_mw_idx != B2B_MW_DISABLED) {
2278 		SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "b2b_idx", CTLFLAG_RD,
2279 		    &ntb->b2b_mw_idx, 0,
2280 		    "Index of the MW used for B2B remote register access");
2281 		SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "b2b_off",
2282 		    CTLFLAG_RD, &ntb->b2b_off,
2283 		    "If non-zero, offset of B2B register region in shared MW");
2284 	}
2285 
2286 	SYSCTL_ADD_PROC(ctx, tree_par, OID_AUTO, "features",
2287 	    CTLFLAG_RD | CTLTYPE_STRING | CTLFLAG_NEEDGIANT, ntb, 0,
2288 	    sysctl_handle_features, "A", "Features/errata of this NTB device");
2289 
2290 	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ntb_ctl", CTLFLAG_RD,
2291 	    __DEVOLATILE(uint32_t *, &ntb->ntb_ctl), 0,
2292 	    "NTB CTL register (cached)");
2293 	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "lnk_sta", CTLFLAG_RD,
2294 	    __DEVOLATILE(uint32_t *, &ntb->lnk_sta), 0,
2295 	    "LNK STA register (cached)");
2296 
2297 	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "mw_count", CTLFLAG_RD,
2298 	    &ntb->mw_count, 0, "MW count");
2299 	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "spad_count", CTLFLAG_RD,
2300 	    &ntb->spad_count, 0, "Scratchpad count");
2301 	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_count", CTLFLAG_RD,
2302 	    &ntb->db_count, 0, "Doorbell count");
2303 	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_count", CTLFLAG_RD,
2304 	    &ntb->db_vec_count, 0, "Doorbell vector count");
2305 	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_shift", CTLFLAG_RD,
2306 	    &ntb->db_vec_shift, 0, "Doorbell vector shift");
2307 
2308 	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_valid_mask", CTLFLAG_RD,
2309 	    &ntb->db_valid_mask, "Doorbell valid mask");
2310 	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_link_mask", CTLFLAG_RD,
2311 	    &ntb->db_link_mask, "Doorbell link mask");
2312 	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_mask", CTLFLAG_RD,
2313 	    &ntb->db_mask, "Doorbell mask (cached)");
2314 
2315 	tmptree = SYSCTL_ADD_NODE(ctx, tree_par, OID_AUTO, "registers",
2316 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
2317 	    "Raw HW registers (big-endian)");
2318 	regpar = SYSCTL_CHILDREN(tmptree);
2319 
2320 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ntbcntl",
2321 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2322 	    NTB_REG_32 | ntb->reg->ntb_ctl, sysctl_handle_register, "IU",
2323 	    "NTB Control register");
2324 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcap",
2325 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2326 	    NTB_REG_32 | 0x19c, sysctl_handle_register, "IU",
2327 	    "NTB Link Capabilities");
2328 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcon",
2329 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2330 	    NTB_REG_32 | 0x1a0, sysctl_handle_register, "IU",
2331 	    "NTB Link Control register");
2332 
2333 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_mask",
2334 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2335 	    NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_mask,
2336 	    sysctl_handle_register, "QU", "Doorbell mask register");
2337 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_bell",
2338 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2339 	    NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_bell,
2340 	    sysctl_handle_register, "QU", "Doorbell register");
2341 
2342 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat23",
2343 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2344 	    NTB_REG_64 | ntb->xlat_reg->bar2_xlat,
2345 	    sysctl_handle_register, "QU", "Incoming XLAT23 register");
2346 	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
2347 		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat4",
2348 		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2349 		    NTB_REG_32 | ntb->xlat_reg->bar4_xlat,
2350 		    sysctl_handle_register, "IU", "Incoming XLAT4 register");
2351 		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat5",
2352 		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2353 		    NTB_REG_32 | ntb->xlat_reg->bar5_xlat,
2354 		    sysctl_handle_register, "IU", "Incoming XLAT5 register");
2355 	} else {
2356 		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat45",
2357 		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2358 		    NTB_REG_64 | ntb->xlat_reg->bar4_xlat,
2359 		    sysctl_handle_register, "QU", "Incoming XLAT45 register");
2360 	}
2361 
2362 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt23",
2363 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2364 	    NTB_REG_64 | ntb->xlat_reg->bar2_limit,
2365 	    sysctl_handle_register, "QU", "Incoming LMT23 register");
2366 	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
2367 		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt4",
2368 		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2369 		    NTB_REG_32 | ntb->xlat_reg->bar4_limit,
2370 		    sysctl_handle_register, "IU", "Incoming LMT4 register");
2371 		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt5",
2372 		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2373 		    NTB_REG_32 | ntb->xlat_reg->bar5_limit,
2374 		    sysctl_handle_register, "IU", "Incoming LMT5 register");
2375 	} else {
2376 		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt45",
2377 		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2378 		    NTB_REG_64 | ntb->xlat_reg->bar4_limit,
2379 		    sysctl_handle_register, "QU", "Incoming LMT45 register");
2380 	}
2381 
2382 	if (ntb->type == NTB_ATOM)
2383 		return;
2384 
2385 	tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_stats",
2386 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Xeon HW statistics");
2387 	statpar = SYSCTL_CHILDREN(tmptree);
2388 	SYSCTL_ADD_PROC(ctx, statpar, OID_AUTO, "upstream_mem_miss",
2389 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2390 	    NTB_REG_16 | XEON_USMEMMISS_OFFSET,
2391 	    sysctl_handle_register, "SU", "Upstream Memory Miss");
2392 
2393 	tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_hw_err",
2394 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Xeon HW errors");
2395 	errpar = SYSCTL_CHILDREN(tmptree);
2396 
2397 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ppd",
2398 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2399 	    NTB_REG_8 | NTB_PCI_REG | NTB_PPD_OFFSET,
2400 	    sysctl_handle_register, "CU", "PPD");
2401 
2402 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar23_sz",
2403 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2404 	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR23SZ_OFFSET,
2405 	    sysctl_handle_register, "CU", "PBAR23 SZ (log2)");
2406 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar4_sz",
2407 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2408 	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR4SZ_OFFSET,
2409 	    sysctl_handle_register, "CU", "PBAR4 SZ (log2)");
2410 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar5_sz",
2411 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2412 	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR5SZ_OFFSET,
2413 	    sysctl_handle_register, "CU", "PBAR5 SZ (log2)");
2414 
2415 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_sz",
2416 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2417 	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR23SZ_OFFSET,
2418 	    sysctl_handle_register, "CU", "SBAR23 SZ (log2)");
2419 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_sz",
2420 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2421 	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR4SZ_OFFSET,
2422 	    sysctl_handle_register, "CU", "SBAR4 SZ (log2)");
2423 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_sz",
2424 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2425 	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR5SZ_OFFSET,
2426 	    sysctl_handle_register, "CU", "SBAR5 SZ (log2)");
2427 
2428 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "devsts",
2429 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2430 	    NTB_REG_16 | NTB_PCI_REG | XEON_DEVSTS_OFFSET,
2431 	    sysctl_handle_register, "SU", "DEVSTS");
2432 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnksts",
2433 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2434 	    NTB_REG_16 | NTB_PCI_REG | XEON_LINK_STATUS_OFFSET,
2435 	    sysctl_handle_register, "SU", "LNKSTS");
2436 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "slnksts",
2437 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2438 	    NTB_REG_16 | NTB_PCI_REG | XEON_SLINK_STATUS_OFFSET,
2439 	    sysctl_handle_register, "SU", "SLNKSTS");
2440 
2441 	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "uncerrsts",
2442 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2443 	    NTB_REG_32 | NTB_PCI_REG | XEON_UNCERRSTS_OFFSET,
2444 	    sysctl_handle_register, "IU", "UNCERRSTS");
2445 	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "corerrsts",
2446 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2447 	    NTB_REG_32 | NTB_PCI_REG | XEON_CORERRSTS_OFFSET,
2448 	    sysctl_handle_register, "IU", "CORERRSTS");
2449 
2450 	if (ntb->conn_type != NTB_CONN_B2B)
2451 		return;
2452 
2453 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat01l",
2454 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2455 	    NTB_REG_32 | XEON_B2B_XLAT_OFFSETL,
2456 	    sysctl_handle_register, "IU", "Outgoing XLAT0L register");
2457 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat01u",
2458 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2459 	    NTB_REG_32 | XEON_B2B_XLAT_OFFSETU,
2460 	    sysctl_handle_register, "IU", "Outgoing XLAT0U register");
2461 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat23",
2462 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2463 	    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off,
2464 	    sysctl_handle_register, "QU", "Outgoing XLAT23 register");
2465 	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
2466 		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat4",
2467 		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2468 		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
2469 		    sysctl_handle_register, "IU", "Outgoing XLAT4 register");
2470 		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat5",
2471 		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2472 		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off,
2473 		    sysctl_handle_register, "IU", "Outgoing XLAT5 register");
2474 	} else {
2475 		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat45",
2476 		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2477 		    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
2478 		    sysctl_handle_register, "QU", "Outgoing XLAT45 register");
2479 	}
2480 
2481 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt23",
2482 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2483 	    NTB_REG_64 | XEON_PBAR2LMT_OFFSET,
2484 	    sysctl_handle_register, "QU", "Outgoing LMT23 register");
2485 	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
2486 		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt4",
2487 		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2488 		    NTB_REG_32 | XEON_PBAR4LMT_OFFSET,
2489 		    sysctl_handle_register, "IU", "Outgoing LMT4 register");
2490 		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt5",
2491 		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2492 		    NTB_REG_32 | XEON_PBAR5LMT_OFFSET,
2493 		    sysctl_handle_register, "IU", "Outgoing LMT5 register");
2494 	} else {
2495 		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt45",
2496 		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2497 		    NTB_REG_64 | XEON_PBAR4LMT_OFFSET,
2498 		    sysctl_handle_register, "QU", "Outgoing LMT45 register");
2499 	}
2500 
2501 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar01_base",
2502 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2503 	    NTB_REG_64 | ntb->xlat_reg->bar0_base,
2504 	    sysctl_handle_register, "QU", "Secondary BAR01 base register");
2505 	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_base",
2506 	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2507 	    NTB_REG_64 | ntb->xlat_reg->bar2_base,
2508 	    sysctl_handle_register, "QU", "Secondary BAR23 base register");
2509 	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
2510 		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_base",
2511 		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2512 		    NTB_REG_32 | ntb->xlat_reg->bar4_base,
2513 		    sysctl_handle_register, "IU",
2514 		    "Secondary BAR4 base register");
2515 		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_base",
2516 		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2517 		    NTB_REG_32 | ntb->xlat_reg->bar5_base,
2518 		    sysctl_handle_register, "IU",
2519 		    "Secondary BAR5 base register");
2520 	} else {
2521 		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar45_base",
2522 		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_NEEDGIANT, ntb,
2523 		    NTB_REG_64 | ntb->xlat_reg->bar4_base,
2524 		    sysctl_handle_register, "QU",
2525 		    "Secondary BAR45 base register");
2526 	}
2527 }
2528 
2529 static int
2530 sysctl_handle_features(SYSCTL_HANDLER_ARGS)
2531 {
2532 	struct ntb_softc *ntb = arg1;
2533 	struct sbuf sb;
2534 	int error;
2535 
2536 	sbuf_new_for_sysctl(&sb, NULL, 256, req);
2537 
2538 	sbuf_printf(&sb, "%b", ntb->features, NTB_FEATURES_STR);
2539 	error = sbuf_finish(&sb);
2540 	sbuf_delete(&sb);
2541 
2542 	if (error || !req->newptr)
2543 		return (error);
2544 	return (EINVAL);
2545 }
2546 
2547 static int
2548 sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS)
2549 {
2550 	struct ntb_softc *ntb = arg1;
2551 	unsigned old, new;
2552 	int error;
2553 
2554 	old = intel_ntb_link_enabled(ntb->device);
2555 
2556 	error = SYSCTL_OUT(req, &old, sizeof(old));
2557 	if (error != 0 || req->newptr == NULL)
2558 		return (error);
2559 
2560 	error = SYSCTL_IN(req, &new, sizeof(new));
2561 	if (error != 0)
2562 		return (error);
2563 
2564 	intel_ntb_printf(0, "Admin set interface state to '%sabled'\n",
2565 	    (new != 0)? "en" : "dis");
2566 
2567 	if (new != 0)
2568 		error = intel_ntb_link_enable(ntb->device, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
2569 	else
2570 		error = intel_ntb_link_disable(ntb->device);
2571 	return (error);
2572 }
2573 
2574 static int
2575 sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS)
2576 {
2577 	struct ntb_softc *ntb = arg1;
2578 	struct sbuf sb;
2579 	enum ntb_speed speed;
2580 	enum ntb_width width;
2581 	int error;
2582 
2583 	sbuf_new_for_sysctl(&sb, NULL, 32, req);
2584 
2585 	if (intel_ntb_link_is_up(ntb->device, &speed, &width))
2586 		sbuf_printf(&sb, "up / PCIe Gen %u / Width x%u",
2587 		    (unsigned)speed, (unsigned)width);
2588 	else
2589 		sbuf_printf(&sb, "down");
2590 
2591 	error = sbuf_finish(&sb);
2592 	sbuf_delete(&sb);
2593 
2594 	if (error || !req->newptr)
2595 		return (error);
2596 	return (EINVAL);
2597 }
2598 
2599 static int
2600 sysctl_handle_link_status(SYSCTL_HANDLER_ARGS)
2601 {
2602 	struct ntb_softc *ntb = arg1;
2603 	unsigned res;
2604 	int error;
2605 
2606 	res = intel_ntb_link_is_up(ntb->device, NULL, NULL);
2607 
2608 	error = SYSCTL_OUT(req, &res, sizeof(res));
2609 	if (error || !req->newptr)
2610 		return (error);
2611 	return (EINVAL);
2612 }
2613 
2614 static int
2615 sysctl_handle_register(SYSCTL_HANDLER_ARGS)
2616 {
2617 	struct ntb_softc *ntb;
2618 	const void *outp;
2619 	uintptr_t sz;
2620 	uint64_t umv;
2621 	char be[sizeof(umv)];
2622 	size_t outsz;
2623 	uint32_t reg;
2624 	bool db, pci;
2625 	int error;
2626 
2627 	ntb = arg1;
2628 	reg = arg2 & ~NTB_REGFLAGS_MASK;
2629 	sz = arg2 & NTB_REGSZ_MASK;
2630 	db = (arg2 & NTB_DB_READ) != 0;
2631 	pci = (arg2 & NTB_PCI_REG) != 0;
2632 
2633 	KASSERT(!(db && pci), ("bogus"));
2634 
2635 	if (db) {
2636 		KASSERT(sz == NTB_REG_64, ("bogus"));
2637 		umv = db_ioread(ntb, reg);
2638 		outsz = sizeof(uint64_t);
2639 	} else {
2640 		switch (sz) {
2641 		case NTB_REG_64:
2642 			if (pci)
2643 				umv = pci_read_config(ntb->device, reg, 8);
2644 			else
2645 				umv = intel_ntb_reg_read(8, reg);
2646 			outsz = sizeof(uint64_t);
2647 			break;
2648 		case NTB_REG_32:
2649 			if (pci)
2650 				umv = pci_read_config(ntb->device, reg, 4);
2651 			else
2652 				umv = intel_ntb_reg_read(4, reg);
2653 			outsz = sizeof(uint32_t);
2654 			break;
2655 		case NTB_REG_16:
2656 			if (pci)
2657 				umv = pci_read_config(ntb->device, reg, 2);
2658 			else
2659 				umv = intel_ntb_reg_read(2, reg);
2660 			outsz = sizeof(uint16_t);
2661 			break;
2662 		case NTB_REG_8:
2663 			if (pci)
2664 				umv = pci_read_config(ntb->device, reg, 1);
2665 			else
2666 				umv = intel_ntb_reg_read(1, reg);
2667 			outsz = sizeof(uint8_t);
2668 			break;
2669 		default:
2670 			panic("bogus");
2671 			break;
2672 		}
2673 	}
2674 
2675 	/* Encode bigendian so that sysctl -x is legible. */
2676 	be64enc(be, umv);
2677 	outp = ((char *)be) + sizeof(umv) - outsz;
2678 
2679 	error = SYSCTL_OUT(req, outp, outsz);
2680 	if (error || !req->newptr)
2681 		return (error);
2682 	return (EINVAL);
2683 }
2684 
2685 static unsigned
2686 intel_ntb_user_mw_to_idx(struct ntb_softc *ntb, unsigned uidx)
2687 {
2688 
2689 	if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 &&
2690 	    uidx >= ntb->b2b_mw_idx) ||
2691 	    (ntb->msix_mw_idx != B2B_MW_DISABLED && uidx >= ntb->msix_mw_idx))
2692 		uidx++;
2693 	if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 &&
2694 	    uidx >= ntb->b2b_mw_idx) &&
2695 	    (ntb->msix_mw_idx != B2B_MW_DISABLED && uidx >= ntb->msix_mw_idx))
2696 		uidx++;
2697 	return (uidx);
2698 }
2699 
#ifndef EARLY_AP_STARTUP
/*
 * Set to 1 by the SYSINIT below; intel_ntb_exchange_msix() defers the MSI-X
 * negotiation while it is still zero.
 */
static int msix_ready;

/* SYSINIT hook: flag that MSI-X negotiation may proceed. */
static void
intel_ntb_msix_ready(void *arg __unused)
{

	msix_ready = 1;
}
/* Registered at SI_SUB_SMP / SI_ORDER_ANY. */
SYSINIT(intel_ntb_msix_ready, SI_SUB_SMP, SI_ORDER_ANY,
    intel_ntb_msix_ready, NULL);
#endif
2712 
2713 static void
2714 intel_ntb_exchange_msix(void *ctx)
2715 {
2716 	struct ntb_softc *ntb;
2717 	uint32_t val;
2718 	unsigned i;
2719 
2720 	ntb = ctx;
2721 
2722 	if (ntb->peer_msix_good)
2723 		goto msix_good;
2724 	if (ntb->peer_msix_done)
2725 		goto msix_done;
2726 
2727 #ifndef EARLY_AP_STARTUP
2728 	/* Block MSIX negotiation until SMP started and IRQ reshuffled. */
2729 	if (!msix_ready)
2730 		goto reschedule;
2731 #endif
2732 
2733 	intel_ntb_get_msix_info(ntb);
2734 	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
2735 		intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_DATA0 + i,
2736 		    ntb->msix_data[i].nmd_data);
2737 		intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_OFS0 + i,
2738 		    ntb->msix_data[i].nmd_ofs - ntb->msix_xlat);
2739 	}
2740 	intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_GUARD, NTB_MSIX_VER_GUARD);
2741 
2742 	intel_ntb_spad_read(ntb->device, NTB_MSIX_GUARD, &val);
2743 	if (val != NTB_MSIX_VER_GUARD)
2744 		goto reschedule;
2745 
2746 	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
2747 		intel_ntb_spad_read(ntb->device, NTB_MSIX_DATA0 + i, &val);
2748 		intel_ntb_printf(2, "remote MSIX data(%u): 0x%x\n", i, val);
2749 		ntb->peer_msix_data[i].nmd_data = val;
2750 		intel_ntb_spad_read(ntb->device, NTB_MSIX_OFS0 + i, &val);
2751 		intel_ntb_printf(2, "remote MSIX addr(%u): 0x%x\n", i, val);
2752 		ntb->peer_msix_data[i].nmd_ofs = val;
2753 	}
2754 
2755 	ntb->peer_msix_done = true;
2756 
2757 msix_done:
2758 	intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_DONE, NTB_MSIX_RECEIVED);
2759 	intel_ntb_spad_read(ntb->device, NTB_MSIX_DONE, &val);
2760 	if (val != NTB_MSIX_RECEIVED)
2761 		goto reschedule;
2762 
2763 	intel_ntb_spad_clear(ntb->device);
2764 	ntb->peer_msix_good = true;
2765 	/* Give peer time to see our NTB_MSIX_RECEIVED. */
2766 	goto reschedule;
2767 
2768 msix_good:
2769 	intel_ntb_poll_link(ntb);
2770 	ntb_link_event(ntb->device);
2771 	return;
2772 
2773 reschedule:
2774 	ntb->lnk_sta = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2);
2775 	if (_xeon_link_is_up(ntb)) {
2776 		callout_reset(&ntb->peer_msix_work,
2777 		    hz * (ntb->peer_msix_good ? 2 : 1) / 10,
2778 		    intel_ntb_exchange_msix, ntb);
2779 	} else
2780 		intel_ntb_spad_clear(ntb->device);
2781 }
2782 
2783 /*
2784  * Public API to the rest of the OS
2785  */
2786 
2787 static uint8_t
2788 intel_ntb_spad_count(device_t dev)
2789 {
2790 	struct ntb_softc *ntb = device_get_softc(dev);
2791 
2792 	return (ntb->spad_count);
2793 }
2794 
2795 static uint8_t
2796 intel_ntb_mw_count(device_t dev)
2797 {
2798 	struct ntb_softc *ntb = device_get_softc(dev);
2799 	uint8_t res;
2800 
2801 	res = ntb->mw_count;
2802 	if (ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0)
2803 		res--;
2804 	if (ntb->msix_mw_idx != B2B_MW_DISABLED)
2805 		res--;
2806 	return (res);
2807 }
2808 
2809 static int
2810 intel_ntb_spad_write(device_t dev, unsigned int idx, uint32_t val)
2811 {
2812 	struct ntb_softc *ntb = device_get_softc(dev);
2813 
2814 	if (idx >= ntb->spad_count)
2815 		return (EINVAL);
2816 
2817 	intel_ntb_reg_write(4, ntb->self_reg->spad + idx * 4, val);
2818 
2819 	return (0);
2820 }
2821 
2822 /*
2823  * Zeros the local scratchpad.
2824  */
2825 static void
2826 intel_ntb_spad_clear(device_t dev)
2827 {
2828 	struct ntb_softc *ntb = device_get_softc(dev);
2829 	unsigned i;
2830 
2831 	for (i = 0; i < ntb->spad_count; i++)
2832 		intel_ntb_spad_write(dev, i, 0);
2833 }
2834 
2835 static int
2836 intel_ntb_spad_read(device_t dev, unsigned int idx, uint32_t *val)
2837 {
2838 	struct ntb_softc *ntb = device_get_softc(dev);
2839 
2840 	if (idx >= ntb->spad_count)
2841 		return (EINVAL);
2842 
2843 	*val = intel_ntb_reg_read(4, ntb->self_reg->spad + idx * 4);
2844 
2845 	return (0);
2846 }
2847 
2848 static int
2849 intel_ntb_peer_spad_write(device_t dev, unsigned int idx, uint32_t val)
2850 {
2851 	struct ntb_softc *ntb = device_get_softc(dev);
2852 
2853 	if (idx >= ntb->spad_count)
2854 		return (EINVAL);
2855 
2856 	if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP))
2857 		intel_ntb_mw_write(4, XEON_SPAD_OFFSET + idx * 4, val);
2858 	else
2859 		intel_ntb_reg_write(4, ntb->peer_reg->spad + idx * 4, val);
2860 
2861 	return (0);
2862 }
2863 
2864 static int
2865 intel_ntb_peer_spad_read(device_t dev, unsigned int idx, uint32_t *val)
2866 {
2867 	struct ntb_softc *ntb = device_get_softc(dev);
2868 
2869 	if (idx >= ntb->spad_count)
2870 		return (EINVAL);
2871 
2872 	if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP))
2873 		*val = intel_ntb_mw_read(4, XEON_SPAD_OFFSET + idx * 4);
2874 	else
2875 		*val = intel_ntb_reg_read(4, ntb->peer_reg->spad + idx * 4);
2876 
2877 	return (0);
2878 }
2879 
2880 static int
2881 intel_ntb_mw_get_range(device_t dev, unsigned mw_idx, vm_paddr_t *base,
2882     caddr_t *vbase, size_t *size, size_t *align, size_t *align_size,
2883     bus_addr_t *plimit)
2884 {
2885 	struct ntb_softc *ntb = device_get_softc(dev);
2886 	struct ntb_pci_bar_info *bar;
2887 	bus_addr_t limit;
2888 	size_t bar_b2b_off;
2889 	enum ntb_bar bar_num;
2890 
2891 	if (mw_idx >= intel_ntb_mw_count(dev))
2892 		return (EINVAL);
2893 	mw_idx = intel_ntb_user_mw_to_idx(ntb, mw_idx);
2894 
2895 	bar_num = intel_ntb_mw_to_bar(ntb, mw_idx);
2896 	bar = &ntb->bar_info[bar_num];
2897 	bar_b2b_off = 0;
2898 	if (mw_idx == ntb->b2b_mw_idx) {
2899 		KASSERT(ntb->b2b_off != 0,
2900 		    ("user shouldn't get non-shared b2b mw"));
2901 		bar_b2b_off = ntb->b2b_off;
2902 	}
2903 
2904 	if (bar_is_64bit(ntb, bar_num))
2905 		limit = BUS_SPACE_MAXADDR;
2906 	else
2907 		limit = BUS_SPACE_MAXADDR_32BIT;
2908 
2909 	if (base != NULL)
2910 		*base = bar->pbase + bar_b2b_off;
2911 	if (vbase != NULL)
2912 		*vbase = bar->vbase + bar_b2b_off;
2913 	if (size != NULL)
2914 		*size = bar->size - bar_b2b_off;
2915 	if (align != NULL)
2916 		*align = bar->size;
2917 	if (align_size != NULL)
2918 		*align_size = 1;
2919 	if (plimit != NULL)
2920 		*plimit = limit;
2921 	return (0);
2922 }
2923 
/*
 * Program the translation for user-visible memory window 'idx' so that it
 * targets 'size' bytes starting at bus address 'addr'.
 *
 * Both the translation register and the limit register are written and
 * then read back; on a mismatch the registers are rolled back and EIO is
 * returned.
 *
 * Returns:
 *   0      - translation and limit were programmed and verified
 *   EINVAL - idx out of range, addr not aligned to the BAR size, or size
 *            larger than the usable window
 *   ERANGE - 32-bit BAR and addr or addr + size does not fit in 32 bits
 *   EIO    - a register did not read back the value written
 */
static int
intel_ntb_mw_set_trans(device_t dev, unsigned idx, bus_addr_t addr, size_t size)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	struct ntb_pci_bar_info *bar;
	uint64_t base, limit, reg_val;
	size_t bar_size, mw_size;
	uint32_t base_reg, xlat_reg, limit_reg;
	enum ntb_bar bar_num;

	if (idx >= intel_ntb_mw_count(dev))
		return (EINVAL);
	/* Translate the consumer's index into the hardware window index. */
	idx = intel_ntb_user_mw_to_idx(ntb, idx);

	bar_num = intel_ntb_mw_to_bar(ntb, idx);
	bar = &ntb->bar_info[bar_num];

	/* The B2B window only offers the part of the BAR past b2b_off. */
	bar_size = bar->size;
	if (idx == ntb->b2b_mw_idx)
		mw_size = bar_size - ntb->b2b_off;
	else
		mw_size = bar_size;

	/* Hardware requires that addr is aligned to bar size */
	if ((addr & (bar_size - 1)) != 0)
		return (EINVAL);

	if (size > mw_size)
		return (EINVAL);

	bar_get_xlat_params(ntb, bar_num, &base_reg, &xlat_reg, &limit_reg);

	/*
	 * The limit stays 0 unless a limit register exists and the caller
	 * asked for less than the whole window.
	 *
	 * NOTE(review): limit_reg is still written below when it is 0;
	 * confirm bar_get_xlat_params() never yields 0 for a user window.
	 */
	limit = 0;
	if (bar_is_64bit(ntb, bar_num)) {
		base = intel_ntb_reg_read(8, base_reg) & BAR_HIGH_MASK;

		if (limit_reg != 0 && size != mw_size)
			limit = base + size;

		/* Set and verify translation address */
		intel_ntb_reg_write(8, xlat_reg, addr);
		reg_val = intel_ntb_reg_read(8, xlat_reg) & BAR_HIGH_MASK;
		if (reg_val != addr) {
			/* Roll back: clear the translation. */
			intel_ntb_reg_write(8, xlat_reg, 0);
			return (EIO);
		}

		/* Set and verify the limit */
		intel_ntb_reg_write(8, limit_reg, limit);
		reg_val = intel_ntb_reg_read(8, limit_reg) & BAR_HIGH_MASK;
		if (reg_val != limit) {
			/* Roll back: limit to base, translation cleared. */
			intel_ntb_reg_write(8, limit_reg, base);
			intel_ntb_reg_write(8, xlat_reg, 0);
			return (EIO);
		}
	} else {
		/* Configure 32-bit (split) BAR MW */

		/* Both ends of the target range must fit in 32 bits. */
		if ((addr & UINT32_MAX) != addr)
			return (ERANGE);
		if (((addr + size) & UINT32_MAX) != (addr + size))
			return (ERANGE);

		base = intel_ntb_reg_read(4, base_reg) & BAR_HIGH_MASK;

		if (limit_reg != 0 && size != mw_size)
			limit = base + size;

		/* Set and verify translation address */
		intel_ntb_reg_write(4, xlat_reg, addr);
		reg_val = intel_ntb_reg_read(4, xlat_reg) & BAR_HIGH_MASK;
		if (reg_val != addr) {
			/* Roll back: clear the translation. */
			intel_ntb_reg_write(4, xlat_reg, 0);
			return (EIO);
		}

		/* Set and verify the limit */
		intel_ntb_reg_write(4, limit_reg, limit);
		reg_val = intel_ntb_reg_read(4, limit_reg) & BAR_HIGH_MASK;
		if (reg_val != limit) {
			/* Roll back: limit to base, translation cleared. */
			intel_ntb_reg_write(4, limit_reg, base);
			intel_ntb_reg_write(4, xlat_reg, 0);
			return (EIO);
		}
	}
	return (0);
}
3011 
3012 static int
3013 intel_ntb_mw_clear_trans(device_t dev, unsigned mw_idx)
3014 {
3015 
3016 	return (intel_ntb_mw_set_trans(dev, mw_idx, 0, 0));
3017 }
3018 
3019 static int
3020 intel_ntb_mw_get_wc(device_t dev, unsigned idx, vm_memattr_t *mode)
3021 {
3022 	struct ntb_softc *ntb = device_get_softc(dev);
3023 	struct ntb_pci_bar_info *bar;
3024 
3025 	if (idx >= intel_ntb_mw_count(dev))
3026 		return (EINVAL);
3027 	idx = intel_ntb_user_mw_to_idx(ntb, idx);
3028 
3029 	bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, idx)];
3030 	*mode = bar->map_mode;
3031 	return (0);
3032 }
3033 
3034 static int
3035 intel_ntb_mw_set_wc(device_t dev, unsigned idx, vm_memattr_t mode)
3036 {
3037 	struct ntb_softc *ntb = device_get_softc(dev);
3038 
3039 	if (idx >= intel_ntb_mw_count(dev))
3040 		return (EINVAL);
3041 
3042 	idx = intel_ntb_user_mw_to_idx(ntb, idx);
3043 	return (intel_ntb_mw_set_wc_internal(ntb, idx, mode));
3044 }
3045 
3046 static int
3047 intel_ntb_mw_set_wc_internal(struct ntb_softc *ntb, unsigned idx, vm_memattr_t mode)
3048 {
3049 	struct ntb_pci_bar_info *bar;
3050 	int rc;
3051 
3052 	bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, idx)];
3053 	if (bar->map_mode == mode)
3054 		return (0);
3055 
3056 	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size, mode);
3057 	if (rc == 0)
3058 		bar->map_mode = mode;
3059 
3060 	return (rc);
3061 }
3062 
3063 static void
3064 intel_ntb_peer_db_set(device_t dev, uint64_t bit)
3065 {
3066 	struct ntb_softc *ntb = device_get_softc(dev);
3067 
3068 	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
3069 		struct ntb_pci_bar_info *lapic;
3070 		unsigned i;
3071 
3072 		lapic = ntb->peer_lapic_bar;
3073 
3074 		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
3075 			if ((bit & intel_ntb_db_vector_mask(dev, i)) != 0)
3076 				bus_space_write_4(lapic->pci_bus_tag,
3077 				    lapic->pci_bus_handle,
3078 				    ntb->peer_msix_data[i].nmd_ofs,
3079 				    ntb->peer_msix_data[i].nmd_data);
3080 		}
3081 		return;
3082 	}
3083 
3084 	if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
3085 		intel_ntb_mw_write(2, XEON_PDOORBELL_OFFSET, bit);
3086 		return;
3087 	}
3088 
3089 	db_iowrite(ntb, ntb->peer_reg->db_bell, bit);
3090 }
3091 
3092 static int
3093 intel_ntb_peer_db_addr(device_t dev, bus_addr_t *db_addr, vm_size_t *db_size)
3094 {
3095 	struct ntb_softc *ntb = device_get_softc(dev);
3096 	struct ntb_pci_bar_info *bar;
3097 	uint64_t regoff;
3098 
3099 	KASSERT((db_addr != NULL && db_size != NULL), ("must be non-NULL"));
3100 
3101 	if (!HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
3102 		bar = &ntb->bar_info[NTB_CONFIG_BAR];
3103 		regoff = ntb->peer_reg->db_bell;
3104 	} else {
3105 		KASSERT(ntb->b2b_mw_idx != B2B_MW_DISABLED,
3106 		    ("invalid b2b idx"));
3107 
3108 		bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx)];
3109 		regoff = XEON_PDOORBELL_OFFSET;
3110 	}
3111 	KASSERT(bar->pci_bus_tag != X86_BUS_SPACE_IO, ("uh oh"));
3112 
3113 	/* HACK: Specific to current x86 bus implementation. */
3114 	*db_addr = ((uint64_t)bar->pci_bus_handle + regoff);
3115 	*db_size = ntb->reg->db_size;
3116 	return (0);
3117 }
3118 
3119 static uint64_t
3120 intel_ntb_db_valid_mask(device_t dev)
3121 {
3122 	struct ntb_softc *ntb = device_get_softc(dev);
3123 
3124 	return (ntb->db_valid_mask);
3125 }
3126 
3127 static int
3128 intel_ntb_db_vector_count(device_t dev)
3129 {
3130 	struct ntb_softc *ntb = device_get_softc(dev);
3131 
3132 	return (ntb->db_vec_count);
3133 }
3134 
3135 static uint64_t
3136 intel_ntb_db_vector_mask(device_t dev, uint32_t vector)
3137 {
3138 	struct ntb_softc *ntb = device_get_softc(dev);
3139 
3140 	if (vector > ntb->db_vec_count)
3141 		return (0);
3142 	return (ntb->db_valid_mask & intel_ntb_vec_mask(ntb, vector));
3143 }
3144 
3145 static bool
3146 intel_ntb_link_is_up(device_t dev, enum ntb_speed *speed, enum ntb_width *width)
3147 {
3148 	struct ntb_softc *ntb = device_get_softc(dev);
3149 
3150 	if (speed != NULL)
3151 		*speed = intel_ntb_link_sta_speed(ntb);
3152 	if (width != NULL)
3153 		*width = intel_ntb_link_sta_width(ntb);
3154 	return (link_is_up(ntb));
3155 }
3156 
3157 static void
3158 save_bar_parameters(struct ntb_pci_bar_info *bar)
3159 {
3160 
3161 	bar->pci_bus_tag = rman_get_bustag(bar->pci_resource);
3162 	bar->pci_bus_handle = rman_get_bushandle(bar->pci_resource);
3163 	bar->pbase = rman_get_start(bar->pci_resource);
3164 	bar->size = rman_get_size(bar->pci_resource);
3165 	bar->vbase = rman_get_virtual(bar->pci_resource);
3166 }
3167 
/*
 * Method table binding the device, bus and NTB interface methods to this
 * driver's implementations.
 */
static device_method_t ntb_intel_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		intel_ntb_probe),
	DEVMETHOD(device_attach,	intel_ntb_attach),
	DEVMETHOD(device_detach,	intel_ntb_detach),
	/* Bus interface */
	DEVMETHOD(bus_child_location_str, ntb_child_location_str),
	DEVMETHOD(bus_print_child,	ntb_print_child),
	DEVMETHOD(bus_get_dma_tag,	ntb_get_dma_tag),
	/* NTB interface */
	/* Port identification */
	DEVMETHOD(ntb_port_number,	intel_ntb_port_number),
	DEVMETHOD(ntb_peer_port_count,	intel_ntb_peer_port_count),
	DEVMETHOD(ntb_peer_port_number,	intel_ntb_peer_port_number),
	DEVMETHOD(ntb_peer_port_idx, 	intel_ntb_peer_port_idx),
	/* Link control and status */
	DEVMETHOD(ntb_link_is_up,	intel_ntb_link_is_up),
	DEVMETHOD(ntb_link_enable,	intel_ntb_link_enable),
	DEVMETHOD(ntb_link_disable,	intel_ntb_link_disable),
	DEVMETHOD(ntb_link_enabled,	intel_ntb_link_enabled),
	/* Memory windows */
	DEVMETHOD(ntb_mw_count,		intel_ntb_mw_count),
	DEVMETHOD(ntb_mw_get_range,	intel_ntb_mw_get_range),
	DEVMETHOD(ntb_mw_set_trans,	intel_ntb_mw_set_trans),
	DEVMETHOD(ntb_mw_clear_trans,	intel_ntb_mw_clear_trans),
	DEVMETHOD(ntb_mw_get_wc,	intel_ntb_mw_get_wc),
	DEVMETHOD(ntb_mw_set_wc,	intel_ntb_mw_set_wc),
	/* Scratchpad registers */
	DEVMETHOD(ntb_spad_count,	intel_ntb_spad_count),
	DEVMETHOD(ntb_spad_clear,	intel_ntb_spad_clear),
	DEVMETHOD(ntb_spad_write,	intel_ntb_spad_write),
	DEVMETHOD(ntb_spad_read,	intel_ntb_spad_read),
	DEVMETHOD(ntb_peer_spad_write,	intel_ntb_peer_spad_write),
	DEVMETHOD(ntb_peer_spad_read,	intel_ntb_peer_spad_read),
	/* Doorbells */
	DEVMETHOD(ntb_db_valid_mask,	intel_ntb_db_valid_mask),
	DEVMETHOD(ntb_db_vector_count,	intel_ntb_db_vector_count),
	DEVMETHOD(ntb_db_vector_mask,	intel_ntb_db_vector_mask),
	DEVMETHOD(ntb_db_clear,		intel_ntb_db_clear),
	DEVMETHOD(ntb_db_clear_mask,	intel_ntb_db_clear_mask),
	DEVMETHOD(ntb_db_read,		intel_ntb_db_read),
	DEVMETHOD(ntb_db_set_mask,	intel_ntb_db_set_mask),
	DEVMETHOD(ntb_peer_db_addr,	intel_ntb_peer_db_addr),
	DEVMETHOD(ntb_peer_db_set,	intel_ntb_peer_db_set),
	DEVMETHOD_END
};
3209 
/*
 * Driver class and module glue: the "ntb_hw" class uses the method table
 * above with a struct ntb_softc per device, is registered as module
 * ntb_hw_intel on the pci bus, depends on the ntb module, and exports PNP
 * match data built from the pci_ids table.
 */
static DEFINE_CLASS_0(ntb_hw, ntb_intel_driver, ntb_intel_methods,
    sizeof(struct ntb_softc));
DRIVER_MODULE(ntb_hw_intel, pci, ntb_intel_driver, ntb_hw_devclass, NULL, NULL);
MODULE_DEPEND(ntb_hw_intel, ntb, 1, 1, 1);
MODULE_VERSION(ntb_hw_intel, 1);
MODULE_PNP_INFO("W32:vendor/device;D:#", pci, ntb_hw_intel, pci_ids,
    nitems(pci_ids));
3217