xref: /illumos-gate/usr/src/uts/common/io/vioif/vioif.h (revision 1bff1300cebf1ea8e11ce928b10e208097e67f24)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2019 Joyent, Inc.
14  */
15 
16 /*
17  * VIRTIO NETWORK DRIVER
18  */
19 
20 #ifndef _VIOIF_H
21 #define	_VIOIF_H
22 
23 #include "virtio.h"
24 
25 #ifdef __cplusplus
26 extern "C" {
27 #endif
28 
29 /*
30  * VIRTIO NETWORK CONFIGURATION REGISTERS
31  *
32  * These are offsets into the device-specific configuration space available
33  * through the virtio_dev_*() family of functions.
34  */
35 #define	VIRTIO_NET_CONFIG_MAC		0x00	/* 48 R/W (VIRTIO_NET_F_MAC) */
36 #define	VIRTIO_NET_CONFIG_STATUS	0x06	/* 16 R (VIRTIO_NET_F_STATUS) */
37 #define	VIRTIO_NET_CONFIG_MAX_VQ_PAIRS	0x08	/* 16 R   */
38 #define	VIRTIO_NET_CONFIG_MTU		0x0A	/* 16 R (VIRTIO_NET_F_MTU) */
39 
40 /*
41  * VIRTIO NETWORK VIRTQUEUES
42  *
43  * Note that the control queue is only present if VIRTIO_NET_F_CTRL_VQ is
44  * negotiated with the device.
45  */
46 #define	VIRTIO_NET_VIRTQ_RX		0
47 #define	VIRTIO_NET_VIRTQ_TX		1
48 #define	VIRTIO_NET_VIRTQ_CONTROL	2
49 
50 /*
51  * VIRTIO NETWORK FEATURE BITS
52  */
53 
54 /*
55  * CSUM, GUEST_CSUM:
56  *	Partial checksum support.  These features signal that the device will
57  *	accept packets with partial checksums (CSUM), and that the driver will
58  *	accept packets with partial checksums (GUEST_CSUM).  These features
59  *	combine the use of the VIRTIO_NET_HDR_F_NEEDS_CSUM flag, and the
60  *	"csum_start" and "csum_offset" fields, in the virtio net header.
61  */
62 #define	VIRTIO_NET_F_CSUM		(1ULL << 0)
63 #define	VIRTIO_NET_F_GUEST_CSUM		(1ULL << 1)
64 
65 /*
66  * MTU:
67  *	The device offers a maximum MTU value at VIRTIO_NET_CONFIG_MTU.  If
68  *	this is not negotiated, we allow the largest possible MTU that our
69  *	buffer allocations support in case jumbo frames are tacitly supported
70  *	by the device.  The default MTU is always 1500.
71  */
72 #define	VIRTIO_NET_F_MTU		(1ULL << 3)
73 
74 /*
75  * MAC:
76  *	The device has an assigned primary MAC address.  If this feature bit is
77  *	not set, the driver must provide a locally assigned MAC address.  See
78  *	IEEE 802, "48-bit universal LAN MAC addresses" for more details on
79  *	assignment.
80  */
81 #define	VIRTIO_NET_F_MAC		(1ULL << 5)
82 
83 /*
84  * GUEST_TSO4, GUEST_TSO6, GUEST_UFO:
85  *	Inbound segmentation offload support.  These features depend on having
86  *	VIRTIO_NET_F_GUEST_CSUM and signal that the driver can accept large
87  *	combined TCP (v4 or v6) packets, or reassembled UDP fragments.
88  */
89 #define	VIRTIO_NET_F_GUEST_TSO4		(1ULL << 7)
90 #define	VIRTIO_NET_F_GUEST_TSO6		(1ULL << 8)
91 #define	VIRTIO_NET_F_GUEST_UFO		(1ULL << 10)
92 
93 /*
94  * GUEST_ECN:
95  *	Depends on either VIRTIO_NET_F_GUEST_TSO4 or VIRTIO_NET_F_GUEST_TSO6.
96  *	This feature means the driver will look for the VIRTIO_NET_HDR_GSO_ECN
97  *	bit in the "gso_type" of the virtio net header.  This bit tells the
98  *	driver that the Explicit Congestion Notification (ECN) bit was set in
99  *	the original TCP packets.
100  */
101 #define	VIRTIO_NET_F_GUEST_ECN		(1ULL << 9)
102 
103 /*
104  * HOST_TSO4, HOST_TSO6, HOST_UFO:
105  *	Outbound segmentation offload support.  These features depend on having
106  *	VIRTIO_NET_F_CSUM and signal that the device will accept large combined
107  *	TCP (v4 or v6) packets that require segmentation offload, or large
108  *	combined UDP packets that require fragmentation offload.
109  */
110 #define	VIRTIO_NET_F_HOST_TSO4		(1ULL << 11)
111 #define	VIRTIO_NET_F_HOST_TSO6		(1ULL << 12)
112 #define	VIRTIO_NET_F_HOST_UFO		(1ULL << 14)
113 
114 /*
115  * HOST_ECN:
116  *	Depends on either VIRTIO_NET_F_HOST_TSO4 or VIRTIO_NET_F_HOST_TSO6.
117  *	This feature means the device will accept packets that both require
118  *	segmentation offload and have the Explicit Congestion Notification
119  *	(ECN) bit set.  If this feature is not present, the driver must not
120  *	send large segments that require ECN to be set.
121  */
122 #define	VIRTIO_NET_F_HOST_ECN		(1ULL << 13)
123 
124 /*
125  * GSO:
126  *	The GSO feature is, in theory, the combination of HOST_TSO4, HOST_TSO6,
127  *	and HOST_ECN.  This is only useful for legacy devices; newer devices
128  *	should be using the more specific bits above.
129  */
130 #define	VIRTIO_NET_F_GSO		(1ULL << 6)
131 
132 /*
133  * MRG_RXBUF:
134  *	This feature allows the receipt of large packets without needing to
135  *	allocate large buffers.  The "virtio_net_hdr" will include an extra
136  *	value: the number of buffers to gang together.
137  */
138 #define	VIRTIO_NET_F_MRG_RXBUF		(1ULL << 15)
139 
140 /*
141  * STATUS:
142  *	The VIRTIO_NET_CONFIG_STATUS configuration register is available, which
143  *	allows the driver to read the link state from the device.
144  */
145 #define	VIRTIO_NET_F_STATUS		(1ULL << 16)
146 
147 /*
148  * CTRL_VQ, CTRL_RX, CTRL_VLAN, CTRL_RX_EXTRA:
149  *	These features signal that the device exposes the control queue
150  *	(VIRTIO_NET_VIRTQ_CONTROL), in the case of CTRL_VQ; and that the control
151  *	queue supports extra commands (CTRL_RX, CTRL_VLAN, CTRL_RX_EXTRA).
152  */
153 #define	VIRTIO_NET_F_CTRL_VQ		(1ULL << 17)
154 #define	VIRTIO_NET_F_CTRL_RX		(1ULL << 18)
155 #define	VIRTIO_NET_F_CTRL_VLAN		(1ULL << 19)
156 #define	VIRTIO_NET_F_CTRL_RX_EXTRA	(1ULL << 20)
157 
158 /*
159  * These features are supported by the driver and we will request them from the
160  * device.  Note that we do not currently request GUEST_CSUM, as the driver
161  * does not presently support receiving frames with any offload features from
162  * the device.
163  */
164 #define	VIRTIO_NET_WANTED_FEATURES	(VIRTIO_NET_F_CSUM |		\
165 					VIRTIO_NET_F_GSO |		\
166 					VIRTIO_NET_F_HOST_TSO4 |	\
167 					VIRTIO_NET_F_HOST_ECN |		\
168 					VIRTIO_NET_F_MAC |		\
169 					VIRTIO_NET_F_MTU)
170 
171 /*
172  * VIRTIO NETWORK HEADER
173  *
174  * This structure appears at the start of each transmit or receive packet
175  * buffer.
176  */
177 struct virtio_net_hdr {
178 	uint8_t				vnh_flags;	/* VIRTIO_NET_HDR_F_* */
179 	uint8_t				vnh_gso_type; /* VIRTIO_NET_HDR_GSO_* */
180 	uint16_t			vnh_hdr_len;
181 	uint16_t			vnh_gso_size;
182 	uint16_t			vnh_csum_start;	/* with NEEDS_CSUM */
183 	uint16_t			vnh_csum_offset; /* with NEEDS_CSUM */
184 } __packed;	/* fields sum to 10 bytes */
185 
186 /*
187  * VIRTIO NETWORK HEADER: FLAGS (vnh_flags)
188  */
189 #define	VIRTIO_NET_HDR_F_NEEDS_CSUM	0x01
190 
191 /*
192  * VIRTIO NETWORK HEADER: OFFLOAD OPTIONS (vnh_gso_type)
193  *
194  * Each of these is an offload type, except for the ECN value which is
195  * logically OR-ed with one of the other types.
196  */
197 #define	VIRTIO_NET_HDR_GSO_NONE		0
198 #define	VIRTIO_NET_HDR_GSO_TCPV4	1
199 #define	VIRTIO_NET_HDR_GSO_UDP		3
200 #define	VIRTIO_NET_HDR_GSO_TCPV6	4
201 #define	VIRTIO_NET_HDR_GSO_ECN		0x80
202 
203 
204 /*
205  * DRIVER PARAMETERS
206  */
207 
208 /*
209  * At attach, we allocate a fixed pool of buffers for receipt and transmission
210  * of frames.  The maximum number of buffers of each type that we will allocate
211  * is specified here.  If the ring size is smaller than this number, we will
212  * use the ring size instead.
213  */
214 #define	VIRTIO_NET_TX_BUFS		256
215 #define	VIRTIO_NET_RX_BUFS		256
216 
217 /*
218  * The virtio net header and the first buffer segment share the same DMA
219  * allocation.  We round up the virtio header size to a multiple of 4 and add 2
220  * bytes so that the IP header, which starts immediately after the 14 or 18
221  * byte Ethernet header, is then correctly aligned:
222  *
223  *   0                10      12   14                              28/32
224  *   | virtio_net_hdr | %4==0 | +2 | Ethernet header (14/18 bytes) | IPv4 ...
225  *
226  * Note that for this to work correctly, the DMA allocation must also be 4 byte
227  * aligned.
228  */
229 #define	VIOIF_HEADER_ALIGN		4
230 #define	VIOIF_HEADER_SKIP		(P2ROUNDUP( \
231 					    sizeof (struct virtio_net_hdr), \
232 					    VIOIF_HEADER_ALIGN) + 2)
233 
234 /*
235  * Given we are not negotiating VIRTIO_NET_F_MRG_RXBUF, the specification says
236  * we must be able to accept a 1514 byte packet, or if any segmentation offload
237  * features have been negotiated a 65550 byte packet.  To keep things simple,
238  * we'll assume segmentation offload is possible in most cases.  In addition to
239  * the packet payload, we need to account for the Ethernet header and the
240  * virtio_net_hdr.
241  */
242 #define	VIOIF_RX_DATA_SIZE		65550
243 #define	VIOIF_RX_BUF_SIZE		(VIOIF_RX_DATA_SIZE + \
244 					    sizeof (struct ether_header) + \
245 					    VIOIF_HEADER_SKIP)
246 
247 /*
248  * If we assume that a large allocation will probably have mostly 4K page sized
249  * cookies, 64 segments allows us 256KB for a single frame.  We're in control
250  * of the allocation we use for receive buffers, so this value only has an
251  * impact on the length of chain we're able to create for external transmit
252  * buffer mappings.
253  */
254 #define	VIOIF_MAX_SEGS			64
255 
256 /*
257  * We pre-allocate a reasonably large buffer into which small packets are
258  * copied.  Bigger packets are mapped; packets with multiple cookies are
259  * mapped as indirect buffers.
260  */
261 #define	VIOIF_TX_INLINE_SIZE		(2 * 1024)
262 
263 
264 /*
265  * TYPE DEFINITIONS
266  */
267 
268 typedef struct vioif vioif_t;
269 
270 /*
271  * Receive buffers are allocated in advance as a combination of DMA memory and
272  * a descriptor chain.  Receive buffers can be loaned to the networking stack
273  * to avoid copying, and this object contains the free routine to pass to
274  * desballoc().
275  *
276  * When receive buffers are not in use, they are linked into the per-instance
277  * free list, "vif_rxbufs" via "rb_link".  Under normal conditions, we expect
278  * the free list to be empty much of the time; most buffers will be in the ring
279  * or on loan.
280  */
281 typedef struct vioif_rxbuf {
282 	vioif_t				*rb_vioif;	/* driver instance */
283 	frtn_t				rb_frtn;	/* for desballoc() */
284 
285 	virtio_dma_t			*rb_dma;	/* DMA memory */
286 	virtio_chain_t			*rb_chain;	/* descriptor chain */
287 
288 	list_node_t			rb_link;	/* vif_rxbufs linkage */
289 } vioif_rxbuf_t;
290 
291 /*
292  * Transmit buffers are also allocated in advance.  DMA memory is allocated for
293  * the virtio net header, and to hold small packets.  Larger packets are mapped
294  * from storage loaned to the driver by the network stack.
295  *
296  * When transmit buffers are not in use, they are linked into the per-instance
297  * free list, "vif_txbufs" via "tb_link".
298  */
299 typedef struct vioif_txbuf {
300 	mblk_t				*tb_mp;	/* in-flight frame? (confirm) */
301 
302 	/*
303 	 * Inline buffer space (VIOIF_TX_INLINE_SIZE) for storage of the virtio
304 	 * net header, and to hold copied (rather than mapped) packet data.
305 	 */
306 	virtio_dma_t			*tb_dma;
307 	virtio_chain_t			*tb_chain;
308 
309 	/*
310 	 * External buffer mapping.  The capacity is fixed at allocation time,
311 	 * and "tb_ndmaext" tracks the current number of mappings.
312 	 */
313 	virtio_dma_t			**tb_dmaext;
314 	uint_t				tb_dmaext_capacity;
315 	uint_t				tb_ndmaext;
316 
317 	list_node_t			tb_link;	/* vif_txbufs linkage */
318 } vioif_txbuf_t;
319 
320 typedef enum vioif_runstate {
321 	VIOIF_RUNSTATE_STOPPED = 1,	/* values start at 1, not 0 */
322 	VIOIF_RUNSTATE_STOPPING,	/* draining resources */
323 	VIOIF_RUNSTATE_RUNNING		/* mc_start(9E)..mc_stop(9E) */
324 } vioif_runstate_t;
325 
326 /*
327  * Per-instance driver object.
328  */
329 struct vioif {
330 	dev_info_t			*vif_dip;
331 	virtio_t			*vif_virtio;
332 
333 	kmutex_t			vif_mutex;
334 
335 	/*
336 	 * The NIC is considered RUNNING between the mc_start(9E) and
337 	 * mc_stop(9E) calls.  Otherwise it is STOPPING (while draining
338 	 * resources) then STOPPED.  When not RUNNING, we will drop incoming
339 	 * frames and refuse to insert more receive buffers into the receive
340 	 * queue.
341 	 */
342 	vioif_runstate_t		vif_runstate;
343 
344 	mac_handle_t			vif_mac_handle;
345 
346 	virtio_queue_t			*vif_rx_vq;
347 	virtio_queue_t			*vif_tx_vq;
348 
349 	/* TX virtqueue management resources */
350 	boolean_t			vif_tx_corked;
351 	boolean_t			vif_tx_drain;
352 	timeout_id_t			vif_tx_reclaim_tid;
353 
354 	/*
355 	 * Configured offload features:
356 	 */
357 	unsigned int			vif_tx_csum:1;
358 	unsigned int			vif_tx_tso4:1;
359 
360 	/*
361 	 * For debugging, it is useful to know whether the MAC address we
362 	 * are using came from the host (via VIRTIO_NET_CONFIG_MAC) or
363 	 * was otherwise generated or set from within the guest.
364 	 */
365 	unsigned int			vif_mac_from_host:1;
366 
367 	uint_t				vif_mtu;
368 	uint_t				vif_mtu_max;
369 	uint8_t				vif_mac[ETHERADDRL];
370 
371 	/*
372 	 * Receive buffer free list (linked via "rb_link") and accounting:
373 	 */
374 	list_t				vif_rxbufs;
375 	uint_t				vif_nrxbufs_alloc;
376 	uint_t				vif_nrxbufs_onloan;
377 	uint_t				vif_nrxbufs_onloan_max;
378 	uint_t				vif_rxbufs_capacity;
379 	vioif_rxbuf_t			*vif_rxbufs_mem;
380 
381 	/*
382 	 * Transmit buffer free list (linked via "tb_link") and accounting:
383 	 */
384 	list_t				vif_txbufs;
385 	uint_t				vif_ntxbufs_alloc;
386 	uint_t				vif_txbufs_capacity;
387 	vioif_txbuf_t			*vif_txbufs_mem;
388 
389 	/*
390 	 * These copy size thresholds are exposed as private MAC properties so
391 	 * that they can be tuned without rebooting.
392 	 */
393 	uint_t				vif_rxcopy_thresh;
394 	uint_t				vif_txcopy_thresh;
395 
396 	/*
397 	 * Statistics visible through mac:
398 	 */
399 	uint64_t			vif_ipackets;
400 	uint64_t			vif_opackets;
401 	uint64_t			vif_rbytes;
402 	uint64_t			vif_obytes;
403 	uint64_t			vif_brdcstxmt;
404 	uint64_t			vif_brdcstrcv;
405 	uint64_t			vif_multixmt;
406 	uint64_t			vif_multircv;
407 	uint64_t			vif_norecvbuf;
408 	uint64_t			vif_notxbuf;
409 	uint64_t			vif_ierrors;
410 	uint64_t			vif_oerrors;
411 
412 	/*
413 	 * Internal debugging statistics:
414 	 */
415 	uint64_t			vif_rxfail_dma_handle;
416 	uint64_t			vif_rxfail_dma_buffer;
417 	uint64_t			vif_rxfail_dma_bind;
418 	uint64_t			vif_rxfail_chain_undersize;
419 	uint64_t			vif_rxfail_no_descriptors;
420 	uint64_t			vif_txfail_dma_handle;
421 	uint64_t			vif_txfail_dma_bind;
422 	uint64_t			vif_txfail_indirect_limit;
423 
424 	uint64_t			vif_stat_tx_reclaim;
425 };
426 
427 #ifdef __cplusplus
428 }
429 #endif
430 
431 #endif /* _VIOIF_H */
432