xref: /freebsd/sys/dev/gve/gve.h (revision 1719886f6d08408b834d270c59ffcfd821c8f63a)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 2023 Google LLC
5  *
6  * Redistribution and use in source and binary forms, with or without modification,
7  * are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  *    list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright notice,
13  *    this list of conditions and the following disclaimer in the documentation
14  *    and/or other materials provided with the distribution.
15  *
16  * 3. Neither the name of the copyright holder nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software without
18  *    specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
24  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
27  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  */
31 #ifndef _GVE_FBSD_H
32 #define _GVE_FBSD_H
33 
34 #include "gve_desc.h"
35 #include "gve_plat.h"
36 #include "gve_register.h"
37 
/* Google's PCI vendor ID; an earlier definition, if any, takes precedence. */
#if !defined(PCI_VENDOR_ID_GOOGLE)
#define PCI_VENDOR_ID_GOOGLE		0x1ae0
#endif

/* PCI device ID matched by this driver and the BAR numbers it uses. */
#define PCI_DEV_ID_GVNIC		0x0042
#define GVE_REGISTER_BAR		0
#define GVE_DOORBELL_BAR		2

/*
 * Upper bound on Tx descriptors per packet: the driver can alloc up to
 * 2 segments for the header and 2 for the payload.
 */
#define GVE_TX_MAX_DESCS		4
#define GVE_TX_BUFRING_ENTRIES		4096

/* The admin queue occupies one page. */
#define ADMINQ_SIZE			PAGE_SIZE

#define GVE_DEFAULT_RX_BUFFER_SIZE	2048
/* Two packet buffers fit in each RX bounce buffer page. */
#define GVE_DEFAULT_RX_BUFFER_OFFSET	(PAGE_SIZE / 2)

/*
 * Number of descriptors per queue page list.
 * The page count (AKA QPL size) can be derived by dividing the number of
 * elements in a page by the number of descriptors available.
 */
#define GVE_QPL_DIVISOR			16
62 
63 static MALLOC_DEFINE(M_GVE, "gve", "gve allocations");
64 
65 struct gve_dma_handle {
66 	bus_addr_t	bus_addr;
67 	void		*cpu_addr;
68 	bus_dma_tag_t	tag;
69 	bus_dmamap_t	map;
70 };
71 
72 union gve_tx_desc {
73 	struct gve_tx_pkt_desc pkt; /* first desc for a packet */
74 	struct gve_tx_mtd_desc mtd; /* optional metadata descriptor */
75 	struct gve_tx_seg_desc seg; /* subsequent descs for a packet */
76 };
77 
/* Records the fifo memory taken up by one segment of a packet. */
struct gve_tx_iovec {
	/* Offset into this segment. */
	uint32_t iov_offset;
	/* Length. */
	uint32_t iov_len;
	/* Padding associated with this segment. */
	uint32_t iov_padding;
};
84 
/* Allowed and currently configured queue counts. */
struct gve_queue_config {
	/* Maximum allowed. */
	uint16_t max_queues;
	/* Current setting. */
	uint16_t num_queues;
};
90 
91 struct gve_irq_db {
92 	__be32 index;
93 } __aligned(CACHE_LINE_SIZE);
94 
/*
 * Queue formats.
 *
 * GVE_QUEUE_FORMAT_UNSPECIFIED has to stay zero: a fully zeroed
 * configure_device_resources command, in which queue_format was never
 * filled in, must read back as "unspecified".
 */
enum gve_queue_format {
	GVE_QUEUE_FORMAT_UNSPECIFIED	= 0,
	GVE_GQI_RDA_FORMAT		= 1,
	GVE_GQI_QPL_FORMAT		= 2,
	GVE_DQO_RDA_FORMAT		= 3,
};
106 
/*
 * Bit positions of the driver state flags. The values are consecutive
 * starting at zero; GVE_NUM_STATE_FLAGS is the count of flags and is not
 * itself a flag.
 */
enum gve_state_flags_bit {
	GVE_STATE_FLAG_ADMINQ_OK	= 0,
	GVE_STATE_FLAG_RESOURCES_OK	= 1,
	GVE_STATE_FLAG_QPLREG_OK	= 2,
	GVE_STATE_FLAG_RX_RINGS_OK	= 3,
	GVE_STATE_FLAG_TX_RINGS_OK	= 4,
	GVE_STATE_FLAG_QUEUES_UP	= 5,
	GVE_STATE_FLAG_LINK_UP		= 6,
	GVE_STATE_FLAG_DO_RESET		= 7,
	GVE_STATE_FLAG_IN_RESET		= 8,
	GVE_NUM_STATE_FLAGS		= 9	/* Not part of the enum space */
};
119 
/* Declares struct gve_state_flags, a bitset sized for GVE_NUM_STATE_FLAGS bits. */
BITSET_DEFINE(gve_state_flags, GVE_NUM_STATE_FLAGS);
121 
/* Device status bits: reset is bit 1, link status is bit 2. */
#define GVE_DEVICE_STATUS_RESET		(1 << 1)
#define GVE_DEVICE_STATUS_LINK_STATUS	(1 << 2)
124 
/*
 * Helpers for a ring's mutex. `ring` is a pointer to a structure that has
 * a `ring_mtx` member; GVE_RING_ASSERT checks that the mutex is owned.
 */
#define GVE_RING_LOCK(ring) \
	mtx_lock(&(ring)->ring_mtx)
#define GVE_RING_TRYLOCK(ring) \
	mtx_trylock(&(ring)->ring_mtx)
#define GVE_RING_UNLOCK(ring) \
	mtx_unlock(&(ring)->ring_mtx)
#define GVE_RING_ASSERT(ring) \
	mtx_assert(&(ring)->ring_mtx, MA_OWNED)
129 
/*
 * Helpers for the "gve interface lock", an sx(9) lock. `lock` is the lock
 * object itself (an lvalue, e.g. priv->gve_iface_lock), not a pointer to
 * it. The argument is parenthesized before its address is taken, matching
 * the GVE_RING_* macros. LOCK takes the lock exclusively and ASSERT checks
 * that it is exclusively held.
 */
#define GVE_IFACE_LOCK_INIT(lock)     sx_init(&(lock), "gve interface lock")
#define GVE_IFACE_LOCK_DESTROY(lock)  sx_destroy(&(lock))
#define GVE_IFACE_LOCK_LOCK(lock)     sx_xlock(&(lock))
#define GVE_IFACE_LOCK_UNLOCK(lock)   sx_unlock(&(lock))
#define GVE_IFACE_LOCK_ASSERT(lock)   sx_assert(&(lock), SA_XLOCKED)
135 
136 struct gve_queue_page_list {
137 	uint32_t id;
138 	uint32_t num_dmas;
139 	uint32_t num_pages;
140 	vm_offset_t kva;
141 	vm_page_t *pages;
142 	struct gve_dma_handle *dmas;
143 };
144 
/* Per-interrupt bookkeeping: a resource pointer and an opaque cookie. */
struct gve_irq {
	struct resource	*res;
	void		*cookie;
};
149 
150 struct gve_rx_slot_page_info {
151 	void *page_address;
152 	vm_page_t page;
153 	uint32_t page_offset;
154 	uint16_t pad;
155 };
156 
/*
 * Reassembly state for a received packet that is split across multiple
 * buffers; the fields here are what is needed to reconstruct it.
 */
struct gve_rx_ctx {
	/* Head and tail of the mbuf chain for the current packet. */
	struct mbuf	*mbuf_head;
	struct mbuf	*mbuf_tail;
	uint32_t	total_size;
	uint8_t		frag_cnt;
	bool		is_tcp;
	bool		drop_pkt;
};
170 
171 struct gve_ring_com {
172 	struct gve_priv *priv;
173 	uint32_t id;
174 
175 	/*
176 	 * BAR2 offset for this ring's doorbell and the
177 	 * counter-array offset for this ring's counter.
178 	 * Acquired from the device individually for each
179 	 * queue in the queue_create adminq command.
180 	 */
181 	struct gve_queue_resources *q_resources;
182 	struct gve_dma_handle q_resources_mem;
183 
184 	/* Byte offset into BAR2 where this ring's 4-byte irq doorbell lies. */
185 	uint32_t irq_db_offset;
186 	/* Byte offset into BAR2 where this ring's 4-byte doorbell lies. */
187 	uint32_t db_offset;
188 	/*
189 	 * Index, not byte-offset, into the counter array where this ring's
190 	 * 4-byte counter lies.
191 	 */
192 	uint32_t counter_idx;
193 
194 	/*
195 	 * The index of the MSIX vector that was assigned to
196 	 * this ring in `gve_alloc_irqs`.
197 	 *
198 	 * It is passed to the device in the queue_create adminq
199 	 * command.
200 	 *
201 	 * Additionally, this also serves as the index into
202 	 * `priv->irq_db_indices` where this ring's irq doorbell's
203 	 * BAR2 offset, `irq_db_idx`, can be found.
204 	 */
205 	int ntfy_id;
206 
207 	/*
208 	 * The fixed bounce buffer for this ring.
209 	 * Once allocated, has to be offered to the device
210 	 * over the register-page-list adminq command.
211 	 */
212 	struct gve_queue_page_list *qpl;
213 
214 	struct task cleanup_task;
215 	struct taskqueue *cleanup_tq;
216 } __aligned(CACHE_LINE_SIZE);
217 
218 struct gve_rxq_stats {
219 	counter_u64_t rbytes;
220 	counter_u64_t rpackets;
221 	counter_u64_t rx_dropped_pkt;
222 	counter_u64_t rx_copybreak_cnt;
223 	counter_u64_t rx_frag_flip_cnt;
224 	counter_u64_t rx_frag_copy_cnt;
225 	counter_u64_t rx_dropped_pkt_desc_err;
226 	counter_u64_t rx_dropped_pkt_mbuf_alloc_fail;
227 };
228 
229 #define NUM_RX_STATS (sizeof(struct gve_rxq_stats) / sizeof(counter_u64_t))
230 
231 /* power-of-2 sized receive ring */
232 struct gve_rx_ring {
233 	struct gve_ring_com com;
234 	struct gve_dma_handle desc_ring_mem;
235 	struct gve_dma_handle data_ring_mem;
236 
237 	/* accessed in the receive hot path */
238 	struct {
239 		struct gve_rx_desc *desc_ring;
240 		union gve_rx_data_slot *data_ring;
241 		struct gve_rx_slot_page_info *page_info;
242 
243 		struct gve_rx_ctx ctx;
244 		struct lro_ctrl lro;
245 		uint8_t seq_no; /* helps traverse the descriptor ring */
246 		uint32_t cnt; /* free-running total number of completed packets */
247 		uint32_t fill_cnt; /* free-running total number of descs and buffs posted */
248 		uint32_t mask; /* masks the cnt and fill_cnt to the size of the ring */
249 		struct gve_rxq_stats stats;
250 	} __aligned(CACHE_LINE_SIZE);
251 
252 } __aligned(CACHE_LINE_SIZE);
253 
254 /*
255  * A contiguous representation of the pages composing the Tx bounce buffer.
256  * The xmit taskqueue and the completion taskqueue both simultaneously use it.
257  * Both operate on `available`: the xmit tq lowers it and the completion tq
258  * raises it. `head` is the last location written at and so only the xmit tq
259  * uses it.
260  */
261 struct gve_tx_fifo {
262 	vm_offset_t base; /* address of base of FIFO */
263 	uint32_t size; /* total size */
264 	volatile int available; /* how much space is still available */
265 	uint32_t head; /* offset to write at */
266 };
267 
268 struct gve_tx_buffer_state {
269 	struct mbuf *mbuf;
270 	struct gve_tx_iovec iov[GVE_TX_MAX_DESCS];
271 };
272 
273 struct gve_txq_stats {
274 	counter_u64_t tbytes;
275 	counter_u64_t tpackets;
276 	counter_u64_t tso_packet_cnt;
277 	counter_u64_t tx_dropped_pkt;
278 	counter_u64_t tx_dropped_pkt_nospace_device;
279 	counter_u64_t tx_dropped_pkt_nospace_bufring;
280 	counter_u64_t tx_dropped_pkt_vlan;
281 };
282 
283 #define NUM_TX_STATS (sizeof(struct gve_txq_stats) / sizeof(counter_u64_t))
284 
285 /* power-of-2 sized transmit ring */
286 struct gve_tx_ring {
287 	struct gve_ring_com com;
288 	struct gve_dma_handle desc_ring_mem;
289 
290 	struct task xmit_task;
291 	struct taskqueue *xmit_tq;
292 
293 	/* accessed in the transmit hot path */
294 	struct {
295 		union gve_tx_desc *desc_ring;
296 		struct gve_tx_buffer_state *info;
297 		struct buf_ring *br;
298 
299 		struct gve_tx_fifo fifo;
300 		struct mtx ring_mtx;
301 
302 		uint32_t req; /* free-running total number of packets written to the nic */
303 		uint32_t done; /* free-running total number of completed packets */
304 		uint32_t mask; /* masks the req and done to the size of the ring */
305 		struct gve_txq_stats stats;
306 	} __aligned(CACHE_LINE_SIZE);
307 
308 } __aligned(CACHE_LINE_SIZE);
309 
310 struct gve_priv {
311 	if_t ifp;
312 	device_t dev;
313 	struct ifmedia media;
314 
315 	uint8_t mac[ETHER_ADDR_LEN];
316 
317 	struct gve_dma_handle aq_mem;
318 
319 	struct resource *reg_bar; /* BAR0 */
320 	struct resource *db_bar; /* BAR2 */
321 	struct resource *msix_table;
322 
323 	uint32_t mgmt_msix_idx;
324 	uint32_t rx_copybreak;
325 
326 	uint16_t num_event_counters;
327 	uint16_t default_num_queues;
328 	uint16_t tx_desc_cnt;
329 	uint16_t rx_desc_cnt;
330 	uint16_t rx_pages_per_qpl;
331 	uint64_t max_registered_pages;
332 	uint64_t num_registered_pages;
333 	uint32_t supported_features;
334 	uint16_t max_mtu;
335 
336 	struct gve_dma_handle counter_array_mem;
337 	__be32 *counters;
338 	struct gve_dma_handle irqs_db_mem;
339 	struct gve_irq_db *irq_db_indices;
340 
341 	enum gve_queue_format queue_format;
342 	struct gve_queue_page_list *qpls;
343 	struct gve_queue_config tx_cfg;
344 	struct gve_queue_config rx_cfg;
345 	uint32_t num_queues;
346 
347 	struct gve_irq *irq_tbl;
348 	struct gve_tx_ring *tx;
349 	struct gve_rx_ring *rx;
350 
351 	/*
352 	 * Admin queue - see gve_adminq.h
353 	 * Since AQ cmds do not run in steady state, 32 bit counters suffice
354 	 */
355 	struct gve_adminq_command *adminq;
356 	vm_paddr_t adminq_bus_addr;
357 	uint32_t adminq_mask; /* masks prod_cnt to adminq size */
358 	uint32_t adminq_prod_cnt; /* free-running count of AQ cmds executed */
359 	uint32_t adminq_cmd_fail; /* free-running count of AQ cmds failed */
360 	uint32_t adminq_timeouts; /* free-running count of AQ cmds timeouts */
361 	/* free-running count of each distinct AQ cmd executed */
362 	uint32_t adminq_describe_device_cnt;
363 	uint32_t adminq_cfg_device_resources_cnt;
364 	uint32_t adminq_register_page_list_cnt;
365 	uint32_t adminq_unregister_page_list_cnt;
366 	uint32_t adminq_create_tx_queue_cnt;
367 	uint32_t adminq_create_rx_queue_cnt;
368 	uint32_t adminq_destroy_tx_queue_cnt;
369 	uint32_t adminq_destroy_rx_queue_cnt;
370 	uint32_t adminq_dcfg_device_resources_cnt;
371 	uint32_t adminq_set_driver_parameter_cnt;
372 	uint32_t adminq_verify_driver_compatibility_cnt;
373 
374 	uint32_t interface_up_cnt;
375 	uint32_t interface_down_cnt;
376 	uint32_t reset_cnt;
377 
378 	struct task service_task;
379 	struct taskqueue *service_tq;
380 
381 	struct gve_state_flags state_flags;
382 	struct sx gve_iface_lock;
383 };
384 
385 static inline bool
386 gve_get_state_flag(struct gve_priv *priv, int pos)
387 {
388 	return (BIT_ISSET(GVE_NUM_STATE_FLAGS, pos, &priv->state_flags));
389 }
390 
391 static inline void
392 gve_set_state_flag(struct gve_priv *priv, int pos)
393 {
394 	BIT_SET_ATOMIC(GVE_NUM_STATE_FLAGS, pos, &priv->state_flags);
395 }
396 
397 static inline void
398 gve_clear_state_flag(struct gve_priv *priv, int pos)
399 {
400 	BIT_CLR_ATOMIC(GVE_NUM_STATE_FLAGS, pos, &priv->state_flags);
401 }
402 
403 /* Defined in gve_main.c */
404 void gve_schedule_reset(struct gve_priv *priv);
405 
406 /* Register access functions defined in gve_utils.c */
407 uint32_t gve_reg_bar_read_4(struct gve_priv *priv, bus_size_t offset);
408 void gve_reg_bar_write_4(struct gve_priv *priv, bus_size_t offset, uint32_t val);
409 void gve_db_bar_write_4(struct gve_priv *priv, bus_size_t offset, uint32_t val);
410 
411 /* QPL (Queue Page List) functions defined in gve_qpl.c */
412 int gve_alloc_qpls(struct gve_priv *priv);
413 void gve_free_qpls(struct gve_priv *priv);
414 int gve_register_qpls(struct gve_priv *priv);
415 int gve_unregister_qpls(struct gve_priv *priv);
416 
417 /* TX functions defined in gve_tx.c */
418 int gve_alloc_tx_rings(struct gve_priv *priv);
419 void gve_free_tx_rings(struct gve_priv *priv);
420 int gve_create_tx_rings(struct gve_priv *priv);
421 int gve_destroy_tx_rings(struct gve_priv *priv);
422 int gve_tx_intr(void *arg);
423 int gve_xmit_ifp(if_t ifp, struct mbuf *mbuf);
424 void gve_qflush(if_t ifp);
425 void gve_xmit_tq(void *arg, int pending);
426 void gve_tx_cleanup_tq(void *arg, int pending);
427 
428 /* RX functions defined in gve_rx.c */
429 int gve_alloc_rx_rings(struct gve_priv *priv);
430 void gve_free_rx_rings(struct gve_priv *priv);
431 int gve_create_rx_rings(struct gve_priv *priv);
432 int gve_destroy_rx_rings(struct gve_priv *priv);
433 int gve_rx_intr(void *arg);
434 void gve_rx_cleanup_tq(void *arg, int pending);
435 
436 /* DMA functions defined in gve_utils.c */
437 int gve_dma_alloc_coherent(struct gve_priv *priv, int size, int align,
438     struct gve_dma_handle *dma);
439 void gve_dma_free_coherent(struct gve_dma_handle *dma);
440 int gve_dmamap_create(struct gve_priv *priv, int size, int align,
441     struct gve_dma_handle *dma);
442 void gve_dmamap_destroy(struct gve_dma_handle *dma);
443 
444 /* IRQ functions defined in gve_utils.c */
445 void gve_free_irqs(struct gve_priv *priv);
446 int gve_alloc_irqs(struct gve_priv *priv);
447 void gve_unmask_all_queue_irqs(struct gve_priv *priv);
448 void gve_mask_all_queue_irqs(struct gve_priv *priv);
449 
450 /* Systcl functions defined in gve_sysctl.c*/
451 void gve_setup_sysctl(struct gve_priv *priv);
452 void gve_accum_stats(struct gve_priv *priv, uint64_t *rpackets,
453     uint64_t *rbytes, uint64_t *rx_dropped_pkt, uint64_t *tpackets,
454     uint64_t *tbytes, uint64_t *tx_dropped_pkt);
455 
456 /* Stats functions defined in gve_utils.c */
457 void gve_alloc_counters(counter_u64_t *stat, int num_stats);
458 void gve_free_counters(counter_u64_t *stat, int num_stats);
459 
#endif /* _GVE_FBSD_H */
461