xref: /freebsd/sys/dev/gve/gve_adminq.c (revision c27f7d6b9cf6d4ab01cb3d0972726c14e0aca146)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 2023-2024 Google LLC
5  *
6  * Redistribution and use in source and binary forms, with or without modification,
7  * are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  *    list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright notice,
13  *    this list of conditions and the following disclaimer in the documentation
14  *    and/or other materials provided with the distribution.
15  *
16  * 3. Neither the name of the copyright holder nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software without
18  *    specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
24  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
27  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  */
31 #include <sys/endian.h>
32 #include <sys/socket.h>
33 #include <sys/time.h>
34 
35 #include <net/ethernet.h>
36 #include <net/if.h>
37 #include <net/if_var.h>
38 
39 #include "gve.h"
40 #include "gve_adminq.h"
41 
42 #define GVE_ADMINQ_SLEEP_LEN_MS 20
43 #define GVE_MAX_ADMINQ_EVENT_COUNTER_CHECK 10
44 #define GVE_ADMINQ_DEVICE_DESCRIPTOR_VERSION 1
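/* Register BAR offset at which the admin queue base address (in units of ADMINQ_SIZE) is programmed. */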
45 #define GVE_REG_ADMINQ_ADDR 16
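/* Number of command slots in the admin queue ring. */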
46 #define ADMINQ_SLOTS (ADMINQ_SIZE / sizeof(struct gve_adminq_command))
47 
48 #define GVE_DEVICE_OPTION_ERROR_FMT "%s option error:\n" \
49     "Expected: length=%d, feature_mask=%x.\n" \
50     "Actual: length=%d, feature_mask=%x.\n"
51 
52 #define GVE_DEVICE_OPTION_TOO_BIG_FMT "Length of %s option larger than expected." \
53     " Possible older version of guest driver.\n"
54 
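/*
 * Validate a single device option against the length and required-feature
 * mask the driver expects and, on a match, save a pointer to its payload in
 * the corresponding dev_op_* out-parameter. Mismatched or unknown options
 * are logged and skipped.
 */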
55 static void
56 gve_parse_device_option(struct gve_priv *priv,
57     struct gve_device_descriptor *device_descriptor,
58     struct gve_device_option *option,
59     struct gve_device_option_gqi_qpl **dev_op_gqi_qpl,
60     struct gve_device_option_dqo_rda **dev_op_dqo_rda,
61     struct gve_device_option_dqo_qpl **dev_op_dqo_qpl,
62     struct gve_device_option_modify_ring **dev_op_modify_ring,
63     struct gve_device_option_jumbo_frames **dev_op_jumbo_frames)
64 {
65 	uint32_t req_feat_mask = be32toh(option->required_features_mask);
66 	uint16_t option_length = be16toh(option->option_length);
67 	uint16_t option_id = be16toh(option->option_id);
68 
69 	/*
70 	 * If the length or feature mask doesn't match, continue without
71 	 * enabling the feature.
72 	 */
73 	switch (option_id) {
74 	case GVE_DEV_OPT_ID_GQI_QPL:
75 		if (option_length < sizeof(**dev_op_gqi_qpl) ||
76 		    req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL) {
77 			device_printf(priv->dev, GVE_DEVICE_OPTION_ERROR_FMT,
78 			    "GQI QPL", (int)sizeof(**dev_op_gqi_qpl),
79 			    GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL,
80 			    option_length, req_feat_mask);
81 			break;
82 		}
83 
84 		if (option_length > sizeof(**dev_op_gqi_qpl)) {
85 			device_printf(priv->dev, GVE_DEVICE_OPTION_TOO_BIG_FMT,
86 			    "GQI QPL");
87 		}
88 		*dev_op_gqi_qpl = (void *)(option + 1);
89 		break;
90 
91 	case GVE_DEV_OPT_ID_DQO_RDA:
92 		if (option_length < sizeof(**dev_op_dqo_rda) ||
93 		    req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA) {
94 			device_printf(priv->dev, GVE_DEVICE_OPTION_ERROR_FMT,
95 			    "DQO RDA", (int)sizeof(**dev_op_dqo_rda),
96 			    GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA,
97 			    option_length, req_feat_mask);
98 			break;
99 		}
100 
101 		if (option_length > sizeof(**dev_op_dqo_rda)) {
102 			device_printf(priv->dev, GVE_DEVICE_OPTION_TOO_BIG_FMT,
103 			    "DQO RDA");
104 		}
105 		*dev_op_dqo_rda = (void *)(option + 1);
106 		break;
107 
108 	case GVE_DEV_OPT_ID_DQO_QPL:
109 		if (option_length < sizeof(**dev_op_dqo_qpl) ||
110 		    req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL) {
111 			device_printf(priv->dev, GVE_DEVICE_OPTION_ERROR_FMT,
112 			    "DQO QPL", (int)sizeof(**dev_op_dqo_qpl),
113 			    GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL,
114 			    option_length, req_feat_mask);
115 			break;
116 		}
117 
118 		if (option_length > sizeof(**dev_op_dqo_qpl)) {
119 			device_printf(priv->dev, GVE_DEVICE_OPTION_TOO_BIG_FMT,
120 			    "DQO QPL");
121 		}
122 		*dev_op_dqo_qpl = (void *)(option + 1);
123 		break;
124 
125 	case GVE_DEV_OPT_ID_MODIFY_RING:
126 		if (option_length < (sizeof(**dev_op_modify_ring) -
127 		    sizeof(struct gve_ring_size_bound)) ||
128 		    req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING) {
129 			device_printf(priv->dev, GVE_DEVICE_OPTION_ERROR_FMT,
130 			    "Modify Ring", (int)sizeof(**dev_op_modify_ring),
131 			    GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING,
132 			    option_length, req_feat_mask);
133 			break;
134 		}
135 
136 		if (option_length > sizeof(**dev_op_modify_ring)) {
137 			device_printf(priv->dev, GVE_DEVICE_OPTION_TOO_BIG_FMT,
138 			    "Modify Ring");
139 		}
140 		*dev_op_modify_ring = (void *)(option + 1);
141 
142 		/* Min ring size included; set the minimum ring size. */
143 		if (option_length == sizeof(**dev_op_modify_ring)) {
144 			priv->min_rx_desc_cnt = max(
145 			    be16toh((*dev_op_modify_ring)->min_ring_size.rx),
146 			    GVE_DEFAULT_MIN_RX_RING_SIZE);
147 			priv->min_tx_desc_cnt = max(
148 			    be16toh((*dev_op_modify_ring)->min_ring_size.tx),
149 			    GVE_DEFAULT_MIN_TX_RING_SIZE);
150 		}
151 		break;
152 
153 	case GVE_DEV_OPT_ID_JUMBO_FRAMES:
154 		if (option_length < sizeof(**dev_op_jumbo_frames) ||
155 		    req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES) {
156 			device_printf(priv->dev, GVE_DEVICE_OPTION_ERROR_FMT,
157 			    "Jumbo Frames", (int)sizeof(**dev_op_jumbo_frames),
158 			    GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES,
159 			    option_length, req_feat_mask);
160 			break;
161 		}
162 
163 		if (option_length > sizeof(**dev_op_jumbo_frames)) {
164 			device_printf(priv->dev,
165 			    GVE_DEVICE_OPTION_TOO_BIG_FMT, "Jumbo Frames");
166 		}
167 		*dev_op_jumbo_frames = (void *)(option + 1);
168 		break;
169 
170 	default:
171 		/*
172 		 * If we don't recognize the option just continue
173 		 * without doing anything.
174 		 */
175 		device_printf(priv->dev, "Unrecognized device option 0x%hx not enabled.\n",
176 		    option_id);
177 	}
178 }
179 
180 /* Process all device options for a given describe device call. */
181 static int
182 gve_process_device_options(struct gve_priv *priv,
183     struct gve_device_descriptor *descriptor,
184     struct gve_device_option_gqi_qpl **dev_op_gqi_qpl,
185     struct gve_device_option_dqo_rda **dev_op_dqo_rda,
186     struct gve_device_option_dqo_qpl **dev_op_dqo_qpl,
187     struct gve_device_option_modify_ring **dev_op_modify_ring,
188     struct gve_device_option_jumbo_frames **dev_op_jumbo_frames)
189 {
190 	char *desc_end = (char *)descriptor + be16toh(descriptor->total_length);
191 	const int num_options = be16toh(descriptor->num_device_options);
192 	struct gve_device_option *dev_opt;
193 	int i;
194 
195 	/* The options struct directly follows the device descriptor. */
196 	dev_opt = (void *)(descriptor + 1);
197 	for (i = 0; i < num_options; i++) {
198 		if ((char *)(dev_opt + 1) > desc_end ||
199 		    (char *)(dev_opt + 1) + be16toh(dev_opt->option_length) > desc_end) {
200 			device_printf(priv->dev,
201 			    "options exceed device descriptor's total length.\n");
202 			return (EINVAL);
203 		}
204 
205 		gve_parse_device_option(priv, descriptor, dev_opt,
206 		    dev_op_gqi_qpl,
207 		    dev_op_dqo_rda,
208 		    dev_op_dqo_qpl,
209 		    dev_op_modify_ring,
210 		    dev_op_jumbo_frames);
211 		dev_opt = (void *)((char *)(dev_opt + 1) + be16toh(dev_opt->option_length));
212 	}
213 
214 	return (0);
215 }
216 
217 static int gve_adminq_execute_cmd(struct gve_priv *priv,
218     struct gve_adminq_command *cmd);
219 
220 static int
221 gve_adminq_destroy_tx_queue(struct gve_priv *priv, uint32_t id)
222 {
223 	struct gve_adminq_command cmd = (struct gve_adminq_command){};
224 
225 	cmd.opcode = htobe32(GVE_ADMINQ_DESTROY_TX_QUEUE);
226 	cmd.destroy_tx_queue.queue_id = htobe32(id);
227 
228 	return (gve_adminq_execute_cmd(priv, &cmd));
229 }
230 
231 static int
232 gve_adminq_destroy_rx_queue(struct gve_priv *priv, uint32_t id)
233 {
234 	struct gve_adminq_command cmd = (struct gve_adminq_command){};
235 
236 	cmd.opcode = htobe32(GVE_ADMINQ_DESTROY_RX_QUEUE);
237 	cmd.destroy_rx_queue.queue_id = htobe32(id);
238 
239 	return (gve_adminq_execute_cmd(priv, &cmd));
240 }
241 
242 int
243 gve_adminq_destroy_rx_queues(struct gve_priv *priv, uint32_t num_queues)
244 {
245 	int err = 0;
246 	int i;
247 
248 	for (i = 0; i < num_queues; i++) {
249 		err = gve_adminq_destroy_rx_queue(priv, i);
250 		if (err != 0) {
251 			device_printf(priv->dev, "Failed to destroy rxq %d, err: %d\n",
252 			    i, err);
253 		}
254 	}
255 
256 	if (err != 0)
257 		return (err);
258 
259 	device_printf(priv->dev, "Destroyed %d rx queues\n", num_queues);
260 	return (0);
261 }
262 
263 int
264 gve_adminq_destroy_tx_queues(struct gve_priv *priv, uint32_t num_queues)
265 {
266 	int err = 0;
267 	int i;
268 
269 	for (i = 0; i < num_queues; i++) {
270 		err = gve_adminq_destroy_tx_queue(priv, i);
271 		if (err != 0) {
272 			device_printf(priv->dev, "Failed to destroy txq %d, err: %d\n",
273 			    i, err);
274 		}
275 	}
276 
277 	if (err != 0)
278 		return (err);
279 
280 	device_printf(priv->dev, "Destroyed %d tx queues\n", num_queues);
281 	return (0);
282 }
283 
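/*
 * Build and issue a CREATE_RX_QUEUE command for the given ring. GQI and DQO
 * queue formats interpret the descriptor/data ring addresses differently,
 * hence the two branches below.
 */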
284 static int
285 gve_adminq_create_rx_queue(struct gve_priv *priv, uint32_t queue_index)
286 {
287 	struct gve_adminq_command cmd = (struct gve_adminq_command){};
288 	struct gve_rx_ring *rx = &priv->rx[queue_index];
289 	struct gve_dma_handle *qres_dma = &rx->com.q_resources_mem;
290 
291 	bus_dmamap_sync(qres_dma->tag, qres_dma->map, BUS_DMASYNC_PREREAD);
292 
293 	cmd.opcode = htobe32(GVE_ADMINQ_CREATE_RX_QUEUE);
294 	cmd.create_rx_queue = (struct gve_adminq_create_rx_queue) {
295 		.queue_id = htobe32(queue_index),
296 		.ntfy_id = htobe32(rx->com.ntfy_id),
297 		.queue_resources_addr = htobe64(qres_dma->bus_addr),
298 		.rx_ring_size = htobe16(priv->rx_desc_cnt),
299 		.packet_buffer_size = htobe16(GVE_DEFAULT_RX_BUFFER_SIZE),
300 	};
301 
302 	if (gve_is_gqi(priv)) {
303 		cmd.create_rx_queue.rx_desc_ring_addr =
304 		    htobe64(rx->desc_ring_mem.bus_addr);
305 		cmd.create_rx_queue.rx_data_ring_addr =
306 		    htobe64(rx->data_ring_mem.bus_addr);
307 		cmd.create_rx_queue.index =
308 		    htobe32(queue_index);
309 		cmd.create_rx_queue.queue_page_list_id =
310 		    htobe32((rx->com.qpl)->id);
311 	} else {
312 		cmd.create_rx_queue.queue_page_list_id =
313 		    htobe32(GVE_RAW_ADDRESSING_QPL_ID);
314 		cmd.create_rx_queue.rx_desc_ring_addr =
315 		    htobe64(rx->dqo.compl_ring_mem.bus_addr);
316 		cmd.create_rx_queue.rx_data_ring_addr =
317 		    htobe64(rx->desc_ring_mem.bus_addr);
318 		cmd.create_rx_queue.rx_buff_ring_size =
319 		    htobe16(priv->rx_desc_cnt);
320 		cmd.create_rx_queue.enable_rsc =
321 		    !!((if_getcapenable(priv->ifp) & IFCAP_LRO) &&
322 			!gve_disable_hw_lro);
323 	}
324 
325 	return (gve_adminq_execute_cmd(priv, &cmd));
326 }
327 
328 int
329 gve_adminq_create_rx_queues(struct gve_priv *priv, uint32_t num_queues)
330 {
331 	int err;
332 	int i;
333 
334 	for (i = 0; i < num_queues; i++) {
335 		err = gve_adminq_create_rx_queue(priv, i);
336 		if (err != 0) {
337 			device_printf(priv->dev, "Failed to create rxq %d, err: %d\n",
338 			    i, err);
339 			goto abort;
340 		}
341 	}
342 
343 	if (bootverbose)
344 		device_printf(priv->dev, "Created %d rx queues\n", num_queues);
345 	return (0);
346 
347 abort:
348 	gve_adminq_destroy_rx_queues(priv, i);
349 	return (err);
350 }
351 
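/*
 * Build and issue a CREATE_TX_QUEUE command for the given ring. GQI rings
 * reference a queue page list; DQO rings use the raw-addressing QPL id and
 * also supply a completion ring.
 */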
352 static int
353 gve_adminq_create_tx_queue(struct gve_priv *priv, uint32_t queue_index)
354 {
355 	struct gve_adminq_command cmd = (struct gve_adminq_command){};
356 	struct gve_tx_ring *tx = &priv->tx[queue_index];
357 	struct gve_dma_handle *qres_dma = &tx->com.q_resources_mem;
358 
359 	bus_dmamap_sync(qres_dma->tag, qres_dma->map, BUS_DMASYNC_PREREAD);
360 
361 	cmd.opcode = htobe32(GVE_ADMINQ_CREATE_TX_QUEUE);
362 	cmd.create_tx_queue = (struct gve_adminq_create_tx_queue) {
363 		.queue_id = htobe32(queue_index),
364 		.queue_resources_addr = htobe64(qres_dma->bus_addr),
365 		.tx_ring_addr = htobe64(tx->desc_ring_mem.bus_addr),
366 		.ntfy_id = htobe32(tx->com.ntfy_id),
367 		.tx_ring_size = htobe16(priv->tx_desc_cnt),
368 	};
369 
370 	if (gve_is_gqi(priv)) {
371 		cmd.create_tx_queue.queue_page_list_id =
372 		    htobe32((tx->com.qpl)->id);
373 	} else {
374 		cmd.create_tx_queue.queue_page_list_id =
375 		    htobe32(GVE_RAW_ADDRESSING_QPL_ID);
376 		cmd.create_tx_queue.tx_comp_ring_addr =
377 		    htobe64(tx->dqo.compl_ring_mem.bus_addr);
378 		cmd.create_tx_queue.tx_comp_ring_size =
379 		    htobe16(priv->tx_desc_cnt);
380 	}
381 	return (gve_adminq_execute_cmd(priv, &cmd));
382 }
383 
384 int
385 gve_adminq_create_tx_queues(struct gve_priv *priv, uint32_t num_queues)
386 {
387 	int err;
388 	int i;
389 
390 	for (i = 0; i < num_queues; i++) {
391 		err = gve_adminq_create_tx_queue(priv, i);
392 		if (err != 0) {
393 			device_printf(priv->dev, "Failed to create txq %d, err: %d\n",
394 			    i, err);
395 			goto abort;
396 		}
397 	}
398 
399 	if (bootverbose)
400 		device_printf(priv->dev, "Created %d tx queues\n", num_queues);
401 	return (0);
402 
403 abort:
404 	gve_adminq_destroy_tx_queues(priv, i);
405 	return (err);
406 }
407 
408 int
409 gve_adminq_set_mtu(struct gve_priv *priv, uint32_t mtu)
{
410 	struct gve_adminq_command cmd = (struct gve_adminq_command){};
411 
412 	cmd.opcode = htobe32(GVE_ADMINQ_SET_DRIVER_PARAMETER);
413 	cmd.set_driver_param = (struct gve_adminq_set_driver_parameter) {
414 		.parameter_type = htobe32(GVE_SET_PARAM_MTU),
415 		.parameter_value = htobe64(mtu),
416 	};
417 
418 	return (gve_adminq_execute_cmd(priv, &cmd));
419 }
420 
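/*
 * Record the optional capabilities (ring-size modification, jumbo frames)
 * that are advertised both by a parsed device option and by the device's
 * supported-features mask.
 */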
421 static void
422 gve_enable_supported_features(struct gve_priv *priv,
423     uint32_t supported_features_mask,
424     const struct gve_device_option_modify_ring *dev_op_modify_ring,
425     const struct gve_device_option_jumbo_frames *dev_op_jumbo_frames)
426 {
427 	if (dev_op_modify_ring &&
428 	    (supported_features_mask & GVE_SUP_MODIFY_RING_MASK)) {
429 		if (bootverbose)
430 			device_printf(priv->dev, "MODIFY RING device option enabled.\n");
431 		priv->modify_ringsize_enabled = true;
432 		priv->max_rx_desc_cnt = be16toh(dev_op_modify_ring->max_ring_size.rx);
433 		priv->max_tx_desc_cnt = be16toh(dev_op_modify_ring->max_ring_size.tx);
434 	}
435 
436 	if (dev_op_jumbo_frames &&
437 	    (supported_features_mask & GVE_SUP_JUMBO_FRAMES_MASK)) {
438 		if (bootverbose)
439 			device_printf(priv->dev, "JUMBO FRAMES device option enabled: %u.\n",
440 			    be16toh(dev_op_jumbo_frames->max_mtu));
441 		priv->max_mtu = be16toh(dev_op_jumbo_frames->max_mtu);
442 	}
443 }
444 
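/*
 * Issue a DESCRIBE_DEVICE command, then parse the returned descriptor and
 * its device options to pick a queue format and record the device's limits
 * (ring sizes, MTU, counters, MAC address).
 */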
445 int
446 gve_adminq_describe_device(struct gve_priv *priv)
447 {
448 	struct gve_adminq_command aq_cmd = (struct gve_adminq_command){};
449 	struct gve_device_descriptor *desc;
450 	struct gve_dma_handle desc_mem;
451 	struct gve_device_option_gqi_qpl *dev_op_gqi_qpl = NULL;
452 	struct gve_device_option_dqo_rda *dev_op_dqo_rda = NULL;
453 	struct gve_device_option_dqo_qpl *dev_op_dqo_qpl = NULL;
454 	struct gve_device_option_modify_ring *dev_op_modify_ring = NULL;
455 	struct gve_device_option_jumbo_frames *dev_op_jumbo_frames = NULL;
456 	uint32_t supported_features_mask = 0;
457 	int rc;
458 	int i;
459 
460 	rc = gve_dma_alloc_coherent(priv, ADMINQ_SIZE, ADMINQ_SIZE, &desc_mem);
461 	if (rc != 0) {
462 		device_printf(priv->dev, "Failed to alloc DMA mem for DescribeDevice.\n");
463 		return (rc);
464 	}
465 
466 	desc = desc_mem.cpu_addr;
467 
468 	aq_cmd.opcode = htobe32(GVE_ADMINQ_DESCRIBE_DEVICE);
469 	aq_cmd.describe_device.device_descriptor_addr = htobe64(
470 	    desc_mem.bus_addr);
471 	aq_cmd.describe_device.device_descriptor_version = htobe32(
472 	    GVE_ADMINQ_DEVICE_DESCRIPTOR_VERSION);
473 	aq_cmd.describe_device.available_length = htobe32(ADMINQ_SIZE);
474 
475 	bus_dmamap_sync(desc_mem.tag, desc_mem.map, BUS_DMASYNC_PREWRITE);
476 
477 	rc = gve_adminq_execute_cmd(priv, &aq_cmd);
478 	if (rc != 0)
479 		goto free_device_descriptor;
480 
481 	bus_dmamap_sync(desc_mem.tag, desc_mem.map, BUS_DMASYNC_POSTREAD);
482 
483 	/* Default min in case device options don't have min values */
484 	priv->min_rx_desc_cnt = GVE_DEFAULT_MIN_RX_RING_SIZE;
485 	priv->min_tx_desc_cnt = GVE_DEFAULT_MIN_TX_RING_SIZE;
486 
487 	rc = gve_process_device_options(priv, desc,
488 	    &dev_op_gqi_qpl,
489 	    &dev_op_dqo_rda,
490 	    &dev_op_dqo_qpl,
491 	    &dev_op_modify_ring,
492 	    &dev_op_jumbo_frames);
493 	if (rc != 0)
494 		goto free_device_descriptor;
495 
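	/* Select the queue format, preferring DQO RDA, then DQO QPL, then GQI QPL. */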
496 	if (dev_op_dqo_rda != NULL) {
497 		snprintf(gve_queue_format, sizeof(gve_queue_format),
498 		    "%s", "DQO RDA");
499 		priv->queue_format = GVE_DQO_RDA_FORMAT;
500 		supported_features_mask = be32toh(
501 		    dev_op_dqo_rda->supported_features_mask);
502 		if (bootverbose)
503 			device_printf(priv->dev,
504 			    "Driver is running with DQO RDA queue format.\n");
505 	} else if (dev_op_dqo_qpl != NULL) {
506 		snprintf(gve_queue_format, sizeof(gve_queue_format),
507 		    "%s", "DQO QPL");
508 		priv->queue_format = GVE_DQO_QPL_FORMAT;
509 		supported_features_mask = be32toh(
510 		    dev_op_dqo_qpl->supported_features_mask);
511 		if (bootverbose)
512 			device_printf(priv->dev,
513 			    "Driver is running with DQO QPL queue format.\n");
514 	} else if (dev_op_gqi_qpl != NULL) {
515 		snprintf(gve_queue_format, sizeof(gve_queue_format),
516 		    "%s", "GQI QPL");
517 		priv->queue_format = GVE_GQI_QPL_FORMAT;
518 		supported_features_mask = be32toh(
519 		    dev_op_gqi_qpl->supported_features_mask);
520 		if (bootverbose)
521 			device_printf(priv->dev,
522 			    "Driver is running with GQI QPL queue format.\n");
523 	} else {
524 		device_printf(priv->dev, "No compatible queue formats\n");
525 		rc = EINVAL;
526 		goto free_device_descriptor;
527 	}
528 
529 	priv->num_event_counters = be16toh(desc->counters);
530 	priv->default_num_queues = be16toh(desc->default_num_queues);
531 	priv->tx_desc_cnt = be16toh(desc->tx_queue_entries);
532 	priv->rx_desc_cnt = be16toh(desc->rx_queue_entries);
533 	priv->rx_pages_per_qpl = be16toh(desc->rx_pages_per_qpl);
534 	priv->max_registered_pages = be64toh(desc->max_registered_pages);
535 	priv->max_mtu = be16toh(desc->mtu);
537 	priv->supported_features = supported_features_mask;
538 
539 	/* Default max to current in case modify ring size option is disabled */
540 	priv->max_rx_desc_cnt = priv->rx_desc_cnt;
541 	priv->max_tx_desc_cnt = priv->tx_desc_cnt;
542 
543 	gve_enable_supported_features(priv, supported_features_mask,
544 	    dev_op_modify_ring, dev_op_jumbo_frames);
545 
546 	for (i = 0; i < ETHER_ADDR_LEN; i++)
547 		priv->mac[i] = desc->mac[i];
548 
549 free_device_descriptor:
550 	gve_dma_free_coherent(&desc_mem);
551 
552 	return (rc);
553 }
554 
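/*
 * Register a queue page list with the device: stage the bus address of every
 * page in a temporary DMA buffer and hand it over via REGISTER_PAGE_LIST.
 */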
555 int
556 gve_adminq_register_page_list(struct gve_priv *priv,
557     struct gve_queue_page_list *qpl)
558 {
559 	struct gve_adminq_command cmd = (struct gve_adminq_command){};
560 	uint32_t num_entries = qpl->num_pages;
561 	uint32_t size = num_entries * sizeof(qpl->dmas[0].bus_addr);
562 	__be64 *page_list;
563 	struct gve_dma_handle dma;
564 	int err;
565 	int i;
566 
567 	err = gve_dma_alloc_coherent(priv, size, PAGE_SIZE, &dma);
568 	if (err != 0)
569 		return (err);
570 
571 	page_list = dma.cpu_addr;
572 
573 	for (i = 0; i < num_entries; i++)
574 		page_list[i] = htobe64(qpl->dmas[i].bus_addr);
575 
576 	bus_dmamap_sync(dma.tag, dma.map, BUS_DMASYNC_PREWRITE);
577 
578 	cmd.opcode = htobe32(GVE_ADMINQ_REGISTER_PAGE_LIST);
579 	cmd.reg_page_list = (struct gve_adminq_register_page_list) {
580 		.page_list_id = htobe32(qpl->id),
581 		.num_pages = htobe32(num_entries),
582 		.page_address_list_addr = htobe64(dma.bus_addr),
583 		.page_size = htobe64(PAGE_SIZE),
584 	};
585 
586 	err = gve_adminq_execute_cmd(priv, &cmd);
587 	gve_dma_free_coherent(&dma);
588 	return (err);
589 }
590 
591 int
592 gve_adminq_unregister_page_list(struct gve_priv *priv, uint32_t page_list_id)
593 {
594 	struct gve_adminq_command cmd = (struct gve_adminq_command){};
595 
596 	cmd.opcode = htobe32(GVE_ADMINQ_UNREGISTER_PAGE_LIST);
597 	cmd.unreg_page_list = (struct gve_adminq_unregister_page_list) {
598 		.page_list_id = htobe32(page_list_id),
599 	};
600 
601 	return (gve_adminq_execute_cmd(priv, &cmd));
602 }
603 
604 #define GVE_NTFY_BLK_BASE_MSIX_IDX	0
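/*
 * Point the device at the event counter array and interrupt doorbells and
 * describe their layout.
 */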
605 int
606 gve_adminq_configure_device_resources(struct gve_priv *priv)
607 {
608 	struct gve_adminq_command aq_cmd = (struct gve_adminq_command){};
609 
610 	bus_dmamap_sync(priv->irqs_db_mem.tag, priv->irqs_db_mem.map,
611 	    BUS_DMASYNC_PREREAD);
612 	bus_dmamap_sync(priv->counter_array_mem.tag,
613 	    priv->counter_array_mem.map, BUS_DMASYNC_PREREAD);
614 
615 	aq_cmd.opcode = htobe32(GVE_ADMINQ_CONFIGURE_DEVICE_RESOURCES);
616 	aq_cmd.configure_device_resources =
617 	    (struct gve_adminq_configure_device_resources) {
618 		.counter_array = htobe64(priv->counter_array_mem.bus_addr),
619 		.irq_db_addr = htobe64(priv->irqs_db_mem.bus_addr),
620 		.num_counters = htobe32(priv->num_event_counters),
621 		.num_irq_dbs = htobe32(priv->num_queues),
622 		.irq_db_stride = htobe32(sizeof(struct gve_irq_db)),
623 		.ntfy_blk_msix_base_idx = htobe32(GVE_NTFY_BLK_BASE_MSIX_IDX),
624 		.queue_format = priv->queue_format,
625 	};
626 
627 	return (gve_adminq_execute_cmd(priv, &aq_cmd));
628 }
629 
630 int
631 gve_adminq_deconfigure_device_resources(struct gve_priv *priv)
632 {
633 	struct gve_adminq_command aq_cmd = (struct gve_adminq_command){};
634 
635 	aq_cmd.opcode = htobe32(GVE_ADMINQ_DECONFIGURE_DEVICE_RESOURCES);
636 	return (gve_adminq_execute_cmd(priv, &aq_cmd));
637 }
638 
639 int
640 gve_adminq_verify_driver_compatibility(struct gve_priv *priv,
641     uint64_t driver_info_len,
642     vm_paddr_t driver_info_addr)
643 {
644 	struct gve_adminq_command aq_cmd = (struct gve_adminq_command){};
645 
646 	aq_cmd.opcode = htobe32(GVE_ADMINQ_VERIFY_DRIVER_COMPATIBILITY);
647 	aq_cmd.verify_driver_compatibility = (struct gve_adminq_verify_driver_compatibility) {
648 		.driver_info_len = htobe64(driver_info_len),
649 		.driver_info_addr = htobe64(driver_info_addr),
650 	};
651 
652 	return (gve_adminq_execute_cmd(priv, &aq_cmd));
653 }
654 
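/*
 * Fetch the device's packet-type map into a temporary DMA buffer and copy
 * the L3/L4 types into the caller's DQO packet-type lookup table.
 */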
655 int
656 gve_adminq_get_ptype_map_dqo(struct gve_priv *priv,
657     struct gve_ptype_lut *ptype_lut_dqo)
658 {
659 	struct gve_adminq_command aq_cmd = (struct gve_adminq_command){};
660 	struct gve_ptype_map *ptype_map;
661 	struct gve_dma_handle dma;
662 	int err = 0;
663 	int i;
664 
665 	err = gve_dma_alloc_coherent(priv, sizeof(*ptype_map), PAGE_SIZE, &dma);
666 	if (err != 0)
667 		return (err);
668 	ptype_map = dma.cpu_addr;
669 
670 	aq_cmd.opcode = htobe32(GVE_ADMINQ_GET_PTYPE_MAP);
671 	aq_cmd.get_ptype_map = (struct gve_adminq_get_ptype_map) {
672 		.ptype_map_len = htobe64(sizeof(*ptype_map)),
673 		.ptype_map_addr = htobe64(dma.bus_addr),
674 	};
675 
676 	err = gve_adminq_execute_cmd(priv, &aq_cmd);
677 	if (err != 0)
678 		goto err;
679 
680 	/* Populate ptype_lut_dqo. */
681 	for (i = 0; i < GVE_NUM_PTYPES; i++) {
682 		ptype_lut_dqo->ptypes[i].l3_type = ptype_map->ptypes[i].l3_type;
683 		ptype_lut_dqo->ptypes[i].l4_type = ptype_map->ptypes[i].l4_type;
684 	}
685 err:
686 	gve_dma_free_coherent(&dma);
687 	return (err);
688 }
689 
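/*
 * Allocate (or reuse) the admin queue DMA region, reset the command
 * statistics, and program the queue's base address into the device.
 */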
690 int
691 gve_adminq_alloc(struct gve_priv *priv)
692 {
693 	int rc;
694 
695 	if (gve_get_state_flag(priv, GVE_STATE_FLAG_ADMINQ_OK))
696 		return (0);
697 
698 	if (priv->aq_mem.cpu_addr == NULL) {
699 		rc = gve_dma_alloc_coherent(priv, ADMINQ_SIZE, ADMINQ_SIZE,
700 		    &priv->aq_mem);
701 		if (rc != 0) {
702 			device_printf(priv->dev, "Failed to allocate admin queue mem\n");
703 			return (rc);
704 		}
705 	}
706 
707 	priv->adminq = priv->aq_mem.cpu_addr;
708 	priv->adminq_bus_addr = priv->aq_mem.bus_addr;
709 
710 	if (priv->adminq == NULL)
711 		return (ENOMEM);
712 
713 	priv->adminq_mask = ADMINQ_SLOTS - 1;
714 	priv->adminq_prod_cnt = 0;
715 	priv->adminq_cmd_fail = 0;
716 	priv->adminq_timeouts = 0;
717 	priv->adminq_describe_device_cnt = 0;
718 	priv->adminq_cfg_device_resources_cnt = 0;
719 	priv->adminq_register_page_list_cnt = 0;
720 	priv->adminq_unregister_page_list_cnt = 0;
721 	priv->adminq_create_tx_queue_cnt = 0;
722 	priv->adminq_create_rx_queue_cnt = 0;
723 	priv->adminq_destroy_tx_queue_cnt = 0;
724 	priv->adminq_destroy_rx_queue_cnt = 0;
725 	priv->adminq_dcfg_device_resources_cnt = 0;
726 	priv->adminq_set_driver_parameter_cnt = 0;
727 	priv->adminq_get_ptype_map_cnt = 0;
728 
729 	gve_reg_bar_write_4(priv, GVE_REG_ADMINQ_ADDR,
730 	    priv->adminq_bus_addr / ADMINQ_SIZE);
731 
732 	gve_set_state_flag(priv, GVE_STATE_FLAG_ADMINQ_OK);
733 	return (0);
734 }
735 
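/*
 * Unregister the admin queue from the device, wait for the device to
 * acknowledge the release, and free the backing DMA memory.
 */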
736 void
737 gve_release_adminq(struct gve_priv *priv)
738 {
739 	if (!gve_get_state_flag(priv, GVE_STATE_FLAG_ADMINQ_OK))
740 		return;
741 
742 	gve_reg_bar_write_4(priv, GVE_REG_ADMINQ_ADDR, 0);
743 	while (gve_reg_bar_read_4(priv, GVE_REG_ADMINQ_ADDR)) {
744 		device_printf(priv->dev, "Waiting until admin queue is released.\n");
745 		pause("gve release adminq", GVE_ADMINQ_SLEEP_LEN_MS);
746 	}
747 
748 	gve_dma_free_coherent(&priv->aq_mem);
749 	priv->aq_mem = (struct gve_dma_handle){};
750 	priv->adminq = NULL;
751 	priv->adminq_bus_addr = 0;
752 
753 	gve_clear_state_flag(priv, GVE_STATE_FLAG_ADMINQ_OK);
754 
755 	if (bootverbose)
756 		device_printf(priv->dev, "Admin queue released\n");
757 }
758 
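/* Translate an admin queue command status into an errno value. */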
759 static int
760 gve_adminq_parse_err(struct gve_priv *priv, uint32_t opcode, uint32_t status)
761 {
762 	if (status != GVE_ADMINQ_COMMAND_PASSED &&
763 	    status != GVE_ADMINQ_COMMAND_UNSET) {
764 		device_printf(priv->dev, "AQ command(%u): failed with status %d\n", opcode, status);
765 		priv->adminq_cmd_fail++;
766 	}
767 	switch (status) {
768 	case GVE_ADMINQ_COMMAND_PASSED:
769 		return (0);
770 
771 	case GVE_ADMINQ_COMMAND_UNSET:
772 		device_printf(priv->dev,
773 		    "AQ command(%u): err and status both unset, this should not be possible.\n",
774 		    opcode);
775 		return (EINVAL);
776 
777 	case GVE_ADMINQ_COMMAND_ERROR_ABORTED:
778 	case GVE_ADMINQ_COMMAND_ERROR_CANCELLED:
779 	case GVE_ADMINQ_COMMAND_ERROR_DATALOSS:
780 	case GVE_ADMINQ_COMMAND_ERROR_FAILED_PRECONDITION:
781 	case GVE_ADMINQ_COMMAND_ERROR_UNAVAILABLE:
782 		return (EAGAIN);
783 
784 	case GVE_ADMINQ_COMMAND_ERROR_ALREADY_EXISTS:
785 	case GVE_ADMINQ_COMMAND_ERROR_INTERNAL_ERROR:
786 	case GVE_ADMINQ_COMMAND_ERROR_INVALID_ARGUMENT:
787 	case GVE_ADMINQ_COMMAND_ERROR_NOT_FOUND:
788 	case GVE_ADMINQ_COMMAND_ERROR_OUT_OF_RANGE:
789 	case GVE_ADMINQ_COMMAND_ERROR_UNKNOWN_ERROR:
790 		return (EINVAL);
791 
792 	case GVE_ADMINQ_COMMAND_ERROR_DEADLINE_EXCEEDED:
793 		return (ETIMEDOUT);
794 
795 	case GVE_ADMINQ_COMMAND_ERROR_PERMISSION_DENIED:
796 	case GVE_ADMINQ_COMMAND_ERROR_UNAUTHENTICATED:
797 		return (EACCES);
798 
799 	case GVE_ADMINQ_COMMAND_ERROR_RESOURCE_EXHAUSTED:
800 		return (ENOMEM);
801 
802 	case GVE_ADMINQ_COMMAND_ERROR_UNIMPLEMENTED:
803 		return (EOPNOTSUPP);
804 
805 	default:
806 		device_printf(priv->dev, "AQ command(%u): unknown status code %d\n",
807 		    opcode, status);
808 		return (EINVAL);
809 	}
810 }
811 
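/* Ring the admin queue doorbell with the new producer count. */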
812 static void
813 gve_adminq_kick_cmd(struct gve_priv *priv, uint32_t prod_cnt)
814 {
815 	gve_reg_bar_write_4(priv, ADMINQ_DOORBELL, prod_cnt);
817 }
818 
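/*
 * Poll the admin queue event counter until it reaches prod_cnt, giving up
 * after GVE_MAX_ADMINQ_EVENT_COUNTER_CHECK attempts.
 */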
819 static bool
820 gve_adminq_wait_for_cmd(struct gve_priv *priv, uint32_t prod_cnt)
821 {
822 	int i;
823 
824 	for (i = 0; i < GVE_MAX_ADMINQ_EVENT_COUNTER_CHECK; i++) {
825 		if (gve_reg_bar_read_4(priv, ADMINQ_EVENT_COUNTER) == prod_cnt)
826 			return (true);
827 		pause("gve adminq cmd", GVE_ADMINQ_SLEEP_LEN_MS);
828 	}
829 
830 	return (false);
831 }
832 
833 /*
834  * Flushes all AQ commands currently queued and waits for them to complete.
835  * If there are failures, it will return the first error.
836  */
837 static int
838 gve_adminq_kick_and_wait(struct gve_priv *priv)
839 {
840 	struct gve_adminq_command *cmd;
841 	uint32_t status, err;
842 	uint32_t tail, head;
843 	uint32_t opcode;
844 	int i;
845 
846 	tail = gve_reg_bar_read_4(priv, ADMINQ_EVENT_COUNTER);
847 	head = priv->adminq_prod_cnt;
848 
849 	gve_adminq_kick_cmd(priv, head);
850 	if (!gve_adminq_wait_for_cmd(priv, head)) {
851 		device_printf(priv->dev, "AQ commands timed out, need to reset AQ\n");
852 		priv->adminq_timeouts++;
853 		return (ENOTRECOVERABLE);
854 	}
855 	bus_dmamap_sync(
856 	    priv->aq_mem.tag, priv->aq_mem.map, BUS_DMASYNC_POSTREAD);
857 
858 	for (i = tail; i < head; i++) {
859 		cmd = &priv->adminq[i & priv->adminq_mask];
860 		status = be32toh(cmd->status);
861 		opcode = be32toh(cmd->opcode);
862 		err = gve_adminq_parse_err(priv, opcode, status);
863 		if (err != 0)
864 			return (err);
865 	}
866 
867 	return (0);
868 }
869 
870 /*
871  * This function is not thread-safe; the caller is responsible for any
872  * necessary locks.
873  */
874 static int
875 gve_adminq_issue_cmd(struct gve_priv *priv, struct gve_adminq_command *cmd_orig)
876 {
877 	struct gve_adminq_command *cmd;
878 	uint32_t opcode;
879 	uint32_t tail;
880 	int err;
881 
882 	tail = gve_reg_bar_read_4(priv, ADMINQ_EVENT_COUNTER);
883 
884 	/* Check if next command will overflow the buffer. */
885 	if ((priv->adminq_prod_cnt - tail) > priv->adminq_mask) {
886 		/* Flush existing commands to make room. */
887 		err = gve_adminq_kick_and_wait(priv);
888 		if (err != 0)
889 			return (err);
890 
891 		/* Retry. */
892 		tail = gve_reg_bar_read_4(priv, ADMINQ_EVENT_COUNTER);
893 		if ((priv->adminq_prod_cnt - tail) > priv->adminq_mask) {
894 			/*
895 			 * This should never happen. We just flushed the
896 			 * command queue so there should be enough space.
897 			 */
898 			return (ENOMEM);
899 		}
900 	}
901 
902 	cmd = &priv->adminq[priv->adminq_prod_cnt & priv->adminq_mask];
903 	priv->adminq_prod_cnt++;
904 
905 	memcpy(cmd, cmd_orig, sizeof(*cmd_orig));
906 
907 	bus_dmamap_sync(
908 	    priv->aq_mem.tag, priv->aq_mem.map, BUS_DMASYNC_PREWRITE);
909 
910 	opcode = be32toh(cmd->opcode);
911 
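	/* Bump the per-opcode command statistic. */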
912 	switch (opcode) {
913 	case GVE_ADMINQ_DESCRIBE_DEVICE:
914 		priv->adminq_describe_device_cnt++;
915 		break;
916 
917 	case GVE_ADMINQ_CONFIGURE_DEVICE_RESOURCES:
918 		priv->adminq_cfg_device_resources_cnt++;
919 		break;
920 
921 	case GVE_ADMINQ_REGISTER_PAGE_LIST:
922 		priv->adminq_register_page_list_cnt++;
923 		break;
924 
925 	case GVE_ADMINQ_UNREGISTER_PAGE_LIST:
926 		priv->adminq_unregister_page_list_cnt++;
927 		break;
928 
929 	case GVE_ADMINQ_CREATE_TX_QUEUE:
930 		priv->adminq_create_tx_queue_cnt++;
931 		break;
932 
933 	case GVE_ADMINQ_CREATE_RX_QUEUE:
934 		priv->adminq_create_rx_queue_cnt++;
935 		break;
936 
937 	case GVE_ADMINQ_DESTROY_TX_QUEUE:
938 		priv->adminq_destroy_tx_queue_cnt++;
939 		break;
940 
941 	case GVE_ADMINQ_DESTROY_RX_QUEUE:
942 		priv->adminq_destroy_rx_queue_cnt++;
943 		break;
944 
945 	case GVE_ADMINQ_DECONFIGURE_DEVICE_RESOURCES:
946 		priv->adminq_dcfg_device_resources_cnt++;
947 		break;
948 
949 	case GVE_ADMINQ_SET_DRIVER_PARAMETER:
950 		priv->adminq_set_driver_parameter_cnt++;
951 		break;
952 
953 	case GVE_ADMINQ_VERIFY_DRIVER_COMPATIBILITY:
954 		priv->adminq_verify_driver_compatibility_cnt++;
955 		break;
956 
957 	case GVE_ADMINQ_GET_PTYPE_MAP:
958 		priv->adminq_get_ptype_map_cnt++;
959 		break;
960 
961 	default:
962 		device_printf(priv->dev, "Unknown AQ command opcode %d\n", opcode);
963 	}
964 
965 	return (0);
966 }
967 
968 /*
969  * This function is not thread-safe; the caller is responsible for any
970  * necessary locks.
971  * The caller is also responsible for making sure there are no commands
972  * waiting to be executed.
973  */
974 static int
975 gve_adminq_execute_cmd(struct gve_priv *priv, struct gve_adminq_command *cmd_orig)
976 {
977 	uint32_t tail, head;
978 	int err;
979 
980 	tail = gve_reg_bar_read_4(priv, ADMINQ_EVENT_COUNTER);
981 	head = priv->adminq_prod_cnt;
982 
983 	if (tail != head)
984 		return (EINVAL);
985 	err = gve_adminq_issue_cmd(priv, cmd_orig);
986 	if (err != 0)
987 		return (err);
988 	return (gve_adminq_kick_and_wait(priv));
989 }
990