xref: /linux/drivers/net/ethernet/google/gve/gve_adminq.c (revision 55d0969c451159cff86949b38c39171cab962069)
1 // SPDX-License-Identifier: (GPL-2.0 OR MIT)
2 /* Google virtual Ethernet (gve) driver
3  *
4  * Copyright (C) 2015-2021 Google, Inc.
5  */
6 
7 #include <linux/etherdevice.h>
8 #include <linux/pci.h>
9 #include "gve.h"
10 #include "gve_adminq.h"
11 #include "gve_register.h"
12 
13 #define GVE_MAX_ADMINQ_RELEASE_CHECK	500
14 #define GVE_ADMINQ_SLEEP_LEN		20
15 #define GVE_MAX_ADMINQ_EVENT_COUNTER_CHECK	100
16 
17 #define GVE_DEVICE_OPTION_ERROR_FMT "%s option error:\n" \
18 "Expected: length=%d, feature_mask=%x.\n" \
19 "Actual: length=%d, feature_mask=%x.\n"
20 
21 #define GVE_DEVICE_OPTION_TOO_BIG_FMT "Length of %s option larger than expected. Possible older version of guest driver.\n"
22 
23 static
24 struct gve_device_option *gve_get_next_option(struct gve_device_descriptor *descriptor,
25 					      struct gve_device_option *option)
26 {
27 	void *option_end, *descriptor_end;
28 
29 	option_end = (void *)(option + 1) + be16_to_cpu(option->option_length);
30 	descriptor_end = (void *)descriptor + be16_to_cpu(descriptor->total_length);
31 
32 	return option_end > descriptor_end ? NULL : (struct gve_device_option *)option_end;
33 }
34 
35 #define GVE_DEVICE_OPTION_NO_MIN_RING_SIZE	8
36 
37 static
38 void gve_parse_device_option(struct gve_priv *priv,
39 			     struct gve_device_descriptor *device_descriptor,
40 			     struct gve_device_option *option,
41 			     struct gve_device_option_gqi_rda **dev_op_gqi_rda,
42 			     struct gve_device_option_gqi_qpl **dev_op_gqi_qpl,
43 			     struct gve_device_option_dqo_rda **dev_op_dqo_rda,
44 			     struct gve_device_option_jumbo_frames **dev_op_jumbo_frames,
45 			     struct gve_device_option_dqo_qpl **dev_op_dqo_qpl,
46 			     struct gve_device_option_buffer_sizes **dev_op_buffer_sizes,
47 			     struct gve_device_option_flow_steering **dev_op_flow_steering,
48 			     struct gve_device_option_rss_config **dev_op_rss_config,
49 			     struct gve_device_option_modify_ring **dev_op_modify_ring)
50 {
51 	u32 req_feat_mask = be32_to_cpu(option->required_features_mask);
52 	u16 option_length = be16_to_cpu(option->option_length);
53 	u16 option_id = be16_to_cpu(option->option_id);
54 
55 	/* If the length or feature mask doesn't match, continue without
56 	 * enabling the feature.
57 	 */
58 	switch (option_id) {
59 	case GVE_DEV_OPT_ID_GQI_RAW_ADDRESSING:
60 		if (option_length != GVE_DEV_OPT_LEN_GQI_RAW_ADDRESSING ||
61 		    req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RAW_ADDRESSING) {
62 			dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT,
63 				 "Raw Addressing",
64 				 GVE_DEV_OPT_LEN_GQI_RAW_ADDRESSING,
65 				 GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RAW_ADDRESSING,
66 				 option_length, req_feat_mask);
67 			break;
68 		}
69 
70 		dev_info(&priv->pdev->dev,
71 			 "Gqi raw addressing device option enabled.\n");
72 		priv->queue_format = GVE_GQI_RDA_FORMAT;
73 		break;
74 	case GVE_DEV_OPT_ID_GQI_RDA:
75 		if (option_length < sizeof(**dev_op_gqi_rda) ||
76 		    req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RDA) {
77 			dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT,
78 				 "GQI RDA", (int)sizeof(**dev_op_gqi_rda),
79 				 GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RDA,
80 				 option_length, req_feat_mask);
81 			break;
82 		}
83 
84 		if (option_length > sizeof(**dev_op_gqi_rda)) {
85 			dev_warn(&priv->pdev->dev,
86 				 GVE_DEVICE_OPTION_TOO_BIG_FMT, "GQI RDA");
87 		}
88 		*dev_op_gqi_rda = (void *)(option + 1);
89 		break;
90 	case GVE_DEV_OPT_ID_GQI_QPL:
91 		if (option_length < sizeof(**dev_op_gqi_qpl) ||
92 		    req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL) {
93 			dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT,
94 				 "GQI QPL", (int)sizeof(**dev_op_gqi_qpl),
95 				 GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL,
96 				 option_length, req_feat_mask);
97 			break;
98 		}
99 
100 		if (option_length > sizeof(**dev_op_gqi_qpl)) {
101 			dev_warn(&priv->pdev->dev,
102 				 GVE_DEVICE_OPTION_TOO_BIG_FMT, "GQI QPL");
103 		}
104 		*dev_op_gqi_qpl = (void *)(option + 1);
105 		break;
106 	case GVE_DEV_OPT_ID_DQO_RDA:
107 		if (option_length < sizeof(**dev_op_dqo_rda) ||
108 		    req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA) {
109 			dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT,
110 				 "DQO RDA", (int)sizeof(**dev_op_dqo_rda),
111 				 GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA,
112 				 option_length, req_feat_mask);
113 			break;
114 		}
115 
116 		if (option_length > sizeof(**dev_op_dqo_rda)) {
117 			dev_warn(&priv->pdev->dev,
118 				 GVE_DEVICE_OPTION_TOO_BIG_FMT, "DQO RDA");
119 		}
120 		*dev_op_dqo_rda = (void *)(option + 1);
121 		break;
122 	case GVE_DEV_OPT_ID_DQO_QPL:
123 		if (option_length < sizeof(**dev_op_dqo_qpl) ||
124 		    req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL) {
125 			dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT,
126 				 "DQO QPL", (int)sizeof(**dev_op_dqo_qpl),
127 				 GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL,
128 				 option_length, req_feat_mask);
129 			break;
130 		}
131 
132 		if (option_length > sizeof(**dev_op_dqo_qpl)) {
133 			dev_warn(&priv->pdev->dev,
134 				 GVE_DEVICE_OPTION_TOO_BIG_FMT, "DQO QPL");
135 		}
136 		*dev_op_dqo_qpl = (void *)(option + 1);
137 		break;
138 	case GVE_DEV_OPT_ID_JUMBO_FRAMES:
139 		if (option_length < sizeof(**dev_op_jumbo_frames) ||
140 		    req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES) {
141 			dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT,
142 				 "Jumbo Frames",
143 				 (int)sizeof(**dev_op_jumbo_frames),
144 				 GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES,
145 				 option_length, req_feat_mask);
146 			break;
147 		}
148 
149 		if (option_length > sizeof(**dev_op_jumbo_frames)) {
150 			dev_warn(&priv->pdev->dev,
151 				 GVE_DEVICE_OPTION_TOO_BIG_FMT,
152 				 "Jumbo Frames");
153 		}
154 		*dev_op_jumbo_frames = (void *)(option + 1);
155 		break;
156 	case GVE_DEV_OPT_ID_BUFFER_SIZES:
157 		if (option_length < sizeof(**dev_op_buffer_sizes) ||
158 		    req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_BUFFER_SIZES) {
159 			dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT,
160 				 "Buffer Sizes",
161 				 (int)sizeof(**dev_op_buffer_sizes),
162 				 GVE_DEV_OPT_REQ_FEAT_MASK_BUFFER_SIZES,
163 				 option_length, req_feat_mask);
164 			break;
165 		}
166 
167 		if (option_length > sizeof(**dev_op_buffer_sizes))
168 			dev_warn(&priv->pdev->dev,
169 				 GVE_DEVICE_OPTION_TOO_BIG_FMT,
170 				 "Buffer Sizes");
171 		*dev_op_buffer_sizes = (void *)(option + 1);
172 		break;
173 	case GVE_DEV_OPT_ID_MODIFY_RING:
174 		if (option_length < GVE_DEVICE_OPTION_NO_MIN_RING_SIZE ||
175 		    req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING) {
176 			dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT,
177 				 "Modify Ring", (int)sizeof(**dev_op_modify_ring),
178 				 GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING,
179 				 option_length, req_feat_mask);
180 			break;
181 		}
182 
183 		if (option_length > sizeof(**dev_op_modify_ring)) {
184 			dev_warn(&priv->pdev->dev,
185 				 GVE_DEVICE_OPTION_TOO_BIG_FMT, "Modify Ring");
186 		}
187 
188 		*dev_op_modify_ring = (void *)(option + 1);
189 
190 		/* device has not provided min ring size */
191 		if (option_length == GVE_DEVICE_OPTION_NO_MIN_RING_SIZE)
192 			priv->default_min_ring_size = true;
193 		break;
194 	case GVE_DEV_OPT_ID_FLOW_STEERING:
195 		if (option_length < sizeof(**dev_op_flow_steering) ||
196 		    req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_FLOW_STEERING) {
197 			dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT,
198 				 "Flow Steering",
199 				 (int)sizeof(**dev_op_flow_steering),
200 				 GVE_DEV_OPT_REQ_FEAT_MASK_FLOW_STEERING,
201 				 option_length, req_feat_mask);
202 			break;
203 		}
204 
205 		if (option_length > sizeof(**dev_op_flow_steering))
206 			dev_warn(&priv->pdev->dev,
207 				 GVE_DEVICE_OPTION_TOO_BIG_FMT,
208 				 "Flow Steering");
209 		*dev_op_flow_steering = (void *)(option + 1);
210 		break;
211 	case GVE_DEV_OPT_ID_RSS_CONFIG:
212 		if (option_length < sizeof(**dev_op_rss_config) ||
213 		    req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_RSS_CONFIG) {
214 			dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT,
215 				 "RSS config",
216 				 (int)sizeof(**dev_op_rss_config),
217 				 GVE_DEV_OPT_REQ_FEAT_MASK_RSS_CONFIG,
218 				 option_length, req_feat_mask);
219 			break;
220 		}
221 
222 		if (option_length > sizeof(**dev_op_rss_config))
223 			dev_warn(&priv->pdev->dev,
224 				 GVE_DEVICE_OPTION_TOO_BIG_FMT,
225 				 "RSS config");
226 		*dev_op_rss_config = (void *)(option + 1);
227 		break;
228 	default:
229 		/* If we don't recognize the option just continue
230 		 * without doing anything.
231 		 */
232 		dev_dbg(&priv->pdev->dev, "Unrecognized device option 0x%hx not enabled.\n",
233 			option_id);
234 	}
235 }
236 
237 /* Process all device options for a given describe device call. */
238 static int
239 gve_process_device_options(struct gve_priv *priv,
240 			   struct gve_device_descriptor *descriptor,
241 			   struct gve_device_option_gqi_rda **dev_op_gqi_rda,
242 			   struct gve_device_option_gqi_qpl **dev_op_gqi_qpl,
243 			   struct gve_device_option_dqo_rda **dev_op_dqo_rda,
244 			   struct gve_device_option_jumbo_frames **dev_op_jumbo_frames,
245 			   struct gve_device_option_dqo_qpl **dev_op_dqo_qpl,
246 			   struct gve_device_option_buffer_sizes **dev_op_buffer_sizes,
247 			   struct gve_device_option_flow_steering **dev_op_flow_steering,
248 			   struct gve_device_option_rss_config **dev_op_rss_config,
249 			   struct gve_device_option_modify_ring **dev_op_modify_ring)
250 {
251 	const int num_options = be16_to_cpu(descriptor->num_device_options);
252 	struct gve_device_option *dev_opt;
253 	int i;
254 
255 	/* The options struct directly follows the device descriptor. */
256 	dev_opt = (void *)(descriptor + 1);
257 	for (i = 0; i < num_options; i++) {
258 		struct gve_device_option *next_opt;
259 
260 		next_opt = gve_get_next_option(descriptor, dev_opt);
261 		if (!next_opt) {
262 			dev_err(&priv->dev->dev,
263 				"options exceed device_descriptor's total length.\n");
264 			return -EINVAL;
265 		}
266 
267 		gve_parse_device_option(priv, descriptor, dev_opt,
268 					dev_op_gqi_rda, dev_op_gqi_qpl,
269 					dev_op_dqo_rda, dev_op_jumbo_frames,
270 					dev_op_dqo_qpl, dev_op_buffer_sizes,
271 					dev_op_flow_steering, dev_op_rss_config,
272 					dev_op_modify_ring);
273 		dev_opt = next_opt;
274 	}
275 
276 	return 0;
277 }
278 
279 int gve_adminq_alloc(struct device *dev, struct gve_priv *priv)
280 {
281 	priv->adminq_pool = dma_pool_create("adminq_pool", dev,
282 					    GVE_ADMINQ_BUFFER_SIZE, 0, 0);
283 	if (unlikely(!priv->adminq_pool))
284 		return -ENOMEM;
285 	priv->adminq = dma_pool_alloc(priv->adminq_pool, GFP_KERNEL,
286 				      &priv->adminq_bus_addr);
287 	if (unlikely(!priv->adminq)) {
288 		dma_pool_destroy(priv->adminq_pool);
289 		return -ENOMEM;
290 	}
291 
292 	priv->adminq_mask =
293 		(GVE_ADMINQ_BUFFER_SIZE / sizeof(union gve_adminq_command)) - 1;
294 	priv->adminq_prod_cnt = 0;
295 	priv->adminq_cmd_fail = 0;
296 	priv->adminq_timeouts = 0;
297 	priv->adminq_describe_device_cnt = 0;
298 	priv->adminq_cfg_device_resources_cnt = 0;
299 	priv->adminq_register_page_list_cnt = 0;
300 	priv->adminq_unregister_page_list_cnt = 0;
301 	priv->adminq_create_tx_queue_cnt = 0;
302 	priv->adminq_create_rx_queue_cnt = 0;
303 	priv->adminq_destroy_tx_queue_cnt = 0;
304 	priv->adminq_destroy_rx_queue_cnt = 0;
305 	priv->adminq_dcfg_device_resources_cnt = 0;
306 	priv->adminq_set_driver_parameter_cnt = 0;
307 	priv->adminq_report_stats_cnt = 0;
308 	priv->adminq_report_link_speed_cnt = 0;
309 	priv->adminq_get_ptype_map_cnt = 0;
310 	priv->adminq_query_flow_rules_cnt = 0;
311 	priv->adminq_cfg_flow_rule_cnt = 0;
312 	priv->adminq_cfg_rss_cnt = 0;
313 	priv->adminq_query_rss_cnt = 0;
314 
315 	/* Setup Admin queue with the device */
316 	if (priv->pdev->revision < 0x1) {
317 		iowrite32be(priv->adminq_bus_addr / PAGE_SIZE,
318 			    &priv->reg_bar0->adminq_pfn);
319 	} else {
320 		iowrite16be(GVE_ADMINQ_BUFFER_SIZE,
321 			    &priv->reg_bar0->adminq_length);
322 #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
323 		iowrite32be(priv->adminq_bus_addr >> 32,
324 			    &priv->reg_bar0->adminq_base_address_hi);
325 #endif
326 		iowrite32be(priv->adminq_bus_addr,
327 			    &priv->reg_bar0->adminq_base_address_lo);
328 		iowrite32be(GVE_DRIVER_STATUS_RUN_MASK, &priv->reg_bar0->driver_status);
329 	}
330 	mutex_init(&priv->adminq_lock);
331 	gve_set_admin_queue_ok(priv);
332 	return 0;
333 }
334 
335 void gve_adminq_release(struct gve_priv *priv)
336 {
337 	int i = 0;
338 
339 	/* Tell the device the adminq is leaving */
340 	if (priv->pdev->revision < 0x1) {
341 		iowrite32be(0x0, &priv->reg_bar0->adminq_pfn);
342 		while (ioread32be(&priv->reg_bar0->adminq_pfn)) {
343 			/* If this is reached the device is unrecoverable and still
344 			 * holding memory. Continue looping to avoid memory corruption,
345 			 * but WARN so it is visible what is going on.
346 			 */
347 			if (i == GVE_MAX_ADMINQ_RELEASE_CHECK)
348 				WARN(1, "Unrecoverable platform error!");
349 			i++;
350 			msleep(GVE_ADMINQ_SLEEP_LEN);
351 		}
352 	} else {
353 		iowrite32be(GVE_DRIVER_STATUS_RESET_MASK, &priv->reg_bar0->driver_status);
354 		while (!(ioread32be(&priv->reg_bar0->device_status)
355 				& GVE_DEVICE_STATUS_DEVICE_IS_RESET)) {
356 			if (i == GVE_MAX_ADMINQ_RELEASE_CHECK)
357 				WARN(1, "Unrecoverable platform error!");
358 			i++;
359 			msleep(GVE_ADMINQ_SLEEP_LEN);
360 		}
361 	}
362 	gve_clear_device_rings_ok(priv);
363 	gve_clear_device_resources_ok(priv);
364 	gve_clear_admin_queue_ok(priv);
365 }
366 
367 void gve_adminq_free(struct device *dev, struct gve_priv *priv)
368 {
369 	if (!gve_get_admin_queue_ok(priv))
370 		return;
371 	gve_adminq_release(priv);
372 	dma_pool_free(priv->adminq_pool, priv->adminq, priv->adminq_bus_addr);
373 	dma_pool_destroy(priv->adminq_pool);
374 	gve_clear_admin_queue_ok(priv);
375 }
376 
377 static void gve_adminq_kick_cmd(struct gve_priv *priv, u32 prod_cnt)
378 {
379 	iowrite32be(prod_cnt, &priv->reg_bar0->adminq_doorbell);
380 }
381 
382 static bool gve_adminq_wait_for_cmd(struct gve_priv *priv, u32 prod_cnt)
383 {
384 	int i;
385 
386 	for (i = 0; i < GVE_MAX_ADMINQ_EVENT_COUNTER_CHECK; i++) {
387 		if (ioread32be(&priv->reg_bar0->adminq_event_counter)
388 		    == prod_cnt)
389 			return true;
390 		msleep(GVE_ADMINQ_SLEEP_LEN);
391 	}
392 
393 	return false;
394 }
395 
396 static int gve_adminq_parse_err(struct gve_priv *priv, u32 status)
397 {
398 	if (status != GVE_ADMINQ_COMMAND_PASSED &&
399 	    status != GVE_ADMINQ_COMMAND_UNSET) {
400 		dev_err(&priv->pdev->dev, "AQ command failed with status %d\n", status);
401 		priv->adminq_cmd_fail++;
402 	}
403 	switch (status) {
404 	case GVE_ADMINQ_COMMAND_PASSED:
405 		return 0;
406 	case GVE_ADMINQ_COMMAND_UNSET:
407 		dev_err(&priv->pdev->dev, "parse_aq_err: err and status both unset, this should not be possible.\n");
408 		return -EINVAL;
409 	case GVE_ADMINQ_COMMAND_ERROR_ABORTED:
410 	case GVE_ADMINQ_COMMAND_ERROR_CANCELLED:
411 	case GVE_ADMINQ_COMMAND_ERROR_DATALOSS:
412 	case GVE_ADMINQ_COMMAND_ERROR_FAILED_PRECONDITION:
413 	case GVE_ADMINQ_COMMAND_ERROR_UNAVAILABLE:
414 		return -EAGAIN;
415 	case GVE_ADMINQ_COMMAND_ERROR_ALREADY_EXISTS:
416 	case GVE_ADMINQ_COMMAND_ERROR_INTERNAL_ERROR:
417 	case GVE_ADMINQ_COMMAND_ERROR_INVALID_ARGUMENT:
418 	case GVE_ADMINQ_COMMAND_ERROR_NOT_FOUND:
419 	case GVE_ADMINQ_COMMAND_ERROR_OUT_OF_RANGE:
420 	case GVE_ADMINQ_COMMAND_ERROR_UNKNOWN_ERROR:
421 		return -EINVAL;
422 	case GVE_ADMINQ_COMMAND_ERROR_DEADLINE_EXCEEDED:
423 		return -ETIME;
424 	case GVE_ADMINQ_COMMAND_ERROR_PERMISSION_DENIED:
425 	case GVE_ADMINQ_COMMAND_ERROR_UNAUTHENTICATED:
426 		return -EACCES;
427 	case GVE_ADMINQ_COMMAND_ERROR_RESOURCE_EXHAUSTED:
428 		return -ENOMEM;
429 	case GVE_ADMINQ_COMMAND_ERROR_UNIMPLEMENTED:
430 		return -EOPNOTSUPP;
431 	default:
432 		dev_err(&priv->pdev->dev, "parse_aq_err: unknown status code %d\n", status);
433 		return -EINVAL;
434 	}
435 }
436 
437 /* Flushes all AQ commands currently queued and waits for them to complete.
438  * If there are failures, it will return the first error.
439  */
440 static int gve_adminq_kick_and_wait(struct gve_priv *priv)
441 {
442 	int tail, head;
443 	int i;
444 
445 	tail = ioread32be(&priv->reg_bar0->adminq_event_counter);
446 	head = priv->adminq_prod_cnt;
447 
448 	gve_adminq_kick_cmd(priv, head);
449 	if (!gve_adminq_wait_for_cmd(priv, head)) {
450 		dev_err(&priv->pdev->dev, "AQ commands timed out, need to reset AQ\n");
451 		priv->adminq_timeouts++;
452 		return -ENOTRECOVERABLE;
453 	}
454 
455 	for (i = tail; i < head; i++) {
456 		union gve_adminq_command *cmd;
457 		u32 status, err;
458 
459 		cmd = &priv->adminq[i & priv->adminq_mask];
460 		status = be32_to_cpu(READ_ONCE(cmd->status));
461 		err = gve_adminq_parse_err(priv, status);
462 		if (err)
463 			// Return the first error if we failed.
464 			return err;
465 	}
466 
467 	return 0;
468 }
469 
470 /* This function is not threadsafe - the caller is responsible for any
471  * necessary locks.
472  */
473 static int gve_adminq_issue_cmd(struct gve_priv *priv,
474 				union gve_adminq_command *cmd_orig)
475 {
476 	union gve_adminq_command *cmd;
477 	u32 opcode;
478 	u32 tail;
479 
480 	tail = ioread32be(&priv->reg_bar0->adminq_event_counter);
481 
482 	// Check if next command will overflow the buffer.
483 	if (((priv->adminq_prod_cnt + 1) & priv->adminq_mask) ==
484 	    (tail & priv->adminq_mask)) {
485 		int err;
486 
487 		// Flush existing commands to make room.
488 		err = gve_adminq_kick_and_wait(priv);
489 		if (err)
490 			return err;
491 
492 		// Retry.
493 		tail = ioread32be(&priv->reg_bar0->adminq_event_counter);
494 		if (((priv->adminq_prod_cnt + 1) & priv->adminq_mask) ==
495 		    (tail & priv->adminq_mask)) {
496 			// This should never happen. We just flushed the
497 			// command queue so there should be enough space.
498 			return -ENOMEM;
499 		}
500 	}
501 
502 	cmd = &priv->adminq[priv->adminq_prod_cnt & priv->adminq_mask];
503 	priv->adminq_prod_cnt++;
504 
505 	memcpy(cmd, cmd_orig, sizeof(*cmd_orig));
506 	opcode = be32_to_cpu(READ_ONCE(cmd->opcode));
507 	if (opcode == GVE_ADMINQ_EXTENDED_COMMAND)
508 		opcode = be32_to_cpu(cmd->extended_command.inner_opcode);
509 
510 	switch (opcode) {
511 	case GVE_ADMINQ_DESCRIBE_DEVICE:
512 		priv->adminq_describe_device_cnt++;
513 		break;
514 	case GVE_ADMINQ_CONFIGURE_DEVICE_RESOURCES:
515 		priv->adminq_cfg_device_resources_cnt++;
516 		break;
517 	case GVE_ADMINQ_REGISTER_PAGE_LIST:
518 		priv->adminq_register_page_list_cnt++;
519 		break;
520 	case GVE_ADMINQ_UNREGISTER_PAGE_LIST:
521 		priv->adminq_unregister_page_list_cnt++;
522 		break;
523 	case GVE_ADMINQ_CREATE_TX_QUEUE:
524 		priv->adminq_create_tx_queue_cnt++;
525 		break;
526 	case GVE_ADMINQ_CREATE_RX_QUEUE:
527 		priv->adminq_create_rx_queue_cnt++;
528 		break;
529 	case GVE_ADMINQ_DESTROY_TX_QUEUE:
530 		priv->adminq_destroy_tx_queue_cnt++;
531 		break;
532 	case GVE_ADMINQ_DESTROY_RX_QUEUE:
533 		priv->adminq_destroy_rx_queue_cnt++;
534 		break;
535 	case GVE_ADMINQ_DECONFIGURE_DEVICE_RESOURCES:
536 		priv->adminq_dcfg_device_resources_cnt++;
537 		break;
538 	case GVE_ADMINQ_SET_DRIVER_PARAMETER:
539 		priv->adminq_set_driver_parameter_cnt++;
540 		break;
541 	case GVE_ADMINQ_REPORT_STATS:
542 		priv->adminq_report_stats_cnt++;
543 		break;
544 	case GVE_ADMINQ_REPORT_LINK_SPEED:
545 		priv->adminq_report_link_speed_cnt++;
546 		break;
547 	case GVE_ADMINQ_GET_PTYPE_MAP:
548 		priv->adminq_get_ptype_map_cnt++;
549 		break;
550 	case GVE_ADMINQ_VERIFY_DRIVER_COMPATIBILITY:
551 		priv->adminq_verify_driver_compatibility_cnt++;
552 		break;
553 	case GVE_ADMINQ_QUERY_FLOW_RULES:
554 		priv->adminq_query_flow_rules_cnt++;
555 		break;
556 	case GVE_ADMINQ_CONFIGURE_FLOW_RULE:
557 		priv->adminq_cfg_flow_rule_cnt++;
558 		break;
559 	case GVE_ADMINQ_CONFIGURE_RSS:
560 		priv->adminq_cfg_rss_cnt++;
561 		break;
562 	case GVE_ADMINQ_QUERY_RSS:
563 		priv->adminq_query_rss_cnt++;
564 		break;
565 	default:
566 		dev_err(&priv->pdev->dev, "unknown AQ command opcode %d\n", opcode);
567 	}
568 
569 	return 0;
570 }
571 
572 static int gve_adminq_execute_cmd(struct gve_priv *priv,
573 				  union gve_adminq_command *cmd_orig)
574 {
575 	u32 tail, head;
576 	int err;
577 
578 	mutex_lock(&priv->adminq_lock);
579 	tail = ioread32be(&priv->reg_bar0->adminq_event_counter);
580 	head = priv->adminq_prod_cnt;
581 	if (tail != head) {
582 		err = -EINVAL;
583 		goto out;
584 	}
585 
586 	err = gve_adminq_issue_cmd(priv, cmd_orig);
587 	if (err)
588 		goto out;
589 
590 	err = gve_adminq_kick_and_wait(priv);
591 
592 out:
593 	mutex_unlock(&priv->adminq_lock);
594 	return err;
595 }
596 
597 static int gve_adminq_execute_extended_cmd(struct gve_priv *priv, u32 opcode,
598 					   size_t cmd_size, void *cmd_orig)
599 {
600 	union gve_adminq_command cmd;
601 	dma_addr_t inner_cmd_bus;
602 	void *inner_cmd;
603 	int err;
604 
605 	inner_cmd = dma_alloc_coherent(&priv->pdev->dev, cmd_size,
606 				       &inner_cmd_bus, GFP_KERNEL);
607 	if (!inner_cmd)
608 		return -ENOMEM;
609 
610 	memcpy(inner_cmd, cmd_orig, cmd_size);
611 
612 	memset(&cmd, 0, sizeof(cmd));
613 	cmd.opcode = cpu_to_be32(GVE_ADMINQ_EXTENDED_COMMAND);
614 	cmd.extended_command = (struct gve_adminq_extended_command) {
615 		.inner_opcode = cpu_to_be32(opcode),
616 		.inner_length = cpu_to_be32(cmd_size),
617 		.inner_command_addr = cpu_to_be64(inner_cmd_bus),
618 	};
619 
620 	err = gve_adminq_execute_cmd(priv, &cmd);
621 
622 	dma_free_coherent(&priv->pdev->dev, cmd_size, inner_cmd, inner_cmd_bus);
623 	return err;
624 }
625 
/* The device specifies that the management vector can either be the first irq
 * or the last irq. ntfy_blk_msix_base_idx indicates the first irq assigned to
 * the ntfy blks. If it is 0 then the management vector is last, if it is 1 then
 * the management vector is first.
 *
 * gve arranges the msix vectors so that the management vector is last.
 */
633 #define GVE_NTFY_BLK_BASE_MSIX_IDX	0
634 int gve_adminq_configure_device_resources(struct gve_priv *priv,
635 					  dma_addr_t counter_array_bus_addr,
636 					  u32 num_counters,
637 					  dma_addr_t db_array_bus_addr,
638 					  u32 num_ntfy_blks)
639 {
640 	union gve_adminq_command cmd;
641 
642 	memset(&cmd, 0, sizeof(cmd));
643 	cmd.opcode = cpu_to_be32(GVE_ADMINQ_CONFIGURE_DEVICE_RESOURCES);
644 	cmd.configure_device_resources =
645 		(struct gve_adminq_configure_device_resources) {
646 		.counter_array = cpu_to_be64(counter_array_bus_addr),
647 		.num_counters = cpu_to_be32(num_counters),
648 		.irq_db_addr = cpu_to_be64(db_array_bus_addr),
649 		.num_irq_dbs = cpu_to_be32(num_ntfy_blks),
650 		.irq_db_stride = cpu_to_be32(sizeof(*priv->irq_db_indices)),
651 		.ntfy_blk_msix_base_idx =
652 					cpu_to_be32(GVE_NTFY_BLK_BASE_MSIX_IDX),
653 		.queue_format = priv->queue_format,
654 	};
655 
656 	return gve_adminq_execute_cmd(priv, &cmd);
657 }
658 
659 int gve_adminq_deconfigure_device_resources(struct gve_priv *priv)
660 {
661 	union gve_adminq_command cmd;
662 
663 	memset(&cmd, 0, sizeof(cmd));
664 	cmd.opcode = cpu_to_be32(GVE_ADMINQ_DECONFIGURE_DEVICE_RESOURCES);
665 
666 	return gve_adminq_execute_cmd(priv, &cmd);
667 }
668 
669 static int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index)
670 {
671 	struct gve_tx_ring *tx = &priv->tx[queue_index];
672 	union gve_adminq_command cmd;
673 
674 	memset(&cmd, 0, sizeof(cmd));
675 	cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_TX_QUEUE);
676 	cmd.create_tx_queue = (struct gve_adminq_create_tx_queue) {
677 		.queue_id = cpu_to_be32(queue_index),
678 		.queue_resources_addr =
679 			cpu_to_be64(tx->q_resources_bus),
680 		.tx_ring_addr = cpu_to_be64(tx->bus),
681 		.ntfy_id = cpu_to_be32(tx->ntfy_id),
682 		.tx_ring_size = cpu_to_be16(priv->tx_desc_cnt),
683 	};
684 
685 	if (gve_is_gqi(priv)) {
686 		u32 qpl_id = priv->queue_format == GVE_GQI_RDA_FORMAT ?
687 			GVE_RAW_ADDRESSING_QPL_ID : tx->tx_fifo.qpl->id;
688 
689 		cmd.create_tx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
690 	} else {
691 		u32 qpl_id = 0;
692 
693 		if (priv->queue_format == GVE_DQO_RDA_FORMAT)
694 			qpl_id = GVE_RAW_ADDRESSING_QPL_ID;
695 		else
696 			qpl_id = tx->dqo.qpl->id;
697 		cmd.create_tx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
698 		cmd.create_tx_queue.tx_comp_ring_addr =
699 			cpu_to_be64(tx->complq_bus_dqo);
700 		cmd.create_tx_queue.tx_comp_ring_size =
701 			cpu_to_be16(priv->tx_desc_cnt);
702 	}
703 
704 	return gve_adminq_issue_cmd(priv, &cmd);
705 }
706 
707 int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_queues)
708 {
709 	int err;
710 	int i;
711 
712 	for (i = start_id; i < start_id + num_queues; i++) {
713 		err = gve_adminq_create_tx_queue(priv, i);
714 		if (err)
715 			return err;
716 	}
717 
718 	return gve_adminq_kick_and_wait(priv);
719 }
720 
721 static void gve_adminq_get_create_rx_queue_cmd(struct gve_priv *priv,
722 					       union gve_adminq_command *cmd,
723 					       u32 queue_index)
724 {
725 	struct gve_rx_ring *rx = &priv->rx[queue_index];
726 
727 	memset(cmd, 0, sizeof(*cmd));
728 	cmd->opcode = cpu_to_be32(GVE_ADMINQ_CREATE_RX_QUEUE);
729 	cmd->create_rx_queue = (struct gve_adminq_create_rx_queue) {
730 		.queue_id = cpu_to_be32(queue_index),
731 		.ntfy_id = cpu_to_be32(rx->ntfy_id),
732 		.queue_resources_addr = cpu_to_be64(rx->q_resources_bus),
733 		.rx_ring_size = cpu_to_be16(priv->rx_desc_cnt),
734 	};
735 
736 	if (gve_is_gqi(priv)) {
737 		u32 qpl_id = priv->queue_format == GVE_GQI_RDA_FORMAT ?
738 			GVE_RAW_ADDRESSING_QPL_ID : rx->data.qpl->id;
739 
740 		cmd->create_rx_queue.rx_desc_ring_addr =
741 			cpu_to_be64(rx->desc.bus);
742 		cmd->create_rx_queue.rx_data_ring_addr =
743 			cpu_to_be64(rx->data.data_bus);
744 		cmd->create_rx_queue.index = cpu_to_be32(queue_index);
745 		cmd->create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
746 		cmd->create_rx_queue.packet_buffer_size = cpu_to_be16(rx->packet_buffer_size);
747 	} else {
748 		u32 qpl_id = 0;
749 
750 		if (priv->queue_format == GVE_DQO_RDA_FORMAT)
751 			qpl_id = GVE_RAW_ADDRESSING_QPL_ID;
752 		else
753 			qpl_id = rx->dqo.qpl->id;
754 		cmd->create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
755 		cmd->create_rx_queue.rx_desc_ring_addr =
756 			cpu_to_be64(rx->dqo.complq.bus);
757 		cmd->create_rx_queue.rx_data_ring_addr =
758 			cpu_to_be64(rx->dqo.bufq.bus);
759 		cmd->create_rx_queue.packet_buffer_size =
760 			cpu_to_be16(priv->data_buffer_size_dqo);
761 		cmd->create_rx_queue.rx_buff_ring_size =
762 			cpu_to_be16(priv->rx_desc_cnt);
763 		cmd->create_rx_queue.enable_rsc =
764 			!!(priv->dev->features & NETIF_F_LRO);
765 		if (priv->header_split_enabled)
766 			cmd->create_rx_queue.header_buffer_size =
767 				cpu_to_be16(priv->header_buf_size);
768 	}
769 }
770 
771 static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index)
772 {
773 	union gve_adminq_command cmd;
774 
775 	gve_adminq_get_create_rx_queue_cmd(priv, &cmd, queue_index);
776 	return gve_adminq_issue_cmd(priv, &cmd);
777 }
778 
779 /* Unlike gve_adminq_create_rx_queue, this actually rings the doorbell */
780 int gve_adminq_create_single_rx_queue(struct gve_priv *priv, u32 queue_index)
781 {
782 	union gve_adminq_command cmd;
783 
784 	gve_adminq_get_create_rx_queue_cmd(priv, &cmd, queue_index);
785 	return gve_adminq_execute_cmd(priv, &cmd);
786 }
787 
788 int gve_adminq_create_rx_queues(struct gve_priv *priv, u32 num_queues)
789 {
790 	int err;
791 	int i;
792 
793 	for (i = 0; i < num_queues; i++) {
794 		err = gve_adminq_create_rx_queue(priv, i);
795 		if (err)
796 			return err;
797 	}
798 
799 	return gve_adminq_kick_and_wait(priv);
800 }
801 
802 static int gve_adminq_destroy_tx_queue(struct gve_priv *priv, u32 queue_index)
803 {
804 	union gve_adminq_command cmd;
805 	int err;
806 
807 	memset(&cmd, 0, sizeof(cmd));
808 	cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_TX_QUEUE);
809 	cmd.destroy_tx_queue = (struct gve_adminq_destroy_tx_queue) {
810 		.queue_id = cpu_to_be32(queue_index),
811 	};
812 
813 	err = gve_adminq_issue_cmd(priv, &cmd);
814 	if (err)
815 		return err;
816 
817 	return 0;
818 }
819 
820 int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_queues)
821 {
822 	int err;
823 	int i;
824 
825 	for (i = start_id; i < start_id + num_queues; i++) {
826 		err = gve_adminq_destroy_tx_queue(priv, i);
827 		if (err)
828 			return err;
829 	}
830 
831 	return gve_adminq_kick_and_wait(priv);
832 }
833 
834 static void gve_adminq_make_destroy_rx_queue_cmd(union gve_adminq_command *cmd,
835 						 u32 queue_index)
836 {
837 	memset(cmd, 0, sizeof(*cmd));
838 	cmd->opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_RX_QUEUE);
839 	cmd->destroy_rx_queue = (struct gve_adminq_destroy_rx_queue) {
840 		.queue_id = cpu_to_be32(queue_index),
841 	};
842 }
843 
844 static int gve_adminq_destroy_rx_queue(struct gve_priv *priv, u32 queue_index)
845 {
846 	union gve_adminq_command cmd;
847 
848 	gve_adminq_make_destroy_rx_queue_cmd(&cmd, queue_index);
849 	return gve_adminq_issue_cmd(priv, &cmd);
850 }
851 
852 /* Unlike gve_adminq_destroy_rx_queue, this actually rings the doorbell */
853 int gve_adminq_destroy_single_rx_queue(struct gve_priv *priv, u32 queue_index)
854 {
855 	union gve_adminq_command cmd;
856 
857 	gve_adminq_make_destroy_rx_queue_cmd(&cmd, queue_index);
858 	return gve_adminq_execute_cmd(priv, &cmd);
859 }
860 
861 int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 num_queues)
862 {
863 	int err;
864 	int i;
865 
866 	for (i = 0; i < num_queues; i++) {
867 		err = gve_adminq_destroy_rx_queue(priv, i);
868 		if (err)
869 			return err;
870 	}
871 
872 	return gve_adminq_kick_and_wait(priv);
873 }
874 
875 static void gve_set_default_desc_cnt(struct gve_priv *priv,
876 			const struct gve_device_descriptor *descriptor)
877 {
878 	priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries);
879 	priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries);
880 
881 	/* set default ranges */
882 	priv->max_tx_desc_cnt = priv->tx_desc_cnt;
883 	priv->max_rx_desc_cnt = priv->rx_desc_cnt;
884 	priv->min_tx_desc_cnt = priv->tx_desc_cnt;
885 	priv->min_rx_desc_cnt = priv->rx_desc_cnt;
886 }
887 
888 static void gve_enable_supported_features(struct gve_priv *priv,
889 					  u32 supported_features_mask,
890 					  const struct gve_device_option_jumbo_frames
891 					  *dev_op_jumbo_frames,
892 					  const struct gve_device_option_dqo_qpl
893 					  *dev_op_dqo_qpl,
894 					  const struct gve_device_option_buffer_sizes
895 					  *dev_op_buffer_sizes,
896 					  const struct gve_device_option_flow_steering
897 					  *dev_op_flow_steering,
898 					  const struct gve_device_option_rss_config
899 					  *dev_op_rss_config,
900 					  const struct gve_device_option_modify_ring
901 					  *dev_op_modify_ring)
902 {
903 	/* Before control reaches this point, the page-size-capped max MTU from
904 	 * the gve_device_descriptor field has already been stored in
905 	 * priv->dev->max_mtu. We overwrite it with the true max MTU below.
906 	 */
907 	if (dev_op_jumbo_frames &&
908 	    (supported_features_mask & GVE_SUP_JUMBO_FRAMES_MASK)) {
909 		dev_info(&priv->pdev->dev,
910 			 "JUMBO FRAMES device option enabled.\n");
911 		priv->dev->max_mtu = be16_to_cpu(dev_op_jumbo_frames->max_mtu);
912 	}
913 
914 	/* Override pages for qpl for DQO-QPL */
915 	if (dev_op_dqo_qpl) {
916 		priv->tx_pages_per_qpl =
917 			be16_to_cpu(dev_op_dqo_qpl->tx_pages_per_qpl);
918 		if (priv->tx_pages_per_qpl == 0)
919 			priv->tx_pages_per_qpl = DQO_QPL_DEFAULT_TX_PAGES;
920 	}
921 
922 	if (dev_op_buffer_sizes &&
923 	    (supported_features_mask & GVE_SUP_BUFFER_SIZES_MASK)) {
924 		priv->max_rx_buffer_size =
925 			be16_to_cpu(dev_op_buffer_sizes->packet_buffer_size);
926 		priv->header_buf_size =
927 			be16_to_cpu(dev_op_buffer_sizes->header_buffer_size);
928 		dev_info(&priv->pdev->dev,
929 			 "BUFFER SIZES device option enabled with max_rx_buffer_size of %u, header_buf_size of %u.\n",
930 			 priv->max_rx_buffer_size, priv->header_buf_size);
931 	}
932 
933 	/* Read and store ring size ranges given by device */
934 	if (dev_op_modify_ring &&
935 	    (supported_features_mask & GVE_SUP_MODIFY_RING_MASK)) {
936 		priv->modify_ring_size_enabled = true;
937 
938 		/* max ring size for DQO QPL should not be overwritten because of device limit */
939 		if (priv->queue_format != GVE_DQO_QPL_FORMAT) {
940 			priv->max_rx_desc_cnt = be16_to_cpu(dev_op_modify_ring->max_rx_ring_size);
941 			priv->max_tx_desc_cnt = be16_to_cpu(dev_op_modify_ring->max_tx_ring_size);
942 		}
943 		if (priv->default_min_ring_size) {
944 			/* If device hasn't provided minimums, use default minimums */
945 			priv->min_tx_desc_cnt = GVE_DEFAULT_MIN_TX_RING_SIZE;
946 			priv->min_rx_desc_cnt = GVE_DEFAULT_MIN_RX_RING_SIZE;
947 		} else {
948 			priv->min_rx_desc_cnt = be16_to_cpu(dev_op_modify_ring->min_rx_ring_size);
949 			priv->min_tx_desc_cnt = be16_to_cpu(dev_op_modify_ring->min_tx_ring_size);
950 		}
951 	}
952 
953 	if (dev_op_flow_steering &&
954 	    (supported_features_mask & GVE_SUP_FLOW_STEERING_MASK)) {
955 		if (dev_op_flow_steering->max_flow_rules) {
956 			priv->max_flow_rules =
957 				be32_to_cpu(dev_op_flow_steering->max_flow_rules);
958 			priv->dev->hw_features |= NETIF_F_NTUPLE;
959 			dev_info(&priv->pdev->dev,
960 				 "FLOW STEERING device option enabled with max rule limit of %u.\n",
961 				 priv->max_flow_rules);
962 		}
963 	}
964 
965 	if (dev_op_rss_config &&
966 	    (supported_features_mask & GVE_SUP_RSS_CONFIG_MASK)) {
967 		priv->rss_key_size =
968 			be16_to_cpu(dev_op_rss_config->hash_key_size);
969 		priv->rss_lut_size =
970 			be16_to_cpu(dev_op_rss_config->hash_lut_size);
971 	}
972 }
973 
974 int gve_adminq_describe_device(struct gve_priv *priv)
975 {
976 	struct gve_device_option_flow_steering *dev_op_flow_steering = NULL;
977 	struct gve_device_option_buffer_sizes *dev_op_buffer_sizes = NULL;
978 	struct gve_device_option_jumbo_frames *dev_op_jumbo_frames = NULL;
979 	struct gve_device_option_modify_ring *dev_op_modify_ring = NULL;
980 	struct gve_device_option_rss_config *dev_op_rss_config = NULL;
981 	struct gve_device_option_gqi_rda *dev_op_gqi_rda = NULL;
982 	struct gve_device_option_gqi_qpl *dev_op_gqi_qpl = NULL;
983 	struct gve_device_option_dqo_rda *dev_op_dqo_rda = NULL;
984 	struct gve_device_option_dqo_qpl *dev_op_dqo_qpl = NULL;
985 	struct gve_device_descriptor *descriptor;
986 	u32 supported_features_mask = 0;
987 	union gve_adminq_command cmd;
988 	dma_addr_t descriptor_bus;
989 	int err = 0;
990 	u8 *mac;
991 	u16 mtu;
992 
993 	memset(&cmd, 0, sizeof(cmd));
994 	descriptor = dma_pool_alloc(priv->adminq_pool, GFP_KERNEL,
995 				    &descriptor_bus);
996 	if (!descriptor)
997 		return -ENOMEM;
998 	cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESCRIBE_DEVICE);
999 	cmd.describe_device.device_descriptor_addr =
1000 						cpu_to_be64(descriptor_bus);
1001 	cmd.describe_device.device_descriptor_version =
1002 			cpu_to_be32(GVE_ADMINQ_DEVICE_DESCRIPTOR_VERSION);
1003 	cmd.describe_device.available_length =
1004 		cpu_to_be32(GVE_ADMINQ_BUFFER_SIZE);
1005 
1006 	err = gve_adminq_execute_cmd(priv, &cmd);
1007 	if (err)
1008 		goto free_device_descriptor;
1009 
1010 	err = gve_process_device_options(priv, descriptor, &dev_op_gqi_rda,
1011 					 &dev_op_gqi_qpl, &dev_op_dqo_rda,
1012 					 &dev_op_jumbo_frames, &dev_op_dqo_qpl,
1013 					 &dev_op_buffer_sizes,
1014 					 &dev_op_flow_steering,
1015 					 &dev_op_rss_config,
1016 					 &dev_op_modify_ring);
1017 	if (err)
1018 		goto free_device_descriptor;
1019 
1020 	/* If the GQI_RAW_ADDRESSING option is not enabled and the queue format
1021 	 * is not set to GqiRda, choose the queue format in a priority order:
1022 	 * DqoRda, DqoQpl, GqiRda, GqiQpl. Use GqiQpl as default.
1023 	 */
1024 	if (dev_op_dqo_rda) {
1025 		priv->queue_format = GVE_DQO_RDA_FORMAT;
1026 		dev_info(&priv->pdev->dev,
1027 			 "Driver is running with DQO RDA queue format.\n");
1028 		supported_features_mask =
1029 			be32_to_cpu(dev_op_dqo_rda->supported_features_mask);
1030 	} else if (dev_op_dqo_qpl) {
1031 		priv->queue_format = GVE_DQO_QPL_FORMAT;
1032 		supported_features_mask =
1033 			be32_to_cpu(dev_op_dqo_qpl->supported_features_mask);
1034 	}  else if (dev_op_gqi_rda) {
1035 		priv->queue_format = GVE_GQI_RDA_FORMAT;
1036 		dev_info(&priv->pdev->dev,
1037 			 "Driver is running with GQI RDA queue format.\n");
1038 		supported_features_mask =
1039 			be32_to_cpu(dev_op_gqi_rda->supported_features_mask);
1040 	} else if (priv->queue_format == GVE_GQI_RDA_FORMAT) {
1041 		dev_info(&priv->pdev->dev,
1042 			 "Driver is running with GQI RDA queue format.\n");
1043 	} else {
1044 		priv->queue_format = GVE_GQI_QPL_FORMAT;
1045 		if (dev_op_gqi_qpl)
1046 			supported_features_mask =
1047 				be32_to_cpu(dev_op_gqi_qpl->supported_features_mask);
1048 		dev_info(&priv->pdev->dev,
1049 			 "Driver is running with GQI QPL queue format.\n");
1050 	}
1051 
1052 	/* set default descriptor counts */
1053 	gve_set_default_desc_cnt(priv, descriptor);
1054 
1055 	/* DQO supports LRO. */
1056 	if (!gve_is_gqi(priv))
1057 		priv->dev->hw_features |= NETIF_F_LRO;
1058 
1059 	priv->max_registered_pages =
1060 				be64_to_cpu(descriptor->max_registered_pages);
1061 	mtu = be16_to_cpu(descriptor->mtu);
1062 	if (mtu < ETH_MIN_MTU) {
1063 		dev_err(&priv->pdev->dev, "MTU %d below minimum MTU\n", mtu);
1064 		err = -EINVAL;
1065 		goto free_device_descriptor;
1066 	}
1067 	priv->dev->max_mtu = mtu;
1068 	priv->num_event_counters = be16_to_cpu(descriptor->counters);
1069 	eth_hw_addr_set(priv->dev, descriptor->mac);
1070 	mac = descriptor->mac;
1071 	dev_info(&priv->pdev->dev, "MAC addr: %pM\n", mac);
1072 	priv->tx_pages_per_qpl = be16_to_cpu(descriptor->tx_pages_per_qpl);
1073 	priv->default_num_queues = be16_to_cpu(descriptor->default_num_queues);
1074 
1075 	gve_enable_supported_features(priv, supported_features_mask,
1076 				      dev_op_jumbo_frames, dev_op_dqo_qpl,
1077 				      dev_op_buffer_sizes, dev_op_flow_steering,
1078 				      dev_op_rss_config, dev_op_modify_ring);
1079 
1080 free_device_descriptor:
1081 	dma_pool_free(priv->adminq_pool, descriptor, descriptor_bus);
1082 	return err;
1083 }
1084 
1085 int gve_adminq_register_page_list(struct gve_priv *priv,
1086 				  struct gve_queue_page_list *qpl)
1087 {
1088 	struct device *hdev = &priv->pdev->dev;
1089 	u32 num_entries = qpl->num_entries;
1090 	u32 size = num_entries * sizeof(qpl->page_buses[0]);
1091 	union gve_adminq_command cmd;
1092 	dma_addr_t page_list_bus;
1093 	__be64 *page_list;
1094 	int err;
1095 	int i;
1096 
1097 	memset(&cmd, 0, sizeof(cmd));
1098 	page_list = dma_alloc_coherent(hdev, size, &page_list_bus, GFP_KERNEL);
1099 	if (!page_list)
1100 		return -ENOMEM;
1101 
1102 	for (i = 0; i < num_entries; i++)
1103 		page_list[i] = cpu_to_be64(qpl->page_buses[i]);
1104 
1105 	cmd.opcode = cpu_to_be32(GVE_ADMINQ_REGISTER_PAGE_LIST);
1106 	cmd.reg_page_list = (struct gve_adminq_register_page_list) {
1107 		.page_list_id = cpu_to_be32(qpl->id),
1108 		.num_pages = cpu_to_be32(num_entries),
1109 		.page_address_list_addr = cpu_to_be64(page_list_bus),
1110 		.page_size = cpu_to_be64(PAGE_SIZE),
1111 	};
1112 
1113 	err = gve_adminq_execute_cmd(priv, &cmd);
1114 	dma_free_coherent(hdev, size, page_list, page_list_bus);
1115 	return err;
1116 }
1117 
1118 int gve_adminq_unregister_page_list(struct gve_priv *priv, u32 page_list_id)
1119 {
1120 	union gve_adminq_command cmd;
1121 
1122 	memset(&cmd, 0, sizeof(cmd));
1123 	cmd.opcode = cpu_to_be32(GVE_ADMINQ_UNREGISTER_PAGE_LIST);
1124 	cmd.unreg_page_list = (struct gve_adminq_unregister_page_list) {
1125 		.page_list_id = cpu_to_be32(page_list_id),
1126 	};
1127 
1128 	return gve_adminq_execute_cmd(priv, &cmd);
1129 }
1130 
1131 int gve_adminq_set_mtu(struct gve_priv *priv, u64 mtu)
1132 {
1133 	union gve_adminq_command cmd;
1134 
1135 	memset(&cmd, 0, sizeof(cmd));
1136 	cmd.opcode = cpu_to_be32(GVE_ADMINQ_SET_DRIVER_PARAMETER);
1137 	cmd.set_driver_param = (struct gve_adminq_set_driver_parameter) {
1138 		.parameter_type = cpu_to_be32(GVE_SET_PARAM_MTU),
1139 		.parameter_value = cpu_to_be64(mtu),
1140 	};
1141 
1142 	return gve_adminq_execute_cmd(priv, &cmd);
1143 }
1144 
1145 int gve_adminq_report_stats(struct gve_priv *priv, u64 stats_report_len,
1146 			    dma_addr_t stats_report_addr, u64 interval)
1147 {
1148 	union gve_adminq_command cmd;
1149 
1150 	memset(&cmd, 0, sizeof(cmd));
1151 	cmd.opcode = cpu_to_be32(GVE_ADMINQ_REPORT_STATS);
1152 	cmd.report_stats = (struct gve_adminq_report_stats) {
1153 		.stats_report_len = cpu_to_be64(stats_report_len),
1154 		.stats_report_addr = cpu_to_be64(stats_report_addr),
1155 		.interval = cpu_to_be64(interval),
1156 	};
1157 
1158 	return gve_adminq_execute_cmd(priv, &cmd);
1159 }
1160 
1161 int gve_adminq_verify_driver_compatibility(struct gve_priv *priv,
1162 					   u64 driver_info_len,
1163 					   dma_addr_t driver_info_addr)
1164 {
1165 	union gve_adminq_command cmd;
1166 
1167 	memset(&cmd, 0, sizeof(cmd));
1168 	cmd.opcode = cpu_to_be32(GVE_ADMINQ_VERIFY_DRIVER_COMPATIBILITY);
1169 	cmd.verify_driver_compatibility = (struct gve_adminq_verify_driver_compatibility) {
1170 		.driver_info_len = cpu_to_be64(driver_info_len),
1171 		.driver_info_addr = cpu_to_be64(driver_info_addr),
1172 	};
1173 
1174 	return gve_adminq_execute_cmd(priv, &cmd);
1175 }
1176 
1177 int gve_adminq_report_link_speed(struct gve_priv *priv)
1178 {
1179 	union gve_adminq_command gvnic_cmd;
1180 	dma_addr_t link_speed_region_bus;
1181 	__be64 *link_speed_region;
1182 	int err;
1183 
1184 	link_speed_region =
1185 		dma_alloc_coherent(&priv->pdev->dev, sizeof(*link_speed_region),
1186 				   &link_speed_region_bus, GFP_KERNEL);
1187 
1188 	if (!link_speed_region)
1189 		return -ENOMEM;
1190 
1191 	memset(&gvnic_cmd, 0, sizeof(gvnic_cmd));
1192 	gvnic_cmd.opcode = cpu_to_be32(GVE_ADMINQ_REPORT_LINK_SPEED);
1193 	gvnic_cmd.report_link_speed.link_speed_address =
1194 		cpu_to_be64(link_speed_region_bus);
1195 
1196 	err = gve_adminq_execute_cmd(priv, &gvnic_cmd);
1197 
1198 	priv->link_speed = be64_to_cpu(*link_speed_region);
1199 	dma_free_coherent(&priv->pdev->dev, sizeof(*link_speed_region), link_speed_region,
1200 			  link_speed_region_bus);
1201 	return err;
1202 }
1203 
1204 int gve_adminq_get_ptype_map_dqo(struct gve_priv *priv,
1205 				 struct gve_ptype_lut *ptype_lut)
1206 {
1207 	struct gve_ptype_map *ptype_map;
1208 	union gve_adminq_command cmd;
1209 	dma_addr_t ptype_map_bus;
1210 	int err = 0;
1211 	int i;
1212 
1213 	memset(&cmd, 0, sizeof(cmd));
1214 	ptype_map = dma_alloc_coherent(&priv->pdev->dev, sizeof(*ptype_map),
1215 				       &ptype_map_bus, GFP_KERNEL);
1216 	if (!ptype_map)
1217 		return -ENOMEM;
1218 
1219 	cmd.opcode = cpu_to_be32(GVE_ADMINQ_GET_PTYPE_MAP);
1220 	cmd.get_ptype_map = (struct gve_adminq_get_ptype_map) {
1221 		.ptype_map_len = cpu_to_be64(sizeof(*ptype_map)),
1222 		.ptype_map_addr = cpu_to_be64(ptype_map_bus),
1223 	};
1224 
1225 	err = gve_adminq_execute_cmd(priv, &cmd);
1226 	if (err)
1227 		goto err;
1228 
1229 	/* Populate ptype_lut. */
1230 	for (i = 0; i < GVE_NUM_PTYPES; i++) {
1231 		ptype_lut->ptypes[i].l3_type =
1232 			ptype_map->ptypes[i].l3_type;
1233 		ptype_lut->ptypes[i].l4_type =
1234 			ptype_map->ptypes[i].l4_type;
1235 	}
1236 err:
1237 	dma_free_coherent(&priv->pdev->dev, sizeof(*ptype_map), ptype_map,
1238 			  ptype_map_bus);
1239 	return err;
1240 }
1241 
1242 static int
1243 gve_adminq_configure_flow_rule(struct gve_priv *priv,
1244 			       struct gve_adminq_configure_flow_rule *flow_rule_cmd)
1245 {
1246 	int err = gve_adminq_execute_extended_cmd(priv,
1247 			GVE_ADMINQ_CONFIGURE_FLOW_RULE,
1248 			sizeof(struct gve_adminq_configure_flow_rule),
1249 			flow_rule_cmd);
1250 
1251 	if (err) {
1252 		dev_err(&priv->pdev->dev, "Timeout to configure the flow rule, trigger reset");
1253 		gve_reset(priv, true);
1254 	} else {
1255 		priv->flow_rules_cache.rules_cache_synced = false;
1256 	}
1257 
1258 	return err;
1259 }
1260 
1261 int gve_adminq_add_flow_rule(struct gve_priv *priv, struct gve_adminq_flow_rule *rule, u32 loc)
1262 {
1263 	struct gve_adminq_configure_flow_rule flow_rule_cmd = {
1264 		.opcode = cpu_to_be16(GVE_FLOW_RULE_CFG_ADD),
1265 		.location = cpu_to_be32(loc),
1266 		.rule = *rule,
1267 	};
1268 
1269 	return gve_adminq_configure_flow_rule(priv, &flow_rule_cmd);
1270 }
1271 
1272 int gve_adminq_del_flow_rule(struct gve_priv *priv, u32 loc)
1273 {
1274 	struct gve_adminq_configure_flow_rule flow_rule_cmd = {
1275 		.opcode = cpu_to_be16(GVE_FLOW_RULE_CFG_DEL),
1276 		.location = cpu_to_be32(loc),
1277 	};
1278 
1279 	return gve_adminq_configure_flow_rule(priv, &flow_rule_cmd);
1280 }
1281 
1282 int gve_adminq_reset_flow_rules(struct gve_priv *priv)
1283 {
1284 	struct gve_adminq_configure_flow_rule flow_rule_cmd = {
1285 		.opcode = cpu_to_be16(GVE_FLOW_RULE_CFG_RESET),
1286 	};
1287 
1288 	return gve_adminq_configure_flow_rule(priv, &flow_rule_cmd);
1289 }
1290 
1291 int gve_adminq_configure_rss(struct gve_priv *priv, struct ethtool_rxfh_param *rxfh)
1292 {
1293 	dma_addr_t lut_bus = 0, key_bus = 0;
1294 	u16 key_size = 0, lut_size = 0;
1295 	union gve_adminq_command cmd;
1296 	__be32 *lut = NULL;
1297 	u8 hash_alg = 0;
1298 	u8 *key = NULL;
1299 	int err = 0;
1300 	u16 i;
1301 
1302 	switch (rxfh->hfunc) {
1303 	case ETH_RSS_HASH_NO_CHANGE:
1304 		break;
1305 	case ETH_RSS_HASH_TOP:
1306 		hash_alg = ETH_RSS_HASH_TOP;
1307 		break;
1308 	default:
1309 		return -EOPNOTSUPP;
1310 	}
1311 
1312 	if (rxfh->indir) {
1313 		lut_size = priv->rss_lut_size;
1314 		lut = dma_alloc_coherent(&priv->pdev->dev,
1315 					 lut_size * sizeof(*lut),
1316 					 &lut_bus, GFP_KERNEL);
1317 		if (!lut)
1318 			return -ENOMEM;
1319 
1320 		for (i = 0; i < priv->rss_lut_size; i++)
1321 			lut[i] = cpu_to_be32(rxfh->indir[i]);
1322 	}
1323 
1324 	if (rxfh->key) {
1325 		key_size = priv->rss_key_size;
1326 		key = dma_alloc_coherent(&priv->pdev->dev,
1327 					 key_size, &key_bus, GFP_KERNEL);
1328 		if (!key) {
1329 			err = -ENOMEM;
1330 			goto out;
1331 		}
1332 
1333 		memcpy(key, rxfh->key, key_size);
1334 	}
1335 
1336 	/* Zero-valued fields in the cmd.configure_rss instruct the device to
1337 	 * not update those fields.
1338 	 */
1339 	memset(&cmd, 0, sizeof(cmd));
1340 	cmd.opcode = cpu_to_be32(GVE_ADMINQ_CONFIGURE_RSS);
1341 	cmd.configure_rss = (struct gve_adminq_configure_rss) {
1342 		.hash_types = cpu_to_be16(BIT(GVE_RSS_HASH_TCPV4) |
1343 					  BIT(GVE_RSS_HASH_UDPV4) |
1344 					  BIT(GVE_RSS_HASH_TCPV6) |
1345 					  BIT(GVE_RSS_HASH_UDPV6)),
1346 		.hash_alg = hash_alg,
1347 		.hash_key_size = cpu_to_be16(key_size),
1348 		.hash_lut_size = cpu_to_be16(lut_size),
1349 		.hash_key_addr = cpu_to_be64(key_bus),
1350 		.hash_lut_addr = cpu_to_be64(lut_bus),
1351 	};
1352 
1353 	err = gve_adminq_execute_cmd(priv, &cmd);
1354 
1355 out:
1356 	if (lut)
1357 		dma_free_coherent(&priv->pdev->dev,
1358 				  lut_size * sizeof(*lut),
1359 				  lut, lut_bus);
1360 	if (key)
1361 		dma_free_coherent(&priv->pdev->dev,
1362 				  key_size, key, key_bus);
1363 	return err;
1364 }
1365 
1366 /* In the dma memory that the driver allocated for the device to query the flow rules, the device
1367  * will first write it with a struct of gve_query_flow_rules_descriptor. Next to it, the device
1368  * will write an array of rules or rule ids with the count that specified in the descriptor.
1369  * For GVE_FLOW_RULE_QUERY_STATS, the device will only write the descriptor.
1370  */
1371 static int gve_adminq_process_flow_rules_query(struct gve_priv *priv, u16 query_opcode,
1372 					       struct gve_query_flow_rules_descriptor *descriptor)
1373 {
1374 	struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache;
1375 	u32 num_queried_rules, total_memory_len, rule_info_len;
1376 	void *rule_info;
1377 
1378 	total_memory_len = be32_to_cpu(descriptor->total_length);
1379 	num_queried_rules = be32_to_cpu(descriptor->num_queried_rules);
1380 	rule_info = (void *)(descriptor + 1);
1381 
1382 	switch (query_opcode) {
1383 	case GVE_FLOW_RULE_QUERY_RULES:
1384 		rule_info_len = num_queried_rules * sizeof(*flow_rules_cache->rules_cache);
1385 		if (sizeof(*descriptor) + rule_info_len != total_memory_len) {
1386 			dev_err(&priv->dev->dev, "flow rules query is out of memory.\n");
1387 			return -ENOMEM;
1388 		}
1389 
1390 		memcpy(flow_rules_cache->rules_cache, rule_info, rule_info_len);
1391 		flow_rules_cache->rules_cache_num = num_queried_rules;
1392 		break;
1393 	case GVE_FLOW_RULE_QUERY_IDS:
1394 		rule_info_len = num_queried_rules * sizeof(*flow_rules_cache->rule_ids_cache);
1395 		if (sizeof(*descriptor) + rule_info_len != total_memory_len) {
1396 			dev_err(&priv->dev->dev, "flow rule ids query is out of memory.\n");
1397 			return -ENOMEM;
1398 		}
1399 
1400 		memcpy(flow_rules_cache->rule_ids_cache, rule_info, rule_info_len);
1401 		flow_rules_cache->rule_ids_cache_num = num_queried_rules;
1402 		break;
1403 	case GVE_FLOW_RULE_QUERY_STATS:
1404 		priv->num_flow_rules = be32_to_cpu(descriptor->num_flow_rules);
1405 		priv->max_flow_rules = be32_to_cpu(descriptor->max_flow_rules);
1406 		return 0;
1407 	default:
1408 		return -EINVAL;
1409 	}
1410 
1411 	return  0;
1412 }
1413 
1414 int gve_adminq_query_flow_rules(struct gve_priv *priv, u16 query_opcode, u32 starting_loc)
1415 {
1416 	struct gve_query_flow_rules_descriptor *descriptor;
1417 	union gve_adminq_command cmd;
1418 	dma_addr_t descriptor_bus;
1419 	int err = 0;
1420 
1421 	memset(&cmd, 0, sizeof(cmd));
1422 	descriptor = dma_pool_alloc(priv->adminq_pool, GFP_KERNEL, &descriptor_bus);
1423 	if (!descriptor)
1424 		return -ENOMEM;
1425 
1426 	cmd.opcode = cpu_to_be32(GVE_ADMINQ_QUERY_FLOW_RULES);
1427 	cmd.query_flow_rules = (struct gve_adminq_query_flow_rules) {
1428 		.opcode = cpu_to_be16(query_opcode),
1429 		.starting_rule_id = cpu_to_be32(starting_loc),
1430 		.available_length = cpu_to_be64(GVE_ADMINQ_BUFFER_SIZE),
1431 		.rule_descriptor_addr = cpu_to_be64(descriptor_bus),
1432 	};
1433 	err = gve_adminq_execute_cmd(priv, &cmd);
1434 	if (err)
1435 		goto out;
1436 
1437 	err = gve_adminq_process_flow_rules_query(priv, query_opcode, descriptor);
1438 
1439 out:
1440 	dma_pool_free(priv->adminq_pool, descriptor, descriptor_bus);
1441 	return err;
1442 }
1443 
1444 static int gve_adminq_process_rss_query(struct gve_priv *priv,
1445 					struct gve_query_rss_descriptor *descriptor,
1446 					struct ethtool_rxfh_param *rxfh)
1447 {
1448 	u32 total_memory_length;
1449 	u16 hash_lut_length;
1450 	void *rss_info_addr;
1451 	__be32 *lut;
1452 	u16 i;
1453 
1454 	total_memory_length = be32_to_cpu(descriptor->total_length);
1455 	hash_lut_length = priv->rss_lut_size * sizeof(*rxfh->indir);
1456 
1457 	if (sizeof(*descriptor) + priv->rss_key_size + hash_lut_length != total_memory_length) {
1458 		dev_err(&priv->dev->dev,
1459 			"rss query desc from device has invalid length parameter.\n");
1460 		return -EINVAL;
1461 	}
1462 
1463 	rxfh->hfunc = descriptor->hash_alg;
1464 
1465 	rss_info_addr = (void *)(descriptor + 1);
1466 	if (rxfh->key)
1467 		memcpy(rxfh->key, rss_info_addr, priv->rss_key_size);
1468 
1469 	rss_info_addr += priv->rss_key_size;
1470 	lut = (__be32 *)rss_info_addr;
1471 	if (rxfh->indir) {
1472 		for (i = 0; i < priv->rss_lut_size; i++)
1473 			rxfh->indir[i] = be32_to_cpu(lut[i]);
1474 	}
1475 
1476 	return 0;
1477 }
1478 
1479 int gve_adminq_query_rss_config(struct gve_priv *priv, struct ethtool_rxfh_param *rxfh)
1480 {
1481 	struct gve_query_rss_descriptor *descriptor;
1482 	union gve_adminq_command cmd;
1483 	dma_addr_t descriptor_bus;
1484 	int err = 0;
1485 
1486 	descriptor = dma_pool_alloc(priv->adminq_pool, GFP_KERNEL, &descriptor_bus);
1487 	if (!descriptor)
1488 		return -ENOMEM;
1489 
1490 	memset(&cmd, 0, sizeof(cmd));
1491 	cmd.opcode = cpu_to_be32(GVE_ADMINQ_QUERY_RSS);
1492 	cmd.query_rss = (struct gve_adminq_query_rss) {
1493 		.available_length = cpu_to_be64(GVE_ADMINQ_BUFFER_SIZE),
1494 		.rss_descriptor_addr = cpu_to_be64(descriptor_bus),
1495 	};
1496 	err = gve_adminq_execute_cmd(priv, &cmd);
1497 	if (err)
1498 		goto out;
1499 
1500 	err = gve_adminq_process_rss_query(priv, descriptor, rxfh);
1501 
1502 out:
1503 	dma_pool_free(priv->adminq_pool, descriptor, descriptor_bus);
1504 	return err;
1505 }
1506