xref: /linux/drivers/net/ethernet/intel/ice/devlink/devlink.c (revision 4d3f59bfa2cd3193b8bbe724df0a9cd41bdc507d)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2020, Intel Corporation. */
3 
4 #include <linux/vmalloc.h>
5 
6 #include "ice.h"
7 #include "ice_lib.h"
8 #include "devlink.h"
9 #include "port.h"
10 #include "ice_eswitch.h"
11 #include "ice_fw_update.h"
12 #include "ice_dcb_lib.h"
13 #include "ice_sf_eth.h"
14 
/* context for devlink info version reporting
 *
 * One instance is allocated per info request and passed to every version
 * getter in this file.
 */
struct ice_info_ctx {
	/* scratch string; a getter leaves it empty when it has nothing to report */
	char buf[128];
	/* inactive Option ROM version, filled by ice_get_inactive_orom_ver() */
	struct ice_orom_info pending_orom;
	/* inactive NVM version, filled by ice_get_inactive_nvm_ver() */
	struct ice_nvm_info pending_nvm;
	/* inactive netlist version, filled by ice_get_inactive_netlist_ver() */
	struct ice_netlist_info pending_netlist;
	/* device capabilities, filled by ice_discover_dev_caps() */
	struct ice_hw_dev_caps dev_caps;
};
23 
24 /* The following functions are used to format specific strings for various
25  * devlink info versions. The ctx parameter is used to provide the storage
26  * buffer, as well as any ancillary information calculated when the info
27  * request was made.
28  *
29  * If a version does not exist, for example when attempting to get the
30  * inactive version of flash when there is no pending update, the function
31  * should leave the buffer in the ctx structure empty.
32  */
33 
34 static void ice_info_get_dsn(struct ice_pf *pf, struct ice_info_ctx *ctx)
35 {
36 	u8 dsn[8];
37 
38 	/* Copy the DSN into an array in Big Endian format */
39 	put_unaligned_be64(pci_get_dsn(pf->pdev), dsn);
40 
41 	snprintf(ctx->buf, sizeof(ctx->buf), "%8phD", dsn);
42 }
43 
44 static void ice_info_pba(struct ice_pf *pf, struct ice_info_ctx *ctx)
45 {
46 	struct ice_hw *hw = &pf->hw;
47 	int status;
48 
49 	status = ice_read_pba_string(hw, (u8 *)ctx->buf, sizeof(ctx->buf));
50 	if (status)
51 		/* We failed to locate the PBA, so just skip this entry */
52 		dev_dbg(ice_pf_to_dev(pf), "Failed to read Product Board Assembly string, status %d\n",
53 			status);
54 }
55 
56 static void ice_info_fw_mgmt(struct ice_pf *pf, struct ice_info_ctx *ctx)
57 {
58 	struct ice_hw *hw = &pf->hw;
59 
60 	snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u",
61 		 hw->fw_maj_ver, hw->fw_min_ver, hw->fw_patch);
62 }
63 
64 static void ice_info_fw_api(struct ice_pf *pf, struct ice_info_ctx *ctx)
65 {
66 	struct ice_hw *hw = &pf->hw;
67 
68 	snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u", hw->api_maj_ver,
69 		 hw->api_min_ver, hw->api_patch);
70 }
71 
72 static void ice_info_fw_build(struct ice_pf *pf, struct ice_info_ctx *ctx)
73 {
74 	struct ice_hw *hw = &pf->hw;
75 
76 	snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", hw->fw_build);
77 }
78 
79 static void ice_info_orom_ver(struct ice_pf *pf, struct ice_info_ctx *ctx)
80 {
81 	struct ice_orom_info *orom = &pf->hw.flash.orom;
82 
83 	snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u",
84 		 orom->major, orom->build, orom->patch);
85 }
86 
87 static void
88 ice_info_pending_orom_ver(struct ice_pf __always_unused *pf,
89 			  struct ice_info_ctx *ctx)
90 {
91 	struct ice_orom_info *orom = &ctx->pending_orom;
92 
93 	if (ctx->dev_caps.common_cap.nvm_update_pending_orom)
94 		snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u",
95 			 orom->major, orom->build, orom->patch);
96 }
97 
98 static void ice_info_nvm_ver(struct ice_pf *pf, struct ice_info_ctx *ctx)
99 {
100 	struct ice_nvm_info *nvm = &pf->hw.flash.nvm;
101 
102 	snprintf(ctx->buf, sizeof(ctx->buf), "%x.%02x", nvm->major, nvm->minor);
103 }
104 
105 static void
106 ice_info_pending_nvm_ver(struct ice_pf __always_unused *pf,
107 			 struct ice_info_ctx *ctx)
108 {
109 	struct ice_nvm_info *nvm = &ctx->pending_nvm;
110 
111 	if (ctx->dev_caps.common_cap.nvm_update_pending_nvm)
112 		snprintf(ctx->buf, sizeof(ctx->buf), "%x.%02x",
113 			 nvm->major, nvm->minor);
114 }
115 
116 static void ice_info_eetrack(struct ice_pf *pf, struct ice_info_ctx *ctx)
117 {
118 	struct ice_nvm_info *nvm = &pf->hw.flash.nvm;
119 
120 	snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", nvm->eetrack);
121 }
122 
123 static void
124 ice_info_pending_eetrack(struct ice_pf *pf, struct ice_info_ctx *ctx)
125 {
126 	struct ice_nvm_info *nvm = &ctx->pending_nvm;
127 
128 	if (ctx->dev_caps.common_cap.nvm_update_pending_nvm)
129 		snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", nvm->eetrack);
130 }
131 
132 static void ice_info_ddp_pkg_name(struct ice_pf *pf, struct ice_info_ctx *ctx)
133 {
134 	struct ice_hw *hw = &pf->hw;
135 
136 	snprintf(ctx->buf, sizeof(ctx->buf), "%s", hw->active_pkg_name);
137 }
138 
139 static void
140 ice_info_ddp_pkg_version(struct ice_pf *pf, struct ice_info_ctx *ctx)
141 {
142 	struct ice_pkg_ver *pkg = &pf->hw.active_pkg_ver;
143 
144 	snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u.%u",
145 		 pkg->major, pkg->minor, pkg->update, pkg->draft);
146 }
147 
148 static void
149 ice_info_ddp_pkg_bundle_id(struct ice_pf *pf, struct ice_info_ctx *ctx)
150 {
151 	snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", pf->hw.active_track_id);
152 }
153 
154 static void ice_info_netlist_ver(struct ice_pf *pf, struct ice_info_ctx *ctx)
155 {
156 	struct ice_netlist_info *netlist = &pf->hw.flash.netlist;
157 
158 	/* The netlist version fields are BCD formatted */
159 	snprintf(ctx->buf, sizeof(ctx->buf), "%x.%x.%x-%x.%x.%x",
160 		 netlist->major, netlist->minor,
161 		 netlist->type >> 16, netlist->type & 0xFFFF,
162 		 netlist->rev, netlist->cust_ver);
163 }
164 
165 static void ice_info_netlist_build(struct ice_pf *pf, struct ice_info_ctx *ctx)
166 {
167 	struct ice_netlist_info *netlist = &pf->hw.flash.netlist;
168 
169 	snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", netlist->hash);
170 }
171 
172 static void
173 ice_info_pending_netlist_ver(struct ice_pf __always_unused *pf,
174 			     struct ice_info_ctx *ctx)
175 {
176 	struct ice_netlist_info *netlist = &ctx->pending_netlist;
177 
178 	/* The netlist version fields are BCD formatted */
179 	if (ctx->dev_caps.common_cap.nvm_update_pending_netlist)
180 		snprintf(ctx->buf, sizeof(ctx->buf), "%x.%x.%x-%x.%x.%x",
181 			 netlist->major, netlist->minor,
182 			 netlist->type >> 16, netlist->type & 0xFFFF,
183 			 netlist->rev, netlist->cust_ver);
184 }
185 
186 static void
187 ice_info_pending_netlist_build(struct ice_pf __always_unused *pf,
188 			       struct ice_info_ctx *ctx)
189 {
190 	struct ice_netlist_info *netlist = &ctx->pending_netlist;
191 
192 	if (ctx->dev_caps.common_cap.nvm_update_pending_netlist)
193 		snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", netlist->hash);
194 }
195 
196 static void ice_info_cgu_fw_build(struct ice_pf *pf, struct ice_info_ctx *ctx)
197 {
198 	u32 id, cfg_ver, fw_ver;
199 
200 	if (!ice_is_feature_supported(pf, ICE_F_CGU))
201 		return;
202 	if (ice_aq_get_cgu_info(&pf->hw, &id, &cfg_ver, &fw_ver))
203 		return;
204 	snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u", id, cfg_ver, fw_ver);
205 }
206 
207 static void ice_info_cgu_id(struct ice_pf *pf, struct ice_info_ctx *ctx)
208 {
209 	if (!ice_is_feature_supported(pf, ICE_F_CGU))
210 		return;
211 	snprintf(ctx->buf, sizeof(ctx->buf), "%u", pf->hw.cgu_part_number);
212 }
213 
/* Builders for entries of the ice_devlink_versions table. Each one expands
 * to a single initializer naming the version type, the devlink key, the
 * primary getter and (for stored versions only) a fallback getter.
 */
#define fixed(_key, _getter) \
	{ .type = ICE_VERSION_FIXED, .key = _key, \
	  .getter = _getter, .fallback = NULL }
#define running(_key, _getter) \
	{ .type = ICE_VERSION_RUNNING, .key = _key, \
	  .getter = _getter, .fallback = NULL }
#define stored(_key, _getter, _fallback) \
	{ .type = ICE_VERSION_STORED, .key = _key, \
	  .getter = _getter, .fallback = _fallback }

/* combined() emits two table entries for one key: a running entry and a
 * stored entry. The running entry always reports what the active handler
 * produces. The stored entry asks the pending handler first and falls back
 * to the active handler when the pending handler leaves the buffer empty.
 * A pending handler must therefore check whether an update is pending for
 * its flash component and only fill the buffer when a valid pending version
 * exists; that keeps stored and running versions in sync when nothing is
 * pending.
 */
#define combined(_key, _active, _pending) \
	running(_key, _active), \
	stored(_key, _pending, _active)
231 
/* Selects which devlink info put helper reports a version entry */
enum ice_version_type {
	ICE_VERSION_FIXED,	/* devlink_info_version_fixed_put() */
	ICE_VERSION_RUNNING,	/* devlink_info_version_running_put() */
	ICE_VERSION_STORED,	/* devlink_info_version_stored_put() */
};
237 
/* Table of every version reported by the .info_get handler, walked in order.
 * Entries whose getter (and fallback, if any) leave the buffer empty are
 * not reported.
 */
static const struct ice_devlink_version {
	/* which devlink put helper is used for this entry */
	enum ice_version_type type;
	/* devlink version name */
	const char *key;
	/* primary formatter; writes the version string into ctx->buf */
	void (*getter)(struct ice_pf *pf, struct ice_info_ctx *ctx);
	/* optional formatter used when getter leaves ctx->buf empty */
	void (*fallback)(struct ice_pf *pf, struct ice_info_ctx *ctx);
} ice_devlink_versions[] = {
	fixed(DEVLINK_INFO_VERSION_GENERIC_BOARD_ID, ice_info_pba),
	running(DEVLINK_INFO_VERSION_GENERIC_FW_MGMT, ice_info_fw_mgmt),
	running("fw.mgmt.api", ice_info_fw_api),
	running("fw.mgmt.build", ice_info_fw_build),
	combined(DEVLINK_INFO_VERSION_GENERIC_FW_UNDI, ice_info_orom_ver, ice_info_pending_orom_ver),
	combined("fw.psid.api", ice_info_nvm_ver, ice_info_pending_nvm_ver),
	combined(DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID, ice_info_eetrack, ice_info_pending_eetrack),
	running("fw.app.name", ice_info_ddp_pkg_name),
	running(DEVLINK_INFO_VERSION_GENERIC_FW_APP, ice_info_ddp_pkg_version),
	running("fw.app.bundle_id", ice_info_ddp_pkg_bundle_id),
	combined("fw.netlist", ice_info_netlist_ver, ice_info_pending_netlist_ver),
	combined("fw.netlist.build", ice_info_netlist_build, ice_info_pending_netlist_build),
	fixed("cgu.id", ice_info_cgu_id),
	running("fw.cgu", ice_info_cgu_fw_build),
};
259 
260 /**
261  * ice_devlink_info_get - .info_get devlink handler
262  * @devlink: devlink instance structure
263  * @req: the devlink info request
264  * @extack: extended netdev ack structure
265  *
266  * Callback for the devlink .info_get operation. Reports information about the
267  * device.
268  *
269  * Return: zero on success or an error code on failure.
270  */
271 static int ice_devlink_info_get(struct devlink *devlink,
272 				struct devlink_info_req *req,
273 				struct netlink_ext_ack *extack)
274 {
275 	struct ice_pf *pf = devlink_priv(devlink);
276 	struct device *dev = ice_pf_to_dev(pf);
277 	struct ice_hw *hw = &pf->hw;
278 	struct ice_info_ctx *ctx;
279 	size_t i;
280 	int err;
281 
282 	err = ice_wait_for_reset(pf, 10 * HZ);
283 	if (err) {
284 		NL_SET_ERR_MSG_MOD(extack, "Device is busy resetting");
285 		return err;
286 	}
287 
288 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
289 	if (!ctx)
290 		return -ENOMEM;
291 
292 	/* discover capabilities first */
293 	err = ice_discover_dev_caps(hw, &ctx->dev_caps);
294 	if (err) {
295 		dev_dbg(dev, "Failed to discover device capabilities, status %d aq_err %s\n",
296 			err, ice_aq_str(hw->adminq.sq_last_status));
297 		NL_SET_ERR_MSG_MOD(extack, "Unable to discover device capabilities");
298 		goto out_free_ctx;
299 	}
300 
301 	if (ctx->dev_caps.common_cap.nvm_update_pending_orom) {
302 		err = ice_get_inactive_orom_ver(hw, &ctx->pending_orom);
303 		if (err) {
304 			dev_dbg(dev, "Unable to read inactive Option ROM version data, status %d aq_err %s\n",
305 				err, ice_aq_str(hw->adminq.sq_last_status));
306 
307 			/* disable display of pending Option ROM */
308 			ctx->dev_caps.common_cap.nvm_update_pending_orom = false;
309 		}
310 	}
311 
312 	if (ctx->dev_caps.common_cap.nvm_update_pending_nvm) {
313 		err = ice_get_inactive_nvm_ver(hw, &ctx->pending_nvm);
314 		if (err) {
315 			dev_dbg(dev, "Unable to read inactive NVM version data, status %d aq_err %s\n",
316 				err, ice_aq_str(hw->adminq.sq_last_status));
317 
318 			/* disable display of pending Option ROM */
319 			ctx->dev_caps.common_cap.nvm_update_pending_nvm = false;
320 		}
321 	}
322 
323 	if (ctx->dev_caps.common_cap.nvm_update_pending_netlist) {
324 		err = ice_get_inactive_netlist_ver(hw, &ctx->pending_netlist);
325 		if (err) {
326 			dev_dbg(dev, "Unable to read inactive Netlist version data, status %d aq_err %s\n",
327 				err, ice_aq_str(hw->adminq.sq_last_status));
328 
329 			/* disable display of pending Option ROM */
330 			ctx->dev_caps.common_cap.nvm_update_pending_netlist = false;
331 		}
332 	}
333 
334 	ice_info_get_dsn(pf, ctx);
335 
336 	err = devlink_info_serial_number_put(req, ctx->buf);
337 	if (err) {
338 		NL_SET_ERR_MSG_MOD(extack, "Unable to set serial number");
339 		goto out_free_ctx;
340 	}
341 
342 	for (i = 0; i < ARRAY_SIZE(ice_devlink_versions); i++) {
343 		enum ice_version_type type = ice_devlink_versions[i].type;
344 		const char *key = ice_devlink_versions[i].key;
345 
346 		memset(ctx->buf, 0, sizeof(ctx->buf));
347 
348 		ice_devlink_versions[i].getter(pf, ctx);
349 
350 		/* If the default getter doesn't report a version, use the
351 		 * fallback function. This is primarily useful in the case of
352 		 * "stored" versions that want to report the same value as the
353 		 * running version in the normal case of no pending update.
354 		 */
355 		if (ctx->buf[0] == '\0' && ice_devlink_versions[i].fallback)
356 			ice_devlink_versions[i].fallback(pf, ctx);
357 
358 		/* Do not report missing versions */
359 		if (ctx->buf[0] == '\0')
360 			continue;
361 
362 		switch (type) {
363 		case ICE_VERSION_FIXED:
364 			err = devlink_info_version_fixed_put(req, key, ctx->buf);
365 			if (err) {
366 				NL_SET_ERR_MSG_MOD(extack, "Unable to set fixed version");
367 				goto out_free_ctx;
368 			}
369 			break;
370 		case ICE_VERSION_RUNNING:
371 			err = devlink_info_version_running_put(req, key, ctx->buf);
372 			if (err) {
373 				NL_SET_ERR_MSG_MOD(extack, "Unable to set running version");
374 				goto out_free_ctx;
375 			}
376 			break;
377 		case ICE_VERSION_STORED:
378 			err = devlink_info_version_stored_put(req, key, ctx->buf);
379 			if (err) {
380 				NL_SET_ERR_MSG_MOD(extack, "Unable to set stored version");
381 				goto out_free_ctx;
382 			}
383 			break;
384 		}
385 	}
386 
387 out_free_ctx:
388 	kfree(ctx);
389 	return err;
390 }
391 
392 /**
393  * ice_devlink_reload_empr_start - Start EMP reset to activate new firmware
394  * @pf: pointer to the pf instance
395  * @extack: netlink extended ACK structure
396  *
397  * Allow user to activate new Embedded Management Processor firmware by
398  * issuing device specific EMP reset. Called in response to
399  * a DEVLINK_CMD_RELOAD with the DEVLINK_RELOAD_ACTION_FW_ACTIVATE.
400  *
401  * Note that teardown and rebuild of the driver state happens automatically as
402  * part of an interrupt and watchdog task. This is because all physical
403  * functions on the device must be able to reset when an EMP reset occurs from
404  * any source.
405  */
406 static int
407 ice_devlink_reload_empr_start(struct ice_pf *pf,
408 			      struct netlink_ext_ack *extack)
409 {
410 	struct device *dev = ice_pf_to_dev(pf);
411 	struct ice_hw *hw = &pf->hw;
412 	u8 pending;
413 	int err;
414 
415 	err = ice_get_pending_updates(pf, &pending, extack);
416 	if (err)
417 		return err;
418 
419 	/* pending is a bitmask of which flash banks have a pending update,
420 	 * including the main NVM bank, the Option ROM bank, and the netlist
421 	 * bank. If any of these bits are set, then there is a pending update
422 	 * waiting to be activated.
423 	 */
424 	if (!pending) {
425 		NL_SET_ERR_MSG_MOD(extack, "No pending firmware update");
426 		return -ECANCELED;
427 	}
428 
429 	if (pf->fw_emp_reset_disabled) {
430 		NL_SET_ERR_MSG_MOD(extack, "EMP reset is not available. To activate firmware, a reboot or power cycle is needed");
431 		return -ECANCELED;
432 	}
433 
434 	dev_dbg(dev, "Issuing device EMP reset to activate firmware\n");
435 
436 	err = ice_aq_nvm_update_empr(hw);
437 	if (err) {
438 		dev_err(dev, "Failed to trigger EMP device reset to reload firmware, err %d aq_err %s\n",
439 			err, ice_aq_str(hw->adminq.sq_last_status));
440 		NL_SET_ERR_MSG_MOD(extack, "Failed to trigger EMP device reset to reload firmware");
441 		return err;
442 	}
443 
444 	return 0;
445 }
446 
/**
 * ice_devlink_reinit_down - unload given PF
 * @pf: pointer to the PF struct
 *
 * Unloads the PF, deconfigures the main VSI under the RTNL lock and then
 * deinitializes the device.
 */
static void ice_devlink_reinit_down(struct ice_pf *pf)
{
	struct ice_vsi *main_vsi;

	/* devl_lock is already held by the devlink API, so don't take it */
	ice_unload(pf);

	rtnl_lock();
	main_vsi = ice_get_main_vsi(pf);
	ice_vsi_decfg(main_vsi);
	rtnl_unlock();

	ice_deinit_dev(pf);
}
460 
461 /**
462  * ice_devlink_reload_down - prepare for reload
463  * @devlink: pointer to the devlink instance to reload
464  * @netns_change: if true, the network namespace is changing
465  * @action: the action to perform
466  * @limit: limits on what reload should do, such as not resetting
467  * @extack: netlink extended ACK structure
468  */
469 static int
470 ice_devlink_reload_down(struct devlink *devlink, bool netns_change,
471 			enum devlink_reload_action action,
472 			enum devlink_reload_limit limit,
473 			struct netlink_ext_ack *extack)
474 {
475 	struct ice_pf *pf = devlink_priv(devlink);
476 
477 	switch (action) {
478 	case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
479 		if (ice_is_eswitch_mode_switchdev(pf)) {
480 			NL_SET_ERR_MSG_MOD(extack,
481 					   "Go to legacy mode before doing reinit");
482 			return -EOPNOTSUPP;
483 		}
484 		if (ice_is_adq_active(pf)) {
485 			NL_SET_ERR_MSG_MOD(extack,
486 					   "Turn off ADQ before doing reinit");
487 			return -EOPNOTSUPP;
488 		}
489 		if (ice_has_vfs(pf)) {
490 			NL_SET_ERR_MSG_MOD(extack,
491 					   "Remove all VFs before doing reinit");
492 			return -EOPNOTSUPP;
493 		}
494 		ice_devlink_reinit_down(pf);
495 		return 0;
496 	case DEVLINK_RELOAD_ACTION_FW_ACTIVATE:
497 		return ice_devlink_reload_empr_start(pf, extack);
498 	default:
499 		WARN_ON(1);
500 		return -EOPNOTSUPP;
501 	}
502 }
503 
504 /**
505  * ice_devlink_reload_empr_finish - Wait for EMP reset to finish
506  * @pf: pointer to the pf instance
507  * @extack: netlink extended ACK structure
508  *
509  * Wait for driver to finish rebuilding after EMP reset is completed. This
510  * includes time to wait for both the actual device reset as well as the time
511  * for the driver's rebuild to complete.
512  */
513 static int
514 ice_devlink_reload_empr_finish(struct ice_pf *pf,
515 			       struct netlink_ext_ack *extack)
516 {
517 	int err;
518 
519 	err = ice_wait_for_reset(pf, 60 * HZ);
520 	if (err) {
521 		NL_SET_ERR_MSG_MOD(extack, "Device still resetting after 1 minute");
522 		return err;
523 	}
524 
525 	return 0;
526 }
527 
528 /**
529  * ice_get_tx_topo_user_sel - Read user's choice from flash
530  * @pf: pointer to pf structure
531  * @layers: value read from flash will be saved here
532  *
533  * Reads user's preference for Tx Scheduler Topology Tree from PFA TLV.
534  *
535  * Return: zero when read was successful, negative values otherwise.
536  */
537 static int ice_get_tx_topo_user_sel(struct ice_pf *pf, uint8_t *layers)
538 {
539 	struct ice_aqc_nvm_tx_topo_user_sel usr_sel = {};
540 	struct ice_hw *hw = &pf->hw;
541 	int err;
542 
543 	err = ice_acquire_nvm(hw, ICE_RES_READ);
544 	if (err)
545 		return err;
546 
547 	err = ice_aq_read_nvm(hw, ICE_AQC_NVM_TX_TOPO_MOD_ID, 0,
548 			      sizeof(usr_sel), &usr_sel, true, true, NULL);
549 	if (err)
550 		goto exit_release_res;
551 
552 	if (usr_sel.data & ICE_AQC_NVM_TX_TOPO_USER_SEL)
553 		*layers = ICE_SCHED_5_LAYERS;
554 	else
555 		*layers = ICE_SCHED_9_LAYERS;
556 
557 exit_release_res:
558 	ice_release_nvm(hw);
559 
560 	return err;
561 }
562 
563 /**
564  * ice_update_tx_topo_user_sel - Save user's preference in flash
565  * @pf: pointer to pf structure
566  * @layers: value to be saved in flash
567  *
568  * Variable "layers" defines user's preference about number of layers in Tx
569  * Scheduler Topology Tree. This choice should be stored in PFA TLV field
570  * and be picked up by driver, next time during init.
571  *
572  * Return: zero when save was successful, negative values otherwise.
573  */
574 static int ice_update_tx_topo_user_sel(struct ice_pf *pf, int layers)
575 {
576 	struct ice_aqc_nvm_tx_topo_user_sel usr_sel = {};
577 	struct ice_hw *hw = &pf->hw;
578 	int err;
579 
580 	err = ice_acquire_nvm(hw, ICE_RES_WRITE);
581 	if (err)
582 		return err;
583 
584 	err = ice_aq_read_nvm(hw, ICE_AQC_NVM_TX_TOPO_MOD_ID, 0,
585 			      sizeof(usr_sel), &usr_sel, true, true, NULL);
586 	if (err)
587 		goto exit_release_res;
588 
589 	if (layers == ICE_SCHED_5_LAYERS)
590 		usr_sel.data |= ICE_AQC_NVM_TX_TOPO_USER_SEL;
591 	else
592 		usr_sel.data &= ~ICE_AQC_NVM_TX_TOPO_USER_SEL;
593 
594 	err = ice_write_one_nvm_block(pf, ICE_AQC_NVM_TX_TOPO_MOD_ID, 2,
595 				      sizeof(usr_sel.data), &usr_sel.data,
596 				      true, NULL, NULL);
597 exit_release_res:
598 	ice_release_nvm(hw);
599 
600 	return err;
601 }
602 
603 /**
604  * ice_devlink_tx_sched_layers_get - Get tx_scheduling_layers parameter
605  * @devlink: pointer to the devlink instance
606  * @id: the parameter ID to set
607  * @ctx: context to store the parameter value
608  *
609  * Return: zero on success and negative value on failure.
610  */
611 static int ice_devlink_tx_sched_layers_get(struct devlink *devlink, u32 id,
612 					   struct devlink_param_gset_ctx *ctx)
613 {
614 	struct ice_pf *pf = devlink_priv(devlink);
615 	int err;
616 
617 	err = ice_get_tx_topo_user_sel(pf, &ctx->val.vu8);
618 	if (err)
619 		return err;
620 
621 	return 0;
622 }
623 
624 /**
625  * ice_devlink_tx_sched_layers_set - Set tx_scheduling_layers parameter
626  * @devlink: pointer to the devlink instance
627  * @id: the parameter ID to set
628  * @ctx: context to get the parameter value
629  * @extack: netlink extended ACK structure
630  *
631  * Return: zero on success and negative value on failure.
632  */
633 static int ice_devlink_tx_sched_layers_set(struct devlink *devlink, u32 id,
634 					   struct devlink_param_gset_ctx *ctx,
635 					   struct netlink_ext_ack *extack)
636 {
637 	struct ice_pf *pf = devlink_priv(devlink);
638 	int err;
639 
640 	err = ice_update_tx_topo_user_sel(pf, ctx->val.vu8);
641 	if (err)
642 		return err;
643 
644 	NL_SET_ERR_MSG_MOD(extack,
645 			   "Tx scheduling layers have been changed on this device. You must do the PCI slot powercycle for the change to take effect.");
646 
647 	return 0;
648 }
649 
650 /**
651  * ice_devlink_tx_sched_layers_validate - Validate passed tx_scheduling_layers
652  *                                        parameter value
653  * @devlink: unused pointer to devlink instance
654  * @id: the parameter ID to validate
655  * @val: value to validate
656  * @extack: netlink extended ACK structure
657  *
658  * Supported values are:
659  * - 5 - five layers Tx Scheduler Topology Tree
660  * - 9 - nine layers Tx Scheduler Topology Tree
661  *
662  * Return: zero when passed parameter value is supported. Negative value on
663  * error.
664  */
665 static int ice_devlink_tx_sched_layers_validate(struct devlink *devlink, u32 id,
666 						union devlink_param_value val,
667 						struct netlink_ext_ack *extack)
668 {
669 	if (val.vu8 != ICE_SCHED_5_LAYERS && val.vu8 != ICE_SCHED_9_LAYERS) {
670 		NL_SET_ERR_MSG_MOD(extack,
671 				   "Wrong number of tx scheduler layers provided.");
672 		return -EINVAL;
673 	}
674 
675 	return 0;
676 }
677 
678 /**
679  * ice_tear_down_devlink_rate_tree - removes devlink-rate exported tree
680  * @pf: pf struct
681  *
682  * This function tears down tree exported during VF's creation.
683  */
684 void ice_tear_down_devlink_rate_tree(struct ice_pf *pf)
685 {
686 	struct devlink *devlink;
687 	struct ice_vf *vf;
688 	unsigned int bkt;
689 
690 	devlink = priv_to_devlink(pf);
691 
692 	devl_lock(devlink);
693 	mutex_lock(&pf->vfs.table_lock);
694 	ice_for_each_vf(pf, bkt, vf) {
695 		if (vf->devlink_port.devlink_rate)
696 			devl_rate_leaf_destroy(&vf->devlink_port);
697 	}
698 	mutex_unlock(&pf->vfs.table_lock);
699 
700 	devl_rate_nodes_destroy(devlink);
701 	devl_unlock(devlink);
702 }
703 
704 /**
705  * ice_enable_custom_tx - try to enable custom Tx feature
706  * @pf: pf struct
707  *
708  * This function tries to enable custom Tx feature,
709  * it's not possible to enable it, if DCB or ADQ is active.
710  */
711 static bool ice_enable_custom_tx(struct ice_pf *pf)
712 {
713 	struct ice_port_info *pi = ice_get_main_vsi(pf)->port_info;
714 	struct device *dev = ice_pf_to_dev(pf);
715 
716 	if (pi->is_custom_tx_enabled)
717 		/* already enabled, return true */
718 		return true;
719 
720 	if (ice_is_adq_active(pf)) {
721 		dev_err(dev, "ADQ active, can't modify Tx scheduler tree\n");
722 		return false;
723 	}
724 
725 	if (ice_is_dcb_active(pf)) {
726 		dev_err(dev, "DCB active, can't modify Tx scheduler tree\n");
727 		return false;
728 	}
729 
730 	pi->is_custom_tx_enabled = true;
731 
732 	return true;
733 }
734 
735 /**
736  * ice_traverse_tx_tree - traverse Tx scheduler tree
737  * @devlink: devlink struct
738  * @node: current node, used for recursion
739  * @tc_node: tc_node struct, that is treated as a root
740  * @pf: pf struct
741  *
742  * This function traverses Tx scheduler tree and exports
743  * entire structure to the devlink-rate.
744  */
745 static void ice_traverse_tx_tree(struct devlink *devlink, struct ice_sched_node *node,
746 				 struct ice_sched_node *tc_node, struct ice_pf *pf)
747 {
748 	struct devlink_rate *rate_node = NULL;
749 	struct ice_dynamic_port *sf;
750 	struct ice_vf *vf;
751 	int i;
752 
753 	if (node->rate_node)
754 		/* already added, skip to the next */
755 		goto traverse_children;
756 
757 	if (node->parent == tc_node) {
758 		/* create root node */
759 		rate_node = devl_rate_node_create(devlink, node, node->name, NULL);
760 	} else if (node->vsi_handle &&
761 		   pf->vsi[node->vsi_handle]->type == ICE_VSI_VF &&
762 		   pf->vsi[node->vsi_handle]->vf) {
763 		vf = pf->vsi[node->vsi_handle]->vf;
764 		if (!vf->devlink_port.devlink_rate)
765 			/* leaf nodes doesn't have children
766 			 * so we don't set rate_node
767 			 */
768 			devl_rate_leaf_create(&vf->devlink_port, node,
769 					      node->parent->rate_node);
770 	} else if (node->vsi_handle &&
771 		   pf->vsi[node->vsi_handle]->type == ICE_VSI_SF &&
772 		   pf->vsi[node->vsi_handle]->sf) {
773 		sf = pf->vsi[node->vsi_handle]->sf;
774 		if (!sf->devlink_port.devlink_rate)
775 			/* leaf nodes doesn't have children
776 			 * so we don't set rate_node
777 			 */
778 			devl_rate_leaf_create(&sf->devlink_port, node,
779 					      node->parent->rate_node);
780 	} else if (node->info.data.elem_type != ICE_AQC_ELEM_TYPE_LEAF &&
781 		   node->parent->rate_node) {
782 		rate_node = devl_rate_node_create(devlink, node, node->name,
783 						  node->parent->rate_node);
784 	}
785 
786 	if (rate_node && !IS_ERR(rate_node))
787 		node->rate_node = rate_node;
788 
789 traverse_children:
790 	for (i = 0; i < node->num_children; i++)
791 		ice_traverse_tx_tree(devlink, node->children[i], tc_node, pf);
792 }
793 
794 /**
795  * ice_devlink_rate_init_tx_topology - export Tx scheduler tree to devlink rate
796  * @devlink: devlink struct
797  * @vsi: main vsi struct
798  *
799  * This function finds a root node, then calls ice_traverse_tx tree, which
800  * traverses the tree and exports it's contents to devlink rate.
801  */
802 int ice_devlink_rate_init_tx_topology(struct devlink *devlink, struct ice_vsi *vsi)
803 {
804 	struct ice_port_info *pi = vsi->port_info;
805 	struct ice_sched_node *tc_node;
806 	struct ice_pf *pf = vsi->back;
807 	int i;
808 
809 	tc_node = pi->root->children[0];
810 	mutex_lock(&pi->sched_lock);
811 	for (i = 0; i < tc_node->num_children; i++)
812 		ice_traverse_tx_tree(devlink, tc_node->children[i], tc_node, pf);
813 	mutex_unlock(&pi->sched_lock);
814 
815 	return 0;
816 }
817 
818 static void ice_clear_rate_nodes(struct ice_sched_node *node)
819 {
820 	node->rate_node = NULL;
821 
822 	for (int i = 0; i < node->num_children; i++)
823 		ice_clear_rate_nodes(node->children[i]);
824 }
825 
826 /**
827  * ice_devlink_rate_clear_tx_topology - clear node->rate_node
828  * @vsi: main vsi struct
829  *
830  * Clear rate_node to cleanup creation of Tx topology.
831  *
832  */
833 void ice_devlink_rate_clear_tx_topology(struct ice_vsi *vsi)
834 {
835 	struct ice_port_info *pi = vsi->port_info;
836 
837 	mutex_lock(&pi->sched_lock);
838 	ice_clear_rate_nodes(pi->root->children[0]);
839 	mutex_unlock(&pi->sched_lock);
840 }
841 
842 /**
843  * ice_set_object_tx_share - sets node scheduling parameter
844  * @pi: devlink struct instance
845  * @node: node struct instance
846  * @bw: bandwidth in bytes per second
847  * @extack: extended netdev ack structure
848  *
849  * This function sets ICE_MIN_BW scheduling BW limit.
850  */
851 static int ice_set_object_tx_share(struct ice_port_info *pi, struct ice_sched_node *node,
852 				   u64 bw, struct netlink_ext_ack *extack)
853 {
854 	int status;
855 
856 	mutex_lock(&pi->sched_lock);
857 	/* converts bytes per second to kilo bits per second */
858 	node->tx_share = div_u64(bw, 125);
859 	status = ice_sched_set_node_bw_lmt(pi, node, ICE_MIN_BW, node->tx_share);
860 	mutex_unlock(&pi->sched_lock);
861 
862 	if (status)
863 		NL_SET_ERR_MSG_MOD(extack, "Can't set scheduling node tx_share");
864 
865 	return status;
866 }
867 
868 /**
869  * ice_set_object_tx_max - sets node scheduling parameter
870  * @pi: devlink struct instance
871  * @node: node struct instance
872  * @bw: bandwidth in bytes per second
873  * @extack: extended netdev ack structure
874  *
875  * This function sets ICE_MAX_BW scheduling BW limit.
876  */
877 static int ice_set_object_tx_max(struct ice_port_info *pi, struct ice_sched_node *node,
878 				 u64 bw, struct netlink_ext_ack *extack)
879 {
880 	int status;
881 
882 	mutex_lock(&pi->sched_lock);
883 	/* converts bytes per second value to kilo bits per second */
884 	node->tx_max = div_u64(bw, 125);
885 	status = ice_sched_set_node_bw_lmt(pi, node, ICE_MAX_BW, node->tx_max);
886 	mutex_unlock(&pi->sched_lock);
887 
888 	if (status)
889 		NL_SET_ERR_MSG_MOD(extack, "Can't set scheduling node tx_max");
890 
891 	return status;
892 }
893 
894 /**
895  * ice_set_object_tx_priority - sets node scheduling parameter
896  * @pi: devlink struct instance
897  * @node: node struct instance
898  * @priority: value representing priority for strict priority arbitration
899  * @extack: extended netdev ack structure
900  *
901  * This function sets priority of node among siblings.
902  */
903 static int ice_set_object_tx_priority(struct ice_port_info *pi, struct ice_sched_node *node,
904 				      u32 priority, struct netlink_ext_ack *extack)
905 {
906 	int status;
907 
908 	if (priority >= 8) {
909 		NL_SET_ERR_MSG_MOD(extack, "Priority should be less than 8");
910 		return -EINVAL;
911 	}
912 
913 	mutex_lock(&pi->sched_lock);
914 	node->tx_priority = priority;
915 	status = ice_sched_set_node_priority(pi, node, node->tx_priority);
916 	mutex_unlock(&pi->sched_lock);
917 
918 	if (status)
919 		NL_SET_ERR_MSG_MOD(extack, "Can't set scheduling node tx_priority");
920 
921 	return status;
922 }
923 
924 /**
925  * ice_set_object_tx_weight - sets node scheduling parameter
926  * @pi: devlink struct instance
927  * @node: node struct instance
928  * @weight: value represeting relative weight for WFQ arbitration
929  * @extack: extended netdev ack structure
930  *
931  * This function sets node weight for WFQ algorithm.
932  */
933 static int ice_set_object_tx_weight(struct ice_port_info *pi, struct ice_sched_node *node,
934 				    u32 weight, struct netlink_ext_ack *extack)
935 {
936 	int status;
937 
938 	if (weight > 200 || weight < 1) {
939 		NL_SET_ERR_MSG_MOD(extack, "Weight must be between 1 and 200");
940 		return -EINVAL;
941 	}
942 
943 	mutex_lock(&pi->sched_lock);
944 	node->tx_weight = weight;
945 	status = ice_sched_set_node_weight(pi, node, node->tx_weight);
946 	mutex_unlock(&pi->sched_lock);
947 
948 	if (status)
949 		NL_SET_ERR_MSG_MOD(extack, "Can't set scheduling node tx_weight");
950 
951 	return status;
952 }
953 
954 /**
955  * ice_get_pi_from_dev_rate - get port info from devlink_rate
956  * @rate_node: devlink struct instance
957  *
958  * This function returns corresponding port_info struct of devlink_rate
959  */
960 static struct ice_port_info *ice_get_pi_from_dev_rate(struct devlink_rate *rate_node)
961 {
962 	struct ice_pf *pf = devlink_priv(rate_node->devlink);
963 
964 	return ice_get_main_vsi(pf)->port_info;
965 }
966 
967 static int ice_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
968 				     struct netlink_ext_ack *extack)
969 {
970 	struct ice_sched_node *node;
971 	struct ice_port_info *pi;
972 
973 	pi = ice_get_pi_from_dev_rate(rate_node);
974 
975 	if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink)))
976 		return -EBUSY;
977 
978 	/* preallocate memory for ice_sched_node */
979 	node = devm_kzalloc(ice_hw_to_dev(pi->hw), sizeof(*node), GFP_KERNEL);
980 	*priv = node;
981 
982 	return 0;
983 }
984 
985 static int ice_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
986 				     struct netlink_ext_ack *extack)
987 {
988 	struct ice_sched_node *node, *tc_node;
989 	struct ice_port_info *pi;
990 
991 	pi = ice_get_pi_from_dev_rate(rate_node);
992 	tc_node = pi->root->children[0];
993 	node = priv;
994 
995 	if (!rate_node->parent || !node || tc_node == node || !extack)
996 		return 0;
997 
998 	if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink)))
999 		return -EBUSY;
1000 
1001 	/* can't allow to delete a node with children */
1002 	if (node->num_children)
1003 		return -EINVAL;
1004 
1005 	mutex_lock(&pi->sched_lock);
1006 	ice_free_sched_node(pi, node);
1007 	mutex_unlock(&pi->sched_lock);
1008 
1009 	return 0;
1010 }
1011 
1012 static int ice_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
1013 					    u64 tx_max, struct netlink_ext_ack *extack)
1014 {
1015 	struct ice_sched_node *node = priv;
1016 
1017 	if (!ice_enable_custom_tx(devlink_priv(rate_leaf->devlink)))
1018 		return -EBUSY;
1019 
1020 	if (!node)
1021 		return 0;
1022 
1023 	return ice_set_object_tx_max(ice_get_pi_from_dev_rate(rate_leaf),
1024 				     node, tx_max, extack);
1025 }
1026 
1027 static int ice_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
1028 					      u64 tx_share, struct netlink_ext_ack *extack)
1029 {
1030 	struct ice_sched_node *node = priv;
1031 
1032 	if (!ice_enable_custom_tx(devlink_priv(rate_leaf->devlink)))
1033 		return -EBUSY;
1034 
1035 	if (!node)
1036 		return 0;
1037 
1038 	return ice_set_object_tx_share(ice_get_pi_from_dev_rate(rate_leaf), node,
1039 				       tx_share, extack);
1040 }
1041 
1042 static int ice_devlink_rate_leaf_tx_priority_set(struct devlink_rate *rate_leaf, void *priv,
1043 						 u32 tx_priority, struct netlink_ext_ack *extack)
1044 {
1045 	struct ice_sched_node *node = priv;
1046 
1047 	if (!ice_enable_custom_tx(devlink_priv(rate_leaf->devlink)))
1048 		return -EBUSY;
1049 
1050 	if (!node)
1051 		return 0;
1052 
1053 	return ice_set_object_tx_priority(ice_get_pi_from_dev_rate(rate_leaf), node,
1054 					  tx_priority, extack);
1055 }
1056 
1057 static int ice_devlink_rate_leaf_tx_weight_set(struct devlink_rate *rate_leaf, void *priv,
1058 					       u32 tx_weight, struct netlink_ext_ack *extack)
1059 {
1060 	struct ice_sched_node *node = priv;
1061 
1062 	if (!ice_enable_custom_tx(devlink_priv(rate_leaf->devlink)))
1063 		return -EBUSY;
1064 
1065 	if (!node)
1066 		return 0;
1067 
1068 	return ice_set_object_tx_weight(ice_get_pi_from_dev_rate(rate_leaf), node,
1069 					tx_weight, extack);
1070 }
1071 
1072 static int ice_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv,
1073 					    u64 tx_max, struct netlink_ext_ack *extack)
1074 {
1075 	struct ice_sched_node *node = priv;
1076 
1077 	if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink)))
1078 		return -EBUSY;
1079 
1080 	if (!node)
1081 		return 0;
1082 
1083 	return ice_set_object_tx_max(ice_get_pi_from_dev_rate(rate_node),
1084 				     node, tx_max, extack);
1085 }
1086 
1087 static int ice_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
1088 					      u64 tx_share, struct netlink_ext_ack *extack)
1089 {
1090 	struct ice_sched_node *node = priv;
1091 
1092 	if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink)))
1093 		return -EBUSY;
1094 
1095 	if (!node)
1096 		return 0;
1097 
1098 	return ice_set_object_tx_share(ice_get_pi_from_dev_rate(rate_node),
1099 				       node, tx_share, extack);
1100 }
1101 
1102 static int ice_devlink_rate_node_tx_priority_set(struct devlink_rate *rate_node, void *priv,
1103 						 u32 tx_priority, struct netlink_ext_ack *extack)
1104 {
1105 	struct ice_sched_node *node = priv;
1106 
1107 	if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink)))
1108 		return -EBUSY;
1109 
1110 	if (!node)
1111 		return 0;
1112 
1113 	return ice_set_object_tx_priority(ice_get_pi_from_dev_rate(rate_node),
1114 					  node, tx_priority, extack);
1115 }
1116 
1117 static int ice_devlink_rate_node_tx_weight_set(struct devlink_rate *rate_node, void *priv,
1118 					       u32 tx_weight, struct netlink_ext_ack *extack)
1119 {
1120 	struct ice_sched_node *node = priv;
1121 
1122 	if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink)))
1123 		return -EBUSY;
1124 
1125 	if (!node)
1126 		return 0;
1127 
1128 	return ice_set_object_tx_weight(ice_get_pi_from_dev_rate(rate_node),
1129 					node, tx_weight, extack);
1130 }
1131 
1132 static int ice_devlink_set_parent(struct devlink_rate *devlink_rate,
1133 				  struct devlink_rate *parent,
1134 				  void *priv, void *parent_priv,
1135 				  struct netlink_ext_ack *extack)
1136 {
1137 	struct ice_port_info *pi = ice_get_pi_from_dev_rate(devlink_rate);
1138 	struct ice_sched_node *tc_node, *node, *parent_node;
1139 	u16 num_nodes_added;
1140 	u32 first_node_teid;
1141 	u32 node_teid;
1142 	int status;
1143 
1144 	tc_node = pi->root->children[0];
1145 	node = priv;
1146 
1147 	if (!extack)
1148 		return 0;
1149 
1150 	if (!ice_enable_custom_tx(devlink_priv(devlink_rate->devlink)))
1151 		return -EBUSY;
1152 
1153 	if (!parent) {
1154 		if (!node || tc_node == node || node->num_children)
1155 			return -EINVAL;
1156 
1157 		mutex_lock(&pi->sched_lock);
1158 		ice_free_sched_node(pi, node);
1159 		mutex_unlock(&pi->sched_lock);
1160 
1161 		return 0;
1162 	}
1163 
1164 	parent_node = parent_priv;
1165 
1166 	/* if the node doesn't exist, create it */
1167 	if (!node->parent) {
1168 		mutex_lock(&pi->sched_lock);
1169 		status = ice_sched_add_elems(pi, tc_node, parent_node,
1170 					     parent_node->tx_sched_layer + 1,
1171 					     1, &num_nodes_added, &first_node_teid,
1172 					     &node);
1173 		mutex_unlock(&pi->sched_lock);
1174 
1175 		if (status) {
1176 			NL_SET_ERR_MSG_MOD(extack, "Can't add a new node");
1177 			return status;
1178 		}
1179 
1180 		if (devlink_rate->tx_share)
1181 			ice_set_object_tx_share(pi, node, devlink_rate->tx_share, extack);
1182 		if (devlink_rate->tx_max)
1183 			ice_set_object_tx_max(pi, node, devlink_rate->tx_max, extack);
1184 		if (devlink_rate->tx_priority)
1185 			ice_set_object_tx_priority(pi, node, devlink_rate->tx_priority, extack);
1186 		if (devlink_rate->tx_weight)
1187 			ice_set_object_tx_weight(pi, node, devlink_rate->tx_weight, extack);
1188 	} else {
1189 		node_teid = le32_to_cpu(node->info.node_teid);
1190 		mutex_lock(&pi->sched_lock);
1191 		status = ice_sched_move_nodes(pi, parent_node, 1, &node_teid);
1192 		mutex_unlock(&pi->sched_lock);
1193 
1194 		if (status)
1195 			NL_SET_ERR_MSG_MOD(extack, "Can't move existing node to a new parent");
1196 	}
1197 
1198 	return status;
1199 }
1200 
1201 /**
1202  * ice_devlink_reinit_up - do reinit of the given PF
1203  * @pf: pointer to the PF struct
1204  */
1205 static int ice_devlink_reinit_up(struct ice_pf *pf)
1206 {
1207 	struct ice_vsi *vsi = ice_get_main_vsi(pf);
1208 	int err;
1209 
1210 	err = ice_init_hw(&pf->hw);
1211 	if (err) {
1212 		dev_err(ice_pf_to_dev(pf), "ice_init_hw failed: %d\n", err);
1213 		return err;
1214 	}
1215 
1216 	err = ice_init_dev(pf);
1217 	if (err)
1218 		goto unroll_hw_init;
1219 
1220 	vsi->flags = ICE_VSI_FLAG_INIT;
1221 
1222 	rtnl_lock();
1223 	err = ice_vsi_cfg(vsi);
1224 	rtnl_unlock();
1225 	if (err)
1226 		goto err_vsi_cfg;
1227 
1228 	/* No need to take devl_lock, it's already taken by devlink API */
1229 	err = ice_load(pf);
1230 	if (err)
1231 		goto err_load;
1232 
1233 	return 0;
1234 
1235 err_load:
1236 	rtnl_lock();
1237 	ice_vsi_decfg(vsi);
1238 	rtnl_unlock();
1239 err_vsi_cfg:
1240 	ice_deinit_dev(pf);
1241 unroll_hw_init:
1242 	ice_deinit_hw(&pf->hw);
1243 	return err;
1244 }
1245 
1246 /**
1247  * ice_devlink_reload_up - do reload up after reinit
1248  * @devlink: pointer to the devlink instance reloading
1249  * @action: the action requested
1250  * @limit: limits imposed by userspace, such as not resetting
1251  * @actions_performed: on return, indicate what actions actually performed
1252  * @extack: netlink extended ACK structure
1253  */
1254 static int
1255 ice_devlink_reload_up(struct devlink *devlink,
1256 		      enum devlink_reload_action action,
1257 		      enum devlink_reload_limit limit,
1258 		      u32 *actions_performed,
1259 		      struct netlink_ext_ack *extack)
1260 {
1261 	struct ice_pf *pf = devlink_priv(devlink);
1262 
1263 	switch (action) {
1264 	case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
1265 		*actions_performed = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT);
1266 		return ice_devlink_reinit_up(pf);
1267 	case DEVLINK_RELOAD_ACTION_FW_ACTIVATE:
1268 		*actions_performed = BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE);
1269 		return ice_devlink_reload_empr_finish(pf, extack);
1270 	default:
1271 		WARN_ON(1);
1272 		return -EOPNOTSUPP;
1273 	}
1274 }
1275 
1276 static const struct devlink_ops ice_devlink_ops = {
1277 	.supported_flash_update_params = DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK,
1278 	.reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) |
1279 			  BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE),
1280 	.reload_down = ice_devlink_reload_down,
1281 	.reload_up = ice_devlink_reload_up,
1282 	.eswitch_mode_get = ice_eswitch_mode_get,
1283 	.eswitch_mode_set = ice_eswitch_mode_set,
1284 	.info_get = ice_devlink_info_get,
1285 	.flash_update = ice_devlink_flash_update,
1286 
1287 	.rate_node_new = ice_devlink_rate_node_new,
1288 	.rate_node_del = ice_devlink_rate_node_del,
1289 
1290 	.rate_leaf_tx_max_set = ice_devlink_rate_leaf_tx_max_set,
1291 	.rate_leaf_tx_share_set = ice_devlink_rate_leaf_tx_share_set,
1292 	.rate_leaf_tx_priority_set = ice_devlink_rate_leaf_tx_priority_set,
1293 	.rate_leaf_tx_weight_set = ice_devlink_rate_leaf_tx_weight_set,
1294 
1295 	.rate_node_tx_max_set = ice_devlink_rate_node_tx_max_set,
1296 	.rate_node_tx_share_set = ice_devlink_rate_node_tx_share_set,
1297 	.rate_node_tx_priority_set = ice_devlink_rate_node_tx_priority_set,
1298 	.rate_node_tx_weight_set = ice_devlink_rate_node_tx_weight_set,
1299 
1300 	.rate_leaf_parent_set = ice_devlink_set_parent,
1301 	.rate_node_parent_set = ice_devlink_set_parent,
1302 
1303 	.port_new = ice_devlink_port_new,
1304 };
1305 
/*
 * devlink operations handed to devlink_alloc() by ice_allocate_sf().
 * NOTE(review): no initializer is visible in this part of the file, which
 * would leave every callback unset - confirm this is intended.
 */
1306 static const struct devlink_ops ice_sf_devlink_ops;
1307 
1308 static int
1309 ice_devlink_enable_roce_get(struct devlink *devlink, u32 id,
1310 			    struct devlink_param_gset_ctx *ctx)
1311 {
1312 	struct ice_pf *pf = devlink_priv(devlink);
1313 
1314 	ctx->val.vbool = pf->rdma_mode & IIDC_RDMA_PROTOCOL_ROCEV2 ? true : false;
1315 
1316 	return 0;
1317 }
1318 
1319 static int ice_devlink_enable_roce_set(struct devlink *devlink, u32 id,
1320 				       struct devlink_param_gset_ctx *ctx,
1321 				       struct netlink_ext_ack *extack)
1322 {
1323 	struct ice_pf *pf = devlink_priv(devlink);
1324 	bool roce_ena = ctx->val.vbool;
1325 	int ret;
1326 
1327 	if (!roce_ena) {
1328 		ice_unplug_aux_dev(pf);
1329 		pf->rdma_mode &= ~IIDC_RDMA_PROTOCOL_ROCEV2;
1330 		return 0;
1331 	}
1332 
1333 	pf->rdma_mode |= IIDC_RDMA_PROTOCOL_ROCEV2;
1334 	ret = ice_plug_aux_dev(pf);
1335 	if (ret)
1336 		pf->rdma_mode &= ~IIDC_RDMA_PROTOCOL_ROCEV2;
1337 
1338 	return ret;
1339 }
1340 
1341 static int
1342 ice_devlink_enable_roce_validate(struct devlink *devlink, u32 id,
1343 				 union devlink_param_value val,
1344 				 struct netlink_ext_ack *extack)
1345 {
1346 	struct ice_pf *pf = devlink_priv(devlink);
1347 
1348 	if (!test_bit(ICE_FLAG_RDMA_ENA, pf->flags))
1349 		return -EOPNOTSUPP;
1350 
1351 	if (pf->rdma_mode & IIDC_RDMA_PROTOCOL_IWARP) {
1352 		NL_SET_ERR_MSG_MOD(extack, "iWARP is currently enabled. This device cannot enable iWARP and RoCEv2 simultaneously");
1353 		return -EOPNOTSUPP;
1354 	}
1355 
1356 	return 0;
1357 }
1358 
1359 static int
1360 ice_devlink_enable_iw_get(struct devlink *devlink, u32 id,
1361 			  struct devlink_param_gset_ctx *ctx)
1362 {
1363 	struct ice_pf *pf = devlink_priv(devlink);
1364 
1365 	ctx->val.vbool = pf->rdma_mode & IIDC_RDMA_PROTOCOL_IWARP;
1366 
1367 	return 0;
1368 }
1369 
1370 static int ice_devlink_enable_iw_set(struct devlink *devlink, u32 id,
1371 				     struct devlink_param_gset_ctx *ctx,
1372 				     struct netlink_ext_ack *extack)
1373 {
1374 	struct ice_pf *pf = devlink_priv(devlink);
1375 	bool iw_ena = ctx->val.vbool;
1376 	int ret;
1377 
1378 	if (!iw_ena) {
1379 		ice_unplug_aux_dev(pf);
1380 		pf->rdma_mode &= ~IIDC_RDMA_PROTOCOL_IWARP;
1381 		return 0;
1382 	}
1383 
1384 	pf->rdma_mode |= IIDC_RDMA_PROTOCOL_IWARP;
1385 	ret = ice_plug_aux_dev(pf);
1386 	if (ret)
1387 		pf->rdma_mode &= ~IIDC_RDMA_PROTOCOL_IWARP;
1388 
1389 	return ret;
1390 }
1391 
1392 static int
1393 ice_devlink_enable_iw_validate(struct devlink *devlink, u32 id,
1394 			       union devlink_param_value val,
1395 			       struct netlink_ext_ack *extack)
1396 {
1397 	struct ice_pf *pf = devlink_priv(devlink);
1398 
1399 	if (!test_bit(ICE_FLAG_RDMA_ENA, pf->flags))
1400 		return -EOPNOTSUPP;
1401 
1402 	if (pf->rdma_mode & IIDC_RDMA_PROTOCOL_ROCEV2) {
1403 		NL_SET_ERR_MSG_MOD(extack, "RoCEv2 is currently enabled. This device cannot enable iWARP and RoCEv2 simultaneously");
1404 		return -EOPNOTSUPP;
1405 	}
1406 
1407 	return 0;
1408 }
1409 
/*
 * String values of the "local_forwarding" devlink parameter, translated to
 * and from enum ice_local_fwd_mode by the two helpers below.
 */
1410 #define DEVLINK_LOCAL_FWD_DISABLED_STR "disabled"
1411 #define DEVLINK_LOCAL_FWD_ENABLED_STR "enabled"
1412 #define DEVLINK_LOCAL_FWD_PRIORITIZED_STR "prioritized"
1413 
1414 /**
1415  * ice_devlink_local_fwd_mode_to_str - Get string for local_fwd mode.
1416  * @mode: local forwarding for mode used in port_info struct.
1417  *
1418  * Return: Mode respective string or "Invalid".
1419  */
1420 static const char *
1421 ice_devlink_local_fwd_mode_to_str(enum ice_local_fwd_mode mode)
1422 {
1423 	switch (mode) {
1424 	case ICE_LOCAL_FWD_MODE_ENABLED:
1425 		return DEVLINK_LOCAL_FWD_ENABLED_STR;
1426 	case ICE_LOCAL_FWD_MODE_PRIORITIZED:
1427 		return DEVLINK_LOCAL_FWD_PRIORITIZED_STR;
1428 	case ICE_LOCAL_FWD_MODE_DISABLED:
1429 		return DEVLINK_LOCAL_FWD_DISABLED_STR;
1430 	}
1431 
1432 	return "Invalid";
1433 }
1434 
1435 /**
1436  * ice_devlink_local_fwd_str_to_mode - Get local_fwd mode from string name.
1437  * @mode_str: local forwarding mode string.
1438  *
1439  * Return: Mode value or negative number if invalid.
1440  */
1441 static int ice_devlink_local_fwd_str_to_mode(const char *mode_str)
1442 {
1443 	if (!strcmp(mode_str, DEVLINK_LOCAL_FWD_ENABLED_STR))
1444 		return ICE_LOCAL_FWD_MODE_ENABLED;
1445 	else if (!strcmp(mode_str, DEVLINK_LOCAL_FWD_PRIORITIZED_STR))
1446 		return ICE_LOCAL_FWD_MODE_PRIORITIZED;
1447 	else if (!strcmp(mode_str, DEVLINK_LOCAL_FWD_DISABLED_STR))
1448 		return ICE_LOCAL_FWD_MODE_DISABLED;
1449 
1450 	return -EINVAL;
1451 }
1452 
1453 /**
1454  * ice_devlink_local_fwd_get - Get local_fwd parameter.
1455  * @devlink: Pointer to the devlink instance.
1456  * @id: The parameter ID to set.
1457  * @ctx: Context to store the parameter value.
1458  *
1459  * Return: Zero.
1460  */
1461 static int ice_devlink_local_fwd_get(struct devlink *devlink, u32 id,
1462 				     struct devlink_param_gset_ctx *ctx)
1463 {
1464 	struct ice_pf *pf = devlink_priv(devlink);
1465 	struct ice_port_info *pi;
1466 	const char *mode_str;
1467 
1468 	pi = pf->hw.port_info;
1469 	mode_str = ice_devlink_local_fwd_mode_to_str(pi->local_fwd_mode);
1470 	snprintf(ctx->val.vstr, sizeof(ctx->val.vstr), "%s", mode_str);
1471 
1472 	return 0;
1473 }
1474 
1475 /**
1476  * ice_devlink_local_fwd_set - Set local_fwd parameter.
1477  * @devlink: Pointer to the devlink instance.
1478  * @id: The parameter ID to set.
1479  * @ctx: Context to get the parameter value.
1480  * @extack: Netlink extended ACK structure.
1481  *
1482  * Return: Zero.
1483  */
1484 static int ice_devlink_local_fwd_set(struct devlink *devlink, u32 id,
1485 				     struct devlink_param_gset_ctx *ctx,
1486 				     struct netlink_ext_ack *extack)
1487 {
1488 	int new_local_fwd_mode = ice_devlink_local_fwd_str_to_mode(ctx->val.vstr);
1489 	struct ice_pf *pf = devlink_priv(devlink);
1490 	struct device *dev = ice_pf_to_dev(pf);
1491 	struct ice_port_info *pi;
1492 
1493 	pi = pf->hw.port_info;
1494 	if (pi->local_fwd_mode != new_local_fwd_mode) {
1495 		pi->local_fwd_mode = new_local_fwd_mode;
1496 		dev_info(dev, "Setting local_fwd to %s\n", ctx->val.vstr);
1497 		ice_schedule_reset(pf, ICE_RESET_CORER);
1498 	}
1499 
1500 	return 0;
1501 }
1502 
1503 /**
1504  * ice_devlink_local_fwd_validate - Validate passed local_fwd parameter value.
1505  * @devlink: Unused pointer to devlink instance.
1506  * @id: The parameter ID to validate.
1507  * @val: Value to validate.
1508  * @extack: Netlink extended ACK structure.
1509  *
1510  * Supported values are:
1511  * "enabled" - local_fwd is enabled, "disabled" - local_fwd is disabled
1512  * "prioritized" - local_fwd traffic is prioritized in scheduling.
1513  *
1514  * Return: Zero when passed parameter value is supported. Negative value on
1515  * error.
1516  */
1517 static int ice_devlink_local_fwd_validate(struct devlink *devlink, u32 id,
1518 					  union devlink_param_value val,
1519 					  struct netlink_ext_ack *extack)
1520 {
1521 	if (ice_devlink_local_fwd_str_to_mode(val.vstr) < 0) {
1522 		NL_SET_ERR_MSG_MOD(extack, "Error: Requested value is not supported.");
1523 		return -EINVAL;
1524 	}
1525 
1526 	return 0;
1527 }
1528 
/*
 * IDs of the driver-specific devlink parameters. Numbering starts at
 * DEVLINK_PARAM_GENERIC_ID_MAX, so the first driver ID is one above it.
 */
1529 enum ice_param_id {
1530 	ICE_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
1531 	ICE_DEVLINK_PARAM_ID_TX_SCHED_LAYERS,
1532 	ICE_DEVLINK_PARAM_ID_LOCAL_FWD,
1533 };
1534 
/*
 * Generic devlink parameters selecting the RDMA protocol (RoCE or iWARP),
 * both in runtime configuration mode. Always registered by
 * ice_devlink_register_params().
 */
1535 static const struct devlink_param ice_dvl_rdma_params[] = {
1536 	DEVLINK_PARAM_GENERIC(ENABLE_ROCE, BIT(DEVLINK_PARAM_CMODE_RUNTIME),
1537 			      ice_devlink_enable_roce_get,
1538 			      ice_devlink_enable_roce_set,
1539 			      ice_devlink_enable_roce_validate),
1540 	DEVLINK_PARAM_GENERIC(ENABLE_IWARP, BIT(DEVLINK_PARAM_CMODE_RUNTIME),
1541 			      ice_devlink_enable_iw_get,
1542 			      ice_devlink_enable_iw_set,
1543 			      ice_devlink_enable_iw_validate),
1544 };
1545 
/*
 * Driver-specific devlink parameters: "tx_scheduling_layers" (u8, permanent
 * configuration mode) and "local_forwarding" (string, runtime configuration
 * mode). Registered by ice_devlink_register_params() only when the
 * tx_sched_topo_comp_mode_en capability is set.
 */
1546 static const struct devlink_param ice_dvl_sched_params[] = {
1547 	DEVLINK_PARAM_DRIVER(ICE_DEVLINK_PARAM_ID_TX_SCHED_LAYERS,
1548 			     "tx_scheduling_layers",
1549 			     DEVLINK_PARAM_TYPE_U8,
1550 			     BIT(DEVLINK_PARAM_CMODE_PERMANENT),
1551 			     ice_devlink_tx_sched_layers_get,
1552 			     ice_devlink_tx_sched_layers_set,
1553 			     ice_devlink_tx_sched_layers_validate),
1554 	DEVLINK_PARAM_DRIVER(ICE_DEVLINK_PARAM_ID_LOCAL_FWD,
1555 			     "local_forwarding", DEVLINK_PARAM_TYPE_STRING,
1556 			     BIT(DEVLINK_PARAM_CMODE_RUNTIME),
1557 			     ice_devlink_local_fwd_get,
1558 			     ice_devlink_local_fwd_set,
1559 			     ice_devlink_local_fwd_validate),
1560 };
1561 
/* devres action registered by ice_allocate_pf(): frees the devlink instance */
static void ice_devlink_free(void *devlink_ptr)
{
	struct devlink *devlink = devlink_ptr;

	devlink_free(devlink);
}
1566 
1567 /**
1568  * ice_allocate_pf - Allocate devlink and return PF structure pointer
1569  * @dev: the device to allocate for
1570  *
1571  * Allocate a devlink instance for this device and return the private area as
1572  * the PF structure. The devlink memory is kept track of through devres by
1573  * adding an action to remove it when unwinding.
1574  */
1575 struct ice_pf *ice_allocate_pf(struct device *dev)
1576 {
1577 	struct devlink *devlink;
1578 
1579 	devlink = devlink_alloc(&ice_devlink_ops, sizeof(struct ice_pf), dev);
1580 	if (!devlink)
1581 		return NULL;
1582 
1583 	/* Add an action to teardown the devlink when unwinding the driver */
1584 	if (devm_add_action_or_reset(dev, ice_devlink_free, devlink))
1585 		return NULL;
1586 
1587 	return devlink_priv(devlink);
1588 }
1589 
1590 /**
1591  * ice_allocate_sf - Allocate devlink and return SF structure pointer
1592  * @dev: the device to allocate for
1593  * @pf: pointer to the PF structure
1594  *
1595  * Allocate a devlink instance for SF.
1596  *
1597  * Return: ice_sf_priv pointer to allocated memory or ERR_PTR in case of error
1598  */
1599 struct ice_sf_priv *ice_allocate_sf(struct device *dev, struct ice_pf *pf)
1600 {
1601 	struct devlink *devlink;
1602 	int err;
1603 
1604 	devlink = devlink_alloc(&ice_sf_devlink_ops, sizeof(struct ice_sf_priv),
1605 				dev);
1606 	if (!devlink)
1607 		return ERR_PTR(-ENOMEM);
1608 
1609 	err = devl_nested_devlink_set(priv_to_devlink(pf), devlink);
1610 	if (err) {
1611 		devlink_free(devlink);
1612 		return ERR_PTR(err);
1613 	}
1614 
1615 	return devlink_priv(devlink);
1616 }
1617 
/**
 * ice_devlink_register - Register devlink interface for this PF
 * @pf: the PF to register the devlink for.
 *
 * Register the devlink instance associated with this physical function.
 * This function does not return a value.
 */
void ice_devlink_register(struct ice_pf *pf)
{
	devl_register(priv_to_devlink(pf));
}
1632 
/**
 * ice_devlink_unregister - Unregister the devlink instance of this PF
 * @pf: the PF structure to cleanup
 *
 * Calls devl_unregister() on the devlink instance that embeds @pf.
 */
void ice_devlink_unregister(struct ice_pf *pf)
{
	struct devlink *devlink = priv_to_devlink(pf);

	devl_unregister(devlink);
}
1643 
1644 int ice_devlink_register_params(struct ice_pf *pf)
1645 {
1646 	struct devlink *devlink = priv_to_devlink(pf);
1647 	struct ice_hw *hw = &pf->hw;
1648 	int status;
1649 
1650 	status = devl_params_register(devlink, ice_dvl_rdma_params,
1651 				      ARRAY_SIZE(ice_dvl_rdma_params));
1652 	if (status)
1653 		return status;
1654 
1655 	if (hw->func_caps.common_cap.tx_sched_topo_comp_mode_en)
1656 		status = devl_params_register(devlink, ice_dvl_sched_params,
1657 					      ARRAY_SIZE(ice_dvl_sched_params));
1658 
1659 	return status;
1660 }
1661 
1662 void ice_devlink_unregister_params(struct ice_pf *pf)
1663 {
1664 	struct devlink *devlink = priv_to_devlink(pf);
1665 	struct ice_hw *hw = &pf->hw;
1666 
1667 	devl_params_unregister(devlink, ice_dvl_rdma_params,
1668 			       ARRAY_SIZE(ice_dvl_rdma_params));
1669 
1670 	if (hw->func_caps.common_cap.tx_sched_topo_comp_mode_en)
1671 		devl_params_unregister(devlink, ice_dvl_sched_params,
1672 				       ARRAY_SIZE(ice_dvl_sched_params));
1673 }
1674 
/* Size in bytes (1 MiB) of one NVM read issued by ice_devlink_nvm_snapshot() */
1675 #define ICE_DEVLINK_READ_BLK_SIZE (1024 * 1024)
1676 
/*
 * Forward declarations: the snapshot and read callbacks compare their ops
 * argument against these two objects, which are defined after them.
 */
1677 static const struct devlink_region_ops ice_nvm_region_ops;
1678 static const struct devlink_region_ops ice_sram_region_ops;
1679 
1680 /**
1681  * ice_devlink_nvm_snapshot - Capture a snapshot of the NVM flash contents
1682  * @devlink: the devlink instance
1683  * @ops: the devlink region to snapshot
1684  * @extack: extended ACK response structure
1685  * @data: on exit points to snapshot data buffer
1686  *
1687  * This function is called in response to a DEVLINK_CMD_REGION_NEW for either
1688  * the nvm-flash or shadow-ram region.
1689  *
1690  * It captures a snapshot of the NVM or Shadow RAM flash contents. This
1691  * snapshot can then later be viewed via the DEVLINK_CMD_REGION_READ netlink
1692  * interface.
1693  *
1694  * @returns zero on success, and updates the data pointer. Returns a non-zero
1695  * error code on failure.
1696  */
1697 static int ice_devlink_nvm_snapshot(struct devlink *devlink,
1698 				    const struct devlink_region_ops *ops,
1699 				    struct netlink_ext_ack *extack, u8 **data)
1700 {
1701 	struct ice_pf *pf = devlink_priv(devlink);
1702 	struct device *dev = ice_pf_to_dev(pf);
1703 	struct ice_hw *hw = &pf->hw;
1704 	bool read_shadow_ram;
1705 	u8 *nvm_data, *tmp, i;
1706 	u32 nvm_size, left;
1707 	s8 num_blks;
1708 	int status;
1709 
1710 	if (ops == &ice_nvm_region_ops) {
1711 		read_shadow_ram = false;
1712 		nvm_size = hw->flash.flash_size;
1713 	} else if (ops == &ice_sram_region_ops) {
1714 		read_shadow_ram = true;
1715 		nvm_size = hw->flash.sr_words * 2u;
1716 	} else {
1717 		NL_SET_ERR_MSG_MOD(extack, "Unexpected region in snapshot function");
1718 		return -EOPNOTSUPP;
1719 	}
1720 
1721 	nvm_data = vzalloc(nvm_size);
1722 	if (!nvm_data)
1723 		return -ENOMEM;
1724 
1725 	num_blks = DIV_ROUND_UP(nvm_size, ICE_DEVLINK_READ_BLK_SIZE);
1726 	tmp = nvm_data;
1727 	left = nvm_size;
1728 
1729 	/* Some systems take longer to read the NVM than others which causes the
1730 	 * FW to reclaim the NVM lock before the entire NVM has been read. Fix
1731 	 * this by breaking the reads of the NVM into smaller chunks that will
1732 	 * probably not take as long. This has some overhead since we are
1733 	 * increasing the number of AQ commands, but it should always work
1734 	 */
1735 	for (i = 0; i < num_blks; i++) {
1736 		u32 read_sz = min_t(u32, ICE_DEVLINK_READ_BLK_SIZE, left);
1737 
1738 		status = ice_acquire_nvm(hw, ICE_RES_READ);
1739 		if (status) {
1740 			dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n",
1741 				status, hw->adminq.sq_last_status);
1742 			NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore");
1743 			vfree(nvm_data);
1744 			return -EIO;
1745 		}
1746 
1747 		status = ice_read_flat_nvm(hw, i * ICE_DEVLINK_READ_BLK_SIZE,
1748 					   &read_sz, tmp, read_shadow_ram);
1749 		if (status) {
1750 			dev_dbg(dev, "ice_read_flat_nvm failed after reading %u bytes, err %d aq_err %d\n",
1751 				read_sz, status, hw->adminq.sq_last_status);
1752 			NL_SET_ERR_MSG_MOD(extack, "Failed to read NVM contents");
1753 			ice_release_nvm(hw);
1754 			vfree(nvm_data);
1755 			return -EIO;
1756 		}
1757 		ice_release_nvm(hw);
1758 
1759 		tmp += read_sz;
1760 		left -= read_sz;
1761 	}
1762 
1763 	*data = nvm_data;
1764 
1765 	return 0;
1766 }
1767 
1768 /**
1769  * ice_devlink_nvm_read - Read a portion of NVM flash contents
1770  * @devlink: the devlink instance
1771  * @ops: the devlink region to snapshot
1772  * @extack: extended ACK response structure
1773  * @offset: the offset to start at
1774  * @size: the amount to read
1775  * @data: the data buffer to read into
1776  *
1777  * This function is called in response to DEVLINK_CMD_REGION_READ to directly
1778  * read a section of the NVM contents.
1779  *
1780  * It reads from either the nvm-flash or shadow-ram region contents.
1781  *
1782  * @returns zero on success, and updates the data pointer. Returns a non-zero
1783  * error code on failure.
1784  */
1785 static int ice_devlink_nvm_read(struct devlink *devlink,
1786 				const struct devlink_region_ops *ops,
1787 				struct netlink_ext_ack *extack,
1788 				u64 offset, u32 size, u8 *data)
1789 {
1790 	struct ice_pf *pf = devlink_priv(devlink);
1791 	struct device *dev = ice_pf_to_dev(pf);
1792 	struct ice_hw *hw = &pf->hw;
1793 	bool read_shadow_ram;
1794 	u64 nvm_size;
1795 	int status;
1796 
1797 	if (ops == &ice_nvm_region_ops) {
1798 		read_shadow_ram = false;
1799 		nvm_size = hw->flash.flash_size;
1800 	} else if (ops == &ice_sram_region_ops) {
1801 		read_shadow_ram = true;
1802 		nvm_size = hw->flash.sr_words * 2u;
1803 	} else {
1804 		NL_SET_ERR_MSG_MOD(extack, "Unexpected region in snapshot function");
1805 		return -EOPNOTSUPP;
1806 	}
1807 
1808 	if (offset + size >= nvm_size) {
1809 		NL_SET_ERR_MSG_MOD(extack, "Cannot read beyond the region size");
1810 		return -ERANGE;
1811 	}
1812 
1813 	status = ice_acquire_nvm(hw, ICE_RES_READ);
1814 	if (status) {
1815 		dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n",
1816 			status, hw->adminq.sq_last_status);
1817 		NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore");
1818 		return -EIO;
1819 	}
1820 
1821 	status = ice_read_flat_nvm(hw, (u32)offset, &size, data,
1822 				   read_shadow_ram);
1823 	if (status) {
1824 		dev_dbg(dev, "ice_read_flat_nvm failed after reading %u bytes, err %d aq_err %d\n",
1825 			size, status, hw->adminq.sq_last_status);
1826 		NL_SET_ERR_MSG_MOD(extack, "Failed to read NVM contents");
1827 		ice_release_nvm(hw);
1828 		return -EIO;
1829 	}
1830 	ice_release_nvm(hw);
1831 
1832 	return 0;
1833 }
1834 
1835 /**
1836  * ice_devlink_devcaps_snapshot - Capture snapshot of device capabilities
1837  * @devlink: the devlink instance
1838  * @ops: the devlink region being snapshotted
1839  * @extack: extended ACK response structure
1840  * @data: on exit points to snapshot data buffer
1841  *
1842  * This function is called in response to the DEVLINK_CMD_REGION_TRIGGER for
1843  * the device-caps devlink region. It captures a snapshot of the device
1844  * capabilities reported by firmware.
1845  *
1846  * @returns zero on success, and updates the data pointer. Returns a non-zero
1847  * error code on failure.
1848  */
1849 static int
1850 ice_devlink_devcaps_snapshot(struct devlink *devlink,
1851 			     const struct devlink_region_ops *ops,
1852 			     struct netlink_ext_ack *extack, u8 **data)
1853 {
1854 	struct ice_pf *pf = devlink_priv(devlink);
1855 	struct device *dev = ice_pf_to_dev(pf);
1856 	struct ice_hw *hw = &pf->hw;
1857 	void *devcaps;
1858 	int status;
1859 
1860 	devcaps = vzalloc(ICE_AQ_MAX_BUF_LEN);
1861 	if (!devcaps)
1862 		return -ENOMEM;
1863 
1864 	status = ice_aq_list_caps(hw, devcaps, ICE_AQ_MAX_BUF_LEN, NULL,
1865 				  ice_aqc_opc_list_dev_caps, NULL);
1866 	if (status) {
1867 		dev_dbg(dev, "ice_aq_list_caps: failed to read device capabilities, err %d aq_err %d\n",
1868 			status, hw->adminq.sq_last_status);
1869 		NL_SET_ERR_MSG_MOD(extack, "Failed to read device capabilities");
1870 		vfree(devcaps);
1871 		return status;
1872 	}
1873 
1874 	*data = (u8 *)devcaps;
1875 
1876 	return 0;
1877 }
1878 
1879 static const struct devlink_region_ops ice_nvm_region_ops = {
1880 	.name = "nvm-flash",
1881 	.destructor = vfree,
1882 	.snapshot = ice_devlink_nvm_snapshot,
1883 	.read = ice_devlink_nvm_read,
1884 };
1885 
1886 static const struct devlink_region_ops ice_sram_region_ops = {
1887 	.name = "shadow-ram",
1888 	.destructor = vfree,
1889 	.snapshot = ice_devlink_nvm_snapshot,
1890 	.read = ice_devlink_nvm_read,
1891 };
1892 
1893 static const struct devlink_region_ops ice_devcaps_region_ops = {
1894 	.name = "device-caps",
1895 	.destructor = vfree,
1896 	.snapshot = ice_devlink_devcaps_snapshot,
1897 };
1898 
1899 /**
1900  * ice_devlink_init_regions - Initialize devlink regions
1901  * @pf: the PF device structure
1902  *
1903  * Create devlink regions used to enable access to dump the contents of the
1904  * flash memory on the device.
1905  */
1906 void ice_devlink_init_regions(struct ice_pf *pf)
1907 {
1908 	struct devlink *devlink = priv_to_devlink(pf);
1909 	struct device *dev = ice_pf_to_dev(pf);
1910 	u64 nvm_size, sram_size;
1911 
1912 	nvm_size = pf->hw.flash.flash_size;
1913 	pf->nvm_region = devl_region_create(devlink, &ice_nvm_region_ops, 1,
1914 					    nvm_size);
1915 	if (IS_ERR(pf->nvm_region)) {
1916 		dev_err(dev, "failed to create NVM devlink region, err %ld\n",
1917 			PTR_ERR(pf->nvm_region));
1918 		pf->nvm_region = NULL;
1919 	}
1920 
1921 	sram_size = pf->hw.flash.sr_words * 2u;
1922 	pf->sram_region = devl_region_create(devlink, &ice_sram_region_ops,
1923 					     1, sram_size);
1924 	if (IS_ERR(pf->sram_region)) {
1925 		dev_err(dev, "failed to create shadow-ram devlink region, err %ld\n",
1926 			PTR_ERR(pf->sram_region));
1927 		pf->sram_region = NULL;
1928 	}
1929 
1930 	pf->devcaps_region = devl_region_create(devlink,
1931 						&ice_devcaps_region_ops, 10,
1932 						ICE_AQ_MAX_BUF_LEN);
1933 	if (IS_ERR(pf->devcaps_region)) {
1934 		dev_err(dev, "failed to create device-caps devlink region, err %ld\n",
1935 			PTR_ERR(pf->devcaps_region));
1936 		pf->devcaps_region = NULL;
1937 	}
1938 }
1939 
1940 /**
1941  * ice_devlink_destroy_regions - Destroy devlink regions
1942  * @pf: the PF device structure
1943  *
1944  * Remove previously created regions for this PF.
1945  */
1946 void ice_devlink_destroy_regions(struct ice_pf *pf)
1947 {
1948 	if (pf->nvm_region)
1949 		devl_region_destroy(pf->nvm_region);
1950 
1951 	if (pf->sram_region)
1952 		devl_region_destroy(pf->sram_region);
1953 
1954 	if (pf->devcaps_region)
1955 		devl_region_destroy(pf->devcaps_region);
1956 }
1957