xref: /linux/drivers/net/ethernet/intel/ice/devlink/devlink.c (revision b2657259fce933ae0ba3e07cd0052fb6a8e90689)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2020, Intel Corporation. */
3 
4 #include <linux/vmalloc.h>
5 
6 #include "ice.h"
7 #include "ice_lib.h"
8 #include "devlink.h"
9 #include "port.h"
10 #include "ice_eswitch.h"
11 #include "ice_fw_update.h"
12 #include "ice_dcb_lib.h"
13 #include "ice_sf_eth.h"
14 
15 /* context for devlink info version reporting */
16 struct ice_info_ctx {
17 	char buf[128];
18 	struct ice_orom_info pending_orom;
19 	struct ice_nvm_info pending_nvm;
20 	struct ice_netlist_info pending_netlist;
21 	struct ice_hw_dev_caps dev_caps;
22 };
23 
24 /* The following functions are used to format specific strings for various
25  * devlink info versions. The ctx parameter is used to provide the storage
26  * buffer, as well as any ancillary information calculated when the info
27  * request was made.
28  *
29  * If a version does not exist, for example when attempting to get the
30  * inactive version of flash when there is no pending update, the function
31  * should leave the buffer in the ctx structure empty.
32  */
33 
34 static void ice_info_get_dsn(struct ice_pf *pf, struct ice_info_ctx *ctx)
35 {
36 	u8 dsn[8];
37 
38 	/* Copy the DSN into an array in Big Endian format */
39 	put_unaligned_be64(pci_get_dsn(pf->pdev), dsn);
40 
41 	snprintf(ctx->buf, sizeof(ctx->buf), "%8phD", dsn);
42 }
43 
44 static void ice_info_pba(struct ice_pf *pf, struct ice_info_ctx *ctx)
45 {
46 	struct ice_hw *hw = &pf->hw;
47 	int status;
48 
49 	status = ice_read_pba_string(hw, (u8 *)ctx->buf, sizeof(ctx->buf));
50 	if (status)
51 		/* We failed to locate the PBA, so just skip this entry */
52 		dev_dbg(ice_pf_to_dev(pf), "Failed to read Product Board Assembly string, status %d\n",
53 			status);
54 }
55 
56 static void ice_info_fw_mgmt(struct ice_pf *pf, struct ice_info_ctx *ctx)
57 {
58 	struct ice_hw *hw = &pf->hw;
59 
60 	snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u",
61 		 hw->fw_maj_ver, hw->fw_min_ver, hw->fw_patch);
62 }
63 
64 static void ice_info_fw_api(struct ice_pf *pf, struct ice_info_ctx *ctx)
65 {
66 	struct ice_hw *hw = &pf->hw;
67 
68 	snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u", hw->api_maj_ver,
69 		 hw->api_min_ver, hw->api_patch);
70 }
71 
72 static void ice_info_fw_build(struct ice_pf *pf, struct ice_info_ctx *ctx)
73 {
74 	struct ice_hw *hw = &pf->hw;
75 
76 	snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", hw->fw_build);
77 }
78 
79 static void ice_info_orom_ver(struct ice_pf *pf, struct ice_info_ctx *ctx)
80 {
81 	struct ice_orom_info *orom = &pf->hw.flash.orom;
82 
83 	snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u",
84 		 orom->major, orom->build, orom->patch);
85 }
86 
87 static void
88 ice_info_pending_orom_ver(struct ice_pf __always_unused *pf,
89 			  struct ice_info_ctx *ctx)
90 {
91 	struct ice_orom_info *orom = &ctx->pending_orom;
92 
93 	if (ctx->dev_caps.common_cap.nvm_update_pending_orom)
94 		snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u",
95 			 orom->major, orom->build, orom->patch);
96 }
97 
98 static void ice_info_nvm_ver(struct ice_pf *pf, struct ice_info_ctx *ctx)
99 {
100 	struct ice_nvm_info *nvm = &pf->hw.flash.nvm;
101 
102 	snprintf(ctx->buf, sizeof(ctx->buf), "%x.%02x", nvm->major, nvm->minor);
103 }
104 
105 static void
106 ice_info_pending_nvm_ver(struct ice_pf __always_unused *pf,
107 			 struct ice_info_ctx *ctx)
108 {
109 	struct ice_nvm_info *nvm = &ctx->pending_nvm;
110 
111 	if (ctx->dev_caps.common_cap.nvm_update_pending_nvm)
112 		snprintf(ctx->buf, sizeof(ctx->buf), "%x.%02x",
113 			 nvm->major, nvm->minor);
114 }
115 
116 static void ice_info_eetrack(struct ice_pf *pf, struct ice_info_ctx *ctx)
117 {
118 	struct ice_nvm_info *nvm = &pf->hw.flash.nvm;
119 
120 	snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", nvm->eetrack);
121 }
122 
123 static void
124 ice_info_pending_eetrack(struct ice_pf *pf, struct ice_info_ctx *ctx)
125 {
126 	struct ice_nvm_info *nvm = &ctx->pending_nvm;
127 
128 	if (ctx->dev_caps.common_cap.nvm_update_pending_nvm)
129 		snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", nvm->eetrack);
130 }
131 
132 static void ice_info_ddp_pkg_name(struct ice_pf *pf, struct ice_info_ctx *ctx)
133 {
134 	struct ice_hw *hw = &pf->hw;
135 
136 	snprintf(ctx->buf, sizeof(ctx->buf), "%s", hw->active_pkg_name);
137 }
138 
139 static void
140 ice_info_ddp_pkg_version(struct ice_pf *pf, struct ice_info_ctx *ctx)
141 {
142 	struct ice_pkg_ver *pkg = &pf->hw.active_pkg_ver;
143 
144 	snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u.%u",
145 		 pkg->major, pkg->minor, pkg->update, pkg->draft);
146 }
147 
148 static void
149 ice_info_ddp_pkg_bundle_id(struct ice_pf *pf, struct ice_info_ctx *ctx)
150 {
151 	snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", pf->hw.active_track_id);
152 }
153 
154 static void ice_info_netlist_ver(struct ice_pf *pf, struct ice_info_ctx *ctx)
155 {
156 	struct ice_netlist_info *netlist = &pf->hw.flash.netlist;
157 
158 	/* The netlist version fields are BCD formatted */
159 	snprintf(ctx->buf, sizeof(ctx->buf), "%x.%x.%x-%x.%x.%x",
160 		 netlist->major, netlist->minor,
161 		 netlist->type >> 16, netlist->type & 0xFFFF,
162 		 netlist->rev, netlist->cust_ver);
163 }
164 
165 static void ice_info_netlist_build(struct ice_pf *pf, struct ice_info_ctx *ctx)
166 {
167 	struct ice_netlist_info *netlist = &pf->hw.flash.netlist;
168 
169 	snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", netlist->hash);
170 }
171 
172 static void
173 ice_info_pending_netlist_ver(struct ice_pf __always_unused *pf,
174 			     struct ice_info_ctx *ctx)
175 {
176 	struct ice_netlist_info *netlist = &ctx->pending_netlist;
177 
178 	/* The netlist version fields are BCD formatted */
179 	if (ctx->dev_caps.common_cap.nvm_update_pending_netlist)
180 		snprintf(ctx->buf, sizeof(ctx->buf), "%x.%x.%x-%x.%x.%x",
181 			 netlist->major, netlist->minor,
182 			 netlist->type >> 16, netlist->type & 0xFFFF,
183 			 netlist->rev, netlist->cust_ver);
184 }
185 
186 static void
187 ice_info_pending_netlist_build(struct ice_pf __always_unused *pf,
188 			       struct ice_info_ctx *ctx)
189 {
190 	struct ice_netlist_info *netlist = &ctx->pending_netlist;
191 
192 	if (ctx->dev_caps.common_cap.nvm_update_pending_netlist)
193 		snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", netlist->hash);
194 }
195 
196 static void ice_info_cgu_fw_build(struct ice_pf *pf, struct ice_info_ctx *ctx)
197 {
198 	u32 id, cfg_ver, fw_ver;
199 
200 	if (!ice_is_feature_supported(pf, ICE_F_CGU))
201 		return;
202 	if (ice_aq_get_cgu_info(&pf->hw, &id, &cfg_ver, &fw_ver))
203 		return;
204 	snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u", id, cfg_ver, fw_ver);
205 }
206 
207 static void ice_info_cgu_id(struct ice_pf *pf, struct ice_info_ctx *ctx)
208 {
209 	if (!ice_is_feature_supported(pf, ICE_F_CGU))
210 		return;
211 	snprintf(ctx->buf, sizeof(ctx->buf), "%u", pf->hw.cgu_part_number);
212 }
213 
/* Initializers for one entry of the version table. Only stored entries carry
 * a fallback getter.
 */
#define fixed(_key, _getter) \
	{ .type = ICE_VERSION_FIXED, .key = (_key), \
	  .getter = (_getter), .fallback = NULL }
#define running(_key, _getter) \
	{ .type = ICE_VERSION_RUNNING, .key = (_key), \
	  .getter = (_getter), .fallback = NULL }
#define stored(_key, _getter, _fallback) \
	{ .type = ICE_VERSION_STORED, .key = (_key), \
	  .getter = (_getter), .fallback = (_fallback) }

/* combined() emits two table entries for one key: a running entry and a
 * stored entry. The running entry always reports the version produced by the
 * active handler. The stored entry tries the pending handler first and falls
 * back to the active handler when the pending handler reports nothing.
 *
 * A pending handler must check whether an update is pending for its flash
 * component and fill in the buffer only when a valid pending version is
 * available. This keeps the stored and running versions in sync when no
 * update is pending.
 */
#define combined(_key, _active, _pending) \
	running(_key, _active), \
	stored(_key, _pending, _active)
231 
/* Kind of devlink info version an entry of the version table reports */
enum ice_version_type {
	ICE_VERSION_FIXED = 0,	/* reported with the "fixed" put helper */
	ICE_VERSION_RUNNING = 1,	/* reported with the "running" put helper */
	ICE_VERSION_STORED = 2,	/* reported with the "stored" put helper */
};
237 
238 static const struct ice_devlink_version {
239 	enum ice_version_type type;
240 	const char *key;
241 	void (*getter)(struct ice_pf *pf, struct ice_info_ctx *ctx);
242 	void (*fallback)(struct ice_pf *pf, struct ice_info_ctx *ctx);
243 } ice_devlink_versions[] = {
244 	fixed(DEVLINK_INFO_VERSION_GENERIC_BOARD_ID, ice_info_pba),
245 	running(DEVLINK_INFO_VERSION_GENERIC_FW_MGMT, ice_info_fw_mgmt),
246 	running("fw.mgmt.api", ice_info_fw_api),
247 	running("fw.mgmt.build", ice_info_fw_build),
248 	combined(DEVLINK_INFO_VERSION_GENERIC_FW_UNDI, ice_info_orom_ver, ice_info_pending_orom_ver),
249 	combined("fw.psid.api", ice_info_nvm_ver, ice_info_pending_nvm_ver),
250 	combined(DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID, ice_info_eetrack, ice_info_pending_eetrack),
251 	running("fw.app.name", ice_info_ddp_pkg_name),
252 	running(DEVLINK_INFO_VERSION_GENERIC_FW_APP, ice_info_ddp_pkg_version),
253 	running("fw.app.bundle_id", ice_info_ddp_pkg_bundle_id),
254 	combined("fw.netlist", ice_info_netlist_ver, ice_info_pending_netlist_ver),
255 	combined("fw.netlist.build", ice_info_netlist_build, ice_info_pending_netlist_build),
256 	fixed("cgu.id", ice_info_cgu_id),
257 	running("fw.cgu", ice_info_cgu_fw_build),
258 };
259 
260 /**
261  * ice_devlink_info_get - .info_get devlink handler
262  * @devlink: devlink instance structure
263  * @req: the devlink info request
264  * @extack: extended netdev ack structure
265  *
266  * Callback for the devlink .info_get operation. Reports information about the
267  * device.
268  *
269  * Return: zero on success or an error code on failure.
270  */
271 static int ice_devlink_info_get(struct devlink *devlink,
272 				struct devlink_info_req *req,
273 				struct netlink_ext_ack *extack)
274 {
275 	struct ice_pf *pf = devlink_priv(devlink);
276 	struct device *dev = ice_pf_to_dev(pf);
277 	struct ice_hw *hw = &pf->hw;
278 	struct ice_info_ctx *ctx;
279 	size_t i;
280 	int err;
281 
282 	err = ice_wait_for_reset(pf, 10 * HZ);
283 	if (err) {
284 		NL_SET_ERR_MSG_MOD(extack, "Device is busy resetting");
285 		return err;
286 	}
287 
288 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
289 	if (!ctx)
290 		return -ENOMEM;
291 
292 	/* discover capabilities first */
293 	err = ice_discover_dev_caps(hw, &ctx->dev_caps);
294 	if (err) {
295 		dev_dbg(dev, "Failed to discover device capabilities, status %d aq_err %s\n",
296 			err, ice_aq_str(hw->adminq.sq_last_status));
297 		NL_SET_ERR_MSG_MOD(extack, "Unable to discover device capabilities");
298 		goto out_free_ctx;
299 	}
300 
301 	if (ctx->dev_caps.common_cap.nvm_update_pending_orom) {
302 		err = ice_get_inactive_orom_ver(hw, &ctx->pending_orom);
303 		if (err) {
304 			dev_dbg(dev, "Unable to read inactive Option ROM version data, status %d aq_err %s\n",
305 				err, ice_aq_str(hw->adminq.sq_last_status));
306 
307 			/* disable display of pending Option ROM */
308 			ctx->dev_caps.common_cap.nvm_update_pending_orom = false;
309 		}
310 	}
311 
312 	if (ctx->dev_caps.common_cap.nvm_update_pending_nvm) {
313 		err = ice_get_inactive_nvm_ver(hw, &ctx->pending_nvm);
314 		if (err) {
315 			dev_dbg(dev, "Unable to read inactive NVM version data, status %d aq_err %s\n",
316 				err, ice_aq_str(hw->adminq.sq_last_status));
317 
318 			/* disable display of pending Option ROM */
319 			ctx->dev_caps.common_cap.nvm_update_pending_nvm = false;
320 		}
321 	}
322 
323 	if (ctx->dev_caps.common_cap.nvm_update_pending_netlist) {
324 		err = ice_get_inactive_netlist_ver(hw, &ctx->pending_netlist);
325 		if (err) {
326 			dev_dbg(dev, "Unable to read inactive Netlist version data, status %d aq_err %s\n",
327 				err, ice_aq_str(hw->adminq.sq_last_status));
328 
329 			/* disable display of pending Option ROM */
330 			ctx->dev_caps.common_cap.nvm_update_pending_netlist = false;
331 		}
332 	}
333 
334 	ice_info_get_dsn(pf, ctx);
335 
336 	err = devlink_info_serial_number_put(req, ctx->buf);
337 	if (err) {
338 		NL_SET_ERR_MSG_MOD(extack, "Unable to set serial number");
339 		goto out_free_ctx;
340 	}
341 
342 	for (i = 0; i < ARRAY_SIZE(ice_devlink_versions); i++) {
343 		enum ice_version_type type = ice_devlink_versions[i].type;
344 		const char *key = ice_devlink_versions[i].key;
345 
346 		memset(ctx->buf, 0, sizeof(ctx->buf));
347 
348 		ice_devlink_versions[i].getter(pf, ctx);
349 
350 		/* If the default getter doesn't report a version, use the
351 		 * fallback function. This is primarily useful in the case of
352 		 * "stored" versions that want to report the same value as the
353 		 * running version in the normal case of no pending update.
354 		 */
355 		if (ctx->buf[0] == '\0' && ice_devlink_versions[i].fallback)
356 			ice_devlink_versions[i].fallback(pf, ctx);
357 
358 		/* Do not report missing versions */
359 		if (ctx->buf[0] == '\0')
360 			continue;
361 
362 		switch (type) {
363 		case ICE_VERSION_FIXED:
364 			err = devlink_info_version_fixed_put(req, key, ctx->buf);
365 			if (err) {
366 				NL_SET_ERR_MSG_MOD(extack, "Unable to set fixed version");
367 				goto out_free_ctx;
368 			}
369 			break;
370 		case ICE_VERSION_RUNNING:
371 			err = devlink_info_version_running_put_ext(req, key,
372 								   ctx->buf,
373 								   DEVLINK_INFO_VERSION_TYPE_COMPONENT);
374 			if (err) {
375 				NL_SET_ERR_MSG_MOD(extack, "Unable to set running version");
376 				goto out_free_ctx;
377 			}
378 			break;
379 		case ICE_VERSION_STORED:
380 			err = devlink_info_version_stored_put_ext(req, key,
381 								  ctx->buf,
382 								  DEVLINK_INFO_VERSION_TYPE_COMPONENT);
383 			if (err) {
384 				NL_SET_ERR_MSG_MOD(extack, "Unable to set stored version");
385 				goto out_free_ctx;
386 			}
387 			break;
388 		}
389 	}
390 
391 out_free_ctx:
392 	kfree(ctx);
393 	return err;
394 }
395 
396 /**
397  * ice_devlink_reload_empr_start - Start EMP reset to activate new firmware
398  * @pf: pointer to the pf instance
399  * @extack: netlink extended ACK structure
400  *
401  * Allow user to activate new Embedded Management Processor firmware by
402  * issuing device specific EMP reset. Called in response to
403  * a DEVLINK_CMD_RELOAD with the DEVLINK_RELOAD_ACTION_FW_ACTIVATE.
404  *
405  * Note that teardown and rebuild of the driver state happens automatically as
406  * part of an interrupt and watchdog task. This is because all physical
407  * functions on the device must be able to reset when an EMP reset occurs from
408  * any source.
409  */
410 static int
411 ice_devlink_reload_empr_start(struct ice_pf *pf,
412 			      struct netlink_ext_ack *extack)
413 {
414 	struct device *dev = ice_pf_to_dev(pf);
415 	struct ice_hw *hw = &pf->hw;
416 	u8 pending;
417 	int err;
418 
419 	err = ice_get_pending_updates(pf, &pending, extack);
420 	if (err)
421 		return err;
422 
423 	/* pending is a bitmask of which flash banks have a pending update,
424 	 * including the main NVM bank, the Option ROM bank, and the netlist
425 	 * bank. If any of these bits are set, then there is a pending update
426 	 * waiting to be activated.
427 	 */
428 	if (!pending) {
429 		NL_SET_ERR_MSG_MOD(extack, "No pending firmware update");
430 		return -ECANCELED;
431 	}
432 
433 	if (pf->fw_emp_reset_disabled) {
434 		NL_SET_ERR_MSG_MOD(extack, "EMP reset is not available. To activate firmware, a reboot or power cycle is needed");
435 		return -ECANCELED;
436 	}
437 
438 	dev_dbg(dev, "Issuing device EMP reset to activate firmware\n");
439 
440 	err = ice_aq_nvm_update_empr(hw);
441 	if (err) {
442 		dev_err(dev, "Failed to trigger EMP device reset to reload firmware, err %d aq_err %s\n",
443 			err, ice_aq_str(hw->adminq.sq_last_status));
444 		NL_SET_ERR_MSG_MOD(extack, "Failed to trigger EMP device reset to reload firmware");
445 		return err;
446 	}
447 
448 	return 0;
449 }
450 
/**
 * ice_devlink_reinit_down - unload given PF
 * @pf: pointer to the PF struct
 *
 * Unloads the PF, deconfigures its main VSI under the RTNL lock and then
 * deinitializes the device.
 */
static void ice_devlink_reinit_down(struct ice_pf *pf)
{
	struct ice_vsi *main_vsi;

	/* devl_lock is already held by the devlink API, do not take it */
	ice_unload(pf);

	rtnl_lock();
	main_vsi = ice_get_main_vsi(pf);
	ice_vsi_decfg(main_vsi);
	rtnl_unlock();

	ice_deinit_dev(pf);
}
464 
465 /**
466  * ice_devlink_reload_down - prepare for reload
467  * @devlink: pointer to the devlink instance to reload
468  * @netns_change: if true, the network namespace is changing
469  * @action: the action to perform
470  * @limit: limits on what reload should do, such as not resetting
471  * @extack: netlink extended ACK structure
472  */
473 static int
474 ice_devlink_reload_down(struct devlink *devlink, bool netns_change,
475 			enum devlink_reload_action action,
476 			enum devlink_reload_limit limit,
477 			struct netlink_ext_ack *extack)
478 {
479 	struct ice_pf *pf = devlink_priv(devlink);
480 
481 	switch (action) {
482 	case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
483 		if (ice_is_eswitch_mode_switchdev(pf)) {
484 			NL_SET_ERR_MSG_MOD(extack,
485 					   "Go to legacy mode before doing reinit");
486 			return -EOPNOTSUPP;
487 		}
488 		if (ice_is_adq_active(pf)) {
489 			NL_SET_ERR_MSG_MOD(extack,
490 					   "Turn off ADQ before doing reinit");
491 			return -EOPNOTSUPP;
492 		}
493 		if (ice_has_vfs(pf)) {
494 			NL_SET_ERR_MSG_MOD(extack,
495 					   "Remove all VFs before doing reinit");
496 			return -EOPNOTSUPP;
497 		}
498 		ice_devlink_reinit_down(pf);
499 		return 0;
500 	case DEVLINK_RELOAD_ACTION_FW_ACTIVATE:
501 		return ice_devlink_reload_empr_start(pf, extack);
502 	default:
503 		WARN_ON(1);
504 		return -EOPNOTSUPP;
505 	}
506 }
507 
508 /**
509  * ice_devlink_reload_empr_finish - Wait for EMP reset to finish
510  * @pf: pointer to the pf instance
511  * @extack: netlink extended ACK structure
512  *
513  * Wait for driver to finish rebuilding after EMP reset is completed. This
514  * includes time to wait for both the actual device reset as well as the time
515  * for the driver's rebuild to complete.
516  */
517 static int
518 ice_devlink_reload_empr_finish(struct ice_pf *pf,
519 			       struct netlink_ext_ack *extack)
520 {
521 	int err;
522 
523 	err = ice_wait_for_reset(pf, 60 * HZ);
524 	if (err) {
525 		NL_SET_ERR_MSG_MOD(extack, "Device still resetting after 1 minute");
526 		return err;
527 	}
528 
529 	return 0;
530 }
531 
532 /**
533  * ice_get_tx_topo_user_sel - Read user's choice from flash
534  * @pf: pointer to pf structure
535  * @layers: value read from flash will be saved here
536  *
537  * Reads user's preference for Tx Scheduler Topology Tree from PFA TLV.
538  *
539  * Return: zero when read was successful, negative values otherwise.
540  */
541 static int ice_get_tx_topo_user_sel(struct ice_pf *pf, uint8_t *layers)
542 {
543 	struct ice_aqc_nvm_tx_topo_user_sel usr_sel = {};
544 	struct ice_hw *hw = &pf->hw;
545 	int err;
546 
547 	err = ice_acquire_nvm(hw, ICE_RES_READ);
548 	if (err)
549 		return err;
550 
551 	err = ice_aq_read_nvm(hw, ICE_AQC_NVM_TX_TOPO_MOD_ID, 0,
552 			      sizeof(usr_sel), &usr_sel, true, true, NULL);
553 	if (err)
554 		goto exit_release_res;
555 
556 	if (usr_sel.data & ICE_AQC_NVM_TX_TOPO_USER_SEL)
557 		*layers = ICE_SCHED_5_LAYERS;
558 	else
559 		*layers = ICE_SCHED_9_LAYERS;
560 
561 exit_release_res:
562 	ice_release_nvm(hw);
563 
564 	return err;
565 }
566 
567 /**
568  * ice_update_tx_topo_user_sel - Save user's preference in flash
569  * @pf: pointer to pf structure
570  * @layers: value to be saved in flash
571  *
572  * Variable "layers" defines user's preference about number of layers in Tx
573  * Scheduler Topology Tree. This choice should be stored in PFA TLV field
574  * and be picked up by driver, next time during init.
575  *
576  * Return: zero when save was successful, negative values otherwise.
577  */
578 static int ice_update_tx_topo_user_sel(struct ice_pf *pf, int layers)
579 {
580 	struct ice_aqc_nvm_tx_topo_user_sel usr_sel = {};
581 	struct ice_hw *hw = &pf->hw;
582 	int err;
583 
584 	err = ice_acquire_nvm(hw, ICE_RES_WRITE);
585 	if (err)
586 		return err;
587 
588 	err = ice_aq_read_nvm(hw, ICE_AQC_NVM_TX_TOPO_MOD_ID, 0,
589 			      sizeof(usr_sel), &usr_sel, true, true, NULL);
590 	if (err)
591 		goto exit_release_res;
592 
593 	if (layers == ICE_SCHED_5_LAYERS)
594 		usr_sel.data |= ICE_AQC_NVM_TX_TOPO_USER_SEL;
595 	else
596 		usr_sel.data &= ~ICE_AQC_NVM_TX_TOPO_USER_SEL;
597 
598 	err = ice_write_one_nvm_block(pf, ICE_AQC_NVM_TX_TOPO_MOD_ID, 2,
599 				      sizeof(usr_sel.data), &usr_sel.data,
600 				      true, NULL, NULL);
601 exit_release_res:
602 	ice_release_nvm(hw);
603 
604 	return err;
605 }
606 
607 /**
608  * ice_devlink_tx_sched_layers_get - Get tx_scheduling_layers parameter
609  * @devlink: pointer to the devlink instance
610  * @id: the parameter ID to set
611  * @ctx: context to store the parameter value
612  *
613  * Return: zero on success and negative value on failure.
614  */
615 static int ice_devlink_tx_sched_layers_get(struct devlink *devlink, u32 id,
616 					   struct devlink_param_gset_ctx *ctx)
617 {
618 	struct ice_pf *pf = devlink_priv(devlink);
619 	int err;
620 
621 	err = ice_get_tx_topo_user_sel(pf, &ctx->val.vu8);
622 	if (err)
623 		return err;
624 
625 	return 0;
626 }
627 
628 /**
629  * ice_devlink_tx_sched_layers_set - Set tx_scheduling_layers parameter
630  * @devlink: pointer to the devlink instance
631  * @id: the parameter ID to set
632  * @ctx: context to get the parameter value
633  * @extack: netlink extended ACK structure
634  *
635  * Return: zero on success and negative value on failure.
636  */
637 static int ice_devlink_tx_sched_layers_set(struct devlink *devlink, u32 id,
638 					   struct devlink_param_gset_ctx *ctx,
639 					   struct netlink_ext_ack *extack)
640 {
641 	struct ice_pf *pf = devlink_priv(devlink);
642 	int err;
643 
644 	err = ice_update_tx_topo_user_sel(pf, ctx->val.vu8);
645 	if (err)
646 		return err;
647 
648 	NL_SET_ERR_MSG_MOD(extack,
649 			   "Tx scheduling layers have been changed on this device. You must do the PCI slot powercycle for the change to take effect.");
650 
651 	return 0;
652 }
653 
654 /**
655  * ice_devlink_tx_sched_layers_validate - Validate passed tx_scheduling_layers
656  *                                        parameter value
657  * @devlink: unused pointer to devlink instance
658  * @id: the parameter ID to validate
659  * @val: value to validate
660  * @extack: netlink extended ACK structure
661  *
662  * Supported values are:
663  * - 5 - five layers Tx Scheduler Topology Tree
664  * - 9 - nine layers Tx Scheduler Topology Tree
665  *
666  * Return: zero when passed parameter value is supported. Negative value on
667  * error.
668  */
669 static int ice_devlink_tx_sched_layers_validate(struct devlink *devlink, u32 id,
670 						union devlink_param_value val,
671 						struct netlink_ext_ack *extack)
672 {
673 	if (val.vu8 != ICE_SCHED_5_LAYERS && val.vu8 != ICE_SCHED_9_LAYERS) {
674 		NL_SET_ERR_MSG_MOD(extack,
675 				   "Wrong number of tx scheduler layers provided.");
676 		return -EINVAL;
677 	}
678 
679 	return 0;
680 }
681 
682 /**
683  * ice_tear_down_devlink_rate_tree - removes devlink-rate exported tree
684  * @pf: pf struct
685  *
686  * This function tears down tree exported during VF's creation.
687  */
688 void ice_tear_down_devlink_rate_tree(struct ice_pf *pf)
689 {
690 	struct devlink *devlink;
691 	struct ice_vf *vf;
692 	unsigned int bkt;
693 
694 	devlink = priv_to_devlink(pf);
695 
696 	devl_lock(devlink);
697 	mutex_lock(&pf->vfs.table_lock);
698 	ice_for_each_vf(pf, bkt, vf) {
699 		if (vf->devlink_port.devlink_rate)
700 			devl_rate_leaf_destroy(&vf->devlink_port);
701 	}
702 	mutex_unlock(&pf->vfs.table_lock);
703 
704 	devl_rate_nodes_destroy(devlink);
705 	devl_unlock(devlink);
706 }
707 
708 /**
709  * ice_enable_custom_tx - try to enable custom Tx feature
710  * @pf: pf struct
711  *
712  * This function tries to enable custom Tx feature,
713  * it's not possible to enable it, if DCB or ADQ is active.
714  */
715 static bool ice_enable_custom_tx(struct ice_pf *pf)
716 {
717 	struct ice_port_info *pi = ice_get_main_vsi(pf)->port_info;
718 	struct device *dev = ice_pf_to_dev(pf);
719 
720 	if (pi->is_custom_tx_enabled)
721 		/* already enabled, return true */
722 		return true;
723 
724 	if (ice_is_adq_active(pf)) {
725 		dev_err(dev, "ADQ active, can't modify Tx scheduler tree\n");
726 		return false;
727 	}
728 
729 	if (ice_is_dcb_active(pf)) {
730 		dev_err(dev, "DCB active, can't modify Tx scheduler tree\n");
731 		return false;
732 	}
733 
734 	pi->is_custom_tx_enabled = true;
735 
736 	return true;
737 }
738 
739 /**
740  * ice_traverse_tx_tree - traverse Tx scheduler tree
741  * @devlink: devlink struct
742  * @node: current node, used for recursion
743  * @tc_node: tc_node struct, that is treated as a root
744  * @pf: pf struct
745  *
746  * This function traverses Tx scheduler tree and exports
747  * entire structure to the devlink-rate.
748  */
749 static void ice_traverse_tx_tree(struct devlink *devlink, struct ice_sched_node *node,
750 				 struct ice_sched_node *tc_node, struct ice_pf *pf)
751 {
752 	struct devlink_rate *rate_node = NULL;
753 	struct ice_dynamic_port *sf;
754 	struct ice_vf *vf;
755 	int i;
756 
757 	if (node->rate_node)
758 		/* already added, skip to the next */
759 		goto traverse_children;
760 
761 	if (node->parent == tc_node) {
762 		/* create root node */
763 		rate_node = devl_rate_node_create(devlink, node, node->name, NULL);
764 	} else if (node->vsi_handle &&
765 		   pf->vsi[node->vsi_handle]->type == ICE_VSI_VF &&
766 		   pf->vsi[node->vsi_handle]->vf) {
767 		vf = pf->vsi[node->vsi_handle]->vf;
768 		if (!vf->devlink_port.devlink_rate)
769 			/* leaf nodes doesn't have children
770 			 * so we don't set rate_node
771 			 */
772 			devl_rate_leaf_create(&vf->devlink_port, node,
773 					      node->parent->rate_node);
774 	} else if (node->vsi_handle &&
775 		   pf->vsi[node->vsi_handle]->type == ICE_VSI_SF &&
776 		   pf->vsi[node->vsi_handle]->sf) {
777 		sf = pf->vsi[node->vsi_handle]->sf;
778 		if (!sf->devlink_port.devlink_rate)
779 			/* leaf nodes doesn't have children
780 			 * so we don't set rate_node
781 			 */
782 			devl_rate_leaf_create(&sf->devlink_port, node,
783 					      node->parent->rate_node);
784 	} else if (node->info.data.elem_type != ICE_AQC_ELEM_TYPE_LEAF &&
785 		   node->parent->rate_node) {
786 		rate_node = devl_rate_node_create(devlink, node, node->name,
787 						  node->parent->rate_node);
788 	}
789 
790 	if (rate_node && !IS_ERR(rate_node))
791 		node->rate_node = rate_node;
792 
793 traverse_children:
794 	for (i = 0; i < node->num_children; i++)
795 		ice_traverse_tx_tree(devlink, node->children[i], tc_node, pf);
796 }
797 
798 /**
799  * ice_devlink_rate_init_tx_topology - export Tx scheduler tree to devlink rate
800  * @devlink: devlink struct
801  * @vsi: main vsi struct
802  *
803  * This function finds a root node, then calls ice_traverse_tx tree, which
804  * traverses the tree and exports it's contents to devlink rate.
805  */
806 int ice_devlink_rate_init_tx_topology(struct devlink *devlink, struct ice_vsi *vsi)
807 {
808 	struct ice_port_info *pi = vsi->port_info;
809 	struct ice_sched_node *tc_node;
810 	struct ice_pf *pf = vsi->back;
811 	int i;
812 
813 	tc_node = pi->root->children[0];
814 	mutex_lock(&pi->sched_lock);
815 	for (i = 0; i < tc_node->num_children; i++)
816 		ice_traverse_tx_tree(devlink, tc_node->children[i], tc_node, pf);
817 	mutex_unlock(&pi->sched_lock);
818 
819 	return 0;
820 }
821 
822 static void ice_clear_rate_nodes(struct ice_sched_node *node)
823 {
824 	node->rate_node = NULL;
825 
826 	for (int i = 0; i < node->num_children; i++)
827 		ice_clear_rate_nodes(node->children[i]);
828 }
829 
830 /**
831  * ice_devlink_rate_clear_tx_topology - clear node->rate_node
832  * @vsi: main vsi struct
833  *
834  * Clear rate_node to cleanup creation of Tx topology.
835  *
836  */
837 void ice_devlink_rate_clear_tx_topology(struct ice_vsi *vsi)
838 {
839 	struct ice_port_info *pi = vsi->port_info;
840 
841 	mutex_lock(&pi->sched_lock);
842 	ice_clear_rate_nodes(pi->root->children[0]);
843 	mutex_unlock(&pi->sched_lock);
844 }
845 
846 /**
847  * ice_set_object_tx_share - sets node scheduling parameter
848  * @pi: devlink struct instance
849  * @node: node struct instance
850  * @bw: bandwidth in bytes per second
851  * @extack: extended netdev ack structure
852  *
853  * This function sets ICE_MIN_BW scheduling BW limit.
854  */
855 static int ice_set_object_tx_share(struct ice_port_info *pi, struct ice_sched_node *node,
856 				   u64 bw, struct netlink_ext_ack *extack)
857 {
858 	int status;
859 
860 	mutex_lock(&pi->sched_lock);
861 	/* converts bytes per second to kilo bits per second */
862 	node->tx_share = div_u64(bw, 125);
863 	status = ice_sched_set_node_bw_lmt(pi, node, ICE_MIN_BW, node->tx_share);
864 	mutex_unlock(&pi->sched_lock);
865 
866 	if (status)
867 		NL_SET_ERR_MSG_MOD(extack, "Can't set scheduling node tx_share");
868 
869 	return status;
870 }
871 
872 /**
873  * ice_set_object_tx_max - sets node scheduling parameter
874  * @pi: devlink struct instance
875  * @node: node struct instance
876  * @bw: bandwidth in bytes per second
877  * @extack: extended netdev ack structure
878  *
879  * This function sets ICE_MAX_BW scheduling BW limit.
880  */
881 static int ice_set_object_tx_max(struct ice_port_info *pi, struct ice_sched_node *node,
882 				 u64 bw, struct netlink_ext_ack *extack)
883 {
884 	int status;
885 
886 	mutex_lock(&pi->sched_lock);
887 	/* converts bytes per second value to kilo bits per second */
888 	node->tx_max = div_u64(bw, 125);
889 	status = ice_sched_set_node_bw_lmt(pi, node, ICE_MAX_BW, node->tx_max);
890 	mutex_unlock(&pi->sched_lock);
891 
892 	if (status)
893 		NL_SET_ERR_MSG_MOD(extack, "Can't set scheduling node tx_max");
894 
895 	return status;
896 }
897 
898 /**
899  * ice_set_object_tx_priority - sets node scheduling parameter
900  * @pi: devlink struct instance
901  * @node: node struct instance
902  * @priority: value representing priority for strict priority arbitration
903  * @extack: extended netdev ack structure
904  *
905  * This function sets priority of node among siblings.
906  */
907 static int ice_set_object_tx_priority(struct ice_port_info *pi, struct ice_sched_node *node,
908 				      u32 priority, struct netlink_ext_ack *extack)
909 {
910 	int status;
911 
912 	if (priority >= 8) {
913 		NL_SET_ERR_MSG_MOD(extack, "Priority should be less than 8");
914 		return -EINVAL;
915 	}
916 
917 	mutex_lock(&pi->sched_lock);
918 	node->tx_priority = priority;
919 	status = ice_sched_set_node_priority(pi, node, node->tx_priority);
920 	mutex_unlock(&pi->sched_lock);
921 
922 	if (status)
923 		NL_SET_ERR_MSG_MOD(extack, "Can't set scheduling node tx_priority");
924 
925 	return status;
926 }
927 
928 /**
929  * ice_set_object_tx_weight - sets node scheduling parameter
930  * @pi: devlink struct instance
931  * @node: node struct instance
932  * @weight: value represeting relative weight for WFQ arbitration
933  * @extack: extended netdev ack structure
934  *
935  * This function sets node weight for WFQ algorithm.
936  */
937 static int ice_set_object_tx_weight(struct ice_port_info *pi, struct ice_sched_node *node,
938 				    u32 weight, struct netlink_ext_ack *extack)
939 {
940 	int status;
941 
942 	if (weight > 200 || weight < 1) {
943 		NL_SET_ERR_MSG_MOD(extack, "Weight must be between 1 and 200");
944 		return -EINVAL;
945 	}
946 
947 	mutex_lock(&pi->sched_lock);
948 	node->tx_weight = weight;
949 	status = ice_sched_set_node_weight(pi, node, node->tx_weight);
950 	mutex_unlock(&pi->sched_lock);
951 
952 	if (status)
953 		NL_SET_ERR_MSG_MOD(extack, "Can't set scheduling node tx_weight");
954 
955 	return status;
956 }
957 
958 /**
959  * ice_get_pi_from_dev_rate - get port info from devlink_rate
960  * @rate_node: devlink struct instance
961  *
962  * This function returns corresponding port_info struct of devlink_rate
963  */
964 static struct ice_port_info *ice_get_pi_from_dev_rate(struct devlink_rate *rate_node)
965 {
966 	struct ice_pf *pf = devlink_priv(rate_node->devlink);
967 
968 	return ice_get_main_vsi(pf)->port_info;
969 }
970 
971 static int ice_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
972 				     struct netlink_ext_ack *extack)
973 {
974 	struct ice_sched_node *node;
975 	struct ice_port_info *pi;
976 
977 	pi = ice_get_pi_from_dev_rate(rate_node);
978 
979 	if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink)))
980 		return -EBUSY;
981 
982 	/* preallocate memory for ice_sched_node */
983 	node = devm_kzalloc(ice_hw_to_dev(pi->hw), sizeof(*node), GFP_KERNEL);
984 	*priv = node;
985 
986 	return 0;
987 }
988 
989 static int ice_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
990 				     struct netlink_ext_ack *extack)
991 {
992 	struct ice_sched_node *node, *tc_node;
993 	struct ice_port_info *pi;
994 
995 	pi = ice_get_pi_from_dev_rate(rate_node);
996 	tc_node = pi->root->children[0];
997 	node = priv;
998 
999 	if (!rate_node->parent || !node || tc_node == node || !extack)
1000 		return 0;
1001 
1002 	if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink)))
1003 		return -EBUSY;
1004 
1005 	/* can't allow to delete a node with children */
1006 	if (node->num_children)
1007 		return -EINVAL;
1008 
1009 	mutex_lock(&pi->sched_lock);
1010 	ice_free_sched_node(pi, node);
1011 	mutex_unlock(&pi->sched_lock);
1012 
1013 	return 0;
1014 }
1015 
1016 static int ice_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
1017 					    u64 tx_max, struct netlink_ext_ack *extack)
1018 {
1019 	struct ice_sched_node *node = priv;
1020 
1021 	if (!ice_enable_custom_tx(devlink_priv(rate_leaf->devlink)))
1022 		return -EBUSY;
1023 
1024 	if (!node)
1025 		return 0;
1026 
1027 	return ice_set_object_tx_max(ice_get_pi_from_dev_rate(rate_leaf),
1028 				     node, tx_max, extack);
1029 }
1030 
1031 static int ice_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
1032 					      u64 tx_share, struct netlink_ext_ack *extack)
1033 {
1034 	struct ice_sched_node *node = priv;
1035 
1036 	if (!ice_enable_custom_tx(devlink_priv(rate_leaf->devlink)))
1037 		return -EBUSY;
1038 
1039 	if (!node)
1040 		return 0;
1041 
1042 	return ice_set_object_tx_share(ice_get_pi_from_dev_rate(rate_leaf), node,
1043 				       tx_share, extack);
1044 }
1045 
1046 static int ice_devlink_rate_leaf_tx_priority_set(struct devlink_rate *rate_leaf, void *priv,
1047 						 u32 tx_priority, struct netlink_ext_ack *extack)
1048 {
1049 	struct ice_sched_node *node = priv;
1050 
1051 	if (!ice_enable_custom_tx(devlink_priv(rate_leaf->devlink)))
1052 		return -EBUSY;
1053 
1054 	if (!node)
1055 		return 0;
1056 
1057 	return ice_set_object_tx_priority(ice_get_pi_from_dev_rate(rate_leaf), node,
1058 					  tx_priority, extack);
1059 }
1060 
1061 static int ice_devlink_rate_leaf_tx_weight_set(struct devlink_rate *rate_leaf, void *priv,
1062 					       u32 tx_weight, struct netlink_ext_ack *extack)
1063 {
1064 	struct ice_sched_node *node = priv;
1065 
1066 	if (!ice_enable_custom_tx(devlink_priv(rate_leaf->devlink)))
1067 		return -EBUSY;
1068 
1069 	if (!node)
1070 		return 0;
1071 
1072 	return ice_set_object_tx_weight(ice_get_pi_from_dev_rate(rate_leaf), node,
1073 					tx_weight, extack);
1074 }
1075 
1076 static int ice_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv,
1077 					    u64 tx_max, struct netlink_ext_ack *extack)
1078 {
1079 	struct ice_sched_node *node = priv;
1080 
1081 	if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink)))
1082 		return -EBUSY;
1083 
1084 	if (!node)
1085 		return 0;
1086 
1087 	return ice_set_object_tx_max(ice_get_pi_from_dev_rate(rate_node),
1088 				     node, tx_max, extack);
1089 }
1090 
1091 static int ice_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
1092 					      u64 tx_share, struct netlink_ext_ack *extack)
1093 {
1094 	struct ice_sched_node *node = priv;
1095 
1096 	if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink)))
1097 		return -EBUSY;
1098 
1099 	if (!node)
1100 		return 0;
1101 
1102 	return ice_set_object_tx_share(ice_get_pi_from_dev_rate(rate_node),
1103 				       node, tx_share, extack);
1104 }
1105 
1106 static int ice_devlink_rate_node_tx_priority_set(struct devlink_rate *rate_node, void *priv,
1107 						 u32 tx_priority, struct netlink_ext_ack *extack)
1108 {
1109 	struct ice_sched_node *node = priv;
1110 
1111 	if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink)))
1112 		return -EBUSY;
1113 
1114 	if (!node)
1115 		return 0;
1116 
1117 	return ice_set_object_tx_priority(ice_get_pi_from_dev_rate(rate_node),
1118 					  node, tx_priority, extack);
1119 }
1120 
1121 static int ice_devlink_rate_node_tx_weight_set(struct devlink_rate *rate_node, void *priv,
1122 					       u32 tx_weight, struct netlink_ext_ack *extack)
1123 {
1124 	struct ice_sched_node *node = priv;
1125 
1126 	if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink)))
1127 		return -EBUSY;
1128 
1129 	if (!node)
1130 		return 0;
1131 
1132 	return ice_set_object_tx_weight(ice_get_pi_from_dev_rate(rate_node),
1133 					node, tx_weight, extack);
1134 }
1135 
1136 static int ice_devlink_set_parent(struct devlink_rate *devlink_rate,
1137 				  struct devlink_rate *parent,
1138 				  void *priv, void *parent_priv,
1139 				  struct netlink_ext_ack *extack)
1140 {
1141 	struct ice_port_info *pi = ice_get_pi_from_dev_rate(devlink_rate);
1142 	struct ice_sched_node *tc_node, *node, *parent_node;
1143 	u16 num_nodes_added;
1144 	u32 first_node_teid;
1145 	u32 node_teid;
1146 	int status;
1147 
1148 	tc_node = pi->root->children[0];
1149 	node = priv;
1150 
1151 	if (!extack)
1152 		return 0;
1153 
1154 	if (!ice_enable_custom_tx(devlink_priv(devlink_rate->devlink)))
1155 		return -EBUSY;
1156 
1157 	if (!parent) {
1158 		if (!node || tc_node == node || node->num_children)
1159 			return -EINVAL;
1160 
1161 		mutex_lock(&pi->sched_lock);
1162 		ice_free_sched_node(pi, node);
1163 		mutex_unlock(&pi->sched_lock);
1164 
1165 		return 0;
1166 	}
1167 
1168 	parent_node = parent_priv;
1169 
1170 	/* if the node doesn't exist, create it */
1171 	if (!node->parent) {
1172 		mutex_lock(&pi->sched_lock);
1173 		status = ice_sched_add_elems(pi, tc_node, parent_node,
1174 					     parent_node->tx_sched_layer + 1,
1175 					     1, &num_nodes_added, &first_node_teid,
1176 					     &node);
1177 		mutex_unlock(&pi->sched_lock);
1178 
1179 		if (status) {
1180 			NL_SET_ERR_MSG_MOD(extack, "Can't add a new node");
1181 			return status;
1182 		}
1183 
1184 		if (devlink_rate->tx_share)
1185 			ice_set_object_tx_share(pi, node, devlink_rate->tx_share, extack);
1186 		if (devlink_rate->tx_max)
1187 			ice_set_object_tx_max(pi, node, devlink_rate->tx_max, extack);
1188 		if (devlink_rate->tx_priority)
1189 			ice_set_object_tx_priority(pi, node, devlink_rate->tx_priority, extack);
1190 		if (devlink_rate->tx_weight)
1191 			ice_set_object_tx_weight(pi, node, devlink_rate->tx_weight, extack);
1192 	} else {
1193 		node_teid = le32_to_cpu(node->info.node_teid);
1194 		mutex_lock(&pi->sched_lock);
1195 		status = ice_sched_move_nodes(pi, parent_node, 1, &node_teid);
1196 		mutex_unlock(&pi->sched_lock);
1197 
1198 		if (status)
1199 			NL_SET_ERR_MSG_MOD(extack, "Can't move existing node to a new parent");
1200 	}
1201 
1202 	return status;
1203 }
1204 
1205 static void ice_set_min_max_msix(struct ice_pf *pf)
1206 {
1207 	struct devlink *devlink = priv_to_devlink(pf);
1208 	union devlink_param_value val;
1209 	int err;
1210 
1211 	err = devl_param_driverinit_value_get(devlink,
1212 					      DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN,
1213 					      &val);
1214 	if (!err)
1215 		pf->msix.min = val.vu32;
1216 
1217 	err = devl_param_driverinit_value_get(devlink,
1218 					      DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX,
1219 					      &val);
1220 	if (!err)
1221 		pf->msix.max = val.vu32;
1222 }
1223 
1224 /**
1225  * ice_devlink_reinit_up - do reinit of the given PF
1226  * @pf: pointer to the PF struct
1227  */
1228 static int ice_devlink_reinit_up(struct ice_pf *pf)
1229 {
1230 	struct ice_vsi *vsi = ice_get_main_vsi(pf);
1231 	int err;
1232 
1233 	err = ice_init_hw(&pf->hw);
1234 	if (err) {
1235 		dev_err(ice_pf_to_dev(pf), "ice_init_hw failed: %d\n", err);
1236 		return err;
1237 	}
1238 
1239 	/* load MSI-X values */
1240 	ice_set_min_max_msix(pf);
1241 
1242 	err = ice_init_dev(pf);
1243 	if (err)
1244 		goto unroll_hw_init;
1245 
1246 	vsi->flags = ICE_VSI_FLAG_INIT;
1247 
1248 	rtnl_lock();
1249 	err = ice_vsi_cfg(vsi);
1250 	rtnl_unlock();
1251 	if (err)
1252 		goto err_vsi_cfg;
1253 
1254 	/* No need to take devl_lock, it's already taken by devlink API */
1255 	err = ice_load(pf);
1256 	if (err)
1257 		goto err_load;
1258 
1259 	return 0;
1260 
1261 err_load:
1262 	rtnl_lock();
1263 	ice_vsi_decfg(vsi);
1264 	rtnl_unlock();
1265 err_vsi_cfg:
1266 	ice_deinit_dev(pf);
1267 unroll_hw_init:
1268 	ice_deinit_hw(&pf->hw);
1269 	return err;
1270 }
1271 
1272 /**
1273  * ice_devlink_reload_up - do reload up after reinit
1274  * @devlink: pointer to the devlink instance reloading
1275  * @action: the action requested
1276  * @limit: limits imposed by userspace, such as not resetting
1277  * @actions_performed: on return, indicate what actions actually performed
1278  * @extack: netlink extended ACK structure
1279  */
1280 static int
1281 ice_devlink_reload_up(struct devlink *devlink,
1282 		      enum devlink_reload_action action,
1283 		      enum devlink_reload_limit limit,
1284 		      u32 *actions_performed,
1285 		      struct netlink_ext_ack *extack)
1286 {
1287 	struct ice_pf *pf = devlink_priv(devlink);
1288 
1289 	switch (action) {
1290 	case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
1291 		*actions_performed = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT);
1292 		return ice_devlink_reinit_up(pf);
1293 	case DEVLINK_RELOAD_ACTION_FW_ACTIVATE:
1294 		*actions_performed = BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE);
1295 		return ice_devlink_reload_empr_finish(pf, extack);
1296 	default:
1297 		WARN_ON(1);
1298 		return -EOPNOTSUPP;
1299 	}
1300 }
1301 
1302 static const struct devlink_ops ice_devlink_ops = {
1303 	.supported_flash_update_params = DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK,
1304 	.reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) |
1305 			  BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE),
1306 	.reload_down = ice_devlink_reload_down,
1307 	.reload_up = ice_devlink_reload_up,
1308 	.eswitch_mode_get = ice_eswitch_mode_get,
1309 	.eswitch_mode_set = ice_eswitch_mode_set,
1310 	.info_get = ice_devlink_info_get,
1311 	.flash_update = ice_devlink_flash_update,
1312 
1313 	.rate_node_new = ice_devlink_rate_node_new,
1314 	.rate_node_del = ice_devlink_rate_node_del,
1315 
1316 	.rate_leaf_tx_max_set = ice_devlink_rate_leaf_tx_max_set,
1317 	.rate_leaf_tx_share_set = ice_devlink_rate_leaf_tx_share_set,
1318 	.rate_leaf_tx_priority_set = ice_devlink_rate_leaf_tx_priority_set,
1319 	.rate_leaf_tx_weight_set = ice_devlink_rate_leaf_tx_weight_set,
1320 
1321 	.rate_node_tx_max_set = ice_devlink_rate_node_tx_max_set,
1322 	.rate_node_tx_share_set = ice_devlink_rate_node_tx_share_set,
1323 	.rate_node_tx_priority_set = ice_devlink_rate_node_tx_priority_set,
1324 	.rate_node_tx_weight_set = ice_devlink_rate_node_tx_weight_set,
1325 
1326 	.rate_leaf_parent_set = ice_devlink_set_parent,
1327 	.rate_node_parent_set = ice_devlink_set_parent,
1328 
1329 	.port_new = ice_devlink_port_new,
1330 };
1331 
1332 static const struct devlink_ops ice_sf_devlink_ops;
1333 
1334 static int
1335 ice_devlink_enable_roce_get(struct devlink *devlink, u32 id,
1336 			    struct devlink_param_gset_ctx *ctx)
1337 {
1338 	struct ice_pf *pf = devlink_priv(devlink);
1339 
1340 	ctx->val.vbool = pf->rdma_mode & IIDC_RDMA_PROTOCOL_ROCEV2 ? true : false;
1341 
1342 	return 0;
1343 }
1344 
1345 static int ice_devlink_enable_roce_set(struct devlink *devlink, u32 id,
1346 				       struct devlink_param_gset_ctx *ctx,
1347 				       struct netlink_ext_ack *extack)
1348 {
1349 	struct ice_pf *pf = devlink_priv(devlink);
1350 	bool roce_ena = ctx->val.vbool;
1351 	int ret;
1352 
1353 	if (!roce_ena) {
1354 		ice_unplug_aux_dev(pf);
1355 		pf->rdma_mode &= ~IIDC_RDMA_PROTOCOL_ROCEV2;
1356 		return 0;
1357 	}
1358 
1359 	pf->rdma_mode |= IIDC_RDMA_PROTOCOL_ROCEV2;
1360 	ret = ice_plug_aux_dev(pf);
1361 	if (ret)
1362 		pf->rdma_mode &= ~IIDC_RDMA_PROTOCOL_ROCEV2;
1363 
1364 	return ret;
1365 }
1366 
1367 static int
1368 ice_devlink_enable_roce_validate(struct devlink *devlink, u32 id,
1369 				 union devlink_param_value val,
1370 				 struct netlink_ext_ack *extack)
1371 {
1372 	struct ice_pf *pf = devlink_priv(devlink);
1373 
1374 	if (!test_bit(ICE_FLAG_RDMA_ENA, pf->flags))
1375 		return -EOPNOTSUPP;
1376 
1377 	if (pf->rdma_mode & IIDC_RDMA_PROTOCOL_IWARP) {
1378 		NL_SET_ERR_MSG_MOD(extack, "iWARP is currently enabled. This device cannot enable iWARP and RoCEv2 simultaneously");
1379 		return -EOPNOTSUPP;
1380 	}
1381 
1382 	return 0;
1383 }
1384 
1385 static int
1386 ice_devlink_enable_iw_get(struct devlink *devlink, u32 id,
1387 			  struct devlink_param_gset_ctx *ctx)
1388 {
1389 	struct ice_pf *pf = devlink_priv(devlink);
1390 
1391 	ctx->val.vbool = pf->rdma_mode & IIDC_RDMA_PROTOCOL_IWARP;
1392 
1393 	return 0;
1394 }
1395 
1396 static int ice_devlink_enable_iw_set(struct devlink *devlink, u32 id,
1397 				     struct devlink_param_gset_ctx *ctx,
1398 				     struct netlink_ext_ack *extack)
1399 {
1400 	struct ice_pf *pf = devlink_priv(devlink);
1401 	bool iw_ena = ctx->val.vbool;
1402 	int ret;
1403 
1404 	if (!iw_ena) {
1405 		ice_unplug_aux_dev(pf);
1406 		pf->rdma_mode &= ~IIDC_RDMA_PROTOCOL_IWARP;
1407 		return 0;
1408 	}
1409 
1410 	pf->rdma_mode |= IIDC_RDMA_PROTOCOL_IWARP;
1411 	ret = ice_plug_aux_dev(pf);
1412 	if (ret)
1413 		pf->rdma_mode &= ~IIDC_RDMA_PROTOCOL_IWARP;
1414 
1415 	return ret;
1416 }
1417 
1418 static int
1419 ice_devlink_enable_iw_validate(struct devlink *devlink, u32 id,
1420 			       union devlink_param_value val,
1421 			       struct netlink_ext_ack *extack)
1422 {
1423 	struct ice_pf *pf = devlink_priv(devlink);
1424 
1425 	if (!test_bit(ICE_FLAG_RDMA_ENA, pf->flags))
1426 		return -EOPNOTSUPP;
1427 
1428 	if (pf->rdma_mode & IIDC_RDMA_PROTOCOL_ROCEV2) {
1429 		NL_SET_ERR_MSG_MOD(extack, "RoCEv2 is currently enabled. This device cannot enable iWARP and RoCEv2 simultaneously");
1430 		return -EOPNOTSUPP;
1431 	}
1432 
1433 	return 0;
1434 }
1435 
1436 #define DEVLINK_LOCAL_FWD_DISABLED_STR "disabled"
1437 #define DEVLINK_LOCAL_FWD_ENABLED_STR "enabled"
1438 #define DEVLINK_LOCAL_FWD_PRIORITIZED_STR "prioritized"
1439 
1440 /**
1441  * ice_devlink_local_fwd_mode_to_str - Get string for local_fwd mode.
1442  * @mode: local forwarding for mode used in port_info struct.
1443  *
1444  * Return: Mode respective string or "Invalid".
1445  */
1446 static const char *
1447 ice_devlink_local_fwd_mode_to_str(enum ice_local_fwd_mode mode)
1448 {
1449 	switch (mode) {
1450 	case ICE_LOCAL_FWD_MODE_ENABLED:
1451 		return DEVLINK_LOCAL_FWD_ENABLED_STR;
1452 	case ICE_LOCAL_FWD_MODE_PRIORITIZED:
1453 		return DEVLINK_LOCAL_FWD_PRIORITIZED_STR;
1454 	case ICE_LOCAL_FWD_MODE_DISABLED:
1455 		return DEVLINK_LOCAL_FWD_DISABLED_STR;
1456 	}
1457 
1458 	return "Invalid";
1459 }
1460 
1461 /**
1462  * ice_devlink_local_fwd_str_to_mode - Get local_fwd mode from string name.
1463  * @mode_str: local forwarding mode string.
1464  *
1465  * Return: Mode value or negative number if invalid.
1466  */
1467 static int ice_devlink_local_fwd_str_to_mode(const char *mode_str)
1468 {
1469 	if (!strcmp(mode_str, DEVLINK_LOCAL_FWD_ENABLED_STR))
1470 		return ICE_LOCAL_FWD_MODE_ENABLED;
1471 	else if (!strcmp(mode_str, DEVLINK_LOCAL_FWD_PRIORITIZED_STR))
1472 		return ICE_LOCAL_FWD_MODE_PRIORITIZED;
1473 	else if (!strcmp(mode_str, DEVLINK_LOCAL_FWD_DISABLED_STR))
1474 		return ICE_LOCAL_FWD_MODE_DISABLED;
1475 
1476 	return -EINVAL;
1477 }
1478 
1479 /**
1480  * ice_devlink_local_fwd_get - Get local_fwd parameter.
1481  * @devlink: Pointer to the devlink instance.
1482  * @id: The parameter ID to set.
1483  * @ctx: Context to store the parameter value.
1484  *
1485  * Return: Zero.
1486  */
1487 static int ice_devlink_local_fwd_get(struct devlink *devlink, u32 id,
1488 				     struct devlink_param_gset_ctx *ctx)
1489 {
1490 	struct ice_pf *pf = devlink_priv(devlink);
1491 	struct ice_port_info *pi;
1492 	const char *mode_str;
1493 
1494 	pi = pf->hw.port_info;
1495 	mode_str = ice_devlink_local_fwd_mode_to_str(pi->local_fwd_mode);
1496 	snprintf(ctx->val.vstr, sizeof(ctx->val.vstr), "%s", mode_str);
1497 
1498 	return 0;
1499 }
1500 
1501 /**
1502  * ice_devlink_local_fwd_set - Set local_fwd parameter.
1503  * @devlink: Pointer to the devlink instance.
1504  * @id: The parameter ID to set.
1505  * @ctx: Context to get the parameter value.
1506  * @extack: Netlink extended ACK structure.
1507  *
1508  * Return: Zero.
1509  */
1510 static int ice_devlink_local_fwd_set(struct devlink *devlink, u32 id,
1511 				     struct devlink_param_gset_ctx *ctx,
1512 				     struct netlink_ext_ack *extack)
1513 {
1514 	int new_local_fwd_mode = ice_devlink_local_fwd_str_to_mode(ctx->val.vstr);
1515 	struct ice_pf *pf = devlink_priv(devlink);
1516 	struct device *dev = ice_pf_to_dev(pf);
1517 	struct ice_port_info *pi;
1518 
1519 	pi = pf->hw.port_info;
1520 	if (pi->local_fwd_mode != new_local_fwd_mode) {
1521 		pi->local_fwd_mode = new_local_fwd_mode;
1522 		dev_info(dev, "Setting local_fwd to %s\n", ctx->val.vstr);
1523 		ice_schedule_reset(pf, ICE_RESET_CORER);
1524 	}
1525 
1526 	return 0;
1527 }
1528 
1529 /**
1530  * ice_devlink_local_fwd_validate - Validate passed local_fwd parameter value.
1531  * @devlink: Unused pointer to devlink instance.
1532  * @id: The parameter ID to validate.
1533  * @val: Value to validate.
1534  * @extack: Netlink extended ACK structure.
1535  *
1536  * Supported values are:
1537  * "enabled" - local_fwd is enabled, "disabled" - local_fwd is disabled
1538  * "prioritized" - local_fwd traffic is prioritized in scheduling.
1539  *
1540  * Return: Zero when passed parameter value is supported. Negative value on
1541  * error.
1542  */
1543 static int ice_devlink_local_fwd_validate(struct devlink *devlink, u32 id,
1544 					  union devlink_param_value val,
1545 					  struct netlink_ext_ack *extack)
1546 {
1547 	if (ice_devlink_local_fwd_str_to_mode(val.vstr) < 0) {
1548 		NL_SET_ERR_MSG_MOD(extack, "Error: Requested value is not supported.");
1549 		return -EINVAL;
1550 	}
1551 
1552 	return 0;
1553 }
1554 
1555 static int
1556 ice_devlink_msix_max_pf_validate(struct devlink *devlink, u32 id,
1557 				 union devlink_param_value val,
1558 				 struct netlink_ext_ack *extack)
1559 {
1560 	struct ice_pf *pf = devlink_priv(devlink);
1561 
1562 	if (val.vu32 > pf->hw.func_caps.common_cap.num_msix_vectors)
1563 		return -EINVAL;
1564 
1565 	return 0;
1566 }
1567 
1568 static int
1569 ice_devlink_msix_min_pf_validate(struct devlink *devlink, u32 id,
1570 				 union devlink_param_value val,
1571 				 struct netlink_ext_ack *extack)
1572 {
1573 	if (val.vu32 < ICE_MIN_MSIX)
1574 		return -EINVAL;
1575 
1576 	return 0;
1577 }
1578 
1579 enum ice_param_id {
1580 	ICE_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
1581 	ICE_DEVLINK_PARAM_ID_TX_SCHED_LAYERS,
1582 	ICE_DEVLINK_PARAM_ID_LOCAL_FWD,
1583 };
1584 
1585 static const struct devlink_param ice_dvl_rdma_params[] = {
1586 	DEVLINK_PARAM_GENERIC(ENABLE_ROCE, BIT(DEVLINK_PARAM_CMODE_RUNTIME),
1587 			      ice_devlink_enable_roce_get,
1588 			      ice_devlink_enable_roce_set,
1589 			      ice_devlink_enable_roce_validate),
1590 	DEVLINK_PARAM_GENERIC(ENABLE_IWARP, BIT(DEVLINK_PARAM_CMODE_RUNTIME),
1591 			      ice_devlink_enable_iw_get,
1592 			      ice_devlink_enable_iw_set,
1593 			      ice_devlink_enable_iw_validate),
1594 };
1595 
1596 static const struct devlink_param ice_dvl_msix_params[] = {
1597 	DEVLINK_PARAM_GENERIC(MSIX_VEC_PER_PF_MAX,
1598 			      BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
1599 			      NULL, NULL, ice_devlink_msix_max_pf_validate),
1600 	DEVLINK_PARAM_GENERIC(MSIX_VEC_PER_PF_MIN,
1601 			      BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
1602 			      NULL, NULL, ice_devlink_msix_min_pf_validate),
1603 };
1604 
1605 static const struct devlink_param ice_dvl_sched_params[] = {
1606 	DEVLINK_PARAM_DRIVER(ICE_DEVLINK_PARAM_ID_TX_SCHED_LAYERS,
1607 			     "tx_scheduling_layers",
1608 			     DEVLINK_PARAM_TYPE_U8,
1609 			     BIT(DEVLINK_PARAM_CMODE_PERMANENT),
1610 			     ice_devlink_tx_sched_layers_get,
1611 			     ice_devlink_tx_sched_layers_set,
1612 			     ice_devlink_tx_sched_layers_validate),
1613 	DEVLINK_PARAM_DRIVER(ICE_DEVLINK_PARAM_ID_LOCAL_FWD,
1614 			     "local_forwarding", DEVLINK_PARAM_TYPE_STRING,
1615 			     BIT(DEVLINK_PARAM_CMODE_RUNTIME),
1616 			     ice_devlink_local_fwd_get,
1617 			     ice_devlink_local_fwd_set,
1618 			     ice_devlink_local_fwd_validate),
1619 };
1620 
/* devres action: free the devlink instance passed as @devlink_ptr */
static void ice_devlink_free(void *devlink_ptr)
{
	struct devlink *devlink = devlink_ptr;

	devlink_free(devlink);
}
1625 
1626 /**
1627  * ice_allocate_pf - Allocate devlink and return PF structure pointer
1628  * @dev: the device to allocate for
1629  *
1630  * Allocate a devlink instance for this device and return the private area as
1631  * the PF structure. The devlink memory is kept track of through devres by
1632  * adding an action to remove it when unwinding.
1633  */
1634 struct ice_pf *ice_allocate_pf(struct device *dev)
1635 {
1636 	struct devlink *devlink;
1637 
1638 	devlink = devlink_alloc(&ice_devlink_ops, sizeof(struct ice_pf), dev);
1639 	if (!devlink)
1640 		return NULL;
1641 
1642 	/* Add an action to teardown the devlink when unwinding the driver */
1643 	if (devm_add_action_or_reset(dev, ice_devlink_free, devlink))
1644 		return NULL;
1645 
1646 	return devlink_priv(devlink);
1647 }
1648 
1649 /**
1650  * ice_allocate_sf - Allocate devlink and return SF structure pointer
1651  * @dev: the device to allocate for
1652  * @pf: pointer to the PF structure
1653  *
1654  * Allocate a devlink instance for SF.
1655  *
1656  * Return: ice_sf_priv pointer to allocated memory or ERR_PTR in case of error
1657  */
1658 struct ice_sf_priv *ice_allocate_sf(struct device *dev, struct ice_pf *pf)
1659 {
1660 	struct devlink *devlink;
1661 	int err;
1662 
1663 	devlink = devlink_alloc(&ice_sf_devlink_ops, sizeof(struct ice_sf_priv),
1664 				dev);
1665 	if (!devlink)
1666 		return ERR_PTR(-ENOMEM);
1667 
1668 	err = devl_nested_devlink_set(priv_to_devlink(pf), devlink);
1669 	if (err) {
1670 		devlink_free(devlink);
1671 		return ERR_PTR(err);
1672 	}
1673 
1674 	return devlink_priv(devlink);
1675 }
1676 
/**
 * ice_devlink_register - Register devlink interface for this PF
 * @pf: the PF to register the devlink for.
 *
 * Register the devlink instance associated with this physical function
 * through devl_register().
 */
void ice_devlink_register(struct ice_pf *pf)
{
	devl_register(priv_to_devlink(pf));
}
1691 
/**
 * ice_devlink_unregister - Unregister devlink resources for this PF.
 * @pf: the PF structure to cleanup
 *
 * Unregisters the devlink instance associated with @pf through
 * devl_unregister().
 */
void ice_devlink_unregister(struct ice_pf *pf)
{
	struct devlink *devlink = priv_to_devlink(pf);

	devl_unregister(devlink);
}
1702 
1703 int ice_devlink_register_params(struct ice_pf *pf)
1704 {
1705 	struct devlink *devlink = priv_to_devlink(pf);
1706 	union devlink_param_value value;
1707 	struct ice_hw *hw = &pf->hw;
1708 	int status;
1709 
1710 	status = devl_params_register(devlink, ice_dvl_rdma_params,
1711 				      ARRAY_SIZE(ice_dvl_rdma_params));
1712 	if (status)
1713 		return status;
1714 
1715 	status = devl_params_register(devlink, ice_dvl_msix_params,
1716 				      ARRAY_SIZE(ice_dvl_msix_params));
1717 	if (status)
1718 		goto unregister_rdma_params;
1719 
1720 	if (hw->func_caps.common_cap.tx_sched_topo_comp_mode_en)
1721 		status = devl_params_register(devlink, ice_dvl_sched_params,
1722 					      ARRAY_SIZE(ice_dvl_sched_params));
1723 	if (status)
1724 		goto unregister_msix_params;
1725 
1726 	value.vu32 = pf->msix.max;
1727 	devl_param_driverinit_value_set(devlink,
1728 					DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX,
1729 					value);
1730 	value.vu32 = pf->msix.min;
1731 	devl_param_driverinit_value_set(devlink,
1732 					DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN,
1733 					value);
1734 	return 0;
1735 
1736 unregister_msix_params:
1737 	devl_params_unregister(devlink, ice_dvl_msix_params,
1738 			       ARRAY_SIZE(ice_dvl_msix_params));
1739 unregister_rdma_params:
1740 	devl_params_unregister(devlink, ice_dvl_rdma_params,
1741 			       ARRAY_SIZE(ice_dvl_rdma_params));
1742 	return status;
1743 }
1744 
1745 void ice_devlink_unregister_params(struct ice_pf *pf)
1746 {
1747 	struct devlink *devlink = priv_to_devlink(pf);
1748 	struct ice_hw *hw = &pf->hw;
1749 
1750 	devl_params_unregister(devlink, ice_dvl_rdma_params,
1751 			       ARRAY_SIZE(ice_dvl_rdma_params));
1752 	devl_params_unregister(devlink, ice_dvl_msix_params,
1753 			       ARRAY_SIZE(ice_dvl_msix_params));
1754 
1755 	if (hw->func_caps.common_cap.tx_sched_topo_comp_mode_en)
1756 		devl_params_unregister(devlink, ice_dvl_sched_params,
1757 				       ARRAY_SIZE(ice_dvl_sched_params));
1758 }
1759 
1760 #define ICE_DEVLINK_READ_BLK_SIZE (1024 * 1024)
1761 
1762 static const struct devlink_region_ops ice_nvm_region_ops;
1763 static const struct devlink_region_ops ice_sram_region_ops;
1764 
1765 /**
1766  * ice_devlink_nvm_snapshot - Capture a snapshot of the NVM flash contents
1767  * @devlink: the devlink instance
1768  * @ops: the devlink region to snapshot
1769  * @extack: extended ACK response structure
1770  * @data: on exit points to snapshot data buffer
1771  *
1772  * This function is called in response to a DEVLINK_CMD_REGION_NEW for either
1773  * the nvm-flash or shadow-ram region.
1774  *
1775  * It captures a snapshot of the NVM or Shadow RAM flash contents. This
1776  * snapshot can then later be viewed via the DEVLINK_CMD_REGION_READ netlink
1777  * interface.
1778  *
1779  * @returns zero on success, and updates the data pointer. Returns a non-zero
1780  * error code on failure.
1781  */
1782 static int ice_devlink_nvm_snapshot(struct devlink *devlink,
1783 				    const struct devlink_region_ops *ops,
1784 				    struct netlink_ext_ack *extack, u8 **data)
1785 {
1786 	struct ice_pf *pf = devlink_priv(devlink);
1787 	struct device *dev = ice_pf_to_dev(pf);
1788 	struct ice_hw *hw = &pf->hw;
1789 	bool read_shadow_ram;
1790 	u8 *nvm_data, *tmp, i;
1791 	u32 nvm_size, left;
1792 	s8 num_blks;
1793 	int status;
1794 
1795 	if (ops == &ice_nvm_region_ops) {
1796 		read_shadow_ram = false;
1797 		nvm_size = hw->flash.flash_size;
1798 	} else if (ops == &ice_sram_region_ops) {
1799 		read_shadow_ram = true;
1800 		nvm_size = hw->flash.sr_words * 2u;
1801 	} else {
1802 		NL_SET_ERR_MSG_MOD(extack, "Unexpected region in snapshot function");
1803 		return -EOPNOTSUPP;
1804 	}
1805 
1806 	nvm_data = vzalloc(nvm_size);
1807 	if (!nvm_data)
1808 		return -ENOMEM;
1809 
1810 	num_blks = DIV_ROUND_UP(nvm_size, ICE_DEVLINK_READ_BLK_SIZE);
1811 	tmp = nvm_data;
1812 	left = nvm_size;
1813 
1814 	/* Some systems take longer to read the NVM than others which causes the
1815 	 * FW to reclaim the NVM lock before the entire NVM has been read. Fix
1816 	 * this by breaking the reads of the NVM into smaller chunks that will
1817 	 * probably not take as long. This has some overhead since we are
1818 	 * increasing the number of AQ commands, but it should always work
1819 	 */
1820 	for (i = 0; i < num_blks; i++) {
1821 		u32 read_sz = min_t(u32, ICE_DEVLINK_READ_BLK_SIZE, left);
1822 
1823 		status = ice_acquire_nvm(hw, ICE_RES_READ);
1824 		if (status) {
1825 			dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n",
1826 				status, hw->adminq.sq_last_status);
1827 			NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore");
1828 			vfree(nvm_data);
1829 			return -EIO;
1830 		}
1831 
1832 		status = ice_read_flat_nvm(hw, i * ICE_DEVLINK_READ_BLK_SIZE,
1833 					   &read_sz, tmp, read_shadow_ram);
1834 		if (status) {
1835 			dev_dbg(dev, "ice_read_flat_nvm failed after reading %u bytes, err %d aq_err %d\n",
1836 				read_sz, status, hw->adminq.sq_last_status);
1837 			NL_SET_ERR_MSG_MOD(extack, "Failed to read NVM contents");
1838 			ice_release_nvm(hw);
1839 			vfree(nvm_data);
1840 			return -EIO;
1841 		}
1842 		ice_release_nvm(hw);
1843 
1844 		tmp += read_sz;
1845 		left -= read_sz;
1846 	}
1847 
1848 	*data = nvm_data;
1849 
1850 	return 0;
1851 }
1852 
1853 /**
1854  * ice_devlink_nvm_read - Read a portion of NVM flash contents
1855  * @devlink: the devlink instance
1856  * @ops: the devlink region to snapshot
1857  * @extack: extended ACK response structure
1858  * @offset: the offset to start at
1859  * @size: the amount to read
1860  * @data: the data buffer to read into
1861  *
1862  * This function is called in response to DEVLINK_CMD_REGION_READ to directly
1863  * read a section of the NVM contents.
1864  *
1865  * It reads from either the nvm-flash or shadow-ram region contents.
1866  *
1867  * @returns zero on success, and updates the data pointer. Returns a non-zero
1868  * error code on failure.
1869  */
1870 static int ice_devlink_nvm_read(struct devlink *devlink,
1871 				const struct devlink_region_ops *ops,
1872 				struct netlink_ext_ack *extack,
1873 				u64 offset, u32 size, u8 *data)
1874 {
1875 	struct ice_pf *pf = devlink_priv(devlink);
1876 	struct device *dev = ice_pf_to_dev(pf);
1877 	struct ice_hw *hw = &pf->hw;
1878 	bool read_shadow_ram;
1879 	u64 nvm_size;
1880 	int status;
1881 
1882 	if (ops == &ice_nvm_region_ops) {
1883 		read_shadow_ram = false;
1884 		nvm_size = hw->flash.flash_size;
1885 	} else if (ops == &ice_sram_region_ops) {
1886 		read_shadow_ram = true;
1887 		nvm_size = hw->flash.sr_words * 2u;
1888 	} else {
1889 		NL_SET_ERR_MSG_MOD(extack, "Unexpected region in snapshot function");
1890 		return -EOPNOTSUPP;
1891 	}
1892 
1893 	if (offset + size >= nvm_size) {
1894 		NL_SET_ERR_MSG_MOD(extack, "Cannot read beyond the region size");
1895 		return -ERANGE;
1896 	}
1897 
1898 	status = ice_acquire_nvm(hw, ICE_RES_READ);
1899 	if (status) {
1900 		dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n",
1901 			status, hw->adminq.sq_last_status);
1902 		NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore");
1903 		return -EIO;
1904 	}
1905 
1906 	status = ice_read_flat_nvm(hw, (u32)offset, &size, data,
1907 				   read_shadow_ram);
1908 	if (status) {
1909 		dev_dbg(dev, "ice_read_flat_nvm failed after reading %u bytes, err %d aq_err %d\n",
1910 			size, status, hw->adminq.sq_last_status);
1911 		NL_SET_ERR_MSG_MOD(extack, "Failed to read NVM contents");
1912 		ice_release_nvm(hw);
1913 		return -EIO;
1914 	}
1915 	ice_release_nvm(hw);
1916 
1917 	return 0;
1918 }
1919 
1920 /**
1921  * ice_devlink_devcaps_snapshot - Capture snapshot of device capabilities
1922  * @devlink: the devlink instance
1923  * @ops: the devlink region being snapshotted
1924  * @extack: extended ACK response structure
1925  * @data: on exit points to snapshot data buffer
1926  *
1927  * This function is called in response to the DEVLINK_CMD_REGION_TRIGGER for
1928  * the device-caps devlink region. It captures a snapshot of the device
1929  * capabilities reported by firmware.
1930  *
1931  * @returns zero on success, and updates the data pointer. Returns a non-zero
1932  * error code on failure.
1933  */
1934 static int
1935 ice_devlink_devcaps_snapshot(struct devlink *devlink,
1936 			     const struct devlink_region_ops *ops,
1937 			     struct netlink_ext_ack *extack, u8 **data)
1938 {
1939 	struct ice_pf *pf = devlink_priv(devlink);
1940 	struct device *dev = ice_pf_to_dev(pf);
1941 	struct ice_hw *hw = &pf->hw;
1942 	void *devcaps;
1943 	int status;
1944 
1945 	devcaps = vzalloc(ICE_AQ_MAX_BUF_LEN);
1946 	if (!devcaps)
1947 		return -ENOMEM;
1948 
1949 	status = ice_aq_list_caps(hw, devcaps, ICE_AQ_MAX_BUF_LEN, NULL,
1950 				  ice_aqc_opc_list_dev_caps, NULL);
1951 	if (status) {
1952 		dev_dbg(dev, "ice_aq_list_caps: failed to read device capabilities, err %d aq_err %d\n",
1953 			status, hw->adminq.sq_last_status);
1954 		NL_SET_ERR_MSG_MOD(extack, "Failed to read device capabilities");
1955 		vfree(devcaps);
1956 		return status;
1957 	}
1958 
1959 	*data = (u8 *)devcaps;
1960 
1961 	return 0;
1962 }
1963 
1964 static const struct devlink_region_ops ice_nvm_region_ops = {
1965 	.name = "nvm-flash",
1966 	.destructor = vfree,
1967 	.snapshot = ice_devlink_nvm_snapshot,
1968 	.read = ice_devlink_nvm_read,
1969 };
1970 
1971 static const struct devlink_region_ops ice_sram_region_ops = {
1972 	.name = "shadow-ram",
1973 	.destructor = vfree,
1974 	.snapshot = ice_devlink_nvm_snapshot,
1975 	.read = ice_devlink_nvm_read,
1976 };
1977 
1978 static const struct devlink_region_ops ice_devcaps_region_ops = {
1979 	.name = "device-caps",
1980 	.destructor = vfree,
1981 	.snapshot = ice_devlink_devcaps_snapshot,
1982 };
1983 
1984 /**
1985  * ice_devlink_init_regions - Initialize devlink regions
1986  * @pf: the PF device structure
1987  *
1988  * Create devlink regions used to enable access to dump the contents of the
1989  * flash memory on the device.
1990  */
1991 void ice_devlink_init_regions(struct ice_pf *pf)
1992 {
1993 	struct devlink *devlink = priv_to_devlink(pf);
1994 	struct device *dev = ice_pf_to_dev(pf);
1995 	u64 nvm_size, sram_size;
1996 
1997 	nvm_size = pf->hw.flash.flash_size;
1998 	pf->nvm_region = devl_region_create(devlink, &ice_nvm_region_ops, 1,
1999 					    nvm_size);
2000 	if (IS_ERR(pf->nvm_region)) {
2001 		dev_err(dev, "failed to create NVM devlink region, err %ld\n",
2002 			PTR_ERR(pf->nvm_region));
2003 		pf->nvm_region = NULL;
2004 	}
2005 
2006 	sram_size = pf->hw.flash.sr_words * 2u;
2007 	pf->sram_region = devl_region_create(devlink, &ice_sram_region_ops,
2008 					     1, sram_size);
2009 	if (IS_ERR(pf->sram_region)) {
2010 		dev_err(dev, "failed to create shadow-ram devlink region, err %ld\n",
2011 			PTR_ERR(pf->sram_region));
2012 		pf->sram_region = NULL;
2013 	}
2014 
2015 	pf->devcaps_region = devl_region_create(devlink,
2016 						&ice_devcaps_region_ops, 10,
2017 						ICE_AQ_MAX_BUF_LEN);
2018 	if (IS_ERR(pf->devcaps_region)) {
2019 		dev_err(dev, "failed to create device-caps devlink region, err %ld\n",
2020 			PTR_ERR(pf->devcaps_region));
2021 		pf->devcaps_region = NULL;
2022 	}
2023 }
2024 
2025 /**
2026  * ice_devlink_destroy_regions - Destroy devlink regions
2027  * @pf: the PF device structure
2028  *
2029  * Remove previously created regions for this PF.
2030  */
2031 void ice_devlink_destroy_regions(struct ice_pf *pf)
2032 {
2033 	if (pf->nvm_region)
2034 		devl_region_destroy(pf->nvm_region);
2035 
2036 	if (pf->sram_region)
2037 		devl_region_destroy(pf->sram_region);
2038 
2039 	if (pf->devcaps_region)
2040 		devl_region_destroy(pf->devcaps_region);
2041 }
2042