xref: /linux/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c (revision 8bf22c33e7a172fbc72464f4cc484d23a6b412ba)
1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) 2019 Mellanox Technologies.
3 
4 #include <net/netdev_lock.h>
5 
6 #include "health.h"
7 #include "params.h"
8 #include "txrx.h"
9 #include "devlink.h"
10 #include "ptp.h"
11 #include "lib/tout.h"
12 
/* Keep this string array consistent with the MLX5E_RQ_STATE_* enums in en.h */
/* Labels for each RQ software state bit; indexed directly by the
 * MLX5E_RQ_STATE_* enum values, so order here must track the enum.
 * A BUILD_BUG_ON in mlx5e_health_rq_put_sw_state() enforces the size.
 */
static const char * const rq_sw_state_type_name[] = {
	[MLX5E_RQ_STATE_ENABLED] = "enabled",
	[MLX5E_RQ_STATE_RECOVERING] = "recovering",
	[MLX5E_RQ_STATE_DIM] = "dim",
	[MLX5E_RQ_STATE_NO_CSUM_COMPLETE] = "no_csum_complete",
	[MLX5E_RQ_STATE_CSUM_FULL] = "csum_full",
	[MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX] = "mini_cqe_hw_stridx",
	[MLX5E_RQ_STATE_SHAMPO] = "shampo",
	[MLX5E_RQ_STATE_MINI_CQE_ENHANCED] = "mini_cqe_enhanced",
	[MLX5E_RQ_STATE_XSK] = "xsk",
};
25 
/* Query firmware for the hardware state of RQ @rqn and store it in @state.
 * Returns 0 on success, -ENOMEM on allocation failure, or the error from
 * the QUERY_RQ command.
 */
static int mlx5e_query_rq_state(struct mlx5_core_dev *dev, u32 rqn, u8 *state)
{
	int out_sz = MLX5_ST_SZ_BYTES(query_rq_out);
	void *rq_ctx;
	void *out_buf;
	int ret;

	out_buf = kvzalloc(out_sz, GFP_KERNEL);
	if (!out_buf)
		return -ENOMEM;

	ret = mlx5_core_query_rq(dev, rqn, out_buf);
	if (!ret) {
		rq_ctx = MLX5_ADDR_OF(query_rq_out, out_buf, rq_context);
		*state = MLX5_GET(rqc, rq_ctx, state);
	}

	kvfree(out_buf);
	return ret;
}
48 
mlx5e_wait_for_icosq_flush(struct mlx5e_icosq * icosq)49 static int mlx5e_wait_for_icosq_flush(struct mlx5e_icosq *icosq)
50 {
51 	struct mlx5_core_dev *dev = icosq->channel->mdev;
52 	unsigned long exp_time;
53 
54 	exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FLUSH_ON_ERROR));
55 
56 	while (time_before(jiffies, exp_time)) {
57 		if (icosq->cc == icosq->pc)
58 			return 0;
59 
60 		msleep(20);
61 	}
62 
63 	netdev_err(icosq->channel->netdev,
64 		   "Wait for ICOSQ 0x%x flush timeout (cc = 0x%x, pc = 0x%x)\n",
65 		   icosq->sqn, icosq->cc, icosq->pc);
66 
67 	return -ETIMEDOUT;
68 }
69 
mlx5e_reset_icosq_cc_pc(struct mlx5e_icosq * icosq)70 static void mlx5e_reset_icosq_cc_pc(struct mlx5e_icosq *icosq)
71 {
72 	WARN_ONCE(icosq->cc != icosq->pc, "ICOSQ 0x%x: cc (0x%x) != pc (0x%x)\n",
73 		  icosq->sqn, icosq->cc, icosq->pc);
74 	icosq->cc = 0;
75 	icosq->pc = 0;
76 }
77 
/* Recovery handler for an ICOSQ that raised an error CQE. @ctx is the
 * struct mlx5e_icosq. Deactivates the dependent RQ(s), drains and resets
 * the SQ back to ready, then reactivates everything and kicks NAPI.
 * Returns 0 on success (or when no recovery was needed), negative errno
 * otherwise. On an error path the RQs may be left deactivated.
 */
static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
{
	struct mlx5e_rq *xskrq = NULL;
	struct mlx5_core_dev *mdev;
	struct mlx5e_icosq *icosq;
	struct net_device *dev;
	struct mlx5e_rq *rq;
	u8 state;
	int err;

	icosq = ctx;

	/* Serialize with suspend/resume of ICOSQ recovery on this channel. */
	mutex_lock(&icosq->channel->icosq_recovery_lock);

	/* mlx5e_close_rq cancels this work before RQ and ICOSQ are killed. */
	rq = &icosq->channel->rq;
	if (test_bit(MLX5E_RQ_STATE_ENABLED, &icosq->channel->xskrq.state))
		xskrq = &icosq->channel->xskrq;
	mdev = icosq->channel->mdev;
	dev = icosq->channel->netdev;
	err = mlx5_core_query_sq_state(mdev, icosq->sqn, &state);
	if (err) {
		netdev_err(dev, "Failed to query ICOSQ 0x%x state. err = %d\n",
			   icosq->sqn, err);
		goto out;
	}

	/* Nothing to recover unless HW actually put the SQ in error state. */
	if (state != MLX5_SQC_STATE_ERR)
		goto out;

	/* Stop the RQs first so they stop feeding work to this ICOSQ. */
	mlx5e_deactivate_rq(rq);
	if (xskrq)
		mlx5e_deactivate_rq(xskrq);

	err = mlx5e_wait_for_icosq_flush(icosq);
	if (err)
		goto out;

	mlx5e_deactivate_icosq(icosq);

	/* At this point, both the rq and the icosq are disabled */

	/* Drive the SQ through reset back to the ready state in FW. */
	err = mlx5e_health_sq_to_ready(mdev, dev, icosq->sqn);
	if (err)
		goto out;

	mlx5e_reset_icosq_cc_pc(icosq);

	/* Release RX descriptors whose ICOSQ work was lost in the reset. */
	mlx5e_free_rx_missing_descs(rq);
	if (xskrq)
		mlx5e_free_rx_missing_descs(xskrq);

	clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state);
	mlx5e_activate_icosq(icosq);

	mlx5e_activate_rq(rq);
	rq->stats->recover++;

	if (xskrq) {
		mlx5e_activate_rq(xskrq);
		xskrq->stats->recover++;
	}

	/* Kick NAPI so the reactivated queues make forward progress. */
	mlx5e_trigger_napi_icosq(icosq->channel);

	mutex_unlock(&icosq->channel->icosq_recovery_lock);

	return 0;
out:
	clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state);
	mutex_unlock(&icosq->channel->icosq_recovery_lock);
	return err;
}
151 
/* Recovery handler for an RQ that raised an error CQE. @ctx is the
 * struct mlx5e_rq. Flushes the RQ out of the ERR state and reactivates
 * it. Returns 0 on success, negative errno if the flush failed.
 */
static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx)
{
	struct mlx5e_rq *rq = ctx;
	int err;

	mlx5e_deactivate_rq(rq);
	err = mlx5e_flush_rq(rq, MLX5_RQC_STATE_ERR);
	/* The recovery attempt is over, successful or not. */
	clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state);
	if (err)
		return err;

	mlx5e_activate_rq(rq);
	rq->stats->recover++;
	/* Some RQs have no channel (presumably the PTP RQ — confirm);
	 * schedule their NAPI directly instead of via the channel's ICOSQ.
	 */
	if (rq->channel)
		mlx5e_trigger_napi_icosq(rq->channel);
	else
		mlx5e_trigger_napi_sched(rq->cq.napi);
	return 0;
}
171 
/* Recovery handler for an RX completion timeout. @ctx is the
 * struct mlx5e_rq. Attempts to recover the RQ's completion EQ; if that
 * fails and an ICOSQ exists, marks it disabled so a subsequent recovery
 * flow can take over. Returns the EQ recovery result, or 0 if channels
 * were already closing.
 */
static int mlx5e_rx_reporter_timeout_recover(void *ctx)
{
	struct mlx5_eq_comp *eq;
	struct mlx5e_priv *priv;
	struct mlx5e_rq *rq;
	int err;

	rq = ctx;
	priv = rq->priv;

	/* Acquire netdev instance lock to synchronize with channel close and
	 * reopen flows. Either successfully obtain the lock, or detect that
	 * channels are closing for another reason, making this work no longer
	 * necessary.
	 */
	while (!netdev_trylock(rq->netdev)) {
		if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &rq->priv->state))
			return 0;
		msleep(20);
	}
	mutex_lock(&priv->state_lock);

	eq = rq->cq.mcq.eq;

	err = mlx5e_health_channel_eq_recover(rq->netdev, eq, rq->cq.ch_stats);
	if (err && rq->icosq)
		clear_bit(MLX5E_SQ_STATE_ENABLED, &rq->icosq->state);

	mutex_unlock(&priv->state_lock);
	netdev_unlock(rq->netdev);

	return err;
}
205 
mlx5e_rx_reporter_recover_from_ctx(struct mlx5e_err_ctx * err_ctx)206 static int mlx5e_rx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx)
207 {
208 	return err_ctx->recover(err_ctx->ctx);
209 }
210 
/* devlink health "recover" callback. With an error context (queue-level
 * event) run that context's recovery routine; without one (user-triggered
 * recover) recover all channels.
 */
static int mlx5e_rx_reporter_recover(struct devlink_health_reporter *reporter,
				     void *context,
				     struct netlink_ext_ack *extack)
{
	struct mlx5e_err_ctx *err_ctx = context;
	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);

	if (!err_ctx)
		return mlx5e_health_recover_channels(priv);

	return mlx5e_rx_reporter_recover_from_ctx(err_ctx);
}
221 
/* Emit an "ICOSQ" diagnostics object into @fmsg: SQ identifiers and
 * counters, plus a nested "CQ" object for its completion queue.
 * @hw_state is the SQ state previously queried from firmware.
 */
static void mlx5e_reporter_icosq_diagnose(struct mlx5e_icosq *icosq, u8 hw_state,
					  struct devlink_fmsg *fmsg)
{
	mlx5e_health_fmsg_named_obj_nest_start(fmsg, "ICOSQ");
	devlink_fmsg_u32_pair_put(fmsg, "sqn", icosq->sqn);
	devlink_fmsg_u8_pair_put(fmsg, "HW state", hw_state);
	devlink_fmsg_u32_pair_put(fmsg, "cc", icosq->cc);
	devlink_fmsg_u32_pair_put(fmsg, "pc", icosq->pc);
	devlink_fmsg_u32_pair_put(fmsg, "WQE size", mlx5_wq_cyc_get_size(&icosq->wq));

	mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ");
	devlink_fmsg_u32_pair_put(fmsg, "cqn", icosq->cq.mcq.cqn);
	devlink_fmsg_u32_pair_put(fmsg, "cc", icosq->cq.wq.cc);
	devlink_fmsg_u32_pair_put(fmsg, "size", mlx5_cqwq_get_size(&icosq->cq.wq));
	mlx5e_health_fmsg_named_obj_nest_end(fmsg);

	mlx5e_health_fmsg_named_obj_nest_end(fmsg);
}
240 
/* Emit an "SW State" object with one u32 pair per MLX5E_RQ_STATE_* bit,
 * labelled via rq_sw_state_type_name[] and valued by the bit's current
 * setting in rq->state.
 */
static void mlx5e_health_rq_put_sw_state(struct devlink_fmsg *fmsg, struct mlx5e_rq *rq)
{
	int i;

	BUILD_BUG_ON_MSG(ARRAY_SIZE(rq_sw_state_type_name) != MLX5E_NUM_RQ_STATES,
			 "rq_sw_state_type_name string array must be consistent with MLX5E_RQ_STATE_* enum in en.h");
	mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SW State");

	for (i = 0; i < ARRAY_SIZE(rq_sw_state_type_name); ++i)
		devlink_fmsg_u32_pair_put(fmsg, rq_sw_state_type_name[i],
					  test_bit(i, &rq->state));

	mlx5e_health_fmsg_named_obj_nest_end(fmsg);
}
255 
/* Emit the per-RQ diagnostics shared by regular, XSK and PTP RQs:
 * HW state, work-queue counters, SW state bits, CQ and EQ info, and —
 * when the RQ has an ICOSQ — a nested ICOSQ object. Returns 0 on
 * success or the error from a firmware state query.
 */
static int
mlx5e_rx_reporter_build_diagnose_output_rq_common(struct mlx5e_rq *rq,
						  struct devlink_fmsg *fmsg)
{
	u16 wqe_counter;
	int wqes_sz;
	u8 hw_state;
	u16 wq_head;
	int err;

	err = mlx5e_query_rq_state(rq->mdev, rq->rqn, &hw_state);
	if (err)
		return err;

	wqes_sz = mlx5e_rqwq_get_cur_sz(rq);
	wq_head = mlx5e_rqwq_get_head(rq);
	wqe_counter = mlx5e_rqwq_get_wqe_counter(rq);

	devlink_fmsg_u32_pair_put(fmsg, "rqn", rq->rqn);
	devlink_fmsg_u8_pair_put(fmsg, "HW state", hw_state);
	devlink_fmsg_u32_pair_put(fmsg, "WQE counter", wqe_counter);
	devlink_fmsg_u32_pair_put(fmsg, "posted WQEs", wqes_sz);
	devlink_fmsg_u32_pair_put(fmsg, "cc", wq_head);
	mlx5e_health_rq_put_sw_state(fmsg, rq);
	mlx5e_health_cq_diag_fmsg(&rq->cq, fmsg);
	mlx5e_health_eq_diag_fmsg(rq->cq.mcq.eq, fmsg);

	if (rq->icosq) {
		struct mlx5e_icosq *icosq = rq->icosq;
		u8 icosq_hw_state;
		int err; /* NOTE(review): shadows the outer 'err'; harmless, but worth renaming. */

		err = mlx5_core_query_sq_state(rq->mdev, icosq->sqn, &icosq_hw_state);
		if (err)
			return err;

		mlx5e_reporter_icosq_diagnose(icosq, icosq_hw_state, fmsg);
	}

	return 0;
}
297 
/* Wrap the common RQ diagnostics in an fmsg object tagged with the RQ's
 * channel index. Errors from the common helper are dropped here; the
 * fmsg API records failures internally.
 */
static void mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq,
						    struct devlink_fmsg *fmsg)
{
	devlink_fmsg_obj_nest_start(fmsg);
	devlink_fmsg_u32_pair_put(fmsg, "channel ix", rq->ix);
	mlx5e_rx_reporter_build_diagnose_output_rq_common(rq, fmsg);
	devlink_fmsg_obj_nest_end(fmsg);
}
306 
/* Emit an "RQ" object describing configuration common to all RQs of the
 * current channel parameters: WQ type, stride size, ring size, timestamp
 * format (real-time vs free-running counter), and common CQ info.
 */
static void mlx5e_rx_reporter_diagnose_generic_rq(struct mlx5e_rq *rq,
						  struct devlink_fmsg *fmsg)
{
	struct mlx5e_priv *priv = rq->priv;
	struct mlx5e_params *params;
	u32 rq_stride, rq_sz;
	bool real_time;

	params = &priv->channels.params;
	rq_sz = mlx5e_rqwq_get_size(rq);
	real_time =  mlx5_is_real_time_rq(rq->mdev);
	rq_stride = BIT(mlx5e_mpwqe_get_log_stride_size(rq->mdev, params, NULL));

	mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RQ");
	devlink_fmsg_u8_pair_put(fmsg, "type", params->rq_wq_type);
	devlink_fmsg_u64_pair_put(fmsg, "stride size", rq_stride);
	devlink_fmsg_u32_pair_put(fmsg, "size", rq_sz);
	devlink_fmsg_string_pair_put(fmsg, "ts_format", real_time ? "RT" : "FRC");
	mlx5e_health_cq_common_diag_fmsg(&rq->cq, fmsg);
	mlx5e_health_fmsg_named_obj_nest_end(fmsg);
}
328 
/* Emit a "PTP" object: the configured RX timestamp filter type and the
 * generic RQ diagnostics of the PTP channel's RQ.
 */
static void
mlx5e_rx_reporter_diagnose_common_ptp_config(struct mlx5e_priv *priv, struct mlx5e_ptp *ptp_ch,
					     struct devlink_fmsg *fmsg)
{
	mlx5e_health_fmsg_named_obj_nest_start(fmsg, "PTP");
	devlink_fmsg_u32_pair_put(fmsg, "filter_type",
				  priv->hwtstamp_config.rx_filter);
	mlx5e_rx_reporter_diagnose_generic_rq(&ptp_ch->rq, fmsg);
	mlx5e_health_fmsg_named_obj_nest_end(fmsg);
}
339 
/* Emit a "Common config" object: generic RQ parameters taken from
 * channel 0's RQ (assumed representative of all channels), plus PTP RX
 * config when the PTP channel has RX enabled. Callers must ensure
 * channels are open so channels.c[0] is valid.
 */
static void
mlx5e_rx_reporter_diagnose_common_config(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg)
{
	struct mlx5e_rq *generic_rq = &priv->channels.c[0]->rq;
	struct mlx5e_ptp *ptp_ch = priv->channels.ptp;

	mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common config");
	mlx5e_rx_reporter_diagnose_generic_rq(generic_rq, fmsg);
	if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state))
		mlx5e_rx_reporter_diagnose_common_ptp_config(priv, ptp_ch, fmsg);
	mlx5e_health_fmsg_named_obj_nest_end(fmsg);
}
352 
/* Like mlx5e_rx_reporter_build_diagnose_output(), but tags the object
 * with channel = "ptp" since the PTP RQ has no numeric channel index.
 */
static void mlx5e_rx_reporter_build_diagnose_output_ptp_rq(struct mlx5e_rq *rq,
							   struct devlink_fmsg *fmsg)
{
	devlink_fmsg_obj_nest_start(fmsg);
	devlink_fmsg_string_pair_put(fmsg, "channel", "ptp");
	mlx5e_rx_reporter_build_diagnose_output_rq_common(rq, fmsg);
	devlink_fmsg_obj_nest_end(fmsg);
}
361 
/* Emit a "Direct TIRs" array: one object per possible channel with its
 * index, direct TIR number and direct RQT number.
 */
static void mlx5e_rx_reporter_diagnose_rx_res_dir_tirns(struct mlx5e_rx_res *rx_res,
							struct devlink_fmsg *fmsg)
{
	unsigned int max_nch = mlx5e_rx_res_get_max_nch(rx_res);
	int i;

	devlink_fmsg_arr_pair_nest_start(fmsg, "Direct TIRs");

	for (i = 0; i < max_nch; i++) {
		devlink_fmsg_obj_nest_start(fmsg);

		devlink_fmsg_u32_pair_put(fmsg, "ix", i);
		devlink_fmsg_u32_pair_put(fmsg, "tirn", mlx5e_rx_res_get_tirn_direct(rx_res, i));
		devlink_fmsg_u32_pair_put(fmsg, "rqtn", mlx5e_rx_res_get_rqtn_direct(rx_res, i));

		devlink_fmsg_obj_nest_end(fmsg);
	}

	devlink_fmsg_arr_pair_nest_end(fmsg);
}
382 
/* Emit the valid (inner or outer, per @inner) TIR numbers of an RSS
 * context, one object per traffic type. The array nest is only opened
 * lazily on the first valid TIR, so nothing is emitted when the context
 * has no valid TIRs of the requested kind.
 */
static void mlx5e_rx_reporter_diagnose_rx_res_rss_tirn(struct mlx5e_rss *rss, bool inner,
						       struct devlink_fmsg *fmsg)
{
	bool found_valid_tir = false;
	int tt;

	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
		if (!mlx5e_rss_valid_tir(rss, tt, inner))
			continue;

		if (!found_valid_tir) {
			char *tir_msg = inner ? "Inner TIRs Numbers" : "TIRs Numbers";

			found_valid_tir = true;
			devlink_fmsg_arr_pair_nest_start(fmsg, tir_msg);
		}

		devlink_fmsg_obj_nest_start(fmsg);
		devlink_fmsg_string_pair_put(fmsg, "tt", mlx5_ttc_get_name(tt));
		devlink_fmsg_u32_pair_put(fmsg, "tirn", mlx5e_rss_get_tirn(rss, tt, inner));
		devlink_fmsg_obj_nest_end(fmsg);
	}

	/* Close the nest only if we opened it above. */
	if (found_valid_tir)
		devlink_fmsg_arr_pair_nest_end(fmsg);
}
409 
/* Emit one RSS context's diagnostics: index, RQT number, outer TIRs and
 * (when inner flow-table support is on) inner TIRs. Silently skips
 * indices with no RSS context allocated.
 */
static void mlx5e_rx_reporter_diagnose_rx_res_rss_ix(struct mlx5e_rx_res *rx_res, u32 rss_idx,
						     struct devlink_fmsg *fmsg)
{
	struct mlx5e_rss *rss = mlx5e_rx_res_rss_get(rx_res, rss_idx);

	if (!rss)
		return;

	devlink_fmsg_obj_nest_start(fmsg);

	devlink_fmsg_u32_pair_put(fmsg, "Index", rss_idx);
	devlink_fmsg_u32_pair_put(fmsg, "rqtn", mlx5e_rss_get_rqtn(rss));
	mlx5e_rx_reporter_diagnose_rx_res_rss_tirn(rss, false, fmsg);
	if (mlx5e_rss_get_inner_ft_support(rss))
		mlx5e_rx_reporter_diagnose_rx_res_rss_tirn(rss, true, fmsg);

	devlink_fmsg_obj_nest_end(fmsg);
}
428 
/* Emit an "RSS" array covering every possible RSS context index;
 * unallocated indices contribute nothing (handled by the _ix helper).
 */
static void mlx5e_rx_reporter_diagnose_rx_res_rss(struct mlx5e_rx_res *rx_res,
						  struct devlink_fmsg *fmsg)
{
	int rss_ix;

	devlink_fmsg_arr_pair_nest_start(fmsg, "RSS");
	for (rss_ix = 0; rss_ix < MLX5E_MAX_NUM_RSS; rss_ix++)
		mlx5e_rx_reporter_diagnose_rx_res_rss_ix(rx_res, rss_ix, fmsg);
	devlink_fmsg_arr_pair_nest_end(fmsg);
}
439 
/* Emit an "RX resources" object: direct TIR/RQT mappings followed by
 * all RSS contexts.
 */
static void mlx5e_rx_reporter_diagnose_rx_res(struct mlx5e_priv *priv,
					      struct devlink_fmsg *fmsg)
{
	struct mlx5e_rx_res *rx_res = priv->rx_res;

	mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RX resources");
	mlx5e_rx_reporter_diagnose_rx_res_dir_tirns(rx_res, fmsg);
	mlx5e_rx_reporter_diagnose_rx_res_rss(rx_res, fmsg);
	mlx5e_health_fmsg_named_obj_nest_end(fmsg);
}
450 
/* Emit an "RQs" array with diagnostics for each channel's active RQ —
 * the XSK RQ when the channel is in XSK mode, otherwise the regular
 * one — plus the PTP RQ when PTP RX is enabled.
 */
static void mlx5e_rx_reporter_diagnose_rqs(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg)
{
	struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
	int i;

	devlink_fmsg_arr_pair_nest_start(fmsg, "RQs");

	for (i = 0; i < priv->channels.num; i++) {
		struct mlx5e_channel *c = priv->channels.c[i];
		struct mlx5e_rq *rq;

		rq = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state) ?
			&c->xskrq : &c->rq;

		mlx5e_rx_reporter_build_diagnose_output(rq, fmsg);
	}
	if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state))
		mlx5e_rx_reporter_build_diagnose_output_ptp_rq(&ptp_ch->rq, fmsg);

	devlink_fmsg_arr_pair_nest_end(fmsg);
}
472 
/* devlink health "diagnose" callback: emit common config, per-RQ state
 * and RX resources. Holds priv->state_lock so channels cannot change
 * underneath; emits nothing when the device is not opened. Always
 * returns 0 — fmsg errors are tracked inside the fmsg itself.
 */
static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter,
				      struct devlink_fmsg *fmsg,
				      struct netlink_ext_ack *extack)
{
	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);

	mutex_lock(&priv->state_lock);

	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
		goto unlock;

	mlx5e_rx_reporter_diagnose_common_config(priv, fmsg);
	mlx5e_rx_reporter_diagnose_rqs(priv, fmsg);
	mlx5e_rx_reporter_diagnose_rx_res(priv, fmsg);
unlock:
	mutex_unlock(&priv->state_lock);
	return 0;
}
491 
mlx5e_rx_reporter_dump_icosq(struct mlx5e_priv * priv,struct devlink_fmsg * fmsg,void * ctx)492 static int mlx5e_rx_reporter_dump_icosq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
493 					void *ctx)
494 {
495 	struct mlx5e_txqsq *icosq = ctx;
496 	struct mlx5_rsc_key key = {};
497 
498 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
499 		return 0;
500 
501 	mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice");
502 	key.size = PAGE_SIZE;
503 	key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL;
504 	mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
505 	mlx5e_health_fmsg_named_obj_nest_end(fmsg);
506 
507 	mlx5e_health_fmsg_named_obj_nest_start(fmsg, "ICOSQ");
508 
509 	mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC");
510 	key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
511 	key.index1 = icosq->sqn;
512 	key.num_of_obj1 = 1;
513 	mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
514 	mlx5e_health_fmsg_named_obj_nest_end(fmsg);
515 
516 	mlx5e_health_fmsg_named_obj_nest_start(fmsg, "send_buff");
517 	key.rsc = MLX5_SGMT_TYPE_SND_BUFF;
518 	key.num_of_obj2 = MLX5_RSC_DUMP_ALL;
519 	mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
520 	mlx5e_health_fmsg_named_obj_nest_end(fmsg);
521 
522 	mlx5e_health_fmsg_named_obj_nest_end(fmsg);
523 
524 	return 0;
525 }
526 
/* Dump handler for RQ error contexts: emit the RX slice, the RQ's full
 * QPC and its receive buffer into @fmsg. @ctx is the struct mlx5e_rq.
 * Returns 0; a no-op when the device is not opened.
 */
static int mlx5e_rx_reporter_dump_rq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
				     void *ctx)
{
	struct mlx5_rsc_key key = {};
	struct mlx5e_rq *rq = ctx;

	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
		return 0;

	mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RX Slice");
	key.size = PAGE_SIZE;
	key.rsc = MLX5_SGMT_TYPE_RX_SLICE_ALL;
	mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
	mlx5e_health_fmsg_named_obj_nest_end(fmsg);

	mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RQ");

	mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC");
	key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
	key.index1 = rq->rqn;
	key.num_of_obj1 = 1;
	mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
	mlx5e_health_fmsg_named_obj_nest_end(fmsg);

	mlx5e_health_fmsg_named_obj_nest_start(fmsg, "receive_buff");
	key.rsc = MLX5_SGMT_TYPE_RCV_BUFF;
	key.num_of_obj2 = MLX5_RSC_DUMP_ALL;
	mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
	mlx5e_health_fmsg_named_obj_nest_end(fmsg);

	mlx5e_health_fmsg_named_obj_nest_end(fmsg);

	return 0;
}
561 
/* Dump handler when no error context is given (user-triggered dump):
 * emit the RX slice and a queue dump for every channel RQ, plus the PTP
 * RQ when PTP RX is enabled. Returns 0; a no-op when the device is not
 * opened.
 */
static int mlx5e_rx_reporter_dump_all_rqs(struct mlx5e_priv *priv,
					  struct devlink_fmsg *fmsg)
{
	struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
	struct mlx5_rsc_key key = {};

	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
		return 0;

	mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RX Slice");
	key.size = PAGE_SIZE;
	key.rsc = MLX5_SGMT_TYPE_RX_SLICE_ALL;
	mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
	mlx5e_health_fmsg_named_obj_nest_end(fmsg);
	devlink_fmsg_arr_pair_nest_start(fmsg, "RQs");

	for (int i = 0; i < priv->channels.num; i++) {
		struct mlx5e_rq *rq = &priv->channels.c[i]->rq;

		mlx5e_health_queue_dump(priv, fmsg, rq->rqn, "RQ");
	}

	if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state))
		mlx5e_health_queue_dump(priv, fmsg, ptp_ch->rq.rqn, "PTP RQ");

	devlink_fmsg_arr_pair_nest_end(fmsg);
	return 0;
}
590 
mlx5e_rx_reporter_dump_from_ctx(struct mlx5e_priv * priv,struct mlx5e_err_ctx * err_ctx,struct devlink_fmsg * fmsg)591 static int mlx5e_rx_reporter_dump_from_ctx(struct mlx5e_priv *priv,
592 					   struct mlx5e_err_ctx *err_ctx,
593 					   struct devlink_fmsg *fmsg)
594 {
595 	return err_ctx->dump(priv, fmsg, err_ctx->ctx);
596 }
597 
/* devlink health "dump" callback. With an error context use its specific
 * dump routine; without one (user-triggered dump) dump all RQs.
 */
static int mlx5e_rx_reporter_dump(struct devlink_health_reporter *reporter,
				  struct devlink_fmsg *fmsg, void *context,
				  struct netlink_ext_ack *extack)
{
	struct mlx5e_err_ctx *err_ctx = context;
	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);

	if (!err_ctx)
		return mlx5e_rx_reporter_dump_all_rqs(priv, fmsg);

	return mlx5e_rx_reporter_dump_from_ctx(priv, err_ctx, fmsg);
}
608 
/* Report an RX completion timeout on @rq to the devlink health
 * reporter, attaching the timeout recovery and RQ dump callbacks.
 * The ICOSQ id is included in the message only when the RQ has one.
 */
void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq)
{
	char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
	struct mlx5e_icosq *icosq = rq->icosq;
	struct mlx5e_priv *priv = rq->priv;
	struct mlx5e_err_ctx err_ctx = {};
	char icosq_str[32] = {};

	err_ctx.ctx = rq;
	err_ctx.recover = mlx5e_rx_reporter_timeout_recover;
	err_ctx.dump = mlx5e_rx_reporter_dump_rq;

	if (icosq)
		snprintf(icosq_str, sizeof(icosq_str), "ICOSQ: 0x%x, ", icosq->sqn);
	snprintf(err_str, sizeof(err_str),
		 "RX timeout on channel: %d, %s RQ: 0x%x, CQ: 0x%x",
		 rq->ix, icosq_str, rq->rqn, rq->cq.mcq.cqn);

	mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
}
629 
/* Report an error CQE on @rq to the devlink health reporter, attaching
 * the RQ CQE recovery and RQ dump callbacks.
 */
void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq)
{
	char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
	struct mlx5e_priv *priv = rq->priv;
	struct mlx5e_err_ctx err_ctx = {};

	err_ctx.ctx = rq;
	err_ctx.recover = mlx5e_rx_reporter_err_rq_cqe_recover;
	err_ctx.dump = mlx5e_rx_reporter_dump_rq;
	snprintf(err_str, sizeof(err_str), "ERR CQE on RQ: 0x%x", rq->rqn);

	mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
}
643 
/* Report an error CQE on @icosq to the devlink health reporter,
 * attaching the ICOSQ CQE recovery and ICOSQ dump callbacks. Note
 * err_ctx.ctx carries a struct mlx5e_icosq *.
 */
void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq)
{
	struct mlx5e_priv *priv = icosq->channel->priv;
	char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
	struct mlx5e_err_ctx err_ctx = {};

	err_ctx.ctx = icosq;
	err_ctx.recover = mlx5e_rx_reporter_err_icosq_cqe_recover;
	err_ctx.dump = mlx5e_rx_reporter_dump_icosq;
	snprintf(err_str, sizeof(err_str), "ERR CQE on ICOSQ: 0x%x", icosq->sqn);

	mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
}
657 
/* Block ICOSQ recovery on channel @c until
 * mlx5e_reporter_icosq_resume_recovery() is called (takes the same
 * mutex the recovery work holds for its whole run).
 */
void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c)
{
	mutex_lock(&c->icosq_recovery_lock);
}
662 
/* Re-allow ICOSQ recovery on channel @c; pairs with
 * mlx5e_reporter_icosq_suspend_recovery().
 */
void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c)
{
	mutex_unlock(&c->icosq_recovery_lock);
}
667 
/* Default graceful/burst periods handed to devlink for auto-recovery
 * throttling (units per the devlink health API — msec; confirm).
 */
#define MLX5E_REPORTER_RX_GRACEFUL_PERIOD 500
#define MLX5E_REPORTER_RX_BURST_PERIOD 500

/* devlink health reporter callbacks for the "rx" reporter. */
static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = {
	.name = "rx",
	.recover = mlx5e_rx_reporter_recover,
	.diagnose = mlx5e_rx_reporter_diagnose,
	.dump = mlx5e_rx_reporter_dump,
	.default_graceful_period = MLX5E_REPORTER_RX_GRACEFUL_PERIOD,
	.default_burst_period = MLX5E_REPORTER_RX_BURST_PERIOD,
};
679 
mlx5e_reporter_rx_create(struct mlx5e_priv * priv)680 void mlx5e_reporter_rx_create(struct mlx5e_priv *priv)
681 {
682 	struct devlink_port *port = priv->netdev->devlink_port;
683 	struct devlink_health_reporter *reporter;
684 
685 	reporter = devlink_port_health_reporter_create(port,
686 						       &mlx5_rx_reporter_ops,
687 						       priv);
688 	if (IS_ERR(reporter)) {
689 		netdev_warn(priv->netdev, "Failed to create rx reporter, err = %pe\n",
690 			    reporter);
691 		return;
692 	}
693 	priv->rx_reporter = reporter;
694 }
695 
mlx5e_reporter_rx_destroy(struct mlx5e_priv * priv)696 void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv)
697 {
698 	if (!priv->rx_reporter)
699 		return;
700 
701 	devlink_health_reporter_destroy(priv->rx_reporter);
702 	priv->rx_reporter = NULL;
703 }
704