xref: /linux/drivers/infiniband/hw/mlx5/counters.c (revision f6ff1c760431be34e4daaa44f242be911becd998)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /*
3  * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
4  */
5 
6 #include "mlx5_ib.h"
7 #include <linux/mlx5/eswitch.h>
8 #include <linux/mlx5/vport.h>
9 #include "counters.h"
10 #include "ib_rep.h"
11 #include "qp.h"
12 
13 struct mlx5_ib_counter {
14 	const char *name;
15 	size_t offset;
16 	u32 type;
17 };
18 
19 #define INIT_Q_COUNTER(_name)		\
20 	{ .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}
21 
22 #define INIT_VPORT_Q_COUNTER(_name)		\
23 	{ .name = "vport_" #_name, .offset =	\
24 		MLX5_BYTE_OFF(query_q_counter_out, _name)}
25 
26 static const struct mlx5_ib_counter basic_q_cnts[] = {
27 	INIT_Q_COUNTER(rx_write_requests),
28 	INIT_Q_COUNTER(rx_read_requests),
29 	INIT_Q_COUNTER(rx_atomic_requests),
30 	INIT_Q_COUNTER(rx_dct_connect),
31 	INIT_Q_COUNTER(out_of_buffer),
32 };
33 
34 static const struct mlx5_ib_counter out_of_seq_q_cnts[] = {
35 	INIT_Q_COUNTER(out_of_sequence),
36 };
37 
38 static const struct mlx5_ib_counter retrans_q_cnts[] = {
39 	INIT_Q_COUNTER(duplicate_request),
40 	INIT_Q_COUNTER(rnr_nak_retry_err),
41 	INIT_Q_COUNTER(packet_seq_err),
42 	INIT_Q_COUNTER(implied_nak_seq_err),
43 	INIT_Q_COUNTER(local_ack_timeout_err),
44 };
45 
46 static const struct mlx5_ib_counter vport_basic_q_cnts[] = {
47 	INIT_VPORT_Q_COUNTER(rx_write_requests),
48 	INIT_VPORT_Q_COUNTER(rx_read_requests),
49 	INIT_VPORT_Q_COUNTER(rx_atomic_requests),
50 	INIT_VPORT_Q_COUNTER(rx_dct_connect),
51 	INIT_VPORT_Q_COUNTER(out_of_buffer),
52 };
53 
54 static const struct mlx5_ib_counter vport_out_of_seq_q_cnts[] = {
55 	INIT_VPORT_Q_COUNTER(out_of_sequence),
56 };
57 
58 static const struct mlx5_ib_counter vport_retrans_q_cnts[] = {
59 	INIT_VPORT_Q_COUNTER(duplicate_request),
60 	INIT_VPORT_Q_COUNTER(rnr_nak_retry_err),
61 	INIT_VPORT_Q_COUNTER(packet_seq_err),
62 	INIT_VPORT_Q_COUNTER(implied_nak_seq_err),
63 	INIT_VPORT_Q_COUNTER(local_ack_timeout_err),
64 };
65 
66 #define INIT_CONG_COUNTER(_name)		\
67 	{ .name = #_name, .offset =	\
68 		MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)}
69 
70 static const struct mlx5_ib_counter cong_cnts[] = {
71 	INIT_CONG_COUNTER(rp_cnp_ignored),
72 	INIT_CONG_COUNTER(rp_cnp_handled),
73 	INIT_CONG_COUNTER(np_ecn_marked_roce_packets),
74 	INIT_CONG_COUNTER(np_cnp_sent),
75 };
76 
77 static const struct mlx5_ib_counter extended_err_cnts[] = {
78 	INIT_Q_COUNTER(resp_local_length_error),
79 	INIT_Q_COUNTER(resp_cqe_error),
80 	INIT_Q_COUNTER(req_cqe_error),
81 	INIT_Q_COUNTER(req_remote_invalid_request),
82 	INIT_Q_COUNTER(req_remote_access_errors),
83 	INIT_Q_COUNTER(resp_remote_access_errors),
84 	INIT_Q_COUNTER(resp_cqe_flush_error),
85 	INIT_Q_COUNTER(req_cqe_flush_error),
86 };
87 
88 static const struct mlx5_ib_counter roce_accl_cnts[] = {
89 	INIT_Q_COUNTER(roce_adp_retrans),
90 	INIT_Q_COUNTER(roce_adp_retrans_to),
91 	INIT_Q_COUNTER(roce_slow_restart),
92 	INIT_Q_COUNTER(roce_slow_restart_cnps),
93 	INIT_Q_COUNTER(roce_slow_restart_trans),
94 };
95 
96 static const struct mlx5_ib_counter vport_extended_err_cnts[] = {
97 	INIT_VPORT_Q_COUNTER(resp_local_length_error),
98 	INIT_VPORT_Q_COUNTER(resp_cqe_error),
99 	INIT_VPORT_Q_COUNTER(req_cqe_error),
100 	INIT_VPORT_Q_COUNTER(req_remote_invalid_request),
101 	INIT_VPORT_Q_COUNTER(req_remote_access_errors),
102 	INIT_VPORT_Q_COUNTER(resp_remote_access_errors),
103 	INIT_VPORT_Q_COUNTER(resp_cqe_flush_error),
104 	INIT_VPORT_Q_COUNTER(req_cqe_flush_error),
105 };
106 
107 static const struct mlx5_ib_counter vport_roce_accl_cnts[] = {
108 	INIT_VPORT_Q_COUNTER(roce_adp_retrans),
109 	INIT_VPORT_Q_COUNTER(roce_adp_retrans_to),
110 	INIT_VPORT_Q_COUNTER(roce_slow_restart),
111 	INIT_VPORT_Q_COUNTER(roce_slow_restart_cnps),
112 	INIT_VPORT_Q_COUNTER(roce_slow_restart_trans),
113 };
114 
115 #define INIT_EXT_PPCNT_COUNTER(_name)		\
116 	{ .name = #_name, .offset =	\
117 	MLX5_BYTE_OFF(ppcnt_reg, \
118 		      counter_set.eth_extended_cntrs_grp_data_layout._name##_high)}
119 
120 static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
121 	INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
122 };
123 
124 #define INIT_OP_COUNTER(_name, _type)		\
125 	{ .name = #_name, .type = MLX5_IB_OPCOUNTER_##_type}
126 
127 static const struct mlx5_ib_counter basic_op_cnts[] = {
128 	INIT_OP_COUNTER(cc_rx_ce_pkts, CC_RX_CE_PKTS),
129 };
130 
131 static const struct mlx5_ib_counter rdmarx_cnp_op_cnts[] = {
132 	INIT_OP_COUNTER(cc_rx_cnp_pkts, CC_RX_CNP_PKTS),
133 };
134 
135 static const struct mlx5_ib_counter rdmatx_cnp_op_cnts[] = {
136 	INIT_OP_COUNTER(cc_tx_cnp_pkts, CC_TX_CNP_PKTS),
137 };
138 
139 static int mlx5_ib_read_counters(struct ib_counters *counters,
140 				 struct ib_counters_read_attr *read_attr,
141 				 struct uverbs_attr_bundle *attrs)
142 {
143 	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
144 	struct mlx5_read_counters_attr mread_attr = {};
145 	struct mlx5_ib_flow_counters_desc *desc;
146 	int ret, i;
147 
148 	mutex_lock(&mcounters->mcntrs_mutex);
149 	if (mcounters->cntrs_max_index > read_attr->ncounters) {
150 		ret = -EINVAL;
151 		goto err_bound;
152 	}
153 
154 	mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64),
155 				 GFP_KERNEL);
156 	if (!mread_attr.out) {
157 		ret = -ENOMEM;
158 		goto err_bound;
159 	}
160 
161 	mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl;
162 	mread_attr.flags = read_attr->flags;
163 	ret = mcounters->read_counters(counters->device, &mread_attr);
164 	if (ret)
165 		goto err_read;
166 
167 	/* do the pass over the counters data array to assign according to the
168 	 * descriptions and indexing pairs
169 	 */
170 	desc = mcounters->counters_data;
171 	for (i = 0; i < mcounters->ncounters; i++)
172 		read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description];
173 
174 err_read:
175 	kfree(mread_attr.out);
176 err_bound:
177 	mutex_unlock(&mcounters->mcntrs_mutex);
178 	return ret;
179 }
180 
181 static int mlx5_ib_destroy_counters(struct ib_counters *counters)
182 {
183 	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
184 
185 	mlx5_ib_counters_clear_description(counters);
186 	if (mcounters->hw_cntrs_hndl)
187 		mlx5_fc_destroy(to_mdev(counters->device)->mdev,
188 				mcounters->hw_cntrs_hndl);
189 	return 0;
190 }
191 
192 static int mlx5_ib_create_counters(struct ib_counters *counters,
193 				   struct uverbs_attr_bundle *attrs)
194 {
195 	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
196 
197 	mutex_init(&mcounters->mcntrs_mutex);
198 	return 0;
199 }
200 
201 static bool vport_qcounters_supported(struct mlx5_ib_dev *dev)
202 {
203 	return MLX5_CAP_GEN(dev->mdev, q_counter_other_vport) &&
204 	       MLX5_CAP_GEN(dev->mdev, q_counter_aggregation);
205 }
206 
207 static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
208 						   u32 port_num)
209 {
210 	if ((is_mdev_switchdev_mode(dev->mdev) &&
211 	     !vport_qcounters_supported(dev)) || !port_num)
212 		return &dev->port[0].cnts;
213 
214 	return is_mdev_switchdev_mode(dev->mdev) ?
215 	       &dev->port[1].cnts : &dev->port[port_num - 1].cnts;
216 }
217 
218 /**
219  * mlx5_ib_get_counters_id - Returns counters id to use for device+port
220  * @dev:	Pointer to mlx5 IB device
221  * @port_num:	Zero based port number
222  *
223  * mlx5_ib_get_counters_id() Returns counters set id to use for given
224  * device port combination in switchdev and non switchdev mode of the
225  * parent device.
226  */
227 u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u32 port_num)
228 {
229 	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num + 1);
230 
231 	return cnts->set_id;
232 }
233 
234 static struct rdma_hw_stats *do_alloc_stats(const struct mlx5_ib_counters *cnts)
235 {
236 	struct rdma_hw_stats *stats;
237 	u32 num_hw_counters;
238 	int i;
239 
240 	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
241 			  cnts->num_ext_ppcnt_counters;
242 	stats = rdma_alloc_hw_stats_struct(cnts->descs,
243 					   num_hw_counters +
244 					   cnts->num_op_counters,
245 					   RDMA_HW_STATS_DEFAULT_LIFESPAN);
246 	if (!stats)
247 		return NULL;
248 
249 	for (i = 0; i < cnts->num_op_counters; i++)
250 		set_bit(num_hw_counters + i, stats->is_disabled);
251 
252 	return stats;
253 }
254 
255 static struct rdma_hw_stats *
256 mlx5_ib_alloc_hw_device_stats(struct ib_device *ibdev)
257 {
258 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
259 	const struct mlx5_ib_counters *cnts = &dev->port[0].cnts;
260 
261 	return do_alloc_stats(cnts);
262 }
263 
264 static struct rdma_hw_stats *
265 mlx5_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
266 {
267 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
268 	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
269 
270 	return do_alloc_stats(cnts);
271 }
272 
273 static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
274 				    const struct mlx5_ib_counters *cnts,
275 				    struct rdma_hw_stats *stats,
276 				    u16 set_id)
277 {
278 	u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
279 	u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
280 	__be32 val;
281 	int ret, i;
282 
283 	MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
284 	MLX5_SET(query_q_counter_in, in, counter_set_id, set_id);
285 	ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
286 	if (ret)
287 		return ret;
288 
289 	for (i = 0; i < cnts->num_q_counters; i++) {
290 		val = *(__be32 *)((void *)out + cnts->offsets[i]);
291 		stats->value[i] = (u64)be32_to_cpu(val);
292 	}
293 
294 	return 0;
295 }
296 
297 static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
298 					    const struct mlx5_ib_counters *cnts,
299 					    struct rdma_hw_stats *stats)
300 {
301 	int offset = cnts->num_q_counters + cnts->num_cong_counters;
302 	u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
303 	int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
304 	int ret, i;
305 	void *out;
306 
307 	out = kvzalloc(sz, GFP_KERNEL);
308 	if (!out)
309 		return -ENOMEM;
310 
311 	MLX5_SET(ppcnt_reg, in, local_port, 1);
312 	MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
313 	ret = mlx5_core_access_reg(dev->mdev, in, sz, out, sz, MLX5_REG_PPCNT,
314 				   0, 0);
315 	if (ret)
316 		goto free;
317 
318 	for (i = 0; i < cnts->num_ext_ppcnt_counters; i++)
319 		stats->value[i + offset] =
320 			be64_to_cpup((__be64 *)(out +
321 				    cnts->offsets[i + offset]));
322 free:
323 	kvfree(out);
324 	return ret;
325 }
326 
327 static int mlx5_ib_query_q_counters_vport(struct mlx5_ib_dev *dev,
328 					  u32 port_num,
329 					  const struct mlx5_ib_counters *cnts,
330 					  struct rdma_hw_stats *stats)
331 
332 {
333 	u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
334 	u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
335 	struct mlx5_core_dev *mdev;
336 	__be32 val;
337 	int ret, i;
338 
339 	if (!dev->port[port_num].rep ||
340 	    dev->port[port_num].rep->vport == MLX5_VPORT_UPLINK)
341 		return 0;
342 
343 	mdev = mlx5_eswitch_get_core_dev(dev->port[port_num].rep->esw);
344 	if (!mdev)
345 		return -EOPNOTSUPP;
346 
347 	MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
348 	MLX5_SET(query_q_counter_in, in, other_vport, 1);
349 	MLX5_SET(query_q_counter_in, in, vport_number,
350 		 dev->port[port_num].rep->vport);
351 	MLX5_SET(query_q_counter_in, in, aggregate, 1);
352 	ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
353 	if (ret)
354 		return ret;
355 
356 	for (i = 0; i < cnts->num_q_counters; i++) {
357 		val = *(__be32 *)((void *)out + cnts->offsets[i]);
358 		stats->value[i] = (u64)be32_to_cpu(val);
359 	}
360 
361 	return 0;
362 }
363 
364 static int do_get_hw_stats(struct ib_device *ibdev,
365 			   struct rdma_hw_stats *stats,
366 			   u32 port_num, int index)
367 {
368 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
369 	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
370 	struct mlx5_core_dev *mdev;
371 	int ret, num_counters;
372 
373 	if (!stats)
374 		return -EINVAL;
375 
376 	num_counters = cnts->num_q_counters +
377 		       cnts->num_cong_counters +
378 		       cnts->num_ext_ppcnt_counters;
379 
380 	if (is_mdev_switchdev_mode(dev->mdev) && dev->is_rep && port_num != 0)
381 		ret = mlx5_ib_query_q_counters_vport(dev, port_num - 1, cnts,
382 						     stats);
383 	else
384 		ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats,
385 					       cnts->set_id);
386 	if (ret)
387 		return ret;
388 
389 	/* We don't expose device counters over Vports */
390 	if (is_mdev_switchdev_mode(dev->mdev) && port_num != 0)
391 		goto done;
392 
393 	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
394 		ret =  mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats);
395 		if (ret)
396 			return ret;
397 	}
398 
399 	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
400 		if (!port_num)
401 			port_num = 1;
402 		mdev = mlx5_ib_get_native_port_mdev(dev, port_num, NULL);
403 		if (!mdev) {
404 			/* If port is not affiliated yet, its in down state
405 			 * which doesn't have any counters yet, so it would be
406 			 * zero. So no need to read from the HCA.
407 			 */
408 			goto done;
409 		}
410 		ret = mlx5_lag_query_cong_counters(dev->mdev,
411 						   stats->value +
412 						   cnts->num_q_counters,
413 						   cnts->num_cong_counters,
414 						   cnts->offsets +
415 						   cnts->num_q_counters);
416 
417 		mlx5_ib_put_native_port_mdev(dev, port_num);
418 		if (ret)
419 			return ret;
420 	}
421 
422 done:
423 	return num_counters;
424 }
425 
426 static int do_get_op_stat(struct ib_device *ibdev,
427 			  struct rdma_hw_stats *stats,
428 			  u32 port_num, int index)
429 {
430 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
431 	const struct mlx5_ib_counters *cnts;
432 	const struct mlx5_ib_op_fc *opfcs;
433 	u64 packets = 0, bytes;
434 	u32 type;
435 	int ret;
436 
437 	cnts = get_counters(dev, port_num);
438 
439 	opfcs = cnts->opfcs;
440 	type = *(u32 *)cnts->descs[index].priv;
441 	if (type >= MLX5_IB_OPCOUNTER_MAX)
442 		return -EINVAL;
443 
444 	if (!opfcs[type].fc)
445 		goto out;
446 
447 	ret = mlx5_fc_query(dev->mdev, opfcs[type].fc,
448 			    &packets, &bytes);
449 	if (ret)
450 		return ret;
451 
452 out:
453 	stats->value[index] = packets;
454 	return index;
455 }
456 
457 static int do_get_op_stats(struct ib_device *ibdev,
458 			   struct rdma_hw_stats *stats,
459 			   u32 port_num)
460 {
461 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
462 	const struct mlx5_ib_counters *cnts;
463 	int index, ret, num_hw_counters;
464 
465 	cnts = get_counters(dev, port_num);
466 	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
467 			  cnts->num_ext_ppcnt_counters;
468 	for (index = num_hw_counters;
469 	     index < (num_hw_counters + cnts->num_op_counters); index++) {
470 		ret = do_get_op_stat(ibdev, stats, port_num, index);
471 		if (ret != index)
472 			return ret;
473 	}
474 
475 	return cnts->num_op_counters;
476 }
477 
478 static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
479 				struct rdma_hw_stats *stats,
480 				u32 port_num, int index)
481 {
482 	int num_counters, num_hw_counters, num_op_counters;
483 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
484 	const struct mlx5_ib_counters *cnts;
485 
486 	cnts = get_counters(dev, port_num);
487 	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
488 		cnts->num_ext_ppcnt_counters;
489 	num_counters = num_hw_counters + cnts->num_op_counters;
490 
491 	if (index < 0 || index > num_counters)
492 		return -EINVAL;
493 	else if (index > 0 && index < num_hw_counters)
494 		return do_get_hw_stats(ibdev, stats, port_num, index);
495 	else if (index >= num_hw_counters && index < num_counters)
496 		return do_get_op_stat(ibdev, stats, port_num, index);
497 
498 	num_hw_counters = do_get_hw_stats(ibdev, stats, port_num, index);
499 	if (num_hw_counters < 0)
500 		return num_hw_counters;
501 
502 	num_op_counters = do_get_op_stats(ibdev, stats, port_num);
503 	if (num_op_counters < 0)
504 		return num_op_counters;
505 
506 	return num_hw_counters + num_op_counters;
507 }
508 
509 static struct rdma_hw_stats *
510 mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
511 {
512 	struct mlx5_ib_dev *dev = to_mdev(counter->device);
513 	const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port);
514 
515 	return do_alloc_stats(cnts);
516 }
517 
518 static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
519 {
520 	struct mlx5_ib_dev *dev = to_mdev(counter->device);
521 	const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port);
522 
523 	return mlx5_ib_query_q_counters(dev->mdev, cnts,
524 					counter->stats, counter->id);
525 }
526 
527 static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
528 {
529 	struct mlx5_ib_dev *dev = to_mdev(counter->device);
530 	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
531 
532 	if (!counter->id)
533 		return 0;
534 
535 	MLX5_SET(dealloc_q_counter_in, in, opcode,
536 		 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
537 	MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id);
538 	return mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
539 }
540 
541 static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
542 				   struct ib_qp *qp)
543 {
544 	struct mlx5_ib_dev *dev = to_mdev(qp->device);
545 	int err;
546 
547 	if (!counter->id) {
548 		u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
549 		u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
550 
551 		MLX5_SET(alloc_q_counter_in, in, opcode,
552 			 MLX5_CMD_OP_ALLOC_Q_COUNTER);
553 		MLX5_SET(alloc_q_counter_in, in, uid, MLX5_SHARED_RESOURCE_UID);
554 		err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
555 		if (err)
556 			return err;
557 		counter->id =
558 			MLX5_GET(alloc_q_counter_out, out, counter_set_id);
559 	}
560 
561 	err = mlx5_ib_qp_set_counter(qp, counter);
562 	if (err)
563 		goto fail_set_counter;
564 
565 	return 0;
566 
567 fail_set_counter:
568 	mlx5_ib_counter_dealloc(counter);
569 	counter->id = 0;
570 
571 	return err;
572 }
573 
574 static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp)
575 {
576 	return mlx5_ib_qp_set_counter(qp, NULL);
577 }
578 
579 static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
580 				  struct rdma_stat_desc *descs, size_t *offsets,
581 				  u32 port_num)
582 {
583 	bool is_vport = is_mdev_switchdev_mode(dev->mdev) &&
584 			port_num != MLX5_VPORT_PF;
585 	const struct mlx5_ib_counter *names;
586 	int j = 0, i, size;
587 
588 	names = is_vport ? vport_basic_q_cnts : basic_q_cnts;
589 	size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) :
590 			  ARRAY_SIZE(basic_q_cnts);
591 	for (i = 0; i < size; i++, j++) {
592 		descs[j].name = names[i].name;
593 		offsets[j] = names[i].offset;
594 	}
595 
596 	names = is_vport ? vport_out_of_seq_q_cnts : out_of_seq_q_cnts;
597 	size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) :
598 			  ARRAY_SIZE(out_of_seq_q_cnts);
599 	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
600 		for (i = 0; i < size; i++, j++) {
601 			descs[j].name = names[i].name;
602 			offsets[j] = names[i].offset;
603 		}
604 	}
605 
606 	names = is_vport ? vport_retrans_q_cnts : retrans_q_cnts;
607 	size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) :
608 			  ARRAY_SIZE(retrans_q_cnts);
609 	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
610 		for (i = 0; i < size; i++, j++) {
611 			descs[j].name = names[i].name;
612 			offsets[j] = names[i].offset;
613 		}
614 	}
615 
616 	names = is_vport ? vport_extended_err_cnts : extended_err_cnts;
617 	size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) :
618 			  ARRAY_SIZE(extended_err_cnts);
619 	if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
620 		for (i = 0; i < size; i++, j++) {
621 			descs[j].name = names[i].name;
622 			offsets[j] = names[i].offset;
623 		}
624 	}
625 
626 	names = is_vport ? vport_roce_accl_cnts : roce_accl_cnts;
627 	size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) :
628 			  ARRAY_SIZE(roce_accl_cnts);
629 	if (MLX5_CAP_GEN(dev->mdev, roce_accl)) {
630 		for (i = 0; i < size; i++, j++) {
631 			descs[j].name = names[i].name;
632 			offsets[j] = names[i].offset;
633 		}
634 	}
635 
636 	if (is_vport)
637 		return;
638 
639 	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
640 		for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
641 			descs[j].name = cong_cnts[i].name;
642 			offsets[j] = cong_cnts[i].offset;
643 		}
644 	}
645 
646 	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
647 		for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) {
648 			descs[j].name = ext_ppcnt_cnts[i].name;
649 			offsets[j] = ext_ppcnt_cnts[i].offset;
650 		}
651 	}
652 
653 	for (i = 0; i < ARRAY_SIZE(basic_op_cnts); i++, j++) {
654 		descs[j].name = basic_op_cnts[i].name;
655 		descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
656 		descs[j].priv = &basic_op_cnts[i].type;
657 	}
658 
659 	if (MLX5_CAP_FLOWTABLE(dev->mdev,
660 			       ft_field_support_2_nic_receive_rdma.bth_opcode)) {
661 		for (i = 0; i < ARRAY_SIZE(rdmarx_cnp_op_cnts); i++, j++) {
662 			descs[j].name = rdmarx_cnp_op_cnts[i].name;
663 			descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
664 			descs[j].priv = &rdmarx_cnp_op_cnts[i].type;
665 		}
666 	}
667 
668 	if (MLX5_CAP_FLOWTABLE(dev->mdev,
669 			       ft_field_support_2_nic_transmit_rdma.bth_opcode)) {
670 		for (i = 0; i < ARRAY_SIZE(rdmatx_cnp_op_cnts); i++, j++) {
671 			descs[j].name = rdmatx_cnp_op_cnts[i].name;
672 			descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
673 			descs[j].priv = &rdmatx_cnp_op_cnts[i].type;
674 		}
675 	}
676 }
677 
678 
679 static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
680 				    struct mlx5_ib_counters *cnts, u32 port_num)
681 {
682 	bool is_vport = is_mdev_switchdev_mode(dev->mdev) &&
683 			port_num != MLX5_VPORT_PF;
684 	u32 num_counters, num_op_counters = 0, size;
685 
686 	size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) :
687 			  ARRAY_SIZE(basic_q_cnts);
688 	num_counters = size;
689 
690 	size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) :
691 			  ARRAY_SIZE(out_of_seq_q_cnts);
692 	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
693 		num_counters += size;
694 
695 	size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) :
696 			  ARRAY_SIZE(retrans_q_cnts);
697 	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
698 		num_counters += size;
699 
700 	size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) :
701 			  ARRAY_SIZE(extended_err_cnts);
702 	if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
703 		num_counters += size;
704 
705 	size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) :
706 			  ARRAY_SIZE(roce_accl_cnts);
707 	if (MLX5_CAP_GEN(dev->mdev, roce_accl))
708 		num_counters += size;
709 
710 	cnts->num_q_counters = num_counters;
711 
712 	if (is_vport)
713 		goto skip_non_qcounters;
714 
715 	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
716 		cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
717 		num_counters += ARRAY_SIZE(cong_cnts);
718 	}
719 	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
720 		cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
721 		num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
722 	}
723 
724 	num_op_counters = ARRAY_SIZE(basic_op_cnts);
725 
726 	if (MLX5_CAP_FLOWTABLE(dev->mdev,
727 			       ft_field_support_2_nic_receive_rdma.bth_opcode))
728 		num_op_counters += ARRAY_SIZE(rdmarx_cnp_op_cnts);
729 
730 	if (MLX5_CAP_FLOWTABLE(dev->mdev,
731 			       ft_field_support_2_nic_transmit_rdma.bth_opcode))
732 		num_op_counters += ARRAY_SIZE(rdmatx_cnp_op_cnts);
733 
734 skip_non_qcounters:
735 	cnts->num_op_counters = num_op_counters;
736 	num_counters += num_op_counters;
737 	cnts->descs = kcalloc(num_counters,
738 			      sizeof(struct rdma_stat_desc), GFP_KERNEL);
739 	if (!cnts->descs)
740 		return -ENOMEM;
741 
742 	cnts->offsets = kcalloc(num_counters,
743 				sizeof(*cnts->offsets), GFP_KERNEL);
744 	if (!cnts->offsets)
745 		goto err;
746 
747 	return 0;
748 
749 err:
750 	kfree(cnts->descs);
751 	cnts->descs = NULL;
752 	return -ENOMEM;
753 }
754 
755 static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
756 {
757 	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
758 	int num_cnt_ports = dev->num_ports;
759 	int i, j;
760 
761 	if (is_mdev_switchdev_mode(dev->mdev))
762 		num_cnt_ports = min(2, num_cnt_ports);
763 
764 	MLX5_SET(dealloc_q_counter_in, in, opcode,
765 		 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
766 
767 	for (i = 0; i < num_cnt_ports; i++) {
768 		if (dev->port[i].cnts.set_id) {
769 			MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
770 				 dev->port[i].cnts.set_id);
771 			mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
772 		}
773 		kfree(dev->port[i].cnts.descs);
774 		kfree(dev->port[i].cnts.offsets);
775 
776 		for (j = 0; j < MLX5_IB_OPCOUNTER_MAX; j++) {
777 			if (!dev->port[i].cnts.opfcs[j].fc)
778 				continue;
779 
780 			if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
781 				mlx5_ib_fs_remove_op_fc(dev,
782 					&dev->port[i].cnts.opfcs[j], j);
783 			mlx5_fc_destroy(dev->mdev,
784 					dev->port[i].cnts.opfcs[j].fc);
785 			dev->port[i].cnts.opfcs[j].fc = NULL;
786 		}
787 	}
788 }
789 
790 static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
791 {
792 	u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
793 	u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
794 	int num_cnt_ports = dev->num_ports;
795 	int err = 0;
796 	int i;
797 	bool is_shared;
798 
799 	MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
800 	is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
801 
802 	/*
803 	 * In switchdev we need to allocate two ports, one that is used for
804 	 * the device Q_counters and it is essentially the real Q_counters of
805 	 * this device, while the other is used as a helper for PF to be able to
806 	 * query all other vports.
807 	 */
808 	if (is_mdev_switchdev_mode(dev->mdev))
809 		num_cnt_ports = min(2, num_cnt_ports);
810 
811 	for (i = 0; i < num_cnt_ports; i++) {
812 		err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts, i);
813 		if (err)
814 			goto err_alloc;
815 
816 		mlx5_ib_fill_counters(dev, dev->port[i].cnts.descs,
817 				      dev->port[i].cnts.offsets, i);
818 
819 		MLX5_SET(alloc_q_counter_in, in, uid,
820 			 is_shared ? MLX5_SHARED_RESOURCE_UID : 0);
821 
822 		err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
823 		if (err) {
824 			mlx5_ib_warn(dev,
825 				     "couldn't allocate queue counter for port %d, err %d\n",
826 				     i + 1, err);
827 			goto err_alloc;
828 		}
829 
830 		dev->port[i].cnts.set_id =
831 			MLX5_GET(alloc_q_counter_out, out, counter_set_id);
832 	}
833 	return 0;
834 
835 err_alloc:
836 	mlx5_ib_dealloc_counters(dev);
837 	return err;
838 }
839 
840 static int read_flow_counters(struct ib_device *ibdev,
841 			      struct mlx5_read_counters_attr *read_attr)
842 {
843 	struct mlx5_fc *fc = read_attr->hw_cntrs_hndl;
844 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
845 
846 	return mlx5_fc_query(dev->mdev, fc,
847 			     &read_attr->out[IB_COUNTER_PACKETS],
848 			     &read_attr->out[IB_COUNTER_BYTES]);
849 }
850 
851 /* flow counters currently expose two counters packets and bytes */
852 #define FLOW_COUNTERS_NUM 2
853 static int counters_set_description(
854 	struct ib_counters *counters, enum mlx5_ib_counters_type counters_type,
855 	struct mlx5_ib_flow_counters_desc *desc_data, u32 ncounters)
856 {
857 	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
858 	u32 cntrs_max_index = 0;
859 	int i;
860 
861 	if (counters_type != MLX5_IB_COUNTERS_FLOW)
862 		return -EINVAL;
863 
864 	/* init the fields for the object */
865 	mcounters->type = counters_type;
866 	mcounters->read_counters = read_flow_counters;
867 	mcounters->counters_num = FLOW_COUNTERS_NUM;
868 	mcounters->ncounters = ncounters;
869 	/* each counter entry have both description and index pair */
870 	for (i = 0; i < ncounters; i++) {
871 		if (desc_data[i].description > IB_COUNTER_BYTES)
872 			return -EINVAL;
873 
874 		if (cntrs_max_index <= desc_data[i].index)
875 			cntrs_max_index = desc_data[i].index + 1;
876 	}
877 
878 	mutex_lock(&mcounters->mcntrs_mutex);
879 	mcounters->counters_data = desc_data;
880 	mcounters->cntrs_max_index = cntrs_max_index;
881 	mutex_unlock(&mcounters->mcntrs_mutex);
882 
883 	return 0;
884 }
885 
886 #define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2))
887 int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters,
888 				   struct mlx5_ib_create_flow *ucmd)
889 {
890 	struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters);
891 	struct mlx5_ib_flow_counters_data *cntrs_data = NULL;
892 	struct mlx5_ib_flow_counters_desc *desc_data = NULL;
893 	bool hw_hndl = false;
894 	int ret = 0;
895 
896 	if (ucmd && ucmd->ncounters_data != 0) {
897 		cntrs_data = ucmd->data;
898 		if (cntrs_data->ncounters > MAX_COUNTERS_NUM)
899 			return -EINVAL;
900 
901 		desc_data = kcalloc(cntrs_data->ncounters,
902 				    sizeof(*desc_data),
903 				    GFP_KERNEL);
904 		if (!desc_data)
905 			return  -ENOMEM;
906 
907 		if (copy_from_user(desc_data,
908 				   u64_to_user_ptr(cntrs_data->counters_data),
909 				   sizeof(*desc_data) * cntrs_data->ncounters)) {
910 			ret = -EFAULT;
911 			goto free;
912 		}
913 	}
914 
915 	if (!mcounters->hw_cntrs_hndl) {
916 		mcounters->hw_cntrs_hndl = mlx5_fc_create(
917 			to_mdev(ibcounters->device)->mdev, false);
918 		if (IS_ERR(mcounters->hw_cntrs_hndl)) {
919 			ret = PTR_ERR(mcounters->hw_cntrs_hndl);
920 			goto free;
921 		}
922 		hw_hndl = true;
923 	}
924 
925 	if (desc_data) {
926 		/* counters already bound to at least one flow */
927 		if (mcounters->cntrs_max_index) {
928 			ret = -EINVAL;
929 			goto free_hndl;
930 		}
931 
932 		ret = counters_set_description(ibcounters,
933 					       MLX5_IB_COUNTERS_FLOW,
934 					       desc_data,
935 					       cntrs_data->ncounters);
936 		if (ret)
937 			goto free_hndl;
938 
939 	} else if (!mcounters->cntrs_max_index) {
940 		/* counters not bound yet, must have udata passed */
941 		ret = -EINVAL;
942 		goto free_hndl;
943 	}
944 
945 	return 0;
946 
947 free_hndl:
948 	if (hw_hndl) {
949 		mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev,
950 				mcounters->hw_cntrs_hndl);
951 		mcounters->hw_cntrs_hndl = NULL;
952 	}
953 free:
954 	kfree(desc_data);
955 	return ret;
956 }
957 
958 void mlx5_ib_counters_clear_description(struct ib_counters *counters)
959 {
960 	struct mlx5_ib_mcounters *mcounters;
961 
962 	if (!counters || atomic_read(&counters->usecnt) != 1)
963 		return;
964 
965 	mcounters = to_mcounters(counters);
966 
967 	mutex_lock(&mcounters->mcntrs_mutex);
968 	kfree(mcounters->counters_data);
969 	mcounters->counters_data = NULL;
970 	mcounters->cntrs_max_index = 0;
971 	mutex_unlock(&mcounters->mcntrs_mutex);
972 }
973 
974 static int mlx5_ib_modify_stat(struct ib_device *device, u32 port,
975 			       unsigned int index, bool enable)
976 {
977 	struct mlx5_ib_dev *dev = to_mdev(device);
978 	struct mlx5_ib_counters *cnts;
979 	struct mlx5_ib_op_fc *opfc;
980 	u32 num_hw_counters, type;
981 	int ret;
982 
983 	cnts = &dev->port[port - 1].cnts;
984 	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
985 		cnts->num_ext_ppcnt_counters;
986 	if (index < num_hw_counters ||
987 	    index >= (num_hw_counters + cnts->num_op_counters))
988 		return -EINVAL;
989 
990 	if (!(cnts->descs[index].flags & IB_STAT_FLAG_OPTIONAL))
991 		return -EINVAL;
992 
993 	type = *(u32 *)cnts->descs[index].priv;
994 	if (type >= MLX5_IB_OPCOUNTER_MAX)
995 		return -EINVAL;
996 
997 	opfc = &cnts->opfcs[type];
998 
999 	if (enable) {
1000 		if (opfc->fc)
1001 			return -EEXIST;
1002 
1003 		opfc->fc = mlx5_fc_create(dev->mdev, false);
1004 		if (IS_ERR(opfc->fc))
1005 			return PTR_ERR(opfc->fc);
1006 
1007 		ret = mlx5_ib_fs_add_op_fc(dev, port, opfc, type);
1008 		if (ret) {
1009 			mlx5_fc_destroy(dev->mdev, opfc->fc);
1010 			opfc->fc = NULL;
1011 		}
1012 		return ret;
1013 	}
1014 
1015 	if (!opfc->fc)
1016 		return -EINVAL;
1017 
1018 	mlx5_ib_fs_remove_op_fc(dev, opfc, type);
1019 	mlx5_fc_destroy(dev->mdev, opfc->fc);
1020 	opfc->fc = NULL;
1021 	return 0;
1022 }
1023 
1024 static const struct ib_device_ops hw_stats_ops = {
1025 	.alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats,
1026 	.get_hw_stats = mlx5_ib_get_hw_stats,
1027 	.counter_bind_qp = mlx5_ib_counter_bind_qp,
1028 	.counter_unbind_qp = mlx5_ib_counter_unbind_qp,
1029 	.counter_dealloc = mlx5_ib_counter_dealloc,
1030 	.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
1031 	.counter_update_stats = mlx5_ib_counter_update_stats,
1032 	.modify_hw_stat = IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) ?
1033 			  mlx5_ib_modify_stat : NULL,
1034 };
1035 
1036 static const struct ib_device_ops hw_switchdev_vport_op = {
1037 	.alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats,
1038 };
1039 
1040 static const struct ib_device_ops hw_switchdev_stats_ops = {
1041 	.alloc_hw_device_stats = mlx5_ib_alloc_hw_device_stats,
1042 	.get_hw_stats = mlx5_ib_get_hw_stats,
1043 	.counter_bind_qp = mlx5_ib_counter_bind_qp,
1044 	.counter_unbind_qp = mlx5_ib_counter_unbind_qp,
1045 	.counter_dealloc = mlx5_ib_counter_dealloc,
1046 	.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
1047 	.counter_update_stats = mlx5_ib_counter_update_stats,
1048 };
1049 
1050 static const struct ib_device_ops counters_ops = {
1051 	.create_counters = mlx5_ib_create_counters,
1052 	.destroy_counters = mlx5_ib_destroy_counters,
1053 	.read_counters = mlx5_ib_read_counters,
1054 
1055 	INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs),
1056 };
1057 
1058 int mlx5_ib_counters_init(struct mlx5_ib_dev *dev)
1059 {
1060 	ib_set_device_ops(&dev->ib_dev, &counters_ops);
1061 
1062 	if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
1063 		return 0;
1064 
1065 	if (is_mdev_switchdev_mode(dev->mdev)) {
1066 		ib_set_device_ops(&dev->ib_dev, &hw_switchdev_stats_ops);
1067 		if (vport_qcounters_supported(dev))
1068 			ib_set_device_ops(&dev->ib_dev, &hw_switchdev_vport_op);
1069 	} else
1070 		ib_set_device_ops(&dev->ib_dev, &hw_stats_ops);
1071 	return mlx5_ib_alloc_counters(dev);
1072 }
1073 
1074 void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev)
1075 {
1076 	if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
1077 		return;
1078 
1079 	mlx5_ib_dealloc_counters(dev);
1080 }
1081