xref: /linux/drivers/infiniband/hw/mlx5/counters.c (revision 22c55fb9eb92395d999b8404d73e58540d11bdd8)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /*
3  * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
4  */
5 
6 #include "mlx5_ib.h"
7 #include <linux/mlx5/eswitch.h>
8 #include <linux/mlx5/vport.h>
9 #include "counters.h"
10 #include "ib_rep.h"
11 #include "qp.h"
12 
13 struct mlx5_ib_counter {
14 	const char *name;
15 	size_t offset;
16 	u32 type;
17 };
18 
19 struct mlx5_rdma_counter {
20 	struct rdma_counter rdma_counter;
21 
22 	struct mlx5_fc *fc[MLX5_IB_OPCOUNTER_MAX];
23 	struct xarray qpn_opfc_xa;
24 };
25 
26 static struct mlx5_rdma_counter *to_mcounter(struct rdma_counter *counter)
27 {
28 	return container_of(counter, struct mlx5_rdma_counter, rdma_counter);
29 }
30 
31 #define INIT_Q_COUNTER(_name)		\
32 	{ .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}
33 
34 #define INIT_VPORT_Q_COUNTER(_name)		\
35 	{ .name = "vport_" #_name, .offset =	\
36 		MLX5_BYTE_OFF(query_q_counter_out, _name)}
37 
38 static const struct mlx5_ib_counter basic_q_cnts[] = {
39 	INIT_Q_COUNTER(rx_write_requests),
40 	INIT_Q_COUNTER(rx_read_requests),
41 	INIT_Q_COUNTER(rx_atomic_requests),
42 	INIT_Q_COUNTER(rx_dct_connect),
43 	INIT_Q_COUNTER(out_of_buffer),
44 };
45 
46 static const struct mlx5_ib_counter out_of_seq_q_cnts[] = {
47 	INIT_Q_COUNTER(out_of_sequence),
48 };
49 
50 static const struct mlx5_ib_counter retrans_q_cnts[] = {
51 	INIT_Q_COUNTER(duplicate_request),
52 	INIT_Q_COUNTER(rnr_nak_retry_err),
53 	INIT_Q_COUNTER(packet_seq_err),
54 	INIT_Q_COUNTER(implied_nak_seq_err),
55 	INIT_Q_COUNTER(local_ack_timeout_err),
56 };
57 
58 static const struct mlx5_ib_counter vport_basic_q_cnts[] = {
59 	INIT_VPORT_Q_COUNTER(rx_write_requests),
60 	INIT_VPORT_Q_COUNTER(rx_read_requests),
61 	INIT_VPORT_Q_COUNTER(rx_atomic_requests),
62 	INIT_VPORT_Q_COUNTER(rx_dct_connect),
63 	INIT_VPORT_Q_COUNTER(out_of_buffer),
64 };
65 
66 static const struct mlx5_ib_counter vport_out_of_seq_q_cnts[] = {
67 	INIT_VPORT_Q_COUNTER(out_of_sequence),
68 };
69 
70 static const struct mlx5_ib_counter vport_retrans_q_cnts[] = {
71 	INIT_VPORT_Q_COUNTER(duplicate_request),
72 	INIT_VPORT_Q_COUNTER(rnr_nak_retry_err),
73 	INIT_VPORT_Q_COUNTER(packet_seq_err),
74 	INIT_VPORT_Q_COUNTER(implied_nak_seq_err),
75 	INIT_VPORT_Q_COUNTER(local_ack_timeout_err),
76 };
77 
78 #define INIT_CONG_COUNTER(_name)		\
79 	{ .name = #_name, .offset =	\
80 		MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)}
81 
82 static const struct mlx5_ib_counter cong_cnts[] = {
83 	INIT_CONG_COUNTER(rp_cnp_ignored),
84 	INIT_CONG_COUNTER(rp_cnp_handled),
85 	INIT_CONG_COUNTER(np_ecn_marked_roce_packets),
86 	INIT_CONG_COUNTER(np_cnp_sent),
87 };
88 
89 static const struct mlx5_ib_counter extended_err_cnts[] = {
90 	INIT_Q_COUNTER(resp_local_length_error),
91 	INIT_Q_COUNTER(resp_cqe_error),
92 	INIT_Q_COUNTER(req_cqe_error),
93 	INIT_Q_COUNTER(req_remote_invalid_request),
94 	INIT_Q_COUNTER(req_remote_access_errors),
95 	INIT_Q_COUNTER(resp_remote_access_errors),
96 	INIT_Q_COUNTER(resp_cqe_flush_error),
97 	INIT_Q_COUNTER(req_cqe_flush_error),
98 	INIT_Q_COUNTER(req_transport_retries_exceeded),
99 	INIT_Q_COUNTER(req_rnr_retries_exceeded),
100 };
101 
102 static const struct mlx5_ib_counter roce_accl_cnts[] = {
103 	INIT_Q_COUNTER(roce_adp_retrans),
104 	INIT_Q_COUNTER(roce_adp_retrans_to),
105 	INIT_Q_COUNTER(roce_slow_restart),
106 	INIT_Q_COUNTER(roce_slow_restart_cnps),
107 	INIT_Q_COUNTER(roce_slow_restart_trans),
108 };
109 
110 static const struct mlx5_ib_counter vport_extended_err_cnts[] = {
111 	INIT_VPORT_Q_COUNTER(resp_local_length_error),
112 	INIT_VPORT_Q_COUNTER(resp_cqe_error),
113 	INIT_VPORT_Q_COUNTER(req_cqe_error),
114 	INIT_VPORT_Q_COUNTER(req_remote_invalid_request),
115 	INIT_VPORT_Q_COUNTER(req_remote_access_errors),
116 	INIT_VPORT_Q_COUNTER(resp_remote_access_errors),
117 	INIT_VPORT_Q_COUNTER(resp_cqe_flush_error),
118 	INIT_VPORT_Q_COUNTER(req_cqe_flush_error),
119 	INIT_VPORT_Q_COUNTER(req_transport_retries_exceeded),
120 	INIT_VPORT_Q_COUNTER(req_rnr_retries_exceeded),
121 };
122 
123 static const struct mlx5_ib_counter vport_roce_accl_cnts[] = {
124 	INIT_VPORT_Q_COUNTER(roce_adp_retrans),
125 	INIT_VPORT_Q_COUNTER(roce_adp_retrans_to),
126 	INIT_VPORT_Q_COUNTER(roce_slow_restart),
127 	INIT_VPORT_Q_COUNTER(roce_slow_restart_cnps),
128 	INIT_VPORT_Q_COUNTER(roce_slow_restart_trans),
129 };
130 
131 #define INIT_EXT_PPCNT_COUNTER(_name)		\
132 	{ .name = #_name, .offset =	\
133 	MLX5_BYTE_OFF(ppcnt_reg, \
134 		      counter_set.eth_extended_cntrs_grp_data_layout._name##_high)}
135 
136 static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
137 	INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
138 };
139 
140 #define INIT_OP_COUNTER(_name, _type)		\
141 	{ .name = #_name, .type = MLX5_IB_OPCOUNTER_##_type}
142 
143 static const struct mlx5_ib_counter basic_op_cnts[] = {
144 	INIT_OP_COUNTER(cc_rx_ce_pkts, CC_RX_CE_PKTS),
145 };
146 
147 static const struct mlx5_ib_counter rdmarx_cnp_op_cnts[] = {
148 	INIT_OP_COUNTER(cc_rx_cnp_pkts, CC_RX_CNP_PKTS),
149 };
150 
151 static const struct mlx5_ib_counter rdmatx_cnp_op_cnts[] = {
152 	INIT_OP_COUNTER(cc_tx_cnp_pkts, CC_TX_CNP_PKTS),
153 };
154 
155 static const struct mlx5_ib_counter packets_op_cnts[] = {
156 	INIT_OP_COUNTER(rdma_tx_packets, RDMA_TX_PACKETS),
157 	INIT_OP_COUNTER(rdma_tx_bytes, RDMA_TX_BYTES),
158 	INIT_OP_COUNTER(rdma_rx_packets, RDMA_RX_PACKETS),
159 	INIT_OP_COUNTER(rdma_rx_bytes, RDMA_RX_BYTES),
160 };
161 
162 static int mlx5_ib_read_counters(struct ib_counters *counters,
163 				 struct ib_counters_read_attr *read_attr,
164 				 struct uverbs_attr_bundle *attrs)
165 {
166 	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
167 	struct mlx5_read_counters_attr mread_attr = {};
168 	struct mlx5_ib_flow_counters_desc *desc;
169 	int ret, i;
170 
171 	mutex_lock(&mcounters->mcntrs_mutex);
172 	if (mcounters->cntrs_max_index > read_attr->ncounters) {
173 		ret = -EINVAL;
174 		goto err_bound;
175 	}
176 
177 	mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64),
178 				 GFP_KERNEL);
179 	if (!mread_attr.out) {
180 		ret = -ENOMEM;
181 		goto err_bound;
182 	}
183 
184 	mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl;
185 	mread_attr.flags = read_attr->flags;
186 	ret = mcounters->read_counters(counters->device, &mread_attr);
187 	if (ret)
188 		goto err_read;
189 
190 	/* do the pass over the counters data array to assign according to the
191 	 * descriptions and indexing pairs
192 	 */
193 	desc = mcounters->counters_data;
194 	for (i = 0; i < mcounters->ncounters; i++)
195 		read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description];
196 
197 err_read:
198 	kfree(mread_attr.out);
199 err_bound:
200 	mutex_unlock(&mcounters->mcntrs_mutex);
201 	return ret;
202 }
203 
204 static int mlx5_ib_destroy_counters(struct ib_counters *counters)
205 {
206 	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
207 
208 	mlx5_ib_counters_clear_description(counters);
209 	if (mcounters->hw_cntrs_hndl)
210 		mlx5_fc_destroy(to_mdev(counters->device)->mdev,
211 				mcounters->hw_cntrs_hndl);
212 	return 0;
213 }
214 
215 static int mlx5_ib_create_counters(struct ib_counters *counters,
216 				   struct uverbs_attr_bundle *attrs)
217 {
218 	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
219 
220 	mutex_init(&mcounters->mcntrs_mutex);
221 	return 0;
222 }
223 
224 static bool vport_qcounters_supported(struct mlx5_ib_dev *dev)
225 {
226 	return MLX5_CAP_GEN(dev->mdev, q_counter_other_vport) &&
227 	       MLX5_CAP_GEN(dev->mdev, q_counter_aggregation);
228 }
229 
230 static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
231 						   u32 port_num)
232 {
233 	if ((is_mdev_switchdev_mode(dev->mdev) &&
234 	     !vport_qcounters_supported(dev)) || !port_num)
235 		return &dev->port[0].cnts;
236 
237 	return is_mdev_switchdev_mode(dev->mdev) ?
238 	       &dev->port[1].cnts : &dev->port[port_num - 1].cnts;
239 }
240 
241 /**
242  * mlx5_ib_get_counters_id - Returns counters id to use for device+port
243  * @dev:	Pointer to mlx5 IB device
244  * @port_num:	Zero based port number
245  *
246  * mlx5_ib_get_counters_id() Returns counters set id to use for given
247  * device port combination in switchdev and non switchdev mode of the
248  * parent device.
249  */
250 u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u32 port_num)
251 {
252 	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num + 1);
253 
254 	return cnts->set_id;
255 }
256 
257 static struct rdma_hw_stats *do_alloc_stats(const struct mlx5_ib_counters *cnts)
258 {
259 	struct rdma_hw_stats *stats;
260 	u32 num_hw_counters;
261 	int i;
262 
263 	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
264 			  cnts->num_ext_ppcnt_counters;
265 	stats = rdma_alloc_hw_stats_struct(cnts->descs,
266 					   num_hw_counters +
267 					   cnts->num_op_counters,
268 					   RDMA_HW_STATS_DEFAULT_LIFESPAN);
269 	if (!stats)
270 		return NULL;
271 
272 	for (i = 0; i < cnts->num_op_counters; i++)
273 		set_bit(num_hw_counters + i, stats->is_disabled);
274 
275 	return stats;
276 }
277 
278 static struct rdma_hw_stats *
279 mlx5_ib_alloc_hw_device_stats(struct ib_device *ibdev)
280 {
281 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
282 	const struct mlx5_ib_counters *cnts = &dev->port[0].cnts;
283 
284 	return do_alloc_stats(cnts);
285 }
286 
287 static struct rdma_hw_stats *
288 mlx5_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
289 {
290 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
291 	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
292 
293 	return do_alloc_stats(cnts);
294 }
295 
296 static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
297 				    const struct mlx5_ib_counters *cnts,
298 				    struct rdma_hw_stats *stats,
299 				    u16 set_id)
300 {
301 	u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
302 	u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
303 	__be32 val;
304 	int ret, i;
305 
306 	MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
307 	MLX5_SET(query_q_counter_in, in, counter_set_id, set_id);
308 	ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
309 	if (ret)
310 		return ret;
311 
312 	for (i = 0; i < cnts->num_q_counters; i++) {
313 		val = *(__be32 *)((void *)out + cnts->offsets[i]);
314 		stats->value[i] = (u64)be32_to_cpu(val);
315 	}
316 
317 	return 0;
318 }
319 
320 static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
321 					    const struct mlx5_ib_counters *cnts,
322 					    struct rdma_hw_stats *stats)
323 {
324 	int offset = cnts->num_q_counters + cnts->num_cong_counters;
325 	u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
326 	int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
327 	int ret, i;
328 	void *out;
329 
330 	out = kvzalloc(sz, GFP_KERNEL);
331 	if (!out)
332 		return -ENOMEM;
333 
334 	MLX5_SET(ppcnt_reg, in, local_port, 1);
335 	MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
336 	ret = mlx5_core_access_reg(dev->mdev, in, sz, out, sz, MLX5_REG_PPCNT,
337 				   0, 0);
338 	if (ret)
339 		goto free;
340 
341 	for (i = 0; i < cnts->num_ext_ppcnt_counters; i++)
342 		stats->value[i + offset] =
343 			be64_to_cpup((__be64 *)(out +
344 				    cnts->offsets[i + offset]));
345 free:
346 	kvfree(out);
347 	return ret;
348 }
349 
350 static int mlx5_ib_query_q_counters_vport(struct mlx5_ib_dev *dev,
351 					  u32 port_num,
352 					  const struct mlx5_ib_counters *cnts,
353 					  struct rdma_hw_stats *stats)
354 
355 {
356 	u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
357 	u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
358 	struct mlx5_core_dev *mdev;
359 	__be32 val;
360 	int ret, i;
361 
362 	if (!dev->port[port_num].rep ||
363 	    dev->port[port_num].rep->vport == MLX5_VPORT_UPLINK)
364 		return 0;
365 
366 	mdev = mlx5_eswitch_get_core_dev(dev->port[port_num].rep->esw);
367 	if (!mdev)
368 		return -EOPNOTSUPP;
369 
370 	MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
371 	MLX5_SET(query_q_counter_in, in, other_vport, 1);
372 	MLX5_SET(query_q_counter_in, in, vport_number,
373 		 dev->port[port_num].rep->vport);
374 	MLX5_SET(query_q_counter_in, in, aggregate, 1);
375 	ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
376 	if (ret)
377 		return ret;
378 
379 	for (i = 0; i < cnts->num_q_counters; i++) {
380 		val = *(__be32 *)((void *)out + cnts->offsets[i]);
381 		stats->value[i] = (u64)be32_to_cpu(val);
382 	}
383 
384 	return 0;
385 }
386 
387 static int do_get_hw_stats(struct ib_device *ibdev,
388 			   struct rdma_hw_stats *stats,
389 			   u32 port_num, int index)
390 {
391 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
392 	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
393 	struct mlx5_core_dev *mdev;
394 	int ret, num_counters;
395 
396 	if (!stats)
397 		return -EINVAL;
398 
399 	num_counters = cnts->num_q_counters +
400 		       cnts->num_cong_counters +
401 		       cnts->num_ext_ppcnt_counters;
402 
403 	if (is_mdev_switchdev_mode(dev->mdev) && dev->is_rep && port_num != 0)
404 		ret = mlx5_ib_query_q_counters_vport(dev, port_num - 1, cnts,
405 						     stats);
406 	else
407 		ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats,
408 					       cnts->set_id);
409 	if (ret)
410 		return ret;
411 
412 	/* We don't expose device counters over Vports */
413 	if (is_mdev_switchdev_mode(dev->mdev) && dev->is_rep && port_num != 0)
414 		goto done;
415 
416 	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
417 		ret =  mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats);
418 		if (ret)
419 			return ret;
420 	}
421 
422 	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
423 		if (!port_num)
424 			port_num = 1;
425 		mdev = mlx5_ib_get_native_port_mdev(dev, port_num, NULL);
426 		if (!mdev) {
427 			/* If port is not affiliated yet, its in down state
428 			 * which doesn't have any counters yet, so it would be
429 			 * zero. So no need to read from the HCA.
430 			 */
431 			goto done;
432 		}
433 		ret = mlx5_lag_query_cong_counters(mdev,
434 						   stats->value +
435 						   cnts->num_q_counters,
436 						   cnts->num_cong_counters,
437 						   cnts->offsets +
438 						   cnts->num_q_counters);
439 
440 		mlx5_ib_put_native_port_mdev(dev, port_num);
441 		if (ret)
442 			return ret;
443 	}
444 
445 done:
446 	return num_counters;
447 }
448 
449 static bool is_rdma_bytes_counter(u32 type)
450 {
451 	if (type == MLX5_IB_OPCOUNTER_RDMA_TX_BYTES ||
452 	    type == MLX5_IB_OPCOUNTER_RDMA_RX_BYTES ||
453 	    type == MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP ||
454 	    type == MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP)
455 		return true;
456 
457 	return false;
458 }
459 
460 static int do_per_qp_get_op_stat(struct rdma_counter *counter)
461 {
462 	struct mlx5_ib_dev *dev = to_mdev(counter->device);
463 	const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port);
464 	struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
465 	int i, ret, index, num_hw_counters;
466 	u64 packets = 0, bytes = 0;
467 
468 	for (i = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
469 	     i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP; i++) {
470 		if (!mcounter->fc[i])
471 			continue;
472 
473 		ret = mlx5_fc_query(dev->mdev, mcounter->fc[i],
474 				    &packets, &bytes);
475 		if (ret)
476 			return ret;
477 
478 		num_hw_counters = cnts->num_q_counters +
479 				  cnts->num_cong_counters +
480 				  cnts->num_ext_ppcnt_counters;
481 
482 		index = i - MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP +
483 			num_hw_counters;
484 
485 		if (is_rdma_bytes_counter(i))
486 			counter->stats->value[index] = bytes;
487 		else
488 			counter->stats->value[index] = packets;
489 
490 		clear_bit(index, counter->stats->is_disabled);
491 	}
492 	return 0;
493 }
494 
495 static int do_get_op_stat(struct ib_device *ibdev,
496 			  struct rdma_hw_stats *stats,
497 			  u32 port_num, int index)
498 {
499 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
500 	const struct mlx5_ib_counters *cnts;
501 	const struct mlx5_ib_op_fc *opfcs;
502 	u64 packets, bytes;
503 	u32 type;
504 	int ret;
505 
506 	cnts = get_counters(dev, port_num);
507 
508 	opfcs = cnts->opfcs;
509 	type = *(u32 *)cnts->descs[index].priv;
510 	if (type >= MLX5_IB_OPCOUNTER_MAX)
511 		return -EINVAL;
512 
513 	if (!opfcs[type].fc)
514 		goto out;
515 
516 	ret = mlx5_fc_query(dev->mdev, opfcs[type].fc,
517 			    &packets, &bytes);
518 	if (ret)
519 		return ret;
520 
521 	if (is_rdma_bytes_counter(type))
522 		stats->value[index] = bytes;
523 	else
524 		stats->value[index] = packets;
525 out:
526 	return index;
527 }
528 
529 static int do_get_op_stats(struct ib_device *ibdev,
530 			   struct rdma_hw_stats *stats,
531 			   u32 port_num)
532 {
533 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
534 	const struct mlx5_ib_counters *cnts;
535 	int index, ret, num_hw_counters;
536 
537 	cnts = get_counters(dev, port_num);
538 	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
539 			  cnts->num_ext_ppcnt_counters;
540 	for (index = num_hw_counters;
541 	     index < (num_hw_counters + cnts->num_op_counters); index++) {
542 		ret = do_get_op_stat(ibdev, stats, port_num, index);
543 		if (ret != index)
544 			return ret;
545 	}
546 
547 	return cnts->num_op_counters;
548 }
549 
550 static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
551 				struct rdma_hw_stats *stats,
552 				u32 port_num, int index)
553 {
554 	int num_counters, num_hw_counters, num_op_counters;
555 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
556 	const struct mlx5_ib_counters *cnts;
557 
558 	cnts = get_counters(dev, port_num);
559 	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
560 		cnts->num_ext_ppcnt_counters;
561 	num_counters = num_hw_counters + cnts->num_op_counters;
562 
563 	if (index < 0 || index > num_counters)
564 		return -EINVAL;
565 	else if (index > 0 && index < num_hw_counters)
566 		return do_get_hw_stats(ibdev, stats, port_num, index);
567 	else if (index >= num_hw_counters && index < num_counters)
568 		return do_get_op_stat(ibdev, stats, port_num, index);
569 
570 	num_hw_counters = do_get_hw_stats(ibdev, stats, port_num, index);
571 	if (num_hw_counters < 0)
572 		return num_hw_counters;
573 
574 	num_op_counters = do_get_op_stats(ibdev, stats, port_num);
575 	if (num_op_counters < 0)
576 		return num_op_counters;
577 
578 	return num_hw_counters + num_op_counters;
579 }
580 
581 static struct rdma_hw_stats *
582 mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
583 {
584 	struct mlx5_ib_dev *dev = to_mdev(counter->device);
585 	const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port);
586 
587 	return do_alloc_stats(cnts);
588 }
589 
590 static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
591 {
592 	struct mlx5_ib_dev *dev = to_mdev(counter->device);
593 	const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port);
594 	int ret;
595 
596 	ret = mlx5_ib_query_q_counters(dev->mdev, cnts, counter->stats,
597 				       counter->id);
598 	if (ret)
599 		return ret;
600 
601 	if (!counter->mode.bind_opcnt)
602 		return 0;
603 
604 	return do_per_qp_get_op_stat(counter);
605 }
606 
607 static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
608 {
609 	struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
610 	struct mlx5_ib_dev *dev = to_mdev(counter->device);
611 	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
612 
613 	if (!counter->id)
614 		return 0;
615 
616 	WARN_ON(!xa_empty(&mcounter->qpn_opfc_xa));
617 	mlx5r_fs_destroy_fcs(dev, mcounter->fc);
618 	MLX5_SET(dealloc_q_counter_in, in, opcode,
619 		 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
620 	MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id);
621 	return mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
622 }
623 
624 static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
625 				   struct ib_qp *qp, u32 port)
626 {
627 	struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
628 	struct mlx5_ib_dev *dev = to_mdev(qp->device);
629 	bool new = false;
630 	int err;
631 
632 	if (!counter->id) {
633 		u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
634 		u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
635 
636 		MLX5_SET(alloc_q_counter_in, in, opcode,
637 			 MLX5_CMD_OP_ALLOC_Q_COUNTER);
638 		MLX5_SET(alloc_q_counter_in, in, uid, MLX5_SHARED_RESOURCE_UID);
639 		err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
640 		if (err)
641 			return err;
642 		counter->id =
643 			MLX5_GET(alloc_q_counter_out, out, counter_set_id);
644 		new = true;
645 	}
646 
647 	err = mlx5_ib_qp_set_counter(qp, counter);
648 	if (err)
649 		goto fail_set_counter;
650 
651 	if (!counter->mode.bind_opcnt)
652 		return 0;
653 
654 	err = mlx5r_fs_bind_op_fc(qp, mcounter->fc, &mcounter->qpn_opfc_xa,
655 				  port);
656 	if (err)
657 		goto fail_bind_op_fc;
658 
659 	return 0;
660 
661 fail_bind_op_fc:
662 	mlx5_ib_qp_set_counter(qp, NULL);
663 fail_set_counter:
664 	if (new) {
665 		mlx5_ib_counter_dealloc(counter);
666 		counter->id = 0;
667 	}
668 
669 	return err;
670 }
671 
672 static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp, u32 port)
673 {
674 	struct rdma_counter *counter = qp->counter;
675 	struct mlx5_rdma_counter *mcounter;
676 	int err;
677 
678 	mcounter = to_mcounter(counter);
679 
680 	mlx5r_fs_unbind_op_fc(qp, &mcounter->qpn_opfc_xa);
681 
682 	err = mlx5_ib_qp_set_counter(qp, NULL);
683 	if (err)
684 		goto fail_set_counter;
685 
686 	return 0;
687 
688 fail_set_counter:
689 	if (counter->mode.bind_opcnt)
690 		mlx5r_fs_bind_op_fc(qp, mcounter->fc,
691 				    &mcounter->qpn_opfc_xa, port);
692 	return err;
693 }
694 
695 static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
696 				  struct rdma_stat_desc *descs, size_t *offsets,
697 				  u32 port_num)
698 {
699 	bool is_vport = is_mdev_switchdev_mode(dev->mdev) &&
700 			port_num != MLX5_VPORT_PF;
701 	const struct mlx5_ib_counter *names;
702 	int j = 0, i, size;
703 
704 	names = is_vport ? vport_basic_q_cnts : basic_q_cnts;
705 	size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) :
706 			  ARRAY_SIZE(basic_q_cnts);
707 	for (i = 0; i < size; i++, j++) {
708 		descs[j].name = names[i].name;
709 		offsets[j] = names[i].offset;
710 	}
711 
712 	names = is_vport ? vport_out_of_seq_q_cnts : out_of_seq_q_cnts;
713 	size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) :
714 			  ARRAY_SIZE(out_of_seq_q_cnts);
715 	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
716 		for (i = 0; i < size; i++, j++) {
717 			descs[j].name = names[i].name;
718 			offsets[j] = names[i].offset;
719 		}
720 	}
721 
722 	names = is_vport ? vport_retrans_q_cnts : retrans_q_cnts;
723 	size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) :
724 			  ARRAY_SIZE(retrans_q_cnts);
725 	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
726 		for (i = 0; i < size; i++, j++) {
727 			descs[j].name = names[i].name;
728 			offsets[j] = names[i].offset;
729 		}
730 	}
731 
732 	names = is_vport ? vport_extended_err_cnts : extended_err_cnts;
733 	size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) :
734 			  ARRAY_SIZE(extended_err_cnts);
735 	if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
736 		for (i = 0; i < size; i++, j++) {
737 			descs[j].name = names[i].name;
738 			offsets[j] = names[i].offset;
739 		}
740 	}
741 
742 	names = is_vport ? vport_roce_accl_cnts : roce_accl_cnts;
743 	size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) :
744 			  ARRAY_SIZE(roce_accl_cnts);
745 	if (MLX5_CAP_GEN(dev->mdev, roce_accl)) {
746 		for (i = 0; i < size; i++, j++) {
747 			descs[j].name = names[i].name;
748 			offsets[j] = names[i].offset;
749 		}
750 	}
751 
752 	if (is_vport)
753 		return;
754 
755 	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
756 		for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
757 			descs[j].name = cong_cnts[i].name;
758 			offsets[j] = cong_cnts[i].offset;
759 		}
760 	}
761 
762 	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
763 		for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) {
764 			descs[j].name = ext_ppcnt_cnts[i].name;
765 			offsets[j] = ext_ppcnt_cnts[i].offset;
766 		}
767 	}
768 
769 	for (i = 0; i < ARRAY_SIZE(basic_op_cnts); i++, j++) {
770 		descs[j].name = basic_op_cnts[i].name;
771 		descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
772 		descs[j].priv = &basic_op_cnts[i].type;
773 	}
774 
775 	if (MLX5_CAP_FLOWTABLE(dev->mdev,
776 			       ft_field_support_2_nic_receive_rdma.bth_opcode)) {
777 		for (i = 0; i < ARRAY_SIZE(rdmarx_cnp_op_cnts); i++, j++) {
778 			descs[j].name = rdmarx_cnp_op_cnts[i].name;
779 			descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
780 			descs[j].priv = &rdmarx_cnp_op_cnts[i].type;
781 		}
782 	}
783 
784 	if (MLX5_CAP_FLOWTABLE(dev->mdev,
785 			       ft_field_support_2_nic_transmit_rdma.bth_opcode)) {
786 		for (i = 0; i < ARRAY_SIZE(rdmatx_cnp_op_cnts); i++, j++) {
787 			descs[j].name = rdmatx_cnp_op_cnts[i].name;
788 			descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
789 			descs[j].priv = &rdmatx_cnp_op_cnts[i].type;
790 		}
791 	}
792 
793 	for (i = 0; i < ARRAY_SIZE(packets_op_cnts); i++, j++) {
794 		descs[j].name = packets_op_cnts[i].name;
795 		descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
796 		descs[j].priv = &packets_op_cnts[i].type;
797 	}
798 }
799 
800 
801 static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
802 				    struct mlx5_ib_counters *cnts, u32 port_num)
803 {
804 	bool is_vport = is_mdev_switchdev_mode(dev->mdev) &&
805 			port_num != MLX5_VPORT_PF;
806 	u32 num_counters, num_op_counters = 0, size;
807 
808 	size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) :
809 			  ARRAY_SIZE(basic_q_cnts);
810 	num_counters = size;
811 
812 	size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) :
813 			  ARRAY_SIZE(out_of_seq_q_cnts);
814 	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
815 		num_counters += size;
816 
817 	size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) :
818 			  ARRAY_SIZE(retrans_q_cnts);
819 	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
820 		num_counters += size;
821 
822 	size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) :
823 			  ARRAY_SIZE(extended_err_cnts);
824 	if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
825 		num_counters += size;
826 
827 	size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) :
828 			  ARRAY_SIZE(roce_accl_cnts);
829 	if (MLX5_CAP_GEN(dev->mdev, roce_accl))
830 		num_counters += size;
831 
832 	cnts->num_q_counters = num_counters;
833 
834 	if (is_vport)
835 		goto skip_non_qcounters;
836 
837 	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
838 		cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
839 		num_counters += ARRAY_SIZE(cong_cnts);
840 	}
841 	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
842 		cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
843 		num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
844 	}
845 
846 	num_op_counters = ARRAY_SIZE(basic_op_cnts);
847 
848 	num_op_counters += ARRAY_SIZE(packets_op_cnts);
849 
850 	if (MLX5_CAP_FLOWTABLE(dev->mdev,
851 			       ft_field_support_2_nic_receive_rdma.bth_opcode))
852 		num_op_counters += ARRAY_SIZE(rdmarx_cnp_op_cnts);
853 
854 	if (MLX5_CAP_FLOWTABLE(dev->mdev,
855 			       ft_field_support_2_nic_transmit_rdma.bth_opcode))
856 		num_op_counters += ARRAY_SIZE(rdmatx_cnp_op_cnts);
857 
858 skip_non_qcounters:
859 	cnts->num_op_counters = num_op_counters;
860 	num_counters += num_op_counters;
861 	cnts->descs = kcalloc(num_counters,
862 			      sizeof(struct rdma_stat_desc), GFP_KERNEL);
863 	if (!cnts->descs)
864 		return -ENOMEM;
865 
866 	cnts->offsets = kcalloc(num_counters,
867 				sizeof(*cnts->offsets), GFP_KERNEL);
868 	if (!cnts->offsets)
869 		goto err;
870 
871 	return 0;
872 
873 err:
874 	kfree(cnts->descs);
875 	cnts->descs = NULL;
876 	return -ENOMEM;
877 }
878 
879 /*
880  * Checks if the given flow counter type should be sharing the same flow counter
881  * with another type and if it should, checks if that other type flow counter
882  * was already created, if both conditions are met return true and the counter
883  * else return false.
884  */
885 bool mlx5r_is_opfc_shared_and_in_use(struct mlx5_ib_op_fc *opfcs, u32 type,
886 				     struct mlx5_ib_op_fc **opfc)
887 {
888 	u32 shared_fc_type;
889 
890 	switch (type) {
891 	case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS:
892 		shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES;
893 		break;
894 	case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
895 		shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS;
896 		break;
897 	case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS:
898 		shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES;
899 		break;
900 	case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
901 		shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS;
902 		break;
903 	case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP:
904 		shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP;
905 		break;
906 	case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP:
907 		shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP;
908 		break;
909 	case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP:
910 		shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP;
911 		break;
912 	case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP:
913 		shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP;
914 		break;
915 	default:
916 		return false;
917 	}
918 
919 	*opfc = &opfcs[shared_fc_type];
920 	if (!(*opfc)->fc)
921 		return false;
922 
923 	return true;
924 }
925 
926 static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
927 {
928 	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
929 	int num_cnt_ports = dev->num_ports;
930 	struct mlx5_ib_op_fc *in_use_opfc;
931 	int i, j;
932 
933 	if (is_mdev_switchdev_mode(dev->mdev))
934 		num_cnt_ports = min(2, num_cnt_ports);
935 
936 	MLX5_SET(dealloc_q_counter_in, in, opcode,
937 		 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
938 
939 	for (i = 0; i < num_cnt_ports; i++) {
940 		if (dev->port[i].cnts.set_id) {
941 			MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
942 				 dev->port[i].cnts.set_id);
943 			mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
944 		}
945 		kfree(dev->port[i].cnts.descs);
946 		kfree(dev->port[i].cnts.offsets);
947 
948 		for (j = 0; j < MLX5_IB_OPCOUNTER_MAX; j++) {
949 			if (!dev->port[i].cnts.opfcs[j].fc)
950 				continue;
951 
952 			if (mlx5r_is_opfc_shared_and_in_use(
953 				    dev->port[i].cnts.opfcs, j, &in_use_opfc))
954 				goto skip;
955 
956 			mlx5_ib_fs_remove_op_fc(dev,
957 						&dev->port[i].cnts.opfcs[j], j);
958 			mlx5_fc_destroy(dev->mdev,
959 					dev->port[i].cnts.opfcs[j].fc);
960 skip:
961 			dev->port[i].cnts.opfcs[j].fc = NULL;
962 		}
963 	}
964 }
965 
966 static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
967 {
968 	u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
969 	u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
970 	int num_cnt_ports = dev->num_ports;
971 	int err = 0;
972 	int i;
973 	bool is_shared;
974 
975 	MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
976 	is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
977 
978 	/*
979 	 * In switchdev we need to allocate two ports, one that is used for
980 	 * the device Q_counters and it is essentially the real Q_counters of
981 	 * this device, while the other is used as a helper for PF to be able to
982 	 * query all other vports.
983 	 */
984 	if (is_mdev_switchdev_mode(dev->mdev))
985 		num_cnt_ports = min(2, num_cnt_ports);
986 
987 	for (i = 0; i < num_cnt_ports; i++) {
988 		err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts, i);
989 		if (err)
990 			goto err_alloc;
991 
992 		mlx5_ib_fill_counters(dev, dev->port[i].cnts.descs,
993 				      dev->port[i].cnts.offsets, i);
994 
995 		MLX5_SET(alloc_q_counter_in, in, uid,
996 			 is_shared ? MLX5_SHARED_RESOURCE_UID : 0);
997 
998 		err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
999 		if (err) {
1000 			mlx5_ib_warn(dev,
1001 				     "couldn't allocate queue counter for port %d, err %d\n",
1002 				     i + 1, err);
1003 			goto err_alloc;
1004 		}
1005 
1006 		dev->port[i].cnts.set_id =
1007 			MLX5_GET(alloc_q_counter_out, out, counter_set_id);
1008 	}
1009 	return 0;
1010 
1011 err_alloc:
1012 	mlx5_ib_dealloc_counters(dev);
1013 	return err;
1014 }
1015 
1016 static int read_flow_counters(struct ib_device *ibdev,
1017 			      struct mlx5_read_counters_attr *read_attr)
1018 {
1019 	struct mlx5_fc *fc = read_attr->hw_cntrs_hndl;
1020 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
1021 
1022 	return mlx5_fc_query(dev->mdev, fc,
1023 			     &read_attr->out[IB_COUNTER_PACKETS],
1024 			     &read_attr->out[IB_COUNTER_BYTES]);
1025 }
1026 
1027 /* flow counters currently expose two counters packets and bytes */
1028 #define FLOW_COUNTERS_NUM 2
1029 static int counters_set_description(
1030 	struct ib_counters *counters, enum mlx5_ib_counters_type counters_type,
1031 	struct mlx5_ib_flow_counters_desc *desc_data, u32 ncounters)
1032 {
1033 	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
1034 	u32 cntrs_max_index = 0;
1035 	int i;
1036 
1037 	if (counters_type != MLX5_IB_COUNTERS_FLOW)
1038 		return -EINVAL;
1039 
1040 	/* init the fields for the object */
1041 	mcounters->type = counters_type;
1042 	mcounters->read_counters = read_flow_counters;
1043 	mcounters->counters_num = FLOW_COUNTERS_NUM;
1044 	mcounters->ncounters = ncounters;
1045 	/* each counter entry have both description and index pair */
1046 	for (i = 0; i < ncounters; i++) {
1047 		if (desc_data[i].description > IB_COUNTER_BYTES)
1048 			return -EINVAL;
1049 
1050 		if (cntrs_max_index <= desc_data[i].index)
1051 			cntrs_max_index = desc_data[i].index + 1;
1052 	}
1053 
1054 	mutex_lock(&mcounters->mcntrs_mutex);
1055 	mcounters->counters_data = desc_data;
1056 	mcounters->cntrs_max_index = cntrs_max_index;
1057 	mutex_unlock(&mcounters->mcntrs_mutex);
1058 
1059 	return 0;
1060 }
1061 
1062 #define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2))
1063 int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters,
1064 				   struct mlx5_ib_create_flow *ucmd)
1065 {
1066 	struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters);
1067 	struct mlx5_ib_flow_counters_data *cntrs_data = NULL;
1068 	struct mlx5_ib_flow_counters_desc *desc_data = NULL;
1069 	bool hw_hndl = false;
1070 	int ret = 0;
1071 
1072 	if (ucmd && ucmd->ncounters_data != 0) {
1073 		cntrs_data = ucmd->data;
1074 		if (cntrs_data->ncounters > MAX_COUNTERS_NUM)
1075 			return -EINVAL;
1076 
1077 		desc_data = kcalloc(cntrs_data->ncounters,
1078 				    sizeof(*desc_data),
1079 				    GFP_KERNEL);
1080 		if (!desc_data)
1081 			return  -ENOMEM;
1082 
1083 		if (copy_from_user(desc_data,
1084 				   u64_to_user_ptr(cntrs_data->counters_data),
1085 				   sizeof(*desc_data) * cntrs_data->ncounters)) {
1086 			ret = -EFAULT;
1087 			goto free;
1088 		}
1089 	}
1090 
1091 	if (!mcounters->hw_cntrs_hndl) {
1092 		mcounters->hw_cntrs_hndl = mlx5_fc_create(
1093 			to_mdev(ibcounters->device)->mdev, false);
1094 		if (IS_ERR(mcounters->hw_cntrs_hndl)) {
1095 			ret = PTR_ERR(mcounters->hw_cntrs_hndl);
1096 			goto free;
1097 		}
1098 		hw_hndl = true;
1099 	}
1100 
1101 	if (desc_data) {
1102 		/* counters already bound to at least one flow */
1103 		if (mcounters->cntrs_max_index) {
1104 			ret = -EINVAL;
1105 			goto free_hndl;
1106 		}
1107 
1108 		ret = counters_set_description(ibcounters,
1109 					       MLX5_IB_COUNTERS_FLOW,
1110 					       desc_data,
1111 					       cntrs_data->ncounters);
1112 		if (ret)
1113 			goto free_hndl;
1114 
1115 	} else if (!mcounters->cntrs_max_index) {
1116 		/* counters not bound yet, must have udata passed */
1117 		ret = -EINVAL;
1118 		goto free_hndl;
1119 	}
1120 
1121 	return 0;
1122 
1123 free_hndl:
1124 	if (hw_hndl) {
1125 		mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev,
1126 				mcounters->hw_cntrs_hndl);
1127 		mcounters->hw_cntrs_hndl = NULL;
1128 	}
1129 free:
1130 	kfree(desc_data);
1131 	return ret;
1132 }
1133 
1134 void mlx5_ib_counters_clear_description(struct ib_counters *counters)
1135 {
1136 	struct mlx5_ib_mcounters *mcounters;
1137 
1138 	if (!counters || atomic_read(&counters->usecnt) != 1)
1139 		return;
1140 
1141 	mcounters = to_mcounters(counters);
1142 
1143 	mutex_lock(&mcounters->mcntrs_mutex);
1144 	kfree(mcounters->counters_data);
1145 	mcounters->counters_data = NULL;
1146 	mcounters->cntrs_max_index = 0;
1147 	mutex_unlock(&mcounters->mcntrs_mutex);
1148 }
1149 
1150 static int mlx5_ib_modify_stat(struct ib_device *device, u32 port,
1151 			       unsigned int index, bool enable)
1152 {
1153 	struct mlx5_ib_dev *dev = to_mdev(device);
1154 	struct mlx5_ib_op_fc *opfc, *in_use_opfc;
1155 	struct mlx5_ib_counters *cnts;
1156 	u32 num_hw_counters, type;
1157 	int ret;
1158 
1159 	cnts = &dev->port[port - 1].cnts;
1160 	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
1161 		cnts->num_ext_ppcnt_counters;
1162 	if (index < num_hw_counters ||
1163 	    index >= (num_hw_counters + cnts->num_op_counters))
1164 		return -EINVAL;
1165 
1166 	if (!(cnts->descs[index].flags & IB_STAT_FLAG_OPTIONAL))
1167 		return -EINVAL;
1168 
1169 	type = *(u32 *)cnts->descs[index].priv;
1170 	if (type >= MLX5_IB_OPCOUNTER_MAX)
1171 		return -EINVAL;
1172 
1173 	opfc = &cnts->opfcs[type];
1174 
1175 	if (enable) {
1176 		if (opfc->fc)
1177 			return -EEXIST;
1178 
1179 		if (mlx5r_is_opfc_shared_and_in_use(cnts->opfcs, type,
1180 						    &in_use_opfc)) {
1181 			opfc->fc = in_use_opfc->fc;
1182 			opfc->rule[0] = in_use_opfc->rule[0];
1183 			return 0;
1184 		}
1185 
1186 		opfc->fc = mlx5_fc_create(dev->mdev, false);
1187 		if (IS_ERR(opfc->fc))
1188 			return PTR_ERR(opfc->fc);
1189 
1190 		ret = mlx5_ib_fs_add_op_fc(dev, port, opfc, type);
1191 		if (ret) {
1192 			mlx5_fc_destroy(dev->mdev, opfc->fc);
1193 			opfc->fc = NULL;
1194 		}
1195 		return ret;
1196 	}
1197 
1198 	if (!opfc->fc)
1199 		return -EINVAL;
1200 
1201 	if (mlx5r_is_opfc_shared_and_in_use(cnts->opfcs, type, &in_use_opfc))
1202 		goto out;
1203 
1204 	mlx5_ib_fs_remove_op_fc(dev, opfc, type);
1205 	mlx5_fc_destroy(dev->mdev, opfc->fc);
1206 out:
1207 	opfc->fc = NULL;
1208 	return 0;
1209 }
1210 
1211 static void mlx5_ib_counter_init(struct rdma_counter *counter)
1212 {
1213 	struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
1214 
1215 	xa_init(&mcounter->qpn_opfc_xa);
1216 }
1217 
1218 static const struct ib_device_ops hw_stats_ops = {
1219 	.alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats,
1220 	.get_hw_stats = mlx5_ib_get_hw_stats,
1221 	.counter_bind_qp = mlx5_ib_counter_bind_qp,
1222 	.counter_unbind_qp = mlx5_ib_counter_unbind_qp,
1223 	.counter_dealloc = mlx5_ib_counter_dealloc,
1224 	.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
1225 	.counter_update_stats = mlx5_ib_counter_update_stats,
1226 	.modify_hw_stat = mlx5_ib_modify_stat,
1227 	.counter_init = mlx5_ib_counter_init,
1228 
1229 	INIT_RDMA_OBJ_SIZE(rdma_counter, mlx5_rdma_counter, rdma_counter),
1230 };
1231 
1232 static const struct ib_device_ops hw_switchdev_vport_op = {
1233 	.alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats,
1234 };
1235 
1236 static const struct ib_device_ops hw_switchdev_stats_ops = {
1237 	.alloc_hw_device_stats = mlx5_ib_alloc_hw_device_stats,
1238 	.get_hw_stats = mlx5_ib_get_hw_stats,
1239 	.counter_bind_qp = mlx5_ib_counter_bind_qp,
1240 	.counter_unbind_qp = mlx5_ib_counter_unbind_qp,
1241 	.counter_dealloc = mlx5_ib_counter_dealloc,
1242 	.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
1243 	.counter_update_stats = mlx5_ib_counter_update_stats,
1244 	.counter_init = mlx5_ib_counter_init,
1245 
1246 	INIT_RDMA_OBJ_SIZE(rdma_counter, mlx5_rdma_counter, rdma_counter),
1247 };
1248 
1249 static const struct ib_device_ops counters_ops = {
1250 	.create_counters = mlx5_ib_create_counters,
1251 	.destroy_counters = mlx5_ib_destroy_counters,
1252 	.read_counters = mlx5_ib_read_counters,
1253 
1254 	INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs),
1255 };
1256 
1257 int mlx5_ib_counters_init(struct mlx5_ib_dev *dev)
1258 {
1259 	ib_set_device_ops(&dev->ib_dev, &counters_ops);
1260 
1261 	if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
1262 		return 0;
1263 
1264 	if (is_mdev_switchdev_mode(dev->mdev)) {
1265 		ib_set_device_ops(&dev->ib_dev, &hw_switchdev_stats_ops);
1266 		if (vport_qcounters_supported(dev))
1267 			ib_set_device_ops(&dev->ib_dev, &hw_switchdev_vport_op);
1268 	} else
1269 		ib_set_device_ops(&dev->ib_dev, &hw_stats_ops);
1270 	return mlx5_ib_alloc_counters(dev);
1271 }
1272 
1273 void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev)
1274 {
1275 	if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
1276 		return;
1277 
1278 	mlx5_ib_dealloc_counters(dev);
1279 }
1280