xref: /linux/drivers/infiniband/hw/mlx5/counters.c (revision 336b78c655c84ce9ce47219185171b3912109c0a)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /*
3  * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
4  */
5 
6 #include "mlx5_ib.h"
7 #include <linux/mlx5/eswitch.h>
8 #include "counters.h"
9 #include "ib_rep.h"
10 #include "qp.h"
11 
/*
 * Static description of one counter exposed by this file. Offset based
 * counters (INIT_Q_COUNTER, INIT_CONG_COUNTER, INIT_EXT_PPCNT_COUNTER) fill
 * in @offset; optional counters (INIT_OP_COUNTER) fill in @type instead.
 */
struct mlx5_ib_counter {
	const char *name;	/* copied into rdma_stat_desc.name */
	size_t offset;		/* byte offset of the value in the query output */
	u32 type;		/* MLX5_IB_OPCOUNTER_* value of an optional counter */
};
17 
/*
 * Initialiser for a counter taken from the Q counter query output: the name
 * is the stringified field name and the offset is the byte offset of that
 * field inside query_q_counter_out.
 */
#define INIT_Q_COUNTER(_name)		\
	{ .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}
20 
/* Q counters that mlx5_ib_fill_counters() adds unconditionally. */
static const struct mlx5_ib_counter basic_q_cnts[] = {
	INIT_Q_COUNTER(rx_write_requests),
	INIT_Q_COUNTER(rx_read_requests),
	INIT_Q_COUNTER(rx_atomic_requests),
	INIT_Q_COUNTER(out_of_buffer),
};
27 
/* Q counters added only when the out_of_seq_cnt general capability is set. */
static const struct mlx5_ib_counter out_of_seq_q_cnts[] = {
	INIT_Q_COUNTER(out_of_sequence),
};
31 
/*
 * Q counters added only when the retransmission_q_counters general
 * capability is set.
 */
static const struct mlx5_ib_counter retrans_q_cnts[] = {
	INIT_Q_COUNTER(duplicate_request),
	INIT_Q_COUNTER(rnr_nak_retry_err),
	INIT_Q_COUNTER(packet_seq_err),
	INIT_Q_COUNTER(implied_nak_seq_err),
	INIT_Q_COUNTER(local_ack_timeout_err),
};
39 
/*
 * Initialiser for a congestion counter: the offset points at the
 * <name>_high field of query_cong_statistics_out.
 */
#define INIT_CONG_COUNTER(_name)		\
	{ .name = #_name, .offset =	\
		MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)}
43 
/*
 * Congestion counters, added only when the cc_query_allowed general
 * capability is set.
 */
static const struct mlx5_ib_counter cong_cnts[] = {
	INIT_CONG_COUNTER(rp_cnp_ignored),
	INIT_CONG_COUNTER(rp_cnp_handled),
	INIT_CONG_COUNTER(np_ecn_marked_roce_packets),
	INIT_CONG_COUNTER(np_cnp_sent),
};
50 
/*
 * Q counters added only when the enhanced_error_q_counters general
 * capability is set.
 */
static const struct mlx5_ib_counter extended_err_cnts[] = {
	INIT_Q_COUNTER(resp_local_length_error),
	INIT_Q_COUNTER(resp_cqe_error),
	INIT_Q_COUNTER(req_cqe_error),
	INIT_Q_COUNTER(req_remote_invalid_request),
	INIT_Q_COUNTER(req_remote_access_errors),
	INIT_Q_COUNTER(resp_remote_access_errors),
	INIT_Q_COUNTER(resp_cqe_flush_error),
	INIT_Q_COUNTER(req_cqe_flush_error),
};
61 
/* Q counters added only when the roce_accl general capability is set. */
static const struct mlx5_ib_counter roce_accl_cnts[] = {
	INIT_Q_COUNTER(roce_adp_retrans),
	INIT_Q_COUNTER(roce_adp_retrans_to),
	INIT_Q_COUNTER(roce_slow_restart),
	INIT_Q_COUNTER(roce_slow_restart_cnps),
	INIT_Q_COUNTER(roce_slow_restart_trans),
};
69 
/*
 * Initialiser for a counter taken from the PPCNT register: the offset points
 * at the <name>_high field of the ethernet extended counters group layout
 * inside ppcnt_reg.
 */
#define INIT_EXT_PPCNT_COUNTER(_name)		\
	{ .name = #_name, .offset =	\
	MLX5_BYTE_OFF(ppcnt_reg, \
		      counter_set.eth_extended_cntrs_grp_data_layout._name##_high)}
74 
/*
 * PPCNT based counters, added only when the rx_icrc_encapsulated_counter
 * PCAM feature is reported.
 */
static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
	INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
};
78 
/*
 * Initialiser for an optional counter: it has no offset and is identified by
 * its MLX5_IB_OPCOUNTER_<type> value instead.
 */
#define INIT_OP_COUNTER(_name, _type)		\
	{ .name = #_name, .type = MLX5_IB_OPCOUNTER_##_type}
81 
/* Optional counters that mlx5_ib_fill_counters() adds unconditionally. */
static const struct mlx5_ib_counter basic_op_cnts[] = {
	INIT_OP_COUNTER(cc_rx_ce_pkts, CC_RX_CE_PKTS),
};
85 
/*
 * Optional counters added only when the RDMA receive flow table supports
 * matching on bth_opcode.
 */
static const struct mlx5_ib_counter rdmarx_cnp_op_cnts[] = {
	INIT_OP_COUNTER(cc_rx_cnp_pkts, CC_RX_CNP_PKTS),
};
89 
/*
 * Optional counters added only when the RDMA transmit flow table supports
 * matching on bth_opcode.
 */
static const struct mlx5_ib_counter rdmatx_cnp_op_cnts[] = {
	INIT_OP_COUNTER(cc_tx_cnp_pkts, CC_TX_CNP_PKTS),
};
93 
94 static int mlx5_ib_read_counters(struct ib_counters *counters,
95 				 struct ib_counters_read_attr *read_attr,
96 				 struct uverbs_attr_bundle *attrs)
97 {
98 	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
99 	struct mlx5_read_counters_attr mread_attr = {};
100 	struct mlx5_ib_flow_counters_desc *desc;
101 	int ret, i;
102 
103 	mutex_lock(&mcounters->mcntrs_mutex);
104 	if (mcounters->cntrs_max_index > read_attr->ncounters) {
105 		ret = -EINVAL;
106 		goto err_bound;
107 	}
108 
109 	mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64),
110 				 GFP_KERNEL);
111 	if (!mread_attr.out) {
112 		ret = -ENOMEM;
113 		goto err_bound;
114 	}
115 
116 	mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl;
117 	mread_attr.flags = read_attr->flags;
118 	ret = mcounters->read_counters(counters->device, &mread_attr);
119 	if (ret)
120 		goto err_read;
121 
122 	/* do the pass over the counters data array to assign according to the
123 	 * descriptions and indexing pairs
124 	 */
125 	desc = mcounters->counters_data;
126 	for (i = 0; i < mcounters->ncounters; i++)
127 		read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description];
128 
129 err_read:
130 	kfree(mread_attr.out);
131 err_bound:
132 	mutex_unlock(&mcounters->mcntrs_mutex);
133 	return ret;
134 }
135 
136 static int mlx5_ib_destroy_counters(struct ib_counters *counters)
137 {
138 	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
139 
140 	mlx5_ib_counters_clear_description(counters);
141 	if (mcounters->hw_cntrs_hndl)
142 		mlx5_fc_destroy(to_mdev(counters->device)->mdev,
143 				mcounters->hw_cntrs_hndl);
144 	return 0;
145 }
146 
/*
 * Driver hook run when a counters object is created. It only initialises
 * the mutex that the read, set-description and clear-description paths in
 * this file take around the object's description. Always returns 0.
 */
static int mlx5_ib_create_counters(struct ib_counters *counters,
				   struct uverbs_attr_bundle *attrs)
{
	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);

	mutex_init(&mcounters->mcntrs_mutex);
	return 0;
}
155 
156 
157 static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
158 						   u32 port_num)
159 {
160 	return is_mdev_switchdev_mode(dev->mdev) ? &dev->port[0].cnts :
161 						   &dev->port[port_num].cnts;
162 }
163 
164 /**
165  * mlx5_ib_get_counters_id - Returns counters id to use for device+port
166  * @dev:	Pointer to mlx5 IB device
167  * @port_num:	Zero based port number
168  *
169  * mlx5_ib_get_counters_id() Returns counters set id to use for given
170  * device port combination in switchdev and non switchdev mode of the
171  * parent device.
172  */
173 u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u32 port_num)
174 {
175 	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
176 
177 	return cnts->set_id;
178 }
179 
180 static struct rdma_hw_stats *do_alloc_stats(const struct mlx5_ib_counters *cnts)
181 {
182 	struct rdma_hw_stats *stats;
183 	u32 num_hw_counters;
184 	int i;
185 
186 	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
187 			  cnts->num_ext_ppcnt_counters;
188 	stats = rdma_alloc_hw_stats_struct(cnts->descs,
189 					   num_hw_counters +
190 					   cnts->num_op_counters,
191 					   RDMA_HW_STATS_DEFAULT_LIFESPAN);
192 	if (!stats)
193 		return NULL;
194 
195 	for (i = 0; i < cnts->num_op_counters; i++)
196 		set_bit(num_hw_counters + i, stats->is_disabled);
197 
198 	return stats;
199 }
200 
201 static struct rdma_hw_stats *
202 mlx5_ib_alloc_hw_device_stats(struct ib_device *ibdev)
203 {
204 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
205 	const struct mlx5_ib_counters *cnts = &dev->port[0].cnts;
206 
207 	return do_alloc_stats(cnts);
208 }
209 
210 static struct rdma_hw_stats *
211 mlx5_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
212 {
213 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
214 	const struct mlx5_ib_counters *cnts = &dev->port[port_num - 1].cnts;
215 
216 	return do_alloc_stats(cnts);
217 }
218 
219 static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
220 				    const struct mlx5_ib_counters *cnts,
221 				    struct rdma_hw_stats *stats,
222 				    u16 set_id)
223 {
224 	u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
225 	u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
226 	__be32 val;
227 	int ret, i;
228 
229 	MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
230 	MLX5_SET(query_q_counter_in, in, counter_set_id, set_id);
231 	ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
232 	if (ret)
233 		return ret;
234 
235 	for (i = 0; i < cnts->num_q_counters; i++) {
236 		val = *(__be32 *)((void *)out + cnts->offsets[i]);
237 		stats->value[i] = (u64)be32_to_cpu(val);
238 	}
239 
240 	return 0;
241 }
242 
243 static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
244 					    const struct mlx5_ib_counters *cnts,
245 					    struct rdma_hw_stats *stats)
246 {
247 	int offset = cnts->num_q_counters + cnts->num_cong_counters;
248 	u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
249 	int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
250 	int ret, i;
251 	void *out;
252 
253 	out = kvzalloc(sz, GFP_KERNEL);
254 	if (!out)
255 		return -ENOMEM;
256 
257 	MLX5_SET(ppcnt_reg, in, local_port, 1);
258 	MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
259 	ret = mlx5_core_access_reg(dev->mdev, in, sz, out, sz, MLX5_REG_PPCNT,
260 				   0, 0);
261 	if (ret)
262 		goto free;
263 
264 	for (i = 0; i < cnts->num_ext_ppcnt_counters; i++)
265 		stats->value[i + offset] =
266 			be64_to_cpup((__be64 *)(out +
267 				    cnts->offsets[i + offset]));
268 free:
269 	kvfree(out);
270 	return ret;
271 }
272 
273 static int do_get_hw_stats(struct ib_device *ibdev,
274 			   struct rdma_hw_stats *stats,
275 			   u32 port_num, int index)
276 {
277 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
278 	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1);
279 	struct mlx5_core_dev *mdev;
280 	int ret, num_counters;
281 
282 	if (!stats)
283 		return -EINVAL;
284 
285 	num_counters = cnts->num_q_counters +
286 		       cnts->num_cong_counters +
287 		       cnts->num_ext_ppcnt_counters;
288 
289 	/* q_counters are per IB device, query the master mdev */
290 	ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats, cnts->set_id);
291 	if (ret)
292 		return ret;
293 
294 	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
295 		ret =  mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats);
296 		if (ret)
297 			return ret;
298 	}
299 
300 	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
301 		if (!port_num)
302 			port_num = 1;
303 		mdev = mlx5_ib_get_native_port_mdev(dev, port_num, NULL);
304 		if (!mdev) {
305 			/* If port is not affiliated yet, its in down state
306 			 * which doesn't have any counters yet, so it would be
307 			 * zero. So no need to read from the HCA.
308 			 */
309 			goto done;
310 		}
311 		ret = mlx5_lag_query_cong_counters(dev->mdev,
312 						   stats->value +
313 						   cnts->num_q_counters,
314 						   cnts->num_cong_counters,
315 						   cnts->offsets +
316 						   cnts->num_q_counters);
317 
318 		mlx5_ib_put_native_port_mdev(dev, port_num);
319 		if (ret)
320 			return ret;
321 	}
322 
323 done:
324 	return num_counters;
325 }
326 
327 static int do_get_op_stat(struct ib_device *ibdev,
328 			  struct rdma_hw_stats *stats,
329 			  u32 port_num, int index)
330 {
331 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
332 	const struct mlx5_ib_counters *cnts;
333 	const struct mlx5_ib_op_fc *opfcs;
334 	u64 packets = 0, bytes;
335 	u32 type;
336 	int ret;
337 
338 	cnts = get_counters(dev, port_num - 1);
339 	opfcs = cnts->opfcs;
340 	type = *(u32 *)cnts->descs[index].priv;
341 	if (type >= MLX5_IB_OPCOUNTER_MAX)
342 		return -EINVAL;
343 
344 	if (!opfcs[type].fc)
345 		goto out;
346 
347 	ret = mlx5_fc_query(dev->mdev, opfcs[type].fc,
348 			    &packets, &bytes);
349 	if (ret)
350 		return ret;
351 
352 out:
353 	stats->value[index] = packets;
354 	return index;
355 }
356 
357 static int do_get_op_stats(struct ib_device *ibdev,
358 			   struct rdma_hw_stats *stats,
359 			   u32 port_num)
360 {
361 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
362 	const struct mlx5_ib_counters *cnts;
363 	int index, ret, num_hw_counters;
364 
365 	cnts = get_counters(dev, port_num - 1);
366 	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
367 			  cnts->num_ext_ppcnt_counters;
368 	for (index = num_hw_counters;
369 	     index < (num_hw_counters + cnts->num_op_counters); index++) {
370 		ret = do_get_op_stat(ibdev, stats, port_num, index);
371 		if (ret != index)
372 			return ret;
373 	}
374 
375 	return cnts->num_op_counters;
376 }
377 
378 static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
379 				struct rdma_hw_stats *stats,
380 				u32 port_num, int index)
381 {
382 	int num_counters, num_hw_counters, num_op_counters;
383 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
384 	const struct mlx5_ib_counters *cnts;
385 
386 	cnts = get_counters(dev, port_num - 1);
387 	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
388 		cnts->num_ext_ppcnt_counters;
389 	num_counters = num_hw_counters + cnts->num_op_counters;
390 
391 	if (index < 0 || index > num_counters)
392 		return -EINVAL;
393 	else if (index > 0 && index < num_hw_counters)
394 		return do_get_hw_stats(ibdev, stats, port_num, index);
395 	else if (index >= num_hw_counters && index < num_counters)
396 		return do_get_op_stat(ibdev, stats, port_num, index);
397 
398 	num_hw_counters = do_get_hw_stats(ibdev, stats, port_num, index);
399 	if (num_hw_counters < 0)
400 		return num_hw_counters;
401 
402 	num_op_counters = do_get_op_stats(ibdev, stats, port_num);
403 	if (num_op_counters < 0)
404 		return num_op_counters;
405 
406 	return num_hw_counters + num_op_counters;
407 }
408 
409 static struct rdma_hw_stats *
410 mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
411 {
412 	struct mlx5_ib_dev *dev = to_mdev(counter->device);
413 	const struct mlx5_ib_counters *cnts =
414 		get_counters(dev, counter->port - 1);
415 
416 	return do_alloc_stats(cnts);
417 }
418 
419 static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
420 {
421 	struct mlx5_ib_dev *dev = to_mdev(counter->device);
422 	const struct mlx5_ib_counters *cnts =
423 		get_counters(dev, counter->port - 1);
424 
425 	return mlx5_ib_query_q_counters(dev->mdev, cnts,
426 					counter->stats, counter->id);
427 }
428 
429 static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
430 {
431 	struct mlx5_ib_dev *dev = to_mdev(counter->device);
432 	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
433 
434 	if (!counter->id)
435 		return 0;
436 
437 	MLX5_SET(dealloc_q_counter_in, in, opcode,
438 		 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
439 	MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id);
440 	return mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
441 }
442 
443 static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
444 				   struct ib_qp *qp)
445 {
446 	struct mlx5_ib_dev *dev = to_mdev(qp->device);
447 	int err;
448 
449 	if (!counter->id) {
450 		u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
451 		u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
452 
453 		MLX5_SET(alloc_q_counter_in, in, opcode,
454 			 MLX5_CMD_OP_ALLOC_Q_COUNTER);
455 		MLX5_SET(alloc_q_counter_in, in, uid, MLX5_SHARED_RESOURCE_UID);
456 		err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
457 		if (err)
458 			return err;
459 		counter->id =
460 			MLX5_GET(alloc_q_counter_out, out, counter_set_id);
461 	}
462 
463 	err = mlx5_ib_qp_set_counter(qp, counter);
464 	if (err)
465 		goto fail_set_counter;
466 
467 	return 0;
468 
469 fail_set_counter:
470 	mlx5_ib_counter_dealloc(counter);
471 	counter->id = 0;
472 
473 	return err;
474 }
475 
476 static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp)
477 {
478 	return mlx5_ib_qp_set_counter(qp, NULL);
479 }
480 
481 static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
482 				  struct rdma_stat_desc *descs, size_t *offsets)
483 {
484 	int i;
485 	int j = 0;
486 
487 	for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) {
488 		descs[j].name = basic_q_cnts[i].name;
489 		offsets[j] = basic_q_cnts[i].offset;
490 	}
491 
492 	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
493 		for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) {
494 			descs[j].name = out_of_seq_q_cnts[i].name;
495 			offsets[j] = out_of_seq_q_cnts[i].offset;
496 		}
497 	}
498 
499 	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
500 		for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) {
501 			descs[j].name = retrans_q_cnts[i].name;
502 			offsets[j] = retrans_q_cnts[i].offset;
503 		}
504 	}
505 
506 	if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
507 		for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) {
508 			descs[j].name = extended_err_cnts[i].name;
509 			offsets[j] = extended_err_cnts[i].offset;
510 		}
511 	}
512 
513 	if (MLX5_CAP_GEN(dev->mdev, roce_accl)) {
514 		for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) {
515 			descs[j].name = roce_accl_cnts[i].name;
516 			offsets[j] = roce_accl_cnts[i].offset;
517 		}
518 	}
519 
520 	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
521 		for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
522 			descs[j].name = cong_cnts[i].name;
523 			offsets[j] = cong_cnts[i].offset;
524 		}
525 	}
526 
527 	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
528 		for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) {
529 			descs[j].name = ext_ppcnt_cnts[i].name;
530 			offsets[j] = ext_ppcnt_cnts[i].offset;
531 		}
532 	}
533 
534 	for (i = 0; i < ARRAY_SIZE(basic_op_cnts); i++, j++) {
535 		descs[j].name = basic_op_cnts[i].name;
536 		descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
537 		descs[j].priv = &basic_op_cnts[i].type;
538 	}
539 
540 	if (MLX5_CAP_FLOWTABLE(dev->mdev,
541 			       ft_field_support_2_nic_receive_rdma.bth_opcode)) {
542 		for (i = 0; i < ARRAY_SIZE(rdmarx_cnp_op_cnts); i++, j++) {
543 			descs[j].name = rdmarx_cnp_op_cnts[i].name;
544 			descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
545 			descs[j].priv = &rdmarx_cnp_op_cnts[i].type;
546 		}
547 	}
548 
549 	if (MLX5_CAP_FLOWTABLE(dev->mdev,
550 			       ft_field_support_2_nic_transmit_rdma.bth_opcode)) {
551 		for (i = 0; i < ARRAY_SIZE(rdmatx_cnp_op_cnts); i++, j++) {
552 			descs[j].name = rdmatx_cnp_op_cnts[i].name;
553 			descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
554 			descs[j].priv = &rdmatx_cnp_op_cnts[i].type;
555 		}
556 	}
557 }
558 
559 
560 static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
561 				    struct mlx5_ib_counters *cnts)
562 {
563 	u32 num_counters, num_op_counters;
564 
565 	num_counters = ARRAY_SIZE(basic_q_cnts);
566 
567 	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
568 		num_counters += ARRAY_SIZE(out_of_seq_q_cnts);
569 
570 	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
571 		num_counters += ARRAY_SIZE(retrans_q_cnts);
572 
573 	if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
574 		num_counters += ARRAY_SIZE(extended_err_cnts);
575 
576 	if (MLX5_CAP_GEN(dev->mdev, roce_accl))
577 		num_counters += ARRAY_SIZE(roce_accl_cnts);
578 
579 	cnts->num_q_counters = num_counters;
580 
581 	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
582 		cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
583 		num_counters += ARRAY_SIZE(cong_cnts);
584 	}
585 	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
586 		cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
587 		num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
588 	}
589 
590 	num_op_counters = ARRAY_SIZE(basic_op_cnts);
591 
592 	if (MLX5_CAP_FLOWTABLE(dev->mdev,
593 			       ft_field_support_2_nic_receive_rdma.bth_opcode))
594 		num_op_counters += ARRAY_SIZE(rdmarx_cnp_op_cnts);
595 
596 	if (MLX5_CAP_FLOWTABLE(dev->mdev,
597 			       ft_field_support_2_nic_transmit_rdma.bth_opcode))
598 		num_op_counters += ARRAY_SIZE(rdmatx_cnp_op_cnts);
599 
600 	cnts->num_op_counters = num_op_counters;
601 	num_counters += num_op_counters;
602 	cnts->descs = kcalloc(num_counters,
603 			      sizeof(struct rdma_stat_desc), GFP_KERNEL);
604 	if (!cnts->descs)
605 		return -ENOMEM;
606 
607 	cnts->offsets = kcalloc(num_counters,
608 				sizeof(*cnts->offsets), GFP_KERNEL);
609 	if (!cnts->offsets)
610 		goto err;
611 
612 	return 0;
613 
614 err:
615 	kfree(cnts->descs);
616 	cnts->descs = NULL;
617 	return -ENOMEM;
618 }
619 
620 static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
621 {
622 	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
623 	int num_cnt_ports;
624 	int i, j;
625 
626 	num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
627 
628 	MLX5_SET(dealloc_q_counter_in, in, opcode,
629 		 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
630 
631 	for (i = 0; i < num_cnt_ports; i++) {
632 		if (dev->port[i].cnts.set_id) {
633 			MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
634 				 dev->port[i].cnts.set_id);
635 			mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
636 		}
637 		kfree(dev->port[i].cnts.descs);
638 		kfree(dev->port[i].cnts.offsets);
639 
640 		for (j = 0; j < MLX5_IB_OPCOUNTER_MAX; j++) {
641 			if (!dev->port[i].cnts.opfcs[j].fc)
642 				continue;
643 
644 			if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
645 				mlx5_ib_fs_remove_op_fc(dev,
646 					&dev->port[i].cnts.opfcs[j], j);
647 			mlx5_fc_destroy(dev->mdev,
648 					dev->port[i].cnts.opfcs[j].fc);
649 			dev->port[i].cnts.opfcs[j].fc = NULL;
650 		}
651 	}
652 }
653 
654 static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
655 {
656 	u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
657 	u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
658 	int num_cnt_ports;
659 	int err = 0;
660 	int i;
661 	bool is_shared;
662 
663 	MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
664 	is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
665 	num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
666 
667 	for (i = 0; i < num_cnt_ports; i++) {
668 		err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts);
669 		if (err)
670 			goto err_alloc;
671 
672 		mlx5_ib_fill_counters(dev, dev->port[i].cnts.descs,
673 				      dev->port[i].cnts.offsets);
674 
675 		MLX5_SET(alloc_q_counter_in, in, uid,
676 			 is_shared ? MLX5_SHARED_RESOURCE_UID : 0);
677 
678 		err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
679 		if (err) {
680 			mlx5_ib_warn(dev,
681 				     "couldn't allocate queue counter for port %d, err %d\n",
682 				     i + 1, err);
683 			goto err_alloc;
684 		}
685 
686 		dev->port[i].cnts.set_id =
687 			MLX5_GET(alloc_q_counter_out, out, counter_set_id);
688 	}
689 	return 0;
690 
691 err_alloc:
692 	mlx5_ib_dealloc_counters(dev);
693 	return err;
694 }
695 
696 static int read_flow_counters(struct ib_device *ibdev,
697 			      struct mlx5_read_counters_attr *read_attr)
698 {
699 	struct mlx5_fc *fc = read_attr->hw_cntrs_hndl;
700 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
701 
702 	return mlx5_fc_query(dev->mdev, fc,
703 			     &read_attr->out[IB_COUNTER_PACKETS],
704 			     &read_attr->out[IB_COUNTER_BYTES]);
705 }
706 
707 /* flow counters currently expose two counters packets and bytes */
708 #define FLOW_COUNTERS_NUM 2
709 static int counters_set_description(
710 	struct ib_counters *counters, enum mlx5_ib_counters_type counters_type,
711 	struct mlx5_ib_flow_counters_desc *desc_data, u32 ncounters)
712 {
713 	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
714 	u32 cntrs_max_index = 0;
715 	int i;
716 
717 	if (counters_type != MLX5_IB_COUNTERS_FLOW)
718 		return -EINVAL;
719 
720 	/* init the fields for the object */
721 	mcounters->type = counters_type;
722 	mcounters->read_counters = read_flow_counters;
723 	mcounters->counters_num = FLOW_COUNTERS_NUM;
724 	mcounters->ncounters = ncounters;
725 	/* each counter entry have both description and index pair */
726 	for (i = 0; i < ncounters; i++) {
727 		if (desc_data[i].description > IB_COUNTER_BYTES)
728 			return -EINVAL;
729 
730 		if (cntrs_max_index <= desc_data[i].index)
731 			cntrs_max_index = desc_data[i].index + 1;
732 	}
733 
734 	mutex_lock(&mcounters->mcntrs_mutex);
735 	mcounters->counters_data = desc_data;
736 	mcounters->cntrs_max_index = cntrs_max_index;
737 	mutex_unlock(&mcounters->mcntrs_mutex);
738 
739 	return 0;
740 }
741 
742 #define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2))
743 int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters,
744 				   struct mlx5_ib_create_flow *ucmd)
745 {
746 	struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters);
747 	struct mlx5_ib_flow_counters_data *cntrs_data = NULL;
748 	struct mlx5_ib_flow_counters_desc *desc_data = NULL;
749 	bool hw_hndl = false;
750 	int ret = 0;
751 
752 	if (ucmd && ucmd->ncounters_data != 0) {
753 		cntrs_data = ucmd->data;
754 		if (cntrs_data->ncounters > MAX_COUNTERS_NUM)
755 			return -EINVAL;
756 
757 		desc_data = kcalloc(cntrs_data->ncounters,
758 				    sizeof(*desc_data),
759 				    GFP_KERNEL);
760 		if (!desc_data)
761 			return  -ENOMEM;
762 
763 		if (copy_from_user(desc_data,
764 				   u64_to_user_ptr(cntrs_data->counters_data),
765 				   sizeof(*desc_data) * cntrs_data->ncounters)) {
766 			ret = -EFAULT;
767 			goto free;
768 		}
769 	}
770 
771 	if (!mcounters->hw_cntrs_hndl) {
772 		mcounters->hw_cntrs_hndl = mlx5_fc_create(
773 			to_mdev(ibcounters->device)->mdev, false);
774 		if (IS_ERR(mcounters->hw_cntrs_hndl)) {
775 			ret = PTR_ERR(mcounters->hw_cntrs_hndl);
776 			goto free;
777 		}
778 		hw_hndl = true;
779 	}
780 
781 	if (desc_data) {
782 		/* counters already bound to at least one flow */
783 		if (mcounters->cntrs_max_index) {
784 			ret = -EINVAL;
785 			goto free_hndl;
786 		}
787 
788 		ret = counters_set_description(ibcounters,
789 					       MLX5_IB_COUNTERS_FLOW,
790 					       desc_data,
791 					       cntrs_data->ncounters);
792 		if (ret)
793 			goto free_hndl;
794 
795 	} else if (!mcounters->cntrs_max_index) {
796 		/* counters not bound yet, must have udata passed */
797 		ret = -EINVAL;
798 		goto free_hndl;
799 	}
800 
801 	return 0;
802 
803 free_hndl:
804 	if (hw_hndl) {
805 		mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev,
806 				mcounters->hw_cntrs_hndl);
807 		mcounters->hw_cntrs_hndl = NULL;
808 	}
809 free:
810 	kfree(desc_data);
811 	return ret;
812 }
813 
814 void mlx5_ib_counters_clear_description(struct ib_counters *counters)
815 {
816 	struct mlx5_ib_mcounters *mcounters;
817 
818 	if (!counters || atomic_read(&counters->usecnt) != 1)
819 		return;
820 
821 	mcounters = to_mcounters(counters);
822 
823 	mutex_lock(&mcounters->mcntrs_mutex);
824 	kfree(mcounters->counters_data);
825 	mcounters->counters_data = NULL;
826 	mcounters->cntrs_max_index = 0;
827 	mutex_unlock(&mcounters->mcntrs_mutex);
828 }
829 
830 static int mlx5_ib_modify_stat(struct ib_device *device, u32 port,
831 			       unsigned int index, bool enable)
832 {
833 	struct mlx5_ib_dev *dev = to_mdev(device);
834 	struct mlx5_ib_counters *cnts;
835 	struct mlx5_ib_op_fc *opfc;
836 	u32 num_hw_counters, type;
837 	int ret;
838 
839 	cnts = &dev->port[port - 1].cnts;
840 	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
841 		cnts->num_ext_ppcnt_counters;
842 	if (index < num_hw_counters ||
843 	    index >= (num_hw_counters + cnts->num_op_counters))
844 		return -EINVAL;
845 
846 	if (!(cnts->descs[index].flags & IB_STAT_FLAG_OPTIONAL))
847 		return -EINVAL;
848 
849 	type = *(u32 *)cnts->descs[index].priv;
850 	if (type >= MLX5_IB_OPCOUNTER_MAX)
851 		return -EINVAL;
852 
853 	opfc = &cnts->opfcs[type];
854 
855 	if (enable) {
856 		if (opfc->fc)
857 			return -EEXIST;
858 
859 		opfc->fc = mlx5_fc_create(dev->mdev, false);
860 		if (IS_ERR(opfc->fc))
861 			return PTR_ERR(opfc->fc);
862 
863 		ret = mlx5_ib_fs_add_op_fc(dev, port, opfc, type);
864 		if (ret) {
865 			mlx5_fc_destroy(dev->mdev, opfc->fc);
866 			opfc->fc = NULL;
867 		}
868 		return ret;
869 	}
870 
871 	if (!opfc->fc)
872 		return -EINVAL;
873 
874 	mlx5_ib_fs_remove_op_fc(dev, opfc, type);
875 	mlx5_fc_destroy(dev->mdev, opfc->fc);
876 	opfc->fc = NULL;
877 	return 0;
878 }
879 
/*
 * Statistics ops installed by mlx5_ib_counters_init() when the device is not
 * in switchdev mode: stats are allocated per port, and optional counters can
 * be toggled only when CONFIG_INFINIBAND_USER_ACCESS is enabled.
 */
static const struct ib_device_ops hw_stats_ops = {
	.alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats,
	.get_hw_stats = mlx5_ib_get_hw_stats,
	.counter_bind_qp = mlx5_ib_counter_bind_qp,
	.counter_unbind_qp = mlx5_ib_counter_unbind_qp,
	.counter_dealloc = mlx5_ib_counter_dealloc,
	.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
	.counter_update_stats = mlx5_ib_counter_update_stats,
	.modify_hw_stat = IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) ?
			  mlx5_ib_modify_stat : NULL,
};
891 
/*
 * Statistics ops installed by mlx5_ib_counters_init() in switchdev mode:
 * stats are allocated per device rather than per port, and no modify_hw_stat
 * hook is provided.
 */
static const struct ib_device_ops hw_switchdev_stats_ops = {
	.alloc_hw_device_stats = mlx5_ib_alloc_hw_device_stats,
	.get_hw_stats = mlx5_ib_get_hw_stats,
	.counter_bind_qp = mlx5_ib_counter_bind_qp,
	.counter_unbind_qp = mlx5_ib_counter_unbind_qp,
	.counter_dealloc = mlx5_ib_counter_dealloc,
	.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
	.counter_update_stats = mlx5_ib_counter_update_stats,
};
901 
/*
 * Ops for ib_counters objects. mlx5_ib_counters_init() installs them
 * unconditionally, before it checks whether Q counters are supported.
 */
static const struct ib_device_ops counters_ops = {
	.create_counters = mlx5_ib_create_counters,
	.destroy_counters = mlx5_ib_destroy_counters,
	.read_counters = mlx5_ib_read_counters,

	INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs),
};
909 
910 int mlx5_ib_counters_init(struct mlx5_ib_dev *dev)
911 {
912 	ib_set_device_ops(&dev->ib_dev, &counters_ops);
913 
914 	if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
915 		return 0;
916 
917 	if (is_mdev_switchdev_mode(dev->mdev))
918 		ib_set_device_ops(&dev->ib_dev, &hw_switchdev_stats_ops);
919 	else
920 		ib_set_device_ops(&dev->ib_dev, &hw_stats_ops);
921 	return mlx5_ib_alloc_counters(dev);
922 }
923 
924 void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev)
925 {
926 	if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
927 		return;
928 
929 	mlx5_ib_dealloc_counters(dev);
930 }
931