xref: /linux/drivers/infiniband/hw/mlx5/counters.c (revision aead78125a987f48944bff2001f61df72b95afc4)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /*
3  * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
4  */
5 
6 #include "mlx5_ib.h"
7 #include <linux/mlx5/eswitch.h>
8 #include <linux/mlx5/vport.h>
9 #include "counters.h"
10 #include "ib_rep.h"
11 #include "qp.h"
12 
13 struct mlx5_ib_counter {
14 	const char *name;
15 	size_t offset;
16 	u32 type;
17 };
18 
/*
 * Build a struct mlx5_ib_counter initializer named after the field and
 * pointing at that field's byte offset inside query_q_counter_out.
 */
#define INIT_Q_COUNTER(_name)						\
	{								\
		.name = #_name,						\
		.offset = MLX5_BYTE_OFF(query_q_counter_out, _name),	\
	}

/* Same as INIT_Q_COUNTER(), but the exposed name carries a "vport_" prefix. */
#define INIT_VPORT_Q_COUNTER(_name)					\
	{								\
		.name = "vport_" #_name,				\
		.offset = MLX5_BYTE_OFF(query_q_counter_out, _name),	\
	}
25 
26 static const struct mlx5_ib_counter basic_q_cnts[] = {
27 	INIT_Q_COUNTER(rx_write_requests),
28 	INIT_Q_COUNTER(rx_read_requests),
29 	INIT_Q_COUNTER(rx_atomic_requests),
30 	INIT_Q_COUNTER(out_of_buffer),
31 };
32 
33 static const struct mlx5_ib_counter out_of_seq_q_cnts[] = {
34 	INIT_Q_COUNTER(out_of_sequence),
35 };
36 
37 static const struct mlx5_ib_counter retrans_q_cnts[] = {
38 	INIT_Q_COUNTER(duplicate_request),
39 	INIT_Q_COUNTER(rnr_nak_retry_err),
40 	INIT_Q_COUNTER(packet_seq_err),
41 	INIT_Q_COUNTER(implied_nak_seq_err),
42 	INIT_Q_COUNTER(local_ack_timeout_err),
43 };
44 
45 static const struct mlx5_ib_counter vport_basic_q_cnts[] = {
46 	INIT_VPORT_Q_COUNTER(rx_write_requests),
47 	INIT_VPORT_Q_COUNTER(rx_read_requests),
48 	INIT_VPORT_Q_COUNTER(rx_atomic_requests),
49 	INIT_VPORT_Q_COUNTER(out_of_buffer),
50 };
51 
52 static const struct mlx5_ib_counter vport_out_of_seq_q_cnts[] = {
53 	INIT_VPORT_Q_COUNTER(out_of_sequence),
54 };
55 
56 static const struct mlx5_ib_counter vport_retrans_q_cnts[] = {
57 	INIT_VPORT_Q_COUNTER(duplicate_request),
58 	INIT_VPORT_Q_COUNTER(rnr_nak_retry_err),
59 	INIT_VPORT_Q_COUNTER(packet_seq_err),
60 	INIT_VPORT_Q_COUNTER(implied_nak_seq_err),
61 	INIT_VPORT_Q_COUNTER(local_ack_timeout_err),
62 };
63 
64 #define INIT_CONG_COUNTER(_name)		\
65 	{ .name = #_name, .offset =	\
66 		MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)}
67 
68 static const struct mlx5_ib_counter cong_cnts[] = {
69 	INIT_CONG_COUNTER(rp_cnp_ignored),
70 	INIT_CONG_COUNTER(rp_cnp_handled),
71 	INIT_CONG_COUNTER(np_ecn_marked_roce_packets),
72 	INIT_CONG_COUNTER(np_cnp_sent),
73 };
74 
75 static const struct mlx5_ib_counter extended_err_cnts[] = {
76 	INIT_Q_COUNTER(resp_local_length_error),
77 	INIT_Q_COUNTER(resp_cqe_error),
78 	INIT_Q_COUNTER(req_cqe_error),
79 	INIT_Q_COUNTER(req_remote_invalid_request),
80 	INIT_Q_COUNTER(req_remote_access_errors),
81 	INIT_Q_COUNTER(resp_remote_access_errors),
82 	INIT_Q_COUNTER(resp_cqe_flush_error),
83 	INIT_Q_COUNTER(req_cqe_flush_error),
84 };
85 
86 static const struct mlx5_ib_counter roce_accl_cnts[] = {
87 	INIT_Q_COUNTER(roce_adp_retrans),
88 	INIT_Q_COUNTER(roce_adp_retrans_to),
89 	INIT_Q_COUNTER(roce_slow_restart),
90 	INIT_Q_COUNTER(roce_slow_restart_cnps),
91 	INIT_Q_COUNTER(roce_slow_restart_trans),
92 };
93 
94 static const struct mlx5_ib_counter vport_extended_err_cnts[] = {
95 	INIT_VPORT_Q_COUNTER(resp_local_length_error),
96 	INIT_VPORT_Q_COUNTER(resp_cqe_error),
97 	INIT_VPORT_Q_COUNTER(req_cqe_error),
98 	INIT_VPORT_Q_COUNTER(req_remote_invalid_request),
99 	INIT_VPORT_Q_COUNTER(req_remote_access_errors),
100 	INIT_VPORT_Q_COUNTER(resp_remote_access_errors),
101 	INIT_VPORT_Q_COUNTER(resp_cqe_flush_error),
102 	INIT_VPORT_Q_COUNTER(req_cqe_flush_error),
103 };
104 
105 static const struct mlx5_ib_counter vport_roce_accl_cnts[] = {
106 	INIT_VPORT_Q_COUNTER(roce_adp_retrans),
107 	INIT_VPORT_Q_COUNTER(roce_adp_retrans_to),
108 	INIT_VPORT_Q_COUNTER(roce_slow_restart),
109 	INIT_VPORT_Q_COUNTER(roce_slow_restart_cnps),
110 	INIT_VPORT_Q_COUNTER(roce_slow_restart_trans),
111 };
112 
113 #define INIT_EXT_PPCNT_COUNTER(_name)		\
114 	{ .name = #_name, .offset =	\
115 	MLX5_BYTE_OFF(ppcnt_reg, \
116 		      counter_set.eth_extended_cntrs_grp_data_layout._name##_high)}
117 
118 static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
119 	INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
120 };
121 
122 #define INIT_OP_COUNTER(_name, _type)		\
123 	{ .name = #_name, .type = MLX5_IB_OPCOUNTER_##_type}
124 
125 static const struct mlx5_ib_counter basic_op_cnts[] = {
126 	INIT_OP_COUNTER(cc_rx_ce_pkts, CC_RX_CE_PKTS),
127 };
128 
129 static const struct mlx5_ib_counter rdmarx_cnp_op_cnts[] = {
130 	INIT_OP_COUNTER(cc_rx_cnp_pkts, CC_RX_CNP_PKTS),
131 };
132 
133 static const struct mlx5_ib_counter rdmatx_cnp_op_cnts[] = {
134 	INIT_OP_COUNTER(cc_tx_cnp_pkts, CC_TX_CNP_PKTS),
135 };
136 
137 static int mlx5_ib_read_counters(struct ib_counters *counters,
138 				 struct ib_counters_read_attr *read_attr,
139 				 struct uverbs_attr_bundle *attrs)
140 {
141 	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
142 	struct mlx5_read_counters_attr mread_attr = {};
143 	struct mlx5_ib_flow_counters_desc *desc;
144 	int ret, i;
145 
146 	mutex_lock(&mcounters->mcntrs_mutex);
147 	if (mcounters->cntrs_max_index > read_attr->ncounters) {
148 		ret = -EINVAL;
149 		goto err_bound;
150 	}
151 
152 	mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64),
153 				 GFP_KERNEL);
154 	if (!mread_attr.out) {
155 		ret = -ENOMEM;
156 		goto err_bound;
157 	}
158 
159 	mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl;
160 	mread_attr.flags = read_attr->flags;
161 	ret = mcounters->read_counters(counters->device, &mread_attr);
162 	if (ret)
163 		goto err_read;
164 
165 	/* do the pass over the counters data array to assign according to the
166 	 * descriptions and indexing pairs
167 	 */
168 	desc = mcounters->counters_data;
169 	for (i = 0; i < mcounters->ncounters; i++)
170 		read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description];
171 
172 err_read:
173 	kfree(mread_attr.out);
174 err_bound:
175 	mutex_unlock(&mcounters->mcntrs_mutex);
176 	return ret;
177 }
178 
179 static int mlx5_ib_destroy_counters(struct ib_counters *counters)
180 {
181 	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
182 
183 	mlx5_ib_counters_clear_description(counters);
184 	if (mcounters->hw_cntrs_hndl)
185 		mlx5_fc_destroy(to_mdev(counters->device)->mdev,
186 				mcounters->hw_cntrs_hndl);
187 	return 0;
188 }
189 
190 static int mlx5_ib_create_counters(struct ib_counters *counters,
191 				   struct uverbs_attr_bundle *attrs)
192 {
193 	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
194 
195 	mutex_init(&mcounters->mcntrs_mutex);
196 	return 0;
197 }
198 
199 static bool vport_qcounters_supported(struct mlx5_ib_dev *dev)
200 {
201 	return MLX5_CAP_GEN(dev->mdev, q_counter_other_vport) &&
202 	       MLX5_CAP_GEN(dev->mdev, q_counter_aggregation);
203 }
204 
205 static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
206 						   u32 port_num)
207 {
208 	if ((is_mdev_switchdev_mode(dev->mdev) &&
209 	     !vport_qcounters_supported(dev)) || !port_num)
210 		return &dev->port[0].cnts;
211 
212 	return is_mdev_switchdev_mode(dev->mdev) ?
213 	       &dev->port[1].cnts : &dev->port[port_num - 1].cnts;
214 }
215 
216 /**
217  * mlx5_ib_get_counters_id - Returns counters id to use for device+port
218  * @dev:	Pointer to mlx5 IB device
219  * @port_num:	Zero based port number
220  *
221  * mlx5_ib_get_counters_id() Returns counters set id to use for given
222  * device port combination in switchdev and non switchdev mode of the
223  * parent device.
224  */
225 u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u32 port_num)
226 {
227 	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num + 1);
228 
229 	return cnts->set_id;
230 }
231 
232 static struct rdma_hw_stats *do_alloc_stats(const struct mlx5_ib_counters *cnts)
233 {
234 	struct rdma_hw_stats *stats;
235 	u32 num_hw_counters;
236 	int i;
237 
238 	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
239 			  cnts->num_ext_ppcnt_counters;
240 	stats = rdma_alloc_hw_stats_struct(cnts->descs,
241 					   num_hw_counters +
242 					   cnts->num_op_counters,
243 					   RDMA_HW_STATS_DEFAULT_LIFESPAN);
244 	if (!stats)
245 		return NULL;
246 
247 	for (i = 0; i < cnts->num_op_counters; i++)
248 		set_bit(num_hw_counters + i, stats->is_disabled);
249 
250 	return stats;
251 }
252 
253 static struct rdma_hw_stats *
254 mlx5_ib_alloc_hw_device_stats(struct ib_device *ibdev)
255 {
256 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
257 	const struct mlx5_ib_counters *cnts = &dev->port[0].cnts;
258 
259 	return do_alloc_stats(cnts);
260 }
261 
262 static struct rdma_hw_stats *
263 mlx5_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
264 {
265 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
266 	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
267 
268 	return do_alloc_stats(cnts);
269 }
270 
271 static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
272 				    const struct mlx5_ib_counters *cnts,
273 				    struct rdma_hw_stats *stats,
274 				    u16 set_id)
275 {
276 	u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
277 	u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
278 	__be32 val;
279 	int ret, i;
280 
281 	MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
282 	MLX5_SET(query_q_counter_in, in, counter_set_id, set_id);
283 	ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
284 	if (ret)
285 		return ret;
286 
287 	for (i = 0; i < cnts->num_q_counters; i++) {
288 		val = *(__be32 *)((void *)out + cnts->offsets[i]);
289 		stats->value[i] = (u64)be32_to_cpu(val);
290 	}
291 
292 	return 0;
293 }
294 
295 static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
296 					    const struct mlx5_ib_counters *cnts,
297 					    struct rdma_hw_stats *stats)
298 {
299 	int offset = cnts->num_q_counters + cnts->num_cong_counters;
300 	u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
301 	int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
302 	int ret, i;
303 	void *out;
304 
305 	out = kvzalloc(sz, GFP_KERNEL);
306 	if (!out)
307 		return -ENOMEM;
308 
309 	MLX5_SET(ppcnt_reg, in, local_port, 1);
310 	MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
311 	ret = mlx5_core_access_reg(dev->mdev, in, sz, out, sz, MLX5_REG_PPCNT,
312 				   0, 0);
313 	if (ret)
314 		goto free;
315 
316 	for (i = 0; i < cnts->num_ext_ppcnt_counters; i++)
317 		stats->value[i + offset] =
318 			be64_to_cpup((__be64 *)(out +
319 				    cnts->offsets[i + offset]));
320 free:
321 	kvfree(out);
322 	return ret;
323 }
324 
325 static int mlx5_ib_query_q_counters_vport(struct mlx5_ib_dev *dev,
326 					  u32 port_num,
327 					  const struct mlx5_ib_counters *cnts,
328 					  struct rdma_hw_stats *stats)
329 
330 {
331 	u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
332 	u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
333 	struct mlx5_core_dev *mdev;
334 	__be32 val;
335 	int ret, i;
336 
337 	if (!dev->port[port_num].rep ||
338 	    dev->port[port_num].rep->vport == MLX5_VPORT_UPLINK)
339 		return 0;
340 
341 	mdev = mlx5_eswitch_get_core_dev(dev->port[port_num].rep->esw);
342 	if (!mdev)
343 		return -EOPNOTSUPP;
344 
345 	MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
346 	MLX5_SET(query_q_counter_in, in, other_vport, 1);
347 	MLX5_SET(query_q_counter_in, in, vport_number,
348 		 dev->port[port_num].rep->vport);
349 	MLX5_SET(query_q_counter_in, in, aggregate, 1);
350 	ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
351 	if (ret)
352 		return ret;
353 
354 	for (i = 0; i < cnts->num_q_counters; i++) {
355 		val = *(__be32 *)((void *)out + cnts->offsets[i]);
356 		stats->value[i] = (u64)be32_to_cpu(val);
357 	}
358 
359 	return 0;
360 }
361 
362 static int do_get_hw_stats(struct ib_device *ibdev,
363 			   struct rdma_hw_stats *stats,
364 			   u32 port_num, int index)
365 {
366 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
367 	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
368 	struct mlx5_core_dev *mdev;
369 	int ret, num_counters;
370 
371 	if (!stats)
372 		return -EINVAL;
373 
374 	num_counters = cnts->num_q_counters +
375 		       cnts->num_cong_counters +
376 		       cnts->num_ext_ppcnt_counters;
377 
378 	if (is_mdev_switchdev_mode(dev->mdev) && dev->is_rep && port_num != 0)
379 		ret = mlx5_ib_query_q_counters_vport(dev, port_num - 1, cnts,
380 						     stats);
381 	else
382 		ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats,
383 					       cnts->set_id);
384 	if (ret)
385 		return ret;
386 
387 	/* We don't expose device counters over Vports */
388 	if (is_mdev_switchdev_mode(dev->mdev) && port_num != 0)
389 		goto done;
390 
391 	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
392 		ret =  mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats);
393 		if (ret)
394 			return ret;
395 	}
396 
397 	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
398 		if (!port_num)
399 			port_num = 1;
400 		mdev = mlx5_ib_get_native_port_mdev(dev, port_num, NULL);
401 		if (!mdev) {
402 			/* If port is not affiliated yet, its in down state
403 			 * which doesn't have any counters yet, so it would be
404 			 * zero. So no need to read from the HCA.
405 			 */
406 			goto done;
407 		}
408 		ret = mlx5_lag_query_cong_counters(dev->mdev,
409 						   stats->value +
410 						   cnts->num_q_counters,
411 						   cnts->num_cong_counters,
412 						   cnts->offsets +
413 						   cnts->num_q_counters);
414 
415 		mlx5_ib_put_native_port_mdev(dev, port_num);
416 		if (ret)
417 			return ret;
418 	}
419 
420 done:
421 	return num_counters;
422 }
423 
424 static int do_get_op_stat(struct ib_device *ibdev,
425 			  struct rdma_hw_stats *stats,
426 			  u32 port_num, int index)
427 {
428 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
429 	const struct mlx5_ib_counters *cnts;
430 	const struct mlx5_ib_op_fc *opfcs;
431 	u64 packets = 0, bytes;
432 	u32 type;
433 	int ret;
434 
435 	cnts = get_counters(dev, port_num);
436 
437 	opfcs = cnts->opfcs;
438 	type = *(u32 *)cnts->descs[index].priv;
439 	if (type >= MLX5_IB_OPCOUNTER_MAX)
440 		return -EINVAL;
441 
442 	if (!opfcs[type].fc)
443 		goto out;
444 
445 	ret = mlx5_fc_query(dev->mdev, opfcs[type].fc,
446 			    &packets, &bytes);
447 	if (ret)
448 		return ret;
449 
450 out:
451 	stats->value[index] = packets;
452 	return index;
453 }
454 
455 static int do_get_op_stats(struct ib_device *ibdev,
456 			   struct rdma_hw_stats *stats,
457 			   u32 port_num)
458 {
459 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
460 	const struct mlx5_ib_counters *cnts;
461 	int index, ret, num_hw_counters;
462 
463 	cnts = get_counters(dev, port_num);
464 	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
465 			  cnts->num_ext_ppcnt_counters;
466 	for (index = num_hw_counters;
467 	     index < (num_hw_counters + cnts->num_op_counters); index++) {
468 		ret = do_get_op_stat(ibdev, stats, port_num, index);
469 		if (ret != index)
470 			return ret;
471 	}
472 
473 	return cnts->num_op_counters;
474 }
475 
476 static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
477 				struct rdma_hw_stats *stats,
478 				u32 port_num, int index)
479 {
480 	int num_counters, num_hw_counters, num_op_counters;
481 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
482 	const struct mlx5_ib_counters *cnts;
483 
484 	cnts = get_counters(dev, port_num);
485 	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
486 		cnts->num_ext_ppcnt_counters;
487 	num_counters = num_hw_counters + cnts->num_op_counters;
488 
489 	if (index < 0 || index > num_counters)
490 		return -EINVAL;
491 	else if (index > 0 && index < num_hw_counters)
492 		return do_get_hw_stats(ibdev, stats, port_num, index);
493 	else if (index >= num_hw_counters && index < num_counters)
494 		return do_get_op_stat(ibdev, stats, port_num, index);
495 
496 	num_hw_counters = do_get_hw_stats(ibdev, stats, port_num, index);
497 	if (num_hw_counters < 0)
498 		return num_hw_counters;
499 
500 	num_op_counters = do_get_op_stats(ibdev, stats, port_num);
501 	if (num_op_counters < 0)
502 		return num_op_counters;
503 
504 	return num_hw_counters + num_op_counters;
505 }
506 
507 static struct rdma_hw_stats *
508 mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
509 {
510 	struct mlx5_ib_dev *dev = to_mdev(counter->device);
511 	const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port);
512 
513 	return do_alloc_stats(cnts);
514 }
515 
516 static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
517 {
518 	struct mlx5_ib_dev *dev = to_mdev(counter->device);
519 	const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port);
520 
521 	return mlx5_ib_query_q_counters(dev->mdev, cnts,
522 					counter->stats, counter->id);
523 }
524 
525 static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
526 {
527 	struct mlx5_ib_dev *dev = to_mdev(counter->device);
528 	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
529 
530 	if (!counter->id)
531 		return 0;
532 
533 	MLX5_SET(dealloc_q_counter_in, in, opcode,
534 		 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
535 	MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id);
536 	return mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
537 }
538 
539 static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
540 				   struct ib_qp *qp)
541 {
542 	struct mlx5_ib_dev *dev = to_mdev(qp->device);
543 	int err;
544 
545 	if (!counter->id) {
546 		u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
547 		u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
548 
549 		MLX5_SET(alloc_q_counter_in, in, opcode,
550 			 MLX5_CMD_OP_ALLOC_Q_COUNTER);
551 		MLX5_SET(alloc_q_counter_in, in, uid, MLX5_SHARED_RESOURCE_UID);
552 		err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
553 		if (err)
554 			return err;
555 		counter->id =
556 			MLX5_GET(alloc_q_counter_out, out, counter_set_id);
557 	}
558 
559 	err = mlx5_ib_qp_set_counter(qp, counter);
560 	if (err)
561 		goto fail_set_counter;
562 
563 	return 0;
564 
565 fail_set_counter:
566 	mlx5_ib_counter_dealloc(counter);
567 	counter->id = 0;
568 
569 	return err;
570 }
571 
572 static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp)
573 {
574 	return mlx5_ib_qp_set_counter(qp, NULL);
575 }
576 
577 static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
578 				  struct rdma_stat_desc *descs, size_t *offsets,
579 				  u32 port_num)
580 {
581 	bool is_vport = is_mdev_switchdev_mode(dev->mdev) &&
582 			port_num != MLX5_VPORT_PF;
583 	const struct mlx5_ib_counter *names;
584 	int j = 0, i, size;
585 
586 	names = is_vport ? vport_basic_q_cnts : basic_q_cnts;
587 	size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) :
588 			  ARRAY_SIZE(basic_q_cnts);
589 	for (i = 0; i < size; i++, j++) {
590 		descs[j].name = names[i].name;
591 		offsets[j] = names[i].offset;
592 	}
593 
594 	names = is_vport ? vport_out_of_seq_q_cnts : out_of_seq_q_cnts;
595 	size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) :
596 			  ARRAY_SIZE(out_of_seq_q_cnts);
597 	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
598 		for (i = 0; i < size; i++, j++) {
599 			descs[j].name = names[i].name;
600 			offsets[j] = names[i].offset;
601 		}
602 	}
603 
604 	names = is_vport ? vport_retrans_q_cnts : retrans_q_cnts;
605 	size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) :
606 			  ARRAY_SIZE(retrans_q_cnts);
607 	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
608 		for (i = 0; i < size; i++, j++) {
609 			descs[j].name = names[i].name;
610 			offsets[j] = names[i].offset;
611 		}
612 	}
613 
614 	names = is_vport ? vport_extended_err_cnts : extended_err_cnts;
615 	size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) :
616 			  ARRAY_SIZE(extended_err_cnts);
617 	if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
618 		for (i = 0; i < size; i++, j++) {
619 			descs[j].name = names[i].name;
620 			offsets[j] = names[i].offset;
621 		}
622 	}
623 
624 	names = is_vport ? vport_roce_accl_cnts : roce_accl_cnts;
625 	size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) :
626 			  ARRAY_SIZE(roce_accl_cnts);
627 	if (MLX5_CAP_GEN(dev->mdev, roce_accl)) {
628 		for (i = 0; i < size; i++, j++) {
629 			descs[j].name = names[i].name;
630 			offsets[j] = names[i].offset;
631 		}
632 	}
633 
634 	if (is_vport)
635 		return;
636 
637 	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
638 		for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
639 			descs[j].name = cong_cnts[i].name;
640 			offsets[j] = cong_cnts[i].offset;
641 		}
642 	}
643 
644 	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
645 		for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) {
646 			descs[j].name = ext_ppcnt_cnts[i].name;
647 			offsets[j] = ext_ppcnt_cnts[i].offset;
648 		}
649 	}
650 
651 	for (i = 0; i < ARRAY_SIZE(basic_op_cnts); i++, j++) {
652 		descs[j].name = basic_op_cnts[i].name;
653 		descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
654 		descs[j].priv = &basic_op_cnts[i].type;
655 	}
656 
657 	if (MLX5_CAP_FLOWTABLE(dev->mdev,
658 			       ft_field_support_2_nic_receive_rdma.bth_opcode)) {
659 		for (i = 0; i < ARRAY_SIZE(rdmarx_cnp_op_cnts); i++, j++) {
660 			descs[j].name = rdmarx_cnp_op_cnts[i].name;
661 			descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
662 			descs[j].priv = &rdmarx_cnp_op_cnts[i].type;
663 		}
664 	}
665 
666 	if (MLX5_CAP_FLOWTABLE(dev->mdev,
667 			       ft_field_support_2_nic_transmit_rdma.bth_opcode)) {
668 		for (i = 0; i < ARRAY_SIZE(rdmatx_cnp_op_cnts); i++, j++) {
669 			descs[j].name = rdmatx_cnp_op_cnts[i].name;
670 			descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
671 			descs[j].priv = &rdmatx_cnp_op_cnts[i].type;
672 		}
673 	}
674 }
675 
676 
677 static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
678 				    struct mlx5_ib_counters *cnts, u32 port_num)
679 {
680 	bool is_vport = is_mdev_switchdev_mode(dev->mdev) &&
681 			port_num != MLX5_VPORT_PF;
682 	u32 num_counters, num_op_counters = 0, size;
683 
684 	size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) :
685 			  ARRAY_SIZE(basic_q_cnts);
686 	num_counters = size;
687 
688 	size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) :
689 			  ARRAY_SIZE(out_of_seq_q_cnts);
690 	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
691 		num_counters += size;
692 
693 	size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) :
694 			  ARRAY_SIZE(retrans_q_cnts);
695 	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
696 		num_counters += size;
697 
698 	size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) :
699 			  ARRAY_SIZE(extended_err_cnts);
700 	if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
701 		num_counters += size;
702 
703 	size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) :
704 			  ARRAY_SIZE(roce_accl_cnts);
705 	if (MLX5_CAP_GEN(dev->mdev, roce_accl))
706 		num_counters += size;
707 
708 	cnts->num_q_counters = num_counters;
709 
710 	if (is_vport)
711 		goto skip_non_qcounters;
712 
713 	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
714 		cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
715 		num_counters += ARRAY_SIZE(cong_cnts);
716 	}
717 	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
718 		cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
719 		num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
720 	}
721 
722 	num_op_counters = ARRAY_SIZE(basic_op_cnts);
723 
724 	if (MLX5_CAP_FLOWTABLE(dev->mdev,
725 			       ft_field_support_2_nic_receive_rdma.bth_opcode))
726 		num_op_counters += ARRAY_SIZE(rdmarx_cnp_op_cnts);
727 
728 	if (MLX5_CAP_FLOWTABLE(dev->mdev,
729 			       ft_field_support_2_nic_transmit_rdma.bth_opcode))
730 		num_op_counters += ARRAY_SIZE(rdmatx_cnp_op_cnts);
731 
732 skip_non_qcounters:
733 	cnts->num_op_counters = num_op_counters;
734 	num_counters += num_op_counters;
735 	cnts->descs = kcalloc(num_counters,
736 			      sizeof(struct rdma_stat_desc), GFP_KERNEL);
737 	if (!cnts->descs)
738 		return -ENOMEM;
739 
740 	cnts->offsets = kcalloc(num_counters,
741 				sizeof(*cnts->offsets), GFP_KERNEL);
742 	if (!cnts->offsets)
743 		goto err;
744 
745 	return 0;
746 
747 err:
748 	kfree(cnts->descs);
749 	cnts->descs = NULL;
750 	return -ENOMEM;
751 }
752 
753 static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
754 {
755 	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
756 	int num_cnt_ports = dev->num_ports;
757 	int i, j;
758 
759 	if (is_mdev_switchdev_mode(dev->mdev))
760 		num_cnt_ports = min(2, num_cnt_ports);
761 
762 	MLX5_SET(dealloc_q_counter_in, in, opcode,
763 		 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
764 
765 	for (i = 0; i < num_cnt_ports; i++) {
766 		if (dev->port[i].cnts.set_id) {
767 			MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
768 				 dev->port[i].cnts.set_id);
769 			mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
770 		}
771 		kfree(dev->port[i].cnts.descs);
772 		kfree(dev->port[i].cnts.offsets);
773 
774 		for (j = 0; j < MLX5_IB_OPCOUNTER_MAX; j++) {
775 			if (!dev->port[i].cnts.opfcs[j].fc)
776 				continue;
777 
778 			if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
779 				mlx5_ib_fs_remove_op_fc(dev,
780 					&dev->port[i].cnts.opfcs[j], j);
781 			mlx5_fc_destroy(dev->mdev,
782 					dev->port[i].cnts.opfcs[j].fc);
783 			dev->port[i].cnts.opfcs[j].fc = NULL;
784 		}
785 	}
786 }
787 
788 static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
789 {
790 	u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
791 	u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
792 	int num_cnt_ports = dev->num_ports;
793 	int err = 0;
794 	int i;
795 	bool is_shared;
796 
797 	MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
798 	is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
799 
800 	/*
801 	 * In switchdev we need to allocate two ports, one that is used for
802 	 * the device Q_counters and it is essentially the real Q_counters of
803 	 * this device, while the other is used as a helper for PF to be able to
804 	 * query all other vports.
805 	 */
806 	if (is_mdev_switchdev_mode(dev->mdev))
807 		num_cnt_ports = min(2, num_cnt_ports);
808 
809 	for (i = 0; i < num_cnt_ports; i++) {
810 		err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts, i);
811 		if (err)
812 			goto err_alloc;
813 
814 		mlx5_ib_fill_counters(dev, dev->port[i].cnts.descs,
815 				      dev->port[i].cnts.offsets, i);
816 
817 		MLX5_SET(alloc_q_counter_in, in, uid,
818 			 is_shared ? MLX5_SHARED_RESOURCE_UID : 0);
819 
820 		err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
821 		if (err) {
822 			mlx5_ib_warn(dev,
823 				     "couldn't allocate queue counter for port %d, err %d\n",
824 				     i + 1, err);
825 			goto err_alloc;
826 		}
827 
828 		dev->port[i].cnts.set_id =
829 			MLX5_GET(alloc_q_counter_out, out, counter_set_id);
830 	}
831 	return 0;
832 
833 err_alloc:
834 	mlx5_ib_dealloc_counters(dev);
835 	return err;
836 }
837 
838 static int read_flow_counters(struct ib_device *ibdev,
839 			      struct mlx5_read_counters_attr *read_attr)
840 {
841 	struct mlx5_fc *fc = read_attr->hw_cntrs_hndl;
842 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
843 
844 	return mlx5_fc_query(dev->mdev, fc,
845 			     &read_attr->out[IB_COUNTER_PACKETS],
846 			     &read_attr->out[IB_COUNTER_BYTES]);
847 }
848 
849 /* flow counters currently expose two counters packets and bytes */
850 #define FLOW_COUNTERS_NUM 2
851 static int counters_set_description(
852 	struct ib_counters *counters, enum mlx5_ib_counters_type counters_type,
853 	struct mlx5_ib_flow_counters_desc *desc_data, u32 ncounters)
854 {
855 	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
856 	u32 cntrs_max_index = 0;
857 	int i;
858 
859 	if (counters_type != MLX5_IB_COUNTERS_FLOW)
860 		return -EINVAL;
861 
862 	/* init the fields for the object */
863 	mcounters->type = counters_type;
864 	mcounters->read_counters = read_flow_counters;
865 	mcounters->counters_num = FLOW_COUNTERS_NUM;
866 	mcounters->ncounters = ncounters;
867 	/* each counter entry have both description and index pair */
868 	for (i = 0; i < ncounters; i++) {
869 		if (desc_data[i].description > IB_COUNTER_BYTES)
870 			return -EINVAL;
871 
872 		if (cntrs_max_index <= desc_data[i].index)
873 			cntrs_max_index = desc_data[i].index + 1;
874 	}
875 
876 	mutex_lock(&mcounters->mcntrs_mutex);
877 	mcounters->counters_data = desc_data;
878 	mcounters->cntrs_max_index = cntrs_max_index;
879 	mutex_unlock(&mcounters->mcntrs_mutex);
880 
881 	return 0;
882 }
883 
884 #define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2))
885 int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters,
886 				   struct mlx5_ib_create_flow *ucmd)
887 {
888 	struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters);
889 	struct mlx5_ib_flow_counters_data *cntrs_data = NULL;
890 	struct mlx5_ib_flow_counters_desc *desc_data = NULL;
891 	bool hw_hndl = false;
892 	int ret = 0;
893 
894 	if (ucmd && ucmd->ncounters_data != 0) {
895 		cntrs_data = ucmd->data;
896 		if (cntrs_data->ncounters > MAX_COUNTERS_NUM)
897 			return -EINVAL;
898 
899 		desc_data = kcalloc(cntrs_data->ncounters,
900 				    sizeof(*desc_data),
901 				    GFP_KERNEL);
902 		if (!desc_data)
903 			return  -ENOMEM;
904 
905 		if (copy_from_user(desc_data,
906 				   u64_to_user_ptr(cntrs_data->counters_data),
907 				   sizeof(*desc_data) * cntrs_data->ncounters)) {
908 			ret = -EFAULT;
909 			goto free;
910 		}
911 	}
912 
913 	if (!mcounters->hw_cntrs_hndl) {
914 		mcounters->hw_cntrs_hndl = mlx5_fc_create(
915 			to_mdev(ibcounters->device)->mdev, false);
916 		if (IS_ERR(mcounters->hw_cntrs_hndl)) {
917 			ret = PTR_ERR(mcounters->hw_cntrs_hndl);
918 			goto free;
919 		}
920 		hw_hndl = true;
921 	}
922 
923 	if (desc_data) {
924 		/* counters already bound to at least one flow */
925 		if (mcounters->cntrs_max_index) {
926 			ret = -EINVAL;
927 			goto free_hndl;
928 		}
929 
930 		ret = counters_set_description(ibcounters,
931 					       MLX5_IB_COUNTERS_FLOW,
932 					       desc_data,
933 					       cntrs_data->ncounters);
934 		if (ret)
935 			goto free_hndl;
936 
937 	} else if (!mcounters->cntrs_max_index) {
938 		/* counters not bound yet, must have udata passed */
939 		ret = -EINVAL;
940 		goto free_hndl;
941 	}
942 
943 	return 0;
944 
945 free_hndl:
946 	if (hw_hndl) {
947 		mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev,
948 				mcounters->hw_cntrs_hndl);
949 		mcounters->hw_cntrs_hndl = NULL;
950 	}
951 free:
952 	kfree(desc_data);
953 	return ret;
954 }
955 
956 void mlx5_ib_counters_clear_description(struct ib_counters *counters)
957 {
958 	struct mlx5_ib_mcounters *mcounters;
959 
960 	if (!counters || atomic_read(&counters->usecnt) != 1)
961 		return;
962 
963 	mcounters = to_mcounters(counters);
964 
965 	mutex_lock(&mcounters->mcntrs_mutex);
966 	kfree(mcounters->counters_data);
967 	mcounters->counters_data = NULL;
968 	mcounters->cntrs_max_index = 0;
969 	mutex_unlock(&mcounters->mcntrs_mutex);
970 }
971 
972 static int mlx5_ib_modify_stat(struct ib_device *device, u32 port,
973 			       unsigned int index, bool enable)
974 {
975 	struct mlx5_ib_dev *dev = to_mdev(device);
976 	struct mlx5_ib_counters *cnts;
977 	struct mlx5_ib_op_fc *opfc;
978 	u32 num_hw_counters, type;
979 	int ret;
980 
981 	cnts = &dev->port[port - 1].cnts;
982 	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
983 		cnts->num_ext_ppcnt_counters;
984 	if (index < num_hw_counters ||
985 	    index >= (num_hw_counters + cnts->num_op_counters))
986 		return -EINVAL;
987 
988 	if (!(cnts->descs[index].flags & IB_STAT_FLAG_OPTIONAL))
989 		return -EINVAL;
990 
991 	type = *(u32 *)cnts->descs[index].priv;
992 	if (type >= MLX5_IB_OPCOUNTER_MAX)
993 		return -EINVAL;
994 
995 	opfc = &cnts->opfcs[type];
996 
997 	if (enable) {
998 		if (opfc->fc)
999 			return -EEXIST;
1000 
1001 		opfc->fc = mlx5_fc_create(dev->mdev, false);
1002 		if (IS_ERR(opfc->fc))
1003 			return PTR_ERR(opfc->fc);
1004 
1005 		ret = mlx5_ib_fs_add_op_fc(dev, port, opfc, type);
1006 		if (ret) {
1007 			mlx5_fc_destroy(dev->mdev, opfc->fc);
1008 			opfc->fc = NULL;
1009 		}
1010 		return ret;
1011 	}
1012 
1013 	if (!opfc->fc)
1014 		return -EINVAL;
1015 
1016 	mlx5_ib_fs_remove_op_fc(dev, opfc, type);
1017 	mlx5_fc_destroy(dev->mdev, opfc->fc);
1018 	opfc->fc = NULL;
1019 	return 0;
1020 }
1021 
1022 static const struct ib_device_ops hw_stats_ops = {
1023 	.alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats,
1024 	.get_hw_stats = mlx5_ib_get_hw_stats,
1025 	.counter_bind_qp = mlx5_ib_counter_bind_qp,
1026 	.counter_unbind_qp = mlx5_ib_counter_unbind_qp,
1027 	.counter_dealloc = mlx5_ib_counter_dealloc,
1028 	.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
1029 	.counter_update_stats = mlx5_ib_counter_update_stats,
1030 	.modify_hw_stat = IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) ?
1031 			  mlx5_ib_modify_stat : NULL,
1032 };
1033 
1034 static const struct ib_device_ops hw_switchdev_vport_op = {
1035 	.alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats,
1036 };
1037 
1038 static const struct ib_device_ops hw_switchdev_stats_ops = {
1039 	.alloc_hw_device_stats = mlx5_ib_alloc_hw_device_stats,
1040 	.get_hw_stats = mlx5_ib_get_hw_stats,
1041 	.counter_bind_qp = mlx5_ib_counter_bind_qp,
1042 	.counter_unbind_qp = mlx5_ib_counter_unbind_qp,
1043 	.counter_dealloc = mlx5_ib_counter_dealloc,
1044 	.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
1045 	.counter_update_stats = mlx5_ib_counter_update_stats,
1046 };
1047 
1048 static const struct ib_device_ops counters_ops = {
1049 	.create_counters = mlx5_ib_create_counters,
1050 	.destroy_counters = mlx5_ib_destroy_counters,
1051 	.read_counters = mlx5_ib_read_counters,
1052 
1053 	INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs),
1054 };
1055 
1056 int mlx5_ib_counters_init(struct mlx5_ib_dev *dev)
1057 {
1058 	ib_set_device_ops(&dev->ib_dev, &counters_ops);
1059 
1060 	if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
1061 		return 0;
1062 
1063 	if (is_mdev_switchdev_mode(dev->mdev)) {
1064 		ib_set_device_ops(&dev->ib_dev, &hw_switchdev_stats_ops);
1065 		if (vport_qcounters_supported(dev))
1066 			ib_set_device_ops(&dev->ib_dev, &hw_switchdev_vport_op);
1067 	} else
1068 		ib_set_device_ops(&dev->ib_dev, &hw_stats_ops);
1069 	return mlx5_ib_alloc_counters(dev);
1070 }
1071 
1072 void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev)
1073 {
1074 	if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
1075 		return;
1076 
1077 	mlx5_ib_dealloc_counters(dev);
1078 }
1079