xref: /linux/drivers/infiniband/core/counters.c (revision 88ae02feda84f0f78ff7243ef437e79624fdcd80)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /*
3  * Copyright (c) 2019 Mellanox Technologies. All rights reserved.
4  */
5 #include <rdma/ib_verbs.h>
6 #include <rdma/rdma_counter.h>
7 
8 #include "core_priv.h"
9 #include "restrack.h"
10 
11 #define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE | RDMA_COUNTER_MASK_PID)
12 
13 static int __counter_set_mode(struct rdma_port_counter *port_counter,
14 			      enum rdma_nl_counter_mode new_mode,
15 			      enum rdma_nl_counter_mask new_mask,
16 			      bool bind_opcnt)
17 {
18 	if (new_mode == RDMA_COUNTER_MODE_AUTO) {
19 		if (new_mask & (~ALL_AUTO_MODE_MASKS))
20 			return -EINVAL;
21 		if (port_counter->num_counters)
22 			return -EBUSY;
23 	}
24 
25 	port_counter->mode.mode = new_mode;
26 	port_counter->mode.mask = new_mask;
27 	port_counter->mode.bind_opcnt = bind_opcnt;
28 	return 0;
29 }
30 
31 /*
32  * rdma_counter_set_auto_mode() - Turn on/off per-port auto mode
33  *
34  * @dev: Device to operate
35  * @port: Port to use
36  * @mask: Mask to configure
37  * @extack: Message to the user
38  *
39  * Return 0 on success. If counter mode wasn't changed then it is considered
40  * as success as well.
41  * Return -EBUSY when changing to auto mode while there are bounded counters.
42  *
43  */
44 int rdma_counter_set_auto_mode(struct ib_device *dev, u32 port,
45 			       enum rdma_nl_counter_mask mask,
46 			       bool bind_opcnt,
47 			       struct netlink_ext_ack *extack)
48 {
49 	struct rdma_port_counter *port_counter;
50 	enum rdma_nl_counter_mode mode;
51 	int ret;
52 
53 	port_counter = &dev->port_data[port].port_counter;
54 	if (!port_counter->hstats)
55 		return -EOPNOTSUPP;
56 
57 	mutex_lock(&port_counter->lock);
58 	if (mask)
59 		mode = RDMA_COUNTER_MODE_AUTO;
60 	else
61 		mode = (port_counter->num_counters) ? RDMA_COUNTER_MODE_MANUAL :
62 						      RDMA_COUNTER_MODE_NONE;
63 
64 	if (port_counter->mode.mode == mode &&
65 	    port_counter->mode.mask == mask &&
66 	    port_counter->mode.bind_opcnt == bind_opcnt) {
67 		ret = 0;
68 		goto out;
69 	}
70 
71 	ret = __counter_set_mode(port_counter, mode, mask, bind_opcnt);
72 
73 out:
74 	mutex_unlock(&port_counter->lock);
75 	if (ret == -EBUSY)
76 		NL_SET_ERR_MSG(
77 			extack,
78 			"Modifying auto mode is not allowed when there is a bound QP");
79 	return ret;
80 }
81 
82 static void auto_mode_init_counter(struct rdma_counter *counter,
83 				   const struct ib_qp *qp,
84 				   enum rdma_nl_counter_mask new_mask)
85 {
86 	struct auto_mode_param *param = &counter->mode.param;
87 
88 	counter->mode.mode = RDMA_COUNTER_MODE_AUTO;
89 	counter->mode.mask = new_mask;
90 
91 	if (new_mask & RDMA_COUNTER_MASK_QP_TYPE)
92 		param->qp_type = qp->qp_type;
93 }
94 
95 static int __rdma_counter_bind_qp(struct rdma_counter *counter,
96 				  struct ib_qp *qp, u32 port)
97 {
98 	int ret;
99 
100 	if (qp->counter)
101 		return -EINVAL;
102 
103 	if (!qp->device->ops.counter_bind_qp)
104 		return -EOPNOTSUPP;
105 
106 	mutex_lock(&counter->lock);
107 	ret = qp->device->ops.counter_bind_qp(counter, qp, port);
108 	mutex_unlock(&counter->lock);
109 
110 	return ret;
111 }
112 
/*
 * rdma_counter_modify() - Enable or disable one optional per-port HW counter
 *
 * @dev: Device to operate
 * @port: Port to use
 * @index: Index of the counter in the port's hw-stats table
 * @enable: true to enable the counter, false to disable it
 *
 * Only counters whose descriptor carries IB_STAT_FLAG_OPTIONAL may be
 * toggled. A request matching the current state is a successful no-op.
 */
int rdma_counter_modify(struct ib_device *dev, u32 port,
			unsigned int index, bool enable)
{
	struct rdma_hw_stats *stats;
	int ret = 0;

	if (!dev->ops.modify_hw_stat)
		return -EOPNOTSUPP;

	stats = ib_get_hw_stats_port(dev, port);
	if (!stats || index >= stats->num_counters ||
	    !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL))
		return -EINVAL;

	mutex_lock(&stats->lock);

	/* A set is_disabled bit means "disabled", so when @enable differs
	 * from the bit the counter is already in the requested state:
	 * bail out as success without calling the driver.
	 */
	if (enable != test_bit(index, stats->is_disabled))
		goto out;

	ret = dev->ops.modify_hw_stat(dev, port, index, enable);
	if (ret)
		goto out;

	/* Driver accepted the change; mirror it in the disabled bitmap. */
	if (enable)
		clear_bit(index, stats->is_disabled);
	else
		set_bit(index, stats->is_disabled);
out:
	mutex_unlock(&stats->lock);
	return ret;
}
144 
145 static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port,
146 					   struct ib_qp *qp,
147 					   enum rdma_nl_counter_mode mode,
148 					   bool bind_opcnt)
149 {
150 	struct rdma_port_counter *port_counter;
151 	struct rdma_counter *counter;
152 	int ret;
153 
154 	if (!dev->ops.counter_dealloc || !dev->ops.counter_alloc_stats)
155 		return NULL;
156 
157 	counter = rdma_zalloc_drv_obj(dev, rdma_counter);
158 	if (!counter)
159 		return NULL;
160 
161 	counter->device    = dev;
162 	counter->port      = port;
163 
164 	dev->ops.counter_init(counter);
165 
166 	rdma_restrack_new(&counter->res, RDMA_RESTRACK_COUNTER);
167 	counter->stats = dev->ops.counter_alloc_stats(counter);
168 	if (!counter->stats)
169 		goto err_stats;
170 
171 	port_counter = &dev->port_data[port].port_counter;
172 	mutex_lock(&port_counter->lock);
173 	switch (mode) {
174 	case RDMA_COUNTER_MODE_MANUAL:
175 		ret = __counter_set_mode(port_counter, RDMA_COUNTER_MODE_MANUAL,
176 					 0, bind_opcnt);
177 		if (ret) {
178 			mutex_unlock(&port_counter->lock);
179 			goto err_mode;
180 		}
181 		break;
182 	case RDMA_COUNTER_MODE_AUTO:
183 		auto_mode_init_counter(counter, qp, port_counter->mode.mask);
184 		break;
185 	default:
186 		ret = -EOPNOTSUPP;
187 		mutex_unlock(&port_counter->lock);
188 		goto err_mode;
189 	}
190 
191 	port_counter->num_counters++;
192 	mutex_unlock(&port_counter->lock);
193 
194 	counter->mode.mode = mode;
195 	counter->mode.bind_opcnt = bind_opcnt;
196 	kref_init(&counter->kref);
197 	mutex_init(&counter->lock);
198 
199 	ret = __rdma_counter_bind_qp(counter, qp, port);
200 	if (ret)
201 		goto err_mode;
202 
203 	rdma_restrack_parent_name(&counter->res, &qp->res);
204 	rdma_restrack_add(&counter->res);
205 	return counter;
206 
207 err_mode:
208 	rdma_free_hw_stats_struct(counter->stats);
209 err_stats:
210 	rdma_restrack_put(&counter->res);
211 	kfree(counter);
212 	return NULL;
213 }
214 
215 static void rdma_counter_free(struct rdma_counter *counter)
216 {
217 	struct rdma_port_counter *port_counter;
218 
219 	port_counter = &counter->device->port_data[counter->port].port_counter;
220 	mutex_lock(&port_counter->lock);
221 	port_counter->num_counters--;
222 	if (!port_counter->num_counters &&
223 	    (port_counter->mode.mode == RDMA_COUNTER_MODE_MANUAL))
224 		__counter_set_mode(port_counter, RDMA_COUNTER_MODE_NONE, 0,
225 				   false);
226 
227 	mutex_unlock(&port_counter->lock);
228 
229 	rdma_restrack_del(&counter->res);
230 	rdma_free_hw_stats_struct(counter->stats);
231 	kfree(counter);
232 }
233 
234 static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter,
235 			    enum rdma_nl_counter_mask auto_mask)
236 {
237 	struct auto_mode_param *param = &counter->mode.param;
238 	bool match = true;
239 
240 	if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE)
241 		match &= (param->qp_type == qp->qp_type);
242 
243 	if (auto_mask & RDMA_COUNTER_MASK_PID)
244 		match &= (task_pid_nr(counter->res.task) ==
245 			  task_pid_nr(qp->res.task));
246 
247 	return match;
248 }
249 
250 static int __rdma_counter_unbind_qp(struct ib_qp *qp, u32 port)
251 {
252 	struct rdma_counter *counter = qp->counter;
253 	int ret;
254 
255 	if (!qp->device->ops.counter_unbind_qp)
256 		return -EOPNOTSUPP;
257 
258 	mutex_lock(&counter->lock);
259 	ret = qp->device->ops.counter_unbind_qp(qp, port);
260 	mutex_unlock(&counter->lock);
261 
262 	return ret;
263 }
264 
265 static void counter_history_stat_update(struct rdma_counter *counter)
266 {
267 	struct ib_device *dev = counter->device;
268 	struct rdma_port_counter *port_counter;
269 	int i;
270 
271 	port_counter = &dev->port_data[counter->port].port_counter;
272 	if (!port_counter->hstats)
273 		return;
274 
275 	rdma_counter_query_stats(counter);
276 
277 	for (i = 0; i < counter->stats->num_counters; i++)
278 		port_counter->hstats->value[i] += counter->stats->value[i];
279 }
280 
/*
 * rdma_get_counter_auto_mode - Find the counter that @qp should be bound
 *     with in auto mode
 *
 * Return: The counter (with ref-count increased) if found, NULL otherwise.
 *     The caller drops the reference with kref_put(&counter->kref, ...).
 */
static struct rdma_counter *rdma_get_counter_auto_mode(struct ib_qp *qp,
						       u32 port)
{
	struct rdma_port_counter *port_counter;
	struct rdma_counter *counter = NULL;
	struct ib_device *dev = qp->device;
	struct rdma_restrack_entry *res;
	struct rdma_restrack_root *rt;
	unsigned long id = 0;

	port_counter = &dev->port_data[port].port_counter;
	rt = &dev->res[RDMA_RESTRACK_COUNTER];
	xa_lock(&rt->xa);
	/* Scan every tracked counter for one on the same device/port whose
	 * auto-mode parameters match this QP.
	 */
	xa_for_each(&rt->xa, id, res) {
		counter = container_of(res, struct rdma_counter, res);
		if ((counter->device != qp->device) || (counter->port != port))
			goto next;

		if (auto_mode_match(qp, counter, port_counter->mode.mask))
			break;
next:
		counter = NULL;
	}

	/* A zero kref means the counter is mid-release; treat it as not
	 * found instead of resurrecting it.
	 */
	if (counter && !kref_get_unless_zero(&counter->kref))
		counter = NULL;

	xa_unlock(&rt->xa);
	return counter;
}
317 
/* kref release callback: flush the counter's final stats into the port's
 * history, let the driver tear down its HW resources, then free it.
 */
static void counter_release(struct kref *kref)
{
	struct rdma_counter *counter;

	counter = container_of(kref, struct rdma_counter, kref);
	counter_history_stat_update(counter);
	counter->device->ops.counter_dealloc(counter);
	rdma_counter_free(counter);
}
327 
/*
 * rdma_counter_bind_qp_auto - Check and bind the QP to a counter base on
 *   the auto-mode rule
 *
 * Returns 0 when the QP does not participate (untracked, kernel-owned,
 * or the port is not in auto mode) or when binding succeeds; a negative
 * errno otherwise.
 */
int rdma_counter_bind_qp_auto(struct ib_qp *qp, u32 port)
{
	struct rdma_port_counter *port_counter;
	struct ib_device *dev = qp->device;
	struct rdma_counter *counter;
	int ret;

	/* Only user-space, restrack-tracked QPs get auto counters. */
	if (!rdma_restrack_is_tracked(&qp->res) || rdma_is_kernel_res(&qp->res))
		return 0;

	if (!rdma_is_port_valid(dev, port))
		return -EINVAL;

	port_counter = &dev->port_data[port].port_counter;
	if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO)
		return 0;

	/* Share a matching counter when one exists, otherwise allocate a
	 * fresh one already bound to this QP.
	 */
	counter = rdma_get_counter_auto_mode(qp, port);
	if (counter) {
		ret = __rdma_counter_bind_qp(counter, qp, port);
		if (ret) {
			/* Drop the reference taken by the lookup. */
			kref_put(&counter->kref, counter_release);
			return ret;
		}
	} else {
		counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_AUTO,
					 port_counter->mode.bind_opcnt);
		if (!counter)
			return -ENOMEM;
	}

	return 0;
}
365 
366 /*
367  * rdma_counter_unbind_qp - Unbind a qp from a counter
368  * @force:
369  *   true - Decrease the counter ref-count anyway (e.g., qp destroy)
370  */
371 int rdma_counter_unbind_qp(struct ib_qp *qp, u32 port, bool force)
372 {
373 	struct rdma_counter *counter = qp->counter;
374 	int ret;
375 
376 	if (!counter)
377 		return -EINVAL;
378 
379 	ret = __rdma_counter_unbind_qp(qp, port);
380 	if (ret && !force)
381 		return ret;
382 
383 	kref_put(&counter->kref, counter_release);
384 	return 0;
385 }
386 
387 int rdma_counter_query_stats(struct rdma_counter *counter)
388 {
389 	struct ib_device *dev = counter->device;
390 	int ret;
391 
392 	if (!dev->ops.counter_update_stats)
393 		return -EINVAL;
394 
395 	mutex_lock(&counter->lock);
396 	ret = dev->ops.counter_update_stats(counter);
397 	mutex_unlock(&counter->lock);
398 
399 	return ret;
400 }
401 
/* Sum hw-stat @index over every running counter of @dev/@port.
 *
 * The restrack xa lock cannot be held across the stats query (the query
 * takes a mutex and may sleep), so each entry is pinned with
 * rdma_restrack_get(), the lock is dropped for the query, and re-taken
 * before releasing the entry and continuing the iteration.
 */
static u64 get_running_counters_hwstat_sum(struct ib_device *dev,
					   u32 port, u32 index)
{
	struct rdma_restrack_entry *res;
	struct rdma_restrack_root *rt;
	struct rdma_counter *counter;
	unsigned long id = 0;
	u64 sum = 0;

	rt = &dev->res[RDMA_RESTRACK_COUNTER];
	xa_lock(&rt->xa);
	xa_for_each(&rt->xa, id, res) {
		/* Skip entries that are already being torn down. */
		if (!rdma_restrack_get(res))
			continue;

		xa_unlock(&rt->xa);

		counter = container_of(res, struct rdma_counter, res);
		if ((counter->device != dev) || (counter->port != port) ||
		    rdma_counter_query_stats(counter))
			goto next;

		sum += counter->stats->value[index];

next:
		xa_lock(&rt->xa);
		rdma_restrack_put(res);
	}

	xa_unlock(&rt->xa);
	return sum;
}
434 
435 /*
436  * rdma_counter_get_hwstat_value() - Get the sum value of all counters on a
437  *   specific port, including the running ones and history data
438  */
439 u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u32 port, u32 index)
440 {
441 	struct rdma_port_counter *port_counter;
442 	u64 sum;
443 
444 	port_counter = &dev->port_data[port].port_counter;
445 	if (!port_counter->hstats)
446 		return 0;
447 
448 	sum = get_running_counters_hwstat_sum(dev, port, index);
449 	sum += port_counter->hstats->value[index];
450 
451 	return sum;
452 }
453 
454 static struct ib_qp *rdma_counter_get_qp(struct ib_device *dev, u32 qp_num)
455 {
456 	struct rdma_restrack_entry *res = NULL;
457 	struct ib_qp *qp = NULL;
458 
459 	res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_QP, qp_num);
460 	if (IS_ERR(res))
461 		return NULL;
462 
463 	qp = container_of(res, struct ib_qp, res);
464 	if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
465 		goto err;
466 
467 	return qp;
468 
469 err:
470 	rdma_restrack_put(res);
471 	return NULL;
472 }
473 
474 static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev,
475 						   u32 counter_id)
476 {
477 	struct rdma_restrack_entry *res;
478 	struct rdma_counter *counter;
479 
480 	res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_COUNTER, counter_id);
481 	if (IS_ERR(res))
482 		return NULL;
483 
484 	counter = container_of(res, struct rdma_counter, res);
485 	kref_get(&counter->kref);
486 	rdma_restrack_put(res);
487 
488 	return counter;
489 }
490 
/*
 * rdma_counter_bind_qpn() - Bind QP @qp_num to counter @counter_id
 *
 * Fails with -EINVAL while the port is in auto mode, with -ENOENT when
 * either the QP or the counter does not exist.
 */
int rdma_counter_bind_qpn(struct ib_device *dev, u32 port,
			  u32 qp_num, u32 counter_id)
{
	struct rdma_port_counter *port_counter;
	struct rdma_counter *counter;
	struct ib_qp *qp;
	int ret;

	/* Manual binding is mutually exclusive with auto mode. */
	port_counter = &dev->port_data[port].port_counter;
	if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO)
		return -EINVAL;

	qp = rdma_counter_get_qp(dev, qp_num);
	if (!qp)
		return -ENOENT;

	counter = rdma_get_counter_by_id(dev, counter_id);
	if (!counter) {
		ret = -ENOENT;
		goto err;
	}

	/* Kernel-owned counters cannot serve user QPs and vice versa. */
	if (rdma_is_kernel_res(&counter->res) != rdma_is_kernel_res(&qp->res)) {
		ret = -EINVAL;
		goto err_task;
	}

	/* Counter and QP must live on the same device and port. */
	if ((counter->device != qp->device) || (counter->port != qp->port)) {
		ret = -EINVAL;
		goto err_task;
	}

	ret = __rdma_counter_bind_qp(counter, qp, port);
	if (ret)
		goto err_task;

	rdma_restrack_put(&qp->res);
	return 0;

err_task:
	/* Drop the kref taken by rdma_get_counter_by_id(). */
	kref_put(&counter->kref, counter_release);
err:
	rdma_restrack_put(&qp->res);
	return ret;
}
539 
/*
 * rdma_counter_bind_qpn_alloc() - Alloc a counter and bind QP @qp_num to it
 *   The id of new counter is returned in @counter_id
 */
int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u32 port,
				u32 qp_num, u32 *counter_id)
{
	struct rdma_port_counter *port_counter;
	struct rdma_counter *counter;
	struct ib_qp *qp;
	int ret;

	if (!rdma_is_port_valid(dev, port))
		return -EINVAL;

	port_counter = &dev->port_data[port].port_counter;
	if (!port_counter->hstats)
		return -EOPNOTSUPP;

	/* Manual allocation is not allowed while the port is in auto mode. */
	if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO)
		return -EINVAL;

	qp = rdma_counter_get_qp(dev, qp_num);
	if (!qp)
		return -ENOENT;

	/* When the QP reports a valid port it must be @port. */
	if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) {
		ret = -EINVAL;
		goto err;
	}

	counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_MANUAL, true);
	if (!counter) {
		ret = -ENOMEM;
		goto err;
	}

	/* @counter_id is optional. */
	if (counter_id)
		*counter_id = counter->id;

	rdma_restrack_put(&qp->res);
	return 0;

err:
	rdma_restrack_put(&qp->res);
	return ret;
}
587 
/*
 * rdma_counter_unbind_qpn() - Unbind QP @qp_num from a counter
 *
 * Fails with -EINVAL unless the QP is currently bound to exactly
 * @counter_id and the port is in manual mode.
 */
int rdma_counter_unbind_qpn(struct ib_device *dev, u32 port,
			    u32 qp_num, u32 counter_id)
{
	struct rdma_port_counter *port_counter;
	struct ib_qp *qp;
	int ret;

	if (!rdma_is_port_valid(dev, port))
		return -EINVAL;

	qp = rdma_counter_get_qp(dev, qp_num);
	if (!qp)
		return -ENOENT;

	/* When the QP reports a valid port it must be @port. */
	if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) {
		ret = -EINVAL;
		goto out;
	}

	port_counter = &dev->port_data[port].port_counter;
	if (!qp->counter || qp->counter->id != counter_id ||
	    port_counter->mode.mode != RDMA_COUNTER_MODE_MANUAL) {
		ret = -EINVAL;
		goto out;
	}

	/* Non-forced unbind: a driver failure is reported to the caller. */
	ret = rdma_counter_unbind_qp(qp, port, false);

out:
	rdma_restrack_put(&qp->res);
	return ret;
}
623 
624 int rdma_counter_get_mode(struct ib_device *dev, u32 port,
625 			  enum rdma_nl_counter_mode *mode,
626 			  enum rdma_nl_counter_mask *mask,
627 			  bool *opcnt)
628 {
629 	struct rdma_port_counter *port_counter;
630 
631 	port_counter = &dev->port_data[port].port_counter;
632 	*mode = port_counter->mode.mode;
633 	*mask = port_counter->mode.mask;
634 	*opcnt = port_counter->mode.bind_opcnt;
635 
636 	return 0;
637 }
638 
639 void rdma_counter_init(struct ib_device *dev)
640 {
641 	struct rdma_port_counter *port_counter;
642 	u32 port, i;
643 
644 	if (!dev->port_data)
645 		return;
646 
647 	rdma_for_each_port(dev, port) {
648 		port_counter = &dev->port_data[port].port_counter;
649 		port_counter->mode.mode = RDMA_COUNTER_MODE_NONE;
650 		mutex_init(&port_counter->lock);
651 
652 		if (!dev->ops.alloc_hw_port_stats)
653 			continue;
654 
655 		port_counter->hstats = dev->ops.alloc_hw_port_stats(dev, port);
656 		if (!port_counter->hstats)
657 			goto fail;
658 	}
659 
660 	return;
661 
662 fail:
663 	for (i = port; i >= rdma_start_port(dev); i--) {
664 		port_counter = &dev->port_data[port].port_counter;
665 		rdma_free_hw_stats_struct(port_counter->hstats);
666 		port_counter->hstats = NULL;
667 		mutex_destroy(&port_counter->lock);
668 	}
669 }
670 
/* Free the per-port counter resources set up by rdma_counter_init(). */
void rdma_counter_release(struct ib_device *dev)
{
	struct rdma_port_counter *port_counter;
	u32 port;

	rdma_for_each_port(dev, port) {
		port_counter = &dev->port_data[port].port_counter;
		/* rdma_free_hw_stats_struct() tolerates a NULL hstats. */
		rdma_free_hw_stats_struct(port_counter->hstats);
		mutex_destroy(&port_counter->lock);
	}
}
682