xref: /linux/drivers/net/ethernet/mellanox/mlx5/core/en/pcie_cong_event.c (revision 8be4d31cb8aaeea27bde4b7ddb26e28a89062ebf)
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
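//
// Track PCIe congestion events reported by the device: count transitions
// between high and low bandwidth-utilization states per direction and
// expose the counters through an ethtool statistics group.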

#include "en.h"
#include "pcie_cong_event.h"

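/* Value of the {in,out}bound_cong_state object fields that marks the
 * congested ("high") state.
 */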
#define MLX5E_CONG_HIGH_STATE 0x7

enum {
	MLX5E_INBOUND_CONG  = BIT(0),
	MLX5E_OUTBOUND_CONG = BIT(1),
};

struct mlx5e_pcie_cong_thresh {
	u16 inbound_high;
	u16 inbound_low;
	u16 outbound_high;
	u16 outbound_low;
};

struct mlx5e_pcie_cong_stats {
	u32 pci_bw_inbound_high;
	u32 pci_bw_inbound_low;
	u32 pci_bw_outbound_high;
	u32 pci_bw_outbound_low;
};

struct mlx5e_pcie_cong_event {
	u64 obj_id;

	struct mlx5e_priv *priv;

	/* For event notifier and workqueue. */
	struct work_struct work;
	struct mlx5_nb nb;

	/* Stores last read state. */
	u8 state;

	/* For ethtool stats group. */
	struct mlx5e_pcie_cong_stats stats;
};

/* Default thresholds, in units of 0.01%: raise the congestion state when
 * utilization crosses 90% and clear it when it falls below 75%.
 */
static const struct mlx5e_pcie_cong_thresh default_thresh_config = {
	.inbound_high = 9000,
	.inbound_low = 7500,
	.outbound_high = 9000,
	.outbound_low = 7500,
};

static const struct counter_desc mlx5e_pcie_cong_stats_desc[] = {
	{ MLX5E_DECLARE_STAT(struct mlx5e_pcie_cong_stats,
			     pci_bw_inbound_high) },
	{ MLX5E_DECLARE_STAT(struct mlx5e_pcie_cong_stats,
			     pci_bw_inbound_low) },
	{ MLX5E_DECLARE_STAT(struct mlx5e_pcie_cong_stats,
			     pci_bw_outbound_high) },
	{ MLX5E_DECLARE_STAT(struct mlx5e_pcie_cong_stats,
			     pci_bw_outbound_low) },
};

#define NUM_PCIE_CONG_COUNTERS ARRAY_SIZE(mlx5e_pcie_cong_stats_desc)

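/* Ethtool stats group. The counters are maintained in software by the
 * event work handler, so the update callback has nothing to do, and the
 * group is empty when no congestion event object exists.
 */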
static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(pcie_cong)
{
	return priv->cong_event ? NUM_PCIE_CONG_COUNTERS : 0;
}

static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(pcie_cong) {}

static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(pcie_cong)
{
	if (!priv->cong_event)
		return;

	for (int i = 0; i < NUM_PCIE_CONG_COUNTERS; i++)
		ethtool_puts(data, mlx5e_pcie_cong_stats_desc[i].format);
}

static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(pcie_cong)
{
	if (!priv->cong_event)
		return;

	for (int i = 0; i < NUM_PCIE_CONG_COUNTERS; i++) {
		u32 ctr = MLX5E_READ_CTR32_CPU(&priv->cong_event->stats,
					       mlx5e_pcie_cong_stats_desc,
					       i);

		mlx5e_ethtool_put_stat(data, ctr);
	}
}

MLX5E_DEFINE_STATS_GRP(pcie_cong, 0);

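/* Create a PCIE_CONG_EVENT general object with the given thresholds,
 * enabling event generation for both directions. On success, the object
 * ID is returned through @obj_id.
 */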
static int
mlx5_cmd_pcie_cong_event_set(struct mlx5_core_dev *dev,
			     const struct mlx5e_pcie_cong_thresh *config,
			     u64 *obj_id)
{
	u32 in[MLX5_ST_SZ_DW(pcie_cong_event_cmd_in)] = {};
	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
	void *cong_obj;
	void *hdr;
	int err;

	hdr = MLX5_ADDR_OF(pcie_cong_event_cmd_in, in, hdr);
	cong_obj = MLX5_ADDR_OF(pcie_cong_event_cmd_in, in, cong_obj);

	MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode,
		 MLX5_CMD_OP_CREATE_GENERAL_OBJECT);

	MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type,
		 MLX5_GENERAL_OBJECT_TYPES_PCIE_CONG_EVENT);

	MLX5_SET(pcie_cong_event_obj, cong_obj, inbound_event_en, 1);
	MLX5_SET(pcie_cong_event_obj, cong_obj, outbound_event_en, 1);

	MLX5_SET(pcie_cong_event_obj, cong_obj,
		 inbound_cong_high_threshold, config->inbound_high);
	MLX5_SET(pcie_cong_event_obj, cong_obj,
		 inbound_cong_low_threshold, config->inbound_low);

	MLX5_SET(pcie_cong_event_obj, cong_obj,
		 outbound_cong_high_threshold, config->outbound_high);
	MLX5_SET(pcie_cong_event_obj, cong_obj,
		 outbound_cong_low_threshold, config->outbound_low);

	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
	if (err)
		return err;

	*obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);

	mlx5_core_dbg(dev, "PCIe congestion event (obj_id=%llu) created. Config: in: [%u, %u], out: [%u, %u]\n",
		      *obj_id,
		      config->inbound_high, config->inbound_low,
		      config->outbound_high, config->outbound_low);

	return 0;
}

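/* Destroy the PCIE_CONG_EVENT object identified by @obj_id. */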
static int mlx5_cmd_pcie_cong_event_destroy(struct mlx5_core_dev *dev,
					    u64 obj_id)
{
	u32 in[MLX5_ST_SZ_DW(pcie_cong_event_cmd_in)] = {};
	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
	void *hdr;

	hdr = MLX5_ADDR_OF(pcie_cong_event_cmd_in, in, hdr);
	MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode,
		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type,
		 MLX5_GENERAL_OBJECT_TYPES_PCIE_CONG_EVENT);
	MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_id, obj_id);

	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}

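/* Query the object's current congestion state. For each direction in the
 * congested state, the matching MLX5E_*_CONG bit is OR-ed into @state;
 * callers are expected to pass a zero-initialized value.
 */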
static int mlx5_cmd_pcie_cong_event_query(struct mlx5_core_dev *dev,
					  u64 obj_id,
					  u32 *state)
{
	u32 in[MLX5_ST_SZ_DW(pcie_cong_event_cmd_in)] = {};
	u32 out[MLX5_ST_SZ_DW(pcie_cong_event_cmd_out)];
	void *obj;
	void *hdr;
	u8 cong;
	int err;

	hdr = MLX5_ADDR_OF(pcie_cong_event_cmd_in, in, hdr);

	MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode,
		 MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type,
		 MLX5_GENERAL_OBJECT_TYPES_PCIE_CONG_EVENT);
	MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_id, obj_id);

	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
	if (err)
		return err;

	obj = MLX5_ADDR_OF(pcie_cong_event_cmd_out, out, cong_obj);

	if (state) {
		cong = MLX5_GET(pcie_cong_event_obj, obj, inbound_cong_state);
		if (cong == MLX5E_CONG_HIGH_STATE)
			*state |= MLX5E_INBOUND_CONG;

		cong = MLX5_GET(pcie_cong_event_obj, obj, outbound_cong_state);
		if (cong == MLX5E_CONG_HIGH_STATE)
			*state |= MLX5E_OUTBOUND_CONG;
	}

	return 0;
}

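/* Work handler: query the current state, diff it against the last state
 * seen, and bump the high/low counters for each direction that changed.
 */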
static void mlx5e_pcie_cong_event_work(struct work_struct *work)
{
	struct mlx5e_pcie_cong_event *cong_event;
	struct mlx5_core_dev *dev;
	struct mlx5e_priv *priv;
	u32 new_cong_state = 0;
	u32 changes;
	int err;

	cong_event = container_of(work, struct mlx5e_pcie_cong_event, work);
	priv = cong_event->priv;
	dev = priv->mdev;

	err = mlx5_cmd_pcie_cong_event_query(dev, cong_event->obj_id,
					     &new_cong_state);
	if (err) {
		mlx5_core_warn(dev, "Error %d when querying PCIe cong event object (obj_id=%llu).\n",
			       err, cong_event->obj_id);
		return;
	}

	changes = cong_event->state ^ new_cong_state;
	if (!changes)
		return;

	cong_event->state = new_cong_state;

	if (changes & MLX5E_INBOUND_CONG) {
		if (new_cong_state & MLX5E_INBOUND_CONG)
			cong_event->stats.pci_bw_inbound_high++;
		else
			cong_event->stats.pci_bw_inbound_low++;
	}

	if (changes & MLX5E_OUTBOUND_CONG) {
		if (new_cong_state & MLX5E_OUTBOUND_CONG)
			cong_event->stats.pci_bw_outbound_high++;
		else
			cong_event->stats.pci_bw_outbound_low++;
	}
}

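/* OBJECT_CHANGE notifier callback: defer the device query to the driver
 * workqueue.
 */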
static int mlx5e_pcie_cong_event_handler(struct notifier_block *nb,
					 unsigned long event, void *eqe)
{
	struct mlx5e_pcie_cong_event *cong_event;

	cong_event = mlx5_nb_cof(nb, struct mlx5e_pcie_cong_event, nb);
	queue_work(cong_event->priv->wq, &cong_event->work);

	return NOTIFY_OK;
}

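/* Create the congestion event object with the default thresholds and
 * register for OBJECT_CHANGE notifications. A no-op on devices without
 * PCIe congestion event support.
 */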
int mlx5e_pcie_cong_event_init(struct mlx5e_priv *priv)
{
	struct mlx5e_pcie_cong_event *cong_event;
	struct mlx5_core_dev *mdev = priv->mdev;
	int err;

	if (!mlx5_pcie_cong_event_supported(mdev))
		return 0;

	cong_event = kvzalloc_node(sizeof(*cong_event), GFP_KERNEL,
				   mdev->priv.numa_node);
	if (!cong_event)
		return -ENOMEM;

	INIT_WORK(&cong_event->work, mlx5e_pcie_cong_event_work);
	MLX5_NB_INIT(&cong_event->nb, mlx5e_pcie_cong_event_handler,
		     OBJECT_CHANGE);

	cong_event->priv = priv;

	err = mlx5_cmd_pcie_cong_event_set(mdev, &default_thresh_config,
					   &cong_event->obj_id);
	if (err) {
		mlx5_core_warn(mdev, "Error creating a PCIe congestion event object\n");
		goto err_free;
	}

	err = mlx5_eq_notifier_register(mdev, &cong_event->nb);
	if (err) {
		mlx5_core_warn(mdev, "Error registering notifier for the PCIe congestion event\n");
		goto err_obj_destroy;
	}

	priv->cong_event = cong_event;

	return 0;

err_obj_destroy:
	mlx5_cmd_pcie_cong_event_destroy(mdev, cong_event->obj_id);
err_free:
	kvfree(cong_event);

	return err;
}

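/* Teardown: clear priv->cong_event first so the ethtool stats callbacks
 * stop reporting the group, then unregister the notifier and flush the
 * work before destroying the object.
 */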
void mlx5e_pcie_cong_event_cleanup(struct mlx5e_priv *priv)
{
	struct mlx5e_pcie_cong_event *cong_event = priv->cong_event;
	struct mlx5_core_dev *mdev = priv->mdev;

	if (!cong_event)
		return;

	priv->cong_event = NULL;

	mlx5_eq_notifier_unregister(mdev, &cong_event->nb);
	cancel_work_sync(&cong_event->work);

	if (mlx5_cmd_pcie_cong_event_destroy(mdev, cong_event->obj_id))
		mlx5_core_warn(mdev, "Error destroying PCIe congestion event (obj_id=%llu)\n",
			       cong_event->obj_id);

	kvfree(cong_event);
}
316