// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.

#include "en.h"
#include "pcie_cong_event.h"

#define MLX5E_CONG_HIGH_STATE 0x7

enum {
        MLX5E_INBOUND_CONG = BIT(0),
        MLX5E_OUTBOUND_CONG = BIT(1),
};

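/*
 * Per-direction congestion thresholds. Each direction carries a
 * high/low pair: the high threshold raises a congestion event and the
 * low one clears it, so the gap between the two acts as hysteresis.
 */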
struct mlx5e_pcie_cong_thresh {
        u16 inbound_high;
        u16 inbound_low;
        u16 outbound_high;
        u16 outbound_low;
};

struct mlx5e_pcie_cong_stats {
        u32 pci_bw_inbound_high;
        u32 pci_bw_inbound_low;
        u32 pci_bw_outbound_high;
        u32 pci_bw_outbound_low;
};

struct mlx5e_pcie_cong_event {
        u64 obj_id;

        struct mlx5e_priv *priv;

        /* For event notifier and workqueue. */
        struct work_struct work;
        struct mlx5_nb nb;

        /* Stores last read state. */
        u8 state;

        /* For ethtool stats group. */
        struct mlx5e_pcie_cong_stats stats;
};

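/*
 * Defaults: raise at 90.00% link utilization, clear at 75.00%. The gap
 * between the two keeps utilization hovering near a single threshold
 * from generating an event storm.
 */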
/* In units of 0.01 % */
static const struct mlx5e_pcie_cong_thresh default_thresh_config = {
        .inbound_high = 9000,
        .inbound_low = 7500,
        .outbound_high = 9000,
        .outbound_low = 7500,
};

static const struct counter_desc mlx5e_pcie_cong_stats_desc[] = {
        { MLX5E_DECLARE_STAT(struct mlx5e_pcie_cong_stats,
                             pci_bw_inbound_high) },
        { MLX5E_DECLARE_STAT(struct mlx5e_pcie_cong_stats,
                             pci_bw_inbound_low) },
        { MLX5E_DECLARE_STAT(struct mlx5e_pcie_cong_stats,
                             pci_bw_outbound_high) },
        { MLX5E_DECLARE_STAT(struct mlx5e_pcie_cong_stats,
                             pci_bw_outbound_low) },
};

#define NUM_PCIE_CONG_COUNTERS ARRAY_SIZE(mlx5e_pcie_cong_stats_desc)

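/*
 * ethtool "pcie_cong" stats group. The counters are bumped from the
 * event work item below, so there is nothing to do at update time and
 * the UPDATE_STATS op is intentionally a stub.
 */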
static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(pcie_cong)
{
        return priv->cong_event ? NUM_PCIE_CONG_COUNTERS : 0;
}

static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(pcie_cong) {}

static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(pcie_cong)
{
        if (!priv->cong_event)
                return;

        for (int i = 0; i < NUM_PCIE_CONG_COUNTERS; i++)
                ethtool_puts(data, mlx5e_pcie_cong_stats_desc[i].format);
}

static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(pcie_cong)
{
        if (!priv->cong_event)
                return;

        for (int i = 0; i < NUM_PCIE_CONG_COUNTERS; i++) {
                u32 ctr = MLX5E_READ_CTR32_CPU(&priv->cong_event->stats,
                                               mlx5e_pcie_cong_stats_desc,
                                               i);

                mlx5e_ethtool_put_stat(data, ctr);
        }
}

MLX5E_DEFINE_STATS_GRP(pcie_cong, 0);

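/*
 * Create a PCIE_CONG_EVENT general object with both directions enabled
 * and the given thresholds, returning the firmware-assigned object id
 * through @obj_id so later query/destroy commands can address it.
 */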
static int
mlx5_cmd_pcie_cong_event_set(struct mlx5_core_dev *dev,
                             const struct mlx5e_pcie_cong_thresh *config,
                             u64 *obj_id)
{
        u32 in[MLX5_ST_SZ_DW(pcie_cong_event_cmd_in)] = {};
        u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
        void *cong_obj;
        void *hdr;
        int err;

        hdr = MLX5_ADDR_OF(pcie_cong_event_cmd_in, in, hdr);
        cong_obj = MLX5_ADDR_OF(pcie_cong_event_cmd_in, in, cong_obj);

        MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode,
                 MLX5_CMD_OP_CREATE_GENERAL_OBJECT);

        MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type,
                 MLX5_GENERAL_OBJECT_TYPES_PCIE_CONG_EVENT);

        MLX5_SET(pcie_cong_event_obj, cong_obj, inbound_event_en, 1);
        MLX5_SET(pcie_cong_event_obj, cong_obj, outbound_event_en, 1);

        MLX5_SET(pcie_cong_event_obj, cong_obj,
                 inbound_cong_high_threshold, config->inbound_high);
        MLX5_SET(pcie_cong_event_obj, cong_obj,
                 inbound_cong_low_threshold, config->inbound_low);

        MLX5_SET(pcie_cong_event_obj, cong_obj,
                 outbound_cong_high_threshold, config->outbound_high);
        MLX5_SET(pcie_cong_event_obj, cong_obj,
                 outbound_cong_low_threshold, config->outbound_low);

        err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
        if (err)
                return err;

        *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);

        mlx5_core_dbg(dev, "PCIe congestion event (obj_id=%llu) created. Config: in: [%u, %u], out: [%u, %u]\n",
                      *obj_id,
                      config->inbound_high, config->inbound_low,
                      config->outbound_high, config->outbound_low);

        return 0;
}

static int mlx5_cmd_pcie_cong_event_destroy(struct mlx5_core_dev *dev,
                                            u64 obj_id)
{
        u32 in[MLX5_ST_SZ_DW(pcie_cong_event_cmd_in)] = {};
        u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
        void *hdr;

        hdr = MLX5_ADDR_OF(pcie_cong_event_cmd_in, in, hdr);
        MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode,
                 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
        MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type,
                 MLX5_GENERAL_OBJECT_TYPES_PCIE_CONG_EVENT);
        MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_id, obj_id);

        return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}

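/*
 * Query the object and fold the per-direction congestion state into
 * @state: a direction is reported congested only while its state field
 * reads MLX5E_CONG_HIGH_STATE.
 */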
static int mlx5_cmd_pcie_cong_event_query(struct mlx5_core_dev *dev,
                                          u64 obj_id,
                                          u32 *state)
{
        u32 in[MLX5_ST_SZ_DW(pcie_cong_event_cmd_in)] = {};
        u32 out[MLX5_ST_SZ_DW(pcie_cong_event_cmd_out)];
        void *obj;
        void *hdr;
        u8 cong;
        int err;

        hdr = MLX5_ADDR_OF(pcie_cong_event_cmd_in, in, hdr);

        MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode,
                 MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
        MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type,
                 MLX5_GENERAL_OBJECT_TYPES_PCIE_CONG_EVENT);
        MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_id, obj_id);

        err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
        if (err)
                return err;

        obj = MLX5_ADDR_OF(pcie_cong_event_cmd_out, out, cong_obj);

        if (state) {
                cong = MLX5_GET(pcie_cong_event_obj, obj, inbound_cong_state);
                if (cong == MLX5E_CONG_HIGH_STATE)
                        *state |= MLX5E_INBOUND_CONG;

                cong = MLX5_GET(pcie_cong_event_obj, obj, outbound_cong_state);
                if (cong == MLX5E_CONG_HIGH_STATE)
                        *state |= MLX5E_OUTBOUND_CONG;
        }

        return 0;
}

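/*
 * Deferred handler for OBJECT_CHANGE notifications: re-read the object
 * state from firmware and, for each direction whose state changed,
 * bump the matching high/low transition counter.
 */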
static void mlx5e_pcie_cong_event_work(struct work_struct *work)
{
        struct mlx5e_pcie_cong_event *cong_event;
        struct mlx5_core_dev *dev;
        struct mlx5e_priv *priv;
        u32 new_cong_state = 0;
        u32 changes;
        int err;

        cong_event = container_of(work, struct mlx5e_pcie_cong_event, work);
        priv = cong_event->priv;
        dev = priv->mdev;

        err = mlx5_cmd_pcie_cong_event_query(dev, cong_event->obj_id,
                                             &new_cong_state);
        if (err) {
                mlx5_core_warn(dev, "Error %d when querying PCIe cong event object (obj_id=%llu).\n",
                               err, cong_event->obj_id);
                return;
        }

        changes = cong_event->state ^ new_cong_state;
        if (!changes)
                return;

        cong_event->state = new_cong_state;

        if (changes & MLX5E_INBOUND_CONG) {
                if (new_cong_state & MLX5E_INBOUND_CONG)
                        cong_event->stats.pci_bw_inbound_high++;
                else
                        cong_event->stats.pci_bw_inbound_low++;
        }

        if (changes & MLX5E_OUTBOUND_CONG) {
                if (new_cong_state & MLX5E_OUTBOUND_CONG)
                        cong_event->stats.pci_bw_outbound_high++;
                else
                        cong_event->stats.pci_bw_outbound_low++;
        }
}

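/*
 * OBJECT_CHANGE notifier callback. It may run in atomic context, where
 * firmware commands cannot be issued, so the actual query is deferred
 * to the driver workqueue.
 */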
static int mlx5e_pcie_cong_event_handler(struct notifier_block *nb,
                                         unsigned long event, void *eqe)
{
        struct mlx5e_pcie_cong_event *cong_event;

        cong_event = mlx5_nb_cof(nb, struct mlx5e_pcie_cong_event, nb);
        queue_work(cong_event->priv->wq, &cong_event->work);

        return NOTIFY_OK;
}

int mlx5e_pcie_cong_event_init(struct mlx5e_priv *priv)
{
        struct mlx5e_pcie_cong_event *cong_event;
        struct mlx5_core_dev *mdev = priv->mdev;
        int err;

        if (!mlx5_pcie_cong_event_supported(mdev))
                return 0;

        cong_event = kvzalloc_node(sizeof(*cong_event), GFP_KERNEL,
                                   mdev->priv.numa_node);
        if (!cong_event)
                return -ENOMEM;

        INIT_WORK(&cong_event->work, mlx5e_pcie_cong_event_work);
        MLX5_NB_INIT(&cong_event->nb, mlx5e_pcie_cong_event_handler,
                     OBJECT_CHANGE);

        cong_event->priv = priv;

        err = mlx5_cmd_pcie_cong_event_set(mdev, &default_thresh_config,
                                           &cong_event->obj_id);
        if (err) {
                mlx5_core_warn(mdev, "Error creating a PCIe congestion event object\n");
                goto err_free;
        }

        err = mlx5_eq_notifier_register(mdev, &cong_event->nb);
        if (err) {
                mlx5_core_warn(mdev, "Error registering notifier for the PCIe congestion event\n");
                goto err_obj_destroy;
        }

        priv->cong_event = cong_event;

        return 0;

err_obj_destroy:
        mlx5_cmd_pcie_cong_event_destroy(mdev, cong_event->obj_id);
err_free:
        kvfree(cong_event);

        return err;
}

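/*
 * Teardown order matters: clear priv->cong_event and unregister the
 * notifier so no new work can be queued, cancel any work in flight,
 * and only then destroy the firmware object the work would query.
 */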
void mlx5e_pcie_cong_event_cleanup(struct mlx5e_priv *priv)
{
        struct mlx5e_pcie_cong_event *cong_event = priv->cong_event;
        struct mlx5_core_dev *mdev = priv->mdev;

        if (!cong_event)
                return;

        priv->cong_event = NULL;

        mlx5_eq_notifier_unregister(mdev, &cong_event->nb);
        cancel_work_sync(&cong_event->work);

        if (mlx5_cmd_pcie_cong_event_destroy(mdev, cong_event->obj_id))
                mlx5_core_warn(mdev, "Error destroying PCIe congestion event (obj_id=%llu)\n",
                               cong_event->obj_id);

        kvfree(cong_event);
}