xref: /linux/drivers/net/ethernet/intel/ice/devlink/health.c (revision 816b02e63a759c4458edee142b721ab09c918b3d)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2024, Intel Corporation. */
3 
4 #include "health.h"
5 #include "ice.h"
6 
/* Pass (obj)->field to devlink_fmsg_put(), using the stringified member name
 * as the name argument, so the member only has to be spelled once.
 */
#define ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, obj, field) \
	devlink_fmsg_put(fmsg, #field, (obj)->field)
9 
/**
 * ice_devlink_health_report - forward an event to a devlink health reporter
 *
 * @reporter: devlink health reporter to notify; a NULL reporter is skipped
 * @msg: short message describing the event, the event name is enough
 * @priv_ctx: event specific context handed over to devlink_health_report()
 */
static void ice_devlink_health_report(struct devlink_health_reporter *reporter,
				      const char *msg, void *priv_ctx)
{
	/* The result is deliberately dropped: we do no auto recovery, so the
	 * call below is expected to always return 0.
	 */
	if (reporter)
		devlink_health_report(reporter, msg, priv_ctx);
}
28 
29 struct ice_mdd_event {
30 	enum ice_mdd_src src;
31 	u16 vf_num;
32 	u16 queue;
33 	u8 pf_num;
34 	u8 event;
35 };
36 
37 static const char *ice_mdd_src_to_str(enum ice_mdd_src src)
38 {
39 	switch (src) {
40 	case ICE_MDD_SRC_TX_PQM:
41 		return "tx_pqm";
42 	case ICE_MDD_SRC_TX_TCLAN:
43 		return "tx_tclan";
44 	case ICE_MDD_SRC_TX_TDPU:
45 		return "tx_tdpu";
46 	case ICE_MDD_SRC_RX:
47 		return "rx";
48 	default:
49 		return "invalid";
50 	}
51 }
52 
53 static int
54 ice_mdd_reporter_dump(struct devlink_health_reporter *reporter,
55 		      struct devlink_fmsg *fmsg, void *priv_ctx,
56 		      struct netlink_ext_ack *extack)
57 {
58 	struct ice_mdd_event *mdd_event = priv_ctx;
59 	const char *src;
60 
61 	if (!mdd_event)
62 		return 0;
63 
64 	src = ice_mdd_src_to_str(mdd_event->src);
65 
66 	devlink_fmsg_obj_nest_start(fmsg);
67 	devlink_fmsg_put(fmsg, "src", src);
68 	ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, mdd_event, pf_num);
69 	ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, mdd_event, vf_num);
70 	ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, mdd_event, event);
71 	ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, mdd_event, queue);
72 	devlink_fmsg_obj_nest_end(fmsg);
73 
74 	return 0;
75 }
76 
77 /**
78  * ice_report_mdd_event - Report an MDD event through devlink health
79  * @pf: the PF device structure
80  * @src: the HW block that was the source of this MDD event
81  * @pf_num: the pf_num on which the MDD event occurred
82  * @vf_num: the vf_num on which the MDD event occurred
83  * @event: the event type of the MDD event
84  * @queue: the queue on which the MDD event occurred
85  *
86  * Report an MDD event that has occurred on this PF.
87  */
88 void ice_report_mdd_event(struct ice_pf *pf, enum ice_mdd_src src, u8 pf_num,
89 			  u16 vf_num, u8 event, u16 queue)
90 {
91 	struct ice_mdd_event ev = {
92 		.src = src,
93 		.pf_num = pf_num,
94 		.vf_num = vf_num,
95 		.event = event,
96 		.queue = queue,
97 	};
98 
99 	ice_devlink_health_report(pf->health_reporters.mdd, "MDD event", &ev);
100 }
101 
/**
 * ice_fmsg_put_ptr - put a pointer, formatted with "%p", into fmsg
 *
 * @fmsg: devlink fmsg under construction
 * @name: name to pass
 * @ptr: pointer to format with "%p" and put into fmsg as a string
 */
static void ice_fmsg_put_ptr(struct devlink_fmsg *fmsg, const char *name,
			     void *ptr)
{
	/* Three characters per pointer byte is a generous budget for the
	 * textual form; the bounded print below guarantees the buffer is
	 * never overrun and is always NUL-terminated regardless.
	 */
	char buf[sizeof(ptr) * 3];

	snprintf(buf, sizeof(buf), "%p", ptr);
	devlink_fmsg_put(fmsg, name, buf);
}
117 
118 struct ice_tx_hang_event {
119 	u32 head;
120 	u32 intr;
121 	u16 vsi_num;
122 	u16 queue;
123 	u16 next_to_clean;
124 	u16 next_to_use;
125 	struct ice_tx_ring *tx_ring;
126 };
127 
128 static int ice_tx_hang_reporter_dump(struct devlink_health_reporter *reporter,
129 				     struct devlink_fmsg *fmsg, void *priv_ctx,
130 				     struct netlink_ext_ack *extack)
131 {
132 	struct ice_tx_hang_event *event = priv_ctx;
133 	struct sk_buff *skb;
134 
135 	if (!event)
136 		return 0;
137 
138 	skb = event->tx_ring->tx_buf->skb;
139 	devlink_fmsg_obj_nest_start(fmsg);
140 	ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, head);
141 	ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, intr);
142 	ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, vsi_num);
143 	ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, queue);
144 	ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, next_to_clean);
145 	ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, next_to_use);
146 	devlink_fmsg_put(fmsg, "irq-mapping", event->tx_ring->q_vector->name);
147 	ice_fmsg_put_ptr(fmsg, "desc-ptr", event->tx_ring->desc);
148 	ice_fmsg_put_ptr(fmsg, "dma-ptr", (void *)(long)event->tx_ring->dma);
149 	ice_fmsg_put_ptr(fmsg, "skb-ptr", skb);
150 	devlink_fmsg_binary_pair_put(fmsg, "desc", event->tx_ring->desc,
151 				     event->tx_ring->count * sizeof(struct ice_tx_desc));
152 	devlink_fmsg_dump_skb(fmsg, skb);
153 	devlink_fmsg_obj_nest_end(fmsg);
154 
155 	return 0;
156 }
157 
158 void ice_prep_tx_hang_report(struct ice_pf *pf, struct ice_tx_ring *tx_ring,
159 			     u16 vsi_num, u32 head, u32 intr)
160 {
161 	struct ice_health_tx_hang_buf *buf = &pf->health_reporters.tx_hang_buf;
162 
163 	buf->tx_ring = tx_ring;
164 	buf->vsi_num = vsi_num;
165 	buf->head = head;
166 	buf->intr = intr;
167 }
168 
169 void ice_report_tx_hang(struct ice_pf *pf)
170 {
171 	struct ice_health_tx_hang_buf *buf = &pf->health_reporters.tx_hang_buf;
172 	struct ice_tx_ring *tx_ring = buf->tx_ring;
173 
174 	struct ice_tx_hang_event ev = {
175 		.head = buf->head,
176 		.intr = buf->intr,
177 		.vsi_num = buf->vsi_num,
178 		.queue = tx_ring->q_index,
179 		.next_to_clean = tx_ring->next_to_clean,
180 		.next_to_use = tx_ring->next_to_use,
181 		.tx_ring = tx_ring,
182 	};
183 
184 	ice_devlink_health_report(pf->health_reporters.tx_hang, "Tx hang", &ev);
185 }
186 
187 static struct devlink_health_reporter *
188 ice_init_devlink_rep(struct ice_pf *pf,
189 		     const struct devlink_health_reporter_ops *ops)
190 {
191 	struct devlink *devlink = priv_to_devlink(pf);
192 	struct devlink_health_reporter *rep;
193 	const u64 graceful_period = 0;
194 
195 	rep = devl_health_reporter_create(devlink, ops, graceful_period, pf);
196 	if (IS_ERR(rep)) {
197 		struct device *dev = ice_pf_to_dev(pf);
198 
199 		dev_err(dev, "failed to create devlink %s health report er",
200 			ops->name);
201 		return NULL;
202 	}
203 	return rep;
204 }
205 
206 #define ICE_DEFINE_HEALTH_REPORTER_OPS(_name) \
207 	static const struct devlink_health_reporter_ops ice_ ## _name ## _reporter_ops = { \
208 	.name = #_name, \
209 	.dump = ice_ ## _name ## _reporter_dump, \
210 }
211 
212 ICE_DEFINE_HEALTH_REPORTER_OPS(mdd);
213 ICE_DEFINE_HEALTH_REPORTER_OPS(tx_hang);
214 
215 /**
216  * ice_health_init - allocate and init all ice devlink health reporters and
217  * accompanied data
218  *
219  * @pf: PF struct
220  */
221 void ice_health_init(struct ice_pf *pf)
222 {
223 	struct ice_health *reps = &pf->health_reporters;
224 
225 	reps->mdd = ice_init_devlink_rep(pf, &ice_mdd_reporter_ops);
226 	reps->tx_hang = ice_init_devlink_rep(pf, &ice_tx_hang_reporter_ops);
227 }
228 
/**
 * ice_deinit_devl_reporter - destroy given devlink health reporter
 * @reporter: reporter to destroy, NULL is accepted and ignored
 */
static void ice_deinit_devl_reporter(struct devlink_health_reporter *reporter)
{
	if (!reporter)
		return;

	devl_health_reporter_destroy(reporter);
}
238 
239 /**
240  * ice_health_deinit - deallocate all ice devlink health reporters and
241  * accompanied data
242  *
243  * @pf: PF struct
244  */
245 void ice_health_deinit(struct ice_pf *pf)
246 {
247 	ice_deinit_devl_reporter(pf->health_reporters.mdd);
248 	ice_deinit_devl_reporter(pf->health_reporters.tx_hang);
249 }
250 
251 static
252 void ice_health_assign_healthy_state(struct devlink_health_reporter *reporter)
253 {
254 	if (reporter)
255 		devlink_health_reporter_state_update(reporter,
256 						     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
257 }
258 
259 /**
260  * ice_health_clear - clear devlink health issues after a reset
261  * @pf: the PF device structure
262  *
263  * Mark the PF in healthy state again after a reset has completed.
264  */
265 void ice_health_clear(struct ice_pf *pf)
266 {
267 	ice_health_assign_healthy_state(pf->health_reporters.mdd);
268 	ice_health_assign_healthy_state(pf->health_reporters.tx_hang);
269 }
270