xref: /freebsd/sys/dev/mlx5/mlx5_core/mlx5_diagnostics.c (revision 95ee2897e98f5d444f26ed2334cc7c439f9c16c6)
1 /*-
2  * Copyright (c) 2013-2017, Mellanox Technologies, Ltd.  All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  */
25 
26 #include "opt_rss.h"
27 #include "opt_ratelimit.h"
28 
29 #include <dev/mlx5/driver.h>
30 #include <dev/mlx5/port.h>
31 #include <dev/mlx5/diagnostics.h>
32 #include <dev/mlx5/mlx5_core/mlx5_core.h>
33 #include <net/sff8472.h>
34 
35 const struct mlx5_core_diagnostics_entry
36 	mlx5_core_pci_diagnostics_table[
37 		MLX5_CORE_PCI_DIAGNOSTICS_NUM] = {
38 	MLX5_CORE_PCI_DIAGNOSTICS(MLX5_CORE_DIAGNOSTICS_ENTRY)
39 };
40 
41 const struct mlx5_core_diagnostics_entry
42 	mlx5_core_general_diagnostics_table[
43 		MLX5_CORE_GENERAL_DIAGNOSTICS_NUM] = {
44 	MLX5_CORE_GENERAL_DIAGNOSTICS(MLX5_CORE_DIAGNOSTICS_ENTRY)
45 };
46 
/*
 * Search the first "size" elements of "entry" for "counter_id".
 *
 * Returns the zero-based position of the first matching element, or
 * -1 when "counter_id" is zero (treated as invalid) or no element
 * carries that ID.
 */
static int
mlx5_core_get_index_of_diag_counter(
    const struct mlx5_core_diagnostics_entry *entry,
    int size, u16 counter_id)
{
	int pos = 0;

	/* a counter ID of zero is never looked up */
	if (counter_id == 0)
		return -1;

	/* linear scan; the first hit wins */
	while (pos != size) {
		if (entry[pos].counter_id == counter_id)
			return pos;
		pos++;
	}
	return -1;
}
64 
/*
 * Store "value" into the slot of "array" that corresponds to
 * "counter_id" in the table "entry".
 *
 * "entry" and "array" are indexed in parallel and both are taken to
 * hold "size" elements.  Nothing is written when "counter_id" is zero
 * (treated as invalid) or is not present in the table.
 */
static void
mlx5_core_put_diag_counter(
    const struct mlx5_core_diagnostics_entry *entry,
    u64 *array, int size, u16 counter_id, u64 value)
{
	int slot;

	/* a counter ID of zero is never stored */
	if (counter_id == 0)
		return;

	/* advance to the first table element carrying this ID */
	slot = 0;
	while (slot != size && entry[slot].counter_id != counter_id)
		slot++;

	/* only write when the scan stopped on a match */
	if (slot != size)
		array[slot] = value;
}
83 
/*
 * Build and issue the firmware command that selects which diagnostic
 * counters are enabled.
 *
 * Every counter ID advertised in the device debug capability is
 * selected when it is present in the local PCI table (and
 * "enable_pci" is non-zero) or in the local general table (and
 * "enable_general" is non-zero).
 *
 * Returns 0 without issuing a command when the device has no debug
 * capability or advertises no diagnostic counters, -ENOMEM when the
 * command buffer cannot be allocated, and otherwise the result of
 * mlx5_set_diagnostic_params().
 */
int
mlx5_core_set_diagnostics_full(struct mlx5_core_dev *dev,
    u8 enable_pci, u8 enable_general)
{
	void *ctx;
	void *cmd;
	int cmd_len;
	int num_hw;
	int num_sel = 0;
	int status;
	int i;

	if (MLX5_CAP_GEN(dev, debug) == 0)
		return 0;

	num_hw = MLX5_CAP_GEN(dev, num_of_diagnostic_counters);
	if (num_hw == 0)
		return 0;

	/* size the buffer for the case where every counter is selected */
	cmd_len = MLX5_ST_SZ_BYTES(set_diagnostic_params_in) +
	    MLX5_ST_SZ_BYTES(diagnostic_counter) * num_hw;
	cmd = mlx5_vzalloc(cmd_len);
	if (cmd == NULL)
		return -ENOMEM;

	ctx = MLX5_ADDR_OF(set_diagnostic_params_in, cmd,
	    diagnostic_params_ctx);

	MLX5_SET(diagnostic_params_context, ctx, enable,
	    enable_pci || enable_general);
	MLX5_SET(diagnostic_params_context, ctx, single, 1);
	MLX5_SET(diagnostic_params_context, ctx, on_demand, 1);

	/* pick the advertised counters that the enabled tables know */
	for (i = 0; i != num_hw; i++) {
		u16 hw_id =
		    MLX5_CAP_DEBUG(dev, diagnostic_counter[i].counter_id);
		int wanted = 0;

		if (enable_pci != 0 &&
		    mlx5_core_get_index_of_diag_counter(
		    mlx5_core_pci_diagnostics_table,
		    MLX5_CORE_PCI_DIAGNOSTICS_NUM, hw_id) >= 0)
			wanted = 1;

		/* the general table is only consulted on a PCI miss */
		if (!wanted && enable_general != 0 &&
		    mlx5_core_get_index_of_diag_counter(
		    mlx5_core_general_diagnostics_table,
		    MLX5_CORE_GENERAL_DIAGNOSTICS_NUM, hw_id) >= 0)
			wanted = 1;

		if (!wanted)
			continue;

		MLX5_SET(diagnostic_params_context, ctx,
		    counter_id[num_sel].counter_id, hw_id);
		num_sel++;
	}

	/* shrink the command to the counters actually selected */
	cmd_len = MLX5_ST_SZ_BYTES(set_diagnostic_params_in) +
	    MLX5_ST_SZ_BYTES(diagnostic_counter) * num_sel;

	MLX5_SET(diagnostic_params_context, ctx, num_of_counters, num_sel);

	/* hand the command to firmware */
	status = mlx5_set_diagnostic_params(dev, cmd, cmd_len);

	kvfree(cmd);

	return status;
}
163 
mlx5_core_get_diagnostics_full(struct mlx5_core_dev * dev,union mlx5_core_pci_diagnostics * pdiag,union mlx5_core_general_diagnostics * pgen)164 int mlx5_core_get_diagnostics_full(struct mlx5_core_dev *dev,
165 				   union mlx5_core_pci_diagnostics *pdiag,
166 				   union mlx5_core_general_diagnostics *pgen)
167 {
168 	void *out;
169 	void *in;
170 	int numcounters;
171 	int outlen;
172 	int inlen;
173 	int err;
174 	int x;
175 
176 	if (MLX5_CAP_GEN(dev, debug) == 0)
177 		return 0;
178 
179 	numcounters = MLX5_CAP_GEN(dev, num_of_diagnostic_counters);
180 	if (numcounters == 0)
181 		return 0;
182 
183 	outlen = MLX5_ST_SZ_BYTES(query_diagnostic_counters_out) +
184 	    MLX5_ST_SZ_BYTES(diagnostic_counter) * numcounters;
185 
186 	out = mlx5_vzalloc(outlen);
187 	if (out == NULL)
188 		return -ENOMEM;
189 
190 	err = mlx5_query_diagnostic_counters(dev, 1, 0, out, outlen);
191 	if (err == 0) {
192 		for (x = 0; x != numcounters; x++) {
193 			u16 counter_id = MLX5_GET(
194 			    query_diagnostic_counters_out,
195 			    out, diag_counter[x].counter_id);
196 			u64 counter_value = MLX5_GET64(
197 			    query_diagnostic_counters_out,
198 			    out, diag_counter[x].counter_value_h);
199 
200 			if (pdiag != NULL) {
201 				mlx5_core_put_diag_counter(
202 				    mlx5_core_pci_diagnostics_table,
203 				    pdiag->array,
204 				    MLX5_CORE_PCI_DIAGNOSTICS_NUM,
205 				    counter_id, counter_value);
206 			}
207 			if (pgen != NULL) {
208 				mlx5_core_put_diag_counter(
209 				    mlx5_core_general_diagnostics_table,
210 				    pgen->array,
211 				    MLX5_CORE_GENERAL_DIAGNOSTICS_NUM,
212 				    counter_id, counter_value);
213 			}
214 		}
215 	}
216 	kvfree(out);
217 
218 	if (pdiag != NULL) {
219 		inlen = MLX5_ST_SZ_BYTES(mpcnt_reg);
220 		outlen = MLX5_ST_SZ_BYTES(mpcnt_reg);
221 
222 		in = mlx5_vzalloc(inlen);
223 		if (in == NULL)
224 			return -ENOMEM;
225 
226 		out = mlx5_vzalloc(outlen);
227 		if (out == NULL) {
228 			kvfree(in);
229 			return -ENOMEM;
230 		}
231 		MLX5_SET(mpcnt_reg, in, grp,
232 			 MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP);
233 
234 		err = mlx5_core_access_reg(dev, in, inlen, out, outlen,
235 					   MLX5_REG_MPCNT, 0, 0);
236 		if (err == 0) {
237 			void *pcounters = MLX5_ADDR_OF(mpcnt_reg, out,
238 			    counter_set.pcie_perf_counters);
239 
240 			pdiag->counter.rx_pci_errors =
241 			    MLX5_GET(pcie_perf_counters,
242 				     pcounters, rx_errors);
243 			pdiag->counter.tx_pci_errors =
244 			    MLX5_GET(pcie_perf_counters,
245 				     pcounters, tx_errors);
246 		}
247 		MLX5_SET(mpcnt_reg, in, grp,
248 			 MLX5_PCIE_TIMERS_AND_STATES_COUNTERS_GROUP);
249 
250 		err = mlx5_core_access_reg(dev, in, inlen, out, outlen,
251 		    MLX5_REG_MPCNT, 0, 0);
252 		if (err == 0) {
253 			void *pcounters = MLX5_ADDR_OF(mpcnt_reg, out,
254 			    counter_set.pcie_timers_states);
255 
256 			pdiag->counter.tx_pci_non_fatal_errors =
257 			    MLX5_GET(pcie_timers_states,
258 				     pcounters, non_fatal_err_msg_sent);
259 			pdiag->counter.tx_pci_fatal_errors =
260 			    MLX5_GET(pcie_timers_states,
261 				     pcounters, fatal_err_msg_sent);
262 		}
263 		kvfree(in);
264 		kvfree(out);
265 	}
266 	return 0;
267 }
268 
/*
 * Report whether the device supports a diagnostic counter.
 *
 * Returns 0 when the device has no debug capability.  Otherwise a
 * "counter_id" of zero asks about any counter and yields 1, and a
 * non-zero "counter_id" yields 1 only when that ID is listed in the
 * device debug capability, else 0.
 */
int
mlx5_core_supports_diagnostics(struct mlx5_core_dev *dev, u16 counter_id)
{
	int total;
	int supported = 0;
	int i = 0;

	if (MLX5_CAP_GEN(dev, debug) == 0)
		return 0;

	/* zero means "is there diagnostics support at all" */
	if (counter_id == 0)
		return 1;

	total = MLX5_CAP_GEN(dev, num_of_diagnostic_counters);

	/* stop at the first advertised ID equal to the requested one */
	while (i != total && !supported) {
		supported = (MLX5_CAP_DEBUG(dev,
		    diagnostic_counter[i].counter_id) == counter_id);
		i++;
	}
	return supported;
}
291 
292 /*
293  * Read the first three bytes of the eeprom in order to get the needed info
294  * for the whole reading.
295  * Byte 0 - Identifier byte
296  * Byte 1 - Revision byte
297  * Byte 2 - Status byte
298  */
299 int
mlx5_get_eeprom_info(struct mlx5_core_dev * dev,struct mlx5_eeprom * eeprom)300 mlx5_get_eeprom_info(struct mlx5_core_dev *dev, struct mlx5_eeprom *eeprom)
301 {
302 	u32 data = 0;
303 	int size_read = 0;
304 	int ret;
305 
306 	ret = mlx5_query_module_num(dev, &eeprom->module_num);
307 	if (ret) {
308 		mlx5_core_err(dev, "Failed query module error=%d\n", ret);
309 		return (-ret);
310 	}
311 
312 	/* Read the first three bytes to get Identifier, Revision and Status */
313 	ret = mlx5_query_eeprom(dev, eeprom->i2c_addr, eeprom->page_num,
314 	    eeprom->device_addr, MLX5_EEPROM_INFO_BYTES, eeprom->module_num, &data,
315 	    &size_read);
316 	if (ret) {
317 		mlx5_core_err(dev,
318 		    "Failed query EEPROM module error=0x%x\n", ret);
319 		return (-ret);
320 	}
321 
322 	switch (data & MLX5_EEPROM_IDENTIFIER_BYTE_MASK) {
323 	case SFF_8024_ID_QSFP:
324 		eeprom->type = MLX5_ETH_MODULE_SFF_8436;
325 		eeprom->len = MLX5_ETH_MODULE_SFF_8436_LEN;
326 		break;
327 	case SFF_8024_ID_QSFPPLUS:
328 	case SFF_8024_ID_QSFP28:
329 		if ((data & MLX5_EEPROM_IDENTIFIER_BYTE_MASK) == SFF_8024_ID_QSFP28 ||
330 		    ((data & MLX5_EEPROM_REVISION_ID_BYTE_MASK) >> 8) >= 0x3) {
331 			eeprom->type = MLX5_ETH_MODULE_SFF_8636;
332 			eeprom->len = MLX5_ETH_MODULE_SFF_8636_LEN;
333 		} else {
334 			eeprom->type = MLX5_ETH_MODULE_SFF_8436;
335 			eeprom->len = MLX5_ETH_MODULE_SFF_8436_LEN;
336 		}
337 		if ((data & MLX5_EEPROM_PAGE_3_VALID_BIT_MASK) == 0)
338 			eeprom->page_valid = 1;
339 		break;
340 	case SFF_8024_ID_SFP:
341 		eeprom->type = MLX5_ETH_MODULE_SFF_8472;
342 		eeprom->len = MLX5_ETH_MODULE_SFF_8472_LEN;
343 		break;
344 	default:
345 		mlx5_core_err(dev, "Not recognized cable type = 0x%x(%s)\n",
346 		    data & MLX5_EEPROM_IDENTIFIER_BYTE_MASK,
347 		    sff_8024_id[data & MLX5_EEPROM_IDENTIFIER_BYTE_MASK]);
348 		return (EINVAL);
349 	}
350 	return (0);
351 }
352 
353 /* Read both low and high pages of the eeprom */
354 int
mlx5_get_eeprom(struct mlx5_core_dev * dev,struct mlx5_eeprom * ee)355 mlx5_get_eeprom(struct mlx5_core_dev *dev, struct mlx5_eeprom *ee)
356 {
357 	int size_read = 0;
358 	int ret;
359 
360 	if (ee->len == 0)
361 		return (EINVAL);
362 
363 	/* Read low page of the eeprom */
364 	while (ee->device_addr < ee->len) {
365 		ret = mlx5_query_eeprom(dev, ee->i2c_addr, ee->page_num, ee->device_addr,
366 		    ee->len - ee->device_addr, ee->module_num,
367 		    ee->data + (ee->device_addr / 4), &size_read);
368 		if (ret) {
369 			mlx5_core_err(dev,
370 			    "Failed reading EEPROM, error = 0x%02x\n", ret);
371 			return (-ret);
372 		}
373 		ee->device_addr += size_read;
374 	}
375 
376 	/* Read high page of the eeprom */
377 	if (ee->page_valid == 1) {
378 		ee->device_addr = MLX5_EEPROM_HIGH_PAGE_OFFSET;
379 		ee->page_num = MLX5_EEPROM_HIGH_PAGE;
380 		size_read = 0;
381 		while (ee->device_addr < MLX5_EEPROM_PAGE_LENGTH) {
382 			ret = mlx5_query_eeprom(dev, ee->i2c_addr, ee->page_num,
383 			    ee->device_addr, MLX5_EEPROM_PAGE_LENGTH - ee->device_addr,
384 			    ee->module_num, ee->data + (ee->len / 4) +
385 			    ((ee->device_addr - MLX5_EEPROM_HIGH_PAGE_OFFSET) / 4),
386 			    &size_read);
387 			if (ret) {
388 				mlx5_core_err(dev,
389 				    "Failed reading EEPROM, error = 0x%02x\n",
390 				    ret);
391 				return (-ret);
392 			}
393 			ee->device_addr += size_read;
394 		}
395 	}
396 	return (0);
397 }
398 
399 /*
400  * Read cable EEPROM module information by first inspecting the first
401  * three bytes to get the initial information for a whole reading.
402  * Information will be printed to dmesg.
403  */
404 int
mlx5_read_eeprom(struct mlx5_core_dev * dev,struct mlx5_eeprom * eeprom)405 mlx5_read_eeprom(struct mlx5_core_dev *dev, struct mlx5_eeprom *eeprom)
406 {
407 	int error;
408 
409 	eeprom->i2c_addr = MLX5_I2C_ADDR_LOW;
410 	eeprom->device_addr = 0;
411 	eeprom->page_num = MLX5_EEPROM_LOW_PAGE;
412 	eeprom->page_valid = 0;
413 
414 	/* Read three first bytes to get important info */
415 	error = mlx5_get_eeprom_info(dev, eeprom);
416 	if (error) {
417 		mlx5_core_err(dev,
418 		    "Failed reading EEPROM initial information\n");
419 		return (error);
420 	}
421 	/*
422 	 * Allocate needed length buffer and additional space for
423 	 * page 0x03
424 	 */
425 	eeprom->data = malloc(eeprom->len + MLX5_EEPROM_PAGE_LENGTH,
426 	    M_MLX5_EEPROM, M_WAITOK | M_ZERO);
427 
428 	/* Read the whole eeprom information */
429 	error = mlx5_get_eeprom(dev, eeprom);
430 	if (error) {
431 		mlx5_core_err(dev, "Failed reading EEPROM\n");
432 		error = 0;
433 		/*
434 		 * Continue printing partial information in case of
435 		 * an error
436 		 */
437 	}
438 	free(eeprom->data, M_MLX5_EEPROM);
439 
440 	return (error);
441 }
442 
443 
444