xref: /linux/drivers/infiniband/hw/hfi1/fault.c (revision 22c5696e3fe029f4fc2decbe7cc6663b5d281223)
1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2 /*
3  * Copyright(c) 2018 Intel Corporation.
4  */
5 
6 #include <linux/debugfs.h>
7 #include <linux/seq_file.h>
8 #include <linux/kernel.h>
9 #include <linux/types.h>
10 #include <linux/bitmap.h>
11 
12 #include "debugfs.h"
13 #include "fault.h"
14 #include "trace.h"
15 
/*
 * Direction bits matched against fault->direction when deciding whether
 * to inject a fault: transmit, receive, or both.
 */
#define HFI1_FAULT_DIR_TX   BIT(0)
#define HFI1_FAULT_DIR_RX   BIT(1)
#define HFI1_FAULT_DIR_TXRX (HFI1_FAULT_DIR_TX | HFI1_FAULT_DIR_RX)
19 
/*
 * seq_file ->start callback for the fault_stats file.
 *
 * The cursor handed to ->show is the position pointer itself.  Iteration
 * is bounded by the number of entries in hfi1_opcode_stats_perctx::stats.
 *
 * Return: @pos while *@pos indexes a valid stats entry, NULL otherwise.
 */
static void *_fault_stats_seq_start(struct seq_file *s, loff_t *pos)
{
	/* Never dereferenced; only used to size the stats array. */
	struct hfi1_opcode_stats_perctx *perctx;

	return *pos < ARRAY_SIZE(perctx->stats) ? pos : NULL;
}
28 
/*
 * seq_file ->next callback for the fault_stats file.
 *
 * Advances *@pos by one entry.
 *
 * Return: @pos while the new position still indexes an entry of
 * hfi1_opcode_stats_perctx::stats, NULL once the end is reached.
 */
static void *_fault_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
	/* Never dereferenced; only used to size the stats array. */
	struct hfi1_opcode_stats_perctx *perctx;

	*pos += 1;
	return *pos < ARRAY_SIZE(perctx->stats) ? pos : NULL;
}
38 
/*
 * seq_file ->stop callback for the fault_stats file.
 *
 * Intentionally empty: ->start and ->next only hand back the position
 * pointer, so there is nothing to release here.
 */
static void _fault_stats_seq_stop(struct seq_file *s, void *v)
{
}
42 
_fault_stats_seq_show(struct seq_file * s,void * v)43 static int _fault_stats_seq_show(struct seq_file *s, void *v)
44 {
45 	loff_t *spos = v;
46 	loff_t i = *spos, j;
47 	u64 n_packets = 0, n_bytes = 0;
48 	struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
49 	struct hfi1_devdata *dd = dd_from_dev(ibd);
50 	struct hfi1_ctxtdata *rcd;
51 
52 	for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) {
53 		rcd = hfi1_rcd_get_by_index(dd, j);
54 		if (rcd) {
55 			n_packets += rcd->opstats->stats[i].n_packets;
56 			n_bytes += rcd->opstats->stats[i].n_bytes;
57 		}
58 		hfi1_rcd_put(rcd);
59 	}
60 	for_each_possible_cpu(j) {
61 		struct hfi1_opcode_stats_perctx *sp =
62 			per_cpu_ptr(dd->tx_opstats, j);
63 
64 		n_packets += sp->stats[i].n_packets;
65 		n_bytes += sp->stats[i].n_bytes;
66 	}
67 	if (!n_packets && !n_bytes)
68 		return SEQ_SKIP;
69 	if (!ibd->fault->n_rxfaults[i] && !ibd->fault->n_txfaults[i])
70 		return SEQ_SKIP;
71 	seq_printf(s, "%02llx %llu/%llu (faults rx:%llu faults: tx:%llu)\n", i,
72 		   (unsigned long long)n_packets,
73 		   (unsigned long long)n_bytes,
74 		   (unsigned long long)ibd->fault->n_rxfaults[i],
75 		   (unsigned long long)ibd->fault->n_txfaults[i]);
76 	return 0;
77 }
78 
/*
 * Instantiate the seq_file plumbing for the fault_stats file from the
 * _fault_stats_seq_* callbacks above.  The resulting _fault_stats_file_ops
 * is what hfi1_fault_init_debugfs() registers.
 * NOTE(review): the DEBUGFS_* macros are presumably provided by
 * "debugfs.h" - confirm there for the exact symbols they generate.
 */
DEBUGFS_SEQ_FILE_OPS(fault_stats);
DEBUGFS_SEQ_FILE_OPEN(fault_stats);
DEBUGFS_FILE_OPS(fault_stats);
82 
fault_opcodes_open(struct inode * inode,struct file * file)83 static int fault_opcodes_open(struct inode *inode, struct file *file)
84 {
85 	file->private_data = inode->i_private;
86 	return nonseekable_open(inode, file);
87 }
88 
fault_opcodes_write(struct file * file,const char __user * buf,size_t len,loff_t * pos)89 static ssize_t fault_opcodes_write(struct file *file, const char __user *buf,
90 				   size_t len, loff_t *pos)
91 {
92 	ssize_t ret = 0;
93 	/* 1280 = 256 opcodes * 4 chars/opcode + 255 commas + NULL */
94 	size_t copy, datalen = 1280;
95 	char *data, *token, *ptr, *end;
96 	struct fault *fault = file->private_data;
97 
98 	data = kcalloc(datalen, sizeof(*data), GFP_KERNEL);
99 	if (!data)
100 		return -ENOMEM;
101 	copy = min(len, datalen - 1);
102 	if (copy_from_user(data, buf, copy)) {
103 		ret = -EFAULT;
104 		goto free_data;
105 	}
106 
107 	ptr = data;
108 	token = ptr;
109 	for (ptr = data; *ptr; ptr = end + 1, token = ptr) {
110 		char *dash;
111 		unsigned long range_start, range_end, i;
112 		bool remove = false;
113 		unsigned long bound = 1U << BITS_PER_BYTE;
114 
115 		end = strchr(ptr, ',');
116 		if (end)
117 			*end = '\0';
118 		if (token[0] == '-') {
119 			remove = true;
120 			token++;
121 		}
122 		dash = strchr(token, '-');
123 		if (dash)
124 			*dash = '\0';
125 		if (kstrtoul(token, 0, &range_start))
126 			break;
127 		if (dash) {
128 			token = dash + 1;
129 			if (kstrtoul(token, 0, &range_end))
130 				break;
131 		} else {
132 			range_end = range_start;
133 		}
134 		if (range_start == range_end && range_start == -1UL) {
135 			bitmap_zero(fault->opcodes, sizeof(fault->opcodes) *
136 				    BITS_PER_BYTE);
137 			break;
138 		}
139 		/* Check the inputs */
140 		if (range_start >= bound || range_end >= bound)
141 			break;
142 
143 		for (i = range_start; i <= range_end; i++) {
144 			if (remove)
145 				clear_bit(i, fault->opcodes);
146 			else
147 				set_bit(i, fault->opcodes);
148 		}
149 		if (!end)
150 			break;
151 	}
152 	ret = len;
153 
154 free_data:
155 	kfree(data);
156 	return ret;
157 }
158 
fault_opcodes_read(struct file * file,char __user * buf,size_t len,loff_t * pos)159 static ssize_t fault_opcodes_read(struct file *file, char __user *buf,
160 				  size_t len, loff_t *pos)
161 {
162 	ssize_t ret = 0;
163 	char *data;
164 	size_t datalen = 1280, size = 0; /* see fault_opcodes_write() */
165 	unsigned long bit = 0, zero = 0;
166 	struct fault *fault = file->private_data;
167 	size_t bitsize = sizeof(fault->opcodes) * BITS_PER_BYTE;
168 
169 	data = kcalloc(datalen, sizeof(*data), GFP_KERNEL);
170 	if (!data)
171 		return -ENOMEM;
172 	bit = find_first_bit(fault->opcodes, bitsize);
173 	while (bit < bitsize) {
174 		zero = find_next_zero_bit(fault->opcodes, bitsize, bit);
175 		if (zero - 1 != bit)
176 			size += scnprintf(data + size,
177 					 datalen - size - 1,
178 					 "0x%lx-0x%lx,", bit, zero - 1);
179 		else
180 			size += scnprintf(data + size,
181 					 datalen - size - 1, "0x%lx,",
182 					 bit);
183 		bit = find_next_bit(fault->opcodes, bitsize, zero);
184 	}
185 	data[size - 1] = '\n';
186 	data[size] = '\0';
187 	ret = simple_read_from_buffer(buf, len, pos, data, size);
188 	kfree(data);
189 	return ret;
190 }
191 
192 static const struct file_operations __fault_opcodes_fops = {
193 	.owner = THIS_MODULE,
194 	.open = fault_opcodes_open,
195 	.read = fault_opcodes_read,
196 	.write = fault_opcodes_write,
197 };
198 
hfi1_fault_exit_debugfs(struct hfi1_ibdev * ibd)199 void hfi1_fault_exit_debugfs(struct hfi1_ibdev *ibd)
200 {
201 	if (ibd->fault)
202 		debugfs_remove_recursive(ibd->fault->dir);
203 	kfree(ibd->fault);
204 	ibd->fault = NULL;
205 }
206 
hfi1_fault_init_debugfs(struct hfi1_ibdev * ibd)207 int hfi1_fault_init_debugfs(struct hfi1_ibdev *ibd)
208 {
209 	struct dentry *parent = ibd->hfi1_ibdev_dbg;
210 	struct dentry *fault_dir;
211 
212 	ibd->fault = kzalloc(sizeof(*ibd->fault), GFP_KERNEL);
213 	if (!ibd->fault)
214 		return -ENOMEM;
215 
216 	ibd->fault->attr.interval = 1;
217 	ibd->fault->attr.require_end = ULONG_MAX;
218 	ibd->fault->attr.stacktrace_depth = 32;
219 	ibd->fault->attr.dname = NULL;
220 	ibd->fault->attr.verbose = 0;
221 	ibd->fault->enable = false;
222 	ibd->fault->opcode = false;
223 	ibd->fault->fault_skip = 0;
224 	ibd->fault->skip = 0;
225 	ibd->fault->direction = HFI1_FAULT_DIR_TXRX;
226 	ibd->fault->suppress_err = false;
227 	bitmap_zero(ibd->fault->opcodes,
228 		    sizeof(ibd->fault->opcodes) * BITS_PER_BYTE);
229 
230 	fault_dir =
231 		fault_create_debugfs_attr("fault", parent, &ibd->fault->attr);
232 	if (IS_ERR(fault_dir)) {
233 		kfree(ibd->fault);
234 		ibd->fault = NULL;
235 		return -ENOENT;
236 	}
237 	ibd->fault->dir = fault_dir;
238 
239 	debugfs_create_file("fault_stats", 0444, fault_dir, ibd,
240 			    &_fault_stats_file_ops);
241 	debugfs_create_bool("enable", 0600, fault_dir, &ibd->fault->enable);
242 	debugfs_create_bool("suppress_err", 0600, fault_dir,
243 			    &ibd->fault->suppress_err);
244 	debugfs_create_bool("opcode_mode", 0600, fault_dir,
245 			    &ibd->fault->opcode);
246 	debugfs_create_file("opcodes", 0600, fault_dir, ibd->fault,
247 			    &__fault_opcodes_fops);
248 	debugfs_create_u64("skip_pkts", 0600, fault_dir,
249 			   &ibd->fault->fault_skip);
250 	debugfs_create_u64("skip_usec", 0600, fault_dir,
251 			   &ibd->fault->fault_skip_usec);
252 	debugfs_create_u8("direction", 0600, fault_dir, &ibd->fault->direction);
253 
254 	return 0;
255 }
256 
hfi1_dbg_fault_suppress_err(struct hfi1_ibdev * ibd)257 bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
258 {
259 	if (ibd->fault)
260 		return ibd->fault->suppress_err;
261 	return false;
262 }
263 
__hfi1_should_fault(struct hfi1_ibdev * ibd,u32 opcode,u8 direction)264 static bool __hfi1_should_fault(struct hfi1_ibdev *ibd, u32 opcode,
265 				u8 direction)
266 {
267 	bool ret = false;
268 
269 	if (!ibd->fault || !ibd->fault->enable)
270 		return false;
271 	if (!(ibd->fault->direction & direction))
272 		return false;
273 	if (ibd->fault->opcode) {
274 		if (bitmap_empty(ibd->fault->opcodes,
275 				 (sizeof(ibd->fault->opcodes) *
276 				  BITS_PER_BYTE)))
277 			return false;
278 		if (!(test_bit(opcode, ibd->fault->opcodes)))
279 			return false;
280 	}
281 	if (ibd->fault->fault_skip_usec &&
282 	    time_before(jiffies, ibd->fault->skip_usec))
283 		return false;
284 	if (ibd->fault->fault_skip && ibd->fault->skip) {
285 		ibd->fault->skip--;
286 		return false;
287 	}
288 	ret = should_fail(&ibd->fault->attr, 1);
289 	if (ret) {
290 		ibd->fault->skip = ibd->fault->fault_skip;
291 		ibd->fault->skip_usec = jiffies +
292 			usecs_to_jiffies(ibd->fault->fault_skip_usec);
293 	}
294 	return ret;
295 }
296 
hfi1_dbg_should_fault_tx(struct rvt_qp * qp,u32 opcode)297 bool hfi1_dbg_should_fault_tx(struct rvt_qp *qp, u32 opcode)
298 {
299 	struct hfi1_ibdev *ibd = to_idev(qp->ibqp.device);
300 
301 	if (__hfi1_should_fault(ibd, opcode, HFI1_FAULT_DIR_TX)) {
302 		trace_hfi1_fault_opcode(qp, opcode);
303 		ibd->fault->n_txfaults[opcode]++;
304 		return true;
305 	}
306 	return false;
307 }
308 
hfi1_dbg_should_fault_rx(struct hfi1_packet * packet)309 bool hfi1_dbg_should_fault_rx(struct hfi1_packet *packet)
310 {
311 	struct hfi1_ibdev *ibd = &packet->rcd->dd->verbs_dev;
312 
313 	if (__hfi1_should_fault(ibd, packet->opcode, HFI1_FAULT_DIR_RX)) {
314 		trace_hfi1_fault_packet(packet);
315 		ibd->fault->n_rxfaults[packet->opcode]++;
316 		return true;
317 	}
318 	return false;
319 }
320