xref: /freebsd/sys/dev/qat/qat_common/adf_aer.c (revision 4f0c9b76cf75724ef0b9c59bb8c182be24361d7c)
1 /* SPDX-License-Identifier: BSD-3-Clause */
2 /* Copyright(c) 2007-2022 Intel Corporation */
3 /* $FreeBSD$ */
4 #include "qat_freebsd.h"
5 #include "adf_cfg.h"
6 #include "adf_common_drv.h"
7 #include "adf_accel_devices.h"
8 #include "icp_qat_uclo.h"
9 #include "icp_qat_fw.h"
10 #include "icp_qat_fw_init_admin.h"
11 #include "adf_cfg_strings.h"
12 #include "adf_transport_access_macros.h"
13 #include "adf_transport_internal.h"
14 #include <sys/bus.h>
15 #include <dev/pci/pcireg.h>
16 #include <dev/pci/pcivar.h>
17 #include <sys/systm.h>
18 
/*
 * Bits OR-ed into the AER uncorrectable error mask register
 * (PCIR_AER_UC_MASK) by adf_dev_pre_reset() before a device reset.
 * In the PCIe AER register layout bit 14 is Completion Timeout, bit 20 is
 * Unsupported Request and bit 22 is Uncorrectable Internal Error.
 */
#define ADF_PPAERUCM_MASK (BIT(14) | BIT(20) | BIT(22))
20 
21 static struct workqueue_struct *fatal_error_wq;
22 struct adf_fatal_error_data {
23 	struct adf_accel_dev *accel_dev;
24 	struct work_struct work;
25 };
26 
/* Workqueue on which device reset jobs (adf_device_reset_worker) run. */
static struct workqueue_struct *device_reset_wq;
28 
29 void
30 linux_complete_common(struct completion *c, int all)
31 {
32 	int wakeup_swapper;
33 
34 	sleepq_lock(c);
35 	c->done++;
36 	if (all)
37 		wakeup_swapper = sleepq_broadcast(c, SLEEPQ_SLEEP, 0, 0);
38 	else
39 		wakeup_swapper = sleepq_signal(c, SLEEPQ_SLEEP, 0, 0);
40 	sleepq_release(c);
41 	if (wakeup_swapper)
42 		kick_proc0();
43 }
44 
45 /* reset dev data */
46 struct adf_reset_dev_data {
47 	int mode;
48 	struct adf_accel_dev *accel_dev;
49 	struct completion compl;
50 	struct work_struct reset_work;
51 };
52 
53 int
54 adf_aer_store_ppaerucm_reg(device_t dev, struct adf_hw_device_data *hw_data)
55 {
56 	unsigned int aer_offset, reg_val = 0;
57 
58 	if (!hw_data)
59 		return -EINVAL;
60 
61 	if (pci_find_extcap(dev, PCIZ_AER, &aer_offset) == 0) {
62 		reg_val =
63 		    pci_read_config(dev, aer_offset + PCIR_AER_UC_MASK, 4);
64 
65 		hw_data->aerucm_mask = reg_val;
66 	} else {
67 		device_printf(dev,
68 			      "Unable to find AER capability of the device\n");
69 		return -ENODEV;
70 	}
71 
72 	return 0;
73 }
74 
75 void
76 adf_reset_sbr(struct adf_accel_dev *accel_dev)
77 {
78 	device_t pdev = accel_to_pci_dev(accel_dev);
79 	device_t parent = device_get_parent(device_get_parent(pdev));
80 	uint16_t bridge_ctl = 0;
81 
82 	if (accel_dev->is_vf)
83 		return;
84 
85 	if (!parent)
86 		parent = pdev;
87 
88 	if (!pcie_wait_for_pending_transactions(pdev, 0))
89 		device_printf(GET_DEV(accel_dev),
90 			      "Transaction still in progress. Proceeding\n");
91 
92 	device_printf(GET_DEV(accel_dev), "Secondary bus reset\n");
93 
94 	pci_save_state(pdev);
95 	bridge_ctl = pci_read_config(parent, PCIR_BRIDGECTL_1, 2);
96 	bridge_ctl |= PCIB_BCR_SECBUS_RESET;
97 	pci_write_config(parent, PCIR_BRIDGECTL_1, bridge_ctl, 2);
98 	pause_ms("adfrst", 100);
99 	bridge_ctl &= ~PCIB_BCR_SECBUS_RESET;
100 	pci_write_config(parent, PCIR_BRIDGECTL_1, bridge_ctl, 2);
101 	pause_ms("adfrst", 100);
102 	pci_restore_state(pdev);
103 }
104 
105 void
106 adf_reset_flr(struct adf_accel_dev *accel_dev)
107 {
108 	device_t pdev = accel_to_pci_dev(accel_dev);
109 
110 	pci_save_state(pdev);
111 	if (pcie_flr(pdev,
112 		     max(pcie_get_max_completion_timeout(pdev) / 1000, 10),
113 		     true)) {
114 		pci_restore_state(pdev);
115 		return;
116 	}
117 	pci_restore_state(pdev);
118 	device_printf(GET_DEV(accel_dev),
119 		      "FLR qat_dev%d failed trying secondary bus reset\n",
120 		      accel_dev->accel_id);
121 	adf_reset_sbr(accel_dev);
122 }
123 
124 void
125 adf_dev_pre_reset(struct adf_accel_dev *accel_dev)
126 {
127 	struct adf_hw_device_data *hw_device = accel_dev->hw_device;
128 	device_t pdev = accel_to_pci_dev(accel_dev);
129 	u32 aer_offset, reg_val = 0;
130 
131 	if (pci_find_extcap(pdev, PCIZ_AER, &aer_offset) == 0) {
132 		reg_val =
133 		    pci_read_config(pdev, aer_offset + PCIR_AER_UC_MASK, 4);
134 		reg_val |= ADF_PPAERUCM_MASK;
135 		pci_write_config(pdev,
136 				 aer_offset + PCIR_AER_UC_MASK,
137 				 reg_val,
138 				 4);
139 	} else {
140 		device_printf(pdev,
141 			      "Unable to find AER capability of the device\n");
142 	}
143 
144 	if (hw_device->disable_arb) {
145 		device_printf(GET_DEV(accel_dev), "Disable arbiter.\n");
146 		hw_device->disable_arb(accel_dev);
147 	}
148 }
149 
150 void
151 adf_dev_post_reset(struct adf_accel_dev *accel_dev)
152 {
153 	struct adf_hw_device_data *hw_device = accel_dev->hw_device;
154 	device_t pdev = accel_to_pci_dev(accel_dev);
155 	u32 aer_offset;
156 
157 	if (pci_find_extcap(pdev, PCIZ_AER, &aer_offset) == 0) {
158 		pci_write_config(pdev,
159 				 aer_offset + PCIR_AER_UC_MASK,
160 				 hw_device->aerucm_mask,
161 				 4);
162 	} else {
163 		device_printf(pdev,
164 			      "Unable to find AER capability of the device\n");
165 	}
166 }
167 
168 void
169 adf_dev_restore(struct adf_accel_dev *accel_dev)
170 {
171 	struct adf_hw_device_data *hw_device = accel_dev->hw_device;
172 	device_t pdev = accel_to_pci_dev(accel_dev);
173 
174 	if (hw_device->pre_reset) {
175 		dev_dbg(GET_DEV(accel_dev), "Performing pre reset save\n");
176 		hw_device->pre_reset(accel_dev);
177 	}
178 
179 	if (hw_device->reset_device) {
180 		device_printf(GET_DEV(accel_dev),
181 			      "Resetting device qat_dev%d\n",
182 			      accel_dev->accel_id);
183 		hw_device->reset_device(accel_dev);
184 		pci_restore_state(pdev);
185 		pci_save_state(pdev);
186 	}
187 
188 	if (hw_device->post_reset) {
189 		dev_dbg(GET_DEV(accel_dev), "Performing post reset restore\n");
190 		hw_device->post_reset(accel_dev);
191 	}
192 }
193 
194 static void
195 adf_device_reset_worker(struct work_struct *work)
196 {
197 	struct adf_reset_dev_data *reset_data =
198 	    container_of(work, struct adf_reset_dev_data, reset_work);
199 	struct adf_accel_dev *accel_dev = reset_data->accel_dev;
200 
201 	if (adf_dev_restarting_notify(accel_dev)) {
202 		device_printf(GET_DEV(accel_dev),
203 			      "Unable to send RESTARTING notification.\n");
204 		return;
205 	}
206 
207 	if (adf_dev_stop(accel_dev)) {
208 		device_printf(GET_DEV(accel_dev), "Stopping device failed.\n");
209 		return;
210 	}
211 
212 	adf_dev_shutdown(accel_dev);
213 
214 	if (adf_dev_init(accel_dev) || adf_dev_start(accel_dev)) {
215 		/* The device hanged and we can't restart it */
216 		/* so stop here */
217 		device_printf(GET_DEV(accel_dev), "Restart device failed\n");
218 		if (reset_data->mode == ADF_DEV_RESET_ASYNC)
219 			kfree(reset_data);
220 		WARN(1, "QAT: device restart failed. Device is unusable\n");
221 		return;
222 	}
223 
224 	adf_dev_restarted_notify(accel_dev);
225 	clear_bit(ADF_STATUS_RESTARTING, &accel_dev->status);
226 
227 	/* The dev is back alive. Notify the caller if in sync mode */
228 	if (reset_data->mode == ADF_DEV_RESET_SYNC)
229 		complete(&reset_data->compl);
230 	else
231 		kfree(reset_data);
232 }
233 
234 int
235 adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev,
236 			   enum adf_dev_reset_mode mode)
237 {
238 	struct adf_reset_dev_data *reset_data;
239 	if (!adf_dev_started(accel_dev) ||
240 	    test_bit(ADF_STATUS_RESTARTING, &accel_dev->status))
241 		return 0;
242 	set_bit(ADF_STATUS_RESTARTING, &accel_dev->status);
243 	reset_data = kzalloc(sizeof(*reset_data), GFP_ATOMIC);
244 	if (!reset_data)
245 		return -ENOMEM;
246 	reset_data->accel_dev = accel_dev;
247 	init_completion(&reset_data->compl);
248 	reset_data->mode = mode;
249 	INIT_WORK(&reset_data->reset_work, adf_device_reset_worker);
250 	queue_work(device_reset_wq, &reset_data->reset_work);
251 	/* If in sync mode wait for the result */
252 	if (mode == ADF_DEV_RESET_SYNC) {
253 		int ret = 0;
254 		/* Maximum device reset time is 10 seconds */
255 		unsigned long wait_jiffies = msecs_to_jiffies(10000);
256 		unsigned long timeout =
257 		    wait_for_completion_timeout(&reset_data->compl,
258 						wait_jiffies);
259 		if (!timeout) {
260 			device_printf(GET_DEV(accel_dev),
261 				      "Reset device timeout expired\n");
262 			ret = -EFAULT;
263 		}
264 		kfree(reset_data);
265 		return ret;
266 	}
267 	return 0;
268 }
269 
270 int
271 adf_dev_autoreset(struct adf_accel_dev *accel_dev)
272 {
273 	if (accel_dev->autoreset_on_error)
274 		return adf_dev_reset(accel_dev, ADF_DEV_RESET_ASYNC);
275 	return 0;
276 }
277 
278 static void
279 adf_notify_fatal_error_work(struct work_struct *work)
280 {
281 	struct adf_fatal_error_data *wq_data =
282 	    container_of(work, struct adf_fatal_error_data, work);
283 	struct adf_accel_dev *accel_dev = wq_data->accel_dev;
284 
285 	adf_error_notifier((uintptr_t)accel_dev);
286 	if (!accel_dev->is_vf) {
287 		if (accel_dev->u1.pf.vf_info)
288 			adf_pf2vf_notify_fatal_error(accel_dev);
289 		adf_dev_autoreset(accel_dev);
290 	}
291 
292 	kfree(wq_data);
293 }
294 
295 int
296 adf_notify_fatal_error(struct adf_accel_dev *accel_dev)
297 {
298 	struct adf_fatal_error_data *wq_data;
299 
300 	wq_data = kzalloc(sizeof(*wq_data), GFP_ATOMIC);
301 	if (!wq_data) {
302 		device_printf(GET_DEV(accel_dev),
303 			      "Failed to allocate memory\n");
304 		return ENOMEM;
305 	}
306 	wq_data->accel_dev = accel_dev;
307 
308 	INIT_WORK(&wq_data->work, adf_notify_fatal_error_work);
309 	queue_work(fatal_error_wq, &wq_data->work);
310 
311 	return 0;
312 }
313 
314 int __init
315 adf_init_fatal_error_wq(void)
316 {
317 	fatal_error_wq = create_workqueue("qat_fatal_error_wq");
318 	return !fatal_error_wq ? EFAULT : 0;
319 }
320 
321 void
322 adf_exit_fatal_error_wq(void)
323 {
324 	if (fatal_error_wq)
325 		destroy_workqueue(fatal_error_wq);
326 	fatal_error_wq = NULL;
327 }
328 
329 int
330 adf_init_aer(void)
331 {
332 	device_reset_wq = create_workqueue("qat_device_reset_wq");
333 	return !device_reset_wq ? -EFAULT : 0;
334 }
335 
336 void
337 adf_exit_aer(void)
338 {
339 	if (device_reset_wq)
340 		destroy_workqueue(device_reset_wq);
341 	device_reset_wq = NULL;
342 }
343