xref: /freebsd/sys/dev/qat/qat_common/adf_aer.c (revision 78cd75393ec79565c63927bf200f06f839a1dc05)
1 /* SPDX-License-Identifier: BSD-3-Clause */
2 /* Copyright(c) 2007-2022 Intel Corporation */
3 #include "qat_freebsd.h"
4 #include "adf_cfg.h"
5 #include "adf_common_drv.h"
6 #include "adf_accel_devices.h"
7 #include "icp_qat_uclo.h"
8 #include "icp_qat_fw.h"
9 #include "icp_qat_fw_init_admin.h"
10 #include "adf_cfg_strings.h"
11 #include "adf_transport_access_macros.h"
12 #include "adf_transport_internal.h"
13 #include <sys/bus.h>
14 #include <dev/pci/pcireg.h>
15 #include <dev/pci/pcivar.h>
16 #include <sys/systm.h>
17 
/* Uncorrectable-error bits additionally masked during a reset (see adf_dev_pre_reset). */
#define ADF_PPAERUCM_MASK (BIT(14) | BIT(20) | BIT(22))

/* Workqueue on which fatal-error notifications are processed. */
static struct workqueue_struct *fatal_error_wq;

/* Per-notification context handed to adf_notify_fatal_error_work(). */
struct adf_fatal_error_data {
	struct adf_accel_dev *accel_dev; /* device the fatal error occurred on */
	struct work_struct work;	 /* embedded work item for fatal_error_wq */
};

/* Workqueue on which device resets scheduled by adf_dev_aer_schedule_reset() run. */
static struct workqueue_struct *device_reset_wq;
27 
28 void
29 linux_complete_common(struct completion *c, int all)
30 {
31 	int wakeup_swapper;
32 
33 	sleepq_lock(c);
34 	c->done++;
35 	if (all)
36 		wakeup_swapper = sleepq_broadcast(c, SLEEPQ_SLEEP, 0, 0);
37 	else
38 		wakeup_swapper = sleepq_signal(c, SLEEPQ_SLEEP, 0, 0);
39 	sleepq_release(c);
40 	if (wakeup_swapper)
41 		kick_proc0();
42 }
43 
/* Context for one scheduled device reset, owned by the reset worker/waiter. */
struct adf_reset_dev_data {
	int mode;			 /* ADF_DEV_RESET_SYNC or ADF_DEV_RESET_ASYNC */
	struct adf_accel_dev *accel_dev; /* device being reset */
	struct completion compl;	 /* signalled by the worker in sync mode */
	struct work_struct reset_work;	 /* embedded work item for device_reset_wq */
};
51 
52 int
53 adf_aer_store_ppaerucm_reg(device_t dev, struct adf_hw_device_data *hw_data)
54 {
55 	unsigned int aer_offset, reg_val = 0;
56 
57 	if (!hw_data)
58 		return -EINVAL;
59 
60 	if (pci_find_extcap(dev, PCIZ_AER, &aer_offset) == 0) {
61 		reg_val =
62 		    pci_read_config(dev, aer_offset + PCIR_AER_UC_MASK, 4);
63 
64 		hw_data->aerucm_mask = reg_val;
65 	} else {
66 		device_printf(dev,
67 			      "Unable to find AER capability of the device\n");
68 		return -ENODEV;
69 	}
70 
71 	return 0;
72 }
73 
/*
 * Reset the device via a secondary bus reset: pulse the secondary-bus-reset
 * bit in the parent bridge's bridge control register.  No-op for VFs (a VF
 * cannot reset the bus).  PCI config state is saved before and restored
 * after the reset pulse.
 */
void
adf_reset_sbr(struct adf_accel_dev *accel_dev)
{
	device_t pdev = accel_to_pci_dev(accel_dev);
	/* Two hops up: pdev's parent is the pcib child; its parent is the bridge. */
	device_t parent = device_get_parent(device_get_parent(pdev));
	uint16_t bridge_ctl = 0;

	if (accel_dev->is_vf)
		return;

	/* Fall back to the device itself if no bridge could be found. */
	if (!parent)
		parent = pdev;

	/* Best effort: warn but proceed if transactions are still pending. */
	if (!pcie_wait_for_pending_transactions(pdev, 0))
		device_printf(GET_DEV(accel_dev),
			      "Transaction still in progress. Proceeding\n");

	device_printf(GET_DEV(accel_dev), "Secondary bus reset\n");

	pci_save_state(pdev);
	/* Assert the secondary bus reset bit, hold 100 ms, then deassert. */
	bridge_ctl = pci_read_config(parent, PCIR_BRIDGECTL_1, 2);
	bridge_ctl |= PCIB_BCR_SECBUS_RESET;
	pci_write_config(parent, PCIR_BRIDGECTL_1, bridge_ctl, 2);
	pause_ms("adfrst", 100);
	bridge_ctl &= ~PCIB_BCR_SECBUS_RESET;
	pci_write_config(parent, PCIR_BRIDGECTL_1, bridge_ctl, 2);
	/* Give the device time to come back before touching config space. */
	pause_ms("adfrst", 100);
	pci_restore_state(pdev);
}
103 
104 void
105 adf_reset_flr(struct adf_accel_dev *accel_dev)
106 {
107 	device_t pdev = accel_to_pci_dev(accel_dev);
108 
109 	pci_save_state(pdev);
110 	if (pcie_flr(pdev,
111 		     max(pcie_get_max_completion_timeout(pdev) / 1000, 10),
112 		     true)) {
113 		pci_restore_state(pdev);
114 		return;
115 	}
116 	pci_restore_state(pdev);
117 	device_printf(GET_DEV(accel_dev),
118 		      "FLR qat_dev%d failed trying secondary bus reset\n",
119 		      accel_dev->accel_id);
120 	adf_reset_sbr(accel_dev);
121 }
122 
123 void
124 adf_dev_pre_reset(struct adf_accel_dev *accel_dev)
125 {
126 	struct adf_hw_device_data *hw_device = accel_dev->hw_device;
127 	device_t pdev = accel_to_pci_dev(accel_dev);
128 	u32 aer_offset, reg_val = 0;
129 
130 	if (pci_find_extcap(pdev, PCIZ_AER, &aer_offset) == 0) {
131 		reg_val =
132 		    pci_read_config(pdev, aer_offset + PCIR_AER_UC_MASK, 4);
133 		reg_val |= ADF_PPAERUCM_MASK;
134 		pci_write_config(pdev,
135 				 aer_offset + PCIR_AER_UC_MASK,
136 				 reg_val,
137 				 4);
138 	} else {
139 		device_printf(pdev,
140 			      "Unable to find AER capability of the device\n");
141 	}
142 
143 	if (hw_device->disable_arb) {
144 		device_printf(GET_DEV(accel_dev), "Disable arbiter.\n");
145 		hw_device->disable_arb(accel_dev);
146 	}
147 }
148 
149 void
150 adf_dev_post_reset(struct adf_accel_dev *accel_dev)
151 {
152 	struct adf_hw_device_data *hw_device = accel_dev->hw_device;
153 	device_t pdev = accel_to_pci_dev(accel_dev);
154 	u32 aer_offset;
155 
156 	if (pci_find_extcap(pdev, PCIZ_AER, &aer_offset) == 0) {
157 		pci_write_config(pdev,
158 				 aer_offset + PCIR_AER_UC_MASK,
159 				 hw_device->aerucm_mask,
160 				 4);
161 	} else {
162 		device_printf(pdev,
163 			      "Unable to find AER capability of the device\n");
164 	}
165 }
166 
167 void
168 adf_dev_restore(struct adf_accel_dev *accel_dev)
169 {
170 	struct adf_hw_device_data *hw_device = accel_dev->hw_device;
171 	device_t pdev = accel_to_pci_dev(accel_dev);
172 
173 	if (hw_device->pre_reset) {
174 		dev_dbg(GET_DEV(accel_dev), "Performing pre reset save\n");
175 		hw_device->pre_reset(accel_dev);
176 	}
177 
178 	if (hw_device->reset_device) {
179 		device_printf(GET_DEV(accel_dev),
180 			      "Resetting device qat_dev%d\n",
181 			      accel_dev->accel_id);
182 		hw_device->reset_device(accel_dev);
183 		pci_restore_state(pdev);
184 		pci_save_state(pdev);
185 	}
186 
187 	if (hw_device->post_reset) {
188 		dev_dbg(GET_DEV(accel_dev), "Performing post reset restore\n");
189 		hw_device->post_reset(accel_dev);
190 	}
191 }
192 
193 static void
194 adf_device_reset_worker(struct work_struct *work)
195 {
196 	struct adf_reset_dev_data *reset_data =
197 	    container_of(work, struct adf_reset_dev_data, reset_work);
198 	struct adf_accel_dev *accel_dev = reset_data->accel_dev;
199 
200 	if (adf_dev_restarting_notify(accel_dev)) {
201 		device_printf(GET_DEV(accel_dev),
202 			      "Unable to send RESTARTING notification.\n");
203 		return;
204 	}
205 
206 	if (adf_dev_stop(accel_dev)) {
207 		device_printf(GET_DEV(accel_dev), "Stopping device failed.\n");
208 		return;
209 	}
210 
211 	adf_dev_shutdown(accel_dev);
212 
213 	if (adf_dev_init(accel_dev) || adf_dev_start(accel_dev)) {
214 		/* The device hanged and we can't restart it */
215 		/* so stop here */
216 		device_printf(GET_DEV(accel_dev), "Restart device failed\n");
217 		if (reset_data->mode == ADF_DEV_RESET_ASYNC)
218 			kfree(reset_data);
219 		WARN(1, "QAT: device restart failed. Device is unusable\n");
220 		return;
221 	}
222 
223 	adf_dev_restarted_notify(accel_dev);
224 	clear_bit(ADF_STATUS_RESTARTING, &accel_dev->status);
225 
226 	/* The dev is back alive. Notify the caller if in sync mode */
227 	if (reset_data->mode == ADF_DEV_RESET_SYNC)
228 		complete(&reset_data->compl);
229 	else
230 		kfree(reset_data);
231 }
232 
/*
 * Schedule a device reset on device_reset_wq.  In ADF_DEV_RESET_SYNC mode,
 * block (up to 10 s) until the reset worker signals completion; in ASYNC
 * mode return immediately after queueing.
 *
 * Returns 0 on success or if the device is not started / already
 * restarting, -ENOMEM on allocation failure, -EFAULT on sync timeout.
 */
int
adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev,
			   enum adf_dev_reset_mode mode)
{
	struct adf_reset_dev_data *reset_data;
	/* Nothing to do if not running, or a reset is already in flight. */
	if (!adf_dev_started(accel_dev) ||
	    test_bit(ADF_STATUS_RESTARTING, &accel_dev->status))
		return 0;
	set_bit(ADF_STATUS_RESTARTING, &accel_dev->status);
	reset_data = kzalloc(sizeof(*reset_data), GFP_ATOMIC);
	if (!reset_data)
		return -ENOMEM;
	reset_data->accel_dev = accel_dev;
	init_completion(&reset_data->compl);
	reset_data->mode = mode;
	INIT_WORK(&reset_data->reset_work, adf_device_reset_worker);
	queue_work(device_reset_wq, &reset_data->reset_work);
	/* If in sync mode wait for the result */
	if (mode == ADF_DEV_RESET_SYNC) {
		int ret = 0;
		/* Maximum device reset time is 10 seconds */
		unsigned long wait_jiffies = msecs_to_jiffies(10000);
		unsigned long timeout =
		    wait_for_completion_timeout(&reset_data->compl,
						wait_jiffies);
		if (!timeout) {
			device_printf(GET_DEV(accel_dev),
				      "Reset device timeout expired\n");
			ret = -EFAULT;
		}
		/*
		 * NOTE(review): on timeout the worker may still be running and
		 * will later call complete() on reset_data — freeing it here
		 * looks like a potential use-after-free.  A fix needs
		 * coordinated ownership handoff with adf_device_reset_worker;
		 * confirm against the upstream driver.
		 */
		kfree(reset_data);
		return ret;
	}
	return 0;
}
268 
269 int
270 adf_dev_autoreset(struct adf_accel_dev *accel_dev)
271 {
272 	if (accel_dev->autoreset_on_error)
273 		return adf_dev_reset(accel_dev, ADF_DEV_RESET_ASYNC);
274 	return 0;
275 }
276 
277 static void
278 adf_notify_fatal_error_work(struct work_struct *work)
279 {
280 	struct adf_fatal_error_data *wq_data =
281 	    container_of(work, struct adf_fatal_error_data, work);
282 	struct adf_accel_dev *accel_dev = wq_data->accel_dev;
283 
284 	adf_error_notifier((uintptr_t)accel_dev);
285 	if (!accel_dev->is_vf) {
286 		adf_dev_autoreset(accel_dev);
287 	}
288 
289 	kfree(wq_data);
290 }
291 
292 int
293 adf_notify_fatal_error(struct adf_accel_dev *accel_dev)
294 {
295 	struct adf_fatal_error_data *wq_data;
296 
297 	wq_data = kzalloc(sizeof(*wq_data), GFP_ATOMIC);
298 	if (!wq_data) {
299 		device_printf(GET_DEV(accel_dev),
300 			      "Failed to allocate memory\n");
301 		return ENOMEM;
302 	}
303 	wq_data->accel_dev = accel_dev;
304 
305 	INIT_WORK(&wq_data->work, adf_notify_fatal_error_work);
306 	queue_work(fatal_error_wq, &wq_data->work);
307 
308 	return 0;
309 }
310 
311 int __init
312 adf_init_fatal_error_wq(void)
313 {
314 	fatal_error_wq = create_workqueue("qat_fatal_error_wq");
315 	return !fatal_error_wq ? EFAULT : 0;
316 }
317 
318 void
319 adf_exit_fatal_error_wq(void)
320 {
321 	if (fatal_error_wq)
322 		destroy_workqueue(fatal_error_wq);
323 	fatal_error_wq = NULL;
324 }
325 
326 int
327 adf_init_aer(void)
328 {
329 	device_reset_wq = create_workqueue("qat_device_reset_wq");
330 	return !device_reset_wq ? -EFAULT : 0;
331 }
332 
333 void
334 adf_exit_aer(void)
335 {
336 	if (device_reset_wq)
337 		destroy_workqueue(device_reset_wq);
338 	device_reset_wq = NULL;
339 }
340