1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright IBM Corp. 2012
4 *
5 * Author(s):
6 * Jan Glauber <jang@linux.vnet.ibm.com>
7 */
8
9 #define pr_fmt(fmt) "zpci: " fmt
10
11 #include <linux/kernel.h>
12 #include <linux/pci.h>
13 #include <asm/pci_debug.h>
14 #include <asm/pci_dma.h>
15 #include <asm/sclp.h>
16
17 #include "pci_bus.h"
18 #include "pci_report.h"
19
/*
 * Content Code Description for PCI Function Error
 *
 * Payload handed to the error event handler. The structure is packed and
 * the field order/widths must not be changed. Within this file only @fid,
 * @fh and @pec are evaluated; the whole structure is additionally dumped
 * as hex for debugging.
 */
struct zpci_ccdf_err {
	u32 reserved1;
	u32 fh;				/* function handle */
	u32 fid;			/* function id */
	u32 ett		:  4;		/* expected table type */
	u32 mvn		: 12;		/* MSI vector number */
	u32 dmaas	:  8;		/* DMA address space */
	u32		:  6;		/* unnamed padding bits */
	u32 q		:  1;		/* event qualifier */
	u32 rw		:  1;		/* read/write */
	u64 faddr;			/* failing address */
	u32 reserved3;
	u16 reserved4;
	u16 pec;			/* PCI event code */
} __packed;
36
/*
 * Content Code Description for PCI Function Availability
 *
 * Payload handed to the availability event handler. The structure is
 * packed and the field order/widths must not be changed. Within this file
 * only @fid, @fh and @pec are evaluated.
 */
struct zpci_ccdf_avail {
	u32 reserved1;
	u32 fh;				/* function handle */
	u32 fid;			/* function id */
	u32 reserved2;
	u32 reserved3;
	u32 reserved4;
	u32 reserved5;
	u16 reserved6;
	u16 pec;			/* PCI event code */
} __packed;
49
/*
 * Tell whether a driver's error recovery verdict ends automatic recovery.
 *
 * CAN_RECOVER, RECOVERED, NEED_RESET and NONE let recovery continue;
 * every other value is treated as an abort.
 */
static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res)
{
	if (ers_res == PCI_ERS_RESULT_CAN_RECOVER ||
	    ers_res == PCI_ERS_RESULT_RECOVERED ||
	    ers_res == PCI_ERS_RESULT_NEED_RESET ||
	    ers_res == PCI_ERS_RESULT_NONE)
		return false;

	return true;
}
62
is_passed_through(struct pci_dev * pdev)63 static bool is_passed_through(struct pci_dev *pdev)
64 {
65 struct zpci_dev *zdev = to_zpci(pdev);
66 bool ret;
67
68 mutex_lock(&zdev->kzdev_lock);
69 ret = !!zdev->kzdev;
70 mutex_unlock(&zdev->kzdev_lock);
71
72 return ret;
73 }
74
is_driver_supported(struct pci_driver * driver)75 static bool is_driver_supported(struct pci_driver *driver)
76 {
77 if (!driver || !driver->err_handler)
78 return false;
79 if (!driver->err_handler->error_detected)
80 return false;
81 return true;
82 }
83
zpci_event_notify_error_detected(struct pci_dev * pdev,struct pci_driver * driver)84 static pci_ers_result_t zpci_event_notify_error_detected(struct pci_dev *pdev,
85 struct pci_driver *driver)
86 {
87 pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
88
89 ers_res = driver->err_handler->error_detected(pdev, pdev->error_state);
90 pci_uevent_ers(pdev, ers_res);
91 if (ers_result_indicates_abort(ers_res))
92 pr_info("%s: Automatic recovery failed after initial reporting\n", pci_name(pdev));
93 else if (ers_res == PCI_ERS_RESULT_NEED_RESET)
94 pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
95
96 return ers_res;
97 }
98
zpci_event_do_error_state_clear(struct pci_dev * pdev,struct pci_driver * driver)99 static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev,
100 struct pci_driver *driver)
101 {
102 pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
103 struct zpci_dev *zdev = to_zpci(pdev);
104 int rc;
105
106 /* The underlying device may have been disabled by the event */
107 if (!zdev_enabled(zdev))
108 return PCI_ERS_RESULT_NEED_RESET;
109
110 pr_info("%s: Unblocking device access for examination\n", pci_name(pdev));
111 rc = zpci_reset_load_store_blocked(zdev);
112 if (rc) {
113 pr_err("%s: Unblocking device access failed\n", pci_name(pdev));
114 /* Let's try a full reset instead */
115 return PCI_ERS_RESULT_NEED_RESET;
116 }
117
118 if (driver->err_handler->mmio_enabled)
119 ers_res = driver->err_handler->mmio_enabled(pdev);
120 else
121 ers_res = PCI_ERS_RESULT_NONE;
122
123 if (ers_result_indicates_abort(ers_res)) {
124 pr_info("%s: Automatic recovery failed after MMIO re-enable\n",
125 pci_name(pdev));
126 return ers_res;
127 } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) {
128 pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
129 return ers_res;
130 }
131
132 pr_debug("%s: Unblocking DMA\n", pci_name(pdev));
133 rc = zpci_clear_error_state(zdev);
134 if (!rc) {
135 pdev->error_state = pci_channel_io_normal;
136 } else {
137 pr_err("%s: Unblocking DMA failed\n", pci_name(pdev));
138 /* Let's try a full reset instead */
139 return PCI_ERS_RESULT_NEED_RESET;
140 }
141
142 return ers_res;
143 }
144
zpci_event_do_reset(struct pci_dev * pdev,struct pci_driver * driver)145 static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
146 struct pci_driver *driver)
147 {
148 pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
149
150 pr_info("%s: Initiating reset\n", pci_name(pdev));
151 if (zpci_hot_reset_device(to_zpci(pdev))) {
152 pr_err("%s: The reset request failed\n", pci_name(pdev));
153 return ers_res;
154 }
155 pdev->error_state = pci_channel_io_normal;
156
157 if (driver->err_handler->slot_reset)
158 ers_res = driver->err_handler->slot_reset(pdev);
159 else
160 ers_res = PCI_ERS_RESULT_NONE;
161
162 if (ers_result_indicates_abort(ers_res)) {
163 pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev));
164 return ers_res;
165 }
166
167 return ers_res;
168 }
169
170 /* zpci_event_attempt_error_recovery - Try to recover the given PCI function
171 * @pdev: PCI function to recover currently in the error state
172 *
173 * We follow the scheme outlined in Documentation/PCI/pci-error-recovery.rst.
174 * With the simplification that recovery always happens per function
175 * and the platform determines which functions are affected for
176 * multi-function devices.
177 */
zpci_event_attempt_error_recovery(struct pci_dev * pdev)178 static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
179 {
180 pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
181 struct zpci_dev *zdev = to_zpci(pdev);
182 char *status_str = "success";
183 struct pci_driver *driver;
184
185 /*
186 * Ensure that the PCI function is not removed concurrently, no driver
187 * is unbound or probed and that userspace can't access its
188 * configuration space while we perform recovery.
189 */
190 device_lock(&pdev->dev);
191 if (pdev->error_state == pci_channel_io_perm_failure) {
192 ers_res = PCI_ERS_RESULT_DISCONNECT;
193 goto out_unlock;
194 }
195 pdev->error_state = pci_channel_io_frozen;
196
197 if (is_passed_through(pdev)) {
198 pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n",
199 pci_name(pdev));
200 status_str = "failed (pass-through)";
201 goto out_unlock;
202 }
203
204 driver = to_pci_driver(pdev->dev.driver);
205 if (!is_driver_supported(driver)) {
206 if (!driver) {
207 pr_info("%s: Cannot be recovered because no driver is bound to the device\n",
208 pci_name(pdev));
209 status_str = "failed (no driver)";
210 } else {
211 pr_info("%s: The %s driver bound to the device does not support error recovery\n",
212 pci_name(pdev),
213 driver->name);
214 status_str = "failed (no driver support)";
215 }
216 goto out_unlock;
217 }
218
219 ers_res = zpci_event_notify_error_detected(pdev, driver);
220 if (ers_result_indicates_abort(ers_res)) {
221 status_str = "failed (abort on detection)";
222 goto out_unlock;
223 }
224
225 if (ers_res != PCI_ERS_RESULT_NEED_RESET) {
226 ers_res = zpci_event_do_error_state_clear(pdev, driver);
227 if (ers_result_indicates_abort(ers_res)) {
228 status_str = "failed (abort on MMIO enable)";
229 goto out_unlock;
230 }
231 }
232
233 if (ers_res == PCI_ERS_RESULT_NEED_RESET)
234 ers_res = zpci_event_do_reset(pdev, driver);
235
236 /*
237 * ers_res can be PCI_ERS_RESULT_NONE either because the driver
238 * decided to return it, indicating that it abstains from voting
239 * on how to recover, or because it didn't implement the callback.
240 * Both cases assume, that if there is nothing else causing a
241 * disconnect, we recovered successfully.
242 */
243 if (ers_res == PCI_ERS_RESULT_NONE)
244 ers_res = PCI_ERS_RESULT_RECOVERED;
245
246 if (ers_res != PCI_ERS_RESULT_RECOVERED) {
247 pci_uevent_ers(pdev, PCI_ERS_RESULT_DISCONNECT);
248 pr_err("%s: Automatic recovery failed; operator intervention is required\n",
249 pci_name(pdev));
250 status_str = "failed (driver can't recover)";
251 goto out_unlock;
252 }
253
254 pr_info("%s: The device is ready to resume operations\n", pci_name(pdev));
255 if (driver->err_handler->resume)
256 driver->err_handler->resume(pdev);
257 pci_uevent_ers(pdev, PCI_ERS_RESULT_RECOVERED);
258 out_unlock:
259 device_unlock(&pdev->dev);
260 zpci_report_status(zdev, "recovery", status_str);
261
262 return ers_res;
263 }
264
265 /* zpci_event_io_failure - Report PCI channel failure state to driver
266 * @pdev: PCI function for which to report
267 * @es: PCI channel failure state to report
268 */
zpci_event_io_failure(struct pci_dev * pdev,pci_channel_state_t es)269 static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es)
270 {
271 struct pci_driver *driver;
272
273 pci_dev_lock(pdev);
274 pdev->error_state = es;
275 /**
276 * While vfio-pci's error_detected callback notifies user-space QEMU
277 * reacts to this by freezing the guest. In an s390 environment PCI
278 * errors are rarely fatal so this is overkill. Instead in the future
279 * we will inject the error event and let the guest recover the device
280 * itself.
281 */
282 if (is_passed_through(pdev))
283 goto out;
284 driver = to_pci_driver(pdev->dev.driver);
285 if (driver && driver->err_handler && driver->err_handler->error_detected)
286 driver->err_handler->error_detected(pdev, pdev->error_state);
287 out:
288 pci_dev_unlock(pdev);
289 }
290
__zpci_event_error(struct zpci_ccdf_err * ccdf)291 static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
292 {
293 struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
294 struct pci_dev *pdev = NULL;
295 pci_ers_result_t ers_res;
296 u32 fh = 0;
297 int rc;
298
299 zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n",
300 ccdf->fid, ccdf->fh, ccdf->pec);
301 zpci_err("error CCDF:\n");
302 zpci_err_hex(ccdf, sizeof(*ccdf));
303
304 if (zdev) {
305 mutex_lock(&zdev->state_lock);
306 rc = clp_refresh_fh(zdev->fid, &fh);
307 if (rc)
308 goto no_pdev;
309 if (!fh || ccdf->fh != fh) {
310 /* Ignore events with stale handles */
311 zpci_dbg(3, "err fid:%x, fh:%x (stale %x)\n",
312 ccdf->fid, fh, ccdf->fh);
313 goto no_pdev;
314 }
315 zpci_update_fh(zdev, ccdf->fh);
316 if (zdev->zbus->bus)
317 pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
318 }
319
320 pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
321 pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
322
323 if (!pdev)
324 goto no_pdev;
325
326 switch (ccdf->pec) {
327 case 0x002a: /* Error event concerns FMB */
328 case 0x002b:
329 case 0x002c:
330 break;
331 case 0x0040: /* Service Action or Error Recovery Failed */
332 case 0x003b:
333 zpci_event_io_failure(pdev, pci_channel_io_perm_failure);
334 break;
335 default: /* PCI function left in the error state attempt to recover */
336 ers_res = zpci_event_attempt_error_recovery(pdev);
337 if (ers_res != PCI_ERS_RESULT_RECOVERED)
338 zpci_event_io_failure(pdev, pci_channel_io_perm_failure);
339 break;
340 }
341 pci_dev_put(pdev);
342 no_pdev:
343 if (zdev)
344 mutex_unlock(&zdev->state_lock);
345 zpci_zdev_put(zdev);
346 }
347
/*
 * Entry point for PCI function error events. @data is passed on to the
 * handler as a struct zpci_ccdf_err. Events are dropped while zPCI is not
 * enabled.
 */
void zpci_event_error(void *data)
{
	if (!zpci_is_enabled())
		return;

	__zpci_event_error(data);
}
353
/*
 * Handle a function that was deconfigured without a prior request: take
 * over the new function handle @fh, remove the function from its bus,
 * disable it if it is still enabled and leave the zdev in standby state.
 */
static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh)
{
	zpci_update_fh(zdev, fh);

	/*
	 * Passing true hints to the driver that the function is already
	 * unusable.
	 */
	zpci_bus_remove_device(zdev, true);

	/*
	 * The device is gone already, but the zPCI resources still have to
	 * be freed, which happens as part of the disable.
	 */
	if (zdev_enabled(zdev)) {
		zpci_disable_device(zdev);
	}

	zdev->state = ZPCI_FN_STATE_STANDBY;
}
368
zpci_event_reappear(struct zpci_dev * zdev)369 static void zpci_event_reappear(struct zpci_dev *zdev)
370 {
371 lockdep_assert_held(&zdev->state_lock);
372 /*
373 * The zdev is in the reserved state. This means that it was presumed to
374 * go away but there are still undropped references. Now, the platform
375 * announced its availability again. Bring back the lingering zdev
376 * to standby. This is safe because we hold a temporary reference
377 * now so that it won't go away. Account for the re-appearance of the
378 * underlying device by incrementing the reference count.
379 */
380 zdev->state = ZPCI_FN_STATE_STANDBY;
381 zpci_zdev_get(zdev);
382 zpci_dbg(1, "rea fid:%x, fh:%x\n", zdev->fid, zdev->fh);
383 }
384
__zpci_event_availability(struct zpci_ccdf_avail * ccdf)385 static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
386 {
387 struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
388 bool existing_zdev = !!zdev;
389 enum zpci_state state;
390
391 zpci_dbg(3, "avl fid:%x, fh:%x, pec:%x\n",
392 ccdf->fid, ccdf->fh, ccdf->pec);
393
394 if (existing_zdev)
395 mutex_lock(&zdev->state_lock);
396
397 switch (ccdf->pec) {
398 case 0x0301: /* Reserved|Standby -> Configured */
399 if (!zdev) {
400 zdev = zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_CONFIGURED);
401 if (IS_ERR(zdev))
402 break;
403 if (zpci_add_device(zdev)) {
404 kfree(zdev);
405 break;
406 }
407 } else {
408 if (zdev->state == ZPCI_FN_STATE_RESERVED)
409 zpci_event_reappear(zdev);
410 /* the configuration request may be stale */
411 else if (zdev->state != ZPCI_FN_STATE_STANDBY)
412 break;
413 zdev->state = ZPCI_FN_STATE_CONFIGURED;
414 }
415 zpci_scan_configured_device(zdev, ccdf->fh);
416 break;
417 case 0x0302: /* Reserved -> Standby */
418 if (!zdev) {
419 zdev = zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY);
420 if (IS_ERR(zdev))
421 break;
422 if (zpci_add_device(zdev)) {
423 kfree(zdev);
424 break;
425 }
426 } else {
427 if (zdev->state == ZPCI_FN_STATE_RESERVED)
428 zpci_event_reappear(zdev);
429 zpci_update_fh(zdev, ccdf->fh);
430 }
431 break;
432 case 0x0303: /* Deconfiguration requested */
433 if (zdev) {
434 /* The event may have been queued before we configured
435 * the device.
436 */
437 if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
438 break;
439 zpci_update_fh(zdev, ccdf->fh);
440 zpci_deconfigure_device(zdev);
441 }
442 break;
443 case 0x0304: /* Configured -> Standby|Reserved */
444 if (zdev) {
445 /* The event may have been queued before we configured
446 * the device.:
447 */
448 if (zdev->state == ZPCI_FN_STATE_CONFIGURED)
449 zpci_event_hard_deconfigured(zdev, ccdf->fh);
450 /* The 0x0304 event may immediately reserve the device */
451 if (!clp_get_state(zdev->fid, &state) &&
452 state == ZPCI_FN_STATE_RESERVED) {
453 zpci_device_reserved(zdev);
454 }
455 }
456 break;
457 case 0x0306: /* 0x308 or 0x302 for multiple devices */
458 zpci_remove_reserved_devices();
459 zpci_scan_devices();
460 break;
461 case 0x0308: /* Standby -> Reserved */
462 if (!zdev)
463 break;
464 zpci_device_reserved(zdev);
465 break;
466 default:
467 break;
468 }
469 if (existing_zdev) {
470 mutex_unlock(&zdev->state_lock);
471 zpci_zdev_put(zdev);
472 }
473 }
474
/*
 * Entry point for PCI function availability events. @data is passed on to
 * the handler as a struct zpci_ccdf_avail. Events are dropped while zPCI
 * is not enabled.
 */
void zpci_event_availability(void *data)
{
	if (!zpci_is_enabled())
		return;

	__zpci_event_availability(data);
}
480